From 18922761a6537ea173168c3accb24b269f36ff95 Mon Sep 17 00:00:00 2001 From: Dean Troyer Date: Wed, 30 May 2018 16:15:37 -0700 Subject: [PATCH] StarlingX open source release updates Signed-off-by: Dean Troyer --- CONTRIBUTORS.wrs | 8 + LICENSE | 202 + README.rst | 5 + bsp-files/centos-ks-gen.pl | 297 + bsp-files/centos.syslinux.cfg | 188 + bsp-files/filter_out_from_compute | 263 + bsp-files/filter_out_from_compute_lowlatency | 265 + bsp-files/filter_out_from_controller | 52 + bsp-files/filter_out_from_smallsystem | 24 + .../filter_out_from_smallsystem_lowlatency | 23 + bsp-files/filter_out_from_storage | 303 + bsp-files/grub.cfg | 155 + bsp-files/kickstarts/functions.sh | 72 + bsp-files/kickstarts/post_clone_iso_ks.cfg | 41 + bsp-files/kickstarts/post_common.cfg | 104 + .../post_kernel_aio_and_compute.cfg | 98 + .../kickstarts/post_kernel_controller.cfg | 38 + bsp-files/kickstarts/post_kernel_storage.cfg | 32 + .../kickstarts/post_lvm_no_pv_on_rootfs.cfg | 12 + .../kickstarts/post_lvm_pv_on_rootfs.cfg | 17 + bsp-files/kickstarts/post_net_common.cfg | 127 + bsp-files/kickstarts/post_net_controller.cfg | 79 + .../kickstarts/post_platform_conf_aio.cfg | 22 + .../post_platform_conf_aio_lowlatency.cfg | 22 + .../kickstarts/post_platform_conf_compute.cfg | 34 + .../post_platform_conf_compute_lowlatency.cfg | 34 + .../post_platform_conf_controller.cfg | 22 + .../kickstarts/post_platform_conf_storage.cfg | 29 + .../kickstarts/post_pxeboot_controller.cfg | 114 + bsp-files/kickstarts/post_system_aio.cfg | 35 + bsp-files/kickstarts/post_usb_controller.cfg | 80 + bsp-files/kickstarts/post_yow_controller.cfg | 115 + bsp-files/kickstarts/pre_common_head.cfg | 72 + bsp-files/kickstarts/pre_disk_aio.cfg | 102 + bsp-files/kickstarts/pre_disk_compute.cfg | 31 + bsp-files/kickstarts/pre_disk_controller.cfg | 34 + .../kickstarts/pre_disk_setup_common.cfg | 154 + bsp-files/kickstarts/pre_disk_storage.cfg | 31 + bsp-files/kickstarts/pre_pkglist.cfg | 6 + 
.../kickstarts/pre_pkglist_lowlatency.cfg | 8 + bsp-files/pkg-list.pl | 34 + bsp-files/platform_comps.py | 137 + bsp-files/pxeboot.cfg | 180 + bsp-files/pxeboot_grub.cfg | 104 + bsp-files/pxeboot_setup.sh | 111 + bsp-files/upgrades/import.sh | 64 + bsp-files/upgrades/metadata.xml | 24 + installer/initrd/README | 254 + .../centos/build_srpm.data | 12 + .../centos/pxe-network-installer.spec | 155 + .../pxe-network-installer/LICENSE | 202 + .../centos-pxe-compute-install | 27 + .../centos-pxe-compute_lowlatency-install | 27 + .../centos-pxe-controller-install | 27 + .../centos-pxe-smallsystem-install | 27 + .../centos-pxe-smallsystem_lowlatency-install | 27 + .../centos-pxe-storage-install | 27 + .../pxe-network-installer/default | 38 + .../pxe-network-installer/default.static | 47 + .../efi-centos-pxe-compute-install | 9 + .../efi-centos-pxe-compute_lowlatency-install | 9 + .../efi-centos-pxe-controller-install | 9 + .../efi-centos-pxe-smallsystem-install | 9 + ...-centos-pxe-smallsystem_lowlatency-install | 9 + .../efi-centos-pxe-storage-install | 9 + .../pxe-network-installer/pxe-grub.cfg | 56 + .../pxe-network-installer/pxe-grub.cfg.static | 70 + .../pxe-network-installer/pxeboot-update.sh | 172 + kickstart/LICENSE | 202 + kickstart/centos/build_srpm.data | 3 + kickstart/centos/platform-kickstarts.spec | 64 + mtce-common/LICENSE | 202 + mtce-common/PKG-INFO | 16 + mtce-common/centos/build_srpm.data | 3 + mtce-common/centos/cgts-mtce-common.spec | 768 ++ mtce-common/cgts-mtce-common-1.0/.gitignore | 4 + mtce-common/cgts-mtce-common-1.0/LICENSE | 202 + mtce-common/cgts-mtce-common-1.0/Makefile | 47 + .../cgts-mtce-common-1.0/alarm/Makefile | 44 + .../cgts-mtce-common-1.0/alarm/alarm.cpp | 189 + .../cgts-mtce-common-1.0/alarm/alarm.h | 166 + .../cgts-mtce-common-1.0/alarm/alarmData.cpp | 195 + .../cgts-mtce-common-1.0/alarm/alarmHdlr.cpp | 134 + .../cgts-mtce-common-1.0/alarm/alarmInit.cpp | 305 + .../cgts-mtce-common-1.0/alarm/alarmMgr.cpp | 106 + 
.../cgts-mtce-common-1.0/alarm/alarmUtil.cpp | 500 + .../alarm/scripts/mtcalarm.init | 108 + .../alarm/scripts/mtcalarm.logrotate | 17 + .../alarm/scripts/mtcalarm.pmon.conf | 17 + .../alarm/scripts/mtcalarm.service | 14 + .../cgts-mtce-common-1.0/common/Makefile | 91 + .../cgts-mtce-common-1.0/common/alarmUtil.cpp | 422 + .../cgts-mtce-common-1.0/common/alarmUtil.h | 142 + .../cgts-mtce-common-1.0/common/fitCodes.h | 170 + .../cgts-mtce-common-1.0/common/fsync.c | 41 + .../cgts-mtce-common-1.0/common/hostClass.cpp | 454 + .../cgts-mtce-common-1.0/common/hostClass.h | 221 + .../cgts-mtce-common-1.0/common/hostUtil.cpp | 209 + .../cgts-mtce-common-1.0/common/hostUtil.h | 85 + .../cgts-mtce-common-1.0/common/httpUtil.cpp | 1065 ++ .../cgts-mtce-common-1.0/common/httpUtil.h | 346 + .../cgts-mtce-common-1.0/common/ipmiUtil.cpp | 247 + .../cgts-mtce-common-1.0/common/ipmiUtil.h | 102 + .../cgts-mtce-common-1.0/common/jsonUtil.cpp | 1260 +++ .../cgts-mtce-common-1.0/common/jsonUtil.h | 160 + .../cgts-mtce-common-1.0/common/keyClass.cpp | 558 ++ .../cgts-mtce-common-1.0/common/keyClass.h | 89 + .../cgts-mtce-common-1.0/common/logMacros.h | 421 + .../cgts-mtce-common-1.0/common/msgClass.cpp | 1094 +++ .../cgts-mtce-common-1.0/common/msgClass.h | 216 + .../cgts-mtce-common-1.0/common/nlEvent.cpp | 311 + .../cgts-mtce-common-1.0/common/nlEvent.h | 24 + .../cgts-mtce-common-1.0/common/nodeBase.cpp | 707 ++ .../cgts-mtce-common-1.0/common/nodeBase.h | 1255 +++ .../cgts-mtce-common-1.0/common/nodeClass.cpp | 8607 +++++++++++++++++ .../cgts-mtce-common-1.0/common/nodeClass.h | 2033 ++++ .../cgts-mtce-common-1.0/common/nodeCmds.h | 91 + .../cgts-mtce-common-1.0/common/nodeEvent.cpp | 348 + .../cgts-mtce-common-1.0/common/nodeEvent.h | 48 + .../cgts-mtce-common-1.0/common/nodeMacro.h | 318 + .../common/nodeTimers.cpp | 604 ++ .../cgts-mtce-common-1.0/common/nodeTimers.h | 179 + .../cgts-mtce-common-1.0/common/nodeUtil.cpp | 1977 ++++ .../cgts-mtce-common-1.0/common/nodeUtil.h | 
133 + .../common/pgdbClass.cpp.OBS | 182 + .../common/pgdbClass.h.OBS | 83 + .../common/pgdbUtil.cpp.OBS | 427 + .../cgts-mtce-common-1.0/common/pingUtil.cpp | 832 ++ .../cgts-mtce-common-1.0/common/pingUtil.h | 149 + .../cgts-mtce-common-1.0/common/regexUtil.cpp | 220 + .../cgts-mtce-common-1.0/common/regexUtil.h | 19 + .../cgts-mtce-common-1.0/common/returnCodes.h | 147 + .../common/threadUtil.cpp | 817 ++ .../cgts-mtce-common-1.0/common/threadUtil.h | 288 + .../cgts-mtce-common-1.0/common/timeUtil.cpp | 116 + .../cgts-mtce-common-1.0/common/timeUtil.h | 30 + .../cgts-mtce-common-1.0/common/tokenUtil.cpp | 582 ++ .../cgts-mtce-common-1.0/common/tokenUtil.h | 58 + .../cgts-mtce-common-1.0/daemon/Makefile | 33 + .../daemon/daemon_common.h | 353 + .../daemon/daemon_config.cpp | 355 + .../daemon/daemon_debug.cpp | 376 + .../daemon/daemon_files.cpp | 1456 +++ .../daemon/daemon_ini.cpp | 292 + .../cgts-mtce-common-1.0/daemon/daemon_ini.h | 90 + .../daemon/daemon_main.cpp | 435 + .../daemon/daemon_option.h | 64 + .../daemon/daemon_signal.cpp | 261 + .../cgts-mtce-common-1.0/fsmon/Makefile | 42 + .../cgts-mtce-common-1.0/fsmon/fsmon.h | 79 + .../cgts-mtce-common-1.0/fsmon/fsmonHdlr.cpp | 222 + .../cgts-mtce-common-1.0/fsmon/fsmonInit.cpp | 189 + .../cgts-mtce-common-1.0/fsmon/scripts/fsmon | 91 + .../fsmon/scripts/fsmon.conf | 25 + .../fsmon/scripts/fsmon.logrotate | 15 + .../fsmon/scripts/fsmon.service | 23 + .../fsmon/scripts/fsmond.conf | 20 + .../cgts-mtce-common-1.0/guest/Makefile | 48 + .../cgts-mtce-common-1.0/guest/README.txt | 88 + .../cgts-mtce-common-1.0/guest/guestAgent.cpp | 1434 +++ .../cgts-mtce-common-1.0/guest/guestBase.h | 319 + .../cgts-mtce-common-1.0/guest/guestClass.cpp | 1335 +++ .../cgts-mtce-common-1.0/guest/guestClass.h | 202 + .../guest/guestHttpSvr.cpp | 1092 +++ .../cgts-mtce-common-1.0/guest/guestHttpSvr.h | 27 + .../guest/guestHttpUtil.cpp | 227 + .../guest/guestHttpUtil.h | 32 + .../guest/guestInstClass.cpp | 764 ++ 
.../guest/guestInstClass.h | 221 + .../guest/guestServer.cpp | 580 ++ .../cgts-mtce-common-1.0/guest/guestStubs.cpp | 18 + .../guest/guestSvrFsm.cpp | 93 + .../guest/guestSvrHdlr.cpp | 1435 +++ .../guest/guestSvrMsg.cpp | 701 ++ .../cgts-mtce-common-1.0/guest/guestSvrMsg.h | 27 + .../guest/guestSvrUtil.cpp | 234 + .../cgts-mtce-common-1.0/guest/guestSvrUtil.h | 18 + .../cgts-mtce-common-1.0/guest/guestUtil.cpp | 330 + .../cgts-mtce-common-1.0/guest/guestUtil.h | 34 + .../guest/guestVimApi.cpp | 777 ++ .../cgts-mtce-common-1.0/guest/guestVimApi.h | 47 + .../guest/guestVirtio.cpp | 373 + .../cgts-mtce-common-1.0/guest/guestVirtio.h | 24 + .../guest/scripts/guest.ini | 28 + .../guest/scripts/guestAgent | 113 + .../guest/scripts/guestAgent.logrotate | 16 + .../guest/scripts/guestAgent.ocf | 442 + .../guest/scripts/guestAgent.service | 16 + .../guest/scripts/guestAgentTest.sh | 160 + .../guest/scripts/guestServer | 113 + .../guest/scripts/guestServer.logrotate | 17 + .../guest/scripts/guestServer.pmon | 25 + .../guest/scripts/guestServer.service | 23 + .../cgts-mtce-common-1.0/heartbeat/Makefile | 51 + .../heartbeat/hbsAgent.cpp | 1808 ++++ .../heartbeat/hbsAlarm.cpp | 67 + .../cgts-mtce-common-1.0/heartbeat/hbsAlarm.h | 32 + .../cgts-mtce-common-1.0/heartbeat/hbsBase.h | 182 + .../heartbeat/hbsClient.cpp | 1454 +++ .../heartbeat/hbsPmon.cpp | 186 + .../heartbeat/hbsStubs.cpp | 370 + .../cgts-mtce-common-1.0/hostw/Makefile | 43 + .../cgts-mtce-common-1.0/hostw/hostw.h | 131 + .../cgts-mtce-common-1.0/hostw/hostwHdlr.cpp | 276 + .../cgts-mtce-common-1.0/hostw/hostwInit.cpp | 335 + .../cgts-mtce-common-1.0/hostw/hostwMsg.cpp | 56 + .../cgts-mtce-common-1.0/hostw/hostwMsg.h | 17 + .../cgts-mtce-common-1.0/hostw/scripts/hostw | 85 + .../hostw/scripts/hostw.logrotate | 16 + .../hostw/scripts/hostw.service | 23 + .../hostw/scripts/hostwd.conf | 12 + .../cgts-mtce-common-1.0/hwmon/Makefile | 57 + .../cgts-mtce-common-1.0/hwmon/hp380.cpp | 0 
.../cgts-mtce-common-1.0/hwmon/hwmon.h | 512 + .../cgts-mtce-common-1.0/hwmon/hwmonAlarm.cpp | 552 ++ .../cgts-mtce-common-1.0/hwmon/hwmonAlarm.h | 90 + .../cgts-mtce-common-1.0/hwmon/hwmonClass.cpp | 2432 +++++ .../cgts-mtce-common-1.0/hwmon/hwmonClass.h | 606 ++ .../cgts-mtce-common-1.0/hwmon/hwmonFsm.cpp | 183 + .../cgts-mtce-common-1.0/hwmon/hwmonGroup.cpp | 772 ++ .../cgts-mtce-common-1.0/hwmon/hwmonGroup.h | 45 + .../cgts-mtce-common-1.0/hwmon/hwmonHdlr.cpp | 2573 +++++ .../cgts-mtce-common-1.0/hwmon/hwmonHttp.cpp | 2216 +++++ .../cgts-mtce-common-1.0/hwmon/hwmonHttp.h | 93 + .../hwmon/hwmonHttpClient.cpp | 0 .../cgts-mtce-common-1.0/hwmon/hwmonInit.cpp | 430 + .../cgts-mtce-common-1.0/hwmon/hwmonIpmi.cpp | 775 ++ .../cgts-mtce-common-1.0/hwmon/hwmonIpmi.h | 49 + .../cgts-mtce-common-1.0/hwmon/hwmonJson.cpp | 106 + .../cgts-mtce-common-1.0/hwmon/hwmonJson.h | 23 + .../cgts-mtce-common-1.0/hwmon/hwmonModel.cpp | 554 ++ .../cgts-mtce-common-1.0/hwmon/hwmonModel.h | 16 + .../cgts-mtce-common-1.0/hwmon/hwmonMsg.cpp | 353 + .../cgts-mtce-common-1.0/hwmon/hwmonParse.cpp | 0 .../hwmon/hwmonSensor.cpp | 2281 +++++ .../cgts-mtce-common-1.0/hwmon/hwmonSensor.h | 48 + .../hwmon/hwmonThreads.cpp | 783 ++ .../cgts-mtce-common-1.0/hwmon/hwmonThreads.h | 170 + .../cgts-mtce-common-1.0/hwmon/hwmonUtil.cpp | 714 ++ .../cgts-mtce-common-1.0/hwmon/hwmonUtil.h | 80 + .../hwmon/scripts/hwmon.logrotate | 28 + .../hwmon/scripts/hwmon.service | 16 + .../hwmon/scripts/hwmond.conf | 51 + .../hwmon/scripts/hwmond_notify.py | 19 + .../hwmon/scripts/lsb/hwmon | 91 + .../hwmon/scripts/ocf/hwmon | 497 + .../hwmon/scripts/show_hp360 | 78 + .../hwmon/scripts/show_hp380 | 6301 ++++++++++++ .../hwmon/scripts/show_quanta | 184 + .../cgts-mtce-common-1.0/maintenance/Makefile | 81 + .../maintenance/ipmiClient.h | 297 + .../maintenance/mtcAlarm.cpp | 784 ++ .../maintenance/mtcAlarm.h | 114 + .../maintenance/mtcBrdMgmt.cpp | 0 .../maintenance/mtcBrdMgmt.h | 0 .../maintenance/mtcCmdHdlr.cpp | 826 ++ 
.../maintenance/mtcCompMsg.cpp | 995 ++ .../maintenance/mtcCtrlMsg.cpp | 1134 +++ .../maintenance/mtcHttpSvr.cpp | 1121 +++ .../maintenance/mtcHttpSvr.h | 29 + .../maintenance/mtcHttpUtil.cpp | 1373 +++ .../maintenance/mtcHttpUtil.h | 87 + .../maintenance/mtcInvApi.cpp | 1652 ++++ .../maintenance/mtcInvApi.h | 125 + .../maintenance/mtcIpmiUtil.cpp | 343 + .../maintenance/mtcIpmiUtil.h | 89 + .../maintenance/mtcKeyApi.cpp | 183 + .../maintenance/mtcKeyApi.h | 25 + .../maintenance/mtcNodeComp.cpp | 1903 ++++ .../maintenance/mtcNodeComp.h | 113 + .../maintenance/mtcNodeCtrl.cpp | 1545 +++ .../maintenance/mtcNodeFsm.cpp | 477 + .../maintenance/mtcNodeFsm.h | 38 + .../maintenance/mtcNodeHdlrs.cpp | 6828 +++++++++++++ .../maintenance/mtcNodeHdlrs.h | 24 + .../maintenance/mtcNodeMnfa.cpp | 354 + .../maintenance/mtcNodeMsg.h | 139 + .../maintenance/mtcSmgrApi.cpp | 364 + .../maintenance/mtcSmgrApi.h | 51 + .../maintenance/mtcSubfHdlrs.cpp | 659 ++ .../maintenance/mtcThreads.cpp | 385 + .../maintenance/mtcThreads.h | 29 + .../maintenance/mtcVimApi.cpp | 263 + .../maintenance/mtcVimApi.h | 49 + .../maintenance/mtcWorkQueue.cpp | 868 ++ .../cgts-mtce-common-1.0/mtclog/Makefile | 43 + .../cgts-mtce-common-1.0/mtclog/mtclog.cpp | 338 + .../cgts-mtce-common-1.0/pmon/Makefile | 42 + mtce-common/cgts-mtce-common-1.0/pmon/pmon.h | 655 ++ .../cgts-mtce-common-1.0/pmon/pmonAlarm.cpp | 365 + .../cgts-mtce-common-1.0/pmon/pmonAlarm.h | 64 + .../cgts-mtce-common-1.0/pmon/pmonFsm.cpp | 1157 +++ .../cgts-mtce-common-1.0/pmon/pmonHdlr.cpp | 2070 ++++ .../cgts-mtce-common-1.0/pmon/pmonInit.cpp | 581 ++ .../cgts-mtce-common-1.0/pmon/pmonMsg.cpp | 788 ++ .../pmon/scripts/acpid.conf | 17 + .../pmon/scripts/ntpd.conf | 20 + .../cgts-mtce-common-1.0/pmon/scripts/pmon | 85 + .../pmon/scripts/pmon-restart | 47 + .../pmon/scripts/pmon-start | 47 + .../pmon/scripts/pmon-stop | 47 + .../pmon/scripts/pmon-test.sh | 236 + .../pmon/scripts/pmon.logrotate | 16 + .../pmon/scripts/pmon.service | 24 + 
.../pmon/scripts/pmond.conf | 38 + .../pmon/scripts/sshd.conf | 17 + .../pmon/scripts/syslog-ng.conf | 17 + .../cgts-mtce-common-1.0/public/Makefile | 51 + .../cgts-mtce-common-1.0/public/amon.c | 303 + .../cgts-mtce-common-1.0/public/amon.h | 130 + .../cgts-mtce-common-1.0/rmon/Makefile | 40 + .../rmon/interfaceHdlr.cpp | 1183 +++ .../rmon/remoteloggingHdlr.cpp | 376 + mtce-common/cgts-mtce-common-1.0/rmon/rmon.h | 852 ++ .../cgts-mtce-common-1.0/rmon/rmonApi.h | 50 + .../rmon/rmonApi/Makefile | 51 + .../rmon/rmonApi/rmon_api.c | 475 + .../rmon/rmonApi/rmon_api.h | 142 + .../rmon/rmonApi/rmon_nodeMacro.h | 76 + .../rmon/rmonApi/tests/Makefile | 22 + .../rmonApi/tests/rmon_api_client_test.cpp | 262 + .../rmon/rmonApi/tests/rmon_api_client_test.h | 109 + .../rmon/rmonApi/tests/rmond_api_test | Bin 0 -> 63446 bytes .../cgts-mtce-common-1.0/rmon/rmonFM.cpp | 203 + .../cgts-mtce-common-1.0/rmon/rmonHdlr.cpp | 4914 ++++++++++ .../cgts-mtce-common-1.0/rmon/rmonHttp.cpp | 674 ++ .../cgts-mtce-common-1.0/rmon/rmonHttp.h | 161 + .../cgts-mtce-common-1.0/rmon/rmonInit.cpp | 616 ++ .../cgts-mtce-common-1.0/rmon/rmonMsg.cpp | 777 ++ .../rmon/rmon_resource_notify/Makefile | 32 + .../rmon_resource_notify.cpp | 550 ++ .../rmon_resource_notify.h | 106 + .../rmon/scripts/cinder_virtual_resource.conf | 27 + .../rmon/scripts/cpu_resource.conf | 16 + .../rmon/scripts/filesystem_resource.conf | 16 + .../rmon/scripts/infrastructure_resource.conf | 6 + .../rmon/scripts/instance_resource.conf | 16 + .../rmon/scripts/management_resource.conf | 6 + .../rmon/scripts/memory_resource.conf | 16 + .../rmon/scripts/nova_virtual_resource.conf | 16 + .../rmon/scripts/oam_resource.conf | 6 + .../rmon/scripts/query_ntp_servers.sh | 179 + .../rmon/scripts/remotelogging_resource.conf | 6 + .../cgts-mtce-common-1.0/rmon/scripts/rmon | 97 + .../rmon/scripts/rmon.conf | 21 + .../rmon/scripts/rmon.logrotate | 43 + .../rmon/scripts/rmon.service | 23 + .../rmon/scripts/rmon_reload_on_cpe.sh | 47 + 
.../rmon/scripts/rmond.conf | 34 + .../rmon/scripts/virtual_resource.conf | 16 + .../rmon/thinmetaHdlr.cpp | 396 + .../cgts-mtce-common-1.0/scripts/config | 37 + .../scripts/config.service | 13 + .../cgts-mtce-common-1.0/scripts/dmemchk.sh | 358 + .../cgts-mtce-common-1.0/scripts/goenabled | 186 + .../scripts/goenabled.service | 13 + .../cgts-mtce-common-1.0/scripts/hbsAgent | 555 ++ .../cgts-mtce-common-1.0/scripts/hbsClient | 105 + .../scripts/hbsClient.conf | 30 + .../scripts/hbsClient.service | 23 + .../scripts/hwclock.service | 13 + .../cgts-mtce-common-1.0/scripts/hwclock.sh | 92 + .../cgts-mtce-common-1.0/scripts/mgmtlinkup | 78 + .../cgts-mtce-common-1.0/scripts/mtc.conf | 170 + .../cgts-mtce-common-1.0/scripts/mtc.ini | 14 + .../cgts-mtce-common-1.0/scripts/mtcAgent | 521 + .../cgts-mtce-common-1.0/scripts/mtcClient | 118 + .../scripts/mtcClient.conf | 26 + .../scripts/mtcClient.service | 23 + .../cgts-mtce-common-1.0/scripts/mtcTest | 38 + .../scripts/mtce.logrotate | 59 + .../cgts-mtce-common-1.0/scripts/mtcinit | 129 + .../cgts-mtce-common-1.0/scripts/mtclog | 94 + .../scripts/mtclog.service | 23 + .../cgts-mtce-common-1.0/scripts/mtclogd.conf | 17 + .../cgts-mtce-common-1.0/scripts/runservices | 189 + .../scripts/runservices.service | 12 + .../cgts-mtce-common-1.0/scripts/sched_trace | 54 + .../scripts/sensor_hp360_v1_ilo_v4.profile | 267 + .../scripts/sensor_hp380_v1_ilo_v4.profile | 265 + .../scripts/sensor_integration_profile.README | 456 + .../scripts/sensor_quanta_v1_ilo_v4.profile | 312 + .../cgts-mtce-common-1.0/scripts/store_trace | 59 + .../scripts/stress_ras.sh | 73 + .../scripts/stress_swact.sh | 39 + .../cgts-mtce-common-1.0/scripts/wipedisk | 118 + mtce-compute/LICENSE | 202 + mtce-compute/PKG-INFO | 12 + mtce-compute/centos/build_srpm.data | 3 + mtce-compute/centos/cgts-mtce-compute.spec | 94 + .../cgts-mtce-compute-1.0/scripts/LICENSE | 202 + .../cgts-mtce-compute-1.0/scripts/e_nova-init | 159 + .../scripts/e_nova-init.service | 13 + 
.../cgts-mtce-compute-1.0/scripts/goenabled | 187 + .../scripts/goenabled-compute.service | 12 + .../scripts/libvirtd.conf | 25 + .../scripts/nova-cleanup | 200 + .../scripts/nova-cleanup.conf | 25 + .../scripts/nova-compute.conf | 25 + .../scripts/nova-goenabled.sh | 80 + .../scripts/nova-init.service | 13 + .../scripts/nova-startup | 111 + .../scripts/virt-support-goenabled.sh | 58 + mtce-control/LICENSE | 202 + mtce-control/PKG-INFO | 12 + mtce-control/centos/build_srpm.data | 4 + mtce-control/centos/cgts-mtce-control.spec | 58 + .../cgts-mtce-control-1.0/scripts/LICENSE | 202 + .../cgts-mtce-control-1.0/scripts/goenabled | 52 + mtce-storage/LICENSE | 202 + mtce-storage/PKG-INFO | 12 + mtce-storage/centos/build_srpm.data | 4 + mtce-storage/centos/cgts-mtce-storage.spec | 61 + .../cgts-mtce-storage-1.0/scripts/LICENSE | 202 + .../cgts-mtce-storage-1.0/scripts/goenabled | 175 + .../scripts/goenabled-storage.service | 13 + mwa-beas.map | 7 + 417 files changed, 131695 insertions(+) create mode 100644 CONTRIBUTORS.wrs create mode 100644 LICENSE create mode 100644 README.rst create mode 100755 bsp-files/centos-ks-gen.pl create mode 100644 bsp-files/centos.syslinux.cfg create mode 100644 bsp-files/filter_out_from_compute create mode 100644 bsp-files/filter_out_from_compute_lowlatency create mode 100644 bsp-files/filter_out_from_controller create mode 100644 bsp-files/filter_out_from_smallsystem create mode 100644 bsp-files/filter_out_from_smallsystem_lowlatency create mode 100644 bsp-files/filter_out_from_storage create mode 100644 bsp-files/grub.cfg create mode 100644 bsp-files/kickstarts/functions.sh create mode 100644 bsp-files/kickstarts/post_clone_iso_ks.cfg create mode 100644 bsp-files/kickstarts/post_common.cfg create mode 100644 bsp-files/kickstarts/post_kernel_aio_and_compute.cfg create mode 100644 bsp-files/kickstarts/post_kernel_controller.cfg create mode 100644 bsp-files/kickstarts/post_kernel_storage.cfg create mode 100644 
bsp-files/kickstarts/post_lvm_no_pv_on_rootfs.cfg create mode 100644 bsp-files/kickstarts/post_lvm_pv_on_rootfs.cfg create mode 100755 bsp-files/kickstarts/post_net_common.cfg create mode 100644 bsp-files/kickstarts/post_net_controller.cfg create mode 100644 bsp-files/kickstarts/post_platform_conf_aio.cfg create mode 100644 bsp-files/kickstarts/post_platform_conf_aio_lowlatency.cfg create mode 100644 bsp-files/kickstarts/post_platform_conf_compute.cfg create mode 100644 bsp-files/kickstarts/post_platform_conf_compute_lowlatency.cfg create mode 100644 bsp-files/kickstarts/post_platform_conf_controller.cfg create mode 100644 bsp-files/kickstarts/post_platform_conf_storage.cfg create mode 100644 bsp-files/kickstarts/post_pxeboot_controller.cfg create mode 100644 bsp-files/kickstarts/post_system_aio.cfg create mode 100644 bsp-files/kickstarts/post_usb_controller.cfg create mode 100644 bsp-files/kickstarts/post_yow_controller.cfg create mode 100644 bsp-files/kickstarts/pre_common_head.cfg create mode 100755 bsp-files/kickstarts/pre_disk_aio.cfg create mode 100755 bsp-files/kickstarts/pre_disk_compute.cfg create mode 100755 bsp-files/kickstarts/pre_disk_controller.cfg create mode 100644 bsp-files/kickstarts/pre_disk_setup_common.cfg create mode 100755 bsp-files/kickstarts/pre_disk_storage.cfg create mode 100644 bsp-files/kickstarts/pre_pkglist.cfg create mode 100644 bsp-files/kickstarts/pre_pkglist_lowlatency.cfg create mode 100755 bsp-files/pkg-list.pl create mode 100644 bsp-files/platform_comps.py create mode 100644 bsp-files/pxeboot.cfg create mode 100644 bsp-files/pxeboot_grub.cfg create mode 100755 bsp-files/pxeboot_setup.sh create mode 100644 bsp-files/upgrades/import.sh create mode 100644 bsp-files/upgrades/metadata.xml create mode 100644 installer/initrd/README create mode 100644 installer/pxe-network-installer/centos/build_srpm.data create mode 100644 installer/pxe-network-installer/centos/pxe-network-installer.spec create mode 100644 
installer/pxe-network-installer/pxe-network-installer/LICENSE create mode 100644 installer/pxe-network-installer/pxe-network-installer/centos-pxe-compute-install create mode 100644 installer/pxe-network-installer/pxe-network-installer/centos-pxe-compute_lowlatency-install create mode 100644 installer/pxe-network-installer/pxe-network-installer/centos-pxe-controller-install create mode 100644 installer/pxe-network-installer/pxe-network-installer/centos-pxe-smallsystem-install create mode 100644 installer/pxe-network-installer/pxe-network-installer/centos-pxe-smallsystem_lowlatency-install create mode 100644 installer/pxe-network-installer/pxe-network-installer/centos-pxe-storage-install create mode 100644 installer/pxe-network-installer/pxe-network-installer/default create mode 100644 installer/pxe-network-installer/pxe-network-installer/default.static create mode 100755 installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-compute-install create mode 100755 installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-compute_lowlatency-install create mode 100755 installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-controller-install create mode 100755 installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-smallsystem-install create mode 100755 installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-smallsystem_lowlatency-install create mode 100755 installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-storage-install create mode 100755 installer/pxe-network-installer/pxe-network-installer/pxe-grub.cfg create mode 100755 installer/pxe-network-installer/pxe-network-installer/pxe-grub.cfg.static create mode 100755 installer/pxe-network-installer/pxe-network-installer/pxeboot-update.sh create mode 100755 kickstart/LICENSE create mode 100644 kickstart/centos/build_srpm.data create mode 100644 kickstart/centos/platform-kickstarts.spec create mode 100644 mtce-common/LICENSE create mode 100644 
mtce-common/PKG-INFO create mode 100644 mtce-common/centos/build_srpm.data create mode 100644 mtce-common/centos/cgts-mtce-common.spec create mode 100644 mtce-common/cgts-mtce-common-1.0/.gitignore create mode 100644 mtce-common/cgts-mtce-common-1.0/LICENSE create mode 100755 mtce-common/cgts-mtce-common-1.0/Makefile create mode 100755 mtce-common/cgts-mtce-common-1.0/alarm/Makefile create mode 100644 mtce-common/cgts-mtce-common-1.0/alarm/alarm.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/alarm/alarm.h create mode 100644 mtce-common/cgts-mtce-common-1.0/alarm/alarmData.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/alarm/alarmHdlr.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/alarm/alarmInit.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/alarm/alarmMgr.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/alarm/alarmUtil.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/alarm/scripts/mtcalarm.init create mode 100644 mtce-common/cgts-mtce-common-1.0/alarm/scripts/mtcalarm.logrotate create mode 100644 mtce-common/cgts-mtce-common-1.0/alarm/scripts/mtcalarm.pmon.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/alarm/scripts/mtcalarm.service create mode 100755 mtce-common/cgts-mtce-common-1.0/common/Makefile create mode 100644 mtce-common/cgts-mtce-common-1.0/common/alarmUtil.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/common/alarmUtil.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/fitCodes.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/fsync.c create mode 100644 mtce-common/cgts-mtce-common-1.0/common/hostClass.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/common/hostClass.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/hostUtil.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/common/hostUtil.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/httpUtil.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/common/httpUtil.h create 
mode 100644 mtce-common/cgts-mtce-common-1.0/common/ipmiUtil.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/common/ipmiUtil.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/jsonUtil.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/common/jsonUtil.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/keyClass.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/common/keyClass.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/logMacros.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/msgClass.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/common/msgClass.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/nlEvent.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/common/nlEvent.h create mode 100755 mtce-common/cgts-mtce-common-1.0/common/nodeBase.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/common/nodeBase.h create mode 100755 mtce-common/cgts-mtce-common-1.0/common/nodeClass.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/common/nodeClass.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/nodeCmds.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/nodeEvent.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/common/nodeEvent.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/nodeMacro.h create mode 100755 mtce-common/cgts-mtce-common-1.0/common/nodeTimers.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/common/nodeTimers.h create mode 100755 mtce-common/cgts-mtce-common-1.0/common/nodeUtil.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/common/nodeUtil.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/pgdbClass.cpp.OBS create mode 100644 mtce-common/cgts-mtce-common-1.0/common/pgdbClass.h.OBS create mode 100644 mtce-common/cgts-mtce-common-1.0/common/pgdbUtil.cpp.OBS create mode 100644 mtce-common/cgts-mtce-common-1.0/common/pingUtil.cpp create mode 100644 
mtce-common/cgts-mtce-common-1.0/common/pingUtil.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/regexUtil.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/common/regexUtil.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/returnCodes.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/threadUtil.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/common/threadUtil.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/timeUtil.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/common/timeUtil.h create mode 100644 mtce-common/cgts-mtce-common-1.0/common/tokenUtil.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/common/tokenUtil.h create mode 100755 mtce-common/cgts-mtce-common-1.0/daemon/Makefile create mode 100755 mtce-common/cgts-mtce-common-1.0/daemon/daemon_common.h create mode 100644 mtce-common/cgts-mtce-common-1.0/daemon/daemon_config.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/daemon/daemon_debug.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/daemon/daemon_files.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/daemon/daemon_ini.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/daemon/daemon_ini.h create mode 100755 mtce-common/cgts-mtce-common-1.0/daemon/daemon_main.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/daemon/daemon_option.h create mode 100755 mtce-common/cgts-mtce-common-1.0/daemon/daemon_signal.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/fsmon/Makefile create mode 100644 mtce-common/cgts-mtce-common-1.0/fsmon/fsmon.h create mode 100644 mtce-common/cgts-mtce-common-1.0/fsmon/fsmonHdlr.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/fsmon/fsmonInit.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmon create mode 100644 mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmon.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmon.logrotate create mode 100644 
mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmon.service create mode 100644 mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmond.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/Makefile create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/README.txt create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestAgent.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestBase.h create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestClass.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestClass.h create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestHttpSvr.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestHttpSvr.h create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestHttpUtil.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestHttpUtil.h create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestInstClass.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestInstClass.h create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestServer.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestStubs.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestSvrFsm.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestSvrHdlr.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestSvrMsg.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestSvrMsg.h create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestSvrUtil.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestSvrUtil.h create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestUtil.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestUtil.h create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestVimApi.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestVimApi.h create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/guestVirtio.cpp create mode 100644 
mtce-common/cgts-mtce-common-1.0/guest/guestVirtio.h create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/scripts/guest.ini create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/scripts/guestAgent create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/scripts/guestAgent.logrotate create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/scripts/guestAgent.ocf create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/scripts/guestAgent.service create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/scripts/guestAgentTest.sh create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/scripts/guestServer create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/scripts/guestServer.logrotate create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/scripts/guestServer.pmon create mode 100644 mtce-common/cgts-mtce-common-1.0/guest/scripts/guestServer.service create mode 100755 mtce-common/cgts-mtce-common-1.0/heartbeat/Makefile create mode 100755 mtce-common/cgts-mtce-common-1.0/heartbeat/hbsAgent.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/heartbeat/hbsAlarm.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/heartbeat/hbsAlarm.h create mode 100755 mtce-common/cgts-mtce-common-1.0/heartbeat/hbsBase.h create mode 100755 mtce-common/cgts-mtce-common-1.0/heartbeat/hbsClient.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/heartbeat/hbsPmon.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/heartbeat/hbsStubs.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/hostw/Makefile create mode 100755 mtce-common/cgts-mtce-common-1.0/hostw/hostw.h create mode 100644 mtce-common/cgts-mtce-common-1.0/hostw/hostwHdlr.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hostw/hostwInit.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hostw/hostwMsg.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hostw/hostwMsg.h create mode 100644 mtce-common/cgts-mtce-common-1.0/hostw/scripts/hostw create mode 100644 
mtce-common/cgts-mtce-common-1.0/hostw/scripts/hostw.logrotate create mode 100644 mtce-common/cgts-mtce-common-1.0/hostw/scripts/hostw.service create mode 100755 mtce-common/cgts-mtce-common-1.0/hostw/scripts/hostwd.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/Makefile create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hp380.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmon.h create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonAlarm.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonAlarm.h create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonClass.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonClass.h create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonFsm.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonGroup.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonGroup.h create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHdlr.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHttp.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHttp.h create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHttpClient.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonInit.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonIpmi.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonIpmi.h create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonJson.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonJson.h create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonModel.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonModel.h create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonMsg.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonParse.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonSensor.cpp create mode 100644 
mtce-common/cgts-mtce-common-1.0/hwmon/hwmonSensor.h create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonThreads.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonThreads.h create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonUtil.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/hwmonUtil.h create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/scripts/hwmon.logrotate create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/scripts/hwmon.service create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/scripts/hwmond.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/scripts/hwmond_notify.py create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/scripts/lsb/hwmon create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/scripts/ocf/hwmon create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/scripts/show_hp360 create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/scripts/show_hp380 create mode 100644 mtce-common/cgts-mtce-common-1.0/hwmon/scripts/show_quanta create mode 100755 mtce-common/cgts-mtce-common-1.0/maintenance/Makefile create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/ipmiClient.h create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcAlarm.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcAlarm.h create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcBrdMgmt.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcBrdMgmt.h create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcCmdHdlr.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/maintenance/mtcCompMsg.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/maintenance/mtcCtrlMsg.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcHttpSvr.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcHttpSvr.h create mode 100755 mtce-common/cgts-mtce-common-1.0/maintenance/mtcHttpUtil.cpp create mode 100755 
mtce-common/cgts-mtce-common-1.0/maintenance/mtcHttpUtil.h create mode 100755 mtce-common/cgts-mtce-common-1.0/maintenance/mtcInvApi.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/maintenance/mtcInvApi.h create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcIpmiUtil.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcIpmiUtil.h create mode 100755 mtce-common/cgts-mtce-common-1.0/maintenance/mtcKeyApi.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/maintenance/mtcKeyApi.h create mode 100755 mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeComp.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeComp.h create mode 100755 mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeCtrl.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeFsm.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeFsm.h create mode 100755 mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.h create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeMnfa.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeMsg.h create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcSmgrApi.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcSmgrApi.h create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcSubfHdlrs.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcThreads.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcThreads.h create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcVimApi.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcVimApi.h create mode 100644 mtce-common/cgts-mtce-common-1.0/maintenance/mtcWorkQueue.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/mtclog/Makefile create mode 100644 
mtce-common/cgts-mtce-common-1.0/mtclog/mtclog.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/pmon/Makefile create mode 100755 mtce-common/cgts-mtce-common-1.0/pmon/pmon.h create mode 100644 mtce-common/cgts-mtce-common-1.0/pmon/pmonAlarm.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/pmon/pmonAlarm.h create mode 100644 mtce-common/cgts-mtce-common-1.0/pmon/pmonFsm.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/pmon/pmonHdlr.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/pmon/pmonInit.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/pmon/pmonMsg.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/pmon/scripts/acpid.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/pmon/scripts/ntpd.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon create mode 100755 mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon-restart create mode 100755 mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon-start create mode 100755 mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon-stop create mode 100755 mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon-test.sh create mode 100755 mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon.logrotate create mode 100644 mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon.service create mode 100755 mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmond.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/pmon/scripts/sshd.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/pmon/scripts/syslog-ng.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/public/Makefile create mode 100644 mtce-common/cgts-mtce-common-1.0/public/amon.c create mode 100644 mtce-common/cgts-mtce-common-1.0/public/amon.h create mode 100755 mtce-common/cgts-mtce-common-1.0/rmon/Makefile create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/interfaceHdlr.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/remoteloggingHdlr.cpp create mode 100755 mtce-common/cgts-mtce-common-1.0/rmon/rmon.h 
create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmonApi.h create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/Makefile create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/rmon_api.c create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/rmon_api.h create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/rmon_nodeMacro.h create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/tests/Makefile create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/tests/rmon_api_client_test.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/tests/rmon_api_client_test.h create mode 100755 mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/tests/rmond_api_test create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmonFM.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmonHdlr.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmonHttp.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmonHttp.h create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmonInit.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmonMsg.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmon_resource_notify/Makefile create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmon_resource_notify/rmon_resource_notify.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/rmon_resource_notify/rmon_resource_notify.h create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/scripts/cinder_virtual_resource.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/scripts/cpu_resource.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/scripts/filesystem_resource.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/scripts/infrastructure_resource.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/scripts/instance_resource.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/scripts/management_resource.conf create mode 100644 
mtce-common/cgts-mtce-common-1.0/rmon/scripts/memory_resource.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/scripts/nova_virtual_resource.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/scripts/oam_resource.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/scripts/query_ntp_servers.sh create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/scripts/remotelogging_resource.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon.conf create mode 100755 mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon.logrotate create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon.service create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon_reload_on_cpe.sh create mode 100755 mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmond.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/scripts/virtual_resource.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/rmon/thinmetaHdlr.cpp create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/config create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/config.service create mode 100755 mtce-common/cgts-mtce-common-1.0/scripts/dmemchk.sh create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/goenabled create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/goenabled.service create mode 100755 mtce-common/cgts-mtce-common-1.0/scripts/hbsAgent create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/hbsClient create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/hbsClient.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/hbsClient.service create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/hwclock.service create mode 100755 mtce-common/cgts-mtce-common-1.0/scripts/hwclock.sh create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/mgmtlinkup create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/mtc.conf create mode 
100644 mtce-common/cgts-mtce-common-1.0/scripts/mtc.ini create mode 100755 mtce-common/cgts-mtce-common-1.0/scripts/mtcAgent create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/mtcClient create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/mtcClient.conf create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/mtcClient.service create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/mtcTest create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/mtce.logrotate create mode 100755 mtce-common/cgts-mtce-common-1.0/scripts/mtcinit create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/mtclog create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/mtclog.service create mode 100755 mtce-common/cgts-mtce-common-1.0/scripts/mtclogd.conf create mode 100755 mtce-common/cgts-mtce-common-1.0/scripts/runservices create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/runservices.service create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/sched_trace create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/sensor_hp360_v1_ilo_v4.profile create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/sensor_hp380_v1_ilo_v4.profile create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/sensor_integration_profile.README create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/sensor_quanta_v1_ilo_v4.profile create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/store_trace create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/stress_ras.sh create mode 100644 mtce-common/cgts-mtce-common-1.0/scripts/stress_swact.sh create mode 100755 mtce-common/cgts-mtce-common-1.0/scripts/wipedisk create mode 100644 mtce-compute/LICENSE create mode 100644 mtce-compute/PKG-INFO create mode 100644 mtce-compute/centos/build_srpm.data create mode 100644 mtce-compute/centos/cgts-mtce-compute.spec create mode 100644 mtce-compute/cgts-mtce-compute-1.0/scripts/LICENSE create mode 100644 mtce-compute/cgts-mtce-compute-1.0/scripts/e_nova-init 
create mode 100644 mtce-compute/cgts-mtce-compute-1.0/scripts/e_nova-init.service create mode 100644 mtce-compute/cgts-mtce-compute-1.0/scripts/goenabled create mode 100644 mtce-compute/cgts-mtce-compute-1.0/scripts/goenabled-compute.service create mode 100644 mtce-compute/cgts-mtce-compute-1.0/scripts/libvirtd.conf create mode 100644 mtce-compute/cgts-mtce-compute-1.0/scripts/nova-cleanup create mode 100644 mtce-compute/cgts-mtce-compute-1.0/scripts/nova-cleanup.conf create mode 100644 mtce-compute/cgts-mtce-compute-1.0/scripts/nova-compute.conf create mode 100644 mtce-compute/cgts-mtce-compute-1.0/scripts/nova-goenabled.sh create mode 100644 mtce-compute/cgts-mtce-compute-1.0/scripts/nova-init.service create mode 100644 mtce-compute/cgts-mtce-compute-1.0/scripts/nova-startup create mode 100755 mtce-compute/cgts-mtce-compute-1.0/scripts/virt-support-goenabled.sh create mode 100644 mtce-control/LICENSE create mode 100644 mtce-control/PKG-INFO create mode 100644 mtce-control/centos/build_srpm.data create mode 100644 mtce-control/centos/cgts-mtce-control.spec create mode 100644 mtce-control/cgts-mtce-control-1.0/scripts/LICENSE create mode 100644 mtce-control/cgts-mtce-control-1.0/scripts/goenabled create mode 100644 mtce-storage/LICENSE create mode 100644 mtce-storage/PKG-INFO create mode 100644 mtce-storage/centos/build_srpm.data create mode 100644 mtce-storage/centos/cgts-mtce-storage.spec create mode 100644 mtce-storage/cgts-mtce-storage-1.0/scripts/LICENSE create mode 100644 mtce-storage/cgts-mtce-storage-1.0/scripts/goenabled create mode 100644 mtce-storage/cgts-mtce-storage-1.0/scripts/goenabled-storage.service create mode 100644 mwa-beas.map diff --git a/CONTRIBUTORS.wrs b/CONTRIBUTORS.wrs new file mode 100644 index 00000000..c9eca717 --- /dev/null +++ b/CONTRIBUTORS.wrs @@ -0,0 +1,8 @@ +The following contributors from Wind River have developed the seed code in this +repository. 
We look forward to community collaboration and contributions for +additional features, enhancements and refactoring. + +Contributors: +============= +Eric Macdonald +Don Penney diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.rst b/README.rst new file mode 100644 index 00000000..433e7e0f --- /dev/null +++ b/README.rst @@ -0,0 +1,5 @@ +========= +stx-metal +========= + +StarlingX Bare Metal Management diff --git a/bsp-files/centos-ks-gen.pl b/bsp-files/centos-ks-gen.pl new file mode 100755 index 00000000..3e95683f --- /dev/null +++ b/bsp-files/centos-ks-gen.pl @@ -0,0 +1,297 @@ +#!/usr/bin/perl +# +# Copyright (c) 2017 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +use strict; +use Getopt::Long; +use POSIX qw(strftime); + +# Defines the current list of YOW boot servers +my %boot_servers = ("yow-tuxlab", "128.224.150.9", + "yow-tuxlab2", "128.224.151.254", + "yow", "128.224.150.9"); # obsolete; kept for backwards compatibility + +my $PLATFORM_RELEASE; +my $files_dir; +my $output_dir = 'generated'; +my $pxeboot_output_dir = 'pxeboot'; +my $extra_output_dir = 'extra_cfgs'; + +GetOptions("release=s" => \$PLATFORM_RELEASE, + "basedir=s" => \$files_dir); + +die "Please specify release with --release" if (!$PLATFORM_RELEASE); +if (!$files_dir) +{ + $files_dir = '.'; +} + +my $BOOT_SERVER = "none"; + +my $template_dir = "$files_dir/kickstarts"; + +system("mkdir -p ${output_dir}"); + +# Write USB image files +write_config_file("controller", + "${output_dir}/controller_ks.cfg", "filter_out_from_controller", + "pre_common_head.cfg", + "pre_pkglist.cfg", + "pre_disk_setup_common.cfg", + "pre_disk_controller.cfg", + "post_platform_conf_controller.cfg", + "post_common.cfg", + "post_kernel_controller.cfg", + "post_lvm_pv_on_rootfs.cfg", + "post_usb_controller.cfg"); +write_config_file("controller-compute", + "${output_dir}/smallsystem_ks.cfg", "filter_out_from_smallsystem", + "pre_common_head.cfg", + "pre_pkglist.cfg", + "pre_disk_setup_common.cfg", + "pre_disk_aio.cfg", + "post_platform_conf_aio.cfg", + "post_common.cfg", + "post_kernel_aio_and_compute.cfg", + "post_lvm_pv_on_rootfs.cfg", + "post_system_aio.cfg", + "post_usb_controller.cfg"); +write_config_file("controller-compute-lowlatency", + "${output_dir}/smallsystem_lowlatency_ks.cfg", "filter_out_from_smallsystem_lowlatency", + "pre_common_head.cfg", + "pre_pkglist_lowlatency.cfg", + "pre_disk_setup_common.cfg", + "pre_disk_aio.cfg", + "post_platform_conf_aio_lowlatency.cfg", + "post_common.cfg", + "post_kernel_aio_and_compute.cfg", + "post_lvm_pv_on_rootfs.cfg", + "post_system_aio.cfg", + "post_usb_controller.cfg"); + +system("mkdir 
-p ${pxeboot_output_dir}"); + +# Write PXE boot files +write_config_file("controller", + "${pxeboot_output_dir}/pxeboot_controller.cfg", "filter_out_from_controller", + "pre_common_head.cfg", + "pre_pkglist.cfg", + "pre_disk_setup_common.cfg", + "pre_disk_controller.cfg", + "post_platform_conf_controller.cfg", + "post_common.cfg", + "post_kernel_controller.cfg", + "post_lvm_pv_on_rootfs.cfg", + "post_pxeboot_controller.cfg"); +write_config_file("controller-compute", + "${pxeboot_output_dir}/pxeboot_smallsystem.cfg", "filter_out_from_smallsystem", + "pre_common_head.cfg", + "pre_pkglist.cfg", + "pre_disk_setup_common.cfg", + "pre_disk_aio.cfg", + "post_platform_conf_aio.cfg", + "post_common.cfg", + "post_kernel_aio_and_compute.cfg", + "post_lvm_pv_on_rootfs.cfg", + "post_system_aio.cfg", + "post_pxeboot_controller.cfg"); +write_config_file("controller-compute-lowlatency", + "${pxeboot_output_dir}/pxeboot_smallsystem_lowlatency.cfg", "filter_out_from_smallsystem_lowlatency", + "pre_common_head.cfg", + "pre_pkglist_lowlatency.cfg", + "pre_disk_setup_common.cfg", + "pre_disk_aio.cfg", + "post_platform_conf_aio_lowlatency.cfg", + "post_common.cfg", + "post_kernel_aio_and_compute.cfg", + "post_lvm_pv_on_rootfs.cfg", + "post_system_aio.cfg", + "post_pxeboot_controller.cfg"); + + +# Write same net files +write_config_file("controller", + "${output_dir}/net_controller_ks.cfg", "filter_out_from_controller", + "pre_common_head.cfg", + "pre_pkglist.cfg", + "pre_disk_setup_common.cfg", + "pre_disk_controller.cfg", + "post_platform_conf_controller.cfg", + "post_common.cfg", + "post_kernel_controller.cfg", + "post_lvm_pv_on_rootfs.cfg", + "post_net_controller.cfg", + "post_net_common.cfg"); +write_config_file("controller-compute", + "${output_dir}/net_smallsystem_ks.cfg", "filter_out_from_smallsystem", + "pre_common_head.cfg", + "pre_pkglist.cfg", + "pre_disk_setup_common.cfg", + "pre_disk_aio.cfg", + "post_platform_conf_aio.cfg", + "post_common.cfg", + 
"post_kernel_aio_and_compute.cfg", + "post_lvm_pv_on_rootfs.cfg", + "post_system_aio.cfg", + "post_net_controller.cfg", + "post_net_common.cfg"); +write_config_file("controller-compute-lowlatency", + "${output_dir}/net_smallsystem_lowlatency_ks.cfg", "filter_out_from_smallsystem_lowlatency", + "pre_common_head.cfg", + "pre_pkglist_lowlatency.cfg", + "pre_disk_setup_common.cfg", + "pre_disk_aio.cfg", + "post_platform_conf_aio_lowlatency.cfg", + "post_common.cfg", + "post_kernel_aio_and_compute.cfg", + "post_lvm_pv_on_rootfs.cfg", + "post_system_aio.cfg", + "post_net_controller.cfg", + "post_net_common.cfg"); +write_config_file("compute", + "${output_dir}/net_compute_ks.cfg", "filter_out_from_compute", + "pre_common_head.cfg", + "pre_pkglist.cfg", + "pre_disk_setup_common.cfg", + "pre_disk_compute.cfg", + "post_platform_conf_compute.cfg", + "post_common.cfg", + "post_kernel_aio_and_compute.cfg", + "post_lvm_no_pv_on_rootfs.cfg", + "post_net_common.cfg"); +write_config_file("compute-lowlatency", + "${output_dir}/net_compute_lowlatency_ks.cfg", "filter_out_from_compute_lowlatency", + "pre_common_head.cfg", + "pre_pkglist_lowlatency.cfg", + "pre_disk_setup_common.cfg", + "pre_disk_compute.cfg", + "post_platform_conf_compute_lowlatency.cfg", + "post_common.cfg", + "post_kernel_aio_and_compute.cfg", + "post_lvm_no_pv_on_rootfs.cfg", + "post_net_common.cfg"); +write_config_file("storage", + "${output_dir}/net_storage_ks.cfg", "filter_out_from_storage", + "pre_common_head.cfg", + "pre_pkglist.cfg", + "pre_disk_setup_common.cfg", + "pre_disk_storage.cfg", + "post_platform_conf_storage.cfg", + "post_common.cfg", + "post_kernel_storage.cfg", + "post_lvm_pv_on_rootfs.cfg", + "post_net_common.cfg"); + +system("mkdir -p ${extra_output_dir}"); + +# write Ottawa Lab files +my $server; +foreach $server (keys %boot_servers) +{ + $BOOT_SERVER = $boot_servers{$server}; + + write_config_file("controller", + "${extra_output_dir}/${server}_controller.cfg", "filter_out_from_controller", + 
+        "pre_common_head.cfg",
+        "pre_pkglist.cfg",
+        "pre_disk_setup_common.cfg",
+        "pre_disk_controller.cfg",
+        "post_platform_conf_controller.cfg",
+        "post_common.cfg",
+        "post_kernel_controller.cfg",
+        "post_lvm_pv_on_rootfs.cfg",
+        "post_yow_controller.cfg");
+    write_config_file("controller-compute",
+        "${extra_output_dir}/${server}_smallsystem.cfg", "filter_out_from_smallsystem",
+        "pre_common_head.cfg",
+        "pre_pkglist.cfg",
+        "pre_disk_setup_common.cfg",
+        "pre_disk_aio.cfg",
+        "post_platform_conf_aio.cfg",
+        "post_common.cfg",
+        "post_kernel_aio_and_compute.cfg",
+        "post_lvm_pv_on_rootfs.cfg",
+        "post_system_aio.cfg",
+        "post_yow_controller.cfg");
+    write_config_file("controller-compute-lowlatency",
+        "${extra_output_dir}/${server}_smallsystem_lowlatency.cfg", "filter_out_from_smallsystem_lowlatency",
+        "pre_common_head.cfg",
+        "pre_pkglist_lowlatency.cfg",
+        "pre_disk_setup_common.cfg",
+        "pre_disk_aio.cfg",
+        "post_platform_conf_aio_lowlatency.cfg",
+        "post_common.cfg",
+        "post_kernel_aio_and_compute.cfg",
+        "post_lvm_pv_on_rootfs.cfg",
+        "post_system_aio.cfg",
+        "post_yow_controller.cfg");
+}
+
+exit 0;
+
+#------------------------#
+
+# Write one kickstart file: a licence header, the shared shell functions
+# (once as a %pre section, once as a %post section), then every listed
+# template in order.
+#
+# Arguments (positional):
+#   $personality - inserted into the "@platform-..." and "@updates-..."
+#                  group names that replace xxxPACKAGE_LISTxxx
+#   $ksout       - path of the kickstart file to create or overwrite
+#   $filter_file - name of a filter list under $files_dir; "" skips loading
+#   @templates   - template file names under $template_dir
+#
+# Reads the file-level settings $files_dir, $template_dir,
+# $PLATFORM_RELEASE and $BOOT_SERVER.  Dies when an input file cannot be
+# opened or the output file cannot be written completely.
+sub write_config_file {
+    my ($personality, $ksout, $filter_file, @templates) = @_;
+
+    # The filter list is parsed, and a missing list is fatal, but nothing
+    # further down in this sub consults %filter.
+    my %filter;
+    if ($filter_file ne "") {
+        open(my $filter_fh, '<', "$files_dir/$filter_file")
+            or die "Could not open filter $files_dir/$filter_file: $!";
+        while (my $entry = <$filter_fh>) {
+            # chomp only strips a line ending; chop would also eat the last
+            # character of a final line that has no trailing newline.
+            chomp($entry);
+            next if ($entry =~ /^#/);
+            $filter{$entry} = 1;
+        }
+        close($filter_fh);
+    }
+
+    print "Writing: $ksout\n";
+    open(my $out_fh, '>', $ksout) or die "Could not write $ksout: $!";
+
+    my $year = strftime "%Y", localtime;
+    print {$out_fh} "#\n";
+    print {$out_fh} "# Copyright (c) $year Wind River Systems, Inc.\n";
+    print {$out_fh} "# SPDX-License-Identifier: Apache-2.0\n";
+    print {$out_fh} "#\n";
+    print {$out_fh} "\n";
+
+    # Placeholder expansion shared by functions.sh and the templates.
+    my $expand = sub {
+        my ($text) = @_;
+        $text =~ s/xxxPLATFORM_RELEASExxx/$PLATFORM_RELEASE/g;
+        $text =~ s/xxxBOOT_SERVERxxx/$BOOT_SERVER/g;
+        $text =~ s/xxxYEARxxx/$year/g;
+        return $text;
+    };
+
+    # Add functions header.  Both sections carry the same expanded text, so
+    # functions.sh is read and expanded once and emitted twice.
+    open(my $functions_fh, '<', "$template_dir/functions.sh")
+        or die "Could not open functions.sh: $!";
+    my @functions = map { $expand->($_) } <$functions_fh>;
+    close($functions_fh);
+
+    foreach my $block ("\%pre", "\%post") {
+        print {$out_fh} "$block\n";
+        print {$out_fh} @functions;
+        print {$out_fh} "\%end\n\n";
+    }
+
+    foreach my $template (@templates) {
+        open(my $template_fh, '<', "$template_dir/$template")
+            or die "Could not open template $template_dir/$template: $!";
+        print {$out_fh} "\n# Template from: $template\n";
+        while (my $line = <$template_fh>) {
+            # Strip the line ending here and re-add it on output so that
+            # every emitted line is newline-terminated.
+            chomp($line);
+            $line = $expand->($line);
+
+            # Only the first placeholder on a line is replaced (no /g).
+            $line =~ s/xxxPACKAGE_LISTxxx/\@platform-$personality\n\@updates-$personality/;
+
+            print {$out_fh} "$line\n";
+        }
+        close($template_fh);
+    }
+
+    # Buffered write errors only surface at close, so check it.
+    close($out_fh) or die "Could not finish writing $ksout: $!";
+}
diff --git a/bsp-files/centos.syslinux.cfg b/bsp-files/centos.syslinux.cfg
new file mode 100644
index 00000000..f6b05695
--- /dev/null
+++ b/bsp-files/centos.syslinux.cfg
@@ -0,0 +1,188 @@
+display splash.cfg
+timeout 0
+F1 help.txt
+F2 devices.txt
+F3 splash.cfg
+serial 0 115200
+
+# Pull in the menu User Interface
+ui vesamenu.c32
+
+menu title Select kernel options and boot kernel
+menu tabmsg Press [Tab] to edit, [Return] to select, [ESC] to return to previous menu
+
+# Dark grey
+menu background #ff555555
+
+# ----------------- NOTE -----------------
+# If you are updating label numbers, make sure that controllerconfig/clone.py
+# is in sync with your changes (only serial console entries).
+# STANDARD_STANDARD = '0' +# STANDARD_EXTENDED = 'S0' +# AIO_STANDARD = '2' +# AIO_EXTENDED = 'S2' +# AIO_LL_STANDARD = '4' +# AIO_LL_EXTENDED = 'S4' +# ---------------------------------------- + + +# Standard Controller menu +menu begin + menu title Standard Controller Configuration + # Serial Console submenu + menu begin + menu title Serial Console + label 0 + menu label STANDARD Security Boot Profile + text help + Standard Controller, console=ttyS0 + Standard Security Profile Enabled (default setting) + endtext + kernel vmlinuz + initrd initrd.img + append rootwait console=ttyS0,115200 inst.text serial inst.stage2=hd:LABEL=oe_iso_boot inst.ks=hd:LABEL=oe_iso_boot:/ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 inst.gpt security_profile=standard + + label S0 + menu label EXTENDED Security Boot Profile + text help + Standard Controller, console=ttyS0 + Extended Security Profile Enabled (will impact performance) + endtext + kernel vmlinuz + initrd initrd.img + append rootwait console=ttyS0,115200 inst.text serial inst.stage2=hd:LABEL=oe_iso_boot inst.ks=hd:LABEL=oe_iso_boot:/ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 inst.gpt security_profile=extended + menu end + + # Graphical Console submenu + menu begin + menu title Graphical Console + label 1 + menu label STANDARD Security Boot Profile + text help + Standard Controller, console=tty0 + Standard Security Profile Enabled (default setting) + endtext + kernel vmlinuz + initrd initrd.img + append rootwait console=tty0 inst.text inst.stage2=hd:LABEL=oe_iso_boot inst.ks=hd:LABEL=oe_iso_boot:/ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 inst.gpt security_profile=standard + + label S1 + menu label EXTENDED Security Boot Profile + text help + Standard Controller, console=tty0 + Extended Security Profile Enabled (will impact performance) + endtext + kernel vmlinuz + initrd initrd.img + append rootwait console=tty0 inst.text 
inst.stage2=hd:LABEL=oe_iso_boot inst.ks=hd:LABEL=oe_iso_boot:/ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 inst.gpt security_profile=extended + menu end +menu end + +menu SEPARATOR + +# AIO Controller menu +menu begin + menu title All-in-one Controller Configuration + # Serial Console submenu + menu begin + menu title Serial Console + label 2 + menu label STANDARD Security Boot Profile + text help + All-in-one Controller, console=ttyS0 + Standard Security Profile Enabled (default setting) + endtext + kernel vmlinuz + initrd initrd.img + append rootwait console=ttyS0,115200 inst.text serial inst.stage2=hd:LABEL=oe_iso_boot inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 inst.gpt security_profile=standard + + label S2 + menu label EXTENDED Security Boot Profile + text help + All-in-one Controller, console=ttyS0 + Extended Security Profile Enabled (will impact performance) + endtext + kernel vmlinuz + initrd initrd.img + # Security profile option + append rootwait console=ttyS0,115200 inst.text serial inst.stage2=hd:LABEL=oe_iso_boot inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 inst.gpt security_profile=extended + menu end + + # Graphical Console submenu + menu begin + menu title Graphical Console + label 3 + menu label STANDARD Security Boot Profile + text help + All-in-one Controller, console=tty0 + Standard Security Profile Enabled (default setting) + endtext + kernel vmlinuz + initrd initrd.img + append rootwait console=tty0 inst.text inst.stage2=hd:LABEL=oe_iso_boot inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 inst.gpt security_profile=standard + + label S3 + menu label EXTENDED Security Boot Profile + text help + All-in-one Controller, console=tty0 + Extended Security Profile Enabled (will impact performance) + 
endtext + kernel vmlinuz + initrd initrd.img + append rootwait console=tty0 inst.text inst.stage2=hd:LABEL=oe_iso_boot inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 inst.gpt security_profile=extended + menu end +menu end + +menu SEPARATOR + +# AIO (Low Latency) Controller menu +menu begin + menu title All-in-one (lowlatency) Controller Configuration + # Serial Console submenu + menu begin + menu title Serial Console + label 4 + menu label STANDARD Security Boot Profile + text help + All-in-one (lowlatency) Controller, console=ttyS0 + Standard Security Profile Enabled (default setting) + endtext + kernel vmlinuz + initrd initrd.img + append rootwait console=ttyS0,115200 inst.text serial inst.stage2=hd:LABEL=oe_iso_boot inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_lowlatency_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 inst.gpt security_profile=standard + + label S4 + menu label EXTENDED Security Boot Profile + text help + All-in-one (lowlatency) Controller, console=ttyS0 + Extended Security Profile Enabled (will impact performance) + endtext + kernel vmlinuz + initrd initrd.img + append rootwait console=ttyS0,115200 inst.text serial inst.stage2=hd:LABEL=oe_iso_boot inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_lowlatency_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 inst.gpt security_profile=extended + menu end + + # Graphical Console submenu + menu begin + menu title Graphical Console + label 5 + menu label STANDARD Security Boot Profile + text help + All-in-one (lowlatency) Controller, console=tty0 + Standard Security Profile Enabled (default setting) + endtext + kernel vmlinuz + initrd initrd.img + append rootwait console=tty0 inst.text inst.stage2=hd:LABEL=oe_iso_boot inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_lowlatency_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 inst.gpt security_profile=standard + 
+ label S5 + menu label EXTENDED Security Boot Profile + text help + All-in-one (lowlatency) Controller, console=tty0 + Extended Security Profile Enabled (will impact performance) + endtext + kernel vmlinuz + initrd initrd.img + append rootwait console=tty0 inst.text inst.stage2=hd:LABEL=oe_iso_boot inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_lowlatency_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 inst.gpt security_profile=extended + menu end +menu end diff --git a/bsp-files/filter_out_from_compute b/bsp-files/filter_out_from_compute new file mode 100644 index 00000000..80582593 --- /dev/null +++ b/bsp-files/filter_out_from_compute @@ -0,0 +1,263 @@ +ceilometer-api +ceilometer-alarm-evaluator +ceilometer-alarm-notifier +ceilometer-collector +ceilometer-tests +ceph-manager +cgcs-dpdk-rt +cgcs-dpdk-rt-dev +cgcs-dpdk-rt-staticdev +cgcs-dpdk-rt-apps +cgts-client +cgts-client-bash-completion +cgcs-patch-controller +cgts-mtce-control +cgts-mtce-storage +cgts-mtce-common-guestAgent +cgtssys +cinder +cinder-api +cinder-scheduler +cinder-tests +cinder-volume +computeconfig-subfunction +configvalidator +controllerconfig +corosync +createrepo +d2to1 +distributedcloud-dcmanager +distributedcloud-client-dcmanagerclient +distributedcloud-dcorch +drbd-tools +fm-doc +glance +glance-api +glance-registry +glance-tests +horizon +horizon-standalone +horizon-tests +heat-api-cfn +heat-api +heat-cfn +heat-common +heat-contrib-nova-flavor +heat-engine +heat-templates +heat-tests +io-monitor +isomd5sum +kernel-module-openvswitch +keystone +keystone-tests +ldapscripts +libconfig-general-perl +libssh2 +lighttpd-module-access +lighttpd-module-accesslog +lighttpd-module-dirlisting +lighttpd-module-indexfile +lighttpd-module-proxy +lighttpd-module-staticfile +lighttpd +nodejs +nova-api +nova-conductor +nova-consoleauth +nova-controller +nova-network +nova-novncproxy +nova-scheduler +nova-spicehtml5proxy +nova-tests +nova-api-proxy +nova-placement-api +novnc 
+net-snmp +openldap-backend-bdb +openldap-backend-dnssrv +openldap-backend-hdb +openldap-backend-ldap +openldap-backend-mdb +openldap-backend-meta +openldap-backend-monitor +openldap-backend-null +openldap-backend-passwd +openldap-backend-shell +openldap-backends +openldap-dbg +openldap-doc +openldap-overlay-proxycache +openldap-overlay-syncprov +openldap-servers +openldap-slapd +openstack-ras +pacemaker +pacemaker-cli +pacemaker-cluster-libs +pacemaker-libs +patch-alarm +patching-controller +pbr +pecan +postgresql-client +postgresql-contrib +postgresql-timezone +postgresql +pxe-network-installer +python-cephclient +python-cinder +python-django +python-django-appconf +python-django-compressor +python-django-openstack-auth +python-glance +python2-gunicorn +python-heatclient +python-heatclient-bash-completion +python-heat +python-horizon +python-keystone +python-lockfile +python-novnc +python-pam +python-passlib +python-pytz +python-swiftclient +python-wsme +fm-mgr +snmp-ext +sm +sm-api +sm-client +sm-common +sm-db +sm-tools +storageconfig +storage-topology +task-cloud-controller +tgt +nfv-plugins +nfv-vim +vm-topology +remote-clients +lighttpd-fastcgi +lighttpd-mod_geoip +lighttpd-mod_mysql_vhost +openstack-aodh-api +openstack-aodh-commmon +openstack-aodh-compat +openstack-aodh-evaluator +openstack-aodh-expirer +openstack-aodh-listener +openstack-aodh-notifier +openstack-cinder +openstack-cinder-doc +openstack-dashboard +openstack-glance +openstack-glance-doc +openstack-heat-api +openstack-heat-api-cfn +openstack-heat-api-cloudwatch +openstack-heat-common +openstack-heat-engine +openstack-ironic-api +openstack-ironic-common +openstack-ironic-conductor +python-ironic-tests +python-ironic-inspector-client +python-ironic-lib +python2-ironicclient +openstack-keystone +openstack-keystone-doc +openstack-murano-api +openstack-murano-cf-api +openstack-murano-common +openstack-murano-doc +openstack-murano-engine +openstack-murano-ui +openstack-murano-ui-doc 
+python2-muranoclient +python-muranoclient-doc +openstack-magnum-api +openstack-magnum-common +openstack-magnum-conductor +openstack-magnum-doc +openstack-magnum-ui +python2-magnumclient +python-magnum +python-magnumclient-doc +python-magnumclient-tests +python-magnum-tests +python-magnum-ui-doc +openstack-neutron-common +openstack-neutron-ml2 +openstack-nova-api +openstack-nova-cells +openstack-nova-conductor +openstack-nova-console +openstack-nova-doc +openstack-nova-network +openstack-nova-novncproxy +openstack-nova-objectstore +openstack-nova-scheduler +openstack-nova-serialproxy +openstack-nova-spicehtml5proxy +openstack-nova-placement-api +openstack-panko-api +openstack-panko-common +openstack-panko-doc +python-aodh +python-aodhclient +python-ceilometerclient-doc +python-cinder +python-cinderclient-doc +python-glance +python-heatclient +python-keystone +python-novaclient-doc +python-panko +python2-pankoclient +configutilities +drbd-bash-completion +drbd-udev +drbd-utils +drbd-heartbeat +drbd-pacemaker +drbd +kmod-drbd +python-networking-odl +tis-extensions-controller +wrs-heat-templates +python-django-horizon +kernel-rt +kernel-module-igb-uio-rt +kernel-rt-kvm +kmod-e1000e-rt +kmod-i40e-rt +kmod-ixgbe-rt +kmod-tpm-rt +kmod-integrity-rt +mlnx-ofa_kernel-rt-modules +rtctl +rt-setup +qat17-rt +kernel-rt-tools +kernel-rt-tools-libs +NaviCLI-Linux-64-x86-en_US +kmod-drbd-rt +snmp-audittrail +wrs-ssl +tpm2-tools +tss2 +tpm2-openssl-engine +python2-networking-bgpvpn +python-networking-bgpvpn-dashboard +python-networking-bgpvpn-heat +python2-neutron-dynamic-routing +python2-ryu +python-ryu-common +platform-kickstarts +python-3parclient +python-lefthandclient diff --git a/bsp-files/filter_out_from_compute_lowlatency b/bsp-files/filter_out_from_compute_lowlatency new file mode 100644 index 00000000..fbbb6f20 --- /dev/null +++ b/bsp-files/filter_out_from_compute_lowlatency @@ -0,0 +1,265 @@ +ceilometer-api +ceilometer-alarm-evaluator +ceilometer-alarm-notifier 
+ceilometer-collector +ceilometer-tests +ceph-manager +cgcs-dpdk +cgcs-dpdk-dev +cgcs-dpdk-staticdev +cgcs-dpdk-apps +cgts-client +cgts-client-bash-completion +cgcs-patch-controller +cgts-mtce-control +cgts-mtce-storage +cgts-mtce-common-guestAgent +cgtssys +cinder +cinder-api +cinder-scheduler +cinder-tests +cinder-volume +computeconfig-subfunction +configvalidator +controllerconfig +corosync +createrepo +d2to1 +distributedcloud-dcmanager +distributedcloud-client-dcmanagerclient +distributedcloud-dcorch +drbd-tools +fm-doc +glance +glance-api +glance-registry +glance-tests +horizon +horizon-standalone +horizon-tests +heat-api-cfn +heat-api +heat-cfn +heat-common +heat-contrib-nova-flavor +heat-engine +heat-templates +heat-tests +io-monitor +isomd5sum +kernel-module-openvswitch +keystone +keystone-tests +ldapscripts +libconfig-general-perl +libssh2 +lighttpd-module-access +lighttpd-module-accesslog +lighttpd-module-dirlisting +lighttpd-module-indexfile +lighttpd-module-proxy +lighttpd-module-staticfile +lighttpd +nodejs +nova-api +nova-conductor +nova-consoleauth +nova-controller +nova-network +nova-novncproxy +nova-scheduler +nova-spicehtml5proxy +nova-tests +nova-api-proxy +nova-placement-api +novnc +net-snmp +neutron-plugin-ml2 +neutron-server +neutron-tests +openldap-backend-bdb +openldap-backend-dnssrv +openldap-backend-hdb +openldap-backend-ldap +openldap-backend-mdb +openldap-backend-meta +openldap-backend-monitor +openldap-backend-null +openldap-backend-passwd +openldap-backend-shell +openldap-backends +openldap-dbg +openldap-doc +openldap-overlay-proxycache +openldap-overlay-syncprov +openldap-servers +openldap-slapd +openstack-ras +pacemaker +pacemaker-cli +pacemaker-cluster-libs +pacemaker-libs +patch-alarm +patching-controller +pbr +pecan +postgresql-client +postgresql-contrib +postgresql-timezone +postgresql +pxe-network-installer +python-cephclient +python-cinder +python-django +python-django-appconf +python-django-compressor 
+python-django-openstack-auth +python-glance +python2-gunicorn +python-heatclient +python-heatclient-bash-completion +python-heat +python-horizon +python-keystone +python-lockfile +python-novnc +python-pam +python-passlib +python-pytz +python-swiftclient +python-wsme +fm-mgr +snmp-ext +sm +sm-api +sm-client +sm-common +sm-db +sm-tools +storageconfig +storage-topology +task-cloud-controller +tgt +nfv-plugins +nfv-vim +vm-topology +remote-clients +lighttpd-fastcgi +lighttpd-mod_geoip +lighttpd-mod_mysql_vhost +openstack-aodh-api +openstack-aodh-commmon +openstack-aodh-compat +openstack-aodh-evaluator +openstack-aodh-expirer +openstack-aodh-listener +openstack-aodh-notifier +openstack-cinder +openstack-cinder-doc +openstack-dashboard +openstack-glance +openstack-glance-doc +openstack-heat-api +openstack-heat-api-cfn +openstack-heat-api-cloudwatch +openstack-heat-common +openstack-heat-engine +openstack-ironic-api +openstack-ironic-common +openstack-ironic-conductor +python-ironic-tests +python-ironic-inspector-client +python-ironic-lib +python2-ironicclient +openstack-keystone +openstack-keystone-doc +openstack-murano-api +openstack-murano-cf-api +openstack-murano-common +openstack-murano-doc +openstack-murano-engine +openstack-murano-ui +openstack-murano-ui-doc +python2-muranoclient +python-muranoclient-doc +openstack-magnum-api +openstack-magnum-common +openstack-magnum-conductor +openstack-magnum-doc +openstack-magnum-ui +python2-magnumclient +python-magnum +python-magnumclient-doc +python-magnumclient-tests +python-magnum-tests +python-magnum-ui-doc +openstack-neutron-common +openstack-neutron-plugin-ml2 +openstack-neutron-server +openstack-nova-api +openstack-nova-cells +openstack-nova-conductor +openstack-nova-console +openstack-nova-doc +openstack-nova-network +openstack-nova-novncproxy +openstack-nova-objectstore +openstack-nova-scheduler +openstack-nova-serialproxy +openstack-nova-spicehtml5proxy +openstack-nova-placement-api +openstack-panko-api 
+openstack-panko-common +openstack-panko-doc +python-aodh +python-aodhclient +python-ceilometerclient-doc +python-cinder +python-cinderclient-doc +python-glance +python-heatclient +python-keystone +python-novaclient-doc +python-panko +python2-pankoclient +configutilities +drbd-bash-completion +drbd-udev +drbd-utils +drbd-heartbeat +drbd-pacemaker +drbd +kmod-drbd +python-networking-odl +tis-extensions-controller +wrs-heat-templates +python-django-horizon +kernel +kernel-module-igb-uio +kmod-e1000e +kmod-i40e +kmod-i40evf +kmod-ixgbe +kmod-ixgbevf +kmod-tpm +kmod-integrity +mlnx-ofa_kernel-modules +qat17 +kernel-tools +kernel-tools-libs +NaviCLI-Linux-64-x86-en_US +kmod-drbd-rt +snmp-audittrail +wrs-ssl +tpm2-tools +tss2 +python2-networking-bgpvpn +python-networking-bgpvpn-dashboard +python-networking-bgpvpn-heat +python2-neutron-dynamic-routing +python2-ryu +python-ryu-common +platform-kickstarts +python-3parclient +python-lefthandclient diff --git a/bsp-files/filter_out_from_controller b/bsp-files/filter_out_from_controller new file mode 100644 index 00000000..bb706bb5 --- /dev/null +++ b/bsp-files/filter_out_from_controller @@ -0,0 +1,52 @@ +task-cloud-compute +compute-huge +computeconfig +computeconfig-standalone +computeconfig-subfunction +guest-scale-helper +heartbeat +storageconfig +kernel-module-openvswitch +openstack-neutron-sriov-nic-agent +platform-util-noncontroller +ceilometer-compute +openstack-ceilometer-compute +cgcs-dpdk +cgcs-dpdk-rt +cgcs-dpdk-dev +cgcs-dpdk-rt-dev +cgcs-dpdk-staticdev +cgcs-dpdk-rt-staticdev +cgcs-dpdk-rt-apps +cgts-mtce-compute +cgts-mtce-storage +cgts-mtce-common-guestServer +host-guest-comm +host-guest-comm-dev +nfscheck +radvd +remote-clients +config-gate-compute +kernel-rt +kernel-module-igb-uio +kernel-module-igb-uio-rt +kernel-rt-kvm +kmod-e1000e-rt +kmod-i40e-rt +kmod-ixgbe-rt +kmod-tpm-rt +kmod-integrity-rt +mlnx-ofa_kernel-rt-modules +rtctl +rt-setup +qat17-rt +kernel-rt-tools +kernel-rt-tools-libs +kmod-drbd-rt 
+libtpms +swtpm +swtpm-tools +swtpm-cuse +swtpm-libs +OVMF +openvswitch diff --git a/bsp-files/filter_out_from_smallsystem b/bsp-files/filter_out_from_smallsystem new file mode 100644 index 00000000..ca885748 --- /dev/null +++ b/bsp-files/filter_out_from_smallsystem @@ -0,0 +1,24 @@ +cgcs-dpdk-rt +cgcs-dpdk-rt-apps +cgts-mtce-storage +computeconfig-standalone +kernel-module-openvswitch +platform-util-noncontroller +storageconfig +nfscheck +remote-clients +kernel-rt +kernel-module-igb-uio-rt +kernel-rt-kvm +kmod-e1000e-rt +kmod-i40e-rt +kmod-ixgbe-rt +kmod-tpm-rt +kmod-integrity-rt +mlnx-ofa_kernel-rt-modules +rtctl +rt-setup +qat17-rt +kernel-rt-tools +kernel-rt-tools-libs +kmod-drbd-rt diff --git a/bsp-files/filter_out_from_smallsystem_lowlatency b/bsp-files/filter_out_from_smallsystem_lowlatency new file mode 100644 index 00000000..75a2b9da --- /dev/null +++ b/bsp-files/filter_out_from_smallsystem_lowlatency @@ -0,0 +1,23 @@ +cgcs-dpdk +cgcs-dpdk-apps +cgts-mtce-storage +computeconfig-standalone +kernel-module-openvswitch +platform-util-noncontroller +storageconfig +nfscheck +remote-clients +kernel +kernel-module-igb-uio +kmod-e1000e +kmod-i40e +kmod-i40evf +kmod-ixgbe +kmod-ixgbevf +kmod-tpm +kmod-integrity +mlnx-ofa_kernel-modules +qat17 +kernel-tools +kernel-tools-libs +kmod-drbd diff --git a/bsp-files/filter_out_from_storage b/bsp-files/filter_out_from_storage new file mode 100644 index 00000000..073e0b31 --- /dev/null +++ b/bsp-files/filter_out_from_storage @@ -0,0 +1,303 @@ +ceilometer-alarm-evaluator +ceilometer-alarm-notifier +ceilometer-api +ceilometer-collector +ceilometer-compute +ceilometer-tests +ceph-manager +cgcs-dpdk +cgcs-dpdk-rt +cgcs-dpdk-dev +cgcs-dpdk-rt-dev +cgcs-dpdk-staticdev +cgcs-dpdk-rt-staticdev +cgcs-dpdk-rt-apps +cgts-client +cgts-client-bash-completion +cgcs-patch-controller +cgts-mtce-compute +cgts-mtce-control +cgts-mtce-common-guestServer +cgts-mtce-common-guestAgent +cgtssys +cinder +cinder-api +cinder-scheduler +cinder-tests 
+cinder-volume +compute-huge +computeconfig +computeconfig-standalone +computeconfig-subfunction +configvalidator +controllerconfig +createrepo +d2to1 +distributedcloud-dcmanager +distributedcloud-client-dcmanagerclient +distributedcloud-dcorch +erlang +fm-doc +glance +glance-api +glance-registry +glance-tests +glusterfs +glusterfs-api +glusterfs-client-xlators +glusterfs-libs +guest-scale-helper +haproxy +heartbeat +heat-api +heat-api-cfn +heat-cfn +heat-common +heat-contrib-nova-flavor +heat-engine +heat-templates +heat-tests +horizon +horizon-standalone +horizon-tests +host-guest-comm +io-monitor +io-scheduler +isomd5sum +ipxe-roms-qemu +kernel-module-openvswitch +keystone +keystone-tests +libconfig-general-perl +libguestfs +libssh2 +lighttpd-module-access +lighttpd-module-accesslog +lighttpd-module-dirlisting +lighttpd-module-indexfile +lighttpd-module-proxy +lighttpd-module-staticfile +lighttpd +lighttpd-mod_geoip +lighttpd-fastcgi +lighttpd-mod_mysql_vhost +libvirt +libvirt-daemon +libvirt-daemon-config-network +libvirt-daemon-config-nwfilter +libvirt-daemon-driver-interface +libvirt-daemon-driver-lxc +libvirt-daemon-driver-network +libvirt-daemon-driver-nodedev +libvirt-daemon-driver-nwfilter +libvirt-daemon-driver-qemu +libvirt-daemon-driver-qemu +libvirt-daemon-driver-secret +libvirt-daemon-driver-storage +libvirt-daemon-kvm +libvirt-docs +nodejs +nova-api +nova-common +nova-compute +nova-compute-preclean +nova-compute-setup +nova-conductor +nova-consoleauth +nova-controller +nova-network +nova-novncproxy +nova-scheduler +nova-spicehtml5proxy +nova-tests +nova-api-proxy +nova-placement-api +novnc +net-snmp +openstack-aodh-api +openstack-aodh-commmon +openstack-aodh-compat +openstack-aodh-evaluator +openstack-aodh-expirer +openstack-aodh-listener +openstack-aodh-notifier +openstack-ceilometer-alarm +openstack-ceilometer-central +openstack-ceilometer-collector +openstack-ceilometer-compute +openstack-ceilometer-ipmi +openstack-ceilometer-notification 
+openstack-cinder +openstack-cinder-doc +openstack-dashboard +openstack-glance +openstack-glance-doc +openstack-heat-api +openstack-heat-api-cfn +openstack-heat-api-cloudwatch +openstack-heat-common +openstack-heat-engine +openstack-ironic-api +openstack-ironic-common +openstack-ironic-conductor +python-ironic-tests +python-ironic-inspector-client +python-ironic-lib +python2-ironicclient +openstack-keystone +openstack-keystone-doc +openstack-murano-api +openstack-murano-cf-api +openstack-murano-common +openstack-murano-doc +openstack-murano-engine +openstack-murano-ui +openstack-murano-ui-doc +python2-muranoclient +python-muranoclient-doc +openstack-magnum-api +openstack-magnum-common +openstack-magnum-conductor +openstack-magnum-doc +openstack-magnum-ui +python2-magnumclient +python-magnum +python-magnumclient-doc +python-magnumclient-tests +python-magnum-tests +python-magnum-ui-doc +openstack-neutron +openstack-neutron-common +openstack-neutron-sriov-nic-agent +openstack-neutron-ml2 +openstack-nova-api +openstack-nova-cells +openstack-nova-common +openstack-nova-compute +openstack-nova-conductor +openstack-nova-console +openstack-nova-doc +openstack-nova-network +openstack-nova-novncproxy +openstack-nova-objectstore +openstack-nova-scheduler +openstack-nova-serialproxy +openstack-nova-spicehtml5proxy +openstack-nova-placement-api +openstack-panko-api +openstack-panko-common +openstack-panko-doc +openstack-ras +patch-alarm +patching-controller +pbr +pecan +postgresql +postgresql-client +postgresql-contrib +postgresql-timezone +pxe-network-installer +python-aodh +python-aodhclient +python-ceilometerclient +python-cephclient +python-cinder +python-cinderclient +python-django +python-django-appconf +python-django-compressor +python-django-openstack-auth +python-glance +python-glanceclient +python2-gunicorn +python-heat +python-heatclient +python-heatclient-bash-completion +python-horizon +python-keystone +python-libguestfs +python-lockfile +python-netclient 
+python-neutron +python-neutron-lib +python2-neutronclient +python-nova +python2-novaclient +python-novnc +python-openstackclient +python-pam +python-panko +python2-pankoclient +python-passlib +python-pytz +python-swiftclient +python-wsme +qemu-img-ev +qemu-kvm-common-ev +qemu-kvm-ev +qemu-kvm-tools-ev +radvd +rubygem-rdoc +snmp-ext +task-cloud-compute +task-cloud-controller +tgt +fm-mgr +sm +sm-api +sm-client +sm-common +sm-db +sm-tools +storage-topology +nfv-common +nfv-plugins +nfv-vim +nfv-tools +vm-topology +remote-clients +python-networking-odl +config-gate-compute +tis-extensions-controller +wrs-heat-templates +python-django-horizon +libmlx5 +kernel-rt +kernel-module-igb-uio +kernel-module-igb-uio-rt +kernel-rt-kvm +kmod-e1000e-rt +kmod-i40e-rt +kmod-ixgbe-rt +kmod-tpm-rt +kmod-integrity-rt +mlnx-ofa_kernel-rt-modules +rtctl +rt-setup +qat17-rt +kernel-rt-tools +kernel-rt-tools-libs +NaviCLI-Linux-64-x86-en_US +kmod-drbd-rt +snmp-audittrail +wrs-ssl +tpm2-tools +tss2 +tpm2-openssl-engine +libtpms +swtpm +swtpm-tools +swtpm-cuse +swtpm-libs +OVMF +python2-networking-bgpvpn +python-networking-bgpvpn-dashboard +python-networking-bgpvpn-heat +python2-neutron-dynamic-routing +python2-ryu +python-ryu-common +python2-networking-sfc +openvswitch +platform-kickstarts +python-3parclient +python-lefthandclient diff --git a/bsp-files/grub.cfg b/bsp-files/grub.cfg new file mode 100644 index 00000000..f5c9fe4e --- /dev/null +++ b/bsp-files/grub.cfg @@ -0,0 +1,155 @@ +default=2 +timeout=-1 + +search --no-floppy --set=root -l 'oe_iso_boot' + +# ---------------------- NOTE ---------------------- +# If you are updating menus, make sure that controllerconfig/clone.py +# is in sync with your changes (only serial console ids). 
+# STANDARD_STANDARD = 'standard>serial>' + +# sysinv_constants.SYSTEM_SECURITY_PROFILE_STANDARD +# STANDARD_EXTENDED = 'standard>serial>' + +# sysinv_constants.SYSTEM_SECURITY_PROFILE_EXTENDED +# AIO_STANDARD = 'standard>aio>' + +# sysinv_constants.SYSTEM_SECURITY_PROFILE_STANDARD +# AIO_EXTENDED = 'standard>aio>' + +# sysinv_constants.SYSTEM_SECURITY_PROFILE_EXTENDED +# AIO_LL_STANDARD = 'standard>aio-lowlat>' + +# sysinv_constants.SYSTEM_SECURITY_PROFILE_STANDARD +# AIO_LL_EXTENDED = 'standard>aio-lowlat>' + +# sysinv_constants.SYSTEM_SECURITY_PROFILE_EXTENDED +# SUBMENUITEM_TBOOT = 'tboot' +# SUBMENUITEM_SECUREBOOT = 'secureboot' +# -------------------------------------------------- + + +# Menu 0 +menuentry 'Select kernel options and boot kernel' { + echo " " +} + +# Create a space in the menu +menuentry ' ' { + echo " " +} + +# Standard Controller menu +submenu 'UEFI Standard Controller Configuration' --id=standard { + submenu 'Serial Console' --id=serial { + menuentry 'STANDARD Security Profile' --id=standard { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=ttyS0,115200 inst.text serial inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=standard + initrdefi /initrd.img + } + submenu 'EXTENDED Security Profile' --id=extended { + menuentry 'Secure Boot Profile' --id=secureboot { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=ttyS0,115200 inst.text serial inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=extended tboot=false + initrdefi /initrd.img + } + menuentry 'Trusted Boot Profile' --id=tboot { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=ttyS0,115200 inst.text serial inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=extended tboot=true + initrdefi /initrd.img + } + } 
+ } + + submenu 'Graphical Console' --id=graphical { + menuentry 'STANDARD Security Profile' --id=standard { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=tty0 serial inst.text inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=standard + initrdefi /initrd.img + } + submenu 'EXTENDED Security Profile' --id=extended { + menuentry 'Secure Boot Profile' --id=secureboot { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=tty0 serial inst.text inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=extended tboot=false + initrdefi /initrd.img + } + menuentry 'Trusted Boot Profile' --id=tboot { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=tty0 serial inst.text inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=extended tboot=true + initrdefi /initrd.img + } + } + } +} + +# Create a space in the menu +menuentry ' '{ + echo " " +} + +# AIO Controller menu +submenu 'UEFI All-in-one Controller Configuration' --id=aio { + submenu 'Serial Console' --id=serial { + menuentry 'STANDARD Security Profile' --id=standard { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=ttyS0,115200 inst.text serial inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=standard + initrdefi /initrd.img + } + submenu 'EXTENDED Security Profile' --id=extended { + menuentry 'Secure Boot Profile' --id=secureboot { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=ttyS0,115200 inst.text serial inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=extended tboot=false + initrdefi /initrd.img + } + menuentry 'Trusted Boot 
Profile' --id=tboot { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=ttyS0,115200 inst.text serial inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=extended tboot=true + initrdefi /initrd.img + } + } + } + + submenu 'Graphical Console' --id=graphical { + menuentry 'STANDARD Security Profile' --id=standard { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=tty0 inst.text inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=standard + initrdefi /initrd.img + } + submenu 'EXTENDED Security Profile' --id=extended { + menuentry 'Secure Boot Profile' --id=secureboot { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=tty0 inst.text inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=extended tboot=false + initrdefi /initrd.img + } + menuentry 'Trusted Boot Profile' --id=tboot { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=tty0 inst.text inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=extended tboot=true + initrdefi /initrd.img + } + } + } +} + +# Create a space in the menu +menuentry ' '{ + echo " " +} + +# AIO (lowlatency) Controller menu +submenu 'UEFI All-in-one (lowlatency) Controller Configuration' --id=aio-lowlat { + submenu 'Serial Console' --id=serial { + menuentry 'STANDARD Security Profile' --id=standard { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_lowlatency_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=ttyS0,115200 inst.text serial inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=standard + initrdefi /initrd.img + } + submenu 'EXTENDED Security Profile' 
--id=extended { + menuentry 'Secure Boot Profile' --id=secureboot { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_lowlatency_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=ttyS0,115200 inst.text serial inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=extended tboot=false + initrdefi /initrd.img + } + menuentry 'Trusted Boot Profile' --id=tboot { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_lowlatency_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=ttyS0,115200 inst.text serial inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=extended tboot=true + initrdefi /initrd.img + } + } + } + + submenu 'Graphical Console' --id=graphical { + menuentry 'STANDARD Security Profile' --id=standard { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_lowlatency_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=tty0 inst.text inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=standard + initrdefi /initrd.img + } + submenu 'EXTENDED Security Profile' --id=extended { + menuentry 'Secure Boot Profile' --id=secureboot { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_lowlatency_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=tty0 inst.text inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=extended tboot=false + initrdefi /initrd.img + } + menuentry 'Trusted Boot Profile' --id=tboot { + linuxefi /vmlinuz inst.ks=hd:LABEL=oe_iso_boot:/smallsystem_lowlatency_ks.cfg boot_device=sda rootfs_device=sda biosdevname=0 usbcore.autosuspend=-1 console=tty0 inst.text inst.stage2=hd:LABEL=oe_iso_boot inst.gpt security_profile=extended tboot=true + initrdefi /initrd.img + } + } + } +} diff --git a/bsp-files/kickstarts/functions.sh b/bsp-files/kickstarts/functions.sh new file mode 100644 index 00000000..41b48f65 --- /dev/null +++ 
b/bsp-files/kickstarts/functions.sh @@ -0,0 +1,72 @@ +# This file defines functions that can be used in %pre and %post kickstart sections, by including: +# . /tmp/ks-functions.sh +# + +cat << END_FUNCTIONS >/tmp/ks-functions.sh +# +# Copyright (c) xxxYEARxxx Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +function get_by_path() +{ + local disk=\$(cd /dev ; readlink -f \$1) + for p in /dev/disk/by-path/*; do + if [ "\$disk" = "\$(readlink -f \$p)" ]; then + echo \$p + return + fi + done +} + +function get_disk() +{ + echo \$(cd /dev ; readlink -f \$1) +} + +function report_pre_failure_with_msg() +{ + local msg=\$1 + echo -e '\n\nInstallation failed.\n' + echo "\$msg" + + exit 1 +} + +function report_post_failure_with_msg() +{ + local msg=\$1 + cat << EOF >> /etc/motd + +Installation failed. +\$msg + +EOF + echo "\$msg" >/etc/platform/installation_failed + + echo -e '\n\nInstallation failed.\n' + echo "\$msg" + + exit 1 +} + +function report_post_failure_with_logfile() +{ + local logfile=\$1 + cat << EOF >> /etc/motd + +Installation failed. +Please see \$logfile for details of failure + +EOF + echo \$logfile >/etc/platform/installation_failed + + echo -e '\n\nInstallation failed.\n' + cat \$logfile + + exit 1 +} + +END_FUNCTIONS + diff --git a/bsp-files/kickstarts/post_clone_iso_ks.cfg b/bsp-files/kickstarts/post_clone_iso_ks.cfg new file mode 100644 index 00000000..6553b181 --- /dev/null +++ b/bsp-files/kickstarts/post_clone_iso_ks.cfg @@ -0,0 +1,41 @@ + +# clone post installation script +%post --nochroot +if [ -d /mnt/install/source ]; then + srcdir=/mnt/install/source +else + srcdir=/run/install/repo +fi + +if [ -d $srcdir/clone-archive ]; then + cp -r $srcdir/clone-archive /mnt/sysimage/ + cp $srcdir/install_clone /mnt/sysimage/usr/bin/install_clone + chmod 500 /mnt/sysimage/usr/bin/install_clone +fi +%end + + +# clone post installation script to start install-clone service +%post --erroronfail + +# Build install-clone service file here, so that it is patchable. 
+cat << EOF > /usr/lib/systemd/system/install-clone.service +[Unit] +Description=Titanium install-clone service +After=hostw.service +Before=serial-getty@ttyS0.service getty@tty1.service +[Service] +User=root +Type=oneshot +RemainAfterExit=yes +Environment=PYTHONUNBUFFERED=1 +ExecStart=/usr/bin/install_clone +StandardOutput=syslog+console +[Install] +WantedBy=multi-user.target +EOF + +systemctl enable install-clone + +%end + diff --git a/bsp-files/kickstarts/post_common.cfg b/bsp-files/kickstarts/post_common.cfg new file mode 100644 index 00000000..fa5ebe33 --- /dev/null +++ b/bsp-files/kickstarts/post_common.cfg @@ -0,0 +1,104 @@ +%post --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +# Turn off locale support for i18n if is not installed +if [ ! -d /usr/share/i18n ] ; then + rm -f /etc/sysconfig/i18n +fi +# Unset the hostname +rm /etc/hostname + +# If using a serial install make sure to add a getty on the tty1 +conarg=`cat /proc/cmdline |xargs -n1 echo |grep console= |grep ttyS` +if [ -n "$conarg" ] ; then + echo "1:2345:respawn:/sbin/mingetty tty1" >> /etc/inittab +fi + +#### SECURITY PROFILE HANDLING (Post Installation) #### +# Check if the Security profile mode is enabled +# and load the appropriate kernel modules +secprofile=`cat /proc/cmdline |xargs -n1 echo |grep security_profile= | grep extended` +if [ -n "$secprofile" ]; then + echo "In Extended Security profile mode. 
Loading IMA kernel module" + systemctl enable auditd.service + # Add the securityfs mount for the IMA Runtime measurement list + echo "securityfs /sys/kernel/security securityfs defaults,nodev 0 0" >> /etc/fstab +else + # Disable audit daemon in the Standard Security Profile + systemctl disable auditd +fi + + +# Configure smart package manager channels +rm -rf /var/lib/smart +mkdir /var/lib/smart +/usr/bin/smart channel -y \ + --add rpmdb type=rpm-sys name="RPM Database" +/usr/bin/smart channel -y \ + --add base type=rpm-md name="Base" baseurl=http://controller/feed/rel-xxxPLATFORM_RELEASExxx +/usr/bin/smart channel -y \ + --add updates type=rpm-md name="Patches" baseurl=http://controller/updates/rel-xxxPLATFORM_RELEASExxx + +# Configure smart to use rpm --nolinktos option +/usr/bin/smart config --set rpm-nolinktos=true + +# Configure smart to use rpm --nosignature option +/usr/bin/smart config --set rpm-check-signatures=false + +# Delete the CentOS yum repo files +rm -f /etc/yum.repos.d/CentOS-* + +# Persist the boot device naming as UDEV rules so that if the network device +# order changes post-install that we will still be able to DHCP from the +# correct interface to reach the active controller. For most nodes only the +# management/boot interface needs to be persisted but because we require both +# controllers to be identically configured and controller-0 and controller-1 +# are installed differently (e.g., controller-0 from USB and controller-1 from +# network) it is not possible to know which interface to persist for +# controller-0. The simplest solution is to persist all interfaces. 
+# +mkdir -p /etc/udev/rules.d +echo "# Persisted network interfaces from anaconda installer" > /etc/udev/rules.d/70-persistent-net.rules +for dir in /sys/class/net/*; do + if [ -e ${dir}/device ]; then + dev=$(basename ${dir}) + mac_address=$(cat /sys/class/net/${dev}/address) + echo "ACTION==\"add\", SUBSYSTEM==\"net\", DRIVERS==\"?*\", ATTR{address}==\"${mac_address}\", NAME=\"${dev}\"" >> /etc/udev/rules.d/70-persistent-net.rules + fi +done + +# Mark the wrsroot password as expired immediately +chage -d 0 wrsroot + +# Lock the root password +passwd -l root + +# Enable tmpfs mount for /tmp +# delete /var/tmp so that it can be symlinked in +rm -rf /var/tmp +systemctl enable tmp.mount + +# Disable automount of /dev/hugepages +systemctl mask dev-hugepages.mount + +# Disable firewall +systemctl disable firewalld + +# Disable libvirtd +systemctl disable libvirtd.service + +# Enable rsyncd +systemctl enable rsyncd.service + +# Allow root to run sudo from a non-tty (for scripts running as root that run sudo cmds) +echo 'Defaults:root !requiretty' > /etc/sudoers.d/root + +# Make fstab just root read/writable +chmod 600 /etc/fstab + +# Create first_boot flag +touch /etc/platform/.first_boot + +%end diff --git a/bsp-files/kickstarts/post_kernel_aio_and_compute.cfg b/bsp-files/kickstarts/post_kernel_aio_and_compute.cfg new file mode 100644 index 00000000..8ec5cf7a --- /dev/null +++ b/bsp-files/kickstarts/post_kernel_aio_and_compute.cfg @@ -0,0 +1,98 @@ +%post --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +# Source the generated platform.conf +. /etc/platform/platform.conf + +# Update grub with custom kernel bootargs +source /etc/init.d/cpumap_functions.sh +n_cpus=$(cat /proc/cpuinfo 2>/dev/null | \ + awk '/^[pP]rocessor/ { n +=1 } END { print (n>0) ? 
n : 1}') +n_numa=$(ls -d /sys/devices/system/node/node* 2>/dev/null | wc -l) +KERN_OPTS=" iommu=pt usbcore.autosuspend=-1" + +# Add kernel options for hugepages +if grep -q pdpe1gb /proc/cpuinfo +then + KERN_OPTS="${KERN_OPTS} hugepagesz=1G hugepages=${n_numa}" + KERN_OPTS="${KERN_OPTS} hugepagesz=2M hugepages=0 default_hugepagesz=2M" +else + KERN_OPTS="${KERN_OPTS} hugepagesz=2M hugepages=0 default_hugepagesz=2M" +fi + +# If this is an all-in-one system, we need at least 4 CPUs +if [ "$system_type" = "All-in-one" -a ${n_cpus} -lt 4 ]; then + report_post_failure_with_msg "ERROR: At least 4 CPUs are required for controller+compute node." +fi + +# Add kernel options for cpu isolation / affinity +if [ ${n_cpus} -gt 1 ] +then + base_cpulist=$(platform_expanded_cpu_list) + base_cpumap=$(cpulist_to_cpumap ${base_cpulist} ${n_cpus}) + avp_cpulist=$(vswitch_expanded_cpu_list) + norcu_cpumap=$(invert_cpumap ${base_cpumap} ${n_cpus}) + norcu_cpulist=$(cpumap_to_cpulist ${norcu_cpumap} ${n_cpus}) + + if [[ "$subfunction" =~ lowlatency ]]; then + KERN_OPTS="${KERN_OPTS} isolcpus=${norcu_cpulist}" + KERN_OPTS="${KERN_OPTS} nohz_full=${norcu_cpulist}" + else + KERN_OPTS="${KERN_OPTS} isolcpus=${avp_cpulist}" + fi + KERN_OPTS="${KERN_OPTS} rcu_nocbs=${norcu_cpulist}" + KERN_OPTS="${KERN_OPTS} kthread_cpus=${base_cpulist}" + KERN_OPTS="${KERN_OPTS} irqaffinity=${base_cpulist}" + # Update vswitch.conf + sed -i "s/^VSWITCH_CPU_LIST=.*/VSWITCH_CPU_LIST=\"${avp_cpulist}\"/" /etc/vswitch/vswitch.conf +fi + +# Add kernel options to ensure an selinux is disabled +KERN_OPTS="${KERN_OPTS} selinux=0 enforcing=0" + +# Add kernel options to set NMI watchdog +if [[ "$subfunction" =~ lowlatency ]]; then + KERN_OPTS="${KERN_OPTS} nmi_watchdog=0 softlockup_panic=0" +else + KERN_OPTS="${KERN_OPTS} nmi_watchdog=panic,1 softlockup_panic=1" +fi + +if [[ "$(dmidecode -s system-product-name)" =~ ^ProLiant.*Gen8$ ]]; then + KERN_OPTS="${KERN_OPTS} intel_iommu=on,eth_no_rmrr" +else + 
KERN_OPTS="${KERN_OPTS} intel_iommu=on" +fi + +# Add kernel option to disable biosdevname if enabled +# As this may already be in GRUB_CMDLINE_LINUX, only add if it is not already present +grep -q '^GRUB_CMDLINE_LINUX=.*biosdevname=0' /etc/default/grub +if [ $? -ne 0 ]; then + KERN_OPTS="${KERN_OPTS} biosdevname=0" +fi + +# Add kernel options to disable kvm-intel.eptad on Broadwell +# Broadwell: Model: 79, Model name: Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz +if grep -q -E "^model\s+:\s+79$" /proc/cpuinfo +then + KERN_OPTS="${KERN_OPTS} kvm-intel.eptad=0" +fi + +KERN_OPTS="${KERN_OPTS} cgroup_disable=memory" + +# Add kernel option to avoid jiffies_lock contention on real-time kernel +if [[ "$subfunction" =~ lowlatency ]]; then + KERN_OPTS="${KERN_OPTS} skew_tick=1" +fi + +perl -pi -e 's/(GRUB_CMDLINE_LINUX=.*)\"/\1'"$KERN_OPTS"'\"/g' /etc/default/grub + +if [ -d /sys/firmware/efi ] ; then + grub2-mkconfig -o /boot/efi/EFI/centos/grub.cfg +else + grub2-mkconfig -o /boot/grub2/grub.cfg +fi + +%end + diff --git a/bsp-files/kickstarts/post_kernel_controller.cfg b/bsp-files/kickstarts/post_kernel_controller.cfg new file mode 100644 index 00000000..f782b6c4 --- /dev/null +++ b/bsp-files/kickstarts/post_kernel_controller.cfg @@ -0,0 +1,38 @@ +%post --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +## Custom kernel options +KERN_OPTS=" intel_iommu=off usbcore.autosuspend=-1" + +## Setup the loop module to support up to 15 partitions so that we can enable the +## customer to manually resize images if needed. 
+## +KERN_OPTS="${KERN_OPTS} loop.max_part=15" + +## Add kernel options to ensure an selinux is disabled +KERN_OPTS="${KERN_OPTS} selinux=0 enforcing=0" + +# Add kernel options to ensure NMI watchdog is enabled, if supported +KERN_OPTS="${KERN_OPTS} nmi_watchdog=panic,1 softlockup_panic=1" + +# Add kernel option to disable biosdevname if enabled +# As this may already be in GRUB_CMDLINE_LINUX, only add if it is not already present +grep -q '^GRUB_CMDLINE_LINUX=.*biosdevname=0' /etc/default/grub +if [ $? -ne 0 ]; then + KERN_OPTS="${KERN_OPTS} biosdevname=0" +fi + +KERN_OPTS="${KERN_OPTS} cgroup_disable=memory" + +perl -pi -e 's/(GRUB_CMDLINE_LINUX=.*)\"/\1'"$KERN_OPTS"'\"/g' /etc/default/grub + +if [ -d /sys/firmware/efi ] ; then + grub2-mkconfig -o /boot/efi/EFI/centos/grub.cfg +else + grub2-mkconfig -o /boot/grub2/grub.cfg +fi + +%end + diff --git a/bsp-files/kickstarts/post_kernel_storage.cfg b/bsp-files/kickstarts/post_kernel_storage.cfg new file mode 100644 index 00000000..7ede2eb9 --- /dev/null +++ b/bsp-files/kickstarts/post_kernel_storage.cfg @@ -0,0 +1,32 @@ +%post --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +## Custom kernel options +KERN_OPTS=" intel_iommu=off usbcore.autosuspend=-1" +## Add kernel options to ensure an selinux is disabled +KERN_OPTS="${KERN_OPTS} selinux=0 enforcing=0" + +# Add kernel options to ensure NMI watchdog is enabled, if supported +KERN_OPTS="${KERN_OPTS} nmi_watchdog=panic,1 softlockup_panic=1" + +# Add kernel option to disable biosdevname if enabled +# As this may already be in GRUB_CMDLINE_LINUX, only add if it is not already present +grep -q '^GRUB_CMDLINE_LINUX=.*biosdevname=0' /etc/default/grub +if [ $? 
-ne 0 ]; then + KERN_OPTS="${KERN_OPTS} biosdevname=0" +fi + +KERN_OPTS="${KERN_OPTS} cgroup_disable=memory" + +perl -pi -e 's/(GRUB_CMDLINE_LINUX=.*)\"/\1'"$KERN_OPTS"'\"/g' /etc/default/grub + +if [ -d /sys/firmware/efi ] ; then + grub2-mkconfig -o /boot/efi/EFI/centos/grub.cfg +else + grub2-mkconfig -o /boot/grub2/grub.cfg +fi + +%end + diff --git a/bsp-files/kickstarts/post_lvm_no_pv_on_rootfs.cfg b/bsp-files/kickstarts/post_lvm_no_pv_on_rootfs.cfg new file mode 100644 index 00000000..f933d2bb --- /dev/null +++ b/bsp-files/kickstarts/post_lvm_no_pv_on_rootfs.cfg @@ -0,0 +1,12 @@ +%post --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +# uncomment the global_filter line in lvm.conf +perl -0777 -i.bak -pe 's:(# This configuration option has an automatic default value\.\n)\t# global_filter:$1 global_filter:m' /etc/lvm/lvm.conf + +# Edit the LVM config so LVM does not look for LVs (we have none on compute nodes) +sed -i "s#^\( *\)global_filter = \[.*#\1global_filter = [ \"r|.*|\" ]#" /etc/lvm/lvm.conf +%end + diff --git a/bsp-files/kickstarts/post_lvm_pv_on_rootfs.cfg b/bsp-files/kickstarts/post_lvm_pv_on_rootfs.cfg new file mode 100644 index 00000000..bf7bd705 --- /dev/null +++ b/bsp-files/kickstarts/post_lvm_pv_on_rootfs.cfg @@ -0,0 +1,17 @@ +%post --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +# uncomment the global_filter line in lvm.conf +perl -0777 -i.bak -pe 's:(# This configuration option has an automatic default value\.\n)\t# global_filter:$1 global_filter:m' /etc/lvm/lvm.conf + +# Determine which disk we created our PV on (i.e. 
the root disk) +ROOTDISK=$(get_by_path $(pvdisplay --select 'vg_name=cgts-vg' -C -o pv_name --noheadings)) +if [ -z "$ROOTDISK" ]; then + report_post_failure_with_msg "ERROR: failed to identify rootdisk via pvdisplay" +fi +# Edit the LVM config so LVM only looks for LVs on the root disk +sed -i "s#^\( *\)global_filter = \[.*#\1global_filter = [ \"a|${ROOTDISK}|\", \"r|.*|\" ]#" /etc/lvm/lvm.conf +%end + diff --git a/bsp-files/kickstarts/post_net_common.cfg b/bsp-files/kickstarts/post_net_common.cfg new file mode 100755 index 00000000..2db90a44 --- /dev/null +++ b/bsp-files/kickstarts/post_net_common.cfg @@ -0,0 +1,127 @@ +repo --name=base --baseurl=http://pxecontroller/feed/rel-xxxPLATFORM_RELEASExxx/ +repo --name=updates --baseurl=http://pxecontroller/updates/rel-xxxPLATFORM_RELEASExxx/ + +%post --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +# Obtain the boot interface from the PXE boot +BOOTIF=`cat /proc/cmdline |xargs -n1 echo |grep BOOTIF=` +if [ -d /sys/firmware/efi ] ; then + BOOTIF=${BOOTIF#BOOTIF=} +else + BOOTIF=${BOOTIF#BOOTIF=01-} + BOOTIF=${BOOTIF//-/:} +fi + +mgmt_dev=none +mgmt_vlan=0 +if [ -n "$BOOTIF" ] ; then + ndev=`ip link show |grep -B 1 $BOOTIF |head -1 |awk '{print $2}' |sed -e 's/://'` + if [ -n "$ndev" ] ; then + mgmt_dev=$ndev + # Retrieve the management VLAN from sysinv if it exists + mgmt_vlan=`curl -sf http://pxecontroller:6385/v1/isystems/mgmtvlan` + if [ $? -ne 0 ] + then + report_post_failure_with_msg "ERROR: Unable to communicate with System Inventory REST API. Aborting installation." + fi + else + report_post_failure_with_msg "ERROR: Unable to determine mgmt interface from BOOTIF=$BOOTIF." + fi +else + report_post_failure_with_msg "ERROR: BOOTIF is not set. Unable to determine mgmt interface." +fi + +if [ $mgmt_vlan -eq 0 ] ; then + + # Persist the boot device to the platform configuration. This will get + # overwritten later if the management_interface is on a bonded interface. 
+ echo management_interface=$mgmt_dev >> /etc/platform/platform.conf + + # Build networking scripts + cat << EOF > /etc/sysconfig/network-scripts/ifcfg-lo +DEVICE=lo +IPADDR=127.0.0.1 +NETMASK=255.0.0.0 +NETWORK=127.0.0.0 +BROADCAST=127.255.255.255 +ONBOOT=yes +NAME=loopback +EOF + + cat << EOF > /etc/sysconfig/network-scripts/ifcfg-$mgmt_dev +DEVICE=$mgmt_dev +BOOTPROTO=dhcp +ONBOOT=yes +LINKDELAY=20 +EOF + +else + + # Check whether to use inet or inet6 + ipv6_addr=$(dig +short AAAA controller) + if [[ -n "$ipv6_addr" ]] + then + mgmt_address_family=inet6 + ipv6init=yes + dhcpv6c=yes + dhclientargs=-1 + else + mgmt_address_family=inet + ipv6init=no + dhcpv6c=no + dhclientargs= + fi + + # Persist the boot device to the platform configuration. This will get + # overwritten later if the management_interface is on a bonded interface. + echo management_interface=$mgmt_dev.$mgmt_vlan >> /etc/platform/platform.conf + + # Build networking scripts + cat << EOF > /etc/sysconfig/network-scripts/ifcfg-lo +DEVICE=lo +IPADDR=127.0.0.1 +NETMASK=255.0.0.0 +NETWORK=127.0.0.0 +BROADCAST=127.255.255.255 +ONBOOT=yes +NAME=loopback +EOF + + cat << EOF > /etc/sysconfig/network-scripts/ifcfg-$mgmt_dev +DEVICE=$mgmt_dev +BOOTPROTO=none +ONBOOT=yes +LINKDELAY=20 +EOF + + cat << EOF > /etc/sysconfig/network-scripts/ifcfg-$mgmt_dev.$mgmt_vlan +DEVICE=$mgmt_dev.$mgmt_vlan +BOOTPROTO=dhcp +DHCLIENTARGS=$dhclientargs +IPV6INIT=$ipv6init +DHCPV6C=$dhcpv6c +ONBOOT=yes +VLAN=yes +LINKDELAY=20 +EOF + + # Reject DHCPOFFER from DHCP server that doesn't send + # wrs-install-uuid option + echo "require wrs-install-uuid;" >>/etc/dhcp/dhclient.conf + echo "require dhcp6.wrs-install-uuid;" >>/etc/dhcp/dhclient.conf + + # Bring up the mgmt vlan so that a dhcp lease is acquired and an address is + # setup prior to the post-install reboot. This is so that the timing of the IP + # address allocation is similar to how normal/non-pxe installation works. 
+ mgmt_iface=$mgmt_dev.$mgmt_vlan + dhclient_family=$([[ $mgmt_address_family == "inet" ]] && echo -4 || echo -6) + ip link add link $mgmt_dev name $mgmt_iface type vlan id $mgmt_vlan + ip link set up dev $mgmt_iface + dhclient $dhclient_family $mgmt_iface || true + +fi + +%end diff --git a/bsp-files/kickstarts/post_net_controller.cfg b/bsp-files/kickstarts/post_net_controller.cfg new file mode 100644 index 00000000..c3e4b8f6 --- /dev/null +++ b/bsp-files/kickstarts/post_net_controller.cfg @@ -0,0 +1,79 @@ +%post --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +# Retrieve the installation uuid from the controller we booted from +INSTALL_UUID=`curl -sf http://pxecontroller/feed/rel-xxxPLATFORM_RELEASExxx/install_uuid` +if [ $? -ne 0 ] +then + INSTALL_UUID=unknown +fi + +grep -q INSTALL_UUID /etc/platform/platform.conf +if [ $? -ne 0 ]; then + echo "INSTALL_UUID=$INSTALL_UUID" >> /etc/platform/platform.conf +fi + +cd /www/pages +# Sync software repository +feed_url=http://pxecontroller/feed/ +anaconda_logdir=/var/log/anaconda +mkdir -p $anaconda_logdir + +echo "Mirroring software repository (may take several minutes)..." >/dev/console +wget --mirror --no-parent --no-host-directories --reject 'index.html*' --reject '*.log' $feed_url/ -o $anaconda_logdir/wget-feed-mirror.log \ + || report_post_failure_with_logfile $anaconda_logdir/wget-feed-mirror.log + +# Sync patching repository +updates_url=http://pxecontroller/updates/ +wget --mirror --no-parent --no-host-directories --reject 'index.html*' --reject '*.log' $updates_url/ -o $anaconda_logdir/wget-updates-mirror.log \ + || report_post_failure_with_logfile $anaconda_logdir/wget-updates-mirror.log +echo "Done" >/dev/console + +shopt -s nullglob + +# Check whether a second release is installed +. /etc/build.info +CURRENT_REL_DIR=rel-${SW_VERSION} +OTHER_REL_DIR= +for REL_DIR in /www/pages/feed/*; do + if [[ ! 
$REL_DIR =~ "${SW_VERSION}" ]]; then + OTHER_REL_DIR=`basename $REL_DIR` + OTHER_REL_VERSION=${OTHER_REL_DIR:4} + break + fi +done + +# If second release is installed, find the latest version of the installer +# RPM and install the pxeboot files we require to boot hosts with that release. +if [ ! -z "$OTHER_REL_DIR" ]; then + PATCH_RPM=`find /www/pages/updates/${OTHER_REL_DIR}/Packages -name 'pxe-network-installer*' | sort -V | tail -1` + BASE_RPM=`find /www/pages/feed/${OTHER_REL_DIR}/Packages -name 'pxe-network-installer*' | sort -V | tail -1` + + if [ ! -z "$PATCH_RPM" ]; then + INSTALL_RPM=$PATCH_RPM + elif [ ! -z "$BASE_RPM" ]; then + INSTALL_RPM=$BASE_RPM + else + report_post_failure_with_msg "ERROR: Unable to find pxe-network-installer RPM for $OTHER_REL_DIR. Aborting installation." + fi + + echo "Installing pxeboot files for release $OTHER_REL_DIR from $INSTALL_RPM" >/dev/console + TMP_RPM=/tmp/pxe-network-installer + mkdir $TMP_RPM + pushd $TMP_RPM + /usr/bin/rpm2cpio $INSTALL_RPM | cpio -idm \ + || report_post_failure_with_msg "Failed to extract pxe-network-installer" + + cp -r $TMP_RPM/usr / \ + || report_post_failure_with_msg "Failed to copy pxe-network-installer /usr" + cp -r $TMP_RPM/pxeboot/$OTHER_REL_DIR /pxeboot/ \ + || report_post_failure_with_msg "Failed to copy pxe-network-installer /pxeboot/$OTHER_REL_DIR" + cp $TMP_RPM/pxeboot/pxelinux.cfg.files/*-$OTHER_REL_VERSION /pxeboot/pxelinux.cfg.files/ \ + || report_post_failure_with_msg "Failed to copy pxe-network-installer pxelinux.cfg files" + + rm -rf $TMP_RPM +fi + +%end diff --git a/bsp-files/kickstarts/post_platform_conf_aio.cfg b/bsp-files/kickstarts/post_platform_conf_aio.cfg new file mode 100644 index 00000000..b0660cc4 --- /dev/null +++ b/bsp-files/kickstarts/post_platform_conf_aio.cfg @@ -0,0 +1,22 @@ +%post --erroronfail + +# Source common functions +. 
/tmp/ks-functions.sh + +# Set the security profile mode +secprofile="standard" +profile_mode=`cat /proc/cmdline |xargs -n1 echo |grep security_profile= | grep extended` +if [ -n "$profile_mode" ]; then + secprofile="extended" +fi + +mkdir -p -m 0775 /etc/platform +cat << EOF > /etc/platform/platform.conf +nodetype=controller +subfunction=controller,compute +system_type=All-in-one +security_profile=$secprofile +EOF + +%end + + diff --git a/bsp-files/kickstarts/post_platform_conf_aio_lowlatency.cfg b/bsp-files/kickstarts/post_platform_conf_aio_lowlatency.cfg new file mode 100644 index 00000000..048f07a6 --- /dev/null +++ b/bsp-files/kickstarts/post_platform_conf_aio_lowlatency.cfg @@ -0,0 +1,22 @@ +%post --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +# Set the security profile mode +secprofile="standard" +profile_mode=`cat /proc/cmdline |xargs -n1 echo |grep security_profile= | grep extended` +if [ -n "$profile_mode" ]; then + secprofile="extended" +fi + +mkdir -p -m 0775 /etc/platform +cat << EOF > /etc/platform/platform.conf +nodetype=controller +subfunction=controller,compute,lowlatency +system_type=All-in-one +security_profile=$secprofile +EOF + +%end + + diff --git a/bsp-files/kickstarts/post_platform_conf_compute.cfg b/bsp-files/kickstarts/post_platform_conf_compute.cfg new file mode 100644 index 00000000..35c82522 --- /dev/null +++ b/bsp-files/kickstarts/post_platform_conf_compute.cfg @@ -0,0 +1,34 @@ +%post --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +# Retrieve the installation uuid from the controller we booted from +INSTALL_UUID=`curl -sf http://pxecontroller/feed/rel-xxxPLATFORM_RELEASExxx/install_uuid` +if [ $? 
-ne 0 ] +then + INSTALL_UUID=unknown +fi + +# Set the security profile mode +secprofile="standard" +profile_mode=`cat /proc/cmdline |xargs -n1 echo |grep security_profile= | grep extended` +if [ -n "$profile_mode" ]; then + secprofile="extended" +fi + +mkdir -p -m 0775 /etc/platform +cat < /etc/platform/platform.conf +nodetype=compute +subfunction=compute +system_type=Standard +security_profile=$secprofile +INSTALL_UUID=$INSTALL_UUID +EOF + +# mount the platform directory from the controller +cat >> /etc/fstab < /etc/platform/platform.conf +nodetype=compute +subfunction=compute,lowlatency +system_type=Standard +security_profile=$secprofile +INSTALL_UUID=$INSTALL_UUID +EOF + +# mount the platform directory from the controller +cat >> /etc/fstab < /etc/platform/platform.conf +nodetype=controller +subfunction=controller +system_type=Standard +security_profile=$secprofile +EOF + +%end + diff --git a/bsp-files/kickstarts/post_platform_conf_storage.cfg b/bsp-files/kickstarts/post_platform_conf_storage.cfg new file mode 100644 index 00000000..5b22f183 --- /dev/null +++ b/bsp-files/kickstarts/post_platform_conf_storage.cfg @@ -0,0 +1,29 @@ +%post --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +# Retrieve the installation uuid from the controller we booted from +INSTALL_UUID=`curl -sf http://pxecontroller/feed/rel-xxxPLATFORM_RELEASExxx/install_uuid` +if [ $? 
-ne 0 ] +then + INSTALL_UUID=unknown +fi + +# Set the security profile mode +secprofile="standard" +profile_mode=`cat /proc/cmdline |xargs -n1 echo |grep security_profile= | grep extended` +if [ -n "$profile_mode" ]; then + secprofile="extended" +fi + +mkdir -p -m 0775 /etc/platform +cat < /etc/platform/platform.conf +nodetype=storage +subfunction=storage +system_type=Standard +security_profile=$secprofile +INSTALL_UUID=$INSTALL_UUID +EOF + +%end diff --git a/bsp-files/kickstarts/post_pxeboot_controller.cfg b/bsp-files/kickstarts/post_pxeboot_controller.cfg new file mode 100644 index 00000000..918115af --- /dev/null +++ b/bsp-files/kickstarts/post_pxeboot_controller.cfg @@ -0,0 +1,114 @@ +%pre --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +echo "repo --name=base --baseurl=xxxHTTP_URLxxx/" > /tmp/repo-include +echo "repo --name=updates --baseurl=xxxHTTP_URLxxx/patches/" > /tmp/repo-include +%end + +# Repository arguments from %pre +%include /tmp/repo-include + + +%post --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +# Obtain the boot interface from the PXE boot +BOOTIF=`cat /proc/cmdline |xargs -n1 echo |grep BOOTIF=` +if [ -d /sys/firmware/efi ] ; then + BOOTIF=${BOOTIF#BOOTIF=} +else + BOOTIF=${BOOTIF#BOOTIF=01-} + BOOTIF=${BOOTIF//-/:} +fi + +mgmt_dev=none +if [ -n "$BOOTIF" ] ; then + ndev=`ip link show |grep -B 1 $BOOTIF |head -1 |awk '{print $2}' |sed -e 's/://'` + if [ -n "$ndev" ] ; then + mgmt_dev=$ndev + else + report_post_failure_with_msg "ERROR: Unable to determine mgmt interface from BOOTIF=$BOOTIF." + fi +else + report_post_failure_with_msg "ERROR: BOOTIF is not set. Unable to determine mgmt interface." +fi + +# Persist the boot device to the platform configuration. This will get +# overwritten when config_controller is run. 
+echo management_interface=$mgmt_dev >> /etc/platform/platform.conf + +# Build networking scripts +cat << EOF > /etc/sysconfig/network-scripts/ifcfg-lo +DEVICE=lo +IPADDR=127.0.0.1 +NETMASK=255.0.0.0 +NETWORK=127.0.0.0 +BROADCAST=127.255.255.255 +ONBOOT=yes +NAME=loopback +EOF + +cat << EOF > /etc/sysconfig/network-scripts/ifcfg-$mgmt_dev +DEVICE=$mgmt_dev +BOOTPROTO=dhcp +ONBOOT=yes +EOF + +%end + +%post --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +anaconda_logdir=/var/log/anaconda +mkdir -p $anaconda_logdir + +cd /www/pages +mkdir -p feed/rel-xxxPLATFORM_RELEASExxx/Packages +mkdir -p feed/rel-xxxPLATFORM_RELEASExxx/repodata +cd feed/rel-xxxPLATFORM_RELEASExxx +feed_url=xxxHTTP_URLxxx +declare -i cut_dirs=NUM_DIRS +echo "Mirroring software repository (may take several minutes)..." >/dev/console +wget --mirror --no-parent --no-host-directories --reject 'index.html*' --cut-dirs=$cut_dirs $feed_url/Packages/ -o $anaconda_logdir/rpmget.log \ + || report_post_failure_with_logfile $anaconda_logdir/rpmget.log +wget --mirror --no-parent --no-host-directories --reject 'index.html*' --cut-dirs=$cut_dirs $feed_url/repodata/ -o $anaconda_logdir/rpmget_repo.log \ + || report_post_failure_with_logfile $anaconda_logdir/rpmget_repo.log +wget $feed_url/isolinux.cfg --append $anaconda_logdir/wget_kickstarts.log \ + || report_post_failure_with_logfile $anaconda_logdir/wget_kickstarts.log +echo "Done" >/dev/console + +patches_url=xxxHTTP_URLxxx/patches +wget -q --spider ${patches_url}/ +if [ $? -eq 0 ]; then + echo "Downloading patches..." 
>/dev/console + mkdir -p updates/rel-xxxPLATFORM_RELEASExxx/Packages + mkdir -p updates/rel-xxxPLATFORM_RELEASExxx/repodata + declare -i patches_cut_dirs=$((cut_dirs+1)) + + wget --mirror --no-parent --no-host-directories --reject 'index.html*' --cut-dirs=$patches_cut_dirs $patches_url/Packages/ -o $anaconda_logdir/patches_rpmget.log \ + || report_post_failure_with_logfile $anaconda_logdir/patches_rpmget.log + wget --mirror --no-parent --no-host-directories --reject 'index.html*' --cut-dirs=$patches_cut_dirs $patches_url/repodata/ -o $anaconda_logdir/patches_rpmget_repo.log \ + || report_post_failure_with_logfile $anaconda_logdir/patches_rpmget_repo.log + + mkdir -p /opt/patching/metadata + mkdir -p /opt/patching/packages/xxxPLATFORM_RELEASExxx + cd /opt/patching + wget --mirror --no-parent --no-host-directories --reject 'index.html*' --cut-dirs=$patches_cut_dirs $patches_url/metadata/ -o $anaconda_logdir/patches_rpmget_metadata.log \ + || report_post_failure_with_logfile $anaconda_logdir/patches_rpmget_metadata.log + find /www/pages/updates/rel-xxxPLATFORM_RELEASExxx/Packages -name '*.rpm' \ + | xargs --no-run-if-empty -I files cp --preserve=all files /opt/patching/packages/xxxPLATFORM_RELEASExxx/ + + echo "Done" >/dev/console +fi + +# Create a uuid specific to this installation +INSTALL_UUID=`uuidgen` +echo $INSTALL_UUID > /www/pages/feed/rel-xxxPLATFORM_RELEASExxx/install_uuid +echo "INSTALL_UUID=$INSTALL_UUID" >> /etc/platform/platform.conf +%end diff --git a/bsp-files/kickstarts/post_system_aio.cfg b/bsp-files/kickstarts/post_system_aio.cfg new file mode 100644 index 00000000..ac61e5a0 --- /dev/null +++ b/bsp-files/kickstarts/post_system_aio.cfg @@ -0,0 +1,35 @@ +%post --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +# Source the generated platform.conf +. 
/etc/platform/platform.conf + +## Reserve more memory for base processes since the controller has higher +## memory requirements but cap it to better handle systems with large +## amounts of memory +TOTALMEM=$(grep MemTotal /proc/meminfo | awk '{print int($2/1024)}') + +if [ -e /sys/devices/system/node/node0 ]; then + RESERVEDMEM=$(grep MemTotal /sys/devices/system/node/node0/meminfo | awk '{printf "%d\n", $4/1024}') +else + RESERVEDMEM=$(grep MemTotal /proc/meminfo | awk '{print int($2/1024/4)}') +fi + +if [ ${RESERVEDMEM} -lt 6144 ]; then + RESERVEDMEM=6144 +elif [ ${RESERVEDMEM} -gt 14500 ]; then + RESERVEDMEM=14500 +elif [ ${RESERVEDMEM} -gt 8192 ]; then + RESERVEDMEM=8192 +fi + +sed -i -e "s#\(COMPUTE_BASE_RESERVED\)=.*#\1=(\"node0:${RESERVEDMEM}MB:1\" \"node1:2000MB:0\" \"node2:2000MB:0\" \"node3:2000MB:0\")#g" /etc/nova/compute_reserved.conf + +# Update COMPUTE_CPU_LIST +N_CPUS=$(cat /proc/cpuinfo 2>/dev/null | awk '/^[pP]rocessor/ { n +=1 } END { print (n>0) ? n : 1}') +sed -i "s/^COMPUTE_CPU_LIST=.*/COMPUTE_CPU_LIST=\"0-$((N_CPUS-1))\"/" /etc/nova/compute_reserved.conf + +%end + diff --git a/bsp-files/kickstarts/post_usb_controller.cfg b/bsp-files/kickstarts/post_usb_controller.cfg new file mode 100644 index 00000000..f8d97c5c --- /dev/null +++ b/bsp-files/kickstarts/post_usb_controller.cfg @@ -0,0 +1,80 @@ +%pre --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +if [ -d /mnt/install/source ]; then + srcdir=/mnt/install/source +else + srcdir=/run/install/repo +fi + +touch /tmp/repo-include + +if [ -d ${srcdir}/patches ]; then + echo "repo --name=updates --baseurl=file://${srcdir}/patches/" > /tmp/repo-include +fi + +%end + +# Repository arguments from %pre +%include /tmp/repo-include + +%post --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +mgmt_dev=none + +# Persist the boot device to the platform configuration. This will get +# overwritten when config_controller is run. 
+echo management_interface=$mgmt_dev >> /etc/platform/platform.conf + +# Build networking scripts +cat << EOF > /etc/sysconfig/network-scripts/ifcfg-lo +DEVICE=lo +IPADDR=127.0.0.1 +NETMASK=255.0.0.0 +NETWORK=127.0.0.0 +BROADCAST=127.255.255.255 +ONBOOT=yes +NAME=loopback +EOF + +%end + + +# Note, this section is different and replaced with a wget +# if doing the initial install off the network +%post --nochroot +if [ -d /mnt/install/source ]; then + srcdir=/mnt/install/source +else + srcdir=/run/install/repo +fi + +if [ -d $srcdir/Packages ] ; then + mkdir -p /mnt/sysimage/www/pages/feed/rel-xxxPLATFORM_RELEASExxx + cp -r $srcdir/Packages /mnt/sysimage/www/pages/feed/rel-xxxPLATFORM_RELEASExxx/Packages + cp -r $srcdir/repodata /mnt/sysimage/www/pages/feed/rel-xxxPLATFORM_RELEASExxx/repodata + cp $srcdir/*.cfg /mnt/sysimage/www/pages/feed/rel-xxxPLATFORM_RELEASExxx +fi + +if [ -d $srcdir/patches ]; then + mkdir -p /mnt/sysimage/www/pages/updates/rel-xxxPLATFORM_RELEASExxx + cp -r $srcdir/patches/Packages /mnt/sysimage/www/pages/updates/rel-xxxPLATFORM_RELEASExxx/Packages + cp -r $srcdir/patches/repodata /mnt/sysimage/www/pages/updates/rel-xxxPLATFORM_RELEASExxx/repodata + mkdir -p /mnt/sysimage/opt/patching + cp -r $srcdir/patches/metadata /mnt/sysimage/opt/patching/metadata + mkdir -p /mnt/sysimage/opt/patching/packages/xxxPLATFORM_RELEASExxx + + find /mnt/sysimage/www/pages/updates/rel-xxxPLATFORM_RELEASExxx/Packages -name '*.rpm' \ + | xargs --no-run-if-empty -I files cp --preserve=all files /mnt/sysimage/opt/patching/packages/xxxPLATFORM_RELEASExxx/ +fi + +# Create a uuid specific to this installation +INSTALL_UUID=`uuidgen` +echo $INSTALL_UUID > /mnt/sysimage/www/pages/feed/rel-xxxPLATFORM_RELEASExxx/install_uuid +echo "INSTALL_UUID=$INSTALL_UUID" >> /mnt/sysimage/etc/platform/platform.conf +%end diff --git a/bsp-files/kickstarts/post_yow_controller.cfg b/bsp-files/kickstarts/post_yow_controller.cfg new file mode 100644 index 00000000..b8bd8e2e --- /dev/null 
+++ b/bsp-files/kickstarts/post_yow_controller.cfg
@@ -0,0 +1,115 @@
+%pre --erroronfail
+
+# Source common functions
+. /tmp/ks-functions.sh
+
+# Generate the repo commands pulled in by the %include below. The first
+# line creates the file; the second must append, otherwise the base repo
+# entry is overwritten and only the updates repo reaches Anaconda.
+echo "repo --name=base --baseurl=http://xxxBOOT_SERVERxxx/umalab/`hostname`_feed/" > /tmp/repo-include
+echo "repo --name=updates --baseurl=http://xxxBOOT_SERVERxxx/umalab/`hostname`_feed/patches" >> /tmp/repo-include
+%end
+
+# Repository arguments from %pre
+%include /tmp/repo-include
+
+
+%post --erroronfail
+
+# Source common functions
+. /tmp/ks-functions.sh
+
+# Obtain the boot interface from the PXE boot
+BOOTIF=`cat /proc/cmdline |xargs -n1 echo |grep BOOTIF=`
+if [ -d /sys/firmware/efi ] ; then
+    BOOTIF=${BOOTIF#BOOTIF=}
+else
+    # Strip the "01-" prefix and turn the dash-separated value into
+    # colon-separated form so it can be matched against "ip link" output.
+    BOOTIF=${BOOTIF#BOOTIF=01-}
+    BOOTIF=${BOOTIF//-/:}
+fi
+
+mgmt_dev=none
+if [ -n "$BOOTIF" ] ; then
+    # The interface name is on the line preceding the matching link/ether line.
+    ndev=`ip link show |grep -B 1 $BOOTIF |head -1 |awk '{print $2}' |sed -e 's/://'`
+    if [ -n "$ndev" ] ; then
+        mgmt_dev=$ndev
+    else
+        report_post_failure_with_msg "ERROR: Unable to determine mgmt interface from BOOTIF=$BOOTIF."
+    fi
+else
+    report_post_failure_with_msg "ERROR: BOOTIF is not set. Unable to determine mgmt interface."
+fi
+
+# Persist the boot device to the platform configuration. This will get
+# overwritten when config_controller is run.
+echo management_interface=$mgmt_dev >> /etc/platform/platform.conf
+
+# Build networking scripts
+cat << EOF > /etc/sysconfig/network-scripts/ifcfg-lo
+DEVICE=lo
+IPADDR=127.0.0.1
+NETMASK=255.0.0.0
+NETWORK=127.0.0.0
+BROADCAST=127.255.255.255
+ONBOOT=yes
+NAME=loopback
+EOF
+
+cat << EOF > /etc/sysconfig/network-scripts/ifcfg-$mgmt_dev
+DEVICE=$mgmt_dev
+BOOTPROTO=dhcp
+ONBOOT=yes
+LINKDELAY=20
+EOF
+
+%end
+
+%post --erroronfail
+
+# Source common functions
+. 
/tmp/ks-functions.sh + +anaconda_logdir=/var/log/anaconda +mkdir -p $anaconda_logdir + +cd /www/pages +mkdir -p feed/rel-xxxPLATFORM_RELEASExxx/Packages +mkdir -p feed/rel-xxxPLATFORM_RELEASExxx/repodata +cd feed/rel-xxxPLATFORM_RELEASExxx +feed_url=http://xxxBOOT_SERVERxxx/umalab/`hostname`_feed +declare -i cut_dirs=2 +echo "Mirroring software repository (may take several minutes)..." >/dev/console +wget --mirror --no-parent --no-host-directories --reject 'index.html*' --cut-dirs=$cut_dirs $feed_url/Packages/ -o $anaconda_logdir/rpmget.log \ + || report_post_failure_with_logfile $anaconda_logdir/rpmget.log +wget --mirror --no-parent --no-host-directories --reject 'index.html*' --cut-dirs=$cut_dirs $feed_url/repodata/ -o $anaconda_logdir/rpmget_repo.log \ + || report_post_failure_with_logfile $anaconda_logdir/rpmget_repo.log +wget $feed_url/isolinux.cfg --append $anaconda_logdir/wget_kickstarts.log \ + || report_post_failure_with_logfile $anaconda_logdir/wget_kickstarts.log + +# Check for patches +patches_url=http://xxxBOOT_SERVERxxx/umalab/`hostname`_feed/patches +wget -q --spider ${patches_url}/ +if [ $? 
-eq 0 ]; then + cd /www/pages + mkdir -p updates/rel-xxxPLATFORM_RELEASExxx/Packages + mkdir -p updates/rel-xxxPLATFORM_RELEASExxx/repodata + cd updates/rel-xxxPLATFORM_RELEASExxx + declare -i patches_cut_dirs=$((cut_dirs+1)) + wget --mirror --no-parent --no-host-directories --reject 'index.html*' --cut-dirs=$patches_cut_dirs $patches_url/Packages/ -o $anaconda_logdir/patches_rpmget.log \ + || report_post_failure_with_logfile $anaconda_logdir/patches_rpmget.log + wget --mirror --no-parent --no-host-directories --reject 'index.html*' --cut-dirs=$patches_cut_dirs $patches_url/repodata/ -o $anaconda_logdir/patches_rpmget_repo.log \ + || report_post_failure_with_logfile $anaconda_logdir/patches_rpmget_repo.log + + mkdir -p /opt/patching/metadata + mkdir -p /opt/patching/packages/xxxPLATFORM_RELEASExxx + cd /opt/patching + wget --mirror --no-parent --no-host-directories --reject 'index.html*' --cut-dirs=$patches_cut_dirs $patches_url/metadata/ -o $anaconda_logdir/patches_rpmget_metadata.log \ + || report_post_failure_with_logfile $anaconda_logdir/patches_rpmget_metadata.log + find /www/pages/updates/rel-xxxPLATFORM_RELEASExxx/Packages -name '*.rpm' \ + | xargs --no-run-if-empty -I files cp --preserve=all files /opt/patching/packages/xxxPLATFORM_RELEASExxx/ + + echo "Done" >/dev/console +fi + +# Create a uuid specific to this installation +INSTALL_UUID=`uuidgen` +echo $INSTALL_UUID > /www/pages/feed/rel-xxxPLATFORM_RELEASExxx/install_uuid +echo "INSTALL_UUID=$INSTALL_UUID" >> /etc/platform/platform.conf +%end diff --git a/bsp-files/kickstarts/pre_common_head.cfg b/bsp-files/kickstarts/pre_common_head.cfg new file mode 100644 index 00000000..88edab48 --- /dev/null +++ b/bsp-files/kickstarts/pre_common_head.cfg @@ -0,0 +1,72 @@ +%pre --erroronfail + +# Source common functions +. 
/tmp/ks-functions.sh + +# First, parse /proc/cmdline to find the boot args +set -- `cat /proc/cmdline` +for I in $*; do case "$I" in *=*) eval $I 2>/dev/null;; esac; done + +append= +if [ -n "$console" ] ; then + append="console=$console" +fi + +if [ -n "$security_profile" ]; then + append="$append security_profile=$security_profile" +fi + +#### SECURITY PROFILE HANDLING (Pre Installation) #### +if [ -n "$security_profile" ] && [ "$security_profile" == "extended" ]; then + # IMA specific boot options: + # Enable Kernel auditing + append="$append audit=1" +else + # we need to blacklist the IMA and Integrity Modules + # on standard security profile + append="$append module_blacklist=integrity,ima" + + # Disable Kernel auditing in Standard Security Profile mode + append="$append audit=0" +fi + +if [ -n "$tboot" ]; then + append="$append tboot=$tboot" +else + append="$append tboot=false" +fi + +boot_device_arg= +if [ -n "$boot_device" ] ; then + boot_device_arg="--boot-drive=$(get_by_path $boot_device)" +fi + +echo "bootloader --location=mbr $boot_device_arg --timeout=5 --append=\"$append\"" > /tmp/bootloader-include + +echo "timezone --nontp --utc UTC" >/tmp/timezone-include +%end + +#version=DEVEL +install +lang en_US.UTF-8 +keyboard us +%include /tmp/timezone-include +# set to 'x' so we can use shadow password +rootpw --iscrypted x +selinux --disabled +authconfig --enableshadow --passalgo=sha512 +firewall --service=ssh + +# The following is the partition information you requested +# Note that any partitions you deleted are not expressed +# here so unless you clear all partitions first, this is +# not guaranteed to work +zerombr + +# Disk layout from %pre +%include /tmp/part-include +# Bootloader parms from %pre +%include /tmp/bootloader-include + +reboot --eject + diff --git a/bsp-files/kickstarts/pre_disk_aio.cfg b/bsp-files/kickstarts/pre_disk_aio.cfg new file mode 100755 index 00000000..54a819b1 --- /dev/null +++ b/bsp-files/kickstarts/pre_disk_aio.cfg @@ -0,0 
+1,102 @@
+
+## NOTE: updates to partition sizes need to be also reflected in
+##  _controller_filesystem_limits() in sysinv/api/controllers/v1/istorconfig.py
+##
+## NOTE: When adding partitions, we currently have a max of 4 primary partitions.
+##       If more than 4 partitions are required, we can use a max of 3 --asprimary,
+##       to allow 1 primary logical partition with extended partitions
+##
+## NOTE: Max default PV size must align with the default controllerfs sizes
+##
+## BACKUP_OVERHEAD = 20
+##
+## Physical install
+##  - DB size is doubled to allow for upgrades
+##
+## DEFAULT_IMAGE_STOR_SIZE = 10
+## DEFAULT_DATABASE_STOR_SIZE = 20
+## DEFAULT_IMG_CONVERSION_STOR_SIZE = 20
+## BACKUP = DEFAULT_DATABASE_STOR_SIZE + DEFAULT_IMAGE_STOR_SIZE
+##          + BACKUP_OVERHEAD = 50
+## LOG_VOL_SIZE = 8192
+## SCRATCH_VOL_SIZE = 8192
+## RABBIT = 2048
+## PLATFORM = 2048
+## ANCHOR = 1024
+## EXTENSION = 1024
+## RESERVED_PE = 16 (based on pesize=32768)
+##
+## CGCS_PV_SIZE = 10240 + 2*20480 + 20480 + 51200 + 8192 +
+##                8192 + 2048 + 2048 + 1024 + 1024 + 16 = 145424
+##
+## small install - uses is_virtual check
+##  - DB size is doubled to allow for upgrades
+##
+## DEFAULT_VIRTUAL_IMAGE_STOR_SIZE = 8
+## DEFAULT_VIRTUAL_DATABASE_STOR_SIZE = 5
+## DEFAULT_VIRTUAL_IMG_CONVERSION_STOR_SIZE = 8
+## DEFAULT_VIRTUAL_BACKUP_STOR_SIZE = 5
+##
+## LOG_VOL_SIZE = 4096
+## SCRATCH_VOL_SIZE = 4096
+## RABBIT = 2048
+## PLATFORM = 2048
+## ANCHOR = 1024
+## EXTENSION = 1024
+## RESERVED_PE = 16 (based on pesize=32768)
+##
+## CGCS_PV_SIZE = 8192 + 2*5120 + 8192 + 5120 + 4096 +
+##                4096 + 2048 + 2048 + 1024 + 1024 + 16 = 46096
+##
+## NOTE: To maintain upgrade compatibility within the volume group, keep the
+## undersized LOG_VOL_SIZE and SCRATCH_VOL_SIZE, but size the minimally sized
+## physical volume correctly.
+##
+## R4 AIO installations:
+##  - R4 (case #1): /boot (0.5G), / (20G),
+##                  cgts-vg PV (239G), /local_pv (239G)
+##  - R4 (case #2): /boot (0.5G), / (20G),
+##                  cgts-vg PV (239G), cgts-vg (239G)
+##
+## Upgrade migration will start with R5 install and create a partition to align
+## above so filesystems within the volume group will be able to maintain their
+## sizes in R5
+##  - R5 install  : /boot (0.5G), / (20G),
+##                  cgts-vg PV (142G), un-partitioned (336G)
+##  - R5 (case #1): /boot (0.5G), / (20G),
+##                  cgts-vg PV (142G), cgts-vg PV (97G), unpartitioned (239G)
+##  - R5 (case #2): /boot (0.5G), / (20G),
+##                  cgts-vg PV (142G), cgts-vg PV (336G)
+##
+# Pick volume sizes (in MB) from the size of the root disk in bytes.
+sz=$(blockdev --getsize64 $(get_disk $rootfs_device))
+if [ $sz -le $((161*$gb)) ] ; then
+    ## This covers vbox or any 55g < disk < 162gb: < 55g won't install
+    LOG_VOL_SIZE=4000
+    SCRATCH_VOL_SIZE=4000
+    ROOTFS_SIZE=10000
+    CGCS_PV_SIZE=46096
+else
+    LOG_VOL_SIZE=8000
+    SCRATCH_VOL_SIZE=8000
+    ROOTFS_SIZE=20000
+    CGCS_PV_SIZE=145424
+fi
+
+ROOTFS_OPTIONS="defaults"
+profile_mode=`cat /proc/cmdline |xargs -n1 echo |grep security_profile= | grep extended`
+if [ -n "$profile_mode" ]; then
+    # Enable iversion labelling for rootfs when IMA is enabled
+    ROOTFS_OPTIONS="${ROOTFS_OPTIONS},iversion"
+fi
+
+# Append (>>) so entries already present in /tmp/part-include are kept.
+cat<<EOF>>/tmp/part-include
+part /boot --fstype=ext4 --asprimary --size=500 --ondrive=$(get_disk $rootfs_device) --fsoptions="$ROOTFS_OPTIONS"
+part pv.253004 --grow --size=500 --maxsize=$CGCS_PV_SIZE --ondrive=$(get_disk $rootfs_device)
+volgroup cgts-vg --pesize=32768 pv.253004
+logvol /var/log --fstype=ext4 --vgname=cgts-vg --size=$LOG_VOL_SIZE --name=log-lv
+logvol /scratch --fstype=ext4 --vgname=cgts-vg --size=$SCRATCH_VOL_SIZE --name=scratch-lv
+part / --fstype=ext4 --asprimary --size=$ROOTFS_SIZE --ondrive=$(get_disk $rootfs_device) --fsoptions="$ROOTFS_OPTIONS"
+EOF
+
+%end
+
diff --git a/bsp-files/kickstarts/pre_disk_compute.cfg b/bsp-files/kickstarts/pre_disk_compute.cfg
new file mode 100755
index 00000000..032e2b19
--- 
/dev/null +++ b/bsp-files/kickstarts/pre_disk_compute.cfg @@ -0,0 +1,31 @@ + +sz=$(blockdev --getsize64 $(get_disk $rootfs_device)) +if [ $sz -le $((80*$gb)) ] ; then + ## Less than 80GB use a 10GB root partition + LOG_VOL_SIZE=4000 + SCRATCH_VOL_SIZE=4000 + ROOTFS_SIZE=10000 +else + ## Use a 20GB root partition + LOG_VOL_SIZE=4000 + SCRATCH_VOL_SIZE=4000 + ROOTFS_SIZE=20000 +fi + +ROOTFS_OPTIONS="defaults" +profile_mode=`cat /proc/cmdline |xargs -n1 echo |grep security_profile= | grep extended` +if [ -n "$profile_mode" ]; then + # Enable iversion labelling for rootfs when IMA is enabled + ROOTFS_OPTIONS="${ROOTFS_OPTIONS},iversion" +fi + +cat<>/tmp/part-include +part /boot --fstype=ext4 --asprimary --size=500 --ondrive=$(get_disk $rootfs_device) --fsoptions="$ROOTFS_OPTIONS" +part / --fstype=ext4 --asprimary --size=$ROOTFS_SIZE --ondrive=$(get_disk $rootfs_device) --fsoptions="$ROOTFS_OPTIONS" +part /var/log --fstype=ext4 --size=$LOG_VOL_SIZE --ondrive=$(get_disk $rootfs_device) +part /scratch --fstype=ext4 --size=$SCRATCH_VOL_SIZE --ondrive=$(get_disk $rootfs_device) + +EOF + +%end + diff --git a/bsp-files/kickstarts/pre_disk_controller.cfg b/bsp-files/kickstarts/pre_disk_controller.cfg new file mode 100755 index 00000000..e4217e75 --- /dev/null +++ b/bsp-files/kickstarts/pre_disk_controller.cfg @@ -0,0 +1,34 @@ + +## NOTE: updates to partition sizes need to be also reflected in +## _controller_filesystem_limits() in sysinv/api/controllers/v1/istorconfig.py + +sz=$(blockdev --getsize64 $(get_disk $rootfs_device)) +if [ $sz -le $((60*$gb)) ] ; then + LOG_VOL_SIZE=4000 + SCRATCH_VOL_SIZE=4000 + ROOTFS_SIZE=10000 +else + LOG_VOL_SIZE=8000 + SCRATCH_VOL_SIZE=8000 + ROOTFS_SIZE=20000 +fi + +ROOTFS_OPTIONS="defaults" +profile_mode=`cat /proc/cmdline |xargs -n1 echo |grep security_profile= | grep extended` +if [ -n "$profile_mode" ]; then + # Enable iversion labelling for rootfs when IMA is enabled + ROOTFS_OPTIONS="${ROOTFS_OPTIONS},iversion" +fi + 
+cat<>/tmp/part-include +part /boot --fstype=ext4 --asprimary --size=500 --ondrive=$(get_disk $rootfs_device) --fsoptions="$ROOTFS_OPTIONS" +part pv.253004 --grow --asprimary --size=500 --ondrive=$(get_disk $rootfs_device) +volgroup cgts-vg --pesize=32768 pv.253004 +logvol /var/log --fstype=ext4 --vgname=cgts-vg --size=$LOG_VOL_SIZE --name=log-lv +logvol /scratch --fstype=ext4 --vgname=cgts-vg --size=$SCRATCH_VOL_SIZE --name=scratch-lv +part / --fstype=ext4 --asprimary --size=$ROOTFS_SIZE --ondrive=$(get_disk $rootfs_device) --fsoptions="$ROOTFS_OPTIONS" + +EOF + +%end + diff --git a/bsp-files/kickstarts/pre_disk_setup_common.cfg b/bsp-files/kickstarts/pre_disk_setup_common.cfg new file mode 100644 index 00000000..173b9b1a --- /dev/null +++ b/bsp-files/kickstarts/pre_disk_setup_common.cfg @@ -0,0 +1,154 @@ +%pre --erroronfail + +# Source common functions +. /tmp/ks-functions.sh + +# This is a really fancy way of finding the first usable disk for the +# install and not stomping on the USB device if it comes up first + +# First, parse /proc/cmdline to find the boot args +set -- `cat /proc/cmdline` +for I in $*; do case "$I" in *=*) eval $I 2>/dev/null;; esac; done + +# Find either the ISO or USB device first chopping off partition +ISO_DEV=`readlink /dev/disk/by-label/oe_iso_boot` +sdev=`echo $ISO_DEV | sed -e 's/.$//'` +if [ -e /dev/disk/by-label/$sdev ] ; then + ISO_DEV=$sdev +fi +USB_DEV=`readlink /dev/disk/by-label/wr_usb_boot` +sdev=`echo $USB_DEV | sed -e 's/.$//'` +if [ -e /dev/disk/by-label/$sdev ] ; then + USB_DEV=$sdev +fi + +# Temporary, until lab pxelinux.cfg files are updated to specify install devices +if [ -z "$rootfs_device" -o -z "$boot_device" ] +then + INST_HDD="" + # Prefer a vd* device if this is kvm/qemu + for e in vda vdb sda sdb nvme0n1; do + if [ -e /dev/$e -a "$ISO_DEV" != "../../$e" -a "$USB_DEV" != "../../$e" ] ; then + INST_HDD=$e + break + fi + done + + # Set variables to $INST_HDD if not set + rootfs_device=${rootfs_device:-$INST_HDD} + 
boot_device=${boot_device:-$INST_HDD} +fi + +# Convert to by-path +orig_rootfs_device=$rootfs_device +rootfs_device=$(get_by_path $rootfs_device) + +orig_boot_device=$boot_device +boot_device=$(get_by_path $boot_device) + +if [ ! -e "$rootfs_device" -o ! -e "$boot_device" ] ; then + # Touch this file to prevent Anaconda from dying an ungraceful death + touch /tmp/part-include + + report_pre_failure_with_msg "ERROR: Specified installation ($orig_rootfs_device) or boot ($orig_boot_device) device is invalid." +fi + +# Ensure specified device is not a USB drive +udevadm info --query=property --name=$rootfs_device |grep -q '^ID_BUS=usb' || \ + udevadm info --query=property --name=$boot_device |grep -q '^ID_BUS=usb' +if [ $? -eq 0 ]; then + # Touch this file to prevent Anaconda from dying an ungraceful death + touch /tmp/part-include + + report_pre_failure_with_msg "ERROR: Specified installation ($orig_rootfs_device) or boot ($orig_boot_device) device is a USB drive." +fi + +# Deactivate existing volume groups to avoid Anaconda issues with pre-existing groups +vgs --noheadings -o vg_name | xargs --no-run-if-empty -n 1 vgchange -an + +ONLYUSE_HDD="" +if [ "$(curl -sf http://pxecontroller:6385/v1/upgrade/$(hostname)/in_upgrade 2>/dev/null)" = "true" ]; then + # In an upgrade, only wipe the disk with the rootfs and boot partition + echo "In upgrade, wiping only $rootfs_device" + WIPE_HDD="$(get_disk $rootfs_device)" + ONLYUSE_HDD="$(basename $(get_disk $rootfs_device))" + if [ "$(get_disk $rootfs_device)" != "$(get_disk $boot_device)" ]; then + WIPE_HDD="$WIPE_HDD,$(get_disk $boot_device)" + ONLYUSE_HDD="$ONLYUSE_HDD,$(basename $(get_disk $boot_device))" + fi +else + # Make a list of all the hard drives that are to be wiped + WIPE_HDD="" + for f in /dev/disk/by-path/* + do + dev=$(readlink -f $f) + lsblk --nodeps --pairs $dev | grep -q 'TYPE="disk"' + if [ $? 
-ne 0 ] + then + continue + fi + + # Avoid wiping USB drives + udevadm info --query=property --name=$dev |grep -q '^ID_BUS=usb' && continue + + devname=$(basename $dev) + if [ -e $dev -a "$ISO_DEV" != "../../$devname" -a "$USB_DEV" != "../../$devname" ] ; then + if [ -n "$WIPE_HDD" ] ; then + WIPE_HDD=$WIPE_HDD,$dev + else + WIPE_HDD=$dev + fi + fi + done + echo "Not in upgrade, wiping all disks: $WIPE_HDD" +fi + +for dev in ${WIPE_HDD//,/ } +do + # Clearing previous GPT tables or LVM data + # Delete the first few bytes at the start and end of the partition. This is required with + # GPT partitions, they save partition info at the start and the end of the block. + # Do this for each partition on the disk, as well. + partitions=$(lsblk -rip $dev -o TYPE,NAME |awk '$1 == "part" {print $2}') + for p in $partitions $dev + do + echo "Pre-wiping $p from kickstart" + dd if=/dev/zero of=$p bs=512 count=34 + dd if=/dev/zero of=$p bs=512 count=34 seek=$((`blockdev --getsz $p` - 34)) + done +done + +# Check for remaining cgts-vg PVs, which could potentially happen +# in an upgrade where we're not wiping all disks. +# If we ever create other volume groups from kickstart in the future, +# include them in this search as well. 
+partitions=$(pvs --select 'vg_name=cgts-vg' -o pv_name --noheading | grep -v '\[unknown\]') +for p in $partitions +do + echo "Pre-wiping $p from kickstart (cgts-vg present)" + dd if=/dev/zero of=$p bs=512 count=34 + dd if=/dev/zero of=$p bs=512 count=34 seek=$((`blockdev --getsz $p` - 34)) +done + +let -i gb=1024*1024*1024 + +cat</tmp/part-include +clearpart --all --drives=$WIPE_HDD --initlabel +EOF + +if [ -n "$ONLYUSE_HDD" ]; then + cat<>/tmp/part-include +ignoredisk --only-use=$ONLYUSE_HDD +EOF +fi + +if [ -d /sys/firmware/efi ] ; then + cat<>/tmp/part-include +part /boot/efi --fstype=efi --size=300 --ondrive=$(get_disk $boot_device) +EOF +else + cat<>/tmp/part-include +part biosboot --asprimary --fstype=biosboot --size=1 --ondrive=$(get_disk $boot_device) +EOF +fi + diff --git a/bsp-files/kickstarts/pre_disk_storage.cfg b/bsp-files/kickstarts/pre_disk_storage.cfg new file mode 100755 index 00000000..5cabf645 --- /dev/null +++ b/bsp-files/kickstarts/pre_disk_storage.cfg @@ -0,0 +1,31 @@ + +sz=$(blockdev --getsize64 $(get_disk $rootfs_device)) +if [ $sz -le $((60*$gb)) ] ; then + LOG_VOL_SIZE=4000 + SCRATCH_VOL_SIZE=4000 + ROOTFS_SIZE=10000 +else + LOG_VOL_SIZE=8000 + SCRATCH_VOL_SIZE=8000 + ROOTFS_SIZE=20000 +fi + +ROOTFS_OPTIONS="defaults" +profile_mode=`cat /proc/cmdline |xargs -n1 echo |grep security_profile= | grep extended` +if [ -n "$profile_mode" ]; then + # Enable iversion labelling for rootfs when IMA is enabled + ROOTFS_OPTIONS="${ROOTFS_OPTIONS},iversion" +fi + +cat<>/tmp/part-include +part /boot --fstype=ext4 --asprimary --size=500 --ondrive=$(get_disk $rootfs_device) --fsoptions="$ROOTFS_OPTIONS" +part pv.253004 --grow --asprimary --size=500 --ondrive=$(get_disk $rootfs_device) +volgroup cgts-vg --pesize=32768 pv.253004 +logvol /var/log --fstype=ext4 --vgname=cgts-vg --size=$LOG_VOL_SIZE --name=log-lv +logvol /scratch --fstype=ext4 --vgname=cgts-vg --size=$SCRATCH_VOL_SIZE --name=scratch-lv +part / --fstype=ext4 --asprimary --size=$ROOTFS_SIZE 
--ondrive=$(get_disk $rootfs_device) --fsoptions="$ROOTFS_OPTIONS"
+
+EOF
+
+%end
+
diff --git a/bsp-files/kickstarts/pre_pkglist.cfg b/bsp-files/kickstarts/pre_pkglist.cfg
new file mode 100644
index 00000000..021f0d35
--- /dev/null
+++ b/bsp-files/kickstarts/pre_pkglist.cfg
@@ -0,0 +1,6 @@
+%packages
+@core
+@base
+xxxPACKAGE_LISTxxx
+%end
+
diff --git a/bsp-files/kickstarts/pre_pkglist_lowlatency.cfg b/bsp-files/kickstarts/pre_pkglist_lowlatency.cfg
new file mode 100644
index 00000000..49e7d395
--- /dev/null
+++ b/bsp-files/kickstarts/pre_pkglist_lowlatency.cfg
@@ -0,0 +1,8 @@
+%packages
+@core
+@base
+-kernel
+-kernel-tools
+xxxPACKAGE_LISTxxx
+%end
+
diff --git a/bsp-files/pkg-list.pl b/bsp-files/pkg-list.pl
new file mode 100755
index 00000000..06731781
--- /dev/null
+++ b/bsp-files/pkg-list.pl
@@ -0,0 +1,34 @@
+#!/usr/bin/perl
+#
+# Print the unique "<package> <arch>" pairs found in an installed_pkgs.txt
+# style list (first two whitespace-separated fields of every line), sorted
+# by "<package>.<arch>".
+#
+# Usage: pkg-list.pl [installed_pkgs.txt]
+#
+# Without an argument, the most recently modified installed_pkgs.txt under
+# the wrlinux-image-cgcs-base bitbake work directory is read.
+use strict;
+use warnings;
+
+#my $file_in = `ls -tr bitbake_build/tmp/work/*/*/*/installed_pkgs.txt |tail -1`;
+my $file_in = `ls -tr bitbake_build/tmp/work/intel_x86_64-wrs-linux/wrlinux-image-cgcs-base/*/installed_pkgs.txt |tail -1`;
+$file_in = $ARGV[0] if (defined $ARGV[0] && $ARGV[0] ne "");
+# Backticks keep the trailing newline; three-arg open would otherwise treat
+# it as part of the file name.
+chomp $file_in;
+
+open(my $files_in, '<', $file_in)
+    or die "Could not open list of files '$file_in': $!\n";
+
+my %pkgs;
+while (my $line = <$files_in>) {
+    # Splitting on whitespace already discards the line terminator, so the
+    # last line is handled correctly even without a trailing newline.
+    my ($pkg, $arch) = split(' ', $line);
+    next unless defined $pkg;      # blank line
+    $arch = "" unless defined $arch;
+    # Fix up any lib32 packages
+    if ($pkg =~ /lib32-(.*)$/) {
+        $pkg = $1;
+        $arch = "lib32_x86";
+    }
+    # Keyed by "pkg.arch" to drop duplicates and to define the sort order.
+    $pkgs{"$pkg.$arch"} = "$pkg $arch";
+}
+close($files_in);
+
+# Print all packages in sorted unique order with architecture
+foreach my $key (sort keys %pkgs) {
+    print "$pkgs{$key}\n";
+}
+
+exit 0;
diff --git a/bsp-files/platform_comps.py b/bsp-files/platform_comps.py
new file mode 100644
index 00000000..54e345b8
--- /dev/null
+++ b/bsp-files/platform_comps.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+
+"""
+Copyright (c) 2018 Wind River Systems, Inc.
+ +SPDX-License-Identifier: Apache-2.0 + +""" + +import getopt +import os +import platform +import subprocess +import sys +import xml.etree.ElementTree as ElementTree + +def usage(): + print "Usage: %s --groups --pkgdir " \ + % os.path.basename(sys.argv[0]) + exit(1) + +def add_text_tag_to_xml(parent, + name, + text): + """ + Utility function for adding a text tag to an XML object + :param parent: Parent element + :param name: Element name + :param text: Text value + :return:The created element + """ + tag = ElementTree.SubElement(parent, name) + tag.text = text + tag.tail = '\n ' + return tag + + +def add_group(comps, personality, rpmlist=None, filter_dir=None, filter=None): + """ + Add a software group to the comps.xml + :param comps: comps element + :param personality: Personality of node for group + :param rpmlist: List of all rpms in the base load + :param filter_dir: Path to filter files + :param filter: Name of filter file to use + """ + + if rpmlist is not None: + # Define a base platform group + groupname = "platform-%s" % personality + desc = "Platform packages for %s" % personality + else: + # Define an empty patch group + groupname = "updates-%s" % personality + desc = "Patches for %s" % personality + + group = ElementTree.SubElement(comps, 'group') + group.tail = '\n' + + add_text_tag_to_xml(group, 'id', groupname) + add_text_tag_to_xml(group, 'default', "false") + add_text_tag_to_xml(group, 'uservisible', "true") + add_text_tag_to_xml(group, 'display_order', "1024") + add_text_tag_to_xml(group, 'name', groupname) + add_text_tag_to_xml(group, 'description', desc) + + package_element = ElementTree.SubElement(group, + 'packagelist') + package_element.tail = '\n ' + + if rpmlist is not None: + # Read the filter file + f = open(os.path.join(filter_dir, filter), 'r') + filtered = f.read().split() + f.close() + + for pkg in sorted(rpmlist): + if pkg not in filtered: + tag = ElementTree.SubElement(package_element, + 'packagereq', + type="mandatory") + 
tag.text = pkg + tag.tail = '\n ' + + +def main(): + try: + opts, remainder = getopt.getopt(sys.argv[1:], + '', + ['pkgdir=', + 'groups=']) + except getopt.GetoptError: + usage() + + pkgdir = None + groups_file = None + + # Filters are colocated with this script + filter_dir = os.path.dirname(sys.argv[0]) + + for opt, arg in opts: + if opt == "--pkgdir": + pkgdir = arg + elif opt == "--groups": + groups_file = arg + + if pkgdir is None or groups_file is None: + usage() + + # Get the pkglist + cmd = "find %s -name '*.rpm' | xargs rpm -qp --queryformat '%%{NAME}\n'" % pkgdir + rpmlist = subprocess.check_output(cmd, shell=True).split() + + tree = ElementTree.parse(groups_file) + comps = tree.getroot() + comps.tail = '\n' + + add_group(comps, 'controller', rpmlist, filter_dir, 'filter_out_from_controller') + add_group(comps, 'controller-compute', rpmlist, filter_dir, 'filter_out_from_smallsystem') + add_group(comps, 'controller-compute-lowlatency', rpmlist, filter_dir, 'filter_out_from_smallsystem_lowlatency') + add_group(comps, 'compute', rpmlist, filter_dir, 'filter_out_from_compute') + add_group(comps, 'compute-lowlatency', rpmlist, filter_dir, 'filter_out_from_compute_lowlatency') + add_group(comps, 'storage', rpmlist, filter_dir, 'filter_out_from_storage') + + add_group(comps, 'controller') + add_group(comps, 'controller-compute') + add_group(comps, 'controller-compute-lowlatency') + add_group(comps, 'compute') + add_group(comps, 'compute-lowlatency') + add_group(comps, 'storage') + + tree.write(groups_file, encoding="UTF-8") + + +if __name__ == "__main__": + main() + diff --git a/bsp-files/pxeboot.cfg b/bsp-files/pxeboot.cfg new file mode 100644 index 00000000..bde20cd1 --- /dev/null +++ b/bsp-files/pxeboot.cfg @@ -0,0 +1,180 @@ +SERIAL 0 115200 +TIMEOUT 100 +DEFAULT menu.c32 + +# Menu Configuration +MENU WIDTH 80 +MENU MARGIN 10 +MENU PASSWORDMARGIN 3 +MENU ROWS 12 +MENU TABMSGROW 18 +MENU CMDLINEROW 18 +MENU ENDROW 24 +MENU PASSWORDROW 11 +MENU TIMEOUTROW 20 + 
+PROMPT 0 +NOESCAPE 1 +NOCOMPLETE 1 +ALLOWOPTIONS 0 + +label 0 + menu label ^1) Boot from hard drive + COM32 chain.c32 + APPEND hd0 + +# Standard Controller menu +menu begin + menu title Standard Controller Configuration + # Serial Console submenu + menu begin + menu title Serial Console + label 1 + menu label STANDARD Security Boot Profile + text help + Standard Controller, console=ttyS0 + Standard Security Profile Enabled (default setting) + endtext + kernel vmlinuz + append initrd=initrd.img bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_controller.cfg boot_device=sda rootfs_device=sda biosdevname=0 inst.text serial console=ttyS0,115200n8 inst.gpt security_profile=standard + ipappend 2 + + label S1 + menu label EXTENDED Security Boot Profile + text help + Standard Controller, console=ttyS0 + Extended Security Profile Enabled (will impact performance) + endtext + kernel vmlinuz + append initrd=initrd.img bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_controller.cfg boot_device=sda rootfs_device=sda biosdevname=0 inst.text serial console=ttyS0,115200n8 inst.gpt security_profile=extended + ipappend 2 + menu end + + # Graphical Console submenu + menu begin + menu title Graphical Console + label 2 + menu label STANDARD Security Boot Profile + text help + Standard Controller, console=tty0 + Standard Security Profile Enabled (default setting) + endtext + kernel vmlinuz + append initrd=initrd.img bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_controller.cfg boot_device=sda rootfs_device=sda biosdevname=0 inst.text console=tty0 inst.gpt security_profile=standard + ipappend 2 + + label S2 + menu label EXTENDED Security Boot Profile + text help + Standard Controller, console=tty0 + Extended Security Profile Enabled (will impact performance) + endtext + kernel vmlinuz + append initrd=initrd.img bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx 
inst.ks=xxxHTTP_URLxxx/pxeboot_controller.cfg boot_device=sda rootfs_device=sda biosdevname=0 inst.text console=tty0 inst.gpt security_profile=extended + ipappend 2 + menu end +menu end + +# AIO Controller menu +menu begin + menu title All-in-one Controller Configuration + # Serial Console submenu + menu begin + menu title Serial Console + label 3 + menu label STANDARD Security Boot Profile + text help + All-in-one Controller, console=ttyS0 + Standard Security Profile Enabled (default setting) + endtext + kernel vmlinuz + append initrd=initrd.img bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_smallsystem.cfg boot_device=sda rootfs_device=sda biosdevname=0 inst.text serial console=ttyS0,115200n8 inst.gpt security_profile=standard + ipappend 2 + + label S3 + menu label EXTENDED Security Boot Profile + text help + All-in-one Controller, console=ttyS0 + Extended Security Profile Enabled (will impact performance) + endtext + kernel vmlinuz + append initrd=initrd.img bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_smallsystem.cfg boot_device=sda rootfs_device=sda biosdevname=0 inst.text serial console=ttyS0,115200n8 inst.gpt security_profile=extended + ipappend 2 + menu end + + # Graphical Console submenu + menu begin + menu title Graphical Console + label 4 + menu label STANDARD Security Boot Profile + text help + All-in-one Controller, console=tty0 + Standard Security Profile Enabled (default setting) + endtext + kernel vmlinuz + append initrd=initrd.img bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_smallsystem.cfg boot_device=sda rootfs_device=sda biosdevname=0 inst.text console=tty0 inst.gpt security_profile=standard + ipappend 2 + + label S4 + menu label EXTENDED Security Boot Profile + text help + All-in-one Controller, console=tty0 + Extended Security Profile Enabled (will impact performance) + endtext + kernel vmlinuz + append initrd=initrd.img bootifonly=1 
devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_smallsystem.cfg boot_device=sda rootfs_device=sda biosdevname=0 inst.text console=tty0 inst.gpt security_profile=extended + ipappend 2 + menu end +menu end + +# AIO (Low Latency) Controller menu +menu begin + menu title All-in-one (lowlatency) Controller Configuration + # Serial Console submenu + menu begin + menu title Serial Console + label 5 + menu label STANDARD Security Boot Profile + text help + All-in-one (lowlatency) Controller, console=ttyS0 + Standard Security Profile Enabled (default setting) + endtext + kernel vmlinuz + append initrd=initrd.img bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_smallsystem_lowlatency.cfg boot_device=sda rootfs_device=sda biosdevname=0 inst.text console=ttyS0,115200n8 inst.gpt security_profile=standard + ipappend 2 + + label S5 + menu label EXTENDED Security Boot Profile + text help + All-in-one (lowlatency) Controller, console=ttyS0 + Extended Security Profile Enabled (will impact performance) + endtext + kernel vmlinuz + append initrd=initrd.img bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_smallsystem_lowlatency.cfg boot_device=sda rootfs_device=sda biosdevname=0 inst.text console=ttyS0,115200n8 inst.gpt security_profile=extended + ipappend 2 + menu end + + # Graphical Console submenu + menu begin + menu title Graphical Console + label 6 + menu label STANDARD Security Boot Profile + text help + All-in-one (lowlatency) Controller, console=tty0 + Standard Security Profile Enabled (default setting) + endtext + kernel vmlinuz + append initrd=initrd.img bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_smallsystem_lowlatency.cfg boot_device=sda rootfs_device=sda biosdevname=0 inst.text console=tty0 inst.gpt security_profile=standard + ipappend 2 + + label S6 + menu label EXTENDED Security Boot Profile + text help + All-in-one (lowlatency) Controller, 
console=tty0 + Extended Security Profile Enabled (will impact performance) + endtext + kernel vmlinuz + append initrd=initrd.img bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_smallsystem_lowlatency.cfg boot_device=sda rootfs_device=sda biosdevname=0 inst.text console=tty0 inst.gpt security_profile=extended + ipappend 2 + menu end +menu end diff --git a/bsp-files/pxeboot_grub.cfg b/bsp-files/pxeboot_grub.cfg new file mode 100644 index 00000000..997d3cde --- /dev/null +++ b/bsp-files/pxeboot_grub.cfg @@ -0,0 +1,104 @@ +timeout=10 +default=0 + +GRUB_HIDDEN_TIMEOUT=0 +GRUB_TIMEOUT_STYLE='countdown' + +pxe_root=${root} + +menuentry '1) UEFI Boot from hard drive' { + insmod part_gpt + insmod chain + # The EFI bootloader is located in /boot/efi/EFI/centos/ + # /boot/efi being the mount + # /dev/sda1 307016 9680 297336 4% /boot/efi + set root=(hd0,gpt1) + chainloader /EFI/centos/grubx64.efi +} + +# Standard Controller menu +submenu 'UEFI Standard Controller' { + submenu 'Serial Console' { + menuentry 'STANDARD Security Boot Profile' { + set root=${pxe_root} + linuxefi vmlinuz bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_controller.cfg ksdevice=$net_default_mac BOOTIF=$net_default_mac boot_device=sda rootfs_device=sda biosdevname=0 inst.text serial console=ttyS0,115200n8 inst.gpt security_profile=standard + initrdefi initrd.img + } + menuentry 'EXTENDED Security Boot Profile' { + set root=${pxe_root} + linuxefi vmlinuz bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_controller.cfg ksdevice=$net_default_mac BOOTIF=$net_default_mac boot_device=sda rootfs_device=sda biosdevname=0 inst.text serial console=ttyS0,115200n8 inst.gpt security_profile=extended tboot=true + initrdefi initrd.img + } + } + + submenu 'Graphical Console' { + menuentry 'STANDARD Security Boot Profile' { + set root=${pxe_root} + linuxefi vmlinuz bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx 
inst.ks=xxxHTTP_URLxxx/pxeboot_controller.cfg ksdevice=$net_default_mac BOOTIF=$net_default_mac boot_device=sda rootfs_device=sda biosdevname=0 inst.text console=tty0 inst.gpt security_profile=standard + initrdefi initrd.img + } + menuentry 'EXTENDED Security Boot Profile' { + set root=${pxe_root} + linuxefi vmlinuz bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_controller.cfg ksdevice=$net_default_mac BOOTIF=$net_default_mac boot_device=sda rootfs_device=sda biosdevname=0 inst.text console=tty0 inst.gpt security_profile=extended tboot=true + initrdefi initrd.img + } + } +} + +# AIO Controller menu +submenu 'UEFI All-in-one Controller' { + submenu 'Serial Console' { + menuentry 'STANDARD Security Boot Profile' { + set root=${pxe_root} + linuxefi vmlinuz bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_smallsystem.cfg ksdevice=$net_default_mac BOOTIF=$net_default_mac boot_device=sda rootfs_device=sda biosdevname=0 inst.text serial console=ttyS0,115200n8 inst.gpt security_profile=standard + initrdefi initrd.img + } + menuentry 'EXTENDED Security Boot Profile' { + set root=${pxe_root} + linuxefi vmlinuz bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_smallsystem.cfg ksdevice=$net_default_mac BOOTIF=$net_default_mac boot_device=sda rootfs_device=sda biosdevname=0 inst.text serial console=ttyS0,115200n8 inst.gpt security_profile=extended tboot=true + initrdefi initrd.img + } + } + + submenu 'Graphical Console' { + menuentry 'STANDARD Security Boot Profile' { + set root=${pxe_root} + linuxefi vmlinuz bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_smallsystem.cfg ksdevice=$net_default_mac BOOTIF=$net_default_mac boot_device=sda rootfs_device=sda biosdevname=0 inst.text console=tty0 inst.gpt security_profile=standard + initrdefi initrd.img + } + menuentry 'EXTENDED Security Boot Profile' { + set root=${pxe_root} + linuxefi vmlinuz 
bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_smallsystem.cfg ksdevice=$net_default_mac BOOTIF=$net_default_mac boot_device=sda rootfs_device=sda biosdevname=0 inst.text console=tty0 inst.gpt security_profile=extended tboot=true + initrdefi initrd.img + } + } +} + +# AIO (lowlatency) Controller menu +submenu 'UEFI All-in-one (lowlatency) Controller' { + submenu 'Serial Console' { + menuentry 'STANDARD Security Boot Profile' { + set root=${pxe_root} + linuxefi vmlinuz bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_smallsystem_lowlatency.cfg ksdevice=$net_default_mac BOOTIF=$net_default_mac boot_device=sda rootfs_device=sda biosdevname=0 inst.text serial console=ttyS0,115200n8 inst.gpt security_profile=standard + initrdefi initrd.img + } + menuentry 'EXTENDED Security Boot Profile' { + set root=${pxe_root} + linuxefi vmlinuz bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_smallsystem_lowlatency.cfg ksdevice=$net_default_mac BOOTIF=$net_default_mac boot_device=sda rootfs_device=sda biosdevname=0 inst.text serial console=ttyS0,115200n8 inst.gpt security_profile=extended tboot=true + initrdefi initrd.img + } + } + + submenu 'Graphical Console' { + menuentry 'STANDARD Security Boot Profile' { + set root=${pxe_root} + linuxefi vmlinuz bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_smallsystem_lowlatency.cfg ksdevice=$net_default_mac BOOTIF=$net_default_mac boot_device=sda rootfs_device=sda biosdevname=0 inst.text console=tty0 inst.gpt security_profile=standard + initrdefi initrd.img + } + menuentry 'EXTENDED Security Boot Profile' { + set root=${pxe_root} + linuxefi vmlinuz bootifonly=1 devfs=nomount inst.repo=xxxHTTP_URLxxx inst.ks=xxxHTTP_URLxxx/pxeboot_smallsystem_lowlatency.cfg ksdevice=$net_default_mac BOOTIF=$net_default_mac boot_device=sda rootfs_device=sda biosdevname=0 inst.text console=tty0 inst.gpt 
security_profile=extended tboot=true + initrdefi initrd.img + } + } +} diff --git a/bsp-files/pxeboot_setup.sh b/bsp-files/pxeboot_setup.sh new file mode 100755 index 00000000..35f3d870 --- /dev/null +++ b/bsp-files/pxeboot_setup.sh @@ -0,0 +1,126 @@
+#!/bin/bash
+#
+# Populate a PXE boot directory from the installer files that sit next to
+# this script.
+#
+# Usage: pxeboot_setup.sh -u <http base url> [-t <tftp pxeboot directory>] or [-w <working directory>]
+#
+# The destination (-t wins over -w) must not exist yet. Everything beside this
+# script is copied into it, the pxeboot/ files are copied to its top level and
+# the xxxHTTP_URLxxx, xxxHTTP_URL_PATCHESxxx and NUM_DIRS placeholders in the
+# boot menus and kickstarts are filled in. With -t, pxelinux.cfg/default is
+# also linked to pxeboot.cfg.
+#
+# Exit status: 0 on success, 1 on a usage error or if the destination cannot
+# be prepared.
+
+OPTIND=1
+
+BASE_URL=""
+TFTP_DIR=""
+WORKING_DIR=""
+COPY_DIR=""
+ISODIR=$(dirname "$(readlink -f "$0")")
+
+usage() {
+    echo "Usage: $0 -u <http base url> [-t <tftp pxeboot directory>] or [-w <working directory>]" 1>&2
+    exit 1
+}
+
+while getopts ":u:t:w:" opt; do
+    case "$opt" in
+        u)
+            BASE_URL=${OPTARG}
+            ;;
+        t)
+            TFTP_DIR=${OPTARG}
+            ;;
+        w)
+            WORKING_DIR=${OPTARG}
+            ;;
+        *)
+            usage
+            ;;
+    esac
+done
+
+shift $((OPTIND-1))
+
+if [ -z "$BASE_URL" ]; then
+    echo "HTTP base URL is required: -u <http base url>" 1>&2
+    exit 1
+fi
+
+if [ -z "$TFTP_DIR" ] && [ -z "$WORKING_DIR" ]; then
+    echo "Either tftp pxeboot directory or working directory has to be specified:" 1>&2
+    echo "-t <tftp pxeboot directory> or -w <working directory>" 1>&2
+    exit 1
+elif [ -n "$TFTP_DIR" ]; then
+    if [ -n "$WORKING_DIR" ]; then
+        echo "tftp pxeboot directory is supplied, working directory will be ignored."
+    fi
+    COPY_DIR=$TFTP_DIR
+else
+    COPY_DIR=$WORKING_DIR
+fi
+
+if [ -d "${COPY_DIR}" ]; then
+    echo "$COPY_DIR already exists" 1>&2
+    exit 1
+fi
+
+if [ ! -w "$(dirname "$COPY_DIR")" ]; then
+    echo "$COPY_DIR parent directory is not writeable." 1>&2
+    exit 1
+fi
+
+echo "Create ${COPY_DIR}"
+# Check mkdir itself: if the path already exists as a regular file, mkdir
+# fails while a chmod run after it would still succeed.
+if ! mkdir "${COPY_DIR}" || ! chmod +w "${COPY_DIR}"; then
+    echo "Can't create ${COPY_DIR}" 1>&2
+    exit 1
+fi
+
+#Copy the vmlinuz and initrd files to the destination directory
+cp "${ISODIR}/vmlinuz" "${COPY_DIR}/"
+cp "${ISODIR}/initrd.img" "${COPY_DIR}/"
+
+#Copy the contents of distribution to the destination directory
+cp -r "${ISODIR}"/* "${COPY_DIR}/"
+
+#Find the number of directories in the URL (the scheme's slashes don't count)
+dirpath=${BASE_URL#http://}
+dirpath=${dirpath#https://}
+DIRS=$(grep -o "/" <<< "$dirpath" | wc -l)
+
+#Escape what is special in a sed replacement; '#' is the delimiter used below
+BASE_URL=$(printf '%s' "$BASE_URL" | sed -e 's/[\\&#]/\\&/g')
+
+#Copy pxeboot files
+mkdir -p "${COPY_DIR}/EFI/centos/x86_64-efi/"
+cp -Rf "${COPY_DIR}"/pxeboot/* "${COPY_DIR}/"
+
+#Rename the UEFI grub config
+mv "${COPY_DIR}/pxeboot_grub.cfg" "${COPY_DIR}/grub.cfg"
+
+#Variable replacement
+sed -i "s#xxxHTTP_URLxxx#${BASE_URL}#g;
+        s#xxxHTTP_URL_PATCHESxxx#${BASE_URL}/patches#g;
+        s#NUM_DIRS#${DIRS}#g" \
+    "${COPY_DIR}/pxeboot.cfg" \
+    "${COPY_DIR}/grub.cfg" \
+    "${COPY_DIR}/pxeboot_controller.cfg" \
+    "${COPY_DIR}/pxeboot_smallsystem.cfg" \
+    "${COPY_DIR}/pxeboot_smallsystem_lowlatency.cfg"
+
+# Delete unnecessary files
+rm -Rf "${COPY_DIR}/EFI/BOOT"
+rm -Rf "${COPY_DIR}/pxeboot"
+
+if [ -n "$TFTP_DIR" ]; then
+    #Create pxelinux.cfg directory and default link
+    mkdir -p "${TFTP_DIR}/pxelinux.cfg"
+    chmod 755 "${TFTP_DIR}/pxelinux.cfg"
+    ln -s ../pxeboot.cfg "${TFTP_DIR}/pxelinux.cfg/default"
+fi
diff --git a/bsp-files/upgrades/import.sh b/bsp-files/upgrades/import.sh new file mode 100644 index 00000000..0d863ae8 --- /dev/null +++ b/bsp-files/upgrades/import.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# Copyright (c) 2015-2017 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# This script is run during the load-import command +# It is used to copy the required files from the iso to the +# controller.
+ + +set -e + +exec 2>>/var/log/load-import.log +set -x +echo "$(date): Starting execution: $0 $@" + +cleanup() { + rm -rf $TMP_RPM +} +rollback() { + rm -rf $FEED_DIR +} + +error() { + local parent_lineno="$1" + local err_code="${2}" + echo "Error executing import script at line: ${parent_lineno} with error code: ${err_code}" + rollback + exit "${err_code}" +} + +trap 'error ${LINENO} $?' ERR +trap cleanup 0 + +SCRIPT_DIR=$(dirname $0) +ISO_DIR=$(dirname $SCRIPT_DIR) + +source $SCRIPT_DIR/version +source /etc/build.info + +FEED_DIR=/www/pages/feed/rel-$VERSION +CURRENT_FEED_DIR=/www/pages/feed/rel-$SW_VERSION +TMP_RPM=/tmp/cpio + +rm -rf $TMP_RPM +rm -rf $FEED_DIR + +mkdir -p $FEED_DIR + +cp -rp $ISO_DIR/Packages $ISO_DIR/repodata $ISO_DIR/LiveOS $FEED_DIR/ + +cp -p $CURRENT_FEED_DIR/install_uuid $FEED_DIR/ + +if [ -d $ISO_DIR/patches ]; then + mkdir -p /www/pages/updates/rel-${VERSION} + cp -r ${ISO_DIR}/patches/Packages ${ISO_DIR}/patches/repodata /www/pages/updates/rel-${VERSION}/ + rsync -ac ${ISO_DIR}/patches/metadata/ /opt/patching/metadata/ + mkdir -p /opt/patching/packages/${VERSION} + + find /www/pages/updates/rel-${VERSION}/Packages -name '*.rpm' \ + | xargs --no-run-if-empty -I files cp --preserve=all files /opt/patching/packages/${VERSION}/ +fi + diff --git a/bsp-files/upgrades/metadata.xml b/bsp-files/upgrades/metadata.xml new file mode 100644 index 00000000..ed2598f1 --- /dev/null +++ b/bsp-files/upgrades/metadata.xml @@ -0,0 +1,24 @@ + + + xxxSW_VERSIONxxx + + + 17.06 + TC_17.06_PATCH_0001 + TC_17.06_PATCH_0002 + TC_17.06_PATCH_0003 + TC_17.06_PATCH_0004 + TC_17.06_PATCH_0005 + TC_17.06_PATCH_0006 + TC_17.06_PATCH_0007 + TC_17.06_PATCH_0008 + TC_17.06_PATCH_0009 + TC_17.06_PATCH_0010 + TC_17.06_PATCH_0011 + TC_17.06_PATCH_0012 + TC_17.06_PATCH_0013 + TC_17.06_PATCH_0014 + TC_17.06_PATCH_0015 + + + diff --git a/installer/initrd/README b/installer/initrd/README new file mode 100644 index 00000000..d9ddfd1e --- /dev/null +++ b/installer/initrd/README 
@@ -0,0 +1,254 @@ +There are three prebuilt files that we can update when we need to make changes +to the installer: +- vmlinuz - The kernel +- initrd.img - Initial initrd loaded when the installer boots. Has kernel + modules, etc, and loads the squashfs.img +- squashfs.img - Provides the rootfs for the installer, which includes + components like anaconda + +The original stock CentOS files are found: +/import/mirrors/CentOS/7.4.1708/os/x86_64/isolinux/initrd.img +/import/mirrors/CentOS/7.4.1708/os/x86_64/LiveOS/squashfs.img + +# NOTE: before updating these files, ensure that you've had a successful build +for the necessary files used in "kernel-rpms" and "rootfs-rpms". +# here are build procedures: + +# kernel, kernel modules will be generated by this step +build-pkgs + +# anaconda-*.tis.*.rpm rpm-*.tis*.rpm will be generated by this step +build-srpms --installer && build-rpms --installer + +# systemd-*.tis.*.rpm and bind-utils-9.9.4-61.el7.x86_64.rpm, ima-evm-utils-1.0-1.el7.x86_64.rpm +# will be available after this step done +build-iso + +=========== +vmlinuz: +We're using the runtime kernel for this, so when we update the installer, we +just grab the kernel from build server or your own build environment. + +Note -- The kernel to be used should be signed. The Jenkins builds will +produce signed kernels, but if you are concerned about the signature (for +example, if you are concerned that a developer kernel is being used or if +the signing server is behaving oddly) then you can verify the integrity of +the kernel by going onto yow-cgts3-lx and using the command: + +sbverify --cert [CERT] vmlinuz + +The certificate specified as the argument can be found in +$MY_REPO/addons/wr-cgcs/layers/cgcs/recipes-extended/shim-unsigned/files/tis-shim.crt + +=========== +initrd.img: +When we update the kernel and kernel modules for the installer, we need to +update the initrd.img. This is a manual procedure currently, and must be done +as **root**.
+ +# First, extract the initrd.img: + +ORIG_INITRD=$PWD/initrd.img +mkdir initrd.work +cd initrd.work +xzcat $ORIG_INITRD | cpio -i + +# Make the necessary changes, and rebuild the initrd + +# We want to include our kernel and required modules in the initrd. Copy their RPMs from +# the Jenkins build or local build folders (~/export/dist/isolinux/Packages/ and ~/std/rpmbuild/RPMS/) +# to ../kernel-rpms, to be extracted with rpm2cpio: +# kernel +# kmod-i40e +# kmod-ixgbe +# kmod-e1000e +# kmod-tpm +# mlnx-ofa_kernel-modules +# +# Also adding in dmidecode for debug purposes + + +# Delete the old kernel files: +rm -rf boot/ etc/modules-load.d/ etc/ld.so.conf.d/kernel-3.10.0-693.2.2.el7.tis.29.x86_64.conf lib/modules/3.10.0-693.2.2.el7.tis.29.x86_64/ + +# Extract the RPMs from the build system +# FYI, here are RPMs in "kernel-rpms": + +# ls kernel-rpms/ +kernel-3.10.0-693.2.2.el7.33.tis.x86_64.rpm kmod-i40e-rt-2.4.3-0.tis.6.x86_64.rpm mlnx-ofa_kernel-4.3-OFED.4.3.1.0.1.1.g8509e41.tis.1.x86_64.rpm +kernel-rt-3.10.0-693.2.2.rt56.623.el7.tis.39.x86_64.rpm kmod-ixgbe-5.2.3-0.tis.2.x86_64.rpm mlnx-ofa_kernel-devel-4.3-OFED.4.3.1.0.1.1.g8509e41.tis.1.x86_64.rpm +kmod-e1000e-3.3.6-0.tis.1.x86_64.rpm kmod-ixgbe-rt-5.2.3-0.tis.2.x86_64.rpm mlnx-ofa_kernel-modules-4.3-OFED.4.3.1.0.1.1.g8509e41.tis.1.x86_64.rpm +kmod-e1000e-rt-3.3.6-0.tis.1.x86_64.rpm kmod-tpm-4.12-0.tis.5.x86_64.rpm +kmod-i40e-2.4.3-0.tis.6.x86_64.rpm kmod-tpm-rt-4.12-0.tis.5.x86_64.rpm + + +for f in ../kernel-rpms/*.rpm ; do rpm2cpio $f | cpio -idu; done + +# In a chroot, run depmods. The command is available from the kmod-i40e for syntax/args: +# [root@yow-cgts4-lx initrd.work]# rpm -qp --scripts ../kernel-rpms/kmod-i40e-* |grep depmod +# /usr/sbin/depmod -aeF "/boot/System.map-3.10.0-327.36.2.el7.9.tis.x86_64" "3.10.0-327.36.2.el7.9.tis.x86_64" > /dev/null || : +# /usr/sbin/depmod -aeF "/boot/System.map-3.10.0-327.36.2.el7.9.tis.x86_64" "3.10.0-327.36.2.el7.9.tis.x86_64" > /dev/null || : + +chroot . 
+# <kernel-version> needs to be replaced, for example, with 3.10.0-693.2.2.el7.33.tis.x86_64 +/usr/sbin/depmod -aeF "/boot/System.map-<kernel-version>" "<kernel-version>" +exit + +# Remove the biosdevname package from initrd and squashfs +rm -f ./usr/lib/udev/rules.d/71-biosdevname.rules ./usr/sbin/biosdevname + +# Rebuild the initrd +find . \ + | cpio -o -H newc \ + | xz --check=crc32 --x86 --lzma2=dict=512KiB \ + > ../new-initrd.img + + + +=========== +squashfs.img: +If we want to make changes to the rootfs of the installer (i.e. update anaconda), +we need to update the squashfs.img file. This file can be mounted, and contains +a rootfs image: LiveOS/rootfs.img + +# Build the TIS-modified installer RPMs first (see anaconda jiggery-pokery at end of this file): +build-srpms --installer && build-rpms --installer + +# We also need TIS-built systemd for NVME support, so copy the following from Jenkins build: +systemd-219-42.el7_4.1.tis.10.x86_64.rpm +systemd-libs-219-42.el7_4.1.tis.10.x86_64.rpm +systemd-sysv-219-42.el7_4.1.tis.10.x86_64.rpm + +# We need to update the following RPMs in the rootfs +ima-evm-utils-1.0-1.el7.x86_64.rpm +rpm-4.14.0-1.tis.1.x86_64.rpm +rpm-build-4.14.0-1.tis.1.x86_64.rpm +rpm-build-libs-4.14.0-1.tis.1.x86_64.rpm +rpm-libs-4.14.0-1.tis.1.x86_64.rpm +rpm-plugin-systemd-inhibit-4.14.0-1.tis.1.x86_64.rpm +rpm-python-4.14.0-1.tis.1.x86_64.rpm + +# We also now need bind-utils in the squashfs, due to the anaconda-preexec we've added.
+bind-utils-9.9.4-51.el7.x86_64.rpm + +# Mount the squashfs.img and copy the rootfs out: +ORIG_SQUASHFS=$PWD/squashfs.img +mkdir squashfs.mnt +mount -o loop -t squashfs $ORIG_SQUASHFS squashfs.mnt +mkdir LiveOS +cp squashfs.mnt/LiveOS/rootfs.img LiveOS/ +umount squashfs.mnt + +# Now mount the rootfs.img file: +mkdir squashfs.work +mount -o loop LiveOS/rootfs.img squashfs.work +cd squashfs.work + +# You can now make changes as needed, and they're reflected in the rootfs.img + +# For anaconda, ignore these RPMs that are built: +# anaconda-debuginfo +# anaconda-dracut +# anaconda-widgets-devel +# anaconda-gui +# + +# FYI, here are RPMs from "rootfs-rpms": +ls rootfs-rpms/ +anaconda-21.48.22.121-1.el7.centos.tis.5.x86_64.rpm rpm-4.14.0-1.tis.1.x86_64.rpm +anaconda-core-21.48.22.121-1.el7.centos.tis.5.x86_64.rpm rpm-build-4.14.0-1.tis.1.x86_64.rpm +anaconda-debuginfo-21.48.22.121-1.el7.centos.tis.5.x86_64.rpm rpm-build-libs-4.14.0-1.tis.1.x86_64.rpm +anaconda-dracut-21.48.22.121-1.el7.centos.tis.5.x86_64.rpm rpm-libs-4.14.0-1.tis.1.x86_64.rpm +anaconda-gui-21.48.22.121-1.el7.centos.tis.5.x86_64.rpm rpm-plugin-systemd-inhibit-4.14.0-1.tis.1.x86_64.rpm +anaconda-tui-21.48.22.121-1.el7.centos.tis.5.x86_64.rpm rpm-python-4.14.0-1.tis.1.x86_64.rpm +anaconda-widgets-21.48.22.121-1.el7.centos.tis.5.x86_64.rpm systemd-219-42.el7_4.1.tis.10.x86_64.rpm +anaconda-widgets-devel-21.48.22.121-1.el7.centos.tis.5.x86_64.rpm systemd-libs-219-42.el7_4.1.tis.10.x86_64.rpm +bind-utils-9.9.4-61.el7.x86_64.rpm systemd-sysv-219-42.el7_4.1.tis.10.x86_64.rpm +ima-evm-utils-1.0-1.el7.x86_64.rpm + +# IMPORTANT Note: when copying “systemd-219-42.el7_4.1.tis.10.x86_64.rpm” from build target folders to “rootfs-rpms”, +# we need to get it from “../export/dist/isolinux/Packages/systemd-219-42.el7_4.1.tis.10.x86_64.rpm”, instead of other +# folders (for example, ../std/rpmbuild/RPMS/systemd-219-42.el7_4.1.tis.10.x86_64.rpm), they are actually having +# different sizes and delta in contents. 
+ +# Extract the rest into the squashfs +rm -rf usr/lib64/python2.7/site-packages/pyanaconda/ +rm -rf usr/lib64/python2.7/site-packages/rpm/ +# get files from RPMs and copy them into corresponding folders +for f in ../rootfs-rpms/*.rpm ; do rpm2cpio $f | cpio -idu; done +#find old .pyo files and delete them +find usr/lib64/python2.7/site-packages/pyanaconda/ usr/lib64/python2.7/site-packages/rpm/ -name '*.pyo' | xargs rm + + +# IMPORTANT Note, please do NOT forget the following step: +# MUST HAVE!!! Update the kernel and related kernel modules, same as what was done for "initrd.img" updating +for f in ../kernel-rpms/*.rpm ; do rpm2cpio $f | cpio -idu; done + +# Remove the biosdevname package from initrd and squashfs +rm -f ./usr/lib/udev/rules.d/71-biosdevname.rules ./usr/sbin/biosdevname + +cd .. +umount squashfs.work + +# Build/rebuild a new squashfs.img (output file is test.squashfs.img in this +# example) +# Note: You may need to install "squashfs-tools" in advance +#remove the old version named test.squashfs.img +rm -f test.squashfs.img +#make the new squashfs image named "test.squashfs.img" or another name +mksquashfs LiveOS test.squashfs.img -keep-as-directory -comp xz -b 1M + + +=========== +To test with design build, update the following files with paths to your test +files and filenames: +recipes-installer/pxe-network-installer/centos/build_srpm.data +recipes-installer/pxe-network-installer/centos/pxe-network-installer.spec + + +=========== +Delivering changes: +Copy the updated vmlinuz (from kernel rpm), initrd.img, and squashfs.img files, +with appropriate version suffixes, to the mirror folder where you place these files. + +Update the pxe-network-installer files to point to the new versions. + + +=========== +NOTE: The following jiggery-pokery is no longer required after recent mirror updates. +I'm keeping the text here, though, for future reference. Just in case.
+ +Jiggery-pokery required to build anaconda after rebase to 7.3: + +The anaconda build reports a dependency error: + +12:45:41 Error: Package: libgudev1-219-19.el7_2.13.x86_64 (TisCentos7Distro) +12:45:41 Requires: systemd-libs = 219-19.el7_2.13 +12:45:41 Installed: systemd-libs-219-30.el7_3.6.x86_64 (@TisCentos7Distro) +12:45:41 systemd-libs = 219-30.el7_3.6 +12:45:41 Available: systemd-libs-219-19.el7_2.13.x86_64 (TisCentos7Distro) +12:45:41 systemd-libs = 219-19.el7_2.13 +12:45:41 You could try using --skip-broken to work around the problem +12:45:41 You could try running: rpm -Va --nofiles --nodigest + +Our build system is setup to install the highest versions of packages, but there's a dependency +that has a version-specific dependency to an older version of the RPM, so it requires a little +massaging to get the anaconda RPM to build. + +Step 1: Add a symlink into the cgcs-centos-repo dir: +ln -s /import/mirrors/CentOS/tis-r4-CentOS/mitaka/Binary/x86_64/systemd-libs-219-19.el7_2.13.x86_64.rpm \ + $MY_REPO/cgcs-centos-repo/Binary/x86_64/ + +Step 2: Try to build, so the repodata is updated (this will still fail) +build-srpms --installer && build-rpms --installer + +Step 3: Short-circuit the update_cgcs_repo function in build-rpms to avoid repodata update and mock env clearout + +Step 4: Manually install RPM in mock env +mock -r installer/${MY_BUILD_ENVIRONMENT}-installer.cfg --copyin systemd-libs-219-19.el7_2.13.x86_64.rpm systemd-libs-219-19.el7_2.13.x86_64.rpm +mock -r installer/${MY_BUILD_ENVIRONMENT}-installer.cfg --shell +rpm -i --force systemd-libs-219-19.el7_2.13.x86_64.rpm + +Step 5: Build with success! 
+build-srpms --installer && build-rpms --installer + diff --git a/installer/pxe-network-installer/centos/build_srpm.data b/installer/pxe-network-installer/centos/build_srpm.data new file mode 100644 index 00000000..02ff9f30 --- /dev/null +++ b/installer/pxe-network-installer/centos/build_srpm.data @@ -0,0 +1,12 @@ +COPY_LIST="pxe-network-installer//* \ + $CGCS_BASE/mwa-beas/bsp-files/grub.cfg \ + $CGCS_BASE/mwa-beas/bsp-files/kickstarts/post_clone_iso_ks.cfg \ + $SRC_BASE/cgcs-centos-repo/Binary/images/efiboot.img \ + /import/mirrors/CentOS/tis-installer/initrd.img-stx-0.1 \ + /import/mirrors/CentOS/tis-installer/squashfs.img-stx-0.1 \ + /import/mirrors/CentOS/tis-installer/vmlinuz-stx-0.1 \ +" + +TIS_PATCH_VER=25 +BUILD_IS_BIG=4 +BUILD_IS_SLOW=4 diff --git a/installer/pxe-network-installer/centos/pxe-network-installer.spec b/installer/pxe-network-installer/centos/pxe-network-installer.spec new file mode 100644 index 00000000..5defd020 --- /dev/null +++ b/installer/pxe-network-installer/centos/pxe-network-installer.spec @@ -0,0 +1,155 @@ +Summary: TIS Network Installation +Name: pxe-network-installer +Version: 1.0 +Release: %{tis_patch_ver}%{?_tis_dist} +License: Apache-2.0 +Group: base +Packager: Wind River +URL: unknown + +%define tis_image_version stx-0.1 + +Source0: LICENSE + +Source001: vmlinuz-%{tis_image_version} +Source002: initrd.img-%{tis_image_version} +Source003: squashfs.img-%{tis_image_version} + +Source010: pxeboot-update.sh +Source011: grub.cfg +Source012: efiboot.img +Source013: post_clone_iso_ks.cfg + +Source030: default +Source031: default.static +Source032: centos-pxe-controller-install +Source033: centos-pxe-compute-install +Source034: centos-pxe-smallsystem-install +Source035: centos-pxe-storage-install +Source036: centos-pxe-compute_lowlatency-install +Source037: centos-pxe-smallsystem_lowlatency-install + +Source050: pxe-grub.cfg +Source051: pxe-grub.cfg.static +Source052: efi-centos-pxe-controller-install +Source053: 
efi-centos-pxe-compute-install +Source054: efi-centos-pxe-smallsystem-install +Source055: efi-centos-pxe-storage-install +Source056: efi-centos-pxe-compute_lowlatency-install +Source057: efi-centos-pxe-smallsystem_lowlatency-install + + +BuildRequires: syslinux +BuildRequires: grub2 + +Requires: grub2-efi-pxeboot + +%description +TIS Network Installation + +%files +%defattr(-,root,root,-) + +%install +install -v -d -m 755 %{buildroot}/pxeboot +install -v -d -m 755 %{buildroot}/pxeboot/pxelinux.cfg.files +install -v -d -m 755 %{buildroot}/pxeboot/rel-%{platform_release} +install -v -d -m 755 %{buildroot}/pxeboot/EFI +install -v -d -m 755 %{buildroot}/pxeboot/EFI/centos +install -v -d -m 755 %{buildroot}/pxeboot/EFI/centos/x86_64-efi + +install -v -m 644 %{_sourcedir}/vmlinuz-%{tis_image_version} \ + %{buildroot}/pxeboot/rel-%{platform_release}/installer-bzImage_1.0 +install -v -m 644 %{_sourcedir}/initrd.img-%{tis_image_version} \ + %{buildroot}/pxeboot/rel-%{platform_release}/installer-intel-x86-64-initrd_1.0 +ln -s installer-bzImage_1.0 %{buildroot}/pxeboot/rel-%{platform_release}/installer-bzImage +ln -s installer-intel-x86-64-initrd_1.0 %{buildroot}/pxeboot/rel-%{platform_release}/installer-initrd + +install -v -D -m 644 %{_sourcedir}/squashfs.img-%{tis_image_version} \ + %{buildroot}/www/pages/feed/rel-%{platform_release}/LiveOS/squashfs.img + +install -v -d -m 755 %{buildroot}%{_sbindir} + +install -v -m 755 %{_sourcedir}/pxeboot-update.sh %{buildroot}%{_sbindir}/pxeboot-update-%{platform_release}.sh + +install -v -m 644 %{_sourcedir}/post_clone_iso_ks.cfg \ + %{buildroot}/pxeboot/post_clone_iso_ks.cfg + +install -v -m 644 %{_sourcedir}/default \ + %{buildroot}/pxeboot/pxelinux.cfg.files/default +install -v -m 644 %{_sourcedir}/default.static \ + %{buildroot}/pxeboot/pxelinux.cfg.files/default.static +install -v -m 644 %{_sourcedir}/centos-pxe-controller-install \ + %{buildroot}/pxeboot/pxelinux.cfg.files/pxe-controller-install-%{platform_release} +install -v 
-m 644 %{_sourcedir}/centos-pxe-compute-install \ + %{buildroot}/pxeboot/pxelinux.cfg.files/pxe-compute-install-%{platform_release} +install -v -m 644 %{_sourcedir}/centos-pxe-smallsystem-install \ + %{buildroot}/pxeboot/pxelinux.cfg.files/pxe-smallsystem-install-%{platform_release} +install -v -m 644 %{_sourcedir}/centos-pxe-storage-install \ + %{buildroot}/pxeboot/pxelinux.cfg.files/pxe-storage-install-%{platform_release} +install -v -m 644 %{_sourcedir}/centos-pxe-compute_lowlatency-install \ + %{buildroot}/pxeboot/pxelinux.cfg.files/pxe-compute_lowlatency-install-%{platform_release} +install -v -m 644 %{_sourcedir}/centos-pxe-smallsystem_lowlatency-install \ + %{buildroot}/pxeboot/pxelinux.cfg.files/pxe-smallsystem_lowlatency-install-%{platform_release} + + +# UEFI support +install -v -m 644 %{_sourcedir}/pxe-grub.cfg \ + %{buildroot}/pxeboot/pxelinux.cfg.files/grub.cfg +install -v -m 644 %{_sourcedir}/pxe-grub.cfg.static \ + %{buildroot}/pxeboot/pxelinux.cfg.files/grub.cfg.static +# Copy EFI boot image. It will be used to create ISO on the Controller. 
+install -v -m 644 %{_sourcedir}/efiboot.img \ + %{buildroot}/pxeboot/rel-%{platform_release}/ +install -v -m 644 %{_sourcedir}/efi-centos-pxe-controller-install \ + %{buildroot}/pxeboot/pxelinux.cfg.files/efi-pxe-controller-install-%{platform_release} +install -v -m 644 %{_sourcedir}/efi-centos-pxe-compute-install \ + %{buildroot}/pxeboot/pxelinux.cfg.files/efi-pxe-compute-install-%{platform_release} +install -v -m 644 %{_sourcedir}/efi-centos-pxe-smallsystem-install \ + %{buildroot}/pxeboot/pxelinux.cfg.files/efi-pxe-smallsystem-install-%{platform_release} +install -v -m 644 %{_sourcedir}/efi-centos-pxe-storage-install \ + %{buildroot}/pxeboot/pxelinux.cfg.files/efi-pxe-storage-install-%{platform_release} +install -v -m 644 %{_sourcedir}/efi-centos-pxe-compute_lowlatency-install \ + %{buildroot}/pxeboot/pxelinux.cfg.files/efi-pxe-compute_lowlatency-install-%{platform_release} +install -v -m 644 %{_sourcedir}/efi-centos-pxe-smallsystem_lowlatency-install \ + %{buildroot}/pxeboot/pxelinux.cfg.files/efi-pxe-smallsystem_lowlatency-install-%{platform_release} + + +sed -i "s/xxxSW_VERSIONxxx/%{platform_release}/g" \ + %{buildroot}/pxeboot/pxelinux.cfg.files/pxe-* \ + %{buildroot}/pxeboot/pxelinux.cfg.files/efi-pxe-* + +# Copy files from the syslinux pkg +install -v -m 0644 \ + %{_datadir}/syslinux/menu.c32 \ + %{_datadir}/syslinux/vesamenu.c32 \ + %{_datadir}/syslinux/chain.c32 \ + %{_datadir}/syslinux/linux.c32 \ + %{_datadir}/syslinux/reboot.c32 \ + %{_datadir}/syslinux/pxechain.com \ + %{_datadir}/syslinux/pxelinux.0 \ + %{_datadir}/syslinux/gpxelinux.0 \ + %{buildroot}/pxeboot + +# Copy files from grub2. Centos UEFI bootloader expect these files +install -v -m 0644 \ + %{_prefix}/lib/grub/i386-pc/command.lst \ + %{_prefix}/lib/grub/i386-pc/fs.lst \ + %{_prefix}/lib/grub/i386-pc/crypto.lst \ + %{_prefix}/lib/grub/i386-pc/terminal.lst \ + %{buildroot}/pxeboot/EFI/centos/x86_64-efi +# Copy Titanium grub.cfg. It will be used to create ISO on the Controller. 
+install -v -m 0644 %{_sourcedir}/grub.cfg \ + %{buildroot}/pxeboot/EFI/ + +# UEFI bootloader expect the grub.cfg file to be in /pxeboot/ so create a symlink for it +ln -s pxelinux.cfg/grub.cfg %{buildroot}/pxeboot/grub.cfg + +%files +%license ../SOURCES/LICENSE +%defattr(-,root,root,-) +%dir /pxeboot +/pxeboot/* +%{_sbindir}/pxeboot-update-%{platform_release}.sh +/www/pages/feed/rel-%{platform_release}/LiveOS/squashfs.img + diff --git a/installer/pxe-network-installer/pxe-network-installer/LICENSE b/installer/pxe-network-installer/pxe-network-installer/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/installer/pxe-network-installer/pxe-network-installer/centos-pxe-compute-install b/installer/pxe-network-installer/pxe-network-installer/centos-pxe-compute-install new file mode 100644 index 00000000..bfbfcfa0 --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/centos-pxe-compute-install @@ -0,0 +1,27 @@ +SERIAL 0 115200 +TIMEOUT 50 +DEFAULT menu.c32 + +# Menu Configuration +MENU WIDTH 80 +MENU MARGIN 10 +MENU PASSWORDMARGIN 3 +MENU ROWS 12 +MENU TABMSGROW 18 +MENU CMDLINEROW 18 +MENU ENDROW 24 +MENU PASSWORDROW 11 +MENU TIMEOUTROW 20 + +PROMPT 0 +NOESCAPE 1 +NOCOMPLETE 1 +ALLOWOPTIONS 0 + +LABEL 1 + MENU LABEL ^1) Compute + MENU DEFAULT + KERNEL rel-xxxSW_VERSIONxxx/installer-bzImage + APPEND initrd=rel-xxxSW_VERSIONxxx/installer-initrd bootifonly=1 devfs=nomount inst.repo=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/ inst.ks=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/net_compute_ks.cfg usbcore.autosuspend=-1 biosdevname=0 rd.net.timeout.dhcp=120 xxxAPPEND_OPTIONSxxx + IPAPPEND 2 + diff --git a/installer/pxe-network-installer/pxe-network-installer/centos-pxe-compute_lowlatency-install b/installer/pxe-network-installer/pxe-network-installer/centos-pxe-compute_lowlatency-install new file mode 100644 index 00000000..2b2f0dd1 --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/centos-pxe-compute_lowlatency-install @@ -0,0 +1,27 
@@ +SERIAL 0 115200 +TIMEOUT 50 +DEFAULT menu.c32 + +# Menu Configuration +MENU WIDTH 80 +MENU MARGIN 10 +MENU PASSWORDMARGIN 3 +MENU ROWS 12 +MENU TABMSGROW 18 +MENU CMDLINEROW 18 +MENU ENDROW 24 +MENU PASSWORDROW 11 +MENU TIMEOUTROW 20 + +PROMPT 0 +NOESCAPE 1 +NOCOMPLETE 1 +ALLOWOPTIONS 0 + +LABEL 1 + MENU LABEL ^1) Lowlatency Compute + MENU DEFAULT + KERNEL rel-xxxSW_VERSIONxxx/installer-bzImage + APPEND initrd=rel-xxxSW_VERSIONxxx/installer-initrd bootifonly=1 devfs=nomount inst.repo=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/ inst.ks=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/net_compute_lowlatency_ks.cfg usbcore.autosuspend=-1 biosdevname=0 rd.net.timeout.dhcp=120 xxxAPPEND_OPTIONSxxx + IPAPPEND 2 + diff --git a/installer/pxe-network-installer/pxe-network-installer/centos-pxe-controller-install b/installer/pxe-network-installer/pxe-network-installer/centos-pxe-controller-install new file mode 100644 index 00000000..111051f3 --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/centos-pxe-controller-install @@ -0,0 +1,27 @@ +SERIAL 0 115200 +TIMEOUT 50 +DEFAULT menu.c32 + +# Menu Configuration +MENU WIDTH 80 +MENU MARGIN 10 +MENU PASSWORDMARGIN 3 +MENU ROWS 12 +MENU TABMSGROW 18 +MENU CMDLINEROW 18 +MENU ENDROW 24 +MENU PASSWORDROW 11 +MENU TIMEOUTROW 20 + +PROMPT 0 +NOESCAPE 1 +NOCOMPLETE 1 +ALLOWOPTIONS 0 + +LABEL 1 + MENU LABEL ^1) Standard Controller + MENU DEFAULT + KERNEL rel-xxxSW_VERSIONxxx/installer-bzImage + APPEND initrd=rel-xxxSW_VERSIONxxx/installer-initrd bootifonly=1 devfs=nomount inst.repo=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/ inst.ks=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/net_controller_ks.cfg usbcore.autosuspend=-1 biosdevname=0 rd.net.timeout.dhcp=120 xxxAPPEND_OPTIONSxxx + IPAPPEND 2 + diff --git a/installer/pxe-network-installer/pxe-network-installer/centos-pxe-smallsystem-install b/installer/pxe-network-installer/pxe-network-installer/centos-pxe-smallsystem-install new file mode 100644 index 
00000000..717f4b4b --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/centos-pxe-smallsystem-install @@ -0,0 +1,27 @@ +SERIAL 0 115200 +TIMEOUT 50 +DEFAULT menu.c32 + +# Menu Configuration +MENU WIDTH 80 +MENU MARGIN 10 +MENU PASSWORDMARGIN 3 +MENU ROWS 12 +MENU TABMSGROW 18 +MENU CMDLINEROW 18 +MENU ENDROW 24 +MENU PASSWORDROW 11 +MENU TIMEOUTROW 20 + +PROMPT 0 +NOESCAPE 1 +NOCOMPLETE 1 +ALLOWOPTIONS 0 + +LABEL 1 + MENU LABEL ^1) All-in-one + MENU DEFAULT + KERNEL rel-xxxSW_VERSIONxxx/installer-bzImage + APPEND initrd=rel-xxxSW_VERSIONxxx/installer-initrd bootifonly=1 devfs=nomount inst.repo=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/ inst.ks=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/net_smallsystem_ks.cfg usbcore.autosuspend=-1 biosdevname=0 rd.net.timeout.dhcp=120 xxxAPPEND_OPTIONSxxx + IPAPPEND 2 + diff --git a/installer/pxe-network-installer/pxe-network-installer/centos-pxe-smallsystem_lowlatency-install b/installer/pxe-network-installer/pxe-network-installer/centos-pxe-smallsystem_lowlatency-install new file mode 100644 index 00000000..a4b339fc --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/centos-pxe-smallsystem_lowlatency-install @@ -0,0 +1,27 @@ +SERIAL 0 115200 +TIMEOUT 50 +DEFAULT menu.c32 + +# Menu Configuration +MENU WIDTH 80 +MENU MARGIN 10 +MENU PASSWORDMARGIN 3 +MENU ROWS 12 +MENU TABMSGROW 18 +MENU CMDLINEROW 18 +MENU ENDROW 24 +MENU PASSWORDROW 11 +MENU TIMEOUTROW 20 + +PROMPT 0 +NOESCAPE 1 +NOCOMPLETE 1 +ALLOWOPTIONS 0 + +LABEL 1 + MENU LABEL ^1) All-in-one (lowlatency) + MENU DEFAULT + KERNEL rel-xxxSW_VERSIONxxx/installer-bzImage + APPEND initrd=rel-xxxSW_VERSIONxxx/installer-initrd bootifonly=1 devfs=nomount inst.repo=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/ inst.ks=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/net_smallsystem_lowlatency_ks.cfg usbcore.autosuspend=-1 biosdevname=0 rd.net.timeout.dhcp=120 xxxAPPEND_OPTIONSxxx + IPAPPEND 2 + diff --git 
a/installer/pxe-network-installer/pxe-network-installer/centos-pxe-storage-install b/installer/pxe-network-installer/pxe-network-installer/centos-pxe-storage-install new file mode 100644 index 00000000..58e924fb --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/centos-pxe-storage-install @@ -0,0 +1,27 @@ +SERIAL 0 115200 +TIMEOUT 50 +DEFAULT menu.c32 + +# Menu Configuration +MENU WIDTH 80 +MENU MARGIN 10 +MENU PASSWORDMARGIN 3 +MENU ROWS 12 +MENU TABMSGROW 18 +MENU CMDLINEROW 18 +MENU ENDROW 24 +MENU PASSWORDROW 11 +MENU TIMEOUTROW 20 + +PROMPT 0 +NOESCAPE 1 +NOCOMPLETE 1 +ALLOWOPTIONS 0 + +LABEL 1 + MENU LABEL ^1) Storage + MENU DEFAULT + KERNEL rel-xxxSW_VERSIONxxx/installer-bzImage + APPEND initrd=rel-xxxSW_VERSIONxxx/installer-initrd bootifonly=1 devfs=nomount inst.repo=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/ inst.ks=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/net_storage_ks.cfg usbcore.autosuspend=-1 biosdevname=0 rd.net.timeout.dhcp=120 xxxAPPEND_OPTIONSxxx + IPAPPEND 2 + diff --git a/installer/pxe-network-installer/pxe-network-installer/default b/installer/pxe-network-installer/pxe-network-installer/default new file mode 100644 index 00000000..4c49036c --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/default @@ -0,0 +1,38 @@ +SERIAL 0 115200 +TIMEOUT 100 +DEFAULT menu.c32 + +# Menu Configuration +MENU WIDTH 80 +MENU MARGIN 10 +MENU PASSWORDMARGIN 3 +MENU ROWS 12 +MENU TABMSGROW 18 +MENU CMDLINEROW 18 +MENU ENDROW 24 +MENU PASSWORDROW 11 +MENU TIMEOUTROW 20 +MENU AUTOBOOT Automatic retry in # seconds + +MENU COLOR UNSEL 36;44 +MENU COLOR SEL 36;44 +MENU COLOR DISABLED 36;44 + +PROMPT 0 +NOESCAPE 1 +NOCOMPLETE 1 +ALLOWOPTIONS 0 + +MENU SEPARATOR + +LABEL Waiting for this node to be configured. + MENU DISABLE + +MENU SEPARATOR + +LABEL Please configure the personality for this node from the + MENU DISABLE + +LABEL controller node in order to proceed. 
+ MENU DEFAULT + KERNEL pxelinux.0 diff --git a/installer/pxe-network-installer/pxe-network-installer/default.static b/installer/pxe-network-installer/pxe-network-installer/default.static new file mode 100644 index 00000000..7d36ba55 --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/default.static @@ -0,0 +1,47 @@ +SERIAL 0 115200 +TIMEOUT 100 +DEFAULT menu.c32 + +# Menu Configuration +MENU WIDTH 80 +MENU MARGIN 10 +MENU PASSWORDMARGIN 3 +MENU ROWS 12 +MENU TABMSGROW 18 +MENU CMDLINEROW 18 +MENU ENDROW 24 +MENU PASSWORDROW 11 +MENU TIMEOUTROW 20 +MENU AUTOBOOT Automatic retry in # seconds + +MENU COLOR UNSEL 36;44 +MENU COLOR SEL 36;44 +MENU COLOR DISABLED 36;44 + +PROMPT 0 +NOESCAPE 1 +NOCOMPLETE 1 +ALLOWOPTIONS 0 + +MENU SEPARATOR + +LABEL Waiting for this node to be configured. + MENU DISABLE + +MENU SEPARATOR + +LABEL This system has been configured with static management + MENU DISABLE + +LABEL and infrastructure IP address allocation. This requires + MENU DISABLE + +LABEL that the node be manually provisioned in System + MENU DISABLE + +LABEL Inventory using the 'system host-add' CLI, GUI, or + MENU DISABLE + +LABEL sysinv-api equivalent. 
+ MENU DISABLE + KERNEL pxelinux.0 diff --git a/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-compute-install b/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-compute-install new file mode 100755 index 00000000..63a04a56 --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-compute-install @@ -0,0 +1,9 @@ +default=0 +timeout=10 +GRUB_HIDDEN_TIMEOUT=0 +GRUB_TIMEOUT_STYLE='countdown' + +menuentry '1) UEFI Compute' { + linuxefi rel-xxxSW_VERSIONxxx/installer-bzImage bootifonly=1 devfs=nomount inst.repo=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/ inst.ks=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/net_compute_ks.cfg usbcore.autosuspend=-1 biosdevname=0 rd.net.timeout.dhcp=120 ksdevice=$net_default_mac BOOTIF=$net_default_mac xxxAPPEND_OPTIONSxxx + initrdefi rel-xxxSW_VERSIONxxx/installer-initrd +} diff --git a/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-compute_lowlatency-install b/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-compute_lowlatency-install new file mode 100755 index 00000000..8c985758 --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-compute_lowlatency-install @@ -0,0 +1,9 @@ +default=0 +timeout=10 +GRUB_HIDDEN_TIMEOUT=0 +GRUB_TIMEOUT_STYLE='countdown' + +menuentry '1) UEFI Lowlatency Compute' { + linuxefi rel-xxxSW_VERSIONxxx/installer-bzImage bootifonly=1 devfs=nomount inst.repo=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/ inst.ks=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/net_compute_lowlatency_ks.cfg usbcore.autosuspend=-1 biosdevname=0 rd.net.timeout.dhcp=120 ksdevice=$net_default_mac BOOTIF=$net_default_mac xxxAPPEND_OPTIONSxxx + initrdefi rel-xxxSW_VERSIONxxx/installer-initrd +} diff --git a/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-controller-install b/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-controller-install new 
file mode 100755 index 00000000..189ef47b --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-controller-install @@ -0,0 +1,9 @@ +default=0 +timeout=10 +GRUB_HIDDEN_TIMEOUT=0 +GRUB_TIMEOUT_STYLE='countdown' + +menuentry '1) UEFI Standard Controller' { + linuxefi rel-xxxSW_VERSIONxxx/installer-bzImage bootifonly=1 devfs=nomount inst.repo=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/ inst.ks=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/net_controller_ks.cfg usbcore.autosuspend=-1 biosdevname=0 rd.net.timeout.dhcp=120 ksdevice=$net_default_mac BOOTIF=$net_default_mac xxxAPPEND_OPTIONSxxx + initrdefi rel-xxxSW_VERSIONxxx/installer-initrd +} diff --git a/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-smallsystem-install b/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-smallsystem-install new file mode 100755 index 00000000..4382aec8 --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-smallsystem-install @@ -0,0 +1,9 @@ +default=0 +timeout=10 +GRUB_HIDDEN_TIMEOUT=0 +GRUB_TIMEOUT_STYLE='countdown' + +menuentry '1) UEFI All-in-one' { + linuxefi rel-xxxSW_VERSIONxxx/installer-bzImage bootifonly=1 devfs=nomount inst.repo=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/ inst.ks=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/net_smallsystem_ks.cfg usbcore.autosuspend=-1 biosdevname=0 rd.net.timeout.dhcp=120 ksdevice=$net_default_mac BOOTIF=$net_default_mac xxxAPPEND_OPTIONSxxx + initrdefi rel-xxxSW_VERSIONxxx/installer-initrd +} diff --git a/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-smallsystem_lowlatency-install b/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-smallsystem_lowlatency-install new file mode 100755 index 00000000..d0cc674d --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-smallsystem_lowlatency-install @@ -0,0 +1,9 @@ +default=0 +timeout=10 
+GRUB_HIDDEN_TIMEOUT=0 +GRUB_TIMEOUT_STYLE='countdown' + +menuentry '1) UEFI All-in-one (lowlatency)' { + linuxefi rel-xxxSW_VERSIONxxx/installer-bzImage bootifonly=1 devfs=nomount inst.repo=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/ inst.ks=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/net_smallsystem_lowlatency_ks.cfg usbcore.autosuspend=-1 biosdevname=0 rd.net.timeout.dhcp=120 ksdevice=$net_default_mac BOOTIF=$net_default_mac xxxAPPEND_OPTIONSxxx + initrdefi rel-xxxSW_VERSIONxxx/installer-initrd +} diff --git a/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-storage-install b/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-storage-install new file mode 100755 index 00000000..4a76af22 --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/efi-centos-pxe-storage-install @@ -0,0 +1,9 @@ +default=0 +timeout=10 +GRUB_HIDDEN_TIMEOUT=0 +GRUB_TIMEOUT_STYLE='countdown' + +menuentry '1) UEFI Storage' { + linuxefi rel-xxxSW_VERSIONxxx/installer-bzImage bootifonly=1 devfs=nomount inst.repo=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/ inst.ks=http://pxecontroller/feed/rel-xxxSW_VERSIONxxx/net_storage_ks.cfg usbcore.autosuspend=-1 biosdevname=0 rd.net.timeout.dhcp=120 ksdevice=$net_default_mac BOOTIF=$net_default_mac xxxAPPEND_OPTIONSxxx + initrdefi rel-xxxSW_VERSIONxxx/installer-initrd +} diff --git a/installer/pxe-network-installer/pxe-network-installer/pxe-grub.cfg b/installer/pxe-network-installer/pxe-network-installer/pxe-grub.cfg new file mode 100755 index 00000000..1487632d --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/pxe-grub.cfg @@ -0,0 +1,56 @@ +default=0 + +# If the default menu fails then menu entry 5 is used. +fallback=5 + +timeout=10 +GRUB_HIDDEN_TIMEOUT=0 +GRUB_TIMEOUT_STYLE='countdown' + +mac=$net_default_mac + +# net_default_mac is the network interface that was used to load grub. 
+# We need to convert it from 00:1e:67:56:9d:c1 to 00-1e-67-56-9d-c1 to +# match the config file format (the 01- prefix is added below). + +regexp --set=new '(^..)' "$mac" +conf=$new +regexp --set=new '^.{3}(.{2})' "$mac" +conf=$conf-$new +regexp --set=new '^.{6}(.{2})' "$mac" +conf=$conf-$new +regexp --set=new '^.{9}(.{2})' "$mac" +conf=$conf-$new +regexp --set=new '^.{12}(.{2})' "$mac" +conf=$conf-$new +regexp --set=new '^.{15}(.{2})' "$mac" +conf=$conf-$new + +# First try to load the mac config, if it does not exist yet (it's created when a +# personality is assigned to a node) then grub.cfg is loaded again. We timeout +# for 10 sec between re-tries. + +# Menu 0 +menuentry 'Waiting for this node to be configured.' { + insmod net + insmod efinet + configfile pxelinux.cfg/efi-01-$conf +} +menuentry ' ' { + echo " " +} +menuentry 'Please configure the personality for this node from the' { + echo " " +} +menuentry 'controller node in order to proceed.' { + echo " " +} +menuentry ' ' { + echo " " +} +# Menu 5 +menuentry 'Node not configured.' { + insmod net + insmod efinet + configfile grub.cfg +} diff --git a/installer/pxe-network-installer/pxe-network-installer/pxe-grub.cfg.static b/installer/pxe-network-installer/pxe-network-installer/pxe-grub.cfg.static new file mode 100755 index 00000000..45c19571 --- /dev/null +++ b/installer/pxe-network-installer/pxe-network-installer/pxe-grub.cfg.static @@ -0,0 +1,70 @@ +default=0 + +# If the default menu fails then menu entry 8 is used. +fallback=8 + +timeout=10 +GRUB_HIDDEN_TIMEOUT=0 +GRUB_TIMEOUT_STYLE='countdown' + +mac=$net_default_mac + +# net_default_mac is the MAC of the network interface used to load grub. +# We need to convert it from 00:1e:67:56:9d:c1 to 00-1e-67-56-9d-c1 to +# match the config file format (the 01- prefix is added below).
+
+regexp --set=new '(^..)' "$mac"
+conf=$new
+regexp --set=new '^.{3}(.{2})' "$mac"
+conf=$conf-$new
+regexp --set=new '^.{6}(.{2})' "$mac"
+conf=$conf-$new
+regexp --set=new '^.{9}(.{2})' "$mac"
+conf=$conf-$new
+regexp --set=new '^.{12}(.{2})' "$mac"
+conf=$conf-$new
+regexp --set=new '^.{15}(.{2})' "$mac"
+conf=$conf-$new
+
+# First try to load the mac config, if it does not exist yet (it's created when a
+# personality is assigned to a node) then grub.cfg is loaded again. We timeout
+# for 10 sec between re-tries.
+
+# Menu 0
+menuentry 'Waiting for this node to be configured.' {
+    insmod net
+    insmod efinet
+    configfile pxelinux.cfg/efi-01-$conf
+}
+menuentry ' ' {
+    echo " "
+}
+menuentry 'This system has been configured with static management' {
+    echo " "
+}
+menuentry 'and infrastructure IP address allocation. This requires' {
+    echo " "
+}
+menuentry 'that the node be manually provisioned in System' {
+    echo " "
+}
+menuentry "Inventory using the 'system host-add' CLI, GUI, or" {
+    echo " "
+}
+
+# Menu 6
+menuentry 'sysinv-api equivalent.' {
+    echo " "
+}
+
+# Menu 7
+menuentry ' ' {
+    echo " "
+}
+
+# Menu 8
+menuentry 'Node not configured.' {
+    insmod net
+    insmod efinet
+    configfile grub.cfg
+}
diff --git a/installer/pxe-network-installer/pxe-network-installer/pxeboot-update.sh b/installer/pxe-network-installer/pxe-network-installer/pxeboot-update.sh
new file mode 100755
index 00000000..5664af2d
--- /dev/null
+++ b/installer/pxe-network-installer/pxe-network-installer/pxeboot-update.sh
@@ -0,0 +1,187 @@
+#!/bin/bash
+#
+# Copyright (c) 2016-2017 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+#
+# Using a specified template, generate a node-specific pxeboot.cfg file
+# for BIOS and UEFI mode. This script logs to user.log
+#
+# Command example;
+# /usr/sbin/pxeboot-update-18.03.sh -i /pxeboot/pxelinux.cfg.files/pxe-controller-install-18.03
+# -o /pxeboot/pxelinux.cfg/01-08-00-27-3e-f8-05 -b sda -r sda -t -c ttyS0,115200
+#
+
+# Print the command-line help to stderr.
+function usage()
+{
+    cat >&2 <<EOF
+$0: This utility is used to generate a node-specific pxeboot.cfg file
+
+Arguments:
+    -i <input template> : Specify template to use
+    -o <output file>    : Specify output filename
+    -t                  : Use text install (optional)
+    -g                  : Use graphical install (optional)
+    -c <console>        : Specify serial console (optional)
+    -b <boot device>    : Specify boot device
+    -r <rootfs device>  : Specify rootfs device
+    -u <tisnotify url>  : Base url for TIS install progress notification
+    -s <mode>           : Specify Security Profile mode (optional)
+    -T <tboot value>    : Specify whether or not to use tboot (optional)
+
+EOF
+}
+
+declare text_install="inst.text"
+
+# generate_config <input template> <output file>
+#
+# Copy the template to the output file, replacing the xxxAPPEND_OPTIONSxxx
+# placeholder with the current value of the global APPEND_OPTIONS. Any
+# failure is logged through logger and terminates the script with status 1.
+function generate_config()
+{
+    local input=$1
+    local output=$2
+    local output_dir
+    local replacement
+
+    if [ ! -f "$input" ]
+    then
+        logger --stderr -t "$0" "Error: Input file $input does not exist"
+        exit 1
+    fi
+
+    output_dir=$(dirname "$output")
+    if [ ! -w "$output_dir" ]
+    then
+        logger --stderr -t "$0" "Error: Destination directory $output_dir not writeable"
+        exit 1
+    fi
+
+    if [ -e "$output" ] && [ ! -w "$output" ]
+    then
+        logger --stderr -t "$0" "Error: Destination file $output exists and is not writeable"
+        exit 1
+    fi
+
+    # Escape the characters that are special on the replacement side of the
+    # s#...#...# command below (backslash, the '#' delimiter and '&') so the
+    # options are inserted literally.
+    replacement=$(printf '%s' "$APPEND_OPTIONS" | sed -e 's/[\\#&]/\\&/g')
+
+    if ! sed -e "s#xxxAPPEND_OPTIONSxxx#$replacement#" "$input" > "$output" \
+        || [ ! -f "$output" ]
+    then
+        logger --stderr -t "$0" "Error: Failed to generate pxeboot file $output"
+        exit 1
+    fi
+}
+
+parms="$*"
+logger -t "$0" " $parms"
+
+while getopts "i:o:tgc:b:r:u:s:T:h" opt
+do
+    case $opt in
+        i)
+            input_file=$OPTARG
+            input_file_efi=$(dirname "$input_file")/efi-$(basename "$input_file")
+            ;;
+        o)
+            output_file=$OPTARG
+            output_file_efi=$(dirname "$output_file")/efi-$(basename "$output_file")
+            ;;
+        t)
+            text_install="inst.text"
+            ;;
+        g)
+            # We currently do not support Graphics install with Centos. Enforce
+            # the text install.
+            # text_install="inst.graphical"
+            text_install="inst.text"
+            ;;
+        c)
+            console=$OPTARG
+            ;;
+        b)
+            boot_device=$OPTARG
+            ;;
+        r)
+            rootfs_device=$OPTARG
+            ;;
+        u)
+            tisnotify=$OPTARG
+            ;;
+        s)
+            security_profile=$OPTARG
+            ;;
+        T)
+            tboot=$OPTARG
+            ;;
+        h)
+            usage
+            exit 1
+            ;;
+        *)
+            usage
+            exit 1
+            ;;
+    esac
+done
+
+# Validate parameters
+if [ -z "$input_file" ] \
+    || [ -z "$input_file_efi" ] \
+    || [ -z "$output_file" ] \
+    || [ -z "$output_file_efi" ] \
+    || [ -z "$boot_device" ] \
+    || [ -z "$rootfs_device" ]
+then
+    logger --stderr -t "$0" "Error: One or more mandatory options not specified: $parms"
+    usage
+    exit 1
+fi
+
+APPEND_OPTIONS="boot_device=$boot_device rootfs_device=$rootfs_device"
+
+if [ -n "$text_install" ]
+then
+    APPEND_OPTIONS="$APPEND_OPTIONS $text_install"
+fi
+
+if [ -n "$console" ]
+then
+    APPEND_OPTIONS="$APPEND_OPTIONS console=$console"
+fi
+
+if [ -n "$tisnotify" ]
+then
+    APPEND_OPTIONS="$APPEND_OPTIONS tisnotify=$tisnotify"
+fi
+
+# We now require GPT partitions for all disks regardless of size
+APPEND_OPTIONS="$APPEND_OPTIONS inst.gpt"
+
+if [ -n "$security_profile" ]
+then
+    APPEND_OPTIONS="$APPEND_OPTIONS security_profile=$security_profile"
+fi
+
+# BIOS (pxelinux) configuration
+generate_config "$input_file" "$output_file"
+
+# for extended security profile UEFI boot only,
+# a tboot option will be passed to target boot option menu
+if [ "$security_profile" == "extended" ] && [ -n "$tboot" ]
+then
+    APPEND_OPTIONS="$APPEND_OPTIONS tboot=$tboot"
+fi
+
+# UEFI (grub) configuration; the efi- prefixed names were derived from -i/-o
+generate_config "$input_file_efi" "$output_file_efi"
+
+exit 0
diff --git a/kickstart/LICENSE b/kickstart/LICENSE
new file mode 100755
index 00000000..d6456956
--- /dev/null
+++ b/kickstart/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/kickstart/centos/build_srpm.data b/kickstart/centos/build_srpm.data new file mode 100644 index 00000000..15b87a79 --- /dev/null +++ b/kickstart/centos/build_srpm.data @@ -0,0 +1,3 @@ +SRC_DIR="${CGCS_BASE}/mwa-beas/bsp-files" +COPY_LIST="$PKG_BASE/LICENSE" +TIS_PATCH_VER=0 diff --git a/kickstart/centos/platform-kickstarts.spec b/kickstart/centos/platform-kickstarts.spec new file mode 100644 index 00000000..94204259 --- /dev/null +++ b/kickstart/centos/platform-kickstarts.spec @@ -0,0 +1,64 @@ +Name: platform-kickstarts +Version: 1.0.0 +Release: %{tis_patch_ver}%{?_tis_dist} +Summary: Platform Kickstarts +License: Apache-2.0 +Packager: Wind River +URL: unknown + +Source0: %{name}-%{version}.tar.gz +Source1: LICENSE + +BuildArch: noarch + +# Build deps for ./centos-ks-gen.pl; tags placed after the description header become description text +BuildRequires: perl +BuildRequires: perl(Getopt::Long) +BuildRequires: perl(POSIX) +%define feed_dir /www/pages/feed/rel-%{platform_release} + +%description +Platform kickstart files + +%prep +%setup + +%build +./centos-ks-gen.pl --release %{platform_release} +cp %{SOURCE1} . 
+ +%install + +install -d -m 0755 %{buildroot}%{feed_dir} +install -m 0444 generated/* %{buildroot}%{feed_dir}/ + +install -d -m 0755 %{buildroot}/pxeboot +install -D -m 0444 pxeboot/* %{buildroot}/pxeboot + +install -d -m 0755 %{buildroot}/extra_cfgs +install -D -m 0444 extra_cfgs/* %{buildroot}/extra_cfgs + +%files +%defattr(-,root,root,-) +%license LICENSE +%{feed_dir} + +%package pxeboot +Summary: Kickstarts for pxeboot server + +%description pxeboot +Kickstarts for pxeboot server + +%files pxeboot +%defattr(-,root,root,-) +/pxeboot/ + +%package extracfgs +Summary: Extra lab-usage kickstarts + +%description extracfgs +Extra lab-usage kickstarts + +%files extracfgs +%defattr(-,root,root,-) +/extra_cfgs/ diff --git a/mtce-common/LICENSE b/mtce-common/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/mtce-common/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/mtce-common/PKG-INFO b/mtce-common/PKG-INFO new file mode 100644 index 00000000..8fd02144 --- /dev/null +++ b/mtce-common/PKG-INFO @@ -0,0 +1,16 @@ +Metadata-Version: 1.1 +Name: cgts-mtce-common +Version: 1.0 +Summary: Titanium Cloud Platform Common Node Maintenance Package +Home-page: +Author: Windriver +Author-email: info@windriver.com +License: Apache-2.0 + +Description: Maintenance common package containing host maintenance and heartbeat + agent/client daemons as well as the hardware and process monitor + daemons, hardware watchdog process, guest heartbeat agent/client + daemons, resource and file system daemons as well as initialization + and support files for each. 
+ +Platform: UNKNOWN diff --git a/mtce-common/centos/build_srpm.data b/mtce-common/centos/build_srpm.data new file mode 100644 index 00000000..951eace4 --- /dev/null +++ b/mtce-common/centos/build_srpm.data @@ -0,0 +1,3 @@ +SRC_DIR="cgts-mtce-common-1.0" +TIS_PATCH_VER=133 +BUILD_IS_SLOW=5 diff --git a/mtce-common/centos/cgts-mtce-common.spec b/mtce-common/centos/cgts-mtce-common.spec new file mode 100644 index 00000000..98e1564d --- /dev/null +++ b/mtce-common/centos/cgts-mtce-common.spec @@ -0,0 +1,768 @@ +Summary: Titanuim Server Common Maintenance Package +Name: cgts-mtce-common +Version: 1.0 +Release: %{tis_patch_ver}%{?_tis_dist} +License: Apache-2.0 +Group: base +Packager: Wind River +URL: unknown + +Source0: %{name}-%{version}.tar.gz + +BuildRequires: libssh2 +BuildRequires: libssh2-devel +BuildRequires: json-c +BuildRequires: json-c-devel +BuildRequires: fm-common +BuildRequires: fm-common-dev +BuildRequires: openssl +BuildRequires: openssl-devel +BuildRequires: libevent +BuildRequires: libevent-devel +BuildRequires: fm-mgr +BuildRequires: expect +BuildRequires: postgresql +BuildRequires: libuuid-devel +BuildRequires: guest-client-devel +BuildRequires: systemd-devel +BuildRequires: cppcheck +Requires: util-linux +Requires: /bin/bash +Requires: /bin/systemctl +Requires: dpkg +Requires: time +Requires: cgts-mtce-common-rmon >= 1.0 +Requires: libevent-2.0.so.5()(64bit) +Requires: expect +Requires: libfmcommon.so.1()(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.14)(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.9)(64bit) +Requires: fm-common >= 1.0 +Requires: libamon.so.1()(64bit) +Requires: libc.so.6(GLIBC_2.2.5)(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.11)(64bit) +Requires: /bin/sh +Requires: cgts-mtce-common-pmon >= 1.0 +Requires: librt.so.1()(64bit) +Requires: libc.so.6(GLIBC_2.3)(64bit) +Requires: libc.so.6(GLIBC_2.14)(64bit) +Requires: libjson-c.so.2()(64bit) +Requires: libpthread.so.0(GLIBC_2.2.5)(64bit) +Requires: librmonapi.so.1()(64bit) +Requires: 
librt.so.1(GLIBC_2.3.3)(64bit) +Requires: libgcc_s.so.1(GCC_3.0)(64bit) +Requires: libstdc++.so.6(CXXABI_1.3)(64bit) +Requires: libevent >= 2.0.21 +Requires: librt.so.1(GLIBC_2.2.5)(64bit) +Requires: libuuid.so.1()(64bit) +Requires: libm.so.6()(64bit) +Requires: rtld(GNU_HASH) +Requires: libstdc++.so.6()(64bit) +Requires: libc.so.6(GLIBC_2.4)(64bit) +Requires: libc.so.6()(64bit) +Requires: libssh2.so.1()(64bit) +Requires: libgcc_s.so.1()(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4)(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.15)(64bit) +Requires: libpthread.so.0()(64bit) +Requires: /usr/bin/expect +Requires: python-rtslib + +%description +Titanium Cloud Host Maintenance services. A suite of daemons that provide +host maintainability and a high level of fault detection with automatic +notification and recovery. The Maintenance Service (mtcAgent/mtcClient) +manages hosts according to an abbreviated version of the CCITT X.731 ITU +specification. The Heartbeat Service (hbsAgent/hbsClient) adds fast failure +detection over the management and infrastructure networks. The Process +Monitor service (pmond) adds both passive and active process monitoring and +automatic recovery of stopped or killed processes. The File System Monitor +Service (fsmond) adds detection and reporting of local file system +problems. The Hardware Monitor Service (hwmond) adds present and predictive +hardware failure detection, reporting and recovery. The Resource Monitor +Service (rmond) adds resource monitoring with present and predictive +failure and overload detection and reporting. The Guest Services +(guestAgent/guestServer) daemons control access into and heartbeat of guest +VMs on the compute. The Host Watchdog (hostwd) daemon watches for errors in +pmond and logs system information on error. 
All of these maintenance +services improve MTTD of node failures as well as resource overload and out +of spec operating conditions that can reduce outage time through automated +notification and recovery thereby improving overall platform availability +for the customer. + +%package -n cgts-mtce-common-pmon +Summary: Titanuim Server Maintenance Process Monitor Package +Group: base +BuildRequires: cppcheck +Requires: util-linux +Requires: /bin/bash +Requires: /bin/systemctl +Requires: dpkg +Requires: time +Requires: libstdc++.so.6(CXXABI_1.3)(64bit) +Requires: libfmcommon.so.1()(64bit) +Requires: libc.so.6(GLIBC_2.7)(64bit) +Requires: fm-common >= 1.0 +Requires: libc.so.6(GLIBC_2.2.5)(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.11)(64bit) +Requires: /bin/sh +Requires: librt.so.1()(64bit) +Requires: libc.so.6(GLIBC_2.3)(64bit) +Requires: libc.so.6(GLIBC_2.14)(64bit) +Requires: libpthread.so.0(GLIBC_2.2.5)(64bit) +Requires: librt.so.1(GLIBC_2.3.3)(64bit) +Requires: libgcc_s.so.1(GCC_3.0)(64bit) +Requires: librt.so.1(GLIBC_2.2.5)(64bit) +Requires: libm.so.6()(64bit) +Requires: rtld(GNU_HASH) +Requires: libstdc++.so.6()(64bit) +Requires: libc.so.6(GLIBC_2.4)(64bit) +Requires: libc.so.6()(64bit) +Requires: libgcc_s.so.1()(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4)(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.15)(64bit) +Requires: libpthread.so.0()(64bit) +Provides: libamon.so.1()(64bit) + +%description -n cgts-mtce-common-pmon +Titanium Cloud Maintenance Process Monitor service (pmond) with +passive (pid), active (msg) and status (qry) process monitoring with +automatic recovery and failure reporting of registered failed processes. 
+ +%package -n cgts-mtce-common-rmon +Summary: Titanuim Server Maintenance Resource Monitor Package +Group: base +Requires: /bin/bash +Requires: util-linux +Requires: /bin/systemctl +Requires: dpkg +Requires: time +Requires: libjson-c.so.2()(64bit) +Requires: libstdc++.so.6(CXXABI_1.3)(64bit) +Requires: libevent-2.0.so.5()(64bit) +Requires: libfmcommon.so.1()(64bit) +Requires: librmonapi.so.1()(64bit) +Requires: fm-common >= 1.0 +Requires: libc.so.6(GLIBC_2.2.5)(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.11)(64bit) +Requires: /bin/sh +Requires: librt.so.1()(64bit) +Requires: libc.so.6(GLIBC_2.3)(64bit) +Requires: libc.so.6(GLIBC_2.14)(64bit) +Requires: libpthread.so.0(GLIBC_2.2.5)(64bit) +Requires: librt.so.1(GLIBC_2.3.3)(64bit) +Requires: libgcc_s.so.1(GCC_3.0)(64bit) +Requires: libevent >= 2.0.21 +Requires: librt.so.1(GLIBC_2.2.5)(64bit) +Requires: libuuid.so.1()(64bit) +Requires: libm.so.6()(64bit) +Requires: rtld(GNU_HASH) +Requires: libstdc++.so.6()(64bit) +Requires: libc.so.6()(64bit) +Requires: libgcc_s.so.1()(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4)(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.15)(64bit) +Requires: libpthread.so.0()(64bit) +Provides: librmonapi.so.1()(64bit) + +%description -n cgts-mtce-common-rmon +Titanium Cloud Host Maintenance Resource Monitor Service (rmond) adds +threshold based monitoring with predictive severity level alarming for +out of tolerance utilization of critical resources such as memory, cpu, +file system, interface state, etc. 
+ +%package -n cgts-mtce-common-hwmon +Summary: Titanuim Server Maintenance Hardware Monitor Package +Group: base +Requires: dpkg +Requires: time +Requires: /bin/bash +Requires: libjson-c.so.2()(64bit) +Requires: libstdc++.so.6(CXXABI_1.3)(64bit) +Requires: librt.so.1(GLIBC_2.2.5)(64bit) +Requires: libfmcommon.so.1()(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.14)(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.9)(64bit) +Requires: fm-common >= 1.0 +Requires: libc.so.6(GLIBC_2.2.5)(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.11)(64bit) +Requires: /bin/sh +Requires: librt.so.1()(64bit) +Requires: libc.so.6(GLIBC_2.3)(64bit) +Requires: libc.so.6(GLIBC_2.14)(64bit) +Requires: libpthread.so.0(GLIBC_2.2.5)(64bit) +Requires: librt.so.1(GLIBC_2.3.3)(64bit) +Requires: libgcc_s.so.1(GCC_3.0)(64bit) +Requires: libevent >= 2.0.21 +Requires: libevent-2.0.so.5()(64bit) +Requires: libm.so.6()(64bit) +Requires: rtld(GNU_HASH) +Requires: libstdc++.so.6()(64bit) +Requires: libc.so.6()(64bit) +Requires: libssh2.so.1()(64bit) +Requires: libgcc_s.so.1()(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4)(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.15)(64bit) +Requires: libpthread.so.0()(64bit) + +%description -n cgts-mtce-common-hwmon +Titanium Cloud Host Maintenance Hardware Monitor Service (hwmond) adds +in and out of service hardware sensor monitoring, alarming and recovery +handling. 
+ +%package -n cgts-mtce-common-guestAgent +Summary: Titanuim Server Maintenance Guest Agent Package +Group: base +Requires: dpkg +Requires: time +Requires: libjson-c.so.2()(64bit) +Requires: libstdc++.so.6(CXXABI_1.3)(64bit) +Requires: librt.so.1(GLIBC_2.2.5)(64bit) +Requires: libfmcommon.so.1()(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.9)(64bit) +Requires: fm-common >= 1.0 +Requires: libc.so.6(GLIBC_2.2.5)(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.11)(64bit) +Requires: /bin/sh +Requires: librt.so.1()(64bit) +Requires: libc.so.6(GLIBC_2.3)(64bit) +Requires: libc.so.6(GLIBC_2.14)(64bit) +Requires: libpthread.so.0(GLIBC_2.2.5)(64bit) +Requires: librt.so.1(GLIBC_2.3.3)(64bit) +Requires: libgcc_s.so.1(GCC_3.0)(64bit) +Requires: libevent >= 2.0.21 +Requires: libevent-2.0.so.5()(64bit) +Requires: libuuid.so.1()(64bit) +Requires: libm.so.6()(64bit) +Requires: rtld(GNU_HASH) +Requires: libstdc++.so.6()(64bit) +Requires: libc.so.6()(64bit) +Requires: libgcc_s.so.1()(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4)(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.15)(64bit) +Requires: libpthread.so.0()(64bit) + +%description -n cgts-mtce-common-guestAgent +Titanium Cloud Host Maintenance Guest Agent Service assists in +VM guest heartbeat control and failure reporting at the controller +level. 
+ +%package -n cgts-mtce-common-guestServer +Summary: Titanuim Server Maintenance Guest Server Package +Group: base +Requires: util-linux +Requires: /bin/bash +Requires: /bin/systemctl +Requires: dpkg +Requires: libjson-c.so.2()(64bit) +Requires: libstdc++.so.6(CXXABI_1.3)(64bit) +Requires: librt.so.1(GLIBC_2.2.5)(64bit) +Requires: libfmcommon.so.1()(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.9)(64bit) +Requires: fm-common >= 1.0 +Requires: libc.so.6(GLIBC_2.2.5)(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.11)(64bit) +Requires: /bin/sh +Requires: librt.so.1()(64bit) +Requires: libc.so.6(GLIBC_2.3)(64bit) +Requires: libc.so.6(GLIBC_2.14)(64bit) +Requires: libpthread.so.0(GLIBC_2.2.5)(64bit) +Requires: librt.so.1(GLIBC_2.3.3)(64bit) +Requires: libgcc_s.so.1(GCC_3.0)(64bit) +Requires: libevent >= 2.0.21 +Requires: libevent-2.0.so.5()(64bit) +Requires: libuuid.so.1()(64bit) +Requires: libm.so.6()(64bit) +Requires: rtld(GNU_HASH) +Requires: libstdc++.so.6()(64bit) +Requires: libc.so.6(GLIBC_2.4)(64bit) +Requires: libc.so.6()(64bit) +Requires: libgcc_s.so.1()(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4)(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.15)(64bit) +Requires: libpthread.so.0()(64bit) + +%description -n cgts-mtce-common-guestServer +Titanium Cloud Maintenance Guest Server assists in VM guest +heartbeat control and failure reporting at the compute level. 
+ +%package -n cgts-mtce-common-hostw +Summary: Titanium Server Common Maintenance Package +Group: base +Requires: util-linux +Requires: /bin/bash +Requires: /bin/systemctl +Requires: dpkg +Requires: libstdc++.so.6(CXXABI_1.3)(64bit) +Requires: libc.so.6(GLIBC_2.2.5)(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.11)(64bit) +Requires: librt.so.1()(64bit) +Requires: libc.so.6(GLIBC_2.3)(64bit) +Requires: libpthread.so.0(GLIBC_2.2.5)(64bit) +Requires: librt.so.1(GLIBC_2.3.3)(64bit) +Requires: libgcc_s.so.1(GCC_3.0)(64bit) +Requires: librt.so.1(GLIBC_2.2.5)(64bit) +Requires: libm.so.6()(64bit) +Requires: rtld(GNU_HASH) +Requires: libstdc++.so.6()(64bit) +Requires: libc.so.6()(64bit) +Requires: libgcc_s.so.1()(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4)(64bit) +Requires: libstdc++.so.6(GLIBCXX_3.4.15)(64bit) +Requires: libpthread.so.0()(64bit) + +%description -n cgts-mtce-common-hostw +Titanium Cloud Host Maintenance services. A suite of daemons that provide +host maintainability and a high level of fault detection with automatic +notification and recovery. The Maintenance Service (mtcAgent/mtcClient) +manages hosts according to an abbreviated version of the CCITT X.731 ITU +specification. The Heartbeat Service (hbsAgent/hbsClient) adds fast failure +detection over the management and infrastructure networks. The Process +Monitor service (pmond) adds both passive and active process monitoring and +automatic recovery of stopped or killed processes. The File System Monitor +Service (fsmond) adds detection and reporting of local file system +problems. The Hardware Monitor Service (hwmond) adds present and predictive +hardware failure detection, reporting and recovery. The Resource Monitor +Service (rmond) adds resource monitoring with present and predictive +failure and overload detection and reporting. The Guest Services +(guestAgent/guestServer) daemons control access into and heartbeat of guest +VMs on the compute. 
The Host Watchdog (hostwd) daemon watches for errors in +pmond and logs system information on error. All of these maintenance +services improve MTTD of node failures as well as resource overload and out +of spec operating conditions that can reduce outage time through automated +notification and recovery thereby improving overall platform availability +for the customer. + +%define local_dir /usr/local +%define local_bindir %{local_dir}/bin +%define local_sbindir %{local_dir}/sbin +%define local_etc_pmond %{_sysconfdir}/pmon.d +%define local_etc_rmond %{_sysconfdir}/rmon.d +%define local_etc_goenabledd %{_sysconfdir}/goenabled.d +%define local_etc_servicesd %{_sysconfdir}/services.d +%define local_etc_logrotated %{_sysconfdir}/logrotate.d +%define bmc_profilesd %{_sysconfdir}/bmc/server_profiles.d +%define ocf_resourced /usr/lib/ocf/resource.d + +%prep +%setup + +# Build for main cgts-mtce-common package +%build +VER=%{version} +MAJOR=`echo $VER | awk -F . '{print $1}'` +MINOR=`echo $VER | awk -F . '{print $2}'` +make MAJOR=$MAJOR MINOR=$MINOR %{?_smp_mflags} build + +%global _buildsubdir %{_builddir}/%{name}-%{version} + +# Install for main cgts-mtce-common package +%install + +VER=%{version} +MAJOR=`echo $VER | awk -F . '{print $1}'` +MINOR=`echo $VER | awk -F . 
'{print $2}'` + +install -m 755 -d %{buildroot}%{_sysconfdir} +install -m 755 -d %{buildroot}/usr +install -m 755 -d %{buildroot}/%{_bindir} +install -m 755 -d %{buildroot}/usr/local +install -m 755 -d %{buildroot}%{local_bindir} +install -m 755 -d %{buildroot}/usr/local/sbin +install -m 755 -d %{buildroot}/%{_sbindir} +install -m 755 -d %{buildroot}/lib +install -m 755 -d %{buildroot}%{_sysconfdir}/mtc +install -m 755 -d %{buildroot}%{_sysconfdir}/mtc/tmp + +# Resource Agent Stuff +install -m 755 -d %{buildroot}/usr/lib +install -m 755 -d %{buildroot}/usr/lib/ocf +install -m 755 -d %{buildroot}/usr/lib/ocf/resource.d +install -m 755 -d %{buildroot}/usr/lib/ocf/resource.d/platform +install -m 755 -p -D %{_buildsubdir}/scripts/mtcAgent %{buildroot}/usr/lib/ocf/resource.d/platform/mtcAgent +install -m 755 -p -D %{_buildsubdir}/scripts/hbsAgent %{buildroot}/usr/lib/ocf/resource.d/platform/hbsAgent +install -m 755 -p -D %{_buildsubdir}/hwmon/scripts/ocf/hwmon %{buildroot}/usr/lib/ocf/resource.d/platform/hwmon +install -m 755 -p -D %{_buildsubdir}/guest/scripts/guestAgent.ocf %{buildroot}/usr/lib/ocf/resource.d/platform/guestAgent + +# config files +install -m 644 -p -D %{_buildsubdir}/scripts/mtc.ini %{buildroot}%{_sysconfdir}/mtc.ini +install -m 644 -p -D %{_buildsubdir}/scripts/mtc.conf %{buildroot}%{_sysconfdir}/mtc.conf +install -m 644 -p -D %{_buildsubdir}/fsmon/scripts/fsmond.conf %{buildroot}%{_sysconfdir}/mtc/fsmond.conf +install -m 644 -p -D %{_buildsubdir}/hwmon/scripts/hwmond.conf %{buildroot}%{_sysconfdir}/mtc/hwmond.conf +install -m 644 -p -D %{_buildsubdir}/pmon/scripts/pmond.conf %{buildroot}%{_sysconfdir}/mtc/pmond.conf +install -m 644 -p -D %{_buildsubdir}/rmon/scripts/rmond.conf %{buildroot}%{_sysconfdir}/mtc/rmond.conf +install -m 644 -p -D %{_buildsubdir}/guest/scripts/guest.ini %{buildroot}%{_sysconfdir}/mtc/guestAgent.ini +install -m 644 -p -D %{_buildsubdir}/guest/scripts/guest.ini %{buildroot}%{_sysconfdir}/mtc/guestServer.ini +install -m 644 -p 
-D %{_buildsubdir}/hostw/scripts/hostwd.conf %{buildroot}%{_sysconfdir}/mtc/hostwd.conf + +install -m 755 -d %{buildroot}/%{_sysconfdir}/bmc/server_profiles.d +install -m 644 -p -D %{_buildsubdir}/scripts/sensor_hp360_v1_ilo_v4.profile %{buildroot}/%{_sysconfdir}/bmc/server_profiles.d/sensor_hp360_v1_ilo_v4.profile +install -m 644 -p -D %{_buildsubdir}/scripts/sensor_hp380_v1_ilo_v4.profile %{buildroot}/%{_sysconfdir}/bmc/server_profiles.d/sensor_hp380_v1_ilo_v4.profile +install -m 644 -p -D %{_buildsubdir}/scripts/sensor_quanta_v1_ilo_v4.profile %{buildroot}/%{_sysconfdir}/bmc/server_profiles.d/sensor_quanta_v1_ilo_v4.profile + +# binaries +install -m 755 -p -D %{_buildsubdir}/maintenance/mtcAgent %{buildroot}/%{local_bindir}/mtcAgent +install -m 755 -p -D %{_buildsubdir}/maintenance/mtcClient %{buildroot}/%{local_bindir}/mtcClient +install -m 755 -p -D %{_buildsubdir}/heartbeat/hbsAgent %{buildroot}/%{local_bindir}/hbsAgent +install -m 755 -p -D %{_buildsubdir}/heartbeat/hbsClient %{buildroot}/%{local_bindir}/hbsClient +install -m 755 -p -D %{_buildsubdir}/guest/guestServer %{buildroot}/%{local_bindir}/guestServer +install -m 755 -p -D %{_buildsubdir}/guest/guestAgent %{buildroot}/%{local_bindir}/guestAgent +install -m 755 -p -D %{_buildsubdir}/pmon/pmond %{buildroot}/%{local_bindir}/pmond +install -m 755 -p -D %{_buildsubdir}/hostw/hostwd %{buildroot}/%{local_bindir}/hostwd +install -m 755 -p -D %{_buildsubdir}/rmon/rmond %{buildroot}/%{local_bindir}/rmond +install -m 755 -p -D %{_buildsubdir}/fsmon/fsmond %{buildroot}/%{local_bindir}/fsmond +install -m 755 -p -D %{_buildsubdir}/hwmon/hwmond %{buildroot}/%{local_bindir}/hwmond +install -m 755 -p -D %{_buildsubdir}/mtclog/mtclogd %{buildroot}/%{local_bindir}/mtclogd +install -m 755 -p -D %{_buildsubdir}/alarm/mtcalarmd %{buildroot}/%{local_bindir}/mtcalarmd +install -m 755 -p -D %{_buildsubdir}/rmon/rmon_resource_notify/rmon_resource_notify %{buildroot}/%{local_bindir}/rmon_resource_notify +install -m 755 -p 
-D %{_buildsubdir}/scripts/wipedisk %{buildroot}/%{local_bindir}/wipedisk +install -m 755 -p -D %{_buildsubdir}/common/fsync %{buildroot}/%{_sbindir}/fsync +install -m 700 -p -D %{_buildsubdir}/pmon/scripts/pmon-restart %{buildroot}/%{local_sbindir}/pmon-restart +install -m 700 -p -D %{_buildsubdir}/pmon/scripts/pmon-start %{buildroot}/%{local_sbindir}/pmon-start +install -m 700 -p -D %{_buildsubdir}/pmon/scripts/pmon-stop %{buildroot}/%{local_sbindir}/pmon-stop + +# test tools +install -m 755 %{_buildsubdir}/hwmon/scripts/show_hp360 %{buildroot}/%{_sbindir}/show_hp360 +install -m 755 %{_buildsubdir}/hwmon/scripts/show_hp380 %{buildroot}/%{_sbindir}/show_hp380 +install -m 755 %{_buildsubdir}/hwmon/scripts/show_quanta %{buildroot}/%{_sbindir}/show_quanta + +# init script files +install -m 755 -p -D %{_buildsubdir}/scripts/mtcClient %{buildroot}%{_sysconfdir}/init.d/mtcClient +install -m 755 -p -D %{_buildsubdir}/scripts/hbsClient %{buildroot}%{_sysconfdir}/init.d/hbsClient +install -m 755 -p -D %{_buildsubdir}/guest/scripts/guestServer %{buildroot}%{_sysconfdir}/init.d/guestServer +install -m 755 -p -D %{_buildsubdir}/guest/scripts/guestAgent %{buildroot}%{_sysconfdir}/init.d/guestAgent +install -m 755 -p -D %{_buildsubdir}/hwmon/scripts/lsb/hwmon %{buildroot}%{_sysconfdir}/init.d/hwmon +install -m 755 -p -D %{_buildsubdir}/fsmon/scripts/fsmon %{buildroot}%{_sysconfdir}/init.d/fsmon +install -m 755 -p -D %{_buildsubdir}/scripts/mtclog %{buildroot}%{_sysconfdir}/init.d/mtclog +install -m 755 -p -D %{_buildsubdir}/pmon/scripts/pmon %{buildroot}%{_sysconfdir}/init.d/pmon +install -m 755 -p -D %{_buildsubdir}/rmon/scripts/rmon %{buildroot}%{_sysconfdir}/init.d/rmon +install -m 755 -p -D %{_buildsubdir}/hostw/scripts/hostw %{buildroot}%{_sysconfdir}/init.d/hostw +install -m 755 -p -D %{_buildsubdir}/alarm/scripts/mtcalarm.init %{buildroot}%{_sysconfdir}/init.d/mtcalarm + +# install -m 755 -p -D %{_buildsubdir}/scripts/config %{buildroot}%{_sysconfdir}/init.d/config + +# 
TODO: Init hack. Should move to proper module +install -m 755 -p -D %{_buildsubdir}/scripts/hwclock.sh %{buildroot}%{_sysconfdir}/init.d/hwclock.sh +install -m 644 -p -D %{_buildsubdir}/scripts/hwclock.service %{buildroot}%{_unitdir}/hwclock.service + +# systemd service files +install -m 644 -p -D %{_buildsubdir}/fsmon/scripts/fsmon.service %{buildroot}%{_unitdir}/fsmon.service +install -m 644 -p -D %{_buildsubdir}/hwmon/scripts/hwmon.service %{buildroot}%{_unitdir}/hwmon.service +install -m 644 -p -D %{_buildsubdir}/rmon/scripts/rmon.service %{buildroot}%{_unitdir}/rmon.service +install -m 644 -p -D %{_buildsubdir}/pmon/scripts/pmon.service %{buildroot}%{_unitdir}/pmon.service +install -m 644 -p -D %{_buildsubdir}/hostw/scripts/hostw.service %{buildroot}%{_unitdir}/hostw.service +install -m 644 -p -D %{_buildsubdir}/guest/scripts/guestServer.service %{buildroot}%{_unitdir}/guestServer.service +install -m 644 -p -D %{_buildsubdir}/guest/scripts/guestAgent.service %{buildroot}%{_unitdir}/guestAgent.service +install -m 644 -p -D %{_buildsubdir}/scripts/mtcClient.service %{buildroot}%{_unitdir}/mtcClient.service +install -m 644 -p -D %{_buildsubdir}/scripts/hbsClient.service %{buildroot}%{_unitdir}/hbsClient.service +install -m 644 -p -D %{_buildsubdir}/scripts/mtclog.service %{buildroot}%{_unitdir}/mtclog.service +install -m 644 -p -D %{_buildsubdir}/scripts/goenabled.service %{buildroot}%{_unitdir}/goenabled.service +install -m 644 -p -D %{_buildsubdir}/scripts/runservices.service %{buildroot}%{_unitdir}/runservices.service +install -m 644 -p -D %{_buildsubdir}/alarm/scripts/mtcalarm.service %{buildroot}%{_unitdir}/mtcalarm.service + +# go enabled stuff +install -m 755 -d %{buildroot}%{local_etc_goenabledd} +install -m 755 -p -D %{_buildsubdir}/scripts/goenabled %{buildroot}%{_sysconfdir}/init.d/goenabled + +# start or stop services test script +install -m 755 -d %{buildroot}%{local_etc_servicesd} +install -m 755 -d %{buildroot}%{local_etc_servicesd}/controller 
+install -m 755 -d %{buildroot}%{local_etc_servicesd}/compute +install -m 755 -d %{buildroot}%{local_etc_servicesd}/storage +install -m 755 -p -D %{_buildsubdir}/scripts/mtcTest %{buildroot}/%{local_etc_servicesd}/compute +install -m 755 -p -D %{_buildsubdir}/scripts/mtcTest %{buildroot}/%{local_etc_servicesd}/controller +install -m 755 -p -D %{_buildsubdir}/scripts/mtcTest %{buildroot}/%{local_etc_servicesd}/storage +install -m 755 -p -D %{_buildsubdir}/scripts/runservices %{buildroot}%{_sysconfdir}/init.d/runservices + +# test tools +install -m 755 -p -D %{_buildsubdir}/scripts/dmemchk.sh %{buildroot}%{local_sbindir} + +# process monitor config files +install -m 755 -d %{buildroot}%{local_etc_pmond} +install -m 644 -p -D %{_buildsubdir}/scripts/mtcClient.conf %{buildroot}%{local_etc_pmond}/mtcClient.conf +install -m 644 -p -D %{_buildsubdir}/scripts/hbsClient.conf %{buildroot}%{local_etc_pmond}/hbsClient.conf +install -m 644 -p -D %{_buildsubdir}/pmon/scripts/acpid.conf %{buildroot}%{local_etc_pmond}/acpid.conf +install -m 644 -p -D %{_buildsubdir}/pmon/scripts/sshd.conf %{buildroot}%{local_etc_pmond}/sshd.conf +install -m 644 -p -D %{_buildsubdir}/pmon/scripts/ntpd.conf %{buildroot}%{local_etc_pmond}/ntpd.conf +install -m 644 -p -D %{_buildsubdir}/pmon/scripts/syslog-ng.conf %{buildroot}%{local_etc_pmond}/syslog-ng.conf +install -m 644 -p -D %{_buildsubdir}/rmon/scripts/rmon.conf %{buildroot}%{local_etc_pmond}/rmon.conf +install -m 644 -p -D %{_buildsubdir}/fsmon/scripts/fsmon.conf %{buildroot}%{local_etc_pmond}/fsmon.conf +install -m 644 -p -D %{_buildsubdir}/scripts/mtclogd.conf %{buildroot}%{local_etc_pmond}/mtclogd.conf +install -m 644 -p -D %{_buildsubdir}/guest/scripts/guestServer.pmon %{buildroot}%{local_etc_pmond}/guestServer.conf +install -m 644 -p -D %{_buildsubdir}/alarm/scripts/mtcalarm.pmon.conf %{buildroot}%{local_etc_pmond}/mtcalarm.conf + +# resource monitor config files +install -m 755 -d %{buildroot}%{local_etc_rmond} +install -m 755 -d 
%{buildroot}%{_sysconfdir}/rmonapi.d +install -m 755 -d %{buildroot}%{_sysconfdir}/rmonfiles.d +install -m 755 -d %{buildroot}%{_sysconfdir}/rmon_interfaces.d +install -m 644 -p -D %{_buildsubdir}/rmon/scripts/remotelogging_resource.conf %{buildroot}%{local_etc_rmond}/remotelogging_resource.conf +install -m 644 -p -D %{_buildsubdir}/rmon/scripts/cpu_resource.conf %{buildroot}%{local_etc_rmond}/cpu_resource.conf +install -m 644 -p -D %{_buildsubdir}/rmon/scripts/memory_resource.conf %{buildroot}%{local_etc_rmond}/memory_resource.conf +install -m 644 -p -D %{_buildsubdir}/rmon/scripts/filesystem_resource.conf %{buildroot}%{local_etc_rmond}/filesystem_resource.conf +install -m 644 -p -D %{_buildsubdir}/rmon/scripts/cinder_virtual_resource.conf %{buildroot}%{local_etc_rmond}/cinder_virtual_resource.conf +install -m 644 -p -D %{_buildsubdir}/rmon/scripts/nova_virtual_resource.conf %{buildroot}%{local_etc_rmond}/nova_virtual_resource.conf +install -m 644 -p -D %{_buildsubdir}/rmon/scripts/oam_resource.conf %{buildroot}%{_sysconfdir}/rmon_interfaces.d/oam_resource.conf +install -m 644 -p -D %{_buildsubdir}/rmon/scripts/management_resource.conf %{buildroot}%{_sysconfdir}/rmon_interfaces.d/management_resource.conf +install -m 644 -p -D %{_buildsubdir}/rmon/scripts/infrastructure_resource.conf %{buildroot}%{_sysconfdir}/rmon_interfaces.d/infrastructure_resource.conf +install -m 755 -p -D %{_buildsubdir}/rmon/scripts/query_ntp_servers.sh %{buildroot}%{_sysconfdir}/rmonfiles.d/query_ntp_servers.sh +install -m 755 -p -D %{_buildsubdir}/rmon/scripts/rmon_reload_on_cpe.sh %{buildroot}%{local_etc_goenabledd}/rmon_reload_on_cpe.sh + +# log rotation +install -m 755 -d %{buildroot}%{_sysconfdir}/logrotate.d +install -m 644 -p -D %{_buildsubdir}/scripts/mtce.logrotate %{buildroot}%{local_etc_logrotated}/mtce.logrotate +install -m 644 -p -D %{_buildsubdir}/hostw/scripts/hostw.logrotate %{buildroot}%{local_etc_logrotated}/hostw.logrotate +install -m 644 -p -D 
%{_buildsubdir}/pmon/scripts/pmon.logrotate %{buildroot}%{local_etc_logrotated}/pmon.logrotate +install -m 644 -p -D %{_buildsubdir}/rmon/scripts/rmon.logrotate %{buildroot}%{local_etc_logrotated}/rmon.logrotate +install -m 644 -p -D %{_buildsubdir}/fsmon/scripts/fsmon.logrotate %{buildroot}%{local_etc_logrotated}/fsmon.logrotate +install -m 644 -p -D %{_buildsubdir}/hwmon/scripts/hwmon.logrotate %{buildroot}%{local_etc_logrotated}/hwmon.logrotate +install -m 644 -p -D %{_buildsubdir}/guest/scripts/guestAgent.logrotate %{buildroot}%{local_etc_logrotated}/guestAgent.logrotate +install -m 644 -p -D %{_buildsubdir}/guest/scripts/guestServer.logrotate %{buildroot}%{local_etc_logrotated}/guestServer.logrotate +install -m 644 -p -D %{_buildsubdir}/alarm/scripts/mtcalarm.logrotate %{buildroot}%{local_etc_logrotated}/mtcalarm.logrotate + +install -m 755 -p -D %{_buildsubdir}/public/libamon.so.$MAJOR %{buildroot}%{_libdir}/libamon.so.$MAJOR +cd %{buildroot}%{_libdir} ; ln -s libamon.so.$MAJOR libamon.so.$MAJOR.$MINOR +cd %{buildroot}%{_libdir} ; ln -s libamon.so.$MAJOR libamon.so + +install -m 755 -p -D %{_buildsubdir}/rmon/rmonApi/librmonapi.so.$MAJOR %{buildroot}%{_libdir}/librmonapi.so.$MAJOR +cd %{buildroot}%{_libdir} ; ln -s librmonapi.so.$MAJOR librmonapi.so.$MAJOR.$MINOR +cd %{buildroot}%{_libdir} ; ln -s librmonapi.so.$MAJOR librmonapi.so + +install -m 755 -d %{buildroot}/var +install -m 755 -d %{buildroot}/var/run + +# Enable all services in systemd +%post +/bin/systemctl enable fsmon.service +/bin/systemctl enable mtcClient.service +/bin/systemctl enable hbsClient.service +/bin/systemctl enable mtclog.service +/bin/systemctl enable iscsid.service +/bin/systemctl enable rsyncd.service +/bin/systemctl enable goenabled.service +/bin/systemctl enable mtcalarm.service + +%post -n cgts-mtce-common-hostw +/bin/systemctl enable hostw.service + +%post -n cgts-mtce-common-pmon +/bin/systemctl enable pmon.service + +%post -n cgts-mtce-common-guestServer +/bin/systemctl 
enable guestServer.service + +%post -n cgts-mtce-common-rmon +/bin/systemctl enable rmon.service + + +############################### +# Maintenance Common RPM Files +############################### + +%files +%license LICENSE + +%defattr(-,root,root,-) + +# create the mtc and its tmp dir +%dir %{_sysconfdir}/mtc +%dir %{_sysconfdir}/mtc/tmp + +# SM OCF Start/Stop/Monitor Scripts +%{ocf_resourced}/platform/mtcAgent +%{ocf_resourced}/platform/hbsAgent + +# Config files +%config(noreplace)/etc/mtc.ini + +# Config files - Non-Modifiable +%{_sysconfdir}/mtc.conf +%{_sysconfdir}/mtc/fsmond.conf + +# Maintenance Process Monitor Config Files +%{local_etc_pmond}/sshd.conf +%{local_etc_pmond}/ntpd.conf +%{local_etc_pmond}/mtcClient.conf +%{local_etc_pmond}/acpid.conf +%{local_etc_pmond}/hbsClient.conf +%{local_etc_pmond}/syslog-ng.conf +%{local_etc_pmond}/fsmon.conf +%{local_etc_pmond}/mtclogd.conf +%{local_etc_pmond}/mtcalarm.conf + +# Maintenance log rotation config files +%{local_etc_logrotated}/fsmon.logrotate +%{local_etc_logrotated}/mtce.logrotate +%{local_etc_logrotated}/mtcalarm.logrotate + +# Maintenance start/stop services scripts +%{local_etc_servicesd}/controller/mtcTest +%{local_etc_servicesd}/storage/mtcTest +%{local_etc_servicesd}/compute/mtcTest + +# BMC profile Files +%{bmc_profilesd}/sensor_hp360_v1_ilo_v4.profile +%{bmc_profilesd}/sensor_quanta_v1_ilo_v4.profile +%{bmc_profilesd}/sensor_hp380_v1_ilo_v4.profile + +# Init scripts +%{_sysconfdir}/init.d/runservices +%{_sysconfdir}/init.d/goenabled +%{_sysconfdir}/init.d/fsmon +%{_sysconfdir}/init.d/mtclog +%{_sysconfdir}/init.d/hbsClient +%{_sysconfdir}/init.d/mtcClient +%{_sysconfdir}/init.d/mtcalarm +%{_sysconfdir}/init.d/hwclock.sh + +%{_unitdir}/runservices.service +%{_unitdir}/goenabled.service +%{_unitdir}/mtclog.service +%{_unitdir}/mtcalarm.service +%{_unitdir}/fsmon.service +%{_unitdir}/mtcClient.service +%{_unitdir}/hbsClient.service +%{_unitdir}/hwclock.service + +# Binaries 
+%{local_bindir}/mtcAgent +%{local_bindir}/fsmond +%{local_bindir}/hbsAgent +%{local_bindir}/mtclogd +%{local_bindir}/mtcalarmd +%{local_bindir}/hbsClient +%{local_bindir}/mtcClient +%{local_bindir}/wipedisk +%{local_sbindir}/dmemchk.sh +%{_sbindir}/fsync + +############################### +# Process Monitor RPM Files +############################### +%files -n cgts-mtce-common-pmon + +%defattr(-,root,root,-) + +# Config files - Non-Modifiable +%{_sysconfdir}/mtc/pmond.conf + +%{local_etc_logrotated}/pmon.logrotate +%{_unitdir}/pmon.service +%{local_sbindir}/pmon-restart +%{local_sbindir}/pmon-start +%{local_sbindir}/pmon-stop + +%{_libdir}/libamon.so.1.0 +%{_libdir}/libamon.so.1 +%{_libdir}/libamon.so + +%{_sysconfdir}/init.d/pmon +%{local_bindir}/pmond + +############################### +# Resource Monitor RPM Files +############################### +%files -n cgts-mtce-common-rmon + +%defattr(-,root,root,-) + +# Config files - Non-Modifiable +%{_sysconfdir}/mtc/rmond.conf + +%{local_etc_pmond}/rmon.conf +%{local_etc_logrotated}/rmon.logrotate +%{_unitdir}/rmon.service + +%{local_etc_rmond}/filesystem_resource.conf +%{local_etc_rmond}/cpu_resource.conf +%{local_etc_rmond}/remotelogging_resource.conf +%{local_etc_rmond}/memory_resource.conf +%{local_etc_rmond}/cinder_virtual_resource.conf +%{local_etc_rmond}/nova_virtual_resource.conf + +%{_sysconfdir}/rmon_interfaces.d/management_resource.conf +%{_sysconfdir}/rmon_interfaces.d/oam_resource.conf +%{_sysconfdir}/rmon_interfaces.d/infrastructure_resource.conf +%{_sysconfdir}/rmonfiles.d/query_ntp_servers.sh + +%{_libdir}/librmonapi.so.1.0 +%{_libdir}/librmonapi.so.1 +%{_libdir}/librmonapi.so + +%dir %{_sysconfdir}/rmonapi.d + +%{_sysconfdir}/init.d/rmon +%{local_bindir}/rmond +%{local_bindir}/rmon_resource_notify +%{local_etc_goenabledd}/rmon_reload_on_cpe.sh + +############################### +# Hardware Monitor RPM Files +############################### +%files -n cgts-mtce-common-hwmon + +%defattr(-,root,root,-) + 
+# Config files - Non-Modifiable +%{_sysconfdir}/mtc/hwmond.conf + +%{_unitdir}/hwmon.service +%{local_etc_logrotated}/hwmon.logrotate +%{ocf_resourced}/platform/hwmon + +%{_sbindir}/show_hp380 +%{_sbindir}/show_hp360 +%{_sbindir}/show_quanta + +%{_sysconfdir}/init.d/hwmon +%{local_bindir}/hwmond + +############################### +# Guest Agent RPM Files +############################### +%files -n cgts-mtce-common-guestAgent + +%defattr(-,root,root,-) + +# Config files - Non-Modifiable +%{_sysconfdir}/mtc/guestAgent.ini + +%{_unitdir}/guestAgent.service +%{local_etc_logrotated}/guestAgent.logrotate +%{ocf_resourced}/platform/guestAgent + +%{_sysconfdir}/init.d/guestAgent +%{local_bindir}/guestAgent + +############################### +# Guest Server RPM Files +############################### +%files -n cgts-mtce-common-guestServer + +%defattr(-,root,root,-) + +# Config files - Non-Modifiable +%{_sysconfdir}/mtc/guestServer.ini + +%{local_etc_pmond}/guestServer.conf +%{local_etc_logrotated}/guestServer.logrotate +%{_unitdir}/guestServer.service + +%{_sysconfdir}/init.d/guestServer +%{local_bindir}/guestServer + + +############################### +# Host Watchdog RPM Files +############################### +%files -n cgts-mtce-common-hostw + +%defattr(-,root,root,-) + +# Config files - Non-Modifiable +%{_sysconfdir}/mtc/hostwd.conf + +%{local_etc_logrotated}/hostw.logrotate +%{_unitdir}/hostw.service +%{_sysconfdir}/init.d/hostw +%{local_bindir}/hostwd diff --git a/mtce-common/cgts-mtce-common-1.0/.gitignore b/mtce-common/cgts-mtce-common-1.0/.gitignore new file mode 100644 index 00000000..4e5d65ff --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/.gitignore @@ -0,0 +1,4 @@ +*.swp +*.o +*.a +*~ diff --git a/mtce-common/cgts-mtce-common-1.0/LICENSE b/mtce-common/cgts-mtce-common-1.0/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + 
http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/mtce-common/cgts-mtce-common-1.0/Makefile b/mtce-common/cgts-mtce-common-1.0/Makefile new file mode 100755 index 00000000..f942d70c --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/Makefile @@ -0,0 +1,47 @@ +# +# Copyright (c) 2013-2016 Wind River Systems, Inc. 
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+VER=1
+VER_MJR=1
+.PHONY: build clean backup
+build: # shared libraries first (public, rmonApi, daemon, common), then each daemon
+	@(cd public ; $(MAKE) lib VER=$(VER) VER_MJR=$(VER_MJR))
+	@(cd rmon/rmonApi ; $(MAKE) lib VER=$(VER) VER_MJR=$(VER_MJR))
+	@(cd daemon ; $(MAKE) lib VER=$(VER) VER_MJR=$(VER_MJR))
+	@(cd common ; $(MAKE) lib fsync VER=$(VER) VER_MJR=$(VER_MJR))
+	@(cd alarm ; $(MAKE) build VER=$(VER) VER_MJR=$(VER_MJR))
+	@(cd heartbeat ; $(MAKE) build VER=$(VER) VER_MJR=$(VER_MJR))
+	@(cd maintenance ; $(MAKE) build VER=$(VER) VER_MJR=$(VER_MJR))
+	@(cd hwmon ; $(MAKE) build VER=$(VER) VER_MJR=$(VER_MJR))
+	@(cd mtclog ; $(MAKE) build VER=$(VER) VER_MJR=$(VER_MJR))
+	@(cd pmon ; $(MAKE) build VER=$(VER) VER_MJR=$(VER_MJR))
+	@(cd guest ; $(MAKE) build VER=$(VER) VER_MJR=$(VER_MJR))
+	@(cd fsmon ; $(MAKE) build VER=$(VER) VER_MJR=$(VER_MJR))
+	@(cd rmon ; $(MAKE) build VER=$(VER) VER_MJR=$(VER_MJR))
+	@(cd rmon/rmon_resource_notify ; $(MAKE) build VER=$(VER) VER_MJR=$(VER_MJR))
+	@(cd hostw ; $(MAKE) build VER=$(VER) VER_MJR=$(VER_MJR))
+
+clean: # clean every component directory, then remove the release directory
+	@( cd public ; $(MAKE) clean )
+	@( cd rmon/rmonApi ; $(MAKE) clean )
+	@( cd daemon ; $(MAKE) clean )
+	@( cd common ; $(MAKE) clean )
+	@( cd alarm ; $(MAKE) clean )
+	@( cd mtclog ; $(MAKE) clean )
+	@( cd hwmon ; $(MAKE) clean )
+	@( cd pmon ; $(MAKE) clean )
+	@( cd guest ; $(MAKE) clean )
+	@( cd fsmon ; $(MAKE) clean )
+	@( cd heartbeat ; $(MAKE) clean )
+	@( cd maintenance ; $(MAKE) clean )
+	@( cd rmon ; $(MAKE) clean )
+	@( cd rmon/rmon_resource_notify ; $(MAKE) clean )
+	@( cd hostw ; $(MAKE) clean )
+	@( rm -rf release )
+
+backup: clean # copy the cleaned source tree to ~/mtce
+	mkdir -p ~/mtce
+	cp -a * ~/mtce
diff --git a/mtce-common/cgts-mtce-common-1.0/alarm/Makefile b/mtce-common/cgts-mtce-common-1.0/alarm/Makefile
new file mode 100755
index 00000000..210de21b
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/alarm/Makefile
@@ -0,0 +1,44 @@
+SRCS = alarmInit.cpp alarmData.cpp alarmHdlr.cpp alarmMgr.cpp alarmUtil.cpp alarm.cpp
+OBJS = $(SRCS:.cpp=.o)
+LDLIBS = -lstdc++ -ldaemon -lcommon -lfmcommon -ljson-c -lrt -lcrypto -luuid
+INCLUDES = -I../daemon -I../common
-I../smash -I../maintenance -I../public -I. -I..
+CCFLAGS = -g -O2 -Wall -Wextra -Werror
+# Static analysis runs only when the tool is installed ; 'command -v' is POSIX so the probe works under any /bin/sh
+STATIC_ANALYSIS_TOOL = cppcheck
+STATIC_ANALYSIS_TOOL_EXISTS = $(shell command -v $(STATIC_ANALYSIS_TOOL) >/dev/null 2>&1 && echo 1 || echo 0)
+
+BINS = mtcalarmd
+
+
+.cpp.o:
+	$(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@
+
+all: clean_bins static_analysis common daemon process library
+
+static_analysis:
+ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1)
+	$(STATIC_ANALYSIS_TOOL) --language=c++ --enable=warning -U__AREA__ -DWANT_FIT_TESTING *.cpp *.h
+else
+	echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..."
+endif
+
+build: clean_bins static_analysis ${OBJS} library process
+
+common:
+	( cd ../common ; $(MAKE) clean ; $(MAKE) lib VER=$(VER) VER_MJR=$(VER_MJR))
+
+daemon:
+	( cd ../daemon ; $(MAKE) clean ; $(MAKE) lib VER=$(VER) VER_MJR=$(VER_MJR))
+
+library: alarm.o
+	ar rcs libalarm.a alarm.o $(EXTRAARFLAGS)
+
+process: $(OBJS)
+	$(CXX) $(CCFLAGS) ${OBJS} -L../daemon -L../common $(LDLIBS) -o mtcalarmd
+
+clean_bins:
+	@rm -f $(BINS)
+
+clean:
+	@rm -f $(OBJS) $(BINS) *.o *.a
+
diff --git a/mtce-common/cgts-mtce-common-1.0/alarm/alarm.cpp b/mtce-common/cgts-mtce-common-1.0/alarm/alarm.cpp
new file mode 100644
index 00000000..6b113f89
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/alarm/alarm.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2015-2017 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+ * @file
+ * Wind River Titanium Cloud 'Heartbeat Agent' Alarm Module
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+using namespace std;
+
+#ifdef __AREA__
+#undef __AREA__
+#endif
+#define __AREA__ "alm"
+
+#include "daemon_common.h"
+#include "nodeBase.h" /* for ... fail codes */
+#include "jsonUtil.h"
+#include "alarm.h" /* for ...
module header */
+
+static msgClassSock * user_sock_ptr = NULL ; /* set by alarm_register_user ; alarm_ writes requests to it */
+
+/* A call to this API is required in advance of sending an alarm request ; returns PASS or FAIL_SOCKET_BIND */
+int alarm_register_user ( msgClassSock * sock_ptr )
+{
+    int rc = PASS ;
+    if ( sock_ptr && sock_ptr->getFD() && sock_ptr->sock_ok() )
+    {
+        ilog ("Registered with maintenance alarm service\n");
+        user_sock_ptr = sock_ptr ;
+    }
+    else
+    {
+        elog ("Failed to register with maintenance alarm service\n");
+        rc = FAIL_SOCKET_BIND ;
+    }
+    return (rc);
+}
+
+/* Construct an alarm request json string in the following form
+   {\"mtcalarm\":[{\"alarmid\":\"200.009\",\"hostname\":\"compute-3\",\"operation\":\"set\",\"severity\":\"major\",\"entity\":\"Infrastructure\",\"prefix\":\"service=heartbeat\"}, {\"alarmid\":\"200.005\",\"hostname\":\"compute-3\",\"operation\":\"set\",\"severity\":\"major\",\"entity\":\"Management\",\"prefix\":\"service=heartbeat\"}]}"
+
+   or
+
+   { \"mtcalarm\":
+     [
+       {
+         \"alarmid\":\"200.009\",
+         \"hostname\":\"compute-3\",
+         \"operation\":\"set\",
+         \"severity\":\"major\",
+         \"entity\":\"Infrastructure\",
+         \"prefix\":\"service=heartbeat\"
+       }
+     ]
+   }
+   and write it to the registered socket ; PASS only after a complete write, else FAIL_SOCKET_SENDTO
+*/
+int alarm_ ( string hostname, const char * id, EFmAlarmStateT state, EFmAlarmSeverityT severity, const char * entity, string prefix )
+{
+    int rc = PASS ;
+    char request [MAX_ALARM_REQ_MSG_SIZE] ;
+    string msg_type ;
+    string sev ;
+
+    if ( state == FM_ALARM_STATE_MSG )
+        msg_type = "msg" ;
+    else if ( state == FM_ALARM_STATE_SET )
+        msg_type = "set" ;
+    else
+        msg_type = "clear" ;
+
+    switch ( severity )
+    {
+        case FM_ALARM_SEVERITY_CLEAR:
+            sev = "clear" ;
+            break ;
+        case FM_ALARM_SEVERITY_WARNING:
+            sev = "warning";
+            break ;
+        case FM_ALARM_SEVERITY_MINOR:
+            sev = "minor";
+            break ;
+        case FM_ALARM_SEVERITY_MAJOR:
+            sev = "major";
+            break ;
+        case FM_ALARM_SEVERITY_CRITICAL:
+            sev = "critical";
+            break ;
+        default :
+            sev = "unknown";
+            break ;
+    }
+
+    snprintf ( request, MAX_ALARM_REQ_MSG_SIZE, "{\"mtcalarm\":[{\"alarmid\":\"%s\",\"hostname\":\"%s\",\"operation\":\"%s\",\"severity\":\"%s\",\"entity\":\"%s\",\"prefix\":\"%s\"}]}",
+               id,
+               hostname.data(),
+               msg_type.data(),
+               sev.data(),
+               entity,
+               prefix.data());
+    size_t len = strlen(request) ;
+    if ( user_sock_ptr == NULL ) /* alarm_register_user was never called or it failed */
+    {
+        elog ("%s alarm request dropped ; alarm socket is not registered\n", hostname.c_str());
+        return (FAIL_SOCKET_SENDTO);
+    }
+    for ( int i = 0 ; i < 3 ; i++ ) /* Retrying up to 3 times if the send fails */
+    {
+        int bytes = user_sock_ptr->write((char*)&request[0], len );
+        rc = FAIL_SOCKET_SENDTO ; /* failed and partial sends both count as failure ; only a complete send returns PASS */
+        if ( bytes <= 0 )
+        {
+            elog("%s failed to send alarm request (%d:%m)\n", hostname.c_str(), errno );
+            elog("... %s\n", request);
+        }
+        else if ( ((int)len) != bytes )
+            elog ("%s failed to send complete alarm message (%d:%zu)\n", hostname.c_str(), bytes, len );
+        else
+        {
+            ilog ("%s %s\n", hostname.c_str(), request);
+            return ( PASS ) ;
+        }
+        daemon_signal_hdlr ();
+        usleep (1000);
+    }
+    return (rc);
+}
+
+
+int alarm_clear ( string hostname, const char * alarm_id_ptr , string entity )
+{
+    string prefix = "" ;
+    return (alarm_ ( hostname, alarm_id_ptr, FM_ALARM_STATE_CLEAR, FM_ALARM_SEVERITY_CLEAR, entity.data(), prefix.data() ));
+}
+
+int alarm_warning ( string hostname, const char * alarm_id_ptr , string entity )
+{
+    string prefix = "" ;
+    return (alarm_ ( hostname, alarm_id_ptr, FM_ALARM_STATE_SET, FM_ALARM_SEVERITY_WARNING, entity.data(), prefix.data() ));
+}
+
+int alarm_minor ( string hostname, const char * alarm_id_ptr , string entity )
+{
+    string prefix = "" ;
+    return (alarm_ ( hostname, alarm_id_ptr, FM_ALARM_STATE_SET, FM_ALARM_SEVERITY_MINOR, entity.data(), prefix.data() ));
+}
+
+int alarm_major ( string hostname, const char * alarm_id_ptr , string entity )
+{
+    string prefix = "" ;
+    return (alarm_ ( hostname, alarm_id_ptr, FM_ALARM_STATE_SET, FM_ALARM_SEVERITY_MAJOR, entity.data(), prefix.data() ));
+}
+
+int alarm_critical ( string hostname, const char * alarm_id_ptr , string entity )
+{
+    string prefix = "" ;
+    return (alarm_ ( hostname, alarm_id_ptr, FM_ALARM_STATE_SET, FM_ALARM_SEVERITY_CRITICAL, entity.data(), prefix.data() ));
+}
+
+int alarm_warning_log ( string hostname, const char * alarm_id_ptr , string entity , string prefix )
+{
+    return (alarm_ ( hostname, alarm_id_ptr, FM_ALARM_STATE_MSG, FM_ALARM_SEVERITY_WARNING, entity.data(), prefix.data() ));
+}
+
+int alarm_minor_log ( string hostname, const char * alarm_id_ptr , string entity , string prefix )
+{
+    return (alarm_ ( hostname, alarm_id_ptr, FM_ALARM_STATE_MSG, FM_ALARM_SEVERITY_MINOR, entity.data(), prefix.data() ));
+}
+
+int alarm_major_log ( string hostname, const char * alarm_id_ptr , string entity , string prefix )
+{
+    return (alarm_ ( hostname, alarm_id_ptr, FM_ALARM_STATE_MSG, FM_ALARM_SEVERITY_MAJOR, entity.data(), prefix.data() ));
+}
+
+int alarm_critical_log ( string hostname, const char * alarm_id_ptr , string entity , string prefix )
+{
+    return (alarm_ ( hostname, alarm_id_ptr, FM_ALARM_STATE_MSG, FM_ALARM_SEVERITY_CRITICAL, entity.data(), prefix.data() ));
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/alarm/alarm.h b/mtce-common/cgts-mtce-common-1.0/alarm/alarm.h
new file mode 100644
index 00000000..9e29c971
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/alarm/alarm.h
@@ -0,0 +1,166 @@
+#ifndef __INCLUDE_ALARM_H__
+#define __INCLUDE_ALARM_H__
+
+/*
+ * Copyright (c) 2016-2017 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+ * @file
+ * Wind River Titanium Cloud Maintenance Alarm Service Header
+ */
+
+#include "nodeBase.h"
+#include "nodeUtil.h" /* for ... common utilities */
+
+#include "msgClass.h" /* for ...
msgClassSock type definition */
+
+/* external APIs */
+#include "fmAPI.h"
+
+#define ENTITY_PREFIX ((const char *)"host=")
+
+#define MAX_ALARMS (10)
+#define MAX_ALARM_REQ_PER_MSG (4)
+#define MAX_ALARM_REQ_MSG_SIZE (500) /* size of the single-request buffer built by alarm_ */
+#define MAX_ALARM_REQ_SIZE (MAX_ALARM_REQ_PER_MSG*MAX_ALARM_REQ_MSG_SIZE) /* size of the alarm daemon's receive buffer */
+
+#define SWERR_ALARM_ID ((const char *)"200.000") /* Do Not Use */
+#define LOCK_ALARM_ID ((const char *)"200.001")
+#define ENABLE_ALARM_ID ((const char *)"200.004")
+#define MGMNT_HB_ALARM_ID ((const char *)"200.005") /* 'Management Network' heartbeat alarm */
+#define PMOND_ALARM_ID ((const char *)"200.006") /* Process Monitor (pmond) failure alarm */
+#define SENSOR_ALARM_ID ((const char *)"200.007") /* Sensor read alarm ; i.e. the sensor read value bad */
+#define INFRA_HB_ALARM_ID ((const char *)"200.009") /* 'Infrastructure Network' heartbeat alarm */
+#define BM_ALARM_ID ((const char *)"200.010")
+#define CONFIG_ALARM_ID ((const char *)"200.011")
+#define CH_CONT_ALARM_ID ((const char *)"200.012") /* Combo Host Controller Failure - with Active Compute */
+#define CH_COMP_ALARM_ID ((const char *)"200.013") /* Combo Host Compute Failure - on last Controller */
+#define SENSORCFG_ALARM_ID ((const char *)"200.014") /* Sensor configuration alarm ; i.e.
could not add */
+#define SENSORGROUP_ALARM_ID ((const char *)"200.015") /* Sensor Group Read Error */
+
+#define EVENT_LOG_ID ((const char *)"200.020")
+#define COMMAND_LOG_ID ((const char *)"200.021")
+#define STATECHANGE_LOG_ID ((const char *)"200.022")
+#define SERVICESTATUS_LOG_ID ((const char *)"200.023") /* log used to report service failure events against */
+
+
+/** Heartbeat Alarm Abstract Reference IDs ; index into the alarm tables set up by alarmData_init */
+typedef enum
+{
+    HBS_ALARM_ID__HB_MGMNT = 0, /* MGMNT_HB_ALARM_ID */
+    HBS_ALARM_ID__HB_INFRA = 1, /* INFRA_HB_ALARM_ID */
+    HBS_ALARM_ID__PMOND = 2, /* PMOND_ALARM_ID */
+    HBS_ALARM_ID__SERVICE = 3, /* SERVICESTATUS_LOG_ID */
+    HBS_ALARM_ID__LAST = 4, /* number of entries ; not a valid id */
+} alarm_id_enum ;
+
+string alarmUtil_getId_str ( alarm_id_enum alarm_id_num );
+int alarmUtil_getId_enum ( string alarm_id_str, alarm_id_enum & alarm_id_num );
+
+/** Converts FM severity to representative string */
+string alarmUtil_getSev_str ( EFmAlarmSeverityT severity );
+EFmAlarmSeverityT alarmUtil_getSev_enum ( string severity );
+
+#ifndef __MODULE_PRIVATE__
+
+int alarm_register_user ( msgClassSock * sock_ptr );
+
+/* Public API ; the *_log prototypes carry the 'prefix' argument that their definitions in alarm.cpp take */
+int alarm_ ( string hostname, const char * id, EFmAlarmStateT state, EFmAlarmSeverityT severity, const char * entity, string prefix );
+int alarm_clear ( string hostname, const char * id_ptr, string entity );
+int alarm_warning ( string hostname, const char * id_ptr, string entity );
+int alarm_minor ( string hostname, const char * id_ptr, string entity );
+int alarm_major ( string hostname, const char * id_ptr, string entity );
+int alarm_critical ( string hostname, const char * id_ptr, string entity );
+int alarm_critical_log ( string hostname, const char * id_ptr, string entity, string prefix = "" );
+int alarm_major_log ( string hostname, const char * id_ptr, string entity, string prefix = "" );
+int alarm_minor_log ( string hostname, const char * id_ptr, string entity, string prefix = "" );
+int alarm_warning_log ( string hostname, const char * id_ptr, string entity, string prefix );
+int alarm_log ( string hostname, const char * id_ptr, string entity ); /* NOTE(review): no definition in alarm.cpp - confirm or remove */
+
+#else
+
+
+typedef struct
+{
+    SFmAlarmDataT alarm ;
+    string name ;
+    string instc_prefix ; /* Instance prefix i.e. "network=" or "process=" */
+    string critl_reason ;
+    string minor_reason ;
+    string major_reason ;
+    string clear_reason ;
+} alarmUtil_type ;
+
+
+#define MAX_FAILED_B2B_RECEIVES_B4_RESTART (5)
+
+
+/* Test Commands
+ *
+STR="{\"mtcalarm\":[{\"alarmid\":\"200.009\",\"hostname\":\"compute-3\",\"operation\":\"clear\",\"severity\":\"clear\",\"entity\":\"Infrastructure\",\"prefix\":\"service=heartbeat\"}, {\"alarmid\":\"200.005\",\"hostname\":\"compute-3\",\"operation\":\"set\",\"severity\":\"major\",\"entity\":\"Management\",\"prefix\":\"service=heartbeat\"}]}"
+PROTOCOL="UDP4-DATAGRAM"
+ADDRESS="127.0.0.1"
+port="2122"
+echo "${STR}" | socat - ${PROTOCOL}:${ADDRESS}:${port}
+*/
+
+#define MTCALARM_REQ_LABEL ((const char *)"mtcalarm")
+
+#define MTCALARM_REQ_KEY__OPERATION ((const char *)"operation")
+#define MTCALARM_REQ_KEY__HOSTNAME ((const char *)"hostname")
+#define MTCALARM_REQ_KEY__ALARMID ((const char *)"alarmid")
+#define MTCALARM_REQ_KEY__SEVERITY ((const char *)"severity")
+#define MTCALARM_REQ_KEY__ENTITY ((const char *)"entity")
+#define MTCALARM_REQ_KEY__PREFIX ((const char *)"prefix")
+
+/* in alarmData.cpp */
+void alarmData_init ( void );
+alarmUtil_type * alarmData_getAlarm_ptr ( string alarm_id_str );
+
+/* in alarmUtil.cpp */
+// EFmAlarmSeverityT mtcAlarm_state ( string hostname, alarm_id_enum id );
+
+
+/* in alarmHdlr.cpp */
+int alarmHdlr_request_handler ( char * msg_ptr );
+
+/* in alarmMgr.cpp */
+int alarmMgr_manage_alarm ( string alarmid ,
+                            string hostname,
+                            string operation,
+                            string severity,
+                            string entity,
+                            string prefix);
+
+/* Clear all alarms against this host */
+void alarmUtil_clear_all ( string hostname );
+
+/**
+ * Query the specified alarm severity level.
+ * Severity levels are specified in fmAPI.h
+ **/
+EFmAlarmSeverityT alarmUtil_query ( string hostname,
+                                    string identity,
+                                    string instance );
+
+int alarmUtil_query_identity ( string identity,
+                               SFmAlarmDataT * alarm_list_ptr,
+                               unsigned int alarms_max );
+
+int alarmUtil_clear ( string hostname, string alarm_id, string entity );
+int alarmUtil_critical ( string hostname, string alarm_id, string entity );
+int alarmUtil_major ( string hostname, string alarm_id, string entity );
+int alarmUtil_minor ( string hostname, string alarm_id, string entity );
+int alarmUtil_warning ( string hostname, string alarm_id, string entity );
+int alarmUtil_critical_log ( string hostname, string alarm_id, string entity );
+int alarmUtil_major_log ( string hostname, string alarm_id, string entity );
+int alarmUtil_minor_log ( string hostname, string alarm_id, string entity );
+int alarmUtil_warning_log ( string hostname, string alarm_id, string entity, string prefix );
+
+#endif // __MODULE_PRIVATE__
+#endif // __INCLUDE_ALARM_H__
diff --git a/mtce-common/cgts-mtce-common-1.0/alarm/alarmData.cpp b/mtce-common/cgts-mtce-common-1.0/alarm/alarmData.cpp
new file mode 100644
index 00000000..b9c12851
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/alarm/alarmData.cpp
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2016-2017 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+ * @file
+ * Wind River Titanium Cloud Maintenance Alarm Daemon Utility
+ **/
+
+#include
+#include
+#include
+#include
+#include
+
+using namespace std;
+
+#ifdef __AREA__
+#undef __AREA__
+#endif
+#define __AREA__ "alm"
+
+#define __MODULE_PRIVATE__
+
+#include "daemon_common.h" /* */
+#include "alarm.h" /* for ...
this module header */
+
+/* TODO: Replace this with YAML Parsing */
+static alarmUtil_type alarm_list[HBS_ALARM_ID__LAST] ;
+/* Look up the alarm data for an alarm id string ; logs a warning and returns NULL when the id is not known */
+alarmUtil_type * alarmData_getAlarm_ptr ( string alarm_id_str )
+{
+    alarm_id_enum id = HBS_ALARM_ID__LAST ;
+    if ( alarmUtil_getId_enum ( alarm_id_str, id ) == PASS )
+    {
+        if ( id < HBS_ALARM_ID__LAST )
+        {
+            return (&alarm_list[id]) ;
+        }
+    }
+    wlog ("failed to find alarm data for '%s'\n", alarm_id_str.c_str() );
+    return (NULL);
+}
+
+typedef struct
+{
+    const char * identity_str ;
+    alarm_id_enum identity_num ;
+} alarm_id_table_type ;
+
+alarm_id_table_type alarm_id_table[HBS_ALARM_ID__LAST];
+
+/* Fill alarm_id_table and alarm_list with the static definition of every alarm and log this daemon manages */
+void alarmData_init ( void )
+{
+    alarmUtil_type * ptr ;
+
+    alarm_id_table[HBS_ALARM_ID__HB_MGMNT].identity_str = MGMNT_HB_ALARM_ID ;
+    alarm_id_table[HBS_ALARM_ID__HB_MGMNT].identity_num = HBS_ALARM_ID__HB_MGMNT ;
+    alarm_id_table[HBS_ALARM_ID__HB_INFRA].identity_str = INFRA_HB_ALARM_ID;
+    alarm_id_table[HBS_ALARM_ID__HB_INFRA].identity_num = HBS_ALARM_ID__HB_INFRA;
+    alarm_id_table[HBS_ALARM_ID__PMOND].identity_str = PMOND_ALARM_ID;
+    alarm_id_table[HBS_ALARM_ID__PMOND].identity_num = HBS_ALARM_ID__PMOND;
+    alarm_id_table[HBS_ALARM_ID__SERVICE].identity_str = SERVICESTATUS_LOG_ID;
+    alarm_id_table[HBS_ALARM_ID__SERVICE].identity_num = HBS_ALARM_ID__SERVICE;
+
+    /** Management Network Heartbeat Alarm ************************************/
+
+    ptr = &alarm_list[HBS_ALARM_ID__HB_MGMNT];
+    memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
+    snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", MGMNT_HB_ALARM_ID);
+
+    ptr->name = "'Management Network' Heartbeat" ;
+    ptr->instc_prefix = "network=" ;
+
+    ptr->critl_reason = "experienced a persistent critical 'Management Network' "
+                        "communication failure.";
+    ptr->major_reason =
+    ptr->minor_reason = "is experiencing intermittent 'Management Network' "
+                        "communication failures that have exceeded its lower alarming threshold.";
+
+    ptr->clear_reason = "'Management Network' Heartbeat has 'resumed' if host is 'unlocked' "
+                        "or 'stopped' if host is 'locked or deleted'";
+
+    ptr->alarm.alarm_type = FM_ALARM_COMM ;
+    ptr->alarm.probable_cause = FM_ALARM_LOSS_OF_SIGNAL ;
+    ptr->alarm.inhibit_alarms = FM_FALSE;
+    ptr->alarm.service_affecting = FM_TRUE ;
+    ptr->alarm.suppression = FM_TRUE ;
+
+    ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
+    ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */
+
+    snprintf( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, /* adjacent literals concatenate ; each ends in a space */
+              "Check 'Management Network' connectivity and support for multicast messaging. "
+              "If problem consistently occurs after that and Host is reset, then "
+              "contact next level of support or lock and replace failing Host.");
+
+    /** Infrastructure Network Heartbeat Alarm ************************************/
+
+    ptr = &alarm_list[HBS_ALARM_ID__HB_INFRA];
+    memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
+    snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", INFRA_HB_ALARM_ID);
+
+    ptr->name = "'Infrastructure Network' Heartbeat" ;
+    ptr->instc_prefix = "network=" ;
+
+    ptr->critl_reason = "experienced a persistent critical 'Infrastructure Network' "
+                        "communication failure.";
+
+    ptr->major_reason =
+    ptr->minor_reason = "is experiencing intermittent 'Infrastructure Network' "
+                        "communication failures that have exceeded its lower alarming threshold.";
+
+    ptr->clear_reason = "'Infrastructure Network' Heartbeat has 'resumed' if host is 'unlocked' "
+                        "or 'stopped' if host is 'locked or deleted'";
+
+    ptr->alarm.alarm_type = FM_ALARM_COMM ;
+    ptr->alarm.probable_cause = FM_ALARM_LOSS_OF_SIGNAL ;
+    ptr->alarm.inhibit_alarms = FM_FALSE;
+    ptr->alarm.service_affecting = FM_TRUE ;
+    ptr->alarm.suppression = FM_TRUE ;
+
+    ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
+    ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */
+
+    snprintf( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, /* adjacent literals concatenate ; each ends in a space */
+              "Check 'Infrastructure Network' connectivity and support for multicast messaging. "
+              "If problem consistently occurs after that and Host is reset, then "
+              "contact next level of support or lock and replace failing Host.");
+
+    /** Process Failure Alarm ****************************************************/
+
+    ptr = &alarm_list[HBS_ALARM_ID__PMOND];
+    memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
+    snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", PMOND_ALARM_ID);
+
+    ptr->name = "Process Monitor Failure" ;
+    ptr->instc_prefix = "process=" ;
+
+    ptr->critl_reason =
+    ptr->minor_reason =
+    ptr->major_reason = "'Process Monitor' (pmond) process is not running or functioning properly. "
+                        "The system is trying to recover this process." ;
+    ptr->clear_reason = "Process Monitor has been successfully recovered and is functioning properly.";
+
+    ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL ;
+    ptr->alarm.probable_cause = FM_ALARM_CAUSE_UNKNOWN;
+    ptr->alarm.inhibit_alarms = FM_FALSE;
+    ptr->alarm.service_affecting = FM_FALSE;
+    ptr->alarm.suppression = FM_TRUE ;
+
+    ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
+    ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */
+
+    snprintf (ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
+              "If this alarm does not automatically clear after some time and "
+              "continues to be asserted after Host is locked and unlocked then "
+              "contact next level of support for root cause analysis and recovery.");
+
+    /** Service Status Log ****************************************************/
+
+    ptr = &alarm_list[HBS_ALARM_ID__SERVICE];
+    memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
+    snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", SERVICESTATUS_LOG_ID);
+
+    ptr->name = "Service Status" ;
+
+    ptr->minor_reason =
+    ptr->major_reason =
+    ptr->critl_reason =
+    ptr->clear_reason = "";
+
+    ptr->alarm.alarm_type = FM_ALARM_TYPE_UNKNOWN ;
+    ptr->alarm.probable_cause = FM_ALARM_CAUSE_UNKNOWN ;
+    ptr->alarm.inhibit_alarms = FM_FALSE ;
+    ptr->alarm.service_affecting = FM_FALSE ;
+    ptr->alarm.suppression = FM_FALSE ;
+
+    ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
+    ptr->alarm.alarm_state = FM_ALARM_STATE_MSG ; /* Dynamic */
+
+    snprintf ( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, "%s", "");
+}
+
+/* Translate alarm identity enum to alarm identity string ; out-of-range ids map to "200.000" */
+string _getIdentity ( alarm_id_enum id )
+{
+    if ( id < HBS_ALARM_ID__LAST )
+        return ( alarm_id_table[id].identity_str) ;
+    return ("200.000");
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/alarm/alarmHdlr.cpp b/mtce-common/cgts-mtce-common-1.0/alarm/alarmHdlr.cpp
new file mode 100644
index 00000000..8c2f7438
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/alarm/alarmHdlr.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2016-2017 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+ * @file
+ * Wind River Titanium Cloud Maintenance Alarm Manager Daemon Handler
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include /* for close and usleep */
+#include /* for ... json-c json string parsing */
+
+using namespace std;
+
+#define __MODULE_PRIVATE__
+
+#include "alarm.h" /* module header */
+#include "jsonUtil.h" /* for ... jsonUtil_ utilities */
+#include "nodeTimers.h" /* for ...
maintenance timers */
+#include "daemon_common.h" /* for UNUSED() */
+
+void daemon_sigchld_hdlr ( void ) { ; }
+
+
+/** Daemon timer handler ; stops the timer whose id pointer arrives in the signal value */
+void _timer_handler ( int sig, siginfo_t *si, void *uc)
+{
+    timer_t * tid_ptr = (void**)si->si_value.sival_ptr ;
+    UNUSED(sig);
+    UNUSED(uc);
+    if ( !(*tid_ptr) )
+    {
+        return ;
+    }
+    else
+    {
+        mtcTimer_stop_tid_int_safe (tid_ptr);
+    }
+}
+/* Parse a 'mtcalarm' json request and pass each array element to alarmMgr_manage_alarm ; stops at the first failure */
+int alarmHdlr_request_handler ( char * msg_ptr )
+{
+    int rc = FAIL_JSON_PARSE ;
+    struct json_object *raw_obj = json_tokener_parse( msg_ptr );
+    jlog ("Alarm Request: %s\n", msg_ptr );
+    if ( raw_obj )
+    {
+        int elements = 0 ; /* stays 0 if the label lookup fails, which selects the 'label not found' path */
+
+        /* Check response sanity */
+        rc = jsonUtil_array_elements ( msg_ptr, MTCALARM_REQ_LABEL, elements );
+        if ( elements )
+        {
+            #define PARSE_FAILURE ((const char *)"failed to parse value for key")
+            string alarmid = "" ;
+            string hostname = "" ;
+            string operation = "" ;
+            string severity = "" ;
+            string entity = "" ;
+            string prefix = "" ;
+            string alarm_req = "" ;
+
+            for ( int i = 0 ; i < elements ; i++ )
+            {
+                if ( ( rc = jsonUtil_get_array_idx ( msg_ptr, MTCALARM_REQ_LABEL, i, alarm_req ) ) == PASS )
+                {
+                    if (( rc = jsonUtil_get_key_val ( (char*)alarm_req.data(), MTCALARM_REQ_KEY__ALARMID, alarmid )) != PASS )
+                    {
+                        elog ("%s '%s'\n", PARSE_FAILURE, MTCALARM_REQ_KEY__ALARMID);
+                    }
+                    else if (( rc = jsonUtil_get_key_val ( (char*)alarm_req.data(), MTCALARM_REQ_KEY__HOSTNAME, hostname )) != PASS )
+                    {
+                        elog ("%s '%s'\n", PARSE_FAILURE, MTCALARM_REQ_KEY__HOSTNAME);
+                    }
+                    else if (( rc = jsonUtil_get_key_val ( (char*)alarm_req.data(), MTCALARM_REQ_KEY__OPERATION, operation )) != PASS )
+                    {
+                        elog ("%s '%s'\n", PARSE_FAILURE, MTCALARM_REQ_KEY__OPERATION);
+                    }
+                    else if (( rc = jsonUtil_get_key_val ( (char*)alarm_req.data(), MTCALARM_REQ_KEY__SEVERITY, severity)) != PASS )
+                    {
+                        elog ("%s '%s'\n", PARSE_FAILURE, MTCALARM_REQ_KEY__SEVERITY);
+                    }
+                    else if (( rc = jsonUtil_get_key_val ( (char*)alarm_req.data(), MTCALARM_REQ_KEY__ENTITY, entity )) != PASS )
+                    {
+                        elog ("%s '%s'\n", PARSE_FAILURE, MTCALARM_REQ_KEY__ENTITY);
+                    }
+                    else if (( rc = jsonUtil_get_key_val ( (char*)alarm_req.data(), MTCALARM_REQ_KEY__PREFIX, prefix)) != PASS )
+                    {
+                        elog ("%s '%s'\n", PARSE_FAILURE, MTCALARM_REQ_KEY__PREFIX);
+                    }
+                    else
+                    {
+                        jlog ("Alarm Message has %d requests\n", elements );
+                        rc = alarmMgr_manage_alarm ( alarmid,
+                                                     hostname,
+                                                     tolowercase(operation),
+                                                     tolowercase(severity),
+                                                     entity,
+                                                     prefix);
+                    }
+                    if ( rc ) break ;
+                }
+                else
+                {
+                    wlog ("failed to get index '%d of %d' from alarm request\n", i, elements);
+                }
+            } /* for loop */
+        }
+        else
+        {
+            elog ("failed to find '%s' label in json object\n", MTCALARM_REQ_LABEL );
+            elog (" ... %s\n", msg_ptr );
+            rc = FAIL_JSON_OBJECT ;
+        }
+    }
+    else
+    {
+        elog ("failed to parse json request\n");
+        elog (" ... %s\n", msg_ptr );
+        rc = FAIL_JSON_OBJECT ;
+    }
+    if (raw_obj) json_object_put(raw_obj);
+    return (rc);
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/alarm/alarmInit.cpp b/mtce-common/cgts-mtce-common-1.0/alarm/alarmInit.cpp
new file mode 100644
index 00000000..dcf29332
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/alarm/alarmInit.cpp
@@ -0,0 +1,305 @@
+/*
+ * Copyright (c) 2016-2017 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+ * @file
+ * Wind River Titanium Cloud Maintenance Alarm Manager Daemon Initialization
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include /* for close and usleep */
+
+using namespace std;
+
+#define __MODULE_PRIVATE__
+
+#include "daemon_ini.h" /* Ini Parser Header */
+#include "daemon_common.h" /* Common definitions and types for daemons */
+#include "daemon_option.h" /* Common options for daemons */
+
+#include "alarm.h" /* module header */
+#include "msgClass.h" /* for ...
socket message setup */
+
+/** Local Identity */
+static string my_hostname = "" ;
+static string my_local_ip = "" ;
+static string my_float_ip = "" ;
+
+
+/** Maintenance Alarm request socket and port - UDP over lo */
+msgClassSock * mtcalarm_req_sock_ptr = NULL ;
+int mtcalarm_req_port = 0 ;
+
+
+/** Common Daemon Config Struct */
+static daemon_config_type _config ;
+daemon_config_type * daemon_get_cfg_ptr () { return &_config ; }
+
+
+/* Cleanup exit handler ; dumps info, closes files and the request socket, then exits with status 0 */
+void daemon_exit ( void )
+{
+    daemon_dump_info ();
+    daemon_files_fini ();
+
+    /* Close sockets */
+    if ( mtcalarm_req_sock_ptr )
+    {
+        delete (mtcalarm_req_sock_ptr );
+        mtcalarm_req_sock_ptr = NULL ;
+        mtcalarm_req_port = 0 ;
+    }
+    exit (0);
+}
+
+
+/** Client Config mask */
+#define CONFIG_CLIENT_MASK (CONFIG_CLIENT_PULSE_PORT)
+
+/* Startup config read ; ini_parse callback that records [client] mtcalarm_req_port and flags it in the mask */
+static int _config_handler ( void * user,
+                             const char * section,
+                             const char * name,
+                             const char * value)
+{
+    daemon_config_type* config_ptr = (daemon_config_type*)user;
+
+    if (MATCH("client", "mtcalarm_req_port"))
+    {
+        config_ptr->mtcalarm_req_port = atoi(value);
+        config_ptr->mask |= CONFIG_CLIENT_PULSE_PORT ;
+    }
+    else
+    {
+        return (PASS);
+    }
+    return (FAIL); /* NOTE(review): FAIL on a match / PASS otherwise looks inverted ; presumably matches the ini parser's return convention - confirm */
+}
+
+/* Configure the daemon ; loads MTCE_CONF_FILE and verifies the alarm port setting was found */
+int daemon_configure ( void )
+{
+    int rc = FAIL ;
+
+    if (ini_parse(MTCE_CONF_FILE, _config_handler, &_config) < 0)
+    {
+        elog("Failed to load '%s'\n", MTCE_CONF_FILE );
+        return(FAIL_LOAD_INI);
+    }
+
+    get_debug_options ( MTCE_CONF_FILE, &_config );
+
+    /* Verify loaded config against an expected mask
+     * as an ini file fault detection method */
+    if ( _config.mask != CONFIG_CLIENT_MASK )
+    {
+        elog ("Client configuration failed (%x)\n",
+             (( -1 ^ _config.mask ) & CONFIG_CLIENT_MASK) );
+        rc = FAIL_INI_CONFIG ;
+    }
+    else
+    {
+        ilog("Alarm Port : %d\n", _config.mtcalarm_req_port );
+        rc = PASS ;
+    }
+
+    return (rc);
+}
+
+/****************************/
+/* Initialization Utilities */
+/****************************/
+
+int daemon_socket_init ( void )
+{
+    int rc = PASS ;
+
+    /***********************************************************/
+    /* Setup the Alarm Request Receiver Socket */
+    /***********************************************************/
+
+    mtcalarm_req_sock_ptr = new msgClassRx ( LOOPBACK_IP, _config.mtcalarm_req_port, IPPROTO_UDP);
+    if (rc) /* NOTE(review): rc is still PASS here so this never fires ; the socket's own setup status is not checked */
+        return (rc) ;
+    if (mtcalarm_req_sock_ptr)
+        mtcalarm_req_sock_ptr->sock_ok(true);
+
+    return (rc);
+}
+
+
+/* Initialize the alarm daemon ; files, signals, config-ready wait, configuration, sockets and alarm data */
+int daemon_init ( string iface, string nodeType_str )
+{
+    int rc = PASS ;
+    UNUSED(nodeType_str);
+
+    /* Assign interface to config ; 'iface' is a by-value copy destroyed on return, so keep a private copy */
+    _config.mgmnt_iface = strdup(iface.c_str()) ;
+
+    if ((rc = daemon_files_init ()) != PASS)
+    {
+        elog ("daemon_files_init failed (rc:%d)\n", rc );
+        return ( FAIL_FILES_INIT );
+    }
+
+    /* Bind signal handlers */
+    if ((rc = daemon_signal_init ()) != PASS)
+    {
+        elog ("daemon_signal_init failed (rc:%d)\n", rc );
+        return ( FAIL_SIGNAL_INIT );
+    }
+
+    /************************************************************************
+     * There is no point continuing with init ; i.e. running daemon_configure,
+     * initializing sockets and trying to query for an ip address until the
+     * daemon's configuration requirements are met. Here we wait for those
+     * flag files to be present before continuing.
+ ************************************************************************ + * Wait for /etc/platform/.initial_config_complete & /var/run/.goenabled */ + daemon_wait_for_file ( CONFIG_COMPLETE_FILE , 0); + daemon_wait_for_file ( GOENABLED_MAIN_READY , 0); + + /* Configure the client */ + if ((rc = daemon_configure ()) != PASS) + { + elog ("daemon_configure failed (rc:%d)\n", rc ); + rc = FAIL_DAEMON_CONFIG ; + } + + /* Setup messaging sockets */ + else if ((rc = daemon_socket_init ()) != PASS) + { + elog ("daemon_socket_init failed (rc:%d)\n", rc ); + rc = FAIL_SOCKET_INIT; + } + + alarmData_init (); + + return (rc); +} + +void daemon_service_run ( void ) +{ + int rc = PASS ; + if (( mtcalarm_req_sock_ptr ) && ( mtcalarm_req_sock_ptr->getFD() )) + { + std::list socks ; + + /* For select dispatch */ + struct timeval waitd ; + fd_set readfds; + + int failed_receiver_log_throttle = 0 ; + int failed_receiver_b2b_count = 0 ; + int failed_socket_log_throttle = 0 ; + + socks.clear(); + socks.push_front (mtcalarm_req_sock_ptr->getFD()); + socks.sort(); + + /* Run service forever */ + for ( ; ; ) + { + daemon_signal_hdlr (); + waitd.tv_sec = 0; + waitd.tv_usec = SOCKET_WAIT; + + /* Initialize the master fd_set */ + FD_ZERO(&readfds); + FD_SET( mtcalarm_req_sock_ptr->getFD(), &readfds); + rc = select( socks.back()+1, &readfds, NULL, NULL, &waitd); + if (( rc < 0 ) || ( rc == 0 )) + { + if (( rc < 0 ) && ( errno != EINTR )) + { + wlog_throttled ( failed_socket_log_throttle, 100, + "Socket Select Failed (%d:%m)\n", errno); + } + } + + if ( FD_ISSET(mtcalarm_req_sock_ptr->getFD(), &readfds)) + { + failed_socket_log_throttle = 0 ; + if ( mtcalarm_req_sock_ptr && ( mtcalarm_req_sock_ptr->sock_ok() == true )) + { + char msg [MAX_ALARM_REQ_SIZE] ; + memset ( &msg , 0, MAX_ALARM_REQ_MSG_SIZE ); + int bytes = mtcalarm_req_sock_ptr->read((char*)&msg, MAX_ALARM_REQ_SIZE-1 ); + if ( bytes > 0 ) + { + failed_receiver_b2b_count = 0 ; + failed_receiver_log_throttle = 0 ; + if ( ( rc = 
alarmHdlr_request_handler ( msg )) != PASS ) + { + wlog ("failed to handle alarm request (rc:%d)\n", rc ); + } + } + else if ( bytes < 0 ) + { + failed_receiver_b2b_count++ ; + wlog_throttled ( failed_receiver_log_throttle, 20, "alarm request receive error ; thresholeded ; (%d:%m)\n", errno ); + } + else + { + failed_receiver_b2b_count++ ; + wlog_throttled ( failed_receiver_log_throttle, 20, "alarm request receive ; no data\n" ); + } + } + else + { + elog ("alarm request socket error ; fatal\n"); + failed_receiver_b2b_count = MAX_FAILED_B2B_RECEIVES_B4_RESTART ; + } + + if ( failed_receiver_b2b_count >= MAX_FAILED_B2B_RECEIVES_B4_RESTART ) + { + /* exit and allow process restart by pmond */ + elog ("max (%d) alarm request receive errors reached ; forcing process restart\n", MAX_FAILED_B2B_RECEIVES_B4_RESTART ); + break ; + } + } + } + } + else + { + elog ("alarm request socket error ; not initialized ; exiting\n"); + } + daemon_exit(); +} + +/* Push daemon state to log file */ +void daemon_dump_info ( void ) +{ + daemon_dump_membuf_banner (); + daemon_dump_membuf(); +} + +const char MY_DATA [100] = { "eieio\n" } ; +const char * daemon_stream_info ( void ) +{ + return (&MY_DATA[0]); +} + +/*************************************************************************** + * * + * Module Test Head * + * * + ***************************************************************************/ + +/** Teat Head Entry */ +int daemon_run_testhead ( void ) +{ + int rc = PASS; + return (rc); +} diff --git a/mtce-common/cgts-mtce-common-1.0/alarm/alarmMgr.cpp b/mtce-common/cgts-mtce-common-1.0/alarm/alarmMgr.cpp new file mode 100644 index 00000000..817e20b1 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/alarm/alarmMgr.cpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2016-2017 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud Maintenance Alarm Manager Daemon Manager + */ + +#include +#include +#include +#include + +using namespace std; + +#define __MODULE_PRIVATE__ + +#include "alarm.h" /* module header */ + +int alarmMgr_manage_alarm ( string alarmid, + string hostname, + string operation, + string severity, + string entity, + string prefix) +{ + int rc = PASS ; + string action = operation ; + action.append (" alarm"); + EFmAlarmSeverityT sev ; + + ilog ("Alarm: alarmid:%s hostname:%s operation:%s severity:%s entity:%s prefix:%s\n", + alarmid.c_str(), + hostname.c_str(), + operation.c_str(), + severity.c_str(), + entity.c_str(), + prefix.c_str()); + + sev = alarmUtil_getSev_enum ( severity ); + if (!operation.compare("msg")) + { + if ( sev == FM_ALARM_SEVERITY_WARNING ) + { + //if ( prefix.compare("none")) + alarmUtil_warning_log ( hostname, alarmid, entity, prefix ); + //else + // mtcAlarm_warning_log ( hostname, id, entity ); + } + else if ( sev == FM_ALARM_SEVERITY_MINOR ) + { + rc = alarmUtil_minor_log ( hostname, alarmid, entity ); + } + else if ( sev == FM_ALARM_SEVERITY_MAJOR) + { + rc = alarmUtil_major_log ( hostname, alarmid, entity ); + } + else if ( sev == FM_ALARM_SEVERITY_CRITICAL ) + { + rc = alarmUtil_critical_log ( hostname, alarmid, entity ); + } + else + { + rc = FAIL_INVALID_OPERATION ; + wlog ("Unsupported log severity '%d:%s'\n", sev, severity.c_str()); + } + action="create log" ; + } + + /* Get the state */ + else if ( !operation.compare("clear")) + { + rc = alarmUtil_clear ( hostname, alarmid, entity ); + } + + else if ( !operation.compare("set") ) + { + if ( sev == FM_ALARM_SEVERITY_WARNING ) + rc = alarmUtil_warning ( hostname, alarmid, entity ); + else if ( sev == FM_ALARM_SEVERITY_MINOR ) + rc = alarmUtil_minor ( hostname, alarmid, entity ); + else if ( sev == FM_ALARM_SEVERITY_MAJOR ) + rc = alarmUtil_major ( hostname, alarmid, entity ); + else if ( sev == 
FM_ALARM_SEVERITY_CRITICAL ) + rc = alarmUtil_critical ( hostname, alarmid, entity ); + else + { + rc = FAIL_INVALID_OPERATION ; + } + } + else + { + rc = FAIL_BAD_CASE ; + } + if ( rc ) + { + elog ("%s failed to %s '%s:%s'\n", hostname.c_str(), action.c_str(), alarmid.c_str(), entity.c_str() ) + } + + return (rc); +} + diff --git a/mtce-common/cgts-mtce-common-1.0/alarm/alarmUtil.cpp b/mtce-common/cgts-mtce-common-1.0/alarm/alarmUtil.cpp new file mode 100644 index 00000000..11eb882e --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/alarm/alarmUtil.cpp @@ -0,0 +1,500 @@ +/* + * Copyright (c) 2013 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform common Alarm utilities + */ + +#define __MODULE_PRIVATE__ + +#include "daemon_common.h" /* for ... daemon_is_file_present */ + +#include "nodeBase.h" +#include "nodeUtil.h" /* for ... get_mtclogd_sockPtr */ +#include "alarm.h" /* for ... alarm Utilities */ + + +string alarmUtil_getSev_str ( EFmAlarmSeverityT sev ) +{ + switch ( sev ) + { + case FM_ALARM_SEVERITY_CLEAR: return ("clear"); + case FM_ALARM_SEVERITY_WARNING: return ("warning"); + case FM_ALARM_SEVERITY_MINOR: return ("minor"); + case FM_ALARM_SEVERITY_MAJOR: return ("major"); + case FM_ALARM_SEVERITY_CRITICAL:return ("critical"); + default : return ("unknown"); + } +} + +EFmAlarmSeverityT alarmUtil_getSev_enum ( string sev ) +{ + if ( !sev.compare("clear")) return (FM_ALARM_SEVERITY_CLEAR) ; + if ( !sev.compare("warning")) return (FM_ALARM_SEVERITY_WARNING); + if ( !sev.compare("minor")) return (FM_ALARM_SEVERITY_MINOR); + if ( !sev.compare("major")) return (FM_ALARM_SEVERITY_MAJOR); + if ( !sev.compare("critical")) return (FM_ALARM_SEVERITY_CRITICAL); + wlog ("Unsupported severity '%s'\n", sev.c_str() ); + return (FM_ALARM_SEVERITY_CLEAR) ; +} + +int alarmUtil_getId_enum ( string alarmid, alarm_id_enum & id ) +{ + /* Get the ID */ + if ( alarmid.compare(INFRA_HB_ALARM_ID) == 0 ) + { + id 
= HBS_ALARM_ID__HB_INFRA ; + } + else if ( alarmid.compare(MGMNT_HB_ALARM_ID) == 0 ) + { + id = HBS_ALARM_ID__HB_MGMNT ; + } + else if ( alarmid.compare(PMOND_ALARM_ID) == 0 ) + { + id = HBS_ALARM_ID__PMOND ; + } + else if ( alarmid.compare(SERVICESTATUS_LOG_ID) == 0 ) + { + id = HBS_ALARM_ID__SERVICE ; + } + else + { + wlog ("Unsupported alarm id '%s'\n", alarmid.c_str()); + return (FAIL_BAD_PARM); + } + return (PASS); +} + +/* update the passed in alarm struct's instance_id entity path for the specified host */ +void _build_entity_path ( string & hostname, string & instance, SFmAlarmDataT & alarm ) +{ + snprintf ( &alarm.entity_type_id[0], FM_MAX_BUFFER_LENGTH, "system.host" ); + + if ( instance.empty() ) + { + snprintf ( &alarm.entity_instance_id[0], FM_MAX_BUFFER_LENGTH, "%s%s", + ENTITY_PREFIX, hostname.data()); + } + else + { + snprintf ( &alarm.entity_instance_id[0], FM_MAX_BUFFER_LENGTH, "%s%s.%s", + ENTITY_PREFIX, hostname.data(), instance.data()); + } +} + +void alarmUtil_clear_all ( string hostname ) +{ + SFmAlarmDataT alarm ; + string instance = "" ; + + _build_entity_path ( hostname, instance, alarm ); + + /* This will clear all the alarms for this host ; + * even ones that are raised against this host by other daemons */ + fm_clear_all ( &alarm.entity_instance_id ); +} + +/****************************************************************************** + * + * Name : alarmUtil_query + * + * Description: Utility will query a specific alarm for its current severity + * + * @param : identity may be 200.xxx + * + * @param : instance may be + * + * host= + * + * example: + * + * hostname=compute-1 + * + * host=.= + * + * example: + * + * hostname=compute-1.process=mtcClient + * + * hostname=compute-1.sensor=Fan_PSU2 + * + * Updates : None + * + * Returns : FM severity code for the specified alarm. + * FM_ALARM_SEVERITY_CLEAR if it not set. 
+ * + ******************************************************************************/ +EFmAlarmSeverityT alarmUtil_query ( string hostname, + string identity, + string instance ) +{ + SFmAlarmDataT alarm_query ; + AlarmFilter alarm_filter ; + EFmErrorT rc ; + + memset(&alarm_query, 0, sizeof(alarm_query)); + memset(&alarm_filter, 0, sizeof(alarm_filter)); + + snprintf ( &alarm_filter.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", identity.data()); + + if ( instance.empty() ) + { + snprintf ( &alarm_filter.entity_instance_id[0], FM_MAX_BUFFER_LENGTH, "%s%s", + ENTITY_PREFIX, hostname.data()); + } + else + { + snprintf ( &alarm_filter.entity_instance_id[0], FM_MAX_BUFFER_LENGTH, "%s%s.%s", + ENTITY_PREFIX, hostname.data(), instance.data()); + } + + alog ("entity_instance:%s\n", alarm_filter.entity_instance_id ); + if (( rc = fm_get_fault ( &alarm_filter, &alarm_query )) == FM_ERR_OK ) + { + dlog ("Found with Severity: %d\n", alarm_query.severity ); + return (alarm_query.severity) ; + } + else if ( rc != FM_ERR_ENTITY_NOT_FOUND ) + { + elog ("%s fm_get_fault returned error (%d)\n", hostname.c_str(), rc ); + } + return (FM_ALARM_SEVERITY_CLEAR); +} + +int alarmUtil_query_identity ( string identity, SFmAlarmDataT * alarm_list_ptr, unsigned int max_alarms ) +{ + int rc = 0 ; + + if ( max_alarms == 0 ) + { + slog ("max alarms is zero !\n"); + } + else if ( identity.empty() ) + { + slog ("empty alarm 'identity'\n"); + } + else if ( alarm_list_ptr ) + { + AlarmFilter alarm_filter ; + + memset(&alarm_filter, 0, sizeof(alarm_filter)); + snprintf ( alarm_filter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", identity.data()); + rc = fm_get_faults_by_id ( &alarm_filter.alarm_id, alarm_list_ptr, &max_alarms ); + alog ("%s fm_get_faults_by_id rc = %d\n", alarm_filter.alarm_id, rc ); + if ( rc == FM_ERR_OK ) + { + return (PASS); + } + else if ( rc == FM_ERR_ENTITY_NOT_FOUND ) + { + return (RETRY); + } + else + { + return (FAIL); + } + } + else + { + slog ("caller supplied null alarm_list_ptr\n"); 
+ } + return (FAIL_NULL_POINTER); +} + + +/********************************************************************************* + * + * Name : alarmUtil + * + * Purpose : Primary module API used to set/clear severity alarms and logs in FM. + * + * Description : Other maintenance services are expected to use ths interface to + * + * + ********************************************************************************/ +int alarmUtil ( string & hostname, + string & identity, + string & instance, + SFmAlarmDataT & alarm ) +{ + int rc = PASS ; + + // msgSock_type * mtclogd_ptr = get_mtclogd_sockPtr() ; + + /* Don't report events while we are in reset mode */ + if ( daemon_is_file_present ( NODE_RESET_FILE ) ) + { + return (rc); + } + + _build_entity_path ( hostname, instance, alarm ); + +#ifdef WANT_ALARM_QUERY + + /* See if the alarm is already in the requested state */ + EFmAlarmSeverityT curr_sev = alarmUtil_query ( hostname, identity, instance ) ; + + /* If its not a log message and we are already at this + * severity level then ignore the call */ + if (( alarm.alarm_state != FM_ALARM_STATE_MSG ) && + ( curr_sev == alarm.severity )) + { + ilog ("%s %s %s already at desired (%s) severity level\n", + hostname.c_str(), + identity.c_str(), + instance.c_str(), + alarmUtil_getSev_str(alarm.severity).c_str()); + return (rc); + } +#endif + + snprintf(&alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", identity.data()); + + if (( alarm.alarm_state == FM_ALARM_STATE_SET ) || + ( alarm.alarm_state == FM_ALARM_STATE_MSG )) + { + if ( alarm.alarm_state == FM_ALARM_STATE_SET ) + { + alog ("%s setting %s %s alarm\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id ); + } + else + { + alog ("%s creating %s %s log\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id ); + } + + /* Debug Logs */ + alog ("%s Alarm Reason: %s\n", hostname.c_str(), alarm.reason_text ); + alog ("%s Alarm Action: %s\n", hostname.c_str(), alarm.proposed_repair_action ); + alog ("%s Alarm Ident : 
%s : %s\n", hostname.c_str(), alarm.entity_type_id, alarm.entity_instance_id ); + alog ("%s Alarm State : state:%d sev:%d type:%d cause:%d sa:%c supp:%c\n", + hostname.c_str(), + alarm.alarm_state, + alarm.severity, + alarm.alarm_type, + alarm.probable_cause, + alarm.service_affecting ? 'Y' : 'N', + alarm.suppression ? 'Y' : 'N' ); + + ilog ( "fm_set_fault: %s %s state:%d sev:%d type:%d cause:%d sa:%c supp:%c", + hostname.c_str(), + alarm.alarm_id, + alarm.alarm_state, + alarm.severity, + alarm.alarm_type, + alarm.probable_cause, + alarm.service_affecting ? 'Y' : 'N', + alarm.suppression ? 'Y' : 'N' ); + + rc = fm_set_fault ( &alarm , NULL ); + if ( rc != FM_ERR_OK ) + { + wlog ("%s fm_set_fault call failed for alarm %s (rc:%d) ; retrying\n", hostname.c_str(), alarm.alarm_id, rc); + usleep (100000); /* sleep 100 msec */ + rc = fm_set_fault ( &alarm , NULL ); + if ( rc != FM_ERR_OK ) + { + elog ("%s failed to set alarm %s (rc:%d) ; giving up\n", hostname.c_str(), alarm.alarm_id, rc); + rc = FAIL ; + } + } + } + else + { + AlarmFilter filter ; memset(&filter, 0, sizeof(filter)); + + /* Setup the alarm filter */ + snprintf(filter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", alarm.alarm_id); + snprintf(filter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", alarm.entity_instance_id); + + alog ( "fm_clear_fault: %s %s:%s", hostname.c_str(), alarm.entity_instance_id, alarm.alarm_id ); + + ilog ("%s clearing %s %s alarm\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id); + if ( ( rc = fm_clear_fault ( &filter )) != FM_ERR_OK ) + { + if ( rc != FM_ERR_ENTITY_NOT_FOUND ) + { + elog ("%s failed to fm_clear_fault (rc:%d)\n", hostname.c_str(), rc ); + rc = FAIL ; + } + else + { + rc = PASS ; + } + } + } + + return (rc); +} + +/* Clear the specified hosts's alarm */ +int alarmUtil_clear ( string hostname, string alarm_id , string entity ) +{ + alarmUtil_type * alarm_ptr = alarmData_getAlarm_ptr(alarm_id); + if ( alarm_ptr ) + { + string instance = alarm_ptr->instc_prefix ; 
+ instance.append(entity); + + alarm_ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; + alarm_ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; + return ( alarmUtil ( hostname, alarm_id, instance, alarm_ptr->alarm )); + } + return (FAIL_NULL_POINTER); +} + +/** Assert a specified hosts's alarm with a CRITICAL severity level */ +int alarmUtil_critical ( string hostname, string alarm_id , string entity ) +{ + alarmUtil_type * alarm_ptr = alarmData_getAlarm_ptr(alarm_id); + if ( alarm_ptr ) + { + string instance = alarm_ptr->instc_prefix ; + instance.append(entity); + + alarm_ptr->alarm.severity = FM_ALARM_SEVERITY_CRITICAL ; + alarm_ptr->alarm.alarm_state = FM_ALARM_STATE_SET ; + + snprintf ( alarm_ptr->alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_ptr->critl_reason.data()); + + return ( alarmUtil ( hostname, alarm_id, instance, alarm_ptr->alarm )); + } + return (FAIL_NULL_POINTER); +} + + +/** Assert a specified host's alarm with a MAJOR severity level */ +int alarmUtil_major ( string hostname, string alarm_id , string entity ) +{ + alarmUtil_type * alarm_ptr = alarmData_getAlarm_ptr(alarm_id); + if ( alarm_ptr ) + { + string instance = alarm_ptr->instc_prefix ; + instance.append(entity); + + alarm_ptr->alarm.severity = FM_ALARM_SEVERITY_MAJOR ; + alarm_ptr->alarm.alarm_state = FM_ALARM_STATE_SET ; + + snprintf ( alarm_ptr->alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_ptr->major_reason.data()); + + return ( alarmUtil ( hostname, alarm_id, instance, alarm_ptr->alarm )); + } + return (FAIL_NULL_POINTER); +} +/** Assert a specified host's alarm with a MINOR severity level */ +int alarmUtil_minor ( string hostname, string alarm_id , string entity ) +{ + alarmUtil_type * alarm_ptr = alarmData_getAlarm_ptr(alarm_id); + if ( alarm_ptr ) + { + string instance = alarm_ptr->instc_prefix ; + instance.append(entity); + + alarm_ptr->alarm.severity = FM_ALARM_SEVERITY_MINOR ; + alarm_ptr->alarm.alarm_state = FM_ALARM_STATE_SET ; 
+ + snprintf ( alarm_ptr->alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_ptr->minor_reason.data()); + + return ( alarmUtil ( hostname, alarm_id, instance, alarm_ptr->alarm )); + } + return (FAIL_NULL_POINTER); +} + +/** Assert a specified host's alarm with a WARNING severity level */ +int alarmUtil_warning ( string hostname, string alarm_id , string entity ) +{ + alarmUtil_type * alarm_ptr = alarmData_getAlarm_ptr(alarm_id); + if ( alarm_ptr ) + { + string instance = alarm_ptr->instc_prefix ; + instance.append(entity); + + alarm_ptr->alarm.severity = FM_ALARM_SEVERITY_WARNING ; + alarm_ptr->alarm.alarm_state = FM_ALARM_STATE_SET ; + + snprintf ( alarm_ptr->alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_ptr->minor_reason.data()); + + return ( alarmUtil ( hostname, alarm_id, instance, alarm_ptr->alarm )); + } + return (FAIL_NULL_POINTER); +} + +/** Create CRITICAL log */ +int alarmUtil_critical_log ( string hostname, string alarm_id , string entity ) +{ + alarmUtil_type * alarm_ptr = alarmData_getAlarm_ptr(alarm_id); + if ( alarm_ptr ) + { + string instance = alarm_ptr->instc_prefix ; + instance.append(entity); + + alarm_ptr->alarm.severity = FM_ALARM_SEVERITY_CRITICAL ; + alarm_ptr->alarm.alarm_state = FM_ALARM_STATE_MSG ; + + snprintf ( alarm_ptr->alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_ptr->critl_reason.data()); + + return ( alarmUtil ( hostname, alarm_id, instance, alarm_ptr->alarm )); + } + return (FAIL_NULL_POINTER); +} + + +/** Create MAJOR log */ +int alarmUtil_major_log ( string hostname, string alarm_id , string entity ) +{ + alarmUtil_type * alarm_ptr = alarmData_getAlarm_ptr(alarm_id); + if ( alarm_ptr ) + { + string instance = alarm_ptr->instc_prefix ; + instance.append(entity); + + alarm_ptr->alarm.severity = FM_ALARM_SEVERITY_MAJOR ; + alarm_ptr->alarm.alarm_state = FM_ALARM_STATE_MSG ; + + snprintf ( alarm_ptr->alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", 
hostname.data(), alarm_ptr->major_reason.data()); + + return ( alarmUtil ( hostname, alarm_id, instance, alarm_ptr->alarm )); + } + return (FAIL_NULL_POINTER); +} +/** Create MINOR log */ +int alarmUtil_minor_log ( string hostname, string alarm_id , string entity ) +{ + alarmUtil_type * alarm_ptr = alarmData_getAlarm_ptr(alarm_id); + if ( alarm_ptr ) + { + string instance = alarm_ptr->instc_prefix ; + instance.append(entity); + + alarm_ptr->alarm.severity = FM_ALARM_SEVERITY_MINOR ; + alarm_ptr->alarm.alarm_state = FM_ALARM_STATE_MSG ; + + snprintf ( alarm_ptr->alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_ptr->minor_reason.data()); + + return ( alarmUtil ( hostname, alarm_id, instance, alarm_ptr->alarm )); + } + return (FAIL_NULL_POINTER); +} + +/** Create WARNING log */ +int alarmUtil_warning_log ( string hostname, string alarm_id, string entity, string prefix ) +{ + alarmUtil_type * alarm_ptr = alarmData_getAlarm_ptr(alarm_id); + if ( alarm_ptr ) + { + string instance = alarm_ptr->instc_prefix ; + instance.append(prefix); + + alarm_ptr->alarm.severity = FM_ALARM_SEVERITY_WARNING ; + alarm_ptr->alarm.alarm_state = FM_ALARM_STATE_MSG ; + + snprintf ( alarm_ptr->alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), entity.data()); + + return ( alarmUtil ( hostname, alarm_id, instance, alarm_ptr->alarm )); + } + return (FAIL_NULL_POINTER); +} diff --git a/mtce-common/cgts-mtce-common-1.0/alarm/scripts/mtcalarm.init b/mtce-common/cgts-mtce-common-1.0/alarm/scripts/mtcalarm.init new file mode 100644 index 00000000..4d33179e --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/alarm/scripts/mtcalarm.init @@ -0,0 +1,108 @@ +#! /bin/sh +# +# Copyright (c) 2013-2014 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# chkconfig: 2345 95 95 +# +### BEGIN INIT INFO +# Provides: mtcalarm +# Default-Start: 3 5 +# Default-Stop: 0 1 2 6 +# Short-Description: Maintenance Alarm Daemon +### END INIT INFO + +. 
#! /bin/sh
#
# Copyright (c) 2013-2014 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

#
# chkconfig: 2345 95 95
#
### BEGIN INIT INFO
# Provides:          mtcalarm
# Default-Start:     3 5
# Default-Stop:      0 1 2 6
# Short-Description: Maintenance Alarm Daemon
### END INIT INFO

. /etc/init.d/functions

DAEMON_NAME="mtcalarmd"
DAEMON="/usr/local/bin/${DAEMON_NAME}"
PIDFILE="/var/run/${DAEMON_NAME}.pid"
PLATFORM_CONF="/etc/platform/platform.conf"

IFACE=""

# Linux Standard Base (LSB) Error Codes
RETVAL=0
GENERIC_ERROR=1
INVALID_ARGS=2
UNSUPPORTED_FEATURE=3
NOT_INSTALLED=5
NOT_RUNNING=7

PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin
export PATH

if [ ! -e "${DAEMON}" ] ; then
    logger "${DAEMON} is missing"
    exit ${NOT_INSTALLED}
fi

case "$1" in
    start)
        logger "Starting ${DAEMON_NAME}"
        echo -n "Starting ${DAEMON_NAME}: "
        if [ -n "`pidof ${DAEMON_NAME}`" ] ; then
            echo -n "is already running "
            RETVAL=0
        else
            start-stop-daemon --start -b -x ${DAEMON} -- -l
            RETVAL=$?
        fi
        if [ ${RETVAL} -eq 0 ] ; then
            pid=`pidof ${DAEMON_NAME}`
            echo "OK"
            logger "${DAEMON} (${pid})"
        else
            echo "FAIL"
            RETVAL=${GENERIC_ERROR}
        fi
        ;;

    stop)
        logger "Stopping ${DAEMON_NAME}"
        echo -n "Stopping ${DAEMON_NAME}: "
        if [ -n "`pidof ${DAEMON_NAME}`" ] ; then
            killproc ${DAEMON_NAME}
        fi
        if [ -n "`pidof ${DAEMON_NAME}`" ] ; then
            echo "FAIL"
            RETVAL=${NOT_RUNNING}
        else
            echo "OK"
        fi
        rm -f ${PIDFILE}
        ;;

    restart)
        # Report the result of the start ; previously the exit status of
        # the nested invocations was dropped and restart always exited 0.
        $0 stop
        $0 start
        RETVAL=$?
        ;;

    status)
        pid=`pidof ${DAEMON_NAME}`
        RETVAL=$?
        if [ ${RETVAL} -eq 0 ] ; then
            echo "${DAEMON_NAME} is running"
        else
            echo "${DAEMON_NAME} is NOT running"
            RETVAL=${NOT_RUNNING}
        fi
        ;;

    reload)
        pkill -hup ${DAEMON_NAME}
        RETVAL=$?
        ;;

    condrestart)
        # Conditional restart: only restart a daemon that is running.
        if [ -n "`pidof ${DAEMON_NAME}`" ] ; then
            $0 restart
            RETVAL=$?
        fi
        ;;

    *)
        echo "usage: $0 { start | stop | status | restart | condrestart | reload }"
        RETVAL=${INVALID_ARGS}
        ;;
esac

exit ${RETVAL}
#
# Copyright (c) 2013-2016 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

# Sources compiled into this directory's object files
SRCS = regexUtil.cpp \
       timeUtil.cpp \
       ipmiUtil.cpp \
       pingUtil.cpp \
       keyClass.cpp \
       hostClass.cpp \
       nodeClass.cpp \
       nodeBase.cpp \
       nodeTimers.cpp \
       nodeUtil.cpp \
       hostUtil.cpp \
       nodeEvent.cpp \
       alarmUtil.cpp \
       nlEvent.cpp \
       jsonUtil.cpp \
       httpUtil.cpp \
       tokenUtil.cpp \
       msgClass.cpp

# Objects archived into libcommon.a.
# keyClass.cpp used to be listed here next to keyClass.o, which handed a
# source file to 'ar' ; only objects belong in this list.
COMMON_OBJS = regexUtil.o \
              timeUtil.o \
              pingUtil.o \
              keyClass.o \
              hostClass.o \
              nodeClass.o \
              nodeBase.o \
              nodeTimers.o \
              nodeUtil.o \
              hostUtil.o \
              nodeEvent.o \
              alarmUtil.o \
              nlEvent.o \
              jsonUtil.o \
              httpUtil.o \
              tokenUtil.o \
              msgClass.o

OBJS = $(SRCS:.cpp=.o)
LDLIBS += -lstdc++ -ldaemon -lcommon -lfmcommon -lrt -lpq -levent -ljson-c -lcrypto -luuid

INCLUDES = -I../daemon -I../alarm -I../maintenance -I../heartbeat -I.. -I.
CCFLAGS = -g -O2 -Wall -Wextra -Werror -std=c++11

# Probe for the static analysis tool with POSIX 'command -v' ; the former
# '[[ -e `which ...` ]]' test only works when make's shell is bash.
STATIC_ANALYSIS_TOOL = cppcheck
STATIC_ANALYSIS_TOOL_EXISTS = $(shell command -v $(STATIC_ANALYSIS_TOOL) >/dev/null 2>&1 && echo 1 || echo 0)

ifeq (,$(shell which ${CC}))
CC=g++
endif

.cpp.o:
	$(CXX) $(CCFLAGS) $(INCLUDES) $(EXTRACCFLAGS) -c $< -o $@

static_analysis:
ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1)
	$(STATIC_ANALYSIS_TOOL) --language=c++ --enable=warning -U__AREA__ -UWANT_FIT_TESTING *.cpp *.h
else
	echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..."
endif

lib: clean threadUtil static_analysis $(OBJS) library

build: threadUtil static_analysis $(OBJS) library

threadUtil:
	$(CXX) -c threadUtil.cpp $(CCFLAGS) $(INCLUDES) $(EXTRACCFLAGS) $(LDLIBS) -lpthread -o threadUtil.o
	ar rcs libthreadUtil.a threadUtil.o $(EXTRAARFLAGS)

library:
	ar rcs libcommon.a $(COMMON_OBJS) $(EXTRAARFLAGS)
	ar rcs libipmiUtil.a ipmiUtil.o $(EXTRAARFLAGS)
	ar rcs libpingUtil.a pingUtil.o $(EXTRAARFLAGS)
	ar rcs libnodeBase.a nodeBase.o $(EXTRAARFLAGS)
	ar rcs libregexUtil.a regexUtil.o $(EXTRAARFLAGS)
	ar rcs libhostUtil.a hostUtil.o $(EXTRAARFLAGS)

# NOTE(review): this recipe names no compiler, source or output ; it is
# kept as found because the intended test head sources are not visible
# here - confirm what it should build before relying on it.
testhead:
	$(CCFLAGS) $(INCLUDES) -L../daemon -L../common -L. $(LDLIBS) -lpingUtil -lnodeBase -lregexUtil -lhostUtil

fsync: fsync.o
	$(CC) fsync.o -L../daemon -L../common -L. -ldaemon -lcommon -o fsync

clean:
	@rm -f *.o *.a fsync
get_mtclogd_sockPtr */ +#include "alarmUtil.h" /* for ... alarmUtilClass and Utilities */ + + +alarmUtilClass __alarmObject ; + +/* module init */ +void alarmUtil_init ( void ) +{ + snprintf ( &__alarmObject.varlog_filename[0], MAX_FILENAME_LEN, "/var/log/%s_alarm.log", + program_invocation_short_name ); +} + +/********************** alarmUtilClass API Implementation ************************/ + +alarmUtilClass::alarmUtilClass() +{ + temp_str[0] = '\0'; + varlog_filename[0] = '\0'; + //ilog ("class constructor\n"); + alarmUtil_init (); +} + +alarmUtilClass::~alarmUtilClass() +{ + // ilog ("class destructor\n"); +} + +/***************************** Open API Implementation ***************************/ + +string alarmUtil_getSev_str ( EFmAlarmSeverityT sev ) +{ + switch ( sev ) + { + case FM_ALARM_SEVERITY_CLEAR: return ("clear"); + case FM_ALARM_SEVERITY_WARNING: return ("warning"); + case FM_ALARM_SEVERITY_MINOR: return ("minor"); + case FM_ALARM_SEVERITY_MAJOR: return ("major"); + case FM_ALARM_SEVERITY_CRITICAL:return ("critical"); + default : return ("unknown"); + } +} + +EFmAlarmSeverityT alarmUtil_getSev_enum ( string sev ) +{ + if ( !sev.compare("clear")) return (FM_ALARM_SEVERITY_CLEAR) ; + if ( !sev.compare("warning")) return (FM_ALARM_SEVERITY_WARNING); + if ( !sev.compare("minor")) return (FM_ALARM_SEVERITY_MINOR); + if ( !sev.compare("major")) return (FM_ALARM_SEVERITY_MAJOR); + if ( !sev.compare("critical")) return (FM_ALARM_SEVERITY_CRITICAL); + wlog ("Unsupported severity '%s'\n", sev.c_str() ); + return (FM_ALARM_SEVERITY_CLEAR) ; +} + + +/* update the passed in alarm struct's instance_id entity path for the specified host */ +void _build_entity_path ( string & hostname, string & instance, SFmAlarmDataT & alarm ) +{ + snprintf ( &alarm.entity_type_id[0], FM_MAX_BUFFER_LENGTH, "system.host" ); + + if ( instance.empty() ) + { + snprintf ( &alarm.entity_instance_id[0], FM_MAX_BUFFER_LENGTH, "%s%s", + ENTITY_PREFIX, hostname.data()); + } + else + { + 
snprintf ( &alarm.entity_instance_id[0], FM_MAX_BUFFER_LENGTH, "%s%s.%s", + ENTITY_PREFIX, hostname.data(), instance.data()); + } +} + +void alarmUtil_clear_all ( string hostname ) +{ + SFmAlarmDataT alarm ; + string instance = "" ; + + _build_entity_path ( hostname, instance, alarm ); + + /* This will clear all the alarms for this host ; + * even ones that are raised against this host by other daemons */ + fm_clear_all_async ( &alarm.entity_instance_id ); +} + +/****************************************************************************** + * + * Name : alarmUtil_query + * + * Description: Utility will query a specific alarm for its current severity + * + * @param : identity may be 200.xxx + * + * @param : instance may be + * + * host= + * + * example: + * + * hostname=compute-1 + * + * host=.= + * + * example: + * + * hostname=compute-1.process=mtcClient + * + * hostname=compute-1.sensor=Fan_PSU2 + * + * Updates : None + * + * Returns : FM severity code for the specified alarm. + * FM_ALARM_SEVERITY_CLEAR if it not set. 
+ * + ******************************************************************************/ +EFmAlarmSeverityT alarmUtil_query ( string & hostname, + string & identity, + string & instance ) +{ + SFmAlarmDataT alarm_query ; + AlarmFilter alarm_filter ; + EFmErrorT rc ; + + memset(&alarm_query, 0, sizeof(alarm_query)); + memset(&alarm_filter, 0, sizeof(alarm_filter)); + + snprintf ( &alarm_filter.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", identity.data()); + + if ( instance.empty() ) + { + snprintf ( &alarm_filter.entity_instance_id[0], FM_MAX_BUFFER_LENGTH, "%s%s", + ENTITY_PREFIX, hostname.data()); + } + else + { + snprintf ( &alarm_filter.entity_instance_id[0], FM_MAX_BUFFER_LENGTH, "%s%s.%s", + ENTITY_PREFIX, hostname.data(), instance.data()); + } + + alog ("entity_instance:%s\n", alarm_filter.entity_instance_id ); + if (( rc = fm_get_fault ( &alarm_filter, &alarm_query )) == FM_ERR_OK ) + { + dlog ("Found with Severity: %d\n", alarm_query.severity ); + return (alarm_query.severity) ; + } + else if ( rc != FM_ERR_ENTITY_NOT_FOUND ) + { + elog ("%s fm_get_fault returned error (%d)\n", hostname.c_str(), rc ); + } + return (FM_ALARM_SEVERITY_CLEAR); +} + +int alarmUtil_query_identity ( string identity, SFmAlarmDataT * alarm_list_ptr, unsigned int max_alarms ) +{ + int rc = 0 ; + + if ( max_alarms == 0 ) + { + slog ("max alarms is zero !\n"); + } + else if ( identity.empty() ) + { + slog ("empty alarm 'identity'\n"); + } + else if ( alarm_list_ptr ) + { + AlarmFilter alarm_filter ; + + memset(&alarm_filter, 0, sizeof(alarm_filter)); + snprintf ( alarm_filter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", identity.data()); + rc = fm_get_faults_by_id ( &alarm_filter.alarm_id, alarm_list_ptr, &max_alarms ); + alog ("%s fm_get_faults_by_id rc = %d\n", alarm_filter.alarm_id, rc ); + if ( rc == FM_ERR_OK ) + { + return (PASS); + } + else if ( rc == FM_ERR_ENTITY_NOT_FOUND ) + { + return (RETRY); + } + else + { + return (FAIL); + } + } + else + { + slog ("caller supplied null 
alarm_list_ptr\n"); + } + return (FAIL_NULL_POINTER); +} + + +/********************************************************************************* + * + * Name : alarmUtil + * + * Purpose : Primary module API used to set/clear severity alarms and logs in FM. + * + * Description : Other maintenance services are expected to use this interface to + * set (FM_ALARM_STATE_SET), log (FM_ALARM_STATE_MSG) or clear an + * alarm against a host. The entity path and alarm_id fields of the + * caller's alarm struct are filled in from hostname, instance and + * identity. Nothing is sent to FM, and PASS is returned, while + * NODE_RESET_FILE is present. + * + * + ********************************************************************************/ +int alarmUtil ( string & hostname, + string & identity, + string & instance, + SFmAlarmDataT & alarm ) +{ + int rc = PASS ; + + // msgSock_type * mtclogd_ptr = get_mtclogd_sockPtr() ; + + /* Don't report events while we are in reset mode */ + if ( daemon_is_file_present ( NODE_RESET_FILE ) ) + { + return (rc); + } + + _build_entity_path ( hostname, instance, alarm ); + +#ifdef WANT_ALARM_QUERY + + /* See if the alarm is already in the requested state */ + EFmAlarmSeverityT curr_sev = alarmUtil_query ( hostname, identity, instance ) ; + + /* If its not a log message and we are already at this + * severity level then ignore the call */ + if (( alarm.alarm_state != FM_ALARM_STATE_MSG ) && + ( curr_sev == alarm.severity )) + { + alog ("%s %s %s already at desired (%s) severity level\n", + hostname.c_str(), + identity.c_str(), + instance.c_str(), + alarmUtil_getSev_str(alarm.severity).c_str()); + return (rc); + } + +#endif + + snprintf(&alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", identity.data()); + + if (( alarm.alarm_state == FM_ALARM_STATE_SET ) || + ( alarm.alarm_state == FM_ALARM_STATE_MSG )) + { + if ( alarm.alarm_state == FM_ALARM_STATE_SET ) + { + alog ("%s setting %s %s alarm\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id ); + } + else + { + alog ("%s creating %s %s log\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id ); + } + + /* Debug Logs */ + alog ("%s Alarm Reason: %s\n", hostname.c_str(), alarm.reason_text ); + alog ("%s Alarm Action: %s\n", hostname.c_str(), alarm.proposed_repair_action ); + 
alog ("%s Alarm Ident : %s : %s\n", hostname.c_str(), alarm.entity_type_id, alarm.entity_instance_id ); + alog ("%s Alarm State : state:%d sev:%d type:%d cause:%d sa:%c supp:%c\n", + hostname.c_str(), + alarm.alarm_state, + alarm.severity, + alarm.alarm_type, + alarm.probable_cause, + alarm.service_affecting ? 'Y' : 'N', + alarm.suppression ? 'Y' : 'N' ); + + snprintf (&__alarmObject.temp_str[0], MAX_API_LOG_LEN-1, + "\n%s [%5d] fm_set_fault: %s %s state:%d sev:%d type:%d cause:%d sa:%c supp:%c", + pt(), getpid(), hostname.c_str(), alarm.alarm_id, alarm.alarm_state, + alarm.severity, + alarm.alarm_type, + alarm.probable_cause, + alarm.service_affecting ? 'Y' : 'N', + alarm.suppression ? 'Y' : 'N' ); + + //send_log_message ( mtclogd_ptr, hostname.data(), &__alarmObject.varlog_filename[0], + // &__alarmObject.temp_str[0] ); + + nodeUtil_latency_log ( hostname, NODEUTIL_LATENCY_MON_START , 0 ); + rc = fm_set_fault_async ( &alarm , NULL ); + nodeUtil_latency_log ( hostname, "fm_set_fault - alarm - common" , LATENCY_1SEC ); + + snprintf (&__alarmObject.temp_str[0], MAX_API_LOG_LEN-1, + "%s [%5d] fm_set_fault: %s returned %d", + pt(), getpid(), hostname.c_str(), rc ); + + //send_log_message ( mtclogd_ptr, hostname.data(), &__alarmObject.varlog_filename[0], + // &__alarmObject.temp_str[0] ); + + if ( rc != FM_ERR_OK ) + { + elog ("%s failed to set alarm %s (rc:%d)\n", hostname.c_str(), alarm.alarm_id, rc); + rc = FAIL ; + } + } + else // ( alarm.alarm_state == FM_ALARM_STATE_CLEAR ) + { + AlarmFilter filter ; memset(&filter, 0, sizeof(filter)); + + /* Setup the alarm filter */ + snprintf(filter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", alarm.alarm_id); + snprintf(filter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", alarm.entity_instance_id); + + snprintf (&__alarmObject.temp_str[0], MAX_API_LOG_LEN-1, "\n%s [%5d] fm_clear_fault: %s %s:%s", + pt(), getpid(), hostname.c_str(), alarm.entity_instance_id, alarm.alarm_id ); + + // send_log_message ( mtclogd_ptr, hostname.data(), 
&__alarmObject.varlog_filename[0], + // &__alarmObject.temp_str[0] ); + + alog ("%s clearing %s %s alarm\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id); + nodeUtil_latency_log ( hostname, NODEUTIL_LATENCY_MON_START , 0 ); + if ( ( rc = fm_clear_fault_async ( &filter )) != FM_ERR_OK ) + { + if ( rc != FM_ERR_ENTITY_NOT_FOUND ) + { + elog ("%s failed to fm_clear_fault (rc:%d)\n", hostname.c_str(), rc ); + rc = FAIL ; + } + } + nodeUtil_latency_log ( hostname, "fm_clear_fault - common" , LATENCY_1SEC ); + } + + return (rc); +} + +/* Clear the specified hosts's alarm */ +int alarmUtil_clear ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ) +{ + alarm.severity = FM_ALARM_SEVERITY_CLEAR ; + alarm.alarm_state = FM_ALARM_STATE_CLEAR ; + return ( alarmUtil ( hostname, identity, instance, alarm )); +} + +/************************* A L A R M I N G **************************/ + +/** Assert a specified hosts's alarm with a CRITICAL severity level */ +int alarmUtil_critical ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ) +{ + alarm.severity = FM_ALARM_SEVERITY_CRITICAL ; + alarm.alarm_state = FM_ALARM_STATE_SET ; + return ( alarmUtil ( hostname, identity, instance, alarm )); +} + +/** Assert a specified host's alarm with a MAJOR severity level */ +int alarmUtil_major ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ) +{ + alarm.severity = FM_ALARM_SEVERITY_MAJOR ; + alarm.alarm_state = FM_ALARM_STATE_SET ; + return ( alarmUtil ( hostname, identity, instance, alarm )); +} + +/** Assert a specified host's alarm with a MINOR severity level */ +int alarmUtil_minor ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ) +{ + alarm.severity = FM_ALARM_SEVERITY_MINOR ; + alarm.alarm_state = FM_ALARM_STATE_SET ; + return ( alarmUtil ( hostname, identity, instance, alarm )); +} + +/** Assert a specified host's mtce alarm with a WARNING severity level */ +int 
alarmUtil_warning ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ) +{ + alarm.severity = FM_ALARM_SEVERITY_WARNING ; + alarm.alarm_state = FM_ALARM_STATE_SET ; + return ( alarmUtil ( hostname, identity, instance, alarm )); +} + +/*************************** L O G G I N G **********************************/ + +/** Create a CRITICAL log */ +int alarmUtil_critical_log ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ) +{ + alarm.severity = FM_ALARM_SEVERITY_CRITICAL ; + alarm.alarm_state = FM_ALARM_STATE_MSG ; + return ( alarmUtil ( hostname,identity, instance, alarm )); +} + +/** Create a MAJOR log */ +int alarmUtil_major_log ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ) +{ + alarm.severity = FM_ALARM_SEVERITY_MAJOR ; + alarm.alarm_state = FM_ALARM_STATE_MSG ; + return ( alarmUtil ( hostname, identity, instance, alarm )); +} + +/** Create a MINOR log */ +int alarmUtil_minor_log ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ) +{ + alarm.severity = FM_ALARM_SEVERITY_MINOR ; + alarm.alarm_state = FM_ALARM_STATE_MSG ; + return ( alarmUtil ( hostname, identity, instance, alarm )); +} + +/** Create a WARNING log */ +int alarmUtil_warning_log ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ) +{ + alarm.severity = FM_ALARM_SEVERITY_WARNING ; + alarm.alarm_state = FM_ALARM_STATE_MSG ; + return ( alarmUtil ( hostname, identity, instance, alarm )); +} + +/** Create a neutral log */ +int alarmUtil_log ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ) +{ + alarm.alarm_state = FM_ALARM_STATE_MSG ; + return ( alarmUtil ( hostname, identity, instance, alarm )); +} diff --git a/mtce-common/cgts-mtce-common-1.0/common/alarmUtil.h b/mtce-common/cgts-mtce-common-1.0/common/alarmUtil.h new file mode 100644 index 00000000..d9ca7ce4 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/alarmUtil.h @@ -0,0 +1,142 @@ 
+#ifndef __ALARMUTIL_H__ +#define __ALARMUTIL_H__ + +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Common Maintenance 'Alarm' Header + */ + +#include +#include +#include +#include +#include + +//using namespace std; + +/* external header APIs */ +#include "nodeBase.h" +#include "fmAPI.h" /* for fm_set_fault, fm_clear_fault, etc */ + +#define ENTITY_PREFIX ((const char *)"host=") + +#define MAX_ALARMS (10) + +#define SWERR_ALARM_ID ((const char *)"200.000") /* Do No Use */ +#define LOCK_ALARM_ID ((const char *)"200.001") +#define ENABLE_ALARM_ID ((const char *)"200.004") +#define MGMNT_HB_ALARM_ID ((const char *)"200.005") +#define PMOND_ALARM_ID ((const char *)"200.006") +#define SENSOR_ALARM_ID ((const char *)"200.007") /* Sensor read alarm ; i.e. the sensor read value bad */ +#define INFRA_HB_ALARM_ID ((const char *)"200.009") +#define BM_ALARM_ID ((const char *)"200.010") +#define CONFIG_ALARM_ID ((const char *)"200.011") +#define CH_CONT_ALARM_ID ((const char *)"200.012") /* Combo Host Controller Failure - with Active Compute */ +#define CH_COMP_ALARM_ID ((const char *)"200.013") /* Combo Host Compute Failure - on last Controller */ +#define SENSORCFG_ALARM_ID ((const char *)"200.014") /* Sensor configuration alarm ; i.e. could not add */ +#define SENSORGROUP_ALARM_ID ((const char *)"200.015") /* Sensor Group Read Error */ + +#define EVENT_LOG_ID ((const char *)"200.020") +#define COMMAND_LOG_ID ((const char *)"200.021") +#define STATECHANGE_LOG_ID ((const char *)"200.022") +#define SERVICESTATUS_LOG_ID ((const char *)"200.023") /* log used to report service failure events against */ + +/** + * TODO: This class is more of a place holder for + * more centralized management of alarms + * It is useless in its present form. 
+ **/ +class alarmUtilClass +{ + private: + + public: + + alarmUtilClass(); + ~alarmUtilClass(); + + char temp_str [MAX_API_LOG_LEN] ; + char varlog_filename[MAX_FILENAME_LEN]; +}; + +typedef struct +{ + SFmAlarmDataT alarm ; + string name ; + string instc_prefix ; /* Instance prefix i.e. "=sensor." or "=process." */ + string critl_reason ; + string minor_reason ; + string major_reason ; + string clear_reason ; +} alarmUtil_type ; + +/** Converts FM severity to representative string */ +string alarmUtil_getSev_str ( EFmAlarmSeverityT sev ); +EFmAlarmSeverityT alarmUtil_getSev_enum ( string sev ); + +/* Clear all alarms against this host */ +void alarmUtil_clear_all ( string hostname ); + +/** + * Query the specified alarm severity level. + * Severity levels are specified in fmAPI.h + **/ +EFmAlarmSeverityT alarmUtil_query ( string & hostname, + string & identity, + string & instance ); + +int alarmUtil_query_identity ( string identity, + SFmAlarmDataT * alarm_list_ptr, + unsigned int alarms_max ); + +int alarmUtil_clear ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ); + +/************************* A L A R M I N G **************************/ + +/** + * Assert a unique identity alarm or log against specified + * hostname/instance using the supplied alarm data + **/ +int alarmUtil (string & hostname, string & identity, string & instance, SFmAlarmDataT & alarm); + +/** Return a string that represents the specified severity enum */ +string alarmUtil_getSev_str ( EFmAlarmSeverityT sev ); + +/** Assert a specified host's alarm with a CRITICAL severity level */ +int alarmUtil_critical ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ); + +/** Assert a specified host's alarm with a MAJOR severity level */ +int alarmUtil_major ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ); + +/** Assert a specified host's alarm with a MINOR severity level */ +int alarmUtil_minor ( string hostname, string 
identity, string instance, SFmAlarmDataT & alarm ); + +/** Assert a specified host's mtce alarm with a WARNING severity level */ +int alarmUtil_warning ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ); + + +/*************************** L O G G I N G **********************************/ + +/** Create a CRITICAL log */ +int alarmUtil_critical_log ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ); + +/** Create a MAJOR log */ +int alarmUtil_major_log ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ); + +/** Create a MINOR log */ +int alarmUtil_minor_log ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ); + +/** Create a WARNING log */ +int alarmUtil_warning_log ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ); + +/** Create a neutral customer log */ +int alarmUtil_log ( string hostname, string identity, string instance, SFmAlarmDataT & alarm ); + +#endif /* __ALARMUTIL_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/common/fitCodes.h b/mtce-common/cgts-mtce-common-1.0/common/fitCodes.h new file mode 100644 index 00000000..2f041d5e --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/fitCodes.h @@ -0,0 +1,170 @@ +#ifndef __INCLUDE_FITCODES_H__ +#define __INCLUDE_FITCODES_H__ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Common Fault Insertion Code Definitions + */ + +/************************************************************************************* + * + * These definitions are used for fault insertion testing. 
+ * + * Here are examples of how they are used, + * + * - touch the 'no_reboot' file on the mtcClient to cause it to + * service the reboot request but don't actually reboot + * + * - touch the 'no_mgmnt_ack' file on the mtcClient to cause + * it to handle command requests but drop/not send the ack message + * if it came in on the management network ; same for infra + * + * - touch the 'no_mtcalive' file to tell mtcClient to stop sending + * its mtcAlive messages while this file is present. + * + **************************************************************************************/ + +/** + * This is the Fault Insertion Dir - Code that looks for multiple fit files need not + * bother if the dir is not present + **/ +#define MTC_CMD_FIT__DIR ("/var/run/fit") + + +#define MTC_CMD_FIT__NO_REBOOT ("/var/run/fit/no_reboot") /* mtcClient */ +#define MTC_CMD_FIT__NO_RESET ("/var/run/fit/no_reset") /* mtcClient */ +#define MTC_CMD_FIT__NO_WIPEDISK ("/var/run/fit/no_wipedisk") /* mtcClient */ +#define MTC_CMD_FIT__NO_MGMNT_ACK ("/var/run/fit/no_mgmnt_ack") /* mtcClient */ +#define MTC_CMD_FIT__NO_INFRA_ACK ("/var/run/fit/no_infra_ack") /* mtcClient */ +#define MTC_CMD_FIT__NO_MTCALIVE ("/var/run/fit/no_mtcalive") /* mtcClient */ +#define MTC_CMD_FIT__MGMNT_RXSOCK ("/var/run/fit/mgmnt_rxsock") /* mtcClient */ +#define MTC_CMD_FIT__MGMNT_TXSOCK ("/var/run/fit/mgmnt_txsock") /* mtcClient */ +#define MTC_CMD_FIT__INFRA_RXSOCK ("/var/run/fit/infra_rxsock") /* mtcClient */ +#define MTC_CMD_FIT__INFRA_TXSOCK ("/var/run/fit/infra_txsock") /* mtcClient */ +#define MTC_CMD_FIT__RMON_SOCK ("/var/run/fit/rmon_sock") /* mtcClient */ +#define MTC_CMD_FIT__AMON_SOCK ("/var/run/fit/amon_sock") /* mtcClient */ +#define MTC_CMD_FIT__NO_INFRA_RSP ("/var/run/fit/no_infra_rsp") /* hbsClient */ +#define MTC_CMD_FIT__NO_MGMNT_RSP ("/var/run/fit/no_mgmnt_rsp") /* hbsClient */ +#define MTC_CMD_FIT__LINKLIST ("/var/run/fit/linklist") /* hbsAgent */ +#define MTC_CMD_FIT__HBSSILENT 
("/var/run/fit/hbs_silent_fault") /* hbsAgent */ +#define MTC_CMD_FIT__SENSOR_DATA ("/var/run/fit/sensor_data") /* hwmond */ +#define MTC_CMD_FIT__POWER_CMD ("/var/run/fit/power_cmd_result") /* mtcAgent */ +#define MTC_CMD_FIT__MC_INFO ("/var/run/fit/mc_info") /* mtcAgent */ +#define MTC_CMD_FIT__POWER_STATUS ("/var/run/fit/power_status") /* mtcAgent */ +#define MTC_CMD_FIT__RESTART_CAUSE ("/var/run/fit/restart_cause") /* mtcAgent */ +#define MTC_CMD_FIT__UPTIME ("/var/run/fit/uptime") /* mtcAgent */ +#define MTC_CMD_FIT__LOUD_BM_PW ("/var/run/fit/loud_bm_pw") /* mtcAgent & hwmond */ +#define MTC_CMD_FIT__START_SVCS ("/var/run/fit/host_services") /* mtcClient */ +#define MTC_CMD_FIT__NO_HS_ACK ("/var/run/fit/no_hs_ack") /* mtcClient */ +#define MTC_CMD_FIT__GOENABLE_AUDIT ("/var/run/fit/goenable_audit") /* mtcAgent */ + +/***************************************************** + * Fault Insertion Codes + *****************************************************/ + +/***************************************************************************** + * + * the fit /var/run/fit/fitinfo file contains the following format, + * - code and process are required + * - other fields are optional + * - no spaces, exclude <> + * + * proc= + * code= + * host= + * name= + * data= + * + *****************************************************************************/ + +/*********************** Common FIT Codes **********************************/ + +#define FIT_CODE__NONE (0) +#define FIT_CODE__CORRUPT_TOKEN (1) +#define FIT_CODE__ADD_DELETE (2) +#define FIT_CODE__STUCK_TASK (3) +#define FIT_CODE__AVOID_N_FAIL_IPMITOOL_REQUEST (4) +#define FIT_CODE__THREAD_TIMEOUT (5) +#define FIT_CODE__THREAD_SEGFAULT (6) +#define FIT_CODE__SIGNAL_NOEXIT (7) +#define FIT_CODE__STRESS_THREAD (8) +#define FIT_CODE__DO_NOTHING_THREAD (9) +#define FIT_CODE__EMPTY_BM_PASSWORD (10) +#define FIT_CODE__INVALIDATE_MGMNT_IP (11) +#define FIT_CODE__INVALIDATE_INFRA_IP (12) +#define FIT_CODE__WORK_QUEUE (13) +#define 
FIT_CODE__NO_READY_EVENT (14) +#define FIT_CODE__NO_PULSE_REQUEST (15) +#define FIT_CODE__NO_PULSE_RESPONSE (16) + +#define FIT_CODE__FAST_PING_AUDIT_HOST (20) +#define FIT_CODE__FAST_PING_AUDIT_ALL (21) + +#define FIT_CODE__TRANSLATE_LOCK_TO_FORCELOCK (30) +#define FIT_CODE__LOCK_HOST (31) +#define FIT_CODE__FORCE_LOCK_HOST (32) +#define FIT_CODE__UNLOCK_HOST (33) + +#define FIT_CODE__FM_SET_ALARM (40) +#define FIT_CODE__FM_GET_ALARM (41) +#define FIT_CODE__FM_QRY_ALARMS (42) + +#define FIT_CODE__IPMI_COMMAND_SEND (60) +#define FIT_CODE__IPMI_COMMAND_RECV (61) + +#define FIT_CODE__START_HOST_SERVICES (70) +#define FIT_CODE__STOP_HOST_SERVICES (71) + +/***************** Process Fit Codes ********************************/ + +/* Hardware Monitor FIT Codes */ +#define FIT_CODE__HWMON__CORRUPT_TOKEN (101) +#define FIT_CODE__HWMON__AVOID_TOKEN_REFRESH (102) +#define FIT_CODE__HWMON__THREAD_TIMEOUT (103) +#define FIT_CODE__HWMON__AVOID_SENSOR_QUERY (104) +#define FIT_CODE__HWMON__SENSOR_STATUS (105) +#define FIT_CODE__HWMON__STARTUP_STATES_FAILURE (106) + +#define FIT_CODE__HWMON__HTTP_LOAD_SENSORS (120) +#define FIT_CODE__HWMON__HTTP_ADD_SENSOR (121) +#define FIT_CODE__HWMON__HTTP_DEL_SENSOR (122) +#define FIT_CODE__HWMON__HTTP_MOD_SENSOR (123) + +#define FIT_CODE__HWMON__ADD_SENSOR (130) +#define FIT_CODE__HWMON__BAD_SENSOR (131) +#define FIT_CODE__HWMON__GET_SENSOR (132) + +#define FIT_CODE__HWMON__CREATE_ORPHAN_SENSOR_ALARM (136) + + +#define FIT_CODE__HWMON__HTTP_LOAD_GROUPS (140) +#define FIT_CODE__HWMON__HTTP_ADD_GROUP (141) +#define FIT_CODE__HWMON__HTTP_DEL_GROUP (142) +#define FIT_CODE__HWMON__HTTP_MOD_GROUP (143) +#define FIT_CODE__HWMON__HTTP_GROUP_SENSORS (144) + +#define FIT_CODE__HWMON__ADD_GROUP (150) +#define FIT_CODE__HWMON__BAD_GROUP (151) +#define FIT_CODE__HWMON__GET_GROUP (152) + +#define FIT_CODE__HWMON__CREATE_ORPHAN_GROUP_ALARM (156) + +#define FIT_CODE__HWMON__NO_DATA (160) + +#define FIT_CODE__HWMON__RAISE_SENSOR_ALARM (170) +#define 
FIT_CODE__HWMON__CLEAR_SENSOR_ALARM (171) +#define FIT_CODE__HWMON__RAISE_GROUP_ALARM (172) +#define FIT_CODE__HWMON__CLEAR_GROUP_ALARM (173) + +#define FIT_CODE__HWMON__SET_DB_SENSOR_STATUS (175) +#define FIT_CODE__HWMON__SET_DB_SENSOR_STATE (176) +#define FIT_CODE__HWMON__SET_DB_GROUP_STATUS (177) +#define FIT_CODE__HWMON__SET_DB_GROUP_STATE (178) + +#endif /* __INCLUDE_FITCODES_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/common/fsync.c b/mtce-common/cgts-mtce-common-1.0/common/fsync.c new file mode 100644 index 00000000..6ce29efd --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/fsync.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2014 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +#include +#include +#include +#include +#include + +/* helper app to fsync a single file/directory */ + +int main(int argc, char **argv) +{ + int fd,rc; + + if (argc != 2) { + printf("usage: %s \n", argv[0]); + return -1; + } + + fd = open(argv[1], O_RDONLY); + if (fd == -1) { + printf("unable to open file %s: %m\n", argv[1]); + return -1; + } + + rc = fsync(fd); + if (rc == -1) { + printf("error fsyncing file %s: %m\n", argv[1]); + } + + if (close(fd) == -1) { + printf("error closing file %s: %m\n", argv[1]); + } + + return rc; +} diff --git a/mtce-common/cgts-mtce-common-1.0/common/hostClass.cpp b/mtce-common/cgts-mtce-common-1.0/common/hostClass.cpp new file mode 100644 index 00000000..e9324f37 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/hostClass.cpp @@ -0,0 +1,454 @@ +/* + * Copyright (c) 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Host Base Class Member Implementation. 
+ */ + +#include +#include +#include +#include +#include /* for ENODEV, EFAULT and ENXIO */ +#include /* for close and usleep */ + +using namespace std; + +#ifdef __AREA__ +#undef __AREA__ +#define __AREA__ "~~~" +#endif + +#include "nodeBase.h" +#include "hostClass.h" +#include "nodeUtil.h" + + +hostBaseClass::hostBaseClass() /* constructor */ +{ + for(unsigned int i = 0; i < MAX_HOSTS; ++i) + { + host_ptrs[i] = NULL; + } + bm_provisioned= false; + head = tail = NULL; + memory_allocs = 0 ; + memory_used = 0 ; + hosts = 0 ; + service = 0 ; + + /* Start with null identity */ + my_hostname.clear() ; + my_local_ip.clear() ; + my_float_ip.clear() ; +} + +hostBaseClass::~hostBaseClass() /* destructor */ +{ + host * ptr = head ; + host * temp_ptr = ptr ; + while ( ptr != NULL ) + { + temp_ptr = ptr ; + ptr = ptr->next ; + delHost (temp_ptr); + } + if ( memory_used != 0 ) + { + elog ( "Apparent Memory Leak - Allocs:%d and Bytes:%d\n", + memory_allocs, memory_used ); + } + else + { + dlog ( "No Memory Leaks\n\n"); + } +} + +/* + * Allocates memory for a new host and stores its the address in host_ptrs + * + * @param void + * @return pointer to the newly allocted host memory + */ +struct hostBaseClass::host * hostBaseClass::newHost ( void ) +{ + struct hostBaseClass::host * host_ptr = NULL ; + + if ( memory_allocs == 0 ) + { + memset ( host_ptrs, 0 , sizeof(struct host *)*MAX_HOSTS); + } + + // find an empty spot + for ( int i = 0 ; i < MAX_HOSTS ; i++ ) + { + if ( host_ptrs[i] == NULL ) + { + host_ptrs[i] = host_ptr = new host ; + memory_allocs++ ; + memory_used += sizeof (struct hostBaseClass::host); + // ilog ("%p:%p - mem after new: allocs:%d used:%d\n", host_ptr , host_ptrs[i], memory_allocs, memory_used); + return host_ptr ; + } + } + elog ( "Failed to save new host pointer address\n" ); + return host_ptr ; +} + + +/* Frees the memory of a pre-allocated host and removes + * it from the host_ptrs list + * @param host * pointer to the host memory address to be freed + * 
@return int return code { PASS or -EINVAL } + */ +int hostBaseClass::delHost ( struct hostBaseClass::host * host_ptr ) +{ + if ( memory_allocs > 0 ) + { + for ( int i = 0 ; i < MAX_NODES ; i++ ) + { + if ( host_ptrs[i] == host_ptr ) + { + // ilog ("%p:%p - mem before del: allocs:%d used:%d\n", host_ptr , host_ptrs[i], memory_allocs, memory_used); + delete host_ptr ; + host_ptrs[i] = NULL ; + memory_allocs-- ; + memory_used -= sizeof (struct hostBaseClass::host); + return PASS ; + } + } + elog ( "Error: Unable to validate memory address being freed\n" ); + } + else + elog ( "Error: Free memory called when there is no memory to free\n" ); + + return -EINVAL ; +} + + /* + * Allocate new host and tack it on the end of the host_list + */ +struct +hostBaseClass::host* hostBaseClass::addHost( string hostname ) +{ + /* verify host is not already provisioned */ + struct host * ptr = getHost ( hostname ); + if ( ptr ) + { + if ( remHost ( hostname ) ) + { + /* Should never get here but if we do then */ + /* something is seriously wrong */ + elog ("Error: Unable to remove host during reprovision\n"); + return static_cast(NULL); + } + } + + /* allocate memory for new host */ + ptr = newHost (); + if( ptr == NULL ) + { + elog ( "Error: Failed to allocate memory for new host\n" ); + return static_cast(NULL); + } + + /* Init the new host */ + ptr->hostname = hostname ; + + /* If the host list is empty add it to the head */ + if( head == NULL ) + { + head = ptr ; + tail = ptr ; + ptr->prev = NULL ; + ptr->next = NULL ; + } + else + { + /* link the new_host to the tail of the host_list + * then mark the next field as the end of the host_list + * adjust tail to point to the last host + */ + tail->next = ptr ; + ptr->prev = tail ; + ptr->next = NULL ; + tail = ptr ; + } + + hosts++ ; + + return ptr ; +} + +struct hostBaseClass::host* hostBaseClass::getHost ( string hostname ) +{ + /* check for empty list condition */ + if ( head == NULL ) + return NULL ; + + for ( struct host * ptr = 
head ; ; ptr = ptr->next ) + { + if ( !hostname.compare ( ptr->hostname )) + { + // ilog ("%s %p\n", hostname.c_str(), ptr ); + return ptr ; + } + else if ( !hostname.compare ( ptr->uuid )) + { + // ilog ("%s %p\n", hostname.c_str(), ptr ); + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + return static_cast<struct host *>(NULL); +} + +/* Remove a host from the linked list of hosts - may require splice action. + * Returns PASS on success, -ENODEV for an empty hostname, -ENXIO when the + * list is empty and -EFAULT when the host is not found. */ +int hostBaseClass::remHost( string hostname ) +{ + /* std::string::c_str() never returns NULL so test for an empty name ; + * an empty name would otherwise match any host whose uuid is empty */ + if ( hostname.empty() ) + return -ENODEV ; + + if ( head == NULL ) + return -ENXIO ; + + struct host * ptr = getHost ( hostname ); + + if ( ptr == NULL ) + return -EFAULT ; + + /* If the host is the head host */ + if ( ptr == head ) + { + /* only one host in the list case */ + if ( head == tail ) + { + dlog ("Single Host -> Head Case\n"); + head = NULL ; + tail = NULL ; + } + else + { + dlog ("Multiple Hosts -> Head Case\n"); + head = head->next ; + head->prev = NULL ; + } + } + /* if not head but tail then there must be more than one + * host in the list so go ahead and chop the tail. 
+ */ + else if ( ptr == tail ) + { + dlog ("Multiple Host -> Tail Case\n"); + tail = tail->prev ; + tail->next = NULL ; + } + else + { + dlog ("Multiple Host -> Full Splice Out\n"); + ptr->prev->next = ptr->next ; + ptr->next->prev = ptr->prev ; + } + delHost ( ptr ); + hosts-- ; + return (PASS) ; +} + + + +/****************************************************************************************** + ****************************************************************************************** + *****************************************************************************************/ + + + +int hostBaseClass::add_host ( node_inv_type & inv ) +{ + int rc = FAIL ; + struct hostBaseClass::host * host_ptr = static_cast(NULL); + + if (( inv.name.empty()) || + ( !inv.name.compare ("none")) || + ( !inv.name.compare ("None"))) + { + wlog ("Refusing to add host with 'null' or 'invalid' hostname (%s)\n", + inv.uuid.c_str()); + return (FAIL_INVALID_HOSTNAME) ; + } + + host_ptr = hostBaseClass::getHost(inv.name); + if ( host_ptr ) + { + dlog ("%s Already provisioned\n", host_ptr->hostname.c_str()); + + /* Send back a retry in case the add needs to be converted to a modify */ + return (RETRY); + } + /* Otherwise add it as a new host */ + else + { + if ( daemon_get_cfg_ptr()->debug_level > 1 ) + print_inv ( inv ); + + host_ptr = hostBaseClass::addHost(inv.name); + if ( host_ptr ) + { + host_ptr->ip = inv.ip ; + host_ptr->mac = inv.mac ; + host_ptr->uuid = inv.uuid ; + + host_ptr->type = inv.type ; + host_ptr->nodetype = CGTS_NODE_NULL ; + + host_ptr->retries = 0 ; + host_ptr->toggle = false ; + + /* Add to the end of inventory */ + hostlist.push_back ( host_ptr->hostname ); + dlog ("%s Added Host Base\n", inv.name.c_str()); + rc = PASS ; + } + else + { + elog ("%s Host Base Add Failed\n", inv.name.c_str()); + rc = FAIL_NULL_POINTER ; + } + } + return (rc); +} + +int hostBaseClass::rem_host ( string hostname ) +{ + int rc = FAIL ; + if ( ! 
hostname.empty() ) + { + hostlist.remove ( hostname ); + rc = hostBaseClass::remHost ( hostname ); + } + return ( rc ); +} + +int hostBaseClass::del_host ( string hostname ) +{ + int rc = FAIL_DEL_UNKNOWN ; + hostBaseClass::host * host_ptr = hostBaseClass::getHost( hostname ); + if ( host_ptr ) + { + rc = rem_host ( host_ptr->hostname ); + if ( rc == PASS ) + { + dlog ("%s Deleted\n", host_ptr->hostname.c_str()); + print_node_info(); + } + else + { + elog ("%s Delete Failed (rc:%d)\n", hostname.c_str(), rc ); + } + } + else + { + wlog ("Unknown hostname: %s\n", hostname.c_str()); + } + return (rc); +} + +/** Get this hosts uuid address */ +string hostBaseClass::get_uuid ( string hostname ) +{ + hostBaseClass::host * host_ptr ; + host_ptr = hostBaseClass::getHost ( hostname ); + if ( host_ptr != NULL ) + { + return (host_ptr->uuid ); + } + elog ("%s uuid lookup failed\n", hostname.c_str() ); + return (""); +} + +/** Get this hosts uuid address */ +string hostBaseClass::get_hostname ( string uuid ) +{ + hostBaseClass::host * host_ptr ; + host_ptr = hostBaseClass::getHost ( uuid ); + if ( host_ptr != NULL ) + { + return (host_ptr->hostname ); + } + elog ("%s hostname lookup failed\n", uuid.c_str() ); + return (""); +} + +/** Get this hosts ip address */ +string hostBaseClass::get_ip ( string hostname ) +{ + hostBaseClass::host * host_ptr ; + host_ptr = hostBaseClass::getHost ( hostname ); + if ( host_ptr != NULL ) + { + return (host_ptr->ip ); + } + elog ("%s ip lookup failed\n", hostname.c_str() ); + return (""); +} + +static string null_str = "" ; +string hostBaseClass::get_hostaddr ( string hostname ) +{ + hostBaseClass::host* host_ptr ; + host_ptr = hostBaseClass::getHost ( hostname ); + if ( host_ptr != NULL ) + { + return ( host_ptr->ip ); + } + return ( null_str ); +} + + + +void hostBaseClass::print_node_info ( void ) +{ + fflush (stdout); + fflush (stderr); +} + +void hostBaseClass::memLogDelimit ( void ) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], 
MAX_MEM_LOG_DATA, "-------------------------------------------------------------\n"); + mem_log (str); +} + +void hostBaseClass::mem_log_host ( struct hostBaseClass::host * host_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t%s - %s - %s - %s\n", + host_ptr->hostname.c_str(), + host_ptr->ip.c_str(), + host_ptr->mac.c_str(), + host_ptr->uuid.c_str(), + host_ptr->type.c_str()); + mem_log (str); +} + +void hostBaseClass::memDumpNodeState ( string hostname ) +{ + hostBaseClass::host* host_ptr ; + host_ptr = hostBaseClass::getHost ( hostname ); + if ( host_ptr == NULL ) + { + mem_log ( hostname, ": ", "Not Found hostBaseClass\n" ); + return ; + } + else + { + mem_log_host ( host_ptr ); + // memLogDelimit (); + } +} + diff --git a/mtce-common/cgts-mtce-common-1.0/common/hostClass.h b/mtce-common/cgts-mtce-common-1.0/common/hostClass.h new file mode 100644 index 00000000..36d615b7 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/hostClass.h @@ -0,0 +1,221 @@ +#ifndef __INCLUDE_HOSTCLASS_H__ +#define __INCLUDE_HOSTCLASS_H__ +/* + * Copyright (c) 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Host Maintenance "Host Manager" + * class, support structs and enums. + */ + +#include +#include +#include +#include +#include +#include + +//using namespace std; + +#include "nodeTimers.h" /* for ... mtcTimer */ + +/** + * @addtogroup hostBaseClass + * @{ + * This class is used to maintain a linked list of hosts for a given application. + */ + +class hostBaseClass +{ + private: + + /** + * A single host entity within the hostBaseClass. + * Used to build a linked list of added/provisioned hosts. 
+ */ + struct host { + + /** The name of the host */ + std::string hostname ; + + /** The name of the host */ + std::string uuid ; + + /** The IP address of the host */ + std::string ip ; + + /** The Mac address of the host node */ + std::string mac ; + + /** A string indicating the host type as 'compute' , 'storage' or 'controller' */ + std::string type ; + + /** The Type ; host specific service refinement */ + int nodetype ; + + /** general retry counter */ + int retries ; + + /** Generic toggle switch */ + bool toggle ; + + /** Pointer to the previous host in the list */ + struct host * prev; + + /** Pointer to the next host in the list */ + struct host * next; + } ; + + struct host * head ; /**< Host Linked List Head pointer */ + struct host * tail ; /**< Host Linked List Tail pointer */ + + /** Allocate memory for a new host. + * + * Preserves the host address in the host_ptr list and increments + * the memory_allocs counter used by the inservice test audit. + * + * @return + * a pointer to the memory of the newly allocated host */ + struct hostBaseClass::host * newHost ( void ); + + /** Start heartbeating a new host. + * + * host is added to the end of the host linked list. + * + * @param host_info_ptr + * is a pointer containing pertinent info about the physical host + * @return + * a pointer to the newly added host + */ + struct hostBaseClass::host* addHost ( string hostname ); + + /** Get pointer to "hostname" host. + * + * Host list lookup by pointer from hostname. + * + * @param host_info_ptr + * is a pointer containing info required to find the host in the host list + * @return + * a pointer to the hostname's host + */ + struct hostBaseClass::host* getHost ( string hostname ); + + /** Free the memory of a previously allocated host. + * + * The memory to be removed is found in the host_ptr list, cleared and + * the memory_allocs counter is decremented. + * If the memory cannot be found then an error is returned. 
+ * + * @param host_ptr + * is a pointer to the host to be freed + * @return + * a signed integer of PASS or -EINVAL + */ + int delHost ( struct hostBaseClass::host * host_ptr ); + + + /** Remove a host from the linked list. + * + * Node is spliced out of the host linked list. + * + * @param node_info_ptr + * is a pointer containing info required to find the host in the host list + * @return + * an integer of PASS or -EINVAL */ + int remHost ( string hostname ); + + /** List of allocated host memory. + * + * An array of host pointers. + */ + hostBaseClass::host * host_ptrs[MAX_HOSTS] ; + + /** A memory allocation counter. + * + * Should represent the number of hosts in the linked list. + */ + int memory_allocs ; + + /** A memory used counter + * + * A variable storing the accumulated host memory + */ + int memory_used ; + + void mem_log_host ( struct hostBaseClass::host * host_ptr ); + +/** Public Interfaces that allow hosts to be + * added or removed from maintenance. + */ +public: + + hostBaseClass(); /**< constructor */ + ~hostBaseClass(); /**< destructor */ + + /**< The service this list is associated with */ + int service ; + + int hosts ; + + string my_hostname ; /**< My hostname */ + string my_local_ip ; /**< Primary IP address */ + string my_float_ip ; /**< Secondary (floating) IP address */ + + + bool bm_provisioned ; + + /** Add a host to the linked list using public API */ + int add_host ( node_inv_type & inv ); + + /** Mod a host to the linked list using public API */ + int mod_host ( node_inv_type & inv ); + + /** Remove a host from the linked list using public API */ + int rem_host ( string hostname ); + + /** Free the memory of an already allocated host link using public API */ + int del_host ( string hostname ); + + string get_ip ( string hostname ); + string get_uuid ( string hostname ); + string get_hostaddr ( string hostname ); + string get_hostname ( string uuid ); + + + + void memLogDelimit ( void ); /**< Debug log delimiter */ + void 
memDumpNodeState ( string hostname ); + void memDumpAllState ( void ); + void print_node_info ( void ); /**< Print node info banner */ + + /** This is a list of host names. */ + std::list hostlist ; + std::list::iterator hostlist_iter_ptr ; + + + +} ; + +/** + * @addtogroup hostBaseClass_base + * @{ + */ + +hostBaseClass * get_hostBaseClass_ptr ( void ); + +/* allocates hostBaseClass obj_ptr and host_ptr */ +#define GET_HOST_PTR(hostname) \ + hostBaseClass * obj_ptr = get_hostBaseClass_ptr () ; \ + hostBaseClass::host * host_ptr = obj_ptr->getHost ( hostname ) ; \ + if ( host_ptr == NULL ) \ + { \ + elog ("%s hostname unknown\n", hostname.c_str()); \ + return (FAIL_HOSTNAME_LOOKUP); \ + } + +#endif // __INCLUDE_HOSTCLASS_H__ diff --git a/mtce-common/cgts-mtce-common-1.0/common/hostUtil.cpp b/mtce-common/cgts-mtce-common-1.0/common/hostUtil.cpp new file mode 100644 index 00000000..ed079ea1 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/hostUtil.cpp @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform - Host Server Utility Module + */ + +#include "hostUtil.h" + +string hostUtil_getServiceIp ( mtc_service_enum service ) +{ + string ip = "0.0.0.0" ; + + daemon_config_type * cfg_ptr = daemon_get_cfg_ptr(); + + switch (service) + { + case SERVICE_SYSINV: + { + ip = cfg_ptr->sysinv_api_bind_ip ; + break ; + } + case SERVICE_TOKEN: + { + if ( cfg_ptr->keystone_auth_host) + { + ip = cfg_ptr->keystone_auth_host; + } + else + { + ip = "localhost"; + } + break ; + } + case SERVICE_SMGR: + case SERVICE_VIM: + { + ip = "localhost" ; + break ; + } + default: + { + slog ("Unsupported service (%d)\n", service ); + break ; + } + } + return (ip); +} + +string hostUtil_getPrefixPath ( ) +{ + string prefix_path = ""; + + daemon_config_type * cfg_ptr = daemon_get_cfg_ptr(); + + if ( cfg_ptr->keystone_prefix_path) + { + prefix_path = cfg_ptr->keystone_prefix_path; + } + + return (prefix_path); +} + +int hostUtil_getServicePort ( mtc_service_enum service ) +{ + daemon_config_type * cfg_ptr = daemon_get_cfg_ptr(); + + switch (service) + { + case SERVICE_SYSINV: + return(cfg_ptr->sysinv_api_port); + + case SERVICE_SMGR: + return(cfg_ptr->ha_port); + + case SERVICE_VIM: + return(cfg_ptr->vim_cmd_port); + + case SERVICE_TOKEN: + return(cfg_ptr->keystone_port); + + default: + { + slog ("Unsupported service (%d)\n", service ); + break ; + } + } + return (0); +} + +bool hostUtil_is_valid_ip_addr ( string ip ) +{ + if ( !ip.empty() ) + if ( ip.compare(NONE) ) + return (true); + return (false); +} + +bool hostUtil_is_valid_mac_addr ( string mac ) +{ + if ( !mac.empty() ) + if ( mac.length() == COL_CHARS_IN_MAC_ADDR ) + return (true); + return (false); +} + +bool hostUtil_is_valid_bm_type ( string bm_type ) +{ + dlog3 ("BM_Type:%s\n", bm_type.c_str()); + if ( !bm_type.empty() ) + { + if (( bm_type == "bmc" ) || + ( bm_type == "ilo" ) || + ( bm_type == "ilo3" ) || + ( bm_type == "ilo4" ) || + ( 
bm_type == "quanta" )) + { + return (true); + } + } + return ( false ); +} + +bool hostUtil_is_valid_uuid ( string uuid ) +{ + if (( !uuid.empty() ) && ( uuid.length() == UUID_LEN ) ) + return (true); + return (false); +} + +/***************************************************************************** + * + * Name : hostUtil_tmpfile + * + * Description : Create a temporary file with a randomized suffix. + * Write the specified 'data' to it and return its + * open file descriptor. + * + * The file is unlinked so that it is automatically deleted by the kernel + * when the file descriptor is closed or the program exits. + * + * TODO: fix or figure out why the unlink removes the file right away even + * with the file open. + * + *****************************************************************************/ + +int hostUtil_mktmpfile ( string hostname, string basename, string & filename, string data ) +{ + // buffer to hold the temporary file name + char tempBuff[MAX_FILENAME_LEN]; + + int fd = -1; + + memset(tempBuff,0,sizeof(tempBuff)); + + if ( basename.empty() || data.empty() ) + { + slog ("%s called with one or more bad parameters (%d:%d)\n", + hostname.c_str(), basename.empty(), data.empty()); + return (0); + } + + /* add what mkstemp will make unique */ + basename.append("XXXXXX"); + + // Copy the relevant information in the buffers + snprintf ( &tempBuff[0], MAX_FILENAME_LEN, "%s", basename.data()); + + // Create the temporary file, this function will + // replace the 'X's with random letters + fd = mkstemp(tempBuff); + + // Call unlink so that whenever the file is closed or the program exits + // the temporary file is deleted. + // + // Note: Unlinking removes the file immediately. + // Commenting out. Caller must remove file. 
+ // + // unlink(tempBuff); + + if(fd<1) + { + elog ("%s failed to create temp file (%d:%m)\n", hostname.c_str(), errno ); + return 0 ; + } + else + { + filename = tempBuff ; + dlog2 ("%s temporary file [%s] created\n", hostname.c_str(), tempBuff ); + } + + // Write the data to the temporary file + if ( write ( fd, data.data(), data.size()) < 0 ) + { + elog ("%s failed to write data to '%s' (%d:%m)\n", + hostname.c_str(), filename.c_str(), errno ); + return 0 ; + } + else + { + dlog2 ("%s wrote %s to %s\n", hostname.c_str(), data.c_str(), filename.c_str()); + } + return (fd); +} diff --git a/mtce-common/cgts-mtce-common-1.0/common/hostUtil.h b/mtce-common/cgts-mtce-common-1.0/common/hostUtil.h new file mode 100644 index 00000000..d207f4da --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/hostUtil.h @@ -0,0 +1,85 @@ +#ifndef __INCLUDE_HOSTUTIL_H__ +#define __INCLUDE_HOSTUTIL_H__ + +/* +* Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* +*/ + +#include +#include +#include +#include +#include +#include /* for ... 
HTTP_ status definitions */ + +using namespace std; + +#include "nodeBase.h" + +/* Supported Server Names */ +//#define SERVER__UNKNOWN ((const char*)"Undetermined Server") +//#define SERVER__NOKIA_QUANTA_1234_GEN1 ((const char*)"Quanta Computer") +//#define SERVER__HP_PROLIANT_DL380_GEN9 ((const char*)"ProLiant DL380 Gen9") +//#define SERVER__HP_PROLIANT_DL360_GEN9 ((const char*)"ProLiant DL360 Gen9") + +/* Supported Board Management Controller Names */ +//#define SERVER_BMC__UNKNOWN ((const char*)"Unknown BMC") +//#define SERVER_BMC__STANDARD_ILO_V3 ((const char*)"iLO 3 Standard") +//#define SERVER_BMC__STANDARD_ILO_V4 ((const char*)"iLO 4 Standard") + + +/* A list of supported servers */ +//typedef enum +//{ +// SERVER_IS_UNKNOWN = 0, +// SERVER_IS_NOKIA__QUANTA_1234____GEN1__ILO_V4 = 1, +// SERVER_IS_HP_____PROLIANT_DL380_GEN9__ILO_V4 = 2, +// SERVER_IS_HP_____PROLIANT_DL360_GEN9__ILO_V4 = 3, +// SERVER_IS_LAST = 4 +//} server_enum ; + +/* Server Table Entry Type */ +//typedef struct +//{ +// server_enum server_code ; +// protocol_enum protocol ; +// const char * server_name ; +// const char * server_bmc ; +// const char * profile ; +// +//} server_table_entry_type ; +//server_table_entry_type * hostUtil_get_server_info ( server_enum server_code ); + +typedef enum +{ + CLIENT_NONE = 0, + CLIENT_SYSINV = 1, + CLIENT_VIM_HOSTS = 2, + CLIENT_VIM_SYSTEMS = 3, + CLIENT_SENSORS = 4, + CLIENT_SENSORGROUPS = 5, + CLIENT_SM = 6, +} mtc_client_enum ; + +typedef enum +{ + SERVICE_SYSINV = 0, + SERVICE_TOKEN = 1, + SERVICE_SMGR = 2, + SERVICE_VIM = 3, +} mtc_service_enum ; + +string hostUtil_getServiceIp ( mtc_service_enum service ); +int hostUtil_getServicePort ( mtc_service_enum service ); +string hostUtil_getPrefixPath ( void ); + +bool hostUtil_is_valid_uuid ( string uuid ); +bool hostUtil_is_valid_ip_addr ( string ip ); +bool hostUtil_is_valid_bm_type ( string bm_type ); + +int hostUtil_mktmpfile ( string hostname, string basename, string & filename, string data ); + 
+#endif diff --git a/mtce-common/cgts-mtce-common-1.0/common/httpUtil.cpp b/mtce-common/cgts-mtce-common-1.0/common/httpUtil.cpp new file mode 100644 index 00000000..e626961e --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/httpUtil.cpp @@ -0,0 +1,1065 @@ +/* + * Copyright (c) 2015-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Common HTTP utility Module + * + */ + +#include + +using namespace std; + +#include "httpUtil.h" +#include "jsonUtil.h" +#include "tokenUtil.h" /* for ... tokenUtil_handler */ +#include "nodeUtil.h" /* for ... string_contains */ +#include "nodeClass.h" +#include "timeUtil.h" /* for ... time_debug_type */ +#include "keyClass.h" /* for ... add_key, del_key */ + +static keyClass keyValObject ; +static char rest_api_filename[MAX_FILENAME_LEN]; +static char rest_api_log_str [MAX_API_LOG_LEN]; +static libEvent nullEvent ; + +/* *********************************************************************** + * + * Name : httpUtil_event_init + * + * Description: Initialize the supplied libevent structure to default + * start values including with the supplied hostname, + * service , ip and port values. + * + * Note: No memory allication is performed. 
+ *
+ * Returns    : PASS, or FAIL_NULL_POINTER when 'ptr' is NULL.
+ *
+ * ************************************************************************/
+
+int httpUtil_event_init ( libEvent * ptr ,
+                          string hostname,
+                          string service,
+                          string ip,
+                          int port )
+{
+    if ( ptr == NULL )
+    {
+        slog ("%s %s null event pointer\n", hostname.c_str(), service.c_str());
+        return (FAIL_NULL_POINTER);
+    }
+
+    /* Default Starting States */
+    ptr->sequence = 0 ;
+    ptr->request = SERVICE_NONE ;
+    ptr->state = HTTP__TRANSMIT ;
+    ptr->log_prefix = hostname ;
+    ptr->log_prefix.append(" ") ;
+    ptr->log_prefix.append(service) ;
+
+    /* Execution Controls */
+    ptr->stuck = 0 ;
+    ptr->count = 0 ;
+    ptr->timeout = 0 ;
+    ptr->retries = 0 ;
+    ptr->cur_retries = 0 ;
+    ptr->max_retries = 0 ;
+    ptr->active = false ;
+    ptr->mutex = false ;
+    ptr->found = false ;
+    ptr->blocking = false ;
+    ptr->noncritical = false ;
+    ptr->rx_retry_cnt= 0 ;
+    ptr->rx_retry_max= 1000 ;
+
+    ptr->uuid.clear();
+    ptr->new_uuid.clear() ;
+
+    /* Service Specific Request Info */
+    ptr->ip = ip ;
+    ptr->port = port ;
+    ptr->hostname = hostname ;
+    ptr->service = service ;
+
+    /* Clear the token fields of the libEvent struct for this command */
+    ptr->token.url.clear();
+    ptr->token.token.clear();
+    ptr->token.issued.clear();
+    ptr->token.expiry.clear();
+    ptr->token.delay = false ;
+    ptr->token.refreshed = false ;
+
+    /* Instance Specific Request Data */
+    ptr->entity_path.clear() ;
+    ptr->entity_path_next.clear() ;
+    ptr->address.clear();
+    ptr->payload.clear();
+    ptr->response.clear();
+
+    ptr->operation.clear();
+    ptr->information.clear();
+    ptr->result.clear();
+    ptr->label.clear();
+
+    /** Default the user agent to mtce ; other users and commands can override */
+    ptr->user_agent = "mtce/1.0" ;
+
+    ptr->admin_url.clear();
+    ptr->internal_url.clear();
+    ptr->public_url.clear();
+
+    /* HTTP Specific Info */
+    ptr->type = EVHTTP_REQ_GET ; /* request type GET/PUT/PATCH etc */
+
+    /* Result Info */
+    ptr->status = FAIL;
+    ptr->http_status = 0 ;
+    ptr->low_wm = ptr->med_wm = ptr->high_wm = false ;
+    node_inv_init ( ptr->inv_info ) ;
+
+    ptr->this_time = 0 ;
+    ptr->prev_time = 0 ;
+
+    memset (&ptr->req_str[0], 0, MAX_API_LOG_LEN);
+
+    /* NOTE(review): the base, conn, req and buf pointers are not touched
+     * here ; confirm they are initialized elsewhere before first use. */
+
+    return (PASS);
+}
+
+
+/* initialize this module : defaults the module's nullEvent and builds
+ * the per process rest api log file name */
+void httpUtil_init ( void )
+{
+    httpUtil_event_init ( &nullEvent, "null", "null" , "0.0.0.0", 0);
+    nullEvent.request = SERVICE_NONE ;
+
+    snprintf (&rest_api_filename[0], MAX_FILENAME_LEN, "/var/log/%s_api.log",
+               program_invocation_short_name );
+}
+
+/* ***********************************************************************
+ *
+ * Name       : httpUtil_free_conn
+ *
+ * Description: Free an event's connection memory if it exists.
+ *              The pointer is set to NULL so a second call is harmless.
+ *
+ * ************************************************************************/
+
+void httpUtil_free_conn ( libEvent & event )
+{
+    if ( event.conn )
+    {
+        hlog3 ("%s Free Connection (%p)\n", event.log_prefix.c_str(), event.conn );
+        evhttp_connection_free ( event.conn );
+        event.conn = NULL ;
+    }
+    else
+    {
+        hlog1 ("%s Already Freed Connection\n", event.log_prefix.c_str());
+    }
+}
+
+/* ***********************************************************************
+ *
+ * Name       : httpUtil_free_base
+ *
+ * Description: Free an event's base memory if it exists, along with
+ *              any connection that is still allocated off that base.
+ *
+ * ************************************************************************/
+
+void httpUtil_free_base ( libEvent & event )
+{
+    if ( event.base )
+    {
+        /* The connection was created off this base (see httpUtil_connect)
+         * so it has to go first ; freeing it after the base would let
+         * libevent touch an already freed base. */
+        if ( event.conn )
+        {
+            hlog ("%s Free Connection (%p) --------- along with base\n",
+                      event.log_prefix.c_str(), event.conn );
+
+            evhttp_connection_free ( event.conn );
+            event.conn = NULL ;
+        }
+
+        /* Free the base */
+        hlog3 ("%s Free Base (%p)\n", event.log_prefix.c_str(), event.base );
+
+        event_base_free(event.base);
+        event.base = NULL ;
+    }
+    else
+    {
+        hlog1 ("%s Already Freed Event Base\n", event.log_prefix.c_str());
+    }
+}
+
+/* ***********************************************************************
+ *
+ * Name       : httpUtil_connect
+ *
+ * Description: Allocate memory for a new connection off the supplied
+ *              base with respect to an ip and port. 
+ *
+ * Returns    : PASS, FAIL_CONNECT when the connection cannot be created
+ *              or FAIL_EVENT_BASE when the event has no base.
+ *
+ * ************************************************************************/
+
+int httpUtil_connect ( libEvent & event )
+{
+    if ( event.base )
+    {
+        hlog ("%s target:%s:%d\n", event.log_prefix.c_str(), event.ip.c_str(), event.port);
+
+        /* Open an http connection to specified IP and port */
+        event.conn = evhttp_connection_base_new ( event.base, NULL,
+                                                  event.ip.c_str(),
+                                                  event.port );
+        if ( event.conn )
+        {
+            return(PASS) ;
+        }
+        else
+        {
+            elog ("%s create connection failed (evhttp_connection_base_new)\n", event.log_prefix.c_str());
+            return (FAIL_CONNECT);
+        }
+    }
+    else
+    {
+        slog ("%s Null Event base\n", event.log_prefix.c_str());
+        return (FAIL_EVENT_BASE);
+    }
+}
+
+/* ***********************************************************************
+ *
+ * Name       : httpUtil_request
+ *
+ * Description: Allocate memory for a new request and bind 'hdlr' to it.
+ *              The event's base is handed to the handler as its argument.
+ *
+ * Returns    : PASS, or FAIL when the request cannot be allocated.
+ *
+ * ************************************************************************/
+
+int httpUtil_request ( libEvent & event,
+                       void(*hdlr)(struct evhttp_request *, void *))
+{
+    int rc = PASS ;
+
+    /* make a new request and bind the event handler to it */
+    event.req = evhttp_request_new( hdlr , event.base );
+    if ( ! event.req )
+    {
+        elog ("%s evhttp_request_new returned NULL\n", event.log_prefix.c_str() );
+        rc = FAIL ;
+    }
+    return (rc);
+}
+
+/* ***********************************************************************
+ *
+ * Name       : httpUtil_payload_add
+ *
+ * Description: Add the payload to the output buffer.
+ *
+ * @returns PASS for success or FAIL when there is no output buffer,
+ *          the add fails or the payload is empty.
+ *
+ * ************************************************************************/
+
+int httpUtil_payload_add ( libEvent & event )
+{
+    int rc = PASS ;
+
+    /* Returns the output buffer. */
+    event.buf = evhttp_request_get_output_buffer ( event.req );
+
+    /* Check for no buffer */
+    if ( ! event.buf )
+    {
+        elog ("%s evhttp_request_get_output_buffer returned null (%p)\n",
+                  event.log_prefix.c_str(), event.req );
+
+        rc = FAIL ;
+    }
+    else
+    {
+        /* write the body into the buffer ; the return value is the
+         * number of bytes added or -1 on error */
+        rc = evbuffer_add_printf ( event.buf, "%s", event.payload.c_str());
+        if ( rc == -1 )
+        {
+            elog ("%s evbuffer_add_printf returned error (-1)\n",
+                      event.log_prefix.c_str());
+
+            rc = FAIL ;
+        }
+        else if ( rc == 0 )
+        {
+            elog ("%s no data added to output buffer (len=0)\n",
+                      event.log_prefix.c_str());
+
+            rc = FAIL ;
+        }
+        else
+        {
+            rc = PASS ;
+        }
+    }
+    return (rc);
+}
+
+/* ***********************************************************************
+ *
+ * Name       : httpUtil_payload_len
+ *
+ * Description: Calculate payload length from the output buffer
+ *              and return a string representing that length value.
+ *
+ * Returns    : the length as a decimal string, or "" when the output
+ *              buffer is empty.
+ *
+ * ************************************************************************/
+
+string httpUtil_payload_len ( libEvent * ptr )
+{
+    string body_len ;
+
+    /* evbuffer_get_length returns a size_t ; keep it unsigned and size
+     * the text buffer so that any value fits, rather than formatting an
+     * int into a 10 byte buffer. */
+    size_t len = evbuffer_get_length ( ptr->req->output_buffer ) ;
+    if ( len != 0 )
+    {
+        char len_str[32] ;
+        snprintf ( &len_str[0], sizeof(len_str), "%lu", (unsigned long)len );
+        body_len = len_str ;
+        hlog2 ("%s Payload Length: %s\n", ptr->log_prefix.c_str(), body_len.c_str() );
+    }
+    return ( body_len );
+}
+
+/* ***********************************************************************
+ *
+ * Name       : httpUtil_header_add
+ *
+ * Description: Add the supplied list of headers to the http request
+ *              headers section. 
+ *
+ * ************************************************************************/
+
+/* Copies every key/value pair of 'hdrs_ptr' into the output headers of
+ * the request held by 'ptr'. Refuses lists with more than MAX_HEADERS
+ * entries and stops at the first header libevent rejects ; both cases
+ * return FAIL. */
+int httpUtil_header_add ( libEvent * ptr, http_headers_type * hdrs_ptr )
+{
+    int result = PASS ;
+
+    const int count = hdrs_ptr->entries ;
+    if ( count > MAX_HEADERS )
+    {
+        elog ("%s Too many headers (%d:%d)\n",
+                  ptr->log_prefix.c_str(), MAX_HEADERS, hdrs_ptr->entries );
+        return FAIL ;
+    }
+
+    int index = 0 ;
+    while ( index < count )
+    {
+        const char * key_ptr = hdrs_ptr->entry[index].key.c_str() ;
+        const char * val_ptr = hdrs_ptr->entry[index].value.c_str() ;
+
+        /* non-zero means the header was not added */
+        result = evhttp_add_header ( ptr->req->output_headers, key_ptr, val_ptr );
+        if ( result )
+        {
+            elog ("%s evhttp_add_header returned failure (%d:%s:%s)\n",
+                      ptr->log_prefix.c_str(), result,
+                      key_ptr,
+                      val_ptr );
+            return ( FAIL );
+        }
+        ++index ;
+    }
+    return ( result );
+}
+
+
+
+/* ***********************************************************************
+ *
+ * Name       : httpUtil_get_length
+ *
+ * Description: Loads libEvent.response_len with the length of the
+ *              input buffer so we can allocate enough memory to
+ *              copy it into.
+ *
+ * Get the length of the json response.
+ * Deal with oversized messages. 
+ * + * @param event is a reference to the callers libEvent struct + * where it inds the input buffer pointer + * + * @return integer value representing the length of the input buffer + * + * ************************************************************************/ + +int httpUtil_get_length ( libEvent & event ) +{ + event.response_len = evbuffer_get_length (event.req->input_buffer); + if ( event.response_len == 0 ) + { + hlog ("%s Request - Response has not content\n", + event.log_prefix.c_str()); + event.status = FAIL_JSON_ZERO_LEN ; + } + return ( event.response_len ); +} + +/* Load the response string into the event struct */ +int httpUtil_get_response ( libEvent & event ) +{ + if ( httpUtil_get_length ( event ) ) + { + size_t real_len ; + + /* Get a stack buffer, zero it, copy to it and terminate it */ + char * stack_buf_ptr = (char*)malloc (event.response_len+1); + memset ( stack_buf_ptr, 0, event.response_len+1 ); + real_len = evbuffer_remove( event.req->input_buffer, stack_buf_ptr, + event.response_len); + + if ( real_len != event.response_len ) + { + wlog ("%s Length differs from removed length (%ld:%ld)\n", + event.log_prefix.c_str(), + event.response_len, + real_len ); + } + + if ( real_len == 0 ) + { + hlog1 ("%s has no response data\n", event.log_prefix.c_str() ); + } + /* Terminate the buffer , this is where the +1 above is required. 
+ * Without it there is memory corruption reported by Linux */ + *(stack_buf_ptr+event.response_len) = '\0'; + + /* Store the response */ + event.response = stack_buf_ptr ; + + free (stack_buf_ptr); + } + return ( event.status ); +} + +/* *********************************************************************** + * + * Name : mtcHttpUtil_status + * + * Description: Extracts and returns the HTTP execution status + * + * ************************************************************************/ + +int httpUtil_status ( libEvent & event ) +{ + int rc = PASS ; + + if ( !event.req ) + { + elog ("%s Invalid request\n", event.hostname.length() ? event.hostname.c_str() : "unknown" ); + return (FAIL_UNKNOWN_HOSTNAME); + } + event.status = event.http_status = evhttp_request_get_response_code (event.req); + switch (event.status) + { + case HTTP_OK: + case 201: + case 202: + case 203: + case 204: + { + hlog ("%s HTTP_OK (%d)\n", event.hostname.c_str(), event.status ); + event.status = PASS ; + break; + } + /* Authentication error - refresh the token */ + case 401: + { + keyToken_type * token_ptr = tokenUtil_get_ptr() ; + rc = FAIL_AUTHENTICATION ; + token_ptr->delay = true ; /* force delayed token renewal on authentication error */ + break ; + } + case 0: + { + wlog ("%s failed to maintain connection to '%s:%d' for '%s'\n", + event.hostname.c_str(), event.ip.c_str(), event.port, event.log_prefix.c_str() ); + event.status = FAIL_HTTP_ZERO_STATUS ; + rc = FAIL_HTTP_ZERO_STATUS ; + break ; + } + default: + { + hlog2 ("%s Status: %d\n", event.hostname.c_str(), event.status ); + rc = event.status ; + break; + } + } + return (rc); +} + + + +void httpUtil_handler ( struct evhttp_request *req, void *arg ) +{ + unsigned long temp ; + int rc = PASS ; + + UNUSED(req); + libEvent * event_ptr ; + + if ( arg == NULL ) + { + elog ("null base pointer\n"); + return ; + } + + /* find the event for this base */ + if ( keyValObject.get_key ((unsigned long)arg, temp ) != PASS ) + { + wlog ("get_key 
value 'event' lookup from base (%p) key failed\n", arg ); + return ; + } + + event_ptr = (libEvent*)temp; + if (( event_ptr->request >= SERVICE_LAST ) || ( event_ptr->request == SERVICE_NONE )) + { + slog ("HTTP Event Lookup Failed for http base (%p) <------\n", arg); + return ; + } + + /* Check the HTTP Status Code */ + event_ptr->status = httpUtil_status ( (*event_ptr) ) ; + if ( event_ptr->status == HTTP_NOTFOUND ) + { + elog ("%s returned (Not-Found) (%d)\n", + event_ptr->log_prefix.c_str(), + event_ptr->status); + if ( event_ptr->type != EVHTTP_REQ_POST ) + event_ptr->status = PASS ; + + goto httpUtil_handler_done ; + } + + else if (( event_ptr->status != PASS ) && ( ! req )) + { + elog ("%s Request Timeout (%d)\n", + event_ptr->log_prefix.c_str(), + event_ptr->timeout); + + event_ptr->status = FAIL_TIMEOUT ; + goto httpUtil_handler_done ; + } + + else if ( event_ptr->status != PASS ) + { + goto httpUtil_handler_done ; + } + + /* Delete commands don't have a response unless there is an error. 
+ * Deal with this as a special case - + * Currently only Neutron uses the delete */ + if ( event_ptr->type == EVHTTP_REQ_DELETE ) + { + if ( httpUtil_get_length ( (*event_ptr) ) != 0 ) + { + /* Preserve the incoming status over the get response */ + rc = event_ptr->status ; + httpUtil_get_response ( (*event_ptr) ) ; + event_ptr->status = rc ; + } + if (event_ptr->status == FAIL_JSON_ZERO_LEN ) + event_ptr->status = PASS ; + } + else if ( httpUtil_get_response ( (*event_ptr) ) != PASS ) + { + elog ("%s failed to get response\n", event_ptr->log_prefix.c_str()); + goto httpUtil_handler_done ; + } + + if ( event_ptr->handler ) + { + // ilog ("%s calling event specific handler\n", event_ptr->log_prefix.c_str() ); + rc = event_ptr->handler ( (*event_ptr) ) ; + } + else + { + slog ("%s no event handler bound in\n", event_ptr->log_prefix.c_str() ); + rc = event_ptr->status = FAIL_NULL_POINTER ; + } + +httpUtil_handler_done: + + // hlog2 ("%s Base:%p:%p Event:%p\n", event_ptr->log_prefix.c_str(), event_ptr->base, arg, event_ptr ); + + keyValObject.del_key ((unsigned long)arg ); + event_ptr->active = false ; + + gettime ( event_ptr->done_time ); + timedelta ( event_ptr->send_time, event_ptr->done_time, event_ptr->diff_time ); + + if ( event_ptr->status ) + { + elog ( "%s Failed (rc:%d)\n", + event_ptr->log_prefix.c_str(), + event_ptr->status ); + } + httpUtil_log_event ( event_ptr ); +} + + +/* *********************************************************************** + * + * Name : httpUtil_api_request + * + * Description: Makes an HTTP request based on all the info + * in the supplied libEvent. + * + * This is the primary external interface in this module. + * + * Both blocking and non-blocking request type are supported. 
+ *
+ * ************************************************************************/
+
+/***************************************************************************
+ *
+ * Name       : httpUtil_latency_log
+ *
+ * Description: Measures command handling time and creates a Latency log
+ *              if that time exceeds the specified threshold (msecs).
+ *
+ * Parms:
+ *     event     - the event in context ; its this_time and prev_time
+ *                 members hold the measurement state
+ *
+ *     label_ptr - "start" to init the prev_timer or
+ *               - "some label" to identify the point in the code and to
+ *                 measure time against the previous call.
+ *                 Any label that begins with "start" is treated as start.
+ *
+ *     line      - number printed in parentheses after the label
+ *                 (presumably the caller's source line - confirm)
+ *
+ *     msecs     - the latency log threshold
+ *
+ * Usage:
+ *
+ *     httpUtil_latency_log ( event, HTTPUTIL_SCHED_MON_START, line, 0 );
+ *
+ *     [ timed code ]
+ *
+ *     httpUtil_latency_log ( event, "label 1" , line, msecs );
+ *
+ *     [ timed code ]
+ *
+ *     httpUtil_latency_log ( event, "label 2", line, msecs );
+ *
+ *     ...
+ *
+ *****************************************************************************/
+
+#define HTTPUTIL_SCHED_MON_START ((const char *)"start")
+#define MAX_DELAY_B4_LATENCY_LOG (1700)
+void httpUtil_latency_log ( libEvent & event, const char * label_ptr, int line , int msecs )
+{
+    event.this_time = gettime_monotonic_nsec () ;
+
+    /* If label_ptr is != NULL and != start then take the measurement */
+    if ( label_ptr && strncmp ( label_ptr, HTTPUTIL_SCHED_MON_START, strlen(HTTPUTIL_SCHED_MON_START)))
+    {
+        /* only log when more than 'msecs' passed since the previous call */
+        if ( event.this_time > (event.prev_time + (NSEC_TO_MSEC*(msecs))))
+        {
+            /* first value is the whole msecs, second is the nsec remainder ;
+             * both are printed as 0 when the delta is within one msec */
+            llog ("%s ... %4llu.%-4llu msec - %s (%d)\n", event.hostname.c_str(),
+                     ((event.this_time-event.prev_time) > NSEC_TO_MSEC) ? ((event.this_time-event.prev_time)/NSEC_TO_MSEC) : 0,
+                     ((event.this_time-event.prev_time) > NSEC_TO_MSEC) ? ((event.this_time-event.prev_time)%NSEC_TO_MSEC) : 0,
+                     label_ptr, line );
+        }
+    }
+    /* reset to be equal for next round */
+    event.prev_time = event.this_time ;
+}
+
+bool token_recursion = false ;
+
+int httpUtil_api_request ( libEvent & event )
+
+{
+    http_headers_type hdrs ;
+    int hdr_entry = 0 ;
+    string path = "" ;
+    bool free_key = true ;
+    event.status = PASS ;
+
+    event.log_prefix = event.hostname ;
+    event.log_prefix.append (" ");
+    event.log_prefix.append (event.service) ;
+    event.log_prefix.append (" '");
+    event.log_prefix.append (event.operation) ;
+    event.log_prefix.append ("'");
+
+    hlog ("%s '%s' request\n", event.log_prefix.c_str(), getHttpCmdType_str(event.type));
+
+    if (( event.request == SERVICE_NONE ) ||
+        ( event.request >= SERVICE_LAST ))
+    {
+        slog ("%s Invalid request %d\n", event.log_prefix.c_str(), event.request);
+        event.status = FAIL_BAD_PARM ;
+        return (event.status);
+    }
+    /* Check for memory leaks */
+    if ( event.base )
+    {
+        slog ("%s http base memory leak avoidance (%p)\n",
+                  event.log_prefix.c_str(), event.base );
+
+        // Be sure to free the key
+        keyValObject.del_key ((unsigned long)event.base );
+        // event_base_free(event.base);
+    }
+
+    /* Allocate the base */
+    event.base = event_base_new();
+    if ( event.base == NULL )
+    {
+        elog ("%s No Memory for Request\n", event.log_prefix.c_str());
+        event.status = FAIL_EVENT_BASE ;
+        return (event.status) ;
+    }
+    else
+    {
+        if ( keyValObject.add_key ((unsigned long)event.base, (unsigned long)&event) != PASS )
+        {
+            slog ("%s failed to store base:event as key (%p) value pair\n",
+                      event.log_prefix.c_str(), event.base );
+
+            /* lets try and recover from this */
+            keyValObject.del_key ((unsigned long)event.base);
+            if ( keyValObject.add_key ((unsigned long)event.base, (unsigned long)&event) != PASS )
+            {
+                slog ("%s still cannot store base:event after key_del\n", event.log_prefix.c_str());
+
+                event.status = FAIL_LOCATE_KEY_VALUE ;
+                goto httpUtil_api_request_done ;
+            }
+        }
+    }
+
+    if ( 
event.request == KEYSTONE_GET_TOKEN ) + { + event.payload = "" ; + + /* create the json string that can request an authority + * token and write that string to 'payload' */ + event.status = jsonApi_auth_request ( event.hostname, event.payload ); + if ( event.status != PASS ) + { + elog ("%s unable to perform get token request (rc:%d)\n", event.hostname.c_str(), event.status ); + goto httpUtil_api_request_done ; + } + } + else if (( event.request == KEYSTONE_GET_ENDPOINT_LIST ) || + ( event.request == KEYSTONE_GET_SERVICE_LIST )) + { + ; + } + + else if (( event.request == SYSINV_SENSOR_ADD ) || + ( event.request == SYSINV_SENSOR_DEL ) || + ( event.request == SYSINV_SENSOR_LOAD ) || + ( event.request == SYSINV_SENSOR_MOD ) || + ( event.request == SYSINV_SENSOR_MOD_GROUP ) || + ( event.request == SYSINV_SENSOR_ADD_GROUP ) || + ( event.request == SYSINV_SENSOR_DEL_GROUP ) || + ( event.request == SYSINV_SENSOR_LOAD_GROUPS ) || + ( event.request == SYSINV_SENSOR_LOAD_GROUP ) || + ( event.request == SYSINV_SENSOR_GROUP_SENSORS )) + { + ; + } + + else if ( TEST_WITH_NO_TOKEN ) + { + ; + } + else + { + slog ("%s Unsupported Request (%d)\n", event.hostname.c_str(), event.request); + event.status = FAIL_BAD_CASE ; + goto httpUtil_api_request_done ; + } + + /* Establish connection */ + if ( httpUtil_connect ( event )) + { + event.status = FAIL_CONNECT ; + goto httpUtil_api_request_done ; + } + + if ( httpUtil_request ( event, &httpUtil_handler )) + { + event.status = FAIL_REQUEST_NEW ; + goto httpUtil_api_request_done ; + } + + if ( event.request != KEYSTONE_GET_TOKEN ) + { + jlog ("%s Address : %s\n", event.hostname.c_str(), event.address.c_str()); + } + + if (( event.type != EVHTTP_REQ_GET ) && + ( event.type != EVHTTP_REQ_DELETE )) + { + /* Add payload to the output buffer but only for PUT, POST and PATCH requests */ + if ( httpUtil_payload_add ( event )) + { + event.status = FAIL_PAYLOAD_ADD ; + goto httpUtil_api_request_done ; + } + if ( daemon_get_cfg_ptr()->debug_json 
) + { + if ((!string_contains(event.payload,"token")) && + (!string_contains(event.payload,"assword"))) + { + jlog ("%s Payload : %s\n", event.hostname.c_str(), + event.payload.c_str() ); + } + else + { + jlog ("%s Payload : ... contains private content ...\n", + event.hostname.c_str()); + + } + } + } + + /* Build the HTTP Header */ + hdrs.entry[hdr_entry].key = "Host" ; + hdrs.entry[hdr_entry].value = event.ip ; + hdr_entry++; + + hdrs.entry[hdr_entry].key = "X-Auth-Project-Id" ; + hdrs.entry[hdr_entry].value = "admin"; + hdr_entry++; + + if (( event.type != EVHTTP_REQ_GET ) && + ( event.type != EVHTTP_REQ_DELETE )) + { + hdrs.entry[hdr_entry].key = "Content-Length" ; + hdrs.entry[hdr_entry].value = httpUtil_payload_len ( &event ); + hdr_entry++; + } + + hdrs.entry[hdr_entry].key = "User-Agent" ; + hdrs.entry[hdr_entry].value = event.user_agent ; + hdr_entry++; + + hdrs.entry[hdr_entry].key = "Content-Type" ; + hdrs.entry[hdr_entry].value = "application/json" ; + hdr_entry++; + + hdrs.entry[hdr_entry].key = "Accept" ; + hdrs.entry[hdr_entry].value = "application/json" ; + hdr_entry++; + + if ( event.request != KEYSTONE_GET_TOKEN ) + { + hdrs.entry[hdr_entry].key = "X-Auth-Token" ; + hdrs.entry[hdr_entry].value = tokenUtil_get_ptr()->token ; + hdr_entry++; + } + + hdrs.entry[hdr_entry].key = "Connection" ; + hdrs.entry[hdr_entry].value = "close" ; + hdr_entry++; + hdrs.entries = hdr_entry ; + + /* Add the headers */ + if ( httpUtil_header_add ( &event, &hdrs )) + { + event.status = FAIL_HEADER_ADD ; + goto httpUtil_api_request_done ; + } + + /* get some timestamps and log the request */ + snprintf (&event.req_str[0], MAX_API_LOG_LEN-1, + "\n%s [%5d] %s %s '%s' seq:%d -> Address : %s:%d %s %s ... 
%s", + pt(), getpid(), + event.hostname.c_str(), + event.service.c_str(), + event.operation.c_str(), + event.sequence, event.ip.c_str(), event.port, + getHttpCmdType_str( event.type ), + event.address.c_str(), + event.information.c_str()); + + gettime ( event.send_time ); + gettime ( event.done_time ); /* create a valid done value */ + + if ( event.request == KEYSTONE_GET_TOKEN ) + { + path = MTC_POST_KEY_LABEL ; + event.address = path ; + event.prefix_path += path; + hlog ("%s Keystone Internal Address : %s\n", event.hostname.c_str(), event.prefix_path.c_str()); + event.status = evhttp_make_request ( event.conn, event.req, event.type, event.prefix_path.data()); + } + else + { + event.status = evhttp_make_request ( event.conn, event.req, event.type, event.address.data()); + } + daemon_signal_hdlr (); + if ( event.status == PASS ) + { + string label = event.log_prefix ; + label.append (" - "); + label.append (event.operation); + + /* O.K we are commited to making the request */ + free_key = false ; + + evhttp_connection_set_timeout(event.conn, event.timeout); + + httpUtil_latency_log ( event, HTTPUTIL_SCHED_MON_START,__LINE__, 0 ); + + /* Default to retry for both blocking and non-blocking command */ + event.status = RETRY ; + if ( event.blocking == true ) + { + hlog ("%s Requested (blocking) (to:%d)\n", event.log_prefix.c_str(), event.timeout); + + /* Send the message with timeout */ + event_base_dispatch(event.base); + httpUtil_latency_log ( event, label.c_str(), __LINE__, MAX_DELAY_B4_LATENCY_LOG ); + goto httpUtil_api_request_done ; + } + else if ( event.request == KEYSTONE_GET_TOKEN ) + { + httpUtil_event_info (event); + event.active = true ; + event.status = event_base_loop(event.base, EVLOOP_NONBLOCK); + httpUtil_latency_log ( event, label.c_str(), __LINE__, MAX_DELAY_B4_LATENCY_LOG ); /* Should be immediate ; non blocking */ + return (event.status); + // goto httpUtil_api_request_done ; + } + else + { + hlog ("%s Requested (blocking) (to:%d)\n", 
event.log_prefix.c_str(), event.timeout ); + event_base_dispatch(event.base); + httpUtil_latency_log ( event, label.c_str(), __LINE__, MAX_DELAY_B4_LATENCY_LOG ) ; + goto httpUtil_api_request_done ; + } + } + else + { + elog ("%s Call to 'evhttp_make_request' failed (rc:%d)\n", + event.hostname.c_str(), event.status); + } + +httpUtil_api_request_done: + + httpUtil_free_conn ( event ); + httpUtil_free_base ( event ); + + /* If the request fails then delete the key here */ + if ( free_key ) + { + keyValObject.del_key ((unsigned long)event.base) ; + } + + return (event.status); +} + + +void httpUtil_event_info ( libEvent & event ) +{ + ilog ("%s request to %s.%d Status:%d \n", + event.log_prefix.c_str(), + event.ip.c_str(), + event.port, + event.status); + if ( event.request == KEYSTONE_GET_TOKEN ) + { + ilog ("--- Address : %s\n", event.prefix_path.c_str()); + } + else + { + ilog ("--- Address : %s\n", event.address.c_str()); + } + ilog ("--- Payload : %s\n", event.payload.c_str()); + ilog ("--- Response: %s\n", event.response.c_str()); + ilog ("--- TokenUrl: %s\n", event.token.url.c_str()); +} +/* Send the request string, payload, response and status line of an event as rest api log messages ; a payload or response containing "token" or "assword" is replaced by a private content notice */ +void httpUtil_log_event ( libEvent * event_ptr ) +{ + string event_sig = daemon_get_cfg_ptr()->debug_event ; + msgSock_type * mtclogd_ptr = get_mtclogd_sockPtr (); + + send_log_message ( get_mtclogd_sockPtr(), event_ptr->hostname.data(), &rest_api_filename[0], &event_ptr->req_str[0] ); + + if ( event_ptr->request == KEYSTONE_GET_TOKEN ) + { + jlog1 ("%s seq:%d -> %s:%d %s %s http status: %d ... %s\n", + event_ptr->log_prefix.c_str(), + event_ptr->sequence, + event_ptr->ip.c_str(), + event_ptr->port, + getHttpCmdType_str( event_ptr->type ), + event_ptr->prefix_path.c_str(), + event_ptr->http_status, + event_ptr->information.c_str()); + } + else + { + jlog1 ("%s seq:%d -> %s:%d %s %s http status: %d ... 
%s\n", + event_ptr->log_prefix.c_str(), + event_ptr->sequence, + event_ptr->ip.c_str(), + event_ptr->port, + getHttpCmdType_str( event_ptr->type ), + event_ptr->address.c_str(), + event_ptr->http_status, + event_ptr->information.c_str()); + } + + if (!event_ptr->payload.empty()) + { + if ((!string_contains(event_ptr->payload,"token")) && + (!string_contains(event_ptr->payload,"assword"))) + { + snprintf (&rest_api_log_str[0], MAX_API_LOG_LEN-1, + "%s [%5d] %s seq:%d -> Payload : %s", + pt(), getpid(), event_ptr->log_prefix.c_str(), event_ptr->sequence, event_ptr->payload.c_str() ); + } + else + { + snprintf (&rest_api_log_str[0], MAX_API_LOG_LEN-1, + "%s [%5d] %s seq:%d -> Payload : ... contains private content ...", + pt(), getpid(), event_ptr->log_prefix.c_str(), event_ptr->sequence ); + } + send_log_message ( mtclogd_ptr, event_ptr->hostname.data(), &rest_api_filename[0], &rest_api_log_str[0] ); + } + + if ( !event_ptr->response.empty() ) + { + if ((!string_contains(event_ptr->response,"token")) && + (!string_contains(event_ptr->response,"assword"))) + { + snprintf (&rest_api_log_str[0], MAX_API_LOG_LEN-1, + "%s [%5d] %s seq:%d -> Response: %s", + pt(), getpid(), event_ptr->log_prefix.c_str(), event_ptr->sequence, event_ptr->response.c_str() ); + } + else + { + snprintf (&rest_api_log_str[0], MAX_API_LOG_LEN-1, + "%s [%5d] %s seq:%d -> Response: ... 
contains private content ...", + pt(), getpid(), event_ptr->log_prefix.c_str(), event_ptr->sequence ); + } + send_log_message ( mtclogd_ptr, event_ptr->hostname.data(), rest_api_filename, &rest_api_log_str[0] ); + } + + snprintf (&rest_api_log_str[0], MAX_API_LOG_LEN-1, + "%s [%5d] %s %s '%s' seq:%d -> Status : %d {execution time %ld.%06ld secs}\n", + pt(), getpid(), + event_ptr->hostname.c_str(), + event_ptr->service.c_str(), + event_ptr->operation.c_str(), + event_ptr->sequence, + event_ptr->http_status, + event_ptr->diff_time.secs, + event_ptr->diff_time.msecs ); + /* Flag slow (more than 2 secs) or non HTTP_OK requests by overwriting the last character of the status line (its newline) with a marker */ + if (( event_ptr->diff_time.secs > 2 ) || (event_ptr->http_status != HTTP_OK ) ) + { + size_t len = strlen (rest_api_log_str) ; /* an empty line has no last character to overwrite */ + if (( len > 0 ) && ( len < (size_t)(MAX_API_LOG_LEN-1) )) snprintf (&rest_api_log_str[len-1], (size_t)(MAX_API_LOG_LEN-1)-(len-1), " <---------"); /* stay inside the same MAX_API_LOG_LEN-1 limit the other writes to this buffer use */ + } + + send_log_message ( mtclogd_ptr, event_ptr->hostname.data(), &rest_api_filename[0], &rest_api_log_str[0] ); +} diff --git a/mtce-common/cgts-mtce-common-1.0/common/httpUtil.h b/mtce-common/cgts-mtce-common-1.0/common/httpUtil.h new file mode 100644 index 00000000..294f2a51 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/httpUtil.h @@ -0,0 +1,346 @@ +#ifndef __INCLUDE_HTTPUTIL_H__ +#define __INCLUDE_HTTPUTIL_H__ + +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +#include /* for ... string */ +#include /* for ... http libevent client */ +#include +#include + +using namespace std; + +#include "nodeBase.h" +#include "timeUtil.h" /* for ... 
time_delta_type */ + +/* HTTP Error Codes with no specific existing define MACRO */ +#define MTC_HTTP_BAD_REQUEST 400 +#define MTC_HTTP_UNAUTHORIZED 401 +#define MTC_HTTP_FORBIDDEN 403 +#define MTC_HTTP_CONFLICT 409 +#define MTC_HTTP_LENGTH_REQUIRED 411 +#define MTC_HTTP_NORESPONSE 444 +#define MTC_HTTP_UNPROCESSABLE_ENTITY 422 + +#define MTC_HTTP_ACCEPTED 202 + +#define EVENT_METHODS (EVHTTP_REQ_PATCH | \ + EVHTTP_REQ_POST | \ + EVHTTP_REQ_GET | \ + EVHTTP_REQ_PUT | \ + EVHTTP_REQ_DELETE) + +/** Maximum libevent response message size in bytes. */ +// #define MAX_EVENT_LEN (163840) +#define MAX_URL_LEN (200) + +#define HTTP_VIM_TIMEOUT (20) + +#define HTTP_MAX_RETRIES (3) + +#define HTTP_SYSINV_CRIT_TIMEOUT (20) +#define HTTP_SYSINV_NONC_TIMEOUT (10) + +#define HTTP_TOKEN_TIMEOUT (15) +#define HTTP_KEYSTONE_GET_TIMEOUT (10) +#define HTTP_SMGR_TIMEOUT (20) +#define HTTP_VIM_TIMEOUT (20) + +#define SMGR_MAX_RETRIES (3) + +#define CLIENT_HEADER "User-Agent" +#define CLIENT_SYSINV_1_0 "sysinv/1.0" +#define EVENT_SERVER "HTTP Event Server" + +#define SMGR_EVENT_SIG "smgrEvent" +#define SYSINV_EVENT_SIG "sysinvEvent" + +#define KEYSTONE_SIG "token" +#define SENSOR_SIG "sensor" +#define SYSINV_SIG "sysinv" +#define SMGR_SIG "smgr" +#define VIM_SIG "vim" + +#define SYSINV_OPER__LOAD_HOST "load host" +#define SYSINV_OPER__UPDATE_TASK "update task" +#define SYSINV_OPER__FORCE_TASK "force task" +#define SYSINV_OPER__UPDATE_UPTIME "update uptime" +#define SYSINV_OPER__UPDATE_VALUE "update value" +#define SYSINV_OPER__UPDATE_STATE "update state" +#define SYSINV_OPER__UPDATE_STATES "update states" +#define SYSINV_OPER__FORCE_STATES "force states" +#define SYSINV_OPER__CONFIG_SHOW "config show" +#define SYSINV_OPER__CONFIG_MODIFY "config modify" + +#define VIM_HOST__DISABLED "disabled" +#define VIM_HOST__ENABLED "enabled" +#define VIM_HOST__OFFLINE "offline" +#define VIM_HOST__FAILED "failed" + +/** The workQueue_process FSM states */ +typedef enum { + HTTP__TRANSMIT = 0, + 
HTTP__RECEIVE_WAIT = 1, + HTTP__RECEIVE = 2, + HTTP__FAILURE = 3, + HTTP__DONE_FAIL = 4, + HTTP__DONE_PASS = 5, + HTTP__STAGES = 6 +} httpStages_enum ; + +#define HTTP_RECEIVE_WAIT_MSEC (10) + +typedef struct +{ + string url ; /**< Keystone server URL string */ + string issued ; /**< Timestamp token was issued */ + string expiry ; /**< Timestamp when token is expired */ + string token ; /**< The huge 3kb token */ + bool refreshed; /**< set true when refreshed */ + bool delay ; /**< trigger renew with small delay + error renewal - flood avoidance */ +} keyToken_type ; + +/** All supported Request Type Enums */ +typedef enum { + SERVICE_NONE, + + SYSINV_ADD, + SYSINV_GET, + SYSINV_HOST_QUERY, + SYSINV_UPDATE, + + SYSINV_CONFIG_SHOW, + SYSINV_CONFIG_MODIFY, + + SYSINV_SENSOR_LOAD, + SYSINV_SENSOR_LOAD_GROUPS, + SYSINV_SENSOR_LOAD_GROUP, + SYSINV_SENSOR_ADD, + SYSINV_SENSOR_ADD_GROUP, + SYSINV_SENSOR_DEL, + SYSINV_SENSOR_DEL_GROUP, + SYSINV_SENSOR_MOD, + SYSINV_SENSOR_MOD_GROUP, + SYSINV_SENSOR_GROUP_SENSORS, + + VIM_UPDATE, + VIM_HOST_DISABLED, + VIM_HOST_ENABLED, + VIM_HOST_OFFLINE, + VIM_HOST_FAILED, + VIM_DPORT_FAILED, + VIM_DPORT_CLEARED, + VIM_DPORT_DEGRADED, + VIM_DPORT_OFFLINE, + VIM_HOST_QUERY, + + VIM_HOST_STATE_QUERY, + VIM_HOST_INSTANCE_QUERY, + VIM_HOST_INSTANCE_FAILED, + VIM_HOST_INSTANCE_STATUS, + VIM_HOST_INSTANCE_NOTIFY, + + SMGR_START_SWACT, + SMGR_QUERY_SWACT, + SMGR_HOST_UNLOCKED, + SMGR_HOST_LOCKED, + SMGR_HOST_ENABLED, + SMGR_HOST_DISABLED, + + KEYSTONE_TOKEN, + KEYSTONE_GET_TOKEN, + KEYSTONE_GET_SERVICE_LIST, + KEYSTONE_GET_ENDPOINT_LIST, + + TEST_WITH_NO_TOKEN, + TEST_WITH_TOKEN, + + SERVICE_LAST +} libEvent_enum ; + + +/** Local event control structure for REST API services + * + * Nova, Neutron, Keystone and Inventory + * + */ +struct libEvent +{ + /** Execution Controls */ + httpStages_enum state ; /**< This http request FSM state */ + int sequence ; /**< Event sequence number */ + bool mutex ; /**< single operation at a time */ + bool active 
; /**< true if waiting on response */ + int stuck ; /**< Count mutex active stuck state */ + bool blocking ; /**< true if command is blocking */ + bool found ; /**< true if query was found */ + int timeout ; /**< Request timeout */ + int count ; /**< retry recover counter */ + int fails ; /**< fail counter */ + int retries ; /**< number of retries on failure*/ + int cur_retries ; + int max_retries ; + bool noncritical ; /**< true: event is non-ctitical */ + int rx_retry_cnt ; /**< help avoid infinite rx retry*/ + int rx_retry_max ; /**< each cmd can have a max */ + /* HTTP request Info */ + enum evhttp_cmd_type type; /**< HTTP Request Type ; PUT/GET */ + struct event_base *base; /**< libEvent API service base */ + struct evhttp_connection *conn; /**< HTTP connection ptr */ + struct evhttp_request *req ; /**< HTTP request ptr */ + struct evbuffer *buf ; /**< HTTP output buffer ptr */ + struct evbuffer_ptr evp ; /**< HTTP output buffer ptr */ + + string log_prefix ; /**< log prefix for this event */ + + /** Service Specific Request Info */ + libEvent_enum request ; /**< Specify the request command */ + keyToken_type token ; /**< Copy of the active token */ + string service ; /**< Service being executed */ + string hostname ; /**< Target hostname */ + string uuid ; /**< The UUID for this request */ + string new_uuid ; /**< The UUID created & returned */ + string ip ; /**< Server IP address */ + int port ; /**< Server port number */ + string operation ; /**< Specify the operation */ + string information ; + string key ; + string value ; + string prefix_path ; + string label ; /**< typically a response label */ + string entity_path ; /**< HTTP entity request string */ + string entity_path_next ; /**< next entity request string */ + string address ; /**< http url address */ + string payload ; /**< the request's payload */ + string user_agent ; /**< set the User-Agent header */ + + /** Result Info */ + int status ; /**< Execution Status */ + int http_status ; /**< raw 
http returned status */ + int exec_time_msec ; /**< execution time in msec */ + node_inv_type inv_info ; + size_t response_len ; /**< the json response length */ + string response ; /**< the json response string */ + string result ; /**< Command specific result str */ + + /* Endpoint strings */ + string admin_url ; + string internal_url ; + string public_url ; + + time_debug_type send_time ; /**< Request Dispatch Timestamp */ + time_debug_type done_time ; /**< Response Handler Timestamp */ + time_delta_type diff_time ; /**< how long the command handling took */ + + bool low_wm ; + bool med_wm ; + bool high_wm ; + + int (*handler) (struct libEvent &) ; + + char req_str[MAX_API_LOG_LEN] ; + + unsigned long long prev_time ; /* latency log candidate start (prev) time */ + unsigned long long this_time ; /* ... end (now or this) time */ + +} ; + + +typedef struct +{ + struct event_base * base_ptr ; + struct libEvent * event_ptr ; +} event_base_pair_type ; + +typedef struct +{ + int elements ; + list pair_list ; +} event_base_list_type ; + + + +/** Maximum number of headers that can be added to an HTTP message. */ +#define MAX_HEADERS (10) + +/** A header entry type. */ +typedef struct +{ + string key ; /**< the header label. */ + string value ; /**< the header value. */ +} http_header_entry_type; + +/** The header entry table. */ +typedef struct +{ + int entries ; /**< Number of entries in the header table. */ + http_header_entry_type entry[MAX_HEADERS]; /**< entry array. */ +} http_headers_type ; + +void httpUtil_init ( void ); + +int httpUtil_event_init ( libEvent * ptr , + string hostname, + string service, + string ip, + int port ); + +/** Add payload to the HTTP message body. */ +int httpUtil_payload_add ( libEvent & event ); + +/** Add all headers in header table to the HTTP connection message. */ +int httpUtil_header_add ( libEvent * ptr, http_headers_type * hdrs_ptr ); + +/** Create an HTTP request. 
*/ +int httpUtil_request_make ( libEvent * ptr, enum evhttp_cmd_type type, string path ); + +/** Open a connection to an HTTP server. */ +int httpUtil_connect ( libEvent & event ); + +/** Get a new HTTP request pointer. */ +int httpUtil_request ( libEvent & event, + void(*hdlr)(struct evhttp_request *, void *)); + +/** Common REST API Request Utility */ +int httpUtil_api_request ( libEvent & event ); + +/** Common REST API Request Utility */ +int httpUtil_request ( libEvent & event , bool block, + void(*hdlr)(struct evhttp_request *, void *)); + +/** Common REST API Receive Utility for non-blocking requests */ +int httpUtil_receive ( libEvent & event ); + +/** HTTP response status checker */ +int httpUtil_status ( libEvent & event ); + +/** Free the libEvent */ +void httpUtil_free_base ( libEvent & event ); + +/** Free the event lib connection */ +void httpUtil_free_conn ( libEvent & event ); + +/** TODO: FIXME: Get the payload string length. */ +string httpUtil_payload_len ( libEvent * ptr ); + +/** Get the length of the json response */ +int httpUtil_get_length ( libEvent & event ); + +/** Load the json response into the event struct */ +int httpUtil_get_response ( libEvent & event ); + +/** print event filtered event */ +void httpUtil_log_event ( libEvent * event ); + +void httpUtil_event_info ( libEvent & event ); + +const char * getHttpCmdType_str ( evhttp_cmd_type type ); + + +#endif /* __INCLUDE_HTTPUTIL_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/common/ipmiUtil.cpp b/mtce-common/cgts-mtce-common-1.0/common/ipmiUtil.cpp new file mode 100644 index 00000000..7cc0e05b --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/ipmiUtil.cpp @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + * + * + * @file + * Wind River Titanium Cloud Common IPMI Utilities + */ +#include +#include +#include + +using namespace std; + +#include "nodeBase.h" /* for ... 
mtce node common definitions */ +#include "hostUtil.h" /* for ... mtce host common definitions */ +#include "ipmiUtil.h" /* for ... this module header */ +#include "nodeClass.h" /* for ... */ + +/* Create a randomly named password filename */ +void ipmiUtil_create_pw_fn ( thread_info_type * info_ptr, string pw ) +{ + info_ptr->password_file.clear (); + string password_tempfile = IPMITOOL_OUTPUT_DIR ; + password_tempfile.append(".") ; + password_tempfile.append(program_invocation_short_name); + password_tempfile.append("-"); + password_tempfile.append(info_ptr->hostname); + password_tempfile.append("-"); + + info_ptr->pw_file_fd = hostUtil_mktmpfile (info_ptr->hostname, + password_tempfile, + info_ptr->password_file, + pw ); + if ( info_ptr->pw_file_fd <= 0 ) + { + info_ptr->status_string = "failed to get an open temporary password filedesc" ; + info_ptr->status = FAIL_FILE_CREATE ; + info_ptr->password_file.clear(); + } + else + { + /* clean-up */ + if ( info_ptr->pw_file_fd > 0 ) + close(info_ptr->pw_file_fd); + info_ptr->pw_file_fd = 0 ; + } +} + +/* Create the ipmitool output_filename */ +string ipmiUtil_create_data_fn ( string & hostname, string file_suffix ) +{ + /* create the output filename */ + string ipmitool_datafile = IPMITOOL_OUTPUT_DIR ; + ipmitool_datafile.append(program_invocation_short_name); + ipmitool_datafile.append("_"); + ipmitool_datafile.append(hostname); + + /* add the sensor list command */ + ipmitool_datafile.append(file_suffix); + + return ( ipmitool_datafile ); +} + +/* Create the ipmi request */ +string ipmiUtil_create_request ( string cmd, string & ip, string & un, string & pw, string & out ) +{ + /* ipmitool -I lanplus -H $uut_ip -U $uut_un -E */ + /* build the ipmitool command */ + string ipmitool_request = IPMITOOL_PATH_AND_FILENAME ; + + /* Specify lanplus network mode for centralized power control, 1 retry + * followed by the bm ip address and password file */ + ipmitool_request.append(" -I lanplus -R 1 -H "); + 
ipmitool_request.append(ip); + + /* then specify the bmc username */ + ipmitool_request.append(" -U "); + ipmitool_request.append(un); + + if ( daemon_is_file_present ( MTC_CMD_FIT__LOUD_BM_PW ) == true ) + { + /* get the password from the file and put it on the command line */ + ipmitool_request.append(" -P "); + ipmitool_request.append(daemon_get_file_str(pw.data())); + } + else + { + /* add the password file option and file */ + ipmitool_request.append(" -f "); + ipmitool_request.append(pw); + } + + /* add the command */ + ipmitool_request.append(" "); + ipmitool_request.append(cmd); + + /* output filename */ + ipmitool_request.append (" > "); + ipmitool_request.append (out); + + return (ipmitool_request); +} + +/* init the mc info struct */ +void ipmiUtil_mc_info_init ( mc_info_type & mc_info ) +{ + mc_info.device_id.clear(); + mc_info.manufacturer_name.clear(); + mc_info.manufacturer_id.clear(); + mc_info.product_name.clear(); + mc_info.product_id.clear(); + mc_info.fw_version.clear(); + mc_info.hw_version.clear(); +} + +/* print a log of the mc info data */ +void mc_info_log ( string hostname, mc_info_type & mc_info, int rc ) +{ + if ( rc ) + { + elog ("%s mc info load failed (rc:%d)\n", hostname.c_str(), rc ); + } + else + { + ilog ("%s Manufacturer: %s [id:%s] [ Device: %s ver %s ]\n", + hostname.c_str(), + mc_info.manufacturer_name.c_str(), + mc_info.manufacturer_id.c_str(), + mc_info.device_id.c_str(), + mc_info.hw_version.c_str()); + + ilog ("%s Product Name: %s [id:%s] [ BMC FW: ver %s ]\n", + hostname.c_str(), + mc_info.product_name.c_str(), + mc_info.product_id.c_str(), + mc_info.fw_version.c_str()); + } +} + +/* load the specified key value in buffer line into 'value' ; returns true when the line contains both 'key' and 'delimiter', in which case 'value' is the text after the first delimiter less its trailing newline ; otherwise returns false and leaves 'value' untouched */ + bool _got_delimited_value ( char * buf_ptr, const char * key, const char * delimiter, string & value ) +{ + if ( strstr ( buf_ptr, key )) + { + string _str = buf_ptr ; + if ( _str.find(key) != std::string::npos ) + { + if ( _str.find( delimiter ) != std::string::npos ) + { + size_t y = 
_str.find( delimiter ) ; + value = _str.substr ( y+strlen(delimiter), std::string::npos) ; + if (( !value.empty() ) && ( value[value.size()-1] == '\n' )) value.erase ( value.size()-1, std::string::npos ) ; /* strip the newline only when there is one ; the value may be empty or the line may have been cut short by the caller's read buffer */ + return (true); + } + } + } + return (false); +} + +/***************************************************************************** + * + * Name : ipmiUtil_mc_info_load + * + * Description: Load the contents of a file containing an ipmitool formatted + * output from an mc info request into the passed in mc_info + * struct. Loaded info includes + * + * Manufacturer (id/name) + * Product (id/name) + * Device (id/version) + * Firmware (version) + * + * A log like the following is generated. + * + * controller-0 mc info: Nokia:7244 - Quanta:12866 (0x3242) [bmc fw:3.29] [device:32 ver:1] + * + * Example MC Info output from ipmitool + * + * Device ID : 32 + * Device Revision : 1 + * Firmware Revision : 3.29 + * IPMI Version : 2.0 + * Manufacturer ID : 7244 + * Manufacturer Name : Nokia + * Product ID : 12866 (0x3242) + * Product Name : Quanta + * Device Available : yes + * Provides Device SDRs : no + * Additional Device Support : + * Sensor Device + * SDR Repository Device + * SEL Device + * FRU Inventory Device + * Chassis Device + * + **************************************************************************/ + +#define BUFFER (80) +int ipmiUtil_mc_info_load ( string hostname, const char * filename, mc_info_type & mc_info ) +{ + int rc = FAIL ; + ipmiUtil_mc_info_init ( mc_info ); + if ( daemon_is_file_present ( filename ) ) + { + FILE * _stream = fopen ( filename, "r" ); + if ( _stream ) + { + char buffer [BUFFER]; + MEMSET_ZERO(buffer); + while ( fgets (buffer, BUFFER, _stream) ) + { + if ( _got_delimited_value ( buffer, MC_INFO_LABEL_FW_VERSION, MC_INFO_LABEL_DELIMITER, mc_info.fw_version )) + { + rc = PASS ; + continue; + } + if ( _got_delimited_value ( buffer, MC_INFO_LABEL_HW_VERSION, MC_INFO_LABEL_DELIMITER, mc_info.hw_version )) + continue; + if ( _got_delimited_value ( buffer, MC_INFO_LABEL_DEVICE_ID, 
MC_INFO_LABEL_DELIMITER, mc_info.device_id )) + continue; + if ( _got_delimited_value ( buffer, MC_INFO_LABEL_PRODUCT_ID, MC_INFO_LABEL_DELIMITER, mc_info.product_id )) + continue; + if ( _got_delimited_value ( buffer, MC_INFO_LABEL_PRODUCT_NAME, MC_INFO_LABEL_DELIMITER, mc_info.product_name )) + continue; + if ( _got_delimited_value ( buffer, MC_INFO_LABEL_MANUFACTURE_ID, MC_INFO_LABEL_DELIMITER, mc_info.manufacturer_id )) + continue; + if ( _got_delimited_value ( buffer, MC_INFO_LABEL_MANUFACTURE_NAME, MC_INFO_LABEL_DELIMITER, mc_info.manufacturer_name )) + continue; + else + blog3 ("buffer: %s\n", buffer ); + MEMSET_ZERO(buffer); + } + fclose(_stream); + } + } + else + { + elog ("%s failed to open mc info file '%s'\n", hostname.c_str(), filename); + rc = FAIL_FILE_ACCESS ; + } + + mc_info_log ( hostname, mc_info, rc ); + return (rc); +} diff --git a/mtce-common/cgts-mtce-common-1.0/common/ipmiUtil.h b/mtce-common/cgts-mtce-common-1.0/common/ipmiUtil.h new file mode 100644 index 00000000..3a4c650f --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/ipmiUtil.h @@ -0,0 +1,102 @@ +#ifndef __INCLUDE_IPMIUTIL_H__ +#define __INCLUDE_IPMIUTIL_H__ + +/* + * Copyright (c) 2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud's Maintenance Common IPMI Utilities Header + */ + +#include "nodeBase.h" /* for ... */ +#include "threadUtil.h" /* for ... 
thread utilities */ + +#define MC_INFO_LABEL_DELIMITER ((const char *)(": ")) +#define MC_INFO_LABEL_FW_VERSION ((const char *)("Firmware Revision")) +#define MC_INFO_LABEL_HW_VERSION ((const char *)("Device Revision")) +#define MC_INFO_LABEL_DEVICE_ID ((const char *)("Device ID")) +#define MC_INFO_LABEL_PRODUCT_ID ((const char *)("Product ID")) +#define MC_INFO_LABEL_PRODUCT_NAME ((const char *)("Product Name")) +#define MC_INFO_LABEL_MANUFACTURE_ID ((const char *)("Manufacturer ID")) +#define MC_INFO_LABEL_MANUFACTURE_NAME ((const char *)("Manufacturer Name")) + +#define IPMITOOL_POWER_RESET_CMD ((const char *)("chassis power reset")) +#define IPMITOOL_POWER_RESET_RESP ((const char *)("Chassis Power Control: Reset")) + +#define IPMITOOL_POWER_OFF_CMD ((const char *)("chassis power off")) +#define IPMITOOL_POWER_OFF_RESP ((const char *)("Chassis Power Control: Down/Off")) + +#define IPMITOOL_POWER_ON_CMD ((const char *)("chassis power on")) +#define IPMITOOL_POWER_ON_RESP ((const char *)("Chassis Power Control: Up/On")) + +#define IPMITOOL_POWER_CYCLE_CMD ((const char *)("chassis power cycle")) +#define IPMITOOL_POWER_CYCLE_RESP ((const char *)("Chassis Power Control: Cycle")) + +#define IPMITOOL_POWER_STATUS_CMD ((const char *)("chassis power status")) +#define IPMITOOL_POWER_ON_STATUS ((const char *)("Chassis Power is on")) +#define IPMITOOL_POWER_OFF_STATUS ((const char *)("Chassis Power is off")) + +#define IPMITOOL_RESTART_CAUSE_CMD ((const char *)("chassis restart_cause")) + +#define IPMITOOL_MC_INFO_CMD ((const char *)("mc info")) + +#define IPMITOOL_CMD_FILE_SUFFIX ((const char *)("_power_cmd_result")) +#define IPMITOOL_MC_INFO_FILE_SUFFIX ((const char *)("_mc_info")) +#define IPMITOOL_RESTART_CAUSE_FILE_SUFFIX ((const char *)("_restart_cause")) +#define IPMITOOL_POWER_STATUS_FILE_SUFFIX ((const char *)("_power_status")) + +#define IPMITOOL_MAX_RECV_RETRIES (10) + +/* Warning : Changes here require 'mtc_ipmiRequest_str' string array to be updated */ 
+typedef enum +{ + IPMITOOL_THREAD_CMD__NULL = 0, + IPMITOOL_THREAD_CMD__POWER_RESET, + + IPMITOOL_THREAD_CMD__POWER_ON, + IPMITOOL_THREAD_CMD__POWER_OFF, + IPMITOOL_THREAD_CMD__POWER_CYCLE, + + IPMITOOL_THREAD_CMD__MC_INFO, + IPMITOOL_THREAD_CMD__POWER_STATUS, + IPMITOOL_THREAD_CMD__RESTART_CAUSE, + + IPMITOOL_THREAD_CMD__READ_SENSORS, + + IPMITOOL_THREAD_CMD__LAST + +} ipmitool_cmd_enum ; + +const char * getIpmiCmd_str ( int command ); +const char * getIpmiAction_str ( int command ); + + +typedef struct +{ + std::string product_name ; + std::string product_id ; + std::string manufacturer_name ; + std::string manufacturer_id ; + std::string device_id ; + std::string fw_version ; + std::string hw_version ; +} mc_info_type ; + +int ipmiUtil_mc_info_load ( string hostname, const char * filename, mc_info_type & mc_info ); +void ipmiUtil_mc_info_init ( mc_info_type & mc_info ); + +/* Create a randomly named password filename */ +void ipmiUtil_create_pw_fn ( thread_info_type * info_ptr, string pw ); + +/* Create the ipmitool output_filename in info_ptr->password_file */ +string ipmiUtil_create_data_fn ( string & hostname, string file_suffix ); + +/* Create the ipmi request */ +string ipmiUtil_create_request ( string cmd, string & ip, string & un, string & pw, string & out ); + +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/common/jsonUtil.cpp b/mtce-common/cgts-mtce-common-1.0/common/jsonUtil.cpp new file mode 100644 index 00000000..ee84ce7c --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/jsonUtil.cpp @@ -0,0 +1,1260 @@ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Controller Maintenance JSON Utilities + */ + +#include +#include +#include +#include +#include /* for ... 
json-c json string parsing */ +#include + +using namespace std; + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "jsn" + +#include "nodeClass.h" +#include "nodeUtil.h" +#include "jsonUtil.h" /* JSON Utilities */ + +/* Internal Private Interfaces */ +static struct json_object * _json_verify_object ( struct json_object * obj, + const char * label); +static struct json_object * _json_get_host_next ( struct json_object * obj ); +static struct json_object * _json_object_array_get_idx ( struct json_object * obj, + int index ); +static int _json_get_object_number ( struct json_object * obj ); +static string _json_get_key_value_string ( struct json_object * obj, + const char * key ); + +/* init one element of the struct */ +void jsonUtil_init ( jsonUtil_info_type & info , int index ) +{ + node_inv_init ( info.host[index] ) ; +} + +/* init the entire struct */ +void jsonUtil_init ( jsonUtil_info_type & info ) +{ + info.elements = 0 ; + for ( int i = 0 ; i < MAX_JSON_INV_GET_HOST_NUM ; i++ ) + { + jsonUtil_init ( info , i ) ; + } + info.next = "" ; +} + +/* Validate the supplied label is in the specified object */ +static struct json_object * _json_verify_object ( struct json_object * obj, + const char * label ) +{ + struct json_object * req_obj = (struct json_object *)(NULL); + + json_bool status = json_object_object_get_ex (obj, label, &req_obj); + if (( status == TRUE ) && ( req_obj )) + { + return (req_obj); + } + wlog ("Specified label '%s' not found in response\n", label ); + status = json_object_object_get_ex (obj, "error", &req_obj ); + if (( status == TRUE ) && ( req_obj )) + { + elog ("Found 'error' label instead\n"); + } + else + { + elog ("Neither specified nor error label found in object\n"); + } + return ((struct json_object *)(NULL)) ; +} + +static struct +json_object * _json_get_host_next ( struct json_object * obj ) +{ + /* Get the next host entity path */ + struct json_object * next_obj = (struct json_object *)(NULL); + json_bool status = 
json_object_object_get_ex(obj, MTC_JSON_INV_NEXT, &next_obj ); + if (( status == TRUE ) && ( next_obj )) + { + return (next_obj); + } + else + { + return ((struct json_object *)(NULL)) ; + } +} + + +/* Get the json host info object */ +static struct +json_object * _json_object_array_get_idx ( struct json_object * obj, int index ) +{ + /* Get the json host array list ; there should be one since + * we read inventory one host at a time */ + struct array_list *array_list_obj = json_object_get_array(obj); + if ( array_list_obj ) + { + int len = array_list_length (array_list_obj); + if ( len == 0 ) + { + ilog ( "No provisioned hosts\n"); + } + else if ( index < len ) + { + struct json_object *node_obj ; + node_obj = json_object_array_get_idx (obj, index ); + if ( node_obj ) + { + return ( node_obj ); + } + else + { + elog ("No json host info object\n"); + } + } + else + { + elog ("No json host for requested index\n"); + } + } + else + { + elog ("No json host array list\n"); + } + + return ((struct json_object *)(NULL)) ; +} + +/* return the number of array objects in the specified object */ +static int _json_get_object_number ( struct json_object * obj ) +{ + /* Get number of elements in the json host array list */ + struct array_list *array_list_obj = json_object_get_array(obj); + if ( array_list_obj ) + { + return ( array_list_length (array_list_obj)); + } + return (0); +} + +string _json_get_key_value_string ( struct json_object * obj, const char * key ) +{ + std::string value = "" ; + + /* Get the node uuid */ + struct json_object * key_obj = (struct json_object *)(NULL); + json_bool status = json_object_object_get_ex(obj, key, &key_obj ); + if ( ( status == TRUE ) && ( key_obj )) + { + value.append(json_object_get_string(key_obj)); + } + else + { + value.append("none"); + } + return ( value ); +} + +string jsonUtil_get_key_value_string ( struct json_object * obj, const char * key ) +{ + return (_json_get_key_value_string ( obj, key )); +} + +int 
jsonUtil_get_key_value_int ( struct json_object * obj, const char * key ) +{ + int value = 0 ; + + /* Get the node uuid */ + struct json_object * key_obj = (struct json_object *)(NULL); + json_bool status = json_object_object_get_ex(obj, key, &key_obj); + if ( (status == TRUE ) && ( key_obj )) + { + value = json_object_get_int(key_obj); + } + return ( value ); +} + +bool jsonUtil_get_key_value_bool ( struct json_object * obj, const char * key ) +{ + bool value = false ; + + /* Get the node uuid */ + struct json_object * key_obj = (struct json_object *)(NULL); + json_bool status = json_object_object_get_ex(obj, key, &key_obj ); + if (( status == TRUE ) && ( key_obj )) + { + value = json_object_get_boolean(key_obj); + } + else + { + wlog ("failed to get key object\n"); + } + return ( value ); +} + +int jsonUtil_get_key_val ( char * json_str_ptr, + string key, + string & value ) +{ + value = "" ; + + /* init to null to avoid trap on early cleanup call with + * bad non-null default pointer value */ + struct json_object *raw_obj = (struct json_object *)(NULL); + + if ((json_str_ptr == NULL) || ( *json_str_ptr == '\0' ) || ( ! strncmp ( json_str_ptr, "(null)" , 6 ))) + { + elog ("Cannot tokenize a null json string\n"); + elog ("... json string: %s\n", json_str_ptr ); + return (FAIL); + } + + size_t len_before = strlen (json_str_ptr); + + jlog2 ("String: %s\n", json_str_ptr ); + + raw_obj = json_tokener_parse( json_str_ptr ); + if ( raw_obj ) + { + value = _json_get_key_value_string ( raw_obj, key.data() ) ; + jlog1 ("%s:%s\n", key.c_str(), value.c_str()); + } + else + { + size_t len_after = strlen (json_str_ptr); + + elog ("Unable to tokenize string (before:%ld after:%ld);\n", len_before, len_after); + elog ("... 
json string: %s\n", json_str_ptr ); + } + + if (raw_obj) + json_object_put(raw_obj); + +/* Sometimes gettibng an empty key is acceptable */ +// if ( value.empty() || !value.compare("none") ) +// { +// return (FAIL); +// } + + return (PASS); +} + + +/** This utility freads the passed in inventory GET request + * response json character string and performes the following + * operations with failure detection for each step. + * + * 1. tokenizes the passed in string + * 2. confirms string as valid inventory GET response + * 3. + * 4. + * + * @returns + * PASS if no inventory, + * RETRY if there is a next element + * FAIL if there was a error + * + * Parse and load the data from the json inv request */ +int jsonUtil_inv_load ( char * json_str_ptr, + jsonUtil_info_type & info ) +{ + int rc = PASS ; + + /* init to null to avoid trap on early cleanup call with + * bad non-null default pointer value */ + struct json_object *req_obj = (struct json_object *)(NULL); + struct json_object *raw_obj = (struct json_object *)(NULL); + struct json_object *node_obj = (struct json_object *)(NULL); + struct json_object *next_obj = (struct json_object *)(NULL); + + // printf ("String: <%s>\n", json_str_ptr ); + if (( json_str_ptr == NULL ) || ( *json_str_ptr == '\0' ) || + ( ! 
strncmp ( json_str_ptr, "(null)" , 6 ))) + { + elog ("Cannot tokenize a null json string\n"); + return (FAIL); + } + raw_obj = json_tokener_parse( json_str_ptr ); + if ( !raw_obj ) + { + elog ("No or invalid inventory GET response\n"); + rc = FAIL ; + goto cleanup ; + } + else + { + jlog1 ("%s\n", json_object_get_string(raw_obj)); + } + + /* Check response sanity */ + req_obj = _json_verify_object ( raw_obj, MTC_JSON_INV_LABEL ); + if ( !req_obj ) + { + elog ("Missing or Invalid JSON Inventory Object\n"); + rc = FAIL ; + goto cleanup ; + } + + /* Get the label used to request the next inventory element */ + next_obj = _json_get_host_next ( raw_obj ); + if ( !next_obj ) + { + info.next.clear(); + } + else + { + info.next.clear(); + info.next.append(json_object_get_string(next_obj)); + } + + /* Limit the amount of batched inventory that can be read at once */ + info.elements = _json_get_object_number ( req_obj ) ; + if ( info.elements > MAX_JSON_INV_GET_HOST_NUM ) + info.elements = MAX_JSON_INV_GET_HOST_NUM ; + + for ( int i = 0 ; i < info.elements ; i++ ) + { + node_obj = _json_object_array_get_idx ( req_obj, i ); + if ( !node_obj ) + { + wlog ("Host object index %d is not present.\n", i ); + rc = RETRY ; + goto cleanup ; + } + + /* Get all required fields */ + info.host[i].uuid = _json_get_key_value_string ( node_obj, MTC_JSON_INV_UUID ); + info.host[i].name = _json_get_key_value_string ( node_obj, MTC_JSON_INV_NAME ); + info.host[i].avail = _json_get_key_value_string ( node_obj, MTC_JSON_INV_AVAIL ); + info.host[i].admin = _json_get_key_value_string ( node_obj, MTC_JSON_INV_ADMIN ); + info.host[i].oper = _json_get_key_value_string ( node_obj, MTC_JSON_INV_OPER ); + info.host[i].mac = _json_get_key_value_string ( node_obj, MTC_JSON_INV_HOSTMAC ); + info.host[i].ip = _json_get_key_value_string ( node_obj, MTC_JSON_INV_HOSTIP ); + info.host[i].type = _json_get_key_value_string ( node_obj, MTC_JSON_INV_TYPE ); + info.host[i].func = _json_get_key_value_string ( 
node_obj, MTC_JSON_INV_FUNC ); + info.host[i].task = _json_get_key_value_string ( node_obj, MTC_JSON_INV_TASK ); + info.host[i].bm_ip = _json_get_key_value_string ( node_obj, MTC_JSON_INV_BMIP ); + info.host[i].bm_un = _json_get_key_value_string ( node_obj, MTC_JSON_INV_BMUN ); + info.host[i].bm_type = _json_get_key_value_string( node_obj, MTC_JSON_INV_BMTYPE ); + info.host[i].action = _json_get_key_value_string ( node_obj, MTC_JSON_INV_ACTION ); + info.host[i].uptime = _json_get_key_value_string ( node_obj, MTC_JSON_INV_UPTIME ); + info.host[i].oper_subf = _json_get_key_value_string ( node_obj, MTC_JSON_INV_OPER_SUBF ); + info.host[i].avail_subf = _json_get_key_value_string ( node_obj, MTC_JSON_INV_AVAIL_SUBF); + info.host[i].infra_ip = _json_get_key_value_string ( node_obj, MTC_JSON_INV_INFRAIP ); + + if ( info.host[i].uuid.length() != UUID_LEN ) + { + elog ("Failed to get json host uuid string\n"); + rc = FAIL; + } + if ( info.host[i].name.length() == 0 ) + { + elog ("Failed to get json host name string\n"); + rc = FAIL ; + } + // jsonUtil_print ( info, i ); + } + +cleanup: + + if (raw_obj) json_object_put(raw_obj); + if (req_obj) json_object_put(req_obj); + if (next_obj) json_object_put(next_obj); + if (node_obj) json_object_put(node_obj); + + return (rc); +} + +/* This handler does nothing but verify the + tokenization of the response */ +int jsonUtil_patch_load ( char * json_str_ptr, + node_inv_type & info ) +{ + /* init to null to avoid trap on early + * cleanup call with bad non null + * default pointer value */ + struct json_object *node_obj = (struct json_object *)(NULL); + + node_obj = json_tokener_parse( json_str_ptr ); + if ( !node_obj ) + { + elog ("No or invalid inventory PATCH response\n"); + return (FAIL); + } + + /* Get all required fields */ + info.uuid = _json_get_key_value_string ( node_obj, MTC_JSON_INV_UUID ); + info.name = _json_get_key_value_string ( node_obj, MTC_JSON_INV_NAME ); + info.avail = _json_get_key_value_string ( node_obj, 
MTC_JSON_INV_AVAIL ); + info.admin = _json_get_key_value_string ( node_obj, MTC_JSON_INV_ADMIN ); + info.oper = _json_get_key_value_string ( node_obj, MTC_JSON_INV_OPER ); + info.mac = _json_get_key_value_string ( node_obj, MTC_JSON_INV_HOSTMAC ); + info.ip = _json_get_key_value_string ( node_obj, MTC_JSON_INV_HOSTIP ); + info.type = _json_get_key_value_string ( node_obj, MTC_JSON_INV_TYPE ); + info.func = _json_get_key_value_string ( node_obj, MTC_JSON_INV_FUNC ); + info.task = _json_get_key_value_string ( node_obj, MTC_JSON_INV_TASK ); + info.bm_ip = _json_get_key_value_string ( node_obj, MTC_JSON_INV_BMIP ); + info.bm_un = _json_get_key_value_string ( node_obj, MTC_JSON_INV_BMUN ); + info.bm_type = _json_get_key_value_string( node_obj, MTC_JSON_INV_BMTYPE ); + info.action= _json_get_key_value_string ( node_obj, MTC_JSON_INV_ACTION ); + info.uptime= _json_get_key_value_string ( node_obj, MTC_JSON_INV_UPTIME ); + info.oper_subf = _json_get_key_value_string ( node_obj, MTC_JSON_INV_OPER_SUBF ); + info.avail_subf = _json_get_key_value_string ( node_obj, MTC_JSON_INV_AVAIL_SUBF); + info.infra_ip = _json_get_key_value_string ( node_obj, MTC_JSON_INV_INFRAIP ); + + if (node_obj) json_object_put(node_obj); + + return (PASS); +} + + + +/* Load up json_info with the contents of the json_str */ +int jsonUtil_load_host ( char * json_str_ptr, node_inv_type & info ) +{ + int rc = PASS ; + string error = "" ; + + /* init to null to avoid trap on early cleanup call with + * bad non-null default pointer value */ + struct json_object *node_obj = (struct json_object *)(NULL); + struct json_object *err_obj = (struct json_object *)(NULL); + + if (( json_str_ptr == NULL ) || ( *json_str_ptr == '\0' ) || + ( ! 
strncmp ( json_str_ptr, "(null)" , 6 ))) + { + elog ("Cannot tokenize a null json string\n"); + return (FAIL); + } + node_obj = json_tokener_parse( json_str_ptr ); + if ( !node_obj ) + { + elog ("No or invalid inventory response\n"); + rc = FAIL ; + goto load_host_cleanup ; + } + + /* Check for error */ + error = _json_get_key_value_string ( node_obj, "error_message" ); + if ( error == "none" ) + { + node_inv_init ( info ); + + /* Get all required fields */ + info.mac = _json_get_key_value_string ( node_obj, MTC_JSON_INV_HOSTMAC); + info.ip = _json_get_key_value_string ( node_obj, MTC_JSON_INV_HOSTIP ); + info.uuid = _json_get_key_value_string ( node_obj, MTC_JSON_INV_UUID ); + info.name = _json_get_key_value_string ( node_obj, MTC_JSON_INV_NAME ); + info.avail = _json_get_key_value_string ( node_obj, MTC_JSON_INV_AVAIL ); + info.admin = _json_get_key_value_string ( node_obj, MTC_JSON_INV_ADMIN ); + info.oper = _json_get_key_value_string ( node_obj, MTC_JSON_INV_OPER ); + info.type = _json_get_key_value_string ( node_obj, MTC_JSON_INV_TYPE ); + info.func = _json_get_key_value_string ( node_obj, MTC_JSON_INV_FUNC ); + info.task = _json_get_key_value_string ( node_obj, MTC_JSON_INV_TASK ); + info.bm_ip = _json_get_key_value_string ( node_obj, MTC_JSON_INV_BMIP ); + info.bm_un = _json_get_key_value_string ( node_obj, MTC_JSON_INV_BMUN ); + info.bm_type = _json_get_key_value_string( node_obj, MTC_JSON_INV_BMTYPE); + info.action= _json_get_key_value_string ( node_obj, MTC_JSON_INV_ACTION ); + info.uptime= _json_get_key_value_string ( node_obj, MTC_JSON_INV_UPTIME ); + info.id = _json_get_key_value_string ( node_obj, "id" ); + info.oper_subf = _json_get_key_value_string ( node_obj, MTC_JSON_INV_OPER_SUBF ); + info.avail_subf = _json_get_key_value_string ( node_obj, MTC_JSON_INV_AVAIL_SUBF); + info.infra_ip = _json_get_key_value_string ( node_obj, MTC_JSON_INV_INFRAIP ); + + if ( info.uuid.length() != UUID_LEN ) + { + elog ("Failed to get json host uuid string\n"); + rc = 
FAIL; + } + if ( info.name.length() == 0 ) + { + elog ("Failed to get json host name string\n"); + rc = FAIL ; + } + + } + else + { + std::size_t found ; + /* { "error_message": + "{ "debuginfo": null, + "faultcode": "Server", + "faultstring": "Server controller-0 could not be found." + }" + } + */ + /* get the error */ + found=error.find ( "could not be found" ); + if ( found!=std::string::npos) + { + elog ("%s\n", error.c_str()); + elog ("Requested host not found\n"); + rc = PASS ; + goto load_host_cleanup ; + } + else + { + elog ("Unknown error (%s)\n", error.c_str()); + rc = PASS ; + } + } + +load_host_cleanup: + + if (node_obj) json_object_put(node_obj); + if (err_obj) json_object_put(err_obj); + + return (rc); +} + + +/* Load up json_info with the contents of the json_str */ +int jsonUtil_load_host_state ( char * json_str_ptr, node_inv_type & info ) +{ + int rc = PASS ; + string error = "" ; + + /* init to null to avoid trap on early cleanup call with + * bad non-null default pointer value */ + struct json_object *node_obj = (struct json_object *)(NULL); + struct json_object *err_obj = (struct json_object *)(NULL); + + if (( json_str_ptr == NULL ) || ( *json_str_ptr == '\0' ) || + ( ! 
strncmp ( json_str_ptr, "(null)" , 6 ))) + { + elog ("Cannot tokenize a null json string\n"); + return (FAIL); + } + node_obj = json_tokener_parse( json_str_ptr ); + if ( !node_obj ) + { + elog ("No or invalid inventory response\n"); + rc = FAIL ; + goto load_host_cleanup ; + } + + /* Check for error */ + error = _json_get_key_value_string ( node_obj, "error_message" ); + if ( error == "none" ) + { + node_inv_init ( info ); + + /* Get all required fields */ + info.uuid = _json_get_key_value_string ( node_obj, MTC_JSON_INV_UUID ); + info.name = _json_get_key_value_string ( node_obj, MTC_JSON_INV_NAME ); + info.avail = _json_get_key_value_string ( node_obj, MTC_JSON_INV_AVAIL ); + info.admin = _json_get_key_value_string ( node_obj, MTC_JSON_INV_ADMIN ); + info.oper = _json_get_key_value_string ( node_obj, MTC_JSON_INV_OPER ); + } + else + { + std::size_t found ; + + /* get the error */ + found=error.find ( "could not be found" ); + if ( found!=std::string::npos) + { + elog ("%s\n", error.c_str()); + elog ("Requested host not found\n"); + rc = PASS ; + goto load_host_cleanup ; + } + else + { + elog ("Unknown error (%s)\n", error.c_str()); + rc = PASS ; + } + } + +load_host_cleanup: + + if (node_obj) json_object_put(node_obj); + if (err_obj) json_object_put(err_obj); + + return (rc); +} + +void jsonUtil_print ( jsonUtil_info_type & info, int index ) +{ + if ( info.elements == 0 ) + return ; + + if ( info.elements < index ) + return ; + + print_inv ( info.host[index] ) ; +} + +// {"auth": {"tenantName": "admin", "passwordCredentials": {"username": "admin", "password": "password"}}} + + +/* TODO: remove "assumed" case in favor of failing utility */ +int jsonApi_auth_request ( string & hostname, string & payload ) +{ + int rc = PASS ; + char * getenv_ptr = NULL ; + string projectname = "" ; + string username = "" ; + string password = "" ; + string userdomain = "" ; + string projectdomain = "" ; + + /* Get local username an password credentials */ + getenv_ptr = 
daemon_get_cfg_ptr()->keystone_auth_project; + if ( getenv_ptr == NULL ) + { + wlog ("%s Null Project Name\n", hostname.c_str()); + return ( FAIL_AUTHENTICATION ); + } + projectname = getenv_ptr ; + + getenv_ptr = daemon_get_cfg_ptr()->keystone_user_domain; + if ( getenv_ptr == NULL ) + { + wlog ("%s Null User Domain Name\n", hostname.c_str()); + return ( FAIL_AUTHENTICATION ); + } + userdomain = getenv_ptr ; + + getenv_ptr = daemon_get_cfg_ptr()->keystone_project_domain; + if ( getenv_ptr == NULL ) + { + wlog ("%s Null Project Domain Name\n", hostname.c_str()); + return ( FAIL_AUTHENTICATION ); + } + projectdomain = getenv_ptr ; + + getenv_ptr = daemon_get_cfg_ptr()->keystone_auth_username; + if ( getenv_ptr == NULL ) + { + wlog ("%s Null Username\n", hostname.c_str()); + return ( FAIL_AUTHENTICATION ); + } + username = getenv_ptr ; + + getenv_ptr = daemon_get_cfg_ptr()->keystone_auth_pw; + if ( getenv_ptr == NULL ) + { + wlog ("%s Null Password for '%s'\n", hostname.c_str(), username.c_str()); + return ( FAIL_AUTHENTICATION ); + } + password = getenv_ptr ; + + /* + * { + * "auth": + * { + * "identity": + * { + * "methods": ["password"], + * "password": + * { + * "user": + * { + * "name": "user name", + * "domain": { "name": "user domain name" }, + * "password": "password" + * } + * } + * }, + * "scope": + * { + * "project": + * { + * "name": "project name", + * "domain": { "name": "project domain name" } + * } + * } + * } + * } + * + */ + + /***** Create the payload *****/ + payload.append ("{\"auth\": "); + payload.append ("{\"identity\": "); + payload.append ("{\"methods\": [\"password\"],"); + payload.append ("\"password\": "); + payload.append ("{\"user\": {\"name\": \""); + payload.append (username.data()); + payload.append ("\",\"domain\": {\"name\": \""); + payload.append (userdomain.data()); + payload.append ("\"},\"password\": \""); + payload.append (password.data()); + payload.append ("\"}}},"); + payload.append ("\"scope\": "); + payload.append 
("{\"project\": {\"name\": \""); + payload.append (projectname.data()); + payload.append ("\", \"domain\": { \"name\": \""); + payload.append (projectdomain.data()); + payload.append ("\"}}}}}"); + + return (rc); +} + +/* Tokenizes the json string and loads info with + * the received token and adminURL. + * The search algorithm below + * + * An authentication response returns the token ID in X-Subject-Token header + * rather than in the response body. + * + * + "token": { <--- 1 + "audit_ids": [ + "LdAVJLRPQwiVN2OkJnuewg" + ], + "expires_at": "2016-06-08T21:16:04.408053Z", <--- 2 + "extras": {}, + "issued_at": "2016-06-08T20:16:04.408082Z", <--- 3 + "methods": [ + "password" + ], + "catalog": [ <--- 4 + { + "endpoints": [ + { + "id": "24177b65a50548519ae5182a5fd44533", + "interface": "public", + "region": "RegionOne", + "region_id": "RegionOne", + "url": "http://10.10.10.2:9696" + }, + { + "id": "52cd1c1b83d9449babfe85b69797c952", + "interface": "internal", + "region": "RegionOne", + "region_id": "RegionOne", + "url": "http://192.168.204.2:9696" + }, + { + "id": "ea7b94d6a9fb4b86acd29985babb61b3", + "interface": "admin", <--- 5 + "region": "RegionOne", + "region_id": "RegionOne", + "url": "http://192.168.204.2:9696" <--- 6 + } + ], + "id": "367ab50ca63e468f8abbf8fe348efe12", + "name": "neutron", + "type": "network" + }, + { ... 
}]} +*/ +int jsonApi_auth_load ( string & hostname, + char * json_str_ptr, + jsonUtil_auth_type & info ) +{ + json_bool status ; + int rc = FAIL ; + + info.tokenid = "" ; + info.issued = "" ; + info.expiry = "" ; + info.adminURL= "" ; + + /* init to null to avoid trap on early cleanup call */ + struct array_list * array_list_obj = (struct array_list *)(NULL) ; + struct json_object *raw_obj = (struct json_object *)(NULL); + struct json_object *token_obj = (struct json_object *)(NULL); + struct json_object *svccat_obj = (struct json_object *)(NULL); + struct json_object *tuple_obj = (struct json_object *)(NULL); + struct json_object *type_obj = (struct json_object *)(NULL); + struct json_object *url_obj = (struct json_object *)(NULL); + struct json_object *end_obj = (struct json_object *)(NULL); + + bool found_type = false ; + + raw_obj = json_tokener_parse( json_str_ptr ); + if ( !raw_obj ) + { + elog ("%s No or invalid inventory GET response\n", hostname.c_str()); + goto auth_load_cleanup ; + } + + /* Get the token object */ + status = json_object_object_get_ex(raw_obj, MTC_JSON_AUTH_TOKEN, &token_obj); + if (( status == TRUE ) && ( token_obj )) + { + info.issued.append (_json_get_key_value_string(token_obj, MTC_JSON_AUTH_ISSUE )); + info.expiry.append (_json_get_key_value_string(token_obj, MTC_JSON_AUTH_EXPIRE)); + } + else + { + elog ("%s Failed to read %s object label\n", hostname.c_str(), MTC_JSON_AUTH_TOKEN ); + goto auth_load_cleanup ; + } + /* Now look for the compute admin URL */ + /* Get the token object */ + status = json_object_object_get_ex(token_obj, MTC_JSON_AUTH_SVCCAT, &svccat_obj ); + if (( status == TRUE ) && ( svccat_obj )) + { + array_list_obj = json_object_get_array(svccat_obj); + if ( array_list_obj ) + { + string entity ; + int len = array_list_length (array_list_obj); + if ( len == 0 ) + { + ilog ( "%s No reply %s elements\n", hostname.c_str(), MTC_JSON_AUTH_SVCCAT ); + goto auth_load_cleanup; + } + for ( int i = 0 ; i < len ; i++ ) + { + 
tuple_obj = _json_object_array_get_idx (svccat_obj, i); + entity = _json_get_key_value_string ( tuple_obj, MTC_JSON_AUTH_TYPE ); + if ( entity == MTC_JSON_AUTH_COMP ) + { + found_type = true ; + break ; + } + } + } + } + + if ( found_type == true ) + { + json_bool status = json_object_object_get_ex(tuple_obj, MTC_JSON_AUTH_ENDPOINTS, &end_obj); + if ( ( status == TRUE ) && ( end_obj )) + { + array_list_obj = json_object_get_array(end_obj); + if ( array_list_obj ) + { + + + int len = array_list_length (array_list_obj); + if ( len == 0 ) + { + ilog ( "No reply %s elements in array\n", MTC_JSON_AUTH_ENDPOINTS); + goto auth_load_cleanup; + } + for ( int i = 0 ; i < len ; i++ ) + { + url_obj = _json_object_array_get_idx (end_obj, i); + if ( url_obj ) + { + string inf = _json_get_key_value_string ( url_obj, MTC_JSON_AUTH_INTERFACE ); + if (( inf.length() > 0 ) && (inf == MTC_JSON_AUTH_ADMIN)) + { + info.adminURL = _json_get_key_value_string ( url_obj, MTC_JSON_AUTH_URL); + jlog ( "Found adminURL %s\n", info.adminURL.c_str()); + rc = PASS ; + break ; + } + } + } + } + else + { + elog ("%s Failed to find %s object array\n", + hostname.c_str(), MTC_JSON_AUTH_ENDPOINTS ); + } + } + else + { + elog ("%s Failed to find %s object\n", + hostname.c_str(), MTC_JSON_AUTH_ENDPOINTS ); + } + } + else + { + elog ("%s Failed to find %s object label\n", + hostname.c_str(), MTC_JSON_AUTH_SVCCAT); + } + +auth_load_cleanup: + + if (raw_obj) + { + if ( rc ) + { + wlog ("%s JSON String:%s\n", hostname.c_str(), json_object_get_string(raw_obj)); + } + json_object_put(raw_obj); + } + if (token_obj) json_object_put(token_obj); + if (svccat_obj) json_object_put(svccat_obj); + if (tuple_obj) json_object_put(tuple_obj); + if (end_obj) json_object_put(end_obj); + if (url_obj) json_object_put(type_obj); + if (type_obj) json_object_put(type_obj); + + return (rc); +} + +/*********************************************************************** + * This utility updates the reference key_list with all the 
+ * values for the specified label. + ***********************************************************************/ +int jsonUtil_get_list ( char * json_str_ptr, string label, list & key_list ) +{ + int rc = PASS ; + struct array_list * list_obj = (struct array_list *)(NULL); + struct json_object * label_obj = (struct json_object *)(NULL); + struct json_object * raw_obj = (struct json_object *)(NULL); + struct json_object * item_obj = (struct json_object *)(NULL); + + raw_obj = json_tokener_parse( json_str_ptr ); + if ( !raw_obj ) + { + elog ("unable to parse raw object (%s)\n", json_str_ptr); + rc = FAIL_JSON_OBJECT ; + goto get_list_cleanup ; + } + + label_obj = _json_verify_object (raw_obj, label.data()); + if ( !label_obj ) + { + elog ("unable to find label '%s'\n", label.c_str()); + rc = FAIL_JSON_OBJECT ; + goto get_list_cleanup ; + } + list_obj = json_object_get_array(label_obj); + if ( list_obj ) + { + int len = array_list_length (list_obj); + jlog ( "'%s' array has %d elements\n", label.c_str(), len ); + for ( int i = 0 ; i < len ; i++ ) + { + item_obj = _json_object_array_get_idx (label_obj, i); + jlog1 ("%s %d:%s\n", label.c_str(), i, json_object_get_string(item_obj)); + key_list.push_back (json_object_get_string(item_obj)); + } + } + +get_list_cleanup: + + if (raw_obj) json_object_put(raw_obj); + if (label_obj) json_object_put(label_obj); + if (item_obj) json_object_put(item_obj); + + return (rc); +} + +/*************************************************************************** + * This utility searches for an 'array_label' and then loops over the array + * looking at each element for the specified 'search_key' and 'search_value' + * Once found it searches that same element for the specified 'element_key' + * and loads its value content into 'element_value' - what we're looking for + ***************************************************************************/ +int jsonApi_array_value ( char * json_str_ptr, + string array_label, + string search_key, + string 
search_value, + string element_key, + string & element_value) +{ + json_bool status ; + int rc = FAIL ; + + /* init to null to avoid trap on early cleanup call */ + struct array_list * array_list_obj = (struct array_list *)(NULL) ; + struct json_object *raw_obj = (struct json_object *)(NULL); + struct json_object *array_obj = (struct json_object *)(NULL); + struct json_object *tuple_obj = (struct json_object *)(NULL); + struct json_object *type_obj = (struct json_object *)(NULL); + + if ( strlen(json_str_ptr) < 3 ) + { + elog ("Null json string\n" ); + rc = FAIL_NULL_POINTER; + goto array_value_cleanup ; + } + raw_obj = json_tokener_parse( json_str_ptr ); + if ( !raw_obj ) + { + elog ("No or invalid json string (%s)\n", json_str_ptr ); + rc = FAIL_JSON_PARSE ; + goto array_value_cleanup ; + } + + /* Now look in each array element for the 'search_key' */ + status = json_object_object_get_ex(raw_obj, array_label.data(), &array_obj ); + if (( status == TRUE ) && ( array_obj )) + { + /* Leaking array_list_obj ???? 
*/ + array_list_obj = json_object_get_array(array_obj); + if ( array_list_obj ) + { + int len = array_list_length (array_list_obj); + if ( len == 0 ) + { + wlog ( "%s has zero array elements\n", array_label.c_str() ); + goto array_value_cleanup; + } + for ( int i = 0 ; i < len ; i++ ) + { + tuple_obj = _json_object_array_get_idx (array_obj, i); + if ( tuple_obj ) + { + string element = _json_get_key_value_string ( tuple_obj, search_key.data() ); + if ( !search_value.compare(element)) + { + /* ok we found "secname : cUSERS" now get the uuid */ + element_value = _json_get_key_value_string ( tuple_obj, element_key.data() ); + if ( !element_value.empty() ) + { + jlog1 ("%s with %s element has %s:%s\n", array_label.c_str(), + search_value.c_str(), + element_key.c_str(), + element_value.c_str()); + rc = PASS ; + } + } + } + } + if ( rc ) + { + elog ("Failed to find %s : %s\n", search_key.c_str(), element_key.c_str()); + rc = FAIL_JSON_OBJECT ; + } + } + } + else + { + elog ("Failed to locate array label (%s)\n", array_label.c_str()); + rc = FAIL_JSON_OBJECT ; + } + +array_value_cleanup: + + if (raw_obj) json_object_put(raw_obj); + if (array_obj) json_object_put(array_obj); + if (tuple_obj) json_object_put(tuple_obj); + if (type_obj) json_object_put(type_obj); + + return (rc); +} + +/*********************************************************************** + * This utility updates the reference string 'element' with the + * contents of the specified labeled array element index. 
+ ***********************************************************************/ +int jsonUtil_get_array_idx ( char * json_str_ptr, + string label, + int idx, + string & element ) +{ + json_bool status ; + int rc = PASS ; + + /* init to null to avoid trap on early cleanup call */ + struct array_list * array_list_obj = (struct array_list *)(NULL) ; + struct json_object *raw_obj = (struct json_object *)(NULL); + struct json_object *array_obj = (struct json_object *)(NULL); + struct json_object *tuple_obj = (struct json_object *)(NULL); + + if ( strlen(json_str_ptr) < 3 ) + { + elog ("Null json string\n" ); + rc = FAIL_NULL_POINTER; + goto get_array_idx_cleanup ; + } + raw_obj = json_tokener_parse( json_str_ptr ); + if ( !raw_obj ) + { + elog ("No or invalid json string (%s)\n", json_str_ptr ); + rc = FAIL_JSON_PARSE ; + goto get_array_idx_cleanup ; + } + + /* Now look in each array element for the 'search_key' */ + status = json_object_object_get_ex(raw_obj, label.data(), &array_obj ); + if (( status == TRUE ) && ( array_obj )) + { + element.clear(); + + array_list_obj = json_object_get_array(array_obj); + if ( array_list_obj ) + { + int len = array_list_length (array_list_obj); + if ( idx < len ) + { + tuple_obj = _json_object_array_get_idx (array_obj, idx); + if ( tuple_obj ) + { + jlog1 ("%s %d:%s\n", label.c_str(), idx, json_object_get_string(tuple_obj)); + element = json_object_get_string(tuple_obj); + } + if ( rc ) + { + elog ("Failed to get '%s' array index %d\n", label.c_str(), idx); + rc = FAIL_JSON_OBJECT ; + } + } + else if ( len == 0 ) + { + dlog ( "%s array has zero elements\n", label.c_str() ); + } + else + { + dlog ( "%s array has fewer elements than the specified index (%d)\n", label.c_str(), idx ); + } + } + } + else + { + elog ("Failed to locate array label (%s)\n", label.c_str()); + rc = FAIL_JSON_OBJECT ; + } + +get_array_idx_cleanup: + + if (raw_obj) json_object_put(raw_obj); + if (array_obj) json_object_put(array_obj); + if (tuple_obj) 
json_object_put(tuple_obj); + return (rc); +} + + +/*********************************************************************** + * This utility updates the reference element with the number of array + * elements for the specified label in the provided string + ***********************************************************************/ +int jsonUtil_array_elements ( char * json_str_ptr, string label, int & elements ) +{ + json_bool status ; + int rc = FAIL ; + + /* init to null to avoid trap on early cleanup call */ + struct array_list * array_list_obj = (struct array_list *)(NULL) ; + struct json_object *raw_obj = (struct json_object *)(NULL); + struct json_object *array_obj = (struct json_object *)(NULL); + + if ( strlen(json_str_ptr) < 3 ) + { + elog ("Null json string\n" ); + rc = FAIL_NULL_POINTER; + goto array_elements_cleanup ; + } + raw_obj = json_tokener_parse( json_str_ptr ); + if ( !raw_obj ) + { + elog ("No or invalid json string (%s)\n", json_str_ptr ); + rc = FAIL_JSON_PARSE ; + goto array_elements_cleanup ; + } + + /* Now look in each array element for the 'search_key' */ + status = json_object_object_get_ex(raw_obj, label.data(), &array_obj ); + if (( status == TRUE ) && ( array_obj )) + { + array_list_obj = json_object_get_array(array_obj); + if ( array_list_obj ) + { + elements = array_list_length (array_list_obj); + rc = PASS ; + } + } + else + { + elog ("Failed to locate array label (%s)\n", label.c_str()); + rc = FAIL_JSON_OBJECT ; + } + +array_elements_cleanup: + + if (raw_obj) json_object_put(raw_obj); + if (array_obj) json_object_put(array_obj); + + return (rc); +} + +/************************************************************************************* + * + * Name : escapeJsonString + * + * Description: escape special characters in JSON string + * + **************************************************************************************/ +string jsonUtil_escapeSpecialChar(const string& input) +{ + ostringstream ss; + for (string::const_iterator 
iter = input.begin(); iter != input.end(); iter++) + { + switch (*iter) + { + case '\\': ss << "\\\\"; break; + case '"': ss << "\\\""; break; + case '/': ss << "\\/"; break; + case '\b': ss << "\\b"; break; + case '\f': ss << "\\f"; break; + case '\n': ss << "\\n"; break; + case '\r': ss << "\\r"; break; + case '\t': ss << "\\t"; break; + default: ss << *iter; break; + } + } + return ss.str(); +} + +/*********************************************************************** + * Get JSON Integer Value from Key + * return PASS if success, FAIL if fail. + ***********************************************************************/ +int jsonUtil_get_int( struct json_object *jobj, + const char *key, void *value ) +{ + struct json_object *jobj_value; + if (!json_object_object_get_ex(jobj, key, &jobj_value)) + { + return FAIL; + } + enum json_type type = json_object_get_type(jobj_value); + switch(type) + { + case json_type_boolean: + *(unsigned int *)value = json_object_get_boolean(jobj_value); + break; + case json_type_int: + *(unsigned int *)value = json_object_get_int(jobj_value); + break; + case json_type_double: + *(double *)value = json_object_get_double(jobj_value); + break; + default: + elog("type %d is not supported\n", type); + return FAIL; + break; + } + return PASS; +} + +/*********************************************************************** + * Get JSON String Value from Key + * return PASS if success, FAIL if fail. 
+ *
+ * Only string members are accepted ; '*value' is written only when
+ * PASS is returned. A NULL 'jobj', 'key' or 'value' returns FAIL.
+ ***********************************************************************/
+int jsonUtil_get_string( struct json_object* jobj,
+                         const char* key, string * value )
+{
+    struct json_object *jobj_value = NULL ;
+
+    /* nothing to search or nowhere to store the result */
+    if (( jobj == NULL ) || ( key == NULL ) || ( value == NULL ))
+    {
+        return FAIL;
+    }
+    if (!json_object_object_get_ex(jobj, key, &jobj_value))
+    {
+        return FAIL;
+    }
+    enum json_type type = json_object_get_type(jobj_value);
+    if ( type != json_type_string )
+    {
+        elog("type %d is not supported\n", type);
+        return FAIL;
+    }
+
+    /* never hand a NULL char pointer to std::string assignment */
+    const char *str = json_object_get_string(jobj_value);
+    if ( str == NULL )
+    {
+        return FAIL;
+    }
+    *value = str;
+    return PASS;
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/common/jsonUtil.h b/mtce-common/cgts-mtce-common-1.0/common/jsonUtil.h
new file mode 100644
index 00000000..780a519b
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/common/jsonUtil.h
@@ -0,0 +1,160 @@
+#ifndef __INCLUDE_JSONUTIL_H__
+#define __INCLUDE_JSONUTIL_H__
+/*
+ * Copyright (c) 2013, 2016 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+  * @file
+  * Wind River CGTS Platform Controller Maintenance
+  *
+  * JSON Utility Header
+  */
+
+#include
+#include
+#include "json-c/json.h"
+
+using namespace std;
+
+/** Inventory json GET request load struct.
+ *
+ * Used to hold/load the parsed contents of the response
+ * json string returned from an inventory HTTP GET request.
*/
+typedef struct
+{
+    int elements ; /**< converted elements */
+    /** An array of inventory information, one for each host */
+    node_inv_type host[MAX_JSON_INV_GET_HOST_NUM];
+    string next ; /**< pointer to the next inventory element */
+} jsonUtil_info_type ;
+
+#define MTC_JSON_AUTH_TOKEN "token"
+#define MTC_JSON_AUTH_SVCCAT "catalog"
+#define MTC_JSON_AUTH_TYPE "type" /**< looking for "compute" */
+#define MTC_JSON_AUTH_ENDPOINTS "endpoints"
+#define MTC_JSON_AUTH_URL "url"
+#define MTC_JSON_AUTH_ISSUE "issued_at"
+#define MTC_JSON_AUTH_EXPIRE "expires_at"
+#define MTC_JSON_AUTH_ID "X-Subject-Token"
+#define MTC_JSON_AUTH_COMP "compute"
+#define MTC_JSON_AUTH_INTERFACE "interface" /**< looking for "admin" */
+#define MTC_JSON_AUTH_ADMIN "admin"
+
+
+/** Authorization info loaded from the authorization server */
+typedef struct
+{
+    bool updated ; /**< true if struct has been updated. */
+    int status ; /**< PASS or error code. token is only valid if PASS. */
+    string tokenid ; /**< The long encrypted token. */
+    string issued ; /**< The "issued_at" value. */
+    string expiry ; /**< The "expires_at" value. */
+    string adminURL; /**< path to the nova server. */
+} jsonUtil_auth_type ;
+
+/** Module initialization interface.
+ */
+void jsonUtil_init ( jsonUtil_info_type & info );
+
+/** Print the authorization struct to stdio.
+ */ +void jsonUtil_print ( jsonUtil_info_type & info , int index ); +void jsonUtil_print_inv ( node_inv_type & info ); + +int jsonUtil_get_key_val ( char * json_str_ptr, + string key, + string & value ); + +/** Submit a request to get an authorization token and nova URL */ +int jsonApi_auth_request ( string & hostname, string & payload ); + +/** Parse through the authorization request's response json string + * and load the relavent information into the passed in structure */ + int jsonUtil_inv_load ( char * json_str_ptr, + jsonUtil_info_type & info ); + +int jsonUtil_load_host ( char * json_str_ptr, node_inv_type & info ); +int jsonUtil_load_host_state ( char * json_str_ptr, node_inv_type & info ); + +int jsonUtil_hwmon_info ( char * json_str_ptr, node_inv_type & info ); + +/** Handle the patch request response and verify execution status */ +int jsonUtil_patch_load ( char * json_str_ptr, node_inv_type & info ); + +/** Tokenizes the json string and loads 'info' with the received token + * + * @param json_str_ptr + * to a json string + * @param info + * is the updated jsonUtil_auth_type bucket containing the token + * + * @return execution status (PASS or FAIL) + * + *- PASS indicates tokenization ok and info is updated. + *- FAIL indicates bad or error reply in json string. 
+ * + */ +int jsonApi_auth_load ( string & hostname, char * json_str_ptr, + jsonUtil_auth_type & info ); + + +/*************************************************************************** + * This utility searches for an 'array_label' and then loops over the array + * looking at each element for the specified 'search_key' and 'search_value' + * Once found it searches that same element for the specified 'element_key' + * and loads its value content into 'element_value' - what we're looking for + ***************************************************************************/ +int jsonApi_array_value ( char * json_str_ptr, + string array_label, + string search_key, + string search_value, + string element_key, + string & element_value); + +/*********************************************************************** + * This utility updates the reference key_list with all the + * values for the specified label. + ***********************************************************************/ +int jsonUtil_get_list ( char * json_str_ptr, + string label, list & key_list ); + +/*********************************************************************** + * This utility updates the reference element with the number of array + * elements for the specified label in the provided string + ***********************************************************************/ +int jsonUtil_array_elements ( char * json_str_ptr, string label, int & elements ); + +/*********************************************************************** + * This utility updates the reference string 'element' with the + * contents of the specified labeled array element index. 
+ ***********************************************************************/ +int jsonUtil_get_array_idx ( char * json_str_ptr, string label, int idx, string & element ); + +/*********************************************************************** +* Escape special characters in JSON string +************************************************************************/ +string jsonUtil_escapeSpecialChar(const string& input); + +int jsonUtil_get_key_value_int ( struct json_object * obj, const char * key ); +bool jsonUtil_get_key_value_bool ( struct json_object * obj, const char * key ); +string jsonUtil_get_key_value_string ( struct json_object * obj, const char * key ); + +/*********************************************************************** + * Get JSON Integer Value from Key + * return 0 if success, -1 if fail. + ***********************************************************************/ +int jsonUtil_get_int( struct json_object *jobj, + const char *key, void *value ); + +/*********************************************************************** + * Get JSON String Value from Key + * return 0 if success, -1 if fail. + ***********************************************************************/ +int jsonUtil_get_string( struct json_object* jobj, + const char* key, string * value ); + +#endif /* __INCLUDE_JSONUTIL_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/common/keyClass.cpp b/mtce-common/cgts-mtce-common-1.0/common/keyClass.cpp new file mode 100644 index 00000000..51ed2655 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/keyClass.cpp @@ -0,0 +1,558 @@ +/* + * Copyright (c) 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/* This module implements a linked list for unsigned long key:value pairs. + * + * Was first introduced to provide to associate a libEvent address with a + * eventLib base pointer so that the event handler could lookup the event + * from the base, Key is base and value is the libEvent address. 
+ *
+ */
+
+/* Drop any inherited area tag first, then always claim "kvp" for the
+ * logs of this module ; defining it inside the #ifdef would only set
+ * the tag when one was already defined. */
+#ifdef __AREA__
+#undef __AREA__
+#endif
+#define __AREA__ "kvp"
+
+#include "nodeBase.h"
+#include "keyClass.h"
+//#include "nodeUtil.h"
+
+
+/* keyClass Constructor : start with an empty list, zeroed accounting
+ * and no key_ptrs slot in use */
+keyClass::keyClass()
+{
+    head = tail = NULL;
+    memory_allocs = 0 ;
+    memory_used = 0 ;
+    keys = 0 ;
+
+    for ( int i = 0 ; i < MAX_KEYS ; i++ )
+    {
+        key_ptrs[i] = NULL ;
+    }
+}
+
+/* keyClass Destructor : free every key still held in key_ptrs */
+keyClass::~keyClass()
+{
+    for ( int i = 0 ; i < MAX_KEYS ; i++ )
+    {
+        if ( key_ptrs[i] )
+        {
+            delete key_ptrs[i] ;
+            key_ptrs[i] = NULL ;
+        }
+    }
+    head = tail = NULL ;
+}
+
+/******************************************************************************************
+ ********** C O M M O N P R I V A T E I N T E R F A C E S ***************
+ *****************************************************************************************/
+
+/******************************************************************
+ *
+ * Name : newKey (private)
+ *
+ * Description: Allocates memory for a new key value pair and
+ * stores its address in the first free key_ptrs slot.
+ * The accounting counters (memory_allocs, memory_used,
+ * keys) are updated on success.
+ *
+ * @param void
+ * @return pointer to the newly allocated Key memory or NULL when
+ * all MAX_KEYS slots are in use
+ *
+ *******************************************************************/
+struct keyClass::keyValue * keyClass::newKey ( void )
+{
+    struct keyClass::keyValue * key_ptr = NULL ;
+
+    // find an empty spot
+    for ( int i = 0 ; i < MAX_KEYS ; i++ )
+    {
+        if ( key_ptrs[i] == NULL )
+        {
+            /* The () value-initializes the node so the integer key/value,
+             * the value pointer and the prev/next links all start at zero.
+             * Each caller fills only one key type, yet the getKey lookups
+             * compare that field on every node in the list. */
+            key_ptrs[i] = key_ptr = new keyValue() ;
+            memory_allocs++ ;
+            memory_used += sizeof (struct keyClass::keyValue);
+            keys++ ;
+            return key_ptr ;
+        }
+    }
+    elog ( "Failed to save new key pointer address\n" );
+    return key_ptr ;
+}
+
+
+/**************************************************************************
+ *
+ * Name : delKey (private)
+ *
+ * Description: Frees the memory of a pre-allocated key and removes
+ * it from the key_ptrs list.
+ * The node is NOT unlinked from the head/tail list here ;
+ * remKey does the unlinking and then calls this.
+ *
+ * @param key_ptr - address of the key memory to be freed
+ * @return int return code { PASS or -EINVAL }
+ *
+ **************************************************************************/
+int keyClass::delKey ( struct keyClass::keyValue * key_ptr )
+{
+    /* A NULL pointer would otherwise match the first unused key_ptrs
+     * slot below and wrongly decrement the accounting counters. */
+    if ( key_ptr == NULL )
+    {
+        elog ( "Unable to validate memory address being freed\n" );
+        return -EINVAL ;
+    }
+
+    if ( memory_allocs > 0 )
+    {
+        for ( int i = 0 ; i < MAX_KEYS ; i++ )
+        {
+            if ( key_ptrs[i] == key_ptr )
+            {
+                delete key_ptr ;
+                key_ptrs[i] = NULL ;
+                memory_used -= sizeof (struct keyClass::keyValue);
+                if ( memory_allocs ) memory_allocs-- ;
+                if ( keys ) keys-- ;
+                return PASS ;
+            }
+        }
+        elog ( "Unable to validate memory address being freed\n" );
+    }
+    else
+        elog ( "Free memory called when there is no memory to free\n" );
+
+    return -EINVAL ;
+}
+
+/*****************************************************************
+ *
+ * Name : remKey
+ *
+ * Description: Remove a specified key from the linked list
+ * and free its memory through delKey.
+ *
+ *@param ptr - address of the key to unlink and free
+ *@return PASS, -ENXIO when the list is empty or
+ * -EFAULT when ptr is NULL
+ *
+ *****************************************************************/
+int keyClass::remKey( struct keyValue * ptr )
+{
+    if ( head == NULL )
+        return -ENXIO ;
+
+    if ( ptr == NULL )
+        return -EFAULT ;
+
+    /* If the key is the head key */
+    if ( ptr == head )
+    {
+        /* only one key in the list case */
+        if ( head == tail )
+        {
+            dlog3 ("Single Key -> Head Case\n");
+            head = NULL ;
+            tail = NULL ;
+        }
+        else
+        {
+            dlog3 ("Multiple Keys -> Head Case\n");
+            head = head->next ;
+            head->prev = NULL ;
+        }
+    }
+    /* if not head but tail then there must be more than one
+     * key in the list so go ahead and chop the tail.
+     */
+    else if ( ptr == tail )
+    {
+        dlog3 ("Multiple Key -> Tail Case\n");
+        tail = tail->prev ;
+        tail->next = NULL ;
+    }
+    else
+    {
+        dlog3 ("Multiple Key -> Full Splice Out\n");
+        ptr->prev->next = ptr->next ;
+        ptr->next->prev = ptr->prev ;
+    }
+    delKey ( ptr );
+
+    return (PASS) ;
+}
+
+
+
+/******************************************************************************************
+ ********** K E Y T Y P E P R I V A T E I N T E R F A C E S *************
+ *****************************************************************************************/
+
+/******************************************************************
+ *
+ * Name : addKey (private)
+ *
+ * Description: Reprovisions the key:value if it already exists
+ * by first removing the old entry from the list.
+ * Calls newKey to allocate memory for a new key
+ * and then tacks the new key value onto the end
+ * of the linked list.
+ *
+ * @param key__int - unsigned long key
+ * @param val__int - unsigned long value stored against the key
+ * @return pointer to the newly allocated Key memory,
+ * NULL if the old entry could not be removed or
+ * no memory slot is available
+ *
+ *******************************************************************/
+struct keyClass::keyValue* keyClass::addKey( unsigned long key__int, unsigned long val__int )
+{
+    /* verify key is not already provisioned */
+    struct keyValue * ptr = getKey ( key__int );
+    if ( ptr )
+    {
+        /* The old entry has to be unlinked from the list as well as
+         * freed. delKey only frees, which would leave head, tail or
+         * a neighbour pointing at released memory ; remKey unlinks
+         * and then frees. */
+        if ( remKey ( ptr ) != PASS )
+        {
+            /* Should never get here but if we do then */
+            /* something is seriously wrong */
+            elog ("Unable to remove key during reprovision\n");
+            return NULL ;
+        }
+    }
+
+    /* allocate memory for new key */
+    ptr = newKey ();
+    if( ptr == NULL )
+    {
+        elog ( "Failed to allocate memory for new key\n" );
+        return NULL ;
+    }
+
+    /* push the calling data into new key */
+    ptr->key__int = key__int ;
+    ptr->val__int = val__int ;
+
+    dlog3 ("add key:%lx val:%lx\n", ptr->key__int, ptr->val__int );
+
+    /* If the key list is empty add it to the head */
+    if( head == NULL )
+    {
+        head = ptr ;
+        tail = ptr ;
+        ptr->prev = NULL ;
+        ptr->next = NULL ;
+    }
+    else
+    {
+        /* link the new_key to the tail of the key_list
+         * then
mark the next field as the end of the key_list
+         * adjust tail to point to the last key
+         */
+        ptr->next = NULL ;
+        ptr->prev = tail ;
+        tail->next = ptr ;
+        tail = ptr ;
+    }
+    return ptr ;
+}
+
+/******************************************************************
+ *
+ * Name : getKey (private)
+ *
+ * Description: Walks the linked list from head towards tail and
+ * stops at the first entry whose integer key matches.
+ *
+ * @param key__int - unsigned long key to search for
+ * @return pointer to the matching key in the linked list or
+ * NULL when the list is empty or holds no match
+ *
+ *******************************************************************/
+struct keyClass::keyValue * keyClass::getKey ( unsigned long key__int )
+{
+    struct keyValue * node_ptr = head ;
+
+    /* an empty list never enters the loop */
+    while ( node_ptr != NULL )
+    {
+        if ( node_ptr->key__int == key__int )
+        {
+            dlog3 ("get key:%lx val:%lx\n", node_ptr->key__int, node_ptr->val__int );
+            return node_ptr ;
+        }
+
+        /* never walk past the tail even if its next is not NULL */
+        if ( node_ptr == tail )
+            break ;
+
+        node_ptr = node_ptr->next ;
+    }
+    return NULL ;
+}
+
+/*****************************************************************/
+/* String key version */
+/******************************************************************
+ *
+ * Name : addKey (private)
+ *
+ * Description: Reprovisions the key:value if it already exists
+ * Calls newkey to allocates memory for a new key
+ * and then tacks the new key value onto the end
+ * of the linked list.
+ *
+ * @param key__str - string key
+ * @param val__ptr - unsigned long pointer stored against the key
+ * @return pointer to the newly allocated Key memory,
+ * NULL if the old entry could not be removed or
+ * no memory slot is available
+ *
+ *******************************************************************/
+struct keyClass::keyValue* keyClass::addKey( string key__str, unsigned long * val__ptr )
+{
+    /* verify key is not already provisioned */
+    struct keyValue * ptr = getKey ( key__str );
+    if ( ptr )
+    {
+        /* The old entry has to be unlinked from the list as well as
+         * freed. delKey only frees, which would leave head, tail or
+         * a neighbour pointing at released memory ; remKey unlinks
+         * and then frees. */
+        if ( remKey ( ptr ) != PASS )
+        {
+            /* Should never get here but if we do then */
+            /* something is seriously wrong */
+            elog ("Unable to remove key during reprovision\n");
+            return NULL ;
+        }
+    }
+
+    /* allocate memory for new key */
+    ptr = newKey ();
+    if( ptr == NULL )
+    {
+        elog ( "Failed to allocate memory for new key\n" );
+        return NULL ;
+    }
+
+    /* push the calling data into new key */
+    ptr->key__str = key__str ;
+    ptr->val__ptr = val__ptr ;
+
+    dlog3 ("add key:%s val:%p\n", ptr->key__str.c_str(), ptr->val__ptr );
+
+    /* If the key list is empty add it to the head */
+    if( head == NULL )
+    {
+        head = ptr ;
+        tail = ptr ;
+        ptr->prev = NULL ;
+        ptr->next = NULL ;
+    }
+    else
+    {
+        /* link the new_key to the tail of the key_list
+         * then mark the next field as the end of the key_list
+         * adjust tail to point to the last key
+         */
+        tail->next = ptr ;
+        ptr->prev = tail ;
+        ptr->next = NULL ;
+        tail = ptr ;
+    }
+    return ptr ;
+}
+
+
+/******************************************************************
+ *
+ * Name : getKey (private)
+ *
+ * Description: Looks for a key in the linked list.
+ *
+ * @param key__str - string key to search for
+ * @return pointer to the matching key in the linked list or
+ * NULL when the list is empty or holds no match
+ *
+ *******************************************************************/
+struct keyClass::keyValue * keyClass::getKey ( string key__str )
+{
+    struct keyValue * node_ptr = head ;
+
+    /* an empty list never enters the loop */
+    while ( node_ptr != NULL )
+    {
+        if ( node_ptr->key__str == key__str )
+        {
+            dlog3 ("get key:%s val:%p\n", node_ptr->key__str.c_str(), node_ptr->val__ptr );
+            return node_ptr ;
+        }
+
+        /* never walk past the tail even if its next is not NULL */
+        if ( node_ptr == tail )
+            break ;
+
+        node_ptr = node_ptr->next ;
+    }
+    return NULL ;
+}
+
+/******************************************************************************************
+ **************** P U B L I C I N T E R F A C E S ****************************
+ *****************************************************************************************/
+
+/******************************************************************************************
+ *
+ * Name : get_key
+ *
+ * Purpose : look up the value based on a key
+ *
+ * Returns : PASS with the value loaded into the reference argument,
+ * FAIL_NOT_FOUND (argument untouched) when the key is unknown
+ *
+ ******************************************************************************************/
+
+/*
+ * Unsigned Long K E Y
+ * Unsigned Long V A L u e
+ */
+int keyClass::get_key ( unsigned long key__int, unsigned long & val__int )
+{
+    /* search the list for the requested key */
+    struct keyValue * found_ptr = getKey ( key__int );
+    if ( found_ptr == NULL )
+    {
+        return (FAIL_NOT_FOUND);
+    }
+    val__int = found_ptr->val__int ;
+    return (PASS);
+}
+
+/*
+ * String K E Y
+ * Unsigned Long Pointer V A L u e
+ */
+int keyClass::get_key ( string key__str, unsigned long *& val__ptr )
+{
+    /* search the list for the requested key */
+    struct keyValue * found_ptr = getKey ( key__str );
+    if ( found_ptr == NULL )
+    {
+        return (FAIL_NOT_FOUND);
+    }
+    val__ptr = found_ptr->val__ptr ;
+    return (PASS);
+}
+
+
+/******************************************************************************************
+ *
+ * Name : add_key
+ *
+ * Purpose : add a key value
to the object
+ *
+ * Returns : PASS when the key was added,
+ * RETRY when the key is already in use,
+ * FAIL_NULL_POINTER when no new key could be created
+ *
+ ******************************************************************************************/
+
+/*
+ * Unsigned Long K E Y
+ * Unsigned Long V A L u e
+ */
+
+int keyClass::add_key ( unsigned long key__int, unsigned long val__int )
+{
+    struct keyClass::keyValue * existing_ptr = keyClass::getKey(key__int);
+    if ( existing_ptr != NULL )
+    {
+        wlog ("add key:%lx failed - in use (value:%lx)\n", existing_ptr->key__int, existing_ptr->val__int );
+
+        /* Send back a retry in case the add needs to be converted to a modify */
+        return (RETRY);
+    }
+
+    /* Not provisioned yet so add it as a new key */
+    if ( keyClass::addKey( key__int , val__int) == NULL )
+    {
+        return (FAIL_NULL_POINTER);
+    }
+    dlog3 ("add key:%lx\n", key__int );
+    return (PASS);
+}
+
+/*
+ * String K E Y
+ * Unsigned Long Pointer V A L u e
+ */
+int keyClass::add_key ( string key__str, unsigned long * val__ptr )
+{
+    struct keyClass::keyValue * existing_ptr = keyClass::getKey(key__str);
+    if ( existing_ptr != NULL )
+    {
+        wlog ("add key:%s failed - in use (value:%p)\n", existing_ptr->key__str.c_str(), existing_ptr->val__ptr );
+
+        return (RETRY);
+    }
+
+    /* Not provisioned yet so add it as a new key */
+    if ( keyClass::addKey( key__str, val__ptr) == NULL )
+    {
+        return (FAIL_NULL_POINTER);
+    }
+    dlog3 ("add key:%s\n", key__str.c_str());
+    return (PASS);
+}
+
+
+/******************************************************************************************
+ *
+ * Name : del_key
+ *
+ * Purpose : del a key value from the object
+ *
+ ******************************************************************************************/
+
+/*
+ * Unsigned Long K E Y
+ * Unsigned Long V A L u e
+ */
+
+
+int keyClass::del_key ( unsigned long key__int )
+{
+    int rc = FAIL_DEL_UNKNOWN ;
+    keyClass::keyValue *
keyValue_ptr = keyClass::getKey( key__int );
+
+    /* unknown key : report and hand back the default failure code */
+    if ( keyValue_ptr == NULL )
+    {
+        wlog ("del key:%lx failed - not found\n", key__int );
+        return (rc);
+    }
+
+    /* unlink and free ; log according to the outcome */
+    rc = remKey ( keyValue_ptr );
+    if ( rc == PASS )
+    {
+        dlog3 ("del key:%lx\n", key__int );
+    }
+    else
+    {
+        elog ("del key:%lx failed - rc:%d\n", key__int , rc );
+    }
+    return (rc);
+}
+
+/*
+ * String K E Y
+ * Unsigned Long Pointer V A L u e
+ */
+int keyClass::del_key ( string key__str )
+{
+    keyClass::keyValue * target_ptr = keyClass::getKey( key__str );
+
+    /* unknown key : report and hand back the default failure code */
+    if ( target_ptr == NULL )
+    {
+        wlog ("del key:%s failed - not found\n", key__str.c_str());
+        return (FAIL_DEL_UNKNOWN);
+    }
+
+    /* unlink and free ; log according to the outcome */
+    int rc = remKey ( target_ptr );
+    if ( rc == PASS )
+    {
+        dlog3 ("del key:%s\n", key__str.c_str());
+    }
+    else
+    {
+        elog ("del key:%s failed - rc:%d\n", key__str.c_str(), rc );
+    }
+    return (rc);
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/common/keyClass.h b/mtce-common/cgts-mtce-common-1.0/common/keyClass.h
new file mode 100644
index 00000000..3d29a7ef
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/common/keyClass.h
@@ -0,0 +1,89 @@
+#ifndef __INCLUDE_KEYCLASS_H__
+#define __INCLUDE_KEYCLASS_H__
+/*
+ * Copyright (c) 2015 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+
+using namespace std;
+
+// #include "nodeBase.h"
+
+#define MAX_KEYS 100
+
+class keyClass
+{
+    private:
+
+    /**
+     * A single key:value pair within the keyClass.
+     * Used to build a linked list of key value pairs.
+     */
+    struct keyValue {
+
+        string key__str ;
+        unsigned long * val__ptr ; /* The value for the string key is a unsigned long pointer */
+
+        unsigned long key__int ;
+        unsigned long val__int ;
+
+        struct keyValue * prev;
+        struct keyValue * next;
+    } ;
+
+    struct keyValue * head ;
+    struct keyValue * tail ;
+
+    /** Allocate memory for a new key.
*/
+    struct keyClass::keyValue * newKey ( void );
+
+    struct keyClass::keyValue * addKey ( string key__str, unsigned long * val__ptr );
+    struct keyClass::keyValue * addKey ( unsigned long key__int, unsigned long val__int );
+
+    struct keyClass::keyValue * getKey ( string key__str );
+    struct keyClass::keyValue * getKey ( unsigned long key__int );
+
+    int remKey ( struct keyClass::keyValue * keyValue_ptr );
+    int delKey ( struct keyClass::keyValue * keyValue_ptr );
+
+    keyClass::keyValue * key_ptrs[MAX_KEYS] ;
+
+    int memory_allocs ;
+    int memory_used ;
+
+public:
+
+    keyClass(); /**< constructor */
+    ~keyClass(); /**< destructor */
+
+    int keys ;
+
+    int get_key ( unsigned long key__int , unsigned long & val__int );
+    int get_key ( string key__str , unsigned long *& val__ptr );
+    int add_key ( unsigned long key__int , unsigned long val__int );
+    int add_key ( string key__str , unsigned long * val__ptr );
+    int del_key ( unsigned long key__int );
+    int del_key ( string key__str );
+} ;
+
+keyClass * get_keyClass_ptr ( void );
+
+/* Look up 'key' in the keyClass object returned by get_keyClass_ptr and
+ * return FAIL_HOSTNAME_LOOKUP from the enclosing function when it is not
+ * found. On success 'keyClass_ptr' and 'keyValue_ptr' are left declared
+ * in the caller's scope.
+ * The log text carries no argument because 'key' may be either of the
+ * types accepted by getKey (string or unsigned long), so no single
+ * printf conversion fits both.
+ * NOTE(review): getKey and keyValue are private members of keyClass, so
+ * this can only expand where private access is available - confirm the
+ * intended users. */
+#define GET_KEYVALUE_PTR(key) \
+    keyClass * keyClass_ptr = get_keyClass_ptr () ; \
+    keyClass::keyValue * keyValue_ptr = keyClass_ptr->getKey ( key ) ; \
+    if ( keyValue_ptr == NULL ) \
+    { \
+        elog ("key not found\n"); \
+        return (FAIL_HOSTNAME_LOOKUP); \
+    }
+
+#endif // __INCLUDE_KEYCLASS_H__
diff --git a/mtce-common/cgts-mtce-common-1.0/common/logMacros.h b/mtce-common/cgts-mtce-common-1.0/common/logMacros.h
new file mode 100644
index 00000000..267eaa41
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/common/logMacros.h
@@ -0,0 +1,421 @@
+#ifndef __INCLUDE_NODELOG_HH__
+#define __INCLUDE_NODELOG_HH__
+/*
+ * Copyright (c) 2013-2017 Wind River Systems, Inc.
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform "Node Log" Header + */ + +#include + +#define DEBUG_LEVEL1 0x00000001 +#define DEBUG_LEVEL2 0x00000002 +#define DEBUG_LEVEL3 0x00000004 +#define DEBUG_LEVEL4 0x00000008 +#define DEBUG_MEM_LOG 0x00000010 +#ifndef __AREA__ +#define __AREA__ "---" +#endif + +// #include "daemon_common.h" + +/* including for getpid */ +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** configuration options */ +typedef struct +{ + int scheduling_priority ; /**< Scheduling priority of this daemon */ + bool active ; /**< Maintenance activity state true|false */ + int hbs_pulse_period ; /**< time (msec) between heartbeat requests */ + int token_refresh_rate ; /**< token refresh rate in seconds */ + int hbs_minor_threshold ; /**< heartbeat miss minor threshold */ + int hbs_degrade_threshold ; /**< heartbeat miss degrade threshold */ + int hbs_failure_threshold ; /**< heartbeat miss failure threshold */ + + int hbs_calibrate_threshold ; /**< number of hosts where threshold calibration begins to take effect */ + int hbs_calibrate_period_factor ; /**< hbs_pulse_period = hbs_pulse_period * hosts */ + int hbs_calibrate_minor_factor ; /**< hbs_minor_threshold = threshold factor * hosts */ + int hbs_calibrate_degrade_factor; /**< hbs_degrade_threshold = threshold factor * hosts */ + int hbs_calibrate_fail_factor ; /**< hbs_failure_threshold = threshold factor * hosts */ + + char* mgmnt_iface ; /**< management interface name pointer */ + char* infra_iface ; /**< infrastructure interface name pointer */ + char* multicast ; /**< Multicast address */ + int ha_port ; /**< HA REST API Port Number */ + int vim_cmd_port ; /**< Mtce -> VIM Command REST API Port */ + int vim_event_port ; /**< VIM -> Mtce Event REST API Port */ + int mtc_agent_port ; /**< mtcAgent receive port (from Client) */ + int mtc_client_port ; /**< mtcClient receive port (from Agent) */ + + int keystone_port ; 
/**< Keystone REST API port number */ + char* keystone_prefix_path ; /**< Keystone REST API prefix path */ + char* keystone_auth_host ; /**< =auth_host=192.168.204.2 */ + char* keystone_identity_uri ; /**< =http://192.168.204.2:5000/ */ + char* keystone_auth_uri ; /**< =http://192.168.204.2:5000/ */ + char* keystone_auth_username ; /**< =mtce */ + char* keystone_auth_pw ; /**< =abc123 */ + char* keystone_region_name ; /**< =RegionOne */ + char* keystone_auth_project ; /**< =services */ + char* keystone_user_domain; /**< = Default */ + char* keystone_project_domain; /**< = Default */ + char* keyring_directory ; /**< =/opt/platform/.keyring/ */ + + char* sysinv_mtc_inv_label ; /**< =/v1/hosts/ */ + int sysinv_api_port ; /**< =6385 */ + char* sysinv_api_bind_ip ; /**< = */ + + char* ceilometer_url ; /**< ceilometer sensor sample database url */ + int ceilometer_port ; /**< ceilometer REST API port number */ + + int mtc_rx_mgmnt_port ; /**< mtcClient listens mgmnt nwk cmd reqs */ + int mtc_rx_infra_port ; /**< mtcClient listens infra nwk cmd reqs */ + int mtc_tx_mgmnt_port ; /**< mtcClient sends mgmnt nwk cmds/resp's */ + int mtc_tx_infra_port ; /**< mtcClient sends infra nwk cmds/resp's */ + + int hbs_agent_mgmnt_port ; /**< hbsAgent mgmnt network pulse resp port */ + int hbs_client_mgmnt_port ; /**< hbsClient mgmnt network pulse req port */ + int hbs_agent_infra_port ; /**< hbsAgent infra network pulse resp port */ + int hbs_client_infra_port ; /**< hbsClient infra network pulse req port */ + int daemon_log_port ; /**< daemon log port */ + + int mtcalarm_req_port ; /**< port daemons send alarm requests to */ + + int agent_rx_port ; + int client_rx_port ; + + bool infra_degrade_only ; /**< Only degrade on infra heartbeat failure */ + int mtc_to_hbs_cmd_port ; /**< mtcAgent to hbsAgent command port */ + int mtc_to_guest_cmd_port ; /**< mtcAgent to guestAgent command port */ + int hwmon_cmd_port ; /**< mtcAgent to hwmon command port */ + int hbs_to_mtc_event_port ; /**< 
hbsAgent tm mtcAgent event port */ + int inv_event_port ; /**< Port inventory sends change events on */ + int per_node ; /**< Memory usage per node or per resource */ + int audit_period ; /**< daemon specific audit period */ + int pm_period ; /**< Resmon specific pm period */ + int ntp_audit_period ; /**< Resmon specific ntp audit period */ + int ntpq_cmd_timeout ; /**< Resmon specific ntpq command timeout */ + int pmon_amon_port ; /**< active process monitor pulse rx port */ + int pmon_event_port ; /**< process monitor tx event port */ + int pmon_pulse_port ; /**< process Monitor I'm Alive pulse port */ + int pmon_cmd_port ; /**< process Monitor command receive port */ + int rmon_api_tx_port ; /**< resource monitor api tx port */ + int rmon_event_port ; /**< resource monitor api event port */ + int rmon_critical_thr ; /**< resmon critical threshold in use */ + int rmon_tx_port ; /**< resource monitor tx event port */ + int log_step ; /**< used to throttle logging at step rate */ + int event_port ; /**< daemon specific event tx port */ + int cmd_port ; /**< daemon specific command rx port */ + int sensor_port ; /**< sensor read value port */ + int start_delay ; /**< startup delay, added for pmon */ + int api_retries ; /**< api retries before failure */ + int hostwd_failure_threshold ; /**< allowed # of missed pmon/hostwd messages */ + bool hostwd_reboot_on_err ; /**< should hostwd reboot on fault detected */ + bool hostwd_use_kern_wd ; /**< use the kernel watchdog for extra safety */ + bool need_infra_poll_audit ; /**< true if we need to poll for infra */ + char *hostwd_console_path ; /**< console on which to log extreme events */ + char *mode ; /**< Test Mode String */ + int testmode ; /**< Test Head Test Mode */ + int testmask ; /**< bit mask of stress tests */ + unsigned int mask ; /**< Config init mask */ + + + /* Debug of compute hang issue */ + unsigned int stall_pmon_thld; + int stall_mon_period ; + int stall_poll_period ; + int stall_rec_thld ; + char* 
mon_process_1 ;
+    char* mon_process_2 ;
+    char* mon_process_3 ;
+    char* mon_process_4 ;
+    char* mon_process_5 ;
+    char* mon_process_6 ;
+    char* mon_process_7 ;
+
+    int latency_thld ; /**< scheduling latency threshold in msec before log */
+
+    /** Multi Node Failure Avoidance Controls */
+    char * mnfa_threshold_type ; /**< value used in multi node failure
+                                      avoidance calculation ;
+                                      'number' / 'percent'age of hosts */
+    int mnfa_threshold_percent ; /**< percentage of pool simultaneously
+                                      failing heartbeat */
+    int mnfa_threshold_number ; /**< number of hosts simultaneously
+                                     failing heartbeat */
+    int mnfa_recovery_threshold ; /**< Multi-Node-Failure Avoidance Recovery Threshold
+                                       Similar to the LOC above for graceful recovery
+                                       hosts that have LOC for longer than this time in
+                                       seconds are failed and sent into the enable_handler
+                                       FSM while those that recover before this period are
+                                       sent into the graceful recovery_handler FSM. */
+
+    /** Configurable Timeouts ; unit is 'seconds' */
+    int controller_mtcalive_timeout ; /**< mtcAlive wait timeout */
+    int compute_mtcalive_timeout ; /**< mtcAlive wait timeout */
+    int goenabled_timeout ; /**< goenabled wait timeout */
+    int host_services_timeout ; /**< host services start/stop timeout */
+    int swact_timeout ; /**< swact wait timeout */
+    int sysinv_timeout ; /**< sysinv reset api timeout secs */
+    int sysinv_noncrit_timeout ; /**< sysinv nonc request timeout */
+    int work_queue_timeout ; /**< end of action workq complete TO */
+    int loc_recovery_timeout ; /**< loss of comms recovery timeout */
+    int mnfa_recovery_timeout ; /**< mnfa recovery timeout */
+    int node_reinstall_timeout ; /**< node reinstall timeout */
+    int dor_mode_timeout ; /**< dead office recovery timeout */
+    int dor_recovery_timeout_ext ; /**< dor recovery timeout extension */
+    int uptime_period ; /**< Uptime refresh timer period */
+    int online_period ; /**< locked availability refresh */
+    int insv_test_period ; /**< insv test period in secs */
+    int
oos_test_period ; /**< oos test period in secs */ + int failsafe_shutdown_delay ; /**< seconds before failsafe reboot */ + int hostwd_update_period ; /**< expect hostwd to be updated */ + int kernwd_update_period ; /**< expect kernel watchdog to be updated */ + int autorecovery_threshold ; /**< AIO stop autorecovery threshold */ + + int debug_all ; + int debug_json ; /**< Enable jlog (json string ) output if not false */ + int debug_timer ; /**< Enable tlog (timer logs ) output if not false */ + int debug_fsm ; /**< Enable flog (fsm debug ) output if not false */ + int debug_http ; /**< Enable hlog (http logs ) output if not false */ + int debug_msg ; /**< Enable mlog (msg logs ) output if not false */ + int debug_work ; /**< Enable qlog (work Q logs ) output if not false */ + int debug_state ; /**< Enable clog (state changes) output if not false */ + int debug_alive ; /**< Enable alog (mtcAlive logs) output if not false */ + int debug_bmgmt ; /**< Enable alog (brd mgmt logs) output if not false */ + int debug_level ; /**< Enable dlog (debug levels ) output if not 0 */ + char* debug_filter ; + char* debug_event ; /**< Event signature to trace */ + bool flush ; /**< Force log flush in main loop */ + int flush_thld ; /**< Flush threshold */ + + int fit_code ; /**< fault insertion code ; nodeBase.h fit_code_enum */ + char* fit_host ; /**< the host to apply the fault insertion code to */ +} daemon_config_type ; + +daemon_config_type * daemon_get_cfg_ptr (void); +int daemon_set_cfg_option ( const char * option , int value ); + +bool ltc ( void ); + +/* returns the current log count */ +int lc (void); + +char * pt ( void ) ; /* returns pointer to the current time */ +char * _hn ( void ) ; /* returns pointer to the current host name */ +void set_hn ( char * hn ); /* set the current host name */ + +/* copy time (not date) into callers buffer */ +void gettime ( char * now_time_ptr ) ; + +extern char *program_invocation_name; +extern char *program_invocation_short_name; 
+#define _pn program_invocation_short_name + +#define SYSLOG_OPTION LOG_NDELAY +#define SYSLOG_FACILITY LOG_LOCAL5 + +/** Open syslog */ +#define open_syslog() \ +{ \ + openlog(program_invocation_short_name, SYSLOG_OPTION, SYSLOG_FACILITY ) ; \ +} + +/** Open syslog using filename identifier */ +#define open_syslog_args(filename) \ +{ \ + openlog(filename, SYSLOG_OPTION, SYSLOG_FACILITY ) ; \ +} + +/** Close syslog */ +#define close_syslog() \ +{ \ + closelog(); \ +} + +/* ltc represents '-f' option for running in forground and means 'log to console' */ + +/** Scheduling Latency */ +#define NSEC_TO_MSEC (1000000) +#define llog(format, args...) \ + { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Latncy: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + +/** Swerr logger macro*/ +#define slog(format, args...) { \ + if ( ltc() ) { printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Swerr : " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Swerr : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ +} + +/** Error log macro */ +#define elog(format, args...) { \ + if ( ltc() ) { printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Error : " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Error : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ +} + +/** Warning logger macro */ +#define wlog(format, args...) 
{ \ + if ( ltc() ) { printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Warn : " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Warn : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ +} + +/** Warning logger macro with throttling */ +#define wlog_throttled(cnt,max,format,args...) { \ + if ( ++cnt == 1 ) \ + { \ + if (ltc()) { printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Warn : " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Warn : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + } \ + if ( cnt >= max ) \ + { \ + cnt = 0 ; \ + } \ +} + +/** Info logger macro with throttling */ +#define ilog_throttled(cnt,max,format,args...) { \ + if ( ++cnt == 1 ) \ + { \ + if (ltc()) { printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Info : " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Info : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + } \ + if ( cnt >= max ) \ + { \ + cnt = 0 ; \ + } \ +} + +/** Work Queue logger macro with throttling */ +#define qlog_throttled(cnt,max,format,args...) 
{ \ + if ( daemon_get_cfg_ptr()->debug_work ) \ + { \ + if ( ++cnt == 1 ) \ + { \ + if (ltc()) { printf ("%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work : " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + } \ + if ( cnt >= max ) \ + { \ + cnt = 0 ; \ + } \ + } \ +} + +/** Info logger macro*/ +#define ilog(format, args...) { \ + if ( ltc() ) { printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Info : " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Info : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ +} + +/** Info logger macro*/ +#define dlog(format, args...) { \ + if(daemon_get_cfg_ptr()->debug_level&1) \ + { \ + if ( ltc() ) { printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Debug : " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Debug : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + } \ +} + +/** Debug print macro used to record a "debug log" with file, line and function. */ + +/** Info logger macro*/ +#define dlog1(format, args...) { \ + if(daemon_get_cfg_ptr()->debug_level&2) \ + { \ + if ( ltc() ) { printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Debug2: " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Debug2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + } \ +} + +/** Info logger macro*/ +#define dlog2(format, args...) 
{ \ + if(daemon_get_cfg_ptr()->debug_level&4) \ + { \ + if ( ltc() ) { printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Debug4: " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Debug4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + } \ +} + +/** Info logger macro*/ +#define dlog3(format, args...) { \ + if(daemon_get_cfg_ptr()->debug_level&8) \ + { \ + if ( ltc() ) { printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Debug8: " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Debug8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + } \ +} + +#define blog(format, args...) { \ + if ( ltc() ) { if(daemon_get_cfg_ptr()->debug_bmgmt) printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s: BMgt : " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + else { if(daemon_get_cfg_ptr()->debug_bmgmt) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: BMgt : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ +} + +#define blog1(format, args...) { \ + if ( ltc() ) { if(daemon_get_cfg_ptr()->debug_bmgmt&2) printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s: BMgt2: " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + else { if(daemon_get_cfg_ptr()->debug_bmgmt&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: BMgt2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ +} + +#define blog2(format, args...) 
{ \ + if ( ltc() ) { if(daemon_get_cfg_ptr()->debug_bmgmt&4) printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s: BMgt4: " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + else { if(daemon_get_cfg_ptr()->debug_bmgmt&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: BMgt4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ +} + +#define blog3(format, args...) { \ + if ( ltc() ) { if(daemon_get_cfg_ptr()->debug_bmgmt&8) printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s: BMgt8: " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + else { if(daemon_get_cfg_ptr()->debug_bmgmt&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: BMgt8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ +} + + +/* This is a progress log with a unique symbol that can be searched on |-| */ +/* This log can be used for automated log analysis */ +#define plog(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Info : " format, getpid(), lc(), _hn(), _pn, "|-|", __FILE__, __LINE__, __FUNCTION__, ##args) ; } + +#define mlog(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&1 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define mlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&2 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg2 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define mlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&4 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg4 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define mlog3(format, args...) 
{ if(daemon_get_cfg_ptr()->debug_msg&8 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg8 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } + +#define jlog(format, args...) { if(daemon_get_cfg_ptr()->debug_json ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define jlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_json&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define jlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_json&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define jlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_json&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } + +#define hlog(format, args...) { if(daemon_get_cfg_ptr()->debug_http) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define hlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_http&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define hlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_http&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define hlog3(format, args...) 
{ if(daemon_get_cfg_ptr()->debug_http&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } + +#define alog(format, args...) { if(daemon_get_cfg_ptr()->debug_alive ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define alog1(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define alog2(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define alog3(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } + +#define qlog(format, args...) { if(daemon_get_cfg_ptr()->debug_work) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define qlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_work&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define qlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_work&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define qlog3(format, args...) 
{ if(daemon_get_cfg_ptr()->debug_work&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } + +#define flog(format, args...) { if(daemon_get_cfg_ptr()->debug_fsm) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: FSM : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define tlog(format, args...) { if(daemon_get_cfg_ptr()->debug_timer) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Timer: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define clog(format, args...) { if(daemon_get_cfg_ptr()->debug_state) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Change: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } + +#define log_event(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Event: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define log_stress(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Stress: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } + + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_NODELOG_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/common/msgClass.cpp b/mtce-common/cgts-mtce-common-1.0/common/msgClass.cpp new file mode 100644 index 00000000..9f785fa1 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/msgClass.cpp @@ -0,0 +1,1094 @@ +/* + * Copyright (c) 2015-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ +#include "msgClass.h" +#include "daemon_common.h" + +/** + * Creating a msgClassAddr address without port and protocol is + * to allow an easy way to check the version of an address. It can + * not be used to create a socket. 
+ * + * @param IP address or hostname + */ +msgClassAddr::msgClassAddr(const char* address) +{ + initAddr(address, 0, 0); + this->return_status = FAIL_BAD_PARM; +} + + +/** + * @param IP address or hostname + * @param host-byte order port + * @param IP protocol + */ +msgClassAddr::msgClassAddr(const char* address, int port, int proto) +{ + this->return_status = initAddr(address, port, proto); +} + + +/* copy constructor */ +msgClassAddr::msgClassAddr(const msgClassAddr& addr) +{ + this->return_status = initAddr(addr.address_str, addr.port, addr.proto); +} + + +/** + * To be used by constructors to initialize the instance. Uses + * getaddrinfo to allocate the sockaddr and determine the IP + * version, then builds based on the IP version. Also sets + * the sockaddr's port to the network-byte order version of the + * host-byte order port value passed in. + * + * + * @param IP address or hostname + * @param host-byte order port + * @param IP protocol + * + * @return PASS if successful, failure code otherwise + */ +int msgClassAddr::initAddr(const char* address, int port, int proto) +{ + int rc; + struct addrinfo *res = NULL; + struct addrinfo hints; + this->proto = proto; + this->port = port; + this->address_str = new char[strlen(address)+1]; + this->address_numeric_string = new char[INET6_ADDRSTRLEN]; + snprintf(this->address_str, strlen(address)+1, "%s", address); + this->addr_any = false; + + memset(&hints, 0, sizeof(struct addrinfo)); + hints.ai_family = AF_INET6; + rc = getaddrinfo(this->address_str, NULL, &hints, &res); + if(rc) + { + dlog("IPv6 address resolution failed, rc=%d", rc); + if(res) + { + freeaddrinfo(res); + } + hints.ai_family = AF_INET; + rc = getaddrinfo(this->address_str, NULL, &hints, &res); + if(rc) + { + dlog("IPv4 address resolution failed, rc=%d", rc); + this->address_info = NULL; + delete[] this->address_numeric_string; + this->address_numeric_string = NULL; + return FAIL_HOSTADDR_LOOKUP; + } + } + this->address_info = res; + 
switch(this->getIPVersion()) + { + case AF_INET: + inet_ntop(AF_INET, &((sockaddr_in*)this->address_info->ai_addr)->sin_addr, this->address_numeric_string, INET_ADDRSTRLEN); + ((sockaddr_in*)this->address_info->ai_addr)->sin_port = htons(port); + return PASS; + break; + case AF_INET6: + inet_ntop(AF_INET6, &((sockaddr_in6*)this->address_info->ai_addr)->sin6_addr, this->address_numeric_string, INET6_ADDRSTRLEN); + ((sockaddr_in6*)this->address_info->ai_addr)->sin6_port = htons(port); + return PASS; + break; + default: + delete[] this->address_numeric_string; + this->address_numeric_string = NULL; + wlog("IP version %d not supported", this->getIPVersion()); + return FAIL_NO_IP_SUPPORT; + break; + } +} + + +/* destructor */ +msgClassAddr::~msgClassAddr() +{ + if(this->address_str) + { + delete[] this->address_str; + } + + if(this->address_numeric_string) + { + delete[] this->address_numeric_string; + } + + if(this->address_info) + { + freeaddrinfo(this->address_info); + } +} + + +/** + * @return IP address or unresolved hostname this instance was created with + */ +const char* msgClassAddr::toString() const +{ + if(this->address_info == NULL) + { + return NULL; + } + return this->address_str; +} + + +/** + * @return IP address or resolved hostname this instance was created with + */ +const char* msgClassAddr::toNumericString() const +{ + if(this->address_info == NULL) + { + return NULL; + } + if(this->address_numeric_string == NULL) + { + return NULL; + } + switch(this->getIPVersion()) + { + case AF_INET: + inet_ntop(AF_INET, &((sockaddr_in*)this->address_info->ai_addr)->sin_addr, this->address_numeric_string, INET6_ADDRSTRLEN); + break; + case AF_INET6: + inet_ntop(AF_INET6, &((sockaddr_in6*)this->address_info->ai_addr)->sin6_addr, this->address_numeric_string, INET6_ADDRSTRLEN); + break; + } + return this->address_numeric_string; +} + + +/** + * @return IP version of address + */ +int msgClassAddr::getIPVersion() const +{ + if(this->address_info == NULL) + { + 
return AF_UNSPEC; + } + return this->address_info->ai_family; +} + + +/** + * @return IP protocol instance was created with + */ +int msgClassAddr::getIPProtocol() const +{ + return this->proto; +} + + +/** + * @return port in host-byte order + */ +int msgClassAddr::getPort() const +{ + return this->port; +} + + +/** + * Constant accessor for sockaddr + * Intended to be used external to msgClassSock + */ +const struct sockaddr* msgClassAddr::getSockAddr() const +{ + if(this->address_info == NULL) + { + return NULL; + } + return this->address_info->ai_addr; +} + + +/** + * Non-constant accessor for sockaddr. + * Intended to be used internally by msgClassSock + */ +struct sockaddr* msgClassAddr::getSockAddr() +{ + if(this->address_info == NULL) + { + return NULL; + } + return this->address_info->ai_addr; +} + + +/* + * if(this->getIPVersion==AF_INET) + * return sizeof(sockaddr_in); + * if(this->getIPVersion==AF_INET) + * return sizeof(sockaddr_in6); + * + * @return size of the sockaddr + */ +socklen_t msgClassAddr::getSockLen() const +{ + if(this->address_info == NULL) + { + return 0; + } + return this->address_info->ai_addrlen; +} + + +/** + * Given an interface, find the hostname that resolves on this interface + * and use that to get the IP address of this interface. Will only resolve + * hostnames on the Management or Infra interfaces + * + * @param Name of the interface to get address for + * @param Character pointer to pass back address of interface + * @param Length of character array. + * @return Returns PASS if (mgmnt or infra) interface has address, FAIL otherwise + */ +int msgClassAddr::getAddressFromInterface(const char* interface, char* address, int len) +{ + int rc = FAIL; + + // before proceeding further, confirm if the interface + // is either the management interface or the infra interface. + // Mtce doesn't care about others besides these. 
+ iface_enum interface_type = iface_enum(0); + char *infra_iface_name = NULL; + + get_infra_iface(&infra_iface_name); + if (infra_iface_name && strlen(infra_iface_name)) { + if (!strcmp(interface, infra_iface_name)) { + // requesting address for the infra interface + interface_type = INFRA_IFACE; + } + free (infra_iface_name); + } + + if (interface_type != INFRA_IFACE) { + // check if this is the mgmt interface + // otherwise return error + if (!strcmp(interface, daemon_mgmnt_iface().data())) { + interface_type = MGMNT_IFACE; + dlog ("Resolving %s as Management interface", interface); + } else { + return rc; + } + } + char hostname[MAX_HOST_NAME_SIZE+1] = {0}; + if (gethostname(hostname, + MAX_HOST_NAME_SIZE) < 0) { + elog("Failed to get system host name (err: %d)", errno); + return rc; + } + + // if hostname is localhost then resolution will give us + // the interface loopback address. Detect this case and + // return. + if (!strncmp(hostname, "localhost", 9)) { + wlog ("Detected localhost as system hostname." + " Cannot resolve IP address"); + return rc; + } + + // if it is infra then we need to determine the interface + // host name. For management interface, the system hostname + // is the intf hostname + char iface_hostname[MAX_HOST_NAME_SIZE+1] = {0}; + snprintf(iface_hostname, MAX_HOST_NAME_SIZE, + "%s%s", hostname, + (((interface_type == INFRA_IFACE)) ? 
"-infra" : "")); + + struct addrinfo *res = NULL; + int ret = getaddrinfo(iface_hostname, NULL, NULL, &res); + if(ret) + { + elog("IP address resolution failed for %s (err: %s)", + iface_hostname, gai_strerror(ret)); + return rc; + } + + struct addrinfo *if_address; + void *src = NULL; + for(if_address=res; ((if_address!=NULL)&&(rc!=PASS)); if_address=if_address->ai_next) + { + switch(if_address->ai_family) + { + case AF_INET: + src = (void *) &((sockaddr_in*)if_address->ai_addr)->sin_addr; + if (inet_ntop(AF_INET, src, address, len)) + rc = PASS; + + break; + case AF_INET6: + src = (void *) &((sockaddr_in6*)if_address->ai_addr)->sin6_addr; + // skip if this is a link-local address + if (IN6_IS_ADDR_LINKLOCAL(src)) + continue; + + if (inet_ntop(AF_INET6, src, address, INET6_ADDRSTRLEN)) + rc = PASS; + + break; + } + } + + freeaddrinfo(res); + return rc; +} + + + +/** + * Loop through all addresses on an interface until the first one is found that + * is either AF_INET or AF_INET6. This should get the address family of the + * first address on the interface. This is only intended to be used if there is + * only one family of addresses on the interface; otherwise, the value of address + * is undefined. Ignores the default IPv6 link-local address. 
+ * + * @param name of interface + * @return IP version of interface + */ +int msgClassAddr::getVersionFromInterface(const char* interface) +{ + struct ifaddrs *if_address_list = NULL; + struct ifaddrs *if_address; + char ip_address[INET6_ADDRSTRLEN]; + + if(getifaddrs(&if_address_list)) + { + if(if_address_list) + { + freeifaddrs(if_address_list); + } + return FAIL; + } + + for(if_address=if_address_list; if_address!=NULL; if_address=if_address->ifa_next) + { + if((strlen(interface)==strlen(if_address->ifa_name)) && (!strncmp(interface, if_address->ifa_name, strlen(if_address->ifa_name))) && if_address->ifa_addr) + { + switch(if_address->ifa_addr->sa_family) + { + case AF_INET: + freeifaddrs(if_address_list); + return AF_INET; + case AF_INET6: + inet_ntop(AF_INET6, &((sockaddr_in6*)if_address->ifa_addr)->sin6_addr, ip_address, INET6_ADDRSTRLEN); + if(strncmp(ip_address,"fe80", 4)) + { + freeifaddrs(if_address_list); + return AF_INET6; + } + } + } + } + if(if_address_list) + { + freeifaddrs(if_address_list); + } + return AF_UNSPEC; +} + + + + + +/** + * @return source msgClassAddr + */ +const msgClassAddr* msgClassSock::get_src_addr() +{ + return this->src_addr; +} + + +/** + * @return source msgClassAddr + */ +const msgClassAddr* msgClassSock::get_dst_addr() +{ + return this->dst_addr; +} + + +/** + * @return character array representation of source IP address + */ +const char* msgClassSock::get_src_str() const +{ + return this->src_addr->toNumericString(); +} + + +/** + * @return character array representation of destination IP address + */ +const char* msgClassSock::get_dst_str() const +{ + return this->dst_addr->toNumericString(); +} + + +/** + * Calls a specialized method to create a socket, based on the given + * IP version and protocol. + * + * @param IP version to be used to created socket. Either AF_INET or AF_INET6. + * @param Protocol of socket. Supports IPPROTO_UDP and IPPROTO_RAW for now. 
+ * @return true if created successfully, false otherwise + */ +bool msgClassSock::createSocket(int ip_version, int proto) +{ + if(proto==IPPROTO_UDP) + { + switch(ip_version) + { + case AF_INET: + return createSocketUDP4(); + case AF_INET6: + return createSocketUDP6(); + default: + elog("Failed to create UDP socket: address family %d is invalid", ip_version); + errno = EAFNOSUPPORT; + return false; + }; + } + else if(proto==IPPROTO_RAW) + { + switch(ip_version) + { + case AF_INET: + return createSocketRaw4(); + case AF_INET6: + return createSocketRaw6(); + default: + elog("Failed to create Raw socket: address family %d is invalid", ip_version); + errno = EAFNOSUPPORT; + return false; + }; + } + else + { + elog("Failed to create socket: protocol %d not supported", proto); + errno = EPFNOSUPPORT; + return false; + } +} + +/* get the current socket status, + * ok = true if the socket is initialized and seemingly working or + * ok = false if the socket has failed and needs reinitialization + **/ +bool msgClassSock::sock_ok ( void ) +{ + return (this->ok); +} + +void msgClassSock::sock_ok ( bool status ) +{ + this->ok = status ; +} + +/** + * Creates an IPv4 UDP socket + * + * @return true if created successfully, false otherwise + */ +bool msgClassSock::createSocketUDP4() +{ + int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if(sock < 0) + { + elog("Failed to create IPv4 UDP socket"); + return false; + } + else + { + this->sock = sock; + return true; + } +} + +/** + * Creates an IPv4 Raw (ping) socket + * + * @return true if created successfully, false otherwise + */ +bool msgClassSock::createSocketRaw4() +{ + int sock = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP); + if(sock < 0) + { + elog("Failed to create IPv4 Raw socket"); + return false; + } + else + { + this->sock = sock; + return true; + } +} + +/** + * Creates an IPv6 UDP socket + * + * @return true if created successfully, false otherwise + */ +bool msgClassSock::createSocketUDP6() +{ + int sock = 
socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP); + if(sock < 0) + { + elog("Failed to create IPv6 UDP socket"); + return false; + } + else + { + this->sock = sock; + return true; + } +} + +/** + * Creates an IPv6 Raw (ping) socket + * + * @return true if created successfully, false otherwise + */ +bool msgClassSock::createSocketRaw6() +{ + int sock = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6); + if(sock < 0) + { + elog("Failed to create IPv6 Raw socket"); + return false; + } + else + { + this->sock = sock; + return true; + } +} + + +/* destructor */ +msgClassSock::~msgClassSock() +{ + delete this->src_addr; + delete this->dst_addr; + if(sock >= 0) + { + close(sock); + } + if(this->interface) + { + delete[] this->interface; + } +} + + +/* default constructor */ +msgClassSock::msgClassSock() +{ + this->dst_addr = NULL; + this->interface= NULL; + this->src_addr = NULL; + this->sock = 0; + this->return_status = RETRY; + this->ok = false ; +} + + +/** + * Reads data from socket into buffer. Data is to be destined + * to the instance's dst_addr, or any address if allow_any is true, + * and once read, the src_addr flag will be set to the address that + * the data came originated from. + * + * @param Buffer to read data to + * @param maximum size of buffer + * @return number of bytes read + */ +int msgClassSock::read(char* data, int len) +{ + socklen_t socklen = this->src_addr->getSockLen(); + return recvfrom(this->sock, data, len, 0, this->src_addr->getSockAddr(), &socklen); +} + +/** + * Reads a reply from the specified socket into buffer ignoring the address + * of where it came from. + * + * @param Buffer to read data to + * @param maximum size of buffer + * @return number of bytes read + */ +int msgClassSock::readReply(char* data, int len) +{ + return recv(this->sock, data, len, 0 ); +} + +/** + * If it is sending to the "correct" port and address, then it can use + * the existing sockaddr. 
While that would ideally always be the case, + * in order to preserve the existing structure, there are some cases + * where the port and/or address are only specified when the data is + * being sent. In these cases, a new sockaddr is allocated to be + * used for the call to sendto. If src_addr is set, then the source + * address will be that address. + * + * @param data to be sent + * @param size of data + * @param destination IP address. If none is specified, uses instance's dst_addr + * @param destination port. If none is specified, uses instance's stored port + * @return number of bytes sent + */ +int msgClassSock::write(const char* data, int len, const char* dst, int port) +{ + int ret = 0 ; + sockaddr* dst_sock_addr = this->dst_addr->getSockAddr(); + + sockaddr_in dst_addr_in; + sockaddr_in6 dst_addr_in6; + + if((port!=0) || (dst!=NULL)) + { + ret = 1 ; + switch(this->dst_addr->getIPVersion()) + { + case AF_INET: + dst_sock_addr = (sockaddr*) &dst_addr_in; + memcpy(dst_sock_addr, this->dst_addr->getSockAddr(), sizeof(sockaddr_in)); + if(port!=0) + { + ((sockaddr_in*)dst_sock_addr)->sin_port = htons(port); + } + if(dst!=NULL) + { + ret = inet_pton(AF_INET, dst, &(((sockaddr_in*)dst_sock_addr)->sin_addr)); + } + break; + case AF_INET6: + dst_sock_addr = (sockaddr*) &dst_addr_in6; + memcpy(dst_sock_addr, this->dst_addr->getSockAddr(), sizeof(sockaddr_in6)); + if(port!=0) + { + ((sockaddr_in6*)dst_sock_addr)->sin6_port = htons(port); + } + if(dst!=NULL) + { + ret = inet_pton(AF_INET6, dst, &(((sockaddr_in6*)dst_sock_addr)->sin6_addr)); + } + break; + default: + slog ("invalid AF network family (%d)\n", this->dst_addr->getIPVersion()); + return (-1); + } + if ( ret != 1 ) + { + wlog ("write requires address resolution; inet_pton returned %d for ip '%s'\n", ret, dst ); + return (-1); + } + } + + ret = sendto(this->sock, data, len, 0, dst_sock_addr, this->dst_addr->getSockLen()); + if(ret<0) + { + elog("Failed to send with errno=%d", errno); + } + return ret; +} + 
+/**
+ * Given an Rx socket, send a message to the last address that a message was received from.
+ *
+ * @param Rx socket whose source (last sender) address is used as the destination
+ * @param data to be sent
+ * @param size of data
+ *
+ * @return number of bytes sent, or -1 on failure
+ */
+int msgClassSock::reply(const msgClassSock* source, const char* data, int len)
+{
+    return write(data, len, source->get_src_str());
+}
+
+
+/*
+ * This is to be used when code needs to know the actual file descriptor
+ * of socket, for example to poll for received data.
+ *
+ * @return the file descriptor as an int
+ */
+int msgClassSock::getFD()
+{
+    return this->sock;
+}
+
+
+/**
+ * Forces packets to be sent from interface of Tx socket
+ *
+ * @return PASS if successful, FAIL_SOCKET_BIND if the bind fails, FAIL_BAD_CASE if no interface is set
+ */
+int msgClassSock::interfaceBind()
+{
+    struct ifreq ifr;
+    if(this->interface)
+    {
+        memset(&ifr, 0, sizeof(ifreq));
+        snprintf(ifr.ifr_name, IFNAMSIZ, "%s", this->interface);
+        if(setsockopt(this->sock, SOL_SOCKET, SO_BINDTODEVICE, (void *)&ifr, sizeof(ifr)))
+        {
+            elog("Failed to bind socket to interface (%d:%m)\n", errno);
+            return FAIL_SOCKET_BIND;
+        }
+        return PASS;
+    }
+    return FAIL_BAD_CASE;
+}
+
+/* Sets IPTOS_CLASS_CS6 via IP_TOS or IPV6_TCLASS per dst_addr family, then SO_PRIORITY 6 ; iface is only logged */
+
+int msgClassSock::setPriortyMessaging( const char * iface )
+{
+    int flags = 0 ;
+    int result = 0 ;
+
+    switch( this->dst_addr->getIPVersion())
+    {
+        case AF_INET:
+
+            ilog ("Setting %s with IPv4 priority messaging\n", iface );
+            flags = IPTOS_CLASS_CS6;
+            result = setsockopt( this->sock, IPPROTO_IP, IP_TOS,
+                                 &flags, sizeof(flags) );
+            if( 0 > result )
+            {
+                elog ( "Failed to set socket send priority for interface (%s) (%d:%m)\n", iface, errno );
+                return( FAIL_SOCKET_OPTION );
+            }
+            break ;
+
+        case AF_INET6:
+
+            ilog ("Setting %s with IPv6 priority messaging\n", iface );
+            flags = IPTOS_CLASS_CS6;
+            result = setsockopt( this->sock, IPPROTO_IPV6, IPV6_TCLASS, &flags, sizeof(flags) );
+            if( 0 > result )
+            {
+                elog ( "Failed to set socket send priority for interface (%s) (%d:%m)\n", iface, errno );
+                return( FAIL_SOCKET_OPTION );
+            }
+            break ;
+    }
+
+    flags = 6;
+    result = setsockopt( this->sock, SOL_SOCKET, SO_PRIORITY, &flags, sizeof(flags) );
+    if( 0 > result )
+    {
+        elog ( "Failed to set socket send priority for interface (%s) (%d:%m)\n", iface, errno );
+        return( FAIL_SOCKET_OPTION );
+    }
+    return(PASS);
+}
+
+
+/* Set the socket receive buffer (SO_RCVBUF) size ; iface and name are only used in the logs */
+int msgClassSock::setSocketMemory( const char * iface, const char * name, int size )
+{
+    /* don't use whats on the stack ; create a local var */
+    int _size = size ;
+    int rx_buff_memory = 0 ;
+    socklen_t len = sizeof(rx_buff_memory); /* getsockopt takes a socklen_t*, so no pointer cast is needed */
+    int result = setsockopt( this->sock, SOL_SOCKET, SO_RCVBUF, &_size, sizeof(_size) );
+    if( 0 > result )
+    {
+        elog ( "failed to set socket memory size for '%s' (%d:%m)\n", iface, errno );
+        return( FAIL_SOCKET_OPTION );
+    }
+    else
+    {
+        getsockopt(this->sock , SOL_SOCKET, SO_RCVBUF, &rx_buff_memory, &len);
+    }
+    /***********************************************************************************
+     * From setsockopt SO_RCVBUF man page.
+     *
+     * Sets or gets the maximum socket receive buffer in bytes. The
+     * kernel doubles this value (to allow space for bookkeeping
+     * overhead) when it is set using setsockopt(2), and this doubled
+     * value is returned by getsockopt(2). The default value is set
+     * by the /proc/sys/net/core/rmem_default file, and the maximum
+     * allowed value is set by the /proc/sys/net/core/rmem_max file.
+     * The minimum (doubled) value for this option is 256.
+     ************************************************************************************
+     * Note: Value is divided by 2 because when you set the SO_RCVBUF the
+     * kernel doubles the value for book keeping.
+ ************************************************************************************/ + ilog ("Setting %s %s to %d bytes\n", iface, name, (rx_buff_memory/2) ); + return(PASS); +} + +/* Set the socket as non-blocking */ +int msgClassSock::setSocketNonBlocking ( void ) +{ + int on = 1; + + if ( 0 > ioctl(this->sock, FIONBIO, (char *)&on)) + { + elog ("unable to set socket to non-blocking [%d:%m]\n", errno ); + return FAIL_SOCKET_NOBLOCK; + } + return PASS; +} + +/** + * Creates a socket to be used to receive data on a given address/port. + * First, it creates the destination msgClassAddr, which is the address + * that is is listening on, then it stores the interface if it is to be + * used, and then calls the Rx socket initialization function, which + * does the majority of the socket initialization. The return_status is set to PASS if it succeeds, or the failure code elsewise. + * + * @param IP address or hostname + * @param host-byte order port + * @param IP protocol + * @param name of interface to bind to, or null otherwise + * @param Whether to listen on all addresses instead + * @param Whether the address is multicast + */ +msgClassRx::msgClassRx(const char* address, int port, int proto, const char* interface, bool allow_any, bool is_multicast) +{ + this->dst_addr = new msgClassAddr(address, port, proto); + this->src_addr = new msgClassAddr(address, port, proto); + if(interface) + { + ilog ("Creating %s socket on port %d with address: %s\n", interface, port, address); + this->interface = new char[strlen(interface)+1]; + snprintf(this->interface, strlen(interface)+1, "%s", interface); + } + else + { + ilog ("Creating localhost socket on port %d with address: %s\n", port, address); + this->interface = NULL; + } + this->return_status = initSocket(allow_any, is_multicast); +} + + +/** + * Creates a socket to be used to receive data on a given address/port. 
+ * First, it copies the destination msgClassAddr, which is the address + * that is is listening on, then it stores the interface if it is to be + * used, and then calls the Rx socket initialization function, which + * does the majority of the socket initialization. The return_status + * is set to PASS if it succeeds, or the failure code elsewise. + * + * @param destination address + * @param name of interface to bind to, or null otherwise + * @param Whether to listen on all addresses instead + * @param Whether the address is multicast + */ +msgClassRx::msgClassRx(const msgClassAddr& addr, const char* interface, bool allow_any, bool is_multicast) +{ + this->dst_addr = new msgClassAddr(addr); + this->src_addr = new msgClassAddr(addr.toString()); + if(interface) + { + this->interface = new char[strlen(interface)+1]; + snprintf(this->interface, strlen(interface)+1, "%s", interface); + } + else + { + this->interface = NULL; + } + this->return_status = initSocket(allow_any, is_multicast); +} + + +/** + * Creates socket, sets necessary socket options for Rx socket, + * and binds to the correct address and interface if applicable + * + * @param whether to bind to all sockets of that family. 
+ * @param Whether the address is multicast + * @return PASS if successful, failure code otherwise + */ +int msgClassRx::initSocket(bool allow_any, bool is_multicast) +{ + int on = 1; + char address[INET6_ADDRSTRLEN] = {0}; /* zeroed so a failed interface lookup leaves "" here instead of uninitialised stack bytes */ + if(createSocket(this->dst_addr->getIPVersion(), this->dst_addr->getIPProtocol()) == false) + { + return FAIL_SOCKET_CREATE; + } + if(setsockopt(this->sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on))) + { + return FAIL; + } + + /* Exactly one of the following adjustments is applied before bind(): + * join the multicast group, listen on the wildcard address, or listen + * on the address of the named interface. */ + if(is_multicast) + { + switch(this->dst_addr->getIPVersion()) + { + case AF_INET: + { + struct ip_mreqn mreq; + memset(&mreq, 0, sizeof(mreq)); + mreq.imr_multiaddr.s_addr = ((sockaddr_in*)this->dst_addr->getSockAddr())->sin_addr.s_addr; + /* The interface name is an optional constructor argument and may be + * NULL; index 0 lets the kernel pick the interface rather than + * handing a NULL pointer to if_nametoindex(). */ + mreq.imr_ifindex = this->interface ? if_nametoindex(this->interface) : 0; + if(setsockopt(this->sock, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq))) + { + elog("Failed to set multicast address (%d:%m)\n", errno); + return FAIL_SOCKET_OPTION; + } + break; + } + case AF_INET6: + { + struct ipv6_mreq mreq; + memset(&mreq, 0, sizeof(mreq)); + mreq.ipv6mr_multiaddr = ((sockaddr_in6*)this->dst_addr->getSockAddr())->sin6_addr; + /* same NULL guard as the IPv4 case; 0 means "kernel chooses" */ + mreq.ipv6mr_interface = this->interface ? if_nametoindex(this->interface) : 0; + if(setsockopt(this->sock, IPPROTO_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq))) + { + elog("Failed to set multicast address (%d:%m)\n", errno); + return FAIL_SOCKET_OPTION; + } + break; + } + } + } + else if(allow_any) + { + switch(this->dst_addr->getIPVersion()) + { + case AF_INET: + ((sockaddr_in*)this->dst_addr->getSockAddr())->sin_addr.s_addr = htonl(INADDR_ANY); + break; + case AF_INET6: + ((sockaddr_in6*)this->dst_addr->getSockAddr())->sin6_addr = in6addr_any; + break; + } + } + else if(this->interface) + { + /* Overwrite the address to bind to with the address of the interface. + * NOTE(review): the return values of getAddressFromInterface() and + * inet_pton() are not checked; if either fails the originally + * supplied address is kept and bound below - confirm that is intended. */ + msgClassAddr::getAddressFromInterface(this->interface, address, INET6_ADDRSTRLEN); + switch(this->dst_addr->getIPVersion()) + { + case AF_INET: + inet_pton(AF_INET, address, &((sockaddr_in*)this->dst_addr->getSockAddr())->sin_addr); + break; + case AF_INET6: + inet_pton(AF_INET6, address, 
&((sockaddr_in6*)this->dst_addr->getSockAddr())->sin6_addr); + break; + } + } + + if(bind(this->sock, this->dst_addr->getSockAddr(), this->dst_addr->getSockLen())) + { + elog("Failed to bind socket to address (%d:%m)\n", errno); + return FAIL_SOCKET_BIND; + } + + return (this->setSocketNonBlocking ()); /* Rx sockets are always left non-blocking */ +} + + + + +/** + * Creates a socket to be used to transmit data to a given address/port. + * First, it creates the destination msgClassAddr, then stores a copy of the + * interface name if one is given, and then calls the Tx socket initialization + * function, which does the majority of the socket initialization. + * The return_status is set to PASS if it succeeds, or the failure code otherwise. + * + * @param address IP address or hostname + * @param port host-byte order port + * @param proto IP protocol + * @param interface name of interface to bind to, or null otherwise + */ +msgClassTx::msgClassTx(const char* address, int port, int proto, const char* interface) +{ + this->dst_addr = new msgClassAddr(address, port, proto); + this->src_addr = new msgClassAddr(address, port, proto); + if(interface) + { + ilog ("Creating %s socket on port %d with address: %s\n", interface, port, address); + this->interface = new char[strlen(interface)+1]; + snprintf(this->interface, strlen(interface)+1, "%s", interface); + } + else + { + ilog ("Creating socket on port %d with address: %s\n", port, address); + this->interface = NULL; + } + this->return_status = initSocket(); +} + + +/** + * Creates a socket to be used to transmit data to a given address/port. + * First, it copies the destination msgClassAddr, then stores the interface + * if it is to be used, and then calls the Tx socket initialization function, + * which does the majority of the socket initialization. The return_status is set to PASS if it succeeds, or the failure code elsewise. 
+ * + * @param destination address + * @param name of interface to bind to, or null otherwise + */ +msgClassTx::msgClassTx(const msgClassAddr& addr, const char* interface) +{ + this->dst_addr = new msgClassAddr(addr); + this->src_addr = new msgClassAddr(addr); + if(interface) + { + this->interface = new char[strlen(interface)+1]; + snprintf(this->interface, strlen(interface)+1, "%s", interface); + } + else + { + this->interface = NULL; + } + this->return_status = initSocket(); +} + + +/** + * Creates socket, sets necessary socket options for Tx socket, + * and binds to the correct address and interface if applicable. + * + * SO_REUSEADDR is always enabled. Only when an interface name is stored + * does the socket get bound: the interface's address is looked up, + * src_addr is replaced by a msgClassAddr built from it, and the socket + * is bound to that source address. + * + * NOTE(review): the result of getAddressFromInterface() is not checked + * and 'address' is not initialised, so a failed lookup would log and + * parse indeterminate bytes - confirm the lookup cannot fail here. + * + * @return PASS if successful, failure code otherwise + */ +int msgClassTx::initSocket() +{ + int on = 1; + char address[INET6_ADDRSTRLEN]; + if(createSocket(this->dst_addr->getIPVersion(), this->dst_addr->getIPProtocol()) == false) + { + return FAIL_SOCKET_CREATE; + } + if(setsockopt(this->sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on))) + { + return FAIL; + } + if(this->interface) + { + msgClassAddr::getAddressFromInterface(this->interface, address, INET6_ADDRSTRLEN); + dlog("Address of interface %s is %s", this->interface, address); + delete this->src_addr; + this->src_addr = new msgClassAddr(address); + if(bind(this->sock, this->src_addr->getSockAddr(), this->src_addr->getSockLen())) + { + elog("Failed to bind socket to address (%d:%m)\n", errno); + return FAIL_SOCKET_BIND; + } + } + return PASS; +} diff --git a/mtce-common/cgts-mtce-common-1.0/common/msgClass.h b/mtce-common/cgts-mtce-common-1.0/common/msgClass.h new file mode 100644 index 00000000..cbeee29f --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/msgClass.h @@ -0,0 +1,216 @@ +#ifndef __INCLUDE_MSGCLASS_H__ +#define __INCLUDE_MSGCLASS_H__ + +/* + * Copyright (c) 2015-2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "nodeBase.h" +#include "returnCodes.h" +#include "nodeUtil.h" + + +/* + * The msgClassAddr class is a version-independent representation + * of an IP address with its transport-layer protocol (TLP) and port + * specified. Note that only UDP sockets are supported at this point. + */ +class msgClassAddr +{ +public: + msgClassAddr(const char* address, int port, int proto); + msgClassAddr(const msgClassAddr& addr); + msgClassAddr(const char* address); + ~msgClassAddr(); + int getAddress(char* address, int addr_len, int* port, int* proto) const; + int setAddress(char* address, int port, int proto); + const char* toString() const; + const char* toNumericString() const; + int getIPVersion() const; + int getIPProtocol() const; + int getPort() const; + const struct sockaddr* getSockAddr() const; + struct sockaddr* getSockAddr(); + socklen_t getSockLen() const; + msgClassAddr& operator= ( const msgClassAddr &rhs ) /* re-initialises this object from rhs' original address string, port and protocol and records the result in return_status. NOTE(review): there is no self-assignment guard - confirm initAddr() is safe when rhs is *this */ + { + this->return_status = initAddr(rhs.address_str, rhs.port, rhs.proto); + return *this; + } + /** + * Is to be set to allow the status of the last operation to be checked. + * Useful in checking if an instance was initialized correctly. + */ + int return_status; + + /** + * Intended to mark the address as being all, equivalent to either + * "0.0.0.0" or "::" depending on the address family. Is to allow + * a normal address to be passed in to determine the IP family, but + * uses that value instead of the passed in value. + **/ + bool addr_any; + static int getAddressFromInterface(const char* interface, char* address, int len); + static int getVersionFromInterface(const char* interface); + +protected: + /** + * Stores the protocol of the address. + */ + int proto; + + /** + * Stores the port in host-byte order. + */ + int port; + + /** + * Stores the address string used to create this msgClassAddr instance. 
+ * If it is resolved to an IP address, this can be used to access the + * original unresolved value. + */ + char* address_str; + + /** + * Stores a numeric representation of the IP address of the address. + * This differs from address_str if address_str was a hostname that + * was resolved. + */ + char* address_numeric_string; + + /** + * Stores the addrinfo struct created by getaddrinfo. This contains + * both the IP version and the sockaddr. + */ + struct addrinfo* address_info; + +private: + int initAddr(const char* address, int port, int proto); + +}; + +/* + * The msgClassSock class is an abstraction of the inet sockets used + * by maintenance, which are not dependent on the socket protocol. This is + * needed to allow both IPv4 and IPv6 addresses to be used on the management + * network. Its constructor is protected; the msgClassRx and msgClassTx + * subclasses are the types that get instantiated. + */ +class msgClassSock +{ +public: + ~msgClassSock(); /* NOTE(review): not virtual although msgClassRx and msgClassTx derive from this class - confirm no instance is deleted through a msgClassSock pointer */ + int read(char* data, int len); + int write(const char* data, int len, const char* dest=NULL, int port=0); + int reply(const msgClassSock* source, const char* data, int len); + int readReply(char* data, int len); + int getFD(); + int interfaceBind(); + int setPriortyMessaging( const char * iface ); + int setSocketMemory ( const char * iface, const char * name, int rmem ); + int setSocketNonBlocking ( void ); + + const msgClassAddr* get_src_addr(); + const msgClassAddr* get_dst_addr(); + const char* get_src_str() const; + const char* get_dst_str() const; + + /* get the current socket status, + * ok = true if the socket is initialized and seemingly working or + * ok = false if the socket has failed and needs reinitialization + **/ + bool sock_ok ( void ); + void sock_ok ( bool status ); + + /** + * Is to be set to allow the status of the last operation to be checked. + * Useful in checking if an instance was initialized correctly. + */ + int return_status; + const char* toString(); + +protected: + msgClassSock(); + + /** + * Stores the file descriptor of the allocated socket. 
Should + * only be accessed via the getFD() accessor. + */ + int sock; + + bool createSocket(int ip_version, int proto); + + /** + * Source address of outgoing packets. May be specified for Tx sockets, + * and is set by Rx sockets when receiving packets. + */ + msgClassAddr* src_addr; + + /** + * Address that this socket is to be either transmitting to or receiving + * on, depending if it is Rx or Tx. + */ + msgClassAddr* dst_addr; + + /** + * If the socket is to be bound to a specific interface, then the + * interface name is stored here; otherwise, this is NULL. + */ + char* interface; + + /** + * Set true when interface is initialized and working + * or false if it is not initialized or failed. + * This boolean can be used in a main loop to decide + * if a socket needs to be re initialized or not. + */ + bool ok ; + +private: + bool createSocketUDP4(); + bool createSocketUDP6(); + bool createSocketRaw4(); + bool createSocketRaw6(); +}; + +/* + * A socket for receiving data, this replaces the Rx sockets + * that are tied to inet. The dst_addr is set to the address to + * be receiving packets on when reading. + */ +class msgClassRx : public msgClassSock +{ +public: + msgClassRx(const char* address, int port, int proto, const char* interface=NULL, bool allow_any=false, bool is_multicast=false); + msgClassRx(const msgClassAddr& addr, const char* interface=NULL, bool allow_any=false, bool is_multicast=false); +private: + int initSocket(bool allow_any, bool is_multicast=false); +}; + +/* + * A socket for transmitting data, this replaces the Tx sockets + * that are tied to inet. The dst_addr is set to the address to + * be sending packets to when writing. 
+ */ +class msgClassTx : public msgClassSock +{ +public: + msgClassTx(const char* address, int port, int proto, const char* interface=NULL); + msgClassTx(const msgClassAddr& addr, const char* interface=NULL); +private: + int initSocket(); +}; + +#endif /* __INCLUDE_MSGCLASS_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/common/nlEvent.cpp b/mtce-common/cgts-mtce-common-1.0/common/nlEvent.cpp new file mode 100644 index 00000000..fafb00f9 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/nlEvent.cpp @@ -0,0 +1,311 @@ +/* + * Copyright (c) 2013, 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform netlink listener event support for maintenance + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +#include "nodeBase.h" +#include "nodeUtil.h" + /* + * get_netlink_events: performs a single recvmsg() on 'nl_socket' and walks + * the netlink messages it returned. Both output lists are cleared first; + * each message that carries a non-zero interface index puts that + * interface's name on 'links_gone_up' when IFF_RUNNING is set, or on + * 'links_gone_down' otherwise. + * + * Returns the number of messages counted, 0 when the read would block, + * the negative recvmsg() result on any other receive error, or -1 when + * an NLMSG_ERROR message is seen. + * + * get_netlink_events_throttle is the counter handed to wlog_throttled() + * for the "unknown interface index" warning. + */ +int get_netlink_events_throttle = 0 ; +int get_netlink_events ( int nl_socket , std::list & links_gone_down, + std::list & links_gone_up ) +{ + char buf[4096]; + char name[IF_NAMESIZE]; + + int len ; + int ret = 0; + + struct sockaddr_nl sa; + struct iovec iov = { buf, sizeof (buf) }; + struct msghdr msg = { &sa, sizeof (sa), &iov, 1, NULL, 0, 0 }; + struct nlmsghdr *h; + struct ifinfomsg *ifi; + /* struct ifaddrmsg * ifa ; used for addr change events */ + + links_gone_up.clear(); + links_gone_down.clear(); + + len = recvmsg (nl_socket, &msg, 0); + if (len < 0) + { + /* Socket non-blocking so bail out once we have read everything */ + if ( (errno == EWOULDBLOCK) || (errno == EAGAIN)) + { + return ret ; + } + + /* Anything else is an error */ + elog ("failed netlink recvmsg (%d:%d) (%d:%s)\n", nl_socket, len, errno, strerror(errno)); + return len; + } + + if (len == 0) + { + wlog ("No netlink data read_netlink: EOF\n"); + } + + /* Handle all the messages from 'recvmsg' */ + h = (struct nlmsghdr *) &buf[0] ; + for 
( ; NLMSG_OK (h,(unsigned int)len); h=NLMSG_NEXT (h,len)) + { + /* ignore address change events */ + if (h->nlmsg_type == RTM_NEWADDR ) + { + +#ifdef RTM_NEWADDR_SUPPORTED + ifa = (struct ifaddrmsg *) NLMSG_DATA (nlh); + + struct rtattr * rth = IFA_RTA (ifa); + int rtl = IFA_PAYLOAD (nlh); + for (;rtl && RTA_OK (rth, rtl); rth = RTA_NEXT (rth,rtl)) + { + char name[IFNAMSIZ]; + uint32_t ipaddr; + + if (rth->rta_type != IFA_LOCAL) continue; + + ipaddr = * ((uint32_t *)RTA_DATA(rth)); + ipaddr = htonl(ipaddr); + + fprintf (stdout,"%s is now %X\n",if_indextoname(ifa->ifa_index,name),ipaddr); + } +#else + dlog ("unsupported netlink event: RTM_NEWADDR\n"); + continue ; +#endif + } + + /* Finish reading */ + if (h->nlmsg_type == NLMSG_NOOP ) + { + ilog ("netlink message: Nothing to read\n"); + return ret; + } + + /* Finish reading */ + if (h->nlmsg_type == NLMSG_DONE) + { + ilog ("netlink message: No more messages\n"); + return ret; + } + + /* Message is some kind of error */ + if (h->nlmsg_type == NLMSG_ERROR) + { + wlog ("netlink message: indicates error\n"); + return -1; + } + + ifi = (ifinfomsg*) NLMSG_DATA (h); + memset ( name, 0 , IF_NAMESIZE ); + if ( ifi->ifi_index ) + { + if_indextoname(ifi->ifi_index, name); + if (ifi->ifi_flags & IFF_RUNNING) + { + /* if 'up' then remove interface from 'down' list and add it to the 'up' list */ + links_gone_down.remove(name); + links_gone_up.push_front(name); + dlog ( "%s is up and running \n", name ); + } + else + { + if ( ifi->ifi_flags & IFF_UP ) + { + dlog ("%s is admin:up but oper:down\n", name ); + } + else + { + dlog ("%s is admin:down and oper:down\n", name ); + } + + /* if 'down' then remove interface from 'up' list and add it to the 'down' list */ + links_gone_up.remove(name); + links_gone_down.push_front(name); + } + get_netlink_events_throttle = 0 ; + } + else + { + wlog_throttled (get_netlink_events_throttle, 100, "got netlink event for unknown interface index\n"); + } + ret++ ; + } + links_gone_up.unique(); + 
links_gone_down.unique(); + + return ret; +} + + /* + * Reads pending netlink link events and updates the caller's management + * and infrastructure link state flags from them. + * + * For every interface reported down that matches one of the two names the + * corresponding flag is cleared (and a warning logged only if it was set); + * for every interface reported up the flag is set and logged each time. + * Each reported interface is also queried with get_link_state() on + * 'ioctl_sock' purely for logging. + * + * NOTE(review): both name pointers are passed to strcmp() unconditionally, + * so they are assumed to be non-NULL - confirm against the callers. + */ +void log_link_events ( int netlink_sock, + int ioctl_sock, + const char * mgmnt_iface_ptr, + const char * infra_iface_ptr, + bool & mgmnt_link_up_and_running, + bool & infra_link_up_and_running) +{ + std::list links_gone_down ; + std::list links_gone_up ; + std::list::iterator iter_curr_ptr ; + dlog3 ("logging for interfaces %s and %s\n", mgmnt_iface_ptr, infra_iface_ptr); + if ( get_netlink_events ( netlink_sock, links_gone_down, links_gone_up )) /* any non-zero result, including a negative error code, enters this branch */ + { + bool running = false ; + if ( !links_gone_down.empty() ) + { + dlog3 ("%ld links have dropped\n", links_gone_down.size() ); + + /* Look at the down list */ + for ( iter_curr_ptr = links_gone_down.begin(); + iter_curr_ptr != links_gone_down.end() ; + iter_curr_ptr++ ) + { + dlog3 ( "downed link: %s (running:%d:%d)\n", + iter_curr_ptr->c_str(), + mgmnt_link_up_and_running, + infra_link_up_and_running ); + + if ( !strcmp (mgmnt_iface_ptr, iter_curr_ptr->data())) + { + if ( mgmnt_link_up_and_running == true ) + { + mgmnt_link_up_and_running = false ; + wlog ("Mgmnt link %s is down\n", mgmnt_iface_ptr ); + } + } + if ( !strcmp (infra_iface_ptr, iter_curr_ptr->data())) + { + if ( infra_link_up_and_running == true ) + { + infra_link_up_and_running = false ; + wlog ("Infra link %s is down\n", infra_iface_ptr ); + } + } + + if ( get_link_state ( ioctl_sock, iter_curr_ptr->data(), &running ) == PASS ) + { + dlog ("%s is down (oper:%s)\n", iter_curr_ptr->c_str(), running ? 
"up" : "down" ); + } + else + { + wlog ("%s is down (driver query failed)\n", iter_curr_ptr->c_str() ); + } + } + } + if ( !links_gone_up.empty() ) + { + dlog3 ("%ld links have recovered\n", links_gone_up.size()); + + /* Look at the up list */ + for ( iter_curr_ptr = links_gone_up.begin(); + iter_curr_ptr != links_gone_up.end() ; + iter_curr_ptr++ ) + { + dlog3 ( "recovered link: %s (running:%d:%d)\n", + iter_curr_ptr->c_str(), + mgmnt_link_up_and_running, + infra_link_up_and_running ); + + if ( !strcmp (mgmnt_iface_ptr, iter_curr_ptr->data())) + { + mgmnt_link_up_and_running = true ; /* unlike the down path, set and logged even if already true */ + wlog ("Mgmnt link %s is up\n", mgmnt_iface_ptr ); + } + if ( !strcmp (infra_iface_ptr, iter_curr_ptr->data())) + { + infra_link_up_and_running = true ; + wlog ("Infra link %s is up\n", infra_iface_ptr ); + } + + if ( get_link_state ( ioctl_sock, iter_curr_ptr->data(), &running ) == PASS ) + { + dlog ("%s is up (oper:%s)\n", + iter_curr_ptr->c_str(), + running ? "up" : "down" ); + } + else + { + wlog ("%s is up (driver query failed)\n", iter_curr_ptr->c_str() ); + } + } + } + } +} + + + + +/* Open a netlink listener socket and return that socket id. 
+ * Return 0 on create or bind failure */ +int open_netlink_socket ( int groups ) /* 'groups' is stored in nl_groups of the bind address, i.e. it selects the netlink multicast groups to listen to */ +{ + struct sockaddr_nl addr; + int on = 1 ; + ilog ( "NLMon Groups: %d\n", groups ) ; + + int nl_socket = socket (AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (nl_socket < 0) + { + elog ("Failed to open netlink socket (%d:%s)\n", errno, strerror(errno)); + return (0); + } + + else if ( 0 > ioctl( nl_socket, FIONBIO, (char *)&on)) + { + elog ("failed to set 'netlink monitor' socket non-blocking (%d:%m)\n", errno ); + close (nl_socket); + nl_socket = 0 ; /* failing to go non-blocking is reported as 0 as well */ + } + else + { + memset ((void *) &addr, 0, sizeof (addr)); + + addr.nl_family = AF_NETLINK; + addr.nl_pid = getpid (); + /* addr.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR; */ + addr.nl_groups = groups ; /* allow the caller to specify the groups */ + + if (bind (nl_socket, (struct sockaddr *) &addr, sizeof (addr)) < 0) + { + elog ( "Failed to bind netlink socket (%d:%s)\n", errno, strerror(errno)); + close (nl_socket); + nl_socket = 0 ; + } + } + return (nl_socket); +} + /* Closes a socket returned by open_netlink_socket(); the value 0, which that function uses to report failure, is ignored. */ +void close_netlink_socket ( int socket ) +{ + if ( socket ) + { + close (socket); + } +} diff --git a/mtce-common/cgts-mtce-common-1.0/common/nlEvent.h b/mtce-common/cgts-mtce-common-1.0/common/nlEvent.h new file mode 100644 index 00000000..a1d4e872 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/nlEvent.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2013, 2015 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform netlink listener event support for maintenance + */ + /* NOTE(review): this header shows no include guard and no #include of its own although the prototypes use std::list - presumably every includer pulls in the list header first; confirm */ +void close_netlink_socket ( int socket ); +int open_netlink_socket ( int groups ); +int get_netlink_events ( int nl_socket , + std::list & links_gone_down, + std::list & links_gone_up ); +void log_link_events ( int netlink_sock, + int ioctl_sock, + const char * mgmnt_iface_ptr, + const char * infra_iface_ptr, + bool & mgmnt_link_up_and_running, + bool & infra_link_up_and_running); + diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeBase.cpp b/mtce-common/cgts-mtce-common-1.0/common/nodeBase.cpp new file mode 100755 index 00000000..3f70200a --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeBase.cpp @@ -0,0 +1,707 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform "Node Base" Utility + */ + +#include +#include +#include + +using namespace std; + +#include "nodeBase.h" + +/** Maintenance command request to heartbeat service message header */ +const char mtc_hbs_cmd_req_header [MSG_HEADER_SIZE] = {"cgts mtc hbs cmd:"}; + +/** Maintenance Request Message header content */ +const char mtc_cmd_req_msg_header [MSG_HEADER_SIZE] = {"cgts mtc cmd req:"}; + +/** Maintenance Response Message header content */ +const char mtc_cmd_rsp_msg_header [MSG_HEADER_SIZE] = {"cgts mtc cmd rsp:"}; + +/** Maintenance Reply Message header content */ +const char mtc_msg_rep_msg_header [MSG_HEADER_SIZE] = {"cgts mtc rep msg:"}; + +/** Maintenance Log message header */ +const char mtc_log_msg_hdr [MSG_HEADER_SIZE] = {"cgts mtc log msg:"}; + +/** Maintenance Message header content */ +const char mtc_compute_msg_header [MSG_HEADER_SIZE] = {"cgts mtc message:"}; + /** Maintenance event message header */ +const char mtc_event_hdr [MSG_HEADER_SIZE] = {"mtce event msg :"}; + /** pmond alive pulse message header */ +const char mtc_pmond_pulse_header[MSG_HEADER_SIZE] = {"pmond alive pulse"}; + +const char 
mtc_heartbeat_event_hdr[MSG_HEADER_SIZE] = {"heart beat event:"}; + +const char mtc_heartbeat_loss_hdr [MSG_HEADER_SIZE] = {"heart beat loss :"}; +const char mtc_heartbeat_ready_hdr[MSG_HEADER_SIZE] = {"heart beat ready:"}; +const char mtc_loopback_hdr [MSG_HEADER_SIZE] = {"mtc loopback msg:"}; + /* Accessors returning a pointer to the corresponding fixed message header string defined above */ +const char * get_loopback_header (void) { return mtc_loopback_hdr;} +const char * get_hbs_cmd_req_header (void) { return mtc_hbs_cmd_req_header ;} +const char * get_cmd_req_msg_header (void) { return mtc_cmd_req_msg_header ;} +const char * get_cmd_rsp_msg_header (void) { return mtc_cmd_rsp_msg_header ;} +const char * get_compute_msg_header (void) { return mtc_compute_msg_header ;} +const char * get_pmond_pulse_header (void) { return mtc_pmond_pulse_header ;} +const char * get_mtc_log_msg_hdr (void) { return mtc_log_msg_hdr ;} +const char * get_mtce_event_header (void) { return mtc_event_hdr ;} +const char * get_heartbeat_ready_header(void) { return mtc_heartbeat_ready_hdr;} +const char * get_heartbeat_loss_header (void) { return mtc_heartbeat_loss_hdr ;} +const char * get_heartbeat_event_header(void) { return mtc_heartbeat_event_hdr;} +const char * get_msg_rep_msg_header (void) { return mtc_msg_rep_msg_header ;} + /* + * Dumps the version, revision, res, cmd, num, the four parm words and the + * header of a maintenance message with mlog1; the buffer is appended only + * when its first byte is non-zero. Always returns PASS. + */ +int print_mtc_message ( mtc_message_type * msg_ptr ) +{ + if ( msg_ptr->buf[0] ) + { + mlog1 ("Message: %d.%d.%d %x:%x:%x.%x.%x.%x [%s][%s]\n", + msg_ptr->ver, + msg_ptr->rev, + msg_ptr->res, + msg_ptr->cmd, + msg_ptr->num, + msg_ptr->parm[0], + msg_ptr->parm[1], + msg_ptr->parm[2], + msg_ptr->parm[3], + msg_ptr->hdr, + &msg_ptr->buf[0]); + } + else + { + mlog1 ("Message: %d.%d.%d %x:%x:%x.%x.%x.%x [%s]\n", + msg_ptr->ver, + msg_ptr->rev, + msg_ptr->res, + msg_ptr->cmd, + msg_ptr->num, + msg_ptr->parm[0], + msg_ptr->parm[1], + msg_ptr->parm[2], + msg_ptr->parm[3], + msg_ptr->hdr); + } + return (PASS); +} + /* + * Same dump as the single-argument overload. When 'force' is true the + * message is written with ilog in both the buffer and the no-buffer case + * (presumably so that it shows regardless of the mlog1 debug level - + * confirm against the logging macros); when false the call is forwarded + * to the single-argument overload. Always returns PASS. + */ +int print_mtc_message ( mtc_message_type * msg_ptr , bool force ) +{ + if ( force == true ) + { + if ( msg_ptr->buf[0] ) + { + ilog ("Message: %d.%d.%u 
%x:%x:%x.%x.%x.%x [%s][%s]\n", + msg_ptr->ver, + msg_ptr->rev, + msg_ptr->res, + msg_ptr->cmd, + msg_ptr->num, + msg_ptr->parm[0], + msg_ptr->parm[1], + msg_ptr->parm[2], + msg_ptr->parm[3], + msg_ptr->hdr, + &msg_ptr->buf[0]); + } + else + { + /* forced output uses ilog here too; this branch previously used mlog1, so a forced message with an empty buffer was logged like an unforced one */ + ilog ("Message: %d.%d.%d %x:%x:%x.%x.%x.%x [%s]\n", + msg_ptr->ver, + msg_ptr->rev, + msg_ptr->res, + msg_ptr->cmd, + msg_ptr->num, + msg_ptr->parm[0], + msg_ptr->parm[1], + msg_ptr->parm[2], + msg_ptr->parm[3], + msg_ptr->hdr); + } + } + else + { + print_mtc_message ( msg_ptr ); + } + return (PASS); +} + /* Maps a maintenance command, message or event code to a short constant name for logging. */ +const char * get_mtcNodeCommand_str ( int cmd ) +{ + switch ( cmd ) + { + case MTC_CMD_NONE: return ("none" ); + + /* general action command */ + case MTC_CMD_LOOPBACK: return ("loopback"); + case MTC_CMD_REBOOT: return ("reboot"); + case MTC_CMD_WIPEDISK: return ("wipedisk"); + case MTC_CMD_RESET: return ("reset"); + case MTC_MSG_MTCALIVE: return ("mtcAlive msg"); + case MTC_REQ_MTCALIVE: return ("mtcAlive req"); + case MTC_MSG_LOCKED: return ("locked msg"); + case MTC_CMD_LAZY_REBOOT: return ("lazy reboot"); + + /* goenabled commands and messages */ + case MTC_MSG_MAIN_GOENABLED: return ("goEnabled main msg"); + case MTC_MSG_SUBF_GOENABLED: return ("goEnabled subf msg"); + case MTC_REQ_MAIN_GOENABLED: return ("goEnabled main req"); + case MTC_REQ_SUBF_GOENABLED: return ("goEnabled subf req"); + case MTC_MSG_MAIN_GOENABLED_FAILED: return ("goEnabled main failed"); + case MTC_MSG_SUBF_GOENABLED_FAILED: return ("goEnabled subf failed"); + + /* start and stop services commands and messages */ + case MTC_CMD_STOP_CONTROL_SVCS: return ("stop controller host services"); + case MTC_CMD_STOP_COMPUTE_SVCS: return ("stop compute host services"); + case MTC_CMD_STOP_STORAGE_SVCS: return ("stop storage host services"); + case MTC_CMD_START_CONTROL_SVCS: return ("start controller host services"); + case MTC_CMD_START_COMPUTE_SVCS: return ("start compute host services"); + case MTC_CMD_START_STORAGE_SVCS: return ("start storage host 
services"); + case MTC_CMD_HOST_SVCS_RESULT: return ("host services result"); + + /* Heartbeat Control Commands */ + case MTC_RESTART_HBS: return("heartbeat restart"); + case MTC_BACKOFF_HBS: return("heartbeat backoff"); + case MTC_RECOVER_HBS: return("heartbeat recover"); + + /* heartbeat service messages */ + case MTC_EVENT_HEARTBEAT_READY: return("heartbeat ready event"); + case MTC_EVENT_HEARTBEAT_LOSS: return("heartbeat loss"); + case MTC_EVENT_HEARTBEAT_RUNNING: return("heartbeat running"); + case MTC_EVENT_HEARTBEAT_ILLHEALTH: return("heartbeat illhealth"); + case MTC_EVENT_HEARTBEAT_STOPPED: return("heartbeat stopped"); + case MTC_EVENT_HEARTBEAT_DEGRADE_SET:return("heartbeat degrade set"); + case MTC_EVENT_HEARTBEAT_MINOR_CLR: return("heartbeat minor clear"); + case MTC_EVENT_HEARTBEAT_DEGRADE_CLR:return("heartbeat degrade clear"); + case MTC_EVENT_HEARTBEAT_MINOR_SET: return("heartbeat minor set"); + + /* degrade events */ + case MTC_DEGRADE_RAISE: return ("degrade raise"); + case MTC_DEGRADE_CLEAR: return ("degrade clear"); + + /* general events */ + case MTC_EVENT_LOOPBACK: return ("loopback"); + case MTC_EVENT_MONITOR_READY: return ("monitor ready event"); + case MTC_EVENT_GOENABLE_FAIL: return ("goenable fail"); + case MTC_EVENT_HOST_STALLED: return("host stalled event"); + + /* pmon events */ + case MTC_EVENT_PMON_CLEAR: return("pmon clear"); + case MTC_EVENT_PMON_CRIT: return("pmon critical event"); + case MTC_EVENT_PMON_MAJOR: return("pmon major event"); + case MTC_EVENT_PMON_MINOR: return("pmon minor event"); + case MTC_EVENT_PMON_LOG: return("pmon log"); + case MTC_EVENT_PMOND_RAISE: return("pmon raise"); + + /* rmon events */ + case MTC_EVENT_RMON_READY: return("rmon ready event"); + case MTC_EVENT_RMON_CLEAR: return("rmon clear"); + case MTC_EVENT_RMON_CRIT: return("rmon critical event"); + case MTC_EVENT_RMON_MAJOR: return("rmon major event"); + case MTC_EVENT_RMON_MINOR: return("rmon minor event"); + case MTC_EVENT_RMON_LOG: return("rmon 
log"); + + /* data port events */ + case MTC_EVENT_AVS_CLEAR: return("AVS clear"); + case MTC_EVENT_AVS_MAJOR: return("AVS major"); + case MTC_EVENT_AVS_CRITICAL: return("AVS critical"); + case MTC_EVENT_AVS_OFFLINE: return("AVS offline"); + + /* hardware Monitor events */ + case MTC_EVENT_HWMON_CONFIG: return("hwmon config event"); /* OBS */ + case MTC_EVENT_HWMON_CLEAR: return("hwmon clear"); + case MTC_EVENT_HWMON_MINOR: return("hwmon minor event"); + case MTC_EVENT_HWMON_MAJOR: return("hwmon major event"); + case MTC_EVENT_HWMON_CRIT: return("hwmon critical event"); + case MTC_EVENT_HWMON_RESET: return("hwmon reset event"); + case MTC_EVENT_HWMON_LOG: return("hwmon log"); + case MTC_EVENT_HWMON_POWERDOWN: return("hwmon powerdown event"); /* OBS */ + case MTC_EVENT_HWMON_POWERCYCLE:return("hwmon powercycle event"); + + /* Host Commands */ + case MTC_CMD_ADD_HOST: return("add host"); + case MTC_CMD_DEL_HOST: return("del host"); + case MTC_CMD_MOD_HOST: return("modify host"); + case MTC_CMD_QRY_HOST: return("query host"); + case MTC_CMD_START_HOST: return("start host service"); + case MTC_CMD_STOP_HOST: return("stop host service"); + + /* VM Instance Commands */ + case MTC_CMD_ADD_INST: return("add instance"); + case MTC_CMD_DEL_INST: return("delete instance"); + case MTC_CMD_MOD_INST: return("modify instance"); + case MTC_CMD_QRY_INST: return("query instance"); + case MTC_CMD_VOTE_INST: return ("vote instance"); + case MTC_CMD_NOTIFY_INST: return ("notify instance"); + case MTC_EVENT_VOTE_NOTIFY: return ("notify instance event"); + + /* service events */ + case MTC_SERVICE_PMOND: return ("pmond service"); + case MTC_SERVICE_RMOND: return ("rmond service"); + case MTC_SERVICE_HWMOND: return ("hwmond service"); + case MTC_SERVICE_HEARTBEAT: return ("heartbeat service"); + default: + break ; + } + return ( "unknown"); +} + + +void print_mtc_message ( string hostname, int direction, mtc_message_type & msg , const char * iface, bool force ) +{ + string str = "-" ; + 
if ( msg.buf[0] ) + str = msg.buf ; + + if ( force ) + { + ilog ("%s %s %s (%s network) %d.%d %x:%x:%x.%x.%x.%x [%s] %s\n", + hostname.c_str(), + direction ? "rx <-" : "tx ->" , + get_mtcNodeCommand_str (msg.cmd), + iface, + msg.ver, + msg.rev, + msg.cmd, + msg.num, + msg.parm[0], + msg.parm[1], + msg.parm[2], + msg.parm[3], + msg.hdr, + str.c_str()); + } + else + { + mlog1 ("%s %s %s (%s network) %d.%d %x:%x:%x.%x.%x.%x [%s] %s\n", + hostname.c_str(), + direction ? "rx <-" : "tx ->" , + get_mtcNodeCommand_str (msg.cmd), + iface, + msg.ver, + msg.rev, + msg.cmd, + msg.num, + msg.parm[0], + msg.parm[1], + msg.parm[2], + msg.parm[3], + msg.hdr, + str.c_str()); + } +} + +/* Graceful recovery stages strings and string get'er */ +static std::string recoveryStages_str [MTC_RECOVERY__STAGES +1] ; +static std::string disableStages_str [MTC_DISABLE__STAGES +1] ; +static std::string enableStages_str [MTC_ENABLE__STAGES +1] ; +static std::string sensorStages_str [MTC_SENSOR__STAGES +1] ; +static std::string powerStages_str [MTC_POWER__STAGES +1] ; +static std::string powercycleStages_str [MTC_POWERCYCLE__STAGES +1] ; +static std::string resetStages_str [MTC_RESET__STAGES +1] ; +static std::string reinstallStages_str [MTC_REINSTALL__STAGES +1] ; /* indexed by MTC_REINSTALL__* stages (see get_reinstallStages_str), so it is sized from that enum rather than the reset one */ +static std::string oosTestStages_str [MTC_OOS_TEST__STAGES +1] ; +static std::string insvTestStages_str [MTC_INSV_TEST__STAGES +1] ; +static std::string configStages_str [MTC_CONFIG__STAGES +1] ; +static std::string addStages_str [MTC_ADD__STAGES +1] ; +static std::string delStages_str [MTC_DEL__STAGES +1] ; +static std::string subStages_str [MTC_SUBSTAGE__STAGES +1] ; + +void mtc_stages_init ( void ) +{ + enableStages_str [MTC_ENABLE__START ] = "Handler-Start"; + enableStages_str [MTC_ENABLE__RESERVED_1 ] = "reserved 1"; + enableStages_str [MTC_ENABLE__HEARTBEAT_CHECK ] = "Heartbeat-Check"; + enableStages_str [MTC_ENABLE__HEARTBEAT_STOP_CMD ] = "Heartbeat-Stop"; + enableStages_str [MTC_ENABLE__RECOVERY_TIMER ] = "Recovery-Start"; +
enableStages_str [MTC_ENABLE__RECOVERY_WAIT ] = "Recovery-Wait"; + enableStages_str [MTC_ENABLE__RESET_PROGRESSION ] = "Reset-Prog"; + enableStages_str [MTC_ENABLE__RESET_WAIT ] = "Reset-Prog-Wait"; + enableStages_str [MTC_ENABLE__INTEST_START ] = "Intest-Start"; + enableStages_str [MTC_ENABLE__MTCALIVE_PURGE ] = "MtcAlive-Purge"; + enableStages_str [MTC_ENABLE__MTCALIVE_WAIT ] = "MtcAlive-Wait"; + enableStages_str [MTC_ENABLE__CONFIG_COMPLETE_WAIT ] = "Config-Complete-Wait"; + enableStages_str [MTC_ENABLE__GOENABLED_TIMER ] = "GoEnable-Start"; + enableStages_str [MTC_ENABLE__GOENABLED_WAIT ] = "GoEnable-Wait"; + enableStages_str [MTC_ENABLE__PMOND_READY_WAIT ] = "PmondReady-Wait"; + enableStages_str [MTC_ENABLE__HOST_SERVICES_START ] = "HostServices-Start"; + enableStages_str [MTC_ENABLE__HOST_SERVICES_WAIT ] = "HostServices-Wait"; + enableStages_str [MTC_ENABLE__SERVICES_START_WAIT ] = "Services-Start"; + enableStages_str [MTC_ENABLE__HEARTBEAT_WAIT ] = "Heartbeat-Wait"; + enableStages_str [MTC_ENABLE__HEARTBEAT_SOAK ] = "Heartbeat-Soak"; + enableStages_str [MTC_ENABLE__STATE_CHANGE ] = "State-Change"; + enableStages_str [MTC_ENABLE__WORKQUEUE_WAIT ] = "WorkQueue-Wait"; + enableStages_str [MTC_ENABLE__WAIT ] = "Enable-Wait"; + enableStages_str [MTC_ENABLE__DONE ] = "Enable-Done"; + enableStages_str [MTC_ENABLE__ENABLED ] = "Host-Enabled"; + enableStages_str [MTC_ENABLE__SUBF_FAILED ] = "Host-Degraded-Subf-Failed"; + enableStages_str [MTC_ENABLE__DEGRADED ] = "Host-Degraded"; + enableStages_str [MTC_ENABLE__FAILURE ] = "Failure"; + enableStages_str [MTC_ENABLE__FAILURE_WAIT ] = "Failure-Wait"; + enableStages_str [MTC_ENABLE__FAILURE_SWACT_WAIT ] = "Failure-Swact-Wait"; + enableStages_str [MTC_ENABLE__STAGES ] = "unknown" ; + + recoveryStages_str[MTC_RECOVERY__START ] = "Handler-Start"; + recoveryStages_str[MTC_RECOVERY__REQ_MTCALIVE ] = "Req-MtcAlive"; + recoveryStages_str[MTC_RECOVERY__REQ_MTCALIVE_WAIT ] = "Req-MtcAlive-Wait"; + 
recoveryStages_str[MTC_RECOVERY__RESET_RECV_WAIT ] = "Reset-Recv-Wait"; + recoveryStages_str[MTC_RECOVERY__MTCALIVE_TIMER ] = "MtcAlive-Timer"; + recoveryStages_str[MTC_RECOVERY__MTCALIVE_WAIT ] = "MtcAlive-Wait"; + recoveryStages_str[MTC_RECOVERY__GOENABLED_TIMER ] = "GoEnable-Timer"; + recoveryStages_str[MTC_RECOVERY__GOENABLED_WAIT ] = "GoEnable-Wait"; + recoveryStages_str[MTC_RECOVERY__HOST_SERVICES_START] = "HostServices-Start"; + recoveryStages_str[MTC_RECOVERY__HOST_SERVICES_WAIT ] = "HostServices-Wait"; + recoveryStages_str[MTC_RECOVERY__CONFIG_COMPLETE_WAIT]= "Compute-Config-Wait"; + recoveryStages_str[MTC_RECOVERY__SUBF_GOENABLED_TIMER]= "Subf-GoEnable-Timer"; + recoveryStages_str[MTC_RECOVERY__SUBF_GOENABLED_WAIT] = "Subf-GoEnable-Wait"; + recoveryStages_str[MTC_RECOVERY__SUBF_SERVICES_START] = "Subf-Services-Start"; + recoveryStages_str[MTC_RECOVERY__SUBF_SERVICES_WAIT ] = "Subf-Services-Wait"; + recoveryStages_str[MTC_RECOVERY__HEARTBEAT_START ] = "Heartbeat-Start"; + recoveryStages_str[MTC_RECOVERY__HEARTBEAT_SOAK ] = "Heartbeat-Soak"; + recoveryStages_str[MTC_RECOVERY__STATE_CHANGE ] = "State Change"; + recoveryStages_str[MTC_RECOVERY__ENABLE_START ] = "Enable-Start"; + recoveryStages_str[MTC_RECOVERY__FAILURE ] = "Failure"; + recoveryStages_str[MTC_RECOVERY__WORKQUEUE_WAIT ] = "WorkQ-Wait"; + recoveryStages_str[MTC_RECOVERY__ENABLE_WAIT ] = "Enable-Wait"; + recoveryStages_str[MTC_RECOVERY__STAGES ] = "unknown"; + + disableStages_str [MTC_DISABLE__START ] = "Disable-Start"; + disableStages_str [MTC_DISABLE__HANDLE_POWERON_SEND ] = "Disable-PowerOn-Send"; + disableStages_str [MTC_DISABLE__HANDLE_POWERON_RECV ] = "Disable-PowerOn-Recv"; + disableStages_str [MTC_DISABLE__HANDLE_FORCE_LOCK ] = "Disable-Force-Lock"; + disableStages_str [MTC_DISABLE__RESET_HOST_WAIT ] = "Disable-Reset-Wait"; + disableStages_str [MTC_DISABLE__DISABLE_SERVICES ] = "Disable-Services-Start"; + disableStages_str [MTC_DISABLE__DIS_SERVICES_WAIT ] = "Disable-Services-Wait"; + 
disableStages_str [MTC_DISABLE__HANDLE_CEPH_LOCK ] = "Disable-Ceph-Lock-Wait"; + disableStages_str [MTC_DISABLE__RESERVED ] = "Disable-reserved"; + disableStages_str [MTC_DISABLE__TASK_STATE_UPDATE ] = "Disable-States-Update"; + disableStages_str [MTC_DISABLE__WORKQUEUE_WAIT ] = "Disable-WorkQ-Wait"; + disableStages_str [MTC_DISABLE__DISABLED ] = "Host-Disabled"; + disableStages_str [MTC_DISABLE__STAGES ] = "Unknown"; + + powerStages_str [MTC_POWERON__START ] = "Power-On-Start"; + powerStages_str [MTC_POWERON__POWER_STATUS_WAIT ] = "Power-On-Status"; /* NOTE(review): the next statement assigns the same index again, so this "Power-On-Status" value is always overwritten; presumably a separate status-query stage was intended here - confirm against the mtc_powerStages_enum definition */ + powerStages_str [MTC_POWERON__POWER_STATUS_WAIT ] = "Power-On-Status-Wait"; + powerStages_str [MTC_POWERON__REQ_SEND ] = "Power-On-Req-Send"; + powerStages_str [MTC_POWERON__RESP_WAIT ] = "Power-On-Resp-Wait"; + powerStages_str [MTC_POWERON__DONE ] = "Power-On-Done"; + powerStages_str [MTC_POWERON__FAIL ] = "Power-On-Fail"; + powerStages_str [MTC_POWERON__FAIL_WAIT ] = "Power-On-Fail-Wait"; + powerStages_str [MTC_POWEROFF__START ] = "Power-Off-Start"; + powerStages_str [MTC_POWEROFF__REQ_SEND ] = "Power-Off-Req-Send"; + powerStages_str [MTC_POWEROFF__RESP_WAIT ] = "Power-Off-Resp-Wait"; + powerStages_str [MTC_POWEROFF__DONE ] = "Power-Off-Done"; + powerStages_str [MTC_POWEROFF__FAIL ] = "Power-Off-Fail"; + powerStages_str [MTC_POWEROFF__FAIL_WAIT ] = "Power-Off-Fail-Wait"; + powerStages_str [MTC_POWER__DONE ] = "Power-Done"; + powerStages_str [MTC_POWER__STAGES ] = "Power-Unknown"; + + + powercycleStages_str [MTC_POWERCYCLE__START ] = "Power-Cycle-Start"; + powercycleStages_str [MTC_POWERCYCLE__POWEROFF ] = "Power-Cycle-Off"; + powercycleStages_str [MTC_POWERCYCLE__POWEROFF_WAIT ] = "Power-Cycle-Off-Wait"; + powercycleStages_str [MTC_POWERCYCLE__POWERON ] = "Power-Cycle-On"; + powercycleStages_str [MTC_POWERCYCLE__POWERON_REQWAIT] = "Power-Cycle-On-Req-Wait"; + powercycleStages_str [MTC_POWERCYCLE__POWERON_VERIFY] = "Power-Cycle-On-Verify"; + powercycleStages_str [MTC_POWERCYCLE__POWERON_WAIT ] =
"Power-Cycle-On-Wait"; + powercycleStages_str [MTC_POWERCYCLE__DONE ] = "Power-Cycle-Done"; + powercycleStages_str [MTC_POWERCYCLE__FAIL ] = "Power-Cycle-Fail"; + powercycleStages_str [MTC_POWERCYCLE__HOLDOFF ] = "Power-Cycle-Hold-Off"; + powercycleStages_str [MTC_POWERCYCLE__COOLOFF ] = "Power-Cycle-Cool-Off"; + + powercycleStages_str [MTC_POWERCYCLE__POWEROFF_CMND_WAIT] = "Power-Cycle-Off-Cmnd-Wait"; + powercycleStages_str [MTC_POWERCYCLE__POWERON_CMND_WAIT] = "Power-Cycle-On-Cmnd-Wait"; + powercycleStages_str [MTC_POWERCYCLE__POWERON_VERIFY_WAIT]= "Power-Cycle-On-Verify-Wait"; + + + resetStages_str [MTC_RESET__START ] = "Reset-Start"; + resetStages_str [MTC_RESET__REQ_SEND ] = "Reset-Req-Send"; + resetStages_str [MTC_RESET__RESP_WAIT ] = "Reset-Resp-Wait"; + resetStages_str [MTC_RESET__QUEUE ] = "Reset-Queue"; + resetStages_str [MTC_RESET__OFFLINE_WAIT ] = "Reset-Offline-Wait"; + resetStages_str [MTC_RESET__DONE ] = "Reset-Done"; + resetStages_str [MTC_RESET__FAIL ] = "Reset-Fail"; + resetStages_str [MTC_RESET__FAIL_WAIT ] = "Reset-Fail-Wait"; + resetStages_str [MTC_RESET__STAGES ] = "Reset-Unknown"; + + reinstallStages_str [MTC_REINSTALL__START ] = "Reinstall-Start"; + reinstallStages_str [MTC_REINSTALL__RESP_WAIT ] = "Reinstall-Response-Wait"; + reinstallStages_str [MTC_REINSTALL__OFFLINE_WAIT ] = "Reinstall-Offline-Wait"; + reinstallStages_str [MTC_REINSTALL__ONLINE_WAIT ] = "Reinstall-Online-Wait"; + reinstallStages_str [MTC_REINSTALL__FAIL ] = "Reinstall-Failure"; + reinstallStages_str [MTC_REINSTALL__MSG_DISPLAY ] = "Reinstall-Message-Display"; + reinstallStages_str [MTC_REINSTALL__DONE ] = "Reinstall-Done"; + reinstallStages_str [MTC_REINSTALL__STAGES ] = "Reinstall-Unknown"; + + oosTestStages_str [MTC_OOS_TEST__LOAD_NEXT_TEST ] = "Test-Load-Next"; + oosTestStages_str [MTC_OOS_TEST__BMC_ACCESS_TEST ] = "Test-BMC-Access-Test"; + oosTestStages_str [MTC_OOS_TEST__BMC_ACCESS_RESULT ] = "Test-BMC-Access-Result"; + oosTestStages_str [MTC_OOS_TEST__START_WAIT ] 
= "Test-Start-Wait"; + oosTestStages_str [MTC_OOS_TEST__WAIT ] = "Test-Wait"; + oosTestStages_str [MTC_OOS_TEST__DONE ] = "Test-Done"; + oosTestStages_str [MTC_OOS_TEST__STAGES ] = "Test-Unknown"; + + insvTestStages_str[MTC_INSV_TEST__START ] = "Test-Start"; + insvTestStages_str[MTC_INSV_TEST__WAIT ] = "Test-Wait"; + insvTestStages_str[MTC_INSV_TEST__RUN ] = "Test-Run"; + insvTestStages_str[MTC_INSV_TEST__STAGES ] = "Test-Unknown"; + + sensorStages_str [MTC_SENSOR__START ] = "Sensor-Read-Start"; + sensorStages_str [MTC_SENSOR__READ_FAN ] = "Sensor-Read-Fans"; + sensorStages_str [MTC_SENSOR__READ_TEMP ] = "Sensor-Read-Temp"; + sensorStages_str [MTC_SENSOR__STAGES ] = "Sensor-Unknown"; + + configStages_str [MTC_CONFIG__START ] = "Config-Start"; + configStages_str [MTC_CONFIG__SHOW ] = "Config-Show"; + configStages_str [MTC_CONFIG__MODIFY ] = "Config-Modify"; + configStages_str [MTC_CONFIG__VERIFY ] = "Config-Verify"; + configStages_str [MTC_CONFIG__FAILURE ] = "Config-Fail"; + configStages_str [MTC_CONFIG__TIMEOUT ] = "Config-Timeout"; + configStages_str [MTC_CONFIG__DONE ] = "Config-Done"; + configStages_str [MTC_CONFIG__STAGES ] = "Config-Unknown"; + + addStages_str [MTC_ADD__START ] = "Add-Start"; + addStages_str [MTC_ADD__START_DELAY ] = "Add-Start-Delay"; + addStages_str [MTC_ADD__START_SERVICES ] = "Add-Start-Services"; + addStages_str [MTC_ADD__START_SERVICES_WAIT ] = "Add-Start-Services-Wait"; +// addStages_str [MTC_ADD__CLEAR_ALARMS ] = "Add-Clear-Alarms"; + addStages_str [MTC_ADD__MTC_SERVICES ] = "Add-Mtc-Services"; + addStages_str [MTC_ADD__CLEAR_TASK ] = "Add-Clear-Task"; + addStages_str [MTC_ADD__WORKQUEUE_WAIT ] = "Add-WorkQ-Wait"; + addStages_str [MTC_ADD__DONE ] = "Add-Done"; + addStages_str [MTC_ADD__STAGES ] = "Add-Unknown"; + + delStages_str [MTC_DEL__START ] = "Del-Start"; + delStages_str [MTC_DEL__WAIT ] = "Del-Wait"; + delStages_str [MTC_DEL__DONE ] = "Del-Done"; + + subStages_str [MTC_SUBSTAGE__START ] = "subStage-Start"; + subStages_str 
[MTC_SUBSTAGE__SEND ] = "subStage-Send"; + subStages_str [MTC_SUBSTAGE__RECV ] = "subStage-Recv"; + subStages_str [MTC_SUBSTAGE__WAIT ] = "subStage-Wait"; + subStages_str [MTC_SUBSTAGE__DONE ] = "subStage-Done"; + subStages_str [MTC_SUBSTAGE__FAIL ] = "subStage-Fail"; +} + +string get_delStages_str ( mtc_delStages_enum stage ) +{ + if ( stage >= MTC_DEL__STAGES ) + { + return (delStages_str[MTC_DEL__STAGES]); + } + return (delStages_str[stage]); +} + +/* Get the specified 'enable' stage string */ +string get_enableStages_str ( mtc_enableStages_enum stage ) +{ + if ( stage >= MTC_ENABLE__STAGES ) + { + return (enableStages_str[MTC_ENABLE__STAGES]); + } + return (enableStages_str[stage]); +} + +/* Get the specified 'recovery' stage string */ +string get_recoveryStages_str ( mtc_recoveryStages_enum stage ) +{ + if ( stage >= MTC_RECOVERY__STAGES ) + { + return (recoveryStages_str[MTC_RECOVERY__STAGES]); + } + return (recoveryStages_str[stage]); +} + +/* Get the specified 'config' stage string */ +string get_configStages_str ( mtc_configStages_enum stage ) +{ + if ( stage >= MTC_CONFIG__STAGES ) + { + return (configStages_str[MTC_CONFIG__STAGES]); + } + return (configStages_str[stage]); +} + +/* Get the specified 'disable' stage string */ +string get_disableStages_str ( mtc_disableStages_enum stage ) +{ + if ( stage >= MTC_DISABLE__STAGES ) + { + return (disableStages_str[MTC_DISABLE__STAGES]); + } + return (disableStages_str[stage]); +} + +/* Get the specified 'power' stage string */ +string get_powerStages_str ( mtc_powerStages_enum stage ) +{ + if ( stage >= MTC_POWER__STAGES ) + { + return (powerStages_str[MTC_POWER__STAGES]); + } + return (powerStages_str[stage]); +} + +/* Get the specified 'powercycle' stage string */ +string get_powercycleStages_str ( mtc_powercycleStages_enum stage ) +{ + if ( stage >= MTC_POWERCYCLE__STAGES ) + { + return (powercycleStages_str[MTC_POWERCYCLE__STAGES]); + } + return (powercycleStages_str[stage]); +} + +/* Get the specified 
'reset' stage string */ +string get_resetStages_str ( mtc_resetStages_enum stage ) +{ + if ( stage >= MTC_RESET__STAGES ) + { + return (resetStages_str[MTC_RESET__STAGES]); + } + return (resetStages_str[stage]); +} + +/* Get the specified 'reinstall' stage string */ +string get_reinstallStages_str ( mtc_reinstallStages_enum stage ) +{ + if ( stage >= MTC_REINSTALL__STAGES ) + { + return (reinstallStages_str[MTC_REINSTALL__STAGES]); + } + return (reinstallStages_str[stage]); +} + +/* Get the specified 'out-of-service test' stage string */ +string get_oosTestStages_str ( mtc_oosTestStages_enum stage ) +{ + if ( stage >= MTC_OOS_TEST__STAGES ) + { + return (oosTestStages_str[MTC_OOS_TEST__STAGES]); + } + return (oosTestStages_str[stage]); +} + +/* Get the specified 'in-service test' stage string */ +string get_insvTestStages_str ( mtc_insvTestStages_enum stage ) +{ + if ( stage >= MTC_INSV_TEST__STAGES ) + { + return (insvTestStages_str[MTC_INSV_TEST__STAGES]); + } + return (insvTestStages_str[stage]); +} + +string get_sensorStages_str ( mtc_sensorStages_enum stage ) +{ + if ( stage >= MTC_SENSOR__STAGES ) + { + return (sensorStages_str[MTC_SENSOR__STAGES]); + } + return (sensorStages_str[stage]); +} + +/** Return the string representing the specified 'sub' stage */ +string get_subStages_str ( mtc_subStages_enum stage ) +{ + if ( stage >= MTC_SUBSTAGE__STAGES ) + { + return (subStages_str[MTC_SUBSTAGE__STAGES]); + } + return (subStages_str[stage]); +} + +void log_adminAction ( string hostname, + mtc_nodeAdminAction_enum currAction, + mtc_nodeAdminAction_enum newAction ) +{ + if (( currAction != MTC_ADMIN_ACTION__LOCK ) && + ( newAction == MTC_ADMIN_ACTION__LOCK )) + { + ilog ("%s Lock Action\n", hostname.c_str()); + } + else if (( currAction != MTC_ADMIN_ACTION__FORCE_LOCK ) && + ( newAction == MTC_ADMIN_ACTION__FORCE_LOCK )) + { + ilog ("%s Lock Action (Force)\n", hostname.c_str()); + } + else if (( currAction != MTC_ADMIN_ACTION__UNLOCK ) && + ( newAction == 
MTC_ADMIN_ACTION__UNLOCK )) + { + ilog ("%s Unlock Action\n", hostname.c_str()); + } + else if (( currAction != MTC_ADMIN_ACTION__SWACT ) && + ( newAction == MTC_ADMIN_ACTION__SWACT )) + { + ilog ("%s Swact Action\n", hostname.c_str()); + } + else if (( currAction != MTC_ADMIN_ACTION__FORCE_SWACT ) && + ( newAction == MTC_ADMIN_ACTION__FORCE_SWACT )) + { + ilog ("%s Swact Action (Force)\n", hostname.c_str()); + } + else if (( currAction != MTC_ADMIN_ACTION__ADD ) && + ( newAction == MTC_ADMIN_ACTION__ADD )) + { + ilog ("%s Add Action\n", hostname.c_str()); + } + else if (( currAction != MTC_ADMIN_ACTION__RESET ) && + ( newAction == MTC_ADMIN_ACTION__RESET )) + { + ilog ("%s Reset Action\n", hostname.c_str()); + } + else if (( currAction != MTC_ADMIN_ACTION__REBOOT ) && + ( newAction == MTC_ADMIN_ACTION__REBOOT )) + { + ilog ("%s Reboot Action\n", hostname.c_str()); + } + else if (( currAction != MTC_ADMIN_ACTION__REINSTALL ) && + ( newAction == MTC_ADMIN_ACTION__REINSTALL )) + { + ilog ("%s Reinstall Action\n", hostname.c_str()); + } + else if (( currAction != MTC_ADMIN_ACTION__POWEROFF ) && + ( newAction == MTC_ADMIN_ACTION__POWEROFF )) + { + ilog ("%s Power-Off Action\n", hostname.c_str()); + } + else if (( currAction != MTC_ADMIN_ACTION__POWERON ) && + ( newAction == MTC_ADMIN_ACTION__POWERON )) + { + ilog ("%s Power-On Action\n", hostname.c_str()); + } +} + +/* Init recovery control structure */ +void recovery_ctrl_init ( recovery_ctrl_type & recovery_ctrl ) +{ + recovery_ctrl.state = RECOVERY_STATE__INIT ; + recovery_ctrl.attempts = 0 ; + recovery_ctrl.holdoff = 0 ; + recovery_ctrl.queries = 0 ; + recovery_ctrl.retries = 0 ; +} + +/* returns 'true' if the specified command is a host services command */ +bool is_host_services_cmd ( unsigned int cmd ) +{ + if (( cmd == MTC_CMD_START_CONTROL_SVCS ) || + ( cmd == MTC_CMD_START_COMPUTE_SVCS ) || + ( cmd == MTC_CMD_START_STORAGE_SVCS ) || + ( cmd == MTC_CMD_STOP_CONTROL_SVCS ) || + ( cmd == MTC_CMD_STOP_COMPUTE_SVCS 
) || + ( cmd == MTC_CMD_STOP_STORAGE_SVCS ) || + ( cmd == MTC_CMD_HOST_SVCS_RESULT )) + { + return (true); + } + return (false); +} diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeBase.h b/mtce-common/cgts-mtce-common-1.0/common/nodeBase.h new file mode 100755 index 00000000..cf42f347 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeBase.h @@ -0,0 +1,1255 @@ +#ifndef __INCLUDE_NODEBASE_HH__ +#define __INCLUDE_NODEBASE_HH__ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform "Node Base" Header + */ + +#include +#include +#include + +using namespace std; + +#include "fitCodes.h" +#include "logMacros.h" +#include "returnCodes.h" +#include "nodeTimers.h" + +#define ALIGN_PACK(x) __attribute__((packed)) x + +/* Out-Of-Service Stress tests */ +#define WANT_SYSINV_API_STRESS 0x00000001 +#define WANT_SERVICE_UPDOWN_API_STRESS 0x00000002 +#define WANT_SMGR_API_STRESS 0x00000004 +#define WANT_FM_API_STRESS 0x00000008 +#define WANT_TOKEN_REFRESH_STRESS 0x00000010 + +void daemon_exit ( void ); + +#define MAX_EVENT_BUF (2048) +#define MAX_RECV_FAILS_B4_RECONNECT (4) +#define MAX_INFLIGHT_HTTP_REQUESTS (7) +#define MAX_FILE_SIZE (128) +#define MAX_FSM_SSH2_RETRIES (20) +#define MAX_POWERCYCLE_STAGE_RETRIES (3) +#define MAX_POWERCYCLE_ATTEMPT_RETRIES (3) +#define MAX_POWERCYCLE_QUERY_RETRIES (10) +#define MAX_BMC_POWER_CTRL_RETRIES (5) + +/* Added for failure handling offline feature */ +#define MIN_OFFLINE_PERIOD_MSECS (10) +#define MIN_OFFLINE_THRESHOLD (1) + +/* Board management Expect Script return Codes: 1xx range + * These codes are also defined in /usr/local/sbin/bmvars.exp */ +#define FAIL_BM_UNSUPPORTED (100*256) +#define FAIL_BM_INV_QUERY (101*256) +#define FAIL_BM_NO_IP (102*256) + +#define FAIL_BM_PING_TEST (112*256) +#define FAIL_BM_PING_SPAWN (113*256) +#define FAIL_BM_PING_TIMEOUT (114*256) +#define FAIL_BM_PING_ZERO_PERCENT (115*256) 
+#define FAIL_BM_PING_FIFTY_PERCENT (116*256) + +#define FAIL_BM_USERNAME (120*256) +#define FAIL_BM_IPADDR (121*256) +#define FAIL_BM_PASSWORD (122*256) + + +#define MTC_PARM_UPTIME_IDX (0) +#define MTC_PARM_HEALTH_IDX (1) +#define MTC_PARM_FLAGS_IDX (2) +#define MTC_PARM_MAX_IDX (3) + +/** 'I Am ' flags for maintenance. + * + * These flags are shipped in parm[2] of the + * mtcAlive message from each host. */ +#define MTC_FLAG__I_AM_CONFIGURED (0x00000001) +#define MTC_FLAG__I_AM_NOT_HEALTHY (0x00000002) +#define MTC_FLAG__I_AM_HEALTHY (0x00000004) +#define MTC_FLAG__I_AM_LOCKED (0x00000008) +#define MTC_FLAG__SUBF_CONFIGURED (0x00000010) +#define MTC_FLAG__MAIN_GOENABLED (0x00000020) +#define MTC_FLAG__SUBF_GOENABLED (0x00000040) +#define MTC_FLAG__SM_DEGRADED (0x00000080) +#define MTC_FLAG__PATCHING (0x00000100) /* Patching in progress */ +#define MTC_FLAG__PATCHED (0x00000200) /* Patched but not reset */ + +#define MTC_UNHEALTHY_THRESHOLD (3) + +/* Node Health States */ +#define NODE_HEALTH_UNKNOWN (0) +#define NODE_HEALTHY (1) +#define NODE_UNHEALTHY (2) + +#define HOST_IS_VIRTUAL ((const char *)"/var/run/virtual.host") + +/** Configuration Pass/Fail Flag File */ +#define CONFIG_PASS_FILE ((const char *)"/var/run/.config_pass") +#define CONFIG_FAIL_FILE ((const char *)"/var/run/.config_fail") +#define NODE_LOCKED_FILE ((const char *)"/var/run/.node_locked") +#define NODE_RESET_FILE ((const char *)"/var/run/.node_reset") +#define SMGMT_DEGRADED_FILE ((const char *)"/var/run/.sm_degraded") +#define SMGMT_UNHEALTHY_FILE ((const char *)"/var/run/.sm_node_unhealthy") + +/** path to and module init file name */ +#define MTCE_CONF_FILE ((const char *)"/etc/mtc.conf") +#define MTCE_INI_FILE ((const char *)"/etc/mtc.ini") +#define NFVI_PLUGIN_CFG_FILE ((const char *)"/etc/nfv/nfv_plugins/nfvi_plugins/config.ini") +#define SYSINV_CFG_FILE ((const char *)"/etc/sysinv/sysinv.conf") +#define HWMON_CONF_FILE ((const char *)"/etc/mtc/hwmond.conf") + + +#define
GOENABLED_DIR ((const char *)"/etc/goenabled.d") /* generic */ +#define GOENABLED_COMPUTE_DIR ((const char *)"/etc/goenabled.d/compute") +#define GOENABLED_STORAGE_DIR ((const char *)"/etc/goenabled.d/storage") +#define GOENABLED_CONTROL_DIR ((const char *)"/etc/goenabled.d/control") + +#define GOENABLED_MAIN_READY ((const char *)"/var/run/.goenabled") +#define GOENABLED_SUBF_READY ((const char *)"/var/run/.goenabled_subf") + +#define GOENABLED_MAIN_PASS ((const char *)"/var/run/goenabled") +#define GOENABLED_SUBF_PASS ((const char *)"/var/run/goenabled_subf") +#define GOENABLED_MAIN_FAIL ((const char *)"/var/run/goenabled_failed") +#define GOENABLED_SUBF_FAIL ((const char *)"/var/run/goenabled_subf_failed") + +#define CONFIG_COMPLETE_CONTROL ((const char *)"/var/run/.controller_config_complete") +#define CONFIG_COMPLETE_COMPUTE ((const char *)"/var/run/.compute_config_complete") +#define CONFIG_COMPLETE_STORAGE ((const char *)"/var/run/.storage_config_complete") +#define CONFIG_COMPLETE_FILE ((const char *)"/etc/platform/.initial_config_complete") + +#define DISABLE_COMPUTE_SERVICES ((const char *)"/var/run/.disable_compute_services") + +#define PATCHING_IN_PROG_FILE ((const char *)"/var/run/patch_installing") +#define NODE_IS_PATCHED_FILE ((const char *)"/var/run/node_is_patched") + +#define PLATFORM_CONF_FILE ((const char *)"/etc/platform/platform.conf") +#define PLATFORM_CONF_DIR ((const char *)"/etc/platform") +#define PLATFORM_SIMPLEX_MODE ((const char *)"/etc/platform/simplex") +#define SERVICES_DIR ((const char *)"/etc/services.d") +#define SERVER_PROFILE_DIR ((const char *)"/etc/bmc/server_profiles.d") +#define PASSWORD_FILE ((const char *)"/etc/passwd") +#define SHADOW_FILE ((const char *)"/etc/shadow") +#define USERNAME_ROOT ("wrsroot") + +#define PMON_CONF_FILE_DIR ((const char *)"/etc/pmon.d") + +#define BM_DNSMASQ_FILENAME ((const char *)"dnsmasq.bmc_hosts") + +/* Added for Centos */ +#define CENTOS_RELEASE_FILE ((const char *)"/etc/centos-release") 
+#define SYSTEMD_SERVICE_FILE_DIR ((const char *)"/usr/lib/systemd/system") + +#define THREAD_NAME__IPMITOOL ((const char *)("ipmitool")) + +#define IPMITOOL_PATH_AND_FILENAME ((const char *)("/usr/bin/ipmitool")) +#define IPMITOOL_OUTPUT_DIR ((const char *)("/var/run/ipmitool/")) + +/** 'lo' interface IP address - TODO: get it from the interface */ +#define LOOPBACK_IP "127.0.0.1" +#define LOOPBACK_IPV6 "::1" +#define LOCALHOST "localhost" + +#define NONE (const char *)"none" + +/** Largest heartbeat pulse (req/resp) message size */ +#define MAX_API_LOG_LEN (0x1000) +#define MAX_FILENAME_LEN (100) +#define MAX_SYSTEM_CMD_LEN (200) + +#define MAX_START_SERVICES_RETRY (20) + +#define DEFAULT_MTCALIVE_TIMEOUT (1200) +#define DEFAULT_GOENABLE_TIMEOUT (300) +#define DEFAULT_DOR_MODE_TIMEOUT (20) +#define DEFAULT_DOR_MODE_CPE_TIMEOUT (600) + +/** TODO: Convert names to omit JSON part */ +#define MTC_JSON_INV_LABEL "ihosts" +#define MTC_JSON_INV_NEXT "next" +#define MTC_JSON_INV_UUID "uuid" +#define MTC_JSON_INV_NAME "hostname" +#define MTC_JSON_INV_HOSTIP "mgmt_ip" +#define MTC_JSON_INV_HOSTMAC "mgmt_mac" +#define MTC_JSON_INV_INFRAIP "infra_ip" +#define MTC_JSON_INV_AVAIL "availability" +#define MTC_JSON_INV_OPER "operational" +#define MTC_JSON_INV_ADMIN "administrative" +#define MTC_JSON_INV_OPER_SUBF "subfunction_oper" +#define MTC_JSON_INV_AVAIL_SUBF "subfunction_avail" +#define MTC_JSON_INV_TYPE "personality" +#define MTC_JSON_INV_FUNC "subfunctions" // personality" +#define MTC_JSON_INV_TASK "task" +#define MTC_JSON_INV_ACTION "action" +#define MTC_JSON_INV_UPTIME "uptime" +#define MTC_JSON_INV_BMIP "bm_ip" +#define MTC_JSON_INV_BMTYPE "bm_type" +#define MTC_JSON_INV_BMUN "bm_username" + +#define MTC_JSON_SEVERITY "severity" + +/* These Task strings should not be changed without + * the corresponding change in Horizon. + * + * Task strings must be limited to less than 64 bytes. 
+ * + **/ +#define MAX_TASK_STR_LEN (63) /* leave room for null termination */ +#define MTC_TASK_DISABLE_CONTROL "Disabling Controller" +#define MTC_TASK_DISABLE_SERVICES "Disabling Services" +#define MTC_TASK_UNLOCK_FAILED "Unlock Operation Failed" +#define MTC_TASK_REBOOT_REQUEST "Reboot Request" +#define MTC_TASK_RESET_REQUEST "Reset Request" +#define MTC_TASK_REBOOTING "Rebooting" +#define MTC_TASK_RESETTING "Resetting" +#define MTC_TASK_REBOOT_FAIL "Reboot Failed" +#define MTC_TASK_REBOOT_TIMEOUT "Reboot/Reset Timeout" +#define MTC_TASK_REBOOT_FAIL_RETRY "Reboot Failed, retrying (%d of %d)" +#define MTC_TASK_REBOOT_ABORT "Reboot Failed, try again when host is 'online'" +#define MTC_TASK_RESET_PROG "Rebooting/Resetting Host" +#define MTC_TASK_REINSTALL "Reinstalling Host" +#define MTC_TASK_REINSTALL_FAIL "Reinstall Failed" +#define MTC_TASK_REINSTALL_SUCCESS "Reinstall Succeeded" +#define MTC_TASK_BOOTING "Booting" +#define MTC_TASK_BOOT_FAIL "Boot Failed, rebooting" +#define MTC_TASK_TESTING "Testing" +#define MTC_TASK_INITIALIZING "Initializing" +#define MTC_TASK_INIT_FAIL "Initialization Failed, recovering" +#define MTC_TASK_START_SERVICE_FAIL "Start Services Failed" +#define MTC_TASK_START_SERVICE_TO "Start Services Timeout" +#define MTC_TASK_ENABLING "Enabling" +#define MTC_TASK_ENABLING_SUBF "Enabling Compute Service" +#define MTC_TASK_ENABLING_SUBF_FAIL "Enabling Compute Service Failed" +#define MTC_TASK_ENABLING_SUBF_TO "Enabling Compute Service Timeout" +#define MTC_TASK_ENABLE_WORK_FAIL "Enable Action Failed, re-enabling" +#define MTC_TASK_ENABLE_WORK_FAIL_ "Enable Action Failed" +#define MTC_TASK_ENABLE_WORK_TO "Enable Action Timeout, re-enabling" +#define MTC_TASK_ENABLE_WORK_TO_ "Enable Action Timeout" +#define MTC_TASK_ENABLE_FAIL_HB "Enable Heartbeat Failure, re-enabling" +#define MTC_TASK_RECOVERY_FAIL "Graceful Recovery Failed, re-enabling" +#define MTC_TASK_RECOVERY_WAIT "Graceful Recovery Wait" +#define MTC_TASK_RECOVERED "Gracefully 
Recovered" +#define MTC_TASK_MAIN_CONFIG_FAIL "Configuration Failed, re-enabling" +#define MTC_TASK_SUBF_CONFIG_FAIL "Compute Configuration Failed, re-enabling" +#define MTC_TASK_SUBF_CONFIG_FAIL_ "Compute Configuration Failed" +#define MTC_TASK_MAIN_CONFIG_TO "Configuration Timeout, re-enabling" +#define MTC_TASK_SUBF_CONFIG_TO "Compute Configuration Timeout, re-enabling" +#define MTC_TASK_SUBF_CONFIG_TO_ "Compute Configuration Timeout" +#define MTC_TASK_INTEST_FAIL "In-Test Failed, re-enabling" +#define MTC_TASK_INTEST_FAIL_ "In-Test Failed" +#define MTC_TASK_INTEST_FAIL_TO "In-Test Timeout, re-enabling" +#define MTC_TASK_INTEST_FAIL_TO_ "In-Test Timeout" +#define MTC_TASK_RESET_FAIL "Reset Failed" +#define MTC_TASK_RESET_QUEUE "Reset Failed, retrying (%d of %d)" +#define MTC_TASK_POWERON_FAIL "Power-On Failed" +#define MTC_TASK_POWERON_QUEUE "Power-On Failed, retrying (%d of %d)" +#define MTC_TASK_POWEROFF_FAIL "Power-Off Failed" +#define MTC_TASK_POWEROFF_QUEUE "Power-Off Failed, retrying (%d of %d)" + +#define MTC_TASK_BMC_NOT_PROV "Request Failed, Management Controller Not Provisioned" +#define MTC_TASK_BMC_NOT_CONNECTED "Request Failed, Management Controlle Not Accessible" + +#define MTC_TASK_DISABLE_REQ "Requesting Disable" +#define MTC_TASK_MIGRATE_INSTANCES "Migrating Instances" +#define MTC_TASK_DISABLE_SERVICES "Disabling Services" +#define MTC_TASK_POWERCYCLE_HOST "Critical Event Power-Cycle %d; due to critical sensor" +#define MTC_TASK_POWERCYCLE_HOLD "Critical Event Power-Cycle %d; recovery in %d minute(s)" +#define MTC_TASK_POWERCYCLE_COOL "Critical Event Power-Cycle %d; power-on in %d minute(s)" +#define MTC_TASK_POWERCYCLE_ON "Critical Event Power-Cycle %d; power-on host" +#define MTC_TASK_POWERCYCLE_RETRY "Critical Event Power-Cycle %d; power-on host (retry)" +#define MTC_TASK_POWERCYCLE_BOOT "Critical Event Power-Cycle %d; host is booting" +#define MTC_TASK_POWERCYCLE_FAIL "Critical Event Power-Cycle %d; failed" +#define MTC_TASK_POWERCYCLE_DOWN 
"Critical Event Power-Down ; due to persistent critical sensor" +#define MTC_TASK_RESETTING_HOST "Resetting Host, critical sensor" +#define MTC_TASK_CPE_SX_UNLOCK_MSG "Unlocking, please stand-by while the system gracefully reboots" +#define MTC_TASK_SELF_UNLOCK_MSG "Unlocking active controller, please stand-by while it reboots" +#define MTC_TASK_AUTO_RECOVERY "Critical failure. Auto-recovery enabled, re-enabling" +#define MTC_TASK_AUTO_RECOVERY_DISABLED "Critical failure. Auto-recovery disabled, threshold reached" +#define MTC_TASK_FAILED_SWACT_REQ "Critical failure.Requesting SWACT to enabled standby controller" +#define MTC_TASK_FAILED_NO_BACKUP "Critical failure.Please provision/enable standby controller" + +#define COMMAND_RETRY_DELAY (8) /* from sshUtil.h */ +#define COMMAND_DELAY (2) /* from sshUtil.h */ + +#define MTC_POWER_ACTION_RETRY_DELAY (20) +#define MTC_POWER_ACTION_RETRY_COUNT (10) +#define MTC_RESET_ACTION_RETRY_COUNT (5) + +/* number of calls to the bm_handler while bm_access is not confirmed */ +#define MTC_MAX_B2B_BM_ACCESS_FAIL_COUNT_B4_ALARM (5) + /* string too long for inv */ +#define MTC_TASK_DISABLE_REJ "Lock Rejected: Incomplete Migration" /* Please Enable More Compute Resources" */ +#define MTC_TASK_DISABLE_NOHOST "Lock Rejected: Please Enable More Compute Resources" +#define MTC_TASK_MIGRATE_FAIL "Lock Failed: Undetermined Reason" +#define MTC_TASK_DISABLE_NOHOSTS "Insufficient Enabled Resources for Live Migration" +#define MTC_TASK_DISABLE_FORCE "Force Lock Reset in Progress" +#define MAX_JSON_INV_GET_HOST_NUM (10) + +#define MTC_TASK_SWACT_COMPLETE "Swact: Complete" +#define MTC_TASK_SWACT_NO_COMPLETE "Swact: Did Not Complete" +#define MTC_TASK_SWACT_REQUEST "Swact: Request" +#define MTC_TASK_SWACT_INPROGRESS "Swact: In-Progress" +#define MTC_TASK_SWACT_FAILED "Swact: Failed" +#define MTC_TASK_SWACT_TIMEOUT "Swact: Timeout" +#define MTC_TASK_SWACT_NOSERVICE "Swact: No active services" +#define MTC_TASK_SWACT_FAIL_QUERY "Swact: Query 
Services Failed" + +/** The character length of a UUID */ +#ifndef UUID_LEN +#define UUID_LEN (36) +#endif + +/** Range of characters in an IP address */ +#define COL_CHARS_IN_MAC_ADDR (17) /**< Colon delimited xx:xx:xx:xx:xx:xx */ +#define MIN_CHARS_IN_IP_ADDR (3) /**< Min chars in ipv4 or ipv6 address */ +#define MAX_CHARS_IN_IP_ADDR (INET6_ADDRSTRLEN) /**< Max chars in the longer of IPV4 or IPV6 address */ + +/* root@controller-0:~# getconf HOST_NAME_MAX + * 64 + */ +#define MAX_CHARS_HOSTNAME (32) /**< The largest hostname length ; NOTE(review): smaller than the HOST_NAME_MAX of 64 quoted above - confirm this is intended */ +// #define MAX_CHARS_HOSTNAME (64) /**< The largest hostname length */ +#define MAX_CHARS_FILENAME (256) /**< The largest filename length */ + +#define MAX_CHARS_ON_LINE (256) /**< max number of chars on a single line */ +#define MAX_CHARS_IN_INT (65) /**< max number of chars in an integer */ + +/** Maximum number of nodes supported by this module */ +#define MAX_NODES (int)(500) + +/** Maximum number of nodes supported by CGTS platform */ +#define MAX_HOSTS (MAX_NODES) + +/** Longest hostname size */ +#define MAX_HOST_NAME_SIZE (int) (MAX_CHARS_HOSTNAME) + +/** Maximum number of queued actions supported */ +#define MTC_MAX_QUEUED_ACTIONS (2) + +/* 50 milliseconds */ +#define SOCKET_WAIT 50000 + +/* 5 milliseconds */ +#define MTCAGENT_SELECT_TIMEOUT (5000) + +/* dedicate more idle time in CPE ; there is less maintenance to do */ +#define MTCAGENT_CPE_SELECT_TIMEOUT (10000) + +/** Number of retries maintenance will do when it experiences + * a REST API call failure ; any failure */ +#define REST_API_RETRY_COUNT (3) + +/** Number of mtcAlive misses before transitioning a locked-disabled + * host from 'online' to 'offline'.
+ * See mtcTimers.h for the mtcAlive and offline timer periods */ +#define MTC_OFFLINE_MISSES (1) + +/** Number of back to back mtcAlive messages before we allow + * a power-off to online transition */ +#define MTC_MTCALIVE_HITS_TO_GO_ONLINE (5) + +#define CONTROLLER_0 ((const char *)"controller-0") +#define CONTROLLER_1 ((const char *)"controller-1") +#define CONTROLLER ((const char *)"controller") + +/* The infrastructure networking floating IP + * + * Note: If there is no infra then this label will resolve + * to another floating IP on the management network. + * + * If there is no Infra network then this label is not and should not be used */ +#define CONTROLLER_NFS ((const char *)"controller-nfs") + +#define CGTS_NODE_TYPES 4 +#define CGTS_NODE_TYPE_SIZE 12 +#define CGTS_NODE_NULL (0x00) +#define CONTROLLER_TYPE (0x01) +#define COMPUTE_TYPE (0x02) +#define STORAGE_TYPE (0x04) +#define CGCS_STORAGE_NFS 0 +#define CGCS_STORAGE_CEPH 1 + +#define MAX_SENSOR_NAME_LEN 64 +#define MAX_PROCESS_NAME_LEN 64 +#define MAX_MTCE_EVENT_NAME_LEN 64 +#define MAX_RESOURCE_NAME_LEN 64 + +/** RMON message codes **/ +#define RMON_CRITICAL (3) +#define RMON_MAJOR (2) +#define RMON_MINOR (1) +#define RMON_CLEAR (0) + +/** Interface Codes **/ +#define MGMNT_INTERFACE (0) +#define INFRA_INTERFACE (1) + + +/** Maintenance Inventory struct */ +typedef struct +{ + unsigned int nodetype ; + std::string type ; + std::string uuid ; + std::string name ; + std::string ip ; + std::string mac ; + std::string infra_ip ; + std::string admin ; + std::string oper ; + std::string avail ; + std::string task ; + std::string action ; + std::string uptime ; + std::string bm_ip ; + std::string bm_un ; + std::string bm_type ; + std::string id ; + + /* Added to support sub-function state and status */ + std::string func ; + std::string oper_subf ; + std::string avail_subf ; + +} node_inv_type ; +void node_inv_init (node_inv_type & inv); + +#define RECOVERY_STATE__INIT (0) +#define RECOVERY_STATE__ACTION 
(1) +#define RECOVERY_STATE__COOLOFF (2) +#define RECOVERY_STATE__HOLDOFF (3) +#define RECOVERY_STATE__MONITOR (4) +#define RECOVERY_STATE__BLOCKED (5) +typedef struct +{ + int state ; /* recovery state */ + int holdoff ; /* holdoff minute count */ + int queries ; /* query retries */ + int retries ; /* general retries */ + int attempts ; /* unrecovered attempts */ + struct mtc_timer control_timer ; + struct mtc_timer recovery_timer ; +} recovery_ctrl_type ; +void recovery_ctrl_init ( recovery_ctrl_type & recovery_ctrl ); + + +const char * get_loopback_header ( void ) ; +const char * get_hbs_cmd_req_header ( void ) ; +const char * get_cmd_req_msg_header ( void ) ; +const char * get_cmd_rsp_msg_header ( void ) ; +const char * get_msg_rep_msg_header ( void ) ; +const char * get_compute_msg_header ( void ) ; +const char * get_mtc_log_msg_hdr ( void ) ; +const char * get_pmond_pulse_header ( void ) ; +const char * get_mtce_event_header ( void ) ; +const char * get_heartbeat_loss_header ( void ) ; +const char * get_heartbeat_event_header( void ) ; +const char * get_heartbeat_ready_header( void ) ; + +#define MSG_HEADER_SIZE (18) /* this is the length of the first bytes + of every message as a string. 
Its the message + signature or label */ + +#define MAX_MSG (1024) + +/************************************************ + * Common Maintenace Message Structure Layout * + ************************************************ + +--------------------------------------------+ + | Message Signature | 18 bytes - MSG_HEADER_SIZE - string return from the above procs + +--------------------------------------------+ + | Free Format Header Buffer | 74 bytes - for 64 byte hostname and a bit extra - 128 bytes - 18 - 36 = 74 bytes + +--------------------------------------------+ + | Message Version and Cmd Parms | 36 bytes - ver/rev, res, cmd, num, parm[5] + +--------------------------------------------+ + above is 128 bytes + +--------------------------------------------+ + | Message Buffer | BUF_SIZE + +--------------------------------------------+ + full message is 1024 bytes max + + ** Maintenance Message header byte size minus the ver/rev,res,cmd.num,parm[5] */ +#define HDR_SIZE ((128)-(sizeof(unsigned int)*8)-(sizeof(unsigned short)*2)-28) +#define BUF_SIZE ((MAX_MSG)-(HDR_SIZE)) + +#define MTC_CMD_VERSION (1) +#define MTC_CMD_REVISION (0) + +#define MTC_CMD_FEATURE_VER__MACADDR_IN_CMD (1) + +typedef struct +{ + char hdr[HDR_SIZE] ; + unsigned short ver ; /* major version number */ + unsigned short rev ; /* minor revision number */ + unsigned int res ; /* a reserved field */ + unsigned int cmd ; + unsigned int num ; + unsigned int parm[5] ; + char buf[BUF_SIZE] ; +} ALIGN_PACK(mtc_message_type); + +#define MTC_CMD_TX (0) +#define MTC_CMD_RX (1) + +int print_mtc_message ( mtc_message_type * msg_ptr ); +int print_mtc_message ( mtc_message_type * msg_ptr , bool force ); +void print_mtc_message ( string hostname, int direction, mtc_message_type & msg, const char * iface, bool force ); + +#define MAX_LOG_MSG (6000-HDR_SIZE-MAX_HOST_NAME_SIZE-MAX_FILENAME_LEN) +typedef struct +{ + char header [HDR_SIZE] ; + char filename [MAX_FILENAME_LEN+1] ; + char hostname [MAX_HOST_NAME_SIZE+1] ; 
+ char logbuffer[MAX_LOG_MSG] ; +} log_message_type; + +/** Generic Maintenance Commands */ +#define MTC_CMD_ADD_HOST (0x11110010) /* Add Host */ +#define MTC_CMD_DEL_HOST (0x11110011) /* Delete Host */ +#define MTC_CMD_MOD_HOST (0x11110012) /* Modify Host */ +#define MTC_CMD_QRY_HOST (0x11110013) /* Query Host */ +#define MTC_CMD_START_HOST (0x11110014) /* Start Monitoring Host */ +#define MTC_CMD_STOP_HOST (0x11110015) /* Stop Monitoring Host */ + +#define MTC_CMD_ADD_INST (0x11110020) /* Add Inst */ +#define MTC_CMD_DEL_INST (0x11110021) /* Delete Inst */ +#define MTC_CMD_MOD_INST (0x11110022) /* Modify Inst */ +#define MTC_CMD_QRY_INST (0x11110023) /* Query Inst */ + +#define MTC_CMD_VOTE_INST (0x11110024) /* Vote Inst */ +#define MTC_CMD_NOTIFY_INST (0x11110025) /* Notify Inst */ + +#define MTC_SERVICE_PMOND (0xB00BF00D) +#define MTC_SERVICE_RMOND (0xFAABF00D) +#define MTC_SERVICE_HWMOND (0xF00BF00D) +#define MTC_SERVICE_HEARTBEAT (0xBABEF00D) + +/** process to process loopback command */ +#define MTC_EVENT_LOOPBACK (0x01010101) + +#define MTC_EVENT_GOENABLE_FAIL (0x7AB00BAE) + +#define MTC_ENHANCED_HOST_SERVICES (0x1B0000B1) + +/******************************************************** + * The following 4 definitions are Events signatures + * the process monitor service sends to maintenance.
+ ********************************************************/ + +/* Generic Monitor Service ready event */ +#define MTC_EVENT_MONITOR_READY (0xf0f0f0f0) + +/* TODO: Obsolete code */ +#define MTC_EVENT_RMON_READY (0x0f0f0f0f) + +/** Process Monitor Event codes */ +#define MTC_EVENT_PMON_CLEAR (0x02020202) /**< Clear Action */ +#define MTC_EVENT_PMON_CRIT (0x04040404) /**< Crit Failed Action */ +#define MTC_EVENT_PMON_MAJOR (0x05050505) /**< Major Degrade Action */ +#define MTC_EVENT_PMON_MINOR (0x08080808) /**< Minor Log action */ +#define MTC_EVENT_PMON_LOG (0x03030303) /**< Minor Log action */ + +/** Process Monitor Event codes */ +#define MTC_EVENT_RMON_CLEAR (0x10101010) /**< Clear Action */ +#define MTC_EVENT_RMON_CRIT (0x20202020) /**< Crit Failed Action */ +#define MTC_EVENT_RMON_MAJOR (0x30303030) /**< Major Degrade Action */ +#define MTC_EVENT_RMON_MINOR (0x40404040) /**< Minor Log action */ +#define MTC_EVENT_RMON_LOG (0x50505050) /**< Minor Log action */ + +/** Process Monitor Daemon Running - Event Raise / Clear Codes */ +#define MTC_EVENT_PMOND_CLEAR (0x06060606) +#define MTC_EVENT_PMOND_RAISE (0x07070707) + +/** Host Appears to be Stalled */ +#define MTC_EVENT_HOST_STALLED (0x66600999) + +/** Accelerated Virtual Switch Event Codes - Clear, Major and Critical */ +#define MTC_EVENT_AVS_CLEAR (0x12340000) +#define MTC_EVENT_AVS_MAJOR (0x12340001) +#define MTC_EVENT_AVS_CRITICAL (0x12340002) +#define MTC_EVENT_AVS_OFFLINE (0x12340003) + +/** Hardware Monitor (hwmond) Action Request Codes + * Action based event messages that hwmond sends to maintenance */ +#define MTC_EVENT_HWMON_CONFIG (0x11110000) /* Sensor Config Log */ +#define MTC_EVENT_HWMON_CLEAR (0x11110001) /* Clear Event */ +#define MTC_EVENT_HWMON_MINOR (0x11110002) /* Raise Minor Alarm */ +#define MTC_EVENT_HWMON_MAJOR (0x11110003) /* ... Major */ +#define MTC_EVENT_HWMON_CRIT (0x11110004) /* ... 
Critical */ +#define MTC_EVENT_HWMON_RESET (0x11110005) /* Reset the Host */ +#define MTC_EVENT_HWMON_LOG (0x11110006) /* Create a log */ +#define MTC_EVENT_HWMON_reserved (0x11110007) /* Reinstall the Host */ +#define MTC_EVENT_HWMON_POWERDOWN (0x11110008) /* Power Down the Host */ +#define MTC_EVENT_HWMON_POWERCYCLE (0x11110009) /* Power Cycle the Host */ + +/* Specialized Heartbeat Commands */ +#define MTC_RESTART_HBS (0x0000f11f) /**< Restart monitoring specified host */ +#define MTC_BACKOFF_HBS (0x0000f00f) /**< Cmd Hbs to reduce heartbeat period */ +#define MTC_RECOVER_HBS (0x00000ff0) /**< Recover to default heartbeat period */ + +#define MTC_DEGRADE_RAISE (0x77770001) /**< command to trigger host degrade */ +#define MTC_DEGRADE_CLEAR (0x77770000) /**< command to clear host degrade */ + +/******************************************************* + * The following definitions are event signatures + * the heartbeat service sends to maintenance + *******************************************************/ + +/** Inform maintenance that the heartbeat service is running + * and ready to accept control commands */ +#define MTC_EVENT_HEARTBEAT_READY (0x5a5a5a5a) + +/** Specified Host has exceeded the heartbeat-miss FAILURE threshold */ +#define MTC_EVENT_HEARTBEAT_LOSS (0x0000fead) +#define MTC_EVENT_HEARTBEAT_RUNNING (0x0110fead) +#define MTC_EVENT_HEARTBEAT_ILLHEALTH (0x0001fead) +#define MTC_EVENT_HEARTBEAT_STOPPED (0x0100fead) + +/** Specified Host has exceeded the heartbeat-miss DEGRADE threshold */ +#define MTC_EVENT_HEARTBEAT_DEGRADE_SET (0xbeefbeef) + +/** Specified Host has exceeded the heartbeat-miss MINOR threshold */ +#define MTC_EVENT_HEARTBEAT_MINOR_SET (0xdadeedad) + +/** Specified Host has recovered from MINOR assertion */ +#define MTC_EVENT_HEARTBEAT_MINOR_CLR (0xdad00dad) + +/** A degraded but not failed host has responded to a heartbeat + * So we can clear its degrade condition */ +#define MTC_EVENT_HEARTBEAT_DEGRADE_CLR (0xf00df00d) + +/**
Response received for voting and notification */ +#define MTC_EVENT_VOTE_NOTIFY (0xfeedfeed) + +#define PMOND_MISSING_THRESHOLD (100) /**< Count before degrade */ +#define NULL_PULSE_FLAGS (0xffffffff) /**< Unknown flags value */ +#define PMOND_FLAG (0x00000001) /**< Process Monitor O.K. Flag */ +#define INFRA_FLAG (0x00000002) /**< Infrastructure iface provisioned Flag */ + +#define STALL_MON_FLAG (0x00010000) /**< Flag indicating hang monitor running */ +#define STALL_REC_FLAG (0x00020000) /**< Flag indicating hbsClient took action */ +#define STALL_ERR1_FLAG (0x00100000) /**< Error 1 Flag */ +#define STALL_ERR2_FLAG (0x00200000) /**< Error 2 Flag */ +#define STALL_ERR3_FLAG (0x00400000) /**< Error 3 Flag */ +#define STALL_ERR4_FLAG (0x00800000) /**< Error 4 Flag */ +#define STALL_PID1_FLAG (0x01000000) /**< Monitored process 1 is stalled */ +#define STALL_PID2_FLAG (0x02000000) /**< Monitored process 2 is stalled */ +#define STALL_PID3_FLAG (0x04000000) /**< Monitored process 3 is stalled */ +#define STALL_PID4_FLAG (0x08000000) /**< Monitored process 4 is stalled */ +#define STALL_PID5_FLAG (0x10000000) /**< Monitored process 5 is stalled */ +#define STALL_PID6_FLAG (0x20000000) /**< Monitored process 6 is stalled */ +#define STALL_PID7_FLAG (0x40000000) /**< Monitored process 7 is stalled */ +#define STALL_REC_FAIL_FLAG (0x80000000) /**< Auto recover failed, still running */ + +#define STALL_ERROR_FLAGS (STALL_ERR1_FLAG | \ + STALL_ERR2_FLAG | \ + STALL_ERR3_FLAG | \ + STALL_ERR4_FLAG | \ + STALL_PID1_FLAG | \ + STALL_PID2_FLAG | \ + STALL_PID3_FLAG | \ + STALL_PID4_FLAG | \ + STALL_PID5_FLAG | \ + STALL_PID6_FLAG | \ + STALL_PID7_FLAG | \ + STALL_REC_FAIL_FLAG) + +#define STALL_MSG_THLD (20) + +#define STALL_SYSREQ_CMD (0x66006600) /**< Stall SYSREQ Recovery Command */ +#define STALL_REBOOT_CMD (0x00990099) /**< Stall REBOOT Recovery Command */ + +/* MD5_DIGEST_LENGTH is 16 and need space for *2 plus cr */ +#define MD5_STRING_LENGTH ((MD5_DIGEST_LENGTH*2)+1) 
+ + +#define MTC_CMD_NONE 0 +#define MTC_CMD_LOOPBACK 1 /* to host */ +#define MTC_CMD_REBOOT 2 /* to host */ +#define MTC_CMD_WIPEDISK 3 /* to host */ +#define MTC_CMD_RESET 4 /* to host */ +#define MTC_MSG_MTCALIVE 5 /* from host */ +#define MTC_REQ_MTCALIVE 6 /* to host */ +#define MTC_MSG_MAIN_GOENABLED 7 /* from host */ +#define MTC_MSG_SUBF_GOENABLED 8 /* from host */ +#define MTC_REQ_MAIN_GOENABLED 9 /* to host */ +#define MTC_REQ_SUBF_GOENABLED 10 /* to host */ +#define MTC_MSG_MAIN_GOENABLED_FAILED 11 /* from host */ +#define MTC_MSG_SUBF_GOENABLED_FAILED 12 /* from host */ +#define MTC_MSG_LOCKED 13 /* to host */ +#define MTC_CMD_STOP_CONTROL_SVCS 14 /* to host */ +#define MTC_CMD_STOP_COMPUTE_SVCS 15 /* to host */ +#define MTC_CMD_STOP_STORAGE_SVCS 16 /* to host */ +#define MTC_CMD_START_CONTROL_SVCS 17 /* to host */ +#define MTC_CMD_START_COMPUTE_SVCS 18 /* to host */ +#define MTC_CMD_START_STORAGE_SVCS 19 /* to host */ +#define MTC_CMD_LAZY_REBOOT 20 /* to host */ +#define MTC_CMD_HOST_SVCS_RESULT 21 /* to host */ +#define MTC_CMD_LAST 22 + +#define RESET_PROG_MAX_REBOOTS_B4_RESET (5) +#define RESET_PROG_MAX_REBOOTS_B4_RETRY (RESET_PROG_MAX_REBOOTS_B4_RESET+2) + +const char * get_mtcNodeCommand_str ( int cmd ); + +typedef enum +{ + PROTOCOL__NONE = 0, + PROTOCOL__SMASH = 1, + PROTOCOL__IPMI = 2, + PROTOCOL__MAX = 3 +} protocol_enum ; + + +/** Maintenance Commands used to specify HTTP REST API Command operations */ +typedef enum +{ + MTC_CMD_NOT_SET, + MTC_CMD_DISABLE, + MTC_CMD_ENABLE, + MTC_CMD_VOTE, + MTC_CMD_NOTIFY, + + /** HA Service Manager Commands - Command Descriptions */ + CONTROLLER_LOCKED, /**< specified controller is locked */ + CONTROLLER_UNLOCKED, /**< specified controller is unlocked */ + CONTROLLER_DISABLED, /**< specified controller is unlocked-disabled */ + CONTROLLER_ENABLED, /**< specified controller is unlocked-enabled */ + CONTROLLER_SWACT, /**< swact services away from specified controller */ + CONTROLLER_QUERY, /**< query active 
services on specified controller */ +} mtc_cmd_enum ; + +typedef enum +{ + MTC_SWACT__START = 0, + MTC_SWACT__QUERY, + MTC_SWACT__QUERY_FAIL, + MTC_SWACT__QUERY_RECV, + MTC_SWACT__SWACT, + MTC_SWACT__SWACT_FAIL, + MTC_SWACT__SWACT_RECV, + MTC_SWACT__SWACT_POLL, + MTC_SWACT__DONE, + MTC_SWACT__STAGES, +} mtc_swactStages_enum ; + +/** Maintenance Administrative actions */ +typedef enum +{ + MTC_ADMIN_ACTION__NONE = 0, + MTC_ADMIN_ACTION__LOCK = 1, + MTC_ADMIN_ACTION__UNLOCK = 2, + MTC_ADMIN_ACTION__RESET = 3, + MTC_ADMIN_ACTION__REBOOT = 4, + MTC_ADMIN_ACTION__REINSTALL = 5, + MTC_ADMIN_ACTION__POWEROFF = 6, + MTC_ADMIN_ACTION__POWERON = 7, + MTC_ADMIN_ACTION__RECOVER = 8, + MTC_ADMIN_ACTION__DELETE = 9, + MTC_ADMIN_ACTION__POWERCYCLE =10, + MTC_ADMIN_ACTION__ADD =11, + MTC_ADMIN_ACTION__SWACT =12, + MTC_ADMIN_ACTION__FORCE_LOCK =13, + MTC_ADMIN_ACTION__FORCE_SWACT =14, + + /* FSM Actions */ + MTC_ADMIN_ACTION__ENABLE =15, + MTC_ADMIN_ACTION__ENABLE_SUBF =16, + MTC_ADMIN_ACTIONS =17 +} mtc_nodeAdminAction_enum ; + +typedef enum +{ + MTC_CONFIG_ACTION__NONE = 0, + MTC_CONFIG_ACTION__INSTALL_PASSWD = 1, + MTC_CONFIG_ACTION__CHANGE_PASSWD = 2, + MTC_CONFIG_ACTION__CHANGE_PASSWD_AGAIN = 3, + MTC_CONFIG_ACTIONS = 4, +} mtc_nodeConfigAction_enum ; + + +/** Maintenance Administrative states */ +typedef enum +{ + MTC_ADMIN_STATE__LOCKED = 0, + MTC_ADMIN_STATE__UNLOCKED = 1, + MTC_ADMIN_STATES = 2 +} mtc_nodeAdminState_enum ; + +/** Maintenance Operational states */ +typedef enum +{ + MTC_OPER_STATE__DISABLED = 0, + MTC_OPER_STATE__ENABLED = 1, + MTC_OPER_STATES = 2 +} mtc_nodeOperState_enum ; + +/** Maintenance Availablity status */ +typedef enum +{ + MTC_AVAIL_STATUS__NOT_INSTALLED = 0, + MTC_AVAIL_STATUS__AVAILABLE = 1, + MTC_AVAIL_STATUS__DEGRADED = 2, + MTC_AVAIL_STATUS__FAILED = 3, + MTC_AVAIL_STATUS__INTEST = 4, + MTC_AVAIL_STATUS__POWERED_OFF = 5, + MTC_AVAIL_STATUS__OFFLINE = 6, + MTC_AVAIL_STATUS__ONLINE = 7, + MTC_AVAIL_STATUS__OFFDUTY = 8, + MTC_AVAIL_STATUS = 9 
+} mtc_nodeAvailStatus_enum ; + + +void mtc_stages_init ( void ); + +typedef enum +{ + MTC_ENABLE__START = 0, + MTC_ENABLE__RESERVED_1 = 1, + MTC_ENABLE__HEARTBEAT_CHECK = 2, + MTC_ENABLE__HEARTBEAT_STOP_CMD = 3, + MTC_ENABLE__RECOVERY_TIMER = 4, + MTC_ENABLE__RECOVERY_WAIT = 5, + MTC_ENABLE__RESET_PROGRESSION = 6, + MTC_ENABLE__RESET_WAIT = 7, + MTC_ENABLE__INTEST_START = 8, + MTC_ENABLE__MTCALIVE_PURGE = 9, + MTC_ENABLE__MTCALIVE_WAIT = 10, + MTC_ENABLE__CONFIG_COMPLETE_WAIT = 11, + MTC_ENABLE__GOENABLED_TIMER = 12, + MTC_ENABLE__GOENABLED_WAIT = 13, + MTC_ENABLE__PMOND_READY_WAIT = 14, + MTC_ENABLE__HOST_SERVICES_START = 15, + MTC_ENABLE__HOST_SERVICES_WAIT = 16, + MTC_ENABLE__SERVICES_START_WAIT = 17, + MTC_ENABLE__HEARTBEAT_WAIT = 18, + MTC_ENABLE__HEARTBEAT_SOAK = 19, + MTC_ENABLE__STATE_CHANGE = 20, + MTC_ENABLE__WORKQUEUE_WAIT = 21, + MTC_ENABLE__WAIT = 22, + MTC_ENABLE__ENABLED = 23, + MTC_ENABLE__SUBF_FAILED = 24, + MTC_ENABLE__DEGRADED = 25, + MTC_ENABLE__DONE = 26, + MTC_ENABLE__FAILURE = 27, + MTC_ENABLE__FAILURE_WAIT = 28, + MTC_ENABLE__FAILURE_SWACT_WAIT = 29, + MTC_ENABLE__STAGES = 30, +} mtc_enableStages_enum ; + +/** Return the string representing the specified 'enable' stage */ +string get_enableStages_str ( mtc_enableStages_enum stage ); + +typedef enum +{ + MTC_DISABLE__START = 0, + MTC_DISABLE__HANDLE_FORCE_LOCK = 1, + MTC_DISABLE__RESET_HOST_WAIT = 2, + MTC_DISABLE__DISABLE_SERVICES = 3, + MTC_DISABLE__DIS_SERVICES_WAIT = 4, + MTC_DISABLE__HANDLE_CEPH_LOCK = 5, + MTC_DISABLE__RESERVED = 6, + MTC_DISABLE__TASK_STATE_UPDATE = 7, + MTC_DISABLE__WORKQUEUE_WAIT = 8, + MTC_DISABLE__DISABLED = 9, + MTC_DISABLE__HANDLE_POWERON_SEND =10, + MTC_DISABLE__HANDLE_POWERON_RECV =11, + MTC_DISABLE__STAGES =12, +} mtc_disableStages_enum ; + +/** Return the string representing the specified 'disable' stage */ +string get_disableStages_str ( mtc_disableStages_enum stage ); + +typedef enum +{ + MTC_ADD__START = 0, + MTC_ADD__START_DELAY, + 
MTC_ADD__START_SERVICES, + MTC_ADD__START_SERVICES_WAIT, + MTC_ADD__MTC_SERVICES, + MTC_ADD__CLEAR_TASK, + MTC_ADD__WORKQUEUE_WAIT, + MTC_ADD__DONE, + MTC_ADD__STAGES +} mtc_addStages_enum ; + + +/** Return the string representing the specified 'add' stage */ +string get_addStages_str ( mtc_addStages_enum stage ); + + +typedef enum +{ + MTC_DEL__START = 0, + MTC_DEL__WAIT, + MTC_DEL__DONE, + MTC_DEL__STAGES +} mtc_delStages_enum ; + +string get_delStages_str ( mtc_delStages_enum stage ); + + +#define MTC_MAX_FAST_ENABLES (2) +typedef enum +{ + MTC_RECOVERY__START = 0, + MTC_RECOVERY__REQ_MTCALIVE, + MTC_RECOVERY__REQ_MTCALIVE_WAIT, + MTC_RECOVERY__RESET_RECV_WAIT, + MTC_RECOVERY__RESET_WAIT, + MTC_RECOVERY__MTCALIVE_TIMER, + MTC_RECOVERY__MTCALIVE_WAIT, + MTC_RECOVERY__GOENABLED_TIMER, + MTC_RECOVERY__GOENABLED_WAIT, + MTC_RECOVERY__HOST_SERVICES_START, + MTC_RECOVERY__HOST_SERVICES_WAIT, + + /* Subfunction stages */ + MTC_RECOVERY__CONFIG_COMPLETE_WAIT, + MTC_RECOVERY__SUBF_GOENABLED_TIMER, + MTC_RECOVERY__SUBF_GOENABLED_WAIT, + MTC_RECOVERY__SUBF_SERVICES_START, + MTC_RECOVERY__SUBF_SERVICES_WAIT, + + MTC_RECOVERY__HEARTBEAT_START, + MTC_RECOVERY__HEARTBEAT_SOAK, + MTC_RECOVERY__STATE_CHANGE, + MTC_RECOVERY__ENABLE_START, + MTC_RECOVERY__FAILURE, + MTC_RECOVERY__WORKQUEUE_WAIT, + MTC_RECOVERY__ENABLE_WAIT, + MTC_RECOVERY__STAGES, +} mtc_recoveryStages_enum ; + +/** Return the string representing the specified 'recovery' stage */ +string get_recoveryStages_str ( mtc_recoveryStages_enum stage ); + +/* mtce support for sysinv driven configuration changes */ +typedef enum { + MTC_CONFIG__START, + MTC_CONFIG__SHOW, + MTC_CONFIG__MODIFY, + MTC_CONFIG__VERIFY, + MTC_CONFIG__FAILURE, + MTC_CONFIG__TIMEOUT, + MTC_CONFIG__DONE, + MTC_CONFIG__STAGES +} mtc_configStages_enum ; + +/** Return the string representing the specified 'add' stage */ +string get_configStages_str ( mtc_configStages_enum stage ); + + +/** Service Degrade Mask + * + * Hosts can become degraded for more 
than one reason. + * The following are bit field definitions that represent + * various degrade reasons ; heartbeat, process error, + * inservice test, etc. */ +#define DEGRADE_MASK_NONE 0x00000000 +#define DEGRADE_MASK_HEARTBEAT_MGMNT 0x00000001 +#define DEGRADE_MASK_HEARTBEAT_INFRA 0x00000002 +#define DEGRADE_MASK_PMON 0x00000004 +#define DEGRADE_MASK_INSV_TEST 0x00000008 +#define DEGRADE_MASK_AVS_MAJOR 0x00000010 +#define DEGRADE_MASK_AVS_CRITICAL 0x00000020 +#define DEGRADE_MASK_RESMON 0x00000040 +#define DEGRADE_MASK_HWMON 0x00000080 +#define DEGRADE_MASK_SUBF 0x00000100 +#define DEGRADE_MASK_SM 0x00000200 +#define DEGRADE_MASK_CONFIG 0x00000400 +#define DEGRADE_MASK_RES2 0x00000800 +#define DEGRADE_MASK_ENABLE 0x00001000 +#define DEGRADE_MASK_RES4 0x00002000 +#define DEGRADE_MASK_RES5 0x00004000 +#define DEGRADE_MASK_RES6 0x00008000 + +/* future masks up to 0x80000000 */ + +/* FSM Stages for handling host 'reset' through + * board management controller interface */ +typedef enum +{ + MTC_RESET__START = 0, + MTC_RESET__REQ_SEND, + MTC_RESET__RESP_WAIT, + MTC_RESET__QUEUE, + MTC_RESET__OFFLINE_WAIT, + MTC_RESET__DONE, + MTC_RESET__FAIL, + MTC_RESET__FAIL_WAIT, + MTC_RESET__STAGES +} mtc_resetStages_enum ; + + +/* FSM Stages for handling host 'reset' through + * board management controller interface */ +typedef enum +{ + MTC_RESETPROG__START = 0, + MTC_RESETPROG__REBOOT, + MTC_RESETPROG__WAIT, + MTC_RESETPROG__FAIL, + MTC_RESETPROG__STAGES +} mtc_resetProgStages_enum ; + +/** Return the string representing the specified 'reset' stage */ +string get_resetStages_str ( mtc_resetStages_enum stage ); + +/* FSM Stages for handling host 'reinstall' */ +typedef enum +{ + MTC_REINSTALL__START = 0, + MTC_REINSTALL__RESP_WAIT, + MTC_REINSTALL__OFFLINE_WAIT, + MTC_REINSTALL__ONLINE_WAIT, + MTC_REINSTALL__FAIL, + MTC_REINSTALL__MSG_DISPLAY, + MTC_REINSTALL__DONE, + MTC_REINSTALL__STAGES +} mtc_reinstallStages_enum ; + +/** Return the string representing the specified 
'reinstall' stage */ +string get_reinstallStages_str ( mtc_reinstallStages_enum stage ); + +typedef enum +{ + MTC_POWERON__START = 0, + MTC_POWERON__POWER_STATUS, + MTC_POWERON__POWER_STATUS_WAIT, + MTC_POWERON__REQ_SEND, + MTC_POWERON__RETRY_WAIT, + MTC_POWERON__RESP_WAIT, + MTC_POWERON__DONE, + MTC_POWERON__FAIL, + MTC_POWERON__FAIL_WAIT, + MTC_POWERON__QUEUE, + + MTC_POWEROFF__START, + MTC_POWEROFF__REQ_SEND, + MTC_POWEROFF__RESP_WAIT, + MTC_POWEROFF__DONE, + MTC_POWEROFF__FAIL, + MTC_POWEROFF__FAIL_WAIT, + MTC_POWEROFF__QUEUE, + + MTC_POWER__DONE, /* clear power action */ + MTC_POWER__STAGES +} mtc_powerStages_enum ; + +/** Return the string representing the specified 'power' stage */ +string get_powerStages_str ( mtc_powerStages_enum stage ); + +/* FSM Stages for handling host 'powercycle' through + * board management controller interface */ +typedef enum +{ + MTC_POWERCYCLE__START = 0, + MTC_POWERCYCLE__POWEROFF, + MTC_POWERCYCLE__POWEROFF_CMND_WAIT, + MTC_POWERCYCLE__POWEROFF_WAIT, + MTC_POWERCYCLE__POWERON, + MTC_POWERCYCLE__POWERON_REQWAIT, + MTC_POWERCYCLE__POWERON_VERIFY, + MTC_POWERCYCLE__POWERON_VERIFY_WAIT, + MTC_POWERCYCLE__POWERON_CMND_WAIT, + MTC_POWERCYCLE__POWERON_WAIT, + MTC_POWERCYCLE__DONE, + MTC_POWERCYCLE__FAIL, + MTC_POWERCYCLE__HOLDOFF, + MTC_POWERCYCLE__COOLOFF, + MTC_POWERCYCLE__STAGES, +} mtc_powercycleStages_enum ; + +/** Return the string representing the specified 'powercycle' stage */ +string get_powercycleStages_str ( mtc_powercycleStages_enum stage ); + +typedef enum +{ + MTC_SUBSTAGE__START = 0, + MTC_SUBSTAGE__SEND = 1, + MTC_SUBSTAGE__RECV = 2, + MTC_SUBSTAGE__WAIT = 3, + MTC_SUBSTAGE__DONE = 4, + MTC_SUBSTAGE__FAIL = 5, + MTC_SUBSTAGE__STAGES = 6 +} mtc_subStages_enum ; + +/** Return the string representing the specified 'sub' stage */ +string get_subStages_str ( mtc_subStages_enum stage ); + +typedef enum +{ + MTC_OOS_TEST__LOAD_NEXT_TEST = 0, + MTC_OOS_TEST__BMC_ACCESS_TEST = 1, + MTC_OOS_TEST__BMC_ACCESS_RESULT = 2, + 
MTC_OOS_TEST__START_WAIT = 3, + MTC_OOS_TEST__WAIT = 4, + MTC_OOS_TEST__DONE = 5, + MTC_OOS_TEST__STAGES = 6, +} mtc_oosTestStages_enum ; + +/** Return the string representing the specified 'test' stage */ +string get_oosTestStages_str ( mtc_oosTestStages_enum stage ); + +typedef enum +{ + MTC_INSV_TEST__START = 0, + MTC_INSV_TEST__WAIT = 1, + MTC_INSV_TEST__RUN = 2, + MTC_INSV_TEST__STAGES = 3, +} mtc_insvTestStages_enum ; + +/** Return the string representing the specified 'test' stage */ +string get_insvTestStages_str ( mtc_insvTestStages_enum stage ); + +#define MTC_NO_TEST 0 +#define MTC_OOS_TEST 1 +#define MTC_INSV_TEST 2 + +typedef enum +{ + MTC_SENSOR__START = 0, + MTC_SENSOR__READ_FAN = 1, + MTC_SENSOR__READ_TEMP= 2, + MTC_SENSOR__STAGES = 3, +} mtc_sensorStages_enum ; + +/** Return the string representing the specified 'sensor' stage */ +string get_sensorStages_str ( mtc_sensorStages_enum stage ); + +typedef enum +{ + MTC_OFFLINE__IDLE = 0, + MTC_OFFLINE__START, + MTC_OFFLINE__SEND_MTCALIVE, + MTC_OFFLINE__WAIT, + MTC_OFFLINE__STAGES +} mtc_offlineStages_enum ; + +typedef enum +{ + MTC_ONLINE__START = 0, + MTC_ONLINE__WAITING, + MTC_ONLINE__RETRYING, + MTC_ONLINE__STAGES +} mtc_onlineStages_enum ; + +#define MTC_ENABLE 0x12345678 +#define MTC_DEGRADE 0x87654321 +#define MTC_DISABLE 0xdeadbeef +#define MTC_RESET 0xdeadb00b +#define MTC_WIPEDISK 0xdeadfeed + + +typedef enum +{ + MTC_STRESS_TEST__START = 0, + MTC_STRESS_TEST__DO = 1, + MTC_STRESS_TEST__WAIT = 2, + MTC_STRESS_TEST__VERIFY = 3, + MTC_STRESS_TEST__NEXT = 4, + MTC_STRESS_TEST__DONE = 5, + MTC_STRESS_TEST__STAGES = 6, +} mtc_stressStages_enum ; + +typedef union +{ + mtc_enableStages_enum enable ; + mtc_disableStages_enum disable ; + int raw ; +} mtc_stages_union ; + +typedef struct +{ + mtc_nodeAdminAction_enum adminAction ; + mtc_nodeAdminState_enum adminState ; + mtc_nodeOperState_enum operState ; + mtc_nodeAvailStatus_enum availStatus ; +} fsm_states_type ; + +/** Maintenance FSM test case 
codes */ +typedef enum +{ + FSM_TC_ENABLED_NOACTION, + FSM_TC_ENABLED_TO_DISABLED_FAILED, + FSM_TC_ENABLED_TO_ENABLED_DEGRADED, + FSM_TC_ENABLED_DEGRADED_TO_ENABLED_DEGRADED, + FSM_TC_ENABLED_DEGRADED_TO_ENABLED, + FSM_TC_LAST, +} mtcNodeFsm_tc_enum ; + +/* The list of heartbeat interfaces / networks */ +typedef enum +{ + MGMNT_IFACE = 0, + INFRA_IFACE = 1, + MAX_IFACES = 2 +} iface_enum ; + +/** Returns true if the specified admin state string is valid */ +bool adminStateOk ( string admin ); + +/** Returns true if the specified oper state string is valid */ +bool operStateOk ( string oper ); + +/** Returns true if the specified avail status string is valid */ +bool availStatusOk ( string avail ); + +string get_availStatus_str ( mtc_nodeAvailStatus_enum availStatus ); +string get_operState_str ( mtc_nodeOperState_enum operState ); +string get_adminState_str ( mtc_nodeAdminState_enum adminState ); + +void log_adminAction ( string hostname, + mtc_nodeAdminAction_enum currAction, + mtc_nodeAdminAction_enum newAction ); + +int send_hbs_command ( string hostname, int command ); +int send_hwmon_command ( string hostname, int command ); +int send_guest_command ( string hostname, int command ); + +int daemon_log_message ( const char * hostname, + const char * filename, + const char * log_str ); + +bool is_host_services_cmd ( unsigned int cmd ); + +/** Runtime Trace Log Utilities */ +void daemon_dump_membuf ( void ); +void daemon_dump_membuf_banner ( void ); + +void mem_log ( char * log ); +void mem_log ( string log ); +void mem_log ( char log ); +void mem_log ( string one, string two ); +void mem_log ( string one, string two, string three ); +void mem_log ( string label, int value, string data ); + +string get_hostname ( void ); + +#define MTC_FSM_ENABLE_TEST 0x12345678 + +#define MAX_MEM_LIST_SIZE (2000) +#define MAX_MEM_LOG_LEN (1000) +#define MAX_MEM_LOG_DATA (MAX_MEM_LOG_LEN-100) + +#define TESTHEAD_BAR 
"+--------------------------------------------------------------------+\n" +#define NODEBUG printf ( "\tDebug: Not implemented\n" ) +#define FAILED printf ( "Failed |\n" ); +#define PASSED printf ( "Passed |\n" ); +#define PENDING printf ( "To-Do |\n" ); +#define FAILED_STR printf ( "Failed |\n" ); + +#endif /* __INCLUDE_NODEBASE_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeClass.cpp b/mtce-common/cgts-mtce-common-1.0/common/nodeClass.cpp new file mode 100755 index 00000000..4b8e7ca9 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeClass.cpp @@ -0,0 +1,8607 @@ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Nodal Health Check Service Node Implementation + */ + +#include +#include +#include +#include +#include /* for ENODEV, EFAULT and ENXIO */ +#include /* for close and usleep */ + +using namespace std; + +#ifdef __AREA__ +#undef __AREA__ +#define __AREA__ "---" +#endif + +#include "nodeBase.h" +#include "threadUtil.h" +#include "nodeClass.h" +#include "nodeUtil.h" +#include "mtcNodeMsg.h" /* for ... send_mtc_cmd */ +#include "nlEvent.h" /* for ... 
get_netlink_events */ +#include "daemon_common.h" + +#include "alarmUtil.h" +#include "mtcAlarm.h" +#include "alarm.h" +#include "hbsAlarm.h" + +extern void mtcTimer_handler ( int sig, siginfo_t *si, void *uc); + +const char mtc_nodeAdminAction_str[MTC_ADMIN_ACTIONS][20] = /* admin action names, indexed by mtc_nodeAdminAction_enum */ +{ + "none", + "lock", + "unlock", + "reset", + "reboot", + "reinstall", + "power-off", + "power-on", + "recovery", + "delete", + "powercycle", + "add", + "swact", + "force-lock", + "force-swact", + "enable", + "enable-subf", +}; + +const char * get_adminAction_str ( mtc_nodeAdminAction_enum action ) /* returns the name of 'action' ; an out-of-range value is logged and reported as "none" */ +{ + if ( action >= MTC_ADMIN_ACTIONS ) /* table holds MTC_ADMIN_ACTIONS entries ; the count itself is not a valid index */ + { + slog ("Invalid admin action (%d)\n", action); + action = MTC_ADMIN_ACTION__NONE ; + } + + return ( &mtc_nodeAdminAction_str[action][0] ); +} + +const char mtc_nodeAdminState_str[MTC_ADMIN_STATES][15] = /* admin state names, indexed by mtc_nodeAdminState_enum */ +{ + "locked", + "unlocked", +}; + +string get_adminState_str ( mtc_nodeAdminState_enum adminState ) /* returns the name of 'adminState' ; an out-of-range value is logged and reported as "locked" */ +{ + if ( adminState >= MTC_ADMIN_STATES ) /* the count itself is one past the last table entry */ + { + slog ("Invalid admin state (%d)\n", adminState ); + adminState = MTC_ADMIN_STATE__LOCKED ; + } + return ( mtc_nodeAdminState_str[adminState] ); +} + +bool adminStateOk ( string admin ) /* true only when 'admin' equals one of the admin state names */ +{ + if (( admin.compare(mtc_nodeAdminState_str[0])) && + ( admin.compare(mtc_nodeAdminState_str[1]))) /* compare() is non-zero on mismatch ; both non-zero means no name matched */ + { + wlog ("Invalid 'admin' state (%s)\n", admin.c_str()); + return ( false ); + } + return (true); +} +const char mtc_nodeOperState_str[MTC_OPER_STATES][15] = /* oper state names, indexed by mtc_nodeOperState_enum */ +{ + "disabled", + "enabled" +}; + +string get_operState_str ( mtc_nodeOperState_enum operState ) /* returns the name of 'operState' ; an out-of-range value is logged and reported as "disabled" */ +{ + if ( operState >= MTC_OPER_STATES ) /* the count itself is one past the last table entry */ + { + slog ("Invalid oper state (%d)\n", operState ); + operState = MTC_OPER_STATE__DISABLED ; + } + return ( mtc_nodeOperState_str[operState] ); +} + +bool operStateOk ( string oper ) /* true only when 'oper' equals one of the oper state names */ +{ + if (( oper.compare(mtc_nodeOperState_str[0])) && + ( oper.compare(mtc_nodeOperState_str[1]))) /* both compares non-zero means no name matched */ + { + wlog ("Invalid 'oper' state (%s)\n", oper.c_str()); + return ( false ); + } + return (true); +} + +const char
mtc_nodeAvailStatus_str[MTC_AVAIL_STATUS][15] = +{ + "not-installed", + "available", + "degraded", + "failed", + "intest", + "power-off", + "offline", + "online", + "offduty" +}; + +bool availStatusOk ( string avail ) +{ + if (( avail.compare(mtc_nodeAvailStatus_str[0])) && + ( avail.compare(mtc_nodeAvailStatus_str[1])) && + ( avail.compare(mtc_nodeAvailStatus_str[2])) && + ( avail.compare(mtc_nodeAvailStatus_str[3])) && + ( avail.compare(mtc_nodeAvailStatus_str[4])) && + ( avail.compare(mtc_nodeAvailStatus_str[5])) && + ( avail.compare(mtc_nodeAvailStatus_str[6])) && + ( avail.compare(mtc_nodeAvailStatus_str[7])) && + ( avail.compare(mtc_nodeAvailStatus_str[8]))) + { + wlog ("Invalid 'avail' status (%s)\n", avail.c_str()); + return ( false ); + } + return (true); +} + +string get_availStatus_str ( mtc_nodeAvailStatus_enum availStatus ) +{ + if ( availStatus > MTC_AVAIL_STATUS ) + { + slog ("Invalid avail status (%d)\n", availStatus ); + availStatus = MTC_AVAIL_STATUS__FAILED ; + } + return ( mtc_nodeAvailStatus_str[availStatus] ); +} + +#ifdef WANT_nodeClass_latency_log /* Needs to be tied to a node */ +#define NODECLASS_LATENCY_MON_START ((const char *)"start") +#define MAX_DELAY_B4_LATENCY_LOG (1700) +void nodeClass_latency_log ( const char * label_ptr, int msecs ) +{ + static unsigned long long prev__time = 0 ; + static unsigned long long this__time = 0 ; + + this__time = gettime_monotonic_nsec () ; + + /* If label_ptr is != NULL and != start then take the measurement */ + if ( label_ptr && strncmp ( label_ptr, NODECLASS_LATENCY_MON_START, strlen(NODECLASS_LATENCY_MON_START))) + { + if ( this__time > (prev__time + (NSEC_TO_MSEC*(msecs)))) + { + llog ("%4llu.%-4llu msec - %s\n", + ((this__time-prev__time) > NSEC_TO_MSEC) ? ((this__time-prev__time)/NSEC_TO_MSEC) : 0, + ((this__time-prev__time) > NSEC_TO_MSEC) ? 
((this__time-prev__time)%NSEC_TO_MSEC) : 0, + label_ptr); + } + } + /* reset to be equal for next round */ + prev__time = this__time ; +} +#endif + +/* nodeLinkClass constructor */ +nodeLinkClass::nodeLinkClass() +{ + this->is_poweron_handler = NULL; + for(unsigned int i=0; inode_ptrs[i] = NULL; + } + + this->offline_threshold = 0; + this->offline_period = 0; + /* this->mtcTimer = mtc_timer(); + * this->mtcTimer_mnfa = mtc_timer(); + * this->mtcTimer_token = mtc_timer(); + * this->mtcTimer_uptime = mtc_timer(); + */ + this->api_retries = 0; + for(unsigned int i =0; ipulse_requests[i] = 0; + this->hbs_expected_pulses[i] = 0; + this->hbs_detected_pulses[i] = 0; + } + this->compute_mtcalive_timeout = 0; + this->controller_mtcalive_timeout = 0; + this->goenabled_timeout = 0; + this->loc_recovery_timeout = 0; + this->mnfa_recovery_timeout = 0; + this->node_reinstall_timeout = 0; + this->token_refresh_rate = 0; + this->autorecovery_enabled = false ; + this->autorecovery_disabled = false ; + + + head = tail = NULL; + memory_allocs = 0 ; + memory_used = 0 ; + hosts = 0 ; + host_deleted = false ; + + /* Init the base level pulse info and pointers for all interfaces */ + pulse_ptr = NULL ; + for ( int i = 0 ; i < MAX_IFACES ; i++ ) + { + pulse_list[i].head_ptr = NULL ; + pulse_list[i].tail_ptr = NULL ; + pulse_list[i].last_ptr = NULL ; + pulses[i] = 0 ; + } + /* init the resource reference index to null */ + rrri = 0 ; + + /* Entry of RRA is reserved (not used) and set to NULL */ + hbs_rra[0] = static_cast(NULL) ; + + /* Make no assumption on the service */ + maintenance = false ; + heartbeat = false ; + active = false ; + + /* Set some defaults for the hearbeat service */ + hbs_ready = false ; + hbs_state_change = false ; + hbs_disabled = true ; + hbs_pulse_period = hbs_pulse_period_save = 200 ; + hbs_minor_threshold = HBS_MINOR_THRESHOLD ; + hbs_degrade_threshold = HBS_DEGRADE_THRESHOLD ; + hbs_failure_threshold = HBS_FAILURE_THRESHOLD ; + + hbs_silent_fault_detector = 0 ; 
+ hbs_silent_fault_logged = false ; + + /* Start with null identity */ + my_hostname.clear() ; + my_local_ip.clear() ; + my_float_ip.clear() ; + active_controller_hostname.clear() ; + inactive_controller_hostname.clear() ; + + /* Start with no failures */ + mnfa_awol_list.clear(); + mnfa_host_count[MGMNT_IFACE] = 0 ; + mnfa_host_count[INFRA_IFACE] = 0 ; + mnfa_occurances = 0 ; + mnfa_active = false ; + mnfa_threshold_type = MNFA_NUMBER ; + mnfa_threshold_percent = 5 ; + mnfa_threshold_number = 3 ; + mnfa_threshold = mnfa_threshold_number ; + + mgmnt_link_up_and_running = false ; + infra_link_up_and_running = false ; + infra_network_provisioned = false ; + infra_degrade_only = false ; + + dor_mode_active = false ; + dor_start_time = 0 ; + dor_mode_active_log_throttle = 0 ; + + swact_timeout = MTC_MINS_2 ; + uptime_period = MTC_UPTIME_REFRESH_TIMER ; + online_period = MTC_OFFLINE_TIMER ; + sysinv_timeout = HTTP_SYSINV_CRIT_TIMEOUT ; + sysinv_noncrit_timeout = HTTP_SYSINV_NONC_TIMEOUT ; + work_queue_timeout = MTC_WORKQUEUE_TIMEOUT ; + + /* Inservice test periods in seconds - 0 = disabled */ + insv_test_period = 0 ; + oos_test_period = 0 ; + + /* Init the inotify shadow password file descriptors to zero */ + inotify_shadow_file_fd = 0 ; + inotify_shadow_file_wd = 0 ; + + /* Ensure that HA Swact gate is open on init. 
+ * This true gates maintenance commands */ + smgrEvent.mutex = false ; + + /* Init the event bases to null as they have not been allocated yet */ + sysinvEvent.base = NULL ; + smgrEvent.base = NULL ; + tokenEvent.base = NULL ; + sysinvEvent.conn = NULL ; + smgrEvent.conn = NULL ; + tokenEvent.conn = NULL ; + sysinvEvent.req = NULL ; + smgrEvent.req = NULL ; + tokenEvent.req = NULL ; + sysinvEvent.buf = NULL ; + smgrEvent.buf = NULL ; + tokenEvent.buf = NULL ; + + unknown_host_throttle = 0 ; + invalid_arg_throttle = 0 ; + + testmode = 0 ; + module_init( ); +} + +/* nodeLinkClass destructor */ +nodeLinkClass::~nodeLinkClass() +{ + /* Free any allocated host memory */ + for ( int i = 0 ; i < MAX_HOSTS ; i++ ) + { + if ( node_ptrs[i] ) + { + delete node_ptrs[i] ; + } + } +} + +/* Clear start host service controls */ +void nodeLinkClass::clear_hostservices_ctls ( struct nodeLinkClass::node * node_ptr ) +{ + if ( node_ptr ) + { + node_ptr->start_services_needed = false ; + node_ptr->start_services_needed_subf = false ; + node_ptr->start_services_running_main = false ; + node_ptr->start_services_running_subf = false ; + node_ptr->start_services_retries = 0 ; + } +} + +/* Clear all the main function enable failure bools */ +void nodeLinkClass::clear_main_failed_bools ( struct nodeLinkClass::node * node_ptr ) +{ + if ( node_ptr ) + { + node_ptr->config_failed = false ; + node_ptr->goEnabled_failed = false ; + node_ptr->inservice_failed = false ; + node_ptr->hostservices_failed = false ; + return; + } + slog ("null pointer\n"); +} + +/* Clear all the sub function enable failure bools */ +void nodeLinkClass::clear_subf_failed_bools ( struct nodeLinkClass::node * node_ptr ) +{ + if ( node_ptr ) + { + node_ptr->config_failed_subf = false ; + node_ptr->goEnabled_failed_subf = false ; + node_ptr->inservice_failed_subf = false ; + node_ptr->hostservices_failed_subf = false ; + return; + } + slog ("null pointer\n"); +} + +/* + * Allocates memory for a new node and stores its the 
address in node_ptrs + * @param void + * @return node pointer to the newly allocted node + */ +struct nodeLinkClass::node * nodeLinkClass::newNode ( void ) +{ + struct nodeLinkClass::node * temp_node_ptr = NULL ; + + if ( memory_allocs == 0 ) + { + memset ( node_ptrs, 0 , sizeof(struct node *)*MAX_NODES); + } + + // find an empty spot + for ( int i = 0 ; i < MAX_NODES ; i++ ) + { + if ( node_ptrs[i] == NULL ) + { + node_ptrs[i] = temp_node_ptr = new node ; + memory_allocs++ ; + memory_used += sizeof (struct nodeLinkClass::node); + + return temp_node_ptr ; + } + } + elog ( "Failed to save new node pointer address\n" ); + return temp_node_ptr ; +} + +/* Frees the memory of a pre-allocated node and removes + * it from the node_ptrs list + * @param node * pointer to the node memory address to be freed + * @return int return code { PASS or -EINVAL } + */ +int nodeLinkClass::delNode ( struct nodeLinkClass::node * node_ptr ) +{ + if ( memory_allocs > 0 ) + { + for ( int i = 0 ; i < MAX_NODES ; i++ ) + { + if ( node_ptrs[i] == node_ptr ) + { + delete node_ptr ; + node_ptrs[i] = NULL ; + memory_allocs-- ; + memory_used -= sizeof (struct nodeLinkClass::node); + return PASS ; + } + } + elog ( "Error: Unable to validate memory address being freed\n" ); + } + else + elog ( "Error: Free memory called when there is no memory to free\n" ); + + return -EINVAL ; +} + + /* + * Allocate new node and tack it on the end of the node_list + */ +struct +nodeLinkClass::node* nodeLinkClass::addNode( string hostname ) +{ + /* verify node is not already provisioned */ + struct node * ptr = getNode ( hostname ); + if ( ptr ) + { + /* if it is then clean it up and fall through */ + if ( !testmode ) + { + wlog ("Warning: Node already provisioned\n"); + } + if ( remNode ( hostname ) ) + { + /* Should never get here but if we do then */ + /* something is seriously wrong */ + elog ("Error: Unable to remove node during reprovision\n"); + return static_cast(NULL); + } + } + + /* allocate memory for 
new node */ + ptr = newNode (); + if( ptr == NULL ) + { + elog ( "Error: Failed to allocate memory for new node\n" ); + return static_cast(NULL); + } + + /* init the new node */ + ptr->hostname = hostname ; + + ptr->ip = "" ; + ptr->mac = "" ; + ptr->infra_ip = "" ; + ptr->infra_mac = "" ; + + ptr->patching = false ; + ptr->patched = false ; + + /* the goenabled state bool */ + ptr->goEnabled = false ; + ptr->goEnabled_subf = false ; + + clear_hostservices_ctls ( ptr ); + + /* clear all the enable failure bools */ + clear_main_failed_bools ( ptr ); + clear_subf_failed_bools ( ptr ); + + /* Set the subfunction to disabled */ + ptr->operState_subf = MTC_OPER_STATE__DISABLED ; + ptr->availStatus_subf = MTC_AVAIL_STATUS__NOT_INSTALLED ; + + ptr->operState_dport = MTC_OPER_STATE__DISABLED ; + ptr->availStatus_dport= MTC_AVAIL_STATUS__OFFDUTY ; + + ptr->enabled_count = 0 ; + + ptr->cmdName = ""; + ptr->cmdReq = 0 ; + ptr->cmdRsp = 0 ; + ptr->cmdRsp_status= 0 ; + ptr->cmdRsp_status_string = "" ; + + ptr->add_completed = false ; + + /* init the hwmon reset and powercycle recovery control structures */ + recovery_ctrl_init ( ptr->hwmon_reset ); + recovery_ctrl_init ( ptr->hwmon_powercycle ); + + /* Default timeout values */ + ptr->mtcalive_timeout = HOST_MTCALIVE_TIMEOUT ; + + /* no ned to send a reboot response back to any client */ + ptr->activeClient = CLIENT_NONE ; + + ptr->task = "none" ; + ptr->action = "none" ; + ptr->clear_task = false ; + + ptr->mtcAlive_gate = true ; + ptr->mtcAlive_online = false ; + ptr->mtcAlive_offline = true ; + ptr->mtcAlive_misses = 0 ; + ptr->mtcAlive_hits = 0 ; + ptr->mtcAlive_count = 0 ; + ptr->mtcAlive_purge = 0 ; + + ptr->offline_search_count = 0 ; + ptr->mtcAlive_mgmnt = false ; + ptr->mtcAlive_infra = false ; + ptr->reboot_cmd_ack_mgmnt = false ; + ptr->reboot_cmd_ack_infra = false ; + + ptr->offline_log_reported = true ; + ptr->online_log_reported = false ; + + ptr->dor_recovery_mode = false ; + ptr->was_dor_recovery_mode= false ; + 
ptr->dor_recovery_time = 0 ; + + mtcTimer_init ( ptr->mtcTimer, hostname, "mtc timer"); /* Init node's general mtc timer */ + mtcTimer_init ( ptr->insvTestTimer, hostname, "insv test timer"); + mtcTimer_init ( ptr->oosTestTimer, hostname, "oos test timer"); /* Init node's oos test timer */ + mtcTimer_init ( ptr->mtcSwact_timer, hostname, "mtcSwact timer"); /* Init node's mtcSwact timer */ + mtcTimer_init ( ptr->mtcCmd_timer, hostname, "mtcCmd timer"); /* Init node's mtcCmd timer */ + mtcTimer_init ( ptr->mtcConfig_timer, hostname, "mtcConfig timer"); /* Init node's mtcConfig timer */ + mtcTimer_init ( ptr->mtcAlive_timer , hostname, "mtcAlive timer"); /* Init node's mtcAlive timer */ + mtcTimer_init ( ptr->offline_timer, hostname, "offline timer"); /* Init node's FH offline timer */ + mtcTimer_init ( ptr->http_timer, hostname, "http timer" ); /* Init node's http timer */ + mtcTimer_init ( ptr->bm_timer, hostname, "bm timer" ); /* Init node's bm timer */ + mtcTimer_init ( ptr->bm_ping_info.timer,hostname,"ping timer" ); /* Init node's ping timer */ + mtcTimer_init ( ptr->bmc_access_timer, hostname, "bmc acc timer" ); /* Init node's bm access timer */ + mtcTimer_init ( ptr->host_services_timer, hostname, "host services timer" ); /* host services timer */ + + mtcTimer_init ( ptr->hwmon_powercycle.control_timer, hostname, "powercycle control timer"); + mtcTimer_init ( ptr->hwmon_powercycle.recovery_timer, hostname, "powercycle recovery timer"); + mtcTimer_init ( ptr->hwmon_reset.control_timer, hostname, "reset control timer"); + mtcTimer_init ( ptr->hwmon_reset.recovery_timer, hostname, "reset recovery timer"); + + mtcCmd_init ( ptr->host_services_req ); + mtcCmd_init ( ptr->mtcAlive_req ); + mtcCmd_init ( ptr->reboot_req ); + mtcCmd_init ( ptr->general_req ); + + ptr->configStage = MTC_CONFIG__START ; + ptr->swactStage = MTC_SWACT__START ; + ptr->offlineStage = MTC_OFFLINE__IDLE ; + ptr->onlineStage = MTC_ONLINE__START ; + ptr->addStage = MTC_ADD__START ; + 
ptr->delStage = MTC_DEL__START ; + ptr->recoveryStage = MTC_RECOVERY__START ; + ptr->insvTestStage = MTC_INSV_TEST__RUN ; /* Start wo initial delay */ + ptr->oosTestStage = MTC_OOS_TEST__LOAD_NEXT_TEST ; + ptr->resetProgStage = MTC_RESETPROG__START; + ptr->powerStage = MTC_POWER__DONE ; + ptr->powercycleStage = MTC_POWERCYCLE__DONE; + ptr->subStage = MTC_SUBSTAGE__DONE ; + ptr->reinstallStage = MTC_REINSTALL__DONE ; + ptr->resetStage = MTC_RESET__START ; + ptr->handlerStage.enable = MTC_ENABLE__START ; /* Enable and Disable */ + + ptr->oos_test_count = 0 ; + ptr->insv_test_count = 0 ; + ptr->insv_recovery_counter = 0 ; + + ptr->uptime = 0 ; + ptr->uptime_refresh_counter = 0 ; + ptr->node_unlocked_counter = 0 ; + + /* Default to a healthy config until mtcAlive messages prove otherwise */ + ptr->mtce_flags = ( MTC_FLAG__I_AM_CONFIGURED | + MTC_FLAG__I_AM_HEALTHY ) ; + + ptr->graceful_recovery_counter = 0 ; + ptr->health_threshold_counter = 0 ; + ptr->unknown_health_reported = false ; + ptr->mnfa_graceful_recovery = false ; + + /* initialize all board management variables for this host */ + ptr->bm_ip = NONE ; + ptr->bm_type = NONE ; + ptr->bm_un = NONE ; + ptr->bm_pw = NONE ; + + ptr->bm_provisioned = false ; /* assume not provisioned until learned */ + ptr->power_on = false ; /* learned on first BMC connection */ + bmc_access_data_init ( ptr ); /* init all the BMC access vars all modes */ + + /* init the alarm array only to have it updated later + * with current alarm severities */ + for ( int id = 0 ; id < MAX_ALARMS ; id++ ) + { + ptr->alarms[id] = FM_ALARM_SEVERITY_CLEAR ; + } + ptr->alarms_loaded = false ; + + ptr->cfgEvent.base = NULL ; + ptr->sysinvEvent.base= NULL ; + ptr->vimEvent.base = NULL ; + + ptr->httpReq.base = NULL ; + ptr->libEvent_done_fifo.clear(); + ptr->libEvent_work_fifo.clear(); + + ptr->oper_sequence = 0 ; + ptr->oper_failures = 0 ; + + ptr->mtcCmd_work_fifo.clear(); + ptr->mtcCmd_done_fifo.clear(); + + ptr->cfgEvent.conn = NULL ; + 
ptr->sysinvEvent.conn= NULL ; + ptr->vimEvent.conn = NULL ; + ptr->httpReq.conn = NULL ; + + ptr->cfgEvent.req = NULL ; + ptr->sysinvEvent.req = NULL ; + ptr->vimEvent.req = NULL ; + ptr->httpReq.req = NULL ; + + + ptr->cfgEvent.buf = NULL ; + ptr->sysinvEvent.buf = NULL ; + ptr->vimEvent.buf = NULL ; + ptr->httpReq.buf = NULL ; + + + ptr->stall_recovery_log_throttle = 0 ; + ptr->stall_monitor_log_throttle = 0 ; + ptr->unexpected_pulse_log_throttle = 0 ; + ptr->lookup_mismatch_log_throttle = 0 ; + + ptr->log_throttle = 0 ; + ptr->no_work_log_throttle = 0 ; + + /* Clear the degrade control structs */ + ptr->degrade_mask = DEGRADE_MASK_NONE ; + ptr->degraded_resources_list.clear () ; + ptr->pmond_ready = false ; + ptr->rmond_ready = false ; + ptr->hwmond_ready = false ; + ptr->hbsClient_ready = false ; + + ptr->toggle = false ; + + ptr->retries = 0 ; + ptr->http_retries_cur = 0 ; + ptr->cmd_retries = 0 ; + ptr->power_action_retries = 0 ; + + ptr->subf_enabled = false ; + + for ( int i = 0 ; i < MAX_IFACES ; i++ ) + { + ptr->pulse_link[i].next_ptr = NULL ; + ptr->pulse_link[i].prev_ptr = NULL ; + ptr->monitor[i] = false ; + ptr->hbs_minor[i] = false ; + ptr->hbs_degrade[i] = false ; + ptr->hbs_failure[i] = false ; + ptr->max_count[i] = 0 ; + ptr->hbs_count[i] = 0 ; + ptr->hbs_minor_count[i] = 0 ; + ptr->b2b_misses_count[i] = 0 ; + ptr->hbs_degrade_count[i] = 0 ; + ptr->hbs_failure_count[i] = 0 ; + ptr->heartbeat_failed[i] = false; + } + + ptr->health = NODE_HEALTH_UNKNOWN ; + + ptr->pmon_missing_count = 0; + ptr->pmon_degraded = false ; + + /* now add it to the node list ; dealing with all conditions */ + + /* if the node list is empty add it to the head */ + if( head == NULL ) + { + head = ptr ; + tail = ptr ; + ptr->prev = NULL ; + ptr->next = NULL ; + } + else + { + /* link in the new_node to the tail of the node_list + * then mark the next field as the end of the node_list + * adjust tail to point to the last node + */ + tail->next = ptr ; + ptr->prev = tail ; + 
ptr->next = NULL ;
+        tail = ptr ;
+    }
+
+    /* start with no action and an empty todo list */
+    ptr->adminAction = MTC_ADMIN_ACTION__NONE ;
+    ptr->adminAction_todo_list.clear();
+
+    ptr->handlerStage.enable = MTC_ENABLE__START;
+
+    hosts++ ;
+
+    /* (re)build the Resource Reference Array */
+    if ( heartbeat )
+        build_rra ();
+
+    return ptr ;
+}
+
+/**
+ * Look a node up in the node list.
+ *
+ * @param hostname key to search for ; matched against each node's
+ *                 hostname, ip, infra_ip and uuid, in that order
+ * @return pointer to the first matching node ; NULL when the list is
+ *         empty, the key is empty or nothing matches
+ */
+struct nodeLinkClass::node* nodeLinkClass::getNode ( string hostname )
+{
+    /* check for empty list condition */
+    if ( head == NULL )
+        return NULL ;
+
+    if ( hostname.empty() )
+        return NULL ;
+
+    for ( struct node * ptr = head ; ; ptr = ptr->next )
+    {
+        if ( !hostname.compare ( ptr->hostname ))
+        {
+            return ptr ;
+        }
+        /* Node can be looked up by ip addr too */
+        if ( !hostname.compare ( ptr->ip ))
+        {
+            return ptr ;
+        }
+        /* Node can be looked up by infra_ip addr too */
+        if ( !hostname.compare ( ptr->infra_ip ))
+        {
+            return ptr ;
+        }
+        /* Node can be looked up by uuid too */
+        if ( !hostname.compare ( ptr->uuid ))
+        {
+            return ptr ;
+        }
+
+        /* stop at the end of the list before ptr->next is followed */
+        if (( ptr->next == NULL ) || ( ptr == tail ))
+            break ;
+    }
+    return NULL ;
+}
+
+
+/**
+ * Find the node that owns the supplied libevent base pointer.
+ *
+ * @param request  selects which per node event is compared ;
+ *                 SYSINV_HOST_QUERY checks sysinvEvent.base and the
+ *                 VIM_HOST_xxx requests check vimEvent.base
+ * @param base_ptr event base to search for
+ * @return pointer to the owning node ; NULL when the list is empty,
+ *         base_ptr is NULL or no node matches
+ */
+struct nodeLinkClass::node* nodeLinkClass::getEventBaseNode ( libEvent_enum request,
+                                                              struct event_base * base_ptr)
+{
+    struct node * ptr = NULL ;
+
+    /* check for empty list condition */
+    if ( head == NULL )
+        return NULL ;
+
+    if ( base_ptr == NULL )
+        return NULL ;
+
+    for ( ptr = head ; ; ptr = ptr->next )
+    {
+        switch ( request )
+        {
+            case SYSINV_HOST_QUERY:
+            {
+                if ( ptr->sysinvEvent.base == base_ptr )
+                {
+                    hlog1 ("%s Found Sysinv Event Base Pointer (%p)\n",
+                           ptr->hostname.c_str(), ptr->sysinvEvent.base);
+
+                    return ptr ;
+                }
+            }
+            /* do not fall into the vim check ; a sysinv query must
+             * only ever match on the sysinv event base */
+            break ;
+            case VIM_HOST_DISABLED:
+            case VIM_HOST_ENABLED:
+            case VIM_HOST_OFFLINE:
+            case VIM_HOST_FAILED:
+            {
+                if ( ptr->vimEvent.base == base_ptr )
+                {
+                    hlog1 ("%s Found vimEvent Base Pointer (%p) \n",
+                           ptr->hostname.c_str(), ptr->vimEvent.base);
+
+                    return ptr ;
+                }
+            }
+            break ;
+            default:
+                ;
+        } /* End Switch */
+
+        if (( ptr->next 
== NULL ) || ( ptr == tail )) + break ; + } + + wlog ("%s Event Base Pointer (%p) - Not Found\n", + ptr->hostname.c_str(), base_ptr); + + return static_cast(NULL); + +} + +/* Find the node in the list of nodes being heartbeated and splice it out */ +int nodeLinkClass::remNode( string hostname ) +{ + int rc = -ENODEV ; + if ( hostname.c_str() == NULL ) + return -EFAULT ; + + if ( head == NULL ) + return -ENXIO ; + + struct node * ptr = getNode ( hostname ); + + if ( ptr == NULL ) + return -EFAULT ; + + mtcTimer_fini ( ptr->mtcTimer ); + mtcTimer_fini ( ptr->mtcSwact_timer ); + mtcTimer_fini ( ptr->mtcAlive_timer ); + mtcTimer_fini ( ptr->offline_timer ); + mtcTimer_fini ( ptr->mtcCmd_timer ); + mtcTimer_fini ( ptr->http_timer ); + + mtcTimer_fini ( ptr->insvTestTimer ); + mtcTimer_fini ( ptr->oosTestTimer ); + mtcTimer_fini ( ptr->mtcConfig_timer ); + mtcTimer_fini ( ptr->host_services_timer ); + mtcTimer_fini ( ptr->hwmon_powercycle.control_timer ); + mtcTimer_fini ( ptr->hwmon_powercycle.recovery_timer ); + mtcTimer_fini ( ptr->hwmon_reset.control_timer ); + mtcTimer_fini ( ptr->hwmon_reset.recovery_timer ); + + mtcTimer_fini ( ptr->bm_timer ); + mtcTimer_fini ( ptr->bmc_access_timer ); + mtcTimer_fini ( ptr->bm_ping_info.timer ); + +#ifdef WANT_PULSE_LIST_SEARCH_ON_DELETE + + /* Splice the node out of the pulse monitor list */ + + for ( int i = 0 ; i < MAX_IFACES ; i++ ) + { + /* Does the pulse monitor list exist ? 
*/ + if ( pulse_list[i].head_ptr != NULL ) + { + pulse_ptr = ptr ; + if ( pulse_list[i].head_ptr == pulse_ptr ) + { + if ( pulse_list[i].head_ptr == pulse_list[i].tail_ptr ) + { + pulse_list[i].head_ptr = NULL ; + pulse_list[i].tail_ptr = NULL ; + dlog ("Pulse: Single Node -> Head Case\n"); + } + else + { + dlog ("Pulse: Multiple Nodes -> Head Case\n"); + pulse_list[i].head_ptr = pulse_list[i].head_ptr->pulse_link[i].next_ptr ; + pulse_list[i].head_ptr->pulse_link[i].prev_ptr = NULL ; + } + } + else if ( pulse_list[i].tail_ptr == pulse_ptr ) + { + dlog ("Pulse: Multiple Node -> Tail Case\n"); + pulse_list[i].tail_ptr = pulse_list[i].tail_ptr->pulse_link[i].prev_ptr ; + pulse_list[i].tail_ptr->pulse_link[i].next_ptr = NULL ; + } + else + { + dlog ("Pulse: Multiple Node -> Full Splice Out\n"); + pulse_ptr->pulse_link[i].prev_ptr->pulse_link[i].next_ptr = pulse_ptr->pulse_link[i].next_ptr ; + pulse_ptr->pulse_link[i].next_ptr->pulse_link[i].prev_ptr = pulse_ptr->pulse_link[i].prev_ptr ; + } + } + } + +#endif + + /* If the node is the head node */ + if ( ptr == head ) + { + /* only one node in the list case */ + if ( head == tail ) + { + dlog ("Single Node -> Head Case\n"); + head = NULL ; + tail = NULL ; + delNode ( ptr ); + rc = PASS ; + } + else + { + dlog ("Multiple Nodes -> Head Case\n"); + head = head->next ; + head->prev = NULL ; + delNode ( ptr ); + rc = PASS ; + } + } + /* if not head but tail then there must be more than one + * node in the list so go ahead and chop the tail. 
+ */ + else if ( ptr == tail ) + { + dlog ("Multiple Node -> Tail Case\n"); + tail = tail->prev ; + tail->next = NULL ; + delNode ( ptr ); + rc = PASS ; + } + else + { + dlog ("Multiple Node -> Full Splice Out\n"); + ptr->prev->next = ptr->next ; + ptr->next->prev = ptr->prev ; + delNode( ptr ); + rc = PASS ; + } + hosts-- ; + + /* (re)build the Resource Reference Array */ + if ( heartbeat ) + build_rra (); + + return rc ; +} + +/** + * Node state set'ers and get'ers + */ +mtc_nodeAdminAction_enum nodeLinkClass::get_adminAction ( string & hostname ) +{ + nodeLinkClass::node * node_ptr = getNode ( hostname ) ; + if ( node_ptr ) + return ( node_ptr->adminAction ); + + elog ("Failed getting 'admin action' for '%s'\n", hostname.c_str()); + return (MTC_ADMIN_ACTION__NONE); +} + +int nodeLinkClass::set_adminAction ( string & hostname, mtc_nodeAdminAction_enum adminAction ) +{ + nodeLinkClass::node * node_ptr = getNode ( hostname ) ; + if ( node_ptr ) + { + adminActionChange ( node_ptr, adminAction ) ; + return (PASS) ; + } + elog ("Failed setting 'admin action' for '%s'\n", hostname.c_str()); + return (FAIL) ; +} + +mtc_nodeAdminState_enum nodeLinkClass::get_adminState ( string & hostname ) +{ + nodeLinkClass::node * node_ptr = getNode ( hostname ) ; + if ( node_ptr ) + return ( node_ptr->adminState ); + + elog ("Failed getting 'admin state' state for '%s'\n", hostname.c_str()); + return (MTC_ADMIN_STATE__LOCKED); +} + +int nodeLinkClass::set_adminState ( string & hostname, mtc_nodeAdminState_enum adminState ) +{ + int rc = FAIL ; + nodeLinkClass::node * node_ptr = getNode ( hostname ) ; + if ( node_ptr ) + { + rc = nodeLinkClass::adminStateChange ( node_ptr , adminState ); + } + if ( rc ) + { + elog ("Failed setting 'admin state' for '%s'\n", hostname.c_str()); + } + return (rc) ; +} + +mtc_nodeOperState_enum nodeLinkClass::get_operState ( string & hostname ) +{ + nodeLinkClass::node * node_ptr = getNode ( hostname ) ; + if ( node_ptr ) + return ( node_ptr->operState ); 
+
+    elog ("Failed getting 'operational state' for '%s'\n", hostname.c_str());
+    return (MTC_OPER_STATE__DISABLED);
+}
+
+/**
+ * Change the operational state of the named host.
+ *
+ * @param hostname  key passed to getNode to find the host
+ * @param operState requested operational state
+ * @return result of operStateChange ; FAIL when the host is not found.
+ *         Any non-zero result is logged as an error.
+ */
+int nodeLinkClass::set_operState ( string & hostname, mtc_nodeOperState_enum operState )
+{
+    int rc = FAIL ;
+    nodeLinkClass::node * node_ptr = getNode ( hostname ) ;
+    if ( node_ptr )
+    {
+        rc = nodeLinkClass::operStateChange ( node_ptr , operState );
+    }
+    if ( rc )
+    {
+        elog ("Failed setting 'operational state' for '%s'\n", hostname.c_str());
+    }
+    return (rc) ;
+}
+
+/**
+ * Read the availability status of the named host.
+ *
+ * @param hostname key passed to getNode to find the host
+ * @return the host's availStatus ; MTC_AVAIL_STATUS__OFFDUTY, after
+ *         logging an error, when the host is not found
+ */
+mtc_nodeAvailStatus_enum nodeLinkClass::get_availStatus ( string & hostname )
+{
+    nodeLinkClass::node * node_ptr = getNode ( hostname ) ;
+    if ( node_ptr )
+        return ( node_ptr->availStatus );
+
+    elog ("Failed getting 'availability status' for '%s'\n", hostname.c_str());
+    return (MTC_AVAIL_STATUS__OFFDUTY);
+}
+
+/**
+ * Change the availability status of the named host.
+ *
+ * @param hostname    key passed to getNode to find the host
+ * @param availStatus requested availability status
+ * @return result of availStatusChange ; FAIL when the host is not found.
+ *         Any non-zero result is logged as an error.
+ */
+int nodeLinkClass::set_availStatus ( string & hostname, mtc_nodeAvailStatus_enum availStatus )
+{
+    int rc = FAIL ;
+    nodeLinkClass::node * node_ptr = getNode ( hostname ) ;
+    if ( node_ptr != NULL )
+    {
+        rc = nodeLinkClass::availStatusChange ( node_ptr , availStatus );
+    }
+    if ( rc )
+    {
+        elog ("Failed setting 'availability status' for '%s'\n", hostname.c_str());
+    }
+    /* report the real outcome, as set_adminState and set_operState do,
+     * rather than an unconditional FAIL */
+    return (rc) ;
+}
+
+/** Return a string representing the data port operational state
+ *  according to the X.731 standard ; an empty string, after logging
+ *  an error, when the host is not found */
+string nodeLinkClass::get_operState_dport ( string & hostname )
+{
+    nodeLinkClass::node * node_ptr = getNode ( hostname ) ;
+    if ( node_ptr )
+    {
+        return ( operState_enum_to_str(node_ptr->operState_dport));
+    }
+    elog ("%s failed getting 'operState_dport'\n", hostname.c_str());
+    return ("");
+}
+
+/** Return a string representing the data port availability status
+ *  according to the X.731 standard */
+string nodeLinkClass::get_availStatus_dport ( string & hostname )
+{
+    nodeLinkClass::node * node_ptr = getNode ( hostname ) ;
+    if ( node_ptr )
+    {
+        return ( availStatus_enum_to_str(node_ptr->availStatus_dport));
+    }
+    elog ("%s failed getting 'availStatus_dport'\n", 
hostname.c_str()); + return (""); +} + +void nodeLinkClass::print_node_info ( nodeLinkClass::node * ptr ) +{ + ilog ("%17s (%15s) %8s %8s-%-9s | %s-%s-%s | %s | %0X", + ptr->hostname.c_str(), + ptr->ip.c_str(), + mtc_nodeAdminState_str[ptr->adminState], + mtc_nodeOperState_str[ptr->operState], + mtc_nodeAvailStatus_str[ptr->availStatus], + ptr->subfunction_str.c_str(), + mtc_nodeOperState_str[ptr->operState_subf], + mtc_nodeAvailStatus_str[ptr->availStatus_subf], + mtc_nodeAdminAction_str [ptr->adminAction], + ptr->degrade_mask); +} + +void nodeLinkClass::print_node_info ( void ) +{ + if ( maintenance ) + { + syslog ( LOG_INFO,"+--------------------------------------+-------------------+-----------------+\n"); + for ( struct node * ptr = head ; ptr != NULL ; ptr = ptr->next ) + { + if ( LARGE_SYSTEM ) + { + syslog ( LOG_INFO, "| %36s | %17s | %15s | %8s %8s-%s", + ptr->uuid.length() ? ptr->uuid.c_str() : "", + ptr->hostname.c_str(), + ptr->ip.c_str(), + mtc_nodeAdminState_str[ptr->adminState], + mtc_nodeOperState_str[ptr->operState], + mtc_nodeAvailStatus_str[ptr->availStatus]); + } + else + { + syslog ( LOG_INFO, "| %36s | %17s | %15s | %8s %8s-%-9s | %s-%s-%s", + ptr->uuid.length() ? 
ptr->uuid.c_str() : "", + ptr->hostname.c_str(), + ptr->ip.c_str(), + mtc_nodeAdminState_str[ptr->adminState], + mtc_nodeOperState_str[ptr->operState], + mtc_nodeAvailStatus_str[ptr->availStatus], + ptr->subfunction_str.c_str(), + mtc_nodeOperState_str[ptr->operState_subf], + mtc_nodeAvailStatus_str[ptr->availStatus_subf]); + } + // syslog ( LOG_INFO, "\n"); + } + syslog ( LOG_INFO, "+--------------------------------------+-------------------+-----------------+\n\n"); + } + + if ( heartbeat ) + { + for ( int i = 0 ; i < MAX_IFACES ; i++ ) + { + if (( i == INFRA_IFACE ) && ( infra_network_provisioned == false )) + continue ; + + syslog ( LOG_INFO, "+--------------+-----+-----+------+-----+------+------------+-----------------+\n"); + syslog ( LOG_INFO, "| %s: %3d | Mon | Mis | Max | Deg | Fail | Pulses | %s (%4d) |\n" , + get_iface_name_str ((iface_enum)i), hosts, hbs_disabled ? "DISABLED" : "Enabled ", hbs_pulse_period ); + syslog ( LOG_INFO, "+--------------+-----+-----+------+-----+------+------------+-----------------+\n"); + + for ( struct node * ptr = head ; ptr != NULL ; ptr = ptr->next ) + { + syslog ( LOG_INFO, "| %-12s | %c | %3i | %4i | %3i | %4i | %8x | %d msec\n", + ptr->hostname.c_str(), + ptr->monitor[i] ? 
'Y' : 'n', + ptr->b2b_misses_count[i], + ptr->max_count[i], + ptr->hbs_degrade_count[i], + ptr->hbs_failure_count[i], + ptr->hbs_count[i], + hbs_pulse_period ); + } + } + syslog ( LOG_INFO, "+--------------+-----+-----+------+-----+------+------------+-----------------+\n"); + } +} + +/** Convert the supplied string to a valid maintenance Admin State enum */ +mtc_nodeAdminState_enum nodeLinkClass::adminState_str_to_enum ( const char * admin_ptr ) +{ + /* Default state */ + mtc_nodeAdminState_enum temp = MTC_ADMIN_STATE__LOCKED; + + if ( admin_ptr == NULL ) + { + wlog ("Administrative state is Null\n"); + } + else if ( !strcmp ( &mtc_nodeAdminState_str[MTC_ADMIN_STATE__UNLOCKED][0], admin_ptr )) + temp = MTC_ADMIN_STATE__UNLOCKED ; + + return (temp) ; +} + +/** Convert the supplied string to a valid maintenance Oper State enum */ +mtc_nodeOperState_enum nodeLinkClass::operState_str_to_enum ( const char * oper_ptr ) +{ + /* Default state */ + mtc_nodeOperState_enum temp = MTC_OPER_STATE__DISABLED; + + if ( oper_ptr == NULL ) + { + wlog ("Operation state is Null\n"); + } + else if ( !strcmp ( &mtc_nodeOperState_str[MTC_ADMIN_STATE__UNLOCKED][0], oper_ptr )) + temp = MTC_OPER_STATE__ENABLED ; + + return (temp) ; +} + +/** Convert the supplied string to a valid maintenance Avail Status enum */ +mtc_nodeAvailStatus_enum nodeLinkClass::availStatus_str_to_enum ( const char * avail_ptr ) +{ + /* Default state */ + mtc_nodeAvailStatus_enum temp = MTC_AVAIL_STATUS__OFFDUTY; + + /* Could do this as a loop but this is more resiliant to enum changes */ + /* TODO: consider using a paired list */ + if ( avail_ptr == NULL ) + { + wlog ("Availability status is Null\n"); + } + else if ( !strcmp ( &mtc_nodeAvailStatus_str[MTC_AVAIL_STATUS__AVAILABLE][0], avail_ptr )) + temp = MTC_AVAIL_STATUS__AVAILABLE ; + else if ( !strcmp ( &mtc_nodeAvailStatus_str[MTC_AVAIL_STATUS__FAILED][0], avail_ptr )) + temp = MTC_AVAIL_STATUS__FAILED ; + else if ( !strcmp ( 
&mtc_nodeAvailStatus_str[MTC_AVAIL_STATUS__INTEST][0], avail_ptr )) + temp = MTC_AVAIL_STATUS__INTEST ; + else if ( !strcmp ( &mtc_nodeAvailStatus_str[MTC_AVAIL_STATUS__DEGRADED][0], avail_ptr )) + temp = MTC_AVAIL_STATUS__DEGRADED ; + else if ( !strcmp ( &mtc_nodeAvailStatus_str[MTC_AVAIL_STATUS__OFFLINE][0], avail_ptr )) + temp = MTC_AVAIL_STATUS__OFFLINE ; + else if ( !strcmp ( &mtc_nodeAvailStatus_str[MTC_AVAIL_STATUS__ONLINE][0], avail_ptr )) + temp = MTC_AVAIL_STATUS__ONLINE ; + else if ( !strcmp ( &mtc_nodeAvailStatus_str[MTC_AVAIL_STATUS__POWERED_OFF][0], avail_ptr )) + temp = MTC_AVAIL_STATUS__POWERED_OFF ; + + return (temp) ; +} + +/** Convert the supplied enum to the corresponding Admin State string */ +string nodeLinkClass::adminAction_enum_to_str ( mtc_nodeAdminAction_enum val ) +{ + if ( val < MTC_ADMIN_ACTIONS ) + { + string adminAction_string = &mtc_nodeAdminAction_str[val][0] ; + return ( adminAction_string ); + } + return ( NULL ); +} + +/** Convert the supplied enum to the corresponding Admin State string */ +string nodeLinkClass::adminState_enum_to_str ( mtc_nodeAdminState_enum val ) +{ + if ( val < MTC_ADMIN_STATES ) + { + string adminState_string = &mtc_nodeAdminState_str[val][0] ; + return ( adminState_string ); + } + return ( NULL ); +} +/** Convert the supplied enum to the corresponding Oper State string */ +string nodeLinkClass::operState_enum_to_str ( mtc_nodeOperState_enum val ) +{ + if ( val < MTC_OPER_STATES ) + { + string operState_string = &mtc_nodeOperState_str[val][0] ; + return ( operState_string ); + } + return ( NULL ); +} +/** Convert the supplied enum to the corresponding Avail Status string */ +string nodeLinkClass::availStatus_enum_to_str ( mtc_nodeAvailStatus_enum val ) +{ + if ( val < MTC_AVAIL_STATUS ) + { + string availStatus_string = &mtc_nodeAvailStatus_str[val][0] ; + return ( availStatus_string ); + } + return ( NULL ); +} + +void nodeLinkClass::host_print ( struct nodeLinkClass::node * node_ptr ) +{ + string uuid ; 
+ + if ( daemon_get_cfg_ptr()->debug_level == 1 ) + { + const char bar [] = { "+-------------+--------------------------------------+" }; + const char uar [] = { "+- Add Host -+--------------------------------------+" }; + syslog ( LOG_INFO, "%s\n", &uar[0]); + syslog ( LOG_INFO, "| uuid : %s\n", node_ptr->uuid.c_str()); + syslog ( LOG_INFO, "| main : %s\n", node_ptr->function_str.c_str()); + syslog ( LOG_INFO, "| subf : %s\n", node_ptr->subfunction_str.c_str()); + syslog ( LOG_INFO, "| name : %s\n", node_ptr->hostname.c_str()); + syslog ( LOG_INFO, "| ip : %s\n", node_ptr->ip.c_str()); + syslog ( LOG_INFO, "| admin : %s\n", adminState_enum_to_str (node_ptr->adminState).c_str()); + syslog ( LOG_INFO, "| oper : %s\n", operState_enum_to_str (node_ptr->operState).c_str()); + syslog ( LOG_INFO, "| avail_subf: %s\n", availStatus_enum_to_str (node_ptr->availStatus_subf).c_str()); + syslog ( LOG_INFO, "| oper_subf: %s\n", operState_enum_to_str (node_ptr->operState_subf).c_str()); + syslog ( LOG_INFO, "%s\n", &bar[0]); + } + /* ec3624cb-d80f-4e8b-a6d7-0c11f6937f6a */ + /* Just print the last 4 chars of the uuid */ + if ( node_ptr->uuid.empty() ) + uuid = "---" ; + else + uuid = node_ptr->uuid.substr(32) ; + + if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) + { + ilog ("%s '%s' %s %s-%s (%s)\n", node_ptr->hostname.c_str(), + functions.c_str(), + node_ptr->ip.c_str(), + adminState_enum_to_str (node_ptr->adminState).c_str(), + operState_enum_to_str (node_ptr->operState).c_str(), + uuid.c_str()); + } + else + { + ilog ("%s '%s' %s %s-%s-%s (%s)\n",node_ptr->hostname.c_str(), + functions.c_str(), + node_ptr->ip.c_str(), + adminState_enum_to_str (node_ptr->adminState).c_str(), + operState_enum_to_str (node_ptr->operState).c_str(), + availStatus_enum_to_str(node_ptr->availStatus).c_str(), + uuid.c_str()); + } +} + + +/** Host Administrative State Change public member function */ +int nodeLinkClass::admin_state_change ( string hostname, + string newAdminState ) +{ + 
int rc = FAIL ; + struct nodeLinkClass::node * node_ptr = nodeLinkClass::getNode( hostname ); + if ( node_ptr ) + { + if ( newAdminState.empty() ) + { + rc = FAIL_STRING_EMPTY ; + } + else if ( adminState_str_to_enum ( newAdminState.data() ) == node_ptr->adminState ) + { + rc = PASS ; + } + else if ( adminStateOk ( newAdminState ) ) + { + clog ("%s %s (from %s)\n", hostname.c_str(), newAdminState.c_str(), adminState_enum_to_str (node_ptr->adminState).c_str()); + node_ptr->adminState = adminState_str_to_enum ( newAdminState.data() ); + rc = PASS ; + } + else + { + elog ("%s Invalid 'admin' state (%s)\n", + hostname.c_str(), newAdminState.c_str() ); + } + } + else + { + wlog ("Cannot change 'admin' state for unknown hostname (%s)\n", + hostname.c_str()); + } + return (rc); +} + +/** Host Operational State Change public member function */ +int nodeLinkClass::oper_state_change ( string hostname, string newOperState ) +{ + int rc = FAIL ; + struct nodeLinkClass::node * node_ptr = nodeLinkClass::getNode( hostname ); + if ( node_ptr ) + { + if ( newOperState.empty() ) + { + rc = FAIL_STRING_EMPTY ; + } + else if ( operState_str_to_enum ( newOperState.data() ) == node_ptr->operState ) + { + rc = PASS ; + } + else if ( operStateOk ( newOperState ) ) + { + mtc_nodeOperState_enum oper = operState_str_to_enum ( newOperState.data() ); + if (( node_ptr->operState == MTC_OPER_STATE__DISABLED ) && + ( oper == MTC_OPER_STATE__ENABLED )) + { + mtcAlarm_log ( hostname, MTC_LOG_ID__STATUSCHANGE_ENABLED ); + } + + if (( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && + ( oper == MTC_OPER_STATE__DISABLED )) + { + mtcAlarm_log ( hostname, MTC_LOG_ID__STATUSCHANGE_DISABLED ); + } + clog ("%s %s (from %s)\n", hostname.c_str(), newOperState.c_str(), operState_enum_to_str (node_ptr->operState).c_str()); + node_ptr->operState = oper ; + rc = PASS ; + } + else + { + elog ("%s Invalid 'oper' state (%s)\n", + hostname.c_str(), newOperState.c_str() ); + } + } + else + { + wlog ("Cannot change 
'oper' state for unknown hostname (%s)\n", + hostname.c_str() ); + } + return (rc); +} + +/** Host Availability Status Change public member function */ +int nodeLinkClass::avail_status_change ( string hostname, + string newAvailStatus ) +{ + int rc = FAIL ; + struct nodeLinkClass::node * node_ptr = nodeLinkClass::getNode( hostname ); + if ( node_ptr ) + { + if ( newAvailStatus.empty() ) + { + rc = FAIL_STRING_EMPTY ; + } + else if ( availStatus_str_to_enum ( newAvailStatus.data() ) == node_ptr->availStatus ) + { + rc = PASS ; + } + else if ( availStatusOk ( newAvailStatus )) + { + mtc_nodeAvailStatus_enum avail = availStatus_str_to_enum ( newAvailStatus.data() ); + + /* if we go to the failed state then clear all mtcAlive counts + * so that the last ones don't look like we are online when we + * might not be - we should relearn the on/off line state */ + if (( node_ptr->availStatus != MTC_AVAIL_STATUS__FAILED ) && + ( avail == MTC_AVAIL_STATUS__FAILED )) + { + node_ptr->mtcAlive_misses = 0 ; + node_ptr->mtcAlive_hits = 0 ; + node_ptr->mtcAlive_gate = false ; + } + + /* check for need to generate power on log */ + if (( node_ptr->availStatus == MTC_AVAIL_STATUS__POWERED_OFF ) && + ( avail != MTC_AVAIL_STATUS__POWERED_OFF )) + { + if ( node_ptr->adminAction == MTC_ADMIN_ACTION__POWERON ) + { + mtcAlarm_log ( hostname, MTC_LOG_ID__COMMAND_MANUAL_POWER_ON ); + } + else + { + mtcAlarm_log ( hostname, MTC_LOG_ID__COMMAND_AUTO_POWER_ON ); + } + } + + /* check for need to generate power off log */ + if (( node_ptr->availStatus != MTC_AVAIL_STATUS__POWERED_OFF ) && + ( avail == MTC_AVAIL_STATUS__POWERED_OFF )) + { + if ( node_ptr->adminAction == MTC_ADMIN_ACTION__POWEROFF ) + { + mtcAlarm_log ( hostname, MTC_LOG_ID__COMMAND_MANUAL_POWER_OFF ); + } + else + { + mtcAlarm_log ( hostname, MTC_LOG_ID__COMMAND_AUTO_POWER_OFF ); + } + } + + /* check for need to generate online log */ + if (( node_ptr->availStatus != MTC_AVAIL_STATUS__ONLINE ) && + ( avail == 
MTC_AVAIL_STATUS__ONLINE )) + { + if ( node_ptr->offline_log_reported == true ) + { + mtcAlarm_log ( hostname, MTC_LOG_ID__STATUSCHANGE_ONLINE ); + node_ptr->offline_log_reported = false ; + node_ptr->online_log_reported = true ; + } + } + + /* check for need to generate offline log */ + if (( node_ptr->availStatus != MTC_AVAIL_STATUS__OFFLINE ) && + ( avail == MTC_AVAIL_STATUS__OFFLINE )) + { + if ( node_ptr->online_log_reported == true ) + { + mtcAlarm_log ( hostname, MTC_LOG_ID__STATUSCHANGE_OFFLINE ); + node_ptr->offline_log_reported = true ; + node_ptr->online_log_reported = false ; + } + } + + /* If the availability status is moving away from off or online then + * be sure we cancel the mtcAlive timer */ + if ((( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFLINE ) || + ( node_ptr->availStatus == MTC_AVAIL_STATUS__ONLINE )) && + (( avail != MTC_AVAIL_STATUS__OFFLINE ) && + ( avail != MTC_AVAIL_STATUS__ONLINE ))) + { + /* Free the mtc timer if in use */ + if ( node_ptr->mtcAlive_timer.tid ) + { + tlog ("%s Stopping mtcAlive timer\n", node_ptr->hostname.c_str()); + mtcTimer_stop ( node_ptr->mtcAlive_timer ); + node_ptr->mtcAlive_timer.ring = false ; + node_ptr->mtcAlive_timer.tid = NULL ; + } + node_ptr->onlineStage = MTC_ONLINE__START ; + } + + clog ("%s %s (from %s)\n", hostname.c_str(), newAvailStatus.c_str(), availStatus_enum_to_str (node_ptr->availStatus).c_str()); + node_ptr->availStatus = avail ; + rc = PASS ; + } + else + { + elog ("%s Invalid 'avail' status (%s)\n", + hostname.c_str(), newAvailStatus.c_str() ); + } + } + else + { + wlog ("Cannot change 'avail' status for unknown hostname (%s)\n", + hostname.c_str()); + } + return (rc); +} + +/** Set host to the disabled failed state and generate the disabled-failed customer log + * This interface allows the disabled-failed state change to be combined so as to avoid + * setting a 'disabled' AND 'disabled-failed' customer log for the failure case */ +int nodeLinkClass::failed_state_change ( struct 
nodeLinkClass::node * node_ptr ) +{ + int rc = FAIL_NULL_POINTER ; + if ( node_ptr ) + { + node_ptr->availStatus = MTC_AVAIL_STATUS__FAILED ; + node_ptr->operState = MTC_OPER_STATE__DISABLED ; + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__STATUSCHANGE_FAILED ); + rc = PASS ; + } + else + { + slog ("Cannot change to disabled-failed state for null node pointer\n"); + } + return (rc); +} + + +/***************************************************************************** + * + * Name : lazy_graceful_fs_reboot + * + * Description: Issue a lazy reboot and signal SM to shutdown services + * + * Assumptions: No return + * + *****************************************************************************/ + +int nodeLinkClass::lazy_graceful_fs_reboot ( struct nodeLinkClass::node * node_ptr ) +{ + /* issue a lazy reboot to the mtcClient and as a backup launch a sysreq reset thresd */ + send_mtc_cmd ( node_ptr->hostname, MTC_CMD_LAZY_REBOOT, MGMNT_INTERFACE ) ; + fork_sysreq_reboot ( daemon_get_cfg_ptr()->failsafe_shutdown_delay ); + + /* loop until reboot */ + for ( ; ; ) + { + for ( int i = 0 ; i < LAZY_REBOOT_RETRY_DELAY_SECS ; i++ ) + { + daemon_signal_hdlr (); + sleep (MTC_SECS_1); + + /* give sysinv time to handle the response and get its state in order */ + if ( i == SM_NOTIFY_UNHEALTHY_DELAY_SECS ) + { + daemon_log ( SMGMT_UNHEALTHY_FILE, "AIO shutdown request" ); + } + } + /* Should never get there but if we do resend the reboot request + * but this time not Lazy */ + send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, MGMNT_INTERFACE ) ; + } + return (FAIL); +} + + +/* Generate a log and a critical alarm if the node config failed */ +int nodeLinkClass::alarm_config_failure ( struct nodeLinkClass::node * node_ptr ) +{ + if ( (node_ptr->degrade_mask & DEGRADE_MASK_CONFIG) == 0 ) + { + node_ptr->degrade_mask |= DEGRADE_MASK_CONFIG ; + } + + if ( node_ptr->alarms[MTC_ALARM_ID__CONFIG] != FM_ALARM_SEVERITY_CRITICAL ) + { + elog ("%s critical config failure\n", 
node_ptr->hostname.c_str()); + + mtcAlarm_critical ( node_ptr->hostname, MTC_ALARM_ID__CONFIG ); + node_ptr->alarms[MTC_ALARM_ID__CONFIG] = FM_ALARM_SEVERITY_CRITICAL ; + } + return (PASS); +} + +/* Clear the config alarm and degrade flag */ +int nodeLinkClass::alarm_config_clear ( struct nodeLinkClass::node * node_ptr ) +{ + if ( node_ptr->degrade_mask & DEGRADE_MASK_CONFIG ) + { + node_ptr->degrade_mask &= ~DEGRADE_MASK_CONFIG ; + } + + if ( node_ptr->alarms[MTC_ALARM_ID__CONFIG] != FM_ALARM_SEVERITY_CLEAR ) + { + ilog ("%s config alarm clear\n", node_ptr->hostname.c_str()); + + mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__CONFIG ); + node_ptr->alarms[MTC_ALARM_ID__CONFIG] = FM_ALARM_SEVERITY_CLEAR ; + } + return (PASS); +} + +/* Generate a log and a critical alarm if the node enable failed */ +int nodeLinkClass::alarm_enabled_failure ( struct nodeLinkClass::node * node_ptr ) +{ + if ( (node_ptr->degrade_mask & DEGRADE_MASK_ENABLE) == 0 ) + { + node_ptr->degrade_mask |= DEGRADE_MASK_ENABLE ; + } + + if ( node_ptr->alarms[MTC_ALARM_ID__ENABLE] != FM_ALARM_SEVERITY_CRITICAL ) + { + elog ("%s critical enable failure\n", node_ptr->hostname.c_str()); + + mtcAlarm_critical ( node_ptr->hostname, MTC_ALARM_ID__ENABLE ); + node_ptr->alarms[MTC_ALARM_ID__ENABLE] = FM_ALARM_SEVERITY_CRITICAL ; + } + return (PASS); +} + +/* + * Generate a major (in-service) enable alarm + * - don't downgrade the alarm from critical + * - do nothing if the alarm is already at major level + * + **/ +int nodeLinkClass::alarm_insv_failure ( struct nodeLinkClass::node * node_ptr ) +{ + if ( node_ptr ) + { + if ( node_ptr->alarms[MTC_ALARM_ID__ENABLE] != FM_ALARM_SEVERITY_MAJOR ) + { + if ( node_ptr->alarms[MTC_ALARM_ID__ENABLE] != FM_ALARM_SEVERITY_CRITICAL ) + { + elog ("%s major inservice enable failure\n", node_ptr->hostname.c_str()); + node_ptr->degrade_mask |= DEGRADE_MASK_INSV_TEST ; + mtcAlarm_major ( node_ptr->hostname, MTC_ALARM_ID__ENABLE ); + 
node_ptr->alarms[MTC_ALARM_ID__ENABLE] = FM_ALARM_SEVERITY_MAJOR ; + } + } + } + return (PASS); +} + +/* Clear the enable alarm and degrade flag */ +int nodeLinkClass::alarm_enabled_clear ( struct nodeLinkClass::node * node_ptr, bool force ) +{ + if ( node_ptr->degrade_mask & DEGRADE_MASK_ENABLE ) + { + node_ptr->degrade_mask &= ~DEGRADE_MASK_ENABLE ; + } + + /* The inservice test degrade flag needs to be cleared too. */ + if ( node_ptr->degrade_mask & DEGRADE_MASK_INSV_TEST ) + { + node_ptr->degrade_mask &= ~DEGRADE_MASK_INSV_TEST ; + } + + if (( node_ptr->alarms[MTC_ALARM_ID__ENABLE] != FM_ALARM_SEVERITY_CLEAR ) || + ( force == true )) + { + ilog ("%s enable alarm clear\n", node_ptr->hostname.c_str()); + + mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__ENABLE ); + node_ptr->alarms[MTC_ALARM_ID__ENABLE] = FM_ALARM_SEVERITY_CLEAR ; + } + return (PASS); +} + +/* Generate compute subfunction failure alarm */ +int nodeLinkClass::alarm_compute_failure ( struct nodeLinkClass::node * node_ptr, EFmAlarmSeverityT sev ) +{ + if ( (node_ptr->degrade_mask & DEGRADE_MASK_SUBF) == 0 ) + { + node_ptr->degrade_mask |= DEGRADE_MASK_SUBF ; + } + + if ( node_ptr->alarms[MTC_ALARM_ID__CH_COMP] != sev ) + { + if ( sev == FM_ALARM_SEVERITY_CRITICAL ) + { + elog ("%s critical compute subf failure\n", node_ptr->hostname.c_str()); + mtcAlarm_critical ( node_ptr->hostname, MTC_ALARM_ID__CH_COMP ); + } + else + { + elog ("%s major compute subf failure\n", node_ptr->hostname.c_str()); + mtcAlarm_major ( node_ptr->hostname, MTC_ALARM_ID__CH_COMP ); + } + node_ptr->alarms[MTC_ALARM_ID__CH_COMP] = sev ; + } + return (PASS); +} + +/* Clear the enable alarm if is at the Major severity level */ +int nodeLinkClass::alarm_insv_clear ( struct nodeLinkClass::node * node_ptr, bool force ) +{ + if ( node_ptr->degrade_mask & DEGRADE_MASK_INSV_TEST ) + { + node_ptr->degrade_mask &= ~DEGRADE_MASK_INSV_TEST ; + } + + if (( node_ptr->alarms[MTC_ALARM_ID__ENABLE] == FM_ALARM_SEVERITY_MAJOR ) || + ( force 
== true )) + { + ilog ("%s %s enable alarm clear\n", node_ptr->hostname.c_str(), force ? "force" : "major" ); + + mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__ENABLE ); + node_ptr->alarms[MTC_ALARM_ID__ENABLE] = FM_ALARM_SEVERITY_CLEAR ; + } + + if ( node_ptr->alarms[MTC_ALARM_ID__ENABLE] == FM_ALARM_SEVERITY_CLEAR ) + { + if ( node_ptr->degrade_mask & DEGRADE_MASK_ENABLE ) + { + node_ptr->degrade_mask &= ~DEGRADE_MASK_ENABLE ; + } + } + + return (PASS); +} + +/* Clear the compute subfunction alarm and degrade flag */ +int nodeLinkClass::alarm_compute_clear ( struct nodeLinkClass::node * node_ptr, bool force ) +{ + if ( node_ptr->degrade_mask & DEGRADE_MASK_SUBF ) + { + node_ptr->degrade_mask &= ~DEGRADE_MASK_SUBF ; + } + + if (( node_ptr->alarms[MTC_ALARM_ID__CH_COMP] != FM_ALARM_SEVERITY_CLEAR ) || + ( force == true )) + { + ilog ("%s major enable alarm clear\n", node_ptr->hostname.c_str()); + + mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__CH_COMP ); + node_ptr->alarms[MTC_ALARM_ID__CH_COMP] = FM_ALARM_SEVERITY_CLEAR ; + } + return (PASS); +} + +/** Host Operational State Change public member function */ +int nodeLinkClass::oper_subf_state_change ( string hostname, string newOperState ) +{ + int rc = FAIL ; + struct nodeLinkClass::node * node_ptr = nodeLinkClass::getNode( hostname ); + if ( node_ptr ) + { + if ( newOperState.empty() ) + { + rc = FAIL_STRING_EMPTY ; + } + else if ( operStateOk ( newOperState ) ) + { + node_ptr->operState_subf = operState_str_to_enum ( newOperState.data() ); + rc = PASS ; + } + else + { + elog ("%s Invalid subfunction 'oper' state (%s)\n", + hostname.c_str(), newOperState.c_str() ); + } + } + else + { + wlog ("Cannot change subfuction 'oper' state for unknown hostname (%s)\n", + hostname.c_str() ); + } + return (rc); +} + +/** Host Subfunction Availability Status Change public member function */ +int nodeLinkClass::avail_subf_status_change ( string hostname, string newAvailStatus ) +{ + int rc = FAIL ; + struct 
nodeLinkClass::node * node_ptr = nodeLinkClass::getNode( hostname ); + if ( node_ptr ) + { + if ( newAvailStatus.empty() ) + { + rc = FAIL_STRING_EMPTY ; + } + else if ( availStatusOk ( newAvailStatus ) ) + { + node_ptr->availStatus_subf = availStatus_str_to_enum ( newAvailStatus.data() ); + rc = PASS ; + } + else + { + elog ("%s Invalid subfunction 'avail' status (%s)\n", + hostname.c_str(), newAvailStatus.c_str() ); + } + } + else + { + wlog ("Cannot change subfunction 'avail' status for unknown hostname (%s)\n", + hostname.c_str()); + } + return (rc); +} + + + +/** Update the mtce key with value */ +int nodeLinkClass::update_key_value ( string hostname, string key , string value ) +{ + int rc = PASS ; + struct nodeLinkClass::node * node_ptr = nodeLinkClass::getNode( hostname ); + if ( node_ptr ) + { + /* TODO: Add all database members to this utility */ + if ( !key.compare(MTC_JSON_INV_BMIP) ) + node_ptr->bm_ip = value ; + else if ( !key.compare(MTC_JSON_INV_TASK) ) + node_ptr->task = value ; + else + { + wlog ("%s Unsupported key '%s' update with value '%s'\n", + hostname.c_str(), key.c_str(), value.c_str()); + rc = FAIL_BAD_PARM ; + } + } + else + { + wlog ("Cannot change 'admin' state for unknown hostname (%s)\n", + hostname.c_str()); + } + return (rc); +} + +int nodeLinkClass::del_host ( const string uuid ) +{ + string hostname = "unknown" ; + + int rc = FAIL_DEL_UNKNOWN ; + nodeLinkClass::node * node_ptr = nodeLinkClass::getNode( uuid ); + if ( node_ptr ) + { + hostname = node_ptr->hostname ; + + if ( node_ptr->mtcTimer.tid ) + mtcTimer_stop ( node_ptr->mtcTimer ); + + if ( nodeLinkClass::maintenance == true ) + { + + if ( node_ptr->bm_provisioned == true ) + { + set_bm_prov ( node_ptr, false); + } + + doneQueue_purge ( node_ptr ); + workQueue_purge ( node_ptr ); + mtcCmd_doneQ_purge ( node_ptr ); + mtcCmd_workQ_purge ( node_ptr ); + + /* Cleanup if this is the inactive controller */ + if ( !node_ptr->hostname.compare(inactive_controller_hostname)) + { + 
inactive_controller_hostname = "" ; + } + } + rc = rem_host ( hostname ); + if ( rc == PASS ) + { + plog ("%s Deleted\n", hostname.c_str()); + + print_node_info(); + } + else + { + elog ("%s Delete Failed (rc:%d)\n", hostname.c_str(), rc ); + } + this->host_deleted = true ; + } + else + { + wlog ("Unknown uuid: %s\n", uuid.c_str()); + } + return (rc); +} + +int nodeLinkClass::set_host_failed ( node_inv_type & inv ) +{ + struct nodeLinkClass::node * node_ptr = nodeLinkClass::getNode( inv.name ); + if(NULL == node_ptr) + { + return FAIL_UNKNOWN_HOSTNAME; + } + + if( (node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED) && + (node_ptr->operState == MTC_OPER_STATE__ENABLED) ) + { + elog( "%s is being force failed by SM", inv.name.c_str() ); + this->force_full_enable (node_ptr); + } + return PASS; +} + +int nodeLinkClass::mod_host ( node_inv_type & inv ) +{ + int rc = PASS ; + bool modify = false ; + bool modify_bm = false ; + + print_inv (inv); + + struct nodeLinkClass::node * node_ptr = nodeLinkClass::getNode( inv.uuid ); + if ( node_ptr ) + { + dlog ("%s Modify\n", node_ptr->hostname.c_str()); + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__TRANSLATE_LOCK_TO_FORCELOCK , node_ptr->hostname ) ) + { + if ( inv.action.compare("lock") == 0 ) + { + slog ("%s FIT action from 'lock' to 'force-lock'\n", node_ptr->hostname.c_str()); + inv.action = "force-lock"; + } + } +#endif + + /* Handle Administrative state mismatch between SYSINV and Maintenance */ + if ( strcmp ( mtc_nodeAdminState_str[node_ptr->adminState], inv.admin.data())) + { + plog ("%s Modify 'Administrative' state %s -> %sed\n", node_ptr->hostname.c_str(), + mtc_nodeAdminState_str[node_ptr->adminState], inv.action.c_str()); + + modify = true ; /* we have a delta */ + + /* Local admin state takes precedence */ + if ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) + { + /* handle a lock request while unlocked */ + if ( !inv.action.compare ( "lock" ) ) + { + if ( node_ptr->dor_recovery_mode == true 
) + node_ptr->dor_recovery_mode = false ; + + /* Set action to LOCK and let the FSM run the disable handler */ + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__LOCK ); + } + else + { + ilog ("%s Already Unlocked ; no action required\n", node_ptr->hostname.c_str() ); + } + } + else + { + if ( node_ptr->patching == true ) + { + wlog ("%s cannot unlock host while patching is in progress\n", node_ptr->hostname.c_str()); + rc = FAIL_PATCH_INPROGRESS ; + } + else + { + /* generate command=unlock log */ + mtcAlarm_log ( inv.name, MTC_LOG_ID__COMMAND_UNLOCK ); + + /* Set action to UNLOCK and let the FSM run the enable handler */ + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__UNLOCK ); + } + } + } + else if ( (!inv.action.empty()) && (inv.action.compare ( "none" ))) + { + dlog ("%s Modify Action is '%s'\n", node_ptr->hostname.c_str(), inv.action.c_str() ); + node_ptr->action = inv.action ; + modify = true ; /* we have a delta */ + + /* Do not permit administrative actions while Swact is in progress */ + /* Note: There is a self corrective clause in the mtcTimer_handler + * that will auto clear this flag if it gets stuck for 5 minutes */ + if ( smgrEvent.mutex ) + { + elog ("%s Rejecting '%s' - Swact Operation in-progress\n", + node_ptr->hostname.c_str(), inv.action.c_str()); + rc = FAIL_SWACT_INPROGRESS ; + } + + else if (!inv.action.compare ( "force-lock" )) + { + /* TODO: Create customer log of this action */ + ilog ("%s Force Lock Action\n", node_ptr->hostname.c_str()); + + if ( node_ptr->dor_recovery_mode == true ) + node_ptr->dor_recovery_mode = false ; + + if ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) + { + if ( node_ptr->adminAction == MTC_ADMIN_ACTION__FORCE_LOCK ) + { + ilog ("%s Force Lock Action - already in progress ...\n", node_ptr->hostname.c_str()); + } + else + { + /* generate command=forcelock log */ + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_FORCE_LOCK ); + + /* Set action to LOCK and let the FSM run the disable handler */ + 
adminActionChange ( node_ptr , MTC_ADMIN_ACTION__FORCE_LOCK ); + } + } + else + { + wlog ("%s Already Locked\n", node_ptr->hostname.c_str() ); + } + } + else if (!inv.action.compare ( "lock" )) + { + if ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) + { + if ( node_ptr->dor_recovery_mode == true ) + node_ptr->dor_recovery_mode = false ; + + /* Set action to LOCK and let the FSM run the disable handler */ + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__LOCK ); + } + else + { + wlog ("%s Already Locked\n", node_ptr->hostname.c_str() ); + } + } + else if (!inv.action.compare ( "unlock" )) + { + if ( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED ) + { + if ( node_ptr->patching == true ) + { + wlog ("%s cannot unlock host while patching is in progress\n", node_ptr->hostname.c_str()); + rc = FAIL_PATCH_INPROGRESS ; + } + else + { + recovery_ctrl_init ( node_ptr->hwmon_reset ); + recovery_ctrl_init ( node_ptr->hwmon_powercycle ); + + /* Set action to UNLOCK and let the FSM run the enable handler */ + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__UNLOCK ); + + mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__LOCK ); + node_ptr->alarms[MTC_ALARM_ID__LOCK] = FM_ALARM_SEVERITY_CLEAR ; + + /* generate command=unlock log */ + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_UNLOCK ); + } + } + else + { + wlog ("%s Already UnLocked\n", node_ptr->hostname.c_str() ); + } + } + else if ( !inv.action.compare ( "swact" ) || + !inv.action.compare ( "force-swact" ) ) + { + if ( !((get_host_function_mask ( inv.type ) & CONTROLLER_TYPE) == CONTROLLER_TYPE) ) + { + elog ("%s Rejecting '%s' - Swact only supported for Controllers\n", + node_ptr->hostname.c_str(), + inv.action.c_str()); + rc = FAIL_NODETYPE ; + } + else if ( nodeLinkClass::is_inactive_controller_main_insv() != true ) + { + elog ("%s Rejecting '%s' - No In-Service Mate\n", + node_ptr->hostname.c_str(), + inv.action.c_str()); + rc = FAIL_SWACT_NOINSVMATE ; + } + else if ( node_ptr->adminAction != 
MTC_ADMIN_ACTION__NONE ) + { + elog ("%s Rejecting '%s' - '%s' In-Progress\n", + node_ptr->hostname.c_str(), + inv.action.c_str(), + get_adminAction_str( node_ptr->adminAction )); + rc = FAIL_OPER_INPROGRESS ; + } + else if ( smgrEvent.mutex ) + { + elog ("%s Rejecting '%s' - Operation in-progress\n", + node_ptr->hostname.c_str(), + inv.action.c_str()); + rc = FAIL_SWACT_INPROGRESS ; + } + // don't run the patching tests during a force-swact action + else if ( node_ptr->patching == true ) + { + wlog ("%s cannot swact active controller while patching is in progress\n", node_ptr->hostname.c_str()); + rc = FAIL_PATCH_INPROGRESS ; + } + else if ( inactive_controller_is_patching() == true ) + { + wlog ("%s cannot swact to inactive controller while patching is in progress\n", node_ptr->hostname.c_str()); + rc = FAIL_PATCH_INPROGRESS ; + } + // if this is a force-swact action then allow swact to a + // patched node that has not been rebooted yet, since + // this is a recoverable operation. The other two patching tests + // (above) need to be done on all swact actions since it may + // render the system non-recoverable. 
+ else if ( !inv.action.compare ( "swact" ) && + inactive_controller_is_patched() == true ) + { + wlog ("%s cannot swact to a 'patched' but not 'rebooted' host\n", node_ptr->hostname.c_str()); + rc = FAIL_PATCHED_NOREBOOT ; + } + else + { + plog ("%s Action=%s\n", node_ptr->hostname.c_str(), + inv.action.c_str()); + if ( !inv.action.compare ( "force-swact" ) ) + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__FORCE_SWACT ); + else + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__SWACT ); + + /* generate command=swact log */ + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_SWACT ); + + smgrEvent.mutex = true ; + } + } + else if ( !inv.action.compare ( "reboot" ) ) + { + plog ("%s Reboot Action\n", node_ptr->hostname.c_str()); + node_ptr->resetProgStage = MTC_RESETPROG__START ; + node_ptr->retries = 0 ; + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_MANUAL_REBOOT ); + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__REBOOT ); + } + else if ( !inv.action.compare ( "reinstall" ) ) + { + plog ("%s Reinstall Action\n", node_ptr->hostname.c_str()); + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__REINSTALL ); + + /* generate command=reinstall log */ + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_REINSTALL ); + } + else if ( !inv.action.compare ( "reset" ) ) + { + string bm_ip = NONE ; + rc = FAIL_RESET_CONTROL ; + node_ptr->retries = 0 ; + plog ("%s Reset Action\n", node_ptr->hostname.c_str()); + + if ( hostUtil_is_valid_bm_type ( node_ptr->bm_type ) == false ) + { + wlog ("%s reset rejected due to unprovisioned bmc\n", + node_ptr->hostname.c_str()); + rc = FAIL_BM_PROVISION_ERR ; + } + else if ( node_ptr->availStatus == MTC_AVAIL_STATUS__POWERED_OFF ) + { + wlog ("%s reset rejected for powered off host\n", + node_ptr->hostname.c_str()); + rc = FAIL_RESET_POWEROFF; + } + else if ( node_ptr->bm_un.empty() ) + { + wlog ("%s reset rejected due to unconfigured 'bm_username'\n", + node_ptr->bm_un.c_str()); + rc = FAIL_BM_PROVISION_ERR ; + } + 
else + { + rc = PASS ; + node_ptr->resetStage = MTC_RESET__START ; + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__RESET ); + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_MANUAL_RESET ); + } + } + else if ( !inv.action.compare ( "power-on" ) ) + { + string bm_ip = NONE ; + rc = FAIL_POWER_CONTROL ; + + plog ("%s Power-On Action\n", node_ptr->hostname.c_str()); + + if ( hostUtil_is_valid_bm_type ( node_ptr->bm_type ) == false ) + { + wlog ("%s power-on rejected due to unprovisioned bmc\n", + node_ptr->hostname.c_str()); + rc = FAIL_BM_PROVISION_ERR ; + } + + else if ( node_ptr->bm_un.empty() ) + { + wlog ("%s power-on rejected due to unconfigured 'bm_username'\n", + node_ptr->hostname.c_str()); + rc = FAIL_BM_PROVISION_ERR ; + } + else + { + rc = PASS ; + node_ptr->powerStage = MTC_POWERON__START ; + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__POWERON ); + } + mtcInvApi_update_task ( node_ptr, "" ); + } + else if ( !inv.action.compare ( "power-off" ) ) + { + string bm_ip = NONE ; + rc = FAIL_POWER_CONTROL ; + plog ("%s Power-Off Action\n", node_ptr->hostname.c_str()); + + if ( hostUtil_is_valid_bm_type ( node_ptr->bm_type ) == false ) + { + wlog ("%s power-off rejected due to unprovisioned bmc\n", + node_ptr->hostname.c_str()); + rc = FAIL_BM_PROVISION_ERR ; + } + else if ( node_ptr->bm_un.empty() ) + { + wlog ("%s power-off rejected due to unconfigured 'bm_username'\n", + node_ptr->hostname.c_str()); + rc = FAIL_BM_PROVISION_ERR ; + } + else + { + if (( !hostUtil_is_valid_ip_addr ( node_ptr->bm_ip )) && + ( !hostUtil_is_valid_ip_addr ( bm_ip ))) + { + wlog ("%s power-off may fail ; 'bm_ip' is undiscovered\n", + node_ptr->hostname.c_str()); + } + rc = PASS ; + node_ptr->powerStage = MTC_POWEROFF__START ; + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__POWEROFF ); + } + mtcInvApi_update_task ( node_ptr, "" ); + } + else + { + wlog ("%s Unsupported action '%s'\n", + node_ptr->hostname.c_str(), + inv.action.c_str()); + rc = FAIL_ADMIN_ACTION ; + 
mtcInvApi_update_task ( node_ptr, "" ); + } + } + if ( node_ptr->uuid.compare ( inv.uuid ) ) + { + send_hwmon_command ( node_ptr->hostname, MTC_CMD_DEL_HOST ); + plog ("%s Modify 'uuid' from %s -> %s\n", + node_ptr->hostname.c_str(), + node_ptr->uuid.c_str(), inv.uuid.c_str() ); + node_ptr->uuid = inv.uuid ; + send_hwmon_command ( node_ptr->hostname, MTC_CMD_ADD_HOST ); + modify = true ; /* we have a delta */ + } + if ( node_ptr->type.compare ( inv.type ) ) + { + plog ("%s Modify 'personality' from %s -> %s\n", + node_ptr->hostname.c_str(), + node_ptr->type.c_str(), inv.type.c_str() ); + + node_ptr->type = inv.type ; + node_ptr->nodetype = get_host_function_mask ( inv.type ); + + modify = true ; /* we have a delta */ + } + if ( node_ptr->ip.compare ( inv.ip ) ) + { + plog ("%s Modify 'mgmt_ip' from %s -> %s\n", + node_ptr->hostname.c_str(), + node_ptr->ip.c_str(), inv.ip.c_str()); + node_ptr->ip = inv.ip ; + + /* Tell the guestAgent the new IP */ + rc = send_guest_command(node_ptr->hostname,MTC_CMD_MOD_HOST); + + modify = true ; /* we have a delta */ + } + if ( node_ptr->mac.compare ( inv.mac ) ) + { + plog ("%s Modify 'mgmt_mac' from %s -> %s\n", + node_ptr->hostname.c_str(), + node_ptr->mac.c_str(), inv.mac.c_str() ); + node_ptr->mac = inv.mac ; + + modify = true ; /* we have a delta */ + } + if ( node_ptr->infra_ip.compare ( inv.infra_ip ) ) + { + if (( hostUtil_is_valid_ip_addr ( inv.infra_ip )) || ( hostUtil_is_valid_ip_addr ( node_ptr->infra_ip ))) + { + plog ("%s Modify 'infra_ip' from %s -> %s\n", + node_ptr->hostname.c_str(), + node_ptr->infra_ip.c_str(), inv.infra_ip.c_str() ); + + modify = true ; /* we have a delta */ + } + node_ptr->infra_ip = inv.infra_ip ; + } + if ( (!inv.name.empty()) && (node_ptr->hostname.compare ( inv.name)) ) + { + mtcCmd cmd ; + mtcCmd_init ( cmd ); + cmd.stage = MTC_CMD_STAGE__START ; + cmd.cmd = MTC_OPER__MODIFY_HOSTNAME ; + cmd.name = inv.name ; + node_ptr->mtcCmd_work_fifo.push_back(cmd); + plog ("%s Modify 'hostname' to %s 
(mtcCmd_queue:%ld)\n", + node_ptr->hostname.c_str(), + cmd.name.c_str() , + node_ptr->mtcCmd_work_fifo.size()); + + modify_bm = true ; /* board mgmnt change */ + modify = true ; /* we have some delta */ + } + if ( node_ptr->bm_un.compare ( inv.bm_un ) ) + { + if ( inv.bm_un.empty () ) + inv.bm_un = "none" ; + + plog ("%s Modify 'bm_username' from %s -> %s\n", + node_ptr->hostname.c_str(), + node_ptr->bm_un.c_str(), inv.bm_un.c_str()); + + node_ptr->bm_un = inv.bm_un ; + + modify_bm = true ; /* board mgmnt change */ + modify = true ; /* we have some delta */ + } + + /* PATCHBACK - issue found during BMC refactoring user story + * where there was a race condition found where the bmc dnsmasq file + * was updated with a new bm_ip close to when there was an + * administrative operation (unlock in this case). The newly learned + * bm_ip was overwritten by the now stale bm_ip that came in from + * inventory. The bm_ip should never come from sysinv while in + * internal mode. */ + if (( node_ptr->bm_ip.compare ( inv.bm_ip ))) + { + if ( inv.bm_ip.empty () ) + inv.bm_ip = NONE ; + + /* if not empty and not none and already used then reject */ + if ( is_bm_ip_already_used ( inv.bm_ip ) == true ) + { + wlog ("%s cannot use already provisioned bm ip %s\n", + node_ptr->hostname.c_str(), + inv.bm_ip.c_str()); + return (FAIL_DUP_IPADDR); + } + plog ("%s Modify 'bm_ip' from %s -> %s\n", + node_ptr->hostname.c_str(), + node_ptr->bm_ip.c_str(), inv.bm_ip.c_str()); + + node_ptr->bm_ip = inv.bm_ip ; + + modify_bm = true ; /* board mgmnt change */ + modify = true ; /* we have some delta */ + } + if ( node_ptr->bm_type.compare ( inv.bm_type ) ) + { + if ( inv.bm_type.empty() ) + inv.bm_type = "none" ; + else + inv.bm_type = tolowercase(inv.bm_type) ; + + plog ("%s Modify 'bm_type' from %s -> %s\n", + node_ptr->hostname.c_str(), + node_ptr->bm_type.c_str(), inv.bm_type.c_str()); + + modify_bm = true ; /* board mgmnt change */ + modify = true ; /* we have some delta */ + } + + /* print a 
log if we find that there was nothing to modify */ + if ( modify == false ) + { + wlog ("%s Modify request without anything to modify\n", node_ptr->hostname.c_str()); + } + if ( modify_bm == true ) + { + wlog ("%s Board Management provisioning has changed\n", node_ptr->hostname.c_str()); + bool bm_type_was_valid = hostUtil_is_valid_bm_type (node_ptr->bm_type) ; + bool bm_type_now_valid = hostUtil_is_valid_bm_type (inv.bm_type) ; + + /* update bm_type now */ + node_ptr->bm_type = inv.bm_type ; + + /* BM is provisioned */ + if ( bm_type_now_valid == true ) + { + /* force (re)provision */ + manage_bmc_provisioning ( node_ptr ); + } + + /* BM is already provisioned but is now deprovisioned */ + else if (( bm_type_was_valid == true ) && ( bm_type_now_valid == false )) + { + node_ptr->bm_type = NONE ; + node_ptr->bm_ip = NONE ; + node_ptr->bm_un = NONE ; + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_BM_DEPROVISIONED ); + set_bm_prov ( node_ptr, false ); + } + + /* BM was not provisioned and is still not provisioned */ + else + { + /* Handle all other provisioning changes ; username, ip address */ + manage_bmc_provisioning ( node_ptr ); + } + } + } + else + { + elog ("getNode failed to find uuid: %s\n", inv.uuid.c_str()); + } + + return ( rc ); +} + +void nodeLinkClass::start_offline_handler ( struct nodeLinkClass::node * node_ptr ) +{ + bool already_active = false ; + mtc_offlineStages_enum offlineStage_saved = node_ptr->offlineStage ; + + if ( node_ptr->offlineStage == MTC_OFFLINE__IDLE ) + { + node_ptr->offlineStage = MTC_OFFLINE__START ; + } + else + { + already_active = true ; + } + plog ("%s%soffline handler (%s-%s-%s) (stage:%d)\n", + node_ptr->hostname.c_str(), + already_active ? 
" " : " starting ", + adminState_enum_to_str(node_ptr->adminState).c_str(), + operState_enum_to_str(node_ptr->operState).c_str(), + availStatus_enum_to_str(node_ptr->availStatus).c_str(), + offlineStage_saved); +} + +void nodeLinkClass::stop_offline_handler ( struct nodeLinkClass::node * node_ptr ) +{ + if ( node_ptr->offlineStage != MTC_OFFLINE__IDLE ) + { + plog ("%s stopping offline handler (%s-%s-%s) (stage:%d)\n", + node_ptr->hostname.c_str(), + adminState_enum_to_str(node_ptr->adminState).c_str(), + operState_enum_to_str(node_ptr->operState).c_str(), + availStatus_enum_to_str(node_ptr->availStatus).c_str(), + node_ptr->offlineStage); + node_ptr->offlineStage = MTC_OFFLINE__IDLE ; + } +} + +string nodeLinkClass::get_host ( string uuid ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( uuid ); + if ( node_ptr != NULL ) + { + return (node_ptr->hostname) ; + } + return ( "" ); +} + +/** Check to see if the node list already contains any of the following + * information and reject the add or modify if it does + * + * uuid + * hostname + * ip address + * mac address + * + **/ +int nodeLinkClass::add_host_precheck ( node_inv_type & inv ) +{ + struct node * ptr = static_cast(NULL) ; + struct node * node_ptr = static_cast(NULL) ; + int rc = PASS ; + + if ( head == NULL ) + return (PASS); + + for ( node_ptr = head ; ; node_ptr = node_ptr->next ) + { + /* look or the UUID */ + if ( !node_ptr->uuid.compare(inv.uuid)) + { + rc = RETRY ; + dlog ("%s found in mtce\n", node_ptr->uuid.c_str()); + break ; + } + else if (( node_ptr->next == NULL ) || ( node_ptr == tail )) + break ; + } + + /** If that uuid is not found then make sure there + * are no other entries in the list that already + * has the same info that we want to create a + * new host with. + * If so then reject by returning a failure. 
+ */ + for ( ptr = head ; ; ptr = ptr->next ) + { + /* if this uuid is found then see if we are being + * asked to modify and make sure that we are not being + * asked to modify other members to values that are + * used by other hosts + * If so then reject by returning a failure. + * otherwise then allow the modification by returning a retry. + */ + + if (( rc == RETRY ) && ( ptr == node_ptr )) + { + dlog ("%s skip\n", ptr->hostname.c_str()); + /* skip the node we found the UUID on + * but make sure that none of the other nodes + * have the same data */ + } + else + { + dlog ("%s check\n", ptr->hostname.c_str()); + + if ( !ptr->hostname.compare(inv.name)) + { + wlog ("hostname (%s) already used ; rejecting add / modify\n", inv.name.c_str()); + return(FAIL_DUP_HOSTNAME); + } + if ( ptr->ip.compare("none") != 0 && !ptr->ip.compare(inv.ip)) + { + wlog ("ip address (%s) already used ; rejecting add / modify\n", inv.ip.c_str()); + return(FAIL_DUP_IPADDR); + } + if ( !ptr->mac.compare(inv.mac)) + { + wlog ("mac address (%s) already used ; rejecting add / modify\n", inv.mac.c_str()); + return(FAIL_DUP_MACADDR); + } + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + return (rc); +} + +int nodeLinkClass::add_host ( node_inv_type & inv ) +{ + int rc = FAIL ; + struct nodeLinkClass::node * node_ptr = static_cast(NULL); + + if ((!inv.name.compare("controller-0")) || + (!inv.name.compare("controller-1"))) + { + dlog ("Adding %s\n", inv.name.c_str()); + node_ptr = nodeLinkClass::getNode(inv.name); + } + else if (( inv.name.empty()) || + ( !inv.name.compare ("none") ) || + ( !inv.name.compare ("None") )) + { + wlog ("Refusing to add host with 'null' or 'invalid' hostname (%s)\n", + inv.uuid.c_str()); + return (FAIL_INVALID_HOSTNAME) ; + } + else if (( inv.uuid.empty()) || + ( !inv.uuid.compare ("none") ) || + ( !inv.uuid.compare ("None") )) + { + wlog ("Refusing to add host with 'null' or 'invalid' uuid (%s)\n", + inv.uuid.c_str()); + return (FAIL_INVALID_UUID) ; 
+ } + + /* Ensure we don't add a host with critical info that is + * already used by other members of inventory like ; + * hostname, uuid, ip, mac, bm_ip */ + else if ( ( rc = add_host_precheck ( inv )) > RETRY ) + { + return (rc); + } + else + { + if ( rc == RETRY ) + { + dlog ("%s modify operation\n", inv.uuid.c_str()); + } + else + { + dlog ("%s add operation\n", inv.uuid.c_str()); + } + node_ptr = nodeLinkClass::getNode(inv.uuid); + } + + if ( node_ptr ) + { + dlog ("%s Already provisioned\n", node_ptr->hostname.c_str()); + + /* update some of the info */ + node_ptr->adminState = adminState_str_to_enum (inv.admin.data()); + node_ptr->operState = operState_str_to_enum (inv.oper.data ()); + node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data()); + + if ( CPE_SYSTEM ) + { + node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data()); + node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data()); + } + + /* Send back a retry so that this add is converted to a modify */ + return (RETRY); + } + /* Otherwise add it as a new node */ + else + { + if ( daemon_get_cfg_ptr()->debug_level != 0 ) + print_inv ( inv ); + + unsigned int nodetype_temp = get_host_function_mask ( inv.type ); + if ( (nodetype_temp & CONTROLLER_TYPE) == CONTROLLER_TYPE ) + { + if ( inactive_controller_hostname.empty () == false ) + { + wlog ("Cannot provision more than 2 controllers\n"); + wlog ("%s is already provisioned as inactive\n", + inactive_controller_hostname.c_str()); + return (FAIL); + } + } + + /* Prevent allowing add of a reserved hostname + * with and incorrect node type. 
Reserved names are + * + * controller-0 and controller-1 must be a controller type + * storage-0 must be a storage type + * + * */ + + if ((( !inv.name.compare ("controller-0")) && (( nodetype_temp & CONTROLLER_TYPE) != CONTROLLER_TYPE )) || + (( !inv.name.compare ("controller-1")) && (( nodetype_temp & CONTROLLER_TYPE ) != CONTROLLER_TYPE)) || + (( !inv.name.compare ("storage-0" )) && (( nodetype_temp & STORAGE_TYPE) != STORAGE_TYPE))) + { + wlog ("Cannot provision '%s' as a '%s' host\n", inv.name.c_str(), inv.type.c_str()); + return (FAIL_RESERVED_NAME); + } + node_ptr = nodeLinkClass::addNode(inv.name); + if ( node_ptr ) + { + bool validStates = false ; + node_ptr->hostname = inv.name ; + + /* set the node type ; string and define code */ + node_ptr->type = inv.type ; + node_ptr->nodetype = get_host_function_mask ( inv.type ) ; + + update_host_functions ( inv.name, inv.func ); + + node_ptr->ip = inv.ip ; + node_ptr->mac = inv.mac ; + node_ptr->uuid = inv.uuid ; + node_ptr->infra_ip = inv.infra_ip ; + + if ( inv.uptime.length() ) + { + sscanf ( inv.uptime.data(), "%u", &node_ptr->uptime ); + dlog2 ("%s Uptime (%s:%u)\n", inv.name.c_str(), inv.uptime.c_str(), node_ptr->uptime ); + } + else + { + node_ptr->uptime = 0 ; + } + + node_ptr->thread_extra_info.bm_ip = node_ptr->bm_ip = inv.bm_ip ; + node_ptr->thread_extra_info.bm_un = node_ptr->bm_un = inv.bm_un ; + node_ptr->thread_extra_info.bm_type= node_ptr->bm_type = inv.bm_type ; + + node_ptr->bm_ping_info.sock = 0 ; + + /* initialize the host power and reset control thread */ + thread_init ( node_ptr->ipmitool_thread_ctrl, + node_ptr->ipmitool_thread_info, + &node_ptr->thread_extra_info, + mtcThread_ipmitool, + DEFAULT_THREAD_TIMEOUT_SECS, + node_ptr->hostname, + THREAD_NAME__IPMITOOL); + + if ( adminStateOk (inv.admin) && + operStateOk (inv.oper ) && + availStatusOk (inv.avail)) + { + validStates = true ; + } + + clog ("%s subf state %s-%s\n", node_ptr->hostname.c_str(), inv.oper_subf.c_str(), 
inv.avail_subf.c_str() ); + + node_ptr->task = inv.task ; + + /* Add based on 'action' */ + if ((!inv.action.empty()) && (inv.action.compare ("none"))) + { + /* Save current action */ + node_ptr->action = inv.action ; + + if ( !inv.action.compare ("unlock") && validStates ) + { + ilog ("%s Added in 'unlocked' state\n", node_ptr->hostname.c_str()); + + print_inv ( inv ); + + node_ptr->adminState = adminState_str_to_enum (inv.admin.data()); + node_ptr->operState = operState_str_to_enum (inv.oper.data ()); + node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data()); + + if ( CPE_SYSTEM ) + { + node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data()); + node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data()); + } + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__UNLOCK ); + } + else if ( !inv.action.compare ("lock") && validStates ) + { + ilog ("%s Added in 'locked' state\n", node_ptr->hostname.c_str()); + + print_inv ( inv ); + + node_ptr->adminState = adminState_str_to_enum (inv.admin.data()); + node_ptr->operState = operState_str_to_enum (inv.oper.data ()); + node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data()); + + if ( CPE_SYSTEM ) + { + node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data()); + node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data()); + } + + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__LOCK ); + } + else if ( !inv.action.compare ("force-lock") && validStates ) + { + ilog ("%s Added in 'force-locked' state\n", node_ptr->hostname.c_str()); + + print_inv ( inv ); + + node_ptr->adminState = adminState_str_to_enum (inv.admin.data()); + node_ptr->operState = operState_str_to_enum (inv.oper.data ()); + node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data()); + + if ( CPE_SYSTEM ) + { + node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data()); + node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data()); + } + + 
adminActionChange ( node_ptr, MTC_ADMIN_ACTION__FORCE_LOCK ); + } + else if ( !inv.action.compare ("reboot") && validStates ) + { + ilog ("%s Added with 'reboot' in 'locked' state\n", node_ptr->hostname.c_str()); + + print_inv ( inv ); + + node_ptr->adminState = adminState_str_to_enum (inv.admin.data()); + node_ptr->operState = operState_str_to_enum (inv.oper.data ()); + node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data()); + + if ( CPE_SYSTEM ) + { + node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data()); + node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data()); + } +; + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__REBOOT ); + } + else if ( !inv.action.compare ("reset") && validStates ) + { + ilog ("%s Added with 'reset' in 'locked' state\n", node_ptr->hostname.c_str()); + + print_inv ( inv ); + + node_ptr->adminState = adminState_str_to_enum (inv.admin.data()); + node_ptr->operState = operState_str_to_enum (inv.oper.data ()); + node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data()); + + if ( CPE_SYSTEM ) + { + node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data()); + node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data()); + } + + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__RESET ); + } + else if ( !inv.action.compare ("power-off") && validStates ) + { + ilog ("%s Added in a 'locked' and 'power-off' state\n", node_ptr->hostname.c_str()); + + print_inv ( inv ); + + node_ptr->adminState = MTC_ADMIN_STATE__LOCKED ; + node_ptr->operState = MTC_OPER_STATE__DISABLED; + node_ptr->availStatus = MTC_AVAIL_STATUS__POWERED_OFF ; + + node_ptr->operState_subf = MTC_OPER_STATE__DISABLED ; + node_ptr->availStatus_subf = MTC_AVAIL_STATUS__POWERED_OFF ; + + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__POWEROFF ); + } + else if ( !inv.action.compare ("power-on") && validStates ) + { + ilog ("%s Added with 'power-on' in 'locked' state\n", node_ptr->hostname.c_str()); + + 
print_inv ( inv ); + + node_ptr->adminState = MTC_ADMIN_STATE__LOCKED ; + node_ptr->operState = MTC_OPER_STATE__DISABLED; + node_ptr->availStatus = MTC_AVAIL_STATUS__OFFLINE ; + + node_ptr->operState_subf = MTC_OPER_STATE__DISABLED ; + node_ptr->availStatus_subf = MTC_AVAIL_STATUS__OFFLINE ; + + node_ptr->onlineStage = MTC_ONLINE__START ; + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__POWERON ); + } + else + { + wlog ("%s Need add Action support for '%s' action\n", node_ptr->hostname.c_str(), + inv.action.c_str()); + + print_inv ( inv ); + + /* Load in maintenance states */ + node_ptr->adminState = MTC_ADMIN_STATE__LOCKED ; + node_ptr->operState = MTC_OPER_STATE__DISABLED ; + node_ptr->availStatus = MTC_AVAIL_STATUS__OFFLINE ; + + if ( CPE_SYSTEM ) + { + node_ptr->operState_subf = MTC_OPER_STATE__DISABLED ; + node_ptr->availStatus_subf = MTC_AVAIL_STATUS__OFFLINE ; + } + + node_ptr->onlineStage = MTC_ONLINE__START ; + + wlog ("%s Need '%s' action enabled here\n", node_ptr->hostname.c_str(), + inv.action.c_str()); + } + } + else + { + node_ptr->adminState = adminState_str_to_enum (inv.admin.data()); + node_ptr->operState = operState_str_to_enum (inv.oper.data ()); + node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data()); + + if ( CPE_SYSTEM ) + { + node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data()); + node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data()); + } + } + + /* Clear the heartbeat failure conts for this host */ + for ( int iface = 0 ; iface < MAX_IFACES ; iface++ ) + { + node_ptr->hbs_degrade_count[iface] = 0; + node_ptr->hbs_failure_count[iface] = 0; + } + + /* Add to the end of inventory */ + hostname_inventory.push_back ( node_ptr->hostname ); + rc = PASS ; + } + } + + if (( rc == PASS ) && ( node_ptr )) + { + node_ptr->addStage = MTC_ADD__START ; + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__ADD ); + } + return (rc); +} + +void nodeLinkClass::clear_service_readies ( struct 
nodeLinkClass::node * node_ptr ) +{ + if ( node_ptr ) + { + if ( node_ptr->hbsClient_ready || node_ptr->pmond_ready ) + { + ilog ("%s clearing service ready events\n", node_ptr->hostname.c_str()); + node_ptr->hbsClient_ready = false ; + node_ptr->pmond_ready = false ; + } + } +} + +/* Used by the heartbeat service to add a host to its list */ +int nodeLinkClass::add_heartbeat_host ( const node_inv_type & inv ) +{ + int rc = FAIL ; + struct nodeLinkClass::node * node_ptr = static_cast(NULL); + + dlog ("%s with nodetype %u\n", inv.name.c_str(), inv.nodetype ); + + /* no hostname - no add ! */ + if ( inv.name.length() ) + { + /* Handle the case where we are adding a node that is already */ + /* present if so just update the inventory data not the mtc state */ + node_ptr = nodeLinkClass::getNode(inv.name); + if ( node_ptr ) + { + dlog ("%s already provisioned\n", node_ptr->hostname.c_str()); + rc = RETRY ; + } + /* Otherwise add it as a new node */ + else + { + node_ptr = nodeLinkClass::addNode(inv.name); + if ( node_ptr != NULL ) + { + node_ptr->hostname = inv.name ; + node_ptr->nodetype = inv.nodetype ; + dlog ("%s added to linked list\n", inv.name.c_str()); + rc = PASS ; + } + else + { + elog ("Failed to addNode %s to heartbeat service\n", inv.name.c_str()); + } + } + } + return (rc); +} + +string nodeLinkClass::get_uuid ( string hostname ) +{ + struct nodeLinkClass::node * node_ptr = nodeLinkClass::getNode(hostname); + if ( node_ptr ) + { + return (node_ptr->uuid); + } + else + { + return (""); + } +} + +void nodeLinkClass::set_uuid ( string hostname, string uuid ) +{ + struct nodeLinkClass::node * node_ptr = nodeLinkClass::getNode(hostname); + if ( node_ptr ) + { + node_ptr->uuid = uuid ; + } +} + +/* Set the task field in the maintenance class object for the specified host */ +void nodeLinkClass::set_task ( string hostname, string task ) +{ + struct nodeLinkClass::node * node_ptr = nodeLinkClass::getNode(hostname); + if ( node_ptr ) + { + node_ptr->task = task ; 
+ } +} + +/* Lock Rules + * + * 1. Cannot lock this controller + * 2. Cannot lock inactive controller if storage-0 is locked + * 3. Cannot lock storage node with monitor if inactive conroller is locked or not present + * 4. Cannot lock last storage host. + */ +bool nodeLinkClass::can_uuid_be_locked ( string uuid , int & reason ) +{ + struct nodeLinkClass::node * node_ptr = nodeLinkClass::getNode(uuid); + if ( node_ptr ) + { + dlog1 ("%s Lock permission query\n", node_ptr->hostname.c_str()); + + /* Allow lock of already locked 'any' host */ + if ( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED ) + { + ilog ("%s is already 'locked'\n", node_ptr->hostname.c_str()); + return (true); + } + else if ( node_ptr->operState == MTC_OPER_STATE__DISABLED ) + { + ilog ("%s allowing lock of 'disabled' host\n", node_ptr->hostname.c_str() ); + return (true); + } + else if (is_controller(node_ptr)) + { + /* Rule 1 - Cannot lock active controller */ + if ( THIS_HOST ) + { + elog ("%s Cannot be 'locked' - controller is 'active'\n", node_ptr->hostname.c_str()); + reason = FAIL_UNIT_ACTIVE ; + return (false); + } + /* Rule 2 - Cannot lock inactive controller if the floating storage + * ceph monitor is locked */ + if (( get_storage_backend() == CGCS_STORAGE_CEPH ) && + ( is_storage_mon_enabled () == false )) + { + wlog ("%s cannot be 'locked' - failed storage redundancy check\n", node_ptr->hostname.c_str()); + reason = FAIL_NEED_STORAGE_MON ; + return (false); + } + ilog ("%s can be locked\n", node_ptr->hostname.c_str()); + return (true); + } + else if ( is_compute(node_ptr) ) + { + if ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) + { + dlog ("%s is 'unlocked' and can be 'locked'\n", node_ptr->hostname.c_str()); + } + return (true); + } + /* Deal with lock of storage cases - Rules 3 and 4 */ + else if ( is_storage(node_ptr) ) + { + /* Only need to semantic check if this host is unlocked-enabled */ + if ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) + { + /* Both active 
controllers path ... */ + if ( num_controllers_enabled () >= 2 ) + { + /* If we are locking storage-0 make sure that there + * is another enabled storage node */ + if ( !node_ptr->hostname.compare("storage-0") ) + { + /* We already know that this storage node is enabled so + * we need to see a count greated than 1 */ + if ( enabled_storage_nodes () > 1 ) + { + /* We have 2 enabled controllers and 2 enabled + * storage nodes so we can allow lock of storage-0 */ + ilog ("%s can be locked - there is storage redundancy\n", + node_ptr->hostname.c_str()); + return (true); + } + /* Rule 4 - Cannot lock last storage node */ + else + { + wlog ("%s cannot be locked - no storage redundancy\n", + node_ptr->hostname.c_str()); + reason = FAIL_LOW_STORAGE ; + return (false); + } + } + /* O.K. we are trying to lock a storage host tha is not + * the floating storage monitor */ + else if (( is_storage_mon_enabled () == true ) && + ( enabled_storage_nodes() > 1 )) + { + /* We have - 2 enabled controllers + * - the storage mon is enabled and + * - is not this one. */ + ilog ("%s can be locked - there is storage redundancy\n", + node_ptr->hostname.c_str()); + return (true); + } + /* Rule 4 - Cannot lock last storage node */ + else if (enabled_storage_nodes() <= 1) + { + wlog ("%s cannot be locked - no storage redundancy\n", + node_ptr->hostname.c_str()); + reason = FAIL_LOW_STORAGE ; + return (false); + } + else + { + /* Other redundancy checks here and in SysInv have passed. 
*/ + ilog ("%s can be locked - storage redundancy filters passed.\n", + node_ptr->hostname.c_str()); + return (true); + } + } + + /* Rule 3 - Cannot lock storage node with monitor if inactive + * controller is locked or not present and there is + * not another storage node enabled */ + else + { + /* Cannot lock storage-0 if there is only a single enabled controller */ + if ( !node_ptr->hostname.compare("storage-0") ) + { + wlog ("%s cannot be locked - simplex system\n", + node_ptr->hostname.c_str()); + reason = FAIL_NEED_STORAGE_MON ; + return (false); + } + /* Only allow locking of a storage node if there is another in service */ + else if (( is_storage_mon_enabled () == true ) && + ( enabled_storage_nodes() > 1 )) + { + ilog ("%s can be locked - there is storage redundancy\n", + node_ptr->hostname.c_str()); + return (true); + } + /* Rule 4 - Cannot lock last storage node */ + else + { + wlog ("%s cannot be locked - no redundancy\n", + node_ptr->hostname.c_str()); + reason = FAIL_LOW_STORAGE ; + return (false); + } + } + } + else + { + ilog ("%s allowing lock of disabled storage host\n", + node_ptr->hostname.c_str()); + return (true); + } + } + else + { + elog ("%s unsupported nodetype (%u)\n", + node_ptr->hostname.c_str(), + node_ptr->nodetype); + return (false); + } + } + else + { + dlog ("Unknown uuid: %s\n", uuid.c_str()); + + /* allowing lock as a means to clear up error */ + return (true); + } +} + +int nodeLinkClass::rem_host ( string & hostname ) +{ + int rc = FAIL ; + if ( ! 
hostname.empty() ) + { + hostname_inventory.remove ( hostname ); + rc = nodeLinkClass::remNode ( hostname ); + } + return ( rc ); +} + +void nodeLinkClass::set_my_hostname ( string hostname ) +{ + struct nodeLinkClass::node * node_ptr ; + + nodeLinkClass::my_hostname = hostname ; + + /* set it in the local inventory as well */ + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + node_ptr->hostname = hostname ; + } +} + + +string nodeLinkClass::get_my_hostname ( void ) +{ + return( nodeLinkClass::my_hostname ); +} + +void nodeLinkClass::set_my_local_ip ( string & ip ) +{ + nodeLinkClass::node* node_ptr ; + + nodeLinkClass::my_local_ip = ip ; + + /* set it in the local inventory as well */ + node_ptr = nodeLinkClass::getNode ( my_hostname ); + if ( node_ptr != NULL ) + { + node_ptr->ip = ip ; + } +} + +string nodeLinkClass::get_my_local_ip ( void ) +{ + return( nodeLinkClass::my_local_ip ); +} + +void nodeLinkClass::set_my_float_ip ( string & ip ) +{ + nodeLinkClass::my_float_ip = ip ; +} + +string nodeLinkClass::get_my_float_ip ( void ) +{ + return( nodeLinkClass::my_float_ip ); +} + +static string null_str = "" ; +string nodeLinkClass::get_hostaddr ( string & hostname ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + return ( node_ptr->ip ); + } + return ( null_str ); +} + +string nodeLinkClass::get_infra_hostaddr ( string & hostname ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + return ( node_ptr->infra_ip ); + } + return ( null_str ); +} + +string nodeLinkClass::get_hostIfaceMac ( string & hostname, int iface ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + if ( iface == MGMNT_IFACE ) + return ( node_ptr->mac ); + if ( iface == INFRA_IFACE ) + return ( node_ptr->infra_mac ); + } + ilog ("%s has unknown mac address for %s 
interface\n", hostname.c_str(), get_iface_name_str(iface)); + return ( null_str ); +} + +int nodeLinkClass::set_hostaddr ( string & hostname, string & ip ) +{ + int rc = FAIL ; + + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + node_ptr->ip = ip ; + rc = PASS ; + } + return ( rc ); +} + +int nodeLinkClass::set_infra_hostaddr ( string & hostname, string & ip ) +{ + int rc = FAIL ; + + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + node_ptr->infra_ip = ip ; + rc = PASS ; + } + return ( rc ); +} + +string nodeLinkClass::get_hostname ( string & hostaddr ) +{ + if (( hostaddr == LOOPBACK_IPV6 ) || + ( hostaddr == LOOPBACK_IP ) || + ( hostaddr == LOCALHOST )) + { + return(my_hostname); + } + else + { + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostaddr ); + if ( node_ptr != NULL ) + { + return ( node_ptr->hostname ); + } + return ( null_str ); + } +} + +string nodeLinkClass::get_hostname_from_bm_ip ( string bm_ip ) +{ + if ( head ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ! 
ptr->bm_ip.compare(bm_ip) ) + { + return ( ptr->hostname ); + } + + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return ("") ; +} + +int nodeLinkClass::num_hosts ( void ) +{ + return ( nodeLinkClass::hosts ) ; +} + +void nodeLinkClass::set_cmd_resp ( string & hostname, mtc_message_type & msg ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + if ( is_host_services_cmd ( msg.cmd ) ) + { + /***************************************************** + * Host Services Request's Response Handling + *****************************************************/ + node_ptr->host_services_req.status = msg.parm[0] ; + if ( msg.cmd == node_ptr->host_services_req.cmd ) + { + // print_mtc_message ( &msg, true ); + + /* if num > 1 then expect a host services result message */ + if ( msg.cmd == MTC_CMD_HOST_SVCS_RESULT ) + { + if ( !node_ptr->host_services_req.ack ) + { + slog ("%s %s without initial command ACK\n", + hostname.c_str(), + node_ptr->host_services_req.name.c_str()); + } + node_ptr->host_services_req.rsp = msg.cmd ; + if ( msg.buf[0] != '\0' ) + { + node_ptr->host_services_req.status_string = msg.buf ; + } + } + + /* Check to see if the start/stop host services command + * response demonstrates support for the enhanced host + * services extension. 
*/ + else if (( msg.num > 1 ) && ( msg.parm[1] == MTC_ENHANCED_HOST_SERVICES )) + { + dlog ("%s %s request ack\n", + hostname.c_str(), + node_ptr->host_services_req.name.c_str()); + node_ptr->host_services_req.ack = true ; + } + else + { + ilog ("%s %s request ack (legacy mode)\n", + hostname.c_str(), + node_ptr->host_services_req.name.c_str()); + /* support legacy client by copying the cmd to cmdRsp */ + node_ptr->host_services_req.status = PASS ; + node_ptr->host_services_req.rsp = msg.cmd ; + node_ptr->host_services_req.ack = MTC_CMD_NONE ; + } + } + + if ( msg.num && ( node_ptr->host_services_req.status != PASS )) + { + dlog ("%s %s command failed (rc:%d) [%s]\n", + hostname.c_str(), + get_mtcNodeCommand_str(msg.cmd), + node_ptr->host_services_req.status, + node_ptr->host_services_req.status_string.empty() ? + "no error string" : node_ptr->host_services_req.status_string.c_str()); + } + } + else + { + node_ptr->cmdRsp = msg.cmd ; + if ( msg.num > 0 ) + node_ptr->cmdRsp_status = msg.parm[0] ; + else + node_ptr->cmdRsp_status = -1 ; + + dlog ("%s '%s' command response status [%u:%s]\n", + hostname.c_str(), + node_ptr->cmdName.c_str(), + msg.num ? node_ptr->cmdRsp_status : PASS, + node_ptr->cmdRsp_status_string.empty() ? 
"empty" : node_ptr->cmdRsp_status_string.c_str()); + } + } +} + +unsigned int nodeLinkClass::get_cmd_resp ( string & hostname ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + return ( node_ptr->cmdRsp ) ; + } + return (-1); +} + +mtc_client_enum nodeLinkClass::get_activeClient ( string hostname ) +{ + nodeLinkClass::node* node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + return ( node_ptr->activeClient ) ; + } + else + { + slog ("Host lookup failed for '%s'\n", hostname.c_str()); + } + return (CLIENT_NONE); +} + +int nodeLinkClass::set_activeClient ( string hostname, mtc_client_enum client ) +{ + nodeLinkClass::node* node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + node_ptr->activeClient = client ; + return (PASS); + } + else + { + slog ("Host lookup failed for '%s'\n", hostname.c_str()); + } + return (FAIL_HOSTNAME_LOOKUP); +} + +/***************************************************************************** + * + * Name : set_mtcAlive + * + * Description: + * + * If mtcAlive is ungated then + * + * 1. manage the online/offline state bools + * 2. increment the mtcAlive count and + * 3. 
set the mtcAlive received bool for the specified interface + * + *****************************************************************************/ +void nodeLinkClass::set_mtcAlive ( string & hostname, int interface ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + if ( node_ptr->mtcAlive_gate == false ) + { + node_ptr->mtcAlive_online = true ; + node_ptr->mtcAlive_offline = false ; + node_ptr->mtcAlive_count++ ; + + if ( interface == INFRA_INTERFACE ) + { + node_ptr->mtcAlive_infra = true ; + } + else + { + node_ptr->mtcAlive_mgmnt = true ; + } + } + } +} + +bool nodeLinkClass::get_mtcAlive_gate ( string & hostname ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + return ( node_ptr->mtcAlive_gate ) ; + } + /* If we can't find the node then gate off the alive messages */ + return (true); +} + +void nodeLinkClass::ctl_mtcAlive_gate ( string & hostname, bool gated ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + node_ptr->mtcAlive_gate = gated ; + if ( gated == true ) + { + alog ("%s mtcAlive gated\n", node_ptr->hostname.c_str()); + } + else + { + alog ("%s mtcAlive ungated\n", node_ptr->hostname.c_str()); + } + } +} + +/* Main-Function Go Enabled member Functions */ + +void nodeLinkClass::set_goEnabled ( string & hostname ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + node_ptr->goEnabled = true ; + } +} + +bool nodeLinkClass::get_goEnabled ( string & hostname ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + return ( node_ptr->goEnabled ) ; + } + return (false); +} + +void nodeLinkClass::set_goEnabled_failed ( string & hostname ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + 
if ( node_ptr != NULL )
+    {
+        node_ptr->goEnabled_failed = true ;
+    }
+}
+
+/* Sub-Function Go Enabled Member Functions */
+/* Set the sub-function go-enabled flag of the named host ; no-op when the host is not found. */
+void nodeLinkClass::set_goEnabled_subf ( string & hostname )
+{
+    nodeLinkClass::node* node_ptr ;
+    node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr != NULL )
+    {
+        node_ptr->goEnabled_subf = true ;
+    }
+}
+/* Return the sub-function go-enabled flag of the named host ; false when the host is not found. */
+bool nodeLinkClass::get_goEnabled_subf ( string & hostname )
+{
+    nodeLinkClass::node* node_ptr ;
+    node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr != NULL )
+    {
+        return ( node_ptr->goEnabled_subf ) ;
+    }
+    return (false);
+}
+/* Set the sub-function go-enabled 'failed' flag of the named host ; no-op when the host is not found. */
+void nodeLinkClass::set_goEnabled_failed_subf ( string & hostname )
+{
+    nodeLinkClass::node* node_ptr ;
+    node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr != NULL )
+    {
+        node_ptr->goEnabled_failed_subf = true ;
+    }
+}
+
+/* Set and Get Uptime Member Function */
+/* Store 'uptime' in the node ; the inventory update call is made only on the
+ * transitions listed below or when 'force' is true. NULL node_ptr is a no-op. */
+void nodeLinkClass::set_uptime ( struct nodeLinkClass::node * node_ptr, unsigned int uptime, bool force )
+{
+    if ( node_ptr != NULL )
+    {
+        /* Force the uptime into the database if
+         *  - passed in value is 0 and current value is !0
+         *  - passed in value is !0 and current value is 0
+         *  - if the force option is used
+         * Otherwise allow the audit to push time to the database
+         */
+        if ((force == true ) ||
+            (( uptime != 0 ) && ( node_ptr->uptime == 0 )) ||
+            (( node_ptr->uptime != 0 ) && ( uptime == 0 )))
+        {
+            mtcInvApi_update_uptime ( node_ptr, uptime );
+        }
+        node_ptr->uptime = uptime ;
+    }
+}
+/* Hostname flavour of set_uptime ; a failed lookup passes NULL through, which is a no-op. */
+void nodeLinkClass::set_uptime ( string & hostname, unsigned int uptime, bool force )
+{
+    nodeLinkClass::node* node_ptr ;
+    node_ptr = nodeLinkClass::getNode ( hostname );
+    set_uptime ( node_ptr, uptime, force );
+}
+
+/* Return the stored uptime of the named host ; 0 when the host is not found. */
+unsigned int nodeLinkClass::get_uptime ( string & hostname )
+{
+    nodeLinkClass::node* node_ptr ;
+    node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr != NULL )
+    {
+        return ( node_ptr->uptime ) ;
+    }
+    return (0);
+}
+/* Set the uptime refresh counter of the named host ; no-op when the host is not found. */
+void nodeLinkClass::set_uptime_refresh_ctr ( string & hostname, int value )
+{
+    nodeLinkClass::node* node_ptr ;
+    node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr != NULL )
+    {
+        node_ptr->uptime_refresh_counter = value ;
+    }
+}
+
+/* Return the uptime refresh counter of the named host ; 0 when the host is not found. */
+int nodeLinkClass::get_uptime_refresh_ctr ( string & hostname )
+{
+    nodeLinkClass::node* node_ptr ;
+    node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr != NULL )
+    {
+        return ( node_ptr->uptime_refresh_counter ) ;
+    }
+    return (0);
+}
+/* Record the maintenance flags word reported for a host and derive the
+ * goEnabled, patching, patched and (on combo systems) goEnabled_subf state
+ * from its bits. No-op when the host is not found. */
+void nodeLinkClass::set_mtce_flags ( string hostname, int flags )
+{
+    nodeLinkClass::node* node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr != NULL )
+    {
+        /* Deal with host level */
+        node_ptr->mtce_flags = flags ;
+        if ( flags & MTC_FLAG__MAIN_GOENABLED )
+            node_ptr->goEnabled = true ;
+        else
+            node_ptr->goEnabled = false ;
+
+        /* Track host patching state by Out-Of-Band flag ;
+         * a log is produced only on a state transition */
+        if ( flags & MTC_FLAG__PATCHING )
+        {
+            if ( node_ptr->patching == false )
+            {
+                plog ("%s software patching has begun\n", node_ptr->hostname.c_str());
+            }
+            node_ptr->patching = true ;
+        }
+        else
+        {
+            if ( node_ptr->patching == true )
+            {
+                plog ("%s software patching done\n", node_ptr->hostname.c_str());
+            }
+            node_ptr->patching = false ;
+        }
+
+        /* Track host patched state by Out-Of-Band flag.
+         * This flag is set when the host is patched but not reset */
+        if ( flags & MTC_FLAG__PATCHED )
+        {
+            if ( node_ptr->patched == false )
+            {
+                plog ("%s software patched\n", node_ptr->hostname.c_str());
+            }
+            node_ptr->patched = true ;
+        }
+        else
+        {
+            if ( node_ptr->patched == true )
+            {
+                plog ("%s software patch is applied\n", node_ptr->hostname.c_str());
+            }
+            node_ptr->patched = false ;
+        }
+
+
+        /* Deal with sub-function if combo host ;
+         * the subf flag is only honoured while the sub-function is enabled */
+        if ( CPE_SYSTEM )
+        {
+            if ( flags & MTC_FLAG__SUBF_GOENABLED )
+            {
+                if ( node_ptr->operState_subf == MTC_OPER_STATE__ENABLED )
+                {
+                    node_ptr->goEnabled_subf = true ;
+                }
+            }
+            else
+            {
+                node_ptr->goEnabled_subf = false ;
+            }
+        }
+    }
+}
+/* Store a health code against the named host. Only the three known codes are
+ * accepted ; a change to UNHEALTHY on an unlocked host is logged. */
+void nodeLinkClass::set_health ( string & hostname, int health )
+{
+    switch ( health )
+    {
+        case NODE_HEALTH_UNKNOWN:
+        case NODE_HEALTHY:
+        case NODE_UNHEALTHY:
+        {
+            nodeLinkClass::node* node_ptr ;
+            node_ptr = nodeLinkClass::getNode ( hostname );
+            if ( node_ptr != NULL )
+            {
+                if ( health == NODE_UNHEALTHY )
+                {
+                    if ( node_ptr->health != NODE_UNHEALTHY )
+                    {
+                        if ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED )
+                        {
+                            wlog ("%s Health State Change -> UNHEALTHY\n", hostname.c_str());
+                        }
+                    }
+                }
+                node_ptr->health = health ;
+            }
+            break ;
+        }
+        default:
+        {
+            /* NOTE(review): the log says the value defaults to unknown but
+             * nothing is stored here ; the previous health is retained. */
+            wlog ("%s Unexpected health code (%d), defaulting to (unknown)\n", hostname.c_str(), health );
+            break ;
+        }
+    }
+}
+
+/*************************************************************************************
+ *
+ * Name       : manage_bmc_provisioning
+ *
+ * Description: This utility manages a change in bmc provisioning for
+ *              bm region EXTERNAL mode. Creates provisioning logs and
+ *              sends START and STOP monitoring commands to the hardware monitor.
+ *
+ * Warning    : Should only be called when there is a change to BM provisioning,
+ *              as it always disables provisioning first and then
+ *              decides whether it needs to be re-enabled or not.
+ *
+ *************************************************************************************/
+
+/* Parameters : node_ptr - host whose board management provisioning changed
+ * Returns    : FAIL_HOSTNAME_LOOKUP for a NULL node_ptr, otherwise the result
+ *              of the last hwmon command that was sent. */
+int nodeLinkClass::manage_bmc_provisioning ( struct node * node_ptr )
+{
+    int rc = PASS ;
+
+    /* Refuse a NULL node rather than dereference it ; this is the same
+     * error code set_bm_prov uses for a NULL node. */
+    if ( node_ptr == NULL )
+    {
+        return (FAIL_HOSTNAME_LOOKUP);
+    }
+
+    /* remember the state we came from ; it selects which log is produced */
+    bool was_provisioned = node_ptr->bm_provisioned ;
+
+    /* always start from the deprovisioned state */
+    set_bm_prov ( node_ptr, false);
+    if ((hostUtil_is_valid_ip_addr ( node_ptr->bm_ip )) &&
+        (!node_ptr->bm_un.empty()))
+    {
+        if ( was_provisioned == true )
+        {
+            mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_BM_REPROVISIONED );
+        }
+        else
+        {
+            mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_BM_PROVISIONED );
+        }
+
+        set_bm_prov ( node_ptr, true );
+    }
+    else if ( was_provisioned == true )
+    {
+        send_hwmon_command(node_ptr->hostname,MTC_CMD_STOP_HOST);
+        mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_BM_DEPROVISIONED );
+    }
+
+    /* Send hwmond updated bm info */
+    ilog ("%s sending board management info update to hwmond\n", node_ptr->hostname.c_str() );
+    if ( ( rc = send_hwmon_command(node_ptr->hostname,MTC_CMD_MOD_HOST) ) == PASS )
+    {
+        /* follow the modify with the command matching the final state */
+        if ( node_ptr->bm_provisioned == true )
+        {
+            rc = send_hwmon_command(node_ptr->hostname,MTC_CMD_START_HOST);
+        }
+        else
+        {
+            rc = send_hwmon_command(node_ptr->hostname,MTC_CMD_STOP_HOST);
+        }
+        if ( rc )
+        {
+            wlog ("%s failed to send START or STOP command to hwmond\n", node_ptr->hostname.c_str());
+        }
+    }
+    else
+    {
+        wlog ("%s failed to send MODIFY command to hwmond\n", node_ptr->hostname.c_str());
+    }
+    return (rc);
+}
+
+/* Returns true when 'bm_ip' is a valid address that is already assigned to a
+ * host in inventory. An invalid address or an empty inventory yields false. */
+bool nodeLinkClass::is_bm_ip_already_used ( string bm_ip )
+{
+    if ( hostUtil_is_valid_ip_addr ( bm_ip ) == true )
+    {
+        /* 'ptr != NULL' also covers an empty inventory (head == NULL) */
+        for ( struct node * ptr = head ; ptr != NULL ; ptr = ptr->next )
+        {
+            if ( !bm_ip.compare(ptr->bm_ip) )
+            {
+                return (true);
+            }
+            if ( ptr == tail )
+                break ;
+        }
+    }
+    return (false);
+}
+
+/* Store the board management type for the named host.
+ * Returns PASS, or FAIL_HOSTNAME_LOOKUP when the host is not found. */
+int nodeLinkClass::set_bm_type ( string hostname , string bm_type )
+{
+    int rc = FAIL_HOSTNAME_LOOKUP ;
+
+    nodeLinkClass::node* node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr != NULL )
+    {
+        node_ptr->bm_type = bm_type ;
+        dlog ("%s '%s' updated to '%s'\n",
+                  hostname.c_str(),
+                  MTC_JSON_INV_BMTYPE,
+                  node_ptr->bm_type.c_str());
+        rc = PASS ;
+    }
+    return (rc);
+}
+
+/* Store the board management username for the named host ; an empty
+ * username is stored as NONE.
+ * Returns PASS, or FAIL_HOSTNAME_LOOKUP when the host is not found. */
+int nodeLinkClass::set_bm_un ( string hostname , string bm_un )
+{
+    int rc = FAIL_HOSTNAME_LOOKUP ;
+
+    nodeLinkClass::node* node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr != NULL )
+    {
+        if ( bm_un.length() )
+        {
+            node_ptr->bm_un = bm_un ;
+        }
+        else
+        {
+            node_ptr->bm_un = NONE ;
+        }
+        dlog ("%s '%s' updated to '%s'\n",
+                  hostname.c_str(),
+                  MTC_JSON_INV_BMUN,
+                  node_ptr->bm_un.c_str());
+        rc = PASS ;
+    }
+    return (rc);
+}
+
+/* Store the board management IP address for the named host.
+ * Returns PASS, or FAIL_HOSTNAME_LOOKUP when the host is not found. */
+int nodeLinkClass::set_bm_ip ( string hostname , string bm_ip )
+{
+    int rc = FAIL_HOSTNAME_LOOKUP ;
+
+    nodeLinkClass::node* node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr != NULL )
+    {
+        node_ptr->bm_ip = bm_ip ;
+
+        dlog ("%s '%s' updated to '%s'\n",
+                  hostname.c_str(),
+                  MTC_JSON_INV_BMIP,
+                  node_ptr->bm_ip.c_str());
+        rc = PASS ;
+    }
+    return (rc);
+}
+
+/* Reset the bmc accessibility flag and the active/done flags of the mc info,
+ * reset cause and power status queries. NULL node_ptr is a no-op. */
+void nodeLinkClass::bmc_access_data_init ( struct nodeLinkClass::node * node_ptr )
+{
+    if ( node_ptr )
+    {
+        node_ptr->bm_accessible = false;
+        node_ptr->mc_info_query_active = false ;
+        node_ptr->mc_info_query_done = false ;
+        node_ptr->reset_cause_query_active = false ;
+        node_ptr->reset_cause_query_done = false ;
+        node_ptr->power_status_query_active = false;
+        node_ptr->power_status_query_done = false ;
+    }
+}
+
+/*****************************************************************************
+ *
+ * Name       : set_bm_prov
+ *
+ * Description: Manage the local provisioning state of the
+ *              board management connection.
+ *
+ * Assumptions: Does not send HTTP requests to sysinv so it is
+ *              safe to call from the modify handler
+ *
+ *              Does not clear alarms.
+ * + ******************************************************************************/ +int nodeLinkClass::set_bm_prov ( struct nodeLinkClass::node * node_ptr, bool state ) +{ + int rc = FAIL_HOSTNAME_LOOKUP ; + if ( node_ptr != NULL ) + { + ilog ("%s bmc %sprovision request (provisioned:%s)\n", // ERIC blog + node_ptr->hostname.c_str(), + state ? "" : "de", + node_ptr->bm_provisioned ? "Yes" : "No" ); + + /* Clear the alarm if we are starting fresh from an unprovisioned state */ + if (( node_ptr->bm_provisioned == false ) && ( state == true )) + { + /* BMC is managed by IPMI/IPMITOOL */ + ilog ("%s starting BM ping monitor to address '%s'\n", + node_ptr->hostname.c_str(), + node_ptr->bm_ip.c_str()); + + // mtcTimer_reset ( node_ptr->bm_ping_info.timer ); + node_ptr->bm_ping_info.ip = node_ptr->bm_ip ; + node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__OPEN ; + bmc_access_data_init ( node_ptr ); + node_ptr->bm_ping_info.timer_handler = &mtcTimer_handler ; + + node_ptr->thread_extra_info.bm_pw = + node_ptr->bm_pw = + get_bm_password (node_ptr->uuid.data()); + + node_ptr->thread_extra_info.bm_ip = node_ptr->bm_ip ; + node_ptr->thread_extra_info.bm_un = node_ptr->bm_un ; + + send_hwmon_command(node_ptr->hostname, MTC_CMD_ADD_HOST); + send_hwmon_command(node_ptr->hostname, MTC_CMD_START_HOST); + } + + /* handle the case going from provisioned to not provisioned */ + else if (( node_ptr->bm_provisioned == true ) && ( state == false )) + { + /* BMC is managed by IPMI/IPMITOOL */ + ilog ("%s deprovisioning bmc ; accessible:%s\n", + node_ptr->hostname.c_str(), + node_ptr->bm_accessible ? 
"Yes" : "No" ); + + pingUtil_fini ( node_ptr->bm_ping_info ); + bmc_access_data_init ( node_ptr ); + node_ptr->bm_accessible = false; + + if ( !thread_idle( node_ptr->ipmitool_thread_ctrl ) ) + { + thread_kill ( node_ptr->ipmitool_thread_ctrl , node_ptr->ipmitool_thread_info); + } + node_ptr->mc_info_query_active = false ; + node_ptr->mc_info_query_done = false ; + node_ptr->reset_cause_query_active = false ; + node_ptr->reset_cause_query_done = false ; + node_ptr->power_status_query_active = false; + node_ptr->power_status_query_done = false ; + + /* send a delete to hwmon if the provisioning data is NONE */ + if ( hostUtil_is_valid_bm_type ( node_ptr->bm_type ) == false ) + { + send_hwmon_command(node_ptr->hostname, MTC_CMD_DEL_HOST); + } + } + if (( node_ptr->bm_provisioned == false ) && ( state == true )) + { + /* start the connection timer - if it expires before we + * are 'accessible' then the BM Alarm is raised. + * Timer is further managed in mtcNodeHdlrs.cpp */ + plog ("%s bmc access timer started (%d secs)\n", node_ptr->hostname.c_str(), MTC_MINS_2); + mtcTimer_reset ( node_ptr->bmc_access_timer ); + mtcTimer_start ( node_ptr->bmc_access_timer, mtcTimer_handler, MTC_MINS_2 ); + } + + node_ptr->bm_provisioned = state ; + } + return (rc); +} + +string nodeLinkClass::get_bm_ip ( string hostname ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + return (node_ptr->bm_ip); + } + elog ("%s bm ip lookup failed\n", hostname.c_str() ); + return (""); +} + +string nodeLinkClass::get_bm_un ( string hostname ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + return (node_ptr->bm_un); + } + elog ("%s bm username lookup failed\n", hostname.c_str() ); + return (""); +} + +string nodeLinkClass::get_bm_type ( string hostname ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL 
) + { + return (node_ptr->bm_type); + } + elog ("%s bm type lookup failed\n", hostname.c_str() ); + return (""); +} + +string nodeLinkClass::get_hwmon_info ( string hostname ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + string hwmon_info = "" ; + + hwmon_info.append( "{ \"personality\":\"" ) ; + hwmon_info.append( node_ptr->type ); + hwmon_info.append( "\""); + + hwmon_info.append( ",\"hostname\":\"" ) ; + hwmon_info.append( node_ptr->hostname ); + hwmon_info.append( "\""); + + hwmon_info.append( ",\"bm_ip\":\"" ) ; + hwmon_info.append( node_ptr->bm_ip ); + hwmon_info.append( "\""); + + hwmon_info.append( ",\"bm_type\":\""); + hwmon_info.append( node_ptr->bm_type ); + hwmon_info.append( "\""); + + hwmon_info.append( ",\"bm_username\":\""); + hwmon_info.append( node_ptr->bm_un ); + hwmon_info.append( "\""); + + hwmon_info.append( ",\"uuid\":\"" ) ; + hwmon_info.append( node_ptr->uuid ); + hwmon_info.append( "\" }"); + + return (hwmon_info); + } + elog ("%s hwmon info lookup failed\n", hostname.c_str() ); + return (""); +} + + + +int nodeLinkClass::manage_shadow_change ( string hostname ) +{ + int rc = FAIL ; + if ( ! 
hostname.empty() ) + { + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + rc = PASS ; + if ( node_ptr->configAction == MTC_CONFIG_ACTION__NONE ) + { + node_ptr->configStage = MTC_CONFIG__START ; + node_ptr->configAction = MTC_CONFIG_ACTION__CHANGE_PASSWD ; + } + else + { + node_ptr->configAction = MTC_CONFIG_ACTION__CHANGE_PASSWD_AGAIN ; + } + } + } + return (rc); +} + +/** Returns the number of compute hosts that are operationally 'enabled' */ +int nodeLinkClass::enabled_compute_nodes ( void ) +{ + int temp_count = 0 ; + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if (( is_compute( ptr )) && + ( ptr->operState == MTC_OPER_STATE__ENABLED )) + { + temp_count++ ; + } + else if (( is_compute_subfunction ( ptr )) && + ( ptr->operState_subf == MTC_OPER_STATE__ENABLED )) + { + temp_count++ ; + } + + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + return (temp_count); +} + +/** Returns the number of storage hosts that are operationally 'enabled' */ +int nodeLinkClass::enabled_storage_nodes ( void ) +{ + int temp_count = 0 ; + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if (( is_storage( ptr ) ) && + ( ptr->operState == MTC_OPER_STATE__ENABLED )) + { + temp_count++ ; + } + + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + return (temp_count); +} + +int nodeLinkClass::enabled_nodes ( void ) +{ + int temp_count = 0 ; + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->operState == MTC_OPER_STATE__ENABLED ) + { + temp_count++ ; + } + + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + /* Remove the active controller from the count */ + if (temp_count) + temp_count-- ; + + return (temp_count); +} + +/** Returns the system's storage back end type ceph or nfs */ +int nodeLinkClass::get_storage_backend ( void ) +{ + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( is_storage(ptr) ) + return ( CGCS_STORAGE_CEPH ) ; 
+ + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + return (CGCS_STORAGE_NFS); +} + +/** Returns true if the storage pool has a monitor running on + * an unlocked-enabled storage host */ +bool nodeLinkClass::is_storage_mon_enabled ( void ) +{ + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if (( is_storage(ptr) ) && + ( ptr->operState == MTC_OPER_STATE__ENABLED ) && + ( !ptr->hostname.compare("storage-0"))) + { + return ( true ) ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + return (false); +} + +/** Returns number of enabled controllers */ +int nodeLinkClass::num_controllers_enabled ( void ) +{ + int cnt = 0 ; + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if (( is_controller(ptr) ) && + ( ptr->operState == MTC_OPER_STATE__ENABLED )) + { + ++cnt ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + return (cnt); +} + +int nodeLinkClass::service_netlink_events ( int nl_socket , int ioctl_socket ) +{ + std::list links_gone_down ; + std::list links_gone_up ; + std::list::iterator iter_curr_ptr ; + if ( get_netlink_events ( nl_socket, links_gone_down, links_gone_up )) + { + const char * mgmnt_iface_ptr = daemon_get_cfg_ptr()->mgmnt_iface ; + const char * infra_iface_ptr = daemon_get_cfg_ptr()->infra_iface ; + bool running = false ; + if ( !links_gone_down.empty() ) + { + //wlog ("one or more links have dropped\n"); + /* Look at the down list */ + for ( iter_curr_ptr = links_gone_down.begin(); + iter_curr_ptr != links_gone_down.end() ; + iter_curr_ptr++ ) + { + bool care = false ; + if ( iter_curr_ptr->size() == 0 ) + continue ; + + if ( !strcmp (mgmnt_iface_ptr, iter_curr_ptr->data())) + { + care = true ; + mgmnt_link_up_and_running = false ; + wlog ("Management link %s is down\n", mgmnt_iface_ptr ); + } + if ( !strcmp (infra_iface_ptr, iter_curr_ptr->data())) + { + care = true ; + infra_link_up_and_running = false ; + wlog ("Infrastructure link %s is down\n", infra_iface_ptr ); + } + + if 
( care == true ) + { + if ( get_link_state ( ioctl_socket, iter_curr_ptr->data(), &running ) == PASS ) + { + wlog ("%s is down (oper:%s)\n", iter_curr_ptr->c_str(), running ? "up" : "down" ); + } + else + { + wlog ("%s is down (driver query failed)\n", iter_curr_ptr->c_str() ); + } + } + } + } + if ( !links_gone_up.empty() ) + { + // wlog ("one or more links have recovered\n"); + /* Look at the up list */ + for ( iter_curr_ptr = links_gone_up.begin(); + iter_curr_ptr != links_gone_up.end() ; + iter_curr_ptr++ ) + { + bool care = false ; + if ( iter_curr_ptr->size() == 0 ) + continue ; + if ( !strcmp (mgmnt_iface_ptr, iter_curr_ptr->data())) + { + mgmnt_link_up_and_running = true ; + wlog ("Management link %s is up\n", mgmnt_iface_ptr ); + } + if ( !strcmp (infra_iface_ptr, iter_curr_ptr->data())) + { + infra_link_up_and_running = true ; + wlog ("Infrastructure link %s is up\n", infra_iface_ptr ); + } + if ( care == true ) + { + if ( get_link_state ( ioctl_socket, iter_curr_ptr->data(), &running ) == PASS ) + { + wlog ("%s is up (oper:%s)\n", iter_curr_ptr->c_str(), running ? "up" : "down" ); + } + else + { + wlog ("%s is up (driver query failed)\n", iter_curr_ptr->c_str() ); + } + } + } + } + } + return (PASS); +} + + +/* *************************************************************************** + * + * Name : hbs_minor_clear + * + * Description: Clear the heartbeat minor state from the specified host. + * + * Manage overall mnfa counts and call mnfa_exit when the number crosses + * the recovery threwshold. 
+ *
+ ******************************************************************************/
+/* Parameters : node_ptr - host whose minor state is cleared (not NULL checked)
+ *              iface    - heartbeat interface the event applies to */
+void nodeLinkClass::hbs_minor_clear ( struct nodeLinkClass::node * node_ptr, iface_enum iface )
+{
+    /* nothing is in the minor state on this interface */
+    if ( mnfa_host_count[iface] == 0 )
+        return ;
+
+    /* Nothing to do if this host is not in the hbs_minor state */
+    if ( node_ptr->hbs_minor[iface] == true )
+    {
+        /* clear it - possibly temporarily */
+        node_ptr->hbs_minor[iface] = false ;
+
+        /* manage counts over heartbeat failure */
+        if ( mnfa_host_count[iface] )
+        {
+            /* If we are mnfa_active AND now below the threshold
+             * then trigger mnfa_exit */
+            if (( --mnfa_host_count[iface] < mnfa_calculate_threshold( node_ptr->hostname ) ) &&
+                ( mnfa_active == true ))
+            {
+
+                wlog ("%s MNFA exit with graceful recovery (%s:%d)\n",
+                          node_ptr->hostname.c_str(),
+                          get_iface_name_str(iface),
+                          mnfa_host_count[iface] );
+
+                /* re-activate this to true so that it is part
+                 * of the recovery group in mnfa_exit */
+                node_ptr->hbs_minor[iface] = true ;
+                mnfa_exit ( false );
+            }
+
+            /* Otherwise this is a single host that has recovered
+             * possibly as part of a mnfa group or simply a lone wolf */
+            else
+            {
+                if ( node_ptr->mnfa_graceful_recovery == true )
+                {
+                    ilog ("%s MNFA removed from pool\n", node_ptr->hostname.c_str() );
+                    mnfa_awol_list.remove(node_ptr->hostname);
+                }
+
+                mnfa_recover_host ( node_ptr );
+
+                if ( mnfa_active == true )
+                {
+                    /* Restart the heartbeat for this recovered host */
+                    send_hbs_command ( node_ptr->hostname, MTC_RESTART_HBS );
+
+                    /* don't restart graceful recovery for this host if its already in that FSM */
+                    if ( node_ptr->adminAction != MTC_ADMIN_ACTION__RECOVER )
+                    {
+                        recoveryStageChange ( node_ptr, MTC_RECOVERY__START );
+                        adminActionChange ( node_ptr, MTC_ADMIN_ACTION__RECOVER );
+                    }
+                }
+            }
+        }
+    }
+
+    /* lets clean-up - walk the inventory and make sure the
+     * avoidance count meets the number of hosts in the minor
+     * degrade state ; 'ptr != NULL' covers an empty inventory */
+    int temp_count = 0 ;
+    for ( struct node * ptr = head ; ptr != NULL ; ptr = ptr->next )
+    {
+        if ( ptr->hbs_minor[iface] == true )
+        {
+            if ( ptr->operState != MTC_OPER_STATE__ENABLED )
+            {
+                slog ("%s found hbs_minor set for disabled host\n" , ptr->hostname.c_str() );
+            }
+            temp_count++ ;
+        }
+        if ( ptr == tail )
+            break ;
+    }
+
+    if ( temp_count != mnfa_host_count[iface] )
+    {
+        slog ("%s MNFA host tally (%s:%d incorrect - expected %d) ; correcting\n",
+                  node_ptr->hostname.c_str(),
+                  get_iface_name_str(iface),
+                  mnfa_host_count[iface], temp_count );
+        mnfa_host_count[iface] = temp_count ;
+    }
+    else
+    {
+        wlog ("%s MNFA host tally (%s:%d)\n",
+                  node_ptr->hostname.c_str(),
+                  get_iface_name_str(iface),
+                  mnfa_host_count[iface] );
+    }
+}
+
+/****************************************************************************
+ *
+ * Name       : manage_dor_recovery
+ *
+ * Description: Enable DOR recovery mode for this host.
+ *              Generate log
+ *
+ * The severity parm is used to enhance the logs to indicate what
+ * severity level this utility was called from ;
+ * minor, major, or critical
+ *
+ * Only two severities change state : CLEAR leaves DOR recovery mode and
+ * CRITICAL enters it ; any other severity is a no-op.
+ *
+ ***************************************************************************/
+
+void nodeLinkClass::manage_dor_recovery ( struct nodeLinkClass::node * node_ptr,
+                                                  EFmAlarmSeverityT severity )
+{
+    if (( severity == FM_ALARM_SEVERITY_CLEAR ) &&
+        ( node_ptr->dor_recovery_mode == true ))
+    {
+        node_ptr->dor_recovery_mode = false ;
+        node_ptr->was_dor_recovery_mode = true ;
+    }
+
+    else if (( severity == FM_ALARM_SEVERITY_CRITICAL ) &&
+             ( node_ptr->dor_recovery_mode == false ))
+    {
+        /* monotonic clock reading, logged as mins:secs and total secs */
+        struct timespec ts ;
+        clock_gettime (CLOCK_MONOTONIC, &ts );
+        wlog ("%-12s is waiting ; DOR recovery %2ld:%02ld mins (%4ld secs)\n",
+                  node_ptr->hostname.c_str(),
+                  ts.tv_sec/60,
+                  ts.tv_sec%60,
+                  ts.tv_sec);
+
+        node_ptr->dor_recovery_time = 0 ;
+        node_ptr->dor_recovery_mode = true ;
+        node_ptr->hbsClient_ready = false ;
+        mtcInvApi_update_task ( node_ptr, MTC_TASK_RECOVERY_WAIT );
+
+        /* don't restart graceful recovery for this host if its already in that FSM */
+        if (( node_ptr->adminAction != MTC_ADMIN_ACTION__RECOVER ) &&
+            ( node_ptr->adminAction != MTC_ADMIN_ACTION__LOCK ))
+        {
+            recoveryStageChange ( node_ptr, MTC_RECOVERY__START );
+            adminActionChange ( node_ptr, MTC_ADMIN_ACTION__RECOVER );
+        }
+    }
+}
+
+
+/** Manage heartbeat failure events
+ *
+ *  clear_event true  : clear the minor state ; the failure flag is dropped
+ *                      once no hosts remain in the mnfa count.
+ *  DOR timer running : defer to manage_dor_recovery (critical).
+ *  otherwise         : add the host to the mnfa pool and, when mnfa is not
+ *                      active and the count is below threshold, fail the host
+ *                      and start (or restart) graceful recovery. */
+void nodeLinkClass::manage_heartbeat_failure ( string hostname, iface_enum iface, bool clear_event )
+{
+    nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr == NULL )
+    {
+        wlog ("%s Unknown host\n", hostname.c_str());
+        return ;
+    }
+
+    /* Handle clear */
+    if ( clear_event == true )
+    {
+        hbs_minor_clear ( node_ptr, iface );
+
+        plog ("%s %s Heartbeat failure clear\n", hostname.c_str(), get_iface_name_str(iface));
+
+        // if (( mnfa_host_count == 0 ) || ( iface == INFRA_IFACE ))
+        if ( mnfa_host_count[iface] == 0 ) // || ( iface == INFRA_IFACE ))
+        {
+            slog ("%s %s Heartbeat failure clear\n", hostname.c_str(), get_iface_name_str(iface));
+            node_ptr->hbs_failure[iface] = false ;
+        }
+    }
+    else if ( this->mtcTimer_dor.tid )
+    {
+        manage_dor_recovery ( node_ptr, FM_ALARM_SEVERITY_CRITICAL );
+    }
+    else
+    {
+        mnfa_add_host ( node_ptr , iface );
+
+        if ( mnfa_active == false )
+        {
+            elog ("%s %s *** Heartbeat Loss ***\n", hostname.c_str(), get_iface_name_str(iface));
+            if ( iface == INFRA_IFACE )
+            {
+                node_ptr->heartbeat_failed[INFRA_IFACE] = true ;
+            }
+            else if ( iface == MGMNT_IFACE )
+            {
+                node_ptr->heartbeat_failed[MGMNT_IFACE] = true ;
+            }
+            if ( mnfa_host_count[iface] < mnfa_calculate_threshold( hostname ))
+            {
+
+                elog ("%s %s network heartbeat failure\n", hostname.c_str(), get_iface_name_str(iface));
+
+                nodeLinkClass::set_availStatus ( hostname, MTC_AVAIL_STATUS__FAILED );
+                if ( node_ptr->alarms[MTC_ALARM_ID__ENABLE] != FM_ALARM_SEVERITY_CRITICAL )
+                {
+                    mtcAlarm_critical ( node_ptr->hostname, MTC_ALARM_ID__ENABLE );
+                    node_ptr->alarms[MTC_ALARM_ID__ENABLE] = FM_ALARM_SEVERITY_CRITICAL;
+                }
+
+                if (( node_ptr->adminAction != MTC_ADMIN_ACTION__ENABLE ) &&
+                    ( node_ptr->adminAction != MTC_ADMIN_ACTION__UNLOCK ))
+                {
+                    if ( node_ptr->adminAction == MTC_ADMIN_ACTION__RECOVER )
+                    {
+                        wlog ("%s restarting graceful recovery\n", hostname.c_str() );
+                    }
+                    else
+                    {
+                        wlog ("%s starting graceful recovery\n", hostname.c_str() );
+                    }
+                    recoveryStageChange ( node_ptr, MTC_RECOVERY__START );
+                    adminActionChange ( node_ptr, MTC_ADMIN_ACTION__RECOVER );
+                }
+                else
+                {
+                    /* failure while enabling/unlocking ; fail the enable */
+                    mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLE_FAIL_HB );
+                    enableStageChange ( node_ptr, MTC_ENABLE__FAILURE );
+                }
+            }
+        }
+    }
+}
+
+/** Clear the heartbeat_failed flag of the named host for one interface,
+ *  or for every interface when iface is MAX_IFACES. */
+void nodeLinkClass::manage_heartbeat_clear ( string hostname, iface_enum iface )
+{
+    nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr == NULL )
+    {
+        wlog ("%s Unknown host\n", hostname.c_str());
+        return ;
+    }
+    if ( iface == MAX_IFACES )
+    {
+        for ( int i = 0 ; i < MAX_IFACES ; i++ )
+        {
+            node_ptr->heartbeat_failed[i] = false ;
+        }
+    }
+    else
+    {
+        node_ptr->heartbeat_failed[iface] = false ;
+    }
+}
+
+/** Manage compute host maintenance based on this heartbeat
+ *  degrade event and others that may be present at this moment */
+void nodeLinkClass::manage_heartbeat_degrade ( string hostname, iface_enum iface, bool clear_event )
+{
+    nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr == NULL )
+    {
+        wlog ("%s Unknown host\n", hostname.c_str());
+        return ;
+    }
+
+    if ( clear_event == true )
+    {
+        alog ("%s %s Heartbeat Degrade (clear)\n", hostname.c_str(), get_iface_name_str(iface));
+        manage_heartbeat_clear ( hostname, iface );
+
+        if ( iface == MGMNT_IFACE )
+        {
+            node_ptr->no_work_log_throttle = 0 ;
+            node_ptr->degrade_mask &= ~DEGRADE_MASK_HEARTBEAT_MGMNT ;
+        }
+
+        else if ( iface == INFRA_IFACE )
+        {
+            node_ptr->no_work_log_throttle = 0 ;
+            node_ptr->degrade_mask &= ~DEGRADE_MASK_HEARTBEAT_INFRA ;
+        }
+
+        hbs_minor_clear ( node_ptr, iface );
+
+        /* Set the host available if the degrade mask is now
+         * cleared and we are degraded */
+        if ( node_ptr->degrade_mask == 0 )
+        {
+            if ( get_availStatus ( hostname ) == MTC_AVAIL_STATUS__DEGRADED )
+            {
+                set_availStatus ( hostname, MTC_AVAIL_STATUS__AVAILABLE );
+            }
+        }
+    }
+    else if ( this->mtcTimer_dor.tid )
+    {
+        manage_dor_recovery ( node_ptr, FM_ALARM_SEVERITY_MAJOR );
+    }
+    else
+    {
+        if ( mnfa_active == false )
+        {
+            wlog ("%s %s *** Heartbeat Miss ***\n", hostname.c_str(), get_iface_name_str(iface) );
+        }
+
+        mnfa_add_host ( node_ptr, iface );
+
+        if ( nodeLinkClass::get_operState ( hostname ) == MTC_OPER_STATE__ENABLED )
+        {
+            /* setting a bit that is already set is harmless, so the
+             * degrade reason is simply or'ed in */
+            if ( iface == MGMNT_IFACE )
+            {
+                node_ptr->degrade_mask |= DEGRADE_MASK_HEARTBEAT_MGMNT ;
+            }
+            if ( iface == INFRA_IFACE )
+            {
+                node_ptr->degrade_mask |= DEGRADE_MASK_HEARTBEAT_INFRA ;
+            }
+
+            /* No point in changing if we are already degraded */
+            if ( nodeLinkClass::get_availStatus ( hostname ) == MTC_AVAIL_STATUS__AVAILABLE )
+            {
+                set_availStatus ( hostname, MTC_AVAIL_STATUS__DEGRADED );
+            }
+        }
+    }
+}
+
+/** Manage heartbeat minor events */
+void nodeLinkClass::manage_heartbeat_minor ( string hostname, iface_enum iface, bool clear_event )
+{
+    nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr == NULL )
+    {
+        wlog ("%s Unknown host\n", hostname.c_str());
+        return ;
+    }
+
+    /* is this a clear event ? */
+    if ( clear_event == true )
+    {
+        alog ("%s %s Heartbeat Minor (clear)\n", hostname.c_str(), get_iface_name_str(iface));
+        hbs_minor_clear ( node_ptr, iface );
+    }
+    /* if not a clear then only set if the host is enabled
+     * - we don't care about disabled hosts */
+    else if ( nodeLinkClass::get_operState ( hostname ) == MTC_OPER_STATE__ENABLED )
+    {
+        if ( this->mtcTimer_dor.tid )
+        {
+            manage_dor_recovery ( node_ptr, FM_ALARM_SEVERITY_MINOR );
+        }
+
+        else if ( node_ptr->hbs_minor[iface] != true )
+        {
+            mnfa_add_host ( node_ptr, iface );
+        }
+    }
+}
+
+
+/** Interface to declare that a key service on the
+ *  specified host is up, running and ready
+ *
+ *  Returns PASS, FAIL_UNKNOWN_HOSTNAME for an unknown host or
+ *  FAIL_BAD_CASE for an unsupported service. */
+int nodeLinkClass::declare_service_ready ( string & hostname,
+                                           unsigned int service )
+{
+    nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr == NULL )
+    {
+        wlog ("%s Unknown Host\n", hostname.c_str());
+        return FAIL_UNKNOWN_HOSTNAME ;
+    }
+    else if ( service == MTC_SERVICE_PMOND )
+    {
+        node_ptr->pmond_ready = true ;
+        plog ("%s got pmond ready event\n", hostname.c_str());
+
+        /* A ready event means that pmond process has started.
+         * Any previous history is gone. Cleanup mtce.
+         * If there are still process issues on this host then
+         * they will be reported again.*/
+        node_ptr->degrade_mask &= ~DEGRADE_MASK_PMON ;
+        if ( node_ptr->degrade_mask == DEGRADE_MASK_NONE )
+        {
+            if ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )
+            {
+                availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE );
+            }
+        }
+        return (PASS);
+    }
+    else if ( service == MTC_SERVICE_HWMOND )
+    {
+        node_ptr->hwmond_ready = true ;
+        plog ("%s got hwmond ready event\n", hostname.c_str());
+        /* re-send the host to hwmond if its bmc is provisioned */
+        if ( node_ptr->bm_provisioned == true )
+        {
+            send_hwmon_command ( node_ptr->hostname, MTC_CMD_ADD_HOST );
+            send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST );
+        }
+        return (PASS);
+    }
+    else if ( service == MTC_SERVICE_RMOND )
+    {
+        node_ptr->rmond_ready = true ;
+        plog ("%s got rmond ready event\n", hostname.c_str());
+        return (PASS);
+    }
+    else if ( service == MTC_SERVICE_HEARTBEAT )
+    {
+        /* logged on the first ready event only */
+        if ( node_ptr->hbsClient_ready == false )
+        {
+            node_ptr->hbsClient_ready = true ;
+            plog ("%s got hbsClient ready event\n", hostname.c_str());
+        }
+        return (PASS);
+    }
+    else
+    {
+        return (FAIL_BAD_CASE);
+    }
+}
+
+/** Clear pmond degrade flag
+ *
+ *  Also clears the inservice failed flags of the host and its sub-function.
+ *  Returns PASS, or FAIL_UNKNOWN_HOSTNAME for an unknown host. */
+int nodeLinkClass::degrade_pmond_clear ( string & hostname )
+{
+    nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr == NULL )
+    {
+        wlog ("%s Unknown Host\n", hostname.c_str());
+        return (FAIL_UNKNOWN_HOSTNAME) ;
+    }
+    if ( node_ptr->degrade_mask )
+    {
+        node_ptr->degrade_mask &= ~DEGRADE_MASK_PMON ;
+
+        if ( !node_ptr->degrade_mask )
+        {
+            if ( node_ptr->operState == MTC_OPER_STATE__ENABLED )
+            {
+                availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE );
+            }
+        }
+    }
+
+    /* The only detectable inservice failures are process failures */
+    node_ptr->inservice_failed_subf = false ;
+    node_ptr->inservice_failed = false ;
+    return (PASS);
+}
+
+/** Resource Monitor 'Clear' Event handler.
+ *
+ * The resource specified will be removed from the
+ * 'degraded_resources_list' for specified host.
+ * if there are no other degraded resources or other + * degraded services/reasons against that host then + * this handler will clear the degrade state for the + * specified host all together. */ +int nodeLinkClass::degrade_resource_clear ( string & hostname, + string & resource ) +{ + /* lr - Log Prefix Rmon */ + string lr = hostname ; + lr.append (" rmond:"); + + nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr == NULL ) + { + wlog ("%s Unknown Host\n", lr.c_str()); + return FAIL_UNKNOWN_HOSTNAME ; + } + else if ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) + { + /* Clear all resource degrade conditions if there is no resource specified */ + /* this is used as a cleanup audit just in case things get stuck */ + if ( resource.empty() ) + { + node_ptr->degrade_mask &= ~DEGRADE_MASK_RESMON ; + node_ptr->degraded_resources_list.clear () ; + } + else if (( node_ptr->degraded_resources_list.empty()) || + ( node_ptr->degrade_mask == DEGRADE_MASK_NONE )) + { + dlog ("%s '%s' Non-Degraded Clear\n", + lr.c_str(), resource.c_str()); + } + else + { + if (is_string_in_string_list (node_ptr->degraded_resources_list, resource)) + { + node_ptr->degraded_resources_list.remove(resource); + ilog ("%s '%s' Degrade Clear\n", + lr.c_str(), resource.c_str()); + } + else + { + wlog ("%s '%s' Unexpected Degrade Clear\n", + lr.c_str(), resource.c_str()); + } + + if ( node_ptr->degraded_resources_list.empty() ) + { + node_ptr->degrade_mask &= ~DEGRADE_MASK_RESMON ; ; + if ( node_ptr->degrade_mask == DEGRADE_MASK_NONE ) + { + if ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ) + { + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE ); + } + } + else + { + wlog ("%s Remains Degraded - Reason Mask:0x%08x\n", + hostname.c_str(), node_ptr->degrade_mask ); + } + } + else + { + string degraded_resources = + get_strings_in_string_list ( node_ptr->degraded_resources_list ); + wlog ("%s Degraded Resource List: %s\n", + lr.c_str(), 
degraded_resources.c_str()); + } + } + + } + return (PASS); +} + +/********************************************************************************* + * + * Name : node_degrade_control + * + * Purpose : Accept and handle degrade raise and clear requests from + * external services. + * + * Description: Maintenance maintains a degrade mask with a bit representing + * various services. The assertion of any one bit causes the host + * to be degraded. All bits need to be cleared in orde to exit + * the degrade state. + * + * Supported 'services' include + * + * "hwmon" - The Hardware Monitor process + * + * + * Future services might be rmon and pmon + * + **********************************************************************************/ +int nodeLinkClass::node_degrade_control ( string & hostname, int state, string service ) +{ + int rc = FAIL_UNKNOWN_HOSTNAME ; + + nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr ) + { + unsigned int service_flag = 0 ; + + /* convert service string to degrade mask flag + * - handle empty string and unsupported service */ + if ( service.empty() ) + { + slog ("%s service not specified", hostname.c_str()); + return (FAIL_STRING_EMPTY); + } + else if ( !service.compare("hwmon") ) + { + service_flag = DEGRADE_MASK_HWMON ; + } + else + { + slog ("%s service '%s' not supported\n", + hostname.c_str(), + service.c_str()); + return (FAIL_INVALID_DATA); + } + + switch ( state ) + { + /* Handle clear case */ + case MTC_DEGRADE_CLEAR: + { + if ( node_ptr->degrade_mask & service_flag ) + { + ilog ("%s degrade 'clear' from '%s'\n", hostname.c_str(), service.c_str() ); + } + + /* clear the mask regardless of host state */ + node_ptr->degrade_mask &= ~service_flag ; + + /* only applies if host is unlocked-enabled-degraded and + * there are no other degrade flags in the degrade mask */ + if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && + ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && + ( 
node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )) + { + if ( node_ptr->degrade_mask == DEGRADE_MASK_NONE ) + { + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE ); + } + else + { + /* TODO: convert lask to a sring or services and print that string */ + wlog ("%s remains degraded - degrade mask:0x%08x\n", + hostname.c_str(), + node_ptr->degrade_mask ); + } + } + else + { + dlog ("%s unexpected degrade clear for '%s' service\n", + hostname.c_str(), service.c_str() ); + } + rc = PASS ; + break ; + } + + /* Handle assertion case */ + case MTC_DEGRADE_RAISE: + { + if (( node_ptr->degrade_mask & service_flag ) == 0 ) + { + wlog ("%s degrade 'assert' from '%s'\n", hostname.c_str(), service.c_str() ); + node_ptr->degrade_mask |= service_flag ; + } + + if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && + ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && + ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )) + { + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED ); + } + rc = PASS ; + break ; + } + default: + { + wlog ("%s invalid degrade control request '%d'\n", hostname.c_str(), state); + rc = FAIL_BAD_CASE ; + break ; + } + } /* end switch */ + } + else + { + dlog ("%s Unknown Host\n", hostname.c_str()); + } + return (rc); +} + + +int nodeLinkClass::hwmon_recovery_monitor ( struct nodeLinkClass::node * node_ptr, int hwmon_event ) +{ + int delay = MTC_MINS_15 ; + if ( hwmon_event == MTC_EVENT_HWMON_POWERCYCLE ) + { + node_ptr->hwmon_powercycle.retries = 0 ; + node_ptr->hwmon_powercycle.queries = 0 ; + node_ptr->hwmon_powercycle.state = RECOVERY_STATE__MONITOR ; + + mtcTimer_reset ( node_ptr->hwmon_powercycle.recovery_timer ); + mtcTimer_start ( node_ptr->hwmon_powercycle.recovery_timer, mtcTimer_handler, delay ); + + ilog ("%s starting hwmon 'powercycle' recovery monitor", node_ptr->hostname.c_str()); + ilog ("%s ... 
uninterrupted completion time: %s", node_ptr->hostname.c_str(), future_time(delay)); + } + else if ( hwmon_event == MTC_EVENT_HWMON_RESET ) + { + node_ptr->hwmon_reset.retries = 0 ; + node_ptr->hwmon_reset.queries = 0 ; + node_ptr->hwmon_reset.state = RECOVERY_STATE__MONITOR ; + + mtcTimer_reset ( node_ptr->hwmon_reset.recovery_timer ); + mtcTimer_start ( node_ptr->hwmon_reset.recovery_timer, mtcTimer_handler, delay ); + + ilog ("%s starting hwmon 'reset' recovery monitor", node_ptr->hostname.c_str()); + ilog ("%s ... uninterrupted completion time: %s", node_ptr->hostname.c_str(), future_time(delay)); + } + return (PASS); +} + +/* Hardware Monitor 'Action' Event method + * + * The hardware monitor daemon is calling out a sensor that + * is operating out of spec. The command is the accompanying + * action that hwmond requested as a recovery action to this failure. + * The sensor is the sensor name that triggersed the event. */ +int nodeLinkClass::invoke_hwmon_action ( string & hostname, int action, string & sensor ) +{ + int rc = PASS ; + nodeLinkClass::node * node_ptr = getNode ( hostname ) ; + + dlog ("%s request to '%s' due to critical sensor '%s' reading\n", + hostname.c_str(), + get_event_str(action).c_str(), + sensor.c_str()); + + if ( node_ptr ) + { + if ( node_ptr->bm_accessible == false ) + { + wlog ("%s rejecting %s hwmon action request for '%s' sensor ; BMC not accessible\n", + hostname.c_str(), + get_event_str(action).c_str(), + sensor.c_str()); + + return (PASS); + } + if ( action == MTC_EVENT_HWMON_RESET ) + { + if ( is_active_controller (hostname) == true ) + { + wlog ("%s refusing to 'reset' self due to critical '%s' sensor event\n", + hostname.c_str(), sensor.c_str()); + recovery_ctrl_init ( node_ptr->hwmon_reset ); + return(rc); + } + + /* Avoid interrupting higher priority powercycle action */ + else if (( node_ptr->adminAction == MTC_ADMIN_ACTION__POWERCYCLE ) || + ( node_ptr->hwmon_powercycle.state != RECOVERY_STATE__INIT )) + { + wlog ("%s 
bypassing 'reset' request while 'powercycle' already in progress (%s)\n", + hostname.c_str(), sensor.c_str()); + } + else if ( node_ptr->adminAction != MTC_ADMIN_ACTION__NONE ) + { + wlog ("%s bypassing 'reset' request while '%s' action in progress (%s)\n", + hostname.c_str(), get_adminAction_str(node_ptr->adminAction), sensor.c_str()); + } + else if ( node_ptr->hwmon_reset.state ) + { + wlog ("%s rejecting 'reset' request while already in progress (%s)\n", + hostname.c_str(), sensor.c_str()); + } + else + { + if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && + ( node_ptr->operState == MTC_OPER_STATE__ENABLED )) + { + mtcTimer_reset ( node_ptr->hwmon_reset.recovery_timer ); + mtcTimer_start ( node_ptr->hwmon_reset.recovery_timer, mtcTimer_handler, MTC_MINS_15 ); + + force_full_enable ( node_ptr ); + } + else + { + if ( node_ptr->adminAction != MTC_ADMIN_ACTION__RESET ) + { + elog ("%s starting 'reset' FSM\n", hostname.c_str()); + + mtcTimer_reset ( node_ptr->hwmon_reset.recovery_timer ); + mtcTimer_start ( node_ptr->hwmon_reset.recovery_timer, mtcTimer_handler, MTC_MINS_15 ); + + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__RESET ); + } + else + { + wlog ("%s mtce 'reset' action already in progress\n", hostname.c_str()); + } + } + node_ptr->hwmon_reset.state = RECOVERY_STATE__HOLDOFF ; + } + } + else if ( action == MTC_EVENT_HWMON_POWERCYCLE ) + { + if ( node_ptr->hwmon_powercycle.attempts > MAX_POWERCYCLE_ATTEMPT_RETRIES ) + { + wlog ("%s ignoring 'powercycle' request ; too many failed attempts (%d)\n", + node_ptr->hostname.c_str(), node_ptr->hwmon_powercycle.attempts ); + } + else if ( is_active_controller (hostname) == true ) + { + wlog ("%s refusing to 'powercycle' self due to critical '%s' sensor event\n", + hostname.c_str(), sensor.c_str()); + recovery_ctrl_init ( node_ptr->hwmon_powercycle ) ; + } + else + { + if ( node_ptr->adminAction == MTC_ADMIN_ACTION__POWERCYCLE ) + { + wlog ("%s bypassing 'powercycle' request while already in progress 
(%s)\n", + hostname.c_str(), sensor.c_str()); + } + else if ( node_ptr->adminAction != MTC_ADMIN_ACTION__NONE ) + { + wlog ("%s bypassing 'powercycle' request while '%s' action in progress (%s)\n", + hostname.c_str(), + get_adminAction_str(node_ptr->adminAction), + sensor.c_str()); + } + else if ( node_ptr->hwmon_powercycle.state == RECOVERY_STATE__COOLOFF ) + { + wlog ("%s avoiding 'powercycle' request while in powercycle recovery cooloff (%s)\n", + hostname.c_str(), sensor.c_str()); + } + else if ( node_ptr->hwmon_powercycle.state == RECOVERY_STATE__HOLDOFF ) + { + wlog ("%s avoiding 'powercycle' request while in powercycle recovery holdoff (%s)\n", + hostname.c_str(), sensor.c_str()); + } + else if ( node_ptr->hwmon_powercycle.state == RECOVERY_STATE__ACTION ) + { + wlog ("%s avoiding 'powercycle' request while already handling powercycle (%s)\n", + hostname.c_str(), sensor.c_str()); + } + else if ( node_ptr->hwmon_powercycle.state == RECOVERY_STATE__BLOCKED ) + { + wlog ("%s avoiding 'powercycle' request ; host is powered off due to protect hardware from damage due to critical '%s' sensor\n", + hostname.c_str(), sensor.c_str()); + } + else + { + if ( node_ptr->hwmon_powercycle.state == RECOVERY_STATE__MONITOR ) + { + wlog ("%s 'powercycle' request while in monitor phase (%s)\n", + hostname.c_str(), sensor.c_str()); + } + + /* Cancel the recovery timer only to have it started once the + * next power cycle phase is complete */ + mtcTimer_reset ( node_ptr->hwmon_powercycle.recovery_timer ); + + wlog ("%s invoking 'powercycle' due to critical '%s' sensor assertion\n", hostname.c_str(), sensor.c_str()); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__START ); + subStageChange ( node_ptr, MTC_SUBSTAGE__START ); + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__POWERCYCLE ); + } + } + } + else + { + slog ("%s '%s' action not supported as request from hwmond\n", + hostname.c_str(), + get_event_str(action).c_str()); + rc = FAIL_BAD_PARM ; + } + } + else + { + slog 
("%s cannot '%s' due to unknown host\n", hostname.c_str(), get_event_str(action).c_str()); + rc = FAIL_UNKNOWN_HOSTNAME ; + } + return (rc); +} + +/* Generate a log for the reported failed process if that host is + * unlocked */ +int nodeLinkClass::log_process_failure ( string & hostname, string & process ) +{ + /* lp - Log Prefix */ + string lp = hostname ; + lp.append (" pmon:"); + + nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr == NULL ) + { + wlog ("%s Unknown Host ; '%s' failed (minor)\n", + lp.c_str(), process.c_str()); + return FAIL_UNKNOWN_HOSTNAME ; + } + else if ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) + { + if ( process.compare("ntpd") ) + { + wlog ("%s '%s' process failed and is being auto recovered\n", + lp.c_str(), + process.c_str()); + } + else + { + wlog ("%s '%s' process has failed ; manual recovery action required\n", + lp.c_str(), + process.c_str()); + } + } + return (PASS); +} + +/* if unlocked-enabled generate an alarm for the reported failed process */ +int nodeLinkClass::alarm_process_failure ( string & hostname, string & process ) +{ + /* lp - Log Prefix */ + string lp = hostname ; + lp.append (" pmon:"); + + nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr == NULL ) + { + wlog ("%s Unknown Host ; '%s' failed (minor)\n", + lp.c_str(), process.c_str()); + return FAIL_UNKNOWN_HOSTNAME ; + } + else if ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) + { + /* TODO: Generate Alarm here */ + + wlog ("%s '%s' failed (minor)\n", lp.c_str(), process.c_str()); + } + return (PASS); +} + +/* Generate a log for the reported failed resource if that host is + * unlocked */ +int nodeLinkClass::log_resource_failure ( string & hostname, string & resource ) +{ + /* lr - Log Prefix Rmond */ + string lr = hostname ; + lr.append (" rmond:"); + nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr == NULL ) + { + wlog ("%s Unknown Host ; '%s' 
failed (minor)\n", + lr.c_str(), resource.c_str()); + return FAIL_UNKNOWN_HOSTNAME ; + } + else if ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) + { + ilog ("%s '%s' failed (minor)\n", + lr.c_str(), resource.c_str()); + } + return (PASS); +} + +/** Process Monitor Degrade Event handler. + * + * The host will enter degrade state due to the specified process + * not running properly. The process name is recorded in the + * 'degraded_processes_list' for specified host. + * Clearing degrade against this process requires that host to + * send a clear event against that process or for that host to + * fully re-enable */ +int nodeLinkClass::degrade_process_raise ( string & hostname, + string & process ) +{ + nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr == NULL ) + { + wlog ("%s Unknown Host\n", hostname.c_str()); + return FAIL_UNKNOWN_HOSTNAME ; + } + else if ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) + { + if ( (node_ptr->degrade_mask & DEGRADE_MASK_PMON) == 0 ) + { + node_ptr->degrade_mask |= DEGRADE_MASK_PMON ; + wlog ("%s is degraded due to '%s' process failure\n", hostname.c_str(), process.c_str()); + if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) + { + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED ); + } + } + } + return (PASS); +} + +/* + * Name : update_dport_states + * + * Purpose: Update data port states based on the event severity + * + * CLEAR = enabled-available + * MAJOR = enabled-degraded + * CRITICAL = disabled-failed + * + */ +int update_dport_states_throttle = 0 ; +int nodeLinkClass::update_dport_states ( struct nodeLinkClass::node * node_ptr, int event ) +{ + int rc = PASS ; + + /* if the host is locked then report the data ports as offline */ + if ( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED ) + { + event = MTC_EVENT_AVS_OFFLINE ; + } + + switch (event) + { + case MTC_EVENT_AVS_OFFLINE: + { + if ( node_ptr->operState_dport != MTC_OPER_STATE__DISABLED ) + { + ilog ("%s 
data port 'operState' change from '%s' -> 'disabled'", + node_ptr->hostname.c_str(), + operState_enum_to_str(node_ptr->operState_dport).c_str()); + + node_ptr->operState_dport = MTC_OPER_STATE__DISABLED ; + } + + if ( node_ptr->availStatus_dport != MTC_AVAIL_STATUS__OFFLINE ) + { + ilog ("%s data port 'availStat' change from '%s' -> 'offline'", + node_ptr->hostname.c_str(), + availStatus_enum_to_str(node_ptr->availStatus_dport).c_str()); + + node_ptr->availStatus_dport = MTC_AVAIL_STATUS__OFFLINE ; + } + break ; + } + case MTC_EVENT_AVS_CLEAR: + { + bool state_change = false ; + if ( node_ptr->operState_dport != MTC_OPER_STATE__ENABLED ) + { + ilog ("%s data port 'operState' change from '%s' -> 'enabled'", + node_ptr->hostname.c_str(), + operState_enum_to_str(node_ptr->operState_dport).c_str()); + + node_ptr->operState_dport = MTC_OPER_STATE__ENABLED ; + state_change = true ; + } + + if ( node_ptr->availStatus_dport != MTC_AVAIL_STATUS__AVAILABLE ) + { + ilog ("%s data port 'availStat' change from '%s' -> 'available'", + node_ptr->hostname.c_str(), + availStatus_enum_to_str(node_ptr->availStatus_dport).c_str()); + + node_ptr->availStatus_dport = MTC_AVAIL_STATUS__AVAILABLE ; + state_change = true ; + } + /** If there has been s state change as a result of a + * clear then send that to the VIM immediately + **/ + if ( state_change == true ) + { + /* Inform the VIM of the data port state change */ + mtcVimApi_state_change ( node_ptr, VIM_DPORT_CLEARED, 3 ); + } + break ; + } + case MTC_EVENT_AVS_MAJOR: + { + if ( node_ptr->operState_dport != MTC_OPER_STATE__ENABLED ) + { + ilog ("%s data port 'operState' change from '%s' -> 'enabled'", + node_ptr->hostname.c_str(), + operState_enum_to_str(node_ptr->operState_dport).c_str()); + + node_ptr->operState_dport = MTC_OPER_STATE__ENABLED ; + } + + if ( node_ptr->availStatus_dport != MTC_AVAIL_STATUS__DEGRADED ) + { + wlog ("%s data port 'availStat' change from '%s' -> 'degraded'", + node_ptr->hostname.c_str(), + 
availStatus_enum_to_str(node_ptr->availStatus_dport).c_str()); + + node_ptr->availStatus_dport = MTC_AVAIL_STATUS__DEGRADED ; + } + break ; + } + case MTC_EVENT_AVS_CRITICAL: + { + if ( node_ptr->operState_dport != MTC_OPER_STATE__DISABLED ) + { + elog ("%s data port 'operState' change from '%s' -> 'disabled'", + node_ptr->hostname.c_str(), + operState_enum_to_str(node_ptr->operState_dport).c_str()); + + node_ptr->operState_dport = MTC_OPER_STATE__DISABLED ; + } + + if ( node_ptr->availStatus_dport != MTC_AVAIL_STATUS__FAILED ) + { + elog ("%s data port 'availStat' change from '%s' -> 'failed'", + node_ptr->hostname.c_str(), + availStatus_enum_to_str(node_ptr->availStatus_dport).c_str()); + + node_ptr->availStatus_dport = MTC_AVAIL_STATUS__FAILED ; + } + break ; + } + default: + { + wlog_throttled (update_dport_states_throttle, 10, "Invalid port state (%x)\n", event ); + rc = FAIL_BAD_CASE ; + } + } + return (rc); +} + +/** Resource Monitor 'Raise' Event handler. + * + * The host will enter degrade state due to the specified resource + * threshold being surpased. The resource name is recorded in the + * 'degraded_resources_list' for specified host. 
+ * Clearing degrade against this resource requires that host to
+ * send a clear event against that resource or for that host to
+ * fully re-enable */
+int nodeLinkClass::degrade_resource_raise ( string & hostname,
+                                            string & resource )
+{
+    /* Every log from this handler is prefixed with "<hostname> rmond:" */
+    const string lr = hostname + " rmond:" ;
+
+    nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr == NULL )
+    {
+        wlog ("%s Unknown Host\n", lr.c_str());
+        return FAIL_UNKNOWN_HOSTNAME ;
+    }
+
+    /* A raise against a host that is not unlocked is accepted (PASS)
+     * but changes nothing */
+    if ( node_ptr->adminState != MTC_ADMIN_STATE__UNLOCKED )
+    {
+        return (PASS);
+    }
+
+    if ( is_string_in_string_list ( node_ptr->degraded_resources_list, resource ) )
+    {
+        /* already recorded against this host ; nothing new to track */
+        dlog ("%s '%s' Degraded (again)\n", lr.c_str(), resource.c_str());
+    }
+    else
+    {
+        /* first report of this resource ; record it and flag the
+         * resource monitor bit in the host's degrade mask */
+        ilog ("%s '%s' Degraded\n", lr.c_str(), resource.c_str());
+        node_ptr->degraded_resources_list.push_back (resource);
+        node_ptr->degrade_mask |= DEGRADE_MASK_RESMON ;
+
+        /* keep the list sorted and free of duplicates */
+        node_ptr->degraded_resources_list.sort ();
+        node_ptr->degraded_resources_list.unique ();
+
+        const string failing_resources =
+            get_strings_in_string_list ( node_ptr->degraded_resources_list );
+        wlog ("%s Failing Resources: %s\n",
+                  lr.c_str(), failing_resources.c_str());
+    }
+
+    /* Only an 'available' host is moved to 'degraded' ; any other
+     * availability status is left untouched */
+    if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )
+    {
+        availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
+    }
+    return (PASS);
+}
+
+/** Process Monitor 'Critical Process Failed' Event handler.
+ *
+ * This utility handles critical process failure event notifications.
+ * Typically this interface will force a host re-enable through reset.
+ *
+ * For CPE Simplex this failure sets the auto recovery bool
+ * so that the main enable FSM can handle it through a thresholded
+ * self reboot.
+ *
+ * That as well as all other failure handling cases are deferred to
+ * the enable handler's from failure case.
+ *
+ * Returns FAIL_UNKNOWN_HOSTNAME when the host is not found and PASS in
+ * every other case, including when the host is not unlocked-enabled
+ * and the event is therefore ignored.
+ *
+ **/
+int nodeLinkClass::critical_process_failed( string & hostname,
+                                            string & process,
+                                            unsigned int nodetype )
+{
+    /* nodetype is part of the interface but is not consulted here */
+    UNUSED(nodetype);
+
+    nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr == NULL )
+    {
+        wlog ("%s pmon: Unknown host\n", hostname.c_str());
+        return FAIL_UNKNOWN_HOSTNAME ;
+    }
+
+    /* The failure is only acted on for an unlocked-enabled host */
+    if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
+        ( node_ptr->operState  == MTC_OPER_STATE__ENABLED ))
+    {
+        elog ("%s has critical '%s' process failure\n", hostname.c_str(), process.c_str());
+
+        node_ptr->degrade_mask |= DEGRADE_MASK_PMON ;
+
+        /* Special critical process failure handling for AIO system.
+         * NOTE(review): THIS_HOST is a macro defined outside this block ;
+         * presumably true when node_ptr is the host running this
+         * process - confirm. */
+        if ( THIS_HOST && ( is_inactive_controller_main_insv() == false ))
+        {
+            /* With auto recovery disabled the failure is not escalated ;
+             * the degrade bit set above is left in place */
+            if ( this->autorecovery_disabled == true )
+            {
+                dlog ("%s bypassing persistent critical process failure\n",
+                          node_ptr->hostname.c_str());
+                return (PASS);
+            }
+
+            dlog ("%s critical process failure (aio)\n",
+                      node_ptr->hostname.c_str()); /* dlog */
+        }
+
+        /* Start fresh the next time we enter graceful recovery handler */
+        node_ptr->graceful_recovery_counter = 0 ;
+
+        /* Set node as unlocked-disabled-failed */
+        allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED,
+                                   MTC_OPER_STATE__DISABLED,
+                                   MTC_AVAIL_STATUS__FAILED );
+
+        /* Hand the host to the enable handler at its failure stage and
+         * request the 'none' admin action */
+        enableStageChange ( node_ptr, MTC_ENABLE__FAILURE );
+        adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE );
+
+        /* The value logged is the admin *action*, so label it as such
+         * (this label previously read 'adminState') */
+        dlog ("%s adminAction:%s EnableStage:%s\n",
+                  node_ptr->hostname.c_str(),
+                  adminAction_enum_to_str(node_ptr->adminAction).c_str(),
+                  get_enableStages_str(node_ptr->handlerStage.enable).c_str());
+    }
+    return (PASS);
+}
+
+/** Resource Monitor 'Failed' Event handler.
+ *
+ * The host will go out of service, be reset and
+ * automatically re-enabled. 
*/
+int nodeLinkClass::critical_resource_failed( string & hostname,
+                                             string & resource )
+{
+    nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr == NULL )
+    {
+        wlog ("%s rmond: Unknown host\n", hostname.c_str());
+        return FAIL_UNKNOWN_HOSTNAME ;
+    }
+
+    /* The failure is ignored unless the host is unlocked-enabled */
+    const bool unlocked_enabled =
+        (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
+         ( node_ptr->operState  == MTC_OPER_STATE__ENABLED ));
+    if ( unlocked_enabled == false )
+    {
+        return (PASS);
+    }
+
+    /* Start fresh the next time we enter graceful recovery handler */
+    node_ptr->graceful_recovery_counter = 0 ;
+
+    elog ("%s rmond: Critical Resource '%s' Failure\n", hostname.c_str(), resource.c_str());
+
+    /* Force the node to unlocked-disabled-failed */
+    allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED,
+                               MTC_OPER_STATE__DISABLED,
+                               MTC_AVAIL_STATUS__FAILED );
+    return (PASS);
+}
+
+/** Returns true when 'hostname' equals this process's my_hostname */
+bool nodeLinkClass::is_active_controller ( string hostname )
+{
+    return ( nodeLinkClass::my_hostname.compare(hostname) == 0 );
+}
+
+/** Returns the stored inactive controller hostname */
+string nodeLinkClass::get_inactive_controller_hostname ( void )
+{
+    return ( this->inactive_controller_hostname );
+}
+
+/** Stores 'hostname' as the inactive controller hostname */
+void nodeLinkClass::set_inactive_controller_hostname ( string hostname )
+{
+    this->inactive_controller_hostname = hostname ;
+}
+
+/** Returns the stored active controller hostname */
+string nodeLinkClass::get_active_controller_hostname ( void )
+{
+    return ( this->active_controller_hostname );
+}
+
+/** Stores 'hostname' as the active controller hostname */
+void nodeLinkClass::set_active_controller_hostname ( string hostname )
+{
+    this->active_controller_hostname = hostname ;
+}
+
+/** Returns the inactive controller's 'patched' flag ;
+ *  false when that host is not found */
+bool nodeLinkClass::inactive_controller_is_patched ( void )
+{
+    nodeLinkClass::node * node_ptr = getNode ( inactive_controller_hostname ) ;
+    if ( node_ptr == NULL )
+    {
+        return (false) ;
+    }
+    return ( node_ptr->patched );
+}
+
+/** Returns the inactive controller's 'patching' flag ;
+ *  false when that host is not found */
+bool nodeLinkClass::inactive_controller_is_patching ( void )
+{
+    nodeLinkClass::node * node_ptr = getNode ( inactive_controller_hostname ) ;
+    if ( node_ptr == NULL )
+    {
+        return (false) ;
+    }
+    return ( node_ptr->patching );
+}
+
+/** Returns true when the inactive controller is found and its
+ *  main function operState is enabled */
+bool nodeLinkClass::is_inactive_controller_main_insv ( void )
+{
+    nodeLinkClass::node * node_ptr = getNode ( inactive_controller_hostname ) ;
+    return (( node_ptr != NULL ) &&
+            ( node_ptr->operState == MTC_OPER_STATE__ENABLED ));
+}
+
+/** Returns true when the inactive controller is found and its
+ *  subfunction operState is enabled */
+bool nodeLinkClass::is_inactive_controller_subf_insv ( void )
+{
+    nodeLinkClass::node * node_ptr = getNode ( inactive_controller_hostname ) ;
+    return (( node_ptr != NULL ) &&
+            ( node_ptr->operState_subf == MTC_OPER_STATE__ENABLED ));
+}
+
+/** Stores the function string and the subfunction oper/avail states
+ *  for 'hostname', then refreshes the derived function info.
+ *
+ *  Returns FAIL_STRING_EMPTY for an empty 'functions' string,
+ *  FAIL_HOSTNAME_LOOKUP when the host is not found, otherwise the
+ *  result of update_host_functions. */
+int nodeLinkClass::set_subf_info ( string hostname,
+                                   string functions,
+                                   string operState_subf,
+                                   string availState_subf )
+{
+    if ( functions.empty() )
+    {
+        elog ("%s called with empty 'functions' string\n", hostname.c_str());
+        return (FAIL_STRING_EMPTY);
+    }
+
+    nodeLinkClass::node * node_ptr = getNode ( hostname ) ;
+    if ( node_ptr == NULL )
+    {
+        return (FAIL_HOSTNAME_LOOKUP);
+    }
+
+    node_ptr->functions        = functions ;
+    node_ptr->operState_subf   = operState_str_to_enum(operState_subf.data());
+    node_ptr->availStatus_subf = availStatus_str_to_enum(availState_subf.data());
+
+    return ( update_host_functions ( hostname, functions ));
+}
+
+
+
+/**********************************************************************************
+ *
+ * Name : update_host_functions
+ *
+ * Purpose: Loads a nodeLinkClass with function information based on a comma
+ * delimited function string like. 
+ *
+ * controller
+ * compute
+ * storage
+ * controller,compute
+ * controller,storage
+ *
+ * Returns: PASS when the function string was parsed and applied,
+ *          FAIL for an empty 'functions' string or an unknown host,
+ *          FAIL_NODETYPE when set_host_functions rejects the string.
+ *
+ **********************************************************************************/
+int nodeLinkClass::update_host_functions ( string hostname , string functions )
+{
+    int rc = FAIL ;
+
+    if ( functions.empty() )
+    {
+        elog ("%s called with empty 'functions' string\n", hostname.c_str());
+        return (rc);
+    }
+
+    nodeLinkClass::node * node_ptr = getNode ( hostname ) ;
+    if ( node_ptr != NULL )
+    {
+        node_ptr->functions = functions ;
+        if ( set_host_functions ( functions, &node_ptr->nodetype, &node_ptr->function, &node_ptr->subfunction ) != PASS )
+        {
+            elog ("%s failed to extract nodetype\n", hostname.c_str());
+            rc = FAIL_NODETYPE;
+        }
+        else
+        {
+            /* Derive the printable function name ; empty when the
+             * function is none of the three known types */
+            if ( node_ptr->function == CONTROLLER_TYPE )
+            {
+                node_ptr->function_str = "controller" ;
+            }
+            else if ( node_ptr->function == COMPUTE_TYPE )
+            {
+                node_ptr->function_str = "compute" ;
+            }
+            else if ( node_ptr->function == STORAGE_TYPE )
+            {
+                node_ptr->function_str = "storage" ;
+            }
+            else
+            {
+                node_ptr->function_str = "" ;
+            }
+
+            /* Same for the subfunction ; only compute and storage
+             * are named */
+            if ( node_ptr->subfunction == COMPUTE_TYPE )
+            {
+                node_ptr->subfunction_str = "compute" ;
+            }
+            else if ( node_ptr->subfunction == STORAGE_TYPE )
+            {
+                node_ptr->subfunction_str = "storage" ;
+            }
+            else
+            {
+                node_ptr->subfunction_str = "" ;
+            }
+
+            /* Success is reported from this branch only. It used to be
+             * set after the if/else, which overwrote FAIL_NODETYPE and
+             * hid the parse failure from the caller. */
+            rc = PASS ;
+        }
+    }
+    return (rc);
+}
+
+
+
+
+
+/** Fetch the nodetype value stored for 'hostname' ;
+ *  returns 0 when the host is not found */
+int nodeLinkClass::get_nodetype ( string & hostname )
+{
+    nodeLinkClass::node * node_ptr = getNode ( hostname ) ;
+    if ( node_ptr != NULL )
+    {
+        return ( node_ptr->nodetype );
+    }
+    return (0);
+}
+
+/** Check if a node's function has the controller type bit(s) set ;
+ *  false for a NULL node */
+bool nodeLinkClass::is_controller ( struct nodeLinkClass::node * node_ptr )
+{
+    if ( node_ptr != NULL )
+    {
+        if ( (node_ptr->function & CONTROLLER_TYPE ) == CONTROLLER_TYPE )
+        {
+            return (true);
+        }
+    }
+    return (false);
+}
+
+/** Check if a node's subfunction has the compute type bit(s) set ;
+ *  false for a NULL node */
+bool nodeLinkClass::is_compute_subfunction ( struct nodeLinkClass::node * node_ptr )
+{
+    if ( node_ptr != NULL )
+    {
+        if ( (node_ptr->subfunction & COMPUTE_TYPE ) == COMPUTE_TYPE )
+        {
+            return (true);
+        }
+    }
+    return (false);
+}
+
+/** Check if a node's function has the compute type bit(s) set ;
+ *  false for a NULL node */
+bool nodeLinkClass::is_compute ( struct nodeLinkClass::node * node_ptr )
+{
+    if ( node_ptr != NULL )
+    {
+        if ( (node_ptr->function & COMPUTE_TYPE ) == COMPUTE_TYPE )
+        {
+            return (true);
+        }
+    }
+    return (false);
+}
+
+/** Check if a node's function has the storage type bit(s) set ;
+ *  false for a NULL node */
+bool nodeLinkClass::is_storage ( struct nodeLinkClass::node * node_ptr )
+{
+    if ( node_ptr != NULL )
+    {
+        if ( (node_ptr->function & STORAGE_TYPE ) == STORAGE_TYPE )
+        {
+            return (true);
+        }
+    }
+    return (false);
+}
+
+/** Return the function string stored for 'hostname' ;
+ *  "unknown" when the host is not found */
+string nodeLinkClass::get_node_function_str ( string hostname )
+{
+    nodeLinkClass::node * node_ptr = getNode ( hostname );
+    if ( node_ptr != NULL )
+    {
+        return node_ptr->function_str ;
+    }
+    return "unknown" ;
+}
+
+/** Return the subfunction string stored for 'hostname' ;
+ *  "unknown" when the host is not found */
+string nodeLinkClass::get_node_subfunction_str ( string hostname )
+{
+    nodeLinkClass::node * node_ptr = getNode ( hostname );
+    if ( node_ptr != NULL )
+    {
+        return node_ptr->subfunction_str ;
+    }
+    return "unknown" ;
+}
+
+/** Check by hostname if a node is a controller ; false for an unknown host */
+bool nodeLinkClass::is_controller ( string & hostname )
+{
+    nodeLinkClass::node * node_ptr = getNode ( hostname );
+    return is_controller(node_ptr);
+}
+
+/** Check by hostname if a node is a compute ; false for an unknown host */
+bool nodeLinkClass::is_compute ( string & hostname )
+{
+    nodeLinkClass::node * node_ptr = getNode ( hostname );
+    return is_compute(node_ptr);
+}
+
+/** Check by hostname if a node has the compute subfunction ;
+ *  false for an unknown host */
+bool nodeLinkClass::is_compute_subfunction ( string & hostname )
+{
+    nodeLinkClass::node * node_ptr = getNode ( hostname );
+    return is_compute_subfunction(node_ptr);
+}
+
+/** Check by hostname if a node is a storage ; false for an unknown host */
+bool nodeLinkClass::is_storage ( string & hostname )
+{
+    nodeLinkClass::node * node_ptr = getNode ( hostname );
+    return is_storage(node_ptr);
+}
+
+/** Maintenance FSM Test Case Setup procedure
+ *
+ *  Writes 'stage' directly into the host's enable handler stage.
+ *  Returns PASS, or FAIL when the host is not found. */
+int nodeLinkClass::set_enableStage ( string & hostname,
+                                     mtc_enableStages_enum stage )
+{
+    nodeLinkClass::node * node_ptr = getNode ( hostname ) ;
+    if ( node_ptr != NULL )
+    {
+        node_ptr->handlerStage.enable = stage ;
+        return (PASS);
+    }
+    return (FAIL);
+}
+
+/* Set the reboot stage ( resetProgStage ) for 'hostname'.
+ * Returns PASS, or FAIL when the host is not found. */
+int nodeLinkClass::set_rebootStage ( string & hostname, mtc_resetProgStages_enum stage )
+{
+    nodeLinkClass::node * node_ptr = getNode ( hostname ) ;
+    if ( node_ptr != NULL )
+    {
+        node_ptr->resetProgStage = stage ;
+        return (PASS);
+    }
+    return (FAIL);
+}
+
+/** Maintenance FSM Test Case Setup procedure
+ *
+ *  Returns the host's current enable handler stage, or
+ *  MTC_ENABLE__STAGES when the host is not found. */
+mtc_enableStages_enum nodeLinkClass::get_enableStage ( string & hostname)
+{
+    nodeLinkClass::node * node_ptr = getNode ( hostname ) ;
+    if ( node_ptr != NULL )
+    {
+        return ( node_ptr->handlerStage.enable ) ;
+    }
+    return (MTC_ENABLE__STAGES);
+}
+
+/** Apply a new admin / oper / avail state triple to a node.
+ *
+ *  The three values are range checked first ; an out of range value is
+ *  logged and FAIL is returned without touching the node. Otherwise the
+ *  return value is the result of mtcInvApi_force_states.
+ *
+ *  node_ptr is dereferenced without a NULL check. */
+int nodeLinkClass::allStateChange ( struct nodeLinkClass::node * node_ptr,
+                                    mtc_nodeAdminState_enum adminState,
+                                    mtc_nodeOperState_enum operState,
+                                    mtc_nodeAvailStatus_enum availStatus )
+{
+    int rc = FAIL ;
+
+    if (( adminState  < MTC_ADMIN_STATES ) &&
+        ( operState   < MTC_OPER_STATES ) &&
+        ( availStatus < MTC_AVAIL_STATUS ))
+    {
+        /* Note whether anything differs before the state is updated so
+         * that the summary log below is only emitted on a real change */
+        bool change = false ;
+        if (( node_ptr->adminState  != adminState ) ||
+            ( node_ptr->operState   != operState ) ||
+            ( node_ptr->availStatus != availStatus ))
+        {
+            change = true ;
+        }
+
+        string admin = mtc_nodeAdminState_str [adminState ] ;
+        string oper  = mtc_nodeOperState_str [operState ] ;
+        string avail = mtc_nodeAvailStatus_str[availStatus] ;
+
+        rc = mtcInvApi_force_states ( node_ptr, admin, oper, avail );
+
+        admin_state_change ( node_ptr->hostname, admin );
+
+        /* A transition into disabled AND into failed, with neither
+         * already in effect, goes through the combined failed state
+         * change ; every other case updates oper and avail separately */
+        if ((( operState == MTC_OPER_STATE__DISABLED ) && ( node_ptr->operState != MTC_OPER_STATE__DISABLED )) &&
+            (( availStatus == MTC_AVAIL_STATUS__FAILED ) && ( node_ptr->availStatus != MTC_AVAIL_STATUS__FAILED )))
+        {
+            failed_state_change ( node_ptr );
+        }
+        else
+        {
+            oper_state_change ( node_ptr->hostname, oper );
+            avail_status_change ( node_ptr->hostname, avail );
+        }
+
+        if ( change == true )
+        {
+            /* after */
+            ilog ("%s %s-%s-%s (seq:%d)\n",
+                      node_ptr->hostname.c_str(),
+                      mtc_nodeAdminState_str [node_ptr->adminState ],
+                      mtc_nodeOperState_str [node_ptr->operState ],
+                      mtc_nodeAvailStatus_str[node_ptr->availStatus],
+                      node_ptr->oper_sequence-1);
+        }
+    }
+    else
+    {
+        slog ("Invalid State (%d:%d:%d)\n", adminState, operState, availStatus );
+    }
+    return (rc);
+}
+
+/** Apply a new subfunction oper / avail state pair to a node.
+ *
+ *  Both values are range checked first ; an out of range value is
+ *  logged and FAIL is returned without touching the node. Otherwise the
+ *  return value is the result of mtcInvApi_subf_states.
+ *
+ *  node_ptr is dereferenced without a NULL check. */
+int nodeLinkClass::subfStateChange ( struct nodeLinkClass::node * node_ptr,
+                                     mtc_nodeOperState_enum operState_subf,
+                                     mtc_nodeAvailStatus_enum availStatus_subf )
+{
+    int rc = FAIL ;
+
+    if (( operState_subf   < MTC_OPER_STATES ) &&
+        ( availStatus_subf < MTC_AVAIL_STATUS ))
+    {
+        /* Note whether anything differs before the state is updated */
+        bool change = false ;
+        if (( node_ptr->operState_subf   != operState_subf ) ||
+            ( node_ptr->availStatus_subf != availStatus_subf ))
+        {
+            change = true ;
+        }
+
+        string oper  = mtc_nodeOperState_str [operState_subf ] ;
+        string avail = mtc_nodeAvailStatus_str[availStatus_subf] ;
+
+        rc = mtcInvApi_subf_states ( node_ptr, oper, avail );
+
+        node_ptr->operState_subf   = operState_subf ;
+        node_ptr->availStatus_subf = availStatus_subf;
+
+        if ( change == true )
+        {
+            /* after */
+            ilog ("%s-%s %s-%s-%s (seq:%d)\n",
+                      node_ptr->hostname.c_str(),
+                      node_ptr->subfunction_str.c_str(),
+                      mtc_nodeAdminState_str [node_ptr->adminState ],
+                      mtc_nodeOperState_str [node_ptr->operState_subf ],
+                      mtc_nodeAvailStatus_str[node_ptr->availStatus_subf],
+                      node_ptr->oper_sequence-1);
+        }
+    }
+    else
+    {
+        /* admin : requested oper : requested avail. The oper field used
+         * to print availStatus_subf a second time. */
+        slog ("Invalid State (%d:%d:%d)\n", node_ptr->adminState, operState_subf, availStatus_subf );
+    }
+    return (rc);
+}
+
+
+
+
+
+/**
+ * Set the required action and then let the FSP and handlers deal with it
+ * If we are in an action already then just add the action to the
+ * action todo list. 
When we chnage the action to none then query the + * todo list and pop it off and apply it + **/ +int nodeLinkClass::adminActionChange ( struct nodeLinkClass::node * node_ptr, + mtc_nodeAdminAction_enum newActionState ) +{ + int rc = FAIL ; + + + if (( newActionState < MTC_ADMIN_ACTIONS ) && + ( node_ptr->adminAction < MTC_ADMIN_ACTIONS )) + { + rc = PASS ; + + if ( node_ptr->adminAction == newActionState ) + { + /* no action change */ + return (rc); + } + + /** + * Any of these actions need to complete before any + * other action can take effect. + * If its not one of these action then just proceed with it + **/ + if (( node_ptr->adminAction != MTC_ADMIN_ACTION__ADD ) && + ( node_ptr->adminAction != MTC_ADMIN_ACTION__FORCE_LOCK )) + { + clog ("%s Administrative Action '%s' -> '%s'\n", + node_ptr->hostname.c_str(), + mtc_nodeAdminAction_str [node_ptr->adminAction], + mtc_nodeAdminAction_str [newActionState]); + } + /* handle queue'd requests if here are any and + * we are done with the curent action */ + else if (( newActionState == MTC_ADMIN_ACTION__NONE ) && + ( !node_ptr->adminAction_todo_list.empty())) + { + newActionState = *(node_ptr->adminAction_todo_list.begin()); + node_ptr->adminAction_todo_list.pop_front(); + + clog ("%s Administrative Action '%s' -> '%s' from queue\n", + node_ptr->hostname.c_str(), + mtc_nodeAdminAction_str [node_ptr->adminAction], + mtc_nodeAdminAction_str [newActionState]); + } + /* queue the request if we are already acting on a current action + * ... 
handle unsupported action queueing conditions */ + else if (( node_ptr->adminAction != MTC_ADMIN_ACTION__NONE ) && + ( newActionState != MTC_ADMIN_ACTION__NONE )) + { + /* refuse to add duplicate action */ + if ( newActionState == node_ptr->adminAction ) + { + wlog ("%s refusing to queue duplicate of current action (%s)\n", + node_ptr->hostname.c_str(), + mtc_nodeAdminAction_str [node_ptr->adminAction] ); + + return (FAIL); + } + else if ( node_ptr->adminAction_todo_list.size() >= MTC_MAX_QUEUED_ACTIONS ) + { + wlog ("%s rejecting action '%s' request ; max queued actions reached (%ld of %d)\n", + node_ptr->hostname.c_str(), + mtc_nodeAdminAction_str [newActionState], + node_ptr->adminAction_todo_list.size(), + MTC_MAX_QUEUED_ACTIONS ); + return (FAIL); + } + + /* refuse to queue action that already exists in the queue */ + else + { + list::iterator adminAction_todo_list_ptr ; + for ( adminAction_todo_list_ptr = node_ptr->adminAction_todo_list.begin(); + adminAction_todo_list_ptr != node_ptr->adminAction_todo_list.end(); + adminAction_todo_list_ptr++ ) + { + if ( *adminAction_todo_list_ptr == newActionState ) + { + wlog ("%s refusing to queue duplicate already queued action (%s)\n", + node_ptr->hostname.c_str(), + mtc_nodeAdminAction_str [*adminAction_todo_list_ptr]); + + return (FAIL); + } + } + } + /* Add the action to the action todo list */ + node_ptr->adminAction_todo_list.push_back( newActionState ); + + ilog ("%s Administrative Action '%s' queued ; already handling '%s' action\n", + node_ptr->hostname.c_str(), + mtc_nodeAdminAction_str [newActionState], + mtc_nodeAdminAction_str [node_ptr->adminAction]); + return (PASS); + } + /* otherwise just take the action change */ + else + { + clog ("%s Administrative Action '%s' -> '%s'\n", + node_ptr->hostname.c_str(), + mtc_nodeAdminAction_str [node_ptr->adminAction], + mtc_nodeAdminAction_str [newActionState]); + } + + mtc_nodeAdminAction_enum oldActionState = node_ptr->adminAction ; + log_adminAction ( 
node_ptr->hostname, oldActionState, newActionState ); + + node_ptr->adminAction = newActionState ; + node_ptr->action = mtc_nodeAdminAction_str [node_ptr->adminAction] ; + + /* If we are starting a new ( not 'none' ) action ... + * be sure we start at the beginning */ + if ( newActionState != MTC_ADMIN_ACTION__NONE ) + { + if (( oldActionState == MTC_ADMIN_ACTION__POWERCYCLE ) && + (( newActionState != MTC_ADMIN_ACTION__POWERCYCLE ) && + ( newActionState != MTC_ADMIN_ACTION__POWEROFF ))) + { + blog ("%s (mon:%d:prov:%d)\n", node_ptr->hostname.c_str(), node_ptr->hwmond_monitor, node_ptr->bm_provisioned ); + + if (( node_ptr->hwmond_monitor == false ) && ( node_ptr->bm_provisioned == true )) + { + send_hwmon_command ( node_ptr->hostname, MTC_CMD_ADD_HOST ); + send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST ); + } + } + /* Lets ensure that the handlers start in the right stage + * The enable_handler -> MTC_ENABLE__START + * The disable_handler -> MTC_DISABLE__START + * The reset_handler -> MTC_RESET__START + * The reboot_handler -> MTC_RESET__START + * + * This is a little detailed but exists for maintainability + * All START stages are 0. 
+ */ + switch ( newActionState ) + { + case MTC_ADMIN_ACTION__UNLOCK: + { + if ( oldActionState != MTC_ADMIN_ACTION__UNLOCK ) + { + node_ptr->node_unlocked_counter++ ; + } + + if ( is_controller ( node_ptr ) ) + autorecovery_clear (node_ptr->hostname); + + node_ptr->handlerStage.enable = MTC_ENABLE__START ; + break ; + } + case MTC_ADMIN_ACTION__LOCK: + case MTC_ADMIN_ACTION__FORCE_LOCK: + { + node_ptr->handlerStage.disable = MTC_DISABLE__START ; + break ; + } + case MTC_ADMIN_ACTION__RESET: + { + node_ptr->resetStage = MTC_RESET__START ; + break ; + } + case MTC_ADMIN_ACTION__REBOOT: + { + break ; + } + case MTC_ADMIN_ACTION__REINSTALL: + { + node_ptr->reinstallStage = MTC_REINSTALL__START ; + break ; + } + case MTC_ADMIN_ACTION__POWERON: + { + node_ptr->powerStage = MTC_POWERON__START ; + break ; + } + case MTC_ADMIN_ACTION__RECOVER: + { + if ( node_ptr->mtcTimer.tid ) + { + mtcTimer_stop ( node_ptr->mtcTimer ) ; + } + if ( node_ptr->mtcSwact_timer.tid ) + { + mtcTimer_stop ( node_ptr->mtcSwact_timer ) ; + } + node_ptr->recoveryStage = MTC_RECOVERY__START ; + break ; + } + + case MTC_ADMIN_ACTION__POWEROFF: + { + node_ptr->powerStage = MTC_POWEROFF__START ; + break ; + } + case MTC_ADMIN_ACTION__DELETE: + { + node_ptr->delStage = MTC_DEL__START ; + break ; + } + case MTC_ADMIN_ACTION__ENABLE: + default: + { + break ; + } + } + } + } + return (rc); +} + +int nodeLinkClass::adminStateChange ( struct nodeLinkClass::node * node_ptr, + mtc_nodeAdminState_enum newAdminState ) +{ + int rc = FAIL ; + + if (( newAdminState < MTC_ADMIN_STATES ) && + ( node_ptr->adminState < MTC_ADMIN_STATES )) + { + rc = PASS ; + + /* See if we are actually changing the state */ + if ( node_ptr->adminState != newAdminState ) + { + ilog ("%s %s-%s-%s' -> %s-%s-%s\n", node_ptr->hostname.c_str(), + mtc_nodeAdminState_str [node_ptr->adminState], + mtc_nodeOperState_str [node_ptr->operState], + mtc_nodeAvailStatus_str[node_ptr->availStatus], + mtc_nodeAdminState_str [newAdminState], + 
mtc_nodeOperState_str [node_ptr->operState], + mtc_nodeAvailStatus_str[node_ptr->availStatus]); + node_ptr->adminState = newAdminState ; + } + } + else + { + slog ("Invalid Host Operational State (now:%d new:%d)\n", + node_ptr->adminState, newAdminState ); + } + return (rc); +} + + +int nodeLinkClass::operStateChange ( struct nodeLinkClass::node * node_ptr, + mtc_nodeOperState_enum newOperState ) +{ + int rc = FAIL ; + + if (( newOperState < MTC_OPER_STATES ) && + ( node_ptr->operState < MTC_OPER_STATES )) + { + rc = PASS ; + + /* See if we are actually changing the state */ + if ( node_ptr->operState != newOperState ) + { + clog ("%s %s-%s-%s\n", node_ptr->hostname.c_str(), + mtc_nodeAdminState_str [node_ptr->adminState], + mtc_nodeOperState_str [node_ptr->operState], + mtc_nodeAvailStatus_str[node_ptr->availStatus]); + + /* Push it to the database */ + if ( node_ptr->uuid.length() == UUID_LEN ) + { + string key = MTC_JSON_INV_OPER ; + string value = operState_enum_to_str(newOperState) ; + rc = mtcInvApi_update_state ( node_ptr, key, value ); + } + else + { + wlog ("%s has invalid uuid:%s so %s state not written to database\n", + node_ptr->hostname.c_str(), + node_ptr->uuid.c_str(), + operState_enum_to_str(newOperState).c_str()); + } + + node_ptr->operState = newOperState ; + + clog ("%s %s-%s-%s\n", node_ptr->hostname.c_str(), + mtc_nodeAdminState_str [node_ptr->adminState], + mtc_nodeOperState_str [node_ptr->operState], + mtc_nodeAvailStatus_str[node_ptr->availStatus]); + } + } + else + { + slog ("Invalid Host Operational State (now:%d new:%d)\n", + node_ptr->operState, newOperState ); + } + return (rc); +} + +int nodeLinkClass::availStatusChange ( struct nodeLinkClass::node * node_ptr, + mtc_nodeAvailStatus_enum newAvailStatus ) +{ + int rc = FAIL ; + + if (( newAvailStatus < MTC_AVAIL_STATUS ) && + ( node_ptr->availStatus < MTC_AVAIL_STATUS )) + { + rc = PASS ; + + /* See if we are actually changing the state */ + if ( node_ptr->availStatus != newAvailStatus ) 
+ { + clog ("%s %s-%s-%s\n", node_ptr->hostname.c_str(), + mtc_nodeAdminState_str [node_ptr->adminState], + mtc_nodeOperState_str [node_ptr->operState], + mtc_nodeAvailStatus_str[node_ptr->availStatus]); + + /* Push it to the database */ + if ( node_ptr->uuid.length() == UUID_LEN ) + { + string key = MTC_JSON_INV_AVAIL ; + string value = availStatus_enum_to_str(newAvailStatus) ; + rc = mtcInvApi_update_state ( node_ptr, key, value ); + if ( rc != PASS ) + { + wlog ("%s Failed to update availability '%s' to '%s'\n", + node_ptr->hostname.c_str(), + mtc_nodeAvailStatus_str[node_ptr->availStatus], + mtc_nodeAvailStatus_str[newAvailStatus]); + } + } + else + { + wlog ("%s has invalid uuid:%s so %s state not written to database\n", + node_ptr->hostname.c_str(), + node_ptr->uuid.c_str(), + availStatus_enum_to_str(newAvailStatus).c_str()); + } + + if (( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && + (( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) || + ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )) && + ( newAvailStatus == MTC_AVAIL_STATUS__FAILED )) + { + enableStageChange ( node_ptr, MTC_ENABLE__START ); + } + + /* if we go to the failed state then clear all mtcAlive counts + * so that the last ones don't look like we are online when we + * might not be - we should relearn the on/off line state */ + if (( node_ptr->availStatus != MTC_AVAIL_STATUS__FAILED ) && + ( newAvailStatus == MTC_AVAIL_STATUS__FAILED )) + { + node_ptr->mtcAlive_misses = 0 ; + node_ptr->mtcAlive_hits = 0 ; + node_ptr->mtcAlive_gate = false ; + } + + /* check for need to generate power on log */ + if (( node_ptr->availStatus == MTC_AVAIL_STATUS__POWERED_OFF ) && + ( newAvailStatus != MTC_AVAIL_STATUS__POWERED_OFF )) + { + if ( node_ptr->adminAction == MTC_ADMIN_ACTION__POWERON ) + { + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_MANUAL_POWER_ON ); + } + else + { + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_AUTO_POWER_ON ); + } + } + + /* check for need 
to generate power off log */ + if (( node_ptr->availStatus != MTC_AVAIL_STATUS__POWERED_OFF ) && + ( newAvailStatus == MTC_AVAIL_STATUS__POWERED_OFF )) + { + if ( node_ptr->adminAction == MTC_ADMIN_ACTION__POWEROFF ) + { + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_MANUAL_POWER_OFF ); + } + else + { + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_AUTO_POWER_OFF ); + } + } + + /* check for need to generate online log */ + if (( node_ptr->availStatus != MTC_AVAIL_STATUS__ONLINE ) && + ( newAvailStatus == MTC_AVAIL_STATUS__ONLINE )) + { + if ( node_ptr->offline_log_reported == true ) + { + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__STATUSCHANGE_ONLINE ); + node_ptr->offline_log_reported = false ; + node_ptr->online_log_reported = true ; + } + } + + /* check for need to generate offline log */ + if (( node_ptr->availStatus != MTC_AVAIL_STATUS__OFFLINE ) && + ( newAvailStatus == MTC_AVAIL_STATUS__OFFLINE )) + { + if ( node_ptr->online_log_reported == true ) + { + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__STATUSCHANGE_OFFLINE ); + node_ptr->offline_log_reported = true ; + node_ptr->online_log_reported = false ; + } + } + + /* If the availability status is moving away from off or online then + * be sure we cancel the mtcAlive timer */ + if ((( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFLINE ) || + ( node_ptr->availStatus == MTC_AVAIL_STATUS__ONLINE )) && + (( newAvailStatus != MTC_AVAIL_STATUS__OFFLINE ) && + ( newAvailStatus != MTC_AVAIL_STATUS__ONLINE ))) + { + /* Free the mtc timer if in use */ + if ( node_ptr->mtcAlive_timer.tid ) + { + tlog ("%s Stopping mtcAlive timer\n", node_ptr->hostname.c_str()); + mtcTimer_stop ( node_ptr->mtcAlive_timer ); + node_ptr->mtcAlive_timer.ring = false ; + node_ptr->mtcAlive_timer.tid = NULL ; + } + node_ptr->onlineStage = MTC_ONLINE__START ; + } + + + clog ("%s %s-%s-%s\n", node_ptr->hostname.c_str(), + mtc_nodeAdminState_str [node_ptr->adminState], + mtc_nodeOperState_str [node_ptr->operState], + 
mtc_nodeAvailStatus_str[node_ptr->availStatus]); + + node_ptr->availStatus = newAvailStatus ; + } + } + else + { + slog ("Invalid Host Availability Status (now:%d new:%d)\n", + node_ptr->availStatus, newAvailStatus ); + } + return (rc); +} + +/** Host Enable Handler Stage Change member function */ +int nodeLinkClass::enableStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_enableStages_enum newHdlrStage ) +{ + /* TODO: Consider converting stage to strings ... */ + if (( newHdlrStage >= MTC_ENABLE__STAGES ) || + ( node_ptr->handlerStage.enable >= MTC_ENABLE__STAGES )) + { + slog ("%s has invalid Enable stage (%d:%d)\n", + node_ptr->hostname.c_str(), + node_ptr->handlerStage.enable, + newHdlrStage ); + + node_ptr->handlerStage.enable = MTC_ENABLE__FAILURE ; + + /* TODO: cause failed or degraded state ? */ + return (FAIL); + } + else if ( node_ptr->handlerStage.enable != newHdlrStage ) + { + clog ("%s %s -> %s\n", + node_ptr->hostname.c_str(), + get_enableStages_str(node_ptr->handlerStage.enable).c_str(), + get_enableStages_str(newHdlrStage).c_str()); + + node_ptr->handlerStage.enable = newHdlrStage ; + return (PASS); + } + else + { + /* No state change */ + dlog1 ("%s %s -> %s\n", + node_ptr->hostname.c_str(), + get_enableStages_str(node_ptr->handlerStage.enable).c_str(), + get_enableStages_str(newHdlrStage).c_str()); + return (PASS); + } +} + +/** Host Disable Handler Stage Change member function */ +int nodeLinkClass::disableStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_disableStages_enum newHdlrStage ) +{ + /* TODO: Consider converting stage to strings ... */ + if (( newHdlrStage >= MTC_DISABLE__STAGES ) || + ( node_ptr->handlerStage.disable >= MTC_DISABLE__STAGES )) + { + slog ("%s has invalid disable stage (%d:%d)\n", + node_ptr->hostname.c_str(), + node_ptr->handlerStage.disable, + newHdlrStage ); + + node_ptr->handlerStage.disable = MTC_DISABLE__DISABLED ; + + /* TODO: cause failed or degraded state ? 
*/ + return (FAIL); + } + else + { + clog ("%s %s -> %s\n", + node_ptr->hostname.c_str(), + get_disableStages_str(node_ptr->handlerStage.disable).c_str(), + get_disableStages_str(newHdlrStage).c_str()); + + node_ptr->handlerStage.disable = newHdlrStage ; + return (PASS); + } +} + +/** Validate and log Recovery stage changes */ +int nodeLinkClass::recoveryStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_recoveryStages_enum newHdlrStage ) +{ + int rc = PASS ; + + if (( newHdlrStage >= MTC_RECOVERY__STAGES ) || + ( node_ptr->recoveryStage >= MTC_RECOVERY__STAGES )) + { + slog ("%s Invalid recovery stage (%d:%d)\n", + node_ptr->hostname.c_str(), + node_ptr->recoveryStage, + newHdlrStage ); + + if ( newHdlrStage < MTC_RECOVERY__STAGES ) + { + clog ("%s ? -> %s\n", + node_ptr->hostname.c_str(), + get_recoveryStages_str(newHdlrStage).c_str()); + + node_ptr->recoveryStage = newHdlrStage ; + } + else + { + node_ptr->recoveryStage = MTC_RECOVERY__FAILURE ; + rc = FAIL ; + } + } + else + { + clog ("%s %s -> %s\n", + node_ptr->hostname.c_str(), + get_recoveryStages_str(node_ptr->recoveryStage).c_str(), + get_recoveryStages_str(newHdlrStage).c_str()); + + node_ptr->recoveryStage = newHdlrStage ; + } + return (rc) ; +} + + +/** Validate and log Recovery stage changes */ +int nodeLinkClass::configStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_configStages_enum newHdlrStage ) +{ + int rc = PASS ; + + if (( newHdlrStage >= MTC_CONFIG__STAGES ) || + ( node_ptr->configStage >= MTC_CONFIG__STAGES )) + { + slog ("%s Invalid config stage (%d:%d)\n", + node_ptr->hostname.c_str(), + node_ptr->configStage, + newHdlrStage ); + + if ( newHdlrStage < MTC_CONFIG__STAGES ) + { + clog ("%s ? 
-> %s\n", + node_ptr->hostname.c_str(), + get_configStages_str(newHdlrStage).c_str()); + + node_ptr->configStage = newHdlrStage ; + } + else + { + node_ptr->configStage = MTC_CONFIG__FAILURE ; + rc = FAIL ; + } + } + else + { + clog ("%s %s -> %s\n", + node_ptr->hostname.c_str(), + get_configStages_str(node_ptr->configStage).c_str(), + get_configStages_str(newHdlrStage).c_str()); + + node_ptr->configStage = newHdlrStage ; + } + return (rc) ; +} + +/** Host Reset Handler Stage Change member function */ +int nodeLinkClass::resetStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_resetStages_enum newHdlrStage ) +{ + if ( newHdlrStage < MTC_RESET__STAGES ) + { + clog ("%s stage %s -> %s\n", + node_ptr->hostname.c_str(), + get_resetStages_str(node_ptr->resetStage).c_str(), + get_resetStages_str(newHdlrStage).c_str()); + + node_ptr->resetStage = newHdlrStage ; + return (PASS) ; + } + else + { + slog ("%s Invalid reset stage (%d)\n", node_ptr->hostname.c_str(), newHdlrStage ); + node_ptr->resetStage = MTC_RESET__DONE ; + return (FAIL) ; + } +} + +/* Host Reset Handler Stage Change member function */ +int nodeLinkClass::reinstallStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_reinstallStages_enum newHdlrStage ) +{ + if ( newHdlrStage < MTC_REINSTALL__STAGES ) + { + clog ("%s stage %s -> %s\n", + node_ptr->hostname.c_str(), + get_reinstallStages_str(node_ptr->reinstallStage).c_str(), + get_reinstallStages_str(newHdlrStage).c_str()); + + node_ptr->reinstallStage = newHdlrStage ; + return (PASS) ; + } + else + { + slog ("%s Invalid reinstall stage (%d)\n", node_ptr->hostname.c_str(), newHdlrStage ); + node_ptr->reinstallStage = MTC_REINSTALL__DONE ; + return (FAIL) ; + } +} + +/** Host Power control Handler Stage Change member function */ +int nodeLinkClass::powerStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_powerStages_enum newHdlrStage ) +{ + if ( newHdlrStage < MTC_POWER__STAGES ) + { + clog ("%s stage %s -> %s\n", + 
node_ptr->hostname.c_str(), + get_powerStages_str(node_ptr->powerStage).c_str(), + get_powerStages_str(newHdlrStage).c_str()); + + node_ptr->powerStage = newHdlrStage ; + return (PASS) ; + } + else + { + slog ("%s Invalid power control stage (%d)\n", + node_ptr->hostname.c_str(), newHdlrStage ); + node_ptr->powerStage = MTC_POWER__DONE ; + return (FAIL) ; + } +} + +/** Host Power Cycle control Handler Stage Change member function */ +int nodeLinkClass::powercycleStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_powercycleStages_enum newHdlrStage ) +{ + if ( newHdlrStage < MTC_POWERCYCLE__STAGES ) + { + clog ("%s stage %s -> %s\n", + node_ptr->hostname.c_str(), + get_powercycleStages_str(node_ptr->powercycleStage).c_str(), + get_powercycleStages_str(newHdlrStage).c_str()); + + node_ptr->powercycleStage = newHdlrStage ; + return (PASS) ; + } + else + { + slog ("%s Invalid powercycle stage (%d)\n", + node_ptr->hostname.c_str(), newHdlrStage ); + node_ptr->powercycleStage = MTC_POWERCYCLE__DONE ; + return (FAIL) ; + } +} + + +/** Host Out-Of-Service Stage Change member function */ +int nodeLinkClass::oosTestStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_oosTestStages_enum newHdlrStage ) +{ + if ( newHdlrStage < MTC_OOS_TEST__STAGES ) + { + clog ("%s stage %s -> %s\n", + node_ptr->hostname.c_str(), + get_oosTestStages_str(node_ptr->oosTestStage).c_str(), + get_oosTestStages_str(newHdlrStage).c_str()); + + node_ptr->oosTestStage = newHdlrStage ; + return (PASS) ; + } + else + { + slog ("%s Invalid oos test stage (%d)\n", node_ptr->hostname.c_str(), newHdlrStage ); + node_ptr->oosTestStage = MTC_OOS_TEST__DONE ; + return (FAIL) ; + } +} + +/** Host in-Service Stage Change member function */ +int nodeLinkClass::insvTestStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_insvTestStages_enum newHdlrStage ) +{ + if ( newHdlrStage < MTC_INSV_TEST__STAGES ) + { + clog ("%s stage %s -> %s\n", + node_ptr->hostname.c_str(), + 
get_insvTestStages_str(node_ptr->insvTestStage).c_str(), + get_insvTestStages_str(newHdlrStage).c_str()); + + node_ptr->insvTestStage = newHdlrStage ; + return (PASS) ; + } + else + { + slog ("%s Invalid insv test stage (%d)\n", node_ptr->hostname.c_str(), newHdlrStage ); + node_ptr->insvTestStage = MTC_INSV_TEST__START ; + return (FAIL) ; + } +} + +/** SubStage Change member function */ +int nodeLinkClass::subStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_subStages_enum newHdlrStage ) +{ + if ( newHdlrStage < MTC_SUBSTAGE__STAGES ) + { + clog ("%s stage %s -> %s\n", + node_ptr->hostname.c_str(), + get_subStages_str(node_ptr->subStage).c_str(), + get_subStages_str(newHdlrStage).c_str()); + + node_ptr->subStage = newHdlrStage ; + return (PASS) ; + } + else + { + slog ("%s Invalid 'subStage' stage (%d)\n", + node_ptr->hostname.c_str(), newHdlrStage ); + node_ptr->subStage = MTC_SUBSTAGE__DONE ; + return (FAIL) ; + } +} + +struct nodeLinkClass::node * nodeLinkClass::get_mtcTimer_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->mtcTimer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + +struct nodeLinkClass::node * nodeLinkClass::get_mtcCmd_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->mtcCmd_timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + +struct nodeLinkClass::node * nodeLinkClass::get_host_services_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->host_services_timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + 
break ; + } + } + return static_cast(NULL); +} + +struct nodeLinkClass::node * nodeLinkClass::get_http_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->http_timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + +struct nodeLinkClass::node * nodeLinkClass::get_thread_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->ipmitool_thread_ctrl.timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + +struct nodeLinkClass::node * nodeLinkClass::get_ping_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->bm_ping_info.timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + +struct nodeLinkClass::node * nodeLinkClass::get_bm_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->bm_timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + +struct nodeLinkClass::node * nodeLinkClass::get_bmc_access_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->bmc_access_timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + + + +struct nodeLinkClass::node * nodeLinkClass::get_mtcConfig_timer ( timer_t tid ) +{ + /* check for empty list 
condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->mtcConfig_timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + +struct nodeLinkClass::node * nodeLinkClass::get_powercycle_control_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->hwmon_powercycle.control_timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + +struct nodeLinkClass::node * nodeLinkClass::get_reset_control_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->hwmon_reset.control_timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + +struct nodeLinkClass::node * nodeLinkClass::get_powercycle_recovery_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->hwmon_powercycle.recovery_timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + +struct nodeLinkClass::node * nodeLinkClass::get_reset_recovery_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->hwmon_reset.recovery_timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + +struct nodeLinkClass::node * nodeLinkClass::get_mtcAlive_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head 
; ; ptr = ptr->next ) + { + if ( ptr->mtcAlive_timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + + +struct nodeLinkClass::node * nodeLinkClass::get_offline_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->offline_timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + + +struct nodeLinkClass::node * nodeLinkClass::get_mtcSwact_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->mtcSwact_timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + +struct nodeLinkClass::node * nodeLinkClass::get_oosTestTimer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->oosTestTimer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + +struct nodeLinkClass::node * nodeLinkClass::get_insvTestTimer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->insvTestTimer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + + +/***************************************************************************** + * + * Name : autorecovery_clear + * + * Assumptions: Applies when simplex. + * + * Description: Removes the auto recovery count file if it exists. 
+ * + * Auto recovery count is tracked/preserved in a host named auto recovery + * counter file /tmp/hostname_ar_count. + * + *****************************************************************************/ + +#define TMP_DIR_PATH ((const char *)"/etc/mtc/tmp/") +#define AUTO_RECOVERY_FILE_SUFFIX ((const char *)"_ar_count") + +void autorecovery_clear ( string hostname ) +{ + string ar_file = TMP_DIR_PATH + hostname + AUTO_RECOVERY_FILE_SUFFIX ; + if ( daemon_is_file_present (ar_file.data())) + { + wlog ("%s clearing autorecovery counter\n", hostname.c_str()); + daemon_remove_file (ar_file.data()); + } +} + +/***************************************************************************** + * + * Name : manage_autorecovery + * + * Assumptions: Applies to the active controller only while simplex. + * + * Description: Issues an immediate lazy reboot if the autorecovery threshold + * is reached. Otherwise it disables autorecovery and returns + * do we don't get a rolling boot loop. + * + * Auto recovery count is tracked/preserved in a host named auto recovery + * counter file /etc/mtc/tmp/hostname_ar_count. + * + * in the event of a persistent autorecovery failure that results in a + * disable then the active controller goes enabled-degraded with a horizon + * status that indicates the active controller has a critical failure but + * auto recovery is disabled. The enable alarm is raised. 
+ * + *****************************************************************************/ + +void nodeLinkClass::manage_autorecovery ( struct nodeLinkClass::node * node_ptr ) +{ + /* manage removing the auto recovery threshold count file */ + if ( ( THIS_HOST ) && + ( this->autorecovery_enabled == true ) && + ( this->autorecovery_disabled == false ) && + ( is_inactive_controller_main_insv() == false )) + { + int value = 0 ; + string ar_file = TMP_DIR_PATH + node_ptr->hostname + AUTO_RECOVERY_FILE_SUFFIX ; + int threshold = daemon_get_cfg_ptr()->autorecovery_threshold ; + + if ( daemon_is_file_present (ar_file.data())) + { + /* if the file is there then read the count and increment it */ + value = daemon_get_file_int ( ar_file.data() ); + } + value++ ; + + /* Save the new value in the file */ + daemon_log_value ( ar_file.data(), value ); + + /* set rc to reflect what the caller should do */ + if ( value > threshold ) + { + elog ("%s auto recovery threshold exceeded (%d)\n", + node_ptr->hostname.c_str(), threshold ); + + this->autorecovery_disabled = true ; + + if ( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX ) + { + alarm_compute_failure ( node_ptr , FM_ALARM_SEVERITY_CRITICAL ) ; + } + else + { + alarm_enabled_failure ( node_ptr ) ; + } + + allStateChange ( node_ptr, node_ptr->adminState, + MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__DEGRADED ); + + mtcInvApi_update_task ( node_ptr, + MTC_TASK_AUTO_RECOVERY_DISABLED ); + + return ; + } + + wlog ("%s auto recovery (try %d of %d)\n", + node_ptr->hostname.c_str(), value , threshold ); + + mtcInvApi_update_states_now ( node_ptr, + "unlocked", + "disabled", + "failed", + "disabled", + "failed" ); + + mtcInvApi_update_task_now ( node_ptr, + MTC_TASK_AUTO_RECOVERY ); + + lazy_graceful_fs_reboot ( node_ptr ); + } +} + +/**************************************************************************** + * + * Name : report_dor_recovery + * + * Description: Create a specifically formatted log for the the specified + * hosts 
DOR recovery state and timing. + * + * Parameters : The node and a caller prefix string that states if the node + * is ENABELD + * is FAILED + * is ENMABLED-degraded + * etc. + * + ***************************************************************************/ +void nodeLinkClass::report_dor_recovery ( struct nodeLinkClass::node * node_ptr, + string node_state_log_prefix ) +{ + struct timespec ts ; + clock_gettime (CLOCK_MONOTONIC, &ts ); + node_ptr->dor_recovery_time = ts.tv_sec ; + plog ("%-12s %s ; DOR Recovery %2d:%02d mins (%4d secs) (uptime:%2d:%02d mins)\n", + node_ptr->hostname.c_str(), + node_state_log_prefix.c_str(), + node_ptr->dor_recovery_time/60, + node_ptr->dor_recovery_time%60, + node_ptr->dor_recovery_time, + node_ptr->uptime/60, + node_ptr->uptime%60 ); + + node_ptr->dor_recovery_mode = false ; + node_ptr->was_dor_recovery_mode = false ; +} + +void nodeLinkClass::force_full_enable ( struct nodeLinkClass::node * node_ptr ) +{ + /* don't do a full enable if active controller in simplex mode */ + if ( THIS_HOST && SIMPLEX ) + { + wlog ("%s avoiding full enable of simplex system\n", node_ptr->hostname.c_str()); + wlog ("%s ... 
lock and unlock host to force recovery\n", node_ptr->hostname.c_str()); + return ; + } + + if ( node_ptr->was_dor_recovery_mode ) + { + report_dor_recovery ( node_ptr , "is FAILED " ); + } + + plog ("%s Forcing Full Enable Sequence\n", node_ptr->hostname.c_str()); + + /* Raise Critical Enable Alarm */ + alarm_enabled_failure ( node_ptr ); + + allStateChange ( node_ptr, node_ptr->adminState, MTC_OPER_STATE__DISABLED, MTC_AVAIL_STATUS__FAILED ); + enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); + recoveryStageChange ( node_ptr, MTC_RECOVERY__START ); /* reset the fsm */ + // don't override the add action or lock actions / + if (( node_ptr->adminAction != MTC_ADMIN_ACTION__ADD ) && + ( node_ptr->adminAction != MTC_ADMIN_ACTION__LOCK ) && + ( node_ptr->adminAction != MTC_ADMIN_ACTION__FORCE_LOCK )) + { + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); // no action + } + else + { + wlog ("%s refusing to override '%s' action with 'none' action\n", + node_ptr->hostname.c_str(), + mtc_nodeAdminAction_str [node_ptr->adminAction]); + } +} + +/***************************************************************************** + * + * Name : launch_host_services_cmd + * + * Description: This is a multi timeslice service that is executed + * by the command handler. + * + * This interface just determines the host type and loads the + * command handler with the host type corresponding host + * services command based on the start bool. If 'subf' is + * specified then the start or stop command defaults to COMPUTE. 
+ * + * Supported Commands are defined in nodeBase.h + * + * start = False (means stop) + * + * MTC_CMD_STOP_CONTROL_SVCS + * MTC_CMD_STOP_COMPUTE_SVCS + * MTC_CMD_STOP_STORAGE_SVCS + * + * start = True + * + * MTC_CMD_START_CONTROL_SVCS + * MTC_CMD_START_COMPUTE_SVCS + * MTC_CMD_START_STORAGE_SVCS + * + * Returns : PASS = launch success + * !PASS = launch failure + * + ****************************************************************************/ + +int nodeLinkClass::launch_host_services_cmd ( struct nodeLinkClass::node * node_ptr, bool start, bool subf ) +{ + if ( !node_ptr ) + return (FAIL_NULL_POINTER); + + /* Initialize the host's command request control structure */ + mtcCmd_init ( node_ptr->host_services_req ); + + /* Service subfunction override first, efficiency. */ + if ( subf == true ) + { + /* only supported subfunction (right now) is COMPUTE */ + if ( start == true ) + node_ptr->host_services_req.cmd = MTC_CMD_START_COMPUTE_SVCS ; + else + node_ptr->host_services_req.cmd = MTC_CMD_STOP_COMPUTE_SVCS ; + } + else if ( start == true ) + { + if ( is_controller (node_ptr) ) + node_ptr->host_services_req.cmd = MTC_CMD_START_CONTROL_SVCS ; + else if ( is_compute (node_ptr) ) + node_ptr->host_services_req.cmd = MTC_CMD_START_COMPUTE_SVCS ; + else if ( is_storage (node_ptr) ) + node_ptr->host_services_req.cmd = MTC_CMD_START_STORAGE_SVCS ; + else + { + slog ("%s start host services is not supported for this host type\n", + node_ptr->hostname.c_str()); + return (FAIL_BAD_CASE) ; + } + } + else + { + if ( is_controller (node_ptr) ) + node_ptr->host_services_req.cmd = MTC_CMD_STOP_CONTROL_SVCS ; + else if ( is_compute (node_ptr) ) + node_ptr->host_services_req.cmd = MTC_CMD_STOP_COMPUTE_SVCS ; + else if ( is_storage (node_ptr) ) + node_ptr->host_services_req.cmd = MTC_CMD_STOP_STORAGE_SVCS ; + else + { + slog ("%s stop host services is not supported for this host type\n", + node_ptr->hostname.c_str()); + return (FAIL_BAD_CASE); + } + } + + /* Translate that 
command to its named string */ + node_ptr->host_services_req.name = + get_mtcNodeCommand_str(node_ptr->host_services_req.cmd); + + /* Get the host services timeout and add MTC_AGENT_TIMEOUT_EXTENSION + * seconds so that it is a bit longer than the mtcClient timeout */ + int timeout = daemon_get_cfg_ptr()->host_services_timeout ; + timeout+= MTC_AGENT_TIMEOUT_EXTENSION ; + + ilog ("%s %s launch\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.name.c_str()); + + /* The launch part. + * init the */ + mtcCmd_init ( node_ptr->cmd ); + node_ptr->cmd.stage = MTC_CMD_STAGE__START ; + node_ptr->cmd.cmd = MTC_OPER__HOST_SERVICES_CMD ; + + node_ptr->mtcCmd_work_fifo.clear() ; + node_ptr->mtcCmd_work_fifo.push_front(node_ptr->cmd); + + /* start an unbrella timer and start waiting for the result, + * a little longer than the mtcClient version */ + mtcTimer_reset ( node_ptr->host_services_timer ); + mtcTimer_start ( node_ptr->host_services_timer, mtcTimer_handler, timeout ) ; + + + return (PASS); +} + +int send_event ( string & hostname, unsigned int cmd, iface_enum iface ); + +int nodeLinkClass::mon_host ( const string & hostname, iface_enum iface, bool true_false ) +{ + int rc = FAIL ; + if ( ! 
hostname.empty() ) + { + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + node_ptr->monitor[iface] = true_false ; + if ( true_false == true ) + { + node_ptr->no_work_log_throttle = 0 ; + node_ptr->b2b_misses_count[iface] = 0 ; + node_ptr->max_count[iface] = 0 ; + node_ptr->hbs_failure[iface] = false ; + send_event ( node_ptr->hostname, MTC_EVENT_HEARTBEAT_MINOR_CLR, iface ) ; + node_ptr->hbs_minor[iface] = false ; + send_event ( node_ptr->hostname, MTC_EVENT_HEARTBEAT_DEGRADE_CLR, iface ) ; + node_ptr->hbs_degrade[iface] = false ; + } + return PASS ; + } + } + return ( rc ); +} + +/* store the current hardware monitor monitoring state */ +void nodeLinkClass::set_hwmond_monitor_state ( string & hostname, bool state ) +{ + if ( hostname.length() ) + { + struct nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + node_ptr->hwmond_monitor = state ; + } + } +} + +/* get the current hardware monitor monitoring state */ +bool nodeLinkClass::get_hwmond_monitor_state ( string & hostname ) +{ + bool state = false ; + if ( hostname.length() ) + { + struct nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + state = node_ptr->hwmond_monitor ; + } + } + return (state); +} + +/* get the current heartbeat monitoring state */ +bool nodeLinkClass::get_hbs_monitor_state ( string & hostname, int iface ) +{ + bool state = false ; + if ( hostname.length() ) + { + struct nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + state = node_ptr->monitor[iface] ; + } + } + return (state); +} + +/* Manage the heartbeat pulse flags by hostname */ +void nodeLinkClass::manage_pulse_flags ( string & hostname, unsigned int flags ) +{ + if ( hostname.length() ) + { + struct nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + 
if ( node_ptr != NULL ) + { + manage_pulse_flags ( node_ptr, flags ); + } + } +} + + +/* Manage the heartbeat pulse flags by pulse_ptr */ +void nodeLinkClass::manage_pulse_flags ( struct nodeLinkClass::node * node_ptr, unsigned int flags ) +{ + /* Do nothing with the flags for missing pulse + * responses (identified with flags=NULL_PULSE_FLAGS) */ + if ( flags == NULL_PULSE_FLAGS ) + { + return ; + } + + /* Code that manages enabling of Infrastructrure network moonitoring + * + * Algorithm: Only monitor a hosts infrastructure network while the + * management network of that same host is being monitored + * and while that host indicates support for the infrastructure + * network by setting the INFRA_FLAG in its management network + * pulse responses. */ + if ( node_ptr->monitor[MGMNT_IFACE] == false ) + { + node_ptr->monitor[INFRA_IFACE] = false ; + } + else if ( flags & INFRA_FLAG ) + { + /* TODO: Does this need to be debounced ??? */ + node_ptr->monitor[INFRA_IFACE] = true ; + } + + /* A host indicates that its process monitor is running by setting the + * PMOND_FLAG occasionally in its pulse response. + * The following if/else if clauses manage raising an alarm and degrading + * a host has stopped sending the PMOND_FLAG. 
*/ + if ( flags & PMOND_FLAG ) + { + if ( node_ptr->pmon_degraded == true ) + { + if ( node_ptr->alarms[HBS_ALARM_ID__PMOND] != FM_ALARM_SEVERITY_CLEAR ) + { + alarm_clear ( node_ptr->hostname, PMOND_ALARM_ID, "pmond" ); + } + if ( send_event ( node_ptr->hostname, MTC_EVENT_PMOND_CLEAR, MGMNT_IFACE ) == PASS ) + { + node_ptr->alarms[HBS_ALARM_ID__PMOND] = FM_ALARM_SEVERITY_CLEAR ; + node_ptr->pmon_degraded = false ; + } + } + node_ptr->pmon_missing_count = 0 ; + node_ptr->stall_monitor_log_throttle = 0 ; + node_ptr->stall_recovery_log_throttle = 0 ; + } + else if ( ++node_ptr->pmon_missing_count > PMOND_MISSING_THRESHOLD ) + { + if ( node_ptr->pmon_degraded == false ) + { + wlog ("%s sending pmon degrade event to maintenance\n", node_ptr->hostname.c_str()); + if ( send_event ( node_ptr->hostname, MTC_EVENT_PMOND_RAISE, MGMNT_IFACE ) == PASS ) + { + node_ptr->pmon_degraded = true ; + node_ptr->alarms[HBS_ALARM_ID__PMOND] = FM_ALARM_SEVERITY_MAJOR ; + alarm_major ( node_ptr->hostname, PMOND_ALARM_ID, "pmond" ); + } + } + } + + /* A host indicates that a process stall condition exists by setting the + * STALL_REC_FLAG it its heartbeat pulse response messages */ + if ( flags & STALL_REC_FLAG ) + { + wlog ("%s hbsClient stall recovery action (flags:%08x)\n", node_ptr->hostname.c_str(), flags); + if ( node_ptr->stall_recovery_log_throttle++ == 0 ) + { + send_event ( node_ptr->hostname, MTC_EVENT_HOST_STALLED , MGMNT_IFACE ); + } + } + else if ( flags & STALL_MON_FLAG ) + { + if ( node_ptr->stall_monitor_log_throttle++ == 0 ) + { + wlog ("%s hbsClient running stall monitor (flags:%08x)\n", node_ptr->hostname.c_str(), flags ); + } + else if ( flags & STALL_ERROR_FLAGS ) + { + wlog ("%s hbsClient running stall monitor (flags:%08x)\n", node_ptr->hostname.c_str(), flags ); + } + } + + if ( node_ptr->stall_recovery_log_throttle > STALL_MSG_THLD ) + { + node_ptr->stall_recovery_log_throttle = 0 ; + } + if ( node_ptr->stall_monitor_log_throttle > STALL_MSG_THLD ) + { + 
node_ptr->stall_monitor_log_throttle = 0 ; + } +} + +/* Create the monitored pulse list for the specified interface */ +int nodeLinkClass::create_pulse_list ( iface_enum iface ) +{ + struct node * ptr = head ; + pulses[iface] = 0 ; + + /* No check-in list if there is no inventory */ + if (( head == NULL ) || ( hosts == 0 )) + { + return (pulses[iface]) ; + } + + if ( iface >= MAX_IFACES ) + { + dlog ("Invalid interface (%d)\n", iface ); + return (pulses[iface]); + } + + pulse_list[iface].last_ptr = NULL ; + pulse_list[iface].head_ptr = NULL ; + pulse_list[iface].tail_ptr = NULL ; + + /* walk the node list looking for nodes that should be monitored */ + for ( ; ptr != NULL ; ptr = ptr->next ) + { + if ( ptr->monitor[iface] == true ) + { + /* current monitored node pointer */ + pulse_ptr = ptr ; + + /* if first pulse node */ + if ( pulse_list[iface].head_ptr == NULL ) + { + /* need to keep track of the last node so we can deal with + * skipped nodes when they are not in monitor mode */ + pulse_list[iface].last_ptr = pulse_ptr ; + pulse_list[iface].head_ptr = pulse_ptr ; + pulse_list[iface].tail_ptr = pulse_ptr ; + pulse_ptr->pulse_link[iface].prev_ptr = NULL ; + } + else + { + pulse_list[iface].last_ptr->pulse_link[iface].next_ptr = pulse_ptr ; + pulse_ptr->pulse_link[iface].prev_ptr = pulse_list[iface].last_ptr ; + pulse_list[iface].last_ptr = pulse_ptr ; /* save current to handle a skip */ + pulse_list[iface].tail_ptr = pulse_ptr ; /* migrate tail as list is built */ + } + pulse_ptr->pulse_link[iface].next_ptr = NULL ; + + pulse_ptr->linknum[iface] = ++pulses[iface] ; + + mlog2 ("%s %s Pulse Info: %d:%d - %d:%p\n", + pulse_ptr->hostname.c_str(), + get_iface_name_str(iface), + pulse_ptr->linknum[iface], + pulses[iface], + pulse_ptr->rri, + pulse_ptr); + } + } + print_pulse_list(iface); + return (pulses[iface]); +} + + +/** Build the Reasource Reference Array */ +void nodeLinkClass::build_rra ( void ) +{ + struct node * ptr = NULL ; + int x = 1 ; + for ( ptr = head ; 
ptr != NULL ; ptr = ptr->next ) + { + hbs_rra [x] = ptr ; + ptr->rri=x ; + x++ ; + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + + if ( ptr != NULL ) + { + dlog ("%s forced RRA build (%d)\n", ptr->hostname.c_str(), x-1); + } + + /* fill the rest with NULL */ + for ( ; x < MAX_NODES ; x++ ) + hbs_rra[x] = NULL ; + + /* Reset the "Running RRI" */ + rrri = 0 ; +} + +/** Gets the next hostname and resource reference identifier + * (the rra index) and updates the callers variables with them. + * + * This is a helper function in support of the fast resource lookup feature. + * During steady state operation the heartbeat agent cycles through + * all the resources , one per heartbeat request, sending new + * reference identifiers (name and index) to the monitored resources. + * Each time this is called it get the next set. + * */ +void nodeLinkClass::get_rris ( string & hostname, int & rri ) +{ + if ( hosts ) + { + hostname = "none" ; + rrri++ ; + if ( rrri > hosts ) + { + rrri = 1 ; + } + hostname = hbs_rra[rrri]->hostname ; + rri = rrri ; + } +} + +struct nodeLinkClass::node* nodeLinkClass::getPulseNode ( string & hostname , iface_enum iface ) +{ + /* check for empty list condition */ + if ( pulse_list[iface].head_ptr == NULL ) + return NULL ; + + for ( pulse_ptr = pulse_list[iface].head_ptr ; ; pulse_ptr = pulse_ptr->pulse_link[iface].next_ptr ) + { + if ( !hostname.compare ( pulse_ptr->hostname )) + { + return pulse_ptr ; + } + if (( pulse_ptr->pulse_link[iface].next_ptr == NULL ) || + ( pulse_ptr==pulse_list[iface].tail_ptr )) + { + break ; + } + } + return static_cast(NULL); +} + +/* Find the node in the list of nodes being heartbeated and splice it out */ +int nodeLinkClass::remPulse_by_index ( string hostname, int index, iface_enum iface, bool clear_b2b_misses_count, unsigned int flags ) +{ + int rc = FAIL ; + if (( index > 0 ) && ( !(index > hosts))) + { + if ( hbs_rra[index] != NULL ) + { + struct nodeLinkClass::node* node_ptr ; + node_ptr = 
nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + if (( hbs_rra[index] == node_ptr ) && + ( ! node_ptr->hostname.compare(hostname))) + { + node_ptr->lookup_mismatch_log_throttle = 0 ; + if ( node_ptr->monitor[iface] == true ) + { + node_ptr->unexpected_pulse_log_throttle = 0 ; + return ( remPulse ( hbs_rra[index], iface, clear_b2b_misses_count, flags )); + } + else + { + wlog_throttled ( node_ptr->unexpected_pulse_log_throttle, 200, "%s is not being monitored\n", hostname.c_str()); + rc = PASS; + } + } + else + { + rc = remPulse_by_name ( hostname, iface, clear_b2b_misses_count, flags ); + wlog_throttled ( node_ptr->lookup_mismatch_log_throttle, 200, "%s rri lookup mismatch (%s:%d) ; %s\n", hostname.c_str(), node_ptr->hostname.c_str(), index, rc ? "" : "removed by hostname" ); + return (rc); + } + } + else + { + dlog ("%s could not lookup by index or hostname (%d)\n", hostname.c_str(), index ); + rc = FAIL_HOSTNAME_LOOKUP ; + } + } + } + return (rc); +} + +/* Find the node in the list of nodes being heartbeated and splice it out */ +int nodeLinkClass::remPulse_by_name ( string & hostname, iface_enum iface, bool clear_b2b_misses_count, unsigned int flags ) +{ + return ( remPulse ( getPulseNode ( hostname, iface ), iface, clear_b2b_misses_count, flags )); +} + +/** WANT_LINKLIST_FIT is not defined by default. 
/** WANT_LINKLIST_FIT is not defined by default.
 *  Needs to be explicitly defined and the undef commented out for testing
 **/
#ifdef WANT_LINKLIST_FIT
#undef WANT_LINKLIST_FIT
#endif

#ifdef WANT_LINKLIST_FIT
static bool already_fit = false ;
#endif

/* Find the node in the list of nodes being heartbeated and splice it out.
 *
 * When the node is in the pulse list its hbs_count is incremented, its
 * pulse flags are processed and, if clear_b2b_misses_count is true, its
 * miss count, heartbeat alarm and minor/degrade states are cleared
 * before the node is unlinked.
 *
 * Returns : -ENODEV           = there is no inventory (head is NULL)
 *           ENXIO             = node_ptr is NULL or the node is not in
 *                               the pulse list
 *           PASS              = node spliced out
 *           FAIL              = middle node had a NULL neighbour link
 *           FAIL_NULL_POINTER = head or tail link was unexpectedly NULL ;
 *                               the pulse list is abandoned
 */
int nodeLinkClass::remPulse ( struct node * node_ptr, iface_enum iface, bool clear_b2b_misses_count, unsigned int flags )
{
    /* This default RC allows the caller to filter out unexpected pulse responses */
    int rc = ENXIO ;

    if ( head == NULL )
    {
        return -ENODEV ;
    }
    else if ( node_ptr == NULL )
    {
        return (rc) ;
    }

    struct node * ptr = node_ptr ;

    // dlog ("%s\n", node_ptr->hostname.c_str());

    /* Splice the node out of the pulse monitor list */

    /* Does the pulse monitor list exist and is the node in the list */
    /* Need to gracefully handle being called when there is no pulse */
    /* list and/or the specified host is not in the pulse list */
    if (( pulse_list[iface].head_ptr != NULL ) && ( ptr != NULL ) && ( ptr->linknum[iface] != 0))
    {
        pulse_ptr = ptr ;
        ptr->hbs_count[iface]++ ;

        manage_pulse_flags ( pulse_ptr , flags );

        if ( clear_b2b_misses_count == true )
        {
            /* only log the recovery if the host had reached degrade */
            if ( ptr->b2b_misses_count[iface] > hbs_degrade_threshold )
            {
                ilog ("%s %s Pulse Rxed (after %d misses)\n",
                          node_ptr->hostname.c_str(),
                          get_iface_name_str(iface),
                          node_ptr->b2b_misses_count[iface]);
            }

            manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_CLEAR, iface );

            ptr->b2b_misses_count[iface] = 0 ;
            if ( pulse_ptr->hbs_degrade[iface] == true )
            {
                /* Send a degrade clear event to maintenance ;
                 * the flag stays set if the send fails */
                if ( send_event ( pulse_ptr->hostname, MTC_EVENT_HEARTBEAT_DEGRADE_CLR, iface ) == PASS )
                {
                    pulse_ptr->hbs_degrade[iface] = false ;
                }
            }
            if ( pulse_ptr->hbs_minor[iface] == true )
            {
                /* same handling for the minor state */
                if ( send_event ( pulse_ptr->hostname, MTC_EVENT_HEARTBEAT_MINOR_CLR, iface ) == PASS )
                {
                    pulse_ptr->hbs_minor[iface] = false ;
                }
            }
        }
        rc = PASS ;
#ifdef WANT_LINKLIST_FIT
        /* Fault insertion: break the head's next pointer once when the
         * MTC_CMD_FIT__LINKLIST file is present */
        if ( already_fit == false )
        {
            if ( daemon_is_file_present ( MTC_CMD_FIT__LINKLIST ) == true )
            {
                if ( pulse_list[iface].head_ptr->pulse_link[iface].next_ptr != NULL )
                {
                    slog ("FIT of next pointer\n");
                    pulse_list[iface].head_ptr->pulse_link[iface].next_ptr = NULL ;
                    already_fit = true ;
                }
            }
        }
#endif
        /* Case 1: the node is the list head */
        if ( pulse_list[iface].head_ptr == pulse_ptr )
        {
            if ( pulse_list[iface].head_ptr == pulse_list[iface].tail_ptr )
            {
                qlog2 ("%s Pulse: Single Node -> Head Case : %d of %d\n", node_ptr->hostname.c_str(), pulse_ptr->linknum[iface], pulses[iface] );
                pulse_list[iface].head_ptr = NULL ;
                pulse_list[iface].tail_ptr = NULL ;
            }
            else
            {
                qlog2 ("%s Pulse: Multiple Node -> Head Case : %d of %d\n", node_ptr->hostname.c_str(), pulse_ptr->linknum[iface], pulses[iface] );
                if ( pulse_list[iface].head_ptr->pulse_link[iface].next_ptr == NULL )
                {
                    /* head differs from tail yet has no successor ;
                     * the list is corrupt so drop all of it */
                    slog ("%s unexpected NULL next_ptr ; aborting this pulse window\n", node_ptr->hostname.c_str());
                    pulse_list[iface].head_ptr = NULL ;
                    pulse_list[iface].tail_ptr = NULL ;
                    pulse_ptr->linknum[iface] = 0 ;
                    pulses[iface] = 0 ;
                    return (FAIL_NULL_POINTER);
                }
                else
                {
                    pulse_list[iface].head_ptr = pulse_list[iface].head_ptr->pulse_link[iface].next_ptr ;
                    pulse_list[iface].head_ptr->pulse_link[iface].prev_ptr = NULL ;
                }
            }
        }
        /* Case 2: the node is the list tail */
        else if ( pulse_list[iface].tail_ptr == pulse_ptr )
        {
            qlog2 ("%s Pulse: Multiple Node -> Tail Case : %d of %d\n", node_ptr->hostname.c_str(), pulse_ptr->linknum[iface], pulses[iface] );
            if ( pulse_list[iface].tail_ptr->pulse_link[iface].prev_ptr == NULL )
            {
                /* tail differs from head yet has no predecessor ;
                 * the list is corrupt so drop all of it */
                slog ("%s unexpected NULL prev_ptr ; aborting this pulse window\n", node_ptr->hostname.c_str());
                pulse_list[iface].head_ptr = NULL ;
                pulse_list[iface].tail_ptr = NULL ;
                pulse_ptr->linknum[iface] = 0 ;
                pulses[iface] = 0 ;
                return (FAIL_NULL_POINTER);
            }
            else
            {
                pulse_list[iface].tail_ptr = pulse_list[iface].tail_ptr->pulse_link[iface].prev_ptr ;
                pulse_list[iface].tail_ptr->pulse_link[iface].next_ptr = NULL ;
            }
        }
        /* Case 3: the node is somewhere in the middle */
        else
        {
            /* July 1 emacdona: Make failure path case more robust */
            /* verify both neighbours and their back links before splicing */
            if ( pulse_ptr == NULL ) { slog ("Internal Err 1\n"); rc = FAIL; }
            else if ( pulse_ptr->pulse_link[iface].prev_ptr == NULL ) { slog ("Internal Err 2\n"); rc = FAIL; }
            else if ( pulse_ptr->pulse_link[iface].prev_ptr->pulse_link[iface].next_ptr == NULL ) { slog ("Internal Err 3\n"); rc = FAIL; }
            else if ( pulse_ptr->pulse_link[iface].next_ptr == NULL ) { slog ("Internal Err 4\n"); rc = FAIL; }
            else if ( pulse_ptr->pulse_link[iface].next_ptr->pulse_link[iface].prev_ptr == NULL ) { slog ("Internal Err 5\n"); rc = FAIL; }

            if ( rc == FAIL )
            {
                slog ("%s Null pointer error splicing %s out of pulse list with %d pulses remaining (Monitoring:%s)\n",
                          node_ptr->hostname.c_str(),
                          get_iface_name_str(iface),
                          pulses[iface],
                          node_ptr->monitor[iface] ? "Yes" : "No" );
            }
            else
            {
                pulse_ptr->pulse_link[iface].prev_ptr->pulse_link[iface].next_ptr = pulse_ptr->pulse_link[iface].next_ptr ;
                pulse_ptr->pulse_link[iface].next_ptr->pulse_link[iface].prev_ptr = pulse_ptr->pulse_link[iface].prev_ptr ;
            }
        }
        /* NOTE(review): linknum is decremented, not zeroed, so a node whose
         * linknum was greater than 1 still passes the 'linknum != 0' in-list
         * test above after it has been spliced out - confirm this is intended */
        if ( rc == PASS )
        {
            pulse_ptr->linknum[iface]-- ; // = 0 ;
        }
        /* NOTE(review): the count is reduced even when the middle node
         * splice above failed (rc == FAIL) - confirm this is intended */
        pulses[iface]-- ;
    }

    return rc ;
}
+ * + * By index does not require a lookup whereas hostname does */ +int nodeLinkClass::remove_pulse ( string & hostname, iface_enum iface, int index, unsigned int flags ) +{ + if ( index ) + { + int rc = remPulse_by_index ( hostname, index , iface, true , flags ); + switch (rc) + { + case PASS: return (rc) ; + case ENXIO: return (rc); + default: mlog ("%s RRI Miss (rri:%d) (rc:%d)\n", hostname.c_str(), index, rc ); + } + } + else + { + if ( hostname.compare("localhost") ) + { + if ( get_hbs_monitor_state ( hostname , iface ) == true ) + { + wlog ("%s Not Offering RRI\n", hostname.c_str()); + } + } + else + { + /* localhost is not a supported hostname and indicates + * an unconfigured host response ; return the ignore response */ + return(ENXIO); + } + } + return ( remPulse_by_name ( hostname , iface, true, flags )); +} + +void nodeLinkClass::clear_pulse_list ( iface_enum iface ) +{ + struct node * ptr = head ; + for ( ; ptr != NULL ; ptr = ptr->next ) + { + ptr->pulse_link[iface].prev_ptr = NULL ; + ptr->pulse_link[iface].next_ptr = NULL ; + } + pulse_list[iface].head_ptr = NULL ; + pulse_list[iface].tail_ptr = NULL ; + + if ( ptr != NULL ) + { + ptr->linknum[iface] = 0 ; + pulses[iface] = 0 ; + } +} + + +/** Runs in the hbsAgent to set or clear heartbat alarms for all supported interfaces */ +void nodeLinkClass::manage_heartbeat_alarm ( struct nodeLinkClass::node * node_ptr, EFmAlarmSeverityT sev, int iface ) +{ + if ( this->heartbeat != true ) + return ; + + bool make_alarm_call = false ; + alarm_id_enum id ; + EFmAlarmStateT state = FM_ALARM_STATE_SET ; + const char * alarm_id_ptr = NULL ; + const char * entity_ptr = NULL ; + if ( iface == MGMNT_IFACE ) + { + entity_ptr = MGMNT_NAME ; + id = HBS_ALARM_ID__HB_MGMNT ; + alarm_id_ptr = MGMNT_HB_ALARM_ID; + } + else + { + entity_ptr = INFRA_NAME ; + id = HBS_ALARM_ID__HB_INFRA ; + alarm_id_ptr = INFRA_HB_ALARM_ID; + } + + if ( sev == FM_ALARM_SEVERITY_CLEAR ) + { + state = FM_ALARM_STATE_CLEAR ; + if ( 
/* Process every host still left in the pulse list of the specified
 * interface ; each of them is handled as a missed pulse.
 *
 * The list head is handled and removed on every pass until the list is
 * empty. While 'active' is set, each host's back to back miss count is
 * incremented, the miss is logged and the minor, degrade and failure
 * thresholds are checked, raising alarms and sending events as they are
 * reached.
 *
 * Returns the result of the last remPulse_by_name call, or PASS if the
 * list was already empty. A failed removal clears the whole list. */
int nodeLinkClass::lost_pulses ( iface_enum iface )
{
    int rc = PASS ;

    for ( ; pulse_list[iface].head_ptr != NULL ; )
    {
        daemon_signal_hdlr ();
        pulse_ptr = pulse_list[iface].head_ptr ;
        if ( active )
        {
            string flat = "Flat Line:" ;
            pulse_ptr->b2b_misses_count[iface]++ ;
            // pulse_ptr->max_count[iface]++ ;

            /* Don't log single misses unless in debug mode */
            if ( pulse_ptr->b2b_misses_count[iface] > 1 )
            {
                // if ( pulse_ptr->b2b_misses_count[iface] >= 25 )
                if ( pulse_ptr->b2b_misses_count[iface] >= hbs_failure_threshold )
                {
                    // if ( pulse_ptr->b2b_misses_count[iface] == 25 )
                    if ( pulse_ptr->b2b_misses_count[iface] == hbs_failure_threshold )
                    {
                        /* NOTE(review): the value printed is 0xfff (4095) while
                         * the mask test below fires on multiples of 4096 */
                        ilog ("%-13s %s Pulse Miss (%d) (log throttled to every %d)\n",
                                  pulse_ptr->hostname.c_str(),
                                  get_iface_name_str(iface),
                                  pulse_ptr->b2b_misses_count[iface],
                                  0xfff);
                    }
                    /* Once the misses reach the failure threshold then throttle the logging to avoid flooding */
                    if ( (pulse_ptr->b2b_misses_count[iface] & 0xfff) == 0 )
                    {
                        ilog ("%-13s %s Pulse Miss (%d)\n", pulse_ptr->hostname.c_str(),
                                  get_iface_name_str(iface),
                                  pulse_ptr->b2b_misses_count[iface] );
                    }
                }
                else
                {
                    /* NOTE(review): this first test cannot be true here ; the
                     * enclosing else is only reached when the miss count is
                     * below hbs_failure_threshold */
                    if ( pulse_ptr->b2b_misses_count[iface] > hbs_failure_threshold )
                    {
                        ilog ("%-13s %s Pulse Miss (%3d) (in failure)\n", pulse_ptr->hostname.c_str(),
                                  get_iface_name_str(iface),
                                  pulse_ptr->b2b_misses_count[iface] );
                    }
                    else if ( pulse_ptr->b2b_misses_count[iface] > hbs_degrade_threshold )
                    {
                        ilog ("%-13s %s Pulse Miss (%3d) (max:%3d) (in degrade)\n", pulse_ptr->hostname.c_str(),
                                  get_iface_name_str(iface),
                                  pulse_ptr->b2b_misses_count[iface],
                                  pulse_ptr->max_count[iface]);
                    }
                    else if ( pulse_ptr->b2b_misses_count[iface] > hbs_minor_threshold )
                    {
                        ilog ("%-13s %s Pulse Miss (%3d) (max:%3d) (in minor)\n", pulse_ptr->hostname.c_str(),
                                  get_iface_name_str(iface),
                                  pulse_ptr->b2b_misses_count[iface] ,
                                  pulse_ptr->max_count[iface]);
                    }
                    else
                    {
                        ilog ("%-13s %s Pulse Miss (%3d) (max:%3d)\n", pulse_ptr->hostname.c_str(),
                                  get_iface_name_str(iface),
                                  pulse_ptr->b2b_misses_count[iface],
                                  pulse_ptr->max_count[iface]);
                    }
                }
            }
            else
            {
                dlog ("%-13s %s Pulse Miss (%d)\n", pulse_ptr->hostname.c_str(),
                          get_iface_name_str(iface),
                          pulse_ptr->b2b_misses_count[iface] );
            }
            mem_log ( flat, pulse_ptr->b2b_misses_count[iface], pulse_ptr->hostname.c_str());

            /* The minor event is only sent for the management interface */
            if ( iface == MGMNT_IFACE )
            {
                if ( pulse_ptr->b2b_misses_count[iface] == hbs_minor_threshold )
                {
                    send_event ( pulse_ptr->hostname, MTC_EVENT_HEARTBEAT_MINOR_SET, iface );
                    pulse_ptr->hbs_minor[iface] = true ;
                    pulse_ptr->hbs_minor_count[iface]++ ;
                    wlog ("%s %s -> MINOR\n", pulse_ptr->hostname.c_str(), get_iface_name_str(iface));
                }
            }
            if ( pulse_ptr->b2b_misses_count[iface] == hbs_degrade_threshold )
            {
                manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_MAJOR, iface );

                /* report this host as degraded ; the flag is only set
                 * if the event was sent */
                if ( send_event ( pulse_ptr->hostname, MTC_EVENT_HEARTBEAT_DEGRADE_SET, iface ) == PASS )
                {
                    pulse_ptr->hbs_degrade[iface] = true ;
                }
                wlog ("%s %s -> DEGRADED\n", pulse_ptr->hostname.c_str(), get_iface_name_str(iface));
                pulse_ptr->hbs_degrade_count[iface]++ ;

            }
            /* Handle lost degrade event case ; retried on every miss
             * past the threshold until the send succeeds */
            if (( pulse_ptr->b2b_misses_count[iface] > hbs_degrade_threshold ) &&
                ( pulse_ptr->hbs_degrade[iface] == false ))
            {
                wlog ("%s -> DEGRADED - Auto-Correction\n", pulse_ptr->hostname.c_str());

                manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_MAJOR, iface );

                /* report this host as degraded */
                if ( send_event ( pulse_ptr->hostname, MTC_EVENT_HEARTBEAT_DEGRADE_SET, iface ) == PASS )
                {
                    pulse_ptr->hbs_degrade[iface] = true ;
                }
            }

            /* Turn the infra heartbeat loss into a degrade only
             * condition if the infra_degrade_only flag is set */
            if (( iface == INFRA_IFACE ) &&
                ( pulse_ptr->b2b_misses_count[iface] >= hbs_failure_threshold ) &&
                ( infra_degrade_only == true ))
            {
                /* Only print the log at the threshold boundary */
                if ( pulse_ptr->b2b_misses_count[iface] == hbs_failure_threshold )
                {
                    manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_CRITICAL, iface );

                    wlog_throttled ( pulse_ptr->no_work_log_throttle, 500,
                                     "%s %s *** Heartbeat Loss *** (degrade only)\n", pulse_ptr->hostname.c_str(),
                                     get_iface_name_str(iface) );
                }
            }

            /* Turn the infra heartbeat loss into a degrade only
             * condition for a controller on a normal system. */
            else if (( iface == INFRA_IFACE ) &&
                     ( pulse_ptr->b2b_misses_count[iface] >= hbs_failure_threshold ) &&
                     ( this->system_type == SYSTEM_TYPE__NORMAL ) &&
                     (( pulse_ptr->nodetype & CONTROLLER_TYPE) == CONTROLLER_TYPE ))
            {
                /* Only print the log at the threshold boundary */
                if ( pulse_ptr->b2b_misses_count[iface] == hbs_failure_threshold )
                {
                    manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_CRITICAL, iface );

                    wlog_throttled ( pulse_ptr->no_work_log_throttle, 500,
                                     "%s %s *** Heartbeat Loss *** (degrade only)\n", pulse_ptr->hostname.c_str(),
                                     get_iface_name_str(iface) );
                }
            }

            /* All other cases: report heartbeat loss once, at the boundary */
            else if (( pulse_ptr->b2b_misses_count[iface] == hbs_failure_threshold ) &&
                     ( pulse_ptr->hbs_failure[iface] == false ))
            {
                elog ("%s %s -> FAILED\n", pulse_ptr->hostname.c_str(),
                          get_iface_name_str(iface) );
                elog ("%s %s *** Heartbeat Loss ***\n", pulse_ptr->hostname.c_str(),
                          get_iface_name_str(iface) );

                manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_CRITICAL, iface );

                /* report this host as failed */
                if ( send_event ( pulse_ptr->hostname, MTC_EVENT_HEARTBEAT_LOSS , iface ) == PASS )
                {
                    pulse_ptr->hbs_failure[iface] = true ;
                }

                pulse_ptr->hbs_failure_count[iface]++ ;
            }
            /* track the high water mark of back to back misses */
            if ( pulse_ptr->b2b_misses_count[iface] > pulse_ptr->max_count[iface] )
                pulse_ptr->max_count[iface] = pulse_ptr->b2b_misses_count[iface] ;
        }
        /* Remove the host just handled ; the miss count is preserved
         * (clear flag false) and no pulse flags are processed */
        rc = remPulse_by_name ( pulse_ptr->hostname, iface, false, NULL_PULSE_FLAGS );
        if ( rc != PASS )
        {
            elog ("%s %s not in pulse list\n", pulse_ptr->hostname.c_str(),
                      get_iface_name_str(iface));
            clear_pulse_list ( iface );
            break ;
        }
        if ( pulse_list[iface].head_ptr == NULL )
        {
            // dlog ("Pulse list is Empty\n");
            break ;
        }
    }
    return (rc);
}
nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + return ( node_ptr->monitor[iface] ) ; + } + } + return(false); +} + +/* Reports pulse list empty status. + * true if empty + * false if not empty + */ +bool nodeLinkClass::pulse_list_empty ( iface_enum iface ) +{ + if ( pulse_list[iface].head_ptr == NULL ) + return true ; + return false ; +} + +void nodeLinkClass::print_pulse_list ( iface_enum iface ) +{ + string pulse_host_list = "- " ; + + if ( pulse_list[iface].head_ptr != NULL ) + { + for ( pulse_ptr = pulse_list[iface].head_ptr ; + pulse_ptr != NULL ; + pulse_ptr = pulse_ptr->pulse_link[iface].next_ptr ) + { + pulse_host_list.append(pulse_ptr->hostname.c_str()); + pulse_host_list.append(" "); + } + dlog ("Patients: %s\n", pulse_host_list.c_str()); + } + if ( pulses[iface] && !pulse_host_list.empty() ) + { + string temp = get_iface_name_str(iface) ; + temp.append(" Patients :") ; + mem_log ( temp, pulses[iface], pulse_host_list ); + } +} + + + +/* Clear all degrade flags except for the HWMON one */ +void clear_host_degrade_causes ( unsigned int & degrade_mask ) +{ + if ( degrade_mask & DEGRADE_MASK_HWMON ) + { + degrade_mask = DEGRADE_MASK_HWMON ; + } + else + { + degrade_mask = 0 ; + } +} + +/***************************************************************************/ +/******************* State Dump Utilities ***********************/ +/***************************************************************************/ + + + +void nodeLinkClass::mem_log_general ( void ) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s %s %s %s:%s %s:%s \n", + my_hostname.c_str(), + my_local_ip.c_str(), + my_float_ip.c_str(), + daemon_get_cfg_ptr()->mgmnt_iface, + mgmnt_link_up_and_running ? "Up" : "Down", + daemon_get_cfg_ptr()->infra_iface, + infra_link_up_and_running ? 
"Up" : "Down"); + mem_log (str); +} + +void nodeLinkClass::mem_log_dor ( struct nodeLinkClass::node * node_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s DOR - Active: %c Was: %c Time: %5d (00:%02d:%02d)\n", + node_ptr->hostname.c_str(), + node_ptr->dor_recovery_mode ? 'Y' : 'N', + node_ptr->was_dor_recovery_mode ? 'Y' : 'N', + node_ptr->dor_recovery_time, + node_ptr->dor_recovery_time ? node_ptr->dor_recovery_time/60 : 0, + node_ptr->dor_recovery_time ? node_ptr->dor_recovery_time%60 : 0); + mem_log (str); +} + + + +/* Multi-Node Failure Avoidance Data */ +void nodeLinkClass::mem_log_mnfa ( void ) +{ + char str[MAX_MEM_LOG_DATA] ; + + int temp = mnfa_threshold_number ; + if ( mnfa_threshold_type == MNFA_PERCENT ) + temp = mnfa_threshold_percent ; + + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s MNFA: Mode:%s:%d State:%s Hosts:%d:%d Cases:%d Threshold:%d\n", + my_hostname.c_str(), + mnfa_threshold_type ? "Percent" : "Number", + temp, + mnfa_active ? "ACTIVE" : "inactive", + mnfa_host_count[MGMNT_IFACE], + mnfa_host_count[INFRA_IFACE], + mnfa_calculate_threshold( "" ), + mnfa_occurances); + mem_log (str); +} + +void nodeLinkClass::mem_log_general_mtce_hosts ( void ) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s EnableHosts -> Cont:%d Comp:%d Stor:%d StorType:%d\n", + my_hostname.c_str(), + num_controllers_enabled(), + enabled_compute_nodes(), + enabled_storage_nodes(), + get_storage_backend()); + mem_log (str); +} + +void nodeLinkClass::mem_log_bm ( struct nodeLinkClass::node * node_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tbm_ip:%s bm_un:%s bm_type:%s provisioned: %s\n", + node_ptr->hostname.c_str(), + node_ptr->bm_ip.c_str(), + node_ptr->bm_un.c_str(), + node_ptr->bm_type.c_str(), + node_ptr->bm_provisioned ? 
"Yes" : "No" ); + mem_log (str); +} + +void nodeLinkClass::mem_log_identity ( struct nodeLinkClass::node * node_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t%s %s (%u)\n", + node_ptr->hostname.c_str(), + node_ptr->uuid.c_str(), + node_ptr->type.c_str(), + node_ptr->nodetype); + mem_log (str); +} + +void nodeLinkClass::mem_log_state1 ( struct nodeLinkClass::node * node_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + string ad = adminState_enum_to_str(node_ptr->adminState) ; + string op = operState_enum_to_str(node_ptr->operState) ; + string av = availStatus_enum_to_str(node_ptr->availStatus); + + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t%s-%s-%s degrade_mask:%08x\n", + node_ptr->hostname.c_str(), + ad.c_str(), + op.c_str(), + av.c_str(), + node_ptr->degrade_mask); + mem_log (str); + op = operState_enum_to_str(node_ptr->operState_subf) ; + av = availStatus_enum_to_str(node_ptr->availStatus_subf); + if ( node_ptr->subfunction_str.empty() ) + { + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tFunction: %s %s-%s-%s\n", + node_ptr->hostname.c_str(), + node_ptr->function_str.c_str(), + ad.c_str(), + op.c_str(), + av.c_str()); + } + else + { + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tFunctions %s-%s %s-%s-%s\n", + node_ptr->hostname.c_str(), + node_ptr->function_str.c_str(), + node_ptr->subfunction_str.c_str(), + ad.c_str(), + op.c_str(), + av.c_str()); + } + mem_log (str); +} + +void nodeLinkClass::mem_log_state2 ( struct nodeLinkClass::node * node_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + string aa = adminAction_enum_to_str(node_ptr->adminAction) ; + + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAction:%s invAction:%s Task:%s\n", + node_ptr->hostname.c_str(), + aa.c_str(), + node_ptr->action.c_str(), + node_ptr->task.c_str()); + mem_log (str); +} + +void nodeLinkClass::mem_log_mtcalive ( struct nodeLinkClass::node * node_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAlive: on:%c off:%c Cnt:%d 
State:%s Misses:%d\n", + node_ptr->hostname.c_str(), + node_ptr->mtcAlive_online ? 'Y' : 'N', + node_ptr->mtcAlive_offline ? 'Y' : 'N', + node_ptr->mtcAlive_count, + node_ptr->mtcAlive_gate ? "gated" : "rxing", + node_ptr->mtcAlive_misses); + mem_log (str); +} + +void nodeLinkClass::mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAlarm List:%s%s%s%s%s%s\n", + node_ptr->hostname.c_str(), + node_ptr->alarms[MTC_ALARM_ID__LOCK ] ? " Locked" : " .", + node_ptr->alarms[MTC_ALARM_ID__CONFIG ] ? " Config" : " .", + node_ptr->alarms[MTC_ALARM_ID__ENABLE ] ? " Enable" : " .", + node_ptr->alarms[MTC_ALARM_ID__CH_CONT ] ? " Control" : " .", + node_ptr->alarms[MTC_ALARM_ID__CH_COMP ] ? " Compute" : " .", + node_ptr->alarms[MTC_ALARM_ID__BM ] ? " Brd Mgmt" : " ."); + mem_log (str); +} + +void nodeLinkClass::mem_log_stage ( struct nodeLinkClass::node * node_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAdd:%d Offline:%d: Swact:%d Recovery:%d Able:%d\n", + node_ptr->hostname.c_str(), + node_ptr->addStage, + node_ptr->offlineStage, + node_ptr->swactStage, + node_ptr->recoveryStage, + node_ptr->handlerStage.raw); + mem_log (str); +} + +void nodeLinkClass::mem_log_power_info ( struct nodeLinkClass::node * node_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tStage:%s Attempts:%d: Holdoff:%d Retry:%d State:%x ctid:%p rtid:%p\n", + node_ptr->hostname.c_str(), + get_powercycleStages_str(node_ptr->powercycleStage).c_str(), + node_ptr->hwmon_powercycle.attempts, + node_ptr->hwmon_powercycle.holdoff, + node_ptr->hwmon_powercycle.retries, + node_ptr->hwmon_powercycle.state, + node_ptr->hwmon_powercycle.control_timer.tid, + node_ptr->hwmon_powercycle.recovery_timer.tid); + mem_log (str); +} + +void nodeLinkClass::mem_log_reset_info ( struct nodeLinkClass::node * node_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], 
MAX_MEM_LOG_DATA, "%s\tStage:%s Attempts:%d: Holdoff:%d Retry:%d State:%x ctid:%p rtid:%p\n",
+                node_ptr->hostname.c_str(),
+                get_resetStages_str(node_ptr->resetStage).c_str(),
+                node_ptr->hwmon_reset.attempts,
+                node_ptr->hwmon_reset.holdoff,
+                node_ptr->hwmon_reset.retries,
+                node_ptr->hwmon_reset.state,
+                node_ptr->hwmon_reset.control_timer.tid,
+                node_ptr->hwmon_reset.recovery_timer.tid);
+    mem_log (str);
+}
+
+/* Log the host's mac, ip and infra_ip strings and its reported uptime. */
+void nodeLinkClass::mem_log_network ( struct nodeLinkClass::node * node_ptr )
+{
+    char line[MAX_MEM_LOG_DATA] ;
+    snprintf ( line, sizeof(line), "%s\t%s %s infra_ip: %s Uptime: %u\n",
+               node_ptr->hostname.c_str(),
+               node_ptr->mac.c_str(),
+               node_ptr->ip.c_str(),
+               node_ptr->infra_ip.c_str(),
+               node_ptr->uptime );
+    mem_log ( line );
+}
+
+void nodeLinkClass::mem_log_heartbeat ( struct nodeLinkClass::node * node_ptr )
+{
+    char str[MAX_MEM_LOG_DATA] ;
+    for ( int iface = 0 ; iface < MAX_IFACES ; iface++ )
+    {
+        snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t%s Minor:%s Degrade:%s Failed:%s Monitor:%s\n",
+                    node_ptr->hostname.c_str(),
+                    get_iface_name_str (iface),
+                    node_ptr->hbs_minor[iface] ? "true " : "false",
+                    node_ptr->hbs_degrade[iface] ? "true " : "false",
+                    node_ptr->hbs_failure[iface] ? "true " : "false",
+                    node_ptr->monitor[iface] ? 
"YES" : "no" );
+        mem_log (str);
+    }
+}
+
+/* Log, once per interface, the heartbeat minor/degrade/failure totals and
+ * the max and current heartbeat counts for the host. */
+void nodeLinkClass::mem_log_hbs_cnts ( struct nodeLinkClass::node * node_ptr )
+{
+    char line[MAX_MEM_LOG_DATA] ;
+    int ifc = 0 ;
+    while ( ifc < MAX_IFACES )
+    {
+        snprintf ( line, sizeof(line), "%s\t%s Counts Minor:%d Degrade:%d Failed:%d Max:%d Cur:%d\n",
+                   node_ptr->hostname.c_str(),
+                   get_iface_name_str(ifc),
+                   node_ptr->hbs_minor_count[ifc],
+                   node_ptr->hbs_degrade_count[ifc],
+                   node_ptr->hbs_failure_count[ifc],
+                   node_ptr->max_count[ifc],
+                   node_ptr->hbs_count[ifc] );
+        mem_log ( line );
+        ifc++ ;
+    }
+}
+
+/* Log the out-of-service and in-service test stage names together with
+ * their run counters. */
+void nodeLinkClass::mem_log_test_info ( struct nodeLinkClass::node * node_ptr )
+{
+    char line[MAX_MEM_LOG_DATA] ;
+    string oos_stage  = get_oosTestStages_str(node_ptr->oosTestStage) ;
+    string insv_stage = get_insvTestStages_str(node_ptr->insvTestStage) ;
+
+    snprintf ( line, sizeof(line), "%s\tOOS Stage:%s Runs:%d - INSV Stage:%s Runs:%d\n",
+               node_ptr->hostname.c_str(),
+               oos_stage.c_str(),
+               node_ptr->oos_test_count,
+               insv_stage.c_str(),
+               node_ptr->insv_test_count );
+    mem_log ( line );
+}
+
+/* Log the ipmitool thread control data (stage, runcount, status) and the
+ * thread info data (progress, status). */
+void nodeLinkClass::mem_log_thread_info ( struct nodeLinkClass::node * node_ptr )
+{
+    char line[MAX_MEM_LOG_DATA] ;
+    snprintf ( line, sizeof(line), "%s\tThread Stage:%d Runs:%d Progress:%d Ctrl Status:%d Thread Status:%d\n",
+               node_ptr->hostname.c_str(),
+               node_ptr->ipmitool_thread_ctrl.stage,
+               node_ptr->ipmitool_thread_ctrl.runcount,
+               node_ptr->ipmitool_thread_info.progress,
+               node_ptr->ipmitool_thread_ctrl.status,
+               node_ptr->ipmitool_thread_info.status );
+    mem_log ( line );
+}
+
+
+void nodeLinkClass::mem_log_type_info ( struct nodeLinkClass::node * node_ptr )
+{
+    char str[MAX_MEM_LOG_DATA] ;
+    snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tSystem:%d NodeMask: %x Function: %s (%u)\n",
+                node_ptr->hostname.c_str(),
+                this->system_type,
+                node_ptr->nodetype,
+                node_ptr->function_str.c_str(),
+                node_ptr->function);
+    mem_log (str);
+
+    if ( CPE_SYSTEM )
+    {
+        snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tSub-Function: %s (%u) (SubFunc Enabled:%c)\n", 
node_ptr->hostname.c_str(), + node_ptr->subfunction_str.c_str(), node_ptr->subfunction, + node_ptr->subf_enabled ? 'Y' : 'n' ); + mem_log (str); + } +} + +void mem_log_delimit_host ( void ) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "-------------------------------------------------------------\n"); + mem_log (str); +} + +void nodeLinkClass::memDumpNodeState ( string hostname ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr == NULL ) + { + mem_log ( hostname, ": ", "Not Found\n" ); + return ; + } + else + { + if ( maintenance == true ) + { + mem_log_dor ( node_ptr ); + mem_log_identity ( node_ptr ); + mem_log_type_info ( node_ptr ); + mem_log_network ( node_ptr ); + mem_log_state1 ( node_ptr ); + mem_log_state2 ( node_ptr ); + // mem_log_reset_info ( node_ptr ); + mem_log_power_info ( node_ptr ); + mem_log_alarm1 ( node_ptr ); + mem_log_mtcalive ( node_ptr ); + mem_log_stage ( node_ptr ); + mem_log_bm ( node_ptr ); + mem_log_test_info ( node_ptr ); + mem_log_thread_info( node_ptr ); + workQueue_dump ( node_ptr ); + } + if ( heartbeat == true ) + { + mem_log_heartbeat( node_ptr ); + mem_log_hbs_cnts ( node_ptr ); + } + mem_log_delimit_host (); + } +} + +void nodeLinkClass::memDumpAllState ( void ) +{ + mem_log_delimit_host (); + mem_log_general (); + + if ( nodeLinkClass::maintenance == true ) + { + mem_log_general_mtce_hosts(); + mem_log_mnfa (); + } + + mem_log_delimit_host (); + + /* walk the node list looking for nodes that should be monitored */ + for ( struct node * ptr = head ; ptr != NULL ; ptr = ptr->next ) + { + memDumpNodeState ( ptr->hostname ); + } +} + + +/*************************************************************************** + * * + * Module Test Head * + * * + ***************************************************************************/ + +int nodeLinkClass::testhead ( int test ) +{ + UNUSED(test); + return (PASS) ; +} diff --git 
a/mtce-common/cgts-mtce-common-1.0/common/nodeClass.h b/mtce-common/cgts-mtce-common-1.0/common/nodeClass.h new file mode 100755 index 00000000..2b9b72ad --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeClass.h @@ -0,0 +1,2033 @@ +#ifndef __INCLUDE_NODECLASS_H__ +#define __INCLUDE_NODECLASS_H__ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Node Maintenance "Node Manager" + * class, support structs and enums. + */ + +#include +#include +#include +#include +#include +#include + +#define WANT_MTC +#define WANT_HBS + +using namespace std; + +/* Include base class definition header */ +#include "nodeBase.h" +#include "hostUtil.h" /* for ... server_code and others */ +#include "nodeTimers.h" +#include "threadUtil.h" /* for ... thread_info_type thread_ctrl_type*/ +#include "pingUtil.h" /* for ... ping_info_type */ +#include "nodeCmds.h" /* for ... mtcCmd type */ +#include "httpUtil.h" /* for ... libevent stuff */ +#include "ipmiUtil.h" /* for ... mc_info_type */ +#include "mtcHttpUtil.h" /* for ... libevent stuff */ +#include "mtcSmgrApi.h" /* */ +#include "alarmUtil.h" /* for ... SFmAlarmDataT */ +#include "mtcAlarm.h" /* for ... MTC_ALARM_ID__xx and utils */ +#include "mtcThreads.h" /* for ... 
mtcThread_ipmitool */ + +/** Default back-to-back heartbeat failures for disabled-failed condition */ +#define HBS_FAILURE_THRESHOLD 10 + +/** Default back-to-back heartbeat failures for enabled-degraded condition */ +#define HBS_DEGRADE_THRESHOLD 6 + +/** Default back-to-back heartbeat failures for the minor condition */ +#define HBS_MINOR_THRESHOLD 4 + +/** If Debug, this number of missed heartbeats in a row creates an info log */ +#define HBS_DBG_LOG_THRESHOLD 1 + +/** Clear (reset) heartbeat counter value */ +#define HBS_CLEAR_COUNT 0 + +#ifdef SIMPLEX +#undef SIMPLEX +#endif +#define SIMPLEX \ + ( daemon_is_file_present ( PLATFORM_SIMPLEX_MODE ) == true ) + +#define THIS_HOST \ + ( node_ptr->hostname == this->my_hostname ) + +#define NOT_THIS_HOST \ + ( node_ptr->hostname != this->my_hostname ) + +#define LARGE_SYSTEM \ + ( this->system_type == SYSTEM_TYPE__NORMAL ) + +#define CPE_SYSTEM \ + ( this->system_type != SYSTEM_TYPE__NORMAL ) + +#define SIMPLEX_CPE_SYSTEM \ + ( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX ) + +/** + * @addtogroup nodeLinkClass + * @{ + * + * This class is used to maintain a linked list of nodes that + * represent currently provisioned inventory. Its member + * functions and data members along with the support files + * in maintenance and heartbeat feature directories blend + * to create a Highly Available and Reusable Maintenance system. + */ + +class nodeLinkClass +{ +private: + + /** A single node entity within the nodeLinkClass that can + * be spliced in or out of a node linked list + */ + struct node { + + /** + * @addtogroup private_Node_variables + * @{ + * + * A set of variables that make up a node including linking members. 
+ */ + + /** The uuid of the host node */ + std::string uuid ; + + /** The name of the host node */ + std::string hostname ; + + /** The IP address of the host node */ + std::string ip ; + + /** The Mac address of the host node */ + std::string mac ; + + /** The infrastructure network IP address of the host node */ + std::string infra_ip ; + + /** The Mac address of the host's infra interface */ + std::string infra_mac ; + + /** The type of node 'controller' or 'compute' node */ + std::string type ; + + /** Short text phrase indicating the operation the FSM is + * taking on this host */ + std::string task ; + + /** Administrative action from inventory */ + std::string action ; + + /** The Node Type ; compute or control or storage as a mask */ + string functions ; /* comma delimited string of host types */ + unsigned int nodetype ; /* numeric mask of functions */ + + string function_str ; /* single host type string representing + the main function of the host */ + unsigned int function ; /* numeric representing function_str */ + + string subfunction_str ; /* single host type string ie "compute" */ + unsigned int subfunction ; /* numeric representing subfunction_str */ + + /** set to true if the host specific sub function enable handler passes */ + bool subf_enabled ; + + /** set true if the BMC is provisioned */ + bool bm_provisioned ; + + + /** general retry counter */ + int retries ; + + /** number of http rest API retries since last clear */ + int http_retries_cur ; + + /* Command handler retries counter */ + int cmd_retries ; + + /* Retry counter for power actions (on/off)*/ + int power_action_retries ; + + /** Generic toggle switch */ + bool toggle ; + + /** back to back health failure counter */ + int health_threshold_counter ; + + int mtce_flags ; + + /* true if this node is patching */ + bool patching ; + + /* true if this node is patched but not reset */ + bool patched ; + + /** The node's reported uptime */ + unsigned int uptime ; + unsigned int 
uptime_save ; + + /** Set to true once the host's add FSM is done */ + bool add_completed ; + + int uptime_refresh_counter ; + + /** Counts the number of times this node was unlocked. + * NOTE: This value should be stored in the database + * so that it is not reset to 0 on every swact. + */ + int node_unlocked_counter ; + + int mtcalive_timeout ; + + /* start host service retry controls */ + int start_services_retries ; + + bool start_services_running_main ; + bool start_services_running_subf ; + + bool start_services_needed ; + bool start_services_needed_subf ; /* for the add handler that defers + start to the inservice test handler. + this provides a means of telling + maintenance that the subfunction + start needs to also be run. */ + + /** Pointer to the previous node in the list */ + struct node *prev; + + /** Pointer to the next node in the list */ + struct node *next; + + /** @} private_Node_variables */ + + + /** @addtogroup private_Maintenance_variables + * @{ + * + * Finite State Machine variables and member functions + * for 'this' host/node + * + * The CGTS Maintenance system follows the X.731 maintenance model + * which uses the states below; for the full list of states please + * refer to nodeBase.h + * + * A brief summary is (host and node are used interchangeably) + * + * Administrative Action: Actions a user may take on a host at the user + * interface ; i.e. Lock, Unlock, Reset, Reinstall + * + * Administrative State : The state a host enters into when the above + * actions are taken ; i.e. Locked or Unlocked. + * + * Operational State : The operating state of the node based on the + * administrative actions ; Enabled or Disabled. + * + * Availability State : The usability state of a host based on the + * two previous states and events that may occur + * over time ; i.e. available, failed, degraded, + * intest. 
+ */ + mtc_nodeAdminAction_enum adminAction ; /**< Administrative Action */ + list adminAction_todo_list ; /**< Administrative Action */ + + mtc_nodeAdminState_enum adminState ; /**< Administrative State */ + mtc_nodeOperState_enum operState ; /**< Operational State */ + mtc_nodeAvailStatus_enum availStatus ; /**< Availability Status */ + mtc_nodeConfigAction_enum configAction; /**< Configuration Action */ + + mtc_nodeOperState_enum operState_subf ; /**< Subfunction Operational State */ + mtc_nodeAvailStatus_enum availStatus_subf ; /**< Subfunction Availability Status */ + + mtc_nodeOperState_enum operState_dport ; /**< Data Port Operational State */ + mtc_nodeAvailStatus_enum availStatus_dport; /**< Data Port Availability Status */ + + + /** Maintains the current handler stage. + * This is a union of all handler types such as enable, + * disable, degrade etc. See nodeBase.h for list of union members */ + mtc_stages_union handlerStage; + + /* Individual FSM handler stages */ + mtc_offlineStages_enum offlineStage ; + mtc_onlineStages_enum onlineStage ; + mtc_swactStages_enum swactStage ; + mtc_addStages_enum addStage ; + mtc_delStages_enum delStage ; + mtc_recoveryStages_enum recoveryStage ; + mtc_oosTestStages_enum oosTestStage ; + mtc_insvTestStages_enum insvTestStage ; + mtc_configStages_enum configStage ; + mtc_resetProgStages_enum resetProgStage ; + mtc_reinstallStages_enum reinstallStage ; + + /** Board management specific FSM Stages */ + mtc_powerStages_enum powerStage ; + mtc_powercycleStages_enum powercycleStage ; + mtc_subStages_enum subStage ; + mtc_resetStages_enum resetStage ; + mtc_sensorStages_enum sensorStage ; + + + /** This gate is used to block mtcAlive messages from reaching + * the state machine until its ready to receive them. + * + * Issue: The mtcClient on a slave host will continuously send the + * mtcAlive 'I'm here' messages after a reboot and until that message + * is acknowledged. 
This is done to make the recovery of a host more + * robust in a potentially lossy network. Without this, a single + * dropped mtcAlive message could result in an unlock-enable timeout + * which would lead to a disabled-failed state and re-recovery attempt + * after a recovery timeout (mtcTimers.h:HOST_MTCALIVE_TIMEOUT) + * period. Besides the system administrator seeing a disabled-failed + * condition the customer would realize a longer than necessary outage + * of that host. + * + * Fix: Having the mtcClient repeatedly send the mtcAlive message + * on reset recovery until it is acknowledged by the active mtcAgent + * prevents the above issue. However it has a side effect on the + * maintenance FSM for that host. This mtcAlive gate prevents + * the state machine from seeing mtcAlive messages when it does not + * care about them. + */ + bool mtcAlive_gate ; + int mtcAlive_count ; + int mtcAlive_misses ; + int mtcAlive_hits ; + int mtcAlive_purge ; + + bool mtcAlive_mgmnt ; /* set true when mtcAlive is rx'd from mgmnt network */ + bool mtcAlive_infra ; /* set true when mtcAlive is rx'd from infra network */ + + /* Both of these booleans are set true upon receipt of a mtcAlive message. */ + bool mtcAlive_online ; /* this is consumed by online and offline handler */ + bool mtcAlive_offline ; /* this is consumed by reset progression handler */ + + int offline_search_count ; /* count back-2-back mtcAlive request misses */ + + bool offline_log_reported ; /* prevents offline/online log flooding when */ + bool online_log_reported ; /* availStatus switches between these states */ + /* and failed */ + + /** Host's mtc timer struct. Used to time handler stages. 
+ * + * reset -> reset command response + * reboot -> then wait for mtcalive message + * mtcalive -> then wait for go enabled message + */ + struct mtc_timer mtcAlive_timer ; + + /* the fault handling offline handler timer */ + struct mtc_timer offline_timer ; + + /* Host level DOR recovery mode time and bools */ + int dor_recovery_time ; + bool dor_recovery_mode ; + bool was_dor_recovery_mode ; + + /** Integer code representing the host health */ + int health ; + + /** Flag indicating that the unknown health state + * has already been reported */ + bool unknown_health_reported ; + + /* Booleans indicating the main or subfunction has config failure */ + bool config_failed ; + bool config_failed_subf ; + + /* Booleans indicating the main or subfunction has passed the OOS test */ + bool goEnabled ; + bool goEnabled_subf ; + + /* Booleans indicating the main or subfunction has failed the OOS test */ + bool goEnabled_failed ; + bool goEnabled_failed_subf ; + + /* Boolean indicating the main or subfunction has start host services + * failure. */ + bool hostservices_failed ; + bool hostservices_failed_subf ; + + /* Boolean indicating the main or subfunction has inservice failure */ + bool inservice_failed ; + bool inservice_failed_subf ; + + /** node has reached enabled state this number of times */ + bool enabled_count ; + + /** Number of OOS tests run so far */ + int oos_test_count ; + + /** Number of INSV tests run so far */ + int insv_test_count ; + + /** Used to throttle inservice recovery actions */ + int insv_recovery_counter ; + + /** when true requests the task for this host be cleared at first opportunity */ + bool clear_task ; + + /** Host's mtc timer struct. Use to time handler stages. 
+ * + * reset -> reset command response + * reboot -> then wait for mtcalive message + * mtcalive -> then wait for go enabled message + */ + struct mtc_timer mtcTimer ; + struct mtc_timer http_timer ; + struct mtc_timer mtcCmd_timer ; + struct mtc_timer oosTestTimer ; + struct mtc_timer insvTestTimer ; + struct mtc_timer mtcSwact_timer ; + struct mtc_timer mtcConfig_timer ; + struct mtc_timer power_timer ; + struct mtc_timer host_services_timer ; + + mtcCmd host_services_req ; + mtcCmd mtcAlive_req ; + mtcCmd reboot_req ; + mtcCmd general_req ; + + /* String that is used in the command handling logs which represents + * the specific command handling that is in progress */ + string cmdName ; + + /** Indicates presence of a command request */ + unsigned int cmdReq ; + + /** Indicates presence of a command response */ + unsigned int cmdRsp; + + /** Indicates acknowledgement of the initial host + * services command in execution monitoroing mode */ + unsigned int cmdAck; + + /** Command Response Status - Execution Status */ + unsigned int cmdRsp_status ; + + /** Command Response Data - typically an error details string */ + string cmdRsp_status_string ; + + bool reboot_cmd_ack_mgmnt ; + bool reboot_cmd_ack_infra ; + + /** Tracks back to back Fast Fault Recovery counts */ + int graceful_recovery_counter; + + /** Reboot acknowledge */ + mtc_client_enum activeClient ; + + /** @} private_Maintenance_variables */ + + /** + * @addtogroup private_libEvent_structs + * @{ + * + * libEvent structures used to issue libEvent + * HTTP REST API Requests to control this host + * based on each service */ + + libEvent sysinvEvent; /**< Sysinv REST API Handling for host */ + libEvent cfgEvent; /**< Sysinv REST API Handling for config changes */ + libEvent vimEvent ; /**< VIM Event REST API Handling */ + + libEvent httpReq ; /**< Http libEvent Request Handling */ + libEvent thisReq ; /**< Http libEvent Request Handling */ + + list libEvent_work_fifo ; + list::iterator 
libEvent_work_fifo_ptr; + list libEvent_done_fifo ; + list::iterator libEvent_done_fifo_ptr; + + // bool work_ready ; + int oper_sequence ; + int oper_failures ; + int no_work_log_throttle ; + int log_throttle ; + + /* List of queue'ed mtce commands for this host */ + mtcCmd cmd; + list mtcCmd_work_fifo ; + list::iterator mtcCmd_work_fifo_ptr; + list mtcCmd_done_fifo ; + list::iterator mtcCmd_done_fifo_ptr; + + /** @} private_libEvent_structs and utils */ + + /** + * @addtogroup private_Heartbeat_variables + * @{ + * + * A grouping a of private variables at the node level used to + * control if a node is to be monitored, the monitoring failure + * counts and next / previous pointers used to create the + * monitored node pulse linked list + */ + + /** Set 'true' when node minor threshold has exceeded */ + bool hbs_minor[MAX_IFACES] ; + + /** Set 'true' when node is degraded due to back to back heartbeat pulse + * misses tha exceed the major threshold */ + bool hbs_degrade[MAX_IFACES] ; + + /** Set 'true' when node is failed due to back to back heartbeat pulse + * misses that exceed the critical threshold */ + bool hbs_failure[MAX_IFACES] ; + + /** log throttle controls for heartbeat service */ + int stall_recovery_log_throttle ; + int stall_monitor_log_throttle ; + int lookup_mismatch_log_throttle ; + int unexpected_pulse_log_throttle ; + + /** Pulse Next and Previous Link pointers for creating + * a per-interface pulse link list */ + struct { + + /** previous pulse pointer used to create the pulse linked list for one interface */ + struct node * prev_ptr ; + + /** next pulse pointer used to create the pulse linked list for one interface */ + struct node * next_ptr ; + + } pulse_link [MAX_IFACES] ; + + /** The link index number for this node is while in an interface pulse linked list */ + int linknum [MAX_IFACES] ; + + /** true if this host is to be monitored for this indexed interface */ + bool monitor [MAX_IFACES] ; + + /** Ongoing heartbeat count cleared on 
HBS_START reset */ + int hbs_count [MAX_IFACES] ; + + /** Immediate running count of consecutive heartbeat misses */ + int b2b_misses_count [MAX_IFACES]; + + /** Maximum heartbeat misses since node was last brought into service */ + int max_count [MAX_IFACES]; + + /** total times minor count was exceeded */ + int hbs_minor_count [MAX_IFACES]; + + /** total times this host degraded due to heartbeat misses */ + int hbs_degrade_count [MAX_IFACES]; + + /** total times this host failed due to heartbeat loss */ + int hbs_failure_count [MAX_IFACES]; + + /** current state of heartbeat failure per interface for mtcAgent */ + bool heartbeat_failed [MAX_IFACES]; + + /** Resource reference identifier, aka resource reference array index */ + int rri ; + + /** @} private_Heartbeat_variables */ + + /** + * @addtogroup private_boad_management_variables + * @{ + * + * Various host specific board management variables. + */ + + /** The IP address of the host's board management controller */ + string bm_ip ; + + /** The password of the host's board management controller */ + string bm_pw ; + + /** A string label that represents the board management + * controller type for this host */ + string bm_type ; + + /** The operator provisioned board management hostname */ + string bm_un ; + + /* Indicates there is a board management test + * for this host in progress */ + bool bm_test_in_progress ; + + /* Indicates there is a board management operation + * in progress on this host */ + bool bm_oper_in_progress ; + + /** + * The BMC is 'accessible' once provisioning data is available + * and bmc is verified pingable. + **/ + bool bm_accessible; + + /** @} private_boad_management_variables */ + + /** + * @addtogroup private_monitoring_services_variables + * @{ + * + * A grouping a of flags, mask and degrade resource lists + * used to manage the degrade state of a host for process + * and resource monitoring services. 
+ */ + + /* Bit mask of degrade reasons */ + unsigned int degrade_mask ; + + /** Process Monitor Daemon Flag Missing count */ + int pmon_missing_count ; + + /** Host degraded due to loss of Process Monitor running flag */ + bool pmon_degraded ; + + /** Process Monitor Ready flag and degrade list */ + bool pmond_ready ; + + /** Hardware Monitor Ready flag and degrade list */ + bool hwmond_ready ; + bool hwmond_monitor ; + + /** Heartbeat Client process ready to heartbeat flag */ + bool hbsClient_ready ; + + /** hwmon reset and powercycle recovery control structure */ + recovery_ctrl_type hwmon_reset ; + recovery_ctrl_type hwmon_powercycle ; + + /** Resource Monitor Daemon Flag Missing count */ + int rmond_missing_count ; + + /** Host degraded due to loss of Resource Monitor running flag */ + bool rmond_degraded ; + + /** Resource Monitor Ready flag and degrade list */ + bool rmond_ready ; + std::list degraded_resources_list ; + + /** process or resource list string iterator */ + std::list::iterator string_iter_ptr ; + + /** @} private_monitoring_services_variables */ + + /* List of alarms and current severity */ + EFmAlarmSeverityT alarms[MAX_ALARMS]; + + /* tracks whether the alarms for this host have been loaded already or not */ + bool alarms_loaded ; + + /** true if this host has recovered before the mnfa timeout period. + * This bool flags the graceful recovery handler that this node + * is recovering from mnfa and should manage graceful recovery + * and uptime accordingly */ + bool mnfa_graceful_recovery ; + + int stress_iteration ; + + /* for bmc ping access monitor */ + ping_info_type bm_ping_info ; + + /* the bmc info struct filled in and log printed by a + * call to ipmiUtil_mc_info_load. 
*/ + mc_info_type mc_info ; + + bool mc_info_query_active ; + bool mc_info_query_done ; + + bool reset_cause_query_active ; + bool reset_cause_query_done ; + + bool power_status_query_active ; + bool power_status_query_done ; + bool power_on = false ; + + /* a timer used in the bm_handler to query + * the mc_info and reset cause */ + struct mtc_timer bm_timer ; + + /* timer used to manage the bmc access alarm */ + struct mtc_timer bmc_access_timer ; + + /***************************************************** + * Maintenance Thread Structs + *****************************************************/ + /* control data the parent uses to manage the thread */ + thread_ctrl_type ipmitool_thread_ctrl ; + + /*info the thread uses to execute and post results */ + thread_info_type ipmitool_thread_info ; + + /* extra thread info for board management control thread */ + thread_extra_info_type thread_extra_info ; + + }; + + struct node * head ; /**< Node Linked List Head pointer */ + struct node * tail ; /**< Node Linked List Tail pointer */ + + /** Allocate memory for a new node. + * + * Preserves the node address in the node_ptr list and increments + * the memory_allocs counter used by the inservice test audit. + * + * @return + * a pointer to the memory of the newly allocated node */ + struct nodeLinkClass::node * newNode ( void ); + + /** Build the Resource Reference Array */ + void build_rra ( void ); + + /** Free the memory used by a node. + * + * The memory to be removed is found in the node_ptr list, cleared and + * the memory_allocs counter is decremented. + * If the memory cannot be found then an error is returned. + * + * @param node_ptr + * is a pointer to the node to be freed + * @return + * a signed integer of PASS or -EINVAL + */ + int delNode ( struct nodeLinkClass::node * node_ptr ); + + /** Start heartbeating a new node. + * + * Node is added to the end of the node linked list. 
+ * + * @param node_info_ptr + * is a pointer containing pertinent info about the physical node + * @return + * a pointer to the newly added node + */ + struct nodeLinkClass::node* addNode ( string hostname ); + struct nodeLinkClass::node* addUuid ( string uuid ); + + /** Stop heartbeating a node. + * + * Node is spliced out of the node linked list. + * + * @param node_info_ptr + * is a pointer containing info required to find the node in the node list + * @return + * an integer of PASS or -EINVAL */ + int remNode ( string hostname ); + + /** Get pointer to "hostname" node. + * + * Node list lookup by pointer from hostname. + * + * @param node_info_ptr + * is a pointer containing info required to find the node in the node list + * @return + * a pointer to the hostname's node + */ + struct nodeLinkClass::node* getNode ( string hostname ); + + /** Get the node pointer based on the service and libevent base pointer. + * + * Node list lookup by pointer service and libevent base pointer. + * + * @param libEvent_enum + * service type + * @param base_ptr + * pointer to the libEvent base + * + * @return + * a pointer to the hostname's node + */ + struct nodeLinkClass::node* getEventBaseNode ( libEvent_enum service, + struct event_base * base_ptr); + + /** Get a reference to the libEvent containing the supplied + * libEvent.base pointer. 
+ * + * @param base_ptr + * pointer to the libEvent base + * + * @return + * reference to valid or null libEvent + */ + libEvent & getEvent ( struct event_base * base_ptr); + + int manage_dnsmasq_bmc_hosts ( struct nodeLinkClass::node * node_ptr ); + + /* run the maintenance fsm against a host */ + int fsm ( struct nodeLinkClass::node * node_ptr ); + + /* specific handlers called within the fsm */ + int enable_handler ( struct nodeLinkClass::node * node_ptr ); + int recovery_handler ( struct nodeLinkClass::node * node_ptr ); + int disable_handler ( struct nodeLinkClass::node * node_ptr ); + int add_handler ( struct nodeLinkClass::node * node_ptr ); + int delete_handler ( struct nodeLinkClass::node * node_ptr ); + int cfg_handler ( struct nodeLinkClass::node * node_ptr ); + int cmd_handler ( struct nodeLinkClass::node * node_ptr ); + int swact_handler ( struct nodeLinkClass::node * node_ptr ); + int reset_handler ( struct nodeLinkClass::node * node_ptr ); + int reboot_handler ( struct nodeLinkClass::node * node_ptr ); + int reinstall_handler ( struct nodeLinkClass::node * node_ptr ); + int power_handler ( struct nodeLinkClass::node * node_ptr ); + int powercycle_handler ( struct nodeLinkClass::node * node_ptr ); + int offline_handler ( struct nodeLinkClass::node * node_ptr ); + int online_handler ( struct nodeLinkClass::node * node_ptr ); + int oos_test_handler ( struct nodeLinkClass::node * node_ptr ); + int insv_test_handler ( struct nodeLinkClass::node * node_ptr ); + int stress_handler ( struct nodeLinkClass::node * node_ptr ); + int bm_handler ( struct nodeLinkClass::node * node_ptr ); + int uptime_handler ( void ); + + int host_services_handler ( struct nodeLinkClass::node * node_ptr ); + + /* Starts the specified 'reset or powercycle' recovery monitor */ + int hwmon_recovery_monitor ( struct nodeLinkClass::node * node_ptr, int hwmon_event ); + + /* server specific power state query handler */ + bool (*is_poweron_handler) (string hostname, string 
query_response ); + + /* Calculate the overall reset progression timeout */ + int calc_reset_prog_timeout ( struct nodeLinkClass::node * node_ptr, int retries ); + + /* These interfaces will start and stop the offline FSM if not already active */ + void start_offline_handler ( struct nodeLinkClass::node * node_ptr ); + void stop_offline_handler ( struct nodeLinkClass::node * node_ptr ); + + /***************************************************************************** + * + * Name : ipmi_command_send + * + * Description: This utility starts the ipmitool command handling thread + * with the specified command. + * + * Returns : PASS if all the pre-start semantic checks pass and the + * thread was started. + * + * Otherwise the thread was not started and some non zero + * FAIL_xxxx code is returned after a representative design + * log is generated. + * + *****************************************************************************/ + + int ipmi_command_send ( struct nodeLinkClass::node * node_ptr, int command ) ; + + /***************************************************************************** + * + * Name : ipmi_command_recv + * + * Description: This utility will check for ipmitool command thread completion. + * + * Returns : PASS is returned if the thread reports done. + * RETRY is returned if the thread has not completed. + * FAIL_RETRY is returned after 10 back-to-back calls return RETRY. + * + * Assumptions: The caller is expected to call ipmi_command_done once it has + * consumed the results of the thread + * + *****************************************************************************/ + + int ipmi_command_recv ( struct nodeLinkClass::node * node_ptr ); + + /***************************************************************************** + * + * Name : ipmi_command_done + * + * Description: This utility frees the ipmitool command thread for next execution. 
+ * + *****************************************************************************/ + + void ipmi_command_done ( struct nodeLinkClass::node * node_ptr ); + + /* default all the BMC access variables to the "no access" state */ + void bmc_access_data_init ( struct nodeLinkClass::node * node_ptr ); + + /* Combo Host enable handler */ + int enable_subf_handler ( struct nodeLinkClass::node * node_ptr ); + + /** set all service readies to false so that when the first one comes in + * it will be logged */ + void clear_service_readies ( struct nodeLinkClass::node * node_ptr ); + + int update_dport_states ( struct nodeLinkClass::node * node_ptr, int event ); + + /* manage deciding to return or issue an immediate reboot if the + * auto recovery threshold is exceeded. */ + void manage_autorecovery ( struct nodeLinkClass::node * node_ptr ); + + /** *********************************************************************** + * + * Name : nodeLinkClass::workQueue_process + * + * Description: This is a Per Host Finite State Machine (FSM) that + * processes the work queue for the supplied host's + * node pointer. + * + * Constructs: + * + * node_ptr->libEvent_work_fifo - the current work queue/fifo + * node_ptr->libEvent_done_fifo - queue/fifo of completed requests + * + * Operations: + * + * requests are added to the libEvent_work_fifo with workQueue_enqueue. + * requests are removed from the libEvent_done_fifo with doneQueue_dequeue. + * + * Behavior: + * + * In-process libEvents are copied from the caller's work queue to + * its thisReq. + * + * Completed events including execution status are copied to the host's + * done fifo. + * + * Failed events may be retried up to max_retries as specified by + * the caller's libEvent. + * + * @param node_ptr is a pointer to the host whose work queue is processed.
+ * + * @return an integer with values of PASS, FAIL, RETRY + * + * Implementation: in maintenance/mtcWorkQueue.cpp + * + * ************************************************************************/ + int workQueue_process ( struct nodeLinkClass::node * node_ptr ); + + /** *********************************************************************** + * + * Name : nodeLinkClass::workQueue_del_cmd + * + * Description: Handles the pathological case where an event seems to + * have timed out at the caller's level; this interface + * can then be called to delete it from the work queue. + * + * @param node_ptr so that the host's work queue can be found + * @param sequence to specify the specific sequence number to remove + * @return always PASS since there is nothing the caller can or needs + * to do if the command is not present. + * + * Implementation: in maintenance/mtcWorkQueue.cpp + * + */ + int workQueue_del_cmd ( struct nodeLinkClass::node * node_ptr, int sequence ); + + int doneQueue_purge ( struct nodeLinkClass::node * node_ptr ); + int workQueue_purge ( struct nodeLinkClass::node * node_ptr ); + int workQueue_done ( struct nodeLinkClass::node * node_ptr ); + void workQueue_dump ( struct nodeLinkClass::node * node_ptr ); + void doneQueue_dump ( struct nodeLinkClass::node * node_ptr ); + + int mtcCmd_workQ_purge( struct nodeLinkClass::node * node_ptr ); + int mtcCmd_doneQ_purge( struct nodeLinkClass::node * node_ptr ); + void mtcCmd_workQ_dump ( struct nodeLinkClass::node * node_ptr ); + void mtcCmd_doneQ_dump ( struct nodeLinkClass::node * node_ptr ); + + void force_full_enable ( struct nodeLinkClass::node * node_ptr ); + + int adminActionChange ( struct nodeLinkClass::node * node_ptr, + mtc_nodeAdminAction_enum newActionState ); + + /** Host Administrative State Change member function */ + int adminStateChange ( struct nodeLinkClass::node * node_ptr, + mtc_nodeAdminState_enum newAdminState ); + + /** Host Operational State Change member function */ + int
operStateChange ( struct nodeLinkClass::node * node_ptr, + mtc_nodeOperState_enum newOperState ); + + /** Host Availability Status Change member function */ + int availStatusChange ( struct nodeLinkClass::node * node_ptr, + mtc_nodeAvailStatus_enum newAvailStatus ); + + + int allStateChange ( struct nodeLinkClass::node * node_ptr, + mtc_nodeAdminState_enum adminState, + mtc_nodeOperState_enum operState, + mtc_nodeAvailStatus_enum availStatus ); + + int subfStateChange ( struct nodeLinkClass::node * node_ptr, + mtc_nodeOperState_enum operState_subf, + mtc_nodeAvailStatus_enum availStatus_subf ); + + /** Host Enable Handler Stage Change member function */ + int enableStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_enableStages_enum newHdlrStage ); + + /** Host Disable Handler Stage Change member function */ + int disableStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_disableStages_enum newHdlrStage ); + + /** Host configuration stage Change member function */ + int configStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_configStages_enum newHdlrStage ); + + /** Host Reset Handler Stage Change member function */ + int resetStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_resetStages_enum newHdlrStage ); + + /** Host Reinstall Handler Stage Change member function */ + int reinstallStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_reinstallStages_enum newHdlrStage ); + + /** Host Fast graceful Recovery Handler Stage Change member function */ + int recoveryStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_recoveryStages_enum newHdlrStage ); + + /** Host Power control Handler Stage Change member function */ + int powerStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_powerStages_enum newHdlrStage ); + + /** Host Powercycle control Handler Stage Change member function */ + int powercycleStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_powercycleStages_enum newHdlrStage ); + + /** Out-Of-Service 
Test Stage Change member function */ + int oosTestStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_oosTestStages_enum newHdlrStage ); + + /** Inservice Test Stage Change member function */ + int insvTestStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_insvTestStages_enum newHdlrStage ); + + /** Host Sensor Handler Stage Change member function */ + int sensorStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_sensorStages_enum newHdlrStage ); + + /** Generic Substage Stage change member function */ + int subStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_subStages_enum newHdlrStage ); + + int failed_state_change ( struct nodeLinkClass::node * node_ptr ); + + /* issue a + * - one way lazy reboot with + * - graceful SM services shutdown and + * - failsafe backup sysreq reset + */ + int lazy_graceful_fs_reboot ( struct nodeLinkClass::node * node_ptr ); + + int alarm_enabled_clear ( struct nodeLinkClass::node * node_ptr, bool force ); + int alarm_enabled_failure ( struct nodeLinkClass::node * node_ptr ); + + int alarm_insv_clear ( struct nodeLinkClass::node * node_ptr, bool force ); + int alarm_insv_failure ( struct nodeLinkClass::node * node_ptr ); + + int alarm_config_clear ( struct nodeLinkClass::node * node_ptr ); + int alarm_config_failure ( struct nodeLinkClass::node * node_ptr ); + + int alarm_compute_clear ( struct nodeLinkClass::node * node_ptr, bool force ); + int alarm_compute_failure ( struct nodeLinkClass::node * node_ptr , EFmAlarmSeverityT sev ); + + void clear_subf_failed_bools ( struct nodeLinkClass::node * node_ptr ); + void clear_main_failed_bools ( struct nodeLinkClass::node * node_ptr ); + void clear_hostservices_ctls ( struct nodeLinkClass::node * node_ptr ); + + /** Find the node that has this timerID in its general mtc timer */ + struct nodeLinkClass::node * get_mtcTimer_timer ( timer_t tid ); + struct nodeLinkClass::node * get_mtcConfig_timer ( timer_t tid ); + struct nodeLinkClass::node * get_mtcAlive_timer 
( timer_t tid ); + struct nodeLinkClass::node * get_offline_timer ( timer_t tid ); + struct nodeLinkClass::node * get_mtcSwact_timer ( timer_t tid ); + struct nodeLinkClass::node * get_mtcCmd_timer ( timer_t tid ); + struct nodeLinkClass::node * get_oosTestTimer ( timer_t tid ); + struct nodeLinkClass::node * get_insvTestTimer ( timer_t tid ); + struct nodeLinkClass::node * get_power_timer ( timer_t tid ); + struct nodeLinkClass::node * get_http_timer ( timer_t tid ); + struct nodeLinkClass::node * get_thread_timer ( timer_t tid ); + struct nodeLinkClass::node * get_ping_timer ( timer_t tid ); + struct nodeLinkClass::node * get_bm_timer ( timer_t tid ); + struct nodeLinkClass::node * get_bmc_access_timer ( timer_t tid ); + struct nodeLinkClass::node * get_host_services_timer ( timer_t tid ); + + struct nodeLinkClass::node * get_powercycle_control_timer ( timer_t tid ); + struct nodeLinkClass::node * get_powercycle_recovery_timer ( timer_t tid ); + struct nodeLinkClass::node * get_reset_control_timer ( timer_t tid ); + struct nodeLinkClass::node * get_reset_recovery_timer ( timer_t tid ); + + /* Launch the specified host services command start or stop for any host + * type into the cmd_handler. In support of AIO a subf bool is optional + * and forces the command to be COMPUTE (subfunction). 
+ * - requires cmd_handler fsm */ + int launch_host_services_cmd ( struct nodeLinkClass::node * node_ptr, bool start , bool subf=false ); + + /* Private SYSINV API */ + int mtcInvApi_update_task ( struct nodeLinkClass::node * node_ptr, string task ); + int mtcInvApi_update_task_now ( struct nodeLinkClass::node * node_ptr, string task ); + int mtcInvApi_force_task ( struct nodeLinkClass::node * node_ptr, string task ); + int mtcInvApi_update_task ( struct nodeLinkClass::node * node_ptr, const char * task_str_ptr, int one ); + int mtcInvApi_update_task ( struct nodeLinkClass::node * node_ptr, const char * task_str_ptr, int one, int two ); + + int mtcInvApi_update_value ( struct nodeLinkClass::node * node_ptr, string key, string value ); + int mtcInvApi_update_uptime ( struct nodeLinkClass::node * node_ptr, unsigned int uptime ); + + int mtcInvApi_subf_states ( struct nodeLinkClass::node * node_ptr, string oper_subf, string avail_subf ); + int mtcInvApi_force_states ( struct nodeLinkClass::node * node_ptr, string admin, string oper, string avail ); + int mtcInvApi_update_states ( struct nodeLinkClass::node * node_ptr, string admin, string oper, string avail ); + int mtcInvApi_update_states_now ( struct nodeLinkClass::node * node_ptr, string admin, string oper, string avail, string oper_subf, string avail_subf); + int mtcInvApi_update_state ( struct nodeLinkClass::node * node_ptr, string state, string value ); + + /* Private SM API */ + int mtcSmgrApi_request ( struct nodeLinkClass::node * node_ptr, mtc_cmd_enum operation, int retries ); + + /* Private VIM API */ + int mtcVimApi_state_change ( struct nodeLinkClass::node * node_ptr, libEvent_enum operation, int retries ); + + int set_bm_prov ( struct nodeLinkClass::node * node_ptr, bool state ); + + void set_uptime ( struct nodeLinkClass::node * node_ptr, unsigned int uptime, bool force ); + + // #endif /* WANT_MTC */ + + /** Interface to assert or clear severity specific heartbeat alarms */ + void manage_heartbeat_alarm
( struct nodeLinkClass::node * node_ptr, EFmAlarmSeverityT sev, int iface ); + + /** Returns the heartbeat monitoring state for the specified interface */ + bool get_hbs_monitor_state ( string & hostname, int iface ); + + /** List of allocated node memory. + * + * An array of node pointers. + */ + nodeLinkClass::node * node_ptrs[MAX_NODES] ; + + /** A memory allocation counter. + * + * Should represent the number of nodes in the linked list. + */ + int memory_allocs ; + + /** A memory used counter + * + * A variable storing the accumulated node memory + */ + int memory_used ; + + /** Inservice memory management audit. + * + * Verifies that the node_ptrs list and memory_allocs agree as well + * as all the node pointers point to a node in the linked list. + * + * @return + * an integer representing a PASS or TODO: list other error codes. + */ + int memory_audit ( void ); + + + /* Simplex mode auto recovery bools + * + * Set to true when the autorecovery threshold is reached + * and we want to avoid taking further autorecovery action + * even though it may be requested. */ + bool autorecovery_disabled ; + + /* Set to true by fault detection methods that are + * autorecoverable when in simplex mode. */ + bool autorecovery_enabled ; + + /** Tracks the number of hosts that 'are currently' in service trouble + * wrt heartbeat (above minor threshold). + * This is used in multi-host failure avoidance. + **/ + int mnfa_host_count[MAX_IFACES] ; + + /** Tracks the number of times multi failure avoidance was exited */ + int mnfa_occurances ; + + /** true when the multi node failure count exceeds the multi + * node failure avoidance threshold and until there are no more + * in service trouble hosts */ + bool mnfa_active ; + + /** Recover or exit from the multi-node failure avoidance state + * This involves restarting the heartbeat on all the nodes + * that remain hbs_minor and clearing any heartbeat degrade + * states that remain.
*/ + void mnfa_exit ( bool force ); + void mnfa_enter ( void ); + void mnfa_add_host ( struct nodeLinkClass::node * node_ptr, iface_enum iface ); + void mnfa_recover_host ( struct nodeLinkClass::node * node_ptr ); + void hbs_minor_clear ( struct nodeLinkClass::node * node_ptr, iface_enum iface ); + + /* Dead Office Recovery - system level controls */ + void manage_dor_recovery ( struct nodeLinkClass::node * node_ptr, EFmAlarmSeverityT severity ); + void report_dor_recovery ( struct nodeLinkClass::node * node_ptr, string node_state_log_prefix ); + + struct { + struct node * head_ptr ; /**< Pulse Linked List Head pointer */ + struct node * tail_ptr ; /**< Pulse Linked List Tail pointer */ + struct node * last_ptr ; /**< Pulse Linked List running last pointer */ + } pulse_list [MAX_IFACES] ; + + /** General Pulse Pointer used to build pulse linked list */ + struct node * pulse_ptr ; + + /** Number of monitored hosts (nodes) for a specified interface */ + int pulses[MAX_IFACES] ; + + /** Resource reference Array: An array used to store + * resource references for the purpose of fast resource + * lookup making the heartbeat service more scalable. + * + * In this case it is an array of node link pointers + * that are in the current active pulse list. */ + struct node * hbs_rra[MAX_NODES]; + + /** Pulse list node lookup pointer by hostname. + * + * Get pointer to "hostname" node located in the pulse list. + * + * @param hostname - a string containing the name of the host + * to be searched for in the pulse list. + * @param iface - iface_enum specifying which interface linked + * list to search. + * + * @return pointer to the node's control struct + */ + struct nodeLinkClass::node* getPulseNode ( string & hostname, iface_enum iface ); + + /** Manage the heartbeat pulse flags by node pointer + * + * These flags contain service information sent by the replying host.
+ * One example of this is the pmond flag which indicates whether the process + * monitor is running on that host. + * + * Flags that are not set are thresholded for degrade or alarm assertion + * or cleared when found to be set again. + * + * @param pulse_ptr - node's control struct pointer + * @param flags - integer containing a bit field set of flags + * + * */ + void manage_pulse_flags ( struct nodeLinkClass::node* pulse_ptr, unsigned int flags ); + + /** Remove a node from the pulse list by name, index or node pointer + * + * Deal with all the removal cases ; head, tail, full splice + * + * @return + * an integer of PASS or -FAULT, -ENXIO + */ + int remPulse_by_name ( string & hostname, iface_enum iface, bool clear_b2b_misses_count, unsigned int flags ); + int remPulse_by_index ( string hostname, int index, iface_enum iface, bool clear_b2b_misses_count, unsigned int flags ); + int remPulse ( struct node * node_ptr, iface_enum iface, bool clear_b2b_misses_count, unsigned int flags ); + + + /** Debug Dump Log Interfaces */ + void mem_log_general ( void ); + void mem_log_general_mtce_hosts ( void ); + void mem_log_mnfa ( void ); + + void mem_log_dor ( struct nodeLinkClass::node * node_ptr ); + void mem_log_identity ( struct nodeLinkClass::node * node_ptr ); + void mem_log_network ( struct nodeLinkClass::node * node_ptr ); + void mem_log_state1 ( struct nodeLinkClass::node * node_ptr ); + void mem_log_state2 ( struct nodeLinkClass::node * node_ptr ); + void mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr ); + void mem_log_mtcalive ( struct nodeLinkClass::node * node_ptr ); + void mem_log_stage ( struct nodeLinkClass::node * node_ptr ); + void mem_log_test_info ( struct nodeLinkClass::node * node_ptr ); + void mem_log_bm ( struct nodeLinkClass::node * node_ptr ); + void mem_log_heartbeat ( struct nodeLinkClass::node * node_ptr ); + void mem_log_hbs_cnts ( struct nodeLinkClass::node * node_ptr ); + void mem_log_type_info ( struct nodeLinkClass::node * 
node_ptr ); + void mem_log_reset_info( struct nodeLinkClass::node * node_ptr ); + void mem_log_power_info( struct nodeLinkClass::node * node_ptr ); + void mem_log_thread_info ( struct nodeLinkClass::node * node_ptr ); + + void print_node_info ( struct nodeLinkClass::node * node_ptr ); + +// #endif + +/** Public Interfaces that allow hosts to be + * added or removed from maintenance. + */ +public: + + nodeLinkClass(); /**< constructor */ + ~nodeLinkClass(); /**< destructor */ + + system_type_enum system_type ; + + string functions ; /**< comma delimited string list of functions supported */ + bool maintenance ; + bool heartbeat ; + + /* offline_handler tuning controls */ + int offline_threshold ; /* number of back to back mtcAlive misses before offline */ + int offline_period ; /* offline handler mtcAlive request period */ + + /* dor mode data ; state and start time + * - start time is used to compare how long slave hosts take to come up + * after the active controller has entered dor mode */ + bool dor_mode_active ; + unsigned int dor_start_time ; + int dor_mode_active_log_throttle ; + + bool hbs_disabled ; /**< Control heartbeat service state */ + bool hbs_state_change ; /**< Flag service state change */ + int hbs_pulse_period ; /**< The current pulse period in msec */ + int hbs_pulse_period_save ; /**< preserved copy of hbs_pulse_period */ + + /** a loop counter used to detect when the heartbeat service is silently failing */ + int hbs_silent_fault_detector ; + + /* prevents flooding FM with the silent_fault detected log */ + int hbs_silent_fault_logged ; + + /* tracks the number of pulse requests set on each interface */ + int pulse_requests[MAX_IFACES] ; + + /** The number of heartbeat misses that result in a + * minor notification to maintenance */ + int hbs_minor_threshold ; + /** The number of heartbeat misses that result in a degraded state */ + int hbs_degrade_threshold ; + /** The number of heartbeat misses that result in a failed state */ + int
hbs_failure_threshold ; + + /** Running Resource Reference Identifier */ + int rrri ; + + bool active ; + bool is_active ( void ) + { return (active); } + void set_activity_state ( bool state ) + { active = state ; } + + /** Store the hostname of this controller */ + string my_hostname ; /**< This controller's hostname */ + string my_local_ip ; /**< Primary IP address */ + string my_float_ip ; /**< Secondary (floating) IP address */ + + /********* New Public Constructs for IPMI Command Handling ***********/ + + /* the main fsm entrypoint to service all hosts */ + void fsm ( void ) ; + + /** This controller's hostname set'er */ + void set_my_hostname ( string hostname ); + + /** This controller's hostname get'er */ + string get_my_hostname ( void ); + + /** This controller's local ip addr set'er */ + void set_my_local_ip ( string & hostname ); + + /** This controller's local ip addr get'er */ + string get_my_local_ip ( void ); + + /** This controller's floating ip addr set'er */ + void set_my_float_ip ( string & hostname ); + + /** This controller's floating ip addr get'er */ + string get_my_float_ip ( void ); + + /** get ip address for any hostname */ + string get_hostaddr ( string & hostname ); + + /** get mac address for any hostname and specified interface */ + string get_hostIfaceMac ( string & hostname, int iface ); + + /** get infrastructure network ip address for any hostname */ + string get_infra_hostaddr ( string & hostname ); + + /** set a node's ip address */ + int set_hostaddr ( string & hostname, string & ip ); + + /** set a node's infrastructure ip address */ + int set_infra_hostaddr ( string & hostname, string & ip ); + + /** get hostname for any host address */ + string get_hostname ( string & hostaddr ); + + /******************************/ + /* NODE TYPE Member Functions */ + /******************************/ + + /** Fetch the node type (compute or controller) by hostname */ + int get_nodetype ( string & hostname ); + + /** Check if a node is a controller */ + bool is_controller (
struct nodeLinkClass::node * node_ptr ); + + /** Check if a node is a compute */ + bool is_compute ( struct nodeLinkClass::node * node_ptr ); + bool is_compute_subfunction ( struct nodeLinkClass::node * node_ptr ); + + string get_node_function_str ( string hostname ); + string get_node_subfunction_str ( string hostname ); + + /** Check if a node is a storage */ + bool is_storage ( struct nodeLinkClass::node * node_ptr ); + + /** Check if a node is a controller by hostname */ + bool is_controller ( string & hostname ); + + /** Check if a node is a compute by hostname */ + bool is_compute ( string & hostname ); + bool is_compute_subfunction ( string & hostname ); + + /** Check if a node is a storage by hostname */ + bool is_storage ( string & hostname ); + + /** Sets a hosts's function and subfunction members */ + int update_host_functions ( string hostname , string functions ); + + /***********************************************************/ + + /** Number of provisioned hosts (nodes) */ + int hosts ; + + /** Host has been deleted */ + bool host_deleted ; + + /** Host Administrative State Change public member function */ + int admin_state_change ( string hostname, + string newAdminState ); + + /** Host Operational State Change public member function */ + int oper_state_change ( string hostname, + string newOperState ); + + /** Host Availability Status Change public member function */ + int avail_status_change ( string hostname, + string newAvailStatus ); + + /** Host Subfunction Operational State Change public member function */ + int oper_subf_state_change ( string hostname, + string newOperState ); + + /** Host Subfunction Availability Status Change public member function */ + int avail_subf_status_change ( string hostname, + string newAvailStatus ); + + + + /** Update mtce Key with Value */ + int update_key_value ( string hostname, string key , string value ); + + /** This is the list of inventory by hostname. 
+ * The Maintenance FSM loops over this list + * to provide maintenance service */ + std::list<string> hostname_inventory ; + std::list<string>::iterator host ; + + std::list<string> mnfa_awol_list ; + void mnfa_timeout_handler ( void ); + + /** Return the number of inventoried hosts */ + int num_hosts ( void ); + + /** ********************************************************************** + * + * Name : nodeLinkClass::workQueue_enqueue + * + * Description: Adds the next sequence number to the supplied event + * reference, creates a log prefix based on the event's + * hostname, service, operation and sequence number + * (to avoid repeated recreation) and then copies that + * event to the work queue. + * + * @param event is a reference to the caller's libEvent. + * @return an integer with value of PASS. + * + * Implementation: in maintenance/mtcWorkQueue.cpp + * + * *********************************************************************/ + int workQueue_enqueue ( libEvent & event ); + + /** ********************************************************************** + * + * Name : nodeLinkClass::doneQueue_dequeue + * + * Description: Searches the done queue for the event matching the supplied + * event reference, specifically the sequence number. If found + * it pulls the execution status information and then proceeds + * to remove it from the done queue. + * + * If the event is found then the event status is returned. + * If not found then a RETRY is returned. + * If the done event status is RETRY then a FAIL is returned since + * it should not be on the done queue with a retry status.
+ * + * @param event is a reference to the callers libEvent + * @return an integer with values of PASS, FAIL, RETRY + * + * Implementation: in maintenance/mtcWorkQueue.cpp + * + * ************************************************************************/ + int doneQueue_dequeue ( libEvent & event ); + + bool workQueue_present ( libEvent & event ); + void workQueue_dump_all ( void ); + void doneQueue_dump_all ( void ); + void mtcCmd_workQ_dump_all ( void ); + void mtcCmd_doneQ_dump_all ( void ); + + + /** Add a host to the Node list */ + int add_host ( node_inv_type & inv ); + int mod_host ( node_inv_type & inv ); + int set_host_failed ( node_inv_type & inv ); + + /** Check to see if the node list already contains any of the following + * information and reject the add or modify if it does + * + * uuid + * hostname + * ip address + * mac address + * + **/ + int add_host_precheck ( node_inv_type & inv ); + + int del_host ( string uuid ); + + /** Returns empty string if not provisioned or the name of the host if it is */ + string get_host ( string uuid ); + string get_uuid ( string hostname ); + void set_uuid ( string hostname, string uuid ); + void set_task ( string hostname, string task ); + + /** Updates the hostname and resource reference identifier + * based on the next one in the cycle */ + void get_rris ( string & hostname, int & rri ); + + /** Performs a service-affecting semantic check on whether + * the specified uuid can be locked. + * In the case of a compute node it asks Nova. + * In the case of a controller it verifies that there is + * another controller active and inservice.
+ * + * @params uuid string + * @params reason int + * + * @returns true if locked and false otherwise + * + */ + bool can_uuid_be_locked ( string uuid , int & reason ); + +//#ifdef WANT_HBS + /** Add a host to the Node list */ + int add_heartbeat_host ( const node_inv_type &inv ); +// #endif + + void host_print ( struct nodeLinkClass::node * node_ptr ); + + /** Remove a host from Node list */ + int rem_host ( string & hostname ); + + /* Returns the active client. */ + mtc_client_enum get_activeClient ( string hostname ); + + /* Sets the active client for this particular host. The first use of this + * is for reset/reboot acknowledge to the VIM over an evacuate reset request + * from within the reboot handler. */ + int set_activeClient ( string hostname, mtc_client_enum client ); + + /** Get the number of compute hosts that are operationally 'enabled' */ + int enabled_compute_nodes ( void ); + + /** Get the number of storage hosts that are operationally 'enabled' */ + int enabled_storage_nodes ( void ); + + /** get the number of hosts that are enabled excluding the active controller */ + int enabled_nodes ( void ); + + /** Get the system's storage backend type */ + int get_storage_backend ( void ); + + /** Returns true if the storage pool has a monitor running on + * an unlocked-enabled storage host */ + bool is_storage_mon_enabled ( void ) ; + + /** true if the management link's operational state is up and running */ + bool mgmnt_link_up_and_running ; + bool infra_link_up_and_running ; + + /** A boolean that is used to quickly determine if the infrastructure + * network is provisioned and configured for this daemon to use */ + bool infra_network_provisioned ; + + /** A debug bool that allows infrastructure heartbeat failures to only + * cause host degrade rather than failure */ + bool infra_degrade_only ; + + int service_netlink_events ( int nl_socket , int ioctl_socket ); + void manage_heartbeat_minor ( string hostname, iface_enum iface, bool clear_event ); + void
manage_heartbeat_degrade ( string hostname, iface_enum iface, bool clear_event ); + void manage_heartbeat_failure ( string hostname, iface_enum iface, bool clear_event ); + + /* Clear heartbeat failed flag for all interfaces */ + void manage_heartbeat_clear ( string hostname, iface_enum iface ); + + /** Test and Debug Members and Variables */ + + /** Print node info banner */ + void print_node_info ( void ); + + int testhead ( int test ); + + int testmode ; + +// #ifdef WANT_MTC + + /** Hostname of the Active Controller */ + std::string active_controller_hostname ; + + /** Hostname of the Inactive Controller */ + std::string inactive_controller_hostname ; + + bool inactive_controller_is_patched ( void ); + bool inactive_controller_is_patching ( void ); + + string get_inactive_controller_hostname ( void ); + void set_inactive_controller_hostname ( string hostname ); + + string get_active_controller_hostname ( void ); + void set_active_controller_hostname ( string hostname ); + + /** Returns 'true' if inactive controller main/subfunction is in-service + * + * In-Service if "unlocked-enabled-available or + * unlocked-enabled-degraded + */ + bool is_inactive_controller_main_insv ( void ); + bool is_inactive_controller_subf_insv ( void ); + + /** Returns true if the specified hostname is the active controller */ + bool is_active_controller ( string hostname ); + + /** Returns number of enabled controllers */ + int num_controllers_enabled ( void ); + + /** Run the FSM against the specified host */ + int run_fsm ( string hostname ); + + /** Post a specific enable handler stage */ + int set_enableStage ( string & hostname, mtc_enableStages_enum stage ); + + /** Get a posted enable handler stage */ + mtc_enableStages_enum get_enableStage ( string & hostname ); + + /* Set the reboot stage */ + int set_rebootStage ( string & hostname, mtc_resetProgStages_enum stage ); + + + + /** handle an expired timer. 
Find the node with this + * timer ID and set its ringer */ + void timer_handler ( int sig, siginfo_t *si, void *uc); + + struct mtc_timer mtcTimer ; + struct mtc_timer mtcTimer_mnfa ; + struct mtc_timer mtcTimer_token ; + struct mtc_timer mtcTimer_uptime ; + + /* System Level DOR recovery timer + * Note: tid != NULL represents DOR Mode Active */ + struct mtc_timer mtcTimer_dor ; + + unsigned int get_cmd_resp ( string & hostname ); + void set_cmd_resp ( string & hostname, mtc_message_type & msg ); + + void set_uptime ( string & hostname, unsigned int uptime, bool force ); + unsigned int get_uptime ( string & hostname ); + + void set_uptime_refresh_ctr ( string & hostname, int value ); + int get_uptime_refresh_ctr ( string & hostname ); + + + /** Returns true when a 'maintenance alive' message for that + * hostnamed node is received */ + void set_mtcAlive ( string & hostname, int iface ); + bool get_mtcAlive_gate ( string & hostname ); + void ctl_mtcAlive_gate ( string & hostname, bool gated ); + + /** Store the latest mtce flags for the specified host + * current flags are defined in nodebase.h + #define MTC_FLAG__I_AM_CONFIGURED (0x00000001) + #define MTC_FLAG__I_AM_NOT_HEALTHY (0x00000002) + #define MTC_FLAG__I_AM_HEALTHY (0x00000004) + #define MTC_FLAG__I_AM_LOCKED (0x00000008) + */ + void set_mtce_flags ( string hostname, int flags ); + + /** Updates the node's health code + * Codes are found in nodeBase.h + * + * - NODE_HEALTH_UNKNOWN (0) + * - NODE_HEALTHY (1) + * - NODE_UNHEALTHY (2) + * + * */ + void set_health ( string & hostname, int health ); + + /** Returns true when a 'go enabled' message for that + * hostnamed node is received */ + void set_goEnabled_failed ( string & hostname ); + void set_goEnabled ( string & hostname ); + bool get_goEnabled ( string & hostname ); + + void set_goEnabled_failed_subf ( string & hostname ); + void set_goEnabled_subf ( string & hostname ); + bool get_goEnabled_subf ( string & hostname ); + + int set_subf_info ( string 
hostname, + string functions, + string operState_subf, + string availState_subf ); + + /** Board management variable setter and getter utilities + * Only the bm_ip is propped through to the database */ + + int set_bm_ip ( string hostname , string bm_ip ); + int set_bm_type ( string hostname , string bm_type ); + int set_bm_un ( string hostname , string bm_un ); + + bool is_bm_ip_already_used ( string bm_ip ); + + int manage_bmc_provisioning ( struct node * node_ptr ); + + string get_bm_ip ( string hostname ); + string get_bm_un ( string hostname ); + string get_bm_type ( string hostname ); + + string get_hostname_from_bm_ip ( string bm_ip ); + + string get_hwmon_info ( string hostname ); + + int get_server_code ( string hostname ); + + void set_hwmond_monitor_state ( string & hostname, bool state ); + bool get_hwmond_monitor_state ( string & hostname ); + + int manage_shadow_change ( string hostname ); + int inotify_shadow_file_fd ; + int inotify_shadow_file_wd ; + + /** The multi node failure avoidance type */ + #define MNFA_NUMBER 0 + #define MNFA_PERCENT 1 + int mnfa_threshold_type ; + + /** % of hosts that need to simultaneously fail before 'mnfa' kicks in */ + int mnfa_threshold_percent ; + + /** # of hosts that need to simultaneously fail before 'mnfa' kicks in */ + int mnfa_threshold_number ; + + /** the calculated threshold */ + int mnfa_threshold ; + + /** Calculates and returns the mnfa threshold based on enabled hosts */ + int mnfa_calculate_threshold ( string hostname ); + + /***************************************** + ** Process Monitor Event Utilities API ** + *****************************************/ + + /** Interface to declare that a key service on the + * specified host is up, running and ready */ + int declare_service_ready ( string & hostname, unsigned int service ); + + /** Process Monitor 'Clear' Event handler. 
+ * + * The process specified will be removed from the + * 'degraded_processes_list' and 'critical_processes_list' for + * the specified host. + * if there are no other degraded/critical processes or other + * degraded services/reasons against that host then + * this handler will clear the degrade state for the + * specified host all together. */ + int degrade_pmond_clear ( string & hostname ); + + /** Resource Monitor 'Clear' Event handler. + * + * The resource specified will be removed from the + * 'degraded_resources_list' for specified host. + * if there are no other degraded resources or other + * degraded services/reasons against that host then + * this handler will clear the degrade state for the + * specified host all together. */ + int degrade_resource_clear ( string & hostname, string & resource ); + + /** + * If the pmond degrade flag is not set then do so. + * if the host is not degraded then set it to degraded. */ + int degrade_process_raise ( string & hostname, string & process ); + + /** if host is unlocked-enabled generate a process failure log */ + int log_process_failure ( string & hostname, string & process ); + + /** if host is unlocked-enabled generate a process failure alarm */ + int alarm_process_failure ( string & hostname, string & process ); + + /** Resource Monitor Raise Event handler. + * + * The host will enter degrade state due to the specified resource + * not running properly. The resource name is recorded in the + * 'degraded_resources_list' for specified host. + * Clearing degrade against this resource requires that host to + * send a clear event against that resource or for that host to + * fully re-enable */ + int degrade_resource_raise ( string & hostname, string & resource ); + + /** Generate a resource failure log if the host is unlocked */ + int log_resource_failure ( string & hostname, string & resource ); + + /** Hardware Process Monitor Degrade Event handler. 
+ * see implementation for details */ + int node_degrade_control ( string & hostname, int state, string service ); + + /** Hardware Monitor 'Action' Event method + * + * The hardware monitor daemon is calling out a sensor that + * is operating out of spec. The command is the accompanying + * action that hwmond requested as a recovery action to this failure. + * The sensor is the sensor name that triggersed the event. */ + int invoke_hwmon_action ( string & hostname, int action, string & sensor ); + + /** Process Monitor Failed Event handler. + * + * The host will go out of service and be reset and + * automatically re-enabled. */ + int critical_process_failed( string & hostname, string & process, unsigned int nodetype ); + + /** Resource Monitor Failed Event handler. + * + * The host will go out of service and be reset and + * automatically re-enabled. */ + int critical_resource_failed( string & hostname, string & resource ); + + /************************************************************/ + + /** + * Node state set'ers and get'ers + */ + mtc_nodeAdminAction_enum get_adminAction ( string & hostname ); + int set_adminAction ( string & hostname, mtc_nodeAdminAction_enum adminAction ); + mtc_nodeAdminState_enum get_adminState ( string & hostname ); + int set_adminState ( string & hostname, mtc_nodeAdminState_enum adminState ); + mtc_nodeOperState_enum get_operState ( string & hostname ); + int set_operState ( string & hostname, mtc_nodeOperState_enum operState ); + mtc_nodeAvailStatus_enum get_availStatus ( string & hostname ); + int set_availStatus ( string & hostname, mtc_nodeAvailStatus_enum availStatus ); + + /** Convert the supplied string to a valid maintenance Admin State enum */ + mtc_nodeAdminState_enum adminState_str_to_enum ( const char * admin_string_ptr ); + /** Convert the supplied string to a valid maintenance Oper State enum */ + mtc_nodeOperState_enum operState_str_to_enum ( const char * oper_string_ptr ); + /** Convert the supplied string to a 
valid maintenance Avail Status enum */ + mtc_nodeAvailStatus_enum availStatus_str_to_enum ( const char * avail_string_ptr ); + + /** Convert the supplied enum to the corresponding Admin Action string */ + string adminAction_enum_to_str ( mtc_nodeAdminAction_enum val ); + /** Convert the supplied enum to the corresponding Admin State string */ + string adminState_enum_to_str ( mtc_nodeAdminState_enum val ); + /** Convert the supplied enum to the corresponding Oper State string */ + string operState_enum_to_str ( mtc_nodeOperState_enum val ); + /** Convert the supplied enum to the corresponding Avail Status string */ + string availStatus_enum_to_str ( mtc_nodeAvailStatus_enum val ); + + string get_operState_dport ( string & hostname ); + string get_availStatus_dport ( string & hostname ); + + /******************************************** + ** External Services Control Utilities API * + ********************************************/ + + /** number of times mtce will retry an API before it gives up. 
+ * Configurable option through mtc.ini */ + int api_retries ; + + /* Inventory APIs */ + int mtcInvApi_cfg_show ( string hostname ); + int mtcInvApi_cfg_modify ( string hostname, bool install ); + + int mtcInvApi_load_host ( string & hostname , node_inv_type & info ); + int mtcInvApi_update_task ( string hostname, string task ); + int mtcInvApi_force_task ( string hostname, string task ); + int mtcInvApi_update_state ( string hostname, string state, string value ); + int mtcInvApi_update_states ( string hostname, string admin, string oper, string avail ); + int mtcInvApi_force_states ( string hostname, string admin, string oper, string avail ); + int mtcInvApi_subf_states ( string hostname, string oper_subf, string avail_subf ); + + int mtcInvApi_update_states_now ( string hostname, string admin, string oper, string avail, string oper_subf, string avail_subf ); + int mtcInvApi_update_task_now ( string hostname, string task ); + + int mtcInvApi_update_value ( string hostname, string key, string value ); + int mtcInvApi_update_uptime ( string hostname, unsigned int uptime ); + + void mtcInvApi_add_handler ( struct evhttp_request *req, void *arg ); + void mtcInvApi_qry_handler ( struct evhttp_request *req, void *arg ); + void mtcInvApi_get_handler ( struct evhttp_request *req, void *arg ); + + + string mtcVimApi_state_get ( string hostname, int & http_status_code ); + + int mtcVimApi_system_info ( string & response ); + + void mtcSmgrApi_handler ( struct evhttp_request *req, void *arg ); + + void mtcHttpUtil_handler ( struct evhttp_request *req, void *arg ); + + /* Update the authentication token as a work queue'd command */ + int mtcKeyApi_refresh_token ( string hostname ); + + /* Update the authentication token now ; as a blocking request */ + int mtcKeyApi_get_token ( string hostname ); + + /*********************** Public Heartbeat Interfaces *********************/ + + /** Creates a linked list of nodes to heartbeat for the specified port + * + * Based on unlocked 
enabled hosts and provisioned ports + * + * @param + * iface_enum specifying the port to create the pulse list for + * @return + * a pointer to the head of the burndown checkin list for the specified port + */ + int create_pulse_list ( iface_enum iface ); + + /** Clear the pulse list */ + void clear_pulse_list ( iface_enum iface ); + + /** Remove a host from an interface's pulse list */ + int remove_pulse ( string & hostname, iface_enum iface, int index, unsigned int flags ); + + /** Manage the heartbeat pulse flags by hostname + * + * These flags contain service information sent by the replying host. + * One example of this is the pmond flag which indicates whether the process + * monitor is running on that host. + * + * Flags that are not set are thresholded for degrade or alarm assertion + * or cleared when found to be set again. + * + * @param hostname - a string containing the name of the host + * that sent the flags. + * @param flags - integer containing a bit field set of flags + * + **/ + void manage_pulse_flags ( string & hostname, unsigned int flags ); + + /** Control the heartbeat monitoring state of a host */ + int mon_host ( const string & hostname, iface_enum iface, bool true_false ); + + /** Return true if the pulse list is empty */ + bool pulse_list_empty ( iface_enum iface ); + + void recalibrate_thresholds ( void ); + + /** Handle heartbeat losses + * + * Any hosts that remain in the pulse list at the end + * of the heartbeat period have not responded with a + * pulse message suggesting a health issue with that host + * This interface manages thresholding and acting on hosts + * that exceed preset thresholds. 
+ * + */ + int lost_pulses ( iface_enum iface ); + + bool monitored_pulse ( string hostname , iface_enum iface ); + + /** Print the pulse list */ + void print_pulse_list ( iface_enum iface ); + + /*********************** Public Heartbeat Pulse Data *********************/ + + /** How many pulses in the list */ + int hbs_expected_pulses[MAX_IFACES]; + + /** How many pulses have come in */ + int hbs_detected_pulses[MAX_IFACES]; + + /** Flag indicating the hbs service is ready to start monitoring hosts */ + bool hbs_ready ; + + /*************************************************************************/ + + + void memDumpAllState ( void ); + void memDumpNodeState ( string hostname ); + +// #endif + + /** Common REST API Structs */ + + /* System Management REST API Control Struct */ + libEvent sysinvEvent ; + + /* System Management REST API Control Struct */ + libEvent smgrEvent ; + + /* Keystone Authentication Token Control Struct */ + libEvent tokenEvent ; + + /** /etc/mtc.ini configurable timeouts */ + + int compute_mtcalive_timeout; + int controller_mtcalive_timeout ; + int goenabled_timeout ; + int swact_timeout ; + int sysinv_timeout ; + int sysinv_noncrit_timeout ; + int loc_recovery_timeout ; /**< Loss Of Communication Recovery Timeout */ + int mnfa_recovery_timeout; /**< Multi-Node-Failure Avoidance Recovery Timeout */ + int work_queue_timeout ; + int node_reinstall_timeout ; + + /** /etc/mtc.ini configurable audit intervals */ + int insv_test_period ; + int oos_test_period ; + int uptime_period ; + int online_period ; + int token_refresh_rate; + + int unknown_host_throttle ; + int invalid_arg_throttle ; +}; + +/** + * @} nodeLinkClass + */ + +/* allocates nodeLinkClass node_ptr */ +#define GET_NODE_PTR(hostname) \ + nodeLinkClass::node * node_ptr = this->getNode ( hostname ) ; \ + if ( node_ptr == NULL ) \ + { \ + elog ("%s hostname unknown\n", hostname.c_str()); \ + return (FAIL_HOSTNAME_LOOKUP); \ + } + +#define CHK_NODE_PTR(node_ptr) \ + if ( node_ptr == 
NULL ) \ + { \ + slog ("null node_ptr\n"); \ + return (FAIL_NULL_POINTER); \ + } + +nodeLinkClass * inv_init ( void ); +nodeLinkClass * get_mtcInv_ptr ( void ); + int module_init ( void ); + +const char * get_adminAction_str ( mtc_nodeAdminAction_enum action ); +string bmc_get_ip ( string hostname, string mac , string & current_bm_ip ); +void clear_host_degrade_causes ( unsigned int & degrade_mask ); +bool sensor_monitoring_supported ( string hostname ); +void autorecovery_clear ( string hostname ); +void log_mnfa_pool ( std::list & mnfa_awol_list ); + +#endif /* __INCLUDE_NODECLASS_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeCmds.h b/mtce-common/cgts-mtce-common-1.0/common/nodeCmds.h new file mode 100644 index 00000000..47ff9fdb --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeCmds.h @@ -0,0 +1,91 @@ +#ifndef __INCLUDE_MTCCMDHDLR_HH__ +#define __INCLUDE_MTCCMDHDLR_HH__ +/* + * Copyright (c) 2013, 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Maintenance Command Handler Header + */ + +#include +#include + +using namespace std; + + +#define MTC_OPER__NONE 0 +#define MTC_OPER__MODIFY_HOSTNAME 1 +#define MTC_OPER__RUN_IPMI_COMMAND 2 +#define MTC_OPER__RESET_PROGRESSION 3 +#define MTC_OPER__HOST_SERVICES_CMD 4 + +/* A set of command groupings that create sub-FSMs for + * specialized maintenance command operations */ +typedef enum +{ + /* Common command dispatch stage */ + MTC_CMD_STAGE__START = 0, + + /* Modify Hostname FSM Stages + * + * FSM that runs Nova and Neutron Delete then Create Operations + * in support of changing a hostname */ + MTC_CMD_STAGE__MODIFY_HOSTNAME_START, + MTC_CMD_STAGE__MODIFY_HOSTNAME_DELETE_WAIT, + MTC_CMD_STAGE__MODIFY_HOSTNAME_CREATE_WAIT, + + /* Reset Progression FSM Stages + * + * FSM that tries all possible avenues to reset/reboot a host */ + MTC_CMD_STAGE__RESET_PROGRESSION_START, + MTC_CMD_STAGE__RESET, 
MTC_CMD_STAGE__RESET_ACK, + MTC_CMD_STAGE__REBOOT, + MTC_CMD_STAGE__REBOOT_ACK, + MTC_CMD_STAGE__OFFLINE_CHECK, + MTC_CMD_STAGE__IPMI_COMMAND_SEND, + MTC_CMD_STAGE__IPMI_COMMAND_RECV, + MTC_CMD_STAGE__RESET_PROGRESSION_RETRY, + + /* Manage Running a Host Services Start or Stop Command for host type */ + MTC_CMD_STAGE__HOST_SERVICES_SEND_CMD, + MTC_CMD_STAGE__HOST_SERVICES_RECV_ACK, + MTC_CMD_STAGE__HOST_SERVICES_WAIT_FOR_RESULT, + + /* Common command done stage */ + MTC_CMD_STAGE__DONE, + MTC_CMD_STAGE__STAGES +} mtc_cmdStages_enum ; + +typedef struct +{ + string name ; + mtc_cmdStages_enum stage ; + unsigned int seq ; + + /* command and response info */ + unsigned int cmd ; + unsigned int rsp ; + unsigned int ack ; + + /* variable parms */ + unsigned int parm1 ; + unsigned int parm2 ; + + /* controls */ + bool task ; /* send task updates */ + unsigned int retry ; + + /* execution status */ + int status ; + string status_string ; +} mtcCmd ; + +void mtcCmd_init ( mtcCmd & cmd ); + +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeEvent.cpp b/mtce-common/cgts-mtce-common-1.0/common/nodeEvent.cpp new file mode 100644 index 00000000..e3f2f49b --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeEvent.cpp @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2013, 2015 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform common iNotify utilities + */ + +#include "nodeEvent.h" +#include "nodeBase.h" + +// The following are legal, implemented events that user-space can watch for +// ------------------------------------------------------------------------- +// #define IN_ACCESS 0x00000001 /* File was accessed */ +// #define IN_MODIFY 0x00000002 /* File was modified */ +// #define IN_ATTRIB 0x00000004 /* Metadata changed */ +// #define IN_CLOSE_WRITE 0x00000008 /* Writtable file was closed */ +// #define IN_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ +// #define IN_OPEN 0x00000020 /* File was opened */ +// #define IN_MOVED_FROM 0x00000040 /* File was moved from X */ +// #define IN_MOVED_TO 0x00000080 /* File was moved to Y */ +// #define IN_CREATE 0x00000100 /* Subfile was created */ +// #define IN_DELETE 0x00000200 /* Subfile was deleted */ +// #define IN_DELETE_SELF 0x00000400 /* Self was deleted */ + +// The following are legal events. 
they are sent as needed to any watch +// #define IN_UNMOUNT 0x00002000 /* Backing fs was unmounted */ +// #define IN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ +// #define IN_IGNORED 0x00008000 /* File was ignored */ + +// Helper events // +// #define IN_CLOSE (IN_CLOSE_WRITE | IN_CLOSE_NOWRITE) /* close */ +// #define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO) /* moves */ + +// Special flags +// #define IN_ISDIR 0x40000000 /* event occurred against dir */ +// #define IN_ONESHOT 0x80000000 /* only send event once */ + + /* Display information from inotify_event structure */ + const char * get_inotify_event_str (int mask ) + { + if (mask & IN_ACCESS) return("IN_ACCESS "); + if (mask & IN_ATTRIB) return("IN_ATTRIB "); + if (mask & IN_CLOSE_NOWRITE) return("IN_CLOSE_NOWRITE "); + if (mask & IN_CLOSE_WRITE) return("IN_CLOSE_WRITE "); + if (mask & IN_CREATE) return("IN_CREATE "); + if (mask & IN_DELETE) return("IN_DELETE "); + if (mask & IN_DELETE_SELF) return("IN_DELETE_SELF "); + if (mask & IN_IGNORED) return("IN_IGNORED "); + if (mask & IN_ISDIR) return("IN_ISDIR "); + if (mask & IN_MODIFY) return("IN_MODIFY "); + if (mask & IN_MOVE_SELF) return("IN_MOVE_SELF "); + if (mask & IN_MOVED_FROM) return("IN_MOVED_FROM "); + if (mask & IN_MOVED_TO) return("IN_MOVED_TO "); + if (mask & IN_OPEN) return("IN_OPEN "); + if (mask & IN_Q_OVERFLOW) return("IN_Q_OVERFLOW "); + if (mask & IN_UNMOUNT) return("IN_UNMOUNT "); + return ("None"); + } + +int set_inotify_watch_file ( const char * file , int & fd , int & wd ) +{ + int rc = PASS ; + + /* Close if already set */ + set_inotify_close ( fd , wd ); + + fd = inotify_init(); + if ( fd < 0 ) + { + elog ("iNotify init error (%d:%m)\n", errno ); + rc = FAIL; + } + else + { + wd = inotify_add_watch ( fd, file, IN_MODIFY | IN_CREATE | IN_DELETE ); + if ( wd < 0 ) + { + elog ("failed adding watch on %s (%d:%m)\n", file, errno ); + rc = FAIL ; + } + else + { + ilog ("watching %s\n", file ); + } + } + return (rc); +} + + +int 
set_inotify_watch ( const char * dir , int & fd , int & wd ) +{ + int rc = PASS ; + fd = inotify_init(); + if ( fd < 0 ) + { + elog ("inotify init error (%d:%m)\n", errno); + rc = FAIL; + } + else + { + ilog ("watching %s\n", dir ); + wd = inotify_add_watch ( fd, dir , IN_MODIFY | + IN_CREATE | + IN_DELETE | + IN_MOVE ); + if ( wd < 0 ) + { + elog ("failed adding watch on %s (%d:%m)\n", dir, errno ); + rc = FAIL ; + } + } + return (rc); +} + +int set_inotify_watch_events ( const char * dir , int & fd , int & wd, int events ) +{ + int rc = PASS ; + fd = inotify_init(); + if ( fd < 0 ) + { + elog ("inotify init error (%d:%m)\n", errno ); + rc = FAIL; + } + else + { + ilog ("watching %s\n", dir ); + wd = inotify_add_watch ( fd, dir , events ); + if ( wd < 0 ) + { + elog ("failed adding watch on %s (%d:%m)\n", dir, errno); + rc = FAIL ; + } + } + return (rc); +} + +bool valid_file ( char * name ) +{ + string temp = name ; + std::size_t found ; + + found = temp.find(".swx",0) ; + if ( found != std::string::npos ) + { + dlog1 ("%s file is not valid\n", temp.c_str() ); + return (false); + } + + found = temp.find(".swp",0) ; + if ( found != std::string::npos ) + { + dlog1 ("%s file is not valid\n", temp.c_str() ); + return (false); + } + + dlog ("%s file is valid\n", temp.c_str() ); + return (true); +} + +int get_inotify_events ( int fd, int event_mask ) +{ + int l = 0 ; + int i = 0 ; + char buf [EVENT_BUF_LEN] ; + int status = 0 ; + memset (buf, 0 , EVENT_BUF_LEN ); + if ( ( l = read (fd, buf, EVENT_BUF_LEN ) > 0 )) + { + /* Read returns the list of change events. + * Deal with all the change events and then reload. 
+ */ + while ( i < l ) + { + struct inotify_event *event_ptr = (struct inotify_event *)&buf[i]; + dlog ("iNotify Event Mask:%8x Requested Mask:%8x\n", event_ptr->mask, event_mask ); + if ( event_ptr->mask & event_mask ) + { + status |= (event_ptr->mask & event_mask) ; + } + if ( event_ptr->mask & IN_IGNORED ) + { + wlog ("Watch file is now being ignored (%x) !!!\n", status ); + status |= IN_IGNORED ; + } + + i += EVENT_SIZE + event_ptr->len; + } + } + return(status); +} + + +bool get_inotify_events ( int fd ) +{ + int l = 0 ; + int i = 0 ; + char buf [EVENT_BUF_LEN] ; + bool status = false ; + memset (buf, 0 , EVENT_BUF_LEN ); + if ( ( l = read (fd, buf, EVENT_BUF_LEN )) > 0 ) /* l holds the byte count read, not the result of '> 0' */ + { + /* Read returns the list of change events. + * Deal with all the change events and then reload. + */ + while ( i < l ) + { + struct inotify_event *event_ptr = (struct inotify_event *)&buf[i]; + dlog ("iNotify Event Mask:%08x\n", event_ptr->mask); + if ( event_ptr->len ) + { + if (( event_ptr->mask & IN_CREATE ) || + ( event_ptr->mask & IN_MOVED_TO )) + { + if ( valid_file ( event_ptr->name ) == true ) + { + if ( !(event_ptr->mask & IN_ISDIR) ) + { + dlog( "New file %s created or moved into\n", event_ptr->name ); + status = true ; + } + } + } + else if (( event_ptr->mask & IN_DELETE ) || + ( event_ptr->mask & IN_MOVED_FROM )) + { + if ( valid_file ( event_ptr->name ) == true ) + { + if ( !(event_ptr->mask & IN_ISDIR) ) + { + dlog( "%s deleted or removed.\n", event_ptr->name ); + status = true ; + } + } + } + else if ( event_ptr->mask & IN_MODIFY ) + { + if ( valid_file ( event_ptr->name ) == true ) + { + if ( !(event_ptr->mask & IN_ISDIR) ) + { + dlog( "%s modified.\n", event_ptr->name ); + status = true ; + } + } + } + else + { + dlog ("Unhandled iNotify Event Mask:%08x\n", event_ptr->mask); + } + } + i += EVENT_SIZE + event_ptr->len; + } + } + return(status); +} + +static string null_str = "" ; + +/* Returns the number of events found */ +int get_inotify_events ( int fd, 
inotify_event_queue_type & event_queue ) +{ + int l = 0 ; + int i = 0 ; + char buf [EVENT_BUF_LEN] ; + memset (buf, 0 , EVENT_BUF_LEN ); + + event_queue.num = 0 ; /* default to no events */ + + if ( ( l = read (fd, buf, EVENT_BUF_LEN )) > 0 ) /* l holds the byte count read, not the result of '> 0' */ + { + /* Read returns the list of change events. + * Deal with all the change events and then reload. + */ + while (( i < l ) && ( event_queue.num < MAX_EVENTS )) /* stop before overrunning item[MAX_EVENTS] */ + { + struct inotify_event *event_ptr = (struct inotify_event *)&buf[i]; + dlog ("iNotify Event Mask:%08x\n", event_ptr->mask); + if ( event_ptr->len ) + { + if (( event_ptr->mask & IN_CREATE ) || + ( event_ptr->mask & IN_MOVED_TO )) + { + if ( valid_file ( event_ptr->name ) == true ) + { + if ( !(event_ptr->mask & IN_ISDIR) ) + { + dlog( "%s created\n", event_ptr->name ); + event_queue.item[event_queue.num].event = IN_CREATE ; + snprintf ( &event_queue.item[event_queue.num].name[0], EVENT_BUF_LEN, "%s", event_ptr->name ); + event_queue.num++ ; + } + } + } + else if (( event_ptr->mask & IN_DELETE ) || + ( event_ptr->mask & IN_MOVED_FROM )) + { + if ( valid_file ( event_ptr->name ) == true ) + { + if ( !(event_ptr->mask & IN_ISDIR) ) + { + dlog( "%s deleted\n", event_ptr->name ); + event_queue.item[event_queue.num].event = IN_DELETE ; + snprintf ( &event_queue.item[event_queue.num].name[0], EVENT_BUF_LEN, "%s", event_ptr->name ); + event_queue.num++ ; + } + } + } + else if ( event_ptr->mask & IN_MODIFY ) + { + if ( valid_file ( event_ptr->name ) == true ) + { + if ( !(event_ptr->mask & IN_ISDIR) ) + { + dlog( "%s modified\n", event_ptr->name ); + event_queue.item[event_queue.num].event = IN_MODIFY ; + snprintf ( &event_queue.item[event_queue.num].name[0], EVENT_BUF_LEN, "%s", event_ptr->name ); + event_queue.num++ ; + } + } + } + else + { + dlog ("Unhandled iNotify Event Mask:%08x\n", event_ptr->mask); + } + } + i += EVENT_SIZE + event_ptr->len; + } + } + return (event_queue.num) ; +} + +void set_inotify_close ( int & fd, int & wd ) +{ + /* cleanup */ + if ( fd ) + { + inotify_rm_watch( fd, wd ); + close 
(fd); + fd = 0 ; + wd = 0 ; + } +} + diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeEvent.h b/mtce-common/cgts-mtce-common-1.0/common/nodeEvent.h new file mode 100644 index 00000000..b6c7a599 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeEvent.h @@ -0,0 +1,48 @@ +/* +* Copyright (c) 2013-2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* +*/ + +#include +#include +#include +#include +#include +#include +#include /* */ +#include /* for inotify service */ +#include /* for the list of conf file names */ +#include + +using namespace std; + +#define EVENT_SIZE ( sizeof (struct inotify_event) ) +#define EVENT_BUF_LEN ( PATH_MAX * ( EVENT_SIZE + 16 ) ) + +#define MAX_EVENTS (50) + +typedef struct +{ + int event ; + char name [EVENT_BUF_LEN] ; +} inotify_event_type ; + + +typedef struct +{ + int num ; + inotify_event_type item[MAX_EVENTS] ; +} inotify_event_queue_type ; + +int set_inotify_watch ( const char * dir, int & fd, int & wd ); +int set_inotify_watch ( const char * dir, int & fd, int & wd , int events ); +int set_inotify_watch_file ( const char * file, int & fd, int & wd ); + +bool get_inotify_events( int fd ); +int get_inotify_events( int fd, int event_mask ); +int get_inotify_events( int fd, inotify_event_queue_type & event_queue ); +void set_inotify_close ( int & fd, int & wd ); + +const char * get_inotify_event_str (int i); diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeMacro.h b/mtce-common/cgts-mtce-common-1.0/common/nodeMacro.h new file mode 100644 index 00000000..46d3ea37 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeMacro.h @@ -0,0 +1,318 @@ +/* +* Copyright (c) 2013-2015 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* +*/ + + +#define CREATE_REUSABLE_INET_UDP_TX_SOCKET(ip, port, s, a, p, l, n, rc) \ +{ \ + int on = 1 ; \ + s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); \ + if ( 0 >= s ) \ + { \ + elog ("failed to create '%s' socket (%d:%s)\n", n, errno, strerror(errno)); \ + rc = FAIL_SOCKET_CREATE ; \ + } \ + else if ( setsockopt ( s , SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int)) == -1 ) \ + { \ + elog ("failed to make '%s' socket re-useable (%d:%s)\n", n, errno, strerror(errno)); \ + close(s); \ + s = 0 ; \ + rc = FAIL_SOCKET_OPTION ; \ + } \ + else \ + { \ + memset(&a, 0, sizeof(struct sockaddr_in)); \ + l = sizeof(a); \ + p = port ; \ + a.sin_family = AF_INET ; \ + a.sin_addr.s_addr = inet_addr(ip); \ + a.sin_port = htons(p) ; \ + ilog ("Transmitting: '%s' socket %s:%d\n", n, inet_ntoa(a.sin_addr), p); \ + } \ +} + +#define CREATE_NONBLOCK_INET_UDP_RX_SOCKET(ip, port, s, a, p, l, n, rc) \ +{ \ + int on = 1 ; \ + s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); \ + if ( 0 >= s ) \ + { \ + elog ("failed to create '%s' socket (%d:%s)\n", n, errno, strerror(errno)); \ + rc = FAIL_SOCKET_CREATE ; \ + } \ + else if ( setsockopt ( s , SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int)) == -1 ) \ + { \ + elog ("failed to make '%s' socket re-useable (%d:%s)\n", n, errno, strerror(errno)); \ + close(s); \ + s = 0 ; \ + rc = FAIL_SOCKET_OPTION ; \ + } \ + else if ( 0 > ioctl(s, FIONBIO, (char *)&on)) \ + { \ + elog ("failed to set '%s' socket non-blocking (%d:%s)\n", n, errno, strerror(errno)); \ + close(s); \ + s = 0 ; \ + rc = FAIL_SOCKET_NOBLOCK ; \ + } \ + else \ + { \ + memset(&a, 0, sizeof(a)); \ + l = sizeof(a); \ + p = port ; \ + a.sin_family = AF_INET ; \ + a.sin_addr.s_addr = inet_addr(ip); \ + a.sin_port = htons(p) ; \ + if ( bind ( s, (const struct sockaddr *)&a, sizeof(struct sockaddr_in)) == -1 ) \ + { \ + elog ( "failed to bind '%s' socket to port %d (%d:%s)\n", n, p, errno, strerror(errno) ); \ + close (s); \ + s = 0 ; \ + rc = 
FAIL_SOCKET_BIND ; \ + } \ + else rc = PASS ; /* report PASS only when bind succeeded ; keep FAIL_SOCKET_BIND otherwise */ \ + ilog ("Listening On: '%s' socket %s:%d\n", n, inet_ntoa(a.sin_addr), p); \ + } \ +} + +/* Non-Blocking Receive From ANY IP on 'port' */ +#define CREATE_NONBLOCK_INET_UDP_CMD_RX_SOCKET(port, s, a, n, rc) \ +{ \ + int on = 1 ; \ + s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); \ + if ( 0 >= s ) \ + { \ + elog ("failed to create '%s' socket (%d:%m)\n", n, errno); \ + rc = FAIL_SOCKET_CREATE ; \ + } \ + else if ( setsockopt ( s , SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int)) == -1 ) \ + { \ + elog ("failed to make '%s' socket re-useable (%d:%m)\n", n, errno); \ + close(s); \ + s = 0 ; \ + rc = FAIL_SOCKET_OPTION ; \ + } \ + else if ( 0 > ioctl(s, FIONBIO, (char *)&on)) \ + { \ + elog ("failed to set '%s' socket non-blocking (%d:%m)\n", n, errno); \ + close(s); \ + s = 0 ; \ + rc = FAIL_SOCKET_NOBLOCK ; \ + } \ + else \ + { \ + memset(&a, 0, sizeof(a)); \ + a.sin_family = AF_INET ; \ + a.sin_addr.s_addr = htonl(INADDR_ANY); \ + a.sin_port = htons(port) ; \ + if ( bind ( s, (const struct sockaddr *)&a, sizeof(a)) == -1 ) \ + { \ + elog ( "failed to bind '%s' socket to port %d (%d:%m)\n", n, port, errno); \ + close (s); \ + s = 0 ; \ + rc = FAIL_SOCKET_BIND ; \ + } \ + ilog ("Listening for '%s' messages on %s:%d\n", n, inet_ntoa(a.sin_addr), port); \ + } \ +} + +/* Transmit UDP messages to the specified ip and port over the specified interface */ +#define CREATE_NTWK_UDP_TX_SOCKET(iface, ip, s, a, p, n, rc) \ +{ \ + struct ifreq ifr ; \ + int on = 1 ; \ + rc = PASS ; \ + memset(&ifr, 0, sizeof(ifr)); \ + snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", iface ); \ + s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); \ + if ( 0 >= s ) \ + { \ + elog ("failed to create '%s' socket (%d:%m)\n",n,errno); \ + rc = FAIL_SOCKET_CREATE ; \ + } \ + else if ( setsockopt ( s , SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int)) == -1 ) \ + { \ + elog ("failed to make '%s' socket re-useable (%d:%m)\n", n, errno ); \ + close(s); \ + s = 0 ; \ + rc = FAIL_SOCKET_OPTION ; \ 
+ } \ + else \ + { \ + ioctl(s, SIOCGIFADDR, &ifr); \ + dlog("Interface : %s %s\n", iface, \ + inet_ntoa(((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr)); \ + \ + rc=setsockopt(s,SOL_SOCKET,SO_BINDTODEVICE,(void*)&ifr,sizeof(ifr)) ; \ + if (rc < 0) \ + { \ + elog ("setsockopt failed for SO_BINDTODEVICE (%d:%m)\n", errno); \ + wlog ("Check permission level, must be root\n"); \ + close(s); s = 0 ; /* zero the closed descriptor like every other failure path */ \ + rc = FAIL_SOCKET_OPTION ; \ + } \ + else \ + { \ + memset(&a, 0, sizeof(struct sockaddr_in)); \ + a.sin_family = AF_INET ; \ + a.sin_addr.s_addr = inet_addr(ip); \ + a.sin_port = htons(p) ; \ + ilog ("Transmitting: '%s' messages to %s:%s:%d\n",n,iface,ip,p);\ + } \ + } \ +} + +/* Non-Blocking Receive From specified IP and port */ +#define CREATE_NTWK_UDP_RX_SOCKET(ip, hn, p, s, a, l, n, rc) \ +{ \ + int on = 1 ; \ + s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); \ + if ( 0 >= s ) \ + { \ + elog ("failed to create '%s' socket (%d:%m)\n",n,errno); \ + rc = FAIL_SOCKET_CREATE ; \ + } \ + else if ( setsockopt ( s , SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int)) == -1 ) \ + { \ + elog ("failed to make '%s' socket re-useable (%d:%m)\n", n, errno); \ + close(s); s = 0 ; \ + rc = FAIL_SOCKET_OPTION ; \ + } \ + else if ( 0 > ioctl(s, FIONBIO, (char *)&on)) \ + { \ + elog ("failed to set '%s' socket non-blocking (%d:%m)\n", n, errno); \ + close(s); s = 0 ; \ + rc = FAIL_SOCKET_NOBLOCK ; \ + } \ + else \ + { \ + rc = inet_pton(AF_INET, ip.c_str(), &(a.sin_addr)); /* inet_pton fills a struct in_addr ; used here only to validate ip */ \ + if ( rc != 1 ) \ + { \ + elog("%s Failed to convert the '%s' to network address (rc:%d)\n", hn, ip.c_str(), rc); \ + close (s); s = 0 ; \ + rc = FAIL_HOSTADDR_LOOKUP; \ + } \ + else \ + { \ + rc = PASS ; \ + memset(&a, 0, sizeof(a)); \ + a.sin_family = AF_INET ; \ + a.sin_addr.s_addr = inet_addr(ip.data()) ; \ + a.sin_port = htons(p) ; \ + l = sizeof(a); \ + if ( bind ( s, (const struct sockaddr *)&a, sizeof(a)) == -1 ) \ + { \ + elog ( "'%s' socket bind failed (%d) (%d:%m)\n",n,p,errno); \ + close (s); \ + s = 0 ; \ + rc = 
FAIL_SOCKET_BIND ; \ + } \ + ilog ("Listening for '%s' messages on %s:%d\n", n, inet_ntoa(a.sin_addr), p); \ + } \ + } \ +} + +/************************************************** + * + * Name: CREATE_INET_UDP_TX_SOCKET + * + * Create reusable INET Transmit socket over + * the 'lo' interface to the specified 'port' + * + * Parameters: + * + * s - socket (int) + * a - address (struct sockaddr_in) + * p - port (int) + * l - length (socklen_t) + * n - name (char *) + * rc - status (int) + * + **************************************************/ +#define CREATE_INET_UDP_TX_SOCKET(s,a,p,l,n,rc) \ +{ \ + int on = 1 ; \ + s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); \ + if ( 0 >= s ) \ + { \ + elog ("failed to create '%s' socket (%d:%m)\n", n, errno ); \ + rc = FAIL_SOCKET_CREATE ; \ + } \ + else if ( setsockopt ( s , SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int)) == -1 ) \ + { \ + elog ("failed to make '%s' socket re-useable (%d:%m)\n", n, errno ); \ + close(s); \ + s = 0 ; \ + rc = FAIL_SOCKET_OPTION ; \ + } \ + else \ + { \ + memset(&a, 0, sizeof(struct sockaddr_in)); \ + l = sizeof(a); \ + a.sin_family = AF_INET ; \ + a.sin_addr.s_addr = inet_addr(LOOPBACK_IP);\ + a.sin_port = htons(p) ; \ + ilog ("Transmitting: '%s' messages to %s:%d\n", n, inet_ntoa(a.sin_addr), p); \ + } \ +} + +/************************************************** + * + * Name: CREATE_INET_UDP_RX_SOCKET + * + * Create Non-Blocking, reusable INET Receive + * socket over the 'lo' interface on + * specified 'port' + * + * Parameters: + * + * s - socket (int) + * a - address (struct sockaddr_in) + * p - port (int) + * l - length (socklen_t) + * n - name (char *) + * rc - status (int) + * + **************************************************/ +#define CREATE_INET_UDP_RX_SOCKET(s,a,p,l,n,rc) \ +{ \ + int on = 1 ; \ + s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); \ + if ( 0 >= s ) \ + { \ + elog ("failed to create '%s' socket (%d:%m)\n", n, errno); \ + rc = FAIL_SOCKET_CREATE ; \ + } \ + else if ( setsockopt ( 
s , SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int)) == -1 ) \ + { \ + elog ("failed to make '%s' socket re-useable (%d:%m)\n", n, errno); \ + close(s); \ + s = 0 ; \ + rc = FAIL_SOCKET_OPTION ; \ + } \ + else if ( 0 > ioctl(s, FIONBIO, (char *)&on)) \ + { \ + elog ("failed to set '%s' socket non-blocking (%d:%m)\n", n, errno); \ + close(s); \ + s = 0 ; \ + rc = FAIL_SOCKET_NOBLOCK ; \ + } \ + else \ + { \ + memset(&a, 0, sizeof(a)); \ + a.sin_family = AF_INET ; \ + a.sin_addr.s_addr = inet_addr(LOOPBACK_IP); \ + a.sin_port = htons(p) ; \ + if ( bind ( s, (const struct sockaddr *)&a, sizeof(a)) == -1 ) \ + { \ + elog ( "failed to bind '%s' socket to port %d (%d:%m)\n", n, p, errno); \ + close (s); \ + s = 0 ; \ + rc = FAIL_SOCKET_BIND ; \ + } \ + ilog ("Listening for '%s' messages on %s:%d\n", n, inet_ntoa(a.sin_addr), p); \ + } \ +} diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeTimers.cpp b/mtce-common/cgts-mtce-common-1.0/common/nodeTimers.cpp new file mode 100755 index 00000000..fe2bb1b4 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeTimers.cpp @@ -0,0 +1,604 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Node Maintenance "Timer Facility" + * Implementation + */ + +/** + * @detail + * Detailed description ... 
+ * + * Common timer struct + * + */ + +#include "daemon_common.h" +#include "nodeBase.h" +#include "nodeTimers.h" + +static int timer_count = 0 ; + +int _timer_start ( struct mtc_timer * mtcTimer_ptr, + void (*handler)(int, siginfo_t*, void*), + int secs, int msec ) + +{ + int rc = PASS ; + + if ( mtcTimer_ptr == NULL) + { + return (FAIL_NULL_POINTER); + } + + mtcTimer_ptr->mutex = true ; + mtcTimer_ptr->ring = true ; /* default to rung for failure path cases */ + + /* Avoid programming mistake that leads to over-writing + * a seemingly active timer; if .tid is not null then + * cancel that timr first */ + if (( mtcTimer_ptr->tid ) && ( timer_count > 0 )) + { + wlog ("%s (%s) called with active timer ; stopping first \n", + mtcTimer_ptr->hostname.c_str(), + mtcTimer_ptr->service.c_str()); + mtcTimer_stop ( mtcTimer_ptr ); + } + + if (( handler == NULL ) || + (( secs == 0 ) && ( msec == 0 )) || + ( secs > MAX_TIMER_DURATION )) + { + elog ("%s (%s) Invalid Duration (%d:%d)\n", + mtcTimer_ptr->hostname.c_str(), + mtcTimer_ptr->service.c_str(), + secs, msec ); + + rc = FAIL_BAD_PARM ; + goto _timer_start_out ; + } + + /* Clean the timer struct */ + memset ( &mtcTimer_ptr->sev, 0, sizeof(struct sigevent)); + memset ( &mtcTimer_ptr->value, 0, sizeof(struct itimerspec)); + memset ( &mtcTimer_ptr->sa, 0, sizeof(struct sigaction)); + + /* Setup the timer */ + mtcTimer_ptr->sa.sa_flags = SA_SIGINFO; + mtcTimer_ptr->sa.sa_sigaction = handler; + sigemptyset(&mtcTimer_ptr->sa.sa_mask); + if (sigaction(SIGRTMIN, &mtcTimer_ptr->sa, NULL) == -1) + { + elog ("%s (%s) Timer 'set action' (sigaction) failed\n", + mtcTimer_ptr->hostname.c_str(), + mtcTimer_ptr->service.c_str()); + + rc = FAIL_TIMER_SET_ACTION ; + goto _timer_start_out ; + + } + /* set and enable alarm */ + mtcTimer_ptr->sev.sigev_notify = SIGEV_SIGNAL; + mtcTimer_ptr->sev.sigev_signo = SIGRTMIN; + mtcTimer_ptr->sev.sigev_value.sival_ptr = &mtcTimer_ptr->tid; + + /* TODO: move up or set block till time is set ? 
*/ + mtcTimer_ptr->value.it_value.tv_sec = secs; + mtcTimer_ptr->value.it_value.tv_nsec = (msec*1000000) ; + mtcTimer_ptr->value.it_interval.tv_sec = secs ; + mtcTimer_ptr->value.it_interval.tv_nsec = (msec*1000000) ; + + if ( timer_create (CLOCK_REALTIME, &mtcTimer_ptr->sev, &mtcTimer_ptr->tid) == -1 ) + { + elog ("%s (%s) Timer 'create' (timer_create) failed (-1)\n", + mtcTimer_ptr->hostname.c_str(), + mtcTimer_ptr->service.c_str() ); + + rc = FAIL_TIMER_CREATE ; + goto _timer_start_out ; + } + + /* make a backup copy just for fun */ + mtcTimer_ptr->secs = secs ; + mtcTimer_ptr->msec = msec ; + timer_count++ ; + + /* Set the ring to false DEFORE we start the timer */ + mtcTimer_ptr->_guard = 0x12345678 ; + mtcTimer_ptr->guard_ = 0x77654321 ; + mtcTimer_ptr->ring = false ; + + if ( timer_settime (mtcTimer_ptr->tid, 0, &mtcTimer_ptr->value, NULL) == -1 ) + { + elog ("%s (%s) Timer 'set time' (timer_settime) failed (-1)\n", + mtcTimer_ptr->hostname.c_str(), + mtcTimer_ptr->service.c_str() ); + + timer_count-- ; + rc = FAIL_TIMER_SET ; + goto _timer_start_out ; + } + mtcTimer_ptr->active = true ; + + /* moved here so that the tid will be valid in the log for debug purposes */ + tlog ("%s (%s) Tid:%p with %d.%03d second timeout (count:%d)\n", + mtcTimer_ptr->hostname.c_str(), + mtcTimer_ptr->service.c_str(), + mtcTimer_ptr->tid, + mtcTimer_ptr->secs, + mtcTimer_ptr->msec, + timer_count ); + +_timer_start_out: + + mtcTimer_ptr->mutex = false ; + + return rc ; +} + +int mtcTimer_start ( struct mtc_timer & mtcTimer, + void (*handler)(int, siginfo_t*, void*), + int secs ) +{ + return ( _timer_start ( &mtcTimer, handler, secs, 0 )); +} + +int mtcTimer_start_msec ( struct mtc_timer & mtcTimer, + void (*handler)(int, siginfo_t*, void*), + int msec ) +{ + return ( _timer_start ( &mtcTimer, handler, 0, msec )); +} + +int mtcTimer_start ( struct mtc_timer * mtcTimer_ptr, + void (*handler)(int, siginfo_t*, void*), + int secs ) +{ + return ( _timer_start ( mtcTimer_ptr, handler, 
secs, 0 )); +} + +int mtcTimer_start_sec_msec ( struct mtc_timer * mtcTimer_ptr, + void (*handler)(int, siginfo_t*, void*), + int secs , int msec ) +{ + return ( _timer_start ( mtcTimer_ptr, handler, secs, msec )); +} + +int mtcTimer_start_msec ( struct mtc_timer * mtcTimer_ptr, + void (*handler)(int, siginfo_t*, void*), + int msec ) +{ + return ( _timer_start ( mtcTimer_ptr, handler, 0, msec )); +} + +/*************************************************************************/ + +int _timer_stop ( struct mtc_timer * mtcTimer_ptr , bool int_safe) +{ + int rc = PASS ; + + if ( mtcTimer_ptr == NULL) + { + return (FAIL_NULL_POINTER); + } + + if ( timer_count == 0 ) + { + if ( int_safe == false ) + { + elog ("%s (%s) called with no outstanding timers\n", + mtcTimer_ptr->hostname.c_str(), + mtcTimer_ptr->service.c_str()); + } + goto _timer_stop_out ; + } + else if ( mtcTimer_ptr->tid ) + { + mtcTimer_ptr->value.it_value.tv_sec = 0; + mtcTimer_ptr->value.it_value.tv_nsec = 0; + mtcTimer_ptr->value.it_interval.tv_sec = 0; + mtcTimer_ptr->value.it_interval.tv_nsec = 0; + if ( timer_settime (mtcTimer_ptr->tid, 0, &mtcTimer_ptr->value, NULL) == -1 ) + { + if ( int_safe == false ) + { + elog ("%s (%s) timer_settime failed (tid:%p)\n", + mtcTimer_ptr->hostname.c_str(), + mtcTimer_ptr->service.c_str(), + mtcTimer_ptr->tid ); + } + rc = FAIL_TIMER_STOP ; + goto _timer_stop_out ; + } + + if ( int_safe == false ) + { + tlog ("%s (%s) Tid:%p with %d.%d second timeout (count:%d)\n", + mtcTimer_ptr->hostname.c_str(), + mtcTimer_ptr->service.c_str(), + mtcTimer_ptr->tid, + mtcTimer_ptr->secs, + mtcTimer_ptr->msec, + timer_count ); + } + timer_delete (mtcTimer_ptr->tid); + mtcTimer_ptr->tid = NULL ; + if ( timer_count ) + timer_count-- ; + } + else if ( int_safe == false ) + { + elog ("%s (%s) called with null TID (count:%d)\n", + mtcTimer_ptr->hostname.c_str(), + mtcTimer_ptr->service.c_str(), + timer_count); + } + +#ifdef WANT_OUTSTANDING_TIMER_COUNT + if ( int_safe == false ) + { + 
tlog ("%s (%s) Outstanding timers: %d\n", + mtcTimer_ptr->hostname.c_str(), + mtcTimer_ptr->service.c_str(), + timer_count ); + } +#endif + +_timer_stop_out: + mtcTimer_ptr->active = false ; + return rc ; +} + + +/* Interrupt level safe timer stop utility by pointer */ +int mtcTimer_stop_int_safe ( struct mtc_timer * mtcTimer_ptr ) +{ + return ( _timer_stop ( mtcTimer_ptr, true )) ; +} + +/* Interrupt level safe timer stop utility by reference */ +int mtcTimer_stop_int_safe ( struct mtc_timer & mtcTimer ) +{ + return ( _timer_stop ( &mtcTimer, true ) ); +} + +/* timer stop utility by pointer */ +int mtcTimer_stop ( struct mtc_timer * mtcTimer_ptr ) +{ + return ( _timer_stop ( mtcTimer_ptr, false ) ); +} + +/* stop utility by reference */ +int mtcTimer_stop ( struct mtc_timer & mtcTimer ) +{ + return ( _timer_stop ( &mtcTimer , false )); +} + + + +bool mtcTimer_expired ( struct mtc_timer & mtcTimer ) +{ + if (( mtcTimer.ring == true ) || + ( mtcTimer.active == false ) || + ( mtcTimer.tid == NULL )) + { + return (true); + } + return (false); +} + + +bool mtcTimer_expired ( struct mtc_timer * mtcTimer_ptr ) +{ + if ( mtcTimer_ptr ) + { + if (( mtcTimer_ptr->ring == true ) || + ( mtcTimer_ptr->active == false ) || + ( mtcTimer_ptr->tid == NULL )) + { + return (true); + } + } + return (false); +} + + +void mtcTimer_reset ( struct mtc_timer & mtcTimer ) +{ + if ( mtcTimer.tid ) + _timer_stop ( &mtcTimer , false ); + + if ( mtcTimer.active ) + mtcTimer.active = false ; + + mtcTimer.ring = false ; +} + +void mtcTimer_reset ( struct mtc_timer * mtcTimer_ptr ) +{ + if ( mtcTimer_ptr ) + { + if ( mtcTimer_ptr->tid ) + _timer_stop ( mtcTimer_ptr , false ); + + if ( mtcTimer_ptr->active ) + mtcTimer_ptr->active = false ; + + mtcTimer_ptr->ring = false ; + } +} + +/************************************************************************* + * + * Issue: These static vars record unknown/stale timer data. 
+ * The time of the ring, the TID, and the number of outstanding + * timers at that time. They are defaulted to zero and should + * remain that way. The mtcTimer_dump_data utility can be + * called periodically by a process audit, will create a Swerr + * log with the recorded data and then clear the these vars only + * to allow the next occurance to be recorded and loged on the + * next audit interval. + * + **************************************************************************/ +static timer_t * stale_tid_ptr = NULL ; +static string stale_tid_time = "" ; +static int stale_tid_count = 0 ; + +/* Dump the mtcTimer data - currently only dumps stale data */ +void mtcTimer_dump_data ( void ) +{ + if ( stale_tid_ptr ) + { + slog ("Unknown timer fired at '%s' with tid '%p' ; module has %d loaded timers\n", + stale_tid_time.c_str(), + stale_tid_ptr, + stale_tid_count ); + + stale_tid_ptr = NULL ; + stale_tid_time.clear() ; + stale_tid_count = 0 ; + } +} + +int _timer_stop_tid ( timer_t * tid_ptr , bool int_safe ) +{ + int rc = PASS ; + +#ifdef UNUSED + UNUSED (int_safe); +#endif + + /********************************************************************* + * + * Issue reported a segfault that was a result of trying to cancel + * a timer based on a stale/unknown TID. Its better to record the error + * and leave the timer alone than to try and cancel and get a segfault. + * + * This update records the fact that this condition has happened only + * to be logged by the host process with a call to mtcTimer_dump_data + * and debugged after-the-fact. 
+ * + **********************************************************************/ + if ( stale_tid_ptr == NULL ) + { + stale_tid_ptr = tid_ptr ; + stale_tid_time = pt(); + stale_tid_count = timer_count ; + } + +/* This defined out due to potential for segfault */ +#ifdef WANT_TIMER_STOP_BY_ID + + if ( tid_ptr ) + { + struct mtc_timer ghostTimer ; + ghostTimer.value.it_value.tv_sec = 0; + ghostTimer.value.it_value.tv_nsec = 0; + ghostTimer.value.it_interval.tv_sec = 0; + ghostTimer.value.it_interval.tv_nsec = 0; + if ( timer_settime (*tid_ptr, 0, &ghostTimer.value, NULL) == -1 ) + { + if ( int_safe == false ) + { + elog ("ghostTimer stop (timer_settime) failed\n"); + } + rc = FAIL_TIMER_STOP ; + goto _timer_stop_tid_out ; + } + + timer_delete (*tid_ptr); + if ( timer_count ) + timer_count-- ; + } + else if ( int_safe == false ) + { + elog ("called with NULL TID (%d)\n", timer_count); + } + + if ( int_safe == false ) + { + tlog ("Remaining outstanding timers: %d\n", timer_count ); + } + +_timer_stop_tid_out: + +#endif + + return rc ; +} + +int mtcTimer_stop_tid ( timer_t * tid_ptr ) +{ + return (_timer_stop_tid ( tid_ptr , false )); +} + +int mtcTimer_stop_tid_int_safe ( timer_t * tid_ptr ) +{ + return (_timer_stop_tid ( tid_ptr , true )); +} + +/*************************************************************************/ + +void _timer_init ( struct mtc_timer * mtcTimer_ptr , string hostname, string service ) +{ + if ( mtcTimer_ptr == NULL) + { + return ; + } + + if ( hostname.empty() ) + mtcTimer_ptr->hostname = "unset_hostname" ; + else + mtcTimer_ptr->hostname = hostname ; + + if ( service.empty() ) + mtcTimer_ptr->service = "unset_service" ; + else + mtcTimer_ptr->service = service ; + + if (( mtcTimer_ptr->init == TIMER_INIT_SIGNATURE ) && ( mtcTimer_ptr->tid != NULL )) + { + slog ("%s '%s' service is unexpectedly re-initializated ; stopping active timer\n", + hostname.c_str(), + mtcTimer_ptr->service.c_str()); + mtcTimer_reset ( mtcTimer_ptr ); + } + + tlog ("%s 
'%s' service initialized (%p)\n", + hostname.c_str(), + mtcTimer_ptr->service.c_str(), + mtcTimer_ptr->tid) ; + + mtcTimer_ptr->init = TIMER_INIT_SIGNATURE ; + mtcTimer_ptr->tid = NULL ; + mtcTimer_ptr->secs = 0 ; + mtcTimer_ptr->msec = 0 ; + mtcTimer_ptr->ring = false ; + mtcTimer_ptr->active= false ; + mtcTimer_ptr->error = false ; + mtcTimer_ptr->mutex = false ; + +} + +/* Init / clean a user timer */ +void _timer_fini ( struct mtc_timer * mtcTimer_ptr ) +{ + mtcTimer_reset ( mtcTimer_ptr ); + mtcTimer_ptr->init = 0 ; +} + +/* de-init a user timer */ +void mtcTimer_fini ( struct mtc_timer & mtcTimer ) +{ + _timer_fini (&mtcTimer ); +} + +/* de-init a user timer */ +void mtcTimer_fini ( struct mtc_timer* mtcTimer_ptr ) +{ + _timer_fini ( mtcTimer_ptr ); +} + + +/* Init / clean a user timer */ +void mtcTimer_init ( struct mtc_timer * mtcTimer_ptr ) +{ + _timer_init ( mtcTimer_ptr, "" , "" ); +} + +/* Init / clean a user timer */ +void mtcTimer_init ( struct mtc_timer * mtcTimer_ptr , string hostname, string service) +{ + _timer_init ( mtcTimer_ptr, hostname , service ); +} + +/* Init / clean a user timer */ +void mtcTimer_init ( struct mtc_timer & mtcTimer ) +{ + _timer_init (&mtcTimer, "", "" ); +} + +/* Init / clean a user timer */ +void mtcTimer_init ( struct mtc_timer & mtcTimer , string hostname ) +{ + _timer_init (&mtcTimer, hostname, "" ); +} + +/* Init / clean a user timer */ +void mtcTimer_init ( struct mtc_timer & mtcTimer , string hostname, string service ) +{ + _timer_init (&mtcTimer, hostname, service ); +} + +/* Wait Utilities - only use during init */ + +static struct mtc_timer waitTimer ; + +static void waitTimer_handler ( int sig, siginfo_t *si, void *uc) +{ + timer_t * tid_ptr = (void**)si->si_value.sival_ptr ; + + /* Avoid compiler errors/warnings for parms we must + * have but currently do nothing with */ + UNUSED(sig); + UNUSED(uc); + if ( !(*tid_ptr) ) + { + return ; + } + /* is base mtc timer */ + else if (( *tid_ptr == waitTimer.tid ) ) + { 
+ waitTimer.ring = true ; + mtcTimer_stop ( waitTimer ); + } + else + { + wlog ("Unexpected timer (%p)\n", *tid_ptr ); + mtcTimer_stop_tid ( tid_ptr ); + } +} + +void mtcWait_msecs ( int millisecs ) +{ + if ( waitTimer.init != TIMER_INIT_SIGNATURE ) + mtcTimer_init ( waitTimer , "localhost", "mtcWait_msecs" ); + + if ( millisecs > 999 ) + { + wlog ("Wait request too long, rounding down to 999\n"); + millisecs = 999 ; + } + mtcTimer_start_msec ( waitTimer, waitTimer_handler, millisecs ); + do + { + usleep (1000); + daemon_signal_hdlr (); + } while ( waitTimer.ring == false ) ; +} + +void mtcWait_secs ( int secs ) +{ + if ( waitTimer.init != TIMER_INIT_SIGNATURE ) + mtcTimer_init ( waitTimer , "localhost", "mtcWait_secs" ); + + mtcTimer_start ( waitTimer, waitTimer_handler, secs ); + do + { + sleep (1); + daemon_signal_hdlr (); + } while ( waitTimer.ring == false ) ; +} + +void mtcTimer_mem_log ( void ) +{ + char str [64] ; + sprintf ( str, "Working Timers: %d\n", timer_count ); + mem_log ( str ); +} diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeTimers.h b/mtce-common/cgts-mtce-common-1.0/common/nodeTimers.h new file mode 100755 index 00000000..154ffc7d --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeTimers.h @@ -0,0 +1,179 @@ +#ifndef __INCLUDE_NODETIMERS_HH__ +#define __INCLUDE_NODETIMERS_HH__ + +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Node Maintenance "Timer Facility" + * Header and Maintenance API + */ + +/** + * @detail + * Detailed description ... 
+ * + * Common timer struct + * + */ + +#include +#include +#include +#include +#include + +#define MAX_TIMER_DURATION (30000) + +#define MTC_SECS_1 (1) +#define MTC_SECS_2 (2) +#define MTC_SECS_5 (5) +#define MTC_SECS_10 ( 10) +#define MTC_SECS_15 ( 15) +#define MTC_SECS_20 ( 20) +#define MTC_SECS_30 ( 30) + +#define MTC_MINS_1 ( 60) +#define MTC_MINS_2 (120) +#define MTC_MINS_3 (180) +#define MTC_MINS_4 (240) +#define MTC_MINS_5 (300) +#define MTC_MINS_10 (600) +#define MTC_MINS_15 (900) +#define MTC_MINS_20 (1200) +#define MTC_MINS_30 (1800) +#define MTC_MINS_40 (2400) +#define MTC_HRS_4 (14400) +#define MTC_HRS_8 (28800) /* old token refresh rate */ + +#define HOST_MTCALIVE_TIMEOUT (MTC_MINS_20) +#define HOST_GOENABLED_TIMEOUT (MTC_MINS_2) +#define MTC_CMD_RSP_TIMEOUT (10) +#define MTC_FORCE_LOCK_RESET_WAIT (30) +#define MTC_RECOVERY_TIMEOUT (16) +#define MTC_PMOND_READY_TIMEOUT (10) +#define MTC_UPTIME_REFRESH_TIMER (MTC_MINS_1) /* If this interval changes review impact + to garbage collecton in mtctimer_handler */ +#define MTC_MNFA_RECOVERY_TIMER (3) +#define MTC_ALIVE_TIMER (5) +#define MTC_POWEROFF_DELAY (5) +#define MTC_SWACT_POLL_TIMER (10) +#define MTC_TASK_UPDATE_DELAY (10) +#define MTC_BM_PING_TIMEOUT (30) +#define MTC_BM_POWEROFF_TIMEOUT (30) +#define MTC_BM_POWERON_TIMEOUT (30) +#define MTC_RESET_PROG_TIMEOUT (20) +#define MTC_WORKQUEUE_TIMEOUT (60) +#define MTC_COMPUTE_CONFIG_TIMEOUT (900) +#define MTC_EXIT_DOR_MODE_TIMEOUT (60*15) +#define MTC_RESET_PROG_OFFLINE_TIMEOUT (20) +#define MTC_RESET_TO_OFFLINE_TIMEOUT (150) +#define MTC_POWEROFF_TO_OFFLINE_TIMEOUT (200) +#define MTC_POWERON_TO_ONLINE_TIMEOUT (900) +#define MTC_POWERCYCLE_COOLDOWN_DELAY (MTC_MINS_5) +#define MTC_POWERCYCLE_BACK2BACK_DELAY (MTC_MINS_5) +#define MTC_HEARTBEAT_SOAK_BEFORE_ENABLE (11) +#define MTC_REINSTALL_TIMEOUT_DEFAULT (MTC_MINS_40) +#define MTC_REINSTALL_TIMEOUT_MIN (MTC_MINS_1) +#define MTC_REINSTALL_TIMEOUT_MAX (MTC_HRS_4) +#define MTC_REINSTALL_WAIT_TIMER (10) 
+#define MTC_IPMITOOL_REQUEST_DELAY (10) /* consider making this shorter */ +#define LAZY_REBOOT_RETRY_DELAY_SECS (60) +#define SM_NOTIFY_UNHEALTHY_DELAY_SECS (5) +#define MTC_MIN_ONLINE_PERIOD_SECS (7) +#define MTC_RETRY_WAIT (5) +#define MTC_AGENT_TIMEOUT_EXTENSION (5) + +/** Host must stay enabled for this long for the + * failed_recovery_counter to get cleared */ +#define MTC_ENABLED_TIMER (5) + +/** Should be same or lower but not less than half of ALIVE_TIMER */ +#define MTC_OFFLINE_TIMER (7) + +#define TIMER_INIT_SIGNATURE (0x86752413) + +struct mtc_timer +{ + /** linux timer structs */ + struct sigevent sev ; /**< set by util - time event specifier */ + struct itimerspec value ; /**< set by util - time values */ + struct sigaction sa ; /**< set by util and create parm handler */ + + /** local service members */ + unsigned int init ; /** timer initialized signatur */ + timer_t tid ; /**< the timer address pointer */ + bool active ; /**< indicates that the timer is active */ + bool mutex ; + bool error ; + int _guard ; + bool ring ; /**< set to true if the timer fires */ + int guard_ ; + int secs ; /**< set by create parm - sub second not supported */ + int msec ; /**< set by create parm - sub second not supported */ + string hostname ; /**< name of the host using the timer */ + string service ; /**< name of the service using the timer */ +} ; + +void mtcTimer_mem_log ( void ); + +void mtcTimer_init ( struct mtc_timer & mtcTimer ); +void mtcTimer_init ( struct mtc_timer & mtcTimer, string hostname ); +void mtcTimer_init ( struct mtc_timer & mtcTimer, string hostname, string service ); +void mtcTimer_init ( struct mtc_timer * mtcTimer_ptr ); +void mtcTimer_init ( struct mtc_timer * mtcTimer_ptr, string hostname, string service ); + +int mtcTimer_start ( struct mtc_timer & mtcTimer, + void (*handler)(int, siginfo_t*, void*), + int seconds ); + +int mtcTimer_start ( struct mtc_timer * mtcTimer_ptr, + void (*handler)(int, siginfo_t*, void*), + int seconds ); + 
+int mtcTimer_start_msec ( struct mtc_timer & mtcTimer, + void (*handler)(int, siginfo_t*, void*), + int msec ); + +int mtcTimer_start_msec ( struct mtc_timer * mtcTimer_ptr, + void (*handler)(int, siginfo_t*, void*), + int msec ); + +int mtcTimer_start_sec_msec ( struct mtc_timer * mtcTimer_ptr, + void (*handler)(int, siginfo_t*, void*), + int secs , int msec ); + +int mtcTimer_stop ( struct mtc_timer & mtc_timer ); +int mtcTimer_stop ( struct mtc_timer * mtcTimer_ptr ); +int mtcTimer_stop_int_safe ( struct mtc_timer & mtcTimer ); +int mtcTimer_stop_int_safe ( struct mtc_timer * mtcTimer_ptr ); + +void mtcTimer_dump_data ( void ); + +/** Cleanup interface - stop and delete an unknown timer */ +int mtcTimer_stop_tid ( timer_t * tid_ptr ); +int mtcTimer_stop_tid_int_safe ( timer_t * tid_ptr ); + +/* returns true if the timer is not active or ring is true */ +bool mtcTimer_expired ( struct mtc_timer & mtcTimer ); +bool mtcTimer_expired ( struct mtc_timer * mtcTimer_ptr ); + +/* stops timer if tid is active running */ +void mtcTimer_reset ( struct mtc_timer & mtcTimer ); +void mtcTimer_reset ( struct mtc_timer * mtcTimer_ptr ); + +/* de-init a user timer */ +void mtcTimer_fini ( struct mtc_timer & mtcTimer ); +void mtcTimer_fini ( struct mtc_timer * mtcTimer_ptr ); + +void mtcWait_msecs ( int millisecs ); +void mtcWait_secs ( int secs ); + +int mtcTimer_testhead ( void ); + +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeUtil.cpp b/mtce-common/cgts-mtce-common-1.0/common/nodeUtil.cpp new file mode 100755 index 00000000..945fa978 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeUtil.cpp @@ -0,0 +1,1977 @@ +/* +* Copyright (c) 2013-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* +*/ + +#include +#include +#include +#include +#include +#include +#include /* for dir reading */ +#include +#include +#include +#include +#include /* for hostent */ +#include +#include +#include +#include /* for ... 
file stat */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +using namespace std; + +#include "nodeBase.h" +#include "nodeClass.h" +#include "nodeUtil.h" +#include "daemon_common.h" +#include "msgClass.h" + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "com" + +/*************************************************************************** + * + * Name : nodeUtil_latency_log + * + * Description: Measures execution time of a section of code. + * Produce a latency log if the specified duration + * in msec is exceeded. + * + * Warning : Not multi thread safe. + * + * Parms: + * + * hostname - for hostname. + * + * label_ptr - "start" to init the prev_timer or + * - "some label" to identify the point in the code and to + * measure time against the previous call. + * + * msecs - the latency log threshold + * + * Usage: + * + * nodeUtil_latency_log ( NODEUTIL_LATENCY_MON_START, 0 ); + * + * [ timed code ] + * + * nodeUtil_latency_log ( hostname, "label 1" , msecs ); + * + * [ timed code ] + * + * nodeUtil_latency_log ( hostname, "label 2", msecs ); + * + * ... + * + *****************************************************************************/ + +void nodeUtil_latency_log ( string hostname, const char * label_ptr, int msecs ) +{ + static unsigned long long prev__time = 0 ; + static unsigned long long this__time = 0 ; + + this__time = gettime_monotonic_nsec () ; + + /* If label_ptr is != NULL and != start then take the measurement */ + if ( label_ptr && strncmp ( label_ptr, NODEUTIL_LATENCY_MON_START, strlen(NODEUTIL_LATENCY_MON_START))) + { + if ( this__time > (prev__time + (NSEC_TO_MSEC*(msecs)))) + { + llog ("%s ... %4llu.%-4llu msec - %s\n", hostname.c_str(), + ((this__time-prev__time) > NSEC_TO_MSEC) ? ((this__time-prev__time)/NSEC_TO_MSEC) : 0, + ((this__time-prev__time) > NSEC_TO_MSEC) ? 
((this__time-prev__time)%NSEC_TO_MSEC) : 0, + label_ptr ); + } + } + /* reset to be equal for next round */ + prev__time = this__time ; +} + +void node_inv_init (node_inv_type & inv) +{ + inv.type.clear(); + inv.uuid.clear(); + inv.name.clear(); + inv.ip.clear(); + inv.mac.clear(); + inv.admin.clear(); + inv.oper.clear(); + inv.avail.clear(); + inv.id.clear(); + inv.task.clear(); + inv.bm_ip.clear(); + inv.bm_un.clear(); + inv.bm_type.clear(); + inv.action.clear(); + inv.uptime.clear(); + inv.oper_subf.clear(); + inv.avail_subf.clear(); + inv.nodetype = 0; +} + +void print_inv ( node_inv_type & info ) +{ + const char bar [] = { "+------------+--------------------------------------+" }; + const char uar [] = { "+ Host Info +--------------------------------------+" }; + + + syslog ( LOG_INFO, "%s\n", &uar[0]); + syslog ( LOG_INFO, "| action : %s\n", info.action.c_str()); + syslog ( LOG_INFO, "| personality: %s\n", info.type.c_str()); + syslog ( LOG_INFO, "| hostname : %s\n", info.name.c_str()); + syslog ( LOG_INFO, "| task : %s\n", info.task.c_str()); + syslog ( LOG_INFO, "| ip : %s\n", info.ip.c_str()); + syslog ( LOG_INFO, "| mac : %s\n", info.mac.c_str()); + syslog ( LOG_INFO, "| uuid : %s\n", info.uuid.c_str()); + syslog ( LOG_INFO, "| operState: %s\n", info.oper_subf.c_str()); + syslog ( LOG_INFO, "| adminState: %s\n", info.admin.c_str()); + syslog ( LOG_INFO, "| operState: %s\n", info.oper.c_str()); + syslog ( LOG_INFO, "| availStatus: %s\n", info.avail.c_str()); + syslog ( LOG_INFO, "| bm ip : %s\n", info.bm_ip.c_str()); + syslog ( LOG_INFO, "| bm un : %s\n", info.bm_un.c_str()); + syslog ( LOG_INFO, "| bm type : %s\n", info.bm_type.c_str()); + syslog ( LOG_INFO, "| subFunction: %s\n", info.func.c_str()); + syslog ( LOG_INFO, "| operState: %s\n", info.oper_subf.c_str()); + syslog ( LOG_INFO, "| availStatus: %s\n", info.avail_subf.c_str()); + syslog ( LOG_INFO, "%s\n", &bar[0]); +} + +unsigned int get_host_function_mask ( string & nodeType_str ) +{ + unsigned 
int nodeType = CGTS_NODE_NULL ; + if ( nodeType_str.find("compute") != string::npos ) + nodeType |= COMPUTE_TYPE ; + if ( nodeType_str.find("controller") != string::npos ) + nodeType |= CONTROLLER_TYPE ; + if ( nodeType_str.find("storage") != string::npos ) + nodeType |= STORAGE_TYPE ; + + return (nodeType); +} + +bool is_combo_system (unsigned int nodetype_mask ) +{ + if ( nodetype_mask & CONTROLLER_TYPE ) + { + if ( nodetype_mask & COMPUTE_TYPE ) + { + return (true); + } + if ( nodetype_mask & STORAGE_TYPE ) + { + return (true); + } + } + return (false); +} + + +int set_host_functions ( string nodetype_str, + unsigned int * nodetype_bits_ptr, + unsigned int * nodetype_function_ptr, + unsigned int * nodetype_subfunction_ptr ) +{ + int rc = PASS ; + *nodetype_bits_ptr = get_host_function_mask ( nodetype_str ) ; + *nodetype_function_ptr = CGTS_NODE_NULL ; + *nodetype_subfunction_ptr = CGTS_NODE_NULL ; + + /* Load up the host function and subfunction */ + if ( *nodetype_bits_ptr & CONTROLLER_TYPE ) + { + *nodetype_function_ptr = CONTROLLER_TYPE ; + dlog2 ("Function : controller\n"); + + /* Check for subfunctions */ + if ( *nodetype_bits_ptr & COMPUTE_TYPE ) + { + dlog2 ("Sub Function: compute\n"); + *nodetype_subfunction_ptr = COMPUTE_TYPE ; + } + if ( *nodetype_bits_ptr & STORAGE_TYPE ) + { + *nodetype_subfunction_ptr |= STORAGE_TYPE ; + dlog2 ("Sub Function: storage\n"); + } + } + else + { + if ( *nodetype_bits_ptr & COMPUTE_TYPE ) + { + *nodetype_function_ptr = COMPUTE_TYPE ; + dlog2 ("Function : compute\n"); + } + else if ( *nodetype_bits_ptr & STORAGE_TYPE ) + { + *nodetype_function_ptr |= STORAGE_TYPE ; + dlog2 ("Function : storage\n"); + } + else + { + elog ("Unsupported nodetype (%u:%s)\n", *nodetype_bits_ptr, nodetype_str.c_str()); + rc = FAIL ; + } + } + return (rc); +} + +/* Checks that the goenabled tests are in the READY or PASS states + * If pass parameter is true, then we check for the appropriate PASS state. 
+ * If pass parameter is false, we check for the appropriate READY state. + * + * Returns true if the appropriate state is found. + */ +bool is_goenabled ( int nodeType, bool pass ) +{ + char* file; + + if ( is_combo_system ( nodeType ) == true ) + { + if ( pass ) + { + file = (char*) GOENABLED_SUBF_PASS; + } + else + { + file = (char*) GOENABLED_SUBF_READY; + } + } + else + { + if ( pass ) + { + file = (char*) GOENABLED_MAIN_PASS; + } + else + { + file = (char*) GOENABLED_MAIN_READY; + } + } + return daemon_is_file_present ( file ); +} + +#define LOG_MEMORY(buf) ilog ("%s", buf ); \ + buf_ptr = &buf[0]; \ + MEMSET_ZERO ( buf ); + +void dump_memory ( void * raw_ptr , int format, size_t bytes ) +{ + uint32_t * word_ptr = (uint32_t*)raw_ptr ; + uint8_t * byte_ptr = (uint8_t*)raw_ptr ; + + char buf[0x1024] ; + char * buf_ptr = &buf[0]; + MEMSET_ZERO ( buf ); + ilog ("Dumping Memory:\n"); + if ( format == 4 ) + { + int loops = bytes/format ; + + for ( int i = 0 ; i < loops ; i++ ) + { + buf_ptr += sprintf ( buf_ptr, "0x%p : 0x%08x : ", word_ptr, *word_ptr ); + byte_ptr = (uint8_t*)word_ptr ; + for ( int c = 0 ; c < format ; c++ ) + { + if (( *byte_ptr >= ' ' ) && ( *byte_ptr <= '~' )) + buf_ptr += sprintf ( buf_ptr, "%c", *byte_ptr) ; + else + buf_ptr += sprintf ( buf_ptr, "%c", '.'); + + byte_ptr++ ; + } + LOG_MEMORY(buf); + word_ptr++ ; + } + } + else if ( format == 8 ) + { + int loops = bytes/format ; + + for ( int i = 0 ; i < loops ; i++ ) + { + buf_ptr += sprintf ( buf_ptr, "0x%p : 0x%08x 0x%08x : ", word_ptr, *word_ptr, *(word_ptr+1) ); + byte_ptr = (uint8_t*)word_ptr ; + for ( int c = 0 ; c < format ; c++ ) + { + if (( *byte_ptr >= ' ' ) && ( *byte_ptr <= '~' )) + buf_ptr += sprintf ( buf_ptr , "%c", *byte_ptr) ; + else + buf_ptr += sprintf ( buf_ptr , "%c", '.'); + + byte_ptr++ ; + } + LOG_MEMORY(buf); + word_ptr += 2 ; + } + } + else if ( format == 16 ) + { + int loops = bytes/format ; + + for ( int i = 0 ; i < loops ; i++ ) + { + buf_ptr += sprintf ( buf_ptr, 
"0x%p : 0x%08x 0x%08x 0x%08x 0x%08x : ", word_ptr, *word_ptr, *(word_ptr+1), *(word_ptr+2), *(word_ptr+3)); + byte_ptr = (uint8_t*)word_ptr ; + for ( int c = 0 ; c < format ; c++ ) + { + if (( *byte_ptr >= ' ' ) && ( *byte_ptr <= '~' )) + buf_ptr += sprintf ( buf_ptr , "%c", *byte_ptr) ; + else + buf_ptr += sprintf ( buf_ptr , "%c", '.'); + + byte_ptr++ ; + } + LOG_MEMORY(buf); + word_ptr += 4 ; + } + } + byte_ptr = (uint8_t*)raw_ptr ; + ilog ("Raw Hex Dump : %ld\n", bytes ); + for ( unsigned int x = 0 ; x < bytes ; x++ ) + { + buf_ptr += sprintf ( buf_ptr, " %02x", *byte_ptr ); + byte_ptr++ ; + } + // printf ("\n\n"); +} + + +/** Initialize the supplied command buffer */ +void mtcCmd_init ( mtcCmd & cmd ) +{ + cmd.cmd = 0 ; + cmd.rsp = 0 ; + cmd.ack = 0 ; + cmd.retry = 0 ; + cmd.parm1 = 0 ; + cmd.parm2 = 0 ; + cmd.task = false ; + cmd.status = RETRY ; + cmd.status_string.clear(); + cmd.name.clear(); +} + +char hostname_floating [] = { "controller" } ; + +string getipbyiface ( const char * iface ) +{ + string ip_string = ""; + char ip_cstr[INET6_ADDRSTRLEN]; + if(msgClassAddr::getAddressFromInterface(iface, ip_cstr, INET6_ADDRSTRLEN)==PASS) + { + ip_string = ip_cstr; + } + return ip_string; +} + +string getipbyname ( string name ) +{ + string ip_string = "" ; + const char* address_string; + int count = 0 ; + do + { + msgClassAddr addr = msgClassAddr(name.c_str()); + address_string = addr.toNumericString(); + if(address_string) + { + ip_string = address_string; + } + if(ip_string.empty()) + { + wlog_throttled ( count, 50, "Unable to get ip address list for '%s', retrying ...\n", name.c_str()); + mtcWait_secs (2); + } + daemon_signal_hdlr (); + } while ( ip_string.empty() ) ; + return (ip_string); +} + +string getipbynameifexists ( string name ) +{ + string ip_string = "" ; + const char* address_string; + msgClassAddr addr = msgClassAddr(name.c_str()); + address_string = addr.toNumericString(); + if(address_string) + { + ip_string = address_string; + } + return 
(ip_string); +} + + +/* Reads the local hostname, ip and the floating ip address. + * Returns RETRY if the information has changed compared to what is + * passed in. Reference strings are updated if values are changed */ +int get_ip_addresses ( string & my_hostname , string & my_local_ip , string & my_float_ip ) +{ + int rc = PASS ; + + string temp_hostname = "" ; + string temp_local_ip = "" ; + string temp_float_ip = "" ; + + char hostname_str [MAX_HOST_NAME_SIZE+1]; + memset (&hostname_str[0], 0, MAX_HOST_NAME_SIZE+1); + + /* read the host name */ + rc = gethostname(&hostname_str[0], MAX_HOST_NAME_SIZE ); + if ( rc == PASS ) + { + /* Load as a string and then compare */ + temp_hostname = hostname_str ; + if ( temp_hostname != my_hostname ) + { + /* update control struct and set rc for reload (RETRY) */ + my_hostname = temp_hostname ; + ilog ("My Hostname : %s\n", my_hostname.c_str()); + rc = RETRY ; + } + } + else + { + /* get the host info */ + elog ("Unable to get controller local hostname\n"); + return (FAIL); + } + + set_hn (hostname_str); + + /* Get the Primary hostname ip address */ + temp_local_ip = getipbyname (hostname_str); + + /* See if the local ip address has changed */ + if ( temp_local_ip != my_local_ip ) + { + ilog (" Local IP : %s\n", temp_local_ip.c_str()); + + /* update control struct and set rc for reload (RETRY) */ + my_local_ip = temp_local_ip ; + rc = RETRY ; + } + + /* Move on to read the floating ip */ + temp_float_ip = getipbyname (hostname_floating); + + /* See if the floating ip address has changed */ + if ( temp_float_ip != my_float_ip ) + { + ilog ("Floating IP : %s\n", temp_float_ip.c_str()); + + + /* update control struct and set rc for reload (RETRY) */ + my_float_ip = temp_float_ip ; + rc = RETRY ; + } + return rc; +} + +int open_ioctl_socket ( void ) +{ + int flags; + + int ioctl_socket = socket( PF_PACKET, SOCK_DGRAM, 0 ); + if( 0 > ioctl_socket ) + { + elog ( "Failed to open ioctl socket (%d:%s)\n", + errno, strerror( errno ) 
); + return( ioctl_socket ); + } + + flags = fcntl( ioctl_socket, F_GETFL, 0 ); + if( 0 > flags ) + { + elog ( "Failed to get ioctl socket flags (%d:%s)\n", + errno, strerror( errno ) ); + close( ioctl_socket ); + return( flags ); + } + + if( 0 > fcntl( ioctl_socket, F_SETFL, flags | O_NONBLOCK ) ) + { + elog ( "Failed to set ioctl socket flags (%d:%s)", + errno, strerror( errno ) ); + close( ioctl_socket ); + return( -1 ); + } + + return ( ioctl_socket ); +} + +/* returns true if the link is up for the specified interface */ +int get_link_state_throttle = 0 ; +int get_link_state ( int ioctl_socket, const char * iface_ptr, bool * running_ptr ) +{ + struct ifreq if_data; + int rc = FAIL ; + + if (!iface_ptr || !*iface_ptr) + { + dlog ("Null interface name\n"); + return ( rc ) ; + } + + memset( &if_data, 0, sizeof(if_data) ); + sprintf( if_data.ifr_name, "%s", iface_ptr ); + if( 0 <= ioctl( ioctl_socket, SIOCGIFFLAGS, &if_data ) ) + { + if( if_data.ifr_flags & IFF_RUNNING ) + { + *running_ptr = true; + } + else + { + *running_ptr = false; + } + + /* reset log flood gate counter */ + get_link_state_throttle = 0 ; + + rc = PASS ; + } + else + { + wlog_throttled (get_link_state_throttle, 100, + "Failed to get %s (%s) interface state (%d:%s)\n", + iface_ptr, if_data.ifr_name, errno, strerror(errno)); + } + return ( rc ); +} + +int get_iface_attrs ( const char * iface_ptr, + int & index, + int & speed, + int & duplex, + string & autoneg ) +{ + struct ifreq ifr; + struct ethtool_cmd cmd; + int fd, result; + + if (!iface_ptr || !*iface_ptr) + { + elog ("Null interface name\n"); + return FAIL ; + } + + index = -1; + speed = -1; + duplex = -1; + autoneg = "N/A" ; + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd == -1) + { + const int err = errno; + elog ("%s: Cannot create AF_INET socket: (%s)\n", iface_ptr, strerror(err)); + return FAIL; + } + + snprintf (ifr.ifr_name, sizeof(ifr.ifr_name), "%s", iface_ptr); + ifr.ifr_data = (char*)&cmd; + cmd.cmd = ETHTOOL_GSET; + if 
(ioctl(fd, SIOCETHTOOL, &ifr) < 0) + { + const int err = errno; + do + { + result = close(fd); + + daemon_signal_hdlr (); + + } while (result == -1 && errno == EINTR); + elog ("%s: SIOCETHTOOL ioctl: (%s)\n", iface_ptr, strerror(err)); + return FAIL; + } + + speed = ethtool_cmd_speed(&cmd); + + if ( cmd.advertising & SUPPORTED_Autoneg ) + { + ilog ("Autoneg: %d\n", cmd.autoneg ); + if ( cmd.autoneg ) + autoneg = "Yes" ; + else + autoneg = "No"; + } + + switch (cmd.duplex) + { + case DUPLEX_HALF: duplex = 0; break; + case DUPLEX_FULL: duplex = 1; break; + default: + elog ("%s: Unknown mode (0x%x).\n", iface_ptr, cmd.duplex); + } + + if (index && ioctl(fd, SIOCGIFINDEX, &ifr) >= 0) + { + index = ifr.ifr_ifindex; + } + + do + { + result = close(fd); + } while (result == -1 && errno == EINTR); + + if (result == -1) + { + const int err = errno; + elog ("%s: Error closing socket: %s\n", iface_ptr, strerror(err)); + return FAIL ; + } + + return PASS; +} + + +int get_iface_macaddr ( const char * iface_ptr , string & macaddr ) +{ + int rc ; + struct ifreq s; + int fd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); + memset ( (void*)&s, 0 , sizeof(struct ifreq) ); + strcpy(s.ifr_name, iface_ptr ); + rc = ioctl(fd, SIOCGIFHWADDR, &s) ; + if ( rc == PASS ) + { + char str [COL_CHARS_IN_MAC_ADDR+1] ; /* and terminator */ + memset ( &str[0], 0 , COL_CHARS_IN_MAC_ADDR); + + snprintf ( &str[0], COL_CHARS_IN_MAC_ADDR+1, + "%02x:%02x:%02x:%02x:%02x:%02x", + (unsigned char)(s.ifr_hwaddr.sa_data[0]), + (unsigned char)(s.ifr_hwaddr.sa_data[1]), + (unsigned char)(s.ifr_hwaddr.sa_data[2]), + (unsigned char)(s.ifr_hwaddr.sa_data[3]), + (unsigned char)(s.ifr_hwaddr.sa_data[4]), + (unsigned char)(s.ifr_hwaddr.sa_data[5])); + + macaddr = str ; + ilog ("Mac Address : %s\n", macaddr.c_str() ); + } + else + { + elog ("Mac Address : Unknown\n"); + elog ("Failed to get %s's mac address (rc:%d)\n", iface_ptr , rc ); + } + close(fd); + return (rc); +} + +string get_iface_mac ( const char * iface_ptr ) +{ 
+ int rc ; + struct ifreq s; + string mac = "---" ; + int fd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); + memset ( (void*)&s, 0 , sizeof(struct ifreq) ); + strcpy(s.ifr_name, iface_ptr ); + rc = ioctl(fd, SIOCGIFHWADDR, &s) ; + if ( rc == PASS ) + { + char str [COL_CHARS_IN_MAC_ADDR+1] ; /* and terminator */ + memset ( &str[0], 0 , COL_CHARS_IN_MAC_ADDR); + + snprintf ( &str[0], COL_CHARS_IN_MAC_ADDR, + "%02x:%02x:%02x:%02x:%02x:%02x", + (unsigned char)(s.ifr_hwaddr.sa_data[0]), + (unsigned char)(s.ifr_hwaddr.sa_data[1]), + (unsigned char)(s.ifr_hwaddr.sa_data[2]), + (unsigned char)(s.ifr_hwaddr.sa_data[3]), + (unsigned char)(s.ifr_hwaddr.sa_data[4]), + (unsigned char)(s.ifr_hwaddr.sa_data[5])); + + mac = str ; + ilog ("Mac Address : %s\n", mac.c_str() ); + } + else + { + elog ("Mac Address : Unknown\n"); + elog ("Failed to get %s's mac address (rc:%d)\n", iface_ptr , rc ); + } + close(fd); + return (mac); +} + + + + +int get_hostname ( char * hostname_ptr, int max_len ) +{ + int rc ; + memset ( hostname_ptr, 0, max_len-1); + do + { + rc = gethostname(hostname_ptr, max_len ); + if ( rc == PASS ) + { + ilog ("Hostname : %s\n", hostname_ptr); + } + else + { + wlog ("No hostname, retrying ...\n" ); + mtcWait_secs (5); + } + + daemon_signal_hdlr (); + + } while ( rc != PASS ) ; + + set_hn (hostname_ptr); + + return (rc); +} + +int get_iface_hostname ( const char * iface_ptr, char * hostname_ptr) +{ + int rc ; + memset ( hostname_ptr, 0, MAX_HOST_NAME_SIZE-1); + do + { + rc = gethostname(hostname_ptr, MAX_HOST_NAME_SIZE ); + if ( rc == PASS ) + { + ilog ("Hostname : %s\n", hostname_ptr); + } + else + { + wlog ("%s has no hostname, retrying ...\n", iface_ptr); + mtcWait_secs (5); + } + daemon_signal_hdlr (); + } while ( rc != PASS ) ; + return (rc); +} + +int get_iface_address ( const char * iface_ptr, string & ip_addr , bool retry ) +{ + int rc ; + int count ; + char ip_cstr[INET6_ADDRSTRLEN]; + + /* Now fetch the IP address. We stay here till we have one. 
*/ + count = 0 ; + do + { + rc = msgClassAddr::getAddressFromInterface(iface_ptr, ip_cstr, INET6_ADDRSTRLEN); + if ( rc == PASS ) + { + ip_addr = ip_cstr; + ilog ("IP Address : %s\n", ip_addr.c_str() ); + } + else + { + wlog_throttled ( count, 24, "%s has no IP address (rc=%d), retrying ...\n", iface_ptr, rc ); + /* get out if the caller does not want retries */ + if ( retry == false ) + return (RETRY); + + mtcWait_secs (5); + } + daemon_signal_hdlr (); + } while ( rc != PASS ) ; + return (rc); +} + +void get_infra_iface ( char ** infra_iface_ptr ) +{ + char * iface_ptr ; + string infra = daemon_infra_iface(); + + /* remove .None from the interface name if it exists */ + size_t found = infra.find(".None"); + if ( found != string::npos) + { + infra.erase(found, string::npos); + } + if ( infra.size() ) + { + iface_ptr = daemon_get_iface_master ( (char*)infra.data()); + *infra_iface_ptr = strdup((const char*)iface_ptr); + dlog("Infra iface : %s\n", *infra_iface_ptr ); + } + else + { + *infra_iface_ptr = strdup((const char*)""); + } +} + +/***************************************************************************** + * + * Name : load_filenames_in_dir + * + * Purpose : Load the supplied list with all the file names + * in the specified directory + * + *****************************************************************************/ + +int load_filenames_in_dir ( const char * directory, std::list & filelist ) +{ + DIR *d; + struct dirent *dir; + + /* Clear the content of the config file list and running counter */ + filelist.clear (); + + d = opendir(directory); + if (d) + { + while ((dir = readdir(d)) != NULL) + { + dlog3 ("File: %s\n", dir->d_name); + if ( strcmp ( dir->d_name , "." ) && + strcmp ( dir->d_name , ".." 
)) + { + string temp = directory ; + temp.append("/"); + temp.append(dir->d_name); + filelist.push_back ( temp ); + } + daemon_signal_hdlr (); + } + closedir(d); + } + else + { + elog ("Failed to open %s\n", directory ); + } + return(PASS); +} + +int setup_child ( bool close_file_descriptors ) +{ + /* Create a new process group for the child process */ + if ( 0 > setpgid (0,0)) + return (FAIL); + + /* Change the current working directory. */ + if ((chdir("/")) < 0) + return (FAIL); + + if ( close_file_descriptors == true ) + { + struct rlimit file_limits ; + + if ( 0 < getrlimit ( RLIMIT_NOFILE, &file_limits ) ) + return (FAIL); + + /* Close all existing file descriptors */ + for ( unsigned int fd_i = 0 ; fd_i < file_limits.rlim_cur ; ++fd_i ) + { + close (fd_i); + } + + int fd = open("/dev/null",O_RDWR, 0); + + if (fd != -1) + { + dup2 (fd, STDIN_FILENO); + dup2 (fd, STDOUT_FILENO); + dup2 (fd, STDERR_FILENO); + + if (fd > 2) + { + close (fd); + } + } + } + return (PASS); +} + +/* + * Issue a double fork to run the specified command string. Its a + * double fork to avoid need to stick around ; avoids defunct processes. + * On success the grandchild gets 0 and the parent gets 1. + * On failure the parent gets -1. 
+ */ +int double_fork_host_cmd ( string hostname , char * cmd_string, const char * cmd_oper ) +{ + int status = 0 ; + + + UNUSED(cmd_oper); /* for future use maybe */ + blog ("%s %s\n", hostname.c_str(), cmd_string); + + /* Flush the logs before fork */ + fflush (stdout); + fflush (stderr); + + pid_t child_pid = fork (); + if ( child_pid == 0 ) + { + /* In child process */ + pid_t grandchild_pid = fork(); + if ( grandchild_pid > 0 ) + { + /* child exits immediately */ + exit (0); + } + else if ( grandchild_pid == -1 ) + { + // in child, error forking + wlog("problem forking grandchild: %m\n"); + fflush(stdout); + exit (-1); + } + else + { + /* grandchild runs system command */ + int rc = FAIL ; + bool close_file_descriptors = false ; + if ( setup_child ( close_file_descriptors ) != PASS ) + { + exit(EXIT_FAILURE); + } + + /* Set child to ignore child exit */ + signal (SIGCHLD, SIG_DFL); + + if (( rc = system(cmd_string) ) > 0 ) + { + wlog("system call failed: '%s' (%d:%m)\n", cmd_string, errno ); + // decode_error ( hostname, rc , cmd_string ) ; + } + else + { + dlog ( "%s forked cmd '%s' passed\n", hostname.c_str(), cmd_string); + } + exit(0); + } + } + else if ( child_pid > 0 ) + { + // In Parent, child successfully forked + /* Wait for first child to exit ; happens immediately */ + waitpid(child_pid , &status, 0 ); + if ( WIFEXITED(status) && WEXITSTATUS(status) == 0 ) + { + return 1; + } + else + { + ilog ("Waiting for child to exit ...\n"); + usleep (1000); + } + } + wlog("problem forking child: %m\n"); + fflush(stdout); + return -1; +} + + +/* Issue a double fork to avoid need to + * stick around to avoid defunct processes. + * On success the grandchild gets 0 and the parent gets 1. + * On failure the parent gets -1. + */ +int double_fork ( void ) +{ + int status = 0 ; + + /* Theoretically we should flush the logs before forking otherwise it can + * cause duplicate messages in parent and child. 
But we want to minimize + * the work being done in the main thread, so we accept the tradeoff. + */ + //fflush (stdout); + //fflush (stderr); + + pid_t child_pid = fork (); + if ( child_pid == 0 ) + { + pid_t grandchild_pid = fork(); + if ( grandchild_pid == 0 ) { + // in grandchild + return 0; + } else if ( grandchild_pid == -1 ) { + // in child, error forking + wlog("problem forking grandchild: %m\n"); + fflush(stdout); + exit (-1); + } else + // in child, grandchild successfully forked + exit (0); + } + else if ( child_pid > 0 ) + { + // in parent, child successfully forked + /* Wait for first child to exit ; happens immediately */ + waitpid(child_pid , &status, 0 ); + if ( WIFEXITED(status) && WEXITSTATUS(status) == 0 ) + return 1; + } + wlog("problem forking child: %m\n"); + fflush(stdout); + return -1; +} + +/*************************************************************************** + * + * Name : fork_sysreq_reboot + * + * Purpose : Timed SYSREQ Reset service used as a backup mechanism + * to force a self reset after a specified period of time. + * + **************************************************************************/ + +/* This is a common utility that forces a sysreq reboot */ +void fork_sysreq_reboot ( int delay_in_secs ) +{ + int parent = 0 ; + + /* Fork child to do a sysreq reboot. */ + if ( 0 > ( parent = double_fork())) + { + elog ("failed to fork fail-safe (backup) sysreq reboot\n"); + return ; + } + else if( 0 == parent ) /* we're the child */ + { + int sysrq_handler_fd; + int sysrq_tigger_fd ; + size_t temp ; + + setup_child ( false ) ; + + ilog ("*** Failsafe Reset Thread ***\n"); + + /* Commented this out because blocking SIGTERM in systemd environment + * causes any processes that spawn this sysreq will stall shutdown + * + * sigset_t mask , mask_orig ; + * sigemptyset (&mask); + * sigaddset (&mask, SIGTERM ); + * sigprocmask (SIG_BLOCK, &mask, &mask_orig ); + * + */ + + // Enable sysrq handling. 
+ sysrq_handler_fd = open( "/proc/sys/kernel/sysrq", O_RDWR | O_CLOEXEC ); + if( 0 > sysrq_handler_fd ) + { + ilog ( "failed sysrq_handler open\n"); + return ; + } + + temp = write( sysrq_handler_fd, "1", 1 ); + close( sysrq_handler_fd ); + + for ( int i = delay_in_secs ; i >= 0 ; --i ) + { + sleep (1); + { + if ( 0 == (i % 5) ) + { + ilog ( "sysrq reset in %d seconds\n", i ); + } + } + } + + // Trigger sysrq command. + sysrq_tigger_fd = open( "/proc/sysrq-trigger", O_RDWR | O_CLOEXEC ); + if( 0 > sysrq_tigger_fd ) + { + ilog ( "failed sysrq_trigger open\n"); + return ; + } + + temp = write( sysrq_tigger_fd, "b", 1 ); + close( sysrq_tigger_fd ); + + ilog ( "sysreq rc:%ld\n", temp ); + + UNUSED(temp); + + sleep (10); + + // Shouldn't get this far, else there was an error. + exit(-1); + } + ilog ("Forked Fail-Safe (Backup) Reboot Action\n"); +} + +/*************************************************************************** + * + * Name : fork_graceful_reboot + * + * Purpose : Timed reset via /sbin/reboot which attempts to use graceful + * mechanisms (like unmounting filesystems) to perform a reset. + * Note that in cases where blocking can occur (like I/O failure) + * the forked process may become blocked. 
+ * + **************************************************************************/ + +/* This is a common utility that forces a /sbin/reboot reboot */ +void fork_graceful_reboot ( int delay_in_secs ) +{ + int parent = double_fork (); + if (0 > parent) /* problem forking */ + { + elog ("failed to fork graceful reboot process\n"); + return ; + } + else if (0 == parent) /* if we're the child */ + { + char* reboot_args[] = { (char*) "/sbin/reboot", NULL }; + char* reboot_env[] = { NULL }; + + setup_child(false); /* initialize the process group, etc */ + ilog ("*** Graceful Reset Thread ***\n"); + + /* Commented this out because blocking SIGTERM in systemd environment + * causes any processes that spawn this sysreq will stall shutdown + * + * sigset_t mask , mask_orig ; + * sigemptyset (&mask); + * sigaddset (&mask, SIGTERM ); + * sigprocmask (SIG_BLOCK, &mask, &mask_orig ); + * + */ + sleep (delay_in_secs); + + execve ("/sbin/reboot", reboot_args, reboot_env); + + /* execve returns -1 on error, and does not return on success */ + elog ("Could not execute graceful reboot - error code = %d (%s)\n", + errno, strerror(errno)); + exit (-1); + } +} + +bool is_string_in_string_list ( std::list & l , string & str ) +{ + if ((std::find (l.begin(), l.end(), str )) == l.end()) + { + return (false); + } + return (true); +} +#define WANT_MANUAL_SEARCH +bool is_int_in_int_list ( std::list & l , int & val ) +{ +#ifdef WANT_MANUAL_SEARCH + std::list::iterator iter_ptr ; + for ( iter_ptr = l.begin() ; iter_ptr != l.end() ; iter_ptr++ ) + { + if ( *iter_ptr == val ) + { + ilog ("%d found\n", val ); + return (true) ; + } + } + return (false); +#else + if ((std::find (l.begin(), l.end(), val )) == l.end()) + { + return (false); + } + return (true); +#endif +} + +string get_strings_in_string_list ( std::list & l ) +{ + std::list::iterator iter_ptr ; + string s = "" ; + + if ( l.empty() ) + return (s); + + for ( iter_ptr = l.begin() ; iter_ptr != l.end() ; iter_ptr++ ) + { + 
s.append(iter_ptr->c_str()); + s.append(" "); + } + return (s); +} + + +bool string_contains ( string buffer, string sequence ) +{ + size_t found = buffer.find(sequence); + if ( found != string::npos ) + return (true); + else + return (false); +} + + +extern char *program_invocation_short_name; + + +string get_bm_password ( const char * uuid ) +{ + #define STR_LEN 128 + int rc = RETRY ; + string value = "" ; + + daemon_signal_hdlr (); + + if ( uuid == NULL ) + { + slog ("failed ; Null uuid\n" ); + return (value); + } + + string temp_file = "/tmp/." ; + temp_file.append(program_invocation_short_name); + temp_file.append("_bmc.tmp"); + + /* If the keyring dir is not present then set the password to unknown */ + DIR *d = opendir(daemon_get_cfg_ptr()->keyring_directory); + if (d) + { + char str [STR_LEN] ; + memset (&str[0],0,STR_LEN); + sprintf(&str[0], "/usr/bin/keyring get BM %s > %s", uuid, temp_file.data() ); + /* This system call can take 1 sec */ + rc = system(str) ; + { + int parms ; + usleep(10); + FILE *fp = fopen(temp_file.c_str(), "r"); + if (fp) + { + memset (&str[0],0,STR_LEN); + if ( (parms = fscanf(fp, "%128s", &str[0])) == 1 ) + { + value = str ; /* get the value we are looking for */ + rc = PASS ; + } + fclose(fp); + } + else + { + wlog ("Failed to open %s\n", temp_file.c_str() ); + } + } + closedir(d); + } + else + { + wlog ("Failed to open credentials directory '%s'\n", daemon_get_cfg_ptr()->keyring_directory ); + } + + if ( rc != PASS ) + { + wlog ("password for uuid '%s' not found\n", uuid); + } + + /* Don't leave the temp file containing the password around */ + daemon_remove_file ( temp_file.data() ); + + return (value); +} + +static int health = NODE_HEALTH_UNKNOWN ; +int get_node_health ( string hostname ) +{ + struct stat p ; + memset ( &p, 0 , sizeof(struct stat)); + stat ( CONFIG_PASS_FILE, &p ) ; + if ((p.st_ino != 0 ) && (p.st_dev != 0)) + { + if ( health != NODE_HEALTHY ) + { + ilog ("%s is Healthy (%d)\n", hostname.c_str(), health ); + 
} + health = NODE_HEALTHY ; + } + else + { + memset ( &p, 0 , sizeof(struct stat)); + stat ( CONFIG_FAIL_FILE, &p ) ; + if ((p.st_ino != 0 ) && (p.st_dev != 0)) + { + if ( health != NODE_UNHEALTHY ) + { + elog ("%s is UnHealthy\n", hostname.c_str()); + } + else if ( health == NODE_HEALTH_UNKNOWN ) + { + wlog ("%s is UnHealthy\n", hostname.c_str()); + } + health = NODE_UNHEALTHY ; + } + else + { + if ( health != NODE_HEALTH_UNKNOWN ) + { + wlog ("%s has Unknown Health\n", hostname.c_str()); + } + health = NODE_HEALTH_UNKNOWN ; + } + } + return (health); +} + +int clean_bm_response_files ( string hostname ) +{ + char cmd_string [200] ; + sprintf ( &cmd_string[0], "rm -f /var/run/.bm*.%s", hostname.data()); + int rc = system(cmd_string); + return (rc); +} + + +string md5sum_string ( string str ) +{ + string temp ; + unsigned char digest[MD5_DIGEST_LENGTH]; + char md5str [MD5_STRING_LENGTH]; + memset ( &digest, 0, MD5_DIGEST_LENGTH ); + memset ( &md5str, 0, MD5_STRING_LENGTH ); + + MD5 ((unsigned char*)str.data(), str.length(), (unsigned char*)&digest); + + for(int i = 0; i < MD5_DIGEST_LENGTH; i++) + sprintf(&md5str[i*2], "%02x", (unsigned int)digest[i]); + + // ilog ("user value: %s\n", buffer ); + // ilog ("md5 digest: %s\n", md5str ); + temp = md5str ; + return (temp); +} + +/* get a processid by the processname using a pipe */ +int get_pid_by_name_pipe ( string procname ) +{ + pid_t pid = 0 ; + if ( procname.length() ) + { + char buffer[MAX_CHARS_FILENAME] ; + snprintf ( buffer, MAX_CHARS_FILENAME, "pidof -s %s" , procname.data() ); + FILE *cmd_pipe = popen (buffer, "r" ); + if ( cmd_pipe ) + { + memset(buffer, 0, MAX_CHARS_FILENAME ); + char * c = fgets (buffer, MAX_CHARS_FILENAME, cmd_pipe ); + UNUSED(c); + if ( strnlen ( buffer , MAX_CHARS_FILENAME ) ) + { + pid = strtoul (buffer, NULL, 10) ; + } + pclose(cmd_pipe); + } + } + return (pid); +} + +/* get a processid by the processname searching the proc file system */ +int get_pid_by_name_proc ( string procname ) 
+{ + int pid = -1; + + /* Open the /proc dir */ + DIR *dp = opendir("/proc"); + if (dp != NULL) + { + /* Enumerate all entries in directory until we find the process */ + struct dirent *dirp; + while (pid < 0 && (dirp = readdir(dp))) + { + /* Skip non-numeric entries */ + int id = atoi(dirp->d_name); + if (id > 0) + { + /* Read contents of virtual /proc/{pid}/cmdline file */ + string cmdPath = string("/proc/") + dirp->d_name + "/cmdline"; + ifstream cmdFile(cmdPath.c_str()); + string cmdLine; + getline(cmdFile, cmdLine); + if (!cmdLine.empty()) + { + size_t pos = cmdLine.find("python"); + //printf ("\nCmdLine: %s (length:%ld)\n", cmdLine.c_str(), cmdLine.length()); + if (pos != string::npos) + { + cmdLine = cmdLine.substr(7, cmdLine.length()-8); + //printf ("\nCmdLine Next: %s (length:%ld)\n", cmdLine.c_str(), cmdLine.length()); + std::size_t found = cmdLine.find(procname); + if ( found != std::string::npos ) + { + closedir(dp); + return(id) ; + } + } + + /* Keep first cmdline item which contains the program path */ + pos = cmdLine.find('\0'); + if (pos != string::npos) + { + cmdLine = cmdLine.substr(0, pos); + } + /* removing the path prefix */ + pos = cmdLine.rfind('/'); + if (pos != string::npos) + cmdLine = cmdLine.substr(pos + 1); + + /* is this the process ? 
*/ + if (procname == cmdLine) + pid = id; + } + } + } + } + + closedir(dp); + + return pid; +} + + + +const char mgmnt_iface_str[] = { "Mgmnt" } ; +const char infra_iface_str[] = { "Infra" } ; +const char null_iface_str[] = { "Null" } ; + +const char * get_iface_name_str ( int iface ) +{ + switch ( iface ) + { + case MGMNT_IFACE: + return mgmnt_iface_str; + case INFRA_IFACE: + return infra_iface_str; + default: + return null_iface_str ; + } +} + + +string get_event_str ( int event_code ) +{ + switch ( event_code ) + { + case MTC_EVENT_RMON_READY: + case MTC_EVENT_MONITOR_READY: + return "ready" ; + case MTC_EVENT_PMOND_CLEAR: + case MTC_EVENT_PMON_CLEAR: + case MTC_EVENT_RMON_CLEAR: + case MTC_EVENT_HWMON_CLEAR: + return "clear" ; + case MTC_EVENT_PMON_CRIT: + case MTC_EVENT_RMON_CRIT: + case MTC_EVENT_HWMON_CRIT: + return "critical" ; + case MTC_EVENT_PMON_LOG: + return "log" ; + case MTC_EVENT_PMON_MAJOR: + case MTC_EVENT_RMON_MAJOR: + case MTC_EVENT_HWMON_MAJOR: + return "major" ; + case MTC_EVENT_PMON_MINOR: + case MTC_EVENT_RMON_MINOR: + case MTC_EVENT_HWMON_MINOR: + return "minor" ; + case MTC_EVENT_HWMON_CONFIG: + return "config" ; + case MTC_EVENT_HWMON_RESET: + return "reset" ; + case MTC_EVENT_HWMON_POWERDOWN: + return "power-down" ; + case MTC_EVENT_HWMON_POWERCYCLE: + return "power-cycle" ; + case MTC_DEGRADE_RAISE: + return "degrade raise" ; + case MTC_DEGRADE_CLEAR: + return "degrade clear" ; + case MTC_CMD_ADD_HOST: + return "add" ; + case MTC_CMD_DEL_HOST: + return "delete" ; + case MTC_CMD_MOD_HOST: + return "modify" ; + case MTC_CMD_QRY_HOST: + return "query" ; + case MTC_CMD_START_HOST: + return "start" ; + case MTC_CMD_STOP_HOST: + return "stop" ; + default: + { + slog ("Unknown event code (0x%x)\n", event_code ); + return "unknown" ; + } + } +} + +#define HTTP_GET_STR "GET" +#define HTTP_PUT_STR "PUT" +#define HTTP_PATCH_STR "PATCH" +#define HTTP_POST_STR "POST" +#define HTTP_DELETE_STR "DELETE" +#define HTTP_UNKNOWN_STR "UNKNOWN" + +/* 
Private: convert http event type to its string name */ +const char * getHttpCmdType_str ( evhttp_cmd_type type ) +{ + switch (type) + { + case EVHTTP_REQ_GET: return(HTTP_GET_STR); + case EVHTTP_REQ_PUT: return(HTTP_PUT_STR); + case EVHTTP_REQ_PATCH: return(HTTP_PATCH_STR); + case EVHTTP_REQ_POST: return(HTTP_POST_STR); + case EVHTTP_REQ_DELETE: return(HTTP_DELETE_STR); + case EVHTTP_REQ_HEAD: + case EVHTTP_REQ_OPTIONS: + case EVHTTP_REQ_TRACE: + case EVHTTP_REQ_CONNECT: + default: + break ; + } + return(HTTP_UNKNOWN_STR); +} + +#define MAX_NUM_LEN 64 +string itos ( int val ) +{ + char int_str[MAX_NUM_LEN] ; + string temp ; + memset ( &int_str[0], 0, MAX_NUM_LEN ); + snprintf ( &int_str[0], MAX_NUM_LEN, "%d" , val ); + temp = int_str ; + return (temp); +} + +string ftos ( float val, int resolution ) +{ + char float_str[MAX_NUM_LEN] ; + string temp ; + memset ( &float_str[0], 0, MAX_NUM_LEN ); + if ( resolution == 2 ) + snprintf ( &float_str[0], MAX_NUM_LEN, "%.2f" , val ); + else if ( resolution == 3 ) + snprintf ( &float_str[0], MAX_NUM_LEN, "%.3f" , val ); + else + snprintf ( &float_str[0], MAX_NUM_LEN, "%.1f" , val ); + temp = float_str ; + return (temp); +} + +/* standard 1s complement checksum */ +unsigned short checksum(void *b, int len) +{ + unsigned short *buf = (unsigned short*)b; + unsigned int sum=0; + unsigned short result; + + for ( sum = 0; len > 1; len -= 2 ) + sum += *buf++; + if ( len == 1 ) + sum += *(unsigned char*)buf; + sum = (sum >> 16) + (sum & 0xFFFF); + sum += (sum >> 16); + result = ~sum; + return result; +} + + +std::string tolowercase ( const std::string & in ) +{ + std::string out; + + std::transform( in.begin(), in.end(), std::back_inserter( out ), ::tolower ); + return out; +} + + + +int send_log_message ( msgSock_type * sock_ptr, + const char * hostname, + const char * filename, + const char * log_str ) +{ + int bytes = 0 ; + int bytes_to_send = 0 ; + int rc = PASS ; + + log_message_type log ; + + if (( log_str == NULL ) || ( 
filename == NULL ) || ( hostname == NULL )) + { + slog ("null parm\n"); + return (FAIL_NULL_POINTER); + } + + if ( sock_ptr == NULL ) + { + slog ("%s mtclogd not setup for file '%s'\n", hostname, filename ); + return (FAIL_NULL_POINTER); + } + else if ( sock_ptr->sock == 0 ) + { + dlog ("%s mtclogd not setup for file '%s'\n", hostname, filename ); + return (FAIL_INVALID_OPERATION); + } + + memset ( &log, 0 , sizeof(log_message_type)); + snprintf ( &log.header [0], MSG_HEADER_SIZE , "%s",get_mtc_log_msg_hdr()); + snprintf ( &log.filename [0], MAX_FILENAME_LEN , "%s",filename ); + snprintf ( &log.hostname [0], MAX_HOST_NAME_SIZE, "%s",hostname ); + snprintf ( &log.logbuffer[0], MAX_LOG_MSG , "%s",log_str ); + + /* Send only the used part of logbuffer ; measure the copy that snprintf bounded to MAX_LOG_MSG rather than the caller's string so the length can never run past the end of 'log' */ + bytes_to_send = sizeof(log_message_type)-(MAX_LOG_MSG-(strnlen(&log.logbuffer[0], MAX_LOG_MSG))) ; + + bytes = sendto ( sock_ptr->sock, (char*) &log, bytes_to_send, 0, + (struct sockaddr *) &sock_ptr->addr, + sock_ptr->len); + if ( bytes <= 0 ) + { + wlog ("%s send log message failed (%s)\n", log.hostname, log.filename ); + rc = FAIL_TO_TRANSMIT ; + } + else + { + mlog2 ("%s:%s\n%s", &log.hostname[0], &log.filename[0], log_str ); + } + return rc ; +} + + +/********************************************************************************** + * + * Name : get_delimited_list + * + * Description: Update the_list with the individual items in the passed in string. + * + * valid delimiters include , : = ; . 
- + + * + * Updates: the_list + * + * Returns: PASS for success and FAIL_STRING_EMPTY or FAIL_INVALID_DATA otherwise + * + **********************************************************************************/ +int get_delimited_list ( string str , char delimiter, list & the_list, bool remove_whitespace ) +{ + std::size_t last = 0 ; + std::size_t first = 0 ; + + /* Error handling - empty string and invalid delimiters */ + if ( str.empty () ) + { + dlog ("empty string\n"); + return ( FAIL_STRING_EMPTY ) ; + } + else if (( delimiter != '.' ) && ( delimiter != ',' ) && + ( delimiter != '-' ) && ( delimiter != '+' ) && + ( delimiter != '=' ) && ( delimiter != ';' ) && + ( delimiter != ':' )) + { + dlog ("invalid delimiter\n"); + return ( FAIL_INVALID_DATA ) ; + } + + // ilog ("String: <%s>\n", str.c_str()); + + do + { + last = str.find_first_of(delimiter, first ); + string temp_str = str.substr(first, last-first) ; + + /* TODO: Add support for stripping off whitespace ; until then remove_whitespace has no effect */ + if ( remove_whitespace == true ) + { + // std::string::iterator _str ; + // _str = std::remove(temp_str.begin(), temp_str.end(), ' '); + // string xx = std::remove_if(temp_str.begin(), temp_str.end(), isspace); + // ilog ("XX: <%s>\n", (*_str).c_str()); + ; + } + + // dlog ("List Item: <%s> (%ld:%ld)\n", temp_str.c_str(), first, last); + the_list.push_back(temp_str); + + /* prepare for next loop ; the next item starts just past the delimiter that was found */ + if ( last != std::string::npos ) + { + first = last+1 ; // dlog (" > First: %ld\n", first ); + } + } while ( last != std::string::npos ) ; + +#ifdef WANT_DEBUG + std::list::iterator iter_ptr ; + for ( iter_ptr = the_list.begin(); + iter_ptr != the_list.end() ; + iter_ptr++ ) + { + ilog ("List: <%s>\n", iter_ptr->c_str()); + } +#endif + + return (PASS); +} + +/* Name: update_config_option + * + * 1. free what is in *config_ptr_ptr (if not null) + * 2. 
allocate new memory for a copy of the supplied string + */ +void update_config_option ( const char ** config_ptr_ptr, string str2dup ) +{ + if ( *config_ptr_ptr != NULL ) + { + dlog1 ("Modifying config from '%s' to '%s'\n", *config_ptr_ptr, str2dup.c_str()); + free ( (void*)(*config_ptr_ptr) ) ; + } + else + { + dlog1 ("Adding %s config\n", str2dup.c_str()); + } + *config_ptr_ptr = strdup(str2dup.c_str()); /* c_str() is always null terminated ; data() only guarantees that from C++11 on */ + dlog1 ("New Config %s\n", *config_ptr_ptr); +} + +/* Banner lines written by daemon_dump_membuf_banner */ +static const char bar [] = { "-----------------------------------------------------------------------------------------\n"} ; +static const char ban [] = { "Service State and Traceback -------------------------------------------------------------\n"} ; +std::list mem_log_list ; +std::list::iterator mem_log_iter ; +/* Empty the in-memory log list */ +void mem_log_list_init ( void ) +{ + mem_log_list.clear(); +} + +/* Log a label int value and string of other data ; NOTE(review): the format bound is MAX_MEM_LOG_DATA while str holds MAX_MEM_LOG_LEN - confirm DATA <= LEN */ +void mem_log ( string label , int value , string data ) +{ + char str[MAX_MEM_LOG_LEN] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s %d %s\n", label.c_str(), value, data.c_str()); + mem_log (str); +} +/* Log two strings joined with no separator */ +void mem_log ( string one, string two ) +{ + char str[MAX_MEM_LOG_LEN] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s%s\n", one.c_str(), two.c_str()); + mem_log (str); +} +/* Log three strings joined with no separator */ +void mem_log ( string one, string two, string three ) +{ + char str[MAX_MEM_LOG_LEN] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s%s%s\n", one.c_str(), two.c_str(), three.c_str()); + mem_log (str); +} + +/* log a character string */ +void mem_log ( char * log ) +{ + // string full_log = pt() ; + // full_log.append(": "); + string full_log = log ; + + mem_log_list.push_back ( full_log ) ; + + /* Don't allow the in-memory list to exceed MAX_MEM_LIST_SIZE */ + if ( mem_log_list.size() > MAX_MEM_LIST_SIZE ) + { + mem_log_list.pop_front(); + } +} + +/* Log a single character ; typically used to add a linefeed to the trace log */ +void mem_log ( char char_log ) +{ + string tmp = ""; + tmp.insert(tmp.begin(),char_log) ; + 
mem_log ( tmp ); /* Call string proto */ +} + +/* log a string */ +void mem_log ( string log ) +{ + // string full_log = pt() ; + // full_log.append(": "); + string full_log = log ; + + mem_log_list.push_back ( full_log ) ; + + /* Don't allow the in-memory list to exceed MAX_MEM_LIST_SIZE ; '>' keeps the same depth as the char * overload ('>=' capped this path one entry short) */ + if ( mem_log_list.size() > MAX_MEM_LIST_SIZE ) + { + mem_log_list.pop_front(); + } +} +/* Write the 'Service State and Traceback' banner to syslog */ +void daemon_dump_membuf_banner ( void ) +{ + syslog ( LOG_INFO, "%s", &bar[0]); + syslog ( LOG_INFO, "%s", &ban[0]); + syslog ( LOG_INFO, "%s", &bar[0]); +} + +/* Dump the in-memory trace buffer to syslog and then empty it */ +void daemon_dump_membuf ( void ) +{ + int i = 0 ; + int usec_delay = 1 ; + + if ( mem_log_list.empty () ) + return ; + + /* the larger the list the shorter the pause ; with a big list we accept losing data over stalling the process */ + if ( mem_log_list.size() < 200 ) + usec_delay = 99 ; + else if ( mem_log_list.size() < 1000 ) + usec_delay = 10 ; + + /* Write each buffered log to syslog, numbered from 1 */ + for ( mem_log_iter = mem_log_list.begin () ; + mem_log_iter != mem_log_list.end () ; + mem_log_iter++ ) + { + /* sleep for usec_delay every 10 logs so we don't overload syslog */ + if (( ++i % 10 ) == 0 ) + { + usleep (usec_delay); + } + syslog ( LOG_INFO, "%3d| %s", i, mem_log_iter->c_str() ); + } + mem_log_list.clear(); +} + +#define BUFFER_SIZE 128 + +/***************************************************************************** + * + * Name : execute_pipe_cmd + * + * Purpose : Obtain the result of a bash command. + * + * Params : command - char buffer containing the bash command + * result - char buffer for storing the result of the command + * result_size - size of result buffer + * + * Return : exit status of the command (0 on success) or FAIL + * + *****************************************************************************/ +int execute_pipe_cmd(const char *command, char *result, unsigned int result_size) { + + /* Local variables. 
*/ + char fsLine[BUFFER_SIZE]; + char *pos; + string data; + FILE *pFile; + int rc = 0; + + /* Initialize to zero the result buffer. */ + memset(result, 0, result_size); + + /* Execute command. */ + if ((pFile = popen(command, "r")) == NULL) { + elog("Error executing command: %s", command); + return (FAIL); + } else { + while ((memset(fsLine, 0, sizeof(fsLine))) && + (fgets((char *) &fsLine, sizeof(fsLine), pFile) != NULL)) { + data.append(fsLine); + } + int ret = pclose(pFile); + rc = WEXITSTATUS(ret); + } + + /* Extract result. */ + strncpy(result, data.c_str(), result_size); + if (data.length() < result_size - 1) { + /* Eliminate trailing newline. */ + if ((pos=strchr(result, '\n')) != NULL) + *pos = '\0'; + } + else { + *(result + result_size -1) = '\0'; // in this case, strncpy does not terminate string + elog("Result of executed command is larger than result buffer; " + "result size: %i, buffer size: %i", int(data.length()), int(result_size)); + wlog("...cmd: '%s' exit status: %i truncated result: '%s'", command, rc, result); + return (FAIL); + } + + dlog("cmd: '%s' exit status: %i result: '%s'\n", + command, rc, result); + + return (rc); +} + +/**************************************************************************** + * + * Name: get_system_state + * + * Purpose: Query and return system running state + * + * https://www.freedesktop.org/software/systemd/man/systemctl.html + * + * Refer to is-system-running command. + * + * Note: Return code is > 0 for all cases except for running. + * + * Name - Description + * ------------ -------------------------------------------- + * initializing - Early bootup, before basic.target is reached. + * starting - Late bootup, before the job queue becomes idle for the first time. + * running - The system is fully operational. rc = 0 + * degraded - The system is operational but one or more units failed. + * maintenance - The rescue or emergency target is active. + * stopping - The manager is shutting down. 
+ * offline - The manager is not running, faulty system manager (PID 1). + * unknown - The operational state could not be determined. + * + * Returns one of corresponding 'mtc_system_state_enum' defined in nodeUtil.h + * + ****************************************************************************/ +/* NOTE: the return type is 'system_state_enum' ; empty or unrecognized output maps to MTC_SYSTEM_STATE__UNKNOWN */ +#ifndef PIPE_COMMAND_RESPON_LEN +#define PIPE_COMMAND_RESPON_LEN (100) +#endif + +system_state_enum get_system_state ( void ) +{ + char pipe_cmd_output [PIPE_COMMAND_RESPON_LEN] ; + execute_pipe_cmd ( "systemctl is-system-running", &pipe_cmd_output[0], PIPE_COMMAND_RESPON_LEN ); /* the return value is ignored ; only the command output is parsed */ + if ( strnlen ( pipe_cmd_output, PIPE_COMMAND_RESPON_LEN ) > 0 ) + { + ilog ("systemctl reports host as '%s'\n", pipe_cmd_output ); + string temp = pipe_cmd_output ; /* matched by substring, first hit in the order below wins */ + if ( temp.find ("stopping") != string::npos ) + return MTC_SYSTEM_STATE__STOPPING; + if ( temp.find ("running") != string::npos ) + return MTC_SYSTEM_STATE__RUNNING; + if ( temp.find ("degraded") != string::npos ) + return MTC_SYSTEM_STATE__DEGRADED; + if ( temp.find ("starting") != string::npos ) + return MTC_SYSTEM_STATE__STARTING; + if ( temp.find ("initializing") != string::npos ) + return MTC_SYSTEM_STATE__INITIALIZING; + if ( temp.find ("offline") != string::npos ) + return MTC_SYSTEM_STATE__OFFLINE; + if ( temp.find ("maintenance") != string::npos ) + return MTC_SYSTEM_STATE__MAINTENANCE; + slog ("unexpected response: <%s>\n", temp.c_str()); + } + else + { + wlog ("systemctl is-system-running yielded no response\n"); + } + return MTC_SYSTEM_STATE__UNKNOWN ; +} diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeUtil.h b/mtce-common/cgts-mtce-common-1.0/common/nodeUtil.h new file mode 100755 index 00000000..2c46100f --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeUtil.h @@ -0,0 +1,133 @@ +#ifndef __INCLUDE_NODEUTIL_H__ +#define __INCLUDE_NODEUTIL_H__ + +/* +* Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* +*/ + +#include +#include +#include +#include +#include +#include /* for ... HTTP_ status definitions */ + +using namespace std; + +#include "nodeBase.h" +/* Latency values in milliseconds ; presumably thresholds passed to nodeUtil_latency_log - confirm against callers */ +#define LATENCY_1500MSECS (1500) +#define LATENCY_1SEC (1000) +#define LATENCY_50MSECS (50) +#define LATENCY_100MSECS (100) +#define LATENCY_200MSECS (200) +#define LATENCY_600MSECS (600) +#define NODEUTIL_LATENCY_MON_START ((const char *)"start") +void nodeUtil_latency_log ( string hostname, const char * label_ptr, int msecs ); + + +/* Common socket type struct */ +typedef struct +{ + + int port ; /**< ... port number */ + int sock ; /**< ... socket fd ; send_log_message treats 0 as 'not setup' */ + struct sockaddr_in addr ; /**< ... attributes struct ; used as the sendto destination */ + socklen_t len ; /**< ... length of addr as handed to sendto */ + +} msgSock_type ; +/* Send one log string to mtclogd over sock_ptr ; returns PASS or a FAIL_xxx code */ +int send_log_message ( msgSock_type * sock_ptr, + const char * hostname, + const char * filename, + const char * log_str ); + +msgSock_type * get_mtclogd_sockPtr ( void ) ; + +void mem_log_list_init ( void ); + +string md5sum_string ( string str ); + +string getipbyname ( string name ); +string getipbynameifexists ( string name ); +string getipbyiface ( const char * iface ); + +int get_ip_addresses ( string & my_hostname , string & my_local_ip , string & my_float_ip ); +int get_iface_address ( const char * iface_ptr, string & ip_addr , bool retry ); +int get_iface_hostname ( const char * iface_ptr, char * hostname_ptr); +int get_iface_macaddr ( const char * iface_ptr , string & macaddr ); +void get_infra_iface ( char ** infra_iface_ptr ); +int get_hostname ( char * hostname_ptr, int max_len ); +string get_iface_mac ( const char * iface_ptr ); + +void print_inv ( node_inv_type & info ); +int get_iface_attrs ( const char * iface_ptr, int & index, int & speed , int & duplex , string & autoneg ); +const char * get_iface_name_str ( int iface ); + +unsigned int get_host_function_mask ( string & nodeType_str ); +bool is_combo_system (unsigned int nodetype_mask ); + +int set_host_functions ( string 
nodetype_str, + unsigned int * nodetype_bits_ptr, + unsigned int * nodetype_function_ptr, + unsigned int * nodetype_subfunction_ptr ); + +bool is_goenabled ( int nodeType, bool pass ); + +string get_bm_password ( const char * username ); +string get_strings_in_string_list ( std::list & l ); +bool is_string_in_string_list ( std::list & l , string & str ); +bool is_int_in_int_list ( std::list & l , int & val ); +bool string_contains ( string buffer, string sequence ); +int load_filenames_in_dir ( const char * directory, std::list & filelist ); + +int double_fork ( void ); +int double_fork_host_cmd ( string hostname , char * cmd_string, const char * cmd_oper ); +int setup_child ( bool close_file_descriptors ); +void fork_sysreq_reboot ( int delay_in_secs ); +void fork_graceful_reboot ( int delay_in_secs ); + +int get_node_health ( string hostname ); +int clean_bm_response_files ( string hostname ); +int get_pid_by_name_proc ( string procname ); +int get_pid_by_name_pipe ( string procname ); + +int get_link_state ( int ioctl_socket , const char * iface_ptr, bool * running_ptr ); +int open_ioctl_socket ( void ); + +string get_event_str ( int event_code ); /* unrecognized codes yield "unknown" */ +const char * getHttpCmdType_str ( evhttp_cmd_type type ); /* methods other than GET PUT PATCH POST DELETE yield "UNKNOWN" */ + +string itos ( int val ); +string ftos ( float val, int resolution ); /* resolution 2 or 3 selects that many decimals ; anything else gives 1 */ +unsigned short checksum(void *b, int len); /* 1s complement checksum over len bytes */ + +std::string tolowercase ( const std::string & in ); + + +int get_delimited_list ( string str , char delimiter, list & the_list, bool remove_whitespace ); /* delimiter must be one of . , - + = ; : */ +void update_config_option ( const char ** config_ptr_ptr, string str2dup ); /* frees the old value and stores a strdup'ed copy */ + + +void dump_memory ( void * raw_ptr , int format, size_t bytes ); + +int execute_pipe_cmd(const char *command, char *result, unsigned int result_size); /* returns the command exit status or FAIL */ +/* States reported by 'systemctl is-system-running' ; see get_system_state */ +typedef enum +{ + MTC_SYSTEM_STATE__INITIALIZING, + MTC_SYSTEM_STATE__STARTING, + MTC_SYSTEM_STATE__RUNNING, + MTC_SYSTEM_STATE__DEGRADED, + MTC_SYSTEM_STATE__MAINTENANCE, + MTC_SYSTEM_STATE__STOPPING, + MTC_SYSTEM_STATE__OFFLINE, + MTC_SYSTEM_STATE__UNKNOWN +} 
system_state_enum ; + +system_state_enum get_system_state ( void ); + +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/common/pgdbClass.cpp.OBS b/mtce-common/cgts-mtce-common-1.0/common/pgdbClass.cpp.OBS new file mode 100644 index 00000000..ee113aa2 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/pgdbClass.cpp.OBS @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform PostgreSQL Database Access module for maintenance. + * + * pgdbClass member primitive implementation. + * + * query + * + * + */ + +#include "pgdbClass.h" + +#define MAX_SQL_RESPONSE_MAX (4096) +#define SENSOR_SAMPLE_TABLE "sample" + +/* DB connection status */ +#define DB_DISCONNECTED 0 +#define DB_CONNECTED 1 +/* Start with no connection and no uri */ +pgdbClass::pgdbClass() +{ + pg.uri = NULL; + pg.conn = NULL; + pg.connected = false ; +} +/* Finish the connection if one is still held */ +pgdbClass::~pgdbClass() +{ + if (pg.conn) + { + PQfinish(pg.conn); + } + pg.connected = false ; +} +/* Connect to the database named by uri and cache server parameters ; returns PASS or FAIL. The uri pointer itself is stored, not a copy */ +int pgdbClass::connect(const char *uri) +{ + const char *val = NULL; + + /* make a connection to the specified database */ + this->pg.conn = PQconnectdb(uri); + + /* verify the connection */ + if ((this->pg.conn == NULL) || (PQstatus(this->pg.conn) != CONNECTION_OK)) + { + elog ("failed to connected to DB: (%s)\n", uri); + PQfinish(this->pg.conn); this->pg.conn = NULL ; /* the handle is freed ; clear it so the destructor or a later disconnect cannot finish it a second time */ + return FAIL; + } + + this->pg.connected = true ; + this->pg.uri = uri; + + val = get_parameter_status("standard_conforming_strings"); + ilog ("server standard_conforming_strings parameter: %s\n", val ? val : "unavailable"); + + this->pg.equote = (val && (0 == strcmp("off", val))); + ilog ("server requires E'' quotes: %s\n", this->pg.equote ? 
"YES" : "NO"); + + this->pg.server_version = PQserverVersion(this->pg.conn); + this->pg.protocol = PQprotocolVersion(this->pg.conn); + this->pg.encoding = get_parameter_status("client_encoding"); + + return PASS ; +} +/* Audit the connection ; on a bad status disconnect and return FAIL (no reconnect is attempted here) */ +int pgdbClass::monitor( void ) +{ + if (PQstatus(this->pg.conn) != CONNECTION_OK) + { + elog ("failed connection audit to '%s' (%s)\n", this->pg.uri, PQerrorMessage(this->pg.conn)); + disconnect(); + + /* TODO: make this an FSM, otherwise this will bang away */ + // return connect(this->pg.uri); + return (FAIL); + } + return PASS ; +} +/* Close the connection and mark it not connected ; safe to call repeatedly */ +void pgdbClass::disconnect() +{ + if (this->pg.conn != NULL) + { + PQfinish(this->pg.conn); this->pg.conn = NULL ; /* never leave a freed handle behind ; cmd and query read pg.conn right after monitor has disconnected */ + } + if (this->pg.connected == true ) + { + this->pg.connected = false ; + } +} +/* Look up a server parameter reported on the current connection */ +const char * pgdbClass::get_parameter_status(const char *param) +{ + return PQparameterStatus(this->pg.conn, param); +} +/* Run a command that returns no rows ; FAIL on error or when fewer than 1 row was affected */ +int pgdbClass::cmd(const char *db_cmd) +{ + PGresult *res; + int rc = PASS; + + if (monitor() != PASS ) + { + elog ("Failed to reconnect: %s", PQerrorMessage(this->pg.conn)); + return FAIL ; + } + res = PQexec(this->pg.conn, db_cmd); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + elog ("Request Status: %s\n", PQresStatus(PQresultStatus(res))); + elog ("execute Status: %s (%s)\n", db_cmd, PQresultErrorMessage(res)); + rc = FAIL; + } + if (rc == PASS ) + { + int row = atoi(PQcmdTuples(res)); + ilog ("SQL command returned successful: %d rows affected.\n", row); /* dlog */ + if (row < 1) + { + rc = FAIL; + } + } + PQclear(res); + return rc; +} + +/* Log and return the libpq library version */ +int pgdbUtil_get_version ( void ) +{ + int ver = PQlibVersion(); + ilog ("libpq version: %d\n", ver); + return ver ; +} +/* Run a row-returning query ; each row is appended to 'result' as a column name to value map */ +int pgdbClass::query (const char * db_cmd , mtc_query_type & result) +{ + PGresult *res; + int nfields, ntuples, i, j; + + if (monitor() != PASS) + { + elog ("Failed to reconnect: %s\n", PQerrorMessage(this->pg.conn)); + return (FAIL) ; + } + + res = PQexec( pg.conn, db_cmd); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + elog("request status: %s\n", 
PQresStatus(PQresultStatus(res))); + elog("execute status: %s (%s)\n", db_cmd, PQresultErrorMessage(res)); + PQclear(res); + return(FAIL); + } + + nfields = PQnfields(res); + ntuples = PQntuples(res); + ilog ("Cmd: (%s) OK, entries found: (%d)\n", db_cmd, ntuples); /* dlog */ + /* one key_value map per row, keyed by column name */ + for (i = 0; i < ntuples; ++i) + { + mtc_key_value_type key_value ; + + for (j =0; j < nfields; ++j) + { + char * key = PQfname(res, j); + char * value = PQgetvalue(res, i, j); + key_value[key] = value; + } + result.push_back ( key_value ); + } + PQclear(res); + return(PASS); +} diff --git a/mtce-common/cgts-mtce-common-1.0/common/pgdbClass.h.OBS b/mtce-common/cgts-mtce-common-1.0/common/pgdbClass.h.OBS new file mode 100644 index 00000000..a7f6189d --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/pgdbClass.h.OBS @@ -0,0 +1,83 @@ +#ifndef __INCLUDE_PGDBCLASS_H__ +#define __INCLUDE_PGDBCLASS_H__ + +/* + * Copyright (c) 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform PostgreSQL Database Access module for maintenance. + * + * Class, support structs and enums. + * + * This module is based off the libpq - C Library + * + * Header File: libpq-fe.h + * Library File: libpq + * + * http://www.postgresql.org/docs/8.3/static/libpq-build.html + * + */ + +#include +#include +#include +#include +#include +#include +#include + +// #include +#include + +using namespace std; + +#include "nodeBase.h" /* for ... 
comnmon definitions, emums and structs */ +#include "nodeUtil.h" +#include "nodeTimers.h" /* */ +#include "daemon_common.h" +#include "daemon_option.h" +#include "daemon_ini.h" + + +typedef std::map mtc_key_value_type ; /* key-value pair array ; query() fills one per row, keyed by column name */ +typedef std::vector mtc_query_type ; /* dynamic sized array ; one entry per returned row */ +/* Connection handle plus the server parameters cached by connect() */ +typedef struct +{ + PGconn *conn; /* the postgresql connection */ + bool connected; /* true = connected ; false = not connected */ + int equote; /* use E''-style quotes for escaped strings */ + int protocol; /* protocol version */ + int server_version; /* server version */ + const char *encoding; /* client encoding */ + const char *uri; /* Connection URI ; connect() stores the caller's pointer, not a copy */ +} pgdb_conn_type ; // formerly SFmDBConn + +class pgdbClass +{ +protected: + const char * get_parameter_status(const char *param); + +public: + pgdbClass(); + ~pgdbClass(); + + pgdb_conn_type pg ; + + int connect (const char *uri); + void disconnect (); + int reconnect (); /* NOTE(review): no definition is visible in pgdbClass.cpp.OBS - confirm before calling */ + int monitor (); + + int query (const char *db_cmd , mtc_query_type & result); + int cmd (const char *db_cmd); +}; + +int pgdbUtil_get_version ( void ); + +#endif /* __INCLUDE_PGDBCLASS_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/common/pgdbUtil.cpp.OBS b/mtce-common/cgts-mtce-common-1.0/common/pgdbUtil.cpp.OBS new file mode 100644 index 00000000..a5b9fd6a --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/pgdbUtil.cpp.OBS @@ -0,0 +1,427 @@ +/* + * Copyright (c) 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform PostgreSQL Database Access Utility module. 
+ **/ + +#include "pgdbClass.h" +#include "jsonUtil.h" + +#define CONF_FILE ((const char *)("/etc/ceilometer/ceilometer.conf")) + +/* Cleanup exit handler */ +void daemon_exit ( void ) +{ + exit (0); +} +/* SIGCHLD handler ; only logs the event */ +void daemon_sigchld_hdlr ( void ) +{ + dlog("Received SIGCHLD ...\n"); +} + + +static daemon_config_type _config ; +static opts_type * opts_ptr ; + + +/* Ceilometer config read ; keeps a strdup'ed copy of the database connection value in ceilometer_url. NOTE(review): a previously stored copy is not freed if the key is seen again */ +int _config_handler ( void * user, + const char * section, + const char * name, + const char * value) +{ + daemon_config_type* config_ptr = (daemon_config_type*)user; + if (MATCH("database", "connection")) + { + config_ptr->ceilometer_url = strdup(value); + if ( config_ptr->ceilometer_url ) + { + ilog ("Ceilometer URL: %s\n", config_ptr->ceilometer_url ); + } + } + return (PASS); +} + + +/* Accessor for the module's static config struct */ +daemon_config_type * daemon_get_cfg_ptr (void) +{ + return (&_config); +} +/* Load CONF_FILE through _config_handler */ +int daemon_configure ( void ) +{ + int rc = PASS ; + opts_ptr = daemon_get_opts_ptr(); + + daemon_files_init (); + + ilog("Config File : %s\n", CONF_FILE ); + if (ini_parse ( CONF_FILE, _config_handler, &_config ) < 0) + { + elog("Failed to load '%s'\n", CONF_FILE ); /* NOTE(review): a parse failure is only logged ; rc stays PASS */ + } + return (rc) ; +} +/* Daemon init hook ; both arguments are unused and the daemon_configure result is ignored */ +int daemon_init ( string iface , string nodetype ) +{ + UNUSED(iface); + UNUSED(nodetype); + + daemon_configure (); + + // get_debug_options ( CONF_FILE, &_config ); + return (PASS); +} + +pgdbClass dbConn ; + +#define EXACT_MATCH (0) +#define POSITION_ZERO (0) + +/************************************************************************************ + * + * Ceilometer Database: Sensor Value Correlation + * + * 1. The METER table lists valid meter_ids + * - hardware.ipmi.* + * + * 2. The RESOURCE table lists all the sensors based on internal id + * - hostname-{sensor number-}_ + * ... nokia-2-temp_psu2_(0x94) + * ... controller-1-37-system_board_(0x33) + * + * 3. The SAMPLE table lists sample data as volume based on specified unit type + * - sensor resource lookup + * ... 
SAMPLE:resource_id == RESOURCE:internal_id + * - SAMPLE:meter_id is used to know it is a valid meter to monitor + * ... METER:id is valid if METER:name has hardware.ipmi in it + * ... hardware.ipmi.current + * + **************************************************************************************/ + +/* Column indexes of the 'meter' table rows (id, name, type, unit) */ +#define PQ_TABLE_INDEX__METER_ID (0) +#define PQ_TABLE_INDEX__METER_NAME (1) +#define PQ_TABLE_INDEX__METER_TYPE (2) +#define PQ_TABLE_INDEX__METER_UNIT (3) + +#define VALID_METER_PREFIX ((const char *)("hardware.ipmi.")) +typedef struct +{ + int id ; + string scope; /* ipmi - VALID_METER_PREFIX */ + string name ; + string unit ; + string type ; +} hwmon_meter_type ; +std::map global_meter_list ; /* keyed by meter id */ +/* Column indexes of the 'SELECT internal_id,resource_id,resource_metadata FROM resource' rows */ +#define PQ_TABLE_INDEX__RESOURCE__INTERNAL_ID (0) +#define PQ_TABLE_INDEX__RESOURCE__RESOURCE_ID (1) +#define PQ_TABLE_INDEX__RESOURCE__METADATA (2) +typedef struct +{ + int id ; + string hostname ; + string sensorname ; + string somevalue ; /* the trailing "(0x..)" part of resource_id ; holds the raw metadata for excluded resources */ +} hwmon_resource_type ; +std::map global_sensor_list ; /* keyed by resource internal id */ +std::map excluded_resource_list ; /* resources that were not added to global_sensor_list, same key */ + +// mtc_key_value_type ipmi_sensor_group +void daemon_service_run ( void ) +{ + int rc = PASS ; + pgdbUtil_get_version (); + if ( ! 
_config.ceilometer_url ) + { + rc = FAIL_NULL_POINTER ; /* no ceilometer url was loaded from the config file */ + } + else + { + PGresult * pqResult_ptr ; + string pqCommand_str; + + ilog ("URI:%s\n", _config.ceilometer_url ); /* the loop below never exits ; it reconnects every 2 seconds while the connection is down */ + + // check_connection: + for ( ; ; ) + { + if ( dbConn.pg.connected == false ) + { + elog ("not connected\n"); + sleep (2); + dbConn.connect ( _config.ceilometer_url ); + continue ; + // goto check_connection ; + } + else if ( dbConn.monitor() != PASS ) + { + sleep (2); + continue ; + } + global_meter_list.clear(); + + // ilog ("max number of meters allowed: %zu\n", global_meter_list.max_size()); + + /* PARSE METER - hwmon_get_ipmi_sensor_meters () ; */ + pqCommand_str = "SELECT * FROM meter" ; + pqResult_ptr = PQexec ( dbConn.pg.conn, pqCommand_str.data()); + + // +----+-------------------------------------+------------+--------- + // | id | name | type | unit + // +----+-------------------------------------+------------+--------- + // | 1 | hardware.ipmi.current | gauge | W + // | 2 | hardware.ipmi.temperature | gauge | C + // | 3 | hardware.ipmi.fan | gauge | percent + + if ( PQresultStatus( pqResult_ptr ) == PGRES_TUPLES_OK) + { + hwmon_meter_type meter ; + meter.scope = VALID_METER_PREFIX ; + int rows = PQntuples(pqResult_ptr); /* NOTE(review): the row loop header and the column reads that follow appear to be missing from this copy (text between '<' and '>' was lost) ; restore them from the original source before building */ + for(int i=0; i sizeof(int)) ) + { + meter.id = atoi(meter_id.data()); + global_meter_list.insert (std::make_pair(meter.id, meter )); + dlog3 ("ipmi sensor group %d '%s' (%s:%s)\n", + meter.id, meter.name.c_str(), + meter.type.c_str(), meter.unit.c_str()); + } + } + } + } /* end 'meters rows parse' for loop */ + + if ( global_meter_list.size() ) + { + // ************** Testing ********************* +#define WANT_QUERY_AUTO_ITERATOR_TEST + +#ifdef WANT_QUERY_DECLARED_ITERATOR_TEST + /* TEST: Query Meters using declared iterator */ + std::map::const_iterator global_meter_list_iter = global_meter_list.begin(); + while ( global_meter_list_iter != global_meter_list.end() ) + { + ilog ("ipmi sensor group %d '%s' (%s:%s)\n", + global_meter_list_iter->second.id, + 
global_meter_list_iter->second.name.c_str(), + global_meter_list_iter->second.type.c_str(), + global_meter_list_iter->second.unit.c_str()); + + ++global_meter_list_iter ; + } +#endif + +#ifdef WANT_QUERY_AUTO_ITERATOR_TEST + /* TEST: Loop Over meter List using C++11 auto iterator */ + for ( auto _iter = global_meter_list.begin() ; _iter != global_meter_list.end() ; _iter++ ) + { + dlog ("ipmi sensor group %d '%s' (%s:%s)\n", + _iter->second.id, + _iter->second.name.c_str(), + _iter->second.type.c_str(), + _iter->second.unit.c_str()); + } +#endif + +#ifdef WANT_FIND_TEST + /* TEST: Find meters test */ + for ( int i = 0 ; i < 1000 ; i++ ) + { + global_meter_list_iter = global_meter_list.find(i); + if ( global_meter_list_iter != global_meter_list.end() ) + { + ilog ("ipmi sensor group query test - %d:%d '%s' (%s:%s)\n", + i, + global_meter_list_iter->second.id, + global_meter_list_iter->second.name.c_str(), + global_meter_list_iter->second.type.c_str(), + global_meter_list_iter->second.unit.c_str()); + } + } +#endif + ilog ( "%zu sensor groups total\n", global_meter_list.size() ); + + PQclear (pqResult_ptr); /* release the meter result before its handle is reused below ; it was leaked on every pass of the loop */ + /* PARSE RESOURCE - hwmon_get_ipmi_sensor_list () ; */ + pqCommand_str = "SELECT internal_id,resource_id,resource_metadata FROM resource" ; + pqResult_ptr = PQexec ( dbConn.pg.conn, pqCommand_str.data()); + + // +-------------+--------------------------------------+-----------------------------+ + // | internal_id | resource_id | resource_metadata + // | 1 | controller-0-power_meter_(0x40) | {"node": "controller-0"} + // | 2 | controller-0-26-ilo_zone_(0x26) | {"node": "controller-0"} + // | 3 | controller-0-fan_2_(0x7) | {"node": "controller-0"} + + if ( PQresultStatus( pqResult_ptr ) == PGRES_TUPLES_OK) + { + hwmon_resource_type resource ; + + int rows = PQntuples(pqResult_ptr); + for(int i=0; i sizeof(int)) ) + { + resource.id = atoi(internal_id.data()); + } + else + { + elog ("failed to convert internal_id:%s to integer ; excluded '%s'\n", internal_id.c_str(), resource_id.c_str()); 
+ continue ; + } + + /* Add those that have a valid hostname as metadata key:value pair { node: "" } */ + + if ( metadata.size() && ( jsonUtil_get_key_val ( (char*)metadata.data(), "node", hostname ) == PASS )) + { + resource.hostname = hostname ; + if ( resource_id.find (hostname, POSITION_ZERO ) == POSITION_ZERO ) + { + /* Split the text after the hostname into the sensor name and the trailing "(0x..)" tag. NOTE(review): a resource_id equal to the hostname would make the second substr start past the end - confirm that cannot occur */ + resource.somevalue.clear(); + resource.sensorname = resource_id.substr ( hostname.length()); + size_t index = resource.sensorname.find ("_(0x"); + if ( index != std::string::npos ) + { + resource.somevalue = resource.sensorname.substr (index+1); + } + resource.sensorname = resource_id.substr ( hostname.length()+1, index-1 ); + global_sensor_list.insert (std::make_pair(resource.id, resource )); + added = true ; + dlog ("added - %s %s\n", hostname.c_str(), resource.sensorname.c_str()); + } + } + else + { + elog ("no valid metadata node:hostname key value pair (%s) ; excluded %s\n", metadata.c_str(), resource_id.c_str() ); + } + /* anything not added above is remembered in the excluded list with its raw id and metadata */ + if ( added == false ) + { + resource.hostname = "none" ; + resource.sensorname = resource_id ; + resource.somevalue = metadata ; + excluded_resource_list.insert(std::make_pair(resource.id, resource )); + } + } /* end 'resource rows parse' for loop */ + } + if ( global_sensor_list.size() ) + { + /* Loop over the sensor list using C++11 auto iterator */ + for ( auto _iter = global_sensor_list.begin() ; _iter != global_sensor_list.end() ; _iter++ ) + { + dlog ("%s %d sensor '%s' %s\n", + _iter->second.hostname.c_str(), + _iter->second.id, + _iter->second.sensorname.c_str(), + _iter->second.somevalue.c_str()); + } + } +#ifdef WANT_DISPLAY_EXCLUDED_METERS_LIST + if ( excluded_resource_list.size() ) + { + /* Loop over the excluded resource list using C++11 auto iterator */ + for ( auto _iter = excluded_resource_list.begin() ; _iter != excluded_resource_list.end() ; _iter++ ) + { + ilog ("excluded %d '%s' (%s)\n", + _iter->second.id, + _iter->second.sensorname.c_str(), + _iter->second.somevalue.c_str()); + } + } +#endif + ilog 
("%zu sensors ; system wide and %zu meter resources excluded\n", + global_sensor_list.size(), + excluded_resource_list.size()); + + /* SELECT id,volume,timestamp,meter_id,resource_id FROM sample WHERE timestamp >= '2016-11-22 21:57:45' AND resource_id = 149; */ + /* PARSE RESOURCE - hwmon_get_ipmi_sensor_list () ; + pqCommand_str = "SELECT internal_id,resource_id,resource_metadata FROM resource" ; + pqResult_ptr = PQexec ( dbConn.pg.conn, pqCommand_str.data()); +*/ + // +-------------+--------------------------------------+-----------------------------+ + // | internal_id | resource_id | resource_metadata + // | 1 | controller-0-power_meter_(0x40) | {"node": "controller-0"} + // | 2 | controller-0-26-ilo_zone_(0x26) | {"node": "controller-0"} + // | 3 | controller-0-fan_2_(0x7) | {"node": "controller-0"} + + //if ( PQresultStatus( pqResult_ptr ) == PGRES_TUPLES_OK) + //{ + + // Query all samples based on the last + // - keep track of sample ID numbers to discard already managed samples + // for each host + // for each sensor + + } + else + { + elog ("no data retrieved for command '%s'\n", pqCommand_str.c_str()); + } + + PQclear (pqResult_ptr); /* NOTE(review): this appears to sit inside the meter PGRES_TUPLES_OK branch, so a meter query with any other status is never cleared - confirm against the original layout */ + + /* TODO: Group these 2 commands into an _fini proc */ + //PQfinish(dbConn.pg.conn); + //dbConn.pg.conn = NULL ; + } + } + } + ilog ("RC:%d\n", rc ); +} + +/* STUBS */ + +/* Dump the banner and the in-memory trace buffer to syslog */ +void daemon_dump_info ( void ) +{ + daemon_dump_membuf_banner (); + daemon_dump_membuf(); +} + +/* Stub ; always returns PASS */ +int daemon_run_testhead ( void ) +{ + return PASS ; +} + +/* Stub ; daemon_stream_info always returns the "empty" string below */ +const char * dummy = "empty" ; +const char * daemon_stream_info ( void ) +{ + return (dummy) ; +} diff --git a/mtce-common/cgts-mtce-common-1.0/common/pingUtil.cpp b/mtce-common/cgts-mtce-common-1.0/common/pingUtil.cpp new file mode 100644 index 00000000..d8afdc1f --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/pingUtil.cpp @@ -0,0 +1,832 @@ +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud Maintenance Ping Utility Implementation + */ + +#include "daemon_common.h" /* for ... MEMSET_ZERO */ +#include "nodeBase.h" +#include "nodeUtil.h" +#include "hostUtil.h" /* for ... hostUtil_is_valid_ip_addr */ +#include "pingUtil.h" /* for ... this module header */ + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "acc" + +typedef struct +{ + struct icmphdr hdr; + char msg[PING_MESSAGE_LEN]; +} ping4_tx_message_type ; + +typedef struct +{ + struct iphdr ip_hdr ; + struct icmphdr hdr ; + char msg[PING_MESSAGE_LEN]; +} ping4_rx_message_type ; + +typedef struct +{ +// struct ip6_hdr iphdr; + struct icmp6_hdr icmphdr; + char msg[PING_MESSAGE_LEN] ; // MSG_HEADER_SIZE]; +} ping6_tx_message_type ; + + +typedef struct +{ + // struct ip6_hdr ip_hdr; + struct icmp6_hdr hdr; + char msg[PING_MESSAGE_LEN]; +} ping6_rx_message_type ; + +/******************************************************************************* + * + * Name : pingUtil_init + * + * Purpose : Setup a ping socket + * + * Assumptions: caller initializes and installs timer handler outside of init + * before the monitor is called. 
+ * + * Returns : PASS : non-blocking ping socket towards specified ip address setup ok + * FAIL__xxx : init failed + * + ******************************************************************************/ +int pingUtil_init ( string hostname, ping_info_type & ping_info, const char * ip_address ) +{ + int rc = PASS ; + if ( hostUtil_is_valid_ip_addr ( ip_address ) == false ) + { + wlog ("%s refusing to setup ping socket for invalid IP address\n", hostname.c_str()); + return (FAIL_NULL_POINTER); + } + + string identity_string = program_invocation_short_name ; + identity_string.append ("_"); + identity_string.append(ip_address) ; + ping_info.identity = + checksum ((void*)identity_string.data(), identity_string.length()); + + dlog1 ("%s ping identity string: %s (0x%04x)\n", + hostname.c_str(), + identity_string.c_str(), + ping_info.identity); + + /* init the ping_info struct */ + ping_info.hostname = hostname ; + ping_info.ip = ip_address ; + ping_info.sequence = getpid() ; + ping_info.recv_retries = 0 ; + ping_info.send_retries = 0 ; + ping_info.requested= false ; + ping_info.received = false ; + ping_info.recv_flush_highwater=2; + /* added for ping monitor */ + ping_info.ok = false ; + ping_info.monitoring = false ; + ping_info.stage = PINGUTIL_MONITOR_STAGE__OPEN ; + + ping_info.sock = new msgClassTx(ip_address, 0, IPPROTO_RAW, NULL ); + + /* Validate the socket setup */ + if ( ping_info.sock == NULL ) + { + rc = FAIL_SOCKET_CREATE ; + elog ("%s failed to create ping socket ; null socket\n", + ping_info.hostname.c_str()); + } + else + { + rc = ping_info.sock->return_status; + if ( rc != PASS ) + { + elog ("%s failed to create ping socket ; error status:%d\n", + ping_info.hostname.c_str(), rc ); + rc = FAIL_SOCKET_CREATE ; + delete (ping_info.sock ); + return rc; + } + else + { + if ( ( rc = ping_info.sock->setSocketNonBlocking () ) == PASS ) + { + MEMSET_ZERO(ping_info.message); + switch ( ping_info.sock->get_dst_addr()->getIPVersion() ) + { + case AF_INET: + { + 
ping_info.ipv6_mode = false ; + snprintf (&ping_info.message[0], PING_MESSAGE_LEN, + "%s ipv4 ping message from %s daemon", + ping_info.hostname.data(), + program_invocation_short_name); + break ; + } + case AF_INET6: + { + ping_info.ipv6_mode = true ; + snprintf (&ping_info.message[0], PING_MESSAGE_LEN, + "%s ipv6 ping message from %s daemon", + ping_info.hostname.data(), + program_invocation_short_name); + break ; + } + default: + { + elog ("Unsupported IP protocol version\n"); + return (FAIL); + } + } + dlog3 ("%s (fd:%d)\n", + ping_info.message, + ping_info.sock->getFD()); + } + else + { + elog ("%s failed to set ping socket to non-blocking:%d\n", + ping_info.hostname.c_str(), rc ); + } + } + } + + return rc ; +} + +int pingUtil_recv_flush ( ping_info_type & ping_info, bool loud ); + +/******************************************************************************* + * + * Name : pingUtil_send + * + * Purpose : Send an ICMP ECHO ping request to the specified socket + * + * Returns : PASS : send was ok + * FAIL : send failed + * + ******************************************************************************/ +int pingUtil_send ( ping_info_type & ping_info ) +{ + ping4_tx_message_type ping4_tx; + ping6_tx_message_type ping6_tx; + int bytes = 0 ; + + + pingUtil_recv_flush ( ping_info, false ); + + if (( ping_info.sock == NULL ) || ( ping_info.sock->return_status != PASS )) + { + wlog ("%s refusing to send ping on %s socket\n", + ping_info.hostname.c_str(), + ping_info.sock ? 
"faulty" : "null" ); + + return (FAIL_NULL_POINTER); + } + + ping_info.sequence++ ; + ping_info.recv_retries = 0; + + if ( ping_info.ipv6_mode == false ) + { + MEMSET_ZERO (ping4_tx); + + ping4_tx.hdr.type = ICMP_ECHO; + + ping4_tx.hdr.un.echo.id = htons(ping_info.identity) ; + ping4_tx.hdr.un.echo.sequence = htons(ping_info.sequence) ; + + snprintf ( &ping4_tx.msg[0], PING_MESSAGE_LEN, ping_info.message ); + + /* checksum should not be converted to htons + * - will get (wrong icmp cksum ) */ + ping4_tx.hdr.checksum = checksum(&ping4_tx, sizeof(ping4_tx)); + + dlog3 ("%s ping4 checksum: %04x\n", + ping_info.hostname.c_str(), + ping4_tx.hdr.checksum ); + + bytes = ping_info.sock->write((const char*)&ping4_tx, sizeof(ping4_tx)); + } + else + { + MEMSET_ZERO (ping6_tx); + + ping6_tx.icmphdr.icmp6_type = ICMP6_ECHO_REQUEST; + ping6_tx.icmphdr.icmp6_code = 0; + + ping6_tx.icmphdr.icmp6_id = htons(ping_info.identity) ; + ping6_tx.icmphdr.icmp6_seq = htons(ping_info.sequence) ; + + snprintf ( &ping6_tx.msg[0], PING_MESSAGE_LEN, ping_info.message ); + + ping6_tx.icmphdr.icmp6_cksum = htons(checksum(&ping6_tx, sizeof(ping6_tx))); + + dlog3 ("%s ping6 checksum: %04x\n", + ping_info.hostname.c_str(), + ping6_tx.icmphdr.icmp6_cksum ); + + bytes = ping_info.sock->write( (const char*)&ping6_tx, sizeof(ping6_tx_message_type)); + } + + ping_info.recv_retries = 0; + + if ( bytes <= 0 ) + { + wlog ("%s ping %s send failed (rc:%d) (%d:%m)\n", ping_info.hostname.c_str(), ping_info.ip.c_str(), bytes, errno ); + return FAIL ; + } + if ( ping_info.monitoring == false ) + { + ilog ("%s ping send %s ok ; identity:%04x sequence:%04x (try %d)\n", + ping_info.hostname.c_str(), + ping_info.ip.c_str(), + ping_info.identity, + ping_info.sequence, + ping_info.send_retries); + } + else + { + mlog ("%s ping send %s ok ; identity:%04x sequence:%04x (try %d)\n", + ping_info.hostname.c_str(), + ping_info.ip.c_str(), + ping_info.identity, + ping_info.sequence, + ping_info.send_retries); + } + + 
ping_info.received = false ; + ping_info.requested = true ; + + return PASS ; +} + +/******************************************************************************* + * + * Name : pingUtil_recv_flush + * + * Purpose : Empty the ping receiver in preparation for a fresh ping request. + * + * Returns : PASS : empty + * RETRY: not empty + * + ******************************************************************************/ + +int pingUtil_recv_flush ( ping_info_type & ping_info, bool loud ) +{ + int empty_count = 0 ; + int flush_count = 0 ; + bool exit_pass = false ; + + if ( ping_info.sock == NULL ) + return (FAIL_NULL_POINTER); + + for ( int i = 0 , bytes = 0 ; i < PING_MAX_FLUSH_RETRIES ; i++ ) + { + if ( ping_info.ipv6_mode == true ) + { + ping6_rx_message_type ping6_rx ; + MEMSET_ZERO(ping6_rx); + bytes = ping_info.sock->readReply( (char *)&ping6_rx, sizeof(ping6_rx_message_type)); + if ( bytes > 0 ) + { + unsigned short id = htons(ping6_rx.hdr.icmp6_id) ; + unsigned short seq = htons(ping6_rx.hdr.icmp6_seq) ; + flush_count++ ; + empty_count = 0 ; + + if ( id == ping_info.identity ) + { + wlog ("%s flushed out-of-sequence ping response for my identity:%04x ; sequence:%04x (%d)\n", + ping_info.hostname.c_str(), ping_info.identity, seq, flush_count ); + } + else if ( loud == true ) + { + wlog ("%s flushed %d byte message identity:%04x sequence:%04x\n", + ping_info.hostname.c_str(), bytes, id , seq ); + } + } + } + else + { + ping4_rx_message_type ping4_rx ; + MEMSET_ZERO(ping4_rx); + bytes = ping_info.sock->readReply( (char *)&ping4_rx, sizeof(ping4_rx_message_type)); + if (( bytes > 0 ) && ( ping4_rx.hdr.un.echo.id != 0 )) + { + flush_count++ ; + empty_count = 0 ; + unsigned short id = htons(ping4_rx.hdr.un.echo.id); + unsigned short seq = htons(ping4_rx.hdr.un.echo.sequence); + if ( id == ping_info.identity ) + { + wlog ("%s flushed out-of-sequence ping response for my identity:%04x ; sequence:%04x (%d)\n", + ping_info.hostname.c_str(), + ping_info.identity, + seq, 
+ flush_count ); + } + else if ( loud == true ) + { + wlog ("%s flushed %d byte message identity:%04x sequence:%04x\n", + ping_info.hostname.c_str(), bytes, id, seq ); + } + } + } + + if ( bytes <= 0 ) + { + if ( empty_count++ == 3 ) + { + exit_pass = true ; + break ; + } + } + } + + if ( flush_count > ping_info.recv_flush_highwater ) + { + ping_info.recv_flush_highwater = flush_count ; + dlog ("%s ping flush peak at %d\n", + ping_info.hostname.c_str(), + ping_info.recv_flush_highwater ); + } + else if ( flush_count ) + { + dlog2 ("%s ping flushed %d messages\n", + ping_info.hostname.c_str(), flush_count ); + } + + if ( exit_pass == true ) + return (PASS); + + return (RETRY); +} + +/******************************************************************************* + * + * Name : pingUtil_recv + * + * Purpose : Receive an ICMP ping response and compare the suggested sequence + * and identifier numbers. + * + * Returns : PASS : got the response with the correct id and seq codes + * RETRY: got response but with one or mode bad codes + * FAIL : got no ping reply + * + ******************************************************************************/ + +/* handle a reasonable ping flood without failing local pings */ +#define MAX_PING_FLUSH (512) + +int pingUtil_recv ( ping_info_type & ping_info, + bool loud ) /* print log if no data received */ +{ + int rc = FAIL ; + int bytes = 0 ; + + if (( ping_info.requested == true ) && ( ping_info.received == true )) + { + ping_info.requested = false ; + return (PASS); + } + + if ( ping_info.sock == NULL ) + return (FAIL_NULL_POINTER); + + for ( int i = 0 ; i < MAX_PING_FLUSH ; i++ ) + { + if ( ping_info.ipv6_mode == true ) + { + ping6_rx_message_type ping6_rx ; + MEMSET_ZERO(ping6_rx); + bytes = ping_info.sock->readReply( (char *)&ping6_rx, sizeof(ping6_rx_message_type)); + if ( bytes > 0 ) + { + unsigned short id = htons(ping6_rx.hdr.icmp6_id); + unsigned short seq = htons(ping6_rx.hdr.icmp6_seq); + + if ( loud == true ) + { + ilog 
("%s %s search ; bytes:%d ; identity:%04x (got %04x) sequence:%04x (got %04x)\n", + ping_info.hostname.c_str(), + ping_info.ip.c_str(), bytes, + ping_info.identity, ping6_rx.hdr.icmp6_id, + ping_info.sequence, ping6_rx.hdr.icmp6_seq ); + } + + if (( ping6_rx.hdr.icmp6_type == ICMP6_ECHO_REPLY ) && + ( id == ping_info.identity ) && + ( seq == ping_info.sequence )) + { + /* Don't print this log once we have established ping and + * are in monitoring mode. */ + if ( ping_info.monitoring == false ) + { + /* ... only want the log when we ar first connecting */ + ilog ("%s ping recv %s ok ; identity:%04x sequence:%04x (try %d) (%d)\n", + ping_info.hostname.c_str(), + ping_info.ip.c_str(), + ping_info.identity, + ping_info.sequence, + ping_info.recv_retries+1, + i); + } + else + { + /* ... only want the log when we ar first connecting */ + mlog ("%s ping recv %s ok ; identity:%04x sequence:%04x (try %d) (%d)\n", + ping_info.hostname.c_str(), + ping_info.ip.c_str(), + ping_info.identity, + ping_info.sequence, + ping_info.recv_retries+1, + i); + } + + ping_info.requested = false ; + ping_info.received = true ; + rc = PASS ; + break ; + } + + else if ( ping6_rx.hdr.icmp6_id == ping_info.identity ) + { + ilog ("%s received-out-of-sequence ping response for this identity:%04x ; sequence:%04x\n", + ping_info.hostname.c_str(), id, seq); + rc = RETRY ; + } + else + { + ; /* identity is 0 or does not match this host */ + } + } + else + { + /* no data */ + rc = RETRY ; + break ; + } + } + else + { + ping4_rx_message_type ping4_rx ; + MEMSET_ZERO(ping4_rx); + bytes = ping_info.sock->readReply ( (char*)&ping4_rx, sizeof(ping4_rx)) ; + if ( bytes > 0 ) + { + unsigned short id = htons(ping4_rx.hdr.un.echo.id); + unsigned short seq = htons(ping4_rx.hdr.un.echo.sequence); + + // dump_memory ( &ping4_rx, 16, sizeof(ping4_rx_message_type)); + if ( loud == true ) + { + ilog ("%s %s search ; bytes:%d ; identity:%04x (got %04x) sequence:%04x (got %04x)\n", + ping_info.hostname.c_str(), + 
ping_info.ip.c_str(), bytes, + ping_info.identity, id, + ping_info.sequence, seq ); + } + + if (( ping4_rx.hdr.type == ICMP_ECHOREPLY ) && + ( id == ping_info.identity ) && + ( seq == ping_info.sequence )) + { + /* Don't print this log once we have established ping and + * are in monitoring mode. */ + if ( ping_info.monitoring == false ) + { + /* ... only want the log when we ar first connecting */ + ilog ("%s ping recv %s ok ; identity:%04x sequence:%04x (try %d) (%d)\n", + ping_info.hostname.c_str(), + ping_info.ip.c_str(), + ping_info.identity, + ping_info.sequence, + ping_info.recv_retries+1, + i); + } + else + { + /* ... only want the log when we ar first connecting */ + mlog ("%s ping recv %s ok ; identity:%04x sequence:%04x (try %d) (%d)\n", + ping_info.hostname.c_str(), + ping_info.ip.c_str(), + ping_info.identity, + ping_info.sequence, + ping_info.recv_retries+1, + i); + } + + ping_info.requested = false ; + ping_info.received = true ; + + rc = PASS ; + break ; + } + + else if ( id == ping_info.identity ) + { + ilog ("%s received-out-of-sequence ping response for this identity:%04x ; sequence:%04x\n", + ping_info.hostname.c_str(), id, seq ); + rc = RETRY ; + } + else + { + ; /* identity is 0 or does not match this host */ + } + } + else + { + /* no data */ + rc = RETRY ; + break ; + } + } + } + return rc ; +} + +/******************************************************************************* + * + * Name : pingUtil_fini + * + * Purpose : Close an ping socket + * + *******************************************************************************/ +void pingUtil_fini ( ping_info_type & ping_info ) +{ + if ( ping_info.sock ) + { + dlog1 ("%s ping socket close ok (fd:%d)\n", + ping_info.hostname.c_str(), + ping_info.sock->getFD()); + + delete ( ping_info.sock ); + ping_info.sock = NULL ; + } + + ping_info.recv_retries = 0; + ping_info.send_retries = 0; + ping_info.sequence = 0; + ping_info.identity = 0; + + /* Support for ping monitor */ + mtcTimer_reset ( 
/********************************************************************************
 *
 * Name    : pingUtil_acc_monitor
 *
 * Purpose : FSM used to monitor ping access to specific ip address
 *
 * Description: Runs one step of the ping monitor state machine selected by
 *              ping_info.stage and returns ; the caller is expected to
 *              invoke it repeatedly.
 *
 *    IDLE  - nothing to do
 *    OPEN  - pingUtil_init ; on to SEND, or FAIL on error
 *    SEND  - (re)create the socket if missing, send one request, start
 *            the PING_WAIT_TIMER_MSEC timer and move to RECV ; FAIL after
 *            more than PING_MAX_SEND_RETRIES attempts or on any error
 *    RECV  - once the timer expired look for the reply ; on a miss retry
 *            every PING_RETRY_DELAY_MSECS up to PING_MAX_RECV_RETRIES
 *            and then go back to SEND ; on a hit mark the target ok and
 *            WAIT for the next audit interval
 *    WAIT  - keep the socket drained until the timer expires, then SEND
 *    CLOSE - pingUtil_fini (which leaves the stage at IDLE)
 *    FAIL  - mark not ok, recycle the socket and WAIT one
 *            PING_MONITOR_INTERVAL before trying again
 *
 * Returns : always PASS ; the outcome is reported through ping_info.ok,
 *           ping_info.monitoring and ping_info.stage.
 *
 *******************************************************************************/

int pingUtil_acc_monitor ( ping_info_type & ping_info )
{
    switch ( ping_info.stage )
    {
        /* do nothing stage */
        case PINGUTIL_MONITOR_STAGE__IDLE:
        {
            break ;
        }
        case PINGUTIL_MONITOR_STAGE__WAIT:
        {
            if ( mtcTimer_expired ( ping_info.timer ) )
            {
                ping_info.stage = PINGUTIL_MONITOR_STAGE__SEND ;
            }
            /* Don't let the buffer fill up with pings ;
             * keep the socket empty till we want to ping */
            pingUtil_recv_flush ( ping_info , false );

            break ;
        }
        case PINGUTIL_MONITOR_STAGE__OPEN:
        {
            /* init re-derives all bookkeeping from the hostname and ip
             * already stored in ping_info */
            if ( pingUtil_init ( ping_info.hostname,
                                 ping_info,
                                 ping_info.ip.data()) != PASS )
            {
                ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
            }
            else
            {
                ping_info.stage = PINGUTIL_MONITOR_STAGE__SEND ;
            }
            break ;
        }
        case PINGUTIL_MONITOR_STAGE__SEND:
        {
            /* no socket yet (or it was closed) ; try to create one */
            if ( ping_info.sock == NULL )
            {
                if (( ping_info.ip.empty()) || !ping_info.ip.compare(NONE))
                {
                    elog ("%s no address to ping\n", ping_info.hostname.c_str());
                    ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
                    break ;
                }

                int rc = pingUtil_init ( ping_info.hostname,
                                         ping_info,
                                         ping_info.ip.data());
                if ( rc )
                {
                    elog ("%s failed to setup bmc ping socket to '%s'\n",
                              ping_info.hostname.c_str(),
                              ping_info.ip.c_str());

                    ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
                    break ;
                }
            }


            /* send_retries counts send attempts since the last good
             * reply ; it is cleared in the RECV success path and in FAIL */
            if ( ++ping_info.send_retries > PING_MAX_SEND_RETRIES )
            {
                elog ("%s ping to %s failed\n",
                          ping_info.hostname.c_str(),
                          ping_info.ip.c_str());

                ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
            }
            else if ( pingUtil_send ( ping_info ) )
            {
                elog ("%s failed to send bmc ping\n", ping_info.hostname.c_str());
                ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
            }
            else
            {
                /* the handler is installed by the caller, not by init */
                if ( ping_info.timer_handler == NULL )
                {
                    elog ("%s no timer handler installed\n", ping_info.hostname.c_str());
                    ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
                }
                else
                {
                    if ( ping_info.timer.tid )
                    {
                        ilog ("%s unexpected active timer\n", ping_info.hostname.c_str());
                        mtcTimer_reset ( ping_info.timer );
                    }
                    /* give the reply time to arrive before looking for it */
                    mtcTimer_start_msec ( ping_info.timer, ping_info.timer_handler, PING_WAIT_TIMER_MSEC );
                    ping_info.stage = PINGUTIL_MONITOR_STAGE__RECV ;
                }
            }
            break ;
        }
        case PINGUTIL_MONITOR_STAGE__RECV:
        {
            if ( mtcTimer_expired ( ping_info.timer ))
            {
                /* verbose receive logging follows the debug_bmgmt setting */
                bool loud = false ;
                if ( daemon_get_cfg_ptr()->debug_bmgmt )
                    loud = true ;

                /* non-zero means no matching reply yet */
                if ( pingUtil_recv ( ping_info , loud ) )
                {
                    if ( ++ping_info.recv_retries > (PING_MAX_RECV_RETRIES) )
                    {
                        /* only print this log on the last resend attempt */
                        if ( ping_info.send_retries >= PING_MAX_SEND_RETRIES )
                        {
                            mlog ("%s ping recv from %s missed ; identity:%04x sequence:%04x (try %d of %d)\n",
                                      ping_info.hostname.c_str(),
                                      ping_info.ip.c_str(),
                                      ping_info.identity,
                                      ping_info.sequence,
                                      ping_info.recv_retries-1,
                                      PING_MAX_RECV_RETRIES);
                        }
                        /* receive retries exhausted ; send a fresh request */
                        ping_info.stage = PINGUTIL_MONITOR_STAGE__SEND ;
                        break ;
                    }
                    else
                    {
                        blog1 ("%s retrying ping\n", ping_info.hostname.c_str());
                    }
                    /* stay in RECV and look again after a short delay */
                    mtcTimer_start_msec ( ping_info.timer, ping_info.timer_handler, PING_RETRY_DELAY_MSECS );
                }
                else
                {
                    /* reply received ; target is reachable */
                    int interval = PING_MONITOR_INTERVAL ;
                    ping_info.ok = true ;
                    ping_info.monitoring = true ;

                    dlog ("%s ping %s ok (send:%d:recv:%d) (%d)\n",
                              ping_info.hostname.c_str(),
                              ping_info.ip.c_str(),
                              ping_info.send_retries,
                              ping_info.recv_retries+1,
                              ping_info.ok );

                    ping_info.send_retries = 0 ;
                    ping_info.recv_retries = 0 ;

#ifdef WANT_FIT_TESTING
                    /* fault insertion testing can shorten the audit period */
                    if ( daemon_want_fit ( FIT_CODE__FAST_PING_AUDIT_HOST, ping_info.hostname ) == true )
                        interval = 3 ;
                    if ( daemon_want_fit ( FIT_CODE__FAST_PING_AUDIT_ALL ) == true )
                        interval = 3 ;
#endif
                    mtcTimer_start ( ping_info.timer, ping_info.timer_handler, interval );
                    ping_info.stage = PINGUTIL_MONITOR_STAGE__WAIT ;
                }
            }
            break ;
        }
        case PINGUTIL_MONITOR_STAGE__CLOSE:
        {
            pingUtil_fini (ping_info);
            break ;
        }
        case PINGUTIL_MONITOR_STAGE__FAIL:
        {
            ping_info.ok = false ;
            ping_info.send_retries = 0 ;
            ping_info.monitoring = false ;

            /* recycle the socket ; the init result is not checked here,
             * a missing socket is retried from the SEND stage */
            pingUtil_fini (ping_info);
            pingUtil_init (ping_info.hostname, ping_info, ping_info.ip.data());

            mtcTimer_reset ( ping_info.timer );
            mtcTimer_start ( ping_info.timer, ping_info.timer_handler, PING_MONITOR_INTERVAL );

            /* fini and init both wrote the stage ; WAIT is the one that sticks */
            ping_info.stage = PINGUTIL_MONITOR_STAGE__WAIT;
            break ;
        }
        default:
        {
            slog ("%s default case (%d)\n", ping_info.hostname.c_str(), ping_info.stage );

            /* Unknown stage ; recover through the FAIL stage which
             * recycles the socket and restarts the audit */
            mtcTimer_reset ( ping_info.timer );

            ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
        }
    }
    return(PASS);
}
---*/ +/*--------------------------------------------------------------------*/ +int main(int argc, char *argv[]) +{ + int rc ; + int ping_socket = 0; + int sequence = 1 ; + struct sockaddr_in addr_ping ; + + if ( argc > 1 ) + { + int identity = getpid() ; + printf ( "\npinging %s\n", argv[1]); + if ( ( rc = pingUtil_init ( argv[1], ping_socket , addr_ping )) == 0 ) + { + pingUtil_recv ( ping_socket, identity, sequence, false ); + if ( ( rc = pingUtil_send ( ping_socket, &addr_ping, identity, sequence )) == 0 ) + { + for ( int loop=0;loop < 10; loop++) + { + usleep(300000); + if ( ( rc = pingUtil_recv ( ping_socket, identity, sequence, true ) ) == 0 ) + { + printf("Ping OK.\n"); + return 0; + } + else + { + printf ("receive failed (%d)\n", rc ); + } + } + printf("Ping FAILED !!\n"); + } + else + { + printf ("ping send Failed (%d)\n", rc ); + } + } + else + { + printf ("ping init failed (%d)\n", rc ); + } + } + pingUtil_close ( ping_socket ); + return 0; +} + +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/common/pingUtil.h b/mtce-common/cgts-mtce-common-1.0/common/pingUtil.h new file mode 100644 index 00000000..d9937c5b --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/pingUtil.h @@ -0,0 +1,149 @@ +#ifndef __INCLUDE_PINGUTIL_H__ +#define __INCLUDE_PINGUTIL_H__ + +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud Maintenance Ping Utility Header + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // struct ip6_hdr +#include // struct icmp6_hdr and ICMP6_ECHO_REQUEST +#include // IP_MAXPACKET (which is 65535) + +using namespace std; + +#include "nodeBase.h" +#include "msgClass.h" +#include "nodeUtil.h" +#include "nodeTimers.h" + +// Define some constants +#define IP6_HDRLEN 40 // IPv6 header length +#define ICMP_HDRLEN 8 // ICMP header length for echo request, excludes data + + +#define PING_MAX_RETRIES (5) +#define PING_MAX_FLUSH_RETRIES (100) +#define PING_MAX_RECV_RETRIES (20) +#define PING_MAX_SEND_RETRIES (5) +#define PING_WAIT_TIMER_MSEC (200) +#define PING_RETRY_DELAY_MSECS (200) +#define PING_MONITOR_INTERVAL (60) +#define PING_MISS_RETRY_DELAY (5) + +#define PING_MESSAGE_LEN (80) + +typedef enum +{ + PINGUTIL_MONITOR_STAGE__IDLE = 0, + PINGUTIL_MONITOR_STAGE__OPEN, + PINGUTIL_MONITOR_STAGE__SEND, + PINGUTIL_MONITOR_STAGE__RECV, + PINGUTIL_MONITOR_STAGE__WAIT, + PINGUTIL_MONITOR_STAGE__CLOSE, + PINGUTIL_MONITOR_STAGE__FAIL, + PINGUTIL_MONITOR_STAGES, +} pingUtil_stage_type ; + +typedef struct +{ + string hostname ; + string ip ; + msgClassSock * sock ; + + unsigned short identity ; + unsigned short sequence ; + int send_retries ; + int recv_retries ; + bool ipv6_mode ; + bool received ; + bool requested ; + int recv_flush_highwater; + /* for monitor FSM */ + bool ok ; + bool monitoring ; + pingUtil_stage_type stage ; + struct mtc_timer timer ; + void (*timer_handler) ( int, siginfo_t*, void* ); + char message [PING_MESSAGE_LEN]; +} ping_info_type ; + +/******************************************************************************* + * + * Name : pingUtil_init + * + * Purpose : Setup a ping socket + * + * Returns : PASS : non-blocking ping socket towards specified ip address setup ok + * FAIL : send 
failed + * + ******************************************************************************/ +int pingUtil_init ( string hostname, ping_info_type & ping_info, const char * ip_address ); + +/******************************************************************************* + * + * Name : pingUtil_send + * + * Purpose : Send an ICMP ECHO ping request to the specified socket + * + * Returns : PASS : send was ok + * FAIL : send failed + * + ******************************************************************************/ + +int pingUtil_send ( ping_info_type & ping_info ); + +/******************************************************************************* + * + * Name : pingUtil_recv + * + * Purpose : Receive an ICMP ping response and compare the suggested sequence + * and identifier numbers. + * + * Returns : PASS : got the response with the correct id and seq codes + * RETRY: got response but with one or mode bad codes + * FAIL : got no ping reply + * + ********************************************************************************/ + +int pingUtil_recv ( ping_info_type & ping_info, /* sequence in the ping request */ + bool loud ); /* print log no data received */ + +/******************************************************************************** + * + * Name : pingUtil_fini + * + * Purpose : Close an ping socket + * + *******************************************************************************/ +void pingUtil_fini ( ping_info_type & ping_info ); /* the preopened ping socket */ + +/******************************************************************************** + * + * Name : pingUtil_acc_monitor + * + * Purpose : FSM used to monitor ping access to specific ip address + * + *******************************************************************************/ + +int pingUtil_acc_monitor ( ping_info_type & ping_info ); + +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/common/regexUtil.cpp b/mtce-common/cgts-mtce-common-1.0/common/regexUtil.cpp new file mode 100644 
index 00000000..62094bd4 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/regexUtil.cpp @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River - Titanium Cloud - Regex Utilty Module + */ + +#include +#include +#include + +using namespace std; + +#include "nodeBase.h" +#include "regexUtil.h" + +/********************************************************************** + * + * Name : regexUtil_label_match + * + * Purpose : Verify that the rule matches the pattern + * + * Description: Loop over the rule extracting the labels that are + * delimited by ':'s. Then for each label, walk the pattern + * verifying that the label exists while maintaining a + * forward position search. If all the rule labels are + * found (in order) in the pattern then return a true. + * Otherwise, return false. + * + **********************************************************************/ + +bool regexUtil_label_match ( string hostname, string pattern, string rule ) +{ + bool match = false ; + + if ( rule.empty() ) + { + wlog ("%s empty rule string\n", hostname.c_str() ); + } + else + { + int i ; + size_t pos = 0 ; + size_t new_pos = 0 ; + size_t run_pos = 0 ; + int colins = 0 ; + int len = (int)rule.length() ; + char * ptr = (char*)rule.data(); + for ( int i = 0 ; i < len ; i++ ) + { + if ( *ptr++ == ':' ) colins++ ; + } + dlog ("%s there are %d colins in the rule (%s)\n", hostname.c_str() , colins, rule.c_str()); + match = true ; + i = 0 ; + do + { + string label ; + if ( colins ) + { + /* get the label from the current position up to the next colin */ + new_pos = rule.find( ':' , pos ); + if ( new_pos != std::string::npos ) + { + label = rule.substr(pos, new_pos-pos) ; + dlog2 ("%s %d-%s (%ld-%ld)\n", hostname.c_str() , i, label.c_str(), pos, new_pos); + + /* move beyond the ':' */ + pos = ++new_pos ; + } + } + else + { + label = rule.substr(pos) ; + dlog ("%s %d:%s\n", 
hostname.c_str(), i, label.c_str()); + } + + if ( label.empty() ) + { + wlog ("%s label not found\n", hostname.c_str()); + match = false ; + } + else + { + run_pos = pattern.find( label, run_pos ) ; + if ( run_pos != std::string::npos ) + { + dlog1 ("%s '%s' found in pattern (pos:%ld)\n", hostname.c_str(), label.c_str(),run_pos); + run_pos++ ; + } + else + { + dlog3 ("%s '%s' NOT found in pattern \n", + hostname.c_str(), + label.c_str()); + match = false ; + } + } + i++ ; + } while ( ( colins--) && ( match == true ) ) ; + } + return (match); +} + +bool regexUtil_pattern_match ( std::string pattern , std::string rule, int type ) +{ + bool result = false ; + if ( type == 1 ) + { + //std::cmatch cm; // same as std::match_results cm; + //std::regex e (rule.data()) ; + //result = std::regex_match (pattern,cm, e , std::regex_constants::match_not_bol ); + if ( result ) + { + std::cout << "Flagged String match\n"; + } + } + else + { + result = std::regex_match (pattern, std::regex(rule)) ; + if ( result ) + { + std::cout << "String literal matched\n"; + + + std::smatch sm; // same as std::match_results sm; + std::regex_match (pattern,sm,std::regex(rule)); + + std::cout << "String object with " << sm.size() << " matches\n"; + + std::cout << "The matches are: "; + for (unsigned i=0; i cm; + std::regex_match (cstr,cm,e); + std::cout << "string literal with " << cm.size() << " matches\n"; + + std::smatch sm; // same as std::match_results sm; + std::regex_match (s,sm,e); + std::cout << "string object with " << sm.size() << " matches\n"; + + std::regex_match ( s.cbegin(), s.cend(), sm, e); + std::cout << "range with " << sm.size() << " matches\n"; + + // using explicit flags: + std::regex_match ( cstr, cm, e, std::regex_constants::match_default ); + + std::cout << "the matches were: "; + for (unsigned i=0; i MTCE_PTHREAD_MAX_STACK_SIZE ) + { + if ( pthread_attr_setstacksize ( &__attr, MTCE_PTHREAD_MAX_STACK_SIZE ) == PASS ) + { + if ( pthread_attr_getstacksize 
(&__attr,&stack_size_after) == PASS ) + { + ilog ("thread Stack: %zu KB (was %zu)\n", + stack_size_after/1024, + stack_size_before/1024 ); + } + else + { + elog ("failed to set pthread stack size (%d:%m)\n", errno ); + } + } + } + else + { + ilog ("pthread stack size is %zu bytes\n", stack_size_before ); + } + } + else + { + elog ("failed to get pthread stack size (%d:%m)\n", errno ); + } +} + + +/***************************************************************************** + * + * Name : threadUtil_fini + * + * Description: Module cleanup + * + ****************************************************************************/ + +void threadUtil_fini ( void ) +{ + ; // ilog ("called\n"); +} + +/***************************************************************************** + * + * Name : _stage_change + * + * Description: Change thread FSM stage. + * + * See thread_stages_enum in threadUtil.h for a list of stage enums + * + ****************************************************************************/ + +void _stage_change ( thread_ctrl_type & ctrl, thread_stages_enum newStage ) +{ + if ( newStage < THREAD_STAGE__STAGES ) + { + clog ("%s %s thread stage from %s -> %s\n", + ctrl.hostname.c_str(), + ctrl.name.c_str(), + threadStages_str[ctrl.stage].c_str(), + threadStages_str[newStage].c_str()); + ctrl.stage = newStage ; + } + else + { + slog ("%s %s thread stage change to '%d' is invalid ; switching to KILL\n", + ctrl.hostname.c_str(), + ctrl.name.c_str(), + newStage ); + ctrl.stage = THREAD_STAGE__KILL ; + } + ctrl.stage_log_throttle = 0 ; +} + +/***************************************************************************** + * + * Name : thread_stage + * + * Description: Returns a string representing the current thread stage. 
+ * + *****************************************************************************/ + +string thread_stage ( thread_ctrl_type & ctrl ) +{ + if ( ctrl.stage < THREAD_STAGE__STAGES ) + return(threadStages_str[ctrl.stage]); + return("out-of-range thread stage"); +} + +/***************************************************************************** + * + * Name : thread_init + * + * Description: Default a thread 'ctrl' and 'info' structs. + * + * Assumptions: Called at init time once. + * + * Warning : Thread should be running when this is called. + * Warning : Should not be called more than once or else might create + * an orphan timer. + * + * Parameters: + * + * - reference to the ctrl and info structs for a specified thread + * - pointer to thread specific extra data + * - the thread function pointer itself + * - thread execution timeout in seconds + * - reference to the host and thread names. + * + * Returns : nothing + * + *****************************************************************************/ + +void thread_init ( thread_ctrl_type & thread_ctrl, + thread_info_type & thread_info, + void* extra_data_ptr, + void* (*thread) (void*), + int timeout, + string & hostname, + string threadname ) +{ + /* default the ctrl struct */ + thread_ctrl.stage = THREAD_STAGE__IDLE ; + thread_ctrl.done = true ; + thread_ctrl.idle = true ; + thread_ctrl.id = 0 ; + thread_ctrl.thread = thread ; + thread_ctrl.hostname = hostname ; + thread_ctrl.name = threadname ; + + thread_ctrl.timeout = timeout ; + mtcTimer_init ( thread_ctrl.timer, hostname, threadname ); + + thread_ctrl.status = PASS ; + thread_ctrl.runcount = 0 ; + thread_ctrl.retries = 0 ; + + thread_ctrl.stage_log_throttle = 0 ; + + /* Init the thread's info struct - the only non-stack memory the + * thread can look at or touch */ + thread_info.hostname = hostname ; + thread_info.name = threadname ; + thread_info.id = 0 ; + thread_info.command = 0 ; + thread_info.runcount = 0 ; + thread_info.progress = 0 ; + 
thread_info.signal = 0 ; + thread_info.data.clear() ; + thread_info.extra_info_ptr = extra_data_ptr ; + thread_info.pw_file_fd = 0 ; + thread_info.password_file.clear() ; + + /* command execution status */ + thread_info.status_string.clear(); + thread_info.status = 0 ; + + snprintf ( thread_info.log_prefix, MAX_LOG_PREFIX_LEN, "%s %s thread", + thread_ctrl.hostname.data(), thread_ctrl.name.data()); +} + +/**************************************************************************** + * + * Name : thread_done + * + * Description: Return true if we are in the DONE stage. + * + ****************************************************************************/ + +bool thread_done ( thread_ctrl_type & ctrl ) +{ + if ( ctrl.stage == THREAD_STAGE__DONE ) + { + return (true) ; + } + return (false); +} + +/**************************************************************************** + * + * Name : thread_idle + * + * Description: Return true if we are in the IDLE stage. + * + ****************************************************************************/ + +bool thread_idle ( thread_ctrl_type & ctrl ) +{ + if ( ctrl.stage == THREAD_STAGE__IDLE ) + { + return (true) ; + } + return (false); +} + +/**************************************************************************** + * + * Name : thread_launch + * + * Description: Perform prechecks that verify the ctrl struct is ready for + * thread launch and if so change stage to THREAD_STAGE__LAUNCH. + * + ****************************************************************************/ + +int thread_launch ( thread_ctrl_type & ctrl, thread_info_type & info ) +{ + int rc = FAIL ; + if ( ! 
thread_timer_handler ) + { + slog ("%s no thread timer handler bound in\n", + ctrl.hostname.c_str()); + rc = FAIL_NULL_POINTER ; + } + + else if ( ctrl.thread == NULL ) + { + slog ("%s %s no thread bound in\n", + ctrl.hostname.c_str(), + ctrl.name.c_str()); + rc = FAIL_NULL_POINTER ; + } + + else if ( ctrl.stage != THREAD_STAGE__IDLE ) + { + wlog ("%s %s not in IDLE stage (in %s stage)\n", + ctrl.hostname.c_str(), + ctrl.name.c_str(), + threadStages_str[ctrl.stage].c_str()); + thread_kill ( ctrl, info ); + rc = FAIL_BAD_STATE ; + } + + else if ( ctrl.id ) + { + slog ("%s %s thread may be running ; id is not null and should be\n", + ctrl.hostname.c_str(), + ctrl.name.c_str()); + thread_kill ( ctrl, info ); + rc = FAIL_THREAD_RUNNING ; + } + + else + { + _stage_change ( ctrl, THREAD_STAGE__LAUNCH ); + rc = PASS ; + } + return (rc); +} + + +/**************************************************************************** + * + * Name : thread_kill + * + * Description: put the FSM in the kill state. + * + ****************************************************************************/ + +void thread_kill ( thread_ctrl_type & ctrl, thread_info_type & info ) +{ + info.signal = SIGKILL ; + + /* only go to kill if not already handling kill */ + if (( ctrl.stage != THREAD_STAGE__KILL ) && + ( ctrl.stage != THREAD_STAGE__WAIT ) && + ( ctrl.stage != THREAD_STAGE__IDLE )) + { + blog ("%s kill request\n", ctrl.hostname.c_str() ); + _stage_change ( ctrl, THREAD_STAGE__KILL ); + } +} + + +/***************************************************************************** + * + * Name : thread_handler + * + * Description: finite state machine to manage a pthread execution life cycle + * + * The parent must periodically run this thread_handler to service and make + * forward progress in the FSM. + * + * Thread FSM life cycle and responsibilities: + * + * Parent calls thread_init once before any launch which sets up the ctrl + * and info structs. 
Default state THREAD_STAGE__IDLE IDLE + * + * When there is a thread to be launched ... + * + * 1. Parent FSM calls thread_launch to launch the thread + * - Thread FSM performs thread launch pre-checks + * - check for timer handler binding + * - rc = FAIL_NULL_POINTER + * - check that there is a thread bound in + * - rc = FAIL_NULL_POINTER + * - verify we are in the correct stage for launch + * - rc = FAIL_BAD_STATE + * - verify the thread is not already running + * - rc = FAIL_THREAD_RUNNING + * + * - if rc == PASS change state to THREAD_STAGE__LAUNCH + * - if rc != PASS change state to THREAD_STAGE__IDLE + * + * - Parent FSM handles thread_launch return status + * + * if ( thread_launch == PASS ) + * - start a parent timer ; a longer umbrella timer + * else + * - fail operation or retry + * + * 2. Thread FSM launches the thread in THREAD_STAGE__LAUNCH stage + * - preserves parent signal mask + * - clears signal mask so that thread does not inherit signal handling + * - launch the thread + * - restore signal mask + * + * if launch failed + * - change ctrl.status = FAIL_THREAD_CREATE + * - change ctrl.stage = THREAD_STAGE__DONE + * + * if launch passed + * - start the thread timeout timer if timeout is !0 + * - change ctrl.stage = THREAD_STAGE__MONITOR + * + * 3. Thread FSM monitors thread execution in THREAD_STAGE__MONITOR stage + * - waits for done conditions or thread timeout + * - ctrl.timer.ring or incremented info.runcount + * Note: thread increments info.runcount on exit/done + * + * if ( thread timeout ) + * - sets ctrl.status = FAIL_TIMEOUT + * - sets ctrl.stage = THREAD_STAGE__KILL + * + * if ( info.runcount > ctrl.runcount ) + * - stop thread timer + * - change ctrl.stage = THREAD_STAGE__DONE + * + * 4. Parent FSM Monitors for thread done or parent timer timeout + * - has started its own umbrella timeout timer that is a + * few seconds longer than the actual thread timeout. 
+ * - thread_done returns true when ctrl.stage == THREAD_STAGE__DONE + * + * if ( parent timeout ) + * - sets ctrl.status = FAIL_TIMEOUT + * - sets trl.stage = THREAD_STAGE__KILL + * + * if ( thread_done ) + * - interprets ctrl.status + * - interprets info.status + * - consumes info.data which contains thread execution result + * - changes ctrl.done = true once data is consumed. + * - Parent FSM is done with this thread + * + * 5. Thread FSM monitors for Parent FSM done in THREAD_STAGE__DONE + * - Parent FSM changes ctrl.done to true once it has consumed the thread data + * - Thread FSM polls ctrl.done + * + * if ( ctrl.done == true ) + * - changes ctrl.stage = THREAD_STAGE__IDLE + * - Thread FSM is done with this thread + * + * Note: The ctrl and info structs are intentionally kept separate for two + * reasons ... + * + * 1. distinguish between parent process (ctrl) and thread (info) data. + * 2. the parent might want them to occupying completely differnet memory + * spaces in the future. + * + *****************************************************************************/ + +int thread_handler ( thread_ctrl_type & ctrl, thread_info_type & info ) +{ + int rc = PASS ; + + switch ( ctrl.stage ) + { + case THREAD_STAGE__IGNORE: + { + break ; + } + case THREAD_STAGE__IDLE: + { + if ( ctrl.idle == false ) + { + ctrl.idle = true ; + dlog ("%s IDLE\n", info.log_prefix); + if (( ctrl.id ) || ( info.id ) || ( ctrl.done == false )) + { + slog ("%s bad thread state [%lu:%lu:%d]\n", info.log_prefix, ctrl.id, info.id, ctrl.done ); + } + } + + /******************** Garbage Collection *****************/ + + + /* remove previous password file if it somehow did not get removed before */ + if ( info.pw_file_fd ) + { + wlog ("%s closing pw fd (%d) ; garbage collected\n", + info.hostname.c_str(), + info.pw_file_fd ); + + close(info.pw_file_fd); + info.pw_file_fd = 0 ; + } + + if ( ! 
info.password_file.empty() ) + { + if ( daemon_is_file_present ( info.password_file.data() )) + { + wlog ("%s removing pw file (%s) ; garbage collected\n", + info.hostname.c_str(), + info.password_file.c_str()); + + unlink(info.password_file.data()); + daemon_remove_file (info.password_file.data()); + info.password_file.clear(); + } + } + + break ; + } + case THREAD_STAGE__WAIT: + { + if ( mtcTimer_expired ( ctrl.timer ) ) + { + ctrl.timer.ring = false ; + ctrl.done = true ; + ctrl.id = 0 ; + info.id = 0 ; + info.command = 0 ; + _stage_change ( ctrl, THREAD_STAGE__IDLE ); + } + else if ( ctrl.done == true ) + { + /* force wait completed */ + mtcTimer_reset ( ctrl.timer ); + info.command = 0 ; + _stage_change ( ctrl, THREAD_STAGE__IDLE ); + } + + break ; + } + case THREAD_STAGE__DONE: + { + if ( ctrl.done == true ) + { + if (( info.signal_handling == 0 ) && ( info.status == PASS )) + { + wlog ("%s %s thread not servicing pthread_signal_handler\n", + ctrl.hostname.c_str(), + ctrl.name.c_str()); + } + dlog ("%s %s thread data was consumed by parent ; switching to IDLE\n", + ctrl.hostname.c_str(), + ctrl.name.c_str()); + ctrl.id = 0 ; + info.id = 0 ; + + dlog ("%s %s done\n", ctrl.hostname.c_str(), ctrl.name.c_str()); + + _stage_change ( ctrl, THREAD_STAGE__IDLE ); + } + else if ( info.signal == SIGKILL ) + { + wlog ("%s %s thread completed ; waiting on DONE but got SIGKILL ; forcing DONE\n", ctrl.hostname.c_str(), ctrl.name.c_str() ); + ctrl.done = true ; + } + break ; + } + case THREAD_STAGE__LAUNCH: + { + /* + * pre-check should never this this come in as non-null but just + * to be sure a thread is actually created properly we set it to null + */ + if ( ctrl.id ) + { + slog ("%s %s thread id should be 0\n", + ctrl.hostname.c_str(), + ctrl.name.c_str()); + + ctrl.id = 0 ; + } + /* + * Prepare thread complete criteria. + * + * When info.runcount > ctrl.runcount then the thread is done. 
+ */ + ctrl.runcount = info.runcount ; + + /* thread updates this stuff */ + info.status_string.clear() ; + info.status = -1 ; + info.progress = 0 ; + info.signal = 0 ; + info.id = 0 ; + info.data.clear() ; + info.signal_handling = 0 ; + + ctrl.idle = false ; /* not idle - for idle log throttle */ + ctrl.done = false ; /* declare the thread as running */ + + daemon_signal_hdlr (); + + /* Block signals */ + sigfillset(&__disabled_mask); + + // sigemptyset(&__enabled_mask); /* maybe not needed */ + pthread_sigmask(SIG_SETMASK, &__disabled_mask, NULL ); + pthread_sigmask(SIG_BLOCK, &__disabled_mask, &__enabled_mask); + + rc = pthread_create(&ctrl.id, &__attr, ctrl.thread, (void*)&info); + + if ( sigismember (&__enabled_mask, SIGINT ) == 0 ) + { + slog ("%s SIGINT signal was not enabled ; enabling\n", ctrl.hostname.c_str()); + sigaddset(&__enabled_mask, SIGINT); + } + if ( sigismember (&__enabled_mask, SIGTERM ) == 0 ) + { + slog ("%s SIGTERM signal was not enabled ; enabling\n", ctrl.hostname.c_str()); + sigaddset(&__enabled_mask, SIGTERM); + } + if ( sigismember (&__enabled_mask, SIGUSR1 ) == 0 ) + { + slog ("%s SIGUSR1 signal was not enabled ; enabling\n", ctrl.hostname.c_str()); + sigaddset(&__enabled_mask, SIGUSR1); + } + + /* restore signal mask */ + pthread_sigmask(SIG_SETMASK, &__enabled_mask, NULL ); + pthread_sigmask(SIG_UNBLOCK, &__enabled_mask, NULL ); + + /* The above disables signal handling for a short period while a + * thread is started. In the meantime the only signal that is + * crutial not to miss is USR1. + * Work Around: run the USR1 signal handler immediately following + * the launch just in case it was requested during the launch + * while the signals were masked. 
*/ + daemon_health_test (); + + if (rc != PASS) + { + elog ("%s %s thread launch failed (%d:%d:%m]", + ctrl.hostname.c_str(), + ctrl.name.c_str(), + rc, errno ); + + ctrl.status = info.status = FAIL_THREAD_CREATE ; + _stage_change ( ctrl, THREAD_STAGE__DONE ); + } + else if ( ctrl.id == 0 ) + { + elog ("%s %s thread id is null\n", + ctrl.hostname.c_str(), + ctrl.name.c_str()); + + ctrl.status = info.status = FAIL_THREAD_CREATE ; + _stage_change ( ctrl, THREAD_STAGE__DONE ); + } + else + { + dlog ("%s %s thread launched with command:%d\n", + ctrl.hostname.c_str(), + ctrl.name.c_str(), + info.command ); + ctrl.status = PASS ; + + if ( ctrl.timeout ) + { + mtcTimer_start ( ctrl.timer, thread_timer_handler, ctrl.timeout ); + } + + /* start monitoring */ + _stage_change ( ctrl, THREAD_STAGE__MONITOR ); + } + break ; + } + case THREAD_STAGE__MONITOR: + { + /* provide subtle indication that the thread ids don't match */ + if (( ctrl.id != info.id ) && ( info.id != 0 )) + { + ilog_throttled (ctrl.stage_log_throttle, 50, "%s %s thread [%ld:%ld] monitoring (progress:%d)\n", + ctrl.hostname.c_str(), + ctrl.name.c_str(), + ctrl.id, + info.id, + info.progress); + } +#ifdef WANT_THROTTLED_PROGRESS_LOG + else + { + ilog_throttled (ctrl.stage_log_throttle, 50, "%s %s thread monitoring (progress:%d)\n", + ctrl.hostname.c_str(), + ctrl.name.c_str(), + info.progress); + } +#endif + if (( ctrl.timeout ) && ( mtcTimer_expired ( ctrl.timer ) )) + { + elog ("%s %s thread timeout\n", + ctrl.hostname.c_str(), + ctrl.name.c_str()); + + ctrl.status = FAIL_TIMEOUT ; + _stage_change ( ctrl, THREAD_STAGE__KILL ); + } + else if ( info.runcount > ctrl.runcount ) + { + mtcTimer_reset ( ctrl.timer ); + + if ( info.runcount != (ctrl.runcount+1)) + { + wlog ("%s %s thread runcount jumped from %d to %d (rc:%d)\n", + ctrl.hostname.c_str(), + ctrl.name.c_str(), + ctrl.runcount, + info.runcount, + info.status); + } + else + { + if ( info.status ) + { + wlog ("%s %s thread completed (rc:%d)\n", + 
ctrl.hostname.c_str(), + ctrl.name.c_str(), + info.status); + } + } + ctrl.id = 0 ; + info.id = 0 ; + _stage_change ( ctrl, THREAD_STAGE__DONE ); + } + break ; + } + case THREAD_STAGE__KILL: + { + info.signal = SIGKILL ; + if ( info.id != 0 ) + { + wlog ("%s %s thread kill req (rc:%d)\n", + ctrl.hostname.c_str(), + ctrl.name.c_str(), + info.status); + } + if ( ctrl.id != 0 ) + { + /* Tell the thread ; by way of cancellation points ; to exit + * + * WARNING: Cannot send a cancel to the thread because if the + * thread is already gone then, although the Linux man page says + * it will just return an error, in fact after testing with 0 and + * invalid numbers, causes the calling process to segfault. + * Too dangerous !! Need cooperative exit */ + // pthread_cancel(ctrl.id); + ctrl.id = 0 ; + } + + mtcTimer_reset ( ctrl.timer ); + mtcTimer_start ( ctrl.timer, thread_timer_handler, THREAD_POST_KILL_WAIT ); + _stage_change ( ctrl, THREAD_STAGE__WAIT ); + + break ; + } + case THREAD_STAGE__STAGES: + default: + { + slog ("%s %s has invalid stage ; changing to IDLE\n", + ctrl.hostname.c_str(), + ctrl.name.c_str() ); + + _stage_change ( ctrl , THREAD_STAGE__IDLE ); + rc = FAIL ; + break ; + } + } + return (rc); +} + +/* called by the thread */ +void pthread_signal_handler ( thread_info_type * info_ptr ) +{ + switch ( info_ptr->signal ) + { + case SIGKILL: + ilog ("%s SIGKILL ; exiting ...\n", info_ptr->log_prefix ); + + /* avoid touching data after the sigkill is received */ + // info_ptr->data = "thread SIGKILL" ; + // info_ptr->status = FAIL_THREAD_EXIT ; + // info_ptr->runcount++ ; + pthread_exit(&info_ptr->status ); + exit (FAIL_THREAD_EXIT); + break ; + default: + info_ptr->signal_handling++ ; + } + + /* check for a cancel request - handled internally */ + /* Note: No pint using pthread_testcancel since we don't + * use pthread_cancel because if the risk of crashing the + * parent process + * pthread_testcancel (); + */ +} + + +pthread_t thread_launch_thread 
(void*(thread)(void*), void * arg) +{ + pthread_t id ; + int rc = FAIL ; + if ( __thread_init_sig == THREAD_INIT_SIG ) + { + rc = pthread_create(&id, &__attr, thread, (void*)arg); + } + else + { + slog ("cannot launch thread ; threading not initialized yet\n"); + } + if ( rc ) + return 0 ; + return id ; +} + +void * thread_test ( void * arg ) +{ + UNUSED(arg); + for ( ; ; ) + sleep (1); + + return NULL ; +} diff --git a/mtce-common/cgts-mtce-common-1.0/common/threadUtil.h b/mtce-common/cgts-mtce-common-1.0/common/threadUtil.h new file mode 100644 index 00000000..00e6d2a6 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/threadUtil.h @@ -0,0 +1,288 @@ +#ifndef __INCLUDE_THREADBASE_H__ +#define __INCLUDE_THREADBASE_H__ + +/* + * Copyright (c) 2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/****************************************************************************** + * + * This is the header file for the common Threads Utility Module of Maintenance. + * + * This module offers the following API public to other maintenance modules + * for the purpose of running pthreads. + * + * Limitations: Does not support thread signal handling and only supports + * + * - a single thread per host + * - detached pthreads ; pthread_join would stall the parent process. + * + * There are 2 main structures used for managing pthreads. + * + * thread_ctrl_type - owned and updated and only visible to the parent service. + * thread_info_type - initially init'ed by the parent process, updated by the + * thread and once the thread is done the parent process + * consumes the results of the thread execution. + * + * See these structures definition below for more details. 
 *
 * Thread Utility API Summary
 *
 *   threadUtil_init - module init   ; called in daemon_init
 *   threadUtil_fini - module finish ; called in daemon_exit
 *
 *   thread_handler  - thread FSM ; called periodically in parent process main loop
 *
 *   thread_init     - setup a thread for launch ; default stage is IDLE
 *   thread_idle     - returns true if that thread is in the IDLE stage
 *   thread_launch   - requests thread launch if in IDLE stage
 *   thread_done     - returns true if that thread is in the DONE stage ; the
 *                     parent then consumes the thread results and sets
 *                     ctrl.done = true, which lets the FSM return to IDLE.
 *                     Required before next thread launch
 *   thread_kill     - posts a cooperative exit request (SIGKILL) to the thread
 *                     via info.signal
 *
 * Any maintenance service that wants thread execution support must ...
 *
 *   Run thread_init ( ctrl, info, ... ) before running thread_handler.
 *   Run thread_handler ( ctrl, info ) periodically in the service's FSM loop.
 *
 * With the above done and in place ; when a service wants to run a thread
 * it makes the following calls to launch a thread when idle and monitor
 * completion or timeout of a thread.
 *
 *   if ( thread_idle ( ctrl ) )
 *      if ( thread_launch ( ctrl, info ) == PASS )
 *          start parent (umbrella) timeout timer
 *
 *   service_FSM ( ; ; )
 *   {
 *      if ( timeout )
 *         thread_kill ( ctrl, info );
 *      else if ( thread_done ( ctrl ) )
 *         parent consumes info data and status
 *         ctrl.done = true ;
 *
 *      thread_handler ( ctrl, info );
 *   }
+ * - info.status with its exection status ; a maintenance return code returnCodes.h + * - info.status_string with a string representative of the info.status condition. + * - info.progress should be incremented in the thread's main loop to + * represent forward progress of the thread. + * - info.runcount should be incremented by 1 as the last operation before + * pthread_exit to indicate that the thread is done. + * + * A thread execution is cooperative completion and is therefore expected to + * periodically call pthread_signal_handler in the main loop in support of + * 1. info.signal for SIGKILL exit request + * 2. pthread_testcancel to force monitor a cancellation point + * + * The State Machine: The 'thread_handler' FSM manages a threads life cycle + * through the following stages. + * + * IDLE - default 'do nothing' starting and ending stage. + * LAUNCH - set by thread_launch call + * - launches the thread and starts a timeout timer + * - changes to stage MONITOR after successful launch + * - changes to stage DONE if launch fails (failure status is updated) + * MONITOR - set by LAUNCH stage + * - monitors for completion or timeout + * - timeout changes stage to KILL + * - completion changes stage to DONE + * KILL - set by thread_kill call or MONITOR for timout case + * WAIT - set by KILL to add kill wait time before going IDLE + * + ******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#define gettid() syscall(SYS_gettid) + +using namespace std; + +#include "daemon_common.h" +#include "nodeBase.h" +#include "nodeTimers.h" + +/** Info logger macro*/ +#define ilog_t(format, args...) { syslog(LOG_INFO, "[%ld.%05d] %s %s %-3s %-18s(%4d) %-24s: Info : " format, gettid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } + +#define elog_t(format, args...) 
{ syslog(LOG_INFO, "[%ld.%05d] %s %s %-3s %-18s(%4d) %-24s:Error : " format, gettid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } + +#define wlog_t(format, args...) { syslog(LOG_INFO, "[%ld.%05d] %s %s %-3s %-18s(%4d) %-24s: Warn : " format, gettid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } + +#define dlog_t(format, args...) { \ + if(daemon_get_cfg_ptr()->debug_level&1) \ + { syslog(LOG_INFO, "[%ld.%05d] %s %s %-3s %-18s(%4d) %-24s:Debug : " format, gettid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }} +#define dlog1_t(format, args...) { \ + if(daemon_get_cfg_ptr()->debug_level&2) \ + { syslog(LOG_INFO, "[%ld.%05d] %s %s %-3s %-18s(%4d) %-24s:Debug2: " format, gettid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }} +#define dlog2_t(format, args...) { \ + if(daemon_get_cfg_ptr()->debug_level&4) \ + { syslog(LOG_INFO, "[%ld.%05d] %s %s %-3s %-18s(%4d) %-24s:Debug4: " format, gettid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }} +#define dlog3_t(format, args...) { \ + if(daemon_get_cfg_ptr()->debug_level&8) \ + { syslog(LOG_INFO, "[%ld.%05d] %s %s %-3s %-18s(%4d) %-24s:Debug8: " format, gettid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }} + + +#define blog_t(format, args...) { \ + if(daemon_get_cfg_ptr()->debug_bmgmt&1) \ + { syslog(LOG_INFO, "[%ld.%05d] %s %s %-3s %-18s(%4d) %-24s: BMgt : " format, gettid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }} +#define blog1_t(format, args...) { \ + if(daemon_get_cfg_ptr()->debug_bmgmt&2) \ + { syslog(LOG_INFO, "[%ld.%05d] %s %s %-3s %-18s(%4d) %-24s: BMgt2: " format, gettid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }} +#define blog2_t(format, args...) 
/* Stages of the thread_handler FSM.
 *
 * threadUtil.cpp indexes its threadStages_str[] name table with these
 * values, so the order here and the order of that table must stay in sync.
 * THREAD_STAGE__STAGES is the count / first invalid value ; it is used as
 * the upper bound when range checking a stage. */
typedef enum
{
    THREAD_STAGE__IDLE = 0, /* do nothing stage                          */
    THREAD_STAGE__IGNORE,   /* unmonitored thread                        */
    THREAD_STAGE__LAUNCH,   /* run the thread                            */
    THREAD_STAGE__MONITOR,  /* look for done status and timeout          */
    THREAD_STAGE__DONE,     /* wait for parent to consume done results   */
    THREAD_STAGE__KILL,     /* send cancel and kill requests to thread   */
    THREAD_STAGE__WAIT,     /* wait time before changing to idle stage   */
    THREAD_STAGE__STAGES,   /* number of stages                          */
} thread_stages_enum ;
/****************************************************************************
 *
 * Name       : thread_ctrl_type
 *
 * Description: Structure updated by and only visible to the parent.
 *              Used to control execution of the thread.
 *              Defaulted by thread_init and driven by thread_handler.
 *
 ****************************************************************************/
typedef struct
{
    thread_stages_enum stage ; /* current FSM stage                         */

    /* Thread details */
    pthread_t          id    ; /* the thread id ; 0 when no thread is bound */
    void*   (*thread) (void*); /* pointer to the thread                     */
    string           hostname; /* hostname this thread is tied to           */
    string               name; /* short name of the thread                  */

    /* Timeout controls */
    struct mtc_timer    timer; /* the timer to use for the thread           */
    int               timeout; /* thread timeout ; 0 for no timeout.
                                * NOTE(review): previously described as
                                * msecs here but as seconds in the
                                * thread_init header and by
                                * DEFAULT_THREAD_TIMEOUT_SECS - confirm     */

    /* FSM Level Completion Control and Status */
    int                status; /* FSM status ; overrides info status        */
    bool               done  ; /* flag indicating thread data was consumed ;
                                * set true by the parent, cleared at launch */
    int              runcount; /* copy of info.runcount before launch ;
                                * thread is done once info.runcount exceeds */
    int               retries; /* max thread retries                        */

    /* Miscellaneous */
    int    stage_log_throttle; /* limit number of logs in this stage ;
                                * cleared on every stage change             */
    bool                 idle; /* flags entry into idle stage & log throttle */
} thread_ctrl_type ;
/****************************************************************************
 *
 * Name       : thread_info_type
 *
 * Description: Structure initialized by the parent, updated by the thread
 *              during execution and then data and status is consumed by
 *              the parent when the thread is done.
 *
 *              A pointer to this struct is the argument handed to the
 *              thread function at launch.
 *
 ****************************************************************************/

typedef struct
{
    /* -------------------------------------------------------------------- */
    /* Thread Read Only Data                                                */
    /* -------------------------------------------------------------------- */

    string   hostname       ; /* hostname this thread is tied to            */
    string   name           ; /* short name of the thread                   */
    int      command        ; /* the command the thread should execute      */
    int      signal         ; /* parent request signal ; SIGKILL exit request */
    void   * extra_info_ptr ; /* pointer to thread specific command data    */
    char log_prefix[MAX_LOG_PREFIX_LEN]; /* preformatted log prefix ;
                                          * "<hostname> <name> thread"      */

    /* -------------------------------------------------------------------- */
    /* Thread Write Data - Parent Read Only Result/Progress/Monitoring Data */
    /* -------------------------------------------------------------------- */

    pthread_t id            ; /* the thread id of self                      */
    int      status         ; /* thread execution status set before runcount++ */
    string   status_string  ; /* status string representing unique error case */
    int      runcount       ; /* thread increments just before exit - complete */
    int      progress       ; /* incremented by thread ; show forward progress */
    string   data           ; /* data that resulted from the thread execution */
    int      signal_handling; /* incremented by thread calling signal handler */
    int      pw_file_fd     ; /* file descriptor for the password file ;
                               * 0 is treated as 'none open'                */
    string   password_file  ; /* the name of the password file ; empty when
                               * there is none                              */

} thread_info_type ;
*/ + void* (*thread) (void*), + int timeout, + string & hostname, + string threadname ); + +/* The thread FSM */ +int thread_handler( thread_ctrl_type & ctrl, thread_info_type & info ); + +/* Thread execution management APIs */ +int thread_launch ( thread_ctrl_type & ctrl , thread_info_type & info ); +bool thread_done ( thread_ctrl_type & ctrl ); +bool thread_idle ( thread_ctrl_type & ctrl ); +void thread_kill ( thread_ctrl_type & ctrl , thread_info_type & info ); +string thread_stage ( thread_ctrl_type & ctrl ); + +/* Cooperative service of cancel and exit requests from parent */ +void pthread_signal_handler ( thread_info_type * info_ptr ); + +pthread_t thread_launch_thread (void*(thread)(void*), void*); + +void * thread_test ( void * arg ); + +#endif // __INCLUDE_THREADBASE_H__ diff --git a/mtce-common/cgts-mtce-common-1.0/common/timeUtil.cpp b/mtce-common/cgts-mtce-common-1.0/common/timeUtil.cpp new file mode 100644 index 00000000..be366827 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/timeUtil.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud Maintenance Time Utility Header + */ + +#include "timeUtil.h" + +typedef struct +{ + bool init ; + + time_debug_type last_time ; + time_debug_type this_time ; + time_delta_type diff_time ; + + unsigned long under_50_msec ; + unsigned long under_500_msec; + unsigned long under_1_sec ; + unsigned long under_2_sec ; + unsigned long under_3_sec ; + unsigned long under_5_sec ; + unsigned long over_5_sec ; + + unsigned long counter ; +} timeUtil_type ; + +static timeUtil_type time_struct ; + +void timeUtil_sched_init ( void ) +{ + gettime ( time_struct.last_time ); + time_struct.under_50_msec = 0 ; + time_struct.under_500_msec = 0 ; + time_struct.under_1_sec = 0 ; + time_struct.under_2_sec = 0 ; + time_struct.under_3_sec = 0 ; + time_struct.under_5_sec = 0 ; + time_struct.over_5_sec = 0 ; + time_struct.counter = 0 ; + time_struct.init = true ; +} + +void scheduling_histogram ( void ) +{ + ilog ("Under: 50ms: %ld - 500ms:%ld - 1s:%ld - 2s:%ld - 3s:%ld - 5s:%ld ---- over:%ld\n", + time_struct.under_50_msec, + time_struct.under_500_msec, + time_struct.under_1_sec, + time_struct.under_2_sec, + time_struct.under_3_sec, + time_struct.under_5_sec, + time_struct.over_5_sec); +} + +void timeUtil_sched_sample ( void ) +{ + if ( time_struct.init == false ) + { + elog ("Time struct not initialized\n"); + return ; + } + gettime ( time_struct.this_time ); + timedelta ( time_struct.last_time, time_struct.this_time, time_struct.diff_time ); + + if ( time_struct.diff_time.secs == 0 ) + { + if ( time_struct.diff_time.msecs < 50000 ) time_struct.under_50_msec++ ; + else if ( time_struct.diff_time.msecs < 500000 ) time_struct.under_500_msec++ ; + else time_struct.under_1_sec++ ; + } + else + { + if ( time_struct.diff_time.secs < 2 ) + { + time_struct.under_2_sec++ ; + } + else if ( time_struct.diff_time.secs < 3 ) + { + wlog (">>> Minor Scheduling delay: %ld.%3ld secs\n", + 
time_struct.diff_time.secs, + time_struct.diff_time.msecs ); + time_struct.under_3_sec++ ; + } + else if ( time_struct.diff_time.secs < 5 ) + { + wlog (">>> Major Scheduling delay: %ld.%3ld secs\n", + time_struct.diff_time.secs, + time_struct.diff_time.msecs ); + time_struct.under_5_sec++ ; + scheduling_histogram ( ); + } + else + { + wlog (">>> Critical Scheduling delay: %ld.%3ld secs\n", + time_struct.diff_time.secs, + time_struct.diff_time.msecs ); + time_struct.over_5_sec++ ; + scheduling_histogram ( ); + } + } + time_struct.last_time.ts.tv_sec = time_struct.this_time.ts.tv_sec ; + time_struct.last_time.ts.tv_nsec = time_struct.this_time.ts.tv_nsec ; + + if ( ++time_struct.counter >= 1000 ) + { + scheduling_histogram ( ); + time_struct.counter = 0 ; + } +} diff --git a/mtce-common/cgts-mtce-common-1.0/common/timeUtil.h b/mtce-common/cgts-mtce-common-1.0/common/timeUtil.h new file mode 100644 index 00000000..9ca9bf95 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/timeUtil.h @@ -0,0 +1,30 @@ +#ifndef __INCLUDE_TIMEUTIL_H__ +#define __INCLUDE_TIMEUTIL_H__ + +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud Maintenance Time Utility Header + */ + +#include +#include +#include +#include +#include +#include + +using namespace std; + +#include "daemon_common.h" /* */ + +void timeUtil_sched_init ( void ); +void timeUtil_sched_sample ( void ); + +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/common/tokenUtil.cpp b/mtce-common/cgts-mtce-common-1.0/common/tokenUtil.cpp new file mode 100644 index 00000000..0613839e --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/common/tokenUtil.cpp @@ -0,0 +1,582 @@ +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. 
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+  * @file
+  * Wind River Titanium Cloud
+  * Common Keystone Token Authentication Utility API
+  *
+  * tokenUtil_handler - handle response
+  * tokenUtil_get_token - get a copy of the static token
+  * tokenUtil_get_ptr - get a pointer to the static token
+  *
+  */
+
+#ifdef __AREA__
+#undef __AREA__
+#endif
+#define __AREA__ "tok"
+
+#include <time.h> /* for ... time_t, struct tm */
+#include <string.h> /* for ... memset, strlen */
+#include "daemon_ini.h" /* for ... MATCH macro */
+
+#include "nodeUtil.h" /* for ... node utilities */
+#include "nodeBase.h" /* for ... node_inv_type */
+#include "jsonUtil.h" /* for ... Json utilities */
+#include "tokenUtil.h" /* for ... this module header */
+
+#define GET_SERVICE_LIST_LABEL ((const char *)"OS-KSADM:services")
+
+/* The static token used for authentication by any
+ * daemon that includes this module */
+static keyToken_type __token__ ;
+keyToken_type * tokenUtil_get_ptr ( void ) { return &__token__ ; };
+keyToken_type tokenUtil_get_token ( void ) { return __token__ ; };
+
+/* Log the token signature and expiry once after each refresh.
+ * Clears the 'refreshed' flag so the log is not repeated. */
+void tokenUtil_log_refresh ( void )
+{
+    if ( __token__.refreshed == true )
+    {
+        __token__.refreshed = false ;
+
+        /* The expiry is parsed elsewhere as 'YYYY-MM-DDTHH:MM:SS'.
+         * substr throws std::out_of_range when its start position is
+         * past the end of the string, so only extract the time of day
+         * when the string is long enough to contain one. */
+        string expiry_date = __token__.expiry.substr(0,10) ;
+        string expiry_time = "" ;
+        if ( __token__.expiry.length() > 11 )
+        {
+            expiry_time = __token__.expiry.substr(11,8) ;
+        }
+        ilog ( "Token Refresh: [%s] [Expiry: %s %s]\n",
+               md5sum_string ( __token__.token).c_str(),
+               expiry_date.c_str(),
+               expiry_time.c_str());
+    }
+}
+
+/* Handle refreshing the authentication token
+ *
+ * A new token is requested when the previous request failed
+ * (event.status != PASS), when the stored expiry time cannot be
+ * parsed, or when the token expires within STALE_TOKEN_DURATION
+ * seconds. Returns event.status. */
+int tokenUtil_token_refresh ( libEvent & event, string hostname )
+{
+    struct tm tokenExpiry; // given token expired time (UTC)
+    time_t cTime = time(NULL); // current time (UTC)
+    double diffTime = 0;
+
+    /* strptime only fills in the fields named by the format ; start
+     * from a zeroed struct so mktime never reads indeterminate data */
+    memset ( &tokenExpiry, 0, sizeof(tokenExpiry));
+
+    if ( event.status != PASS )
+    {
+        event.status = tokenUtil_new_token( event, hostname );
+    }
+    else if ( strptime( __token__.expiry.c_str(), "%Y-%m-%dT%H:%M:%S", &tokenExpiry ) == NULL )
+    {
+        /* no usable expiry time ; treat the token as stale */
+        wlog ("Failed to parse token expiry '%s' ; requesting new token\n",
+              __token__.expiry.c_str());
+        event.status = tokenUtil_new_token( event, hostname );
+    }
+    else
+    {
+        /* NOTE(review): mktime interprets tokenExpiry as local time while
+         * the comments above describe both times as UTC - confirm the
+         * daemons always run with a UTC timezone. */
+
+        /* Get a new authentication token if the given token is about to expire */
+        diffTime = difftime( mktime( &tokenExpiry ), cTime );
+        if ( diffTime <= STALE_TOKEN_DURATION )
+        {
+            ilog ("The given token will expire in %f seconds\n",
+                  diffTime);
+            event.status = tokenUtil_new_token( event, hostname );
+        }
+    }
+    return (event.status);
+}
+
+/* Select the keystone host to talk to.
+ * Preference order: the configured keystone_auth_host when it is at
+ * least MIN_CHARS_IN_IP_ADDR long, then the floating IP reported by
+ * get_ip_addresses, then "localhost". */
+string _get_ip ( void )
+{
+    string ip = "" ;
+    daemon_config_type * cfg_ptr = daemon_get_cfg_ptr() ;
+    if (( cfg_ptr->keystone_auth_host ) && ( strlen (cfg_ptr->keystone_auth_host) >= MIN_CHARS_IN_IP_ADDR ))
+    {
+        ip = cfg_ptr->keystone_auth_host;
+    }
+    else
+    {
+        string my_hostname = "" ;
+        string my_local_ip = "" ;
+        string my_float_ip = "" ;
+        get_ip_addresses ( my_hostname, my_local_ip, my_float_ip ) ;
+        if ( !my_float_ip.empty() )
+        {
+            ip = my_float_ip ;
+            ilog ("defaulting keystone auth host to floating IP:%s\n", ip.c_str());
+        }
+        else
+            ip = "localhost" ;
+
+    }
+    return (ip);
+}
+
+/* Return the configured keystone prefix path or an
+ * empty string when none is configured. */
+string _get_keystone_prefix_path ( )
+{
+    string prefix_path = "";
+    daemon_config_type * cfg_ptr = daemon_get_cfg_ptr() ;
+
+    if ( cfg_ptr->keystone_prefix_path)
+    {
+        prefix_path = cfg_ptr->keystone_prefix_path;
+    }
+
+    return (prefix_path);
+}
+
+/* http://localhost:5000/v2.0/tokens -X POST -H "Content-Type: application/json"
+ *                                           -H "Accept: application/json"
+ *                                           -H "User-Agent: python-keyclient"
+ *                                           -H "Connection: close"
+ *
+ * {
+ *    "auth":
+ *    {
+ *        "tenantName": "admin",
+ *        "passwordCredentials":
+ *        {
+ *            "username": "admin",
+ *            "password": "password"
+ *        }
+ *    }
+ * }
+ *
+ */
+
+/*******************************************************************
+ *
+ * Name : tokenUtil_handler
+ *
+ * Description: The handles the keystone get request
+ * responses for the following messages
+ *
+ * KEYSTONE_GET_TOKEN,
+ * KEYSTONE_GET_SERVICE_LIST
+ * KEYSTONE_GET_ENDPOINT_LIST
+ *
+ * Returns : event.status for the service and endpoint list
+ * requests ; otherwise the token request status.
+ *
+ *******************************************************************/
+int tokenUtil_handler ( libEvent & event )
+{
+    jsonUtil_auth_type info ;
+
+    string hn = event.hostname ;
+    int rc = event.status ;
+
+    keyToken_type * token_ptr = tokenUtil_get_ptr ( ) ;
+
+    if ( event.status )
+    {
+        elog ( "%s Token Request Failed - Error Code (%d) \n", hn.c_str(), event.status );
+    }
+    if ( event.request == KEYSTONE_GET_TOKEN )
+    {
+        /* get the token from response header*/
+        struct evkeyvalq *header_ptr = evhttp_request_get_input_headers(event.req);
+        const char * header_token_ptr = evhttp_find_header (header_ptr, MTC_JSON_AUTH_ID);
+        if ( !header_token_ptr )
+        {
+            /* Never build a std::string from a NULL pointer ; fail
+             * the request and leave the previous token untouched. */
+            rc = FAIL_JSON_PARSE ;
+            elog ( "%s Token Request Failed - no token in header\n", hn.c_str());
+        }
+        else if ( jsonApi_auth_load ( hn, (char*)event.response.data(), info ) )
+        {
+            rc = FAIL_JSON_PARSE ;
+            elog ( "%s Token Request Failed - Json Parse Error\n", hn.c_str());
+        }
+        else
+        {
+            std::string token_str(header_token_ptr);
+
+            jlog ("%s Token Exp: %s\n", hn.c_str(), info.expiry.c_str() );
+            jlog ("%s Admin URL: %s\n" ,hn.c_str(), info.adminURL.c_str() );
+            jlog ("%s Token Len: %zu\n",hn.c_str(), token_str.length() );
+            token_ptr->issued = info.issued ;
+            token_ptr->expiry = info.expiry ;
+            token_ptr->token = token_str ;
+            token_ptr->url = info.adminURL ;
+            token_ptr->refreshed = true ;
+        }
+    }
+    else if ( event.request == KEYSTONE_GET_ENDPOINT_LIST )
+    {
+        /* Response: {"endpoints":
+         * [{
+         * "service_id": "067...b60",
+         * "region": "RegionOne",
+         * "enabled": true,
+         * "id": "410ab64a37114a418d188f450300aa48",
+         * "interface": "internal",
+         * ""links": {
+         * "self": "http://192.168.204.2:5000/v3/endpoints/410ab64a37114a418d188f450300aa48"
+         * }
+         * "url": "http://192.168.204.2:8777",
+         * { ... }]}
+         *
+         * Output:
+         *
+         * event.admin_url = "http://192.168.204.2:8777" ;
+         * event.internal_url = "http://192.168.204.2:8777" ;
+         * event.public_url = "http://10.10.10.2:8777" ;
+         *
+         */
+        list<string> endpoint_list ; endpoint_list.clear() ;
+        rc = jsonUtil_get_list ( (char*)event.response.data(), (char*)event.label.data(), endpoint_list );
+        if ( rc == PASS )
+        {
+            std::list<string>::iterator iter_ptr ;
+            string interface_type;
+            int rc1 = FAIL, rc2 = FAIL, rc3 = FAIL;
+            for ( iter_ptr = endpoint_list.begin();
+                  iter_ptr != endpoint_list.end();
+                  iter_ptr++ )
+            {
+                if ( jsonUtil_get_key_val ( (char*)iter_ptr->data(), "service_id", event.value ) == PASS )
+                {
+                    if ( !event.value.compare(event.information) )
+                    {
+                        rc = jsonUtil_get_key_val ( (char*)iter_ptr->data(), MTC_JSON_AUTH_INTERFACE, interface_type);
+                        if ( rc)
+                        {
+                            wlog ("%s '%s' failed to get interface type from endpoint list (%d)\n",
+                                  event.hostname.c_str(),
+                                  event.information.c_str(), rc);
+                        }
+                        else if ( interface_type == "admin" )
+                        {
+                            rc1 = jsonUtil_get_key_val ( (char*)iter_ptr->data(), MTC_JSON_AUTH_URL, event.admin_url );
+                        }
+                        else if ( interface_type == "internal" )
+                        {
+                            rc2 = jsonUtil_get_key_val ( (char*)iter_ptr->data(), MTC_JSON_AUTH_URL, event.internal_url );
+                        }
+                        else if ( interface_type == "public" )
+                        {
+                            rc3 = jsonUtil_get_key_val ( (char*)iter_ptr->data(), MTC_JSON_AUTH_URL, event.public_url );
+                        }
+                    }
+                    else
+                    {
+                        wlog ("%s '%s' service endpoint not found\n", event.hostname.c_str(), event.information.c_str());
+                    }
+                }
+                else
+                {
+                    elog ("%s Parse service endpoint list failed (rc:%d)\n", event.hostname.c_str(), rc);
+                    elog ("%s Response: %s\n", event.hostname.c_str(), event.response.c_str() );
+                    event.status = rc ;
+                }
+            }
+
+            /* all three urls must have been found and parsed */
+            if ( rc1 | rc2 | rc3 )
+            {
+                wlog ("%s '%s' one or mode endpoint parse failure (%d:%d:%d)\n",
+                      event.hostname.c_str(),
+                      event.information.c_str(), rc1, rc2, rc3 );
+                event.status = FAIL_KEY_VALUE_PARSE ;
+            }
+            else
+            {
+                ilog ("%s keystone '%s' service endpoint admin url: %s\n",
+                      event.hostname.c_str(),
+                      event.information.c_str(),
+                      event.admin_url.c_str());
+                ilog ("%s keystone '%s' service endpoint public url: %s\n",
+                      event.hostname.c_str(),
+                      event.information.c_str(),
+                      event.public_url.c_str());
+                ilog ("%s keystone '%s' service endpoint internal url: %s\n",
+                      event.hostname.c_str(),
+                      event.information.c_str(),
+                      event.internal_url.c_str());
+                event.status = PASS ;
+            }
+        }
+        else
+        {
+            wlog ("%s '%s' service not found using '%s' label\n",
+                  event.hostname.c_str(),
+                  event.information.c_str(),
+                  event.label.c_str());
+
+            /* report the list parse failure to the caller, the same
+             * way the service list request below does */
+            event.status = rc ;
+        }
+        return (event.status);
+    }
+    else if ( event.request == KEYSTONE_GET_SERVICE_LIST )
+    {
+        /* Response: {"services":
+             [
+                {"id": "49fc93c32d734c78a9d9f975c22f1703", "type": "network", "name": "neutron", "description": "Neutron Networking Service"},
+                {"id": "0900a982ff114e7ba62c317443b43362", "type": "metering", "name": "ceilometer", "description": "Openstack Metering Service"},
+                {"id": "97940d057bec47cc989cc190b4293aad", "type": "ec2", "name": "nova_ec2", "description": "EC2 Service"},
+                {"id": "7ce51d481d024b1f8b80bb1127b80752", "type": "volumev2", "name": "cinderv2", "description": "Cinder Service v2"},
+                {"id": "3ed8ae6ccf85445ebdf2e93bbce9f5fb", "type": "computev3", "name": "novav3", "description": "Openstack Compute Service v3"},
+                {"id": "564bf663693c49cf9fee24e2fdbdba3a", "type": "identity", "name": "keystone", "description": "OpenStack Identity Service"},
+                {"id": "7e0cadd9db444342b7fddb0005c4ce5f", "type": "platform", "name": "sysinv", "description": "SysInv Service"},
+                {"id": "be7afccda91c4ba19ac2e53f613c6b63", "type": "volume", "name": "cinder", "description": "Cinder Service"},
+                {"id": "edf60a37f4f84b9baba215d8346b814f", "type": "image", "name": "glance", "description": "Openstack Image Service"},
+                {"id": "0673921c7b094178989455a5b157fb60", "type": "patching", "name": "patching", "description": "Patching Service"},
+                {"id": "d7621026166f43c0a1c74e0e9784cce6", "type": "compute", "name": "nova", "description": "Openstack Compute Service"},
+                {"id": "aef585311e3144e0b1267ea25dc40b70", "type": "orchestration", "name": "heat", "description": "Openstack Orchestration Service"},
+                {"id": "0a67bc174fa0469e9b837daf23d83aaf", "type": "cloudformation", "name": "heat-cfn", "description": "Openstack Cloudformation Service"}
+             ]} */
+
+        bool found = false ;
+        list<string> service_list ; service_list.clear() ;
+        rc = jsonUtil_get_list ( (char*)event.response.data(), (char*)event.label.data(), service_list );
+        if ( rc == PASS )
+        {
+            std::list<string>::iterator iter_ptr ;
+
+            for ( iter_ptr = service_list.begin() ;
+                  iter_ptr != service_list.end() ;
+                  iter_ptr++ )
+            {
+                if ( jsonUtil_get_key_val ( (char*)iter_ptr->data(), "name", event.value ) == PASS )
+                {
+                    if ( !event.value.compare(event.information) )
+                    {
+                        if ( jsonUtil_get_key_val ( (char*)iter_ptr->data(), "id", event.result ) == PASS )
+                        {
+                            found = true ;
+                            ilog ("%s '%s' service uuid is '%s'\n",
+                                  event.hostname.c_str(),
+                                  event.information.c_str(),
+                                  event.result.c_str());
+                            break ;
+                        }
+                        else
+                        {
+                            wlog ("%s '%s' service uuid not found\n",
+                                  event.hostname.c_str(),
+                                  event.information.c_str());
+                            event.status = FAIL_KEY_VALUE_PARSE ;
+                        }
+                    }
+                }
+                else
+                {
+                    wlog ("%s '%s' service not found\n",
+                          event.hostname.c_str(),
+                          event.information.c_str());
+                }
+            }
+        }
+        else
+        {
+            elog ("%s Parse service list failed (rc:%d)\n", event.hostname.c_str(), rc);
+            wlog ("%s Response: %s\n", event.hostname.c_str(), event.response.c_str() );
+            event.status = rc ;
+        }
+
+        if ( found == true )
+        {
+            event.status = PASS ;
+        }
+        else
+        {
+            wlog ("%s '%s' service not found using '%s' label\n",
+                  event.hostname.c_str(),
+                  event.information.c_str(),
+                  event.label.c_str());
+            event.status = FAIL_NOT_FOUND ;
+        }
+        return (event.status);
+    }
+    else
+    {
+        wlog ("%s Keystone Request Failed - Unsupported Request (%d)\n", hn.c_str(), event.request );
+    }
+
+    /* Check for a response string */
+    if ( token_ptr->token.empty() )
+    {
+        elog ("%s Failed to get token\n", hn.c_str());
+        rc = FAIL_TOKEN_GET;
+    }
+
+    /* Check for Key URL */
+    else if ( token_ptr->url.empty() )
+    {
+        elog ("%s Failed to get token URL\n", hn.c_str());
+        rc = FAIL_TOKEN_URL;
+    }
+    else
+    {
+        dlog ("%s Token Refresh O.K.\n", event.hostname.c_str());
+    }
+    return (rc);
+}
+
+/* Corrupt the static token by overwriting 8 characters starting
+ * at offset 8. Does nothing but log when the token is too short,
+ * because string::replace throws std::out_of_range when the start
+ * offset is past the end of the string. */
+void tokenUtil_fail_token ( void )
+{
+    if ( __token__.token.length() < 8 )
+    {
+        wlog ("Token too short to corrupt (%zu bytes)\n", __token__.token.length());
+        return ;
+    }
+    __token__.token.replace ( 8, 8, "EEEEEEEE" );
+    slog ("Corrupting Token: %s\n",__token__.token.c_str());
+}
+
+/* fetches an authorization token as a blocking request */
+int tokenUtil_new_token ( libEvent & event, string hostname )
+{
+    ilog ("%s Requesting Authentication Token\n", hostname.c_str());
+
+    httpUtil_event_init ( &event,
+                          hostname,
+                          "tokenUtil_new_token",
+                          _get_ip(),
+                          daemon_get_cfg_ptr()->keystone_port);
+
+    event.hostname = _hn ();
+
+    dlog ("%s fetching new token\n", event.hostname.c_str());
+
+    event.prefix_path = _get_keystone_prefix_path();
+    event.blocking = true ;
+    event.request = KEYSTONE_GET_TOKEN ;
+    event.operation = "get new" ;
+    event.type = EVHTTP_REQ_POST ;
+    event.timeout = HTTP_TOKEN_TIMEOUT ;
+    event.handler = &tokenUtil_handler ;
+
+    return ( httpUtil_api_request ( event ));
+}
+
+/* returns the uuid for the specified keystone service
+ *
+ * NOTE(review): the port is hard coded to 5000 here while
+ * tokenUtil_new_token uses the configured keystone_port - confirm
+ * whether the two are meant to differ. */
+string tokenUtil_get_svc_uuid ( libEvent & event, string service_name )
+{
+    httpUtil_event_init ( &event,
+                          service_name,
+                          "tokenUtil_get_svc_uuid",
+                          _get_ip(),
+                          5000 ) ; // get_keystone_admin_port() ;
+
+    event.hostname = _hn() ;
+
+    /* The type of HTTP request */
+    event.type = EVHTTP_REQ_GET ;
+
+    /* set the timeout */
+    event.timeout = HTTP_KEYSTONE_GET_TIMEOUT ;
+
+    event.prefix_path = _get_keystone_prefix_path() ;
+    event.blocking = true ;
+    event.request = KEYSTONE_GET_SERVICE_LIST ;
+    event.operation = "get service list" ;
+    event.handler = &tokenUtil_handler ;
+    event.information = service_name ;
+    event.label = "services" ;
+    event.token.url = "/v3/services" ;
+    event.address = event.token.url;
+
+    if ( httpUtil_api_request ( event ) != PASS )
+    {
+        elog ("%s service name fetch failed\n", service_name.c_str() );
+    }
+    return ( event.result );
+}
+
+/* fetches the endpoint urls for the specified service uuid.
+ * The urls are stored in the event by tokenUtil_handler ;
+ * the return value is event.status. */
+int tokenUtil_get_endpoints ( libEvent & event, string service_uuid )
+{
+    httpUtil_event_init ( &event,
+                          service_uuid,
+                          "tokenUtil_get_endpoints",
+                          _get_ip(),
+                          5000 ); // get_keystone_admin_port();
+
+    event.hostname = _hn() ;
+
+    /* The type of HTTP request */
+    event.type = EVHTTP_REQ_GET ;
+
+    /* set the timeout */
+    event.timeout = HTTP_KEYSTONE_GET_TIMEOUT ;
+
+    event.prefix_path = _get_keystone_prefix_path() ;
+    event.blocking = true ;
+    event.request = KEYSTONE_GET_ENDPOINT_LIST ;
+    event.operation = "get endpoint list" ;
+    event.handler = &tokenUtil_handler ;
+    event.information = service_uuid ;
+    event.label = "endpoints" ;
+    event.token.url = "/v3/endpoints" ;
+    /* get the endpoints by service uuid*/
+    event.token.url.append("?service_id=");
+    event.token.url.append(service_uuid.data());
+    event.address = event.token.url;
+
+    if ( httpUtil_api_request ( event ) != PASS )
+    {
+        elog ("%s service uuid fetch failed\n", service_uuid.c_str() );
+    }
+    return ( event.status );
+}
+
+/* Ini file callback that loads the keystone related settings of
+ * the 'agent' section into the daemon config struct passed in
+ * through 'user'. Unrecognized settings are ignored.
+ * Always returns PASS. */
+int keystone_config_handler ( void * user,
+                              const char * section,
+                              const char * name,
+                              const char * value)
+{
+    daemon_config_type* config_ptr = (daemon_config_type*)user;
+
+    if (MATCH("agent", "keystone_auth_host"))
+    {
+        /* Read this into a config_ptr parameter */
+        config_ptr->keystone_auth_host = strdup(value);
+        ilog("Keystone IP : %s\n", config_ptr->keystone_auth_host );
+    }
+    else if (MATCH("agent", "keystone_auth_port"))
+    {
+        config_ptr->keystone_port = atoi(value);
+        dlog("Keystone Port : %d\n", config_ptr->keystone_port );
+    }
+    else if (MATCH("agent", "keystone_auth_uri"))
+    {
+        /* Read this into a config_ptr parameter.
+         * Note, if keystone_auth_uri is defined, it will take
+         * precedence over keystone_auth_host, and auth_port.
+         */
+        if ( value != NULL && (strlen(value) > 0))
+        {
+            if ( strcmp(value,"undef") != 0)
+            {
+                config_ptr->keystone_auth_uri = strdup(value);
+                ilog("Mtce Keystone auth uri : %s\n",
+                      config_ptr->keystone_auth_uri);
+            }
+        }
+    }
+    else if (MATCH("agent", "keyring_directory"))
+    {
+        config_ptr->keyring_directory = strdup(value);
+        ilog("Keyring Directory : %s\n", config_ptr->keyring_directory );
+    }
+    else if (MATCH("agent", "keystone_auth_username"))
+    {
+        config_ptr->keystone_auth_username = strdup(value);
+        ilog("Mtce Keystone username : %s\n",
+              config_ptr->keystone_auth_username );
+    }
+    else if (MATCH("agent", "keystone_auth_pw"))
+    {
+        config_ptr->keystone_auth_pw = strdup(value);
+
+        /* never write the password itself to the log */
+        dlog("Mtce Keystone pw : %s\n",
+              "********" );
+    }
+    else if (MATCH("agent", "keystone_auth_project"))
+    {
+        config_ptr->keystone_auth_project = strdup(value);
+        ilog("Mtce Keystone project : %s\n",
+              config_ptr->keystone_auth_project );
+    }
+    else if (MATCH("agent", "keystone_user_domain"))
+    {
+        config_ptr->keystone_user_domain = strdup(value);
+        ilog("Mtce Keystone user domain : %s\n",
+              config_ptr->keystone_user_domain );
+    }
+    else if (MATCH("agent", "keystone_project_domain"))
+    {
+        config_ptr->keystone_project_domain = strdup(value);
+        ilog("Mtce Keystone project domain : %s\n",
+              config_ptr->keystone_project_domain );
+    }
+    else if (MATCH("agent", "keystone_region_name")) // region_name=RegionOne
+    {
+        config_ptr->keystone_region_name = strdup(value);
+        ilog("Region Name : %s\n", config_ptr->keystone_region_name );
+    }
+    else if (MATCH("agent", "ceilometer_port"))
+    {
+        config_ptr->ceilometer_port = atoi(value);
+        dlog("Ceilometer Port : %d\n", config_ptr->ceilometer_port );
+    }
+    return (PASS);
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/common/tokenUtil.h b/mtce-common/cgts-mtce-common-1.0/common/tokenUtil.h
new file mode 100644
index 00000000..1937ddc0
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/common/tokenUtil.h
@@ -0,0 +1,58 @@
+#ifndef __INCLUDE_TOKENUTIL_H__ +#define __INCLUDE_TOKENUTIL_H__ +/* + * Copyright (c) 2013, 2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/* + * This module contains a single static __token__ object, + * an interface that updates/refreshes it with a valid token + * an interface that queries keystone service list uuids + * an interface that queries the specified service admin + * endpoint using its service uuid. + * + * + * tokenUtil_get_svc_uuid - returns the service uuid for the + * specified service. + * tokenUtil_get_endpoints - returns the admin endpoint for the + * specified service uuid. + */ + +#include +#include + +using namespace std; + +#include "logMacros.h" +#include "httpUtil.h" /* for ... libEvent */ + +#define MTC_POST_KEY_LABEL "/v3/auth/tokens" + +#define KEYSTONE_SIG "token" + +/* The invalidation window is 5 minutes according + * to the testing of token expiration time */ +#define STALE_TOKEN_DURATION 300 //5 minutes + +/* returns the static token object for this module */ +keyToken_type * tokenUtil_get_ptr ( void ); +keyToken_type tokenUtil_get_token ( void ); + +int tokenUtil_handler ( libEvent & event ); +int tokenUtil_new_token ( libEvent & event, string hostname ); +int tokenUtil_token_refresh( libEvent & event, string hostname ); +int tokenUtil_get_endpoints( libEvent & event, string service_uuid ); +string tokenUtil_get_svc_uuid ( libEvent & event, string service_name ); + +void tokenUtil_fail_token ( void ); +void tokenUtil_log_refresh ( void ); + +int keystone_config_handler ( void * user, + const char * section, + const char * name, + const char * value); + +#endif /* __INCLUDE_TOKENUTIL_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/daemon/Makefile b/mtce-common/cgts-mtce-common-1.0/daemon/Makefile new file mode 100755 index 00000000..707ae0b6 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/daemon/Makefile @@ -0,0 +1,33 @@ +# +# Copyright (c) 2013-2015 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+SRCS = daemon_main.cpp daemon_ini.cpp daemon_debug.cpp daemon_signal.cpp daemon_config.cpp daemon_files.cpp
+
+# The object list is derived from SRCS so the two cannot drift apart
+OBJS = $(SRCS:.cpp=.o)
+INCLUDES = -I. -I../common
+CCFLAGS = -g -O2 -Wall -Wextra -Werror -DBUILDINFO="\"$$(date)\""
+
+STATIC_ANALYSIS_TOOL = cppcheck
+# 'command -v' is POSIX so the probe also works when make's shell is not bash
+STATIC_ANALYSIS_TOOL_EXISTS = $(shell command -v $(STATIC_ANALYSIS_TOOL) >/dev/null 2>&1 && echo 1 || echo 0)
+
+# None of these targets name a real file
+.PHONY: lib build clean static_analysis
+
+.cpp.o:
+	$(CXX) $(INCLUDES) $(CCFLAGS) -lstdc++ $(EXTRACCFLAGS) -c $< -o $@
+
+static_analysis:
+ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1)
+	$(STATIC_ANALYSIS_TOOL) --language=c++ --enable=warning -U__AREA__ -UWANT_FIT_TESTING *.cpp *.h
+else
+	@echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..."
+endif
+
+lib: build $(OBJS)
+	ar rcs libdaemon.a $(OBJS) $(EXTRAARFLAGS)
+
+build: clean static_analysis
+
+clean:
+	@rm -f $(OBJS) *.o *.a
diff --git a/mtce-common/cgts-mtce-common-1.0/daemon/daemon_common.h b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_common.h
new file mode 100755
index 00000000..f804b849
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_common.h
@@ -0,0 +1,353 @@
+#ifndef __DAEMON_COMMON_H__
+#define __DAEMON_COMMON_H__
+/*
+ * Copyright (c) 2013, 2016 Wind River Systems, Inc.
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Common Maintenance Header + */ + +#include +#include + +using namespace std ; + +#include "logMacros.h" +#include "returnCodes.h" + +#ifndef UNUSED +#define UNUSED(_x_) ((void) _x_) +#endif + +#ifndef MEMSET_ZERO +#define MEMSET_ZERO(_y_) (memset (&_y_,0,sizeof(_y_))) +#endif + +#define DEBUG_HALT ilog ("HALTED !!!!\n"); \ + for ( ;; ) \ + { \ + daemon_signal_hdlr() ; \ + } + + +/* List of different types */ +typedef enum +{ + SYSTEM_TYPE__NORMAL =0, + SYSTEM_TYPE__CPE_MODE__DUPLEX =1, + SYSTEM_TYPE__CPE_MODE__DUPLEX_DIRECT =2, + SYSTEM_TYPE__CPE_MODE__SIMPLEX =3, +} system_type_enum ; + + +/** Called by signal handler on daemon exit + * Performs cleanup by closing open files + * and freeing used memory */ +void daemon_exit ( void ); + +/** daemon_files.cpp cleanup utility */ +void daemon_files_fini ( void ); + +/** daemon_files.cpp init utility + * Creates log file, process id file + * and a process fill script */ +int daemon_files_init ( void ); +int daemon_create_pidfile ( void ); +void daemon_remove_pidfile ( void ); +void daemon_remove_file ( const char * filename ); +void daemon_rename_file ( const char * path, const char * old_filename, const char * new_filename ); +void daemon_make_dir ( const char * dir ); + +string daemon_read_file ( const char * filename ); + +void daemon_logfile_close ( void ); +void daemon_logfile_open ( void ); + +void daemon_log ( const char * filename , const char * str ); +void daemon_log_value ( const char * filename , int val ); +void daemon_log_value ( const char * filename , const char * str, int val ); + +/* reads the first line of a file and if it contains a string + * that represents an integer value then return it */ +int daemon_get_file_int ( const char * filename ); +string daemon_get_file_str ( const char * filename ); + +string daemon_nodetype ( void ); +string daemon_infra_iface ( void ); +string daemon_mgmnt_iface ( void 
); +string daemon_sw_version ( void ); +string daemon_bmc_hosts_file ( void ); +string daemon_bmc_hosts_dir ( void ); +string daemon_md5sum_file ( const char * file ); + +system_type_enum daemon_system_type ( void ); + +char * daemon_get_iface_master ( char * iface_slave_ptr ); + +string get_shadow_signature ( char * shadowfile , const char * username, + char * shadowinfo, size_t infolen); + +void daemon_healthcheck ( const char * sig ); +void daemon_health_test ( void ); + +bool daemon_is_file_present ( const char * filename ); +int daemon_get_rmem_max ( void ); + +typedef struct +{ + int count ; + int warnings ; + int errors ; +} status_type ; + +void daemon_dump_info ( void ); /**< Common info dump utility */ +const char * daemon_stream_info ( void ); /**< Send the dump info as a string */ + +void get_debug_options ( const char * file , daemon_config_type * ptr ); + + +/** + * Read and process mtc.ini file settings into the daemon configuration + */ +int daemon_configure ( void ); + +/* Set default config values. + * This is especially important for char 8 options that default to null. */ +void daemon_config_default ( daemon_config_type * config_ptr ); + +/** + * Initialize the daemon main service + * + * @param iface + *- user can overide the management interface via -i option on nthe command line + * + */ +int daemon_init ( string iface , string nodetype ); + +/** + * Run the daemon service + */ +void daemon_service_run ( void ); + +/* Don't return from this call until the specified file exists + * or the timeout is exceeded. In the timeout case a FAIL_TIMEOUT + * is returned. 
*/ +int daemon_wait_for_file ( const char * filename, int timeout ); + +/** + * Daemon Signal management - init and main loop handler + */ +int daemon_signal_init ( void ); +void daemon_signal_hdlr ( void ); +void daemon_sigchld_hdlr ( void ); + +/** + * Control the enabled state of the signal handler latency monitor + * true = enabled + */ +void daemon_latency_monitor ( bool state ); + +void daemon_dump_cfg ( void ); + +int timeout_config_handler ( void * user, + const char * section, + const char * name, + const char * value); + +int debug_config_handler ( void * user, + const char * section, + const char * name, + const char * value); + +int sysinv_config_handler ( void * user, + const char * section, + const char * name, + const char * value); + +int client_timeout_handler ( void * user, + const char * section, + const char * name, + const char * value); + +/** Test Head Entry */ +int daemon_run_testhead ( void ); +/** + * Debug API used to set module debug level. + */ +#define CONFIG_AGENT_HBS_PERIOD 0x00000001 /**< Service period */ +#define CONFIG_AGENT_LOC_TIMEOUT 0x00000002 /**< Loss Of Comm Timeout */ +#define CONFIG_AGENT_MULTICAST 0x00000004 /**< Multicase Addr */ +#define CONFIG_SCHED_PRIORITY 0x00000008 /**< Scheduling priority */ +#define CONFIG_AGENT_HBS_MGMNT_PORT 0x00000010 /**< Management Pulse Rx Port */ +#define CONFIG_AGENT_HBS_INFRA_PORT 0x00000020 /**< Infra Pulse Rx Port */ +#define CONFIG_AGENT_HBS_DEGRADE 0x00000040 /**< Heartbeat degrade */ +#define CONFIG_AGENT_HBS_FAILURE 0x00000080 /**< Heartbeat failure */ +#define CONFIG_AGENT_INV_PORT 0x00000100 /**< Inventory Port Number */ +#define CONFIG_AGENT_HA_PORT 0x00000200 /**< HA Framework Port Number */ +#define CONFIG_CLIENT_MTCALARM_PORT 0x00000400 /**< Send alarm requests to */ +#define CONFIG_RESERVED_800 0x00000800 /**< */ +#define CONFIG_MTC_TO_HWMON_CMD_PORT 0x00001000 /**< HWmon Port Number */ +#define CONFIG_AGENT_KEY_PORT 0x00002000 /**< Keystone HTTP port */ +#define 
CONFIG_AGENT_HBS_MTC_PORT 0x00004000 /**< Heartbeat Service Port */ +#define CONFIG_AGENT_INV_EVENT_PORT 0x00008000 /**< Inventory Event Port */ +#define CONFIG_AGENT_API_RETRIES 0x00010000 /**< Num api retries b4 fail */ +#define CONFIG_AGENT_MTC_INFRA_PORT 0x00020000 /**< Agent Infr network port */ +#define CONFIG_AGENT_MTC_MGMNT_PORT 0x00040000 /**< Agent Infr network port */ +#define CONFIG_AGENT_TOKEN_REFRESH 0x00080000 /**< Token refresh rate mask */ +#define CONFIG_CLIENT_MTC_INFRA_PORT 0x00100000 /**< Client Infra nwk mtc port */ +#define CONFIG_CLIENT_MTC_MGMNT_PORT 0x00200000 /**< Client mgmnt nwk mtc port */ +#define CONFIG_AGENT_VIM_CMD_PORT 0x00400000 /**< VIM Command Port Mask */ +#define CONFIG_CLIENT_HBS_INFRA_PORT 0x00800000 /**< Infrastructure ntwk Port */ +#define CONFIG_CLIENT_HBS_MGMNT_PORT 0x01000000 /**< Management network Port */ +#define CONFIG_CLIENT_HBS_EVENT_PORT 0x02000000 /**< Heartbeat Event Messaging */ +#define CONFIG_MTC_TO_HBS_CMD_PORT 0x04000000 /**< Mtce to Hbs Command Port */ +#define CONFIG_HBS_TO_MTC_EVENT_PORT 0x08000000 /**< Hbs to Mtc Event Port */ +#define CONFIG_CLIENT_PULSE_PORT 0x10000000 /**< Pmon pulse port */ +#define CONFIG_AGENT_VIM_EVENT_PORT 0x40000000 /**< VIM Event Port Mask */ +#define CONFIG_CLIENT_RMON_PORT 0x80000000 /**< Rmon client port */ + +#define CONFIG_AGENT_PORT CONFIG_AGENT_MTC_MGMNT_PORT +#define CONFIG_CLIENT_PORT CONFIG_CLIENT_MTC_MGMNT_PORT + +typedef struct { + struct timespec ts ; + struct tm t; + char time_buff[50]; +} time_debug_type ; + +typedef struct +{ + long secs ; + long msecs ; +} time_delta_type ; + +int timedelta ( time_debug_type & before , time_debug_type & after, time_delta_type & delta ); +int gettime ( time_debug_type & nowtime ) ; +unsigned long long gettime_monotonic_nsec ( void ); + +/* get formatted future time for number of seconds from now */ +char * future_time ( int secs ); + + +/***************************************************************************************** 
+ * + * ####### ### ####### ##### # # ###### ###### ####### ###### ####### + * # # # # # # # # # # # # # # # # + * # # # # # # # # # # # # # # # + * ##### # # ##### # # ###### ###### # # ###### # + * # # # # # # # # # # # # # + * # # # # # # # # # # # # # # + * # ### # ##### ##### # # ####### # # # + * + * Allows a single fault insertion condition to be created and monitored in a common way + * for any maintenance daemon. + * + * Here is how it works. + * + * Daemons that want fit support must add daemon_load_fit to its main loop which will + * detect and load any new fit requests. + * + * Create '/var/run/fit/fitinfo' file with the following labels (with no spaces) + * + * proc=hwmond ; specifies the process name to apply this fit to + * code=1 ; specifies the unique fit code to look for + * hits=2 ; specifies number of hits before clearing fit info ; defaults to 1 + * + * if ( daemon_want_fit ( MY_FIT_CODE ) == true ) + * do_fit_condition + * + * Add additional labels for further fit refinements ... + * + * + * + * + * host=compute-0 + * + * if ( daemon_want_fit ( MY_FIT_CODE , hostname ) == true ) + * do_fit_condition + * + * + * + * name=Temp_CPU0 + * + * if ( daemon_want_fit ( MY_FIT_CODE, hostname, "Temp_CPU0" ) == true ) + * do_fit_condition + * + * + * + * data=cr + * + * if ( daemon_want_fit ( MY_FIT_CODE, hostname, "Temp_CPU0", data ) == true ) + * do_fit_condition_with data + * + * + * + * When the 'daemon_load_fit' sees this file it will load its content and rename + * /var/run/fit/fitinfo to /var/run/fit/fitinfo.renamed. + * + * daemon_want_fit returns a true when that fit condition is met and hits is decremented + * when hits becomes 0 the fit is removed from memory and requires fitinfo.renamed to be + * recopied to fitinfo for that fit to be seen and loaded again.
+ * + *****************************************************************************************/ + +// #define WANT_FIT_TESTING + +#ifdef WANT_FIT_TESTING + +#define FIT__INFO_FILE ("/var/run/fit/fitinfo") +#define FIT__INFO_FILEPATH ("/var/run/fit") +#define FIT__INFO_FILENAME ("fitinfo") +#define FIT__INFO_FILENAME_RENAMED ("fitinfo.renamed") + +#define FIT__INIT_FILE ("/var/run/fit/fitinit") +#define FIT__INIT_FILEPATH ("/var/run/fit") +#define FIT__INIT_FILENAME ("fitinit") +#define FIT__INIT_FILENAME_RENAMED ("fitinit.renamed") + +/* Common Fault Insertion Structure */ +typedef struct +{ + int code ; /* the unique code specifying the condition to fault */ + int hits ; /* how many times to run fit before it auto clears */ + string proc ; /* the daemon to apply the fit to */ + string host ; /* host to apply the fit to */ + string name ; /* refinement of the fit code to a specific condition */ + string data ; /* returned fit data for specified named condition */ +} daemon_fit_type ; + +#endif + +/* Init / Clear the in-memory fit info struct. + * Automatically called during files init. + * Can be explicitely called to force remove fit condition. */ +void daemon_init_fit ( void ); + +/* Load fit info from /var/run/fit/fitinfo file. + * Add a call to this to the daemon's main loop */ +int daemon_load_fit ( void ) ; + +/* add hits to fit */ +void daemon_hits_fit ( int hits ); + +/* Check for specific fit enabled conditions */ +bool daemon_want_fit ( int code ); +bool daemon_want_fit ( int code, string hostname ); +bool daemon_want_fit ( int code, string hostname, string name ); + +/* ... and in this case update fit data reference string when hit */ +bool daemon_want_fit ( int code, string hostname, string name, string & data ); + +/* Prints the in-memory loaded fit data. 
+ * This is called on new fit info load (and file rename) */
+void daemon_print_fit( bool hit );
+
+
+void daemon_do_segfault ( void );
+
+#endif /* __DAEMON_COMMON_H__ */
diff --git a/mtce-common/cgts-mtce-common-1.0/daemon/daemon_config.cpp b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_config.cpp
new file mode 100644
index 00000000..e0dcea8e
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_config.cpp
@@ -0,0 +1,355 @@
+/*
+* Copyright (c) 2013-2014, 2016 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+*/
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace std;
+
+#include "daemon_ini.h" /* Ini parser header */
+#include "daemon_common.h" /* Common daemon header */
+#include "nodeBase.h"
+#include "nodeTimers.h"
+
+/* Load the daemon config struct with its default values.
+ *
+ * The whole struct is zeroed first ; the string options are then
+ * pointed at freshly allocated "none" or empty strings and the
+ * debug levels are set to zero. */
+void daemon_config_default ( daemon_config_type* config_ptr )
+{
+    /* start from an all zero struct */
+    memset ( config_ptr, 0 , sizeof(*config_ptr));
+
+    /* debug levels all default to off */
+    config_ptr->debug_all = 0 ;
+    config_ptr->debug_json = 0 ;
+    config_ptr->debug_timer = 0 ;
+    config_ptr->debug_fsm = 0 ;
+    config_ptr->debug_http = 0 ;
+    config_ptr->debug_msg = 0 ;
+    config_ptr->debug_work = 0 ;
+    config_ptr->debug_state = 0 ;
+    config_ptr->debug_alive = 0 ;
+    config_ptr->debug_bmgmt = 0 ;
+    config_ptr->debug_level = 0 ;
+
+    /* string options that default to an empty string */
+    config_ptr->keystone_prefix_path = strdup("");
+    config_ptr->keystone_identity_uri = strdup("");
+    config_ptr->keystone_auth_uri = strdup("");
+    config_ptr->keystone_auth_host = strdup("");
+    config_ptr->keyring_directory = strdup("");
+
+    /* string options that default to "none" */
+    config_ptr->debug_filter = strdup("none");
+    config_ptr->debug_event = strdup("none");
+    config_ptr->mon_process_1 = strdup("none");
+    config_ptr->mon_process_2 = strdup("none");
+    config_ptr->mon_process_3 = strdup("none");
+    config_ptr->mon_process_4 = strdup("none");
+    config_ptr->mon_process_5 = strdup("none");
+    config_ptr->mon_process_6 = strdup("none");
+    config_ptr->mon_process_7 = strdup("none");
+    config_ptr->keystone_region_name = strdup("none");
+    config_ptr->sysinv_mtc_inv_label = strdup("none");
+    config_ptr->mgmnt_iface = strdup("none");
+    config_ptr->infra_iface = strdup("none");
+    config_ptr->sysinv_api_bind_ip = strdup("none");
+    config_ptr->mode = strdup("none");
+    config_ptr->fit_host = strdup("none");
+    config_ptr->mnfa_threshold_type = strdup("none");
+    config_ptr->multicast = strdup("none");
+}
+
+/* Client timeout config read
+ *
+ * Ini file callback that loads the goenabled and host services
+ * timeouts of the 'timeouts' section into the daemon config
+ * struct passed in through 'user'. Always returns PASS. */
+int client_timeout_handler ( void * user,
+                             const char * section,
+                             const char * name,
+                             const char * value)
+{
+    daemon_config_type* cfg = (daemon_config_type*)user;
+
+    if (MATCH("timeouts", "goenabled_timeout"))
+    {
+        cfg->goenabled_timeout = atoi(value);
+        ilog ("goEnabled TO: %3d secs\n" , cfg->goenabled_timeout);
+        return (PASS);
+    }
+
+    if (MATCH("timeouts", "host_services_timeout"))
+    {
+        cfg->host_services_timeout = atoi(value);
+        ilog ("Host Svcs TO: %3d secs\n" , cfg->host_services_timeout);
+    }
+    return (PASS);
+}
+
+
+/* Timeout config read */
+int timeout_config_handler ( void * user,
+                             const char * section,
+                             const char * name,
+                             const char * value)
+{
+    daemon_config_type* config_ptr = (daemon_config_type*)user;
+
+    if (MATCH("timeouts", "controller_boot_timeout"))
+    {
+        bool extended = false ;
+        config_ptr->controller_mtcalive_timeout = atoi(value);
+        /* extend the controller mtcalive timeout when in virtual box.
+         * loaded machines boot slower on controllers in this environment
+         * and frequently timeout in the first pass */
+        if ( daemon_is_file_present ( HOST_IS_VIRTUAL ) == true )
+        {
+            config_ptr->controller_mtcalive_timeout *= 2 ;
+            extended = true ;
+        }
+        ilog (" mtcAlive TO: %4d secs (controller) %s\n" ,
+              config_ptr->controller_mtcalive_timeout,
+              extended ?
"(doubled for vbox)" : ""); + } + else if (MATCH("timeouts", "compute_boot_timeout")) + { + config_ptr->compute_mtcalive_timeout = atoi(value); + ilog (" mtcAlive TO: %4d secs (compute)\n" , config_ptr->compute_mtcalive_timeout); + } + else if (MATCH("timeouts", "goenabled_timeout")) + { + config_ptr->goenabled_timeout = atoi(value); + ilog ("goEnabled TO: %3d secs\n" , config_ptr->goenabled_timeout); + } + else if (MATCH("timeouts", "host_services_timeout")) + { + config_ptr->host_services_timeout = atoi(value); + ilog ("Host Svcs TO: %3d secs\n" , config_ptr->host_services_timeout); + } + else if (MATCH("timeouts", "sysinv_timeout")) + { + config_ptr->sysinv_timeout = atoi(value); + ilog ("Inv crit TO: %3d secs\n" , config_ptr->sysinv_timeout); + } + else if (MATCH("timeouts", "swact_timeout")) + { + config_ptr->swact_timeout = atoi(value); + ilog ("HA Swact TO : %3d secs\n" , config_ptr->swact_timeout); + } + else if (MATCH("timeouts", "sysinv_noncrit_timeout")) + { + config_ptr->sysinv_noncrit_timeout = atoi(value); + ilog ("Inv nonc TO: %3d secs\n" , config_ptr->sysinv_noncrit_timeout); + } + else if (MATCH("timeouts", "work_queue_timeout")) + { + config_ptr->work_queue_timeout = atoi(value); + ilog ("WorkQueue TO: %3d secs\n" , config_ptr->work_queue_timeout); + } + else if (MATCH("timeouts", "uptime_period")) + { + config_ptr->uptime_period = atoi(value); + ilog ("Uptime Timer: %3d secs\n" , config_ptr->uptime_period); + } + else if (MATCH("timeouts", "online_period")) + { + config_ptr->online_period = atoi(value); + ilog ("Online Timer: %3d secs\n" , config_ptr->online_period); + } + else if (MATCH("timeouts", "insv_test_period")) + { + config_ptr->insv_test_period = atoi(value); + ilog ("Insvt Period:%4d secs (controller)\n" , config_ptr->insv_test_period); + } + else if (MATCH("timeouts", "oos_test_period")) + { + config_ptr->oos_test_period = atoi(value); + ilog (" Oost Period:%4d secs (controller)\n", config_ptr->oos_test_period); + } + else if 
(MATCH("timeouts", "audit_period")) + { + config_ptr->audit_period = atoi(value); + ilog ("Audit Period: %3d secs\n", config_ptr->audit_period ); + } + else if (MATCH("timeouts", "loc_recovery_timeout")) + { + config_ptr->mask |= CONFIG_AGENT_LOC_TIMEOUT ; + config_ptr->loc_recovery_timeout = atoi(value); + ilog ("LOC Timeout: %3d secs\n", config_ptr->loc_recovery_timeout ); + } + else if (MATCH("timeouts", "mnfa_recovery_timeout")) + { + config_ptr->mnfa_recovery_timeout = atoi(value); + if (( config_ptr->mnfa_recovery_timeout > 300 ) || + ( config_ptr->mnfa_recovery_timeout == 0 )) + config_ptr->mnfa_recovery_timeout = 5 ; + ilog ("MNFA Timeout: %3d secs\n", config_ptr->mnfa_recovery_timeout ); + } + else if (MATCH("timeouts", "node_reinstall_timeout")) + { + config_ptr->node_reinstall_timeout = atoi(value); + if (( config_ptr->node_reinstall_timeout > MTC_REINSTALL_TIMEOUT_MAX ) || + ( config_ptr->node_reinstall_timeout < MTC_REINSTALL_TIMEOUT_MIN )) + config_ptr->node_reinstall_timeout = MTC_REINSTALL_TIMEOUT_DEFAULT ; + ilog ("Reinstall TO: %3d secs\n", config_ptr->node_reinstall_timeout ); + } + else if (MATCH("timeouts", "dor_mode_timeout")) + { + config_ptr->dor_mode_timeout = atoi(value); + ilog ("DOR Mode TO : %3d secs\n", config_ptr->dor_mode_timeout ); + } + else if (MATCH("timeouts", "dor_recovery_timeout_ext")) + { + config_ptr->dor_recovery_timeout_ext = atoi(value); + ilog ("DOR Time Ext: %3d secs\n", config_ptr->dor_recovery_timeout_ext ); + } + + return (PASS); +} + +/* System Inventory Config Reader */ +int sysinv_config_handler ( void * user, + const char * section, + const char * name, + const char * value) +{ + daemon_config_type* config_ptr = (daemon_config_type*)user; + + if (MATCH("DEFAULT", "MTC_INV_LABEL")) // MTC_INV_LABEL=/v1/hosts/ + { + config_ptr->sysinv_mtc_inv_label = strdup(value); + ilog("Sysinv Label: %s\n", config_ptr->sysinv_mtc_inv_label ); + } + else if (MATCH("DEFAULT", "sysinv_api_port")) // sysinv_api_port=6385 + { + 
config_ptr->sysinv_api_port = atoi(value); + ilog("Sysinv Port : %d\n", config_ptr->sysinv_api_port ); + } + else if (MATCH("DEFAULT", "sysinv_api_bind_ip")) // sysinv_api_bind_ip=192.168.204.2 + { + config_ptr->sysinv_api_bind_ip = strdup(value); + ilog("Sysinv IP : %s\n", config_ptr->sysinv_api_bind_ip ); + } + return (PASS); +} + +#define EMPTY "----" + +void daemon_dump_cfg ( void ) +{ + daemon_config_type * ptr = daemon_get_cfg_ptr(); + + ilog ("Configuration Settings\n------------------------------\n"); + if ( ptr->scheduling_priority ) { ilog ("scheduling_priority = %d\n", ptr->scheduling_priority ); } + + if ( ptr->infra_degrade_only ) { ilog ("infra_degrade_only = %s\n", ptr->infra_degrade_only ? "Yes" : "No" );} + if ( ptr->need_infra_poll_audit ) { ilog ("need_infra_poll_audit = %s\n", ptr->need_infra_poll_audit ? "Yes" : "No" );} + if ( ptr->active ) { ilog ("active = %s\n", ptr->active ? "Yes" : "No" );} + + /* hbsAgent */ + if ( ptr->hbs_pulse_period ) { ilog ("hbs_pulse_period = %d\n", ptr->hbs_pulse_period );} + if ( ptr->token_refresh_rate ) { ilog ("token_refresh_rate = %d\n", ptr->token_refresh_rate );} + if ( ptr->hbs_minor_threshold ) { ilog ("hbs_minor_threshold = %d\n", ptr->hbs_minor_threshold );} + if ( ptr->hbs_degrade_threshold ) { ilog ("hbs_degrade_threshold = %d\n", ptr->hbs_degrade_threshold );} + if ( ptr->hbs_failure_threshold ) { ilog ("hbs_failure_threshold = %d\n", ptr->hbs_failure_threshold );} + + if ( strcmp(ptr->mgmnt_iface, "none" )) { ilog ("mgmnt_iface = %s\n", ptr->mgmnt_iface ); } + if ( strcmp(ptr->infra_iface, "none" )) { ilog ("infra_iface = %s\n", ptr->infra_iface );} + if ( strcmp(ptr->multicast, "none" )) { ilog ("multicast = %s\n", ptr->multicast );} + + + if ( ptr->ha_port ) { ilog ("ha_port = %d\n", ptr->ha_port );} + if ( ptr->vim_cmd_port ) { ilog ("vim_cmd_port = %d\n", ptr->vim_cmd_port );} + if ( ptr->vim_event_port ) { ilog ("vim_event_port = %d\n", ptr->vim_event_port );} + if ( ptr->mtc_agent_port ) { 
ilog ("mtc_agent_port = %d\n", ptr->mtc_agent_port );} + if ( ptr->mtc_client_port) { ilog ("mtc_client_port = %d\n", ptr->mtc_client_port );} + if ( ptr->keystone_port ) { ilog ("keystone_port = %d\n", ptr->keystone_port );} + + /* mtcAgent & hwmond */ + if ( ptr->sysinv_api_port ) { ilog ("sysinv_api_port = %d\n", ptr->sysinv_api_port );} + if ( ptr->keystone_prefix_path ) { ilog ("keystone_prefix_path = %s\n", ptr->keystone_prefix_path );} + if ( ptr->keystone_auth_host ) { ilog ("keystone_auth_host = %s\n", ptr->keystone_auth_host );} + if ( ptr->keystone_identity_uri ) { ilog ("keystone_identity_uri = %s\n", ptr->keystone_identity_uri );} + if ( ptr->keystone_auth_uri ) { ilog ("keystone_auth_uri = %s\n", ptr->keystone_auth_uri );} + if ( ptr->keystone_auth_username ) { ilog ("keystone_auth_username = %s\n", ptr->keystone_auth_username );} + if ( ptr->keystone_auth_project ) { ilog ("keystone_auth_project = %s\n", ptr->keystone_auth_project );} + if ( ptr->keystone_user_domain ) { ilog ("keystone_user_domain = %s\n", ptr->keystone_user_domain );} + if ( ptr->keystone_project_domain ) { ilog ("keystone_project_domain = %s\n", ptr->keystone_project_domain );} + if ( ptr->keystone_region_name ) { ilog ("keystone_region_name = %s\n", ptr->keystone_region_name );} + if ( ptr->keyring_directory ) { ilog ("keyring_directory = %s\n", ptr->keyring_directory );} + + if ( ptr->mtc_rx_mgmnt_port ) { ilog ("mtc_rx_mgmnt_port = %d\n", ptr->mtc_rx_mgmnt_port );} + if ( ptr->mtc_rx_infra_port ) { ilog ("mtc_rx_infra_port = %d\n", ptr->mtc_rx_infra_port );} + if ( ptr->mtc_tx_mgmnt_port ) { ilog ("mtc_tx_mgmnt_port = %d\n", ptr->mtc_tx_mgmnt_port );} + if ( ptr->mtc_tx_infra_port ) { ilog ("mtc_tx_infra_port = %d\n", ptr->mtc_tx_infra_port );} + if ( ptr->agent_rx_port ) { ilog ("agent_rx_port = %d\n", ptr->agent_rx_port );} + if ( ptr->client_rx_port ) { ilog ("client_rx_port = %d\n", ptr->client_rx_port );} + if ( ptr->mtc_to_hbs_cmd_port ) { ilog ("mtc_to_hbs_cmd_port = 
%d\n", ptr->mtc_to_hbs_cmd_port );} + if ( ptr->mtc_to_guest_cmd_port) { ilog ("mtc_to_guest_cmd_port = %d\n", ptr->mtc_to_guest_cmd_port);} + if ( ptr->hwmon_cmd_port ) { ilog ("hwmon_cmd_port = %d\n", ptr->hwmon_cmd_port );} + if ( ptr->hbs_to_mtc_event_port) { ilog ("hbs_to_mtc_event_port = %d\n", ptr->hbs_to_mtc_event_port);} + if ( ptr->inv_event_port ) { ilog ("inv_event_port = %d\n", ptr->inv_event_port );} + + /* rmond */ + if ( ptr->per_node ) { ilog ("per_node = %d\n", ptr->per_node );} + if ( ptr->audit_period ) { ilog ("audit_period = %d\n", ptr->audit_period );} + if ( ptr->pm_period ) { ilog ("pm_period = %d\n", ptr->pm_period );} + + if ( ptr->pmon_amon_port ) { ilog ("pmon_amon_port = %d\n", ptr->pmon_amon_port );} + if ( ptr->pmon_event_port ) { ilog ("pmon_event_port = %d\n", ptr->pmon_event_port );} + if ( ptr->pmon_pulse_port ) { ilog ("pmon_pulse_port = %d\n", ptr->pmon_pulse_port );} + if ( ptr->rmon_api_tx_port ) { ilog ("rmon_api_tx_port = %d\n", ptr->rmon_api_tx_port );} + if ( ptr->rmon_event_port ) { ilog ("rmon_event_port = %d\n", ptr->rmon_event_port );} + if ( ptr->rmon_critical_thr ) { ilog ("rmon_critical_thr = %d\n", ptr->rmon_critical_thr );} + if ( ptr->rmon_tx_port ) { ilog ("rmon_tx_port = %d\n", ptr->rmon_tx_port );} + if ( ptr->event_port ) { ilog ("event_port = %d\n", ptr->event_port );} + if ( ptr->cmd_port ) { ilog ("cmd_port = %d\n", ptr->cmd_port );} + if ( ptr->sensor_port ) { ilog ("sensor_port = %d\n", ptr->sensor_port );} + if ( ptr->start_delay ) { ilog ("start_delay = %d\n", ptr->start_delay );} + if ( ptr->api_retries ) { ilog ("api_retries = %d\n", ptr->api_retries );} + if ( ptr->testmode ) { ilog ("testmode = %x\n", ptr->testmode );} + if ( ptr->testmask ) { ilog ("testmask = %x\n", ptr->testmask );} + if ( ptr->mask ) { ilog ("mask = %x\n", ptr->mask );} + ilog ("mode = %s\n", ptr->mode ? 
ptr->mode : EMPTY ); + + /* pmond */ + if ( ptr->stall_pmon_thld ) { ilog ("stall_pmon_thld = %d\n", ptr->stall_pmon_thld );} + if ( ptr->stall_mon_period ) { ilog ("stall_mon_period = %d\n", ptr->stall_mon_period );} + if ( ptr->stall_poll_period ) { ilog ("stall_poll_period = %d\n", ptr->stall_poll_period );} + if ( ptr->stall_rec_thld ) { ilog ("stall_rec_thld = %d\n", ptr->stall_rec_thld );} + + /* mtcAgent */ + if ( ptr->mnfa_threshold_type ) { ilog ("mnfa_threshold_type = %s\n", ptr->mnfa_threshold_type );} + if ( ptr->mnfa_threshold_percent ) { ilog ("mnfa_threshold_percent= %d\n", ptr->mnfa_threshold_percent );} + if ( ptr->mnfa_threshold_number ) { ilog ("mnfa_threshold_number = %d\n", ptr->mnfa_threshold_number );} + if ( ptr->mnfa_recovery_threshold ) { ilog ("mnfa_recovery_threshod= %d\n", ptr->mnfa_recovery_threshold );} + if ( ptr->controller_mtcalive_timeout) { ilog ("controller_mtcalive_to= %d\n", ptr->controller_mtcalive_timeout );} + if ( ptr->compute_mtcalive_timeout ) { ilog ("compute_mtcalive_to = %d\n", ptr->compute_mtcalive_timeout );} + if ( ptr->goenabled_timeout ) { ilog ("goenabled_timeout = %d\n", ptr->goenabled_timeout );} + if ( ptr->swact_timeout ) { ilog ("swact_timeout = %d\n", ptr->swact_timeout );} + if ( ptr->sysinv_timeout ) { ilog ("sysinv_timeout = %d\n", ptr->sysinv_timeout );} + if ( ptr->sysinv_noncrit_timeout ) { ilog ("sysinv_noncrit_timeout= %d\n", ptr->sysinv_noncrit_timeout );} + if ( ptr->work_queue_timeout ) { ilog ("work_queue_timeout = %d\n", ptr->work_queue_timeout );} + if ( ptr->loc_recovery_timeout ) { ilog ("loc_recovery_timeout = %d\n", ptr->loc_recovery_timeout );} + if ( ptr->mnfa_recovery_timeout ) { ilog ("mnfa_recovery_timeout = %d\n", ptr->mnfa_recovery_timeout );} + if ( ptr->node_reinstall_timeout ) { ilog ("node_reinstall_timeout= %d\n", ptr->node_reinstall_timeout );} + if ( ptr->uptime_period ) { ilog ("uptime_period = %d\n", ptr->uptime_period );} + if ( ptr->online_period ) { ilog ("online_period 
= %d\n", ptr->online_period );} + if ( ptr->insv_test_period ) { ilog ("insv_test_period = %d\n", ptr->insv_test_period );} + if ( ptr->oos_test_period ) { ilog (" oos_test_period = %d\n", ptr->oos_test_period );} + + /* mtcClient & hbsClient */ + if ( ptr->failsafe_shutdown_delay ) { ilog ("failsafe_shutdown_dela= %d\n", ptr->failsafe_shutdown_delay );} + + if ( ptr->debug_all ) { ilog ("debug_all = %d\n", ptr->debug_all );} + if ( ptr->debug_json ) { ilog ("debug_json = %d\n", ptr->debug_json );} + if ( ptr->debug_timer ) { ilog ("debug_timer = %d\n", ptr->debug_timer );} + if ( ptr->debug_fsm ) { ilog ("debug_fsm = %d\n", ptr->debug_fsm );} + if ( ptr->debug_http ) { ilog ("debug_http = %d\n", ptr->debug_http );} + if ( ptr->debug_msg ) { ilog ("debug_msg = %d\n", ptr->debug_msg );} + if ( ptr->debug_work ) { ilog ("debug_work = %d\n", ptr->debug_work );} + if ( ptr->debug_state ) { ilog ("debug_state = %d\n", ptr->debug_state );} + if ( ptr->debug_alive ) { ilog ("debug_alive = %d\n", ptr->debug_alive );} + if ( ptr->debug_bmgmt ) { ilog ("debug_bmgmt = %d\n", ptr->debug_bmgmt );} + if ( ptr->debug_level ) { ilog ("debug_level = %d\n", ptr->debug_level );} + ilog ("debug_filter = %s\n", ptr->debug_filter ); + ilog ("debug_event = %s\n", ptr->debug_event ); + +} diff --git a/mtce-common/cgts-mtce-common-1.0/daemon/daemon_debug.cpp b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_debug.cpp new file mode 100755 index 00000000..e2d787e7 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_debug.cpp @@ -0,0 +1,376 @@ +/* +* Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. 
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+*/
+
+
+#include /* NOTE(review): the header names of these six includes are missing in this copy of the patch - restore them from upstream */
+#include
+#include
+#include
+#include
+#include
+
+using namespace std;
+
+#include "daemon_ini.h" /* Init parset header */
+#include "daemon_common.h" /* Init parset header */
+#include "nodeBase.h"
+
+static char time_buff [50] ; /* shared result buffer for pt() and future_time(); each call overwrites the previous result */
+static const char null_t [25] = "YYYY:MM:DD HH:MM:SS.xxx"; /* placeholder returned when localtime_r fails */
+/* Return the CLOCK_MONOTONIC time as a single nanosecond count.
+ * The clock_gettime return value is not checked. */
+unsigned long long gettime_monotonic_nsec ( void )
+{
+    struct timespec ts;
+    clock_gettime (CLOCK_MONOTONIC, &ts);
+    return ((unsigned long long) ts.tv_sec) * 1000000000ULL + ts.tv_nsec;
+}
+/* Compute 'after' minus 'before' into 'delta'. Always returns PASS.
+ * When 'after' is not later than 'before', both delta.secs and delta.msecs
+ * are set to 1 instead of a zero or negative difference.
+ * NOTE(review): the sub-second part is the nanosecond difference divided by
+ * 1000, i.e. microseconds, although the field is named 'msecs' - confirm
+ * what callers expect. */
+int timedelta ( time_debug_type & before , time_debug_type & after, time_delta_type & delta )
+{
+    /* Subtract before from after */
+
+    if ((after.ts.tv_sec < before.ts.tv_sec) ||
+        ((after.ts.tv_sec == before.ts.tv_sec) &&
+         (after.ts.tv_nsec <= before.ts.tv_nsec)))
+    {
+        delta.secs = delta.msecs = 1 ;
+    }
+    else
+    {
+        delta.secs = after.ts.tv_sec - before.ts.tv_sec ;
+        if (after.ts.tv_nsec < before.ts.tv_nsec)
+        {
+            /* borrow one second so the nanosecond difference stays positive */
+            delta.msecs = after.ts.tv_nsec + 1000000000L - before.ts.tv_nsec ;
+            delta.secs-- ;
+        }
+        else
+        {
+            delta.msecs = after.ts.tv_nsec - before.ts.tv_nsec ;
+        }
+        delta.msecs = (delta.msecs/1000);
+    }
+    return (PASS) ;
+}
+/* Sample CLOCK_REALTIME into p.ts, break it down into p.t and format
+ * "HH:MM:SS." followed by six digits of microseconds into p.time_buff.
+ * Returns FAIL when localtime_r fails, PASS otherwise.
+ * NOTE(review): strftime is capped at 30 bytes and sprintf appends after it;
+ * the size of p.time_buff is declared elsewhere - confirm it is large enough. */
+int gettime ( time_debug_type & p )
+{
+    int len ;
+    clock_gettime (CLOCK_REALTIME, &p.ts );
+    if (localtime_r(&(p.ts.tv_sec), &p.t) == NULL)
+    {
+        return (FAIL);
+    }
+    else
+    {
+        len = strftime(&p.time_buff[0], 30, "%H:%M:%S.", &p.t );
+        sprintf (&p.time_buff[len], "%06ld", (p.ts.tv_nsec/1000) );
+    }
+    return (PASS);
+}
+
+/* Log counter */
+static int __lc = 0 ;
+int lc (void) /* returns the current log count, then increments it (post-increment) */
+{
+    return(__lc++);
+}
+
+/* Format the current CLOCK_REALTIME local time as "%FT%H:%M:%S." followed
+ * by three digits of milliseconds. The result lives in the static time_buff,
+ * so the next pt() or future_time() call overwrites it. Returns the null_t
+ * placeholder string when localtime_r fails. */
+char * pt ( void )
+{
+    struct timespec ts ;
+    struct tm t;
+    int len ;
+
+    clock_gettime (CLOCK_REALTIME, &ts );
+    if (localtime_r(&(ts.tv_sec), &t) == NULL)
+    {
+        return ((char*)&null_t[0]);
+    }
+    len = strftime(time_buff, 30, "%FT%H:%M:%S.", &t );
+    sprintf ( &time_buff[len], "%03ld", (ts.tv_nsec/1000000) );
+
+    return (&time_buff[0]);
+}
+
+/*****************************************************************************
+ *
+ * Name : future_time
+ *
+ * Description: Return a future time date:time formatted string
+ * that represents current time + specified seconds.
+ *
+ * The string is written to the static time_buff and is
+ * overwritten by the next call that uses that buffer. The
+ * null_t placeholder is returned when localtime_r fails.
+ *
+ *****************************************************************************/
+
+char * future_time ( int secs )
+{
+    struct timespec ts ;
+    struct tm t;
+    int len ;
+
+    clock_gettime (CLOCK_REALTIME, &ts );
+    /* add the caller's seconds */
+    ts.tv_sec += secs ;
+
+    if (localtime_r(&(ts.tv_sec), &t) == NULL)
+    {
+        return ((char*)&null_t[0]);
+    }
+    len = strftime(time_buff, 30, "%FT%H:%M:%S.", &t );
+    sprintf ( &time_buff[len], "%03ld", (ts.tv_nsec/1000000) );
+
+    return (&time_buff[0]);
+}
+
+
+/* Debug config read
+ *
+ * Callback handed to ini_parse by get_debug_options. Handles the keys of
+ * the "debug" section: integer settings are stored as atoi(value), string
+ * settings as strdup(value), in the daemon_config_type that 'user' points
+ * to. Most debug levels are logged only when non-zero. Unknown keys are
+ * ignored. Always returns PASS.
+ *
+ * NOTE(review): string settings are replaced without freeing the previous
+ * pointer - confirm whether the leaked defaults matter. */
+int debug_config_handler ( void * user,
+                           const char * section,
+                           const char * name,
+                           const char * value)
+{
+    daemon_config_type* config_ptr = (daemon_config_type*)user;
+    if (MATCH("debug", "debug_timer"))
+    {
+        config_ptr->debug_timer = atoi(value);
+        if ( config_ptr->debug_timer )
+        {
+            ilog ("Timer Debug : %x\n", config_ptr->debug_timer );
+        }
+    }
+    else if (MATCH("debug", "debug_json"))
+    {
+        config_ptr->debug_json = atoi(value);
+        if ( config_ptr->debug_json )
+        {
+            ilog (" Json Debug : %x\n", config_ptr->debug_json );
+        }
+    }
+    else if (MATCH("debug", "debug_fsm"))
+    {
+        config_ptr->debug_fsm = atoi(value);
+        if ( config_ptr->debug_fsm )
+        {
+            ilog (" FSM Debug : %x\n", config_ptr->debug_fsm );
+        }
+    }
+    else if (MATCH("debug", "debug_alive"))
+    {
+        config_ptr->debug_alive = atoi(value);
+        if ( config_ptr->debug_alive )
+        {
+            ilog ("Alive Debug : %x\n", config_ptr->debug_alive );
+        }
+    }
+    else if (MATCH("debug", "debug_bm"))
+    {
+        config_ptr->debug_bmgmt = atoi(value);
+        if ( config_ptr->debug_bmgmt )
+        {
+            ilog ("BMgmt Debug : %x\n", config_ptr->debug_bmgmt );
+        }
+    }
+    else if (MATCH("debug", "debug_http"))
+    {
+        config_ptr->debug_http = atoi(value);
+        if ( config_ptr->debug_http )
+        {
+            ilog (" Http Debug : %x\n", config_ptr->debug_http );
+        }
+    }
+    else if (MATCH("debug", "debug_hdlr")) /* NOTE(review): writes debug_http, exactly like the "debug_http" key above - confirm this alias is intended */
+    {
+        config_ptr->debug_http = atoi(value);
+        if ( config_ptr->debug_http )
+        {
+            ilog (" Http Debug : %x\n", config_ptr->debug_http );
+        }
+    }
+    else if (MATCH("debug", "debug_msg"))
+    {
+        config_ptr->debug_msg = atoi(value);
+        if ( config_ptr->debug_msg )
+        {
+            ilog (" Msg Debug : %x\n", config_ptr->debug_msg );
+        }
+    }
+    else if (MATCH("debug", "debug_work"))
+    {
+        config_ptr->debug_work = atoi(value);
+        if ( config_ptr->debug_work )
+        {
+            ilog (" Work Debug : %x\n", config_ptr->debug_work );
+        }
+    }
+    else if (MATCH("debug", "debug_state"))
+    {
+        config_ptr->debug_state = atoi(value);
+        if ( config_ptr->debug_state )
+        {
+            ilog ("State Debug : %x\n", config_ptr->debug_state );
+        }
+    }
+    else if (MATCH("debug", "debug_level"))
+    {
+        config_ptr->debug_level = atoi(value);
+        if ( config_ptr->debug_level )
+        {
+            ilog ("Level Debug : %x\n", config_ptr->debug_level );
+        }
+    }
+    else if (MATCH("debug", "debug_all"))
+    {
+        config_ptr->debug_all = atoi(value) ;
+        if ( config_ptr->debug_all )
+        {
+            /* a non-zero debug_all is copied into the individual levels below;
+             * NOTE(review): debug_fsm is not among them - confirm intent */
+            ilog ("Globl Debug : %x\n", config_ptr->debug_all );
+            config_ptr->debug_timer = atoi(value);
+            config_ptr->debug_json = atoi(value);
+            config_ptr->debug_alive = atoi(value);
+            config_ptr->debug_bmgmt = atoi(value);
+            config_ptr->debug_msg = atoi(value);
+            config_ptr->debug_work = atoi(value);
+            config_ptr->debug_http = atoi(value);
+            config_ptr->debug_state = atoi(value);
+            config_ptr->debug_level = atoi(value);
+        }
+    }
+    else if (MATCH("debug", "flush"))
+    {
+        config_ptr->flush = atoi(value);
+    }
+    else if (MATCH("debug", "flush_thld"))
+    {
+        config_ptr->flush_thld = atoi(value);
+    }
+    else if (MATCH("debug", "debug_filter"))
+    {
+        config_ptr->debug_filter = strdup(value);
+        if (( config_ptr->debug_filter ) &&
+            ( strnlen( config_ptr->debug_filter, 20 ) > 0 ) &&
+            ( strcmp ( config_ptr->debug_filter, "none" )))
+        {
+            ilog ("State Filter: %s\n", config_ptr->debug_filter );
+        }
+    }
+    else if (MATCH("debug", "debug_event"))
+    {
+        config_ptr->debug_event = strdup(value);
+        if (( config_ptr->debug_event ) &&
+            ( strnlen( config_ptr->debug_event, 20 ) > 0 ) &&
+            ( strcmp ( config_ptr->debug_event, "none" )))
+        {
+            ilog ("Event Filter: %s\n", config_ptr->debug_event );
+        }
+    }
+    else if (MATCH("debug", "infra_degrade_only"))
+    {
+        config_ptr->infra_degrade_only = atoi(value);
+        if ( config_ptr->infra_degrade_only )
+        {
+            ilog ("Infra Degrad: true\n" );
+        }
+    }
+    else if (MATCH("debug", "testmode"))
+    {
+        config_ptr->testmode = atoi(value);
+        if ( config_ptr->testmode )
+        {
+            ilog ("Stress Mode : Enabled\n");
+        }
+    }
+    else if (MATCH("debug", "testmask"))
+    {
+        config_ptr->testmask = atoi(value);
+        if ( config_ptr->testmask )
+        {
+            ilog ("Stress Mask : %x\n", config_ptr->testmask );
+        }
+    }
+    else if (MATCH("debug", "fit_code"))
+    {
+        config_ptr->fit_code = atoi(value);
+        if ( config_ptr->fit_code )
+        {
+            ilog ("FIT Code : %d\n", config_ptr->fit_code );
+        }
+    }
+    else if (MATCH("debug", "fit_host"))
+    {
+        config_ptr->fit_host = strdup(value);
+        if ( config_ptr->fit_host )
+        {
+            ilog ("FIT host : %s\n", config_ptr->fit_host );
+        }
+    }
+    else if (MATCH("debug", "stall_pmon_thld"))
+    {
+        config_ptr->stall_pmon_thld = atoi(value);
+    }
+    else if (MATCH("debug", "stall_mon_period"))
+    {
+        config_ptr->stall_mon_period = atoi(value);
+    }
+    else if (MATCH("debug", "stall_poll_period"))
+    {
+        config_ptr->stall_poll_period = atoi(value);
+    }
+    else if (MATCH("debug", "stall_rec_thld"))
+    {
+        config_ptr->stall_rec_thld = atoi(value);
+    }
+    else if (MATCH("debug", "mon_process_1"))
+    {
+        config_ptr->mon_process_1 = strdup(value);
+    }
+    else if (MATCH("debug", "mon_process_2"))
+    {
+        config_ptr->mon_process_2 = strdup(value);
+    }
+    else if (MATCH("debug", "mon_process_3"))
+    {
+        config_ptr->mon_process_3 = strdup(value);
+    }
+    else if (MATCH("debug", "mon_process_4"))
+    {
+        config_ptr->mon_process_4 = strdup(value);
+    }
+    else if (MATCH("debug", "mon_process_5"))
+    {
+        config_ptr->mon_process_5 = strdup(value);
+    }
+    else if (MATCH("debug", "mon_process_6"))
+    {
+        config_ptr->mon_process_6 = strdup(value);
+    }
+    else if (MATCH("debug", "mon_process_7"))
+    {
+        config_ptr->mon_process_7 = strdup(value);
+    }
+    else if (MATCH("debug", "latency_thld"))
+    {
+        config_ptr->latency_thld = atoi(value);
+    }
+
+    return (PASS);
+}
+/* Log the config file name and parse it with debug_config_handler.
+ * A negative ini_parse result is logged as an error; nothing is returned
+ * to the caller either way. */
+void get_debug_options ( const char * init_file , daemon_config_type * config_ptr )
+{
+    ilog("Config File : %s\n", init_file );
+    if (ini_parse ( init_file, debug_config_handler, config_ptr ) < 0)
+    {
+        elog("Failed to load '%s'\n", init_file );
+    }
+}
+
+/* Deliberately dereference a NULL pointer (inside the log call arguments)
+ * to force a segmentation fault; the "FIT" log text suggests this is a
+ * fault-insertion test hook - confirm against callers. */
+void daemon_do_segfault ( void )
+{
+    char * ptr = NULL ;
+    ilog ("FIT segfault at %p:%d\n", ptr, *ptr);
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/daemon/daemon_files.cpp b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_files.cpp
new file mode 100755
index 00000000..89408f46
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_files.cpp
@@ -0,0 +1,1456 @@
+/*
+ * Copyright (c) 2013-2016 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+ * @file
+ * Wind River CGTS Platform Maintenance ...
+ *
+ * /var/run/.pid
+ * (NOTE(review): a name component appears to be missing before ".pid"
+ * in this copy - restore it from upstream)
+ *
+ * Also upon request can create a health check info file.
+ *
+ * /var/run/<program_invocation_short_name>.info
+ *
+ */
+
+#include /* for .. system */
+#include /* for .. close and usleep */
+#include /* NOTE(review): the header names of all includes in this list are missing in this copy of the patch - restore them from upstream */
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace std;
+
+#include "daemon_common.h"
+#include "daemon_option.h"
+#include "nodeBase.h"
+
+/* GNU Extension
+ * program_invocation_name contains the name that was used to invoke the
+ * calling program. This is the same as the value of argv[0] in main(),
+ * with the difference that the scope of program_invocation_name is global.
+ * program_invocation_short_name contains the basename component of name
+ * that was used to invoke the calling program.
That is, it is the same + * value as program_invocation_name, with all text up to and including + * the final slash (/), if any, removed. + * These variables are automatically initialized by the glibc run-time + * startup code. + * + * The daemon name info */ +extern char *program_invocation_name; +extern char *program_invocation_short_name; + +static char pid_filename [MAX_FILENAME_LEN] ; +static char hc_filename [MAX_FILENAME_LEN] ; + +void daemon_files_fini ( void ) +{ + close_syslog(); +} + +void daemon_health_test ( void ) +{ + char file_str[2000] ; + sprintf ( &file_str[0], "I'm healthy: Here is my config ...\n%s\n", daemon_stream_info ()); + daemon_healthcheck (&file_str[0]); +} + +bool daemon_is_file_present ( const char * filename ) +{ + struct stat p ; + memset ( &p, 0 , sizeof(struct stat)); + stat ( filename, &p ) ; + if ((p.st_ino != 0 ) && (p.st_dev != 0)) + return (true); + else + return (false); +} + +void daemon_healthcheck ( const char * sig ) +{ + FILE * hc_file_stream ; + + snprintf ( &hc_filename[0], MAX_FILENAME_LEN, "/var/run/%s.info", + program_invocation_short_name ); + + /* remove the old file */ + unlink (hc_filename); + + /* Create daemon healthcheck file */ + hc_file_stream = fopen (hc_filename, "w" ) ; + if ( hc_file_stream == NULL ) + { + wlog("Failed to open %s\n", hc_filename); + } + else if ( !fprintf (hc_file_stream,"%s\n", sig )) + { + wlog ("Failed to write to %s\n", hc_filename); + } + if ( hc_file_stream ) + { + fflush (hc_file_stream); + fclose (hc_file_stream); + } +} + +#define BUFFER 1024 + +void daemon_log_value ( const char * filename , const char * str, int val ) +{ + FILE * file_stream = fopen (filename, "a" ) ; + if ( file_stream != NULL ) + { + fprintf ( file_stream,"%s %d\n", str, val ); + fflush (file_stream); + fclose (file_stream); + } +} + +void daemon_log_value ( const char * filename , int val ) +{ + FILE * file_stream = fopen (filename, "w" ) ; + if ( file_stream != NULL ) + { + fprintf ( file_stream,"%d\n", 
val ); + fflush (file_stream); + fclose (file_stream); + } +} + +void daemon_log ( const char * filename , const char * str ) +{ + FILE * file_stream = fopen (filename, "a" ) ; + if ( file_stream != NULL ) + { + fprintf ( file_stream,"%s\n", str ); + fflush (file_stream); + fclose (file_stream); + } +} + +/* reads the first line of a file and if it contains a string + * that represents an integer value then return it */ +int daemon_get_file_int ( const char * filename ) +{ + int value = 0 ; + FILE * __stream = fopen ( filename, "r" ); + if ( __stream != NULL ) + { + int rc ; + + char buffer [MAX_CHARS_IN_INT]; + memset(buffer, 0 , MAX_CHARS_IN_INT); + if ( fgets (buffer,MAX_CHARS_IN_INT, __stream) != NULL ) + { + rc = sscanf ( &buffer[0], "%d", &value ); + if ( rc >= 1 ) + { + dlog ("%s contains number %d\n", filename, value ); + } + else + { + wlog ("failed to sscanf integer from file:%s\n", filename ); + } + } + else + { + wlog ("failed to read integer from file:%s\n", filename ); + } + fclose(__stream); + } + else + { + wlog ("failed to open file:%s\n", filename ); + } + return ( value ); +} + +/* reads the first line of a file and returns it as a string */ +string daemon_get_file_str ( const char * filename ) +{ + string value = "null" ; + FILE * __stream = fopen ( filename, "r" ); + if ( __stream != NULL ) + { + int rc ; + + char buffer [MAX_CHARS_ON_LINE]; + char data [MAX_CHARS_ON_LINE]; + memset(buffer, 0 , MAX_CHARS_ON_LINE); + memset(data, 0 , MAX_CHARS_ON_LINE); + if ( fgets (buffer,MAX_CHARS_ON_LINE, __stream) != NULL ) + { + rc = sscanf ( &buffer[0], "%s", &data[0] ); + if ( rc >= 1 ) + { + value = data ; + dlog ("%s contains '%s'\n", filename, value.c_str()); + } + else + { + wlog ("failed to sscanf string from file:%s\n", filename ); + } + } + else + { + wlog ("failed to read string from file:%s\n", filename ); + } + fclose(__stream); + } + else + { + wlog ("failed to open file:%s\n", filename ); + } + return ( value ); +} + +/* Stay here till we get 
the data we need */
+/* Warning: Don't enable logging here
+ *
+ * Scan PLATFORM_CONF_FILE for a line that contains "subfunction" and parses
+ * as "subfunction=<token>", and return that token. When the file cannot be
+ * opened or no such line exists, sleep 5 seconds (usleep 5000000) and retry;
+ * the loop has no exit other than a successful parse. */
+string daemon_nodetype ( void )
+{
+    for ( ; ; )
+    {
+        char buffer[BUFFER];
+        int line = 0 ; /* incremented per line read but never used otherwise */
+        string nodetype = "unknown" ;
+        char nodetype_str[BUFFER];
+
+        memset ( &nodetype_str[0], 0 , BUFFER );
+        /* open the configuration file */
+        FILE * cfg_file_stream = fopen ( PLATFORM_CONF_FILE, "rb" );
+        if ( cfg_file_stream != NULL )
+        {
+            int rc ;
+            while ( fgets (buffer, BUFFER, cfg_file_stream) != NULL )
+            {
+                char* s = strstr ( buffer, "subfunction");
+                if(s!=NULL)
+                {
+                    /* the line must start with the label; %1023s keeps the token within BUFFER (1024) */
+                    rc = sscanf ( &buffer[0], "subfunction=%1023s", &nodetype_str[0] );
+                    if ( rc == 1 )
+                    {
+                        nodetype = nodetype_str ;
+                        fclose(cfg_file_stream);
+                        return ( nodetype ) ;
+                    }
+                }
+                line++ ;
+            }
+            /* Close the file */
+            fclose(cfg_file_stream);
+        }
+        usleep (5000000);
+    }
+    return ( "" ); /* not reachable: the loop above only exits through its inner return */
+}
+
+/* Read the integer value of rmem_max from /proc/sys/net/core/rmem_max
+ * Returns 0 when the file cannot be opened, read, or parsed. */
+int daemon_get_rmem_max ( void )
+{
+    #define RMEM_MAX_VALUE_SIZE (64)
+    #define RMEM_MAX_FILENAME ((const char *)("/proc/sys/net/core/rmem_max"))
+
+    int value = 0 ;
+    FILE * __stream = fopen ( RMEM_MAX_FILENAME, "r" );
+    if ( __stream != NULL )
+    {
+        int rc ;
+
+        char buffer [RMEM_MAX_VALUE_SIZE];
+        memset(buffer, 0 , RMEM_MAX_VALUE_SIZE);
+        if ( fgets (buffer,RMEM_MAX_VALUE_SIZE, __stream) != NULL )
+        {
+            rc = sscanf ( &buffer[0], "%d", &value );
+            if ( rc == 1 )
+            {
+                dlog1 ("mem_max:%d\n", value );
+            }
+        }
+        fclose(__stream);
+    }
+    return ( value );
+}
+
+/*
+ * Read the platform.conf file looking for the management_interface label.
+ * If found return that value as string.
+ * NOTE(review): the implementation does not return an empty string when the
+ * label is missing; it sleeps 5 seconds and retries without limit.
+ *
+ * const char infra_mgmt_label [] = {"management_interface"} ;
+ *
+ * Stay here till we get the data we need
+ * Warning: Don't enable logging here
+ *
+ * The file read is PLATFORM_CONF_FILE. A matching line must parse as
+ * "management_interface=<token>"; when none is found, or the file cannot
+ * be opened, the function sleeps 5 seconds and scans again.
+ *
+ **/
+string daemon_mgmnt_iface ( void )
+{
+    for ( ; ; )
+    {
+        char iface_str[BUFFER];
+        char buffer [BUFFER];
+        int line = 0 ; /* incremented per line read but never used otherwise */
+        string iface = "" ;
+
+        memset ( iface_str, 0 , BUFFER );
+        FILE * cfg_file_stream = fopen ( PLATFORM_CONF_FILE, "r" );
+        if ( cfg_file_stream != NULL )
+        {
+            int rc ;
+            while ( fgets (buffer, BUFFER, cfg_file_stream) != NULL )
+            {
+                char* s = strstr ( buffer, "management_interface");
+                if(s!=NULL)
+                {
+                    /* the line must start with the label; %1023s keeps the token within BUFFER (1024) */
+                    rc = sscanf ( &buffer[0], "management_interface=%1023s", &iface_str[0] );
+                    if ( rc == 1 )
+                    {
+                        iface = iface_str ;
+                        fclose(cfg_file_stream);
+                        // ilog("Mgmnt iface : %s\n", iface.c_str() );
+                        return ( iface ) ;
+                    }
+                }
+                line++ ;
+            }
+            /* Close the file */
+            fclose(cfg_file_stream);
+        }
+        usleep (5000000);
+    }
+    /* NOTE(review): the two statements below are not reachable - the loop
+     * above only exits through its inner return */
+    ilog("Mgmnt iface : none\n");
+    return ( "" );
+}
+
+
+/****************************************************************************
+ *
+ * Name : daemon_system_type
+ *
+ * Description: Read the platform.conf file looking for system type system_mode label.
+ * If found then load and return that content.
+ * NOTE(review): the function returns a system_type_enum, not
+ * a string; SYSTEM_TYPE__NORMAL is returned when the labels
+ * are not found.
+ *
+ * Assumptions: Caller is expected to interpret the data.
+ *
+ * At time of writing the valid CPE Modes were
+ *
+ * - simplex       - All In One Controller/Compute (one unit )
+ * - duplex        - Fully redundant Combined Controller/Compute (two units)
+ * - duplex-direct - reported as SYSTEM_TYPE__CPE_MODE__DUPLEX_DIRECT
+ *
+ * Notes: The system_type and system_mode labels may appear in either
+ *        order in the file. A CPE mode is only reported when both labels
+ *        were parsed and system_type is "CPE" or "All-in-one" ; in every
+ *        other case SYSTEM_TYPE__NORMAL is returned.
+ *
+ *****************************************************************************/
+
+#define SYSTEM_TYPE_PREFIX ((const char *)("System Type :"))
+system_type_enum daemon_system_type ( void )
+{
+    bool system_type_found = false ;
+    bool system_mode_found = false ;
+    bool cpe_system        = false ;
+
+    system_type_enum system_type = SYSTEM_TYPE__NORMAL ;
+
+    FILE * cfg_file_stream = fopen ( PLATFORM_CONF_FILE, "r" );
+    if ( cfg_file_stream != NULL )
+    {
+        char buffer [BUFFER];
+        MEMSET_ZERO(buffer);
+        while ( fgets (buffer, BUFFER, cfg_file_stream) != NULL )
+        {
+            char mode_str[BUFFER];
+            MEMSET_ZERO(mode_str);
+            if ( strstr ( buffer, "system_type") != NULL )
+            {
+                int rc = sscanf ( &buffer[0], "system_type=%1023s", &mode_str[0] );
+                if ( rc == 1 )
+                {
+                    string mode = mode_str ;
+
+                    if ( !mode.empty() )
+                    {
+                        if (( mode == "CPE" ) || ( mode == "All-in-one"))
+                        {
+                            cpe_system = true ;
+                        }
+                        system_type_found = true ;
+                    }
+                }
+            }
+            else if ( strstr ( buffer, "system_mode") != NULL )
+            {
+                bool recognized = false ;
+                int rc = sscanf ( &buffer[0], "system_mode=%1023s", &mode_str[0] );
+                if ( rc == 1 )
+                {
+                    string mode = mode_str ;
+
+                    recognized = true ;
+                    if ( mode.compare("duplex") == 0 )
+                    {
+                        system_type = SYSTEM_TYPE__CPE_MODE__DUPLEX ;
+                    }
+                    else if ( mode.compare("duplex-direct") == 0 )
+                    {
+                        system_type = SYSTEM_TYPE__CPE_MODE__DUPLEX_DIRECT ;
+                    }
+                    else if ( mode.compare("simplex") == 0 )
+                    {
+                        system_type = SYSTEM_TYPE__CPE_MODE__SIMPLEX ;
+                    }
+                    else
+                    {
+                        recognized = false ;
+                    }
+                }
+
+                if ( recognized == true )
+                {
+                    system_mode_found = true ;
+                }
+                else
+                {
+                    /* label present but value missing or not a known mode */
+                    elog ("%s CPE Undetermined\n", SYSTEM_TYPE_PREFIX );
+                    wlog ("... %s\n", buffer );
+                }
+
+                /* Deliberately no unconditional 'break' here. Stopping as
+                 * soon as system_mode was seen made the result depend on
+                 * the label order ; a file listing system_mode ahead of
+                 * system_type was always reported as a Large System. */
+            }
+
+            /* stop reading once both labels have been parsed */
+            if (( system_type_found == true ) && ( system_mode_found == true ))
+                break ;
+
+            MEMSET_ZERO(buffer);
+        }
+
+        /* Close the file */
+        fclose(cfg_file_stream);
+    }
+
+    /* A CPE mode is only valid when both labels were found and the
+     * system_type label identified a CPE / All-in-one system. */
+    if (( system_type_found == false ) ||
+        ( system_mode_found == false ) ||
+        ( cpe_system        == false ))
+    {
+        system_type = SYSTEM_TYPE__NORMAL ;
+    }
+
+    switch ( system_type )
+    {
+        case SYSTEM_TYPE__CPE_MODE__DUPLEX_DIRECT:
+        {
+            ilog ("%s Duplex Direct Connect CPE\n", SYSTEM_TYPE_PREFIX );
+            break ;
+        }
+        case SYSTEM_TYPE__CPE_MODE__DUPLEX:
+        {
+            ilog ("%s Duplex CPE\n", SYSTEM_TYPE_PREFIX );
+            break ;
+        }
+        case SYSTEM_TYPE__CPE_MODE__SIMPLEX:
+        {
+            ilog ("%s Simplex CPE\n", SYSTEM_TYPE_PREFIX );
+            break ;
+        }
+        case SYSTEM_TYPE__NORMAL:
+        default:
+        {
+            ilog("%s Large System\n", SYSTEM_TYPE_PREFIX);
+            break ;
+        }
+    }
+    return ( system_type );
+}
+
+
+/* ********************************************************************
+ *
+ * Name: daemon_infra_iface
+ *
+ * Description: Read the platform.conf file looking for the infra
+ * interface label. If found return that value as string ;
+ * otherwise return empty string.
+ **/
+string daemon_infra_iface ( void )
+{
+    FILE * fp = fopen ( PLATFORM_CONF_FILE, "r" );
+    if ( fp != NULL )
+    {
+        char text [BUFFER];
+        while ( fgets ( text, BUFFER, fp ) != NULL )
+        {
+            /* only lines mentioning the label are worth parsing */
+            if ( strstr ( text, "infrastructure_interface" ) == NULL )
+                continue ;
+
+            char value[BUFFER];
+            memset ( value, 0 , BUFFER );
+            if ( sscanf ( text, "infrastructure_interface=%1023s", value ) == 1 )
+            {
+                fclose (fp);
+                return ( string(value) );
+            }
+        }
+        fclose (fp);
+    }
+
+    /* unlike the management interface query this is a single pass ;
+     * a missing file or label simply yields an empty string */
+    dlog("Infra iface : none\n");
+    return ( "" );
+}
+
+/* *********************************************************************************
+ *
+ * Name : daemon_sw_version
+ *
+ * Description: read the /etc/build.info file and extract the SW_VERSION value.
+ *
+ * Note: If the value is found surrounded by quotes then they are stripped.
+ * Note: if the label is not found then an empty string is returned.
+ *
+ * @return SW_VERSION string value
+ *
+ * Here is the head of the file with that label.
+ *
+ * root@controller-0:/opt# cat /etc/build.info
+ *
+ * SW_VERSION="14.08" <-----------------------
+ * BUILD_TARGET="Unknown"
+ * BUILD_TYPE="Informal"
+ * BUILD_ID="n/a"
+ *
+ ************************************************************************************/
+
+string daemon_sw_version ( void )
+{
+    string version = "" ;
+
+    const char build_info_filename[] = { "/etc/build.info" };
+    FILE * fp = fopen (&build_info_filename[0], "r" ) ;
+    if ( fp )
+    {
+        char buffer[BUFFER];
+        char version_str[BUFFER];
+        while ( fgets (buffer, BUFFER, fp ) != NULL )
+        {
+            memset ( version_str, 0 , BUFFER );
+
+            /* substring filter first ; the parse below additionally
+             * requires the label to start the line */
+            if ( strstr ( buffer, "SW_VERSION") == NULL )
+                continue ;
+            if ( sscanf ( &buffer[0], "SW_VERSION=%1023s", &version_str[0] ) != 1 )
+                continue ;
+
+            /* a successful %s conversion is never empty */
+            version = version_str ;
+
+            /* Chop off the surrounding quotes if they exist ;
+             *
+             *   "12.34" -> 12.34
+             *
+             * The trailing character is only removed when it really is
+             * a quote so that an unbalanced value such as "12.34 does
+             * not lose its last digit. */
+            if ( version[0] == '"' )
+            {
+                version.erase ( 0, 1 );
+                if (( !version.empty() ) &&
+                    ( version[version.size()-1] == '"' ))
+                {
+                    version.erase ( version.size()-1 );
+                }
+            }
+            break ;
+        }
+        /* Close the file */
+        fclose(fp);
+    }
+    /* empty when the file or the label was not found */
+    return ( version );
+}
+
+/****************************************************************************
+ *
+ * Name : daemon_bmc_hosts_file
+ *
+ * Description: Insert the software version into the path to the bmc hosts
+ * file and return the file path as a string.
+ *
+ ****************************************************************************/
+
+/* Return "/opt/platform/config/<sw_version>/" ; the version component is
+ * omitted when daemon_sw_version() returns an empty string. */
+string daemon_bmc_hosts_dir ( void )
+{
+    string fn = "/opt/platform/config/" ;
+    string version = daemon_sw_version();
+    if ( !version.empty() )
+    {
+        version.append("/");
+    }
+    fn.append(version);
+    return(fn);
+}
+
+/* Return the bmc hosts directory with BM_DNSMASQ_FILENAME appended */
+string daemon_bmc_hosts_file ( void )
+{
+    string fn = daemon_bmc_hosts_dir ();
+    fn.append(BM_DNSMASQ_FILENAME);
+    return(fn);
+}
+
+/****************************************************************************
+ *
+ * Name : daemon_get_iface_master
+ *
+ * Description: Get the master interface name for the supplied
+ * physical interface.
+ *
+ * Reads /sys/class/net/<iface>/master/uevent looking for the
+ * INTERFACE= label. Returns a pointer to the file scoped
+ * 'master_interface' buffer when found ; otherwise the
+ * caller's own pointer is handed back unchanged.
+ *
+ ****************************************************************************/
+char master_interface [BUFFER] ;
+char * daemon_get_iface_master ( char * iface_slave_ptr )
+{
+    FILE * file_ptr ;
+    char buffer[BUFFER];
+
+    /* appending a NULL pointer to a std::string is undefined behaviour */
+    if ( iface_slave_ptr == NULL )
+    {
+        return ( iface_slave_ptr );
+    }
+
+    string iface_master = "/sys/class/net/" ;
+    iface_master.append (iface_slave_ptr) ;
+    iface_master.append ("/master/uevent");
+
+    /* Open the master's uevent file ; absent if there is no master */
+    file_ptr = fopen ( iface_master.data(), "r" ) ;
+    if ( file_ptr != NULL )
+    {
+        while ( fgets ( buffer, BUFFER, file_ptr ) != NULL )
+        {
+            char* s = strstr ( buffer, "INTERFACE");
+            if ( s != NULL )
+            {
+                memset ( &master_interface[0], 0, BUFFER );
+                int rc = sscanf ( &buffer[0], "INTERFACE=%1023s",
+                                  &master_interface[0] );
+                if ( rc == 1 )
+                {
+                    fclose(file_ptr);
+                    return ( &master_interface[0] );
+                }
+            }
+        }
+        fclose(file_ptr);
+    }
+    return ( iface_slave_ptr );
+}
+
+/* Return the md5 digest of the specified file's content as a lower case
+ * hex string ; empty string if the file is not present, cannot be opened
+ * or the read buffer cannot be allocated. */
+string daemon_md5sum_file ( const char * file )
+{
+    struct stat p ;
+    string md5sum = "" ;
+
+    memset ( &p, 0 , sizeof(struct stat));
+    stat ( file, &p ) ;
+    if ((p.st_ino != 0 ) && (p.st_dev != 0))
+    {
+        /* add 256 bytes to the buffer just in case there are
+         * additions to the file by the time we start reading it */
+        size_t len = (size_t)p.st_size+0x100 ;
+
+        /* Zero filled and one byte longer than we ever read so the
+         * content is always a terminated string for the debug log. */
+        char * buf_ptr = (char*)calloc ( len+1, 1 ) ;
+        if ( buf_ptr )
+        {
+            dlog ("%s is %ld bytes\n", file, p.st_size );
+
+            /* Open and read the file data */
+            FILE * file_ptr = fopen ( file, "r" ) ;
+            if ( file_ptr != NULL )
+            {
+                unsigned char digest[MD5_DIGEST_LENGTH];
+                char md5str [MD5_STRING_LENGTH];
+                memset ( &digest, 0, MD5_DIGEST_LENGTH);
+                memset ( &md5str, 0, MD5_STRING_LENGTH);
+
+                /* Never read more than what was allocated, even if the
+                 * file grew by more than the 256 bytes of head room. */
+                size_t used = 0 ;
+                while ( used < len )
+                {
+                    size_t got = fread ( buf_ptr+used, 1, len-used, file_ptr );
+                    if ( got == 0 )
+                        break ;
+                    used += got ;
+                }
+
+                /* digest exactly the bytes that were read ; an empty
+                 * file yields the digest of zero bytes */
+                MD5 ( (unsigned char*)buf_ptr, used, (unsigned char*)&digest);
+
+                for(int i = 0; i < MD5_DIGEST_LENGTH; i++)
+                    sprintf(&md5str[i*2], "%02x", (unsigned int)digest[i]);
+
+                dlog ("md5 digest: %s\n", md5str );
+                dlog3 ("file: %s\n", buf_ptr );
+
+                fclose(file_ptr);
+                md5sum = md5str ;
+            }
+            else
+            {
+                elog ("%s file open failed\n", file );
+            }
+            free (buf_ptr);
+        }
+        else
+        {
+            elog ("failed to allocate buffer memory for %s file md5sum calc\n", file );
+        }
+    }
+    else
+    {
+        wlog ("%s file not present\n", file );
+    }
+    return ( md5sum );
+}
+
+// generate a md5sum signature for the Shadow entry
+// returns the password hash and aging in the shadowinfo
+//
+// shadowfile - path of the shadow file to read
+// username   - entry to look for ; must match the first field exactly
+// shadowinfo - caller's buffer that receives "<password>:<aging>"
+// infolen    - size of the shadowinfo buffer in bytes
+//
+// Returns the md5 digest of "<password>:<aging>" as a hex string ; empty
+// string if the file cannot be opened, the user is not found or the
+// shadowinfo buffer is too small.
+string get_shadow_signature ( char * shadowfile , const char * username,
+                              char *shadowinfo, size_t infolen)
+{
+    char buffer[BUFFER];
+    string md5sum = "" ;
+
+    /* Open the specified shadow file */
+    FILE * file_ptr = fopen ( shadowfile, "r" ) ;
+    if ( file_ptr == NULL )
+    {
+        return ( md5sum );
+    }
+
+    /* Clear the buffer - start fresh */
+    memset ( &buffer, 0, BUFFER );
+
+    while ( fgets ( buffer, BUFFER, file_ptr ) != NULL )
+    {
+        char* s = strstr ( buffer, username);
+        if ( s == NULL )
+        {
+            /* Clear the buffer - start fresh */
+            memset ( &buffer, 0, BUFFER );
+            continue ;
+        }
+
+        char user[BUFFER], password[BUFFER], aging[BUFFER];
+        unsigned char digest[MD5_DIGEST_LENGTH];
+        char md5str [MD5_STRING_LENGTH];
+
+        /* Fields are separated by ':'. The first field is the
+         * user. We need to only isolate the password and aging
+         * fields since these are the only ones that'd be
+         * propagated across host nodes. By specifically tracking
+         * these we prevent config-out-of-date alarms for other fields
+         */
+
+        /* Width limits keep every field within its BUFFER sized
+         * destination, matching the %1023s limits used by the other
+         * parsers in this file. */
+        int result = sscanf(buffer, "%1023[^:]:%1023[^:]:%*[^:]:%*[^:]:%1023[^:]",
+                            user, password, aging);
+        if ( result != 3 || strcmp(user, username) != 0 )
+        {
+            /* Sanity */
+            continue;
+        }
+
+        char shadowEntry[BUFFER] = {0};
+        snprintf (shadowEntry, sizeof(shadowEntry),
+                  "%s:%s", password, aging);
+
+        int ret = snprintf(shadowinfo, infolen, "%s", shadowEntry);
+        if (ret >= (int)infolen)
+        {
+            elog("insufficient space in shadow buffer(%d) for %d bytes\n",
+                 (int)infolen, ret);
+
+            /* leave through the common exit so the file gets closed */
+            break ;
+        }
+
+        memset ( &digest, 0, MD5_DIGEST_LENGTH );
+        memset ( &md5str, 0, MD5_STRING_LENGTH );
+        MD5 ((unsigned char*) shadowEntry, strlen(shadowEntry), (unsigned char*)&digest);
+
+        for(int i = 0; i < MD5_DIGEST_LENGTH; i++)
+            sprintf(&md5str[i*2], "%02x", (unsigned int)digest[i]);
+
+        dlog ("user value: %s\n", shadowEntry);
+        dlog ("md5 digest: %s\n", md5str );
+
+        md5sum = md5str ;
+        break ;
+    }
+    fclose(file_ptr);
+    return ( md5sum );
+}
+
+/* Introduced for process monitor daemon to allow quiet
+ * close of the log file before a process respawn */
+void daemon_logfile_close ( void )
+{
+    return ;
+}
+
+/* Introduced for process monitor daemon to allow quiet
+ * re-open of the log file after a respawn of a process */
+void daemon_logfile_open ( void )
+{
+    return ;
+}
+
+/****************************************************************************
+ *
+ * Name : daemon_remove_file
+ *
+ * Description: Remove the specified file if it exists.
+ *
+ *****************************************************************************/
+
+void daemon_remove_file ( const char * filename )
+{
+    /* nothing to do if the file is not there in the first place */
+    if ( !daemon_is_file_present ( filename ))
+    {
+        dlog3 ("no remove required ; file '%s' not present\n", filename );
+        return ;
+    }
+
+    if ( remove(filename) )
+    {
+        elog ("failed to remove file '%s' ; (%d:%m)\n", filename, errno);
+        return ;
+    }
+
+    /* remove() reported success ; double check that the file is gone */
+    if ( daemon_is_file_present ( filename ) )
+    {
+        elog ("failed to remove file '%s' ; still present\n", filename );
+    }
+    else
+    {
+        dlog3 ("removed %s\n", filename );
+    }
+}
+
+/****************************************************************************
+ *
+ * Name : daemon_make_dir
+ *
+ * Description: Create the specified full path directory.
+ *
+ * The directory is created with mode 0755 and only when
+ * stat() of the path fails ; a create failure is logged.
+ *
+ *****************************************************************************/
+
+void daemon_make_dir ( const char * dir )
+{
+    struct stat info ;
+    MEMSET_ZERO(info);
+
+    /* mkdir is only attempted when the stat fails */
+    if (( stat(dir, &info) == -1 ) && ( mkdir (dir, 0755) != 0 ))
+    {
+        elog ("failed to create directory '%s' ; (%d:%m)\n", dir, errno );
+    }
+}
+
+/****************************************************************************
+ *
+ * Name : daemon_rename_file
+ *
+ * Description: Rename the specified file from old to new name.
+ *
+ * Warnings : If the target rename filename exists then it is removed first.
+ *
+ * Both filenames are taken relative to 'path'. Nothing is done
+ * when 'path' is NULL ; an error is logged when the old file
+ * does not exist or the rename fails.
+ *
+ *****************************************************************************/
+
+void daemon_rename_file ( const char * path, const char * old_filename, const char * new_filename )
+{
+    if ( path )
+    {
+        string _old = path ;
+        _old.append("/");
+        _old.append(old_filename);
+        if ( daemon_is_file_present ( _old.data() ) == true )
+        {
+            string _new = path ;
+            _new.append("/");
+            _new.append(new_filename);
+
+            if ( daemon_is_file_present ( _new.data() ) == true )
+            {
+                dlog ("removing '%s' prior to rename\n", _new.c_str());
+                daemon_remove_file ( _new.data() );
+            }
+            if ( rename ( _old.data(), _new.data()) == 0)
+            {
+                dlog ("file rename : %s -> %s\n", old_filename, new_filename);
+            }
+            else
+            {
+                elog ( "Error renaming %s -> %s (%d:%m)\n", old_filename, new_filename, errno );
+            }
+        }
+        else
+        {
+            elog ("rename failed ; '%s' not found\n", _old.c_str());
+        }
+    }
+}
+
+/* Remove this daemon's pid file using the 'rm -f' shell command */
+void daemon_remove_pidfile ( void )
+{
+    /* The command is built in a std::string ; the pid filename can be
+     * up to MAX_FILENAME_LEN long which does not fit a small fixed
+     * size command buffer. */
+    string cmd = "rm -f " ;
+    cmd.append ( pid_filename );
+
+    int rc = system ( cmd.c_str() );
+    if ( rc )
+    {
+        wlog ("system command to remove %s returned %d\n", &pid_filename[0], rc );
+    }
+}
+
+/* Create /var/run/<program name>.pid containing this process's pid.
+ *
+ * Exits the process (status 0) if the pid file already names a running
+ * process. Returns PASS on success, FAIL_PID_OPEN if the file cannot be
+ * opened for write or FAIL_FILE_WRITE if the pid cannot be written. */
+int daemon_create_pidfile ( void )
+{
+    FILE * pid_file_stream = (FILE *)(NULL);
+
+    snprintf ( &pid_filename[0], MAX_FILENAME_LEN, "/var/run/%s.pid",
+               program_invocation_short_name );
+
+    pid_t mypid = getpid();
+
+    /* Check for another instance running by trying to open in read only mode.
+     * If it opens then there "may" be another process running.
+     * If it opens then read the pid and see if that pID exists.
+     * If it does then this is a duplicate process so exit.
+     *
+     * Only a positive pid is probed ; kill() with a pid of zero or less
+     * addresses process groups rather than a single process and would
+     * report a duplicate that does not exist. */
+    pid_file_stream = fopen (&pid_filename[0], "r" ) ;
+    if ( pid_file_stream )
+    {
+        pid_t pid = 0 ;
+        char buffer[BUFFER];
+        if (( fgets ( buffer, BUFFER, pid_file_stream) != NULL ) &&
+            ( sscanf ( &buffer[0], "%d", &pid ) == 1 ) &&
+            ( pid > 0 ) &&
+            ( kill ( pid, 0 ) == 0 ))
+        {
+            syslog ( LOG_INFO, "Refusing to start duplicate process (pid:%d)\n", pid);
+            fclose (pid_file_stream);
+            daemon_files_fini ();
+            exit (0);
+        }
+        fclose (pid_file_stream);
+    }
+
+    /* if we got here then we are ok to run */
+    pid_file_stream = fopen (pid_filename, "w" ) ;
+
+    if ( pid_file_stream == NULL )
+    {
+        syslog ( LOG_ERR, "Failed to open or create %s\n", pid_filename);
+        return ( FAIL_PID_OPEN );
+    }
+
+    /* fprintf reports failure with a negative value, not zero ; the
+     * flush is checked as well since the write is buffered. */
+    if (( fprintf (pid_file_stream,"%d", mypid) <= 0 ) ||
+        ( fflush (pid_file_stream) != 0 ))
+    {
+        syslog ( LOG_ERR, "Failed to write pid file for %s\n", pid_filename );
+        fclose ( pid_file_stream ) ;
+        return ( FAIL_FILE_WRITE ) ;
+    }
+    fclose (pid_file_stream);
+    return (PASS);
+}
+
+/* Return the content of the specified file as a string ; empty if the
+ * file is not present or cannot be opened. Reading stops, with a
+ * warning log, once more than 100 fgets reads have been appended. */
+string daemon_read_file ( const char * filename )
+{
+    string data = "" ;
+    int lines = 0 ;
+    if ( daemon_is_file_present ( filename ) == true )
+    {
+        FILE * _stream = fopen ( filename, "r" );
+        if ( _stream )
+        {
+            char buffer [BUFFER];
+            MEMSET_ZERO(buffer);
+            while ( fgets (buffer, BUFFER, _stream) )
+            {
+                data.append(buffer);
+                if ( ++lines > 100 )
+                {
+                    wlog ("%s file to big ; aborting\n", filename );
+                    break ;
+                }
+                MEMSET_ZERO(buffer);
+            }
+            fclose (_stream);
+        }
+    }
+    return data ;
+}
+
+/***************************************************************************
+ *
+ * Don't return from this call until the specified file exists
+ * or the timeout is exceeded. In the timeout case a FAIL_TIMEOUT
+ * is returned.
+ *
+ * Warning: Timeout timer is not yet implemented
+ *
+ * As written the 'timeout' argument is ignored and the call only
+ * returns PASS once the file is present. FAIL_TIMEOUT is returned
+ * only when 'filename' is NULL.
+ *
+ ***************************************************************************/
+int daemon_wait_for_file ( const char * filename, int timeout )
+{
+    UNUSED(timeout) ;
+    if ( filename )
+    {
+        int count = 0 ;
+        for ( ; ; )
+        {
+            /* service the daemon's signal handler on every pass */
+            daemon_signal_hdlr ();
+            if ( daemon_is_file_present( filename ))
+                return PASS ;
+            sleep (MTC_SECS_2);
+            wlog_throttled ( count, 60, "Waiting for %s\n", filename);
+        }
+    }
+    return (FAIL_TIMEOUT);
+}
+
+/* Log the daemon start-up banner with this process's pid and reset
+ * the FIT state. Always returns PASS. */
+int daemon_files_init ( void )
+{
+    /* Create PID file */
+    pid_t mypid = getpid();
+    ilog ("--- Daemon Start-Up --- pid:%d\n", mypid);
+    daemon_init_fit ();
+    return ( PASS );
+}
+
+/*****************************************************************************************
+ *
+ *                        F I T     S U P P O R T
+ *
+ * Fault insertion support ; compiled in only when WANT_FIT_TESTING is defined.
+ *
+ *****************************************************************************************/
+
+#ifdef WANT_FIT_TESTING
+static daemon_fit_type __fit_info ;
+#endif
+
+#ifdef WANT_HIT_THROTTLE
+int throttle_max ;
+int throttle_count ;
+#endif
+
+/* Log the currently loaded fit. The fit is unloaded instead if the
+ * daemon is not in testmode or the fit has no hits left. */
+void daemon_print_fit ( void )
+{
+#ifdef WANT_FIT_TESTING
+    if ( __fit_info.code )
+    {
+        if ( !daemon_get_cfg_ptr()->testmode )
+        {
+            slog ("%s FIT Mode Not Enabled ; need to enable testmode\n",
+                   __fit_info.host.c_str());
+
+            daemon_init_fit ( );
+        }
+        else if ( __fit_info.hits == 0 )
+        {
+            daemon_init_fit ();
+        }
+        else
+        {
+            slog ("%s FIT Add:%d '%s' with '%s' %d times\n",
+                   __fit_info.host.empty() ? "any" : __fit_info.host.c_str(),
+                   __fit_info.code,
+                   __fit_info.name.empty() ? "n/a" : __fit_info.name.c_str(),
+                   __fit_info.data.empty() ? "n/a" : __fit_info.data.c_str(),
+                   __fit_info.hits);
+        }
+    }
+#endif
+}
+
+/* Add 'hits' to the loaded fit's remaining hit count and log the result */
+void daemon_hits_fit ( int hits )
+{
+#ifdef WANT_FIT_TESTING
+    __fit_info.hits += hits ;
+    daemon_print_fit ();
+#else
+    UNUSED(hits);
+#endif
+}
+
+/* Clear all fit info and, when the /var/run/fit directory exists,
+ * write the 'fitdone' marker file. */
+void daemon_init_fit ( void )
+{
+#ifdef WANT_FIT_TESTING
+    ilog ("FIT Inactive\n");
+    __fit_info.code = 0 ;
+    __fit_info.host.clear() ;
+    __fit_info.name.clear() ;
+    __fit_info.proc.clear() ;
+    __fit_info.data.clear() ;
+    __fit_info.hits = 0 ;
+
+    /* Indicate that the fit is unloaded */
+    if ( daemon_is_file_present ( "/var/run/fit") )
+        daemon_log ( "/var/run/fit/fitdone", "done" );
+
+#endif
+}
+
+/* called by processes that don't match the fit proc name */
+/* Same clear as daemon_init_fit but without the log and marker file */
+void daemon_ignore_fit ( void )
+{
+#ifdef WANT_FIT_TESTING
+    __fit_info.code = 0 ;
+    __fit_info.host.clear() ;
+    __fit_info.name.clear() ;
+    __fit_info.proc.clear() ;
+    __fit_info.data.clear() ;
+    __fit_info.hits = 0 ;
+
+#endif
+}
+
+/* Account for one fit hit ; decrement the remaining hit count, log it
+ * and either clear the fit (no hits left) or record the remaining count
+ * in the fithits file. */
+void daemon_handle_hit ( void )
+{
+#ifdef WANT_FIT_TESTING
+    if ( __fit_info.code )
+    {
+        --__fit_info.hits ;
+
+#ifdef WANT_HIT_THROTTLE
+        ilog_throttled (throttle_count, throttle_max, "%s FIT Hit:%d '%s' with '%s' %d times remaining\n",
+               __fit_info.host.empty() ? "any" : __fit_info.host.c_str(),
+               __fit_info.code,
+               __fit_info.name.empty() ? "n/a" : __fit_info.name.c_str(),
+               __fit_info.data.empty() ? "n/a" : __fit_info.data.c_str(),
+               __fit_info.hits);
+#else
+        slog ("%s FIT Hit:%d '%s' with '%s' %d times remaining\n",
+               __fit_info.host.empty() ? "any" : __fit_info.host.c_str(),
+               __fit_info.code,
+               __fit_info.name.empty() ? "n/a" : __fit_info.name.c_str(),
+               __fit_info.data.empty() ? "n/a" : __fit_info.data.c_str(),
+               __fit_info.hits);
+#endif
+        if ( __fit_info.hits == 0 )
+            daemon_ignore_fit ();
+        else
+            daemon_log_value ( "/var/run/fit/fithits", "hits =", __fit_info.hits );
+    }
+#endif
+}
+
+
+/* Read the fit file if its present and load its fit info */
+/* The file is parsed one label per line ; proc=, code=, hits=, host=,
+ * name= and data=. The fit is only kept when the proc= value equals this
+ * process's program_invocation_short_name. Always returns PASS. */
+int daemon_load_fit ( void )
+{
+#ifdef WANT_FIT_TESTING
+    /* a fit is already loaded ; don't load another over top of it */
+    if ( __fit_info.code )
+    {
+        return (PASS);
+    }
+
+    /* the init file resets the fit state ; rename it once handled */
+    if ( daemon_is_file_present ( FIT__INIT_FILE ) == true )
+    {
+        daemon_rename_file ( FIT__INIT_FILEPATH, FIT__INIT_FILENAME, FIT__INIT_FILENAME_RENAMED );
+        daemon_init_fit ();
+    }
+
+    if ( daemon_is_file_present ( FIT__INFO_FILE ) == false ) return (PASS);
+
+    bool correct_process = false ;
+    bool valid_code = false ;
+
+    FILE * _stream = fopen ( FIT__INFO_FILE, "r" );
+    if ( _stream )
+    {
+        char buffer [BUFFER];
+        memset (buffer, 0 , BUFFER );
+        while ( fgets (buffer, BUFFER, _stream) )
+        {
+            if (( correct_process == false ) &&
+                ( strstr ( buffer, "proc=" )))
+            {
+                char _str[BUFFER];
+                memset ( _str, 0 , BUFFER );
+                int chars = 0 ;
+                if ( ( chars = sscanf ( &buffer[0], "proc=%1023s", &_str[0] ) ) == 1 )
+                {
+                    string proc = program_invocation_short_name ;
+                    if ( proc.compare(_str) == 0 )
+                    {
+                        __fit_info.proc = proc ;
+                        correct_process = true ;
+                    }
+                    else
+                    {
+                        /* fit is for some other process ; stop parsing */
+                        break ;
+                    }
+                }
+                else
+                {
+                    daemon_ignore_fit ();
+                    // ilog ("%d [%s:%s]\n", chars, program_invocation_short_name , _str);
+                }
+            }
+            else if ( strstr ( buffer, "code=" ) )
+            {
+                if ( sscanf ( &buffer[0], "code=%d", &__fit_info.code ) == 1 )
+                {
+                    valid_code = true ;
+                }
+            }
+            else if ( strstr ( buffer, "hits=" ) )
+            {
+                if ( sscanf ( &buffer[0], "hits=%d", &__fit_info.hits ) == 1 )
+                {
+#ifdef WANT_HIT_THROTTLE
+                    throttle_count = 0 ;
+
+                    if ( __fit_info.hits > 10 )
+                        throttle_max = 10 ;
+                    else
+                        throttle_max = 1 ;
+#else
+                    ;
+#endif
+                }
+            }
+            else if ( strstr ( buffer, "host=" ) )
+            {
+                /* up to 4 space separated words are joined with a space */
+                char _str1[MAX_CHARS_HOSTNAME+1];
+                char _str2[MAX_CHARS_HOSTNAME+1];
+                char _str3[MAX_CHARS_HOSTNAME+1];
+                char _str4[MAX_CHARS_HOSTNAME+1];
+                memset ( _str1, 0 , MAX_CHARS_HOSTNAME+1 );
+                memset ( _str2, 0 , MAX_CHARS_HOSTNAME+1 );
+                memset ( _str3, 0 , MAX_CHARS_HOSTNAME+1 );
+                memset ( _str4, 0 , MAX_CHARS_HOSTNAME+1 );
+                /* NOTE(review): the %32s limits assume MAX_CHARS_HOSTNAME is at least 32 - confirm */
+                int rc = sscanf ( &buffer[0], "host=%32s %32s %32s %32s", &_str1[0], &_str2[0], &_str3[0], &_str4[0] );
+                if ( rc )
+                {
+                    __fit_info.host = _str1 ;
+                    if ( rc > 1 )
+                    {
+                        __fit_info.host.append(" ");
+                        __fit_info.host.append(_str2 );
+                    }
+                    if ( rc > 2 )
+                    {
+                        __fit_info.host.append(" ");
+                        __fit_info.host.append(_str3);
+                    }
+                    if ( rc > 3 )
+                    {
+                        __fit_info.host.append(" ");
+                        __fit_info.host.append(_str4);
+                    }
+                }
+            }
+            else if ( strstr ( buffer, "name=" ) )
+            {
+                /* up to 3 space separated words are joined with a space */
+                char _str1[60];
+                char _str2[60];
+                char _str3[60];
+                memset ( _str1, 0 , 60 );
+                memset ( _str2, 0 , 60 );
+                memset ( _str3, 0 , 60 );
+                int rc = sscanf ( &buffer[0], "name=%59s %59s %59s", &_str1[0], &_str2[0], &_str3[0] );
+                if ( rc )
+                {
+                    __fit_info.name = _str1 ;
+                    if ( rc > 1 )
+                    {
+                        __fit_info.name.append(" ");
+                        __fit_info.name.append(_str2 );
+                    }
+                    if ( rc > 2 )
+                    {
+                        __fit_info.name.append(" ");
+                        __fit_info.name.append(_str3);
+                    }
+                }
+            }
+            else if ( strstr ( buffer, "data=" ) )
+            {
+                char _str[BUFFER];
+                memset ( _str, 0 , BUFFER );
+                if ( sscanf ( &buffer[0], "data=%1023s", &_str[0] ) == 1 )
+                {
+                    __fit_info.data = _str ;
+                }
+            }
+            memset (buffer, 0 , BUFFER );
+        } /* end while */
+        fclose(_stream);
+
+    }
+    if ( !correct_process )
+    {
+        return (PASS);
+    }
+
+    daemon_print_fit ();
+
+    if ( !valid_code )
+    {
+        elog ( "FIT file parse error (%d)\n", valid_code );
+        daemon_init_fit ();
+    }
+    else
+    {
+        /* fit accepted ; publish the hit count, clear the done marker
+         * and rename the info file */
+        daemon_log_value ( "/var/run/fit/fithits", "hits =", __fit_info.hits );
+        daemon_remove_file ( "/var/run/fit/fitdone" );
+        daemon_rename_file ( FIT__INFO_FILEPATH, FIT__INFO_FILENAME, FIT__INFO_FILENAME_RENAMED );
+    }
+
+#endif
+    return (PASS);
+}
+
+/* Check for fault insertion */
+/* True, consuming one hit, when hits remain, testmode is enabled and
+ * the loaded fit code equals 'code'. */
+bool daemon_want_fit ( int code )
+{
+#ifdef WANT_FIT_TESTING
+    if ( __fit_info.hits > 0)
+    {
+        if ( daemon_get_cfg_ptr()->testmode )
+        {
+            if ( __fit_info.code )
+            {
+                //ilog ("hits %d code %d:%d\n", __fit_info.hits, __fit_info.code, code);
+                if ( __fit_info.code == code )
+                {
+                    daemon_handle_hit ();
+                    return (true) ;
+                }
+            }
+        }
+    }
+#else
+    UNUSED(code);
+#endif
+    return (false);
+}
+
+/* As above ; 'host' must also be found within the loaded fit host string */
+bool daemon_want_fit ( int code, string host )
+{
+#ifdef WANT_FIT_TESTING
+    if ( __fit_info.hits > 0 )
+    {
+        if ( daemon_get_cfg_ptr()->testmode )
+        {
+            if ( __fit_info.code == code )
+            {
+                //ilog ("%s:%s hits %d code %d:%d\n", host.c_str(), __fit_info.host.c_str() , __fit_info.hits, __fit_info.code, code);
+                if (( __fit_info.code == code ) &&
+                    ( __fit_info.host.find(host) != std::string::npos ))
+                {
+                    daemon_handle_hit ();
+                    return (true) ;
+                }
+            }
+        }
+    }
+#else
+    UNUSED(code);
+    UNUSED(host);
+#endif
+    return (false);
+}
+
+/* As above ; a 'host' of "any" is also accepted and 'name' must be found
+ * within the loaded fit name string */
+bool daemon_want_fit ( int code, string host, string name )
+{
+#ifdef WANT_FIT_TESTING
+    if ( __fit_info.hits > 0 )
+    {
+        if ( daemon_get_cfg_ptr()->testmode )
+        {
+            if ( __fit_info.code == code )
+            {
+                //ilog ("%s:%s <%s:%s> hits %d code %d:%d\n", host.c_str(), __fit_info.host.c_str(), name.c_str(), __fit_info.name.c_str(), __fit_info.hits, __fit_info.code, code);
+                if (( __fit_info.code == code ) &&
+                    (( __fit_info.host.find(host) != std::string::npos ) || ( host == "any" )) &&
+                    ( __fit_info.name.find(name) != std::string::npos ))
+                {
+                    daemon_handle_hit ();
+                    return (true) ;
+                }
+            }
+        }
+    }
+#else
+    UNUSED(code);
+    UNUSED(host);
+    UNUSED(name);
+#endif
+    return (false);
+}
+
+/* As above ; on a match the loaded fit data string is copied to 'data' */
+bool daemon_want_fit ( int code, string host, string name, string & data )
+{
+#ifdef WANT_FIT_TESTING
+    if ( __fit_info.hits > 0 )
+    {
+        if ( daemon_get_cfg_ptr()->testmode )
+        {
+            if ( __fit_info.code == code )
+            {
+                //ilog ("%s:%s <%s:%s> hits %d code %d:%d\n", host.c_str(), __fit_info.host.c_str(), name.c_str(), __fit_info.name.c_str(), __fit_info.hits, __fit_info.code, code);
+                if (( __fit_info.code == code ) &&
+                    (( __fit_info.host.find(host) != std::string::npos ) || ( host == "any" )) &&
+                    ( __fit_info.name.find(name) != std::string::npos))
+                {
+                    data = __fit_info.data ;
+                    daemon_handle_hit ();
+                    return (true) ;
+                }
+            }
+        }
+    }
+#else
+    UNUSED(code);
+    UNUSED(host);
+    UNUSED(name);
+    UNUSED(data);
+#endif
+    return (false);
+}
+
diff --git a/mtce-common/cgts-mtce-common-1.0/daemon/daemon_ini.cpp b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_ini.cpp
new file mode 100755
index 00000000..e031b5d5
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_ini.cpp
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2013-2016 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+/* inih -- simple .INI file parser
+
+inih is released under the New BSD license (see LICENSE.txt). Go to the project
+home page for more info:
+
+http://code.google.com/p/inih/
+
+*/
+
+
+#include "daemon_ini.h"
+
+#include "logMacros.h"
+#include "returnCodes.h"
+
+#define MAX_SECTION 50
+#define MAX_NAME 50
+
+/* Strip whitespace chars off end of given string, in place. Return s. */
+static char* rstrip(char* s)
+{
+    char* p = s + strlen(s);
+    while (p > s && isspace((unsigned char)(*--p)))
+        *p = '\0';
+    return s;
+}
+
+/* Return pointer to first non-whitespace char in given string. */
+static char* lskip(const char* s)
+{
+    while (*s && isspace((unsigned char)(*s)))
+        s++;
+    return (char*)s;
+}
+
+/* Return pointer to first char c or ';' comment in given string, or pointer to
+   null at end of string if neither found. ';' must be prefixed by a whitespace
+   character to register as a comment. */
+static char* find_char_or_comment(const char* s, char c)
+{
+    int was_whitespace = 0;
+    while (*s && *s != c && !(was_whitespace && *s == ';')) {
+        was_whitespace = isspace((unsigned char)(*s));
+        s++;
+    }
+    return (char*)s;
+}
+
+/* Version of strncpy that ensures dest (size bytes) is null-terminated.
*/
+static char* strncpy0(char* dest, const char* src, size_t size)
+{
+    strncpy(dest, src, size);
+    dest[size - 1] = '\0';
+    return dest;
+}
+
+
+/* Search the specified ini style file for label 'name' inside section
+ * '[section]' and copy what follows the '=' or ':' into 'value'.
+ *
+ * Returns PASS if the label was found, or if it was not found and
+ * 'maybe_missing' is true. Otherwise a warning is logged and -1 is
+ * returned ; this includes the case where the file cannot be opened. */
+int ini_get_config_value ( const char * filename,
+                           string section,
+                           string name,
+                           string & value,
+                           bool maybe_missing )
+{
+    FILE* file;
+    int rc = -1 ;
+
+    dlog2 ("config file: %s\n", filename );
+
+    file = fopen(filename, "r");
+    if (file)
+    {
+        char _line[INI_MAX_LINE];
+
+        char* _start;
+        char* _end;
+        char* _name;
+        char* _value;
+
+        int _lineno = 0;
+        bool in_section = false ;
+
+        /* Scan through file line by line */
+        while (fgets(_line, INI_MAX_LINE, file) != NULL)
+        {
+            _lineno++;
+            _start = _line ;
+            _start = lskip(rstrip(_start));
+
+            /* skip '#' or ';' comments at start of line */
+            if ( ( *_start != ';' ) && ( *_start != '#') )
+            {
+                if (( *_start == '[') && ( *(_start+1) != ']' ))
+                {
+                    /* a section heading ; see if it is the one we want */
+                    string _section = "[" ;
+                    _section.append(section);
+                    _section.append("]");
+
+                    string line_string = _line ;
+                    if ( line_string.find(_section) != std::string::npos )
+                    {
+                        dlog3 ("Section: %s (line:%s)\n", _section.c_str(), line_string.c_str());
+                        in_section = true ;
+
+                    }
+                    else
+                    {
+                        /* any other heading ends the wanted section */
+                        if ( in_section == true )
+                        {
+                            in_section = false ;
+                        }
+                    }
+                }
+                else if ( in_section == false )
+                {
+                    ; /* keep looking for specified section */
+                }
+                /* fields are delimited by a ';'
+                 * field = value ; */
+                else if (*_start && *_start != ';')
+                {
+                    // dlog ("Line: %s\n", _line );
+
+                    /* Not a comment, must be a name[=:]value pair */
+                    _end = find_char_or_comment(_start, '=');
+                    if (*_end != '=')
+                    {
+                        _end = find_char_or_comment(_start, ':');
+                    }
+                    if (*_end == '=' || *_end == ':')
+                    {
+                        *_end = '\0';
+                        _name = rstrip(_start);
+
+                        // dlog ("Name: %s\n", _name );
+
+                        _value = lskip(_end + 1);
+                        _end = find_char_or_comment(_value, '\0');
+                        if (*_end == ';')
+                        {
+                            *_end = '\0';
+                        }
+                        rstrip(_value);
+
+
+                        /* if the label matches then save the value and exit */
+                        if ( !name.compare(_name) )
+                        {
+                            value = _value ;
+                            dlog2 ("key:value - %s:%s\n", name.c_str(), value.c_str() );
+                            fclose(file);
+                            return (PASS);
+                        }
+                    }
+                }
+            }
+        }
+        fclose(file);
+    }
+    if ( maybe_missing == false )
+    {
+        wlog ("Failed to find label '%s' in section '[%s]'\n", name.c_str(), section.c_str() );
+    }
+    else
+    {
+        rc = PASS ;
+    }
+    return (rc) ;
+}
+
+/* See documentation in header file. */
+int ini_parse_file(FILE* file,
+                   int (*handler)(void*, const char*, const char*,
+                                  const char*),
+                   void* user)
+{
+    /* Uses a fair bit of stack (use heap instead if you need to) */
+#if INI_USE_STACK
+    char line[INI_MAX_LINE];
+#else
+    char* line;
+#endif
+    char section[MAX_SECTION] = "";
+    char prev_name[MAX_NAME] = "";
+
+    char* start;
+    char* end;
+    char* name;
+    char* value;
+    int lineno = 0;
+    int error = 0;
+
+#if !INI_USE_STACK
+    line = (char*)malloc(INI_MAX_LINE+1);
+    if (!line) {
+        return -2;
+    }
+#endif
+
+    /* Scan through file line by line */
+    while (fgets(line, INI_MAX_LINE, file) != NULL) {
+        lineno++;
+
+        start = line;
+#if INI_ALLOW_BOM
+        if (lineno == 1 && (unsigned char)start[0] == 0xEF &&
+                           (unsigned char)start[1] == 0xBB &&
+                           (unsigned char)start[2] == 0xBF) {
+            start += 3;
+        }
+#endif
+        start = lskip(rstrip(start));
+
+        if (*start == ';' || *start == '#') {
+            /* Per Python ConfigParser, allow '#' comments at start of line */
+        }
+#if INI_ALLOW_MULTILINE
+        else if (*prev_name && *start && start > line) {
+            /* Non-blank line with leading whitespace, treat as continuation
+               of previous name's value (as per Python ConfigParser). */
+            if (!handler(user, section, prev_name, start) && !error)
+                error = lineno;
+        }
+#endif
+        else if (*start == '[') {
+            /* A "[section]" line */
+            end = find_char_or_comment(start + 1, ']');
+            if (*end == ']') {
+                *end = '\0';
+                strncpy0(section, start + 1, sizeof(section));
+                *prev_name = '\0';
+            }
+            else if (!error) {
+                /* No ']' found on section line */
+                error = lineno;
+            }
+        }
+        else if (*start && *start != ';') {
+            /* Not a comment, must be a name[=:]value pair */
+            end = find_char_or_comment(start, '=');
+            if (*end != '=') {
+                end = find_char_or_comment(start, ':');
+            }
+            if (*end == '=' || *end == ':') {
+                *end = '\0';
+                name = rstrip(start);
+                value = lskip(end + 1);
+                end = find_char_or_comment(value, '\0');
+                if (*end == ';')
+                    *end = '\0';
+                rstrip(value);
+
+                /* Valid name[=:]value pair found, call handler */
+                strncpy0(prev_name, name, sizeof(prev_name));
+                if (!handler(user, section, name, value) && !error)
+                    error = lineno;
+            }
+            else if (!error) {
+                /* No '=' or ':' found on name[=:]value line */
+                error = lineno;
+            }
+        }
+    }
+
+#if !INI_USE_STACK
+    free(line);
+#endif
+
+    /* 0 on success ; otherwise the line number of the first error */
+    return error;
+}
+
+/* See documentation in header file. */
+int ini_parse(const char* filename,
+              int (*handler)(void*, const char*, const char*, const char*),
+              void* user)
+{
+    FILE* file;
+    int error;
+
+    file = fopen(filename, "r");
+    if (!file)
+        return -1;
+    error = ini_parse_file(file, handler, user);
+    fclose(file);
+    return error;
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/daemon/daemon_ini.h b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_ini.h
new file mode 100755
index 00000000..c5b27a71
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_ini.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2013-2015 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+/* inih -- simple .INI file parser
+
+inih is released under the New BSD license (see LICENSE.txt).
Go to the project +home page for more info: + +http://code.google.com/p/inih/ + +*/ + +#ifndef __INI_H__ +#define __INI_H__ + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +/* Parse given INI-style file. May have [section]s, name=value pairs + (whitespace stripped), and comments starting with ';' (semicolon). Section + is "" if name=value pair parsed before any section heading. name:value + pairs are also supported as a concession to Python's ConfigParser. + + For each name=value pair parsed, call handler function with given user + pointer as well as section, name, and value (data only valid for duration + of handler call). Handler should return nonzero on success, zero on error. + + Returns 0 on success, line number of first error on parse error (doesn't + stop on first error), -1 on file open error, or -2 on memory allocation + error (only when INI_USE_STACK is zero). +*/ +int ini_parse(const char* filename, + int (*handler)(void* user, const char* section, + const char* name, const char* value), + void* user); + +/* Same as ini_parse(), but takes a FILE* instead of filename. This doesn't + close the file when it's finished -- the caller must do that. */ +int ini_parse_file(FILE* file, + int (*handler)(void* user, const char* section, + const char* name, const char* value), + void* user); + +int ini_get_config_value ( const char * filename, + string section, + string name, + string & value, + bool maybe_missing ); + +/* Nonzero to allow multi-line value parsing, in the style of Python's + ConfigParser. If allowed, ini_parse() will call the handler with the same + name for each subsequent line parsed. */ +#ifndef INI_ALLOW_MULTILINE +#define INI_ALLOW_MULTILINE 1 +#endif + +/* Nonzero to allow a UTF-8 BOM sequence (0xEF 0xBB 0xBF) at the start of + the file. 
See http://code.google.com/p/inih/issues/detail?id=21 */ +#ifndef INI_ALLOW_BOM +#define INI_ALLOW_BOM 1 +#endif + +/* Nonzero to use stack, zero to use heap (malloc/free). */ +#ifndef INI_USE_STACK +#define INI_USE_STACK 1 +#endif + +/* Maximum line length for any line in INI file. */ +#ifndef INI_MAX_LINE +#define INI_MAX_LINE 1000 +#endif + + +#ifdef MATCH +#undef MATCH +#endif +#define MATCH(s, n) strcmp(section, s) == 0 && strcmp(name, n) == 0 + +#endif /* __INI_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/daemon/daemon_main.cpp b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_main.cpp new file mode 100755 index 00000000..9c2bb190 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_main.cpp @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Maintenance Main Implementation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define EXIT_SUCCESS 0 +#define EXIT_FAILURE 1 + +using namespace std; + +bool testing = false ; + +/** Feature Utility Includes */ +#include "daemon_option.h" +#include "daemon_common.h" +#include "nodeBase.h" +#include "nodeUtil.h" /* for ... mem_log_list_init */ + +/** + * Cache a copy of the current hostname. 
+ * Use these set/get interfaces to set and retrieve it + */ +static char this_hostname [MAX_CHARS_HOSTNAME+2]; + +char * _hn ( void ) +{ + return(&this_hostname[0]); +} + +void set_hn ( char * hn ) +{ + if ( hn ) + snprintf ( &this_hostname[0], MAX_CHARS_HOSTNAME+1, "%s", hn ); + else + snprintf ( &this_hostname[0], MAX_CHARS_HOSTNAME+1, "%s", "localhost" ); +} + +static opts_type opts ; /**< The allocated memory for Daemon run options */ + +opts_type * daemon_get_opts_ptr ( void ) +{ + return(&opts); +} + +bool ltc ( void ) +{ + return(opts.front); +} + +void print_help ( void ) +{ + printf ("\nUsage: options ...\n"); + printf ("\t-h --help - Display this usage information\n"); + printf ("\t-a --active - Specify service as Active\n"); + printf ("\t-d --debug <0..15> - Enter specified debug level\n"); + printf ("\t-f --foreground - Run in foreground\n"); + printf ("\t-l --log - Log to file ; /var/log/.log\n"); + printf ("\t-p --passive - Passive mode ; do not act on failures\n"); + printf ("\t-v --verbose - Show command line arguments\n"); + printf ("\t-t --test - Run Test Head\n"); + printf ("\t-g --gap - Gap in seconds\n"); + printf ("\t-m --mode - Word string representing a run mode\n"); + printf ("\t-n --number - A number\n"); + printf ("\t-p --password - Password\n"); + printf ("\t-c --command - Command\n"); + printf ("\t-u --username - Username\n"); + printf ("\t-i --ipaddr - Ipaddr\n"); + printf ("\n" ); + exit (0); +} + +int daemon_get_run_option ( const char * option ) +{ + if ( !strcmp ( option, "test" ) ) + return opts.test ; + else if ( !strcmp ( option, "active" ) ) + return opts.active ; + else if ( !strcmp ( option, "log" ) ) + { + /* no logging in foreground mode + * but always log otherwise */ + if ( opts.front ) + { + return opts.log ; + } + return (1); + } + else if ( !strcmp ( option, "front" ) ) + return opts.front ; + + return 0 ; +} + +void opts_init ( void) +{ + opts.help = false ; + opts.log = false ; + opts.test = false ; + opts.verbose = 
false ; + opts.active = false ; + opts.front = false ; + opts.front = false ; + opts.command = "" ; + opts.username= "" ; + opts.password= "" ; + opts.ipaddr = "" ; + opts.mode = "" ; + opts.number = 0 ; + opts.delay = 0 ; +} + +int daemon_set_cfg_option ( const char * option , int value ) +{ + int rc = PASS ; + + daemon_config_type * cfg_ptr = daemon_get_cfg_ptr (); + + if ( !strcmp ( option, "active" ) ) + cfg_ptr->active = value ; + else + rc = FAIL ; + + return (rc); +} + + +int parseArg ( int argc, char * argv[], opts_type * opts_ptr ) +{ + int arg_count = 0 ; + int next_option = 0 ; + int cmd_arg_count = 1 ; /* command args start at 1 */ + + /* A string listing of valid short options letters. */ + const char* const short_options = "u:c:p:g:i:m:n:d:hlfpvta"; + + /* An array listing of valid long options. */ + const struct option long_options[] = + { + { "debug" , 1, NULL, 'd' }, + { "gap" , 1, NULL, 'g' }, + { "mode" , 1, NULL, 'm' }, + { "number" , 1, NULL, 'n' }, + { "ipaddr" , 1, NULL, 'i' }, + { "command" , 1, NULL, 'c' }, + { "password" , 1, NULL, 'p' }, + { "username" , 1, NULL, 'u' }, + { "help" , 0, NULL, 'h' }, + { "active" , 0, NULL, 'a' }, + { "foreground", 0, NULL, 'f' }, + { "log" , 0, NULL, 'l' }, + { "verbose" , 0, NULL, 'v' }, + { "test" , 0, NULL, 't' }, + { NULL , 0, NULL, 0 } /* Required at end of array. 
*/ + }; + + do + { + next_option = getopt_long (argc, argv, short_options, long_options, NULL); + arg_count++ ; + switch (next_option) + { + case -1: /* Done with options */ + { + break ; + } + case 'f': /* -f or --foreground */ + { + opts_ptr->front = true ; + cmd_arg_count++ ; + break ; + } + case 'g': /* -g or --gap */ + { + opts_ptr->delay = atoi(optarg) ; + cmd_arg_count++ ; + break; + } + + case 'l': /* -l or --log */ + { + opts_ptr->log = true ; + cmd_arg_count++ ; + break ; + } + case 'h': /* -h or --help */ + { + opts_ptr->help = true ; + cmd_arg_count++ ; + return ( PASS ) ; + } + case 'd': /* -p or --debug */ + { + opts_ptr->debug = atoi(optarg) ; + cmd_arg_count++ ; + break; + } + case 'm': /* -u or --mode */ + { + opts_ptr->mode = optarg ; + cmd_arg_count++ ; + break; + } + case 'n': /* -p or --number */ + { + opts_ptr->number = atoi(optarg) ; + cmd_arg_count++ ; + break; + } + case 'p': /* -p or --password */ + { + opts_ptr->password = optarg ; + cmd_arg_count++ ; + break; + } + case 'u': /* -u or --username */ + { + opts_ptr->username = optarg ; + cmd_arg_count++ ; + break; + } + case 'c': /* -c or --command */ + { + opts_ptr->command = optarg ; + cmd_arg_count++ ; + break; + } + case 'i': /* -i or --ipaddr */ + { + opts_ptr->ipaddr = optarg; + cmd_arg_count++ ; + break; + } + case 't': /* -t or --test */ + { + opts_ptr->test = true ; + cmd_arg_count++ ; + break; + } + case 'v': /* -t or --verbose */ + { + opts_ptr->verbose = true ; + cmd_arg_count++ ; + break; + } + case 'a': /* -a or --active */ + { + opts_ptr->active = true ; + cmd_arg_count++ ; + break; + } + case '?': + default: /* Something else: unexpected */ + { + printf ("Unsupported option (%c)\n", next_option ); + opts_ptr->help = true ; + return ( cmd_arg_count ); + } + } + } while (next_option != -1); + + if (opts_ptr->verbose) + { + int i ; + + for ( i = 0 ; i < argc; ++i) + printf ("Arg [%d]: %s\n", i, argv[i]); + + printf ("\n"); + } + return ( cmd_arg_count ) ; +} + + +static void 
daemonize(void) +{ + pid_t pid, sid; + int fd; + + /* already a daemon */ + if ( getppid() == 1 ) + return; + + /* Fork off the parent process */ + pid = fork(); + if (pid < 0) + { + exit(EXIT_FAILURE); + } + + if (pid > 0) + { + exit(EXIT_SUCCESS); /*Killing the Parent Process*/ + } + + /* At this point we are executing as the child process */ + + /* Create a new SID for the child process */ + sid = setsid(); + if (sid < 0) + { + exit(EXIT_FAILURE); + } + + /* Change the current working directory. */ + if ((chdir("/")) < 0) + { + exit(EXIT_FAILURE); + } + + fd = open("/dev/null",O_RDWR, 0); + + if (fd != -1) + { + dup2 (fd, STDIN_FILENO); + dup2 (fd, STDOUT_FILENO); + dup2 (fd, STDERR_FILENO); + + if (fd > 2) + { + close (fd); + } + } + + /* File Creation Mask */ + umask(022); +} + +int main(int argc, char *argv[]) +{ + int rc = FAIL ; + + set_hn (NULL); + + /* Manually Zero the main service structs */ + opts_init (); + + /* Parse the argument list */ + parseArg ( argc, argv, &opts ); + + if ( opts.help ) + { + print_help ( ); + exit (0) ; + } + + /* lets turn this process into an independent daemon */ + if ( opts.front != true ) + { + daemonize (); + } + + daemon_health_test (); + daemon_create_pidfile (); + if ( !opts.front ) + { + open_syslog(); + } + + mem_log_list_init ( ); + + /* Init the daemon config structure */ + daemon_config_default ( daemon_get_cfg_ptr() ); + + /* get the management interface */ + string iface = daemon_mgmnt_iface (); + + /* get the node type */ + string nodet = daemon_nodetype (); + + rc = daemon_init ( iface, nodet ); + if ( rc != PASS ) + { + elog ("Initialization failed (rc=%d)\n", rc); + rc = FAIL_DAEMON_INIT ; + } + else if ( rc == PASS ) + { + if ( opts.username.size() ) + { + ilog ("Username : %s\n", opts.username.c_str()); + } + if ( opts.ipaddr.size()) + { + ilog ("IP Addr : %s\n", opts.ipaddr.c_str()); + } + if ( opts.password.size()) + { + ilog ("Password : %s\n", opts.password.c_str()); + } + if ( opts.command.size()) + { 
+ ilog ("Command : %s\n", opts.command.c_str()); + } + + if ( opts.test ) + { + printf ("Enabling Test Mode\n"); + testing = true ; + } + ilog ("Build Date : %s\n", BUILDINFO); + ilog ("------------------------------------------------------\n"); + + /* Call the test head if test mode is selected. + * Otherwise call the main service. */ + if ( opts.test ) + daemon_run_testhead ( ); + else + { + daemon_load_fit (); + daemon_service_run ( ); + } + } + exit (rc) ; +} diff --git a/mtce-common/cgts-mtce-common-1.0/daemon/daemon_option.h b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_option.h new file mode 100755 index 00000000..a4924b50 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_option.h @@ -0,0 +1,64 @@ +#if !defined(_MTC_DAEMON_H__) +#define _MTC_DAEMON_H__ +/* + * Copyright (c) 2013, 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Maintenance Main Implementation + */ + +/** + * @addtogroup daemon_main + * @{ + */ + + +#include +#include + +using namespace std ; + +/** + * Daemon development run options structure + */ +typedef struct +{ + int help ; /**< Display daemon options help */ + int log ; /**< Request log to file */ + int test ; /**< Enable test mode */ + int info ; /**< Dump data module info */ + int verbose ; /**< Dump command line options */ + int active ; /**< Set daemon active */ + int debug ; /**< Set tracing debug mode "debug,"test","info","trace" */ + int front ; /**< run in the foreground ; do not daemonize */ + int number ; /**< a number option - loops */ + int delay ; /**< a number option - loops */ + string mode ; /**< specify a mode as a string ' i.e. 
shell mode*/ + string ipaddr ; + string username ; + string command ; + string password ; +} opts_type ; + +opts_type * daemon_get_opts_ptr ( void ); + +/** Returns the value of a specified run option + * + * @param option + * pointer to a run option string ; debug, test, info, trace + * @return + * the run option value + */ +int daemon_get_run_option ( const char * option ); + + +/** + * @} daemon_main + */ + +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/daemon/daemon_signal.cpp b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_signal.cpp new file mode 100755 index 00000000..10439b2d --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_signal.cpp @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Maintenance Daemon Signal Handling + */ +#include +#include +#include +#include +#include +#include + +using namespace std; + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "sig" + +#include "daemon_common.h" /* Common Daemon Structs and Utils */ +#include "nodeBase.h" /* Common Definitions */ +#include "nodeUtil.h" /* Common Definitions */ + +/* Flag indicating that the signal handler is initialized */ +static bool __signal_init_done = false ; + +/* Flag indicating that there is an active signal assertion */ +static bool __signal_assertion = false ; + +/* Flag to control the enabled state of the latency monitor + * - default = disabled (false) */ +static bool __signal_want_latency_monitor = false; + +static unsigned long long __signal_prev_time = 0 ; +static unsigned long long __signal_this_time = 0 ; + +/* Signal specific flag assertions */ +static bool __signal_sigchld_assertion = false ; +static bool __signal_sigusr1_assertion = false ; +static bool __signal_sigusr2_assertion = false ; +static bool __signal_sigint_assertion = false ; +static bool __signal_sigterm_assertion = false ; +static bool 
__signal_sigkill_assertion = false ; +static bool __signal_sigabrt_assertion = false ; +static bool __signal_sighup_assertion = false ; +static bool __signal_sigcont_assertion = false ; +static bool __signal_sigstp_assertion = false ; +static bool __signal_sigpipe_assertion = false ; +static bool __signal_unsupported = false ; + +/* List of supported signals */ +#define MAX_SIGNALS 9 +static const int signals [MAX_SIGNALS]={ SIGTERM, SIGINT, SIGUSR1, SIGUSR2, SIGHUP, SIGTSTP, SIGCHLD, SIGABRT, SIGPIPE}; +static const char signal_names [MAX_SIGNALS][5] = {"TERM", "INT","USR1","USR2","HUP","TSTP","CHLD", "ABRT", "PIPE" }; + +/* + * Control enabled state of the base level signal handler latency monitor. + * state - true to enable or false to disable + */ +void daemon_latency_monitor ( bool state ) +{ + __signal_want_latency_monitor = state ; + __signal_prev_time = gettime_monotonic_nsec(); +} + +void __signal_hdlr ( int signo, siginfo_t * siginfo_ptr , void * uc ) +{ + UNUSED(uc); + UNUSED(siginfo_ptr); /* Future */ + + if (signo == SIGCHLD) __signal_sigchld_assertion = true ; + else if (signo == SIGUSR1) __signal_sigusr1_assertion = true ; + else if (signo == SIGUSR2) __signal_sigusr2_assertion = true ; + else if (signo == SIGHUP) __signal_sighup_assertion = true ; + else if (signo == SIGINT ) __signal_sigint_assertion = true ; + else if (signo == SIGTERM) __signal_sigterm_assertion = true ; + else if (signo == SIGKILL) __signal_sigkill_assertion = true ; + else if (signo == SIGABRT) __signal_sigabrt_assertion = true ; + else if (signo == SIGCONT) __signal_sigcont_assertion = true ; + else if (signo == SIGTSTP) __signal_sigstp_assertion = true ; + else if (signo == SIGPIPE) __signal_sigpipe_assertion = true ; + else __signal_unsupported = true ; + + /* set the glabal flag indicating there is a signal to handle */ + __signal_assertion = true ; +} + +#define LATENCY_THRESHOLD_2SECS (2000) + +void daemon_signal_hdlr ( void ) +{ + /* Monitor base level signal handler 
scheduling latency */ + if (( __signal_init_done ) && ( __signal_want_latency_monitor )) + { + __signal_this_time = gettime_monotonic_nsec(); + if ( __signal_this_time > (__signal_prev_time + (NSEC_TO_MSEC*(LATENCY_THRESHOLD_2SECS)))) + { + llog ("... %4llu.%-4llu msec - base level signal handler\n", + ((__signal_this_time-__signal_prev_time) > NSEC_TO_MSEC) ? ((__signal_this_time-__signal_prev_time)/NSEC_TO_MSEC) : 0, + ((__signal_this_time-__signal_prev_time) > NSEC_TO_MSEC) ? ((__signal_this_time-__signal_prev_time)%NSEC_TO_MSEC) : 0 ); + } + __signal_prev_time = __signal_this_time ; + } + + /* handle signals at base level */ + if (( __signal_init_done == true ) && ( __signal_assertion == true )) + { + __signal_assertion = false ; /* prevent recursion */ + + if ( __signal_sigchld_assertion ) + { + // ilog("Received SIGCHLD\n"); + { + daemon_sigchld_hdlr (); + } + __signal_sigchld_assertion = false ; + } + if ( __signal_sigusr1_assertion ) + { + // ilog("Received SIGUSR1 ----------------------------\n"); + { + daemon_health_test (); + } + __signal_sigusr1_assertion = false ; + } + if ( __signal_sigusr2_assertion ) + { + ilog("Received SIGUSR2\n"); + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__SIGNAL_NOEXIT ) == false ) +#endif + { + daemon_dump_info () ; + daemon_dump_cfg (); + } + __signal_sigusr2_assertion = false ; + } + if ( __signal_sighup_assertion ) + { + ilog("Received SIGHUP ; Reloading Config\n"); +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__SIGNAL_NOEXIT ) == false ) +#endif + { + daemon_configure () ; + } + __signal_sighup_assertion = false ; + } + if ( __signal_sigint_assertion ) + { + ilog("Received SIGINT\n"); + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__SIGNAL_NOEXIT ) == false ) +#endif + { + daemon_remove_pidfile (); + daemon_dump_cfg (); + daemon_exit(); + } + __signal_sigint_assertion = false ; + } + if ( __signal_sigterm_assertion ) + { + ilog("Received SIGTERM\n"); + +#ifdef WANT_FIT_TESTING + 
if ( daemon_want_fit ( FIT_CODE__SIGNAL_NOEXIT ) == false) +#endif + { + daemon_remove_pidfile (); + daemon_dump_cfg (); + daemon_exit(); + } + __signal_sigterm_assertion = false ; + } + if ( __signal_sigkill_assertion ) + { + ilog("Received SIGKILL\n"); + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__SIGNAL_NOEXIT ) == false ) +#endif + { + daemon_remove_pidfile (); + daemon_exit(); + } + __signal_sigkill_assertion = false ; + } + if ( __signal_sigabrt_assertion ) + { + ilog("Received SIGABRT\n"); + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__SIGNAL_NOEXIT ) == false ) +#endif + { + daemon_remove_pidfile (); + daemon_exit(); + } + __signal_sigabrt_assertion = false ; + } + if ( __signal_sigcont_assertion ) + { + ilog("Received SIGCONT ; not supported\n"); + __signal_sigcont_assertion = false ; + } + if ( __signal_sigstp_assertion ) + { + ilog("Received SIGSTP ; not supported\n"); + __signal_sigstp_assertion = false ; + } + if ( __signal_sigpipe_assertion ) + { + ilog("Received SIGPIPE\n"); + __signal_sigpipe_assertion = false ; + } + if ( __signal_unsupported ) + { + ilog ("Error: unsupported signal\n"); + __signal_unsupported = false ; + } + } +} + +/* bind the handler to each supported signal */ +int daemon_signal_init ( void ) +{ + int i ; + int rc = PASS ; + + struct sigaction act; + memset( &act, 0, sizeof(act) ); + + act.sa_sigaction = __signal_hdlr; + act.sa_flags = (SA_SIGINFO | SA_NODEFER) ; + for ( i = 0 ; i < MAX_SIGNALS ; i++ ) + { + if ( sigaction ( signals[i], &act, NULL )) + { + elog("Error: Registering '%s' Signal (%i) with kernel (%d:%m)\n", + signal_names[i], signals[i], errno ); + + rc = FAIL ; + } + } + + ilog ("Signal Hdlr : Installed (sigaction)\n"); + + __signal_prev_time = __signal_this_time = gettime_monotonic_nsec() ; + __signal_assertion = false; + __signal_init_done = true ; + return rc ; +} diff --git a/mtce-common/cgts-mtce-common-1.0/fsmon/Makefile b/mtce-common/cgts-mtce-common-1.0/fsmon/Makefile new 
file mode 100644
index 00000000..58445085
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/fsmon/Makefile
@@ -0,0 +1,42 @@
+#
+# Copyright (c) 2014-2015 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Builds the fsmond daemon from the two fsmon sources and links it against
+# the sibling ../daemon and ../common libraries, which are rebuilt first.
+SRCS = fsmonInit.cpp fsmonHdlr.cpp
+OBJS = $(SRCS:.cpp=.o)
+LDLIBS = -lstdc++ -ldaemon -lcommon -lrt -lcrypto
+INCLUDES = -I../daemon -I../common -I.
+CCFLAGS = -g -O2 -Wall -Wextra -Werror
+
+# 'command -v' is POSIX ; $(shell ...) runs /bin/sh where '[[ ]]' is not
+# guaranteed to exist.
+STATIC_ANALYSIS_TOOL = cppcheck
+STATIC_ANALYSIS_TOOL_EXISTS = $(shell command -v $(STATIC_ANALYSIS_TOOL) >/dev/null 2>&1 && echo 1 || echo 0)
+
+.PHONY: all static_analysis build common daemon clean_common clean
+
+all: common daemon build clean_common static_analysis
+
+.cpp.o:
+	$(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@
+
+static_analysis:
+ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1)
+	$(STATIC_ANALYSIS_TOOL) --language=c++ --enable=warning -U__AREA__ -DWANT_FIT_TESTING *.cpp *.h
+else
+	echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..."
+endif
+
+build: clean static_analysis $(OBJS)
+	$(CXX) $(CCFLAGS) $(OBJS) -L../daemon -L../common $(LDLIBS) -o fsmond
+
+# $(MAKE) keeps command line flags and the jobserver for the sub-makes.
+common:
+	( cd ../common ; $(MAKE) clean ; $(MAKE) lib VER=$(VER) VER_MJR=$(VER_MJR))
+
+daemon:
+	( cd ../daemon ; $(MAKE) clean ; $(MAKE) lib VER=$(VER) VER_MJR=$(VER_MJR))
+
+clean_common:
+	@rm -f ../common/*.o
+	@rm -f ../daemon/*.o
+
+clean: clean_common
+	@rm -f $(OBJS) fsmond *.o *.a
diff --git a/mtce-common/cgts-mtce-common-1.0/fsmon/fsmon.h b/mtce-common/cgts-mtce-common-1.0/fsmon/fsmon.h
new file mode 100644
index 00000000..ee7dd072
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/fsmon/fsmon.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2014-2015 Wind River Systems, Inc.
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/* + * This implements the CGCS process Monitor ; /usr/local/bin/pmond + * + * Call trace is as follows: + * daemon_init + * daemon_files_init + * daemon_signal_init + * daemon_configure + * ini_parse + * get_debug_options + * + * daemon_service_run + * _forever + */ + + /** + * @file + * Wind River CGCS Platform File System Monitor Service Header + */ + +#include +#include +#include +#include /* for .. signaling */ +#include /* for .. close and usleep */ +#include /* for .. system */ +#include /* for config dir reading */ +#include /* for the list of conf file names */ +#include /* for ... syslog */ +#include /* for ... waitpid */ +#include /* for ... time */ +#include /* for program control header */ +#include /* */ +#include /* for ... socket */ +#include /* for ... domain socket type */ +#include /* for ... UDP socket type */ +#include +#include +#include +#include /* for hostent */ +#include +#include + +using namespace std; + +#include "nodeBase.h" +#include "daemon_ini.h" /* Ini Parser Header */ +#include "daemon_common.h" /* Common definitions and types for daemons */ +#include "daemon_option.h" /* Common options for daemons */ +#include "nodeTimers.h" /* maintenance timer utilities start/stop */ +#include "nodeUtil.h" /* common utilities */ + +/** + * @addtogroup fsmon_base + * @{ + */ + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "fsm" + +#define CONFIG_AUDIT_PERIOD 1 + +#define CONFIG_MASK CONFIG_AUDIT_PERIOD + + +void fsmon_service ( unsigned int nodetype ); + +/** + * @} fsmon_base + */ diff --git a/mtce-common/cgts-mtce-common-1.0/fsmon/fsmonHdlr.cpp b/mtce-common/cgts-mtce-common-1.0/fsmon/fsmonHdlr.cpp new file mode 100644 index 00000000..714a3db4 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/fsmon/fsmonHdlr.cpp @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform File System Monitor Service Handler + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +#include "fsmon.h" +#include "nodeEvent.h" + +#define FILE_TEST_DATA "TEST-FILE" + +typedef char FileNameT[PATH_MAX]; + +static FileNameT _files[] = +{ "/.fs-test", + "/scratch/.fs_test", + "/var/log/.fs_test", + "/var/run/.fs_test", + "/var/lock/.fs_test", + "" +}; + +static struct mtc_timer mtcTimer_audit ; + +/******************************************************************* + * Module Utilities * + ******************************************************************/ +/* SIGCHLD handler support - for waitpid */ +void daemon_sigchld_hdlr ( void ) +{ + ilog("Received SIGCHLD ...\n"); +} + +/* Looks up the timer ID and asserts the corresponding ringer */ +void fsmon_timer_handler ( int sig, siginfo_t *si, void *uc) +{ + timer_t * tid_ptr = (void**)si->si_value.sival_ptr ; + + /* Avoid compiler errors/warnings for parms we must + * have but currently do nothing with */ + UNUSED(sig); + UNUSED(uc); + + if ( !(*tid_ptr) ) + { + // tlog ("Called with a NULL Timer ID\n"); + return ; + } + + /* is event ids fsmon timer */ + if ( *tid_ptr == mtcTimer_audit.tid ) + { + mtcTimer_stop_int_safe ( mtcTimer_audit ); + mtcTimer_audit.ring = true ; + return ; + } + mtcTimer_stop_tid_int_safe (tid_ptr); +} + + +// **************************************************************************** +// Do File Test +// ============ +static bool do_file_test( char filename[] ) +{ + int fd = -1; + char test_data[sizeof(FILE_TEST_DATA)*2]; + ssize_t result; + bool success = false; + + memset( test_data, 0, sizeof(test_data) ); + + // File write test. 
+ fd = open( filename, O_RDWR | O_CREAT | O_CLOEXEC, + S_IRUSR | S_IRGRP | S_IROTH ); + if( 0 > fd ) + { + dlog( "Failed to open %s for writing, error=%s.", + filename, strerror(errno) ); + success = (EINTR == errno); + goto ERROR; + } + + result = write( fd, FILE_TEST_DATA, sizeof(FILE_TEST_DATA) ); + if( 0 > result ) + { + dlog( "Write to %s failed, error=%s.", filename, + strerror(errno) ); + success = (EINTR == errno); + goto ERROR; + } + + close( fd ); + fd = -1; + + // File read test. + fd = open( filename, O_RDONLY | O_CLOEXEC ); + if( 0 > fd ) + { + dlog( "Failed to open %s for reading, error=%s.", filename, + strerror(errno) ); + success = (EINTR == errno); + goto ERROR; + } + + result = read( fd, test_data, sizeof(test_data) ); + if( 0 > result ) + { + dlog( "Read of %s failed, error=%s.", filename, + strerror(errno) ); + success = (EINTR == errno); + goto ERROR; + } + + test_data[sizeof(test_data)-1] = '\0'; + + if( 0 != strcmp( FILE_TEST_DATA, test_data ) ) + { + dlog( "Read data from %s does not match, error=%s.", filename, + strerror(errno) ); + success = false; + goto ERROR; + } + + close( fd ); + fd = -1; + + // Delete file test. 
+ result = remove( filename ); + if( 0 > result ) + { + dlog( "Failed to delete %s, error=%s.", filename, + strerror(errno) ); + success = (EINTR == errno); + goto ERROR; + } + + return( true ); + +ERROR: + if( 0 <= fd ) + { + close( fd ); + } + + remove( filename ); + + return( success ); +} + +void fsmon_service ( unsigned int nodetype ) +{ + int flush_thld = 0 ; + daemon_config_type * cfg_ptr = daemon_get_cfg_ptr (); + + ilog ("Starting 'Audit' timer (%d secs)\n", cfg_ptr->audit_period ); + mtcTimer_start ( mtcTimer_audit, fsmon_timer_handler, cfg_ptr->audit_period ); + + for ( ; ; ) + { + if (mtcTimer_audit.ring == true ) + { + mtcTimer_audit.ring = false ; + + /* only support stall monitor on computes */ + if (( nodetype & COMPUTE_TYPE) == COMPUTE_TYPE ) + { + int file_i; + int rc = PASS ; + for( file_i=0; '\0' != _files[file_i][0]; ++file_i ) + { + if( do_file_test( _files[file_i] ) ) + { + dlog( "File (%s) test passed\n", _files[file_i] ); + } + else + { + wlog( "File (%s) test failed\n", _files[file_i] ); + rc = FAIL ; + } + } + if ( rc == PASS ) + { + ilog ("tests passed\n"); + } + } + mtcTimer_start ( mtcTimer_audit, fsmon_timer_handler, cfg_ptr->audit_period ); + } + + daemon_signal_hdlr (); + + /* Support the log flush config option */ + if ( cfg_ptr->flush ) + { + if ( ++flush_thld > cfg_ptr->flush_thld ) + { + flush_thld = 0 ; + fflush (stdout); + fflush (stderr); + } + } + usleep (500000); + } +} diff --git a/mtce-common/cgts-mtce-common-1.0/fsmon/fsmonInit.cpp b/mtce-common/cgts-mtce-common-1.0/fsmon/fsmonInit.cpp new file mode 100644 index 00000000..f7374b50 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/fsmon/fsmonInit.cpp @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform File System Monitor Service Header + */ + +#include "fsmon.h" + +/* Process Monitor Configuration File */ +#define CONFIG_FILE ((const char *)"/etc/mtc/fsmond.conf") + +static unsigned int my_nodetype = CGTS_NODE_NULL ; + +static char hostname_str [MAX_HOST_NAME_SIZE+1]; + +/** Daemon Configuration Structure + * - Allocation and get pointer + * @see daemon_common.h for daemon_config_type struct format. */ +static daemon_config_type fsmon_config ; +daemon_config_type * daemon_get_cfg_ptr () { return &fsmon_config ; } + +/* Cleanup exit handler */ +void daemon_exit ( void ) +{ + daemon_files_fini (); + daemon_dump_info (); + exit (0); +} + + +/* Startup config read */ +static int fsmon_config_handler ( void * user, + const char * section, + const char * name, + const char * value) +{ + daemon_config_type* config_ptr = (daemon_config_type*)user; + + if (MATCH("config", "audit_period")) + { + config_ptr->audit_period = atoi(value); + config_ptr->mask |= CONFIG_AUDIT_PERIOD ; + } + return (PASS); +} + +/***************************************************************************** + * + * Name : daemon_configure + * + * Purpose : Read process config file settings into the daemon configuration + * + *****************************************************************************/ +int daemon_configure ( void ) +{ + int rc = PASS ; + + if (ini_parse( CONFIG_FILE, fsmon_config_handler, &fsmon_config) < 0) + { + elog("Can't load '%s'\n", CONFIG_FILE ); + } + + get_debug_options ( CONFIG_FILE, &fsmon_config ); + + /* Verify loaded config against an expected mask + * as an ini file fault detection method */ + if ( fsmon_config.mask != CONFIG_MASK ) + { + elog ("Error: Agent configuration failed (%x)\n", + ((-1 ^ fsmon_config.mask) & CONFIG_MASK)); + return (FAIL_INI_CONFIG); + } + + /* This ensures any link aggregation interface overrides the physical */ + fsmon_config.mgmnt_iface = 
daemon_get_iface_master ( fsmon_config.mgmnt_iface ); + + /* Log the startup settings */ + ilog("Interface : %s\n", fsmon_config.mgmnt_iface ); + + ilog("Audit Period: %d\n", fsmon_config.audit_period ); + + return (rc); +} + + +/****************************/ +/* Initialization Utilities */ +/****************************/ + +/* Setup the daemon messaging interfaces/sockets */ +int socket_init ( void ) +{ + return (PASS); +} + +/* The common daemon init */ +int daemon_init ( string iface, string nodeType_str ) +{ + int rc = PASS ; + + /* convert node type to integer */ + my_nodetype = get_host_function_mask ( nodeType_str ) ; + ilog ("Node Type : %s (%d)\n", nodeType_str.c_str(), my_nodetype); + + if ( daemon_files_init ( ) != PASS ) + { + elog ("Pid, log or other files could not be opened\n"); + return ( FAIL_FILES_INIT ) ; + } + + /* Assign interface to config */ + fsmon_config.mgmnt_iface = (char*)iface.data() ; + + /* Bind signal handlers */ + if ( daemon_signal_init () != PASS ) + { + elog ("daemon_signal_init failed\n"); + return ( FAIL_SIGNAL_INIT ); + } + + /************************************************************************ + * There is no point continuing with init ; i.e. running daemon_configure, + * initializing sockets and trying to query for an ip address until the + * daemon's configuration requirements are met. Here we wait for those + * flag files to be present before continuing. 
+ ************************************************************************ + * Wait for /etc/platform/.initial_config_complete & /var/run/.goenabled */ + daemon_wait_for_file ( CONFIG_COMPLETE_FILE , 0); + daemon_wait_for_file ( GOENABLED_MAIN_READY , 0); + + /* Configure the daemon */ + if ( (rc = daemon_configure ( )) != PASS ) + { + elog ("Daemon service configuration failed (rc:%i)\n", rc ); + rc = FAIL_DAEMON_CONFIG ; + } + + /* Setup the messaging sockets */ + else if ( (rc = socket_init ( )) != PASS ) + { + elog ("socket initialization failed (rc:%d)\n", rc ); + rc = FAIL_SOCKET_INIT ; + } + + get_hostname ( &hostname_str[0], MAX_HOST_NAME_SIZE ); + + return (rc); +} + +/* Start the service + * + * 1. Wait for host config (install) complete + * 2. Wait for goenable + * 3. Do startup delay + * 4. run the pmon service inside pmonHdlr.cpp + * + */ +void daemon_service_run ( void ) +{ + fsmon_service ( my_nodetype ); + daemon_exit (); +} + + +/* Push daemon state to log file */ +void daemon_dump_info ( void ) +{ + daemon_dump_membuf_banner (); + daemon_dump_membuf(); +} + +const char MY_DATA [100] = { "eieio\n" } ; +const char * daemon_stream_info ( void ) +{ + return (&MY_DATA[0]); +} + +/** Teat Head Entry */ +int daemon_run_testhead ( void ) +{ + ilog ("Empty test head.\n"); + return (PASS); +} diff --git a/mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmon b/mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmon new file mode 100644 index 00000000..b29f85f5 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmon @@ -0,0 +1,91 @@ +#! /bin/sh +# +# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +. 
/etc/init.d/functions + +DAEMON_NAME="fsmond" +DAEMON="/usr/local/bin/${DAEMON_NAME}" +PIDFILE="/var/run/${DAEMON_NAME}.pid" + +# Linux Standard Base (LSB) Error Codes +RETVAL=0 +GENERIC_ERROR=1 +INVALID_ARGS=2 +UNSUPPORTED_FEATURE=3 +NOT_INSTALLED=5 +NOT_RUNNING=7 + +PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin +export PATH + +if [ ! -e "${DAEMON}" ] ; then + logger "${DAEMON} is missing" + exit ${NOT_INSTALLED} +fi + +case "$1" in + start) + logger "Starting ${DAEMON_NAME}" + echo -n "Starting ${DAEMON_NAME}: " + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + echo -n "is already running " + RETVAL=0 + else + start-stop-daemon --start -b -x ${DAEMON} -- -l + RETVAL=$? + fi + if [ ${RETVAL} -eq 0 ] ; then + pid=`pidof ${DAEMON_NAME}` + echo "OK" + logger "${DAEMON} (${pid})" + else + echo "FAIL" + RETVAL=${GENERIC_ERROR} + fi + ;; + + stop) + logger "Stopping ${DAEMON_NAME}" + echo -n "Stopping ${DAEMON_NAME}: " + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + killproc ${DAEMON_NAME} + fi + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + echo "FAIL" + RETVAL=${NOT_RUNNING} + else + echo "OK" + fi + rm -f ${PIDFILE} + ;; + + restart) + $0 stop + $0 start + ;; + + status) + pid=`pidof ${DAEMON_NAME}` + RETVAL=$? 
+ if [ ${RETVAL} -eq 0 ] ; then + echo "${DAEMON_NAME} is running" + else + echo "${DAEMON_NAME} is NOT running" + RETVAL=${NOT_RUNNING} + fi + ;; + + condrestart) + $0 restart + ;; + + *) + echo "usage: $0 { start | stop | status | restart | condrestart | status }" + ;; +esac + +exit ${RETVAL} diff --git a/mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmon.conf b/mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmon.conf new file mode 100644 index 00000000..1b566261 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmon.conf @@ -0,0 +1,25 @@ +[process] +process = fsmond +service = fsmon +script = /etc/init.d/fsmon +style = lsb +pidfile = /var/run/fsmond.pid +severity = major ; minor, major, critical +restarts = 3 ; restart retries before error assertion +interval = 5 ; number of seconds to wait between restarts +debounce = 20 ; number of seconds that a process needs to remain + ; running before degrade is removed and retry count + ; is cleared. +startuptime = 1 ; Seconds to wait after process start before starting the debounce monitor +mode = passive ; Monitoring mode: passive (default) or active + ; passive: process death monitoring (default: always) + ; active : heartbeat monitoring, i.e. 
request / response messaging + ; ignore : do not monitor or stop monitoring +quorum = 1 ; process is in the host watchdog quorum + +; Active Monitoring Options + +port = 2202 +period = 5 ; monitor period in seconds +timeout = 4 ; Messaging timeout period in seconds, must be shorter than period +threshold = 5 ; Number of back to back heartbeat failures before action diff --git a/mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmon.logrotate b/mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmon.logrotate new file mode 100644 index 00000000..0476a8b2 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmon.logrotate @@ -0,0 +1,15 @@ +#daily +nodateext + +/var/log/fsmond.log +{ + size 10M + start 1 + missingok + rotate 20 + compress + sharedscripts + postrotate + systemctl reload syslog-ng > /dev/null 2>&1 || true + endscript +} diff --git a/mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmon.service b/mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmon.service new file mode 100644 index 00000000..5c8282ac --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmon.service @@ -0,0 +1,23 @@ +[Unit] +Description=Titanium Cloud Maintenance Filesystem Monitor +After=network.target syslog.service config.service +Before=pmon.service + +[Service] +Type=forking +ExecStart=/etc/rc.d/init.d/fsmon start +ExecStop=/etc/rc.d/init.d/fsmon stop +ExecReload=/etc/rc.d/init.d/fsmon reload +PIDFile=/var/run/fsmond.pid + +# Failure handling +TimeoutStartSec=10s +TimeoutStopSec=10s + +# restarted by pmon +Restart=no +RestartSec=5 + +[Install] +WantedBy=multi-user.target + diff --git a/mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmond.conf b/mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmond.conf new file mode 100644 index 00000000..4ea92462 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/fsmon/scripts/fsmond.conf @@ -0,0 +1,20 @@ +; CGTS File System Monitor Configuration File +[config] ; Configuration + +audit_period = 15 ; Period in seconds + 
+[defaults] + +[timeouts] + +[features] + +[debug] ; SIGHUP to reload +debug_timer = 0 ; enable(1) or disable(0) timer logs (tlog) +debug_msg = 0 ; enable(1) or disable(0) message logs (mlog) +debug_state = 0 ; enable(1) or disable(0) state change logs (clog) +debug_level = 0 ; decimal mask 0..15 (8,4,2,1) +debug_all = 0 ; + +flush = 1 ; enable(1) or disable(0) force log flush (main loop) +flush_thld = 5 ; if enabled - force flush after this number of loops diff --git a/mtce-common/cgts-mtce-common-1.0/guest/Makefile b/mtce-common/cgts-mtce-common-1.0/guest/Makefile new file mode 100644 index 00000000..f4c5df17 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/Makefile @@ -0,0 +1,48 @@ +# +# Copyright (c) 2015-2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +SRCS = guestClass.cpp guestInstClass.cpp \ + guestSvrFsm.cpp guestSvrHdlr.cpp \ + guestServer.cpp guestAgent.cpp \ + guestHttpSvr.cpp guestHttpUtil.cpp guestVimApi.cpp \ + guestUtil.cpp guestSvrUtil.cpp guestSvrMsg.cpp \ + guestVirtio.cpp guestStubs.cpp +AGENT_OBJS = \ + guestAgent.o guestClass.o guestHttpSvr.o guestHttpUtil.o guestVimApi.o guestUtil.o guestStubs.o +SERVER_OBJS = \ + guestServer.o guestInstClass.o \ + guestSvrFsm.o guestSvrHdlr.o \ + guestSvrMsg.o guestVirtio.o \ + guestUtil.o guestSvrUtil.o + +OBJS = $(SRCS:.cpp=.o) + +CCPFLAGS = -g -O2 -Wall -Wextra -Werror +LDLIBS = -lstdc++ -ldaemon -lcommon -lfmcommon -ljson-c -levent -lrt -lcrypto -luuid +INCLUDES = -I../daemon -I../common -I../maintenance -I. -I.. + +#CFLAGS = -fPIC -g -O2 -Wall -Wextra -Werror -I../daemon -I../common -I. -I.. + +#.cpp.o: +# $(CXX) $(INCLUDES) $(CCPFLAGS) $(EXTRACCFLAGS) -c $< -o $@ + +build: $(OBJS) + $(CXX) $(CCPFLAGS) $(AGENT_OBJS) -L../daemon -L../common -L../public -L.. $(LDLIBS) -o guestAgent + $(CXX) $(CCPFLAGS) $(SERVER_OBJS) -L../libs -L../daemon -L../common -L../public -L.. $(LDLIBS) -L. 
-o guestServer + +#$(CXX) $(CCPFLAGS) $(SERVER_OBJS) -L../libs -L../daemon -L../common -L../public -L.. $(LDLIBS) -L. -ltrap_handler -lalarm_clock -latomic -o guestServer + +.cpp.o: + $(CXX) $(INCLUDES) $(CCPFLAGS) $(EXTRACCFLAGS) -c $< -o $@ + +common: + ( cd ../common ; make clean ; make lib VER=$(VER) VER_MJR=$(VER_MJR)) + +daemon: + ( cd ../daemon ; make clean ; make lib VER=$(VER) VER_MJR=$(VER_MJR)) + +clean: + @rm -f $(OBJ) guestAgent guestServer *.o *.a libghbb.so.1 diff --git a/mtce-common/cgts-mtce-common-1.0/guest/README.txt b/mtce-common/cgts-mtce-common-1.0/guest/README.txt new file mode 100644 index 00000000..c5732e5f --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/README.txt @@ -0,0 +1,88 @@ +This maintenance common guest service was introduced in cgts release 2 in order to +provide a means of guest heartbeat orchestration under VIM (system management) control. + +This module includes header files exported to the sysroot by the original cgts +release 1 heartbeat service for messaging structure definition. + +The original cgts release 1 heartbeat service source can be found here + + $MY_REPO/addons/wr-cgcs/layers/middleware/vm-mgmt/recipes-common/heartbeat + +That original module builds a common heartbeat daemon capable of running on the +controller, compute and within the vm. The controller version did nothing except +respond to heartbeats from the compute version. The compute instance was the +brains of the heartbeat service responsible for starting heartbeat to the VMs and +interfacing with Nova to take actions. + +The introduction of this module changes a large part of that. +Here is a summary of the changes + +1. Packaging and Initialization: + + The source code for the original cgts release 1 heartbeat daemon remains + unchanged but the packaging of the heartbeat daemon and heartbeat_init + script are modified + + a. 
image.inc and filter_out packaging files are modified to exclude the + heartbeat daemon from being packaged on the controller. + + b. because the heartbeat daemon is still packaged on the compute + heartbeat_init script is modified to prevent the heartbeat + daemon from being spawned on the compute host. + + Note: the heartbeat daemon continues to be packaged and spawned in the guest + as it did in release 1 - no change there. + +2. Compute Function: Heartbeats the guest and reports failures. + + Package: cgts-mtce-common-guestServer-1.0-r54.0.x86_64.rpm + Binary: /usr/local/bin/guestServer + Init: /etc/init.d/guestServer + Managed: /etc/pmon.d/guestServer.pmon + +3. Controller Function: Guest heartbeat control and event proxy + + Package: cgts-mtce-common-guestAgent-x.x-rxx.x86_64.rpm + Binary: /usr/local/bin/guestAgent + Init: /usr/lib/ocf/resource.d/platform/guestAgent + Managed: by SM + + The heartbeat daemon that did run on the controller is replaced by a new + guestAgent daemon performing the following functions + + a. HTTP Command Receiver : to which the VIM sends instance control commands. + b. HTTP Event Transmitter: to which the daemon can send instance failure + events and state query commands to the VIM. + c. State query audit to the + +Behavioral Executive Summary: + +The guestServer daemon (on the compute) listens for (using inotify) 'uuid' +UNIX named heartbeat communication channels that nova:libvirt creates and +opens in /var/lib/libvirt/qemu whenever an instance is created. Example: + +/var/lib/libvirt/qemu/cgcs.heartbeat.02e172a9-aeae-4cef-a6bc-7eb9de7825d6.sock + +The guestServer connects (and auto reconnects) to these channels when they are +created or modified and disconnects from them when deleted. + +Once connected, the guestServer listens for TCP messages over that UNIX named +socket. + +If a guest supports heartbeating then it will run the heartbeat_init script +during its initialization process. 
Once the heartbeat daemon is running it +will periodically send init request messages to the libvirt pipe that, +on the host side, the guestServer is listening for. + +On receipt of an init message, the guestServer will extract name, timeout +and corrective action info from it and then send back an 'init_ack' followed +by a continuous heartbeat cycle consisting of sending 'challenge' request +messages and expecting correct computational responses within a guest specified +heartbeat window. Failure to comply will result in a corrective action that was +specified in the init message from the guest. + +The VIM is responsible for enabling and disabling heartbeat fault reporting as +well as taking the guest specified corrective action in the event of a heartbeat +failure. + +The VIM guestAgent running diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestAgent.cpp b/mtce-common/cgts-mtce-common-1.0/guest/guestAgent.cpp new file mode 100644 index 00000000..98ea342f --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestAgent.cpp @@ -0,0 +1,1434 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Guest Services Agent Daemon + * + * Services: heartbeat + */ + +#include +#include +#include +#include +#include +#include +#include +#include /* for ... hostent */ +#include +#include +#include +#include +#include +#include /* for ... close and usleep */ +#include +#include /* for ... RTMGRP_LINK */ + +using namespace std; + +#include "daemon_ini.h" /* Ini Parser Header */ +#include "daemon_common.h" /* Common definitions and types for daemons */ +#include "daemon_option.h" /* Common options for daemons */ + +#include "nodeBase.h" +#include "nodeMacro.h" /* for ... CREATE_NONBLOCK_INET_UDP_RX_SOCKET */ +#include "nodeUtil.h" /* for ... get_ip_addresses */ +#include "nodeTimers.h" /* maintenance timer utilities start/stop */ +#include "jsonUtil.h" /* for ... 
jsonApi_get_key_value */ +#include "httpUtil.h" /* for ... */ + +#include "guestBase.h" /* Guest services Base Header File */ +#include "guestClass.h" /* */ +#include "guestUtil.h" /* for ... guestUtil_inst_init */ + +#include "guestHttpUtil.h" /* for ... guestHttpUtil_init */ +#include "guestHttpSvr.h" /* for ... guestHttpSvr_init/_fini/_look */ +#include "guestVimApi.h" /* for ... guestVimApi_getHostState */ + +/* Where to send events */ +string guestAgent_ip = "" ; + +/* Process Monitor Control Structure */ +static ctrl_type _ctrl ; + +/** This heartbeat service inventory is tracked by + * the same nodeLinkClass that maintenance uses. + * + */ +guestHostClass hostInv ; +guestHostClass * get_hostInv_ptr ( void ) +{ + return (&hostInv); +} + +/** Setup the pointer */ +int module_init ( void ) +{ + return (PASS); +} + +msgSock_type * get_mtclogd_sockPtr ( void ) +{ + return (&_ctrl.sock.mtclogd); +} + +void daemon_sigchld_hdlr ( void ) +{ + ; /* dlog("Received SIGCHLD ... no action\n"); */ +} + +/** + * Daemon Configuration Structure - The allocated struct + * @see daemon_common.h for daemon_config_type struct format. 
+ */
+static daemon_config_type guest_config ;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Return the address of this daemon's configuration structure */
+daemon_config_type * daemon_get_cfg_ptr () { return &guest_config ; }
+#ifdef __cplusplus
+}
+#endif
+
+/**
+ * Cleanup exit handler.
+ *
+ * Dumps the daemon info, releases the daemon files, deletes every
+ * messaging socket object referenced from _ctrl.sock, shuts down the
+ * guest http server and http utility modules, flushes stdout/stderr
+ * and then terminates the process with exit status 0.
+ *
+ * This function does not return.
+ */
+void daemon_exit ( void )
+{
+    daemon_dump_info ();
+    daemon_files_fini ();
+
+    /* Close the messaging sockets */
+    if ( _ctrl.sock.server_rx_sock )
+        delete (_ctrl.sock.server_rx_sock);
+
+    if ( _ctrl.sock.server_tx_sock )
+        delete (_ctrl.sock.server_tx_sock);
+
+    if ( _ctrl.sock.agent_rx_local_sock )
+        delete (_ctrl.sock.agent_rx_local_sock);
+
+    if ( _ctrl.sock.agent_rx_float_sock )
+        delete (_ctrl.sock.agent_rx_float_sock);
+
+    if ( _ctrl.sock.agent_tx_sock )
+        delete (_ctrl.sock.agent_tx_sock);
+
+    /* The mtcAgent event transmitter and command receiver are allocated
+     * with 'new' by _socket_init as well ; release them like the rest. */
+    if ( _ctrl.sock.mtc_event_tx_sock )
+        delete (_ctrl.sock.mtc_event_tx_sock);
+
+    if ( _ctrl.sock.mtc_cmd_sock )
+        delete (_ctrl.sock.mtc_cmd_sock);
+
+    guestHttpSvr_fini ();
+    guestHttpUtil_fini ();
+
+    fflush (stdout);
+    fflush (stderr);
+
+    exit (0);
+}
+
+#define CONFIG_CHALLENGE_PERIOD (1)
+
+/**
+ * Wait for this controller's addressing to be provisioned.
+ *
+ * Clears the cached local and floating ip strings and then loops calling
+ * get_ip_addresses until both are non-empty. While waiting it logs a
+ * single 'waiting' message, calls mtcWait_secs (3) between attempts and
+ * services pending signals through daemon_signal_hdlr on every pass.
+ *
+ * @return PASS ; there is no failure path, the loop only ends once both
+ *         addresses are known.
+ */
+int _self_provision ( void )
+{
+    int rc = PASS ;
+    int waiting_msg = false ;
+
+    hostInv.hostBase.my_float_ip.clear();
+    hostInv.hostBase.my_local_ip.clear();
+
+    for ( ;; )
+    {
+        get_ip_addresses ( hostInv.hostBase.my_hostname,
+                           hostInv.hostBase.my_local_ip ,
+                           hostInv.hostBase.my_float_ip );
+
+        if ( hostInv.hostBase.my_float_ip.empty() || hostInv.hostBase.my_local_ip.empty() )
+        {
+            /* only log the wait once */
+            if ( waiting_msg == false )
+            {
+                ilog ("Waiting on ip address config ...\n");
+                waiting_msg = true ;
+
+                /* Flush the init data */
+                fflush (stdout);
+                fflush (stderr);
+            }
+            mtcWait_secs (3);
+        }
+        else
+        {
+            break ;
+        }
+        daemon_signal_hdlr ();
+    }
+    return (rc);
+}
+
+
+/** Control Config Mask */
+// #define CONFIG_MASK (CONFIG_CHALLENGE_PERIOD)
+/** Client Config mask */
+#define CONFIG_MASK (CONFIG_CLIENT_RX_PORT |\
+                     CONFIG_AGENT_RX_PORT |\
+                     CONFIG_MTC_CMD_PORT |\
+                     CONFIG_VIM_CMD_RX_PORT |\
+                     CONFIG_VIM_EVENT_RX_PORT |\
+                     CONFIG_MTC_EVENT_PORT)
+
+/**
+ * Startup config read ; ini_parse handler for the daemon's own ini file.
+ *
+ * Stores the agent rx port, the vim command port and the client rx port
+ * in the daemon config passed through 'user' and sets the matching bit
+ * in its mask for each one found.
+ *
+ * @return FAIL when the section/name pair was consumed, PASS when it was
+ *         not recognised. All handlers in this file follow this
+ *         convention ; presumably it maps onto the ini parser's
+ *         'non-zero means handled' contract - TODO confirm against
+ *         daemon_ini.h
+ */
+static int _config_handler ( void * user,
+                             const char * section,
+                             const char * name,
+                             const char * value)
+{
+    daemon_config_type* config_ptr = (daemon_config_type*)user;
+
+    if (MATCH("agent", "rx_port"))
+    {
+        config_ptr->agent_rx_port = atoi(value);
+        config_ptr->mask |= CONFIG_AGENT_RX_PORT ;
+    }
+    else if (MATCH("agent", "vim_cmd_port"))
+    {
+        config_ptr->vim_cmd_port = atoi(value);
+        config_ptr->mask |= CONFIG_VIM_CMD_RX_PORT ;
+    }
+    else if (MATCH("client", "rx_port"))
+    {
+        config_ptr->client_rx_port = atoi(value);
+        config_ptr->mask |= CONFIG_CLIENT_RX_PORT ;
+    }
+    else
+    {
+        return (PASS);
+    }
+    return (FAIL);
+}
+
+/**
+ * Startup config read ; ini_parse handler for MTCE_CONF_FILE.
+ *
+ * Stores the mtcAgent event port and the mtcAgent-to-guest command port
+ * in the daemon config passed through 'user' and sets the matching mask
+ * bits.
+ *
+ * @return same convention as _config_handler ; FAIL when the key was
+ *         consumed, PASS when it was not recognised.
+ */
+static int mtc_config_handler ( void * user,
+                                const char * section,
+                                const char * name,
+                                const char * value)
+{
+    daemon_config_type* config_ptr = (daemon_config_type*)user;
+
+    if (MATCH("agent", "hbs_to_mtc_event_port"))
+    {
+        config_ptr->hbs_to_mtc_event_port = atoi(value);
+        config_ptr->mask |= CONFIG_MTC_EVENT_PORT ;
+    }
+    /* This must be an 'else if'. As two independent 'if' statements a
+     * matched hbs_to_mtc_event_port fell through to the 'else' below
+     * and was reported as if it had not been recognised. */
+    else if (MATCH("agent", "mtc_to_guest_cmd_port"))
+    {
+        config_ptr->mtc_to_guest_cmd_port = atoi(value);
+        config_ptr->mask |= CONFIG_MTC_CMD_PORT ;
+    }
+    else
+    {
+        return (PASS);
+    }
+    return (FAIL);
+}
+
+/**
+ * ini_parse handler for NFVI_PLUGIN_CFG_FILE.
+ *
+ * Stores the 'port' value of the 'guest-rest-api' section as the vim
+ * event port and sets CONFIG_VIM_EVENT_RX_PORT in the config mask.
+ *
+ * @return same convention as _config_handler.
+ */
+int _nfvi_handler( void * user,
+                   const char * section,
+                   const char * name,
+                   const char * value)
+{
+    daemon_config_type* config_ptr = (daemon_config_type*)user;
+
+    if (MATCH("guest-rest-api", "port"))
+    {
+        config_ptr->vim_event_port = atoi(value);
+        config_ptr->mask |= CONFIG_VIM_EVENT_RX_PORT ;
+    }
+    else
+    {
+        return (PASS);
+    }
+    return (FAIL);
+}
+
+/**
+ * Read the ini settings into the daemon configuration.
+ *
+ * Parses, in order, /etc/mtc/<program name>.ini, MTCE_CONF_FILE and
+ * NFVI_PLUGIN_CFG_FILE, then verifies that every bit of CONFIG_MASK was
+ * set by the handlers. On success the management interface is resolved
+ * through daemon_get_iface_master, the settings are logged and the
+ * daemon blocks in _self_provision until its ip addresses are known.
+ *
+ * @return PASS on success
+ *         FAIL_LOAD_INI   if ini_parse returns a negative value for any file
+ *         FAIL_INI_CONFIG if the accumulated mask differs from CONFIG_MASK
+ */
+int daemon_configure ( void )
+{
+    /* Read the ini */
+    char config_fn[100] ;
+    guest_config.mask = 0 ;
+
+    /* bounded write ; the program name is not under this function's control */
+    snprintf ( &config_fn[0], sizeof(config_fn), "/etc/mtc/%s.ini", program_invocation_short_name );
+    if (ini_parse(config_fn, _config_handler, &guest_config) < 0)
+    {
+        elog("Can't load '%s'\n", config_fn );
+        return (FAIL_LOAD_INI);
+    }
+
+    get_debug_options ( config_fn, &guest_config );
+
+    if (ini_parse(MTCE_CONF_FILE, mtc_config_handler, &guest_config) < 0)
+    {
+        elog("Can't load '%s'\n", MTCE_CONF_FILE );
+        return (FAIL_LOAD_INI);
+    }
+
+    if (ini_parse(NFVI_PLUGIN_CFG_FILE, _nfvi_handler, &guest_config) < 0)
+    {
+        elog ("Can't load '%s'\n", NFVI_PLUGIN_CFG_FILE );
+        return (FAIL_LOAD_INI);
+    }
+
+    /* Verify loaded config against an expected mask
+     * as an ini file fault detection method */
+    if ( guest_config.mask != CONFIG_MASK )
+    {
+        /* first value logged is the set of expected bits that are missing */
+        elog ("Error: Agent configuration failed (%x) (%x:%x)\n",
+               ((-1 ^ guest_config.mask) & CONFIG_MASK),
+               guest_config.mask, CONFIG_MASK );
+        return (FAIL_INI_CONFIG);
+    }
+
+    guest_config.mgmnt_iface = daemon_get_iface_master ( guest_config.mgmnt_iface );
+    ilog("Interface : %s\n", guest_config.mgmnt_iface );
+    ilog("Command Port: %d (rx) from mtcAgent\n", guest_config.mtc_to_guest_cmd_port );
+    ilog("Event Port: %d (tx) to mtcAgent\n", guest_config.hbs_to_mtc_event_port );
+    ilog("Command Port: %d (tx) to guestServer\n", guest_config.client_rx_port);
+    ilog("Event Port: %d (rx) from guestServer\n",guest_config.agent_rx_port );
+    ilog("Command Port: %d (rx) from vim\n",guest_config.vim_cmd_port );
+    ilog("Event Port: %d (rx) to vim\n",guest_config.vim_event_port );
+
+    /* provision this controller */
+    if ( _self_provision () != PASS )
+    {
+        elog ("Failed to self provision active controller\n");
+        daemon_exit ();
+    }
+
+    return (PASS);
+}
+
+/**
+ * Timer signal handler.
+ *
+ * The expired timer's id is reached through si->si_value.sival_ptr.
+ * If it is the control timer or the host inventory audit timer then
+ * that timer is stopped and its 'ring' flag is set ; any other non-null
+ * timer id is just stopped. A null timer id is ignored.
+ */
+void _timer_handler ( int sig, siginfo_t *si, void *uc)
+{
+    timer_t * tid_ptr = (void**)si->si_value.sival_ptr ;
+
+    /* Avoid compiler errors/warnings for parms we must
+     * have but currently do nothing with */
+    sig=sig ; uc = uc ;
+
+    if ( !(*tid_ptr) )
+    {
+        // tlog ("Called with a NULL Timer ID\n");
+        return ;
+    }
+    /* is base ctrl mtc timer */
+    else if (( *tid_ptr == _ctrl.timer.tid ) )
+    {
+        mtcTimer_stop_int_safe ( _ctrl.timer );
+        _ctrl.timer.ring = true ;
+    }
+
+    /* is base object mtc timer */
+    else if (( *tid_ptr == hostInv.audit_timer.tid ) )
+    {
+        mtcTimer_stop_int_safe ( hostInv.audit_timer );
+        hostInv.audit_timer.ring = true ;
+    }
+    else
+    {
+        mtcTimer_stop_tid_int_safe (tid_ptr);
+    }
+}
+
+/**
+ * Setup the UDP transmit socket used to send logs to mtclogd.
+ *
+ * The port is read from the daemon config (daemon_log_port), stored in
+ * ctrl_ptr->sock.mtclogd.port and the socket is created towards
+ * LOOPBACK_IP with the CREATE_REUSABLE_INET_UDP_TX_SOCKET macro, which
+ * reports its result through 'rc'.
+ *
+ * @param ctrl_ptr control structure whose sock.mtclogd members are filled in
+ * @return PASS on success, otherwise the non-zero value left in rc by the macro
+ */
+int mtclogd_port_init ( ctrl_type * ctrl_ptr )
+{
+    int rc = PASS ;
+    int port = ctrl_ptr->sock.mtclogd.port = daemon_get_cfg_ptr()->daemon_log_port ;
+    CREATE_REUSABLE_INET_UDP_TX_SOCKET ( LOOPBACK_IP,
+                                         port,
+                                         ctrl_ptr->sock.mtclogd.sock,
+                                         ctrl_ptr->sock.mtclogd.addr,
+                                         ctrl_ptr->sock.mtclogd.port,
+                                         ctrl_ptr->sock.mtclogd.len,
+                                         "mtc logger message",
+                                         rc );
+    if ( rc )
+    {
+        elog ("Failed to setup messaging to mtclogd on port %d\n", port );
+    }
+    return (rc);
+}
+
+/****************************/
+/* Initialization Utilities */
+/****************************/
+
+/**
+ * Construct the messaging sockets
+ *
+ *   1. UDP transmit socket towards the guestServer
+ *   2. UDP transmit socket towards mtcAgent   (loopback)
+ *   3. UDP receive  socket for mtcAgent commands (loopback)
+ *   4. UDP receive  socket for guestServer events on the floating ip
+ *   5. UDP receive  socket for guestServer events on the local ip
+ *   6. mtclogd transmit socket (failure is tolerated)
+ *   7. the VIM command http server
+ *
+ * @return PASS, the failing socket object's return_status, or the result
+ *         of guestHttpSvr_init
+ */
+int _socket_init ( void )
+{
+    int rc = PASS ;
+
+    guestAgent_ip = getipbyname ( CONTROLLER );
+    ilog ("ControllerIP: %s\n", guestAgent_ip.c_str());
+
+    /* Read the ports the socket struct */
+    _ctrl.sock.agent_rx_port = guest_config.agent_rx_port ;
+    _ctrl.sock.server_rx_port = guest_config.client_rx_port ;
+
+    /******************************************************************/
+    /* UDP Tx Message Socket Towards guestServer                      */
+    /******************************************************************/
+
+    _ctrl.sock.agent_tx_sock = new msgClassTx(guestAgent_ip.c_str(), guest_config.client_rx_port, IPPROTO_UDP, guest_config.mgmnt_iface);
+    rc = _ctrl.sock.agent_tx_sock->return_status;
+    if ( rc )
+    {
+        elog ("Failed to setup 'guestAgent' transmitter\n" );
+        return (rc) ;
+    }
+
+    /******************************************************************/
+    /* UDP Tx Message Socket Towards mtcAgent                         */
+    /******************************************************************/
+
+    _ctrl.sock.mtc_event_tx_port = guest_config.hbs_to_mtc_event_port ;
+    _ctrl.sock.mtc_event_tx_sock = new msgClassTx(LOOPBACK_IP, guest_config.hbs_to_mtc_event_port, IPPROTO_UDP);
+    rc = _ctrl.sock.mtc_event_tx_sock->return_status;
+    if ( rc )
+    {
+        elog ("Failed to setup 'mtcAgent' 'lo' transmitter on port (%d)\n",
+               _ctrl.sock.mtc_event_tx_port );
+
+        return (rc) ;
+    }
+
+    /***************************************************************/
+    /* Non-Blocking UDP Rx Message Socket for Maintenance Commands */
+    /***************************************************************/
+
+    _ctrl.sock.mtc_cmd_port = guest_config.mtc_to_guest_cmd_port ;
+    _ctrl.sock.mtc_cmd_sock = new msgClassRx(LOOPBACK_IP, guest_config.mtc_to_guest_cmd_port, IPPROTO_UDP);
+    rc = _ctrl.sock.mtc_cmd_sock->return_status;
+    if ( rc )
+    {
+        elog ("Failed to setup mtce command receive on port %d\n",
+               _ctrl.sock.mtc_cmd_port );
+        return (rc) ;
+    }
+
+    /* Get a socket that listens to the controller's FLOATING IP */
+    /* This is the socket that the guestAgent receives events from
+     * the guestServer from the compute on */
+    _ctrl.sock.agent_rx_float_sock = new msgClassRx(hostInv.hostBase.my_float_ip.c_str(), guest_config.agent_rx_port, IPPROTO_UDP);
+    rc = _ctrl.sock.agent_rx_float_sock->return_status;
+    if ( rc )
+    {
+        /* report the port this receiver was actually created on */
+        elog ("Failed to setup 'guestServer' receiver on port %d\n",
+               _ctrl.sock.agent_rx_port );
+        return (rc) ;
+    }
+
+    /* Get a socket that listens to the controller's LOCAL IP */
+    /* This is the socket that the guestAgent receives events from
+     * the guestServer from the compute on */
+    _ctrl.sock.agent_rx_local_sock = new msgClassRx(hostInv.hostBase.my_local_ip.c_str(), guest_config.agent_rx_port, IPPROTO_UDP);
+    rc = _ctrl.sock.agent_rx_local_sock->return_status;
+    if ( rc )
+    {
+        /* report the port this receiver was actually created on */
+        elog ("Failed to setup 'guestServer' receiver on port %d\n",
+               _ctrl.sock.agent_rx_port );
+        return (rc) ;
+    }
+
+    /* Don't fail the daemon if the logger port is not working */
+    mtclogd_port_init (&_ctrl);
+
+    rc = guestHttpSvr_init ( guest_config.vim_cmd_port );
+
+    return (rc) ;
+}
+
+/**
+ * The common daemon init for the guestAgent.
+ *
+ * Clears the socket control structure, initializes the control timer,
+ * records the management interface name, initializes the http utility
+ * and then runs, stopping at the first failure: daemon files init,
+ * signal handler init, daemon_configure and _socket_init.
+ *
+ * @param iface    management interface name
+ * @param nodetype not used by this service
+ *
+ * @return PASS, FAIL_FILES_INIT, FAIL_SIGNAL_INIT, FAIL_DAEMON_CONFIG
+ *         or FAIL_SOCKET_INIT
+ */
+int daemon_init ( string iface, string nodetype )
+{
+    int rc = PASS ;
+
+    /* Not used by this service */
+    nodetype = nodetype ;
+
+    /* Initialize socket construct and pointer to it */
+    memset ( &_ctrl.sock, 0, sizeof(_ctrl.sock));
+
+    /* initialize the timer */
+    mtcTimer_init ( _ctrl.timer );
+    _ctrl.timer.hostname = "guestAgent" ;
+
+    /* Assign interface to config ; c_str() guarantees the NUL terminator
+     * that the '%s' consumers of mgmnt_iface rely on.
+     *
+     * NOTE(review): 'iface' is a by-value parameter so this pointer is
+     * only valid until daemon_init returns. daemon_configure replaces it
+     * with the result of daemon_get_iface_master - confirm that result
+     * never aliases this buffer. */
+    guest_config.mgmnt_iface = (char*)iface.c_str() ;
+
+    httpUtil_init ();
+
+    if ( daemon_files_init ( ) != PASS )
+    {
+        elog ("Pid, log or other files could not be opened\n");
+        rc = FAIL_FILES_INIT ;
+    }
+
+    /* Bind signal handlers */
+    else if ( daemon_signal_init () != PASS )
+    {
+        elog ("daemon_signal_init failed\n");
+        rc = FAIL_SIGNAL_INIT ;
+    }
+
+    /* Configure the agent */
+    else if ( (rc = daemon_configure ( )) != PASS )
+    {
+        elog ("Daemon service configuration failed (rc:%i)\n", rc );
+        rc = FAIL_DAEMON_CONFIG ;
+    }
+
+    /* Setup the heartbeat service messaging sockets */
+    else if ( (rc = _socket_init ( )) != PASS )
+    {
+        elog ("socket initialization failed (rc:%d)\n", rc );
+        rc = FAIL_SOCKET_INIT ;
+    }
+    else
+    {
+        _ctrl.timer.hostname = hostInv.hostBase.my_hostname ;
+    }
+
+    return (rc);
+}
+
+
+/***************************************************************************
+ *
+ * Name:        send_cmd_to_guestServer
+ *
+ * Description: Messaging interface capable of building command specific
+ *              messages and sending them to the guestServer daemon on
+ *              the specified compute host.
+ *
+ * Parameters:  hostname  - host whose guestServer receives the command
+ *              cmd       - MTC_EVENT_LOOPBACK, MTC_CMD_ADD_INST,
+ *                          MTC_CMD_DEL_INST, MTC_CMD_MOD_INST,
+ *                          MTC_CMD_MOD_HOST, MTC_CMD_QRY_INST,
+ *                          MTC_CMD_VOTE_INST or MTC_CMD_NOTIFY_INST
+ *              uuid      - instance (or host) uuid put in the payload
+ *              reporting - requested heartbeat reporting state
+ *              event     - event name ; used by vote / notify only
+ *
+ * Returns:     PASS
+ *              FAIL_BAD_CASE      - unsupported command
+ *              FAIL_TO_TRANSMIT   - payload does not fit the message
+ *                                   buffer or a short write occurred
+ *              FAIL_SOCKET_SENDTO - the socket write failed
+ *
+ * TODO: setup acknowledge mechanism using guestHost
+ *
+ ***************************************************************************/
+int send_cmd_to_guestServer ( string hostname,
+                              unsigned int cmd,
+                              string uuid,
+                              bool reporting,
+                              string event)
+{
+    mtc_message_type msg ;
+    int bytes_sent = 0 ;
+    int bytes_to_send = 0 ;
+    int rc = PASS ;
+
+    string ip = hostInv.get_host_ip(hostname) ;
+
+
+    memset (&msg,0,sizeof(mtc_message_type));
+    memcpy (&msg.hdr[0], get_guest_msg_hdr(), MSG_HEADER_SIZE );
+
+    /* Start creating the json string to the client */
+    string payload = "{" ;
+    payload.append("\"source\":\"");
+    payload.append(hostInv.hostBase.my_float_ip);
+    payload.append("\"");
+
+    if ( cmd == MTC_EVENT_LOOPBACK )
+    {
+        ; /* go with default payload only */
+    }
+    else if ( cmd == MTC_CMD_ADD_INST )
+    {
+        ilog ("%s %s 'add' instance ; sent to guestServer\n", hostname.c_str(), uuid.c_str());
+        payload.append(",\"uuid\":\"");
+        payload.append(uuid);
+        payload.append("\",\"service\":\"heartbeat\"");
+        payload.append(",\"state\":\"");
+        if ( reporting == true )
+            payload.append("enabled\"");
+        else
+            payload.append("disabled\"");
+    }
+    else if ( cmd == MTC_CMD_DEL_INST )
+    {
+        ilog ("%s %s 'delete' instance ; sent to guestServer\n", hostname.c_str(), uuid.c_str());
+        payload.append(",\"uuid\":\"");
+        payload.append(uuid);
+        payload.append("\",\"service\":\"heartbeat\"");
+        payload.append(",\"state\":\"disabled\"");
+    }
+    else if ( cmd == MTC_CMD_MOD_INST )
+    {
+        /* this may be a frequent log so its changed to a message log */
+        mlog ("%s %s 'modify' instance ; sent to guestServer\n", hostname.c_str(), uuid.c_str());
+        payload.append(",\"uuid\":\"");
+        payload.append(uuid);
+        payload.append("\",\"service\":\"heartbeat\"");
+
+        payload.append(",\"state\":\"");
+        if ( reporting == true )
+            payload.append("enabled\"");
+        else
+            payload.append("disabled\"");
+    }
+    else if ( cmd == MTC_CMD_MOD_HOST )
+    {
+        payload.append(",\"uuid\":\"");
+        payload.append(uuid);
+
+        /* In this host case , the instance heartbeat member
+         * contains the state we want the entire host to be
+         * put to */
+        payload.append("\",\"heartbeat\":\"");
+        if ( reporting == true )
+            payload.append("enabled\"");
+        else
+            payload.append("disabled\"");
+
+        ilog ("%s %s 'modify' host (reporting=%s); sent to guestServer\n",
+                  hostname.c_str(),
+                  uuid.c_str(),
+                  reporting ? "enabled" : "disabled" );
+    }
+    else if ( cmd == MTC_CMD_QRY_INST )
+    {
+        /* setting the query flag so the FSM
+         * knows to wait for the response. */
+        hostInv.set_query_flag ( hostname );
+    }
+    else if ( cmd == MTC_CMD_VOTE_INST
+           || cmd == MTC_CMD_NOTIFY_INST )
+    {
+        bool vote ;
+        payload.append(",\"uuid\":\"");
+        payload.append(uuid);
+        payload.append("\",\"event\":\"");
+        payload.append(event);
+        payload.append("\"");
+
+        if ( cmd == MTC_CMD_VOTE_INST )
+            vote = true ;
+        else
+            vote = false ;
+
+        ilog ("%s %s '%s' host (event=%s); sent to guestServer\n",
+                  hostname.c_str(),
+                  uuid.c_str(),
+                  vote ? "vote" : "notify",
+                  event.c_str());
+    }
+    else
+    {
+        slog ("unsupported command (%d)\n", cmd );
+        return (FAIL_BAD_CASE);
+    }
+
+    payload.append("}");
+
+    /* The payload is copied into the zeroed msg.buf and its length is
+     * later taken with strlen, so it must leave room for the terminating
+     * NUL. uuid and event are caller supplied so refuse anything that
+     * does not fit rather than overrun the message.
+     * NOTE(review): BUF_SIZE is taken to be the capacity of msg.buf, as
+     * the bytes_to_send arithmetic below implies - confirm. */
+    if ( payload.length() >= (size_t)(BUF_SIZE) )
+    {
+        elog ("%s command (%d) payload too large (%d bytes)\n",
+                  hostname.c_str(), cmd, (int)payload.length());
+        return (FAIL_TO_TRANSMIT);
+    }
+
+    memcpy (&msg.buf[0], payload.data(), payload.length());
+    msg.cmd = cmd ;
+
+    /* only send the header and the used part of the buffer */
+    bytes_to_send = ((sizeof(mtc_message_type))-(BUF_SIZE))+(strlen(msg.buf)) ;
+    print_mtc_message ( &msg );
+    bytes_sent = _ctrl.sock.agent_tx_sock->write((char *)&msg, bytes_to_send,ip.c_str(), _ctrl.sock.server_rx_port);
+
+    if ( 0 > bytes_sent )
+    {
+        /* rc is still PASS at this point and says nothing about the
+         * failure ; log errno like send_event_to_mtcAgent does */
+        elog("%s failed to send command (%d:%m)\n", hostname.c_str(), errno);
+        rc = FAIL_SOCKET_SENDTO ;
+    }
+    else if ( bytes_to_send != bytes_sent )
+    {
+        wlog ("%s transmit byte count error (%d:%d)\n",
+                  hostname.c_str(), bytes_to_send, bytes_sent );
+        rc = FAIL_TO_TRANSMIT ;
+    }
+    else
+    {
+        mlog1 ("Transmit to %s port %5d\n",
+                  _ctrl.sock.agent_tx_sock->get_dst_str(),
+                  _ctrl.sock.agent_tx_sock->get_dst_addr()->getPort());
+        rc = PASS ;
+
+        /* Schedule receive ACK mechanism - bind a callback */
+    }
+
+    return (rc);
+}
+
+
+/***************************************************************************
+ *
+ * Name:        send_event_to_mtcAgent
+ *
+ * Description: Messaging interface capable of building the specified event
+ *              messages and sending them to the mtcAgent daemon locally.
+ *
+ * Parameters:  hostname - name used as the log prefix
+ *              event    - only MTC_EVENT_MONITOR_READY is supported
+ *
+ * Returns:     PASS
+ *              FAIL_BAD_CASE      - unsupported event
+ *              FAIL_SOCKET_SENDTO - the socket write failed
+ *              FAIL_TO_TRANSMIT   - a short write occurred
+ *
+ * TODO: setup acknowledge mechanism using guestHost
+ *
+ ***************************************************************************/
+int send_event_to_mtcAgent ( string hostname,
+                             unsigned int event)
+{
+    int bytes_sent = 0 ;
+    int bytes_to_send = 0 ;
+    int rc = FAIL ;
+
+    mtc_message_type msg ;
+    memset (&msg, 0 , sizeof(mtc_message_type));
+    memcpy (&msg.hdr[0], get_guest_msg_hdr(), MSG_HEADER_SIZE );
+
+    /* the sender's name follows the common message header */
+    memcpy (&msg.hdr[MSG_HEADER_SIZE], "guestAgent", strlen("guestAgent"));
+
+    /* the 'monitor ready' event is the only one this interface sends */
+    if ( event != MTC_EVENT_MONITOR_READY )
+    {
+        slog ("Unsupported event (%d)\n", event );
+        return ( FAIL_BAD_CASE );
+    }
+
+    ilog ("%s requesting inventory from mtcAgent\n", hostname.c_str());
+
+    msg.cmd = event ;
+    print_mtc_message (&msg );
+
+    /* only send the header and the used part of the buffer */
+    bytes_to_send = ((sizeof(mtc_message_type))-(BUF_SIZE))+(strlen(msg.buf)) ;
+    bytes_sent = _ctrl.sock.mtc_event_tx_sock->write((char*)&msg, bytes_to_send);
+    if ( 0 > bytes_sent )
+    {
+        elog("%s Failed to send event (%d:%m)\n", hostname.c_str(), errno );
+        rc = FAIL_SOCKET_SENDTO ;
+    }
+    else if ( bytes_to_send != bytes_sent )
+    {
+        wlog ("%s transmit byte count error (%d:%d)\n",
+                  hostname.c_str(), bytes_to_send, bytes_sent );
+        rc = FAIL_TO_TRANSMIT ;
+    }
+    else
+    {
+        mlog1 ("Transmit to %s port %d\n",
+                  _ctrl.sock.mtc_event_tx_sock->get_dst_str(),
+                  _ctrl.sock.mtc_event_tx_sock->get_dst_addr()->getPort());
+        rc = PASS ;
+
+        /* Schedule receive ACK mechanism - bind a callback */
+    }
+
+    return rc ;
+}
+
+/***************************************************************************
+ *
+ * Name:        service_mtcAgent_command
+ *
+ * Description: Message handling interface capable of servicing mtcAgent
+ *              commands such as 'add host', 'del host', 'mod host', etc.
+ *
+ * Parameters:  cmd     - MTC_CMD_ADD_HOST, MTC_CMD_MOD_HOST or
+ *                        MTC_CMD_DEL_HOST
+ *              buf_ptr - json string carrying 'hostname' and 'uuid' ;
+ *                        add and modify also need 'ip' and 'personality'
+ *
+ * Returns:     result of the hostInv add/mod/del operation, or
+ *              FAIL              - null payload
+ *              FAIL_GET_HOSTNAME - 'hostname' key not found
+ *              FAIL_INVALID_UUID - 'uuid' key not found
+ *              FAIL_BAD_CASE     - unsupported command
+ *              the jsonUtil_get_key_val error for a missing 'ip' or
+ *              'personality' key
+ *
+ * TODO: setup acknowledge mechanism using guestHost
+ *
+ ***************************************************************************/
+int service_mtcAgent_command ( unsigned int cmd , char * buf_ptr )
+{
+    if ( !buf_ptr )
+    {
+        slog ("Empty payload\n");
+        return (FAIL);
+    }
+    string uuid = "";
+    string hostname = "";
+    string hosttype = "";
+    string ip = "";
+
+    int rc = jsonUtil_get_key_val ( buf_ptr, "hostname", hostname ) ;
+    if ( rc != PASS )
+    {
+        elog ("failed to get hostname\n");
+        return (FAIL_GET_HOSTNAME);
+    }
+    rc = jsonUtil_get_key_val ( buf_ptr, "uuid", uuid ) ;
+    if ( rc != PASS )
+    {
+        elog ("%s failed to get host 'uuid'\n", hostname.c_str());
+        elog ("... buffer:%s\n", buf_ptr );
+
+        return (FAIL_INVALID_UUID);
+    }
+
+    /* add and modify take the same keys ; only the final inventory
+     * operation differs */
+    if (( cmd == MTC_CMD_ADD_HOST ) || ( cmd == MTC_CMD_MOD_HOST ))
+    {
+        rc = jsonUtil_get_key_val ( buf_ptr, "ip", ip );
+        if ( rc != PASS )
+        {
+            elog ("%s failed to get host 'ip'\n", hostname.c_str());
+        }
+        else if (( rc = jsonUtil_get_key_val ( buf_ptr, "personality", hosttype )) != PASS )
+        {
+            elog ("%s failed to get host 'personality'\n", hostname.c_str());
+        }
+        else if ( cmd == MTC_CMD_ADD_HOST )
+        {
+            rc = hostInv.add_host ( uuid, ip, hostname, hosttype );
+        }
+        else
+        {
+            rc = hostInv.mod_host ( uuid, ip, hostname, hosttype );
+        }
+    }
+    else if ( cmd == MTC_CMD_DEL_HOST )
+    {
+        rc = hostInv.del_host ( uuid );
+    }
+    else
+    {
+        wlog ("Unsupported command (%d)\n", cmd );
+        rc = FAIL_BAD_CASE ;
+    }
+    return (rc);
+}
+
+
+
+int recv_from_guestServer ( unsigned int cmd, char * buf_ptr )
+{
+    int rc = PASS ;
+    switch ( cmd )
+    {
+        case MTC_EVENT_MONITOR_READY:
+        {
+            string hostname = ""
; + if ( jsonUtil_get_key_val ( buf_ptr, "hostname", hostname ) ) + { + elog ("failed to extract 'hostname' from 'ready event'\n" ); + rc = FAIL_LOCATE_KEY_VALUE ; + } + else + { + ilog ("%s guestServer ready event\n", hostname.c_str()); + + /* Set all the instance state for this host */ + get_hostInv_ptr()->set_inst_state ( hostname ); + } + break ; + } + case MTC_EVENT_HEARTBEAT_RUNNING: + case MTC_EVENT_HEARTBEAT_STOPPED: + case MTC_EVENT_HEARTBEAT_ILLHEALTH: + { + string hostname = "" ; + string uuid = "" ; + int rc1 = jsonUtil_get_key_val ( buf_ptr, "hostname", hostname ); + int rc2 = jsonUtil_get_key_val ( buf_ptr, "uuid", uuid ); + if ( rc1 | rc2 ) + { + elog ("failed to parse 'hostname' or 'uuid' from heartbeat event buffer (%d:%d)\n", + rc1, rc2 ); + elog ("... Buffer: %s\n", buf_ptr ); + return ( FAIL_KEY_VALUE_PARSE ); + } + instInfo * instInfo_ptr = get_hostInv_ptr()->get_inst ( uuid ); + rc1 = guestUtil_get_inst_info ( hostname , instInfo_ptr, buf_ptr ); + if ( rc1 == PASS ) + { + if ( instInfo_ptr ) + { + string state ; + string status; + + if ( instInfo_ptr->heartbeat.reporting == true ) + state = "enabled" ; + else + state = "disabled" ; + + if ( instInfo_ptr->heartbeating == true ) + status = "enabled" ; + else + status = "disabled" ; + + if ( cmd == MTC_EVENT_HEARTBEAT_ILLHEALTH ) + { + ilog ("%s %s ill health notification\n", hostname.c_str(), instInfo_ptr->uuid.c_str()); + rc = guestVimApi_alarm_event ( hostname, uuid ); + } + else if ( cmd == MTC_EVENT_HEARTBEAT_RUNNING ) + { + if ( instInfo_ptr->heartbeating != true ) + { + instInfo_ptr->heartbeating = true ; + ilog ("%s %s is now heartbeating\n", hostname.c_str(), instInfo_ptr->uuid.c_str()); + } + rc = guestVimApi_svc_event ( hostname, uuid, state, status, instInfo_ptr->restart_to_str); + } + else + { + if ( instInfo_ptr->heartbeating != false ) + { + instInfo_ptr->heartbeating = false ; + wlog ("%s %s is not heartbeating\n", hostname.c_str(), instInfo_ptr->uuid.c_str()); + } + rc = 
guestVimApi_svc_event ( hostname, uuid, state, status, "0"); + } + if ( rc != PASS ) + { + /* TODO: make this an elog before delivery */ + elog ("%s %s failed to send state change 'event' to vim (rc:%d)\n", + hostname.c_str(), instInfo_ptr->uuid.c_str(), rc ); + } + } + else + { + elog ("%s %s failed instance lookup\n", hostname.c_str(), uuid.c_str()); + } + } + else + { + elog ("failed to get instance info\n"); + } + break ; + } + + case MTC_EVENT_HEARTBEAT_LOSS: + { + string hostname = "" ; + string uuid = "" ; + int rc1 = jsonUtil_get_key_val ( buf_ptr, "hostname", hostname ) ; + int rc2 = jsonUtil_get_key_val ( buf_ptr, "uuid", uuid ) ; + if ( rc1 | rc2 ) + { + elog ("failed to parse 'heartbeat loss' key values (%d:%d)\n", rc1, rc2 ); + rc = FAIL_LOCATE_KEY_VALUE ; + } + else + { + if ( get_hostInv_ptr()->get_reporting_state ( hostname ) == true ) + { + instInfo * instInfo_ptr = get_hostInv_ptr()->get_inst ( uuid ) ; + if ( instInfo_ptr ) + { + if ( instInfo_ptr->heartbeat.reporting == true ) + { + rc = guestVimApi_inst_failed ( hostname, uuid , MTC_EVENT_HEARTBEAT_LOSS, 0 ); + } + else + { + ilog ("%s %s reporting disabled\n", hostname.c_str(), uuid.c_str() ); + } + } + else + { + elog ("%s %s failed instance lookup\n", hostname.c_str(), uuid.c_str() ); + rc = FAIL_HOSTNAME_LOOKUP ; + } + } + else + { + wlog ("%s heartbeat failure reporting disabled\n", hostname.c_str()); + } + } + break ; + } + case MTC_CMD_QRY_INST: + { + string hostname = "" ; + string uuid = "" ; + string status = "" ; + + jlog ("%s Instance Query Response: %s\n", hostname.c_str(), buf_ptr); + + int rc1 = jsonUtil_get_key_val ( buf_ptr, "hostname", hostname ) ; + int rc2 = jsonUtil_get_key_val ( buf_ptr, "uuid" , uuid ) ; + if ( rc1 | rc2 ) + { + ilog ("failed to parse 'hostname' or 'uuid' (%d:%d)\n", rc1, rc2 ); + } + + instInfo * instInfo_ptr = get_hostInv_ptr()->get_inst ( uuid ) ; + if ( instInfo_ptr ) + { + /** + * Verify that this instance is still associated with this host. 
+ * This check was added as a fix and seeing + * a number of stale instance inventory in the guestServer. + * + * Without this check a late query response from an instance + * that was just deleted can result in an MOD sent to the + * server causing this instance to be mistakenly + * re-added to its inventory. + **/ + if ( !hostname.compare(instInfo_ptr->hostname) ) + { + /* + * Save the current reporting and heartbeating state + * only to compare to see if either has changed + */ + bool current_heartbeating_status = instInfo_ptr->heartbeating ; + bool current_reporting_state = instInfo_ptr->heartbeat.reporting ; + + if ( guestUtil_get_inst_info ( hostname, instInfo_ptr, buf_ptr ) == PASS ) + { + if ( instInfo_ptr->heartbeat.reporting != current_reporting_state ) + { + wlog ("%s:%s state mismatch\n", hostname.c_str(), uuid.c_str()); + wlog ("... state is '%s' but should be '%s' ... fixing\n", + instInfo_ptr->heartbeat.reporting ? "enabled" : "disabled", + current_reporting_state ? "enabled" : "disabled" ); + + instInfo_ptr->heartbeat.reporting = current_reporting_state ; + + rc = send_cmd_to_guestServer ( hostname, + MTC_CMD_MOD_INST , + uuid, + current_reporting_state ); + } + + if ( instInfo_ptr->heartbeating != current_heartbeating_status ) + { + string state ; + if ( instInfo_ptr->heartbeat.reporting == true ) + state = "enabled" ; + else + state = "disabled" ; + + if ( instInfo_ptr->heartbeating == true ) + { + string status = "enabled" ; + ilog ("%s %s is now heartbeating\n", hostname.c_str(), uuid.c_str()); + rc = guestVimApi_svc_event ( hostname, uuid, state, status, instInfo_ptr->restart_to_str); + } + else + { + string status = "disabled" ; + wlog ("%s %s is not heartbeating\n", hostname.c_str(), uuid.c_str()); + rc = guestVimApi_svc_event ( hostname, uuid, state, status, "0" ); + } + if ( rc != PASS ) + { + /* TODO: make this an elog before delivery */ + elog ("%s %s failed to send state change 'query' to vim (rc:%d)\n", hostname.c_str(), uuid.c_str(), 
rc ); + } + } + } + } + else + { + wlog ("%s %s no longer paired ; dropping query response\n", + hostname.c_str(), instInfo_ptr->uuid.c_str() ); + + /* Delete this just in case */ + send_cmd_to_guestServer ( hostname, MTC_CMD_DEL_INST , uuid, false ); + } + } + else + { + elog ("%s unknown uuid ; correcting ...\n", uuid.c_str() ); + + /* Delete this unknown host as it might somehow be stale */ + rc = send_cmd_to_guestServer ( hostname, MTC_CMD_DEL_INST , uuid, false ); + + rc = FAIL_UNKNOWN_HOSTNAME ; + } + hostInv.clr_query_flag ( hostname ); + break ; + } + case MTC_EVENT_VOTE_NOTIFY: + { + string hostname = "" ; + string instance_uuid = "" ; + string notification_type = ""; + string event = ""; + string vote = ""; + string reason = ""; + + int rc1 = jsonUtil_get_key_val ( buf_ptr, "hostname", hostname ) ; + int rc2 = jsonUtil_get_key_val ( buf_ptr, "uuid", instance_uuid ) ; + int rc3 = jsonUtil_get_key_val ( buf_ptr, "notification_type", notification_type ) ; + int rc4 = jsonUtil_get_key_val ( buf_ptr, "event-type", event ) ; + int rc5 = jsonUtil_get_key_val ( buf_ptr, "vote", vote ) ; + if ( rc1 | rc2 | rc3 | rc4 | rc5 ) + { + elog ("failed to parse 'vote-notify' key values (%d:%d:%d:%d:%d)\n", rc1, rc2, rc3, rc4, rc5); + rc = FAIL_LOCATE_KEY_VALUE ; + } + else + { + // 'reason' is optional + jsonUtil_get_key_val ( buf_ptr, "reason", reason ) ; + + jlog ("%s Instance Vote/Notification Response: %s\n", instance_uuid.c_str(), buf_ptr); + + string guest_response = ""; + + if (!vote.compare("accept") || !vote.compare("complete")) + { + if (!notification_type.compare("revocable")) + { + guest_response = "allow"; + } + else if (!notification_type.compare("irrevocable")) + { + guest_response = "proceed"; + } + else + { + rc = FAIL_BAD_PARM; + break; + } + } + else if (!vote.compare("reject")) + { + guest_response = "reject"; + } + else + { + rc = FAIL_BAD_PARM; + } + guestVimApi_inst_action (hostname, instance_uuid, event, guest_response, reason); + } + break ; + } + 
default: + elog ("Unsupported comand (%d)\n", cmd ); + } + return (rc); +} + + +void guestHostClass::run_fsm ( string hostname ) +{ + guestHostClass::guest_host * guest_host_ptr ; + guest_host_ptr = guestHostClass::getHost ( hostname ); + if ( guest_host_ptr != NULL ) + { + /* This FSM is only run on computes */ + if (( guest_host_ptr->hosttype & COMPUTE_TYPE ) == COMPUTE_TYPE) + { + flog ("%s FSM\n", hostname.c_str() ); + } + } +} + +/* Top level call to run FSM */ +/* TODO: Deal with delete */ +int guest_fsm_run ( guestHostClass * obj_ptr ) +{ + instInfo instance ; + + /* Run Maintenance on Inventory */ + for ( obj_ptr->hostlist_iter_ptr = obj_ptr->hostlist.begin () ; + obj_ptr->hostlist_iter_ptr != obj_ptr->hostlist.end () ; + obj_ptr->hostlist_iter_ptr++ ) + { + string hostname = *obj_ptr->hostlist_iter_ptr ; + + daemon_signal_hdlr (); + + obj_ptr->run_fsm ( hostname ); + + /* Run the audit on each host */ + if ( obj_ptr->audit_timer.ring == true ) + { + // ilog ("%s FSM Audit !\n", hostname.c_str() ); + + obj_ptr->audit_run = true ; + + if ( obj_ptr->get_got_host_state ( hostname ) == false ) + { + libEvent & event = hostInv.get_host_event ( hostname ); + string uuid = obj_ptr->get_host_uuid (hostname) ; + int rc = guestVimApi_getHostState ( hostname, uuid, event ); + + if ( rc != PASS ) + { + wlog ("%s failed to get host level reporting state (rc=%d)\n", + hostname.c_str(), rc); + } + else + { + /* Only set it if true as it is defaulted to false already. 
+ * The VIM will send an enable command at a later time */ + if ( !event.value.compare("enabled")) + { + ilog ("%s fault reporting enabled\n", hostname.c_str()); + + rc = hostInv.set_reporting_state ( hostname, true ); + if ( rc != PASS ) + { + wlog ("%s failed to set host level reporting state (rc=%d)\n", + hostname.c_str(), rc); + } + } + else + { + rc = hostInv.set_reporting_state ( hostname, false ); + } + + dlog ("%s Got host state\n", hostname.c_str() ); + obj_ptr->set_got_host_state ( hostname ); + } + } + + /* make sure that the instances for this host are loaded */ + if ( obj_ptr->get_got_instances ( hostname ) == false ) + { + libEvent & event = hostInv.get_host_event ( hostname ); + string uuid = obj_ptr->get_host_uuid (hostname) ; + int rc = guestVimApi_getHostInst ( hostname, uuid, event ); + if ( rc != PASS ) + { + wlog ("%s failed to get host instances (rc=%d)\n", hostname.c_str(), rc); + } + else + { + obj_ptr->set_got_instances ( hostname ); + dlog ("%s instances loaded\n", hostname.c_str() ); + } + } + + /* only query the guestServer if reporting for that server + * is 'enabled' and instance list is not empty */ + if (( obj_ptr->num_instances ( hostname ) != 0 ) && + ( obj_ptr->get_reporting_state ( hostname ) == true )) + { + if ( obj_ptr->get_query_flag ( hostname ) == true ) + { + obj_ptr->inc_query_misses ( hostname); + dlog ("%s guestServer Query Misses:%d\n", hostname.c_str(), + obj_ptr->get_query_misses ( hostname )); + } + else + { + obj_ptr->clr_query_misses ( hostname ); + } + + /* Note: The 3rd and 4th parms are not needed + * for the MTC_CMD_QRY_INST command */ + send_cmd_to_guestServer ( hostname, MTC_CMD_QRY_INST, "", false ); + } + } + if ( obj_ptr->exit_fsm == true ) + { + obj_ptr->exit_fsm = false ; + break ; + } + } + if (( obj_ptr->audit_timer.ring == true ) && ( obj_ptr->audit_run == true )) + { + // dlog ("Audit Restarted\n"); + obj_ptr->audit_run = false ; + obj_ptr->audit_timer.ring = false ; + mtcTimer_start ( 
obj_ptr->audit_timer , _timer_handler, 10 ); + } + + return ( PASS ); +} + +/*****************************************************************************/ +/*****************************************************************************/ +/*****************************************************************************/ + +void daemon_service_run ( void ) +{ + int rc = PASS ; + int count = 0 ; + int flush_thld = 0 ; + + mtcTimer_start ( hostInv.audit_timer , _timer_handler, 2 ); + + guestHttpUtil_init (); + + /* socket descriptor list */ + std::list socks ; + + /* Not monitoring address changes RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR */ + if (( _ctrl.sock.ioctl_sock = open_ioctl_socket ( )) <= 0 ) + { + elog ("Failed to create ioctl socket"); + daemon_exit (); + } + + socks.clear(); + socks.push_front (_ctrl.sock.agent_rx_local_sock->getFD()); + socks.push_front (_ctrl.sock.agent_rx_float_sock->getFD()); + socks.push_front (_ctrl.sock.mtc_cmd_sock->getFD()); + dlog ("Selects: %d %d %d\n", _ctrl.sock.agent_rx_local_sock->getFD(), + _ctrl.sock.agent_rx_float_sock->getFD(), + _ctrl.sock.mtc_cmd_sock->getFD()); + socks.sort(); + + ilog ("Sending ready event to maintenance\n"); + do + { + /* Wait for maintenance */ + rc = send_event_to_mtcAgent ( hostInv.hostBase.my_hostname, + MTC_EVENT_MONITOR_READY ) ; + if ( rc == RETRY ) + { + mtcWait_secs ( 3 ); + } + } while ( rc == RETRY ) ; + + if ( rc == FAIL ) + { + elog ("Unrecoverable heartbeat startup error (rc=%d)\n", rc ); + daemon_exit (); + } + + /* enable the base level signal handler latency monitor */ + daemon_latency_monitor (true); + + ilog ("------------------------------------------------------------\n"); + + for ( ;; ) + { + /* Service Sockets */ + hostInv.waitd.tv_sec = 0; + hostInv.waitd.tv_usec = GUEST_SOCKET_TO ; + + /* Initialize the master fd_set */ + FD_ZERO(&hostInv.message_readfds); + + FD_SET(_ctrl.sock.agent_rx_local_sock->getFD(), &hostInv.message_readfds); + 
FD_SET(_ctrl.sock.agent_rx_float_sock->getFD(), &hostInv.message_readfds); + FD_SET(_ctrl.sock.mtc_cmd_sock->getFD(), &hostInv.message_readfds); + + /* Call select() and wait only up to SOCKET_WAIT */ + rc = select( socks.back()+1, &hostInv.message_readfds, NULL, NULL, &hostInv.waitd); + if (( rc < 0 ) || ( rc == 0 ) || ( rc > (int)socks.size())) + { + /* Check to see if the select call failed. */ + /* ... but filter Interrupt signal */ + if (( rc < 0 ) && ( errno != EINTR )) + { + wlog_throttled ( count, 20, "socket select failed (%d:%m)\n", errno); + } + else if ( rc > (int)socks.size()) + { + wlog_throttled ( count, 100, "Select return exceeds current file descriptors (%ld:%d)\n", + socks.size(), rc ); + } + else + { + count = 0 ; + } + } + else + { + mtc_message_type msg ; + memset ((void*)&msg,0,sizeof(mtc_message_type)); + + /* Service guestServer messages towards the local IP */ + if (FD_ISSET(_ctrl.sock.agent_rx_local_sock->getFD(), &hostInv.message_readfds) ) + { + int bytes = _ctrl.sock.agent_rx_local_sock->read((char*)&msg.hdr[0], sizeof(mtc_message_type)); + + mlog1 ("Received %d bytes from %s:%d:guestServer (local)\n", bytes, + _ctrl.sock.agent_rx_local_sock->get_src_str(), + _ctrl.sock.agent_rx_local_sock->get_dst_addr()->getPort()); + + recv_from_guestServer ( msg.cmd, msg.buf ); + print_mtc_message ( &msg ); + } + + /* Service guestServer messages towards the floating IP */ + else if (FD_ISSET(_ctrl.sock.agent_rx_float_sock->getFD(), &hostInv.message_readfds) ) + { + int bytes = _ctrl.sock.agent_rx_float_sock->read((char*)&msg.hdr[0], sizeof(mtc_message_type)); + + mlog1 ("Received %d bytes from %s:%d:guestServer (float)\n", bytes, + _ctrl.sock.agent_rx_float_sock->get_src_str(), + _ctrl.sock.agent_rx_port); + + recv_from_guestServer ( msg.cmd, msg.buf ); + print_mtc_message ( &msg ); + } + + /* Service mtcAgent commands */ + else if (FD_ISSET(_ctrl.sock.mtc_cmd_sock->getFD(), &hostInv.message_readfds) ) + { + int bytes = 
_ctrl.sock.mtc_cmd_sock->read((char*)&msg.hdr[0],sizeof(mtc_message_type)); + + mlog1 ("Received %d bytes from %s:%d:mtcAgent\n", bytes, + _ctrl.sock.mtc_cmd_sock->get_src_str(), + _ctrl.sock.mtc_cmd_port); + print_mtc_message ( &msg ); + if ( !strncmp ( get_cmd_req_msg_header(), &msg.hdr[0], MSG_HEADER_SIZE )) + { + service_mtcAgent_command ( msg.cmd , &msg.buf[0] ); + count = 0 ; + } + else + { + wlog_throttled ( count, 100, "Invalid message header\n"); + } + } + else + { + ilog ("Unknown select\n"); + } + } + + guestHttpSvr_look (); + + guest_fsm_run ( &hostInv ) ; + + daemon_signal_hdlr (); + + /* Support the log flush config option */ + if ( guest_config.flush ) + { + if ( ++flush_thld > guest_config.flush_thld ) + { + flush_thld = 0 ; + fflush (stdout); + fflush (stderr); + } + } + } + daemon_exit (); +} + +/* Push daemon state to log file */ +void daemon_dump_info ( void ) +{ + daemon_dump_membuf_banner (); + + hostInv.print_node_info (); + hostInv.memDumpAllState (); + + daemon_dump_membuf(); +} + +const char MY_DATA [100] = { "eieio\n" } ; +const char * daemon_stream_info ( void ) +{ + return (&MY_DATA[0]); +} + +/** Teat Head Entry */ +int daemon_run_testhead ( void ) +{ + int rc = PASS; + return (rc); +} diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestBase.h b/mtce-common/cgts-mtce-common-1.0/guest/guestBase.h new file mode 100644 index 00000000..2381ccdb --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestBase.h @@ -0,0 +1,319 @@ +#ifndef __INCLUDE_GUESTBASE_H__ +#define __INCLUDE_GUESTBASE_H__ + +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Guest Services "Base" Header + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace std; + +#include "msgClass.h" +#include "nodeBase.h" +#include "httpUtil.h" +#include "nodeTimers.h" + +#define WANT_NEW + +/** + * @addtogroup guest_services_base + * @{ + */ + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "gst" + +#define CONFIG_CLIENT_RX_PORT (0x00000001) +#define CONFIG_MTC_EVENT_PORT (0x00000002) +#define CONFIG_MTC_CMD_PORT (0x00000004) +#define CONFIG_AGENT_RX_PORT (0x00000008) +#define CONFIG_VIM_CMD_RX_PORT (0x00000010) +#define CONFIG_VIM_EVENT_RX_PORT (0x00000020) + +#define HB_DEFAULT_FIRST_MS 2000 +#define HB_DEFAULT_INTERVAL_MS 1000 +#define HB_DEFAULT_REBOOT_MS 10000 +#define HB_DEFAULT_VOTE_MS 10000 +#define HB_DEFAULT_SHUTDOWN_MS 10000 +#define HB_DEFAULT_SUSPEND_MS 10000 +#define HB_DEFAULT_RESUME_MS 10000 +#define HB_DEFAULT_RESTART_MS 120000 + +/* Directory where libvirt creates the serial I/O pipe channel sockets into the guest + * We monitor this directory with inotify for file changes */ +#define QEMU_CHANNEL_DIR ((const char *)"/var/lib/libvirt/qemu") + +#define ARRAY_SIZE(x) ((int)(sizeof(x)/sizeof(*x))) + +#define MAX_INSTANCES (100) +#define MAX_MESSAGES (10) + +/* The socket select timeout */ +#define GUEST_SOCKET_TO (10000) + +#define DEFAULT_CONNECT_WAIT (1) + +#define CONNECT_TIMOUT (60) +#define WAIT_FOR_INIT_TIMEOUT (60) +#define HEARTBEAT_START_TIMEOUT (120) +#define SEARCH_AUDIT_TIME (180) + +void guestTimer_handler ( int sig, siginfo_t *si, void *uc); + +const char * get_guest_msg_hdr (void) ; + +typedef struct +{ + char buffer [256]; +} gst_message_type ; + +typedef enum +{ + hbs_invalid, + hbs_server_waiting_init, + hbs_server_waiting_challenge, + 
hbs_server_waiting_response, + hbs_server_paused, // heartbeat paused at request of vm + hbs_server_nova_paused, // heartbeat paused at request of nova + hbs_server_migrating, // heartbeat paused while migrate in progress + hbs_server_corrective_action, + hbs_client_waiting_init_ack, + hbs_client_waiting_challenge, + hbs_client_waiting_pause_ack, + hbs_client_waiting_resume_ack, + hbs_client_paused, + hbs_client_waiting_shutdown_ack, + hbs_client_waiting_shutdown_response, + hbs_client_shutdown_response_recieved, + hbs_client_exiting, + hbs_state_max +} hb_state_t; + +/** Guest service control messaging socket control structure */ +typedef struct +{ + /** Guest Services Messaging Agent Receive (from guestServer) Socket + * + * Note: This socket supports receiving from the computes specifying + * either the floating or local IP */ + int agent_rx_port ; + msgClassSock* agent_rx_float_sock ; + msgClassSock* agent_rx_local_sock ; + + /** Guest Services Messaging Agent Transmit (to guestServer) Socket + * + * Note: This transmit socket can be used for any port + * specified at send time */ + msgClassSock* agent_tx_sock ; + + + /** Guest Services Messaging Socket mtcAgent commands are received on */ + msgClassSock* mtc_cmd_sock ; + int mtc_cmd_port ; + + /** Guest Services Messaging Server Receive (from guestAgent) Socket */ + msgClassSock* server_rx_sock ; + int server_rx_port ; + + /** Guest Services Messaging Server Transmit (to guestAgent) Socket */ + msgClassSock* server_tx_sock ; + struct sockaddr_in server_tx_addr ; + + /** Socket used to transmit READY status and Events to Maintenance */ + int mtc_event_tx_port ; + msgClassSock* mtc_event_tx_sock ; + + int netlink_sock ; /* netlink socket */ + int ioctl_sock ; /* general ioctl socket */ + + msgSock_type mtclogd ; +} guest_services_socket_type ; + +/** + * The HTTP server supports two URL levels ; + * a hosts level and instances level. 
+ **/ +typedef enum +{ + SERVICE_LEVEL_NONE, + SERVICE_LEVEL_HOST, + SERVICE_LEVEL_INST, +} service_level_enum ; + +/** common service_type control info */ +typedef struct +{ + bool provisioned ; /* set true once the VIM issues create */ + string state ; /* enabled, configured or disabled */ + bool reporting ; /* failue reporting state */ + + int failures ; /* Running count of failures */ + bool failed ; /* true means heartbeating has failed */ + bool waiting ; /* Waiting on a response */ + int b2b_misses ; /* running back-to-back misses */ +} service_type ; + +/** A grouping of info extracted from command's url */ +typedef struct +{ + service_level_enum service_level ; + string uuid ; + string command ; + string temp ; +} url_info_type ; + +/** instance control structure */ +typedef struct +{ + string hostname ; /**< The host that this instance is on */ + + /* Instance identifiers */ + string name ; /**< the Instance Name as it appears in the GUI */ + string uuid ; /**< the instance uuid which is unique to the system */ + string chan ; /**< virtio channel name 'cgcs.heartbeat..sock' */ + string inst ; /**< the instance part of the channel name */ + + /* Set to true when this channel has been provisioned by the guestAgent */ + // bool provisioned ; + + /* + * Full path and name to the detected channel. + * Used to set inotify file watch. 
+ */ + string fd_namespace ; + + #define CHAN_FLAGS (SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC ) + int chan_fd ; + bool chan_ok ; + + bool connecting ; + bool connected ; /* true = the channel is connected to the guest */ + bool heartbeating ; /* true = the heartbeating has started */ + + string name_log_prefix ; + string uuid_log_prefix ; + + int connect_wait_in_secs ; + + /* added service bools */ + service_type heartbeat ; + service_type reserved ; + + /* + * File and watch descriptors used to monitor + * specific files in QEMU_CHANNEL_DIR + */ + int inotify_file_fd ; + int inotify_file_wd ; + + /* Message header info */ + int version; + int revision; + string msg_type; + uint32_t sequence; + + hb_state_t hbState ; /* see heartbeat_types.h */ + hb_state_t vnState ; /* see heartbeat_types.h */ + + uint32_t invocation_id ; + + // For voting and notification + string event_type; // GuestHeartbeatMsgEventT + string notification_type; // GuestHeartbeatMsgNotifyT + + uint32_t heartbeat_challenge ; + uint32_t heartbeat_interval_ms ; + + uint32_t vote_secs; + uint32_t shutdown_notice_secs; + uint32_t suspend_notice_secs; + uint32_t resume_notice_secs; + uint32_t restart_secs; + string corrective_action; + + string unhealthy_corrective_action; + bool unhealthy_failure ; + + /* String versions of the above timeouts - integer portions only */ + string vote_to_str ; /* vote timeout in seconds as a string value */ + string shutdown_to_str ; /* shutdown timeout in seconds as a string value */ + string suspend_to_str ; /* suspend timeout in seconds as a string value */ + string resume_to_str ; /* resume timeout in seconds as a string value */ + string restart_to_str ; /* restart timeout in seconds as a string value */ + + int select_count ; + int message_count ; + int health_count ; + int failure_count ; + int connect_count ; + int connect_retry_count ; + int corrective_action_count ; + + libEvent vimEvent ; + +} instInfo ; + +/* daemon control structure - used for both 
guestAgent and guestServer */ +typedef struct +{ + bool init ; + char hostname [MAX_HOST_NAME_SIZE+1]; + string address ; + string address_peer ; /* used for server only */ + int nodetype ; /* used for server only */ + + guest_services_socket_type sock ; + struct mtc_timer timer ; + + /* List of instances provisioned on this host */ + list instance_list ; /* used for server only */ + list::iterator instance_list_ptr; /* used for server only */ + + /* file and watch descriptors used to monitor QEMU_CHANNEL_DIR */ + int inotify_dir_fd ; + int inotify_dir_wd ; + + + +} ctrl_type ; + +ctrl_type * get_ctrl_ptr ( void ); + + + +int send_cmd_to_guestServer ( string hostname, unsigned int cmd, string uuid, bool reporting, string event="unknown" ); + +/** + * @} guest_services_base + */ + +#endif /* __INCLUDE_GUESTBASE_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestClass.cpp b/mtce-common/cgts-mtce-common-1.0/guest/guestClass.cpp new file mode 100644 index 00000000..69cf64f7 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestClass.cpp @@ -0,0 +1,1335 @@ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Guest Services "Base" Header + */ + +#include "nodeBase.h" +#include "nodeTimers.h" +#include "guestClass.h" +#include "nodeUtil.h" + + +const char guest_msg_hdr [MSG_HEADER_SIZE] = {"guest msg header:"}; +const char * get_guest_msg_hdr (void) { return guest_msg_hdr ; } + +/* used as a default return in procedures that return a reference to libEvent */ +libEvent nullEvent ; + +/**< constructor */ +guestHostClass::guestHostClass() +{ + guest_head = guest_tail = NULL; + memory_allocs = 0 ; + memory_used = 0 ; + hosts = 0 ; + + for ( int i = 0 ; i < MAX_HOSTS ; i++ ) + { + host_ptrs[i] = NULL ; + } + /* Query Host state from the VIM bools */ + audit_run = false ; + + exit_fsm = false ; + +// httpUtil_event_init ( &nullEvent, "null", "null" , "0.0.0.0", 0 ); + nullEvent.request = SERVICE_NONE ; + return ; +} + +/**< destructor */ +guestHostClass::~guestHostClass() +{ + guest_host * ptr = guest_head ; + guest_host * temp_ptr = ptr ; + while ( ptr != NULL ) + { + temp_ptr = ptr ; + ptr = ptr->next ; + delHost (temp_ptr); + } + if ( memory_used != 0 ) + { + elog ( "Apparent Memory Leak - Allocs:%d and Bytes:%d\n", + memory_allocs, memory_used ); + } + else + { + dlog ( "No Memory Leaks\n\n"); + } + return ; +} + +/* + * Allocate new host and tack it on the end of the host_list + */ +struct guestHostClass::guest_host* guestHostClass::addHost( string hostname ) +{ + /* verify host is not already provisioned */ + struct guest_host * ptr = guestHostClass::getHost ( hostname ); + if ( ptr ) + { + if ( guestHostClass::remHost ( hostname ) ) + { + /* Should never get here but if we do then */ + /* something is seriously wrong */ + elog ("Error: Unable to remove host during reprovision\n"); + return static_cast(NULL); + } + } + + /* allocate memory for new host */ + ptr = guestHostClass::newHost (); + if( ptr == NULL ) + { + elog ( "Error: Failed to allocate memory for new host\n" ); 
+ return static_cast(NULL); + } + + /* Init the new host */ + ptr->hostname = hostname ; + ptr->reporting = false ; + ptr->query_flag = false ; + + ptr->got_host_state = false; + ptr->got_instances = false; + + ptr->stage = STAGE__START ; + ptr->instance_list.clear(); + + /* Init host's general mtc timer */ + mtcTimer_init ( ptr->host_audit_timer ); + + /* Assign the timer the host's name */ + ptr->host_audit_timer.hostname = hostname ; + + /* If the host list is empty add it to the head */ + if( guest_head == NULL ) + { + guest_head = ptr ; + guest_tail = ptr ; + ptr->prev = NULL ; + ptr->next = NULL ; + } + else + { + /* link the new_host to the tail of the host_list + * then mark the next field as the end of the host_list + * adjust tail to point to the last host + */ + guest_tail->next = ptr ; + ptr->prev = guest_tail ; + ptr->next = NULL ; + guest_tail = ptr ; + } + + hosts++ ; + dlog2 ("Added guestHostClass host instance %d\n", hosts); + return ptr ; +} + +/* Remove a hist from the linked list of hosts - may require splice action */ +int guestHostClass::remHost( string hostname ) +{ + if ( hostname.c_str() == NULL ) + return -ENODEV ; + + if ( guest_head == NULL ) + return -ENXIO ; + + struct guest_host * ptr = guestHostClass::getHost ( hostname ); + + if ( ptr == NULL ) + return -EFAULT ; + + /* Free the mtc timer if in use */ + if ( ptr->host_audit_timer.tid ) + { + tlog ("%s Stopping host timer\n", hostname.c_str()); + mtcTimer_stop ( ptr->host_audit_timer ); + ptr->host_audit_timer.ring = false ; + ptr->host_audit_timer.tid = NULL ; + } + + /* If the host is the head host */ + if ( ptr == guest_head ) + { + /* only one host in the list case */ + if ( guest_head == guest_tail ) + { + dlog ("Single Host -> Head Case\n"); + guest_head = NULL ; + guest_tail = NULL ; + } + else + { + dlog ("Multiple Hosts -> Head Case\n"); + guest_head = guest_head->next ; + guest_head->prev = NULL ; + } + } + /* if not head but tail then there must be more than one + * host 
in the list so go ahead and chop the tail. + */ + else if ( ptr == guest_tail ) + { + dlog ("Multiple Host -> Tail Case\n"); + guest_tail = guest_tail->prev ; + guest_tail->next = NULL ; + } + else + { + dlog ("Multiple Host -> Full Splice Out\n"); + ptr->prev->next = ptr->next ; + ptr->next->prev = ptr->prev ; + } + guestHostClass::delHost ( ptr ); + hosts-- ; + return (PASS) ; +} + + +struct guestHostClass::guest_host* guestHostClass::getHost ( string hostname_or_uuid ) +{ + for ( struct guest_host * ptr = guest_head ; guest_head ; ptr = ptr->next ) + { + if ( !hostname_or_uuid.compare ( ptr->hostname )) + { + return ptr ; + } + else if ( !hostname_or_uuid.compare ( ptr->uuid )) + { + return ptr ; + } + + if (( ptr->next == NULL ) || ( ptr == guest_tail )) + break ; + } + return static_cast(NULL); +} + +/* + * Allocates memory for a new host and stores its the address in host_ptrs + * + * @param void + * @return pointer to the newly allocted host memory + */ +struct guestHostClass::guest_host * guestHostClass::newHost ( void ) +{ + struct guestHostClass::guest_host * temp_host_ptr = NULL ; + + if ( memory_allocs == 0 ) + { + memset ( host_ptrs, 0 , sizeof(struct guest_host *)*MAX_HOSTS); + } + + // find an empty spot + for ( int i = 0 ; i < MAX_HOSTS ; i++ ) + { + if ( host_ptrs[i] == NULL ) + { + host_ptrs[i] = temp_host_ptr = new guest_host ; + memory_allocs++ ; + memory_used += sizeof (struct guestHostClass::guest_host); + + return temp_host_ptr ; + } + } + elog ( "Failed to save new host pointer address\n" ); + return temp_host_ptr ; +} + +/* Frees the memory of a pre-allocated host and removes + * it from the host_ptrs list + * @param host * pointer to the host memory address to be freed + * @return int return code { PASS or -EINVAL } + */ +int guestHostClass::delHost ( struct guestHostClass::guest_host * host_ptr ) +{ + if ( guestHostClass::memory_allocs > 0 ) + { + for ( int i = 0 ; i < MAX_NODES ; i++ ) + { + if ( guestHostClass::host_ptrs[i] == host_ptr 
) + { + delete host_ptr ; + guestHostClass::host_ptrs[i] = NULL ; + guestHostClass::memory_allocs-- ; + guestHostClass::memory_used -= sizeof (struct guestHostClass::guest_host); + return PASS ; + } + } + elog ( "Error: Unable to validate memory address being freed\n" ); + } + else + elog ( "Error: Free memory called when there is no memory to free\n" ); + + return -EINVAL ; +} + + +int guestHostClass::mod_host ( string uuid, string address, string hostname, string hosttype ) +{ + struct guestHostClass::guest_host * host_ptr = static_cast(NULL); + + if (hostname.empty()) + { + wlog ("Refusing to modify host with 'null' or 'invalid' hostname (uuid:%s)\n", + uuid.c_str()); + return (FAIL_INVALID_HOSTNAME) ; + } + + host_ptr = guestHostClass::getHost(hostname); + if ( !host_ptr ) + { + ilog ("%s not already provisioned\n", host_ptr->hostname.c_str()); + + /* Send back a retry in case the add needs to be converted to a modify */ + return (FAIL_INVALID_OPERATION); + } + host_ptr->uuid = uuid ; + host_ptr->ip = address ; + host_ptr->hosttype = get_host_function_mask (hosttype) ; + + ilog ("%s modify %s %s %s\n", + hostname.c_str(), + host_ptr->uuid.c_str(), + host_ptr->ip.c_str(), + hosttype.c_str()); + + return (PASS); +} + +int guestHostClass::add_host ( string uuid, + string address, + string hostname, + string hosttype) +{ + int rc = FAIL ; + struct guestHostClass::guest_host * host_ptr = static_cast(NULL); + + host_ptr = guestHostClass::getHost(hostname); + if ( host_ptr ) + { + ilog ("%s Already provisioned\n", host_ptr->hostname.c_str()); + + /* Send back a retry in case the add needs to be converted to a modify */ + return (RETRY); + } + /* Otherwise add it as a new host */ + else + { + host_ptr = guestHostClass::addHost(hostname); + if ( host_ptr ) + { + host_ptr->uuid = uuid ; + host_ptr->ip = address ; + host_ptr->hosttype = get_host_function_mask(hosttype); + + mtcTimer_init ( host_ptr->host_audit_timer, hostname ); + + host_ptr->stage = STAGE__START ; + + /* 
Add to the end of inventory */
+            hostlist.push_back ( host_ptr->hostname );
+
+            rc = PASS ;
+            ilog ("%s added\n", hostname.c_str());
+        }
+        else
+        {
+            elog ("%s add failed\n", hostname.c_str());
+            rc = FAIL_NULL_POINTER ;
+        }
+    }
+    return (rc);
+}
+
+/*****************************************************************************
+ *
+ * Name       : rem_host
+ *
+ * Purpose    : Remove this host from the daemon altogether
+ *              (hostlist entry removed, exit_fsm set, memory freed via remHost)
+ *****************************************************************************/
+int guestHostClass::rem_host ( string hostname )
+{
+    int rc = FAIL ; /* returned unchanged when hostname is empty */
+    if ( ! hostname.empty() )
+    {
+        /* remove the service specific component */
+        hostlist.remove ( hostname );
+
+        exit_fsm = true ;
+
+        /* free memory */
+        rc = guestHostClass::remHost ( hostname );
+    }
+    return ( rc );
+}
+
+/*****************************************************************************
+ *
+ * Name       : del_host_inst
+ *
+ * Purpose    : Send MTC_CMD_DEL_INST for every instance of this host
+ *              NOTE(review): rc is never updated, so FAIL_DEL_UNKNOWN is returned even for a known host - confirm callers
+ *****************************************************************************/
+int guestHostClass::del_host_inst ( string host_uuid )
+{
+    int rc = FAIL_DEL_UNKNOWN ;
+    guestHostClass::guest_host * guest_host_ptr = guestHostClass::getHost( host_uuid );
+    if ( guest_host_ptr )
+    {
+        if ( guest_host_ptr->instance_list.size() != 0 )
+        {
+            for ( guest_host_ptr->instance_list_ptr = guest_host_ptr->instance_list.begin();
+                  guest_host_ptr->instance_list_ptr != guest_host_ptr->instance_list.end();
+                  guest_host_ptr->instance_list_ptr++ )
+            {
+                send_cmd_to_guestServer ( guest_host_ptr->hostname,
+                                          MTC_CMD_DEL_INST,
+                                          guest_host_ptr->instance_list_ptr->uuid,
+                                          guest_host_ptr->instance_list_ptr->heartbeat.reporting );
+            }
+
+            /* If the instance list is empty then clear the query flag.
               NOTE(review): nothing above erases list entries and this sits in the size() != 0 branch, so it looks unreachable - confirm */
+            if ( guest_host_ptr->instance_list.empty () )
+            {
+                clr_query_flag ( guest_host_ptr->hostname );
+            }
+        }
+    }
+    else
+    {
+        wlog ("Unknown host uuid: %s\n", host_uuid.c_str());
+    }
+    return (rc);
+}
+
+/*****************************************************************************
+ *
+ * Name       : del_host
+ *
+ * Purpose    : Send a delete for every instance of the host and then remove the host
+ *              (the argument is passed to getHost; the result of rem_host is returned)
+ *****************************************************************************/
+int guestHostClass::del_host ( string uuid )
+{
+    int rc = FAIL_DEL_UNKNOWN ; /* returned as is when the host is not found */
+    guestHostClass::guest_host * guest_host_ptr = guestHostClass::getHost( uuid );
+    if ( guest_host_ptr )
+    {
+        if ( guest_host_ptr->instance_list.size() != 0 )
+        {
+            for ( guest_host_ptr->instance_list_ptr = guest_host_ptr->instance_list.begin();
+                  guest_host_ptr->instance_list_ptr != guest_host_ptr->instance_list.end();
+                  guest_host_ptr->instance_list_ptr++ )
+            {
+                send_cmd_to_guestServer ( guest_host_ptr->hostname,
+                                          MTC_CMD_DEL_INST,
+                                          guest_host_ptr->instance_list_ptr->uuid,
+                                          guest_host_ptr->instance_list_ptr->heartbeat.reporting );
+            }
+        }
+        /* save the hostname so that the logs below refer to something valid
           once rem_host has freed the host structure */
+        string hostname = guest_host_ptr->hostname ;
+        rc = rem_host ( hostname );
+        if ( rc == PASS )
+        {
+            ilog ("%s deleted\n", hostname.c_str());
+            print_node_info();
+        }
+        else
+        {
+            elog ("%s delete host failed (rc:%d)\n", hostname.c_str(), rc );
+        }
+    }
+    else
+    {
+        wlog ("Unknown uuid: %s\n", uuid.c_str());
+    }
+    return (rc);
+}
+
+/*****************************************************************************
+ *
+ * Name       : add_inst
+ *
+ * Purpose    : Add an instance to the guestAgent guestHostClass database
+ *
+ * Assumptions: This acts as a modify as well. See description below.
+ *
+ * Description: Search through the whole guestAgent guestHostClass database
+ *              looking for the specified instance uuid.
+ *
+ *              If found against the specified hostname then just ensure the
+ *              channel info is correct or updated, the services are disabled
+ *              and the counters are reset.
+ *
+ *              If found against a different host then do the same
+ *              initialization but add a unique log stating this condition.
+ *
+ *              If not found then just go ahead and add it to the instance
+ *              list for the specified host with the same default initialization.
+ *
+ *              Send an add command to the guestServer so that it has the
+ *              opportunity to try to open the channel to the guest and
+ *              start heartbeating.
+ *
+ *****************************************************************************/
+
+int guestHostClass::add_inst ( string hostname, instInfo & instance )
+{
+    int rc = FAIL_NOT_FOUND ;
+
+    if ( instance.uuid.empty() )
+    {
+        elog ("%s refusing to add null instance to host\n", hostname.c_str());
+        return (FAIL_INVALID_UUID);
+    }
+
+    /**
+     * Loop over all hosts looking for this instance. If it exists for a different host
+     * then remove it from that host and add it to the specified host.
+     * This is done because the add is also acting as a modify operation
+     **/
+    for ( struct guest_host * ptr = guest_head ; guest_head ; ptr = ptr->next ) /* the condition only guards an empty list */
+    {
+        if ( ptr->instance_list.size() != 0 )
+        {
+            for ( ptr->instance_list_ptr = ptr->instance_list.begin();
+                  ptr->instance_list_ptr != ptr->instance_list.end();
+                  ptr->instance_list_ptr++ )
+            {
+                if ( !ptr->instance_list_ptr->uuid.compare(instance.uuid))
+                {
+                    /* Verify that this instance is for the specified host.
+ * If not then delete this instance from this host and + * allow it to be added below for the specified host */ + if ( ptr->hostname.compare(hostname) ) + { + /* not this host so delete it from the list */ + ilog ("%s %s move to %s\n", ptr->hostname.c_str(), + ptr->instance_list_ptr->uuid.c_str(), + hostname.c_str() ); + + /* remove the instance from this host's guestServer */ + send_cmd_to_guestServer ( ptr->hostname, + MTC_CMD_DEL_INST, + instance.uuid, + instance.heartbeat.reporting ); + + /* remove it from this hosts list */ + ptr->instance_list.erase(ptr->instance_list_ptr); + } + else + { + rc = PASS ; + + /* TODO: OBSOLETE check + * Update the instance if it is different from what was passed in */ + if ( ptr->instance_list_ptr->uuid.compare(instance.uuid) ) + { + ptr->instance_list_ptr->hostname = hostname ; + ptr->instance_list_ptr->uuid = instance.uuid ; + ptr->instance_list_ptr->heartbeat.reporting = instance.heartbeat.reporting ; + ptr->instance_list_ptr->heartbeat.provisioned = instance.heartbeat.provisioned; + ptr->instance_list_ptr->heartbeat.failures = 0 ; + ptr->instance_list_ptr->heartbeat.b2b_misses = 0 ; + + ptr->instance_list_ptr->restart_to_str = "0" ; + ptr->instance_list_ptr->resume_to_str = "0" ; + ptr->instance_list_ptr->suspend_to_str = "0" ; + ptr->instance_list_ptr->shutdown_to_str = "0" ; + ptr->instance_list_ptr->vote_to_str = "0" ; + + ilog ("%s %s updated info\n", hostname.c_str(), instance.uuid.c_str()); + + /* Setup the new channel */ + send_cmd_to_guestServer ( hostname, + MTC_CMD_ADD_INST, + instance.uuid, + instance.heartbeat.reporting ); + } + else + { + ilog ("%s %s info unchanged\n", + hostname.c_str(), instance.uuid.c_str()); + } + } + break ; + } + } + } + if (( ptr->next == NULL ) || ( ptr == guest_tail )) + break ; + } + + /* If the instance is not found then we need to add it to the specified host */ + if ( rc == FAIL_NOT_FOUND ) + { + struct guestHostClass::guest_host * host_ptr = static_cast(NULL); + host_ptr = 
guestHostClass::getHost(hostname);
+        if ( host_ptr )
+        {
+            instance.hostname = hostname ;
+
+            instance.restart_to_str = "0" ;
+            instance.resume_to_str = "0" ;
+            instance.suspend_to_str = "0" ;
+            instance.shutdown_to_str = "0" ;
+            instance.vote_to_str = "0" ;
+
+            instance.heartbeat.provisioned = true ;
+            instance.heartbeat.failures = 0 ;
+            instance.heartbeat.b2b_misses = 0 ;
+            host_ptr->instance_list.push_back (instance); /* the list stores a copy of the caller's instance */
+
+            ilog ("%s %s add - Prov: %s Notify: %s\n",
+                      hostname.c_str(),
+                      instance.uuid.c_str(),
+                      instance.heartbeat.provisioned ? "YES" : "no ",
+                      instance.heartbeat.reporting ? "YES" : "no " );
+
+            send_cmd_to_guestServer ( hostname, MTC_CMD_ADD_INST, instance.uuid, instance.heartbeat.reporting );
+            rc = PASS ;
+        }
+        else
+        {
+            elog ("%s hostname is unknown (%s)\n", hostname.c_str(), instance.uuid.c_str() );
+            rc = FAIL_INVALID_HOSTNAME ;
+        }
+    }
+    return (rc);
+}
+
+/*****************************************************************************
+ *
+ * Name       : mod_inst
+ *
+ * Purpose    : Modify an instance's heartbeat reporting state ; the instance
+ *              is added to the specified host if it is not already there.
+ *****************************************************************************/
+
+int guestHostClass::mod_inst ( string hostname, instInfo & instance )
+{
+    int rc = FAIL_NOT_FOUND ;
+
+    if ( instance.uuid.empty() )
+    {
+        elog ("%s empty instance uuid\n", hostname.c_str());
+        return (FAIL_INVALID_UUID);
+    }
+
+    /**
+     * First search for this instance.
+     * If it is found against a different host then we need to delete the
+     * instance from that host and add it to the new host.
+     **/
+    for ( struct guest_host * ptr = guest_head ; guest_head ; ptr = ptr->next )
+    {
+        if ( ptr->instance_list.size() != 0 )
+        {
+            for ( ptr->instance_list_ptr = ptr->instance_list.begin();
+                  ptr->instance_list_ptr != ptr->instance_list.end();
+                  ptr->instance_list_ptr++ )
+            {
+                if ( !ptr->instance_list_ptr->uuid.compare(instance.uuid))
+                {
+                    /* Verify that this instance is for the specified host.
+                     * If not then delete this instance from this host and
+                     * allow it to be added below for the specified host */
+                    if ( !ptr->hostname.compare(hostname) )
+                    {
+                        /* This instance is provisioned for this host */
+
+                        /* Manage its reporting state */
+                        if ( ptr->instance_list_ptr->heartbeat.reporting != instance.heartbeat.reporting )
+                        {
+                            ptr->instance_list_ptr->heartbeat.reporting =
+                                                   instance.heartbeat.reporting ;
+
+                            ilog ("%s %s instance reporting state changed to %s\n",
+                                      ptr->hostname.c_str(), ptr->instance_list_ptr->uuid.c_str(),
+                                      ptr->instance_list_ptr->heartbeat.reporting ? "Enabled" : "Disabled");
+                        }
+                        else
+                        {
+                            ilog ("%s %s instance reporting state already %s\n",
+                                      ptr->hostname.c_str(), ptr->instance_list_ptr->uuid.c_str(),
+                                      ptr->instance_list_ptr->heartbeat.reporting ? "Enabled" : "Disabled");
+                        }
+                        send_cmd_to_guestServer ( ptr->hostname,
+                                                  MTC_CMD_MOD_INST,
+                                                  ptr->instance_list_ptr->uuid,
+                                                  ptr->instance_list_ptr->heartbeat.reporting ); /* sent whether or not the state changed */
+                        return (PASS) ;
+                    }
+                    else
+                    {
+                        ilog ("%s %s move to %s while %s\n",
+                                  ptr->hostname.c_str(),
+                                  ptr->instance_list_ptr->uuid.c_str(),
+                                  hostname.c_str(),
+                                  ptr->instance_list_ptr->heartbeat.reporting ? "enabled" : "disabled");
+                        /**
+                         * The instance must have moved to another host.
+                         * Delete it here and then explicitly add
+                         * it below by keeping rc = FAIL_NOT_FOUND
+                         **/
+                        send_cmd_to_guestServer ( ptr->hostname,
+                                                  MTC_CMD_DEL_INST,
+                                                  instance.uuid,
+                                                  instance.heartbeat.reporting );
+
+                        ptr->instance_list.erase(ptr->instance_list_ptr);
+
+                        /* The iterator was just erased so leave this host's list ;
+                         * the outer loop still visits the remaining hosts */
+                        break ;
+                    }
+                }
+            }
+        }
+        if (( ptr->next == NULL ) || ( ptr == guest_tail ))
+            break ;
+    }
+
+    /* If the instance is not found then we need to add it to the specified host */
+    if ( rc == FAIL_NOT_FOUND )
+    {
+        struct guestHostClass::guest_host * ptr = guestHostClass::getHost(hostname);
+        if ( ptr )
+        {
+            instance.hostname = hostname ;
+
+            instance.heartbeat.provisioned = true ;
+
+            /* Don't change the reporting state (the self-assignment below is a no-op) */
+            instance.heartbeat.reporting = instance.heartbeat.reporting ;
+
+            instance.heartbeat.failures = 0 ;
+            instance.heartbeat.b2b_misses = 0 ;
+
+            instance.restart_to_str = "0" ;
+            instance.resume_to_str = "0" ;
+            instance.suspend_to_str = "0" ;
+            instance.shutdown_to_str = "0" ;
+            instance.vote_to_str = "0" ;
+
+            /* The mod might be straight to enabled state */
+            ilog ("%s %s instance reporting state is %s\n",
+                      ptr->hostname.c_str(),
+                      instance.uuid.c_str(),
+                      instance.heartbeat.reporting ? "Enabled" : "Disabled");
+
+            ptr->instance_list.push_back (instance);
+            send_cmd_to_guestServer ( hostname,
+                                      MTC_CMD_ADD_INST,
+                                      instance.uuid,
+                                      instance.heartbeat.reporting );
+            rc = PASS ;
+        }
+        else
+        {
+            rc = FAIL_INVALID_HOSTNAME ;
+        }
+    }
+    return (rc);
+}
+
+
+/*****************************************************************************
+ *
+ * Name       : del_inst
+ *
+ * Purpose    : Delete an instance from the guestAgent guestHostClass database
+ *
+ * Description: Search all the hosts for this instance and remove it
+ *              from its instance tracking list.
+ *
+ *              Also send a delete command to the guestServer so that it
+ *              has the opportunity to do any cleanup actions.
+ *
+ *****************************************************************************/
+int guestHostClass::del_inst ( string instance_uuid )
+{
+    int rc = FAIL_NOT_FOUND ;
+
+    if ( instance_uuid.empty() )
+    {
+        elog ("supplied instance uuid was null\n");
+        return (FAIL_INVALID_UUID);
+    }
+
+    /** Loop over all hosts looking for this instance. */
+    for ( struct guest_host * ptr = guest_head ; guest_head ; ptr = ptr->next )
+    {
+        if ( ptr->instance_list.size() != 0 )
+        {
+            for ( ptr->instance_list_ptr = ptr->instance_list.begin();
+                  ptr->instance_list_ptr != ptr->instance_list.end();
+                  ptr->instance_list_ptr++ )
+            {
+                if ( !ptr->instance_list_ptr->uuid.compare(instance_uuid))
+                {
+                    ilog ("%s removed instance %s\n",
+                              ptr->hostname.c_str(),
+                              instance_uuid.c_str());
+
+                    send_cmd_to_guestServer ( ptr->hostname,
+                                              MTC_CMD_DEL_INST,
+                                              instance_uuid,
+                                              ptr->instance_list_ptr->heartbeat.reporting );
+
+                    ptr->instance_list.erase(ptr->instance_list_ptr);
+
+                    return (PASS) ; /* only the first match is removed */
+                }
+                else
+                {
+                    jlog ("%s %s:%s (search)\n",
+                              ptr->hostname.c_str(),
+                              ptr->instance_list_ptr->uuid.c_str(),
+                              instance_uuid.c_str());
+                }
+            }
+        }
+        if (( ptr->next == NULL ) || ( ptr == guest_tail ))
+            break ;
+    }
+    wlog ("instance was not found '%s'\n", instance_uuid.c_str());
+    return (rc);
+}
+
+/**
+ * Change the host level fault reporting state for the specified host.
+ *
+ * MTC_CMD_ENABLE turns reporting on ; any other command turns it off.
+ * The new state is sent to the guestServer in an MTC_CMD_MOD_HOST command.
+ **/
+int guestHostClass::host_inst ( string hostname, mtc_cmd_enum command )
+{
+    int rc = FAIL_NOT_FOUND ; /* returned when the hostname is not provisioned */
+
+    struct guestHostClass::guest_host * ptr = static_cast<struct guest_host *>(NULL);
+
+    if ( hostname.empty() )
+    {
+        elog ("no hostname specified\n");
+        return (FAIL_STRING_EMPTY);
+    }
+    ptr = guestHostClass::getHost(hostname);
+    if ( ptr )
+    {
+        if ( command == MTC_CMD_ENABLE )
+        {
+            ptr->reporting = true ;
+            ilog ("%s host level heartbeat reporting is Enabled\n", hostname.c_str());
+            send_cmd_to_guestServer ( hostname, MTC_CMD_MOD_HOST, ptr->uuid, true );
+        }
+        else
+        {
+            ptr->reporting = false ;
+            ilog ("%s host level heartbeat reporting is Disabled\n", hostname.c_str());
+            send_cmd_to_guestServer ( hostname, MTC_CMD_MOD_HOST, ptr->uuid, false );
+        }
+        rc = PASS ;
+    }
+    return (rc);
+}
+
+
+instInfo * guestHostClass::get_inst ( string instance_uuid )
+{
+    if ( instance_uuid.empty() )
+    {
+        elog ("empty instance uuid\n");
+        return (NULL);
+    }
+
+    /** Loop over all hosts looking for this instance.
*/ + for ( struct guest_host * ptr = guest_head ; guest_head ; ptr = ptr->next ) + { + if ( ptr->instance_list.size() != 0 ) + { + for ( ptr->instance_list_ptr = ptr->instance_list.begin(); + ptr->instance_list_ptr != ptr->instance_list.end(); + ptr->instance_list_ptr++ ) + { + if ( !ptr->instance_list_ptr->uuid.compare(instance_uuid)) + { + dlog ("%s found instance %s\n", + ptr->hostname.c_str(), + ptr->instance_list_ptr->uuid.c_str()); + + return ( &(*ptr->instance_list_ptr) ); + } + } + } + if (( ptr->next == NULL ) || ( ptr == guest_tail )) + break ; + } + return (NULL); +} + +/** returns he number of instances on this host */ +int guestHostClass::num_instances ( string hostname ) +{ + guest_host * guest_host_ptr = getHost ( hostname ); + if ( guest_host_ptr != NULL ) + { + return ( guest_host_ptr->instance_list.size()); + } + return ( 0); +} + +/****************************************************************************/ +/** Host Class Setter / Getters */ +/****************************************************************************/ + +struct guestHostClass::guest_host * guestHostClass::getHost_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct guest_host * host_ptr = guest_head ; guest_head ; host_ptr = host_ptr->next ) + { + if ( host_ptr->host_audit_timer.tid == tid ) + { + return host_ptr ; + } + if (( host_ptr->next == NULL ) || ( host_ptr == guest_tail )) + break ; + } + } + return static_cast(NULL); +} + +static string null_str = "" ; +string guestHostClass::get_host_name ( string uuid ) +{ + guest_host * guest_host_ptr = getHost ( uuid ); + if ( guest_host_ptr != NULL ) + { + return ( guest_host_ptr->hostname ); + } + return ( null_str ); +} + +string guestHostClass::get_host_uuid ( string hostname ) +{ + guest_host * guest_host_ptr = getHost ( hostname ); + if ( guest_host_ptr != NULL ) + { + return ( guest_host_ptr->uuid ); + } + return ( null_str ); +} + + + + + +string 
guestHostClass::get_inst_host_name ( string instance_uuid ) /* hostname owning this instance uuid, or the empty null_str */
+{
+    if ( instance_uuid.empty() )
+    {
+        elog ("empty instance uuid\n");
+        return (null_str);
+    }
+    /** Loop over all hosts looking for this instance. */
+    for ( struct guest_host * ptr = guest_head ; guest_head ; ptr = ptr->next )
+    {
+        if ( ptr->instance_list.size() != 0 )
+        {
+            for ( ptr->instance_list_ptr = ptr->instance_list.begin();
+                  ptr->instance_list_ptr != ptr->instance_list.end();
+                  ptr->instance_list_ptr++ )
+            {
+                if ( !ptr->instance_list_ptr->uuid.compare(instance_uuid))
+                {
+                    dlog ("%s found instance %s\n",
+                              ptr->hostname.c_str(),
+                              ptr->instance_list_ptr->uuid.c_str());
+
+                    return ( ptr->hostname );
+                }
+            }
+        }
+        if (( ptr->next == NULL ) || ( ptr == guest_tail ))
+            break ;
+    }
+    return ( null_str );
+}
+
+/**
+ * Set and Get a bool that indicates whether we already
+ * got the host reporting state from the VIM.
+ *
+ * The VIM might not be running at the time this daemon
+ * is started so we need to retry until we get it.
+ * The setters only ever set the flag to true ; nothing here clears it.
+ **/
+void guestHostClass::set_got_host_state ( string hostname )
+{
+    guest_host * guest_host_ptr = getHost ( hostname );
+    if ( guest_host_ptr )
+    {
+        guest_host_ptr->got_host_state = true ;
+    }
+    else
+    {
+        wlog ("%s not found\n", hostname.c_str());
+    }
+}
+
+bool guestHostClass::get_got_host_state ( string hostname )
+{
+    guest_host * guest_host_ptr = getHost ( hostname );
+    if ( guest_host_ptr )
+    {
+        return ( guest_host_ptr->got_host_state );
+    }
+    else
+    {
+        wlog ("%s not found\n", hostname.c_str());
+    }
+    return (false); /* unknown host */
+}
+
+void guestHostClass::set_got_instances ( string hostname ) /* same pattern as above for the got_instances flag */
+{
+    guest_host * guest_host_ptr = getHost ( hostname );
+    if ( guest_host_ptr )
+    {
+        guest_host_ptr->got_instances = true ;
+    }
+    else
+    {
+        wlog ("%s not found\n", hostname.c_str());
+    }
+}
+
+bool guestHostClass::get_got_instances ( string hostname )
+{
+    guest_host * guest_host_ptr = getHost ( hostname );
+    if ( guest_host_ptr )
+    {
+        return ( guest_host_ptr->got_instances );
+    }
+    else
+    {
+        wlog ("%s not found\n", hostname.c_str());
+    }
+    return (false); /* unknown host */
+}
+
+bool guestHostClass::get_reporting_state ( string hostname ) /* host level reporting gate ; false if the host is unknown */
+{
+    guest_host * guest_host_ptr = getHost ( hostname );
+    if ( guest_host_ptr )
+    {
+        return ( guest_host_ptr->reporting );
+    }
+    else
+    {
+        wlog ("%s not found\n", hostname.c_str());
+    }
+    return ( false );
+}
+
+int guestHostClass::set_reporting_state( string hostname, bool reporting ) /* PASS, or FAIL_NOT_FOUND for an unknown host */
+{
+    int rc = PASS ;
+    guest_host * guest_host_ptr = getHost ( hostname );
+    if ( guest_host_ptr )
+    {
+        guest_host_ptr->reporting = reporting ;
+    }
+    else
+    {
+        wlog ("%s not found\n", hostname.c_str());
+        rc = FAIL_NOT_FOUND ;
+    }
+    return (rc);
+}
+
+
+string guestHostClass::get_host_ip ( string hostname ) /* empty null_str if the host is unknown */
+{
+    guest_host * guest_host_ptr = getHost ( hostname );
+    if ( guest_host_ptr )
+    {
+        return ( guest_host_ptr->ip );
+    }
+    return ( null_str );
+}
+
+void guestHostClass::set_query_flag ( string hostname ) /* silently ignored for an unknown host */
+{
+    guest_host * guest_host_ptr = getHost ( hostname );
+    if ( guest_host_ptr )
+    {
+        guest_host_ptr->query_flag = true ;
+    }
+}
+
+void guestHostClass::clr_query_flag ( string hostname ) /* silently ignored for an unknown host */
+{
+    guest_host * guest_host_ptr = getHost ( hostname );
+    if ( guest_host_ptr )
+    {
+        guest_host_ptr->query_flag = false ;
+    }
+}
+
+bool guestHostClass::get_query_flag ( string hostname ) /* false if the host is unknown */
+{
+    guest_host * guest_host_ptr = getHost ( hostname );
+    if ( guest_host_ptr )
+    {
+        return ( guest_host_ptr->query_flag );
+    }
+    return ( false );
+}
+
+int guestHostClass::set_inst_state ( string hostname ) /* re-sends each instance's reporting state with MTC_CMD_MOD_INST */
+{
+    guest_host * guest_host_ptr = getHost ( hostname );
+    if ( guest_host_ptr )
+    {
+        for ( guest_host_ptr->instance_list_ptr = guest_host_ptr->instance_list.begin();
+              guest_host_ptr->instance_list_ptr != guest_host_ptr->instance_list.end();
+              guest_host_ptr->instance_list_ptr++ )
+        {
+            send_cmd_to_guestServer ( hostname, MTC_CMD_MOD_INST,
+                                      guest_host_ptr->instance_list_ptr->uuid,
+                                      guest_host_ptr->instance_list_ptr->heartbeat.reporting );
+        }
+    }
+
return (PASS); /* PASS even when the host is unknown */
+}
+
+void guestHostClass::inc_query_misses ( string hostname )
+{
+    guest_host * guest_host_ptr = getHost ( hostname );
+    if ( guest_host_ptr )
+    {
+        guest_host_ptr->query_misses++ ;
+    }
+    else
+    {
+        /* TODO: turn into a wlog_throttled ... */
+        dlog ("%s not found\n", hostname.c_str());
+    }
+}
+
+void guestHostClass::clr_query_misses ( string hostname )
+{
+    guest_host * guest_host_ptr = getHost ( hostname );
+    if ( guest_host_ptr )
+    {
+        guest_host_ptr->query_misses = 0 ;
+    }
+    else
+    {
+        /* TODO: turn into a wlog_throttled ... */
+        dlog ("%s not found\n", hostname.c_str());
+    }
+}
+
+int guestHostClass::get_query_misses ( string hostname )
+{
+    guest_host * guest_host_ptr = getHost ( hostname );
+    if ( guest_host_ptr )
+    {
+        return ( guest_host_ptr->query_misses ) ;
+    }
+    else
+    {
+        /* TODO: turn into a wlog_throttled ... */
+        dlog ("%s not found\n", hostname.c_str());
+    }
+    return (-1); /* unknown host */
+}
+
+/**************************************************************************
+ *
+ * Name       : getEvent
+ *
+ * Purpose    : Return a reference to a host or instance level libEvent.
+ *              'hostname' is updated on a match ; nullEvent is returned otherwise.
+ **************************************************************************/
+libEvent & guestHostClass::getEvent ( struct event_base * base_ptr, string & hostname )
+{
+    struct guest_host * guest_ptr = static_cast<struct guest_host *>(NULL) ;
+
+    /* check for empty list condition */
+    if ( guest_head == NULL )
+        return (nullEvent) ;
+
+    if ( base_ptr == NULL )
+        return (nullEvent) ;
+
+    /** Loop over all hosts looking for this instance.
*/
+    for ( guest_ptr = guest_head ; guest_ptr != NULL ; guest_ptr = guest_ptr->next )
+    {
+        if ( guest_ptr->vimEvent.base == base_ptr )
+        {
+            dlog2 ("%s Found Event Base Pointer (host) (%p)\n",
+                       guest_ptr->vimEvent.uuid.c_str(),
+                       guest_ptr->vimEvent.base);
+
+            /* Update the reference variable */
+            hostname = guest_ptr->hostname ;
+
+            return (guest_ptr->vimEvent) ;
+        }
+        else if ( guest_ptr->instance_list.size() )
+        {
+            for ( guest_ptr->instance_list_ptr = guest_ptr->instance_list.begin();
+                  guest_ptr->instance_list_ptr != guest_ptr->instance_list.end();
+                  guest_ptr->instance_list_ptr++ )
+            {
+                if ( guest_ptr->instance_list_ptr->vimEvent.base == base_ptr )
+                {
+                    dlog2 ("%s Found Event Base Pointer (instance) (%p)\n",
+                               guest_ptr->instance_list_ptr->uuid.c_str(),
+                               guest_ptr->instance_list_ptr->vimEvent.base);
+
+                    /* Update the reference variable with the owning host's name */
+                    hostname = guest_ptr->hostname ;
+
+                    return (guest_ptr->instance_list_ptr->vimEvent) ;
+                }
+            }
+        }
+        if (( guest_ptr->next == NULL ) || ( guest_ptr == guest_tail ))
+            break ;
+    }
+    return (nullEvent) ; /* no host or instance event uses this base pointer */
+}
+
+libEvent & guestHostClass::get_host_event ( string hostname ) /* the host's vimEvent, or nullEvent if unknown */
+{
+    guestHostClass::guest_host * guest_host_ptr ;
+    guest_host_ptr = guestHostClass::getHost ( hostname );
+    if ( guest_host_ptr )
+    {
+        return ( guest_host_ptr->vimEvent );
+    }
+    else
+    {
+        wlog ("%s not found\n", hostname.c_str());
+    }
+    return ( nullEvent );
+}
+
+/*****************************************************************************
+ * Memory Dump Stuff *
+ *****************************************************************************/
+void guestHostClass::print_node_info ( void ) /* currently only flushes stdout and stderr */
+{
+    fflush (stdout);
+    fflush (stderr);
+}
+
+void guestHostClass::mem_log_info ( void ) /* one summary line : host count, allocations, memory used */
+{
+    char str[MAX_MEM_LOG_DATA] ;
+    snprintf (&str[0], MAX_MEM_LOG_DATA, "Hosts:%d Allocs:%d Memory:%d\n", hosts, memory_allocs, memory_used );
+    mem_log (str);
+}
+
+void guestHostClass::mem_log_info_host ( struct guestHostClass::guest_host * guest_host_ptr )
+{
+    char
str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s:%s\n", guest_host_ptr->hostname.c_str(), guest_host_ptr->ip.c_str()); + mem_log (str); +} + +void mem_log_delimit_host ( void ) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "-------------------------------------------------------------\n"); + mem_log (str); +} + +void guestHostClass::mem_log_info_inst ( struct guestHostClass::guest_host * ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + if ( ptr->instance_list.size() ) + { + for ( ptr->instance_list_ptr = ptr->instance_list.begin(); + ptr->instance_list_ptr != ptr->instance_list.end(); + ptr->instance_list_ptr++ ) + { + snprintf (&str[0], MAX_MEM_LOG_DATA, + " %s %s Faults:%d %s %s\n", + ptr->instance_list_ptr->uuid.data(), + ptr->instance_list_ptr->hostname.data(), + ptr->instance_list_ptr->heartbeat.failures, + ptr->instance_list_ptr->heartbeat.provisioned ? "provisioned" : "", + ptr->instance_list_ptr->heartbeat.reporting ? "reporting" : ""); + mem_log (str); + + snprintf (&str[0], MAX_MEM_LOG_DATA, + " Timeouts: Restart:%s Resume:%s Suspend:%s Shutdown:%s Vote:%s\n", + ptr->instance_list_ptr->restart_to_str.data(), + ptr->instance_list_ptr->resume_to_str.data(), + ptr->instance_list_ptr->suspend_to_str.data(), + ptr->instance_list_ptr->shutdown_to_str.data(), + ptr->instance_list_ptr->vote_to_str.data()); + mem_log (str); + + // mem_log_delimit_host (); + } + } + else + { + snprintf (&str[0], MAX_MEM_LOG_DATA, "no instances\n"); + mem_log (str); + } +} + + +void guestHostClass::memDumpNodeState ( string hostname ) +{ + guestHostClass::guest_host* guest_host_ptr ; + guest_host_ptr = guestHostClass::getHost ( hostname ); + if ( guest_host_ptr == NULL ) + { + mem_log ( hostname, ": ", "Not Found in guestHostClass\n" ); + return ; + } + else + { + mem_log_info_host ( guest_host_ptr ); + } +} + +void guestHostClass::memDumpAllState ( void ) +{ + guestHostClass::hostBase.memLogDelimit (); + + mem_log_info ( ); + + /* walk the node 
list looking for nodes that should be monitored */ + for ( struct guest_host * ptr = guest_head ; ptr != NULL ; ptr = ptr->next ) + { + memDumpNodeState ( ptr->hostname ); + if ( (ptr->hosttype & COMPUTE_TYPE) == COMPUTE_TYPE) + { + mem_log_info_inst ( ptr ); + } + guestHostClass::hostBase.memLogDelimit (); + } +} diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestClass.h b/mtce-common/cgts-mtce-common-1.0/guest/guestClass.h new file mode 100644 index 00000000..2cb3af6d --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestClass.h @@ -0,0 +1,202 @@ +#ifndef __INCLUDE_GUESTCLASS_H__ +#define __INCLUDE_GUESTCLASS_H__ + +/* + * Copyright (c) 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +#include "guestBase.h" +#include "httpUtil.h" /* for ... libEvent and httpUtil_... */ +#include "hostClass.h" + +typedef enum +{ + STAGE__START, + STAGES +} guest_stage_enum ; + + +class guestHostClass +{ + private: + struct guest_host { + + /** Pointer to the previous / next host in the list */ + struct guest_host * prev; + struct guest_host * next; + + string hostname ; + string uuid ; + string ip ; + int hosttype ; + + /** + * Top level gate for the host. + * If false then reporting for all instances are off. + */ + bool reporting; + + bool query_flag ; + int query_misses ; + + /** Instance level Audit timer */ + struct mtc_timer host_audit_timer; + + /** flag that indicates we were able to fetch host state from the VIM */ + bool got_host_state ; + + /** flag that indicates we were able to fetch intances from the VIM */ + bool got_instances ; + + /** Main FSM stage */ + guest_stage_enum stage ; + + /* List of instances for this host */ + list instance_list ; + list::iterator instance_list_ptr; + + libEvent vimEvent ; + }; + + /** List of allocated host memory. + * + * An array of host pointers. + */ + guest_host * host_ptrs[MAX_HOSTS] ; + + /** A memory allocation counter. 
+ * + * Should represent the number of hosts in the linked list. + */ + int memory_allocs ; + + /** A memory used counter + * + * A variable storing the accumulated host memory + */ + int memory_used ; + + + // struct hostBaseClass::host* getHost ( string hostname ); + + struct guest_host * guest_head ; /**< Host Linked List Head pointer */ + struct guest_host * guest_tail ; /**< Host Linked List Tail pointer */ + + struct guestHostClass::guest_host* newHost ( void ); + struct guestHostClass::guest_host* addHost ( string hostname ); + struct guestHostClass::guest_host* getHost ( string hostname ); + int remHost ( string hostname ); + int delHost ( struct guestHostClass::guest_host * guest_host_ptr ); + struct guestHostClass::guest_host* getHost_timer ( timer_t tid ); + + libEvent & getEvent ( struct event_base * base_ptr, string & hostname ); + + const + char * get_guestStage_str ( struct guestHostClass::guest_host * guest_host_ptr ); + int guestStage_change ( struct guestHostClass::guest_host * guest_host_ptr, guest_stage_enum newStage ); + + void mem_log_info ( void ); + void mem_log_info_host ( struct guestHostClass::guest_host * guest_host_ptr ); + void mem_log_info_inst ( struct guestHostClass::guest_host * guest_host_ptr ); + + public: + + guestHostClass(); /**< constructor */ + ~guestHostClass(); /**< destructor */ + + hostBaseClass hostBase ; + + bool exit_fsm ; + void run_fsm ( string hostname ); + + bool audit_run ; + + /** Host level Audit timer */ + struct mtc_timer audit_timer; + + /** This is a list of host names. 
*/ + std::list hostlist ; + std::list::iterator hostlist_iter_ptr ; + + // void guest_fsm ( void ); + + int hosts ; + + /* For select dispatch */ + struct timeval waitd ; + + fd_set inotify_readfds ; + fd_set instance_readfds ; + fd_set message_readfds ; + + int add_host ( string uuid, string address, string hostname, string nodetype ); + int mod_host ( string uuid, string address, string hostname, string nodetype ); + int del_host ( string hostname ); /* delete the host from the daemon - mtcAgent */ + int rem_host ( string hostname ); + + /** Delete all instances for this host */ + int del_host_inst ( string host_uuid ); + + int add_inst ( string hostname, instInfo & instance ); + int mod_inst ( string hostname, instInfo & instance ); + int del_inst ( string instance ); + instInfo * get_inst ( string instance ); + + /* The handler that lib event calls to handle the return response */ + void guestVimApi_handler ( struct evhttp_request *req, void *arg ); + + /** + * Change all the instance service states to enabled or disable + * for the specified host. + **/ + int host_inst( string hostname , mtc_cmd_enum command ); + + /** + * Set and Get a bool that indicates whether we already + * got the host reporting state from the VIM. 
+ * + * The VIM might not be running at the time this daemon + * is started so we need to retry until we get it + **/ + void set_got_host_state ( string hostname ); + bool get_got_host_state ( string hostname ); + void set_got_instances ( string hostname ); + bool get_got_instances ( string hostname ); + + /** returns he number of instances on this host */ + int num_instances ( string hostname ); + + string get_host_name ( string host_uuid ); + string get_host_uuid ( string hostname ); + string get_host_ip ( string hostname ); + string get_inst_host_name ( string instance_uuid ); + + /* Send the instance reporting state to the guestServer on that host + * primarily used to preiodically refresh instance reporting state or + * set it when the guestServer seems to have restarted */ + int set_inst_state ( string hostname ); + + libEvent & get_host_event ( string hostname ); + + void inc_query_misses ( string hostname ); + void clr_query_misses ( string hostname ); + int get_query_misses ( string hostname ); + void set_query_flag ( string hostname ); + void clr_query_flag ( string hostname ); + bool get_query_flag ( string hostname ); + + bool get_reporting_state( string hostname ); + int set_reporting_state( string hostname, bool enabled ); + + void memLogDelimit ( void ); /**< Debug log delimiter */ + void memDumpNodeState ( string hostname ); + void memDumpAllState ( void ); + void print_node_info ( void ); /**< Print node info banner */ +}; + +guestHostClass * get_hostInv_ptr ( void ); + +#endif /* __INCLUDE_GUESTCLASS_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestHttpSvr.cpp b/mtce-common/cgts-mtce-common-1.0/guest/guestHttpSvr.cpp new file mode 100644 index 00000000..d8c651d2 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestHttpSvr.cpp @@ -0,0 +1,1092 @@ +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** @file Wind River Titanium Cloud Guest Daemon's HTTP Server */ + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "gst" + +#include +#include +#include +#include /* for ... inet_addr , inet_ntoa */ +#include /* for ... LOOPBACK_IP */ +#include /* for ... HTTP_ status definitions */ +#include + +using namespace std; + +#include "daemon_common.h" /* */ + +#include "nodeBase.h" /* Service header */ +#include "nodeTimers.h" /* */ +#include "nodeUtil.h" /* */ +#include "jsonUtil.h" /* for ... jsonUtil_get_key_val */ + +#include "guestUtil.h" /* for ... guestUtil_inst_init */ +#include "guestClass.h" /* */ +#include "guestHttpSvr.h" /* for ... this module */ +#include "guestVimApi.h" /* for ... guestVimApi_inst_action */ + +extern int send_event_to_mtcAgent ( string hostname, unsigned int event); + +/* Used for log messages */ +#define GUEST_SERVER "HTTP Guest Server" + +/** + * HTTP commands level is specified in the URL as either + * of the following ; both are at v1 + **/ +#define HOST_LEVEL_URL "/v1/hosts/" +#define INST_LEVEL_URL "/v1/instances/" + +/* Commands require the "User Agent" to be set to SERVICE_VERSION */ +#define USER_AGENT "User-Agent" +#define SERVICE_VERSION "vim/1.0" + +/* This servers's request structure */ +static request_type guest_request ; + +int sequence = 0 ; +char log_str [MAX_API_LOG_LEN]; +char filename[MAX_FILENAME_LEN]; + +/* Module Cleanup */ +void guestHttpSvr_fini ( void ) +{ + if ( guest_request.fd ) + { + if ( guest_request.base ) + { + event_base_free( guest_request.base); + } + close ( guest_request.fd ); + } +} + +/* Look for events */ +void guestHttpSvr_look ( void ) +{ + /* Look for INV Events */ + if ( guest_request.base ) + event_base_loop( guest_request.base, EVLOOP_NONBLOCK ); +} + +/** + * Formulates and updates the resp_buffer reference + * variable based on the specified error code + **/ +string _create_error_response ( int error_code ) +{ + string 
resp_buffer = "{" ; + resp_buffer.append (" \"status\" : \"fail\""); + switch (error_code) + { + case FAIL_KEY_VALUE_PARSE: + { + resp_buffer.append (",\"reason\" : \"command parse error\""); + break ; + } + case FAIL_JSON_ZERO_LEN: + { + resp_buffer.append (",\"reason\" : \"no buffer\""); + break ; + } + case FAIL_NOT_FOUND: + { + resp_buffer.append (",\"reason\" : \"entity not found\""); + break ; + + } + case FAIL_INVALID_DATA: + { + resp_buffer.append (",\"reason\" : \"invalid data\""); + break ; + } + case FAIL_BAD_STATE: + { + resp_buffer.append (",\"reason\" : \"bad state\""); + break ; + } + case FAIL_BAD_CASE: + { + resp_buffer.append (",\"reason\" : \"unsupported http command\""); + break ; + } + default: + { + ; + } + } + resp_buffer.append ("}"); + + return (resp_buffer); +} + +/******************************************************************** + * + * Name : _get_service_level + * + * Description: Verify this request contains + * + * 1. valid service level specification in the URL and + * 2. 
the expected User-Agent value + * + ********************************************************************/ +service_level_enum _get_service_level ( struct evhttp_request *req ) +{ + service_level_enum service_level = SERVICE_LEVEL_NONE ; + + /* Parse Headers we care about to verify that it also contains the correct User-Agent header */ + struct evkeyvalq * headers_ptr = evhttp_request_get_input_headers (req); + const char * header_value_ptr = evhttp_find_header (headers_ptr, USER_AGENT); + if ( header_value_ptr ) + { + if ( strncmp ( header_value_ptr, SERVICE_VERSION, 20 ) ) + { + elog ("Request missing required '%s=%s' (%s)\n", + USER_AGENT, SERVICE_VERSION, header_value_ptr ); + return (service_level); + } + } + + /* get the URL string */ + const char * url_ptr = evhttp_request_get_uri (req); + jlog1 ("URI: %s\n", url_ptr ); + + /* look for the supported service levels in the url */ + const char * service_level_ptr = strstr ( url_ptr, HOST_LEVEL_URL); + if ( service_level_ptr ) + { + service_level = SERVICE_LEVEL_HOST ; + } + else + { + service_level_ptr = strstr ( url_ptr, INST_LEVEL_URL); + if ( service_level_ptr ) + { + service_level = SERVICE_LEVEL_INST ; + } + } + if ( service_level == SERVICE_LEVEL_NONE ) + { + elog ("Unsupported service level (url:%s)\n", url_ptr ); + return (service_level); + } + return (service_level); +} + + + +string _update_services_response ( string hostname, string uuid, instInfo * instinfo_ptr ) +{ + string response = ("{"); + response.append ("\"uuid\":\""); + response.append (uuid); + response.append ("\","); + response.append ("\"hostname\":\""); + response.append (hostname); + response.append ("\","); + response.append ("\"services\": [{ \"service\":\"heartbeat\","); + + response.append ("\"state\":\""); + if ( instinfo_ptr->heartbeat.reporting == true ) + response.append ("enabled"); + else + response.append ("disabled"); + + response.append ("\",\"restart-timeout\":\""); + if ( instinfo_ptr->heartbeating == true ) + { + 
response.append (instinfo_ptr->restart_to_str); + response.append ("\",\"status\":\""); + response.append ("enabled\"}]}"); + } + else + { + response.append ("0\",\"status\":\""); + response.append ("disabled\"}]}"); + } + return (response); +} + +/***************************************************************************** + * + * Name: guestHttpSvr_vim_req + * + * Handles three 'operations' + * + * 'delete' - based on uuid + * 'modify' - based on list of key - value pairs + * 'add' - based on inventory record + * + ****************************************************************************** + * Test Commands: + * + +Add Instance: +curl -i -X POST -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2410/v1/instances/8d80875b-fa73-4ccb-bce3-1cd4df10449d -d '{"hostname": "compute-1", "uuid" : "8d80875b-fa73-4ccb-bce3-1cd4df10449d", "channel" : "cgts-instance000001", "services" : ["heartbeat"]}' + + + +Disable Instance: heartbeat +curl -i -X PUT -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2410/v1/instances/8d80875b-fa73-4ccb-bce3-1cd4df10449d -d '{"hostname": "compute-1", "uuid" : "8d80875b-fa73-4ccb-bce3-1cd4df10449d", "channel" : "cgts-instance000001", "services" : [{"service":"heartbeat" , "state":"disabled"}]}' + +Delete Host: +curl -i -X DELETE -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2410/v1/hosts/8aee436e-d564-459e-a0d8-26c44792a9df + +Enable Host: heartbeat +curl -i -X PUT -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2410/v1/hosts/8aee436e-d564-459e-a0d8-26c44792a9df/enable -d '{"hostname": "compute-1", "uuid" : "8d80875b-fa73-4ccb-bce3-1cd4df10449d"}' + +Enable Host: heartbeat +curl -i -X GET -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' 
http://localhost:2410/v1/instances/8d80875b-fa73-4ccb-bce3-1cd4df10449d

*/

/*********************************************************************************
 *
 * Name       : guestHttpSvr_host_req
 *
 * Description : Handles host level VIM requests
 *
 * Parameters :
 *
 *   buffer_ptr       - json request payload ; must hold the host name
 *                      under the MTC_JSON_INV_NAME key
 *   command          - command decoded from the request URL ; handed to
 *                      guestHostClass::host_inst unmodified
 *   http_cmd         - http method of the request ; only PUT is supported
 *   http_status_code - updated with the http status to reply with
 *
 * Returns    : the json response body
 *
 ********************************************************************************/
string guestHttpSvr_host_req ( char          * buffer_ptr,
                               mtc_cmd_enum    command,
                               evhttp_cmd_type http_cmd,
                               int           & http_status_code )
{
    string response = "" ;
    string hostname = "" ;

    int rc = jsonUtil_get_key_val ( buffer_ptr, MTC_JSON_INV_NAME, hostname );

    if ( rc )
    {
        wlog ("Failed to parse command key values (%d)\n", rc );
        ilog ("... %s\n", buffer_ptr );

        response = _create_error_response ( FAIL_KEY_VALUE_PARSE );
        http_status_code = HTTP_BADREQUEST ;
    }
    else
    {
        guestHostClass * obj_ptr = get_hostInv_ptr ();

        switch ( http_cmd )
        {
            case EVHTTP_REQ_PUT:
            {
                qlog ("%s VIM CMD: Enable Host\n", hostname.c_str());

                /* Any host_inst failure is reported to the VIM as 'not found' */
                rc = obj_ptr->host_inst ( hostname, command );
                if ( rc )
                {
                    elog ("%s Host Enable Request (vim) - Host Not Found\n", hostname.c_str());
                    response = _create_error_response ( FAIL_NOT_FOUND );
                    http_status_code = HTTP_NOTFOUND ;

                    /* Ask mtce for an inventory update */
                    send_event_to_mtcAgent ( obj_ptr->hostBase.my_hostname, MTC_EVENT_MONITOR_READY ) ;

                }
                else
                {
                    http_status_code = HTTP_OK ;
                    response = " { \"status\" : \"pass\" }" ;
                }
                break ;
            }
            default:
            {
                wlog ("%s Unsupported http command '%s'\n",
                          hostname.c_str(), getHttpCmdType_str(http_cmd));
                response = _create_error_response ( FAIL_BAD_CASE );
                http_status_code = HTTP_BADREQUEST ;
            }
        }
    }
    return (response);
}

+/********************************************************************************* + * + * Name : _get_key_val + * + * Description : Get valid value from http message and generate error if failed + * + ********************************************************************************/ +int _get_key_val ( char * buffer_ptr, + string key, + string & value, + int & http_status_code, + string & response ) +{ + int rc = jsonUtil_get_key_val ( buffer_ptr, key, value ); + + if ( rc ) + { + wlog ("Failed to extract %s from message\n", key.c_str()); + http_status_code = HTTP_BADREQUEST ; + response = _create_error_response ( FAIL_KEY_VALUE_PARSE ); + } + return rc; +} + +/********************************************************************************* + * + * Name : _get_list + * + * Description : Get valid list from http message and generate error if failed + * + ********************************************************************************/ +int _get_list ( char * buffer_ptr, + string key, + list & list, + int & http_status_code, + string & response ) +{ + int rc = jsonUtil_get_list ( buffer_ptr, key, list ); + + if ( rc ) + { + wlog ("Failed to extract %s from message\n", key.c_str()); + http_status_code = HTTP_BADREQUEST ; + response = _create_error_response ( FAIL_KEY_VALUE_PARSE ); + } + return rc; +} + +#define EVENT_VOTE "vote" +#define EVENT_STOP "stop" +#define EVENT_REBOOT "reboot" +#define EVENT_PAUSE "pause" +#define EVENT_UNPAUSE "unpause" +#define EVENT_SUSPEND "suspend" +#define EVENT_RESUME "resume" +#define EVENT_LIVE_MIGRATE_BEGIN "live_migrate_begin" +#define EVENT_LIVE_MIGRATE_END "live_migrate_end" +#define EVENT_COLD_MIGRATE_BEGIN "cold_migrate_begin" +#define EVENT_COLD_MIGRATE_END "cold_migrate_end" + +string _get_action_timeout ( instInfo * instInfo_ptr, string action ) +{ + if ( instInfo_ptr->heartbeating == false ) + { + ilog ("%s returning timeout of zero while not heartbeating for action '%s'\n", + log_prefix(instInfo_ptr).c_str(), 
action.c_str()); + return ("0"); + } + if ( !action.compare (EVENT_VOTE) ) + return (instInfo_ptr->vote_to_str); + + if ( !action.compare (EVENT_STOP) ) + return (instInfo_ptr->shutdown_to_str); + if ( !action.compare (EVENT_REBOOT) ) + return (instInfo_ptr->shutdown_to_str); + if ( !action.compare (EVENT_PAUSE) ) + return (instInfo_ptr->suspend_to_str); + if ( !action.compare (EVENT_UNPAUSE) ) + return (instInfo_ptr->resume_to_str); + if ( !action.compare (EVENT_SUSPEND) ) + return (instInfo_ptr->suspend_to_str); + if ( !action.compare (EVENT_RESUME) ) + return (instInfo_ptr->resume_to_str); + + if ( !action.compare (EVENT_LIVE_MIGRATE_BEGIN) ) + return (instInfo_ptr->suspend_to_str); + if ( !action.compare (EVENT_LIVE_MIGRATE_END) ) + return (instInfo_ptr->resume_to_str); + if ( !action.compare (EVENT_COLD_MIGRATE_BEGIN) ) + return (instInfo_ptr->suspend_to_str); + if ( !action.compare (EVENT_COLD_MIGRATE_END) ) + return (instInfo_ptr->resume_to_str); + + ilog ("%s returning timeout of zero for invalid action '%s'\n", + log_prefix(instInfo_ptr).c_str(), action.c_str()); + + return ("0"); +} + +/********************************************************************************* + * + * Name : guestHttpSvr_inst_req + * + * Description : Handles instance level VIM requests + * + ********************************************************************************/ +string guestHttpSvr_inst_req ( char * buffer_ptr, + mtc_cmd_enum command, + evhttp_cmd_type http_cmd, + int & http_status_code ) +{ + string response = "" ; + string hostname = "" ; + string instance_uuid = "" ; + + _get_key_val ( buffer_ptr, MTC_JSON_INV_NAME, hostname, http_status_code, response); + if ( _get_key_val ( buffer_ptr, MTC_JSON_INV_NAME, hostname, http_status_code, response )) + return (response); + + if ( _get_key_val ( buffer_ptr, "uuid", instance_uuid, http_status_code, response )) + return (response); + + instInfo instance_info ; guestUtil_inst_init ( &instance_info ); + instance_info.uuid = 
instance_uuid; + + guestHostClass * obj_ptr = get_hostInv_ptr (); + + /* WARNING: We only support a single list element for now */ + list services_list ; + services_list.clear() ; + + switch ( http_cmd ) + { + case EVHTTP_REQ_POST: + { + if ( MTC_CMD_VOTE == command ) + { + jlog ("vote instance Info: %s", buffer_ptr ); + + string action = ""; + if ( _get_key_val (buffer_ptr, "action", action, http_status_code, response ) ) + return (response); + + qlog ("VIM CMD: Vote instance %s\n", + instance_info.uuid.c_str()); + + instInfo * instInfo_ptr = obj_ptr->get_inst ( instance_uuid ); + if ( instInfo_ptr ) + { + response = ("{\"uuid\":\""); + response.append (instance_uuid); + response.append ("\",\"hostname\":\""); + response.append (hostname); + response.append ("\",\"action\":\""); + response.append (action); + response.append ("\",\"timeout\":\""); + response.append (_get_action_timeout ( instInfo_ptr, EVENT_VOTE )); + response.append ("\"}"); + + jlog ("%s %s Vote Response: %s\n", hostname.c_str(), + instance_uuid.c_str(), + response.c_str()); + + + if ( instInfo_ptr->heartbeating ) + send_cmd_to_guestServer (hostname, MTC_CMD_VOTE_INST, instance_uuid, true, action); + } + else + { + elog ("%s %s vote request (from vim) - Instance Not Found\n", hostname.c_str(), instance_uuid.c_str()); + response = _create_error_response ( FAIL_NOT_FOUND ); + http_status_code = HTTP_NOTFOUND ; + } + } + else if ( MTC_CMD_NOTIFY == command ) + { + jlog ("notify instance Info: %s", buffer_ptr ); + + string action = ""; + if ( _get_key_val (buffer_ptr, "action", action, http_status_code, response )) + return (response); + + qlog ("%s %s VIM CMD: Notify instance\n", + hostname.c_str(), instance_info.uuid.c_str()); + + instInfo * instInfo_ptr = obj_ptr->get_inst ( instance_uuid ); + if ( instInfo_ptr ) + { + response = ("{\"uuid\":\""); + response.append (instance_uuid); + response.append ("\",\"hostname\":\""); + response.append (hostname); + response.append ("\",\"action\":\""); + 
response.append (action); + response.append ("\",\"timeout\":\""); + response.append (_get_action_timeout ( instInfo_ptr , action )); + response.append ("\"}"); + + jlog ("%s %s Notify Response: %s\n", hostname.c_str(), instInfo_ptr->uuid.c_str(), response.c_str()); + + if ( instInfo_ptr->heartbeating ) + send_cmd_to_guestServer (hostname, MTC_CMD_NOTIFY_INST, instance_uuid, true, action); + } + else + { + elog ("%s %s notify request (vim) - Instance Not Found\n", hostname.c_str(), instance_uuid.c_str()); + response = _create_error_response ( FAIL_NOT_FOUND ); + http_status_code = HTTP_NOTFOUND ; + } + } + /* Add instance */ + else + { + if ( _get_list (buffer_ptr, "services", services_list, http_status_code, response )) + return (response); + + string service = services_list.front(); + qlog ("%s %s VIM CMD: Add Instance\n", + hostname.c_str(), + instance_info.uuid.c_str()); + + ilog ("%s %s add request (from vim) (%s)\n", + hostname.c_str(), + instance_info.uuid.c_str(), + service.c_str()); + + if ( obj_ptr->add_inst ( hostname , instance_info ) != PASS ) + { + response = _create_error_response ( FAIL_INVALID_DATA ); + http_status_code = HTTP_BADREQUEST ; + } + else + { + instance_info.heartbeat.provisioned = true ; + response = " { \"status\" : \"pass\" }" ; + } + } + break ; + } + /* PATCH is used to control service states ; enable or disable */ + case EVHTTP_REQ_PATCH: + { + if ( _get_list (buffer_ptr, "services", services_list, http_status_code, response ) ) + return (response); + + jlog ("%s modify instance (%s)", hostname.c_str(), buffer_ptr ); + + string service , state ; + string services = services_list.front() ; + jsonUtil_get_key_val ( (char*)services.data(), "service", service ); + jsonUtil_get_key_val ( (char*)services.data(), "state" , state ); + + qlog ("%s %s VIM CMD: Modify Instance\n", + hostname.c_str(), + instance_info.uuid.c_str()); + + if ( service.compare("heartbeat")) + { + response = _create_error_response ( FAIL_INVALID_DATA ); + 
http_status_code = HTTP_BADREQUEST ; + return (response); + } + else if ( !state.compare("enabled")) + instance_info.heartbeat.reporting = true; + else if ( !state.compare("disabled")) + instance_info.heartbeat.reporting = false; + else + { + elog ("%s modify request (vim) - invalid instance state '%s'\n", hostname.c_str(), state.c_str()); + response = _create_error_response ( FAIL_BAD_STATE ); + http_status_code = HTTP_BADREQUEST ; + return (response); + } + int rc = obj_ptr->mod_inst ( hostname, instance_info ); + if ( rc ) + { + elog ("%s %s modify request (vim) - Instance Not Found\n", + hostname.c_str(), instance_info.uuid.c_str()); + + response = _create_error_response ( FAIL_NOT_FOUND ); + http_status_code = HTTP_NOTFOUND ; + return (response); + } + else + { + instInfo * instInfo_ptr = obj_ptr->get_inst ( instance_info.uuid ); + if ( instInfo_ptr ) + { + response = _update_services_response ( hostname , instInfo_ptr->uuid, instInfo_ptr ); + http_status_code = HTTP_OK ; + } + else + { + response = _create_error_response ( FAIL_NOT_FOUND ); + http_status_code = HTTP_NOTFOUND ; + } + } + break ; + } + default: + { + wlog ("%s Unsupported HTTP '%s' request\n", instance_uuid.c_str(), getHttpCmdType_str(http_cmd)); + } + } + return (response); +} + +/***************************************************************************** + * + * Name: guestHttpSvr_vimCmdHdlr + * + * Description: Receive an http request extract the request type and buffer from + * it and call process request handler. + * Send the processed message response back to the connection. 
+ * + * Supported requests include: POST, PUT, DELETE + * + ******************************************************************************/ + +int _get_url_info ( struct evhttp_request * req, + const char * url_ptr, + url_info_type & url_info ) +{ + size_t len = 0 ; + + /* Extract the service level from the request URL ; host or instance */ + url_info.service_level = _get_service_level ( req ); + if ( url_info.service_level == SERVICE_LEVEL_NONE ) + { + return ( FAIL_INVALID_DATA ); + } + + /* Remove the service level from the URL */ + if ( url_info.service_level == SERVICE_LEVEL_HOST ) + { + url_ptr = strstr ( url_ptr, HOST_LEVEL_URL ); + len = strlen ( HOST_LEVEL_URL ); + } + else + { + url_ptr = strstr ( url_ptr, INST_LEVEL_URL); + len = strlen ( INST_LEVEL_URL ); + } + + if ( url_ptr ) + { + url_ptr += len ; + url_info.temp = url_ptr ; + url_info.uuid = url_info.temp.substr ( 0 , UUID_LEN ); + } + else + { + ilog ("Failed to parse URL (%s)", url_ptr); // DLOG + return (FAIL_INVALID_UUID) ; + } + /** + * Check to see if there is a command enable/disable/etc after the UUID + * ... 
If what is left on the URL is longer than a UUID then + * there must be a command so lets get that + **/ + if ( url_info.temp.length() > UUID_LEN ) + { + url_info.command = url_info.temp.substr(url_info.uuid.length()+1, string::npos ); + dlog ("Command:%s\n", url_info.command.c_str()); + } + return (PASS); +} + +void guestHttpSvr_vimCmdHdlr (struct evhttp_request *req, void *arg) +{ + int rc ; + struct evbuffer *resp_buf ; + url_info_type url_info ; + + int http_status_code = HTTP_NOTFOUND ; + guestHostClass * obj_ptr = get_hostInv_ptr () ; + string response = _create_error_response ( FAIL_JSON_ZERO_LEN ); + + guest_request.req = req ; + jlog1 ("HTTP Request:%p base:%p Req:%p arg:%p\n", &guest_request, + guest_request.base, + guest_request.req, + arg ); + + /* Get sender must be localhost */ + const char * host_ptr = evhttp_request_get_host (req); + if ( strncmp ( host_ptr , "localhost" , 10 )) + { + wlog ("Message received from unknown host (%s)\n", host_ptr ); + + /* TODO: Fail the request if from unknown host */ + } + + const char * url_ptr = evhttp_request_get_uri (req); + + /* Extract the operation */ + evhttp_cmd_type http_cmd = evhttp_request_get_command (req); + jlog1 ("%s request from '%s'\n", getHttpCmdType_str(http_cmd), host_ptr ); + + /* Log the request */ + snprintf (&filename[0], MAX_FILENAME_LEN, "/var/log/%s_request.log", program_invocation_short_name ); + // snprintf (&log_str[0], MAX_API_LOG_LEN-1, "\n%s [%5d] http request seq: %d with %d request from %s:%s", + snprintf (&log_str[0], MAX_API_LOG_LEN-1, "guest services request '%s' (%d) %s:%s", + getHttpCmdType_str(http_cmd), ++sequence, host_ptr, url_ptr ); + // send_log_message ( "controller-0", &filename[0], &log_str[0] ); + jlog ( "%s", log_str ); + + /* Fill in the url_info struct from the url string in the request */ + rc = _get_url_info ( req, url_ptr, url_info ); + if ( rc ) + { + evhttp_send_error ( req, MTC_HTTP_FORBIDDEN, response.data() ); + return ; + } + + switch ( http_cmd ) + { + 
case EVHTTP_REQ_DELETE: + { + qlog ("%s VIM CMD: Delete Host or Instance\n", url_info.uuid.c_str()); + if ( url_info.service_level == SERVICE_LEVEL_HOST ) + { + /* Nothing to do at the host level for delete. + * Don't try and do a hostname lookup as it may already have been deleted */ + ilog ("%s delete host services request (vim)\n", url_info.uuid.c_str()); + } + else + { + ilog ("%s delete instance request (vim)\n", url_info.uuid.c_str()); + + rc = obj_ptr->del_inst ( url_info.uuid ); + } + if ( rc ) + { + elog ("%s instance not found\n", url_info.uuid.c_str()); + response = _create_error_response ( FAIL_NOT_FOUND ); + http_status_code = HTTP_NOTFOUND ; + } + else + { + http_status_code = HTTP_NOCONTENT ; + response = "{ \"status\" : \"pass\" }" ; + } + break ; + } + + /** + * GET is handled at this level because + * there is no payload with it. + **/ + case EVHTTP_REQ_GET: + { + if ( url_info.service_level == SERVICE_LEVEL_INST ) + { + instInfo * instance_ptr = obj_ptr->get_inst ( url_info.uuid ); + qlog ("%s VIM CMD: Query Instance - Reporting State\n", url_info.uuid.c_str()); + if ( !instance_ptr ) + { + elog ("%s query instance reporting state (vim) failed - Instance Not Found\n", url_info.uuid.c_str()); + response = _create_error_response ( FAIL_NOT_FOUND ); + http_status_code = HTTP_NOTFOUND ; + } + else + { + string hostname = obj_ptr->get_inst_host_name(url_info.uuid); + response = _update_services_response ( hostname , url_info.uuid, instance_ptr ); + http_status_code = HTTP_OK ; + } + } + /* GET the host level reporting state */ + else if ( url_info.service_level == SERVICE_LEVEL_HOST ) + { + string hostname = obj_ptr->get_host_name(url_info.uuid) ; + qlog ("%s VIM CMD: Query Host - Reporting State\n", hostname.c_str()); + if ( hostname.length() ) + { + + response = ("{"); + response.append ("\"uuid\":\""); + response.append (url_info.uuid); + response.append ("\","); + response.append ("\"hostname\":\""); + response.append (hostname); + response.append 
("\","); + response.append ("\"state\":\""); + + if ( obj_ptr->get_reporting_state(hostname) == true ) + response.append ("enabled\"}"); + else + response.append ("disabled\"}"); + + http_status_code = HTTP_OK ; + } + else + { + wlog ("%s query host reporting state (vim) failed - Host Not Found\n", url_info.uuid.c_str()); + response = _create_error_response ( FAIL_NOT_FOUND ); + http_status_code = HTTP_NOTFOUND ; + + /* Ask mtce for an inventory update */ + send_event_to_mtcAgent ( obj_ptr->hostBase.my_hostname, MTC_EVENT_MONITOR_READY ) ; + } + } + else + { + http_status_code = HTTP_NOTFOUND ; + slog ("invalid service level\n"); + } + break ; + } + + case EVHTTP_REQ_PUT: + case EVHTTP_REQ_POST: + case EVHTTP_REQ_PATCH: + { + /* GET the host level reporting state */ + if (( http_cmd == EVHTTP_REQ_POST ) && + ( url_info.service_level == SERVICE_LEVEL_HOST )) + { + string hostname = obj_ptr->get_host_name(url_info.uuid) ; + qlog ("%s VIM CMD: Create Host\n", hostname.c_str()); + if ( hostname.length() ) + { + ilog ("%s create host services (vim)\n", hostname.c_str()); + + http_status_code = HTTP_OK ; + response = " { \"status\" : \"pass\" }" ; + } + else + { + wlog ("%s create host (vim) failed - Host Not Found\n", url_info.uuid.c_str()); + response = _create_error_response ( FAIL_NOT_FOUND ); + http_status_code = HTTP_NOTFOUND ; + + /* Ask mtce for an inventory update */ + send_event_to_mtcAgent ( obj_ptr->hostBase.my_hostname, MTC_EVENT_MONITOR_READY ) ; + } + break ; + } + + /* Otherwise for PUTs and instances ; get the payload */ + struct evbuffer *in_buf = evhttp_request_get_input_buffer ( req ); + if ( in_buf ) + { + size_t len = evbuffer_get_length(in_buf) ; + if ( len ) + { + ev_ssize_t bytes = 0 ; + char * buffer_ptr = (char*)malloc(len+1); + jlog1 ("Buffer @ %p contains %ld bytes\n", &in_buf, len ); + + memset ( buffer_ptr, 0, len+1 ); + bytes = evbuffer_remove(in_buf, buffer_ptr, len ); + + if ( bytes <= 0 ) + { + http_status_code = HTTP_BADREQUEST ; + 
wlog ("http request with no payload\n"); + } + else + { + http_status_code = HTTP_OK ; + mtc_cmd_enum mtc_cmd; + + jlog("%s\n", buffer_ptr ); + + if (!url_info.command.compare("enable") ) + mtc_cmd = MTC_CMD_ENABLE ; + else if (!url_info.command.compare("disable") ) + mtc_cmd = MTC_CMD_DISABLE ; + else if (!url_info.command.compare("vote") ) + mtc_cmd = MTC_CMD_VOTE ; + else if (!url_info.command.compare("notify") ) + mtc_cmd = MTC_CMD_NOTIFY ; + else + mtc_cmd = MTC_CMD_NOT_SET ; + + if ( url_info.service_level == SERVICE_LEVEL_INST ) + { + response = guestHttpSvr_inst_req ( buffer_ptr, + mtc_cmd, + http_cmd, + http_status_code ); + } + else if ( url_info.service_level == SERVICE_LEVEL_HOST ) + { + response = guestHttpSvr_host_req ( buffer_ptr, + mtc_cmd, + http_cmd, + http_status_code ); + } + else + { + slog ("invalid service level\n"); + } + } + free ( buffer_ptr ); + } + else + { + http_status_code = MTC_HTTP_LENGTH_REQUIRED ; + wlog ("http request has no length\n"); + } + } + else + { + http_status_code = HTTP_BADREQUEST ; + wlog ("Http request has no buffer\n"); + } + break ; + } + default: + { + wlog ("Unknown command (%d)\n", http_cmd ); + http_status_code = HTTP_NOTFOUND ; + } + } + + if (( http_status_code >= HTTP_OK) && (http_status_code <= HTTP_NOCONTENT )) + { + resp_buf = evbuffer_new(); + jlog ("Response: %s\n", response.c_str()); + evbuffer_add_printf (resp_buf, "%s\n", response.data()); + evhttp_send_reply (guest_request.req, http_status_code, "OK", resp_buf ); + evbuffer_free ( resp_buf ); + } + else + { + if ( http_status_code == HTTP_NOTFOUND ) + { + wlog ("%s not found\n", url_ptr ); + } + else + { + elog ("HTTP request error:%d ; cmd:%s url:%s\n", + http_status_code, + getHttpCmdType_str(http_cmd), + url_ptr); + elog ("... 
response:%s\n", response.c_str()); + } + evhttp_send_error (guest_request.req, http_status_code, response.data() ); + } +} + +/***************************************************************** + * + * Name : guestHttpSvr_bind + * + * Description : Setup the HTTP server socket + * + *****************************************************************/ +int guestHttpSvr_bind ( request_type & request ) +{ + int rc ; + int flags ; + int one = 1; + + request.fd = socket(AF_INET, SOCK_STREAM, 0); + if (request.fd < 0) + { + elog ("HTTP server socket create failed (%d:%m)\n", errno); + return FAIL_SOCKET_CREATE ; + } + + /* make socket reusable */ + rc = setsockopt(request.fd, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(int)); + + memset(&request.addr, 0, sizeof(struct sockaddr_in)); + request.addr.sin_family = AF_INET; + request.addr.sin_addr.s_addr = inet_addr(LOOPBACK_IP) ; + request.addr.sin_port = htons(request.port); + + /* bind port */ + rc = bind ( request.fd, (struct sockaddr*)&request.addr, sizeof(struct sockaddr_in)); + if (rc < 0) + { + elog ("HTTP bind failure for port %d (%d:%m)\n", request.port, errno ); + return FAIL_SOCKET_BIND ; + } + + /* Listen for requests */ + rc = listen(request.fd, 10 ); + if (rc < 0) + { + elog ("HTTP listen failed (%d:%m)\n", errno ); + return FAIL_SOCKET_LISTEN; + } + + /* make non-blocking */ + flags = fcntl ( request.fd, F_GETFL, 0) ; + if ( flags < 0 || fcntl(request.fd, F_SETFL, flags | O_NONBLOCK) < 0) + { + elog ("HTTP set to non-blocking failed (%d:%m)\n", errno ); + return FAIL_SOCKET_OPTION; + } + + return PASS; +} + +/* Setup the http server */ +int guestHttpSvr_setup ( request_type & request ) +{ + int rc = PASS ; + if ( ( rc = guestHttpSvr_bind ( request )) != PASS ) + { + return rc ; + } + else if (request.fd < 0) + { + wlog ("failed to get http server socket file descriptor\n"); + return RETRY ; + } + + request.base = event_base_new(); + if (request.base == NULL) + { + elog ("failed to get http server request 
base\n"); + return -1; + } + request.httpd = evhttp_new(request.base); + if (request.httpd == NULL) + { + elog ("failed to get httpd server handle\n"); + return -1; + } + + evhttp_set_allowed_methods (request.httpd, EVENT_METHODS ); + + rc = evhttp_accept_socket(request.httpd, request.fd); + if ( rc == -1) + { + elog ("failed to accept on http server socket\n"); + return -1; + } + evhttp_set_gencb(request.httpd, guestHttpSvr_vimCmdHdlr, NULL); + + return PASS ; +} + +/* initialize the mtce http server */ +int guestHttpSvr_init ( int port ) +{ + int rc = PASS ; + memset ( &guest_request, 0, sizeof(request_type)); + guest_request.port = port ; + + for ( ; ; ) + { + rc = guestHttpSvr_setup ( guest_request ); + if ( rc == RETRY ) + { + wlog ("%s bind failed (%d)\n", GUEST_SERVER, guest_request.fd ); + } + else if ( rc != PASS ) + { + elog ("%s start failed (rc:%d)\n", GUEST_SERVER, rc ); + } + else if ( guest_request.fd > 0 ) + { + ilog ("Listening for 'http command' messages on %s:%d\n", + inet_ntoa(guest_request.addr.sin_addr), guest_request.port ); + rc = PASS ; + break ; + } + if ( rc ) mtcWait_secs (5); + } + return ( rc ) ; +} diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestHttpSvr.h b/mtce-common/cgts-mtce-common-1.0/guest/guestHttpSvr.h new file mode 100644 index 00000000..750d3512 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestHttpSvr.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2013, 2015 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Controller Maintenance Daemon + */ + + +typedef struct +{ + struct sockaddr_in addr ; + struct event_base * base ; + struct evhttp_request * req ; + struct evhttp * httpd ; + int fd ; + int port ; +} request_type ; + +void guestHttpSvr_fini ( void ); +int guestHttpSvr_init ( int port ); +int guestHttpSvr_setup ( request_type & request ); +void guestHttpSvr_look ( void ); diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestHttpUtil.cpp b/mtce-common/cgts-mtce-common-1.0/guest/guestHttpUtil.cpp new file mode 100644 index 00000000..a39ef5b4 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestHttpUtil.cpp @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2013, 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Controller Maintenance HTTP Utilities. + * + * Public Interfaces: + * + */ + +#include +#include +#include + +using namespace std; + +#include "httpUtil.h" /* for ... common http utilities */ +#include "jsonUtil.h" /* for ... Json Utilities */ +#include "nodeUtil.h" /* for ... Node Utilities */ + +#include "guestClass.h" /* for ... maintenance class nodeLinkClass */ +#include "guestHttpUtil.h" /* for ... this module header */ +#include "guestVimApi.h" /* for ... 
guestVimApi_Handler */ + +/* Module init - currently a no-op */ +void guestHttpUtil_init ( void ) +{ + return ; +} + +/* Module close - currently a no-op */ +void guestHttpUtil_fini ( void ) +{ + return ; +} + +/* ********************************************************************* + * + * Name : guestHttpUtil_status + * + * Description: Reads the HTTP response code of event.req into event.status and event.http_status and maps it to a return code: PASS for HTTP_OK and 201-204, FAIL_AUTHENTICATION for 401, FAIL_HTTP_ZERO_STATUS for 0, otherwise the response code itself. Returns FAIL_UNKNOWN_HOSTNAME when event.req is NULL. + * + * *********************************************************************/ + +int guestHttpUtil_status ( libEvent & event ) +{ + int rc = PASS ; + + if ( !event.req ) + { + elog ("%s Invalid request\n", event.hostname.length() ? 
event.hostname.c_str() : "unknown" ); + return (FAIL_UNKNOWN_HOSTNAME); + } + event.status = event.http_status = evhttp_request_get_response_code (event.req); + switch (event.status) + { + case HTTP_OK: + case 201: + case 202: + case 203: + case 204: + { + dlog ("%s HTTP_OK (%d)\n", event.hostname.c_str(), event.status ); + event.status = PASS ; + break; + } + /* Authentication error - only the return code is set here (event.status keeps the 401); no token refresh is done in this function */ + case 401: + { + rc = FAIL_AUTHENTICATION ; + break ; + } + case 0: + { + wlog ("%s Status:0 - failed to connect to '%s:%d'\n", + event.hostname.c_str(), event.ip.c_str(), event.port); + event.status = FAIL_HTTP_ZERO_STATUS ; + rc = FAIL_HTTP_ZERO_STATUS ; + break ; + } + default: + { + dlog3 ("%s Status: %d\n", event.hostname.c_str(), event.status ); + rc = event.status ; + break; + } + } + return (rc); +} + +/* *********************************************************************** + * + * Name : guestHttpUtil_api_req + * + * Description: Makes an HTTP request based on all the info + * in the supplied libEvent. 
+ * Blocks in event_base_dispatch until the request completes; returns event.status for a dispatched request or a FAIL_xxx code when setup fails. + * ************************************************************************/ +int guestHttpUtil_api_req ( libEvent & event ) + +{ + http_headers_type hdrs ; + bool has_payload = false; + int hdr_entry = 0 ; + int rc = FAIL ; + void(*handler)(struct evhttp_request *, void *) = NULL ; + + /* Bind the VIM API response handler */ + handler = &guestVimApi_Handler; + + /* set the timeout */ + event.timeout = HTTP_VIM_TIMEOUT ; + + /* Check for memory leaks: a non-NULL base left from an earlier request is only logged and is then overwritten below (the free is commented out) */ + if ( event.base ) + { + slog ("%s http base memory leak avoidance (%p) fixme !!\n", + event.log_prefix.c_str(), event.base ); + // event_base_free(event.base); + } + + /* Allocate the base */ + event.base = event_base_new(); + if ( event.base == NULL ) + { + elog ("%s No Memory for Request\n", event.log_prefix.c_str()); + return ( FAIL_EVENT_BASE ); + } + + /* Establish connection. 
NOTE(review): this and the later failure returns do not call httpUtil_free_conn / httpUtil_free_base here - confirm the callee or caller releases event.base */ + else if ( httpUtil_connect ( event )) + { + return (FAIL_CONNECT); + } + + else if ( httpUtil_request ( event, handler )) + { + return (FAIL_REQUEST_NEW); + } + + jlog ("%s Address : %s\n", event.hostname.c_str(), event.token.url.c_str()); + + if ((( event.type != EVHTTP_REQ_GET ) && ( event.type != EVHTTP_REQ_DELETE )) || + ( event.request == VIM_HOST_STATE_QUERY )) + { + has_payload = true ; + + /* Add payload to the output buffer for every request type other than GET and DELETE, and also for VIM_HOST_STATE_QUERY */ + if ( httpUtil_payload_add ( event )) + { + return (FAIL_PAYLOAD_ADD); + } + jlog ("%s Payload : %s\n", event.hostname.c_str(), + event.payload.c_str() ); + } + + /* Convert port to a string; 12 bytes holds any 32-bit int and snprintf bounds the write */ + char port_str[12] ; + snprintf ( port_str, sizeof(port_str), "%d", event.port ); + + /* Build the HTTP Header */ + hdrs.entry[hdr_entry].key = "Host" ; + hdrs.entry[hdr_entry].value = event.ip ; + hdrs.entry[hdr_entry].value.append(":") ; + hdrs.entry[hdr_entry].value.append(port_str); + hdr_entry++; + + if ( has_payload == true ) + { + hdrs.entry[hdr_entry].key = "Content-Length" ; + hdrs.entry[hdr_entry].value = httpUtil_payload_len ( &event ); + hdr_entry++; + } + + 
hdrs.entry[hdr_entry].key = "User-Agent" ; + hdrs.entry[hdr_entry].value = "guest-agent/1.0" ; + hdr_entry++; + + hdrs.entry[hdr_entry].key = "Content-Type" ; + hdrs.entry[hdr_entry].value = "application/json" ; + hdr_entry++; + + hdrs.entry[hdr_entry].key = "Connection" ; + hdrs.entry[hdr_entry].value = "close" ; + hdr_entry++; + hdrs.entries = hdr_entry ; + + /* Add the headers */ + if ( httpUtil_header_add ( &event, &hdrs )) + { + return (FAIL_HEADER_ADD); + } + + event.address = event.token.url ; + + rc = evhttp_make_request ( event.conn, event.req, event.type, event.token.url.c_str()); + if ( rc == PASS ) + { + evhttp_connection_set_timeout(event.req->evcon, event.timeout); + + /* Default to retry; the bound response handler is expected to overwrite event.status */ + event.status = RETRY ; + event.log_prefix = event.hostname ; + event.log_prefix.append (" "); + event.log_prefix.append (event.service) ; + event.log_prefix.append (" "); + event.log_prefix.append (event.operation) ; + jlog2 ("%s Requested (blocking) (to:%d)\n", event.log_prefix.c_str(), event.timeout); + + /* Run the event loop; this blocks until the request completes or times out */ + event_base_dispatch(event.base); + + httpUtil_free_conn ( event ); + httpUtil_free_base ( event ); + + return(event.status) ; + } + elog ("%s Call to 'evhttp_make_request' failed (rc:%d)\n", + event.hostname.c_str(), rc); + + return (FAIL_MAKE_REQUEST); +} diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestHttpUtil.h b/mtce-common/cgts-mtce-common-1.0/guest/guestHttpUtil.h new file mode 100644 index 00000000..9e622e96 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestHttpUtil.h @@ -0,0 +1,32 @@ +#ifndef __INCLUDE_GUESTHTTPUTIL_H__ +#define __INCLUDE_GUESTHTTPUTIL_H__ +/* + * Copyright (c) 2013, 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Controller Maintenance ... 
+ * + * libevent HTTP support utilities and control structure support header + */ + +#include /* for ... 
string */ +#include /* for ... http libevent client */ + +using namespace std; + +#include "guestClass.h" /* for ... maintenance class nodeLinkClass */ +#include "httpUtil.h" /* for ... common http utilities */ + +/***********************************************************************/ + +void guestHttpUtil_init ( void ); +void guestHttpUtil_fini ( void ); +int guestHttpUtil_status ( libEvent & event ); +int guestHttpUtil_api_req ( libEvent & event ); + +#endif /* __INCLUDE_GUESTHTTPUTIL_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestInstClass.cpp b/mtce-common/cgts-mtce-common-1.0/guest/guestInstClass.cpp new file mode 100644 index 00000000..bffae409 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestInstClass.cpp @@ -0,0 +1,764 @@ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Guest Services "Instances Base Class" + */ + + +#include +#include +#include +#include +#include +#include + +using namespace std; + +#include "nodeBase.h" /* for ... common definitions */ +#include "nodeEvent.h" /* for ... set_inotify_watch_file */ +#include "nodeTimers.h" /* for ... mtcTimer */ +#include "guestBase.h" /* for ... instInfo */ +#include "guestUtil.h" /* for ... guestUtil_inst_init */ +#include "guestInstClass.h" /* for ... get_inst */ +#include "guestSvrUtil.h" /* for ... 
hb_get_state_name */ + +/**< constructor - starts with an empty instance list, cleared inst_ptrs slots and reporting enabled */ +guestInstClass::guestInstClass() +{ + inst_head = NULL ; + inst_tail = NULL ; + + memory_allocs = 0 ; + memory_used = 0 ; + instances = 0 ; + + for ( int i = 0 ; i < MAX_INSTANCES ; i++ ) + { + inst_ptrs[i] = NULL ; + } + + fsm_exit = false ; + reporting = true ; + return ; +} + +/**< destructor - frees every instance still on the list via delInst and logs an error if memory_used is not back to zero */ +guestInstClass::~guestInstClass() +{ + inst * inst_ptr = inst_head ; + inst * temp_ptr = inst_ptr ; + while ( inst_ptr != NULL ) + { + temp_ptr = inst_ptr ; + inst_ptr = inst_ptr->next ; + delInst (temp_ptr); + } + if ( memory_used != 0 ) + { + elog ( "Apparent Memory Leak - Allocs:%d and Bytes:%d\n", + memory_allocs, memory_used ); + } + else + { + dlog ( "No Memory Leaks\n\n"); + } + return ; +} + +void guestInstClass::guest_fsm_run ( void ) +{ + fsm_run (); +} + +/* + * Allocate new instance and tack it on the end of the instance list. An instance already provisioned under the same uuid is removed first. Returns NULL when the uuid length is not UUID_LEN, when that removal fails, or when no instance could be allocated. 
+ */ +struct guestInstClass::inst* guestInstClass::addInst ( string uuid ) +{ + if ( uuid.length() != UUID_LEN ) + { + elog ("invalid instance uuid ; cannot add %s\n", uuid.c_str()); + return static_cast(NULL); + } + + /* if the instance is already provisioned then remove it so it can be re-added */ + struct inst * inst_ptr = guestInstClass::getInst ( uuid ); + if ( inst_ptr ) + { + if ( guestInstClass::remInst ( uuid ) ) + { + /* Should never get here but if we do then */ + /* something is seriously wrong */ + elog ("%s unable to remove instance during reprovision\n", + log_prefix(&inst_ptr->instance).c_str()); + return static_cast(NULL); + } + } + + /* allocate memory for new instance */ + inst_ptr = guestInstClass::newInst (); + if( inst_ptr == NULL ) + { + elog ( "failed to allocate memory for new instance\n" ); + return static_cast(NULL); + } + + guestUtil_inst_init ( &inst_ptr->instance ); + + /* Init the new instance */ + inst_ptr->instance.uuid = uuid ; + inst_ptr->query_flag = false ; + inst_ptr->instance.connect_wait_in_secs = DEFAULT_CONNECT_WAIT ; + + /* Init all five instance timers (reconnect, connect, monitor, init, vote) */ + /* Assign 
the timer the instance's name */ + mtcTimer_init ( inst_ptr->reconnect_timer, uuid ); + mtcTimer_init ( inst_ptr->connect_timer, uuid ); + mtcTimer_init ( inst_ptr->monitor_timer, uuid ); + mtcTimer_init ( inst_ptr->init_timer, uuid ); + mtcTimer_init ( inst_ptr->vote_timer, uuid ); + + inst_ptr->action = FSM_ACTION__NONE ; + + inst_ptr->connectStage = INST_CONNECT__START ; + inst_ptr->monitorStage = INST_MONITOR__STEADY ; + inst_ptr->messageStage = INST_MESSAGE__RECEIVE ; + + /* If the instance list is empty add it to the head */ + if( inst_head == NULL ) + { + inst_head = inst_ptr ; + inst_tail = inst_ptr ; + inst_ptr->prev = NULL ; + inst_ptr->next = NULL ; + } + else + { + /* link the new_instance to the tail of the inst_list + * then mark the next field as the end of the inst_list + * adjust tail to point to the last instance + */ + inst_tail->next = inst_ptr ; + inst_ptr->prev = inst_tail ; + inst_ptr->next = NULL ; + inst_tail = inst_ptr ; + } + + instances++ ; + ilog ("%s added as instance %d\n", log_prefix(&inst_ptr->instance).c_str(), instances); + return inst_ptr ; +} + +/* Remove an instance from the linked list of instances - may require splice action. 
Returns PASS, -ENODEV for an empty uuid, -ENXIO for an empty list or -EFAULT when the uuid is not found */ +int guestInstClass::remInst( string uuid ) +{ + if ( uuid.empty() ) + return -ENODEV ; + + if ( inst_head == NULL ) + return -ENXIO ; + + struct inst * inst_ptr = getInst ( uuid ); + + if ( inst_ptr == NULL ) + return -EFAULT ; + + stop_instance_timers ( inst_ptr ); + + /* Close the channel if it is open */ + guestUtil_close_channel ( &inst_ptr->instance ); + + /* If the instance is the head instance */ + if ( inst_ptr == inst_head ) + { + /* only one instance in the list case */ + if ( inst_head == inst_tail ) + { + dlog2 ("Single Inst -> Head Case\n"); + inst_head = NULL ; + inst_tail = NULL ; + } + else + { + dlog2 ("Multiple Insts -> Head Case\n"); + inst_head = inst_head->next ; + inst_head->prev = NULL ; + } + } + /* if not head but tail then there must be more than one + * instance in the list 
so go ahead and chop the tail. + */ + else if ( inst_ptr == inst_tail ) + { + dlog2 ("Multiple Inst -> Tail Case\n"); + inst_tail = inst_tail->prev ; + inst_tail->next = NULL ; + } + else + { + dlog2 ("Multiple Inst -> Full Splice Out\n"); + inst_ptr->prev->next = inst_ptr->next ; + inst_ptr->next->prev = inst_ptr->prev ; + } + delInst ( inst_ptr ); + instances-- ; + + if ( instances == 0 ) + ilog ("no instances to monitor\n"); + + return (PASS) ; +} + +/* Perform a linked list search for the first instance whose uuid or channel name equals the supplied string; returns NULL when there is no match */ +struct guestInstClass::inst* guestInstClass::getInst ( string chan_or_uuid ) +{ + struct inst * inst_ptr = static_cast(NULL) ; + + /* check for empty list condition */ + if ( inst_head ) + { + for ( inst_ptr = inst_head ; inst_ptr != NULL ; inst_ptr = inst_ptr->next ) + { + if ( !inst_ptr->instance.uuid.compare (chan_or_uuid) ) + { + return inst_ptr ; + } + if ( !inst_ptr->instance.chan.compare (chan_or_uuid) ) + { + return inst_ptr ; + } + + if (( inst_ptr->next == NULL ) || ( inst_ptr == inst_tail )) + break ; + } + } + return static_cast(NULL); +} + +/* + * Allocates memory for a new instance and stores its address in inst_ptrs + * + * @param void + * @return pointer to the newly allocated instance memory, or NULL when all MAX_INSTANCES slots are in use + */ +struct guestInstClass::inst * guestInstClass::newInst ( void ) +{ + struct guestInstClass::inst * temp_inst_ptr = NULL ; + + if ( memory_allocs == 0 ) + { + memset ( inst_ptrs, 0 , sizeof(struct inst *)*MAX_INSTANCES); + } + + // find an empty spot + for ( int i = 0 ; i < MAX_INSTANCES ; i++ ) + { + if ( inst_ptrs[i] == NULL ) + { + inst_ptrs[i] = temp_inst_ptr = new inst ; + memory_allocs++ ; + memory_used += sizeof (struct guestInstClass::inst); + + return temp_inst_ptr ; + } + } + elog ( "failed to store new instance pointer address\n" ); + return temp_inst_ptr ; +} + +/* Frees the memory of a pre-allocated 
instance and removes + * it from the inst_ptrs list. 
+ * + * @param instance * pointer to the instance memory address to be freed + * @return int return code { PASS or -EINVAL when the pointer is not tracked in inst_ptrs or nothing is allocated } + */ +int guestInstClass::delInst ( struct guestInstClass::inst * inst_ptr ) +{ + if ( memory_allocs > 0 ) + { + for ( int i = 0 ; i < MAX_INSTANCES ; i++ ) + { + if ( inst_ptrs[i] == inst_ptr ) + { + delete inst_ptr ; + inst_ptrs[i] = NULL ; + memory_allocs-- ; + memory_used -= sizeof (struct guestInstClass::inst); + return PASS ; + } + } + elog ( "unable to validate memory address being freed\n" ); + } + else + elog ( "free memory called when there is no memory to free\n" ); + + return -EINVAL ; +} + +/*************************************************************************************** + * P U B L I C I N T E R F A C E S + **************************************************************************************/ + +/* Add an instance based on its uuid. + * If the instance already exists then update its info. 
Returns PASS, FAIL_INVALID_UUID for a bad uuid length or FAIL_NULL_POINTER when the add fails */ +int guestInstClass::add_inst ( string uuid , instInfo & instance ) +{ + int rc = FAIL ; + + struct guestInstClass::inst * inst_ptr = getInst(uuid); + if ( inst_ptr ) + { + ilog ("********************************************************\n"); + ilog ("%s Already provisioned - TODO: Create copy constructor \n", uuid.c_str()); + ilog ("********************************************************\n"); + + /* Treat as success so that the existing instance's info is refreshed from 'instance' below */ + rc = PASS ; + } + /* Otherwise add it as a new instance */ + else + { + if ( uuid.length() != UUID_LEN ) + { + elog ("invalid uuid %s\n", uuid.c_str()); + return (FAIL_INVALID_UUID); + } + + inst_ptr = guestInstClass::addInst(uuid); + if ( inst_ptr ) + { + rc = PASS ; + } + else + { + elog ("failed to add instance '%s'\n", uuid.c_str()); + rc = FAIL_NULL_POINTER ; + } + } + + if ( rc == PASS ) + { + inst_ptr->heartbeat_count = 0 ; + + inst_ptr->mismatch_count = 0 ; + + /* TODO: This needs to be a complete copy - Need copy constructor */ + 
inst_ptr->instance.heartbeat.failures = 0 ; + inst_ptr->instance.heartbeat.failed = false ; + inst_ptr->instance.heartbeat.reporting = instance.heartbeat.reporting ; + inst_ptr->instance.heartbeat.provisioned = instance.heartbeat.provisioned ; + inst_ptr->instance.heartbeat.state = instance.heartbeat.state ; + inst_ptr->instance.hbState = hbs_server_waiting_init ; + inst_ptr->instance.vnState = hbs_server_waiting_init ; + + inst_ptr->instance.name_log_prefix = "" ; + inst_ptr->instance.uuid_log_prefix = "" ; + + inst_ptr->instance.name = instance.name ; + inst_ptr->instance.inst = instance.inst ; + inst_ptr->instance.connected = instance.connected ; + inst_ptr->instance.heartbeating = instance.heartbeating ; + inst_ptr->instance.chan_fd = instance.chan_fd ; + inst_ptr->instance.chan_ok = instance.chan_ok ; + + inst_ptr->instance.corrective_action = instance.corrective_action ; + inst_ptr->instance.heartbeat_interval_ms = instance.heartbeat_interval_ms ; + + inst_ptr->instance.vote_secs = instance.vote_secs ; + inst_ptr->instance.shutdown_notice_secs = instance.shutdown_notice_secs ; + inst_ptr->instance.suspend_notice_secs = instance.suspend_notice_secs ; + inst_ptr->instance.resume_notice_secs = instance.resume_notice_secs ; + inst_ptr->instance.restart_secs = instance.restart_secs ; + + /* Update the channel name, but only when the supplied one is longer than UUID_LEN */ + if ( instance.chan.length() > UUID_LEN ) + inst_ptr->instance.chan = instance.chan ; + } + return (rc); +} + +/***************************************************************************** + * + * Name : del_inst + * + * Purpose : Delete an instance from the linked list; returns FAIL for an empty uuid, otherwise the remInst result + * + *****************************************************************************/ +int guestInstClass::del_inst ( string uuid ) +{ + int rc = FAIL ; + if ( ! 
uuid.empty() ) + { + /* unlink the instance and free its memory */ + rc = remInst ( uuid ); + } + return ( rc ); +} + +/***************************************************************************** + * + * Name : qry_inst + * + * Purpose : Send instance info to the guestAgent + * + *****************************************************************************/ +int guestInstClass::qry_inst ( ) +{ + return ( guestAgent_qry_handler ()); +} + +void guestInstClass::stop_instance_timers ( struct guestInstClass::inst * inst_ptr ) +{ + /* Free the reconnect timer if in use */ + if ( inst_ptr->reconnect_timer.tid ) + { + mtcTimer_stop ( inst_ptr->reconnect_timer ); + inst_ptr->reconnect_timer.ring = false ; + inst_ptr->reconnect_timer.tid = NULL ; + } + /* Free the connect timer if in use */ + if ( inst_ptr->connect_timer.tid ) + { + mtcTimer_stop ( inst_ptr->connect_timer ); + inst_ptr->connect_timer.ring = false ; + inst_ptr->connect_timer.tid = NULL ; + } + /* Free the monitor timer if in use */ + if ( inst_ptr->monitor_timer.tid ) + { + mtcTimer_stop ( inst_ptr->monitor_timer ); + inst_ptr->monitor_timer.ring = false ; + inst_ptr->monitor_timer.tid = NULL ; + } + /* Free the init timer if in use */ + if ( inst_ptr->init_timer.tid ) + { + mtcTimer_stop ( inst_ptr->init_timer ); + inst_ptr->init_timer.ring = false ; + inst_ptr->init_timer.tid = NULL ; + } + /* Free the vote timer if in use */ + if ( inst_ptr->vote_timer.tid ) + { + mtcTimer_stop ( inst_ptr->vote_timer ); + inst_ptr->vote_timer.ring = false ; + inst_ptr->vote_timer.tid = NULL ; + } +} + + +void guestInstClass::free_instance_resources ( void ) +{ + /* check for empty list condition; otherwise close each instance's non-zero chan_fd and stop its timers. 
NOTE(review): chan_fd is not reset after close */ + if ( inst_head ) + { + for ( struct inst * inst_ptr = inst_head ; ; inst_ptr = inst_ptr->next ) + { + if ( inst_ptr->instance.chan_fd ) + { + ilog ("%s closing fd %d for uuid %s\n", + log_prefix(&inst_ptr->instance).c_str(), + inst_ptr->instance.chan_fd, + inst_ptr->instance.uuid.c_str()); + + close ( inst_ptr->instance.chan_fd ); + } + stop_instance_timers 
( inst_ptr ); + + if (( inst_ptr->next == NULL ) || ( inst_ptr == inst_tail )) + break ; + } + } +} + + +/****************************************************************************/ +/** FSM Control Utilities */ +/****************************************************************************/ + +void guestInstClass::reconnect_start ( const char * uuid_ptr ) /* closes the channel of a known instance or adds a new one for this uuid, then arms the connect FSM */ +{ + string uuid = uuid_ptr ? uuid_ptr : "" ; /* a NULL pointer becomes an empty uuid and is rejected below */ + if ( uuid.length() != UUID_LEN ) + { + elog ("invalid uuid %s (uuid:%zu)\n", uuid.c_str(), uuid.length()); + return ; + } + + struct guestInstClass::inst * inst_ptr = guestInstClass::getInst(uuid); + if ( inst_ptr ) + { + guestUtil_close_channel ( &inst_ptr->instance ); + } + else + { + inst_ptr = guestInstClass::addInst(uuid); + } + + if ( inst_ptr ) + { + instInfo * instInfo_ptr = &inst_ptr->instance ; + if ( instInfo_ptr->fd_namespace.size() ) + { + /* Setup inotify to watch for new instance serial IO channel creations; a failure is logged and the reconnect continues */ + if ( set_inotify_watch_file ( instInfo_ptr->fd_namespace.c_str(), + instInfo_ptr->inotify_file_fd, + instInfo_ptr->inotify_file_wd)) + { + elog ("%s failed to setup 'inotify' on %s\n", + log_prefix(instInfo_ptr).c_str(), + instInfo_ptr->fd_namespace.c_str()); + } + } + ilog ("%s reconnecting ... %s\n", log_prefix(instInfo_ptr).c_str(), + instInfo_ptr->connected ? 
" CONNECTED" : "" ); + + if ( inst_ptr->connect_timer.tid ) + mtcTimer_stop ( inst_ptr->connect_timer ); + + inst_ptr->action = FSM_ACTION__CONNECT ; + inst_ptr->connectStage = INST_CONNECT__START ; + + // mtcTimer_start ( inst_ptr->connect_timer, guestTimer_handler, inst_ptr->instance.connect_wait_in_secs ); + + //ilog ("%s connect attempt in %d seconds\n", + // log_prefix(&inst_ptr->instance).c_str(), inst_ptr->instance.connect_wait_in_secs); + instInfo_ptr->connecting = true ; + } + else + { + elog ("%s failed to find or add instance\n", uuid.c_str() ); + } +} + + + +/****************************************************************************/ +/** Inst Class Setter / Getters */ +/****************************************************************************/ + +/***************************************************************************** + * + * Name : get_inst + * + * Purpose : Return a pointer to the instInfo of the instance found by getInst for the specified uuid, or NULL when there is none + * + *****************************************************************************/ +instInfo * guestInstClass::get_inst ( string uuid ) +{ + struct guestInstClass::inst * inst_ptr = guestInstClass::getInst(uuid); + if ( inst_ptr ) + { + return (&inst_ptr->instance ); + } + return static_cast(NULL); +} + +/***************************************************************************** + * + * Name : getInst_timer + * + * Purpose : Return a pointer to the instance that contains the timer for + * the specified timer ID. 
+ * timer_id (INST_TIMER_xxx) selects which timer member is compared against tid; returns NULL when tid is NULL or nothing matches. + *****************************************************************************/ +struct guestInstClass::inst * guestInstClass::getInst_timer ( timer_t tid, int timer_id ) +{ + if ( tid != NULL ) + { + if ( inst_head ) + { + struct inst * inst_ptr ; + for ( inst_ptr = inst_head ; inst_ptr != NULL ; inst_ptr = inst_ptr->next ) + { + if (( timer_id == INST_TIMER_MONITOR ) && (inst_ptr->monitor_timer.tid == tid )) + { + return inst_ptr ; + } + else if (( timer_id == INST_TIMER_CONNECT ) && (inst_ptr->connect_timer.tid == tid )) + { + return inst_ptr ; + } + else if (( timer_id == INST_TIMER_VOTE ) && ( inst_ptr->vote_timer.tid == tid )) + { + return inst_ptr ; + } + else if (( timer_id == INST_TIMER_INIT ) && ( inst_ptr->init_timer.tid == tid )) + { + return inst_ptr ; + } + else if (( timer_id == INST_TIMER_RECONNECT ) && ( inst_ptr->reconnect_timer.tid == tid )) + { + return inst_ptr ; + } + + if (( inst_ptr->next == NULL ) || ( inst_ptr == inst_tail )) + break ; + } + } + } + return static_cast(NULL); +} + +/* Get an instance's heartbeat fault reporting state; logs a warning and returns false when the uuid is not found */ +bool guestInstClass::get_reporting_state ( string uuid ) +{ + guestInstClass::inst * inst_ptr = guestInstClass::getInst ( uuid ); + if ( inst_ptr ) + { + return ( inst_ptr->instance.heartbeat.reporting ); + } + else + { + wlog ("uuid not found '%s'\n", uuid.c_str()); + } + return ( false ); +} + +/* Set an instance's heartbeat fault reporting state; returns PASS, or FAIL_NOT_FOUND when the uuid is not found */ +int guestInstClass::set_reporting_state( string uuid, bool reporting ) +{ + guestInstClass::inst * inst_ptr = guestInstClass::getInst ( uuid ); + if ( inst_ptr ) + { + inst_ptr->instance.heartbeat.reporting = reporting ; + } + else + { + wlog ("uuid not found '%s'\n", uuid.c_str()); + return (FAIL_NOT_FOUND) ; + } + 
return (PASS); +} + + +/***************************************************************************** + * + * Name : print_all_instances + * + * Purpose: Print a summary of the instances that are currently provisioned + * + 
*****************************************************************************/ +void guestInstClass::print_all_instances ( void ) +{ + bool found = false; + int i = 0 ; + if ( inst_head ) + { + struct inst * inst_ptr ; + for ( inst_ptr = inst_head ; inst_ptr != NULL ; inst_ptr = inst_ptr->next ) + { + ilog ("%2d %s Heartbeat: Notify:%c Failures:%d\n", i, + log_prefix(&inst_ptr->instance).c_str(), + inst_ptr->instance.heartbeat.reporting ? 'Y':'n', + inst_ptr->instance.heartbeat.failures); + found = true ; + i++ ; + if (( inst_ptr->next == NULL ) || ( inst_ptr == inst_tail )) + break ; + } + } + + if ( found == false ) + { + ilog ("no instances provisioned\n"); + } +} + +/***************************************************************************** + * + * Name : print_instances (public wrapper around the private print_all_instances) + * + *****************************************************************************/ +void guestInstClass::print_instances ( void ) +{ + print_all_instances(); +} + +/***************************************************************************** + * Memory Dump Stuff * + *****************************************************************************/ +void guestInstClass::print_node_info ( void ) /* only flushes stdout and stderr */ +{ + fflush (stdout); + fflush (stderr); +} + +void guestInstClass::mem_log_info ( void ) /* logs the instance count and the allocation counters */ +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "Instances:%d Allocs:%d Memory:%d\n", instances, memory_allocs, memory_used ); + mem_log (str); +} + +void mem_log_delimit_host ( void ) /* free function: logs a separator line */ +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "-------------------------------------------------------------\n"); + mem_log (str); +} + +void guestInstClass::mem_log_inst_info ( void ) /* logs five summary lines plus a separator for every instance on the list, or "no instances" 
when the list is empty */ +{ + char str[MAX_MEM_LOG_DATA] ; + + struct inst * inst_ptr = static_cast(NULL) ; + + for ( inst_ptr = inst_head ; inst_ptr != NULL ; inst_ptr = inst_ptr->next ) + { + snprintf (&str[0], MAX_MEM_LOG_DATA, "Name : %s %s (%s)\n", + inst_ptr->instance.name.c_str(), + 
inst_ptr->instance.uuid.c_str(), + inst_ptr->instance.inst.c_str()); + mem_log (str); + + snprintf (&str[0], MAX_MEM_LOG_DATA, "Action: %8d Connect:%2d Message:%2d Delay:%d secs\n", + inst_ptr->action, + inst_ptr->connectStage, + inst_ptr->messageStage, + inst_ptr->instance.connect_wait_in_secs); + mem_log (str); + + snprintf (&str[0], MAX_MEM_LOG_DATA, "State : Reporting: %c Failures: %d Failed: %c\n", + inst_ptr->instance.heartbeat.reporting ? 'Y' : 'n', + inst_ptr->instance.heartbeat.failures, + inst_ptr->instance.heartbeat.failed ? 'Y' : 'n' ); + mem_log (str); + + snprintf (&str[0], MAX_MEM_LOG_DATA, "Setup : Select :%2d Channel OK: %c hbState:%s vnState:%s\n", + inst_ptr->instance.chan_fd, + inst_ptr->instance.chan_ok ? 'Y' : 'n' , + hb_get_state_name(inst_ptr->instance.hbState), + hb_get_state_name(inst_ptr->instance.vnState)); + mem_log (str); + + snprintf (&str[0], MAX_MEM_LOG_DATA, "Oper : Connected: %c Heartbeating: %c\n", + inst_ptr->instance.connected ? 
'Y' : 'n', + inst_ptr->instance.heartbeating ? 'Y' : 'n'); + mem_log (str); + + mem_log_delimit_host(); + + /* exit if this happens to be the last one in the list */ + if (( inst_ptr->next == NULL ) || ( inst_ptr == inst_tail )) + break ; + } + if ( inst_head == NULL ) + { + snprintf (&str[0], MAX_MEM_LOG_DATA, "no instances\n"); + mem_log (str); + } +} + +void guestInstClass::memDumpAllState ( void ) /* summary counters, a separator, then the per-instance dump */ +{ + mem_log_info ( ); + mem_log_delimit_host (); + mem_log_inst_info (); +} diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestInstClass.h b/mtce-common/cgts-mtce-common-1.0/guest/guestInstClass.h new file mode 100644 index 00000000..12bfdd0c --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestInstClass.h @@ -0,0 +1,221 @@ +#ifndef __INCLUDE_INSTBASECLASS_H__ +#define __INCLUDE_INSTBASECLASS_H__ + +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Guest Services "Instances Base Class Header" + */ + +#include "guestBase.h" /* for ... instInfo */ + +typedef enum +{ + FSM_ACTION__NONE, + FSM_ACTION__CONNECT, + FSM_ACTION__LAST + +} guest_fsmActions_enum ; + +typedef enum +{ + INST_CONNECT__START = 1, + INST_CONNECT__WAIT = 2, + INST_CONNECT__RETRY = 3, + INST_CONNECT__DONE = 4, + INST_CONNECT__STAGES = 5 +} guest_connectStages_enum ; + +typedef enum +{ + INST_MONITOR__STEADY = 0, + INST_MONITOR__DELAY = 2, /* note: listed before FAILURE although its value is higher */ + INST_MONITOR__FAILURE = 1, +} guest_monitorStages_enum ; + + +typedef enum +{ + INST_MESSAGE__RECEIVE = 0, + INST_MESSAGE__SEND_INIT_ACK = 1, + INST_MESSAGE__RESP_WAIT = 2, /* Waiting for heartbeat challenge response */ + INST_MESSAGE__SEND_WAIT = 3, /* Waiting for period to expire for challenge resend */ + INST_MESSAGE__TRANSMIT = 4, + INST_MESSAGE__STALL = 5 +} guest_messageStages_enum ; + +class guestInstClass +{ + private: + struct inst { + + /** Pointers to the previous / next instance in the list */ + struct inst * prev; + struct inst * next; + + /* Instance info */ + instInfo instance ; + + /** + * Top level gate for the host. + * If false then reporting for all instances is off. 
+ */ + // bool reporting; + + bool query_flag ; + + #define INST_TIMER_MONITOR (0) + #define INST_TIMER_CONNECT (1) + #define INST_TIMER_RECONNECT (2) + #define INST_TIMER_INIT (3) + #define INST_TIMER_VOTE (4) + #define INST_TIMER_MAX (5) + + /** Per-instance timers; the INST_TIMER_xxx ids above select one of them in getInst_timer */ + // struct mtc_timer timer; + struct mtc_timer vote_timer; + struct mtc_timer init_timer; + struct mtc_timer monitor_timer; + struct mtc_timer connect_timer; + struct mtc_timer reconnect_timer; + + + guest_connectStages_enum connectStage ; + guest_messageStages_enum messageStage ; + guest_monitorStages_enum monitorStage ; + + guest_fsmActions_enum action ; + + int monitor_handler_count ; + int message_handler_count ; + int connect_handler_count ; + int mismatch_count ; + int heartbeat_count ; + + /* Message list for this instance */ + list message_list ; + }; + + struct inst * inst_head ; /**< Inst Linked List Head pointer */ + struct inst * inst_tail ; /**< Inst Linked List Tail pointer */ + + /** List of allocated instance memory. + * + * An array of instance pointers. NOTE(review): sized by MAX_HOSTS while guestInstClass.cpp indexes it up to MAX_INSTANCES - confirm MAX_INSTANCES <= MAX_HOSTS. + */ + inst * inst_ptrs[MAX_HOSTS] ; + + /** A memory allocation counter. + * + * Should represent the number of instances in the linked list. 
+ */ + int memory_allocs ; + + /** A memory used counter + * + * A variable storing the accumulated instance memory in bytes + */ + int memory_used ; + + bool fsm_exit ; + void fsm_run ( void ); + + struct guestInstClass::inst* newInst ( void ); + struct guestInstClass::inst* addInst ( string uuid ); + struct guestInstClass::inst* getInst ( string uuid ); + int remInst ( string uuid ); + int delInst ( struct guestInstClass::inst * inst_ptr ); + void readInst ( void ); + + void print_all_instances ( void ); + + void mem_log_inst_info ( void ); + + struct guestInstClass::inst* getInst_timer ( timer_t tid, int timer_id ); + + int message_handler ( struct guestInstClass::inst * inst_ptr ); + int connect_handler ( struct guestInstClass::inst * inst_ptr ); + int monitor_handler ( struct guestInstClass::inst * inst_ptr ); + + void start_monitor_timer ( struct guestInstClass::inst * inst_ptr ); + + /** This member function loops over all the instances and sends + * a json string instances: [uuid:state],[uuid:state]... + * back to the guestAgent. 
*/ + int guestAgent_qry_handler ( void ); + + int send_challenge ( struct guestInstClass::inst * inst_ptr ); + + void manage_comm_loss ( void ); + + void mem_log_info ( void ); + + public: + + guestInstClass(); /**< constructor */ + ~guestInstClass(); /**< destructor */ + + bool reporting ; + void print_instances ( void ); + + /** handle an expired timer */ + void timer_handler ( int sig, siginfo_t *si, void *uc); + + struct mtc_timer search_timer; + + int instances ; + void guest_fsm_run ( void ); + + int qry_inst ( void ); + int add_inst ( string uuid, instInfo & instance ); + int mod_inst ( string uuid, instInfo & instance ); + int del_inst ( string uuid ); + instInfo * get_inst ( string uuid ); + + ssize_t write_inst ( instInfo * ptr, const char *message, size_t size); + + void reconnect_start ( const char * uuid_ptr ) ; // string uuid ); + + void set_query_flag ( string uuid ); + bool get_query_flag ( string uuid ); + bool get_reporting_state( string uuid ); + int set_reporting_state( string uuid, bool enabled ); + + int send_vote_notify ( string uuid ); + int send_vote_notify_resp ( char * hostname, string uuid, + string notification_type, + string event_type, + string vote_result, + string reject_reason); + + void send_client_msg_nack ( instInfo * instInfo_ptr, + string log_err); + void handle_parse_failure ( struct guestInstClass::inst * inst_ptr, + const char *key); + + /* Called on daemon exit */ + void free_instance_resources ( void ); + void stop_instance_timers ( struct guestInstClass::inst * inst_ptr ); + + /* For select dispatch */ + struct timeval waitd ; + + fd_set inotify_readfds ; + fd_set instance_readfds ; + fd_set message_readfds ; + + void memLogDelimit ( void ); /**< Debug log delimiter */ + void memDumpNodeState ( string uuid ); + void memDumpAllState ( void ); + void print_node_info ( void ); /**< Print node info banner */ +}; + +guestInstClass * get_instInv_ptr ( void ); + +#endif /* __INCLUDE_INSTBASECLASS_H__ */ diff --git 
a/mtce-common/cgts-mtce-common-1.0/guest/guestServer.cpp b/mtce-common/cgts-mtce-common-1.0/guest/guestServer.cpp new file mode 100644 index 00000000..60903524 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestServer.cpp @@ -0,0 +1,580 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Guest Heartbeat Server Daemon on Compute + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for hostent */ +#include +#include +#include +#include +#include +#include /* for close and usleep */ +#include /* for realtime scheduling api */ + +using namespace std; + +#include "nodeBase.h" +#include "daemon_ini.h" /* Ini Parser Header */ +#include "daemon_common.h" /* Common definitions and types for daemons */ +#include "daemon_option.h" /* Common options for daemons */ +#include "nodeUtil.h" /* for ... common utilities */ +#include "jsonUtil.h" /* for ... JSON utilities */ +#include "nodeTimers.h" /* for ... maintenance timers */ +#include "nodeMacro.h" /* for ... CREATE_NONBLOCK_INET_UDP_RX_SOCKET */ +#include "nodeEvent.h" /* for ... set_inotify_watch, set_inotify_close */ +#include "guestBase.h" +#include "guestUtil.h" /* for ... guestUtil_inst_init */ +#include "guestSvrUtil.h" /* for ... guestUtil_inotify_events */ +#include "guestVirtio.h" /* for ... virtio_channel_connect */ +#include "guestSvrMsg.h" /* for ... send_to_guestAgent */ +#include "guestInstClass.h" + +/* Where to send events */ +string guestAgent_ip = "" ; + +/***************************************************************************** + * + * The daemon primary instance tracking object. 
+ * + * This object is a dynamically managed linked list of tracked insances + * + * @see guestInstClass Module control structure in guestInstClass.h + * + *****************************************************************************/ +guestInstClass instInv ; +guestInstClass * get_instInv_ptr ( void ) { return(&instInv); } + + +/* @see guestBase.h Module control structure + * TODO: Consider obsoleting by moving into class */ +ctrl_type ctrl ; +ctrl_type * get_ctrl_ptr ( void ) +{ + return(&ctrl); +} + +void daemon_sigchld_hdlr ( void ) +{ + ; /* dlog("Received SIGCHLD ... no action\n"); */ +} + +/** + * Daemon Configuration Structure - The allocated struct + * @see daemon_common.h for daemon_config_type struct format. + */ +static daemon_config_type guest_config ; +daemon_config_type * daemon_get_cfg_ptr () { return &guest_config ; } + +/* Cleanup exit handler */ +void daemon_exit ( void ) +{ + daemon_dump_info (); + daemon_files_fini (); + + /* Close the messaging sockets */ + if ( ctrl.sock.server_rx_sock ) + delete (ctrl.sock.server_rx_sock); + + if ( ctrl.sock.server_tx_sock ) + delete (ctrl.sock.server_tx_sock); + + if ( ctrl.sock.agent_rx_float_sock ) + delete (ctrl.sock.agent_rx_float_sock); + + if ( ctrl.sock.agent_tx_sock ) + delete (ctrl.sock.agent_tx_sock); + + /* Turn off inotify */ + set_inotify_close ( ctrl.inotify_dir_fd, ctrl.inotify_dir_wd ); + + instInv.free_instance_resources (); + + fflush (stdout); + fflush (stderr); + + exit (0); +} + +/** Client Config mask */ +#define CONFIG_MASK (CONFIG_CLIENT_RX_PORT |\ + CONFIG_AGENT_RX_PORT) + +/* Startup config read */ +static int _config_handler ( void * user, + const char * section, + const char * name, + const char * value) +{ + daemon_config_type* config_ptr = (daemon_config_type*)user; + + if (MATCH("agent", "rx_port")) + { + config_ptr->agent_rx_port = atoi(value); + config_ptr->mask |= CONFIG_AGENT_RX_PORT ; + } + else if (MATCH("client", "rx_port")) + { + config_ptr->client_rx_port = 
atoi(value); + config_ptr->mask |= CONFIG_CLIENT_RX_PORT ; + } + else if (MATCH("client", "hbs_pulse_period")) + { + config_ptr->hbs_pulse_period = atoi(value); + } + else if (MATCH("client", "hbs_failure_threshold")) + { + config_ptr->hbs_failure_threshold = atoi(value); + } +#ifdef WANT_REPORT_DELAY + else if (MATCH("timeouts", "start_delay")) + { + config_ptr->start_delay = atoi(value); + } +#endif + else + { + return (PASS); + } + return (FAIL); +} + +/* Read the guest.ini file and load agent */ +/* settings into the daemon configuration */ +int daemon_configure ( void ) +{ + int rc = FAIL ; + + /* Read the ini */ + char config_fn[100] ; + guest_config.mask = 0 ; + sprintf ( &config_fn[0], "/etc/mtc/%s.ini", program_invocation_short_name ); + if (ini_parse(config_fn, _config_handler, &guest_config) < 0) + { + elog("Can't load '%s'\n", config_fn ); + return (FAIL_LOAD_INI); + } + + get_debug_options ( config_fn, &guest_config ); + + /* Verify loaded config against an expected mask + * as an ini file fault detection method */ + if ( guest_config.mask != CONFIG_MASK ) + { + elog ("Configuration load failed (%x)\n", + (( -1 ^ guest_config.mask ) & CONFIG_MASK) ); + rc = FAIL_INI_CONFIG ; + } + else + { + guest_config.mgmnt_iface = daemon_get_iface_master ( guest_config.mgmnt_iface ); + ilog("Guest Agent : %s:%d\n", guest_config.mgmnt_iface, guest_config.client_rx_port ); + + // get_iface_macaddr ( guest_config.mgmnt_iface, my_macaddr ); + get_iface_address ( guest_config.mgmnt_iface, ctrl.address, true ); + get_hostname ( &ctrl.hostname[0], MAX_HOST_NAME_SIZE ); + + ilog("Report Thres: %d\n", guest_config.hbs_failure_threshold ); +#ifdef WANT_REPORT_DELAY + ilog("Report Delay: %d sec\n", guest_config.start_delay ); +#endif + ilog("Deflt Period: %d msec\n", guest_config.hbs_pulse_period ); + rc = PASS ; + } + + return (rc); +} + + +/****************************/ +/* Initialization Utilities */ +/****************************/ + +/* Setup UDP messaging to the 
guestAgent. */ +int _socket_init ( void ) +{ + int rc = PASS ; + + guestAgent_ip = getipbyname ( CONTROLLER ); + ilog ("ControllerIP: %s\n", guestAgent_ip.c_str()); + + /* Read the ports the socket struct */ + ctrl.sock.agent_rx_port = guest_config.agent_rx_port ; + ctrl.sock.server_rx_port = guest_config.client_rx_port ; + + /****************************/ + /* Setup the Receive Socket */ + /****************************/ + ctrl.sock.server_rx_sock = new msgClassRx(ctrl.address.c_str(), guest_config.client_rx_port, IPPROTO_UDP); + rc = ctrl.sock.server_rx_sock->return_status; + if ( rc ) + { + elog ("Failed to setup 'guestAgent' receiver on port %d\n", + ctrl.sock.server_rx_port ); + return (rc) ; + } + ctrl.sock.server_tx_sock = new msgClassTx(guestAgent_ip.c_str(), guest_config.agent_rx_port, IPPROTO_UDP, guest_config.mgmnt_iface); + rc = ctrl.sock.server_tx_sock->return_status; + if ( rc ) + { + elog ("Failed to setup 'guestServer' transmiter\n" ); + return (rc) ; + } + + return (rc); +} + +/* The main heartbeat service loop */ +int daemon_init ( string iface, string nodeType_str ) +{ + int rc = PASS ; + + ctrl.address.clear() ; + ctrl.address_peer.clear(); + ctrl.nodetype = CGTS_NODE_NULL ; + + /* Init the Inotify descriptors */ + ctrl.inotify_dir_fd = 0 ; + ctrl.inotify_dir_wd = 0 ; + + /* clear hostname */ + memset ( &ctrl.hostname[0], 0, MAX_HOST_NAME_SIZE ); + + /* Initialize socket construct and pointer to it */ + memset ( &ctrl.sock, 0, sizeof(ctrl.sock)); + + /* Assign interface to config */ + guest_config.mgmnt_iface = (char*)iface.data() ; + + if ( (rc = daemon_files_init ( )) != PASS ) + { + elog ("Pid, log or other files could not be opened (rc:%d)\n", rc ); + rc = FAIL_FILES_INIT ; + } + + /* convert node type to integer */ + ctrl.nodetype = get_host_function_mask ( nodeType_str ) ; + ilog ("Node Type : %s (%d)\n", nodeType_str.c_str(), ctrl.nodetype ); + + /* Bind signal handlers */ + if ( daemon_signal_init () != PASS ) + { + elog 
("daemon_signal_init failed\n"); + return ( FAIL_SIGNAL_INIT ); + } + + /************************************************************************ + * There is no point continuing with init ; i.e. running daemon_configure, + * initializing sockets and trying to query for an ip address until the + * daemon's configuration requirements are met. Here we wait for those + * flag files to be present before continuing. + ************************************************************************ + * Wait for /etc/platform/.initial_config_complete & /var/run/.goenabled */ + daemon_wait_for_file ( CONFIG_COMPLETE_FILE , 0); + daemon_wait_for_file ( GOENABLED_MAIN_READY , 0); + + /* Configure the client */ + if ( (rc = daemon_configure ()) != PASS ) + { + elog ("Daemon service configuration failed (rc:%d)\n", rc ); + rc = FAIL_DAEMON_CONFIG ; + } + + /* Setup the heartbeat service messaging sockets */ + else if ( (rc = _socket_init ( )) != PASS ) + { + elog ("socket initialization failed (rc:%d)\n", rc ); + rc = FAIL_SOCKET_INIT; + } + + /* Ignore this signal */ + signal(SIGPIPE, SIG_IGN); + + return (rc); +} + +/* + { hostname" : "" , + "instances" : + [ + { "channel" : "" , "services" : + [ + { "service":"heartbeat", "admin":"enabled", "oper":"enabled" , "avail":"available" } + ], + "channel: : "" , "services" : + [ + { "service":"heartbeat", "admin":"enabled", "oper":"enabled" , "avail":"available"} + ] + } + ] + } +*/ + + +int select_failure_count = 0 ; + +void guestInstClass::manage_comm_loss ( void ) +{ + int rc ; + + std::list socks ; + socks.clear(); + + waitd.tv_sec = 0; + waitd.tv_usec = GUEST_SOCKET_TO; + + /* Initialize the master fd_set */ + FD_ZERO(&inotify_readfds); + + /* check for empty list condition */ + if ( inst_head ) + { + for ( struct inst * inst_ptr = inst_head ; inst_ptr != NULL ; inst_ptr = inst_ptr->next ) + { + if ( inst_ptr->instance.inotify_file_fd ) + { + //ilog ("adding inotify_fd %d for %s to select list\n", + // 
inst_ptr->instance.inotify_file_fd, + // inst_ptr->instance.uuid.c_str()); + + socks.push_front ( inst_ptr->instance.inotify_file_fd ); + FD_SET ( inst_ptr->instance.inotify_file_fd, &inotify_readfds); + } + if (( inst_ptr->next == NULL ) || ( inst_ptr == inst_tail )) + break ; + } + + /* if there are no sockets to monitor then just exit */ + if ( socks.empty() ) + return ; + + /* Call select() and wait only up to SOCKET_WAIT */ + socks.sort(); + rc = select( socks.back()+1, &inotify_readfds, NULL, NULL, &waitd); + if (( rc < 0 ) || ( rc == 0 ) || ( rc > (int)socks.size())) + { + /* Check to see if the select call failed. */ + /* ... but filter Interrupt signal */ + if (( rc < 0 ) && ( errno != EINTR )) + { + wlog_throttled ( select_failure_count, 20, + "socket select failed (%d:%m)\n", errno); + } + else if ( rc > (int)socks.size()) + { + wlog_throttled ( select_failure_count, 100, + "Select return exceeds current file descriptors (%ld:%d)\n", + socks.size(), rc ); + } + else + { + select_failure_count = 0 ; + } + } + else + { + wlog ( "inotify channel event\n"); + + for ( struct inst * inst_ptr = inst_head ; inst_ptr != NULL ; inst_ptr = inst_ptr->next ) + { + if ( inst_ptr->instance.inotify_file_fd ) + { + if (FD_ISSET(inst_ptr->instance.inotify_file_fd, &inotify_readfds) ) + { + ilog ("Watch Event on instance %s\n", inst_ptr->instance.uuid.c_str()); + guestUtil_inotify_events (inst_ptr->instance.inotify_file_fd); + } + } + if (( inst_ptr->next == NULL ) || ( inst_ptr == inst_tail )) + break ; + } + } + } +} + + + +#define MAX_LEN 300 +void daemon_service_run ( void ) +{ + int rc = 0 ; + int count = 0 ; + int flush_thld = 0 ; + + string payload = "" ; /* for the ready event */ + + std::list socks ; + + guestUtil_load_channels (); + + /* Setup inotify to watch for new instance serial IO channel creations */ + if ( set_inotify_watch ( QEMU_CHANNEL_DIR, + ctrl.inotify_dir_fd, + ctrl.inotify_dir_wd ) ) + { + elog ("failed to setup inotify on %s\n", QEMU_CHANNEL_DIR 
); + } + + socks.clear(); + socks.push_front (ctrl.sock.server_rx_sock->getFD()); + if ( ctrl.inotify_dir_fd ) + socks.push_front (ctrl.inotify_dir_fd); + else + { + elog ("unable to inotify monitor %s\n", QEMU_CHANNEL_DIR ); + + // TODO: consider exiting daemon + } + socks.sort(); + + mtcTimer_init ( ctrl.timer, ctrl.hostname ); + mtcTimer_init ( instInv.search_timer, ctrl.hostname ); + + mtcTimer_start ( ctrl.timer , guestTimer_handler, 2 ); + mtcTimer_start ( instInv.search_timer, guestTimer_handler, SEARCH_AUDIT_TIME ); + + ilog ("Selects: guestAgent:%d qemuDir:%d\n", ctrl.sock.server_rx_sock->getFD(), ctrl.inotify_dir_fd ); + ilog ("-------------------------------------------------------\n"); + + /* Tell the guestAgent that we started or restarted + * so that it can send instance state data */ + payload = "{\"hostname\":\"" ; + payload.append(ctrl.hostname); + payload.append("\"}"); + + /* Run heartbeat service forever or until stop condition */ + for ( ; ; ) + { + instInv.waitd.tv_sec = 0; + instInv.waitd.tv_usec = GUEST_SOCKET_TO; + + /* Initialize the master fd_set */ + FD_ZERO(&instInv.message_readfds); + + FD_SET ( ctrl.sock.server_rx_sock->getFD(), &instInv.message_readfds); + if ( ctrl.inotify_dir_fd ) + { + FD_SET ( ctrl.inotify_dir_fd, &instInv.message_readfds); + } + + rc = select( socks.back()+1, &instInv.message_readfds, NULL, NULL, &instInv.waitd); + if (( rc < 0 ) || ( rc == 0 ) || ( rc > (int)socks.size())) + { + /* Check to see if the select call failed. */ + /* ... 
but filter Interrupt signal */ + if (( rc < 0 ) && ( errno != EINTR )) + { + wlog_throttled ( count, 20, "socket select failed (%d:%m)\n", errno); + } + else if ( rc > (int)socks.size()) + { + wlog_throttled ( count, 100, "Select return exceeds current file descriptors (%ld:%d)\n", + socks.size(), rc ); + } + else + { + count = 0 ; + } + } + else if (FD_ISSET(ctrl.sock.server_rx_sock->getFD(), &instInv.message_readfds)) + { + /* clean the rx/tx buffer */ + mtc_message_type msg ; + memset ((void*)&msg,0,sizeof(mtc_message_type)); + + int bytes = ctrl.sock.server_rx_sock->read((char*)&msg.hdr[0], sizeof(mtc_message_type)); + ctrl.address_peer = ctrl.sock.server_rx_sock->get_src_str() ; + mlog1 ("Received %d bytes from %s:%d:guestAgent\n", bytes, + ctrl.sock.server_rx_sock->get_src_str(), + ctrl.sock.server_rx_sock->get_dst_addr()->getPort() ); + print_mtc_message (&msg); + + if ( bytes > 0 ) + { + recv_from_guestAgent ( msg.cmd, &msg.buf[0] ); + } + } + + else if (FD_ISSET(ctrl.inotify_dir_fd, &instInv.message_readfds)) + { + dlog ("%s dir change\n", QEMU_CHANNEL_DIR ); + + guestUtil_inotify_events (ctrl.inotify_dir_fd); + } + + fflush (stdout); + fflush (stderr); + + instInv.guest_fsm_run ( ); + + if ( ctrl.timer.ring == true ) + { + /* restart the timer and try again if this call returns a RETRY */ + if ( send_to_guestAgent ( MTC_EVENT_MONITOR_READY, payload.data()) == RETRY ) + { + mtcTimer_start ( ctrl.timer, guestTimer_handler, 5 ); + } + ctrl.timer.ring = false ; + } + + daemon_signal_hdlr (); + + /* Support the log flush config option */ + if ( guest_config.flush ) + { + if ( ++flush_thld > guest_config.flush_thld ) + { + flush_thld = 0 ; + fflush (stdout); + fflush (stderr); + } + } + } + daemon_exit (); +} +/* Write the daemon /var/log/.dump */ +void daemon_dump_info ( void ) +{ + daemon_dump_membuf_banner (); + + instInv.print_node_info (); + instInv.memDumpAllState (); + + daemon_dump_membuf(); +} + +const char MY_DATA [100] = { "eieio\n" } ; +const char * 
daemon_stream_info ( void ) +{ + return (&MY_DATA[0]); +} + +/*************************************************************************** + * * + * Module Test Head * + * * + ***************************************************************************/ + +/** Teat Head Entry */ +int daemon_run_testhead ( void ) +{ + int rc = PASS; + return (rc); +} diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestStubs.cpp b/mtce-common/cgts-mtce-common-1.0/guest/guestStubs.cpp new file mode 100644 index 00000000..5a99dc4d --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestStubs.cpp @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2013, 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Nodal Health Check Agent Stubs + */ + +#include + +using namespace std; + +#include "nodeBase.h" +#include "nodeUtil.h" diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestSvrFsm.cpp b/mtce-common/cgts-mtce-common-1.0/guest/guestSvrFsm.cpp new file mode 100644 index 00000000..6add43bb --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestSvrFsm.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2013, 2015 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/*************************************************************************** + * + * @file + * Wind River CGTS Platform "Guest Services - Finite State Machine" + * + * + * This FSM handles the following actions + * + * FSM_ACTION__CONNECT + * + */ + +#include +#include + +using namespace std; + +#include "nodeBase.h" +#include "nodeTimers.h" +#include "guestBase.h" +#include "guestInstClass.h" +#include "guestSvrUtil.h" + +void guestInstClass::fsm_run ( void ) +{ + int rc = PASS ; + struct inst * inst_ptr = static_cast(NULL) ; + + if (( instances > 0 ) ) + { + /* get new messages */ + readInst(); + + for ( inst_ptr = inst_head ; inst_ptr != NULL ; inst_ptr = inst_ptr->next ) + { + if ( inst_ptr->message_list.size() ) + { + guestInstClass::message_handler ( inst_ptr ); + } + + if ( inst_ptr->action == FSM_ACTION__NONE ) + { + guestInstClass::monitor_handler ( inst_ptr ); + } + + else if ( inst_ptr->action == FSM_ACTION__CONNECT ) + { + rc = guestInstClass::connect_handler ( inst_ptr ); + if ( rc == RETRY ) + return ; + } + else + { + slog ("unknown action (%d) for instance %s\n", + inst_ptr->action, inst_ptr->instance.uuid.c_str()); + } + +#ifdef WANT_LOSS_FIT + if ( inst_ptr->heartbeat_count > 10 ) + { + mtcTimer_stop ( inst_ptr->monitor_timer ); + mtcWait_secs (1); + start_monitor_timer ( inst_ptr ); + inst_ptr->heartbeat_count = 0 ; + } +#endif + + /* exit if this happens to be the last one in the list */ + if (( inst_ptr->next == NULL ) || ( inst_ptr == inst_tail )) + break ; + } + } + else if ( inst_head != NULL ) + { + slog ("head pointer is not NULL while there are no instances (%p)\n", inst_head ); + } + + if ( search_timer.ring == true ) + { + guestUtil_channel_search (); + mtcTimer_start ( search_timer, guestTimer_handler, SEARCH_AUDIT_TIME ); + } + + /* Make this part of the connect FSM */ + manage_comm_loss ( ); +} diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestSvrHdlr.cpp 
b/mtce-common/cgts-mtce-common-1.0/guest/guestSvrHdlr.cpp new file mode 100644 index 00000000..ed7ddd7c --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestSvrHdlr.cpp @@ -0,0 +1,1435 @@ +/* + * Copyright (c) 2015-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/**************************************************************************** + * @file + * Wind River CGTS Platform Guest Services "Handlers" Implementation + * + * Description: This file contains the following FSM handlers, + + * Interfaces: + * + * guestInstClass::timer_handler + * guestInstClass::monitor_handler + * guestInstClass::connect_handler + * + ****************************************************************************/ + +#include +#include "nodeBase.h" +#include "nodeUtil.h" /* for ... clean_bm_response_files */ +#include "nodeTimers.h" /* for ... mtcTimer_start/stop */ +#include "jsonUtil.h" /* for ... jsonApi_array_value */ +#include "daemon_common.h" + +#include "guestBase.h" /* for ... */ +#include "guestUtil.h" /* for ... guestUtil_print_instance */ +#include "guestSvrUtil.h" /* for ... hb_get_message_type_name */ +#include "guestVirtio.h" /* for ... */ +#include "guestSvrMsg.h" /* for ... */ +#include "guestInstClass.h" /* for ... 
*/ + +static int failure_reporting_count = 0 ; + +void voteStateChange ( instInfo * instInfo_ptr , hb_state_t newState ) +{ + if ( instInfo_ptr->vnState == newState ) + return ; + + clog ("%s '%s' -> '%s'\n", + log_prefix(instInfo_ptr).c_str(), + hb_get_state_name(instInfo_ptr->vnState), + hb_get_state_name(newState)); + + instInfo_ptr->vnState = newState ; +} + +void beatStateChange ( instInfo * instInfo_ptr , hb_state_t newState ) +{ + if ( instInfo_ptr->hbState == newState ) + return ; + + if ((( instInfo_ptr->hbState == hbs_server_waiting_challenge ) && + ( newState == hbs_server_waiting_response )) || + (( instInfo_ptr->hbState == hbs_server_waiting_response ) && + ( newState == hbs_server_waiting_challenge ))) + { + ; /* don't print heartbeat state changes */ + } + else if (( newState == hbs_server_waiting_init ) && + ( instInfo_ptr->hbState != hbs_server_waiting_init )) + { + ilog ("%s waiting for init ... \n", log_prefix(instInfo_ptr).c_str()); + } + else + { + clog ("%s '%s' -> '%s'\n", + log_prefix(instInfo_ptr).c_str(), + hb_get_state_name(instInfo_ptr->hbState), + hb_get_state_name(newState)); + } + instInfo_ptr->hbState = newState ; +} + +void hbStatusChange ( instInfo * instInfo_ptr, bool status ) +{ + if ( instInfo_ptr->heartbeating != status ) + { + instInfo_ptr->heartbeating = status ; + string payload = guestUtil_set_inst_info ( get_ctrl_ptr()->hostname , instInfo_ptr ); + + if ( status == true ) + { + ilog ("%s is now heartbeating\n", log_prefix(instInfo_ptr).c_str()); + send_to_guestAgent ( MTC_EVENT_HEARTBEAT_RUNNING, payload.data()); + } + else + { + ilog ("%s is not heartbeating\n", log_prefix(instInfo_ptr).c_str()); + send_to_guestAgent ( MTC_EVENT_HEARTBEAT_STOPPED, payload.data()); + } + + jlog ("%s Heartbeating State Change: %s\n", log_prefix(instInfo_ptr).c_str(), payload.c_str()); + } + else + { + clog ("%s heartbeating is still %s\n", + log_prefix(instInfo_ptr).c_str(), status ? 
"enabled" : "disabled" ); + } +} + + +void manage_heartbeat_failure ( instInfo * instInfo_ptr ) +{ + instInfo_ptr->heartbeat.failed = true ; + + dlog ("%s calling hbStatusChange false\n", log_prefix(instInfo_ptr).c_str()); + + hbStatusChange ( instInfo_ptr, false) ; /* heartbeating is now false */ + + beatStateChange ( instInfo_ptr, hbs_server_waiting_init ) ; +} + +/* Looks up the timer ID and asserts the corresponding node's ringer */ +void guestInstClass::timer_handler ( int sig, siginfo_t *si, void *uc) +{ + struct guestInstClass::inst * inst_ptr ; + timer_t * tid_ptr = (void**)si->si_value.sival_ptr ; + + ctrl_type * ctrl_ptr = get_ctrl_ptr(); + + /* Avoid compiler errors/warnings for parms we must + * have but currently do nothing with */ + sig=sig ; uc = uc ; + + if ( !(*tid_ptr) ) + { + return ; + } + + else if ( *tid_ptr == search_timer.tid ) + { + mtcTimer_stop_int_safe ( search_timer ); + search_timer.ring = true ; + return ; + } + else if ( *tid_ptr == ctrl_ptr->timer.tid ) + { + mtcTimer_stop_int_safe ( ctrl_ptr->timer ); + ctrl_ptr->timer.ring = true ; + return ; + } + + for ( int timer_id = INST_TIMER_MONITOR ; timer_id < INST_TIMER_MAX ; timer_id++ ) + { + if ( ( inst_ptr = guestInstClass::getInst_timer ( *tid_ptr , timer_id ) ) != NULL ) + { + switch ( timer_id ) + { + case INST_TIMER_MONITOR: + { + if (( *tid_ptr == inst_ptr->monitor_timer.tid ) ) + { + mtcTimer_stop_int_safe ( inst_ptr->monitor_timer ); + inst_ptr->monitor_timer.ring = true ; + return ; + } + break ; + } + case INST_TIMER_CONNECT: + { + if (( *tid_ptr == inst_ptr->connect_timer.tid ) ) + { + mtcTimer_stop_int_safe ( inst_ptr->connect_timer ); + inst_ptr->connect_timer.ring = true ; + return ; + } + break ; + } + case INST_TIMER_RECONNECT: + { + if (( *tid_ptr == inst_ptr->reconnect_timer.tid ) ) + { + mtcTimer_stop_int_safe ( inst_ptr->reconnect_timer ); + inst_ptr->reconnect_timer.ring = true ; + + return ; + } + break ; + } + case INST_TIMER_INIT: + { + if (( *tid_ptr == 
inst_ptr->init_timer.tid ) ) + { + beatStateChange ( &inst_ptr->instance, hbs_server_waiting_init ) ; + mtcTimer_stop_int_safe ( inst_ptr->init_timer ); + return ; + } + break ; + } + case INST_TIMER_VOTE: + { + if (( *tid_ptr == inst_ptr->vote_timer.tid ) ) + { + mtcTimer_stop_int_safe ( inst_ptr->vote_timer ); + inst_ptr->vote_timer.ring = true ; + return ; + } + break ; + } + default: + { + // slog ("unknown timer id (%d)\n", timer_id); + } + } /* end switch */ + } /* end if */ + } /* end for */ +} + +/* guest services timer object wrapper + * - does a instance lookup and calls the timer handler */ +void guestTimer_handler ( int sig, siginfo_t *si, void *uc) +{ + get_instInv_ptr()->timer_handler ( sig, si, uc ); +} + +void guestInstClass::start_monitor_timer ( struct guestInstClass::inst * inst_ptr ) +{ + if ( inst_ptr->monitor_timer.tid ) + mtcTimer_stop ( inst_ptr->monitor_timer ); + + mtcTimer_start_sec_msec ( &inst_ptr->monitor_timer, + guestTimer_handler, + (inst_ptr->instance.heartbeat_interval_ms/1000), + (inst_ptr->instance.heartbeat_interval_ms%1000)); +} + +void _schedule_init_timer ( string event_type , struct mtc_timer & timer ) +{ + if (( !event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_SUSPEND) ) || + ( !event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_LIVE_MIGRATE_BEGIN) ) || + ( !event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_COLD_MIGRATE_BEGIN) ) || + ( !event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_REBOOT))) + { + if ( timer.tid ) + mtcTimer_stop ( timer ); + mtcTimer_start ( timer, guestTimer_handler, WAIT_FOR_INIT_TIMEOUT ); + ilog ("scheduling waiting_init transition in %d seconds\n", WAIT_FOR_INIT_TIMEOUT ); + } +} + +/* extend the reconnect time as the attempts pile up. max out at 1 minute. */ +void manage_reconnect_timeout ( instInfo * instInfo_ptr ) +{ + /* extend the reconnect time as the attempts pile up. max out at 1 minute. 
*/ + if ( (instInfo_ptr->connect_wait_in_secs*2) > MTC_MINS_1 ) + instInfo_ptr->connect_wait_in_secs = MTC_MINS_1 ; + else + instInfo_ptr->connect_wait_in_secs *= 2 ; +} + +int connect_count = 0 ; +int guestInstClass::connect_handler ( struct guestInstClass::inst * inst_ptr ) +{ + int rc = PASS ; + switch ( inst_ptr->connectStage ) + { + case INST_CONNECT__START: + { + if ( inst_ptr->instance.connected == true ) + { + inst_ptr->connectStage = INST_CONNECT__START ; + inst_ptr->action = FSM_ACTION__NONE ; + + if (inst_ptr->connect_timer.tid) + mtcTimer_stop ( inst_ptr->connect_timer ); + } + else + { + ilog ("%s connect attempt in %d seconds\n", + log_prefix(&inst_ptr->instance).c_str(), inst_ptr->instance.connect_wait_in_secs); + inst_ptr->instance.connecting = true ; + mtcTimer_start ( inst_ptr->connect_timer, guestTimer_handler, inst_ptr->instance.connect_wait_in_secs ); + inst_ptr->connectStage = INST_CONNECT__WAIT ; + } + break ; + } + case INST_CONNECT__WAIT: + { + if ( inst_ptr->instance.connecting != true ) + { + slog ("%s bad connect wait state ; auto correcting\n", + log_prefix(&inst_ptr->instance).c_str()); + + inst_ptr->connectStage = INST_CONNECT__START ; + inst_ptr->action = FSM_ACTION__NONE ; + } + + else if ( inst_ptr->connect_timer.ring == true ) + { + char buf[PATH_MAX]; + + inst_ptr->connect_timer.ring = false ; + + /* if the socket is not there then don't try and connect to it */ + snprintf(buf, sizeof(buf), "%s/cgcs.heartbeat.%s.sock", QEMU_CHANNEL_DIR, inst_ptr->instance.uuid.data()); + if ( daemon_is_file_present ( buf ) ) + { + /* Try to connect with virtio_channel_connect ... + * If that succeeds then go DONE. 
+ * if that fails with a ENOENT hen that means the socket fd is gone do close and delete instance + * otherwise retry the connect + */ + + ilog ( "%s connect start\n", log_prefix(&inst_ptr->instance).c_str()); + rc = virtio_channel_connect ( &inst_ptr->instance ); + if ( rc == PASS ) + { + inst_ptr->connectStage = INST_CONNECT__DONE ; + break ; + } + /* Abort connect if the instance channel is no longer there. + * -1 and errno=2 : No such file or directory) */ + else if (( rc == -1 ) && ( errno == ENOENT )) + { + ilog ("%s channel gone\n", log_prefix(&inst_ptr->instance).c_str() ); + del_inst ( inst_ptr->instance.uuid ); + return (RETRY); + } + else + { + wlog ("%s channel connect failed\n", + log_prefix(&inst_ptr->instance).c_str() ); + manage_reconnect_timeout ( &inst_ptr->instance ); + } + } + else + { + ilog ("%s does not exist\n", buf ); + manage_reconnect_timeout ( &inst_ptr->instance ); + } + inst_ptr->connectStage = INST_CONNECT__START ; + } + break ; + } + case INST_CONNECT__DONE: + { + + inst_ptr->connectStage = INST_CONNECT__START ; + inst_ptr->action = FSM_ACTION__NONE ; + + inst_ptr->instance.connecting = false ; + inst_ptr->instance.connected = true ; + + failure_reporting_count = 0 ; + + /* no longer failed */ + inst_ptr->instance.heartbeat.failed = false ; + inst_ptr->instance.heartbeat.b2b_misses = 0 ; + + /* waiting for init message */ + beatStateChange ( &inst_ptr->instance, hbs_server_waiting_init ) ; + + /* default back to the start 2 second reconnect time default */ + inst_ptr->instance.connect_wait_in_secs = DEFAULT_CONNECT_WAIT ; + + start_monitor_timer ( inst_ptr ); + + if ( inst_ptr->reconnect_timer.tid ) + mtcTimer_stop ( inst_ptr->reconnect_timer ); + mtcTimer_start ( inst_ptr->reconnect_timer, guestTimer_handler, HEARTBEAT_START_TIMEOUT ); + + ilog ("%s connect done\n", log_prefix(&inst_ptr->instance).c_str()); + + break ; + } + default: + { + slog ("Unsupported connect stage (%d) ... 
correcting\n", inst_ptr->connectStage );
+            inst_ptr->connectStage = INST_CONNECT__START ;
+        }
+    }
+    return(rc);
+}
+
+/*
+ * monitor_handler - per-instance heartbeat monitoring FSM.
+ *
+ * Dispatches on inst_ptr->monitorStage:
+ *
+ *   INST_MONITOR__STEADY  - services the reconnect, monitor and vote timers
+ *                           when their 'ring' flag is set.
+ *   INST_MONITOR__DELAY   - moves to the FAILURE stage once the monitor
+ *                           timer has rung.
+ *   INST_MONITOR__FAILURE - sends MTC_EVENT_HEARTBEAT_LOSS to the guestAgent
+ *                           if both host level and instance level reporting
+ *                           are enabled, then returns to STEADY.
+ *   any other value       - forced back to STEADY.
+ *
+ * After the stage handling, the connect FSM is enabled for an instance that
+ * is not connected or whose open channel is not ok, provided no other
+ * action is in progress.
+ *
+ * Returns rc, which is never changed from PASS in this function.
+ */
+int guestInstClass::monitor_handler ( struct guestInstClass::inst * inst_ptr )
+{
+    int rc = PASS ;
+
+#ifdef WANT_THIS
+    clog ("%s in '%s:%s' state - stage %d - R:%c F:%c H:%c\n",
+              log_prefix(&inst_ptr->instance).c_str(),
+              hb_get_state_name(inst_ptr->instance.hbState),
+              hb_get_state_name(inst_ptr->instance.vnState),
+              inst_ptr->monitorStage,
+              inst_ptr->instance.heartbeat.reporting ? 'Y' : 'n',
+              inst_ptr->instance.heartbeat.failed ? 'Y' : 'n',
+              inst_ptr->instance.heartbeating ? 'Y' : 'n');
+           // inst_ptr->instance.heartbeat.waiting ? 'Y' : 'n');
+#endif
+
+    switch ( inst_ptr->monitorStage )
+    {
+        case INST_MONITOR__STEADY:
+        {
+            /* Manage Reconnect Timer */
+            if ( inst_ptr->reconnect_timer.ring == true )
+            {
+                inst_ptr->reconnect_timer.ring = false ;
+                if (( inst_ptr->instance.heartbeating == false ) &&
+                    ( inst_ptr->instance.connecting == false ))
+                {
+                    /* If this timer rings and heartbeating is not started
+                     * then we need to close the connection and reopen it.
+                     * Since the re-open is automatic all we need to do is
+                     * close it here */
+                    wlog ("%s issuing auto-reconnect ; no heartbeating\n",
+                              log_prefix(&inst_ptr->instance).c_str() );
+
+                    reconnect_start ( inst_ptr->instance.uuid.data() );
+                }
+                /* the reconnect timer is always re-armed after it rings */
+                mtcTimer_start ( inst_ptr->reconnect_timer, guestTimer_handler, HEARTBEAT_START_TIMEOUT );
+            }
+
+            /* Manage Monitor Timer - expires in 4 cases
+             *  1. heartbeat miss - hbs_server_waiting_response
+             *  2. heartbeat done - hbs_server_waiting_challenge - interval is done and ready for the next one
+             *  3. heartbeat none - not heartbeating ; waiting for init
+             *  4. heartbeat fail - in wrong state
+             **/
+            if ( inst_ptr->monitor_timer.ring == true )
+            {
+                inst_ptr->monitor_timer.ring = false ;
+
+                /* Case 1: heartbeat miss while waiting for heartbeat response */
+                if ( inst_ptr->instance.hbState == hbs_server_waiting_response )
+                {
+                    int threshold = daemon_get_cfg_ptr()->hbs_failure_threshold ;
+                    if (( inst_ptr->instance.heartbeat.failed == true ) ||
+                        ( inst_ptr->instance.heartbeat.reporting == false ))
+                    {
+                        /* already failed or not reporting ; drop back to
+                         * waiting for a new init from the guest */
+                        hbStatusChange ( &inst_ptr->instance, false );
+                        beatStateChange ( &inst_ptr->instance, hbs_server_waiting_init) ;
+                    }
+                    else if ( ++inst_ptr->instance.heartbeat.b2b_misses > threshold )
+                    {
+                        /* back-to-back miss count exceeded the configured threshold */
+                        inst_ptr->instance.message_count = 0 ;
+                        inst_ptr->instance.heartbeat.b2b_misses = 0 ;
+
+                        elog ("%s *** Heartbeat Loss *** (Timeout=%d msec)\n",
+                                  log_prefix(&inst_ptr->instance).c_str(),
+                                  inst_ptr->instance.heartbeat_interval_ms );
+
+                        manage_heartbeat_failure ( &inst_ptr->instance );
+                        inst_ptr->monitorStage = INST_MONITOR__FAILURE ;
+                    }
+                    else
+                    {
+                        wlog ("%s *** Heartbeat Miss *** %d of %d (Timeout=%d msec)\n",
+                                  log_prefix(&inst_ptr->instance).c_str(),
+                                  inst_ptr->instance.heartbeat.b2b_misses,
+                                  threshold,
+                                  inst_ptr->instance.heartbeat_interval_ms );
+                        /* Send another challenge */
+                        send_challenge ( inst_ptr ) ;
+                    }
+                }
+
+                /* Case 2: Heartbeat done and the interval is expired.
+                 *         Just start another challenge request
+                 */
+                else if (( inst_ptr->instance.hbState != hbs_server_waiting_init ) &&
+                         ( inst_ptr->instance.hbState != hbs_server_waiting_response) &&
+                         ( inst_ptr->instance.heartbeat.waiting == false ))
+                {
+                    // printf ("*");
+                    /* Send another challenge */
+                    inst_ptr->instance.heartbeat.b2b_misses = 0 ;
+                    send_challenge ( inst_ptr ) ;
+                }
+
+                /* Case 3: The monitor timer still runs while we are in the
+                 *         waiting for init state so just make sure we are
+                 *         handling init stuff
+                 */
+                else if ( inst_ptr->instance.hbState == hbs_server_waiting_init )
+                {
+                    clog ("%s is %s\n", log_prefix(&inst_ptr->instance).c_str(),
+                              hb_get_state_name(inst_ptr->instance.hbState));
+                    inst_ptr->messageStage = INST_MESSAGE__RECEIVE ;
+                    inst_ptr->instance.message_count = 0 ;
+                    inst_ptr->instance.heartbeat.b2b_misses = 0 ;
+                }
+
+                /* Case 4: Heartbeat has failed while we are in the wrong state */
+                else
+                {
+                    int threshold = daemon_get_cfg_ptr()->hbs_failure_threshold ;
+                    if ( inst_ptr->instance.heartbeat.failed == true )
+                    {
+                        ; /* nothing to do while failed */
+                    }
+                    else if ( inst_ptr->instance.heartbeat.reporting == false )
+                    {
+                        /* Send a challenge to keep the heartbeat going */
+                        send_challenge ( inst_ptr ) ;
+                    }
+                    else if ( ++inst_ptr->instance.heartbeat.b2b_misses > threshold )
+                    {
+                        inst_ptr->instance.message_count = 0 ;
+                        inst_ptr->instance.heartbeat.b2b_misses = 0 ;
+
+                        elog ("%s *** Heartbeat Loss *** (state:%s)\n",
+                                  log_prefix(&inst_ptr->instance).c_str(),
+                                  hb_get_state_name(inst_ptr->instance.hbState));
+
+                        manage_heartbeat_failure ( &inst_ptr->instance );
+                        inst_ptr->monitorStage = INST_MONITOR__FAILURE ;
+                    }
+                    else
+                    {
+                        wlog ("%s *** Heartbeat Miss *** (state:%s)\n",
+                                  log_prefix(&inst_ptr->instance).c_str(),
+                                  hb_get_state_name(inst_ptr->instance.hbState));
+                        /* Send another challenge */
+                        send_challenge ( inst_ptr ) ;
+                    }
+                }
+            }
+
+            /* Manage Vote Timer - rings when the guest did not answer a
+             * vote / notification in time */
+            if ( inst_ptr->vote_timer.ring == true )
+            {
+                if ( inst_ptr->instance.vnState == hbs_client_waiting_shutdown_response )
+                {
+                    // handle time out as silent agreement to accept
+                    if ( !inst_ptr->instance.msg_type.compare(GUEST_HEARTBEAT_MSG_ACTION_NOTIFY) ||
+                         !inst_ptr->instance.msg_type.compare(GUEST_HEARTBEAT_MSG_ACTION_RESPONSE) )
+                    {
+                        ilog ("%s response time out on '%s' message ; proceeding with action\n",
+                                  log_prefix(&inst_ptr->instance).c_str(),
+                                  inst_ptr->instance.msg_type.c_str());
+
+                        /* revocable -> 'accept' ; irrevocable -> 'complete' ;
+                         * anything else is reported with the 'unknown' result */
+                        string reject_reason = "";
+                        string vote_result = GUEST_HEARTBEAT_MSG_VOTE_RESULT_UNKNOWN;
+                        if (!inst_ptr->instance.notification_type.compare(GUEST_HEARTBEAT_MSG_NOTIFY_REVOCABLE))
+                        {
+                            vote_result = GUEST_HEARTBEAT_MSG_VOTE_RESULT_ACCEPT;
+                        }
+                        else if (!inst_ptr->instance.notification_type.compare(GUEST_HEARTBEAT_MSG_NOTIFY_IRREVOCABLE))
+                        {
+                            vote_result = GUEST_HEARTBEAT_MSG_VOTE_RESULT_COMPLETE;
+                        }
+                        else
+                        {
+                            wlog ("%s Unexpected '%s' notify timeout ; proceeding with action\n",
+                                      log_prefix(&inst_ptr->instance).c_str(),
+                                      inst_ptr->instance.notification_type.c_str());
+                        }
+                        send_vote_notify_resp (get_ctrl_ptr()->hostname,
+                                               inst_ptr->instance.uuid,
+                                               inst_ptr->instance.notification_type,
+                                               inst_ptr->instance.event_type,
+                                               vote_result, reject_reason);
+                    }
+
+                    _schedule_init_timer ( inst_ptr->instance.event_type ,
+                                           inst_ptr->init_timer) ;
+
+                    voteStateChange ( &inst_ptr->instance, hbs_server_waiting_init );
+                }
+                inst_ptr->vote_timer.ring = false ;
+            }
+            break ;
+        }
+        case INST_MONITOR__DELAY:
+        {
+            /* NOTE(review): the ring flag is not cleared here - confirm intended */
+            if ( inst_ptr->monitor_timer.ring == true )
+            {
+                inst_ptr->monitorStage = INST_MONITOR__FAILURE ;
+            }
+            break ;
+        }
+        case INST_MONITOR__FAILURE:
+        {
+            if ( get_instInv_ptr()->reporting == false )
+            {
+                wlog_throttled (failure_reporting_count, 100, "host level reporting is disabled\n");
+            }
+            else if ( inst_ptr->instance.heartbeat.reporting == false )
+            {
+                wlog_throttled (failure_reporting_count, 100, "%s instance level reporting is disabled\n",
+                    log_prefix(&inst_ptr->instance).c_str());
+            }
+            else
+            {
+                inst_ptr->instance.heartbeat.failures++ ;
+
+                wlog ("%s sending failure notification to guestAgent (failures:%d)\n",
+                          log_prefix(&inst_ptr->instance).c_str(),
+                          inst_ptr->instance.heartbeat.failures);
+
+                /* payload is {"hostname":"<host>","uuid":"<uuid>"} */
+                string payload = "" ;
+                payload.append ("{\"hostname\":\"");
+                payload.append (get_ctrl_ptr()->hostname);
+                payload.append ("\",\"uuid\":\"");
+                payload.append (inst_ptr->instance.uuid);
+                payload.append ("\"}");
+
+                jlog1 ("%s Failure Event Payload: %s\n",
+                           log_prefix(&inst_ptr->instance).c_str(), payload.c_str());
+
+                send_to_guestAgent ( MTC_EVENT_HEARTBEAT_LOSS , payload.data());
+                failure_reporting_count = 0 ;
+            }
+            // inst_ptr->instance.heartbeat.failed = false ;
+            inst_ptr->monitorStage = INST_MONITOR__STEADY ;
+
+            break ;
+        }
+        default:
+        {
+            inst_ptr->monitorStage = INST_MONITOR__STEADY ;
+            break ;
+        }
+    }
+
+    /* This will try to reconnect failed channels */
+    if (( !inst_ptr->instance.connected ) ||
+        (( inst_ptr->instance.chan_fd > 0 ) && ( inst_ptr->instance.chan_ok != true )))
+    {
+        if ( inst_ptr->action == FSM_ACTION__NONE )
+        {
+            ilog ("%s enabling connect FSM\n", log_prefix(&inst_ptr->instance).c_str());
+
+            hbStatusChange ( &inst_ptr->instance, false) ;
+
+            inst_ptr->connectStage = INST_CONNECT__START ;
+            inst_ptr->action = FSM_ACTION__CONNECT ;
+        }
+        else if ( inst_ptr->action != FSM_ACTION__CONNECT )
+        {
+            wlog ("%s bypassing reconnect due to existing action (%d)\n",
+                      log_prefix(&inst_ptr->instance).c_str(),
+                      inst_ptr->action);
+        }
+    }
+
+    return (rc);
+}
+
+
+/*****************************************************************************
+ *
+ * Name       : message_handler
+ *
+ * Purpose    : Receive messages from the guest and trigger actions
+ *              based on message content and type.
+ *
+ * Description: Only stage presently supported is INST_MESSAGE__RECEIVE
+ *              for each connected socket. This FSM handler is not called
+ *              unless there is a valid receive message to be handled. 
If + * for some reason there are no enqued messages then the FSM + * just returns having done thinting ; should not happen + * through. + * + * Currently supported message types are. + * + * GUEST_HEARTBEAT_MSG_INIT - vm heartbeat init message. + * > Action is to send an init_ack message to start heartbeating + * + * GUEST_HEARTBEAT_MSG_CHALLENGE_RESPONSE - a challenge response message + * > Action is to change state to 'hbs_server_waiting_challenge' + * and allow the heartbeat interval timer to expire in the + * monitor_handler which will then send another challenge + * request setting state back to 'hbs_server_waiting_response' + * + * Note: Unsupported messages are popped off the queue and discarded with + * an error log containing the message type. + * + *****************************************************************************/ +int guestInstClass::message_handler ( struct guestInstClass::inst * inst_ptr ) +{ + int rc = PASS ; + + switch ( inst_ptr->messageStage ) + { + case INST_MESSAGE__RECEIVE: + { + /* Only process if there are messages */ + if ( inst_ptr->message_list.size() ) + { + struct json_object *jobj_msg = inst_ptr->message_list.front(); + + if (jsonUtil_get_int(jobj_msg, GUEST_HEARTBEAT_MSG_VERSION, &inst_ptr->instance.version) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_VERSION); + break; + } + if (jsonUtil_get_int(jobj_msg, GUEST_HEARTBEAT_MSG_REVISION, &inst_ptr->instance.revision) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_REVISION); + break; + } + if (jsonUtil_get_string(jobj_msg, GUEST_HEARTBEAT_MSG_MSG_TYPE, &inst_ptr->instance.msg_type) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_MSG_TYPE); + break; + } + if (jsonUtil_get_int(jobj_msg, GUEST_HEARTBEAT_MSG_SEQUENCE, &inst_ptr->instance.sequence) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_SEQUENCE); + break; + } + + mlog1 ("%s:%s message - Seq:%x Ver:%d.%d Fd:%d\n", + inst_ptr->instance.uuid.c_str(), 
+ inst_ptr->instance.msg_type.c_str(), + inst_ptr->instance.sequence , + inst_ptr->instance.version, inst_ptr->instance.revision, + inst_ptr->instance.chan_fd); + + if ( !inst_ptr->instance.msg_type.compare(GUEST_HEARTBEAT_MSG_CHALLENGE_RESPONSE) ) + { + if ( inst_ptr->instance.hbState == hbs_server_waiting_response ) + { + uint32_t heartbeat_response; + string heartbeat_health; + string corrective_action; + string log_msg; + + inst_ptr->instance.heartbeat.waiting = false ; + + if ( daemon_get_cfg_ptr()->debug_work ) + printf ("-"); + + inst_ptr->heartbeat_count++ ; + if (jsonUtil_get_int(jobj_msg, GUEST_HEARTBEAT_MSG_HEARTBEAT_RESPONSE, &heartbeat_response) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_HEARTBEAT_RESPONSE); + break; + } + if (jsonUtil_get_string(jobj_msg, GUEST_HEARTBEAT_MSG_HEARTBEAT_HEALTH, &heartbeat_health) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_HEARTBEAT_HEALTH); + break; + } + if (jsonUtil_get_string(jobj_msg, GUEST_HEARTBEAT_MSG_CORRECTIVE_ACTION, &corrective_action) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_CORRECTIVE_ACTION); + break; + } + if (jsonUtil_get_string(jobj_msg, GUEST_HEARTBEAT_MSG_LOG_MSG, &log_msg) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_LOG_MSG); + break; + } + + if ( heartbeat_response != inst_ptr->instance.heartbeat_challenge) + { + inst_ptr->instance.health_count = 0 ; + wlog_throttled (inst_ptr->mismatch_count, 100, "%s challenge secret mismatch (%d:%d) (throttle:100)\n", + log_prefix(&inst_ptr->instance).c_str(), + inst_ptr->instance.heartbeat_challenge, + heartbeat_response); + } + else if (!heartbeat_health.compare(GUEST_HEARTBEAT_MSG_HEALTHY)) + { + inst_ptr->mismatch_count = 0 ; + inst_ptr->instance.health_count = 0 ; + inst_ptr->instance.corrective_action_count = 0 ; + + mlog ("%s recv '%s' (seq:%x) (health:%s)\n", + log_prefix(&inst_ptr->instance).c_str(), + inst_ptr->instance.msg_type.c_str(), 
inst_ptr->instance.sequence, heartbeat_health.c_str()); + + /* lets wait for the period timer to expire before + * sending another in the monitor_handler */ + beatStateChange ( &inst_ptr->instance, hbs_server_waiting_challenge ) ; + + if ( inst_ptr->instance.heartbeating != true ) + { + hbStatusChange ( &inst_ptr->instance, true ); + } + + if (inst_ptr->instance.heartbeat.failed != false ) + { + inst_ptr->instance.heartbeat.failed = false ; + } + + ilog_throttled ( inst_ptr->instance.message_count, 1000, "%s is heartbeating ...(seq:%08x)\n", + log_prefix(&inst_ptr->instance).c_str(), + inst_ptr->instance.sequence ); + } + else + { + const char *msg = json_object_to_json_string_ext(jobj_msg, JSON_C_TO_STRING_PLAIN); + ilog ("%s received unhealthy response message: %s\n", + log_prefix(&inst_ptr->instance).c_str(), msg ); + + inst_ptr->mismatch_count = 0 ; + + /* lets wait for the period timer to expire before + * sending another in the monitor_handler */ + beatStateChange ( &inst_ptr->instance, hbs_server_waiting_challenge ) ; + + if ( inst_ptr->instance.health_count == 0 ) + { + if ( heartbeat_health.compare(GUEST_HEARTBEAT_MSG_UNHEALTHY) != 0 ) + { + wlog ("%s Invalid health reported (%s)\n", + log_prefix(&inst_ptr->instance).c_str(), + heartbeat_health.c_str() ); + } + + wlog_throttled ( inst_ptr->instance.health_count, 500, + "%s VM Unhealthy Message:\n", + log_prefix(&inst_ptr->instance).c_str()); + + wlog ("%s ... 
%s\n", log_prefix(&inst_ptr->instance).c_str(), + log_msg.c_str() ); + } + + inst_ptr->instance.unhealthy_corrective_action = corrective_action; + + if (!inst_ptr->instance.unhealthy_corrective_action.compare(GUEST_HEARTBEAT_MSG_ACTION_NONE) || + !inst_ptr->instance.unhealthy_corrective_action.compare(GUEST_HEARTBEAT_MSG_ACTION_UNKNOWN)) + { + wlog_throttled ( inst_ptr->instance.corrective_action_count, 500, + "%s corrective action is %s ; not reporting\n", + log_prefix(&inst_ptr->instance).c_str(), + inst_ptr->instance.unhealthy_corrective_action.c_str()); + } else { + inst_ptr->instance.unhealthy_failure = true ; + string payload = guestUtil_set_inst_info ( get_ctrl_ptr()->hostname , &inst_ptr->instance ); + inst_ptr->instance.unhealthy_failure = false ; + + ilog ("%s ill health notification\n", log_prefix(&inst_ptr->instance).c_str()); + send_to_guestAgent ( MTC_EVENT_HEARTBEAT_ILLHEALTH, payload.data()); + inst_ptr->instance.corrective_action_count = 0 ; + } + } + } + else if ( inst_ptr->instance.hbState == hbs_server_waiting_challenge ) + { + wlog ("%s received late '%s' response (seq:%x)\n", + log_prefix(&inst_ptr->instance).c_str(), + inst_ptr->instance.msg_type.c_str(), + inst_ptr->instance.sequence); + } + else + { + dlog ("%s recv '%s' while in '%s' state (seq:%x)\n", + log_prefix(&inst_ptr->instance).c_str(), + inst_ptr->instance.msg_type.c_str(), + hb_get_state_name(inst_ptr->instance.hbState), + inst_ptr->instance.sequence); + } + inst_ptr->message_list.pop_front(); + break ; + } + + else if ( !inst_ptr->instance.msg_type.compare(GUEST_HEARTBEAT_MSG_INIT) ) + { + const char *msg = json_object_to_json_string_ext(jobj_msg, JSON_C_TO_STRING_PLAIN); + ilog ("%s received init message: %s\n", + log_prefix(&inst_ptr->instance).c_str(), msg ); + + if (inst_ptr->instance.hbState != hbs_server_waiting_init) + { + wlog("%s unexpected 'init' message ; currState: '%s' (%d)\n", + log_prefix(&inst_ptr->instance).c_str(), + 
hb_get_state_name(inst_ptr->instance.hbState), + inst_ptr->instance.hbState ); + + /* Allow the heartbeat challenge response message log */ + inst_ptr->instance.message_count = 0 ; + beatStateChange ( &inst_ptr->instance, hbs_server_waiting_init ) ; + inst_ptr->message_list.pop_front(); + } + else + { + string instance_name; + string response; + + if (jsonUtil_get_int(jobj_msg, GUEST_HEARTBEAT_MSG_INVOCATION_ID, &inst_ptr->instance.invocation_id) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_INVOCATION_ID); + break; + } + + if (jsonUtil_get_string(jobj_msg, GUEST_HEARTBEAT_MSG_NAME, &instance_name) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_NAME); + break; + } + + if (jsonUtil_get_string(jobj_msg, GUEST_HEARTBEAT_MSG_CORRECTIVE_ACTION, &inst_ptr->instance.corrective_action) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_CORRECTIVE_ACTION); + break; + } + + if (jsonUtil_get_int(jobj_msg, GUEST_HEARTBEAT_MSG_HEARTBEAT_INTERVAL_MS, &inst_ptr->instance.heartbeat_interval_ms) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_HEARTBEAT_INTERVAL_MS); + break; + } + + if (jsonUtil_get_int(jobj_msg, GUEST_HEARTBEAT_MSG_VOTE_SECS, &inst_ptr->instance.vote_secs) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_VOTE_SECS); + break; + } + + if (jsonUtil_get_int(jobj_msg, GUEST_HEARTBEAT_MSG_SHUTDOWN_NOTICE_SECS, &inst_ptr->instance.shutdown_notice_secs) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_SHUTDOWN_NOTICE_SECS); + break; + } + + if (jsonUtil_get_int(jobj_msg, GUEST_HEARTBEAT_MSG_SUSPEND_NOTICE_SECS, &inst_ptr->instance.suspend_notice_secs) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_SUSPEND_NOTICE_SECS); + break; + } + + if (jsonUtil_get_int(jobj_msg, GUEST_HEARTBEAT_MSG_RESUME_NOTICE_SECS, &inst_ptr->instance.resume_notice_secs) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_RESUME_NOTICE_SECS); + break; + } + + if 
(jsonUtil_get_int(jobj_msg, GUEST_HEARTBEAT_MSG_RESTART_SECS, &inst_ptr->instance.restart_secs) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_RESTART_SECS); + break; + } + + inst_ptr->instance.name = instance_name; + + /* Override the unused 'inst' name with an abbreviated version of the instance uuid + * cgcs.heartbeat.1f0bc3e3-efbe-48b8-9688-4821fc0ff83c.sock + * + * */ + if ( inst_ptr->instance.uuid.length() >= (24+12) ) + inst_ptr->instance.inst = inst_ptr->instance.uuid.substr(24,12); + + string name = log_prefix(&inst_ptr->instance).c_str() ; + + ilog ("%s 'init' message ; sending 'init_ack' (ver:%d.%d)\n", + log_prefix(&inst_ptr->instance).c_str(), + inst_ptr->instance.version, + inst_ptr->instance.revision ); + + inst_ptr->instance.heartbeat_challenge = rand(); + + + /* Set the unhealthy corrective action to unknown by default */ + inst_ptr->instance.unhealthy_corrective_action = GUEST_HEARTBEAT_MSG_ACTION_UNKNOWN ; + + ilog ("%s corrective_action = %s\n", + log_prefix(&inst_ptr->instance).c_str(), + inst_ptr->instance.corrective_action.c_str() ); + + ilog ("%s Interval : %4d msec\n",name.c_str(), inst_ptr->instance.heartbeat_interval_ms); + + /* auto correct an interval that is too small */ + if ( inst_ptr->instance.heartbeat_interval_ms < (uint32_t)daemon_get_cfg_ptr()->hbs_pulse_period ) + { + wlog ("%s cannot have an interval of zero seconds\n", + log_prefix(&inst_ptr->instance).c_str()); + + wlog ("%s ... 
auto correcting to %d msecs\n", + log_prefix(&inst_ptr->instance).c_str(), + daemon_get_cfg_ptr()->hbs_pulse_period); + + inst_ptr->instance.heartbeat_interval_ms = daemon_get_cfg_ptr()->hbs_pulse_period ; + } + + ilog ("%s Vote TO : %4d secs\n",name.c_str(), inst_ptr->instance.vote_secs); + inst_ptr->instance.vote_to_str = time_in_secs_to_str(inst_ptr->instance.vote_secs) ; + + ilog ("%s Shutdown : %4d secs\n", name.c_str(), inst_ptr->instance.shutdown_notice_secs); + inst_ptr->instance.shutdown_to_str = time_in_secs_to_str (inst_ptr->instance.shutdown_notice_secs); + + ilog ("%s Suspend : %4d secs\n", name.c_str(), inst_ptr->instance.suspend_notice_secs); + inst_ptr->instance.suspend_to_str = time_in_secs_to_str (inst_ptr->instance.suspend_notice_secs); + + ilog ("%s Resume : %4d secs\n", name.c_str(), inst_ptr->instance.resume_notice_secs); + inst_ptr->instance.resume_to_str = time_in_secs_to_str (inst_ptr->instance.resume_notice_secs); + + ilog ("%s Restart : %4d secs\n", name.c_str(), inst_ptr->instance.restart_secs); + inst_ptr->instance.restart_to_str = time_in_secs_to_str(inst_ptr->instance.restart_secs); + + /* cancel the init timer since we already got the init */ + if ( inst_ptr->init_timer.tid ) + mtcTimer_stop ( inst_ptr->init_timer ) ; + + /************************************************************* + * + * Send INIT ACK right away followed by the first Challenge. + * + * Cannot allow the FSM to run or we might see a + * race condition with another INIT messages that come after. 
+ * + *************************************************************/ + response = guestSvrMsg_hdr_init(inst_ptr->instance.uuid , GUEST_HEARTBEAT_MSG_INIT_ACK); + response.append ("\""); + response.append (GUEST_HEARTBEAT_MSG_INVOCATION_ID); + response.append ("\":"); + response.append (int_to_string(inst_ptr->instance.invocation_id)); + response.append ("}\n"); + + inst_ptr->instance.message_count = 0 ; + + /* Send message to the vm through the libvirt channel */ + ilog("%s sending 'init_ack' invocation_id:%d, msg: %s\n", name.c_str(), + inst_ptr->instance.invocation_id, response.c_str()); + + get_instInv_ptr()->write_inst (&inst_ptr->instance, response.c_str(), response.length()); + + /* Send a challenge right away */ + beatStateChange ( &inst_ptr->instance, hbs_server_waiting_response ) ; + inst_ptr->instance.heartbeat.b2b_misses = 0 ; + inst_ptr->instance.heartbeat.failed = false ; + send_challenge ( inst_ptr ) ; + + inst_ptr->messageStage = INST_MESSAGE__RECEIVE ; + } + } + else if ( !inst_ptr->instance.msg_type.compare(GUEST_HEARTBEAT_MSG_ACTION_RESPONSE) ) + { + uint32_t invocation_id; + const char *msg = json_object_to_json_string_ext(jobj_msg, JSON_C_TO_STRING_PLAIN); + ilog ("%s received action response message: %s\n", + log_prefix(&inst_ptr->instance).c_str(), msg ); + + if (jsonUtil_get_int(jobj_msg, GUEST_HEARTBEAT_MSG_INVOCATION_ID, &invocation_id) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_INVOCATION_ID); + break; + } + + if ( invocation_id != inst_ptr->instance.invocation_id ) + { + wlog ("%s invocation id mismatch (%x:%x) - dropping response\n", + log_prefix(&inst_ptr->instance).c_str(), + invocation_id, + inst_ptr->instance.invocation_id ); + string log_err = "Invocation id mismatch. 
Received: "; + log_err.append(int_to_string(invocation_id)); + log_err.append(" expect: "); + log_err.append(int_to_string(inst_ptr->instance.invocation_id)); + send_client_msg_nack(&inst_ptr->instance, log_err); + } + else + { + string event_type; + string notification_type; + string vote_result; + string reject_reason; + + if(jsonUtil_get_string(jobj_msg, GUEST_HEARTBEAT_MSG_EVENT_TYPE, &event_type) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_EVENT_TYPE); + break; + } + if (jsonUtil_get_string(jobj_msg, GUEST_HEARTBEAT_MSG_NOTIFICATION_TYPE, ¬ification_type) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_NOTIFICATION_TYPE); + break; + } + if (jsonUtil_get_string(jobj_msg, GUEST_HEARTBEAT_MSG_VOTE_RESULT, &vote_result) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_VOTE_RESULT); + break; + } + if (jsonUtil_get_string(jobj_msg, GUEST_HEARTBEAT_MSG_LOG_MSG, &reject_reason) != PASS) + { + handle_parse_failure(inst_ptr, GUEST_HEARTBEAT_MSG_LOG_MSG); + break; + } + + + send_vote_notify_resp (get_ctrl_ptr()->hostname, + inst_ptr->instance.uuid, + notification_type, + event_type, + vote_result, + reject_reason); + + inst_ptr->monitorStage = INST_MONITOR__STEADY ; + + _schedule_init_timer ( event_type , inst_ptr->init_timer ); + + // if pause-accept or pause-complete) + if (!event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_PAUSE) && + (!vote_result.compare(GUEST_HEARTBEAT_MSG_VOTE_RESULT_ACCEPT) || + !vote_result.compare(GUEST_HEARTBEAT_MSG_VOTE_RESULT_COMPLETE)) ) + { + beatStateChange ( &inst_ptr->instance, hbs_server_waiting_init ) ; + } + voteStateChange ( &inst_ptr->instance, hbs_server_waiting_init ) ; + + // cancel the vote timer + if ( inst_ptr->vote_timer.tid ) + mtcTimer_stop ( inst_ptr->vote_timer ); + inst_ptr->vote_timer.ring = false ; + } + /* Delete the message */ + inst_ptr->message_list.pop_front(); + } + else if ( !inst_ptr->instance.msg_type.compare(GUEST_HEARTBEAT_MSG_EXIT) ) + { + const char *msg 
= json_object_to_json_string_ext(jobj_msg, JSON_C_TO_STRING_PLAIN); + ilog ("%s received client exit request: %s\n", + log_prefix(&inst_ptr->instance).c_str(), msg ); + + /* Prevent a heartbeat loss in the case of a graceful exit + * by moving into the waiting_init state */ + beatStateChange ( &inst_ptr->instance, hbs_server_waiting_init ) ; + + hbStatusChange ( &inst_ptr->instance, false ); + + inst_ptr->message_list.pop_front(); + } + else + { + elog ("%s unsupported message type: %s.\n", + log_prefix(&inst_ptr->instance).c_str(), + inst_ptr->instance.msg_type.c_str()); + + string log_err = "unsupported message type: "; + log_err.append(inst_ptr->instance.msg_type); + send_client_msg_nack(&inst_ptr->instance, log_err); + + /* Delete the message */ + inst_ptr->message_list.pop_front(); + } + } + + /* Global case break */ + break ; + } + + default: + { + elog ("Unsupported stage (%d)\n", inst_ptr->messageStage ); + } + } + return (rc) ; +} + +/************************************************************************************* + * + * Name : send_challenge + * + * Description: Transmit a heartbeat challenge to he specified VM + * and start the timoeut timer. 
+ * + **************************************************************************************/ +int guestInstClass::send_challenge ( struct guestInstClass::inst * inst_ptr ) +{ + size_t bytes_sent ; + + string message = guestSvrMsg_hdr_init(inst_ptr->instance.uuid , GUEST_HEARTBEAT_MSG_CHALLENGE); + + beatStateChange ( &inst_ptr->instance, hbs_server_waiting_response ); + + inst_ptr->instance.heartbeat_challenge = rand(); + + message.append ("\""); + message.append (GUEST_HEARTBEAT_MSG_HEARTBEAT_CHALLENGE); + message.append ("\":"); + message.append (int_to_string(inst_ptr->instance.heartbeat_challenge)); + message.append ("}\n"); + + /* Send message to the vm through the libvirt channel */ + bytes_sent = write_inst (&inst_ptr->instance, message.c_str(), message.length()); + + /* The write_inst will report an error log. + * This one is only to report a partial message send. + */ + if (( bytes_sent > 0) && ( bytes_sent != message.length())) + { + wlog ("%s only sent %ld of %ld bytes\n", + log_prefix(&inst_ptr->instance).c_str(), + bytes_sent, message.length() ); + } + + /* Clear the message queue and wait for the challenge response */ + inst_ptr->message_list.clear (); + + /* Waiting on a response now */ + inst_ptr->instance.heartbeat.waiting = true ; + + start_monitor_timer ( inst_ptr ) ; + + if ( daemon_get_cfg_ptr()->debug_work ) + printf ("_"); + + return (PASS); +} + +/************************************************************************************* + * + * Name : send_vote_notify + * + * Description: Send a voting or notification message to GuestClient on VM + * and start the timeout timer. 
+ *
+ *              The timeout sent to the guest is in milliseconds:
+ *               - revocable (vote) requests use vote_secs
+ *               - irrevocable notifications use the shutdown, suspend or
+ *                 resume notice time that matches the event type and fall
+ *                 back to vote_secs for an unsupported event type
+ *
+ * Parameters : uuid - uuid of the instance to notify
+ *
+ * Returns    : PASS once the message has been handed to write_inst,
+ *              FAIL if the uuid does not map to a known instance.
+ *
+ **************************************************************************************/
+int guestInstClass::send_vote_notify ( string uuid )
+{
+    struct guestInstClass::inst * inst_ptr = getInst(uuid);
+    size_t bytes_sent ;
+    uint32_t timeout_ms;
+
+    /* Don't dereference a failed lookup */
+    if ( !inst_ptr )
+    {
+        elog ("%s is unknown\n", uuid.c_str());
+        return (FAIL);
+    }
+
+    string message = guestSvrMsg_hdr_init(inst_ptr->instance.uuid , GUEST_HEARTBEAT_MSG_ACTION_NOTIFY);
+
+    voteStateChange ( &inst_ptr->instance, hbs_client_waiting_shutdown_response );
+
+
+    if ( !inst_ptr->instance.notification_type.compare(GUEST_HEARTBEAT_MSG_NOTIFY_REVOCABLE) )
+    {
+        timeout_ms = inst_ptr->instance.vote_secs * 1000;
+    }
+    else
+    {
+        /* default for unsupported event types ; converted to msec like
+         * every other branch */
+        timeout_ms = inst_ptr->instance.vote_secs * 1000 ;
+        if (!inst_ptr->instance.event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_STOP) ||
+            !inst_ptr->instance.event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_REBOOT))
+        {
+            timeout_ms = inst_ptr->instance.shutdown_notice_secs * 1000 ;
+
+        } else if (!inst_ptr->instance.event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_SUSPEND) ||
+                   !inst_ptr->instance.event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_PAUSE) ||
+                   !inst_ptr->instance.event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_RESIZE_BEGIN) ||
+                   !inst_ptr->instance.event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_LIVE_MIGRATE_BEGIN) ||
+                   !inst_ptr->instance.event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_COLD_MIGRATE_BEGIN) ) {
+            timeout_ms = inst_ptr->instance.suspend_notice_secs * 1000 ;
+
+        } else if (!inst_ptr->instance.event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_UNPAUSE) ||
+                   !inst_ptr->instance.event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_RESUME) ||
+                   !inst_ptr->instance.event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_RESIZE_END) ||
+                   !inst_ptr->instance.event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_LIVE_MIGRATE_END) ||
+                   !inst_ptr->instance.event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_COLD_MIGRATE_END) ) {
+            timeout_ms = inst_ptr->instance.resume_notice_secs * 1000 ;
+        } else {
+            wlog ("%s unsupported event type (%s) defaulting to 'vote' timeout of %d secs\n",
+                      log_prefix(&inst_ptr->instance).c_str(),
+                      inst_ptr->instance.event_type.c_str(),
+                      inst_ptr->instance.vote_secs);
+        }
+    }
+
+    dlog ("%s event_type:%s notification_type:%s invocation_id:%d timeout_ms:%d\n",
+              log_prefix(&inst_ptr->instance).c_str(),
+              inst_ptr->instance.event_type.c_str(),
+              inst_ptr->instance.notification_type.c_str(),
+              inst_ptr->instance.invocation_id,
+              timeout_ms);
+
+    message.append ("\"");
+    message.append (GUEST_HEARTBEAT_MSG_INVOCATION_ID);
+    message.append ("\":");
+    message.append (int_to_string(inst_ptr->instance.invocation_id));
+    message.append (",\"");
+    message.append (GUEST_HEARTBEAT_MSG_EVENT_TYPE);
+    message.append ("\":\"");
+    message.append (inst_ptr->instance.event_type);
+    message.append ("\",\"");
+    message.append (GUEST_HEARTBEAT_MSG_NOTIFICATION_TYPE);
+    message.append ("\":\"");
+    message.append (inst_ptr->instance.notification_type);
+    message.append ("\",\"");
+    message.append (GUEST_HEARTBEAT_MSG_TIMEOUT_MS);
+    message.append ("\":");
+    message.append (int_to_string(timeout_ms));
+    message.append ("}\n");
+
+    ilog("%s send_vote_notify message=%s\n",
+         log_prefix(&inst_ptr->instance).c_str(), message.c_str());
+
+    /* Send message to the vm through the libvirt channel */
+    bytes_sent = write_inst (&inst_ptr->instance, message.c_str(), message.length());
+    if ( bytes_sent != message.length() )
+    {
+        /* %zu is the conversion specifier for size_t */
+        wlog ("%s only sent %zu of %zu bytes\n", inst_ptr->instance.inst.c_str(),
+                  bytes_sent, message.length() );
+    }
+
+    /* Clear the message queue and wait for the vote response */
+    inst_ptr->message_list.clear ();
+
+    /* NOTE(review): the local timer always runs for vote_secs, even when a
+     * different notice time was sent to the guest above - confirm intended */
+    if ( inst_ptr->vote_timer.tid )
+        mtcTimer_stop ( inst_ptr->vote_timer );
+    mtcTimer_start ( inst_ptr->vote_timer, guestTimer_handler, inst_ptr->instance.vote_secs );
+
+    dlog("%s timer started for %d seconds\n",
+         log_prefix(&inst_ptr->instance).c_str(),
+         inst_ptr->instance.vote_secs);
+
+    return (PASS);
+}
+
+/*************************************************************************************
+ *
+ * Name       : 
send_vote_notify_resp + * + * Description: Send response for voting or notification to GuestAgent + * + **************************************************************************************/ +int guestInstClass::send_vote_notify_resp ( char * hostname, string uuid, + string notification_type, + string event_type, + string vote_result, + string reject_reason) +{ + instInfo * instInfo_ptr = get_inst ( uuid ); + + if ( !instInfo_ptr ) + { + elog ("%s is unknown\n", uuid.c_str()); + return FAIL; + } + + if (!vote_result.compare(GUEST_HEARTBEAT_MSG_VOTE_RESULT_ACCEPT) || + !vote_result.compare(GUEST_HEARTBEAT_MSG_VOTE_RESULT_COMPLETE)) + { + // accept + ilog ("%s '%s' '%s' '%s'\n", + log_prefix(instInfo_ptr).c_str(), + notification_type.c_str(), + event_type.c_str(), + vote_result.c_str()); + + if (!vote_result.compare(GUEST_HEARTBEAT_MSG_VOTE_RESULT_COMPLETE) && + !event_type.compare(GUEST_HEARTBEAT_MSG_EVENT_SUSPEND)) + { + instInfo_ptr->connected = false ; + hbStatusChange ( instInfo_ptr , false ); + } + } else if (!vote_result.compare(GUEST_HEARTBEAT_MSG_VOTE_RESULT_REJECT)) { + ilog ("%s '%s' '%s' '%s' reason: %s\n", + log_prefix(instInfo_ptr).c_str(), + notification_type.c_str(), + event_type.c_str(), + vote_result.c_str(), + reject_reason.c_str()); + } else if (!vote_result.compare(GUEST_HEARTBEAT_MSG_VOTE_RESULT_TIMEOUT)) { + ilog ("%s '%s' '%s' '%s'\n", + log_prefix(instInfo_ptr).c_str(), + notification_type.c_str(), + event_type.c_str(), + vote_result.c_str()); + } else if (!vote_result.compare(GUEST_HEARTBEAT_MSG_VOTE_RESULT_ERROR)) { + elog ("%s vote to '%s' returned error: %s\n", + log_prefix(instInfo_ptr).c_str(), + event_type.c_str(), + vote_result.c_str()); + } else { + elog ("%s vote to '%s' unknown vote response %s\n", + log_prefix(instInfo_ptr).c_str(), + event_type.c_str(), + vote_result.c_str()); + } + + string payload = "" ; + payload.append ("{\"hostname\":\""); + payload.append (hostname); + payload.append ("\", \"uuid\": \""); + payload.append 
(uuid.c_str()); + payload.append ("\", \"notification_type\": \""); + payload.append (notification_type); + payload.append ("\", \"event-type\": \""); + payload.append (event_type); + payload.append ("\", \"vote\": \""); + payload.append (vote_result); + payload.append ("\", \"reason\": \""); + payload.append (reject_reason); + payload.append ("\"}"); + + jlog ("%s Notification Event Payload: %s\n", log_prefix(instInfo_ptr).c_str(), payload.c_str()); + + send_to_guestAgent ( MTC_EVENT_VOTE_NOTIFY , payload.data()); + + return (PASS); +} + +/************************************************************************************* + * + * Name : send_client_msg_nack + * + * Description: Send failure response to GuestClient when fail to process the client message + * + **************************************************************************************/ +void guestInstClass::send_client_msg_nack ( instInfo * instInfo_ptr, + string log_err) +{ + size_t bytes_sent ; + + string message = guestSvrMsg_hdr_init(instInfo_ptr->uuid , GUEST_HEARTBEAT_MSG_NACK); + + message.append ("\""); + message.append (GUEST_HEARTBEAT_MSG_INVOCATION_ID); + message.append ("\":"); + message.append (int_to_string(instInfo_ptr->invocation_id)); + message.append (",\""); + message.append (GUEST_HEARTBEAT_MSG_LOG_MSG); + message.append ("\":\""); + message.append (log_err.c_str()); + message.append ("\"}\n"); + + ilog("%s send_client_msg_nack message=%s\n", + log_prefix(instInfo_ptr).c_str(), message.c_str()); + + /* Send message to the vm through the libvirt channel */ + bytes_sent = write_inst (instInfo_ptr, message.c_str(), message.length()); + if ( bytes_sent != message.length() ) + { + wlog ("%s only sent %ld of %ld bytes\n", instInfo_ptr->inst.c_str(), + bytes_sent, message.length() ); + } +} + +/************************************************************************************* + * + * Name : handle_parse_failure + * + * Description: Handle JSON parse failure + * + 
**************************************************************************************/ +void guestInstClass::handle_parse_failure ( struct guestInstClass::inst * inst_ptr, + const char *key) +{ + string log_err = "failed to parse "; + log_err.append(key); + elog("%s %s\n", log_prefix(&inst_ptr->instance).c_str(), log_err.c_str()); + send_client_msg_nack(&inst_ptr->instance, log_err); + inst_ptr->message_list.pop_front(); +} diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestSvrMsg.cpp b/mtce-common/cgts-mtce-common-1.0/guest/guestSvrMsg.cpp new file mode 100644 index 00000000..65286f8d --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestSvrMsg.cpp @@ -0,0 +1,701 @@ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Guest Heartbeat Server Daemon on Compute + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for hostent */ +#include +#include +#include +#include +#include +#include +#include /* for close and usleep */ +#include /* for realtime scheduling api */ +#include + +using namespace std; + +#include "nodeBase.h" +#include "daemon_ini.h" /* Ini Parser Header */ +#include "daemon_common.h" /* Common definitions and types for daemons */ +#include "daemon_option.h" /* Common options for daemons */ +#include "nodeUtil.h" /* for ... common utilities */ +#include "jsonUtil.h" /* for ... jason utilities */ +#include "nodeTimers.h" /* for ... maintenance timers */ +#include "nodeMacro.h" /* for ... CREATE_NONBLOCK_INET_UDP_RX_SOCKET */ +#include "nodeEvent.h" /* for ... set_inotify_watch, set_inotify_close */ +#include "guestBase.h" +#include "guestInstClass.h" /* for ... guestUtil_inst_init */ +#include "guestUtil.h" /* for ... guestUtil_inst_init */ +#include "guestSvrUtil.h" /* for ... hb_get_message_type_name */ +#include "guestSvrMsg.h" /* for ... 
this module header */ + +extern void hbStatusChange ( instInfo * instInfo_ptr, bool status ); +extern void beatStateChange ( instInfo * instInfo_ptr , hb_state_t newState ); + +/***************************************************************************** + * + * Name : guestSvrMsg_hdr_init + * + * Purpose: Initialize the message header. Example output: + * {"version":2,"revision":1,"msg_type":"init","sequence":29, + * The rest of the message should be appended to it. + * + *****************************************************************************/ + +string guestSvrMsg_hdr_init (string channel, string msg_type) +{ + instInfo * instInfo_ptr = get_instInv_ptr()->get_inst (channel); + + string msg = "\n{\""; + msg.append(GUEST_HEARTBEAT_MSG_VERSION); + msg.append("\":"); + msg.append(int_to_string(GUEST_HEARTBEAT_MSG_VERSION_CURRENT)); + msg.append(",\""); + msg.append(GUEST_HEARTBEAT_MSG_REVISION); + msg.append("\":"); + msg.append(int_to_string(GUEST_HEARTBEAT_MSG_REVISION_CURRENT)); + msg.append(",\""); + msg.append(GUEST_HEARTBEAT_MSG_MSG_TYPE); + msg.append("\":\""); + msg.append(msg_type); + msg.append("\",\""); + msg.append(GUEST_HEARTBEAT_MSG_SEQUENCE); + msg.append("\":"); + msg.append(int_to_string(++(instInfo_ptr->sequence))); + msg.append(","); + + // store msg_type in instance structure so that it is available to handle timeout + instInfo_ptr->msg_type = msg_type; + return msg; +} + +/** + * Manages the fault reporting state + * - returns current reporting state + * */ +bool manage_reporting_state ( instInfo * instInfo_ptr, string state) +{ + if (!state.compare("enabled")) + { + if ( instInfo_ptr->heartbeat.reporting == false ) + { + ilog ("%s heartbeat reporting '%s' by guestAgent\n", + log_prefix(instInfo_ptr).c_str(), + state.c_str()); + + instInfo_ptr->heartbeat.reporting = true ; + instInfo_ptr->message_count = 0 ; + } + } + else + { + if ( instInfo_ptr->heartbeat.reporting == true ) + { + ilog ("%s heartbeat reporting '%s' by guestAgent\n", + 
log_prefix(instInfo_ptr).c_str(), + state.c_str()); + + instInfo_ptr->heartbeat.reporting = false ; + instInfo_ptr->message_count = 0 ; + hbStatusChange ( instInfo_ptr, false) ; /* heartbeating is now false */ + beatStateChange ( instInfo_ptr, hbs_server_waiting_init ) ; + } + } + + return instInfo_ptr->heartbeat.reporting ; +} + +/***************************************************************************** + * + * Name : guestAgent_qry_handler + * + * Purpose: Loop over all the instances and return their uuid, hostname, + * reporting state, heartbneating status and timeout values. + * + * { "hostname":"compute-1", "instances": [{"uuid":"","heartbeat":"", status":"}, timeouts ...]} + * + *****************************************************************************/ +int guestInstClass::guestAgent_qry_handler ( void ) +{ + int rc = PASS ; + + /* check for empty list condition */ + if ( inst_head ) + { + struct inst * inst_ptr = static_cast(NULL) ; + for ( inst_ptr = inst_head ; ; inst_ptr = inst_ptr->next ) + { + string payload = guestUtil_set_inst_info ( get_ctrl_ptr()->hostname , &inst_ptr->instance ); + jlog ("%s Query Instance Response:%ld:%s\n", + log_prefix(&inst_ptr->instance).c_str(), + payload.size(), + payload.c_str() ); + + if (( rc=send_to_guestAgent ( MTC_CMD_QRY_INST, payload.data())) != PASS ) + { + wlog ("%s failed to send query instance response to guestAgent\n", + log_prefix(&inst_ptr->instance).c_str()); + } + + /* Deal with exit case */ + if (( inst_ptr->next == NULL ) || ( inst_ptr == inst_tail )) + { + break ; + } + } + } + return (rc); +} + +/***************************************************************************** + * + * Name : recv_from_guestAgent + * + * Purpose: Handle guestAgent commands + * + * MTC_EVENT_LOOPBACK + * MTC_CMD_QRY_INST + * MTC_CMD_DEL_INST + * MTC_CMD_MOD_INST + * MTC_CMD_ADD_INST + * MTC_CMD_MOD_HOST + * + * ***************************************************************************/ +int recv_from_guestAgent ( 
unsigned int cmd, char * buf_ptr ) +{ + int rc = PASS ; + + mlog1 ("Cmd:%x - %s\n", cmd, buf_ptr); + + if ( cmd == MTC_EVENT_LOOPBACK ) + { + /* TODO: Send message back */ + return (rc) ; + } + else if ( cmd == MTC_CMD_QRY_INST ) + { + if ( ( rc = get_instInv_ptr()->qry_inst ()) != PASS ) + { + elog ("failed to send hosts instance info\n"); + } + return (rc) ; + } + else if ( cmd == MTC_CMD_VOTE_INST + || cmd == MTC_CMD_NOTIFY_INST ) + { + string source; + string uuid; + string event; + + rc = FAIL_KEY_VALUE_PARSE ; /* default to parse error */ + + if (( rc = jsonUtil_get_key_val ( buf_ptr, "source", source )) != PASS) + { + elog ("failed to extract 'source' (cmd:%x %s)\n", cmd , buf_ptr ); + } + else if (( rc = jsonUtil_get_key_val ( buf_ptr, "uuid", uuid )) != PASS) + { + elog ("failed to extract 'uuid' (cmd:%x %s)\n", cmd , buf_ptr); + } + else if (( rc = jsonUtil_get_key_val ( buf_ptr, "event", event )) != PASS) + { + elog ("failed to extract 'event' key (cmd:%x %s)\n", cmd , buf_ptr); + } + else + { + + // send message to guest Client + instInfo * instInfo_ptr = get_instInv_ptr()->get_inst(uuid); + if ( instInfo_ptr ) + { + /* If this is a resume then we need to reconnect to the channel */ + if ( !event.compare(GUEST_HEARTBEAT_MSG_EVENT_RESUME) ) + { + /* issue a reconnect if we are not connected the hartbeating has not started */ + if (( instInfo_ptr->connected == false ) || + ( instInfo_ptr->heartbeating == false )) + { + // instInfo_ptr->connect_wait_in_secs = 10 ; + get_instInv_ptr()->reconnect_start ( instInfo_ptr->uuid.data() ); + } + } + + instInfo_ptr->event_type = event; + if (MTC_CMD_VOTE_INST == cmd) + { + // for voting + instInfo_ptr->notification_type = GUEST_HEARTBEAT_MSG_NOTIFY_REVOCABLE ; + + ilog ("%s sending revocable '%s' vote\n", + log_prefix(instInfo_ptr).c_str(), + event.c_str()); + } + else + { + // for notification + instInfo_ptr->notification_type = GUEST_HEARTBEAT_MSG_NOTIFY_IRREVOCABLE ; + + ilog ("%s sending irrevocable '%s' 
notify\n", + log_prefix(instInfo_ptr).c_str(), + event.c_str()); + } + get_instInv_ptr()->send_vote_notify(uuid) ; + rc = PASS ; + } + else + { + wlog ("%s is unknown\n", uuid.c_str()); + } + } + } + else + { + string source ; + string uuid ; + string service ; + string state ; + + rc = FAIL_KEY_VALUE_PARSE ; /* default to parse error */ + + if (( rc = jsonUtil_get_key_val ( buf_ptr, "source", source )) != PASS) + { + elog ("failed to extract 'source' (cmd:%x %s)\n", cmd , buf_ptr ); + } + else if (( rc = jsonUtil_get_key_val ( buf_ptr, "uuid", uuid )) != PASS) + { + elog ("failed to extract 'uuid' (cmd:%x %s)\n", cmd , buf_ptr); + } + else if (( rc = jsonUtil_get_key_val ( buf_ptr, "service", service )) != PASS) + { + elog ("failed to extract 'service' key (cmd:%x %s)\n", cmd , buf_ptr); + } + else if (( rc = jsonUtil_get_key_val ( buf_ptr, "state", state )) != PASS) + { + elog ("failed to extract 'state' (cmd:%x %s)\n", cmd , buf_ptr ); + } + else + { + rc = RETRY ; + switch ( cmd ) + { + case MTC_CMD_DEL_INST: + { + ilog ("%s delete\n", uuid.c_str()); + + if ( get_instInv_ptr()->del_inst( uuid ) == PASS ) + { + rc = PASS ; + } + else + { + dlog ("%s delete failed ; uuid lookup\n", uuid.c_str()); + rc = FAIL_NOT_FOUND ; + } + if (daemon_get_cfg_ptr()->debug_level ) + get_instInv_ptr()->print_instances (); + break ; + } + case MTC_CMD_ADD_INST: + case MTC_CMD_MOD_INST: + { + instInfo * instInfo_ptr = get_instInv_ptr()->get_inst ( uuid ); + if ( instInfo_ptr ) + { + manage_reporting_state ( instInfo_ptr, state ); + rc = PASS ; + } + + /* if true then the current channel was not found and we need to add it */ + if ( rc == RETRY ) + { + instInfo instance ; + guestUtil_inst_init (&instance); + + instance.uuid = uuid ; + ilog ("%s add with %s reporting %s\n", + uuid.c_str(), + service.c_str(), + state.c_str()); + + get_instInv_ptr()->add_inst ( uuid, instance ); + + manage_reporting_state ( &instance, state ); + } + if (daemon_get_cfg_ptr()->debug_level ) + 
get_instInv_ptr()->print_instances(); + + break ; + } + case MTC_CMD_MOD_HOST: + { + guestInstClass * obj_ptr = get_instInv_ptr() ; + string reporting_state = "" ; + rc = jsonUtil_get_key_val ( buf_ptr, "heartbeat", reporting_state ) ; + if ( rc != PASS) + { + elog ("failed to extract heartbeat reporting state (rc=%d)\n", rc ); + wlog ("... disabling 'heartbeat' fault reporting due to error\n"); + obj_ptr->reporting = false ; + rc = FAIL_JSON_PARSE ; + } + else if ( !reporting_state.compare("enabled") ) + { + ilog ("Enabling host level 'heartbeat' fault reporting\n"); + obj_ptr->reporting = true ; + } + else + { + ilog ("Disabling host level 'heartbeat' fault reporting\n"); + obj_ptr->reporting = false ; + } + break ; + } + default: + { + elog ("unsupported command (%x)\n", cmd ); + } + } + } + } + return (rc); +} + +/**************************************************************************** + * + * Name : send_to_guestAgent + * + * Purpose : Send a command and buffer to the guestAgent + * + * Description: If the guestAgent IP is not known the message is dropped + * and a retry is returned. Otherwise the supplied message is + * sent to the guestAgent running on the controller. 
+ * + * **************************************************************************/ +int send_to_guestAgent ( unsigned int cmd, const char * buf_ptr ) +{ + int bytes = 0; + + ctrl_type * ctrl_ptr = get_ctrl_ptr () ; + + int rc = PASS ; + mtc_message_type mtc_cmd ; + memset (&mtc_cmd,0,sizeof(mtc_message_type)); + + memcpy ( &mtc_cmd.buf[0], buf_ptr, strlen(buf_ptr)); + bytes = sizeof(mtc_message_type) ; + + if ( ctrl_ptr->address_peer.empty()) + { + mlog2 ("controller address unknown ; dropping message (%x:%s)", cmd , buf_ptr ); + return RETRY ; + } + + mlog1 ("Sending: %s:%d Cmd:%x:%s\n", ctrl_ptr->address_peer.c_str(), ctrl_ptr->sock.agent_rx_port, cmd, buf_ptr ); + + mtc_cmd.cmd = cmd ; + + /* rc = message size */ + rc = ctrl_ptr->sock.server_tx_sock->write((char *)&mtc_cmd, bytes,ctrl_ptr->address_peer.c_str()); + + if ( 0 > rc ) + { + elog("failed to send (%d:%m)\n", errno ); + rc = FAIL_SOCKET_SENDTO ; + } + else + { + mlog1 ("Transmit to %14s port %d\n", + ctrl_ptr->address_peer.c_str(), + ctrl_ptr->sock.server_tx_sock->get_dst_addr()->getPort()); + print_mtc_message ( &mtc_cmd ); + rc = PASS ; + } + + return (rc); +} + +/********************************************************************************* + * + * Name : write_inst (guestInstClass::public) + * + * Purpose: Send a message to the specified VM instance. 
+ * + *********************************************************************************/ +ssize_t guestInstClass::write_inst ( instInfo * instInfo_ptr, + const char * message, + size_t size) +{ + string name = log_prefix(instInfo_ptr); + + errno = 0 ; + size_t len = write ( instInfo_ptr->chan_fd, message, size ); + if ( len != size ) + { + if ( errno ) + { + wlog_throttled ( instInfo_ptr->failure_count, 100, + "%s failed to send '%s' (seq:%x) (%d:%m)\n", name.c_str(), + instInfo_ptr->msg_type.c_str(), + instInfo_ptr->sequence, errno ); + + if ( errno == EPIPE ) + { + instInfo_ptr->connected = false ; + + instInfo_ptr->connect_wait_in_secs = DEFAULT_CONNECT_WAIT ; + get_instInv_ptr()->reconnect_start ( instInfo_ptr->uuid.data() ); + } + + len = 0 ; + } + else + { + wlog_throttled ( instInfo_ptr->failure_count, 100, + "%s send '%s' (seq:%x) (len:%ld)\n", name.c_str(), + instInfo_ptr->msg_type.c_str(), + instInfo_ptr->sequence, len); + } + } + else + { + instInfo_ptr->failure_count = 0 ; + mlog("%s send '%s' (seq:%x)\n", name.c_str(), + instInfo_ptr->msg_type.c_str(), + instInfo_ptr->sequence ); + } + return (len); +} + + +/********************************************************************************* + * + * Name : readInst (guestInstClass::private) + * + * Purpose : try to receive a single message from all instances. + * + * Description: Each received message is enqueued into the associated + * instance's message queue. 
+ * + *********************************************************************************/ + +int fail_count = 0 ; +void guestInstClass::readInst ( void ) +{ + int rc ; + std::list socks ; + + waitd.tv_sec = 0; + waitd.tv_usec = GUEST_SOCKET_TO; + + struct json_object *jobj_msg = NULL; + + /* Initialize the master fd_set */ + FD_ZERO(&instance_readfds); + + socks.clear(); + + for ( struct inst * inst_ptr = inst_head ; inst_ptr != NULL ; inst_ptr = inst_ptr->next ) + { + if ( inst_ptr->instance.connected ) + { + socks.push_front( inst_ptr->instance.chan_fd ); + FD_SET(inst_ptr->instance.chan_fd, &instance_readfds); + } + if (( inst_ptr->next == NULL ) || ( inst_ptr == inst_tail )) + break ; + } + + /* if there are no connected instance channels then exit */ + if ( socks.empty() ) + { + return ; + } + + /* Call select() and wait only up to SOCKET_WAIT */ + socks.sort(); + rc = select( socks.back()+1, &instance_readfds, NULL, NULL, &waitd); + + if (( rc <= 0 ) || ( rc > (int)socks.size())) + { + /* Check to see if the select call failed. */ + if ( rc > (int)socks.size()) + { + wlog_throttled ( fail_count, 100, "select return exceeds current file descriptors (%ld:%d)\n", + socks.size(), rc ); + } + /* ... 
but filter Interrupt signal */ + else if (( rc < 0 ) && ( errno != EINTR )) + { + wlog_throttled ( fail_count, 100, "socket select failed (%d:%m)\n", errno); + } + else + { + mlog3 ("nothing received from %ld instances; socket timeout (%d:%m)\n", socks.size(), errno ); + } + } + else + { + fail_count = 0 ; + mlog2 ("trying to receive for %ld instances\n", socks.size()); + + /* Search through all the instances for watched channels */ + for ( struct inst * inst_ptr = inst_head ; inst_ptr != NULL ; inst_ptr = inst_ptr->next ) + { + mlog1 ("%s monitoring %d\n", inst_ptr->instance.inst.c_str(), + inst_ptr->instance.chan_fd ); + + /* Service guestServer messages towards the local IP */ + if (FD_ISSET(inst_ptr->instance.chan_fd, &instance_readfds) ) + { + bool message_present ; + int count ; + string last_message_type ; + char vm_message[GUEST_HEARTBEAT_MSG_MAX_MSG_SIZE] ; + string name ; + + if( inst_ptr->instance.inst.empty() ) + name = inst_ptr->instance.uuid ; + else + name = inst_ptr->instance.inst ; + + count = 0 ; + last_message_type = GUEST_HEARTBEAT_MSG_INIT_ACK ; + + do + { + message_present = false ; + rc = read ( inst_ptr->instance.chan_fd, vm_message, GUEST_HEARTBEAT_MSG_MAX_MSG_SIZE); + mlog3 ("%s read channel: bytes:%d, fd:%d\n", name.c_str(), rc,inst_ptr->instance.chan_fd ); + if ( rc < 0 ) + { + if ( errno == EINTR ) + { + wlog_throttled ( inst_ptr->instance.failure_count, 100, "%s EINTR\n", name.c_str()); + } + else if ( errno == ECONNRESET ) + { + wlog ("%s connection reset ... 
closing\n", name.c_str()); + + /* Close the connection if we get a 'connection reset by peer' errno */ + guestUtil_close_channel ( &inst_ptr->instance ); + + /* An element of the list is removed - need to break out */ + } + else if ( errno != EAGAIN ) + { + wlog_throttled ( inst_ptr->instance.failure_count, 100, "%s error (%d:%m)\n", name.c_str(), errno ); + } + else + { + mlog3 ("%s no more messages\n", name.c_str()); + } + break ; + } + else if ( rc == 0 ) + { + mlog3 ("%s no message\n" , name.c_str()); + break ; + } + else + { + if ( rc < GUEST_HEARTBEAT_MSG_MIN_MSG_SIZE ) + { + wlog_throttled ( inst_ptr->instance.failure_count, 100, + "%s message size %d is smaller than minimal %d; dropping\n", + name.c_str(), rc, GUEST_HEARTBEAT_MSG_MIN_MSG_SIZE); + } + else if ( inst_ptr->message_list.size() > MAX_MESSAGES ) + { + wlog_throttled ( inst_ptr->instance.failure_count, 100, + "%s message queue overflow (max:%d) ; dropping\n", + name.c_str(), MAX_MESSAGES ); + } + else + { + inst_ptr->instance.failure_count = 0 ; + jobj_msg = json_tokener_parse(vm_message); + int version; + string msg_type; + string log_err = "failed to parse "; + guestInstClass * obj_ptr = get_instInv_ptr(); + + //parse incoming msg + if (jobj_msg == NULL) + { + wlog("failed to parse msg\n"); + continue; + } + + if (jsonUtil_get_int(jobj_msg, GUEST_HEARTBEAT_MSG_VERSION, &version) != PASS) + { + // fail to parse the version + log_err.append(GUEST_HEARTBEAT_MSG_VERSION); + elog("%s\n", log_err.c_str()); + obj_ptr->send_client_msg_nack(&inst_ptr->instance, log_err); + json_object_put(jobj_msg); + continue; + } + + if ( version != GUEST_HEARTBEAT_MSG_VERSION_CURRENT) + { + char log_err_str[100]; + sprintf(log_err_str, "Bad version: %d, expect version: %d", + version, GUEST_HEARTBEAT_MSG_VERSION_CURRENT); + elog("%s\n", log_err_str); + log_err = log_err_str; + obj_ptr->send_client_msg_nack(&inst_ptr->instance, log_err); + json_object_put(jobj_msg); + continue; + } + + message_present = true ; + if 
(jsonUtil_get_string(jobj_msg, GUEST_HEARTBEAT_MSG_MSG_TYPE, &msg_type) != PASS) + { + // fail to parse the msg_type + log_err.append(GUEST_HEARTBEAT_MSG_MSG_TYPE); + elog("%s\n", log_err.c_str()); + obj_ptr->send_client_msg_nack(&inst_ptr->instance, log_err); + json_object_put(jobj_msg); + continue; + } + + mlog2 ("%s '%s' message\n", name.c_str(), msg_type.c_str()); + + /* Try and purge out old init messages */ + if (!msg_type.compare(GUEST_HEARTBEAT_MSG_INIT) && + !msg_type.compare(last_message_type) ) + { + inst_ptr->message_list.pop_back(); + ilog ("%s deleting stale init message\n", name.c_str()); + } + /* Enqueue the message to its instance message list */ + inst_ptr->message_list.push_back(jobj_msg); + last_message_type = msg_type ; + } + } + } while ( ( message_present == true ) && ( ++count<10 ) ) ; + } + if (( inst_ptr->next == NULL ) || ( inst_ptr == inst_tail )) + break ; + } + } +} diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestSvrMsg.h b/mtce-common/cgts-mtce-common-1.0/guest/guestSvrMsg.h new file mode 100644 index 00000000..9e61246d --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestSvrMsg.h @@ -0,0 +1,27 @@ +#ifndef __INCLUDE_GUESTSVRMSG_H__ +#define __INCLUDE_GUESTSVRMSG_H__ + +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Guest Services "Messaging" Header + */ + +#include "guestBase.h" +#include "guestInstClass.h" /* for ... 
*/ + +/* Send a command and buffer to the guestAgent */ +int send_to_guestAgent ( unsigned int cmd, + const char * buf_ptr ); + +int recv_from_guestAgent ( unsigned int cmd, char * buf_ptr ); + +string guestSvrMsg_hdr_init (string channel, string msg_type); + +#endif /* __INCLUDE_GUESTSVRMSG_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestSvrUtil.cpp b/mtce-common/cgts-mtce-common-1.0/guest/guestSvrUtil.cpp new file mode 100644 index 00000000..4ab15676 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestSvrUtil.cpp @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +#include "nodeBase.h" /* for ... */ +#include "nodeEvent.h" /* for ... inotify_event_queue_type and utils */ +#include "nodeTimers.h" /* maintenance timer utilities start/stop */ + +#include "guestInstClass.h" +#include "guestUtil.h" /* for ... guestUtil_inst_init */ +#include "guestSvrUtil.h" /* for ... this module header */ +#include "guestVirtio.h" /* for ... virtio_check_filename, + virtio_channel_add */ + +/***************************************************************************** + * + * Name : guestUtil_close_channel + * + * Purpose: Close the specified channel's virtio channel file descriptor. 
+ * + ******************************************************************************/ +int guestUtil_close_channel ( instInfo * instInfo_ptr ) +{ + int rc = FAIL_NOT_FOUND ; + if ( instInfo_ptr ) + { + /* Free up the inotify watch */ + if ( instInfo_ptr->inotify_file_fd ) + { + dlog ("%s freeing inotify resource\n", log_prefix(instInfo_ptr).c_str() ); + set_inotify_close (instInfo_ptr->inotify_file_fd , + instInfo_ptr->inotify_file_wd ); + } + + if ( instInfo_ptr->chan_fd ) + { + dlog ("%s closing socket %d\n", + log_prefix(instInfo_ptr).c_str(), + instInfo_ptr->chan_fd ); + + close ( instInfo_ptr->chan_fd ); + instInfo_ptr->chan_fd = 0 ; + } + instInfo_ptr->chan_ok = false ; + instInfo_ptr->heartbeating = false ; + instInfo_ptr->connected = false ; + rc = PASS ; + } + return (rc); +} + +/***************************************************************************** + * + * Name : guestUtil_load_channels + * + * Purpose: Scan the Virtio Qemu directory looking for heartbeat channels + * into guests. + * + * Load those that are found into the control structure + * and setup messaging to them. 
+ * + ******************************************************************************/ +void guestUtil_load_channels ( void ) +{ + DIR *dirp; + struct dirent entry; + struct dirent *result; + + dirp = opendir(QEMU_CHANNEL_DIR); + if (!dirp) + { + elog("failed to open %s directory (%d:%m)\n", QEMU_CHANNEL_DIR, errno); + } + else + { + dlog ("Searching %s directory\n", QEMU_CHANNEL_DIR); + while(0 == readdir_r(dirp, &entry, &result)) + { + if (!result) + break; + + if ( virtio_check_filename (result->d_name) ) + { + string channel = result->d_name ; + ilog ("%s found\n", channel.c_str() ); + if ( virtio_channel_add ( result->d_name ) == PASS ) + { + if ( virtio_channel_connect ( channel ) != PASS ) + { + string uuid = virtio_instance_name ( result->d_name ) ; + get_instInv_ptr()->reconnect_start ( uuid.data() ); + } + } + } + else + { + dlog3 ("ignoring file %s\n", result->d_name); + } + } + closedir(dirp); + } +} + +/***************************************************************************** + * + * Name : guestUtil_channel_search + * + * Purpose: Scan the Virtio Qemu directory looking for heartbeat channels + * into guests that are not currently provisioned. 
+ * + ******************************************************************************/ +void guestUtil_channel_search ( void ) +{ + DIR *dirp; + struct dirent entry; + struct dirent *result; + + dirp = opendir(QEMU_CHANNEL_DIR); + if (!dirp) + { + elog("failed to open %s directory (%d:%m)\n", QEMU_CHANNEL_DIR, errno); + } + else + { + dlog ("Searching %s directory\n", QEMU_CHANNEL_DIR); + while(0 == readdir_r(dirp, &entry, &result)) + { + if (!result) + break; + + if ( virtio_check_filename (result->d_name) ) + { + if ( get_instInv_ptr()->get_inst ( virtio_instance_name (result->d_name).data()) == NULL ) + { + string channel = result->d_name ; + ilog ("found %s\n", channel.c_str() ); + virtio_channel_add ( result->d_name ); + virtio_channel_connect ( channel ); + } + } + } + closedir(dirp); + } +} + +/***************************************************************************** + * + * Name : guestUtil_inotify_events + * + * Purpose: Handle inotify events for the specified file descriptor. 
+ * + *****************************************************************************/ +int guestUtil_inotify_events ( int fd ) +{ + string channel = "" ; + inotify_event_queue_type event_queue ; + int num = get_inotify_events ( fd , event_queue ) ; + + dlog3 ("inotify events queued: %d\n", num ); + + for ( int i = 0 ; i < num ; i++ ) + { + dlog2 ( "Event:%s for file:%s\n", get_inotify_event_str(event_queue.item[i].event), event_queue.item[i].name ); + + if ( event_queue.item[i].event == IN_CREATE ) + { + dlog1 ("%s CREATE event on %s\n", event_queue.item[i].name, QEMU_CHANNEL_DIR ); + if ( virtio_check_filename (&event_queue.item[i].name[0]) ) + { + dlog ("%s CREATE accepted\n", event_queue.item[i].name ); + channel = event_queue.item[i].name ; + if ( virtio_channel_add ( event_queue.item[i].name ) != PASS ) + { + elog ("%s failed to add detected channel\n", event_queue.item[i].name ); + } + } + } + else if ( event_queue.item[i].event == IN_DELETE ) + { + dlog1 ("%s DELETE event on %s\n", event_queue.item[i].name, QEMU_CHANNEL_DIR ); + if ( virtio_check_filename (&event_queue.item[i].name[0]) ) + { + dlog ("%s DELETE accepted\n", event_queue.item[i].name ); + channel = event_queue.item[i].name ; + get_instInv_ptr()->del_inst ( channel ); + } + else + { + dlog ("%s DELETE rejected\n", event_queue.item[i].name ); + } + } + else if ( event_queue.item[i].event == IN_MODIFY ) + { + dlog1 ("%s MODIFY event on %s\n", event_queue.item[i].name, QEMU_CHANNEL_DIR ); + if ( virtio_check_filename (&event_queue.item[i].name[0]) ) + { + dlog ("%s MODIFY accepted\n", event_queue.item[i].name ); + channel = event_queue.item[i].name ; + + /* if the channel was modified then we need + * + * 1. to close the channel, + * 2. delete it, + * 3. re-add it and + * 4. then repoen it. 
+ * */ + get_instInv_ptr()->del_inst ( channel ); + + if ( virtio_channel_add ( event_queue.item[i].name ) != PASS ) + { + elog ("%s failed to re-add modified channel\n", channel.c_str()); + } + } + } + else + { + wlog ("%s UNKNOWN event on %s\n", event_queue.item[i].name, QEMU_CHANNEL_DIR ); + } + } + return (PASS); +} diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestSvrUtil.h b/mtce-common/cgts-mtce-common-1.0/guest/guestSvrUtil.h new file mode 100644 index 00000000..01b79c24 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestSvrUtil.h @@ -0,0 +1,18 @@ +#ifndef __INCLUDE_GUESTSVRUTIL_H__ +#define __INCLUDE_GUESTSVRUTIL_H__ + +/* + * Copyright (c) 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +#include "guestBase.h" /* for ... instInfo */ + +int guestUtil_close_channel ( instInfo * instInfo_ptr ); +void guestUtil_load_channels ( void ); +int guestUtil_inotify_events ( int fd ); +void guestUtil_channel_search ( void ) ; + +#endif /* __INCLUDE_GUESTSVRUTIL_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestUtil.cpp b/mtce-common/cgts-mtce-common-1.0/guest/guestUtil.cpp new file mode 100644 index 00000000..6b72fc03 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestUtil.cpp @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +#include +#include +#include +using namespace std; + +#include "guestBase.h" +#include "guestUtil.h" +#include "guestClass.h" +#include "jsonUtil.h" + +#define MAX_NUM_LEN 64 +string time_in_secs_to_str ( time_t secs ) +{ + char int_str[MAX_NUM_LEN] ; + string temp ; + memset ( &int_str[0], 0, MAX_NUM_LEN ); + sprintf ( &int_str[0], "%ld" , secs ); + temp = int_str ; + return (temp); +} + +/***************************************************************************** + * + * Name : guestUtil_inst_init + * + * Purpose: Init the specified instance + * + *****************************************************************************/ +void guestUtil_inst_init ( instInfo * instance_ptr ) +{ + instance_ptr->uuid.clear(); /* Not used in the server */ + + instance_ptr->inotify_file_fd = 0 ; + instance_ptr->inotify_file_wd = 0 ; + + instance_ptr->chan_fd = 0 ; + instance_ptr->chan_ok = false ; + instance_ptr->connected = false ; /* Assume we have not connected to this channel */ + instance_ptr->heartbeating = false ; + + instance_ptr->heartbeat.provisioned = false ; + instance_ptr->heartbeat.reporting = false ; + instance_ptr->heartbeat.failures = 0 ; + + instance_ptr->heartbeat.state.clear() ; + + instance_ptr->hbState = hbs_server_waiting_init ; + instance_ptr->vnState = hbs_server_waiting_init ; + + instance_ptr->connect_count = 0 ; + instance_ptr->connect_retry_count = 0 ; + instance_ptr->select_count = 0 ; + instance_ptr->message_count = 0 ; + instance_ptr->health_count = 0 ; + instance_ptr->failure_count = 0 ; + instance_ptr->corrective_action_count = 0 ; + + instance_ptr->unhealthy_failure = false ; + + instance_ptr->heartbeat_interval_ms = HB_DEFAULT_INTERVAL_MS; + + instance_ptr->vote_secs = HB_DEFAULT_VOTE_MS/1000; + instance_ptr->vote_to_str = time_in_secs_to_str (instance_ptr->vote_secs); + + instance_ptr->shutdown_notice_secs = HB_DEFAULT_SHUTDOWN_MS/1000; + instance_ptr->shutdown_to_str = time_in_secs_to_str 
(instance_ptr->shutdown_notice_secs); + + instance_ptr->suspend_notice_secs = HB_DEFAULT_SUSPEND_MS/1000; + instance_ptr->suspend_to_str = time_in_secs_to_str (instance_ptr->suspend_notice_secs); + + instance_ptr->resume_notice_secs = HB_DEFAULT_RESUME_MS/1000; + instance_ptr->resume_to_str = time_in_secs_to_str (instance_ptr->resume_notice_secs); + + instance_ptr->restart_secs = HB_DEFAULT_RESTART_MS/1000; + instance_ptr->restart_to_str = time_in_secs_to_str(instance_ptr->restart_secs); + + instance_ptr->notification_type = GUEST_HEARTBEAT_MSG_NOTIFY_IRREVOCABLE ; + instance_ptr->event_type = GUEST_HEARTBEAT_MSG_EVENT_RESUME ; + + instance_ptr->corrective_action = GUEST_HEARTBEAT_MSG_ACTION_LOG ; + + instance_ptr->unhealthy_corrective_action = GUEST_HEARTBEAT_MSG_ACTION_UNKNOWN ; +} + +/***************************************************************************** + * + * Name : guestUtil_print_instance + * + * Purpose: Print a summary of the instances that are currently provisioned + * + *****************************************************************************/ +void guestUtil_print_instance ( instInfo * instInfo_ptr ) +{ + ilog ("%s Heartbeat: Prov-%c Reporting-%c Failures:%d\n", + instInfo_ptr->uuid.c_str(), + instInfo_ptr->heartbeat.provisioned ? 'Y':'n' , + instInfo_ptr->heartbeat.reporting ? 
'Y':'n', + instInfo_ptr->heartbeat.failures); +} + +/***************************************************************************** + * + * Name : guestUtil_print_instances + * + * Purpose: Print a summary of the instances that are currently provisioned + * + *****************************************************************************/ +void guestUtil_print_instances ( ctrl_type * ctrl_ptr ) +{ + bool found = false ; + int i = 1 ; + + for ( ctrl_ptr->instance_list_ptr = ctrl_ptr->instance_list.begin(); + ctrl_ptr->instance_list_ptr != ctrl_ptr->instance_list.end(); + ctrl_ptr->instance_list_ptr++ ) + { + guestUtil_print_instance ( &(*ctrl_ptr->instance_list_ptr) ); + found = true ; + i++ ; + } + + if ( found == false ) + { + ilog ("no heartbeat channels provisioned\n"); + } +} + +string log_prefix ( instInfo * instInfo_ptr ) +{ + string prefix = "unknown" ; + + if ( instInfo_ptr ) + { + if ( instInfo_ptr->name.length() ) + { + if ( instInfo_ptr->name_log_prefix.empty() ) + { + instInfo_ptr->name_log_prefix = instInfo_ptr->inst ; + instInfo_ptr->name_log_prefix.append (" "); + instInfo_ptr->name_log_prefix.append (instInfo_ptr->name); + } + prefix = instInfo_ptr->name_log_prefix ; + } + else + { + if ( instInfo_ptr->uuid_log_prefix.empty() ) + { + instInfo_ptr->uuid_log_prefix = instInfo_ptr->uuid ; + } + prefix = instInfo_ptr->uuid_log_prefix ; + } + } + return (prefix); +} + +string guestUtil_set_inst_info ( string hostname , instInfo * instInfo_ptr ) +{ + /* Send one message per instance */ + string payload ("{\"hostname\":\""); + payload.append (hostname); + payload.append ("\",\"uuid\":\""); + payload.append (instInfo_ptr->uuid); + + /* Share the reporting state */ + payload.append ("\",\"reporting\":"); + if ( instInfo_ptr->heartbeat.reporting == true ) + payload.append ("\"enabled"); + else + payload.append ("\"disabled"); + + /* Share the heartbeating state */ + payload.append ("\",\"heartbeating\":"); + if ( instInfo_ptr->heartbeating == true ) + 
payload.append ("\"enabled"); + else + payload.append ("\"disabled"); + + payload.append ("\",\"repair-action\":\"" ); + if ( instInfo_ptr->unhealthy_failure == true ) + { + payload.append (instInfo_ptr->unhealthy_corrective_action); + } + else + { + payload.append (instInfo_ptr->corrective_action); + } + /* Add the restart timeout to the message */ + payload.append ("\",\"restart-to\":\""); + payload.append (instInfo_ptr->restart_to_str); + payload.append ("\",\"shutdown-to\":\""); + payload.append (instInfo_ptr->shutdown_to_str); + payload.append ("\",\"suspend-to\":\""); + payload.append (instInfo_ptr->suspend_to_str); + payload.append ("\",\"resume-to\":\""); + payload.append (instInfo_ptr->resume_to_str); + payload.append ("\",\"vote-to\":\""); + payload.append (instInfo_ptr->vote_to_str); + payload.append ("\""); + payload.append ("}"); + + jlog ("Payload: %s\n", payload.c_str()); + + return (payload); +} + +int guestUtil_get_inst_info ( string hostname, instInfo * instInfo_ptr, char * buf_ptr ) +{ + int rc = PASS ; + + string hostname_str = "" ; + string uuid = "" ; + string state = "" ; + string status = "" ; + string restart_to = "" ; + string resume_to = "" ; + string suspend_to = "" ; + string shutdown_to = "" ; + string vote_to = "" ; + string repair_str = "" ; + + if ( !buf_ptr ) + { + elog ( "null buffer\n" ); + return ( FAIL_NULL_POINTER ); + } + + jlog ("Payload: %s\n", buf_ptr ); + + int rc0 = jsonUtil_get_key_val ( buf_ptr, "hostname", hostname_str) ; + int rc1 = jsonUtil_get_key_val ( buf_ptr, "uuid", uuid ) ; + int rc2 = jsonUtil_get_key_val ( buf_ptr, "reporting", state ) ; + int rc3 = jsonUtil_get_key_val ( buf_ptr, "heartbeating", status ) ; + int rc4 = jsonUtil_get_key_val ( buf_ptr, "restart-to", restart_to ) ; + int rc5 = jsonUtil_get_key_val ( buf_ptr, "resume-to", resume_to ) ; + int rc6 = jsonUtil_get_key_val ( buf_ptr, "suspend-to", suspend_to ) ; + int rc7 = jsonUtil_get_key_val ( buf_ptr, "shutdown-to", shutdown_to ) ; + int rc8 = 
jsonUtil_get_key_val ( buf_ptr, "vote-to", vote_to ) ; + int rc9= jsonUtil_get_key_val ( buf_ptr, "repair-action",repair_str ) ; + if ( rc0 | rc1 | rc2 | rc3 | rc4 | rc5 | rc6 | rc7 | rc8 | rc9 ) + { + elog ("%s failed parse one or more key values (%d:%d:%d:%d:%d:%d:%d:%d:%d:%d)\n", + hostname.c_str(), rc0, rc1, rc2, rc3, rc4, rc5, rc6, rc7, rc8, rc9); + + rc = FAIL_KEY_VALUE_PARSE ; + } + else + { + if ( hostname.compare(hostname_str) ) + { + wlog ("%s hostname mismatch - loaded\n", hostname_str.c_str()); + } + + if ( instInfo_ptr ) + { + /* Update the reporting state */ + if ( !state.compare("enabled") ) + instInfo_ptr->heartbeat.reporting = true ; + else + instInfo_ptr->heartbeat.reporting = false ; + + /* update the heartbeating status */ + if ( !status.compare("enabled") ) + instInfo_ptr->heartbeating = true ; + else + instInfo_ptr->heartbeating = false ; + + instInfo_ptr->corrective_action = repair_str ; + + /* Update the intance timeout values */ + instInfo_ptr->restart_to_str = restart_to ; + instInfo_ptr->shutdown_to_str = shutdown_to ; + instInfo_ptr->resume_to_str = resume_to ; + instInfo_ptr->suspend_to_str = suspend_to ; + instInfo_ptr->vote_to_str = vote_to ; + } + else + { + wlog ("%s %s lookup failed\n", hostname.c_str(), uuid.c_str()); + rc = FAIL_INVALID_UUID ; + } + } + return (rc); +} + +const char* state_names[] = + { + "invalid", + "server_waiting_init", + "server_waiting_challenge", + "server_waiting_response", + "server_paused", + "server_nova_paused", + "server_migrating", + "server_corrective_action", + "client_waiting_init_ack", + "client_waiting_challenge", + "client_waiting_pause_ack", + "client_waiting_resume_ack", + "client_paused", + "client_waiting_shutdown_ack", + "client_waiting_shutdown_response", + "client_shutdown_response_recieved", + "client_exiting", + }; + +const char* hb_get_state_name ( hb_state_t s ) +{ + if (s >= hbs_state_max) + return "???"; + + return state_names[s]; +} + 
+/*****************************************************************************
+ * Convert an integer to its string form (ostringstream default formatting)
+ *****************************************************************************/
+string int_to_string(int number)
+{
+    ostringstream ostr;
+    ostr << number;
+    return ostr.str();
+}
+
diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestUtil.h b/mtce-common/cgts-mtce-common-1.0/guest/guestUtil.h
new file mode 100644
index 00000000..f1926f53
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/guest/guestUtil.h
@@ -0,0 +1,34 @@
+#ifndef __INCLUDE_GUESTUTIL_H__
+#define __INCLUDE_GUESTUTIL_H__
+
+/*
+ * Copyright (c) 2015 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+#include "guestBase.h" /* for ... instInfo */
+
+void guestUtil_inst_init ( instInfo * instance_ptr );
+void guestUtil_print_instances ( ctrl_type * ctrl_ptr );
+void guestUtil_print_instance ( instInfo * instInfo_ptr );
+
+/* called in guestAgent ; loads the JSON instance info in buf_ptr into instInfo_ptr */
+int guestUtil_get_inst_info ( string hostname, instInfo * instInfo_ptr, char * buf_ptr );
+
+/* called in guestServer ; returns the JSON instance info payload for instInfo_ptr */
+string guestUtil_set_inst_info ( string hostname, instInfo * instInfo_ptr );
+
+
+
+string log_prefix ( instInfo * instInfo_ptr );
+string time_in_secs_to_str ( time_t secs );
+
+const char* hb_get_corrective_action_name( uint32_t a) ; // heartbeat_corrective_action_t a);
+const char* hb_get_state_name (hb_state_t s);
+// Convert an integer to its string form
+string int_to_string(int number);
+
+
+#endif /* __INCLUDE_GUESTUTIL_H__ */
diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestVimApi.cpp b/mtce-common/cgts-mtce-common-1.0/guest/guestVimApi.cpp
new file mode 100644
index 00000000..369ff9e3
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/guest/guestVimApi.cpp
@@ -0,0 +1,777 @@
+/*
+ * Copyright (c) 2013, 2016 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+  * @file Wind River CGTS Platform Guest Heartbeat REST API
+  * used to report heartbeat faults or query instance
+  * information from the VIM.
+  *
+  */
+
+#ifdef __AREA__
+#undef __AREA__
+#endif
+#define __AREA__ "vim"
+
+using namespace std;
+
+#include "nodeBase.h" /* for ... common definitions */
+#include "nodeUtil.h" /* for ... common utilities */
+#include "jsonUtil.h" /* for ... jsonUtil_get_key_val */
+
+#include "guestUtil.h" /* for ... guestUtil_inst_init */
+#include "guestSvrUtil.h" /* for ... hb_get_corrective_action_name */
+#include "guestVimApi.h" /* for ... this module header */
+
+/* VIM REST endpoint ; address and URL path prefixes */
+#define URL_VIM_ADDRESS "127.0.0.1"
+#define URL_VIM_INST_LABEL "/nfvi-plugins/v1/instances/"
+#define URL_VIM_HOST_LABEL "/nfvi-plugins/v1/hosts/"
+
+#define VIM_EVENT_SIG "vimEvent"
+#define VIM_SIG "vim"
+
+/* Operation labels stored in libEvent.operation by the request builders */
+#define OPER__HOST_STATE_QUERY "host state query"
+#define OPER__HOST_INST_QUERY "host inst query"
+#define OPER__HOST_INST_FAIL "host inst fail"
+#define OPER__HOST_INST_STATUS "host inst status"
+#define OPER__HOST_INST_CHANGE "inst status change"
+#define OPER__HOST_INST_NOTIFY "host inst notify"
+
+
+/*********************************************************************
+ *
+ * Name       : guestVimApi_handler
+ *
+ * Description: The Guest Heartbeat event request handler
+ *
+ * The event is looked up from 'arg' (the event_base pointer). If the
+ * lookup yields an event whose request is SERVICE_NONE the handler
+ * returns immediately. Otherwise it
+ *
+ *  1. loads the HTTP status with guestHttpUtil_status ; a not-found or
+ *     any other non-PASS status skips response parsing
+ *  2. treats VIM_HOST_INSTANCE_STATUS and VIM_HOST_INSTANCE_NOTIFY as
+ *     complete ; no response content is read for them
+ *  3. parses the response for
+ *       VIM_HOST_STATE_QUERY     - 'state' into event.value
+ *       VIM_HOST_INSTANCE_FAILED - one instance, applied with mod_inst
+ *       VIM_HOST_INSTANCE_QUERY  - 'instances' array, each entry
+ *                                  applied with add_inst
+ *  4. always finishes at _guest_handler_done ; logs the event if the
+ *     status is neither HTTP_OK nor PASS, clears event.active, frees
+ *     the connection and base, and breaks out of the event loop.
+ *
+ * Parse and provisioning failures are reported through event.status.
+ *
+ *********************************************************************/
+void guestHostClass::guestVimApi_handler ( struct evhttp_request *req, void *arg )
+{
+    string hostname = "unknown" ;
+
+    guestHostClass * obj_ptr = get_hostInv_ptr();
+    libEvent & event = obj_ptr->getEvent ( (struct event_base *)arg, hostname );
+    if ( event.request == SERVICE_NONE )
+    {
+        slog ("guest instance Lookup Failed (%p)\n", arg);
+        return ;
+    }
+
+    /* Check for command timeout
+     * - a NULL request is only logged here ; processing continues */
+    if ( !req )
+    {
+        dlog ("hostname=%s service=%s No Request Parm (%s)\n",
+               event.hostname.c_str(),
+               event.service.c_str(),
+               event.uuid.c_str());
+    }
+
+    /* Check the HTTP Status Code */
+    event.status = guestHttpUtil_status ( event ) ;
+    if ( event.status == HTTP_NOTFOUND )
+    {
+        wlog ("%s Not Found (%d)\n", event.log_prefix.c_str(),
+                                     event.status);
+        goto _guest_handler_done ;
+    }
+
+    else if ( event.status != PASS )
+    {
+        /* The VIM might not be running at the time the query is issued.
+         * In that case a 400 comes back. The error log below is only
+         * produced when the request is not a host state query and the
+         * status is not 400. */
+        if (( event.request != VIM_HOST_STATE_QUERY ) && ( event.status != 400 ))
+        {
+            elog ("%s HTTP Request Failed (%d) (%s)\n",
+                      event.log_prefix.c_str(),
+                      event.status,
+                      event.uuid.c_str());
+        }
+        goto _guest_handler_done ;
+    }
+
+    /* No response content for this command */
+    if ( event.request == VIM_HOST_INSTANCE_STATUS )
+    {
+        jlog ("%s %s instance status change succeeded\n", event.hostname.c_str(), event.uuid.c_str());
+        goto _guest_handler_done ;
+    }
+
+    /* No response content for this command */
+    else if ( event.request == VIM_HOST_INSTANCE_NOTIFY )
+    {
+        jlog ("%s %s instance notify succeeded\n", event.hostname.c_str(), event.uuid.c_str());
+        goto _guest_handler_done ;
+    }
+
+    else if ( httpUtil_get_response ( event ) != PASS )
+    {
+        wlog ("%s no response available\n", hostname.c_str());
+        goto _guest_handler_done ;
+    }
+
+    if ( event.response.length() )
+    {
+        jlog ("%s Response: %s\n", event.hostname.c_str(),
+                                   event.response.c_str());
+
+        if ( event.request == VIM_HOST_STATE_QUERY )
+        {
+            ilog ("%s host state query response\n", event.hostname.c_str());
+            int rc = jsonUtil_get_key_val ( (char*)event.response.data(), "state", event.value ) ;
+            if ( rc != PASS )
+            {
+                elog ("failed to state value (rc=%d)\n", rc );
+                event.status = FAIL_KEY_VALUE_PARSE ;
+                event.value = "disabled" ; /* override to disabled if operation failed */
+            }
+        }
+
+        else if ( event.request == VIM_HOST_INSTANCE_FAILED )
+        {
+            ilog ("%s instance failure response\n", event.uuid.c_str());
+            // {"services": [ {"state": "enabled", "service": "heartbeat"}],
+            //  "hostname": "compute-1",
+            //  "uuid": "da973c2a-7469-4e06-b7e1-89bf2643f906"}
+            string state = "" ;
+            string service = "" ;
+            string uuid = "" ;
+            /* note: 'hostname' is overwritten with the value from the response */
+            int rc1 = jsonUtil_get_key_val ( (char*)event.response.data(), "hostname", hostname ) ;
+            int rc2 = jsonUtil_get_key_val ( (char*)event.response.data(), "uuid" , uuid ) ;
+            if (!(rc1 | rc2 ))
+            {
+                /* Look for the list of services for this instance
+                 * - currently only heartbeat is supported
+                 *
+                 * services:[ { "state": "enabled", "service": "heartbeat" } ]
+                 */
+                string service_list = "" ;
+                rc1 = jsonUtil_get_array_idx ((char*)event.response.data(), "services", 0, service_list ) ;
+                if ( rc1 == PASS )
+                {
+                    /* note: obj_ptr and service declared here shadow the
+                     *       outer variables of the same name */
+                    instInfo instance ; guestUtil_inst_init ( &instance );
+                    guestHostClass * obj_ptr = get_hostInv_ptr();
+                    string service = "" ;
+
+                    ilog ("Service List:%s\n", service_list.c_str()); // jlog1
+
+                    instance.uuid = uuid ;
+
+                    /* Get the contents of the services list/array
+                     * Note: we only support one element of the array so that's
+                     *       why only index 0 is being requested or looked for
+                     *
+                     * Get the state of the only service - heartbeat */
+                    rc1 = jsonUtil_get_key_val ( (char*)service_list.data(), "state", instance.heartbeat.state ) ;
+                    rc2 = jsonUtil_get_key_val ( (char*)service_list.data(), "service", service ) ;
+
+                    /* both of these must pass in order to modify this instance */
+                    if (( rc1 == PASS ) && ( rc2 == PASS ))
+                    {
+                        if ( !service.compare("heartbeat") )
+                        {
+                            instance.heartbeat.provisioned = true ;
+
+                            /* It's either enabled or disabled
+                             * - default was disabled in guestUtil_inst_init above */
+                            if ( !instance.heartbeat.state.compare("enabled") )
+                            {
+                                instance.heartbeat.reporting = true ;
+                                rc1 = obj_ptr->mod_inst ( hostname, instance );
+                            }
+                            else if ( !instance.heartbeat.state.compare("disabled") )
+                            {
+                                instance.heartbeat.reporting = false ;
+                                rc1 = obj_ptr->mod_inst ( hostname, instance );
+                            }
+                            else
+                            {
+                                // raise error if it is neither enabled nor disabled
+                                elog ("%s %s invalid heartbeat.state value %s received\n",
+                                    hostname.c_str(), instance.uuid.c_str(), instance.heartbeat.state.c_str());
+                                event.status = FAIL_INVALID_DATA ;
+                                rc1 = FAIL;
+                            }
+                            if ( rc1 == PASS )
+                            {
+                                /* O.K. so it's provisioned !! */
+                                dlog ("%s %s instance modified\n", hostname.c_str(), instance.uuid.c_str());
+                            }
+                            else
+                            {
+                                /* note: this replaces a FAIL_INVALID_DATA set just above with rc1 */
+                                event.status = rc1 ;
+                            }
+                        }
+                        else
+                        {
+                            elog ("%s unsupported 'service' (%s)\n", hostname.c_str(), service.c_str() );
+                            event.status = FAIL_INVALID_DATA ;
+                        }
+                    }
+                    else
+                    {
+                        elog ("%s failed to get 'state' or 'service' (%d:%d)\n", hostname.c_str(), rc1, rc2 );
+                        event.status = FAIL_KEY_VALUE_PARSE ;
+                    }
+                }
+                else
+                {
+                    elog ("%s failed to get 'service list' or 'uuid' (%d:%d)\n", hostname.c_str(), rc1, rc2 );
+                    event.status = FAIL_KEY_VALUE_PARSE ;
+                }
+            }
+            else
+            {
+                ilog ("%s failed to get 'hostname' or 'uuid' (%d:%d)\n", event.hostname.c_str(), rc1, rc2 );
+                event.status = FAIL_KEY_VALUE_PARSE ;
+            }
+        }
+        else if ( event.request == VIM_HOST_INSTANCE_QUERY )
+        {
+            ilog ("%s instance query response\n", event.uuid.c_str());
+            /* { "instances": [{"services": {"service":"heartbeat", "state":"enabled"},
+             *                  "hostname": "compute-2",
+             *                  "uuid": "3aca8dad-0e38-4a58-83ab-23ee71159e0d"}]} */
+
+            int rc = jsonUtil_get_key_val ( (char*)event.response.data(), "instances", event.value ) ;
+            if ( rc != PASS )
+            {
+                elog ("%s failed to get host instance array (rc=%d) (%s)\n",
+                          event.hostname.c_str(), rc, event.uuid.c_str());
+                event.status = FAIL_KEY_VALUE_PARSE ;
+            }
+            else
+            {
+                /* The following code parses a JSON string that looks like this.
+                 * {
+                 *    "instances":
+                 *    [
+                 *        { "services": { "service":"heartbeat", "state":"enabled" },
+                 *          "hostname": "compute-2",
+                 *          "uuid" : "3aca8dad-0e38-4a58-83ab-23ee71159e0d"
+                 *        }
+                 *     ] , ...
+                 * }
+                 */
+                int instances = 0 ;
+                jlog ("%s instance array %s\n", event.hostname.c_str(), (char*)event.response.data());
+                rc = jsonUtil_array_elements ( (char*)event.response.data(), "instances", instances );
+                if ( rc != PASS )
+                {
+                    elog ("%s failed to get array elements (%d)\n", hostname.c_str(), rc );
+                    event.status = FAIL_KEY_VALUE_PARSE ;
+                }
+                else
+                {
+                    ilog ("%s has %d instances\n", hostname.c_str(), instances );
+                    for ( int i = 0 ; i < instances ; i++ )
+                    {
+                        string instance_element = "" ;
+                        rc = jsonUtil_get_array_idx ( (char*)event.response.data(), "instances", i, instance_element );
+                        if ( ( rc == PASS ) && ( instance_element.size() ))
+                        {
+                            /* Look for the list of services for this instance
+                             * - currently only heartbeat is supported
+                             *
+                             * services:[ { "state": "enabled", "service": "heartbeat" } ]
+                             **/
+                            string service_list = "" ;
+                            string uuid = "" ;
+                            int rc1 = jsonUtil_get_array_idx ((char*)instance_element.data(), "services", 0, service_list ) ;
+                            int rc2 = jsonUtil_get_key_val ((char*)instance_element.data(), "uuid", uuid ) ;
+                            if (( rc1 == PASS ) && ( rc2 == PASS ))
+                            {
+                                /* note: this obj_ptr shadows the outer obj_ptr */
+                                instInfo instance ; guestUtil_inst_init ( &instance );
+                                guestHostClass * obj_ptr = get_hostInv_ptr();
+                                string service = "" ;
+
+                                ilog ("Service List:%s\n", service_list.c_str());
+
+                                instance.uuid = uuid ;
+
+                                /* Get the contents of the services list/array
+                                 * Note: we only support one element of the array so that's
+                                 *       why only index 0 is being requested or looked for
+                                 *
+                                 * Get the state of the only service - heartbeat */
+                                rc1 = jsonUtil_get_key_val ( (char*)service_list.data(), "state", instance.heartbeat.state ) ;
+                                rc2 = jsonUtil_get_key_val ( (char*)service_list.data(), "service", service ) ;
+
+                                /* both of these must pass in order to add this instance */
+                                if (( rc1 == PASS ) && ( rc2 == PASS ))
+                                {
+                                    if ( !service.compare("heartbeat") )
+                                    {
+                                        instance.heartbeat.provisioned = true ;
+
+                                        /* It's either enabled or disabled
+                                         * - default was disabled in guestUtil_inst_init above */
+                                        if ( !instance.heartbeat.state.compare("enabled") )
+                                        {
+                                            instance.heartbeat.reporting = true ;
+                                            rc = obj_ptr->add_inst ( hostname, instance );
+                                        }
+                                        else if ( !instance.heartbeat.state.compare("disabled") )
+                                        {
+                                            instance.heartbeat.reporting = false ;
+                                            rc = obj_ptr->add_inst ( hostname, instance );
+                                        }
+                                        else
+                                        {
+                                            // raise error if it is neither enabled nor disabled
+                                            elog ("%s %s invalid heartbeat.state value %s received\n",
+                                                hostname.c_str(), instance.uuid.c_str(), instance.heartbeat.state.c_str());
+                                            event.status = FAIL_INVALID_DATA ;
+                                            rc = FAIL;
+                                        }
+                                        if ( rc == PASS )
+                                        {
+                                            /* O.K. so it's provisioned !! */
+                                            ilog ("%s %s instance added\n", hostname.c_str(), instance.uuid.c_str());
+                                        }
+                                        else
+                                        {
+                                            /* note: this replaces a FAIL_INVALID_DATA set just above with rc */
+                                            event.status = rc ;
+                                        }
+                                    }
+                                    else
+                                    {
+                                        elog ("%s unsupported 'service' (%s)\n", hostname.c_str(), service.c_str() );
+                                        event.status = FAIL_INVALID_DATA ;
+                                    }
+                                }
+                                else
+                                {
+                                    elog ("%s failed to get 'state' or 'service' (%d:%d)\n", hostname.c_str(), rc1, rc2 );
+                                    wlog ("... Service List: %s\n", service_list.data());
+                                    event.status = FAIL_KEY_VALUE_PARSE ;
+                                }
+                            }
+                            else
+                            {
+                                elog ("%s failed to get 'service list' or 'uuid' (%d:%d)\n", hostname.c_str(), rc1, rc2 );
+                                event.status = FAIL_KEY_VALUE_PARSE ;
+                            }
+                        }
+                        /* a PASS with an empty element is silently skipped */
+                        else if ( rc != PASS )
+                        {
+                            elog ("%s failed to get array index %d (rc=%d)\n", hostname.c_str(), i, rc );
+                            event.status = FAIL_KEY_VALUE_PARSE ;
+                        }
+                    }
+                }
+            }
+        }
+    }
+_guest_handler_done:
+
+    // httpUtil_log_event ( event );
+
+    if (( event.request != SERVICE_NONE ) &&
+        ( event.status != HTTP_OK ) &&
+        ( event.status != PASS ))
+    {
+        // wlog ("Event Status: %d\n", event.status );
+
+        /* TODO: Enable log_event */
+        wlog ("%s Address : %s (%d)\n",
+                  event.log_prefix.c_str(),
+                  event.address.c_str(),
+                  event.status);
+        elog ("%s Payload : %s\n", event.log_prefix.c_str(), event.payload.c_str());
+        if ( event.response.size() )
+        {
+            elog ("%s Response: %s\n", event.log_prefix.c_str(), event.response.c_str());
+        }
+        else
+        {
+            elog ("%s: no response\n", event.log_prefix.c_str());
+        }
+    }
+    /* The request is complete ; release the event's connection and base */
+    event.active = false ;
+    httpUtil_free_conn ( event );
+    httpUtil_free_base ( event );
+
+    /* This is needed to get out of the loop */
+    event_base_loopbreak((struct event_base *)arg);
+}
+
+/* The Guest Heartbeat event request handler
+ * wrapper abstracted from guestHostClass ;
+ * forwards to the handler of the object returned by get_hostInv_ptr() */
+void guestVimApi_Handler ( struct evhttp_request *req, void *arg )
+{
+    get_hostInv_ptr()->guestVimApi_handler ( req , arg );
+}
+
+/*****************************************************************************
+ *
+ * Name : guestVimApi_svc_event
+ *
+ * Description: Send a VM instance service state/status change notification
+ * to the VIM.
+ *
+ * Warning : Only the 'heartbeat' service 'status' change is supported.
+ * + *****************************************************************************/ + +int guestVimApi_svc_event ( string hostname, + string instance_uuid, + string state, + string status, + string timeout) +{ + guestHostClass * obj_ptr = get_hostInv_ptr() ; + + ilog ("%s %s %s heartbeating status change to '%s' (to vim)\n", hostname.c_str(), + instance_uuid.c_str(), + state.c_str(), + status.c_str()); + + instInfo * instInfo_ptr = obj_ptr->get_inst ( instance_uuid ); + if ( instInfo_ptr ) + { + httpUtil_event_init ( &instInfo_ptr->vimEvent, + hostname, + VIM_SIG, + URL_VIM_ADDRESS, + daemon_get_cfg_ptr()->vim_event_port); + + instInfo_ptr->vimEvent.base = NULL ; + instInfo_ptr->vimEvent.conn = NULL ; + + /* Set the host context */ + instInfo_ptr->vimEvent.uuid = instance_uuid ; + instInfo_ptr->vimEvent.cur_retries = 0 ; + instInfo_ptr->vimEvent.max_retries = 3 ; + instInfo_ptr->vimEvent.active = true ; + instInfo_ptr->vimEvent.noncritical = false ; + + instInfo_ptr->vimEvent.request = VIM_HOST_INSTANCE_STATUS; + instInfo_ptr->vimEvent.operation = OPER__HOST_INST_CHANGE ; + instInfo_ptr->vimEvent.token.url = URL_VIM_INST_LABEL ; + instInfo_ptr->vimEvent.token.url.append(instance_uuid) ; + + /* The type of HTTP request */ + instInfo_ptr->vimEvent.type = EVHTTP_REQ_PATCH ; + + /* Build the payload */ + instInfo_ptr->vimEvent.payload = ("{\"uuid\":\""); + instInfo_ptr->vimEvent.payload.append (instance_uuid); + instInfo_ptr->vimEvent.payload.append ("\",\"hostname\":\""); + instInfo_ptr->vimEvent.payload.append (hostname); + instInfo_ptr->vimEvent.payload.append ("\",\"event-type\":\"service\",\"event-data\":{\"services\":"); + instInfo_ptr->vimEvent.payload.append ("[{\"service\":\"heartbeat\",\"state\":\""); + instInfo_ptr->vimEvent.payload.append (state); + instInfo_ptr->vimEvent.payload.append ("\",\"status\":\""); + instInfo_ptr->vimEvent.payload.append (status); + instInfo_ptr->vimEvent.payload.append ("\",\"restart-timeout\":\""); + 
instInfo_ptr->vimEvent.payload.append (timeout); + instInfo_ptr->vimEvent.payload.append ("\"}]}}"); + + jlog ("%s %s Payload: %s\n", hostname.c_str(), instance_uuid.c_str(), instInfo_ptr->vimEvent.payload.c_str()); + + return (guestHttpUtil_api_req ( instInfo_ptr->vimEvent )); + } + return (FAIL_HOSTNAME_LOOKUP); +} + + + + +/***************************************************************************** + * + * Name : guestVimApi_alarm_event + * + * Description: Send a VM instance service an alarm event. + * + *****************************************************************************/ + +int guestVimApi_alarm_event ( string hostname, + string instance_uuid) +{ + guestHostClass * obj_ptr = get_hostInv_ptr() ; + + ilog ("%s %s heartbeating alarm (ill health) event (to vim)\n", + hostname.c_str(), + instance_uuid.c_str()); + + instInfo * instInfo_ptr = obj_ptr->get_inst ( instance_uuid ); + if ( instInfo_ptr ) + { + httpUtil_event_init ( &instInfo_ptr->vimEvent, + hostname, + VIM_SIG, + URL_VIM_ADDRESS, + daemon_get_cfg_ptr()->vim_event_port); + + instInfo_ptr->vimEvent.base = NULL ; + instInfo_ptr->vimEvent.conn = NULL ; + + /* Set the host context */ + instInfo_ptr->vimEvent.uuid = instance_uuid ; + instInfo_ptr->vimEvent.cur_retries = 0 ; + instInfo_ptr->vimEvent.max_retries = 3 ; + instInfo_ptr->vimEvent.active = true ; + instInfo_ptr->vimEvent.noncritical = false ; + + instInfo_ptr->vimEvent.request = VIM_HOST_INSTANCE_STATUS; + instInfo_ptr->vimEvent.operation = OPER__HOST_INST_CHANGE ; + instInfo_ptr->vimEvent.token.url = URL_VIM_INST_LABEL ; + instInfo_ptr->vimEvent.token.url.append(instance_uuid) ; + + /* The type of HTTP request */ + instInfo_ptr->vimEvent.type = EVHTTP_REQ_PATCH ; + + /* Build the payload */ + instInfo_ptr->vimEvent.payload = ("{\"uuid\":\""); + instInfo_ptr->vimEvent.payload.append (instance_uuid); + instInfo_ptr->vimEvent.payload.append ("\",\"hostname\":\""); + instInfo_ptr->vimEvent.payload.append (hostname); + + 
instInfo_ptr->vimEvent.payload.append ("\",\"event-type\":\"alarm\",\"event-data\":{\"services\":"); + instInfo_ptr->vimEvent.payload.append ("[{\"service\":\"heartbeat\",\"state\":\"unhealthy\",\"repair-action\":\""); + instInfo_ptr->vimEvent.payload.append (instInfo_ptr->corrective_action); + instInfo_ptr->vimEvent.payload.append ("\"}]}}"); + + jlog ("%s %s Payload: %s\n", hostname.c_str(), + instance_uuid.c_str(), + instInfo_ptr->vimEvent.payload.c_str()); + + return (guestHttpUtil_api_req ( instInfo_ptr->vimEvent )); + } + return (FAIL_HOSTNAME_LOOKUP); +} + + +/***************************************************************************** + * + * Name : guestVimApi_inst_failed + * + * Description: Send a VM instance a failure notification to the VIM. + * + * Supported failures are ... + * + * MTC_EVENT_HEARTBEAT_LOSS + * + *****************************************************************************/ +int guestVimApi_inst_failed ( string hostname, + string instance_uuid, + unsigned int event, + int retries ) +{ + guestHostClass * obj_ptr = get_hostInv_ptr() ; + + elog ("%s %s *** Heartbeat Loss *** \n", + hostname.c_str(), + instance_uuid.c_str() ); + + if ( obj_ptr->get_reporting_state (hostname) == false ) + { + ilog ("%s cancelling failure notification request\n", hostname.c_str()); + ilog ("%s ... 'host' level fault reporting is disabled\n", hostname.c_str()); + return (PASS); + } + instInfo * instInfo_ptr = obj_ptr->get_inst ( instance_uuid ); + if ( instInfo_ptr ) + { + if (( event == MTC_EVENT_HEARTBEAT_LOSS ) && + ( instInfo_ptr->heartbeat.reporting == false )) + { + ilog ("%s cancelling failure notification request\n", hostname.c_str()); + ilog ("%s ... 
'instance' level fault reporting is disabled\n", hostname.c_str()); + return (PASS); + } + + httpUtil_event_init ( &instInfo_ptr->vimEvent, + hostname, + VIM_SIG, + URL_VIM_ADDRESS, + daemon_get_cfg_ptr()->vim_event_port); + + instInfo_ptr->vimEvent.base = NULL ; + instInfo_ptr->vimEvent.conn = NULL ; + + /* Set the host context */ + instInfo_ptr->vimEvent.uuid = instance_uuid ; + instInfo_ptr->vimEvent.cur_retries = 0 ; + instInfo_ptr->vimEvent.max_retries = retries ; + instInfo_ptr->vimEvent.active = true ; + instInfo_ptr->vimEvent.noncritical = false ; + + instInfo_ptr->vimEvent.request = VIM_HOST_INSTANCE_FAILED; + instInfo_ptr->vimEvent.operation = OPER__HOST_INST_FAIL ; + instInfo_ptr->vimEvent.token.url = URL_VIM_INST_LABEL ; + instInfo_ptr->vimEvent.token.url.append(instance_uuid) ; + + /* The type of HTTP request */ + instInfo_ptr->vimEvent.type = EVHTTP_REQ_PATCH ; + + /* Build the payload */ + instInfo_ptr->vimEvent.payload = ("{\"uuid\":\""); + instInfo_ptr->vimEvent.payload.append (instance_uuid); + instInfo_ptr->vimEvent.payload.append ("\",\"hostname\":\""); + instInfo_ptr->vimEvent.payload.append (hostname); + if ( event == MTC_EVENT_HEARTBEAT_LOSS ) + { + instInfo_ptr->vimEvent.payload.append ("\",\"event-type\":\"alarm\",\"event-data\":{\"services\":"); + instInfo_ptr->vimEvent.payload.append ("[{\"service\":\"heartbeat\",\"state\":\"failed\",\"repair-action\":\""); + instInfo_ptr->vimEvent.payload.append (instInfo_ptr->corrective_action); + instInfo_ptr->vimEvent.payload.append ("\"}]}}"); + + wlog ("%s %s Payload: %s\n", hostname.c_str(), + instance_uuid.c_str(), + instInfo_ptr->vimEvent.payload.c_str()); + } + else + { + elog ("%s Unsupported 'event code' (%d)\n", instance_uuid.c_str(), event ); + return (FAIL_BAD_PARM); + } + + return (guestHttpUtil_api_req ( instInfo_ptr->vimEvent )); + } + return (FAIL_HOSTNAME_LOOKUP); +} + + +/***************************************************************************** + * + * Name : 
guestVimApi_inst_action
+ *
+ * Description: Send a notify message to the VIM in response to voting or notification
+ *
+ * Returns    : the guestHttpUtil_api_req result, or FAIL_HOSTNAME_LOOKUP
+ *              when the instance uuid is not found.
+ *
+ *****************************************************************************/
+int guestVimApi_inst_action ( string hostname,
+                              string instance_uuid,
+                              string action,
+                              string guest_response,
+                              string reason,
+                              int retries)
+{
+    guestHostClass * inv_ptr = get_hostInv_ptr() ;
+
+    ilog ("%s %s '%s' action (to vim)\n", hostname.c_str(), instance_uuid.c_str() , action.c_str() );
+
+    instInfo * inst_ptr = inv_ptr->get_inst ( instance_uuid );
+    if ( inst_ptr == NULL )
+    {
+        return (FAIL_HOSTNAME_LOOKUP);
+    }
+
+    httpUtil_event_init ( &inst_ptr->vimEvent,
+                           hostname,
+                           VIM_SIG,
+                           URL_VIM_ADDRESS,
+                           daemon_get_cfg_ptr()->vim_event_port);
+
+    /* No libevent base or connection yet */
+    inst_ptr->vimEvent.base = NULL ;
+    inst_ptr->vimEvent.conn = NULL ;
+
+    /* What is being requested ; a PATCH of this instance's resource */
+    inst_ptr->vimEvent.type      = EVHTTP_REQ_PATCH ;
+    inst_ptr->vimEvent.request   = VIM_HOST_INSTANCE_NOTIFY;
+    inst_ptr->vimEvent.operation = OPER__HOST_INST_NOTIFY ;
+    inst_ptr->vimEvent.token.url = URL_VIM_INST_LABEL ;
+    inst_ptr->vimEvent.token.url += instance_uuid ;
+
+    /* The instance context and retry budget */
+    inst_ptr->vimEvent.uuid        = instance_uuid ;
+    inst_ptr->vimEvent.cur_retries = 0 ;
+    inst_ptr->vimEvent.max_retries = retries ;
+    inst_ptr->vimEvent.noncritical = false ;
+    inst_ptr->vimEvent.active      = true ;
+
+    /* Assemble the JSON body ; only 'reason' is escaped */
+    string body = "{\"uuid\":\"" ;
+    body += instance_uuid ;
+    body += "\",\"event-type\": \"action\",\"event-data\": {\"action\": \"" ;
+    body += action ;
+    body += "\", \"guest-response\": \"" ;
+    body += guest_response ;
+    body += "\", \"reason\": \"" ;
+    body += jsonUtil_escapeSpecialChar(reason) ;
+    body += "\"}}" ;
+    inst_ptr->vimEvent.payload = body ;
+
+    jlog ("%s %s Payload: %s\n", hostname.c_str(), instance_uuid.c_str(), inst_ptr->vimEvent.payload.c_str());
+
+    return (guestHttpUtil_api_req ( inst_ptr->vimEvent ));
+}
+
+
+/*****************************************************************************
+ *
+ * Name : guestVimApi_getHostState
+ *
+ * Description: Ask the VIM for the top level fault reporting
+ *              state for this host
+ *
+ *              Issues a GET on URL_VIM_HOST_LABEL<uuid> using the
+ *              caller supplied event.
+ *
+ *****************************************************************************/
+
+int guestVimApi_getHostState ( string hostname, string uuid, libEvent & event )
+{
+    httpUtil_event_init ( &event,
+                           hostname,
+                           VIM_SIG,
+                           URL_VIM_ADDRESS,
+                           daemon_get_cfg_ptr()->vim_event_port);
+
+    /* Host context ; no libevent base or connection yet */
+    event.uuid        = uuid ;
+    event.base        = NULL ;
+    event.conn        = NULL ;
+    event.noncritical = false ;
+    event.active      = true ;
+
+    /* The request itself */
+    event.request   = VIM_HOST_STATE_QUERY;
+    event.operation = OPER__HOST_STATE_QUERY ;
+    event.type      = EVHTTP_REQ_GET ;
+    event.token.url = URL_VIM_HOST_LABEL ;
+    event.token.url += event.uuid ;
+
+    /* Assemble the JSON body */
+    string body = "{\"hostname\": \"" ;
+    body += hostname ;
+    body += "\",\"uuid\":\"" ;
+    body += uuid ;
+    body += "\"}" ;
+    event.payload = body ;
+
+    jlog ("%s %s Payload: %s\n", hostname.c_str(), uuid.c_str(), event.payload.c_str());
+
+    return ( guestHttpUtil_api_req ( event ) );
+}
+
+
+/*****************************************************************************
+ *
+ * Name : guestVimApi_getHostInst
+ *
+ * Description: Ask the VIM for all the VM instance info for the
+ * specified host.
+ *
+ *****************************************************************************/
+int guestVimApi_getHostInst ( string hostname, string uuid, libEvent & event )
+{
+    httpUtil_event_init ( &event,
+                           hostname,
+                           VIM_SIG,
+                           URL_VIM_ADDRESS,
+                           daemon_get_cfg_ptr()->vim_event_port);
+
+    event.base = NULL ;
+    event.conn = NULL ;
+    event.uuid = uuid ;
+    event.active = true ;
+    event.noncritical = false ;
+
+    /* GET <URL_VIM_INST_LABEL>?host_uuid=<uuid> ; no payload is built here */
+    event.type = EVHTTP_REQ_GET ;
+    event.request = VIM_HOST_INSTANCE_QUERY;
+    event.operation = OPER__HOST_INST_QUERY ;
+    event.token.url = URL_VIM_INST_LABEL ;
+    event.token.url.append("?host_uuid=");
+    event.token.url.append(event.uuid);
+
+    /* note: the value logged under the 'Payload' label is the request URL */
+    jlog ("%s %s Payload: %s\n", hostname.c_str(), event.uuid.c_str(), event.token.url.c_str());
+
+    return ( guestHttpUtil_api_req ( event ) );
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestVimApi.h b/mtce-common/cgts-mtce-common-1.0/guest/guestVimApi.h
new file mode 100644
index 00000000..176fa203
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/guest/guestVimApi.h
@@ -0,0 +1,47 @@
+#ifndef __INCLUDE_GUESTVIMAPI_H__
+#define __INCLUDE_GUESTVIMAPI_H__
+/*
+ * Copyright (c) 2013, 2015 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+#include
+#include
+
+#include "guestHttpUtil.h"
+
+ /**
+  * @file
+  * Wind River CGTS Platform Guest Services Request Transmitter.
+  *
+  * This module is used by the guestAgent only and allows the guestAgent to
+  *
+  * 1. Transmit notification of an instance failure to the VIM
+  *
+  *      guestVimApi_inst_failed
+  *
+  * 2. Get the instance info for a specified host from the VIM
+  *
+  *      guestVimApi_getHostInst
+  *
+  * 3. Get the host level fault reporting state.
+  *
+  *      guestVimApi_getHostState
+  *
+  **************************************************************************/
+
+int guestVimApi_init ( string ip, int port );
+void guestVimApi_fini ( void );
+
+int guestVimApi_inst_failed ( string hostname, string instance, unsigned int event, int retries );
+int guestVimApi_inst_action ( string hostname, string instance_uuid, string action, string guest_response, string reason, int retries=0 );
+int guestVimApi_svc_event ( string hostname, string instance_uuid, string state, string status, string timeout );
+int guestVimApi_alarm_event ( string hostname, string instance_uuid );
+int guestVimApi_getHostInst ( string hostname, string uuid, libEvent & event );
+int guestVimApi_getHostState ( string hostname, string uuid, libEvent & event );
+
+void guestVimApi_Handler ( struct evhttp_request *req, void *arg );
+
+#endif /* __INCLUDE_GUESTVIMAPI_H__ */
diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestVirtio.cpp b/mtce-common/cgts-mtce-common-1.0/guest/guestVirtio.cpp
new file mode 100644
index 00000000..88f5f4e2
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/guest/guestVirtio.cpp
@@ -0,0 +1,373 @@
+/*
+* Copyright (c) 2013-2015 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+*/
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace std;
+
+#include "nodeBase.h"
+#include "nodeEvent.h"
+#include "guestBase.h"
+#include "guestUtil.h"
+#include "guestVirtio.h"
+#include "guestInstClass.h" /* for ... 
get_inst */ + +/***************************************************************************** + * Name : virtio_check_filename + * + * Purpose: Return valid virtio instance heartbeat messaging socket filenames + * + * Description: + * + * Check a filename, already striped of an directory component, + * against the expected pattern for a cgcs heartbeat vio socket file. + * + * If satisfied, returns an allocated buffer containing the qemu instance name. + * The buffer must be free'd. + * + * Returns NULL on failure. + * + *****************************************************************************/ + +const char* host_virtio_dir = "/var/lib/libvirt/qemu"; + +// Use instance id to substitute the first %s below +const char* host_virtio_file_format_print = "cgcs.heartbeat.%s.sock"; +const char* alt_host_virtio_file_format_print = "wrs.heartbeat.agent.0.%s.sock"; + +// Must return '2' when scaned, first buffer recieves instance id, second should get a k, and third is unused +const char* virtio_file_format_scan = "%m[cgcs].%m[heartbeat].%m[^.].soc%m[k]%ms"; +const char* host_virtio_file_format_scan = "cgcs.heartbeat.%m[^.].soc%m[k]%ms"; +const char* alt_host_virtio_file_format_scan = "wrs.heartbeat.agent.0.%m[^.].soc%m[k]%ms"; + +string virtio_instance_name ( char * fn ) +{ + string name = "" ; + char *s1 = NULL; + char *s2= NULL; + char *instance_name = NULL; + + int rc = sscanf(fn, host_virtio_file_format_scan, &instance_name, &s1, &s2); + if (rc != 2) + { + dlog3 ("'%s' does not satisfy scan pattern %s\n", fn, host_virtio_file_format_scan); + if (s1) + { + free(s1); + s1 = NULL; + } + + if (s2) + { + free(s2); + s2 = NULL; + } + + if (instance_name) + { + free(instance_name); + instance_name = NULL; + } + + rc = sscanf(fn, alt_host_virtio_file_format_scan, &instance_name, &s1, &s2); + if (rc != 2) + { + dlog3 ("'%s' does not satisfy scan pattern %s\n", fn, alt_host_virtio_file_format_scan); + if (instance_name) + { + free(instance_name); + instance_name = NULL; + } + 
} + else + { + /* Valid instance filename found */ + name = instance_name ; + } + } + else + { + /* Valid instance filename found */ + name = instance_name ; + } + + if (s1) free(s1); + if (s2) free(s2); + + if (instance_name) + { + free(instance_name); + } + + return (name); +} + + +bool virtio_check_filename ( char * fn ) +{ + string instance_name = virtio_instance_name ( fn ) ; + if ( instance_name.size () == UUID_LEN ) + return true ; + else + return false ; +} + +/* Add the auto detected channel to the instance list + * WARNING: This is where the cgcs.heartbeat.*.sock part is + * removed from the channel and put into the instInfo + * struct as a uuid value */ +int virtio_channel_add ( char * channel ) +{ + instInfo * instInfo_ptr ; + int rc = FAIL_NOT_FOUND ; + char * prefix1 = NULL ; + char * prefix2 = NULL ; + char * suffix = NULL ; + char * uuid_ptr = NULL ; + char * s1 = NULL ; + string uuid = ""; + instInfo instance ; + guestUtil_inst_init ( &instance ); + + rc = sscanf(channel, virtio_file_format_scan, &prefix1, &prefix2, &uuid_ptr, &suffix, &s1 ); + if ( rc != 4 ) + { + elog ("failed to extract uuid from channel %s (num:%d)\n", channel, rc); + rc = FAIL_INVALID_DATA ; + goto virtio_channel_add_cleanup ; + } + + uuid = uuid_ptr ; + if ( uuid.length() != UUID_LEN ) + { + elog ("failed to get UUID from channel %s (uuid:%ld)\n", uuid.c_str(), uuid.length()); + rc = FAIL_INVALID_UUID ; + goto virtio_channel_add_cleanup ; + } + + + instInfo_ptr = get_instInv_ptr()->get_inst ( uuid ); + if ( instInfo_ptr ) + { + /* detected channel found */ + ilog ("%s add ; already provisioned\n", log_prefix(instInfo_ptr).c_str()); + rc = PASS ; + } + else if ( ( rc = get_instInv_ptr()->add_inst ( uuid, instance ) ) == PASS ) + { + dlog ("%s add ; auto provisioned\n", instance.uuid.c_str()); + rc = PASS ; + } + else + { + elog ("%s add failed\n", uuid.c_str()); + rc = FAIL_INVALID_UUID ; + } + + if ( rc == PASS ) + { + + /* get the recently added instance */ + instInfo_ptr = 
get_instInv_ptr()->get_inst ( uuid ); + if ( instInfo_ptr ) + { + instInfo_ptr->uuid = uuid ; + instInfo_ptr->chan = channel ; + instInfo_ptr->fd_namespace = QEMU_CHANNEL_DIR ; + instInfo_ptr->fd_namespace.append ("/") ; + instInfo_ptr->fd_namespace.append (channel) ; + + instInfo_ptr->connect_wait_in_secs = DEFAULT_CONNECT_WAIT ; + + get_instInv_ptr()->reconnect_start ( (const char *)uuid_ptr ) ; + } + } + +virtio_channel_add_cleanup: + + if (prefix1) free(prefix1); + if (prefix2) free(prefix2); + if (suffix) free(suffix); + if (uuid_ptr) free(uuid_ptr); + if (s1) free (s1); + + return(rc); +} + + + +/***************************************************************************** + * + * Name : virtio_channel_connect + * + * Purpose : Connect to the channel specified by the instance pointer + * + *****************************************************************************/ +int virtio_channel_connect ( instInfo * instInfo_ptr ) +{ + int rc = PASS ; + char buf[PATH_MAX]; + + if ( ! instInfo_ptr ) + { + slog ("called with NULL instance pointer\n"); + return (FAIL_NULL_POINTER); + } + + snprintf(buf, sizeof(buf), "%s/cgcs.heartbeat.%s.sock", QEMU_CHANNEL_DIR, instInfo_ptr->uuid.data()); + + dlog ("... 
trying connect: %s\n", buf ); + + if (( instInfo_ptr->chan_fd > 0 ) && ( instInfo_ptr->chan_ok == true )) + { + if ( instInfo_ptr->connected ) + { + ilog ("%s already connected\n", log_prefix(instInfo_ptr).c_str()); + return (PASS); + } + else + { + ilog ("%s socket and chan ok but not connected\n", log_prefix(instInfo_ptr).c_str()); + } + } + + instInfo_ptr->chan_ok = false ; + instInfo_ptr->connected = false ; + + if ( instInfo_ptr->chan_fd ) + close (instInfo_ptr->chan_fd); + + /* found channel */ + instInfo_ptr->chan_fd = socket ( AF_UNIX, CHAN_FLAGS, 0 ); + if ( instInfo_ptr->chan_fd <= 0 ) + { + ilog("%s socket create failed for %s, (%d:%m)\n", log_prefix(instInfo_ptr).c_str(), buf, errno ) ; + rc = FAIL_SOCKET_CREATE ; + } + else + { + int flags ; + struct linger so_linger ; + + /* get socket flags */ + flags = fcntl(instInfo_ptr->chan_fd, F_GETFL); + if (flags < 0) + { + elog ("%s failed to get socket %d flags (%d:%m)\n", + log_prefix(instInfo_ptr).c_str(), + instInfo_ptr->chan_fd , errno); + rc = FAIL_SOCKET_OPTION ; + } + + /* set socket as nonblocking */ + if ( flags & O_NONBLOCK ) + { + dlog ("%s Socket already set as non-blocking\n", + log_prefix(instInfo_ptr).c_str()); + } + else + { + flags = (flags | O_NONBLOCK); + if (fcntl(instInfo_ptr->chan_fd, F_SETFL, flags) < 0) + { + elog ("%s failed to set socket %d nonblocking (%d:%m)\n", + instInfo_ptr->uuid.data(), + instInfo_ptr->chan_fd , errno); + rc = FAIL_SOCKET_NOBLOCK ; + } + } + so_linger.l_onoff = 1 ; /* true */ + so_linger.l_linger = 0 ; /* linger time is 0 ; no TIME_WAIT */ + + rc = setsockopt ( instInfo_ptr->chan_fd, SOL_SOCKET, SO_LINGER, &so_linger, sizeof(so_linger)); + if ( rc ) + { + elog ("%s failed to set linger=0 option (%d:%m)\n", log_prefix(instInfo_ptr).c_str(), errno ); + } + } + + if ( rc == PASS ) + { + int len ; + struct sockaddr_un un; + un.sun_family = AF_UNIX; + + strcpy(un.sun_path, buf); + len = offsetof(struct sockaddr_un, sun_path) + strlen(buf); + rc = 
connect(instInfo_ptr->chan_fd, (struct sockaddr *)&un, len); + if (rc < 0) + { + elog ( "%s connect failed %s (%d:%d:%m)\n", + log_prefix(instInfo_ptr).c_str(), buf, rc, errno); + } + else + { + ilog ("%s connect accepted\n", log_prefix(instInfo_ptr).c_str() ); + instInfo_ptr->chan_ok = true ; + instInfo_ptr->connected = true ; + rc = PASS ; + } + } + /* Handle errors */ + if ( rc != PASS ) + { + /* TODO: cleanup */ + if (instInfo_ptr->chan_fd ) + { + ilog ("%s closing socket %d\n", + log_prefix(instInfo_ptr).c_str(), + instInfo_ptr->chan_fd); + + close (instInfo_ptr->chan_fd) ; + instInfo_ptr->chan_fd = 0 ; + instInfo_ptr->chan_ok = false ; + instInfo_ptr->connected = false ; + } + /* TODO: consider removing this entry from the list */ + } + return (rc); +} + + +int virtio_channel_connect ( string channel ) +{ + instInfo * instInfo_ptr = get_instInv_ptr()->get_inst ( channel ) ; + if ( instInfo_ptr ) + { + return ( virtio_channel_connect ( instInfo_ptr )); + } + elog ("%s instance lookup failed\n", channel.c_str() ); + return (FAIL_NULL_POINTER); +} diff --git a/mtce-common/cgts-mtce-common-1.0/guest/guestVirtio.h b/mtce-common/cgts-mtce-common-1.0/guest/guestVirtio.h new file mode 100644 index 00000000..82f31382 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/guestVirtio.h @@ -0,0 +1,24 @@ +#ifndef __GUESTVIRTIO_H__ +#define __GUESTVIRTIO_H__ + +/* +* Copyright (c) 2013-2015 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* +*/ + +#include +#include + +using namespace std; + +#include "guestBase.h" + +bool virtio_check_filename ( char * fn ); +int virtio_channel_connect ( string channel ); +int virtio_channel_connect ( instInfo * inst_ptr ); +int virtio_channel_add ( char * chan_ptr ); +string virtio_instance_name ( char * fn ); + +#endif /* __GUESTVIRTIO_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/guest/scripts/guest.ini b/mtce-common/cgts-mtce-common-1.0/guest/scripts/guest.ini new file mode 100644 index 00000000..26e666fd --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/scripts/guest.ini @@ -0,0 +1,28 @@ +; CGTS Guest Service daemons config file +[agent] ; Agent Configuration +rx_port = 2401 ; guestAgent inter-daemon messaging rx port number +vim_cmd_port = 2410 ; vim to guestAgent command port +hbs_failure_threshold = 1 ; Number of failures to accept before reporting the fault + +[client] ; Client Configuration +rx_port = 2411 ; guestAgent inter-daemon messaging rx port number +hbs_failure_threshold = 1 ; Number of failures to accept before reporting the fault +hbs_pulse_period = 400 ; Smallest allowable heartbeat interval in msecs + +[timeouts] ; Configurable timeouts - values in seconds +start_delay = 1 ; time in secs to wait before starting failure reporting + +[debug] ; +debug_timer = 0 ; enable(1) or disable(0) timer logs (tlog) +debug_json = 0 ; enable(1) or disable(0) Json logs (jlog) +debug_fsm = 0 ; enable(1) or disable(0) fsm logs (flog) +debug_http = 0 ; enable(1) or disable(0) http logs (hlog) +debug_alive = 0 ; enable(1) or disable(0) mtcAlive logs (alog) +debug_msg = 0 ; enable(1) or disable(0) message logs (mlog) +debug_state = 0 ; enable(1) or disable(0) state change logs (clog) +debug_work = 0 ; enable(1) or disable(0) work queue trace logs (qlog) +debug_level = 0 ; decimal mask 0..15 (8,4,2,1) and 16 for mem logging +debug_all = 0 ; set all debug labels to the specified value + +flush = 1 ; enable(1) 
#! /bin/sh
#
# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

#
# chkconfig: 2345 95 95
#
### BEGIN INIT INFO
# Provides:          guestAgent
# Default-Start:     3 5
# Default-Stop:      0 1 2 6
# Short-Description: Maintenance Client Daemon
### END INIT INFO

# LSB style init script for the guestAgent daemon.
#
# Actions: start | stop | status | restart | condrestart
# The script exits with RETVAL which holds one of the LSB codes below.

. /etc/init.d/functions

DAEMON_NAME="guestAgent"
DAEMON="/usr/local/bin/${DAEMON_NAME}"
PIDFILE="/var/run/${DAEMON_NAME}.pid"
PLATFORM_CONF="/etc/platform/platform.conf"

IFACE=""

# Linux Standard Base (LSB) Error Codes
RETVAL=0
GENERIC_ERROR=1
INVALID_ARGS=2
UNSUPPORTED_FEATURE=3
NOT_INSTALLED=5
NOT_RUNNING=7

PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin
export PATH

if [ ! -e "${DAEMON}" ] ; then
    logger "${DAEMON} is missing"
    exit ${NOT_INSTALLED}
fi

# Bring the management interface up if it is administratively down
if [ -f ${PLATFORM_CONF} ] ; then
    IFACE=$(grep management_interface ${PLATFORM_CONF} | cut -f2 -d'=')
    if [ "${IFACE}" != "" ] ; then
        if ip link show $IFACE | grep -sq 'state DOWN'; then
            ip link set dev $IFACE up
        fi
    fi
fi

case "$1" in
    start)
        logger "Starting ${DAEMON_NAME}"
        echo -n "Starting ${DAEMON_NAME}: "
        if [ -n "$(pidof ${DAEMON_NAME})" ] ; then
            echo -n "is already running "
            RETVAL=0
        else
            start-stop-daemon --start -b -x ${DAEMON} -- -l
            RETVAL=$?
        fi
        if [ ${RETVAL} -eq 0 ] ; then
            pid=$(pidof ${DAEMON_NAME})
            echo "OK"
            logger "${DAEMON} (${pid})"
        else
            echo "FAIL"
            RETVAL=${GENERIC_ERROR}
        fi
        ;;

    stop)
        logger "Stopping ${DAEMON_NAME}"
        echo -n "Stopping ${DAEMON_NAME}: "
        if [ -n "$(pidof ${DAEMON_NAME})" ] ; then
            killproc ${DAEMON_NAME}
        fi
        if [ -n "$(pidof ${DAEMON_NAME})" ] ; then
            echo "FAIL"
            RETVAL=${NOT_RUNNING}
        else
            echo "OK"
        fi
        rm -f ${PIDFILE}
        ;;

    restart)
        $0 stop
        $0 start
        # report the outcome of the start rather than always 0
        RETVAL=$?
        ;;

    status)
        pid=$(pidof ${DAEMON_NAME})
        RETVAL=$?
        if [ ${RETVAL} -eq 0 ] ; then
            echo "${DAEMON_NAME} is running"
        else
            echo "${DAEMON_NAME} is NOT running"
            RETVAL=${NOT_RUNNING}
        fi
        ;;

    condrestart)
        # conditional restart ; only restart a daemon that is running
        if [ -n "$(pidof ${DAEMON_NAME})" ] ; then
            $0 restart
            RETVAL=$?
        fi
        ;;

    *)
        echo "usage: $0 { start | stop | status | restart | condrestart }"
        # unknown or missing action ; do not report success
        RETVAL=${INVALID_ARGS}
        ;;
esac

exit ${RETVAL}
the Titanium Cloud Controller Maintenance Daemon +# +# RA Spec: +# +# http://www.opencf.org/cgi-bin/viewcvs.cgi/specs/ra/resource-agent-api.txt?rev=HEAD +# +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + + +####################################################################### + +# Fill in some defaults if no values are specified +OCF_RESKEY_binary_default="guestAgent" +OCF_RESKEY_config_default="/etc/mtc/guestAgent.ini" +OCF_RESKEY_dbg_default="false" +OCF_RESKEY_mode_default="normal" +OCF_RESKEY_user_default="admin" +OCF_RESKEY_pid_default="/var/run/guestAgent.pid" + + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_dbg=${OCF_RESKEY_dbg_default}} +: ${OCF_RESKEY_mode=${OCF_RESKEY_mode_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} + +mydaemon="/usr/local/bin/${OCF_RESKEY_binary}" +statusfile="/var/run/${OCF_RESKEY_binary}.info" + +####################################################################### + +usage() { + cat < + + +1.0 + + +This 'guestAgent' is an OCF Compliant Resource Agent that manages start, stop and in- +service monitoring of Maintenance's guestAgent daemon on Wind River's Titanium Cloud. + + + +Manages the Titanium Cloud's Maintenance guestAgent service daemon. + + + + + + + +mode = normal ... run maintenance daemon in 'normal' mode (default) +mode = passive ... run maintenance daemon in 'passive' mode + +Maintenance Mode Option + + + + + + +dbg = false ... info, warn and err logs sent to output stream (default) +dbg = true ... 
#######################################################################
# Helpers

# Load 'pid' from the pid file (empty when the file is missing) and succeed
# only when that pid refers to a live process.
guestAgent_pidfile_alive () {
    pid=""
    if [ -f ${OCF_RESKEY_pid} ] ; then
        pid=$(cat ${OCF_RESKEY_pid} 2> /dev/null)
    fi
    [ -n "${pid}" ] && kill -0 ${pid} 2> /dev/null
}

#######################################################################

# Verify the daemon binary, pidof and the config file are available.
# Returns OCF_ERR_CONFIGURED when the config file is missing.
guestAgent_validate() {

    if [ ${OCF_RESKEY_dbg} = "true" ] ; then
        ocf_log info "guestAgent:validate"
    fi

    check_binary "/usr/local/bin/${OCF_RESKEY_binary}"
    check_binary pidof

    if [ ! -f ${OCF_RESKEY_config} ] ; then
        msg="${OCF_RESKEY_binary} file missing ${OCF_RESKEY_config}"
        ocf_log err "${msg}"
        return ${OCF_ERR_CONFIGURED}
    fi

    return ${OCF_SUCCESS}
}

# In-service active monitor test.
# Removes the status file, signals the daemon with USR1 and waits up to
# 10 seconds for the daemon to recreate the status file.
# Returns OCF_SUCCESS, OCF_ERR_GENERIC (no response) or OCF_NOT_RUNNING.
guestAgent_status () {

    proc="guestAgent:status"
    if [ ${OCF_RESKEY_dbg} = "true" ] ; then
        ocf_log info "guestAgent:status"
    fi

    # remove the status file before we request a new
    rm -f ${statusfile}

    # Verify the pid file exists as part of status ; wait up to 3 seconds
    loop=0
    while [ ${loop} -lt 3 ] ; do
        if [ -f ${OCF_RESKEY_pid} ] ; then
            break
        fi
        sleep 1
        loop=$((loop + 1))
    done

    # See if the daemon is running
    if ! guestAgent_pidfile_alive ; then
        return ${OCF_NOT_RUNNING}
    fi

    log_sig="${OCF_RESKEY_binary} In-Service Active Monitor Test"

    # Ask the daemon to produce status
    ocf_run kill -s USR1 $pid

    # Wait for the response
    loop=0
    while [ ${loop} -lt 10 ] ; do
        sleep 1
        if [ -f ${statusfile} ] ; then

            ocf_log info "${log_sig} Passed ($loop)"
            return ${OCF_SUCCESS}

        elif [ ${loop} -eq 5 ] ; then

            # send the signal again
            ocf_run kill -s USR1 $pid

            pid_stat=$(cat /proc/${pid}/stat)
            ocf_log notice "${log_sig} is slow to respond"
            ocf_log notice "$pid_stat"

        elif [ ${loop} -eq 8 ] ; then

            pid_stat=$(cat /proc/${pid}/stat)
            ocf_log warn "${log_sig} is very slow to respond"
            ocf_log warn "$pid_stat"

        fi
        loop=$((loop + 1))
    done

    log_procfs
    ocf_log err "${log_sig} Failed"
    return ${OCF_ERR_GENERIC}
}

# Monitor action ; OCF_NOT_RUNNING when the pid file process is not alive,
# otherwise the result of the active status test.
guestAgent_monitor () {

    proc="guestAgent:monitor"
    if [ ${OCF_RESKEY_dbg} = "true" ] ; then
        ocf_log info "${proc}"
    fi

    # Uncomment if you want the monitor function to force-pass
    # return ${OCF_SUCCESS}

    if ! guestAgent_pidfile_alive ; then
        if [ ${OCF_RESKEY_dbg} = "true" ] ; then
            ocf_log info "${proc} called while ${OCF_RESKEY_binary} not running."
        fi
        return ${OCF_NOT_RUNNING}
    fi

    guestAgent_status
    return $?
}

# Start action.
# An already running daemon that passes the status test is left alone ;
# one that fails it is stopped and started again.
guestAgent_start () {

    local rc

    start_proc="guestAgent:start"
    if [ ${OCF_RESKEY_dbg} = "true" ] ; then
        ocf_log info "${start_proc}"
    fi

    # Uncomment if you want the start function to force-pass without starting
    # return ${OCF_SUCCESS}

    # If running then issue a ping test
    if guestAgent_pidfile_alive ; then
        guestAgent_status
        rc=$?
        if [ $rc -ne ${OCF_SUCCESS} ] ; then
            msg="${start_proc} ping test failed rc=${rc}"
            ocf_log err "${msg}"
            guestAgent_stop
        elif guestAgent_pidfile_alive ; then
            # Spec says to return success if process is already running for start
            ocf_log info "${start_proc} called while ${OCF_RESKEY_binary} is already running"
            return ${OCF_SUCCESS}
        fi
    fi

    # should not be running now or error
    if guestAgent_pidfile_alive ; then
        msg="${start_proc} cannot kill off existing instance of ${OCF_RESKEY_binary}"
        ocf_log err "${msg}"
        return ${OCF_RUNNING_MASTER}
    fi

    rm -f ${statusfile}

    # default PID to null
    pid=""

    # Try to Start the daemon
    ${mydaemon}
    rc=$?

    # verify it was started and set return code appropriately
    if [ $rc -eq ${OCF_SUCCESS} ] ; then
        # Verify the pid file exists as part of status
        loop=0
        while [ ${loop} -lt 3 ] ; do
            if [ -f ${OCF_RESKEY_pid} ] ; then
                break
            fi
            ocf_log info "${start_proc} waiting ... loop=${loop}"
            sleep 1
            loop=$((loop + 1))
        done

        if ! guestAgent_pidfile_alive ; then
            rc=${OCF_FAILED_MASTER}
        else
            # Startup health test ; the daemon is expected to have produced
            # its info file. Allow it a few seconds before giving up.
            # NOTE(review): this check used to be a no-op because of a
            # malformed assignment ('rc = ...') ; confirm the daemon writes
            # ${statusfile} at startup before relying on it.
            loop=0
            while [ ! -f ${statusfile} ] && [ ${loop} -lt 3 ] ; do
                sleep 1
                loop=$((loop + 1))
            done
            if [ ! -f ${statusfile} ] ; then
                ocf_log info "guestAgent: Startup Health Test Failed - missing info"
                rc=${OCF_ERR_GENERIC}
            fi
        fi
    else
        ocf_log info "${start_proc} failed ${mydaemon} daemon rc=${rc}"
        rc=${OCF_ERR_GENERIC}
    fi

    # Record success or failure and return status
    if [ ${rc} -eq $OCF_SUCCESS ] ; then
        msg="${start_proc}ed pid=${pid}"
        ocf_log info "${msg}"
    else
        msg="${start_proc} failed rc=${rc}"
        ocf_log err "${msg}"
        rc=${OCF_NOT_RUNNING}
    fi
    return $rc
}

# Make sure no daemon process is left ; kill -9 whatever pidof still finds
# and remove the pid file.
guestAgent_confirm_stop () {

    proc="guestAgent:confirm_stop"
    ocf_log info "${proc}"

    pid=$(pidof ${OCF_RESKEY_binary})
    if [ -n "${pid}" ] && kill -0 ${pid} 2> /dev/null ; then
        ocf_log info "${proc} 'kill -9 ${pid}'"
        kill -9 ${pid}
        ocf_log info "${proc}ed (by emergency kill -9 ${pid})"
        sleep 1
    fi
    rm -f ${OCF_RESKEY_pid}
}

# Stop action ; always returns OCF_SUCCESS.
# Sends up to 3 INT signals (1 second apart) to the pid in the pid file and
# then falls back to guestAgent_confirm_stop.
guestAgent_stop () {

    proc="guestAgent:stop"

    # See if the process is running by name
    pid=$(pidof ${OCF_RESKEY_binary})
    ocf_log info "${proc} PID:${pid}"
    if [ -z "${pid}" ] || ! kill -0 ${pid} 2> /dev/null ; then
        ocf_log info "${proc} called while already stopped (no process)"
        guestAgent_confirm_stop
        return ${OCF_SUCCESS}
    fi

    MAX=3
    loop=0
    while [ ${loop} -lt ${MAX} ] ; do

        # if pid file is gone there is nothing left to signal
        if [ ! -f ${OCF_RESKEY_pid} ] ; then
            break
        fi

        # verify stop with pidfile
        pid=$(cat ${OCF_RESKEY_pid})

        # if pidfile is empty we are done
        if [ "${pid}" = "" ] ; then
            ocf_log info "${proc}ped (by -int)"
            break
        fi

        # otherwise kill by -int until the process is gone
        if ! kill -0 ${pid} 2> /dev/null ; then
            ocf_log info "${proc}ped (by pid)"
            break
        fi

        ocf_log info "${proc}ping (by -int - loop:${loop})"
        kill -s INT ${pid}
        sleep 1

        loop=$((loop + 1))
    done

    guestAgent_confirm_stop
    return ${OCF_SUCCESS}
}

# Reload action ; implemented as stop followed by start.
# Returns the result of the last step that ran.
guestAgent_reload () {

    local rc

    proc="guestAgent:reload"
    if [ ${OCF_RESKEY_dbg} = "true" ] ; then
        ocf_log info "${proc}"
    fi

    guestAgent_stop
    rc=$?
    if [ $rc -eq ${OCF_SUCCESS} ] ; then
        #sleep 1
        guestAgent_start
        rc=$?
        if [ $rc -eq ${OCF_SUCCESS} ] ; then
            # guestAgent_stop / guestAgent_confirm_stop overwrite 'proc'
            msg="guestAgent:reloaded"
            ocf_log info "${msg}"
        fi
    fi

    if [ ${rc} -ne ${OCF_SUCCESS} ] ; then
        msg="${OCF_RESKEY_binary}: failed to restart rc=${rc}"
        ocf_log err "${msg}"
    fi

    return ${rc}
}

case ${__OCF_ACTION} in
    meta-data)   meta_data
                 exit ${OCF_SUCCESS}
                 ;;
    usage|help)  usage
                 exit ${OCF_SUCCESS}
                 ;;
esac

ocf_log info "guestAgent:${__OCF_ACTION} action"

# Anything except meta-data and help must pass validation
guestAgent_validate || exit $?

case ${__OCF_ACTION} in
    start)        guestAgent_start
                  ;;
    stop)         guestAgent_stop
                  ;;
    status)       guestAgent_status
                  ;;
    reload)       guestAgent_reload
                  ;;
    monitor)      guestAgent_monitor
                  ;;
    validate-all) guestAgent_validate
                  ;;
    *)            usage
                  exit ${OCF_ERR_UNIMPLEMENTED}
                  ;;
esac
#! /bin/bash

#
# Copyright (c) 2015 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

# guestAgent test head.
#
# Emulates the guest services commands the VIM sends to the guestAgent
# command port on localhost. Every request is sent with curl and the raw
# response (including headers) is printed.
#
# Syntax: guestAgentTest.sh <hostname>

echo "Running guestAgent guest services command testhead"

if [ -z "$1" ] ; then
    echo "Error: must supply a host name as first argument"
    echo "Syntax: $0 compute-1"
    exit 1
fi

echo "Args: $1 $2 $3"

banner="-----------------------------------------------------------"

hostname=$1

# 'system host-show' prints a table ; take the value column of the uuid row
hostuuid=$(system host-show "$hostname" | awk '$2 == "uuid" { print $4 ; exit }')
if [ -z "$hostuuid" ] ; then
    echo "Error: unable to get the uuid of host $hostname"
    exit 1
fi

echo "hostname: $hostname"
echo "hostuuid: $hostuuid"
echo "Emulating VIM guest services commands against $hostname"

base_url="http://localhost:2410/v1"
inst1="8d80875b-fa73-4ccb-bce3-1cd4df104400"
inst2="8d80875b-fa73-4ccb-bce3-1cd4df104401"
chan1="cgts-instance000001"
chan2="cgts-instance000002"
chan3="cgts-instance000003"

count=1

# Print a numbered banner and send one request.
#   $1 step title  $2 http method  $3 url path below /v1  $4 optional json body
vim_request () {
    local title="$1" method="$2" path="$3" data="$4"

    echo $banner
    echo "$count $title"
    echo $banner
    if [ -n "$data" ] ; then
        curl -i -X "$method" -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' "${base_url}/${path}" -d "$data"
    else
        curl -i -X "$method" -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' "${base_url}/${path}"
    fi

    count=$((count + 1))
}

# json body identifying the host under test
host_json () {
    printf '{"hostname": "%s", "uuid" : "%s"}' "$hostname" "$hostuuid"
}

# json body creating the heartbeat service for instance $1 on channel $2
inst_create_json () {
    printf '{"hostname": "%s", "uuid" : "%s", "channel" : "%s", "services" : ["heartbeat"]}' "$hostname" "$1" "$2"
}

# json body setting the heartbeat service of instance $1 (channel $2) to state $3
inst_state_json () {
    printf '{"hostname": "%s", "uuid" : "%s", "channel" : "%s", "services" : [{"service":"heartbeat" , "state":"%s"}]}' "$hostname" "$1" "$2" "$3"
}

vim_request "Create Host Services"               POST  "hosts/$hostuuid"
vim_request "Query Host Services"                GET   "hosts/$hostuuid"
vim_request "Enable Host Services"               PUT   "hosts/$hostuuid/enable" "$(host_json)"

vim_request "Create Guest Service: Instance 1"   POST  "instances/$inst1" "$(inst_create_json $inst1 $chan1)"
vim_request "Create Guest Service: Instance 2"   POST  "instances/$inst2" "$(inst_create_json $inst2 $chan2)"
vim_request "Query Guest Services: Instance 2:"  GET   "instances/$inst2"
vim_request "Query Guest Services: Instance 1:"  GET   "instances/$inst1"

vim_request "Enable Guest Service: Instance 2"   PATCH "instances/$inst2" "$(inst_state_json $inst2 $chan2 enabled)"
vim_request "Query Guest Services: Instance 2:"  GET   "instances/$inst2"
vim_request "Disable Guest Service: Instance 2"  PATCH "instances/$inst2" "$(inst_state_json $inst2 $chan2 disabled)"
vim_request "Query Guest Services: Instance 2:"  GET   "instances/$inst2"

# The remaining steps are intentionally not run ; remove this exit to also
# exercise the delete / disable / change channel requests below.
exit 0

vim_request "Delete Guest Service: Instance 2"   DELETE "instances/$inst2"
vim_request "Query Host Services"                GET    "hosts/$hostuuid"
vim_request "Disable Host Services"              PUT    "hosts/$hostuuid/disable" "$(host_json)"
vim_request "Delete Host Services"               DELETE "hosts/$hostuuid"

vim_request "Enable Guest Service: Instance 1"   PATCH "instances/$inst1" "$(inst_state_json $inst1 $chan1 enabled)"
vim_request "Disable Guest Service: Instance 1"  PATCH "instances/$inst1" "$(inst_state_json $inst1 $chan1 disabled)"
vim_request "Enable Guest Service: Instance 1 - Change Channel" PATCH "instances/$inst1" "$(inst_state_json $inst1 $chan3 enabled)"

echo $banner
echo $banner

exit 0
#! /bin/sh
#
# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

#
# chkconfig: 2345 95 95
#
### BEGIN INIT INFO
# Provides:          guestServer
# Default-Start:     3 5
# Default-Stop:      0 1 2 6
# Short-Description: Maintenance Client Daemon
### END INIT INFO

# LSB style init script for the guestServer daemon.
#
# Actions: start | stop | status | restart | condrestart
# The script exits with RETVAL which holds one of the LSB codes below.

. /etc/init.d/functions

DAEMON_NAME="guestServer"
DAEMON="/usr/local/bin/${DAEMON_NAME}"
PIDFILE="/var/run/${DAEMON_NAME}.pid"
PLATFORM_CONF="/etc/platform/platform.conf"

IFACE=""

# Linux Standard Base (LSB) Error Codes
RETVAL=0
GENERIC_ERROR=1
INVALID_ARGS=2
UNSUPPORTED_FEATURE=3
NOT_INSTALLED=5
NOT_RUNNING=7

PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin
export PATH

if [ ! -e "${DAEMON}" ] ; then
    logger "${DAEMON} is missing"
    exit ${NOT_INSTALLED}
fi

# Bring the management interface up if it is administratively down
if [ -f ${PLATFORM_CONF} ] ; then
    IFACE=$(grep management_interface ${PLATFORM_CONF} | cut -f2 -d'=')
    if [ "${IFACE}" != "" ] ; then
        if ip link show $IFACE | grep -sq 'state DOWN'; then
            ip link set dev $IFACE up
        fi
    fi
fi

case "$1" in
    start)
        logger "Starting ${DAEMON_NAME}"
        echo -n "Starting ${DAEMON_NAME}: "
        if [ -n "$(pidof ${DAEMON_NAME})" ] ; then
            echo -n "is already running "
            RETVAL=0
        else
            start-stop-daemon --start -b -x ${DAEMON} -- -l
            RETVAL=$?
        fi
        if [ ${RETVAL} -eq 0 ] ; then
            pid=$(pidof ${DAEMON_NAME})
            echo "OK"
            logger "${DAEMON} (${pid})"
        else
            echo "FAIL"
            RETVAL=${GENERIC_ERROR}
        fi
        ;;

    stop)
        logger "Stopping ${DAEMON_NAME}"
        echo -n "Stopping ${DAEMON_NAME}: "
        if [ -n "$(pidof ${DAEMON_NAME})" ] ; then
            killproc ${DAEMON_NAME}
        fi
        if [ -n "$(pidof ${DAEMON_NAME})" ] ; then
            echo "FAIL"
            RETVAL=${NOT_RUNNING}
        else
            echo "OK"
        fi
        rm -f ${PIDFILE}
        ;;

    restart)
        $0 stop
        $0 start
        # report the outcome of the start rather than always 0
        RETVAL=$?
        ;;

    status)
        pid=$(pidof ${DAEMON_NAME})
        RETVAL=$?
        if [ ${RETVAL} -eq 0 ] ; then
            echo "${DAEMON_NAME} is running"
        else
            echo "${DAEMON_NAME} is NOT running"
            RETVAL=${NOT_RUNNING}
        fi
        ;;

    condrestart)
        # conditional restart ; only restart a daemon that is running
        if [ -n "$(pidof ${DAEMON_NAME})" ] ; then
            $0 restart
            RETVAL=$?
        fi
        ;;

    *)
        echo "usage: $0 { start | stop | status | restart | condrestart }"
        # unknown or missing action ; do not report success
        RETVAL=${INVALID_ARGS}
        ;;
esac

exit ${RETVAL}
+ ; Manage this process in the context of a combo host subfunction + ; Choices: compute or storage. + ; when specified pmond will wait for + ; /var/run/.compute_config_complete or + ; /var/run/.storage_config_complete + ; ... before managing this process with the specified subfunction + ; Excluding this label will cause this process to be managed by default on startup diff --git a/mtce-common/cgts-mtce-common-1.0/guest/scripts/guestServer.service b/mtce-common/cgts-mtce-common-1.0/guest/scripts/guestServer.service new file mode 100644 index 00000000..6e2dd3a2 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/guest/scripts/guestServer.service @@ -0,0 +1,23 @@ +[Unit] +Description=Titanium Cloud Maintenance Guest Heartbeat Monitor Server +After=network.target syslog.service config.service +Before=pmon.service + +[Service] +Type=forking +ExecStart=/etc/rc.d/init.d/guestServer start +ExecStop=/etc/rc.d/init.d/guestServer stop +ExecReload=/etc/rc.d/init.d/guestServer reload +PIDFile=/var/run/guestServer.pid + +# Failure handling +TimeoutStartSec=10s +TimeoutStopSec=10s + +# process recovery is handled by pmond +Restart=no +RestartSec=5 + +[Install] +WantedBy=multi-user.target + diff --git a/mtce-common/cgts-mtce-common-1.0/heartbeat/Makefile b/mtce-common/cgts-mtce-common-1.0/heartbeat/Makefile new file mode 100755 index 00000000..d69bef0d --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/heartbeat/Makefile @@ -0,0 +1,51 @@ +# +# Copyright (c) 2013-2015 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +SRCS = hbsAlarm.cpp hbsClient.cpp hbsAgent.cpp hbsPmon.cpp hbsStubs.cpp +OBJS = $(SRCS:.cpp=.o) +LDLIBS = -lstdc++ -ldaemon -lcommon -lthreadUtil -lpthread -lfmcommon -lalarm -lrt -lamon -lcrypto -luuid +INCLUDES = -I../daemon -I../common -I../alarm -I../maintenance -I../public -I. -I.. 
+CCFLAGS = -g -O2 -Wall -Wextra -Werror + +STATIC_ANALYSIS_TOOL = cppcheck +# 'command -v' is used for the probe because $(shell ...) runs /bin/sh, which need not support the bash-only [[ ]] test +STATIC_ANALYSIS_TOOL_EXISTS = $(shell command -v $(STATIC_ANALYSIS_TOOL) > /dev/null 2>&1 && echo 1 || echo 0) + +BINS = hbsAgent hbsClient + +.cpp.o: + $(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@ + +static_analysis: +ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1) + $(STATIC_ANALYSIS_TOOL) --language=c++ --enable=warning -U__AREA__ -DWANT_FIT_TESTING *.cpp *.h +else + echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..." +endif + +all: static_analysis common daemon agent client + +build: clean static_analysis $(OBJS) + $(CXX) $(CCFLAGS) hbsAlarm.o hbsAgent.o hbsStubs.o -L../daemon -L../common -L../public -L../alarm $(LDLIBS) -o hbsAgent + $(CXX) $(CCFLAGS) hbsClient.o hbsPmon.o -L../daemon -L../common -L../public -L../alarm $(LDLIBS) -o hbsClient + +# Recursive builds go through $(MAKE) so that flags given to the top level make reach the sub-make +common: + ( cd ../common ; $(MAKE) clean ; $(MAKE) lib VER=$(VER) VER_MJR=$(VER_MJR)) + +daemon: + ( cd ../daemon ; $(MAKE) clean ; $(MAKE) lib VER=$(VER) VER_MJR=$(VER_MJR)) + +agent: $(OBJS) + $(CXX) $(CCFLAGS) hbsAgent.o -L../daemon -L../common -L../alarm -L../public $(LDLIBS) -o hbsAgent + +client: $(OBJS) + $(CXX) $(CCFLAGS) hbsClient.o -L../daemon -L../common -L../public $(LDLIBS) -o hbsClient + +clean_bins: + @rm -f $(BINS) + +# Remove objects, binaries and archives ; the object list of this Makefile is OBJS (OBJ is never defined) +clean: + @rm -f $(OBJS) $(BINS) *.o *.a + diff --git a/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsAgent.cpp b/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsAgent.cpp new file mode 100755 index 00000000..c1b6e14d --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsAgent.cpp @@ -0,0 +1,1808 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Nodal Health Check Agent Daemon + */ + +#include +#include +#include +#include +#include +#include +#include +#include /* for ... hostent */ +#include +#include +#include +#include +#include +#include /* for ... 
close and usleep */ +#include +#include /* for ... RTMGRP_LINK */ + +using namespace std; + +#include "nodeBase.h" +#include "nodeUtil.h" /* for ... get_ip_addresses */ +#include "nodeMacro.h" /* for ... CREATE_NONBLOCK_INET_UDP_RX_SOCKET */ +#include "daemon_ini.h" /* Ini Parser Header */ +#include "daemon_common.h" /* Common definitions and types for daemons */ +#include "daemon_option.h" /* Common options for daemons */ +#include "nodeClass.h" /* The main link class */ +#include "nodeTimers.h" /* maintenance timer utilities start/stop */ +#include "nlEvent.h" /* for ... open_netlink_socket */ +#include "hbsBase.h" /* Heartbeat Base Header File */ +#include "hbsAlarm.h" /* for ... hbsAlarm_clear_all */ +#include "alarm.h" /* for ... alarm send message to mtcalarmd */ + +/************************************************************** + * Implementation Structure + ************************************************************** + * + * Call sequence: + * + * daemon_init + * daemon_configure + * daemon_signal_init + * hbs_hostname_read + * hbs_message_init + * hbs_int_socket_init + * hbs_ext_socket_init + * forever ( timer_handler ) + * hbs_pulse_req + * hbs_timer_start + * _pulse_receive + * hbs_timer_stop + * + * Note: Interface implementation is in opposite + * order of the following call sequence + */ + +/* Number of back to back interface errors before the interface is re-initialized. */ +#define INTERFACE_ERRORS_FOR_REINIT (8) + +/* Historical String data for mem_logs */ +static string unexpected_pulse_list[MAX_IFACES] = { "" , "" } ; +static string arrival_histogram[MAX_IFACES] = { "" , "" } ; + +/** This heartbeat service inventory is tracked by + * the same nodeLinkClass that maintenance uses. 
+ * + */ +nodeLinkClass hbsInv ; +nodeLinkClass * get_hbsInv_ptr ( void ) +{ + return (&hbsInv); +} + +/** Setup the pointer */ +int module_init ( void ) +{ + return (PASS); +} + +static unsigned int my_nodetype= CGTS_NODE_NULL ; + +void daemon_sigchld_hdlr ( void ) +{ + ; /* dlog("Received SIGCHLD ... no action\n"); */ +} + +/** + * Daemon Configuration Structure - The allocated struct + * @see daemon_common.h for daemon_config_type struct format. + */ +static daemon_config_type hbs_config ; +daemon_config_type * daemon_get_cfg_ptr () { return &hbs_config ; } + +/** + * Messaging Socket Control Struct - The allocated struct + * @see hbsBase.h for hbs_socket_type struct format. + */ +static hbs_socket_type hbs_sock ; + +msgSock_type * get_mtclogd_sockPtr ( void ) +{ + return (&hbs_sock.mtclogd); +} + +#define SCHED_MONITOR__MAIN_LOOP ((const char *) "---> scheduling latency : main loop :") +#define SCHED_MONITOR__RECEIVER ((const char *) "---> scheduling latency : rx pulses :") +void monitor_scheduling ( unsigned long long & this_time, unsigned long long & prev_time , int data, const char * label_ptr ) +{ + this_time = gettime_monotonic_nsec () ; + if ( this_time > (prev_time + (NSEC_TO_MSEC*(hbs_config.latency_thld)))) /* 10 millisec */ + { + llog ("%4llu.%-4llu msec - %s at line %d\n", + ((this_time-prev_time) > NSEC_TO_MSEC) ? ((this_time-prev_time)/NSEC_TO_MSEC) : 0, + ((this_time-prev_time) > NSEC_TO_MSEC) ? ((this_time-prev_time)%NSEC_TO_MSEC) : 0, + label_ptr, data); + } +// else if ( ! 
strcmp (SCHED_MONITOR__RECEIVER, label_ptr ) && ( data > 10 )) +// { +// ilog ("===> receive latency : batch of %d pulses in under scheduling threshold of %d msec\n", data , hbs_config.latency_thld ); +// } + prev_time = this_time ; +} + +void nodeLinkClass::recalibrate_thresholds ( void ) +{ + if ( hbsInv.hosts > hbs_config.hbs_calibrate_threshold ) + { + hbsInv.hbs_pulse_period = (hbsInv.hosts * hbs_config.hbs_calibrate_period_factor ) ; + hbsInv.hbs_minor_threshold = (hbsInv.hosts * hbs_config.hbs_calibrate_minor_factor ) ; + hbsInv.hbs_degrade_threshold = (hbsInv.hosts * hbs_config.hbs_calibrate_degrade_factor) ; + hbsInv.hbs_failure_threshold = (hbsInv.hosts * hbs_config.hbs_calibrate_fail_factor ) ; + } + else + { + hbsInv.hbs_pulse_period = hbs_config.hbs_pulse_period ; + hbsInv.hbs_minor_threshold = hbs_config.hbs_minor_threshold ; + hbsInv.hbs_degrade_threshold = hbs_config.hbs_degrade_threshold ; + hbsInv.hbs_failure_threshold = hbs_config.hbs_failure_threshold ; + } + + hbsInv.hbs_pulse_period_save = hbsInv.hbs_pulse_period ; + + ilog ("Heartbeat Thresholds ; hosts:%d pulse:%d msecs - minor:%d degrade:%d failure:%d\n", + hbsInv.hosts, + hbsInv.hbs_pulse_period, + hbsInv.hbs_minor_threshold, + hbsInv.hbs_degrade_threshold, + hbsInv.hbs_failure_threshold); +} + +/* Cleanup exit handler */ +void daemon_exit ( void ) +{ + daemon_dump_info (); + daemon_files_fini (); + + /* Close the heatbeat sockets */ + for ( int i = 0 ; i < MAX_IFACES ; i++ ) + { + if ( hbs_sock.tx_sock[i] ) + delete (hbs_sock.tx_sock[i]); + if ( hbs_sock.rx_sock[i] ) + delete (hbs_sock.rx_sock[i]); + } + + /* Close the event socket */ + if ( hbs_sock.hbs_event_tx_sock ) + delete (hbs_sock.hbs_event_tx_sock); + + /* Close the command socket */ + if ( hbs_sock.mtc_to_hbs_sock ) + delete (hbs_sock.mtc_to_hbs_sock); + + exit (0); +} + +/* Number of pulse response socket receive */ +/* retries that should occur in a heartbeat */ +/* period before we declare a node missing */ +/* Note: Value 
that needs to be engineered */ +/* once we get time on real hardware */ +#define MAX_PULSE_RETRIES (3) + +#define HBS_SOCKET_MSEC (5) +/* NOTE(review): MSEC*1000 yields a microsecond count despite the _NSEC name ; confirm how HBS_SOCKET_NSEC is consumed */ +#define HBS_SOCKET_NSEC (HBS_SOCKET_MSEC*1000) +#define HBS_MIN_PERIOD (50) +#define HBS_MAX_PERIOD (999) +#define HBS_VIRT_PERIOD (500) +#define HBS_BACKOFF_FACTOR (4) /* period*this during backoff */ + +/** Control Config Mask - the CONFIG_xxx bits that daemon_configure expects hbs_config.mask to equal after the ini files are parsed */ +#define CONFIG_AGENT_MASK (CONFIG_AGENT_HBS_PERIOD |\ + CONFIG_AGENT_HBS_DEGRADE |\ + CONFIG_AGENT_HBS_FAILURE |\ + CONFIG_AGENT_MULTICAST |\ + CONFIG_SCHED_PRIORITY |\ + CONFIG_MTC_TO_HBS_CMD_PORT |\ + CONFIG_HBS_TO_MTC_EVENT_PORT |\ + CONFIG_AGENT_HBS_MGMNT_PORT |\ + CONFIG_AGENT_HBS_INFRA_PORT |\ + CONFIG_CLIENT_HBS_MGMNT_PORT |\ + CONFIG_CLIENT_MTCALARM_PORT |\ + CONFIG_CLIENT_HBS_INFRA_PORT ) + +/* Startup config read + * + * Handler passed to ini_parse by daemon_configure ; 'user' is the + * daemon_config_type being filled in. Each recognized section/name + * pair stores the converted value in the config struct (and, for the + * heartbeat settings, in hbsInv) and the options that are part of + * CONFIG_AGENT_MASK also set their CONFIG_xxx bit in config_ptr->mask. + * + * NOTE(review): the function returns FAIL after a match in its trailing + * else-if chain and PASS when that chain matched nothing - confirm this + * is the return convention ini_parse expects from its handler. */ +static int hbs_config_handler ( void * user, + const char * section, + const char * name, + const char * value) +{ + daemon_config_type* config_ptr = (daemon_config_type*)user; + + if (MATCH("agent", "heartbeat_period")) + { + config_ptr->hbs_pulse_period = atoi(value); + hbsInv.hbs_pulse_period = atoi(value); + hbsInv.hbs_state_change = true ; + hbsInv.hbs_disabled = false ; + config_ptr->mask |= CONFIG_AGENT_HBS_PERIOD ; + + /* Adjust the heartbeat period in a virtual environment */ + /* NOTE(review): '>= MIN || <= MAX' is true for every value so this range check filters nothing ; '&&' was probably intended - confirm before changing */ + if (( hbsInv.hbs_pulse_period >= HBS_MIN_PERIOD ) || + ( hbsInv.hbs_pulse_period <= HBS_MAX_PERIOD )) + { + struct stat p ; + p.st_size = 0 ; + stat ( HOST_IS_VIRTUAL, &p ) ; + /* a non-empty HOST_IS_VIRTUAL file raises any non-zero period below HBS_VIRT_PERIOD up to HBS_VIRT_PERIOD */ + if ( p.st_size ) + { + if (( hbsInv.hbs_pulse_period != 0 ) && + ( hbsInv.hbs_pulse_period < HBS_VIRT_PERIOD )) + { + config_ptr->hbs_pulse_period = HBS_VIRT_PERIOD ; + hbsInv.hbs_pulse_period = HBS_VIRT_PERIOD ; + hbsInv.hbs_pulse_period_save = HBS_VIRT_PERIOD ; + } + } + } + } + + /* executed on every handler call, not only for heartbeat_period */ + hbsInv.hbs_pulse_period_save = hbsInv.hbs_pulse_period ; + + if (MATCH("agent", "hbs_minor_threshold")) + { + config_ptr->hbs_minor_threshold = atoi(value); + hbsInv.hbs_minor_threshold = atoi(value); + } + if (MATCH("agent", 
"heartbeat_degrade_threshold")) + { + config_ptr->hbs_degrade_threshold = atoi(value); + hbsInv.hbs_degrade_threshold = atoi(value); + config_ptr->mask |= CONFIG_AGENT_HBS_DEGRADE ; + } + if (MATCH("agent", "heartbeat_failure_threshold")) + { + config_ptr->hbs_failure_threshold = atoi(value); + hbsInv.hbs_failure_threshold = atoi(value); + config_ptr->mask |= CONFIG_AGENT_HBS_FAILURE ; + } + if (MATCH("agent", "hbs_calibrate_threshold")) + { + config_ptr->hbs_calibrate_threshold = atoi(value); + } + if (MATCH("agent", "hbs_calibrate_period_factor")) + { + config_ptr->hbs_calibrate_period_factor = atoi(value); + } + if (MATCH("agent", "hbs_calibrate_minor_factor")) + { + config_ptr->hbs_calibrate_minor_factor = atoi(value); + } + if (MATCH("agent", "hbs_calibrate_degrade_factor")) + { + config_ptr->hbs_calibrate_degrade_factor = atoi(value); + } + if (MATCH("agent", "hbs_calibrate_fail_factor")) + { + config_ptr->hbs_calibrate_fail_factor = atoi(value); + } + if (MATCH("agent", "multicast")) + { + config_ptr->multicast = strdup(value); + config_ptr->mask |= CONFIG_AGENT_MULTICAST ; + } + else if (MATCH("agent", "mtc_to_hbs_cmd_port")) + { + config_ptr->mtc_to_hbs_cmd_port = atoi(value); + config_ptr->mask |= CONFIG_MTC_TO_HBS_CMD_PORT ; + } + else if (MATCH("agent", "hbs_to_mtc_event_port")) + { + config_ptr->hbs_to_mtc_event_port = atoi(value); + config_ptr->mask |= CONFIG_HBS_TO_MTC_EVENT_PORT ; + } + else if (MATCH("agent", "scheduling_priority")) + { + int max = sched_get_priority_max(SCHED_RR); + int min = sched_get_priority_min(SCHED_RR); + + config_ptr->scheduling_priority = atoi(value); + config_ptr->mask |= CONFIG_SCHED_PRIORITY ; + + if (( config_ptr->scheduling_priority < min) || + ( config_ptr->scheduling_priority > max)) + { + wlog ("Invalid scheduling priority (%d), overriding to min of %d\n", + config_ptr->scheduling_priority, min ); + wlog ("Specified value of %d is out of acceptable range (%d-%d)\n", + config_ptr->scheduling_priority, min, max ); + 
config_ptr->scheduling_priority = min ; + } + } + else if (MATCH("agent", "hbs_agent_mgmnt_port")) + { + config_ptr->hbs_agent_mgmnt_port = atoi(value); + config_ptr->mask |= CONFIG_AGENT_HBS_MGMNT_PORT ; + } + else if (MATCH("client", "hbs_client_mgmnt_port")) + { + config_ptr->hbs_client_mgmnt_port = atoi(value); + config_ptr->mask |= CONFIG_CLIENT_HBS_MGMNT_PORT ; + } + else if (MATCH("agent", "hbs_agent_infra_port")) + { + config_ptr->hbs_agent_infra_port = atoi(value); + config_ptr->mask |= CONFIG_AGENT_HBS_INFRA_PORT ; + } + else if (MATCH("client", "hbs_client_infra_port")) + { + config_ptr->hbs_client_infra_port = atoi(value); + config_ptr->mask |= CONFIG_CLIENT_HBS_INFRA_PORT ; + } + else if ( MATCH("client", "mtcalarm_req_port") ) + { + config_ptr->mtcalarm_req_port = atoi(value); + config_ptr->mask |= CONFIG_CLIENT_MTCALARM_PORT ; + } + else + { + return (PASS); + } + return (FAIL); +} + + +/* Read the maintenance settings into the daemon configuration. + * + * Parses MTCE_CONF_FILE and then MTCE_INI_FILE with hbs_config_handler, + * verifies that hbs_config.mask equals CONFIG_AGENT_MASK, logs the + * startup settings and then waits, polling every 3 seconds, until both + * the local and the floating ip address of this host are available. + * + * Returns PASS, FAIL_LOAD_INI or FAIL_INI_CONFIG. */ +int daemon_configure ( void ) +{ + bool waiting_msg = false ; + + /* Read the ini */ + hbs_config.mask = 0 ; + if (ini_parse(MTCE_CONF_FILE, hbs_config_handler, &hbs_config) < 0) + { + elog("Can't load '%s'\n", MTCE_CONF_FILE ); + return (FAIL_LOAD_INI); + } + + if (ini_parse(MTCE_INI_FILE, hbs_config_handler, &hbs_config) < 0) + { + /* report the file that actually failed to load */ + elog("Can't load '%s'\n", MTCE_INI_FILE ); + return (FAIL_LOAD_INI); + } + + get_debug_options ( MTCE_CONF_FILE, &hbs_config ); + + /* Verify loaded config against an expected mask + * as an ini file fault detection method */ + if ( hbs_config.mask != CONFIG_AGENT_MASK ) + { + elog ("Error: Agent configuration failed (%x)\n", + ((-1 ^ hbs_config.mask) & CONFIG_AGENT_MASK)); + return (FAIL_INI_CONFIG); + } + + /* the minor threshold is capped at the degrade threshold */ + if ( hbsInv.hbs_minor_threshold > hbsInv.hbs_degrade_threshold ) + { + hbsInv.hbs_minor_threshold = hbsInv.hbs_degrade_threshold ; + } + + // hbsInv.recalibrate_thresholds (); + + /* Log the startup settings */ + ilog("Realtime Pri: RR/%i \n", 
hbs_config.scheduling_priority ); + ilog("Pulse Period: %i msec\n", hbsInv.hbs_pulse_period ); + ilog("Minor Thld: %i misses\n", hbsInv.hbs_minor_threshold ); + ilog("Degrade Thld: %i misses\n", hbsInv.hbs_degrade_threshold ); + ilog("Failure Thld: %i misses\n", hbsInv.hbs_failure_threshold ); + ilog("Multicast: %s\n", hbs_config.multicast ); + + hbs_config.mgmnt_iface = daemon_get_iface_master ( hbs_config.mgmnt_iface ); + ilog("Mgmnt iface : %s\n", hbs_config.mgmnt_iface ); + ilog("Mgmnt RxPort: %d\n", hbs_config.hbs_agent_mgmnt_port ); + ilog("Mgmnt TxPort: %d\n", hbs_config.hbs_client_mgmnt_port ); + + /* Fetch the infrastructure interface name. + * calls daemon_get_iface_master inside so the + * aggregated name is returned if it exists */ + get_infra_iface (&hbs_config.infra_iface ); + /* guard against a NULL name before measuring it ; _setup_pulse_messaging also treats NULL as 'no infra interface' */ + if (( hbs_config.infra_iface != NULL ) && ( strlen(hbs_config.infra_iface) )) + { + hbsInv.infra_network_provisioned = true ; + ilog ("Infra iface : %s\n", hbs_config.infra_iface ); + } + + ilog("Infra RxPort: %d\n", hbs_config.hbs_agent_infra_port ); + ilog("Infra TxPort: %d\n", hbs_config.hbs_client_infra_port ); + + ilog("Command Port: %d (rx)\n", hbs_config.mtc_to_hbs_cmd_port ); + ilog("Event Port : %d (tx)\n", hbs_config.hbs_to_mtc_event_port ); + ilog("Alarm Port : %d (tx)\n", hbs_config.mtcalarm_req_port ); + + hbsInv.hbs_state_change = true ; + + /* pull in the degrade only config option */ + hbsInv.infra_degrade_only = hbs_config.infra_degrade_only ; + + /* warn when the degrade threshold is not below the failure threshold */ + if ( hbsInv.hbs_degrade_threshold >= hbsInv.hbs_failure_threshold ) + { + wlog ("Degrade threshold should be smaller than Failure threshold\n"); + wlog ("Heartbeat 'degrade' state disabled ; see %s\n", MTCE_CONF_FILE); + } + /* wait until both the local and the floating address are known */ + for ( ;; ) + { + get_ip_addresses ( hbsInv.my_hostname, hbsInv.my_local_ip , hbsInv.my_float_ip ); + if ( hbsInv.my_local_ip.empty() || hbsInv.my_float_ip.empty() ) + { + if ( waiting_msg == false ) + { + ilog ("Waiting on ip address config ...\n"); + waiting_msg = true ; + } + mtcWait_secs (3); + } + else + { + break ; + } + } + + /* Set 
Controller Activity State */ + hbs_config.active = daemon_get_run_option ("active") ; + ilog ("Controller : %s\n", + hbs_config.active ? "Active" : "In-Active" ); + + /* pust the activity state into inventory (nodeLinkClass) */ + if ( hbs_config.active == true ) + hbsInv.set_activity_state ( true ); + else + hbsInv.set_activity_state ( false ); + + return (PASS); +} + +static struct mtc_timer hbsTimer ; + +void hbsTimer_handler ( int sig, siginfo_t *si, void *uc) +{ + timer_t * tid_ptr = (void**)si->si_value.sival_ptr ; + + /* Avoid compiler errors/warnings for parms we must + * have but currently do nothing with */ + UNUSED(sig); + UNUSED(uc); + + if ( !(*tid_ptr) ) + { + // tlog ("Called with a NULL Timer ID\n"); + return ; + } + /* is base mtc timer */ + else if (( *tid_ptr == hbsTimer.tid ) ) + { + mtcTimer_stop_int_safe ( hbsTimer ); + hbsTimer.ring = true ; + } + else + { + // wlog ("Unexpected timer - %p", *tid_ptr ); + mtcTimer_stop_tid_int_safe ( tid_ptr ); + } +} + +/****************************/ +/* Initialization Utilities */ +/****************************/ + +/* Initialize the multicast pulse request message */ +/* One time thing ; tx same message all the time. 
*/ +int hbs_message_init ( void ) +{ + /* Build the transmit pulse response message for each interface */ + for ( int i = 0 ; i < MAX_IFACES ; i++ ) + { + memset ( &hbs_sock.tx_mesg[i], 0, sizeof(hbs_message_type)); + memcpy ( &hbs_sock.tx_mesg[i].m[0], &req_msg_header[0], HBS_HEADER_SIZE ); + } + return (PASS); +} + +/* initialize pulse messaging for the specified interface */ +int _setup_pulse_messaging ( iface_enum i, int rmem_max ) +{ + int rc = PASS ; + char * iface = NULL ; + + /* Load up the interface name */ + if ( i == MGMNT_IFACE ) + { + iface = hbs_config.mgmnt_iface ; + } + else if (( i == INFRA_IFACE ) && ( hbs_config.infra_iface != NULL )) + { + iface = hbs_config.infra_iface ; + } + else + { + wlog ("No Infrastructure Interface\n"); + return (RETRY); + } + + /* Start by closing existing sockets just in case this is a (re)initialization */ + if ( hbs_sock.rx_sock[i] ) + { + delete (hbs_sock.rx_sock[i]); + hbs_sock.rx_sock[i] = 0 ; + } + + if ( hbs_sock.tx_sock[i] ) + { + delete (hbs_sock.tx_sock[i]); + hbs_sock.tx_sock[i] = 0 ; + } + + /* Create transmit socket */ + hbs_sock.tx_sock[i] = new msgClassTx(hbs_config.multicast,hbs_sock.tx_port[i],IPPROTO_UDP,iface); + if ( hbs_sock.tx_sock[i] ) + { + if ( hbs_sock.tx_sock[i]->return_status != PASS ) + { + elog("Cannot open multicast transmit socket - rc:%d (%d:%m)\n", hbs_sock.tx_sock[i]->return_status, errno ); + delete (hbs_sock.tx_sock[i]); + hbs_sock.tx_sock[i] = 0 ; + return (FAIL_SOCKET_CREATE); + } + } + else + { + elog("Cannot open multicast transmit socket - null object (%d:%m)\n", errno ); + return (FAIL_SOCKET_CREATE); + } + dlog("Opened multicast transmit socket\n" ); + + /* In order to avoid multicast packets being routed wrong, force sending from that socket */ + hbs_sock.tx_sock[i]->interfaceBind(); + + /* set this tx socket interface with priority class messaging */ + hbs_sock.tx_sock[i]->setPriortyMessaging( iface ); + + /***********************************************************/ + /* 
Setup the Pulse response receive socket */ + /***********************************************************/ + hbs_sock.rx_sock[i] = new msgClassRx(hbs_config.multicast,hbs_sock.rx_port[i],IPPROTO_UDP,iface,true); + if (( hbs_sock.rx_sock[i] == NULL ) || (hbs_sock.rx_sock[i]->return_status != PASS )) + { + elog("Failed opening pulse receive socket (%d:%s)\n", + errno, strerror (errno)); + rc = FAIL_SOCKET_CREATE ; + } + else + { + /* set rx socket buffer size ro rmem_max */ + if (rmem_max != 0 ) + hbs_sock.rx_sock[i]->setSocketMemory ( iface, "rx pulse socket memory", rmem_max ); + else + wlog ("failed to query rmem_max ; using rmem_default\n"); + } + + /* handle failure path */ + if ( rc != PASS ) + { + if ( hbs_sock.rx_sock[i] ) + { + delete (hbs_sock.rx_sock[i]); + hbs_sock.rx_sock[i] = 0 ; + } + if ( hbs_sock.tx_sock[i] ) + { + delete (hbs_sock.tx_sock[i]); + hbs_sock.tx_sock[i] = 0 ; + } + return (rc); + } + + return (rc); +} + +/* Setup the Unix Domain Transmit Pulse Socket */ +int alarm_port_init ( void ) +{ + hbs_sock.alarm_port = daemon_get_cfg_ptr()->mtcalarm_req_port; + hbs_sock.alarm_sock = new msgClassTx(LOOPBACK_IP, hbs_sock.alarm_port, IPPROTO_UDP); + if ( hbs_sock.alarm_sock ) + { + if ( hbs_sock.alarm_sock->return_status == PASS ) + { + hbs_sock.alarm_sock->sock_ok(true); + alarm_register_user ( hbs_sock.alarm_sock ); + } + else + { + elog ("alarm_port_init failed socket setup (rc:%d)\n", + hbs_sock.alarm_sock->return_status ); + } + } + return ( hbs_sock.alarm_sock->return_status ) ; +} + +/* Init the internal/local sockets ; the ones that will no change. + * This way we don't miss add and start commands from maintenance. 
*/ + +int hbs_int_socket_init ( void ) +{ + int rc = PASS ; + + ilog ("internal sockets init ...\n"); + + /******************************************************************/ + /* UDP Tx Message Socket for Heartbeat Events Towards Maintenance */ + /******************************************************************/ + + int port = hbs_config.hbs_to_mtc_event_port ; + hbs_sock.hbs_event_tx_sock = new msgClassTx(LOOPBACK_IP, port, IPPROTO_UDP); + if (hbs_sock.hbs_event_tx_sock->return_status != PASS) + { + elog ("Failed to setup hbs event transmit port %d\n", port ); + return (hbs_sock.hbs_event_tx_sock->return_status) ; + } + + /***************************************************************/ + /* Non-Blocking UDP Rx Message Socket for Maintenance Commands */ + /***************************************************************/ + + port = hbs_config.mtc_to_hbs_cmd_port ; + hbs_sock.mtc_to_hbs_sock = new msgClassRx(LOOPBACK_IP, port, IPPROTO_UDP); + if (hbs_sock.mtc_to_hbs_sock->return_status != PASS) + { + elog ("Failed to setup mtce command receive port %d\n", port ); + return (hbs_sock.mtc_to_hbs_sock->return_status) ; + } + + if ( ( rc = alarm_port_init ()) != PASS ) + { + elog ("Alarm port setup or registration failed (rc:%d)\n", rc ); + } + return (rc); +} + +/* Construct the messaging sockets * + * 1. multicast transmit socket * + * 2. 
unicast receive socket */ +int hbs_ext_socket_init ( void ) +{ + int rc = PASS ; + + ilog ("external sockets init ...\n"); + + /* set rx socket buffer size ro rmem_max */ + int rmem_max = daemon_get_rmem_max () ; + + /* Read the port config strings into the socket struct + * + * These ports are swapped compared to the hbsClient + * + * From the agent perspective + * rx_port is the hbs_agent_..._port + * tx_port is the hbs_client_..._port + * + */ + hbs_sock.rx_port[MGMNT_IFACE] = hbs_config.hbs_agent_mgmnt_port ; + hbs_sock.tx_port[MGMNT_IFACE] = hbs_config.hbs_client_mgmnt_port; + hbs_sock.rx_port[INFRA_IFACE] = hbs_config.hbs_agent_infra_port ; + hbs_sock.tx_port[INFRA_IFACE] = hbs_config.hbs_client_infra_port; + + /* Setup the pulse messaging interfaces */ + SETUP_PULSE_MESSAGING ( hbsInv.infra_network_provisioned, rmem_max ) ; + + return (rc) ; +} + + +/* Send a multicast heartbeat pulse request message on a */ +/* specific port to all listening nodes on the network. */ +int hbs_pulse_request ( iface_enum iface, + unsigned int seq_num, + string hostname_clue, + unsigned int lookup_clue) +{ + int rc = PASS ; + #define MAX_LEN 1000 + char str[MAX_LEN] ; + + /* Add the sequence number */ + hbs_sock.tx_mesg[iface].s = seq_num ; + memset ( &hbs_sock.tx_mesg[iface].m[HBS_HEADER_SIZE], 0, MAX_CHARS_HOSTNAME ); + if (( lookup_clue ) && + ( hostname_clue.length() <= MAX_CHARS_HOSTNAME )) + { + hbs_sock.tx_mesg[iface].c = lookup_clue ; + memcpy ( &hbs_sock.tx_mesg[iface].m[HBS_HEADER_SIZE], + hostname_clue.data(), + hostname_clue.length()); + } + /* Message length is the size of the sequence number, the clue and the buffer */ + + int msg_len = (HBS_MAX_MSG+(sizeof(unsigned int)*2)) ; + if ( hbs_sock.tx_sock[iface] ) + { +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__NO_PULSE_REQUEST, "any" , get_iface_name_str(iface) ) ) + { + goto hbs_pulse_request_out ; + } + else if ( daemon_want_fit ( FIT_CODE__NO_PULSE_REQUEST, "any" , "any" ) ) + { + goto 
hbs_pulse_request_out ; + } +#endif + if ( (rc = hbs_sock.tx_sock[iface]->write((char*)&hbs_sock.tx_mesg[iface], msg_len)) < 0 ) + { + elog("Failed to send Pulse request: %d:%s to %s.%d (rc:%i ; %d:%s)\n", + hbs_sock.tx_mesg[iface].s, + &hbs_sock.tx_mesg[iface].m[0], + hbs_sock.tx_sock[iface]->get_dst_addr()->toString(), + hbs_sock.tx_sock[iface]->get_dst_addr()->getPort(), + rc, errno, strerror(errno) ); + return (FAIL_SOCKET_SENDTO); + } + } + else + { + wlog("Unable to send pulse request - null tx object - auto re-init pending\n"); + return (FAIL_SOCKET_SENDTO); + } + +#ifdef WANT_FIT_TESTING +hbs_pulse_request_out: +#endif + + mlog1("%s Pulse Req: (%5d): %17s:%5d: %d:%d:%x:%s\n", + get_iface_name_str(iface), rc, + hbs_sock.tx_sock[iface]->get_dst_addr()->toString(), + hbs_sock.tx_sock[iface]->get_dst_addr()->getPort(), + hbs_sock.tx_mesg[iface].s, + hbs_sock.tx_mesg[iface].c, + hbs_sock.tx_mesg[iface].f, + hbs_sock.tx_mesg[iface].m); + + + snprintf ( &str[0], MAX_LEN, "%s Pulse Req: %17s:%5d: %u:%u:%s\n", + get_iface_name_str(iface), + hbs_sock.tx_sock[iface]->get_dst_addr()->toString(), + hbs_sock.tx_sock[iface]->get_dst_addr()->getPort(), + hbs_sock.tx_mesg[iface].s, + hbs_sock.tx_mesg[iface].c, + hbs_sock.tx_mesg[iface].m); + mem_log (&str[0]); + + return (PASS); +} + +/* Extract the hostname that follows the first ':' of a pulse message. + * + * Returns the whitespace delimited token found after the ':' (at most + * 31 characters) or an empty string when msg_ptr is NULL or contains + * no ':' separator. */ +string get_hostname_from_pulse ( char * msg_ptr ) +{ + char temp [MAX_HOST_NAME_SIZE]; + string hostname ; + + /* strstr returns NULL when there is no ':' ; guard it so the sscanf + * below never starts from an invalid pointer */ + char * str_ptr = ( msg_ptr != NULL ) ? strstr ( msg_ptr, ":" ) : NULL ; + memset ( temp, 0 , MAX_HOST_NAME_SIZE ); + if ( str_ptr == NULL ) + { + return (hostname); + } + + /* NOTE(review): %31s assumes MAX_HOST_NAME_SIZE is at least 32 - confirm */ + sscanf ( ++str_ptr, "%31s", &temp[0] ); + hostname = temp ; + return (hostname); +} + +int _pulse_receive ( iface_enum iface , unsigned int seq_num ) +{ + int n = 0 ; + + int detected_pulses = 0 ; + + /* get a starting point */ + unsigned long long after_rx_time ; + unsigned long long before_rx_time = gettime_monotonic_nsec (); + + do + { + /* Clean the receive buffer */ + memset ( hbs_sock.rx_mesg[iface].m, 0, HBS_MAX_MSG ); + hbs_sock.rx_mesg[iface].s = 0 ; + hbs_sock.rx_mesg[iface].c = 0 ; + if 
( hbs_sock.rx_sock[iface] == NULL ) + { + elog ("%s cannot receive pulses - null object\n", get_iface_name_str(iface) ); + return (0); + } + if ( (n = hbs_sock.rx_sock[iface]->read((char*)&hbs_sock.rx_mesg[iface], sizeof(hbs_message_type))) != -1 ) + { + mlog1 ("%s Pulse Rsp: (%5d): %17s:%5d: %d:%d:%x:%s\n", + get_iface_name_str(iface), n, + hbs_sock.rx_sock[iface]->get_dst_addr()->toString(), + hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(), + hbs_sock.rx_mesg[iface].s, + hbs_sock.rx_mesg[iface].c, + hbs_sock.rx_mesg[iface].f, + hbs_sock.rx_mesg[iface].m); + + /* Validate the header */ + if ( strstr ( hbs_sock.rx_mesg[iface].m, rsp_msg_header) ) + { + int rc = RETRY ; + string hostname = get_hostname_from_pulse (&hbs_sock.rx_mesg[iface].m[0]); + +#ifdef WANT_FIT_TESTING + if ( hbs_config.testmode == 1 ) + { + if ( daemon_want_fit ( FIT_CODE__NO_PULSE_RESPONSE, hostname, get_iface_name_str(iface) ) ) + { + continue ; + } + else if ( daemon_want_fit ( FIT_CODE__NO_PULSE_RESPONSE, hostname, "any" ) ) + { + continue ; + } + else if ( daemon_want_fit ( FIT_CODE__NO_PULSE_RESPONSE, "any", "any" ) ) + { + continue ; + } + } +#endif + + mlog ("%s Pulse Rsp from (%s)\n", get_iface_name_str(iface), hostname.c_str()); + if ( !hostname.compare("localhost") ) + { + mlog3 ("%s Pulse Rsp (local): %17s:%5d: %d:%d:%x:%s\n", + get_iface_name_str(iface), + hbs_sock.rx_sock[iface]->get_dst_addr()->toString(), + hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(), + hbs_sock.rx_mesg[iface].s, + hbs_sock.rx_mesg[iface].c, + hbs_sock.rx_mesg[iface].f, + hbs_sock.rx_mesg[iface].m); + } + else if ( !hostname.compare(hbsInv.my_hostname)) + { + mlog3 ("%s Pulse Rsp: (self ): %17s:%5d: %d:%d:%x:%s\n", + get_iface_name_str(iface), + hbs_sock.rx_sock[iface]->get_dst_addr()->toString(), + hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(), + hbs_sock.rx_mesg[iface].s, + hbs_sock.rx_mesg[iface].c, + hbs_sock.rx_mesg[iface].f, + hbs_sock.rx_mesg[iface].m); + + hbsInv.manage_pulse_flags ( 
hostname, hbs_sock.rx_mesg[iface].f ); + } + else + { + /* only hosts that hbsInv reports as monitored on this interface are processed as pulse responses */ + if ( hbsInv.monitored_pulse ( hostname , iface ) == true ) + { + #define MAX_LEN 1000 + char str[MAX_LEN] ; + string extra = "Rsp" ; + + /* a response carrying a different sequence number is only logged as SEQ ; remove_pulse is called for matching sequence numbers only */ + if ( seq_num != hbs_sock.rx_mesg[iface].s ) + { + extra = "SEQ" ; + } + else + { + rc = hbsInv.remove_pulse ( hostname, iface, hbs_sock.rx_mesg[iface].c, hbs_sock.rx_mesg[iface].f ) ; + } + snprintf (&str[0], MAX_LEN, "%s Pulse %s: (%5d): %17s:%5d: %u:%u:%x:%s\n", + get_iface_name_str(iface), extra.c_str(), n, + hbs_sock.rx_sock[iface]->get_dst_addr()->toString(), + hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(), + hbs_sock.rx_mesg[iface].s, + hbs_sock.rx_mesg[iface].c, + hbs_sock.rx_mesg[iface].f, + hbs_sock.rx_mesg[iface].m); + mlog1 ("%s", &str[0]); + mem_log (str); + /* NOTE(review): 'extra' is always "Rsp" or "SEQ" at this point, never empty, so detected_pulses is never incremented and the function returns 0 - confirm the intended test */ + if ( extra.empty()) + { + detected_pulses++ ; + } + } + else + { + mlog3 ("%s Pulse Dis: (%5d): %17s:%5d: %d:%d:%x:%s\n", + get_iface_name_str(iface), n, + hbs_sock.rx_sock[iface]->get_dst_addr()->toString(), + hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(), + hbs_sock.rx_mesg[iface].s, + hbs_sock.rx_mesg[iface].c, + hbs_sock.rx_mesg[iface].f, + hbs_sock.rx_mesg[iface].m); + } + + } + + /* rc still holds its initial RETRY value unless remove_pulse was called above */ + /* NOTE(review): ENXIO is tested as a positive value but ENODEV as a negative one - confirm the return convention of remove_pulse */ + if ( rc == ENXIO ) + { + mlog3 ("Unexpected %s Pulse: <%s>\n", get_iface_name_str(iface), + &hbs_sock.rx_mesg[iface].m[0] ); + unexpected_pulse_list[iface].append ( hostname.c_str()); + unexpected_pulse_list[iface].append ( " " ); + } + /* Empty list rc - do nothing */ + else if ( rc == -ENODEV ) + { + /* This error occurs when the active controller is the only enabled host */ + mlog3 ("Remove Pulse Failed due to empty pulse list\n"); + } + } + else + { + /* the message did not contain rsp_msg_header */ + wlog ( "Badly formed message\n" ); + mlog ( "Bad %s Msg: %14s:%5d: %d:%s\n", + get_iface_name_str(iface), + hbs_sock.rx_sock[iface]->get_dst_addr()->toString(), + hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(), + hbs_sock.rx_mesg[iface].s, + hbs_sock.rx_mesg[iface].m) ; + } + } + } while ( n > 0 ) ; + monitor_scheduling ( after_rx_time, before_rx_time, 
detected_pulses, SCHED_MONITOR__RECEIVER ); + return (detected_pulses); +} +/* Build a maintenance event message for 'hostname' and write it to hbs_sock.hbs_event_tx_sock. The message header is selected by 'event_cmd' ; an unsupported 'event_cmd' is logged and FAIL_BAD_CASE is returned. 'iface' is carried in event.parm[0]. Returns PASS once a write succeeds or RETRY after repeated write failures. */ +int send_event ( string & hostname, unsigned int event_cmd, iface_enum iface ) +{ + int bytes ; + int bytes_to_send ; + int rc = PASS ; + int retries = 0 ; /* count of failed write attempts */ + + mtc_message_type event ; + memset (&event, 0 , sizeof(mtc_message_type)); + if ( event_cmd == MTC_EVENT_HEARTBEAT_LOSS ) + { /* dump the in-memory logs before reporting a heartbeat loss */ + daemon_dump_membuf_banner (); + daemon_dump_membuf (); + snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_loss_header()); + } + else if ( event_cmd == MTC_EVENT_LOOPBACK ) + { + snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_event_header()); + } + else if ( event_cmd == MTC_EVENT_HEARTBEAT_MINOR_SET ) + { + snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_event_header()); + } + else if ( event_cmd == MTC_EVENT_HEARTBEAT_MINOR_CLR ) + { + snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_event_header()); + } + else if ( event_cmd == MTC_EVENT_HEARTBEAT_DEGRADE_SET ) + { + snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_event_header()); + } + else if ( event_cmd == MTC_EVENT_HEARTBEAT_DEGRADE_CLR ) + { + snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_event_header()); + } + else if ( event_cmd == MTC_EVENT_HEARTBEAT_READY ) + { + snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_ready_header()); + } + else if (( event_cmd == MTC_EVENT_PMOND_CLEAR ) || + ( event_cmd == MTC_EVENT_PMOND_RAISE ) || + ( event_cmd == MTC_EVENT_HOST_STALLED )) + { + snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_mtce_event_header()); + } + else + { + elog ("Unsupported heartbeat event (%d)\n", event_cmd ); + return ( FAIL_BAD_CASE ); + } + + /* Put the hostname in the buffer - as well */ + snprintf ( &event.buf[0] , MAX_CHARS_HOSTNAME, "%s", hostname.data()); + + /* TODO: obsolete this method in the future as it limits the host name length to 32 */ + snprintf ( &event.hdr[MSG_HEADER_SIZE] , MAX_CHARS_HOSTNAME, "%s", 
hostname.data()); + + event.cmd = event_cmd ; + event.num = 1 ; + event.parm[0] = iface ; + + print_mtc_message ( LOCALHOST, MTC_CMD_TX, event, LOCALHOST , false ); + + /* remove the buffer as it is not needed for this message */ + bytes_to_send = ((sizeof(mtc_message_type))-(BUF_SIZE-hostname.length())) ; + do + { + bytes = hbs_sock.hbs_event_tx_sock->write((char*)&event,bytes_to_send); + if ( bytes <= 0 ) + { + rc = FAIL_TO_TRANSMIT ; + + if ( retries++ > 3 ) + { + elog ("Cannot communicate with maintenance\n"); + return (RETRY); + } + } + else + rc = PASS ; + } while ( bytes <= 0 ) ; + + return rc ; +} + +/* Heartbeat agent initialization ; returns PASS on success, otherwise the FAIL_xxx code of the step that failed */ +int daemon_init ( string iface, string nodetype ) +{ + int rc = 10 ; + + /* nodetype is consumed further down ; UNUSED is kept only as a harmless warning suppressor */ + UNUSED(nodetype); + /* Initialize socket construct and pointer to it */ + memset ( &hbs_sock, 0, sizeof(hbs_sock)); + + /* initialize the timer */ + mtcTimer_init ( hbsTimer, "controller", "heartbeat" ); + + /* Assign interface to config ; 'iface' is a by-value parameter that is destroyed on return so store a heap copy (deliberately never freed) rather than a pointer into it */ + hbs_config.mgmnt_iface = strdup ( iface.c_str() ) ; + + if ( daemon_files_init ( ) != PASS ) + { + elog ("Pid, log or other files could not be opened\n"); + return FAIL_FILES_INIT ; + } + + hbsInv.system_type = daemon_system_type (); + + /* convert node type to integer */ + my_nodetype = get_host_function_mask ( nodetype ) ; + ilog ("Node Type : %s (%d)\n", nodetype.c_str(), my_nodetype ); + + /* Bind signal handlers */ + if ( daemon_signal_init () != PASS ) + { + elog ("daemon_signal_init failed\n"); + rc = FAIL_SIGNAL_INIT ; + } + + /* Configure the agent */ + else if ( (rc = daemon_configure ( )) != PASS ) + { + elog ("Daemon service configuration failed (rc:%i)\n", rc ); + rc = FAIL_DAEMON_CONFIG ; + } + + /* Init the heartbeat request message */ + else if ( hbs_message_init ( ) != PASS ) + { + elog ("Failed to initialize pulse request message\n"); + rc = FAIL_MESSAGE_INIT; + } + + /* Setup the heartbeat service messaging sockets */ + else if ((rc = hbs_int_socket_init ( )) != PASS 
) + { + elog ("internal socket initialization failed (rc:%d)\n", rc ); + return ( FAIL_SOCKET_INIT ) ; + } + + daemon_init_fit(); + return (rc); +} + +void daemon_service_run ( void ) +{ + int exp_pulses[MAX_IFACES] ; + int rc = PASS ; + int counter = 0 ; + int goenabled_wait_log_throttle = 0 ; + bool goenabled = false ; + + /* A variable that throttles external socket init failure retries and + * ultimately triggers an exit if that retry count gets too big */ + int ext_socket_init_fail_count = 0 ; + + /* get a starting point */ + unsigned long long prev_time = gettime_monotonic_nsec (); + unsigned long long this_time = prev_time ; + + /* Used to throttle warning messages that report + * an error transmitting the pulse request */ + int pulse_request_fail_log_counter[MAX_IFACES] ; + + bool heartbeat_request = true ; + unsigned int seq_num = 0 ; + + /* socket descriptor list */ + std::list socks ; + + hbsInv.hbs_state_change = true ; + hbsInv.hbs_disabled = false ; + + /* Set the mode */ + hbsInv.maintenance = false ; + hbsInv.heartbeat = true ; + + /* Load the expected pulses and zero detected */ + for ( int iface = 0 ; iface < MAX_IFACES ; iface++ ) + { + pulse_request_fail_log_counter[iface] = 0 ; + hbsInv.pulse_requests[iface] = 0 ; + } + + /* Make the main loop schedule in real-time */ + struct sched_param param ; + memset ( ¶m, 0, sizeof(struct sched_param)); + param.sched_priority = hbs_config.scheduling_priority ; + if ( sched_setscheduler(0, SCHED_RR, ¶m) ) + { + elog ("sched_setscheduler (0, SCHED_RR, %d ) returned error (%d:%s)\n", + param.sched_priority, errno, strerror(errno)); + } + + /* Not monitoring address changes RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR */ + if (( hbs_sock.ioctl_sock = open_ioctl_socket ( )) <= 0 ) + { + elog ("Failed to create ioctl socket"); + daemon_exit (); + } + + if (( hbs_sock.netlink_sock = open_netlink_socket ( RTMGRP_LINK )) <= 0 ) + { + elog ("Failed to create netlink listener socket"); + daemon_exit (); + } + + + /* CGTS 
4114: Small Footprint: Alarm 200.005 remains active after connectivity restored + * + * Clear self alarms */ + hbsAlarm_clear_all ( hbsInv.my_hostname ); + + /* add this host as inventory to hbsAgent + * Although this host is not monitored for heartbeat, + * there are OOB flags in the heartbeat message that + * are needed to be extracted and locally updated */ + { + /* Scoping this so that the inv variable is freed after the add. + * No need carrying it around on the stack all the time */ + node_inv_type inv ; + + /* init the inv variable */ + node_inv_init ( inv ); + inv.name = hbsInv.my_hostname ; + inv.nodetype = CONTROLLER_TYPE ; + hbsInv.add_heartbeat_host ( inv ); + } + ilog ("Sending ready event to maintenance\n"); + do + { + /* Wait for maintenance */ + rc = send_event ( hbsInv.my_hostname, MTC_EVENT_HEARTBEAT_READY, MGMNT_IFACE ) ; + if ( rc == RETRY ) + { + mtcWait_secs ( 3 ); + } + } while ( rc == RETRY ) ; + + if ( rc == FAIL ) + { + elog ("Unrecoverable heartbeat startup error (rc=%d)\n", rc ); + daemon_exit (); + } + + /* enable the base level signal handler latency monitor */ + daemon_latency_monitor (true); + + /* Run heartbeat service forever or until stop condition */ + for ( hbsTimer.ring = false ; ; ) + { + daemon_signal_hdlr (); + + /******************************************************************* + * + * This handles hbsAgent external socket initialization in the main + * loop only after the goenabled state is reached. 
+ * + *******************************************************************/ + if ( goenabled == false ) + { + if ( hbsInv.system_type == SYSTEM_TYPE__NORMAL ) + { + if ( daemon_is_file_present ( GOENABLED_MAIN_PASS ) == true ) + { + ilog ("GOENABLE (large system)\n"); + goenabled = true ; + } + } + else + { + if ( daemon_is_file_present ( GOENABLED_SUBF_PASS ) == true ) + { + ilog ("GOENABLE (small system)\n"); + goenabled = true ; + } + } + + if ( goenabled == false ) + { + ilog_throttled ( goenabled_wait_log_throttle, 2000, "GOENABLE wait ...\n"); + usleep (50000); /* 50 msec */ + } + + if ( goenabled == true ) + { + /* Setup the heartbeat service messaging sockets */ + if ( (rc = hbs_ext_socket_init ( )) != PASS ) + { + goenabled = false ; + if ( ext_socket_init_fail_count++ == 30 ) + { + elog ("external socket initialization failed (rc:%d) max retries ; exiting ...\n", rc ); + daemon_exit (); + } + else + { + elog ("external socket initialization failed (rc:%d)\n", rc ); + } + } + else + { + ext_socket_init_fail_count = 0 ; + goenabled_wait_log_throttle = 0 ; + + if ( get_link_state ( hbs_sock.ioctl_sock, hbs_config.mgmnt_iface, &hbsInv.mgmnt_link_up_and_running ) ) + { + hbsInv.mgmnt_link_up_and_running = false ; + wlog ("Failed to query %s operational state ; defaulting to down\n", hbs_config.mgmnt_iface ); + } + else + { + ilog ("Mgmnt %s link is %s\n", hbs_config.mgmnt_iface, hbsInv.mgmnt_link_up_and_running ? "Up" : "Down" ); + } + + if ( hbsInv.infra_network_provisioned == true ) + { + if ( get_link_state ( hbs_sock.ioctl_sock, hbs_config.infra_iface, &hbsInv.infra_link_up_and_running ) ) + { + hbsInv.infra_link_up_and_running = false ; + wlog ("Failed to query %s operational state ; defaulting to down\n", hbs_config.infra_iface ); + } + else + { + ilog ("Infra %s link is %s\n", hbs_config.infra_iface, hbsInv.infra_link_up_and_running ? 
"Up" : "Down" ); + } + } + } + } + } + + /***************** Service Sockets ********************/ + + /* Initialize the master fd_set and clear socket list */ + FD_ZERO(&hbs_sock.readfds); + socks.clear(); + + /* Add the mtc command receiver to the select list */ + if (( hbs_sock.mtc_to_hbs_sock ) && + ( hbs_sock.mtc_to_hbs_sock->getFD())) + { + socks.push_front (hbs_sock.mtc_to_hbs_sock->getFD()); + FD_SET(hbs_sock.mtc_to_hbs_sock->getFD(), &hbs_sock.readfds); + } + + /* Add the netlink event listener to the select list */ + if ( hbs_sock.netlink_sock ) + { + socks.push_back (hbs_sock.netlink_sock); + FD_SET(hbs_sock.netlink_sock, &hbs_sock.readfds); + } + + /* Add the management interface to the select list */ + if (( goenabled == true ) && + ( hbs_sock.rx_sock[MGMNT_INTERFACE] ) && + ( hbs_sock.rx_sock[MGMNT_INTERFACE]->getFD())) + { + socks.push_back (hbs_sock.rx_sock[MGMNT_INTERFACE]->getFD()); + FD_SET(hbs_sock.rx_sock[MGMNT_INTERFACE]->getFD(), &hbs_sock.readfds ); + } + + /* Add the INFRA network pulse rx socket if its provisioned and have a valid socket */ + if (( goenabled == true ) && + ( hbsInv.infra_network_provisioned == true ) && + ( hbs_sock.rx_sock[INFRA_INTERFACE] ) && + ( hbs_sock.rx_sock[INFRA_INTERFACE]->getFD())) + { + socks.push_back (hbs_sock.rx_sock[INFRA_INTERFACE]->getFD()); + FD_SET(hbs_sock.rx_sock[INFRA_INTERFACE]->getFD(), &hbs_sock.readfds ); + } + + monitor_scheduling ( this_time, prev_time, seq_num, SCHED_MONITOR__MAIN_LOOP ); + + /* Sort and select() at HBS_SOCKET_NSEC timeout */ + hbs_sock.waitd.tv_sec = 0; + hbs_sock.waitd.tv_usec = HBS_SOCKET_NSEC; + socks.sort(); + + rc = select( socks.back()+1, &hbs_sock.readfds, NULL, NULL, &hbs_sock.waitd); + + /* If the select time out expired then */ + if (( rc < 0 ) || ( rc == 0 )) + { + /* Check to see if the select call failed. */ + /* ... 
but filter Interrupt signal */ + if (( rc < 0 ) && ( errno != EINTR )) + { + elog ("rx_socket select() failed (rc:%d) %s\n", + errno, strerror(errno)); + } + } + else + { + if (( goenabled == true ) && + ( hbs_sock.rx_sock[MGMNT_INTERFACE] ) && + ( FD_ISSET(hbs_sock.rx_sock[MGMNT_INTERFACE]->getFD(), &hbs_sock.readfds))) + { + hbs_sock.fired[MGMNT_INTERFACE] = true ; + } + + if (( goenabled == true ) && + ( hbsInv.infra_network_provisioned == true ) && + ( hbs_sock.rx_sock[INFRA_INTERFACE] ) && + ( hbs_sock.rx_sock[INFRA_INTERFACE]->getFD()) && + ( FD_ISSET(hbs_sock.rx_sock[INFRA_INTERFACE]->getFD(), &hbs_sock.readfds))) + { + hbs_sock.fired[INFRA_INTERFACE] = true ; + } + + if ((hbs_sock.mtc_to_hbs_sock != NULL ) && + ( FD_ISSET(hbs_sock.mtc_to_hbs_sock->getFD(), &hbs_sock.readfds))) + { + int bytes ; + mtc_message_type msg ; + + /* Look for maintenance command messages */ + memset (&msg, 0, sizeof(mtc_message_type)); + bytes = hbs_sock.mtc_to_hbs_sock->read((char*)&msg,sizeof(mtc_message_type)); + if ( bytes > 0 ) + { + mlog ("Received Maintenance Command (%i)\n", bytes ); + mlog ("%s - cmd:0x%x\n", &msg.hdr[0], msg.cmd ); + + if ( !strncmp ( get_hbs_cmd_req_header(), &msg.hdr[0], MSG_HEADER_SIZE )) + { + string hostname = &msg.hdr[MSG_HEADER_SIZE] ; + if ( msg.cmd == MTC_CMD_ADD_HOST ) + { + node_inv_type inv ; + node_inv_init(inv); + inv.name = hostname ; + inv.nodetype = msg.parm[0]; + hbsInv.add_heartbeat_host ( inv ) ; + ilog ("%s added to heartbeat service (%d)\n", hostname.c_str(), inv.nodetype ); + + /* clear any outstanding alarms on the ADD */ + hbsAlarm_clear_all ( hostname ); + + // hbsInv.recalibrate_thresholds (); + } + else if ( msg.cmd == MTC_CMD_DEL_HOST ) + { + for ( int iface = 0 ; iface < MAX_IFACES ; iface++ ) + { + hbsInv.mon_host ( hostname, (iface_enum)iface, false ); + } + + hbsInv.del_host ( hostname ); + ilog ("%s deleted from heartbeat service\n", hostname.c_str()); + + /* clear any outstanding alarms on the DEL */ + hbsAlarm_clear_all 
( hostname ); + + hbsInv.print_node_info(); + + // hbsInv.recalibrate_thresholds (); + } + else if ( msg.cmd == MTC_CMD_STOP_HOST ) + { + for ( int iface = 0 ; iface < MAX_IFACES ; iface++ ) + { + hbsInv.mon_host ( hostname, (iface_enum)iface, false ); + } + ilog ("%s stopping heartbeat service\n", hostname.c_str()); + hbsInv.print_node_info(); + } + else if ( msg.cmd == MTC_CMD_START_HOST ) + { + for ( int iface = 0 ; iface < MAX_IFACES ; iface++ ) + { + hbsInv.mon_host ( hostname, (iface_enum)iface, true ); + } + ilog ("%s starting heartbeat service\n", hostname.c_str()); + hbsInv.print_node_info(); + } + else if ( msg.cmd == MTC_RESTART_HBS ) + { + for ( int iface = 0 ; iface < MAX_IFACES ; iface++ ) + { + hbsInv.mon_host ( hostname, (iface_enum)iface, false ); + hbsInv.mon_host ( hostname, (iface_enum)iface, true ); + } + ilog ("%s restarting heartbeat service\n", hostname.c_str()); + hbsInv.print_node_info(); + } + else if ( msg.cmd == MTC_RECOVER_HBS ) + { + hbsInv.hbs_pulse_period = hbsInv.hbs_pulse_period_save ; + ilog ("%s starting heartbeat recovery (period:%d msec)\n", hostname.c_str(), hbsInv.hbs_pulse_period); + hbsInv.print_node_info(); + } + else if ( msg.cmd == MTC_BACKOFF_HBS ) + { + hbsInv.hbs_pulse_period = (hbsInv.hbs_pulse_period_save * HBS_BACKOFF_FACTOR) ; + ilog ("%s starting heartbeat backoff (period:%d msecs)\n", hostname.c_str(), hbsInv.hbs_pulse_period ); + hbsInv.print_node_info(); + } + else + { + wlog ("Unsupport maintenance command\n"); + } + } + else + { + elog ("Unexpected maintenance message header\n"); + } + } + else + { + elog ("Failed receive from agent domain socket (%i)\n", bytes ); + } + } + if (FD_ISSET( hbs_sock.netlink_sock, &hbs_sock.readfds)) + { + dlog ("netlink socket fired\n"); + /* capture the service result in rc so that the failure log reports it rather than the stale select() return value */ if ( ( rc = hbsInv.service_netlink_events ( hbs_sock.netlink_sock, hbs_sock.ioctl_sock )) != PASS ) + { + elog ("service_netlink_events failed (rc:%d)\n", rc ); + } + } + } + + /***************************************************************/ + 
/**************** Manage Heartbeat Service *********************/ + /***************************************************************/ + + /* bypass heartbeat if the period is out of accepted / tested range */ + if ( hbsInv.hbs_pulse_period < HBS_MIN_PERIOD ) + { + if ( hbsInv.hbs_state_change == true ) + { + wlog ("Heartbeat Disabled by out-of-range period (%d msec)\n", + hbsInv.hbs_pulse_period ); + wlog ("Period must be greater than %d msec, see %s\n", + HBS_MIN_PERIOD, MTCE_CONF_FILE ); + + hbsInv.hbs_disabled = true ; + hbsInv.hbs_state_change = false ; + + /* print current node inventory to the stdio */ + hbsInv.print_node_info(); + + } + } + + /* Manage enabling and disabling the heartbeat service based on + * the state of the management link. + * link up = run heartbeat service + * link down = disable heatbeat service and monitor the link up to re-enable + */ + else if (( hbsInv.mgmnt_link_up_and_running == false ) && + ( hbsInv.hbs_disabled == false )) + { + hbsInv.hbs_disabled = true ; + hbsInv.hbs_state_change = true ; + ilog ("Heartbeat disabled by %s link down event\n", hbs_config.mgmnt_iface ); + counter = 1 ; + } + + /* Recover heartbeat when link comes back up */ + else if (( hbsInv.mgmnt_link_up_and_running == true ) && + ( hbsInv.hbs_disabled == true )) + { + hbsInv.hbs_disabled = false ; + hbsInv.hbs_state_change = true ; + ilog ("Heartbeat Enabled by %s link up event\n", hbs_config.mgmnt_iface ); + counter = 1 ; + } + + /* Send a log indicating the main loop has recognized + * a state change to enable */ + else if (( hbsInv.hbs_state_change == true ) && + ( hbsInv.hbs_disabled == false )) + { + ilog ("Heartbeat Enabled with %d pulse period and %d msec mnfa backoff period\n", + hbsInv.hbs_pulse_period, (hbsInv.hbs_pulse_period_save * HBS_BACKOFF_FACTOR) ); + ilog ("Heartbeat Thresholds ; minor:%d degrade:%d failure:%d\n", + hbsInv.hbs_minor_threshold, + hbsInv.hbs_degrade_threshold, + hbsInv.hbs_failure_threshold); + + /* print current node 
inventory to the stdio */ + hbsInv.print_node_info(); + } + + /* go to sleep if disabled */ + if ( hbsInv.hbs_disabled == true ) + { + wlog_throttled (counter, 1000, "Heartbeat service still disabled\n"); + usleep (50000) ; + continue ; + } + + /* Be sure state change flag is cleared */ + hbsInv.hbs_state_change = false ; + counter = 0 ; + + /* Silent Fault Detection Monitor - Log only for now */ + if ( hbsInv.hbs_silent_fault_detector++ > HBS_MAX_SILENT_FAULT_LOOP_COUNT ) + { + bool some_progress = false ; + + /* Load the expected pulses and zero detected */ + for ( int iface = 0 ; iface < MAX_IFACES ; iface++ ) + { + if ( hbsInv.pulse_requests[iface] > 0 ) + { + hbsInv.hbs_silent_fault_detector = 0 ; + // if ( daemon_is_file_present ( MTC_CMD_FIT__HBSSILENT ) == false ) + // { + some_progress = true ; + // } + hbsInv.pulse_requests[iface] = 0 ; + } + } + if ( some_progress == false ) + { + if ( hbsInv.hbs_silent_fault_logged == false ) + { + hbsInv.hbs_silent_fault_logged = true; + + alarm_warning_log ( hbsInv.my_hostname, SERVICESTATUS_LOG_ID, + "maintenance heartbeat service is not making forward progress ; " + "recommend process restart by controller switchover " + "at earliest convenience" , "service=heartbeat"); + } + hbsInv.hbs_silent_fault_detector = 0 ; + } + } + + if ( hbsTimer.ring == false ) + { + if ( heartbeat_request == true ) + { + string ri = "" ; + int rri = 0 ; + string lf = "\n" ; + + mem_log ((char*)lf.data()); + + /* Get the next Resource Reference Identifier + * and its Resourvce Identifier. 
These values + * are updated by reference */ + hbsInv.get_rris ( ri, rri ); + + /* Load the expected pulses and zero detected */ + for ( int iface = 0 ; iface < MAX_IFACES ; iface++ ) + { + /* Don't service the infrastructure network if it is not provisioned */ + if (( iface == INFRA_IFACE ) && ( hbsInv.infra_network_provisioned == false )) + continue ; + + exp_pulses[iface] = + hbsInv.hbs_expected_pulses[iface] = + hbsInv.create_pulse_list((iface_enum)iface); + + arrival_histogram[iface] = "" ; + unexpected_pulse_list[iface] = "" ; + + rc = hbs_pulse_request ( (iface_enum)iface, seq_num, ri, rri ); + if ( rc != 0 ) + { + /* TODO: Fix this with an alarm */ + wlog_throttled ( pulse_request_fail_log_counter[iface], 100, + "%s hbs_pulse_request failed - rc:%d\n", get_iface_name_str(iface), rc); + + if ( pulse_request_fail_log_counter[iface] == INTERFACE_ERRORS_FOR_REINIT ) + { + _setup_pulse_messaging ( (iface_enum)iface , daemon_get_rmem_max ()) ; + } + } + else + { + hbsInv.pulse_requests[iface]++ ; + pulse_request_fail_log_counter[iface] = 0 ; + } + } + + /* Set this semaphore to false which puts the + * algorithm into 'receive' mode */ + heartbeat_request = false ; + + /* Start the heartbeat timer. + * All nodes are expected to send a + * pulse before this timer expires. */ + if ( hbsInv.hbs_pulse_period >= 1000 ) + { + /* Call the 'second' timer for pulse periods that exceed a second */ + int sec = (hbsInv.hbs_pulse_period/1000) ; + mtcTimer_start ( hbsTimer, hbsTimer_handler, sec ); + } + else + { + /* Otherwise call the msec timer */ + mtcTimer_start_msec ( hbsTimer, hbsTimer_handler, hbsInv.hbs_pulse_period); + } + } + + /* We get here many times while in the audit period. */ + + /* Each time ; loop over each interface trying to get all + * the pulse responses that have come in */ + for ( int iface = 0 ; iface < MAX_IFACES ; iface++ ) + { + /* Do not service the infrastructure interface if it is not provisioned + * We won't get here anyway ... 
gate above prevents it */ + if (( iface == INFRA_IFACE ) && ( hbsInv.infra_network_provisioned != true )) + continue ; + + if ( hbs_sock.fired[iface] == true ) + { + hbs_sock.fired[iface] = false ; + + /* lets start getting the pulse responses from provisioned interfaces */ + /* Receive and handle heartbeat pulse responses from host */ + /* nodes. All responses that come in on specific unicast port. */ + rc = _pulse_receive( (iface_enum)iface, seq_num ); + + /* Creates a string that represents the pulse arrival time */ + /* . none + * 1..9 pulses on that loop + * a..f is 10 to 15 arrivals on that loop + * * is more than 15 in one group + */ + if ( rc == 0 ) + arrival_histogram[iface].append(1,'.'); + else if ( rc > 15 ) + arrival_histogram[iface].append(1,'*'); + else if ( rc > 9 ) + { + char c = (char)(87+rc) ; + arrival_histogram[iface].append(1,c) ; + } + else + { + char c = (char)(48+rc) ; + arrival_histogram[iface].append(1,c) ; + // ilog ("Char:%s", ); + } + + if ( rc > 0 ) + { + if ( rc <= hbsInv.hbs_expected_pulses[iface] ) + { + hbsInv.hbs_expected_pulses[iface] -= rc ; + } + else + { + dlog ("%s more heartbeat responses than expected (exp:%d)\n", + get_iface_name_str(iface), + hbsInv.hbs_expected_pulses[iface] ); + + hbsInv.hbs_expected_pulses[iface] = 0 ; + } +#ifdef WANT_PULSE_LIST_EMPTY_WARNING + if ( hbsInv.hbs_expected_pulses[iface] == 0 ) + { + if ( hbsInv.pulse_list_empty((iface_enum)iface) != true ) + { + elog ("%s Internal - Pulse list should be empty\n", get_iface_name_str(iface)); + } + } +#endif + } + } + } + } + /* + * Heartbeat pulse period is over ! + * Time to take attendance. 
+ * The pulse lists should be empty + * + */ + else + { + for ( int iface = 0 ; iface < MAX_IFACES ; iface++ ) + { + /* Do not service the infrastructure interface if it is not provisioned */ + if (( iface == INFRA_IFACE ) && ( hbsInv.infra_network_provisioned != true )) + continue ; + + #define MAX_LEN 1000 + char str[MAX_LEN] ; + + snprintf (&str[0], MAX_LEN, "%s Histogram: %d - %s\n", + get_iface_name_str(iface), + exp_pulses[iface], + arrival_histogram[iface].c_str()); + + mem_log (str); + + if ( !unexpected_pulse_list[iface].empty() ) + { + snprintf ( &str[0], MAX_LEN, "%s Others : %s\n", + get_iface_name_str(iface), + unexpected_pulse_list[iface].c_str()); + + mem_log(str); + } + hbsInv.lost_pulses ( (iface_enum)iface ); + } + hbsTimer.ring = false ; + heartbeat_request = true ; + seq_num++ ; + } + daemon_load_fit (); + } + daemon_exit (); +} + +/* Push daemon state to log file */ +void daemon_dump_info ( void ) +{ + daemon_dump_membuf_banner (); + + hbsInv.print_node_info (); + hbsInv.memDumpAllState (); + + daemon_dump_membuf (); /* write mem_logs to log file and clear log list */ +} + +const char MY_DATA [100] = { "eieio\n" } ; +const char * daemon_stream_info ( void ) +{ + return (&MY_DATA[0]); +} + +/** Test Head Entry - runs nodeLinkClass::testhead for test ids 1 through 11 on a private test mode instance ; returns PASS if every test passes, otherwise FAIL */ +int daemon_run_testhead ( void ) +{ + int rc = PASS; + + nodeLinkClass * hbsInv_testhead_ptr = new nodeLinkClass ; + hbsInv_testhead_ptr->testmode = true ; + + printf ("\n\n"); + printf (TESTHEAD_BAR); + printf ("| Node Class Test Head - Private and Public Member Functions\n"); + printf (TESTHEAD_BAR); + for ( int i = 0 ; i < 11 ; i++ ) + { + if ( hbsInv_testhead_ptr->testhead ( i+1 ) ) + { + FAILED ; + rc = FAIL ; + } + else + PASSED ; + } + printf (TESTHEAD_BAR); + printf ("| Heartbeat Service Test Head\n"); + printf (TESTHEAD_BAR); + printf (TESTHEAD_BAR); + delete hbsInv_testhead_ptr ; /* release the test instance allocated above ; it was previously leaked */ + return (rc); +} diff --git a/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsAlarm.cpp b/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsAlarm.cpp new file mode 100644 index 
00000000..7fc96b75 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsAlarm.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud 'Heartbeat Agent' Alarm Module + */ + +#include +#include +#include +#include +#include + +using namespace std; + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "alm" + +#include "daemon_common.h" /* */ + +#include "nodeBase.h" /* */ +#include "nodeTimers.h" /* */ +#include "nodeUtil.h" /* */ +#include "hbsAlarm.h" /* for ... this module header */ +#include "alarm.h" /* for ... alarm send message to mtcalarmd */ +/* Issue an alarm_clear for the management heartbeat, infrastructure heartbeat and pmond alarm ids against the specified host */ +void hbsAlarm_clear_all ( string hostname ) +{ + alarm_clear ( hostname, MGMNT_HB_ALARM_ID, MGMNT_NAME ); + alarm_clear ( hostname, INFRA_HB_ALARM_ID, INFRA_NAME ); + alarm_clear ( hostname , PMOND_ALARM_ID, PMON_NAME ); +} + +#ifdef WANT_OLD_CODE +/* Everything up to the matching #endif is only compiled when WANT_OLD_CODE is defined */ +/** Create a WARNING maintenance log */ +int hbsAlarm_warning_log ( string hostname, hbs_alarm_id_enum id, string entity ) +{ + if ( id < HBS_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + + alarm_list[HBS_ALARM_ID__SERVICE].instc_prefix = "service=heartbeat" ; + + instance.append(alarm_list[HBS_ALARM_ID__SERVICE].instc_prefix); + + wlog ("%s %s %s warning log\n", + hostname.c_str(), + identity.c_str(), + instance.c_str()); + + snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), entity.data()); + + return ( alarmUtil_warning_log ( hostname, identity, instance, alarm_list[id].alarm )); + } + return (FAIL_BAD_PARM); +} + +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsAlarm.h b/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsAlarm.h new file mode 100644 index 
Copyright (c) 2015-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud 'Heartbeat Agent' Alarm Header + */ + +#include +#include +#include +#include + +using namespace std; + +/* Consider removing this */ +#include "alarm.h" /* for .. mtcAlarm */ + +#define MGMNT_NAME ((const char *)"Management") +#define INFRA_NAME ((const char *)"Infrastructure") +#define PMON_NAME ((char *)"pmond") + +void hbsAlarm_clear_all ( string hostname ); + +#endif /* __HBSALARM_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsBase.h b/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsBase.h new file mode 100755 index 00000000..d6725dc3 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsBase.h @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Nodal Health Check "Base" Header + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "msgClass.h" + +/** + * @addtogroup hbs_base + * @{ + */ + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "hbs" + +#define ALIGN_PACK(x) __attribute__((packed)) x + +/** Maximum service fail count before action */ +#define MAX_FAIL_COUNT (1) + +/** Heartbeat pulse request/response message header byte size */ +#define HBS_HEADER_SIZE (15) + +#define HBS_MAX_SILENT_FAULT_LOOP_COUNT (1000) + +/** Heartbeat pulse request message header content */ +const char req_msg_header [HBS_HEADER_SIZE+1] = {"cgts pulse req:"}; + +/** Heartbeat pulse response message header content */ +const char rsp_msg_header [HBS_HEADER_SIZE+1] = {"cgts pulse rsp:"}; + +#define HBS_MAX_MSG (HBS_HEADER_SIZE+MAX_CHARS_HOSTNAME) + +/* A heartbeat service message + * if this structire is changed then + * hbs_pulse_request needs to be looked at 
+ */ +typedef struct +{ + /** Message buffer */ + char m [HBS_MAX_MSG]; + + /** Sequence number */ + unsigned int s ; + + /* Fast Lookup Clue Info */ + unsigned int c ; + + /* Status Flags + * ------------ + * bit 0: Process Monitor Status: 1=running + * bit 1: Infrastructure Network: 1=provisioned + * + * */ + unsigned int f ; + + /** message version number */ + unsigned int v ; + +} ALIGN_PACK(hbs_message_type) ; + + +/** Heartbeat service messaging socket control structure */ +typedef struct +{ + /** Mtce to Heartbeat Service Cmd Interface - mtcAgent -> hbsAgent */ + msgClassSock* mtc_to_hbs_sock; + + /** Heartbeat Service Event Transmit Interface - hbsAgent -> mtcAgent */ + msgClassSock* hbs_event_tx_sock; + + /** Heartbeat Service Event Transmit Interface - hbsClient -> mtcAgent */ + msgClassSock* hbs_ready_tx_sock; + + /** PMON Pulse Receive Interface - pmond -> hbsClient */ + msgClassSock* pmon_pulse_sock; + + /** Active monitoring Transmit Interface socket - hbsClient -> pmond */ + /** The addr and port are stored in the shared libamon.so library */ + int amon_socket ; + + /** Heartbeat Pulse Receive Constructs */ + msgClassSock* rx_sock [MAX_IFACES]; /**< rx socket object pointer */ + int rx_port [MAX_IFACES]; /**< rx pulse port number */ + hbs_message_type rx_mesg [MAX_IFACES]; /**< rx pulse message buffer */ + + /** Heartbeat Pulse Transmit Constructs */ + msgClassSock* tx_sock [MAX_IFACES]; /**< tx socket object pointer */ + int tx_port [MAX_IFACES]; /**< tx pulse port number */ + hbs_message_type tx_mesg [MAX_IFACES]; /**< tx pulse message buffer */ + + bool fired [MAX_IFACES]; /**< true if select fired */ + + msgSock_type mtclogd ; /* Not used */ + + /** Heartbeat Alarms Messaging Constructs / Interface */ + msgClassSock* alarm_sock ; /**< alarm socket object pointer */ + int alarm_port ; /**< alarm messaging port number */ + + + /* For select dispatch */ + struct timeval waitd ; + fd_set readfds; + + int netlink_sock ; /* netlink socket */ + int ioctl_sock ; 
/* general ioctl socket */ + + bool mgmnt_link_up_and_running ; + bool infra_link_up_and_running ; + bool mgmnt_link_up_and_running_last ; + bool infra_link_up_and_running_last ; + + +} hbs_socket_type ; + +typedef struct +{ + string proc ; + int pid ; + int status ; + int stalls ; + int periods; + unsigned long long this_count ; + unsigned long long prev_count ; +} procList ; + +typedef struct +{ + unsigned long long this_count ; + unsigned long long prev_count ; +} schedHist ; + +int hbs_refresh_pids ( std::list & proc_list ); +int hbs_process_monitor ( std::list & pmon_list ); +int hbs_self_recovery ( unsigned int cmd ); + +/* Setup the pulse messaging interfaces ; assigns to a variable named 'rc' that must exist in the caller's scope + * 'p' is a bool that indicates if the infrastructure network is provisioned ; 'p' = true means it is provisioned + * 'g' is passed through unchanged as the second argument of _setup_pulse_messaging */ +#define SETUP_PULSE_MESSAGING(p,g) \ +{ \ + if ( ( rc = _setup_pulse_messaging ( MGMNT_IFACE , (g))) != PASS ) \ + { \ + elog ("Failed to setup 'Mgmnt' network pulse messaging (rc:%d)\n", rc ); \ + } \ + if ( (p) == true ) \ + { \ + if (( rc = _setup_pulse_messaging ( INFRA_IFACE , (g))) != PASS ) \ + { \ + elog ("Failed to setup 'Infra' network pulse messaging (rc:%d)\n", rc ); \ + } \ + } \ +} + +/** + * @} hbs_base + */ diff --git a/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsClient.cpp b/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsClient.cpp new file mode 100755 index 00000000..67a5b7ee --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsClient.cpp @@ -0,0 +1,1454 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Nodal Health Check Client Daemon + */ + +/************************************************************** + * Implementation Structure + ************************************************************** + * + * Call sequence: + * + * daemon_init + * daemon_files_init + * daemon_configure + * daemon_signal_init + * hbs_message_init + * hbs_socket_init + * + * daemon_service_run + * forever ( timer_handler ) + * _service_pulse_request + * + * Note: Interface implementation is in opposite + * order of the following call sequence + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for hostent */ +#include +#include +#include +#include +#include +#include /* for close and usleep */ +#include /* for realtime scheduling api */ +#include /* for ... RTMGRP_LINK */ + +using namespace std; + +#include "nodeBase.h" +#include "nodeUtil.h" /* for ... common utilities */ +#include "daemon_ini.h" /* Ini Parser Header */ +#include "daemon_common.h" /* Common definitions and types for daemons */ +#include "daemon_option.h" /* Common options for daemons */ +#include "nodeTimers.h" /* for ... maintenance timers */ +#include "nodeMacro.h" /* for ... CREATE_NONBLOCK_INET_UDP_RX_SOCKET */ +#include "nlEvent.h" /* for ... open_netlink_socket */ +#include "hbsBase.h" /* Heartbeat Base Header File */ + +extern "C" +{ + #include "amon.h" /* for ... 
active monitoring utilities */ +} + +/* Where to send events */ +string mtcAgent_ip = "" ; + +/* A boolean that is used to quickly determine if the infrastructure + * network is provisioned and configured for this daemon to use */ +static bool infra_network_provisioned = false ; + +/* pmon pulse count clear timer */ +struct mtc_timer pmonPulse_timer ; +struct mtc_timer readyEvent_timer ; +static int pmonPulse_counter = 0 ; +typedef struct +{ + struct mtc_timer stallMon_timer ; + struct mtc_timer stallPol_timer ; + bool monitor_mode ; + bool recovery_mode ; + unsigned int b2b_pmond_pulse_misses ; + int monitored_processes ; + int failures ; + + /* process monitor list */ + std::list proc_list ; + + /* process monitor list iterator */ + std::list::iterator proc_ptr ; +} stallMon_type ; + + +static char my_hostname [MAX_HOST_NAME_SIZE+1]; +static string my_macaddr = "" ; +static string my_address = "" ; +static unsigned int my_nodetype= CGTS_NODE_NULL ; +static stallMon_type stallMon ; + +void daemon_sigchld_hdlr ( void ) +{ + ; /* dlog("Received SIGCHLD ... no action\n"); */ +} + +/** + * Daemon Configuration Structure - The allocated struct + * @see daemon_common.h for daemon_config_type struct format. + */ +static daemon_config_type hbs_config ; +daemon_config_type * daemon_get_cfg_ptr () { return &hbs_config ; } + +/** + * Messaging Socket Control Struct - The allocated struct + * @see hbsBase.h for hbs_socket_type struct format. 
+ */ +static hbs_socket_type hbs_sock ; + +void _close_pulse_rx_sock ( int iface ) +{ + if ( hbs_sock.rx_sock[iface] ) + { + delete (hbs_sock.rx_sock[iface]); + hbs_sock.rx_sock[iface] = 0 ; + } +} + +void _close_pulse_tx_sock ( int iface ) +{ + if ( hbs_sock.tx_sock[iface] ) + { + delete (hbs_sock.tx_sock[iface]); + hbs_sock.tx_sock[iface] = 0 ; + } +} + +/* Cleanup exit handler */ +void daemon_exit ( void ) +{ + daemon_dump_info (); + daemon_files_fini (); + + /* Close the heatbeat sockets */ + for ( int i = 0 ; i < MAX_IFACES ; i++ ) + { + _close_pulse_tx_sock ( i ); + _close_pulse_rx_sock ( i ); + } + + /* Close the pmond pulse socket */ + if ( hbs_sock.pmon_pulse_sock ) + delete (hbs_sock.pmon_pulse_sock ); + + if ( hbs_sock.netlink_sock > 0 ) + close (hbs_sock.netlink_sock); + + exit (0); +} + + +void timer_handler ( int sig, siginfo_t *si, void *uc) +{ + timer_t * tid_ptr = (void**)si->si_value.sival_ptr ; + + /* Avoid compiler errors/warnings for parms we must + * have but currently do nothing with */ + UNUSED(sig); + UNUSED(uc); + + if ( !(*tid_ptr) ) + { + // tlog ("Called with a NULL Timer ID\n"); + return ; + } + /* is it the pmon pulse counter clear timer */ + else if (( *tid_ptr == pmonPulse_timer.tid ) ) + { + pmonPulse_counter = 0 ; + } + /* is it the ready event timer ring */ + else if (( *tid_ptr == readyEvent_timer.tid ) ) + { + readyEvent_timer.ring = true ; + } + /* is it the monitor interval stats poll timer */ + else if (( *tid_ptr == stallMon.stallPol_timer.tid ) ) + { + mtcTimer_stop_int_safe ( stallMon.stallPol_timer ); + stallMon.stallPol_timer.ring = true ; + } + /* is it the monitor timer */ + else if (( *tid_ptr == stallMon.stallMon_timer.tid ) ) + { + mtcTimer_stop_int_safe ( stallMon.stallMon_timer ); + stallMon.stallMon_timer.ring = true ; + } + else + { + mtcTimer_stop_tid_int_safe (tid_ptr); + } +} + +void stallMon_init ( void ) +{ + procList temp ; + + dlog ("initializing Stall Monitor\n"); + + /* Stop all the timers if they are 
running */ + if ( stallMon.stallMon_timer.tid ) + mtcTimer_stop ( stallMon.stallMon_timer ); + if ( stallMon.stallPol_timer.tid ) + mtcTimer_stop ( stallMon.stallPol_timer ); + + /* process monitor constructs and controls */ + stallMon.b2b_pmond_pulse_misses = 0 ; + stallMon.monitor_mode = false ; + stallMon.recovery_mode = false ; + stallMon.failures = 0 ; + stallMon.proc_list.clear(); + stallMon.monitored_processes = stallMon.proc_list.size() ; + + temp.status = RETRY ; + temp.pid = 0 ; + temp.stalls = 0 ; + temp.periods = 0 ; + temp.this_count = 0 ; + temp.prev_count = 0 ; + + /* only support stall monitor on computes */ + if ( (my_nodetype & COMPUTE_TYPE) != COMPUTE_TYPE ) + return ; + + if (( hbs_config.mon_process_1 != NULL ) && + ( strncmp ( hbs_config.mon_process_1, "none" , 4 ))) + { + temp.proc = hbs_config.mon_process_1; + stallMon.proc_list.push_back(temp); + } + if (( hbs_config.mon_process_2 != NULL ) && + ( strncmp ( hbs_config.mon_process_2, "none" , 4 ))) + { + temp.proc = hbs_config.mon_process_2; + stallMon.proc_list.push_back(temp); + } + if (( hbs_config.mon_process_3 != NULL ) && + ( strncmp ( hbs_config.mon_process_3, "none" , 4 ))) + { + temp.proc = hbs_config.mon_process_3; + stallMon.proc_list.push_back(temp); + } + if (( hbs_config.mon_process_4 != NULL ) && + ( strncmp ( hbs_config.mon_process_4, "none" , 4 ))) + { + temp.proc = hbs_config.mon_process_4; + stallMon.proc_list.push_back(temp); + } + if (( hbs_config.mon_process_5 != NULL ) && + ( strncmp ( hbs_config.mon_process_5, "none" , 4 ))) + { + temp.proc = hbs_config.mon_process_5; + stallMon.proc_list.push_back(temp); + } + if (( hbs_config.mon_process_6 != NULL ) && + ( strncmp ( hbs_config.mon_process_6, "none" , 4 ))) + { + temp.proc = hbs_config.mon_process_6; + stallMon.proc_list.push_back(temp); + } + if (( hbs_config.mon_process_7 != NULL ) && + ( strncmp ( hbs_config.mon_process_7, "none" , 4 ))) + { + temp.proc = hbs_config.mon_process_7; + 
stallMon.proc_list.push_back(temp); + } + + for ( stallMon.proc_ptr = stallMon.proc_list.begin(); + stallMon.proc_ptr != stallMon.proc_list.end(); + stallMon.proc_ptr++ ) + { + ilog ("Monitor Proc: %s\n", stallMon.proc_ptr->proc.c_str()); + } + + stallMon.monitored_processes = stallMon.proc_list.size() ; +} + +/** Client Config mask */ +#define CONFIG_CLIENT_MASK (CONFIG_AGENT_MULTICAST |\ + CONFIG_AGENT_HBS_INFRA_PORT |\ + CONFIG_AGENT_HBS_MGMNT_PORT |\ + CONFIG_CLIENT_HBS_INFRA_PORT |\ + CONFIG_CLIENT_HBS_MGMNT_PORT |\ + CONFIG_CLIENT_PULSE_PORT |\ + CONFIG_SCHED_PRIORITY) + +/* Startup config read */ +static int hbs_config_handler ( void * user, + const char * section, + const char * name, + const char * value) +{ + daemon_config_type* config_ptr = (daemon_config_type*)user; + + if (MATCH("agent", "multicast")) + { + config_ptr->multicast = strdup(value); + config_ptr->mask |= CONFIG_AGENT_MULTICAST ; + } + else if (MATCH("agent", "hbs_agent_mgmnt_port")) + { + config_ptr->hbs_agent_mgmnt_port = atoi(value); + config_ptr->mask |= CONFIG_AGENT_HBS_MGMNT_PORT ; + } + else if (MATCH("client", "hbs_client_mgmnt_port")) + { + config_ptr->hbs_client_mgmnt_port = atoi(value); + config_ptr->mask |= CONFIG_CLIENT_HBS_MGMNT_PORT ; + } + else if (MATCH("agent", "mtc_rx_mgmnt_port")) + { + config_ptr->mtc_rx_mgmnt_port = atoi(value); + } + else if (MATCH("debug", "stall_mon_start_delay")) + { + config_ptr->start_delay = atoi(value); + } + else if (MATCH("agent", "hbs_agent_infra_port")) + { + config_ptr->hbs_agent_infra_port = atoi(value); + config_ptr->mask |= CONFIG_AGENT_HBS_INFRA_PORT ; + } + else if (MATCH("client", "hbs_client_infra_port")) + { + config_ptr->hbs_client_infra_port = atoi(value); + config_ptr->mask |= CONFIG_CLIENT_HBS_INFRA_PORT ; + } + else if (MATCH("client", "scheduling_priority")) + { + int max = sched_get_priority_max(SCHED_FIFO); + int min = sched_get_priority_min(SCHED_FIFO); + + config_ptr->scheduling_priority = atoi(value); + config_ptr->mask 
|= CONFIG_SCHED_PRIORITY ; + + if (( config_ptr->scheduling_priority < min) || + ( config_ptr->scheduling_priority > max)) + { + wlog ("Invalid scheduling priority, overriding to min of %d\n", min ); + wlog ("Specified value of %d is out of acceptable range (%d-%d)\n", + config_ptr->scheduling_priority, min, max ); + config_ptr->scheduling_priority = min ; + } + } + else if (MATCH("client", "pmon_pulse_port")) + { + config_ptr->pmon_pulse_port = atoi(value); + config_ptr->mask |= CONFIG_CLIENT_PULSE_PORT ; + } + else + { + return (PASS); + } + return (FAIL); +} + +/* Read the hbs.ini file and load agent */ +/* settings into the daemon configuration */ +int daemon_configure ( void ) +{ + int rc = FAIL ; + + hbs_config.start_delay = 300 ; + + /* Read the ini */ + hbs_config.mask = 0 ; + if (ini_parse(MTCE_CONF_FILE, hbs_config_handler, &hbs_config) < 0) + { + elog("Failed to load '%s'\n", MTCE_CONF_FILE ); + return(FAIL_LOAD_INI); + } + if (ini_parse(MTCE_INI_FILE, hbs_config_handler, &hbs_config) < 0) + { + elog("Failed to load '%s'\n", MTCE_INI_FILE ); + return(FAIL_LOAD_INI); + } + + get_debug_options ( MTCE_CONF_FILE, &hbs_config ); + + /* Verify loaded config against an expected mask + * as an ini file fault detection method */ + if ( hbs_config.mask != CONFIG_CLIENT_MASK ) + { + elog ("Client configuration failed (%x)\n", + (( -1 ^ hbs_config.mask ) & CONFIG_CLIENT_MASK) ); + rc = FAIL_INI_CONFIG ; + } + else + { + ilog("Realtime Pri: FIFO/%i \n", hbs_config.scheduling_priority ); + ilog("Multicast: %s\n", hbs_config.multicast ); + + hbs_config.mgmnt_iface = daemon_get_iface_master ( hbs_config.mgmnt_iface ); + ilog("Mgmnt iface : %s\n", hbs_config.mgmnt_iface ); + ilog("Mgmnt RxPort: %d\n", hbs_config.hbs_client_mgmnt_port ); + ilog("Mgmnt TxPort: %d\n", hbs_config.hbs_agent_mgmnt_port ); + + get_iface_macaddr ( hbs_config.mgmnt_iface, my_macaddr ); + get_iface_address ( hbs_config.mgmnt_iface, my_address, true ); + get_hostname ( &my_hostname[0], 
MAX_HOST_NAME_SIZE ); + + /* Fetch the infrastructure interface name. + * calls daemon_get_iface_master inside so the + * aggrigated name is returned if it exists */ + get_infra_iface (&hbs_config.infra_iface ); + if ( strlen(hbs_config.infra_iface) ) + { + infra_network_provisioned = true ; + ilog ("Infra iface : %s\n", hbs_config.infra_iface ); + } + ilog("Infra RxPort: %d\n", hbs_config.hbs_client_infra_port ); + ilog("Infra TxPort: %d\n", hbs_config.hbs_agent_infra_port ); + + /* initialize the stall detection monitor */ + stallMon_init (); + + ilog("Procmon Thld: %d pmond pulse misses\n", hbs_config.stall_pmon_thld ); + ilog("Recover Thld: %d process stalls\n", hbs_config.stall_rec_thld ); + ilog("Monitor |--|: %d secs\n", hbs_config.stall_mon_period ); + ilog("Monitor Poll: %d secs\n", hbs_config.stall_poll_period ); + ilog("Monitor Dlay: %d secs\n", hbs_config.start_delay ); + + rc = PASS ; + } + + return (rc); +} + +/****************************/ +/* Initialization Utilities */ +/****************************/ + +/* Initialize the unicast pulse response message */ +/* One time thing ; tx same message all the time. 
*/ +int hbs_message_init ( void ) +{ + /* Build the transmit pulse response message for each interface */ + for ( int i = 0 ; i < MAX_IFACES ; i++ ) + { + memset ( &hbs_sock.tx_mesg[i], 0, sizeof (hbs_message_type)); + memcpy ( &hbs_sock.tx_mesg[i].m[0], &rsp_msg_header[0], HBS_HEADER_SIZE ); + memcpy ( &hbs_sock.tx_mesg[i].m[HBS_HEADER_SIZE], my_hostname, strlen(my_hostname)); + } + return (PASS); +} + +/* Initialize pulse messaging for the specified interface + * This is called by a macro defined in hbsBase.h */ +int _setup_pulse_messaging ( iface_enum i, int rmem ) +{ + int rc = PASS ; + char * iface = NULL ; + + /* client sockets are not modified */ + UNUSED(rmem); + + /* Load up the interface name */ + if ( i == MGMNT_IFACE ) + { + iface = hbs_config.mgmnt_iface ; + } + else if (( i == INFRA_IFACE ) && ( hbs_config.infra_iface != NULL )) + { + iface = hbs_config.infra_iface ; + } + else + { + wlog ("No Infrastructure Interface\n"); + return (RETRY); + } + + _close_pulse_rx_sock (i); + _close_pulse_tx_sock (i); + + /********************************************************************/ + /* Setup multicast Pulse Request Receive Socket */ + /********************************************************************/ + + hbs_sock.rx_sock[i] = + new msgClassRx(hbs_config.multicast,hbs_sock.rx_port[i],IPPROTO_UDP,iface,true,true); + if (hbs_sock.rx_sock[i]->return_status != PASS) + { + elog("Cannot create socket (%d) (%d:%m)\n", i, errno ); + _close_pulse_rx_sock (i); + return (FAIL_SOCKET_CREATE); + } + hbs_sock.rx_sock[i]->sock_ok(true); + + /* Setup unicast transmit (reply) socket */ + hbs_sock.tx_sock[i] = + new msgClassTx(hbs_config.multicast,hbs_sock.tx_port[i],IPPROTO_UDP, iface); + if (hbs_sock.tx_sock[i]->return_status != PASS) + { + elog("Cannot create unicast transmit socket (%d) (%d:%m)\n", i, errno ); + _close_pulse_tx_sock(i); + return (FAIL_SOCKET_CREATE); + } + hbs_sock.tx_sock[i]->sock_ok(true); + + /* set this tx socket interface with priority class 
messaging */ + hbs_sock.tx_sock[i]->setPriortyMessaging( iface ); + + return (rc); +} + +void _close_hbs_ready_tx_socket ( void ) +{ + if (hbs_sock.hbs_ready_tx_sock) + { + delete (hbs_sock.hbs_ready_tx_sock); + hbs_sock.hbs_ready_tx_sock = 0 ; + } +} + +void setup_ready_tx_socket ( void ) +{ + _close_hbs_ready_tx_socket (); + mtcAgent_ip = getipbyname ( CONTROLLER ); + hbs_sock.hbs_ready_tx_sock = new msgClassTx(mtcAgent_ip.c_str(), hbs_config.mtc_rx_mgmnt_port, IPPROTO_UDP, hbs_config.mgmnt_iface); + + if ( hbs_sock.hbs_ready_tx_sock ) + { + /* look for fault insertion request */ + if ( daemon_is_file_present ( MTC_CMD_FIT__MGMNT_TXSOCK ) ) + hbs_sock.hbs_ready_tx_sock->return_status = FAIL ; + + if ( hbs_sock.hbs_ready_tx_sock->return_status == PASS ) + { + hbs_sock.hbs_ready_tx_sock->sock_ok(true); + // ilog ("Ready Event TX Socket setup Ok \n"); + } + else + { + elog ("failed to init 'ready event tx' socket (rc:%d)\n", + hbs_sock.hbs_ready_tx_sock->return_status ); + hbs_sock.hbs_ready_tx_sock->sock_ok(false); + } + } +} + +/* Construct the messaging sockets * + * 1. multicast receive socket (rx_sock) * + * 2. 
unicast transmit socket (tx_sock) */
+/* Also creates the 'ready event' transmit socket, the pmond pulse
+ * receive socket and the active monitoring socket.
+ *
+ * Returns PASS, the pulse messaging setup result when that failed,
+ * FAIL_SOCKET_CREATE or FAIL_SOCKET_NOBLOCK. */
+int hbs_socket_init ( void )
+{
+    int rc = PASS ;
+    int on = 1 ;
+
+    /* set rx socket buffer size to rmem_max */
+    int rmem_max = daemon_get_rmem_max () ;
+
+    setup_ready_tx_socket ();
+
+    /* Read the port config strings into the socket struct
+     *
+     * These ports are swapped compared to the hbsAgent
+     *
+     * From the client perspective
+     *     rx_port is the hbs_client_..._port
+     *     tx_port is the hbs_agent_..._port
+     *
+     */
+    hbs_sock.rx_port[MGMNT_IFACE] = hbs_config.hbs_client_mgmnt_port;
+    hbs_sock.tx_port[MGMNT_IFACE] = hbs_config.hbs_agent_mgmnt_port ;
+    hbs_sock.rx_port[INFRA_IFACE] = hbs_config.hbs_client_infra_port;
+    hbs_sock.tx_port[INFRA_IFACE] = hbs_config.hbs_agent_infra_port ;
+
+    /* Setup the pulse messaging interfaces ; the macro leaves the result
+     * of the last interface it set up in 'rc' */
+    SETUP_PULSE_MESSAGING(infra_network_provisioned, rmem_max ) ;
+
+    /***********************************************************/
+    /* Setup the PMON I'm Alive Pulse Receive Socket           */
+    /***********************************************************/
+
+    hbs_sock.pmon_pulse_sock = new msgClassRx(LOOPBACK_IP,hbs_config.pmon_pulse_port,IPPROTO_UDP);
+
+    /* give up if the pulse messaging setup above failed */
+    if ( rc ) return (rc) ;
+
+    /* 'rc' says nothing about the socket just created ; check the
+     * socket's own status before declaring it usable */
+    if ( hbs_sock.pmon_pulse_sock->return_status != PASS )
+    {
+        elog ("Cannot create pmon pulse receive socket (%d:%m)\n", errno );
+        return (FAIL_SOCKET_CREATE);
+    }
+    hbs_sock.pmon_pulse_sock->sock_ok(true);
+
+    /***************************************************
+     * Open the active monitoring socket
+     ***************************************************/
+
+    char filename [MAX_FILENAME_LEN] ;
+    string port_string ;
+
+    /* the port is read from this process's own pmon config file */
+    snprintf ( filename , MAX_FILENAME_LEN, "%s/%s.conf", PMON_CONF_FILE_DIR, program_invocation_short_name ) ;
+
+    if ( ini_get_config_value ( filename, "process", "port", port_string , false ) != PASS )
+    {
+        elog ("failed to get active monitor port from %s\n", filename );
+        hbs_sock.amon_socket = 0 ;
+        return (FAIL_SOCKET_CREATE);
+    }
+
+    hbs_sock.amon_socket = active_monitor_initialize ( program_invocation_short_name, atoi(port_string.data()));
+    ilog ("Active Monitor Socket %d\n", hbs_sock.amon_socket );
+    if ( 0 > hbs_sock.amon_socket )
+        hbs_sock.amon_socket = 0 ;
+
+    /* Make the active monitor socket non-blocking
+     * NOTE(review): after a failed initialize above this operates on
+     * descriptor 0 - confirm that is intended */
+    rc = ioctl(hbs_sock.amon_socket, FIONBIO, (char *)&on);
+    if ( 0 > rc )
+    {
+        elog ("Failed to set amon socket non-blocking (%d:%m)\n", errno);
+        return (FAIL_SOCKET_NOBLOCK);
+    }
+
+    return (PASS);
+}
+
+
+/* Get Process Monitor Pulse message
+ *
+ * Drains the pmond pulse socket and returns the number of messages read
+ * whose header matches the pmond pulse header. Reading stops at the first
+ * read that does not return exactly one header-only message, or once more
+ * than MAX_ERRORS messages with an invalid header have been seen. */
+int get_pmon_pulses ( void )
+{
+    mtc_message_type msg ;
+    int bytes = 0 ;
+    int count = 0 ;
+
+    /* a pulse is a maintenance message without its buffer portion */
+    int expected_bytes = ((sizeof(mtc_message_type))-(BUF_SIZE)) ;
+    #define MAX_ERRORS 20
+
+    /* Default to no pulse received */
+    int pulses = 0 ;
+
+    /* Empty the receive buffer. */
+    do
+    {
+        /* Receive event messages */
+        memset ( &msg , 0, sizeof(mtc_message_type));
+        bytes = hbs_sock.pmon_pulse_sock->read((char*)&msg, sizeof(mtc_message_type));
+        if ( bytes == expected_bytes )
+        {
+            if ( !strncmp ( &msg.hdr[0] , get_pmond_pulse_header(), MSG_HEADER_SIZE ))
+            {
+                pulses++ ;
+                mlog ("Pmon Pulse (%s) (%d)\n", msg.hdr, pulses );
+            }
+            else
+            {
+                /* gracefully deal with error case - "count"
+                 * is incremented in this macro */
+                wlog_throttled ( count, MAX_ERRORS,
+                    "Invalid pmon pulse message (bytes=%d)\n", bytes );
+            }
+
+            /* get out if we are seeing a bunch of errors */
+            if ( count > MAX_ERRORS )
+                return (pulses);
+        }
+    } while ( bytes == expected_bytes ) ;
+    return (pulses);
+}
+
+/* The cached Resource Reference Index ; zero until the first pulse
+ * request carrying this host's name has been received */
+static unsigned int my_rri = 0 ;
+
+/*************************************************************
+ *
+ * Name : _service_pulse_request
+ *
+ * Receive the controller's multicast pulse request messages
+ * and send a unicast reply to the sender on the same network.
+ *
+ * This utility supports the following networks
+ *
+ * - management network
+ * - infrastructure network (if configured)
+ *
+ * For each message, look inside the message for
+ *
+ * 1. the header key
+ * 2. the hostname key.
+ * if the hostname key matches this host's name
+ * then cache the clue key (RRI: resource reference
+ * identifier) and send it back in every response.
+ * If a different clue key is found in a later message + * update the cached one with the new one and use it + * instead. + * + * The clue key can change at any time without prior + * notice. + * + * Just send back zero until the first hostname key + * match is found. + * + * Receive message Format: + * + * "cgts pulse req:xxx.xxx.xxx + * + * Construct a response message containing a response header + * key, this node's hostname and he cached clue key (RRI) + * and send the response back to the controller that sent it + * + **************************************************************/ + +static int rx_error_count[MAX_IFACES] = {0,0} ; +static int tx_error_count[MAX_IFACES] = {0,0} ; + +int _service_pulse_request ( iface_enum iface , unsigned int flags ) +{ + unsigned int s = 0 ; /* Sequence number */ + int n = 0 ; /* message size */ + int rc = 0 ; + + if (( iface != MGMNT_IFACE ) && ( iface != INFRA_IFACE )) + return (FAIL_BAD_CASE); + + memset ( (char*) &hbs_sock.rx_mesg[iface], 0, sizeof(hbs_message_type)); + if ( ! 
hbs_sock.rx_sock[iface] ) + { + elog ("cannot receive from null rx_mesg[%s] socket\n", get_iface_name_str(iface) ); + return (FAIL_TO_RECEIVE); + } + else if ( hbs_sock.rx_sock[iface]->sock_ok() == false ) + { + elog ("cannot receive from failed rx_mesg[%s] socket\n", get_iface_name_str(iface) ); + return (FAIL_TO_RECEIVE); + } + + n = hbs_sock.rx_sock[iface]->read((char*)&hbs_sock.rx_mesg[iface], sizeof(hbs_message_type)); + + if( n < HBS_HEADER_SIZE ) + { + rx_error_count[iface]++ ; + + /* throtle the log so that if they come back-to-back we avoid flooding */ + if ( n == -1 ) + { + if ( rx_error_count[iface] > 1 ) + { + wlog_throttled ( rx_error_count[iface], 500, "%s receive error (%d:%m)\n", get_iface_name_str(iface), errno ); + } + } + else + { + wlog_throttled ( rx_error_count[iface], 500, "%s message underrun (expected %ld but got %d)\n", + get_iface_name_str(iface), sizeof(hbs_message_type), n ); + } + if ( rx_error_count[iface] == 100 ) + { + wlog ( "%s is getting a lot of receive errors (%d:%m)\n", get_iface_name_str(iface), errno ); + } + return (FAIL_TO_RECEIVE); + } + + /* Clear the error count since we got a good receive */ + rx_error_count[iface] = 0 ; + +#ifdef WANT_NO_SELF_HEARTBEAT_REPLY + /* Don't reply to the heartbeat if the request came from myself */ + if ( ! 
strncmp ( my_address.data(), + hbs_sock.rx_sock[iface]->get_dst_addr()->toString(), + MAX_CHARS_IN_IP_ADDR )) + { + ilog ("%s Refusing to send heartbeat response to self\n", hbs_sock.rx_sock[iface]->get_dst_addr()->toString()); + return (PASS); + } +#else + /* We use this to monitor pmond on active controller */ +#endif + + /* Save the sequence number */ + s = hbs_sock.rx_mesg[iface].s ; + + mlog ("\n"); + mlog ("%s Pulse Req: %s:%5d: %d: :%s RRI:%d\n", get_iface_name_str(iface), + hbs_sock.rx_sock[iface]->get_dst_addr()->toString(), + hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(), + hbs_sock.rx_mesg[iface].s, + hbs_sock.rx_mesg[iface].m, + hbs_sock.rx_mesg[iface].c); + + if ( strncmp ( (const char *)&hbs_sock.rx_mesg[iface].m, (const char *)&req_msg_header, HBS_HEADER_SIZE )) + { + wlog_throttled ( rx_error_count[iface], 200, "%s Invalid header (%d:%s)\n", + get_iface_name_str(iface), + hbs_sock.rx_mesg[iface].s, + hbs_sock.rx_mesg[iface].m ); + + mlog ("Detected: %d <%s>\n", HBS_HEADER_SIZE,hbs_sock.rx_mesg[iface].m); + mlog ("Expected: %d <%s>\n", HBS_HEADER_SIZE,req_msg_header); + return (FAIL_MSG_HEADER) ; + } + + /* Manage the Resource Reference Index (RRI) "lookup clue" */ + if ( ! strncmp ( &hbs_sock.rx_mesg[iface].m[HBS_HEADER_SIZE], &my_hostname[0], MAX_CHARS_HOSTNAME )) + { + if( my_rri!= hbs_sock.rx_mesg[iface].c ) + { + my_rri = hbs_sock.rx_mesg[iface].c ; + ilog ("%s Caching New RRI: %d\n", &my_hostname[0], my_rri ); + } + } + + /* Add my RRI to the response message */ + hbs_sock.tx_mesg[iface].c = my_rri ; + + /* Clear struct */ + hbs_sock.tx_mesg[iface].s = s ; + hbs_sock.tx_mesg[iface].f = flags ; + if ( pmonPulse_counter ) + { + hbs_sock.tx_mesg[iface].f |= ( PMOND_FLAG ) ; + } + if ( infra_network_provisioned == true ) + { + hbs_sock.tx_mesg[iface].f |= INFRA_FLAG ; + } + + n = (int)sizeof(hbs_message_type) ; + + if ( ! 
hbs_sock.tx_sock[iface] ) + { + elog ("cannot send to null tx_mesg[%s] socket\n", get_iface_name_str(iface) ); + return (FAIL_TO_TRANSMIT); + } + else if ( hbs_sock.tx_sock[iface]->sock_ok() == false ) + { + elog ("cannot send to failed tx_mesg[%s] socket\n", get_iface_name_str(iface) ); + return (FAIL_TO_TRANSMIT); + } + +#ifdef WANT_PULSE_RESPONSE_FIT + if (( iface == INFRA_IFACE ) && ( daemon_is_file_present ( MTC_CMD_FIT__NO_INFRA_RSP ))) + { + wlog ("refusing to send %s pulse reply ; due to FIT\n", get_iface_name_str(iface)); + return PASS ; + } + + if (( iface == MGMNT_IFACE ) && ( daemon_is_file_present ( MTC_CMD_FIT__NO_MGMNT_RSP ))) + { + wlog ("refusing to send %s pulse reply ; due to FIT\n", get_iface_name_str(iface)); + return PASS ; + } +#endif + + /* Send pulse response message with sequence number, flags and resource referecen index */ + rc = hbs_sock.tx_sock[iface]->reply(hbs_sock.rx_sock[iface],(char*)&hbs_sock.tx_mesg[iface], n); + if ( rc == -1 ) + { + elog ("Failed to sendto socket %d through %s:%d len:%d (%s) (%d:%s)\n", + hbs_sock.tx_sock[iface]->getFD(), + hbs_sock.tx_sock[iface]->get_dst_addr()->toString(), + hbs_sock.tx_sock[iface]->get_dst_addr()->getPort(), + hbs_sock.tx_sock[iface]->get_dst_addr()->getSockLen(), + get_iface_name_str(iface), errno, strerror(errno)); + } + else if ( rc != n) + { + /* Avoid log flooding + elog ("unicast send failed. 
(%d)\n", rc); */ + wlog_throttled ( tx_error_count[iface], 200, + "%s Pulse Rsp: %d:%d bytes < %d:%s > to <%s>\n", + get_iface_name_str(iface), n, rc, + hbs_sock.tx_mesg[iface].s, + &hbs_sock.tx_mesg[iface].m[0], + &hbs_sock.rx_mesg[iface].m[0]); + return (rc); + } + else + { + mlog ("%s Pulse Rsp: %s:%5d: %d:%d:%s RRI:%d (%d)\n", + get_iface_name_str(iface), + hbs_sock.tx_sock[iface]->get_dst_addr()->toString(), + hbs_sock.tx_sock[iface]->get_dst_addr()->getPort(), + hbs_sock.tx_mesg[iface].s, + hbs_sock.tx_mesg[iface].f, + hbs_sock.tx_mesg[iface].m, + hbs_sock.tx_mesg[iface].c, + pmonPulse_counter); + /* Clear the error count since we got a good transmit */ + tx_error_count[iface] = 0 ; + } + return PASS; +} + +#ifdef WANT_FIT_TESTING +static int fit_log_count = 0 ; +#endif + +int hbs_send_event ( unsigned int event ) +{ + mtc_message_type msg ; + + int rc = FAIL_BAD_PARM ; + int bytes = 0 ; + + memset (&msg, 0 , sizeof(mtc_message_type)); + + if ( event != MTC_EVENT_MONITOR_READY) + { + slog ("Unsupported event (%08x)\n", event ); + return (rc); + } + +#ifdef WANT_FIT_TESTING + if (( hbs_config.testmode ) && + ( hbs_config.testmask == FIT_CODE__NO_READY_EVENT )) + { + slog ("FIT: bypassing 'ready event' send\n"); + return PASS ; + } + + if ( daemon_is_file_present ( "/tmp/no_ready_event" ) == true ) + { + ilog_throttled ( fit_log_count, 100, "FIT: bypassing 'ready event' send\n"); + return PASS ; + } +#endif + + /* build the message */ + snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_mtce_event_header()); + snprintf ( &msg.hdr[MSG_HEADER_SIZE], MAX_CHARS_HOSTNAME, "%s", &my_hostname[0]); + + #define MAX_PROC_NAME_SIZE (64) + snprintf ( &msg.buf[0], MAX_PROC_NAME_SIZE, "%s", program_invocation_short_name); + + size_t len = strlen(program_invocation_short_name); + bytes = ((sizeof(mtc_message_type))-(BUF_SIZE-len)); + msg.cmd = event ; + + if (( hbs_sock.hbs_ready_tx_sock ) && + ( hbs_sock.hbs_ready_tx_sock->sock_ok() == true )) + { + mlog ("Ready message\n"); 
+ if ((rc = hbs_sock.hbs_ready_tx_sock->write((char*)&msg.hdr[0], bytes))!= bytes ) + { + elog ("Ready message send failed (%d) (%d:%s)\n", rc, errno, strerror(errno) ); + rc = FAIL_SOCKET_SENDTO ; + } + else + { + mlog2 ("Transmit: %x bytes to %s:%d\n", bytes, + hbs_sock.hbs_ready_tx_sock->get_dst_str(), + hbs_sock.hbs_ready_tx_sock->get_dst_addr()->getPort()); + print_mtc_message ( &msg ); + rc = PASS ; + } + } + else + { + rc = FAIL_NULL_POINTER ; + elog ("cannot send to null or failed 'hbs_ready_tx_sock'\n"); + } + return rc ; + +} + +/* The main heartbeat service loop */ +int daemon_init ( string iface, string nodeType_str ) +{ + int rc = PASS ; + + /* Initialize socket construct and pointer to it */ + memset ( &hbs_sock, 0, sizeof(hbs_sock)); + + /* Defaults */ + hbs_config.stall_pmon_thld = -1 ; + hbs_config.stall_mon_period = MTC_HRS_8 ; + hbs_config.stall_poll_period = MTC_HRS_8 ; + hbs_config.stall_rec_thld = 100 ; + + mtcTimer_init ( stallMon.stallMon_timer ); + mtcTimer_init ( stallMon.stallPol_timer ); + + /* Assign interface to config */ + hbs_config.mgmnt_iface = (char*)iface.data() ; + + if ( (rc = daemon_files_init ( )) != PASS ) + { + elog ("Pid, log or other files could not be opened (rc:%d)\n", rc ); + rc = FAIL_FILES_INIT ; + } + + /* convert node type to integer */ + my_nodetype = get_host_function_mask ( nodeType_str ) ; + ilog ("Node Type : %s (%d)\n", nodeType_str.c_str(), my_nodetype ); + + /* Bind signal handlers */ + if ( daemon_signal_init () != PASS ) + { + elog ("daemon_signal_init failed\n"); + return ( FAIL_SIGNAL_INIT ); + } + + /************************************************************************ + * There is no point continuing with init ; i.e. running daemon_configure, + * initializing sockets and trying to query for an ip address until the + * daemon's configuration requirements are met. Here we wait for those + * flag files to be present before continuing. 
+ ************************************************************************ + * Wait for /etc/platform/.initial_config_complete & /var/run/goenabled */ + daemon_wait_for_file ( CONFIG_COMPLETE_FILE , 0); + + /* Don't initialize messaging till we have the goenabled signal */ + if ( is_combo_system ( my_nodetype ) == true ) + { + daemon_wait_for_file ( GOENABLED_SUBF_PASS , 0); + ilog ("GOENABLE (small system)\n"); + } + else + { + daemon_wait_for_file ( GOENABLED_MAIN_PASS , 0); + ilog ("GOENABLE (large system)\n"); + } + + /* Configure the client */ + if ( (rc = daemon_configure ()) != PASS ) + { + elog ("Daemon service configuration failed (rc:%d)\n", rc ); + rc = FAIL_DAEMON_CONFIG ; + } + + /* Init the heartbeat transmit pulse response message */ + else if ( hbs_message_init () != PASS ) + { + elog ("Failed to initialize pulse response message\n"); + rc = FAIL_MESSAGE_INIT ; + } + /* Setup the heartbeat service messaging sockets */ + else if ( hbs_socket_init () != PASS ) + { + elog ("socket initialization failed (rc:%d)\n", rc ); + rc = FAIL_SOCKET_INIT; + } + return (rc); +} + +#define SPACE ' ' +#define ARROW '<' + +int stall_threshold_log = 0 ; +int stall_times_threshold_log = 0 ; +#define MAX_LEN 300 +void daemon_service_run ( void ) +{ +#ifdef WANT_DAEMON_DEBUG + time_debug_type before ; + time_debug_type after ; + time_delta_type delta ; + time_delta_type select_delta ; + char arrow = SPACE ; + char str [MAX_LEN] ; + int num = 0 ; + int flush_thld = 0 ; +#endif + + bool stall_monitor_ready = false ; + + unsigned int flags = 0 ; + int rc = 0 ; + int count = 0 ; + + /* Make the main loop schedule in real-time */ + struct sched_param param ; + memset ( ¶m, 0, sizeof(struct sched_param)); + param.sched_priority = hbs_config.scheduling_priority ; + if ( sched_setscheduler(0, SCHED_FIFO, ¶m) ) + { + elog ("sched_setscheduler (0, SCHED_FIFO, %d ) returned error (%d:%s)\n", + param.sched_priority, errno, strerror(errno)); + } + + if (( hbs_sock.ioctl_sock = 
open_ioctl_socket ( )) <= 0 ) + { + elog ("Failed to create ioctl socket"); + daemon_exit (); + } + + /* Not monitoring address changes RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR */ + if (( hbs_sock.netlink_sock = open_netlink_socket ( RTMGRP_LINK )) <= 0 ) + { + elog ("Failed to create netlink listener socket"); + daemon_exit (); + } + + hbs_sock.amon_socket = active_monitor_get_sel_obj (); + std::list socks ; + socks.clear(); + for ( int i = 0 ; i < MAX_IFACES ; i++ ) + { + if (hbs_sock.rx_sock[i] && hbs_sock.rx_sock[i]->getFD() > 0 ) + { + socks.push_front (hbs_sock.rx_sock[i]->getFD()); + } + } + socks.push_front (hbs_sock.pmon_pulse_sock->getFD()); + socks.push_front (hbs_sock.amon_socket ); + socks.push_front (hbs_sock.netlink_sock); + + socks.sort(); + + bool locked = daemon_is_file_present ( NODE_LOCKED_FILE ) ; + + ilog ("Pmon Pulse Counter Timer init with %d seconds timeout\n", hbs_config.start_delay ); + mtcTimer_init ( pmonPulse_timer , &my_hostname[0], "pmon pulse count clear timer" ); + mtcTimer_start ( pmonPulse_timer, timer_handler, 5 ); + + ilog ("Process Stall-Monitor starting in %d seconds\n", hbs_config.start_delay ); + mtcTimer_start ( stallMon.stallMon_timer, timer_handler, hbs_config.start_delay ); + + ilog ("Ready Event Period %d seconds\n", MTC_SECS_5 ); + mtcTimer_start ( readyEvent_timer, timer_handler, MTC_SECS_5 ); + + ilog ("Sending Heartbeat Ready Event\n"); + hbs_send_event ( MTC_EVENT_MONITOR_READY ); + + /* Run heartbeat service forever or until stop condition */ + for ( ; ; ) + { + hbs_sock.waitd.tv_sec = 0; + hbs_sock.waitd.tv_usec = SOCKET_WAIT; + +#ifdef WANT_DAEMON_DEBUG + if ( hbs_config.flush_thld != 0 ) + { + if ( debug_level ( DEBUG_MEM_LOG ) ) + { + gettime (before); + } + } + + /* Initialize the timeval struct */ + if ( hbs_config.flush_thld == 0 ) + { + hbs_sock.waitd.tv_usec = hbs_config.testmask ; + } +#endif + + /* Initialize the master fd_set */ + FD_ZERO(&hbs_sock.readfds); + for ( int i = 0 ; i < MAX_IFACES ; i++ ) + 
{ + if (hbs_sock.rx_sock[i] && hbs_sock.rx_sock[i]->getFD() > 0 ) + { + FD_SET(hbs_sock.rx_sock[i]->getFD(),&hbs_sock.readfds); + } + } + FD_SET(hbs_sock.pmon_pulse_sock->getFD(),&hbs_sock.readfds); + FD_SET(hbs_sock.amon_socket, &hbs_sock.readfds); + FD_SET(hbs_sock.netlink_sock, &hbs_sock.readfds); + + rc = select( socks.back()+1, + &hbs_sock.readfds, NULL, NULL, + &hbs_sock.waitd); + + if ( infra_network_provisioned == true ) + { + flags |= INFRA_FLAG ; + } + + /* Select error */ + if ( rc < 0 ) + { + if ( errno != EINTR ) + { + wlog_throttled ( count, 100, "select failed (%d:%s)\n", + errno, strerror(errno)); + } + } + + /* Only service sockets for the rc > 0 case */ + else if ( rc ) + { + if (hbs_sock.rx_sock[MGMNT_IFACE]&&FD_ISSET(hbs_sock.rx_sock[MGMNT_IFACE]->getFD(), &hbs_sock.readfds)) + { + /* Receive pulse request and send a response */ + /* Note: The flags are taken from the last round of get_pmon_pulses below */ + int rc = _service_pulse_request ( MGMNT_IFACE, flags ); + if ( rc != PASS ) + { + if ( rc == FAIL_TO_RECEIVE ) + { + mlog ("Failed to receive pulse request on management network (rc:%d)\n",rc); + } + else + { + wlog_throttled ( count, 200, "Failed to service pulse request on management network (rc:%d)\n",rc); + } + } + /* Clear 'flags'. 
If no pmon pulses come in then flags will not be updated + * and we will be stuck in the last flags state */ + flags = 0 ; + } + + if (hbs_sock.rx_sock[INFRA_IFACE]&&FD_ISSET(hbs_sock.rx_sock[INFRA_IFACE]->getFD(), &hbs_sock.readfds)) + { + /* Receive pulse request from the infrastructure interface and send a response */ + /* Note: The flags are taken from the last round of get_pmon_pulses below */ + int rc = _service_pulse_request ( INFRA_IFACE, flags ); + if ( rc != PASS ) + { + if ( rc == FAIL_TO_RECEIVE ) + { + mlog ("Failed to receive pulse request on infrastructure network (rc:%d)\n",rc); + } + else + { + wlog_throttled ( count, 200, "Failed to service pulse request on infrastructure network (rc:%d)\n",rc); + } + } + } + + if ( FD_ISSET(hbs_sock.pmon_pulse_sock->getFD(), &hbs_sock.readfds)) + { + pmonPulse_counter += get_pmon_pulses ( ); + if ( pmonPulse_counter ) + { + flags |= ( PMOND_FLAG ) ; + if ( stallMon.monitor_mode == true ) + { + stallMon_init (); + } + } + } + + if ( FD_ISSET(hbs_sock.amon_socket, &hbs_sock.readfds)) + { + dlog3 ("Active Monitor Select Fired\n"); + active_monitor_dispatch (); + } + + if (FD_ISSET(hbs_sock.netlink_sock, &hbs_sock.readfds)) + { + log_link_events ( hbs_sock.netlink_sock, + hbs_sock.ioctl_sock, + hbs_config.mgmnt_iface, + hbs_config.infra_iface, + hbs_sock.mgmnt_link_up_and_running, + hbs_sock.infra_link_up_and_running) ; + } + } + + count = 0 ; + + /* This waits for the stall monitor startup delay to expire */ + if (( stall_monitor_ready == false ) && + ( stallMon.stallMon_timer.ring == true )) + { + ilog ("Process Stall-Monitor started ...\n"); + stall_monitor_ready = true ; + } + + + if (( locked == false ) && + (stall_monitor_ready == true ) && + ((my_nodetype & COMPUTE_TYPE) == COMPUTE_TYPE ) && + (!(flags & PMOND_FLAG) )) + { + /* This is run every 50 msec - the WAIT_SELECT time */ + if (( ++stallMon.b2b_pmond_pulse_misses > hbs_config.stall_pmon_thld ) && + ( stallMon.monitored_processes > 0 )) + { + flags |= 
STALL_MON_FLAG ; + + /* If monitor mode is not on now ; turn it on and start the + * monitor interval timer as well as the first poll interval + * timer */ + if ( stallMon.monitor_mode == false ) + { + stallMon.monitor_mode = true ; + mtcTimer_start ( stallMon.stallMon_timer, timer_handler, hbs_config.stall_mon_period ); + mtcTimer_start ( stallMon.stallPol_timer, timer_handler, hbs_config.stall_poll_period ); + } + else if ( stallMon.stallMon_timer.ring == true ) + { + stallMon.failures = 0 ; + stallMon.stallMon_timer.ring = false ; + + /* if we get here then we may have a failure */ + for ( stallMon.proc_ptr = stallMon.proc_list.begin(); + stallMon.proc_ptr != stallMon.proc_list.end(); + stallMon.proc_ptr++ ) + { + if ( stallMon.proc_ptr->stalls >= (stallMon.proc_ptr->periods-1) ) + { + stallMon.failures++ ; + } + } + + if ( stallMon.failures >= hbs_config.stall_rec_thld ) + { + wlog_throttled (stall_threshold_log, 200, + "Host Is Stalling !!! (fails:%d thld:%d)\n", + stallMon.failures, hbs_config.stall_rec_thld ); + + flags |= STALL_REC_FLAG ; + if ( stallMon.recovery_mode == false ) + { + elog ( "Host has Stalled !!! (fails:%d thld:%d)\n", + stallMon.failures, hbs_config.stall_rec_thld ); + + stallMon.recovery_mode = true ; + if ( hbs_self_recovery ( STALL_REBOOT_CMD ) != PASS ) + { + flags |= STALL_ERR3_FLAG ; + } + /* Start a longer timer for the sysreq kill */ + if ( hbs_self_recovery ( STALL_SYSREQ_CMD ) != PASS ) + { + flags |= STALL_ERR4_FLAG ; + } + } + } + else + { + dlog ("Recovery Criteria Not Met\n"); + dlog ("... only %d of %d processes failed (%d are monitored)\n", + stallMon.failures, + hbs_config.stall_rec_thld , + stallMon.monitored_processes); + dlog ("... 
restarting stall monitor\n"); + stallMon_init (); + } + } + + /* We should never get here ; if we do then set the STALL_REC_FAIL_FLAG */ + else if ( stallMon.recovery_mode == true ) + { + flags |= STALL_REC_FLAG ; + } + if ( stallMon.stallPol_timer.ring == true ) + { + int i = 0 ; + // stallMon.stallPol_timer.ring = false ; + + /* TODO: Future ; track pids ans tally any pids that have changed */ + if ( stallMon.monitored_processes != hbs_refresh_pids ( stallMon.proc_list )) + { + flags |= STALL_ERR1_FLAG ; + } + /* Count the audits */ + hbs_process_monitor ( stallMon.proc_list ); + + /* Look over the scheduling counts and increment + * the stall count if they have not changed + * or if the status reads FAIL indicating that + * there was a problem getting the stat ; which + * qualifies as a stall failure */ + for ( stallMon.proc_ptr = stallMon.proc_list.begin(); + stallMon.proc_ptr != stallMon.proc_list.end(); + stallMon.proc_ptr++ , i++ ) + { + dlog ("%s (pid:%d) counts (%llu:%llu) \n", + stallMon.proc_ptr->proc.c_str(), + stallMon.proc_ptr->pid, + stallMon.proc_ptr->this_count, + stallMon.proc_ptr->prev_count); + + /* Increment the audit count for this process */ + stallMon.proc_ptr->periods++ ; + if (( stallMon.proc_ptr->this_count == stallMon.proc_ptr->prev_count ) || + ( stallMon.proc_ptr->status != PASS )) + { + /* Distinguish the stat collect failure as a + * stat read error compared to a stall */ + if ( stallMon.proc_ptr->status != PASS ) + { + // ilog ("%s process error\n", stallMon.proc_ptr->proc.c_str()); + flags |= STALL_ERR2_FLAG ; + } + /* Increment the stall count for this process */ + stallMon.proc_ptr->stalls++ ; + + /* Set this process's stall flag */ + int x = STALL_PID1_FLAG ; + flags |= ( x <<= i ) ; + wlog_throttled (stall_times_threshold_log, 100, + "%s stalled %d times in %d periods (flags:%x) (%llu:%llu)\n", + stallMon.proc_ptr->proc.c_str(), + stallMon.proc_ptr->stalls, + stallMon.proc_ptr->periods, flags, + stallMon.proc_ptr->this_count, + 
stallMon.proc_ptr->prev_count);
+                        }
+                        /* Save this count in prev for next compare */
+                        stallMon.proc_ptr->prev_count = stallMon.proc_ptr->this_count ;
+                    }
+                    /* restart the monitor audit timer */
+                    if ( mtcTimer_start ( stallMon.stallPol_timer, timer_handler, hbs_config.stall_poll_period ) != PASS )
+                    {
+                        flags |= STALL_REC_FAIL_FLAG ;
+                        stallMon_init ();
+                    }
+                }
+            }
+        }
+        else
+        {
+            stallMon.b2b_pmond_pulse_misses = 0 ;
+            stall_threshold_log = 0 ;
+            stall_times_threshold_log = 0 ;
+        }
+
+        if ( readyEvent_timer.ring == true )
+        {
+            hbs_send_event ( MTC_EVENT_MONITOR_READY );
+            readyEvent_timer.ring = false ;
+        }
+
+        daemon_signal_hdlr ();
+
+#ifdef WANT_DAEMON_DEBUG
+        /* Support the log flush config option */
+        if ( hbs_config.flush )
+        {
+            if ( ++flush_thld > hbs_config.flush_thld )
+            {
+                flush_thld = 0 ;
+                fflush (stdout);
+                fflush (stderr);
+            }
+        }
+#endif
+    }
+    daemon_exit ();
+}
+
+/* Push daemon state to log file */
+void daemon_dump_info ( void )
+{
+    daemon_dump_membuf_banner ();
+    daemon_dump_membuf();
+}
+
+const char MY_DATA [100] = { "eieio\n" } ;
+const char * daemon_stream_info ( void )
+{
+    return (&MY_DATA[0]);
+}
+
+/***************************************************************************
+ *                                                                         *
+ *                       Module Test Head                                  *
+ *                                                                         *
+ ***************************************************************************/
+
+/** Test Head Entry */
+int daemon_run_testhead ( void )
+{
+    int rc = PASS;
+    return (rc);
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsPmon.cpp b/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsPmon.cpp
new file mode 100644
index 00000000..c01d8b1a
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsPmon.cpp
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2013, 2015 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+  * @file
+  * Wind River CGTS Platform Nodal Health Check Client Daemon
+  */
+
+// #include
+#include
+#include /* for ... syslog */
+
+using namespace std;
+
+#include "nodeBase.h"
+#include "nodeUtil.h"      /* Common utilities */
+#include "hbsBase.h"       /* Heartbeat Base Header File */
+#include "nodeTimers.h"    /* mtcTimer utilities */
+#include "daemon_common.h" /* Common definitions and types for daemons */
+
+#define TEST_FILE (const char *)"/tmp/hbsClient.test"
+/* Look up the pid of every process in proc_list by name ; returns the number of list entries visited */
+int hbs_refresh_pids ( std::list & proc_list )
+{
+    int count = 0 ;
+    std::list:: iterator proc_ptr ;
+    for ( proc_ptr  = proc_list.begin();
+          proc_ptr != proc_list.end();
+          proc_ptr++ )
+    {
+        string procname = proc_ptr->proc.data() ;
+        proc_ptr->pid = get_pid_by_name_proc( procname );
+        count++ ;
+    }
+    return (count);
+}
+
+/* Read /proc/<pid>/schedstat for each list entry into this_count ; status is PASS only when that value was parsed */
+#define MAX_SCHEDSTAT_LEN (128)
+int hbs_process_monitor ( std::list & proc_list )
+{
+    char file_path [MAX_FILENAME_LEN] ;
+    char schedstat [MAX_SCHEDSTAT_LEN] ;
+    std::list:: iterator proc_ptr ;
+
+    FILE * fp ;
+
+    for ( proc_ptr  = proc_list.begin();
+          proc_ptr != proc_list.end();
+          proc_ptr++ )
+    {
+        proc_ptr->status = FAIL ;
+
+        // ilog ("Monitoring: %s (pid:%d)\n", proc_ptr->proc.c_str(), proc_ptr->pid );
+        if ( proc_ptr->pid == -1 )
+        {
+            continue ;
+        }
+
+        snprintf ( &file_path[0], MAX_FILENAME_LEN, "/proc/%d/schedstat", proc_ptr->pid );
+        fp = fopen (file_path, "r" );
+        if ( fp )
+        {
+            memset ( schedstat, 0 , MAX_SCHEDSTAT_LEN );
+            char * str = fgets ( &schedstat[0], MAX_SCHEDSTAT_LEN, fp );
+            UNUSED(str);
+            if ( strlen(schedstat) )
+            {
+                if ( sscanf ( schedstat , "%*s %*s %llu", &(proc_ptr->this_count)) >= 1 )
+                {
+                    dlog ("%s: %llu\n", proc_ptr->proc.c_str(), proc_ptr->this_count );
+                    proc_ptr->status = PASS ;
+                }
+                else
+                {
+                    dlog ("Failed to get schedstat from (%s)\n", file_path);
+                }
+            }
+            else
+            {
+                dlog ("failed to read from (%s)\n", file_path );
+            }
+            fclose(fp);
+        }
+        else
+        {
+            dlog ("Failed to open (%s)\n", file_path);
+        }
+    }
+    return (PASS);
+}
+
+struct mtc_timer _timer ;
+/* Timer signal handler ; sets _timer.ring when the signalled timer id matches _timer.tid */
+void hbs_recovery_timer_handler ( int sig, siginfo_t *si, void *uc)
+{
+    timer_t * tid_ptr = (timer_t*)si->si_value.sival_ptr ; /* cast to the declared pointer type rather than void** */
+
+    /* Avoid compiler errors/warnings for parms we must
+     * have but currently do nothing with */
+    UNUSED(sig);
+    UNUSED(uc);
+    if ( !(*tid_ptr) )
+    {
+        tlog ("Called with a NULL Timer ID\n");
+        return ;
+    }
+    else if (( *tid_ptr == _timer.tid ) )
+    {
+        _timer.ring = true ;
+    }
+}
+
+/* Start the stall recovery action selected by cmd ; returns FAIL if fork fails or cmd is not recognised, else PASS */
+int hbs_self_recovery ( unsigned int cmd )
+{
+    // char cmd[2048];
+    pid_t pid = 0 ;
+
+    /* Reboot Command */
+    if ( cmd == STALL_REBOOT_CMD )
+    {
+        elog ("Forking Self-Recovery Reboot Action\n");
+
+        // Fork child to do the reboot.
+        pid = fork();
+        if( 0 > pid )
+        {
+            return (FAIL);
+        }
+        else if( 0 == pid )
+        {
+
+            char  reboot_cmd[]  = "reboot";
+            char* reboot_argv[] = {reboot_cmd, NULL};
+            char* reboot_env[]  = {NULL};
+
+            bool close_file_descriptors = true ;
+            setup_child ( close_file_descriptors );
+
+            syslog ( LOG_INFO, "child");
+
+            mtcTimer_init ( _timer );
+            mtcTimer_start( _timer, hbs_recovery_timer_handler, 10 );
+
+            while( true )
+            {
+                if ( _timer.ring == true )
+                {
+                    syslog ( LOG_INFO, "issuing reboot");
+
+                    execve( "/sbin/reboot", reboot_argv, reboot_env );
+                    break ;
+                }
+                syslog ( LOG_INFO, "waiting for reboot timer ...");
+
+                sleep( 10 ); // 10 seconds
+            }
+            sleep (10);
+
+            // Shouldn't get this far, else there was an error.
+            exit(-1);
+        }
+        else
+        {
+            /* parent returns */
+            return (PASS);
+        }
+    }
+    /* Forced Self Reset Now */
+    else if ( cmd == STALL_SYSREQ_CMD )
+    {
+        fork_sysreq_reboot ( 60 ) ;
+
+        /* parent returns */
+        return (PASS);
+    }
+    else
+    {
+        return (FAIL);
+    }
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsStubs.cpp b/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsStubs.cpp
new file mode 100644
index 00000000..503f7a9a
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/heartbeat/hbsStubs.cpp
@@ -0,0 +1,370 @@
+/*
+ * Copyright (c) 2013, 2016 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+  * @file
+  * Wind River CGTS Platform Nodal Health Check Agent Stubs
+  */
+
+
+#include
+#include
+#include
+#include
+#include
+#include /* for ... close and usleep */
+#include
+#include /* for ... RTMGRP_LINK */
+
+using namespace std;
+
+#include "nodeBase.h"
+#include "nodeUtil.h"      /* for ... msgSock_type */
+#include "nodeMacro.h"     /* for ... CREATE_NONBLOCK_INET_UDP_RX_SOCKET */
+#include "daemon_ini.h"    /* Ini Parser Header */
+#include "daemon_common.h" /* Common definitions and types for daemons */
+#include "daemon_option.h" /* Common options for daemons */
+#include "nodeClass.h"     /* The main link class */
+#include "hbsBase.h"       /* Heartbeat Base Header File */
+#include "mtcAlarm.h"      /* for ... the mtcAlarm stubs */
+
+int send_guest_command ( string hostname, int command )
+{
+    UNUSED(hostname);
+    UNUSED(command);
+    return(PASS);
+}
+
+/* Stub interfaces due to common class definition without inheritance */
+int nodeLinkClass::mtcInvApi_update_state ( string hostname,
+                                            string key,
+                                            string value )
+{
+    UNUSED(hostname);
+    UNUSED(key);
+    UNUSED(value);
+    return(PASS);
+}
+
+/* Stub interfaces due to common class definition without inheritance */
+int nodeLinkClass::mtcInvApi_update_state ( struct nodeLinkClass::node * node_ptr,
+                                            string key,
+                                            string value )
+{
+    UNUSED(node_ptr);
+    UNUSED(key);
+    UNUSED(value);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_update_task ( string hostname,
+                                           string task )
+{
+    UNUSED(hostname);
+    UNUSED(task);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_update_task ( struct nodeLinkClass::node * node_ptr,
+                                           string task )
+{
+    UNUSED(node_ptr);
+    UNUSED(task);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_update_states ( string hostname,
+                                             string admin,
+                                             string oper,
+                                             string avail )
+{
+    UNUSED(hostname);
+    UNUSED(admin);
+    UNUSED(oper);
+    UNUSED(avail);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_update_states ( struct nodeLinkClass::node * node_ptr,
+                                             string admin,
+                                             string oper,
+                                             string avail )
+{
+    UNUSED(node_ptr);
+    UNUSED(admin);
+    UNUSED(oper);
+    UNUSED(avail);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_force_states ( string hostname,
+                                            string admin,
+                                            string oper,
+                                            string avail )
+{
+    UNUSED(hostname);
+    UNUSED(admin);
+    UNUSED(oper);
+    UNUSED(avail);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_force_states ( struct nodeLinkClass::node * node_ptr,
+                                            string admin,
+                                            string oper,
+                                            string avail )
+{
+    UNUSED(node_ptr);
+    UNUSED(admin);
+    UNUSED(oper);
+    UNUSED(avail);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_update_uptime ( string hostname,
+                                             unsigned int uptime )
+{
+    UNUSED(hostname);
+    UNUSED(uptime);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_update_uptime ( struct nodeLinkClass::node * node_ptr,
+                                             unsigned int uptime )
+{
+    UNUSED(node_ptr);
+    UNUSED(uptime);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_load_host ( string & hostname ,
+                                         node_inv_type & info )
+{
+    UNUSED(hostname);
+    UNUSED(info);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_update_value ( string hostname,
+                                            string key,
+                                            string value )
+{
+    UNUSED(hostname);
+    UNUSED(key);
+    UNUSED(value);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_update_value ( struct nodeLinkClass::node * node_ptr,
+                                            string key,
+                                            string value )
+{
+    UNUSED(node_ptr);
+    UNUSED(key);
+    UNUSED(value);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_cfg_show ( string hostname )
+{
+    UNUSED(hostname);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_cfg_modify ( string hostname, bool install )
+{
+    UNUSED(hostname);
+    UNUSED(install);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_update_states_now ( string hostname, string admin, string oper, string avail, string oper_subf, string avail_subf )
+{
+    UNUSED(hostname);
+    UNUSED(admin);
+    UNUSED(oper);
+    UNUSED(avail);
+    UNUSED(oper_subf);
+    UNUSED(avail_subf);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_update_states_now ( struct nodeLinkClass::node * node_ptr, string admin, string oper, string avail, string oper_subf, string avail_subf)
+{
+    UNUSED(node_ptr);
+    UNUSED(admin);
+    UNUSED(oper);
+    UNUSED(avail);
+    UNUSED(oper_subf);
+    UNUSED(avail_subf);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_update_task_now ( string hostname, string task )
+{
+    UNUSED(hostname);
+    UNUSED(task);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_update_task_now ( struct nodeLinkClass::node * node_ptr, string task )
+{
+    UNUSED(node_ptr);
+    UNUSED(task);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcSmgrApi_request ( struct nodeLinkClass::node * node_ptr, mtc_cmd_enum operation, int retries )
+{
+    UNUSED(node_ptr);
+    UNUSED(operation);
+    UNUSED(retries);
+    return(PASS);
+}
+
+
+void mtcTimer_handler ( int sig, siginfo_t *si, void *uc)
+{
+    UNUSED(sig);
+    UNUSED(si);
+    UNUSED(uc);
+}
+
+int mtcSmgrApi_active_services ( string hostname , bool * yes_no_ptr )
+{
+    UNUSED(hostname);
+    UNUSED(yes_no_ptr);
+    return(PASS);
+}
+
+int send_hbs_command ( string hostname, int command )
+{
+    UNUSED(hostname);
+    UNUSED(command);
+    return(PASS);
+}
+
+int send_hwmon_command ( string hostname, int command )
+{
+    UNUSED(hostname);
+    UNUSED(command);
+    return(PASS);
+}
+
+nodeLinkClass * get_mtcInv_ptr (void )
+{
+    return(NULL);
+}
+
+int daemon_log_message ( const char * hostname,
+                         const char * filename,
+                         const char * log_str )
+{
+    UNUSED(hostname);
+    UNUSED(filename);
+    UNUSED(log_str);
+    return(PASS);
+}
+
+
+int nodeLinkClass::mnfa_calculate_threshold ( string hostname )
+{
+    UNUSED(hostname);
+    return(PASS) ;
+}
+void nodeLinkClass::mnfa_add_host ( struct nodeLinkClass::node * node_ptr, iface_enum iface )
+{ UNUSED(node_ptr); UNUSED(iface); }
+void nodeLinkClass::mnfa_recover_host ( struct nodeLinkClass::node * node_ptr )
+{ UNUSED(node_ptr); }
+void nodeLinkClass::mnfa_enter ( void )
+{ }
+void nodeLinkClass::mnfa_exit ( bool force )
+{ UNUSED(force); }
+
+int send_mtc_cmd ( string & hostname, int cmd, int interface )
+{ UNUSED(hostname); UNUSED(cmd); UNUSED(interface); return PASS ; }
+
+int nodeLinkClass::mtcInvApi_subf_states ( string hostname,
+                                           string oper_subf,
+                                           string avail_subf )
+{
+    UNUSED(hostname);
+    UNUSED(oper_subf);
+    UNUSED(avail_subf);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcInvApi_subf_states ( struct nodeLinkClass::node * node_ptr,
+                                           string oper_subf,
+                                           string avail_subf )
+{
+    UNUSED(node_ptr);
+    UNUSED(oper_subf);
+    UNUSED(avail_subf);
+    return(PASS);
+}
+
+int nodeLinkClass::mtcVimApi_state_change ( struct nodeLinkClass::node * node_ptr,
+                                            libEvent_enum operation,
+                                            int retries )
+{
+
+    UNUSED(node_ptr);
+    UNUSED(operation);
+    UNUSED(retries);
+    return(PASS);
+}
+
+int nodeLinkClass::doneQueue_purge ( struct nodeLinkClass::node * node_ptr ) { UNUSED(node_ptr); return (PASS) ; }
+int nodeLinkClass::workQueue_purge ( struct nodeLinkClass::node * node_ptr ) { UNUSED(node_ptr); return (PASS) ; }
+int nodeLinkClass::mtcCmd_doneQ_purge ( struct nodeLinkClass::node * node_ptr ) { UNUSED(node_ptr); return (PASS) ; }
+int nodeLinkClass::mtcCmd_workQ_purge ( struct nodeLinkClass::node * node_ptr ) { UNUSED(node_ptr); return (PASS) ; }
+
+void nodeLinkClass::workQueue_dump ( struct nodeLinkClass::node * node_ptr ) { UNUSED(node_ptr); }
+
+int tokenUtil_parse_uri (const string uri, daemon_config_type* config_ptr)
+{
+    dlog ("%s\n", uri.c_str()) ;
+    UNUSED(config_ptr);
+    return(PASS);
+}
+
+void * mtcThread_ipmitool ( void * arg ) { UNUSED(arg); return NULL ; }
+
+int nodeLinkClass::ipmi_command_send ( struct nodeLinkClass::node * node_ptr, int command )
+{
+    UNUSED(node_ptr);
+    UNUSED(command);
+    return(PASS);
+}
+
+int nodeLinkClass::ipmi_command_recv ( struct nodeLinkClass::node * node_ptr )
+{
+    UNUSED(node_ptr);
+    return(PASS);
+}
+
+void nodeLinkClass::ipmi_command_done ( struct nodeLinkClass::node * node_ptr )
+{
+    UNUSED(node_ptr);
+}
+
+
+int mtcAlarm_clear ( string hostname, mtc_alarm_id_enum id ) { UNUSED(hostname); UNUSED(id); return (PASS); }
+int mtcAlarm_warning ( string hostname, mtc_alarm_id_enum id ) { UNUSED(hostname); UNUSED(id); return (PASS); }
+int mtcAlarm_minor ( string hostname, mtc_alarm_id_enum id ) { UNUSED(hostname); UNUSED(id); return (PASS); }
+int mtcAlarm_major ( string hostname, mtc_alarm_id_enum id ) { UNUSED(hostname); UNUSED(id); return (PASS); }
+int mtcAlarm_critical ( string hostname, mtc_alarm_id_enum id ) { UNUSED(hostname); UNUSED(id); return (PASS); }
+
+
+int mtcAlarm_critical_log ( string hostname, mtc_alarm_id_enum id ) { UNUSED(hostname); UNUSED(id); return (PASS); }
+int mtcAlarm_major_log ( string hostname, mtc_alarm_id_enum id ) { UNUSED(hostname); UNUSED(id); return (PASS); }
+int mtcAlarm_minor_log ( string hostname, mtc_alarm_id_enum id ) { UNUSED(hostname); UNUSED(id); return (PASS); }
+int mtcAlarm_warning_log ( string hostname, mtc_alarm_id_enum id ) { UNUSED(hostname); UNUSED(id); return (PASS); }
+int mtcAlarm_log ( string hostname, mtc_alarm_id_enum id ) { UNUSED(hostname); UNUSED(id); return (PASS); }
+
+string mtcAlarm_getId_str ( mtc_alarm_id_enum id ) { UNUSED(id); return ("stub"); }
diff --git a/mtce-common/cgts-mtce-common-1.0/hostw/Makefile b/mtce-common/cgts-mtce-common-1.0/hostw/Makefile
new file mode 100755
index 00000000..b0dfdaae
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/hostw/Makefile
@@ -0,0 +1,43 @@
+#
+# Copyright (c) 2015-2016 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+SRCS = hostwInit.cpp hostwHdlr.cpp hostwMsg.cpp
+OBJS = $(SRCS:.cpp=.o)
+LDLIBS = -lstdc++ -ldaemon -lcommon -lrt -lcrypto
+INCLUDES = -I../daemon -I../common -I.
+CCFLAGS = -g -Wall -Wextra -Werror
+
+STATIC_ANALYSIS_TOOL = cppcheck
+STATIC_ANALYSIS_TOOL_EXISTS = $(shell command -v $(STATIC_ANALYSIS_TOOL) >/dev/null 2>&1 && echo 1 || echo 0)
+
+all: common daemon build clean_common
+
+.cpp.o:
+	$(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@
+
+static_analysis:
+ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1)
+	$(STATIC_ANALYSIS_TOOL) --language=c++ --enable=warning -U__AREA__ -DWANT_FIT_TESTING *.cpp *.h
+else
+	echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..."
+endif
+
+build: clean static_analysis $(OBJS)
+	$(CXX) $(CCFLAGS) $(OBJS) -L../daemon -L../common $(LDLIBS) -o hostwd
+
+common:
+	( cd ../common ; $(MAKE) clean ; $(MAKE) -j1 lib VER=$(VER) VER_MJR=$(VER_MJR))
+
+daemon:
+	( cd ../daemon ; $(MAKE) clean ; $(MAKE) -j1 lib VER=$(VER) VER_MJR=$(VER_MJR))
+
+clean_common:
+	@rm -f ../common/*.o
+	@rm -f ../daemon/*.o
+
+clean: clean_common
+	@rm -f $(OBJS) *.o *.a hostwd
+
diff --git a/mtce-common/cgts-mtce-common-1.0/hostw/hostw.h b/mtce-common/cgts-mtce-common-1.0/hostw/hostw.h
new file mode 100755
index 00000000..ff2a2a14
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/hostw/hostw.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2015-2016 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+/*
+ * This implements the CGCS Host Watchdog ; /usr/local/bin/hostwd
+ *
+ * Call trace is as follows:
+ *    daemon_init
+ *    daemon_files_init
+ *    daemon_signal_init
+ *    daemon_configure
+ *    ini_parse
+ *    hostw_process_config
+ *    socket_init
+ *    hostw_socket_init
+ *
+ *    daemon_service_run
+ *    hostw_service
+ *    _forever
+ *    hostw_service_command
+ *    kernel_watchdog_pet
+ *
+ *
+ * This daemon waits for a "goenabled" signal and then
+ * - starts the kernel watchdog (to insure against death of this process)
+ * - regularly pets the watchdog
+ * - expects regular updates from PMON indicating "sane" status
+ *     - PMON is configured via pmond.conf (PMOND_CONFIG_FILE) to send updates at certain
+ *       intervals
+ *     - host watchdog reads PMON config file to determine the expected
+ *       interval, but allows for some flexibility (HOSTW_UPDATE_TOLERANCE)
+ *       to allow for process scheduling, etc
+ * - will log and reboot if PMON dies or if PMON reports system is not right
+ *
+ */
+ /**
+  * @file
+  * Wind River CGCS Host Watchdog Service Header
+  */
+/* NOTE(review): no include guard is visible in this header ; confirm it is never included twice in one translation unit */
+#include
+#include
+#include
+
+using namespace std;
+
+#include "nodeBase.h"
+#include "daemon_ini.h"    /* Ini Parser Header */
+#include "daemon_common.h" /* Common definitions and types for daemons */
+#include "daemon_option.h" /* Common options for daemons */
+#include "nodeTimers.h"    /* maintenance timer utilities start/stop */
+#include "nodeUtil.h"      /* common utilities */
+#include "hostwMsg.h"      /* message format */
+
+/* Configuration Files */
+#define HOSTWD_CONFIG_FILE ((const char *)"/etc/mtc/hostwd.conf")
+#define PMOND_CONFIG_FILE ((const char *)"/etc/mtc/pmond.conf")
+
+#define HOSTW_MIN_KERN_UPDATE_PERIOD 60 /* user can set how long until kernel
+                                         * watchdog panics, down to this
+                                         * minimum (seconds) */
+
+/* Daemon Config Bit Masks */
+#define CONFIG_HOSTWD_FAILURE_THRESHOLD 0x01
+#define CONFIG_HOSTWD_REBOOT 0x02
+#define CONFIG_HOSTWD_USE_KERN_WD 0x04
+#define CONFIG_HOSTWD_CONSOLE_PATH 0x10 /* NOTE(review): bit 0x08 is not assigned to any option here */
+#define CONFIG_START_DELAY 0x20
+#define CONFIG_HOSTWD_UPDATE_PERIOD 0x40
+#define CONFIG_KERNWD_UPDATE_PERIOD 0x80
+
+
+/** Daemon Config Mask */
+#define CONFIG_MASK (CONFIG_HOSTWD_FAILURE_THRESHOLD |\
+                     CONFIG_HOSTWD_REBOOT |\
+                     CONFIG_HOSTWD_USE_KERN_WD |\
+                     CONFIG_HOSTWD_CONSOLE_PATH |\
+                     CONFIG_START_DELAY |\
+                     CONFIG_HOSTWD_UPDATE_PERIOD |\
+                     CONFIG_KERNWD_UPDATE_PERIOD)
+
+#define PIPE_COMMAND_RESPON_LEN 100 /* max pipe command response length */
+
+#define GRACEFUL_REBOOT_DELAY 60 /* how many seconds to wait for logger
+                                  * to finish before we start reboot */
+
+#define FORCE_REBOOT_DELAY 300 /* how many seconds to wait for logger
+                                * and graceful reboot to finish before
+                                * we give up and force reboot */
+
+/* Context control structure */
+typedef struct
+{
+    /* Watchdog interface */
+    /* ------------------ */
+    int watchdog ; /** The opened /dev/watchdog file */
+
+    /* Loop counters */
+    /* ------------------ */
+    int pmon_grace_loops ; /** Messages we allow pmon to miss before panic */
+    int process_grace_loops; /** Number of consecutive "something is wrong" */
+                             /* messages we allow before panic */
+    struct sigaction info ; /**< This daemon signal action struct */
+    struct sigaction prev ; /**< Action handler that was replaced */
+                            /**< This is put back on the exit */
+
+} hostw_ctrl_type ;
+
+/** Daemon Service messaging socket control structure */
+typedef struct
+{
+    /** Unix socket used to listen for status updates */
+    int status_sock; /**< Status (receive) socket */
+    struct sockaddr_un status_addr; /**< Address to use for unix socket */
+    fd_set readfds;
+} hostw_socket_type ;
+
+/* functions called between files */
+
+hostw_ctrl_type * get_ctrl_ptr(void);
+hostw_socket_type * hostw_getSock_ptr(void);
+void hostw_service(void);
+void hostw_log_and_reboot(void);
+int hostw_socket_init(void);
+void kernel_watchdog_pet(void) ;
+
+
diff --git a/mtce-common/cgts-mtce-common-1.0/hostw/hostwHdlr.cpp b/mtce-common/cgts-mtce-common-1.0/hostw/hostwHdlr.cpp
new file mode 100644
index 00000000..fb5e986a
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/hostw/hostwHdlr.cpp
@@ -0,0 +1,276 @@
+/*
+ * Copyright (c) 2015-2016 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+  * @file
+  * Wind River CGCS Platform Host Watchdog Service Handler
+  */
+
+#include "hostw.h"
+#include
+#include
+#include /* for execve */
+
+/* In addition to logging to wherever elog messages go,
+ * this function does its best to log output to the console
+ * (for the purpose of capturing data when the system is
+ * about to go down)
+ *
+ * The path we log to is defined in the config file, expected to
+ * be something like "/dev/console", "/dev/pts/0" or "/dev/ttyS0"
+ */
+#define emergency_log(...)                                        \
+do {                                                              \
+    daemon_config_type *cfg = daemon_get_cfg_ptr ();              \
+    elog(__VA_ARGS__);                                            \
+    if (cfg->hostwd_console_path) {                               \
+        FILE* console = fopen(cfg->hostwd_console_path, "a");     \
+        if (NULL != console) {                                    \
+            fprintf(console, __VA_ARGS__);                        \
+            fclose (console);                                     \
+        }                                                         \
+    }                                                             \
+} while (0)
+
+int hostw_service_command ( hostw_socket_type * hostw_socket );
+
+static void fork_hostwd_logger ( void );
+
+/* Push daemon state to log file */
+void daemon_dump_info ( void )
+{
+}
+
+void daemon_sigchld_hdlr ( void )
+{
+}
+
+/**
+ * This is the main loop of the program
+ *
+ * We loop waiting for messages to arrive. We're allowed to miss some messages
+ * (due to jitter in system scheduling, overcommitted resources, etc) and this
+ * can be tuned using the config file. On each loop (regardless of message
+ * received) we pet the watchdog.
+ *
+ * If a large number of messages are missed, or the messages consistently
+ * indicate system issues, we take the appropriate action (log what we can
+ * and reboot the system).
+ */
+void hostw_service ( void )
+{
+    std::list<int> socks ; /* we have a "list" of 1 socket, to allow for
+                            * future extension and to mirror code flow of
+                            * other utilities */
+    hostw_socket_type * hostw_socket = hostw_getSock_ptr ();
+    struct timeval timeout;
+    int rc;
+
+    hostw_ctrl_type *ctrl = get_ctrl_ptr ();
+    daemon_config_type *config = daemon_get_cfg_ptr ();
+
+    ctrl->pmon_grace_loops = config->hostwd_failure_threshold + 1;
+
+    socks.clear();
+    if ( hostw_socket->status_sock )
+    {
+        socks.push_front (hostw_socket->status_sock);
+        FD_SET(hostw_socket->status_sock, &(hostw_socket->readfds));
+    }
+    socks.sort();
+
+    ilog("Host Watchdog Service running\n");
+    for ( ; ; )
+    {
+        timeout.tv_sec = config->hostwd_update_period;
+        timeout.tv_usec=0;
+
+        /* pet the watchdog */
+        kernel_watchdog_pet();
+
+        /* set the master fd_set */
+        FD_ZERO(&(hostw_socket->readfds));
+        FD_SET(hostw_socket->status_sock, &(hostw_socket->readfds));
+
+        rc = select (socks.back() + 1,
+                     &(hostw_socket->readfds), NULL, NULL, &timeout);
+
+        /* If the select time out expired then no new message to process */
+        if ( rc < 0 )
+        {
+            /* Check to see if the select call failed. */
+            /* ... but filter Interrupt signal */
+            if ( errno != EINTR )
+            {
+                elog ("Select Failed (rc:%d) %s \n", errno, strerror(errno));
+                ctrl->pmon_grace_loops--;
+            }
+        }
+        else if ( rc == 0 )
+        {
+            if (daemon_is_file_present(NODE_LOCKED_FILE))
+            {
+                wlog( "Did not receive message from PMON, however node is"
+                      " locked -- refusing to take reset action while locked\n" );
+            }
+            else
+            {
+                ctrl->pmon_grace_loops--;
+
+                /* we missed a message from PMON */
+                if ( ctrl->pmon_grace_loops == (config->hostwd_failure_threshold - 1) )
+                {
+                    ilog ("Did not receive expected message from PMON - first missed message\n");
+                }
+                /* each missed message costs exactly one grace loop ; it was already counted above */
+                if ( ctrl->pmon_grace_loops <= 2 )
+                {
+                    ilog ("Did not receive expected message from PMON - %d more missed messages allowed\n",
+                          ctrl->pmon_grace_loops);
+                }
+            }
+        }
+        else
+        {
+            if (FD_ISSET(hostw_socket->status_sock, &(hostw_socket->readfds)))
+            {
+                rc = hostw_service_command ( hostw_socket);
+                if ( rc == PASS ) /* got "all is well" message */
+                {
+                    ctrl->pmon_grace_loops = config->hostwd_failure_threshold;
+                }
+            }
+        }
+        if (0 >= ctrl->pmon_grace_loops)
+        {
+            if (daemon_is_file_present(NODE_LOCKED_FILE))
+            {
+                wlog( "Host watchdog (hostwd) not receiving messages from PMON"
+                      " however host is locked - refusing to take reset action"
+                      " while locked\n" );
+            }
+            else
+            {
+                emergency_log( "*** Host watchdog (hostwd) not receiving messages "
+                               "from PMON ***\n");
+                hostw_log_and_reboot();
+            }
+        }
+
+        daemon_signal_hdlr ();
+
+    }
+}
+
+/**
+ * Parse and react to a message from PMON
+ */
+int hostw_service_command ( hostw_socket_type * hostw_socket)
+{
+    mtc_message_type msg[2]; /* we use a chunk of memory larger than a single
+                              * mtc_message_type to check for oversized messages
+                              * (invalid...)
+                              */
+    int len = sizeof(msg[0]) + 1;
+
+    socklen_t addrlen = (socklen_t) sizeof(hostw_socket->status_addr);
+    len = recvfrom(hostw_socket->status_sock,
+                   (char*)&msg,
+                   len,
+                   0,
+                   (struct sockaddr*) &hostw_socket->status_addr,
+                   &addrlen);
+
+    if (sizeof(msg[0]) == len)
+    {
+        /* message is correct size, check pmon reported status */
+        switch (msg[0].cmd)
+        {
+            case MTC_CMD_NONE:
+                /* All is well */
+                return PASS;
+
+            case MTC_EVENT_PMON_CRIT:
+                if (daemon_is_file_present(NODE_LOCKED_FILE))
+                {
+                    ilog( "PMON reports unrecoverable system, however node is"
+                          " locked - considering this an OK message\n" );
+                    return PASS;
+                }
+                else
+                {
+                    emergency_log( "*** PMON reports unrecoverable system - message '%s' ***\n", msg[0].buf);
+                    hostw_log_and_reboot();
+                }
+                return FAIL;
+
+            default:
+                elog("Unknown status reported\n");
+                break;
+        }
+    }
+    else
+    {
+        /* bad message size */
+        elog("Host Watchdog received bad or corrupted message (length = %d)\n", len);
+    }
+    return FAIL;
+}
+
+/**
+ * Host watchdog (or PMON) has determined that the system is not healthy and is
+ * performing recovery action.
+ */
+void hostw_log_and_reboot()
+{
+    daemon_config_type* config = daemon_get_cfg_ptr ();
+
+    emergency_log ("*** Host Watchdog declaring system unhealthy ***\n");
+
+    /* start the process to log as much data as possible */
+    fork_hostwd_logger ();
+
+    if (config->hostwd_reboot_on_err) {
+        emergency_log ("*** Initiating reboot ***\n");
+
+        /* start the process that will perform an ungraceful reboot, if
+         * the graceful reboot fails */
+        fork_sysreq_reboot ( FORCE_REBOOT_DELAY );
+
+        /* start the graceful reboot process */
+        fork_graceful_reboot ( GRACEFUL_REBOOT_DELAY );
+    }
+}
+
+/**
+ * Initiate the thread which logs as much information about the system
+ * as possible.
+ */
+static void fork_hostwd_logger ( void )
+{
+    int parent = double_fork ();
+    if (0 > parent) /* problem forking */
+    {
+        elog ("failed to fork hostwd logging process\n");
+        return ;
+    }
+    else if (0 == parent) /* if we're the child */
+    {
+        sigset_t mask , mask_orig ;
+
+        setup_child(false); /* initialize the process group, etc */
+        ilog ("*** Host Watchdog Logging Thread ***\n");
+
+        sigemptyset (&mask);
+        sigaddset (&mask, SIGTERM );
+        sigprocmask (SIG_BLOCK, &mask, &mask_orig );
+
+        /* TODO - log data here */
+        exit (0);
+    }
+}
+
diff --git a/mtce-common/cgts-mtce-common-1.0/hostw/hostwInit.cpp b/mtce-common/cgts-mtce-common-1.0/hostw/hostwInit.cpp
new file mode 100644
index 00000000..d251a6a3
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/hostw/hostwInit.cpp
@@ -0,0 +1,335 @@
+/*
+ * Copyright (c) 2015-2016 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+  * @file
+  * Wind River CGCS Platform Host Watchdog Daemon
+  */
+
+#include
+#include
+#include "hostw.h"
+
+/* avoid compiler warnings */
+#define UNUSED(_x_) ((void) _x_)
+
+/****************************/
+/* Initialization Utilities */
+/****************************/
+
+/* Local kernel watchdog functions */
+static int kernel_watchdog_init ( void );
+static void kernel_watchdog_close ( void );
+
+/* Host Watchdog Control Structure */
+static hostw_ctrl_type hostw_ctrl;
+
+/* Daemon Configuration Structure
+ * @see daemon_common.h for daemon_config_type struct format.
+ */
+static daemon_config_type hostw_config;
+
+/* Get local structs */
+
+/* Accessor for the file-local host watchdog control structure */
+hostw_ctrl_type * get_ctrl_ptr ( void )
+{
+    return (&hostw_ctrl);
+}
+
+/* Accessor for the file-local daemon configuration structure */
+daemon_config_type * daemon_get_cfg_ptr ( void )
+{
+    return &hostw_config ;
+}
+
+/* Cleanup exit handler
+ *
+ * Asks systemd whether the system is shutting down. The kernel watchdog is
+ * closed unless the reply contains "stopping" ; in that case, or when no
+ * reply is obtained at all, it is left running. The status socket is then
+ * closed and the process exits with status 0 ; this function does not
+ * return.
+ */
+void daemon_exit ( void )
+{
+    int rc ;
+
+    /* zero filled so the buffer is a valid empty string even if the command
+     * below fails without writing anything into it */
+    char pipe_cmd_output [PIPE_COMMAND_RESPON_LEN] = "" ;
+    size_t output_len ;
+    hostw_socket_type * hostw_socket = hostw_getSock_ptr();
+
+    rc = execute_pipe_cmd ( "systemctl is-system-running", &pipe_cmd_output[0], PIPE_COMMAND_RESPON_LEN );
+
+    if ( rc != 0 )
+    {
+        elog ("call to 'systemctl is-system-running' failed (%d:%d:%m)\n", rc, errno );
+    }
+
+    output_len = strnlen ( pipe_cmd_output, PIPE_COMMAND_RESPON_LEN );
+    if ( output_len > 0 )
+    {
+        /* length bounded copy ; never reads past the end of the buffer even
+         * if the command filled it without a terminating NUL */
+        string temp ( pipe_cmd_output, output_len ) ;
+
+        ilog ("systemctl is-system-running result: <%s>\n", temp.c_str() );
+
+        if ( temp.find ("stopping") == string::npos )
+        {
+            /* system is not stopping so turn off the watchdog with process stop */
+            kernel_watchdog_close();
+        }
+        else
+        {
+            /* leave the watchdog running during shutdown as failsafe reset method */
+            ilog ("Leaving watchdog running while system is 'stopping'\n");
+        }
+    }
+    else
+    {
+        wlog ("call to systemctl is-system-running failed to yield response\n");
+    }
+
+
+    /* zero is used as the 'not open' marker for the status socket */
+    if (hostw_socket->status_sock != 0)
+    {
+        close (hostw_socket->status_sock);
+        hostw_socket->status_sock = 0;
+    }
+
+    daemon_files_fini();
+    daemon_dump_info();
+    exit (0);
+}
+
+
+/* Startup config read
+ *
+ * Handler passed to ini_parse ; called with one section/name/value triple
+ * at a time. Each recognised setting is stored in the daemon_config_type
+ * passed through 'user' and its CONFIG_xxx bit is set in the mask.
+ * Unrecognised settings are ignored.
+ *
+ * NOTE(review): MATCH presumably compares 'section' and 'name' against the
+ * two literals - confirm against its definition.
+ *
+ * Always returns PASS.
+ */
+int hostw_process_config ( void * user,
+                           const char * section,
+                           const char * name,
+                           const char * value )
+{
+    daemon_config_type* config_ptr = (daemon_config_type*)user;
+
+    if (MATCH("config", "hostwd_failure_threshold"))
+    {
+        config_ptr->hostwd_failure_threshold = atoi(value);
+        config_ptr->mask |= CONFIG_HOSTWD_FAILURE_THRESHOLD ;
+    }
+    else if (MATCH("config", "hostwd_reboot_on_err"))
+    {
+        config_ptr->hostwd_reboot_on_err = atoi(value);
+        config_ptr->mask |= CONFIG_HOSTWD_REBOOT ;
+    }
+    else if (MATCH("config", "hostwd_use_kern_wd"))
+    {
+        config_ptr->hostwd_use_kern_wd = atoi(value);
+        config_ptr->mask |= CONFIG_HOSTWD_USE_KERN_WD ;
+    }
+    else if (MATCH("config", "hostwd_console_path"))
+    {
+        /* private copy of the value string */
+        config_ptr->hostwd_console_path = strdup(value);
+        config_ptr->mask |= CONFIG_HOSTWD_CONSOLE_PATH ;
+    }
+    else if (MATCH("timeouts", "kernwd_update_period"))
+    {
+        config_ptr->kernwd_update_period = atoi(value);
+        config_ptr->mask |= CONFIG_KERNWD_UPDATE_PERIOD ;
+    }
+    else if (MATCH("config", "hostwd_update_period")) /* in pmond.conf file */
+    {
+        config_ptr->hostwd_update_period = atoi(value);
+        config_ptr->mask |= CONFIG_HOSTWD_UPDATE_PERIOD ;
+    }
+    return (PASS);
+}
+
+/** Start processing of the config file
+ *
+ * Parses HOSTWD_CONFIG_FILE and then PMOND_CONFIG_FILE with
+ * hostw_process_config.
+ *
+ * Returns FAIL_INI_CONFIG if either file cannot be loaded, otherwise PASS.
+ */
+int daemon_configure ( void )
+{
+    daemon_config_type * config = daemon_get_cfg_ptr();
+
+    if (ini_parse(HOSTWD_CONFIG_FILE, hostw_process_config, config) < 0)
+    {
+        elog("Can't load '%s'\n", HOSTWD_CONFIG_FILE);
+        return (FAIL_INI_CONFIG);
+    }
+
+    if (ini_parse(PMOND_CONFIG_FILE, hostw_process_config, config) < 0)
+    {
+        elog("Can't load '%s'\n", PMOND_CONFIG_FILE);
+        return (FAIL_INI_CONFIG);
+    }
+
+    /* hostwd_update_period is how long we *expect* to wait between updates.
+     * given the unpredictability of scheduling, etc, we'll not consider a
+     * message missed until twice the expected time has elapsed
+     */
+    config->hostwd_update_period *= 2;
+
+    return (PASS);
+}
+
+/* Setup the daemon messaging interfaces/sockets
+ *
+ * Returns whatever hostw_socket_init returns.
+ */
+int socket_init ( void )
+{
+    int rc = PASS;
+    ilog("setting up host socket\n");
+    rc = hostw_socket_init();
+
+    return (rc);
+}
+
+/* The main program initializer
+ * iface and nodetype_str are passed by the daemon framework, but are
+ * not needed in this program
+ *
+ * Returns PASS, or the FAIL_xxx code of the first step that failed:
+ * FAIL_FILES_INIT, FAIL_SIGNAL_INIT, FAIL_DAEMON_CONFIG or FAIL_SOCKET_INIT.
+ */
+int daemon_init ( string iface, string nodetype_str )
+{
+    int rc = PASS ;
+    hostw_ctrl_type* ctrl_ptr = get_ctrl_ptr();
+    UNUSED(iface);
+    UNUSED(nodetype_str);
+
+    /* init the control struct */
+    memset(ctrl_ptr, 0, sizeof(hostw_ctrl_type));
+
+    if (daemon_files_init() != PASS)
+    {
+        elog ("Pid, log or other files could not be opened\n");
+        return ( FAIL_FILES_INIT ) ;
+    }
+
+    /* Bind signal handlers */
+    if (daemon_signal_init() != PASS)
+    {
+        elog ("daemon_signal_init failed\n");
+        return ( FAIL_SIGNAL_INIT );
+    }
+
+   /************************************************************************
+    * There is no point continuing with init ; i.e. running daemon_configure,
+    * initializing sockets and trying to query for an ip address until the
+    * daemon's configuration requirements are met. Here we wait for those
+    * flag files to be present before continuing.
+    ************************************************************************
+    * Wait for /etc/platform/.initial_config_complete & /var/run/.goenabled */
+    daemon_wait_for_file ( CONFIG_COMPLETE_FILE , 0);
+    daemon_wait_for_file ( GOENABLED_MAIN_READY , 0);
+
+    /* Configure the daemon */
+    if ((rc = daemon_configure()) != PASS)
+    {
+        elog ("Daemon service configuration failed (rc:%i)\n", rc );
+        rc = FAIL_DAEMON_CONFIG ;
+    }
+
+    /* Setup the messaging sockets */
+    else if ((rc = socket_init()) != PASS)
+    {
+        elog ("socket initialization failed (rc:%d)\n", rc );
+        rc = FAIL_SOCKET_INIT ;
+    }
+    return (rc);
+}
+
+/* Start the service
+ *
+ * 1. Wait for host config (install) complete
+ * 2. Wait for goenable
+ * 3. Do startup delay
+ * 4. run the host watchdog service inside hostwHdlr.cpp
+ *
+ */
+void daemon_service_run ( void )
+{
+    ilog ("System is up and running, hostwd ready for action\n" );
+
+    /* last step before starting main loop - start kernel watchdog.
+     * The return code is not checked here ; the service is started
+     * whether or not the kernel watchdog could be opened. */
+    kernel_watchdog_init();
+
+    hostw_service();
+    daemon_exit();
+}
+
+/* Startup the kernel watchdog
+ *
+ * We have to regularly pet the watchdog after calling this, so don't call
+ * this function until we're ready to start our main program loop where
+ * the watchdog is pet.
+ * + * Potential improvement - use an mtcTimer rather than main loop to pet + * watchdog to avoid this requirement + */ +static int kernel_watchdog_init ( void ) +{ + hostw_ctrl_type * ctrl_ptr = get_ctrl_ptr(); + daemon_config_type * config_ptr = daemon_get_cfg_ptr(); + + /* open the watchdog */ + + if ( (config_ptr->hostwd_use_kern_wd == 0) || + (config_ptr->kernwd_update_period < HOSTW_MIN_KERN_UPDATE_PERIOD)) + { + /* config file says don't use watchdog, or used too small a period */ + return PASS; + } + + ilog ("Opening kernel watchdog device\n"); + ctrl_ptr->watchdog = open("/dev/watchdog", O_WRONLY); + if (0 >= ctrl_ptr->watchdog) + { + elog("Could not open kernel watchdog\n"); + return FAIL; + } + + /* set watchdog timeout (in seconds) */ + ilog ("Setting kernel watchdog options - kernel timeout after %d seconds\n", + config_ptr->kernwd_update_period); + if (ioctl(ctrl_ptr->watchdog, WDIOC_SETTIMEOUT, &config_ptr->kernwd_update_period)) + { + elog ("Error setting watchdog options -- closing watchdog\n") + kernel_watchdog_close(); + return FAIL; + } + + /* do initial keep alive */ + ilog ("Watchdog options set\n"); + kernel_watchdog_pet(); + return PASS; +} + +/* Gracefully take the watchdog to live on the farm */ +static void kernel_watchdog_close ( void ) +{ + hostw_ctrl_type* ctrl_ptr = get_ctrl_ptr(); + + if (ctrl_ptr->watchdog) + { + /* "Magic close" - special character required by some watchdogs */ + size_t written; + written = write(ctrl_ptr->watchdog, "V", 1); + if (written <= 0) + { + wlog("Can't send magic close to kernel watchdog - behavior will" + " be implementation dependant"); + } + close(ctrl_ptr->watchdog); + ctrl_ptr->watchdog = 0; + } +} + +/* Pet the watchdog to keep it from barking (resetting the system) */ +void kernel_watchdog_pet ( void ) +{ + hostw_ctrl_type* ctrl_ptr = get_ctrl_ptr(); + if (ctrl_ptr->watchdog != 0){ + ioctl(ctrl_ptr->watchdog, WDIOC_KEEPALIVE, 0); + } +} + + + +const char MY_DATA [100] = { "eieio\n" } ; +const char 
* daemon_stream_info ( void ) +{ + return (&MY_DATA[0]); +} + +/** Teat Head Entry */ +int daemon_run_testhead ( void ) +{ + ilog ("Empty test head.\n"); + return (PASS); +} + diff --git a/mtce-common/cgts-mtce-common-1.0/hostw/hostwMsg.cpp b/mtce-common/cgts-mtce-common-1.0/hostw/hostwMsg.cpp new file mode 100644 index 00000000..f7e44f13 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hostw/hostwMsg.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform Host Watchdog Service Messaging + */ + +#include "hostw.h" +#include "nodeMacro.h" + +/** + * Messaging Socket Control Struct - The allocated struct + */ +static hostw_socket_type hostw_sock; + +hostw_socket_type * hostw_getSock_ptr ( void ) +{ + return ( &hostw_sock ); +} + +/** + * Create the socket interface to the host watchdog daemon + * We use Unix sockets rather than UDP, which are identified by a pathname + * (essentially, a FIFO pipe) rather than a portn number + */ +int hostw_socket_init ( ) +{ + hostw_socket_type * hostw_socket = hostw_getSock_ptr(); + + hostw_socket->status_sock = socket (AF_UNIX, SOCK_DGRAM, 0); + if (hostw_socket->status_sock <= 0) return FAIL; + + memset(&hostw_socket->status_addr, 0, sizeof(hostw_socket->status_addr)); + hostw_socket->status_addr.sun_family = AF_UNIX; + + snprintf( &(hostw_socket->status_addr.sun_path[1]), UNIX_PATH_MAX-1, + "%s", + HOSTW_UNIX_SOCKNAME); + + if (bind(hostw_socket->status_sock, (struct sockaddr *)&hostw_socket->status_addr, sizeof(hostw_socket->status_addr)) == -1) + { + elog ("failed to bind\n"); + close (hostw_socket->status_sock); + return FAIL; + } + + ilog ("hostwd listening for status updates\n"); + + return PASS; +} + diff --git a/mtce-common/cgts-mtce-common-1.0/hostw/hostwMsg.h b/mtce-common/cgts-mtce-common-1.0/hostw/hostwMsg.h new file mode 100644 index 00000000..687c6875 --- /dev/null +++ 
b/mtce-common/cgts-mtce-common-1.0/hostw/hostwMsg.h @@ -0,0 +1,17 @@ +#ifndef _HOSTW_MSG_H +#define _HOSTW_MSG_H + +/* + * Copyright (c) 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/* Note that using Unix abstract namespace, this will be prepended by a + * NULL character + */ +#define HOSTW_UNIX_SOCKNAME "HostWatchdog" + +#endif /* _HOSTW_MSG_H */ + diff --git a/mtce-common/cgts-mtce-common-1.0/hostw/scripts/hostw b/mtce-common/cgts-mtce-common-1.0/hostw/scripts/hostw new file mode 100644 index 00000000..9f5526c5 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hostw/scripts/hostw @@ -0,0 +1,85 @@ +#! /bin/sh +# +# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# chkconfig: 2345 05 95 +# +### BEGIN INIT INFO +# Provides: hostwd +# Default-Start: 3 5 +# Default-Stop: 0 1 2 6 +# Short-Description: host watchdog daemon +### END INIT INFO + +# echo "7:3:respawn:/usr/local/bin/hostwd" >> /etc/inittab + +. /etc/init.d/functions + +DAEMON_NAME="hostwd" +DAEMON="/usr/local/bin/${DAEMON_NAME}" +IFACE="" + +if [ ! -e "$DAEMON" ] ; then + logger "$DAEMON is missing" + exit 1 +fi + +RETVAL=0 + +PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin +export PATH + +case "$1" in + start) + echo -n "Starting $DAEMON_NAME: " + # Prevent multipe starts + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + echo "OK" + exit $RETVAL + fi + start-stop-daemon --start -b -x ${DAEMON} -- + RETVAL=$? + if [ $RETVAL -eq 0 ] ; then + echo "OK" + else + echo "FAIL" + fi + ;; + + stop) + echo -n "Stopping ${DAEMON_NAME}: " + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + killproc ${DAEMON_NAME} + fi + echo "OK" + ;; + + restart) + $0 stop + $0 start + ;; + + status) + pid=`pidof ${DAEMON_NAME}` + RETVAL=$? 
+ if [ ${RETVAL} -eq 0 ] ; then + echo "${DAEMON_NAME} is running" + else + echo "${DAEMON_NAME} is NOT running" + fi + ;; + + condrestart) + [ -f /var/lock/subsys/${DAEMON_NAME} ] && $0 restart + ;; + + *) + echo "usage: $0 { start | stop | status | restart | condrestart | status }" + ;; +esac + +exit $RETVAL diff --git a/mtce-common/cgts-mtce-common-1.0/hostw/scripts/hostw.logrotate b/mtce-common/cgts-mtce-common-1.0/hostw/scripts/hostw.logrotate new file mode 100644 index 00000000..065ccc1f --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hostw/scripts/hostw.logrotate @@ -0,0 +1,16 @@ +#daily +nodateext + +/var/log/hostwd.log +{ + nodateext + size 10M + start 1 + missingok + rotate 20 + compress + sharedscripts + postrotate + systemctl reload syslog-ng > /dev/null 2>&1 || true + endscript +} diff --git a/mtce-common/cgts-mtce-common-1.0/hostw/scripts/hostw.service b/mtce-common/cgts-mtce-common-1.0/hostw/scripts/hostw.service new file mode 100644 index 00000000..9fc4ba1e --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hostw/scripts/hostw.service @@ -0,0 +1,23 @@ +[Unit] +Description=Titanium Cloud Maintenance Host Watchdog +After=syslog.service network.target hbsClient.service mtcClient.service sm.service fsmon.service + + +[Service] +Type=forking +ExecStart=/etc/rc.d/init.d/hostw start +ExecStop=/etc/rc.d/init.d/hostw stop +ExecReload=/etc/rc.d/init.d/hostw reload +PIDFile=/var/run/hostwd.pid + +# Failure handling +TimeoutStartSec=10s +TimeoutStopSec=10s + +# Auto recovery +Restart=always +RestartSec=10 + +[Install] +WantedBy=multi-user.target + diff --git a/mtce-common/cgts-mtce-common-1.0/hostw/scripts/hostwd.conf b/mtce-common/cgts-mtce-common-1.0/hostw/scripts/hostwd.conf new file mode 100755 index 00000000..7a94a32e --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hostw/scripts/hostwd.conf @@ -0,0 +1,12 @@ +; CGTS Host Watchdog Configuration File + +[config] ; Configuration +hostwd_reboot_on_err = 1 ; host watchdog to reboot on detected 
failure +hostwd_failure_threshold = 5 ; number of # missed messages before action taken +hostwd_use_kern_wd = 1 ; use kernel /dev/watchdog as backup watchdog +hostwd_console_path = /dev/console ; console on which to log extreme events, like + ; notification of reboot +[timeouts] +kernwd_update_period = 300 ; timeout until kernel resets system due to dead + ; hostwd process (kernel watchdog) + diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/Makefile b/mtce-common/cgts-mtce-common-1.0/hwmon/Makefile new file mode 100644 index 00000000..f24bcc52 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/Makefile @@ -0,0 +1,57 @@ +# +# Copyright (c) 2014-2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +SRCS += hwmonUtil.cpp +SRCS += hwmonIpmi.cpp +SRCS += hwmonModel.cpp +SRCS += hwmonGroup.cpp +SRCS += hwmonSensor.cpp +SRCS += hwmonThreads.cpp +SRCS += hwmonAlarm.cpp +SRCS += hwmonHdlr.cpp +SRCS += hwmonJson.cpp +SRCS += hwmonClass.cpp +SRCS += hwmonHttp.cpp +SRCS += hwmonInit.cpp +SRCS += hwmonMsg.cpp +SRCS += hwmonFsm.cpp + +OBJS = $(SRCS:.cpp=.o) +BIN = hwmond +LDLIBS = -lstdc++ -ldaemon -lfmcommon -lcommon -lthreadUtil -lipmiUtil -lpthread -levent -ljson-c -lrt -lcrypto +INCLUDES = -I../daemon -I../common -I../maintenance -I. +CCFLAGS = -g -O2 -Wall -Wextra -Werror -std=c++11 -pthread + +STATIC_ANALYSIS_TOOL = cppcheck +STATIC_ANALYSIS_TOOL_EXISTS = $(shell [[ -e `which $(STATIC_ANALYSIS_TOOL)` ]] && echo 1 || echo 0) + +# all: common daemon build clean_common +all: clean static_analysis common daemon build + +.cpp.o: + $(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@ + +static_analysis: +ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1) + $(STATIC_ANALYSIS_TOOL) --enable=warning --language=c++ -U__AREA__ -DWANT_FIT_TESTING *.cpp *.h +else + echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..." 
+endif + +build: clean static_analysis $(OBJS) + $(CXX) $(CCFLAGS) $(OBJS) -L../daemon -L../common $(LDLIBS) -o $(BIN) + +common: + ( cd ../common ; make lib VER=$(VER) VER_MJR=$(VER_MJR)) + +daemon: + ( cd ../daemon ; make lib VER=$(VER) VER_MJR=$(VER_MJR)) + +clean_bins: + @rm -f $(BIN) + +clean: + @rm -f $(OBJS) $(BIN) diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hp380.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hp380.cpp new file mode 100644 index 00000000..e69de29b diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmon.h b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmon.h new file mode 100644 index 00000000..9f2ec083 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmon.h @@ -0,0 +1,512 @@ +#ifndef __INCLUDE_HWMON_H__ +#define __INCLUDE_HWMON_H__ +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud's Hardware Monitor Service Header + */ + +/* TODO: Scrub header list removing stuff we don't need */ +#include +#include +#include /* for .. signaling */ +#include /* for .. close and usleep */ +#include /* for .. system */ +#include /* for the list of conf file names */ +#include /* for ... time */ +#include /* */ +#include /* for ... socket */ +#include +#include /* for ... UDP socket type */ +#include +#include /* for ... ioctl calls */ +#include /* for ... ifreq ifr */ +#include /* for ... hostent */ +#include + +using namespace std; + +#include "nodeBase.h" +#include "alarmUtil.h" /* for ... common alarm identities */ +#include "daemon_ini.h" /* Ini Parser Header */ +#include "daemon_common.h" /* Common definitions and types for daemons */ +#include "daemon_option.h" /* Common options for daemons */ +#include "msgClass.h" + +#include "nodeTimers.h" /* maintenance timer utilities start/stop */ +#include "nodeUtil.h" /* common utilities */ +#include "httpUtil.h" /* for ... libEvent */ +#include "hwmonAlarm.h" /* for ... 
hwmonAlarm_id_type */ + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "mon" + +#define MAX_HOST_SENSORS (512) // (100) +#define MAX_HOST_GROUPS (20) +#define MIN_SENSOR_GROUPS (4) +#define MAX_SIZE_SENSOR_MSG_BYTES (4096*4) +#define HWMON_DEFAULT_LARGE_INTERVAL (MTC_MINS_15) +#define HWMON_DEFAULT_AUDIT_INTERVAL (MTC_MINS_2) +#define HWMON_MIN_AUDIT_INTERVAL (10) +#define DEGRADE_AUDIT_TRIGGER (2) +#define MAX_SENSORS_NOT_FOUND (5) +#define START_DEBOUCE_COUNT (1) + +/* Daemon Sensor Config Directory - where profile files are stored */ +#define CONFIG_DIR ((const char *)("/etc/hwmon.d")) + +#define QUANTA_SENSOR_PROFILE_FILE ((const char *)("/etc/bmc/server_profiles.d/sensor_quanta_v1_ilo_v4.profile")) +#define QUANTA_SENSOR_GROUPS (5) +#define QUANTA_PROFILE_SENSORS (55) +#define QUANTA_PROFILE_SENSORS_REVISED_1 (51) + +#define ENTITY_DELIMITER ((const char *)":") +#define SENSOR_DELIMITER ((const char ) '=') +#define DEFAULT_READING ((const char *) "unknown") + +#define CONFIG_AUDIT_PERIOD (0x00000001) +#define CONFIG_KEYSTONE_PORT (0x00000002) +#define CONFIG_EVENT_PORT (0x00000004) +#define CONFIG_CMD_PORT (0x00000008) +#define CONFIG_TOKEN_REFRESH (0x00000020) +#define CONFIG_AUTH_HOST (0x00000040) +#define CONFIG_INV_EVENT_PORT (0x00000080) + +#define CONFIG_MASK ( CONFIG_AUDIT_PERIOD | \ + CONFIG_KEYSTONE_PORT | \ + CONFIG_EVENT_PORT | \ + CONFIG_INV_EVENT_PORT| \ + CONFIG_TOKEN_REFRESH | \ + CONFIG_CMD_PORT ) + +typedef enum +{ + HWMON_SEVERITY_GOOD, + HWMON_SEVERITY_OFFLINE, + HWMON_SEVERITY_MINOR, + HWMON_SEVERITY_MAJOR, + HWMON_SEVERITY_CRITICAL, + HWMON_SEVERITY_NONRECOVERABLE, + HWMON_SEVERITY_RESET, + HWMON_SEVERITY_POWERCYCLE, + HWMON_SEVERITY_LAST +} sensor_severity_enum; + +/* Action strings */ +#define HWMON_ACTION_IGNORE ((const char *)"ignore") +#define HWMON_ACTION_LOG ((const char *)"log") +#define HWMON_ACTION_ALARM ((const char *)"alarm") +#define HWMON_ACTION_RESET ((const char *)"reset") +#define HWMON_ACTION_POWERCYCLE 
((const char *)"power-cycle") + +/* Severity strings */ +#define HWMON_MINOR ((const char *)"minor") +#define HWMON_MAJOR ((const char *)"major") +#define HWMON_CRITICAL ((const char *)"critical") + +typedef enum +{ + SENSOR_KIND__NONE = 0x00, + SENSOR_KIND__TEMP = 0x01, /* Temperature */ + SENSOR_KIND__VOLT = 0x02, /* Voltage */ + SENSOR_KIND__CURR = 0x03, /* Current */ + SENSOR_KIND__FAN = 0x04, /* Fan */ + SENSOR_KIND__RES1 = 0x05, + SENSOR_KIND__RES2 = 0x06, + SENSOR_KIND__CPU = 0x07, + SENSOR_KIND__POWER = 0x08, + SENSOR_KIND__RES3 = 0x09, + SENSOR_KIND__RES4 = 0x0A, + SENSOR_KIND__RES5 = 0x0B, + SENSOR_KIND__MEM = 0x0C, + SENSOR_KIND__DISK = 0x0D, + SENSOR_KIND__RES6 = 0x0E, + SENSOR_KIND__FWPROG = 0x0F, + SENSOR_KIND__LOG = 0x10, + SENSOR_KIND__WDOG = 0x11, + SENSOR_KIND__EVENT = 0x12, + SENSOR_KIND__INT = 0x13, + SENSOR_KIND__BUTTON = 0x14, + +} sensor_kind_enum ; + +/* Values mimic ipmi_unit_type_e in ipmi_bits.h */ +typedef enum +{ + SENSOR_UNIT__NONE = 0x00, + SENSOR_UNIT__DEG_C = 0x01, + SENSOR_UNIT__DEG_F = 0x02, + SENSOR_UNIT__DEG_K = 0x03, + SENSOR_UNIT__VOLTS = 0x04, + SENSOR_UNIT__AMPS = 0x05, + SENSOR_UNIT__WATTS = 0x06, + + SENSOR_UNIT__RPM = 18, + + SENSOR_UNIT__BYTES = 70, + SENSOR_UNIT__KBYTES, + SENSOR_UNIT__MBYTES, + SENSOR_UNIT__GBYTES, + SENSOR_UNIT__WORDS, + SENSOR_UNIT__DWORDS, + SENSOR_UNIT__QWORDS, + SENSOR_UNIT__LINES, + SENSOR_UNIT__HITS, + SENSOR_UNIT__MISSES, + SENSOR_UNIT__RETRIES = 80, + SENSOR_UNIT__RESETS, + SENSOR_UNIT__OVERRUNS, + SENSOR_UNIT__UNDERRUNS, + SENSOR_UNIT__COLLISIONS, + SENSOR_UNIT__PACKETS, + SENSOR_UNIT__MESSAGES, + SENSOR_UNIT__CHARACTERS, + SENSOR_UNIT__ERRORS, + SENSOR_UNIT__CORRECTABLE_ERRORS, + SENSOR_UNIT__UNCORRECTABLE_ERRORS = 90, + SENSOR_UNIT__FATAL_ERRORS + +} sensor_unit_enum ; + + + +typedef enum +{ + HWMON_ADD__START = 0, + HWMON_ADD__STATES, + HWMON_ADD__WAIT, + HWMON_ADD__DONE, + HWMON_ADD__STAGES, +} hwmon_addStages_enum ; + +typedef enum +{ + HWMON_SENSOR_MONITOR__IDLE = 0, + 
HWMON_SENSOR_MONITOR__START, + HWMON_SENSOR_MONITOR__DELAY, + HWMON_SENSOR_MONITOR__READ, + HWMON_SENSOR_MONITOR__PARSE, + HWMON_SENSOR_MONITOR__CHECK, + HWMON_SENSOR_MONITOR__UPDATE, + HWMON_SENSOR_MONITOR__HANDLE, + HWMON_SENSOR_MONITOR__FAIL, + HWMON_SENSOR_MONITOR__POWER, + HWMON_SENSOR_MONITOR__RESTART, + HWMON_SENSOR_MONITOR__STAGES +} monitor_ctrl_stage_enum ; + +typedef enum +{ + HWMON_CANNED_GROUP__NULL, + HWMON_CANNED_GROUP__FANS, + HWMON_CANNED_GROUP__TEMP, + HWMON_CANNED_GROUP__VOLT, + HWMON_CANNED_GROUP__POWER, + HWMON_CANNED_GROUP__USAGE, +#ifdef WANT_MORE_GROUPS + HWMON_CANNED_GROUP__MEMORY, + HWMON_CANNED_GROUP__CLOCKS, + HWMON_CANNED_GROUP__ERRORS, + HWMON_CANNED_GROUP__MSG, + HWMON_CANNED_GROUP__TIME, + HWMON_CANNED_GROUP__MISC, +#endif + HWMON_CANNED_GROUPS +} canned_group_enum ; + + +typedef struct +{ + bool ignored ; + bool alarmed ; + bool logged ; +} action_state_type ; + +/* Sensor sample data structure for ipmitool output */ +typedef struct +{ + string name ; /* sensor name */ + string value ; /* sensor value */ + string unit ; /* sensor unit type */ + string status ; /* status - ok, nc, cr, nr */ + string lnr ; /* Lower Non-Recoverable */ + string lcr ; /* Lower Critical */ + string lnc ; /* Lower Non-Critical */ + string unc ; /* Upper Non-Critical */ + string ucr ; /* Upper Critical */ + string unr ; /* Upper Non-Recoverable */ + + /* the group this sensor will go into */ + canned_group_enum group_enum ; + + /* set to true if we want the system to ignore this sensor */ + bool ignore = true ; + + /* used to find sensor name mismatches */ + bool found ; +} sensor_data_type; + + +/* Control structure for ipmi sensor monitoring + * + * TODO: The interval is part of the host but + * should eventually me moved here. 
+ */ +typedef struct +{ + monitor_ctrl_stage_enum stage ; + struct mtc_timer timer ; + + /* monolithic timestamp of the last/this sensor sample time + * Not Used - future */ + unsigned long long last_sample_time ; + unsigned long long this_sample_time ; +} monitor_ctrl_type ; + +/** Sensor Information: All the information related to a sensor + * what is needed to read, threshold along with back end algorithms + * that might suppress or downgrade action handling */ +typedef struct +{ + string hostname ; /**< the board management controller type string */ + string bmc ; /**< the board management controller type string */ + + string uuid ; /**< sensor uuid */ + string host_uuid ; /**< host uuid */ + string group_uuid ; /**< The UUID of the group this sensor is in */ + string sensorname ; /**< sensor name as a string */ + string sensortype ; /**< sensor type string 'voltage', 'fan' etc */ + string datatype ; /**< discrete or analog */ + + bool suppress ; /**< True to allow action handling */ + string actions_minor ; /**< One of the following actions */ + string actions_major ; /**< Ignore, Log, Alarm and for critical only */ + string actions_critl ; /**< we add Reset and Powercycle */ + + string script ; /**< script that can read the sensor */ + string path ; /**< sensor read path */ + string entity_path ; /**< entity path is "path:sensorname" */ + + string algorithm ; /**< unique string representing a mgmt algorithm */ + string status; /**< offline, ok, minor, major, critical */ + string state ; /**< enabled or disabled */ + + float t_critical_lower; /**< lower threshold for critical alarm assertion*/ + float t_major_lower; /**< lower threshold for major alarm assertion */ + float t_minor_lower; /**< lower threshold for minor alarm assertion */ + + float t_minor_upper; /**< upper threshold for minor alarm assertion */ + float t_major_upper; /**< upper threshold for major alarm assertion */ + float t_critical_upper; /**< upper threshold for critical alarm assertion*/ + 
+ string unit_modifier ; /**< 10^2 , per second or x/sec or x/hr */ + string unit_base ; /**< Celcius, Revolutions */ + string unit_rate ; /**< Minute */ + + protocol_enum prot ; /**< protocol to use for this sensor */ + sensor_kind_enum kind ; /**< the kind of sensor ; see definition */ + sensor_unit_enum unit ; /**< the units the sensor should be displayed in */ + + sensor_severity_enum severity ; + sensor_severity_enum sample_severity ; + string sample_status ; + string sample_status_last ; + bool degraded ; + bool alarmed ; + + int debounce_count ; + bool want_debounce_log_if_ok ; + + action_state_type minor ; + action_state_type major ; + action_state_type critl ; + + bool updated ; + int not_updated_status_change_count ; + bool found ; + canned_group_enum group_enum ; + int not_found_log_throttle ; +} sensor_type ; + +#define NOT_FOUND_COUNT_BEFORE_MINOR (3) +#define NOT_FOUND_LOG_THROTTLE (1) + + +/****************************************************************************** + * A structure containing sensor model settings that need to be + * preserved over a model relearn + ******************************************************************************/ +typedef struct +{ + string name ; /* group name */ + string minor ; + string major ; + string critl ; +} group_actions_type ; + +typedef struct +{ + int groups ; + int interval ; + group_actions_type group_actions[MAX_HOST_GROUPS] ; + +} model_attr_type ; + +void init_model_attributes ( model_attr_type & attr ); + +/** Sensor Group Information: All the group information related to a group + * of sensors, group actions, group thresholds, etc */ +struct sensor_group_type +{ + string hostname ; /**< the host this group is assigned to */ + string host_uuid ; /**< sensor name as a string */ + string group_uuid ; /**< The UUID of the group this sensor is in */ + string group_name ; /**< sensor name as a string */ + string sensortype ; /**< sensor type string 'voltage', 'fan' etc */ + canned_group_enum group_enum 
; /**< index into group type ; fans,voltage,power */ + string datatype ; /**< discrete or analog */ + string algorithm ; /**< unique string representing a mgmt algorithm */ + string actions_critical_choices ; /**< list of actions for critical pull down */ + string actions_major_choices ; /**< list of actions for major pull down */ + string actions_minor_choices ; /**< list of actions for minor pull down */ + bool suppress ; /**< True to allow action handling */ + + /** pointers to the sensors in this group */ + sensor_type * sensor_ptr[MAX_HOST_SENSORS] ; + int sensors ; /**< number of sensors in this group */ + + string sensor_labels ; /**< list of sensor labels fetched from profile */ + + string path ; /**< sensor group read path */ + + /* current sensor read index within this group ; used by the group monitor FSM + * This member is only used when we are reading group sensors individually */ + int sensor_read_index ; + + string status ; /**< group status */ + + string actions_minor_group ; /**< One of the following actions */ + string actions_major_group ; /**< Ignore, Log, Alarm, and for critical only */ + string actions_critl_group ; /**< we add Reset and Powercycle */ + + string group_state ; /**< disabled, minor, major, critical */ + int group_interval ; /**< audit interval */ + + float t_critical_lower_group; /**< lower threshold for critical alarm assertion*/ + float t_major_lower_group ; /**< lower threshold for major alarm assertion */ + float t_minor_lower_group ; /**< lower threshold for minor alarm assertion */ + + float t_minor_upper_group ; /**< upper threshold for minor alarm assertion */ + float t_major_upper_group ; /**< upper threshold for major alarm assertion */ + float t_critical_upper_group; /**< upper threshold for critical alarm assertion*/ + + string unit_modifier_group ; /**< 10^2 , per second or x/sec or x/hr */ + string unit_base_group ; /**< Celcius, Revolutions */ + string unit_rate_group ; /**< Minute */ + + bool active ; /**< true 
if this sensor request is in progress */ + bool timeout ; /**< true if the last request timed-out */ + bool failed ; /**< true if group read failed */ + bool alarmed ; /**< true if the group alarm is asserted */ + struct mtc_timer timer; /**< group audit timer in seconds */ + + /**< Sensor Read Data Handler + * + * Parms: group_ptr - the sensor group pointer + * index - index into the group's sensor_ptr table + * response - the sensor read data as a string + * + * Returns: sensor_severity type ; see hwmon.h + * > ok, minor, major or critical + * + **/ + sensor_severity_enum (*server_handler) (struct sensor_group_type *, int , string ); +} ; + +/* The Hardware Monitor Messaging Socket Structure */ +typedef struct +{ + int event_port ; /**< hwmon event transmit port */ + msgClassSock* event_sock ; /**< ... socket */ + + int cmd_port ; /**< hwmon command receive port */ + msgClassSock* cmd_sock ; /**< ... socket */ + + msgSock_type mtclogd ; /**< messaging into to mtclogd */ + +} hwmon_socket_type ; + +/* Note: Any addition to this struct requires explicit + * init in daemon_init. + * Cannot memset a struct contianing a string type. + **/ +typedef struct +{ + string my_macaddr ; /**< MAC address of event port */ + string my_hostname ; /**< My hostname */ + string my_local_ip ; /**< Primary IP address */ + string my_float_ip ; /**< Secondary (floating) IP address */ + + bool active ; /**< Monitor hardware when true. 
This is set by + either the -a run option on daemon startup + or is controlled by the ...HWMON_MON_START + and HWMON_MON_STOP commands from maintenance */ + int audit_period ; + + struct libEvent httpEvent ; + + char log_str [MAX_API_LOG_LEN]; + char filename[MAX_FILENAME_LEN]; + +} hwmon_ctrl_type ; +hwmon_ctrl_type * get_ctrl_ptr ( void ) ; + +hwmon_socket_type * getSock_ptr ( void ); + +void hwmon_stages_init ( void ); + +/* hwmonHdlr.cpp API */ +void hwmon_timer_init ( void ); +int hwmon_hdlr_init ( hwmon_ctrl_type * ctrl_ptr ); +void hwmon_hdlr_fini ( hwmon_ctrl_type * ctrl_ptr ); +void hwmon_service ( hwmon_ctrl_type * ctrl_ptr ); + +/* hwmonInit.cpp API */ +int hwmon_profile_read ( string hostname, const char * profile_name ); + +/* hwmonMsg.cpp API */ +void hwmon_msg_init ( void ); +void hwmon_msg_fini ( void ); + +int event_tx_port_init ( int port , const char * iface ); +int cmd_rx_port_init ( int port ); +int mtclogd_tx_port_init ( void ); + +int hwmon_log_message ( const char * hostname, + const char * filename, + const char * log_str ); + +int hwmon_send_event ( string hostname, unsigned int event_code , const char * sensor_ptr ); +int hwmon_service_inbox ( void ); + + +/* hwmonFsm.cpp API */ +void hwmonTimer_handler ( int sig, siginfo_t *si, void *uc); +extern void timer_handler ( int sig, siginfo_t *si, void *uc); + +void sensorState_print ( string & hostname, sensor_type * sensor_ptr ); + + +/** + * @} hwmon_base + */ + +#endif /* __INCLUDE_HWMON_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonAlarm.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonAlarm.cpp new file mode 100644 index 00000000..920b59bf --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonAlarm.cpp @@ -0,0 +1,552 @@ +/* + * Copyright (c) 2013, 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform common Alarm utilities + */ + +#include "daemon_common.h" /* for ... 
daemon_is_file_present */ +#include "nodeBase.h" +#include "nodeUtil.h" +#include "hwmonAlarm.h" + +#define SENSOR_ALARM_ID ((const char *)"200.007") /* Sensor read alarm ; i.e. the sensor read value bad */ +#define SENSORCFG_ALARM_ID ((const char *)"200.014") /* Sensor configuration alarm ; i.e. could not add */ + +string _getSev_str ( EFmAlarmSeverityT sev ) +{ + switch ( sev ) + { + case FM_ALARM_SEVERITY_CLEAR: return ("clear"); + case FM_ALARM_SEVERITY_WARNING: return ("warning"); + case FM_ALARM_SEVERITY_MINOR: return ("minor"); + case FM_ALARM_SEVERITY_MAJOR: return ("major"); + case FM_ALARM_SEVERITY_CRITICAL:return ("critical"); + default : return ("unknown"); + } +} + +void build_sensor_entity_path ( string & hostname , SFmAlarmDataT * alarm_ptr , string & sensorname ) +{ + memset(alarm_ptr, 0, sizeof(SFmAlarmDataT)); + snprintf ( &alarm_ptr->entity_type_id[0] , FM_MAX_BUFFER_LENGTH, "system.host" ); + int num = snprintf ( &alarm_ptr->entity_instance_id[0], FM_MAX_BUFFER_LENGTH , "host=%s.sensor=%s", hostname.data(), sensorname.data()); + alog ("%s Entity Path:%d:%s\n", hostname.c_str(), num, &alarm_ptr->entity_instance_id[0] ); +} + + +/* Query FM for the alarm selected by 'id' against this host and sensor (or sensor group) name and return its severity ; FM_ALARM_SEVERITY_CLEAR is returned when the alarm is not found, the id is invalid or the query fails */ +EFmAlarmSeverityT hwmon_alarm_query ( string & hostname, hwmonAlarm_id_type id, string & sensorname ) +{ + SFmAlarmDataT alarm_query ; + AlarmFilter alarm_filter ; + + memset(&alarm_query, 0, sizeof(alarm_query)); + memset(&alarm_filter, 0, sizeof(alarm_filter)); + + alarm_query.severity = FM_ALARM_SEVERITY_CLEAR ; + + switch ( id ) + { + case HWMON_ALARM_ID__SENSOR: + snprintf ( &alarm_filter.entity_instance_id[0], FM_MAX_BUFFER_LENGTH , "host=%s.sensor=%s", hostname.data(), sensorname.data()); + snprintf ( &alarm_filter.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", SENSOR_ALARM_ID); break ; + case HWMON_ALARM_ID__SENSORGROUP: + snprintf ( &alarm_filter.entity_instance_id[0], FM_MAX_BUFFER_LENGTH , "host=%s.sensorgroup=%s", hostname.data(), sensorname.data() ); /* must match the 'host=<name>.sensorgroup=<group>' instance that hwmon_alarm_util raises group alarms against ; NOTE(review): confirm callers pass the group name here */ + snprintf ( 
&alarm_filter.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", SENSORGROUP_ALARM_ID); break ; + case HWMON_ALARM_ID__SENSORCFG: + snprintf ( &alarm_filter.entity_instance_id[0], FM_MAX_BUFFER_LENGTH , "host=%s.sensor=%s", hostname.data(), sensorname.data()); + snprintf ( &alarm_filter.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", SENSORCFG_ALARM_ID); break ; + default: + { + slog ("%s invalid alarm ID (%d)\n", hostname.c_str(), id ); + return (FM_ALARM_SEVERITY_CLEAR); + } + } + + EFmErrorT rc = fm_get_fault ( &alarm_filter, &alarm_query ) ; + if ( rc == FM_ERR_OK ) + { + alog ("Found with Severity: %d\n", alarm_query.severity ); + return (alarm_query.severity) ; + } + else if ( rc != FM_ERR_ENTITY_NOT_FOUND ) + { + elog ("%s fm_get_fault returned error (%d)\n", hostname.c_str(), rc ); + } + return (FM_ALARM_SEVERITY_CLEAR); +} + +/****************************************************************************** + * + * Name : hwmonAlarm_query_entity + * + * Description: Query FM for all the sensor alarms that match the specified + * entity and update the user supplied list with the entity + * instance and its severity level. 
+ * + ******************************************************************************/ + +int hwmonAlarm_query_entity ( string & hostname, + string & entity, + list<hwmonAlarm_entity_status_type> & alarm_list ) +{ + if ( hostname.empty() ) + { + hostname = "unknown" ; + } + + if ( entity.empty() ) + { + slog ("no 'entity' string specified\n"); + return (FAIL_STRING_EMPTY); + } + + alarm_list.clear(); + + fm_ent_inst_t entity_instance ; + SFmAlarmDataT fm_alarm_list [MAX_HOST_SENSORS]; + + unsigned int max_alarms = MAX_HOST_SENSORS ; + + MEMSET_ZERO(fm_alarm_list); + + MEMSET_ZERO(entity_instance); + snprintf ( entity_instance, FM_MAX_BUFFER_LENGTH, "%s", entity.data()); + + EFmErrorT rc = fm_get_faults ( &entity_instance, &fm_alarm_list[0], &max_alarms ); + +#ifdef WANT_FIT_TESTING + + if ((( rc == FM_ERR_OK ) || ( rc == FM_ERR_ENTITY_NOT_FOUND )) && + ( daemon_want_fit ( FIT_CODE__FM_QRY_ALARMS, hostname ))) + { + rc = FM_ERR_NOCONNECT ; + } + +#endif + + if ( rc == FM_ERR_OK ) + { + hwmonAlarm_entity_status_type alarmed_entity ; + max_alarms = 0 ; + for ( int i = 0 ; i < MAX_HOST_SENSORS ; i++ ) + { + /* copy each returned alarm's instance name and severity into the caller's list ; the array was zeroed above so the first empty instance id marks the end ; the length check is bounded by the FM field size */ + if ( strnlen ( fm_alarm_list[i].entity_instance_id , FM_MAX_BUFFER_LENGTH ) ) + { + alarmed_entity.entity = entity ; + alarmed_entity.instance = fm_alarm_list[i].entity_instance_id ; + alarmed_entity.severity = fm_alarm_list[i].severity ; + + ilog ("%s found '%s' with '%s' severity\n", + hostname.c_str(), + alarmed_entity.instance.c_str(), + _getSev_str(alarmed_entity.severity).c_str()); + + max_alarms++ ; + alarm_list.push_back(alarmed_entity); + } + else + { + break ; + } + } + + ilog ("%s found %u:%zu alarms for '%s'\n", hostname.c_str(), max_alarms, alarm_list.size(), entity.c_str()); + return (PASS); + } + else if ( rc == FM_ERR_ENTITY_NOT_FOUND ) + { + alog ("%s found no alarms for '%s'\n", hostname.c_str(), entity.c_str()); + return (PASS); + } + else + { + elog ("%s fm_get_faults for '%s' 
failed (rc:%d)\n", hostname.c_str(), entity.c_str(), rc ); + return (FAIL); + } +} + + + + + + + +int hwmon_alarm_util ( string & hostname, + hwmonAlarm_id_type id, + EFmAlarmStateT state, + EFmAlarmSeverityT severity, + string & sub_entity , + string reason ) +{ + SFmAlarmDataT alarm ; /* local working alarm struct */ + + int rc = PASS ; + if (( state == FM_ALARM_STATE_MSG ) && ( id == HWMON_ALARM_ID__SENSORCFG )) + { + slog ("%s customer logging not supported for sensor config alarm IDs (%d)\n", hostname.c_str(), id ); + return (FAIL_BAD_PARM); + } + + build_sensor_entity_path ( hostname, &alarm, sub_entity ); + + alarm.alarm_state = state ; + if ( state == FM_ALARM_STATE_CLEAR ) + { + severity = FM_ALARM_SEVERITY_CLEAR ; + } + + switch ( id ) + { + case HWMON_ALARM_ID__SENSORCFG: + { + snprintf (&alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", SENSORCFG_ALARM_ID); + alarm.alarm_type = FM_ALARM_OPERATIONAL; + alarm.service_affecting = FM_FALSE; + alarm.suppression = FM_TRUE ; + alarm.severity = severity; + alarm.probable_cause = FM_ALARM_CAUSE_UNKNOWN ; + alarm.inhibit_alarms = FM_FALSE; + + if ( alarm.alarm_state == FM_ALARM_STATE_SET ) + { + snprintf(alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, + "Check Board Management Controller (BMC) provisioning. " + "Try reprovisioning the BMC. If problem persists try power cycling the host and then " + "the entire server including the BMC power. 
" + "If problem persists then contact next level of support."); + + snprintf(alarm.reason_text , FM_MAX_BUFFER_LENGTH, + "The Hardware Monitor was unable to load, configure and monitor one or more hardware sensors."); + + wlog ("%s 'sensor config' alarm asserted (%s:%s)\n", + hostname.c_str(), alarm.alarm_id, sub_entity.c_str()); + } + else + { + ilog ("%s 'sensor config' alarm cleared (%s:%s))\n", + hostname.c_str(), alarm.alarm_id, sub_entity.c_str()); + } + break ; + } + + /* Handle Hardware Monitor Sensor Alarms */ + case HWMON_ALARM_ID__SENSOR: + { + bool hostname_prefix_in_reason = true ; + + snprintf (&alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", SENSOR_ALARM_ID); + alarm.alarm_type = FM_ALARM_OPERATIONAL ; + alarm.probable_cause = FM_ALARM_UNSPECIFIED_REASON; + alarm.suppression = FM_FALSE; + alarm.inhibit_alarms = FM_FALSE; + alarm.service_affecting = FM_FALSE; + + snprintf (alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, + "If problem consistently occurs after Host is power cycled and or reset, " + "contact next level of support or lock and replace failing host."); + + snprintf(alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s is reporting a '%s' out-of-tolerance reading from the '%s' sensor", + hostname.c_str(), _getSev_str (severity).c_str(), sub_entity.c_str()); + + if ( state == FM_ALARM_STATE_CLEAR ) + { + snprintf(alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s:%s alarm clear\n", + alarm.alarm_id, + alarm.entity_instance_id); + hostname_prefix_in_reason = false ; + } + else if ( state == FM_ALARM_STATE_MSG ) + { + if ( severity == FM_ALARM_SEVERITY_WARNING ) + { + snprintf(alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s '%s' sensor %s\n", + hostname.c_str(), + sub_entity.c_str(), + reason.c_str()); + } + else + { + snprintf(alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s '%s' sensor %s %s\n", + hostname.c_str(), + sub_entity.c_str(), + reason.c_str(), + _getSev_str (severity).c_str()); + } + hostname_prefix_in_reason = true ; + } + else + { + 
snprintf(alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s is reporting a '%s' out-of-tolerance reading from the '%s' sensor", + hostname.c_str(), _getSev_str (severity).c_str(), sub_entity.c_str()); + hostname_prefix_in_reason = true ; + } + /* Override service affecting setting */ + if ( severity == FM_ALARM_SEVERITY_CRITICAL ) + { + alarm.service_affecting = FM_TRUE; + if ( reason.compare(REASON_RESETTING) == 0 ) + { + snprintf(alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s is being auto recovered by 'reset' due to a 'critical' " + "out-of-tolerance reading from the '%s' sensor.", + hostname.c_str(), sub_entity.c_str()); + hostname_prefix_in_reason = true ; + } + else if ( reason.compare(REASON_POWERCYCLING) == 0 ) + { + snprintf(alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s is being auto recovered by 'power-cycle' due to a 'critical' " + "out-of-tolerance reading from the '%s' sensor.", + hostname.c_str(), sub_entity.c_str()); + hostname_prefix_in_reason = true ; + } + } + else if ( severity == FM_ALARM_SEVERITY_CLEAR ) + { + alarm.service_affecting = FM_FALSE; + + if ( reason.empty() ) + reason = "reporting an in-tolerance reading" ; + + snprintf(alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s '%s' sensor %s", hostname.c_str(), sub_entity.c_str(), reason.c_str()); + hostname_prefix_in_reason = true ; + + } + if ( hostname_prefix_in_reason == false ) + { + ilog ("%s %s\n", hostname.c_str(), alarm.reason_text ); + } + else + { + ilog ("%s\n", alarm.reason_text ); + } + + alarm.severity = severity ; + break ; + } + + /* Handle Hardware Monitor Sensor Alarms */ + case HWMON_ALARM_ID__SENSORGROUP: + { + memset ( &alarm, 0, sizeof(SFmAlarmDataT)); + snprintf ( &alarm.entity_type_id[0] , FM_MAX_BUFFER_LENGTH, "system.host" ); + snprintf ( &alarm.entity_instance_id[0], FM_MAX_BUFFER_LENGTH, "host=%s.sensorgroup=%s", hostname.data(), sub_entity.c_str()); + snprintf ( &alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", SENSORGROUP_ALARM_ID); + + alarm.alarm_type = 
FM_ALARM_OPERATIONAL ; + alarm.probable_cause = FM_ALARM_UNSPECIFIED_REASON; + alarm.suppression = FM_FALSE; + alarm.inhibit_alarms = FM_FALSE; + alarm.alarm_state = state ; + + snprintf (alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, + "Check board management connectivity and try rebooting the board management controller. " + "If problem persists contact next level of support or lock and replace failing host."); + + if ( state == FM_ALARM_STATE_CLEAR ) + { + alarm.service_affecting = FM_FALSE; + + alarm.severity = FM_ALARM_SEVERITY_CLEAR ; + ilog ("%s %s:%s alarm clear\n", + hostname.c_str(), + alarm.alarm_id, + alarm.entity_instance_id); + + if ( reason.empty() ) + reason = "reporting an in-tolerance reading" ; + + snprintf(alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s '%s' sensor group %s", hostname.c_str(), sub_entity.c_str(), reason.c_str()); + } + else if ( state == FM_ALARM_STATE_MSG ) + { + alarm.severity = FM_ALARM_SEVERITY_CLEAR ; + + snprintf(alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s '%s' sensor group %s", hostname.c_str(), sub_entity.c_str(), reason.c_str()); + + ilog ("%s %s:%s %s\n", + hostname.c_str(), + alarm.alarm_id, + alarm.entity_instance_id, + reason.c_str()); + } + else + { + alarm.severity = FM_ALARM_SEVERITY_MAJOR ; + alarm.service_affecting = FM_FALSE; + + snprintf(alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s has one or more board management controller sensor group read failures", + hostname.c_str()); + + wlog ("%s %s: %s 'major'\n", hostname.c_str(), alarm.alarm_id, sub_entity.c_str() ); + } + break ; + } + + default: + { + slog ("%s Unsupported Alarm (%d)\n", hostname.c_str(), id ); + return (FAIL_BAD_CASE) ; + } + } + + if ( alarm.alarm_state == FM_ALARM_STATE_CLEAR ) + { + AlarmFilter filter ; + memset(&filter, 0, sizeof(filter)); + + /* Setup the alarm filter */ + snprintf (filter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", alarm.alarm_id); + snprintf (filter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", 
alarm.entity_instance_id); + + nodeUtil_latency_log ( hostname, NODEUTIL_LATENCY_MON_START , 0 ); + if ( ( rc = fm_clear_fault ( &filter )) != FM_ERR_OK ) + { + if ( rc != FM_ERR_ENTITY_NOT_FOUND ) + { + elog ("%s failed to fm_clear_fault (rc:%d)\n", hostname.c_str(), rc ); + rc = FAIL ; + } + } + nodeUtil_latency_log ( hostname, "fm_clear_fault - hwmon" , LATENCY_1SEC ); + } + else if ( alarm.alarm_state == FM_ALARM_STATE_SET ) + { + /* Debug Logs */ + alog ("%s Alarm Reason: %s\n", hostname.c_str(), alarm.reason_text ); + alog ("%s Alarm Action: %s\n", hostname.c_str(), alarm.proposed_repair_action ); + alog ("%s Alarm Ident : %s : %s\n", hostname.c_str(), alarm.entity_type_id, alarm.entity_instance_id ); + alog ("%s Alarm State : state:%d sev:%d type:%d cause:%d sa:%c supp:%c\n", + hostname.c_str(), + state, + alarm.severity, + alarm.alarm_type, + alarm.probable_cause, + alarm.service_affecting ? 'Y' : 'N', + alarm.suppression ? 'Y' : 'N' ); + nodeUtil_latency_log ( hostname, NODEUTIL_LATENCY_MON_START , 0 ); + rc = fm_set_fault ( &alarm , NULL ); + nodeUtil_latency_log ( hostname, "fm_set_fault - alarm - hwmon" , LATENCY_1SEC ); + + if ( rc != FM_ERR_OK ) + { + elog ("%s failed to set alarm %s (rc:%d)\n", hostname.c_str(), alarm.alarm_id, rc); + rc = FAIL ; + } + } + else if ( alarm.alarm_state == FM_ALARM_STATE_MSG ) + { + /* Debug Logs */ + alog ("%s Log Reason: %s\n", hostname.c_str(), alarm.reason_text ); + alog ("%s Log Action: %s\n", hostname.c_str(), alarm.proposed_repair_action ); + alog ("%s Log Ident : %s : %s\n", hostname.c_str(), alarm.entity_type_id, alarm.entity_instance_id ); + alog ("%s Log State : state:%d sev:%d type:%d cause:%d sa:%c supp:%c\n", + hostname.c_str(), + state, + alarm.severity, + alarm.alarm_type, + alarm.probable_cause, + alarm.service_affecting ? 'Y' : 'N', + alarm.suppression ? 
'Y' : 'N' ); + + nodeUtil_latency_log ( hostname, NODEUTIL_LATENCY_MON_START , 0 ); + rc = fm_set_fault ( &alarm , NULL ); + nodeUtil_latency_log ( hostname, "fm_set_fault - log - hwmon" , LATENCY_1SEC ); + + if ( rc != FM_ERR_OK ) + { + elog ("%s failed to create customer log %s (rc:%d)\n", hostname.c_str(), alarm.alarm_id, rc); + rc = FAIL ; + } + + } + else + { + slog ("%s internal error ; unsupported alarm state (%d)\n", hostname.c_str(), state ); + rc = FAIL_BAD_CASE ; + } + return (rc); +} + +/* Clear the specified maintenance alarm for specific host */ +int hwmonAlarm_clear ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ) +{ + return(hwmon_alarm_util ( hostname, id, FM_ALARM_STATE_CLEAR, FM_ALARM_SEVERITY_CLEAR, sub_entity , reason )); +} + +/* Assert a specified mtce alarm against the specified host with a CRITICAL severity level */ +int hwmonAlarm_critical ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ) +{ + return(hwmon_alarm_util ( hostname, id, FM_ALARM_STATE_SET, FM_ALARM_SEVERITY_CRITICAL, sub_entity, reason )); +} + +/* Assert a specified mtce alarm against the specified host with a MAJOR severity level */ +int hwmonAlarm_major ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ) +{ + return(hwmon_alarm_util ( hostname, id, FM_ALARM_STATE_SET, FM_ALARM_SEVERITY_MAJOR, sub_entity, reason )); +} + +/* Assert a specified mtce alarm against the specified host with a MINOR severity level */ +int hwmonAlarm_minor ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason) +{ + return ( hwmon_alarm_util ( hostname, id, FM_ALARM_STATE_SET, FM_ALARM_SEVERITY_MINOR, sub_entity, reason )); +} + +/* Assert a specified mtce alarm against the specified host with a WARNING severity level */ +int hwmonAlarm_warning ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ) +{ + return ( hwmon_alarm_util ( hostname, id, FM_ALARM_STATE_SET, 
FM_ALARM_SEVERITY_WARNING, sub_entity, reason )); +} + +/* generate a customer log for the specified severity */ +int hwmonLog ( string & hostname, hwmonAlarm_id_type id, EFmAlarmSeverityT sev, string sub_entity, string reason ) +{ + return ( hwmon_alarm_util ( hostname, id, FM_ALARM_STATE_MSG, sev, sub_entity, reason )); +} + +/* generate a customer alarm that indicates the sensor on this host has recovered */ +int hwmonLog_clear ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ) +{ + return ( hwmon_alarm_util ( hostname, id, FM_ALARM_STATE_MSG, FM_ALARM_SEVERITY_CLEAR, sub_entity, reason )); +} + +/* generate customer log against the specified host and sensor with a CRITICAL severity level */ +int hwmonLog_critical ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ) +{ + return ( hwmon_alarm_util ( hostname, id, FM_ALARM_STATE_MSG, FM_ALARM_SEVERITY_CRITICAL, sub_entity, reason )); +} + +/* generate customer log against the specified host and sensor with a MAJOR severity level */ +int hwmonLog_major ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ) +{ + return ( hwmon_alarm_util ( hostname, id, FM_ALARM_STATE_MSG, FM_ALARM_SEVERITY_MAJOR, sub_entity , reason )); +} + +/* generate customer log against the specified host and sensor with a MINOR severity level */ +int hwmonLog_minor ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason) +{ + return ( hwmon_alarm_util ( hostname, id, FM_ALARM_STATE_MSG, FM_ALARM_SEVERITY_MINOR, sub_entity, reason)); +} + +/* generate customer log against the specified host and sensor with a WARNING severity level */ +int hwmonLog_warning ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ) +{ + return ( hwmon_alarm_util ( hostname, id, FM_ALARM_STATE_MSG, FM_ALARM_SEVERITY_WARNING, sub_entity, reason)); +} diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonAlarm.h 
b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonAlarm.h new file mode 100644 index 00000000..12427e55 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonAlarm.h @@ -0,0 +1,90 @@ +#ifndef __HWMONALARM_H__ +#define __HWMONALARM_H__ + +/* + * Copyright (c) 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Maintenance 'Alarm' Header + */ + +#include +#include +#include +#include +#include + +using namespace std; + +#include "alarmUtil.h" +#include "hwmon.h" +// #include "fmAPI.h" + +typedef enum +{ + HWMON_ALARM_ID__SENSOR = 0, + HWMON_ALARM_ID__SENSORCFG = 1, + HWMON_ALARM_ID__SENSORGROUP = 2, + HWMON_ALARM_ID__LAST = 3 +} hwmonAlarm_id_type ; + +typedef struct +{ + string entity ; + string instance ; + EFmAlarmSeverityT severity ; +} hwmonAlarm_entity_status_type ; + + +#define REASON_SUPPRESSED ((const char *)"is 'suppressed'") +#define REASON_UNSUPPRESSED ((const char *)"is 'unsuppressed'") +#define REASON_OK ((const char *)"is 'ok'") +#define REASON_DEGRADED ((const char *)"is 'degraded'") +#define REASON_DEPROVISIONED ((const char *)"is 'deprovisioned'") +#define REASON_IGNORED ((const char *)"is 'ignored'") +#define REASON_OOT ((const char *)"is 'out-of-tolerance'") +#define REASON_SET_TO_ALARM ((const char *)"is 'set to alarm'") +#define REASON_SET_TO_LOG ((const char *)"is 'set to log'") +#define REASON_SET_TO_POWERCYCLE ((const char *)"is 'set to powercycle'") +#define REASON_SET_TO_RESET ((const char *)"is 'set to reset'") +#define REASON_SET_TO_IGNORE ((const char *)"is 'set to ignore'") +#define REASON_RESETTING ((const char *)"is 'resetting'") +#define REASON_POWERCYCLING ((const char *)"is 'power-cycling'") +#define REASON_OFFLINE ((const char *)"is 'offline'") +#define REASON_ONLINE ((const char *)"is 'online'") + +int hwmon_alarm_util ( string & hostname, + hwmonAlarm_id_type id, + EFmAlarmStateT state, + EFmAlarmSeverityT severity, + string & sub_entity , + string 
reason ); + +/* Clear the specified maintenance alarm for specific host */ +int hwmonAlarm_clear ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ); +int hwmonAlarm_critical ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ); +int hwmonAlarm_major ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ); +int hwmonAlarm_minor ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ); +int hwmonAlarm_warning ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ); + +int hwmonLog_clear ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ); +int hwmonLog_critical ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ); +int hwmonLog_major ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ); +int hwmonLog_minor ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ); +int hwmonLog_warning ( string & hostname, hwmonAlarm_id_type id, string sub_entity, string reason ); + +/* generate a customer log for the specified severity */ +int hwmonLog ( string & hostname, hwmonAlarm_id_type id, EFmAlarmSeverityT sev, string sub_entity, string reason ); + +/* Utility will init the alarm and return the severity if it is currently asserted */ +EFmAlarmSeverityT hwmon_alarm_query ( string & hostname, hwmonAlarm_id_type id, string & sensorname ); + +int hwmonAlarm_query_entity ( string & hostname, string & entity, list<hwmonAlarm_entity_status_type> & alarm_list ); /* loads alarm_list with the instance and severity of each FM alarm found for 'entity' */ + + +#endif /* __HWMONALARM_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonClass.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonClass.cpp new file mode 100644 index 00000000..f7587094 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonClass.cpp @@ -0,0 +1,2432 @@ +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +#include "nodeBase.h" +#include "tokenUtil.h" +#include "hwmonClass.h" +#include "hwmonUtil.h" +#include "hwmonIpmi.h" +#include "hwmonHttp.h" +#include "hwmonAlarm.h" +#include "hwmonGroup.h" +#include "hwmonSensor.h" +#include "hwmonThreads.h" +#include "hwmon.h" + +/**< constructor ; starts with an empty host list and every host_ptrs slot free */ +hwmonHostClass::hwmonHostClass() +{ + for ( int i = 0 ; i < MAX_HOSTS ; i++ ) + host_ptrs[i] = static_cast<struct hwmon_host *>(NULL) ; + + memory_allocs = 0 ; + memory_used = 0 ; + hwmon_head = NULL ; + hwmon_tail = NULL ; + hosts = 0 ; + host_deleted = false ; + config_reload = false ; + + return ; +} + +hwmonHostClass::~hwmonHostClass() { return ; } /**< destructor */ + +static std::string addStages_str [HWMON_ADD__STAGES +1] ; + +void hwmon_stages_init ( void ) +{ + addStages_str [HWMON_ADD__START ] = "Add-Start" ; + addStages_str [HWMON_ADD__STATES ] = "Add-States" ; + addStages_str [HWMON_ADD__WAIT ] = "Add-Wait" ; + addStages_str [HWMON_ADD__DONE ] = "Add-Done" ; +} + +/** Host add handler Stage Change member function ; returns PASS after moving to newStage, or forces HWMON_ADD__DONE and returns FAIL when the current or requested stage is out of range */ +int hwmonHostClass::addStageChange ( struct hwmonHostClass::hwmon_host * ptr, + hwmon_addStages_enum newStage ) +{ + if (( newStage < HWMON_ADD__STAGES ) && + ( ptr->addStage < HWMON_ADD__STAGES )) + { + clog ("%s %s -> %s (%d->%d)\n", + ptr->hostname.c_str(), + addStages_str[ptr->addStage].c_str(), + addStages_str[newStage].c_str(), + ptr->addStage, newStage); + + ptr->addStage = newStage ; + + return (PASS); + } + else + { + slog ("%s Invalid Stage (now:%d new:%d)\n", + ptr->hostname.c_str(), + ptr->addStage, + newStage ); + + ptr->addStage = HWMON_ADD__DONE ; + + return (FAIL); + } +} + +/* Initialize bmc data for ipmi mode monitoring */ +void hwmonHostClass::ipmi_bmc_data_init ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + host_ptr->ping_info.timer_handler = &hwmonTimer_handler ; + host_ptr->accessible = false; + host_ptr->degraded = false ; + + hwmon_del_groups ( host_ptr ); + hwmon_del_sensors ( host_ptr ); + + /* 
force the add handler to run */ + host_ptr->addStage = HWMON_ADD__START; + + host_ptr->sensor_query_count = 0 ; +} + +/* + * Allocate new host and tack it on the end of the host_list ; an already provisioned host of the same name is removed first ; returns NULL if that removal or the allocation fails + */ +struct hwmonHostClass::hwmon_host* hwmonHostClass::addHost( string hostname ) +{ + /* verify host is not already provisioned */ + struct hwmon_host * ptr = hwmonHostClass::getHost ( hostname ); + if ( ptr ) + { + if ( hwmonHostClass::remHost ( hostname ) ) + { + /* Should never get here but if we do then */ + /* something is seriously wrong */ + elog ("Error: Unable to remove host during reprovision\n"); + return static_cast<struct hwmon_host *>(NULL); + } + } + + /* allocate memory for new host */ + ptr = hwmonHostClass::newHost (); + if( ptr == NULL ) + { + elog ( "Error: Failed to allocate memory for new host\n" ); + return static_cast<struct hwmon_host *>(NULL); + } + + /* Init the new host */ + ptr->hostname = hostname ; + ptr->host_delete = false ; + ptr->poweron = false ; + ptr->retries = 0 ; + ptr->delStage = HWMON_DEL__START ; + + ptr->ping_info.timer_handler = &hwmonTimer_handler ; + mtcTimer_init ( ptr->hostTimer, ptr->hostname, "host timer" ); + mtcTimer_init ( ptr->addTimer, ptr->hostname, "add timer" ); + mtcTimer_init ( ptr->relearnTimer, ptr->hostname, "relearn timer" ); + + mtcTimer_init ( ptr->ping_info.timer, ptr->hostname, "ping monitor timer" ); + mtcTimer_init ( ptr->monitor_ctrl.timer, ptr->hostname, "sensor monitor timer") ; + + ptr->groups = 0 ; + ptr->sensors = 0 ; + ptr->samples = 0 ; + + /* http event pre-init + * PATCHBACK - consider patchback to REL3 and earlier */ + ptr->event.base = NULL ; + ptr->event.conn = NULL ; + ptr->event.req = NULL ; + ptr->event.buf = NULL ; + + /* If the host list is empty add it to the head */ + if( hwmon_head == NULL ) + { + hwmon_head = ptr ; + hwmon_tail = ptr ; + ptr->prev = NULL ; + ptr->next = NULL ; + } + else + { + /* link the new_host to the tail of the host_list + * then mark the next field as the end of the host_list + * adjust tail to point to the 
last host + */ + hwmon_tail->next = ptr ; + ptr->prev = hwmon_tail ; + ptr->next = NULL ; + hwmon_tail = ptr ; + } + + /* Default to not monitoring */ + ptr->monitor = false ; + ptr->bm_provisioned = false ; + ptr->alarmed = false ; + ptr->alarmed_config = false ; + ptr->degraded = false ; + + hosts++ ; + dlog2 ("Added hwmonHostClass host instance %d\n", hosts); + return ptr ; +} + +void hwmonHostClass::free_host_timers ( struct hwmon_host * ptr ) +{ + mtcTimer_fini ( ptr->hostTimer ); + mtcTimer_fini ( ptr->addTimer ); + mtcTimer_fini ( ptr->relearnTimer ); + mtcTimer_fini ( ptr->ping_info.timer ); + + mtcTimer_fini ( ptr->monitor_ctrl.timer ); + mtcTimer_fini ( ptr->ipmitool_thread_ctrl.timer ); +} + +/* Remove a hist from the linked list of hosts - may require splice action */ +int hwmonHostClass::remHost( string hostname ) +{ + if ( hostname.c_str() == NULL ) + return -ENODEV ; + + if ( hwmon_head == NULL ) + return -ENXIO ; + + struct hwmon_host * ptr = hwmonHostClass::getHost ( hostname ); + + if ( ptr == NULL ) + return -EFAULT ; + + free_host_timers ( ptr ); + + /* If the host is the head host */ + if ( ptr == hwmon_head ) + { + /* only one host in the list case */ + if ( hwmon_head == hwmon_tail ) + { + dlog2 ("Single Host -> Head Case\n"); + hwmon_head = NULL ; + hwmon_tail = NULL ; + } + else + { + dlog2 ("Multiple Hosts -> Head Case\n"); + hwmon_head = hwmon_head->next ; + hwmon_head->prev = NULL ; + } + } + /* if not head but tail then there must be more than one + * host in the list so go ahead and chop the tail. 
+ */ + else if ( ptr == hwmon_tail ) + { + dlog2 ("Multiple Host -> Tail Case\n"); + hwmon_tail = hwmon_tail->prev ; + hwmon_tail->next = NULL ; + } + else + { + dlog2 ("Multiple Host -> Full Splice Out\n"); + ptr->prev->next = ptr->next ; + ptr->next->prev = ptr->prev ; + } + + hwmonHostClass::delHost ( ptr ); + hosts-- ; + return (PASS) ; +} + + +struct hwmonHostClass::hwmon_host* hwmonHostClass::getHost ( string hostname ) +{ + /* check for empty list condition */ + if ( hwmon_head == NULL ) + return NULL ; + + for ( struct hwmon_host * ptr = hwmon_head ; ; ptr = ptr->next ) + { + if ( !hostname.compare ( ptr->hostname )) + { + // dlog2 ("Fetched hwmonHostClass host instance %s\n", ptr->hostname.c_str()); + return ptr ; + } + + if (( ptr->next == NULL ) || ( ptr == hwmon_tail )) + break ; + } + return static_cast<struct hwmon_host *>(NULL); +} + +/* + * Allocates memory for a new host and stores its address in the first free host_ptrs slot + * + * @param void + * @return pointer to the newly allocated host memory, or NULL when no host_ptrs slot is free + */ +struct hwmonHostClass::hwmon_host * hwmonHostClass::newHost ( void ) +{ + struct hwmonHostClass::hwmon_host * temp_host_ptr = NULL ; + + if ( memory_allocs == 0 ) + { + memset ( host_ptrs, 0 , sizeof(struct hwmon_host *)*MAX_HOSTS); + } + + // find an empty spot + for ( int i = 0 ; i < MAX_HOSTS ; i++ ) + { + if ( host_ptrs[i] == NULL ) + { + host_ptrs[i] = temp_host_ptr = new hwmon_host ; + memory_allocs++ ; + memory_used += sizeof (struct hwmonHostClass::hwmon_host); + + return temp_host_ptr ; + } + } + elog ( "Failed to save new host pointer address\n" ); + return temp_host_ptr ; +} + +void hwmonHostClass::degrade_state_audit ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + bool found ; + string sensorname ; + int s ; + + /* manage degrade state */ + for ( s = 0 , sensorname.clear() , found = false ; s < host_ptr->sensors ; s++ ) + { + if ( host_ptr->sensor[s].degraded == true ) + { + sensorname = host_ptr->sensor[s].sensorname ; + + /* do some auto correction of degrade */ + 
if ( is_alarmed ( &host_ptr->sensor[s] ) == false ) + { + slog ("%s %s is degraded but not alarmed ; correcting by removing degrade\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[s].sensorname.c_str()); + host_ptr->sensor[s].degraded = false ; + } + else + { + found = true ; + break ; + } + } + } + + if ( found == true ) + { + hwmon_send_event ( host_ptr->hostname, MTC_DEGRADE_RAISE , sensorname.data() ); + wlog_throttled (host_ptr->degrade_audit_log_throttle, 20, "%s degraded ... due to '%s' sensor\n", host_ptr->hostname.c_str(), sensorname.c_str()); + } + else if ( host_ptr->degraded == true ) + { + hwmon_send_event ( host_ptr->hostname, MTC_DEGRADE_RAISE , sensorname.data()); + wlog_throttled (host_ptr->degrade_audit_log_throttle, 20, "%s degraded ... due to 'hwmon' config error\n", host_ptr->hostname.c_str()); + } + else + { + dlog ("%s available\n", host_ptr->hostname.c_str()); + hwmon_send_event ( host_ptr->hostname, MTC_DEGRADE_CLEAR, "sensors" ); + host_ptr->degrade_audit_log_throttle = 0 ; + } + +#ifdef WANT_FIT_TESTING + + if (daemon_want_fit(FIT_CODE__HWMON__CORRUPT_TOKEN)) + { + tokenUtil_fail_token (); + if ( host_ptr->event.active == false ) + { + hwmonHttp_load_sensors ( host_ptr->hostname, host_ptr->event ); + } + else + { + slog ("%s FIT skipping hwmonHttp_load_sensors failure trigger due to in-progress event\n", + host_ptr->hostname.c_str()); + daemon_hits_fit (1); + } + } + + if ( host_ptr->bm_provisioned == true ) + { + /* FIT Support for creating orphan sensor or group alarm */ + if ( daemon_want_fit ( FIT_CODE__HWMON__CREATE_ORPHAN_GROUP_ALARM, host_ptr->hostname )) + { + string orphan = "orphan_group_" + itos((rand()%1000)) ; + hwmonAlarm_major ( host_ptr->hostname, HWMON_ALARM_ID__SENSORGROUP, orphan, REASON_DEGRADED ); + } + if ( daemon_want_fit ( FIT_CODE__HWMON__CREATE_ORPHAN_SENSOR_ALARM, host_ptr->hostname )) + { + string orphan = "orphan_sensor_" + itos((rand()%1000)) ; + hwmonAlarm_major ( host_ptr->hostname, 
HWMON_ALARM_ID__SENSOR, orphan, REASON_DEGRADED ); + } + + /* FIT Support for forcing raise or clear of any Group or Sensor Alarm in FM */ + /* FIT Support for forcing state or status of any Group or Sensor Alarm in the database */ + for ( int g = 0 ; g < host_ptr->groups ; g++ ) + { + string sev ; + if ( daemon_want_fit ( FIT_CODE__HWMON__RAISE_GROUP_ALARM, host_ptr->hostname, host_ptr->group[g].group_name, sev )) + { + hwmon_alarm_util ( host_ptr->hostname, HWMON_ALARM_ID__SENSORGROUP, FM_ALARM_STATE_SET, alarmUtil_getSev_enum(sev), host_ptr->group[g].group_name, REASON_DEGRADED ); + break ; + } + if ( daemon_want_fit ( FIT_CODE__HWMON__CLEAR_GROUP_ALARM, host_ptr->hostname, host_ptr->group[g].group_name )) + { + hwmon_alarm_util ( host_ptr->hostname, HWMON_ALARM_ID__SENSORGROUP, FM_ALARM_STATE_CLEAR, FM_ALARM_SEVERITY_CLEAR, host_ptr->group[g].group_name, REASON_OK ); + break ; + } + if ( daemon_want_fit ( FIT_CODE__HWMON__SET_DB_GROUP_STATE, host_ptr->hostname, host_ptr->group[g].group_name, sev )) + { + hwmonHttp_mod_group ( host_ptr->hostname, host_ptr->event , host_ptr->group[g].group_uuid, "state", sev ); + break ; + } + if ( daemon_want_fit ( FIT_CODE__HWMON__SET_DB_GROUP_STATUS, host_ptr->hostname, host_ptr->group[g].group_name, sev )) + { + hwmonHttp_mod_group ( host_ptr->hostname, host_ptr->event , host_ptr->group[g].group_uuid, "status", sev ); + break ; + } + } + + for ( int s = 0 ; s < host_ptr->sensors ; s++ ) + { + string sev ; + if ( daemon_want_fit ( FIT_CODE__HWMON__RAISE_SENSOR_ALARM, host_ptr->hostname, host_ptr->sensor[s].sensorname, sev )) + { + hwmon_alarm_util ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, FM_ALARM_STATE_SET, alarmUtil_getSev_enum(sev), host_ptr->sensor[s].sensorname, REASON_DEGRADED ); + break ; + } + if ( daemon_want_fit ( FIT_CODE__HWMON__CLEAR_SENSOR_ALARM, host_ptr->hostname, host_ptr->sensor[s].sensorname )) + { + hwmon_alarm_util ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, FM_ALARM_STATE_CLEAR, 
FM_ALARM_SEVERITY_CLEAR, host_ptr->sensor[s].sensorname, REASON_OK ); + break ; + } + if ( daemon_want_fit ( FIT_CODE__HWMON__SET_DB_SENSOR_STATE, host_ptr->hostname, host_ptr->sensor[s].sensorname, sev )) + { + hwmonHttp_mod_sensor ( host_ptr->hostname, host_ptr->event , host_ptr->sensor[s].uuid, "state", sev ); + break ; + } + if ( daemon_want_fit ( FIT_CODE__HWMON__SET_DB_SENSOR_STATUS, host_ptr->hostname, host_ptr->sensor[s].sensorname, sev )) + { + hwmonHttp_mod_sensor ( host_ptr->hostname, host_ptr->event , host_ptr->sensor[s].uuid, "status", sev ); + break ; + } + } + } + +#endif + +} + +/* Frees the memory of a pre-allocated host and removes + * it from the host_ptrs list + * @param host * pointer to the host memory address to be freed + * @return int return code { PASS or -EINVAL } + */ +int hwmonHostClass::delHost ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + if ( hwmonHostClass::memory_allocs > 0 ) + { + for ( int i = 0 ; i < MAX_NODES ; i++ ) + { + if ( hwmonHostClass::host_ptrs[i] == host_ptr ) + { + delete host_ptr ; + hwmonHostClass::host_ptrs[i] = NULL ; + hwmonHostClass::memory_allocs-- ; + hwmonHostClass::memory_used -= sizeof (struct hwmonHostClass::hwmon_host); + return PASS ; + } + } + elog ( "Error: Unable to validate memory address being freed\n" ); + } + else + elog ( "Error: Free memory called when there is no memory to free\n" ); + + return -EINVAL ; +} + +void hwmonHostClass::clear_bm_assertions ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + /* Loop over all sensors and groups + * - clear any outstanding alarms + * - clear degrade of host + * ... 
while we deprovision the BMC */ + for ( int i = 0 ; i < host_ptr->sensors ; i++ ) + { + if ( host_ptr->sensor[i].alarmed == true ) + { + hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, host_ptr->sensor[i].sensorname, REASON_DEPROVISIONED ); + host_ptr->sensor[i].alarmed = false ; + host_ptr->sensor[i].degraded = false ; + } + } + + for ( int g = 0 ; g < host_ptr->groups ; ++g ) + { + hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSORGROUP, host_ptr->group[g].group_name, REASON_DEPROVISIONED ); + } + + /* send the degrade anyway , just to be safe */ + hwmon_send_event ( host_ptr->hostname, MTC_DEGRADE_CLEAR , "sensors" ); + + /* Bug Fix: This was outside the if bm_provisioned clause causing it + * to be called even if the bmc was not already provisioned + */ + hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSORCFG, "sensors", REASON_DEPROVISIONED ); +} + + + + +int hwmonHostClass::set_bm_prov ( struct hwmonHostClass::hwmon_host * host_ptr, bool state ) +{ + int rc = FAIL_HOSTNAME_LOOKUP ; + if ( host_ptr ) + { + rc = PASS ; + + /* Clear the alarm if we are starting fresh from an unprovisioned state */ + if (( host_ptr->bm_provisioned == false ) && ( state == true )) + { + ilog ("%s board management controller is being provisioned\n", host_ptr->hostname.c_str()); + ilog ("%s setting up ping socket\n", host_ptr->hostname.c_str() ); + + /* --------------------------------------- + * Init bmc data based on monitoring mode + * ---------------------------------------*/ + + mtcTimer_reset ( host_ptr->ping_info.timer ) ; + host_ptr->ping_info.stage = PINGUTIL_MONITOR_STAGE__OPEN ; + host_ptr->ping_info.ip = host_ptr->bm_ip ; + host_ptr->ping_info.hostname = host_ptr->hostname ; + ipmi_bmc_data_init ( host_ptr ); + host_ptr->thread_extra_info.bm_pw = host_ptr->bm_pw = get_bm_password (hostBase.get_uuid(host_ptr->hostname).data()); + host_ptr->thread_extra_info.bm_ip = host_ptr->bm_ip ; + host_ptr->thread_extra_info.bm_un = host_ptr->bm_un 
; + } + /* handle the case going from provisioned to not provisioned */ + if (( host_ptr->bm_provisioned == true ) && ( state == false )) + { + ilog ("%s board management controller is being deprovisioned\n", host_ptr->hostname.c_str()); + clear_bm_assertions ( host_ptr ); + pingUtil_fini ( host_ptr->ping_info ); + ipmi_bmc_data_init ( host_ptr ); + } + host_ptr->bm_provisioned = state ; + } + return (rc); +} + +int hwmonHostClass::mod_host ( node_inv_type & inv ) +{ + int rc = FAIL ; + struct hwmonHostClass::hwmon_host * host_ptr = static_cast(NULL); + + if (( inv.name.empty()) || + ( !inv.name.compare (NONE)) || + ( !inv.name.compare ("None"))) + { + wlog ("Refusing to add host with 'null' or 'invalid' hostname (%s)\n", + inv.uuid.c_str()); + return (FAIL_INVALID_HOSTNAME) ; + } + + host_ptr = hwmonHostClass::getHost(inv.name); + if ( host_ptr ) + { + rc = PASS ; + bool modify_bm = false ; + + if ( host_ptr->bm_ip.compare( inv.bm_ip ) ) + { + ilog ("%s modify board management 'ip' from '%s' to '%s'\n", + inv.name.c_str(), + host_ptr->bm_ip.c_str(), + inv.bm_ip.c_str()); + + host_ptr->bm_ip = inv.bm_ip ; + + modify_bm = true ; + } + + if ( host_ptr->bm_un.compare( inv.bm_un ) ) + { + ilog ("%s modify board management 'username' from '%s' to '%s'\n", + inv.name.c_str(), + host_ptr->bm_un.c_str(), + inv.bm_un.c_str()); + + host_ptr->bm_un = inv.bm_un ; + + modify_bm = true ; + } + + if ( host_ptr->bm_type.compare( inv.bm_type ) ) + { + ilog ("%s modify board management 'type' from '%s' to '%s'\n", + inv.name.c_str(), + host_ptr->bm_type.c_str(), + inv.bm_type.c_str()); + + host_ptr->bm_type = inv.bm_type ; + + modify_bm = true ; + } + + if ( modify_bm == true ) + { + ilog ("%s modify summary %s %s@%s ... provisioned = %s\n", + inv.name.c_str(), + host_ptr->bm_type.c_str(), + host_ptr->bm_un.c_str(), + host_ptr->bm_ip.c_str(), + host_ptr->bm_provisioned ? 
"Yes" : "No" ); + if ( host_ptr->bm_provisioned == true ) + { + /* if we have a credentials only change then disable the sensor + * model only to get re-enabled if sensor monitoring is + * successful with the new credentils */ + if (( hostUtil_is_valid_bm_type (host_ptr->bm_type) == true ) && + ( host_ptr->bm_un.compare(NONE))) + { + ipmi_set_group_state ( host_ptr, "disabled" ); + ipmi_disable_sensors ( host_ptr ); + } + rc = set_bm_prov ( host_ptr, false ); + } + + if (( hostUtil_is_valid_bm_type (host_ptr->bm_type) == true ) && + ( hostUtil_is_valid_ip_addr (host_ptr->bm_ip) == true ) && + !host_ptr->bm_un.empty()) + { + rc = set_bm_prov ( host_ptr, true ); + } + } + else + { + /* Only reprovision if the provisioning data has changed */ + dlog ("%s bmc provisioning unchanged\n", host_ptr->hostname.c_str()); + } + } + else + { + elog ("%s board management info modify failed\n", inv.name.c_str()); + rc = FAIL_NULL_POINTER ; + } + return (rc); +} + +void hwmonHostClass::set_degrade_audit ( void ) +{ + struct hwmon_host * ptr = hwmon_head ; + for ( int i = 0 ; i < hosts ; i++ ) + { + ptr->want_degrade_audit = true ; + ptr = ptr->next ; + if ( ptr == NULL ) + break ; + } +} + + +int hwmonHostClass::add_host ( node_inv_type & inv ) +{ + int rc = FAIL ; + struct hwmonHostClass::hwmon_host * host_ptr = static_cast(NULL); + + if (( inv.name.empty()) || + ( !inv.name.compare (NONE)) || + ( !inv.name.compare ("None"))) + { + wlog ("Refusing to add host with 'null' or 'invalid' hostname (%s)\n", + inv.uuid.c_str()); + return (FAIL_INVALID_HOSTNAME) ; + } + + rc = hostBase.add_host ( inv ); + if ( rc > RETRY ) + { + elog ("Error\n"); + } + + host_ptr = hwmonHostClass::getHost(inv.name); + if ( host_ptr ) + { + if ( host_ptr->host_delete == true ) + { + ilog ("%s cannot be added while previous delete is still in progress\n", host_ptr->hostname.c_str()); + return (FAIL_OPERATION); + } + dlog ("%s already provisioned\n", host_ptr->hostname.c_str()); + + /* Send back a retry in 
case the add needs to be converted to a modify */ + return (RETRY); + } + /* Otherwise add it as a new host */ + else + { + host_ptr = hwmonHostClass::addHost(inv.name); + if ( host_ptr ) + { + /* Add board management stuff */ + host_ptr->bm_ip = inv.bm_ip ; + host_ptr->bm_un = inv.bm_un ; + host_ptr->bm_type = inv.bm_type ; + + /* default the socket number to closed */ + host_ptr->ping_info.sock = 0 ; + + host_ptr->quanta_server= false ; + + ipmi_bmc_data_init ( host_ptr ); + + /* Default audit interval to zero - disable sensor monitoring by default */ + host_ptr->interval = 0 ; + host_ptr->interval_old = 0 ; + host_ptr->interval_changed = false ; + host_ptr->accounting_ok = false ; + host_ptr->accounting_bad_count = 0 ; + + /* Additions for sensor monitoring using IPMI protocol */ + host_ptr->want_degrade_audit = false ; + host_ptr->degrade_audit_log_throttle = 0 ; + host_ptr->json_ipmi_sensors.clear(); + + /* Sensor Monitoring Control Structure */ + host_ptr->monitor_ctrl.stage = HWMON_SENSOR_MONITOR__START ; + host_ptr->monitor_ctrl.last_sample_time = 0 ; + host_ptr->monitor_ctrl.this_sample_time = 0 ; + host_ptr->sensor_query_count = 0 ; + + /* Sensor Monitoring Thread 'Extra Request Information' */ + host_ptr->thread_extra_info.bm_ip = host_ptr->bm_ip ; + host_ptr->thread_extra_info.bm_un = host_ptr->bm_un ; + host_ptr->thread_extra_info.bm_pw.clear() ; + host_ptr->thread_extra_info.sensor_query_request = IPMITOOL_PATH_AND_FILENAME ; + + /* Sensor Monitoring Thread Initialization */ + thread_init ( host_ptr->ipmitool_thread_ctrl, + host_ptr->ipmitool_thread_info, + &host_ptr->thread_extra_info, + hwmonThread_ipmitool, + DEFAULT_THREAD_TIMEOUT_SECS, + host_ptr->hostname, + THREAD_NAME__IPMITOOL); + + /* TODO: create a is_bm_info_valid */ + if ( ( hostUtil_is_valid_ip_addr (host_ptr->bm_ip) == true ) && + ( hostUtil_is_valid_bm_type (host_ptr->bm_type) == true ) && + ( !host_ptr->bm_un.empty() ) && + ( host_ptr->bm_un.compare(NONE)) ) + { + set_bm_prov ( 
host_ptr, true ); + } + else + { + set_bm_prov ( host_ptr, false ); + } + ilog ("%s BMC is %sprovisioned\n", host_ptr->hostname.c_str(), host_ptr->bm_provisioned ? "" : "not " ); + + host_ptr->bmc_fw_version.clear(); + + host_ptr->group_index = 0 ; + + /* Init sensor model relearn controls, state and status */ + host_ptr->relearn = false ; + host_ptr->relearn_request = false ; + host_ptr->relearn_retry_counter = 0 ; + host_ptr->relearn_done_date.clear(); + init_model_attributes ( host_ptr->model_attributes_preserved ); + + /* Add to the end of inventory */ + hostlist.push_back ( host_ptr->hostname ); + + rc = PASS ; + dlog ("%s running add FSM\n", inv.name.c_str()); + } + else + { + elog ("%s host service add failed\n", inv.name.c_str()); + rc = FAIL_NULL_POINTER ; + } + } + return (rc); +} + +int hwmonHostClass::rem_host ( string hostname ) +{ + int rc = FAIL ; + if ( ! hostname.empty() ) + { + /* Remove the hostBase */ + rc = hostBase.rem_host ( hostname ); + if ( rc == PASS ) + { + rc = hwmonHostClass::remHost ( hostname ); + } + else + { + hwmonHostClass::remHost ( hostname ); + slog ("potential memory leak !\n"); + } + + /* Now remove the service specific component */ + hostlist.remove ( hostname ); + } + return ( rc ); +} + +int hwmonHostClass::request_del_host ( string hostname ) +{ + int rc = FAIL_DEL_UNKNOWN ; + hwmonHostClass::hwmon_host * host_ptr = hwmonHostClass::getHost( hostname ); + if ( host_ptr ) + { + if ( host_ptr->host_delete == true ) + { + ilog ("%s delete already in progress\n", hostname.c_str()); + } + else + { + host_ptr->delStage = HWMON_DEL__START ; + host_ptr->host_delete = true ; + } + rc = PASS ; + } + else + { + wlog ("Unknown hostname: %s\n", hostname.c_str()); + } + return (rc); +} + +int hwmonHostClass::del_host ( string hostname ) +{ + int rc = FAIL_DEL_UNKNOWN ; + hwmonHostClass::hwmon_host * hwmon_host_ptr = hwmonHostClass::getHost( hostname ); + if ( hwmon_host_ptr ) + { + rc = rem_host ( hostname ); + if ( rc == PASS ) + { + 
ilog ("%s deleted\n", hostname.c_str()); + print_node_info(); + } + else + { + elog ("%s delete host failed (rc:%d)\n", hostname.c_str(), rc ); + } + } + else + { + wlog ("Unknown hostname: %s\n", hostname.c_str()); + } + return (rc); +} + +int hwmonHostClass::mon_host ( string hostname, bool monitor ) +{ + int rc = FAIL_UNKNOWN_HOSTNAME ; + hwmonHostClass::hwmon_host * hwmon_host_ptr = hwmonHostClass::getHost( hostname ); + if ( hwmon_host_ptr ) + { + bool change = false ; + string want_state = "" ; + + if ( monitor == true ) + want_state = "enabled" ; + else + want_state = "disabled" ; + + /* if not provisioned then just return */ + if ( hwmon_host_ptr->bm_provisioned == false ) + { + dlog ("%s ignoring monitor '%s' request for unprovisioned bmc\n", + hostname.c_str(), want_state.c_str()); + return (PASS); + } + + else if ( hwmon_host_ptr->host_delete == true ) + { + dlog ("%s ignoring monitor '%s' request while delete is pending\n", + hostname.c_str(), want_state.c_str() ); + return (PASS); + } + + if (( monitor == false ) && + ( hwmon_host_ptr->monitor != monitor ) && + ( hwmon_host_ptr->bm_provisioned == true )) + { + clear_bm_assertions ( hwmon_host_ptr ); + } + + if ( hwmon_host_ptr->monitor == monitor ) + { + dlog ("%s sensor monitoring already %s\n", hwmon_host_ptr->hostname.c_str(), monitor ? "enabled" : "disabled" ); + + /* if any group is not in the correct enabled state then set change bool */ + for ( int g = 0 ; g < hwmon_host_ptr->groups ; ++g ) + { + if ( hwmon_host_ptr->group[g].group_state.compare(want_state) ) + { + change = true ; + } + } + } + else + { + ilog ("%s sensor monitoring set to %s\n", hwmon_host_ptr->hostname.c_str(), monitor ? 
"enabled" : "disabled" ); + change = true ; + hwmon_host_ptr->monitor = monitor ; + } + + if ( change == true ) + { + if ( monitor == false ) + { + /* sets all groups state to disable if monitor is false ; handle state change failure alarming internally */ + rc = ipmi_set_group_state ( hwmon_host_ptr, "disabled" ); + } + else if ( hwmon_host_ptr->group[0].group_state.compare("disabled") == 0 ) + { + /* or to enabled if presently disabled - don't change from failed to enabled over a monitor start */ + rc = ipmi_set_group_state ( hwmon_host_ptr, "enabled" ); + } + } + } + else + { + dlog ("Unknown hostname: %s\n", hostname.c_str()); + } + return (rc); +} + +/****************************************************************************/ +/** Host Class Setter / Getters */ +/****************************************************************************/ + +bool hwmonHostClass::is_bm_provisioned ( string hostname ) +{ + hwmonHostClass::hwmon_host * hwmon_host_ptr ; + hwmon_host_ptr = hwmonHostClass::getHost ( hostname ); + if ( hwmon_host_ptr != NULL ) + { + return (hwmon_host_ptr->bm_provisioned); + } + elog ("%s lookup failed\n", hostname.c_str() ); + return (false); +} + +/** Get this hosts board management IP address */ +string hwmonHostClass::get_bm_ip ( string hostname ) +{ + hwmonHostClass::hwmon_host * hwmon_host_ptr ; + hwmon_host_ptr = hwmonHostClass::getHost ( hostname ); + if ( hwmon_host_ptr != NULL ) + { + if ( hostUtil_is_valid_ip_addr (hwmon_host_ptr->bm_ip) == false ) + { + return (NONE); + } + else + { + return (hwmon_host_ptr->bm_ip); + } + } + elog ("%s bm ip lookup failed\n", hostname.c_str() ); + return (""); +} + +/** Get this hosts board management TYPE ilo3/ilo4/quanta/etc */ +string hwmonHostClass::get_bm_type ( string hostname ) +{ + hwmonHostClass::hwmon_host * hwmon_host_ptr ; + hwmon_host_ptr = hwmonHostClass::getHost ( hostname ); + if ( hwmon_host_ptr != NULL ) + { + return (hwmon_host_ptr->bm_type); + } + elog ("%s bm type lookup failed\n", 
hostname.c_str() ); + return (""); +} + +/** Get this hosts board management user name */ +string hwmonHostClass::get_bm_un ( string hostname ) +{ + hwmonHostClass::hwmon_host * hwmon_host_ptr ; + hwmon_host_ptr = hwmonHostClass::getHost ( hostname ); + if ( hwmon_host_ptr != NULL ) + { + if ( hwmon_host_ptr->bm_un.empty() ) + { + return (NONE); + } + else + { + return (hwmon_host_ptr->bm_un); + } + } + elog ("%s bm username lookup failed\n", hostname.c_str() ); + return (""); +} + + + +string hwmonHostClass::get_relearn_done_date ( string hostname ) +{ + hwmonHostClass::hwmon_host * hwmon_host_ptr ; + hwmon_host_ptr = hwmonHostClass::getHost ( hostname ); + if ( hwmon_host_ptr != NULL ) + { + if ( !hwmon_host_ptr->relearn_done_date.empty()) + { + return (hwmon_host_ptr->relearn_done_date); + } + } + elog ("%s relearn done date empty or hostname lookup failed\n", hostname.c_str()); + return (pt()); +} + + +struct hwmonHostClass::hwmon_host * hwmonHostClass::getHost_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if (( hwmon_head ) && ( tid )) + { + for ( struct hwmon_host * host_ptr = hwmon_head ; ; host_ptr = host_ptr->next ) + { + if ( host_ptr->ipmitool_thread_ctrl.timer.tid == tid ) + { + return host_ptr ; + } + if ( host_ptr->hostTimer.tid == tid ) + { + return host_ptr ; + } + if ( host_ptr->ping_info.timer.tid == tid ) + { + return host_ptr ; + } + if ( host_ptr->monitor_ctrl.timer.tid == tid ) + { + return host_ptr ; + } + if ( host_ptr->addTimer.tid == tid ) + { + return host_ptr ; + } + if ( host_ptr->relearnTimer.tid == tid ) + { + return host_ptr ; + } + + if (( host_ptr->next == NULL ) || ( host_ptr == hwmon_tail )) + break ; + } + } + return static_cast(NULL); +} + +/********************************************************************************** + * + * Name : get_sensor + * + * Description : Update the supplied pointer with the host sensor + * that matches the supplied sensor name. 
+ * + * Updates : sensor_ptr is set if found, otherwise a NULL is returned + * + **********************************************************************************/ +sensor_type * hwmonHostClass::get_sensor ( string hostname, string entity_path ) +{ + int rc = FAIL_NOT_FOUND ; + + if ( entity_path.empty() ) + rc = FAIL_STRING_EMPTY ; + else + { + hwmonHostClass::hwmon_host * host_ptr ; + host_ptr = hwmonHostClass::getHost ( hostname ); + if ( host_ptr != NULL ) + { + for ( int i = 0 ; i < host_ptr->sensors ; i++ ) + { + if ( !entity_path.compare(host_ptr->sensor[i].sensorname)) + { + blog ("%s '%s' sensor found\n", + hostname.c_str(), + host_ptr->sensor[i].sensorname.c_str()); + + return (&host_ptr->sensor[i]) ; + } + } + } + } + if ( rc == FAIL_NOT_FOUND ) + { + wlog ("%s '%s' entity path not found\n", hostname.c_str() , entity_path.c_str()); + } + else if ( rc ) + { + elog ("%s sensor entity path query failed\n", hostname.c_str() ); + } + return (static_cast(NULL)); +} + +int hwmonHostClass::add_sensor ( string hostname, sensor_type & sensor ) +{ + int rc = PASS ; + + if ( sensor.sensorname.empty() ) + return (FAIL_STRING_EMPTY); + else + { + hwmonHostClass::hwmon_host * host_ptr ; + host_ptr = hwmonHostClass::getHost ( hostname ); + if ( host_ptr != NULL ) + { + int i ; + bool found = false ; + for ( i = 0 ; i < host_ptr->sensors ; i++ ) + { + if ( !sensor.entity_path.compare(host_ptr->sensor[i].sensorname)) + { + found = true ; + break ; + } + } + if ( i >= MAX_HOST_SENSORS ) + { + rc = FAIL ; + } + else + { + /* PATCHBACK - to REL3 and earlier + * This init should have been initialized here all along */ + hwmonSensor_init ( hostname, &host_ptr->sensor[i] ); + + host_ptr->sensor[i].sensorname = sensor.sensorname ; /* for fresh add case */ + host_ptr->sensor[i].sensortype = sensor.sensortype ; + host_ptr->sensor[i].script = sensor.script ; + host_ptr->sensor[i].uuid = sensor.uuid ; + host_ptr->sensor[i].datatype = sensor.datatype ; + 
host_ptr->sensor[i].group_uuid = sensor.group_uuid; + host_ptr->sensor[i].host_uuid = sensor.host_uuid ; + host_ptr->sensor[i].algorithm = sensor.algorithm ; + host_ptr->sensor[i].group_uuid = sensor.group_uuid; + host_ptr->sensor[i].status = sensor.status ; + host_ptr->sensor[i].state = sensor.state ; + host_ptr->sensor[i].prot = sensor.prot ; + host_ptr->sensor[i].kind = sensor.kind ; + host_ptr->sensor[i].unit = sensor.unit ; + host_ptr->sensor[i].suppress = sensor.suppress ; + host_ptr->sensor[i].path = sensor.path ; + + if ( sensor.path.empty() ) + { + host_ptr->sensor[i].entity_path = sensor.sensorname ; + } + else + { + host_ptr->sensor[i].entity_path = sensor.path ; + host_ptr->sensor[i].entity_path.append(ENTITY_DELIMITER); + host_ptr->sensor[i].entity_path.append(sensor.sensorname); + } + + host_ptr->sensor[i].unit_base = sensor.unit_base ; + host_ptr->sensor[i].unit_rate = sensor.unit_rate ; + host_ptr->sensor[i].unit_modifier = sensor.unit_modifier ; + + host_ptr->sensor[i].actions_minor = sensor.actions_minor ; + host_ptr->sensor[i].actions_major = sensor.actions_major ; + host_ptr->sensor[i].actions_critl = sensor.actions_critl ; + + host_ptr->sensor[i].t_critical_lower = sensor.t_critical_lower ; + host_ptr->sensor[i].t_major_lower = sensor.t_major_lower ; + host_ptr->sensor[i].t_minor_lower = sensor.t_minor_lower ; + host_ptr->sensor[i].t_minor_upper = sensor.t_minor_upper ; + host_ptr->sensor[i].t_major_upper = sensor.t_major_upper ; + host_ptr->sensor[i].t_critical_upper = sensor.t_critical_upper ; + + if ( found == false ) + host_ptr->sensors++ ; + } + } + } + + if ( rc ) + { + elog ("%s '%s' sensor add failed\n", hostname.c_str() , + sensor.sensorname.c_str()); + } + return (rc); +} + +/**************************************************************************** + * + * Name: hwmon_get_sensorgroup + * + * Description: Returns a pointer to the sensor group that matches the supplied + * entity path. 
+ * + ****************************************************************************/ +struct sensor_group_type * hwmonHostClass::hwmon_get_sensorgroup ( string hostname, string entity_path ) +{ + int rc = FAIL_NOT_FOUND ; + + if ( ( !entity_path.empty() ) && ( !hostname.empty()) ) + { + hwmonHostClass::hwmon_host * host_ptr ; + host_ptr = hwmonHostClass::getHost ( hostname ); + if ( host_ptr != NULL ) + { + for ( int g = 0 ; g < host_ptr->groups ; g++ ) + { + /* look for the sensor in the group */ + for ( int s = 0 ; s < host_ptr->group[g].sensors ; s++ ) + { + if ( !host_ptr->group[g].sensor_ptr[s]->sensorname.compare(entity_path) ) + { + blog ("%s '%s' sensor found in '%s' group\n", + hostname.c_str(), + host_ptr->group[g].sensor_ptr[s]->sensorname.c_str(), + host_ptr->group[g].group_name.c_str()); + + return (&host_ptr->group[g]); + } + } + } + } + else + { + rc = FAIL_HOSTNAME_LOOKUP ; + elog ("%s hostname lookup failed\n", hostname.c_str() ); + } + } + else + { + rc = FAIL_STRING_EMPTY ; + slog ("%s empty hostname or entity path '%s' string\n", hostname.c_str(), entity_path.c_str() ); + } + if ( rc == FAIL_NOT_FOUND ) + { + slog ("%s '%s' entity path not found in any group\n", hostname.c_str() , entity_path.c_str()); + } + return (static_cast(NULL)); +} + +/********************************************************************************** + * + * Name : hwmon_get_group + * + * Description : Returns a pointer to the sensor group that matches the supplied + * group name. 
+ * + **********************************************************************************/ +struct sensor_group_type * hwmonHostClass::hwmon_get_group ( string hostname, string group_name ) +{ + int rc = FAIL_NOT_FOUND ; + + if ( ( !group_name.empty() ) && ( !hostname.empty()) ) + { + hwmonHostClass::hwmon_host * host_ptr ; + host_ptr = hwmonHostClass::getHost ( hostname ); + if ( host_ptr != NULL ) + { + for ( int i = 0 ; i < host_ptr->groups ; i++ ) + { + if ( !group_name.compare(host_ptr->group[i].group_name)) + { + blog ("%s '%s' sensor group found\n", + hostname.c_str(), + host_ptr->group[i].group_name.c_str()); + + return (&host_ptr->group[i]) ; + } + } + } + } + if ( rc == FAIL_NOT_FOUND ) + { + wlog ("%s '%s' sensor group not found\n", hostname.c_str() , group_name.c_str()); + } + else if ( rc ) + { + elog ("%s sensor group query failed\n", hostname.c_str() ); + } + return (static_cast(NULL)); +} + + +/* Add a sensor group to a host */ +int hwmonHostClass::hwmon_add_group ( string hostname, struct sensor_group_type & group ) +{ + int rc = PASS ; + + if ( group.group_name.empty() ) + return (FAIL_STRING_EMPTY); + else + { + hwmonHostClass::hwmon_host * host_ptr ; + host_ptr = hwmonHostClass::getHost ( hostname ); + if ( host_ptr != NULL ) + { + int i ; + bool found = false ; + for ( i = 0 ; i < host_ptr->groups ; i++ ) + { + if ( !group.group_name.compare(host_ptr->group[i].group_name)) + { + found = true ; + break ; + } + } + if ( i >= MAX_HOST_GROUPS ) + { + rc = FAIL ; + } + else + { + host_ptr->group[i].failed = false ; + + host_ptr->group[i].host_uuid = group.host_uuid ; + + host_ptr->group[i].group_name = group.group_name ; /* for fresh add case */ + host_ptr->group[i].group_uuid = group.group_uuid ; + + host_ptr->group[i].hostname = hostname ; + host_ptr->interval_changed = true ; + + host_ptr->group[i].group_interval = group.group_interval ; + + host_ptr->group[i].sensortype = group.sensortype ; + host_ptr->group[i].datatype = group.datatype ; + 
host_ptr->group[i].algorithm = group.algorithm ; + host_ptr->group[i].group_state = group.group_state ; + host_ptr->group[i].suppress = group.suppress ; + host_ptr->group[i].path = group.path ; + + host_ptr->group[i].unit_base_group = group.unit_base_group ; + host_ptr->group[i].unit_rate_group = group.unit_rate_group ; + host_ptr->group[i].unit_modifier_group = group.unit_modifier_group ; + + host_ptr->group[i].actions_minor_choices = group.actions_minor_choices ; + host_ptr->group[i].actions_major_choices = group.actions_major_choices ; + host_ptr->group[i].actions_critical_choices = group.actions_critical_choices ; + + host_ptr->group[i].actions_minor_group = group.actions_minor_group ; + host_ptr->group[i].actions_major_group = group.actions_major_group ; + host_ptr->group[i].actions_critl_group = group.actions_critl_group ; + + host_ptr->group[i].t_critical_lower_group = group.t_critical_lower_group ; + host_ptr->group[i].t_critical_upper_group = group.t_critical_upper_group ; + host_ptr->group[i].t_major_lower_group = group.t_major_lower_group ; + host_ptr->group[i].t_major_upper_group = group.t_major_upper_group ; + host_ptr->group[i].t_minor_lower_group = group.t_minor_lower_group ; + host_ptr->group[i].t_minor_upper_group = group.t_minor_upper_group ; + + /* Default the read index to the first sensor in this group. + * This member is only used when we are reading group sensors individually */ + host_ptr->group[i].sensor_read_index = 0 ; + + blog ("%s '%s' sensor group added\n", host_ptr->hostname.c_str(), host_ptr->group[i].group_name.c_str() ); + + if ( found == false ) + host_ptr->groups++ ; + } + } + } + + if ( rc ) + { + elog ("%s '%s' sensor group add failed\n", hostname.c_str() , + group.group_name.c_str()); + } + return (rc); +} + +/**************************************************************************** + * + * Name: add_group_uuid + * + * Description: Adds the sysinv supplied group uuid to hwmon for + * the specified group/host. 
+ * + ****************************************************************************/ +int hwmonHostClass::add_group_uuid ( string & hostname, string & group_name, string & uuid ) +{ + int rc = FAIL_NOT_FOUND ; + + if ( ( !group_name.empty() ) && ( !hostname.empty()) ) + { + hwmonHostClass::hwmon_host * host_ptr ; + host_ptr = hwmonHostClass::getHost ( hostname ); + if ( host_ptr != NULL ) + { + for ( int i = 0 ; i < host_ptr->groups ; i++ ) + { + if ( !group_name.compare(host_ptr->group[i].group_name)) + { + blog1 ("%s '%s' sensor group found\n", + hostname.c_str(), + host_ptr->group[i].group_name.c_str()); + + host_ptr->group[i].group_uuid = uuid ; + rc = PASS ; + break ; + } + } + } + } + if ( rc == FAIL_NOT_FOUND ) + { + wlog ("%s '%s' sensor group not found\n", hostname.c_str() , group_name.c_str()); + } + return (rc); +} + + +/**************************************************************************** + * + * Name: add_sensor_uuid + * + * Description: Adds the sysinv supplied sensor uuid to hwmon for + * the specified sensor/host. 
+ * + ****************************************************************************/ +int hwmonHostClass::add_sensor_uuid ( string & hostname, string & sensorname, string & uuid ) +{ + int rc = FAIL_NOT_FOUND ; + + if ( ( !sensorname.empty() ) && ( !hostname.empty()) ) + { + hwmonHostClass::hwmon_host * host_ptr ; + host_ptr = hwmonHostClass::getHost ( hostname ); + if ( host_ptr != NULL ) + { + for ( int i = 0 ; i < host_ptr->sensors ; i++ ) + { + if ( !sensorname.compare(host_ptr->sensor[i].sensorname)) + { + blog1 ("%s '%s' sensor found\n", + hostname.c_str(), + host_ptr->sensor[i].sensorname.c_str()); + + host_ptr->sensor[i].uuid = uuid ; + rc = PASS ; + break ; + } + } + } + } + if ( rc == FAIL_NOT_FOUND ) + { + wlog ("%s '%s' sensor not found\n", hostname.c_str() , sensorname.c_str()); + } + return (rc); +} + +/***************************************************************************** + * + * Name : hwmon_del_groups + * + * Description: Delete all the groups from the specified host in hwmon + * + * Purpose : In support of group reprovisioning + * + *****************************************************************************/ + +int hwmonHostClass::hwmon_del_groups ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + int rc = PASS ; + + for ( int g = 0 ; g < host_ptr->groups ; g++ ) + { + hwmonGroup_init ( host_ptr->hostname , &host_ptr->group[g] ); + } + + host_ptr->groups = 0 ; + return (rc); +} + +/***************************************************************************** + * + * Name : hwmon_del_sensors + * + * Description: Delete all the sensors from the specified host in hwmon + * + * Purpose : In support of sensor reprovisioning + * + *****************************************************************************/ + +int hwmonHostClass::hwmon_del_sensors ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + int rc = PASS ; + + host_ptr->quanta_server = false ; + + for ( int s = 0 ; s < host_ptr->sensors ; s++ ) + { + hwmonSensor_init ( 
host_ptr->hostname, &host_ptr->sensor[s] ); + } + + /* these are the sample data transient lists */ + for ( int i = 0 ; i < (MAX_HOST_SENSORS-1) ; i++ ) + { + sensor_data_init ( host_ptr->sample[i] ); + } + + host_ptr->sensors = + host_ptr->samples = + host_ptr->profile_sensor_checksum = + host_ptr->sample_sensor_checksum = + host_ptr->last_sample_sensor_checksum = 0 ; + return (rc); +} + + + +/* look up a host name from a host uuid */ +string hwmonHostClass::get_hostname ( string uuid ) +{ + if ( !uuid.empty() ) + { + string hostname = hostBase.get_hostname ( uuid ) ; + if ( !hostname.empty() ) + { + dlog ("%s is hostname for uuid:%s\n", hostname.c_str(), uuid.c_str()); + return (hostname); + } + } + wlog ("hostname not found (uuid:%s)\n", uuid.c_str()); + return (""); +} + +/************************************************************************* + * + * Sensor Model Attributes Saving and Restoring Support Utilities + * + *************************************************************************/ +void init_model_attributes ( model_attr_type & attr ) +{ + attr.interval = HWMON_DEFAULT_AUDIT_INTERVAL ; + for ( int i = 0 ; i < MAX_HOST_GROUPS ; i++ ) + { + attr.group_actions[i].name = HWMON_GROUP_NAME__NULL ; + attr.group_actions[i].minor = HWMON_ACTION_IGNORE ; + attr.group_actions[i].major = HWMON_ACTION_LOG ; + attr.group_actions[i].critl = HWMON_ACTION_ALARM ; + } + attr.groups = 0 ; +} + +/***************************************************************************** + * + * Name : save_model_attributes + * + * Description: Save key sensor group settings. 
+ * + * - severity level group_actions + * - audit interval + * + *****************************************************************************/ + +void hwmonHostClass::save_model_attributes ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + init_model_attributes ( host_ptr->model_attributes_preserved ); + if ( host_ptr->groups ) + { + for ( int g = 0 ; g < host_ptr->groups ; g++ ) + { + host_ptr->model_attributes_preserved.group_actions[g].name = host_ptr->group[g].group_name ; + host_ptr->model_attributes_preserved.group_actions[g].minor = host_ptr->group[g].actions_minor_group ; + host_ptr->model_attributes_preserved.group_actions[g].major = host_ptr->group[g].actions_major_group ; + host_ptr->model_attributes_preserved.group_actions[g].critl = host_ptr->group[g].actions_critl_group ; + } + host_ptr->model_attributes_preserved.interval = host_ptr->interval ; + host_ptr->model_attributes_preserved.groups = host_ptr->groups ; + } +} + +/****************************************************************************** + * + * Name : restore_group_actions + * + * Description: Copy saved severity level group action into the matching + * sensor group (name). 
+ * + *****************************************************************************/ + +void hwmonHostClass::restore_group_actions ( struct hwmonHostClass::hwmon_host * host_ptr, + struct sensor_group_type * group_ptr ) +{ + if ( ( host_ptr ) && ( group_ptr ) && ( host_ptr->model_attributes_preserved.groups ) ) + { + for ( int i = 0 ; i < host_ptr->model_attributes_preserved.groups ; i++ ) + { + /* look for a matching group name and restore the settings for that group */ + if ( group_ptr->group_name == host_ptr->model_attributes_preserved.group_actions[i].name ) + { + ilog ("%s %s group match\n", host_ptr->hostname.c_str(), group_ptr->group_name.c_str()); + if ( group_ptr->actions_minor_group != host_ptr->model_attributes_preserved.group_actions[i].minor ) + { + group_ptr->actions_minor_group = host_ptr->model_attributes_preserved.group_actions[i].minor ; + ilog ("%s %s group 'minor' action restored to '%s'\n", + host_ptr->hostname.c_str(), + group_ptr->group_name.c_str(), + group_ptr->actions_minor_group.c_str()); + } + if ( group_ptr->actions_major_group != host_ptr->model_attributes_preserved.group_actions[i].major ) + { + group_ptr->actions_major_group = host_ptr->model_attributes_preserved.group_actions[i].major ; + ilog ("%s %s group 'major' action restored to '%s'\n", + host_ptr->hostname.c_str(), + group_ptr->group_name.c_str(), + group_ptr->actions_major_group.c_str()); + } + if ( group_ptr->actions_critl_group != host_ptr->model_attributes_preserved.group_actions[i].critl ) + { + group_ptr->actions_critl_group = host_ptr->model_attributes_preserved.group_actions[i].critl ; + ilog ("%s %s group 'critical' action restored to '%s'\n", + host_ptr->hostname.c_str(), + group_ptr->group_name.c_str(), + group_ptr->actions_critl_group.c_str()); + } + + /* don't need to look anymore */ + return ; + } + } + } +} + +/***************************************************************************** + * + * Name : ipmi_sensor_model_learn + * + * Description: Setup hwmon 
for a sensor model relearn.
 *              Relearn is a background operation.
 *              Generates warning log if requested while already in progress.
 *
 * Parameters : uuid - uuid of the host whose sensor model is to be relearned
 *
 * Returns    : PASS                 - relearn request accepted
 *              RETRY                - relearn already in progress
 *              FAIL_INVALID_UUID    - uuid failed validation
 *              FAIL_HOSTNAME_LOOKUP - no provisioned hosts or no host
 *                                     matches the specified uuid
 *
 *****************************************************************************/

int hwmonHostClass::ipmi_learn_sensor_model ( string uuid )
{
    /* check for empty list condition */
    if ( hwmon_head == NULL )
    {
        elog ("no provisioned hosts\n");
        return FAIL_HOSTNAME_LOOKUP ;
    }

    if ( hostUtil_is_valid_uuid ( uuid ) == false )
    {
        elog ("invalid host uuid:%s\n",
               uuid.empty() ? "empty" : uuid.c_str());

        return FAIL_INVALID_UUID ;
    }

    /* The uuid to hostname translation does not depend on the list
     * position so it is done once rather than for every host visited. */
    string hostname = hostBase.get_hostname ( uuid ) ;

    for ( struct hwmon_host * ptr = hwmon_head ; ptr != NULL ; ptr = ptr->next )
    {
        if ( hostname == ptr->hostname )
        {
            int rc ;

            if ( ptr->relearn == true )
            {
                wlog ("%s sensor model relearn already in progress\n",
                          ptr->hostname.c_str());

                wlog ("%s ... projected completion time: %s\n",
                          ptr->hostname.c_str(),
                          ptr->relearn_done_date.c_str());

                rc = RETRY ;
            }
            else
            {
                ilog ("%s sensor model relearn request accepted\n",
                          ptr->hostname.c_str());

                /* only flag the request here ; the relearn itself is
                 * carried out elsewhere as a background operation */
                ptr->bmc_fw_version.clear();
                ptr->relearn_request = true ;
                ptr->relearn_retry_counter = 0 ;
                rc = PASS ;
            }
            return rc ;
        }

        /* never walk past the tail of the host list */
        if ( ptr == hwmon_tail )
            break ;
    }

    elog ("hostname lookup failed for uuid:%s\n", uuid.c_str());
    return FAIL_HOSTNAME_LOOKUP ;
}

/*********************************************************************************
 *
 * Name       : manage_sensor_state
 *
 * Purpose    : manage sensor that change events
 *
 * Description: Manages sensor failures in the following way
 *
 * 1. if the sensor is suppressed then check to see if it is already alarmed
 *    and if so clear that alarm. Send degrade clear message to mtce if this is
 *    the only sensor that is degraded.
 *
 * 2.
if the sensor is already failed then + * - see if its severity level has changed + * - if the new level is to not alarm then clear the alarm. + * - if the new level is alarm then raise the correct alarm level + * + * 3. if the severity action is to alarm then raise the alarm + * + * Assumptions: sensor status in the database is managed by the caller + * + * Parameters: + * + * hostname - the host that is affected. + * sensor - the sensor that is affected + * severity - any of sensor_severity_enum types + * + **********************************************************************************/ +int hwmonHostClass::manage_sensor_state ( string & hostname, sensor_type * sensor_ptr, sensor_severity_enum severity ) +{ + int rc = FAIL_UNKNOWN_HOSTNAME ; + + hwmonHostClass::hwmon_host * host_ptr = hwmonHostClass::getHost ( hostname ); + if ( host_ptr ) + { + string reason = REASON_OOT ; + + bool ignore_action = false ; + bool log_action = false ; + bool clear_alarm = false ; + bool clear_degrade = false ; + bool clear_log = false ; + + bool assert_alarm = false ; + bool assert_degrade = false ; + bool assert_log_minor = false ; + bool assert_log_major = false ; + bool assert_log_critical = false ; + + int current_severity = HWMON_SEVERITY_GOOD ; + + /* load up the severity level */ + if ( !sensor_ptr->status.compare("ok") ) + current_severity = HWMON_SEVERITY_GOOD ; + else if ( !sensor_ptr->status.compare("critical") ) + current_severity = HWMON_SEVERITY_CRITICAL ; + else if ( !sensor_ptr->status.compare("major") ) + current_severity = HWMON_SEVERITY_MAJOR ; + else if ( !sensor_ptr->status.compare("minor") ) + current_severity = HWMON_SEVERITY_MINOR ; + else if ( !sensor_ptr->status.compare("offline") ) + { + current_severity = HWMON_SEVERITY_GOOD ; + return (PASS); + } + else + { + slog ("%s unsupported sensor status '%s'\n", hostname.c_str(), sensor_ptr->status.c_str()); + return (FAIL_BAD_STATE); + } + + /* Check suppression */ + if ( sensor_ptr->suppress == true ) + { + 
reason = REASON_SUPPRESSED ; + blog ("%s '%s' sensor %s\n", hostname.c_str(), sensor_ptr->sensorname.c_str(), reason.c_str()); + + if ( sensor_ptr->critl.logged || sensor_ptr->major.logged || sensor_ptr->minor.logged ) + { + clear_log = true ; + } + + if ( sensor_ptr->alarmed == true ) + clear_alarm = true ; + + if ( sensor_ptr->degraded == true ) + clear_degrade = true ; + + clear_ignored_state (sensor_ptr); + clear_logged_state (sensor_ptr); + } + + /* ignore these cases if suppress is true (else if) */ + else if ( severity == HWMON_SEVERITY_GOOD ) + { + reason = REASON_OK ; + if ( sensor_ptr->critl.logged || sensor_ptr->major.logged || sensor_ptr->minor.logged ) + { + clear_log = true ; + } + + if ( sensor_ptr->alarmed == true ) + { + clear_alarm = true ; + } + + if ( sensor_ptr->degraded == true ) + { + clear_degrade = true ; + } + clear_ignored_state (sensor_ptr); + clear_logged_state (sensor_ptr); + } + else if ( severity == HWMON_SEVERITY_MINOR ) + { + if ( sensor_ptr->degraded == true ) + clear_degrade = true ; + + if ( sensor_ptr->minor.ignored == true ) + { + reason = REASON_IGNORED ; + if ( is_alarmed ( sensor_ptr ) == true ) + { + clear_alarm = true ; + } + ignore_action = true ; + } + else if ( ( log_action = is_log_action ( sensor_ptr->actions_minor )) == true ) + { + if ( sensor_ptr->minor.logged == false) + { + clear_logged_state ( sensor_ptr ); + assert_log_minor = true ; + } + + if ( sensor_ptr->alarmed == true ) + { + clear_alarm = true ; + } + clear_ignored_state ( sensor_ptr ); + } + else if ( sensor_ptr->alarmed == true ) + { + if (( ignore_action == true ) || ( log_action == true )) + { + clear_alarm = true ; + } + else if ( current_severity != HWMON_SEVERITY_MINOR ) + { + assert_alarm = true ; + } + } + else + { + assert_alarm = true ; + } + + /* Minor assertions should not degrade */ + if ( sensor_ptr->degraded == true ) + { + clear_degraded_state ( sensor_ptr ) ; + } + } + else if ( severity == HWMON_SEVERITY_MAJOR ) + { + if ( 
sensor_ptr->major.ignored == true ) + { + reason = REASON_IGNORED ; + if ( is_alarmed ( sensor_ptr ) == true ) + { + clear_alarm = true ; + } + ignore_action = true ; + + if ( sensor_ptr->degraded == true ) + clear_degrade = true ; + } + + else if (( log_action = is_log_action ( sensor_ptr->actions_major )) == true ) + { + if ( sensor_ptr->major.logged == false) + { + clear_logged_state ( sensor_ptr ); + assert_log_major = true ; + } + + if ( sensor_ptr->alarmed == true ) + { + clear_alarm = true ; + } + clear_ignored_state ( sensor_ptr ); + } + + else if ( sensor_ptr->alarmed == true ) + { + if (( ignore_action == true ) || ( log_action == true )) + { + clear_alarm = true ; + } + else if ( current_severity != HWMON_SEVERITY_MAJOR ) + { + assert_alarm = true ; + } + } + else + { + assert_alarm = true ; + } + + if ( sensor_ptr->degraded == false ) + { + if (( ignore_action == true ) || ( log_action == true )) + { + ; // clear_degrade = true ; + } + else + { + assert_degrade = true ; + } + } + } + else if ( severity == HWMON_SEVERITY_CRITICAL ) + { + if ( sensor_ptr->critl.ignored == true ) + { + reason = REASON_IGNORED ; + if ( is_alarmed ( sensor_ptr ) == true ) + { + clear_alarm = true ; + } + ignore_action = true ; + + if ( sensor_ptr->degraded == true ) + clear_degrade = true ; + } + + else if ( ( log_action = is_log_action ( sensor_ptr->actions_critl )) == true ) + { + if ( sensor_ptr->critl.logged == false ) + { + clear_logged_state ( sensor_ptr ); + assert_log_critical = true ; + } + + if ( sensor_ptr->alarmed == true ) + { + clear_alarm = true ; + } + clear_ignored_state ( sensor_ptr ); + } + + else if ( sensor_ptr->alarmed == true ) + { + if (( ignore_action == true ) || ( log_action == true )) + { + clear_alarm = true ; + } + else if ( current_severity != HWMON_SEVERITY_CRITICAL ) + { + assert_alarm = true ; + } + } + else + { + assert_alarm = true ; + } + + if ( sensor_ptr->degraded == false ) + { + if (( ignore_action == true ) || ( log_action == true )) 
+ { + ; // clear_degrade = true ; + } + else + { + assert_degrade = true ; + } + } + } + + if ( assert_degrade || clear_degrade || clear_alarm || assert_alarm ) + { + ilog ("%s %-20s assert_degrade = %d severity = %x %s\n", hostname.c_str(), sensor_ptr->sensorname.c_str(), assert_degrade, severity, sensor_ptr->suppress ? "suppressed" : " action " ); + ilog ("%s %-20s clear_degrade = %d status = %3s minor = %s\n", hostname.c_str(), sensor_ptr->sensorname.c_str(), clear_degrade , sensor_ptr->status.c_str(), sensor_ptr->actions_minor.c_str()); + ilog ("%s %-20s clear_alarm = %d degraded = %3s major = %s\n", hostname.c_str(), sensor_ptr->sensorname.c_str(), clear_alarm , sensor_ptr->degraded ? "Yes" : "No ", sensor_ptr->actions_major.c_str()); + ilog ("%s %-20s assert_alarm = %d alarmed = %3s critl = %s\n", hostname.c_str(), sensor_ptr->sensorname.c_str(), assert_alarm , sensor_ptr->alarmed ? "Yes" : "No ", sensor_ptr->actions_critl.c_str()); + } + + if ( assert_log_critical || assert_log_major || assert_log_minor || clear_log ) + { + ilog ("%s %s assert log [%s%s%s] %s %s\n", + hostname.c_str(), + sensor_ptr->sensorname.c_str(), + assert_log_critical ? "crit" : "", + assert_log_major ? "major" : "", + assert_log_minor ? "minor" : "", + clear_log ? "clear log" : "", + ignore_action ? 
"ignore" : "" ); + } + + /* logic error check */ + if ((( assert_degrade == true ) && ( clear_degrade == true )) || + (( assert_alarm == true ) && ( clear_alarm == true ))) + { + slog ("%s conflicting degrade state or alarming calculation - favoring clear\n", hostname.c_str() ); + if ( clear_alarm == true ) + { + assert_alarm = false ; + } + if ( clear_degrade == true ) + { + assert_degrade = false ; + } + } + + /*************************************************************************** + * + * TAKE THE ACTIONS NOW + * + **************************************************************************/ + + if ( clear_log == true ) + { + hwmonLog_clear ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason ); + clear_logged_state ( sensor_ptr ); + } + + if ( assert_log_critical ) + { + clear_logged_state (sensor_ptr); + sensor_ptr->critl.logged = true ; + hwmonLog_critical ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason ); + } + if ( assert_log_major ) + { + clear_logged_state (sensor_ptr); + sensor_ptr->major.logged = true ; + hwmonLog_major ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason ); + } + if ( assert_log_minor ) + { + clear_logged_state (sensor_ptr); + sensor_ptr->minor.logged = true ; + hwmonLog_minor ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason ); + } + + /* handle clearing the specified alarm */ + if ( clear_alarm == true ) + { + hwmonAlarm_clear ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason ); + clear_degraded_state ( sensor_ptr ); + clear_alarmed_state ( sensor_ptr ); + } + /* handle asserting the specified alarm */ + else if ( assert_alarm == true ) + { + clear_alarmed_state ( sensor_ptr); + if ( severity == HWMON_SEVERITY_CRITICAL ) + { + hwmonAlarm_critical ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason ); + set_alarmed_severity ( sensor_ptr, FM_ALARM_SEVERITY_CRITICAL ); + if ( assert_degrade != true ) + assert_degrade = true ; + } + else if 
( severity == HWMON_SEVERITY_MAJOR ) + { + hwmonAlarm_major ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason ); + set_alarmed_severity ( sensor_ptr, FM_ALARM_SEVERITY_MAJOR ); + if ( assert_degrade != true ) + assert_degrade = true ; + } + else if ( severity == HWMON_SEVERITY_MINOR ) + { + hwmonAlarm_minor ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason ); + set_alarmed_severity ( sensor_ptr, FM_ALARM_SEVERITY_MINOR ); + } + /* NEW */ + clear_logged_state ( sensor_ptr ); + clear_ignored_state ( sensor_ptr ); + } + + /* handle sending a degrade clear request to mtcAgent */ + if ( clear_degrade == true ) + { + clear_degraded_state ( sensor_ptr ); + } + + /* handle sending a degrade request to mtcAgent */ + else if ( assert_degrade == true ) + { + set_degraded_state ( sensor_ptr ); + } + } + else + { + wlog ("%s Unknown Host\n", hostname.c_str()); + } + + sensorState_print ( hostname, sensor_ptr ); + return (rc); +} + +/***************************************************************************** + * + * Name : audit_interval_change + * + * Description: Set a host specific flag indicating that the sensor monitoring + * audit interval for this host has changed. + * + * The actual interval change is handled in the add handler. + * + * This API is used during group load from the database when the + * default host_ptr->interval is zero or groups have differing + * values. 
+ * + *****************************************************************************/ + +void hwmonHostClass::audit_interval_change ( string hostname ) +{ + if ( !hostname.empty()) + { + hwmon_host * host_ptr = hwmonHostClass::getHost ( hostname ); + if ( host_ptr != NULL ) + { + /* handle refreshing sysinv at base level to avoid deadlock */ + host_ptr->interval_changed = true ; + } + } +} + +/***************************************************************************** + * + * Name : modify_audit_interval + * + * Description: Changes the host_ptr->interval to the specified value and + * sets the 'interval_changed' flag indicating that the sensor + * monitoring audit interval for this host has changed. + * + * The actual interval change is handled in the DELAY stage of the + * ipmi_sensor_monitor. + * + * This API is called by http group modify handler to trigger + * change of the sensor audit interval to a specific value. + * + *****************************************************************************/ + +void hwmonHostClass::modify_audit_interval ( string hostname , int interval ) +{ + if ( !hostname.empty()) + { + hwmonHostClass::hwmon_host * host_ptr ; + host_ptr = hwmonHostClass::getHost ( hostname ); + if ( host_ptr != NULL ) + { + if ( host_ptr->interval != interval ) + { + host_ptr->interval_old = host_ptr->interval ; + host_ptr->interval = interval ; + + /* handle popping this new value to hwmon groups + * and sysinv database at base level to avoid deadlock */ + + host_ptr->interval_changed = true ; + } + } + } +} + + +/* log sensor data to a tmp file to assis debug of sensor read issues */ +void hwmonHostClass::log_sensor_data ( struct hwmonHostClass::hwmon_host * host_ptr, string & sensorname, string from, string to ) +{ + string sensor_datafile = IPMITOOL_OUTPUT_DIR ; + sensor_datafile.append(host_ptr->hostname); + sensor_datafile.append(IPMITOOL_SENSOR_OUTPUT_FILE_SUFFIX); + + string debugfile = "/tmp/" ; + debugfile.append(host_ptr->hostname); + 
debugfile.append(IPMITOOL_SENSOR_OUTPUT_FILE_SUFFIX); + debugfile.append("_debug"); + + string source = pt() ; + source.append (" - "); + source.append (sensorname); + source.append (" from '"); + source.append (from ); + source.append ("' to '"); + source.append (to ); + source.append ("'\n"); + daemon_log ( debugfile.data(), source.data()); + daemon_log ( debugfile.data(), host_ptr->ipmitool_thread_info.data.data()); + daemon_log ( debugfile.data(), daemon_read_file ( sensor_datafile.data()).data()); + daemon_log ( debugfile.data(), "---------------------------------------------------------------------\n"); +} + + +void hwmonHostClass::print_node_info ( void ) +{ + fflush (stdout); + fflush (stderr); +} + +void hwmonHostClass::mem_log_info ( struct hwmonHostClass::hwmon_host * hwmon_host_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s has %d sensor(s) across %d sensor group(s)\n", + hwmon_host_ptr->hostname.c_str(), + hwmon_host_ptr->sensors, + hwmon_host_ptr->groups ); + mem_log (str); +} + +void hwmonHostClass::mem_log_options ( struct hwmonHostClass::hwmon_host * hwmon_host_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tMonitoring: %s Provisioned: %s Connected: %s Count: %d\n", + hwmon_host_ptr->hostname.c_str(), + hwmon_host_ptr->monitor ? "YES" : "no" , + hwmon_host_ptr->bm_provisioned ? "YES" : "no", + hwmon_host_ptr->connected ? 
"YES" : "no", + hwmon_host_ptr->sensor_query_count); + + mem_log (str); + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tMon Gates : GroupIndex:%d Groups:%d Sensors:%d\n", + hwmon_host_ptr->hostname.c_str(), + hwmon_host_ptr->group_index, + hwmon_host_ptr->groups, + hwmon_host_ptr->sensors ); + mem_log (str); +} + +void hwmonHostClass::mem_log_bm ( struct hwmonHostClass::hwmon_host * hwmon_host_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tbm_ip:%s bm_un:%s bm_type:%s\n", + hwmon_host_ptr->hostname.c_str(), + hwmon_host_ptr->bm_ip.c_str(), + hwmon_host_ptr->bm_un.c_str(), + hwmon_host_ptr->bm_type.c_str()); + mem_log (str); +} + +void hwmonHostClass::mem_log_threads ( struct hwmonHostClass::hwmon_host * hwmon_host_ptr) +{ + char str[MAX_MEM_LOG_DATA] ; + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tThread Stage:%d Runs:%d Progress:%d Ctrl Status:%d Thread Status:%d\n", + hwmon_host_ptr->hostname.c_str(), + hwmon_host_ptr->ipmitool_thread_ctrl.stage, + hwmon_host_ptr->ipmitool_thread_ctrl.runcount, + hwmon_host_ptr->ipmitool_thread_info.progress, + hwmon_host_ptr->ipmitool_thread_ctrl.status, + hwmon_host_ptr->ipmitool_thread_info.status); + mem_log (str); +} + +void hwmonHostClass::check_accounting ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + int count = 0 ; + + for ( int g = 0 ; g < host_ptr->groups ; ++g ) + { + for ( int s = 0 ; s < host_ptr->group[g].sensors ; ++s ) + { + count++ ; + } + } + + if ( count == host_ptr->sensors ) + host_ptr->accounting_ok = true ; + else + host_ptr->accounting_ok = false ; + + snprintf ( &str[0], MAX_MEM_LOG_DATA, "SENSOR: Accounting is %s (%d:%d)", host_ptr->accounting_ok ? 
"GOOD" : "BAD", host_ptr->sensors, count ); + mem_log (str); +} + +void hwmonHostClass::mem_log_groups ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + + for ( int i = 0 ; i < host_ptr->groups ; i++ ) + { + /* Don't dump sensor group info if there are no sensors in it */ + if ( !host_ptr->group[i].sensors ) + continue ; + + snprintf (&str[0], MAX_MEM_LOG_DATA, " "); + mem_log (str); + + snprintf (&str[0], MAX_MEM_LOG_DATA, "GROUP : %03d secs %s %s %s uuid:%s\n", + host_ptr->group[i].group_interval, + host_ptr->group[i].group_name.c_str(), + host_ptr->group[i].group_state.c_str(), + host_ptr->group[i].suppress ? "suppressed" : "", + host_ptr->group[i].group_uuid.c_str()); + mem_log (str); + + snprintf (&str[0], MAX_MEM_LOG_DATA, " Actions: [minor:%s][%s] [major:%s][%s] [crit:%s][%s]\n\n", + host_ptr->group[i].actions_minor_group.c_str(), + host_ptr->group[i].actions_minor_choices.c_str(), + host_ptr->group[i].actions_major_group.c_str(), + host_ptr->group[i].actions_major_choices.c_str(), + host_ptr->group[i].actions_critl_group.c_str(), + host_ptr->group[i].actions_critical_choices.c_str()); + mem_log (str); + +#ifdef WANT_UNIT_MEMLOG_INFO /* not used presently */ + snprintf (&str[0], MAX_MEM_LOG_DATA, " > Info : algorithm: %s - unit [base:%s] [rate:%s] [modifier:%s]\n", + host_ptr->group[i].algorithm.c_str(), + host_ptr->group[i].unit_base_group.c_str(), + host_ptr->group[i].unit_rate_group.c_str(), + host_ptr->group[i].unit_modifier_group.c_str()); + mem_log (str); +#endif + +#ifdef WANT_THRESHOLD_MEMLOG_INFO /* not used presently */ + snprintf (&str[0], MAX_MEM_LOG_DATA, " > Threshold: Lcrit - Lmajor - Lminor | Uminor - Umajor - Ucrit\n"); + mem_log (str); + + snprintf (&str[0], MAX_MEM_LOG_DATA, " > %5.3f - %6.3f - %6.3f | %6.3f - %6.3f - %6.3f\n", + host_ptr->group[i].t_critical_lower_group, host_ptr->group[i].t_major_lower_group , + host_ptr->group[i].t_minor_lower_group, host_ptr->group[i].t_minor_upper_group , + 
host_ptr->group[i].t_major_upper_group, host_ptr->group[i].t_critical_upper_group); + mem_log (str); +#endif + + if ( host_ptr->accounting_ok == true ) + { + for ( int s = 0 ; s < host_ptr->group[i].sensors ; s++ ) + { + sensor_type * sensor_ptr = host_ptr->group[i].sensor_ptr[s] ; + + snprintf ( &str[0], MAX_MEM_LOG_DATA, "SENSOR: %-20s %-20s %8s-%-8s sev:%-8s [minor:%-6s major:%-6s crit:%-6s] [alarmed:%c%c%c] [ignored:%c%c%c] [logged:%c%c%c] %s:%s %s%s%s\n", + host_ptr->group[i].group_name.c_str(), + sensor_ptr->sensorname.c_str(), + sensor_ptr->state.c_str(), + sensor_ptr->status.c_str(), + get_severity(sensor_ptr->severity).c_str(), + sensor_ptr->actions_minor.c_str(), + sensor_ptr->actions_major.c_str(), + sensor_ptr->actions_critl.c_str(), + sensor_ptr->minor.alarmed ? 'Y' : '.', + sensor_ptr->major.alarmed ? 'Y' : '.', + sensor_ptr->critl.alarmed ? 'Y' : '.', + sensor_ptr->minor.ignored ? 'Y' : '.', + sensor_ptr->major.ignored ? 'Y' : '.', + sensor_ptr->critl.ignored ? 'Y' : '.', + sensor_ptr->minor.logged ? 'Y' : '.', + sensor_ptr->major.logged ? 'Y' : '.', + sensor_ptr->critl.logged ? 'Y' : '.', + sensor_ptr->uuid.c_str(), + sensor_ptr->group_uuid.substr(0,8).c_str(), + sensor_ptr->degraded ? "degraded " : "", + sensor_ptr->alarmed ? "alarmed " : "", + sensor_ptr->suppress ? 
"suppressed " : ""); + mem_log (str); + } + } + else + { + string sensor_list = "" ; + bool first = true ; + bool done = false ; + for ( int x = 0 ; x < host_ptr->group[i].sensors ; x++ ) + { + sensor_type * sensor_ptr = host_ptr->group[i].sensor_ptr[x] ; + sensor_list.append(sensor_ptr->sensorname); + if ( x < host_ptr->group[i].sensors - 1 ) + sensor_list.append(", "); + + if ( x == host_ptr->group[i].sensors - 1 ) + { + done = true ; + } + if ((( x % 8 == 0 ) & ( x != 0 )) || ( done == true )) + // if ( done == true ) + { + if ( first == true ) + { + snprintf (&str[0], MAX_MEM_LOG_DATA, " SENSORS:%02d: %s\n", host_ptr->group[i].sensors, sensor_list.c_str() ); + mem_log (str); + first = false ; + } + else + { + snprintf (&str[0], MAX_MEM_LOG_DATA, " %s\n", sensor_list.c_str() ); + mem_log (str); + } + sensor_list = " " ; + } + if ( done == true ) break ; + } + } + } +} + +void hwmonHostClass::memDumpNodeState ( string hostname ) +{ + hwmonHostClass::hwmon_host* hwmon_host_ptr ; + hwmon_host_ptr = hwmonHostClass::getHost ( hostname ); + if ( hwmon_host_ptr == NULL ) + { + mem_log ( hostname, ": ", "Not Found in hwmonHostClass\n" ); + return ; + } + else + { + mem_log_options ( hwmon_host_ptr ); + hwmonHostClass::hostBase.memDumpNodeState ( hostname ); + mem_log_info ( hwmon_host_ptr ); + mem_log_bm ( hwmon_host_ptr ); + mem_log_threads ( hwmon_host_ptr ); + check_accounting( hwmon_host_ptr ); + mem_log_groups ( hwmon_host_ptr ); + } +} + +void hwmonHostClass::memDumpAllState ( void ) +{ + struct hwmon_host * ptr = hwmon_head ; + + if ( hwmon_head == NULL ) return ; + + hwmonHostClass::hostBase.memLogDelimit (); + + /* walk the node list looking for nodes that should be monitored */ + for ( int i = 0 ; i < hosts ; i++ ) + { + memDumpNodeState ( ptr->hostname ); + hwmonHostClass::hostBase.memLogDelimit (); + ptr = ptr->next ; + if ( ptr == NULL ) + break ; + } +} + +void hwmonHostClass::sensorState_print_debug ( struct hwmonHostClass::hwmon_host * host_ptr, string 
sensorname, string proc, int line ) +{ + /* loop over all the sensors handling their current severity */ + for ( int i = 0 ; i < host_ptr->sensors ; i++ ) + { + sensor_type * ptr = &host_ptr->sensor[i] ; + + if ( ptr->sensorname.compare(sensorname) == 0 ) + { + plog ("Location: %s %d\n", proc.c_str(), line ); + sensorState_print ( host_ptr->hostname, ptr ); + break ; + } + } +} + + diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonClass.h b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonClass.h new file mode 100644 index 00000000..d731140a --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonClass.h @@ -0,0 +1,606 @@ +#ifndef __INCLUDE_HWMONCLASS_H__ +#define __INCLUDE_HWMONCLASS_H__ + +/* + * Copyright (c) 2015-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +#include "nodeBase.h" /* for ... */ +#include "hostUtil.h" /* for ... server_enum */ +#include "httpUtil.h" /* for ... libEvent */ +#include "pingUtil.h" /* for ... ping */ +#include "threadUtil.h" /* for ... thread_ctrl_type thread_info_type*/ +#include "hwmon.h" +#include "hostClass.h" +#include "hwmonThreads.h" +#include "hwmonSensor.h" +//#include "hwmonIpmi.h" /* for ... 
sensor_data_type */ + +typedef enum +{ + HWMON_DEL__START = 0, + HWMON_DEL__WAIT, + HWMON_DEL__DONE, + HWMON_DEL__STAGES +} hwmon_delStages_enum ; + +class hwmonHostClass +{ + private: + struct hwmon_host { + + string hostname ; + + /** The IP address of the host's board management controller */ + string bm_ip ; + + /** The PW of the host's board management controller */ + string bm_pw ; + + /** A string label that represents the board management + * controller type for this host */ + string bm_type ; + + /** The operator provisioned board management hostname */ + string bm_un ; + + bool bm_provisioned ; + + /** set true once a connection is estabished and + * set false when error recovery is performed on the connection + **/ + bool connected ; + + /** 'Connected' alternative (see above) for ipmi sensor monitoring. + * We don't 'connect' using ipmi sensor monitoring so a more + * representative word is introduced. + * + * The BMC is 'accessible' once provisioning data is available + * and bmc is verified pingable. 
+ **/ + bool accessible; + + /** run the delete_handler FSM when set to true */ + bool host_delete ; + + /** general purpose retry counter */ + int retries ; + + /** true when host is degraded due to the inability to load group/sensor configuration */ + bool degraded ; + + /** true when the SENSORCFG alarm is raised due to the inability to load group/sensor configuration */ + bool alarmed ; + + /** true when sensor config alarm is raised */ + bool alarmed_config ; + + /* sensor audit interval */ + int interval ; + int interval_old ; /* helps show interval change in log */ + bool interval_changed ; + + /* throttle degrade audit logs */ + int degrade_audit_log_throttle ; + + /** set to the protocol used to communicate with this server's BMC */ + protocol_enum protocol ; + + /** Pointer to the previous host in the list */ + struct hwmon_host * prev; + + /** Pointer to the next host in the list */ + struct hwmon_host * next; + + struct mtc_timer hostTimer ; + struct mtc_timer addTimer ; + + bool monitor ; /* true if host's sensors are to be monitored */ + + /* set true by HWMON_SENSOR_MONITOR__POWER handling state before the + * sensor model has been learned. 
Being false provides hold off + * to learning the sensor model ; which will be created incorrectly + * if learned while the power is off */ + bool poweron ; + + /* SENSORS */ + /* ------- */ + + /**** New Host Specific Private Constructs for IPMI Monitoring ****/ + + bool quanta_server ; + + /* for bmc ping access monitor */ + ping_info_type ping_info ; + + /* Sensor Monitoring Thread Structs */ + + /* the info required by the sensor read thread to issue a ipmitool + * lanplus request to read sensors over the network */ + thread_ctrl_type ipmitool_thread_ctrl ; /* control data used to manage the thread */ + thread_info_type ipmitool_thread_info ; /* thread info used to execute and post results */ + thread_extra_info_type thread_extra_info ; /* extra thread info for sensor monitoring */ + + /* Ipmi sensor monitoring control structure */ + monitor_ctrl_type monitor_ctrl ; + + /* number of sensor queries since last process restart */ + int sensor_query_count ; + + int want_degrade_audit ; + + /* the last json string containing the last read sensor data */ + string json_ipmi_sensors ; + + int sensors ; /**< # of sensors in the sysinv database */ + int samples ; /**< # of parsed samples from the reader thread */ + /* + * The Main running Sensors Profile for this host. + * This list reflects what is in the sysinv database + * and shown in the UI. + */ + sensor_type sensor[MAX_HOST_SENSORS] ; + + sensor_data_type sample[MAX_HOST_SENSORS] ; /* last read analog samples */ + + /* + * Sequential checksum of all the sensor names in ther various + * sensor lists. See hwmonUtil.cpp for checksum utilities or + * hwmon.h for prototype + */ + unsigned short last_sample_sensor_checksum ; + unsigned short sample_sensor_checksum ; + unsigned short profile_sensor_checksum ; + + /* GROUPS */ + /* ------ */ + + /* number of sensors groups provisioned . 
host */ + int groups ; + + /* list of groups for this host */ + struct sensor_group_type group[MAX_HOST_GROUPS] ; + + /* current group monitoring index ; used by the group monitor FSM */ + int group_index ; + + hwmon_addStages_enum addStage ; + hwmon_delStages_enum delStage ; + + int group_mon_log_throttle ; + + libEvent event ; + + /* indicates whether the group/sensor accounting looks valid. + * i.e. number of sensors in sensor groups adds to equal total + * number of sensors */ + bool accounting_ok ; + + /* The number of sensor accounting errors , i.e. sensors not found, + * in the current sample set. + * + * If this count reaches MAX_SENSORS_NOT_FOUND then the + * accounting_bad_b4_reload_count below is incremented. + * + * Whenever all the sensors are found then + * this and the accounting_bad_b4_reload_count is cleared. */ + int accounting_bad_count ; + + /* string that represents the BMC firmware version */ + string bmc_fw_version ; + + /********** Sensor Model Relearn Handling Controls **********/ + + /* set to true when a new relearn request is received while not + * already in sensor model relearning mode */ + bool relearn_request ; + + /* true while in sensor model relearning mode */ + bool relearn ; + + /* a timer that forces exit from learn mode when it expires */ + struct mtc_timer relearnTimer ; + + /* Count relearn failure retries. + * Used to avoid repeating some retry operations. */ + int relearn_retry_counter ; + + /* Store the date/time when learning mode will be disabled. + * Put into error message to tell the administrator when the + * next sensor relearn is permitted when the current request + * is rejected due to already being in relearn mode. */ + string relearn_done_date ; + + /* a structure used to preserved some key sensor model attributes + * so that they can be restored over/after the relearn action */ + model_attr_type model_attributes_preserved ; + }; + + /** List of allocated host memory. + * + * An array of host pointers. 
+ */ + hwmon_host * host_ptrs[MAX_HOSTS] ; + + /** A memory allocation counter. + * + * Should represent the number of hosts in the linked list. + */ + int memory_allocs ; + + /** A memory used counter + * + * A variable storing the accumulated host memory + */ + int memory_used ; + + struct hwmon_host * hwmon_head ; /**< Host Linked List Head pointer */ + struct hwmon_host * hwmon_tail ; /**< Host Linked List Tail pointer */ + + struct hwmonHostClass::hwmon_host* newHost ( void ); + struct hwmonHostClass::hwmon_host* addHost ( string hostname ); + struct hwmonHostClass::hwmon_host* getHost ( string hostname ); + int remHost ( string hostname ); + int delHost ( struct hwmonHostClass::hwmon_host * hwmon_host_ptr ); + struct hwmonHostClass::hwmon_host* getHost_timer ( timer_t tid ); + + int set_bm_prov ( struct hwmonHostClass::hwmon_host * host_ptr, bool state ); + void clear_bm_assertions ( struct hwmonHostClass::hwmon_host * host_ptr ); + void free_host_timers ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /** typically called by an audit, this interface cycles through all + * the sensors looking for any that are in the degrade state and + * sends a degrade request to maintenance if it sees just 1 */ + void degrade_state_audit ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /* FSM handlers */ + int add_host_handler ( struct hwmonHostClass::hwmon_host * host_ptr ); + int group_mon_handler ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /* in hwmonSensor.cpp */ + int hwmon_load_sensors ( struct hwmonHostClass::hwmon_host * host_ptr , bool & error ); + int hwmon_load_groups ( struct hwmonHostClass::hwmon_host * host_ptr , bool & error ); + + int load_profile_sensors ( struct hwmonHostClass::hwmon_host * host_ptr, + sensor_type * sensor_array_ptr, int max, + bool & error ); + + int load_profile_groups ( struct hwmonHostClass::hwmon_host * host_ptr, + struct sensor_group_type * group_array_ptr,int max , + bool & error ); + + + int hwmon_group_sensors 
( struct hwmonHostClass::hwmon_host * host_ptr ); + + int delete_unwanted_sensors ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /** Host add handler Stage Change member function */ + int addStageChange ( struct hwmonHostClass::hwmon_host * hwmon_host_ptr, + hwmon_addStages_enum newHdlrStage ); + + /** handled deleting a host from the hwmonHostClass object */ + int delete_handler ( struct hwmonHostClass::hwmon_host * host_ptr ); + + void log_sensor_data ( struct hwmonHostClass::hwmon_host * host_ptr, string & sensorname, string data, string note); + void mem_log_info ( struct hwmonHostClass::hwmon_host * host_ptr ); + void mem_log_options ( struct hwmonHostClass::hwmon_host * host_ptr ); + void mem_log_bm ( struct hwmonHostClass::hwmon_host * host_ptr ); + void mem_log_groups ( struct hwmonHostClass::hwmon_host * host_ptr ); + void mem_log_threads ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /************* New Private APIs for IPMI Sensor Monitoring **************/ + + /************************************************************************* + * + * Implemented in hwmonClass.cpp + * + *************************************************************************/ + + void ipmi_bmc_data_init ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /*************************************************************************** + * + * The following are sensor model provisioning APIs responsible for + * loading, creating and deleting sensor models wrt the sysinv database + * and hwmond. + * + * Implemented in hwmonModel.cpp + * + * ipmi_load_sensor_model - will load an existing sensor and group + * model from the database for the specified + * host into hwmond. + * + * ipmi_create_sensor_model - will create a new sensor and group model in + * the sysinv database for the specified host. + * + * ipmi_delete_sensor_model - will delete the sensor and group model from + * the sysinv database for the specified host. 
+ * + * ipmi_create_sample_model - will create a sensor model based on sample + * data for the specified host. + * + * ipmi_create_quanta_model - will create a quanta server sensor group model + * for the specified host from sensor sample data. + * + *************************************************************************/ + int ipmi_load_sensor_model ( struct hwmonHostClass::hwmon_host * host_ptr ); + int ipmi_create_sensor_model ( struct hwmonHostClass::hwmon_host * host_ptr ); + int ipmi_delete_sensor_model ( struct hwmonHostClass::hwmon_host * host_ptr ); + int ipmi_create_sample_model ( struct hwmonHostClass::hwmon_host * host_ptr ); + int ipmi_create_quanta_model ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /************************************************************************* + * + * The following are sensor sample sensor data management APIs + * + * File: hwmonIpmi.cpp + * + * ipmi_load_sensor_samples - loads the samples into the sample list. + * + * ipmi_update_sensors - updates the the hwmond with the latest sensor + * sample severity level for the specified host. + * + *************************************************************************/ + int ipmi_load_sensor_samples ( struct hwmonHostClass::hwmon_host * host_ptr, char * msg_ptr ); + int ipmi_update_sensors ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /************************************************************************** + * + * Name : manage_startup_states + * + * Purpose: Manage how hwmon deals with sensor states over process startup. + * + * File : hwmonHdlr.cpp + * + * This code that was taken from the add_handler and put into this stand + * alone procedure for code re-use so that it can be called by the add + * handler for ipmi without cloning it. 
+ * + **************************************************************************/ + bool manage_startup_states ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /************************************************************************** + * + * Handle ipmitool monitoring audit interval changes where there is one + * interval for all sensor groups. Changing a single group's audit + * interval does so for all. All for 1 and one for all. + * + **************************************************************************/ + int interval_change_handler( struct hwmonHostClass::hwmon_host * host_ptr ); + + /* The sensor monitor FSM */ + int ipmi_sensor_monitor ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /* Remove all groups / sensor from hwmon */ + int hwmon_del_groups ( struct hwmonHostClass::hwmon_host * host_ptr ); + int hwmon_del_sensors ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /* Implemented in hwmonGroup.cpp */ + + /*************************************************************************** + * Manage sensor group states in the database and hwmon as well + * and manage sensr group alarms. Since state changes affect alarming + * the two functions work well together. + ***************************************************************************/ + int ipmi_set_group_state ( struct hwmonHostClass::hwmon_host * host_ptr, string state ); + + /* Set all sensors to disabled-offline state/status */ + int ipmi_disable_sensors ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /**************************************************************************** + * Create sensor groups in hwmon based on sample data using similar ipmi + * unit type canned groups and save those groups into the database. 
+ ****************************************************************************/ + int ipmi_create_groups ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /**************************************************************************** + * Load the sensor samples into hwmon and then save them into the database. + ****************************************************************************/ + int ipmi_create_sensors ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /***************************************************************************** + * Add a new group to hwmon and then to the sysinv database. + ****************************************************************************/ + int ipmi_add_group ( struct hwmonHostClass::hwmon_host * host_ptr , + string datatype, string sensortype, + canned_group_enum grouptype, + string group_name, string path ); + + /**************************************************************************** + * Put the current ipmi sensor list into the previously created sensor type + * based groups and save that grouping in the sysinv database. + *****************************************************************************/ + int ipmi_group_sensors ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /*************************************************************************** + * Check whether the group/sensor accounting looks valid. + * i.e. number of sensors in sensor groups adds to equal total sensors. 
+ **************************************************************************/ + void check_accounting ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /*************************************************************************** + * Force monitoring to start now + **************************************************************************/ + void monitor_now ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /*************************************************************************** + * Force monitoring to start soon ; called during sensor relearn request + * to give horizon time to show the deleted sensor model but not have + * the user wait for what might be a long audit interval before the + * refresh. + **************************************************************************/ + void monitor_soon ( struct hwmonHostClass::hwmon_host * host_ptr ); + + /************************************************************************** + * Save and restore structure and utilties for preserving audit + * interval and group actions over a sensor relearn. + **************************************************************************/ + + void save_model_attributes ( struct hwmonHostClass::hwmon_host * host_ptr ); + void restore_group_actions ( struct hwmonHostClass::hwmon_host * host_ptr, + struct sensor_group_type * group_ptr ); + + /*************************************************************************/ + + void sensorState_print_debug ( struct hwmonHostClass::hwmon_host * host_ptr, string sensorname, string proc, int line ); + + public: + + hwmonHostClass(); /**< constructor */ + ~hwmonHostClass(); /**< destructor */ + + hostBaseClass hostBase ; + + system_type_enum system_type ; + + void timer_handler ( int sig, siginfo_t *si, void *uc); + + /** This is a list of host names. 
*/ + std::list hostlist ; + std::list::iterator hostlist_iter_ptr ; + + void hwmon_fsm ( void ); + + bool is_bm_provisioned ( string hostname ); + + string get_bm_ip ( string hostname ); + string get_bm_type ( string hostname ); + string get_bm_un ( string hostname ); + string get_hostname ( string uuid ); /**< lookup hostname from the host uuid */ + + string get_relearn_done_date ( string hostname ); + + int hosts ; + + /* This bool is set in the daemon_configure case to inform the + * FSM that there has been a configuraiton reload. + * The initial purpose if this bool is to trigger a full sensor + * dump of all hosts on demand */ + bool config_reload ; + + /********* New Public Constructs for IPMI Sensor Monitoring ***********/ + + /* set to true once a host has been deleted. This will cause the FSM to + * kick out of the host list to be restarted without this host in it + * any more */ + bool host_deleted ; + + /* sets the want_degrade_audit = true for all hosts */ + void set_degrade_audit ( void ); + + /************************************************************************/ + int add_host ( node_inv_type & inv ); + int mod_host ( node_inv_type & inv ); + int del_host ( string hostname ); + int rem_host ( string hostname ); + int mon_host ( string hostname, bool monitor ); + int request_del_host ( string hostname ); + + int ipmi_learn_sensor_model ( string uuid ); + + /**************************************************************************** + * + * Name: get_sensor + * + * Description: Returns a pointer to the host sensor + * that matches the supplied sensor name. + * + ****************************************************************************/ + sensor_type * get_sensor ( string hostname, string sensorname ); + + /**************************************************************************** + * + * Name: add_sensor + * + * Description: If the return code is PASS then the supplied sensor is + * provisioned against this host. 
If the sensor already exists + * then it is updated with all the new information. Otherwise + * (normally) a new sensor is added. + * + ****************************************************************************/ + int add_sensor ( string hostname, sensor_type & sensor ); + + /**************************************************************************** + * + * Name: add_sensor_uuid + * + * Description: Adds the sysinv supplied sensor uuid to hwmon for + * the specified sensor/host. + * + ****************************************************************************/ + int add_sensor_uuid ( string & hostname, string & name, string & uuid ); + + /**************************************************************************** + * + * Name: hwmon_get_group + * + * Description: Returns a pointer to the host sensor group + * that matches the supplied sensor group name. + ****************************************************************************/ + struct sensor_group_type * hwmon_get_group ( string hostname, string group_name ); + + /**************************************************************************** + * + * Name: hwmon_get_sensorgroup + * + * Description: Returns a pointer to the host sensor group + * that matches the supplied sensor name. + ****************************************************************************/ + struct sensor_group_type * hwmon_get_sensorgroup ( string hostname, string sensorname ); + + /**************************************************************************** + * + * Name: hwmon_add_group + * + * Description: If the return code is PASS then the supplied sensor group is + * provisioned against this host. If the group already exists + * then it is updated with all the new information. Otherwise + * (normally) a new group is added to the hwmon class struct. 
+ * + ****************************************************************************/ + int hwmon_add_group ( string hostname, struct sensor_group_type & sensor_group ); + + /**************************************************************************** + * + * Name: add_group_uuid + * + * Description: Adds the sysinv supplied group uuid to hwmon for + * the specified group/host. + * + ****************************************************************************/ + int add_group_uuid ( string & hostname, string & name, string & uuid ); + + int group_modify ( string hostname, string group, string field, string value ); + + /* TODO: make this a struct hwmonHostClass::hwmon_host * host_ptr */ + int manage_sensor_state ( string & hostname, sensor_type * sensor, sensor_severity_enum severity ); + + void memLogDelimit ( void ); /**< Debug log delimiter */ + void memDumpNodeState ( string hostname ); + void memDumpAllState ( void ); + void print_node_info ( void ); /**< Print node info banner */ + + /************ New Public API for IPMI Sensor Monitoring *************/ + + /* Sets a flag that indicates the sensor audit interval has changed. + * + * The DELAY phase of sensor monitoring will look at and will handle + * the change as a background operation. */ + void audit_interval_change ( string hostname ); + + /* Sets host_ptr->interval to the specified value and sets a flag + * that indicates the sensor audit interval has changed. + * + * The DELAY phase of sensor monitoring will look at thes flag this + * API sets and will handle the change as a background operation. 
*/ + void modify_audit_interval ( string hostname , int interval ); + + /************************************************************************/ +}; + +hwmonHostClass * get_hwmonHostClass_ptr ( void ); + +#endif /* __INCLUDE_HWMONCLASS_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonFsm.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonFsm.cpp new file mode 100644 index 00000000..6faa216b --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonFsm.cpp @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform Hardware Monitor Service + * Connection and Sensor Monitoring FSMs. + */ + +#include "hwmon.h" +#include "hwmonClass.h" +#include "hwmonHttp.h" +#include "hwmonSensor.h" +#include "hwmonThreads.h" /* for ... ipmitool_thread */ + + + +/************************************************************************** + * + * Name : hwmon_fsm + * + * Description: Loop over host inventory calling connection monitor + * handler followed by sensor read handler. + * + * The connection monitor handler verifies connection + * to the host before calling to read sensors from it. 
+ * + * For every host in hostlist, in this order: a pending delete is + * serviced first ; then, for bmc provisioned hosts only, the add + * handler is run until addStage reaches HWMON_ADD__DONE ; after + * that the ipmitool thread, the bmc ping monitor and the sensor + * monitor are serviced. + * + **************************************************************************/ +void hwmonHostClass::hwmon_fsm ( void ) +{ + struct hwmonHostClass::hwmon_host * host_ptr ; + std::list::iterator iter_ptr ; /* NOTE(review): no template argument is visible on std::list ; presumably a list of host name strings - confirm */ + + if ( config_reload == true ) + { /* NOTE(review): this pass only looks each host up and discards the pointer ; nothing is dumped or logged here - confirm that is intended */ + for ( iter_ptr = hostlist.begin() ; + iter_ptr != hostlist.end() ; + ++iter_ptr ) + { + string hostname = iter_ptr->c_str(); + host_ptr = getHost ( hostname ); + } + config_reload = false ; + } + + for ( iter_ptr = hostlist.begin() ; + iter_ptr != hostlist.end() ; + ++iter_ptr ) + { + string hostname = iter_ptr->c_str(); + daemon_signal_hdlr (); + hwmonHttp_server_look (); + host_ptr = getHost ( hostname ); + if ( host_ptr ) + { + /* Handle host delete in ipmi mode + * + * Note: the bmc may have been deprovisioned already + * so the delete needs to be handled up front, + * before the bm_provisioned check below. + */ + if ( host_ptr->host_delete == true ) + { + /* need to service the thread handler during the delete operation */ + thread_handler ( host_ptr->ipmitool_thread_ctrl, host_ptr->ipmitool_thread_info ); + delete_handler ( host_ptr ); + + if ( this->host_deleted == true ) + { /* a host was deleted ; abandon this pass over hostlist rather than keep iterating it */ + return ; + } + + /* continue with other hosts */ + continue ; + } + + if ( host_ptr->bm_provisioned == true ) + { + /* Run the add handler, but only until it's done */ + if ( host_ptr->addStage != HWMON_ADD__DONE ) + { + /* first time run after process restart will load sensor model from database */ + add_host_handler ( host_ptr ); + } + else + { + /* + * Monitor and Manage active threads + * The ipmitool thread needs to run to learn the sensors + * to begin with as well as continually monitor them + */ + thread_handler ( host_ptr->ipmitool_thread_ctrl, host_ptr->ipmitool_thread_info ); + + pingUtil_acc_monitor ( host_ptr->ping_info ); + + /* Check to see if sensor monitoring for this host is disabled. + * If it is ... */ + if ( host_ptr->monitor == false ) + { + /* ... 
make sure the thread sits in the + * idle state while disabled */ + if ( thread_idle ( host_ptr->ipmitool_thread_ctrl ) == false ) + { + if ( thread_done ( host_ptr->ipmitool_thread_ctrl ) == true ) + { + host_ptr->ipmitool_thread_ctrl.done = true ; + } + else + { + thread_kill ( host_ptr->ipmitool_thread_ctrl, host_ptr->ipmitool_thread_info ); + } + } + continue ; /* monitoring disabled ; note this also skips the degrade audit check further down for this pass */ + } + + if (( host_ptr->accessible == false ) && ( host_ptr->ping_info.ok == true )) + { /* ping has recovered ; mark the bmc usable again */ + ilog ("%s bmc is accessible\n", host_ptr->hostname.c_str()); + host_ptr->accessible = host_ptr->connected = true ; + } + else if (( host_ptr->accessible == true ) && ( host_ptr->ping_info.ok == false )) + { /* ping has failed ; kill the ipmitool thread and reset the query count and learned firmware version */ + wlog ("%s bmc access lost\n", host_ptr->hostname.c_str()); + thread_kill ( host_ptr->ipmitool_thread_ctrl, host_ptr->ipmitool_thread_info ); + host_ptr->accessible = host_ptr->connected = false ; + host_ptr->sensor_query_count = 0 ; + host_ptr->bmc_fw_version.clear(); + host_ptr->ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ; + } + + if ( host_ptr->ping_info.ok == false ) + { + /* Auto correct key ping information ; should never occur but if it does ... 
*/ + if (( host_ptr->ping_info.hostname.empty()) || + ( hostUtil_is_valid_ip_addr(host_ptr->ping_info.ip ) == false )) + { + slog ("%s host ping info missing ; (%d:%d)\n", + host_ptr->hostname.c_str(), + host_ptr->ping_info.hostname.empty(), + host_ptr->ping_info.ip.empty()); /* NOTE(review): the test above is 'not a valid ip' but the value logged is ip.empty() */ + + host_ptr->ping_info.hostname = host_ptr->hostname ; + host_ptr->ping_info.ip = host_ptr->bm_ip ; + } + // pingUtil_acc_monitor ( host_ptr->ping_info ); + } +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__EMPTY_BM_PASSWORD )) + { + host_ptr->thread_extra_info.bm_pw = "" ; + } +#endif + if (( host_ptr->thread_extra_info.bm_pw.empty ()) && ( host_ptr->ping_info.ok == true )) + { + wlog ( "%s bm password is empty ; learning and forcing reconnect\n", host_ptr->hostname.c_str()); + host_ptr->ping_info.ok = false ; /* accessible is left as is ; presumably the 'bmc access lost' branch above then performs the reconnect on a later pass - confirm */ + host_ptr->thread_extra_info.bm_pw = host_ptr->bm_pw = get_bm_password (hostBase.get_uuid(host_ptr->hostname).data()); + } + else if ( host_ptr->accessible ) + { + /* typical success path */ + hwmonHostClass::ipmi_sensor_monitor ( host_ptr ); + } + else if ( !thread_idle( host_ptr->ipmitool_thread_ctrl ) ) + { /* not accessible ; make sure no ipmitool thread is left running */ + thread_kill ( host_ptr->ipmitool_thread_ctrl, host_ptr->ipmitool_thread_info ); + } + } + if ( host_ptr->want_degrade_audit ) + { /* one shot request ; the flag is cleared once the audit has run */ + dlog ("%s degrade audit ...\n", host_ptr->hostname.c_str()); + degrade_state_audit ( host_ptr ) ; + host_ptr->want_degrade_audit = false ; + } + } + } + } +} + + diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonGroup.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonGroup.cpp new file mode 100644 index 00000000..32f1e0de --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonGroup.cpp @@ -0,0 +1,772 @@ +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + * + * + * @file + * Wind River Titanium Cloud Hardware Monitor Sensor Group Utilities + */ + +#include "daemon_common.h" /* for ... daemon utilities */ +#include "daemon_ini.h" /* for ... 
parse_ini and MATCH */ +#include "nodeBase.h" /* for ... mtce common definitions */ +#include "jsonUtil.h" /* for ... json utilities */ +#include "nodeUtil.h" /* for ... mtce common utilities */ +#include "hwmon.h" /* for ... canned_group_enum */ +#include "hwmonClass.h" /* for ... service class definition */ +#include "hwmonHttp.h" /* for ... http module header */ +#include "hwmonGroup.h" /* for ... MAX_GROUPING_ERRORS */ +#include "hwmonSensor.h" /* for ... groupSensors_print */ +#include "hwmonAlarm.h" /* for ... hwmonAlarm */ + +/* TODO: After initial inspection move all group utilities + * from hwmonSensor.cpp / h + * into hwmonGroup.cpp / h + * + */ + + +/* IPMI Sensor Types + * Each Sensor group gets a unique name from this list + * + * Is taken from ipmitool private file + * + * Each string below is a comma separated list of unit names. + * ipmi_get_groupenum compares a sensor's unit type against the + * whole string with strstr, in both directions, so a match is a + * substring match rather than an exact unit name match. + */ + +static const char * canned_group__null = +{ + "null" +}; + +static const char * canned_group__fans = +{ + "RPM,% RPM,CFM,% CFM" +}; + +static const char * canned_group__temp = +{ + "degrees" +}; + +static const char * canned_group__voltage = +{ + "Volts" +}; + +static const char * canned_group__power = +{ + "Amps,% Amps,Watts,% Watts,Joules,Coulombs" +}; + +static const char * canned_group__usage = +{ + "TBD" +}; + +#ifdef WANT_MORE_GROUPS + +static const char * canned_group__time = +{ + "microsecond,millisecond,second,minute,hour,day,week" +}; + +static const char * canned_group__msg = +{ + "overflow,underrun,collision,packets,messages,characters" +}; + +static const char * canned_group__memory = +{ + "bit,kilobit,megabit,gigabit,byte,kilobyte,megabyte,gigabyte,word,dword,qword,line,hit,miss" +}; + +static const char * canned_group__errors = +{ + "error,correctable error,uncorrectable error" +}; + +static const char * canned_group__clocks = +{ + "Hz,Hertz" +}; + +static const char * canned_group__misc = +{ + "unspecified" +}; + +#endif + +/* Other types from ipmitool source ******** + + "VA", + "Nits", + "lumen", + "lux", + "Candela", + "kPa", + "PSI", + "Newton", + "mil", 
"inches", "feet", + "cu in", "cu feet", + "mm", "cm", "m", "cu cm", "cu m", + "liters", "fluid ounce", + "radians", "steradians", + "revolutions", "cycles", + "gravities", + "ounce", "pound", + "ft-lb", "oz-in", "gauss", + "gilberts", "henry", + "millihenry", "farad", + "microfarad", + "ohms", + "siemens", + "mole", + "becquerel", + "PPM", + "reserved", + "Decibels", + "DbA", "DbC", + "gray", "sievert", + "color temp deg K", + "retry", + "reset", + +********************************************/ + + +/***************************************************************************** + * + * Name : canned_group_array[] (construct) + * canned_group_type (type definition) + * canned_group_enum (enumerated identifier) + * canned_group_[] (list of allowable group unit types) + * + * Description: This is an array of pre-created (or canned) groups. + * Each group has a name that will be the group name displayed + * on the GUI or in the CLI. Each canned group then also has an + * identifier that is added to the sensor_group_type struct and + * used to indicate that group type and then, for sensor allocation + * purposes, a list of unit names that qualify a sensor for being + * part of that group. + * + * For instance all sensors with a CFM or RPM unit type are + * assigned to the HWMON_CANNED_GROUP__FANS group. 
+ * + *****************************************************************************/ + +typedef struct +{ + const char * group_type ; + const char * group_name ; + canned_group_enum group_enum ; + const char * group_units ; +} canned_group_type ; + +const canned_group_type canned_group_array [HWMON_CANNED_GROUPS] = +{ + /* group type group name group enum sensor unit types + ---------- ------------------ ------------------------ --------------------*/ + {"null", HWMON_GROUP_NAME__NULL, HWMON_CANNED_GROUP__NULL, canned_group__null }, + {"fan", HWMON_GROUP_NAME__FANS, HWMON_CANNED_GROUP__FANS, canned_group__fans }, + {"temperature", HWMON_GROUP_NAME__TEMP, HWMON_CANNED_GROUP__TEMP, canned_group__temp }, + {"voltage", HWMON_GROUP_NAME__VOLTS, HWMON_CANNED_GROUP__VOLT, canned_group__voltage}, + {"power", HWMON_GROUP_NAME__POWER, HWMON_CANNED_GROUP__POWER, canned_group__power }, + {"usage", HWMON_GROUP_NAME__USAGE, HWMON_CANNED_GROUP__USAGE, canned_group__usage }, + +#ifdef WANT_MORE_GROUPS + + /* Enable these when we have discrete correlation */ + {"memory", HWMON_GROUP_NAME__MEMORY , HWMON_CANNED_GROUP__MEMORY,canned_group__memory }, + {"clocks", HWMON_GROUP_NAME__CLOCKS , HWMON_CANNED_GROUP__CLOCKS,canned_group__clocks }, + {"errors", HWMON_GROUP_NAME__ERRORS , HWMON_CANNED_GROUP__ERRORS,canned_group__errors }, + {"messages", HWMON_GROUP_NAME__MESSAGES, HWMON_CANNED_GROUP__MSG, canned_group__msg }, + {"time", HWMON_GROUP_NAME__TIME , HWMON_CANNED_GROUP__TIME, canned_group__time }, + {"miscellaneous",HWMON_GROUP_NAME__MISC , HWMON_CANNED_GROUP__MISC, canned_group__misc } + +#endif + +}; + +/**************************************************************************** + * + * Name : groupSensors_print + * + * Purpose: Log the host, name and sensor count of the specified group + * followed by one line for each sensor slot in that group. + * + *****************************************************************************/ +void groupSensors_print ( sensor_group_type * group_ptr ) +{ + /* nothing can be printed without a group */ + if ( !group_ptr ) + { + slog ("Null group pointer\n"); + return ; + } + + ilog ("%s '%s' group has %d sensors\n", + group_ptr->hostname.c_str(), + group_ptr->group_name.c_str(), + group_ptr->sensors ); + + for ( int index = 0 ; index < group_ptr->sensors ; ++index ) + { + /* an unassigned slot is shown as its pointer value */ + if ( !group_ptr->sensor_ptr[index] ) + { + blog1 (" > Sensor: %p\n", group_ptr->sensor_ptr[index] ); + } + else + { + blog1 (" > Sensor: %s\n", group_ptr->sensor_ptr[index]->sensorname.c_str()); + } + } +} + +/***************************************************************************** + * + * Name : ipmi_get_grouptype + * + * Description: Returns the canned group type string for the group that + * ipmi_get_groupenum selects for this sensor, or an empty + * string when that is the null group or out of range. + * + *****************************************************************************/ + +string ipmi_get_grouptype ( string & hostname, + string & unittype, + string & sensorname) +{ + canned_group_enum selected = ipmi_get_groupenum (hostname, + unittype, + sensorname); + + /* the null group and anything off the end of the table have no type */ + if (( selected <= HWMON_CANNED_GROUP__NULL ) || + ( selected >= HWMON_CANNED_GROUPS )) + { + return "" ; + } + return (canned_group_array[selected].group_type); +} + +/***************************************************************************** + * + * Name : ipmi_get_groupenum + * + * Description: Returns the group enum for the specified unit type. + * + * The canned groups are searched first, in table order and + * skipping the null group ; the first group whose unit list + * contains the unit type, or is contained by it, wins. + * + * Unit types that match no canned group are then translated + * by sensor name for a few known discrete and percent cases. + * + * An empty unit type yields HWMON_CANNED_GROUP__NULL. 
+ * + *****************************************************************************/ + +canned_group_enum ipmi_get_groupenum ( string & hostname, + string & unittype, + string & sensorname ) +{ + /* no unit type ; no group */ + if ( unittype.empty() ) + { + return (HWMON_CANNED_GROUP__NULL); + } + + /* first look for a canned group whose units match this sensor sample */ + for ( int index = (HWMON_CANNED_GROUP__NULL+1) ; index < HWMON_CANNED_GROUPS ; index++ ) + { + const char * units_ptr = canned_group_array[index].group_units ; + + blog3 ("%s search %s:%s \n", hostname.c_str(), units_ptr, unittype.c_str()); + if ( strstr ( units_ptr, unittype.data()) || + strstr ( unittype.data(), units_ptr )) + { + blog2 ("%s %s found\n", hostname.c_str(), unittype.c_str() ); + return(canned_group_array[index].group_enum); + } + } + + /* + * No canned unit match. + * + * Try the sensor name based translations below and failing that + * fall back to the miscellaneous group, when it is built in, or + * to no group at all. + */ 
+ const bool is_discrete = ( unittype.compare("discrete") == 0 ) ; + const bool is_percent = ( unittype.compare("percent") == 0 ) ; + + canned_group_enum group_enum = HWMON_CANNED_GROUP__NULL ; + + /* Quanta power supply sensors are discrete */ + if (( is_discrete ) && + ((sensorname.find("PSU Redundancy") != std::string::npos ) || + (sensorname.find("PSU1 Status") != std::string::npos ) || + (sensorname.find("PSU2 Status") != std::string::npos ))) + { + group_enum = HWMON_CANNED_GROUP__POWER ; + } + /* discrete thermal trip sensors belong with temperature */ + else if (( is_discrete ) && + ((sensorname.find("MB Thermal Trip") != std::string::npos ) || + (sensorname.find("PCH Thermal Trip") != std::string::npos ))) + { + group_enum = HWMON_CANNED_GROUP__TEMP ; + } + /* HP Fans show up as 'percent' sensor type with Fan in the name */ + else if (( is_percent ) && + ((sensorname.find("Fan") != std::string::npos ) || + (sensorname.find("fan") != std::string::npos ))) + { + group_enum = HWMON_CANNED_GROUP__FANS ; + } + /* 'percent' sensor type with Usage in the name is a usage sensor */ + else if (( is_percent ) && + ((sensorname.find("Usage") != std::string::npos ) || + (sensorname.find("usage") != std::string::npos ))) + { + group_enum = HWMON_CANNED_GROUP__USAGE ; + } + + if ( group_enum != HWMON_CANNED_GROUP__NULL ) + { + /* one of the translations above applied */ + blog2 ("%s %-15s group added (for '%s' sensor (translation)\n", + hostname.c_str(), + canned_group_array[group_enum].group_name, + sensorname.c_str()); + } + else + { +#ifdef WANT_MORE_GROUPS + /* Otherwise, uncanned so put the sensor into the miscellaneous group */ + group_enum = HWMON_CANNED_GROUP__MISC ; + + ilog ("%s %-15s group added (for '%s' sensor) (%s:%s)\n", + hostname.c_str(), + canned_group_array[group_enum].group_name, + sensorname.c_str(), + unittype.c_str(), + canned_group_array[group_enum].group_units); +#else + blog3 ("%s %-15s is ignored ; no matching sensor group\n", hostname.c_str(), sensorname.c_str()); + group_enum = HWMON_CANNED_GROUP__NULL ; +#endif + } + return (group_enum); +} + +/***************************************************************************** + * + * Name : ipmi_get_groupname + * + * Description: returns the group name for the specified group enum. 
+ *
+ *****************************************************************************/
+
+string ipmi_get_groupname ( canned_group_enum group_enum )
+{
+    /* enums at or past HWMON_CANNED_GROUPS have no canned table entry */
+    if ( group_enum >= HWMON_CANNED_GROUPS )
+    {
+        return "unknown" ;
+    }
+    return (canned_group_array[group_enum].group_name);
+}
+
+/******************************************************************************
+ *
+ * Name       : _log_group_add_status
+ *
+ * Description: Log the outcome of a group add request ; a warning that
+ *              carries the return code when rc is non-zero, an info log
+ *              otherwise.
+ *
+ * Scope      : Local
+ *
+ ******************************************************************************/
+
+void _log_group_add_status ( string hostname,
+                             string groupname,
+                             int    rc )
+{
+    /* success path first ; anything non-zero is reported as a failure */
+    if ( rc == 0 )
+    {
+        ilog ("%s %s group added (to sysinv)\n",
+                  hostname.c_str(), groupname.c_str());
+        return ;
+    }
+
+    wlog ("%s %s group add failed (to sysinv) (rc:%d)\n",
+              hostname.c_str(), groupname.c_str(), rc );
+}
+
+/********************************************************************************
+ *
+ * Name       : ipmi_add_group
+ *
+ * Purpose    : Add a new group to hwmon and then to the sysinv database.
+ *
+ * Description: Write the new group info to the next group index for the
+ *              specified host, update its info with passed in attributes
+ *              and then call to program that group into the database.
+ *
+ *              Wait for the response and update the group with its uuid.
+ * + ****************************************************************************/ + +int hwmonHostClass::ipmi_add_group ( struct hwmonHostClass::hwmon_host * host_ptr , + string datatype, + string sensortype, + canned_group_enum group_enum, + string group_name, + string path ) +{ + int rc ; + + int g = host_ptr->groups ; + hwmonGroup_init ( host_ptr->hostname, &host_ptr->group[g]); + host_ptr->group[g].datatype = datatype ; + host_ptr->group[g].group_enum = group_enum ; + host_ptr->group[g].sensortype = sensortype ; + host_ptr->group[g].group_name = group_name ; + host_ptr->group[g].path = path ; + + /* If we are in learn mode then restore saved group + * attributes before programming them into the database */ + if ( host_ptr->relearn == true ) + { + restore_group_actions ( host_ptr, &host_ptr->group[g] ); + } + + if (( rc = hwmonHttp_add_group ( host_ptr->hostname, host_ptr->event, host_ptr->group[g])) == PASS ) + { + host_ptr->group[g].group_uuid = host_ptr->event.new_uuid ; + host_ptr->groups++ ; + } + _log_group_add_status ( host_ptr->hostname, host_ptr->group[g].group_name, rc ); + return (rc); +} + +/***************************************************************************** + * + * Name : ipmi_create_groups + * + * Description: Perform sensor grouping from sample data. + * This is done using similar ipmi unit types from canned groups. + * + *****************************************************************************/ + +int hwmonHostClass::ipmi_create_groups ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + int rc = PASS ; + int sample_errors = 0 ; + host_ptr->groups = 0 ; + + /* for each sample ... + * 1. create a new group or + * 2. add sensor to an existing sensor group type + * i.e. fan , poer, voltage, temperature, etc. + * + * ... based on that sensors' unit type. + * + * Use the canned unit groups above , i.e. 
canned_group__voltage + */ + for ( int s = 0 ; s < host_ptr->samples ; ++s ) + { + /* canned group array index */ + int canned_group_index = 0 ; + + /* + * When set true indicates that hwmon already has a + * group type allocated for this sensor type. + */ + bool group_found = false ; + + /* TODO: allow a MAX number of failures before action failure */ + if ( host_ptr->sample[s].unit.empty() ) + { + if ( ++sample_errors > MAX_SENSOR_TYPE_ERRORS ) + { + elog ("%s '%s' sensor has empty unit type ; too many errors (%d) ; aborting\n", + host_ptr->hostname.c_str(), + host_ptr->sample[s].name.c_str(), + sample_errors); + return FAIL_STRING_EMPTY ; + } + else + { + wlog ("%s '%s' sensor has empty unit type ; skipping\n", + host_ptr->hostname.c_str(), + host_ptr->sample[s].name.c_str()); + continue ; + } + } + + /* get the group enum from the sensor type and name */ + canned_group_index = ipmi_get_groupenum ( host_ptr->hostname, + host_ptr->sample[s].unit, + host_ptr->sample[s].name ); + + if ( canned_group_index == HWMON_CANNED_GROUP__NULL ) + { + host_ptr->sample[s].ignore = true ; + continue ; + } + + if ( canned_group_index != host_ptr->sample[s].group_enum ) + { + slog ("%s %s should already be assigned to a group ; sensor filter broken\n", + host_ptr->hostname.c_str(), + host_ptr->sample[s].name.c_str()); + } + + /* loop over all the existing groups to see if this group type has already been added. 
*/ + for ( int group = 0 ; group < host_ptr->groups ; ++group ) + { + if ( host_ptr->group[group].group_enum == canned_group_array[canned_group_index].group_enum ) + { + group_found = true ; + break ; + } + } /* loop over all the groups */ + + /* if no then add the new group ; otherwise ignore it */ + if ( group_found == false ) + { + hwmonGroup_init ( host_ptr->hostname, &host_ptr->group[host_ptr->groups] ); + host_ptr->group[host_ptr->groups].group_name = canned_group_array[canned_group_index].group_name ; + host_ptr->group[host_ptr->groups].group_enum = canned_group_array[canned_group_index].group_enum ; + host_ptr->group[host_ptr->groups].sensortype = canned_group_array[canned_group_index].group_type ; + + if ( host_ptr->relearn == true ) + { + restore_group_actions ( host_ptr, &host_ptr->group[host_ptr->groups] ); + } + /* create a new group in sysinv */ + if ( ( rc = hwmonHttp_add_group ( host_ptr->hostname, + host_ptr->event, + host_ptr->group[host_ptr->groups]) ) == PASS ) + { + /* add the sysinv group uuid to the group */ + host_ptr->group[host_ptr->groups].group_uuid = host_ptr->event.new_uuid ; + } + _log_group_add_status ( host_ptr->hostname, host_ptr->group[host_ptr->groups].group_name, rc ); + + if ( rc ) + { + return (FAIL_OPERATION); + } + else + { + blog ("%s %-15s group created (in hwmon) (for %s sensor)\n", + host_ptr->hostname.c_str(), + canned_group_array[canned_group_index].group_name, + host_ptr->sample[s].name.c_str()); + host_ptr->groups++ ; + } + } + + /* Tell the sample with what group it will go in later */ + host_ptr->sample[s].group_enum = + canned_group_array[canned_group_index].group_enum ; + + } /* end for loop over sensor samples */ + + ilog ("%s new sensor group model created with %d groups\n", + host_ptr->hostname.c_str(), + host_ptr->groups ); + + if (( host_ptr->relearn == true ) && + ( host_ptr->model_attributes_preserved.interval ) && + ( host_ptr->model_attributes_preserved.interval != host_ptr->interval )) + { + 
host_ptr->interval_changed = true ; + ilog ("%s audit interval restored to %d seconds (from %d)\n", + host_ptr->hostname.c_str(), + host_ptr->model_attributes_preserved.interval, + host_ptr->interval); + host_ptr->interval = host_ptr->model_attributes_preserved.interval ; + } + + host_ptr->interval_changed = true ; + return rc ; +} + +/**************************************************************************** + * + * Name : ipmi_group_sensors + * + * Description: Group the sensors based on the group enum that was assigned + * to the sensor during group creation. + * + *****************************************************************************/ + +int hwmonHostClass::ipmi_group_sensors ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + int rc = FAIL ; + int grouping_errors = 0 ; + + for ( int s = 0 ; s < host_ptr->sensors ; ++s ) + { + bool found = false ; + + if (( host_ptr->sensor[s].group_enum > HWMON_CANNED_GROUP__NULL ) && + ( host_ptr->sensor[s].group_enum < HWMON_CANNED_GROUPS )) + { + for ( int g = 0 ; g < host_ptr->groups ; ++g ) + { + if ( host_ptr->group[g].group_enum == host_ptr->sensor[s].group_enum ) + { + if ( !host_ptr->group[g].sensor_labels.empty() ) + { + host_ptr->group[g].sensor_labels.append(","); + } + host_ptr->group[g].sensor_labels.append(host_ptr->sensor[s].sensorname); + + /* add the sensor pointer to the group's list */ + host_ptr->group[g].sensor_ptr[host_ptr->group[g].sensors] = &host_ptr->sensor[s] ; + + /* formally add the sensor to the group by incrementing the group sensors count */ + host_ptr->group[g].sensors++ ; + + /* assign the group uuid to the sensor */ + host_ptr->sensor[s].group_uuid = host_ptr->group[g].group_uuid ; + + /* assign group action */ + host_ptr->sensor[s].actions_minor = host_ptr->group[g].actions_minor_group ; + host_ptr->sensor[s].actions_major = host_ptr->group[g].actions_major_group ; + host_ptr->sensor[s].actions_critl = host_ptr->group[g].actions_critl_group ; + + found = true ; + break ; + } + } 
+ } + + if ( found == false ) + { + if ( ++grouping_errors >= MAX_GROUPING_ERRORS ) + { + elog ("%s '%s' sensor could not be grouped (%d)\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[s].sensorname.c_str(), + host_ptr->sensor[s].group_enum ); + rc = FAIL_NOT_FOUND ; + } + } + else + { + /* if we found at least one then change rc to PASS */ + rc = PASS ; + } + } + + if ( rc == PASS ) + { + for ( int g = 0 ; g < host_ptr->groups ; ++g ) + { + groupSensors_print ( &host_ptr->group[g] ); + rc = hwmonHttp_group_sensors ( host_ptr->hostname, + host_ptr->event, + host_ptr->group[g].group_uuid, + host_ptr->group[g].sensor_labels ); + if ( rc ) + { + break ; + } + } + } + + if ( rc == PASS ) + { + ilog ("%s sensors grouped\n", host_ptr->hostname.c_str()); + } + else + { + elog ("%s failed to group sensors\n", host_ptr->hostname.c_str()); + } + + return (rc); +} + + +/***************************************************************************** + * + * Name : ipmi_set_group_state + * + * Purpose : With the introduction of ipmi monitoring, all groups are + * monitored at once. Therefore all should be in the same state. + * + * Description: Set all groups to specified state + * + * TODO : Consider setting all sensors offline for failure case. + * Would need a bulk HTTP command to sysinv or do it as a + * time sliced operation. 
+ *
+ ******************************************************************************/
+
+/* Walks every group of the host and, for each group whose group_state
+ * differs from 'state', records the new state, issues a hwmonHttp_mod_group
+ * "state" request and then raises or clears the group alarm.
+ *
+ * Returns FAIL_NULL_POINTER when host_ptr is NULL, PASS when no request
+ * failed (or when the host is being deleted) and RETRY when at least one
+ * hwmonHttp_mod_group request returned non-zero. */
+int hwmonHostClass::ipmi_set_group_state ( struct hwmonHostClass::hwmon_host * host_ptr , string state )
+{
+    int rc = FAIL_NULL_POINTER ;
+    if ( host_ptr )
+    {
+        rc = PASS ;
+
+        /* don't send requests to sysinv if we are in the middle of
+         * deleting a host because sysinv has already gotten rid of the
+         * sensor model */
+        if ( host_ptr->host_delete == true )
+            return (PASS);
+
+        for ( int g = 0 ; g < host_ptr->groups ; ++g )
+        {
+            struct sensor_group_type * group_ptr = &host_ptr->group[g] ;
+
+            /* compare() is non-zero when the strings differ ; groups that
+             * are already in the requested state are left alone */
+            if ( group_ptr->group_state.compare(state.data()))
+            {
+                /* NOTE(review): the local state is committed before the
+                 * request is issued, so a group whose request fails already
+                 * compares equal on the next call and is skipped - confirm
+                 * that callers acting on RETRY do not expect a resend. */
+                group_ptr->group_state = state ;
+                int status = hwmonHttp_mod_group ( host_ptr->hostname,
+                                                   host_ptr->event ,
+                                                   group_ptr->group_uuid,
+                                                   "state",
+                                                   group_ptr->group_state );
+                if ( status )
+                {
+                    elog ( "%s failed to set '%s' group state to '%s'\n",
+                               host_ptr->hostname.c_str(),
+                               group_ptr->group_name.c_str(),
+                               state.c_str());
+
+                    /* only the first failure changes the return code */
+                    if ( rc == PASS )
+                        rc = RETRY ;
+                }
+
+                /* raise the group alarm when the request failed or the
+                 * requested state is "failed", provided the group is not
+                 * already marked as failed */
+                if ((( status != PASS ) || ( state.compare("failed") == 0 )) && ( group_ptr->failed == false ))
+                {
+                    hwmonAlarm_major ( host_ptr->hostname, HWMON_ALARM_ID__SENSORGROUP, group_ptr->group_name, REASON_DEGRADED );
+                    group_ptr->failed = true ;
+                }
+
+                /* clear the group alarm when the request passed, the
+                 * requested state is anything but "failed" and the group
+                 * is currently marked as failed */
+                else if (( status == PASS ) && ( state.compare("failed")) && ( group_ptr->failed == true ))
+                {
+                    hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSORGROUP, group_ptr->group_name, REASON_OK );
+                    group_ptr->failed = false ;
+                }
+                else
+                {
+                    blog ( "%s '%s' group - no alarm action for state '%s' (%d)\n",
+                               host_ptr->hostname.c_str(),
+                               group_ptr->group_name.c_str(),
+                               state.c_str(),
+                               group_ptr->failed);
+                }
+            }
+        }
+    }
+    return (rc);
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonGroup.h
b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonGroup.h new file mode 100644 index 00000000..ef6c52ab --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonGroup.h @@ -0,0 +1,45 @@ +#ifndef __INCLUDE_HWMONGROUP_H__ +#define __INCLUDE_HWMONGROUP_H__ +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud's Hardware Monitor "Sensor Grouping" Header + */ + +#define MAX_GROUPING_ERRORS (1) + +#include "hwmon.h" + +#define HWMON_GROUP_NAME__NULL "null" +#define HWMON_GROUP_NAME__FANS "server fans" +#define HWMON_GROUP_NAME__TEMP "server temperature" +#define HWMON_GROUP_NAME__VOLTS "server voltage" +#define HWMON_GROUP_NAME__POWER "server power" +#define HWMON_GROUP_NAME__USAGE "server usage" +#define HWMON_GROUP_NAME__POWER_FANS "power supply fans" +#define HWMON_GROUP_NAME__MEMORY "server memory" +#define HWMON_GROUP_NAME__CLOCKS "server clocks" +#define HWMON_GROUP_NAME__ERRORS "server errors" +#define HWMON_GROUP_NAME__MESSAGES "server messages" +#define HWMON_GROUP_NAME__TIME "server time" +#define HWMON_GROUP_NAME__MISC "miscellaneous" + +void groupSensors_print ( sensor_group_type * group_ptr ); + +string ipmi_get_groupname ( canned_group_enum group_enum ); + +string ipmi_get_grouptype ( string & hostname, + string & unittype, + string & sensorname); + +canned_group_enum ipmi_get_groupenum ( string & hostname, + string & unittype, + string & sensorname ); + +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHdlr.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHdlr.cpp new file mode 100644 index 00000000..d4369e22 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHdlr.cpp @@ -0,0 +1,2573 @@ +/* + * Copyright (c) 2013-2017 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform Process Monitor Service Handler + */ + +#include "daemon_ini.h" + +#include "nodeBase.h" /* for ... mtce common definitions */ +#include "jsonUtil.h" /* for ... json utilities */ +#include "regexUtil.h" /* for ... regexUtil_pattern_match */ +#include "tokenUtil.h" /* for ... tokenUtil_new_token */ +#include "nodeUtil.h" /* for ... mtce common utilities */ +#include "ipmiUtil.h" /* for ... IPMI utilties */ +#include "hwmon.h" /* for ... service module header */ +#include "hwmonUtil.h" /* for ... utilities, ie clear_logged_state */ +#include "hwmonClass.h" /* for ... service class definition */ +#include "hwmonIpmi.h" /* for ... QUANTA_SENSOR_PROFILE_CHECKSUM */ +#include "hwmonSensor.h" /* for ... this mpodule header */ +#include "hwmonHttp.h" /* for ... hwmonHttp_mod_group */ +#include "hwmonAlarm.h" /* for ... hwmonAlarm_major */ + +/* Declare the Hardware Monitor Inventory Object */ +hwmonHostClass hostInv ; + +/* Public interface to get the Hardware Monitor Inventory object */ +hwmonHostClass * get_hwmonHostClass_ptr ( void ) +{ + return (&hostInv); +} + +/* Preserve a local copy of a pointer to the control struct to + * avoid having to publish a get utility prototype into hwmon.h */ +static hwmon_ctrl_type * _hwmon_ctrl_ptr = NULL ; + +/* hwmonTimer_audit - get_events periodic audit timer */ +static struct mtc_timer hwmonTimer_audit ; +static struct mtc_timer hwmonTimer_token ; + +/** List of server profile files */ +std::list profile_files ; +std::list::iterator string_iter_ptr ; + + + /***************************************************************************** + * + * Name : _stage_change + * + * Description: Change the sensor monitor FSM stage. 
+ * + ****************************************************************************/ + +static std::string monitorStages_str[HWMON_SENSOR_MONITOR__STAGES+1]; +void _stage_change ( string hostname, monitor_ctrl_stage_enum & nowStage, monitor_ctrl_stage_enum newStage ) +{ + if ( newStage < HWMON_SENSOR_MONITOR__STAGES ) + { + clog ("%s sensor monitor stage change from %s -> %s\n", + hostname.c_str(), + monitorStages_str[nowStage].c_str(), + monitorStages_str[newStage].c_str()); + nowStage = newStage ; + } + else + { + slog ("%s sensor monitor stage change to '%d' is invalid ; switching to START\n", + hostname.c_str(), + newStage ); + nowStage = HWMON_SENSOR_MONITOR__START ; + } +} + +/******************************************************************* + * Module Initialize and Finalizes Interfaces * + *******************************************************************/ + +/* Initial init of timers. */ +/* Not run on a sighup */ +void hwmon_timer_init ( void ) +{ + mtcTimer_init ( hwmonTimer_audit, "controller", "audit timer" ) ; + mtcTimer_init ( hwmonTimer_token, "controller", "token timer") ; +} + +/* Register realtime signal handler with the kernel */ +int signal_hdlr_init ( int sig_num ) +{ + int rc ; + UNUSED(sig_num) ; + +#ifdef WANT_MORE_SIGNAL_HANDLING + memset (&_pmon_ctrl_ptr->info, 0, sizeof(_pmon_ctrl_ptr->info)); + memset (&_pmon_ctrl_ptr->prev, 0, sizeof(_pmon_ctrl_ptr->info)); + + _pmon_ctrl_ptr->info.sa_sigaction = _process_death_hdlr ; + _pmon_ctrl_ptr->info.sa_flags = (SA_NOCLDSTOP | SA_NOCLDWAIT | SA_SIGINFO) ; + + rc = sigaction ( sig_num, &_pmon_ctrl_ptr->info , &_pmon_ctrl_ptr->prev ); + if ( rc ) + { + elog("Registering Realtime Signal %d - (%d) (%s)\n", + sig_num, errno, strerror(errno)); + rc = FAIL_SIGNAL_INIT ; + } + else + { + ilog("Registering Realtime Signal %d\n", sig_num); + } +#else + rc = PASS ; +#endif + return (rc) ; +} + +/* + * Init the handler + * - Must support re-init that might occur over a SIGHUP + **/ +int hwmon_hdlr_init ( 
hwmon_ctrl_type * ctrl_ptr ) +{ + int rc = PASS ; + + /* Save the control pointer */ + _hwmon_ctrl_ptr = ctrl_ptr ; + + monitorStages_str[HWMON_SENSOR_MONITOR__START] = "Start" ; + monitorStages_str[HWMON_SENSOR_MONITOR__DELAY] = "Delay" ; + monitorStages_str[HWMON_SENSOR_MONITOR__READ] = "Read" ; + monitorStages_str[HWMON_SENSOR_MONITOR__PARSE] = "Parse" ; + monitorStages_str[HWMON_SENSOR_MONITOR__CHECK] = "Check" ; + monitorStages_str[HWMON_SENSOR_MONITOR__UPDATE] = "Update"; + monitorStages_str[HWMON_SENSOR_MONITOR__HANDLE] = "Handle"; + monitorStages_str[HWMON_SENSOR_MONITOR__FAIL] = "Fail" ; + monitorStages_str[HWMON_SENSOR_MONITOR__POWER] = "Power Query" ; + monitorStages_str[HWMON_SENSOR_MONITOR__RESTART] = "Restart" ; + monitorStages_str[HWMON_SENSOR_MONITOR__IDLE] = "Idle" ; + + return (rc) ; +} + + +/* Module Cleanup */ +void hwmon_hdlr_fini ( hwmon_ctrl_type * ctrl_ptr ) +{ + UNUSED(ctrl_ptr) ; +} + +/******************************************************************* + * Module Utilities * + ******************************************************************/ +/* SIGCHLD handler support - for waitpid */ +void daemon_sigchld_hdlr ( void ) +{ + dlog3 ("Received SIGCHLD ...\n"); +} + + +/* Looks up the timer ID and asserts the corresponding ringer */ +void hwmonHostClass::timer_handler ( int sig, siginfo_t *si, void *uc) +{ + timer_t * tid_ptr = (void**)si->si_value.sival_ptr ; + struct hwmonHostClass::hwmon_host * hwmon_host_ptr ; + + /* Avoid compiler errors/warnings for parms we must + * have but currently do nothing with */ + UNUSED(sig); + UNUSED(uc); + + if ( tid_ptr == NULL ) + { + return ; + } + else if ( *tid_ptr == NULL ) + { + return ; + } + + /* Audit Timer */ + else if ( *tid_ptr == hwmonTimer_audit.tid ) + { + hwmonTimer_audit.ring = true ; + return ; + } + /* Token refresh Timer */ + else if ( *tid_ptr == hwmonTimer_token.tid ) + { + mtcTimer_stop_int_safe ( hwmonTimer_token ); + hwmonTimer_token.ring = true ; + return ; + } + else + { + 
hwmon_host_ptr = getHost_timer ( *tid_ptr ) ; + if ( hwmon_host_ptr ) + { + if (( *tid_ptr == hwmon_host_ptr->monitor_ctrl.timer.tid ) ) + { + mtcTimer_stop_int_safe ( hwmon_host_ptr->monitor_ctrl.timer ); + hwmon_host_ptr->monitor_ctrl.timer.ring = true ; + return ; + } + else if (( *tid_ptr == hwmon_host_ptr->ipmitool_thread_ctrl.timer.tid ) ) + { + mtcTimer_stop_int_safe ( hwmon_host_ptr->ipmitool_thread_ctrl.timer ); + hwmon_host_ptr->ipmitool_thread_ctrl.timer.ring = true ; + return ; + } + else if (( *tid_ptr == hwmon_host_ptr->ping_info.timer.tid ) ) + { + mtcTimer_stop_int_safe ( hwmon_host_ptr->ping_info.timer ); + hwmon_host_ptr->ping_info.timer.ring = true ; + return ; + } + else if (( *tid_ptr == hwmon_host_ptr->hostTimer.tid ) ) + { + mtcTimer_stop_int_safe ( hwmon_host_ptr->hostTimer ); + hwmon_host_ptr->hostTimer.ring = true ; + return ; + } + else if (( *tid_ptr == hwmon_host_ptr->addTimer.tid ) ) + { + mtcTimer_stop_int_safe ( hwmon_host_ptr->addTimer ); + hwmon_host_ptr->addTimer.ring = true ; + return ; + } + else if (( *tid_ptr == hwmon_host_ptr->relearnTimer.tid ) ) + { + mtcTimer_stop_int_safe ( hwmon_host_ptr->relearnTimer ); + hwmon_host_ptr->relearnTimer.ring = true ; + hwmon_host_ptr->relearn = false ; + return ; + } + } + } + mtcTimer_stop_tid_int_safe (tid_ptr); +} + +#ifdef WANT_SENSOR_TOGGLE +bool toggle = false ; +#endif + +void hwmon_service ( hwmon_ctrl_type * ctrl_ptr ) +{ + std::list socks ; + struct timeval waitd; + fd_set readfds; + + daemon_config_type * config_ptr = daemon_get_cfg_ptr(); + hwmon_socket_type * sock_ptr = getSock_ptr(); + keyToken_type * token_ptr = tokenUtil_get_ptr(); + + hostInv.hostBase.my_hostname = ctrl_ptr->my_hostname ; + hostInv.hostBase.my_local_ip = ctrl_ptr->my_local_ip ; + hostInv.hostBase.my_float_ip = ctrl_ptr->my_float_ip ; + + if ( config_ptr->token_refresh_rate ) + { + if ( config_ptr->token_refresh_rate < 300 ) + { + ilog ("Starting 'Token' Refresh timer (%d seconds)\n", + 
(config_ptr->token_refresh_rate) ); + } + else + { + ilog ("Starting 'Token' Refresh timer (%d minutes)\n", + (config_ptr->token_refresh_rate/60) ); + } + if ( mtcTimer_start ( hwmonTimer_token, + hwmonTimer_handler, + config_ptr->token_refresh_rate ) != PASS ) + { + elog ("Failed to start 'Token' Refresh Timer\n"); + daemon_exit ( ) ; + } + } + + // client_len = sizeof(client_addr); + + socks.clear(); + if ( sock_ptr->cmd_sock ) + { + socks.push_front (sock_ptr->cmd_sock->getFD()); + } + else + { + elog ("cannot service Null cmd_sock\n"); + } + + socks.sort(); + + ilog ("Starting 'Audit' timer (%d secs)\n", ctrl_ptr->audit_period ); + mtcTimer_start ( hwmonTimer_audit, hwmonTimer_handler, ctrl_ptr->audit_period ); + + for ( ; ; ) + { + /* Initialize the master fd_set */ + FD_ZERO(&readfds); + + /* add the command receiver socket ro the FD set mask */ + if ( sock_ptr->cmd_sock ) + { + if ( sock_ptr->cmd_sock->getFD()) + { + FD_SET(sock_ptr->cmd_sock->getFD(), &readfds); + } + else + { + /* force a re-init if we have no FD */ + sock_ptr->cmd_sock->sock_ok(false); + } + } /* Null sockts are auto recovered below */ + + waitd.tv_sec = 0; + waitd.tv_usec = (SOCKET_WAIT*3) ; + + /* This is used as a delay up to select_timeout */ + int rc = select( socks.back()+1, &readfds, NULL, NULL, &waitd); + + /* If the select time out expired then */ + if (( rc < 0 ) || ( rc == 0 )) + { + /* Check to see if the select call failed. */ + /* ... 
but filter Interrupt signal */ + if (( rc < 0 ) && ( errno != EINTR )) + { + elog ( "Select Failed (rc:%d) %s \n", errno, strerror(errno)); + } + } + else if ( FD_ISSET(sock_ptr->cmd_sock->getFD(), &readfds)) + { + rc = hwmon_service_inbox (); + if ( rc > RETRY ) + { + elog ("Failure servicing inbox (rc:%d)\n", rc); + } + } + else + { + wlog ("unexpected select (%d)\n", rc ); + } + + if ( hwmonTimer_audit.ring == true ) + { + mtcTimer_dump_data (); + hostInv.set_degrade_audit(); + hwmonTimer_audit.ring = false ; + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__HWMON__AVOID_TOKEN_REFRESH )) + { + if ( hwmonTimer_token.ring == true ) + hwmonTimer_token.ring = false ; + } +#endif + } + + /* Handle refreshing the authentication token */ + tokenUtil_log_refresh (); + if ( hwmonTimer_token.ring == true ) + { + hwmonTimer_token.ring = false ; + tokenUtil_new_token ( ctrl_ptr->httpEvent, ctrl_ptr->my_hostname ); + + /* If the token fetch fails then we retry in a shorter interval */ + if ( token_ptr->delay == true ) + { + mtcTimer_start( hwmonTimer_token, hwmonTimer_handler, 5 ); + token_ptr->delay = false ; + } + /* otherwise at the normal refresh rate */ + else + { + mtcTimer_start ( hwmonTimer_token, hwmonTimer_handler, config_ptr->token_refresh_rate ) ; + } + } + /* handle the spontaneous error case */ + else if ( token_ptr->delay == true ) + { + mtcTimer_stop ( hwmonTimer_token ); + mtcTimer_start( hwmonTimer_token, hwmonTimer_handler, 5 ); + token_ptr->delay = false ; + } + + /* Always ensure that the token refesh timer is running */ + if ( hwmonTimer_token.tid == NULL ) + { + slog ("auto-restarting dead token refresh timer\n"); + mtcTimer_start ( hwmonTimer_token, hwmonTimer_handler, config_ptr->token_refresh_rate ) ; + } + + /* Run the FSM */ + hostInv.hwmon_fsm ( ) ; + + daemon_signal_hdlr (); + + daemon_load_fit ( ); + } +} + +/* Add Host Handler + * ---------------------------*/ +int hwmonHostClass::add_host_handler ( struct 
hwmonHostClass::hwmon_host * host_ptr ) +{ + switch ( host_ptr->addStage ) + { + case HWMON_ADD__WAIT: + { + if ( mtcTimer_expired ( host_ptr->addTimer )) + { + host_ptr->addTimer.ring = false ; + addStageChange ( host_ptr , HWMON_ADD__START ); + } + break ; + } + case HWMON_ADD__START: + { + /* force load of sensors from database if sensors = 0 and they exist */ + int rc = hwmonHostClass::ipmi_load_sensor_model ( host_ptr ) ; + if ( rc == PASS ) + { + mtcTimer_start ( host_ptr->addTimer, hwmonTimer_handler, 1); + addStageChange (host_ptr, HWMON_ADD__STATES); + } + else + { + /* there might be issue accessing the sysinv database */ + int delay = (rand()%30)+1 ; + wlog ("%s ipmi_load_sensor_model failed (rc:%d) ; retrying in %d secs\n", host_ptr->hostname.c_str(), rc , delay); + mtcTimer_start ( host_ptr->addTimer, hwmonTimer_handler, delay ); + addStageChange ( host_ptr , HWMON_ADD__WAIT ); + } + break ; + } + case HWMON_ADD__STATES: + { + if ( mtcTimer_expired ( host_ptr->addTimer )) + { + if ( host_ptr->sensors ) + { + int rc ; + /* manage the alarm and degrade states of all the sensors over process + * startup when the sensor model is already found in the database ; + * typical case over process restart. 
*/ + if (( rc = manage_startup_states ( host_ptr ) ) == PASS ) + { + /* run the audit right away just to update the host degrade state + * if it needs it ; like over a SWACT */ + degrade_state_audit ( host_ptr ) ; + + ilog ("%s add complete (groups:%d sensors:%d)\n", host_ptr->hostname.c_str(), host_ptr->groups, host_ptr->sensors ); + } + else + { + int delay = (rand()%30)+1 ; + if ( host_ptr->alarmed_config == false ) + { + host_ptr->alarmed_config = true ; + hwmonAlarm_minor ( host_ptr->hostname, HWMON_ALARM_ID__SENSORCFG, "profile", REASON_DEGRADED ); + } + wlog ("%s manage_startup_states failed (rc:%d) ; retrying in %d secs\n", host_ptr->hostname.c_str(), rc, delay ); + mtcTimer_start ( host_ptr->addTimer, hwmonTimer_handler, delay ); + break ; + } + } + else + { + ilog ("%s no sensor model in database ; must be learned\n", + host_ptr->hostname.c_str()); + } + addStageChange ( host_ptr , HWMON_ADD__DONE ); + } + break ; + } + case HWMON_ADD__DONE: + { + ilog ("%s add complete ; %d sensors %d groups\n", host_ptr->hostname.c_str(), host_ptr->sensors, host_ptr->groups ); + break ; + } + default: + { + slog ("%s invalid 'add' stage\n", host_ptr->hostname.c_str() ); + if ( host_ptr->addTimer.tid ) mtcTimer_stop ( host_ptr->addTimer ); + mtcTimer_start ( host_ptr->addTimer, hwmonTimer_handler, (rand()%10)+1); + addStageChange ( host_ptr , HWMON_ADD__DONE ); + break ; + } + } + return (PASS); +} + +/* Inventory Object wrapper - does a node lookup and calls the timer handler */ +void hwmonTimer_handler ( int sig, siginfo_t *si, void *uc) +{ + hwmonHostClass * obj_ptr = get_hwmonHostClass_ptr() ; + obj_ptr->timer_handler ( sig, si, uc ); +} + +/***************************************************************************** + * + * Name : interval_change_handler + * + * Purpose: : Handles setting the monitoring audit interval. + * + * Description: The following conditions are handled. 
+ * + * if host_ptr->interval is zero then it and all the groups + * are set to the default value. + * + * If there is existing inventory then host_ptr->interval + * is set to the shortest group interval. + * + * With no existing inventory all groups are set to + * HWMON_DEFAULT_AUDIT_INTERVAL + * + * if host_ptr->interval is not zero then all the group intervals + * are set to that value. + * + *****************************************************************************/ + +int hwmonHostClass::interval_change_handler ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + int rc = RETRY ; + + dlog ("%s interval change handler\n", host_ptr->hostname.c_str()); + + /* Don't issue a request if there is one active already */ + if ( host_ptr->event.base == NULL ) + { + rc = PASS ; + + if ( host_ptr->interval < HWMON_MIN_AUDIT_INTERVAL ) + { + ilog ("%s setting audit interval\n", host_ptr->hostname.c_str()); + if ( host_ptr->groups ) + { + int smallest = HWMON_DEFAULT_LARGE_INTERVAL ; + + /* get the smallest interval */ + for ( int g = 0 ; g < host_ptr->groups ; ++g ) + { + if ( smallest > host_ptr->group[g].group_interval ) + { + smallest = host_ptr->group[g].group_interval ; + } + } + /* Should be no bigger than the smallest group interval setting. 
*/ + host_ptr->interval = smallest ; + } + else + { + /* default first 'learning' audit interval */ + host_ptr->interval = 5 ; + } + } + + if (( host_ptr->relearn == true ) && + ( host_ptr->model_attributes_preserved.interval != host_ptr->interval )) + { + host_ptr->interval = host_ptr->model_attributes_preserved.interval ; + ilog ("%s audit interval restored to %d seconds\n", + host_ptr->hostname.c_str(), + host_ptr->interval); + } + + string interval_string = itos(host_ptr->interval) ; + + for ( int g = 0 ; g < host_ptr->groups ; ++g ) + { + daemon_signal_hdlr(); + + + if ( host_ptr->interval != host_ptr->group[g].group_interval ) + { + /* only updat the group if they differ */ + if ( host_ptr->group[g].group_interval != host_ptr->interval ) + { + /* update the group interval. Even though ipmi + * montoring does not need it, we need to be + * backwards compatible. + * + * ipmi monitors all groups at the same interval */ + int old = host_ptr->group[g].group_interval ; + host_ptr->group[g].group_interval = host_ptr->interval ; + + rc = hwmonHttp_mod_group ( host_ptr->hostname, + host_ptr->event, + host_ptr->group[g].group_uuid, + "audit_interval_group", + interval_string ); + + if ( rc ) + { + elog ("%s failed to update '%s' group audit interval (%d of %d); will retry later\n", + host_ptr->hostname.c_str(), + host_ptr->group[g].group_name.c_str(), + g, host_ptr->groups ); + break ; + } + else + { + char str [100] ; + snprintf ( &str[0], 100, "audit interval changed from %d to %d seconds", + old, + host_ptr->group[g].group_interval); + + hwmonLog ( host_ptr->hostname, + HWMON_ALARM_ID__SENSORGROUP, + FM_ALARM_SEVERITY_CLEAR, + host_ptr->group[g].group_name, str ); + } + } + } + } + /* retry until pass - retries are spaced by audit interval */ + if ( rc == PASS ) + { + /* TODO: remove error detection and correction */ + if ( host_ptr->interval == 0 ) + { + slog ("%s failed to set interval correctly\n",host_ptr->hostname.c_str()); + + host_ptr->interval = 
HWMON_DEFAULT_AUDIT_INTERVAL ; + } + + host_ptr->interval_changed = false ; + } + } + + ilog ("%s sensor monitoring period is %d seconds\n", + host_ptr->hostname.c_str(), + host_ptr->interval ); + + return (rc); +} + + +/* Hardware Monitor Handler + * -------------------------- + * + * TODO: Need grouping to enable the groups in the database + * group_ptr->group_state = "enabled" ; + * hwmonHttp_mod_group ( host_ptr->hostname, host_ptr->event , group_ptr->group_uuid, "state" , group_ptr->group_state ); + * if ( group_ptr->group_state.compare("enabled") ) + * TODO: Need grouping disabled on state transition from monitoring enabled to disabled + * + * + * */ +int hwmonHostClass::ipmi_sensor_monitor ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + int rc = RETRY ; + + if ( host_ptr ) + { + /* Check the stage */ + if ( host_ptr->monitor_ctrl.stage < HWMON_SENSOR_MONITOR__STAGES ) + { + flog ("%s sensor monitor stage (%s)\n", + host_ptr->hostname.c_str(), + monitorStages_str[host_ptr->monitor_ctrl.stage].c_str()); + } + else + { + slog ("%s bad sensor monitor state (%d) - forcing into IDLE\n", + host_ptr->hostname.c_str(), + host_ptr->monitor_ctrl.stage); + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__START ); + } + + /* check for a new model relearn request */ + if ( host_ptr->relearn_request == true ) + { + int relearn_time = MTC_MINS_1 ; + + /* gracefully handle delete model failure retry. + * if there is a relearn timer running then wait for it + * to expire. This way previously failed relear request + * retries are throttled. 
*/ + if ( mtcTimer_expired ( host_ptr->relearnTimer ) == false ) + { + /* TODO: test FIT */ + return (RETRY); + } + + ilog ("%s handling sensor model relearn request\n", + host_ptr->hostname.c_str()); + + rc = ipmi_delete_sensor_model ( host_ptr ); + if ( rc != PASS ) + { + elog ("%s delete model failure ; retry in %d seconds\n", + host_ptr->hostname.c_str(), relearn_time ); + + /* If we got an error then wait relearn_time + * before trying again */ + mtcTimer_start ( host_ptr->relearnTimer, + hwmonTimer_handler, + relearn_time ); + return (RETRY); + } + + relearn_time = MTC_MINS_5 ; + + /* enter relearn mode */ + host_ptr->relearn = true ; + + /* exit relearn request mode. + * allow the relearn operation to proceed */ + host_ptr->relearn_request = false ; + + host_ptr->relearn_done_date = future_time ( relearn_time ); + ilog ("%s next relearn permitted after %s\n", + host_ptr->hostname.c_str(), + host_ptr->relearn_done_date.c_str()); + + this->monitor_soon ( host_ptr ); + + /* start the relearn timer */ + mtcTimer_start ( host_ptr->relearnTimer, + hwmonTimer_handler, + relearn_time ); + } + + switch ( host_ptr->monitor_ctrl.stage ) + { + /****************************************************************** + * + * The IDLE stage is the default start and do nothing stage while + * monitoring is disabled. 
+ * + * Stage Transition: external + * + ******************************************************************/ + case HWMON_SENSOR_MONITOR__IDLE: + { + break ; + } + + /****************************************************************** + * + * A delayed START + * + *****************************************************************/ + case HWMON_SENSOR_MONITOR__RESTART: + { + if ( mtcTimer_expired ( host_ptr->monitor_ctrl.timer ) ) + { + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__START ); + } + break ; + } + + + /****************************************************************** + * + * The START stage is the default stage and starts sensor + * monitoring if enabled for this host. + * + * The start process begins with adding a small randomized delay + * before the first READ so that over a process (re)start we don't + * jolt the process by trying to read sensors from all hosts at the + * same time. + * + * Stage Transition: + * + * Success path -> HWMON_SENSOR_MONITOR__DELAY + * Failure Path -> HWMON_SENSOR_MONITOR__IDLE + * + ******************************************************************/ + case HWMON_SENSOR_MONITOR__START: + { + mtcTimer_reset ( host_ptr->monitor_ctrl.timer ); + + if ( host_ptr->monitor ) + { + /* Handle Audit Interval Change */ + if ( host_ptr->interval_changed ) + { + interval_change_handler ( host_ptr ); + } + + /* Handle power state query + * - don't depend on poweron if in relearn mode. + * - otherwise we need to ensure the model is learned + * while the host power is on. 
+ * See comments in HWMON_SENSOR_MONITOR__POWER for details */ + if (( host_ptr->sensors == 0 ) && + ( host_ptr->poweron == false ) && + ( host_ptr->relearn == false )) + { + if ( host_ptr->ipmitool_thread_ctrl.id ) + { + wlog ("%s sensor monitor thread is unexpectedly active ; retry soon\n", host_ptr->hostname.c_str()); + thread_kill ( host_ptr->ipmitool_thread_ctrl, host_ptr->ipmitool_thread_info ); + sleep (1); + break ; + } + + host_ptr->accounting_bad_count = 0 ; + host_ptr->ipmitool_thread_ctrl.id = 0 ; + host_ptr->ipmitool_thread_ctrl.done = false ; + + host_ptr->ipmitool_thread_info.data.clear() ; + host_ptr->ipmitool_thread_info.status_string.clear(); + host_ptr->ipmitool_thread_info.status = -1 ; + host_ptr->ipmitool_thread_info.progress = 0 ; + host_ptr->ipmitool_thread_info.id = 0 ; + host_ptr->ipmitool_thread_info.signal = 0 ; + host_ptr->ipmitool_thread_info.command = IPMITOOL_THREAD_CMD__POWER_STATUS ; + + /* Update / Setup the BMC query credentials */ + host_ptr->thread_extra_info.bm_ip = host_ptr->bm_ip ; + host_ptr->thread_extra_info.bm_un = host_ptr->bm_un ; + host_ptr->thread_extra_info.bm_pw = host_ptr->bm_pw ; + + rc = thread_launch ( host_ptr->ipmitool_thread_ctrl, + host_ptr->ipmitool_thread_info ) ; + if ( rc != PASS ) + { + host_ptr->ipmitool_thread_info.status = rc ; + host_ptr->ipmitool_thread_info.status_string = + "failed to launch power query thread" ; + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__FAIL ); + } + else + { + /* Assign the extra data pointer */ + host_ptr->ipmitool_thread_info.extra_info_ptr = (void*)&host_ptr->thread_extra_info ; + + /* start an umbrella timer 5 seconds longer than + * the default thread FSM timout */ + mtcTimer_start ( host_ptr->monitor_ctrl.timer, + hwmonTimer_handler, + (DEFAULT_THREAD_TIMEOUT_SECS+5) ); + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__POWER ); + } + break ; + } + else if ( 
host_ptr->interval ) + { + /* Assign the extra data pointer */ + host_ptr->ipmitool_thread_info.extra_info_ptr = (void*)&host_ptr->thread_extra_info ; + + /* randomize the first audit a little so that over a swact we don't spike hwmond */ + int r = (rand() % host_ptr->interval) + 1 ; + + /* poll all the sensors right away - between 1 and 10 seconds */ + ilog ("%s sensor monitoring begins in %d seconds\n", + host_ptr->hostname.c_str(), r ); + + mtcTimer_start ( host_ptr->monitor_ctrl.timer, hwmonTimer_handler, r ); + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__DELAY ); + break ; + } + else + { + host_ptr->interval_changed = true ; + wlog ("%s audit interval is zero ; auto correcting\n", host_ptr->hostname.c_str()); + break ; + } + } + else + { + ilog ("%s sensor monitoring disabled\n", host_ptr->hostname.c_str()); + } + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__IDLE ); + break ; + } + + + /****************************************************************** + * + * The POWER stage handles a power query response. + * + * The START is re-invoked if the power query fails or + * shows that the power is off. 
+ * + * Stage Transition: + * + * Success path -> HWMON_SENSOR_MONITOR__DELAY + * Failure Path -> HWMON_SENSOR_MONITOR__START + * + ******************************************************************/ + case HWMON_SENSOR_MONITOR__POWER: + { + /* handle thread execution umbrella timeout */ + if ( mtcTimer_expired ( host_ptr->monitor_ctrl.timer ) ) + { + host_ptr->monitor_ctrl.timer.ring = false ; + + wlog ("%s power query thread timeout\n", + host_ptr->hostname.c_str()); + + thread_kill ( host_ptr->ipmitool_thread_ctrl, host_ptr->ipmitool_thread_info ); + } + + /* check for 'thread done' completion */ + else if ( thread_done( host_ptr->ipmitool_thread_ctrl ) ) + { + /* Consume done results */ + mtcTimer_reset ( host_ptr->monitor_ctrl.timer ); + + if ( host_ptr->ipmitool_thread_info.status ) + { + elog ("%s %s thread %2d failed (rc:%d) (%d:%d)\n", + host_ptr->ipmitool_thread_ctrl.hostname.c_str(), + host_ptr->ipmitool_thread_ctrl.name.c_str(), + host_ptr->ipmitool_thread_info.command, + host_ptr->ipmitool_thread_info.status, + host_ptr->ipmitool_thread_info.progress, + host_ptr->ipmitool_thread_info.runcount); + + wlog ("%s ... %s\n", + host_ptr->ipmitool_thread_ctrl.hostname.c_str(), + host_ptr->ipmitool_thread_info.status_string.c_str()); + } + else + { + dlog ("%s '%s' thread '%d' command is done ; (%d:%d) (rc:%d)\n", + host_ptr->ipmitool_thread_ctrl.hostname.c_str(), + host_ptr->ipmitool_thread_ctrl.name.c_str(), + host_ptr->ipmitool_thread_info.command, + host_ptr->ipmitool_thread_info.progress, + host_ptr->ipmitool_thread_info.runcount, + host_ptr->ipmitool_thread_info.status); + + blog2("%s ... 
status: %s\n", + host_ptr->ipmitool_thread_ctrl.hostname.c_str(), + host_ptr->ipmitool_thread_info.status_string.c_str()); + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__HWMON__NO_DATA, host_ptr->hostname )) + { + host_ptr->ipmitool_thread_info.data.clear (); + host_ptr->ipmitool_thread_info.status = 0 ; + host_ptr->ipmitool_thread_info.status_string.clear (); + slog ("%s FIT No Power Status Data\n", host_ptr->hostname.c_str()); + } +#endif + + if ( host_ptr->ipmitool_thread_info.data.empty()) + { + wlog ("%s power query status empty ; retrying query\n", + host_ptr->hostname.c_str()); + } + else if ( host_ptr->ipmitool_thread_info.data.find (IPMITOOL_POWER_ON_STATUS) == string::npos ) + { + ilog ("%s %s\n", host_ptr->hostname.c_str(), + host_ptr->ipmitool_thread_info.data.c_str()); + + wlog ("%s sensor learning delayed ; need power on\n", + host_ptr->hostname.c_str()); + } + else + { + ilog ("%s %s\n", host_ptr->hostname.c_str(), + host_ptr->ipmitool_thread_info.data.c_str()); + + /* OK, this is what we have been waiting for */ + host_ptr->poweron = true ; + } + } + + host_ptr->ipmitool_thread_ctrl.done = true ; + + if ( host_ptr->poweron == false ) + { + mtcTimer_start ( host_ptr->monitor_ctrl.timer, + hwmonTimer_handler, MTC_MINS_1 ); + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__RESTART ); + } + else + { + mtcTimer_start ( host_ptr->monitor_ctrl.timer, + hwmonTimer_handler, MTC_MINS_2 ); + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__RESTART ); + } + } + break ; + } + + /****************************************************************** + * + * The DELAY stage inserts time after a failure recovery or + * between successive sensor READ intervals. + * + * The failure path is invoked if the 'thread' stage is not IDLE + * when the DELAY period expires. 
+ * + * Stage Transition: + * + * Success path -> HWMON_SENSOR_MONITOR__READ + * Failure Path -> HWMON_SENSOR_MONITOR__FAIL + * + ******************************************************************/ + case HWMON_SENSOR_MONITOR__DELAY: + { + if ( mtcTimer_expired ( host_ptr->monitor_ctrl.timer ) ) + { + host_ptr->monitor_ctrl.timer.ring = false ; + + /* if there was a previous connection failure being handled + * then give it time to resolve */ + if ( !thread_idle ( host_ptr->ipmitool_thread_ctrl ) ) + { + wlog ("%s rejecting thread run stage change ; FSM not IDLE (thread stage:%s)\n", + host_ptr->hostname.c_str(), + thread_stage(host_ptr->ipmitool_thread_ctrl).c_str()); + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__FAIL ); + } + else + { + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__READ ); + } + } + /* Handle Audit Interval Change ... + * While we are waiting for the next audit check to see if we have received + * an monitor interval change. If we have then update the database with the + * new data, force this interval to finish and on the next audit the new + * interval will be loaded */ + else if ( host_ptr->interval_changed ) + { + interval_change_handler ( host_ptr ); + + /* force this audit interval to expire but don't include this in the + * pass case only. Give sysinv it some time before the next retry */ + mtcTimer_stop ( host_ptr->monitor_ctrl.timer ); + host_ptr->monitor_ctrl.timer.ring = true ; + } + break ; + } + + + /****************************************************************** + * + * The READ stage requests the launch of the hwmonThread_ipmitool + * thread that will read the sensor data from the specified host. + * + * An umbrella timeout timer is started on behalf of the PARSE + * stage to detect threadUtil FSM not completing. + * + * Launch will fail if attempted if the thread is already running + * or if the launch request returns a failure. 
+ * + * Stage Transition: + * + * Success path -> HWMON_SENSOR_MONITOR__PARSE + * Failure Path -> HWMON_SENSOR_MONITOR__FAIL + * + ******************************************************************/ + case HWMON_SENSOR_MONITOR__READ: + { + if ( host_ptr->ipmitool_thread_ctrl.id ) + { + host_ptr->ipmitool_thread_info.status = FAIL_THREAD_RUNNING ; + host_ptr->ipmitool_thread_info.status_string = + "sensor monitor thread is unexpectedly active ; handling as failure" ; + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__FAIL ); + break ; + } + + host_ptr->accounting_bad_count = 0 ; + host_ptr->ipmitool_thread_ctrl.id = 0 ; + host_ptr->ipmitool_thread_ctrl.done = false ; + + host_ptr->ipmitool_thread_info.data.clear() ; + host_ptr->ipmitool_thread_info.status_string.clear(); + host_ptr->ipmitool_thread_info.status = -1 ; + host_ptr->ipmitool_thread_info.progress = 0 ; + host_ptr->ipmitool_thread_info.id = 0 ; + host_ptr->ipmitool_thread_info.signal = 0 ; + host_ptr->ipmitool_thread_info.command = IPMITOOL_THREAD_CMD__READ_SENSORS ; + + /* Update / Setup the BMC query credentials */ + host_ptr->thread_extra_info.bm_ip = host_ptr->bm_ip ; + host_ptr->thread_extra_info.bm_un = host_ptr->bm_un ; + host_ptr->thread_extra_info.bm_pw = host_ptr->bm_pw ; + + + rc = thread_launch ( host_ptr->ipmitool_thread_ctrl, host_ptr->ipmitool_thread_info ) ; + if ( rc != PASS ) + { + host_ptr->ipmitool_thread_info.status = rc ; + host_ptr->ipmitool_thread_info.status_string = + "failed to launch sensor monitoring thread" ; + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__FAIL ); + } + else + { + /* start an umbrella timer 5 seconds longer than + * the default thread FSM timout */ + mtcTimer_start ( host_ptr->monitor_ctrl.timer, + hwmonTimer_handler, + (DEFAULT_THREAD_TIMEOUT_SECS+5) ); + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__PARSE ); + } + break 
; + } + + /****************************************************************** + * The PARSE stage has 2 main functions + * + * 1. Wait for the ipmitool command completion from the READ stage + * while monitoring for and handling the unbrella timeout case. + * + * 2. PARSE the sensor data json string into the sample list + * + * sample[MAX_HOST_SENSORS] + * + * The number of sensors read by thread is specified in + * + * thread_extra_info.samples + * + * Failure case is invoked for + * - thread completion umbrella timeout. + * - thread completion error + * - sensor data parse error + * + * Stage Transition: + * + * Success path -> HWMON_SENSOR_MONITOR__CHECK + * Failure Path -> HWMON_SENSOR_MONITOR__FAIL + * + ******************************************************************/ + case HWMON_SENSOR_MONITOR__PARSE: + { + daemon_signal_hdlr (); + + /* Unbrella timeout timer check */ + if ( mtcTimer_expired ( host_ptr->monitor_ctrl.timer ) ) + { + host_ptr->monitor_ctrl.timer.ring = false ; + host_ptr->ipmitool_thread_info.status = FAIL_TIMEOUT ; + host_ptr->ipmitool_thread_info.status_string = + "timeout waiting for sensor read data" ; + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__FAIL ); + } + + /* check for 'thread done' completion */ + else if ( thread_done( host_ptr->ipmitool_thread_ctrl ) ) + { + /* Consume done results */ + mtcTimer_stop ( host_ptr->monitor_ctrl.timer ); + + if ( host_ptr->ipmitool_thread_info.status ) // == FAIL_SYSTEM_CALL ) + { + if ( ++host_ptr->ipmitool_thread_ctrl.retries < MAX_THREAD_RETRIES ) + { + elog ("%s %s thread %2d failed (rc:%d) (try %d of %d) (%d:%d)\n", + host_ptr->ipmitool_thread_ctrl.hostname.c_str(), + host_ptr->ipmitool_thread_ctrl.name.c_str(), + host_ptr->ipmitool_thread_info.command, + host_ptr->ipmitool_thread_info.status, + host_ptr->ipmitool_thread_ctrl.retries, + MAX_THREAD_RETRIES, + host_ptr->ipmitool_thread_info.progress, + host_ptr->ipmitool_thread_info.runcount); + 
+ /* don't flood the logs with the same error data over and over */ + if ( host_ptr->ipmitool_thread_ctrl.retries == 1 ) + { + blog ("%s ... %s\n", + host_ptr->ipmitool_thread_ctrl.hostname.c_str(), + host_ptr->ipmitool_thread_info.status_string.c_str()); + } + + host_ptr->ipmitool_thread_ctrl.done = true ; + mtcTimer_start ( host_ptr->monitor_ctrl.timer, hwmonTimer_handler, THREAD_RETRY_DELAY_SECS ); + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__DELAY ); + break ; + } +#ifdef WANT_THIS + /* don't flood the logs with the same error data over and over */ + if ( host_ptr->ipmitool_thread_ctrl.retries > 1 ) + { + wlog ("%s %s thread '%d' command is done ; (%d:%d) (rc:%d)\n", + host_ptr->ipmitool_thread_ctrl.hostname.c_str(), + host_ptr->ipmitool_thread_ctrl.name.c_str(), + host_ptr->ipmitool_thread_info.command, + host_ptr->ipmitool_thread_info.progress, + host_ptr->ipmitool_thread_info.runcount, + host_ptr->ipmitool_thread_info.status); + blog ("%s ... data: %s\n", + host_ptr->ipmitool_thread_ctrl.hostname.c_str(), + host_ptr->ipmitool_thread_info.status_string.c_str()); + } +#endif + } + else + { + dlog ("%s '%s' thread '%d' command is done ; (%d:%d) (rc:%d)\n", + host_ptr->ipmitool_thread_ctrl.hostname.c_str(), + host_ptr->ipmitool_thread_ctrl.name.c_str(), + host_ptr->ipmitool_thread_info.command, + host_ptr->ipmitool_thread_info.progress, + host_ptr->ipmitool_thread_info.runcount, + host_ptr->ipmitool_thread_info.status); + blog2 ("%s ... 
data: %s\n", + host_ptr->ipmitool_thread_ctrl.hostname.c_str(), + host_ptr->ipmitool_thread_info.status_string.c_str()); + } + host_ptr->ipmitool_thread_ctrl.done = true ; + host_ptr->ipmitool_thread_ctrl.retries = 0 ; + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__HWMON__NO_DATA, host_ptr->hostname )) + { + host_ptr->ipmitool_thread_info.data.clear (); + host_ptr->ipmitool_thread_info.status = 0 ; + host_ptr->ipmitool_thread_info.status_string.clear (); + } +#endif + + if ( host_ptr->ipmitool_thread_info.status == PASS ) + { + /* NOTE: This parsing method is not leaking memory ; verified ! */ + + json_bool status ; + struct json_object * req_obj = (struct json_object *)(NULL) ; + struct json_object * raw_obj = json_tokener_parse( host_ptr->ipmitool_thread_info.data.data() ); + if ( raw_obj ) + { + /* Look for ... IPMITOOL_JSON__SENSOR_DATA_MESSAGE_HEADER */ + status = json_object_object_get_ex ( raw_obj, IPMITOOL_JSON__SENSOR_DATA_MESSAGE_HEADER, &req_obj ); + if (( status == TRUE ) && req_obj ) + { + char * msg_ptr = (char*)json_object_to_json_string(req_obj) ; + host_ptr->json_ipmi_sensors = msg_ptr ; + if ( msg_ptr ) + { + host_ptr->ipmitool_thread_info.status = ipmi_load_sensor_samples ( host_ptr , msg_ptr); + if ( host_ptr->ipmitool_thread_info.status == PASS ) + { + if ( host_ptr->samples != host_ptr->sensors ) + { + if ( host_ptr->quanta_server == false ) + { + ilog ("%s read %d sensor samples but expected %d\n", + host_ptr->hostname.c_str(), + host_ptr->samples, + host_ptr->sensors ); + } + } + _stage_change ( host_ptr->hostname, host_ptr->monitor_ctrl.stage, HWMON_SENSOR_MONITOR__CHECK ); + + } + else + { + host_ptr->ipmitool_thread_info.status_string = "failed to load sensor data" ; + } + } + else + { + host_ptr->ipmitool_thread_info.status_string = "failed to get json message after header" ; + host_ptr->ipmitool_thread_info.status = FAIL_JSON_PARSE ; + } + } + else + { + host_ptr->ipmitool_thread_info.status_string = "failed to find '" ; 
+ host_ptr->ipmitool_thread_info.status_string.append(IPMITOOL_JSON__SENSOR_DATA_MESSAGE_HEADER); + host_ptr->ipmitool_thread_info.status_string.append("' label") ; + host_ptr->ipmitool_thread_info.status = FAIL_JSON_PARSE ; + } + } + else + { + host_ptr->ipmitool_thread_info.status_string = "failed to parse ipmitool sensor data string" ; + host_ptr->ipmitool_thread_info.status = FAIL_JSON_PARSE ; + } + + if (raw_obj) json_object_put(raw_obj); + if (req_obj) json_object_put(req_obj); + } + + if ( host_ptr->ipmitool_thread_info.status ) + { + /* Handle thread error status */ + if ( host_ptr->groups == 0 ) + { + if ( host_ptr->alarmed_config == false ) + { + host_ptr->alarmed_config = true ; + hwmonAlarm_minor ( host_ptr->hostname, HWMON_ALARM_ID__SENSORCFG, "profile", REASON_DEGRADED ); + } + } + else + { + ipmi_set_group_state ( host_ptr, "failed" ); + } + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__FAIL ); + } + } /* end handling of done command */ + break ; + } + + /****************************************************************** + * + * The CHECK stage is run on the last parsed sample data loaded + * into the temporary sample sensor data list ... + * + * host_ptr->sample[MAX_HOST_SENSORS] + * + * The number of samples loaded into the sample is + * specified in + * + * host_ptr->samples + * + * The CHECK is intended to identify sensor data corruption or + * model changes that might occur over a BMC firmware upgrade. + * + * The CHECK involves performing a checksum of all the sensor + * names in each list and comparing that checksum to the last + * time the sensors were read. + * + * A stored checksum of zero indicates the first sample read. + * If at that time host_ptr->sensors == 0 then a call to + * ipmi_create_sensor_model is made to create a new sensor + * model based on these last sample readings. 
+ * + * If the stored checksums do not match the current checksums + * then that constitutes a sensor mismatch with a design log. + * The mismatch counter is incremented. If the mismatch + * counter exceeds its threshold then the current sensor model + * is deleted and re-created using the new data. + * + * A customer log is created whenever a host's sensor model + * is created or re-created. + * + * Stage Transition: + * + * Success path -> HWMON_SENSOR_MONITOR__UPDATE + * Failure Path -> HWMON_SENSOR_MONITOR__FAIL + * + *********************************************************************/ + case HWMON_SENSOR_MONITOR__CHECK: + { + unsigned short temp_checksum ; + + daemon_signal_hdlr (); + + /* Handle cases where we got an incomplete sensor reading */ + if ( host_ptr->thread_extra_info.samples == 0 ) + { + if ( host_ptr->ipmitool_thread_info.status == PASS ) + { + host_ptr->ipmitool_thread_info.status = FAIL_INVALID_DATA ; + host_ptr->ipmitool_thread_info.status_string = "incomplete sensor data reading" ; + } + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__FAIL ); + break ; + } + + /* get the checksum for this sample set */ + temp_checksum = + checksum_sample_profile ( host_ptr->hostname, + host_ptr->thread_extra_info.samples, + &host_ptr->sample[0]); + + blog1 ("%s samples profile checksum : %04x:%04x (%d:%d:%d)\n", + host_ptr->hostname.c_str(), + temp_checksum, + host_ptr->sample_sensor_checksum, + host_ptr->samples, + host_ptr->sensors, + host_ptr->thread_extra_info.samples); + + /* Initialize the sample checksums and counts for the first reading case */ + if ( host_ptr->sample_sensor_checksum == 0 ) + { + // host_ptr->samples = host_ptr->thread_extra_info.samples ; + host_ptr->sample_sensor_checksum = temp_checksum ; + } + + /* look for first sensor reading case with an empty database profile. 
+ * This can occur over a fresh provisioning or a model recreation */ + if ( host_ptr->sensors == 0 ) + { + ilog ("%s samples profile checksum : %04x (%d sensors) (%d samples)\n", + host_ptr->hostname.c_str(), + host_ptr->sample_sensor_checksum, + host_ptr->sensors, + host_ptr->samples); + + /* check the sample model against known Quanta Server profile checksums and sensor numbers */ + if (((( host_ptr->sample_sensor_checksum == QUANTA_SAMPLE_PROFILE_CHECKSUM_VER_13_53 ) || ( host_ptr->sample_sensor_checksum == QUANTA_SAMPLE_PROFILE_CHECKSUM_VER_13_50 )) && + (( host_ptr->samples == QUANTA_SAMPLE_PROFILE_SENSORS_VER_13_53) || (QUANTA_SAMPLE_PROFILE_CHECKSUM_VER_13_50 ))) || + (( host_ptr->sample_sensor_checksum == QUANTA_SAMPLE_PROFILE_CHECKSUM_VER_13___ )) || + (( host_ptr->sample_sensor_checksum == QUANTA_SAMPLE_PROFILE_CHECKSUM_VER_13_53b )) || + (( host_ptr->sample_sensor_checksum == QUANTA_SAMPLE_PROFILE_CHECKSUM_VER_13_47 ) && ( host_ptr->samples == QUANTA_SAMPLE_PROFILE_SENSORS_VER_13_47 )) || + (( host_ptr->sample_sensor_checksum == QUANTA_SAMPLE_PROFILE_CHECKSUM_VER_13_42 ) && ( host_ptr->samples == QUANTA_SAMPLE_PROFILE_SENSORS_VER_13_42 )) || + (( host_ptr->sample_sensor_checksum == QUANTA_SAMPLE_PROFILE_CHECKSUM_VER__3_29 ) && ( host_ptr->samples == QUANTA_SAMPLE_PROFILE_SENSORS_VER__3_29 ))) + { + /* TODO: can also add search for missing sensors */ + ilog ("%s -----------------------------------------------\n", host_ptr->hostname.c_str()); + ilog ("%s is a Quanta server based on sensor sample data\n", host_ptr->hostname.c_str()); + ilog ("%s -----------------------------------------------\n", host_ptr->hostname.c_str()); + host_ptr->quanta_server = true ; + } + + /* Create a sensor model from 'this' sample data */ + if ( ipmi_create_sensor_model ( host_ptr ) != PASS ) + { + elog ("%s failed to create sensor model (in sysinv)\n", + host_ptr->hostname.c_str()); + } + } + + if ( host_ptr->profile_sensor_checksum == 0 ) + { + 
host_ptr->profile_sensor_checksum = + checksum_sensor_profile ( host_ptr->hostname, + host_ptr->sensors, + &host_ptr->sensor[0]); + } + + if (( host_ptr->sensors == 0 ) || ( host_ptr->groups == 0 )) + { + elog ("%s has read %d sensors but cannot process with no sensor model (%d:%d)\n", + host_ptr->hostname.c_str(), + host_ptr->thread_extra_info.samples, + host_ptr->sensors, + host_ptr->groups); + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__START ); + } + else + { + blog ("%s has read %d sensors ... processing results\n", + host_ptr->hostname.c_str(), host_ptr->samples); + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__UPDATE ); + } + break ; + } + + /****************************************************************** + * + * The UPDATE stage translates the string based sensor sample + * data's 'status' to a severity and adds that to the sensors' + * sample_severity member in the sensor list. + * + * host_ptr->sensor[MAX_SENSORS].sample_severity + * + * Stage Transition: + * + * Success path -> HWMON_SENSOR_MONITOR__HANDLE + * Failure Path -> HWMON_SENSOR_MONITOR__FAIL + * + *****************************************************************/ + case HWMON_SENSOR_MONITOR__UPDATE: + { + if ( host_ptr->sensor_query_count++ == START_DEBOUCE_COUNT ) + { + /* onetime log showing debounce mode started */ + ilog ("%s sensor status deboucing enabled\n", host_ptr->hostname.c_str()); + } + + daemon_signal_hdlr (); + + /* handle clearing the config alarm if its raised but we are + * now at a point where the sensors are readable */ + if ( host_ptr->alarmed_config == true ) + { + host_ptr->alarmed_config = false ; + hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSORCFG, "profile", REASON_OK ); + } + + if ( ipmi_update_sensors ( host_ptr ) == PASS ) + { + if ( ( rc = ipmi_set_group_state ( host_ptr, "enabled" ) ) == PASS ) + { + _stage_change ( host_ptr->hostname, + 
host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__HANDLE ); + } + else + { + elog ("%s failed to set group state to 'enabled' (in sysinv) (rc:%d)\n", + host_ptr->hostname.c_str(), rc); + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__FAIL ); + } + } + else + { + elog ("%s failed to update sensor data (in hwmon) (rc:%d)\n", + host_ptr->hostname.c_str(), rc); + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__FAIL ); + } + break ; + } + case HWMON_SENSOR_MONITOR__HANDLE: + { + /************************************************************** + * + * Loop over all the sensors handling their current severity. + * + * At this point the new severities are in + * sensor_ptr->sample_severity. + * + * After a sensor is serviced in this loop that + * sensor_ptr->sample_severity is copied to ptr->severity + * to be compared against on the next audit interval. + * + *************************************************************/ + for ( int i = 0 ; i < host_ptr->sensors ; i++ ) + { + /* + * This variable controls whether status change actions + * need to be taken at the end of this loop for sensor + * in context. Assume sensor status is not changed. + */ + bool mod_status = false ; + + /* lets use a local pointer to make the code easier to read */ + sensor_type * ptr = &host_ptr->sensor[i] ; + + /* Local copy of new severity */ + sensor_severity_enum severity = ptr->sample_severity ; + + /* Things can get a little busy so lets make sure we + * service the signal handler and incoming http requests + * from sysinv. + */ + daemon_signal_hdlr (); + hwmonHttp_server_look (); + + /* Internasl error checking ; never seen but just in case. 
+ * Skip over and swerr about null sensor name */ + if ( ptr->sensorname.empty() ) + { + slog ("%s %d sensor name is empty\n", host_ptr->hostname.c_str(), i ); + continue ; + } + + if ( ptr->updated == false ) + { + host_ptr->accounting_bad_count++ ; + + /* + * Force a sensor MINOR if we fail to get status from + * it NOT_FOUND_COUNT_BEFORE_MINOR or more times in a row + * + * This debounces the one of sensor update misses but the + * log above at least shows if/when this is happening. + */ + if ( ++ptr->not_updated_status_change_count >= NOT_FOUND_COUNT_BEFORE_MINOR ) + { + severity = HWMON_SEVERITY_MINOR ; + } + } + else + { + ptr->not_updated_status_change_count = 0 ; + } + + if ( severity != ptr->severity) + { + blog ("%s %s status change ; %s:%s -> %s\n", + host_ptr->hostname.c_str(), + ptr->sensorname.c_str(), + get_severity(ptr->severity).c_str(), + ptr->status.c_str(), + get_severity(severity).c_str()); + + /* debounce of the the transient 'na' case is debounced + * if ( host_ptr->sensor_query_count > 5 ) + * log_sensor_data ( host_ptr, ptr->sensorname, ptr->status, get_ipmi_severity(ptr->sample_severity)); + */ + } + + blog1 ("%s %s curr:%s this:%s last:%s\n", + host_ptr->hostname.c_str(), + ptr->sensorname.c_str(), + ptr->status.c_str(), + ptr->sample_status.c_str(), + ptr->sample_status_last.c_str()); + + if ( severity == HWMON_SEVERITY_GOOD ) + { + if ( ptr->status.compare("ok") ) + { + /* don't bother printing a log for sensors that + * go from offline to ok */ + if ( ptr->status != "offline" ) + { + ilog ("%s %s is ok (was %s)\n", + host_ptr->hostname.c_str(), + ptr->sensorname.c_str(), + ptr->status.c_str()); + } + + /* last state was not 'ok' */ + mod_status = true ; + ptr->status = "ok" ; + clear_ignored_state (ptr ); + clear_logged_state (ptr ); + } + + /* TODO: verify clearing sensor that has cleared over a process restart */ + if ((( ptr->suppress == false ) && ( ptr->severity != HWMON_SEVERITY_GOOD )) || + ((ptr->alarmed == true ) || ( 
ptr->degraded == true ))) + { + hwmonHostClass::manage_sensor_state ( host_ptr->hostname, ptr , HWMON_SEVERITY_GOOD ); + } + } + else + { + /* Handle transition from offline to online + * - clear any alarm that exhists for a sensor + * coming out of the offline state is no longer + * offline. + **/ + if (( severity != HWMON_SEVERITY_OFFLINE ) && ( !ptr->status.compare("offline") )) + { + wlog ("%s %s sensor returned from '%s' with '%s' severity [alarmed:%s]\n", + host_ptr->hostname.c_str(), + ptr->sensorname.c_str(), + ptr->status.c_str(), + get_severity(severity).c_str(), + ptr->alarmed ? "Yes" : "No"); + + /* Clear the alarm and allow it to be re-raised if the issue exists */ + clear_asserted_alarm ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, ptr, REASON_ONLINE ); + } + + if ( severity == HWMON_SEVERITY_OFFLINE ) + { + if ( ptr->status.compare("offline")) + { + if ( ptr->alarmed == true ) + { + hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, ptr->sensorname, REASON_OFFLINE ); + ptr->alarmed = false ; + } + ptr->degraded = false ; + + if ( ptr->critl.logged || ptr->major.logged || ptr->minor.logged ) + { + hwmonLog_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, ptr->sensorname, REASON_OFFLINE ); + ptr->critl.logged = ptr->major.logged = ptr->minor.logged = false ; + } + mod_status = true ; + blog ("%s %s sensor status change '%s' -> 'offline'\n", + host_ptr->hostname.c_str(), + ptr->status.c_str(), + ptr->sensorname.c_str()); + ptr->status = "offline" ; + } + } + else if ( severity == HWMON_SEVERITY_MINOR ) + { + /* logs and alarms state changes are handled when the ignore + * action is set in the modify handler so there is no need + * to call the manager in the ignore case */ + if (( ptr->suppress == false ) && ( ptr->actions_minor.compare (HWMON_ACTION_IGNORE))) + { + hwmonHostClass::manage_sensor_state ( host_ptr->hostname, ptr, HWMON_SEVERITY_MINOR ); + } + else + { + if ( ptr->alarmed == true ) + { + /* We may have transitioned to ignore 
from an alarm state so check and clear if an alarm exists */ + clear_asserted_alarm ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, ptr, REASON_IGNORED ); + } + clear_logged_state ( ptr ) ; + } + + /* still maintain the status + * ... if not minor then set it to minor */ + if ( ptr->status.compare("minor") ) + { + ptr->status = "minor" ; + mod_status = true ; + } + } + else if ( severity == HWMON_SEVERITY_MAJOR ) + { + /* logs and alarms state changes are handled when the ignore + * action is set in the modify handler so there is no need + * to call the manager in the ignore case */ + if (( ptr->suppress == false ) && ( ptr->actions_major.compare (HWMON_ACTION_IGNORE))) + { + hwmonHostClass::manage_sensor_state ( host_ptr->hostname, ptr, HWMON_SEVERITY_MAJOR ); + } + else + { + if ( ptr->alarmed == true ) + { + /* We may have transitioned to ignore from an alarm state so check and clear if an alarm exists */ + clear_asserted_alarm ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, ptr, REASON_IGNORED ); + } + clear_logged_state ( ptr ) ; + } + + /* if not major then set it to major */ + if ( ptr->status.compare("major") ) + { + ptr->status = "major" ; + mod_status = true ; + } + } + else if (( severity == HWMON_SEVERITY_CRITICAL ) || + ( severity == HWMON_SEVERITY_NONRECOVERABLE )) + { + /* log and alarm state changes are handled when the ignore + * action is set in the modify handler so there is no need + * to call the manager in the ignore case */ + if (( ptr->suppress == false ) && ( ptr->actions_critl.compare (HWMON_ACTION_IGNORE))) + { + if ( !ptr->actions_critl.compare (HWMON_ACTION_RESET)) + { + if ( host_ptr->monitor == false ) + { + /* Ignore event while we are not monitoring */ + ilog ("%s %s ignoring 'reset action' while not monitoring\n", + host_ptr->hostname.c_str(), + ptr->sensorname.c_str()); + } + else + { + if ( ptr->critl.alarmed == false ) + { + hwmonAlarm_critical ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, + ptr->sensorname, REASON_RESETTING ) ; + } 
+ clear_alarmed_state ( ptr ); + set_alarmed_severity ( ptr, FM_ALARM_SEVERITY_CRITICAL ); + + if ( ptr->degraded == false ) + { + ptr->degraded = true ; + } + + clear_ignored_state ( ptr ); + clear_logged_state ( ptr ); + + /* Send reset request to mtcAgent */ + wlog ("%s requesting 'reset' due to critical '%s' sensor\n", + host_ptr->hostname.c_str(), + ptr->sensorname.c_str()); + + hwmon_send_event ( host_ptr->hostname, + MTC_EVENT_HWMON_RESET, + ptr->sensorname.data()); + } + } + else if ( !ptr->actions_critl.compare (HWMON_ACTION_POWERCYCLE)) + { + if ( host_ptr->monitor == false ) + { + /* Ignore event while we are not monitoring */ + ilog ("%s %s ignoring 'power-cycle action' while not monitoring\n", + host_ptr->hostname.c_str(), + ptr->sensorname.c_str()); + } + else + { + if ( ptr->critl.alarmed == false ) + { + hwmonAlarm_critical ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, + ptr->sensorname, REASON_POWERCYCLING ) ; + } + clear_alarmed_state ( ptr ); + + set_alarmed_severity ( ptr, FM_ALARM_SEVERITY_CRITICAL ); + + if ( ptr->degraded == false ) + { + ptr->degraded = true ; + } + + clear_ignored_state ( ptr ); + clear_logged_state ( ptr ); + + wlog ("%s requesting 'powercycle' due to critical '%s' sensor\n", + host_ptr->hostname.c_str(), + ptr->sensorname.c_str()); + + /* Send reset request to mtcAgent */ + hwmon_send_event ( host_ptr->hostname, + MTC_EVENT_HWMON_POWERCYCLE, + ptr->sensorname.data()); + } + } + else + { + hwmonHostClass::manage_sensor_state ( host_ptr->hostname, ptr, HWMON_SEVERITY_CRITICAL ); + } + } + else + { + if ( ptr->alarmed == true ) + { + /* We may have transitioned to ignore from an alarm state so check and clear if an alarm exists */ + clear_asserted_alarm ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, ptr, REASON_IGNORED ); + } + else + { + blog2 ("%s %s is not alarmed\n", host_ptr->hostname.c_str(), ptr->sensorname.c_str() ); + } + clear_logged_state ( ptr ) ; + } + + /* if not critical then set it to critical */ + if ( 
ptr->status.compare("critical") ) + { + ptr->status = "critical" ; + mod_status = true ; + } + } + else + { + slog ("%s unknown severity (%d)\n", host_ptr->hostname.c_str(), severity ); + } + } /* end else that look at non-good severities */ + + if ( mod_status == true ) + { + hwmonHttp_mod_sensor ( host_ptr->hostname, host_ptr->event , ptr->uuid, "status" , ptr->status ); + } + ptr->severity = severity ; + + } /* end for loop over all sensors */ + if ( host_ptr->bmc_fw_version.empty() ) + { + string fn = (IPMITOOL_OUTPUT_DIR + host_ptr->hostname + "_mc_info") ; + if ( daemon_is_file_present ( fn.data() ) ) + { + host_ptr->bmc_fw_version = + get_bmc_version_string ( host_ptr->hostname, + fn.data() ); + } + if ( !host_ptr->bmc_fw_version.empty() ) + { + ilog ("%s bmc fw version: %s\n", + host_ptr->hostname.c_str(), + host_ptr->bmc_fw_version.c_str()); + } + } + + /* Start the next group interval timer */ + if ( host_ptr->interval < HWMON_MIN_AUDIT_INTERVAL ) + { + ilog ("%s monitor interval set to a %d secs cadence (%d)\n", + host_ptr->hostname.c_str(), + HWMON_DEFAULT_AUDIT_INTERVAL, + host_ptr->interval); + host_ptr->interval = HWMON_DEFAULT_AUDIT_INTERVAL ; + interval_change_handler ( host_ptr ); + } + + /* exit sensor model relearn mode if we have sensors and groups */ + if (( host_ptr->relearn == true ) && + ( host_ptr->sensors ) && ( host_ptr->groups )) + { + mtcTimer_reset ( host_ptr->relearnTimer ); + host_ptr->relearn_done_date.clear(); + host_ptr->relearn = false ; + plog ("%s sensor model relearn complete\n", + host_ptr->hostname.c_str()); + } + + mtcTimer_start ( host_ptr->monitor_ctrl.timer, + hwmonTimer_handler, + host_ptr->interval ); + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__DELAY ); + break ; + } + + case HWMON_SENSOR_MONITOR__FAIL: + { + host_ptr->ping_info.ok = false ; + host_ptr->ipmitool_thread_ctrl.retries = 0 ; + + mtcTimer_reset ( host_ptr->monitor_ctrl.timer ); + + if ( 
host_ptr->ipmitool_thread_info.status ) + { + elog ("%s sensor monitoring failure (rc:%d)\n", + host_ptr->hostname.c_str(), + host_ptr->ipmitool_thread_info.status ); + if ( host_ptr->ipmitool_thread_info.data.length() ) + { + string _temp = host_ptr->ipmitool_thread_info.status_string ; + size_t pos = _temp.find ("-f", 0) ; + + if ( pos != std::string::npos ) + { + /* don't log the password filename */ + elog ("%s ... %s\n", + host_ptr->hostname.c_str(), + _temp.substr(0,pos).c_str()); + } + else + { + elog ("%s ... %s\n", + host_ptr->hostname.c_str(), + host_ptr->ipmitool_thread_info.status_string.c_str()); + } + } + } + + if ( host_ptr->ipmitool_thread_ctrl.id ) + { + slog ("%s sensor monitor thread is unexpectedly active ; handling as failure\n", + host_ptr->hostname.c_str()); + + thread_kill ( host_ptr->ipmitool_thread_ctrl, host_ptr->ipmitool_thread_info ); + } + + if ( host_ptr->interval ) + { + ipmi_set_group_state ( host_ptr, "failed" ) ; + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__START ); + } + else + { + /* TODO: Error case that should not happen ; need to force reprovision */ + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__IDLE ); + } + break ; + } + case HWMON_SENSOR_MONITOR__STAGES: + default: + { + slog ("%s Invalid stage (%d)\n", + host_ptr->hostname.c_str(), + host_ptr->monitor_ctrl.stage ); + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__START ); + } + } + } + return (rc); +} + +/* Delete Handler + * ----------------- */ +int hwmonHostClass::delete_handler ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + if ( host_ptr == NULL ) + { + slog ("delete handler called with null pointer\n"); + return (FAIL_NULL_POINTER); + } + + switch ( host_ptr->delStage ) + { + case HWMON_DEL__START: + { + ilog ("%s Delete Operation Started\n", host_ptr->hostname.c_str()); + host_ptr->retries = 0 ; + + if ( 
host_ptr->bm_provisioned == true ) + { + set_bm_prov ( host_ptr, false); + } + + if ( host_ptr->ipmitool_thread_ctrl.stage != THREAD_STAGE__IDLE ) + { + int delay = THREAD_POST_KILL_WAIT ; + thread_kill ( host_ptr->ipmitool_thread_ctrl , host_ptr->ipmitool_thread_info) ; + + ilog ("%s thread active ; sending kill ; waiting %d seconds\n", + host_ptr->hostname.c_str(), delay ); + mtcTimer_reset ( host_ptr->hostTimer ); + mtcTimer_start ( host_ptr->hostTimer, hwmonTimer_handler, delay ); + host_ptr->delStage = HWMON_DEL__WAIT ; + } + else + { + host_ptr->delStage = HWMON_DEL__DONE ; + } + + + break ; + } + case HWMON_DEL__WAIT: + { + if ( mtcTimer_expired ( host_ptr->hostTimer ) ) + { + if ( host_ptr->ipmitool_thread_ctrl.stage != THREAD_STAGE__IDLE ) + { + if ( host_ptr->retries++ < 3 ) + { + wlog ("%s still waiting on active thread ; sending another kill signal (try %d or %d)\n", + host_ptr->hostname.c_str(), host_ptr->retries, 3 ); + + thread_kill ( host_ptr->ipmitool_thread_ctrl, host_ptr->ipmitool_thread_info ) ; + mtcTimer_start ( host_ptr->hostTimer, hwmonTimer_handler, THREAD_POST_KILL_WAIT ); + break ; + } + else + { + elog ("%s thread refuses to stop ; giving up ...\n", + host_ptr->hostname.c_str()); + } + } + host_ptr->delStage = HWMON_DEL__DONE ; + } + break ; + } + case HWMON_DEL__DONE: + { + /* ok now delete the host */ + del_host ( host_ptr->hostname ); + this->host_deleted = true ; + break ; + } + default: + { + ilog ("%s invalid delete stage (%d) ; correcting ...\n", host_ptr->hostname.c_str(), host_ptr->delStage ); + host_ptr->delStage = HWMON_DEL__START ; + } + } + return (PASS); +} + + +/***************************************************************************** + * + * Name : manage_startup_states + * + * Description: Manage the sensor startup states. 
+ * + * This means failure log, alarm and degraded states on + * startup for groups and sensors + * + *****************************************************************************/ + +bool hwmonHostClass::manage_startup_states ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + int rc = PASS ; + if ( host_ptr ) + { + + std::list::iterator _iter_ptr ; + std::list alarm_list ; + alarm_list.clear(); + + /********************** Manage Profile Alarms ***********************/ + + /* clear this config alarm as it is not used anymore - handles patchback case. + * Its cheaper to send a clear than it is to query for it first */ + hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSORCFG, "sensor", REASON_OK ); + +#ifdef WANT_QUERY_SENSOR_CONFIG_ALARM + /* We don't degrade for sensor config error - this is similar to a + * BMC access error in mtcAgent where we only raise a minor alarm */ + if ( hwmon_alarm_query ( host_ptr->hostname, HWMON_ALARM_ID__SENSORCFG, "profile" ) != FM_ALARM_SEVERITY_CLEAR ) + host_ptr->alarmed_config = true ; +#endif + if ( host_ptr->alarmed_config == false ) + { + hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSORCFG, "profile", REASON_OK ); + host_ptr->alarmed_config = false ; + } + + /********************** Manage Group Alarms ***********************/ + string entity = "host=" + host_ptr->hostname + ".sensorgroup=" ; + + /* 1. Query for all group alarms */ + rc = hwmonAlarm_query_entity ( host_ptr->hostname, entity, alarm_list ); + if ( rc != PASS ) + { + elog ("%s sensorgroup alarm query failed\n", host_ptr->hostname.c_str() ); + return (FAIL_OPERATION); + } + + /* 2. 
Search the alarm list for orphan groups + * - group alarms that are not in the current group list + * - should not occur but is a catch all for stuck group alarms */ + for ( _iter_ptr = alarm_list.begin(); _iter_ptr != alarm_list.end(); ++_iter_ptr ) + { + bool found = false ; + for ( int g = 0 ; g < host_ptr->groups ; g++ ) + { + string _temp = entity + host_ptr->group[g].group_name ; + if ( _iter_ptr->instance.compare(_temp) == 0 ) + { + found = true ; + break ; + } + } + if ( found == false ) + { + string groupname = _iter_ptr->instance.substr (entity.length()) ; + wlog ("%s found orphan group alarm '%s' ; clearing\n", host_ptr->hostname.c_str(), groupname.c_str() ); + hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSORGROUP, groupname, REASON_DEPROVISIONED ); + } + } + + /* 3. Look up each alarmed group and then manage that alarm */ + for ( int g = 0 ; g < host_ptr->groups ; g++ ) + { + struct sensor_group_type * group_ptr = &host_ptr->group[g] ; + bool found = false ; + bool raise = false ; + bool clear = false ; + daemon_signal_hdlr (); + + if ( alarm_list.size() ) + { + for ( _iter_ptr = alarm_list.begin(); _iter_ptr != alarm_list.end(); ++_iter_ptr ) + { + string _temp = entity + group_ptr->group_name ; + if ( _iter_ptr->instance.compare(_temp) == 0 ) + { + ilog ("%s '%s' group '%s' alarm already set\n", + host_ptr->hostname.c_str(), + host_ptr->group[g].group_name.c_str(), + alarmUtil_getSev_str(_iter_ptr->severity).c_str()); + found = true ; + break ; + } + } + } + + /* Note: if found == true then the group_ptr points to the group that + * has the alarm raised and _iter_ptr point to the alarm info */ + + /* Determine if this alarm needs to be raised or cleared ... 
or left alone + * Database state takes precidence of all */ + if ( group_ptr->group_state.compare("failed") == 0 ) + { + group_ptr->failed = true ; + group_ptr->alarmed = true ; + if ( found == true ) + { + if ( _iter_ptr->severity != FM_ALARM_SEVERITY_MAJOR ) + { + slog ("%s %s group alarm severity incorrect (%d:%s) ; correcting \n", + host_ptr->hostname.c_str(), + _iter_ptr->entity.c_str(), + _iter_ptr->severity, + alarmUtil_getSev_str(_iter_ptr->severity).c_str()); + raise = true ; + } + } + else + { + raise = true ; + } + } + else + { + group_ptr->failed = false ; + group_ptr->alarmed = false ; + if ( found == true ) + { + clear = true ; + } + } + + if ( raise == true ) + { + group_ptr->failed = true ; + group_ptr->alarmed = true ; + hwmonAlarm_major ( host_ptr->hostname, HWMON_ALARM_ID__SENSORGROUP, group_ptr->group_name, REASON_DEGRADED ); + } + + if ( clear == true ) + { + group_ptr->failed = false ; + group_ptr->alarmed = false ; + hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSORGROUP, group_ptr->group_name, REASON_OK ); + } + } + + /********************** Manage Sensor Alarms ***********************/ + + /* 1. Query Sensor Alarm States from FM */ + entity = "host=" + host_ptr->hostname + ".sensor=" ; + + rc = hwmonAlarm_query_entity ( host_ptr->hostname, entity, alarm_list ); + if ( rc != PASS ) + { + elog ("%s sensor alarm query failed\n", host_ptr->hostname.c_str() ); + return (FAIL_OPERATION); + } + + /* 2. 
Search the alarm list for orphan sensors + * - sensor alarms that are not in the current sensor list + * - should not occur but is a catch all for stuck sensor alarms */ + for ( _iter_ptr = alarm_list.begin (); _iter_ptr != alarm_list.end () ; ++_iter_ptr ) + { + bool found = false ; + for ( int s = 0 ; s < host_ptr->sensors ; s++ ) + { + string _temp = entity + host_ptr->sensor[s].sensorname ; + if ( _iter_ptr->instance.compare(_temp) == 0 ) + { + ilog ("%s '%s' sensor '%s' alarm already set\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[s].sensorname.c_str(), + alarmUtil_getSev_str(_iter_ptr->severity).c_str()); + found = true ; + break ; + } + } + if ( found == false ) + { + string sensorname = _iter_ptr->instance.substr (entity.length()) ; + wlog ("%s found orphan sensor alarm '%s' ; clearing\n", host_ptr->hostname.c_str(), sensorname.c_str() ); + hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, sensorname, REASON_DEPROVISIONED ); + } + } + + /* 3. manage the state of sensors alarms */ + for ( int s = 0 ; s < host_ptr->sensors ; s++ ) + { + std::list::iterator _iter_ptr ; + sensor_type * sensor_ptr = &host_ptr->sensor[s] ; + string reason = REASON_OK ; + bool found = false ; + bool clear = false ; + bool minor = false ; + bool major = false ; + bool critl = false ; + + daemon_signal_hdlr (); + + if ( alarm_list.size() ) + { + for ( _iter_ptr = alarm_list.begin () ; + _iter_ptr != alarm_list.end () ; + ++_iter_ptr ) + { + string _temp = entity + sensor_ptr->sensorname ; + if ( _iter_ptr->instance.compare(_temp) == 0 ) + { + found = true ; + break ; + } + } + } + + /* Note: if found == true then the sensor_ptr points to the sensor that + * has the alarm raised and _iter_ptr point to the alarm info */ + + /* Determine if this alarm needs to be raised or cleared ... 
or left alone + * Database state takes precidence of all */ + if ( sensor_ptr->status.compare("ok") == 0 ) + { + clear_alarmed_state ( sensor_ptr ); + clear_degraded_state ( sensor_ptr ); + if ( found == true ) + { + clear = true ; + } + } + else if ( sensor_ptr->status.compare("offline") == 0 ) + { + clear_alarmed_state ( sensor_ptr ); + clear_degraded_state ( sensor_ptr ); + if ( found == true ) + { + clear = true ; + } + } + else if ( sensor_ptr->status.compare("minor") == 0 ) + { + if ( sensor_ptr->actions_minor.compare("alarm")) + { + if ( found == true ) + { + clear = true ; + } + if ( sensor_ptr->actions_minor.compare("log") == 0 ) + { + set_logged_severity ( sensor_ptr, FM_ALARM_SEVERITY_MINOR ); + reason = REASON_SET_TO_LOG ; + } + if ( sensor_ptr->actions_major.compare("ignore") == 0 ) + { + set_ignored_severity ( sensor_ptr, FM_ALARM_SEVERITY_MINOR ); + reason = REASON_IGNORED ; + } + } + else if ( sensor_ptr->suppress == true ) + { + if ( found == true ) + { + reason = REASON_SUPPRESSED ; + clear = true ; + } + } + /** + * else this is an alarm case ... 
+ * - if no alarm found then raise the minor alarm + * - if alarm found but not in proper severity then + * raise the minor alarm + **/ + else + { + set_alarmed_severity ( sensor_ptr , FM_ALARM_SEVERITY_MINOR ); + clear_degraded_state ( sensor_ptr ); + if (( found == false ) || + (( found == true ) && ( _iter_ptr->severity != FM_ALARM_SEVERITY_MINOR ))) + { + /* correct the severity of the alarm */ + minor = true ; + } + } + } + else if ( sensor_ptr->status.compare("major") == 0 ) + { + if ( sensor_ptr->actions_major.compare("alarm")) + { + if ( found == true ) + { + clear = true ; + } + if ( sensor_ptr->actions_major.compare("log") == 0 ) + { + set_logged_severity ( sensor_ptr, FM_ALARM_SEVERITY_MAJOR ); + reason = REASON_SET_TO_LOG ; + } + if ( sensor_ptr->actions_major.compare("ignore") == 0 ) + { + set_ignored_severity ( sensor_ptr, FM_ALARM_SEVERITY_MAJOR ) ; + reason = REASON_IGNORED ; + } + } + else if ( sensor_ptr->suppress == true ) + { + if ( found == true ) + { + reason = REASON_SUPPRESSED ; + clear = true ; + } + } + /** + * else this is an alarm case ... 
+ * - if no alarm found then raise the major alarm + * - if alarm found but not in proper severity then + * raise the major alarm + **/ + else + { + set_alarmed_severity ( sensor_ptr , FM_ALARM_SEVERITY_MAJOR ); + set_degraded_state ( sensor_ptr ); + if (( found == false ) || + (( found == true ) && ( _iter_ptr->severity != FM_ALARM_SEVERITY_MAJOR ))) + { + /* correct the severity of the alarm */ + major = true ; + } + } + } + else if ( sensor_ptr->status.compare("critical") == 0 ) + { + if ( sensor_ptr->actions_critl.compare("alarm")) + { + if ( found == true ) + { + clear = true ; + } + if ( sensor_ptr->actions_critl.compare("log") == 0 ) + { + set_logged_severity ( sensor_ptr, FM_ALARM_SEVERITY_CRITICAL ) ; + reason = REASON_SET_TO_LOG ; + } + if ( sensor_ptr->actions_critl.compare("ignore") == 0 ) + { + set_ignored_severity ( sensor_ptr , FM_ALARM_SEVERITY_CRITICAL ) ; + reason = REASON_IGNORED ; + } + } + else if ( sensor_ptr->suppress == true ) + { + if ( found == true ) + { + reason = REASON_SUPPRESSED ; + clear = true ; + } + } + /** + * else this is an alarm case ... 
+ * - if no alarm found then raise the critical alarm + * - if alarm found but not in proper severity then + * raise the critical alarm + **/ + else + { + set_alarmed_severity ( sensor_ptr , FM_ALARM_SEVERITY_CRITICAL ); + set_degraded_state ( sensor_ptr ); + if (( found == false ) || + (( found == true ) && ( _iter_ptr->severity != FM_ALARM_SEVERITY_CRITICAL ))) + { + /* correct the severity of the alarm */ + critl = true ; + } + } + } + + if ( clear == true ) + { + clear_alarmed_state ( sensor_ptr ); + clear_degraded_state ( sensor_ptr ); + hwmonAlarm_clear ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason ); + } + + else if ( minor == true ) + { + clear_degraded_state ( sensor_ptr ); + set_alarmed_severity ( sensor_ptr, FM_ALARM_SEVERITY_MINOR ); + hwmonAlarm_minor ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, REASON_DEGRADED ); + } + + else if ( major == true ) + { + set_degraded_state ( sensor_ptr ); + set_alarmed_severity ( sensor_ptr, FM_ALARM_SEVERITY_MAJOR ); + hwmonAlarm_major ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, REASON_DEGRADED ); + } + + else if ( critl == true ) + { + clear_degraded_state ( sensor_ptr ); + set_alarmed_severity ( sensor_ptr, FM_ALARM_SEVERITY_CRITICAL); + hwmonAlarm_critical ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, REASON_DEGRADED ); + } + // sensorState_print ( host_ptr->hostname, sensor_ptr ); + } + } + else + { + rc = FAIL_NULL_POINTER ; + } + return (rc); +} + +/***************************************************************************** + * + * Name : monitor_now + * + * Description: Force monitor to occur immediately. 
+ * + ****************************************************************************/ + +void hwmonHostClass::monitor_now ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + if ( host_ptr ) + { + if ( host_ptr->monitor_ctrl.stage == HWMON_SENSOR_MONITOR__DELAY ) + { + mtcTimer_reset ( host_ptr->monitor_ctrl.timer ); + host_ptr->monitor_ctrl.timer.ring = true ; + dlog ("%s force monitor now\n", host_ptr->hostname.c_str() ); + } + } + else + { + slog ("null host pointer\n"); + } +} + +/***************************************************************************** + * + * Name : monitor_soon + * + * Description: Force monitor to occur in 30 seconds. + * + ****************************************************************************/ + +void hwmonHostClass::monitor_soon ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + if ( host_ptr ) + { + int delay = MTC_SECS_5 ; + + wlog ("%s sensor monitoring FSM stage (%d) aborted\n", + host_ptr->hostname.c_str(), + host_ptr->monitor_ctrl.stage); + + if ( host_ptr->ipmitool_thread_ctrl.id ) + { + ilog ("%s stopping current thread (%lu)\n", host_ptr->hostname.c_str(), host_ptr->ipmitool_thread_ctrl.id ); + thread_kill ( host_ptr->ipmitool_thread_ctrl, host_ptr->ipmitool_thread_info ); + + /* have to wait a bit longer than THREAD_POST_KILL_WAIT for the thread kill to happen */ + delay += THREAD_POST_KILL_WAIT ; + } + + _stage_change ( host_ptr->hostname, + host_ptr->monitor_ctrl.stage, + HWMON_SENSOR_MONITOR__DELAY) ; + + mtcTimer_reset ( host_ptr->monitor_ctrl.timer ); + mtcTimer_start ( host_ptr->monitor_ctrl.timer, + hwmonTimer_handler, delay ); + + ilog ("%s sensor monitoring will resume in %d seconds\n", + host_ptr->hostname.c_str(), delay ); + } + else + { + slog ("null host pointer\n"); + } +} diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHttp.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHttp.cpp new file mode 100644 index 00000000..d614f811 --- /dev/null +++ 
b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHttp.cpp @@ -0,0 +1,2216 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform - Hardware Monitoring and Fault Handling + * Access to Inventory Database via REST API Interface. + * + */ + +/** This file implements + * + * 1. an http client used to register sensors with sysinv + * 2. an http server used to receive sensor configuration changes from sysinv + * 3. an http client used to send sensor samples to Ceilometer. + * + **/ + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "hwm" + +#include "nodeBase.h" /* for ... Base Service Header */ +#include "nodeUtil.h" /* for ... Utility Service Header */ +#include "hostUtil.h" /* for ... Host Utilities */ +#include "jsonUtil.h" /* for ... common Json utilities */ +#include "httpUtil.h" /* for ... common Http utilities */ +#include "hwmonUtil.h" /* for ... is_valid_action */ +#include "hwmonHttp.h" /* this .. module header */ +#include "hwmonClass.h" /* for ... service class definition */ +#include "hwmonSensor.h" /* for ... hwmonSensor_print */ +#include "hwmonAlarm.h" /* for ... 
hwmonAlarm */ + +//ceilometer/ceilometer.conf port=8777 +//ceilometer/ceilometer.conf host=192.168.204.2 + + +static event_type hwmon_event ; + + +void hwmonHttp_server_init ( void ) +{ + } + +/* Cleanup */ +void hwmonHttp_server_fini ( void ) +{ + if ( hwmon_event.fd ) + { + if ( hwmon_event.base ) + { + event_base_free( hwmon_event.base); + } + close ( hwmon_event.fd ); + } +} + +time_debug_type before ; +time_debug_type after ; +time_delta_type delta = { 0, 0 } ; + +/* Look for events */ +void hwmonHttp_server_look ( void ) +{ + /* Look for INV Events */ + if ( hwmon_event.base ) + { + gettime ( after ) ; + event_base_loop( hwmon_event.base, EVLOOP_NONBLOCK ); + gettime (before) ; + + timedelta ( before , after, delta ); + + if ( delta.secs > 1 ) + { + ilog ("-----> SERVICE STALL : did not service sysinv events for %ld.%ld sec\n", delta.secs, delta.msecs/1000); + } + //if ( inside_delta > 50000000 ) + //{ + // ilog ("-----> LIBEVENT STALL: event_base_loop select stalled for %llu msec\n", inside_delta/1000000 ); + //} + } +} + + +/* The ',' is needed and the '.' is only fior the end of he full line */ +void _create_error_response ( string hostname, string & resp_buffer, int error ) +{ + resp_buffer = "{" ; + resp_buffer.append (" \"status\" : \"fail\""); + if ( error == HTTP_BADREQUEST ) + { + resp_buffer.append (",\"reason\" : \"Request appears invalid. Check logs for more detail\""); + resp_buffer.append (",\"action\" : \"Retry operation or contact next level support.\""); + } + else if ( error == MTC_HTTP_FORBIDDEN ) + { + resp_buffer.append (",\"reason\" : \"Unknown User-Agent specified in request\""); + resp_buffer.append (",\"action\" : \"Retry operation or contact next level support.\""); + } + else if ( error == HTTP_NOTFOUND ) + { + resp_buffer.append (",\"reason\" : \"Specified sensor or group was not found\""); + resp_buffer.append (",\"action\" : \"Please retry request, sensor loading may be in progress. 
"); + resp_buffer.append ("If problem persists then Lock and Unlock host to force re-read of sensor information. "); + resp_buffer.append ("Then, if problem continues please contact the system administrator for further assistance.\""); + } + else if ( error == MTC_HTTP_CONFLICT ) + { + resp_buffer.append (",\"reason\" : \"Requested operation failed.\""); + resp_buffer.append (",\"action\" : \"Retry request and if the problem persists deprovision and then reprovision BMC. "); + resp_buffer.append ("With the BMC reprovisioned retry the request. "); + resp_buffer.append ("Then, if the problem continues please contact the system administrator for further assistance.\""); + } + else if ( error == MTC_HTTP_LENGTH_REQUIRED ) + { + resp_buffer.append (",\"reason\" : \"Sensor modify request with no accompanying data\""); + resp_buffer.append (",\"action\" : \"Retry operation and if problem continues contact next level support.\""); + } + else if ( error == FAIL_INVALID_DATA ) + { + resp_buffer.append (",\"reason\" : \"Requested sensor audit interval is too frequent\""); + resp_buffer.append (",\"action\" : \"Please use value larger than "); + resp_buffer.append (itos(HWMON_MIN_AUDIT_INTERVAL)); + resp_buffer.append (" secs.\""); + } + else if ( error == FAIL_BAD_STATE ) + { + resp_buffer.append (",\"reason\" : \"Invalid sensor action\""); + resp_buffer.append (",\"action\" : \"Please select one of the following supported actions: 'ignore','log' or 'alarm'.\""); + } + else + { + resp_buffer.append (",\"reason\" : \"Unknown Error code "); + resp_buffer.append (itos(error)); + resp_buffer.append ("\",\"action\" : \"Undetermined\""); + wlog ("%s no supported reason/action string for error code %d.\n", hostname.c_str(), error); + } + resp_buffer.append ("}"); +} + + +int hwmonJson_load_sensor ( string hostname , string sensor_record , sensor_type & sensor ) +{ + int rc = FAIL_STRING_EMPTY ; + + if ( !sensor_record.empty() ) + { + jlog ("Event Payload: %s", 
sensor_record.c_str()); + + struct json_object *json_obj = json_tokener_parse((char*)sensor_record.data()); + if ( !json_obj ) + { + elog ("%s No or invalid sysinv sensor record\n", hostname.c_str()); + return (FAIL_JSON_PARSE); + } + + /* Get all required fields */ + sensor.uuid = jsonUtil_get_key_value_string ( json_obj, MTC_JSON_INV_UUID ); + sensor.group_uuid = jsonUtil_get_key_value_string ( json_obj, "sensorgroup_uuid"); + sensor.host_uuid = jsonUtil_get_key_value_string ( json_obj, "host_uuid" ); + sensor.sensorname = jsonUtil_get_key_value_string ( json_obj, "sensorname" ); + sensor.datatype = jsonUtil_get_key_value_string ( json_obj, "datatype" ); + sensor.sensortype = jsonUtil_get_key_value_string ( json_obj, "sensortype" ); + sensor.actions_minor = jsonUtil_get_key_value_string ( json_obj, "actions_minor" ); + sensor.actions_major = jsonUtil_get_key_value_string ( json_obj, "actions_major" ); + sensor.actions_critl = jsonUtil_get_key_value_string ( json_obj, "actions_critical"); + sensor.algorithm = jsonUtil_get_key_value_string ( json_obj, "algorithm"); + sensor.status = jsonUtil_get_key_value_string ( json_obj, "status"); + sensor.state = jsonUtil_get_key_value_string ( json_obj, "state"); + sensor.path = jsonUtil_get_key_value_string ( json_obj, "path" ); + + /* if there is no stored path then the entity path is just the sensor name */ + if ( sensor.path.empty() ) + { + sensor.entity_path = sensor.sensorname ; + } + else + { + /* otherwise prefix the entity path with the path */ + sensor.entity_path = sensor.path ; + sensor.entity_path.append(ENTITY_DELIMITER); + sensor.entity_path.append(sensor.sensorname); + } + + string suppress_string= jsonUtil_get_key_value_string ( json_obj, "suppress" ); + if (( !suppress_string.compare("True")) || ( !suppress_string.compare("true"))) + sensor.suppress = true ; + else + sensor.suppress = false ; + + /* Convert actions to lower case */ + sensor.actions_minor = tolowercase (sensor.actions_minor); + 
sensor.actions_major = tolowercase (sensor.actions_major); + sensor.actions_critl = tolowercase (sensor.actions_critl); + + if ( !sensor.datatype.compare("analog") ) + { + string temp ; + + // sensor.t_critical_lower = jsonUtil_get_key_value_string ( json_obj, "t_critical_lower"); + temp = jsonUtil_get_key_value_string ( json_obj, "t_major_lower"); + if ( !temp.empty() ) sensor.t_major_lower = atof(temp.data()); + + temp = jsonUtil_get_key_value_string ( json_obj, "t_minor_lower"); + if ( !temp.empty() ) sensor.t_minor_lower = atof(temp.data()); + + temp = jsonUtil_get_key_value_string ( json_obj, "t_minor_upper"); + if ( !temp.empty() ) sensor.t_minor_upper = atof(temp.data()); + + temp = jsonUtil_get_key_value_string ( json_obj, "t_major_upper"); + if ( !temp.empty() ) sensor.t_major_upper = atof(temp.data()); + + temp = jsonUtil_get_key_value_string ( json_obj, "t_critical_upper"); + if ( !temp.empty() ) sensor.t_critical_upper = atof(temp.data()); + + temp = jsonUtil_get_key_value_string ( json_obj, "t_critical_lower"); + if ( !temp.empty() ) sensor.t_critical_lower = atof(temp.data()); + + sensor.unit_base = jsonUtil_get_key_value_string ( json_obj, "unit_base" ); + sensor.unit_rate = jsonUtil_get_key_value_string ( json_obj, "unit_rate" ); + sensor.unit_modifier = jsonUtil_get_key_value_string ( json_obj, "unit_modifier" ); + } + + if (json_obj) json_object_put(json_obj); + rc = PASS ; + } + return (rc); +} + +int hwmonJson_load_group ( string hostname , string group_record , struct sensor_group_type & group ) +{ + int rc = FAIL_STRING_EMPTY ; + + if ( !group_record.empty() ) + { + jlog ("Event Payload: %s", group_record.c_str()); + + struct json_object *json_obj = json_tokener_parse((char*)group_record.data()); + if ( !json_obj ) + { + elog ("%s No or invalid sysinv sensor group record\n", hostname.c_str()); + return (FAIL_JSON_PARSE); + } + /* Get all required fields */ + group.group_name = jsonUtil_get_key_value_string ( json_obj, "sensorgroupname" ); + 
group.actions_minor_group = jsonUtil_get_key_value_string ( json_obj, "actions_minor_group" ); + group.actions_major_group = jsonUtil_get_key_value_string ( json_obj, "actions_major_group" ); + group.actions_critl_group = jsonUtil_get_key_value_string ( json_obj, "actions_critical_group"); + group.algorithm = jsonUtil_get_key_value_string ( json_obj, "algorithm"); + group.group_uuid = jsonUtil_get_key_value_string ( json_obj, "uuid"); + group.host_uuid = jsonUtil_get_key_value_string ( json_obj, "host_uuid"); + group.group_state = jsonUtil_get_key_value_string ( json_obj, "state"); + group.path = jsonUtil_get_key_value_string ( json_obj, "path"); + group.status = jsonUtil_get_key_value_string ( json_obj, "status"); + string suppress_string = jsonUtil_get_key_value_string ( json_obj, "suppress" ); + if (( !suppress_string.compare("True")) || ( !suppress_string.compare("true"))) + group.suppress = true ; + else + group.suppress = false ; + + group.group_interval = jsonUtil_get_key_value_int ( json_obj, "audit_interval_group" ); + + /* Convert actions to lower case */ + group.actions_minor_group = tolowercase (group.actions_minor_group); + group.actions_major_group = tolowercase (group.actions_major_group); + group.actions_critl_group = tolowercase (group.actions_critl_group); + + if ( !group.datatype.compare("analog") ) + { + string temp ; + + temp = jsonUtil_get_key_value_string ( json_obj, "t_major_lower_group"); + if ( !temp.empty() ) group.t_major_lower_group = atof(temp.data()); + + temp = jsonUtil_get_key_value_string ( json_obj, "t_minor_lower_group"); + if ( !temp.empty() ) group.t_minor_lower_group = atof(temp.data()); + + temp = jsonUtil_get_key_value_string ( json_obj, "t_minor_upper_group"); + if ( !temp.empty() ) group.t_minor_upper_group = atof(temp.data()); + + temp = jsonUtil_get_key_value_string ( json_obj, "t_major_upper_group"); + if ( !temp.empty() ) group.t_major_upper_group = atof(temp.data()); + + temp = jsonUtil_get_key_value_string ( 
json_obj, "t_critical_upper_group"); + if ( !temp.empty() ) group.t_critical_upper_group = atof(temp.data()); + + temp = jsonUtil_get_key_value_string ( json_obj, "t_critical_lower_group"); + if ( !temp.empty() ) group.t_critical_lower_group = atof(temp.data()); + + group.unit_base_group = jsonUtil_get_key_value_string ( json_obj, "unit_base_group" ); + group.unit_rate_group = jsonUtil_get_key_value_string ( json_obj, "unit_rate_group" ); + group.unit_modifier_group = jsonUtil_get_key_value_string ( json_obj, "unit_modifier_group" ); + } + group.actions_minor_choices = jsonUtil_get_key_value_string ( json_obj, "actions_minor_choices"); + group.actions_major_choices = jsonUtil_get_key_value_string ( json_obj, "actions_major_choices"); + group.actions_critical_choices = jsonUtil_get_key_value_string ( json_obj, "actions_critical_choices"); + + if (json_obj) json_object_put(json_obj); + rc = PASS ; + } + return (rc); +} + + + + +/******************************************************************** + * + * Verify this request contains valid client info. + * + * i.e. 
the user-Agent header needs to exist and be set to
+ *      CLIENT_SYSINV_1_0
+ *
+ * Returns the client type implied by the request URL (CLIENT_SENSORS or
+ * CLIENT_SENSORGROUPS) when the CLIENT_HEADER value matches
+ * CLIENT_SYSINV_1_0 ; otherwise CLIENT_NONE.
+ *
+ ********************************************************************/
+mtc_client_enum _get_client_id ( struct evhttp_request *req )
+{
+    mtc_client_enum client_hdr = CLIENT_NONE ;
+    mtc_client_enum client_url = CLIENT_NONE ;
+
+    const char * url_ptr = evhttp_request_get_uri (req);
+    if ( url_ptr == NULL )
+    {
+        /* strstr and the %s logs below must never see a NULL uri */
+        wlog ("Unsupported URL: request has no URI\n");
+        return CLIENT_NONE;
+    }
+
+    const char * group_event_ptr  = strstr ( url_ptr, SYSINV_ISENSORGROUPS_LABEL);
+    const char * sensor_event_ptr = strstr ( url_ptr, SYSINV_ISENSOR_LABEL);
+    jlog ("URI: %s\n", url_ptr );
+    if ( sensor_event_ptr != NULL )
+    {
+        client_url = CLIENT_SENSORS ;
+    }
+    else if (group_event_ptr != NULL)
+    {
+        client_url = CLIENT_SENSORGROUPS ;
+    }
+    else
+    {
+        wlog ("Unsupported URL:%s)\n", url_ptr );
+        return CLIENT_NONE;
+    }
+
+    /* Parse Headers we care about to verify that it also contains the correct User-Agent header */
+    struct evkeyvalq * headers_ptr = evhttp_request_get_input_headers (req);
+    const char * header_value_ptr = evhttp_find_header (headers_ptr, CLIENT_HEADER);
+    if ( header_value_ptr )
+    {
+        if ( ! strncmp ( header_value_ptr, CLIENT_SYSINV_1_0, 20 ) )
+        {
+            dlog3 ("%s\n", header_value_ptr );
+            client_hdr = CLIENT_SYSINV ;
+        }
+    }
+
+    /* Both client_url and client_hdr need to match */
+    if ( client_hdr != CLIENT_NONE )
+    {
+        return (client_url);
+    }
+    else
+    {
+        /* the header may be absent ; never hand a NULL pointer to %s */
+        wlog ("Unknown or mismatched client (hdr:%d:%s)\n",
+               client_hdr,
+               header_value_ptr ? header_value_ptr : "none");
+        return (CLIENT_NONE);
+    }
+}
+/*
+Sample sysinv sensor record:
+{"status": "ok", "t_critical_upper": null, "actions_minor": null, "sensorname": "5V Rail", "suppress": "False", "updated_at": null, "sensortype": "voltage", "t_critical_lower": null, "unit_base": null, "state_requested": null, "path": "", "unit_rate": null, "actions_critical": null, "id": 8, "t_minor_lower": null, "uuid": "5b019f20-fec1-4173-ab5a-0fb12be34a4d", "unit_modifier": null, "sensor_action_requested": null, "t_minor_upper": null, "datatype": "analog", "capabilities": {}, "t_major_lower": null, "state": "disabled", "sensorgroup_id": null, "host_id": 3, "algorithm": null, "t_major_upper": null, "audit_interval": null, "actions_major": null}
+*/
+
+/* Handle the sysinv sensor modify request
+ *
+ * Parses the json 'request_ptr' into a local sensor, looks that sensor up
+ * in the specified host by its entity path and applies a changed
+ * 'suppress' setting. All other attribute changes are only compiled in
+ * with WANT_SENSOR_ATTRIBUTE_MODIFY_SUPPORT.
+ *
+ * Returns the json response string and updates 'http_status_code' with
+ * HTTP_OK, HTTP_NOTFOUND (sensor not in hwmon) or HTTP_BADREQUEST. */
+string _sensor_modify_handler ( string hostname,
+                                char * request_ptr,
+                                int  & http_status_code)
+{
+    sensor_type sysinv_sensor ;
+    string      resp_buffer ;
+
+    int rc ;
+
+    resp_buffer.clear();
+
+    /* Load a local 'sysinv_sensor' variable with the sensor information from the
+     * sysinv request so that it can be compared to that same sensor in the host. */
+    hwmonSensor_init ( hostname, &sysinv_sensor );
+
+    /* a NULL request cannot be converted to a string ; treat it as empty */
+    if ( request_ptr == NULL )
+        rc = FAIL_STRING_EMPTY ;
+    else
+        rc = hwmonJson_load_sensor ( hostname, request_ptr, sysinv_sensor );
+
+    if ( rc == PASS )
+    {
+        hwmonHostClass * obj_ptr = get_hwmonHostClass_ptr() ;
+        resp_buffer = "{ \"status\" : \"pass\" }" ;
+        http_status_code = HTTP_OK ;
+        sensor_type * sensor_ptr = obj_ptr->get_sensor ( hostname, sysinv_sensor.entity_path );
+        if ( sensor_ptr )
+        {
+            if ( sensor_ptr->suppress != sysinv_sensor.suppress )
+            {
+                sensor_ptr->suppress = sysinv_sensor.suppress ;
+                if ( sysinv_sensor.suppress == true )
+                {
+                    hwmonLog ( hostname,
+                               HWMON_ALARM_ID__SENSOR,
+                               FM_ALARM_SEVERITY_CLEAR,
+                               sensor_ptr->sensorname,
+                               REASON_SUPPRESSED );
+                    handle_new_suppression ( sensor_ptr );
+                }
+                else
+                {
+                    hwmonLog ( hostname,
+                               HWMON_ALARM_ID__SENSOR,
+                               FM_ALARM_SEVERITY_CLEAR,
+                               sensor_ptr->sensorname,
+                               REASON_UNSUPPRESSED );
+                    obj_ptr->manage_sensor_state ( hostname, sensor_ptr, get_severity(sensor_ptr->status));
+                }
+            }
+
+            /* Currently we don't support Sysinv modifying any of the following sensor attributes */
+#ifdef WANT_SENSOR_ATTRIBUTE_MODIFY_SUPPORT
+
+            if ( sensor_ptr->state.compare(sysinv_sensor.state) )
+            {
+                ilog ("%s '%s' sensor 'state' changed from '%s' to '%s'\n",
+                          hostname.c_str(), sensor_ptr->sensorname.c_str(),
+                          sensor_ptr->state.c_str(),
+                          sysinv_sensor.state.c_str());
+
+                sensor_ptr->state = sysinv_sensor.state ;
+            }
+
+            if ( sensor_ptr->status.compare(sysinv_sensor.status) )
+            {
+                ilog ("%s '%s' sensor 'status' changed from '%s' to '%s'\n",
+                          hostname.c_str(), sensor_ptr->sensorname.c_str(),
+                          sensor_ptr->status.c_str(),
+                          sysinv_sensor.status.c_str());
+
+                sensor_ptr->status = sysinv_sensor.status ;
+            }
+
+            if ( sensor_ptr->unit_base.compare(sysinv_sensor.unit_base) )
+            {
+                ilog ("%s '%s' sensor 'unit_base' changed from '%s' to '%s'\n",
+                          hostname.c_str(), sensor_ptr->sensorname.c_str(),
+                          sensor_ptr->unit_base.c_str(),
+                          sysinv_sensor.unit_base.c_str());
+
+                sensor_ptr->unit_base = sysinv_sensor.unit_base ;
+            }
+
+            if ( sensor_ptr->unit_rate.compare(sysinv_sensor.unit_rate) )
+            {
+                ilog ("%s '%s' sensor 'unit_rate' changed from '%s' to '%s'\n",
+                          hostname.c_str(), sensor_ptr->sensorname.c_str(),
+                          sensor_ptr->unit_rate.c_str(),
+                          sysinv_sensor.unit_rate.c_str());
+
+                sensor_ptr->unit_rate = sysinv_sensor.unit_rate ;
+            }
+
+            if ( sensor_ptr->unit_modifier.compare(sysinv_sensor.unit_modifier) )
+            {
+                ilog ("%s '%s' sensor 'unit_modifier' changed from '%s' to '%s'\n",
+                          hostname.c_str(), sensor_ptr->sensorname.c_str(),
+                          sensor_ptr->unit_modifier.c_str(),
+                          sysinv_sensor.unit_modifier.c_str());
+
+                sensor_ptr->unit_modifier = sysinv_sensor.unit_modifier ;
+            }
+
+            if ( sensor_ptr->actions_minor.compare(sysinv_sensor.actions_minor))
+            {
+                /* action is validated and converted to lower case */
+                if ( is_valid_action ( HWMON_SEVERITY_MINOR, sysinv_sensor.actions_minor , true) )
+                {
+                    ilog ("%s '%s' sensor 'minor event action(s)' changed from '%s' to '%s'\n",
+                              hostname.c_str(), sensor_ptr->sensorname.c_str(),
+                              sensor_ptr->actions_minor.c_str(),
+                              sysinv_sensor.actions_minor.c_str());
+
+                    sensor_ptr->actions_minor = sysinv_sensor.actions_minor ;
+                }
+                else
+                {
+                    _create_error_response ( hostname, resp_buffer, FAIL_BAD_STATE ) ;
+                    http_status_code = HTTP_BADREQUEST ;
+                }
+            }
+
+            if ( sensor_ptr->actions_major.compare(sysinv_sensor.actions_major))
+            {
+                /* action is validated and converted to lower case */
+                if ( is_valid_action ( HWMON_SEVERITY_MAJOR, sysinv_sensor.actions_major , true ) )
+                {
+                    ilog ("%s '%s' sensor 'major event action(s)' changed from '%s' to '%s'\n",
+                              hostname.c_str(), sensor_ptr->sensorname.c_str(),
+                              sensor_ptr->actions_major.c_str(),
+                              sysinv_sensor.actions_major.c_str());
+
+                    sensor_ptr->actions_major = sysinv_sensor.actions_major ;
+                }
+                else
+                {
+                    _create_error_response ( hostname, resp_buffer, FAIL_BAD_STATE ) ;
+                    http_status_code = HTTP_BADREQUEST ;
+                }
+            }
+
+            if ( sensor_ptr->actions_critl.compare(sysinv_sensor.actions_critl))
+            {
+                /* action is validated and converted to lower case */
+                if ( is_valid_action ( HWMON_SEVERITY_CRITICAL, sysinv_sensor.actions_critl , true ) )
+                {
+                    ilog ("%s '%s' sensor 'critical event action(s)' changed from '%s' to '%s'\n",
+                              hostname.c_str(), sensor_ptr->sensorname.c_str(),
+                              sensor_ptr->actions_critl.c_str(),
+                              sysinv_sensor.actions_critl.c_str());
+
+                    sensor_ptr->actions_critl = sysinv_sensor.actions_critl ;
+                }
+                else
+                {
+                    _create_error_response ( hostname, resp_buffer, FAIL_BAD_STATE ) ;
+                    http_status_code = HTTP_BADREQUEST ;
+                }
+            }
+
+            if ( sensor_ptr->path.compare(sysinv_sensor.path))
+            {
+                ilog ("%s '%s' sensor 'read command' changed from '%s' to '%s'\n",
+                          hostname.c_str(), sensor_ptr->sensorname.c_str(),
+                          sensor_ptr->path.c_str(),
+                          sysinv_sensor.path.c_str());
+
+                sensor_ptr->path = sysinv_sensor.path ;
+            }
+
+            if ( sensor_ptr->algorithm.compare(sysinv_sensor.algorithm))
+            {
+                ilog ("%s '%s' sensor 'algorithm' changed from '%s' to '%s'\n",
+                          hostname.c_str(), sensor_ptr->sensorname.c_str(),
+                          sensor_ptr->algorithm.c_str(),
+                          sysinv_sensor.algorithm.c_str());
+
+                sensor_ptr->algorithm = sysinv_sensor.algorithm ;
+            }
+
+            if ( sensor_ptr->t_minor_lower != sysinv_sensor.t_minor_lower )
+            {
+                ilog ("%s '%s' sensor 'Lower Minor Threshold' changed from '%5.3f' to '%5.3f'\n",
+                          hostname.c_str(), sensor_ptr->sensorname.c_str(),
+                          sensor_ptr->t_minor_lower,
+                          sysinv_sensor.t_minor_lower);
+
+                sensor_ptr->t_minor_lower = sysinv_sensor.t_minor_lower ;
+            }
+
+            if ( sensor_ptr->t_major_lower != sysinv_sensor.t_major_lower )
+            {
+                ilog ("%s '%s' sensor 'Lower Major Threshold' changed from '%5.3f' to '%5.3f'\n",
+                          hostname.c_str(), sensor_ptr->sensorname.c_str(),
+                          sensor_ptr->t_major_lower,
+                          sysinv_sensor.t_major_lower);
+
+                sensor_ptr->t_major_lower = sysinv_sensor.t_major_lower ;
+            }
+
+            if ( sensor_ptr->t_critical_lower != sysinv_sensor.t_critical_lower )
+            {
+                ilog ("%s '%s' sensor 'Lower Critical Threshold' changed from '%5.3f' to '%5.3f'\n",
+                          hostname.c_str(), sensor_ptr->sensorname.c_str(),
+                          sensor_ptr->t_critical_lower,
+                          sysinv_sensor.t_critical_lower);
+
+                sensor_ptr->t_critical_lower = sysinv_sensor.t_critical_lower ;
+            }
+
+            if ( sensor_ptr->t_minor_upper != sysinv_sensor.t_minor_upper )
+            {
+                ilog ("%s '%s' sensor 'Upper Minor Threshold' changed from '%5.3f' to '%5.3f'\n",
+                          hostname.c_str(), sensor_ptr->sensorname.c_str(),
+                          sensor_ptr->t_minor_upper,
+                          sysinv_sensor.t_minor_upper);
+
+                sensor_ptr->t_minor_upper = sysinv_sensor.t_minor_upper ;
+            }
+
+            if ( sensor_ptr->t_major_upper != sysinv_sensor.t_major_upper )
+            {
+                ilog ("%s '%s' sensor 'Upper Major Threshold' changed from '%5.3f' to '%5.3f'\n",
+                          hostname.c_str(), sensor_ptr->sensorname.c_str(),
+                          sensor_ptr->t_major_upper,
+                          sysinv_sensor.t_major_upper);
+
+                sensor_ptr->t_major_upper = sysinv_sensor.t_major_upper ;
+            }
+
+            if ( sensor_ptr->t_critical_upper != sysinv_sensor.t_critical_upper )
+            {
+                ilog ("%s '%s' sensor 'Upper Critical Threshold' changed from '%5.3f' to '%5.3f'\n",
+                          hostname.c_str(), sensor_ptr->sensorname.c_str(),
+                          sensor_ptr->t_critical_upper,
+                          sysinv_sensor.t_critical_upper);
+
+                sensor_ptr->t_critical_upper = sysinv_sensor.t_critical_upper ;
+            }
+#endif
+        }
+        else
+        {
+            elog ("%s '%s' sensor not found (in hwmon)\n", hostname.c_str(), sysinv_sensor.sensorname.c_str());
+            _create_error_response ( hostname, resp_buffer, HTTP_NOTFOUND ) ;
+            http_status_code = HTTP_NOTFOUND ;
+        }
+    }
+    else
+    {
+        elog ("%s failed parsing sensor modify request (from sysinv)\n", hostname.c_str());
+        http_status_code = HTTP_BADREQUEST ;
+        _create_error_response ( hostname, resp_buffer, http_status_code ) ;
+    }
+
+    return (resp_buffer);
+}
+
+/* Handle the sysinv group modify request
+ *
+ * Parses the json 'request_ptr' into a local group, looks that group up
+ * in the specified host by name and, for each of 'suppress', audit
+ * interval and the critical/major/minor group actions that differ,
+ * pushes the new value into the host through group_modify.
+ *
+ * Returns the json response string and updates 'http_status_code' with
+ * HTTP_OK, HTTP_NOTFOUND (group not in hwmon) or HTTP_BADREQUEST. The
+ * first failing modification ends the request. */
+string _group_modify_handler ( string hostname,
+                               char * request_ptr,
+                               int  & http_status_code)
+{
+    struct sensor_group_type sysinv_group ;
+    string resp_buffer ;
+
+    int rc ;
+
+    resp_buffer.clear();
+
+    /* Load a local 'sysinv_group' variable with the sensor information from the
+     * sysinv request so that it can be compared to that same group in the host. */
+    hwmonGroup_init ( hostname, &sysinv_group );
+
+    /* a NULL request cannot be converted to a string ; treat it as empty */
+    if ( request_ptr == NULL )
+        rc = FAIL_STRING_EMPTY ;
+    else
+        rc = hwmonJson_load_group ( hostname, request_ptr, sysinv_group );
+
+    if ( rc == PASS )
+    {
+        struct sensor_group_type * host_group_ptr = get_hwmonHostClass_ptr()->hwmon_get_group ( hostname, sysinv_group.group_name );
+        if ( host_group_ptr )
+        {
+            hwmonHostClass * obj_ptr = get_hwmonHostClass_ptr() ;
+            if ( host_group_ptr->suppress != sysinv_group.suppress )
+            {
+                hlog ("%s '%s' group 'suppression' changed from '%s' to '%s'\n",
+                          hostname.c_str(), host_group_ptr->group_name.c_str(),
+                          host_group_ptr->suppress ? "True" : "False",
+                          sysinv_group.suppress ? "True" : "False");
+
+                /* modify all sensors in this group and update this setting */
+                rc = obj_ptr->group_modify ( hostname,
+                                             sysinv_group.group_uuid,
+                                             "suppress" ,
+                                             sysinv_group.suppress ? "True" : "False" );
+                if ( rc )
+                {
+                    /* TODO: handle with proper error code */
+                    _create_error_response ( hostname, resp_buffer, FAIL ) ;
+                    http_status_code = HTTP_BADREQUEST ;
+                    return (resp_buffer);
+                }
+            }
+
+            if ( host_group_ptr->group_interval != sysinv_group.group_interval )
+            {
+                hlog ("%s '%s' group 'interval' changed from '%d' to '%d' secs\n",
+                          hostname.c_str(),
+                          host_group_ptr->group_name.c_str(),
+                          host_group_ptr->group_interval,
+                          sysinv_group.group_interval);
+
+                /* modify all sensors in this group and update this setting */
+                rc = obj_ptr->group_modify ( hostname,
+                                             sysinv_group.group_uuid,
+                                             "audit_interval_group" ,
+                                             itos(sysinv_group.group_interval));
+                if ( rc )
+                {
+                    _create_error_response ( hostname, resp_buffer, rc ) ;
+                    http_status_code = HTTP_BADREQUEST ;
+                    return (resp_buffer);
+                }
+            }
+
+            if ( host_group_ptr->actions_critl_group != sysinv_group.actions_critl_group )
+            {
+                /* stays FAIL_BAD_STATE unless the action is valid and applied */
+                rc = FAIL_BAD_STATE ;
+                if ( is_valid_action ( HWMON_SEVERITY_CRITICAL, sysinv_group.actions_critl_group , true) )
+                {
+                    hlog ("%s '%s' group 'actions_critical_group' changed from '%s' to '%s'\n",
+                              hostname.c_str(),
+                              host_group_ptr->group_name.c_str(),
+                              host_group_ptr->actions_critl_group.c_str(),
+                              sysinv_group.actions_critl_group.c_str() );
+
+                    /* modify all sensors in this group and update this setting */
+                    rc = obj_ptr->group_modify ( hostname,
+                                                 sysinv_group.group_uuid,
+                                                 "actions_critical_group" ,
+                                                 sysinv_group.actions_critl_group );
+                }
+                if ( rc )
+                {
+                    _create_error_response ( hostname, resp_buffer, rc ) ;
+                    http_status_code = HTTP_BADREQUEST ;
+                    return (resp_buffer);
+                }
+            }
+
+            if ( host_group_ptr->actions_major_group != sysinv_group.actions_major_group )
+            {
+                rc = FAIL_BAD_STATE ;
+                if ( is_valid_action ( HWMON_SEVERITY_MAJOR, sysinv_group.actions_major_group, true ) )
+                {
+                    hlog ("%s '%s' group 'actions_major_group' changed from '%s' to '%s'\n",
+                              hostname.c_str(),
+                              host_group_ptr->group_name.c_str(),
+                              host_group_ptr->actions_major_group.c_str(),
+                              sysinv_group.actions_major_group.c_str() );
+
+                    /* modify all sensors in this group and update this setting */
+                    rc = obj_ptr->group_modify ( hostname,
+                                                 sysinv_group.group_uuid,
+                                                 "actions_major_group" ,
+                                                 sysinv_group.actions_major_group );
+                }
+                if ( rc )
+                {
+                    _create_error_response ( hostname, resp_buffer, rc ) ;
+                    http_status_code = HTTP_BADREQUEST ;
+                    return (resp_buffer);
+                }
+            }
+
+            if ( host_group_ptr->actions_minor_group != sysinv_group.actions_minor_group )
+            {
+                rc = FAIL_BAD_STATE ;
+                if ( is_valid_action ( HWMON_SEVERITY_MINOR, sysinv_group.actions_minor_group , true ) )
+                {
+                    hlog ("%s '%s' group 'actions_minor_group' changed from '%s' to '%s'\n",
+                              hostname.c_str(),
+                              host_group_ptr->group_name.c_str(),
+                              host_group_ptr->actions_minor_group.c_str(),
+                              sysinv_group.actions_minor_group.c_str() );
+
+                    /* modify all sensors in this group and update this setting */
+                    rc = obj_ptr->group_modify ( hostname,
+                                                 sysinv_group.group_uuid,
+                                                 "actions_minor_group" ,
+                                                 sysinv_group.actions_minor_group );
+                }
+                if ( rc )
+                {
+                    _create_error_response ( hostname, resp_buffer, rc ) ;
+                    http_status_code = HTTP_BADREQUEST ;
+                    return (resp_buffer);
+                }
+            }
+
+            resp_buffer = "{ \"status\" : \"pass\" }" ;
+            http_status_code = HTTP_OK ;
+        }
+        else
+        {
+            elog ("%s '%s' group not found (in hwmon)\n", hostname.c_str(), sysinv_group.group_name.c_str());
+            _create_error_response ( hostname, resp_buffer, HTTP_NOTFOUND ) ;
+            http_status_code = HTTP_NOTFOUND ;
+        }
+    }
+    else
+    {
+        elog ("%s failed parsing group modify request (from sysinv)\n", hostname.c_str());
+        http_status_code = HTTP_BADREQUEST ;
+        _create_error_response ( hostname, resp_buffer, http_status_code ) ;
+    }
+
+    return (resp_buffer);
+}
+
+/*****************************************************************************
+ *
+ * Name:        hwmonHttp_server_handler
+ *
+ * Description: Receive an http event, extract the event type and buffer from
+ *              it and call process request handler.
+ *              Send the processed message response back to the connection.
+ *
+ *              Requests whose Host is missing or is not 'localhost' are
+ *              rejected with the default 'not found' response and are not
+ *              processed any further ; exactly one reply is sent per request.
+ *
+ * Supported events include: PATCH from sysinv (sensor / group modify)
+ *                           POST  from sysinv (sensor model relearn request
+ *                                              on the sensor groups url)
+ *
+ ******************************************************************************/
+void hwmonHttp_server_handler (struct evhttp_request *req, void *arg)
+{
+    mtc_client_enum client  = CLIENT_NONE ;
+    int http_status_code    = HTTP_NOTFOUND ;
+
+    hwmon_ctrl_type * ctrl_ptr = get_ctrl_ptr() ;
+    msgSock_type * mtclogd_ptr = get_mtclogd_sockPtr () ;
+
+    string hostname = "" ;
+    string response = "" ;
+
+    /* default response */
+    response = "{" ;
+    response.append (" \"status\" : \"fail\"");
+    response.append (",\"reason\" : \"not found\"");
+    response.append (",\"action\" : \"retry with valid host\"");
+    response.append ("}");
+
+    hwmonHostClass * obj_ptr = get_hwmonHostClass_ptr () ;
+
+    hwmon_event.req = req ;
+    hlog3 ("HTTP Event:%p base:%p Req:%p arg:%p\n", &hwmon_event, hwmon_event.base, hwmon_event.req, arg );
+
+    /* Get sender must be localhost */
+    const char * host_ptr = evhttp_request_get_host (req);
+    if (( host_ptr == NULL ) || ( strncmp ( host_ptr , "localhost" , 10 )))
+    {
+        wlog ("Message received from unknown host '%s' but should be 'localhost'\n",
+               host_ptr ? host_ptr : "(none)" );
+        evhttp_send_error (hwmon_event.req, http_status_code, response.data() );
+
+        /* The request has been answered ; continuing would process a
+         * rejected request and send a second reply on the same request. */
+        return ;
+    }
+
+    const char * url_ptr = evhttp_request_get_uri (req);
+    if ( url_ptr == NULL )
+    {
+        /* keep the %s logs below well defined ; _get_client_id rejects it */
+        url_ptr = "" ;
+    }
+    hlog ("HTTP Request From Sysinv - %p - URL: %s\n", hwmon_event.req, url_ptr );
+
+    /* Extract the operation */
+    evhttp_cmd_type http_cmd = evhttp_request_get_command (req);
+
+    snprintf (&ctrl_ptr->log_str[0] , MAX_API_LOG_LEN-1, "\n%s [%5d] %s Request from %s for %s ...",
+               pt(), getpid(), getHttpCmdType_str(http_cmd), host_ptr, url_ptr );
+
+    send_log_message ( mtclogd_ptr, ctrl_ptr->my_hostname.data(), &ctrl_ptr->filename[0], &ctrl_ptr->log_str[0] );
+
+    /* preload the log string with a failure ; overwritten on handled requests */
+    snprintf (&ctrl_ptr->log_str[0], MAX_API_LOG_LEN-1, "%s [%5d] failed response\n", pt(), getpid() );
+
+    /* Acquire the client that sent this event from the url URI */
+    client = _get_client_id ( req );
+    if ( client != CLIENT_NONE )
+    {
+        switch ( http_cmd )
+        {
+            case EVHTTP_REQ_POST:
+            {
+                if ( client == CLIENT_SENSORGROUPS )
+                {
+                    /* get the payload */
+                    struct evbuffer *in_buf = evhttp_request_get_input_buffer ( req );
+                    if ( in_buf )
+                    {
+                        size_t len = evbuffer_get_length(in_buf) ;
+                        if ( len )
+                        {
+                            ev_ssize_t bytes = 0 ;
+
+                            /* zeroed and one byte longer so that the payload
+                             * is always a null terminated string */
+                            char * buffer_ptr = (char*)calloc ( len+1, 1 );
+                            if ( buffer_ptr == NULL )
+                            {
+                                elog ("failed to allocate http event payload buffer\n");
+                                http_status_code = HTTP_INTERNAL ;
+                                _create_error_response ( hostname, response, http_status_code );
+                                break ;
+                            }
+                            bytes = evbuffer_remove(in_buf, buffer_ptr, len );
+
+                            if ( bytes > 0 )
+                            {
+                                struct json_object * json_obj = json_tokener_parse(buffer_ptr);
+                                if ( json_obj )
+                                {
+                                    string host_uuid = jsonUtil_get_key_value_string ( json_obj, "host_uuid" );
+                                    if ( hostUtil_is_valid_uuid ( host_uuid ) )
+                                    {
+                                        /* request sensor model relearn as a
+                                         * background operation */
+                                        obj_ptr->ipmi_learn_sensor_model (host_uuid) ;
+                                        http_status_code = HTTP_OK ;
+                                        response = "{ \"status\" : \"pass\" }" ;
+                                    }
+                                    else
+                                    {
+                                        wlog ("failed to find 'host_uuid' key in HTTP event message\n");
+                                        http_status_code = HTTP_BADREQUEST ;
+                                        _create_error_response ( hostname, response, http_status_code );
+                                    }
+                                    json_object_put(json_obj);
+                                }
+                                else
+                                {
+                                    elog ("No or invalid sysinv sensor record\n");
+                                    if ( strlen(buffer_ptr) < 1000 )
+                                    {
+                                        elog ("event payload: %s\n", buffer_ptr);
+                                    }
+                                    http_status_code = HTTP_NOTFOUND ;
+                                    _create_error_response ( hostname, response, http_status_code );
+                                }
+                            }
+                            else
+                            {
+                                wlog ("http event request with no payload\n");
+                                _create_error_response ( hostname, response, MTC_HTTP_LENGTH_REQUIRED );
+                                http_status_code = HTTP_BADREQUEST ;
+                            }
+                            free ( buffer_ptr );
+                        }
+                        else
+                        {
+                            wlog ("http event request with no payload\n");
+                            _create_error_response ( hostname, response, MTC_HTTP_LENGTH_REQUIRED );
+                            http_status_code = HTTP_BADREQUEST ;
+                        }
+                    }
+                    else
+                    {
+                        http_status_code = HTTP_BADREQUEST ;
+                        _create_error_response ( hostname, response, http_status_code );
+                        wlog ("Http event request has no buffer\n");
+                    }
+                }
+                else
+                {
+                    http_status_code = HTTP_BADREQUEST ;
+                    _create_error_response ( hostname, response, http_status_code );
+                    elog ("Unexpected POST request ...\n");
+                }
+                break ;
+            }
+            case EVHTTP_REQ_PATCH:
+            {
+                /* get the payload */
+                struct evbuffer *in_buf = evhttp_request_get_input_buffer ( req );
+                if ( in_buf )
+                {
+                    size_t len = evbuffer_get_length(in_buf) ;
+                    if ( len )
+                    {
+                        ev_ssize_t bytes = 0 ;
+
+                        /* zeroed and one byte longer so that the payload
+                         * is always a null terminated string */
+                        char * buffer_ptr = (char*)calloc ( len+1, 1 );
+                        if ( buffer_ptr == NULL )
+                        {
+                            elog ("failed to allocate http event payload buffer\n");
+                            http_status_code = HTTP_INTERNAL ;
+                            _create_error_response ( hostname, response, http_status_code );
+                            break ;
+                        }
+                        bytes = evbuffer_remove(in_buf, buffer_ptr, len );
+
+                        if ( bytes <= 0 )
+                        {
+                            wlog ("http event request with no payload\n");
+                            _create_error_response ( hostname, response, MTC_HTTP_LENGTH_REQUIRED );
+                            http_status_code = HTTP_BADREQUEST ;
+                        }
+                        else
+                        {
+                            struct json_object * json_obj = json_tokener_parse(buffer_ptr);
+                            if ( json_obj )
+                            {
+                                string host_uuid = jsonUtil_get_key_value_string ( json_obj, "host_uuid" );
+                                if ( !host_uuid.empty() )
+                                {
+                                    hostname = obj_ptr->get_hostname ( host_uuid ) ;
+
+                                    if ( client == CLIENT_SENSORS )
+                                    {
+                                        snprintf (&ctrl_ptr->log_str[0], MAX_API_LOG_LEN-1, "%s [%5d] Sensor Modify Request : %s", pt(), getpid(), buffer_ptr);
+                                        send_log_message ( mtclogd_ptr, ctrl_ptr->my_hostname.data(), &ctrl_ptr->filename[0], &ctrl_ptr->log_str[0] );
+
+                                        response = _sensor_modify_handler ( hostname, buffer_ptr, http_status_code );
+
+                                        snprintf (&ctrl_ptr->log_str[0], MAX_API_LOG_LEN-1, "%s [%5d] Sensor Modify Response: %s\n", pt(), getpid(), response.data());
+                                    }
+                                    else if ( client == CLIENT_SENSORGROUPS )
+                                    {
+                                        snprintf (&ctrl_ptr->log_str[0], MAX_API_LOG_LEN-1, "%s [%5d] Group Modify Request : %s", pt(), getpid(), buffer_ptr);
+                                        send_log_message ( mtclogd_ptr, ctrl_ptr->my_hostname.data(), &ctrl_ptr->filename[0], &ctrl_ptr->log_str[0] );
+
+                                        response = _group_modify_handler ( hostname, buffer_ptr, http_status_code );
+
+                                        snprintf (&ctrl_ptr->log_str[0], MAX_API_LOG_LEN-1, "%s [%5d] Group Modify Response: %s\n", pt(), getpid(), response.data());
+                                    }
+                                    else
+                                    {
+                                        elog ("%s Unknown client\n", hostname.c_str());
+                                        http_status_code = HTTP_NOTFOUND ;
+                                        _create_error_response ( hostname, response, http_status_code );
+                                    }
+                                }
+                                else
+                                {
+                                    wlog ("failed to find 'host_uuid' key in HTTP event message\n");
+                                    http_status_code = HTTP_BADREQUEST ;
+                                    _create_error_response ( hostname, response, http_status_code );
+                                }
+                                json_object_put(json_obj);
+                            }
+                            else
+                            {
+                                elog ("No or invalid sysinv sensor record\n");
+                                if ( strlen(buffer_ptr) < 1000 )
+                                {
+                                    elog ("event payload: %s\n", buffer_ptr);
+                                }
+                                http_status_code = HTTP_NOTFOUND ;
+                                _create_error_response ( hostname, response, http_status_code );
+                            }
+                        }
+                        free ( buffer_ptr );
+                    }
+                    else
+                    {
+                        http_status_code = HTTP_BADREQUEST ;
+                        _create_error_response ( hostname, response, MTC_HTTP_LENGTH_REQUIRED );
+                        wlog ("Http event request has no payload\n");
+                    }
+                }
+                else
+                {
+                    http_status_code = HTTP_BADREQUEST ;
+                    _create_error_response ( hostname, response, http_status_code );
+                    wlog ("Http event request has no buffer\n");
+                }
+                break ;
+            }
+            default:
+            {
+                http_status_code = HTTP_BADREQUEST ;
+                _create_error_response ( hostname, response, http_status_code );
+                wlog ("Unsupported command\n");
+            }
+        }
+    }
+    else
+    {
+        http_status_code = MTC_HTTP_FORBIDDEN ;
+        _create_error_response ( hostname, response, http_status_code );
+        wlog ("invalid User-Agent specified\n");
+    }
+
+    send_log_message ( mtclogd_ptr, ctrl_ptr->my_hostname.data(), &ctrl_ptr->filename[0], &ctrl_ptr->log_str[0] );
+    if ( http_status_code == HTTP_OK )
+    {
+        struct evbuffer *resp_buf = evbuffer_new();
+        hlog3 ("Event Response: %s\n", response.c_str());
+
+        /* without a buffer the reply is still sent, just with no body */
+        if ( resp_buf )
+        {
+            evbuffer_add_printf (resp_buf, "%s\n", response.data());
+        }
+        evhttp_send_reply (hwmon_event.req, http_status_code, "OK", resp_buf );
+        if ( resp_buf )
+        {
+            evbuffer_free ( resp_buf );
+        }
+    }
+    else
+    {
+        // _create_error_response ( hostname, response, http_status_code );
+        elog ("HTTP Event error:%d ; cmd:%s url:%s response:%s\n",
+               http_status_code,
+               getHttpCmdType_str(http_cmd),
+               url_ptr,
+               response.c_str());
+
+        evhttp_send_error (hwmon_event.req, http_status_code, response.data() );
+    }
+}
+
+
+/*****************************************************************
+ *
+ * Name        : hwmonHttp_server_bind
+ *
+ * Description : Setup the HTTP server socket
+ *
+ *               Creates a TCP socket, binds it to LOOPBACK_IP on
+ *               event.port, starts listening and makes it
+ *               non-blocking.
+ *
+ * Returns     : PASS with the socket in event.fd, otherwise a
+ *               FAIL_SOCKET_xxx code with the socket closed and
+ *               event.fd set to -1 so that a retry does not leak
+ *               a file descriptor.
+ *
+ *****************************************************************/
+int hwmonHttp_server_bind ( event_type & event )
+{
+    int rc = PASS ;
+    int flags ;
+    int one = 1;
+
+    event.fd = socket(AF_INET, SOCK_STREAM, 0);
+    if (event.fd < 0)
+    {
+        elog ("HTTP server socket create failed (%d:%m)\n", errno );
+        return FAIL_SOCKET_CREATE ;
+    }
+
+    /* make socket reusable ; failure is logged but is not fatal */
+    if ( setsockopt(event.fd, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(int)) < 0 )
+    {
+        elog ("failed to set HTTP server socket as reusable (%d:%m)\n", errno );
+    }
+
+    memset(&event.addr, 0, sizeof(struct sockaddr_in));
+    event.addr.sin_family = AF_INET;
+    event.addr.sin_addr.s_addr = inet_addr(LOOPBACK_IP) ; /* INADDR_ANY; TODO: Refine this if we can */
+    event.addr.sin_port = htons(event.port);
+
+    /* bind port */
+    if ( bind ( event.fd, (struct sockaddr*)&event.addr, sizeof(struct sockaddr_in)) < 0 )
+    {
+        elog ("HTTP server port %d bind failed (%d:%m)\n", event.port, errno );
+        rc = FAIL_SOCKET_BIND ;
+    }
+
+    /* Listen for events */
+    else if ( listen(event.fd, 10 ) < 0 )
+    {
+        elog ("HTTP server listen failed (%d:%m)\n", errno );
+        rc = FAIL_SOCKET_LISTEN ;
+    }
+    else
+    {
+        /* make non-blocking */
+        flags = fcntl ( event.fd, F_GETFL, 0) ;
+        if ( flags < 0 || fcntl(event.fd, F_SETFL, flags | O_NONBLOCK) < 0)
+        {
+            elog ("failed to set HTTP server socket to non-blocking (%d:%m)\n", errno );
+            rc = FAIL_SOCKET_OPTION ;
+        }
+    }
+
+    if ( rc != PASS )
+    {
+        /* errno has already been logged above ; release the socket */
+        close ( event.fd );
+        event.fd = -1 ;
+    }
+    return rc ;
+}
+
+/* Release whatever a failed server setup had acquired so far : the
+ * evhttp handle, the event base and the server socket. */
+static void _hwmonHttp_server_release ( event_type & event )
+{
+    if ( event.httpd )
+    {
+        evhttp_free ( event.httpd );
+        event.httpd = NULL ;
+    }
+    if ( event.base )
+    {
+        event_base_free ( event.base );
+        event.base = NULL ;
+    }
+    if ( event.fd >= 0 )
+    {
+        close ( event.fd );
+        event.fd = -1 ;
+    }
+}
+
+/* Setup the http server
+ *
+ * Binds the server socket, creates the event base and the evhttp handle,
+ * restricts the allowed methods to EVENT_METHODS, starts accepting on the
+ * socket and registers hwmonHttp_server_handler as the generic callback.
+ *
+ * Returns PASS, the bind failure code, RETRY if no socket descriptor was
+ * produced or -1 on a libevent failure. Everything acquired by a failed
+ * attempt is released before returning so the caller can simply retry. */
+int hwmonHttp_server_setup ( event_type & event )
+{
+    int rc = PASS ;
+    if ( ( rc = hwmonHttp_server_bind ( event )) != PASS )
+    {
+        return rc ;
+    }
+    else if (event.fd < 0)
+    {
+        wlog ("failed to get http server socket file descriptor\n");
+        return RETRY ;
+    }
+
+    /* start from a known state so that the release path is safe */
+    event.httpd = NULL ;
+    event.base  = event_base_new();
+    if (event.base == NULL)
+    {
+        elog ("failed to get http server event base\n");
+        _hwmonHttp_server_release ( event );
+        return -1;
+    }
+    event.httpd = evhttp_new(event.base);
+    if (event.httpd == NULL)
+    {
+        elog ("failed to get httpd server handle\n");
+        _hwmonHttp_server_release ( event );
+        return -1;
+    }
+
+    evhttp_set_allowed_methods (event.httpd, EVENT_METHODS );
+
+    rc = evhttp_accept_socket(event.httpd, event.fd);
+    if ( rc == -1)
+    {
+        elog ("failed to accept on http server socket\n");
+        _hwmonHttp_server_release ( event );
+        return -1;
+    }
+    evhttp_set_gencb(event.httpd, hwmonHttp_server_handler, NULL);
+
+    return PASS ;
+}
+
+/************************************************************************
+ *
+ * Name       : hwmonHttp_server_init
+ *
+ * Description: Incoming HTTP event server on specified port.
+ *
+ */
+
+int hwmonHttp_server_init ( int event_port )
+{
+    hwmon_ctrl_type * ctrl_ptr = get_ctrl_ptr() ;
+
+    /* start from a zeroed server context that only carries the requested port */
+    memset ( &hwmon_event, 0, sizeof(event_type));
+    hwmon_event.port = event_port ;
+
+    snprintf (&ctrl_ptr->filename[0], MAX_FILENAME_LEN, "/var/log/%s_event.log", program_invocation_short_name );
+
+    /* Keep retrying until the server is listening on a valid socket.
+     * Every unsuccessful attempt is followed by a 5 second wait ; this
+     * includes the case where setup reports PASS without a usable fd,
+     * which previously looped again immediately (busy spin). */
+    for ( ; ; )
+    {
+        int rc = hwmonHttp_server_setup ( hwmon_event );
+        if ( rc == RETRY )
+        {
+            wlog ("%s bind failed (%d)\n", EVENT_SERVER, hwmon_event.fd );
+        }
+        else if ( rc != PASS )
+        {
+            elog ("%s start failed (rc:%d)\n", EVENT_SERVER, rc );
+        }
+        else if ( hwmon_event.fd > 0 )
+        {
+            ilog ("Listening for 'http event server ' socket %s:%d\n",
+                   inet_ntoa(hwmon_event.addr.sin_addr), hwmon_event.port );
+            break ;
+        }
+        else
+        {
+            wlog ("%s setup passed without a valid socket (%d)\n", EVENT_SERVER, hwmon_event.fd );
+        }
+        mtcWait_secs (5);
+    }
+
+    /* the loop is only left once the server is listening */
+    return ( PASS ) ;
+}
+
+/* Parse the sysinv response held in the event and store its "uuid"
+ * value in event.new_uuid.
+ *
+ * Returns PASS, or FAIL_JSON_OBJECT when the response cannot be tokenized.
+ * event.status is left for the caller to set. */
+static int hwmonHttp_get_response_uuid ( libEvent & event )
+{
+    struct json_object * json_obj = json_tokener_parse((char*)event.response.data());
+    if ( json_obj == NULL )
+        return ( FAIL_JSON_OBJECT );
+
+    /* update the event with the uuid sysinv generated */
+    event.new_uuid = jsonUtil_get_key_value_string ( json_obj, "uuid" );
+
+    /* release the object returned by json_tokener_parse */
+    json_object_put(json_obj);
+    return ( PASS );
+}
+
+/* Load every sensor record in the sysinv 'load sensors' response into hwmon.
+ *
+ * Stops at the first record that fails to parse or fails to be added and
+ * reflects that failure in both the return code and event.status. */
+static int hwmonHttp_sensors_from_sysinv ( string & hn, libEvent & event )
+{
+    std::list<string> sensor_list ;
+
+    int rc = jsonUtil_get_list ( (char*)event.response.data(), SYSINV_ISENSOR_LABEL, sensor_list );
+    if ( rc != PASS )
+    {
+        elog ("%s json sensor list parse error (from sysinv)\n", hn.c_str() );
+        wlog ("%s ... Raw Sensor List: \n%s\n",
+                  event.log_prefix.c_str(),
+                  event.response.c_str());
+        event.status = FAIL_JSON_OBJECT ;
+        return ( FAIL_JSON_OBJECT );
+    }
+
+    /* size() is a size_t ; %zu is its matching conversion */
+    hlog ("%s has %zu sensors in the database\n", hn.c_str(), sensor_list.size() );
+
+    sensor_type sysinv_sensor ;
+    for ( std::list<string>::iterator record = sensor_list.begin();
+          record != sensor_list.end() ;
+          ++record )
+    {
+        hwmonSensor_init ( hn, &sysinv_sensor );
+        rc = hwmonJson_load_sensor ( hn, record->data(), sysinv_sensor );
+        if ( rc != PASS )
+        {
+            elog ("%s failed parsing sensor record (from sysinv)\n", hn.c_str());
+            wlog ("%s ... Raw Sensor Record: \n%s\n",
+                      event.log_prefix.c_str(),
+                      record->c_str());
+            event.status = FAIL_JSON_PARSE ;
+            return ( FAIL_JSON_PARSE );
+        }
+
+        blog2 ("%s '%s' sensor read (from sysinv)\n", hn.c_str(), sysinv_sensor.sensorname.c_str() );
+        rc = get_hwmonHostClass_ptr()->add_sensor ( hn, sysinv_sensor );
+        if ( rc != PASS )
+        {
+            elog ("%s '%s' add sensor failed (to hwmon)\n",
+                      hn.c_str(),
+                      sysinv_sensor.sensorname.c_str());
+            event.status = FAIL ;
+            return ( FAIL );
+        }
+        blog2 ("%s '%s' sensor added (to hwmon)\n",
+                   hn.c_str(), sysinv_sensor.sensorname.c_str());
+    }
+    return ( PASS );
+}
+
+/* Load every sensor group record in the sysinv 'load groups' response
+ * into hwmon.
+ *
+ * A record that fails to parse aborts the load (FAIL_JSON_PARSE).
+ * A group that parses but cannot be added to hwmon is logged and skipped ;
+ * it does not fail the command. Its result is kept in a separate variable
+ * so that a failed add of the LAST group no longer leaks into the return
+ * code, which made the outcome depend on record order. */
+static int hwmonHttp_groups_from_sysinv ( string & hn, libEvent & event )
+{
+    std::list<string> group_list ;
+
+    int rc = jsonUtil_get_list ( (char*)event.response.data(), SYSINV_ISENSORGROUPS_LABEL, group_list );
+    if ( rc != PASS )
+    {
+        elog ("%s json sensor group list parse error (from sysinv)\n", hn.c_str() );
+        wlog ("%s ... Raw Group List: \n%s\n",
+                  event.log_prefix.c_str(),
+                  event.response.c_str());
+        event.status = FAIL_JSON_OBJECT ;
+        return ( FAIL_JSON_OBJECT );
+    }
+
+    hlog ("%s has %zu sensor groups in the database\n", hn.c_str(), group_list.size() );
+
+    struct sensor_group_type sysinv_group ;
+    for ( std::list<string>::iterator record = group_list.begin();
+          record != group_list.end() ;
+          ++record )
+    {
+        /* NOTE(review): the timer id is cleared ahead of every init ;
+         * presumably so hwmonGroup_init never sees a stale id - confirm */
+        sysinv_group.timer.tid = NULL ;
+        hwmonGroup_init ( hn, &sysinv_group );
+        rc = hwmonJson_load_group ( hn, record->data(), sysinv_group );
+        if ( rc != PASS )
+        {
+            elog ("%s failed parsing sensor group record (from sysinv)\n", hn.c_str());
+            wlog ("%s ... Raw Group Record: \n%s\n",
+                      event.log_prefix.c_str(),
+                      record->c_str());
+            event.status = FAIL_JSON_PARSE ;
+            return ( FAIL_JSON_PARSE );
+        }
+
+        blog ("%s '%s' sensor group read (from sysinv) [uuid:%s]\n",
+                  hn.c_str(),
+                  sysinv_group.group_name.c_str(),
+                  sysinv_group.group_uuid.c_str());
+
+        int add_rc = get_hwmonHostClass_ptr()->hwmon_add_group ( hn, sysinv_group );
+        if ( add_rc == PASS )
+        {
+            blog ("%s '%s' sensor group added (to hwmon)\n",
+                      hn.c_str(), sysinv_group.group_name.c_str());
+
+            if ( daemon_get_cfg_ptr()->debug_bmgmt > 1 )
+            {
+                hwmonGroup_print ( hn, &sysinv_group );
+            }
+        }
+        else
+        {
+            /* Don't fail the command */
+            elog ("%s '%s' sensor group add failed (to hwmon)\n",
+                      hn.c_str(), sysinv_group.group_name.c_str());
+        }
+    }
+    return ( PASS );
+}
+
+/* Response handler shared by all the sysinv sensor and sensor group
+ * requests built in this file ; dispatches on event.request.
+ *
+ * Returns the handling result when it is non-zero, otherwise event.status.
+ * Any failure is logged through httpUtil_log_event. */
+int hwmonHttp_handler ( libEvent & event )
+{
+    int rc = PASS ;
+
+    string hn = event.hostname ;
+
+    hlog ("%s handler called\n", event.log_prefix.c_str() );
+
+    /* Modify responses are only logged */
+    if ( event.request == SYSINV_SENSOR_MOD )
+    {
+        jlog ("Sensor Modify Response: %s\n", event.response.c_str());
+    }
+    else if ( event.request == SYSINV_SENSOR_MOD_GROUP )
+    {
+        jlog ("Group Modify Response: %s\n", event.response.c_str());
+    }
+
+    /* Handle the sysinv response to a sensor load request */
+    else if ( event.request == SYSINV_SENSOR_LOAD )
+    {
+        if ( event.status == PASS )
+        {
+            rc = hwmonHttp_sensors_from_sysinv ( hn, event );
+        }
+        else
+        {
+            elog ("%s handler called with existing error (status:%d) (sensor)\n",
+                      hn.c_str(), event.status );
+        }
+    }
+
+    /* Handle the sysinv response to a sensor grouping request ;
+     * the response must carry the uuid of the group that was patched */
+    else if ( event.request == SYSINV_SENSOR_GROUP_SENSORS )
+    {
+        hlog ("%s Sensor Group Response: \n%s\n", event.log_prefix.c_str(), event.response.c_str());
+        rc = hwmonHttp_get_response_uuid ( event );
+        if ( rc != PASS )
+        {
+            elog ("%s failed to sensor grouping - cannot tokenize response\n", event.log_prefix.c_str());
+        }
+        else if ( event.uuid.compare(event.new_uuid) )
+        {
+            elog ("%s failed to sensor grouping - response group uuid mismatch\n", event.log_prefix.c_str());
+            rc = FAIL_INVALID_UUID ;
+        }
+        event.status = rc ;
+    }
+
+    /* Handle the sysinv response to a sensor delete request */
+    else if ( event.request == SYSINV_SENSOR_DEL )
+    {
+        hlog ("%s '%s' sensor deleted uuid:%s\n",
+                  event.hostname.c_str(),
+                  event.key.c_str(),
+                  event.value.c_str());
+    }
+
+    /* Handle the sysinv response to a sensor group delete request */
+    else if ( event.request == SYSINV_SENSOR_DEL_GROUP )
+    {
+        hlog ("%s '%s' sensor group deleted uuid:%s\n",
+                  event.hostname.c_str(),
+                  event.key.c_str(),
+                  event.value.c_str());
+    }
+
+    /* Handle the sysinv response to a sensor add request */
+    else if ( event.request == SYSINV_SENSOR_ADD )
+    {
+        hlog ("%s Add Sensor Response: \n%s\n", event.log_prefix.c_str(), event.response.c_str());
+        rc = hwmonHttp_get_response_uuid ( event );
+        if ( rc != PASS )
+        {
+            elog ("%s failed to add sensor - cannot tokenize response\n", event.log_prefix.c_str());
+        }
+        event.status = rc ;
+    }
+
+    /* Handle the sysinv response to a sensor group add request */
+    else if ( event.request == SYSINV_SENSOR_ADD_GROUP )
+    {
+        hlog ("%s Add Group Response: \n%s\n", event.log_prefix.c_str(), event.response.c_str());
+        rc = hwmonHttp_get_response_uuid ( event );
+        if ( rc != PASS )
+        {
+            elog ("%s failed to add group - cannot tokenize response\n", event.log_prefix.c_str());
+        }
+        event.status = rc ;
+    }
+
+    /* Handle the sysinv response to a sensor group load request */
+    else if ( event.request == SYSINV_SENSOR_LOAD_GROUPS )
+    {
+        if ( event.status == PASS )
+        {
+            rc = hwmonHttp_groups_from_sysinv ( hn, event );
+        }
+        else
+        {
+            elog ("%s handler called with existing error (status:%d) (group) \n",
+                      hn.c_str(), event.status );
+        }
+    }
+
+    if ( rc || event.status )
+    {
+        httpUtil_log_event ( &event );
+    }
+
+    /* a pre-existing event error is reported even when handling passed */
+    return ( rc ? rc : event.status );
+}
+
+
+/*************************************************************************************/
+/***************** S Y S T E M - I N V E N T O R Y - A P I *******************/
+/*************************************************************************************/
+
+/* fetches an authorization token as a blocking request */
+
+/* Load all the sensors for this host from the sysinv database
+ * ------------------------------------------------------------
+ { "isensors":
+    [
+        {"actions_minor": null,
+         "uuid": "fa9bdd6b-1738-4409-8d85-843510f726e8",
+         "algorithm": null,
+         "updated_at": null,
+         "datatype": "discrete",
+         "suppress": null,
+         "created_at": "2015-08-28T18:49:39.799038+00:00",
+         "sensorgroup_uuid": null,
+         "capabilities": { },
+         "actions_critical": null,
+         "sensortype": "temperature",
+         "state": null,
+         "host_uuid": "d219a108-959d-462d-9418-bb0ead921e3e",
+         "state_requested": null,
+         "path": null,
+         "audit_interval": null,
+         "actions_major": null,
+         "sensorname": "Inlet_Air",
+         "links": [ { "href": "http:\/\/192.168.204.2\/v1\/isensors\/fa9bdd6b-1738-4409-8d85-843510f726e8", "rel": "self" },
+                    { "href": "http:\/\/192.168.204.2\/isensors\/fa9bdd6b-1738-4409-8d85-843510f726e8", "rel": "bookmark" } ]
+        },
+        { "t_critical_upper": null,
+          "actions_minor": null,
+          "sensorname": "5V Rail",
+          "links": [{"href": "http://192.168.204.2/v1/isensors/575d0b3a-14ea-412e-84ec-2f46af3e86d9", "rel": "self"},
+                    {"href": "http://192.168.204.2/isensors/575d0b3a-14ea-412e-84ec-2f46af3e86d9", "rel": "bookmark"}],
+          "updated_at": null,
+          "path": null,
+          "state_requested": null,
+          "t_major_lower": null,
+          "uuid": "575d0b3a-14ea-412e-84ec-2f46af3e86d9",
+          "t_minor_upper": null,
+          "capabilities": {},
+          "actions_critical": null,
+          "state": null,
+          "sensorgroup_uuid": null,
+          "t_major_upper": null,
+          "actions_major": null,
+          "suppress": null,
+          "sensortype": "voltage",
+          "t_critical_lower": null,
+          "t_minor_lower": null,
+          "unit_rate": null,
+          "unit_modifier": null,
+          "host_uuid": "d219a108-959d-462d-9418-bb0ead921e3e",
+          "unit_base": null,
+          "algorithm": null,
+          "datatype": "analog",
+          "created_at": "2015-08-28T20:10:37.605930+00:00",
+          "audit_interval": null
+        }
+    ]
+ }
+*/
+
+/* ---- private helpers shared by the sysinv request builders below ---- */
+
+/* Initialize the event for a sysinv request made on behalf of 'hostname'
+ * and stamp it with the host name and the hwmon user agent.
+ * 'caller' is the label handed to httpUtil_event_init. */
+static void hwmonHttp_event_start ( libEvent & event, string & hostname, const char * caller )
+{
+    httpUtil_event_init ( &event,
+                          hostname,
+                          caller,
+                          hostUtil_getServiceIp (SERVICE_SYSINV),
+                          hostUtil_getServicePort(SERVICE_SYSINV));
+
+    event.hostname = hostname ;
+    event.user_agent = HWMON_USER_AGENT ;
+}
+
+/* Set the request address to "/<version>/<label>/<leaf>" ;
+ * an empty leaf yields the collection address "/<version>/<label>/" */
+static void hwmonHttp_set_address ( libEvent & event, const char * label, const string & leaf )
+{
+    event.address.clear();
+    event.address.append("/");
+    event.address.append(SYSINV_ISENSOR_VERSION);
+    event.address.append("/");
+    event.address.append(label);
+    event.address.append("/");
+    event.address.append(leaf);
+}
+
+/* Fill in the attributes every request in this file sets the same way ;
+ * all of them are handled by hwmonHttp_handler and are non-critical */
+static void hwmonHttp_set_attrs ( libEvent & event,
+                                  const char * operation,
+                                  const char * service,
+                                  bool blocking,
+                                  int timeout = HWMOND_HTTP_BLOCKING_TIMEOUT )
+{
+    event.timeout = timeout ;
+    event.handler = &hwmonHttp_handler ;
+    event.operation = operation ;
+    event.information = event.operation ;
+    event.blocking = blocking ;
+    event.noncritical = true ;
+    event.service = service ;
+}
+
+/* Append the member "key":"value" to a json object that was opened with "{" ;
+ * every member but the first is preceded by a comma. The value is emitted
+ * as is, without any escaping. */
+static void hwmonHttp_add_member ( string & payload, const char * key, const string & value )
+{
+    if ( payload != "{" )
+        payload.append (",");
+    payload.append ("\"");
+    payload.append (key);
+    payload.append ("\":\"");
+    payload.append (value);
+    payload.append ("\"");
+}
+
+/* Build the single operation patch document
+ * [{"path":"/<field>","value":"<value>","op":"replace"}] */
+static string hwmonHttp_replace_patch ( const string & field, const string & value )
+{
+    string patch = "[{" ;
+    patch.append ("\"path\":\"/") ;
+    patch.append (field) ;
+    patch.append ("\",\"value\":\"");
+    patch.append (value);
+    patch.append ("\",\"op\":\"replace\"}]");
+    return ( patch );
+}
+
+/* Load all sensors for the specified host from the sysinv database */
+int hwmonHttp_load_sensors ( string & hostname, libEvent & event )
+{
+    hwmonHostClass * obj_ptr = get_hwmonHostClass_ptr ();
+
+    hwmonHttp_event_start ( event, hostname, "hwmonHttp_load_sensors" );
+    event.uuid = obj_ptr->hostBase.get_uuid (hostname);
+
+    /* GET /v1/ihosts/<host uuid>/isensors */
+    hwmonHttp_set_address ( event, SYSINV_ISENSOR_HOST_LABEL, event.uuid );
+    event.address.append("/");
+    event.address.append(SYSINV_ISENSOR_LABEL);
+
+    event.request = SYSINV_SENSOR_LOAD ;
+    event.type = EVHTTP_REQ_GET ;
+    hwmonHttp_set_attrs ( event, "load", "sensors", true );
+
+    event.payload.clear();
+
+#ifdef WANT_FIT_TESTING
+    if ( daemon_want_fit ( FIT_CODE__HWMON__HTTP_LOAD_SENSORS, hostname ))
+        return (FAIL) ;
+#endif
+
+    return ( httpUtil_api_request ( event ) );
+}
+
+/******************************************************************************
+
+    #    ######  ######
+   # #   #     # #     #
+  #   #  #     # #     #
+ #     # #     # #     #
+ ####### #     # #     #
+ #     # #     # #     #
+ #     # ######  ######
+
+******************************************************************************/
+
+/*****************************************************************************
+ *
+ * Name       : hwmonHttp_add_sensor
+ *
+ * Description: Add a sensor to the sysinv database
+ *
+ *              The threshold and unit members are only sent for sensors
+ *              whose datatype is "analog".
+ *
+ *****************************************************************************/
+int hwmonHttp_add_sensor ( string & hostname,
+                           libEvent & event,
+                           sensor_type & sensor )
+{
+    hwmonHostClass * obj_ptr = get_hwmonHostClass_ptr ();
+
+    hwmonHttp_event_start ( event, hostname, "hwmonHttp_add_sensor" );
+    event.uuid = obj_ptr->hostBase.get_uuid (hostname);
+
+    /* POST /v1/isensors/ */
+    hwmonHttp_set_address ( event, SYSINV_ISENSOR_LABEL, "" );
+
+    event.request = SYSINV_SENSOR_ADD ;
+    event.type = EVHTTP_REQ_POST ;
+    hwmonHttp_set_attrs ( event, "add", "sensor", true );
+
+    event.payload = "{" ;
+    hwmonHttp_add_member ( event.payload, "sensortype", sensor.sensortype );
+    hwmonHttp_add_member ( event.payload, "datatype", sensor.datatype );
+    hwmonHttp_add_member ( event.payload, "sensorname", sensor.sensorname );
+    hwmonHttp_add_member ( event.payload, "host_uuid", event.uuid );
+    hwmonHttp_add_member ( event.payload, "path", sensor.path );
+    hwmonHttp_add_member ( event.payload, "state", sensor.state );
+    hwmonHttp_add_member ( event.payload, "status", sensor.status );
+
+    if ( !sensor.datatype.compare("analog"))
+    {
+        hwmonHttp_add_member ( event.payload, "t_critical_lower", ftos(sensor.t_critical_lower,3));
+        hwmonHttp_add_member ( event.payload, "t_major_lower", ftos(sensor.t_major_lower,3));
+        hwmonHttp_add_member ( event.payload, "t_minor_lower", ftos(sensor.t_minor_lower,3));
+        hwmonHttp_add_member ( event.payload, "t_minor_upper", ftos(sensor.t_minor_upper,3));
+        hwmonHttp_add_member ( event.payload, "t_major_upper", ftos(sensor.t_major_upper,3));
+        hwmonHttp_add_member ( event.payload, "t_critical_upper", ftos(sensor.t_critical_upper,3));
+        hwmonHttp_add_member ( event.payload, "unit_base", sensor.unit_base );
+        hwmonHttp_add_member ( event.payload, "unit_rate", sensor.unit_rate );
+        hwmonHttp_add_member ( event.payload, "unit_modifier", sensor.unit_modifier );
+    }
+
+    hwmonHttp_add_member ( event.payload, "actions_minor", sensor.actions_minor );
+    hwmonHttp_add_member ( event.payload, "actions_major", sensor.actions_major );
+    hwmonHttp_add_member ( event.payload, "actions_critical", sensor.actions_critl );
+    hwmonHttp_add_member ( event.payload, "algorithm", sensor.algorithm );
+
+    /* the audit interval is the only unquoted member and is always sent as 0 */
+    event.payload.append (",\"audit_interval\":") ;
+    event.payload.append ("0");
+
+    hwmonHttp_add_member ( event.payload, "suppress", ( sensor.suppress == true ) ? "True" : "False" );
+
+    event.payload.append ("}");
+
+    jlog ("%s Payload: %s\n", event.hostname.c_str(), event.payload.c_str());
+
+#ifdef WANT_FIT_TESTING
+    if ( daemon_want_fit ( FIT_CODE__HWMON__HTTP_ADD_SENSOR, hostname , sensor.sensorname))
+        return ( FAIL ) ;
+#endif
+
+    return ( httpUtil_api_request ( event ));
+}
+
+/*****************************************************************************
+ *
+ * Name       : hwmonHttp_del_sensor
+ *
+ * Description: Delete a sensor from the sysinv database
+ *
+ *****************************************************************************/
+int hwmonHttp_del_sensor ( string & hostname,
+                           libEvent & event,
+                           sensor_type & sensor )
+{
+    hwmonHostClass * obj_ptr = get_hwmonHostClass_ptr ();
+
+    hwmonHttp_event_start ( event, hostname, "hwmonHttp_del_sensor" );
+    event.uuid = obj_ptr->hostBase.get_uuid (hostname);
+
+    /* DELETE /v1/isensors/<sensor uuid> */
+    hwmonHttp_set_address ( event, SYSINV_ISENSOR_LABEL, sensor.uuid );
+
+    event.request = SYSINV_SENSOR_DEL ;
+    event.type = EVHTTP_REQ_DELETE ;
+    hwmonHttp_set_attrs ( event, "delete", "sensor", true );
+
+    /* key and value are what the handler logs once the delete completes */
+    event.value = sensor.uuid ;
+    event.key = sensor.sensorname ;
+
+#ifdef WANT_FIT_TESTING
+    if ( daemon_want_fit ( FIT_CODE__HWMON__HTTP_DEL_SENSOR, hostname, sensor.sensorname ))
+        return (FAIL) ;
+#endif
+
+    return ( httpUtil_api_request ( event ));
+}
+
+/*****************************************************************************
+ *
+ * Name       : hwmonHttp_add_group
+ *
+ * Description: Add a sensor group to the sysinv database
+ *
+ *              The threshold and unit members are only sent for groups
+ *              whose datatype is "analog".
+ *
+ *****************************************************************************/
+int hwmonHttp_add_group ( string & hostname,
+                          libEvent & event,
+                          struct sensor_group_type & sensor_group )
+{
+    hwmonHostClass * obj_ptr = get_hwmonHostClass_ptr ();
+
+    hwmonHttp_event_start ( event, hostname, "hwmonHttp_add_group" );
+    event.uuid = obj_ptr->hostBase.get_uuid (hostname);
+
+    /* POST /v1/isensorgroups/ */
+    hwmonHttp_set_address ( event, SYSINV_ISENSORGROUPS_LABEL, "" );
+
+    event.request = SYSINV_SENSOR_ADD_GROUP ;
+    event.type = EVHTTP_REQ_POST ;
+    hwmonHttp_set_attrs ( event, "add", "group", true );
+
+    event.payload = "{" ;
+    hwmonHttp_add_member ( event.payload, "sensortype", sensor_group.sensortype );
+    hwmonHttp_add_member ( event.payload, "datatype", sensor_group.datatype );
+    hwmonHttp_add_member ( event.payload, "sensorgroupname", sensor_group.group_name );
+    hwmonHttp_add_member ( event.payload, "host_uuid", event.uuid );
+    hwmonHttp_add_member ( event.payload, "state", sensor_group.group_state );
+    hwmonHttp_add_member ( event.payload, "path", sensor_group.path );
+    hwmonHttp_add_member ( event.payload, "actions_critical_choices", sensor_group.actions_critical_choices );
+    hwmonHttp_add_member ( event.payload, "actions_major_choices", sensor_group.actions_major_choices );
+    hwmonHttp_add_member ( event.payload, "actions_minor_choices", sensor_group.actions_minor_choices );
+
+    if ( !sensor_group.datatype.compare("analog"))
+    {
+        hwmonHttp_add_member ( event.payload, "t_critical_lower_group", ftos(sensor_group.t_critical_lower_group,3));
+        hwmonHttp_add_member ( event.payload, "t_major_lower_group", ftos(sensor_group.t_major_lower_group,3));
+        hwmonHttp_add_member ( event.payload, "t_minor_lower_group", ftos(sensor_group.t_minor_lower_group,3));
+        hwmonHttp_add_member ( event.payload, "t_minor_upper_group", ftos(sensor_group.t_minor_upper_group,3));
+        hwmonHttp_add_member ( event.payload, "t_major_upper_group", ftos(sensor_group.t_major_upper_group,3));
+        hwmonHttp_add_member ( event.payload, "t_critical_upper_group", ftos(sensor_group.t_critical_upper_group,3));
+        hwmonHttp_add_member ( event.payload, "unit_base_group", sensor_group.unit_base_group );
+        hwmonHttp_add_member ( event.payload, "unit_rate_group", sensor_group.unit_rate_group );
+        hwmonHttp_add_member ( event.payload, "unit_modifier_group", sensor_group.unit_modifier_group );
+    }
+
+    hwmonHttp_add_member ( event.payload, "actions_minor_group", sensor_group.actions_minor_group );
+    hwmonHttp_add_member ( event.payload, "actions_major_group", sensor_group.actions_major_group );
+    hwmonHttp_add_member ( event.payload, "actions_critical_group", sensor_group.actions_critl_group );
+    hwmonHttp_add_member ( event.payload, "algorithm", sensor_group.algorithm );
+
+    /* the audit interval is the only unquoted member */
+    event.payload.append (",\"audit_interval_group\":") ;
+    event.payload.append (itos(sensor_group.group_interval));
+
+    hwmonHttp_add_member ( event.payload, "suppress", ( sensor_group.suppress == true ) ? "True" : "False" );
+
+    event.payload.append ("}");
+
+    jlog ("%s Payload: %s\n", event.hostname.c_str(), event.payload.c_str());
+
+#ifdef WANT_FIT_TESTING
+    if ( daemon_want_fit ( FIT_CODE__HWMON__HTTP_ADD_GROUP, hostname, sensor_group.group_name ))
+        return ( FAIL ) ;
+#endif
+
+    return ( httpUtil_api_request ( event ));
+}
+
+/*****************************************************************************
+ *
+ * Name       : hwmonHttp_del_group
+ *
+ * Description: Delete a sensor group from the sysinv database
+ *
+ *****************************************************************************/
+
+int hwmonHttp_del_group ( string & hostname,
+                          libEvent & event,
+                          struct sensor_group_type & sensor_group )
+{
+    hwmonHostClass * obj_ptr = get_hwmonHostClass_ptr ();
+
+    hwmonHttp_event_start ( event, hostname, "hwmonHttp_del_group" );
+    event.uuid = obj_ptr->hostBase.get_uuid (hostname);
+
+    /* DELETE /v1/isensorgroups/<group uuid> */
+    hwmonHttp_set_address ( event, SYSINV_ISENSORGROUPS_LABEL, sensor_group.group_uuid );
+
+    event.request = SYSINV_SENSOR_DEL_GROUP ;
+    event.type = EVHTTP_REQ_DELETE ;
+    hwmonHttp_set_attrs ( event, "delete", "group", true );
+
+    /* key and value are what the handler logs once the delete completes */
+    event.value = sensor_group.group_uuid ;
+    event.key = sensor_group.group_name ;
+
+#ifdef WANT_FIT_TESTING
+    if ( daemon_want_fit ( FIT_CODE__HWMON__HTTP_DEL_GROUP, hostname, sensor_group.group_name ))
+        return (FAIL) ;
+#endif
+
+    return ( httpUtil_api_request ( event ));
+}
+
+/******************************************************************************
+
+ #     # ####### ######  ### ####### #     #
+ ##   ## #     # #     #  #  #        #   #
+ # # # # #     # #     #  #  #         # #
+ #  #  # #     # #     #  #  #####      #
+ #     # #     # #     #  #  #          #
+ #     # #     # #     #  #  #          #
+ #     # ####### ######  ### #          #
+
+********************************************************************************/
+
+/*****************************************************************************
+ *
+ * Name       : hwmonHttp_mod_sensor
+ *
+ * Description: Modify a field for the specified sensor (by sensor uuid)
+ *
+ *              Returns FAIL_STRING_EMPTY without sending anything when
+ *              the key is empty. The request is non-blocking.
+ *
+ *****************************************************************************/
+int hwmonHttp_mod_sensor ( string & hostname,
+                           libEvent & event,
+                           string & sensor_uuid,
+                           string key,
+                           string value )
+{
+    if ( key.empty() )
+        return (FAIL_STRING_EMPTY);
+
+    hwmonHttp_event_start ( event, hostname, "hwmonHttp_mod_sensor" );
+    event.uuid = sensor_uuid ;
+
+    /* PATCH /v1/isensors/<sensor uuid> */
+    hwmonHttp_set_address ( event, SYSINV_ISENSOR_LABEL, sensor_uuid );
+
+    event.request = SYSINV_SENSOR_MOD ;
+    event.type = EVHTTP_REQ_PATCH ;
+    hwmonHttp_set_attrs ( event, "modify", "sensor", false );
+
+    event.payload = hwmonHttp_replace_patch ( key, value );
+
+    jlog ("%s Payload: %s\n", event.hostname.c_str(), event.payload.c_str());
+
+#ifdef WANT_FIT_TESTING
+    if ( daemon_want_fit ( FIT_CODE__HWMON__HTTP_MOD_SENSOR, hostname ))
+        return (FAIL) ;
+#endif
+
+    return ( httpUtil_api_request ( event ));
+}
+
+/*****************************************************************************
+ *
+ * Name       : hwmonHttp_mod_group
+ *
+ * Description: Modify a field for the specified group (by group uuid)
+ *
+ *              Returns FAIL_STRING_EMPTY without sending anything when
+ *              the key is empty. The request is non-blocking.
+ *
+ *****************************************************************************/
+int hwmonHttp_mod_group ( string & hostname,
+                          libEvent & event,
+                          string & group_uuid,
+                          string key,
+                          string value )
+{
+    if ( key.empty() )
+        return (FAIL_STRING_EMPTY);
+
+    blog ("%s Group [%s] Modify [%s:%s]\n", hostname.c_str(), group_uuid.c_str(), key.c_str(), value.c_str() );
+
+    hwmonHttp_event_start ( event, hostname, "hwmonHttp_mod_group" );
+    event.uuid = group_uuid;
+
+    /* PATCH /v1/isensorgroups/<group uuid> */
+    hwmonHttp_set_address ( event, SYSINV_ISENSORGROUPS_LABEL, group_uuid );
+
+    event.request = SYSINV_SENSOR_MOD_GROUP ;
+    event.type = EVHTTP_REQ_PATCH ;
+    hwmonHttp_set_attrs ( event, "modify", "group", false );
+
+    event.payload = hwmonHttp_replace_patch ( key, value );
+
+    jlog ("%s Payload: %s\n", event.hostname.c_str(), event.payload.c_str());
+
+#ifdef WANT_FIT_TESTING
+    if ( daemon_want_fit ( FIT_CODE__HWMON__HTTP_MOD_GROUP, hostname ))
+        return (FAIL) ;
+#endif
+
+    return ( httpUtil_api_request ( event ));
+}
+
+
+/*****************************************************************************
+ *
+ * Name       : hwmonHttp_disable_sensor
+ *
+ * Description: Disable sensor state and set status to offline. (by sensor uuid)
+ *
+ *              Both fields are replaced by one non-blocking PATCH.
+ *
+ *****************************************************************************/
+int hwmonHttp_disable_sensor ( string & hostname,
+                               libEvent & event,
+                               string & sensor_uuid )
+{
+    hwmonHttp_event_start ( event, hostname, "hwmonHttp_disable_sensor" );
+    event.uuid = sensor_uuid ;
+
+    /* PATCH /v1/isensors/<sensor uuid> */
+    hwmonHttp_set_address ( event, SYSINV_ISENSOR_LABEL, sensor_uuid );
+
+    event.request = SYSINV_SENSOR_MOD ;
+    event.type = EVHTTP_REQ_PATCH ;
+    hwmonHttp_set_attrs ( event, "disable", "sensor", false );
+
+    event.payload = "[" ;
+    event.payload.append ("{\"path\":\"/state\",\"value\":\"disabled\",\"op\":\"replace\"},") ;
+    event.payload.append ("{\"path\":\"/status\",\"value\":\"offline\",\"op\":\"replace\"}]") ;
+
+    jlog ("%s Payload: %s\n", event.hostname.c_str(), event.payload.c_str());
+
+    return ( httpUtil_api_request ( event ));
+}
+
+
+/**
+ * Send sysinv a GET request for all the sensor groups for this host.
+ * Then load them into this host's sensor group list.
+ **/
+int hwmonHttp_load_groups ( string & hostname, libEvent & event )
+{
+    hwmonHostClass * obj_ptr = get_hwmonHostClass_ptr ();
+
+    hwmonHttp_event_start ( event, hostname, "hwmonHttp_load_groups" );
+    event.uuid = obj_ptr->hostBase.get_uuid (hostname);
+
+    /* GET /v1/ihosts/<host uuid>/isensorgroups */
+    hwmonHttp_set_address ( event, SYSINV_ISENSOR_HOST_LABEL, event.uuid );
+    event.address.append("/");
+    event.address.append(SYSINV_ISENSORGROUPS_LABEL);
+
+    event.request = SYSINV_SENSOR_LOAD_GROUPS ;
+    event.type = EVHTTP_REQ_GET ;
+    hwmonHttp_set_attrs ( event, "load", "groups", true );
+
+    event.payload.clear();
+
+#ifdef WANT_FIT_TESTING
+    if ( daemon_want_fit ( FIT_CODE__HWMON__HTTP_LOAD_GROUPS, hostname ))
+        return (FAIL) ;
+#endif
+
+    return (httpUtil_api_request ( event ));
+}
+
+
+
+
+/* Assign the sensors in 'sensor_list' to the specified group (by group uuid)
+ * by replacing the group's "sensors" field in the sysinv database */
+int hwmonHttp_group_sensors ( string & hostname, libEvent & event, string & group_uuid, string & sensor_list )
+{
+    /** If there are no sensors to group then just return a PASS
+     *
+     * TODO: Maybe remove this or simply FAIL it after integration
+     *       so that we know we are trying to group NO sensors
+     **/
+    if ( sensor_list.empty() )
+        return (PASS);
+
+    hwmonHttp_event_start ( event, hostname, "hwmonHttp_group_sensors" );
+    event.uuid = group_uuid;
+
+    /* PATCH /v1/isensorgroups/<group uuid> */
+    hwmonHttp_set_address ( event, SYSINV_ISENSORGROUPS_LABEL, group_uuid );
+
+    event.request = SYSINV_SENSOR_GROUP_SENSORS ;
+    event.type = EVHTTP_REQ_PATCH ;
+
+    /* the only request in this file that does not use the blocking timeout */
+    hwmonHttp_set_attrs ( event, "grouping", "sensor", false, HTTP_SYSINV_NONC_TIMEOUT );
+
+    event.payload = hwmonHttp_replace_patch ( "sensors", sensor_list );
+
+    jlog ("%s Payload: %s\n", event.hostname.c_str(), event.payload.c_str());
+
+#ifdef WANT_FIT_TESTING
+    if ( daemon_want_fit ( FIT_CODE__HWMON__HTTP_GROUP_SENSORS, hostname ))
+        return (FAIL) ;
+#endif
+
+    return ( httpUtil_api_request ( event ));
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHttp.h b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHttp.h
new file mode 100644
index 00000000..cc567f82
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHttp.h
@@ -0,0 +1,93 @@
+#ifndef __INCLUDE_HWMONHTTP_H__
+#define __INCLUDE_HWMONHTTP_H__
+
+/** ************************************************************************
+ * Copyright (c) 2015-2017 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ *
+ * ************************************************************************/
+
+#include "hwmon.h"
+#include "httpUtil.h" /* for ...
libEvent */
+
+// Sysinv REST resource paths addressed by the hwmonHttp_ requests
+// Sensor Patch label: "/v1/isensors/<sensor uuid>"
+// Sensor Add label: "/v1/isensors/"
+// Sensor Load label: "/v1/ihosts/<host uuid>/isensors"
+
+
+/* Path elements the request addresses are assembled from */
+#define SYSINV_ISENSOR_VERSION "v1"
+#define SYSINV_ISENSORGROUPS_LABEL "isensorgroups"
+#define SYSINV_ISENSOR_LABEL "isensors"
+#define SYSINV_ISENSOR_HOST_LABEL "ihosts"
+
+/* User agent stamped on every request built by the hwmonHttp_ client API */
+#define HWMON_USER_AGENT "hwmon/1.0"
+
+/* This is 2 seconds shorter than the OCF script audit timeout at 10 */
+#define HWMOND_HTTP_BLOCKING_TIMEOUT (8)
+
+
+/* Context of the hwmon http event server ; zeroed and given its port
+ * by hwmonHttp_server_init */
+typedef struct
+{
+    struct sockaddr_in addr ;       /* server socket address ; logged once listening */
+    struct event_base * base ;      /* libevent event base handle                     */
+    struct evhttp_request * req ;   /* libevent http request handle                   */
+    struct evhttp * httpd ;         /* libevent http server handle                    */
+    int fd ;                        /* server socket ; must be > 0 to be listening    */
+    int port ;                      /* port the server was asked to listen on         */
+} event_type ;
+
+
+ /** **********************************************************************
+ * @file
+ * Wind River Titanium Cloud's Hardware Monitor HTTP Server/Client Header
+ *
+ * This file contains
+ *
+ * 1. the HTTP Client API for
+ *    - configuring sysinv with sensor data records as well as modifying
+ *      and querying those records.
+ *    - pushing sensor samples to ceilometer
+ *
+ * 2. the HTTP Server handler that services sysinv sensor configuration
+ *    change notifications.
+ * + * ***********************************************************************/ +int hwmonHttp_server_init ( int event_port ); +void hwmonHttp_server_look ( void ); +void hwmonHttp_server_fini ( void ); + +int hwmonHttp_mod_sensor ( string & hostname, libEvent & event, string & sensor_uuid, string key , string value ); +/* 'PATCH /v1/isensors/2bc0ac2c-d0f1-4cbe-9eac-ce40dd18d4c3 HTTP/1.1\r\nHost: 192.168.204.2:6385\r\nContent-Length: 122\r\nuser-agent: Python-httplib2/0.9.1 (gzip)\r\ncontent-type: application/json\r\naccept-encoding: gzip, deflate\r\naccept: application/json\r\nx-auth-token: aee3002723074af8adacd8687e1f639f\r\n\r\n[{"path": "/state", "value": "enabled", "op": "replace"}, {"path": "/suppress", "value": "force_action", "op": "replace"}]' +reply: 'HTTP/1.0 200 OK\r\n' +header: Date: Sun, 13 Sep 2015 17:42:37 GMT +header: Server: WSGIServer/0.1 Python/2.7.3 +header: Content-Length: 1070 +header: Content-Type: application/json; charset=UTF-8 +DEBUG (http:163) RESP: {"t_critical_upper": null, "actions_minor": "ignore", "sensorname": "Volt_P12V", "links": [{"href": "http://192.168.204.2:6385/v1/isensors/2bc0ac2c-d0f1-4cbe-9eac-ce40dd18d4c3", "rel": "self"}, {"href": "http://192.168.204.2:6385/isensors/2bc0ac2c-d0f1-4cbe-9eac-ce40dd18d4c3", "rel": "bookmark"}], "updated_at": "2015-09-13T13:32:59.786966+00:00", "path": "/etc/bmc/server_profiles.d/sensor_quanta_v1_ilo_v4.profile", "state_requested": null, "t_major_lower": null, "uuid": "2bc0ac2c-d0f1-4cbe-9eac-ce40dd18d4c3", "t_minor_upper": null, "capabilities": {}, "actions_critical": "alarm", "state": "enabled", "sensorgroup_uuid": "8da729d7-168c-4f81-9616-d420b9e4d1e6", "t_major_upper": null, "actions_major": "log", "status": "offline", "suppress": "False", "sensortype": "voltage", "t_critical_lower": null, "t_minor_lower": null, "unit_rate": null, "unit_modifier": null, "host_uuid": "44a462f0-56d2-47c7-a3e6-30f60df54e6c", "unit_base": null, "algorithm": "debounce-1.v1", "datatype": "discrete", 
"created_at": "2015-09-13T13:32:54.941199+00:00", "audit_interval": 300} +*/ + +int hwmonHttp_disable_sensor ( string & hostname, libEvent & event, string & sensor_uuid ); + +int hwmonHttp_add_sensor ( string & hostname, libEvent & event, sensor_type & sensor ); +int hwmonHttp_del_sensor ( string & hostname, libEvent & event, sensor_type & sensor ); +int hwmonHttp_load_sensors ( string & hostname, libEvent & event ); + +int hwmonHttp_mod_group ( string & hostname, libEvent & event, string & group_uuid, string key , string value ); + +/* 'PATCH /v1/isensorgroups/8da729d7-168c-4f81-9616-d420b9e4d1e6 HTTP/1.1\r\nHost: 192.168.204.2:6385\r\nContent-Length: 122\r\nuser-agent: Python-httplib2/0.9.1 (gzip)\r\ncontent-type: application/json\r\naccept-encoding: gzip, deflate\r\naccept: application/json\r\nx-auth-token: 6f8981354dee423eb45fd882f386a377\r\n\r\n[{"path": "/state", "value": "enabled", "op": "replace"}, {"path": "/suppress", "value": "force_action", "op": "replace"}]' +reply: 'HTTP/1.0 200 OK\r\n' +header: Date: Sun, 13 Sep 2015 17:48:35 GMT +header: Server: WSGIServer/0.1 Python/2.7.3 +header: Content-Length: 1321 +header: Content-Type: application/json; charset=UTF-8 +DEBUG (http:163) RESP: {"audit_interval_group": 30, "links": [{"href": "http://192.168.204.2:6385/v1/isensorgroups/8da729d7-168c-4f81-9616-d420b9e4d1e6", "rel": "self"}, {"href": "http://192.168.204.2:6385/isensorgroups/8da729d7-168c-4f81-9616-d420b9e4d1e6", "rel": "bookmark"}], "t_critical_upper_group": null, "updated_at": null, "isensors": [{"href": "http://192.168.204.2:6385/v1/isensorgroups/8da729d7-168c-4f81-9616-d420b9e4d1e6/isensors", "rel": "self"}, {"href": "http://192.168.204.2:6385/isensorgroups/8da729d7-168c-4f81-9616-d420b9e4d1e6/isensors", "rel": "bookmark"}], "t_critical_lower_group": null, "t_minor_upper_group": null, "t_minor_lower_group": null, "uuid": "8da729d7-168c-4f81-9616-d420b9e4d1e6", "unit_modifier_group": null, "capabilities": {}, "state": "enabled", "unit_rate_group": 
null, "actions_major_group": "log", "suppress": "False", "actions_minor_group": "ignore", "sensorgroupname": "server voltage", "path": "show /SYS/voltage", "sensors": null, "actions_critical_choices": "alarm,ignore,log,reset,powercycle", "actions_major_choices": "alarm,ignore,log", "actions_minor_choices": "ignore,log,alarm",, "host_uuid": "44a462f0-56d2-47c7-a3e6-30f60df54e6c", "t_major_lower_group": null, "unit_base_group": null, "sensortype": "voltage", "algorithm": "debounce-1.v1", "datatype": "discrete", "possible_states": null, "created_at": "2015-09-13T13:32:54.517511+00:00", "actions_critical_group": "alarm", "t_major_upper_group": null} +*/ + +int hwmonHttp_add_group ( string & hostname, libEvent & event, struct sensor_group_type & sensor_group ); +int hwmonHttp_del_group ( string & hostname, libEvent & event, struct sensor_group_type & sensor_group ); +int hwmonHttp_load_groups ( string & hostname, libEvent & event ); +int +hwmonHttp_group_sensors( string & hostname, libEvent & event, string & group_uuid, string & sensor_list ); + +#endif /* __INCLUDE_HWMONHTTP_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHttpClient.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHttpClient.cpp new file mode 100644 index 00000000..e69de29b diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonInit.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonInit.cpp new file mode 100644 index 00000000..4e57ff94 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonInit.cpp @@ -0,0 +1,430 @@ +/* + * Copyright (c) 2015-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform Hardware Monitor Service Header + */ + + +#include "hwmon.h" /* for ... service module header */ +#include "httpUtil.h" /* for ... httpUtil_init */ +#include "tokenUtil.h" /* for ... tokenUtil_new_token */ +#include "threadUtil.h" /* for ... common pthread support */ +#include "hwmonClass.h" /* for ... 
get_hwmonHostClass_ptr */ +#include "hwmonHttp.h" /* for ... hwmonHttp_server_fini */ +#include "tokenUtil.h" /* for ... keystone_config_handler */ + +/* Process Monitor Control Structure */ +static hwmon_ctrl_type hwmon_ctrl ; +hwmon_ctrl_type * get_ctrl_ptr ( void ) { return(&hwmon_ctrl) ; } + +/** Daemon Configuration Structure + * - Allocation and get pointer + * @see daemon_common.h for daemon_config_type struct format. */ +static daemon_config_type hwmon_config ; +daemon_config_type * daemon_get_cfg_ptr () { return &hwmon_config ; } + +/* Cleanup exit handler */ +void daemon_exit ( void ) +{ + hwmonHttp_server_fini (); + + threadUtil_fini () ; + + hwmon_msg_fini (); + hwmon_hdlr_fini ( &hwmon_ctrl ); + + daemon_files_fini (); + daemon_dump_info (); + + exit (0); +} + + +/* Startup config read */ +static int hwmon_config_handler ( void * user, + const char * section, + const char * name, + const char * value) +{ + daemon_config_type* config_ptr = (daemon_config_type*)user; + + if (MATCH("config", "audit_period")) + { + config_ptr->audit_period = atoi(value); + config_ptr->mask |= CONFIG_AUDIT_PERIOD ; + ilog("Audit Period : %d secs\n", config_ptr->audit_period ); + hwmon_ctrl.audit_period = hwmon_config.audit_period ; + } + else if (MATCH("config", "event_port")) + { + config_ptr->event_port = atoi(value); + config_ptr->mask |= CONFIG_EVENT_PORT ; + ilog("Mtce Event Port : %d (rx)\n", config_ptr->event_port ); + } + else if (MATCH("config", "inv_event_port")) + { + config_ptr->inv_event_port = atoi(value); + config_ptr->mask |= CONFIG_INV_EVENT_PORT ; + ilog("SysInv Event Port : %d (rx)\n", config_ptr->inv_event_port ); + } + else if (MATCH("config", "cmd_port")) + { + config_ptr->cmd_port = atoi(value); + config_ptr->mask |= CONFIG_CMD_PORT ; + ilog("Mtce Command Port : %d (rx)\n", config_ptr->cmd_port ); + } + return (PASS); +} + +/* mtc.ini config file read - for the keystone url */ +static int mtc_config_handler ( void * user, + const char * section, + 
const char * name, + const char * value) +{ + daemon_config_type* config_ptr = (daemon_config_type*)user; + + if (MATCH("agent", "keystone_port")) + { + config_ptr->keystone_port = atoi(value); + config_ptr->mask |= CONFIG_KEYSTONE_PORT ; + } + else if (MATCH("agent", "token_refresh_rate")) + { + config_ptr->token_refresh_rate = atoi(value); + config_ptr->mask |= CONFIG_TOKEN_REFRESH ; + } + else if (MATCH("client", "daemon_log_port")) + { + config_ptr->daemon_log_port = atoi(value); + ilog("mtclogd port: %d (tx)\n", config_ptr->daemon_log_port ); + } + + return (PASS); +} + +/***************************************************************************** + * + * Name : daemon_configure + * + * Purpose : Read process config file settings into the daemon configuration + * + *****************************************************************************/ +int daemon_configure ( void ) +{ + bool waiting_msg = false ; + + hwmonHostClass * obj_ptr = get_hwmonHostClass_ptr(); + + /* Read the ini and config files but start off with a cleared flag mask */ + hwmon_config.mask = 0 ; + + get_debug_options ( HWMON_CONF_FILE, &hwmon_config ); + + if (ini_parse(MTCE_CONF_FILE, mtc_config_handler, &hwmon_config) < 0) + { + elog ("Can't load '%s'\n", MTCE_CONF_FILE ); + return (FAIL_LOAD_INI); + } + + if (ini_parse(MTCE_INI_FILE, keystone_config_handler, &hwmon_config) < 0) + { + elog ("Can't load '%s'\n", MTCE_INI_FILE ); + return (FAIL_LOAD_INI); + } + + if (ini_parse( HWMON_CONF_FILE, hwmon_config_handler, &hwmon_config) < 0) + { + elog("Can't load '%s'\n", HWMON_CONF_FILE ); + return ( FAIL_LOAD_INI ); + } + + if (ini_parse(SYSINV_CFG_FILE, sysinv_config_handler, &hwmon_config) < 0) + { + elog ("Can't load '%s'\n", SYSINV_CFG_FILE ); + return (FAIL_LOAD_INI); + } + + /* tell the host service that there has been a config reload */ + obj_ptr->config_reload = true ; + + /* Verify loaded config against an expected mask + * as an ini file fault detection method */ + if ( 
hwmon_config.mask != CONFIG_MASK ) + { + elog ("Daemon ini configuration failed (0x%x)\n", + ((-1 ^ hwmon_config.mask) & CONFIG_MASK)); + return(FAIL_INI_CONFIG) ; + } + + /* No bigger than every 8 hours - that's all that has been tested */ + if ( hwmon_config.token_refresh_rate > MTC_HRS_8 ) + { + wlog ("Token refresh rate rounded down to 8 hour maximum\n"); + hwmon_config.token_refresh_rate = MTC_HRS_8 ; + } + + /* This ensures any link aggregation interface overrides the physical */ + hwmon_config.mgmnt_iface = + daemon_get_iface_master ( hwmon_config.mgmnt_iface ); + ilog("Mgmnt Iface : %s\n", hwmon_config.mgmnt_iface ); + + get_iface_macaddr ( hwmon_config.mgmnt_iface, hwmon_ctrl.my_macaddr ); + // get_iface_address ( hwmon_config.mgmnt_iface, hwmon_ctrl.my_address ); + + do + { + get_ip_addresses ( hwmon_ctrl.my_hostname, hwmon_ctrl.my_local_ip , hwmon_ctrl.my_float_ip ); + if ( hwmon_ctrl.my_float_ip.empty() ) + { + if ( waiting_msg == false ) + { + ilog ("Waiting on ip address config ...\n"); + waiting_msg = true ; + } + daemon_signal_hdlr (); + + mtcWait_secs (2); + } + } while ( hwmon_ctrl.my_float_ip.empty() ); + + + /* remove any existing fit */ + daemon_init_fit (); + + return (PASS); +} + + +/****************************/ +/* Initialization Utilities */ +/****************************/ + +/* Setup the daemon messaging interfaces/sockets */ +int socket_init ( void ) +{ + int rc ; + hwmon_msg_init ( ); + + /* Setup the hwmon event port. This is the port + * that hwmon sends events to maintenance on */ + rc = event_tx_port_init ( hwmon_config.event_port, + hwmon_config.mgmnt_iface); + + if ( rc == PASS ) + { + /* ... 
and now the command receive port */ + rc = cmd_rx_port_init ( hwmon_config.cmd_port ); + if ( rc == PASS ) + { + /* setup http server to receive sensor model change requests + * from system inventory */ + rc = hwmonHttp_server_init ( hwmon_config.inv_event_port ); + + /* Don't fail the daemon if the logger port is not working */ + mtclogd_tx_port_init (); + } + } + return (rc); +} + +/* The main heartbeat service loop */ +int daemon_init ( string iface, string nodetype ) +{ + int rc = PASS ; + + hwmonHostClass * obj_ptr = get_hwmonHostClass_ptr(); + + /* Not used by this daemon */ + UNUSED(nodetype) ; + + hwmon_hdlr_init ( &hwmon_ctrl ); + hwmon_stages_init (); + httpUtil_init (); + + /* init the control struct */ + hwmon_ctrl.my_hostname = "" ; + hwmon_ctrl.my_macaddr = "" ; + hwmon_ctrl.my_local_ip = "" ; + hwmon_ctrl.my_float_ip = "" ; + + /* Assign interface to config */ + hwmon_config.mgmnt_iface = (char*)iface.data() ; + + if ( daemon_files_init ( ) != PASS ) + { + elog ("Pid, log or other files could not be opened\n"); + return ( FAIL_FILES_INIT ) ; + } + + obj_ptr->system_type = daemon_system_type (); + + threadUtil_init ( hwmonTimer_handler ) ; + + /* Bind signal handlers */ + if ( daemon_signal_init () != PASS ) + { + elog ("daemon_signal_init failed\n"); + rc = FAIL_SIGNAL_INIT ; + } + + /* Configure the daemon */ + else if ( (rc = daemon_configure ( )) != PASS ) + { + elog ("Daemon service configuration failed (rc:%i)\n", rc ); + rc = FAIL_DAEMON_CONFIG ; + } + + /* Setup the messaging sockets */ + else if ( (rc = socket_init ( )) != PASS ) + { + elog ("socket initialization failed (rc:%d)\n", rc ); + rc = FAIL_SOCKET_INIT ; + } + + threadUtil_init ( hwmonTimer_handler ) ; + ilog ("BMC Acc Mode: %s\n", "ipmi/ipmitool" ); + + /* override the config reload for the startup case */ + obj_ptr->config_reload = false ; + + /* Init the hwmon service timers */ + hwmon_timer_init (); + + daemon_make_dir(IPMITOOL_OUTPUT_DIR) ; + +#ifdef WANT_FIT_TESTING + 
daemon_make_dir(FIT__INFO_FILEPATH); +#endif + + return (rc); +} + + +/* **************************************************** + * Start the service + * **************************************************** + * + * 1. Wait for config_complete + * 2. Wait for GOEnable Ready + * 3. Send Service Ready + * + * Note: Service must be started by Mtce command ; + * Ready does not imply auto start like PMON + * + * ****************************************************/ +void daemon_service_run ( void ) +{ + int rc = PASS ; + int count = 0 ; + + /* Wait for config complete indicated by presence + * of /etc/platform/.initial_config_complete */ + struct stat p ; + memset ( &p, 0 , sizeof(struct stat)); + do + { + stat (CONFIG_COMPLETE_FILE, &p); + mtcWait_secs (2); + ilog_throttled ( count, 60, "Waiting for %s\n", CONFIG_COMPLETE_FILE); + + /* The CONFIG_COMPLETE file may be empty so don't look at size, + * look at the node and dev ids as non-zero instead */ + } while ((p.st_ino == 0 ) || (p.st_dev == 0)) ; + + count = 0 ; + + /* Waiting for goenabled signal indicated by the presence of + * the GOENABLED_MAIN_PASS and then send HWMOND READY message */ + memset ( &p, 0 , sizeof(struct stat)); + for ( ; ; ) + { + stat ( GOENABLED_MAIN_READY, &p ) ; + if ( p.st_size ) + { + ilog ("Transmitting: Monitor READY Event\n" ); + do + { + rc = hwmon_send_event ( hwmon_ctrl.my_hostname, MTC_EVENT_MONITOR_READY, "hwmond" ); + if ( rc == RETRY ) + { + mtcWait_secs ( 2 ); + } + if ( rc == FAIL ) + { + elog ("Failed to Send READY event (rc=%d)\n", rc ); + elog ("Trying to provide service anyway\n"); + } + } while ( rc == RETRY ) ; + break ; + } + else + { + wlog_throttled ( count, 60, "Waiting for 'goenabled' signal ...\n"); + mtcWait_secs (2); + } + } + + /* Get an Authentication Token */ + ilog ("%s Requesting initial token\n", hwmon_ctrl.my_hostname.c_str() ); + do + { + // rc = mtcInv.mtcKeyApi_get_token ( mtcInv.my_hostname ); + rc = tokenUtil_new_token ( hwmon_ctrl.httpEvent, 
hwmon_ctrl.my_hostname ); + if ( rc ) + { + elog ("Failed to get authentication token (%d)\n", rc ); + sleep (2); + } + if ( hwmon_ctrl.httpEvent.base ) + { + slog ("%s token base:%p\n", + hwmon_ctrl.my_hostname.c_str(), + hwmon_ctrl.httpEvent.base ); + } + + } while ( rc != PASS ) ; + + if ( daemon_want_fit ( FIT_CODE__HWMON__CORRUPT_TOKEN )) + { + tokenUtil_fail_token (); + } + + /* enable the base level signal handler latency monitor */ + daemon_latency_monitor (true); + + /* get activity state */ + hwmon_ctrl.active = daemon_get_run_option ("active") ; + hwmon_service ( &hwmon_ctrl ); + daemon_exit (); +} + + +/* Push daemon state to log file */ +void daemon_dump_info ( void ) +{ + daemon_dump_membuf_banner (); + + get_hwmonHostClass_ptr()->memDumpAllState (); + + daemon_dump_membuf (); /* write mem_logs to log file and clear log list */ +} + +const char MY_DATA [100] = { "eieio\n" } ; +const char * daemon_stream_info ( void ) +{ + return (&MY_DATA[0]); +} + +/** Teat Head Entry */ +int daemon_run_testhead ( void ) +{ + ilog ("Empty test head.\n"); + return (PASS); +} diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonIpmi.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonIpmi.cpp new file mode 100644 index 00000000..1a4a9536 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonIpmi.cpp @@ -0,0 +1,775 @@ +/* + * Copyright (c) 2016-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + * + * + * @file + * Wind River Titanium Cloud Hardware Monitor IPMI Sensor Utilities + */ + +#include +#include "json-c/json.h" + +#include "daemon_ini.h" /* for ... parse_ini and MATCH */ +#include "nodeBase.h" /* for ... mtce common definitions */ +#include "nodeUtil.h" /* for ... mtce common utilities */ +#include "jsonUtil.h" /* for ... json string parse utilities */ +#include "hwmonUtil.h" /* for ... get_severity */ +#include "hwmonIpmi.h" /* for ... this module header */ +#include "hwmonHttp.h" /* for ... 
hwmonHttp_mod_sensor */ +#include "hwmonClass.h" /* for ... service class definition */ +#include "hwmonGroup.h" /* for ... ipmi_get_groupenum */ +#include "hwmonThreads.h" /* for ... IPMITOOL_JSON__SENSORS_LABEL */ + +#ifdef WANT_CORR_STATUS +#define CORR_STATUS_MAX (6) +static const char *_ipmi_status_desc[] = +{ + "ok", // all good - ok + "nc", // Non-Critical + "cr", // Critical + "nr", // Non-Recoverable + "ns", // Not Specified + "na", // Not Applicable +}; +#endif + +#ifdef WANT_CORR_EXTENDED_STATUS +#define CORR_EXTENDED_STATUS_MAX (6) +static const char *_ipmi_status_extended_desc[] = +{ + "lnr", // Lower Non-Recoverable + "unr", // Upper Non-Recoverable + "ucr", // Upper Critical + "lcr", // Lower Critical + "unc", // Upper Non-Critical + "lnc", // Lower Non-Critical +}; +#endif + +/***************************************************************************** + * + * Name : sensor_data_init + * + * Descrfiption: Initialize an ipmi sample data structure + * + *****************************************************************************/ + +void sensor_data_init ( sensor_data_type & data ) +{ + data.name.clear(); + data.status.clear(); + data.value.clear(); + data.unit.clear(); + data.lnr.clear(); + data.lcr.clear(); + data.lnc.clear(); + data.unr.clear(); + data.ucr.clear(); + data.unc.clear(); + data.group_enum = HWMON_CANNED_GROUP__NULL ; + data.found=false ; + data.ignore=false ; +} + +/***************************************************************************** + * + * Name : sensor_data_print + * + * Descrfiption: Print an ipmi sample data structure + * + *****************************************************************************/ + +void sensor_data_print ( const sensor_data_type & data ) +{ + blog3 ("%s is %s : %s (%s) %s %s %s %s %s %s %s\n", + data.name.c_str(), + data.status.c_str(), + data.value.c_str(), + data.unit.c_str(), + data.lnr.c_str(), + data.lcr.c_str(), + data.lnc.c_str(), + data.unr.c_str(), + data.ucr.c_str(), + data.unc.c_str(), 
+ data.ignore ? "ignored" : "" ); +} + +/**************************************************************************** + * + * Name : ipmi_load_json_sensor + * + * Purpose : Load a json formatted sensor data string into the specifie + * sensor data element + * + *****************************************************************************/ + +int ipmi_load_json_sensor ( sensor_data_type & sensor_data , string json_sensor_data ) +{ + int rc = FAIL_KEY_VALUE_PARSE ; + // ilog ("sensor data:%s\n", json_sensor_data.c_str() ); + + struct json_object *raw_obj = json_tokener_parse( (char*)json_sensor_data.data()); + if ( raw_obj ) + { + sensor_data.name = jsonUtil_get_key_value_string ( raw_obj, "n" ) ; + sensor_data.value = jsonUtil_get_key_value_string ( raw_obj, "v" ) ; + sensor_data.unit = jsonUtil_get_key_value_string ( raw_obj, "u" ) ; + sensor_data.status = jsonUtil_get_key_value_string ( raw_obj, "s" ) ; + sensor_data.lnr = jsonUtil_get_key_value_string ( raw_obj, "lnr" ) ; + sensor_data.lcr = jsonUtil_get_key_value_string ( raw_obj, "lcr" ) ; + sensor_data.lnc = jsonUtil_get_key_value_string ( raw_obj, "lnc" ) ; + sensor_data.unr = jsonUtil_get_key_value_string ( raw_obj, "unr" ) ; + sensor_data.ucr = jsonUtil_get_key_value_string ( raw_obj, "ucr" ) ; + sensor_data.unc = jsonUtil_get_key_value_string ( raw_obj, "unc" ) ; + + sensor_data_print ( sensor_data ); + + json_object_put(raw_obj); + rc = PASS ; + } + return (rc); +} + +/**************************************************************************** + * + * Name : sensor_data_copy + * + * Purpose : sopy sensor sample data from one struct to another. 
+ * + *****************************************************************************/ + +void sensor_data_copy ( sensor_data_type & from, sensor_data_type & to ) +{ + to.name = from.name ; + to.value = from.value ; + to.unit = from.unit ; + to.status = from.status ; + to.lnr = from.lnr ; + to.lcr = from.lcr ; + to.lnc = from.lnc ; + to.unr = from.unr ; + to.ucr = from.ucr ; + to.unc = from.unc ; + to.ignore = from.ignore ; +} + +/***************************************************************************** + * + * Name : _handle_dup_sensors + * + * Description: Logically OR status of duplicate sensors where the highest + * severity takes precidence. + * + * Severity order from low to high is + * + * na,ns + * ok + * nc + * cr + * nr + * + * Returns : True is returned if this sensor is a duplicate. + * + ****************************************************************************/ + +bool _handle_dup_sensors ( string hostname, + sensor_data_type * samples_ptr, + int samples, + sensor_data_type & this_sample ) +{ + if ( samples_ptr ) + { + for ( int i = 0 ; i < samples ; i++, samples_ptr++ ) + { + if ( samples_ptr->name == this_sample.name ) + { + bool update = false ; + + /* Treat 'Not Specified' as 'Not Applicable' */ + if ( samples_ptr->status == "ns" ) + samples_ptr->status = "na" ; + + if ( this_sample.status == "na" ) + { + if ( samples_ptr->status != "na" ) + { + ; /* current status is better than last status ; no update */ + } + } + else if ( this_sample.status == "ok" ) + { + if ( samples_ptr->status == "na" ) + { + update = true ; + } + } + else if ( this_sample.status == "nc" ) + { + if ( samples_ptr->status == "ok" ) + { + update = true ; + } + else if ( samples_ptr->status == "na" ) + { + update = true ; + } + } + else if ( this_sample.status == "cr" ) + { + if ( samples_ptr->status == "ok" ) + { + update = true ; + } + else if ( samples_ptr->status == "na" ) + { + update = true ; + } + else if ( samples_ptr->status == "nc" ) + { + update = true ; + } + } 
+ else if ( this_sample.status == "nr" ) + { + if ( samples_ptr->status != "nr" ) + { + update = true ; + } + } + + dlog ("%s %s is a duplicate sensor ; ( '%s' %c '%s')\n", + hostname.c_str(), + this_sample.name.c_str(), + samples_ptr->status.c_str(), + update ? '>' : ':', + this_sample.status.c_str()); + + /* update the ORed status */ + if ( update ) + samples_ptr->status = this_sample.status ; + + return (true) ; + } + } + } + return (false); +} + +/***************************************************************************** + * + * Name : ipmi_load_sensor_samples + * + * Description: Load all the sensor samples into hardware mon. + * + ****************************************************************************/ + +int hwmonHostClass::ipmi_load_sensor_samples ( struct hwmonHostClass::hwmon_host * host_ptr, char * msg_ptr ) +{ + int rc ; + + int samples = 0 ; + host_ptr->samples = 0 ; + + rc = jsonUtil_array_elements ( msg_ptr, IPMITOOL_JSON__SENSORS_LABEL, samples ) ; + if ( rc == PASS ) + { + string sensor_data ; + + jlog ("%s samples: %d:%d : %s\n", host_ptr->hostname.c_str(), samples, host_ptr->thread_extra_info.samples, msg_ptr ); + + if ( samples != host_ptr->thread_extra_info.samples ) + { + wlog ("%s sample accounting mismatch (%d:%d)\n", + host_ptr->hostname.c_str(), + samples, host_ptr->thread_extra_info.samples ); + } + if ( samples >= MAX_HOST_SENSORS ) + { + wlog ("%s too many sensors (%d); must be error condition ; rejecting\n", + host_ptr->hostname.c_str(), + samples ); + return (FAIL_OUT_OF_RANGE); + } + + /**************************************************************************** + * Load samples into hwmond sample sensor list. + * + * Warning : Sample readings from a server that is powered off can + * be misleading. The unit type can change. To handle this we + * filter out sensors that are not already in the list AND don't + * fit into a valid group. 
+ * + ****************************************************************************/ + for ( int index = 0 ; index < samples ; index++ ) + { + sensor_data.clear(); + rc = jsonUtil_get_array_idx ( msg_ptr, IPMITOOL_JSON__SENSORS_LABEL, index, sensor_data ) ; + if ( rc == PASS ) + { + if ( ipmi_load_json_sensor ( host_ptr->sample[host_ptr->samples], sensor_data ) == PASS ) + { + bool found = false ; + + if ( host_ptr->samples > 0 ) + { + if ( _handle_dup_sensors ( host_ptr->hostname, + &host_ptr->sample[0], + host_ptr->samples, + host_ptr->sample[host_ptr->samples]) == true ) + { + continue ; + } + } + for ( int s = 0 ; s < host_ptr->sensors ; ++s ) + { + if ( !host_ptr->sensor[s].sensorname.compare(host_ptr->sample[host_ptr->samples].name)) + { + found = true ; + break ; + } + } + if ( found == false ) + { + /* Drop any sensors that don't fall into a valid group */ + host_ptr->sample[host_ptr->samples].group_enum = + ipmi_get_groupenum ( host_ptr->hostname, + host_ptr->sample[host_ptr->samples].unit, + host_ptr->sample[host_ptr->samples].name); + + if ( host_ptr->sample[host_ptr->samples].group_enum == HWMON_CANNED_GROUP__NULL ) + { + blog3 ("%s ignore sensor : %s\n", host_ptr->hostname.c_str(), sensor_data.c_str()); + continue ; + } + } + blog2 ("%s valid sensor : %s\n", host_ptr->hostname.c_str(), sensor_data.c_str()); + } + else + { + wlog ("%s invalid sensor data:%s\n", host_ptr->hostname.c_str(), sensor_data.c_str()); + host_ptr->ipmitool_thread_info.status_string = + "failed to load sensor sample data from incoming json string" ; + host_ptr->ipmitool_thread_info.status = FAIL_JSON_PARSE ; + break ; + } + } + else + { + host_ptr->ipmitool_thread_info.status_string = "sensor data parse error for index '" ; + host_ptr->ipmitool_thread_info.status_string.append(itos(host_ptr->thread_extra_info.samples)); + host_ptr->ipmitool_thread_info.status_string.append("'"); + host_ptr->ipmitool_thread_info.status = FAIL_JSON_PARSE ; + break ; + } + host_ptr->samples++ ; + } 
/* for end */ + blog1 ("%s provided %d sensor samples\n", host_ptr->hostname.c_str(), host_ptr->samples); + } + else + { + host_ptr->ipmitool_thread_info.status_string = "failed to find '" ; + host_ptr->ipmitool_thread_info.status_string.append(IPMITOOL_JSON__SENSORS_LABEL); + host_ptr->ipmitool_thread_info.status_string.append("' label") ; + host_ptr->ipmitool_thread_info.status = FAIL_JSON_PARSE ; + } + return (host_ptr->ipmitool_thread_info.status); +} + +void _generate_transient_log ( sensor_type * sensor_ptr ) +{ + /* debounced */ + string reason = "'" + sensor_ptr->status + "' but got a transient '" + sensor_ptr->sample_status_last + "' reading" ; + + hwmonLog ( sensor_ptr->hostname, + HWMON_ALARM_ID__SENSOR, + FM_ALARM_SEVERITY_WARNING, + sensor_ptr->sensorname, + reason ); +} + + +int hwmonHostClass::ipmi_update_sensors ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + /* Mark all sensors as not being updated only to get changed below when it is updated. + * This allows us to quickly identify what sensors are missing */ + for ( int i = 0 ; i < host_ptr->sensors ; ++i ) + { + host_ptr->sensor[i].updated = false ; + } + for ( int i = 0 ; i < host_ptr->sensors ; ++i ) + { + for ( int j = 0 ; j < host_ptr->samples ; j++ ) + { + if ( host_ptr->sensor[i].sensorname.compare(host_ptr->sample[j].name) == 0 ) + { + host_ptr->sensor[i].updated = true ; + + blog1 ("%s %s curr:%s this:%s last:%s\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[i].sensorname.c_str(), + host_ptr->sensor[i].status.c_str(), + host_ptr->sample[j].status.c_str(), + host_ptr->sensor[i].sample_status_last.c_str()); + +#ifdef WANT_FIT_TESTING + /* Handle Fault Insertion Test Requests ... 
+ * for host and sensor with FIT specified status + */ + string fit_status = "" ; + if ( daemon_want_fit ( FIT_CODE__HWMON__SENSOR_STATUS, host_ptr->hostname, host_ptr->sensor[i].sensorname, fit_status ) ) + { + slog ("%s FIT %s sensor with '%s' status (was %s)\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[i].sensorname.c_str(), + fit_status.c_str(), + host_ptr->sensor[i].status.c_str()); + + /* override existing status */ + host_ptr->sample[j].status = fit_status ; + } +#endif + + /************************************************************* + *************** Sensor Debounce Control Start ************** + ************************************************************* + * + * If the last severity is the same as this severity then + * the state change is persistent ; no debounce. + * If the current and last readings are different and the + * debounce bool indicating we are in debounce mode. + * + *************************************************************/ + if ( host_ptr->sensor_query_count > START_DEBOUCE_COUNT ) + { + /* ***** Fix this up once verified */ + + /* if the current sensor state is the same as the current + * sensor sample then don't clear the debounce count as it + * might be indicating that there was a transient */ + if ( host_ptr->sensor[i].sample_status.compare(host_ptr->sample[j].status) == 0 ) + ; + + /* If we get 2 same readings in a row then this is not + * a transient or flapper */ + else if ( host_ptr->sensor[i].sample_status_last.compare(host_ptr->sample[j].status) == 0 ) + host_ptr->sensor[i].debounce_count = 0 ; + + /* if this sample reading is different from the last + * then this is a transient candidate */ + else if ( host_ptr->sensor[i].sample_status_last.compare(host_ptr->sample[j].status) ) + { + host_ptr->sensor[i].debounce_count++ ; + if ( host_ptr->sensor[i].debounce_count > 1 ) + { + /* do not generate logs for suppressed sensors */ + if ( host_ptr->sensor[i].suppress == false ) + { + /* debounced */ + string reason = 
"'" ; + reason.append(host_ptr->sensor[i].status) ; + reason.append("' but saw changing readings '") ; + reason.append(host_ptr->sensor[i].sample_status_last); + reason.append("' to '"); + reason.append(host_ptr->sample[j].status); + reason.append("'"); + + hwmonLog ( host_ptr->hostname, + HWMON_ALARM_ID__SENSOR, + FM_ALARM_SEVERITY_WARNING, + host_ptr->sensor[i].sensorname, + reason ); + + ilog ("%s %s is '%s:%s' status ; flapping '%s' then '%s'\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[i].sensorname.c_str(), + host_ptr->sensor[i].status.c_str(), + host_ptr->sensor[i].sample_status.c_str(), + host_ptr->sensor[i].sample_status_last.c_str(), + host_ptr->sample[j].status.c_str()); + } + } + host_ptr->sensor[i].sample_status_last = host_ptr->sample[j].status ; + break ; // continue ; + } + } + + /***************** Sensor Debounce Handling ******************/ + host_ptr->sensor[i].want_debounce_log_if_ok = false ; + if ( host_ptr->sensor[i].debounce_count == 1 ) + { + /* do not generate logs for suppressed sensors */ + if ( host_ptr->sensor[i].suppress == false ) + { + if ( host_ptr->sensor[i].sample_status_last.compare("na")) + { + host_ptr->sensor[i].want_debounce_log_if_ok = true ; + } + ilog ("%s %s is '%s' but saw a transient '%s' reading\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[i].sensorname.c_str(), + host_ptr->sensor[i].sample_status.c_str(), + host_ptr->sensor[i].sample_status_last.c_str()); + } + } + + host_ptr->sensor[i].debounce_count = 0; + + /*************************************************************/ + /******************* Sensor Debounce End ********************/ + /*************************************************************/ + + /* update sample status now that we are beyond the debounce check. 
+ * The last status is updated at the end of this condition */ + host_ptr->sensor[i].sample_status = host_ptr->sample[j].status ; + + /* if we get a match and its status is 'na' then just mark it as 'offline' */ + if ( host_ptr->sample[j].status.compare("na") == 0 ) + { + host_ptr->sensor[i].sample_severity = + get_ipmi_severity (host_ptr->sample[j].status); + } + else if ( host_ptr->sample[j].unit.compare(DISCRETE)) + { + /* not a descrete sensor */ + + /* get severity level */ + host_ptr->sensor[i].sample_severity = + get_ipmi_severity (host_ptr->sample[j].status); + + /* Check to see if we need to generate the transient log. + * Only generate it if want_debounce_log_if_ok is true and + * the reading is ok */ + if (( host_ptr->sensor[i].want_debounce_log_if_ok == true ) && + ( host_ptr->sensor[i].sample_severity == HWMON_SEVERITY_GOOD )) + { + _generate_transient_log ( &host_ptr->sensor[i] ); + } + + /* Minor severity from get_ipmi_severity means + * that the severity status is unexpected */ + if ( host_ptr->sensor[i].sample_severity == HWMON_SEVERITY_MINOR ) + { + if ( host_ptr->sensor[i].status.compare("minor") == 0 ) + { + /* only print this log on the first state transition */ + wlog ("%s '%s' unexpected ipmi sensor reading '%s'\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[i].sensorname.c_str(), + host_ptr->sample[j].status.c_str()); + } + } + } + + /* + * The Quanta Air Frame server's power sensors are reported as discrete sensors. + * In order to maintain backward compatibility for Quanta we need to search for + * these sensor status and prop that status to the sensor list. 
+ */ + else if ( host_ptr->quanta_server ) + { + /* otherwise if the status is not prefixed with a 0x then the + * reading is unknown so set its severity to minor as we do + * for all unknown sensor readings */ + if ( host_ptr->sample[j].status.find("0x", 0 ) == std::string::npos ) + { + wlog ("%s '%s' unexpected discrete status reading '%s'\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[i].sensorname.c_str(), + host_ptr->sample[j].status.c_str()); + + host_ptr->sensor[i].sample_severity = HWMON_SEVERITY_MINOR ; + } + /* otherwise correlate the status against the sensors we care about */ + else + { + unsigned short ipmi_status = (unsigned short)strtol((char*)host_ptr->sample[j].status.data(), NULL, 0 ); + + /* interpret discrete sensor readings for known Quanta discrete + * sensors that need to be represented with a correlated status */ + blog3 ("%s '%s' discrete sensor found - need to update status %s:0x%04x ...\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[i].sensorname.c_str(), + host_ptr->sample[j].status.c_str(), + ipmi_status ); + + /* treat thermal trip sensors failures as Major. 
+ * A good reading is 0x0080 */ + if (( host_ptr->sensor[i].sensorname.compare("PCH Thermal Trip") == 0 ) || + ( host_ptr->sensor[i].sensorname.compare("MB Thermal Trip") == 0 )) + { + if ( ipmi_status == 0x0080 ) + { + host_ptr->sensor[i].sample_severity = HWMON_SEVERITY_GOOD ; + if ( host_ptr->sensor[i].want_debounce_log_if_ok == true ) + { + _generate_transient_log ( &host_ptr->sensor[i] ); + } + } + else + { + host_ptr->sensor[i].sample_severity = HWMON_SEVERITY_MAJOR ; + } + } + else if ( host_ptr->sensor[i].sensorname.compare("PSU Redundancy") == 0 ) + { + if ( ipmi_status == 0x0180 ) /* Fully Redundant */ + { + host_ptr->sensor[i].sample_severity = HWMON_SEVERITY_GOOD ; + if ( host_ptr->sensor[i].want_debounce_log_if_ok == true ) + { + _generate_transient_log ( &host_ptr->sensor[i] ); + } + } + else if ( ipmi_status == 0x0280 ) /* Redundancy Lost */ + { + host_ptr->sensor[i].sample_severity = HWMON_SEVERITY_MAJOR ; + } + else + { + wlog ("%s '%s' unexpected discrete status reading '0x%04x'\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[i].sensorname.c_str(), + ipmi_status); + + sensor_data_print (host_ptr->sample[j]); + blog3 ("%s ... 
%s\n", host_ptr->hostname.c_str(), host_ptr->ipmitool_thread_info.data.c_str()); + + host_ptr->sensor[i].sample_severity = HWMON_SEVERITY_MINOR ; + } + } + else if (( host_ptr->sensor[i].sensorname.compare("PSU1 Status") == 0 ) || + ( host_ptr->sensor[i].sensorname.compare("PSU2 Status") == 0 )) + { +#define STATUS_BIT_MASK (0x3F00) +#define NO_PRESENCE_DETECTED (0x0000) +#define PRESENCE_DETECTED (0x0100) +#define FAILURE_DETECTED (0x0200) +#define PREDICTIVE_FAILURE (0x0400) +#define INPUT_LOST_ACDC (0x0800) +#define INPUT_LOST_OOR (0x1000) +#define INPUT_OOR_PRESENT (0x2000) + + /* Presence Detected and ok */ + // if ( ipmi_status == 0x0180 ) + if ( (ipmi_status&STATUS_BIT_MASK) == PRESENCE_DETECTED ) + { + host_ptr->sensor[i].sample_severity = HWMON_SEVERITY_GOOD ; + if ( host_ptr->sensor[i].want_debounce_log_if_ok == true ) + { + _generate_transient_log ( &host_ptr->sensor[i] ); + } + } + + /* No Presence Detect */ + // else if (( ipmi_status == 0x0080 ) || ( ipmi_status == 0x0000 )) + else if ( (ipmi_status&STATUS_BIT_MASK) == NO_PRESENCE_DETECTED ) + { + host_ptr->sensor[i].sample_severity = HWMON_SEVERITY_MINOR ; + } + + /* Failure Detected with anything else */ + /* 0x02xx */ + else if ( (ipmi_status&STATUS_BIT_MASK) & FAILURE_DETECTED ) + { + host_ptr->sensor[i].sample_severity = HWMON_SEVERITY_CRITICAL ; + } + + /* Presence Detected & Predictive Failure */ + //else if ( (ipmi_status&STATUS_BIT_MASK) == ( PRESENCE_DETECTED | PREDICTIVE_FAILURE )) + // TODO: Fix this ... 
+ else if ( ( ipmi_status == 0x1580 ) || /* Presence Detected & Predictive Failure & Input Lost Or Out Of Range */ + ( ipmi_status == 0x2580 ) || /* Presence Detected & Predictive Failure & Input Out Of Range */ + ( ipmi_status == 0x3580 ) || /* Presence Detected & Predictive Failure & both of the above */ + ( ipmi_status == 0x0580 ) || /* Presence Detected & Predictive Failure */ + ( ipmi_status == 0x0980 ) || /* Presence Detected & Power Supply Input Lost */ + ( ipmi_status == 0x0d80 ) ) /* Presence Detected & Power Supply Input Out Of Range */ + { + host_ptr->sensor[i].sample_severity = HWMON_SEVERITY_MAJOR ; + } + + else + { + wlog ("%s '%s' unexpected discrete status reading '0x%04x'\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[i].sensorname.c_str(), + ipmi_status); + + sensor_data_print (host_ptr->sample[j]); + blog3 ("%s ... %s\n", host_ptr->hostname.c_str(), host_ptr->ipmitool_thread_info.data.c_str()); + + host_ptr->sensor[i].sample_severity = HWMON_SEVERITY_MINOR ; + } + } + } + } + + /* update last status AFTER the severity interpretation so + * that debouce logging can report the transient status correctly */ + host_ptr->sensor[i].sample_status_last = host_ptr->sample[j].status ; + + /* we already found the sample so got to the next sensor */ + break ; + } + } /* end for loop over sensor samples */ + + if ( host_ptr->sensor[i].updated == true ) + { + if ( host_ptr->sensor[i].state.compare("enabled") ) + { + if ( hwmonHttp_mod_sensor ( host_ptr->hostname, + host_ptr->event , + host_ptr->sensor[i].uuid, + "state", "enabled" ) == PASS ) + { + host_ptr->sensor[i].state = "enabled" ; + } + } + } + + /* Take sensors that had no status in this sample set minor */ + else + { +#ifdef WANT_FAIL_ON_NO_UPDATE + /* if the sensor was not already enabled at least once then fail + * it since we have never got any good data from it */ + if ( host_ptr->sensor[i].state.compare("failed")) + { + if ( hwmonHttp_mod_sensor ( host_ptr->hostname, + host_ptr->event , + 
host_ptr->sensor[i].uuid, + "state", "failed" ) == PASS ) + { + host_ptr->sensor[i].state = "failed" ; + } + } +#endif + if ( host_ptr->sensor[i].state.compare("offline")) + { + if ( hwmonHttp_mod_sensor ( host_ptr->hostname, + host_ptr->event , + host_ptr->sensor[i].uuid, + "state", "offline" ) == PASS ) + { + host_ptr->sensor[i].state = "offline" ; + } + + // if alarm is raised ===> clear it + if ( host_ptr->sensor[i].alarmed == true ) + { + clear_asserted_alarm ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, &host_ptr->sensor[i], REASON_OFFLINE ); + } + } + else + { + blog ("%s '%s' sensor status not found ; already minor\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[i].sensorname.c_str()); + } + } + } + return (PASS); +} diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonIpmi.h b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonIpmi.h new file mode 100644 index 00000000..a1bf6e4b --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonIpmi.h @@ -0,0 +1,49 @@ +#ifndef __INCLUDE_HWMONIPMI_H__ +#define __INCLUDE_HWMONIPMI_H__ + +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud's Hardware Monitor "IPMI Sensor" Header + */ + +#include "hwmon.h" /* for ... sensor_data_type */ +#include "hwmonClass.h" /* for ... hwmonHostClass */ + +#define QUANTA_SENSOR_PROFILE_CHECKSUM (0xb35b) /* pre 13.58 loaded from database */ +#define QUANTA_SENSOR_PROFILE_CHECKSUM_13_53 (0x5868) /* 13.53 loaded from database */ + +/* + * There is no real difference between the 13.50 and 13.53. 
+ * 13.50 is considered having the Temp_HBA_LSI sensor while + * 13.53 doesn't + */ +#define QUANTA_SAMPLE_PROFILE_CHECKSUM_VER_13_53 (0x76b9) /* no LSI sensor */ +#define QUANTA_SAMPLE_PROFILE_CHECKSUM_VER_13_53b (0xfb12) /* with LSI sensor */ +#define QUANTA_SAMPLE_PROFILE_CHECKSUM_VER_13_50 (0x81a3) +#define QUANTA_SAMPLE_PROFILE_CHECKSUM_VER_13_47 (0xd92a) +#define QUANTA_SAMPLE_PROFILE_CHECKSUM_VER_13___ (0x5868) +#define QUANTA_SAMPLE_PROFILE_CHECKSUM_VER_13_42 (0xf6e4) +#define QUANTA_SAMPLE_PROFILE_CHECKSUM_VER__3_29 (0x4d31) + +#define QUANTA_SAMPLE_PROFILE_SENSORS_VER_13_53 (54) /* no LSI sensor */ +#define QUANTA_SAMPLE_PROFILE_SENSORS_VER_13_50 (55) +#define QUANTA_SAMPLE_PROFILE_SENSORS_VER_13_47 (57) +#define QUANTA_SAMPLE_PROFILE_SENSORS_VER_13_42 (57) +#define QUANTA_SAMPLE_PROFILE_SENSORS_VER__3_29 (58) + +#define MAX_IPMITOOL_PARSE_ERRORS (20) + +void sensor_data_init ( sensor_data_type & data ); +void sensor_data_print ( const sensor_data_type & data ); +void sensor_data_copy ( sensor_data_type & from, sensor_data_type & to ); + +int ipmi_load_json_sensor ( sensor_data_type & sensor_data , string json_sensor_data ); + +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonJson.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonJson.cpp new file mode 100644 index 00000000..2a86a937 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonJson.cpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud Hardware Monitor Json Utilities Header + */ + +#include +#include +#include /* for ... 
json-c json string parsing */ + +using namespace std; + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "hwm" + +#include "nodeBase.h" +#include "nodeUtil.h" +#include "hwmonJson.h" /* JSON Utilities */ +#include "hwmon.h" +#include "hwmonHttp.h" +/* Return the string value stored under 'key' in 'obj' ; the literal "none" is returned when the lookup fails or yields a null object */ +string _get_key_value_string ( struct json_object * obj, const char * key ) +{ + std::string value = "" ; + struct json_object * key_obj = (struct json_object *)(NULL) ; + + /* use the new API ; + * json_object_object_get is deprecated and yields compile warning */ + json_bool status = json_object_object_get_ex(obj, key, &key_obj); + if ( ( status == TRUE ) && key_obj ) + { + value.append(json_object_get_string(key_obj)); + } + else + { + value.append("none"); + } + return ( value ); +} + +/* Parse the json inventory string and load the name, infra_ip, type, uuid and bm_* fields into 'info'. Returns FAIL for a NULL, empty or "(null)"-prefixed string or one that cannot be tokenized ; PASS otherwise */ +int hwmonJson_load_inv ( char * json_str_ptr, node_inv_type & info ) +{ + int rc = PASS ; + string error = "" ; /* NOTE(review): not referenced again in this function */ + string infra_ip = "" ; + + /* init to null to avoid trap on early cleanup call with + * bad non-null default pointer value */ + struct json_object *node_obj = (struct json_object *)(NULL); + struct json_object *err_obj = (struct json_object *)(NULL); /* NOTE(review): never assigned, so its put at cleanup never runs */ + + if (( json_str_ptr == NULL ) || ( *json_str_ptr == '\0' ) || + ( ! 
strncmp ( json_str_ptr, "(null)" , 6 ))) + { + elog ("Cannot tokenize a null json string\n"); + return (FAIL); + } + node_obj = json_tokener_parse( json_str_ptr ); + if ( !node_obj ) + { + elog ("No or invalid inventory response\n"); + rc = FAIL ; + goto hwmon_info_cleanup ; + } + + node_inv_init ( info ); + + /* Get all required fields */ + //info.mac = _get_key_value_string ( node_obj, MTC_JSON_INV_HOSTMAC); + //info.ip = _get_key_value_string ( node_obj, MTC_JSON_INV_HOSTIP ); + info.name = _get_key_value_string ( node_obj, MTC_JSON_INV_NAME ); + + infra_ip = _get_key_value_string ( node_obj, MTC_JSON_INV_INFRAIP ); + if ( infra_ip.length() ) /* NOTE(review): a missing key comes back as "none", so this is also true when the key is absent */ + { + dlog ("%s inventory has infra_ip=%s\n", info.name.c_str(), infra_ip.c_str()); + info.infra_ip = infra_ip; + } + info.type = _get_key_value_string ( node_obj, MTC_JSON_INV_TYPE ); + info.uuid = _get_key_value_string ( node_obj, MTC_JSON_INV_UUID ); + info.bm_ip = _get_key_value_string ( node_obj, MTC_JSON_INV_BMIP ); + info.bm_un = _get_key_value_string ( node_obj, MTC_JSON_INV_BMUN ); + info.bm_type = _get_key_value_string ( node_obj, MTC_JSON_INV_BMTYPE); + + /* print the parsed info if debug level is 3 - mlog2 */ + if ( daemon_get_cfg_ptr()->debug_msg == DEBUG_LEVEL3 ) + { + print_inv ( info ); + } + +hwmon_info_cleanup: + + if (node_obj) json_object_put(node_obj); + if (err_obj) json_object_put(err_obj); + + return (rc); +} diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonJson.h b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonJson.h new file mode 100644 index 00000000..3c0c9ecc --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonJson.h @@ -0,0 +1,23 @@ +#ifndef __INCLUDE_HWMONJSON_H__ +#define __INCLUDE_HWMONJSON_H__ +/* + * Copyright (c) 2013-2017 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud Hardware Monitor Json Utilities Imp + * + * JSON Utility Header + */ + +#include + +using namespace std; + +int hwmonJson_load_inv ( char * json_str_ptr, node_inv_type & info ); + +#endif /* __INCLUDE_HWMONJSON_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonModel.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonModel.cpp new file mode 100644 index 00000000..ce7d1376 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonModel.cpp @@ -0,0 +1,554 @@ +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + * + * + * @file + * Wind River Titanium Cloud Hardware Monitor" Sensor Model" Utilities + * + * + * These are the utilities that load, create, group and delete sensor models + * + * + * ipmi_load_sensor_model ....... called by add_host_handler FSM + * + * ipmi_create_sensor_model + * + * ipmi_create_sample_model ... create model based on sample data + * ipmi_create_groups + * ipmi_create_sensors + * ipmi_group_sensors + * + * ipmi_create_quanta_model ... create model for Quanta server + * ipmi_add_group + * load_profile_groups + * load_profile_sensors + * hwmon_group_sensors + * + * ipmi_delete_sensor_model ..... called on model re-create + * + *****************************************************************************/ + +#include "daemon_ini.h" /* for ... parse_ini and MATCH */ +#include "nodeBase.h" /* for ... mtce common definitions */ +#include "jsonUtil.h" /* for ... json utilitiies */ +#include "nodeUtil.h" /* for ... mtce common utilities */ +#include "hwmonUtil.h" /* for ... get_severity */ +#include "hwmonClass.h" /* for ... service class definition */ +#include "hwmonHttp.h" /* for ... http podule header */ +#include "hwmonSensor.h" /* for ... this module header */ +#include "hwmonIpmi.h" /* for ... 
QUANTA_SENSOR_PROFILE_CHECKSUM */ + +/***************************************************************************** + * + * Name : ipmi_create_sensor_model + * + * Description: Top level utility that creates a sensor model based on + * sample data. + * + * The caller has already determined if the sample set matches + * the special case Quanta server model. If it does then we + * use the Quanta sensor profile to create the model. Otherwise, + * the model is created based on sensor samples. + * + ******************************************************************************/ + +int hwmonHostClass::ipmi_create_sensor_model ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + int rc = PASS ; + ilog ("%s creating sensor model\n", host_ptr->hostname.c_str()); + + host_ptr->groups = 0 ; + + /* If this is NOT a Quanta Server then ... */ + if ( ! host_ptr->quanta_server ) + { + /* + * Dynamically create a model based + * on the sensor sample reading data. + */ + rc = ipmi_create_sample_model ( host_ptr ); + } + + /* Otherwise create the model based on the known Quanta sensor profile */ + else + { + if ( ( rc = ipmi_create_quanta_model ( host_ptr )) == PASS ) + { + if ( host_ptr->groups >= MIN_SENSOR_GROUPS ) + { + /* + * If this is a Quanta server then the best way to ensure the + * sensor profile is identical and backward compatible is to + * load the sensor profile from the legacy Quanta profile file. + * + * QUANTA_SENSOR_PROFILE_FILE + */ + struct sensor_group_type group_array [MAX_HOST_GROUPS] ; + sensor_type sensor_array [MAX_HOST_SENSORS]; + + int profile_groups ; + bool error = false ; + + ilog ("%s provisioning Quanta server using %s\n", + host_ptr->hostname.c_str(), QUANTA_SENSOR_PROFILE_FILE ); + +// HP: Why IMPI sensor model is using smashLoad_Server_info ?? 
+// EM: To maintain legacy mode for Nokia, don't change this (for now):q +// +// if (ini_parse ( QUANTA_SENSOR_PROFILE_FILE, smashLoad_server_info, &host_ptr->profile_config ) < 0) +// { +// elog ("Can't load '%s'\n", QUANTA_SENSOR_PROFILE_FILE ); +// return (FAIL_LOAD_INI); +// } + + profile_groups = load_profile_groups ( host_ptr, &group_array[0], MAX_HOST_GROUPS, error ); + if (( error == false ) && ( profile_groups == host_ptr->groups )) + { + int profile_sensors; + for ( int g = 0 ; g < host_ptr->groups ; ++g ) + { + /* + * Add the sensor label list to each host_ptr group[x]. + * + * This list was fetched and attached to the group array + * in load_profile_groups. + * + * Having it prevents the need to parse the profile file + * again to associate the sensors to a group all over + * again inside load_profile_sensors + */ + + host_ptr->group[g].sensor_labels = group_array[g].sensor_labels ; + + blog ("%s '%s' group sensor list: %s\n", + host_ptr->hostname.c_str(), + host_ptr->group[g].group_name.c_str(), + host_ptr->group[g].sensor_labels.c_str()); + } + + ilog ( "%s %d profile groups loaded\n", host_ptr->hostname.c_str(), profile_groups ); + profile_sensors = load_profile_sensors ( host_ptr, &sensor_array[0], MAX_HOST_SENSORS, error ); + if (( error == false ) && ( profile_sensors )) + { + ilog ( "%s %d profile sensors loaded\n", host_ptr->hostname.c_str(), profile_sensors ); + for ( int s = 0 ; s < profile_sensors ; ++s ) + { + if (( rc = hwmonHttp_add_sensor ( host_ptr->hostname, host_ptr->event, sensor_array[s])) == PASS ) + { + sensor_array[s].uuid = host_ptr->event.new_uuid ; + if (( rc = add_sensor ( host_ptr->hostname, sensor_array[s] )) == PASS ) + { + blog ( "%s '%s' sensor added\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[s].sensorname.c_str()); + } + else + { + wlog ("%s '%s' sensor add failure (to hwmon)\n", + host_ptr->hostname.c_str(), + sensor_array[s].sensorname.c_str()); + } + } + else + { + wlog ("%s '%s' sensor add failure (to 
sysinv)\n", + host_ptr->hostname.c_str(), + sensor_array[s].sensorname.c_str()); + } + } /* end for loop */ + } + else + { + elog ( "%s load_profile_sensors failed (rc:%d) (%d)\n", + host_ptr->hostname.c_str(), + error, + profile_sensors ); + } + } + else + { + elog ( "%s load_profile_groups failed (rc:%d) (%d:%d)\n", + host_ptr->hostname.c_str(), + error, + profile_groups, + host_ptr->groups ); + } + } + else + { + elog ("%s too few groups\n", host_ptr->hostname.c_str()); + rc = FAIL_INVALID_DATA ; + } + } + else + { + elog ("%s failed to create group model (rc:%d)\n", host_ptr->hostname.c_str(), rc); + } + } + + + if (( rc == PASS ) && ( host_ptr->quanta_server)) + { + /* Group all the sensors into the groups specified by the profile file */ + rc = hwmonHostClass::hwmon_group_sensors ( host_ptr ); + + if ( rc == PASS ) + { + ilog ("%s sensors grouped\n", host_ptr->hostname.c_str()); + } + else + { + elog ("%s sensor grouping failed (rc:%d)\n", host_ptr->hostname.c_str(), rc ); + } + + plog ("%s sensor model created\n", host_ptr->hostname.c_str() ); + } + + if (( host_ptr->relearn == true ) || + ( host_ptr->interval < HWMON_MIN_AUDIT_INTERVAL )) + { + dlog ("%s requesting interval change (%d)\n", + host_ptr->hostname.c_str(), + host_ptr->interval ); + + host_ptr->interval_changed = true ; + } + + /* make sure all sensors are updated with the group actions */ + + return (rc); +} + + +/****************************************************************************** + * + * Name : ipmi_create_sample_model + * + * Description: Create a sensor model based on sample data. 
+ * + ******************************************************************************/ + +int hwmonHostClass::ipmi_create_sample_model ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + /* Without sensor samples there is nothing to build a model from */ + if ( ! host_ptr->samples ) + { + elog ("%s failed sensor sample model create ; no sensor samples\n", host_ptr->hostname.c_str() ); + return (FAIL_NO_DATA); + } + + /* Step 1: create the sensor groups from the sample data, keyed on + * sensor type, and save those groups in the database */ + int status = ipmi_create_groups ( host_ptr ); + /* Step 2: add all the sensors to hwmon and save them in the database */ + if ( status == PASS ) + { + status = ipmi_create_sensors ( host_ptr ); + } + /* Step 3: add the sensors to the groups and save that in the database */ + if ( status == PASS ) + { + status = ipmi_group_sensors ( host_ptr ); + } + return (status); +} + + +/****************************************************************************** + * + * Name : ipmi_create_quanta_model + * + * Description: Create a static Quanta server sensor group model. 
+ * Returns : PASS, or the first non-zero ipmi_add_group status ; every group add is attempted. PASS with no groups added for a NULL or non-Quanta host. + ******************************************************************************/ + +int hwmonHostClass::ipmi_create_quanta_model ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + int status = PASS ; + int rc = PASS ; + + if ( host_ptr ) + { + if ( host_ptr->quanta_server == true ) + { + rc = ipmi_add_group ( host_ptr , DISCRETE, "fan" , HWMON_CANNED_GROUP__FANS, "server fans", "show /SYS/fan"); + if (( rc ) && ( !status )) status = rc ; + + rc = ipmi_add_group ( host_ptr , DISCRETE, "fan" , HWMON_CANNED_GROUP__FANS, "power supply fans", "show /SYS/fan"); + if (( rc ) && ( !status )) status = rc ; + + rc = ipmi_add_group ( host_ptr , DISCRETE, "power" , HWMON_CANNED_GROUP__POWER, "server power", "show /SYS/powerSupply"); + if (( rc ) && ( !status )) status = rc ; + + rc = ipmi_add_group ( host_ptr , DISCRETE, "temperature" , HWMON_CANNED_GROUP__TEMP, "server temperature", "show /SYS/temperature"); + if (( rc ) && ( !status )) status = rc ; + + rc = ipmi_add_group ( host_ptr , DISCRETE, "voltage" , HWMON_CANNED_GROUP__VOLT, "server voltage", "show /SYS/voltage"); + if (( rc ) && ( !status )) status = rc ; + } + } + return (status); +} +/* Delete the host's sensor model : groups first, then sensors, each from the last index down. On the first attempt (relearn_retry_counter == 0) the model attributes are saved and existing bm assertions cleared beforehand. Returns PASS once both counts are zero ; a failed delete bumps relearn_retry_counter and returns that failure code so the caller can retry. */ +int hwmonHostClass::ipmi_delete_sensor_model ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + int rc = PASS ; + + if ( host_ptr->relearn_retry_counter == 0 ) + { + ilog ("%s ... saving group customizations\n", + host_ptr->hostname.c_str()); + this->save_model_attributes ( host_ptr ); + + ilog ("%s ... 
clearing existing assertions\n", + host_ptr->hostname.c_str()); + this->clear_bm_assertions ( host_ptr ); + + ilog ("%s ... deleting sensor model\n", + host_ptr->hostname.c_str()); + } + + /* Delete the groups from the end to the start. + * If there is a failure then exit and the caller will retry. 
+ */ + if ( host_ptr->groups ) + { + for ( int g = host_ptr->groups-1 ; + host_ptr->groups != 0 ; + host_ptr->groups-- , g-- ) + { + daemon_signal_hdlr (); + int rc_temp = hwmonHttp_del_group ( host_ptr->hostname, + host_ptr->event, + host_ptr->group[g] ); + if ( rc_temp ) + { + elog ("%s %s group delete failed (rc:%d) (%d)\n", + host_ptr->hostname.c_str(), + host_ptr->group[g].group_name.c_str(), + rc_temp, g ); + host_ptr->relearn_retry_counter++ ; + return (rc_temp); + } + else + { + blog ("%s %s (index:%d)\n", + host_ptr->hostname.c_str(), + host_ptr->group[g].group_name.c_str(), g ); + + if ( host_ptr->group[g].timer.init == TIMER_INIT_SIGNATURE ) + { + mtcTimer_reset ( host_ptr->group[g].timer ); + } + hwmonGroup_init ( host_ptr->hostname, &host_ptr->group[g]); + } + } + } + + /* Delete the sensors from the end to the start. + * If there is a failure then exit and the caller will retry. + */ + if ( host_ptr->sensors ) + { + for ( int s = host_ptr->sensors-1 ; + host_ptr->sensors != 0 ; + host_ptr->sensors-- , s-- ) + { + daemon_signal_hdlr (); + int rc_temp = hwmonHttp_del_sensor ( host_ptr->hostname, + host_ptr->event, + host_ptr->sensor[s] ); + if ( rc_temp ) + { + elog ("%s %s sensor delete failed (rc:%d) (%d)\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[s].sensorname.c_str(), + rc_temp, s ); + host_ptr->relearn_retry_counter++ ; + return (rc_temp); + } + else + { + blog ("%s %s (index:%d)\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[s].sensorname.c_str(), s ); + + hwmonSensor_init ( host_ptr->hostname, &host_ptr->sensor[s]); + sensor_data_init ( host_ptr->sample[s] ); + + if ( host_ptr->sensors == 1 ) + { + host_ptr->quanta_server = false ; + host_ptr->sensors = + host_ptr->samples = + host_ptr->profile_sensor_checksum = + host_ptr->sample_sensor_checksum = + host_ptr->last_sample_sensor_checksum = 0 ; + break ; + } + } + } + } + + if (( host_ptr->sensors == 0 ) && ( host_ptr->groups == 0 )) + { + plog ("%s sensor model deleted\n", 
host_ptr->hostname.c_str() ); + } + else + { + elog ("%s sensor model delete failed (%d:%d)\n", + host_ptr->hostname.c_str(), + host_ptr->groups, + host_ptr->sensors ); + + rc = FAIL ; + } + return (rc); +} + +/* ************************************************************************* + * + * Name : ipmi_load_sensor_model + * + * Description: Called from the add_handler to load sensors and groups + * for the specified host from the sysinv database. + * + * Warnings : Will return a failure and swerr if called with an + * already loaded sensor profile. + * + * Assumptions: Inservice sensor model reprovisioning is done with + * ipmi_delete_sensor_model and ipmi_create_sensor_model API. + * + * + * Scope : private hwmonHostClass + * + * Parameters : host_ptr + * + * Returns : PASS, FAIL_INVALID_OPERATION when a model was already loaded, or the failing load / grouping status. + * TODO: handle modify errors better. + * *************************************************************************/ +int hwmonHostClass::ipmi_load_sensor_model ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + int rc ; + + if (( host_ptr->sensors ) || ( host_ptr->groups )) + { + elog ("%s already has %d sensors across %d groups loaded - reloading\n", + host_ptr->hostname.c_str(), + host_ptr->sensors, + host_ptr->groups ); + + this->hwmon_del_sensors ( host_ptr ); + this->hwmon_del_groups ( host_ptr ); + + rc = FAIL_INVALID_OPERATION ; + } + else + { + /* Load already provisioned sensors from the database + * into host_ptr->sensor list. + * + * Warning: This is a blocking call and always has been. 
+ */ + rc = hwmonHttp_load_sensors ( host_ptr->hostname, host_ptr->event ); + if ( rc == PASS ) + { + daemon_signal_hdlr (); /* service the signals */ + + if ( host_ptr->sensors != 0 ) + { + /* Load already provisioned groups from the database + * into host_ptr->group list */ + rc = hwmonHttp_load_groups ( host_ptr->hostname, host_ptr->event ); + if ( rc == PASS ) + { + /* update sample severity to avoid state change + * from fail to ok to fail over a process restart */ + for ( int s = 0 ; s < host_ptr->sensors ; s++ ) + { + host_ptr->sensor[s].sample_severity = get_severity(host_ptr->sensor[s].status) ; + host_ptr->sensor[s].sample_status = + host_ptr->sensor[s].sample_status_last = host_ptr->sensor[s].status ; + } + rc = hwmonHostClass::hwmon_group_sensors ( host_ptr ); + if ( rc == PASS ) + { + blog ("%s sensors grouped\n", host_ptr->hostname.c_str()); + } + else + { + wlog ("%s sensor grouping failed (in hwmon) (rc:%d)\n", host_ptr->hostname.c_str(), rc ); + } + } + else + { + wlog ("%s sensor group load failed (from sysinv) (rc:%d)\n", host_ptr->hostname.c_str(), rc ); + } + } + } + else + { + wlog ("%s sensors load failed (from sysinv) (rc:%d)\n", host_ptr->hostname.c_str(), rc ); + } + } + + if ( rc == PASS ) + { + if (( host_ptr->sensors ) && ( host_ptr->groups )) + { + ilog ("%s has %d sensors across %d groups (in sysinv)\n", + host_ptr->hostname.c_str(), + host_ptr->sensors, + host_ptr->groups ); + + /* initialize sensor data */ + for ( int i = 0 ; i < host_ptr->sensors ; ++i ) + { + host_ptr->sensor[i].severity = get_severity ( host_ptr->sensor[i].status ); + } + + host_ptr->profile_sensor_checksum = + checksum_sensor_profile ( host_ptr->hostname, + host_ptr->sensors, + &host_ptr->sensor[0]); + + ilog ("%s database profile checksum : %04x (%d sensors)\n", + host_ptr->hostname.c_str(), + host_ptr->profile_sensor_checksum, + host_ptr->sensors); + + if ((( host_ptr->profile_sensor_checksum == QUANTA_SENSOR_PROFILE_CHECKSUM ) || + ( 
host_ptr->profile_sensor_checksum == QUANTA_SENSOR_PROFILE_CHECKSUM_13_53 )) && + (( host_ptr->sensors == QUANTA_PROFILE_SENSORS ) || ( host_ptr->sensors == QUANTA_PROFILE_SENSORS_REVISED_1 )) && /* the sensor count has to equal one of the two values ; a bare constant here made this test always true */ + ( host_ptr->groups == QUANTA_SENSOR_GROUPS )) + { + ilog ("%s ---------------------------------------------\n", host_ptr->hostname.c_str()); + ilog ("%s is a Quanta server with legacy sensor profile\n", host_ptr->hostname.c_str()); + ilog ("%s ---------------------------------------------\n", host_ptr->hostname.c_str()); + host_ptr->quanta_server = true ; + } + else + { + ilog ("%s has unique sensor model\n", host_ptr->hostname.c_str()); + } + } + else + { + /* Incomplete or no sensor/group model found in database */ + ilog ("%s no valid sensor model found (in sysinv) (sensors:%d groups:%d)\n", + host_ptr->hostname.c_str(), + host_ptr->sensors, + host_ptr->groups ); + + if (( host_ptr->sensors ) || (host_ptr->groups )) + { + wlog ("%s has a corrupt sensor profile ; deleting ...\n", host_ptr->hostname.c_str()); + ipmi_delete_sensor_model ( host_ptr ); + } + } + } + return (rc); +} diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonModel.h b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonModel.h new file mode 100644 index 00000000..fb731169 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonModel.h @@ -0,0 +1,16 @@ +#ifndef __INCLUDE_HWMONMODEL_H__ +#define __INCLUDE_HWMONMODEL_H__ + +/* + * Copyright (c) 2017 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud's Hardware Monitor "Sensor Model" Header + */ + +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonMsg.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonMsg.cpp new file mode 100644 index 00000000..d84e0665 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonMsg.cpp @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform Hardware Monitor Service Messaging + */ + + +#include "hwmon.h" /* */ +#include "hwmonClass.h" /* for ... get_hwmonHostClass_ptr */ +#include "nodeMacro.h" /* */ +#include "hwmonJson.h" /* for ... hwmonJson_load_inv */ + +/* Where to send events */ +string mtcAgent_ip = "" ; + +/** + * Messaging Socket Control Struct - The allocated struct + */ +static +hwmon_socket_type hwmon_sock; +hwmon_socket_type * getSock_ptr ( void ) /* accessor for the file scope hwmon_sock */ +{ + return ( &hwmon_sock ); +} +/* Return a pointer to the mtclogd member of the messaging socket control struct */ +msgSock_type * get_mtclogd_sockPtr ( void ) +{ + return (&hwmon_sock.mtclogd); +} + +/****************************/ +/* Initialization Utilities */ +/****************************/ + +/* Init the messaging socket control structure */ +void hwmon_msg_init ( void ) +{ + memset(&hwmon_sock, 0, sizeof(hwmon_sock)); +} +/* Release the messaging sockets : delete the event and command socket objects and close the mtclogd socket when its descriptor is greater than 0 */ +void hwmon_msg_fini ( void ) +{ + if ( hwmon_sock.event_sock ) + { + delete (hwmon_sock.event_sock); + hwmon_sock.event_sock = 0 ; + } + if ( hwmon_sock.cmd_sock ) + { + delete (hwmon_sock.cmd_sock); + hwmon_sock.cmd_sock = 0 ; + } + if ( hwmon_sock.mtclogd.sock > 0 ) + { + close (hwmon_sock.mtclogd.sock); + hwmon_sock.mtclogd.sock = 0 ; + } +} + + +/*********************************************************************/ +/* Setup hwmon command receive port/socket */ +/*********************************************************************/ +int cmd_rx_port_init ( int port ) +{ + int rc = PASS ; + hwmon_sock.cmd_port = port ; + + mtcAgent_ip = getipbyname ( CONTROLLER ); + + /* setup unicast maintenance command receive socket */ + hwmon_sock.cmd_sock = new msgClassRx(mtcAgent_ip.c_str(), hwmon_sock.cmd_port, IPPROTO_UDP, NULL, true); + if ( rc ) /* 
NOTE(review): rc is still the initial PASS here ; nothing above updates it, so this branch cannot run - confirm how msgClassRx reports a setup failure */ + { + elog ("Failed to setup maintenance command receive socket (rc:%d)", rc); + return (rc); + } + + return (rc); +} + +/*********************************************************************/ +/* Setup hwmon broadcast transmit port/socket */ 
+/*********************************************************************/
+int event_tx_port_init ( int port, const char * iface )
+{
+    int rc = PASS ;
+
+    /* Load the event port number */
+    hwmon_sock.event_port = port ;
+
+    mtcAgent_ip = getipbyname ( CONTROLLER );
+    ilog ("ControllerIP: %s\n", mtcAgent_ip.c_str());
+
+    hwmon_sock.event_sock = new msgClassTx(mtcAgent_ip.c_str(),hwmon_sock.event_port , IPPROTO_UDP, iface);
+
+    /* NOTE(review): rc still holds its initial PASS value here ; nothing
+     * above updates it, so this error path cannot be taken. The socket
+     * object's own status should probably be checked - confirm against
+     * the msgClassTx interface. */
+    if ( rc )
+    {
+        elog ("Failed to setup mtce to hbs transmit command port %d\n", port );
+        return (rc) ;
+    }
+
+    return (PASS);
+}
+
+/* Setup the transmit socket used to send log messages to mtclogd
+ * on the loopback interface ; port comes from the daemon config.
+ * Returns the status produced by the socket creation macro. */
+int mtclogd_tx_port_init ( void )
+{
+    int rc = PASS ;
+    int port = hwmon_sock.mtclogd.port = daemon_get_cfg_ptr()->daemon_log_port ;
+    CREATE_REUSABLE_INET_UDP_TX_SOCKET ( LOOPBACK_IP,
+                                         port,
+                                         hwmon_sock.mtclogd.sock,
+                                         hwmon_sock.mtclogd.addr,
+                                         hwmon_sock.mtclogd.port,
+                                         hwmon_sock.mtclogd.len,
+                                         "mtc logger message",
+                                         rc );
+    if ( rc )
+    {
+        elog ("Failed to setup messaging to mtclogd on port %d\n", port );
+    }
+    return (rc);
+}
+
+
+/*********************************************************************
+ *
+ * Name       : hwmon_send_event
+ *
+ * Purpose    : Build and send an event message over the event socket.
+ *
+ * Parameters : hostname   - host the event is about ; copied into the
+ *                           header after the message signature
+ *              event_code - one of the MTC_EVENT_xxx / MTC_DEGRADE_xxx
+ *                           codes accepted below
+ *              sensor_ptr - optional sensor name copied into the message
+ *                           buffer ; may be NULL
+ *
+ * Returns    : PASS               - all bytes were written
+ *              FAIL_BAD_PARM      - unsupported event code
+ *              FAIL_SOCKET_SENDTO - socket missing or short write
+ *
+ *********************************************************************/
+int hwmon_send_event ( string hostname, unsigned int event_code , const char * sensor_ptr )
+{
+    mtc_message_type event ;
+
+    int rc = FAIL ;
+    int bytes = 0 ;
+
+    memset (&event, 0 , sizeof(mtc_message_type));
+
+    if (( event_code == MTC_EVENT_MONITOR_READY)||
+        ( event_code == MTC_EVENT_HWMON_CLEAR )||
+        ( event_code == MTC_EVENT_HWMON_CONFIG) ||
+        ( event_code == MTC_EVENT_HWMON_MINOR) ||
+        ( event_code == MTC_EVENT_HWMON_MAJOR) ||
+        ( event_code == MTC_EVENT_HWMON_CRIT ) ||
+        ( event_code == MTC_EVENT_HWMON_RESET ) ||
+        ( event_code == MTC_EVENT_HWMON_POWERDOWN) ||
+        ( event_code == MTC_EVENT_HWMON_POWERCYCLE)||
+        ( event_code == MTC_DEGRADE_RAISE ) ||
+        ( event_code == MTC_DEGRADE_CLEAR ))
+    {
+        /* never hand a NULL pointer to a %s conversion */
+        mlog ("%s sending '%s' event to mtcAgent for '%s'\n",
+                  hostname.c_str(),
+                  get_event_str(event_code).c_str(),
+                  sensor_ptr ? sensor_ptr : "none" );
+
+        snprintf ( &event.hdr[0], MSG_HEADER_SIZE, "%s", get_mtce_event_header());
+
+        snprintf ( &event.hdr[MSG_HEADER_SIZE] , MAX_CHARS_HOSTNAME , "%s", hostname.data());
+
+        /* Without a sensor name only the part of the message that
+         * precedes the buffer is sent ; same size the loopback case uses.
+         * Previously this case left 'bytes' at 0 and sent nothing. */
+        bytes = ((sizeof(mtc_message_type))-(BUF_SIZE));
+
+        if ( sensor_ptr )
+        {
+            size_t len = strnlen ( sensor_ptr, MAX_SENSOR_NAME_LEN );
+
+            /* We don't use the buffer for hwmon events to remove it from the size */
+            bytes = ((sizeof(mtc_message_type))-(BUF_SIZE-len));
+
+            snprintf ( &event.buf[0], MAX_SENSOR_NAME_LEN, "%s", sensor_ptr );
+        }
+    }
+    else if ( event_code == MTC_EVENT_LOOPBACK )
+    {
+        snprintf ( &event.hdr[MSG_HEADER_SIZE] , MAX_CHARS_HOSTNAME , "%s", hostname.data());
+        snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_loopback_header());
+
+        /* We don't use the buffer for hwmon events to remove it from the size */
+        bytes = ((sizeof(mtc_message_type))-(BUF_SIZE));
+    }
+    else
+    {
+        elog ("Unsupported process monitor event (%d)\n", event_code );
+        return ( FAIL_BAD_PARM );
+    }
+
+    /* The socket pointer is zero until event_tx_port_init has run and
+     * again after hwmon_msg_fini ; don't dereference it in that state */
+    if ( hwmon_sock.event_sock == NULL )
+    {
+        elog ("%s cannot send event (%u) ; event socket is not setup\n",
+                  hostname.c_str(), event_code );
+        return ( FAIL_SOCKET_SENDTO );
+    }
+
+    /* Update the event code */
+    event.cmd = event_code ;
+
+    /* Send the event */
+    if ((rc = hwmon_sock.event_sock->write((char*)&event.hdr[0],bytes)) != bytes )
+    {
+        elog ("Message send failed. (%d)\n", rc);
+        elog ("Message: %d bytes to <%s:%d>\n", bytes,
+                  hwmon_sock.event_sock->get_dst_str(),
+                  hwmon_sock.event_sock->get_dst_addr()->getPort());
+        rc = FAIL_SOCKET_SENDTO ;
+    }
+    else
+    {
+        mlog ("Sending '%s' Event with %d bytes to %s:%d\n",
+                  get_event_str (event.cmd).c_str(), bytes,
+                  hwmon_sock.event_sock->get_dst_str(),
+                  hwmon_sock.event_sock->get_dst_addr()->getPort());
+        print_mtc_message (&event);
+        rc = PASS ;
+    }
+    return rc ;
+}
+
+/* Receive maintenance command messages */
+int hwmon_service_inbox ( void )
+{
+    int bytes ;
+    mtc_message_type msg ;
+
+    int rc = PASS ;
+
+    /* clean the rx/tx buffer */
+    memset ((void*)&msg,0,sizeof(mtc_message_type));
+    bytes = hwmon_sock.cmd_sock->read((char*)&msg.hdr[0], sizeof(mtc_message_type));
+    if( bytes <= 0 )
+    {
+        if ( ( errno == EINTR ) || ( errno == EAGAIN ))
+        {
+            return (RETRY);
+        }
+        else
+        {
+            elog ("receive error (%d:%s)\n", errno , strerror (errno));
+            return (FAIL_TO_RECEIVE);
+        }
+    }
+    /* Check for response messages */
+    else if ( strstr (
&msg.hdr[0], get_cmd_req_msg_header() ) ) + { + node_inv_type inv ; + node_inv_init (inv); + + mlog("Receive <%s> from %s:%x\n", &msg.hdr[0], + hwmon_sock.cmd_sock->get_src_str(), + hwmon_sock.cmd_sock->get_dst_addr()->getPort()); + + print_mtc_message ( &msg ); + + if ( !strnlen ( &msg.hdr[MSG_HEADER_SIZE], MAX_CHARS_HOSTNAME )) + { + wlog ("Mtce message (%x) did not specify target hostname\n", msg.cmd ); + return (FAIL_UNKNOWN_HOSTNAME); + } + + rc = hwmonJson_load_inv ( &msg.buf[0], inv ); + if ( rc ) + { + wlog ("%s failed to parse host info\n", inv.name.c_str()); + return (FAIL_KEY_VALUE_PARSE); + } + + rc = PASS; + if ( msg.cmd == MTC_CMD_ADD_HOST ) + { + /* If the add returns a RETRY that means this host was already + * provisioned so turn around and run the modify */ + if ( get_hwmonHostClass_ptr()->add_host ( inv ) == RETRY ) + { + mlog ("%s modify host (from add ) message\n", inv.name.c_str()); + get_hwmonHostClass_ptr()->mod_host ( inv ); + } + else + { + mlog ("%s add host message\n", inv.name.c_str()); + } + + } + else if ( msg.cmd == MTC_CMD_DEL_HOST ) + { + hwmonHostClass * obj_ptr = get_hwmonHostClass_ptr(); + ilog ("%s Delete Host message\n", inv.name.c_str()); + obj_ptr->request_del_host ( inv.name ); + } + else if ( msg.cmd == MTC_CMD_START_HOST ) + { + mlog ("%s start monitoring message\n", inv.name.c_str()); + get_hwmonHostClass_ptr()->mon_host ( inv.name , true ); + } + else if ( msg.cmd == MTC_CMD_STOP_HOST ) + { + mlog ("%s stop monitoring message\n", inv.name.c_str()); + get_hwmonHostClass_ptr()->mon_host ( inv.name , false ); + } + else if ( msg.cmd == MTC_CMD_MOD_HOST ) + { + /* If the add returns a RETRY that means this host was already + * provisioned so turn around and run the modify otherwise + * default the modify to be an add */ + if ( get_hwmonHostClass_ptr()->add_host ( inv ) == RETRY ) + { + mlog ("%s modify host message\n", inv.name.c_str()); + get_hwmonHostClass_ptr()->mod_host ( inv ); + } + else + { + mlog ("%s add host (from 
modify) message\n", inv.name.c_str()); + } + } + else if ( msg.cmd == MTC_CMD_QRY_HOST ) + { + mlog ("%s query host message - NOT IMPLEMENTED YET !!!\n", inv.name.c_str()); + } + else if ( msg.cmd == MTC_CMD_LOOPBACK ) + { + mlog ("Loopback command received\n"); + } + else + { + rc = FAIL_BAD_PARM ; + elog ( "Unsupported maintenance command (%d)\n", msg.cmd ); + } + } + else + { + elog ("Unsupported Message\n"); + print_mtc_message ( &msg ) ; + rc = FAIL_BAD_CASE ; + } + +#ifdef WANT_COMMAND_RESPONSE + + /* TODO: Test and enable reply message */ + // snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_rsp_msg_header()); + if ( rc == PASS ) + { + bytes = sizeof(mtc_message_type)-BUF_SIZE; + rc = sendto( hwmon_sock.mtc_client_tx_sock, + (char*)&msg.hdr[0], bytes , 0, + (struct sockaddr *) &hwmon_sock.agent_addr, + sizeof(hwmon_sock.agent_addr)); + if (rc != bytes ) + { + elog ("message send failed. (%d)\n", rc); + elog ("message: %d bytes to <%s>\n", bytes, + inet_ntoa(hwmon_sock.client_addr.sin_addr )); + rc = FAIL ; + } + else + { + mlog ("Response: <%s> to %s:%d\n", &msg.hdr[0], + inet_ntoa(hwmon_sock.client_addr.sin_addr), + ntohs(hwmon_sock.agent_addr.sin_port)); + } + fflush(stdout); + } +#endif + return (rc); +} + diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonParse.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonParse.cpp new file mode 100644 index 00000000..e69de29b diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonSensor.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonSensor.cpp new file mode 100644 index 00000000..333d0abd --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonSensor.cpp @@ -0,0 +1,2281 @@ +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + * + * + * @file + * Wind River Titanium Cloud Hardware Monitor "Sensor" Utilities + */ + +#include "daemon_ini.h" /* for ... parse_ini and MATCH */ +#include "nodeBase.h" /* for ... 
mtce common definitions */ +#include "jsonUtil.h" /* for ... json utilitiies */ +#include "nodeUtil.h" /* for ... mtce common utilities */ +#include "hwmonUtil.h" /* for ... get_severity */ +#include "hwmonClass.h" /* for ... service class definition */ +#include "hwmonHttp.h" /* for ... hwmonHttp_load_sensors */ +#include "hwmonSensor.h" /* for ... this module header */ +#include "hwmonGroup.h" /* for ... ipmi_get_grouptype */ +#include "hwmonAlarm.h" /* for ... hwmonAlarm */ + +#define DELIMITER ((const char)',') + +/**************************************************************************** + * + * Name : hwmonSensor_print + * + * Purpose: Print the contents of the pointed to sensor + * + *****************************************************************************/ +void hwmonSensor_print ( string & hostname, sensor_type * sensor_ptr ) +{ + const char bar [] = {"+--------------------------------------------------\n" } ; + if ( sensor_ptr ) + { + syslog ( LOG_INFO, "%s", bar); + syslog ( LOG_INFO, "| sensor info : %s '%s' sensor\n", sensor_ptr->hostname.c_str(), sensor_ptr->sensorname.c_str()); + syslog ( LOG_INFO, "| uuid : %s\n", sensor_ptr->uuid.c_str()); + syslog ( LOG_INFO, "| group uuid : %s\n", sensor_ptr->group_uuid.c_str()); + syslog ( LOG_INFO, "| sensortype : %s\n", sensor_ptr->sensortype.c_str()); + syslog ( LOG_INFO, "| datatype : %s\n", sensor_ptr->datatype.c_str()); + syslog ( LOG_INFO, "| minor : %s\n", sensor_ptr->actions_minor.c_str()); + syslog ( LOG_INFO, "| major : %s\n", sensor_ptr->actions_major.c_str()); + syslog ( LOG_INFO, "| critical : %s\n", sensor_ptr->actions_critl.c_str()); + syslog ( LOG_INFO, "| state:status : %s-%s\n", sensor_ptr->state.c_str(), sensor_ptr->status.c_str()); + syslog ( LOG_INFO, "| command : %s\n", sensor_ptr->path.c_str()); + syslog ( LOG_INFO, "| algorithm : %s\n", sensor_ptr->algorithm.c_str()); + syslog ( LOG_INFO, "| suppress : %s\n", sensor_ptr->suppress ? 
"True" : "False" ); + if ( !sensor_ptr->datatype.compare("analog") ) + { + // syslog ( LOG_INFO, "%s",bar); + syslog ( LOG_INFO, "| minor thld: %5.3f <-> %5.3f \n", sensor_ptr->t_minor_lower, sensor_ptr->t_minor_upper ); + syslog ( LOG_INFO, "| major thld: %5.3f <-> %5.3f \n", sensor_ptr->t_major_lower, sensor_ptr->t_major_upper ); + syslog ( LOG_INFO, "| critical thld: %5.3f <-> %5.3f \n", sensor_ptr->t_critical_lower, sensor_ptr->t_critical_upper ); + syslog ( LOG_INFO, "| unit info : [base:%s] [rate:%s] [modifier:%s]\n", + sensor_ptr->unit_base.c_str(), + sensor_ptr->unit_rate.c_str(), + sensor_ptr->unit_modifier.c_str()); + } + } + else + { + slog ("%s cannot print a NULL sensor\n", hostname.c_str() ); + } +} + + +/**************************************************************************** + * + * Name : hwmonGroup_print + * + * Purpose: Print the contents of the pointed to sensor group + * + *****************************************************************************/ +void hwmonGroup_print ( string & hostname, struct sensor_group_type * group_ptr ) +{ + const char bar [] = {"+--------------------------------------------------------\n" } ; + if ( group_ptr ) + { + syslog ( LOG_INFO, "%s", bar); + syslog ( LOG_INFO, "| group info : %s '%s' group\n", hostname.c_str(), group_ptr->group_name.c_str()); + syslog ( LOG_INFO, "| group uuid : %s\n", group_ptr->group_uuid.c_str()); + syslog ( LOG_INFO, "| sensortype : %s\n", group_ptr->sensortype.c_str()); + syslog ( LOG_INFO, "| datatype : %s\n", group_ptr->datatype.c_str()); + syslog ( LOG_INFO, "| group minor choices : %s\n", group_ptr->actions_minor_choices.c_str()); + syslog ( LOG_INFO, "| group minor actions : %s\n", group_ptr->actions_minor_group.c_str()); + syslog ( LOG_INFO, "| group major choices : %s\n", group_ptr->actions_major_choices.c_str()); + syslog ( LOG_INFO, "| group major actions : %s\n", group_ptr->actions_major_group.c_str()); + syslog ( LOG_INFO, "| group critical choices: %s\n", 
group_ptr->actions_critical_choices.c_str()); + syslog ( LOG_INFO, "| group critical actions: %s\n", group_ptr->actions_critl_group.c_str()); + syslog ( LOG_INFO, "| group state : %s\n", group_ptr->group_state.c_str()); + syslog ( LOG_INFO, "| algorithm : %s\n", group_ptr->algorithm.c_str()); + syslog ( LOG_INFO, "| group audit period : %d secs\n", group_ptr->group_interval ); + syslog ( LOG_INFO, "| group suppress : %s\n", group_ptr->suppress ? "True" : "False" ); + syslog ( LOG_INFO, "| group sensor read cmd : %s\n", group_ptr->path.c_str()); + syslog ( LOG_INFO, "| group sensors (count) : %d\n", group_ptr->sensors); + if ( !group_ptr->sensor_labels.empty() ) + { + syslog ( LOG_INFO, "| group sensor labels : %s\n", group_ptr->sensor_labels.c_str()); + } + if ( group_ptr->sensors ) + { + for ( int s = 0 ; s < group_ptr->sensors ; ++s ) + { + if ( group_ptr->sensor_ptr[s] != NULL ) + { + syslog ( LOG_INFO, "| group sensor %02d : %s\n", s, group_ptr->sensor_ptr[s]->sensorname.c_str()); + } + } + } + if ( !group_ptr->datatype.compare("analog") ) + { + // syslog ( LOG_INFO, "%s",bar); + syslog ( LOG_INFO, "| minor thld: %5.3f <-> %5.3f \n", + group_ptr->t_minor_lower_group, + group_ptr->t_minor_upper_group ); + syslog ( LOG_INFO, "| major thld: %5.3f <-> %5.3f \n", + group_ptr->t_major_lower_group, + group_ptr->t_major_upper_group ); + syslog ( LOG_INFO, "| critical thld: %5.3f <-> %5.3f \n", + group_ptr->t_critical_lower_group, + group_ptr->t_critical_upper_group ); + + syslog ( LOG_INFO, "| unit info : [base:%s] [rate:%s] [modifier:%s]\n", + group_ptr->unit_base_group.c_str(), + group_ptr->unit_rate_group.c_str(), + group_ptr->unit_modifier_group.c_str()); + } + } + else + { + slog ("%s cannot print a NULL group\n", hostname.c_str()); + } +} + +/**************************************************************************** + * + * Name : hwmonSensor_init + * + * Purpose: Initialize a sensor_type struct to default values + * + 
*****************************************************************************/
+void hwmonSensor_init ( string & hostname , sensor_type * sensor_ptr )
+{
+    if ( sensor_ptr )
+    {
+        sensor_ptr->hostname = hostname ;
+        sensor_ptr->bmc.clear();
+        sensor_ptr->uuid.clear();
+        sensor_ptr->host_uuid.clear();
+        sensor_ptr->group_uuid.clear();
+        sensor_ptr->sensorname.clear();
+        sensor_ptr->sensortype.clear();
+        sensor_ptr->datatype = DISCRETE ; /* should really be ANALOG */
+        sensor_ptr->suppress = false ;
+
+        sensor_ptr->debounce_count = 0 ;
+        sensor_ptr->want_debounce_log_if_ok = false ;
+        sensor_ptr->algorithm = "debounce-1.v1" ;
+
+        sensor_ptr->status = "offline" ;
+        sensor_ptr->state = "disabled" ;
+
+        sensor_ptr->script.clear();
+        sensor_ptr->path.clear() ;
+        sensor_ptr->entity_path.clear();
+
+        sensor_ptr->unit_base.clear();
+        sensor_ptr->unit_rate.clear();
+        sensor_ptr->unit_modifier.clear();
+
+        sensor_ptr->prot = PROTOCOL__NONE ;
+        sensor_ptr->kind = SENSOR_KIND__NONE ;
+        sensor_ptr->unit = SENSOR_UNIT__NONE ;
+
+        /* default severity to action mapping */
+        sensor_ptr->actions_minor = HWMON_ACTION_IGNORE ;
+        sensor_ptr->actions_major = HWMON_ACTION_LOG ;
+        sensor_ptr->actions_critl = HWMON_ACTION_ALARM ;
+
+        /* default thresholds */
+        sensor_ptr->t_minor_lower = 1.000 ;
+        sensor_ptr->t_major_lower = 5.000 ;
+        sensor_ptr->t_critical_lower = 10.000 ;
+        sensor_ptr->t_minor_upper = 1.000 ;
+        sensor_ptr->t_major_upper = 5.000 ;
+        sensor_ptr->t_critical_upper = 10.000 ;
+
+        /* PATCHBACK - should patchback to REL3 and earlier */
+        sensor_ptr->severity =
+        sensor_ptr->sample_severity = HWMON_SEVERITY_GOOD ;
+
+        sensor_ptr->sample_status =
+        sensor_ptr->sample_status_last = "ok" ;
+
+        sensor_ptr->updated = false ;
+        sensor_ptr->found = false ;
+        sensor_ptr->degraded = false ;
+        sensor_ptr->alarmed = false ;
+
+        sensor_ptr->not_found_log_throttle = 0 ;
+        sensor_ptr->not_updated_status_change_count = 0 ;
+
+        clear_logged_state ( sensor_ptr );
+        clear_ignored_state ( sensor_ptr );
+        clear_alarmed_state ( sensor_ptr );
+
+    }
+    else
+    {
+        slog ("%s cannot init a NULL sensor\n", hostname.c_str());
+    }
+}
+
+/****************************************************************************
+ *
+ * Name       : hwmonGroup_init
+ *
+ * Purpose    : Initialize a sensor_group_type struct to default values
+ *
+ * Parameters : hostname  - stored in the group and used for logging
+ *              group_ptr - group to initialize ; a NULL pointer is
+ *                          logged and otherwise ignored
+ *
+ *****************************************************************************/
+void hwmonGroup_init ( string & hostname , struct sensor_group_type * group_ptr )
+{
+    if ( group_ptr )
+    {
+        hwmonHostClass * obj_ptr = get_hwmonHostClass_ptr();
+
+        group_ptr->hostname = hostname ;
+        group_ptr->group_name.clear();
+        group_ptr->host_uuid.clear();
+        group_ptr->group_uuid.clear();
+        group_ptr->sensortype.clear();
+        group_ptr->datatype = DISCRETE ; /* should really be ANALOG */
+
+        group_ptr->status.clear();
+        group_ptr->unit_base_group.clear();
+        group_ptr->unit_rate_group.clear();
+        group_ptr->unit_modifier_group.clear();
+        group_ptr->path = "na" ;
+        group_ptr->sensor_labels.clear();
+        group_ptr->sensor_read_index = 0 ;
+        group_ptr->group_interval = HWMON_DEFAULT_AUDIT_INTERVAL ;
+
+        /* Number of sensors in this group followed by an
+         * array of those sensor pointers
+         * All this is initialized here */
+        group_ptr->sensors = 0 ;
+        for ( int i = 0 ; i < MAX_HOST_SENSORS ; i++ )
+            group_ptr->sensor_ptr[i] = NULL ;
+
+        /* Every severity level offers ignore,log,alarm ;
+         * build that comma separated list once */
+        string common_choices ;
+        common_choices = HWMON_ACTION_IGNORE ;
+        common_choices.append(",");
+        common_choices.append(HWMON_ACTION_LOG);
+        common_choices.append(",");
+        common_choices.append(HWMON_ACTION_ALARM);
+
+        group_ptr->actions_critical_choices = common_choices ;
+
+        /* Don't support reset and power cycle in AIO simplex mode */
+        if ( obj_ptr->system_type != SYSTEM_TYPE__CPE_MODE__SIMPLEX )
+        {
+            group_ptr->actions_critical_choices.append(",");
+            group_ptr->actions_critical_choices.append(HWMON_ACTION_RESET);
+            group_ptr->actions_critical_choices.append(",");
+            group_ptr->actions_critical_choices.append(HWMON_ACTION_POWERCYCLE);
+        }
+
+        group_ptr->actions_major_choices = common_choices ;
+        group_ptr->actions_minor_choices = common_choices ;
+
+        group_ptr->suppress = false ;
+        group_ptr->algorithm = "debounce-1.v1" ;
+        group_ptr->group_state = "disabled" ;
+
+        group_ptr->active = false ;
+        group_ptr->timeout = false ;
+        group_ptr->failed = false ;
+        group_ptr->alarmed = false ;
+
+        /* default severity to action mapping */
+        group_ptr->actions_minor_group = HWMON_ACTION_IGNORE ;
+        group_ptr->actions_major_group = HWMON_ACTION_LOG ;
+        group_ptr->actions_critl_group = HWMON_ACTION_ALARM ;
+
+        /* default thresholds */
+        group_ptr->t_minor_lower_group = 1.000 ;
+        group_ptr->t_major_lower_group = 5.000 ;
+        group_ptr->t_critical_lower_group = 10.000 ;
+        group_ptr->t_minor_upper_group = 1.000 ;
+        group_ptr->t_major_upper_group = 5.000 ;
+        group_ptr->t_critical_upper_group = 10.000 ;
+    }
+    else
+    {
+        /* was "cannot print" ; copy/paste from hwmonGroup_print */
+        slog ("%s cannot init a NULL group\n", hostname.c_str());
+    }
+}
+
+/* *************************************************************************
+ *
+ * Name       : load_profile_groups
+ *
+ * Description: Load all the sensor groups from the profile file.
+ * + * Scope : private hwmonHostClass + * + * Parameters : host_ptr , a pointer to a group sensor array with + * up to MAX_HOST_GROUPS elements + * + * Returns : The number of groups that were loaded + * + * *************************************************************************/ +int hwmonHostClass::load_profile_groups ( struct hwmonHostClass::hwmon_host * host_ptr, + struct sensor_group_type * group_array_ptr, + int max , bool & error ) +{ + int groups_found = 0 ; + + error = false ; + + if ( ( max <= MAX_HOST_GROUPS ) && ( group_array_ptr ) ) + { + int rc ; + + string sensor_group_types = "" ; + std::list sensor_group_types_list ; + std::list::iterator types_iter_ptr ; + + /* get the top level group types from the profile file + * in the SERVER:group_types heading , i.e. TEMPERATURE */ + rc = ini_get_config_value ( QUANTA_SENSOR_PROFILE_FILE, + "SERVER", "group_types", sensor_group_types , false ); + if ( rc ) /* handle error case */ + { + elog ("%s failed to find '[SERVER] -> 'group_types' label in %s file\n", + host_ptr->hostname.c_str(), QUANTA_SENSOR_PROFILE_FILE); + return rc ; + } + + /* Start with a fresh group type list and load the detected group_types into it */ + sensor_group_types_list.clear(); + rc = get_delimited_list ( sensor_group_types, DELIMITER , sensor_group_types_list, true ) ; + + /* Note: badly parsed group_types , sensors are skipped over with error messages logged + * + * TODO: keep track of an error and raise the SENSORCFG Alarm + * + **/ + /* loop over each 'group_type' looking for the 'groups' */ + for ( types_iter_ptr = sensor_group_types_list.begin(); + types_iter_ptr != sensor_group_types_list.end() ; + ++types_iter_ptr ) + { + string sensor_groups = "" ; + std::list sensor_groups_list ; + + daemon_signal_hdlr (); + + dlog ("%s [%s]\n", host_ptr->hostname.c_str(), types_iter_ptr->c_str()); + + /* get the 'groups' within this 'group type' + * in the [[SERVER]:group_type] -> 'groups' heading, i.e. 
TEMPERATURE1 */ + rc = ini_get_config_value ( QUANTA_SENSOR_PROFILE_FILE, + *types_iter_ptr, + "groups", + sensor_groups, false ); + if ( rc ) + { + elog ("%s '%s' group type parse error ... skipping (%d)\n", + host_ptr->hostname.c_str(), types_iter_ptr->c_str(), rc ); + error = true ; + continue ; + } + + /* get the list of groups from the []:groups label */ + sensor_groups_list.clear(); + rc = get_delimited_list ( sensor_groups, DELIMITER , sensor_groups_list, true ) ; + if ( rc ) + { + elog ("%s '%s' failed to get group type list ... skipping (%d)\n", + host_ptr->hostname.c_str(), types_iter_ptr->c_str(), rc); + + elog ("%s '%s' error string: %s\n", host_ptr->hostname.c_str(), + types_iter_ptr->c_str(), sensor_groups.c_str()); + + error = true ; + continue ; + } + + /****************************************************************** + * Look for the 'groups' + *****************************************************************/ + std::list::iterator groups_iter_ptr ; + + dlog2 ("%s groups list: %s\n", host_ptr->hostname.c_str(), + sensor_groups.c_str()); + + for ( groups_iter_ptr = sensor_groups_list.begin(); + groups_iter_ptr != sensor_groups_list.end() ; + ++groups_iter_ptr ) + { + /* Start from a fresh default */ + hwmonGroup_init ( host_ptr->hostname, group_array_ptr ); + + daemon_signal_hdlr (); + + /* Get the group name for each group */ + group_array_ptr->group_name.clear() ; + rc = ini_get_config_value ( QUANTA_SENSOR_PROFILE_FILE, + *groups_iter_ptr, "group", + group_array_ptr->group_name, false ); + if ( rc ) + { + elog ("%s '%s' group parse error ... 
skipping (%d)\n", + host_ptr->hostname.c_str(), + groups_iter_ptr->c_str(), rc ); + + error = true ; + continue ; + } + + /************************************************************************* + * Read the sensor group attributes + *************************************************************************/ + + /* sensortype */ + rc = ini_get_config_value ( QUANTA_SENSOR_PROFILE_FILE, + *groups_iter_ptr, "sensortype", + group_array_ptr->sensortype, false ); + if ( rc ) + { + elog ("%s 'sensortype' for '%s' sensor group is unknown\n", + host_ptr->hostname.c_str(), + group_array_ptr->group_name.c_str()); + error = true ; + continue ; + } + + /* datatype */ + rc = ini_get_config_value ( QUANTA_SENSOR_PROFILE_FILE, + *groups_iter_ptr, "datatype", + group_array_ptr->datatype, false ); + if ( rc ) + { + elog ("%s 'datatype' for '%s' sensor group is unknown\n", + host_ptr->hostname.c_str(), + group_array_ptr->group_name.c_str()); + error = true ; + continue ; + } + + /* interval */ + string interval ; + rc = ini_get_config_value ( QUANTA_SENSOR_PROFILE_FILE, + *groups_iter_ptr, "interval", + interval , false ); + if ( rc ) + { + elog ("%s 'interval' for '%s' sensor group is unknown\n", + host_ptr->hostname.c_str(), + group_array_ptr->group_name.c_str()); + error = true ; + continue ; + } + group_array_ptr->group_interval = atoi(interval.data()) ; /* seconds */ + + /* group read command (cmd) + * This may be over ridden by a sensor specific command + * TODO: add support for multiple commands + **/ + rc = ini_get_config_value ( QUANTA_SENSOR_PROFILE_FILE, + *groups_iter_ptr, "cmd", + group_array_ptr->path, false ); + if ( rc ) + { + elog ("%s 'sensor read command' for '%s' sensor group is unknown ; skipping group\n", + host_ptr->hostname.c_str(), + group_array_ptr->group_name.c_str()); + error = true ; + continue ; + } + + /************************************************************************** + * Get the list of sensors for each group from the profile file + * + * 
SERVER:group_types + * :groups + * :sensors <------- + **/ + rc = ini_get_config_value ( QUANTA_SENSOR_PROFILE_FILE, + *groups_iter_ptr, "sensors", + group_array_ptr->sensor_labels, false ); + if ( rc ) + { + elog ("%s failed to read 'sensor' list for '%s' sensor group\n", + host_ptr->hostname.c_str(), + group_array_ptr->group_name.c_str()); + error = true ; + continue ; + } + + ilog ("%s '%s' group loaded from profile (%d) (cmd:%s)\n", + host_ptr->hostname.c_str(), + group_array_ptr->group_name.c_str(), + groups_found, group_array_ptr->path.c_str()); + + group_array_ptr++ ; + groups_found++ ; + } + + } /* types_iter_ptr for loop end */ + } + + return (groups_found); +} + +/* ************************************************************************* + * + * Name : load_profile_sensors + * + * Description: Load all the sensors for each group. + * + * Scope : private hwmonHostClass + * + * Parameters : host_ptr , a pointer to a sensor array with + * up to MAX_HOST_SENSORS elements + * + * Returns : The number of sensors that were loaded + * + * *************************************************************************/ +int hwmonHostClass::load_profile_sensors ( struct hwmonHostClass::hwmon_host * host_ptr, + sensor_type * sensor_array_ptr, int max, + bool & error ) +{ + int sensors_found = 0 ; + + if ( ( max <= MAX_HOST_SENSORS ) && ( sensor_array_ptr ) ) + { + std::list sensor_sensors_list ; + std::list::iterator sensors_iter_ptr ; + + string sensor = "" ; + string sensor_read_cmd = "" ; + + for ( int g = 0 ; g < host_ptr->groups ; ++g ) + { + sensor_sensors_list.clear(); + int rc = get_delimited_list ( host_ptr->group[g].sensor_labels, DELIMITER , sensor_sensors_list, false ); + if ( rc ) + { + elog ("%s '%s' failed to get sensor list ... 
skipping\n", + host_ptr->hostname.c_str(), + host_ptr->group[g].group_name.c_str()); + elog ("%s error string: %s\n", + host_ptr->hostname.c_str(), + host_ptr->group[g].sensor_labels.c_str()); + error = true ; + continue ; + } + + blog ("%s '%s' has %ld sensors (%s)\n", + host_ptr->hostname.c_str(), + host_ptr->group[g].group_name.c_str(), + sensor_sensors_list.size(), + host_ptr->group[g].sensor_labels.c_str()); + + for ( sensors_iter_ptr = sensor_sensors_list.begin(); + sensors_iter_ptr != sensor_sensors_list.end() ; + ++sensors_iter_ptr ) + { + sensor.clear(); + + dlog1 ("%s '%s' sensor group label: %s\n", host_ptr->hostname.c_str(), + host_ptr->group[g].group_name.c_str(), + sensors_iter_ptr->c_str()); + + rc = ini_get_config_value ( QUANTA_SENSOR_PROFILE_FILE, + *sensors_iter_ptr, "name", sensor, false ); + if ( rc ) + { + elog ("%s %s '%s' sensor label 'name' parse error ... skipping\n", + host_ptr->hostname.c_str(), + host_ptr->group[g].group_name.c_str(), + sensors_iter_ptr->c_str()); + error = true ; + continue ; + } + else + { + hwmonSensor_init ( host_ptr->hostname, sensor_array_ptr ); + + dlog ("%s '%s' group has '%s' sensor\n", + host_ptr->hostname.c_str(), + host_ptr->group[g].group_name.c_str(), + sensor.c_str()); + + + /* fetch the sensor specific read command if there is one (cmd) + * maybe_missing parm is true so as to avoid ailing when there is + * no sensor specific command ; which is most of he time */ + sensor_read_cmd.clear(); + ini_get_config_value ( QUANTA_SENSOR_PROFILE_FILE, *sensors_iter_ptr, "cmd", sensor_read_cmd, true ); + if ( sensor_read_cmd.empty() ) + { + sensor_array_ptr->entity_path = sensor ; + } + else + { + sensor_array_ptr->path = sensor_read_cmd ; + host_ptr->group[g].path.clear(); + + sensor_array_ptr->entity_path = sensor_read_cmd ; + sensor_array_ptr->entity_path.append(ENTITY_DELIMITER); + sensor_array_ptr->entity_path.append(sensor); + } + + sensor_array_ptr->sensorname = sensor ; + sensor_array_ptr->sensortype = 
host_ptr->group[g].sensortype ; + sensor_array_ptr->datatype = host_ptr->group[g].datatype ; + + + /* group uuid is in host_ptr at this point */ + sensor_array_ptr->group_uuid = host_ptr->group[g].group_uuid ; + + if ( host_ptr->group[g].group_uuid.empty() ) + { + wlog ("%s '%s' had empty uuid ; grouping will fail\n", + host_ptr->hostname.c_str(), + host_ptr->group[g].group_name.c_str()); + error = true ; + continue ; + + } + sensor_array_ptr++ ; + sensors_found++ ; + } + } + } + } + return (sensors_found); +} + +int hwmonHostClass::delete_unwanted_sensors ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + int rc = PASS ; + for ( int s = 0 ; s < host_ptr->sensors ; s++ ) + { + if (( host_ptr->sensor[s].sensorname == "PCH Thermal Trip" ) || + ( host_ptr->sensor[s].sensorname == "MB Thermal Trip" ) || + ( host_ptr->sensor[s].sensorname == "Temp_OCP")) + { + ilog ("%s %s sensor is being deleted from sensor model\n", + host_ptr->hostname.c_str(), + host_ptr->sensor[s].sensorname.c_str() ); + + clear_asserted_alarm ( host_ptr->hostname, + HWMON_ALARM_ID__SENSOR, + &host_ptr->sensor[s], + REASON_DEPROVISIONED ); + + hwmonHttp_del_sensor ( host_ptr->hostname, host_ptr->event, host_ptr->sensor[s]); + + rc = RETRY ; + } + } + return (rc); +} + + + +/* ************************************************************************* + * + * Name : hwmon_load_sensors + * + * Description: High level work horse procdure called by the Add FSM to + * + * 1. load all sensors and groups from the database. + * - hwmonHttp_load_sensors + * + * 2. read all sensors and groups from the profile file. + * + * 3. ensure that all the sensors and groups in the profile + * file are in the database and hardware monitor. + * - hwmonHttp_add_sensor (sysinv) + * - add_sensor (hwmon) + * + * 5. verify hat the sensors are read from hwmon correctly. + * - get_sensor (hwmon) + * + * 4. 
group all sensors
 *              - group_sensors (hwmon) and sysinv inside
 *
 * Scope      : private hwmonHostClass
 *
 * Parameters : host_ptr - host whose sensors are loaded
 *              error    - handed through to load_profile_sensors
 *
 * Returns    : PASS, or the return code of the first load, add, lookup
 *              or grouping step that did not PASS.
 *              TODO: handle modify errors better.
 *
 * *************************************************************************/
int hwmonHostClass::hwmon_load_sensors ( struct hwmonHostClass::hwmon_host * host_ptr, bool & error )
{
    /* Read the sensors sysinv already has for this host into the
     * host_ptr->sensor array. */
    int rc = hwmonHttp_load_sensors ( host_ptr->hostname, host_ptr->event );

    /* Read the list a second time when the unwanted sensor purge asks for a RETRY */
    if ( delete_unwanted_sensors ( host_ptr ) == RETRY )
    {
        ilog ("%s reloading sensors list\n", host_ptr->hostname.c_str());
        host_ptr->sensors = 0 ;
        rc = hwmonHttp_load_sensors ( host_ptr->hostname, host_ptr->event );
    }

    if ( rc != PASS )
    {
        return (rc);
    }

    sensor_type profile_array[MAX_HOST_SENSORS] ;
    const int profile_count = load_profile_sensors ( host_ptr, &profile_array[0], MAX_HOST_SENSORS, error );

    ilog ("%s has %d sensors in the profile file\n",
              host_ptr->hostname.c_str(),
              profile_count );

    /**
     *  Walk the profile file sensors. Each one must end up both in the
     *  sysinv database and in this host's control structure. A sensor
     *  that sysinv already returned is not added a second time.
     *
     *  TODO:ROBUST: Should have a check for sensors in the
     *               database that are not in the profile file.
     *               What to do ?
     **/
    for ( int p = 0 ; p < profile_count ; ++p )
    {
        sensor_type & profile = profile_array[p] ;

        rc = PASS ;

        /* Search the sensors loaded from the database for this entity path */
        bool provisioned = false ;
        for ( int s = 0 ; ( s < host_ptr->sensors ) && ( provisioned == false ) ; ++s )
        {
            daemon_signal_hdlr ();

            provisioned = ( profile.entity_path.compare(host_ptr->sensor[s].entity_path) == 0 ) ;
        }

        if ( provisioned == true )
        {
            blog1 ("%s '%s' sensor already provisioned (in sysinv) (rc:%d)\n",
                       host_ptr->hostname.c_str(),
                       profile.sensorname.c_str(), rc);
        }
        else
        {
            /* Not loaded from the database ; add it to sysinv first ... */
            ilog ( "%s '%s' sensor (add to sysinv)\n", host_ptr->hostname.c_str(), profile.sensorname.c_str());
            rc = hwmonHttp_add_sensor ( host_ptr->hostname, host_ptr->event, profile );
            if ( rc != PASS )
            {
                wlog ("%s '%s' sensor add failed (to sysinv) (rc:%d)\n",
                          host_ptr->hostname.c_str(),
                          profile.sensorname.c_str(), rc);
                break ;
            }

            profile.uuid = host_ptr->event.new_uuid ;
            blog1 ("%s '%s' sensor added (to sysinv)\n",
                       host_ptr->hostname.c_str(),
                       profile.sensorname.c_str());

            /* ... and only then to hwmon, now that sysinv accepted it */
            rc = add_sensor ( host_ptr->hostname, profile );

#ifdef WANT_FIT_TESTING
            if ( daemon_want_fit ( FIT_CODE__HWMON__ADD_SENSOR, host_ptr->hostname ))
                rc = FAIL ;
#endif

            if ( rc != PASS )
            {
                wlog ("%s '%s' sensor add failed (to hwmond) (rc:%d)\n",
                          host_ptr->hostname.c_str(),
                          profile.sensorname.c_str(), rc);
                break ;
            }

            blog1 ("%s '%s' sensor added (to hwmon) uuid:%s\n",
                       host_ptr->hostname.c_str(),
                       profile.sensorname.c_str(),
                       profile.uuid.c_str());
        }

        /* The lookup below is only done while the profile index is
         * inside the host's sensor count */
        if ( p >= host_ptr->sensors )
        {
            continue ;
        }

        /* Confirm hwmon holds the sensor and attach it to its group */
        sensor_type * sensor_ptr = get_sensor ( host_ptr->hostname, profile.entity_path ) ;

#ifdef WANT_FIT_TESTING
        if ( daemon_want_fit ( FIT_CODE__HWMON__GET_SENSOR, host_ptr->hostname ))
            sensor_ptr = NULL ;
#endif

        if ( sensor_ptr == NULL )
        {
            wlog ("%s '%s' sensor should have been but was not found (in hwmon)\n",
                      host_ptr->hostname.c_str(),
                      profile.sensorname.c_str());
            rc = FAIL ;
            break ;
        }

        /* Fill in what the add above did not carry over and start the
         * sensor from a clean state */
        sensor_ptr->group_uuid = profile.group_uuid ;
        sensor_ptr->degraded   = false ;
        sensor_ptr->alarmed    = false ;

        clear_ignored_state ( sensor_ptr ) ;
        clear_logged_state  ( sensor_ptr ) ;

        dlog1 ( "%s '%s' is in group %s (hwmon)\n", host_ptr->hostname.c_str(),
                    sensor_ptr->sensorname.c_str(),
                    sensor_ptr->group_uuid.c_str());

        if ( daemon_get_cfg_ptr()->debug_bmgmt > 1 )
        {
            hwmonSensor_print ( host_ptr->hostname, sensor_ptr );
        }
    }

    /* A failure above broke out of the loop with rc set */
    if ( rc != PASS )
    {
        return (rc);
    }

    /* Group all the sensors into the groups specified by the profile file */
    rc = hwmonHostClass::hwmon_group_sensors ( host_ptr );
    if ( rc == PASS )
    {
        blog ("%s sensors grouped\n", host_ptr->hostname.c_str());
    }
    else
    {
        wlog ("%s sensor grouping failed (rc:%d)\n", host_ptr->hostname.c_str(), rc );
    }
    return (rc);
}

/* *************************************************************************
 *
 * Name       : hwmon_load_groups
 *
 * Description: Read all the sensor groups from the database and profile file.
 *              Those in the database that match the profile file are loaded
 *              into hwmon directly. Those found in the profile file but
 *              missing from the database are added to the database and
 *              hardware monitor. Assign each sensor its correct group
 *              uuid and send sysinv the list of sensor uuids for each group.
+ * + * Scope : private hwmonHostClass + * + * Parameters : host_ptr + * + * Returns : + * + * *************************************************************************/ +int hwmonHostClass::hwmon_load_groups ( struct hwmonHostClass::hwmon_host * host_ptr , bool & error ) +{ + int rc = hwmonHttp_load_groups ( host_ptr->hostname, host_ptr->event ); + if ( rc == PASS ) + { + struct sensor_group_type group_array[MAX_HOST_GROUPS] ; + + int profile_groups = load_profile_groups ( host_ptr, &group_array[0], MAX_HOST_GROUPS, error ); + + ilog ("%s has %d sensor groups in profile file (%s)\n", + host_ptr->hostname.c_str(), + profile_groups, QUANTA_SENSOR_PROFILE_FILE); + + for ( int i = 0 ; i < profile_groups ; i++ ) + { + rc = PASS ; + bool found = false ; + + daemon_signal_hdlr (); + + for ( int j = 0 ; j < host_ptr->groups ; j++ ) + { + if ( !host_ptr->group[j].group_name.compare(group_array[i].group_name) ) + { + found = true ; + break ; + } + } + + if ( found == false ) + { + dlog ("%s '%s' group not found, adding (to sysinv/hwmon)\n", + host_ptr->hostname.c_str(), + group_array[i].group_name.c_str()); + + group_array[i].hostname = host_ptr->hostname ; + + rc = hwmonHttp_add_group ( host_ptr->hostname, host_ptr->event, group_array[i] ); + + if ( rc ) + { + wlog ("%s '%s' sensor group add failed [%s:%s] (to sysinv/hwmon) (rc:%d)\n", + group_array[i].hostname.c_str(), + group_array[i].group_name.c_str(), + group_array[i].datatype.c_str(), + group_array[i].sensortype.c_str(), rc ); + break ; + } + else + { + group_array[i].group_uuid = host_ptr->event.new_uuid ; + ilog ("%s '%s' sensor group added [%s:%s] (to sysinv)\n", + group_array[i].hostname.c_str(), + group_array[i].group_name.c_str(), + group_array[i].datatype.c_str(), + group_array[i].sensortype.c_str()); + } + + if ( rc == PASS ) + { + /** + * Only add this sensor to hwmon if it was + * successfully added to the sysinv database. 
+ **/ + if (( rc == PASS ) && ( found == false )) + { + rc = hwmon_add_group ( host_ptr->hostname, group_array[i] ); +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__HWMON__ADD_GROUP, host_ptr->hostname )) + rc = FAIL ; +#endif + + if ( rc != PASS ) + { + wlog ("%s '%s' sensor group add failed (to hwmon) (rc:%d)\n", + host_ptr->hostname.c_str(), + group_array[i].group_name.c_str(), rc); + break ; + } + else + { + blog1 ("%s '%s' sensor group added (to hwmon) uuid:%s\n", + host_ptr->hostname.c_str(), + group_array[i].group_name.c_str(), + group_array[i].group_uuid.c_str()); + } + } + } + /* error log is already printed */ + } + + /* tack on a few important elements */ + if (( rc == PASS ) && ( i < host_ptr->groups )) + { + struct sensor_group_type * group_ptr = hwmon_get_group ( host_ptr->hostname, group_array[i].group_name ) ; + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__HWMON__GET_GROUP, host_ptr->hostname )) + group_ptr = NULL ; +#endif + + /* Verify the sensor content */ + if ( group_ptr != NULL ) + { + /* Add the sensor label list for this group to the host_ptr group. + * This list was fetched and attached to the group array in load_profile_groups. + * Having it prevents the need to parse the profile file again to associate + * the sensors to a group all over again inside load_profile_sensors */ + group_ptr->sensor_labels = group_array[i].sensor_labels ; + + if ( daemon_get_cfg_ptr()->debug_bmgmt > 1 ) + { + hwmonGroup_print ( host_ptr->hostname, group_ptr ); + } + } + else + { + wlog ("%s '%s' sensor group should have been but was not found (in hwmon)\n", + host_ptr->hostname.c_str(), + group_array[i].group_name.c_str()); + rc = FAIL ; + break ; + } + } + } + } + return(rc); +} + +/* ************************************************************************* + * + * Name : hwmon_group_sensors + * + * Description: Assign the each sensor its correct group uuid and send + * sysinv the list of sensor uuids for each group. 
+ * + * To do this we have to loop over the groups 3 times + * 1. init the sensors in each group + * 2. assigned the sensors to the groups ; might not be linear + * 3. create sensor list + * + * Scope : private hwmonHostClass + * + * Parameters : host_ptr + * + * Returns : TODO: handle modify errors better. + * + * *************************************************************************/ + +/* TODO: make this a hardware independed implementation */ +int hwmonHostClass::hwmon_group_sensors ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + int rc = PASS ; + + string sensor_list = "" ; + + for ( int g = 0 ; g < host_ptr->groups ; ++g ) + { + host_ptr->group[g].sensors = 0 ; + } + + ilog ("%s has %d sensors across %d groups\n", + host_ptr->hostname.c_str(), + host_ptr->sensors, + host_ptr->groups); + + for ( int g = 0 ; g < host_ptr->groups ; ++g ) + { + /* search through all the sensors and put them in their correct group */ + for ( int s = 0 ; s < host_ptr->sensors ; ++s ) + { + if ( !host_ptr->group[g].group_uuid.compare(host_ptr->sensor[s].group_uuid)) + { + /* Update this group with the pointers to is sensors */ + host_ptr->group[g].sensor_ptr[host_ptr->group[g].sensors] = &host_ptr->sensor[s] ; + host_ptr->group[g].sensors++ ; + + dlog ("%s '%s' is assigned '%s' (%d)\n", host_ptr->hostname.c_str(), + host_ptr->group[g].group_name.c_str(), + host_ptr->sensor[s].sensorname.c_str(), + host_ptr->group[g].sensors); + } + } + } + + /* Build up the sensor uuid list for each + * group and send it to sysinv */ + for ( int g = 0 ; g < host_ptr->groups ; ++g ) + { + int count = 0 ; + bool first = true ; + + sensor_list.clear(); + + for ( int s = 0 ; s < host_ptr->sensors ; ++s ) + { + daemon_signal_hdlr (); + + /* only add it to the list got this group */ + if ( !host_ptr->group[g].group_uuid.compare(host_ptr->sensor[s].group_uuid) ) + { + /* make sure there is a sensor read command at the group or sensor level */ + if ( host_ptr->group[g].path.empty() && 
host_ptr->sensor[s].path.empty() ) + { + elog ("%s '%s:%s' no read command for this combo ; ignoring sensor\n", + host_ptr->hostname.c_str(), + host_ptr->group[g].group_name.c_str(), + host_ptr->sensor[s].sensorname.c_str()); + elog ("... group cmd:(%s) sensor cmd:(%s)\n", + host_ptr->group[g].path.c_str(), + host_ptr->sensor[s].path.c_str()); + } + else + { + /* Default each sensor to its groups action */ + host_ptr->sensor[s].actions_minor = host_ptr->group[g].actions_minor_group ; + host_ptr->sensor[s].actions_major = host_ptr->group[g].actions_major_group ; + host_ptr->sensor[s].actions_critl = host_ptr->group[g].actions_critl_group ; + + count++ ; + if ( first == false ) + { + sensor_list.append(","); + } + else + { + first = false ; + } + sensor_list.append(host_ptr->sensor[s].uuid); + + if ( count == host_ptr->group[g].sensors ) + break ; + } + } + } + + if ( sensor_list.empty() ) + { + wlog ("%s no sensors found for '%s' group ; should have %d\n", + host_ptr->hostname.c_str(), + host_ptr->group[g].group_name.c_str(), + host_ptr->group[g].sensors); + } + else if ( host_ptr->group[g].sensors != count ) + { + wlog ("%s incorrect number of sensors found for '%s' group ; has %d but should have %d\n", + host_ptr->hostname.c_str(), + host_ptr->group[g].group_name.c_str(), + count, + host_ptr->group[g].sensors); + } + else + { + groupSensors_print ( &host_ptr->group[g] ); + rc = hwmonHttp_group_sensors ( host_ptr->hostname, + host_ptr->event, + host_ptr->group[g].group_uuid, + sensor_list ); + } + } + + return (rc); +} + +void handle_new_suppression ( sensor_type * sensor_ptr ) +{ + clear_asserted_alarm ( sensor_ptr->hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr, REASON_SUPPRESSED ); +} + +action_state_type * _get_action_state_ptr ( sensor_type * sensor_ptr, + sensor_severity_enum hwmon_sev ) +{ + if ( sensor_ptr ) + { + if ( hwmon_sev == HWMON_SEVERITY_CRITICAL ) + return ( &sensor_ptr->critl ) ; + else if ( hwmon_sev == HWMON_SEVERITY_MAJOR ) + return ( 
&sensor_ptr->major ) ; + else if ( hwmon_sev == HWMON_SEVERITY_MINOR ) + return ( &sensor_ptr->minor ) ; + } + slog ("invalid parms (%p:%s)\n", sensor_ptr, get_severity(hwmon_sev).c_str()); + return (NULL); +} + +int _update_group_action ( struct sensor_group_type * group_ptr, sensor_severity_enum hwmon_sev, string new_action ) +{ + if ( hwmon_sev == HWMON_SEVERITY_CRITICAL ) + group_ptr->actions_critl_group = new_action ; + else if ( hwmon_sev == HWMON_SEVERITY_MAJOR ) + group_ptr->actions_major_group = new_action ; + else + group_ptr->actions_minor_group = new_action ; + + return (PASS); +} + +int _update_sensor_action ( sensor_type * sensor_ptr, sensor_severity_enum hwmon_sev, string new_action ) +{ + if ( hwmon_sev == HWMON_SEVERITY_CRITICAL ) + { + dlog ("%s %s critical action update '%s' -> '%s'\n", + sensor_ptr->hostname.c_str(), + sensor_ptr->sensorname.c_str(), + sensor_ptr->actions_critl.c_str(), + new_action.c_str()); + sensor_ptr->actions_critl = new_action ; + } + else if ( hwmon_sev == HWMON_SEVERITY_MAJOR ) + { + dlog ("%s %s major action update '%s' -> '%s'\n", + sensor_ptr->hostname.c_str(), + sensor_ptr->sensorname.c_str(), + sensor_ptr->actions_major.c_str(), + new_action.c_str()); + sensor_ptr->actions_major = new_action ; + } + else + { + dlog ("%s %s minor action update '%s' -> '%s'\n", + sensor_ptr->hostname.c_str(), + sensor_ptr->sensorname.c_str(), + sensor_ptr->actions_minor.c_str(), + new_action.c_str()); + sensor_ptr->actions_minor = new_action ; + } + return (PASS); +} + + +bool severity_match ( EFmAlarmSeverityT fm_severity, sensor_severity_enum hwmon_sev ) +{ + if ((( fm_severity == FM_ALARM_SEVERITY_MINOR ) && ( hwmon_sev == HWMON_SEVERITY_MINOR )) || + (( fm_severity == FM_ALARM_SEVERITY_MAJOR ) && ( hwmon_sev == HWMON_SEVERITY_MAJOR )) || + (( fm_severity == FM_ALARM_SEVERITY_CRITICAL ) && ( hwmon_sev == HWMON_SEVERITY_CRITICAL )) || + (( fm_severity == FM_ALARM_SEVERITY_CRITICAL ) && ( hwmon_sev == HWMON_SEVERITY_NONRECOVERABLE 
))) + return (true); + return (false); +} + + +int _manage_action_change ( string hostname, + struct sensor_group_type * group_ptr, + string cur_action, + string new_action, + sensor_severity_enum hwmon_sev, + EFmAlarmSeverityT fm_severity ) +{ + + bool new_action__log = false ; + bool new_action__alarm = false ; + bool new_action__ignore = false ; + bool new_action__reset = false ; + bool new_action__powercycle = false ; + + bool cur_action__log = false ; + bool cur_action__alarm = false ; + bool cur_action__ignore = false ; + bool cur_action__reset = false ; + bool cur_action__powercycle = false ; + + if ( is_ignore_action ( new_action )) new_action__ignore = true ; + if ( is_log_action ( new_action )) new_action__log = true ; + if ( is_alarm_action ( new_action )) new_action__alarm = true ; + if ( is_reset_action ( new_action )) new_action__reset = true ; + if ( is_powercycle_action ( new_action )) new_action__powercycle = true ; + + if ( is_alarm_action ( cur_action )) cur_action__alarm = true ; + if ( is_ignore_action ( cur_action )) cur_action__ignore = true ; + if ( is_log_action ( cur_action )) cur_action__log = true ; + if ( is_reset_action ( cur_action )) cur_action__reset = true ; + if ( is_powercycle_action ( cur_action )) cur_action__powercycle = true ; + + + /* TODO: change these return codes to PASS once we know we don't get them */ + if (( new_action__log && cur_action__log ) || + ( new_action__alarm && cur_action__alarm ) || + ( new_action__ignore && cur_action__ignore ) || + ( new_action__reset && cur_action__reset ) || + ( new_action__powercycle && cur_action__powercycle )) + { + elog ("%s null '%s' sensor group action change for severity '%s' ; no action (%s to %s)\n", + hostname.c_str(), + group_ptr->group_name.c_str(), + get_severity(hwmon_sev).c_str(), + cur_action.c_str(), + new_action.c_str()); + return (FAIL_INVALID_OPERATION); + } + + ilog ("%s modifying '%s' sensor group '%s' action from '%s' to '%s'\n", + hostname.c_str(), + 
group_ptr->group_name.c_str(), + get_severity(hwmon_sev).c_str(), + cur_action.c_str(), + new_action.c_str()); + + /********************************************************************* + * There are 5 possible actions + * + * - alarm + * - log + * - ignore + * - reset + * - powercycle + * + * Any action can be changed to any other action + * Checks above ensure that no null action changes make it here + * + *********************************************************************/ + string reason = get_severity(hwmon_sev) + " severity level action "; + + if ( new_action__alarm ) reason.append(REASON_SET_TO_ALARM); + else if ( new_action__log ) reason.append(REASON_SET_TO_LOG); + else if ( new_action__ignore ) reason.append(REASON_SET_TO_IGNORE); + else if ( new_action__reset ) reason.append(REASON_SET_TO_RESET); + else if ( new_action__powercycle ) reason.append(REASON_SET_TO_POWERCYCLE); + + /* ... now all the sensors in that group */ + for ( int i = 0 ; i < group_ptr->sensors ; i++ ) + { + daemon_signal_hdlr(); + + sensor_type * sensor_ptr = group_ptr->sensor_ptr[i] ; + + if ( sensor_ptr->group_uuid != group_ptr->group_uuid ) + { + slog ("%s %s group:sensor uuid mismatch ; auto correcting\n", + hostname.c_str(), + sensor_ptr->sensorname.c_str() ); + + slog ("%s ... 
group:sensor [%s:%s]\n", + hostname.c_str(), + group_ptr->group_uuid.c_str(), + sensor_ptr->group_uuid.c_str()); + + sensor_ptr->group_uuid = group_ptr->group_uuid ; + } + + /* Only run change handling when the severity matches current status */ + if ( !severity_match ( fm_severity, sensor_ptr->severity ) ) + { + /* but we still need to update the action for each sensor */ + _update_sensor_action ( sensor_ptr, hwmon_sev, new_action ); + continue ; + } + +// string severity = get_severity(sensor_ptr->severity); + + /* get correct action state bools */ + action_state_type * action_state_ptr = _get_action_state_ptr ( sensor_ptr, hwmon_sev ); + if ( action_state_ptr == NULL ) + { + slog ("%s %s has invalid action state %d\n", + hostname.c_str(), + sensor_ptr->sensorname.c_str(), + hwmon_sev ); + + return (FAIL_INVALID_DATA); + } + + hwmonLog_clear ( hostname, HWMON_ALARM_ID__SENSORGROUP, group_ptr->group_name, reason ); + + /* Handle Action change away from ............. ALARM */ + if ( cur_action__alarm ) + { + /************************************************************* + * + * From 'alarm' to 'log' case + * -------------------------- + * + * If alarm is asserted then clear it in favor of a log + * + *************************************************************/ + if ( new_action__log ) + { + dlog ("%s %s action change from 'alarm' to 'log'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + /* cleanup and garbage collection */ + clear_ignored_state ( sensor_ptr ); + clear_logged_state ( sensor_ptr ); + + /* clear the current alarm if it exists */ + if ( action_state_ptr->alarmed == true ) + { + clear_severity_alarm ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, fm_severity, reason ) ; + } + + /* Produce a log if the sensor is reporting error status */ + if (( sensor_ptr->suppress == false ) && + ( sensor_ptr->status.compare("ok")) && + ( sensor_ptr->status.compare("offline"))) + { + hwmonLog ( hostname, HWMON_ALARM_ID__SENSOR, fm_severity, 
sensor_ptr->sensorname, REASON_OOT ); + set_logged_severity ( sensor_ptr , fm_severity ); + } + } + + /************************************************************* + * + * From 'alarm' to 'reset' + * ---------------------------- + * + * If alarm is asserted then clear it and let it get + * generated again in the handler if the severity condition + * persists. + * + *************************************************************/ + else if ( new_action__reset ) + { + dlog ("%s %s action change from 'alarm' to 'reset'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + /* cleanup and garbage collection */ + clear_ignored_state ( sensor_ptr ); + clear_logged_state ( sensor_ptr ); + + /* clear the current alarm if it exists */ + if ( action_state_ptr->alarmed == true ) + { + clear_severity_alarm ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, fm_severity, reason ) ; + } + } + + /************************************************************* + * + * From 'alarm' to 'powercycle' + * ---------------------------- + * + * If alarm is asserted then clear it and let it get + * generated again in the handler if the severity condition + * persists. + * + *************************************************************/ + else if ( new_action__powercycle ) + { + dlog ("%s %s action change from 'alarm' to 'powercycle'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + /* cleanup and garbage collection */ + clear_ignored_state ( sensor_ptr ); + clear_logged_state ( sensor_ptr ); + + /* clear the current alarm if it exists */ + if ( action_state_ptr->alarmed == true ) + { + clear_severity_alarm ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, fm_severity, reason ) ; + } + } + + /************************************************************* + * + * From 'alarm' to 'ignore' case + * ----------------------------- + * + * If alarm is asserted then clear it. 
+ * + *************************************************************/ + else /* ignore as default case */ + { + dlog ("%s %s action change from 'alarm' to 'ignore'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + /* cleanup and garbage collection */ + clear_ignored_state ( sensor_ptr ); + clear_logged_state ( sensor_ptr ); + + /* clear the current alarm if it exists */ + if ( action_state_ptr->alarmed == true ) + { + clear_severity_alarm ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, fm_severity, reason) ; + set_ignored_severity ( sensor_ptr, fm_severity ); + } + } + /* Clear alarm and degrade state */ + clear_alarmed_state ( sensor_ptr ); + clear_degraded_state ( sensor_ptr ); + } + + + /* Handle Action change away from ............. LOG */ + else if ( cur_action__log ) + { + /* Do auto correction / garbage collection */ + clear_alarmed_state ( sensor_ptr ); + clear_degraded_state ( sensor_ptr ); + clear_ignored_state ( sensor_ptr ); + + /************************************************************** + * + * From 'log' -> 'alarm' case + * -------------------------- + * + * If it was a log action and was logged and is now alarm + * action then send a log indicating that the current log is + * cleared. 
+ * + * Allow the alarm to get raised on the next status reading + * + **************************************************************/ + if ( new_action__alarm ) + { + dlog ("%s %s action change from 'log' to 'alarm'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + if ( action_state_ptr->logged == true ) + { + hwmonLog_clear ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason ); + } + } + + /********************************************************** + * + * From 'log' to 'reset' case + * -------------------------- + * + * Allow the alarm to get raised on the next status reading + * + *********************************************************/ + else if ( new_action__reset ) + { + dlog ("%s %s action change from 'log' to 'reset'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + if ( action_state_ptr->logged == true ) + { + hwmonLog_clear ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason ); + } + } + + /********************************************************** + * + * From 'log' to 'powercycle' case + * ------------------------------- + * + * Allow the alarm to get raised on the next status reading + * + *********************************************************/ + else if ( new_action__powercycle ) + { + dlog ("%s %s action change from 'log' to 'powercycle'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + if ( action_state_ptr->logged == true ) + { + hwmonLog_clear ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason ); + } + } + + /********************************************************** + * + * From 'log -> 'ignore' case + * --------------------------- + * + * If it was a log action and was logged and is now ignore + * then send a log indicating that it is now ignored. 
+ * + ***********************************************************/ + else /* ignore as default case */ + { + dlog ("%s %s action change from 'log' to 'ignore'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + if ( action_state_ptr->logged == true ) + { + hwmonLog_clear ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason ); + set_ignored_severity ( sensor_ptr, fm_severity ); + } + } + clear_logged_state ( sensor_ptr ); + } + + + /* Handle Action change away from ............. IGNORE */ + else if ( cur_action__ignore ) + { + /* Do auto correction / garbage collection */ + clear_alarmed_state ( sensor_ptr ); + clear_degraded_state ( sensor_ptr ); + + /************************************************************** + * + * From 'ignore' to 'alarm' case + * ----------------------------- + * + * If was ignore and is now alarm then just take it out of + * ignore and let the alarm get raised on the next audit. + * + **************************************************************/ + if ( new_action__alarm ) + { + dlog ("%s %s action change from 'ignore' to 'alarm'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + /* additional garbage collection and cleanup */ + clear_logged_state ( sensor_ptr ); + + /* take it out of ignore state */ + clear_ignored_state ( sensor_ptr ); + + /* do nothing and allow the sensor event handler to act */ + } + + /************************************************************** + * + * From 'ignore' to 'reset' case + * ---------------------------------- + * + **************************************************************/ + else if ( new_action__reset ) + { + dlog ("%s %s action change from 'ignore' to 'reset'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + /* additional garbage collection and cleanup */ + clear_ignored_state ( sensor_ptr ); + clear_logged_state ( sensor_ptr ); + + /* do nothing and allow the sensor event handler to act */ + } + + 
/************************************************************** + * + * From 'ignore' to 'powercycle' case + * ---------------------------------- + * + **************************************************************/ + else if ( new_action__powercycle ) + { + dlog ("%s %s action change from 'ignore' to 'powercycle'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + /* additional garbage collection and cleanup */ + clear_logged_state ( sensor_ptr ); + + /* take it out of ignore state */ + clear_ignored_state ( sensor_ptr ); + + /* do nothing and allow the sensor event handler to act */ + } + + /************************************************************** + * + * From 'ignore' to 'log' case + * ----------------------------- + * + * If was ignore then raise log if status is not ok or offline + * + ***************************************************************/ + else /* log as default case */ + { + dlog ("%s %s action change from 'ignore' to 'log'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + clear_ignored_state ( sensor_ptr ); + + /* take it out of logged state */ + clear_logged_state ( sensor_ptr ); + + /* Produce a log if the sensor is reporting error status */ + if (( sensor_ptr->suppress == false ) && + ( sensor_ptr->status.compare("ok")) && + ( sensor_ptr->status.compare("offline"))) + { + hwmonLog ( hostname, HWMON_ALARM_ID__SENSOR, fm_severity, sensor_ptr->sensorname, REASON_OOT ); + set_logged_severity ( sensor_ptr , fm_severity ); + } + } + } + + /* Handle Action change away from ............. RESET */ + else if ( cur_action__reset ) + { + /************************************************************* + * + * From 'reset' to 'alarm' case + * ------------------------------- + * + * If alarm is asserted then clear it only to allow the + * it to be raised in the handler by failed severity status. 
+ * + *************************************************************/ + if ( new_action__alarm ) + { + dlog ("%s %s action change from 'reset' to 'alarm'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + /* cleanup and garbage collection */ + clear_ignored_state ( sensor_ptr ); + clear_logged_state ( sensor_ptr ); + + /* clear the current alarm if it exists */ + if ( action_state_ptr->alarmed == true ) + { + clear_severity_alarm ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, fm_severity, reason ) ; + } + + /* Clear alarm and degrade state */ + clear_alarmed_state ( sensor_ptr ); + clear_degraded_state ( sensor_ptr ); + } + + /************************************************************* + * + * From 'reset' to 'powercycle' case + * --------------------------------- + * + * If alarm is asserted then clear it only to allow the + * powercycle case alarm to be raised in the handler. + * + *************************************************************/ + else if ( new_action__powercycle ) + { + dlog ("%s %s action change from 'reset' to 'powercycle'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + /* cleanup and garbage collection */ + clear_ignored_state ( sensor_ptr ); + clear_logged_state ( sensor_ptr ); + + /* clear the current alarm if it exists */ + if ( action_state_ptr->alarmed == true ) + { + clear_severity_alarm ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, fm_severity, reason ) ; + } + + /* Clear alarm and degrade state */ + clear_alarmed_state ( sensor_ptr ); + clear_degraded_state ( sensor_ptr ); + } + + /************************************************************* + * + * From 'reset' to 'log' case + * -------------------------- + * + * Clear the reset alarm if it is raised and set the + * corresponding log. 
+ * + *************************************************************/ + else if ( new_action__log ) + { + dlog ("%s %s action change from 'reset' to 'log'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + /* do garbage collection */ + clear_ignored_state ( sensor_ptr ); + clear_logged_state ( sensor_ptr ); + + if ( action_state_ptr->alarmed == true ) + { + clear_severity_alarm ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, fm_severity, reason ) ; + } + + /* Produce a log if the sensor is reporting error status */ + if (( sensor_ptr->suppress == false ) && + ( sensor_ptr->status.compare("ok")) && + ( sensor_ptr->status.compare("offline"))) + { + hwmonLog ( hostname, HWMON_ALARM_ID__SENSOR, fm_severity, sensor_ptr->sensorname, REASON_OOT ); + set_logged_severity ( sensor_ptr , fm_severity ); + } + + /* Clear alarm and degrade state if Do auto correction / garbage collection */ + clear_alarmed_state ( sensor_ptr ); + clear_degraded_state ( sensor_ptr ); + } + + + /************************************************************* + * + * From 'reset' to 'ignore' case + * ---------------------------------- + * + * Clear the reset alarm if it is raised + * + *************************************************************/ + else /* ignore as default case */ + { + dlog ("%s %s action change from 'reset' to 'ignore'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + /* do garbage collection */ + clear_ignored_state ( sensor_ptr ); + clear_logged_state ( sensor_ptr ); + + if ( action_state_ptr->alarmed == true ) + { + clear_severity_alarm ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, fm_severity, reason ) ; + } + + if (( sensor_ptr->suppress == false ) && ( sensor_ptr->status.compare(HWMON_CRITICAL))) + { + set_ignored_severity ( sensor_ptr, fm_severity ); + } + + /* Clear alarm and degrade state if Do auto correction / garbage collection */ + clear_alarmed_state ( sensor_ptr ); + clear_degraded_state ( sensor_ptr ); + } + } + + /* 
Handle Action change away from ............. POWERCYCLE */ + else if ( cur_action__powercycle ) + { + /************************************************************* + * + * From 'powercycle' to 'alarm' case + * --------------------------------- + * + * If alarm is asserted then clear it only to allow it to + * be raised in the handler if the status persists. + * + *************************************************************/ + if ( new_action__alarm ) + { + dlog ("%s %s action change from 'powercycle' to 'alarm'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + /* cleanup and garbage collection */ + clear_ignored_state ( sensor_ptr ); + clear_logged_state ( sensor_ptr ); + + /* clear the current alarm if it exists */ + if ( action_state_ptr->alarmed == true ) + { + clear_severity_alarm ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, fm_severity, reason ) ; + } + + /* Clear alarm and degrade state */ + clear_alarmed_state ( sensor_ptr ); + clear_degraded_state ( sensor_ptr ); + } + + /************************************************************* + * + * From 'powercycle' to 'log' case + * ------------------------------- + * + * If alarm is asserted then clear it in favor of a log + * + *************************************************************/ + else if ( new_action__log ) + { + dlog ("%s %s action change from 'powercycle' to 'log'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + /* cleanup and garbage collection */ + clear_ignored_state ( sensor_ptr ); + clear_logged_state ( sensor_ptr ); + + /* clear the current alarm if it exists */ + if ( action_state_ptr->alarmed == true ) + { + clear_severity_alarm ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, fm_severity, reason ) ; + } + + /* Produce a log if the sensor is reporting error status */ + if (( sensor_ptr->suppress == false ) && + ( sensor_ptr->status.compare("ok")) && + ( sensor_ptr->status.compare("offline"))) + { + hwmonLog ( hostname, HWMON_ALARM_ID__SENSOR, 
fm_severity, sensor_ptr->sensorname, REASON_OOT ); + set_logged_severity ( sensor_ptr , fm_severity ); + } + /* Clear alarm and degrade state */ + clear_alarmed_state ( sensor_ptr ); + clear_degraded_state ( sensor_ptr ); + } + + /************************************************************* + * + * From 'powercycle' to 'reset' case + * ------------------------------- + * + * If alarm is asserted then clear it only to allow the reset + * case alarm to be raised in the handler if the status + * persists as critical. + * + *************************************************************/ + else if ( new_action__reset ) + { + dlog ("%s %s action change from 'powercycle' to 'reset'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + /* cleanup and garbage collection */ + clear_ignored_state ( sensor_ptr ); + clear_logged_state ( sensor_ptr ); + + /* clear the current alarm if it exists */ + if ( action_state_ptr->alarmed == true ) + { + clear_severity_alarm ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, fm_severity, reason ) ; + } + + /* Clear alarm and degrade state */ + clear_alarmed_state ( sensor_ptr ); + clear_degraded_state ( sensor_ptr ); + } + + /************************************************************* + * + * From 'powercycle' to 'ignore' case + * ---------------------------------- + * + * If alarm is asserted then clear it. 
+ * + *************************************************************/ + else /* ignore as default case */ + { + dlog ("%s %s action change from 'powercycle' to 'ignore'\n", hostname.c_str(), sensor_ptr->sensorname.c_str()); + + /* cleanup and garbage collection */ + clear_ignored_state ( sensor_ptr ); + clear_logged_state ( sensor_ptr ); + + /* clear the current alarm if it exists */ + if ( action_state_ptr->alarmed == true ) + { + clear_severity_alarm ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, fm_severity, reason ) ; + } + + if (( sensor_ptr->suppress == false ) && ( sensor_ptr->status.compare(HWMON_CRITICAL))) + { + set_ignored_severity ( sensor_ptr, fm_severity ); + } + + /* Clear alarm and degrade state */ + clear_alarmed_state ( sensor_ptr ); + clear_degraded_state ( sensor_ptr ); + } + } + else + { + elog ("%s no '%s' sensor group action change for severity '%s' ; no action (%s to %s)\n", + hostname.c_str(), + group_ptr->group_name.c_str(), + get_severity(hwmon_sev).c_str(), + cur_action.c_str(), + new_action.c_str()); + + return (FAIL_INVALID_OPERATION) ; + } + _update_sensor_action ( sensor_ptr, hwmon_sev, new_action ); + } + _update_group_action ( group_ptr, hwmon_sev, new_action ); + + return (PASS); +} + +/* ************************************************************************* + * + * Name : group_modify + * + * Description: For a limited number of attributes, modify each sensor + * that is part of the specified group for that attribute. + * + * Modifiable attributes are: + * + * suppress -> suppress + * audit_interval_group -> audit_interval + * actions_minor_group -> actions_minor + * actions_major_group -> actions_major + * actions_critical_group -> actions_critical + * + * Scope : public hwmonHostClass + * + * Assumptions: action (as value) has already been verified by calling procedure + * + * Returns : PASS, FAIL or FAIL_... 
+ *
+ * *************************************************************************/
+
+/* TODO:FEATURE: manage alarms when the actions are changed */
+
+/* Flow summary:
+ *
+ *  1. reject an empty hostname or group uuid  -> FAIL_STRING_EMPTY
+ *  2. look up the host                        -> FAIL_UNKNOWN_HOSTNAME if absent
+ *  3. look up the group by uuid on that host  -> FAIL_NOT_FOUND if absent
+ *  4. apply the change selected by 'key' ; each key is tested by its own
+ *     independent 'if', a key that matches none of them changes nothing
+ *  5. call monitor_now for the host and return rc
+ *
+ * rc is PASS unless one of the action-group keys is handled, in which case
+ * it carries the return code of _manage_action_change.
+ */
+int hwmonHostClass::group_modify ( string hostname, string group_uuid, string key, string value )
+{
+    int rc = PASS ;
+
+    if ( ( !group_uuid.empty() ) && ( !hostname.empty()) )
+    {
+        hwmonHostClass::hwmon_host * host_ptr ;
+        host_ptr = hwmonHostClass::getHost ( hostname );
+        if ( host_ptr != NULL )
+        {
+            sensor_group_type * group_ptr = NULL ;
+
+            /* find the group whose uuid matches ; compare() returns 0 on a match */
+            for ( int i = 0 ; i < host_ptr->groups ; i++ )
+            {
+                if ( !host_ptr->group[i].group_uuid.compare(group_uuid) )
+                {
+                    group_ptr = &host_ptr->group[i] ;
+                    break ;
+                }
+            }
+
+            if ( group_ptr == NULL )
+            {
+                /* only the first 8 characters of the uuid are logged */
+                slog ("%s '%s' group not found value:%s (uuid:%s) \n",
+                          hostname.c_str(),
+                          key.c_str(),
+                          value.c_str(),
+                          group_uuid.substr(0,8).c_str());
+
+                return (FAIL_NOT_FOUND);
+            }
+
+            /* Look for Suppression Modify */
+            if ( !key.compare("suppress") )
+            {
+                /* modify the group suppression ;
+                 * any value other than "True" or "true" means unsuppress */
+                if ( ( value.compare("True") ) && ( value.compare("true") ))
+                {
+                    hwmonLog ( hostname, HWMON_ALARM_ID__SENSORGROUP, FM_ALARM_SEVERITY_CLEAR, group_ptr->group_name, REASON_UNSUPPRESSED );
+                    group_ptr->suppress = false ;
+                }
+                else
+                {
+                    hwmonLog ( hostname, HWMON_ALARM_ID__SENSORGROUP, FM_ALARM_SEVERITY_CLEAR, group_ptr->group_name, REASON_SUPPRESSED );
+                    group_ptr->suppress = true ;
+                }
+                ilog ("%s '%s' sensor group is '%ssuppressed'\n",
+                          host_ptr->hostname.c_str(),
+                          group_ptr->group_name.c_str(),
+                          group_ptr->suppress ? "" : "un");
+
+                /* ... now all the sensors in that group */
+                for ( int s = 0 ; s < group_ptr->sensors ; s++ )
+                {
+                    /* modify all sensors to match the group suppression state ;
+                     * sensors already in the requested state are left alone */
+                    sensor_type * sensor_ptr = group_ptr->sensor_ptr[s] ;
+                    if ( sensor_ptr->suppress != group_ptr->suppress )
+                    {
+                        if ( group_ptr->suppress == true )
+                        {
+                            handle_new_suppression ( sensor_ptr );
+                        }
+                        else
+                        {
+                            /* newly unsuppressed ; re-evaluate the sensor
+                             * against the severity of its current status */
+                            manage_sensor_state ( hostname, sensor_ptr, get_severity(sensor_ptr->status));
+                        }
+                        /* the flag is updated only after the handler ran */
+                        sensor_ptr->suppress = group_ptr->suppress ;
+                    }
+                }
+            }
+
+            /* Look for Audit Interval Modify */
+            if ( !key.compare("audit_interval_group") )
+            {
+                hwmonHostClass * obj_ptr = get_hwmonHostClass_ptr() ;
+
+                /* atoi yields 0 for non-numeric text */
+                int interval = atoi(value.data());
+
+                if ( interval < HWMON_MIN_AUDIT_INTERVAL )
+                {
+                    wlog ("%s invalid audit interval (%d:%s)\n", hostname.c_str(), interval, value.c_str());
+
+                    /* early return ; monitor_now is not called on this path */
+                    return (FAIL_INVALID_DATA);
+                }
+
+                /* modify the group interval */
+                /* This just sets a flag so that the audit interval
+                 * group changes sent back to sysinv are done at base level
+                 * rather than inside this http request, which would create
+                 * a deadlock */
+                obj_ptr->modify_audit_interval ( hostname, interval );
+            }
+
+            /* Look for Critical Action Group Modify */
+            if ( !key.compare("actions_critical_group") )
+            {
+                sensor_severity_enum hwmon_sev = HWMON_SEVERITY_CRITICAL ;
+                EFmAlarmSeverityT fm_severity = FM_ALARM_SEVERITY_CRITICAL ;
+
+                /* current action comes from the group ; new action is the request value */
+                string cur_action = group_ptr->actions_critl_group ;
+                string new_action = value ;
+
+                rc = _manage_action_change ( hostname, group_ptr, cur_action, new_action, hwmon_sev, fm_severity );
+#ifdef WANT_MANAGE_SENSOR_STATE_ON_ACTION_CHANGE
+                /* force evaluation of all sensors in this group
+                 * whose current status maps to critical severity */
+                for ( int i = 0 ; i < group_ptr->sensors ; i++ )
+                {
+                    sensor_severity_enum sev = get_severity(group_ptr->sensor_ptr[i]->status) ;
+                    if ( sev == HWMON_SEVERITY_CRITICAL )
+                    {
+                        manage_sensor_state ( hostname, group_ptr->sensor_ptr[i], sev );
+                    }
+                }
+#endif
+            }
+
+            /* Look for Major Action Group Modify */
+            if ( !key.compare("actions_major_group") )
+            {
+                sensor_severity_enum hwmon_sev = HWMON_SEVERITY_MAJOR ;
+                EFmAlarmSeverityT fm_severity = FM_ALARM_SEVERITY_MAJOR ;
+
+                string cur_action = group_ptr->actions_major_group ;
+                string new_action = value ;
+
+                rc = _manage_action_change ( hostname, group_ptr, cur_action, new_action, hwmon_sev, fm_severity );
+
+#ifdef WANT_MANAGE_SENSOR_STATE_ON_ACTION_CHANGE
+                /* force evaluation of all sensors in this group
+                 * whose current status maps to major severity */
+                for ( int i = 0 ; i < group_ptr->sensors ; i++ )
+                {
+                    sensor_severity_enum sev = get_severity(group_ptr->sensor_ptr[i]->status) ;
+                    if ( sev == HWMON_SEVERITY_MAJOR )
+                    {
+                        manage_sensor_state ( hostname, group_ptr->sensor_ptr[i], sev );
+                    }
+                }
+#endif
+            }
+
+            /* Look for Minor Action Group Modify */
+            if ( !key.compare("actions_minor_group") )
+            {
+                sensor_severity_enum hwmon_sev = HWMON_SEVERITY_MINOR ;
+                EFmAlarmSeverityT fm_severity = FM_ALARM_SEVERITY_MINOR ;
+
+                string cur_action = group_ptr->actions_minor_group ;
+                string new_action = value ;
+
+                rc = _manage_action_change ( hostname, group_ptr, cur_action, new_action, hwmon_sev, fm_severity );
+
+#ifdef WANT_MANAGE_SENSOR_STATE_ON_ACTION_CHANGE
+                /* force evaluation of all sensors in this group
+                 * whose current status maps to minor severity */
+                for ( int i = 0 ; i < group_ptr->sensors ; i++ )
+                {
+                    sensor_severity_enum sev = get_severity(group_ptr->sensor_ptr[i]->status) ;
+                    if ( sev == HWMON_SEVERITY_MINOR )
+                    {
+                        manage_sensor_state ( hostname, group_ptr->sensor_ptr[i], sev );
+                    }
+                }
+#endif
+            }
+            /* reached for every key, including an unrecognized one,
+             * and regardless of the rc returned by an action change */
+            monitor_now ( host_ptr );
+        }
+        else
+        {
+            elog ("%s hostname is unknown\n", hostname.c_str());
+            rc = FAIL_UNKNOWN_HOSTNAME ;
+        }
+    }
+    else
+    {
+        slog ("empty hostname or group uuid\n");
+        rc = FAIL_STRING_EMPTY ;
+    }
+    return (rc);
+}
+
+/*****************************************************************************
+ *
+ * Name       : ipmi_create_sensors
+ *
+ * Description: Add sample sensors to the sysinv database.
+ * + *****************************************************************************/ + +int hwmonHostClass::ipmi_create_sensors ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + int rc = PASS ; + int sensor_errors = 0 ; + host_ptr->sensors = 0 ; + + for ( int s = 0 ; s < host_ptr->samples ; ++s ) + { + string sensortype = ipmi_get_grouptype ( host_ptr->hostname, + host_ptr->sample[s].unit, + host_ptr->sample[s].name); + +#ifdef WANT_FIT_TESTING + /* sysinv does not allow adding a sensor with no type ; will reject with a 400 */ + if ( daemon_want_fit ( FIT_CODE__HWMON__BAD_SENSOR, host_ptr->hostname, host_ptr->sample[s].name)) + sensortype = "" ; +#endif + + if ( sensortype.empty() ) + { + if ( ++sensor_errors > MAX_SENSOR_TYPE_ERRORS ) + { + rc = FAIL_STRING_EMPTY ; + elog ("%s '%s' not added ; sample sensor create failed ; too many sensor type errors (rc:%d)\n", + host_ptr->hostname.c_str(), + host_ptr->sample[s].unit.c_str(), rc); + } + else + { + wlog ("%s %s %s %s%s not added ; empty or unsupported type classification\n", + host_ptr->hostname.c_str(), + host_ptr->sample[s].name.c_str(), + host_ptr->sample[s].status.c_str(), + host_ptr->sample[s].unit.c_str(), + host_ptr->sample[s].ignore ? 
" ignored" : ""); + } + } + else + { + /* add the sensor to hwmon */ + hwmonSensor_init ( host_ptr->hostname, &host_ptr->sensor[host_ptr->sensors] ); + clear_ignored_state (&host_ptr->sensor[host_ptr->sensors]); + clear_alarmed_state (&host_ptr->sensor[host_ptr->sensors]); + clear_logged_state (&host_ptr->sensor[host_ptr->sensors]); + host_ptr->sensor[host_ptr->sensors].sensorname = host_ptr->sample[s].name ; + host_ptr->sensor[host_ptr->sensors].sensortype = sensortype ; + +#ifdef WANT_FIT_TESTING + /* sysinv does not allow adding a sensor with no type ; will reject with a 400 */ + if ( daemon_want_fit ( FIT_CODE__HWMON__ADD_SENSOR, host_ptr->hostname, host_ptr->sample[s].name)) + host_ptr->sensor[host_ptr->sensors].sensortype = "" ; +#endif + + /* add it to to sysinv */ + if ( ( rc = hwmonHttp_add_sensor ( host_ptr->hostname, + host_ptr->event, + host_ptr->sensor[host_ptr->sensors] )) == PASS ) + { + /* add the sysinv uuid for this sensor to the sensor in hwmon */ + host_ptr->sensor[host_ptr->sensors].uuid = host_ptr->event.new_uuid ; + host_ptr->sensor[host_ptr->sensors].group_enum = host_ptr->sample[s].group_enum ; + // hwmonSensor_print ( host_ptr->hostname, &host_ptr->sensor[host_ptr->sensors] ); + ilog ("%s '%s' sensor added\n", host_ptr->hostname.c_str(), + host_ptr->sensor[host_ptr->sensors].sensorname.c_str()); + host_ptr->sensors++ ; + } + else + { + elog ("%s '%s' sensor add failed (rc:%d)\n", host_ptr->hostname.c_str(), + host_ptr->sensor[host_ptr->sensors].sensorname.c_str(), rc); + hwmonSensor_print ( host_ptr->hostname, &host_ptr->sensor[s] ); + } + } + + if ( rc ) + break ; + + } /* end for loop over sensor samples */ + return (rc); +} + + + +/***************************************************************************** + * + * Name : ipmi_disable_sensors + * + * Purpose : With the introduction of ipmi monitoring, all groups are + * monitored at once. Therefore all should be in the same state. 
+ * + * Description: Set all sensors to specified state. + * If disabled then also set to offline. + * + ******************************************************************************/ + +int hwmonHostClass::ipmi_disable_sensors ( struct hwmonHostClass::hwmon_host * host_ptr ) +{ + int rc = FAIL_NULL_POINTER ; + if ( host_ptr ) + { + rc = PASS ; + + /* don't send requests to sysinv if we are in the middle of + * deleting a host because sysinv has already gotten rid of the + * sensor model */ + if ( host_ptr->host_delete == true ) + return (PASS); + + for ( int s = 0 ; s < host_ptr->sensors ; ++s ) + { + sensor_type * sensor_ptr = &host_ptr->sensor[s] ; + if (( sensor_ptr->state.compare("disabled")) || + ( sensor_ptr->status.compare("offline"))) + { + sensor_ptr->state = "disabled" ; + sensor_ptr->status = "offline" ; + + int status = hwmonHttp_disable_sensor ( host_ptr->hostname, + host_ptr->event, + sensor_ptr->uuid ); + if ( status ) + { + elog ( "%s failed to disable '%s' sensor\n", + host_ptr->hostname.c_str(), + sensor_ptr->sensorname.c_str()); + + if ( rc == PASS ) + rc = RETRY ; + } + clear_logged_state ( sensor_ptr ) ; + } + } + } + return (rc); +} diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonSensor.h b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonSensor.h new file mode 100644 index 00000000..2270a5e4 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonSensor.h @@ -0,0 +1,48 @@ +#ifndef __INCLUDE_HWMONSENSOR_H__ +#define __INCLUDE_HWMONSENSOR_H__ + +/** ************************************************************************ + * Copyright (c) 2015-2017 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + * + * ************************************************************************/ + + /** ********************************************************************** + * @file + * Wind River Titanium Cloud's Hardware Monitor Sensor Manipulation + * and Access Methods Header + * + * This file contains the private API for the sensor access methods + * used to display, read and configure sensors and groups + * on a specific host. + * + * ***********************************************************************/ + +#include +#include +#include /* for ... access */ +#include /* for ... system */ +#include /* for ... snprintf */ +#include /* for ... string */ + +using namespace std; + +#include "hwmon.h" /* for ... service module header */ + +#define DISCRETE ((const char *)("discrete")) +#define ANALOG ((const char *)("analog")) + +#define MAX_SENSOR_TYPE_ERRORS (5) + +void hwmonSensor_print ( string & hostname, sensor_type * sensor_ptr ); +void hwmonSensor_init ( string & hostname, sensor_type * sensor_ptr ); + +void hwmonGroup_print ( string & hostname, struct sensor_group_type * group_ptr ); +void hwmonGroup_init ( string & hostname, struct sensor_group_type * group_ptr ); + +void handle_new_suppression ( sensor_type * sensor_ptr ) ; + + +#endif /* __INCLUDE_HWMONSENSOR_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonThreads.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonThreads.cpp new file mode 100644 index 00000000..130f6982 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonThreads.cpp @@ -0,0 +1,783 @@ + +/* + * Copyright (c) 2016-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River Titanium Cloud Hardware Monitor Threads Implementation" + * + */ + + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +#include "daemon_common.h" + +#include "nodeBase.h" +#include "nodeBase.h" /* for ... 
mtce node common definitions */ +#include "hostUtil.h" /* for ... mtce host common definitions */ +#include "nodeMacro.h" +#include "ipmiUtil.h" +#include "threadUtil.h" +#include "hwmonThreads.h" /* for ... IPMITOOL_THREAD_CMD__READ_SENSORS */ +#include "hwmonIpmi.h" /* for ... MAX_IPMITOOL_PARSE_ERRORS */ +#include "hwmonClass.h" /* for ... thread_extra_info_type */ + +/*************************************************************************** + * + * Name : ipmitool_sample_type + * + * Description: An array of sensor data. + * + * _sample_list + * + ***************************************************************************/ + +static ipmitool_sample_type _sample_list[MAX_HOST_SENSORS] ; + +/*************************************************************************** + * + * P R I V A T E I N T E R F A C E S + * + **************************************************************************/ + +static void _command_not_supported ( thread_info_type * info_ptr ) +{ + info_ptr->data = "{\"" ; + info_ptr->data.append(IPMITOOL_JSON__SENSOR_DATA_MESSAGE_HEADER); + info_ptr->data.append("\":{"); + info_ptr->data.append("\"status\":"); + info_ptr->data.append(itos(info_ptr->status)); + info_ptr->data.append(","); + info_ptr->data.append("\"status_string\":\"command '"); + info_ptr->data.append(itos(info_ptr->command)); + info_ptr->data.append("' not supported\"}}"); + + wlog_t ("%s %s\n", info_ptr->log_prefix, info_ptr->data.c_str()); +} + + +static void _add_json_sensor_tuple ( ipmitool_sample_type * ptr, string & response ) +{ + response.append ("{\"n\":\""); + response.append (ptr->name); + response.append ("\",\"v\":\""); + response.append (ptr->value); + response.append ("\",\"u\":\""); + response.append (ptr->unit); + response.append ("\",\"s\":\""); + response.append (ptr->status); + response.append ("\""); + + /* Include the threshold value of each below if not 'na' */ + if ( strcmp (ptr->lnr,"na" )) + { + response.append (",\"lnr\":\""); + response.append 
(ptr->lnr); + response.append ("\""); + } + if ( strcmp (ptr->lcr,"na" )) + { + response.append (",\"lcr\":\""); + response.append (ptr->lcr); + response.append ("\""); + } + if ( strcmp (ptr->lnc,"na" )) + { + response.append (",\"lnc\":\""); + response.append (ptr->lnc); + response.append ("\""); + } + if ( strcmp (ptr->unc,"na" )) + { + response.append (",\"unc\":\""); + response.append (ptr->unc); + response.append ("\""); + } + if ( strcmp (ptr->ucr,"na" )) + { + response.append (",\"ucr\":\""); + response.append (ptr->ucr); + response.append ("\""); + } + if ( strcmp (ptr->unr,"na" )) + { + response.append (",\"unr\":\""); + response.append (ptr->unr); + response.append ("\""); + } + response.append("}"); +} + +/***************************************************************************** + * + * Name : _parse_sensor_data + * + * Description: Create a sensor data json string using pertinent data in the + * control structure data and of course the _sample_list. + * + *****************************************************************************/ + +static void _parse_sensor_data ( thread_info_type * info_ptr ) +{ + if ( info_ptr && info_ptr->extra_info_ptr ) + { + /* + * Get local copies rather than continuously use + * the pointer in the parse process ; just safer + */ + thread_extra_info_type * extra_info_ptr = (thread_extra_info_type*)info_ptr->extra_info_ptr ; + int samples = extra_info_ptr->samples ; + + info_ptr->data = "{\"" ; + info_ptr->data.append (IPMITOOL_JSON__SENSOR_DATA_MESSAGE_HEADER); + info_ptr->data.append ("\":{\"status\":"); + info_ptr->data.append(itos(info_ptr->status)); + info_ptr->data.append(","); + info_ptr->data.append("\"status_string\":\""); + + if ( info_ptr->status == PASS ) + { + info_ptr->data.append("pass\""); + } + else + { + info_ptr->data.append(info_ptr->status_string); + info_ptr->data.append ("}}"); /* success path */ + } + + info_ptr->data.append (",\""); + info_ptr->data.append (IPMITOOL_JSON__SENSORS_LABEL); + 
info_ptr->data.append ("\":["); + for ( int i = 0 ; i < samples ; ) + { + _add_json_sensor_tuple ( &_sample_list[i], info_ptr->data ) ; + if ( ++i < samples ) + info_ptr->data.append (","); + } + info_ptr->data.append ("]"); + + info_ptr->data.append ("}}"); /* success path */ + blog3_t ("%s %s\n", info_ptr->log_prefix, info_ptr->data.c_str()); + } + else if ( info_ptr ) + { + info_ptr->status_string = "null 'extra info' pointer" ; + info_ptr->status = FAIL_NULL_POINTER ; + } +} + + +/***************************************************************************** + * + * Name : _get_field + * + * Description: Assumes a specific string format where fields are delimted + * with the '|' character. + * + * Warnings : The src and dst variable track the src_ptr and dst_ptr to + * ensure we never run longer than that string length + * + * IPMITOOL_MAX_FIELD_LEN for dst_ptr and + * IPMITOOL_MAX_LINE_LEN for src_ptr + * + * A parse error causes dst_ptr to be updated with + * PARSE_ERROR_STR and return. + * + * Assumptions: Extra white spaces at the beginning and end of a field + * are removed. There may or may not be such white spaces. 
+ * + * Field 0 1 2 3 4 5 6 7 8 9 + * ----------+--------+-------------+----+----+----+----+--------+--------+----+ + * Temp_CPU1 | 42.000 | % degrees C | ok | na | na | na | 86.000 | 87.000 | na + * + *****************************************************************************/ + +#define PARSE_ERROR_STR ((const char *)("parse error")) + +void _get_field ( char * src_ptr , int field, char * dst_ptr ) +{ + int src = 0 ; + char * saved_dst_ptr = dst_ptr ; + + /* advance to requested field */ + for ( int y = 0 ; y < field ; src_ptr++, src++ ) + { + /* error detection */ + if (( *src_ptr == '\0' ) || ( src >= IPMITOOL_MAX_LINE_LEN )) + { + goto _get_field_parse_error1 ; + } + if ( *src_ptr == '|' ) + { + y++ ; + } + } + + /* eat first white-space(s) */ + for ( ; *src_ptr == ' ' ; src_ptr++ , src++) + { + /* error detection */ + if ( src >= IPMITOOL_MAX_LINE_LEN ) + { + goto _get_field_parse_error2 ; + } + } + + /* copy the source to destination ; until we see a '|' */ + for ( int dst = 0 ; ; src_ptr++ , dst_ptr++ , src++ , dst++ ) + { + unsigned char ch = 0 ; + + /* error detection */ + if ( src >= IPMITOOL_MAX_LINE_LEN ) + { + goto _get_field_parse_error3 ; + } + if ( dst >= IPMITOOL_MAX_FIELD_LEN ) + { + goto _get_field_parse_error4 ; + } + ch = *src_ptr ; + if (( ch != '|' ) && ( ch != '\0' ) && ( ch != 10 ) && ( ch != 13 )) + { + *dst_ptr = ch ; + } + else + break ; + } + + /* remove last space(s) if they exists */ + for ( dst_ptr-- ; *dst_ptr == ' ' ; dst_ptr-- ) { *dst_ptr = '\0' ; } + + /* terminate the line after the last real non-space char */ + ++dst_ptr = '\0' ; + + return ; + +_get_field_parse_error1: + wlog_t ("%s 1\n", PARSE_ERROR_STR ); + snprintf ( saved_dst_ptr , strlen(PARSE_ERROR_STR)+1, "%s", PARSE_ERROR_STR ); + return ; + +_get_field_parse_error2: + wlog_t ("%s 2\n", PARSE_ERROR_STR ); + snprintf ( saved_dst_ptr , strlen(PARSE_ERROR_STR)+1, "%s", PARSE_ERROR_STR ); + return ; + +_get_field_parse_error3: + wlog_t ("%s 3\n", PARSE_ERROR_STR ); + 
snprintf ( saved_dst_ptr , strlen(PARSE_ERROR_STR)+1, "%s", PARSE_ERROR_STR ); + return ; + +_get_field_parse_error4: + wlog_t ("%s 4\n", PARSE_ERROR_STR); + snprintf ( saved_dst_ptr , strlen(PARSE_ERROR_STR)+1, "%s", PARSE_ERROR_STR ); + return ; + +} + +/* Temp_CPU1 | 42.000 | % degrees C | ok | na | na | na | 86.000 | 87.000 | na */ +#define IPMITOOL_FULL_OUTPUT_COLUMNS (10) + +void * hwmonThread_ipmitool ( void * arg ) +{ + int samples ; + + thread_info_type * info_ptr ; + thread_extra_info_type * extra_ptr ; + int parse_errors = 0 ; + + /* Pointer Error Detection and Handling */ + if ( !arg ) + { + slog ("*** ipmitool thread called with null arg pointer *** corruption\n"); + return NULL ; + } + + /* cast pointers from arg */ + info_ptr = (thread_info_type*)arg ; + extra_ptr = (thread_extra_info_type*)info_ptr->extra_info_ptr ; + + info_ptr->pw_file_fd = 0 ; + + /* allow the parent to confirm thread id */ + info_ptr->id = pthread_self() ; + if ( extra_ptr == NULL ) + { + info_ptr->status_string = "null 'extra info' pointer" ; + info_ptr->status = FAIL_NULL_POINTER ; + goto ipmitool_thread_done ; + } + + /* Set cancellation option so that a delete operation + * can kill this thread immediately */ + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL ); + + /* the number of sensors are learned */ + extra_ptr->samples = samples = 0 ; + switch ( info_ptr->command ) + { + case IPMITOOL_THREAD_CMD__POWER_STATUS: + { + int rc = PASS ; + + info_ptr->status_string = "" ; + info_ptr->status = PASS ; + string command = IPMITOOL_POWER_STATUS_CMD ; + + blog2_t ("%s query power status\n", info_ptr->log_prefix); + + if ( info_ptr->extra_info_ptr == NULL ) + { + info_ptr->status = FAIL_NULL_POINTER ; + info_ptr->status_string = "null extra info pointer" ; + goto ipmitool_thread_done ; + } + + /**************** Create the password file *****************/ + ipmiUtil_create_pw_fn ( info_ptr, extra_ptr->bm_pw ) ; + if ( 
info_ptr->password_file.empty() ) + { + info_ptr->status_string = "failed to get a temporary password filename" ; + info_ptr->status = FAIL_FILE_CREATE ; + goto ipmitool_thread_done ; + } + dlog_t ("%s password filename : %s\n", + info_ptr->log_prefix, + info_ptr->password_file.c_str()); + + /*************** Create the output filename ***************/ + string ipmitool_datafile = + ipmiUtil_create_data_fn (info_ptr->hostname, IPMITOOL_POWER_STATUS_FILE_SUFFIX ) ; + dlog_t ("%s power query filename : %s\n", + info_ptr->log_prefix, + ipmitool_datafile.c_str()); + + /************** Create the ipmitool request **************/ + string ipmitool_request = + ipmiUtil_create_request ( command, + extra_ptr->bm_ip, + extra_ptr->bm_un, + info_ptr->password_file, + ipmitool_datafile ); + dlog_t ("%s power status query cmd: %s\n", + info_ptr->log_prefix, + ipmitool_request.c_str()); + + if ( daemon_is_file_present ( MTC_CMD_FIT__POWER_STATUS )) + { + slog ("%s FIT IPMITOOL_POWER_STATUS_CMD\n", info_ptr->hostname.c_str()); + rc = PASS ; + } + else + { + /* Make the request */ + rc = system ( ipmitool_request.data()) ; + } + + unlink(info_ptr->password_file.data()); + daemon_remove_file (info_ptr->password_file.data()); + + /* check for system call error case */ + if ( rc != PASS ) + { + info_ptr->status_string = "failed power status query ; " ; + info_ptr->status_string.append(ipmitool_request); + info_ptr->status = FAIL_SYSTEM_CALL ; + } + else + { + bool ipmitool_datafile_present = false ; + + /* look for the output data file */ + for ( int i = 0 ; i < 10 ; i++ ) + { + pthread_signal_handler ( info_ptr ); + if ( daemon_is_file_present ( ipmitool_datafile.data() )) + { + ipmitool_datafile_present = true ; + break ; + } + info_ptr->progress++ ; + sleep (1); + } + + if ( ipmitool_datafile_present ) + { + info_ptr->data = daemon_read_file (ipmitool_datafile.data()) ; + dlog_t ("%s data:%s\n", + info_ptr->hostname.c_str(), + info_ptr->data.data()); + + info_ptr->status_string = 
"pass" ; + info_ptr->status = PASS ; + } + else + { + info_ptr->status_string = "command did not produce output file ; timeout" ; + info_ptr->status = FAIL_FILE_ACCESS ; + } + } + break ; + } + case IPMITOOL_THREAD_CMD__READ_SENSORS: + { + int rc = PASS ; + + info_ptr->status_string = "" ; + info_ptr->status = PASS ; + + blog3_t ("%s read sensors request\n", info_ptr->log_prefix); + + if ( info_ptr->extra_info_ptr == NULL ) + { + info_ptr->status = FAIL_NULL_POINTER ; + info_ptr->status_string = "null extra info pointer" ; + goto ipmitool_thread_done ; + } + + ipmiUtil_create_pw_fn ( info_ptr, extra_ptr->bm_pw ) ; + if ( info_ptr->password_file.empty() ) + { + info_ptr->status_string = "failed to get a temporary password filename" ; + info_ptr->status = FAIL_FILE_CREATE ; + goto ipmitool_thread_done ; + } + + dlog_t ("%s password filename : %s\n", + info_ptr->log_prefix, + info_ptr->password_file.c_str()); + + /*************** Create the output filename ***************/ + string sensor_datafile = + ipmiUtil_create_data_fn (info_ptr->hostname, IPMITOOL_SENSOR_OUTPUT_FILE_SUFFIX ) ; + + dlog_t ("%s sensor output file%s\n", + info_ptr->log_prefix, + sensor_datafile.c_str()); + + /************** Create the ipmitool request **************/ + string sensor_query_request = + ipmiUtil_create_request ( IPMITOOL_SENSOR_QUERY_CMD, + extra_ptr->bm_ip, + extra_ptr->bm_un, + info_ptr->password_file, + sensor_datafile ); + + dlog_t ("%s sensor query cmd:%s\n", + info_ptr->log_prefix, + sensor_query_request.c_str()); + + + /**************************************************************** + * + * This fault insertion case is added for PV. + * If MTC_CMD_FIT__SENSOR_DATA file is present then no ipmitool + * sensor read is performed. Instead, a raw output file can be + * placed in /var/run/fit/_sensor_data and used to + * perform sensor fault insertion that way. 
+ * + *****************************************************************/ + if ( daemon_is_file_present ( MTC_CMD_FIT__SENSOR_DATA )) + { + rc = PASS ; + } +#ifdef WANT_FIT_TESTING + else if ( daemon_want_fit ( FIT_CODE__HWMON__AVOID_SENSOR_QUERY, info_ptr->hostname )) + { + rc = PASS ; // ilog ("%s FIT Avoiding Sensor Query\n", info_ptr->hostname.c_str()); + } + else if ( daemon_want_fit ( FIT_CODE__AVOID_N_FAIL_IPMITOOL_REQUEST, info_ptr->hostname )) + { + rc = FAIL ; // ilog ("%s FIT Avoiding Sensor Query\n", info_ptr->hostname.c_str()); + } +#endif + else + { + /* remove the last query */ + // daemon_remove_file ( sensor_datafile.data() ) ; + rc = system ( sensor_query_request.data()) ; + } + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__THREAD_TIMEOUT, info_ptr->hostname ) ) + { + for ( ; ; ) + { + pthread_signal_handler ( info_ptr ); + sleep (1); + } + } + if ( daemon_want_fit ( FIT_CODE__THREAD_SEGFAULT, info_ptr->hostname ) ) + { + daemon_do_segfault(); + } +#endif + + unlink(info_ptr->password_file.data()); + daemon_remove_file (info_ptr->password_file.data()); + // info_ptr->password_file.clear(); + + /* check for system call error case */ + if ( rc != PASS ) + { + info_ptr->status_string = "failed query ; " ; + info_ptr->status_string.append(sensor_query_request); + info_ptr->status = FAIL_SYSTEM_CALL ; + } + else + { + FILE * _fp = fopen ( sensor_datafile.data(), "r" ); + if ( _fp ) + { + char buffer [IPMITOOL_MAX_LINE_LEN]; + int line = 0 ; + while ( fgets (buffer, IPMITOOL_MAX_LINE_LEN, _fp) != NULL ) + { + if ( strnlen ( buffer, IPMITOOL_MAX_LINE_LEN ) ) + { + int bars = 0 ; /* tracks the number of '|'s found in a line */ + bool long_field_error = false ; /* set to true if we get a field in line error */ + int char_field_count = 0 ; /* counts the number of characters in a field */ + // ilog ("\n"); + // ilog ("ipmitool:%d:%s\n", line, buffer ); + + /**************************************** + * sanity check the ipmitool output + * + * 
ipmitool line output looks like + * + * Temp_CPU1 | 42.000 | % degrees C | ok | na | na | na | 86.000 | 87.000 | na + * + ***************************************** + * start at 1 to handle the 'i-1' case */ + + int i = 1 ; /* aka character in line count or index */ + while (( buffer[i+1] != '\0' ) && ( i < IPMITOOL_MAX_LINE_LEN )) + { + if ( buffer[i] == '|' ) + { + if ( char_field_count > IPMITOOL_MAX_FIELD_LEN ) + { + long_field_error = true ; + } + char_field_count = 0 ; + ++bars ; + } + ++char_field_count ; + i++ ; /* advance through the line, character by character */ + } + + /* scan the sample as long as no field exceeds the max string length */ + if ( long_field_error == false ) + { + /* Only process properly formatted lines that + * don't have field lengths longer than IPMITOOL_MAX_FIELD_LEN*/ + if ( bars == (IPMITOOL_FULL_OUTPUT_COLUMNS-1) ) + { + char type[IPMITOOL_MAX_FIELD_LEN] ; + int i = 0 ; + int x = 0 ; + int y = 0 ; + /* get type */ + /* advance to type field */ + for ( i = 0 , y = 0 ; y < 2 ; i++ ) + { + /* handle case where we cant find the '|'s and y never reaches 2 */ + if ( i < IPMITOOL_MAX_LINE_LEN ) + { + if ( buffer[i] == '|' ) + { + y++ ; + } + } + else + { + if ( ++parse_errors == MAX_IPMITOOL_PARSE_ERRORS ) + { + info_ptr->status = FAIL_JSON_TOO_LONG ; + info_ptr->status_string = "sensor format error ; line format error"; + goto ipmitool_thread_done ; + } + break ; + } + } + + /* ignore this line */ + if ( i >= IPMITOOL_MAX_LINE_LEN ) + { + continue ; + } + + /* eat first white-space(s) */ + for ( ; buffer[i] == ' ' ; i++ ) ; + + /* copy the senor unit type to type */ + for ( x = 0 ; buffer[i] != '|' ; i++, x++ ) { type[x] = buffer[i] ; } + + /* remove last space(s) if they exists */ + for ( x-- ; type[x] == ' ' ; x-- ) { type[x] = '\0' ; } + + /* terminate the line after the last real non-space char */ + type[x+1] = '\0' ; + + if (!strlen(type)) + { + blog3_t ("%s skipping sensor with empty unit type\n", info_ptr->log_prefix); + blog3_t 
("%s ... line:%d - %s", info_ptr->log_prefix, line, buffer ); + continue ; + } + else + { + blog3_t ("%s Line:%d is a '%s' sensor\n", info_ptr->log_prefix, line, type ); + } + + _get_field ( buffer, 0, _sample_list[samples].name ); + _get_field ( buffer, 1, _sample_list[samples].value ); + + /* copy already learned type to unit field 2 */ + snprintf ( _sample_list[samples].unit, strlen(type)+1, "%s", type ); + + _get_field ( buffer, 3, _sample_list[samples].status ); + _get_field ( buffer, 4, _sample_list[samples].lnr ); + _get_field ( buffer, 5, _sample_list[samples].lcr ); + _get_field ( buffer, 6, _sample_list[samples].lnc ); + _get_field ( buffer, 7, _sample_list[samples].unc ); + _get_field ( buffer, 8, _sample_list[samples].ucr ); + _get_field ( buffer, 9, _sample_list[samples].unr ); + blog2_t ("%s | %20s | %8s | %12s | %3s | %8s | %8s | %8s | %8s | %8s | %8s |\n", + info_ptr->log_prefix, + _sample_list[samples].name, + _sample_list[samples].value, + _sample_list[samples].unit, + _sample_list[samples].status, + _sample_list[samples].lnr, + _sample_list[samples].lcr, + _sample_list[samples].lnc, + _sample_list[samples].unc, + _sample_list[samples].ucr, + _sample_list[samples].unr); + samples++ ; + if ( samples >= MAX_HOST_SENSORS ) + { + samples-- ; + rc = info_ptr->status = FAIL_OUT_OF_RANGE ; + info_ptr->status_string = "max number of sensors reached"; + break ; + } + rc = PASS ; + } + else + { + /* ignore commented lines */ + if (( buffer[0] != '#' ) && ( buffer[0] != ';' )) + { + if ( ++parse_errors == MAX_IPMITOOL_PARSE_ERRORS ) + { + info_ptr->status = FAIL_BAD_PARM ; + info_ptr->status_string = "sensor format error ; line format error 1"; + } + blog3_t ("%s %s (e:%d d:%d)", info_ptr->log_prefix, + info_ptr->status_string.c_str(), + (IPMITOOL_FULL_OUTPUT_COLUMNS), bars+1 ); + + blog3_t ("%s ... 
line:%d - %s", info_ptr->log_prefix, line, buffer ); + } + else + { + blog3_t ("%s COMMENT %s", info_ptr->log_prefix, &buffer[0]); + } + } /* end else */ + } + else + { + if ( ++parse_errors == MAX_IPMITOOL_PARSE_ERRORS ) + { + info_ptr->status = FAIL_JSON_TOO_LONG ; + info_ptr->status_string = "sensor format error ; line format error 2" ; + } + blog3_t ("%s ... line:%d - %s", info_ptr->log_prefix, line, buffer ); + } + } + MEMSET_ZERO(buffer) ; + line++ ; + pthread_signal_handler ( info_ptr ); + } /* end while loop */ + + extra_ptr->samples = samples ; + + if ( samples == 0 ) + { + info_ptr->status = FAIL_NO_DATA ; + info_ptr->status_string = "no sensor data found"; + } + fclose(_fp); + } /* fopen */ + else + { + info_ptr->status = FAIL_FILE_ACCESS ; + info_ptr->status_string = "failed to open sensor data file: <"; + info_ptr->status_string.append(sensor_datafile); + info_ptr->status_string.append(">"); + } + } /* end else handling of successful system command */ + break ; + } + default: + { + info_ptr->status = FAIL_BAD_CASE ; + _command_not_supported ( info_ptr ); + break ; + } + } + +ipmitool_thread_done: + + if ( info_ptr->pw_file_fd > 0 ) + close(info_ptr->pw_file_fd); + info_ptr->pw_file_fd = 0 ; + + if ( ! 
info_ptr->password_file.empty() ) + { + unlink(info_ptr->password_file.data()); + daemon_remove_file ( info_ptr->password_file.data() ) ; + info_ptr->password_file.clear(); + } + + pthread_signal_handler ( info_ptr ); + + /* Sensor reading specific exit */ + if ( info_ptr->command == IPMITOOL_THREAD_CMD__READ_SENSORS ) + { + if ( parse_errors ) + { + wlog_t ("%s exiting with %d parse errors (rc:%d)\n", + info_ptr->log_prefix, parse_errors, info_ptr->status); + } + else + { + dlog_t ("%s exit", info_ptr->log_prefix ); + } + _parse_sensor_data ( info_ptr ); + } + + info_ptr->progress++ ; + info_ptr->runcount++ ; + info_ptr->id = 0 ; + pthread_exit (&info_ptr->status ); + return NULL ; +} diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonThreads.h b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonThreads.h new file mode 100644 index 00000000..c4c2d6b4 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonThreads.h @@ -0,0 +1,170 @@ +#ifndef __INCLUDE_HWMONTHREAD_HH__ +#define __INCLUDE_HWMONTHREAD_HH__ + +/* + * Copyright (c) 2016-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River Titanium Cloud Hardware Monitor Threads Header + * + */ + +#define IPMITOOL_MAX_LINE_LEN (200) +#define IPMITOOL_MAX_FIELD_LEN (64) +#define IPMITOOL_FIT_LINE_LEN (1000) + +void * hwmonThread_ipmitool ( void * ); + + /* -------------------- + * ipmitool_sensor_data: outgoing message + * -------------------- + * + * The sensor data is formatted in a json style string that is sent + * to the hardware monitor daemon on the active controller as + * specified by the aformentioned configuration command. + * + * The following is a brief 3 sensor example of the expected + * ipmitool output and json string conversion that is sent to + * hardware mon. 
+ * + * ipmitool output: + * + * Temp_CPU0 | 54.000 | % degrees C | ok | na | na | na | 86.000 | 87.000 | na + * PSU2 Input | 0.000 | % Watts | cr | na | 0.000 | na | na | na | na + * Critical IRQ | 0x0 | discrete | 0x0080| na | na | na | na | na | na + * Fan_SYS0_2 | 4700.000 | % RPM | ok | na | 500.000 | 1000.000 | na | na | na + * + * Message Design Strategy: + * 1. Maintain all the ipmitool output information so that it is available + * to the hardware monitor for future enhancements without the need to + * change the client side messaging. + * 2. Validate the format of the ipmitool output and report on any errors + * observed in a status field of the response string. + * 3. Deliver an industry standard json string formatted message + * 4. Provide an overall status field indicating any formatting errors + * detected in the sensor data output format. This is not a summary + * status of the sensor data. + * 5. minimize the amount of data sent + * - use short sensor record labels + * n = name + * v = sensor reading value + * u = unit format used when interpreting the data + * s = correlated status + * - ipmitool labels for thresholds but only include labels for values that are not 'na' + * unr = Upper Non-Recoverable + * ucr = Upper Critical + * unc = Upper Non-Critical + * lnc = Lower Non-Critical + * lcr = Lower Critical + * lnr = Lower Non-Recoverable + * + * Json String: sensor data excluded + * ----------- + * + * { + * "ipmitool_sensor_data": + * { + * "hostname" :"compute-0", + * "status" : 0, + * "status_string" : "pass", + * "sensors" : + * [ + * { }, + * { }, + * { } + * ] + * } + *} + * + * Json String: full + * ------------- + * + *{ + * "ipmitool_sensor_data": + * { + * "hostname" : "compute-0", + * "status" : 0, + * "status_string": "pass", + * "sensors":[ + * { + * "n":"Temp_CPU0", + * "v":"54.000", + * "u":"% degrees C", + * "s":"ok", + * "unc":"86.000", + * "ucr":"87.000" + * }, + * { + * "n":"PSU2 Input", + * "v":"0.000", + * "u":"% Watts", + * 
"s":"cr", + * "lcr":"0.000" + * }, + * { + * "n":"Fan_SYS0_2", + * "v":"4700.00", + * "u":"% RPM", + * "s":"ok", + * "lcr":"500.000", + * "lnc":"1000.000" + * } + * ... + * ] + * } + *} + * + * + */ + +#include +#include +#include + +using namespace std; + +// #define MAX_HOST_SENSORS (100) +#define THREAD_RETRY_DELAY_SECS (60) +#define MAX_THREAD_RETRIES (10) + +/* Control structure used for ipmitool related functions ; like sensor monitoring */ +#define DEFAULT_IPMITOOL_SENSOR_MONITORING_PERIOD_SECS (120) /* 2 minutes */ + +#define IPMITOOL_JSON__SENSOR_DATA_MESSAGE_HEADER ((const char *)("ipmitool_sensor_data")) + +#define IPMITOOL_JSON__SENSORS_LABEL ((const char *)("sensors")) +#define IPMITOOL_SENSOR_QUERY_CMD ((const char *)(" sensor list")) + +#define IPMITOOL_SENSOR_OUTPUT_FILE_SUFFIX ((const char *)("_sensor_data")) + +typedef struct +{ + char name [IPMITOOL_MAX_FIELD_LEN] ; /* sensor name */ + char value [IPMITOOL_MAX_FIELD_LEN] ; /* sensor value */ + char unit [IPMITOOL_MAX_FIELD_LEN] ; /* sensor unit type */ + char status [IPMITOOL_MAX_FIELD_LEN] ; /* status - ok, nc, cr, nr */ + char lnr [IPMITOOL_MAX_FIELD_LEN] ; /* Lower Non-Recoverable */ + char lcr [IPMITOOL_MAX_FIELD_LEN] ; /* Lower Critical */ + char lnc [IPMITOOL_MAX_FIELD_LEN] ; /* Lower Non-Critical */ + char unc [IPMITOOL_MAX_FIELD_LEN] ; /* Upper Non-Critical */ + char ucr [IPMITOOL_MAX_FIELD_LEN] ; /* Upper Critical */ + char unr [IPMITOOL_MAX_FIELD_LEN] ; /* Upper Non-Recoverable */ +} ipmitool_sample_type ; + +typedef struct +{ + string bm_ip ; + string bm_un ; + string bm_pw ; + + string sensor_query_request ; /**< sensor query system call request */ + int samples ; + +} thread_extra_info_type ; + +#endif // __INCLUDE_HWMONTHREAD_HH__ diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonUtil.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonUtil.cpp new file mode 100644 index 00000000..a6756c67 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonUtil.cpp @@ -0,0 +1,714 
@@ +/* + * Copyright (c) 2013, 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform - Hardware Monitoring "General Utilities" Implementation + */ + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "hwm" + +#include "alarmUtil.h" /* for ... alarmUtil_getSev_str */ +#include "hwmonUtil.h" /* this module header */ + +string get_key_value_string ( string reading, string key, char delimiter, bool set_tolowercase ) +{ + string value = "" ; + if ( ! reading.empty() ) + { + /* verify this is the correct sensor to be reading */ + std::size_t start = reading.find(key, 0) ; + if ( start != std::string::npos ) + { + start = reading.find( delimiter, start ) ; + if ( start != std::string::npos ) + { + std::size_t end = reading.find( '\n', ++start ) ; + + value = reading.substr(start, end-start).c_str(); + + if ( set_tolowercase == true ) + value = tolowercase ( value ); + + blog3 ("key:%s - value:%s\n", key.c_str(), value.c_str()); + } + else + { + elog ("value parse error\n"); + } + } + else + { + elog ("key parse error\n"); + } + } + else + { + elog ("empty key\n"); + } + return (value); +} + +string get_severity ( sensor_severity_enum severity ) +{ + if ( severity == HWMON_SEVERITY_GOOD ) + return "ok" ; + else if ( severity == HWMON_SEVERITY_OFFLINE ) + return "offline" ; + else if ( severity == HWMON_SEVERITY_MINOR ) + return "minor" ; + else if ( severity == HWMON_SEVERITY_MAJOR ) + return "major" ; + else if ( severity == HWMON_SEVERITY_CRITICAL ) + return "critical" ; + else if ( severity == HWMON_SEVERITY_NONRECOVERABLE ) + return "critical" ; + else + return "unknown" ; +} + +sensor_severity_enum get_severity ( string status ) +{ + if ( status.compare("ok") == 0 ) + return HWMON_SEVERITY_GOOD ; + if ( status.compare("offline") == 0 ) + return HWMON_SEVERITY_OFFLINE ; + if ( status.compare("minor") == 0 ) + return HWMON_SEVERITY_MINOR ; + if ( status.compare("major") == 0 ) + return 
HWMON_SEVERITY_MAJOR ; + if ( status.compare("critical") == 0 ) + return HWMON_SEVERITY_CRITICAL ; + if ( status.compare("nonrecov") == 0 ) + return HWMON_SEVERITY_NONRECOVERABLE ; + + return HWMON_SEVERITY_OFFLINE ; +} + +string get_ipmi_severity ( sensor_severity_enum status ) +{ + switch ( status ) + { + case HWMON_SEVERITY_GOOD: return "ok" ; + case HWMON_SEVERITY_MAJOR: return "nc" ; + case HWMON_SEVERITY_CRITICAL: return "cr" ; + case HWMON_SEVERITY_OFFLINE: return "na" ; + default: return "ns" ; + } +} + +sensor_severity_enum get_ipmi_severity ( string status ) +{ + if ( status.compare("ok") == 0 ) + return HWMON_SEVERITY_GOOD ; + if ( status.compare("nc") == 0 ) + return HWMON_SEVERITY_MAJOR ; + if ( status.compare("cr") == 0 ) + return HWMON_SEVERITY_CRITICAL ; + if ( status.compare("na") == 0 ) + return HWMON_SEVERITY_OFFLINE ; + if ( status.compare("ns") == 0 ) + return HWMON_SEVERITY_OFFLINE ; + + /* Separate clauses because they are likelt infrequent if at all */ + if (( status.compare("nr") == 0 ) || + ( status.compare("lnr") == 0 ) || + ( status.compare("unr") == 0 )) + { + return HWMON_SEVERITY_NONRECOVERABLE ; + } + + if (( status.compare("lnc") == 0 ) || + ( status.compare("unc") == 0 )) + { + return HWMON_SEVERITY_MAJOR ; + } + + if (( status.compare("lcr") == 0 ) || + ( status.compare("ucr") == 0 )) + { + return HWMON_SEVERITY_CRITICAL ; + } + + /* Unrecognized status is handled as a minor alarm */ + return HWMON_SEVERITY_MINOR ; +} + +bool is_valid_action ( sensor_severity_enum severity, string & action, bool set_to_lower ) +{ + bool rc = false ; + string lower_case_action = tolowercase ( action ); + + if ( set_to_lower == true ) + { + action = lower_case_action ; + } + + if ( severity == HWMON_SEVERITY_CRITICAL ) + { + if ( !lower_case_action.compare(HWMON_ACTION_IGNORE) || + !lower_case_action.compare(HWMON_ACTION_ALARM) || + !lower_case_action.compare(HWMON_ACTION_LOG) || + !lower_case_action.compare(HWMON_ACTION_RESET) || + 
!lower_case_action.compare(HWMON_ACTION_POWERCYCLE)) + { + rc = true ; + } + } + else if ( !lower_case_action.compare(HWMON_ACTION_IGNORE) || + !lower_case_action.compare(HWMON_ACTION_ALARM) || + !lower_case_action.compare(HWMON_ACTION_LOG)) + { + rc = true ; + } + return (rc); +} + +bool is_log_action ( string action ) +{ + if ( !action.compare(HWMON_ACTION_LOG) ) + return true ; + return false ; +} + + +bool is_ignore_action ( string action ) +{ + if ( !action.compare(HWMON_ACTION_IGNORE) ) + return true ; + return false ; +} + +bool is_alarm_action ( string action ) +{ + if ( !action.compare(HWMON_ACTION_ALARM) ) + return true ; + return false ; +} + +bool is_reset_action ( string action ) +{ + if ( !action.compare(HWMON_ACTION_RESET) ) + return true ; + return false ; +} + +bool is_powercycle_action ( string action ) +{ + if ( !action.compare(HWMON_ACTION_POWERCYCLE) ) + return true ; + return false ; +} + + +void clear_degraded_state ( sensor_type * sensor_ptr ) +{ + if ( sensor_ptr ) + { + sensor_ptr->degraded = false ; + } +} + +void set_degraded_state ( sensor_type * sensor_ptr ) +{ + if ( sensor_ptr ) + { + // ilog ("%s %s DEGRADE ASSERT ^^^^^^^^^^\n", sensor_ptr->hostname.c_str(), sensor_ptr->sensorname.c_str()); + sensor_ptr->degraded = true ; + } +} + +bool clear_severity_alarm ( string & hostname, hwmonAlarm_id_type id, string & sub_entity, EFmAlarmSeverityT severity, string reason ) +{ + if ( hwmon_alarm_query ( hostname, id, sub_entity ) == severity ) + { + hwmonAlarm_clear ( hostname, id, sub_entity, reason ); + return (true); + } + return (false); +} + +void clear_asserted_alarm ( string & hostname, hwmonAlarm_id_type id, sensor_type * ptr , string reason ) +{ + if ( ptr ) + { + if ( ptr->alarmed == true ) + { + hwmonAlarm_clear ( hostname, id, ptr->sensorname, reason ); + } + clear_logged_state (ptr); + clear_ignored_state (ptr); + clear_alarmed_state (ptr); + ptr->degraded = false ; + ptr->alarmed = false ; + } + else + { + wlog ("%s null sensor 
pointer\n", hostname.c_str() ); + } +} + + +void clear_alarmed_state ( sensor_type * sensor_ptr ) +{ + if ( sensor_ptr ) + { + sensor_ptr->minor.alarmed = false ; + sensor_ptr->major.alarmed = false ; + sensor_ptr->critl.alarmed = false ; + sensor_ptr->alarmed = false ; + } +} + +void set_alarmed_severity ( sensor_type * sensor_ptr , EFmAlarmSeverityT severity ) +{ + if ( sensor_ptr ) + { + if ( severity == FM_ALARM_SEVERITY_MINOR ) + { + sensor_ptr->alarmed = true ; + sensor_ptr->minor.alarmed = true ; + } + else if ( severity == FM_ALARM_SEVERITY_MAJOR ) + { + sensor_ptr->alarmed = true ; + sensor_ptr->major.alarmed = true ; + } + else if ( severity == FM_ALARM_SEVERITY_CRITICAL ) + { + sensor_ptr->alarmed = true ; + sensor_ptr->critl.alarmed = true ; + } + else + { + slog ("%s alarm status does not apply for severity '%s'\n", + sensor_ptr->hostname.c_str(), + alarmUtil_getSev_str(severity).c_str()); + } + } + else + { + slog ("null sensor pointer\n"); + } +} + +/********************* Log Utilities *****************************/ + +void clear_logged_state ( sensor_type * sensor_ptr ) +{ + if ( sensor_ptr ) + { + sensor_ptr->minor.logged = false ; + sensor_ptr->major.logged = false ; + sensor_ptr->critl.logged = false ; + } +} + + +void set_logged_severity ( sensor_type * sensor_ptr , EFmAlarmSeverityT severity ) +{ + if ( sensor_ptr ) + { + if ( severity == FM_ALARM_SEVERITY_MINOR ) + { + sensor_ptr->minor.logged = true ; + } + else if ( severity == FM_ALARM_SEVERITY_MAJOR ) + { + sensor_ptr->major.logged = true ; + } + else if ( severity == FM_ALARM_SEVERITY_CRITICAL ) + { + sensor_ptr->critl.logged = true ; + } + else + { + slog ("%s %s logged status does not apply for severity '%s'\n", + sensor_ptr->hostname.c_str(), + sensor_ptr->sensorname.c_str(), + alarmUtil_getSev_str(severity).c_str()); + } + } + else + { + slog ("null sensor pointer\n"); + } +} + +void clear_logged_severity ( sensor_type * sensor_ptr , EFmAlarmSeverityT severity ) +{ + if ( sensor_ptr ) 
+ { + if ( severity == FM_ALARM_SEVERITY_MINOR ) + { + sensor_ptr->minor.logged = false ; + } + else if ( severity == FM_ALARM_SEVERITY_MAJOR ) + { + sensor_ptr->major.logged = false ; + } + else if ( severity == FM_ALARM_SEVERITY_CRITICAL ) + { + sensor_ptr->critl.logged = false ; + } + else + { + slog ("%s logged status does not apply for severity '%s'\n", + sensor_ptr->hostname.c_str(), + alarmUtil_getSev_str(severity).c_str()); + } + } + else + { + slog ("null sensor pointer\n"); + } +} + +/********************* Ignore Utilities *************************/ +void clear_ignored_state ( sensor_type * sensor_ptr ) +{ + if ( sensor_ptr ) + { + sensor_ptr->minor.ignored = false ; + sensor_ptr->major.ignored = false ; + sensor_ptr->critl.ignored = false ; + } +} + +void set_ignored_severity ( sensor_type * sensor_ptr , EFmAlarmSeverityT severity ) +{ + if ( sensor_ptr ) + { + clear_ignored_state ( sensor_ptr ); + if ( severity == FM_ALARM_SEVERITY_MINOR ) + { + sensor_ptr->minor.ignored = true ; + } + else if ( severity == FM_ALARM_SEVERITY_MAJOR ) + { + sensor_ptr->major.ignored = true ; + } + else if ( severity == FM_ALARM_SEVERITY_CRITICAL ) + { + sensor_ptr->critl.ignored = true ; + } + else + { + slog ("%s logged status does not apply for severity '%s'\n", + sensor_ptr->hostname.c_str(), + alarmUtil_getSev_str(severity).c_str()); + } + } + else + { + slog ("null sensor pointer\n"); + } +} + +void clear_ignored_severity ( sensor_type * sensor_ptr , EFmAlarmSeverityT severity ) +{ + if ( sensor_ptr ) + { + if ( severity == FM_ALARM_SEVERITY_MINOR ) + { + sensor_ptr->minor.ignored = false ; + } + else if ( severity == FM_ALARM_SEVERITY_MAJOR ) + { + sensor_ptr->major.ignored = false ; + } + else if ( severity == FM_ALARM_SEVERITY_CRITICAL ) + { + sensor_ptr->critl.ignored = false ; + } + else + { + slog ("%s ignored status does not apply for severity '%s'\n", + sensor_ptr->hostname.c_str(), + alarmUtil_getSev_str(severity).c_str()); + } + } + else + { + slog ("null 
sensor pointer\n"); + } +} + + +string print_alarmed_severity ( sensor_type * sensor_ptr ) +{ + string alarmed_severity = ""; + if ( sensor_ptr->critl.alarmed || sensor_ptr->major.alarmed || sensor_ptr->minor.alarmed ) + { + alarmed_severity.append(" ["); + if ( sensor_ptr->critl.alarmed ) + alarmed_severity.append("alarmd-critl"); + if ( sensor_ptr->major.alarmed ) + alarmed_severity.append("alarmd-major"); + if ( sensor_ptr->minor.alarmed ) + alarmed_severity.append("alarmd-minor"); + alarmed_severity.append("]"); + } + return(alarmed_severity); +} + +string print_ignored_severity ( sensor_type * sensor_ptr ) +{ + string ignored_severity = ""; + if ( sensor_ptr->critl.ignored || sensor_ptr->major.ignored || sensor_ptr->minor.ignored ) + { + ignored_severity.append(" ["); + if ( sensor_ptr->critl.ignored ) + ignored_severity.append("ignore-critl"); + if ( sensor_ptr->major.ignored ) + ignored_severity.append("ignore-major"); + if ( sensor_ptr->minor.ignored ) + ignored_severity.append("ignore-minor"); + ignored_severity.append("]"); + } + return(ignored_severity); +} + +string print_logged_severity ( sensor_type * sensor_ptr ) +{ + string logged_severity = ""; + if ( sensor_ptr->critl.logged || sensor_ptr->major.logged || sensor_ptr->minor.logged ) + { + logged_severity.append(" ["); + if ( sensor_ptr->critl.logged ) + logged_severity.append("logged-critl"); + if ( sensor_ptr->major.logged ) + logged_severity.append("logged-major"); + if ( sensor_ptr->minor.logged ) + logged_severity.append("logged-minor"); + logged_severity.append("]"); + } + return(logged_severity); +} + + +void sensorState_print ( string & hostname, sensor_type * sensor_ptr ) +{ + if ( sensor_ptr->status.compare("ok") || + sensor_ptr->degraded || + sensor_ptr->alarmed || + sensor_ptr->suppress || + sensor_ptr->critl.alarmed || + sensor_ptr->major.alarmed || + sensor_ptr->minor.alarmed || + sensor_ptr->critl.ignored || + sensor_ptr->major.ignored || + sensor_ptr->minor.ignored || + 
sensor_ptr->critl.logged || + sensor_ptr->major.logged || + sensor_ptr->minor.logged ) + { + wlog ("%s %-20s %-8s %s%s%s%s%s%s\n", + hostname.c_str(), + sensor_ptr->sensorname.c_str(), + sensor_ptr->status.c_str(), + print_alarmed_severity ( sensor_ptr ).c_str(), + print_logged_severity ( sensor_ptr ).c_str(), + print_ignored_severity ( sensor_ptr ).c_str(), + sensor_ptr->alarmed ? " alarmed" : "", + sensor_ptr->degraded ? " degraded" : "", + sensor_ptr->suppress ? " suppressed" : ""); + } +} + + +bool is_alarmed_state ( sensor_type * sensor_ptr , sensor_severity_enum & hwmon_sev ) +{ + int count = 0 ; + if ( sensor_ptr ) + { + if ( sensor_ptr->minor.alarmed == true ) + { + hwmon_sev = HWMON_SEVERITY_MINOR ; + count++ ; + } + if ( sensor_ptr->major.alarmed == true ) + { + hwmon_sev = HWMON_SEVERITY_MAJOR ; + count++ ; + } + if ( sensor_ptr->critl.alarmed == true ) + { + hwmon_sev = HWMON_SEVERITY_CRITICAL ; + count++ ; + } + + if ( count > 1 ) + { + slog ("%s '%s' alarm state tracking mismatch [alarm:%s:%s:%s]\n", + sensor_ptr->hostname.c_str(), + sensor_ptr->sensorname.c_str(), + sensor_ptr->minor.alarmed ? "Yes":"No", + sensor_ptr->major.alarmed ? "Yes":"No", + sensor_ptr->critl.alarmed ? "Yes":"No"); + } + } + return (count); +} + +bool is_alarmed ( sensor_type * sensor_ptr ) +{ + if ( sensor_ptr ) + { + if (( sensor_ptr->alarmed == true ) || + ( sensor_ptr->minor.alarmed == true ) || + ( sensor_ptr->major.alarmed == true ) || + ( sensor_ptr->critl.alarmed == true )) + { + return (true ); + } + } + return (false); +} + +/***************************************************************************** + * + * + * Name : checksum_sample_profile + * + * Description: Append all the sensor names in a 'sensor_data_type' sensor list + * into a long string and checksum that string. + * + * Purponse : The checksum provides a unique signature for a specific sensor + * profile. Used to uniquely identify the Quanta sensor model or + * a change of a sensor model. 
+ * + ******************************************************************************/ + +unsigned short checksum_sample_profile ( const string & hostname, + int sensors, + sensor_data_type * sensor_ptr) +{ + unsigned short sum = 0 ; + if ( sensors ) + { + string temp = "" ; + for ( int i = 0 ; i < sensors ; i++ , sensor_ptr++ ) + { + temp.append(sensor_ptr->name); + } + sum += checksum ( (void*)temp.data(), temp.length()); + + blog2 ("%s sensor sample checksum 0x%04x\n", hostname.c_str(), sum); + } + return (sum); +} + +/***************************************************************************** + * + * + * Name : checksum_sensor_profile + * + * Description: Append all the sensor names in a 'sensor_type' sensor list into + * a long string and checksum that string. + * + * Purponse : The checksum provides a unique signature for a specific sensor + * profile. Used to uniquely identify the Quanta sensor model or + * a change of a sensor model. + * + ******************************************************************************/ + +unsigned short checksum_sensor_profile ( const string & hostname, + int sensors, + sensor_type * sensor_ptr) +{ + unsigned short sum = 0 ; + if ( sensors ) + { + string temp = "" ; + for ( int i = 0 ; i < sensors ; i++ , sensor_ptr++ ) + { + temp.append(sensor_ptr->sensorname); + } + sum += checksum ( (void*)temp.data(), temp.length()); + + blog2 ("%s sensor profile checksum 0x%04x\n", hostname.c_str(), sum ); + } + return (sum); +} + + + +/* load the specified key value in buffer line into 'value' */ +bool got_delimited_value ( char * buf_ptr, + const char * key, + const char * delimiter, + string & value ) +{ + if ( strstr ( buf_ptr, key )) + { + string _str = buf_ptr ; + if ( _str.find(key) != std::string::npos ) + { + if ( _str.find( delimiter ) != std::string::npos ) + { + int y = _str.find( delimiter ) ; + value = _str.substr ( y+strlen(delimiter), std::string::npos) ; + value.erase ( value.size()-1, std::string::npos ) ; + return 
(true); + } + } + } + return (false); +} + +/*************************************************************************** + * + * Name : get_bmc_version_string + * + * Description: Return the bmc version string from the specified filename. + * + * Looking for 'Firmware Revision' label fromspecified file + * + * Firmware Revision : 3.29 + * + **************************************************************************/ + +#define BUFFER (80) + +#define MC_INFO_LABEL_FW_VERSION ((const char *)("Firmware Revision")) +#define MC_INFO_LABEL_DELIMITER ((const char *)(": ")) +string get_bmc_version_string ( string hostname, + const char * filename ) +{ + string bmc_fw_version = "" ; + if ( daemon_is_file_present ( filename ) ) + { + FILE * _stream = fopen ( filename, "r" ); + if ( _stream ) + { + char buffer [BUFFER]; + MEMSET_ZERO(buffer); + while ( fgets (buffer, BUFFER, _stream) ) + { + if ( got_delimited_value ( buffer, MC_INFO_LABEL_FW_VERSION, + MC_INFO_LABEL_DELIMITER, + bmc_fw_version )) + { + break ; + } + MEMSET_ZERO(buffer); + } + fclose(_stream); + } + } + else + { + elog ("%s failed to open mc info file '%s'\n", hostname.c_str(), + filename); + } + + return (bmc_fw_version); +} diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonUtil.h b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonUtil.h new file mode 100644 index 00000000..cdf0abcb --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonUtil.h @@ -0,0 +1,80 @@ +#ifndef __INCLUDE_HWMONUTIL_H__ +#define __INCLUDE_HWMONUTIL_H__ + +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud's Hardware Monitor "Utility" Header + */ + +#include +#include +#include /* for ... EFmAlarmSeverityT */ + +using namespace std; + +#include "hwmon.h" /* for ... 
sensor_severity_enum */ + +string get_key_value_string ( string reading, string key, char delimiter, bool set_tolowercase ); + +bool clear_severity_alarm ( string & hostname, hwmonAlarm_id_type id, string & sub_entity, EFmAlarmSeverityT severity, string reason ); +void clear_asserted_alarm ( string & hostname, hwmonAlarm_id_type id, sensor_type * ptr, string reason ); + +string get_severity ( sensor_severity_enum severity ); +sensor_severity_enum get_severity ( string status ); +string get_ipmi_severity ( sensor_severity_enum status ); +sensor_severity_enum get_ipmi_severity ( string status ); + +bool is_valid_action ( sensor_severity_enum severity, string & action, bool set_to_lower ); +bool is_ignore_action ( string action ) ; +bool is_log_action ( string action ) ; +bool is_alarm_action ( string action ) ; +bool is_reset_action ( string action ) ; +bool is_powercycle_action ( string action ) ; + +bool is_alarmed_state ( string action , sensor_severity_enum & hwmon_sev ) ; +bool is_alarmed ( sensor_type * sensor_ptr ); + +void clear_logged_state ( sensor_type * sensor_ptr ); +void clear_ignored_state ( sensor_type * sensor_ptr ); +void clear_alarmed_state ( sensor_type * sensor_ptr ); +void clear_degraded_state ( sensor_type * sensor_ptr ); +void set_degraded_state ( sensor_type * sensor_ptr ); + + + +void set_alarmed_severity ( sensor_type * sensor_ptr , EFmAlarmSeverityT severity ); + +void set_logged_severity ( sensor_type * sensor_ptr , EFmAlarmSeverityT severity ); +void set_ignored_severity ( sensor_type * sensor_ptr , EFmAlarmSeverityT severity ); + +void clear_logged_severity ( sensor_type * sensor_ptr , EFmAlarmSeverityT severity ); +void clear_ignored_severity ( sensor_type * sensor_ptr , EFmAlarmSeverityT severity ); + +string print_alarmed_severity ( sensor_type * sensor_ptr ); +string print_ignored_severity ( sensor_type * sensor_ptr ); +string print_logged_severity ( sensor_type * sensor_ptr ); + +unsigned short checksum_sensor_profile ( const 
string & hostname, + int sensors, + sensor_type * sensor_ptr); + +unsigned short checksum_sample_profile ( const string & hostname, + int sensors, + sensor_data_type * sensor_ptr); + +bool got_delimited_value ( char * buf_ptr, + const char * key, + const char * delimiter, + string & value ); + +string get_bmc_version_string ( string hostname, + const char * filename ); + +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/hwmon.logrotate b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/hwmon.logrotate new file mode 100644 index 00000000..dd1eceee --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/hwmon.logrotate @@ -0,0 +1,28 @@ +#daily +nodateext +start 1 +missingok +notifempty +compress +sharedscripts +postrotate + systemctl reload syslog-ng > /dev/null 2>&1 || true +endscript + +/var/log/hwmond.log +{ + size 50M + rotate 5 +} + +/var/log/hwmond_event.log +{ + size 50M + rotate 5 +} + +/var/log/hwmond_api.log +{ + size 50M + rotate 5 +} diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/hwmon.service b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/hwmon.service new file mode 100644 index 00000000..8c8f5b93 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/hwmon.service @@ -0,0 +1,16 @@ +[Unit] +Description=Titanium Cloud Hardware Monitor +After=network.target syslog.service + +[Service] +Type=forking +ExecStart=/etc/rc.d/init.d/hwmon start +ExecStop=/etc/rc.d/init.d/hwmon stop +ExecReload=/etc/rc.d/init.d/hwmon reload +PIDFile=/var/run/hwmond.pid + +Restart=no + +[Install] +WantedBy=multi-user.target + diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/hwmond.conf b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/hwmond.conf new file mode 100644 index 00000000..7e8835bd --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/hwmond.conf @@ -0,0 +1,51 @@ +; ******************************************************************* +; Hardware Monitor Daemon (hwmond) Configuration File +; 
******************************************************************* + +[config] ; Configuration +audit_period = 30 ; Degrade Audit Period in seconds + +event_port = 2101 ; hwmond to Maintenance Event TX Port +cmd_port = 2114 ; Maintenance to hwmond Command RX port +sensor_port = 2188 ; port that expect scripts send sensor read data to for validation +inv_event_port = 2212 ; The Inventory event port number + +[defaults] + +[timeouts] +open_timeout = 3000 +cmnd_timeout = 500 + +[features] + +[debug] ; SIGHUP to reload + +debug_timer = 0 ; enable(1) or disable(0) timer logs (tlog) +debug_json = 0 ; enable(1) or disable(0) Json logs (jlog) +debug_http = 0 ; enable(1) or disable(0) Http logs (hlog) +debug_msg = 0 ; enable(1) or disable(0) message logs (mlog) +debug_bm = 0 ; enable(1) or disable(0) board management (blog) +debug_state = 0 ; enable(1) or disable(0) state change logs (clog) +debug_level = 0 ; decimal mask 0..15 (8,4,2,1) +debug_all = 0 ; set all debug levels to the specified value + +debug_event = none ; REST API tracing : ntrnEvent, novaEvent, novaAudit +debug_filter = none ; filter string (not used yet) + +testmode = 0 ; set to 1 to enable test mode +testmask = 0 ; bit field of out-of-service stress tests + ; bit 1 = fail get_sensor from hwmon + ; bit 2 = fail hwmon_get_group from hwmon + ; bit 3 = fail hwmon_group_sensors + ; bit 4 = fail hwmon_add_group + ; bit 5 = fail hwmonHttp_load_sensors + ; bit 6 = fail hwmonHttp_add_group + ; bit 7 = fail hwmonHttp_add_sensor + ; bit 8 = fail add_sensor + + +flush = 1 ; enable(1) or disable(0) force log flush (main loop) +flush_thld = 5 ; if enabled - force flush after this number of loops + +fit_code = 0 ; codes are in nodeBase.h -> fit_code_enum +fit_host = none ; the hostname to apply the fit to diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/hwmond_notify.py b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/hwmond_notify.py new file mode 100644 index 00000000..840f6cb3 --- /dev/null +++ 
b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/hwmond_notify.py @@ -0,0 +1,19 @@ +# +# Copyright (c) 2015 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +import socket +import os + +UDP_IP = socket.gethostbyname('controller') +UDP_PORT = 2188 +ENV_MESSAGE = os.environ["MESSAGE"] + +print "UDP target IP:", UDP_IP +print "UDP target port:", UDP_PORT +print "message:", ENV_MESSAGE + +sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) +sock.sendto(ENV_MESSAGE, (UDP_IP, UDP_PORT)) diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/lsb/hwmon b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/lsb/hwmon new file mode 100644 index 00000000..45c75d8f --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/lsb/hwmon @@ -0,0 +1,91 @@ +#! /bin/sh +# +# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +. /etc/init.d/functions + +DAEMON_NAME="hwmond" +DAEMON="/usr/local/bin/${DAEMON_NAME}" +PIDFILE="/var/run/${DAEMON_NAME}.pid" + +# Linux Standard Base (LSB) Error Codes +RETVAL=0 +GENERIC_ERROR=1 +INVALID_ARGS=2 +UNSUPPORTED_FEATURE=3 +NOT_INSTALLED=5 +NOT_RUNNING=7 + +PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin +export PATH + +if [ ! -e "${DAEMON}" ] ; then + logger "${DAEMON} is missing" + exit ${NOT_INSTALLED} +fi + +case "$1" in + start) + logger "Starting ${DAEMON_NAME}" + echo -n "Starting ${DAEMON_NAME}: " + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + echo -n "is already running " + RETVAL=0 + else + start-stop-daemon --start -b -x ${DAEMON} -- -l + RETVAL=$? 
+ fi + if [ ${RETVAL} -eq 0 ] ; then + pid=`pidof ${DAEMON_NAME}` + echo "OK" + logger "${DAEMON} (${pid})" + else + echo "FAIL" + RETVAL=${GENERIC_ERROR} + fi + ;; + + stop) + logger "Stopping ${DAEMON_NAME}" + echo -n "Stopping ${DAEMON_NAME}: " + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + killproc ${DAEMON_NAME} + fi + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + echo "FAIL" + RETVAL=${GENERIC_ERROR} + else + echo "OK" + fi + rm -f ${PIDFILE} + ;; + + restart) + $0 stop + $0 start + ;; + + status) + pid=`pidof ${DAEMON_NAME}` + RETVAL=$? + if [ ${RETVAL} -eq 0 ] ; then + echo "${DAEMON_NAME} is running" + else + echo "${DAEMON_NAME} is NOT running" + RETVAL=${NOT_RUNNING} + fi + ;; + + condrestart) + $0 restart + ;; + + *) + echo "usage: $0 { start | stop | status | restart | condrestart }" ; RETVAL=${INVALID_ARGS} + ;; +esac + +exit ${RETVAL} diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/ocf/hwmon b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/ocf/hwmon new file mode 100644 index 00000000..14f52a20 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/ocf/hwmon @@ -0,0 +1,497 @@ +#!/bin/sh +# +# Copyright (c) 2013-2017 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# Support: www.windriver.com +# +# Purpose: This resource agent manages +# +# .... the CGCS Platform Hardware Monitor Daemon +# +# RA Spec: +# +# http://www.opencf.org/cgi-bin/viewcvs.cgi/specs/ra/resource-agent-api.txt?rev=HEAD +# +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + + +####################################################################### + +# Fill in some defaults if no values are specified +OCF_RESKEY_binary_default="hwmond" +OCF_RESKEY_config_default="/etc/mtc/hwmond.conf" +OCF_RESKEY_dbg_default="false" +OCF_RESKEY_logging_default="true" +OCF_RESKEY_mode_default="normal" +OCF_RESKEY_user_default="admin" +OCF_RESKEY_pid_default="/var/run/hwmond.pid" +OCF_RESKEY_state_default="standby" + + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_logging=${OCF_RESKEY_logging_default}} +: ${OCF_RESKEY_dbg=${OCF_RESKEY_dbg_default}} +: ${OCF_RESKEY_mode=${OCF_RESKEY_mode_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} +: ${OCF_RESKEY_state=${OCF_RESKEY_state_default}} + +mydaemon="/usr/local/bin/${OCF_RESKEY_binary}" +statusfile="/var/run/${OCF_RESKEY_binary}.info" + +####################################################################### + +usage() { + cat < + + +1.0 + + +This 'hwmond' is an OCF Compliant Resource Agent that manages start, stop +and in-service monitoring of the Hardware Monitor service on Wind River's +Titanium Cloud in an active mode. + + + +Manages the Titanium Cloud's Hardware Monitor (hwmond) Daemon. + + + + + + + +state = standby ... run daemon in 'standby' mode (default) +state = active ... run daemon in 'active' mode + +Hardware Monitor Daemon Activity State Option + + + + + +mode = normal ... run hardware monitor daemon in 'normal' mode (default) +mode = passive ... run hardware monitor daemon in 'passive' mode + +Maintenance Mode Option + + + + + + +This option is used to direct the hwmon dameon log stream. + +logging = true ... /var/log/hwmond.log (default) +logging = false ... /dev/null + +See also debug option which sets the verbosity of logging. + +Service Logging Control Option + + + + + + +dbg = false ... 
info, warn and err logs sent to output stream (default) +dbg = true ... Additional dbg logs are also sent to the output stream + +Service Debug Control Option + + + + + + + + + + + + + + +END + return ${OCF_SUCCESS} +} + +hwmond_validate() { + + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "hwmond:validate" + fi + + check_binary "/usr/local/bin/${OCF_RESKEY_binary}" + check_binary "/usr/local/bin/mtcAgent" + check_binary "/usr/local/bin/mtcClient" + check_binary sysinv-api + check_binary pidof + + if [ ! -f ${OCF_RESKEY_config} ] ; then + msg="${OCF_RESKEY_binary} conf file missing ${OCF_RESKEY_config}" + ocf_log err "${msg}" + return ${OCF_ERR_CONFIGURED} + fi + + return ${OCF_SUCCESS} +} + +hwmond_status () { + + proc="hwmond:status" + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "hwmond:status" + fi + + # remove the status file before we request a new + rm -f ${statusfile} + + # Verify the pid file exists as part of status + for ((loop=0;loop<3;loop++)) { + if [ -f ${OCF_RESKEY_pid} ] ; then + break + else + sleep 1 + fi + } + + # See if the daemon is running + pid=`cat ${OCF_RESKEY_pid}` + kill -0 $pid 2> /dev/null + if [ $? 
-eq 0 ] ; then + + log_sig="${OCF_RESKEY_binary} In-Service Active Monitor Test" + + # Ask the daemon to produce status + ocf_run kill -s USR1 $pid + + # Wait for the response + for ((loop=0;loop<10;loop++)) { + sleep 1 + if [ -f ${statusfile} ] ; then + + ocf_log info "${log_sig} Passed ($loop)" + return ${OCF_SUCCESS} + + elif [ $loop -eq 5 ] ; then + + # send the signal again + ocf_run kill -s USR1 $pid + + pid_stat=`cat /proc/${pid}/stat` + ocf_log notice "${log_sig} is slow to respond" + ocf_log notice "$pid_stat" + + elif [ $loop -eq 8 ] ; then + + pid_stat=`cat /proc/${pid}/stat` + ocf_log warn "${log_sig} is very slow to respond" + ocf_log warn "$pid_stat" + + fi + } + log_procfs + ocf_log err "${log_sig} Failed" + return ${OCF_ERR_GENERIC} + fi + return ${OCF_NOT_RUNNING} +} + +hwmond_monitor () { + + proc="hwmond:monitor" + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "${proc}" + fi + + # Uncomment if you want the monitor function to force-pass + # return ${OCF_SUCCESS} + + pid=`cat ${OCF_RESKEY_pid}` + kill -0 $pid 2> /dev/null + if [ $? -ne 0 ] ; then + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "${proc} called while ${OCF_RESKEY_binary} not running." + fi + return ${OCF_NOT_RUNNING} + fi + + hwmond_status + return $? +} + + +hwmond_start () { + + local rc + + start_proc="hwmond:start" + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "${start_proc}" + fi + + # Uncomment if you want the start function to force-pass without starting + # return ${OCF_SUCCESS} + + # If running then issue a ping test + pid=`cat ${OCF_RESKEY_pid}` + kill -0 $pid 2> /dev/null + if [ $? -eq 0 ] ; then + hwmond_status + rc=$? + if [ $rc -ne ${OCF_SUCCESS} ] ; then + msg="${start_proc} ping test failed rc=${rc}" + ocf_log err "${msg}" + hwmond_stop + else + # Spec says to return success if process is already running for start + pid=`cat ${OCF_RESKEY_pid}` + kill -0 $pid 2> /dev/null + if [ $? 
-eq 0 ] ; then + ocf_log info "${start_proc} called while ${OCF_RESKEY_binary} is already running" + return ${OCF_SUCCESS} + fi + fi + fi + + # should not be running now or error + pid=`cat ${OCF_RESKEY_pid}` + kill -0 $pid 2> /dev/null + if [ $? -eq 0 ] ; then + msg="${start_proc} cannot kill off existing instance of ${OCF_RESKEY_binary}" + ocf_log err "${msg}" + return ${OCF_RUNNING_MASTER} + fi + + rm -f ${statusfile} + + if [ ${OCF_RESKEY_state} = "active" ] ; then + RUN_OPT_STATE="-a" + else + RUN_OPT_STATE="" + fi + + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + RUN_OPT_DEBUG="-d debug" + else + RUN_OPT_DEBUG="" + fi + + if [ ${OCF_RESKEY_mode} = "passive" ] ; then + RUN_OPT_MODE="-p" + else + RUN_OPT_MODE="" + fi + + if [ ${OCF_RESKEY_logging} = "true" ] ; then + RUN_OPT_LOG="-l" + else + RUN_OPT_LOG="" + fi + + # default PID to null + pid="" + + # Try to Start the daemon + ${mydaemon} ${RUN_OPT_STATE} ${RUN_OPT_LOG} ${RUN_OPT_MODE} ${RUN_OPT_DEBUG} + rc=$? + + # verify it was started and set return code appropriately + if [ $rc -eq ${OCF_SUCCESS} ] ; then + # Verify the pid file exists as part of status + for ((loop=0;loop<3;loop++)) { + if [ -f ${OCF_RESKEY_pid} ] ; then + break + else + ocf_log info "${start_proc} waiting ... loop=${loop}" + sleep 1 + fi + } + + pid=`cat ${OCF_RESKEY_pid}` + # ocf_log info "PID:$pid" + kill -0 $pid 2> /dev/null + if [ $? -ne 0 ] ; then + rc=${OCF_FAILED_MASTER} + else + if [ ! 
-f ${statusfile} ] ; then + ocf_log info "hwmond: Startup Health Test Failed - missing info" + rc=${OCF_ERR_GENERIC} + fi + fi + else + ocf_log info "${start_proc} failed ${mydaemon} daemon rc=${rc}" + rc=${OCF_ERR_GENERIC} + fi + + # Record success or failure and return status + if [ ${rc} -eq $OCF_SUCCESS ] ; then + msg="${start_proc}ed pid=${pid}" + ocf_log info "${msg}" + else + msg="${start_proc} failed rc=${rc}" + ocf_log err "${msg}" + rc=${OCF_NOT_RUNNING} + fi + return $rc +} + +hwmond_confirm_stop () { + + proc="hwmond:confirm_stop" + ocf_log info "${proc}" + + pid=`pidof ${OCF_RESKEY_binary}` + kill -0 ${pid} 2> /dev/null + if [ $? -eq 0 ] ; then + ocf_log info "${proc} 'kill -9 ${pid}'" + kill -9 ${pid} + ocf_log info "${proc}ed (by emergency kill -9 ${pid})" + sleep 1 + fi + rm -f ${OCF_RESKEY_pid} +} + +hwmond_stop () { + + proc="hwmond:stop" + + # See if the process is running by name (pidof) + + pid=`pidof ${OCF_RESKEY_binary}` + ocf_log info "${proc} PID:${pid}" + kill -0 ${pid} 2> /dev/null + if [ $? -ne 0 ] ; then + ocf_log info "${proc} called while already stopped (no process)" + hwmond_confirm_stop + return ${OCF_SUCCESS} + fi + + MAX=3 + for ((loop=0;loop<$MAX;loop++)) { + + # verify stop with pidfile + if [ -f ${OCF_RESKEY_pid} ] ; then + + pid=`cat ${OCF_RESKEY_pid}` + + # an empty pidfile means the daemon already exited ; we are done + if [ -z "${pid}" ] ; then + ocf_log info "${proc}ped (by -int)" + break + + # otherwise ask the recorded pid to exit with -int and re-check + else + + kill -0 ${pid} 2> /dev/null + if [ $? -ne 0 ] ; then + ocf_log info "${proc}ped (by pid)" + break + else + ocf_log info "${proc}ping (by -int - loop:${loop})" + kill -int ${pid} + sleep 1 + fi + fi + fi + } + hwmond_confirm_stop + return ${OCF_SUCCESS} +} + +hwmond_reload () { + + local rc + + proc="hwmond:reload" + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "${proc}" + fi + + hwmond_stop + rc=$? 
+ if [ $rc -eq ${OCF_SUCCESS} ] ; then + #sleep 1 + hwmond_start + rc=$? 
+ if [ $rc -eq ${OCF_SUCCESS} ] ; then + msg="hwmond:reloaded" + ocf_log info "${msg}" + fi + fi + + if [ ${rc} -ne ${OCF_SUCCESS} ] ; then + msg="${OCF_RESKEY_binary}: failed to restart rc=${rc}" + ocf_log err "${msg}" + fi + + return ${rc} +} + +case ${__OCF_ACTION} in + meta-data) meta_data + exit ${OCF_SUCCESS} + ;; + usage|help) usage + exit ${OCF_SUCCESS} + ;; +esac + +ocf_log info "hwmond:${__OCF_ACTION} action" + +# Anything except meta-data and help must pass validation +hwmond_validate || exit $? + +case ${__OCF_ACTION} in + start) hwmond_start + ;; + stop) hwmond_stop + ;; + status) hwmond_status + ;; + reload) hwmond_reload + ;; + monitor) hwmond_monitor + ;; + validate-all) hwmond_validate + ;; + *) usage + exit ${OCF_ERR_UNIMPLEMENTED} + ;; +esac diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/show_hp360 b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/show_hp360 new file mode 100644 index 00000000..9480884f --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/show_hp360 @@ -0,0 +1,78 @@ +#!/bin/bash + +# +# Copyright (c) 2015 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +rc=0 + +if [ "$1" == "/system1" -a "$2" == "name" ] ; then + + echo "" + echo "status=0" + echo "status_tag=COMMAND COMPLETED" + echo "Wed Aug 5 11:56:35 2015" + echo "" + echo "" + echo "" + echo "/system1" + echo " Properties" + echo " name=ProLiant DL360 Gen9" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "" + echo "" + +elif [ "$0" == "/usr/sbin/stop" -a "$1" == "/system1" ] ; then + + echo "Server powering off ......." 
+ echo "" + shutdown -P -h -time now + +elif [ "$0" == "/usr/sbin/reset" -a "$1" == "/system1" ] ; then + + echo "Server reset" + echo "" + reboot + +elif [ "$1" == "" ] ; then + + echo "" + echo "status=0" + echo "status_tag=COMMAND COMPLETED" + echo "Tue Aug 18 16:44:29 2015" + echo "" + echo "" + echo "" + echo "/" + echo " Targets" + echo " system1" + echo " map1" + echo " Properties" + echo " Verbs" + echo " cd version exit show" + echo "" + +else + + echo "" + echo "status=2" + echo "status_tag=COMMAND PROCESSING FAILED" + echo "error_tag=INVALID OPTION" + echo "Wed Aug 5 12:10:46 2015" + echo "" + echo "" + echo "" + echo "" + echo "" + echo "" + + rc=1 + +fi + +exit $rc + diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/show_hp380 b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/show_hp380 new file mode 100644 index 00000000..49a164f0 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/show_hp380 @@ -0,0 +1,6301 @@ +#!/bin/bash + +# +# Copyright (c) 2015 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +rc=0 + +if [ "$1" == "/system1" -a "$2" == "name" ] ; then + + echo "" + echo "status=0" + echo "status_tag=COMMAND COMPLETED" + echo "Wed Aug 5 11:56:35 2015" + echo "" + echo "" + echo "" + echo "/system1" + echo " Properties" + echo " name=ProLiant DL380 Gen9" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "" + echo "" + +elif [ "$0" == "/usr/sbin/stop" -a "$1" == "/system1" ] ; then + + echo "Server powering off ......." 
+ echo "" + shutdown -P -h -time now + +elif [ "$0" == "/usr/sbin/reset" -a "$1" == "/system1" ] ; then + + echo "Server reset" + echo "" + reboot + +elif [ "$1" == "" ] ; then + + echo "" + echo "status=0" + echo "status_tag=COMMAND COMPLETED" + echo "Tue Aug 18 16:44:29 2015" + echo "" + echo "" + echo "" + echo "/" + echo " Targets" + echo " system1" + echo " map1" + echo " Properties" + echo " Verbs" + echo " cd version exit show" + echo "" + +elif [ "$2" == "/system1/fan*" ] ; then + + echo "/system1/fan1" + echo " Targets" + echo " Properties" + echo " DeviceID=Fan Block 1" + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " VariableSpeed=Yes" + echo " DesiredSpeed=44 percent" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/fan2" + echo " Targets" + echo " Properties" + echo " DeviceID=Fan Block 2" + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " VariableSpeed=Yes" + echo " DesiredSpeed=44 percent" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/fan3" + echo " Targets" + echo " Properties" + echo " DeviceID=Fan Block 3" + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " VariableSpeed=Yes" + echo " DesiredSpeed=44 percent" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/fan4" + echo " Targets" + echo " Properties" + echo " DeviceID=Fan Block 4" + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " VariableSpeed=Yes" + echo " DesiredSpeed=44 percent" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/fan5" + echo " Targets" + echo " Properties" + echo " DeviceID=Fan Block 5" + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " VariableSpeed=Yes" + echo " DesiredSpeed=44 percent" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show" + echo "" + echo 
"/system1/fan6" + echo " Targets" + echo " Properties" + echo " DeviceID=Fan Block 6" + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " VariableSpeed=Yes" + echo " DesiredSpeed=44 percent" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/fan7" + echo " Targets" + echo " Properties" + echo " DeviceID=Fan Block 7" + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " VariableSpeed=Yes" + echo " DesiredSpeed=44 percent" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/fan8" + echo " Targets" + echo " Properties" + echo " DeviceID=Fan Block 8" + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " VariableSpeed=Yes" + echo " DesiredSpeed=44 percent" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show" + echo "" + +elif [ "$2" == "/system1/sensor*" ] ; then + + echo "" + echo "" + echo "/system1/sensor1" + echo " Targets" + echo " Properties" + echo " DeviceID=01-Inlet Ambient " + echo " ElementName=Ambient" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=25" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=42" + echo " oemhp_CriticalValue=46" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor2" + echo " Targets" + echo " Properties" + echo " DeviceID=02-CPU 1 " + echo " ElementName=CPU" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=40" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor3" + echo " Targets" + echo " Properties" + echo " DeviceID=03-CPU 2 " + echo " ElementName=CPU" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=40" + echo " SensorType=Temperature" + echo 
" HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor4" + echo " Targets" + echo " Properties" + echo " DeviceID=04-P1 DIMM 1-6 " + echo " ElementName=Memory" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=32" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=87" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor5" + echo " Targets" + echo " Properties" + echo " DeviceID=05-P1 DIMM 7-12 " + echo " ElementName=Memory" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=34" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=87" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor6" + echo " Targets" + echo " Properties" + echo " DeviceID=06-P2 DIMM 1-6 " + echo " ElementName=Memory" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=33" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=87" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor7" + echo " Targets" + echo " Properties" + echo " DeviceID=07-P2 DIMM 7-12 " + echo " ElementName=Memory" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=32" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=87" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor8" + echo " Targets" + echo " Properties" + echo " DeviceID=08-P1 Mem Zone " + echo " ElementName=Memory" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " 
CurrentReading=31" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor9" + echo " Targets" + echo " Properties" + echo " DeviceID=09-P1 Mem Zone " + echo " ElementName=Memory" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=36" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor10" + echo " Targets" + echo " Properties" + echo " DeviceID=10-P2 Mem Zone " + echo " ElementName=Memory" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=32" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor11" + echo " Targets" + echo " Properties" + echo " DeviceID=11-P2 Mem Zone " + echo " ElementName=Memory" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=31" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor12" + echo " Targets" + echo " Properties" + echo " DeviceID=12-HD Max " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=35" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=60" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor13" + echo " Targets" + echo " Properties" + echo " DeviceID=13-Chipset 1 " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " 
CurrentReading=44" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=105" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor14" + echo " Targets" + echo " Properties" + echo " DeviceID=14-Chipset1 Zone " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=35" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor15" + echo " Targets" + echo " Properties" + echo " DeviceID=15-P/S 1 Inlet " + echo " ElementName=Power Supply" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=29" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=Not Applicable" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor16" + echo " Targets" + echo " Properties" + echo " DeviceID=16-P/S 1 Zone " + echo " ElementName=Power Supply" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=32" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor17" + echo " Targets" + echo " Properties" + echo " DeviceID=17-P/S 2 Inlet " + echo " ElementName=Power Supply" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=31" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=Not Applicable" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor18" + echo " Targets" + echo " Properties" + echo " DeviceID=18-P/S 2 Zone " + echo " ElementName=Power 
Supply" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=33" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=65" + echo " oemhp_CriticalValue=70" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor19" + echo " Targets" + echo " Properties" + echo " DeviceID=19-PCI #1 " + echo " ElementName=I/O Board" + echo " OperationalStatus=Not Installed" + echo " RateUnits=Celsius" + echo " CurrentReading=N/A" + echo " SensorType=Temperature" + echo " HealthState=Not Installed" + echo " oemhp_CautionValue=Not Applicable" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor20" + echo " Targets" + echo " Properties" + echo " DeviceID=20-PCI #2 " + echo " ElementName=I/O Board" + echo " OperationalStatus=Not Installed" + echo " RateUnits=Celsius" + echo " CurrentReading=N/A" + echo " SensorType=Temperature" + echo " HealthState=Not Installed" + echo " oemhp_CautionValue=Not Applicable" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor21" + echo " Targets" + echo " Properties" + echo " DeviceID=21-VR P1 " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=37" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=115" + echo " oemhp_CriticalValue=120" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor22" + echo " Targets" + echo " Properties" + echo " DeviceID=22-VR P2 " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=34" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=115" + echo " oemhp_CriticalValue=120" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor23" + echo " Targets" + 
echo " Properties" + echo " DeviceID=23-VR P1 Mem " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=29" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=115" + echo " oemhp_CriticalValue=120" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor24" + echo " Targets" + echo " Properties" + echo " DeviceID=24-VR P1 Mem " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=29" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=115" + echo " oemhp_CriticalValue=120" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor25" + echo " Targets" + echo " Properties" + echo " DeviceID=25-VR P2 Mem " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=31" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=115" + echo " oemhp_CriticalValue=120" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor26" + echo " Targets" + echo " Properties" + echo " DeviceID=26-VR P2 Mem " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=29" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=115" + echo " oemhp_CriticalValue=120" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor27" + echo " Targets" + echo " Properties" + echo " DeviceID=27-VR P1Mem Zone " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=27" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor28" + echo " Targets" + echo " 
Properties" + echo " DeviceID=28-VR P1Mem Zone " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=28" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor29" + echo " Targets" + echo " Properties" + echo " DeviceID=29-VR P2Mem Zone " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=30" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor30" + echo " Targets" + echo " Properties" + echo " DeviceID=30-VR P2Mem Zone " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=27" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor31" + echo " Targets" + echo " Properties" + echo " DeviceID=31-HD Controller " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=62" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=105" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor32" + echo " Targets" + echo " Properties" + echo " DeviceID=32-HD Cntlr Zone " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=42" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=65" + echo " oemhp_CriticalValue=70" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor33" + echo " 
Targets" + echo " Properties" + echo " DeviceID=33-PCI 1 Zone " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=35" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor34" + echo " Targets" + echo " Properties" + echo " DeviceID=34-PCI 1 Zone " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=36" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=66" + echo " oemhp_CriticalValue=71" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor35" + echo " Targets" + echo " Properties" + echo " DeviceID=35-LOM Card " + echo " ElementName=I/O Board" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=51" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=100" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor36" + echo " Targets" + echo " Properties" + echo " DeviceID=36-PCI 2 Zone " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=42" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=65" + echo " oemhp_CriticalValue=70" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor37" + echo " Targets" + echo " Properties" + echo " DeviceID=37-System Board " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=42" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor38" + echo 
" Targets" + echo " Properties" + echo " DeviceID=38-System Board " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=34" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor39" + echo " Targets" + echo " Properties" + echo " DeviceID=39-Sys Exhaust " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=34" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor40" + echo " Targets" + echo " Properties" + echo " DeviceID=40-Sys Exhaust " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=36" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor41" + echo " Targets" + echo " Properties" + echo " DeviceID=41-Sys Exhaust " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=36" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=64" + echo " oemhp_CriticalValue=69" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor42" + echo " Targets" + echo " Properties" + echo " DeviceID=42-SuperCAP Max " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=25" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=65" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + +elif [ "$2" == 
"/system1/powersupply*" ] ; then + + echo "/system1/powersupply1" + echo " Targets" + echo " Properties" + echo " ElementName=Power Supply" + echo " OperationalStatus=Failed" + echo " HealthState=Failed" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/powersupply2" + echo " Targets" + echo " Properties" + echo " ElementName=Power Supply" + echo " OperationalStatus=Ok" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "" + +elif [ "$1" == "/system1/powersupply1" ] ; then + + echo "status=0" + echo "status_tag=COMMAND COMPLETED" + echo "Mon Oct 26 05:27:07 2015" + echo "" + echo "" + echo "" + echo "/system1/powersupply1" + echo " Targets" + echo " Properties" + echo " ElementName=Power Supply" + echo " OperationalStatus=Failed" + echo " HealthState=Failed" + echo " Verbs" + echo " cd version exit show " + echo "" + +elif [ "$1" == "/system1/powersupply2" ] ; then + echo "" + echo "status=0" + echo "status_tag=COMMAND COMPLETED" + echo "Mon Oct 26 05:27:07 2015" + echo "" + echo "" + echo "" + echo "/system1/powersupply2" + echo " Targets" + echo " Properties" + echo " ElementName=Power Supply" + echo " OperationalStatus=Ok" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "" + +elif [ "$1" == "-a" -a "$2" == "" ] ; then + + echo "show -a" + echo "status=0" + echo "status_tag=COMMAND COMPLETED" + echo "Wed Oct 28 09:55:46 2015" + echo "" + echo "" + echo "" + echo "/" + echo " Targets" + echo " system1" + echo " map1" + echo " Properties" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1" + echo " Targets" + echo " firmware1" + echo " bootconfig1" + echo " log1" + echo " led1" + echo " network1" + echo " oemhp_vsp1" + echo " oemhp_power1" + echo " cpu1" + echo " cpu2" + echo " memory1" + echo " memory2" + echo " memory3" + echo " memory4" + echo " memory5" + echo " memory6" + echo " memory7" + echo " memory8" + echo " memory9" + echo " 
memory10" + echo " memory11" + echo " memory12" + echo " memory13" + echo " memory14" + echo " memory15" + echo " memory16" + echo " memory17" + echo " memory18" + echo " memory19" + echo " memory20" + echo " memory21" + echo " memory22" + echo " memory23" + echo " memory24" + echo " slot1" + echo " slot2" + echo " slot3" + echo " swid1" + echo " swid2" + echo " swid3" + echo " swid4" + echo " swid5" + echo " swid6" + echo " swid7" + echo " swid8" + echo " swid9" + echo " swid10" + echo " swid11" + echo " swid12" + echo " swid13" + echo " fan1" + echo " fan2" + echo " fan3" + echo " fan4" + echo " fan5" + echo " fan6" + echo " fan7" + echo " sensor1" + echo " sensor2" + echo " sensor3" + echo " sensor4" + echo " sensor5" + echo " sensor6" + echo " sensor7" + echo " sensor8" + echo " sensor9" + echo " sensor10" + echo " sensor11" + echo " sensor12" + echo " sensor13" + echo " sensor14" + echo " sensor15" + echo " sensor16" + echo " sensor17" + echo " sensor18" + echo " sensor19" + echo " sensor20" + echo " sensor21" + echo " sensor22" + echo " sensor23" + echo " sensor24" + echo " sensor25" + echo " sensor26" + echo " sensor27" + echo " sensor28" + echo " sensor29" + echo " sensor30" + echo " sensor31" + echo " sensor32" + echo " sensor33" + echo " sensor34" + echo " sensor35" + echo " sensor36" + echo " powersupply1" + echo " powersupply2" + echo " Properties" + echo " name=ProLiant DL360 Gen9" + echo " number=MXQ50308V9" + echo " oemhp_server_name=compute-4" + echo " oemhp_server_fqdn=" + echo " oemhp_smh_fqdn=compute-4" + echo " enabledstate=enabled" + echo " processor_number=2" + echo " Verbs" + echo " cd version exit show reset set start stop" + echo "" + echo "/system1/firmware1" + echo " Targets" + echo " Properties" + echo " version=P89" + echo " date=11/03/2014" + echo " Verbs" + echo " cd version exit show load " + echo "" + echo "/system1/bootconfig1" + echo " Targets" + echo " bootsource1" + echo " bootsource2" + echo " bootsource3" + echo " bootsource4" 
+ echo " bootsource5" + echo " Properties" + echo " oemhp_bootmode=Legacy" + echo " oemhp_secureboot=Not Available" + echo " oemhp_pendingbootmode=Legacy" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "" + echo "/system1/bootconfig1/bootsource1" + echo " Targets" + echo " Properties" + echo " bootorder=1" + echo " bootdevice=BootFmCd " + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/bootconfig1/bootsource2" + echo " Targets" + echo " Properties" + echo " bootorder=3" + echo " bootdevice=BootFmDisk " + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/bootconfig1/bootsource3" + echo " Targets" + echo " Properties" + echo " bootorder=2" + echo " bootdevice=BootFmUSBKey " + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/bootconfig1/bootsource4" + echo " Targets" + echo " Properties" + echo " bootorder=4" + echo " bootdevice=BootFmNetwork" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/bootconfig1/bootsource5" + echo " Targets" + echo " Properties" + echo " bootorder=5" + echo " bootdevice=Unavailable" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "" + echo "/system1/log1" + echo " Targets" + echo " record1" + echo " record2" + echo " record3" + echo " record4" + echo " record5" + echo " record6" + echo " record7" + echo " record8" + echo " record9" + echo " record10" + echo " record11" + echo " record12" + echo " record13" + echo " record14" + echo " record15" + echo " record16" + echo " record17" + echo " record18" + echo " record19" + echo " record20" + echo " record21" + echo " record22" + echo " record23" + echo " record24" + echo " record25" + echo " record26" + echo " record27" + echo " record28" + echo " record29" + echo " record30" + echo " record31" + echo " record32" + echo " record33" + echo " record34" + echo " record35" + echo " record36" + echo " record37" + echo " Properties" + echo " Verbs" + 
echo " cd version exit show delete set" + echo "" + echo "" + echo "/system1/log1/record1" + echo " Targets" + echo " Properties" + echo " number=1" + echo " severity=Informational" + echo " date=01/21/2015" + echo " time=02:17" + echo " description=Maintenance note: IML cleared through hpasmcli" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record2" + echo " Targets" + echo " Properties" + echo " number=2" + echo " severity=Critical" + echo " date=04/17/2015" + echo " time=21:02" + echo " description=Option ROM POST Error: 1785-Slot 0 Drive Array Not Configured" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record3" + echo " Targets" + echo " Properties" + echo " number=3" + echo " severity=Caution" + echo " date=04/17/2015" + echo " time=13:02" + echo " description=System Power Supply: Input Power Loss or Unplugged Power Cord, Verify Power Supply Input (Power Supply 2)" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record4" + echo " Targets" + echo " Properties" + echo " number=4" + echo " severity=Caution" + echo " date=04/17/2015" + echo " time=21:02" + echo " description=POST Error: 1623-Power Supply Failure - Power Supply 2 is failed. Action: Replace the power supply." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record5" + echo " Targets" + echo " Properties" + echo " number=5" + echo " severity=Informational" + echo " date=04/17/2015" + echo " time=21:55" + echo " description=Maintenance note: Intelligent Provisioning was loaded." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record6" + echo " Targets" + echo " Properties" + echo " number=6" + echo " severity=Caution" + echo " date=04/17/2015" + echo " time=14:03" + echo " description=System Power Supply: General Failure (Power Supply 2)" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record7" + echo " Targets" + echo " Properties" + echo " number=7" + echo " severity=Informational" + echo " date=04/17/2015" + echo " time=22:00" + echo " description=Maintenance note: Opt-in" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record8" + echo " Targets" + echo " Properties" + echo " number=8" + echo " severity=Caution" + echo " date=04/17/2015" + echo " time=14:04" + echo " description=System Power Supply: Input Power Loss or Unplugged Power Cord, Verify Power Supply Input (Power Supply 2)" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record9" + echo " Targets" + echo " Properties" + echo " number=9" + echo " severity=Caution" + echo " date=04/17/2015" + echo " time=22:04" + echo " description=POST Error: 1623-Power Supply Failure - Power Supply 2 is failed. Action: Replace the power supply." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record10" + echo " Targets" + echo " Properties" + echo " number=10" + echo " severity=Caution" + echo " date=04/19/2015" + echo " time=08:52" + echo " description=System Power Supply: Input Power Loss or Unplugged Power Cord, Verify Power Supply Input (Power Supply 2)" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record11" + echo " Targets" + echo " Properties" + echo " number=11" + echo " severity=Caution" + echo " date=04/19/2015" + echo " time=16:52" + echo " description=POST Error: 1623-Power Supply Failure - Power Supply 2 is failed. Action: Replace the power supply." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record12" + echo " Targets" + echo " Properties" + echo " number=12" + echo " severity=Caution" + echo " date=04/19/2015" + echo " time=09:09" + echo " description=System Power Supply: Input Power Loss or Unplugged Power Cord, Verify Power Supply Input (Power Supply 2)" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record13" + echo " Targets" + echo " Properties" + echo " number=13" + echo " severity=Caution" + echo " date=04/19/2015" + echo " time=17:10" + echo " description=POST Error: 1623-Power Supply Failure - Power Supply 2 is failed. Action: Replace the power supply." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record14" + echo " Targets" + echo " Properties" + echo " number=14" + echo " severity=Caution" + echo " date=04/19/2015" + echo " time=20:47" + echo " description=System Power Supply: Input Power Loss or Unplugged Power Cord, Verify Power Supply Input (Power Supply 2)" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record15" + echo " Targets" + echo " Properties" + echo " number=15" + echo " severity=Caution" + echo " date=04/20/2015" + echo " time=04:47" + echo " description=POST Error: 1623-Power Supply Failure - Power Supply 2 is failed. Action: Replace the power supply." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record16" + echo " Targets" + echo " Properties" + echo " number=16" + echo " severity=Caution" + echo " date=04/19/2015" + echo " time=21:00" + echo " description=System Power Supply: Input Power Loss or Unplugged Power Cord, Verify Power Supply Input (Power Supply 2)" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record17" + echo " Targets" + echo " Properties" + echo " number=17" + echo " severity=Caution" + echo " date=04/20/2015" + echo " time=05:00" + echo " description=POST Error: 1623-Power Supply Failure - Power Supply 2 is failed. Action: Replace the power supply." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record18" + echo " Targets" + echo " Properties" + echo " number=18" + echo " severity=Caution" + echo " date=[NOT SET]" + echo " time=" + echo " description=POST Error: 288-IMPORTANT: A new storage device has been detected and has been added to the end of the Boot Controller Order. Action: No action required." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record19" + echo " Targets" + echo " Properties" + echo " number=19" + echo " severity=Caution" + echo " date=[NOT SET]" + echo " time=" + echo " description=POST Error: 289-IMPORTANT: A new network or storage device has been detected. This device will not be shown in the Legacy BIOS Boot Order options in RBSU until the system has booted once. Action: No action required." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record20" + echo " Targets" + echo " Properties" + echo " number=20" + echo " severity=Caution" + echo " date=04/20/2015" + echo " time=04:32" + echo " description=System Power Supply: Input Power Loss or Unplugged Power Cord, Verify Power Supply Input (Power Supply 2)" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record21" + echo " Targets" + echo " Properties" + echo " number=21" + echo " severity=Caution" + echo " date=04/20/2015" + echo " time=12:32" + echo " description=POST Error: 1623-Power Supply Failure - Power Supply 2 is failed. Action: Replace the power supply." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record22" + echo " Targets" + echo " Properties" + echo " number=22" + echo " severity=Caution" + echo " date=04/20/2015" + echo " time=07:39" + echo " description=System Power Supply: Input Power Loss or Unplugged Power Cord, Verify Power Supply Input (Power Supply 2)" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record23" + echo " Targets" + echo " Properties" + echo " number=23" + echo " severity=Caution" + echo " date=04/20/2015" + echo " time=15:39" + echo " description=POST Error: 1623-Power Supply Failure - Power Supply 2 is failed. Action: Replace the power supply." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record24" + echo " Targets" + echo " Properties" + echo " number=24" + echo " severity=Repaired" + echo " date=04/20/2015" + echo " time=08:56" + echo " description=System Power Supply: Input Power Loss or Unplugged Power Cord, Verify Power Supply Input (Power Supply 2)" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record25" + echo " Targets" + echo " Properties" + echo " number=25" + echo " severity=Caution" + echo " date=04/20/2015" + echo " time=16:57" + echo " description=POST Error: 1623-Power Supply Failure - Power Supply 2 is failed. Action: Replace the power supply." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record26" + echo " Targets" + echo " Properties" + echo " number=26" + echo " severity=Repaired" + echo " date=04/20/2015" + echo " time=13:39" + echo " description=System Power Supply: Input Power Loss or Unplugged Power Cord, Verify Power Supply Input (Power Supply 1)" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record27" + echo " Targets" + echo " Properties" + echo " number=27" + echo " severity=Repaired" + echo " date=04/20/2015" + echo " time=13:39" + echo " description=System Power Supplies Not Redundant" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record28" + echo " Targets" + echo " Properties" + echo " number=28" + echo " severity=Informational" + echo " date=05/21/2015" + echo " time=13:01" + echo " description=Option ROM POST Error: 1792-Slot 0 Drive Array - Valid Data Found in Write-Back Cache. Data will automatically be written to drive array." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record29" + echo " Targets" + echo " Properties" + echo " number=29" + echo " severity=Critical" + echo " date=05/21/2015" + echo " time=10:27" + echo " description=Critical Temperature Threshold Exceeded" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record30" + echo " Targets" + echo " Properties" + echo " number=30" + echo " severity=Caution" + echo " date=05/21/2015" + echo " time=19:47" + echo " description=POST Error: 289-IMPORTANT: A new network or storage device has been detected. This device will not be shown in the Legacy BIOS Boot Order options in RBSU until the system has booted once. Action: No action required." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record31" + echo " Targets" + echo " Properties" + echo " number=31" + echo " severity=Informational" + echo " date=06/20/2015" + echo " time=03:05" + echo " description=Option ROM POST Error: 1792-Slot 0 Drive Array - Valid Data Found in Write-Back Cache. Data will automatically be written to drive array." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record32" + echo " Targets" + echo " Properties" + echo " number=32" + echo " severity=Repaired" + echo " date=07/14/2015" + echo " time=04:53" + echo " description=System Power Supply: Input Power Loss or Unplugged Power Cord, Verify Power Supply Input (Power Supply 2)" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record33" + echo " Targets" + echo " Properties" + echo " number=33" + echo " severity=Repaired" + echo " date=07/14/2015" + echo " time=04:53" + echo " description=System Power Supplies Not Redundant" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record34" + echo " Targets" + echo " Properties" + echo " number=34" + echo " severity=Repaired" + echo " date=07/14/2015" + echo " time=04:53" + echo " description=System Power Supply: Input Power Loss or Unplugged Power Cord, Verify Power Supply Input (Power Supply 1)" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record35" + echo " Targets" + echo " Properties" + echo " number=35" + echo " severity=Repaired" + echo " date=07/14/2015" + echo " time=04:53" + echo " description=System Power Supplies Not Redundant" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record36" + echo " Targets" + echo " Properties" + echo " number=36" + echo " severity=Informational" + echo " date=08/27/2015" + echo " time=01:00" + echo " description=Option ROM POST Error: 1792-Slot 0 Drive Array - Valid Data Found in Write-Back Cache. Data will automatically be written to drive array." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/log1/record37" + echo " Targets" + echo " Properties" + echo " number=37" + echo " severity=Informational" + echo " date=10/03/2015" + echo " time=14:58" + echo " description=Option ROM POST Error: 1792-Slot 0 Drive Array - Valid Data Found in Write-Back Cache. Data will automatically be written to drive array." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/led1" + echo " Targets" + echo " Properties" + echo " enabledstate=enabled" + echo " Verbs" + echo " cd version exit show start stop" + echo "" + echo "/system1/network1" + echo " Targets" + echo " Integrated_NICs" + echo " Properties" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/network1/Integrated_NICs" + echo " Targets" + echo " Properties" + echo " iLO4_MACAddress=38:63:bb:2d:78:b0" + echo " Port1NIC_MACAddress=38:63:bb:33:a4:fc" + echo " Port2NIC_MACAddress=38:63:bb:33:a4:fd" + echo " Port3NIC_MACAddress=38:63:bb:33:a4:fe" + echo " Port4NIC_MACAddress=38:63:bb:33:a4:ff" + echo " Port5NIC_MACAddress=d0:bf:9c:f1:b0:80" + echo " Port6NIC_MACAddress=d0:bf:9c:f1:b0:88" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/oemhp_power1" + echo " Targets" + echo " Properties" + echo " oemhp_powerreg=max" + echo " An iLO 4 License key is required." 
+ echo " oemhp_PresentPower=180 Watts" + echo " oemhp_power_micro_ver=1.0.7" + echo " oemhp_auto_pwr=ON (Minimum delay)" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/oemhp_vsp1" + echo " Targets" + echo " Properties" + echo " enabledstate=disabled" + echo " Verbs" + echo " cd version exit show set start stop" + echo "" + echo "" + echo "/system1/cpu1" + echo " Targets" + echo " Properties" + echo " name=Intel(R) Xeon(R) CPU E5-2640 v3 @ 2.60GHz" + echo " status=OK" + echo " number_cores=8" + echo " active_cores=8" + echo " threads=16" + echo " speed=2600MHz" + echo " memory_technology=64-bit Capable" + echo " cachememory1=512KB" + echo " cachememory2=2048KB" + echo " cachememory3=20480KB" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/system1/cpu2" + echo " Targets" + echo " Properties" + echo " name=Intel(R) Xeon(R) CPU E5-2640 v3 @ 2.60GHz" + echo " status=OK" + echo " number_cores=8" + echo " active_cores=8" + echo " threads=16" + echo " speed=2600MHz" + echo " memory_technology=64-bit Capable" + echo " cachememory1=512KB" + echo " cachememory2=2048KB" + echo " cachememory3=20480KB" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "" + echo "/system1/memory1" + echo " Targets" + echo " Properties" + echo " size=16384 MB" + echo " frequency=2133 MHz" + echo " location=PROC 1 DIMM 1" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory2" + echo " Targets" + echo " Properties" + echo " size=Not Installed" + echo " frequency=Not Installed" + echo " location=PROC 1 DIMM 2" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory3" + echo " Targets" + echo " Properties" + echo " size=Not Installed" + echo " frequency=Not Installed" + echo " location=PROC 1 DIMM 3" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory4" + echo " Targets" + echo " Properties" + echo " size=16384 MB" + echo " frequency=2133 MHz" + echo 
" location=PROC 1 DIMM 4" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory5" + echo " Targets" + echo " Properties" + echo " size=Not Installed" + echo " frequency=Not Installed" + echo " location=PROC 1 DIMM 5" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory6" + echo " Targets" + echo " Properties" + echo " size=Not Installed" + echo " frequency=Not Installed" + echo " location=PROC 1 DIMM 6" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory7" + echo " Targets" + echo " Properties" + echo " size=Not Installed" + echo " frequency=Not Installed" + echo " location=PROC 1 DIMM 7" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory8" + echo " Targets" + echo " Properties" + echo " size=Not Installed" + echo " frequency=Not Installed" + echo " location=PROC 1 DIMM 8" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory9" + echo " Targets" + echo " Properties" + echo " size=16384 MB" + echo " frequency=2133 MHz" + echo " location=PROC 1 DIMM 9" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory10" + echo " Targets" + echo " Properties" + echo " size=Not Installed" + echo " frequency=Not Installed" + echo " location=PROC 1 DIMM 10" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory11" + echo " Targets" + echo " Properties" + echo " size=Not Installed" + echo " frequency=Not Installed" + echo " location=PROC 1 DIMM 11" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory12" + echo " Targets" + echo " Properties" + echo " size=16384 MB" + echo " frequency=2133 MHz" + echo " location=PROC 1 DIMM 12" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory13" + echo " Targets" + echo " Properties" + echo " size=16384 MB" + echo " frequency=2133 MHz" + echo " location=PROC 2 DIMM 1" + echo " Verbs" + echo " cd 
version exit show" + echo "" + echo "/system1/memory14" + echo " Targets" + echo " Properties" + echo " size=Not Installed" + echo " frequency=Not Installed" + echo " location=PROC 2 DIMM 2" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory15" + echo " Targets" + echo " Properties" + echo " size=Not Installed" + echo " frequency=Not Installed" + echo " location=PROC 2 DIMM 3" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory16" + echo " Targets" + echo " Properties" + echo " size=16384 MB" + echo " frequency=2133 MHz" + echo " location=PROC 2 DIMM 4" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory17" + echo " Targets" + echo " Properties" + echo " size=Not Installed" + echo " frequency=Not Installed" + echo " location=PROC 2 DIMM 5" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory18" + echo " Targets" + echo " Properties" + echo " size=Not Installed" + echo " frequency=Not Installed" + echo " location=PROC 2 DIMM 6" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory19" + echo " Targets" + echo " Properties" + echo " size=Not Installed" + echo " frequency=Not Installed" + echo " location=PROC 2 DIMM 7" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory20" + echo " Targets" + echo " Properties" + echo " size=Not Installed" + echo " frequency=Not Installed" + echo " location=PROC 2 DIMM 8" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory21" + echo " Targets" + echo " Properties" + echo " size=16384 MB" + echo " frequency=2133 MHz" + echo " location=PROC 2 DIMM 9" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory22" + echo " Targets" + echo " Properties" + echo " size=Not Installed" + echo " frequency=Not Installed" + echo " location=PROC 2 DIMM 10" + echo " Verbs" + echo " cd version exit show" + echo "" + echo 
"/system1/memory23" + echo " Targets" + echo " Properties" + echo " size=Not Installed" + echo " frequency=Not Installed" + echo " location=PROC 2 DIMM 11" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/memory24" + echo " Targets" + echo " Properties" + echo " size=16384 MB" + echo " frequency=2133 MHz" + echo " location=PROC 2 DIMM 12" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "" + echo "/system1/slot1" + echo " Targets" + echo " Properties" + echo " type=PCI Express Gen 3" + echo " width=8x" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/slot2" + echo " Targets" + echo " Properties" + echo " type=PCI Express Gen 3" + echo " width=16x" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/slot3" + echo " Targets" + echo " Properties" + echo " type=PCI Express Gen 3" + echo " width=16x" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/swid1" + echo " Targets" + echo " Properties" + echo " name=HP ProLiant System ROM" + echo " VersionString=11/03/2014" + echo " oemhp_VersionStrings=11/03/2014" + echo " Verbs" + echo " cd version exit show load " + echo "" + echo "/system1/swid2" + echo " Targets" + echo " Properties" + echo " name=HP ProLiant System ROM - Backup" + echo " VersionString=v1.21 (11/03/2014)" + echo " oemhp_VersionStrings=v1.21 (11/03/2014)" + echo " Verbs" + echo " cd version exit show load " + echo "" + echo "/system1/swid3" + echo " Targets" + echo " Properties" + echo " name=HP ProLiant System ROM Bootblock" + echo " VersionString=" + echo " oemhp_VersionStrings=" + echo " Verbs" + echo " cd version exit show load " + echo "" + echo "/system1/swid4" + echo " Targets" + echo " Properties" + echo " name=iLO" + echo " VersionString=2.03 Nov 07 2014" + echo " oemhp_VersionStrings=2.03 Nov 07 2014" + echo " Verbs" + echo " cd version exit show load " + echo "" + echo "/system1/swid5" + echo " Targets" + echo " Properties" + echo " 
name=Intelligent Platform Abstraction Data" + echo " VersionString=12.3" + echo " oemhp_VersionStrings=12.3" + echo " Verbs" + echo " cd version exit show load " + echo "" + echo "/system1/swid6" + echo " Targets" + echo " Properties" + echo " name=Power Management Controller Firmware" + echo " VersionString=1.0.7" + echo " oemhp_VersionStrings=1.0.7" + echo " Verbs" + echo " cd version exit show load " + echo "" + echo "/system1/swid7" + echo " Targets" + echo " Properties" + echo " name=Power Management Controller FW Bootloader" + echo " VersionString=1.0" + echo " oemhp_VersionStrings=1.0" + echo " Verbs" + echo " cd version exit show load " + echo "" + echo "/system1/swid8" + echo " Targets" + echo " Properties" + echo " name=System Programmable Logic Device" + echo " VersionString=Version 0x30" + echo " oemhp_VersionStrings=Version 0x30" + echo " Verbs" + echo " cd version exit show load " + echo "" + echo "/system1/swid9" + echo " Targets" + echo " Properties" + echo " name=SAS Programmable Logic Device" + echo " VersionString=Version 0x01" + echo " oemhp_VersionStrings=Version 0x01" + echo " Verbs" + echo " cd version exit show load " + echo "" + echo "/system1/swid10" + echo " Targets" + echo " Properties" + echo " name=Server Platform Services (SPS) Firmware" + echo " VersionString=3.0.6.267.1" + echo " oemhp_VersionStrings=3.0.6.267.1" + echo " Verbs" + echo " cd version exit show load " + echo "" + echo "/system1/swid11" + echo " Targets" + echo " Properties" + echo " name=HP Smart Storage Battery 1 Firmware" + echo " VersionString=1.1" + echo " oemhp_VersionStrings=1.1" + echo " Verbs" + echo " cd version exit show load " + echo "" + echo "/system1/swid12" + echo " Targets" + echo " Properties" + echo " name=HP Smart Array P440ar Controller" + echo " VersionString=1.34" + echo " oemhp_VersionStrings=1.34" + echo " Verbs" + echo " cd version exit show load " + echo "" + echo "/system1/swid13" + echo " Targets" + echo " Properties" + echo " name=HP 
Ethernet 10Gb 2-port 560SFP+ Adapter" + echo " VersionString=0x800007c7 NCSI v1.2.46.0" + echo " oemhp_VersionStrings=0x800007c7 NCSI v1.2.46.0" + echo " Verbs" + echo " cd version exit show load " + echo "" + echo "" + echo "" + echo "/system1/fan1" + echo " Targets" + echo " Properties" + echo " DeviceID=Fan 1" + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " VariableSpeed=Yes" + echo " DesiredSpeed=16 percent" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/fan2" + echo " Targets" + echo " Properties" + echo " DeviceID=Fan 2" + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " VariableSpeed=Yes" + echo " DesiredSpeed=28 percent" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/fan3" + echo " Targets" + echo " Properties" + echo " DeviceID=Fan 3" + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " VariableSpeed=Yes" + echo " DesiredSpeed=28 percent" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/fan4" + echo " Targets" + echo " Properties" + echo " DeviceID=Fan 4" + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " VariableSpeed=Yes" + echo " DesiredSpeed=35 percent" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/fan5" + echo " Targets" + echo " Properties" + echo " DeviceID=Fan 5" + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " VariableSpeed=Yes" + echo " DesiredSpeed=35 percent" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/fan6" + echo " Targets" + echo " Properties" + echo " DeviceID=Fan 6" + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " VariableSpeed=Yes" + echo " DesiredSpeed=35 percent" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "/system1/fan7" + 
echo " Targets" + echo " Properties" + echo " DeviceID=Fan 7" + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " VariableSpeed=Yes" + echo " DesiredSpeed=31 percent" + echo " HealthState=Ok" + echo " Verbs" + echo " cd version exit show" + echo "" + echo "" + echo "/system1/sensor1" + echo " Targets" + echo " Properties" + echo " DeviceID=01-Inlet Ambient " + echo " ElementName=Ambient" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=28" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=42" + echo " oemhp_CriticalValue=46" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor2" + echo " Targets" + echo " Properties" + echo " DeviceID=02-CPU 1 " + echo " ElementName=CPU" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=40" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor3" + echo " Targets" + echo " Properties" + echo " DeviceID=03-CPU 2 " + echo " ElementName=CPU" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=40" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor4" + echo " Targets" + echo " Properties" + echo " DeviceID=04-P1 DIMM 1-6 " + echo " ElementName=Memory" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=35" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=87" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor5" + echo " Targets" + echo " Properties" + echo " DeviceID=05-P1 DIMM 7-12 " + echo " 
ElementName=Memory" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=37" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=87" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor6" + echo " Targets" + echo " Properties" + echo " DeviceID=06-P2 DIMM 1-6 " + echo " ElementName=Memory" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=40" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=87" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor7" + echo " Targets" + echo " Properties" + echo " DeviceID=07-P2 DIMM 7-12 " + echo " ElementName=Memory" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=37" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=87" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor8" + echo " Targets" + echo " Properties" + echo " DeviceID=08-HD Max " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=35" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=60" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor9" + echo " Targets" + echo " Properties" + echo " DeviceID=10-Chipset " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=38" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=105" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor10" + echo " Targets" + echo " Properties" + 
echo " DeviceID=11-PS 1 Inlet " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=37" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=Not Applicable" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor11" + echo " Targets" + echo " Properties" + echo " DeviceID=12-PS 2 Inlet " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=37" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=Not Applicable" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor12" + echo " Targets" + echo " Properties" + echo " DeviceID=13-VR P1 " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=41" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=115" + echo " oemhp_CriticalValue=120" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor13" + echo " Targets" + echo " Properties" + echo " DeviceID=14-VR P2 " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=40" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=115" + echo " oemhp_CriticalValue=120" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor14" + echo " Targets" + echo " Properties" + echo " DeviceID=15-VR P1 Mem " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=33" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=115" + echo " oemhp_CriticalValue=120" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor15" + echo " 
Targets" + echo " Properties" + echo " DeviceID=16-VR P1 Mem " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=33" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=115" + echo " oemhp_CriticalValue=120" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor16" + echo " Targets" + echo " Properties" + echo " DeviceID=17-VR P2 Mem " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=37" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=115" + echo " oemhp_CriticalValue=120" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor17" + echo " Targets" + echo " Properties" + echo " DeviceID=18-VR P2 Mem " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=35" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=115" + echo " oemhp_CriticalValue=120" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor18" + echo " Targets" + echo " Properties" + echo " DeviceID=19-PS 1 Internal " + echo " ElementName=Power Supply" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=40" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=Not Applicable" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor19" + echo " Targets" + echo " Properties" + echo " DeviceID=20-PS 2 Internal " + echo " ElementName=Power Supply" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=40" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=Not Applicable" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version 
exit show " + echo "" + echo "/system1/sensor20" + echo " Targets" + echo " Properties" + echo " DeviceID=21-PCI 1 " + echo " ElementName=I/O Board" + echo " OperationalStatus=Not Installed" + echo " RateUnits=Celsius" + echo " CurrentReading=N/A" + echo " SensorType=Temperature" + echo " HealthState=Not Installed" + echo " oemhp_CautionValue=Not Applicable" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor21" + echo " Targets" + echo " Properties" + echo " DeviceID=22-PCI 2 " + echo " ElementName=I/O Board" + echo " OperationalStatus=Not Installed" + echo " RateUnits=Celsius" + echo " CurrentReading=N/A" + echo " SensorType=Temperature" + echo " HealthState=Not Installed" + echo " oemhp_CautionValue=Not Applicable" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor22" + echo " Targets" + echo " Properties" + echo " DeviceID=23-PCI 3 " + echo " ElementName=I/O Board" + echo " OperationalStatus=Not Installed" + echo " RateUnits=Celsius" + echo " CurrentReading=N/A" + echo " SensorType=Temperature" + echo " HealthState=Not Installed" + echo " oemhp_CautionValue=Not Applicable" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor23" + echo " Targets" + echo " Properties" + echo " DeviceID=24-HD Controller " + echo " ElementName=I/O Board" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=57" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=100" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor24" + echo " Targets" + echo " Properties" + echo " DeviceID=25-LOM Card " + echo " ElementName=I/O Board" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=56" + echo " 
SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=100" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor25" + echo " Targets" + echo " Properties" + echo " DeviceID=26-LOM " + echo " ElementName=System" + echo " OperationalStatus=Not Installed" + echo " RateUnits=Celsius" + echo " CurrentReading=N/A" + echo " SensorType=Temperature" + echo " HealthState=Not Installed" + echo " oemhp_CautionValue=Not Applicable" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor26" + echo " Targets" + echo " Properties" + echo " DeviceID=27-Front Ambient " + echo " ElementName=Ambient" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=29" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=60" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor27" + echo " Targets" + echo " Properties" + echo " DeviceID=28-P/S 2 Zone " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=38" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=75" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor28" + echo " Targets" + echo " Properties" + echo " DeviceID=29-Battery Zone " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=36" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=75" + echo " oemhp_CriticalValue=80" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor29" + echo " Targets" + echo " Properties" + echo " DeviceID=30-iLO Zone " + echo " ElementName=System" + echo " 
OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=43" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=90" + echo " oemhp_CriticalValue=95" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor30" + echo " Targets" + echo " Properties" + echo " DeviceID=31-PCI 1 Zone " + echo " ElementName=I/O Board" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=40" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor31" + echo " Targets" + echo " Properties" + echo " DeviceID=32-PCI 2 Zone " + echo " ElementName=I/O Board" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=40" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor32" + echo " Targets" + echo " Properties" + echo " DeviceID=33-PCI 3 Zone " + echo " ElementName=I/O Board" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=33" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor33" + echo " Targets" + echo " Properties" + echo " DeviceID=34-HD Cntlr Zone " + echo " ElementName=I/O Board" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=35" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=70" + echo " oemhp_CriticalValue=75" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor34" + echo " Targets" + echo " Properties" + echo " DeviceID=35-I/O Zone " + echo " ElementName=System" + echo " 
OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=35" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=75" + echo " oemhp_CriticalValue=80" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor35" + echo " Targets" + echo " Properties" + echo " DeviceID=36-Storage Batt " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=32" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=60" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/sensor36" + echo " Targets" + echo " Properties" + echo " DeviceID=37-Fuse " + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " RateUnits=Celsius" + echo " CurrentReading=37" + echo " SensorType=Temperature" + echo " HealthState=Ok" + echo " oemhp_CautionValue=Not Applicable" + echo " oemhp_CriticalValue=Not Applicable" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "" + echo "/system1/powersupply1" + echo " Targets" + echo " Properties" + echo " ElementName=Power Supply" + echo " OperationalStatus=Ok" + echo " HealthState=Good, In Use" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/system1/powersupply2" + echo " Targets" + echo " Properties" + echo " ElementName=System" + echo " OperationalStatus=Ok" + echo " HealthState=Good, In Use" + echo " Verbs" + echo " cd version exit show " + echo "" + echo "/map1" + echo " Targets" + echo " firmware1" + echo " accounts1" + echo " log1" + echo " enetport1" + echo " dhcpendpt1" + echo " dnsendpt1" + echo " gateway1" + echo " dnsserver1" + echo " dnsserver2" + echo " dnsserver3" + echo " settings1" + echo " config1" + echo " snmp1" + echo " oemhp_dircfg1" + echo " oemhp_vm1" + echo " vlan1" + echo " oemhp_ssocfg1" + echo " oemhp_syslog1" + echo " oemhp_alertmail1" + echo " oemhp_ahs1" + echo " 
oemhp_license1" + echo " Properties" + echo " name=iLO 4 Standard" + echo " license=0" + echo " manual_iLO_reset=no" + echo " Verbs" + echo " cd version exit show reset set oemhp_ping" + echo "" + echo "/map1/firmware1" + echo " Targets" + echo " Properties" + echo " version=2.03" + echo " date=Nov 07 2014" + echo " Verbs" + echo " cd version exit show load set" + echo "" + echo "/map1/accounts1" + echo " Targets" + echo " Administrator" + echo " Properties" + echo " Verbs" + echo " cd version exit show create delete set" + echo "" + echo "" + echo "/map1/accounts1/Administrator" + echo " Targets" + echo " Properties" + echo " username=Administrator" + echo " password=********" + echo " name=Administrator" + echo " group=admin,config,oemhp_rc,oemhp_power,oemhp_vm" + echo " sshkeyhash=" + echo " Verbs" + echo " cd version exit show create delete set" + echo "" + echo "/map1/log1" + echo " Targets" + echo " record1" + echo " record2" + echo " record3" + echo " record4" + echo " record5" + echo " record6" + echo " record7" + echo " record8" + echo " record9" + echo " record10" + echo " record11" + echo " record12" + echo " record13" + echo " record14" + echo " record15" + echo " record16" + echo " record17" + echo " record18" + echo " record19" + echo " record20" + echo " record21" + echo " record22" + echo " record23" + echo " record24" + echo " record25" + echo " record26" + echo " record27" + echo " record28" + echo " record29" + echo " record30" + echo " record31" + echo " record32" + echo " record33" + echo " record34" + echo " record35" + echo " record36" + echo " record37" + echo " record38" + echo " record39" + echo " record40" + echo " record41" + echo " record42" + echo " record43" + echo " record44" + echo " record45" + echo " record46" + echo " record47" + echo " record48" + echo " record49" + echo " record50" + echo " record51" + echo " record52" + echo " record53" + echo " record54" + echo " record55" + echo " record56" + echo " record57" + echo " 
record58" + echo " record59" + echo " record60" + echo " record61" + echo " record62" + echo " record63" + echo " record64" + echo " record65" + echo " record66" + echo " record67" + echo " record68" + echo " record69" + echo " record70" + echo " record71" + echo " record72" + echo " record73" + echo " record74" + echo " record75" + echo " record76" + echo " record77" + echo " record78" + echo " record79" + echo " record80" + echo " record81" + echo " record82" + echo " record83" + echo " record84" + echo " record85" + echo " record86" + echo " record87" + echo " record88" + echo " record89" + echo " record90" + echo " record91" + echo " record92" + echo " record93" + echo " record94" + echo " record95" + echo " record96" + echo " record97" + echo " record98" + echo " record99" + echo " record100" + echo " record101" + echo " record102" + echo " record103" + echo " record104" + echo " record105" + echo " record106" + echo " record107" + echo " record108" + echo " record109" + echo " record110" + echo " record111" + echo " record112" + echo " record113" + echo " record114" + echo " record115" + echo " record116" + echo " record117" + echo " record118" + echo " record119" + echo " record120" + echo " record121" + echo " record122" + echo " record123" + echo " record124" + echo " record125" + echo " record126" + echo " record127" + echo " record128" + echo " record129" + echo " record130" + echo " record131" + echo " record132" + echo " record133" + echo " record134" + echo " record135" + echo " record136" + echo " record137" + echo " record138" + echo " record139" + echo " record140" + echo " record141" + echo " record142" + echo " record143" + echo " record144" + echo " record145" + echo " record146" + echo " record147" + echo " record148" + echo " record149" + echo " record150" + echo " record151" + echo " record152" + echo " record153" + echo " record154" + echo " record155" + echo " record156" + echo " record157" + echo " record158" + echo " record159" + echo " 
record160" + echo " record161" + echo " record162" + echo " record163" + echo " record164" + echo " record165" + echo " record166" + echo " record167" + echo " record168" + echo " record169" + echo " record170" + echo " record171" + echo " record172" + echo " record173" + echo " record174" + echo " record175" + echo " record176" + echo " record177" + echo " record178" + echo " record179" + echo " record180" + echo " record181" + echo " record182" + echo " record183" + echo " record184" + echo " record185" + echo " record186" + echo " record187" + echo " record188" + echo " record189" + echo " record190" + echo " record191" + echo " record192" + echo " record193" + echo " record194" + echo " record195" + echo " record196" + echo " record197" + echo " record198" + echo " record199" + echo " record200" + echo " record201" + echo " record202" + echo " record203" + echo " record204" + echo " record205" + echo " record206" + echo " record207" + echo " record208" + echo " record209" + echo " record210" + echo " record211" + echo " record212" + echo " record213" + echo " record214" + echo " record215" + echo " record216" + echo " record217" + echo " record218" + echo " record219" + echo " record220" + echo " record221" + echo " record222" + echo " record223" + echo " record224" + echo " record225" + echo " record226" + echo " record227" + echo " record228" + echo " record229" + echo " record230" + echo " record231" + echo " record232" + echo " record233" + echo " record234" + echo " record235" + echo " record236" + echo " record237" + echo " record238" + echo " record239" + echo " record240" + echo " record241" + echo " record242" + echo " record243" + echo " record244" + echo " record245" + echo " record246" + echo " record247" + echo " record248" + echo " record249" + echo " record250" + echo " record251" + echo " record252" + echo " record253" + echo " record254" + echo " record255" + echo " record256" + echo " record257" + echo " record258" + echo " record259" + echo " 
record260" + echo " record261" + echo " record262" + echo " record263" + echo " record264" + echo " record265" + echo " record266" + echo " record267" + echo " record268" + echo " record269" + echo " record270" + echo " record271" + echo " record272" + echo " record273" + echo " record274" + echo " record275" + echo " record276" + echo " Properties" + echo " Verbs" + echo " cd version exit show delete set" + echo "" + echo "" + echo "/map1/log1/record1" + echo " Targets" + echo " Properties" + echo " number=1" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=06:02" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record2" + echo " Targets" + echo " Properties" + echo " number=2" + echo " severity=Caution" + echo " date=10/06/2015" + echo " time=06:10" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record3" + echo " Targets" + echo " Properties" + echo " number=3" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=06:10" + echo " description=Server power restored." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record4" + echo " Targets" + echo " Properties" + echo " number=4" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=06:10" + echo " description=Embedded Flash/SD-CARD: Restarted." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record5" + echo " Targets" + echo " Properties" + echo " number=5" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=06:13" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record6" + echo " Targets" + echo " Properties" + echo " number=6" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=06:13" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record7" + echo " Targets" + echo " Properties" + echo " number=7" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=06:13" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record8" + echo " Targets" + echo " Properties" + echo " number=8" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=06:13" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record9" + echo " Targets" + echo " Properties" + echo " number=9" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=06:13" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record10" + echo " Targets" + echo " Properties" + echo " number=10" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=06:13" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record11" + echo " Targets" + echo " Properties" + echo " number=11" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=06:13" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record12" + echo " Targets" + echo " Properties" + echo " number=12" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=06:13" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record13" + echo " Targets" + echo " Properties" + echo " number=13" + echo " severity=Caution" + echo " date=10/06/2015" + echo " time=10:10" + echo " description=Power-Off signal sent to host server by: Administrator." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record14" + echo " Targets" + echo " Properties" + echo " number=14" + echo " severity=Caution" + echo " date=10/06/2015" + echo " time=10:10" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record15" + echo " Targets" + echo " Properties" + echo " number=15" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=10:10" + echo " description=Embedded Flash/SD-CARD: Restarted." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record16" + echo " Targets" + echo " Properties" + echo " number=16" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=10:10" + echo " description=Server power removed." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record17" + echo " Targets" + echo " Properties" + echo " number=17" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=10:13" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record18" + echo " Targets" + echo " Properties" + echo " number=18" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=10:15" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record19" + echo " Targets" + echo " Properties" + echo " number=19" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=10:56" + echo " description=SSH login: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record20" + echo " Targets" + echo " Properties" + echo " number=20" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=10:57" + echo " description=SSH login: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record21" + echo " Targets" + echo " Properties" + echo " number=21" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=10:59" + echo " description=Power-On signal sent to host server by: Administrator." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record22" + echo " Targets" + echo " Properties" + echo " number=22" + echo " severity=Caution" + echo " date=10/06/2015" + echo " time=10:59" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record23" + echo " Targets" + echo " Properties" + echo " number=23" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=10:59" + echo " description=Server power restored." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record24" + echo " Targets" + echo " Properties" + echo " number=24" + echo " severity=Caution" + echo " date=10/06/2015" + echo " time=10:59" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record25" + echo " Targets" + echo " Properties" + echo " number=25" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:02" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record26" + echo " Targets" + echo " Properties" + echo " number=26" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:02" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record27" + echo " Targets" + echo " Properties" + echo " number=27" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:02" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record28" + echo " Targets" + echo " Properties" + echo " number=28" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:02" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record29" + echo " Targets" + echo " Properties" + echo " number=29" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:02" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record30" + echo " Targets" + echo " Properties" + echo " number=30" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:02" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record31" + echo " Targets" + echo " Properties" + echo " number=31" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:02" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record32" + echo " Targets" + echo " Properties" + echo " number=32" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:02" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record33" + echo " Targets" + echo " Properties" + echo " number=33" + echo " severity=Caution" + echo " date=10/06/2015" + echo " time=11:10" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record34" + echo " Targets" + echo " Properties" + echo " number=34" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:10" + echo " description=Server power restored." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record35" + echo " Targets" + echo " Properties" + echo " number=35" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:10" + echo " description=Embedded Flash/SD-CARD: Restarted." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record36" + echo " Targets" + echo " Properties" + echo " number=36" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:12" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record37" + echo " Targets" + echo " Properties" + echo " number=37" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:12" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record38" + echo " Targets" + echo " Properties" + echo " number=38" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:12" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record39" + echo " Targets" + echo " Properties" + echo " number=39" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:12" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record40" + echo " Targets" + echo " Properties" + echo " number=40" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:12" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record41" + echo " Targets" + echo " Properties" + echo " number=41" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:12" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record42" + echo " Targets" + echo " Properties" + echo " number=42" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:12" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record43" + echo " Targets" + echo " Properties" + echo " number=43" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:12" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record44" + echo " Targets" + echo " Properties" + echo " number=44" + echo " severity=Caution" + echo " date=10/06/2015" + echo " time=11:19" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record45" + echo " Targets" + echo " Properties" + echo " number=45" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:22" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record46" + echo " Targets" + echo " Properties" + echo " number=46" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:22" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record47" + echo " Targets" + echo " Properties" + echo " number=47" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:22" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record48" + echo " Targets" + echo " Properties" + echo " number=48" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:22" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record49" + echo " Targets" + echo " Properties" + echo " number=49" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:22" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record50" + echo " Targets" + echo " Properties" + echo " number=50" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:22" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record51" + echo " Targets" + echo " Properties" + echo " number=51" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:22" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record52" + echo " Targets" + echo " Properties" + echo " number=52" + echo " severity=Informational" + echo " date=10/06/2015" + echo " time=11:22" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record53" + echo " Targets" + echo " Properties" + echo " number=53" + echo " severity=Informational" + echo " date=10/14/2015" + echo " time=07:14" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record54" + echo " Targets" + echo " Properties" + echo " number=54" + echo " severity=Informational" + echo " date=10/14/2015" + echo " time=07:15" + echo " description=SSH login: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record55" + echo " Targets" + echo " Properties" + echo " number=55" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=09:01" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record56" + echo " Targets" + echo " Properties" + echo " number=56" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=09:01" + echo " description=SSH login: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record57" + echo " Targets" + echo " Properties" + echo " number=57" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=09:02" + echo " description=SSH login: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record58" + echo " Targets" + echo " Properties" + echo " number=58" + echo " severity=Caution" + echo " date=10/16/2015" + echo " time=10:58" + echo " description=Power-Off signal sent to host server by: Administrator." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record59" + echo " Targets" + echo " Properties" + echo " number=59" + echo " severity=Caution" + echo " date=10/16/2015" + echo " time=10:59" + echo " description=Server reset." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record60" + echo " Targets" + echo " Properties" + echo " number=60" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=10:59" + echo " description=Embedded Flash/SD-CARD: Restarted." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record61" + echo " Targets" + echo " Properties" + echo " number=61" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=10:59" + echo " description=Server power removed." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record62" + echo " Targets" + echo " Properties" + echo " number=62" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=11:18" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record63" + echo " Targets" + echo " Properties" + echo " number=63" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=11:22" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record64" + echo " Targets" + echo " Properties" + echo " number=64" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:28" + echo " description=Browser login: Administrator - 128.224.22.113(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record65" + echo " Targets" + echo " Properties" + echo " number=65" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:28" + echo " description=Power-On signal sent to host server by: Administrator." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record66" + echo " Targets" + echo " Properties" + echo " number=66" + echo " severity=Caution" + echo " date=10/16/2015" + echo " time=16:29" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record67" + echo " Targets" + echo " Properties" + echo " number=67" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:29" + echo " description=Server power restored." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record68" + echo " Targets" + echo " Properties" + echo " number=68" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:29" + echo " description=Embedded Flash/SD-CARD: Restarted." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record69" + echo " Targets" + echo " Properties" + echo " number=69" + echo " severity=Caution" + echo " date=10/16/2015" + echo " time=16:29" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record70" + echo " Targets" + echo " Properties" + echo " number=70" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:31" + echo " description=Remote console started by: Administrator - 128.224.22.113(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record71" + echo " Targets" + echo " Properties" + echo " number=71" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:32" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record72" + echo " Targets" + echo " Properties" + echo " number=72" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:32" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record73" + echo " Targets" + echo " Properties" + echo " number=73" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:32" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record74" + echo " Targets" + echo " Properties" + echo " number=74" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:32" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record75" + echo " Targets" + echo " Properties" + echo " number=75" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:32" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record76" + echo " Targets" + echo " Properties" + echo " number=76" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:32" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record77" + echo " Targets" + echo " Properties" + echo " number=77" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:32" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record78" + echo " Targets" + echo " Properties" + echo " number=78" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:32" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record79" + echo " Targets" + echo " Properties" + echo " number=79" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:33" + echo " description=Remote console session stopped by: Administrator - 128.224.22.113(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record80" + echo " Targets" + echo " Properties" + echo " number=80" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:33" + echo " description=Browser logout: Administrator - 128.224.22.113(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record81" + echo " Targets" + echo " Properties" + echo " number=81" + echo " severity=Caution" + echo " date=10/16/2015" + echo " time=16:39" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record82" + echo " Targets" + echo " Properties" + echo " number=82" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:42" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record83" + echo " Targets" + echo " Properties" + echo " number=83" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:42" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record84" + echo " Targets" + echo " Properties" + echo " number=84" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:42" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record85" + echo " Targets" + echo " Properties" + echo " number=85" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:42" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record86" + echo " Targets" + echo " Properties" + echo " number=86" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:42" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record87" + echo " Targets" + echo " Properties" + echo " number=87" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:42" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record88" + echo " Targets" + echo " Properties" + echo " number=88" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:42" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record89" + echo " Targets" + echo " Properties" + echo " number=89" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:42" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record90" + echo " Targets" + echo " Properties" + echo " number=90" + echo " severity=Caution" + echo " date=10/16/2015" + echo " time=16:45" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record91" + echo " Targets" + echo " Properties" + echo " number=91" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:48" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record92" + echo " Targets" + echo " Properties" + echo " number=92" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:48" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record93" + echo " Targets" + echo " Properties" + echo " number=93" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:48" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record94" + echo " Targets" + echo " Properties" + echo " number=94" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:48" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record95" + echo " Targets" + echo " Properties" + echo " number=95" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:48" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record96" + echo " Targets" + echo " Properties" + echo " number=96" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:48" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record97" + echo " Targets" + echo " Properties" + echo " number=97" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:48" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record98" + echo " Targets" + echo " Properties" + echo " number=98" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:48" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record99" + echo " Targets" + echo " Properties" + echo " number=99" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:49" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record100" + echo " Targets" + echo " Properties" + echo " number=100" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:49" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record101" + echo " Targets" + echo " Properties" + echo " number=101" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:49" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record102" + echo " Targets" + echo " Properties" + echo " number=102" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:49" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record103" + echo " Targets" + echo " Properties" + echo " number=103" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:49" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record104" + echo " Targets" + echo " Properties" + echo " number=104" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:49" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record105" + echo " Targets" + echo " Properties" + echo " number=105" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:49" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record106" + echo " Targets" + echo " Properties" + echo " number=106" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:49" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record107" + echo " Targets" + echo " Properties" + echo " number=107" + echo " severity=Caution" + echo " date=10/16/2015" + echo " time=16:55" + echo " description=Server reset." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record108" + echo " Targets" + echo " Properties" + echo " number=108" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:58" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record109" + echo " Targets" + echo " Properties" + echo " number=109" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:58" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record110" + echo " Targets" + echo " Properties" + echo " number=110" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:58" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record111" + echo " Targets" + echo " Properties" + echo " number=111" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:58" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record112" + echo " Targets" + echo " Properties" + echo " number=112" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:58" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record113" + echo " Targets" + echo " Properties" + echo " number=113" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:58" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record114" + echo " Targets" + echo " Properties" + echo " number=114" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:58" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record115" + echo " Targets" + echo " Properties" + echo " number=115" + echo " severity=Informational" + echo " date=10/16/2015" + echo " time=16:58" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record116" + echo " Targets" + echo " Properties" + echo " number=116" + echo " severity=Caution" + echo " date=10/17/2015" + echo " time=07:43" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record117" + echo " Targets" + echo " Properties" + echo " number=117" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:44" + echo " description=Server power restored." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record118" + echo " Targets" + echo " Properties" + echo " number=118" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:44" + echo " description=Embedded Flash/SD-CARD: Restarted." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record119" + echo " Targets" + echo " Properties" + echo " number=119" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:46" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record120" + echo " Targets" + echo " Properties" + echo " number=120" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:46" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record121" + echo " Targets" + echo " Properties" + echo " number=121" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:46" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record122" + echo " Targets" + echo " Properties" + echo " number=122" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:46" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record123" + echo " Targets" + echo " Properties" + echo " number=123" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:46" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record124" + echo " Targets" + echo " Properties" + echo " number=124" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:46" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record125" + echo " Targets" + echo " Properties" + echo " number=125" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:46" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record126" + echo " Targets" + echo " Properties" + echo " number=126" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:46" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record127" + echo " Targets" + echo " Properties" + echo " number=127" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:47" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record128" + echo " Targets" + echo " Properties" + echo " number=128" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:47" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record129" + echo " Targets" + echo " Properties" + echo " number=129" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:47" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record130" + echo " Targets" + echo " Properties" + echo " number=130" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:47" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record131" + echo " Targets" + echo " Properties" + echo " number=131" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:47" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record132" + echo " Targets" + echo " Properties" + echo " number=132" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:47" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record133" + echo " Targets" + echo " Properties" + echo " number=133" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:47" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record134" + echo " Targets" + echo " Properties" + echo " number=134" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=07:47" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record135" + echo " Targets" + echo " Properties" + echo " number=135" + echo " severity=Caution" + echo " date=10/17/2015" + echo " time=08:53" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record136" + echo " Targets" + echo " Properties" + echo " number=136" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=08:53" + echo " description=Server power restored." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record137" + echo " Targets" + echo " Properties" + echo " number=137" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=08:53" + echo " description=Embedded Flash/SD-CARD: Restarted." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record138" + echo " Targets" + echo " Properties" + echo " number=138" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=08:56" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record139" + echo " Targets" + echo " Properties" + echo " number=139" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=08:56" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record140" + echo " Targets" + echo " Properties" + echo " number=140" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=08:56" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record141" + echo " Targets" + echo " Properties" + echo " number=141" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=08:56" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record142" + echo " Targets" + echo " Properties" + echo " number=142" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=08:56" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record143" + echo " Targets" + echo " Properties" + echo " number=143" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=08:56" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record144" + echo " Targets" + echo " Properties" + echo " number=144" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=08:56" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record145" + echo " Targets" + echo " Properties" + echo " number=145" + echo " severity=Informational" + echo " date=10/17/2015" + echo " time=08:56" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record146" + echo " Targets" + echo " Properties" + echo " number=146" + echo " severity=Caution" + echo " date=10/18/2015" + echo " time=04:48" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record147" + echo " Targets" + echo " Properties" + echo " number=147" + echo " severity=Informational" + echo " date=10/18/2015" + echo " time=04:48" + echo " description=Server power restored." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record148" + echo " Targets" + echo " Properties" + echo " number=148" + echo " severity=Informational" + echo " date=10/18/2015" + echo " time=04:48" + echo " description=Embedded Flash/SD-CARD: Restarted." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record149" + echo " Targets" + echo " Properties" + echo " number=149" + echo " severity=Informational" + echo " date=10/18/2015" + echo " time=04:51" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record150" + echo " Targets" + echo " Properties" + echo " number=150" + echo " severity=Informational" + echo " date=10/18/2015" + echo " time=04:51" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record151" + echo " Targets" + echo " Properties" + echo " number=151" + echo " severity=Informational" + echo " date=10/18/2015" + echo " time=04:51" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record152" + echo " Targets" + echo " Properties" + echo " number=152" + echo " severity=Informational" + echo " date=10/18/2015" + echo " time=04:51" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record153" + echo " Targets" + echo " Properties" + echo " number=153" + echo " severity=Informational" + echo " date=10/18/2015" + echo " time=04:51" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record154" + echo " Targets" + echo " Properties" + echo " number=154" + echo " severity=Informational" + echo " date=10/18/2015" + echo " time=04:51" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record155" + echo " Targets" + echo " Properties" + echo " number=155" + echo " severity=Informational" + echo " date=10/18/2015" + echo " time=04:51" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record156" + echo " Targets" + echo " Properties" + echo " number=156" + echo " severity=Informational" + echo " date=10/18/2015" + echo " time=04:51" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record157" + echo " Targets" + echo " Properties" + echo " number=157" + echo " severity=Caution" + echo " date=10/19/2015" + echo " time=08:21" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record158" + echo " Targets" + echo " Properties" + echo " number=158" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=08:21" + echo " description=Server power restored." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record159" + echo " Targets" + echo " Properties" + echo " number=159" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=08:21" + echo " description=Embedded Flash/SD-CARD: Restarted." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record160" + echo " Targets" + echo " Properties" + echo " number=160" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=08:24" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record161" + echo " Targets" + echo " Properties" + echo " number=161" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=08:24" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record162" + echo " Targets" + echo " Properties" + echo " number=162" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=08:24" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record163" + echo " Targets" + echo " Properties" + echo " number=163" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=08:24" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record164" + echo " Targets" + echo " Properties" + echo " number=164" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=08:24" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record165" + echo " Targets" + echo " Properties" + echo " number=165" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=08:24" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record166" + echo " Targets" + echo " Properties" + echo " number=166" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=08:24" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record167" + echo " Targets" + echo " Properties" + echo " number=167" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=08:24" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record168" + echo " Targets" + echo " Properties" + echo " number=168" + echo " severity=Caution" + echo " date=10/19/2015" + echo " time=12:03" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record169" + echo " Targets" + echo " Properties" + echo " number=169" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=12:03" + echo " description=Server power restored." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record170" + echo " Targets" + echo " Properties" + echo " number=170" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=12:03" + echo " description=Embedded Flash/SD-CARD: Restarted." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record171" + echo " Targets" + echo " Properties" + echo " number=171" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=12:06" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record172" + echo " Targets" + echo " Properties" + echo " number=172" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=12:06" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record173" + echo " Targets" + echo " Properties" + echo " number=173" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=12:06" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record174" + echo " Targets" + echo " Properties" + echo " number=174" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=12:06" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record175" + echo " Targets" + echo " Properties" + echo " number=175" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=12:06" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record176" + echo " Targets" + echo " Properties" + echo " number=176" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=12:06" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record177" + echo " Targets" + echo " Properties" + echo " number=177" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=12:06" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record178" + echo " Targets" + echo " Properties" + echo " number=178" + echo " severity=Informational" + echo " date=10/19/2015" + echo " time=12:06" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record179" + echo " Targets" + echo " Properties" + echo " number=179" + echo " severity=Caution" + echo " date=10/21/2015" + echo " time=04:26" + echo " description=Server reset." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record180" + echo " Targets" + echo " Properties" + echo " number=180" + echo " severity=Informational" + echo " date=10/21/2015" + echo " time=04:27" + echo " description=Embedded Flash/SD-CARD: Restarted." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record181" + echo " Targets" + echo " Properties" + echo " number=181" + echo " severity=Informational" + echo " date=10/21/2015" + echo " time=04:27" + echo " description=Server power removed." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record182" + echo " Targets" + echo " Properties" + echo " number=182" + echo " severity=Informational" + echo " date=10/21/2015" + echo " time=05:17" + echo " description=SSH login: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record183" + echo " Targets" + echo " Properties" + echo " number=183" + echo " severity=Informational" + echo " date=10/23/2015" + echo " time=07:09" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record184" + echo " Targets" + echo " Properties" + echo " number=184" + echo " severity=Informational" + echo " date=10/23/2015" + echo " time=07:10" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record185" + echo " Targets" + echo " Properties" + echo " number=185" + echo " severity=Informational" + echo " date=10/23/2015" + echo " time=07:13" + echo " description=SSH login: Administrator - 128.224.150.16(DNS name not found)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record186" + echo " Targets" + echo " Properties" + echo " number=186" + echo " severity=Informational" + echo " date=10/23/2015" + echo " time=07:40" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record187" + echo " Targets" + echo " Properties" + echo " number=187" + echo " severity=Informational" + echo " date=10/23/2015" + echo " time=07:44" + echo " description=SSH login: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record188" + echo " Targets" + echo " Properties" + echo " number=188" + echo " severity=Informational" + echo " date=10/23/2015" + echo " time=07:54" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record189" + echo " Targets" + echo " Properties" + echo " number=189" + echo " severity=Informational" + echo " date=10/23/2015" + echo " time=07:55" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record190" + echo " Targets" + echo " Properties" + echo " number=190" + echo " severity=Informational" + echo " date=10/24/2015" + echo " time=05:00" + echo " description=SSH login: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record191" + echo " Targets" + echo " Properties" + echo " number=191" + echo " severity=Informational" + echo " date=10/25/2015" + echo " time=16:33" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record192" + echo " Targets" + echo " Properties" + echo " number=192" + echo " severity=Informational" + echo " date=10/25/2015" + echo " time=16:34" + echo " description=SSH login: Administrator - 128.224.150.17(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record193" + echo " Targets" + echo " Properties" + echo " number=193" + echo " severity=Informational" + echo " date=10/25/2015" + echo " time=16:43" + echo " description=SSH logout: Administrator - 128.224.150.17(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record194" + echo " Targets" + echo " Properties" + echo " number=194" + echo " severity=Informational" + echo " date=10/25/2015" + echo " time=16:44" + echo " description=SSH login: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record195" + echo " Targets" + echo " Properties" + echo " number=195" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=04:36" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record196" + echo " Targets" + echo " Properties" + echo " number=196" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=04:38" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record197" + echo " Targets" + echo " Properties" + echo " number=197" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:02" + echo " description=SSH login: Administrator - 128.224.150.16(DNS name not found)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record198" + echo " Targets" + echo " Properties" + echo " number=198" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:03" + echo " description=SSH login: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record199" + echo " Targets" + echo " Properties" + echo " number=199" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:03" + echo " description=Power-On signal sent to host server by: Administrator." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record200" + echo " Targets" + echo " Properties" + echo " number=200" + echo " severity=Caution" + echo " date=10/27/2015" + echo " time=05:03" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record201" + echo " Targets" + echo " Properties" + echo " number=201" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:03" + echo " description=Server power restored." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record202" + echo " Targets" + echo " Properties" + echo " number=202" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:03" + echo " description=Embedded Flash/SD-CARD: Restarted." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record203" + echo " Targets" + echo " Properties" + echo " number=203" + echo " severity=Caution" + echo " date=10/27/2015" + echo " time=05:03" + echo " description=Server reset." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record204" + echo " Targets" + echo " Properties" + echo " number=204" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:06" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record205" + echo " Targets" + echo " Properties" + echo " number=205" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:06" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record206" + echo " Targets" + echo " Properties" + echo " number=206" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:06" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record207" + echo " Targets" + echo " Properties" + echo " number=207" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:06" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record208" + echo " Targets" + echo " Properties" + echo " number=208" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:06" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record209" + echo " Targets" + echo " Properties" + echo " number=209" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:06" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record210" + echo " Targets" + echo " Properties" + echo " number=210" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:06" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record211" + echo " Targets" + echo " Properties" + echo " number=211" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:06" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record212" + echo " Targets" + echo " Properties" + echo " number=212" + echo " severity=Caution" + echo " date=10/27/2015" + echo " time=05:14" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record213" + echo " Targets" + echo " Properties" + echo " number=213" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:17" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record214" + echo " Targets" + echo " Properties" + echo " number=214" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:17" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record215" + echo " Targets" + echo " Properties" + echo " number=215" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:17" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record216" + echo " Targets" + echo " Properties" + echo " number=216" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:17" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record217" + echo " Targets" + echo " Properties" + echo " number=217" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:17" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record218" + echo " Targets" + echo " Properties" + echo " number=218" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:17" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record219" + echo " Targets" + echo " Properties" + echo " number=219" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:17" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record220" + echo " Targets" + echo " Properties" + echo " number=220" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=05:17" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record221" + echo " Targets" + echo " Properties" + echo " number=221" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=09:16" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record222" + echo " Targets" + echo " Properties" + echo " number=222" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=09:16" + echo " description=SSH login: Administrator - 128.224.150.17(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record223" + echo " Targets" + echo " Properties" + echo " number=223" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=09:17" + echo " description=SSH login: Administrator - 128.224.150.17(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record224" + echo " Targets" + echo " Properties" + echo " number=224" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=09:27" + echo " description=SSH logout: Administrator - 128.224.150.17(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record225" + echo " Targets" + echo " Properties" + echo " number=225" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=09:27" + echo " description=SSH login: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record226" + echo " Targets" + echo " Properties" + echo " number=226" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=10:50" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record227" + echo " Targets" + echo " Properties" + echo " number=227" + echo " severity=Informational" + echo " date=10/27/2015" + echo " time=10:51" + echo " description=SSH login: Administrator - 128.224.150.17(DNS name not found)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record228" + echo " Targets" + echo " Properties" + echo " number=228" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=05:37" + echo " description=SSH logout: Administrator - 128.224.150.17(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record229" + echo " Targets" + echo " Properties" + echo " number=229" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=05:37" + echo " description=SSH login: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record230" + echo " Targets" + echo " Properties" + echo " number=230" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=05:38" + echo " description=SSH login: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record231" + echo " Targets" + echo " Properties" + echo " number=231" + echo " severity=Caution" + echo " date=10/28/2015" + echo " time=05:38" + echo " description=Power-Off signal sent to host server by: Administrator." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record232" + echo " Targets" + echo " Properties" + echo " number=232" + echo " severity=Caution" + echo " date=10/28/2015" + echo " time=05:38" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record233" + echo " Targets" + echo " Properties" + echo " number=233" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=05:39" + echo " description=Embedded Flash/SD-CARD: Restarted." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record234" + echo " Targets" + echo " Properties" + echo " number=234" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=05:39" + echo " description=Server power removed." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record235" + echo " Targets" + echo " Properties" + echo " number=235" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=05:43" + echo " description=SSH logout: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record236" + echo " Targets" + echo " Properties" + echo " number=236" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=06:12" + echo " description=SSH login: Administrator - 128.224.145.95(yow-cgts1-lx.wrs.com)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record237" + echo " Targets" + echo " Properties" + echo " number=237" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=06:55" + echo " description=SSH login: Administrator - 128.224.150.16(DNS name not found)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record238" + echo " Targets" + echo " Properties" + echo " number=238" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=06:56" + echo " description=Power-On signal sent to host server by: Administrator." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record239" + echo " Targets" + echo " Properties" + echo " number=239" + echo " severity=Caution" + echo " date=10/28/2015" + echo " time=06:56" + echo " description=Server reset." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record240" + echo " Targets" + echo " Properties" + echo " number=240" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=06:56" + echo " description=Server power restored." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record241" + echo " Targets" + echo " Properties" + echo " number=241" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=06:56" + echo " description=Embedded Flash/SD-CARD: Restarted." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record242" + echo " Targets" + echo " Properties" + echo " number=242" + echo " severity=Caution" + echo " date=10/28/2015" + echo " time=06:56" + echo " description=Server reset." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record243" + echo " Targets" + echo " Properties" + echo " number=243" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=06:59" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record244" + echo " Targets" + echo " Properties" + echo " number=244" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=06:59" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record245" + echo " Targets" + echo " Properties" + echo " number=245" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=06:59" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record246" + echo " Targets" + echo " Properties" + echo " number=246" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=06:59" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record247" + echo " Targets" + echo " Properties" + echo " number=247" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=06:59" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record248" + echo " Targets" + echo " Properties" + echo " number=248" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=06:59" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record249" + echo " Targets" + echo " Properties" + echo " number=249" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=06:59" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record250" + echo " Targets" + echo " Properties" + echo " number=250" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=06:59" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record251" + echo " Targets" + echo " Properties" + echo " number=251" + echo " severity=Caution" + echo " date=10/28/2015" + echo " time=07:06" + echo " description=Server reset." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record252" + echo " Targets" + echo " Properties" + echo " number=252" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=07:06" + echo " description=Server power restored." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record253" + echo " Targets" + echo " Properties" + echo " number=253" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=07:07" + echo " description=Embedded Flash/SD-CARD: Restarted." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record254" + echo " Targets" + echo " Properties" + echo " number=254" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=07:09" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record255" + echo " Targets" + echo " Properties" + echo " number=255" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=07:09" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record256" + echo " Targets" + echo " Properties" + echo " number=256" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=07:09" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record257" + echo " Targets" + echo " Properties" + echo " number=257" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=07:09" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record258" + echo " Targets" + echo " Properties" + echo " number=258" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=07:09" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record259" + echo " Targets" + echo " Properties" + echo " number=259" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=07:09" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record260" + echo " Targets" + echo " Properties" + echo " number=260" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=07:09" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record261" + echo " Targets" + echo " Properties" + echo " number=261" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=07:09" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record262" + echo " Targets" + echo " Properties" + echo " number=262" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=07:46" + echo " description=SSH logout: Administrator - 128.224.145.95(yow-cgts1-lx.wrs.com)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record263" + echo " Targets" + echo " Properties" + echo " number=263" + echo " severity=Caution" + echo " date=10/28/2015" + echo " time=08:53" + echo " description=Server reset." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record264" + echo " Targets" + echo " Properties" + echo " number=264" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=08:53" + echo " description=Server power restored." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record265" + echo " Targets" + echo " Properties" + echo " number=265" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=08:53" + echo " description=Embedded Flash/SD-CARD: Restarted." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record266" + echo " Targets" + echo " Properties" + echo " number=266" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=08:55" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record267" + echo " Targets" + echo " Properties" + echo " number=267" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=08:55" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record268" + echo " Targets" + echo " Properties" + echo " number=268" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=08:55" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record269" + echo " Targets" + echo " Properties" + echo " number=269" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=08:55" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record270" + echo " Targets" + echo " Properties" + echo " number=270" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=08:55" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record271" + echo " Targets" + echo " Properties" + echo " number=271" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=08:55" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record272" + echo " Targets" + echo " Properties" + echo " number=272" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=08:55" + echo " description=Browser login: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record273" + echo " Targets" + echo " Properties" + echo " number=273" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=08:55" + echo " description=Browser logout: System Administrator - 127.0.0.1(localhost)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record274" + echo " Targets" + echo " Properties" + echo " number=274" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=09:34" + echo " description=SSH login: Administrator - 128.224.145.95(yow-cgts1-lx.wrs.com)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record275" + echo " Targets" + echo " Properties" + echo " number=275" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=09:54" + echo " description=SSH logout: Administrator - 128.224.145.95(yow-cgts1-lx.wrs.com)." 
+ echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/log1/record276" + echo " Targets" + echo " Properties" + echo " number=276" + echo " severity=Informational" + echo " date=10/28/2015" + echo " time=09:55" + echo " description=SSH login: Administrator - 128.224.145.95(yow-cgts1-lx.wrs.com)." + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/enetport1" + echo " Targets" + echo " lanendpt1" + echo " Properties" + echo " Autosense=yes" + echo " PermanentAddress=38:63:bb:2d:78:b0" + echo " LinkTechnology=Ethernet" + echo " Speed=Auto" + echo " SystemName=undefined" + echo " Fullduplex=Auto" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/enetport1/lanendpt1" + echo " Targets" + echo " ipendpt1" + echo " Properties" + echo " EnabledState=enabled" + echo " MACAddress=38:63:bb:2d:78:b0" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/enetport1/lanendpt1/ipendpt1" + echo " Targets" + echo " Properties" + echo " IPv4Address=128.224.150.227" + echo " SubnetMask=255.255.254.0" + echo " AddressOrigin=Static" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/dhcpendpt1" + echo " Targets" + echo " Properties" + echo " EnabledState=no" + echo " OtherTypeDescription=DHCP" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/dnsendpt1" + echo " Targets" + echo " Properties" + echo " EnabledState=yes" + echo " Hostname=undefined" + echo " DomainName=" + echo " OtherTypeDescription=DNS" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/gateway1" + echo " Targets" + echo " Properties" + echo " AccessInfo=128.224.150.1" + echo " AccessContext=Default Gateway" + echo " DHCPOptionToUse=no" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/dnsserver1" + echo " Targets" + echo " Properties" + echo " AccessInfo=128.224.144.130" + echo " AccessContext=DNS Server" + echo " Verbs" + echo " cd 
version exit show set" + echo "" + echo "/map1/dnsserver2" + echo " Targets" + echo " Properties" + echo " AccessInfo=0.0.0.0" + echo " AccessContext=DNS Server" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/dnsserver3" + echo " Targets" + echo " Properties" + echo " AccessInfo=0.0.0.0" + echo " AccessContext=DNS Server" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/settings1" + echo " Targets" + echo " DNSSettings1" + echo " WINSSettingData1" + echo " StaticIPSettings1" + echo " Properties" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/settings1/DNSSettings1" + echo " Targets" + echo " Properties" + echo " DNSServerAddress=128.224.144.130" + echo " RegisterThisConnection=yes" + echo " DomainName=" + echo " DHCPOptionToUse=no" + echo " oemhp_UseDHCPv4DomainName=no" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/settings1/WINSSettingData1" + echo " Targets" + echo " Properties" + echo " WINSServerAddress=0.0.0.0, 0.0.0.0" + echo " RegisterThisConnection=yes" + echo " DHCPOptionToUse=no" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/settings1/StaticIPSettings1" + echo " Targets" + echo " Properties" + echo " oemhp_SRoute1Address=0.0.0.0" + echo " oemhp_Mask1Address=0.0.0.0" + echo " oemhp_Gateway1Address=0.0.0.0" + echo " oemhp_SRoute2Address=0.0.0.0" + echo " oemhp_Mask2Address=0.0.0.0" + echo " oemhp_Gateway2Address=0.0.0.0" + echo " oemhp_SRoute3Address=0.0.0.0" + echo " oemhp_Mask3Address=0.0.0.0" + echo " oemhp_Gateway3Address=0.0.0.0" + echo " DHCPOptionToUse=no" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/config1" + echo " Targets" + echo " Properties" + echo " oemhp_mapenable=yes" + echo " oemhp_timeout=30" + echo " oemhp_rbsuenable=yes" + echo " oemhp_rbsulogin=no" + echo " oemhp_rbsushowip=yes" + echo " oemhp_httpport=80" + echo " oemhp_sslport=443" + echo " oemhp_rcport=17990" + echo 
" oemhp_vmport=17988" + echo " oemhp_sshport=22" + echo " oemhp_sshstatus=yes" + echo " oemhp_serialclistatus=yes" + echo " oemhp_serialcliauth=yes" + echo " oemhp_serialclispeed=9600" + echo " oemhp_minpwdlen=8" + echo " oemhp_enforce_aes=no" + echo " oemhp_authfailurelogging=3" + echo " oemhp_computer_lock=disabled" + echo " oemhp_hotkey_ctrl_t="NONE,NONE,NONE,NONE,NONE"" + echo " oemhp_hotkey_ctrl_u="NONE,NONE,NONE,NONE,NONE"" + echo " oemhp_hotkey_ctrl_v="NONE,NONE,NONE,NONE,NONE"" + echo " oemhp_hotkey_ctrl_w="NONE,NONE,NONE,NONE,NONE"" + echo " oemhp_hotkey_ctrl_x="NONE,NONE,NONE,NONE,NONE"" + echo " oemhp_hotkey_ctrl_y="NONE,NONE,NONE,NONE,NONE"" + echo " oemhp_irc_trust_enable=no" + echo " oemhp_vsp_log_enable=no" + echo " oemhp_snmp_access=yes" + echo " oemhp_snmp_port=161" + echo " oemhp_snmp_trapport=162" + echo " Verbs" + echo " cd version exit show set oemhp_loadSSHkey oemhp_resetHotkeys vsp log" + echo "" + echo "/map1/snmp1" + echo " Targets" + echo " Properties" + echo " accessinfo1=0" + echo " readcom1=""" + echo " trapcom1=""" + echo " accessinfo2=0" + echo " readcom2=""" + echo " trapcom2=""" + echo " accessinfo3=0" + echo " readcom3=""" + echo " trapcom3=""" + echo " oemhp_iloalert=yes" + echo " oemhp_agentalert=yes" + echo " oemhp_snmppassthru=no" + echo " oemhp_agentlessenable=yes" + echo " oemhp_systemlocation=""" + echo " oemhp_systemcontact=""" + echo " oemhp_systemrole=""" + echo " oemhp_systemroledetail1=""" + echo " oemhp_systemroledetail2=""" + echo " oemhp_systemroledetail3=""" + echo " oemhp_systemroledetail4=""" + echo " oemhp_systemroledetail5=""" + echo " oemhp_imagenturl=compute-4" + echo " oemhp_imdatalevel=enabled" + echo " oemhp_coldstarttrap=yes" + echo " oemhp_trapsource=iLO Hostname" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/oemhp_dircfg1" + echo " Targets" + echo " oemhp_keytab1" + echo " Properties" + echo " oemhp_dirauth=Disabled" + echo " oemhp_localacct=Yes" + echo " oemhp_dirsrvaddr=" + 
echo " oemhp_ldapport=636" + echo " oemhp_dirdn=" + echo " oemhp_usercntxt1=" + echo " oemhp_usercntxt2=" + echo " oemhp_usercntxt3=" + echo " oemhp_usercntxt4=" + echo " oemhp_usercntxt5=" + echo " oemhp_usercntxt6=" + echo " oemhp_usercntxt7=" + echo " oemhp_usercntxt8=" + echo " oemhp_usercntxt9=" + echo " oemhp_usercntxt10=" + echo " oemhp_usercntxt11=" + echo " oemhp_usercntxt12=" + echo " oemhp_usercntxt13=" + echo " oemhp_usercntxt14=" + echo " oemhp_usercntxt15=" + echo " oemhp_group1_name=Administrators" + echo " oemhp_group2_name=Authenticated Users" + echo " oemhp_group3_name=" + echo " oemhp_group4_name=" + echo " oemhp_group5_name=" + echo " oemhp_group6_name=" + echo " oemhp_group1_priv=login,oemhp_rc,oemhp_vm,oemhp_power,config,admin" + echo " oemhp_group2_priv=login" + echo " oemhp_group3_priv=0" + echo " oemhp_group4_priv=0" + echo " oemhp_group5_priv=0" + echo " oemhp_group6_priv=0" + echo " oemhp_dir_kerberos_enabled=Disabled" + echo " oemhp_dir_kerberos_kdc_port=88" + echo " oemhp_dir_kerberos_kdc_address=" + echo " oemhp_dir_kerberos_realm=" + echo " Verbs" + echo " cd version exit show set delete" + echo "" + echo "/map1/oemhp_dircfg1/oemhp_keytab1" + echo " Targets" + echo " Properties" + echo " Verbs" + echo " cd version exit show load" + echo "" + echo "/map1/oemhp_vm1" + echo " Targets" + echo " floppydr1" + echo " cddr1" + echo " Properties" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/oemhp_vm1/floppydr1" + echo " Targets" + echo " Properties" + echo " oemhp_image=Not Available" + echo " oemhp_connect=Not Available" + echo " oemhp_boot=Not Available" + echo " oemhp_wp=Not Available" + echo " vm_applet=Not Available" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/oemhp_vm1/cddr1" + echo " Targets" + echo " Properties" + echo " oemhp_image=Not Available" + echo " oemhp_connect=Not Available" + echo " oemhp_boot=Not Available" + echo " oemhp_wp=Not Available" + echo " vm_applet=Not 
Available" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/vlan1" + echo " Targets" + echo " Properties" + echo " EnabledState=disabled" + echo " VLANID=0" + echo " Verbs" + echo " cd version exit show set" + echo "" + echo "/map1/oemhp_ssocfg1" + echo " Targets" + echo " Properties" + echo " oemhp_ssotrust=disabled" + echo " oemhp_ssouser=login" + echo " oemhp_ssooperator=login,oemhp_rc,oemhp_vm,oemhp_power" + echo " oemhp_ssoadministrator=login,oemhp_rc,oemhp_vm,oemhp_power,config,admin" + echo " oemhp_ssoserver=No Records Found" + echo " Verbs" + echo " cd version exit show create delete load set" + echo "" + echo "/map1/oemhp_syslog1" + echo " Targets" + echo " Properties" + echo " oemhp_syslog_enable=no" + echo " oemhp_syslog_port=514" + echo " oemhp_syslog_serveraddress=" + echo " Verbs" + echo " cd version exit show set oemhp_sendTestSyslog" + echo "" + echo "/map1/oemhp_alertmail1" + echo " Targets" + echo " Properties" + echo " oemhp_alertmail_enable=no" + echo " oemhp_alertmail_email=" + echo " oemhp_alertmail_sender_domain=" + echo " oemhp_alertmail_smtp_server=" + echo " oemhp_alertmail_smtp_port=25" + echo " Verbs" + echo " cd version exit show set oemhp_sendTestAlertmail" + echo "" + echo "/map1/oemhp_ahs1" + echo " Targets" + echo " Properties" + echo " EnabledState=yes" + echo " Verbs" + echo " cd version exit show reset set" + echo "" + echo "/map1/oemhp_license1" + echo " Targets" + echo " Properties" + echo " oemhp_name1=iLO 4 Standard" + echo " oemhp_key1=0" + echo " oemhp_name2=None" + echo " oemhp_key2=0" + echo " oemhp_name3=None" + echo " oemhp_key3=0" + echo " Verbs" + echo " cd version exit show oemhp_licenseinstall" + echo "" + echo "" + +else + + echo "" + echo "status=2" + echo "status_tag=COMMAND PROCESSING FAILED" + echo "error_tag=INVALID OPTION" + echo "Wed Aug 5 12:10:46 2015" + echo "" + echo "" + echo "" + echo "" + echo "" + echo "" + + rc=1 + +fi + +exit $rc + diff --git 
a/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/show_quanta b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/show_quanta new file mode 100644 index 00000000..788a0042 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/scripts/show_quanta @@ -0,0 +1,184 @@ +#!/bin/bash + +# +# Copyright (c) 2015 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +rc=0 + +if [ "$0" == "reset" -a "$1" == "/SYS" ] ; then + + echo "" + echo "SYS Power Reset" + echo "/SYS reset" + echo "" + sleep 5 + reboot + +elif [ "$0" == "start" -a "$1" == "/SYS" ] ; then + + echo "" + echo "SYS_Power_on" + echo "/SYS started" + echo "" + +elif [ "$0" == "stop" -a "$1" == "/SYS" ] ; then + + echo "" + echo "SYS Power off" + echo "/SYS stopped" + echo "" + sleep 5 + ifdown eth1 + +elif [ "$1" == "" ] ; then + + echo "" + echo " /" + echo " Targets:" + echo " SP" + echo " SYS" + echo "" + echo " Properties:" + echo "" + echo " Target Commands:" + echo " show" + echo " cd" + echo "" + echo "" + +elif [ "$1" == "/SYS" -a "$2" == "" ] ; then + + echo " /SYS" + echo " Targets:" + echo " voltage" + echo " fan" + echo " temperature" + echo " powerSupply" + echo "" + echo " Properties:" + echo " LOM = 2C:60:0C:AD:9A:A1" + echo " LOM = 2C:60:0C:AD:9A:A2" + echo " OCP Mezz = 2C:60:0C:A1:FD:7F" + echo " OCP Mezz = 2C:60:0C:A1:FD:80" + echo " ChassisStatus = powerIsON" + echo "" + echo " Target Commands:" + echo " show" + echo " cd" + echo " start" + echo " stop" + echo " reset" + echo "" + +elif [ "$1" == "/SP" ] ; then + + echo " /SP" + echo " Targets:" + echo "" + echo " Properties:" + echo " BMCVersion = v3.29" + echo " BMCGUID = 3c 67 12 c0 1c 7c 00 10 b7 69 2c 60 0c ad 9a 91 " + echo " ServerIdentify = off" + echo " BMCMAC = 2C:60:0C:AD:9A:91" + echo "" + echo " Target Commands:" + echo " show" + echo " set" + echo " reset" + echo "" + +elif [ "$1" == "/SYS/powerSupply" ] ; then + + echo "" + echo " Properties:" + echo " PSU Redundancy = N/A " + echo " PSU1 Status = Presence 
Detected, " + echo " PSU2 Status = All Deasserted " + echo "" + echo "" + +elif [ "$1" == "/SYS/voltage" ] ; then + + echo "" + echo " Properties:" + echo " Volt_VR_CPU0 = ok" + echo " Volt_VR_CPU1 = ok" + echo " Volt_P5V = ok" + echo " Volt_P5V_AUX = ok" + echo " Volt_P3V3 = ok" + echo " Volt_P1V05 = ok" + echo " Volt_P1V8_AUX = ok" + echo " Volt_P12V = ok" + echo " Volt_P3V3_AUX = ok" + echo " Volt_VR_DIMM_AB = ok" + echo " Volt_VR_DIMM_CD = ok" + echo " Volt_VR_DIMM_EF = ok" + echo " Volt_VR_DIMM_GH = ok" + echo " Volt_P3V_BAT = ok" + echo "" + echo "" + +elif [ "$1" == "/SYS/fan" ] ; then + + echo "" + echo " Properties:" + echo " Fan_SYS0_1 = ok" + echo " Fan_SYS0_2 = ok" + echo " Fan_SYS1_1 = ok" + echo " Fan_SYS1_2 = ok" + echo " Fan_SYS2_1 = ok" + echo " Fan_SYS2_2 = ok" + echo " Fan_SYS3_1 = ok" + echo " Fan_SYS3_2 = ok" + echo " Fan_SYS4_1 = ok" + echo " Fan_SYS4_2 = ok" + echo " Fan_SYS5_1 = ok" + echo " Fan_SYS5_2 = ok" + echo " Fan_PSU1 = ok" + echo " Fan_PSU2 = critical" + echo "" + echo "" + + +elif [ "$1" == "/SYS/temperature" ] ; then + + echo "" + echo " Properties:" + echo " PCH Thermal Trip = ok" + echo " Temp_CPU0 = ok" + echo " Temp_CPU1 = ok" + echo " Temp_DIMM_AB = ok" + echo " Temp_DIMM_CD = ok" + echo " Temp_DIMM_EF = ok" + echo " Temp_DIMM_GH = ok" + echo " Temp_VR_CPU0 = ok" + echo " Temp_VR_CPU1 = ok" + echo " Temp_VR_DIMM_AB = ok" + echo " Temp_VR_DIMM_CD = ok" + echo " Temp_VR_DIMM_EF = ok" + echo " Temp_VR_DIMM_GH = ok" + echo " Temp_Ambient_FP = ok" + echo " Temp_PCI_Area = ok" + echo " Temp_PCI_Inlet1 = ok" + echo " Temp_PCI_Inlet2 = ok" + echo " Temp_PCH = ok" + echo " Temp_Outlet = ok" + echo " MB Thermal Trip = ok" + echo " Temp_HBA_LSI = ok" + echo " Temp_OCP = na" + echo " Temp_PSU1 = ok" + echo " Temp_PSU2 = ok" + echo "" + echo "" + +else + + echo "Invalid command 'emulated' - type 'help' for a list of commands." 
+ rc=1 +fi + +exit $rc + diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/Makefile b/mtce-common/cgts-mtce-common-1.0/maintenance/Makefile new file mode 100755 index 00000000..5e05a5fe --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/Makefile @@ -0,0 +1,81 @@ +# +# Copyright (c) 2013-2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +SRCS = mtcAlarm.cpp +SRCS += mtcThreads.cpp +SRCS += mtcIpmiUtil.cpp +SRCS += mtcNodeHdlrs.cpp +SRCS += mtcSubfHdlrs.cpp +SRCS += mtcNodeFsm.cpp +SRCS += mtcCompMsg.cpp +SRCS += mtcCtrlMsg.cpp +SRCS += mtcNodeCtrl.cpp +SRCS += mtcNodeComp.cpp +SRCS += mtcHttpUtil.cpp +SRCS += mtcHttpSvr.cpp +SRCS += mtcWorkQueue.cpp +SRCS += mtcInvApi.cpp +SRCS += mtcSmgrApi.cpp +SRCS += mtcKeyApi.cpp +SRCS += mtcCmdHdlr.cpp +SRCS += mtcNodeMnfa.cpp +SRCS += mtcVimApi.cpp + +COMPUTE_OBJS = mtcNodeComp.o +COMPUTE_OBJS += mtcCompMsg.o + +CONTROL_OBJS = mtcAlarm.o +CONTROL_OBJS += mtcThreads.o +CONTROL_OBJS += mtcIpmiUtil.o +CONTROL_OBJS += mtcNodeCtrl.o +CONTROL_OBJS += mtcNodeFsm.o +CONTROL_OBJS += mtcNodeHdlrs.o +CONTROL_OBJS += mtcSubfHdlrs.o +CONTROL_OBJS += mtcCtrlMsg.o +CONTROL_OBJS += mtcWorkQueue.o +CONTROL_OBJS += mtcInvApi.o +CONTROL_OBJS += mtcSmgrApi.o +CONTROL_OBJS += mtcKeyApi.o +CONTROL_OBJS += mtcHttpUtil.o +CONTROL_OBJS += mtcHttpSvr.o +CONTROL_OBJS += mtcCmdHdlr.o +CONTROL_OBJS += mtcNodeMnfa.o +CONTROL_OBJS += mtcVimApi.o + +OBJS = $(SRCS:.cpp=.o) +BINS = mtcAgent mtcClient +LDLIBS += -lstdc++ -ldaemon -lcommon -lthreadUtil -lipmiUtil -lfmcommon -lrmonapi -lalarm -lpthread -lrt -levent -ljson-c -lamon -lcrypto -luuid +INCLUDES += -I../common -I../daemon -I../alarm -I../heartbeat -I../hwmon -I../public -I../rmon/rmonApi -I. 
+CCFLAGS += -g -O2 -Wall -Wextra -Werror -Wno-missing-braces + +STATIC_ANALYSIS_TOOL = cppcheck +STATIC_ANALYSIS_TOOL_EXISTS = $(shell [[ -e `which $(STATIC_ANALYSIS_TOOL)` ]] && echo 1 || echo 0) + +.cpp.o: + $(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@ + +all: remove_bins $(OBJS) $(BINS) + +static_analysis: +ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1) + $(STATIC_ANALYSIS_TOOL) --enable=warning --language=c++ -U__AREA__ -UWANT_FIT_TESTING *.cpp *.h +else + echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..." +endif + +build: clean static_analysis remove_bins $(OBJS) $(BINS) + +mtcAgent: $(OBJS) + $(CXX) $(CONTROL_OBJS) -L../daemon -L../common -L../public -L../alarm -L../rmon/rmonApi $(LDLIBS) -o mtcAgent + +mtcClient: $(OBJS) + $(CXX) $(COMPUTE_OBJS) -L../daemon -L../common -L../public -L../alarm -L../rmon/rmonApi $(LDLIBS) -o mtcClient + +remove_bins: + @rm -f $(BINS) + +clean: + @rm -f $(OBJS) $(BINS) diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/ipmiClient.h b/mtce-common/cgts-mtce-common-1.0/maintenance/ipmiClient.h new file mode 100644 index 00000000..719daa03 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/ipmiClient.h @@ -0,0 +1,297 @@ +#ifndef __INCLUDE_IPMICLIENT_HH__ +#define __INCLUDE_IPMICLIENT_HH__ +/* + * Copyright (c) 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform IPMI Client Daemon + */ + + /* + * + * ------------------------------ + * sensor_monitor_ready: outgoing message - indicates service just started and needs configuration + * ------------------------------ + * + * The sensor monitor will configure itself based off the content of the + * following formatted configuration message. 
+ * + * { "sensor_monitor_ready": + * { + * "hostname":"compute-0" + * } + * } + * + * + * ------------------------------ + * ipmitool_sensor_monitor_config: incoming message + * ------------------------------ + * + * The sensor monitor will configure itself based off the content of the + * following formatted configuration message. + * + * { "ipmitool_sensor_monitor_config": + * { + * "hostname":"compute-0", + * "interval":120, + * "analog" :true, + * "discrete":false + * } + * } + * + * --------------------------------------- + * ipmitool_sensor_monitor_config_response: outgoing message + * --------------------------------------- + * + * This is a config response message. Normally a pass but if there + * is a configuration error then a return code and message are provided. + * + * { "ipmitool_sensor_monitor_config_response": + * { + * "hostname":"compute-0", + * "status": , + * "status_string":"" + * } + * } + * + * -------------------------------- + * ipmitool_sensor_threshold_config: incoming message - NOT YET SUPPORTED IMPLEMENTATION + * -------------------------------- + * + * Specify only the thresholds that need to be changed. + * + * { "ipmitool_sensor_threshold_config": + * [ + * { + * "hostname":"compute-0", + * "n":"Temp_CPU0", + * "lcr":"90.000", + * "lnc":"85.000" + * } + * ] + * } + * + * + * -------------------- + * ipmitool_sensor_data: outgoing message + * -------------------- + * + * The sensor data is formatted in a json style string that is sent + * to the hardware monitor daemon on the active controller as + * specified by the aforementioned configuration command. + * + * The following is a brief 3 sensor example of the expected + * ipmitool output and json string conversion that is sent to + * hardware mon.
+ * + * ipmitool output: + * + * Temp_CPU0 | 54.000 | % degrees C | ok | na | na | na | 86.000 | 87.000 | na + * PSU2 Input | 0.000 | % Watts | cr | na | 0.000 | na | na | na | na + * Critical IRQ | 0x0 | discrete | 0x0080| na | na | na | na | na | na + * Fan_SYS0_2 | 4700.000 | % RPM | ok | na | 500.000 | 1000.000 | na | na | na + * + * Message Design Strategy: + * 1. Maintain all the ipmitool output information so that it is available + * to the hardware monitor for future enhancements without the need to + * change the client side messaging. + * 2. Validate the format of the ipmitool output and report on any errors + * observed in a status field of the response string. + * 3. Deliver an industry standard json string formatted message + * 4. Provide an overall status field indicating any formatting errors + * detected in the sensor data output format. This is not a summary + * status of the sensor data. + * 5. Minimize the amount of data sent + * - use short sensor record labels + * n = name + * v = sensor reading value + * u = unit format used when interpreting the data + * s = correlated status + * - ipmitool labels for thresholds but only include labels for values that are not 'na' + * unr = Upper Non-Recoverable + * ucr = Upper Critical + * unc = Upper Non-Critical + * lnc = Lower Non-Critical + * lcr = Lower Critical + * lnr = Lower Non-Recoverable + * + * Json String: sensor data excluded + * ----------- + * + * { + * "ipmitool_sensor_data": + * { + * "hostname" :"compute-0", + * "status" : 0, + * "status_string" : "pass", + * "analog" : + * [ + * { }, + * { }, + * { } + * ], + * "discrete": + * [ + * + * ] + * } + *} + * + * Json String: full + * ------------- + * + *{ + * "ipmitool_sensor_data": + * { + * "hostname" : "compute-0", + * "status" : 0, + * "status_string": "pass", + * "analog":[ + * { + * "n":"Temp_CPU0", + * "v":"54.000", + * "u":"% degrees C", + * "s":"ok", + * "unc":"86.000", + * "ucr":"87.000" + * }, + * { + * "n":"PSU2 Input", + *
"v":"0.000", + * "u":"% Watts", + * "s":"cr", + * "lcr":"0.000" + * }, + * { + * "n":"Fan_SYS0_2", + * "v":"4700.00", + * "u":"% RPM", + * "s":"ok", + * "lcr":"500.000", + * "lnc":"1000.000" + * } + * ], + * "discrete":[ + * { + * "n":"Critical IRQ", + * "v":"0x0", + * "s":"0x0080" + * } + * ] + * } + *} + * + * + */ + +#include +#include +#include + +using namespace std; + +#include "msgClass.h" /* for ... msgClassSock */ + +#define MAX_HOST_SENSORS (100) + +/* Control structure used for ipmitool related functions ; like sensor monitoring */ +#define DEFAULT_IPMITOOL_SENSOR_MONITORING_PERIOD_SECS (120) /* 2 minutes */ + +#define IPMITOOL_JSON__MONITOR_READY_HEADER ((const char *)("sensor_monitor_ready")) +#define IPMITOOL_JSON__CONFIG_REQUEST_HEADER ((const char *)("ipmitool_sensor_monitor_config")) +#define IPMITOOL_JSON__CONFIG_RESPONSE_HEADER ((const char *)("ipmitool_sensor_monitor_config_response")) +#define IPMITOOL_JSON__SENSOR_DATA_MESSAGE_HEADER ((const char *)("ipmitool_sensor_data")) + +#define IPMITOOL_JSON__ANALOG_LABEL ((const char *)("analog")) +#define IPMITOOL_JSON__DISCRETE_LABEL ((const char *)("discrete")) +#define IPMITOOL_SENSOR_QUERY_CMD ((const char *)(" sensor list")) +// #define IPMITOOL_SENSOR_OUTPUT_FILE ((const char *)("/tmp/ipmitool_sensor_data")) +#define IPMITOOL_SENSOR_OUTPUT_FILE ((const char *)("/var/run/ipmitool_sensor_data")) +#define IPMITOOL_PATH_AND_FILENAME ((const char *)("/usr/bin/ipmitool")) +#define IPMITOOL_PATH_AND_FILENAME_V ((const char *)("/home/wrsroot/test/ipmitool")) + +#define IPMITOOL_MAX_FIELD_LEN (64) + +typedef struct +{ + char name [IPMITOOL_MAX_FIELD_LEN] ; /* sensor name */ + char value [IPMITOOL_MAX_FIELD_LEN] ; /* sensor value */ + char unit [IPMITOOL_MAX_FIELD_LEN] ; /* sensor unit type */ + char status [IPMITOOL_MAX_FIELD_LEN] ; /* status - ok, nc, cr, nr */ + char lnr [IPMITOOL_MAX_FIELD_LEN] ; /* Lower Non-Recoverable */ + char lcr [IPMITOOL_MAX_FIELD_LEN] ; /* Lower Critical */ + char lnc 
[IPMITOOL_MAX_FIELD_LEN] ; /* Lower Non-Critical */ + char unc [IPMITOOL_MAX_FIELD_LEN] ; /* Upper Non-Critical */ + char ucr [IPMITOOL_MAX_FIELD_LEN] ; /* Upper Critical */ + char unr [IPMITOOL_MAX_FIELD_LEN] ; /* Upper Non-Recoverable */ +} ipmitool_sample_type ; + +#define IPMITOOL_FIT_LINE_LEN (1000) +typedef struct +{ + bool enable ; + bool exclude_discrete_sensors ; + bool include_discrete_sensors ; + bool exclude_analog_sensors ; + bool include_analog_sensors ; + bool exclude_sensors ; + int code ; + char json [IPMITOOL_FIT_LINE_LEN] ; +} ipmiClient_fit_type ; + +typedef struct +{ + bool init ; /**< service initialized */ + bool configured ; /**< config command was received */ + int interval ; /**< audit interval in seconds */ + struct mtc_timer timer ; /**< interval audit timer */ + + bool want_analog_sensors ; /**< true to send analog sensor data */ + bool want_discrete_sensors ; /**< true to send discrete sensor data */ + + int analog_sensors ; /**< number of analog sensors in a dump */ + int discrete_sensors ; /**< number of discrete sensors in a dump */ + + string hostname ; /**< this hosts name */ + string config_request ; /**< original config request string */ + string query_request ; /**< sensor query system call request */ + + string status_string ; /**< empty or error log message */ + int parse_errors ; /**< parse or unreadable sensor count */ + int status ; /**< configuration request exec status */ + + msgClassSock* sensor_tx_sock ; /**< sensor data tx socket interface */ + int sensor_rx_port ; /**< the hwmond port to send data to */ + + ipmiClient_fit_type fit ; /**< manage fault insertion testing */ +} ipmiClient_ctrl_type ; + +/* module open and close */ +void ipmiClient_init ( char * hostname ); +void ipmiClient_fini ( void ); +void ipmiClient_configure ( void ); /* called by daemon_configure */ + +/* service utilities */ +int ipmiClient_config ( char * config_ptr ); +int ipmiClient_ready ( string hostname ); +int ipmiClient_query ( void ); + 
+/* These are interfaces used to manage the socket used + * to transmit sensor data to the Hardware Monitor. + * + * ipmiClient_socket_open passes in the Hardware + * Monitor's receive port number. + */ +int ipmiClient_socket_open ( int sensor_rx_port , string & iface ); +void ipmiClient_socket_close ( void ); +bool ipmiClient_socket_ok ( void ); + +/* returns the sensor monitor timer id */ +timer_t ipmiClient_tid ( void ); + +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcAlarm.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcAlarm.cpp new file mode 100644 index 00000000..27b1f88a --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcAlarm.cpp @@ -0,0 +1,784 @@ +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud 'Maintenance Agent' Alarm Module + */ + +#include +#include +#include +#include +#include + +using namespace std; + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "alm" + +#include "daemon_common.h" /* */ + +#include "nodeBase.h" /* */ +#include "nodeTimers.h" /* */ +#include "nodeUtil.h" /* */ +#include "mtcAlarm.h" /* for ... this module header */ +#include "hbsAlarm.h" /* for ... 
hbsAlarm stubs */ + +alarmUtil_type alarm_list[MTC_ALARM_ID__LAST] ; + +void mtcAlarm_init ( void ) +{ + alarmUtil_type * ptr ; + + /** Lock Alarm ************************************************************/ + + ptr = &alarm_list[MTC_ALARM_ID__LOCK]; + memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT))); + snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", LOCK_ALARM_ID); + + ptr->name = "Lock" ; + ptr->instc_prefix = "" ; + + ptr->critl_reason = + ptr->major_reason = + ptr->minor_reason = "was administratively locked to take it out-of-service."; + ptr->clear_reason = "was administratively unlocked and is back in-service."; + + ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL; + ptr->alarm.probable_cause = FM_ALARM_OUT_OF_SERVICE ; + ptr->alarm.inhibit_alarms = FM_TRUE ; + ptr->alarm.service_affecting = FM_TRUE ; + ptr->alarm.suppression = FM_FALSE; + + ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */ + ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */ + + snprintf( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, + "Administratively unlock Host to bring it back in-service."); + + /** Enable Alarm ************************************************************/ + + ptr = &alarm_list[MTC_ALARM_ID__ENABLE]; + memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT))); + snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", ENABLE_ALARM_ID); + + ptr->name = "In-Service" ; + ptr->instc_prefix = "" ; + + /* this is for a log */ + ptr->minor_reason = "has experienced a minor In-Service test event. " + "No action is required. " ; + + /* this is for an alarm and degrade */ + ptr->major_reason = "Host Services failed to start."; + + ptr->critl_reason = "experienced a service-affecting failure. " + "Auto-recovery in progress. 
" + "Manual Lock and Unlock may be required if auto-recovery is unsuccessful."; + + ptr->clear_reason = "was auto recovered through Reboot and is now in-service if 'unlocked-enabled' " + "or is otherwise 'locked-disabled' by administrative 'lock' action."; + + ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL; + ptr->alarm.probable_cause = FM_ALARM_APP_SUBSYS_FAILURE ; + ptr->alarm.inhibit_alarms = FM_FALSE ; + ptr->alarm.service_affecting = FM_TRUE ; + ptr->alarm.suppression = FM_TRUE ; + + ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */ + ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */ + + snprintf (ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, + "If auto-recovery is consistently unable to recover host to the unlocked-enabled " + "state contact next level of support or lock and replace failing Host."); + + + /** Configuration Alarm ************************************************************/ + + ptr = &alarm_list[MTC_ALARM_ID__CONFIG]; + memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT))); + snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", CONFIG_ALARM_ID); + + ptr->name = "Configuration" ; + ptr->instc_prefix = "" ; + + ptr->critl_reason = + ptr->major_reason = + ptr->minor_reason = "experienced a configuration failure. 
"; + ptr->clear_reason = "has been successfully configured and is now in-service if 'unlocked-enabled' " + "or is otherwise 'locked-disabled' by administrative 'lock' action."; + + ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL; + ptr->alarm.probable_cause = FM_ALARM_CONFIG_ERROR ; + ptr->alarm.inhibit_alarms = FM_FALSE; + ptr->alarm.service_affecting = FM_TRUE ; + ptr->alarm.suppression = FM_TRUE ; + + ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */ + ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */ + + snprintf (ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, + "If manual or auto-recovery is consistently unable to recover host to the unlocked-enabled " + "state contact next level of support or lock and replace failing Host."); + + /** Board Management Controller Access Alarm ************************************/ + + ptr = &alarm_list[MTC_ALARM_ID__BM]; + memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT))); + snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", BM_ALARM_ID); + + ptr->name = "Board Management Controller Access" ; + ptr->instc_prefix = "" ; + + ptr->critl_reason = "board management controller is unresponsive." ; + ptr->major_reason = "board management controller is unresponsive." ; + ptr->minor_reason = "access to board management module has failed." 
; + ptr->clear_reason = "access to board management module is established" ; + + ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL ; + ptr->alarm.probable_cause = FM_ALARM_COMM_SUBSYS_FAILURE ; + ptr->alarm.inhibit_alarms = FM_FALSE; + ptr->alarm.service_affecting = FM_FALSE; + ptr->alarm.suppression = FM_FALSE; + + ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */ + ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */ + + snprintf( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, + "Check Host's board management config and connectivity."); + + /** Controller Failure Alarm ****************************************************/ + + ptr = &alarm_list[MTC_ALARM_ID__CH_CONT]; + memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT))); + snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", CH_CONT_ALARM_ID); + + ptr->name = "Controller Function" ; + ptr->instc_prefix = "" ; + + ptr->critl_reason = + ptr->major_reason = + ptr->minor_reason = "controller function has in-service failure while compute services " + "remain healthy."; + ptr->clear_reason = "controller function has recovered"; + + ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL; + ptr->alarm.probable_cause = FM_ALARM_APP_SUBSYS_FAILURE ; + ptr->alarm.inhibit_alarms = FM_FALSE ; + ptr->alarm.service_affecting = FM_TRUE ; + ptr->alarm.suppression = FM_TRUE ; + + ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */ + ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */ + + snprintf (ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, + "Lock and then Unlock host to recover. " + "Avoid using 'Force Lock' action as that will impact compute services " + "running on this host. 
If lock action fails then contact next level " + "of support to investigate and recover."); + + /** Compute Failure Alarm ****************************************************/ + + ptr = &alarm_list[MTC_ALARM_ID__CH_COMP]; + memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT))); + snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", CH_COMP_ALARM_ID); + + ptr->name = "Compute Function" ; + ptr->instc_prefix = "" ; + + ptr->minor_reason = + ptr->major_reason = "Compute service is not fully operational. Auto recovery in progress." ; + ptr->critl_reason = "Compute service of the only available controller is not operational. " + "Auto-recovery disabled. Degrading host instead."; + ptr->clear_reason = "compute service has recovered"; + + ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL; + ptr->alarm.probable_cause = FM_ALARM_APP_SUBSYS_FAILURE ; + ptr->alarm.inhibit_alarms = FM_FALSE ; + ptr->alarm.service_affecting = FM_TRUE ; + ptr->alarm.suppression = FM_TRUE ; + + ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */ + ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */ + + snprintf (ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, + "If alarm is against the only active controller then Enable second controller " + "and Switch Activity (Swact) to it as soon as possible. 
If the alarm " + "persists then Lock/Unlock host to recover its local compute service."); + + /** Add Event Log ****************************************************/ + + ptr = &alarm_list[MTC_LOG_ID__EVENT]; + memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT))); + snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", EVENT_LOG_ID); + + ptr->name = "Maintenance Event" ; + + ptr->minor_reason = + ptr->major_reason = + ptr->critl_reason = + ptr->clear_reason = ""; + + ptr->alarm.alarm_type = FM_ALARM_TYPE_UNKNOWN ; + ptr->alarm.probable_cause = FM_ALARM_CAUSE_UNKNOWN ; + ptr->alarm.inhibit_alarms = FM_FALSE ; + ptr->alarm.service_affecting = FM_FALSE ; + ptr->alarm.suppression = FM_FALSE ; + + ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */ + ptr->alarm.alarm_state = FM_ALARM_STATE_MSG ; /* Dynamic */ + + snprintf ( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, "%s", ""); +} + +string _getIdentity ( mtc_alarm_id_enum id ) +{ + switch ( id ) + { + case MTC_ALARM_ID__LOCK: return (LOCK_ALARM_ID); + case MTC_ALARM_ID__CONFIG: return (CONFIG_ALARM_ID); + case MTC_ALARM_ID__ENABLE: return (ENABLE_ALARM_ID); + case MTC_ALARM_ID__BM: return (BM_ALARM_ID); + case MTC_ALARM_ID__CH_CONT: return (CH_CONT_ALARM_ID); + case MTC_ALARM_ID__CH_COMP: return (CH_COMP_ALARM_ID); + case MTC_LOG_ID__EVENT: return (EVENT_LOG_ID); + case MTC_LOG_ID__COMMAND: return (COMMAND_LOG_ID); + case MTC_LOG_ID__STATECHANGE: return (STATECHANGE_LOG_ID); + default: return ("200.000"); + } +} + +string mtcAlarm_getId_str ( mtc_alarm_id_enum id ) +{ + return(_getIdentity(id)); +} + +string _getInstance ( mtc_alarm_id_enum id ) +{ + id = id ; + return (""); +} + +EFmAlarmSeverityT mtcAlarm_state ( string hostname, mtc_alarm_id_enum id ) +{ + string identity = _getIdentity(id) ; + string instance = _getInstance(id) ; + return ( alarmUtil_query ( hostname, identity, instance)); +} + +void mtcAlarm_clear_all ( string hostname ) +{ + for ( int i = 0 ; i < MTC_ALARM_ID__LAST ; ++i 
) + { + mtcAlarm_clear ( hostname, (mtc_alarm_id_enum)i ); + } +} + +/************************* A L A R M I N G **************************/ + +/* Clear the specified host's maintenance alarm */ +int mtcAlarm_clear ( string hostname, mtc_alarm_id_enum id ) +{ + if ( id < MTC_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + + ilog ("%s clearing '%s' alarm (%s%s)\n", + hostname.c_str(), + alarm_list[id].name.c_str(), + identity.c_str(), + instance.c_str()); + + snprintf( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_list[id].clear_reason.data()); + + return ( alarmUtil_clear ( hostname, identity, instance, alarm_list[id].alarm )); + } + return (FAIL_BAD_PARM); +} + +/** Assert a specified host's mtce alarm with a CRITICAL severity level */ +int mtcAlarm_critical ( string hostname, mtc_alarm_id_enum id ) +{ + if ( id < MTC_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + + elog ("%s setting critical '%s' failure alarm (%s %s)\n", + hostname.c_str(), + alarm_list[id].name.c_str(), + identity.c_str(), + instance.c_str()); + + snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_list[id].critl_reason.data()); + + return ( alarmUtil_critical ( hostname, identity, instance, alarm_list[id].alarm )); + } + return (FAIL_BAD_PARM); +} + +/** Assert a specified host's mtce alarm with a MAJOR severity level */ +int mtcAlarm_major ( string hostname, mtc_alarm_id_enum id ) +{ + if ( id < MTC_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + + wlog ("%s setting major '%s' failure alarm (%s %s)\n", + hostname.c_str(), + alarm_list[id].name.c_str(), + identity.c_str(), + instance.c_str()); + + if ( id == MTC_ALARM_ID__BM ) + { + snprintf( alarm_list[id].alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, + "board management controller 'reset' or
'power-cycle' is recommended."); + } + + else if ( id == MTC_ALARM_ID__ENABLE ) + { + snprintf( alarm_list[id].alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, + "If alarm persists, host may require lock/unlock to recover. See maintenance logs for more detail."); + } + + snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_list[id].major_reason.data()); + + return ( alarmUtil_major ( hostname, identity, instance, alarm_list[id].alarm )); + } + return (FAIL_BAD_PARM); +} + +/** Assert a specified host's mtce alarm with a MINOR severity level */ +int mtcAlarm_minor ( string hostname, mtc_alarm_id_enum id ) +{ + if ( id < MTC_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + + wlog ("%s setting minor '%s' failure alarm (%s %s)\n", + hostname.c_str(), + alarm_list[id].name.c_str(), + identity.c_str(), + instance.c_str()); + + snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_list[id].minor_reason.data()); + + return ( alarmUtil_minor ( hostname, identity, instance, alarm_list[id].alarm )); + } + return (FAIL_BAD_PARM); +} + +/** Assert a specified host's mtce alarm with a WARNING severity level */ +int mtcAlarm_warning ( string hostname, mtc_alarm_id_enum id ) +{ + if ( id < MTC_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + + wlog ("%s setting warning '%s' alarm (%s %s)\n", + hostname.c_str(), + alarm_list[id].name.c_str(), + identity.c_str(), + instance.c_str()); + + if ( id == MTC_ALARM_ID__BM ) + { + snprintf( alarm_list[id].alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, + "Check Host's board management config and connectivity."); + } + + snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_list[id].minor_reason.data()); + + return ( alarmUtil_warning ( hostname, identity, instance, alarm_list[id].alarm )); + } + return 
(FAIL_BAD_PARM); +} + +/*************************** L O G G I N G **********************************/ + +/** Create a CRITICAL maintenance log */ +int mtcAlarm_critical_log ( string hostname, mtc_alarm_id_enum id ) +{ + if ( id < MTC_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + + elog ("%s creating critical '%s' log (%s %s)\n", + hostname.c_str(), + alarm_list[id].name.c_str(), + identity.c_str(), + instance.c_str()); + + snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_list[id].critl_reason.data()); + + return ( alarmUtil_critical_log ( hostname, identity, instance, alarm_list[id].alarm )); + } + return (FAIL_BAD_PARM); +} + +/** Create a MAJOR maintenance log */ +int mtcAlarm_major_log ( string hostname, mtc_alarm_id_enum id ) +{ + if ( id < MTC_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + + wlog ("%s creating major '%s' log (%s %s)\n", + hostname.c_str(), + alarm_list[id].name.c_str(), + identity.c_str(), + instance.c_str()); + + snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_list[id].major_reason.data()); + + return ( alarmUtil_major_log ( hostname, identity, instance, alarm_list[id].alarm )); + } + return (FAIL_BAD_PARM); +} + +/** Create a MINOR maintenance log */ +int mtcAlarm_minor_log ( string hostname, mtc_alarm_id_enum id ) +{ + if ( id < MTC_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + + wlog ("%s creating minor '%s' log (%s %s)\n", + hostname.c_str(), + alarm_list[id].name.c_str(), + identity.c_str(), + instance.c_str()); + + snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s", alarm_list[id].minor_reason.data()); + + return ( alarmUtil_minor_log ( hostname, identity, instance, alarm_list[id].alarm )); + } + return (FAIL_BAD_PARM); +} + +/** Create a WARNING maintenance log 
*/ +int mtcAlarm_warning_log ( string hostname, mtc_alarm_id_enum id ) +{ + if ( id < MTC_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + + wlog ("%s creating warning '%s' log (%s %s)\n", + hostname.c_str(), + alarm_list[id].name.c_str(), + identity.c_str(), + instance.c_str()); + + snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s", alarm_list[id].minor_reason.data()); + + return ( alarmUtil_warning_log ( hostname, identity, instance, alarm_list[id].alarm )); + } + return (FAIL_BAD_PARM); +} + +/** Create a neutral customer log */ +int mtcAlarm_log ( string hostname, mtc_alarm_id_enum id ) +{ + if ( id < MTC_ALARM_ID__END ) + { + /* default to command */ + mtc_alarm_id_enum index = MTC_LOG_ID__COMMAND ; + bool found = false ; + + if ( id == MTC_LOG_ID__EVENT_ADD ) + { + index = MTC_LOG_ID__EVENT ; + alarm_list[index].instc_prefix = "event=add" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "has been 'added' to the system" ); + found = true ; + + } + else if ( id == MTC_LOG_ID__EVENT_MNFA_ENTER ) + { + index = MTC_LOG_ID__EVENT ; + alarm_list[index].instc_prefix = "event=mnfa_enter" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "has 'entered' multi-node failure avoidance" ); + found = true ; + + } + else if ( id == MTC_LOG_ID__EVENT_MNFA_EXIT ) + { + index = MTC_LOG_ID__EVENT ; + alarm_list[index].instc_prefix = "event=mnfa_exit" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "has 'exited' multi-node failure avoidance" ); + found = true ; + } + else if ( id == MTC_LOG_ID__STATUSCHANGE_FAILED ) + { + index = MTC_LOG_ID__STATECHANGE ; + alarm_list[index].instc_prefix = "status=failed" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "is 'disabled-failed' to the 
system" ); + found = true ; + } + else if ( id == MTC_LOG_ID__STATUSCHANGE_ENABLED ) + { + index = MTC_LOG_ID__STATECHANGE ; + alarm_list[index].instc_prefix = "state=enabled" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "is now 'enabled'" ); + found = true ; + } + else if ( id == MTC_LOG_ID__STATUSCHANGE_DISABLED ) + { + index = MTC_LOG_ID__STATECHANGE ; + alarm_list[index].instc_prefix = "state=disabled" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "is now 'disabled'" ); + found = true ; + } + else if ( id == MTC_LOG_ID__STATUSCHANGE_OFFLINE ) + { + index = MTC_LOG_ID__STATECHANGE ; + alarm_list[index].instc_prefix = "status=offline" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "is now 'offline'" ); + found = true ; + } + else if ( id == MTC_LOG_ID__STATUSCHANGE_ONLINE ) + { + index = MTC_LOG_ID__STATECHANGE ; + alarm_list[index].instc_prefix = "status=online" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "is now 'online'" ); + found = true ; + } + + else if ( id == MTC_LOG_ID__STATUSCHANGE_REINSTALL_FAILED ) + { + index = MTC_LOG_ID__STATECHANGE ; + alarm_list[index].instc_prefix = "status=reinstall-failed" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "reinstall failed" ); + found = true ; + } + + else if ( id == MTC_LOG_ID__STATUSCHANGE_REINSTALL_COMPLETE ) + { + index = MTC_LOG_ID__STATECHANGE ; + alarm_list[index].instc_prefix = "status=reinstall-complete" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "reinstall completed successfully" ); + found = true ; + } + + else if ( id == MTC_LOG_ID__COMMAND_UNLOCK ) + { + alarm_list[index].instc_prefix = "command=unlock" ; + snprintf ( 
alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "manual 'unlock' request" ); + found = true ; + } + else if ( id == MTC_LOG_ID__COMMAND_FORCE_LOCK ) + { + alarm_list[index].instc_prefix = "command=force-lock" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "manual 'force-lock' request" ); + found = true ; + } + else if ( id == MTC_LOG_ID__COMMAND_SWACT ) + { + alarm_list[index].instc_prefix = "command=swact" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "manual 'controller switchover' request" ); + found = true ; + } + else if ( id == MTC_LOG_ID__COMMAND_MANUAL_REBOOT ) + { + alarm_list[index].instc_prefix = "command=reboot" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "manual 'reboot' request" ); + found = true ; + } + else if ( id == MTC_LOG_ID__COMMAND_AUTO_REBOOT ) + { + alarm_list[index].instc_prefix = "action=reboot" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "'reboot' action" ); + found = true ; + } + else if ( id == MTC_LOG_ID__COMMAND_MANUAL_RESET ) + { + alarm_list[index].instc_prefix = "command=reset" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "manual 'reset' request" ); + found = true ; + } + else if ( id == MTC_LOG_ID__COMMAND_AUTO_RESET ) + { + alarm_list[index].instc_prefix = "action=reset" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "'reset' action" ); + found = true ; + } + else if ( id == MTC_LOG_ID__COMMAND_REINSTALL ) + { + alarm_list[index].instc_prefix = "command=reinstall" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "manual 'reinstall' request" ); + found = true ; + } + else if ( 
id == MTC_LOG_ID__COMMAND_MANUAL_POWER_ON ) + { + alarm_list[index].instc_prefix = "command=power-on" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "manual 'power-on' request" ); + found = true ; + } + else if ( id == MTC_LOG_ID__COMMAND_AUTO_POWER_ON ) + { + alarm_list[index].instc_prefix = "action=power-on" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "'power-on' action" ); + found = true ; + } + else if ( id == MTC_LOG_ID__COMMAND_MANUAL_POWER_OFF ) + { + alarm_list[index].instc_prefix = "command=power-off" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "manual 'power-off' request" ); + found = true ; + } + else if ( id == MTC_LOG_ID__COMMAND_AUTO_POWER_OFF ) + { + alarm_list[index].instc_prefix = "action=power-off" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "'power-off' action" ); + found = true ; + } + else if ( id == MTC_LOG_ID__COMMAND_DELETE ) + { + alarm_list[index].instc_prefix = "command=delete" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "manual 'delete' request" ); + found = true ; + } + else if ( id == MTC_LOG_ID__COMMAND_BM_PROVISIONED ) + { + alarm_list[index].instc_prefix = "command=provision" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "board management controller has been 'provisioned'" ); + found = true ; + } + else if ( id == MTC_LOG_ID__COMMAND_BM_DEPROVISIONED ) + { + alarm_list[index].instc_prefix = "command=deprovision" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "board management controller has been 'de-provisioned'" ); + found = true ; + } + else if ( id == MTC_LOG_ID__COMMAND_BM_REPROVISIONED ) + { + 
alarm_list[index].instc_prefix = "command=reprovision" ; + snprintf ( alarm_list[index].alarm.reason_text, + FM_MAX_BUFFER_LENGTH, "%s %s", + hostname.data(), + "board management controller has been 're-provisioned'" ); + found = true ; + } + + if ( found == true ) + { + int rc ; + + string identity = _getIdentity(index); + string instance = _getInstance(index); + instance.append(alarm_list[index].instc_prefix); + //wlog ("%s '%s' log (%s.%s)\n", + // hostname.c_str(), + // alarm_list[index].alarm.reason_text, + // identity.c_str(), + // instance.c_str()); + + /* Want to make this log a critical */ + if ( id == MTC_LOG_ID__STATUSCHANGE_REINSTALL_FAILED ) + { + alarm_list[index].alarm.severity = FM_ALARM_SEVERITY_CRITICAL ; + } + + rc = alarmUtil_log ( hostname, identity, instance, alarm_list[index].alarm ); + + /* Revert the severity of the event log back to Clear ( shows up as N/A ) */ + if ( id == MTC_LOG_ID__STATUSCHANGE_REINSTALL_FAILED ) + { + alarm_list[MTC_LOG_ID__STATECHANGE].alarm.severity = FM_ALARM_SEVERITY_CLEAR ; + } + return (rc); + } + } + return (FAIL_BAD_PARM); +} diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcAlarm.h b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcAlarm.h new file mode 100644 index 00000000..93d27492 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcAlarm.h @@ -0,0 +1,114 @@ +#ifndef __MTCALARM_H__ +#define __MTCALARM_H__ + +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud 'Maintenance Agent' Alarm Header + */ + +#include +#include +#include +#include + +using namespace std; + +#include "alarmUtil.h" /* for .. 
alarmUtil_ */ + +/** Maintenance Alarm Abstract Reference IDs */ +typedef enum +{ + MTC_ALARM_ID__LOCK = 0, + MTC_ALARM_ID__CONFIG = 1, + MTC_ALARM_ID__ENABLE = 2, + MTC_ALARM_ID__BM = 3, + MTC_ALARM_ID__CH_CONT = 4, /* Combo Host Controller Failure - with Active Compute */ + MTC_ALARM_ID__CH_COMP = 5, /* Combo Host Compute Failure - on last Controller */ + + MTC_LOG_ID__EVENT = 6, + MTC_LOG_ID__COMMAND = 7, + MTC_LOG_ID__STATECHANGE = 8, + MTC_ALARM_ID__LAST = 9, + + MTC_LOG_ID__EVENT_ADD = 10, + MTC_LOG_ID__EVENT_RESTART = 11, + MTC_LOG_ID__EVENT_DISCOVERED = 12, + MTC_LOG_ID__EVENT_MNFA_ENTER = 13, + MTC_LOG_ID__EVENT_MNFA_EXIT = 14, + + MTC_LOG_ID__COMMAND_DELETE = 19, + MTC_LOG_ID__COMMAND_UNLOCK = 20, + MTC_LOG_ID__COMMAND_FORCE_LOCK = 21, + MTC_LOG_ID__COMMAND_SWACT = 22, + MTC_LOG_ID__COMMAND_REINSTALL = 23, + MTC_LOG_ID__COMMAND_BM_PROVISIONED = 24, + MTC_LOG_ID__COMMAND_BM_DEPROVISIONED = 25, + MTC_LOG_ID__COMMAND_BM_REPROVISIONED = 26, + + MTC_LOG_ID__COMMAND_AUTO_REBOOT = 30, + MTC_LOG_ID__COMMAND_MANUAL_REBOOT = 31, + MTC_LOG_ID__COMMAND_AUTO_RESET = 32, + MTC_LOG_ID__COMMAND_MANUAL_RESET = 33, + MTC_LOG_ID__COMMAND_AUTO_POWER_ON = 34, + MTC_LOG_ID__COMMAND_MANUAL_POWER_ON = 35, + MTC_LOG_ID__COMMAND_AUTO_POWER_OFF = 36, + MTC_LOG_ID__COMMAND_MANUAL_POWER_OFF = 37, + + + MTC_LOG_ID__STATUSCHANGE_ENABLED = 40, + MTC_LOG_ID__STATUSCHANGE_DISABLED = 41, + MTC_LOG_ID__STATUSCHANGE_ONLINE = 42, + MTC_LOG_ID__STATUSCHANGE_OFFLINE = 43, + MTC_LOG_ID__STATUSCHANGE_FAILED = 44, + MTC_LOG_ID__STATUSCHANGE_REINSTALL_FAILED = 45, + MTC_LOG_ID__STATUSCHANGE_REINSTALL_COMPLETE = 46, + + MTC_ALARM_ID__END = 50 + +} mtc_alarm_id_enum ; + +void mtcAlarm_init ( void ); +void mtcAlarm_clear_all ( string hostname ); /* clears every alarm id below MTC_ALARM_ID__LAST for this host */ + +EFmAlarmSeverityT mtcAlarm_state ( string hostname, mtc_alarm_id_enum id ); + +string mtcAlarm_getId_str ( mtc_alarm_id_enum id ); + +/** Clear the specified maintenance alarm for specific host */ +int mtcAlarm_clear ( string hostname, mtc_alarm_id_enum id ); + +/**
Assert a specified mtce alarm against the specified host with a WARNING severity level */ +int mtcAlarm_warning ( string hostname, mtc_alarm_id_enum id ); + +/** Assert a specified mtce alarm against the specified host with a MINOR severity level */ +int mtcAlarm_minor ( string hostname, mtc_alarm_id_enum id ); + +/** Assert a specified mtce alarm against the specified host with a MAJOR severity level */ +int mtcAlarm_major ( string hostname, mtc_alarm_id_enum id ); + +/** Assert a specified mtce alarm against the specified host with a CRITICAL severity level */ +int mtcAlarm_critical ( string hostname, mtc_alarm_id_enum id ); + + +int mtcAlarm_critical_log ( string hostname, mtc_alarm_id_enum id ); + +/** Create a MAJOR maintenance log */ +int mtcAlarm_major_log ( string hostname, mtc_alarm_id_enum id ); + +/** Create a MINOR maintenance log */ +int mtcAlarm_minor_log ( string hostname, mtc_alarm_id_enum id ); + +/** Create a WARNING maintenance log */ +int mtcAlarm_warning_log ( string hostname, mtc_alarm_id_enum id ); + +/** Create a maintenance log */ +int mtcAlarm_log ( string hostname, mtc_alarm_id_enum id ); + +#endif /* __MTCALARM_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcBrdMgmt.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcBrdMgmt.cpp new file mode 100644 index 00000000..e69de29b diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcBrdMgmt.h b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcBrdMgmt.h new file mode 100644 index 00000000..e69de29b diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCmdHdlr.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCmdHdlr.cpp new file mode 100644 index 00000000..98c9d9de --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCmdHdlr.cpp @@ -0,0 +1,826 @@ +/* + * Copyright (c) 2013-2017 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/**************************************************************************** + * @file + * Wind River Titanium Cloud Maintenance Command Handler FSM Implementation + * + * nodeLinkClass::cmd_handler + * + ****************************************************************************/ + +using namespace std; + +#define __AREA__ "cmd" + +#include "nodeClass.h" /* for ... nodeLinkClass */ +#include "nodeUtil.h" /* for ... clean_bm_response_files */ +#include "nodeTimers.h" /* for ... mtcTimer_start/stop */ +#include "mtcNodeMsg.h" /* for ... send_mtc_cmd */ +#include "nodeCmds.h" /* for ... Cmd hdl'ing stages & struct */ + +extern void mtcTimer_handler ( int sig, siginfo_t *si, void *uc); + +string _get_cmd_str( int this_cmd ) +{ + string temp ; + switch (this_cmd) + { + case MTC_OPER__MODIFY_HOSTNAME: + { + temp = "Modify Hostname"; + break ; + } + case MTC_OPER__RESET_PROGRESSION: + { + temp = "Reset Progression"; + break ; + } + case MTC_OPER__HOST_SERVICES_CMD: + { + temp = "Host Services"; + break ; + } + case MTC_OPER__RUN_IPMI_COMMAND: + { + temp = "IPMI Command"; + break ; + } + default: + { + temp = "_unknown_" ; + } + } + return(temp); +} + +void nodeLinkClass::mtcCmd_workQ_dump ( struct nodeLinkClass::node * node_ptr ) +{ + if ( node_ptr->mtcCmd_work_fifo.size() != 0 ) + { + for ( node_ptr->mtcCmd_work_fifo_ptr = node_ptr->mtcCmd_work_fifo.begin() ; + node_ptr->mtcCmd_work_fifo_ptr != node_ptr->mtcCmd_work_fifo.end(); + node_ptr->mtcCmd_work_fifo_ptr++ ) + { + printf ( "%15s mtceCmd_workQ:%10s seq:%d stage:%d status [%d:%s]\n", + node_ptr->hostname.c_str(), + _get_cmd_str(node_ptr->mtcCmd_work_fifo_ptr->cmd).c_str(), + node_ptr->mtcCmd_work_fifo_ptr->seq, + node_ptr->mtcCmd_work_fifo_ptr->stage, + node_ptr->mtcCmd_work_fifo_ptr->status, + node_ptr->mtcCmd_work_fifo_ptr->status_string.c_str()); + } + } +} + +void nodeLinkClass::mtcCmd_doneQ_dump ( struct nodeLinkClass::node * node_ptr ) +{ + if ( 
node_ptr->mtcCmd_done_fifo.size() != 0 ) +{ + for ( node_ptr->mtcCmd_done_fifo_ptr = node_ptr->mtcCmd_done_fifo.begin() ; + node_ptr->mtcCmd_done_fifo_ptr != node_ptr->mtcCmd_done_fifo.end(); + node_ptr->mtcCmd_done_fifo_ptr++ ) + { + printf ( "%15s mtceCmd_doneQ:%10s seq:%d stage:%d status [%d:%s]\n", + node_ptr->hostname.c_str(), + _get_cmd_str(node_ptr->mtcCmd_done_fifo_ptr->cmd).c_str(), + node_ptr->mtcCmd_done_fifo_ptr->seq, + node_ptr->mtcCmd_done_fifo_ptr->stage, + node_ptr->mtcCmd_done_fifo_ptr->status, + node_ptr->mtcCmd_done_fifo_ptr->status_string.c_str()); /* status text of the done-queue entry being dumped */ + } + } +} +/* Dump and then purge the done queue of every node in the list */ +void nodeLinkClass::mtcCmd_doneQ_dump_all ( void ) +{ + struct node * ptr = static_cast<struct node *>(NULL) ; + + /* check for empty list condition */ + if ( head != NULL ) + { + /* Now search the node list */ + for ( ptr = head ; ptr != NULL ; ptr = ptr->next ) + { + mtcCmd_doneQ_dump ( ptr ); + mtcCmd_doneQ_purge ( ptr ); + } + } +} +/* Dump the work queue of every node in the list */ +void nodeLinkClass::mtcCmd_workQ_dump_all ( void ) +{ + struct node * ptr = static_cast<struct node *>(NULL) ; + + /* check for empty list condition */ + if ( head != NULL ) + { + /* Now search the node list */ + for ( ptr = head ; ptr != NULL ; ptr = ptr->next ) + { + mtcCmd_workQ_dump ( ptr ); + } + } +} + +int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) +{ + int rc = PASS ; + + /* Should not be called empty but check just in case */ + if ( node_ptr->mtcCmd_work_fifo.size() == 0 ) + return (rc); + + node_ptr->mtcCmd_work_fifo_ptr = node_ptr->mtcCmd_work_fifo.begin (); + switch ( node_ptr->mtcCmd_work_fifo_ptr->stage ) + { + case MTC_CMD_STAGE__START: + { + dlog ("%s mtcCmd: %d:%d.%d\n", + node_ptr->hostname.c_str(), + node_ptr->mtcCmd_work_fifo_ptr->cmd, + node_ptr->mtcCmd_work_fifo_ptr->parm1, + node_ptr->mtcCmd_work_fifo_ptr->parm2); + + if ( node_ptr->mtcCmd_work_fifo_ptr->cmd == MTC_OPER__RESET_PROGRESSION ) + { + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__RESET_PROGRESSION_START ; + } + else if ( node_ptr->mtcCmd_work_fifo_ptr->cmd ==
MTC_OPER__HOST_SERVICES_CMD ) + { + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__HOST_SERVICES_SEND_CMD ; + } + else if ( node_ptr->mtcCmd_work_fifo_ptr->cmd == MTC_OPER__MODIFY_HOSTNAME ) + { + send_hbs_command ( node_ptr->hostname, MTC_CMD_DEL_HOST ); + send_guest_command ( node_ptr->hostname, MTC_CMD_DEL_HOST ); + + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__MODIFY_HOSTNAME_START ; + } + else + { + slog ("%s Unsupported Mtce Command (%d)\n", + node_ptr->hostname.c_str(), + node_ptr->mtcCmd_work_fifo_ptr->cmd ); + + node_ptr->mtcCmd_work_fifo_ptr->status = FAIL_BAD_PARM ; + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__DONE ; + } + break ; + } + + case MTC_CMD_STAGE__HOST_SERVICES_SEND_CMD: + { + send_mtc_cmd ( node_ptr->hostname, node_ptr->host_services_req.cmd, MGMNT_INTERFACE ); + + /* Start timer that waits for the initial command received response + * There is no point in waiting for the longer host services + * execution timeout if the far end is not even able to ACK the + * initial test request. Bare in mind that the execution of the + * host services command can take a while so its timeout is much + * longer and polled for in the 3rd phase of this fsm but only + * if we get an initial command ACK. 
*/ + mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_CMD_RSP_TIMEOUT ); + + /* change state to waiting for that initial ACK */ + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__HOST_SERVICES_RECV_ACK ; + break ; + } + case MTC_CMD_STAGE__HOST_SERVICES_RECV_ACK: + { + if ( mtcTimer_expired ( node_ptr->mtcCmd_timer ) ) + { + node_ptr->mtcCmd_work_fifo_ptr->status = + node_ptr->host_services_req.status = FAIL_NO_CMD_ACK ; + node_ptr->host_services_req.status_string = + node_ptr->host_services_req.name ; + node_ptr->host_services_req.status_string.append (" ack timeout") ; + + dlog ("%s %s (rc:%d)\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.status_string.c_str(), + node_ptr->host_services_req.status ); + + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__DONE ; + } + else if ( node_ptr->host_services_req.ack ) + { + /* get the host services timeout and add MTC_AGENT_TIMEOUT_EXTENSION + * seconds so that it is a bit longer than the mtcClient timeout */ + int timeout = daemon_get_cfg_ptr()->host_services_timeout ; + timeout += MTC_AGENT_TIMEOUT_EXTENSION ; + + dlog ("%s %s request ack (monitor mode)\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.name.c_str()); + + node_ptr->host_services_req.cmd = MTC_CMD_HOST_SVCS_RESULT ; + node_ptr->mtcCmd_work_fifo_ptr->stage = + MTC_CMD_STAGE__HOST_SERVICES_WAIT_FOR_RESULT ; + mtcTimer_reset ( node_ptr->mtcCmd_timer ); + mtcTimer_start ( node_ptr->mtcCmd_timer, + mtcTimer_handler, + timeout ); + } + else if ( node_ptr->host_services_req.cmd == node_ptr->host_services_req.rsp ) + { + dlog ("%s %s request ack (legacy mode)\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.name.c_str()); + node_ptr->mtcCmd_work_fifo_ptr->status = + node_ptr->host_services_req.status = PASS ; + + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__DONE ; + } + break ; + } + case MTC_CMD_STAGE__HOST_SERVICES_WAIT_FOR_RESULT: + { + if ( mtcTimer_expired ( 
node_ptr->mtcCmd_timer ) ) + { + node_ptr->mtcCmd_work_fifo_ptr->status = + node_ptr->host_services_req.status = FAIL_TIMEOUT ; + + node_ptr->host_services_req.status_string = + node_ptr->host_services_req.name ; + node_ptr->host_services_req.status_string.append (" execution timeout") ; + + dlog ("%s %s (rc:%d)\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.status_string.c_str(), + node_ptr->host_services_req.status ); + } + else if ( node_ptr->host_services_req.rsp != MTC_CMD_HOST_SVCS_RESULT ) + { + /* waiting for result response ... */ + break ; + } + else if ( node_ptr->host_services_req.status == PASS ) + { + dlog ("%s %s completed\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.name.c_str()); + + node_ptr->mtcCmd_work_fifo_ptr->status = PASS ; + } + else + { + node_ptr->mtcCmd_work_fifo_ptr->status = + node_ptr->host_services_req.status ; + + if ( ! node_ptr->host_services_req.status_string.empty() ) + { + wlog ("%s %s\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.status_string.c_str()); + } + + node_ptr->host_services_req.status_string = + node_ptr->host_services_req.name ; + node_ptr->host_services_req.status_string.append (" execution failed") ; + + dlog ("%s %s ; rc:%d\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.status_string.c_str(), + node_ptr->host_services_req.status); + } + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__DONE ; + break ; + } + /*************************************************************************** + * + * 'Reset Progression' Command Stages + * + * This target handler FSM is responsible for resetting a host through + * progression escalation of interfaces. First a reboot by command is + * attempted over the management network. If that fails the same operation + * is tried over the infrastructure network. If both reboot command + * attempts fail and the board management network for this host is + * provisioned then reset through it is attempted. 
+ * Number of reset retries is specified in the command parameter 1 + * where a value of -1 means infinitely and a value of zero means no + * retries ; only attempt up to all provisioned interfaces only once. + * + * *************************************************************************/ + case MTC_CMD_STAGE__RESET_PROGRESSION_START: + { + node_ptr->cmd_retries = 0 ; + if ( node_ptr->cmd.task == true ) + { + /* Management Reboot Failed */ + mtcInvApi_update_task ( node_ptr, MTC_TASK_REBOOT_REQUEST ); + } + + start_offline_handler ( node_ptr ); + + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__REBOOT ; + break ; + } + case MTC_CMD_STAGE__REBOOT: + { + bool send_reboot_ok = false ; + + node_ptr->reboot_cmd_ack_mgmnt = false ; + node_ptr->reboot_cmd_ack_infra = false ; + + /* send reboot command */ + node_ptr->cmdReq = MTC_CMD_REBOOT ; + node_ptr->cmdRsp = MTC_CMD_NONE ; + plog ("%s Performing REBOOT (mgmnt network)\n", node_ptr->hostname.c_str()); + if ( send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, MGMNT_INTERFACE ) != PASS ) + { + wlog ("%s REBOOT Request Failed (mgmnt network)\n", + node_ptr->hostname.c_str()); + } + else + { + send_reboot_ok = true ; + } + + if ( infra_network_provisioned == true ) + { + plog ("%s Performing REBOOT (infra network)\n", node_ptr->hostname.c_str()); + if ( send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, INFRA_INTERFACE ) != PASS ) + { + wlog ("%s REBOOT Request Failed (infra network)\n", + node_ptr->hostname.c_str()); + } + else + { + send_reboot_ok = true ; + } + } + + if ( send_reboot_ok == true ) + { + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__REBOOT_ACK ; + mtcTimer_reset ( node_ptr->mtcCmd_timer ); + mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_CMD_RSP_TIMEOUT ); + + ilog ("%s waiting for REBOOT ACK\n", node_ptr->hostname.c_str() ); + } + else + { + if ( node_ptr->cmd.task == true ) + { + /* Reboot Failed */ + mtcInvApi_update_task ( node_ptr, MTC_TASK_REBOOT_FAIL ); + } + 
node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__RESET ; + } + break ; + } + case MTC_CMD_STAGE__REBOOT_ACK: + { + /* can come in from either interface */ + if ( node_ptr->cmdRsp != MTC_CMD_REBOOT ) + { + if ( node_ptr->mtcCmd_timer.ring == true ) + { + if ( node_ptr->cmd.task == true ) + { + mtcInvApi_update_task ( node_ptr, MTC_TASK_REBOOT_FAIL ); + } + wlog ("%s REBOOT ACK Timeout\n", node_ptr->hostname.c_str()); + + node_ptr->mtcCmd_timer.ring = false ; + + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__RESET ; + } + } + else + { + /* declare successful reboot */ + plog ("%s REBOOT Request Succeeded\n", node_ptr->hostname.c_str()); + + if ( node_ptr->cmd.task == true ) + { + /* Management Reboot Failed */ + mtcInvApi_update_task ( node_ptr, MTC_TASK_REBOOTING ); + } + set_uptime ( node_ptr, 0 , false ); + + /* start timer that verifies board has reset */ + mtcTimer_reset ( node_ptr->mtcCmd_timer ); + + /* progress to RESET if we have tried 5 times already */ + if ( node_ptr->cmd_retries >= RESET_PROG_MAX_REBOOTS_B4_RESET ) + { + elog ("%s still not offline ; trying reset\n", node_ptr->hostname.c_str()); + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__RESET ; + } + else + { + int delay = (((offline_period*offline_threshold)/1000)+3); + ilog ("%s searching for offline ; next reboot attempt in %d seconds\n", + node_ptr->hostname.c_str(), delay); + + /* After the host is reset we need to wait for it to stop sending mtcAlive messages + * Delay the time fo the offline handler to run to completion at least once before + * timing out and retrying the reset again */ + mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, delay ); + + /* Wait for the host to go offline */ + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__OFFLINE_CHECK ; + } + } + break ; + } + case MTC_CMD_STAGE__RESET: + { + if (( node_ptr->bm_provisioned == true ) && ( node_ptr->bm_accessible == true )) + { + plog ("%s Performing RESET over Board Management 
Interface\n", node_ptr->hostname.c_str()); + if ( node_ptr->cmd.task == true ) + { + mtcInvApi_update_task ( node_ptr, MTC_TASK_RESET_REQUEST); + } + + /* bmc power control reset by ipmitool */ + rc = ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__POWER_RESET ); + + if ( rc == PASS ) + { + dlog ("%s Board Management Interface RESET Requested\n", node_ptr->hostname.c_str()); + + mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY ); + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__RESET_ACK; + break ; + } + else + { + node_ptr->mtcCmd_work_fifo_ptr->status = rc ; + wlog ("%s 'reset' command request failed (%d)\n", node_ptr->hostname.c_str(), rc ); + } + } + else + { + if ( node_ptr->bm_provisioned == false ) + { + wlog ("%s Board Management Interface not provisioned\n", node_ptr->hostname.c_str()); + } + else if ( node_ptr->bm_accessible == false ) + { + wlog ("%s Board Management Interface not accessible\n", node_ptr->hostname.c_str()); + } + } + int delay = (((offline_period*offline_threshold)/1000)+3); + mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, delay ); + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__OFFLINE_CHECK ; + break ; + } + case MTC_CMD_STAGE__RESET_ACK: + { + if ( node_ptr->mtcCmd_timer.ring == true ) + { + int delay = (((offline_period*offline_threshold)/1000)+3); + + /* bmc power control reset by ipmitool */ + rc = ipmi_command_recv ( node_ptr ); + if ( rc == RETRY ) + { + mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY ); + break ; + } + + if ( rc ) + { + elog ("%s Board Management Interface RESET Unsuccessful\n", node_ptr->hostname.c_str()); + if ( node_ptr->cmd.task == true ) + { + mtcInvApi_update_task ( node_ptr, MTC_TASK_RESET_FAIL); + } + mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, delay ); + node_ptr->mtcCmd_work_fifo_ptr->status = rc ; + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__OFFLINE_CHECK ; + } + else + 
{ + plog ("%s Board Management Interface RESET Command Succeeded\n", node_ptr->hostname.c_str()); + + if (( node_ptr->adminAction != MTC_ADMIN_ACTION__RESET ) && + ( node_ptr->adminAction != MTC_ADMIN_ACTION__REBOOT )) + { + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_AUTO_RESET ); + } + + set_uptime ( node_ptr, 0 , false ); + + if ( node_ptr->cmd.task == true ) + { + mtcInvApi_update_task ( node_ptr, MTC_TASK_RESETTING ); + } + mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, delay ); + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__OFFLINE_CHECK ; + ilog ("%s waiting for host to go offline (%d secs) before retrying reset\n", + node_ptr->hostname.c_str(), + delay); + } + } + break ; + } + case MTC_CMD_STAGE__OFFLINE_CHECK: + { + if ( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFLINE ) + { + mtcTimer_reset ( node_ptr->mtcCmd_timer ); + + clear_service_readies ( node_ptr ); + + qlog ("%s Reset Progression Complete ; host is offline (after %d retries)\n", + node_ptr->hostname.c_str(), + node_ptr->cmd_retries ); + node_ptr->mtcCmd_work_fifo_ptr->status = PASS ; + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__DONE ; + } + + else if ( node_ptr->mtcCmd_timer.ring == true ) + { + if ( ++node_ptr->cmd_retries < RESET_PROG_MAX_REBOOTS_B4_RETRY ) + { + ilog ("%s REBOOT (retry %d of %d)\n", + node_ptr->hostname.c_str(), + node_ptr->cmd_retries, + RESET_PROG_MAX_REBOOTS_B4_RETRY ); + + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__REBOOT ; + } + else + { + ilog ("%s still not offline\n", node_ptr->hostname.c_str()); + node_ptr->mtcCmd_work_fifo_ptr->status = FAIL_RETRY ; + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__RESET_PROGRESSION_RETRY ; + } + } + break ; + } + case MTC_CMD_STAGE__RESET_PROGRESSION_RETRY: + { + /* Complete command if we reach max retries */ + if ( ++node_ptr->mtcCmd_work_fifo_ptr->parm2 > node_ptr->mtcCmd_work_fifo_ptr->parm1 ) + { + plog ("%s Reset Progression Done\n", node_ptr->hostname.c_str()); 
+                node_ptr->mtcCmd_work_fifo_ptr->status = FAIL_RETRY ;
+                node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__DONE ;
+            }
+            else
+            {
+                wlog ("%s Reset Progression Retry\n", node_ptr->hostname.c_str());
+                node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__RESET_PROGRESSION_START ;
+            }
+
+            /* the offline handler is stopped on both the done and retry paths */
+            stop_offline_handler ( node_ptr );
+            break ;
+        }
+
+        /* Send the IPMI command held in node_ptr->cmdReq.
+         * A send failure completes the command with FAIL_RETRY ; otherwise
+         * the result is polled for in the IPMI_COMMAND_RECV stage. */
+        case MTC_CMD_STAGE__IPMI_COMMAND_SEND:
+        {
+            if ( ipmi_command_send ( node_ptr, node_ptr->cmdReq ) != PASS )
+            {
+                elog ("%s IPMI %s Send Failed\n",
+                          node_ptr->hostname.c_str(),
+                          getIpmiCmd_str(node_ptr->cmdReq));
+
+                node_ptr->mtcCmd_work_fifo_ptr->status = FAIL_RETRY ;
+                node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__DONE ;
+            }
+            else
+            {
+                plog ("%s IPMI %s Requested\n",
+                          node_ptr->hostname.c_str(),
+                          getIpmiCmd_str(node_ptr->cmdReq));
+
+                mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY );
+                node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__IPMI_COMMAND_RECV ;
+            }
+            break ;
+        }
+
+        /* Poll for the IPMI command result each time the command timer expires.
+         * RETRY re-arms the timer ; any other result is stored as the command
+         * status before moving on to the offline check. */
+        case MTC_CMD_STAGE__IPMI_COMMAND_RECV:
+        {
+            if ( mtcTimer_expired ( node_ptr->mtcCmd_timer ) )
+            {
+                rc = ipmi_command_recv ( node_ptr );
+                if ( rc == RETRY )
+                {
+                    mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_SECS_5 ) ;
+                    break ;
+                }
+                else if ( rc == PASS )
+                {
+                    plog ("%s IPMI %s Successful\n", node_ptr->hostname.c_str(),
+                              getIpmiCmd_str(node_ptr->cmdReq));
+                }
+                else
+                {
+                    /* neither RETRY nor PASS ; report the failure as an error
+                     * rather than repeating the send stage's 'Requested' log */
+                    elog ("%s IPMI %s Failed (rc:%d)\n", node_ptr->hostname.c_str(),
+                              getIpmiCmd_str(node_ptr->cmdReq), rc );
+                }
+                node_ptr->mtcCmd_work_fifo_ptr->status = rc ;
+                node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__OFFLINE_CHECK ;
+            }
+            break ;
+        }
+
+        /***************************************************************************
+         *
+         * 'Modify Hostname' Command Stages
+         *
+         * *************************************************************************/
+        case MTC_CMD_STAGE__MODIFY_HOSTNAME_START:
+        {
+            /* tell the other services to drop the host under its current name */
+            send_hbs_command ( node_ptr->hostname, MTC_CMD_DEL_HOST );
+            send_hwmon_command ( node_ptr->hostname,
MTC_CMD_DEL_HOST ); + send_guest_command ( node_ptr->hostname, MTC_CMD_DEL_HOST ); + + mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, work_queue_timeout ); + + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__MODIFY_HOSTNAME_DELETE_WAIT ; + + break ; + } + case MTC_CMD_STAGE__MODIFY_HOSTNAME_DELETE_WAIT: + { + /* We still doing enable work ? */ + if ( node_ptr->libEvent_work_fifo.size () == 0 ) + { + string name = node_ptr->mtcCmd_work_fifo_ptr->name ; + + if ( node_ptr->mtcCmd_timer.tid ) + mtcTimer_stop ( node_ptr->mtcCmd_timer ); + + /* make the change */ + hostname_inventory.remove ( node_ptr->hostname ); + node_ptr->hostname = name ; + hostname_inventory.push_back ( node_ptr->hostname ); + + /* update the timer hostname */ + node_ptr->mtcTimer.hostname = name ; + node_ptr->mtcAlive_timer.hostname = name ; + node_ptr->mtcSwact_timer.hostname = name ; + node_ptr->mtcCmd_timer.hostname = name ; + node_ptr->oosTestTimer.hostname = name ; + node_ptr->insvTestTimer.hostname = name ; + node_ptr->mtcConfig_timer.hostname = name ; + + mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, work_queue_timeout ); + + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__MODIFY_HOSTNAME_CREATE_WAIT ; + + /* return RETRY so that the FSM reloads the inventory loop */ + return (RETRY); + } + if ( node_ptr->mtcCmd_timer.ring == true ) + { + elog ("%s mtcCmd timeout ; purging host's work queue\n", node_ptr->hostname.c_str()); + workQueue_purge ( node_ptr ); + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__DONE ; + } + break ; + } + case MTC_CMD_STAGE__MODIFY_HOSTNAME_CREATE_WAIT: + { + /* We still doing create work ? 
*/ + if ( node_ptr->libEvent_work_fifo.size() == 0 ) + { + if ( node_ptr->mtcCmd_timer.tid ) + mtcTimer_stop ( node_ptr->mtcCmd_timer ); + + send_hbs_command ( node_ptr->hostname, MTC_CMD_ADD_HOST ); + send_hwmon_command ( node_ptr->hostname, MTC_CMD_ADD_HOST ); + send_guest_command ( node_ptr->hostname, MTC_CMD_ADD_HOST ); + + if ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) + { + send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST ); + } + node_ptr->mtcCmd_work_fifo_ptr->status = PASS ; + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__DONE ; + } + if ( node_ptr->mtcCmd_timer.ring == true ) + { + elog ("%s hostname change failed\n", node_ptr->hostname.c_str()); + elog ("... workQueue empty timeout ; purging host's work queue\n"); + workQueue_purge ( node_ptr ); + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__DONE ; + } + break ; + } + case MTC_CMD_STAGE__DONE: + case MTC_CMD_STAGE__STAGES: + default: + { + int size ; + + mtcTimer_reset ( node_ptr->mtcCmd_timer ); + + if ( node_ptr->mtcCmd_work_fifo_ptr->status != PASS ) + { + qlog ("%s Command '%s' (%d) Failed (Status:%d)\n", + node_ptr->hostname.c_str(), + _get_cmd_str(node_ptr->mtcCmd_work_fifo_ptr->cmd).c_str(), + node_ptr->mtcCmd_work_fifo_ptr->cmd, + node_ptr->mtcCmd_work_fifo_ptr->status ); + } + else + { + qlog ("%s Command '%s' Completed\n", node_ptr->hostname.c_str(), + _get_cmd_str(node_ptr->mtcCmd_work_fifo_ptr->cmd).c_str()); + } + if ( ( size = node_ptr->mtcCmd_done_fifo.size()) != 0 ) + { + wlog ( "%s mtcCmd doneQ not empty (contains %d elements)\n", + node_ptr->hostname.c_str(), size ); + mtcCmd_doneQ_purge ( node_ptr ); + } + node_ptr->mtcCmd_done_fifo.push_front(node_ptr->mtcCmd_work_fifo.front()); + node_ptr->mtcCmd_work_fifo.pop_front(); + break ; + } + } + return (PASS); +} + +/* *********************************************************************** + * + * Name : nodeLinkClass::mtcCmd_workQ_purge + * + * Description: Removes all items from the work queue. 
+ *
+ */
+int nodeLinkClass::mtcCmd_workQ_purge ( struct nodeLinkClass::node * node_ptr )
+{
+    /* Logs every command still pending in the work queue, together with
+     * the stage it had reached, and then empties the queue.
+     * Always returns PASS. */
+    const int pending = node_ptr->mtcCmd_work_fifo.size() ;
+
+    /* nothing queued ; just note it and leave */
+    if ( pending == 0 )
+    {
+        qlog ("%s all work done\n", node_ptr->hostname.c_str());
+        return (PASS);
+    }
+
+    wlog ("%s purging %d items from work queue\n", node_ptr->hostname.c_str(), pending );
+
+    /* the node's own work queue iterator is used for the walk */
+    node_ptr->mtcCmd_work_fifo_ptr = node_ptr->mtcCmd_work_fifo.begin();
+    while ( node_ptr->mtcCmd_work_fifo_ptr != node_ptr->mtcCmd_work_fifo.end() )
+    {
+        wlog ("%s purging mtcCmd '%s' in stage %d from work queue\n",
+                  node_ptr->hostname.c_str(),
+                  _get_cmd_str(node_ptr->mtcCmd_work_fifo_ptr->cmd).c_str(),
+                  node_ptr->mtcCmd_work_fifo_ptr->stage);
+        node_ptr->mtcCmd_work_fifo_ptr++ ;
+    }
+
+    node_ptr->mtcCmd_work_fifo.clear();
+    return (PASS);
+}
+
+
+/* ***********************************************************************
+ *
+ * Name : nodeLinkClass::mtcCmd_doneQ_purge
+ *
+ * Description: Removes all items from the mtcCmd done queue.
+ *
+ * Returns a failure, the sequence number of the first command
+ * in the done queue that did not PASS.
+ * + */ +int nodeLinkClass::mtcCmd_doneQ_purge ( struct nodeLinkClass::node * node_ptr ) +{ + int rc = PASS ; + int size = node_ptr->mtcCmd_done_fifo.size() ; + if ( size ) + { + int index = 0 ; + for ( node_ptr->mtcCmd_done_fifo_ptr = node_ptr->mtcCmd_done_fifo.begin(); + node_ptr->mtcCmd_done_fifo_ptr != node_ptr->mtcCmd_done_fifo.end(); + node_ptr->mtcCmd_done_fifo_ptr++ ) + { + index++ ; + if ( node_ptr->mtcCmd_done_fifo_ptr->status ) + { + dlog ("%s mtcCmd:%d failed (status:%d) (%d of %d)\n", + node_ptr->hostname.c_str(), + node_ptr->mtcCmd_done_fifo_ptr->cmd, + node_ptr->mtcCmd_done_fifo_ptr->status, + index, size); + /* Save sequence of first failed command */ + if ( rc == PASS ) + { + rc = node_ptr->mtcCmd_done_fifo_ptr->seq ; + } + } + } + if ( rc == PASS ) + { + dlog ("%s all (%d) mtcCmd operations passed\n", node_ptr->hostname.c_str(), size ); + } + + qlog ("%s purging %d items from done queue\n", node_ptr->hostname.c_str(), size ); + node_ptr->mtcCmd_done_fifo.clear(); + } + return (rc); +} + diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCompMsg.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCompMsg.cpp new file mode 100755 index 00000000..d0aa0284 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCompMsg.cpp @@ -0,0 +1,995 @@ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Node Maintenance "Compute Messaging" + * Implementation + */ + +/** + * @detail + * Detailed description ... + * + * + */ + +#include +#include +#include /* for ... unix domain sockets */ +#include +#include +#include +#include /* for ... syslog */ +#include /* for ... waitpid */ +#include +#include +#include +#include /* for the list of conf file names */ + + +using namespace std; + +#define __AREA__ "msg" + +#include "nodeClass.h" /* for ... maintenance class nodeLinkClass */ +#include "jsonUtil.h" /* for ... 
Json utilities */ +#include /* for ... json-c json string parsing */ +#include "mtcNodeMsg.h" /* for ... daemon socket structure */ +#include "mtcNodeComp.h" /* for ... this module header */ +#include "nodeUtil.h" /* for ... Utility Service Header */ +#include "daemon_common.h" +#include "regexUtil.h" /* for ... Regex and String utilities */ + +extern "C" +{ +#include "amon.h" /* for ... active monitoring utilities */ +} + +extern char *program_invocation_short_name; + +int mtcAlive_mgmnt_sequence = 0 ; +int mtcAlive_infra_sequence = 0 ; + +/***************************************************************************** + * Also handles: Accelerated Virtual Switch 'events' handler + * for receiving data port state change event + * + * Event strings are:- + * {"type":"port-state", "severity":"critical|major|clear"} + * severity: + * critical - port has failed and is not part of an aggregate + * or is the last port in an aggregate + * (degrade, disable services) + * major - port has failed and is part of an aggregate + * with other inservice-ports (degrade only) + * clear - port has recovered from a failed state and is + * operational (clear degrade, enable services) + * + * NOTE : The port status can transition from any of the above + * states to any other state. + * + * RMON monitors the vswitch ports at a 20 second interval(debounce value). + * If a port changes link state during the polling period, it will + * raise/clear the alarm, but now also calculates the impact of that port + * failure on the provider network data interface. + * + * The overall aggregated state across all provider network interfaces will + * be reported to maintenance when ports enter a link down or up state. + * The agent will also periodically send the current provider network port + * status to maintenance every 20 seconds. 
+ * + * Return : MTC_EVENT_AVS_CLEAR|MAJOR|CRITICAL + *****************************************************************************/ +/** Receive and process event messages from rmon **/ +static int rmon_message_error = 0 ; + +int service_rmon_inbox ( mtc_socket_type * sock_ptr, int & rmon_code, string & resource_name ) +{ + /* Max rmon message length */ + #define RMON_MAX_LEN (100) + /* Max tries to receive rmon message */ + #define MAX_TRIES (3) + + char buf[RMON_MAX_LEN]; + char res_name[RMON_MAX_LEN]; + socklen_t len = sizeof(struct sockaddr_in) ; + char str[RMON_MAX_LEN]; + int rc = FAIL; + + + int sequence = 0; + int bytes = 0 ; + int num_tries = 0; + + do + { + memset ( buf,0,RMON_MAX_LEN); + memset ( str,0,RMON_MAX_LEN); + + bytes = recvfrom( sock_ptr->rmon_socket, buf, RMON_MAX_LEN, 0, + (struct sockaddr *)&sock_ptr->mtc_cmd_addr, &len); + if ( bytes > 0 ) + { + sscanf ( buf, "%99s %99s %d", res_name, str, &sequence ); + string r_name(res_name); + resource_name = r_name; + + if ( str[0] != '\0' ) + { + mlog("%s \n",str); + // check if it is an AVS type message + if (regexUtil_string_startswith(str, "AVS_clear")) { + rmon_code = MTC_EVENT_AVS_CLEAR; + } else if (regexUtil_string_startswith(str, "AVS_major")) { + rmon_code = MTC_EVENT_AVS_MAJOR; + } else if (regexUtil_string_startswith(str, "AVS_critical")) { + rmon_code = MTC_EVENT_AVS_CRITICAL; + // process generic RMON messages + } else if (regexUtil_string_startswith(str, "cleared")) { + rmon_code = RMON_CLEAR; + } else if (regexUtil_string_startswith(str, "minor")) { + rmon_code = RMON_MINOR; + } else if (regexUtil_string_startswith(str, "major")) { + rmon_code = RMON_MAJOR; + } else if (regexUtil_string_startswith(str, "critical")) { + rmon_code = RMON_CRITICAL; + } else { + elog("Invalid rmon string: %s \n", str); + rc = FAIL; + return rc; + } + rmon_message_error = 0 ; + rc = PASS; + return rc; + } + else + { + wlog_throttled (rmon_message_error, 1, "rmond message with no severity specified (%s)\n", 
+ resource_name.empty() ? "no resource" : resource_name.c_str()); + + if (( rmon_message_error == 1 ) && ( buf[0] != '\0' )) + { + ilog ("rmon message: [%s]\n", buf ); + } + rc = FAIL_NULL_POINTER; + return rc; + } + } + else if (( 0 > bytes ) && ( errno != EINTR ) && ( errno != EAGAIN )) + { + elog("rmon event recv error (%d:%s) \n", errno, strerror(errno)); + rc = FAIL; + } + } while (( bytes > 0 ) && ( ++num_tries < MAX_TRIES)) ; + + return rc; +} + + +/* Receive and process commands from controller maintenance */ +int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) +{ + int bytes = 0 ; + mtc_message_type msg ; + int rc = FAIL ; + + if ( interface == INFRA_INTERFACE ) + { + if ( ! get_ctrl_ptr()->infra_iface_provisioned ) + { + wlog ("cannot receive from unprovisioned %s interface\n", + get_iface_name_str(interface) ); + return (rc); + } + } + + /* clean the rx/tx buffer */ + memset ((void*)&msg,0,sizeof(mtc_message_type)); + + if ( interface == MGMNT_INTERFACE ) + { + if (( sock_ptr->mtc_client_rx_socket ) && + ( sock_ptr->mtc_client_rx_socket->sock_ok() == true )) + { + bytes = sock_ptr->mtc_client_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type)); + } + else + { + elog ("cannot read from null or failed 'mtc_client_rx_socket'\n"); + return (FAIL_TO_RECEIVE); + } + } + else if ( interface == INFRA_INTERFACE ) + { + if (( sock_ptr->mtc_client_infra_rx_socket ) && + ( sock_ptr->mtc_client_infra_rx_socket->sock_ok() == true )) + { + bytes = sock_ptr->mtc_client_infra_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type)); + } + else + { + elog ("cannot read from null or failed 'mtc_client_infra_rx_socket'\n"); + return (FAIL_TO_RECEIVE); + } + } + + if( bytes <= 0 ) + { + if ( ( errno == EINTR ) || ( errno == EAGAIN )) + { + return (RETRY); + } + else + { + return (FAIL_TO_RECEIVE); + } + } + + print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, get_iface_name_str(interface), false ); + + /* Message version greater than zero 
have the hosts management + * mac address appended to the header string */ + if ( msg.ver >= MTC_CMD_FEATURE_VER__MACADDR_IN_CMD ) + { + /* the minus 1 is to back up from the null char that is accounted for in the hearder size */ + if ( strncmp ( &msg.hdr[MSG_HEADER_SIZE-1], get_ctrl_ptr()->macaddr.data(), MSG_HEADER_SIZE )) + { + wlog ("%s command not for this host (exp:%s det:%s) ; ignoring ...\n", + get_mtcNodeCommand_str(msg.cmd), + get_ctrl_ptr()->macaddr.c_str(), + &msg.hdr[MSG_HEADER_SIZE-1]); + print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, get_iface_name_str(interface), true ); + return (FAIL_INVALID_DATA); + } + } + /* Check for response messages */ + if ( strstr ( &msg.hdr[0], get_cmd_req_msg_header() ) ) + { + rc = PASS ; + if ( msg.cmd == MTC_REQ_MTCALIVE ) + { + mlog1 ("mtcAlive request received (%s network)\n", get_iface_name_str (interface)); + return ( send_mtcAlive_msg ( sock_ptr, get_who_i_am(), interface )); + } + else if ( msg.cmd == MTC_MSG_LOCKED ) + { + /* Only recreate the file if its not already present */ + if ( daemon_is_file_present ( NODE_LOCKED_FILE ) == false ) + { + daemon_log ( NODE_LOCKED_FILE, + "This node is currently in the administratively locked state" ); + } + return (PASS); + } + else if ( msg.cmd == MTC_MSG_SUBF_GOENABLED_FAILED ) + { + /* remove the GOENABLED_SUBF_PASS and create GOENABLED_SUBF_FAIL file */ + daemon_remove_file ( GOENABLED_SUBF_PASS ); + daemon_log ( GOENABLED_SUBF_FAIL, "host subfunction has failed as instructed by maintenance."); + return (PASS); + } + else if ( msg.cmd == MTC_REQ_MAIN_GOENABLED ) + { + time_t time_now ; + double goenabled_age ; + ctrl_type * ctrl_ptr = get_ctrl_ptr () ; + + time (&time_now); // current time in seconds (UTC) + goenabled_age = difftime ( time_now, ctrl_ptr->goenabled_main_time ); + + /* Check to see if we are already running the requested test */ + if ( ctrl_ptr->active_script_set == GOENABLED_MAIN_SCRIPTS ) + { + ilog ("GoEnabled In-Progress\n"); + } + /* Report 
PASS immediately if there was a recent PASS already */ + else if (( daemon_is_file_present ( GOENABLED_MAIN_PASS ) && + ( goenabled_age < MTC_MINS_20 ))) + { + ilog ("GoEnabled Passed (%f seconds ago)\n", goenabled_age ); + send_mtc_msg ( sock_ptr, MTC_MSG_MAIN_GOENABLED, "" ); + } + else + { + ilog ("GoEnabled request posted (%s)\n",get_iface_name_str (interface)); + ctrl_ptr->posted_script_set.push_back ( GOENABLED_MAIN_SCRIPTS ); + ctrl_ptr->posted_script_set.unique(); + } + rc = PASS ; + } + else if ( msg.cmd == MTC_REQ_SUBF_GOENABLED ) + { + time_t time_now ; + double goenabled_age ; + ctrl_type * ctrl_ptr = get_ctrl_ptr () ; + + time (&time_now); // current time in seconds (UTC) + goenabled_age = difftime ( time_now, ctrl_ptr->goenabled_subf_time ); + if ( ctrl_ptr->active_script_set == GOENABLED_SUBF_SCRIPTS ) + { + ilog ("GoEnabled SubF In-Progress\n"); + } + /* eport PASS immediately if there was a recent PASS already */ + else if (( daemon_is_file_present ( GOENABLED_SUBF_PASS ) && + ( goenabled_age < MTC_MINS_20 ))) + { + ilog ("GoEnabled SubF Passed (%f seconds ago)\n", goenabled_age); + send_mtc_msg ( sock_ptr, MTC_MSG_SUBF_GOENABLED, "" ); + } + else + { + ilog ("GoEnabled Subf request posted (%s)\n", get_iface_name_str (interface)); + + /* Cleanup test result flag files */ + if ( daemon_is_file_present ( GOENABLED_SUBF_PASS) ) + { + ilog ("clearing stale %s file\n", GOENABLED_SUBF_PASS ); + daemon_remove_file (GOENABLED_SUBF_PASS) ; + } + + if ( daemon_is_file_present ( GOENABLED_SUBF_FAIL) ) + { + ilog ("clearing stale %s file\n", GOENABLED_SUBF_FAIL ); + daemon_remove_file (GOENABLED_SUBF_FAIL) ; + } + ctrl_ptr->posted_script_set.push_back ( GOENABLED_SUBF_SCRIPTS ); + ctrl_ptr->posted_script_set.unique(); + } + rc = PASS ; + } + else if ( msg.cmd == MTC_CMD_REBOOT ) + { + ilog ("Reboot command received (%s)\n", get_iface_name_str (interface)); + } + else if ( msg.cmd == MTC_CMD_LAZY_REBOOT ) + { + ilog ("Lazy Reboot command received (%s) ; 
delay:%d seconds\n", get_iface_name_str (interface), msg.num ? msg.parm[0] : 0 ); + } + else if ( is_host_services_cmd ( msg.cmd ) == true ) + { + ctrl_type * ctrl_ptr = get_ctrl_ptr () ; + + /* Check to see if this command is already running. + * hostservices.posted is set to command on launch + * hostservices.monitor is set to command while monitoring */ + if (( ctrl_ptr->hostservices.posted == msg.cmd ) || + ( ctrl_ptr->hostservices.monitor == msg.cmd )) + { + wlog ("%s already in progress (%d:%d)\n", + get_mtcNodeCommand_str(msg.cmd), + ctrl_ptr->hostservices.posted, + ctrl_ptr->hostservices.monitor ); + + rc = PASS ; + } + else + { + ctrl_ptr->posted_script_set.push_back ( HOSTSERVICES_SCRIPTS ); + ctrl_ptr->posted_script_set.unique (); + + ilog ("%s request posted (%s)\n", + get_mtcNodeCommand_str(msg.cmd), + get_iface_name_str (interface)); + + ctrl_ptr->hostservices.posted = msg.cmd ; + ctrl_ptr->hostservices.monitor = MTC_CMD_NONE ; + rc = PASS ; + } + + /* Fault insertion - fail host services command */ + if ( ( daemon_is_file_present ( MTC_CMD_FIT__START_SVCS ))) + { + rc = FAIL_FIT ; + wlog ("%s Start Services - fit failure (%s)\n", + get_mtcNodeCommand_str(msg.cmd), + get_iface_name_str (interface) ); + } + + /* Fault insertion - fail to send host services ACK */ + if ( ( daemon_is_file_present ( MTC_CMD_FIT__NO_HS_ACK ))) + { + wlog ("%s Start Services - fit no ACK (%s)\n", + get_mtcNodeCommand_str(msg.cmd), + get_iface_name_str (interface) ); + return (PASS); + } + + /* inform mtcAgent of enhanced ost services support */ + msg.parm[1] = MTC_ENHANCED_HOST_SERVICES ; + msg.parm[0] = rc ; + msg.num = 2 ; + + if ( rc ) + { + snprintf (msg.buf, BUF_SIZE, "host service launch failed (rc:%d)", rc ); + } + else + { + snprintf (msg.buf, BUF_SIZE, "host service launched"); + } + } + else if ( msg.cmd == MTC_CMD_WIPEDISK ) + { + ilog ("Reload command received (%s)\n", get_iface_name_str (interface)); + } + else if ( msg.cmd == MTC_CMD_RESET ) + { + ilog ("Reset 
command received (%s)\n", get_iface_name_str (interface)); + } + else if ( msg.cmd == MTC_CMD_LOOPBACK ) + { + ilog ("Loopback command received (%s)\n", get_iface_name_str (interface)); + } + else + { + rc = FAIL_BAD_CASE ; + elog ( "Unsupported maintenance command (%d)\n", msg.cmd ); + } + + snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_rsp_msg_header()); + } + else if ( strstr ( &msg.hdr[0], get_msg_rep_msg_header()) ) + { + if ( msg.cmd == MTC_MSG_MAIN_GOENABLED ) + { + ilog ("main function goEnabled results acknowledged (%s)\n", get_iface_name_str (interface)); + return (PASS); + } + else if ( msg.cmd == MTC_MSG_SUBF_GOENABLED ) + { + ilog ("sub-function goEnabled results acknowledged (%s)\n", get_iface_name_str (interface)); + return (PASS); + } + else + { + dlog2 ( "reply message for command %d\n", msg.cmd ); + return (PASS); + } + } + + else if ( strstr ( &msg.hdr[0], get_compute_msg_header()) ) + { + elog ("Unsupported Message\n"); + print_mtc_message ( &msg ); + return PASS ; + } + +/*********************************************************** + * + * If we get here, the response should be sent + * regardless of the execution status. 
+ * + * if ( rc == PASS ) + **********************************************************/ + { + bytes = sizeof(mtc_message_type)-BUF_SIZE; + + /* Fault insertion for no command ACK */ + if (( interface == MGMNT_INTERFACE ) && ( daemon_is_file_present ( MTC_CMD_FIT__NO_MGMNT_ACK ))) + { + wlog ("%s reply ack message - fit bypass (%s)\n", + get_mtcNodeCommand_str(msg.cmd), + get_iface_name_str (interface) ); + } + else if (( interface == INFRA_INTERFACE ) && ( daemon_is_file_present ( MTC_CMD_FIT__NO_INFRA_ACK ))) + { + wlog ("%s reply ack message - fit bypass (%s)\n", + get_mtcNodeCommand_str(msg.cmd), + get_iface_name_str (interface) ); + } + /* Otherwise, send the message back either over the mgmnt or infra interface */ + else if ( interface == MGMNT_INTERFACE ) + { + if (( sock_ptr->mtc_client_tx_socket ) && + ( sock_ptr->mtc_client_tx_socket->sock_ok() == true )) + { + rc=sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes); + } + else + { + elog ("cannot send to null or failed socket (%s network)\n", + get_iface_name_str (interface) ); + } + } + else if ( interface == INFRA_INTERFACE ) + { + if (( sock_ptr->mtc_client_infra_tx_socket ) && + ( sock_ptr->mtc_client_infra_tx_socket->sock_ok() == true )) + { + rc = sock_ptr->mtc_client_infra_tx_socket->write((char*)&msg.hdr[0], bytes); + } + else + { + elog ("cannot send to null or failed socket (%s network)\n", + get_iface_name_str (interface) ); + } + } + + if (rc != bytes ) + { + elog ("failed to send reply message (%d)\n", rc); + } + else + { + print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, get_iface_name_str(interface), false ); + } + + /* get the shutdown delay config alue */ + int delay = daemon_get_cfg_ptr()->failsafe_shutdown_delay ; + if ( delay < 1 ) + delay = 2 ; + + daemon_dump_info (); + + if ( msg.cmd == MTC_CMD_REBOOT ) + { + if ( daemon_is_file_present ( MTC_CMD_FIT__NO_REBOOT ) ) + { + ilog ("Reboot - fit bypass (%s)\n", get_iface_name_str (interface)); + return (PASS); + } + ilog 
("Reboot (%s)\n", get_iface_name_str (interface)); + daemon_log ( NODE_RESET_FILE, "reboot command" ); + fork_sysreq_reboot ( delay ); + rc = system("/usr/bin/systemctl reboot"); + } + if ( msg.cmd == MTC_CMD_LAZY_REBOOT ) + { + if ( daemon_is_file_present ( MTC_CMD_FIT__NO_REBOOT ) ) + { + ilog ("Lazy Reboot - fit bypass (%s)\n", get_iface_name_str (interface)); + return (PASS); + } + daemon_log ( NODE_RESET_FILE, "lazy reboot command" ); + if ( msg.num >= 1 ) + { + do + { + ilog ("Lazy Reboot (%s) ; rebooting in %d seconds\n", get_iface_name_str (interface), msg.num ? msg.parm[0] : 1 ); + sleep (1); + if ( msg.parm[0] % 5 ) + { + /* service the active monitoring every 5 seconds */ + active_monitor_dispatch (); + } + } while ( msg.parm[0]-- > 0 ) ; + } + else + { + ilog ("Lazy Reboot (%s) ; now\n", get_iface_name_str (interface) ); + } + fork_sysreq_reboot ( delay ); + rc = system("/usr/bin/systemctl reboot"); + } + else if ( msg.cmd == MTC_CMD_RESET ) + { + if ( daemon_is_file_present ( MTC_CMD_FIT__NO_RESET ) ) + { + ilog ("Reset - fit bypass (%s)\n", get_iface_name_str (interface)); + return (PASS); + } + ilog ("Reset 'reboot -f' (%s)\n", get_iface_name_str (interface)); + daemon_log ( NODE_RESET_FILE, "reset command" ); + fork_sysreq_reboot ( delay/2 ); + rc = system("/usr/bin/systemctl reboot --force"); + } + else if ( msg.cmd == MTC_CMD_WIPEDISK ) + { + int parent = 0 ; + + if ( daemon_is_file_present ( MTC_CMD_FIT__NO_WIPEDISK ) ) + { + ilog ("Wipedisk - fit bypass (%s)\n", get_iface_name_str (interface)); + return (PASS); + } + /* We fork a reboot as a fail safe. 
+             * If something goes wrong we should reboot anyway
+             */
+            fork_sysreq_reboot ( delay/2 );
+
+            /* We fork the wipedisk command as it may take upwards of 30s
+             * If we hold this thread for that long pmon will kill mtcClient
+             * which will prevent the reboot command from being issued
+             */
+            if ( 0 > ( parent = double_fork()))
+            {
+                elog ("failed to fork wipedisk command\n");
+            }
+            else if( 0 == parent ) /* we're the child */
+            {
+                ilog ("Disk wipe in progress (%s)\n", get_iface_name_str (interface));
+                daemon_log ( NODE_RESET_FILE, "wipedisk command" );
+                rc = system("/usr/local/bin/wipedisk --force");
+                ilog ("Disk wipe complete - Forcing Reboot ...\n");
+                rc = system("/usr/bin/systemctl reboot --force");
+                exit (0);
+            }
+
+        }
+        rc = PASS ;
+        fflush(stdout);
+    }
+    return (rc);
+}
+
+/** Send an event to the mtcAgent
+ *
+ * Builds a zeroed mtc_message_type for 'cmd', selects the message header
+ * that matches the command family and writes the message to
+ * sock_ptr->mtc_client_tx_socket.
+ *
+ * @param sock_ptr      - socket structure ; only mtc_client_tx_socket is used
+ * @param cmd           - an RMON event, the LOOPBACK event, an AVS event or
+ *                        a host services command ; anything else is rejected
+ * @param mtce_name_ptr - RMON events: the event name copied into the buffer
+ *                        host services: error text, its presence marks the
+ *                        command as failed ; may be NULL
+ *
+ * @return PASS               - the full message was written
+ *         FAIL_BAD_CASE      - unsupported cmd
+ *         FAIL_NO_DATA       - no message size was computed (bytes == 0),
+ *                              e.g. an RMON event with a NULL name
+ *         FAIL_TO_TRANSMIT   - the write returned something other than 'bytes'
+ *         FAIL_SOCKET_SENDTO - tx socket pointer is null or sock_ok() is false
+ **/
+int mtce_send_event ( mtc_socket_type * sock_ptr, int cmd , const char * mtce_name_ptr )
+{
+    mtc_message_type event ;
+
+    int rc    = PASS ;
+    int bytes = 0 ; /* number of bytes to transmit ; 0 means 'nothing to send' */
+
+    memset (&event, 0 , sizeof(mtc_message_type));
+
+    if (( cmd == MTC_EVENT_RMON_READY) ||
+        ( cmd == MTC_EVENT_RMON_MINOR) ||
+        ( cmd == MTC_EVENT_RMON_MAJOR) ||
+        ( cmd == MTC_EVENT_RMON_CRIT ) ||
+        ( cmd == MTC_EVENT_RMON_CLEAR ))
+    {
+        snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_mtce_event_header() );
+        if ( mtce_name_ptr )
+        {
+            size_t len = strnlen ( mtce_name_ptr, MAX_MTCE_EVENT_NAME_LEN );
+
+            /* Only the used part of the buffer is sent ; remove the
+             * unused remainder (BUF_SIZE-len) from the size */
+            bytes = ((sizeof(mtc_message_type))-(BUF_SIZE-len));
+
+            snprintf ( &event.buf[0], MAX_MTCE_EVENT_NAME_LEN , "%s", mtce_name_ptr );
+        } else {
+            slog ("Internal Error - mtce_name_ptr is null\n"); /* bytes stays 0 ; rejected below with FAIL_NO_DATA */
+        }
+    }
+    else if ( cmd == MTC_EVENT_LOOPBACK )
+    {
+        snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_loopback_header() );
+
+        /* We don't use the buffer for loopback events so remove it from the size */
+        bytes = ((sizeof(mtc_message_type))-(BUF_SIZE));
+    }
+    else if (( cmd == MTC_EVENT_AVS_CLEAR ) ||
+             ( cmd == MTC_EVENT_AVS_MAJOR ) ||
+             ( cmd == MTC_EVENT_AVS_CRITICAL ))
+    {
+        snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_mtce_event_header() );
+
+        /* We don't use the buffer for mtce events so remove it from the size */
+        bytes = ((sizeof(mtc_message_type))-(BUF_SIZE));
+    }
+    else if ( is_host_services_cmd ( cmd ) == true )
+    {
+        snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_cmd_rsp_msg_header() );
+
+        if ( mtce_name_ptr )
+        {
+            /* add the error message to the message buffer */
+            size_t len = strnlen ( mtce_name_ptr, MAX_MTCE_EVENT_NAME_LEN );
+
+            /* Only the used part of the buffer is sent ; remove the
+             * unused remainder (BUF_SIZE-len) from the size */
+            bytes = ((sizeof(mtc_message_type))-(BUF_SIZE-len));
+
+            snprintf ( &event.buf[0], MAX_MTCE_EVENT_NAME_LEN , "%s", mtce_name_ptr );
+            rc = FAIL_OPERATION ;
+        }
+        else
+        {
+            /* We don't use the buffer in the pass case */
+            bytes = ((sizeof(mtc_message_type))-(BUF_SIZE));
+            rc = PASS ;
+        }
+        event.cmd = cmd ;
+        event.parm[0] = rc ; /* parm[0] carries the host services pass/fail status */
+        event.num = 1 ;
+    }
+    else
+    {
+        elog ("Unsupported mtce event (%d)\n", cmd );
+        return ( FAIL_BAD_CASE );
+    }
+
+    event.cmd = cmd ;
+
+    if (( sock_ptr->mtc_client_tx_socket ) &&
+        ( sock_ptr->mtc_client_tx_socket->sock_ok() == true ))
+    {
+        if ( bytes == 0 )
+        {
+            slog ("message send failed ; message size=0 for cmd:%d is 0\n", event.cmd );
+            rc = FAIL_NO_DATA ;
+        }
+        else if ((rc = sock_ptr->mtc_client_tx_socket->write((char*)&event.hdr[0], bytes))!= bytes )
+        {
+            elog ("message send failed. 
(%d) (%d:%s) \n", rc, errno, strerror(errno)); + elog ("message: %d bytes to <%s:%d>\n", bytes, + sock_ptr->mtc_client_tx_socket->get_dst_str(), + sock_ptr->mtc_client_tx_socket->get_dst_addr()->getPort()); + rc = FAIL_TO_TRANSMIT ; + } + else + { + mlog2 ("Transmit: %x bytes to %s:%d\n", bytes, + sock_ptr->mtc_client_tx_socket->get_dst_str(), + sock_ptr->mtc_client_tx_socket->get_dst_addr()->getPort()); + print_mtc_message ( get_hostname(), MTC_CMD_TX, event, get_iface_name_str(MGMNT_INTERFACE), false ); + rc = PASS ; + } + } + else + { + elog ("cannot send to null or failed socket (%s network)\n", + get_iface_name_str (MGMNT_INTERFACE) ); + rc = FAIL_SOCKET_SENDTO ; + } + return rc ; +} + +/**************************************************************************** + * + * Name : create_mtcAlive_msg + * + * Description: Creates a common mtcAlive message + * + ****************************************************************************/ +int create_mtcAlive_msg ( mtc_message_type & msg, int cmd, string identity, int interface ) +{ + struct timespec ts ; + clock_gettime (CLOCK_MONOTONIC, &ts ); + + /* Get health state of the host - presently limited to the following + * + * during boot = NODE_HEALTH_UNKNOWN + * /var/run/.config_pass = NODE_HEALTHY + * /var/run/.config_fail = NODE_UNHEALTHY + * + * */ + + /* Init the message buffer */ + MEMSET_ZERO (msg); + snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_compute_msg_header()); + msg.cmd = cmd ; + msg.num = MTC_PARM_MAX_IDX ; + + /* Insert the host uptime */ + msg.parm[MTC_PARM_UPTIME_IDX] = ts.tv_sec ; + + /* Insert the host health - TO BE OBSOLTETED */ + msg.parm[MTC_PARM_HEALTH_IDX] = get_node_health( get_hostname() ) ; + + /* Insert the mtce flags */ + msg.parm[MTC_PARM_FLAGS_IDX] = 0 ; + if ( daemon_is_file_present ( CONFIG_COMPLETE_FILE ) ) + msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__I_AM_CONFIGURED ; + if ( daemon_is_file_present ( CONFIG_FAIL_FILE ) ) + msg.parm[MTC_PARM_FLAGS_IDX] |= 
MTC_FLAG__I_AM_NOT_HEALTHY ; + if ( daemon_is_file_present ( CONFIG_PASS_FILE ) ) + msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__I_AM_HEALTHY ; + if ( daemon_is_file_present ( NODE_LOCKED_FILE ) ) + msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__I_AM_LOCKED ; + if ( daemon_is_file_present ( GOENABLED_MAIN_PASS ) ) + msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__MAIN_GOENABLED ; + if ( daemon_is_file_present ( PATCHING_IN_PROG_FILE ) ) + msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__PATCHING ; + if ( daemon_is_file_present ( NODE_IS_PATCHED_FILE ) ) + msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__PATCHED ; + + /* manage the compute subfunction flag */ + if ( is_subfunction_compute () == true ) + { + if ( daemon_is_file_present ( CONFIG_COMPLETE_COMPUTE ) ) + { + msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__SUBF_CONFIGURED ; + + /* Only set the go enabled subfunction flag if the pass file only exists */ + if (( daemon_is_file_present ( GOENABLED_SUBF_PASS ) == true ) && + ( daemon_is_file_present ( GOENABLED_SUBF_FAIL ) == false )) + { + msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__SUBF_GOENABLED ; + } + } + } + + if ( daemon_is_file_present ( SMGMT_DEGRADED_FILE ) ) + { + msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__SM_DEGRADED ; + } + + /* add the interface and sequence number to the mtcAlice message */ + identity.append ( ",\"interface\":\""); + identity.append (get_iface_name_str(interface)); + identity.append("\",\"sequence\":"); + + if ( interface == INFRA_INTERFACE ) + { + identity.append(itos(mtcAlive_infra_sequence++)); + } + else + { + identity.append(itos(mtcAlive_mgmnt_sequence++)); + } + identity.append("}"); + + memcpy ( &msg.buf[0], identity.c_str(), identity.size() ); + + /* Send only the data we care about */ + return (((sizeof(mtc_message_type))-(BUF_SIZE)+(identity.size())+1)); +} + + + +/* Send GOENABLED messages to the controller */ + +int send_mtc_msg_failed = 0 ; +int send_mtc_msg ( mtc_socket_type * sock_ptr, int cmd , string identity ) +{ + int rc = FAIL ; + + if (( cmd 
== MTC_MSG_MAIN_GOENABLED ) ||
+        ( cmd == MTC_MSG_SUBF_GOENABLED ) ||
+        ( cmd == MTC_MSG_MAIN_GOENABLED_FAILED ) ||
+        ( cmd == MTC_MSG_SUBF_GOENABLED_FAILED ))
+    {
+        int interface = MGMNT_INTERFACE ; /* these messages only go out on the management interface */
+        mtc_message_type msg ;
+        int bytes = create_mtcAlive_msg ( msg, cmd, identity, interface );
+        if (( sock_ptr->mtc_client_tx_socket ) &&
+            ( sock_ptr->mtc_client_tx_socket->sock_ok() == true ))
+        {
+            /* Send back to requester - TODO: consider sending back to both as multicast */
+            if ((rc = sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes)) != bytes )
+            {
+                if ( rc == -1 )
+                {
+                    wlog_throttled (send_mtc_msg_failed, 100 ,
+                                    "failed to send <%s:%d> (%d:%m)\n",
+                                    sock_ptr->mtc_client_tx_socket->get_dst_str(),
+                                    sock_ptr->mtc_client_tx_socket->get_dst_addr()->getPort(), errno );
+                }
+                else
+                {
+                    wlog_throttled ( send_mtc_msg_failed, 100 ,
+                                     "sent only %d of %d bytes to <%s:%d>\n",
+                                     rc, bytes,
+                                     sock_ptr->mtc_client_tx_socket->get_dst_str(),
+                                     sock_ptr->mtc_client_tx_socket->get_dst_addr()->getPort());
+                }
+            }
+            else
+            {
+                send_mtc_msg_failed = 0 ; /* a good send clears the throttle counter */
+                print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, get_iface_name_str(interface), false );
+                rc = PASS ;
+            }
+        }
+        else
+        {
+            elog ("cannot send to null or failed socket (%s network)\n",
+                   get_iface_name_str (MGMNT_INTERFACE) );
+        }
+    }
+    else
+    {
+        elog ( "Unsupported Mtc command (%d)\n", cmd );
+    }
+
+    return (PASS) ; /* NOTE(review): 'rc' is tracked above but PASS is always returned ; confirm against callers whether failures are meant to be hidden */
+}
+/**
+ * Build a mtcAlive message and send it to the mtcAgent over the
+ * requested interface.
+ *
+ * @param sock_ptr  - socket structure ; the mgmnt or infra client tx
+ *                    socket is selected from it by 'interface'
+ * @param identity  - identity string handed to create_mtcAlive_msg
+ * @param interface - MGMNT_INTERFACE or INFRA_INTERFACE
+ *
+ * @return PASS               - full message written, or the send was
+ *                              bypassed because the MTC_CMD_FIT__NO_MTCALIVE
+ *                              fault insertion file is present
+ *         FAIL               - infra interface requested but not
+ *                              provisioned, or the selected socket is
+ *                              null or not ok
+ *         FAIL_BAD_PARM      - 'interface' is neither mgmnt nor infra
+ *         FAIL_SOCKET_SENDTO - the write failed or was short
+ */
+int send_mtcAlive_msg_failed = 0 ; /* wlog_throttled counter for this function ; cleared after a good send */
+int send_mtcAlive_msg ( mtc_socket_type * sock_ptr, string identity, int interface )
+{
+    mtc_message_type msg ;
+    msgClassSock * mtcAlive_tx_sock_ptr = NULL ;
+    int rc = FAIL ;
+
+    if (( interface == INFRA_INTERFACE ) &&
+        ( get_ctrl_ptr()->infra_iface_provisioned != true ))
+    {
+        dlog2 ("cannot send to unprovisioned %s interface\n",
+                get_iface_name_str(interface) );
+        return (rc);
+    }
+
+
+    if ( interface == MGMNT_INTERFACE )
+    {
+        /* management interface */
+        mtcAlive_tx_sock_ptr = sock_ptr->mtc_client_tx_socket ;
+    }
+    else if ( interface == INFRA_INTERFACE )
+    {
+        /* infrastructure interface */
+        mtcAlive_tx_sock_ptr = sock_ptr->mtc_client_infra_tx_socket ;
+    }
+    else
+    {
+        wlog_throttled ( send_mtcAlive_msg_failed, 100,
+                         "Unsupported interface (%d)\n", interface );
+        return (FAIL_BAD_PARM);
+    }
+
+    if ( daemon_is_file_present ( MTC_CMD_FIT__NO_MTCALIVE ))
+    {
+        wlog ("mtcAlive - fit bypass\n"); /* fault insertion: pretend the send worked */
+        return (PASS);
+    }
+    else
+    {
+        int bytes = create_mtcAlive_msg ( msg, MTC_MSG_MTCALIVE, identity, interface );
+
+        if (( mtcAlive_tx_sock_ptr ) &&
+            ( mtcAlive_tx_sock_ptr->sock_ok() == true ))
+        {
+            if ((rc = mtcAlive_tx_sock_ptr->write((char*)&msg.hdr[0], bytes)) != bytes )
+            {
+                if ( rc == -1 )
+                {
+                    wlog_throttled (send_mtcAlive_msg_failed, 100 ,
+                                    "failed to send <%s:%d> (%d:%m) (%s)\n",
+                                    mtcAlive_tx_sock_ptr->get_dst_str(),
+                                    mtcAlive_tx_sock_ptr->get_dst_addr()->getPort(),
+                                    errno, get_iface_name_str(interface) );
+                }
+                else
+                {
+                    wlog_throttled ( send_mtcAlive_msg_failed, 100 ,
+                                     "sent only %d of %d bytes to <%s:%d> (%s)\n",
+                                     rc, bytes,
+                                     mtcAlive_tx_sock_ptr->get_dst_str(),
+                                     mtcAlive_tx_sock_ptr->get_dst_addr()->getPort(),
+                                     get_iface_name_str(interface) );
+                }
+                rc = FAIL_SOCKET_SENDTO ; /* both the failed and the short write report the same code */
+            }
+            else
+            {
+                send_mtcAlive_msg_failed = 0 ;
+                print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, get_iface_name_str(interface), false );
+                rc = PASS ;
+            }
+        }
+        else
+        {
+            elog ("cannot send to null or failed socket (%s network)\n",
+                   get_iface_name_str(interface)); /* rc is still FAIL here */
+        }
+    }
+    return (rc) ;
+}
+
+/* Accelerated Virtual Switch 'events' socket
+ * - for receiving data port state change event
+ * Event strings are
+ *
+ * {"type":"port-state", "severity":"critical|major|clear"}
+ *
+ * type:port-state - the provider network data port status has changed to the supplied fault severity
+ *
+ * severity:
+ *   critical - port has failed and is not part of an aggregate or is the last port in an aggregate (degrade, disable services)
+ *   major    - port has failed and is part of an aggregate with other inservice-ports 
(degrade only) + * clear - port has recovered from a failed state and is operational (clear degrade, enable services) + * + * NOTE: The port status can transition from any of the above states to any other state. + * + * The neutron agent monitors the vswitch ports at a 2 second interval. + * If a port changes link state during the polling period, it will + * raise/clear the alarm, but now also calculates the impact of that port + * failure on the provider network data interface. + * + * The overall aggregated state across all provider network interfaces will + * be reported to maintenance when ports enter a link down or up state. + * The agent will also periodically send the current provider network port + * status to maintenance every 30 seconds. + * + */ + +int mtcCompMsg_testhead ( void ) +{ + return (PASS); +} diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCtrlMsg.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCtrlMsg.cpp new file mode 100755 index 00000000..ff404bff --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCtrlMsg.cpp @@ -0,0 +1,1134 @@ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Node Maintenance "Compute Messaging" + * Implementation + */ + +/** + * @detail + * Detailed description ... + * + * + */ + +#include +#include +#include /* for ... unix domain sockets */ +#include +#include +#include +#include +#include /* for hostent */ +#include +#include +#include +#include +#include +#include /* for close and usleep */ +#include +#include +#include + +using namespace std; + +#define __AREA__ "msg" + +#include "nodeClass.h" +#include "mtcNodeMsg.h" +#include "jsonUtil.h" /* for ... jsonApi_get_key_value */ +#include "daemon_option.h" +#include "daemon_common.h" +#include "mtcAlarm.h" /* for ... mtcAlarm... */ +#include "nodeUtil.h" /* for ... get_event_str ... 
*/
+
+
+/* Throttle logging of messages from unknown IP addresses */
+std::list<string> unknown_ip_list ;
+
+/**
+ * Send the specified command to the guestAgent daemon.
+ *
+ * Nothing is sent, and PASS is returned, unless the inventory reports
+ * 'hostname' as a compute host or as having the compute subfunction.
+ *
+ * The message buffer carries a small JSON object:
+ *   MTC_CMD_ADD_HOST / MTC_CMD_MOD_HOST : hostname, uuid, ip, personality
+ *   MTC_CMD_DEL_HOST                    : hostname, uuid
+ *   any other command                   : hostname only
+ *
+ * @param hostname - host the command is about
+ * @param command  - command code placed in the message
+ *
+ * @return PASS             - message sent, or host is not compute capable
+ *         FAIL_BAD_PARM    - the host info does not fit the message buffer
+ *         FAIL_TO_TRANSMIT - the socket write returned <= 0
+ */
+int send_guest_command ( string hostname, int command )
+{
+    int rc = PASS ;
+    nodeLinkClass * obj_ptr = get_mtcInv_ptr();
+
+    mlog ("%s NodeType %s (0x%x) Check: %c %c \n",
+           hostname.c_str(),
+           obj_ptr->functions.c_str(),
+           obj_ptr->get_nodetype(hostname),
+           obj_ptr->is_compute (hostname) ? 'Y' : 'n',
+           obj_ptr->is_compute_subfunction(hostname) ? 'Y' : 'n');
+
+    if ( obj_ptr->is_compute (hostname) ||
+         obj_ptr->is_compute_subfunction(hostname))
+    {
+        mtc_message_type msg ; /* the message to send */
+
+        int bytes_to_send;
+        int bytes ;
+
+        mtc_socket_type * sock_ptr = get_sockPtr ();
+
+        ilog ("%s sending '%s' to guest service\n", hostname.c_str(), get_event_str(command).c_str());
+        string hostinfo = "{\"hostname\":\"" ;
+        hostinfo.append (hostname);
+        hostinfo.append ("\"");
+
+        if ( (command == MTC_CMD_ADD_HOST) || (command == MTC_CMD_MOD_HOST) )
+        {
+            hostinfo.append (",\"uuid\":\"");
+            hostinfo.append ( obj_ptr->get_uuid (hostname));
+            hostinfo.append ( "\"");
+            hostinfo.append (",\"ip\":\"");
+            hostinfo.append ( obj_ptr->get_hostaddr (hostname));
+            hostinfo.append ( "\"");
+            hostinfo.append (",\"personality\":\"compute\"");
+        }
+        else if ( command == MTC_CMD_DEL_HOST )
+        {
+            hostinfo.append (",\"uuid\":\"");
+            hostinfo.append ( obj_ptr->get_uuid (hostname));
+            hostinfo.append ( "\"");
+        }
+
+        hostinfo.append ( "}");
+
+        /* Refuse to overrun the message buffer ; a truncated JSON string
+         * would be useless to the receiver so fail the request instead */
+        if ( hostinfo.length() >= (size_t)BUF_SIZE )
+        {
+            elog ("%s host info for '%s' is too long (%d bytes)\n",
+                   hostname.c_str(),
+                   get_event_str(command).c_str(),
+                   (int)hostinfo.length());
+            return (FAIL_BAD_PARM);
+        }
+
+        /* Add the header, command and the host info to the message.
+         * The header is copied as a string, like every other sender in
+         * this file, so nothing past its null terminator is read. */
+        memset (&msg, 0, sizeof(mtc_message_type));
+        snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header());
+        memcpy(&msg.buf[0], hostinfo.data(), hostinfo.length());
+        msg.cmd = command ;
+
+        /* Send to guestAgent daemon port */
+        bytes_to_send = ((sizeof(mtc_message_type))-(BUF_SIZE)+(hostinfo.length()));
+        bytes = sock_ptr->mtc_to_hbs_sock->write((char*) &msg, bytes_to_send, NULL, daemon_get_cfg_ptr()->mtc_to_guest_cmd_port);
+        if ( bytes <= 0 )
+        {
+            wlog ("%s failed to send '%s' to guest service\n",
+                   hostname.c_str(),
+                   get_event_str(command).c_str());
+            rc = FAIL_TO_TRANSMIT ;
+        }
+    }
+    return (rc);
+}
+
+/* Log throttle counters for this interface */
+int rx_error_count = 0 ;
+
+int mtc_service_inbox ( nodeLinkClass   * obj_ptr,
+                        mtc_socket_type * sock_ptr,
+                        int               iface)
+{
+    mtc_message_type msg ;
+    std::list<string>::iterator iter ;
+
+    int bytes = 0 ;
+    int rc = PASS ;
+    string ip = "0.0.0.0" ;
+
+    MEMSET_ZERO (msg);
+
+    string hostaddr = "" ;
+    string hostname = "" ;
+
+    if ( iface == INFRA_INTERFACE )
+    {
+        if ( ( obj_ptr ) &&
+             ( obj_ptr->infra_network_provisioned == true ) &&
+             ( sock_ptr->mtc_agent_infra_rx_socket ))
+        {
+            bytes = sock_ptr->mtc_agent_infra_rx_socket->read((char*)&msg, sizeof(msg));
+            hostaddr = sock_ptr->mtc_agent_infra_rx_socket->get_src_str();
+            hostname = obj_ptr->get_hostname ( hostaddr ) ;
+        }
+        else
+        {
+            ilog ("cannot receive from unprovisioned Infra socket\n");
+            return ( FAIL_NO_INFRA_PROV );
+        }
+    }
+    else
+    {
+        bytes = sock_ptr->mtc_agent_rx_socket->read((char*)&msg, sizeof(msg));
+        hostaddr = sock_ptr->mtc_agent_rx_socket->get_src_str();
+        hostname = obj_ptr->get_hostname ( hostaddr ) ;
+    }
+
+    if ( bytes <= 0 )
+    {
+        return (RETRY);
+    }
+    else if ( bytes < 7 )
+    {
+        wlog_throttled ( rx_error_count, 100, "Message receive error, underrun (only rxed %d bytes)\n", bytes );
+        return (FAIL);
+    }
+    else
+    {
+        rx_error_count = 0 ;
+    }
+
+    if ( hostname.empty() )
+    {
+        iter = std::find (unknown_ip_list.begin(), unknown_ip_list.end(), hostaddr );
+        if ( iter == unknown_ip_list.end() )
+        {
+            mlog3 ( "Received message from unknown IP <%s>\n", hostaddr.c_str());
+            unknown_ip_list.push_front(hostaddr);
+        }
+        return (FAIL_NOT_FOUND);
+    }
+    else if ( ! 
hostaddr.empty() ) + { + unknown_ip_list.remove (hostaddr); + } + + print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), false ); + + /* Check for response messages */ + if ( strstr ( &msg.hdr[0], get_cmd_rsp_msg_header() ) ) + { + obj_ptr->set_cmd_resp ( hostname , msg ) ; + } + + /* + * Check for compute messages + */ + else if ( strstr ( &msg.hdr[0], get_compute_msg_header() ) ) + { + if ( msg.cmd == MTC_MSG_MTCALIVE ) + { + string functions = "" ; + rc = jsonUtil_get_key_val ( &msg.buf[0], "personality", functions ); + if ( rc ) + { + wlog ("%s failed to get personality from mtcAlive message\n", hostname.c_str()); + return (FAIL_KEY_VALUE_PARSE); + } + rc = obj_ptr->update_host_functions ( hostname, functions ); + dlog3 ("%s functions: %s\n", hostname.c_str(), functions.c_str()); + if ( rc ) + { + wlog ("%s failed to load functions from mtcAlive message\n", hostname.c_str()); + return (FAIL_NODETYPE); + } + obj_ptr->set_uptime ( hostname , msg.parm[MTC_PARM_UPTIME_IDX], false ); + obj_ptr->set_health ( hostname , msg.parm[MTC_PARM_HEALTH_IDX] ); + obj_ptr->set_mtce_flags ( hostname , msg.parm[MTC_PARM_FLAGS_IDX] ); + + obj_ptr->set_mtcAlive ( hostname, iface ); + + mlog1("%s Uptime:%d Health:%d Flags:0x%x mtcAlive:%s\n", + hostname.c_str(), + msg.parm[MTC_PARM_UPTIME_IDX], + msg.parm[MTC_PARM_HEALTH_IDX], + msg.parm[MTC_PARM_FLAGS_IDX], + obj_ptr->get_mtcAlive_gate ( hostname ) ? 
"gated" : "open"); + + string infra_ip = ""; + /* Get the infra ip address if it is provisioned */ + rc = jsonUtil_get_key_val ( &msg.buf[0], "infra_ip", infra_ip ); + if ( rc == PASS ) + { + obj_ptr->set_infra_hostaddr ( hostname, infra_ip ); + } + else + { + mlog ("%s null or missing 'infra_ip' value (rc:%d)\n", hostname.c_str(), rc); + } + } + else if ( msg.cmd == MTC_MSG_MAIN_GOENABLED ) + { + if ( !obj_ptr->my_hostname.compare(hostname) ) + { + ilog ("%s received GOENABLED from self\n", hostname.c_str()); + } + rc = send_mtc_cmd ( hostname , msg.cmd, MGMNT_INTERFACE ); + if ( rc != PASS ) + { + elog ("%s GOENABLED send reply failed (rc:%d)\n", + hostname.c_str(), rc); + + wlog ("%s ... need successful GOENABLED reply, dropping ...\n", + hostname.c_str() ); + } + else + { + mlog ("%s got GOENABLED (out-of-service tests passed) message\n", hostname.c_str()); + obj_ptr->set_goEnabled ( hostname ); + } + } + else if ( msg.cmd == MTC_MSG_MAIN_GOENABLED_FAILED ) + { + if ( obj_ptr->get_adminState ( hostname ) == MTC_ADMIN_STATE__UNLOCKED ) + { + wlog ("%s failed out-of-service test: %s\n", hostname.c_str(), &msg.buf[0] ); + obj_ptr->set_goEnabled_failed ( hostname ); + } + /* We don't send a reply on a fail */ + } + else if ( msg.cmd == MTC_MSG_SUBF_GOENABLED ) + { + mlog ("%s-compute GOENABLED message\n", hostname.c_str()); + if ( !obj_ptr->my_hostname.compare(hostname) ) + { + ilog ("%s-compute received GOENABLED from self\n", hostname.c_str()); + } + rc = send_mtc_cmd ( hostname , msg.cmd, MGMNT_INTERFACE ); + if ( rc != PASS ) + { + elog ("%s-compute GOENABLED send reply failed (rc:%d)\n", + hostname.c_str(), rc); + + wlog ("%s-compute ... 
need successful GOENABLED reply, dropping ...\n", + hostname.c_str() ); + } + else + { + mlog ("%s-compute got GOENABLED (out-of-service tests passed) message\n", hostname.c_str()); + obj_ptr->set_goEnabled_subf ( hostname ); + } + } + else if ( msg.cmd == MTC_MSG_SUBF_GOENABLED_FAILED ) + { + if ( obj_ptr->get_adminState ( hostname ) == MTC_ADMIN_STATE__UNLOCKED ) + { + wlog ("%s-compute failed GOENABLE test: %s\n", hostname.c_str(), &msg.buf[0] ); + obj_ptr->set_goEnabled_failed_subf ( hostname ); + } + /* We don't send a reply on a fail */ + } + else + { + wlog ("Unexpected compute message (0x%x) from '%s'\n", msg.cmd, hostname.c_str()); + } + } + + /* + * Check for Event Messages + */ + else if ( strstr ( &msg.hdr[0], get_mtce_event_header() ) ) + { + rc = PASS ; + if ( hostname.empty() ) + { + mlog2 ( "Received mtce event from unknown host\n"); + rc = FAIL_UNKNOWN_HOSTNAME ; + } + else if ( !hostname.compare("localhost") ) + { + mlog2 ("localhost event (%x) ignored", msg.cmd); + } + else + { + string event = "" ; + + /* TODO: fix this hostname setting */ + if (( msg.cmd == MTC_DEGRADE_CLEAR ) || + ( msg.cmd == MTC_DEGRADE_RAISE ) || + ( msg.cmd == MTC_EVENT_HWMON_CLEAR ) || + ( msg.cmd == MTC_EVENT_HWMON_MINOR ) || + ( msg.cmd == MTC_EVENT_HWMON_MAJOR ) || + ( msg.cmd == MTC_EVENT_HWMON_CRIT ) || + ( msg.cmd == MTC_EVENT_HWMON_RESET ) || + ( msg.cmd == MTC_EVENT_HWMON_POWERDOWN ) || + ( msg.cmd == MTC_EVENT_HWMON_POWERCYCLE) || + ( msg.cmd == MTC_EVENT_HWMON_CONFIG )) + { + hostname = &msg.hdr[MSG_HEADER_SIZE] ; + } + /* the mtce event (process or resource) that causes this raised event is at the + * head of the message buffer. Load it into an 'event' + * string to be passed into the individual handlers for + * convenience. Safer to pass reference to a string than + * the raw buffer pointer. 
*/ + if ( strnlen ( &msg.buf[0] , MAX_MTCE_EVENT_NAME_LEN ) ) + { + event = msg.buf ; + } + + switch ( msg.cmd ) + { + /* TODO: Port other services to use this common code */ + case MTC_EVENT_MONITOR_READY: + { + std::list::iterator temp ; + // bool start_monitoring_flag = false ; + + if ( !event.compare("pmond") ) + { + /* Notify mtcAgent that we got a pmond ready event */ + obj_ptr->declare_service_ready ( hostname, MTC_SERVICE_PMOND ); + return (PASS); + } + else if ( !event.compare("hbsClient") ) + { + /* Notify mtcAgent that we got a hbsClient ready event */ + obj_ptr->declare_service_ready ( hostname, MTC_SERVICE_HEARTBEAT ); + return (PASS); + } + + /* If the active controller got the ready event from a local service + * then push the inventory to that service and for each host that is + * enabled send the start monitoring command to it if the bm_ip is + * provisioned. + * Handles the daemon restart case */ + for ( temp = obj_ptr->hostname_inventory.begin () ; + temp != obj_ptr->hostname_inventory.end () ; + temp++ ) + { + hostname = temp->data(); + + /* Set the general start monitoring flag based on service state. 
+ * This lag may be over ridden my individual services based on + * additional information */ + if (( obj_ptr->get_adminState ( hostname ) == MTC_ADMIN_STATE__UNLOCKED ) && + ( obj_ptr->get_operState ( hostname ) == MTC_OPER_STATE__ENABLED ) && + ((obj_ptr->get_availStatus ( hostname ) == MTC_AVAIL_STATUS__AVAILABLE ) || + (obj_ptr->get_availStatus ( hostname ) == MTC_AVAIL_STATUS__DEGRADED ))) + { + ; // start_monitoring_flag = true ; + } + else + { + ; // start_monitoring_flag = false ; + } + + if ( !event.compare("hwmond") ) + { + obj_ptr->declare_service_ready ( hostname, MTC_SERVICE_HWMOND ); + } + else + { + wlog ("%s Global Ready Event not supported for '%s' service\n", + hostname.c_str(), event.c_str()); + + return (FAIL_BAD_PARM); + } + } + break ; + } + + /***************************************************************** + * Data Port Events * + *****************************************************************/ + + /***************************************************************** + * Process Monitor Events * + *****************************************************************/ + case MTC_EVENT_PMON_CLEAR: + { + mlog ("%s pmond: '%s' recovered (clear)\n", hostname.c_str(), event.c_str()); + obj_ptr->degrade_pmond_clear ( hostname ); + break ; + } + case MTC_EVENT_PMON_CRIT: + { + mlog ("%s pmond: '%s' failed (critical)\n", hostname.c_str(), event.c_str()); + + /** + * event is the process name that has failed + * parm[0] is the nodetype the process serves + **/ + obj_ptr->critical_process_failed ( hostname, event, msg.parm[0] ); + break ; + } + case MTC_EVENT_PMON_MAJOR: + { + mlog ("%s pmond: '%s' failed (major)\n", hostname.c_str(), event.c_str()); + obj_ptr->degrade_process_raise ( hostname, event ); + break ; + } + case MTC_EVENT_PMON_MINOR: + { + mlog ("%s pmond: '%s' failed (minor)\n", hostname.c_str(), event.c_str()); + obj_ptr->alarm_process_failure ( hostname, event ); + break ; + } + case MTC_EVENT_PMON_LOG: + { + mlog ("%s pmond: '%s' failed 
(log)\n", hostname.c_str(), event.c_str()); + obj_ptr->log_process_failure ( hostname, event ); + break ; + } + + /***************************************************************** + * Resource Monitor Events * + *****************************************************************/ + + /* TODO: Remove - Suspecting OBS Command */ + case MTC_EVENT_RMON_READY: + { + mlog ("%s RMON Ready\n", hostname.c_str()); + obj_ptr->declare_service_ready ( hostname, MTC_SERVICE_RMOND ); + break ; + } + case MTC_EVENT_RMON_CLEAR: + { + mlog ("%s rmond: '%s' recovered (clear)\n", hostname.c_str(), event.c_str()); + obj_ptr->degrade_resource_clear ( hostname , event ); + break ; + } + case MTC_EVENT_RMON_CRIT: + { + mlog ("%s rmond: '%s' failed (critical)\n", hostname.c_str(), event.c_str()); + obj_ptr->critical_resource_failed ( hostname, event ); + break ; + } + case MTC_EVENT_RMON_MAJOR: + { + mlog ("%s rmond: '%s' failed (major)\n", hostname.c_str(), event.c_str()); + obj_ptr->degrade_resource_raise ( hostname, event ); + break ; + } + case MTC_EVENT_RMON_MINOR: + { + mlog ("%s rmond: '%s' failed (minor)\n", hostname.c_str(), event.c_str()); + /* Clear the degrade condition if one is present */ + obj_ptr->degrade_resource_clear ( hostname , event ); + obj_ptr->log_resource_failure ( hostname, event ); + break ; + } + + case MTC_EVENT_HWMON_CLEAR: + case MTC_DEGRADE_CLEAR: + { + mlog ("%s hwmon requests to clear its degrade flag\n", hostname.c_str()); + obj_ptr->node_degrade_control ( hostname, MTC_DEGRADE_CLEAR , "hwmon" ); + break ; + } + case MTC_EVENT_HWMON_MINOR: + case MTC_EVENT_HWMON_MAJOR: + case MTC_EVENT_HWMON_CRIT: + case MTC_DEGRADE_RAISE: + { + mlog ("%s hwmon requested to set its degrade flag\n", hostname.c_str()); + obj_ptr->node_degrade_control ( hostname, MTC_DEGRADE_RAISE , "hwmon" ); + break ; + } + case MTC_EVENT_HWMON_RESET: + case MTC_EVENT_HWMON_POWERDOWN: + case MTC_EVENT_HWMON_POWERCYCLE: + { + mlog ("%s requires maintenance '%s' action due to failing '%s' 
sensor \n", + hostname.c_str(), + get_event_str(msg.cmd).c_str(), + event.c_str()); + + obj_ptr->invoke_hwmon_action ( hostname, msg.cmd, event ); + break ; + } + default: + { + wlog ("%s Unknown Event (%x)\n", hostname.c_str(), msg.cmd ); + rc = FAIL ; + break ; + } + } + } + } + else + { + wlog ( "Received unsupported or badly formed message\n" ); + } + + /* Only do this if the debug level is appropriate */ + if ( daemon_get_cfg_ptr()->debug_msg ) + { + int count = 0 ; + for ( iter = unknown_ip_list.begin () ; + iter != unknown_ip_list.end () ; + iter++ ) + { + count++ ; + mlog3 ("Unknown IP [%d]:%s\n", count, iter->c_str()); + } + } + return (rc); +} + +int send_mtc_cmd ( string & hostname, int cmd , int interface ) +{ + int rc = FAIL ; + bool force = false ; + mtc_message_type mtc_cmd ; + mtc_socket_type * sock_ptr = get_sockPtr (); + memset (&mtc_cmd,0,sizeof(mtc_message_type)); + + /* Add the command version to he message */ + mtc_cmd.ver = MTC_CMD_VERSION ; + mtc_cmd.rev = MTC_CMD_REVISION; + + switch ( cmd ) + { + case MTC_REQ_MTCALIVE: + { + snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s" , get_cmd_req_msg_header() ); + mtc_cmd.cmd = cmd ; + mtc_cmd.num = 0 ; + rc = PASS ; + break ; + } + case MTC_REQ_MAIN_GOENABLED: + case MTC_REQ_SUBF_GOENABLED: + { + snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() ); + mtc_cmd.cmd = cmd ; + mtc_cmd.num = 0 ; + rc = PASS ; + break ; + } + case MTC_CMD_STOP_CONTROL_SVCS: + case MTC_CMD_STOP_COMPUTE_SVCS: + case MTC_CMD_STOP_STORAGE_SVCS: + case MTC_CMD_START_CONTROL_SVCS: + case MTC_CMD_START_COMPUTE_SVCS: + case MTC_CMD_START_STORAGE_SVCS: + { + snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() ); + mtc_cmd.cmd = cmd ; + rc = PASS ; + break ; + } + case MTC_CMD_RESET: + case MTC_CMD_REBOOT: + case MTC_CMD_WIPEDISK: + case MTC_CMD_LAZY_REBOOT: + { + ilog ("%s sending '%s' request (%s network)\n", hostname.c_str(), get_mtcNodeCommand_str(cmd), 
get_iface_name_str(interface)); + snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() ); + mtc_cmd.cmd = cmd ; + mtc_cmd.num = 0 ; + if ( cmd == MTC_CMD_LAZY_REBOOT ) + { + mtc_cmd.num = 1 ; + mtc_cmd.parm[0] = MTC_SECS_30 ; + } + rc = PASS ; + break ; + } + + /* Tell the mtcClient on that host that its subFunction has failed */ + case MTC_MSG_SUBF_GOENABLED_FAILED: + { + force = true ; + ilog ("%s sending '%s' request (%s network)\n", hostname.c_str(), get_mtcNodeCommand_str(cmd), get_iface_name_str(interface)); + snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() ); + mtc_cmd.cmd = cmd ; + mtc_cmd.num = 0 ; + rc = PASS ; + break ; + } + case MTC_MSG_MTCALIVE: + { + slog ("request to send mtcAlive message from mtcAgent ; invalid\n"); + return (FAIL_OPERATION); + } + case MTC_MSG_MAIN_GOENABLED: + case MTC_MSG_SUBF_GOENABLED: + { + snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_msg_rep_msg_header() ); + mtc_cmd.cmd = cmd ; + mtc_cmd.num = 0 ; + rc = PASS ; + break ; + } + case MTC_MSG_LOCKED: + { + mlog ("%s sending 'Locked' notification (%s network)\n", hostname.c_str(), get_iface_name_str(interface)); + snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() ); + mtc_cmd.cmd = cmd ; + mtc_cmd.num = 0 ; + rc = PASS ; + break ; + } + default: + { + elog ("Unsupported maintenance command (0x%x)\n", cmd ); + rc = FAIL_BAD_CASE ; + } + } + if ( rc == PASS ) + { + int bytes = 0; + + /* Temporarily get IP from node inventory till dns is available */ + nodeLinkClass * obj_ptr = get_mtcInv_ptr (); + + /* add the mac address of the target card to the header + * Note: the minus 1 is to overwqrite the null */ + snprintf ( &mtc_cmd.hdr[MSG_HEADER_SIZE-1], MSG_HEADER_SIZE, "%s", obj_ptr->get_hostIfaceMac(hostname, MGMNT_IFACE).data()); + + /* Lets add the controller's floating ip in the buffer so hat he host knowns where to reply */ + snprintf ( &mtc_cmd.buf[0], obj_ptr->my_float_ip.length()+1, "%s", 
obj_ptr->my_float_ip.data()); + + /* only send the minimum amount of data */ + bytes = (sizeof(mtc_message_type)-(BUF_SIZE-(obj_ptr->my_float_ip.length()+1))) ; + + print_mtc_message ( hostname, MTC_CMD_TX, mtc_cmd, get_iface_name_str(interface), force ) ; + + if (interface == MGMNT_INTERFACE) + { + string hostaddr = obj_ptr->get_hostaddr(hostname); + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__INVALIDATE_MGMNT_IP, hostname ) ) + hostaddr = "none" ; +#endif + + if ( hostUtil_is_valid_ip_addr ( hostaddr ) != true ) + { + wlog("%s has no management IP assigned\n", hostname.c_str()); + return (FAIL_HOSTADDR_LOOKUP); + } + /* rc = message size */ + rc = sock_ptr->mtc_agent_tx_socket->write((char *)&mtc_cmd, bytes, hostaddr.c_str(), sock_ptr->mtc_cmd_port); + } + else if ((interface == INFRA_INTERFACE) && + ( obj_ptr->infra_network_provisioned == true ) && + ( sock_ptr->mtc_agent_infra_tx_socket != NULL )) + { + /* SETUP TX -> COMPUTE SOCKET INFRA INTERFACE */ + string infra_hostaddr = obj_ptr->get_infra_hostaddr(hostname); + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__INVALIDATE_INFRA_IP, hostname ) ) + infra_hostaddr = "none" ; +#endif + + if ( hostUtil_is_valid_ip_addr( infra_hostaddr ) != true ) + { + return (FAIL_NO_INFRA_PROV); + } + rc = sock_ptr->mtc_agent_infra_tx_socket->write((char *)&mtc_cmd, bytes, infra_hostaddr.c_str(), sock_ptr->mtc_cmd_port); + } + + if ( 0 > rc ) + { + elog("%s Failed to send command (rc:%i)\n", hostname.c_str(), rc); + rc = FAIL_SOCKET_SENDTO ; + } + else + { + rc = PASS ; + } + } + return ( rc ); +} + +int send_hbs_command ( string hostname, int cmd ) +{ + int bytes = 0 ; + int bytes_to_send = 0 ; + int rc = PASS ; + + nodeLinkClass * obj_ptr = get_mtcInv_ptr () ; + mtc_message_type event ; + mtc_socket_type * sock_ptr = get_sockPtr (); + + /* We don't heartbeat self */ + if (( obj_ptr->is_active_controller (hostname) ) && + (( cmd == MTC_CMD_ADD_HOST ) || + ( cmd == MTC_CMD_DEL_HOST ) || + ( cmd == 
MTC_CMD_START_HOST ) || + ( cmd == MTC_CMD_STOP_HOST ))) + { + dlog ("%s refusing to '%s' self to heartbeat service\n", + hostname.c_str(), get_event_str(cmd).c_str()); + return (PASS); + } + + memset (&event, 0 , sizeof(mtc_message_type)); + snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_hbs_cmd_req_header() ); + snprintf ( &event.hdr[MSG_HEADER_SIZE] , MAX_CHARS_HOSTNAME , "%s", hostname.data()); + + /* There is no buffer data in any of these messages */ + bytes_to_send = ((sizeof(mtc_message_type))-(BUF_SIZE)) ; + + switch ( cmd ) + { + case MTC_CMD_STOP_HOST: + ilog ("%s sending 'stop' to heartbeat service\n", hostname.c_str()); + break ; + case MTC_CMD_START_HOST: + obj_ptr->manage_heartbeat_clear ( hostname , MAX_IFACES ); + ilog ("%s sending 'start' to heartbeat service\n", hostname.c_str()); + break ; + case MTC_CMD_DEL_HOST: + ilog ("%s sending 'delete' to heartbeat service\n", hostname.c_str()); + break ; + case MTC_CMD_ADD_HOST: + obj_ptr->manage_heartbeat_clear ( hostname, MAX_IFACES ); + ilog ("%s sending 'add' to heartbeat service\n", hostname.c_str()); + break ; + case MTC_RESTART_HBS: + ilog ("%s sending 'restart' to heartbeat service\n", hostname.c_str()); + break ; + case MTC_BACKOFF_HBS: + ilog ("%s requesting heartbeat period backoff\n", hostname.c_str()); + break ; + case MTC_RECOVER_HBS: + ilog ("%s requesting heartbeat period recovery\n", hostname.c_str()); + break ; + default: + { + slog ("%s Unsupported command operation 0x%x\n", hostname.c_str(), cmd ); + return (FAIL_BAD_PARM); + } + } + + event.cmd = cmd ; + event.num = 1 ; + event.parm[0] = obj_ptr->get_nodetype(hostname); + + /* send to hbsAgent daemon port */ + bytes = sock_ptr->mtc_to_hbs_sock->write((char*) &event, bytes_to_send); + if ( bytes <= 0 ) + { + wlog ("Cannot send to heartbeat service\n"); + rc = FAIL_TO_TRANSMIT ; + } + return rc ; +} + + +/* Handle client 'events' */ +int service_events ( nodeLinkClass * obj_ptr, mtc_socket_type * sock_ptr ) +{ + mtc_message_type 
msg ; + int bytes = 0 ; + + /* Receive event messages */ + memset (&msg, 0, sizeof(mtc_message_type)); + bytes = sock_ptr->mtc_event_rx_sock->read((char*)&msg, sizeof(mtc_message_type)); + if ( bytes <= 0 ) + { + return (RETRY) ; + } + + if ( msg.cmd == MTC_EVENT_LOOPBACK ) + { + const char * event_hdr_ptr = get_loopback_header() ; + + /* Confirm header */ + if ( strncmp ( &msg.hdr[0], event_hdr_ptr, MSG_HEADER_SIZE ) ) + { + elog ("Invalid Event header\n"); + } + else + { + ilog ("Service ping\n"); + + /* Should send back a response */ + } + } + + else if (( msg.cmd == MTC_EVENT_HEARTBEAT_MINOR_SET ) || + ( msg.cmd == MTC_EVENT_HEARTBEAT_MINOR_CLR )) + { + const char * event_hdr_ptr = get_heartbeat_event_header() ; + + /* Confirm header */ + if ( strncmp ( &msg.hdr[0], event_hdr_ptr, MSG_HEADER_SIZE ) ) + { + elog ("Invalid Heartbeat Event header\n"); + } + else + { + string hostname = &msg.buf[0] ; + print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(MGMNT_INTERFACE), false ); + + /* The interface that the heartbeat loss occurred over is + * specified in parm[0 for this command + * 0 = MGMNT_IFACE + * 1 = INFRA_IFACE + * else default to 0 (MGMNT_IFACE) to be backwards compatible + * + * */ + iface_enum iface = MGMNT_IFACE; + if ( msg.num > 0 ) + { + if ( msg.parm[0] == INFRA_IFACE ) + { + iface = INFRA_IFACE ; + } + } + if ( msg.cmd == MTC_EVENT_HEARTBEAT_MINOR_SET ) + { + /* Assert the minor condition with the 'false' (i.e. 
not clear)*/ + obj_ptr->manage_heartbeat_minor ( hostname, iface, false ); + } + else + { + /* Clear the minor condition with the 'clear=true' */ + obj_ptr->manage_heartbeat_minor ( hostname, iface, true ); + } + } + } + else if (( msg.cmd == MTC_EVENT_HEARTBEAT_DEGRADE_SET ) || + ( msg.cmd == MTC_EVENT_HEARTBEAT_DEGRADE_CLR )) + { + const char * event_hdr_ptr = get_heartbeat_event_header() ; + + /* Confirm header */ + if ( strncmp ( &msg.hdr[0], event_hdr_ptr, MSG_HEADER_SIZE ) ) + { + elog ("Invalid Heartbeat Event header\n"); + } + else + { + string hostname = &msg.buf[0] ; + print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(MGMNT_INTERFACE), false ); + + /* The interface that the heartbeat loss occurred over is + * specified in parm[0 for this command + * 0 = MGMNT_IFACE + * 1 = INFRA_IFACE + * else default to 0 (MGMNT_IFACE) to be backwards compatible + * + * */ + iface_enum iface = MGMNT_IFACE; + if ( msg.num > 0 ) + { + if ( msg.parm[0] == INFRA_IFACE ) + { + iface = INFRA_IFACE ; + } + } + + if ( msg.cmd == MTC_EVENT_HEARTBEAT_DEGRADE_SET ) + { + /* Assert the degrade condition with the 'false' (i.e. 
not clear)*/ + obj_ptr->manage_heartbeat_degrade ( hostname, iface, false ); + } + else + { + /* Clear the degrade condition with the 'true' */ + obj_ptr->manage_heartbeat_degrade ( hostname, iface, true ); + } + } + } + else if ( msg.cmd == MTC_EVENT_HEARTBEAT_LOSS ) + { + const char * loss_hdr_ptr = get_heartbeat_loss_header() ; + + /* Confirm header */ + if ( strncmp ( &msg.hdr[0], loss_hdr_ptr, MSG_HEADER_SIZE ) ) + { + elog ("Invalid Heartbeat Loss event header\n"); + } + else + { + /* The interface that the heartbeat loss occurred over is + * specified in parm[0 for this command + * 0 = MGMNT_IFACE + * 1 = INFRA_IFACE + * else default to 0 (MGMNT_IFACE) to be backwards compatible + * + * */ + iface_enum iface = MGMNT_IFACE; + if ( msg.num > 0 ) + { + if ( msg.parm[0] == INFRA_IFACE ) + { + iface = INFRA_IFACE ; + } + } + string hostname = &msg.buf[0] ; + print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(MGMNT_INTERFACE), false ); + obj_ptr->manage_heartbeat_failure ( hostname, iface, false ); + } + } + else if ( msg.cmd == MTC_EVENT_PMOND_CLEAR ) + { + string hostname = &msg.hdr[MSG_HEADER_SIZE] ; + string process = "pmond" ; + ilog ("%s Degrade Clear Event for process '%s'\n", hostname.c_str(), process.c_str()); + print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(MGMNT_INTERFACE), false ); + obj_ptr->degrade_pmond_clear ( hostname ); + } + else if ( msg.cmd == MTC_EVENT_PMOND_RAISE ) + { + string hostname = &msg.hdr[MSG_HEADER_SIZE] ; + string process = "pmond" ; + ilog ("%s Degrade Assert Event for process '%s'\n", hostname.c_str(), process.c_str()); + print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(MGMNT_INTERFACE), false ); + obj_ptr->degrade_process_raise ( hostname , process ); + } + else if ( msg.cmd == MTC_EVENT_HOST_STALLED ) + { + string hostname = &msg.hdr[MSG_HEADER_SIZE] ; + print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(MGMNT_INTERFACE), false ); + elog ("%s Stalled !!!\n", 
hostname.c_str()); + } + + else if ( msg.cmd == MTC_EVENT_MONITOR_READY ) + { + string daemon = &msg.hdr[MSG_HEADER_SIZE] ; + + if ( !daemon.compare("guestAgent") ) + { + std::list::iterator temp ; + int rc = PASS ; + + /* If the active controller got the ready event from a local service + * then push the inventory to that service and for each host that is + * enabled send the start monitoring command to it. + * Handles the daemon restart case */ + for ( temp = obj_ptr->hostname_inventory.begin () ; + temp != obj_ptr->hostname_inventory.end () ; + temp++ ) + { + string hostname = temp->data(); + rc = send_guest_command ( hostname, MTC_CMD_ADD_HOST ); + if ( rc ) + { + elog ("%s host add to '%s' failed\n", hostname.c_str(), daemon.c_str()); + } + else + { + ilog ("%s added to guestAgent\n", hostname.c_str()); + } + } + /* Done sending the host info */ + } + else + { + wlog ("Unsupported ready event for daemon: '%s'\n", daemon.c_str()); + } + } + + else if ( msg.cmd == MTC_EVENT_HEARTBEAT_READY ) + { + std::list::iterator temp ; + + /* no heartbeating in simplex mode */ + if ( obj_ptr->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX ) + { + return (PASS); + } + + ilog ("Received 'Heartbeat Service Ready' Event\n"); + obj_ptr->hbs_ready = true ; + + /* Run Maintenance on Inventory */ + for ( temp = obj_ptr->hostname_inventory.begin () ; + temp != obj_ptr->hostname_inventory.end () ; + temp++ ) + { + string hostname = "" ; + hostname.append( temp->c_str() ) ; + + /* Add all hosts, even the active controller, to + * the heartbeat service. This tell the heartbeat + * service about all the hosts so that it will + * send heartbeat oob flag events to mtce. 
*/ + if ( send_hbs_command( hostname, MTC_CMD_ADD_HOST ) != PASS ) + { + elog ("%s Failed to send inventory to heartbeat service\n", hostname.c_str()); + } + /* Send the start event to the heartbeat service for all enabled hosts except + * for the active controller which is not actively monitored */ + if ( obj_ptr->is_active_controller ( hostname ) == false ) + { + if (( obj_ptr->get_adminState ( hostname ) == MTC_ADMIN_STATE__UNLOCKED ) && + ( obj_ptr->get_operState ( hostname ) == MTC_OPER_STATE__ENABLED ) && + ((obj_ptr->get_availStatus ( hostname ) == MTC_AVAIL_STATUS__AVAILABLE ) || + (obj_ptr->get_availStatus ( hostname ) == MTC_AVAIL_STATUS__DEGRADED ))) + { + send_hbs_command ( hostname, MTC_CMD_START_HOST ); + } + } + else + { + dlog ("%s Refusing to start heartbeat of self\n", hostname.c_str() ); + } + } + } + else + { + wlog ("Unrecognized Event from Heartbeat Service (hbsAgent)\n"); + } + return PASS ; +} + + +int send_hwmon_command ( string hostname, int command ) +{ + int rc = PASS ; + + switch ( command ) + { + case MTC_CMD_QRY_HOST: + { + ilog ("%s sending 'sensor read' request\n", hostname.c_str()); + break ; + } + case MTC_CMD_START_HOST: + case MTC_CMD_STOP_HOST: + case MTC_CMD_ADD_HOST: + case MTC_CMD_MOD_HOST: + case MTC_CMD_DEL_HOST: + { + if ( command == MTC_CMD_START_HOST ) + { + get_mtcInv_ptr()->set_hwmond_monitor_state ( hostname, true ); + } + else if ( command == MTC_CMD_STOP_HOST ) + { + get_mtcInv_ptr()->set_hwmond_monitor_state ( hostname, false ); + } + + ilog ("%s sending '%s' to hwmond service\n", hostname.c_str(), get_event_str(command).c_str()); + break ; + } + default: + { + slog ("%s Unsupported command operation 0x%x\n", hostname.c_str(), command ); + rc = FAIL_BAD_PARM ; + } + } + + if ( rc == PASS ) + { + mtc_message_type cmd ; + + string hwmon_info = "" ; + int bytes = 0; + + mtc_socket_type * sock_ptr = get_sockPtr (); + nodeLinkClass * obj_ptr = get_mtcInv_ptr (); + + memset ( &cmd, 0 , sizeof(mtc_message_type)); + 
snprintf ( &cmd.hdr[0] , MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header()); + snprintf ( &cmd.hdr[MSG_HEADER_SIZE], MAX_CHARS_HOSTNAME, "%s", hostname.data()); + + /* Store the command, get the board management info and copy it into the message buffer */ + cmd.cmd = command ; + hwmon_info = obj_ptr->get_hwmon_info ( hostname ); + memcpy ( &cmd.buf[0], hwmon_info.data(), hwmon_info.length()); + + /* rc = message size */ + bytes = sizeof(mtc_message_type); + rc = sock_ptr->hwmon_cmd_sock->write((char *)&cmd, bytes, obj_ptr->my_float_ip.c_str(), 0); + if ( 0 > rc ) + { + elog ("%s Failed sendto command to hwmond (%d:%s)\n", hostname.c_str(), errno, strerror(errno)); + rc = FAIL_SOCKET_SENDTO ; + } + else + { + print_mtc_message ( hostname, MTC_CMD_TX, cmd, get_iface_name_str(MGMNT_INTERFACE), false ); + rc = PASS ; + } + } + return rc ; +} + + + diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcHttpSvr.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcHttpSvr.cpp new file mode 100644 index 00000000..e213721f --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcHttpSvr.cpp @@ -0,0 +1,1121 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Controller Maintenance Daemon + */ + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "svr" + +#include +#include +#include +#include +#include +#include +#include +#include /* for hostent */ +#include +#include +#include +#include +#include +#include +#include +#include /* for ... close and usleep */ +#include /* for ... HTTP_ status definitions */ +#include /* for ... RTMGRP_LINK */ + +using namespace std; + +#include "daemon_common.h" /* */ + +#include "nodeBase.h" /* Service header */ +#include "nodeTimers.h" /* */ +#include "nodeClass.h" /* */ +#include "nodeUtil.h" /* */ +#include "jsonUtil.h" /* */ +#include "mtcHttpSvr.h" +#include "mtcNodeMsg.h" /* for ... 
send_mtc_cmd */ +#include "mtcAlarm.h" /* for ... mtcAlarm_log */ + +#define EVENT_SERVER "HTTP Event Server" + +#define CLIENT_SYSINV_URL "/v1/hosts/" +#define CLIENT_VIM_HOSTS_URL "/v1/hosts/" +#define CLIENT_SM_URL "/v1/hosts/" +#define CLIENT_VIM_SYSTEMS_URL "/v1/systems" +#define CLIENT_HEADER "User-Agent" + +#define CLIENT_SYSINV_1_0 "sysinv/1.0" +#define CLIENT_VIM_1_0 "vim/1.0" +#define CLIENT_SM_1_0 "sm/1.0" + +#define EVENT_METHODS (EVHTTP_REQ_PATCH | \ + EVHTTP_REQ_POST | \ + EVHTTP_REQ_GET | \ + EVHTTP_REQ_PUT | \ + EVHTTP_REQ_DELETE) + +int sequence = 0 ; +char log_str [MAX_API_LOG_LEN]; +char filename[MAX_FILENAME_LEN]; + + +/* Cleanup */ +void mtcHttpSvr_fini ( event_type & mtce_event ) +{ + if ( mtce_event.fd ) + { + if ( mtce_event.base ) + { + event_base_free( mtce_event.base); + } + close ( mtce_event.fd ); + mtce_event.fd = 0 ; + } +} + + +/************************************************************************************ + * + * event_base_loopcontinue is not supported until version 2.1.2-alpha + * It allows processing of events in main loop instead of in the handler. 
+ * Theoretically this would be nice to use in conjunction with + * event_base_loopexit in the selected fd +void mtcHttpSvr_work ( event_type & mtce_event ) +{ + if ( mtce_event.base ) + { + int rc = event_base_loopcontinue ( mtce_event.base ) ; // EVLOOP_NONBLOCK ); + if ( rc ) + { + ilog ("HTTP event_base_loopcontinue rc:%d\n", rc ); + } + } +} +**************************************************************************************/ + +/* Look for events */ +void mtcHttpSvr_look ( event_type & mtce_event ) +{ + /* Look for Events */ + if ( mtce_event.base ) + { + // rc = event_base_loopexit( mtce_event.base, NULL ) ; // EVLOOP_NONBLOCK ); + event_base_loop( mtce_event.base, EVLOOP_NONBLOCK ); + } +} + +void _create_error_response ( int rc , string & resp_buffer , node_inv_type & inv ) +{ + resp_buffer = "{" ; + resp_buffer.append (" \"status\" : \"fail\""); + if ( rc == FAIL_UNIT_ACTIVE ) + { + resp_buffer.append (",\"reason\" : \"Controller is Active\""); + resp_buffer.append (",\"action\" : \"Swact Controller and then Lock\""); + } + else if ( rc == FAIL_LOW_STORAGE ) + { + resp_buffer.append (",\"reason\" : \"Storage redundancy check\""); + resp_buffer.append (",\"action\" : \"Enable another storage host\""); + } + else if ( rc == FAIL_PATCH_INPROGRESS ) + { + resp_buffer.append (",\"reason\" : \"Operation not permitted while software patching is 'In-Progress'\""); + resp_buffer.append (",\"action\" : \"Wait for patching to complete and then retry operation\""); + } + else if ( rc == FAIL_PATCHED_NOREBOOT ) + { + resp_buffer.append (",\"reason\" : \"Patches have been applied but not loaded on target host'\""); + resp_buffer.append (",\"action\" : \"Please 'lock' then 'unlock' host and retry operation\""); + } + else if ( rc == FAIL_NEED_STORAGE_MON ) + { + resp_buffer.append (",\"reason\" : \"Failed Backend Monitor Quorum check\""); + resp_buffer.append (",\"action\" : \"Enable second controller or additional storage host\""); + } + else if ( rc == 
FAIL_NEED_DUPLEX ) + { + resp_buffer.append (",\"reason\" : \"Controller redundancy check\""); + resp_buffer.append (",\"action\" : \"Enable second controller\""); + } + else if ( rc == FAIL_DEL_UNLOCKED ) + { + resp_buffer.append (",\"reason\" : \"Host is Unlocked\""); + resp_buffer.append (",\"action\" : \"Lock Host and then Delete\""); + } + else if ( rc == FAIL_ADMIN_ACTION ) + { + resp_buffer.append (",\"reason\" : \"Unknown admin action\""); + resp_buffer.append (",\"action\" : \"Check admin action\""); + } + else if ( rc == FAIL_NODETYPE ) + { + resp_buffer.append (",\"reason\" : \"Swact not supported for this Host\""); + resp_buffer.append (",\"action\" : \"Re-evaluate selected operation\""); + } + else if ( rc == FAIL_SWACT_NOINSVMATE ) + { + resp_buffer.append (",\"reason\" : \"No unlocked-enabled controller available to switch activity to\""); + resp_buffer.append (",\"action\" : \"Enable second controller and then retry\""); + } + else if ( rc == FAIL_OPER_INPROGRESS ) + { + resp_buffer.append (",\"reason\" : \"User operation on this host already in-progress\""); + resp_buffer.append (",\"action\" : \"Wait a moment and then retry\""); + } + else if ( rc == FAIL_SWACT_INPROGRESS ) + { + resp_buffer.append (",\"reason\" : \"Swact operation on this host already in-progress\""); + resp_buffer.append (",\"action\" : \"Wait for current operation to complete and then retry\""); + } + else if ( rc == FAIL_JSON_PARSE ) + { + resp_buffer.append (",\"reason\" : \"Mtce cannot parse key:values from Inventory request\""); + resp_buffer.append (",\"action\" : \"Retry operation or contact next level support\""); + } + else if ( rc == FAIL_RESET_POWEROFF ) + { + resp_buffer.append (",\"reason\" : \"Cannot reset a powered off host\""); + resp_buffer.append (",\"action\" : \"Power-on host and then retry\""); + } + else if ( rc == FAIL_NO_IP_SUPPORT ) + { + resp_buffer.append (",\"reason\" : \"Warning: The board management IP address is not provisioned or learned.\""); + 
resp_buffer.append (",\"action\" : \"Make sure the board management controller is powered on, connected to the "); + resp_buffer.append ("board management network and the provisioned MAC address is correct. Board management actions "); + resp_buffer.append ("such as 'reset' or 'power-on' or 'power-off' will not work until the "); + resp_buffer.append ("the host's board management IP is learned.\""); + } + else if ( rc == FAIL_DUP_HOSTNAME ) + { + resp_buffer.append (",\"reason\" : \"Rejecting host-edit with duplicate hostname\""); + resp_buffer.append (",\"action\" : \"Delete host with hostname '"); + resp_buffer.append (inv.name.data()); + resp_buffer.append ("' first and then retry or use different hostname\""); + } + else if ( rc == FAIL_DUP_IPADDR ) + { + resp_buffer.append (",\"reason\" : \"Rejecting host-edit with duplicate ip address\""); + resp_buffer.append (",\"action\" : \"Delete host with ip address '"); + resp_buffer.append (inv.ip.data()); + resp_buffer.append ("' first and then retry or contact system administrator\""); + } + else if ( rc == FAIL_DUP_MACADDR ) + { + resp_buffer.append (",\"reason\" : \"Rejecting host-edit with duplicate mac address\""); + resp_buffer.append (",\"action\" : \"Delete host with mac address '"); + resp_buffer.append (inv.mac.data()); + resp_buffer.append ("' first and then retry or contact system administrator\""); + } + else if (( rc == FAIL_POWER_CONTROL ) || + ( rc == FAIL_RESET_CONTROL )) + { + resp_buffer.append (",\"reason\" : \"The board management controller for this host is not configured\""); + resp_buffer.append (",\"action\" : \"Edit host to configure board management and then retry\""); + } + + else if (( rc == FAIL_RESERVED_NAME ) && + ((( !inv.name.compare ("controller-0")) && ( inv.type.compare("controller"))) || + (( !inv.name.compare ("controller-1")) && ( inv.type.compare("controller"))))) + { + resp_buffer.append (",\"reason\" : \"Can only add reserved '"); + resp_buffer.append (inv.name.data()); + 
resp_buffer.append ("' hostname with personality set to 'controller'\""); + resp_buffer.append (",\"action\" : \"Retry operation with personality set to 'controller'\""); + } + else if (( rc == FAIL_RESERVED_NAME ) && + (( !inv.name.compare ("storage-0")) && ( inv.type.compare("storage")))) + { + resp_buffer.append (",\"reason\" : \"Can only add reserved '"); + resp_buffer.append (inv.name.data()); + resp_buffer.append ("' hostname with personality set to 'storage'\""); + resp_buffer.append (",\"action\" : \"Retry operation with personality set to 'storage'\""); + } + else if ( rc == FAIL_NOT_ACCESSIBLE ) + { + resp_buffer.append (",\"reason\" : \"Maintenance has not yet established communication with the board management controller for this host\""); + resp_buffer.append (",\"action\" : \"Verify board management configuration settings and then retry\""); + } + else if ( rc == FAIL_NOT_CONNECTED ) + { + resp_buffer.append (",\"reason\" : \"Maintenance does not have an established connection to the board management controller for this host\""); + resp_buffer.append (",\"action\" : \"Verify board management configuration settings and then retry. "); + resp_buffer.append ("Note: Maintenance is continuously trying to maintain an established connection using the supplied provisioning and credentials\""); + } + else if ( rc == FAIL_BM_PROVISION_ERR ) + { + resp_buffer.append (",\"reason\" : \"Request rejected due to provisioning semantic check. 
\""); + resp_buffer.append (",\"action\" : \"Please verify that the board management MAC or IP address being used is "); + resp_buffer.append ("formatted correctly or not already provisioned against another host\""); + } + else + { + resp_buffer.append (",\"reason\" : \"Unknown\""); + resp_buffer.append (",\"action\" : \"Undetermined\""); + wlog ("%s no supported reason/action string for error code %d\n", inv.name.c_str(), rc); + } + resp_buffer.append ("}"); +} + +/***************************************************************************** + * + * Name: mtcHttpSvr_vim_req + * + * Handles three 'operations' + * + * 'delete' - based on uuid + * 'modify' - based on list of key - value pairs + * 'add' - based on inventory record + * + ******************************************************************************/ +/* Test Commands: + * + * Test 1: Select host, get uuid and make sure it is unlocked-enabled. + * Verify: Host should fail, reset and auto re-enable. +curl -i -X PATCH -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2112/v1/hosts/8b216803-c47c-40b3-bf61-ed84ff83754e -d '{"uuid":"8b216803-c47c-40b3-bf61-ed84ff83754e", "hostname": "compute-1", "severity": "failed"}' + + * Test 2: Lock Host and issue command with correct uuids and hostname. 
+ * Verify: The host is rebooted/reset +curl -i -X PATCH -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2112/v1/hosts/8b216803-c47c-40b3-bf61-ed84ff83754e -d '{"uuid":"8b216803-c47c-40b3-bf61-ed84ff83754e", "hostname": "compute-1", "severity": "failed"}' + + * Test 3: +curl -i -X PATCH -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2112/v1/hosts/8b216803-c47c-40b3-bf61-ed84ff83754e -d '{"uuid":"8b216803-c47c-40b3-bf61-ed84ff83754e", "hostname": "compute-1", "severity": "degraded"}' + + * Test 4: +curl -i -X PATCH -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2112/v1/hosts/8b216803-c47c-40b3-bf61-ed84ff83754e -d '{"uuid":"8b216803-c47c-40b3-bf61-ed84ff83754e", "hostname": "compute-1", "severity": "cleared"}' + + * Test 5: Unsuppored VIM Command +curl -i -X PATCH -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2112/v1/hosts/8b216803-c47c-40b3-bf61-ed84ff83754e -d '{"uuid":"8b216803-c47c-40b3-bf61-ed84ff83754e", "hostname": "compute-1", "severity": "degradeded"}' +*/ + +string mtcHttpSvr_vim_req ( char * buffer_ptr, + evhttp_cmd_type http_cmd, + int & http_status_code ) +{ + nodeLinkClass * obj_ptr = get_mtcInv_ptr () ; + string response = "" ; + string severity = "" ; + string uuid = "" ; + string hostname = "" ; + + int rc1 = jsonUtil_get_key_val ( buffer_ptr, MTC_JSON_SEVERITY, severity ); + int rc2 = jsonUtil_get_key_val ( buffer_ptr, MTC_JSON_INV_NAME, uuid ); + int rc3 = jsonUtil_get_key_val ( buffer_ptr, MTC_JSON_INV_NAME, hostname ); + + jlog ("%s '%s' request\n", hostname.c_str(), getHttpCmdType_str(http_cmd)); + if ( rc1 | rc2 | rc3 ) + { + wlog ("Failed to parse command key values (%d:%d:%d)\n", rc1, rc2, rc3 ); + response = "{" ; + response.append (" \"status\" : \"fail\""); + response.append (",\"reason\" : 
\"command parse error\""); + response.append (",\"action\" : \"retry command or contact next level support\""); + response.append ("}"); + http_status_code = HTTP_BADREQUEST ; + } + else + { + if ( ! severity.compare("failed" )) + { + if ( obj_ptr->get_adminState ( hostname ) == MTC_ADMIN_STATE__LOCKED ) + { + /* Test 2 */ + ilog ("%s reboot/reset due to failed event (host is locked)\n", hostname.c_str()); + obj_ptr->set_rebootStage ( hostname , MTC_RESETPROG__START ); + obj_ptr->set_adminAction ( hostname , MTC_ADMIN_ACTION__REBOOT ); + } + else + { + /* Test 1 */ + ilog ("%s is now failed due to failed event (host is unlocked)\n", hostname.c_str()); + obj_ptr->mtcInvApi_update_states ( hostname, + get_adminState_str (MTC_ADMIN_STATE__UNLOCKED), + get_operState_str (MTC_OPER_STATE__DISABLED ), + get_availStatus_str(MTC_AVAIL_STATUS__FAILED)); + } + response = "{ \"status\" : \"pass\" }" ; + http_status_code = HTTP_OK ; + } + else if ( ! severity.compare("degraded")) + { + /* Test 3 */ + ilog ("%s severity 'degraded' request from not supported\n", hostname.c_str() ); + response.append ("{ \"status\" : \"fail\""); + response.append (",\"reason\" : \"Controlled host degrade not supported\""); + response.append (",\"action\" : \"Upgrade maintenance package containing support and retry\""); + response.append ("}"); + http_status_code = HTTP_BADMETHOD; + } + else if ( ! 
severity.compare("cleared")) + { + /* Test 4 */ + ilog ("%s severity 'cleared' request not supported\n", hostname.c_str() ); + response.append ("{\"status\" : \"fail\""); + response.append (",\"reason\" : \"Controlled host degrade clear not supported\""); + response.append (",\"action\" : \"Upgrade maintenance package containing support and retry\""); + response.append ("}"); + http_status_code = HTTP_BADMETHOD; + } + else + { + /* Test 5 */ + ilog ("%s severity '%s' request from not supported\n", hostname.c_str(), severity.c_str()); + response.append ("{\"status\" : \"fail\""); + response.append (",\"reason\" : \"Unsupported severity request '"); + response.append (severity); + response.append ("' ,\"action\" : \"Upgrade maintenance package containing support and retry\""); + response.append ("}"); + http_status_code = HTTP_BADREQUEST; + } + } + return (response); +} + + +/***************************************************************************** + * + * Name: mtcHttpSvr_inv_req + * + * Handles three 'operations' + * + * 'delete' - based on uuid + * 'modify' - based on list of key - value pairs + * 'add' - based on inventory record + * + ******************************************************************************/ + +string mtcHttpSvr_inv_req ( char * request_ptr, + evhttp_cmd_type event_type, + int & http_status_code ) +{ + int rc = PASS ; + + nodeLinkClass * obj_ptr = get_mtcInv_ptr (); + msgSock_type * mtclogd_ptr = get_mtclogd_sockPtr (); + + event_type = event_type ; + + /* variable scoping */ + string resp_buffer = "" ; + string key = "operation" ; + string value = "" ; + string hostname = "n/a" ; + + /* Identify the operation */ + rc = jsonUtil_get_key_val ( request_ptr, key, value ) ; + if ( rc == PASS ) + { + node_inv_type inv ; + node_inv_init (inv); + + ilog ("%s %s : '%s'\n", obj_ptr->my_hostname.c_str(), key.c_str(), value.c_str()) ; + + rc = jsonUtil_load_host ( request_ptr, inv ); + if ( rc == PASS ) + { + if ( !inv.name.empty() ) + { + 
hostname = inv.name ; + } + + snprintf (&log_str[0], MAX_API_LOG_LEN-1, "%s [%5d] http event seq: %d Payload:%s: %s", + pt(), getpid(), sequence, hostname.data(), request_ptr); + send_log_message ( mtclogd_ptr, obj_ptr->my_hostname.data(), &filename[0], &log_str[0] ); + + /* ADD */ + if ( ! strncmp ( value.data() , "add" , strlen("add") )) + { + rc = obj_ptr->add_host ( inv ); + if ( rc == PASS ) + { + ilog ("%s Add Operation\n", inv.name.c_str()); + + /* generate event=add alarm if the add_host returns a PASS */ + mtcAlarm_log ( inv.name, MTC_LOG_ID__EVENT_ADD ); + } + + /* A RETRY return from add_host indicates that the node is + * already provisioned. At this point changes can only be + * implemented as modification so call mod_host + */ + if ( rc == RETRY ) + { + rc = obj_ptr->mod_host ( inv ); + } + + /* handle the http response code/message */ + if ( rc == PASS ) + { + resp_buffer = "{ \"status\" : \"pass\" }" ; + } + else + { + elog ("%s Inventory Add failed (%s)\n", + inv.name.length() ? inv.name.c_str() : "none", + inv.uuid.c_str() ); + _create_error_response ( rc , resp_buffer, inv ) ; + } + } + + /* MODIFY ? */ + else if ( ! 
strncmp ( value.data() , "modify" , strlen("modify") )) + { + ilog ("%s Modify Operation\n", inv.name.c_str()); + + /* If the return value of get_host is empty then we need to add the host */ + if ( obj_ptr->get_host ( inv.uuid ).empty() ) + { + wlog ("%s Missing\n", inv.uuid.c_str() ); + ilog ("%s Overriding 'modify' with 'add' operation\n", inv.name.c_str() ); + rc = obj_ptr->add_host ( inv ); + if ( rc == PASS ) + { + resp_buffer = "{ \"status\" : \"pass\" }" ; + http_status_code = HTTP_OK ; + } + else + { + elog ("Inventory Add failed for uuid: %s\n", inv.uuid.c_str()); + resp_buffer = "{" ; + resp_buffer.append (" \"status\" : \"fail\""); + resp_buffer.append (",\"reason\" : \"Rejected - unknown\""); + resp_buffer.append (",\"action\" : \"Switch activity\""); + resp_buffer.append ("}"); + } + } + else + { + rc = obj_ptr->mod_host ( inv ); + if ( rc != PASS ) + { + elog ("Inventory Modify failed for uuid: %s\n", inv.uuid.c_str()); + _create_error_response ( rc , resp_buffer, inv ) ; + } + else + { + resp_buffer = "{ \"status\" : \"pass\" }" ; + http_status_code = HTTP_OK ; + } + } + } + else + { + elog ("Unsupported Inventory Event Operation:%s\n", value.data()); + resp_buffer = "{" ; + resp_buffer.append (" \"status\" : \"fail\""); + resp_buffer.append (",\"reason\" : \"Unsupported "); + resp_buffer.append (value.data()); + resp_buffer.append (" operation\""); + resp_buffer.append (",\"action\" : \"Use delete, add or modify only\""); + resp_buffer.append ("}"); + http_status_code = HTTP_BADREQUEST ; + } + } + else + { + elog ("JSON key:value parse error: %s\n", request_ptr ); + _create_error_response ( FAIL_JSON_PARSE , resp_buffer, inv ) ; + http_status_code = HTTP_BADREQUEST ; + } + } + else + { + elog ("Unable to get key value\n"); + resp_buffer = "{" ; + resp_buffer.append (" \"status\" : \"fail\""); + resp_buffer.append (",\"reason\" : \"String deserialization\""); + resp_buffer.append (",\"action\" : \"Fix event dictionary\""); + resp_buffer.append 
("}"); + http_status_code = HTTP_BADREQUEST ; + } + return resp_buffer ; +} + + +/***************************************************************************** + * + * Name: mtcHttpSvr_sm_req + * + * Handles only 1 'operation' + * + * 'event' - based on hostname, to set host state + * + ******************************************************************************/ + +string mtcHttpSvr_sm_req ( char * request_ptr, + evhttp_cmd_type event_type, + int & http_status_code ) +{ + int rc = PASS ; + http_status_code = HTTP_BADREQUEST ; + + nodeLinkClass * obj_ptr = get_mtcInv_ptr (); + msgSock_type * mtclogd_ptr = get_mtclogd_sockPtr (); + + event_type = event_type ; + + /* variable scoping */ + string resp_buffer = "" ; + string key = "action" ; + string value = "" ; + string hostname = "n/a" ; + + /* Identify the operation */ + rc = jsonUtil_get_key_val ( request_ptr, key, value ) ; + if ( rc == PASS ) + { + node_inv_type inv ; + node_inv_init (inv); + + ilog ("%s %s : '%s'\n", obj_ptr->my_hostname.c_str(), key.c_str(), value.c_str()) ; + + rc = jsonUtil_load_host_state ( request_ptr, inv ); + if ( rc == PASS ) + { + if ( !inv.name.empty() ) + { + hostname = inv.name ; + } + + snprintf (&log_str[0], MAX_API_LOG_LEN-1, "%s [%5d] http event seq: %d Payload:%s: %s", + pt(), getpid(), sequence, hostname.data(), request_ptr); + send_log_message ( mtclogd_ptr, obj_ptr->my_hostname.data(), &filename[0], &log_str[0] ); + + /* state change event */ + if ( !value.compare("event") ) + { + ilog ("%s state change\n", inv.name.c_str()); + + if ( obj_ptr->get_host (inv.name).empty() ) + { + string reason_text = "hostname not provided"; + if( !inv.name.empty() ) + { + reason_text = "host " + inv.name + " not found"; + } + wlog ("%s\n", reason_text.c_str()); + + resp_buffer = "{"; + resp_buffer.append (" \"status\" : \"fail\""); + resp_buffer.append (",\"reason\" : \"" + reason_text + "\""); + resp_buffer.append (",\"action\" : \"event\""); + resp_buffer.append ("}"); + http_status_code 
= HTTP_OK ; + } + else + { + bool executed = false; + if( (inv.avail.compare("failed") == 0) && (inv.oper.compare("disabled")==0) ) + { + rc = obj_ptr->set_host_failed ( inv ); + executed = true; + } + + if (!executed) + { + resp_buffer = "{" ; + resp_buffer.append (" \"status\" : \"fail\""); + resp_buffer.append (",\"reason\" : \"Rejected - operation not supported\""); + resp_buffer.append (",\"action\" : \"event\""); + resp_buffer.append ("}"); + }else + { + if ( rc != PASS ) + { + char errcode[12]; + snprintf(errcode, sizeof(errcode), "%d", rc); + resp_buffer = "{" ; + resp_buffer.append (" \"status\" : \"fail\""); + resp_buffer.append (",\"reason\" : \"Rejected - "); + resp_buffer.append ( errcode ); + resp_buffer.append ("\""); + resp_buffer.append (",\"action\" : \"event\""); + resp_buffer.append ("}"); + } + else + { + resp_buffer = "{ \"status\" : \"pass\" }" ; + http_status_code = HTTP_OK ; + } + } + } + } + else + { + elog ("Unsupported Inventory Event Operation:%s\n", value.data()); + resp_buffer = "{" ; + resp_buffer.append (" \"status\" : \"fail\""); + resp_buffer.append (",\"reason\" : \"Unsupported "); + resp_buffer.append (value.data()); + resp_buffer.append (" operation\""); + resp_buffer.append (",\"action\" : \"Use event only\""); + resp_buffer.append ("}"); + http_status_code = HTTP_BADREQUEST ; + } + } + else + { + elog ("JSON key:value parse error: %s\n", request_ptr ); + _create_error_response ( FAIL_JSON_PARSE , resp_buffer, inv ) ; + http_status_code = HTTP_BADREQUEST ; + } + } + else + { + elog ("Unable to get key value\n"); + resp_buffer = "{" ; + resp_buffer.append (" \"status\" : \"fail\""); + resp_buffer.append (",\"reason\" : \"String deserialization\""); + resp_buffer.append (",\"action\" : \"Fix event dictionary\""); + resp_buffer.append ("}"); + http_status_code = HTTP_BADREQUEST ; + } + return resp_buffer ; +} +/******************************************************************** + * + * Verify this request contains valid client 
info. + * + * 1. the URL must have + * CLIENT_SYSINV_URL or + * CLIENT_VIM_HOSTS_URL or + * CLIENT_VIM_SYSTEMS_URL + * + * 2. the user-Agent header needs to exist and be set to either + * CLIENT_SYSINV_1_0 or + * CLIENT_VIM_1_0 + * + ********************************************************************/ +mtc_client_enum _get_client_id ( struct evhttp_request *req ) +{ + mtc_client_enum client = CLIENT_NONE ; + + /* Parse Headers we care about to verify that it also contains the + * correct User-Agent header and supported version */ + struct evkeyvalq * headers_ptr = evhttp_request_get_input_headers (req); + const char * header_value_ptr = evhttp_find_header (headers_ptr, CLIENT_HEADER); + if ( header_value_ptr ) + { + const char * url_ptr = evhttp_request_get_uri (req); + + hlog2 ("URI: %s\n", url_ptr ); + + if ( ! strncmp ( header_value_ptr, CLIENT_SYSINV_1_0, 20 ) ) + { + hlog3 ("%s\n", header_value_ptr ); + + if ( strstr ( url_ptr, CLIENT_SYSINV_URL) ) + { + client = CLIENT_SYSINV ; + } + } + else if ( ! strncmp ( header_value_ptr, CLIENT_VIM_1_0, 20 ) ) + { + hlog3 ("%s\n", header_value_ptr ); + + if ( strstr ( url_ptr, CLIENT_VIM_HOSTS_URL)) + { + client = CLIENT_VIM_HOSTS ; + } + else if ( strstr ( url_ptr, CLIENT_VIM_SYSTEMS_URL) ) + { + client = CLIENT_VIM_SYSTEMS ; + } + } + else if ( ! strncmp ( header_value_ptr, CLIENT_SM_1_0, 20 ) ) + { + hlog3 ("%s\n", header_value_ptr); + if ( strstr ( url_ptr, CLIENT_SM_URL ) ) + { + client = CLIENT_SM; + } + } + } + else + { + wlog ("Unknown or mismatched client (%d)\n", client) ; + } + return (client); +} + +/***************************************************************************** + * + * Name: mtcHttpSvr_handler + * + * Description: Receive an http event extract the event type and buffer from + * it and call process request handler. + * Send the processed message response back to the connection. 
+ * + * Supported events include: POST, PUT, DELETE + * + ******************************************************************************/ + +void mtcHttpSvr_handler (struct evhttp_request *req, void *arg) +{ + struct evbuffer *resp_buf ; + mtc_client_enum client = CLIENT_NONE ; + int http_status_code = HTTP_NOTFOUND ; + string service = "" ; + string uuid = "" ; + string response = "" ; + string hostname = "n/a" ; + + UNUSED(arg); + + response = "{" ; + response.append (" \"status\" : \"fail\""); + response.append (",\"reason\" : \"not found\""); + response.append (",\"action\" : \"retry with valid host\""); + response.append ("}"); + + nodeLinkClass * obj_ptr = get_mtcInv_ptr (); + msgSock_type * mtclogd_ptr = get_mtclogd_sockPtr (); + event_type * event_ptr = get_eventPtr (); + event_ptr->req = req ; + + /* Get sender must be localhost */ + const char * host_ptr = evhttp_request_get_host (req); + if ( strncmp ( host_ptr , "localhost" , 10 )) + { + wlog ("Message received from unknown host (%s)\n", host_ptr ); + + /* TODO: Fail the request if from unknown host */ + } + + const char * url_ptr = evhttp_request_get_uri (req); + + /* Extract the operation */ + evhttp_cmd_type http_cmd = evhttp_request_get_command (req); + jlog ("%s request from '%s'\n", getHttpCmdType_str(http_cmd), host_ptr ); + + /* Acquire the client that sent this event from the url URI */ + client = _get_client_id ( req ); + if ( client == CLIENT_NONE ) + { + response = ("{\"status\" : \"fail\""); + response.append (",\"reason\" : \"unknown client in User-Agent header\""); + response.append (",\"action\" : \"use "); + response.append (CLIENT_VIM_1_0); + response.append (" or "); + response.append (CLIENT_SYSINV_1_0); + response.append (" in User-Agent header\"}"); + http_status_code = HTTP_BADREQUEST ; + elog ("%s\n", response.c_str()); + evhttp_send_error (event_ptr->req, MTC_HTTP_FORBIDDEN, response.data() ); + return ; + } + + if (( client == CLIENT_VIM_HOSTS ) || + ( client == 
CLIENT_VIM_SYSTEMS )) + { + service = "vim" ; + } + else if ( client == CLIENT_SYSINV ) + { + service = "sysinv" ; + } + else if ( client == CLIENT_SM ) + { + service = "sm"; + } + else + service = "unknown" ; + + snprintf (&log_str[0], MAX_API_LOG_LEN-1, "\n%s [%5d] http event seq: %d with %s %s request from %s:%s", + pt(), getpid(), ++sequence, service.c_str(), getHttpCmdType_str(http_cmd), host_ptr, url_ptr ); + send_log_message ( mtclogd_ptr, obj_ptr->my_hostname.data(), &filename[0], &log_str[0] ); + + switch ( http_cmd ) + { + case EVHTTP_REQ_GET: + case EVHTTP_REQ_DELETE: + { + size_t len = strlen(CLIENT_SYSINV_URL) ; + uuid = (url_ptr+len) ; + hostname = obj_ptr->get_host(uuid) ; + if ( uuid.length() == UUID_LEN ) + { + hostname = obj_ptr->get_host(uuid) ; + } + if (( http_cmd == EVHTTP_REQ_GET ) && ( client == CLIENT_VIM_SYSTEMS )) + { + http_status_code = obj_ptr->mtcVimApi_system_info ( response ); + break ; + } + else + { + http_status_code = HTTP_OK ; + if ( uuid.length() != UUID_LEN ) + { + wlog ("http '%s' request rejected, invalid uuid size (%ld:%s)\n", + getHttpCmdType_str(http_cmd), + uuid.length(), uuid.c_str()); + response = "{" ; + response.append (" \"status\" : \"fail\""); + response.append (",\"reason\" : \"Uuid size error\""); + response.append (",\"action\" : \"Undetermined\""); + response.append ("}"); + http_status_code = HTTP_BADREQUEST ; + } + if (( http_cmd == EVHTTP_REQ_DELETE ) && + (( hostname.length() == 0 ) || ( !hostname.compare("none")))) + { + wlog ("deleting unknown resource: %s\n", uuid.length() ? 
uuid.c_str() : "(null)" ); + response = "{ \"status\" : \"pass\" }" ; + http_status_code = HTTP_OK ; + } + else if (( http_cmd == EVHTTP_REQ_GET ) ) + { + if ( client == CLIENT_VIM_HOSTS ) + { + response = "{\"state\": " ; + response.append(obj_ptr->mtcVimApi_state_get ( hostname, http_status_code )); + + } + else + { + elog ("http GET request not from VIM (client:%d)\n", client ); + response = "{" ; + response.append (" \"status\" : \"fail\""); + response.append (",\"reason\" : \"command not supported for specified User-Agent header\""); + response.append (",\"action\" : \"use "); + response.append (CLIENT_VIM_1_0); + response.append (" as User-Agent\""); + response.append ("}"); + http_status_code = HTTP_BADREQUEST ; + } + } + else + { + ilog ("%s Delete Request Posted (%s)\n", hostname.c_str(), uuid.c_str()); + obj_ptr->set_adminAction ( hostname, MTC_ADMIN_ACTION__DELETE ); + response = "{ \"status\" : \"pass\" }" ; + http_status_code = HTTP_OK ; + } + } + break ; + } + case EVHTTP_REQ_PATCH: + case EVHTTP_REQ_POST: + { + response = "{" ; + response.append (" \"status\" : \"fail\""); + response.append (",\"reason\" : \"no buffer\""); + response.append (",\"action\" : \"retry with data\""); + response.append ("}"); + + /* get the payload */ + struct evbuffer *in_buf = evhttp_request_get_input_buffer ( req ); + if ( in_buf ) + { + size_t len = evbuffer_get_length(in_buf) ; + if ( len ) + { + ev_ssize_t bytes = 0 ; + char * buffer_ptr = (char*)malloc(len+1); + memset ( buffer_ptr, 0, len+1 ); + bytes = evbuffer_remove(in_buf, buffer_ptr, len ); + + if ( bytes <= 0 ) + { + http_status_code = HTTP_BADREQUEST ; + wlog ("http event request with no payload\n"); + } + else + { + http_status_code = HTTP_OK ; + if ( client == CLIENT_VIM_HOSTS ) + { + response = mtcHttpSvr_vim_req ( buffer_ptr, http_cmd, http_status_code ); + } + else if ( client == CLIENT_SYSINV ) + { + response = mtcHttpSvr_inv_req ( buffer_ptr, http_cmd, http_status_code ); + } + else if ( client == 
CLIENT_SM ) + { + response = mtcHttpSvr_sm_req ( buffer_ptr, http_cmd, http_status_code ); + } + else + { + http_status_code = HTTP_BADREQUEST ; + } + } + free ( buffer_ptr ); + } + else + { + http_status_code = MTC_HTTP_LENGTH_REQUIRED ; + wlog ("Http event request has no payload\n"); + } + } + else + { + http_status_code = HTTP_BADREQUEST ; + wlog ("Http event request has no buffer\n"); + } + break ; + } + default: + { + wlog ("Unknown command (%d)\n", http_cmd ); + http_status_code = HTTP_NOTFOUND ; + } + } + + snprintf (&log_str[0], MAX_API_LOG_LEN-1, "%s [%5d] http event seq: %d Response:%s: %s", pt(), getpid(), sequence, hostname.c_str(), response.c_str() ); + send_log_message ( mtclogd_ptr, obj_ptr->my_hostname.data(), &filename[0], &log_str[0] ); + + if (( http_status_code == HTTP_OK ) || ( http_status_code == MTC_HTTP_ACCEPTED )) + { + resp_buf = evbuffer_new(); + jlog ("Event Response: %s\n", response.c_str()); + evbuffer_add_printf (resp_buf, "%s\n", response.data()); + evhttp_send_reply (event_ptr->req, http_status_code, "OK", resp_buf ); + evbuffer_free ( resp_buf ); + } + else + { + elog ("HTTP Event error:%d ; cmd:%s url:%s response:%s\n", + http_status_code, + getHttpCmdType_str(http_cmd), + url_ptr, + response.c_str()); + evhttp_send_error (event_ptr->req, http_status_code, response.data() ); + } +} + +/***************************************************************** + * + * Name : mtcHttpSvr_bind + * + * Description : Setup the HTTP server socket + * + *****************************************************************/ +int mtcHttpSvr_bind ( event_type & event ) +{ + int rc ; + int flags ; + int one = 1; + + event.fd = socket(AF_INET, SOCK_STREAM, 0); + if (event.fd < 0) + { + elog ("HTTP server socket create failed (%d:%m)\n", errno ); + return FAIL_SOCKET_CREATE ; + } + + /* make socket reusable */ + rc = setsockopt(event.fd, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(int)); + + memset(&event.addr, 0, sizeof(struct sockaddr_in)); + 
event.addr.sin_family = AF_INET; + event.addr.sin_addr.s_addr = inet_addr(LOOPBACK_IP) ; /* INADDR_ANY; TODO: Refine this if we can */ + // event.addr.sin_addr.s_addr = INADDR_ANY; + event.addr.sin_port = htons(event.port); + + /* bind port */ + rc = bind ( event.fd, (struct sockaddr*)&event.addr, sizeof(struct sockaddr_in)); + if (rc < 0) + { + elog ("HTTP server port %d bind failed (%d:%m)\n", event.port, errno ); + return FAIL_SOCKET_BIND ; + } + + /* Listen for events */ + rc = listen(event.fd, 10 ); + if (rc < 0) + { + elog ("HTTP server listen failed (%d:%m)\n", errno ); + return FAIL_SOCKET_LISTEN; + } + + /* make non-blocking */ + flags = fcntl ( event.fd, F_GETFL, 0) ; + if ( flags < 0 || fcntl(event.fd, F_SETFL, flags | O_NONBLOCK) < 0) + { + elog ("failed to set HTTP server socket to non-blocking (%d:%m)\n", errno ); + return FAIL_SOCKET_OPTION; + } + + return PASS; +} + +/* Setup the http server */ +int mtcHttpSvr_setup ( event_type & event ) +{ + int rc = PASS ; + if ( ( rc = mtcHttpSvr_bind ( event )) != PASS ) + { + return rc ; + } + else if (event.fd < 0) + { + wlog ("failed to get http server socket file descriptor\n"); + return RETRY ; + } + + event.base = event_base_new(); + if (event.base == NULL) + { + elog ("failed to get http server event base\n"); + return -1; + } + event.httpd = evhttp_new(event.base); + if (event.httpd == NULL) + { + elog ("failed to get httpd server handle\n"); + return -1; + } + + evhttp_set_allowed_methods (event.httpd, EVENT_METHODS ); + + rc = evhttp_accept_socket(event.httpd, event.fd); + if ( rc == -1) + { + elog ("failed to accept on http server socket\n"); + return -1; + } + evhttp_set_gencb(event.httpd, mtcHttpSvr_handler, NULL); + + return PASS ; +} + +/* initialize the mtce http server */ +int mtcHttpSvr_init ( event_type & mtce_event ) +{ + int rc = PASS ; + snprintf (&filename[0], MAX_FILENAME_LEN, "/var/log/%s_event.log", program_invocation_short_name ); + for ( ; ; ) + { + rc = mtcHttpSvr_setup ( mtce_event 
); + if ( rc == RETRY ) + { + wlog ("%s bind failed (%d)\n", EVENT_SERVER, mtce_event.fd ); + } + else if ( rc != PASS ) + { + elog ("%s start failed (rc:%d)\n", EVENT_SERVER, rc ); + } + else if ( mtce_event.fd > 0 ) + { + ilog ("Listening On: 'http event server ' socket %s:%d\n", + inet_ntoa(mtce_event.addr.sin_addr), mtce_event.port ); + rc = PASS ; + break ; + } + if ( rc ) mtcWait_secs (5); + } + return ( rc ) ; +} diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcHttpSvr.h b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcHttpSvr.h new file mode 100644 index 00000000..92ab9de8 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcHttpSvr.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Controller Maintenance Daemon + */ + +typedef struct +{ + struct sockaddr_in addr ; + struct event_base * base ; + struct evhttp_request * req ; + struct evhttp * httpd ; + int fd ; + int port ; +} event_type ; + +event_type * get_eventPtr ( void ); + +void mtcHttpSvr_fini ( event_type & event ); +int mtcHttpSvr_init ( event_type & event ); +int mtcHttpSvr_setup ( event_type & event ); +void mtcHttpSvr_look ( event_type & event ); +void mtcHttpSvr_work ( event_type & event ); diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcHttpUtil.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcHttpUtil.cpp new file mode 100755 index 00000000..f0faf728 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcHttpUtil.cpp @@ -0,0 +1,1373 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Controller Maintenance HTTP Utilities. 
+ * + * Public Interfaces: + * + * Setup Utilities: + * + * mtcHttpUtil_event_init + * mtcHttpUtil_free_conn + * mtcHttpUtil_free_base + * mtcHttpUtil_connect_new + * mtcHttpUtil_request_new + * mtcHttpUtil_payload_add + * mtcHttpUtil_payload_len + * mtcHttpUtil_header_add + * mtcHttpUtil_status + * + * Request Utility and Handler: + * + * mtcHttpUtil_api_request + * mtcHttpUtil_handler + * + * Result Utilities: + * + * mtcHttpUtil_receive + * mtcHttpUtil_get_length + * mtcHttpUtil_get_response + * mtcHttpUtil_log_event + * mtcHttpUtil_event_info + * + * Debug Utilities: + * + * mtcHttpUtil_start_timer + * mtcHttpUtil_stop_timer + * mtcHttpUtil_log_time + * mtcHttpUtil_payload_len + * + */ + +#include + +using namespace std; + +#include "nodeClass.h" /* for ... maintenance class nodeLinkClass */ +#include "httpUtil.h" /* this module header */ +#include "tokenUtil.h" /* for ... tokenUtil_get_ptr */ +#include "mtcHttpUtil.h" /* this module header */ +#include "mtcInvApi.h" /* Inventory REST API header */ +#include "mtcVimApi.h" /* VIM REST API header */ +#include "jsonUtil.h" /* Json Utilities */ +#include "nodeUtil.h" /* Node Utilities */ + +libEvent nullEvent ; + +/** Inventory Add, Get, Update, Query HTTP Rest API handler wraqpper headers */ +extern void mtcInvApi_add_Handler ( struct evhttp_request *req, void *arg ); +extern void mtcInvApi_qry_Handler ( struct evhttp_request *req, void *arg ); +extern void mtcInvApi_get_Handler ( struct evhttp_request *req, void *arg ); +extern void mtcInvApi_cfg_Handler ( struct evhttp_request *req, void *arg ); + +extern void mtcSmgrApi_Handler ( struct evhttp_request *req, void *arg ); +extern void mtcVimApi_Handler ( struct evhttp_request *req, void *arg ); + +void mtcHttpUtil_Handler ( struct evhttp_request *req, void *arg ); + + +/* *********************************************************************** + * + * Name : mtcHttpUtil_event_init + * + * Description: Initialize the supplied libevent structure to default + * start 
values including with the supplied hostname, + * service , ip and port values. + * + * Note: No memory allication is performed. + * + * ************************************************************************/ + +int mtcHttpUtil_event_init ( libEvent * ptr , + string hostname, + string service, + string ip, + int port) +{ + /* Default Starting States */ + ptr->sequence = 0 ; + ptr->request = SERVICE_NONE ; + ptr->state = HTTP__TRANSMIT ; + ptr->log_prefix = hostname ; + ptr->log_prefix.append(" ") ; + ptr->log_prefix.append(service) ; + + /* Execution Controls */ + ptr->stuck = 0 ; + ptr->count = 0 ; + ptr->timeout = 0 ; + ptr->cur_retries = 0 ; + ptr->max_retries = 0 ; + ptr->active = false ; + ptr->mutex = false ; + ptr->found = false ; + ptr->blocking = false ; + ptr->noncritical = false ; + ptr->rx_retry_cnt= 0 ; + ptr->rx_retry_max= 1000 ; + + /* Service Specific Request Info */ + ptr->ip = ip ; + ptr->port = port ; + ptr->hostname = hostname ; + ptr->service = service ; + + /* Copy the mtce token into the libEvent struct for this command */ + ptr->token = get_mtcInv_ptr()->tokenEvent.token ; + + /* Instance Specific Request Data Data */ + ptr->entity_path.clear() ; + ptr->entity_path_next.clear() ; + ptr->address.clear(); + ptr->payload.clear(); + ptr->operation.clear(); + ptr->information.clear(); + + /* HTTP Specific Info */ + ptr->type = EVHTTP_REQ_GET ; /* request type GET/PUT/PATCH etc */ + + + /* Result Info */ + ptr->status = FAIL; + ptr->exec_time_msec = 0 ; + ptr->http_status = 0 ; + ptr->low_wm = ptr->med_wm = ptr->high_wm = false ; + + ptr->response.clear(); + node_inv_init ( ptr->inv_info ) ; + + memset (&ptr->req_str[0], 0, MAX_API_LOG_LEN); + + return (PASS); +} + +static char rest_api_filename[MAX_FILENAME_LEN]; +static char rest_api_log_str [MAX_API_LOG_LEN]; + +void mtcHttpUtil_init ( void ) +{ + mtcHttpUtil_event_init ( &nullEvent, "null", "null" , "0.0.0.0", 0); + nullEvent.request = SERVICE_NONE ; + + snprintf (&rest_api_filename[0], 
MAX_FILENAME_LEN, "/var/log/%s_api.log", + program_invocation_short_name ); +} + +/* *********************************************************************** + * + * Name : mtcHttpUtil_free_conn + * + * Description: Free an event's connection memory if it exists. + * + * ************************************************************************/ + +void mtcHttpUtil_free_conn ( libEvent & event ) +{ + if ( event.conn ) + { + hlog2 ("%s Free Connection (%p)\n", event.log_prefix.c_str(), event.conn ); + evhttp_connection_free ( event.conn ); + event.conn = NULL ; + } + else + { + hlog2 ("%s Already Freed Connection\n", event.log_prefix.c_str()); + } +} + +/* *********************************************************************** + * + * Name : mtcHttpUtil_free_base + * + * Description: Free an event's base memory if it exists. + * + * ************************************************************************/ + +void mtcHttpUtil_free_base ( libEvent & event ) +{ + /* Free the base */ + if ( event.base ) + { + hlog2 ("%s Free Base (%p)\n", event.log_prefix.c_str(), event.base ); + + event_base_free(event.base); + event.base = NULL ; + if ( event.conn ) + { + hlog2 ("%s Free Connection (%p)\n", + event.log_prefix.c_str(), event.conn ); + + evhttp_connection_free ( event.conn ); + event.conn = NULL ; + } + } + else + { + hlog2 ("%s Already Freed Event Base\n", event.log_prefix.c_str()); + } +} + +/* *********************************************************************** + * + * Name : mtcHttpUtil_connect_new + * + * Description: Allocate memory for a new connection off the supplied + * base with respect to an ip and port. 
+ * + * ************************************************************************/ + +int mtcHttpUtil_connect_new ( libEvent & event ) +{ + if ( event.base ) + { + /* Open an http connection to specified IP and port */ + event.conn = evhttp_connection_base_new ( event.base, NULL, + event.ip.c_str(), + event.port ); + /* bind to the correctly-versioned local address */ + if ( event.conn ) + { + return(PASS) ; + } + else + { + elog ("Failed to create http connection (evhttp_connection_base_new)\n"); + return (FAIL_CONNECT); + } + } + else + { + elog ("Null Event base\n"); + return (FAIL_EVENT_BASE); + } +} + +/* *********************************************************************** + * + * Name : mtcHttpUtil_request_new + * + * Description: Allocate memory for a new request off the supplied base. + * + * ************************************************************************/ + +int mtcHttpUtil_request_old ( libEvent & event, + void(*hdlr)(struct evhttp_request *, void *)) +{ + int rc = PASS ; + + /* make a new request and bind the event handler to it */ + event.req = evhttp_request_new( hdlr , event.base ); + if ( ! event.req ) + { + elog ("call to 'evhttp_request_new' returned NULL\n"); + rc = FAIL ; + } + return (rc); +} + +int mtcHttpUtil_request_new ( libEvent & event, + void(*hdlr)(struct evhttp_request *, void *)) +{ + int rc = PASS ; + + /* make a new request and bind the event handler to it */ + event.req = evhttp_request_new( hdlr , &event ); + if ( ! event.req ) + { + elog ("call to 'evhttp_request_new' returned NULL\n"); + rc = FAIL ; + } + return (rc); +} + +/* *********************************************************************** + * + * Name : mtcHttpUtil_payload_add + * + * Description: Add the payload to the output buffer. 
+ * + * @returns 0 for success or -1 in error case + * + * ************************************************************************/ + +int mtcHttpUtil_payload_add ( libEvent & event ) +{ + int rc = PASS ; + + /* Returns the output buffer. */ + event.buf = evhttp_request_get_output_buffer ( event.req ); + + /* Check for no buffer */ + if ( ! event.buf ) + { + elog ("evhttp_request_get_output_buffer returned null (%p)\n", event.req ); + rc = FAIL ; + } + else + { + /* write the body into the buffer */ + rc = evbuffer_add_printf ( event.buf, "%s", event.payload.c_str()); + if ( rc == -1 ) + { + elog ("evbuffer_add_printf returned error (-1)\n"); + rc = FAIL ; + } + else if ( rc == 0 ) + { + elog ("no data added to output buffer (len=0)\n"); + rc = FAIL ; + } + else + { + rc = PASS ; + } + } + return (rc); +} + +/* *********************************************************************** + * + * Name : mtcHttpUtil_payload_len + * + * Description: Calculate payload length from the output buffer + * and return a string representing that length value. + * + * ************************************************************************/ + +string mtcHttpUtil_payload_len ( libEvent * ptr ) +{ + string body_len ; + char len_str[10] ; + int len = evbuffer_get_length ( ptr->req->output_buffer ) ; + if (( len == -1 ) || ( len == 0 )) + { + body_len = "" ; + } + else + { + memset ( &len_str[0], 0 , 10 ); + sprintf ( &len_str[0], "%d", len ); + body_len = len_str ; + hlog3 ("%s Buffer Len:%s\n", ptr->hostname.c_str(), body_len.c_str() ); + } + return ( body_len ); +} + +/* *********************************************************************** + * + * Name : mtcHttpUtil_header_add + * + * Description: Add the supplied list of headers to the http request + * headers section. 
+ * + * ************************************************************************/ + +int mtcHttpUtil_header_add ( libEvent * ptr, http_headers_type * hdrs_ptr ) +{ + int rc = PASS ; + + if ( hdrs_ptr->entries > MAX_HEADERS ) + { + elog ("%s Too many headers (%d:%d)\n", + ptr->hostname.c_str(), MAX_HEADERS, hdrs_ptr->entries ); + return FAIL ; + } + for ( int i = 0 ; i < hdrs_ptr->entries ; i++ ) + { + /* Add the header */ + rc = evhttp_add_header( ptr->req->output_headers, + hdrs_ptr->entry[i].key.c_str() , + hdrs_ptr->entry[i].value.c_str()); + if ( rc ) + { + elog ("evhttp_add_header returned failure (%d:%s:%s)\n", rc, + hdrs_ptr->entry[i].key.c_str(), + hdrs_ptr->entry[i].value.c_str()); + rc = FAIL ; + break ; + } + } + return (rc); +} + +//int mtcHttpUtil_request_make ( libEvent * ptr, +// enum evhttp_cmd_type type, +// string path ) +//{ +// return (evhttp_make_request( ptr->conn, ptr->req, type, path.data())); +//} + +/* *********************************************************************** + * + * Name : mtcHttpUtil_status + * + * Description: Extracts and returns the HTTP execution status + * + * ************************************************************************/ + +int mtcHttpUtil_status ( libEvent & event ) +{ + int rc = PASS ; + + if ( !event.req ) + { + elog ("%s Invalid request\n", event.hostname.length() ? 
event.hostname.c_str() : "unknown" ); + return (FAIL_UNKNOWN_HOSTNAME); + } + event.status = event.http_status = evhttp_request_get_response_code (event.req); + switch (event.status) + { + case HTTP_OK: + case 201: + case 202: + case 203: + case 204: + { + hlog ("%s HTTP_OK (%d)\n", event.hostname.c_str(), event.status ); + event.status = PASS ; + break; + } + /* Authentication error - refresh the token */ + case 401: + { + keyToken_type * token_ptr = tokenUtil_get_ptr() ; + token_ptr->delay = true ; + rc = FAIL_AUTHENTICATION ; + break ; + } + case 0: + { + elog ("%s connection loss (%s:%d)\n", + event.log_prefix.c_str(), event.ip.c_str(), event.port ); + event.status = FAIL_HTTP_ZERO_STATUS ; + rc = FAIL_HTTP_ZERO_STATUS ; + break ; + } + default: + { + hlog3 ("%s Status: %d\n", event.hostname.c_str(), event.status ); + rc = event.status ; + break; + } + } + return (rc); +} + +/* *********************************************************************** + * + * Name : mtcHttpUtil_api_request + * + * Description: Makes an HTTP request based on all the info + * in the supplied libEvent. + * + * This is the primary external interface in this module. + * + * Both blocking and non-blocking request type are supported. 
+ * + * ************************************************************************/ + + +int mtcHttpUtil_api_request ( libEvent & event ) + +{ + http_headers_type hdrs ; + int hdr_entry = 0 ; + int rc = FAIL ; + void(*handler)(struct evhttp_request *, void *) = NULL ; + + /* Default to PUT */ + event.type = EVHTTP_REQ_PUT ; + + if (( event.request == SERVICE_NONE ) || + ( event.request >= SERVICE_LAST )) + { + slog ("Invalid request %d\n", event.request); + event.status = FAIL_BAD_PARM ; + return (event.status); + } + /* Check for memory leaks */ + if ( event.base ) + { + slog ("%s http base memory leak avoidance (%p)\n", + event.log_prefix.c_str(), event.base ); + // event_base_free(event.base); + } + + /* Allocate the base */ + event.base = event_base_new(); + if ( event.base == NULL ) + { + elog ("%s No Memory for Request\n", event.log_prefix.c_str()); + event.status = FAIL_EVENT_BASE ; + return (event.status) ; + } + else + { + hlog2 ("%s base:%p object:%p\n", event.log_prefix.c_str(), event.base, &event ); + } + + if ( event.request == SYSINV_GET ) + { + event.payload = "" ; + + /* Bind the update handler */ + handler = &mtcInvApi_get_Handler ; + + /* The type of HTTP request */ + event.type = EVHTTP_REQ_GET ; + + /* set the timeout */ + event.timeout = get_mtcInv_ptr()->sysinv_timeout ; + } + + else if ( event.request == SYSINV_HOST_QUERY ) + { + event.token.url = MTC_INV_LABEL ; + event.token.url.append( event.hostname.data() ); + + event.payload = "" ; + + hlog ("%s sysinv query %s\n", event.hostname.c_str(), event.token.url.c_str()); + + /* Bind the update handler */ + handler = &mtcInvApi_qry_Handler ; + + /* The type of HTTP request */ + event.type = EVHTTP_REQ_GET ; + + /* set the timeout */ + event.timeout = get_mtcInv_ptr()->sysinv_timeout ; + } + + else if ( event.request == SYSINV_UPDATE ) + { + event.token.url = MTC_INV_LABEL ; + event.token.url.append( event.uuid.data() ); + + /* Bind the generic handler */ + handler = &mtcHttpUtil_Handler ; + + /* 
The type of HTTP request */ + event.type = EVHTTP_REQ_PATCH ; + } + + else if ( event.request == SYSINV_ADD ) + { + event.token.url = MTC_INV_LABEL ; + + event.payload = "{" ; + + event.payload.append ("\"mgmt_ip\":\"") ; + event.payload.append ( event.inv_info.ip ); + event.payload.append ("\""); + + event.payload.append (",\"mgmt_mac\":\""); + event.payload.append ( event.inv_info.mac ); + event.payload.append ("\""); + + event.payload.append (",\"hostname\":\""); + event.payload.append ( event.inv_info.name ); + event.payload.append ("\""); + + event.payload.append (",\"task\":\"\""); + event.payload.append (",\"action\":\"none\""); + + event.payload.append (",\"personality\":\""); + event.payload.append ( event.inv_info.type ); + event.payload.append ("\""); + + event.payload.append (",\"administrative\":\""); + event.payload.append ( event.inv_info.admin ); + event.payload.append ("\""); + + event.payload.append (",\"operational\":\""); + event.payload.append ( event.inv_info.oper ); + event.payload.append ("\""); + + event.payload.append (",\"availability\":\""); + event.payload.append ( event.inv_info.avail ); + event.payload.append ("\""); + + event.payload.append (",\"bm_ip\":\"\""); + + if ( !event.inv_info.name.compare("controller-0") ) + { + event.payload.append (",\"invprovision\":\"provisioned\""); + event.payload.append ( "}"); + } + + /* Bind the unlock handler */ + handler = &mtcInvApi_add_Handler ; + + /* The type of HTTP request */ + event.type = EVHTTP_REQ_POST ; + + /* set the timeout */ + event.timeout = get_mtcInv_ptr()->sysinv_timeout ; + } + else if ( ( event.request == SYSINV_CONFIG_SHOW ) || + ( event.request == SYSINV_CONFIG_MODIFY )) + { + /* Bind the unlock handler */ + handler = &mtcHttpUtil_Handler ; + + /* The type of HTTP request */ + if ( event.request == SYSINV_CONFIG_SHOW ) + { + event.type = EVHTTP_REQ_GET ; + event.token.url = MTC_INV_IUSER_LABEL ; + } + else if ( event.request == SYSINV_CONFIG_MODIFY ) + { + event.type = 
EVHTTP_REQ_PATCH ; + event.token.url = MTC_INV_IUSER_LABEL ; + event.token.url.append ( event.uuid ); + } + else + { + elog ("Unsupported request (%d)\n", event.request ); + + event.status = FAIL_BAD_CASE ; + goto mtcHttpUtil_api_request_done ; + } + + /* set the timeout */ + event.timeout = get_mtcInv_ptr()->sysinv_timeout ; + } + else if (( event.request == VIM_HOST_DISABLED ) || + ( event.request == VIM_HOST_ENABLED ) || + ( event.request == VIM_HOST_OFFLINE ) || + ( event.request == VIM_HOST_FAILED ) || + ( event.request == VIM_DPORT_OFFLINE ) || + ( event.request == VIM_DPORT_FAILED ) || + ( event.request == VIM_DPORT_CLEARED ) || + ( event.request == VIM_DPORT_DEGRADED )) + { + event.token.url = MTC_VIM_LABEL; + event.token.url.append(event.uuid); + + /* Bind the unlock handler */ + handler = &mtcHttpUtil_Handler ; + + /* The type of HTTP request */ + event.type = EVHTTP_REQ_PATCH ; + + /* set the timeout */ + event.timeout = HTTP_VIM_TIMEOUT ; + } + + else if (( event.request == SMGR_QUERY_SWACT ) || + ( event.request == SMGR_START_SWACT ) || + ( event.request == SMGR_HOST_LOCKED ) || + ( event.request == SMGR_HOST_UNLOCKED ) || + ( event.request == SMGR_HOST_DISABLED ) || + ( event.request == SMGR_HOST_ENABLED )) + { + event.timeout = HTTP_SMGR_TIMEOUT ; + handler = &mtcSmgrApi_Handler ; + if ( event.request == SMGR_QUERY_SWACT ) + { + event.type = EVHTTP_REQ_GET ; + } + else + { + event.type = EVHTTP_REQ_PATCH ; + } + } + else + { + slog ("%s Unsupported Request (%d)\n", event.hostname.c_str(), event.request); + event.status = FAIL_BAD_CASE ; + goto mtcHttpUtil_api_request_done ; + } + + /* Establish connection */ + if ( mtcHttpUtil_connect_new ( event )) + { + event.status = FAIL_CONNECT ; + event.conn = NULL ; + goto mtcHttpUtil_api_request_done ; + } + + /* Create request */ + if ( handler == &mtcHttpUtil_Handler ) + { + if ( mtcHttpUtil_request_new ( event, handler )) + { + event.status = FAIL_REQUEST_NEW ; + goto mtcHttpUtil_api_request_done ; + } + } 
+ else + { + if ( mtcHttpUtil_request_old ( event, handler )) + { + event.status = FAIL_REQUEST_NEW ; + goto mtcHttpUtil_api_request_done ; + } + } + + if ( event.request != KEYSTONE_TOKEN ) + { + event.address = event.token.url ; + jlog ("%s Address : %s\n", event.hostname.c_str(), event.token.url.c_str()); + } + + if (( event.type != EVHTTP_REQ_GET ) && + ( event.type != EVHTTP_REQ_DELETE )) + { + /* Add payload to the output buffer but only for PUT, POST and PATCH requests */ + if ( mtcHttpUtil_payload_add ( event )) + { + event.status = FAIL_PAYLOAD_ADD ; + goto mtcHttpUtil_api_request_done ; + } + if ( daemon_get_cfg_ptr()->debug_json ) + { + if ((!string_contains(event.payload,"token")) && + (!string_contains(event.payload,"assword"))) + { + jlog ("%s Payload : %s\n", event.hostname.c_str(), + event.payload.c_str() ); + } + else + { + jlog ("%s Payload : ... contains private content ...\n", + event.hostname.c_str()); + + } + } + } + + /* Build the HTTP Header */ + hdrs.entry[hdr_entry].key = "Host" ; + hdrs.entry[hdr_entry].value = event.ip ; + hdr_entry++; + + hdrs.entry[hdr_entry].key = "X-Auth-Project-Id" ; + hdrs.entry[hdr_entry].value = "admin"; + hdr_entry++; + + if (( event.type != EVHTTP_REQ_GET ) && + ( event.type != EVHTTP_REQ_DELETE )) + { + hdrs.entry[hdr_entry].key = "Content-Length" ; + hdrs.entry[hdr_entry].value = mtcHttpUtil_payload_len ( &event ); + hdr_entry++; + } + + hdrs.entry[hdr_entry].key = "User-Agent" ; + hdrs.entry[hdr_entry].value = "mtce/1.0" ; + hdr_entry++; + + hdrs.entry[hdr_entry].key = "Content-Type" ; + hdrs.entry[hdr_entry].value = "application/json" ; + hdr_entry++; + + hdrs.entry[hdr_entry].key = "Accept" ; + hdrs.entry[hdr_entry].value = "application/json" ; + hdr_entry++; + + if (( event.request != KEYSTONE_TOKEN ) && + ( event.request != SMGR_QUERY_SWACT ) && + ( event.request != SMGR_START_SWACT ) && + ( event.request != SMGR_HOST_LOCKED ) && + ( event.request != SMGR_HOST_UNLOCKED ) && + ( event.request != 
SMGR_HOST_DISABLED ) && + ( event.request != VIM_HOST_DISABLED ) && + ( event.request != VIM_HOST_ENABLED ) && + ( event.request != VIM_HOST_OFFLINE ) && + ( event.request != VIM_HOST_FAILED ) && + ( event.request != VIM_DPORT_OFFLINE ) && + ( event.request != VIM_DPORT_FAILED ) && + ( event.request != VIM_DPORT_CLEARED ) && + ( event.request != VIM_DPORT_DEGRADED ) && + ( event.request != SMGR_HOST_ENABLED )) + { + hdrs.entry[hdr_entry].key = "X-Auth-Token" ; + hdrs.entry[hdr_entry].value = tokenUtil_get_ptr()->token ; + hdr_entry++; + } + + hdrs.entry[hdr_entry].key = "Connection" ; + hdrs.entry[hdr_entry].value = "close" ; + hdr_entry++; + hdrs.entries = hdr_entry ; + + /* Add the headers */ + if ( mtcHttpUtil_header_add ( &event, &hdrs )) + { + event.status = FAIL_HEADER_ADD ; + goto mtcHttpUtil_api_request_done ; + } + + /* get some timestamps and log the request */ + snprintf (&event.req_str[0], MAX_API_LOG_LEN-1, + "\n%s [%5d] %s %s '%s' seq:%d -> Address : %s:%d %s %s ... %s", + pt(), getpid(), + event.hostname.c_str(), + event.service.c_str(), + event.operation.c_str(), + event.sequence, event.ip.c_str(), event.port, + getHttpCmdType_str( event.type ), + event.address.c_str(), + event.information.c_str()); + + gettime ( event.send_time ); + gettime ( event.done_time ); /* create a valid done value */ + + if ( event.request == KEYSTONE_TOKEN ) + { + string path = MTC_POST_KEY_LABEL ; + event.address = path ; + event.prefix_path += path; + jlog ("%s Keystone Address : %s\n", event.hostname.c_str(), event.prefix_path.c_str()); + event.status = evhttp_make_request ( event.conn, event.req, event.type, event.prefix_path.data()); + } + else + { + event.status = evhttp_make_request ( event.conn, event.req, event.type, event.token.url.data()); + } + if ( event.status == PASS ) + { + evhttp_connection_set_timeout(event.req->evcon, event.timeout); + + /* Default to retry for both blocking and non-blocking command */ + event.status = RETRY ; + if ( event.blocking == 
true ) + { + event.log_prefix = event.hostname ; + event.log_prefix.append (" "); + event.log_prefix.append (event.service) ; + event.log_prefix.append (" "); + event.log_prefix.append (event.operation) ; + hlog ("%s Requested (blocking) (to:%d)\n", event.log_prefix.c_str(), event.timeout); + + /* Send the message with timeout */ + event_base_dispatch(event.base); + + goto mtcHttpUtil_api_request_done ; + } + else if (( event.request == SYSINV_UPDATE ) || + ( event.request == SYSINV_CONFIG_SHOW ) || + ( event.request == SYSINV_CONFIG_MODIFY ) || + ( event.request == VIM_HOST_DISABLED) || + ( event.request == VIM_HOST_ENABLED ) || + ( event.request == VIM_HOST_OFFLINE ) || + ( event.request == VIM_HOST_FAILED ) || + ( event.request == VIM_DPORT_OFFLINE ) || + ( event.request == VIM_DPORT_FAILED ) || + ( event.request == VIM_DPORT_CLEARED ) || + ( event.request == VIM_DPORT_DEGRADED) || + ( event.request == SMGR_QUERY_SWACT) || + ( event.request == SMGR_START_SWACT) || + ( event.request == KEYSTONE_TOKEN )) + { + if ( event.operation.compare(SYSINV_OPER__UPDATE_UPTIME) ) + { + hlog ("%s Dispatched (to:%d)\n", event.log_prefix.c_str(), event.timeout); + } + + /* TODO: Set a command timer to free up the resources + * and deal with the error if the handler never runs */ + event.active = true ; + return (event_base_loop(event.base, EVLOOP_NONBLOCK)); + } + else + { + /* Catch all but should not be */ + event.log_prefix = event.hostname ; + event.log_prefix.append (" "); + event.log_prefix.append (event.service) ; + event.log_prefix.append (" "); + event.log_prefix.append (event.operation) ; + slog ("%s Requested (blocking) (to:%d) ----------------------------------------\n", event.log_prefix.c_str(), event.timeout ); + + event_base_dispatch(event.base); + + goto mtcHttpUtil_api_request_done ; + } + } + else + { + elog ("%s Call to 'evhttp_make_request' failed (rc:%d)\n", + event.hostname.c_str(), rc); + } + + return (FAIL_MAKE_REQUEST); + +mtcHttpUtil_api_request_done: + 
+ + if ( event.blocking == true ) + { + mtcHttpUtil_free_conn ( event ); + mtcHttpUtil_free_base ( event ); + + /** + * If tere is an authentication error then request a new token and + * return the error to the caller so that the request can be retried + **/ + if (( event.status == FAIL_AUTHENTICATION ) || + ( event.status == MTC_HTTP_UNAUTHORIZED )) + { + /* Find the host this handler instance is being run against */ + nodeLinkClass * obj_ptr = get_mtcInv_ptr () ; + tokenUtil_new_token ( obj_ptr->tokenEvent, obj_ptr->my_hostname ); + mtcHttpUtil_free_conn ( obj_ptr->tokenEvent ); + mtcHttpUtil_free_base ( obj_ptr->tokenEvent ); + event.status = FAIL_AUTHENTICATION ; + } + } + + return (event.status); +} + + +/* *********************************************************************** + * + * Name : mtcHttpUtil_receive + * + * Description: Issues a non-blocking call to event_base_loop to receive + * from the connection for the specified libevent + * + * @param event is a reference to the callers libEvent struct + * to receive against + * + * @return RETRY if there is no data to receive on the open connection + * Otherwise the status of the command that was received. + * + * ************************************************************************/ + +int mtcHttpUtil_receive ( libEvent & event ) +{ + int rc = event_base_loop(event.base, EVLOOP_NONBLOCK) ; + switch ( rc ) + { + case FAIL: /* 1 - returns 1 if there was nothing to receive , MAY HAVE ALREADY BEEN RECEIVED */ + case PASS: /* 0 - returns 0 if there was a successful receive of something */ + { + // hlog1 ("%s receive O.K. 
(active:%d)\n", event.log_prefix.c_str(), event.active ); + + /* Check in-progress flag */ + if ( event.active == false ) + { + if ( event.status == RETRY ) + { + event.status = FAIL_RETRY ; + } + else + { + /* return the reported handler status */ + rc = event.status ; + } + /* the log_event is called in the mtcHttpUtil_handler */ + if (( event.request == SYSINV_UPDATE ) || + ( event.request == SYSINV_CONFIG_SHOW ) || + ( event.request == SYSINV_CONFIG_MODIFY ) || + ( event.request == KEYSTONE_TOKEN )) + { + ; + } + } + else + { + rc = RETRY ; + } + break ; + } + + /* event_base_loop returns -1 for some unhandled error in the backend */ + case -1: + { + event.active = false ; + elog ("%s Failed event_base_loop (-1)\n", event.log_prefix.c_str()); + rc = FAIL ; + break ; + } + default: + { + event.active = false ; + slog ("%s Failed event_base_loop - Unexpected Return (%d)\n", + event.log_prefix.c_str(), rc ); + rc = FAIL ; + break ; + } + } + return (rc); +} + +/* *********************************************************************** + * + * Name : mtcHttpUtil_get_length + * + * Description: Loads libEvent.response_len with the length of the + * input buffer so we can allocate enough memory to + * copy it into. + * + * Get the length of the json response. + * Deal with oversized messages. 
+ * + * @param event is a reference to the callers libEvent struct + * where it inds the input buffer pointer + * + * @return integer value representing the length of the input buffer + * + * ************************************************************************/ + +int mtcHttpUtil_get_length ( libEvent & event ) +{ + event.response_len = evbuffer_get_length (event.req->input_buffer); + if ( event.response_len == 0 ) + { + hlog ("%s Request Failed - Zero Length Response\n", + event.log_prefix.c_str()); + event.status = FAIL_JSON_ZERO_LEN ; + } +// else if ( event.response_len > MAX_EVENT_LEN ) +// { +// elog ("%s Request Failed - Length Too Long (%d:%ld)\n", +// event.log_prefix.c_str(), MAX_EVENT_LEN, event.response_len ); +// +// event.status = FAIL_JSON_TOO_LONG ; +// } + return ( event.response_len ); +} + +/* Load the response string into the event struct */ +int mtcHttpUtil_get_response ( libEvent & event ) +{ + if ( mtcHttpUtil_get_length ( event ) ) + { + size_t real_len ; + + /* Get a stack buffer, zero it, copy to it and terminate it */ + char * stack_buf_ptr = (char*)malloc (event.response_len+1); + memset ( stack_buf_ptr, 0, event.response_len+1 ); + real_len = evbuffer_remove( event.req->input_buffer, stack_buf_ptr, + event.response_len); + + if ( real_len != event.response_len ) + { + wlog ("%s Length differs from removed length (%ld:%ld)\n", + event.log_prefix.c_str(), + event.response_len, + real_len ); + } + + /* Terminate the buffer , this is where the +1 above is required. 
+ * Without it there is memory corruption reported by Linux */ + *(stack_buf_ptr+event.response_len) = '\0'; + + /* Store the response */ + event.response = stack_buf_ptr ; + + free (stack_buf_ptr); + } + return ( event.status ); +} + +void mtcHttpUtil_log_event ( libEvent & event ) +{ + msgSock_type * mtclogd_ptr = get_mtclogd_sockPtr (); + + string info = "" ; + string event_sig = daemon_get_cfg_ptr()->debug_event ; + + send_log_message ( get_mtclogd_sockPtr(), event.hostname.data(), &rest_api_filename[0], &event.req_str[0] ); + + if (( event.payload.length()) && + ((!string_contains(event.payload,"token")) && + (!string_contains(event.payload,"assword")))) + { + snprintf (&rest_api_log_str[0], MAX_API_LOG_LEN-1, + "%s [%5d] %s -> Payload : %s", pt(), getpid(), event.log_prefix.c_str(), event.payload.c_str()); + send_log_message ( mtclogd_ptr, event.hostname.data(), &rest_api_filename[0], &rest_api_log_str[0] ); + } + + /* Don't log update uptime and update task responses nor + * responses that have token or password in them */ + if ( (event.response.length()) && + (event.operation.compare(SYSINV_OPER__UPDATE_UPTIME)) && + (event.operation.compare(SYSINV_OPER__UPDATE_TASK)) && + (event.operation.compare(SYSINV_OPER__FORCE_TASK)) && + ((!string_contains(event.response,"token")) && + (!string_contains(event.response,"assword")))) + { + snprintf (&rest_api_log_str[0], MAX_API_LOG_LEN-1, + "%s [%5d] %s -> Response: %s", pt(), getpid(), event.log_prefix.c_str(), event.response.c_str()); + send_log_message ( mtclogd_ptr, event.hostname.data(), rest_api_filename, &rest_api_log_str[0] ); + } + + snprintf (&rest_api_log_str[0], MAX_API_LOG_LEN-1, + "%s [%5d] %s %s '%s' seq:%d -> Status : %d {execution time %ld.%06ld secs}\n", + pt(), getpid(), + event.hostname.c_str(), + event.service.c_str(), + event.operation.c_str(), + event.sequence, + event.http_status, + event.diff_time.secs, + event.diff_time.msecs ); + + if ( ( event.diff_time.secs > 2 ) || ( event.http_status != 
HTTP_OK ) ) + { + int len = strlen (rest_api_log_str) ; + snprintf (&rest_api_log_str[len-1], 20, " <---------"); + } + send_log_message ( mtclogd_ptr, event.hostname.data(), &rest_api_filename[0], &rest_api_log_str[0] ); +} + +void mtcHttpUtil_event_info ( libEvent & event ) +{ + ilog ("--- %s request to %s.%d Status:%d \n", + event.log_prefix.c_str(), + event.ip.c_str(), + event.port, + event.status); + ilog ("--- Address : %s\n", event.address.c_str()); + ilog ("--- Payload : %s\n", event.payload.c_str()); + ilog ("--- Response: %s\n", event.response.c_str()); + ilog ("--- TokenUrl: %s\n", event.token.url.c_str()); +} + +libEvent & nodeLinkClass::getEvent ( struct event_base * base_ptr) +{ + struct node * ptr = static_cast(NULL) ; + + /* check for empty list condition */ + if ( head == NULL ) + return (nullEvent) ; + + if ( base_ptr == NULL ) + return (nullEvent) ; + + if ( base_ptr == (struct event_base *)&tokenEvent ) + { + hlog1 ("%s Found libEvent Pointer (%p) tokenEvent (%p) Active : %s\n", + tokenEvent.log_prefix.c_str(), + base_ptr, &tokenEvent, + tokenEvent.active ? "Yes" : "No" ); + return (tokenEvent); + } + + if ( base_ptr == (struct event_base *)&smgrEvent ) + { + hlog1 ("%s Found libEvent Pointer (%p) smgrEvent (%p) Active : %s\n", + smgrEvent.log_prefix.c_str(), + base_ptr, &smgrEvent, + smgrEvent.active ? "Yes" : "No" ); + return (smgrEvent); + } + + if ( base_ptr == (struct event_base *)&sysinvEvent ) + { + hlog1 ("%s Found libEvent Pointer (%p) sysinvEvent (%p) Active : %s\n", + sysinvEvent.log_prefix.c_str(), + base_ptr, &sysinvEvent, + sysinvEvent.active ? 
"Yes" : "No" ); + return (sysinvEvent); + } + + /* Now search the node list */ + for ( ptr = head ; ptr != NULL ; ptr = ptr->next ) + { + if ( base_ptr == (struct event_base *)&ptr->thisReq ) + { + if ( ptr->thisReq.active == true ) + { + if ( workQueue_present ( ptr->thisReq ) == true ) + { + hlog2 ("%s found and is active\n", ptr->thisReq.log_prefix.c_str()); + return (ptr->thisReq) ; + } + else + { + slog ("%s is active but not in work queue\n", ptr->thisReq.log_prefix.c_str()); + ptr->thisReq.active = false ; + } + } + else + { + if ( workQueue_present ( ptr->thisReq ) == true ) + { + slog ("%s is not active ; removing from workQueue\n", ptr->thisReq.log_prefix.c_str() ); + workQueue_del_cmd ( ptr, ptr->thisReq.sequence ); + } + else + { + wlog ("%s is not active and not in workQueue\n", ptr->thisReq.log_prefix.c_str() ); + } + } + return (nullEvent) ; + } + + if ( ptr->next == NULL ) + break ; + } + + wlog ("libEvent for base pointer (%p) not found\n", base_ptr ); + return (nullEvent) ; +} + +/* HTTP Request Handler Dispatcher */ +void nodeLinkClass::mtcHttpUtil_handler ( struct evhttp_request *req, void *arg ) +{ + int rc = PASS ; + + req = req ; + + /* Find the host this handler instance is being run against */ + nodeLinkClass * obj_ptr = get_mtcInv_ptr () ; + + /* Make sure we get a valid event to work on */ + libEvent & event = obj_ptr->getEvent ( (struct event_base *)arg ) ; + if (( event.request >= SERVICE_LAST ) || ( event.request == SERVICE_NONE )) + { + slog ("HTTP Event Lookup Failed for http base (%p) <------\n", arg); + return ; + } + + + /* Check the HTTP Status Code */ + event.status = mtcHttpUtil_status ( event ) ; + if ( event.status == HTTP_NOTFOUND ) + { + elog ("%s returned (Not-Found) (%d)\n", + event.log_prefix.c_str(), + event.status); + event.status = PASS ; + } + + // hlog ("%s Status:%d Req:%p\n", event.log_prefix.c_str(), event.status); + + else if (( event.status != PASS ) && ( ! 
req )) + { + elog ("%s Request Timeout (%d)\n", + event.log_prefix.c_str(), + event.timeout); + + event.status = FAIL_TIMEOUT ; + goto _handler_done ; + } + + else if ( event.status != PASS ) + { + goto _handler_done ; + } + + /* Delete commands don't have a response unless there is an error. + * Deal with this as a special case - + * Currently only Neutron uses the delete */ + if ( event.type == EVHTTP_REQ_DELETE ) + { + if ( mtcHttpUtil_get_length ( event ) != 0 ) + { + /* Preserve the incoming status over the get response */ + rc = event.status ; + mtcHttpUtil_get_response ( event ) ; + event.status = rc ; + } + if (event.status == FAIL_JSON_ZERO_LEN ) + event.status = PASS ; + } + else if ( mtcHttpUtil_get_response ( event ) != PASS ) + { + elog ("%s failed to get response\n", event.log_prefix.c_str()); + goto _handler_done ; + } + + if ( event.request == KEYSTONE_TOKEN ) + { + /* TODO: Deal with Failure */ + ilog ("CALLING TOKENUTIL_HANDLER !!!!\n"); + rc = tokenUtil_handler ( event ); + if ( rc ) + { + wlog ("%s tokenUtil_handler reported failure (%d)\n", event.hostname.c_str(), rc ); + } + } + else if (( event.request == SYSINV_UPDATE )|| + ( event.request == SYSINV_CONFIG_SHOW ) || + ( event.request == SYSINV_CONFIG_MODIFY )) + { + /* TODO: Deal with Failure */ + rc = mtcInvApi_handler ( event ); + if ( rc ) + { + wlog ("%s mtcInvApi_handler reported failure (%d)\n", event.hostname.c_str(), rc ); + } + } + else if (( event.request == VIM_HOST_DISABLED )|| + ( event.request == VIM_HOST_ENABLED ) || + ( event.request == VIM_HOST_OFFLINE ) || + ( event.request == VIM_HOST_FAILED ) || + ( event.request == VIM_DPORT_OFFLINE) || + ( event.request == VIM_DPORT_FAILED ) || + ( event.request == VIM_DPORT_CLEARED) || + ( event.request == VIM_DPORT_DEGRADED )) + { + rc = mtcVimApi_handler ( event ); + if ( rc ) + { + wlog ("%s mtcVimApi_handler reported failure (%d)\n", event.hostname.c_str(), rc ); + } + } + else + { + wlog ( "%s has unknown request id (%d)\n", + 
event.log_prefix.c_str(), + event.request ); + } + +_handler_done: + + event.active = false ; + + gettime ( event.done_time ); + timedelta ( event.send_time, event.done_time, event.diff_time ); + +// Redundant log - already logged in the work queue FSM +// if ( event.status ) +// { +// elog ( "%s Failed (rc:%d)\n", +// event.log_prefix.c_str(), +// event.status ); +// } + mtcHttpUtil_log_event ( event ); + + if ( event.blocking == false ) + { + // mtcHttpUtil_free_conn ( event ); + // mtcHttpUtil_free_base ( event ); + + /** + * If tere is an authentication error then request a new token and + * return the error to the caller so that the request can be retried + **/ + if (( event.status == FAIL_AUTHENTICATION ) || + ( event.status == MTC_HTTP_UNAUTHORIZED )) + { + /* Find the host this handler instance is being run against */ + nodeLinkClass * obj_ptr = get_mtcInv_ptr () ; + tokenUtil_new_token ( obj_ptr->tokenEvent, obj_ptr->my_hostname ); + mtcHttpUtil_free_conn ( obj_ptr->tokenEvent ); + mtcHttpUtil_free_base ( obj_ptr->tokenEvent ); + event.status = FAIL_AUTHENTICATION ; + } + } +} + + +/* HTTP Handler Dispatcher - wrapper abstracted from nodeLinkClass */ +void mtcHttpUtil_Handler ( struct evhttp_request *req, void *arg ) +{ + nodeLinkClass * obj_ptr = get_mtcInv_ptr () ; + obj_ptr->mtcHttpUtil_handler ( req , arg ); +} diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcHttpUtil.h b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcHttpUtil.h new file mode 100755 index 00000000..d802993b --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcHttpUtil.h @@ -0,0 +1,87 @@ +#ifndef __INCLUDE_MTCHTTPUTIL_H__ +#define __INCLUDE_MTCHTTPUTIL_H__ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Controller Maintenance ... + * + * libevent HTTP support utilities and control structure support header + */ + +#include /* for ... 
string */ +#include /* for ... http libevent client */ + +using namespace std; + +#include "httpUtil.h" + + +void mtcHttpUtil_init ( void ); + +int mtcHttpUtil_event_init ( libEvent * ptr , + string hostname, + string service, + string ip, + int port ); + +/** Maximum number of headers that can be added to an HTTP message. */ +#define MAX_HEADERS (10) + +/** Add payload to the HTTP message body. */ +int mtcHttpUtil_payload_add ( libEvent & event ); + +/** Add all headers in header table to the HTTP connection message. */ +int mtcHttpUtil_header_add ( libEvent * ptr, http_headers_type * hdrs_ptr ); + +/** Create an HTTP request. */ +int mtcHttpUtil_request_make ( libEvent * ptr, enum evhttp_cmd_type type, string path ); + +/** Open a connection to an HTTP server. */ +int mtcHttpUtil_connect_new ( libEvent & event ); + +/** Get a new HTTP request pointer. */ +int mtcHttpUtil_request_new ( libEvent & event, + void(*hdlr)(struct evhttp_request *, void *)); + +/** Common REST API Request Utility */ +int mtcHttpUtil_api_request ( libEvent & event ); + +/** Common REST API Request Utility */ +int mtcHttpUtil_request ( libEvent & event , bool block, + void(*hdlr)(struct evhttp_request *, void *)); + +/** Common REST API Receive Utility for non-blocking requests */ +int mtcHttpUtil_receive ( libEvent & event ); + +/** HTTP response status checker */ +int mtcHttpUtil_status ( libEvent & event ); + +/** Free the libEvent */ +void mtcHttpUtil_free_base ( libEvent & event ); + +/** Free the event lib connection */ +void mtcHttpUtil_free_conn ( libEvent & event ); + +/** TODO: FIXME: Get the payload string length. 
*/ +string mtcHttpUtil_payload_len ( libEvent * ptr ); + +/** Get the length of the json response */ +int mtcHttpUtil_get_length ( libEvent & event ); + +/** Load the json response into the event struct */ +int mtcHttpUtil_get_response ( libEvent & event ); + +/** print event filtered event */ +void mtcHttpUtil_log_event ( libEvent & event ); + +void mtcHttpUtil_event_info ( libEvent & event ); + +const char * getHttpCmdType_str ( evhttp_cmd_type type ); + +#endif /* __INCLUDE_MTCHTTPUTIL_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcInvApi.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcInvApi.cpp new file mode 100755 index 00000000..1842eff9 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcInvApi.cpp @@ -0,0 +1,1652 @@ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Controller Maintenance + * Access to Inventory Database via REST API Interface. + * + */ + +/** This file implements the a set of mtcInvApi utilities that maintenance + * calls upon to set/get host information to/from the sysinv database. + * + * The APIs exposed from this file are + * + * mtcInvApi_read_inventory - Reads all the host inventory records from the + * sysinv database in a specified batch number. + * mtcInvApi_add_host - Adds a host to the sysinv database. + * mtcInvApi_load_host - Loads the inventory content for a specified host. + * mtcInvApi_update_task - Updates the task field of the specified host. + * mtcInvApi_update_uptime - Updates the uptime of the specified host. + * mtcInvApi_update_value - Updates any field of the specified host. + * mtcInvApi_update_state - Updates a maintenance state of specified host. + * mtcInvApi_update_states - Updates all maintenance states of specified host. + * mtcInvApi_force_states - Force updates all maintenance states of specified host. + * + * Each utility is paired with a private handler. 
+ * + * mtcInvApi_get_handler - handles response for mtcInvApi_read_inventory + * mtcInvApi_add_Handler - handles response for mtcInvApi_add_host + * mtcInvApi_qry_handler - handles response for mtcInvApi_load_host + * mtcInvApi_handler - handles inventory specific response for all update utilities + * + * Warning: These calls cannot be nested. + * + **/ + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "inv" + +#include "nodeBase.h" /* for ... Base Service Header */ +#include "nodeClass.h" /* for ... maintenance class nodeLinkClass */ +#include "nodeUtil.h" /* for ... Utility Service Header */ +#include "jsonUtil.h" /* for ... Json utilities */ +#include "mtcInvApi.h" /* this .. module header */ +#include "mtcKeyApi.h" /* for ... keystone service utilities */ +#include "mtcNodeHdlrs.h" /* for ... mtcTimer_handler ( .. ) */ + + +/*********************************************************************** + * + * Name : mtcInvApi_read_inventory + * + * Purpose : Issue an inventory GET request for a specified batch + * number of inventory elements. + * + */ +int mtcInvApi_read_inventory ( int batch ) +{ + char batch_str [10] ; + int rc = PASS ; + int count = 0 ; + int loops = 0 ; + + nodeLinkClass * obj_ptr = get_mtcInv_ptr (); + string hostname = obj_ptr->get_my_hostname(); + if ( rc != PASS ) + { + wlog ("Failed to get an authentication token ... 
requesting retry\n"); + return (RETRY); + } + + rc = mtcHttpUtil_event_init ( &obj_ptr->sysinvEvent, + obj_ptr->my_hostname, + "mtcInvApi_read_inventory", + hostUtil_getServiceIp (SERVICE_SYSINV), + hostUtil_getServicePort(SERVICE_SYSINV)); + if ( rc ) + { + elog ("%s failed to allocate libEvent memory (%d)\n", hostname.c_str(), rc ); + return (rc); + } + + /* Manage batch range */ + if (( batch == 0 ) || ( batch > MTC_INV_BATCH_MAX )) + batch = MTC_INV_BATCH_MAX ; + + /* Add the batch integer to the request label */ + sprintf (&batch_str[0], "%d", batch ); + + obj_ptr->sysinvEvent.token.url = MTC_INV_LABEL ; + obj_ptr->sysinvEvent.token.url.append(MTC_INV_BATCH); + obj_ptr->sysinvEvent.token.url.append(&batch_str[0]); + + obj_ptr->sysinvEvent.request = SYSINV_GET ; + obj_ptr->sysinvEvent.operation = "get batch" ; + obj_ptr->sysinvEvent.uuid = obj_ptr->get_uuid (obj_ptr->my_hostname); + obj_ptr->sysinvEvent.information = batch_str ; + obj_ptr->sysinvEvent.blocking = true ; + + dlog ("Path:%s\n", obj_ptr->sysinvEvent.token.url.c_str()); + + /* The inventory server returns a path the the next element. + * This code manages the setting the entity path that allows + * a walk through the inventory on subsequent calls if the + * batch number is less that the provisioned inventory number + */ + + do + { + /* New request, no entiry path from previous request present. */ + if (obj_ptr->sysinvEvent.entity_path.length() == 0 ) + obj_ptr->sysinvEvent.entity_path = obj_ptr->sysinvEvent.token.url.data() ; + + /* Inventory server did not specify a 'next' path meaning we are + * at the end of inventory so start at the beginning for this + * request + */ + else if ( obj_ptr->sysinvEvent.entity_path_next.length() == 0 ) + obj_ptr->sysinvEvent.entity_path = obj_ptr->sysinvEvent.token.url.data() ; + + /* Get the next batch using same batch size, of inventory + * using the inventory server's supplied 'next' entity path. 
*/ + else + obj_ptr->sysinvEvent.entity_path = obj_ptr->sysinvEvent.entity_path_next ; + + /* load constructed path */ + dlog ("Element Path:%s\n", obj_ptr->sysinvEvent.entity_path.c_str()); + + obj_ptr->sysinvEvent.token.url = obj_ptr->sysinvEvent.entity_path ; + + /* Make the inventory request and return that result */ + obj_ptr->sysinvEvent.status = PASS ; + + do + { + rc = mtcHttpUtil_api_request ( obj_ptr->sysinvEvent ) ; + if ( rc != PASS ) + { + count++ ; + wlog ("failed Sysinv Database Get request (%d) ... retrying (%d)\n", + rc , count ); + } + } while ( ( rc != PASS ) && ( count < obj_ptr->api_retries ) ) ; + + if ( rc ) + { + elog ("%s Sysinv Database Get Failed (%d) (cnt:%d)\n", + obj_ptr->sysinvEvent.entity_path.c_str(), rc , count ); + return (FAIL); + } + + loops++ ; + } while ( obj_ptr->sysinvEvent.entity_path_next.length()); + dlog3 ("Inventory fetched %d hosts in %d iteration using a batch of %d\n", + obj_ptr->sysinvEvent.count, loops, batch ); + + return ( obj_ptr->sysinvEvent.status ); +} + + +/* ****************************************************************** + * + * Name: mtcInvApi_add_host + * + * Purpose: Add a host to the database + * + * Note: Presently really only used to add the first controller. 
+ * + *********************************************************************/ +int mtcInvApi_add_host ( node_inv_type & info ) +{ + nodeLinkClass * obj_ptr = get_mtcInv_ptr(); + string hostname = obj_ptr->get_my_hostname(); + int rc = mtcHttpUtil_event_init ( &obj_ptr->sysinvEvent, + obj_ptr->my_hostname, + "mtcInvApi_add_host", + hostUtil_getServiceIp (SERVICE_SYSINV), + hostUtil_getServicePort(SERVICE_SYSINV)); + if ( rc ) + { + elog ("%s failed to allocate libEvent memory (%d)\n", hostname.c_str(), rc ); + return (rc); + } + obj_ptr->sysinvEvent.inv_info = info ; + obj_ptr->sysinvEvent.request = SYSINV_ADD ; + obj_ptr->sysinvEvent.operation = "add host" ; + obj_ptr->sysinvEvent.blocking = true ; + + dlog ("%s ip:%s mac:%s\n", info.name.c_str(), + info.ip.c_str(), + info.mac.c_str()); + + return (mtcHttpUtil_api_request ( obj_ptr->sysinvEvent )); +} + + +/* ****************************************************************** + * + * Name: mtcInvApi_load_host + * + * Purpose: Load all the data for a specified host from the + * Sysinv database + * + *********************************************************************/ +int nodeLinkClass::mtcInvApi_load_host ( string & hostname, node_inv_type & info ) +{ + string path = "" ; + GET_NODE_PTR(hostname); + int rc = mtcHttpUtil_event_init ( &node_ptr->sysinvEvent, + node_ptr->hostname, + "mtcInvApi_load_host", + hostUtil_getServiceIp (SERVICE_SYSINV), + hostUtil_getServicePort(SERVICE_SYSINV)); + if ( rc ) + { + elog ("%s failed to allocate libEvent memory (%d)\n", hostname.c_str(), rc ); + return (rc); + } + + /* Set the host context */ + node_ptr->sysinvEvent.hostname = hostname ; + node_ptr->sysinvEvent.request = SYSINV_HOST_QUERY ; /* TODO: change to _HOST_LOAD */ + node_ptr->sysinvEvent.operation = SYSINV_OPER__LOAD_HOST ; + node_ptr->sysinvEvent.uuid = node_ptr->uuid; + node_ptr->sysinvEvent.information = hostname ; + node_ptr->sysinvEvent.blocking = true ; + + rc = mtcHttpUtil_api_request ( node_ptr->sysinvEvent ) ; 
+ if ( node_ptr->sysinvEvent.status == HTTP_NOTFOUND ) + { + wlog ("%s not found in database\n", hostname.c_str()); + } + else if ( rc ) + { + wlog ("%s failed to send request to Sysinv Database (rc:%d)\n", + node_ptr->sysinvEvent.hostname.c_str(), rc ); + } + else if ( node_ptr->sysinvEvent.status == PASS ) + { + dlog ("%s found in database\n", hostname.c_str() ); + + /* check for board management region mode and issue + * a retry if its not set properly */ + + info = node_ptr->sysinvEvent.inv_info ; + } + return (rc); +} + + + +/***************************************************************************** + * + * Name : mtcInvApi_update_task, mtcInvApi_force_task, mtcInvApi_update_task_now + * + * Purpose : Write the specified task and current uptime for the specified host + * to the inventory database. + * + * Note : The 'force' version is a critical command that has retries. + * Failure of the force command will fail an action handler. + * + * Address : /v1/ihosts/63975e14-60bc-4ecd-b5c5-f2771676c0a2 + * Payload : [{"path":"/task","value":"...","op":"replace"},{"path":"/uptime","value":"123336","op":"replace"}] + * Response: {"iports": [{"href": "http://192.168.204.2/v1/ihosts/63975e14-60bc-4ecd-b5c5-f2771676c0a2/iports" ... 
+ * + *****************************************************************************/ + +int nodeLinkClass::mtcInvApi_update_task ( struct nodeLinkClass::node * node_ptr, + const char * task_str_ptr, int one ) +{ + char buffer[MAX_TASK_STR_LEN+1] ; + snprintf ( buffer, MAX_TASK_STR_LEN, task_str_ptr, one ); + return (mtcInvApi_update_task ( node_ptr, buffer)); +} + +int nodeLinkClass::mtcInvApi_update_task ( struct nodeLinkClass::node * node_ptr, + const char * task_str_ptr, int one, int two ) +{ + char buffer[MAX_TASK_STR_LEN+1] ; + snprintf ( buffer, MAX_TASK_STR_LEN, task_str_ptr, one, two); + return (mtcInvApi_update_task ( node_ptr, buffer)); +} + +int nodeLinkClass::mtcInvApi_update_task ( struct nodeLinkClass::node * node_ptr, + string task ) +{ + char str [10] ; + CHK_NODE_PTR (node_ptr); + mtcHttpUtil_event_init ( &node_ptr->httpReq, + node_ptr->hostname, + "mtcInvApi_update_task", + hostUtil_getServiceIp ( SERVICE_SYSINV ), + hostUtil_getServicePort ( SERVICE_SYSINV )); + + /* Set the host context */ + node_ptr->httpReq.hostname = node_ptr->hostname ; + node_ptr->httpReq.uuid = node_ptr->uuid; + node_ptr->httpReq.request = SYSINV_UPDATE ; + node_ptr->httpReq.operation = SYSINV_OPER__UPDATE_TASK ; + node_ptr->httpReq.max_retries = 0 ; + node_ptr->httpReq.cur_retries = 0 ; + node_ptr->httpReq.noncritical = true ; + node_ptr->httpReq.information = task ; + node_ptr->httpReq.timeout = get_mtcInv_ptr()->sysinv_noncrit_timeout ; + + /* Store the immediate task state with the node realiing that the value + * is not stored in the database yet but it reflective of where the code + * execution is. 
*/ + node_ptr->task = task ; + node_ptr->httpReq.information = task ; + + unsigned int uptime = get_uptime ( node_ptr->hostname ) ; + sprintf ( str , "%u", uptime ); + + node_ptr->httpReq.payload = "[" ; + node_ptr->httpReq.payload.append ("{\"path\":\"/") ; + node_ptr->httpReq.payload.append (MTC_JSON_INV_TASK); + node_ptr->httpReq.payload.append ("\",\"value\":\""); + node_ptr->httpReq.payload.append ( task ); + node_ptr->httpReq.payload.append ( "\",\"op\":\"replace\"},"); + node_ptr->httpReq.payload.append ("{\"path\":\"/") ; + node_ptr->httpReq.payload.append (MTC_JSON_INV_UPTIME); + node_ptr->httpReq.payload.append ("\",\"value\":\""); + node_ptr->httpReq.payload.append ( str ); + node_ptr->httpReq.payload.append ( "\",\"op\":\"replace\"}]"); + + if ( task.empty() ) + { + ilog ("%s task clear (seq:%d)\n", node_ptr->hostname.c_str(), node_ptr->oper_sequence); + } + else + { + ilog ("%s Task: %s (seq:%d)\n", node_ptr->hostname.c_str(), task.c_str(), node_ptr->oper_sequence ); + } + return(this->workQueue_enqueue( node_ptr->httpReq)); +} + +int nodeLinkClass::mtcInvApi_update_task ( string hostname, + string task ) +{ + GET_NODE_PTR(hostname); /* allocates nodeLinkClass */ + return (mtcInvApi_update_task ( node_ptr, task )); +} + + +int nodeLinkClass::mtcInvApi_update_task_now ( struct nodeLinkClass::node * node_ptr, + string task ) +{ + char str [10] ; + CHK_NODE_PTR(node_ptr); + mtcHttpUtil_event_init ( &this->sysinvEvent, + node_ptr->hostname, + "mtcInvApi_update_task_now", + hostUtil_getServiceIp ( SERVICE_SYSINV ), + hostUtil_getServicePort ( SERVICE_SYSINV )); + + /* Set the host context */ + this->sysinvEvent.hostname = node_ptr->hostname ; + this->sysinvEvent.uuid = node_ptr->uuid; + this->sysinvEvent.request = SYSINV_UPDATE ; + this->sysinvEvent.operation = SYSINV_OPER__UPDATE_TASK ; + this->sysinvEvent.max_retries = 0 ; + this->sysinvEvent.cur_retries = 0 ; + this->sysinvEvent.noncritical = true ; + this->sysinvEvent.information = task ; + 
this->sysinvEvent.timeout = get_mtcInv_ptr()->sysinv_noncrit_timeout ; + this->sysinvEvent.blocking = true ; + + /* Store the immediate task state with the node realiing that the value + * is not stored in the database yet but it reflective of where the code + * execution is. */ + node_ptr->task = task ; + this->sysinvEvent.information = task ; + + unsigned int uptime = get_uptime ( node_ptr->hostname ) ; + sprintf ( str , "%u", uptime ); + + this->sysinvEvent.payload = "[" ; + this->sysinvEvent.payload.append ("{\"path\":\"/") ; + this->sysinvEvent.payload.append (MTC_JSON_INV_TASK); + this->sysinvEvent.payload.append ("\",\"value\":\""); + this->sysinvEvent.payload.append ( task ); + this->sysinvEvent.payload.append ( "\",\"op\":\"replace\"},"); + this->sysinvEvent.payload.append ("{\"path\":\"/") ; + this->sysinvEvent.payload.append (MTC_JSON_INV_UPTIME); + this->sysinvEvent.payload.append ("\",\"value\":\""); + this->sysinvEvent.payload.append ( str ); + this->sysinvEvent.payload.append ( "\",\"op\":\"replace\"}]"); + + if ( task.empty() ) + { + ilog ("%s task clear (seq:%d)\n", node_ptr->hostname.c_str(), node_ptr->oper_sequence); + } + else + { + ilog ("%s Task: %s (seq:%d)\n", node_ptr->hostname.c_str(), task.c_str(), node_ptr->oper_sequence ); + } + return (mtcHttpUtil_api_request ( this->sysinvEvent )) ; +} + +int nodeLinkClass::mtcInvApi_update_task_now ( string hostname, + string task ) +{ + GET_NODE_PTR(hostname); /* allocates nodeLinkClass */ + return (mtcInvApi_update_task_now ( node_ptr, task )); +} + + +int nodeLinkClass::mtcInvApi_force_task ( struct nodeLinkClass::node * node_ptr, + string task ) +{ + char str [10] ; + + CHK_NODE_PTR(node_ptr); + mtcHttpUtil_event_init ( &node_ptr->httpReq, + node_ptr->hostname, + "mtcInvApi_force_task", + hostUtil_getServiceIp ( SERVICE_SYSINV ), + hostUtil_getServicePort ( SERVICE_SYSINV )); + + /* Set the host context */ + node_ptr->httpReq.hostname = node_ptr->hostname ; + node_ptr->httpReq.uuid = 
node_ptr->uuid; + node_ptr->httpReq.request = SYSINV_UPDATE ; + node_ptr->httpReq.operation = SYSINV_OPER__FORCE_TASK ; + node_ptr->httpReq.max_retries = 3 ; + node_ptr->httpReq.cur_retries = 0 ; + node_ptr->httpReq.information = task ; + node_ptr->httpReq.timeout = get_mtcInv_ptr()->sysinv_timeout ; + + if ( task.empty() ) + { + ilog ("%s task clear (seq:%d) (was:%s)\n", + node_ptr->hostname.c_str(), + node_ptr->oper_sequence, + node_ptr->task.empty() ? "empty" : node_ptr->task.c_str()); + } + else + { + ilog ("%s Task: %s (seq:%d)\n", + node_ptr->hostname.c_str(), + task.c_str(), + node_ptr->oper_sequence ); + } + + /* Store the immediate task state with the node realiing that the value + * is not stored in the database yet but it reflective of where the code + * execution is. */ + node_ptr->task = task ; + node_ptr->httpReq.information = task ; + + unsigned int uptime = get_uptime ( node_ptr->hostname ) ; + sprintf ( str , "%u", uptime ); + + node_ptr->httpReq.payload = "[" ; + node_ptr->httpReq.payload.append ("{\"path\":\"/") ; + node_ptr->httpReq.payload.append (MTC_JSON_INV_TASK); + node_ptr->httpReq.payload.append ("\",\"value\":\""); + node_ptr->httpReq.payload.append ( task ); + node_ptr->httpReq.payload.append ( "\",\"op\":\"replace\"},"); + node_ptr->httpReq.payload.append ("{\"path\":\"/") ; + node_ptr->httpReq.payload.append (MTC_JSON_INV_UPTIME); + node_ptr->httpReq.payload.append ("\",\"value\":\""); + node_ptr->httpReq.payload.append ( str ); + node_ptr->httpReq.payload.append ( "\",\"op\":\"replace\"}]"); + + return(this->workQueue_enqueue( node_ptr->httpReq)); +} + +int nodeLinkClass::mtcInvApi_force_task ( string hostname, + string task ) +{ + GET_NODE_PTR(hostname); /* allocates nodeLinkClass */ + return (mtcInvApi_force_task (node_ptr, task )); +} + +/***************************************************************************** + * + * Name : mtcInvApi_update_value + * + * Purpose : Write the specified value to the specified key for the 
specified + * host to the inventory database. + * + *****************************************************************************/ +int nodeLinkClass::mtcInvApi_update_value ( struct nodeLinkClass::node * node_ptr, + string key, + string value ) +{ + CHK_NODE_PTR(node_ptr); + int rc = mtcHttpUtil_event_init ( &node_ptr->httpReq, + node_ptr->hostname, + "mtcInvApi_update_value", + hostUtil_getServiceIp (SERVICE_SYSINV), + hostUtil_getServicePort(SERVICE_SYSINV)); + if ( rc ) + { + elog ("%s failed to allocate libEvent memory (%d)\n", node_ptr->hostname.c_str(), rc ); + return (rc); + } + + rc = update_key_value ( node_ptr->hostname, key , value ); + if ( rc ) + { + slog ("%s failed to update '%s' to '%s' internally (%d)\n", + node_ptr->hostname.c_str(), + key.c_str(), + value.c_str(), + rc ); + + return (rc); + } + /* Set the host context */ + node_ptr->httpReq.hostname = node_ptr->hostname ; + node_ptr->httpReq.uuid = node_ptr->uuid; + node_ptr->httpReq.request = SYSINV_UPDATE ; + node_ptr->httpReq.operation = SYSINV_OPER__UPDATE_VALUE ; + node_ptr->httpReq.key = key ; + node_ptr->httpReq.value = value ; + node_ptr->httpReq.max_retries = 3 ; + node_ptr->httpReq.cur_retries = 0 ; + node_ptr->httpReq.timeout = get_mtcInv_ptr()->sysinv_timeout ; + + node_ptr->httpReq.information = key ; + node_ptr->httpReq.information.append(":") ; + node_ptr->httpReq.information.append(value) ; + + node_ptr->httpReq.payload = "[" ; + node_ptr->httpReq.payload.append ("{\"path\":\"/") ; + node_ptr->httpReq.payload.append (key.data()); + node_ptr->httpReq.payload.append ("\",\"value\":\""); + node_ptr->httpReq.payload.append (value.data()); + node_ptr->httpReq.payload.append ( "\",\"op\":\"replace\"}]"); + + return(this->workQueue_enqueue( node_ptr->httpReq)); +} + +int nodeLinkClass::mtcInvApi_update_value ( string hostname, + string key, + string value ) +{ + GET_NODE_PTR(hostname); + return (mtcInvApi_update_value ( node_ptr, key, value )); +} + 
+/***************************************************************************** + * + * Name : mtcInvApi_update_uptime + * + * Purpose : Write a hosts uptime to the inventory database. + * + *****************************************************************************/ +int nodeLinkClass::mtcInvApi_update_uptime ( struct nodeLinkClass::node * node_ptr, + unsigned int uptime ) +{ + char str [10] ; + CHK_NODE_PTR(node_ptr); + int rc = mtcHttpUtil_event_init ( &node_ptr->httpReq, + node_ptr->hostname, + "mtcInvApi_update_uptime", + hostUtil_getServiceIp (SERVICE_SYSINV), + hostUtil_getServicePort(SERVICE_SYSINV)); + if ( rc ) + { + elog ("%s failed to allocate libEvent memory (%d)\n", node_ptr->hostname.c_str(), rc ); + return (rc); + } + + /* Set the host context */ + node_ptr->httpReq.hostname = node_ptr->hostname ; + node_ptr->httpReq.uuid = node_ptr->uuid; + node_ptr->httpReq.request = SYSINV_UPDATE ; + node_ptr->httpReq.operation = SYSINV_OPER__UPDATE_UPTIME ; + node_ptr->httpReq.max_retries = 0 ; + node_ptr->httpReq.cur_retries = 0 ; + node_ptr->httpReq.noncritical = true ; + node_ptr->httpReq.timeout = get_mtcInv_ptr()->sysinv_noncrit_timeout ; + + sprintf ( str , "%d", uptime ); + + /* TODO: remove me, the str should be fine */ + string uptime_str = str ; + + node_ptr->httpReq.information = uptime_str ; + + /* Sent uptime update request. 
+ * But exit this iteration if we get an error as we + * don't want to stall mtce for all hosts on such a + * simple operation */ + + node_ptr->httpReq.payload = "[" ; + node_ptr->httpReq.payload.append ("{\"path\":\"/") ; + node_ptr->httpReq.payload.append (MTC_JSON_INV_UPTIME); + node_ptr->httpReq.payload.append ("\",\"value\":\""); + node_ptr->httpReq.payload.append ( uptime_str ); + node_ptr->httpReq.payload.append ( "\",\"op\":\"replace\"}]"); + rc = this->workQueue_enqueue ( node_ptr->httpReq); + set_uptime_refresh_ctr ( node_ptr->hostname, 0 ) ; + return (rc) ; +} + +int nodeLinkClass::mtcInvApi_update_uptime ( string hostname, + unsigned int uptime ) +{ + GET_NODE_PTR(hostname); + return (mtcInvApi_update_uptime ( node_ptr, uptime )); +} + +/***************************************************************************** + * + * Name : mtcInvApi_force_states + * + * Purpose : Write all 3 maintenance states to the inv database. + * + *****************************************************************************/ +int nodeLinkClass::mtcInvApi_force_states ( struct nodeLinkClass::node * node_ptr, + string admin, + string oper, + string avail ) +{ + CHK_NODE_PTR(node_ptr); + int rc = mtcHttpUtil_event_init ( &node_ptr->httpReq, + node_ptr->hostname, + "mtcInvApi_force_states", + hostUtil_getServiceIp (SERVICE_SYSINV), + hostUtil_getServicePort(SERVICE_SYSINV)); + if ( rc ) + { + elog ("%s failed to allocate libEvent memory (%d)\n", node_ptr->hostname.c_str(), rc ); + return (rc); + } + + /* Set the host context */ + node_ptr->httpReq.hostname = node_ptr->hostname ; + node_ptr->httpReq.uuid = node_ptr->uuid; + node_ptr->httpReq.request = SYSINV_UPDATE ; + node_ptr->httpReq.operation = SYSINV_OPER__FORCE_STATES ; + node_ptr->httpReq.max_retries = 3 ; + node_ptr->httpReq.cur_retries = 0 ; + node_ptr->httpReq.timeout = get_mtcInv_ptr()->sysinv_timeout ; + + node_ptr->httpReq.information = admin ; + node_ptr->httpReq.information.append("-") ; + 
node_ptr->httpReq.information.append(oper) ; + node_ptr->httpReq.information.append("-") ; + node_ptr->httpReq.information.append(avail) ; + + node_ptr->httpReq.payload = "[" ; + node_ptr->httpReq.payload.append ("{\"path\":\"/") ; + node_ptr->httpReq.payload.append (MTC_JSON_INV_ADMIN); + node_ptr->httpReq.payload.append ("\",\"value\":\""); + node_ptr->httpReq.payload.append ( admin ); + node_ptr->httpReq.payload.append ( "\",\"op\":\"replace\"},"); + + node_ptr->httpReq.payload.append ("{\"path\":\"/") ; + node_ptr->httpReq.payload.append (MTC_JSON_INV_OPER); + node_ptr->httpReq.payload.append ("\",\"value\":\""); + node_ptr->httpReq.payload.append ( oper ); + node_ptr->httpReq.payload.append ( "\",\"op\":\"replace\"},"); + + node_ptr->httpReq.payload.append ("{\"path\":\"/") ; + node_ptr->httpReq.payload.append (MTC_JSON_INV_AVAIL); + node_ptr->httpReq.payload.append ("\",\"value\":\""); + node_ptr->httpReq.payload.append ( avail ); + node_ptr->httpReq.payload.append ( "\",\"op\":\"replace\"}]"); + + rc = this->workQueue_enqueue ( node_ptr->httpReq); + return (rc); +} + +int nodeLinkClass::mtcInvApi_force_states ( string hostname, + string admin, + string oper, + string avail ) +{ + GET_NODE_PTR(hostname); /* allocates nodeLinkClass */ + return (mtcInvApi_force_states (node_ptr, admin, oper, avail )); + +} + +/***************************************************************************** + * + * Name : mtcInvApi_subf_states + * + * Purpose : Write all 5 maintenance states to the inv database. 
+ * + *****************************************************************************/ +int nodeLinkClass::mtcInvApi_subf_states ( struct nodeLinkClass::node * node_ptr, + string oper_subf, + string avail_subf ) +{ + CHK_NODE_PTR(node_ptr); + int rc = mtcHttpUtil_event_init ( &node_ptr->httpReq, + node_ptr->hostname, + "mtcInvApi_subf_states", + hostUtil_getServiceIp (SERVICE_SYSINV), + hostUtil_getServicePort(SERVICE_SYSINV)); + if ( rc ) + { + elog ("%s failed to allocate libEvent memory (%d)\n", node_ptr->hostname.c_str(), rc ); + return (rc); + } + + oper_subf_state_change ( node_ptr->hostname, oper_subf ); + avail_subf_status_change ( node_ptr->hostname, avail_subf ); + + /* Set the host context */ + node_ptr->httpReq.hostname = node_ptr->hostname ; + node_ptr->httpReq.uuid = node_ptr->uuid; + node_ptr->httpReq.request = SYSINV_UPDATE ; + node_ptr->httpReq.operation = SYSINV_OPER__FORCE_STATES ; + node_ptr->httpReq.max_retries = 3 ; + node_ptr->httpReq.cur_retries = 0 ; + node_ptr->httpReq.timeout = get_mtcInv_ptr()->sysinv_timeout ; + + node_ptr->httpReq.information = oper_subf ; + node_ptr->httpReq.information.append("-") ; + node_ptr->httpReq.information.append(avail_subf); + + node_ptr->httpReq.payload = "[" ; + node_ptr->httpReq.payload.append ("{\"path\":\"/") ; + node_ptr->httpReq.payload.append (MTC_JSON_INV_OPER_SUBF); + node_ptr->httpReq.payload.append ("\",\"value\":\""); + node_ptr->httpReq.payload.append ( oper_subf ); + node_ptr->httpReq.payload.append ( "\",\"op\":\"replace\"},"); + + node_ptr->httpReq.payload.append ("{\"path\":\"/") ; + node_ptr->httpReq.payload.append (MTC_JSON_INV_AVAIL_SUBF); + node_ptr->httpReq.payload.append ("\",\"value\":\""); + node_ptr->httpReq.payload.append ( avail_subf ); + node_ptr->httpReq.payload.append ( "\",\"op\":\"replace\"}]"); + + rc = this->workQueue_enqueue ( node_ptr->httpReq); + return (rc); +} + +int nodeLinkClass::mtcInvApi_subf_states ( string hostname, + string oper_subf, + string avail_subf ) +{ + 
GET_NODE_PTR(hostname); + return ( mtcInvApi_subf_states ( node_ptr, oper_subf, avail_subf )); +} + +/***************************************************************************** + * + * Name : mtcInvApi_update_states + * + * Purpose : Write new values for the admin, oper and avail states to the + * inventory database. + * + *****************************************************************************/ +int nodeLinkClass::mtcInvApi_update_states ( struct nodeLinkClass::node * node_ptr, + string admin, + string oper, + string avail ) +{ + int changes = 0 ; + CHK_NODE_PTR(node_ptr); + int rc = mtcHttpUtil_event_init ( &node_ptr->httpReq, + node_ptr->hostname, + "mtcInvApi_update_states", + hostUtil_getServiceIp (SERVICE_SYSINV), + hostUtil_getServicePort(SERVICE_SYSINV)); + if ( rc ) + { + elog ("%s failed to allocate libEvent memory (%d)\n", node_ptr->hostname.c_str(), rc ); + return (rc); + } + + /* Set the host context */ + node_ptr->httpReq.hostname = node_ptr->hostname ; + node_ptr->httpReq.uuid = node_ptr->uuid; + node_ptr->httpReq.request = SYSINV_UPDATE ; + node_ptr->httpReq.operation = SYSINV_OPER__UPDATE_STATES ; + node_ptr->httpReq.max_retries = 3 ; + node_ptr->httpReq.cur_retries = 0 ; + node_ptr->httpReq.timeout = get_mtcInv_ptr()->sysinv_timeout ; + + node_ptr->httpReq.payload = "[" ; + if ( ! admin.empty() ) + { + admin_state_change ( node_ptr->hostname, admin ); + node_ptr->httpReq.payload.append ("{\"path\":\"/") ; + node_ptr->httpReq.payload.append (MTC_JSON_INV_ADMIN); + node_ptr->httpReq.payload.append ("\",\"value\":\""); + node_ptr->httpReq.payload.append ( admin ); + node_ptr->httpReq.payload.append ( "\",\"op\":\"replace\"},"); + changes++ ; + } + else + admin = " " ; + + if ( ! 
oper.empty() ) + { + oper_state_change ( node_ptr->hostname, oper ); + node_ptr->httpReq.payload.append ("{\"path\":\"/") ; + node_ptr->httpReq.payload.append (MTC_JSON_INV_OPER); + node_ptr->httpReq.payload.append ("\",\"value\":\""); + node_ptr->httpReq.payload.append ( oper ); + node_ptr->httpReq.payload.append ( "\",\"op\":\"replace\"},"); + changes++ ; + } + else + oper = " " ; + + if ( ! avail.empty() ) + { + avail_status_change ( node_ptr->hostname, avail ); + node_ptr->httpReq.payload.append ("{\"path\":\"/") ; + node_ptr->httpReq.payload.append (MTC_JSON_INV_AVAIL); + node_ptr->httpReq.payload.append ("\",\"value\":\""); + node_ptr->httpReq.payload.append ( avail ); + node_ptr->httpReq.payload.append ( "\",\"op\":\"replace\"},"); + changes++ ; + } + else + avail = " " ; + + if ( changes ) + { + node_ptr->httpReq.information = admin ; + node_ptr->httpReq.information.append("-") ; + node_ptr->httpReq.information.append(oper) ; + node_ptr->httpReq.information.append("-") ; + node_ptr->httpReq.information.append(avail) ; + + /* remove the last "," and complete the string */ + int len = node_ptr->httpReq.payload.length(); + node_ptr->httpReq.payload.erase(len-1,1); + node_ptr->httpReq.payload.append ( "]"); + + ilog ("%s %s-%s-%s (seq:%d)\n", node_ptr->hostname.c_str(), + admin.c_str(), + oper.c_str(), + avail.c_str(), + node_ptr->oper_sequence); + + rc = this->workQueue_enqueue ( node_ptr->httpReq); + } + else + { + dlog ("%s -> %s-%s-%s\n", node_ptr->hostname.c_str(), + admin.c_str(), + oper.c_str(), + avail.c_str()); + } + return (rc); +} + +int nodeLinkClass::mtcInvApi_update_states ( string hostname, + string admin, + string oper, + string avail ) +{ + GET_NODE_PTR(hostname); + return (mtcInvApi_update_states ( node_ptr, admin, oper, avail )); +} + +/***************************************************************************** + * + * Name : mtcInvApi_update_states_now + * + * Purpose : Write new values for the admin, oper and avail states to the + * 
inventory database. + * + *****************************************************************************/ +int nodeLinkClass::mtcInvApi_update_states_now ( struct nodeLinkClass::node * node_ptr, + string admin, + string oper, + string avail, + string oper_subf, + string avail_subf) +{ + int changes = 0 ; + CHK_NODE_PTR(node_ptr); + int rc = mtcHttpUtil_event_init ( &this->sysinvEvent, + node_ptr->hostname, + "mtcInvApi_update_states_now", + hostUtil_getServiceIp (SERVICE_SYSINV), + hostUtil_getServicePort(SERVICE_SYSINV)); + if ( rc ) + { + elog ("%s failed to allocate libEvent memory (%d)\n", node_ptr->hostname.c_str(), rc ); + return (rc); + } + + /* Set the host context */ + this->sysinvEvent.hostname = node_ptr->hostname ; + this->sysinvEvent.uuid = node_ptr->uuid; + this->sysinvEvent.request = SYSINV_UPDATE ; + this->sysinvEvent.operation = SYSINV_OPER__UPDATE_STATES ; + this->sysinvEvent.max_retries = 3 ; + this->sysinvEvent.cur_retries = 0 ; + this->sysinvEvent.timeout = get_mtcInv_ptr()->sysinv_timeout ; + + this->sysinvEvent.payload = "[" ; + if ( ! admin.empty() ) + { + admin_state_change ( node_ptr->hostname, admin ); + this->sysinvEvent.payload.append ("{\"path\":\"/") ; + this->sysinvEvent.payload.append (MTC_JSON_INV_ADMIN); + this->sysinvEvent.payload.append ("\",\"value\":\""); + this->sysinvEvent.payload.append ( admin ); + this->sysinvEvent.payload.append ( "\",\"op\":\"replace\"},"); + changes++ ; + } + else + admin = " " ; + + if ( ! oper.empty() ) + { + oper_state_change ( node_ptr->hostname, oper ); + this->sysinvEvent.payload.append ("{\"path\":\"/") ; + this->sysinvEvent.payload.append (MTC_JSON_INV_OPER); + this->sysinvEvent.payload.append ("\",\"value\":\""); + this->sysinvEvent.payload.append ( oper ); + this->sysinvEvent.payload.append ( "\",\"op\":\"replace\"},"); + changes++ ; + } + else + oper = " " ; + + if ( ! 
avail.empty() ) + { + avail_status_change ( node_ptr->hostname, avail ); + this->sysinvEvent.payload.append ("{\"path\":\"/") ; + this->sysinvEvent.payload.append (MTC_JSON_INV_AVAIL); + this->sysinvEvent.payload.append ("\",\"value\":\""); + this->sysinvEvent.payload.append ( avail ); + this->sysinvEvent.payload.append ( "\",\"op\":\"replace\"},"); + changes++ ; + } + else + avail = " " ; + + if ( CPE_SYSTEM ) + { + if ( ! oper_subf.empty() ) + { + oper_subf_state_change ( node_ptr->hostname, oper_subf ); + this->sysinvEvent.payload.append ("{\"path\":\"/") ; + this->sysinvEvent.payload.append (MTC_JSON_INV_OPER_SUBF); + this->sysinvEvent.payload.append ("\",\"value\":\""); + this->sysinvEvent.payload.append ( oper_subf ); + this->sysinvEvent.payload.append ( "\",\"op\":\"replace\"},"); + changes++ ; + } + else + oper_subf = " " ; + + if ( ! avail_subf.empty() ) + { + avail_subf_status_change ( node_ptr->hostname, avail_subf ); + this->sysinvEvent.payload.append ("{\"path\":\"/") ; + this->sysinvEvent.payload.append (MTC_JSON_INV_AVAIL_SUBF); + this->sysinvEvent.payload.append ("\",\"value\":\""); + this->sysinvEvent.payload.append ( avail_subf ); + this->sysinvEvent.payload.append ( "\",\"op\":\"replace\"},"); + changes++ ; + } + else + avail_subf = "" ; + } + + if ( changes ) + { + this->sysinvEvent.information = admin ; + this->sysinvEvent.information.append("-") ; + this->sysinvEvent.information.append(oper) ; + this->sysinvEvent.information.append("-") ; + this->sysinvEvent.information.append(avail) ; + + /* remove the last "," and complete the string */ + int len = this->sysinvEvent.payload.length(); + this->sysinvEvent.payload.erase(len-1,1); + this->sysinvEvent.payload.append ( "]"); + + if ( CPE_SYSTEM ) + { + ilog ("%s %s-%s-%s %s-%s\n", + node_ptr->hostname.c_str(), + admin.c_str(), + oper.c_str(), + avail.c_str(), + oper_subf.c_str(), + avail_subf.c_str()); + } + else + { + ilog ("%s %s-%s-%s\n", + node_ptr->hostname.c_str(), + admin.c_str(), + 
oper.c_str(), + avail.c_str()); + } + + this->sysinvEvent.blocking = true ; + rc = mtcHttpUtil_api_request ( this->sysinvEvent ) ; + } + else + { + dlog ("%s -> %s-%s-%s\n", node_ptr->hostname.c_str(), + admin.c_str(), + oper.c_str(), + avail.c_str()); + } + return (rc); +} + +int nodeLinkClass::mtcInvApi_update_states_now ( string hostname, + string admin, + string oper, + string avail, + string oper_subf, + string avail_subf) +{ + GET_NODE_PTR(hostname); + return ( mtcInvApi_update_states_now ( node_ptr, admin, oper, avail, oper_subf, avail_subf )); +} + +/***************************************************************************** + * + * Name : mtcInvApi_update_state + * + * Purpose : Write a new mtce state value to the inventory database. + * + *****************************************************************************/ +int nodeLinkClass::mtcInvApi_update_state ( struct nodeLinkClass::node * node_ptr, + string state, + string value ) +{ + CHK_NODE_PTR(node_ptr); + int rc = mtcHttpUtil_event_init ( &node_ptr->httpReq, + node_ptr->hostname, + "mtcInvApi_update_state", + hostUtil_getServiceIp (SERVICE_SYSINV), + hostUtil_getServicePort(SERVICE_SYSINV)); + if ( rc ) + { + elog ("%s failed to allocate libEvent memory (%d)\n", node_ptr->hostname.c_str(), rc ); + return (rc); + } + + if ( !state.compare(MTC_JSON_INV_ADMIN)) + admin_state_change ( node_ptr->hostname, value ); + else if ( !state.compare(MTC_JSON_INV_OPER)) + oper_state_change ( node_ptr->hostname, value ); + else if ( !state.compare(MTC_JSON_INV_AVAIL)) + avail_status_change ( node_ptr->hostname, value ); + + else if ( !state.compare(MTC_JSON_INV_OPER_SUBF)) + oper_subf_state_change ( node_ptr->hostname, value ); + else if ( !state.compare(MTC_JSON_INV_AVAIL_SUBF)) + avail_subf_status_change ( node_ptr->hostname, value ); + + /* Set the host context */ + node_ptr->httpReq.hostname = node_ptr->hostname ; + node_ptr->httpReq.uuid = node_ptr->uuid; + node_ptr->httpReq.request = SYSINV_UPDATE ; + 
node_ptr->httpReq.operation = SYSINV_OPER__UPDATE_STATE ; + node_ptr->httpReq.key = state ; + node_ptr->httpReq.value = value ; + node_ptr->httpReq.max_retries = 3 ; + node_ptr->httpReq.cur_retries = 0 ; + node_ptr->httpReq.timeout = get_mtcInv_ptr()->sysinv_timeout ; + + node_ptr->httpReq.information = state ; + node_ptr->httpReq.information.append(":") ; + node_ptr->httpReq.information.append(value) ; + + if (( !state.compare(MTC_JSON_INV_AVAIL_SUBF) )|| + ( !state.compare(MTC_JSON_INV_OPER_SUBF) )) + { + ilog ("%s-compute %s (seq:%d)\n", node_ptr->hostname.c_str(), value.c_str(), node_ptr->oper_sequence); + } + else + { + ilog ("%s %s (seq:%d)\n", node_ptr->hostname.c_str(), value.c_str(), node_ptr->oper_sequence); + } + + node_ptr->httpReq.payload = "[{\"path\":\"/" ; + node_ptr->httpReq.payload.append ( state ); + node_ptr->httpReq.payload.append ( "\", \"value\": \""); + node_ptr->httpReq.payload.append ( value ); + node_ptr->httpReq.payload.append ( "\", \"op\": \"replace\"}]"); + return (this->workQueue_enqueue( node_ptr->httpReq)); +} + +int nodeLinkClass::mtcInvApi_update_state ( string hostname, + string state, + string value ) +{ + GET_NODE_PTR(hostname); + return (mtcInvApi_update_state ( node_ptr, state, value )); +} + + +/***************************************************************************** + * + * Name : mtcInvApi_cfg_show + * + * Purpose : Issue a configuration show command to sysinv + * + * Type : GET + * Address : /v1/iuser + * Payload : none + * Response: {"iusers": + * [ + * {"recordtype": "reconfig", "links": + * [ + * {"href": "http://192.168.204.2:6385/v1/iusers/286a0793-5d15-473d-a459-00c2bfc369cb", "rel": "self"}, + * {"href": "http://192.168.204.2:6385/iusers/286a0793-5d15-473d-a459-00c2bfc369cb", "rel": "bookmark"} + * ] + * "created_at" : "2014-09-30T14:42:16.704390+00:00", + * "updated_at" : "2014-09-30T20:41:07.250815+00:00", + * "root_sig" : "60550974db5458fab1fbb5ccf4f18c4d", + * "istate" : "applied", + * "isystem_uuid": 
"ce178041-2b2c-405d-bf87-f19334a35582", + * "uuid" : "286a0793-5d15-473d-a459-00c2bfc369cb" + * } + * ] + * } + * + *****************************************************************************/ +int nodeLinkClass::mtcInvApi_cfg_show ( string hostname ) +{ + GET_NODE_PTR(hostname); /* allocates nodeLinkClass */ + mtcHttpUtil_event_init ( &node_ptr->cfgEvent, + hostname, + "mtcInvApi_cfg_show", + hostUtil_getServiceIp ( SERVICE_SYSINV ), + hostUtil_getServicePort ( SERVICE_SYSINV )); + + /* Set the host context */ + node_ptr->cfgEvent.hostname = hostname ; + node_ptr->cfgEvent.uuid = node_ptr->uuid; + node_ptr->cfgEvent.status = PASS ; + node_ptr->cfgEvent.request = SYSINV_CONFIG_SHOW ; + node_ptr->cfgEvent.operation = SYSINV_OPER__CONFIG_SHOW ; + node_ptr->cfgEvent.max_retries = 3 ; + node_ptr->cfgEvent.cur_retries = 0 ; + + return(this->workQueue_enqueue ( node_ptr->cfgEvent)); +} + +/***************************************************************************** + * + * Name : mtcInvApi_cfg_modify + * + * Purpose : Issue a configuration modify command to sysinv + * + * Description: + * + * The initial modify is done with an install command if install boolean + * is true otherwise the apply command is sent. 
+ *
+ * event.key holds the new password line signature from the shadow file
+ * event.value holds the current password line signature in the database
+ * event.uuid holds the reconfig uuid to be used in the url for the command
+ * event.information holds the current password hash and password age from the shadow file
+ *
+ * Type : GET
+ * Address : /v1/iuser
+ * Payload : json patch list replacing /root_sig, /passwd_expiry_days,
+ *           /passwd_hash and /action (see the payload construction below)
+ * Response: {"iusers":
+ * [
+ * {"recordtype": "reconfig", "links":
+ * [
+ * {"href": "http://192.168.204.2:6385/v1/iusers/286a0793-5d15-473d-a459-00c2bfc369cb", "rel": "self"},
+ * {"href": "http://192.168.204.2:6385/iusers/286a0793-5d15-473d-a459-00c2bfc369cb", "rel": "bookmark"}
+ * ]
+ * "created_at" : "2014-09-30T14:42:16.704390+00:00",
+ * "updated_at" : "2014-09-30T20:41:07.250815+00:00",
+ * "root_sig" : "60550974db5458fab1fbb5ccf4f18c4d",
+ * "passwd_expiry_days": "45",
+ * "passwd_hash" : "DkGo4WZdJqemnDgX26oJlZTp8cj61",
+ * "istate" : "applied",
+ * "isystem_uuid": "ce178041-2b2c-405d-bf87-f19334a35582",
+ * "uuid" : "286a0793-5d15-473d-a459-00c2bfc369cb"
+ * }
+ * ]
+ * }
+ *
+ *****************************************************************************/
+int nodeLinkClass::mtcInvApi_cfg_modify ( string hostname, bool install )
+{
+    GET_NODE_PTR(hostname); /* provides node_ptr for this hostname */
+
+    // stow away the information since it will get cleared
+    // as part of mtcHttpUtil_event_init
+    string cfgInfo = node_ptr->cfgEvent.information;
+
+    mtcHttpUtil_event_init ( &node_ptr->cfgEvent,
+                              hostname,
+                              "mtcInvApi_cfg_modify",
+                              hostUtil_getServiceIp ( SERVICE_SYSINV ),
+                              hostUtil_getServicePort ( SERVICE_SYSINV ));
+
+    /* Set the host context */
+    node_ptr->cfgEvent.hostname = hostname ;
+    node_ptr->cfgEvent.request = SYSINV_CONFIG_MODIFY ;
+    node_ptr->cfgEvent.operation = SYSINV_OPER__CONFIG_MODIFY ;
+    node_ptr->cfgEvent.max_retries = 3 ;
+    node_ptr->cfgEvent.cur_retries = 0 ;
+    node_ptr->cfgEvent.rx_retry_max= get_mtcInv_ptr()->sysinv_timeout * 1000;
+
+    /* Get the invCfg hash and age fields separated by ':'
+     *
+     * Both buffers start out as empty strings. sscanf does not touch an
+     * output whose conversion fails, so a cfgInfo that is empty, starts
+     * with ':' or has no ':' at all would otherwise leave one or both
+     * buffers uninitialised and then appended to the payload below.
+     * The %1023 widths keep each field inside its 1024 byte buffer. */
+    char cfgHash[1024] = { 0 } ;
+    char cfgAging[1024] = { 0 } ;
+    int fields = sscanf(cfgInfo.c_str(), "%1023[^:]:%1023[^:]", cfgHash, cfgAging);
+    if ( fields != 2 )
+    {
+        /* Best effort: the request is still sent, with the missing
+         * field(s) as empty strings. */
+        wlog ("%s config info is not in 'hash:age' form (parsed:%d)\n",
+                  hostname.c_str(), fields );
+    }
+
+    /* NOTE(review): cfgEvent.key is read after mtcHttpUtil_event_init ;
+     * confirm that init preserves it (information is documented above
+     * as being cleared). */
+    node_ptr->cfgEvent.payload = "[";
+    node_ptr->cfgEvent.payload.append ( "{\"path\":\"/root_sig\"," );
+    node_ptr->cfgEvent.payload.append ( "\"value\":\"" );
+    node_ptr->cfgEvent.payload.append ( node_ptr->cfgEvent.key );
+    node_ptr->cfgEvent.payload.append ("\",\"op\":\"replace\"},");
+
+    node_ptr->cfgEvent.payload.append ( "{\"path\":\"/passwd_expiry_days\"," );
+    node_ptr->cfgEvent.payload.append ( "\"value\":\"" );
+    node_ptr->cfgEvent.payload.append ( cfgAging );
+    node_ptr->cfgEvent.payload.append ("\",\"op\":\"replace\"},");
+
+    node_ptr->cfgEvent.payload.append ( "{\"path\":\"/passwd_hash\"," );
+    node_ptr->cfgEvent.payload.append ( "\"value\":\"" );
+    node_ptr->cfgEvent.payload.append ( cfgHash );
+    node_ptr->cfgEvent.payload.append ("\",\"op\":\"replace\"},");
+
+    node_ptr->cfgEvent.payload.append ( "{\"path\":\"/action\",");
+
+    /* The action is 'install' for the initial modify, 'apply' otherwise */
+    if ( install )
+        node_ptr->cfgEvent.payload.append ( "\"value\":\"install\",");
+    else
+        node_ptr->cfgEvent.payload.append ( "\"value\":\"apply\",");
+
+    node_ptr->cfgEvent.payload.append ( "\"op\":\"replace\"}]");
+
+    qlog ("%s Payload: %s\n", hostname.c_str(),
+              node_ptr->cfgEvent.payload.c_str() );
+
+    return (this->workQueue_enqueue( node_ptr->cfgEvent));
+}
+
+
+/*****************************************************************************/
+/******************** H A N D L E R S ************************/
+/*****************************************************************************/
+
+int mtcInvApi_handler ( libEvent & event )
+{
+    int rc = FAIL ;
+    char * resp_ptr = (char*)event.response.data();
+
+    if (( !event.operation.compare(SYSINV_OPER__UPDATE_TASK)) ||
+        ( !event.operation.compare(SYSINV_OPER__FORCE_TASK)))
+    {
+        /* Load the
inventory response */ + rc = jsonUtil_patch_load ( resp_ptr, event.inv_info); + } + else if ( !event.operation.compare(SYSINV_OPER__UPDATE_UPTIME)) + { + rc = jsonUtil_patch_load ( resp_ptr, event.inv_info); + } + else if ( !event.operation.compare(SYSINV_OPER__UPDATE_VALUE)) + { + /* Load the inventory response */ + rc = jsonUtil_patch_load ( resp_ptr, event.inv_info); + if ( rc != PASS ) + { + elog ("Bad inventory response to update key:value '%s:%s' request\n", + event.key.c_str(), event.value.c_str()); + } +#ifdef WANT_KEY_VALUE_FROM_DATABASE + else + { + nodeLinkClass * obj_ptr = get_mtcInv_ptr () ; + rc = obj_ptr->update_key_value ( event.hostname, event.key , event.value ); + } +#endif + } + else if (( !event.operation.compare(SYSINV_OPER__UPDATE_STATE)) || + ( !event.operation.compare(SYSINV_OPER__UPDATE_STATES)) || + ( !event.operation.compare(SYSINV_OPER__FORCE_STATES))) + { + /* Load the inventory response */ + rc = jsonUtil_patch_load ( resp_ptr, event.inv_info); + if ( rc != PASS ) + { + elog ("Unable to communicate with inventory database\n"); + event.status = FAIL_DATABASE_DOWN ; + } + else + { + if ( (!event.inv_info.name.compare("none")) || + (event.inv_info.name.empty())) + { + elog ("Got a None or Null Response from Inventory 'Update' Request\n"); + print_inv ( event.inv_info ); + event.status = FAIL_STRING_EMPTY ; + } + else + { + if (!adminStateOk (event.inv_info.admin)) + { + event.status = FAIL_INVALID_DATA ; + elog ("%s missing or invalid 'admin' state from Inv Patch response\n", + event.inv_info.name.c_str()); + } + if (!operStateOk (event.inv_info.oper)) + { + event.status = FAIL_INVALID_DATA ; + elog ("%s missing or invalid 'oper' state from Inv Patch response\n", + event.inv_info.name.c_str()); + } + if (!availStatusOk (event.inv_info.avail)) + { + event.status = FAIL_INVALID_DATA ; + elog ("%s missing or invalid 'avail' status from Inv Patch response\n", + event.inv_info.name.c_str()); + } + } + } + } + else if (( 
!event.operation.compare(SYSINV_OPER__CONFIG_SHOW )) ||
+             ( !event.operation.compare(SYSINV_OPER__CONFIG_MODIFY)))
+    {
+        ilog ("%s Handled (in FSM)\n", event.log_prefix.c_str());
+        if ( event.response.length() > 200 )
+        {
+            rc = PASS ;
+        }
+        else
+        {
+            rc = FAIL_INVALID_DATA ;
+        }
+    }
+
+    if ( rc )
+    {
+        wlog ("%s Handled with error (%d:%d)\n", event.log_prefix.c_str(), event.status, rc );
+        return (rc);
+    }
+
+    else if ( event.status )
+        return (event.status);
+
+    return ( PASS );
+}
+
+/* Handles the Inventory Add (POST) request's response.
+ *
+ * req - the completed request ; a null pointer is treated as a timeout
+ * arg - the event base to break out of once handling is complete
+ *
+ * The outcome is left in the sysinvEvent.status of the object returned by
+ * get_mtcInv_ptr(): FAIL_TIMEOUT, the mtcHttpUtil_status result, or
+ * FAIL_JSON_PARSE when the response body does not load. All paths,
+ * including the timeout path, operate on that same object. */
+void nodeLinkClass::mtcInvApi_add_handler ( struct evhttp_request *req, void *arg )
+{
+    int rc = PASS ;
+
+    nodeLinkClass * obj_ptr = get_mtcInv_ptr () ;
+    if ( !req )
+    {
+        elog ("%s '%s' Request Timeout (%d) (%p)\n",
+                  obj_ptr->sysinvEvent.hostname.c_str(),
+                  obj_ptr->sysinvEvent.service.c_str(),
+                  obj_ptr->sysinvEvent.timeout , arg);
+
+        obj_ptr->sysinvEvent.status = FAIL_TIMEOUT ;
+        goto _add_handler_done ;
+    }
+
+    /* Check the HTTP Status Code */
+    obj_ptr->sysinvEvent.status = mtcHttpUtil_status ( obj_ptr->sysinvEvent ) ;
+    if ( obj_ptr->sysinvEvent.status != PASS )
+    {
+        elog ("%s Sysinv Add Failed (%d)\n",
+                  obj_ptr->sysinvEvent.hostname.c_str(),
+                  obj_ptr->sysinvEvent.status );
+        goto _add_handler_done ;
+    }
+
+    if ( mtcHttpUtil_get_response ( obj_ptr->sysinvEvent ) != PASS )
+        goto _add_handler_done ;
+
+    /* Parse through the response and fill in the event's inv_info */
+    rc = jsonUtil_load_host ( (char*)obj_ptr->sysinvEvent.response.data(),
+                                     obj_ptr->sysinvEvent.inv_info );
+    if ( rc != PASS )
+    {
+        elog ("%s Failed to Parse Sysinv Response (json)\n",
+                  obj_ptr->sysinvEvent.hostname.c_str());
+
+        obj_ptr->sysinvEvent.status = FAIL_JSON_PARSE ;
+    }
+
+    jlog ("%s Response: %s\n", obj_ptr->sysinvEvent.hostname.c_str(),
+                               obj_ptr->sysinvEvent.response.c_str());
+
+
+_add_handler_done:
+
+    /* This is needed to get out of the loop */
+    event_base_loopbreak((struct event_base *)arg);
+}
+
+/* The handles the inventory Query (QUERY) request's response */
+void nodeLinkClass::mtcInvApi_get_handler ( struct evhttp_request *req, void *arg )
+{
+ int rc = PASS ; + + /* Declare and clean the json info object string containers */ + jsonUtil_info_type json_info ; + jsonUtil_init ( json_info ); /* init it */ + + /* Find the host this handler instance is being run against + * and get its event base - sysinvEvent.base */ + nodeLinkClass * obj_ptr = get_mtcInv_ptr () ; + if ( ! req ) + { + elog ("Inventory 'Get' Request Timeout (%s)\n", + obj_ptr->sysinvEvent.entity_path.c_str()); + + obj_ptr->sysinvEvent.status = FAIL_TIMEOUT ; + goto _get_handler_done ; + } + + /* Check the HTTP Status Code */ + else if ( mtcHttpUtil_status ( obj_ptr->sysinvEvent ) != PASS ) + { + elog ("Inventory HTTP Get Request Failed (%s) (%d)\n", + obj_ptr->sysinvEvent.entity_path.c_str(), + obj_ptr->sysinvEvent.status ); + goto _get_handler_done ; + } + + if ( mtcHttpUtil_get_response ( obj_ptr->sysinvEvent ) != PASS ) + { + elog ("Inventory Server may be down (%s)\n", + obj_ptr->sysinvEvent.entity_path.c_str()); + json_info.elements = 0 ; + } + else + { + /* Parse through the response and fill in json_info */ + rc = jsonUtil_inv_load ( (char*)obj_ptr->sysinvEvent.response.data(), json_info ); + if ( rc == FAIL ) + { + elog ("Unable to parse inventory response (%s) (%s)\n", + obj_ptr->sysinvEvent.entity_path.c_str(), + obj_ptr->sysinvEvent.response.c_str()); + json_info.elements = 0 ; + } + else + { + dlog ("%s json string with %d records\n", obj_ptr->sysinvEvent.entity_path.c_str(), + json_info.elements ); + obj_ptr->sysinvEvent.entity_path_next = json_info.next.c_str(); + } + for ( int i = 0 ; i < json_info.elements ; i++ ) + { + if ( json_info.host[i].uuid.length() == UUID_LEN ) + { + nodeLinkClass * obj_ptr = get_mtcInv_ptr (); + node_inv_type node ; + node.uuid = json_info.host[i].uuid ; + node.name = json_info.host[i].name ; + node.ip = json_info.host[i].ip ; + node.mac = json_info.host[i].mac ; + node.id = json_info.host[i].id ; + node.admin = json_info.host[i].admin; + node.oper = json_info.host[i].oper ; + node.avail = 
json_info.host[i].avail;
+                node.type = json_info.host[i].type ;
+                node.func = json_info.host[i].func ;
+                node.task = json_info.host[i].task ;
+
+                node.uptime= json_info.host[i].uptime ;
+
+                node.bm_un = json_info.host[i].bm_un ;
+                node.bm_ip = json_info.host[i].bm_ip ;
+                node.bm_type = json_info.host[i].bm_type ;
+
+                node.oper_subf = json_info.host[i].oper_subf ;
+                node.avail_subf = json_info.host[i].avail_subf ;
+                node.infra_ip = json_info.host[i].infra_ip ;
+
+                if (node.name.compare("none"))
+                {
+                    obj_ptr->sysinvEvent.count++ ;
+
+                    /* Add the node to maintenance */
+
+                    rc = obj_ptr->add_host ( node ) ;
+                    switch (rc)
+                    {
+                        case RETRY:
+                            break ;
+                        case PASS:
+                            if ( obj_ptr->get_operState (node.name) == MTC_OPER_STATE__ENABLED )
+                            {
+                                obj_ptr->ctl_mtcAlive_gate (node.name, false) ;
+                            }
+                            // jsonUtil_print ( json_info, i );
+                            break ;
+                        case FAIL:
+                        default:
+                            elog ("Failed to add hostname '%s' to maintenance (rc:%d)\n",
+                                      node.name.c_str(), rc );
+                            break ;
+                    }
+                }
+                else
+                {
+                    dlog ("Refusing to add hostname '%s' to maintenance\n", node.name.c_str());
+                }
+            }
+        }
+    }
+
+_get_handler_done:
+
+    /* This is needed to get out of the loop */
+    event_base_loopbreak((struct event_base *)arg);
+}
+
+
+/* Handles the Inventory Query request's response.
+ * Should only be called for the active controller.
+ *
+ * req - the completed request ; a null pointer is treated as a timeout
+ * arg - the event base of the query ; used to find the owning node and
+ *       to break out of the event loop when handling is complete
+ *
+ * The outcome is left in the owning node's sysinvEvent.status:
+ *   PASS                  - a record with a full length uuid and a name
+ *                           equal to the requested hostname was loaded
+ *   HTTP_NOTFOUND         - server reported not-found, or the loaded
+ *                           record has no uuid / a different name
+ *   FAIL_UNKNOWN_HOSTNAME - the event carries no hostname
+ *   FAIL_TIMEOUT          - no request object was delivered
+ *   FAIL_JSON_PARSE       - the response body did not load
+ *   other                 - whatever mtcHttpUtil_status reported
+ *
+ * If no node owns the event base there is nowhere to record a status ;
+ * the failure is logged and the loop is still broken. */
+void nodeLinkClass::mtcInvApi_qry_handler ( struct evhttp_request *req, void *arg )
+{
+    int rc = PASS ;
+
+    /* Find the host this handler instance is being run against
+     * and get its event base - sysinvEvent.base */
+    nodeLinkClass * obj_ptr = get_mtcInv_ptr () ;
+    nodeLinkClass::node * node_ptr =
+        obj_ptr->getEventBaseNode ( SYSINV_HOST_QUERY, (struct event_base *)arg ) ;
+
+    if ( node_ptr == NULL )
+    {
+        slog ("Node Lookup Failed - Sysinv Event (%p)\n", arg);
+        goto _qry_handler_done ;
+    }
+
+    else if ( node_ptr->sysinvEvent.hostname.empty() )
+    {
+        /* The event hostname is known to be empty here so identify
+         * the log by the node's own hostname instead */
+        elog ("%s Don't know what hostname to look for\n",
+                  node_ptr->hostname.c_str());
+        node_ptr->sysinvEvent.status = FAIL_UNKNOWN_HOSTNAME ;
+        goto _qry_handler_done ;
+    }
+
+    else if ( ! req )
+    {
+        elog ("%s 'Query' Request Timeout (%d)\n",
+                  node_ptr->sysinvEvent.hostname.c_str(),
+                  node_ptr->sysinvEvent.timeout);
+
+        node_ptr->sysinvEvent.status = FAIL_TIMEOUT ;
+        goto _qry_handler_done ;
+    }
+
+    /* Check the HTTP Status Code */
+    node_ptr->sysinvEvent.status = mtcHttpUtil_status ( node_ptr->sysinvEvent ) ;
+    if ( node_ptr->sysinvEvent.status == HTTP_NOTFOUND )
+    {
+        dlog ("%s Sysinv Query (Not-Found) (%d)\n",
+                  node_ptr->hostname.c_str(),
+                  node_ptr->sysinvEvent.status);
+
+        goto _qry_handler_done ;
+    }
+    else if ( node_ptr->sysinvEvent.status != PASS )
+    {
+        elog ("%s Sysinv HTTP Request Failed (%d)\n",
+                  node_ptr->hostname.c_str(),
+                  node_ptr->sysinvEvent.status);
+
+        goto _qry_handler_done ;
+    }
+
+    if ( mtcHttpUtil_get_response ( node_ptr->sysinvEvent ) != PASS )
+        goto _qry_handler_done ;
+
+    jlog ("%s Address : %s\n", node_ptr->sysinvEvent.hostname.c_str(),
+                               node_ptr->sysinvEvent.address.c_str());
+    jlog ("%s Payload : %s\n", node_ptr->sysinvEvent.hostname.c_str(),
+                               node_ptr->sysinvEvent.payload.c_str());
+    jlog ("%s Response: %s\n", node_ptr->sysinvEvent.hostname.c_str(),
+                               node_ptr->sysinvEvent.response.c_str());
+
+    node_inv_init ( node_ptr->sysinvEvent.inv_info );
+    rc = jsonUtil_load_host ( (char*)node_ptr->sysinvEvent.response.data(),
+                                     node_ptr->sysinvEvent.inv_info );
+    if ( rc != PASS )
+    {
+        elog ("%s Failed to Parse Json Response\n", node_ptr->sysinvEvent.hostname.c_str());
+        node_ptr->sysinvEvent.status = FAIL_JSON_PARSE ;
+    }
+    else
+    {
+        /* Default to not-found ; only a record with a full length uuid
+         * and a matching name upgrades the status to PASS */
+        node_ptr->sysinvEvent.status = HTTP_NOTFOUND ;
+        if ( node_ptr->sysinvEvent.inv_info.uuid.length() == UUID_LEN )
+        {
+            if ( !node_ptr->sysinvEvent.inv_info.name.compare(node_ptr->sysinvEvent.hostname))
+            {
+                node_ptr->sysinvEvent.status = PASS ;
+            }
+            else
+            {
+                wlog ("%s was not found in the database\n",
+                          node_ptr->sysinvEvent.hostname.c_str());
+                node_ptr->sysinvEvent.status = HTTP_NOTFOUND ;
+            }
+        }
+        else
+        {
+            wlog ("%s has no uuid\n", node_ptr->sysinvEvent.hostname.c_str());
+        }
+    }
+
+_qry_handler_done:
+
+    /* node_ptr is NULL when the event base lookup above failed ; there
+     * is no event to log in that case and it must not be dereferenced */
+    if ( node_ptr != NULL )
+    {
+        mtcHttpUtil_log_event ( node_ptr->sysinvEvent );
+    }
+
+    /* This is needed to get out of the loop */
+    event_base_loopbreak((struct event_base *)arg);
+}
+
+/* The Inventory 'Add' request handler wrapper abstracted from nodeLinkClass */
+void mtcInvApi_add_Handler ( struct evhttp_request *req, void *arg )
+{
+    nodeLinkClass * obj_ptr = get_mtcInv_ptr () ;
+    obj_ptr->mtcInvApi_add_handler ( req , arg );
+}
+
+/* The Inventory 'Qry' request handler wrapper abstracted from nodeLinkClass */
+void mtcInvApi_qry_Handler ( struct evhttp_request *req, void *arg )
+{
+    nodeLinkClass * obj_ptr = get_mtcInv_ptr () ;
+    obj_ptr->mtcInvApi_qry_handler ( req , arg );
+}
+
+/* The Inventory 'Get' request handler wrapper abstracted from nodeLinkClass */
+void mtcInvApi_get_Handler ( struct evhttp_request *req, void *arg )
+{
+    nodeLinkClass * obj_ptr = get_mtcInv_ptr () ;
+    obj_ptr->mtcInvApi_get_handler ( req , arg );
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcInvApi.h b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcInvApi.h
new file mode 100755
index 00000000..38677d15
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcInvApi.h
@@ -0,0 +1,125 @@
+#ifndef __INCLUDE_MTCINVAPI_H__
+#define __INCLUDE_MTCINVAPI_H__
+
+/*
+ * Copyright (c) 2013-2014 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+ * @file
+ * Wind River CGCS Platform - Maintenance - Inventory Access API Header
+ */
+
+/**
+ * @addtogroup mtcInvApi
+ * @{
+ *
+ * This file implements the a set of mtcInvApi utilities that maintenance
+ * calls upon to set/get host information to/from the sysinv database.
+ * + * The APIs exposed from this file are + * + * + * mtcInvApi_read_inventory - Reads all the host inventory records from the + * sysinv database in a specified batch number. + * + * mtcInvApi_add_host - Adds a host to the sysinv database. + * + * mtcInvApi_load_host - Loads the inventory content for a specified host. + * + * See nodeClass.h for these prototypes + * + * mtcInvApi_update_task - Updates the task field of the specified host. + * + * mtcInvApi_update_uptime - Updates the uptime of the specified host. + * + * mtcInvApi_update_value - Updates any field of the specified host. + * + * mtcInvApi_update_state - Updates a maintenance state of specified host. + * + * mtcInvApi_update_states - Updates all maintenance states of specified host. + * + * + * Each utility is paired with a private handler. + * + * mtcInvApi_get_handler - handles response for mtcInvApi_read_inventory + * + * mtcInvApi_add_Handler - handles response for mtcInvApi_add_host + * + * mtcInvApi_qry_handler - handles response for mtcInvApi_load_host + * + * mtcInvApi_upd_handler - handles response for all update utilities + * + * Warning: These calls cannot be nested. + * + **/ + +#include "mtcHttpUtil.h" /* for mtcHttpUtil_libEvent_init + mtcHttpUtil_api_request + mtcHttpUtil_log_event */ + +#define MTC_INV_LABEL "/v1/ihosts/" /**< inventory host url label */ +#define MTC_INV_IUSER_LABEL "/v1/iuser/" /**< inventory config url label */ +#define MTC_INV_BATCH "?limit=" /**< batch read limit specified prefix */ +#define MTC_INV_BATCH_MAX 5 /**< maximum allowed batched read */ + +int mtcInvApi_handler ( libEvent & event ); + +/** Load all the host inventory from the sysinv database + * + * This API is only ever called once ; during initialization + * to get an initial snapshot of the host information in + * the database. + * + * @param batch - the number of inventory hosts' info to get in + * each request ; until all the elements are read. 
+ * + * @return execution status + * + *- PASS - indicates successful send request + *- FAIL_TIMEOUT - no response received in timeout period + *- FAIL_JSON_PARSE - response json string did not parse properly + *- HTTP status codes - any standard HTTP codes + * + *****************************************************************************/ +int mtcInvApi_read_inventory ( int batch ); + +/** Add a host to the sysinv database + * + * This API is only ever called once ; during initialization + * to add controller-0 to the database. + * + * @param info - reference to a structure containing the inventory elements + * to add. + * + * @return execution status + * + *- PASS indicates successful send request + *- FAIL indicates a failed send request (may have refined failure codes) + * + *****************************************************************************/ +int mtcInvApi_add_host ( node_inv_type & info ); + +/** Load all the elements of a host from the sysinv database. + * + * @param hostname - reference to a name string of the host to load. + * @param info - reference to a struct where the host info loaded from + * the database will be put. + * + * @return execution status + * + *- PASS - indicates successful send request + *- FAIL_TIMEOUT - no response received in timeout period + *- FAIL_JSON_PARSE - response json string did not parse properly + *- HTTP status codes - any standard HTTP codes + * + *****************************************************************************/ +int mtcInvApi_load_host ( string & hostname , + node_inv_type & inv_info ); + +/** @} mtcInvApi */ + +#endif /* __INCLUDE_MTCINVAPI_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcIpmiUtil.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcIpmiUtil.cpp new file mode 100644 index 00000000..349e2c58 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcIpmiUtil.cpp @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2017 Wind River Systems, Inc. 
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ *
+ *
+ * @file
+ * Wind River Titanium Cloud Maintenance IPMI Utilities
+ */
+#include
+#include
+#include
+
+using namespace std;
+
+#include "nodeBase.h" /* for ... mtce common definitions */
+#include "nodeClass.h" /* for ... */
+
+/* Request names, one row per command value below IPMITOOL_THREAD_CMD__LAST.
+ * Row 0 is the "null" placeholder handed back for out-of-range commands. */
+const char mtc_ipmiRequest_str[IPMITOOL_THREAD_CMD__LAST][20] =
+{
+    "null",
+    "Reset",
+    "Power-On",
+    "Power-Off",
+    "Power-Cycle",
+    "Query BMC Info",
+    "Query Power Status",
+    "Query Reset Reason"
+};
+
+/* Return the request name for 'command'.
+ *
+ * Any value at or below IPMITOOL_THREAD_CMD__NULL, or at or above
+ * IPMITOOL_THREAD_CMD__LAST, is logged as invalid and yields the
+ * "null" string. */
+const char * getIpmiCmd_str ( int command )
+{
+    if (( command <= IPMITOOL_THREAD_CMD__NULL ) ||
+        ( command >= IPMITOOL_THREAD_CMD__LAST ))
+    {
+        slog ("Invalid command (%d)\n", command );
+        return (mtc_ipmiRequest_str[IPMITOOL_THREAD_CMD__NULL]);
+    }
+    return (mtc_ipmiRequest_str[command]);
+}
+
+/* Action-in-progress phrases, indexed the same way as the request names. */
+const char mtc_ipmiAction_str[IPMITOOL_THREAD_CMD__LAST][30] =
+{
+    "null",
+    "resetting",
+    "powering on",
+    "powering off",
+    "power cycling",
+    "querying bmc info",
+    "querying power status",
+    "querying reset cause"
+};
+
+/* Return the action phrase for 'command'.
+ *
+ * Out-of-range values are handled exactly as in getIpmiCmd_str: logged
+ * as invalid and answered with the "null" string. */
+const char * getIpmiAction_str ( int command )
+{
+    if (( command <= IPMITOOL_THREAD_CMD__NULL ) ||
+        ( command >= IPMITOOL_THREAD_CMD__LAST ))
+    {
+        slog ("Invalid command (%d)\n", command );
+        return (mtc_ipmiAction_str[IPMITOOL_THREAD_CMD__NULL]);
+    }
+    return (mtc_ipmiAction_str[command]);
+}
+
+/*****************************************************************************
+ *
+ * Name : ipmi_command_send
+ *
+ * Description: This utility starts the ipmitool command handling thread
+ * with the specified command.
+ *
+ * Returns : PASS if all the pre-start semantic checks pass and the
+ * thread was started.
+ *
+ * Otherwise the thread was not started and some non zero
+ * FAIL_xxxx code is returned after a representative design
+ * log is generated.
+ *
+ *****************************************************************************/
+
+int nodeLinkClass::ipmi_command_send ( struct nodeLinkClass::node * node_ptr, int command )
+{
+    int rc = PASS ;
+
+    /* Record the requested command for the thread */
+    node_ptr->ipmitool_thread_info.command = command ;
+
+    /* Refresh the BMC access credentials from the node */
+    node_ptr->thread_extra_info.bm_ip = node_ptr->bm_ip ;
+    node_ptr->thread_extra_info.bm_un = node_ptr->bm_un ;
+    node_ptr->thread_extra_info.bm_pw = node_ptr->bm_pw ;
+    node_ptr->thread_extra_info.bm_type = node_ptr->bm_type ;
+
+#ifdef WANT_FIT_TESTING
+    {
+        int fit = FIT_CODE__IPMI_COMMAND_SEND ;
+        int fit_cmd = node_ptr->ipmitool_thread_info.command ;
+
+        /* One short-circuit expression ; the fit queries are made in
+         * the same order and stop at the first hit */
+        bool want_fit =
+            (  daemon_want_fit ( fit, node_ptr->hostname, "mc_info" ) == true ) ||
+            (( fit_cmd == IPMITOOL_THREAD_CMD__POWER_STATUS ) &&
+             ( daemon_want_fit ( fit, node_ptr->hostname, "power_status" ) == true )) ||
+            (  daemon_want_fit ( fit, node_ptr->hostname, "reset_cause" ) == true ) ||
+            (( fit_cmd == IPMITOOL_THREAD_CMD__POWER_RESET ) &&
+             ( daemon_want_fit ( fit, node_ptr->hostname, "reset" ) == true )) ||
+            (( fit_cmd == IPMITOOL_THREAD_CMD__POWER_ON ) &&
+             ( daemon_want_fit ( fit, node_ptr->hostname, "power_on" ) == true )) ||
+            (( fit_cmd == IPMITOOL_THREAD_CMD__POWER_OFF ) &&
+             ( daemon_want_fit ( fit, node_ptr->hostname, "power_off" ) == true )) ||
+            (( fit_cmd == IPMITOOL_THREAD_CMD__POWER_CYCLE ) &&
+             ( daemon_want_fit ( fit, node_ptr->hostname, "power_cycle" ) == true )) ;
+
+        if ( want_fit == true )
+        {
+            /* Inserted fault: report failure without launching */
+            slog ("%s FIT %s\n", node_ptr->hostname.c_str(), getIpmiCmd_str(fit_cmd) );
+            node_ptr->ipmitool_thread_info.status = node_ptr->ipmitool_thread_ctrl.status = rc = FAIL_FIT ;
+            node_ptr->ipmitool_thread_info.status_string = "ipmi_command_send fault insertion failure" ;
+            return ( rc );
+        }
+    }
+#endif
+
+    /* Refuse to launch unless the ip is valid and both the username
+     * and password are present */
+    if (( hostUtil_is_valid_ip_addr ( node_ptr->thread_extra_info.bm_ip ) != true ) ||
+        ( node_ptr->thread_extra_info.bm_un.empty() ) ||
+        ( node_ptr->thread_extra_info.bm_pw.empty ()))
+    {
+        node_ptr->ipmitool_thread_ctrl.status = rc =
+        node_ptr->ipmitool_thread_info.status = FAIL_INVALID_DATA ;
+        node_ptr->ipmitool_thread_info.status_string = "one or more bmc credentials are invalid" ;
+
+        /* Name each credential that failed its check */
+        wlog ("%s %s %s %s\n", node_ptr->hostname.c_str(),
+                  hostUtil_is_valid_ip_addr ( node_ptr->thread_extra_info.bm_ip ) ? "" : "bm_ip:invalid",
+                  node_ptr->thread_extra_info.bm_un.empty() ? "bm_un:empty" : "",
+                  node_ptr->thread_extra_info.bm_pw.empty() ? "bm_pw:empty" : "");
+
+        return (rc);
+    }
+
+    /* Credentials look usable ; start the command thread */
+    node_ptr->ipmitool_thread_ctrl.status = rc =
+    thread_launch ( node_ptr->ipmitool_thread_ctrl,
+                    node_ptr->ipmitool_thread_info ) ;
+    if ( rc == PASS )
+    {
+        dlog ("%s %s %s thread launched\n", node_ptr->hostname.c_str(),
+                  node_ptr->ipmitool_thread_ctrl.name.c_str(),
+                  getIpmiCmd_str(node_ptr->ipmitool_thread_info.command) );
+    }
+    else
+    {
+        elog ("%s failed to launch power control thread (rc:%d)\n",
+                  node_ptr->hostname.c_str(), rc );
+    }
+
+    /* The receive side polls from zero for every launch attempt */
+    node_ptr->ipmitool_thread_ctrl.retries = 0 ;
+
+    return (rc);
+}
+
+/*****************************************************************************
+ *
+ * Name : ipmi_command_recv
+ *
+ * Description: This utility will check for ipmitool command thread completion.
+ *
+ * Returns : PASS is returned if the thread reports done.
+ * RETRY is returned if the thread has not completed.
+ * FAIL_RETRY is returned after 10 back-to-back calls return RETRY.
+ *
+ *****************************************************************************/
+
+int nodeLinkClass::ipmi_command_recv ( struct nodeLinkClass::node * node_ptr )
+{
+    int rc = RETRY ;
+
+    /* Thread finished: take its status and validate the response */
+    if ( thread_done( node_ptr->ipmitool_thread_ctrl ) == true )
+    {
+        rc = node_ptr->ipmitool_thread_info.status ;
+        if ( rc != PASS )
+        {
+            elog ("%s %s command failed (rc:%d)\n",
+                      node_ptr->hostname.c_str(),
+                      getIpmiCmd_str(node_ptr->ipmitool_thread_info.command),
+                      rc );
+        }
+        else
+        {
+            /* Power control commands must echo their expected
+             * response text ; other commands are not checked here */
+            switch ( node_ptr->ipmitool_thread_info.command )
+            {
+                case IPMITOOL_THREAD_CMD__POWER_RESET:
+                    if ( node_ptr->ipmitool_thread_info.data.find(IPMITOOL_POWER_RESET_RESP) == std::string::npos )
+                        rc = FAIL_RESET_CONTROL ;
+                    break ;
+
+                case IPMITOOL_THREAD_CMD__POWER_OFF:
+                    if ( node_ptr->ipmitool_thread_info.data.find(IPMITOOL_POWER_OFF_RESP) == std::string::npos )
+                        rc = FAIL_POWER_CONTROL ;
+                    break ;
+
+                case IPMITOOL_THREAD_CMD__POWER_ON:
+                    if ( node_ptr->ipmitool_thread_info.data.find(IPMITOOL_POWER_ON_RESP) == std::string::npos )
+                        rc = FAIL_POWER_CONTROL ;
+                    break ;
+
+                case IPMITOOL_THREAD_CMD__POWER_CYCLE:
+                    if ( node_ptr->ipmitool_thread_info.data.find(IPMITOOL_POWER_CYCLE_RESP) == std::string::npos )
+                        rc = FAIL_POWER_CONTROL ;
+                    break ;
+
+                default:
+                    break ;
+            }
+
+            if ( rc == PASS )
+            {
+                blog ("%s %s Response: %s\n", node_ptr->hostname.c_str(),
+                          getIpmiCmd_str(node_ptr->ipmitool_thread_info.command),
+                          node_ptr->ipmitool_thread_info.data.c_str());
+            }
+            else
+            {
+                /* Response text did not match ; record the failure */
+                node_ptr->ipmitool_thread_info.status = rc ;
+                node_ptr->ipmitool_thread_info.status_string = ("power command failed");
+                wlog ("%s %s Response: %s\n", node_ptr->hostname.c_str(),
+                          getIpmiCmd_str(node_ptr->ipmitool_thread_info.command),
+                          node_ptr->ipmitool_thread_info.data.c_str());
+            }
+        }
+
+#ifdef WANT_FIT_TESTING
+        if ( rc == PASS )
+        {
+            int fit = FIT_CODE__IPMI_COMMAND_RECV ;
+
+            /* One short-circuit expression ; the fit queries are made
+             * in the same order and stop at the first hit */
+            bool want_fit =
+                (  daemon_want_fit ( fit, node_ptr->hostname, "mc_info" ) == true ) ||
+                (  daemon_want_fit ( fit, node_ptr->hostname, "reset_cause" ) == true ) ||
+                (( node_ptr->ipmitool_thread_info.command == IPMITOOL_THREAD_CMD__POWER_RESET ) &&
+                 ( daemon_want_fit ( fit, node_ptr->hostname, "reset" ) == true )) ||
+                (( node_ptr->ipmitool_thread_info.command == IPMITOOL_THREAD_CMD__POWER_ON ) &&
+                 ( daemon_want_fit ( fit, node_ptr->hostname, "power_on" ) == true )) ||
+                (( node_ptr->ipmitool_thread_info.command == IPMITOOL_THREAD_CMD__POWER_OFF ) &&
+                 ( daemon_want_fit ( fit, node_ptr->hostname, "power_off" ) == true )) ||
+                (( node_ptr->ipmitool_thread_info.command == IPMITOOL_THREAD_CMD__POWER_CYCLE ) &&
+                 ( daemon_want_fit ( fit, node_ptr->hostname, "power_cycle" ) == true )) ;
+
+            if ( want_fit == true )
+            {
+                node_ptr->ipmitool_thread_info.status = rc = FAIL_FIT ;
+                node_ptr->ipmitool_thread_info.status_string = "ipmi_command_recv fault insertion failure" ;
+            }
+        }
+#endif
+    }
+
+    /* Not done and the poll budget is used up. The counter is bumped by
+     * this test on every not-done call, before it is logged. */
+    else if ( node_ptr->ipmitool_thread_ctrl.retries++ >= IPMITOOL_MAX_RECV_RETRIES )
+    {
+        elog ("%s %s command timeout (%d of %d)\n",
+                  node_ptr->hostname.c_str(),
+                  getIpmiCmd_str(node_ptr->ipmitool_thread_info.command),
+                  node_ptr->ipmitool_thread_ctrl.retries,
+                  IPMITOOL_MAX_RECV_RETRIES);
+
+        rc = FAIL_RETRY;
+    }
+
+    /* Not done and the thread id is zero */
+    else if ( node_ptr->ipmitool_thread_ctrl.id == 0 )
+    {
+        slog ("%s %s command not-running\n",
+                  node_ptr->hostname.c_str(),
+                  getIpmiCmd_str(node_ptr->ipmitool_thread_info.command));
+        rc = FAIL_NOT_ACTIVE ;
+    }
+
+    /* Not done yet ; ask the caller to poll again */
+    else
+    {
+        ilog ("%s %s command in-progress (polling %d of %d)\n",
+                  node_ptr->hostname.c_str(),
+                  getIpmiCmd_str(node_ptr->ipmitool_thread_info.command),
+                  node_ptr->ipmitool_thread_ctrl.retries,
+                  IPMITOOL_MAX_RECV_RETRIES);
+        rc = RETRY ;
+    }
+
+    /* Every outcome other than RETRY ends this command ; clear the
+     * thread bookkeeping */
+    if ( rc != RETRY )
+    {
+        node_ptr->ipmitool_thread_ctrl.done = true ;
+        node_ptr->ipmitool_thread_ctrl.retries = 0 ;
+        node_ptr->ipmitool_thread_ctrl.id = 0 ;
+        node_ptr->ipmitool_thread_info.id = 0 ;
+        node_ptr->ipmitool_thread_info.command = 0 ;
+    }
+    return (rc);
+}
+
+/*****************************************************************************
+ *
+ * Name : ipmi_command_done
+ *
+ * Description: This utility frees the ipmitool command thread for next execution.
+ *
+ *****************************************************************************/
+
+void nodeLinkClass::ipmi_command_done ( struct nodeLinkClass::node * node_ptr )
+{
+    /* Setting the control 'done' flag is the only action taken */
+    node_ptr->ipmitool_thread_ctrl.done = true ;
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcIpmiUtil.h b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcIpmiUtil.h
new file mode 100644
index 00000000..e6f13d9b
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcIpmiUtil.h
@@ -0,0 +1,89 @@
+#ifndef __INCLUDE_MTCIPMIUTIL_H__
+#define __INCLUDE_MTCIPMIUTIL_H__
+
+/*
+ * Copyright (c) 2017 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+ * @file
+ * Wind River Titanium Cloud's Maintenance IPMI Utilities Header
+ */
+
+#include "nodeBase.h" /* for ...
*/ + +#define MC_INFO_LABEL_DELIMITER ((const char *)(": ")) +#define MC_INFO_LABEL_FW_VERSION ((const char *)("Firmware Revision")) +#define MC_INFO_LABEL_HW_VERSION ((const char *)("Device Revision")) +#define MC_INFO_LABEL_DEVICE_ID ((const char *)("Device ID")) +#define MC_INFO_LABEL_PRODUCT_ID ((const char *)("Product ID")) +#define MC_INFO_LABEL_PRODUCT_NAME ((const char *)("Product Name")) +#define MC_INFO_LABEL_MANUFACTURE_ID ((const char *)("Manufacturer ID")) +#define MC_INFO_LABEL_MANUFACTURE_NAME ((const char *)("Manufacturer Name")) + +#define IPMITOOL_POWER_RESET_CMD ((const char *)("chassis power reset")) +#define IPMITOOL_POWER_RESET_RESP ((const char *)("Chassis Power Control: Reset")) + +#define IPMITOOL_POWER_OFF_CMD ((const char *)("chassis power off")) +#define IPMITOOL_POWER_OFF_RESP ((const char *)("Chassis Power Control: Down/Off")) + +#define IPMITOOL_POWER_ON_CMD ((const char *)("chassis power on")) +#define IPMITOOL_POWER_ON_RESP ((const char *)("Chassis Power Control: Up/On")) + +#define IPMITOOL_POWER_CYCLE_CMD ((const char *)("chassis power cycle")) +#define IPMITOOL_POWER_CYCLE_RESP ((const char *)("Chassis Power Control: Cycle")) + +#define IPMITOOL_POWER_STATUS_CMD ((const char *)("chassis power status")) +#define IPMITOOL_POWER_ON_STATUS ((const char *)("Chassis Power is on")) +#define IPMITOOL_POWER_OFF_STATUS ((const char *)("Chassis Power is off")) + +#define IPMITOOL_RESTART_CAUSE_CMD ((const char *)("chassis restart_cause")) + +#define IPMITOOL_MC_INFO_CMD ((const char *)("mc info")) + +#define IPMITOOL_CMD_FILE_SUFFIX ((const char *)("_power_cmd_result")) +#define IPMITOOL_MC_INFO_FILE_SUFFIX ((const char *)("_mc_info")) +#define IPMITOOL_RESTART_CAUSE_FILE_SUFFIX ((const char *)("_restart_cause")) +#define IPMITOOL_POWER_STATUS_FILE_SUFFIX ((const char *)("_power_status")) + +#define IPMITOOL_MAX_RECV_RETRIES (10) + +/* Warning : Changes here require 'mtc_ipmiRequest_str' string array to be updated */ +typedef enum +{ + 
IPMITOOL_THREAD_CMD__NULL = 0, + IPMITOOL_THREAD_CMD__POWER_RESET, + + IPMITOOL_THREAD_CMD__POWER_ON, + IPMITOOL_THREAD_CMD__POWER_OFF, + IPMITOOL_THREAD_CMD__POWER_CYCLE, + + IPMITOOL_THREAD_CMD__MC_INFO, + IPMITOOL_THREAD_CMD__POWER_STATUS, + IPMITOOL_THREAD_CMD__RESTART_CAUSE, + IPMITOOL_THREAD_CMD__LAST + +} ipmitool_cmd_enum ; + +const char * getIpmiCmd_str ( int command ); +const char * getIpmiAction_str ( int command ); + + +typedef struct +{ + std::string product_name ; + std::string product_id ; + std::string manufacturer_name ; + std::string manufacturer_id ; + std::string device_id ; + std::string fw_version ; + std::string hw_version ; +} mc_info_type ; + +int ipmiUtil_mc_info_load ( string hostname, const char * filename, mc_info_type & mc_info ); +void ipmiUtil_mc_info_init ( mc_info_type & mc_info ); + +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcKeyApi.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcKeyApi.cpp new file mode 100755 index 00000000..e1f99edd --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcKeyApi.cpp @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2013, 2015 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Controller Maintenance + * Authentication Utility API + * + * mtcKeyApi_get_token + * _key_POST_request - Request a authentication token + * jsonApi_auth_request + * mtcHttpUtil_connect_new + * mtcHttpUtil_request_new + * mtcHttpUtil_header_add + * mtcHttpUtil_request_make + * evhttp_connection_set_timeout + * event_base_dispatch + * + * _key_POST_handler - called by libevent like an interrupt handler + * evbuffer_remove - reads the response data out of da resp buffer + * jsonApi_auth_load - extract the data we want from resp json string + * tokenid - load data: the 3604 byte authentication token + * adminURL - load data: the key address + * issued - load data: can use this later so that we + * expiry - load data: don't have to keep requesting tokens + * event_base_loopbreak - end the interrupt handler +*/ + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "key" + +#include "nodeClass.h" /* for ... maintenance class nodeLinkClass */ +#include "nodeUtil.h" +#include "httpUtil.h" /* for ... libEvent */ +#include "mtcKeyApi.h" /* for ... this module header */ +#include "jsonUtil.h" /* for ... 
Json utilities */ + +/* Token info is stored in the common public + * area of the maintenance nodelinkClass structure */ + +/* http://localhost:5000/v2.0/tokens -X POST -H "Content-Type: application/json" + * -H "Accept: application/json" + * -H "User-Agent: python-keyclient" + * -H "Connection: close" + * + * { + * "auth": + * { + * "tenantName": "services", + * "passwordCredentials": + * { + * "username": "mtce", + * "password": "password" + * } + * } + * } + * + */ +int throttle = 0 ; + +/* The handles the keystone POST request's response message */ +int mtcKeyApi_handler ( libEvent & event ) +{ + jsonUtil_auth_type info ; + string hn = event.hostname ; + int rc = PASS ; + + nodeLinkClass * obj_ptr = get_mtcInv_ptr () ; + + /* Copy the token info into the static libEvent tokenEvent struct */ + obj_ptr->tokenEvent = event ; + + if ( event.status ) + { + rc = obj_ptr->tokenEvent.status ; + elog ( "%s Token Request Failed (%d) \n", hn.c_str(), rc ); + } + else if ( jsonApi_auth_load ( hn, (char*)obj_ptr->tokenEvent.response.data(), info ) ) + { + rc = obj_ptr->tokenEvent.status = FAIL_JSON_PARSE ; + elog ( "%s Token Request Failed - Json Parse Error\n", hn.c_str()); + } + else + { + jlog ("%s Token Exp: %s\n", hn.c_str(), info.expiry.c_str() ); + jlog ("%s Admin URL: %s\n" ,hn.c_str(), info.adminURL.c_str() ); + jlog ("%s Token Len: %ld\n",hn.c_str(), info.tokenid.length() ); + obj_ptr->tokenEvent.token.issued = info.issued ; + obj_ptr->tokenEvent.token.expiry = info.expiry ; + obj_ptr->tokenEvent.token.token = info.tokenid ; + obj_ptr->tokenEvent.token.url = info.adminURL ; + obj_ptr->tokenEvent.status = PASS ; + if ( obj_ptr->token_refresh_rate ) + { + ilog ( "Token Refresh: [%s] [Expiry: %s %s]\n", + md5sum_string ( obj_ptr->tokenEvent.token.token).c_str(), + obj_ptr->tokenEvent.token.expiry.substr(0,10).c_str(), + obj_ptr->tokenEvent.token.expiry.substr(11,8).c_str()); + } + } + + /* Check for a response string */ + if ( obj_ptr->tokenEvent.token.token.empty() 
) + { + elog ("%s Failed to get token\n", + obj_ptr->tokenEvent.hostname.c_str()); + rc = FAIL_TOKEN_GET; + } + + /* Check for Key URL */ + else if ( obj_ptr->tokenEvent.token.url.empty() ) + { + elog ("%s Failed to get token URL\n", + obj_ptr->tokenEvent.hostname.c_str()); + rc = FAIL_TOKEN_URL; + } + else + { + dlog ("%s Token Refresh O.K.\n", obj_ptr->tokenEvent.hostname.c_str()); + } + return (rc); +} + +/* Corrupts the token in place by overwriting 50 characters at offset 800 with 'E'. + * A token shorter than 800 characters is overwritten entirely instead, because + * std::string::replace throws std::out_of_range when the position is beyond the end of the string */ +void corrupt_token ( keyToken_type & key ) +{ + if ( key.token.length() >= 800 ) + { + key.token.replace ( 800, 50, "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE" ); + } + else + { + key.token.assign ( key.token.length(), 'E' ); + } +} + +/* fetches an authorization token as a blocking request */ +int nodeLinkClass::mtcKeyApi_get_token ( string hostname ) +{ + mtcHttpUtil_event_init ( &this->tokenEvent, + hostname, + "mtcKeyApi_get_token", + hostUtil_getServiceIp ( SERVICE_TOKEN ), + hostUtil_getServicePort ( SERVICE_TOKEN )); + + this->tokenEvent.prefix_path = hostUtil_getPrefixPath(); + this->tokenEvent.blocking = true ; + this->tokenEvent.request = KEYSTONE_TOKEN ; + this->tokenEvent.operation = KEYSTONE_SIG ; + + this->tokenEvent.token.token.clear() ; + this->tokenEvent.token.url.clear(); + this->tokenEvent.token.issued.clear(); + this->tokenEvent.token.expiry.clear(); + + ilog ("%s Prefix path: %s\n", hostname.c_str(), this->tokenEvent.prefix_path.c_str() ); + return ( mtcHttpUtil_api_request ( this->tokenEvent )); +} + +/* fetches an authorization token and key URL and UUID info */ +int nodeLinkClass::mtcKeyApi_refresh_token ( string hostname ) +{ + GET_NODE_PTR(hostname); + mtcHttpUtil_event_init ( &node_ptr->httpReq, + hostname, + "mtcKeyApi_refresh_token", + hostUtil_getServiceIp ( SERVICE_TOKEN ), + hostUtil_getServicePort ( SERVICE_TOKEN )); + + node_ptr->httpReq.prefix_path = hostUtil_getPrefixPath(); + node_ptr->httpReq.hostname = hostname ; + node_ptr->httpReq.uuid = node_ptr->uuid ; + node_ptr->httpReq.request = KEYSTONE_TOKEN ; + node_ptr->httpReq.operation = KEYSTONE_SIG ; + node_ptr->httpReq.max_retries = 3 ; + node_ptr->httpReq.cur_retries
= 0 ; + + node_ptr->httpReq.token.token.clear() ; + node_ptr->httpReq.token.url.clear(); + node_ptr->httpReq.token.issued.clear(); + node_ptr->httpReq.token.expiry.clear(); + + /* log the prefix path loaded into this node's request rather than the one held by the shared tokenEvent */ + ilog ("%s Prefix path: %s\n", hostname.c_str(), node_ptr->httpReq.prefix_path.c_str() ); + return(this->workQueue_enqueue ( node_ptr->httpReq)); +} diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcKeyApi.h b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcKeyApi.h new file mode 100755 index 00000000..6e53aad4 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcKeyApi.h @@ -0,0 +1,25 @@ +#ifndef __INCLUDE_MTCKEYAPI_H__ +#define __INCLUDE_MTCKEYAPI_H__ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +#include +#include + +#include "mtcHttpUtil.h" + +//#define MTC_POST_KEY_ADDR "localhost" +//#define MTC_POST_KEY_PORT 5000 +#define MTC_POST_KEY_LABEL "/v3/auth/tokens" + +int mtcKeyApi_init ( string ip, int port ); + +int mtcKeyApi_handler ( libEvent & event ); + +void corrupt_token ( keyToken_type & key ); + +#endif /* __INCLUDE_MTCKEYAPI_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeComp.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeComp.cpp new file mode 100755 index 00000000..54ce30a0 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeComp.cpp @@ -0,0 +1,1903 @@ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc.
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Compute Maintenance Daemon + */ + +/************************************************************** + * Implementation Structure + ************************************************************** + * + * Call sequence: + * + * daemon_init + * daemon_files_init + * daemon_configure + * daemon_signal_init + * mtc_message_init + * mtc_socket_init + * + * daemon_service_run + * forever ( timer_handler ) + * mtc_service_command + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for hostent */ +#include +#include +#include +#include +#include +//#include /* for ... syslog */ +#include +#include + +using namespace std; + +#include "daemon_ini.h" /* for ... Init parset header */ +#include "daemon_common.h" /* for ... common daemon definitions */ +#include "daemon_option.h" /* for ... daemon main options */ + +#include "nodeBase.h" /* for ... Common Definitions */ +#include "nodeTimers.h" /* fpr ... Timer Service */ +#include "nodeUtil.h" /* for ... Common Utilities */ +#include "nodeMacro.h" /* for ... CREATE_NONBLOCK_INET_UDP_RX_SOCKET */ +#include "mtcNodeMsg.h" /* for ... common maintenance messaging */ +#include "mtcNodeComp.h" /* for ... this module header */ +#include "regexUtil.h" /* for ... Regex and String utilities */ +extern "C" +{ +#include "amon.h" /* for ... active monitoring utilities */ +#include "rmon_api.h" /* for ... 
resource monitoring utilities */ + +} + +static ctrl_type ctrl ; +ctrl_type * get_ctrl_ptr ( void ) +{ + return (&ctrl); +} + +string get_who_i_am ( void ) +{ + return (ctrl.who_i_am) ; +} + +bool is_subfunction_compute ( void ) +{ + if ( ctrl.subfunction & COMPUTE_TYPE ) + return true ; + else + return false ; +} + +/* returns my hostname */ +string get_hostname ( void ) +{ + return ( &ctrl.hostname[0] ); +} + +/** + * Daemon Configuration Structure - The allocated struct + * @see daemon_common.h for daemon_config_type struct format. + */ +static daemon_config_type mtc_config ; +daemon_config_type * daemon_get_cfg_ptr () { return &mtc_config ; } + +/** + * Messaging Socket Control Struct - The allocated struct + * @see nodeBase.h for mtc_socket_type struct format. + */ +static mtc_socket_type mtc_sock ; +static mtc_socket_type * sock_ptr ; + + +int run_goenabled_scripts ( string type ); + +/* Looks up the timer ID and asserts the corresponding node's ringer */ +void timer_handler ( int sig, siginfo_t *si, void *uc) +{ + /* Avoid compiler errors/warnings */ + UNUSED(sig); + UNUSED(si); + UNUSED(uc); + + timer_t * tid_ptr = (void**)si->si_value.sival_ptr ; + + if ( !(*tid_ptr) ) + { + return ; + } + else if ( *tid_ptr == ctrl.timer.tid ) + { + mtcTimer_stop_int_safe ( ctrl.timer ); + ctrl.timer.ring = true ; + } + else if ( *tid_ptr == ctrl.goenabled.timer.tid ) + { + mtcTimer_stop_int_safe ( ctrl.goenabled.timer ); + ctrl.goenabled.timer.ring = true ; + } + else if ( *tid_ptr == ctrl.hostservices.timer.tid ) + { + mtcTimer_stop_int_safe ( ctrl.hostservices.timer ); + ctrl.hostservices.timer.ring = true ; + } + else + { + mtcTimer_stop_tid_int_safe ( tid_ptr ); + } +} + +void _close_mgmnt_rx_socket ( void ) +{ + if ( mtc_sock.mtc_client_rx_socket ) + { + delete(mtc_sock.mtc_client_rx_socket); + mtc_sock.mtc_client_rx_socket = 0 ; + } +} + +void _close_infra_rx_socket ( void ) +{ + if ( mtc_sock.mtc_client_infra_rx_socket ) + { + 
delete(mtc_sock.mtc_client_infra_rx_socket); + mtc_sock.mtc_client_infra_rx_socket = 0 ; + } +} + +void _close_mgmnt_tx_socket ( void ) +{ + if (mtc_sock.mtc_client_tx_socket) + { + delete (mtc_sock.mtc_client_tx_socket); + mtc_sock.mtc_client_tx_socket = 0 ; + } +} + +void _close_infra_tx_socket ( void ) +{ + if (mtc_sock.mtc_client_infra_tx_socket) + { + delete (mtc_sock.mtc_client_infra_tx_socket); + mtc_sock.mtc_client_infra_tx_socket = 0 ; + } +} + +void _close_rmon_sock ( void ) +{ + if ( mtc_sock.rmon_socket ) + { + close (mtc_sock.rmon_socket); + mtc_sock.rmon_socket = 0 ; + } +} + +void _close_amon_sock ( void ) +{ + if ( mtc_sock.amon_socket ) + { + close (mtc_sock.amon_socket); + mtc_sock.amon_socket = 0 ; + } +} + +void daemon_exit ( void ) +{ + daemon_files_fini (); + + _close_mgmnt_rx_socket (); + _close_infra_rx_socket (); + _close_mgmnt_tx_socket (); + _close_infra_tx_socket (); + _close_rmon_sock (); + _close_amon_sock (); + + exit (0) ; +} + + +/* Startup config read */ +static int mtc_config_handler ( void * user, + const char * section, + const char * name, + const char * value) +{ + daemon_config_type* config_ptr = (daemon_config_type*)user; + + if (MATCH("agent", "mtc_agent_port")) + { + config_ptr->mtc_agent_port = atoi(value); + config_ptr->mask |= CONFIG_AGENT_PORT ; + } + else if (MATCH("client", "mtc_rx_mgmnt_port")) + { + config_ptr->mtc_rx_mgmnt_port = atoi(value); + config_ptr->mask |= CONFIG_CLIENT_MTC_MGMNT_PORT ; + } + else if (MATCH("client", "rmon_event_port")) + { + config_ptr->rmon_event_port = atoi(value); + config_ptr->mask |= CONFIG_CLIENT_RMON_PORT ; + } + else if (MATCH("timeouts", "failsafe_shutdown_delay")) + { + config_ptr->failsafe_shutdown_delay = atoi(value); + ilog ("Shutdown TO : %d secs\n", config_ptr->failsafe_shutdown_delay ); + } + else + { + return (PASS); + } + return (FAIL); +} + +/* Read the mtc.ini file and load control */ +/* settings into the daemon configuration */ +int daemon_configure ( void ) +{ + int 
rc = FAIL ; + + /* Read the ini */ + mtc_config.mask = 0 ; + if (ini_parse(MTCE_CONF_FILE, mtc_config_handler, &mtc_config) < 0) + { + elog("Failed to load '%s'\n", MTCE_CONF_FILE ); + return (FAIL_LOAD_INI); + } + + get_debug_options ( MTCE_CONF_FILE, &mtc_config ); + + /* Verify loaded config against an expected mask + * as an ini file fault detection method */ + if ( mtc_config.mask != CONFIG_CLIENT_MASK ) + { + elog ("Failed Compute Mtc Configuration (%x)\n", + (( -1 ^ mtc_config.mask ) & CONFIG_CLIENT_MASK) ); + rc = FAIL_INI_CONFIG ; + } + + else + { + ilog("Agent Mgmnt : %d (tx)\n", mtc_config.mtc_agent_port ); + ilog("Client Mgmnt: %d (rx)\n", mtc_config.mtc_rx_mgmnt_port ); + + if (ini_parse(MTCE_CONF_FILE, client_timeout_handler, &mtc_config) < 0) + { + elog ("Can't load '%s'\n", MTCE_CONF_FILE ); + return (FAIL_LOAD_INI); + } + + rc = PASS ; + } + return (rc); +} + +/****************************/ +/* Initialization Utilities */ +/****************************/ + +void setup_mgmnt_rx_socket ( void ) +{ + dlog ("setup of mgmnt RX\n"); + ctrl.mgmnt_iface = daemon_mgmnt_iface() ; + ctrl.mgmnt_iface = daemon_get_iface_master ((char*)ctrl.mgmnt_iface.data()); + + if ( ! 
ctrl.mgmnt_iface.empty() ) + { + ilog("Mgmnt iface : %s\n", ctrl.mgmnt_iface.c_str() ); + get_iface_macaddr ( ctrl.mgmnt_iface.data(), ctrl.macaddr ); + get_iface_address ( ctrl.mgmnt_iface.data(), ctrl.address , true ); + get_hostname ( &ctrl.hostname[0], MAX_HOST_NAME_SIZE ); + + _close_mgmnt_rx_socket (); + mtc_sock.mtc_client_rx_socket = new msgClassRx(ctrl.address.c_str(),mtc_sock.mtc_cmd_port, IPPROTO_UDP, ctrl.mgmnt_iface.data(), false ); + + /* update health of socket */ + if ( mtc_sock.mtc_client_rx_socket ) + { + /* look for fault insertion request */ + if ( daemon_is_file_present ( MTC_CMD_FIT__MGMNT_RXSOCK ) ) + mtc_sock.mtc_client_rx_socket->return_status = FAIL ; + + if ( mtc_sock.mtc_client_rx_socket->return_status == PASS ) + { + mtc_sock.mtc_client_rx_socket->sock_ok (true); + } + else + { + elog ("failed to init 'management rx' socket (rc:%d)\n", + mtc_sock.mtc_client_rx_socket->return_status ); + mtc_sock.mtc_client_rx_socket->sock_ok (false); + } + } + } +} + + +void setup_infra_rx_socket ( void ) +{ + dlog ("setup of infra RX\n"); + /* Fetch the infrastructure interface name. 
+ * calls daemon_get_iface_master inside so the + * aggrigated name is returned if it exists */ + get_infra_iface (&mtc_config.infra_iface ); + if ( strlen(mtc_config.infra_iface) ) + { + /* Only get the infrastructure network address if it is provisioned */ + if ( get_iface_address ( mtc_config.infra_iface, ctrl.address_infra, false ) == PASS ) + { + ctrl.infra_iface_provisioned = true ; + ilog ("Infra iface : %s\n", mtc_config.infra_iface ); + ilog ("Infra addr : %s\n", ctrl.address_infra.c_str()); + } + } + if ( !ctrl.address_infra.empty() ) + { + _close_infra_rx_socket (); + + /* Only set up the socket if an infra interface is provisioned */ + mtc_sock.mtc_client_infra_rx_socket = new msgClassRx(ctrl.address_infra.c_str(),mtc_sock.mtc_cmd_port, IPPROTO_UDP, ctrl.infra_iface.data(), false ); + + /* update health of socket */ + if ( mtc_sock.mtc_client_infra_rx_socket ) + { + /* look for fault insertion request */ + if ( daemon_is_file_present ( MTC_CMD_FIT__INFRA_RXSOCK ) ) + mtc_sock.mtc_client_infra_rx_socket->return_status = FAIL ; + + if ( mtc_sock.mtc_client_infra_rx_socket->return_status == PASS ) + { + mtc_sock.mtc_client_infra_rx_socket->sock_ok (true); + } + else + { + elog ("failed to init 'infrastructure rx' socket (rc:%d)\n", + mtc_sock.mtc_client_infra_rx_socket->return_status ); + mtc_sock.mtc_client_infra_rx_socket->sock_ok (false); + } + } + } +} + +void setup_mgmnt_tx_socket ( void ) +{ + dlog ("setup of mgmnt TX\n"); + _close_mgmnt_tx_socket (); + mtc_sock.mtc_client_tx_socket = new msgClassTx(CONTROLLER,mtc_sock.mtc_agent_port, IPPROTO_UDP, ctrl.mgmnt_iface.data()); + + if ( mtc_sock.mtc_client_tx_socket ) + { + /* look for fault insertion request */ + if ( daemon_is_file_present ( MTC_CMD_FIT__MGMNT_TXSOCK ) ) + mtc_sock.mtc_client_tx_socket->return_status = FAIL ; + + if ( mtc_sock.mtc_client_tx_socket->return_status == PASS ) + { + mtc_sock.mtc_client_tx_socket->sock_ok(true); + } + else + { + elog ("failed to init 'management tx' socket 
(rc:%d)\n", + mtc_sock.mtc_client_tx_socket->return_status ); + mtc_sock.mtc_client_tx_socket->sock_ok(false); + } + } +} + +void setup_infra_tx_socket ( void ) +{ + ilog ("setup of infra TX\n"); + if ( ctrl.infra_iface_provisioned == false ) + { + return ; + } + + _close_infra_tx_socket (); + mtc_sock.mtc_client_infra_tx_socket = new msgClassTx(CONTROLLER_NFS,mtc_sock.mtc_agent_port, IPPROTO_UDP, mtc_config.infra_iface); + + if ( mtc_sock.mtc_client_infra_tx_socket ) + { + /* look for fault insertion request */ + if ( daemon_is_file_present ( MTC_CMD_FIT__INFRA_TXSOCK ) ) + mtc_sock.mtc_client_infra_tx_socket->return_status = FAIL ; + + if ( mtc_sock.mtc_client_infra_tx_socket->return_status == PASS ) + { + mtc_sock.mtc_client_infra_tx_socket->sock_ok(true); + } + else + { + elog ("failed to init 'infrastructure tx' socket (rc:%d)\n", + mtc_sock.mtc_client_infra_tx_socket->return_status ); + mtc_sock.mtc_client_infra_tx_socket->sock_ok(false); + } + } +} + + +void setup_amon_socket ( void ) +{ + char filename [MAX_FILENAME_LEN] ; + string port_string ; + + snprintf ( filename , MAX_FILENAME_LEN, "%s/%s.conf", PMON_CONF_FILE_DIR, program_invocation_short_name ) ; + + if ( ini_get_config_value ( filename, "process", "port", port_string , false ) != PASS ) + { + elog ("failed to get active monitor port from %s\n", filename ); + mtc_sock.amon_socket = 0 ; + return ; + } + + mtc_sock.amon_socket = + active_monitor_initialize ( program_invocation_short_name, atoi(port_string.data())); + if ( mtc_sock.amon_socket ) + { + int val = 1; + + /* Make the active monitor socket non-blocking */ + if ( 0 > ioctl(mtc_sock.amon_socket, FIONBIO, (char *)&val) ) + { + elog ("Failed to set amon socket non-blocking\n"); + close (mtc_sock.amon_socket); + } + else + { + ilog ("Active Monitor Socket %d\n", mtc_sock.amon_socket ); + return ; + } + } + mtc_sock.amon_socket = 0 ; +} + +void setup_rmon_socket ( void ) +{ + mtc_sock.rmon_socket = + resource_monitor_initialize ( 
program_invocation_short_name, mtc_config.rmon_event_port, ALL_USAGE ); + if ( mtc_sock.rmon_socket ) + { + int val = 1; + + /* Make the active monitor socket non-blocking */ + if ( 0 > ioctl(mtc_sock.rmon_socket, FIONBIO, (char *)&val) ) + { + elog ("failed to set rmon event port non-blocking (%d:%s),\n", errno, strerror(errno)); + close ( mtc_sock.rmon_socket ); + } + else + { + ilog ("Resource Monitor Socket %d\n", mtc_sock.rmon_socket ); + return ; + } + } + else + { + elog ("failed to register as client with rmond\n"); + } + mtc_sock.rmon_socket = 0 ; +} + +/****************************************************************** + * + * Construct the messaging sockets + * + * 1. Unicast receive socket mgmnt (mtc_client_rx_socket) + * 2. Unicast receive socket infra (mtc_client_infra_rx_socket) + * 3. Unicast transmit socket mgmnt (mtc_client_tx_socket) + * 4. Unicast transmit socket infra (mtc_client_infra_tx_socket) + * + * 5. socket for pmond acive monitoring + * 6. socket to receive rmond events (including AVS) + * + *******************************************************************/ +int mtc_socket_init ( void ) +{ + /* Setup the Management Interface Recieve Socket */ + /* Read the port config strings into the socket struct */ + mtc_sock.mtc_agent_port = mtc_config.mtc_agent_port; + mtc_sock.mtc_cmd_port = mtc_config.mtc_rx_mgmnt_port; + + ctrl.mtcAgent_ip = getipbyname ( CONTROLLER ); + ilog ("Controller : %s\n", ctrl.mtcAgent_ip.c_str()); + + /************************************************************/ + /* Setup the Mgmnt Interface Receive Socket */ + /************************************************************/ + setup_mgmnt_rx_socket (); + + /************************************************************/ + /* Setup the Infra Interface Receive Socket */ + /************************************************************/ + setup_infra_rx_socket () ; + + /************************************************************/ + /* Setup the Mgmnt Interface Transmit 
messaging to mtcAgent */ + /************************************************************/ + setup_mgmnt_tx_socket (); + + /*************************************************************/ + /* Setup the Infra Interface Transmit Messaging to mtcAgent */ + /*************************************************************/ + setup_infra_tx_socket () ; + + /*************************************************************/ + /* Setup and Open the active monitoring socket */ + /*************************************************************/ + setup_amon_socket (); + + /*************************************************************/ + /* Setup and Open the resource monitor event socket */ + /*************************************************************/ + setup_rmon_socket (); + + return (PASS); +} + +/**************************************************************************************** + * + * Build up an 'identity' string to be included in the periodic mtcAlive message. + * + * hostname + * personality + * mac address + * mgmnt ip address + * infra ip address + * + ***************************************************************************************/ +string _self_identify ( string nodetype ) +{ + string hostname = &ctrl.hostname[0]; + + /* Build up the identity string for return to caller */ + ctrl.who_i_am = "{\"hostname\":\""; + ctrl.who_i_am.append( hostname.data() ); + ctrl.who_i_am.append( "\""); + + ctrl.who_i_am.append(",\"personality\":\""); + ctrl.who_i_am.append( nodetype.data() ); + ctrl.who_i_am.append( "\""); + + ctrl.who_i_am.append( ",\"mgmt_ip\":\""); + ctrl.who_i_am.append( ctrl.address.data() ); + ctrl.who_i_am.append( "\""); + + ctrl.who_i_am.append( ",\"infra_ip\":\""); + ctrl.who_i_am.append( ctrl.address_infra.data() ); + ctrl.who_i_am.append( "\""); + + ctrl.who_i_am.append( ",\"mgmt_mac\":\""); + ctrl.who_i_am.append( ctrl.macaddr.data() ); + ctrl.who_i_am.append( "\""); + + ilog ("Identity : %s\n", ctrl.who_i_am.c_str() ); + return ( ctrl.who_i_am 
); +} + +/* Init a specific script 'execution' struct */ +void script_exec_init ( script_exec_type * script_exec_ptr ) +{ + if ( script_exec_ptr ) + { + script_exec_ptr->pid = 0 ; + script_exec_ptr->done = false ; + script_exec_ptr->status = -1 ; + script_exec_ptr->name.clear() ; + } +} + +/* Init a specific script 'control' struct */ +void script_ctrl_init ( script_ctrl_type * script_ctrl_ptr ) +{ + if ( script_ctrl_ptr ) + { + script_ctrl_ptr->posted = MTC_CMD_NONE ; + script_ctrl_ptr->monitor = MTC_CMD_NONE ; + script_ctrl_ptr->scripts = 0 ; + script_ctrl_ptr->scripts_done = 0 ; + } +} + +/**************************************************************************** + * + * Name : _scripts_cleanup + * + * Description: For the specified script group ... + * + * Kill off any scripts that are still running and + * clear active flag. + * + ****************************************************************************/ +void _scripts_cleanup ( script_set_enum script_set ) +{ + script_ctrl_type * script_ptr ; + switch ( script_set ) + { + case GOENABLED_MAIN_SCRIPTS: + case GOENABLED_SUBF_SCRIPTS: + script_ptr = &ctrl.goenabled ; + break ; + case HOSTSERVICES_SCRIPTS: + script_ptr = &ctrl.hostservices ; + break ; + default: + slog ("invalid script set (%d)\n", script_set ); + return ; + } + + mtcTimer_reset ( script_ptr->timer ); + + /* loop over looking to see if all the scripts are done */ + for ( int i = 0 ; i < script_ptr->scripts ; i++ ) + { + if (( script_ptr->script[i].pid ) && ( script_ptr->script[i].done == false )) + { + int result = kill ( script_ptr->script[i].pid, 0 ); + if ( result == 0 ) + { + result = kill ( script_ptr->script[i].pid, SIGKILL ); + if ( result == 0 ) + { + wlog ("kill of %s with pid %d succeeded\n", script_ptr->script[i].name.c_str(), script_ptr->script[i].pid ); + } + else + { + elog ("kill of %s with pid %d failed\n", script_ptr->script[i].name.c_str(), script_ptr->script[i].pid ); + } + } + } + script_exec_init ( &script_ptr->script[i] 
); + } + + script_ctrl_init ( script_ptr ); + ctrl.active_script_set = NO_SCRIPTS ; +} + +/**************************************************************************** + * + * Name : _manage_services_scripts + * + * Description: Looks for 3 conditions. + * + * 1. done - all scripts executed - PASS or FAIL_xxxxxx + * 2. timeout - scripts took too long to complete - FAIL_TIMEOUT + * 3. empty - no scripts to run or manage - auto PASS + * + * When done, timeout or empty sends appropriate result + * to mtcAgent. + * + ***************************************************************************/ +void _manage_services_scripts ( void ) +{ + bool failed = false ; + char str [BUF_SIZE] ; + + if ( ! ctrl.hostservices.scripts ) + { + /* send a PASS result */ + mtce_send_event ( sock_ptr, MTC_CMD_HOST_SVCS_RESULT, NULL ); + return ; + } + + memset (str,0,BUF_SIZE); + + /* do if all the scripts are done ? */ + if ( ctrl.hostservices.scripts_done == ctrl.hostservices.scripts ) + { + /* loop over looking to see if all the scripts are done */ + for ( int i = 0 ; i < ctrl.hostservices.scripts ; i++ ) + { + if ( ctrl.hostservices.script[i].status ) + { + if ( failed == false ) + { + /* only report of the first failure */ + snprintf(str, BUF_SIZE, "%s failed ; rc:%d", + ctrl.hostservices.script[i].name.data(), + ctrl.hostservices.script[i].status ); + failed = true ; + } + } + } + /* handle the aggrigate status */ + if ( failed == true ) + { + elog ("Host Services: %s\n", str ); + mtce_send_event ( sock_ptr, MTC_CMD_HOST_SVCS_RESULT, str ); + } + else + { + ilog ("Host Services Complete ; all passed\n"); + mtce_send_event ( sock_ptr, MTC_CMD_HOST_SVCS_RESULT, NULL ); + } + } + + /* do if have we timed out ? 
*/ + else if ( ctrl.hostservices.timer.ring == true ) + { + bool found = false ; + if ( ctrl.hostservices.posted || ctrl.hostservices.monitor ) + { + snprintf(str, BUF_SIZE, "unknown test (timeout)"); + for ( int i = 0 ; i < ctrl.hostservices.scripts ; i++ ) + { + if ( ctrl.hostservices.script[i].done == false ) + { + snprintf(str, BUF_SIZE, "%s (timeout)", ctrl.hostservices.script[i].name.data() ); + found = true ; + wlog ("host services timeout on %s\n", ctrl.hostservices.script[i].name.c_str()); + mtce_send_event ( sock_ptr, MTC_CMD_HOST_SVCS_RESULT, str ); + break ; + } + } + } + + if ( found == false ) + { + slog ("unexpected host services timer ring (cmd:%x)", ctrl.hostservices.posted ); + } + } + else + { + return ; + } + + _scripts_cleanup (ctrl.active_script_set) ; +} + +/**************************************************************************** + * + * Name : _manage_goenabled_tests + * + * Description: Looks for 3 conditions. + * + * 1. done - all scripts executed - PASS or FAIL_xxxxxx + * 2. timeout - scripts took too long to complete - FAIL_TIMEOUT + * 3. empty - no scripts to run or manage - auto PASS + * + * When done, timeout or empty sends appropriate result + * to mtcAgent. + * + ***************************************************************************/ + +void _manage_goenabled_tests ( void ) +{ + bool failed = false ; + char str [BUF_SIZE] ; + memset (str,0,BUF_SIZE); + + if ( ! 
ctrl.goenabled.scripts ) + { + switch ( ctrl.active_script_set ) + { + case GOENABLED_SUBF_SCRIPTS: + { + time ( &ctrl.goenabled_subf_time ); + daemon_remove_file ( GOENABLED_SUBF_FAIL ); + + ilog ("GoEnabled Subfunction Testing Complete ; no tests to run\n"); + daemon_log ( GOENABLED_SUBF_PASS , str ); + send_mtc_msg ( sock_ptr, MTC_MSG_SUBF_GOENABLED, "" ); + break ; + } + case GOENABLED_MAIN_SCRIPTS: + { + time ( &ctrl.goenabled_main_time ); + daemon_remove_file ( GOENABLED_MAIN_FAIL ); + + ilog ("GoEnabled Testing Complete ; no tests to run\n"); + daemon_log ( GOENABLED_MAIN_PASS , str ); + send_mtc_msg ( sock_ptr, MTC_MSG_MAIN_GOENABLED, ""); + break ; + } + default: + slog ("called with invalid active script set (%d)\n", + ctrl.active_script_set ); + } + return ; + } + + if ( ctrl.goenabled.scripts_done == ctrl.goenabled.scripts ) + { + /* loop over looking to see if all the scripts are done */ + for ( int i = 0 ; i < ctrl.goenabled.scripts ; i++ ) + { + if ( ctrl.goenabled.script[i].status ) + { + if ( failed == false ) + { + snprintf(str, BUF_SIZE, "%s (rc:%d)", + ctrl.goenabled.script[i].name.data(), + ctrl.goenabled.script[i].status ); + failed = true ; + } + wlog ("goenabled test %s FAILED with exit status :%d:%x\n", + ctrl.goenabled.script[i].name.c_str(), + ctrl.goenabled.script[i].status, + ctrl.goenabled.script[i].status); + } + } + if ( failed == true ) + { + switch ( ctrl.active_script_set ) + { + case GOENABLED_SUBF_SCRIPTS: + { + time ( &ctrl.goenabled_subf_time ); + daemon_remove_file ( GOENABLED_SUBF_PASS ); + + ilog ("GoEnabled Subfunction Testing Failed ; at least one test failed\n"); + daemon_log ( GOENABLED_SUBF_FAIL , str ); + send_mtc_msg ( sock_ptr, MTC_MSG_SUBF_GOENABLED_FAILED, str ); + break ; + } + case GOENABLED_MAIN_SCRIPTS: + { + time ( &ctrl.goenabled_main_time ); + daemon_remove_file ( GOENABLED_MAIN_PASS ); + + ilog ("GoEnabled Testing Failed ; at least one test failed\n"); + daemon_log ( GOENABLED_MAIN_FAIL , str ); + 
send_mtc_msg ( sock_ptr, MTC_MSG_MAIN_GOENABLED_FAILED, str ); + break ; + } + default: + slog ("called with invalid active script set (%d)\n", + ctrl.active_script_set ); + } + } + else + { + switch ( ctrl.active_script_set ) + { + case GOENABLED_SUBF_SCRIPTS: + { + time ( &ctrl.goenabled_subf_time ); + + ilog ("GoEnabled Subfunction Testing Complete ; all tests passed\n"); + daemon_log ( GOENABLED_SUBF_PASS , str ); + send_mtc_msg ( sock_ptr, MTC_MSG_SUBF_GOENABLED, "" ); + break ; + } + case GOENABLED_MAIN_SCRIPTS: + { + time ( &ctrl.goenabled_main_time ); + + ilog ("GoEnabled Testing Complete ; all tests passed\n"); + daemon_log ( GOENABLED_MAIN_PASS , str ); + send_mtc_msg ( sock_ptr, MTC_MSG_MAIN_GOENABLED, ""); + break ; + } + default: + slog ("called with invalid active script set (%d)\n", + ctrl.active_script_set ); + } + /* fall through to cleanup */ + } + mtcTimer_reset ( ctrl.goenabled.timer ); + } + + else if ( ctrl.goenabled.timer.ring == true ) + { + bool found = false ; + snprintf(str, BUF_SIZE, "unknown test (timeout)"); + for ( int i = 0 ; i < ctrl.goenabled.scripts ; i++ ) + { + if ( ctrl.goenabled.script[i].done == false ) + { + snprintf(str, BUF_SIZE, "%s (timeout)", ctrl.goenabled.script[i].name.data() ); + found = true ; + wlog ("goenabled test timeout - %s\n", ctrl.goenabled.script[i].name.c_str()); + break ; + } + } + if ( found == false ) + { + slog ("unexpected goenabled timer ring (%x)", ctrl.goenabled.posted ); + } + + switch ( ctrl.active_script_set ) + { + case GOENABLED_SUBF_SCRIPTS: + { + daemon_remove_file ( GOENABLED_SUBF_PASS ); + send_mtc_msg ( sock_ptr, MTC_MSG_SUBF_GOENABLED_FAILED, str ); + daemon_log ( GOENABLED_SUBF_FAIL , str ); + break ; + } + case GOENABLED_MAIN_SCRIPTS: + { + /* a main test timeout must clear the main pass file, not the subfunction one */ + daemon_remove_file ( GOENABLED_MAIN_PASS ); + send_mtc_msg ( sock_ptr, MTC_MSG_MAIN_GOENABLED_FAILED, str ); + daemon_log ( GOENABLED_MAIN_FAIL , str ); + break ; + } + default: + slog ("called with invalid active script set (%d)\n", +
ctrl.active_script_set );
+        }
+    }
+    else
+    {
+        return ;
+    }
+    _scripts_cleanup (ctrl.active_script_set) ;
+}
+
+
+/**
+ * One-time start-up initialization of the daemon control structure.
+ *
+ * Resets the identity and address fields of 'ctrl', derives the node
+ * type mask from 'nodetype_str', clears the socket struct and then runs
+ * the start-up chain: files init, signal init, daemon configuration and
+ * host function extraction. The chain stops at the first step that fails.
+ * Socket setup and self identification follow the chain.
+ *
+ * @param iface        management interface name ; stored in ctrl.mgmnt_iface
+ * @param nodetype_str node type string ; stored in ctrl.nodetype_str and
+ *                     converted with get_host_function_mask
+ *
+ * @return PASS, or the code of the failure that was recorded.
+ *         A daemon_configure failure returns FAIL_DAEMON_CONFIG right away.
+ *         A socket init failure is reported as FAIL_SOCKET_INIT.
+ */
+int daemon_init ( string iface, string nodetype_str )
+{
+    int rc = PASS ;
+
+    ctrl.nodetype_str = nodetype_str ;
+
+    ctrl.who_i_am = "" ;
+    ctrl.macaddr = "" ;
+    ctrl.address = "" ;
+    ctrl.address_infra = "" ;
+    ctrl.mtcAgent_ip = "";
+    ctrl.function = 0 ;
+    ctrl.subfunction = 0 ;
+    ctrl.system_type = daemon_system_type ();
+    ctrl.infra_iface_provisioned = false ;
+
+    /* convert node type to integer */
+    ctrl.nodetype = get_host_function_mask ( nodetype_str ) ;
+    ilog ("Node Type : %s (%d:%x)\n", nodetype_str.c_str(), ctrl.nodetype, ctrl.nodetype);
+
+    /* Initialize socket construct and pointer to it */
+    memset ( &mtc_sock, 0, sizeof(mtc_sock));
+    sock_ptr = &mtc_sock ;
+
+    /* Assign interface to config */
+    ctrl.mgmnt_iface = iface ;
+
+    if ( daemon_files_init () != PASS )
+    {
+        printf ("Pid, log or other files could not be opened\n");
+        rc = FAIL_FILES_INIT ;
+    }
+
+    /* Bind signal handlers */
+    else if ( daemon_signal_init () != PASS )
+    {
+        elog ("daemon_signal_init failed\n");
+        rc = FAIL_SIGNAL_INIT ;
+    }
+
+    /* Configure the compute */
+    else if ( (rc = daemon_configure ()) != PASS )
+    {
+        elog ("Daemon service configuration failed (rc:%i)\n", rc );
+        return (FAIL_DAEMON_CONFIG) ;
+    }
+
+    else if ( set_host_functions ( nodetype_str, &ctrl.nodetype,
+                                   &ctrl.function,
+                                   &ctrl.subfunction ) != PASS )
+    {
+        elog ("failed to extract nodetype info\n");
+        rc = FAIL_NODETYPE;
+    }
+
+    /* Setup the heartbeat service messaging sockets.
+     * The socket result is held in its own variable so that a passing
+     * socket init no longer erases a failure code recorded by the
+     * start-up chain above. A socket failure is still reported as
+     * FAIL_SOCKET_INIT, exactly as before. */
+    int socket_rc = mtc_socket_init ();
+    if ( socket_rc != PASS )
+    {
+        elog ("socket initialization failed (rc:%d)\n", socket_rc );
+        rc = FAIL_SOCKET_INIT ;
+    }
+
+    /* Get my hostname and ip address */
+    /* Should not return from this call without an IP address */
+    string who_i_am = _self_identify ( ctrl.nodetype_str );
+
+    mtcTimer_init ( ctrl.timer, &ctrl.hostname[0] , "mtc timer" );
+    mtcTimer_init (
ctrl.goenabled.timer, &ctrl.hostname[0], "goenable timer" ); + mtcTimer_init ( ctrl.hostservices.timer, &ctrl.hostname[0], "host services timer" ); + + /* initialize the script group control structures */ + script_ctrl_init ( &ctrl.goenabled ); + script_ctrl_init ( &ctrl.hostservices ); + for ( int i = 0 ; i < MAX_RUN_SCRIPTS ; i++ ) + { + script_exec_init ( &ctrl.goenabled.script[i] ); + script_exec_init ( &ctrl.hostservices.script[i] ); + } + + ctrl.active_script_set = NO_SCRIPTS ; + + /* default genabled time struct */ + time (&ctrl.goenabled_main_time); + time (&ctrl.goenabled_subf_time); + + /* Clear and then populate the script sets need to run on daemon startup */ + ctrl.posted_script_set.clear(); + + /* Only automatically run the main goenabled tests on process start-up + * if they have not already been run. This then handles mtcClient + * restart in the no-reboot patching case */ + if ( daemon_is_file_present ( GOENABLED_MAIN_PASS ) == false ) + { + ctrl.posted_script_set.push_front(GOENABLED_MAIN_SCRIPTS); + } + + return (rc) ; +} + +int select_log_count = 0 ; +void daemon_service_run ( void ) +{ + int rmon_code; + string resource_name; + + int rc = PASS ; + int infra_retry_count = 0 ; + int file_not_present_count = 0 ; + + /* Start mtcAlive message timer */ + /* Send first mtcAlive ASAP */ + mtcTimer_start ( ctrl.timer, timer_handler, 1 ); + + /* lets go select so that the sock does not go crazy */ + dlog ("%s running main loop with %d msecs socket timeout\n", + &ctrl.hostname[0], (SOCKET_WAIT/1000) ); + + std::list socks ; + + /* Run heartbeat service forever or until stop condition */ + for ( ; ; ) + { + /* set the master fd_set */ + FD_ZERO(&mtc_sock.readfds); + socks.clear(); + + if ( mtc_sock.mtc_client_rx_socket && mtc_sock.mtc_client_rx_socket->return_status==PASS ) + { + socks.push_front (mtc_sock.mtc_client_rx_socket->getFD()); + FD_SET(mtc_sock.mtc_client_rx_socket->getFD(), &mtc_sock.readfds); + } + + if ( !ctrl.address_infra.empty() ) + { + 
if ( mtc_sock.mtc_client_infra_rx_socket && mtc_sock.mtc_client_infra_rx_socket->return_status==PASS ) + { + socks.push_front (mtc_sock.mtc_client_infra_rx_socket->getFD()); + FD_SET(mtc_sock.mtc_client_infra_rx_socket->getFD(), &mtc_sock.readfds); + } + } + + mtc_sock.amon_socket = active_monitor_get_sel_obj (); + if ( mtc_sock.amon_socket ) + { + socks.push_front (mtc_sock.amon_socket); + FD_SET(mtc_sock.amon_socket, &mtc_sock.readfds); + } + + mtc_sock.rmon_socket = resource_monitor_get_sel_obj (); + if ( mtc_sock.rmon_socket ) + { + socks.push_front (mtc_sock.rmon_socket); + FD_SET(mtc_sock.rmon_socket, &mtc_sock.readfds); + } + + /* Initialize the timeval struct to wait for 50 mSec */ + mtc_sock.waitd.tv_sec = 0; + mtc_sock.waitd.tv_usec = SOCKET_WAIT; + + /* Call select() and wait only up to SOCKET_WAIT */ + socks.sort(); + +#ifdef WANT_SELECTS + ilog_throttled ( select_log_count, 200 , "Selects: mgmnt:%d infra:%d amon:%d rmon:%d - Size:%ld First:%d Last:%d\n", + mtc_sock.mtc_client_rx_socket, + mtc_sock.mtc_client_infra_rx_socket, + mtc_sock.amon_socket, + mtc_sock.rmon_socket, + socks.size(), socks.front(), socks.back()); +#endif + + rc = select( socks.back()+1, + &mtc_sock.readfds, NULL, NULL, + &mtc_sock.waitd); + + /* If the select time out expired then */ + if (( rc < 0 ) || ( rc == 0 )) + { + /* Check to see if the select call failed. */ + /* ... 
but filter Interrupt signal */ + if (( rc < 0 ) && ( errno != EINTR )) + { + elog ("Select Failed (rc:%d) %s \n", errno, strerror(errno)); + } + } + else + { + if ((mtc_sock.mtc_client_rx_socket && mtc_sock.mtc_client_rx_socket->return_status==PASS) && FD_ISSET(mtc_sock.mtc_client_rx_socket->getFD(), &mtc_sock.readfds)) + { + mtc_service_command ( sock_ptr, MGMNT_INTERFACE ); + } + if ((mtc_sock.mtc_client_infra_rx_socket && mtc_sock.mtc_client_infra_rx_socket->return_status==PASS) && FD_ISSET(mtc_sock.mtc_client_infra_rx_socket->getFD(), &mtc_sock.readfds)) + { + if ( !ctrl.address_infra.empty() ) + { + mtc_service_command ( sock_ptr, INFRA_INTERFACE ); + } + } + if ( FD_ISSET(mtc_sock.amon_socket, &mtc_sock.readfds)) + { + dlog3 ("Active Monitor Select Fired\n"); + active_monitor_dispatch (); + } + if ( FD_ISSET(mtc_sock.rmon_socket, &mtc_sock.readfds)) + { + dlog3 ("Resource Monitor Select Fired\n"); + rc = service_rmon_inbox( sock_ptr, rmon_code, resource_name ); + + if (rc == PASS) { + + switch ( rmon_code ) { + case RMON_CLEAR: + mtce_send_event ( sock_ptr, MTC_EVENT_RMON_CLEAR, resource_name.c_str() ); + break; + + case RMON_MINOR: + mtce_send_event ( sock_ptr, MTC_EVENT_RMON_MINOR, resource_name.c_str() ); + break; + + case RMON_MAJOR: + mtce_send_event ( sock_ptr, MTC_EVENT_RMON_MAJOR, resource_name.c_str() ); + break; + + case RMON_CRITICAL: + mtce_send_event ( sock_ptr, MTC_EVENT_RMON_CRIT, resource_name.c_str() ); + break; + case MTC_EVENT_AVS_CLEAR: + case MTC_EVENT_AVS_MAJOR: + case MTC_EVENT_AVS_CRITICAL: + mtce_send_event ( sock_ptr, rmon_code, "" ); + break; + default: + break; + } + + } + } + } + + /* retry getting the infra ip address if its not already provisioned */ + if ( ctrl.infra_iface_provisioned == false ) + { + if ( infra_retry_count++ > 100 ) + { + if ( strlen(mtc_config.infra_iface) ) + { + ilog ("Retrying Interface %s\n", mtc_config.infra_iface ) + /* Only get the infrastructure network address if it is provisioned */ + rc = 
get_iface_address ( mtc_config.infra_iface, ctrl.address_infra, false );
+                    if ( rc == PASS )
+                    {
+                        ctrl.infra_iface_provisioned = true ;
+                    }
+                }
+                infra_retry_count = 0 ;
+            }
+        }
+
+
+        /* A script set is executing ; poll it for completion or timeout */
+        if (( ctrl.active_script_set == GOENABLED_MAIN_SCRIPTS ) ||
+            ( ctrl.active_script_set == GOENABLED_SUBF_SCRIPTS ))
+        {
+            _manage_goenabled_tests ( );
+        }
+        else if ( ctrl.active_script_set == HOSTSERVICES_SCRIPTS )
+        {
+            _manage_services_scripts ( );
+        }
+        /* now service posted requests */
+        else if ( ctrl.active_script_set == NO_SCRIPTS )
+        {
+            if ( ! ctrl.posted_script_set.empty() )
+            {
+                /* get the next script set to execute */
+                ctrl.active_script_set = ctrl.posted_script_set.front();
+                if ( ctrl.active_script_set == GOENABLED_MAIN_SCRIPTS )
+                {
+                    /* the request stays posted until both gate files exist */
+                    if (( daemon_is_file_present ( CONFIG_COMPLETE_FILE )) &&
+                        ( daemon_is_file_present ( GOENABLED_MAIN_READY )))
+                    {
+                        ctrl.posted_script_set.pop_front();
+                        if (( rc = run_goenabled_scripts ( "self-test" )) != PASS )
+                        {
+                            if ( rc == RETRY )
+                            {
+                                ilog ("main goenable testing already in progress\n");
+                            }
+                            else
+                            {
+                                elog ("main goenable test start failed (rc:%d)\n", rc );
+                            }
+                        }
+                        else
+                        {
+                            ilog ("main goenable tests started\n");
+                        }
+                        file_not_present_count = 0 ;
+                    }
+                    else
+                    {
+                        ctrl.active_script_set = NO_SCRIPTS ;
+                        ilog_throttled (file_not_present_count, 10000,
+                                        "waiting on goenable gates (%s and %s)\n",
+                                        CONFIG_COMPLETE_FILE,
+                                        GOENABLED_MAIN_READY );
+                    }
+                }
+                else if ( ctrl.active_script_set == GOENABLED_SUBF_SCRIPTS )
+                {
+                    /* the request stays posted until both gate files exist */
+                    if (( daemon_is_file_present ( CONFIG_COMPLETE_COMPUTE )) &&
+                        ( daemon_is_file_present ( GOENABLED_SUBF_READY )))
+                    {
+                        ctrl.posted_script_set.pop_front();
+
+                        /* Capture the launch result in rc, as the main
+                         * function path above does. The RETRY test and
+                         * the failure log below previously read whatever
+                         * stale value rc held from earlier in this loop
+                         * iteration. */
+                        if (( rc = run_goenabled_scripts ( "self-test" )) != PASS )
+                        {
+                            if ( rc == RETRY )
+                            {
+                                ilog ("subf goenable testing already in progress\n");
+                            }
+                            else
+                            {
+                                elog ("subf goenable test start failed (rc:%d)\n", rc );
+                            }
+                        }
+                        else
+                        {
+                            ilog ("subf goenable tests started\n");
+                        }
+                        file_not_present_count = 0 ;
+                    }
+                    else
+                    {
+                        ctrl.active_script_set =
NO_SCRIPTS ; + ilog_throttled (file_not_present_count, 10000, + "waiting on subfuction goenable gate %s\n", + GOENABLED_SUBF_READY); + } + } + else if ( ctrl.active_script_set == HOSTSERVICES_SCRIPTS ) + { + ctrl.posted_script_set.pop_front(); + /* Handle running the host services scripts. */ + if ( is_host_services_cmd ( ctrl.hostservices.posted )) + { + if (( rc = run_hostservices_scripts ( ctrl.hostservices.posted )) != PASS ) + { + char str[BUF_SIZE] ; + memset (str,0,BUF_SIZE); + snprintf ( str, BUF_SIZE, "%s rc:%d", "launch failed", rc ); + elog ("%s scripts failed (rc:%d)\n", get_mtcNodeCommand_str(ctrl.hostservices.posted), rc ); + ctrl.hostservices.posted = MTC_CMD_NONE ; + + /* send error message */ + mtce_send_event ( sock_ptr, MTC_CMD_HOST_SVCS_RESULT, str ); + } + } + else + { + ctrl.hostservices.monitor = ctrl.hostservices.posted ; + ctrl.hostservices.posted = MTC_CMD_NONE ; + } + } + else + { + slog ("invalid script set (%d)\n", ctrl.active_script_set ); + } + } + } + if ( ctrl.timer.ring == true ) + { + bool socket_reinit = true ; + + /** + * Look for failing sockets and try to recover them, + * but only one at a time if there are multiple failing. + * Priority is the command receiver, thehn transmitter, + * followed by the infra and others. 
+ **/ + + /* Mgmnt Rx */ + if (( mtc_sock.mtc_client_rx_socket == NULL ) || + ( mtc_sock.mtc_client_rx_socket->sock_ok() == false )) + { + setup_mgmnt_rx_socket(); + wlog ("calling setup_mgmnt_rx_socket (auto-recovery)\n"); + socket_reinit = true ; + } + + /* Mgmnt Tx */ + else if (( mtc_sock.mtc_client_tx_socket == NULL ) || + ( mtc_sock.mtc_client_tx_socket->sock_ok() == false )) + { + setup_mgmnt_tx_socket(); + wlog ("calling setup_mgmnt_tx_socket\n"); + socket_reinit = true ; + } + + /* Infra Rx */ + else if (( ctrl.infra_iface_provisioned == true ) && + (( mtc_sock.mtc_client_infra_rx_socket == NULL ) || + ( mtc_sock.mtc_client_infra_rx_socket->sock_ok() == false ))) + { + setup_infra_rx_socket(); + wlog ("calling setup_infra_rx_socket (auto-recovery)\n"); + socket_reinit = true ; + } + + /* Infra Tx */ + else if (( ctrl.infra_iface_provisioned == true ) && + (( mtc_sock.mtc_client_infra_tx_socket == NULL ) || + ( mtc_sock.mtc_client_infra_tx_socket->sock_ok() == false ))) + { + setup_infra_tx_socket(); + wlog ("calling setup_infra_tx_socket (auto-recovery)\n"); + socket_reinit = true ; + } + + /* RMON event notifications */ + else if ( mtc_sock.rmon_socket <= 0 ) + { + setup_rmon_socket (); + wlog ("calling setup_rmon_socket (auto-recovery)\n"); + socket_reinit = true ; + } + + else if ( mtc_sock.amon_socket <= 0 ) + { + setup_amon_socket (); + wlog ("calling setup_amon_socket (auto-recovery)\n"); + socket_reinit = true ; + } + else + { + socket_reinit = false ; + } + + if ( socket_reinit ) + { + /* re-get identity if interfaces are re-initialized */ + string who_i_am = _self_identify ( ctrl.nodetype_str ); + } + + send_mtcAlive_msg ( sock_ptr, ctrl.who_i_am, MGMNT_INTERFACE ); + send_mtcAlive_msg ( sock_ptr, ctrl.who_i_am, INFRA_INTERFACE ); + + /* Re-Start mtcAlive message timer */ + mtcTimer_start ( ctrl.timer, timer_handler, MTC_ALIVE_TIMER ); + + if ( daemon_is_file_present ( MTC_CMD_FIT__DIR ) ) + { + /* fault insertion testing */ + if ( 
daemon_is_file_present ( MTC_CMD_FIT__MGMNT_RXSOCK )) + { + if ( mtc_sock.mtc_client_rx_socket ) + { + mtc_sock.mtc_client_rx_socket->sock_ok (false); + _close_mgmnt_rx_socket(); + } + } + if ( daemon_is_file_present ( MTC_CMD_FIT__MGMNT_TXSOCK )) + { + if ( mtc_sock.mtc_client_tx_socket ) + { + mtc_sock.mtc_client_tx_socket->sock_ok (false); + _close_mgmnt_tx_socket (); + } + } + if ( daemon_is_file_present ( MTC_CMD_FIT__INFRA_RXSOCK )) + { + if ( mtc_sock.mtc_client_infra_rx_socket ) + { + mtc_sock.mtc_client_infra_rx_socket->sock_ok (false); + _close_infra_rx_socket (); + } + } + if ( daemon_is_file_present ( MTC_CMD_FIT__INFRA_TXSOCK )) + { + if ( mtc_sock.mtc_client_infra_tx_socket ) + { + mtc_sock.mtc_client_infra_tx_socket->sock_ok (false); + _close_infra_tx_socket (); + } + } + if ( daemon_is_file_present ( MTC_CMD_FIT__RMON_SOCK )) + { + _close_rmon_sock (); + } + if ( daemon_is_file_present ( MTC_CMD_FIT__AMON_SOCK )) + { + _close_amon_sock (); + } + } + } + + daemon_signal_hdlr (); + } + daemon_exit(); +} + +#define MAX_ARGS 4 +static char start[] = "start" ; +static char stop[] = "stop" ; +int _launch_all_scripts ( script_ctrl_type & group, + std::list & scripts, + string label, + string action, + string option ) +{ + int index ; + char * argv[MAX_ARGS] ; + + if ( action == "start" ) + argv[1] = start ; + else + argv[1] = stop ; + + argv[2] = (char*)option.data() ; + argv[MAX_ARGS-1] = NULL ; + + /* initialize control struct */ + for ( int i = 0 ; i < MAX_RUN_SCRIPTS ; i++ ) + { + group.script[i].pid = 0 ; + group.script[i].status = 0 ; + group.script[i].done = false ; + group.script[i].name = "" ; + } + group.scripts = scripts.size() ; + group.scripts_done = 0 ; + + ilog ("Sorted %s File List: %d\n", label.c_str(), group.scripts ); + + std::list::iterator string_iter_ptr ; + for ( string_iter_ptr = scripts.begin () ; + string_iter_ptr != scripts.end () ; + string_iter_ptr++ ) + { + ilog (" ... 
%s %s\n", string_iter_ptr->c_str(), action.c_str()); + } + + /* Run Maintenance on Inventory */ + for ( index = 0, + string_iter_ptr = scripts.begin () ; + string_iter_ptr != scripts.end () ; + string_iter_ptr++ ) + { + group.script[index].name = *string_iter_ptr ; + group.script[index].pid = fork(); + if ( group.script[index].pid == 0 ) + { + bool close_file_descriptors = false ; + if ( setup_child ( close_file_descriptors ) != PASS ) + { + exit(EXIT_FAILURE); + } + + /* Set child to default signaling */ + signal (SIGCHLD, SIG_DFL); + + umask(022); + + /* Setup exec arguement */ + char script_name[MAX_FILE_SIZE]; + snprintf ( &script_name[0], MAX_FILE_SIZE, "%s", string_iter_ptr->data()) ; + argv[0] = script_name ; + for ( int x = 0 ; x < MAX_ARGS ; x++ ) + { + dlog ("argv[%d] = %s\n", x , argv[x]); + } + + openlog ( program_invocation_short_name, LOG_PID, LOG_USER ); + syslog ( LOG_INFO, "%s %s\n", string_iter_ptr->c_str(), action.c_str()); + if ( 0 > execv(argv[0], argv )) + { + syslog ( LOG_INFO, "%s failed (%d) (%s)\n", + string_iter_ptr->c_str(), + errno, + strerror(errno)); + } + closelog(); + exit(1); + } + gettime ( group.script[index].time_start ); + dlog ("%s %02d: %s (pid:%d) is running\n", + label.c_str(), + index, + string_iter_ptr->c_str(), + group.script[index].pid ); + index++ ; + } + return (PASS); +} + +/*********************************************************************** + * + * Name : run_hostservices_scripts + * + * Purpose : Call the files in /etc/services.d with start or stop + * + * Description: Controller maintenance requests a host's command handler + * to 'Start' or 'Stop' Host Services. This results in a call + * to this handler. All the files in /etc/services.d are + * read. Each is called alphabetically with the requested + * command option of start or stop. + * + * The execution time and exit status of each script is logged. + * The exit status of each script is checked. 
If any return a + * non-zero value then that is an indication of that operation + * failed and the overall command is failed. In the failure case, + * which includes an overall execution timeout case, this utility + * returns a message to maintenance indicating the name of the + * script that failed and its return code. + * + * Params : + * + * cmd - 'uint' representing start or stop services commands + * + * MTC_CMD_STOP_CONTROL_SVCS + * MTC_CMD_STOP_COMPUTE_SVCS + * MTC_CMD_STOP_STORAGE_SVCS + * MTC_CMD_START_CONTROL_SVCS + * MTC_CMD_START_COMPUTE_SVCS + * MTC_CMD_START_STORAGE_SVCS + * + * Returns : Operation PASS or non-zero return code with the failing + * script name in the message buffer. + * + ****************************************************************************/ +int run_hostservices_scripts ( unsigned int cmd ) +{ + string dir = SERVICES_DIR ; + string action = "" ; + string func = "" ; + + switch ( cmd ) + { + case MTC_CMD_STOP_CONTROL_SVCS: + dir.append("/controller"); + action = "stop" ; + func = "controller"; + break ; + case MTC_CMD_STOP_COMPUTE_SVCS: + dir.append("/compute"); + action = "stop" ; + func = "compute"; + break ; + case MTC_CMD_STOP_STORAGE_SVCS: + dir.append("/storage"); + action = "stop" ; + func = "storage"; + break ; + case MTC_CMD_START_CONTROL_SVCS: + dir.append("/controller"); + action = "start" ; + func = "controller"; + break ; + case MTC_CMD_START_COMPUTE_SVCS: + dir.append("/compute"); + action = "start" ; + func = "compute"; + break ; + case MTC_CMD_START_STORAGE_SVCS: + dir.append("/storage"); + action = "start" ; + func = "storage"; + break ; + default: + ctrl.active_script_set = NO_SCRIPTS ; + return (FAIL_BAD_CASE); + } + + /* list of service files */ + std::list scripts ; + if ( load_filenames_in_dir ( dir.data(), scripts ) != PASS ) + { + elog ("failed to load host services scripts dir:%s\n", dir.c_str()); + ctrl.active_script_set = NO_SCRIPTS ; + return (FAIL_READ_FILES) ; + } + + + /* For the stop command we 
need the mtcClient to run both controller and + * compute stop services if we are on a CPE system. + * This saves the mtcAgent from having to issue and manage 2 commands, + * one for controller and 1 for compute */ + if ( ctrl.system_type != SYSTEM_TYPE__NORMAL ) + { + if ( action == "stop" ) + { + std::list more_scripts ; + if ( cmd == MTC_CMD_STOP_COMPUTE_SVCS ) + { + dir = SERVICES_DIR ; + dir.append("/controller"); + } + else + { + dir = SERVICES_DIR ; + dir.append("/compute"); + } + + if ( load_filenames_in_dir ( dir.data(), more_scripts ) != PASS ) + { + ctrl.active_script_set = NO_SCRIPTS ; + return (FAIL_READ_FILES) ; + } + + if ( ! more_scripts.empty() ) + { + scripts.merge(more_scripts); + } + } + } + + if ( scripts.empty() ) + { + ilog ("no service scripts\n"); + ctrl.hostservices.scripts = 0 ; + _manage_services_scripts (); + ctrl.active_script_set = NO_SCRIPTS ; + return (PASS); + } + + scripts.sort(); + mtcTimer_reset ( ctrl.hostservices.timer ); + mtcTimer_start ( ctrl.hostservices.timer, timer_handler, mtc_config.host_services_timeout ); + + /* launch the scripts */ + return (_launch_all_scripts ( ctrl.hostservices, scripts, "Host Services", action, "both" )); +} + + +/*********************************************************************** + * + * Name : run_goenabled_start + * + * Purpose : Call the files in /etc/goenable.d with start command + * + * Description: This procedure forks off a mtcClient child process + * which runs a sorted list of files in the /etc/goenable.d + * diectory. + * + * This child waits for the completion of each goenabled script before + * running the next one. + * + * Success Path Behavior: + * + * If all the scripts complete with an exit status of zero then this + * child process will send a GOENABLED message to the mtcAgent + * informing it that the intest phase completed successfully. 
+ * This success message is logged in the mtcAgent and if this + * host was undergoing an enable sequence or graceful recovery then + * those FSMs would see the pass and proceed to its next state/phase. + * + * Failure Path Behavior: + * + * If one of the goenabled scripts exits with a return code other than + * zero then the child creates a GOENABLED_FAILED message containing + * the name of the script that failed and the error code that was + * returned and sends that message to the mtcAgent which will cause + * a failure of an enable or graceful recovery of that host. + * + * Returns : operation PASS or FAIL. + * + */ + +int run_goenabled_scripts ( string requestor ) +{ + int rc = RETRY ; + + /* list of service files */ + std::list scripts ; + std::list::iterator string_iter_ptr ; + + /* handle mutual exclusion */ + if ( ctrl.goenabled.posted ) + { + return (rc); + } + + if ((rc = load_filenames_in_dir ( GOENABLED_DIR, scripts )) != PASS ) + { + return (FAIL_READ_FILES); + } + else if ( scripts.empty() ) + { + ctrl.goenabled.posted = MTC_CMD_NONE ; + _manage_goenabled_tests (); + return (PASS); + } + + if ( ctrl.active_script_set == GOENABLED_SUBF_SCRIPTS ) + { + ilog ("GoEnabled Scripts : Sub-Function Context\n"); + daemon_remove_file ( GOENABLED_SUBF_FAIL ); + } + else + { + ilog ("GoEnabled Scripts : Main-Function Context\n"); + daemon_remove_file ( GOENABLED_MAIN_FAIL ); + } + + scripts.sort(); + + /* manage the goenabled timeout timer */ + if ( ctrl.goenabled.timer.tid ) + { + slog ("goenabled timer unexpectedly active\n"); + mtcTimer_stop ( ctrl.goenabled.timer ); + } + + dlog ("Goenabled Timeout : %d secs\n", mtc_config.goenabled_timeout ); + mtcTimer_start ( ctrl.goenabled.timer, timer_handler, mtc_config.goenabled_timeout ); + + /* launch the scripts */ + return (_launch_all_scripts ( ctrl.goenabled, scripts, "Test", "start", requestor )); +} + + +/* Reap the go enabled tests */ +void daemon_sigchld_hdlr ( void ) +{ + pid_t tpid = 0 ; + int status = 0 ; 
+ bool found = 0 ; + static script_ctrl_type * scripts_ptr ; + + dlog("Received SIGCHLD ...\n"); + + /* select the correct script set based on which is active */ + switch ( ctrl.active_script_set ) + { + case GOENABLED_MAIN_SCRIPTS: + case GOENABLED_SUBF_SCRIPTS: + { + scripts_ptr = &ctrl.goenabled ; + break ; + } + case HOSTSERVICES_SCRIPTS: + { + scripts_ptr = &ctrl.hostservices ; + break ; + } + default: + { + wlog ("child handler running with no active script set (%d)\n", ctrl.active_script_set ); + return ; + } + } + + while ( 0 < ( tpid = waitpid ( -1, &status, WNOHANG | WUNTRACED ))) + { + /* loop over all the scripts and get the child execution status */ + for ( int i = 0 ; i < scripts_ptr->scripts ; i++ ) + { + if ( tpid == scripts_ptr->script[i].pid ) + { + found = true ; + scripts_ptr->script[i].status = status ; + if ( scripts_ptr->script[i].done == false ) + { + dlog("%5d %s exited (%d)\n", scripts_ptr->script[i].pid, scripts_ptr->script[i].name.c_str(), i ); + scripts_ptr->script[i].done = true ; + scripts_ptr->scripts_done++ ; + } + else + { + slog ("%5d %s exited already (%d)\n", scripts_ptr->script[i].pid, scripts_ptr->script[i].name.c_str(), i ); + } + + /* script ended */ + if (WIFEXITED(scripts_ptr->script[i].status)) + { + gettime ( scripts_ptr->script[i].time_stop ); + timedelta ( scripts_ptr->script[i].time_start, + scripts_ptr->script[i].time_stop, + scripts_ptr->script[i].time_delta ); + + dlog ("%s exited properly \n", scripts_ptr->script[i].name.c_str()); + + /* only print log if there is an error */ + scripts_ptr->script[i].status = WEXITSTATUS(scripts_ptr->script[i].status) ; + if ( status ) + { + elog ("FAILED: %s (%ld.%03ld secs) (rc:%d)\n", + scripts_ptr->script[i].name.c_str(), + scripts_ptr->script[i].time_delta.secs, + scripts_ptr->script[i].time_delta.msecs/1000, + scripts_ptr->script[i].status); + } + else + { + ilog ("PASSED: %s (%ld.%03ld secs)\n", + scripts_ptr->script[i].name.c_str(), + 
scripts_ptr->script[i].time_delta.secs, + scripts_ptr->script[i].time_delta.msecs/1000); + } + } + else if (WIFSIGNALED(scripts_ptr->script[i].status)) + { + wlog ("%s test uncaught signal\n", scripts_ptr->script[i].name.c_str()); + } + else if (WIFSTOPPED(scripts_ptr->script[i].status)) + { + wlog ("%s test stopped.\n", scripts_ptr->script[i].name.c_str()); + } + } + } + } + if ( ( tpid > 0 ) && ( found == false ) ) + { + ilog ("PID:%d reaped with no corresponding process\n", tpid ); + } +} + +/* Push daemon state to log file */ +void daemon_dump_info ( void ) +{ + ; +} + +const char MY_DATA [100] = { "eieio\n" } ; +const char * daemon_stream_info ( void ) +{ + return (&MY_DATA[0]); +} + +/*************************************************************************** + * * + * Module Test Head * + * * + ***************************************************************************/ + +extern int mtcCompMsg_testhead ( void ); + +/** Teat Head Entry */ +int daemon_run_testhead ( void ) +{ + int rc = PASS; + int stage = 1; + printf ("\n"); + rc = mtcCompMsg_testhead (); + printf ("\n\n+---------------------------------------------------------+\n"); + + /*********************************************** + * STAGE 1: some test + ************************************************/ + printf ( "| Test %d : Maintenance Service Test ............. ", stage ); + if ( rc != PASS ) + { + FAILED_STR ; + rc = FAIL ; + } + else + PASSED ; + + printf ("+---------------------------------------------------------+\n"); + return PASS ; +} diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeComp.h b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeComp.h new file mode 100644 index 00000000..b893f1d7 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeComp.h @@ -0,0 +1,113 @@ +#ifndef __INCLUDE_MTCNODECOMP_HH__ +#define __INCLUDE_MTCNODECOMP_HH__ +/* + * Copyright (c) 2015-2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Node Maintenance Client 'mtcClient' Header + * + */ + +#include +#include +#include + +/** Compute Config mask */ +#define CONFIG_CLIENT_MASK (CONFIG_AGENT_PORT |\ + CONFIG_CLIENT_MTC_MGMNT_PORT |\ + CONFIG_CLIENT_RMON_PORT) + +#define MAX_RUN_SCRIPTS (20) + +typedef enum +{ + NO_SCRIPTS, + GOENABLED_MAIN_SCRIPTS, + GOENABLED_SUBF_SCRIPTS, + HOSTSERVICES_SCRIPTS, +} script_set_enum ; + +typedef struct +{ + int status ; /* script execution exit status */ + pid_t pid ; /* the script's PID */ + bool done ; /* set to true when a script has completed */ + string name ; /* the full path/filename of the script */ + + time_debug_type time_start ; /* time stamps used to measure the */ + time_debug_type time_stop ; /* execution time of */ + time_delta_type time_delta ; /* the script */ +} script_exec_type; +void script_exec_init ( script_exec_type * script_exec_ptr ); + +typedef struct +{ + unsigned int posted ; /* posted for execution command */ + unsigned int monitor ; /* set to the previously posted command + * after this commands' scripts have + * been launched. 
*/ + int scripts ; /* the number of scripts to run */ + int scripts_done ; /* number of scripts that completed */ + struct mtc_timer timer ; /* the scripts completion timeout timer */ + script_exec_type script[MAX_RUN_SCRIPTS]; /* array of script exec status */ + +} script_ctrl_type ; +void script_ctrl_init ( script_ctrl_type * script_ctrl_ptr ); + +typedef struct +{ + char hostname [MAX_HOST_NAME_SIZE+1]; + string macaddr ; + string address ; + string address_infra ; + string who_i_am ; + + string nodetype_str ; + + string mgmnt_iface ; + string infra_iface ; + + unsigned int nodetype ; + unsigned int function ; + unsigned int subfunction ; + + struct mtc_timer timer ; /* mtcAlive timer */ + + bool infra_iface_provisioned ; + + /* tracks the time the level specific goenabled file was last created */ + time_t goenabled_main_time ; + time_t goenabled_subf_time ; + + /* Go Enable Control execution control struct, timing and completion status */ + script_ctrl_type goenabled ; + + /* Start/Stop Hosts Services execution control timing and completion status */ + script_ctrl_type hostservices ; + + /* The script set that is executing */ + script_set_enum active_script_set ; + + /* The list of posted script set requests */ + list posted_script_set; + + /* The system type */ + system_type_enum system_type ; + + /* Where to send events */ + string mtcAgent_ip ; + +} ctrl_type ; + +ctrl_type * get_ctrl_ptr ( void ); + +bool is_subfunction_compute ( void ); +int run_goenabled_scripts ( mtc_socket_type * sock_ptr , string requestor ); +int run_hostservices_scripts ( unsigned int cmd ); + +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeCtrl.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeCtrl.cpp new file mode 100755 index 00000000..e288c3d4 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeCtrl.cpp @@ -0,0 +1,1545 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Controller Maintenance Daemon + */ + +#include +#include +#include +#include +#include +#include +#include +#include /* for hostent */ +#include +#include +#include +#include +#include +#include +#include /* for ... close and usleep */ +#include /* for ... HTTP_ status definitions */ +#include /* for ... RTMGRP_LINK */ + +using namespace std; + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "mtc" + +#include "daemon_common.h" /* */ +#include "daemon_ini.h" /* Init parset header */ +#include "daemon_option.h" /* */ + +#include "nodeBase.h" /* Service header */ +#include "nodeTimers.h" /* */ +#include "nodeClass.h" /* */ +#include "nodeUtil.h" /* */ +#include "threadUtil.h" /* for ... threadUtil_init/fini */ +#include "timeUtil.h" /* for ... daemon_sample_time_init */ +#include "tokenUtil.h" /* for ... keystone_config_handler */ +#include "nodeMacro.h" /* for ... CREATE_REUSABLE_INET_UDP_TX_SOCKET */ +#include "nodeEvent.h" /* for ... inotify utility services */ +#include "mtcNodeFsm.h" /* */ +#include "mtcNodeMsg.h" /* */ +#include "mtcHttpSvr.h" /* for ... mtcHttpSvr_init/_fini/_look */ +#include "mtcInvApi.h" /* */ +#include "mtcSmgrApi.h" /* */ +#include "nlEvent.h" /* for ... open_netlink_socket */ + +/************************************************************** + * Implementation Structure + ************************************************************** + * + * Call sequence: + * + * daemon_init + * daemon_configure + * daemon_signal_init + * mtc_hostname_read + * mtc_message_init (obsolete ?) 
+ * mtc_socket_init + * + * daemon_service_run + * forever ( timer_handler ) + * mtc_fsm_run + * mtc_service_inbox + * + */ + +extern void mtcTimer_handler ( int sig, siginfo_t *si, void *uc); +extern int service_events ( nodeLinkClass * obj_ptr, + mtc_socket_type * sock_ptr ); +extern bool mtc_get_inventory_in_progress ( void ); + +int mtc_service_inbox ( nodeLinkClass * obj_ptr, + mtc_socket_type * sock_ptr, + int interface ); + +string my_hostname = "" ; + +/** Instanciate the NodeLinkClass and pointer to it */ +nodeLinkClass mtcInv ; +nodeLinkClass * mtcInv_ptr ; +nodeLinkClass * get_mtcInv_ptr ( void ) +{ + return (&mtcInv); +} + +static event_type mtce_event ; +event_type * get_eventPtr ( void ) +{ + return(&mtce_event); +} + +int module_init ( void ) +{ + mtcInv_ptr = &mtcInv ; + return (PASS); +} + +void daemon_sigchld_hdlr ( void ) +{ + ; /* dlog("Received SIGCHLD ... no action\n"); */ +} + +/** + * Daemon Configuration Structure - The allocated struct + * @see mtc.h for daemon_config_type struct format. + */ +static daemon_config_type mtc_config ; +daemon_config_type * daemon_get_cfg_ptr ( void ) { return &mtc_config ; } + + +/** + * Heartbeat Daemon Messaging Socket Control Struct - The allocated struct + * @see bhs.h for mtc_socket_type struct format. 
+ */ +static mtc_socket_type mtc_sock ; +static mtc_socket_type * sock_ptr ; +mtc_socket_type * get_sockPtr ( void ) +{ return ( &mtc_sock ) ; } + +msgSock_type * get_mtclogd_sockPtr ( void ) +{ + return (&mtc_sock.mtclogd); +} + +void daemon_exit ( void ) +{ + /* Cancel the uptime timer */ + if ( mtcInv.mtcTimer_uptime.tid ) + { + mtcTimer_stop ( mtcInv.mtcTimer_uptime ); + } + + daemon_dump_info (); + daemon_files_fini (); + + /* Close the watch over the /etc/shadow file */ + set_inotify_close ( mtcInv.inotify_shadow_file_fd, + mtcInv.inotify_shadow_file_wd); + + /* Close open sockets */ + if (mtc_sock.mtc_agent_rx_socket) + delete (mtc_sock.mtc_agent_rx_socket); + + if (mtc_sock.mtc_agent_tx_socket) + delete (mtc_sock.mtc_agent_tx_socket); + + if (mtc_sock.mtc_client_rx_socket) + delete(mtc_sock.mtc_client_rx_socket); + + if (mtc_sock.mtc_client_tx_socket) + delete (mtc_sock.mtc_client_tx_socket); + + if (mtc_sock.mtc_client_infra_rx_socket) + delete (mtc_sock.mtc_client_infra_rx_socket); + + if (mtc_sock.mtc_client_infra_tx_socket) + delete (mtc_sock.mtc_client_infra_tx_socket); + + if (mtc_sock.mtc_event_rx_sock) + delete (mtc_sock.mtc_event_rx_sock); + + if (mtc_sock.mtc_to_hbs_sock) + delete (mtc_sock.mtc_to_hbs_sock); + + if ( mtc_sock.mtclogd.sock > 0 ) + close (mtc_sock.mtclogd.sock); + + if ( mtc_sock.netlink_sock > 0 ) + close (mtc_sock.netlink_sock); + + if ( mtc_sock.ioctl_sock > 0 ) + close (mtc_sock.ioctl_sock); + + mtcHttpSvr_fini ( mtce_event ); + + threadUtil_fini () ; + + exit (0) ; +} + + +/** Control Config Mask */ +#define CONFIG_AGENT_MASK (CONFIG_AGENT_PORT |\ + CONFIG_MTC_TO_HBS_CMD_PORT |\ + CONFIG_MTC_TO_HWMON_CMD_PORT |\ + CONFIG_HBS_TO_MTC_EVENT_PORT |\ + CONFIG_AGENT_HA_PORT |\ + CONFIG_AGENT_KEY_PORT |\ + CONFIG_AGENT_TOKEN_REFRESH |\ + CONFIG_AGENT_LOC_TIMEOUT |\ + CONFIG_AGENT_INV_EVENT_PORT |\ + CONFIG_AGENT_API_RETRIES |\ + CONFIG_CLIENT_PORT) + +static int mtc_nfvi_handler ( void * user, + const char * section, + const char * 
name, + const char * value) +{ + daemon_config_type* config_ptr = (daemon_config_type*)user; + + if (MATCH("infrastructure-rest-api", "port")) + { + config_ptr->vim_cmd_port = atoi(value); + } + else + { + return (PASS); + } + return (FAIL); +} + + + + +/* Startup config read */ +static int mtc_config_handler ( void * user, + const char * section, + const char * name, + const char * value) +{ + daemon_config_type* config_ptr = (daemon_config_type*)user; + + if (MATCH("agent", "ha_port")) + { + config_ptr->ha_port = atoi(value); + config_ptr->mask |= CONFIG_AGENT_HA_PORT ; + } + + else if (MATCH("agent", "inv_event_port")) + { + config_ptr->inv_event_port = atoi(value); + config_ptr->mask |= CONFIG_AGENT_INV_EVENT_PORT ; + } + + else if (MATCH("agent", "keystone_port")) + { + config_ptr->keystone_port = atoi(value); + config_ptr->mask |= CONFIG_AGENT_KEY_PORT ; + } + + else if (MATCH("agent", "mtc_agent_port")) + { + config_ptr->mtc_agent_port = atoi(value); + config_ptr->mask |= CONFIG_AGENT_PORT ; + } + else if (MATCH("agent", "mtc_to_hbs_cmd_port")) + { + config_ptr->mtc_to_hbs_cmd_port = atoi(value); + config_ptr->mask |= CONFIG_MTC_TO_HBS_CMD_PORT ; + } + else if (MATCH("agent", "mtc_to_guest_cmd_port")) + { + config_ptr->mtc_to_guest_cmd_port = atoi(value); +// config_ptr->mask |= CONFIG_MTC_TO_GUEST_CMD_PORT ; + } + else if (MATCH("agent", "hbs_to_mtc_event_port")) + { + config_ptr->hbs_to_mtc_event_port = atoi(value); + config_ptr->mask |= CONFIG_HBS_TO_MTC_EVENT_PORT ; + } + else if (MATCH("client", "hwmon_cmd_port")) + { + config_ptr->hwmon_cmd_port = atoi(value); + config_ptr->mask |= CONFIG_MTC_TO_HWMON_CMD_PORT ; + } + else if (MATCH("client", "daemon_log_port")) + { + config_ptr->daemon_log_port = atoi(value); + } + else if (MATCH("client", "mtc_rx_mgmnt_port")) + { + config_ptr->cmd_port = atoi(value); + config_ptr->mask |= CONFIG_CLIENT_PORT ; + } + else if (MATCH("agent", "token_refresh_rate")) + { + config_ptr->token_refresh_rate = atoi(value); + 
config_ptr->mask |= CONFIG_AGENT_TOKEN_REFRESH ; + } + else if (MATCH("agent", "api_retries")) + { + config_ptr->api_retries = atoi(value); + config_ptr->mask |= CONFIG_AGENT_API_RETRIES ; + mtcInv.api_retries = config_ptr->api_retries ; + } + else if (MATCH("agent", "mnfa_threshold_type")) + { + config_ptr->mnfa_threshold_type = strdup(value); + } + else if (MATCH("agent", "mnfa_threshold_percent")) + { + config_ptr->mnfa_threshold_percent = atoi(value); + } + else if (MATCH("agent", "mnfa_threshold_number")) + { + config_ptr->mnfa_threshold_number = atoi(value); + } + else if (MATCH("timeouts", "failsafe_shutdown_delay")) + { + config_ptr->failsafe_shutdown_delay = atoi(value); + ilog ("Shutdown TO : %d secs\n", config_ptr->failsafe_shutdown_delay ); + } + else if (MATCH("agent", "autorecovery_threshold")) + { + config_ptr->autorecovery_threshold = atoi(value); + ilog ("AR Threshold: %d\n", + config_ptr->autorecovery_threshold ); + } + else if (MATCH("agent", "offline_period")) + { + mtcInv.offline_period = atoi(value); + ilog ("OfflineAudit: %d msecs\n", mtcInv.offline_period ); + } + else if (MATCH("agent", "offline_threshold")) + { + mtcInv.offline_threshold = atoi(value); + ilog ("OfflineThrsh: %d\n", mtcInv.offline_threshold ); + } + else + { + return (PASS); + } + return (FAIL); +} + +/* Read and process mtc.ini file settings into the daemon configuration */ +int daemon_configure ( void ) +{ + int rc = PASS ; + + timeUtil_sched_init ( ); + + /* Read the ini */ + mtc_config.mask = 0 ; + if (ini_parse(MTCE_CONF_FILE, mtc_config_handler, &mtc_config) < 0) + { + elog ("Can't load '%s'\n", MTCE_CONF_FILE ); + return (FAIL_LOAD_INI); + } + + if (ini_parse(MTCE_INI_FILE, keystone_config_handler, &mtc_config) < 0) + { + elog ("Can't load '%s'\n", MTCE_INI_FILE ); + return (FAIL_LOAD_INI); + } + + if (ini_parse(NFVI_PLUGIN_CFG_FILE, mtc_nfvi_handler, &mtc_config) < 0) + { + elog ("Can't load '%s'\n", NFVI_PLUGIN_CFG_FILE ); + return (FAIL_LOAD_INI); + } + + if 
(ini_parse(SYSINV_CFG_FILE, sysinv_config_handler, &mtc_config) < 0) + { + elog ("Can't load '%s'\n", SYSINV_CFG_FILE ); + return (FAIL_LOAD_INI); + } + + /* Loads key Mtce debug values that can override the defaults */ + if (ini_parse(MTCE_CONF_FILE, debug_config_handler, &mtc_config) < 0) + { + elog ("Can't load '%s'\n", MTCE_CONF_FILE ); + return (FAIL_LOAD_INI); + } + + /* Loads key Mtce timeout values that can override the defaults */ + if (ini_parse(MTCE_CONF_FILE, timeout_config_handler, &mtc_config) < 0) + { + elog ("Can't load '%s'\n", MTCE_CONF_FILE ); + return (FAIL_LOAD_INI); + } + /* Loads key Mtce timeout values that can override the defaults */ + if (ini_parse(MTCE_INI_FILE, timeout_config_handler, &mtc_config) < 0) + { + elog ("Can't load '%s'\n", MTCE_INI_FILE ); + return (FAIL_LOAD_INI); + } + + /* Load the compute enable timeouts */ + if ( mtc_config.compute_mtcalive_timeout ) + mtcInv.compute_mtcalive_timeout = mtc_config.compute_mtcalive_timeout ; + else + mtcInv.compute_mtcalive_timeout = DEFAULT_MTCALIVE_TIMEOUT ; + + /* Load the controller enable timeouts */ + if ( mtc_config.controller_mtcalive_timeout ) + mtcInv.controller_mtcalive_timeout = mtc_config.controller_mtcalive_timeout ; + else + mtcInv.controller_mtcalive_timeout = DEFAULT_MTCALIVE_TIMEOUT ; + + if ( mtc_config.goenabled_timeout ) + mtcInv.goenabled_timeout = mtc_config.goenabled_timeout ; + else + mtcInv.goenabled_timeout = DEFAULT_GOENABLE_TIMEOUT ; + + mtcInv.loc_recovery_timeout = mtc_config.loc_recovery_timeout ; + mtcInv.mnfa_recovery_timeout = mtc_config.mnfa_recovery_timeout ; + + if ( mtc_config.node_reinstall_timeout ) + mtcInv.node_reinstall_timeout = mtc_config.node_reinstall_timeout ; + else + mtcInv.node_reinstall_timeout = MTC_REINSTALL_TIMEOUT_DEFAULT ; + + + if ( mtc_config.dor_mode_timeout <= 0 ) + { + slog ("DOR Mode Timeout is invalid (%d), setting to default (%d)\n", + mtc_config.dor_mode_timeout, + DEFAULT_DOR_MODE_TIMEOUT); + + mtc_config.dor_mode_timeout 
= DEFAULT_DOR_MODE_TIMEOUT ; + } + + /* validate and auto correct manage multi node failure avoidance thresholds */ + if (( mtc_config.mnfa_threshold_type != NULL ) && + ( !strncmp (mtc_config.mnfa_threshold_type, "percent", strlen("percent")))) + { + if ( mtc_config.mnfa_threshold_percent > 100 ) + { + mtc_config.mnfa_threshold_percent = 100 ; + } + mtcInv.mnfa_threshold_type = MNFA_PERCENT ; + ilog ("mnfAvoidance: %d%c\n", mtc_config.mnfa_threshold_percent, '%' ); + mtcInv.mnfa_threshold_percent = mtc_config.mnfa_threshold_percent ; + } + else + { + mtcInv.mnfa_threshold_type = MNFA_NUMBER ; + ilog ("mnfAvoidance: %d hosts\n", mtc_config.mnfa_threshold_number ); + mtcInv.mnfa_threshold_number = mtc_config.mnfa_threshold_number ; + } + + if ( mtc_config.swact_timeout ) + { + if ( mtc_config.swact_timeout < (MTC_SWACT_POLL_TIMER*2)) + mtcInv.swact_timeout = (MTC_SWACT_POLL_TIMER*2); + else + mtcInv.swact_timeout = mtc_config.swact_timeout ; + } + + /* Allow the token refresh rate to be specified in the config file */ + /* but no bigger than every 8 hours - that's all that has been tested */ + mtcInv.token_refresh_rate = mtc_config.token_refresh_rate ; + if ( mtc_config.token_refresh_rate > MTC_HRS_8 ) + { + wlog ("Token refresh rate rounded down to 8 hour maximum\n"); + mtcInv.token_refresh_rate = MTC_HRS_8 ; + } + + mtcInv.uptime_period = mtc_config.uptime_period ; + + if ( mtc_config.online_period < MTC_MIN_ONLINE_PERIOD_SECS ) + mtcInv.online_period = MTC_MIN_ONLINE_PERIOD_SECS ; + else + mtcInv.online_period = mtc_config.online_period ; + + if (( mtc_config.sysinv_timeout == 0 ) || ( mtc_config.sysinv_timeout > 127 )) + { + mtc_config.sysinv_timeout = HTTP_SYSINV_CRIT_TIMEOUT ; + } + mtcInv.sysinv_timeout = mtc_config.sysinv_timeout ; + + if (( mtc_config.sysinv_noncrit_timeout == 0 ) || ( mtc_config.sysinv_noncrit_timeout > 127 )) + { + mtc_config.sysinv_noncrit_timeout = HTTP_SYSINV_NONC_TIMEOUT ; + } + mtcInv.sysinv_noncrit_timeout = 
mtc_config.sysinv_noncrit_timeout ; + + if (( mtc_config.work_queue_timeout == 0 ) || ( mtc_config.work_queue_timeout > 500 )) + { + mtc_config.work_queue_timeout = MTC_WORKQUEUE_TIMEOUT ; + } + mtcInv.work_queue_timeout = mtc_config.work_queue_timeout ; + + if ( mtcInv.offline_period < MIN_OFFLINE_PERIOD_MSECS ) + { + ilog ("offline audit too small (%d) ; correcting to %d\n", + mtcInv.offline_period, MIN_OFFLINE_PERIOD_MSECS ); + + mtcInv.offline_period = MIN_OFFLINE_PERIOD_MSECS ; + } + + if ( mtcInv.offline_threshold == MIN_OFFLINE_THRESHOLD ) + { + ilog ("offline threshold too small (%d) ; correcting to %d\n", + mtcInv.offline_threshold, MIN_OFFLINE_THRESHOLD ); + + mtcInv.offline_threshold = MIN_OFFLINE_THRESHOLD ; + } + + /* Load in the In-Service and Out-Of-Service Test Periods */ + mtcInv.insv_test_period = mtc_config.insv_test_period ; + mtcInv.oos_test_period = mtc_config.oos_test_period ; + + ilog ("TokenRefresh: %3d secs\n" , mtcInv.token_refresh_rate); + ilog ("API Retries : %3d secs\n" , mtcInv.api_retries); + + /* Verify loaded config against an expected mask + * as an ini file fault detection method */ + if ( mtc_config.mask != CONFIG_AGENT_MASK ) + { + elog ("Control configuration failed (%x)\n", + ((-1 ^ mtc_config.mask) & CONFIG_AGENT_MASK)); + return (FAIL_INI_CONFIG); + } + + mtc_config.mgmnt_iface = daemon_get_iface_master ( mtc_config.mgmnt_iface ); + ilog("Mgmnt iface : %s\n", mtc_config.mgmnt_iface ); + + /* Fetch the infrastructure interface name. 
+ * calls daemon_get_iface_master inside so the + * aggrigated name is returned if it exists */ + get_infra_iface (&mtc_config.infra_iface ); + if ( strlen (mtc_config.infra_iface) ) + { + string infra_ip = "" ; + rc = get_iface_address ( mtc_config.infra_iface, infra_ip, false ); + if ( rc ) + { + elog ("failed to get IP address fron infra interface '%s' (rc:%d)\n", mtc_config.infra_iface, rc ); + } + else + { + ilog ("Infra iface : %s\n", mtc_config.infra_iface ); + ilog ("Infra addr : %s\n", infra_ip.c_str()); + } + mtcInv.infra_network_provisioned = true ; + } + + /* Log the startup settings */ + ilog("Cmd Req Port: %d (tx)\n", mtc_config.cmd_port ); + ilog("Cmd Rsp Port: %d (rx)\n", mtc_config.mtc_agent_port ); + ilog("Events Port: %d (rx)\n", mtc_config.hbs_to_mtc_event_port ); + ilog("Inv Port : %d (tx)\n", mtc_config.sysinv_api_port ); + ilog("Inv Address : %s (tx)\n", mtc_config.sysinv_api_bind_ip ); + ilog("Inv Event : %d (rx)\n", mtc_config.inv_event_port ); + ilog("Keystone Port: %d (rx)\n", mtc_config.keystone_port ); + ilog("Mtce Logger : %d (tx)\n", mtc_config.daemon_log_port ); + ilog("nfv-vim-api : %d (port)\n", mtc_config.vim_cmd_port ); + ilog("hbsAgent : %d (port)\n", mtc_config.mtc_to_hbs_cmd_port ); + ilog("guestAgent : %d (port)\n", mtc_config.mtc_to_guest_cmd_port ); + ilog("hwmond : %d (port)\n", mtc_config.hwmon_cmd_port ); + ilog("auth_host : %s \n", mtc_config.keystone_auth_host ); + + /* Get this Controller Activity State */ + mtc_config.active = daemon_get_run_option ("active") ; + ilog ("Controller : %s\n", + mtc_config.active ? "Active" : "In-Active" ); + + /* remove any existing fit */ + daemon_init_fit (); + + return (PASS); +} + +/* Construct the messaging sockets * + * 1. unicast transmit (to compute) socket * + * 2. 
unicast receive (fronm compute) socket */ +int mtc_socket_init ( void ) +{ + int rc = 0 ; + int socket_size = 0 ; + char ip_address[INET6_ADDRSTRLEN]; + + /***********************************************************/ + /* Setup UDP Maintenance Command Transmit Socket Mgmnt I/F */ + /***********************************************************/ + + /* Read the port config strings into the socket struct */ + mtc_sock.mtc_agent_port = mtc_config.mtc_agent_port; + mtc_sock.mtc_cmd_port = mtc_config.cmd_port; + + /* create transmit socket */ + msgClassAddr::getAddressFromInterface(mtc_config.mgmnt_iface, ip_address, INET6_ADDRSTRLEN); + sock_ptr->mtc_agent_tx_socket = new msgClassTx(ip_address, mtc_config.mtc_agent_port, IPPROTO_UDP, mtc_config.mgmnt_iface); + rc = sock_ptr->mtc_agent_tx_socket->return_status; + if(rc != PASS) + { + delete sock_ptr->mtc_agent_tx_socket; + return rc; + } + + /***********************************************************/ + /* Setup UDP Maintenance Command Transmit Socket Infra I/F */ + /***********************************************************/ + if ( strlen( mtc_config.infra_iface ) ) + { + /* create infra transmit socket only if the interface is provisioned */ + msgClassAddr::getAddressFromInterface(mtc_config.infra_iface, ip_address, INET6_ADDRSTRLEN); + sock_ptr->mtc_agent_infra_tx_socket = new msgClassTx(ip_address, mtc_config.mtc_agent_port, IPPROTO_UDP, mtc_config.infra_iface); + rc = sock_ptr->mtc_agent_infra_tx_socket->return_status; + if(rc != PASS) + { + delete sock_ptr->mtc_agent_infra_tx_socket; + return rc; + } + } + + /********************************************************************* + * Setup Maintenance Command Reply and Event Receiver Socket + * - management interface + * + * This socket is used to receive command replies over the management + * interface and asynchronous events from the mtcClient and other + * maintenance service daemons. 
+ *********************************************************************/ + sock_ptr->mtc_agent_rx_socket = + new msgClassRx(CONTROLLER, sock_ptr->mtc_agent_port, IPPROTO_UDP ); + if (( sock_ptr->mtc_agent_rx_socket == NULL ) || + ( sock_ptr->mtc_agent_rx_socket->return_status )) + { + elog("failed to create mtcClient receive socket on port %d for %s\n", + sock_ptr->mtc_agent_port, + mtc_config.mgmnt_iface ); + + if ( sock_ptr->mtc_agent_rx_socket ) + { + delete (sock_ptr->mtc_agent_rx_socket); + sock_ptr->mtc_agent_rx_socket = NULL ; + } + return (FAIL_SOCKET_CREATE); + } + + /* Set messaging buffer size */ + /* if we need a bigger then default we can use a sysctl to raise the max */ + socket_size = MTC_AGENT_RX_BUFF_SIZE ; + if (( rc = sock_ptr->mtc_agent_rx_socket->setSocketMemory ( mtc_config.mgmnt_iface, "mtce command and event receiver (Mgmnt network)", socket_size )) != PASS ) + { + elog ("setsockopt failed for SO_RCVBUF (%d:%m)\n", errno ); + delete (sock_ptr->mtc_agent_rx_socket); + sock_ptr->mtc_agent_rx_socket = NULL ; + return (FAIL_SOCKET_OPTION); + } + socklen_t optlen = sizeof(sock_ptr->mtc_agent_rx_socket_size); + getsockopt ( sock_ptr->mtc_agent_rx_socket->getFD(), SOL_SOCKET, SO_RCVBUF, + &sock_ptr->mtc_agent_rx_socket_size, &optlen ); + + ilog ("Listening On: 'mtc client receive' socket %d (%d rx bytes - req:%d) (%s)\n", + sock_ptr->mtc_agent_port, + sock_ptr->mtc_agent_rx_socket_size, MTC_AGENT_RX_BUFF_SIZE, + mtc_config.mgmnt_iface); + + + /********************************************************************* + * Setup Maintenance message receiver on the infrastructure network + * if it is provisioned + * + *********************************************************************/ + + if ( mtcInv.infra_network_provisioned == true ) + { + sock_ptr->mtc_agent_infra_rx_socket = + new msgClassRx(CONTROLLER_NFS, sock_ptr->mtc_agent_port, IPPROTO_UDP ); + if (( sock_ptr->mtc_agent_infra_rx_socket == NULL ) || + ( 
sock_ptr->mtc_agent_infra_rx_socket->return_status )) + { + elog("failed to create mtcClient receive socket on port %d for %s\n", + sock_ptr->mtc_agent_port, + mtc_config.infra_iface ); + + if ( sock_ptr->mtc_agent_infra_rx_socket ) + { + delete (sock_ptr->mtc_agent_infra_rx_socket); + sock_ptr->mtc_agent_infra_rx_socket = NULL ; + } + return (FAIL_SOCKET_CREATE); + } + + /* Set messaging buffer size */ + /* if we need a bigger then default we can use a sysctl to raise the max */ + socket_size = MTC_AGENT_RX_BUFF_SIZE ; + if (( rc = sock_ptr->mtc_agent_infra_rx_socket->setSocketMemory ( mtc_config.infra_iface, "mtce command and event receiver (Infra network)", socket_size )) != PASS ) + { + elog ("setsockopt failed for SO_RCVBUF (%d:%m)\n", errno ); + delete (sock_ptr->mtc_agent_infra_rx_socket); + sock_ptr->mtc_agent_infra_rx_socket = NULL ; + return (FAIL_SOCKET_OPTION); + } + socklen_t optlen = sizeof(sock_ptr->mtc_agent_infra_rx_socket_size); + getsockopt ( sock_ptr->mtc_agent_infra_rx_socket->getFD(), SOL_SOCKET, SO_RCVBUF, + &sock_ptr->mtc_agent_infra_rx_socket_size, &optlen ); + + ilog ("Listening On: 'mtc client receive' socket %d (%d rx bytes - req:%d) (%s)\n", + sock_ptr->mtc_agent_port, + sock_ptr->mtc_agent_infra_rx_socket_size, MTC_AGENT_RX_BUFF_SIZE, + mtc_config.infra_iface); + } + + + /***********************************************************/ + /* Setup UDP Hardware Monitor Command Transmit socket */ + /***********************************************************/ + + /* Read the port config strings into the socket struct */ + mtc_sock.hwmon_cmd_port = mtc_config.hwmon_cmd_port; + + /* create transmit socket */ + msgClassAddr::getAddressFromInterface(mtc_config.mgmnt_iface, ip_address, INET6_ADDRSTRLEN); + sock_ptr->hwmon_cmd_sock = new msgClassTx(ip_address, mtc_config.hwmon_cmd_port, IPPROTO_UDP, mtc_config.mgmnt_iface); + rc = sock_ptr->hwmon_cmd_sock->return_status; + if ( rc!=PASS ) + { + elog("Failed create socket (%d:%s)\n", errno, 
strerror(errno)); + return (rc); + } + + /***********************************************************/ + /* Heartbeat Event Receiver Interface - (UDP over 'lo') */ + /***********************************************************/ + + int port = daemon_get_cfg_ptr()->hbs_to_mtc_event_port ; + mtc_sock.mtc_event_rx_sock = new msgClassRx(LOOPBACK_IP, port, IPPROTO_UDP); + rc = mtc_sock.mtc_event_rx_sock->return_status; + if ( rc ) + { + elog ("Failed to setup mtce event receive port %d\n", port ); + return (rc) ; + } + + /* Setup the maintenance event receiver for sysinv and vim requests */ + memset ( &mtce_event, 0, sizeof(event_type)); + mtce_event.port = mtc_config.inv_event_port ; + rc = mtcHttpSvr_init ( mtce_event ); + + /***********************************************************/ + /* UDP Transmit Socket for Sending Heartbeat Commands */ + /***********************************************************/ + + port = daemon_get_cfg_ptr()->mtc_to_hbs_cmd_port ; + sock_ptr->mtc_to_hbs_sock = new msgClassTx(LOOPBACK_IP, port, IPPROTO_UDP); + rc = sock_ptr->mtc_to_hbs_sock->return_status; + if ( rc ) + { + elog ("Failed to setup mtce to hbs transmit command port %d\n", port ); + return (rc) ; + } + + sock_ptr->mtclogd.port = port = daemon_get_cfg_ptr()->daemon_log_port ; + CREATE_REUSABLE_INET_UDP_TX_SOCKET ( LOOPBACK_IP, + port, + sock_ptr->mtclogd.sock, + sock_ptr->mtclogd.addr, + sock_ptr->mtclogd.port, + sock_ptr->mtclogd.len, + "mtc logger message", + rc ); + if ( rc ) + { + elog ("Failed to setup mtce logger port %d\n", port ); + return (rc) ; + } + + /* Use the base timer to delay for a time to give + * the heartbeat service time to init */ + // ilog ("Delay 3 secs allowing Inventory & Heartbeat daemons to be ready\n"); + // mtcWait_secs (3); + + return (rc); +} + +int mtc_set_availStatus ( string & hostname, mtc_nodeAvailStatus_enum status ) +{ + return ( mtcInv.set_availStatus ( hostname, status )); +} + +/* Get and store my hostname */ +int mtc_hostname_read ( 
void ) +{ + int rc ; + + /* declare and init a var to hold the queried local hostname */ + char local_hostname[MAX_HOST_NAME_SIZE+1] ; + memset (&local_hostname[0], 0, MAX_HOST_NAME_SIZE); + + /* read the host name */ + rc = gethostname(&local_hostname[0], MAX_HOST_NAME_SIZE ); + if ( rc == PASS ) + { + string string_hostname = local_hostname ; + mtcInv.set_my_hostname ( string_hostname ); + + if ( mtcInv.get_my_hostname () == string_hostname ) + return (PASS) ; + } + else + { + dlog ("gethostname failed (%d)\n", rc ); + } + return (FAIL); +} + +/* The main service loop */ +int daemon_init ( string iface, string nodetype ) +{ + int rc = PASS ; + + /* Not used presently */ + mtcInv.functions = nodetype ; + + httpUtil_init (); + + /* Initialize socket construct and pointer to it */ + memset ( &mtc_sock, 0, sizeof(mtc_sock)); + sock_ptr = &mtc_sock ; + + /* Assign interface to config */ + mtc_config.mgmnt_iface = (char*)iface.data() ; + + if ( daemon_files_init () != PASS ) + { + elog ("Pid, log or other files could not be opened\n"); + return ( FAIL_FILES_INIT ) ; + } + + mtcInv.system_type = daemon_system_type (); + + /* Get and store my hostname */ + if ( mtc_hostname_read () != PASS ) + { + elog ("Failed hostname setup\n"); + return (FAIL_HOSTNAME_SETUP) ; + } + + /* init the base timers */ + mtcTimer_init ( mtcInv.mtcTimer, mtcInv.my_hostname, "mtc timer" ); /* Init general mtc timer */ + mtcAlarm_init (); + mtc_stages_init (); + threadUtil_init ( mtcTimer_handler ) ; + + /* Bind signal handlers */ + rc = daemon_signal_init () ; + if ( rc ) + { + elog ("daemon_signal_init failed\n"); + return ( FAIL_SIGNAL_INIT) ; + } + + /* Configure the control */ + rc = daemon_configure (); + if ( rc ) + { + elog ("Daemon service configuration failed (%i)\n", rc ); + return ( FAIL_DAEMON_CONFIG ) ; + } + + daemon_make_dir(IPMITOOL_OUTPUT_DIR) ; + +#ifdef WANT_FIT_TESTING + daemon_make_dir(FIT__INFO_FILEPATH); +#endif + + return (rc); +} + +int _self_provision ( void ) +{ + int 
rc ; + int load_retries ; + bool waiting_msg = false ; + node_inv_type my_identity ; + node_inv_type record_info ; + + node_inv_init ( my_identity ); + node_inv_init ( record_info ); + + ilog ("My Hostname : %s\n", mtcInv.my_hostname.c_str()); + + for ( ;; ) + { + get_ip_addresses ( mtcInv.my_hostname, mtcInv.my_local_ip , mtcInv.my_float_ip ); + if ( mtcInv.my_float_ip.empty() || mtcInv.my_float_ip.empty() ) + { + if ( waiting_msg == false ) + { + ilog ("Waiting on ip address config ...\n"); + waiting_msg = true ; + } + mtcWait_secs (3); + } + else + { + break ; + } + daemon_signal_hdlr (); + } + + + my_identity.name = mtcInv.my_hostname ; + my_identity.ip = mtcInv.my_local_ip ; + get_iface_macaddr ( mtc_config.mgmnt_iface , my_identity.mac ); + + /* Verify interface properties */ + if ( my_identity.mac.empty() || + ( my_identity.mac.length() != COL_CHARS_IN_MAC_ADDR ) || + my_identity.name.empty() || + my_identity.ip.empty ()) + { + elog ("Failed to acquire mgmt interface (%s) properties\n", mtc_config.mgmnt_iface ); + daemon_exit(); + } + + /* Set the states for the database */ + my_identity.type = "controller"; + my_identity.func = mtcInv.functions ; + my_identity.admin = "unlocked" ; + my_identity.oper = "enabled" ; + my_identity.avail = "available" ; + + my_identity.avail_subf = "not-installed" ; + my_identity.oper_subf = "disabled" ; + + my_identity.uuid = "" ; /* uuid will be learned later */ + + if ( mtcInv.add_host ( my_identity ) ) + { + elog ("Failed to add (%s) host\n", my_identity.name.c_str()); + daemon_exit(); + } + + /* Get an Authentication Token */ + ilog ("%s Requesting initial token\n", mtcInv.my_hostname.c_str() ); + do + { + rc = tokenUtil_new_token ( mtcInv.tokenEvent, mtcInv.my_hostname ); + if ( rc ) + { + elog ("Failed to get authentication token (%d)\n", rc ); + } + + /* Give system inventory some time after start-up. + * We see frequent first request failures requiring retry when this + * wait was inside the above if clause. 
Moving it here ensure there + * is always a small wait after the token fetch on process startup. */ + daemon_signal_hdlr (); + sleep (5); + daemon_signal_hdlr (); + + } while ( rc != PASS ) ; + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__CORRUPT_TOKEN, mtcInv.my_hostname )) + tokenUtil_fail_token (); +#endif + + load_retries = 0 ; + do + { + daemon_signal_hdlr (); + + rc = mtcInv.mtcInvApi_load_host ( my_identity.name, record_info ) ; + if (( rc == PASS ) || ( rc == HTTP_OK )) + { + ilog ("%s found in database (%s)\n", + record_info.name.c_str(), + record_info.uuid.c_str()); + + /* load in the uuid, and board management info */ + mtcInv.set_uuid ( my_identity.name, record_info.uuid ); + mtcInv.set_task ( my_identity.name, record_info.task ); + mtcInv.set_bm_un ( my_identity.name, record_info.bm_un ); + mtcInv.set_bm_ip ( my_identity.name, record_info.bm_ip ); + mtcInv.set_bm_type ( my_identity.name, record_info.bm_type ); + + if ( my_identity.name == record_info.name ) + { + + /* If the active controller was 'locked' and is being auto-corrected + * to 'unlocked' then ensure that there is no locked alarm set for it */ + if ( record_info.admin != "locked" ) + { + mtcAlarm_clear ( my_identity.name, MTC_ALARM_ID__LOCK ); + /* this is not required because its already inited to clear */ + // node_ptr->alarms[MTC_ALARM_ID__LOCK] = FM_ALARM_SEVERITY_CLEAR + } + +// mtcInv.set_subf_info ( my_identity.name, record_info.func, +// record_info.oper_subf, +// record_info.avail_subf ); + if ( my_identity.mac != record_info.mac ) + { + wlog ("%s mac address mismatch (%s - %s)\n", + my_identity.name.c_str(), + my_identity.mac.c_str(), + record_info.mac.c_str()); + } + + if ( my_identity.ip != record_info.ip ) + { + wlog ("%s ip address mismatch (%s - %s)\n", + my_identity.name.c_str(), + my_identity.ip.c_str(), + record_info.ip.c_str()); + } + } + } + else + { + if ( rc == HTTP_NOTFOUND ) + { + wlog ("%s inventory record not found in database, retrying ... 
\n", + my_identity.name.c_str()); + } + else if ( rc == FAIL_HTTP_ZERO_STATUS ) + { + wlog ("%s inventory record load timeout, retrying ... \n", + my_identity.name.c_str()); + } + else if ( rc == FAIL_RETRY ) + { + wlog ("%s inventory config dependency not met, retrying ...\n", + my_identity.name.c_str()); + } + else + { + wlog ("%s inventory record load failed (rc:%d), retrying ...\n", + my_identity.name.c_str(), rc ); + } + load_retries++ ; + if ( load_retries > (mtcInv.api_retries+10) ) + { + elog ("... giving up after %d retries\n", load_retries ); + daemon_exit(); + } + mtcWait_secs (15); + } + } while ( rc != PASS ) ; + + mtcInv.set_active_controller_hostname ( my_identity.name ); + mtcInv.set_activity_state (true); + mtcInv.set_adminAction ( my_identity.name, MTC_ADMIN_ACTION__ADD ); + mtcInv.ctl_mtcAlive_gate ( my_identity.name, true ); + + /* Setup the heartbeat service messaging sockets */ + rc = mtc_socket_init ( ) ; + if ( rc != PASS ) + { + elog ("Socket initialization failed (rc:%d)\n", rc ); + return (FAIL_SOCKET_INIT) ; + } + + daemon_make_dir(IPMITOOL_OUTPUT_DIR) ; + +#ifdef WANT_FIT_TESTING + daemon_make_dir(FIT__INFO_FILEPATH); +#endif + + return(rc); +} + +/* Main FSM Loop */ +void nodeLinkClass::fsm ( void ) +{ + if ( head ) + { + int rc ; + daemon_signal_hdlr (); + this->uptime_handler (); + for ( struct node * node_ptr = head ; node_ptr != NULL ; node_ptr = node_ptr->next ) + { + string hn = node_ptr->hostname ; + rc = fsm ( node_ptr ) ; + if ( rc ) + { + dlog ("%s fsm returned error code %d\n", hn.c_str(), rc ); + } + if ( this->host_deleted == true ) + { + this->host_deleted = false ; + return ; + } + + daemon_signal_hdlr (); + mtcHttpSvr_look ( mtce_event ); + } + } +} + +void daemon_service_run ( void ) +{ + int rc ; + + /* socket descriptor list */ + std::list socks ; + + /* Set the mode */ + mtcInv_ptr->maintenance = true ; + mtcInv_ptr->heartbeat = false ; + + if (( mtc_sock.ioctl_sock = open_ioctl_socket ( )) <= 0 ) + { + elog 
("Failed to create ioctl socket"); + daemon_exit (); + } + + /* Not monitoring address changes RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR */ + if (( mtc_sock.netlink_sock = open_netlink_socket ( RTMGRP_LINK )) <= 0 ) + { + elog ("Failed to create netlink listener socket"); + daemon_exit (); + } + + /* Init HTTP Messaging */ + mtcHttpUtil_init (); + + ilog ("SW VERSION : %s\n", daemon_sw_version ().c_str()); + + /* Collect inventory in active state only */ + if ( mtc_config.active == true ) + { + /* provision this controller */ + if ( _self_provision () != PASS ) + { + elog ("Failed to self provision active controller\n"); + daemon_exit (); + } + + /* The following are base object controller timers ; init them */ + mtcTimer_init ( mtcInv.mtcTimer_token, mtcInv.my_hostname, "token timer" ); + mtcTimer_init ( mtcInv.mtcTimer_uptime,mtcInv.my_hostname, "uptime timer" ); + mtcTimer_init ( mtcInv.mtcTimer_mnfa, mtcInv.my_hostname, "mnfa timer" ); + mtcTimer_init ( mtcInv.mtcTimer_dor, mtcInv.my_hostname, "DOR mode timer" ); + + if ( get_link_state ( mtc_sock.ioctl_sock, mtc_config.mgmnt_iface, &mtcInv.mgmnt_link_up_and_running ) ) + + { + mtcInv.mgmnt_link_up_and_running = false ; + wlog ("Failed to query %s operational state ; defaulting to down\n", mtc_config.mgmnt_iface ); + } + else + { + ilog ("Mgmnt %s link is %s\n", mtc_config.mgmnt_iface, mtcInv.mgmnt_link_up_and_running ? "Up" : "Down" ); + } + + if ( mtcInv.infra_network_provisioned == true ) + { + if ( get_link_state ( mtc_sock.ioctl_sock, mtc_config.infra_iface, &mtcInv.infra_link_up_and_running ) ) + { + mtcInv.infra_link_up_and_running = false ; + wlog ("Failed to query %s operational state ; defaulting to down\n", mtc_config.infra_iface ); + } + else + { + ilog ("Infra %s link is %s\n", mtc_config.infra_iface, mtcInv.infra_link_up_and_running ? 
"Up" : "Down" ); + } + } + + //wlog ("Waiting 15 seconds before talking to inventory ....\n"); + //mtcWait_secs (15); + //wlog ("Reading Inventory\n"); + + /* start loading inventory */ + int retry_count = 0 ; + do + { + /* Load Inventory */ + rc = mtcInvApi_read_inventory ( MTC_INV_BATCH_MAX ); + if ( rc != PASS ) + { + retry_count++ ; + elog ("failed to read inventory records for batch of %d\n", MTC_INV_BATCH_MAX ); + elog ("... retrying in 5 seconds\n"); + mtcWait_secs (5); + } + else + { + retry_count = 0 ; + } + + if ( retry_count > 10 ) + { + elog ("failed to read inventory after %d retries\n", retry_count ); + elog ("... giving up ; exiting \n"); + daemon_exit (); + } + } while ( rc == FAIL ) ; + + if ( mtcInv_ptr->token_refresh_rate != 0 ) + { + ilog ("Starting 'Token' Refresh timer (%d minutes)\n", + (mtcInv_ptr->token_refresh_rate/60) ); + if ( mtcTimer_start ( mtcInv_ptr->mtcTimer_token, + mtcTimer_handler, + mtcInv_ptr->token_refresh_rate ) != PASS ) + { + elog ("Failed to start 'Token' Refresh Timer\n"); + daemon_exit ( ) ; + } + } + + ilog ("Starting 'Uptime' Refresh timer (%d seconds)\n", + MTC_UPTIME_REFRESH_TIMER ); + /* Start a inventory refresh timer */ + if ( mtcTimer_start ( mtcInv.mtcTimer_uptime, + mtcTimer_handler, + MTC_UPTIME_REFRESH_TIMER+(rand()%10)) != PASS ) + { + elog ("Failed to start 'Uptime' Refresh Timer\n"); + daemon_exit ( ) ; + } + } + + /* Add an inotify watch on the shadow file. 
*/ + set_inotify_watch_file ( SHADOW_FILE, + mtcInv.inotify_shadow_file_fd , + mtcInv.inotify_shadow_file_wd ); + + /* Add this controller to the heartbeat service so that we + * receive the out-of-band heartbeat 'flags' even though + * we don't self monitor the active controller specifically + * This add may be duplicate but covers the initial config case */ + send_hbs_command ( mtcInv.my_hostname, MTC_CMD_ADD_HOST ); + + socks.clear(); + socks.push_front (mtc_sock.mtc_event_rx_sock->getFD()); // service_events + socks.push_front (mtc_sock.mtc_agent_rx_socket->getFD()); // mtc_service_inbox + + if ( mtcInv.infra_network_provisioned == true ) + { + socks.push_front (mtc_sock.mtc_agent_infra_rx_socket->getFD()); // mtc_service_inbox + } + + socks.push_front (mtc_sock.netlink_sock); + + if ( mtce_event.fd ) + socks.push_front( mtce_event.fd ) ; + + /* Avoid selecting on file descriptors that are 0 */ + if ( mtcInv.inotify_shadow_file_fd ) + socks.push_front (mtcInv.inotify_shadow_file_fd); + + socks.sort(); + + mtcInv.print_node_info(); + + /* enable the base level signal handler latency monitor */ + daemon_latency_monitor (true); + + /* DOR Mode Check */ + { + int enabled_nodes = mtcInv.enabled_nodes(); + if ( enabled_nodes ) + { + struct timespec ts ; + clock_gettime (CLOCK_MONOTONIC, &ts ); + + /* Support low uptime FIT for testing */ + if ( daemon_is_file_present ( MTC_CMD_FIT__UPTIME )) + { + ts.tv_sec = daemon_get_file_int ( MTC_CMD_FIT__UPTIME ); + slog ("FIT: Uptime %ld secs or %ld min %ld secs\n", + ts.tv_sec, + ts.tv_sec/60, + ts.tv_sec%60); + } + + if ( ts.tv_sec < MTC_MINS_20 ) + { + /* CPE DOR window is much greater in CPE since heartbeat + * cannot start until the inactive CPE has run both manifests */ + int timeout = DEFAULT_DOR_MODE_CPE_TIMEOUT ; + + /* override the timeout to a smaller value for normal system */ + if ( mtcInv.system_type == SYSTEM_TYPE__NORMAL ) + { + /* calculate time from config variable and number of enabled hosts */ + timeout = 
mtc_config.dor_mode_timeout + (enabled_nodes); + } + + mtcInv.dor_mode_active = true ; + mtcInv.dor_start_time = ts.tv_sec ; + + ilog ("%-12s ---------- ; DOR Recovery ---------------------- -------------------\n", mtcInv.my_hostname.c_str()); + ilog ("%-12s is ACTIVE ; DOR Recovery %2d:%02d mins (%4d secs) (duration %3d secs)\n", + mtcInv.my_hostname.c_str(), + mtcInv.dor_start_time/60, + mtcInv.dor_start_time%60, + mtcInv.dor_start_time, + timeout ); + ilog ("%-12s ---------- ; DOR Recovery ---------------------- -------------------\n", mtcInv.my_hostname.c_str()); + ilog ("%-12s host state ; DOR Recovery controller uptime host uptime \n", mtcInv.my_hostname.c_str()); + ilog ("%-12s ---------- ; DOR Recovery ---------------------- -------------------\n", mtcInv.my_hostname.c_str()); + mtcTimer_start ( mtcInv.mtcTimer_dor, mtcTimer_handler, timeout ); + } + } + } + + /* Run Maintenance service forever */ + for ( ; ; ) + { + /** + * Can't just run 'mtcHttpSvr_look' off select as it is seen to miss events. + * Would like to use event_base_loopexit with event_base_loopcontinue + * but the continue API is not available until 2.1.2-alpha. 
+ * In the meantime we will have to continue to service it all the time + * mtcHttpSvr_work ( mtce_event ); + **/ + mtcHttpSvr_look ( mtce_event ); + tokenUtil_log_refresh (); + + if ( mtcInv_ptr->num_hosts () == 0 ) + { + // mtcHttpSvr_look ( mtce_event ); + sleep (1); + continue ; + } + + mtcInv.fsm ( ); + + /* Initialize the master fd_set */ + FD_ZERO(&mtc_sock.readfds); + FD_SET(mtc_sock.mtc_event_rx_sock->getFD(), &mtc_sock.readfds); + FD_SET(mtc_sock.mtc_agent_rx_socket->getFD(), &mtc_sock.readfds); + if ( mtcInv.infra_network_provisioned == true ) + { + FD_SET(mtc_sock.mtc_agent_infra_rx_socket->getFD(),&mtc_sock.readfds); + } + + if ( mtce_event.fd ) + { + FD_SET(mtce_event.fd, &mtc_sock.readfds); + } + if ( mtcInv.inotify_shadow_file_fd ) + { + FD_SET(mtcInv.inotify_shadow_file_fd, &mtc_sock.readfds); + } + if ( mtc_sock.netlink_sock ) + { + FD_SET(mtc_sock.netlink_sock, &mtc_sock.readfds); + } + + /* Initialize the timeval struct */ + mtc_sock.waitd.tv_sec = 0; + if ( mtcInv.system_type == SYSTEM_TYPE__NORMAL ) + mtc_sock.waitd.tv_usec = MTCAGENT_SELECT_TIMEOUT ; + else + mtc_sock.waitd.tv_usec = MTCAGENT_CPE_SELECT_TIMEOUT ; + + /* This is used as a delay up to select_timeout */ + rc = select( socks.back()+1, &mtc_sock.readfds, NULL, NULL, &mtc_sock.waitd); + + /* If the select time out expired then */ + if (( rc < 0 ) || ( rc == 0 )) + { + /* Check to see if the select call failed. */ + /* ... 
but filter Interrupt signal */ + if (( rc < 0 ) && ( errno != EINTR )) + { + elog ( "Select Failed (rc:%d) %s \n", errno, strerror(errno)); + } + } + else + { + if ( FD_ISSET( mtce_event.fd , &mtc_sock.readfds)) + { + mtcHttpSvr_look ( mtce_event ); + } + if (FD_ISSET(mtc_sock.netlink_sock, &mtc_sock.readfds)) + { + dlog ("netlink socket fired\n"); + if ( mtcInv.service_netlink_events ( mtc_sock.netlink_sock, mtc_sock.ioctl_sock ) != PASS ) + { + elog ("service_netlink_events failed (rc:%d)\n", rc ); + } + } + + if (FD_ISSET(sock_ptr->mtc_event_rx_sock->getFD(), &mtc_sock.readfds)) + { + if ( (rc = service_events ( &mtcInv, &mtc_sock )) != PASS ) + { + elog ("service_events failed (rc:%d)\n", rc ); + } + } + + if ( FD_ISSET(sock_ptr->mtc_agent_rx_socket->getFD(), &mtc_sock.readfds)) + { + int cnt = 0 ; + /* Service up to MAX_RX_MSG_BATCH of messages at once */ + for ( ; cnt < MAX_RX_MSG_BATCH ; cnt++ ) + { + rc = mtc_service_inbox ( &mtcInv, &mtc_sock , MGMNT_INTERFACE) ; + if ( rc > RETRY ) + { + mlog2 ("mtc_service_inbox failed (rc:%d) (Mgmnt)\n", rc ); + break ; + } + if ( rc == RETRY ) + break ; + } + if ( cnt > 1 ) + { + mlog2 ("serviced %d messages in one batch (Mgmnt)\n", cnt ); + } + } + + if (( mtcInv.infra_network_provisioned == true ) && + ( sock_ptr->mtc_agent_infra_rx_socket != NULL ) && + ( FD_ISSET(sock_ptr->mtc_agent_infra_rx_socket->getFD(), &mtc_sock.readfds))) + { + int cnt = 0 ; + /* Service up to MAX_RX_MSG_BATCH of messages at once */ + for ( ; cnt < MAX_RX_MSG_BATCH ; cnt++ ) + { + rc = mtc_service_inbox ( &mtcInv, &mtc_sock, INFRA_INTERFACE ) ; + if ( rc > RETRY ) + { + mlog2 ("mtc_service_inbox failed (rc:%d) (Infra)\n", rc ); + break ; + } + if ( rc == RETRY ) + break ; + } + if ( cnt > 1 ) + { + mlog2 ("serviced %d messages in one batch (Infra)\n", cnt ); // ERIC dlog + } + } + if (FD_ISSET(mtcInv.inotify_shadow_file_fd, &mtc_sock.readfds)) + { + rc = get_inotify_events ( mtcInv.inotify_shadow_file_fd, (IN_MODIFY | IN_CREATE | IN_IGNORED) 
);
+                if ( rc )
+                {
+                    ilog ("Shadow file has changed (%x)\n", rc );
+                    if ( mtcInv.manage_shadow_change ( mtcInv.my_hostname ) != PASS )
+                    {
+                        elog ("failed to manage shadow file change notification (%d)\n", rc );
+                    }
+                    if ( rc & IN_IGNORED )
+                    {
+                        socks.remove(mtcInv.inotify_shadow_file_fd);
+                        set_inotify_close ( mtcInv.inotify_shadow_file_fd, mtcInv.inotify_shadow_file_wd );
+                        set_inotify_watch_file ( SHADOW_FILE,
+                                                 mtcInv.inotify_shadow_file_fd ,
+                                                 mtcInv.inotify_shadow_file_wd );
+                        socks.push_back (mtcInv.inotify_shadow_file_fd);
+                        socks.sort();
+                        wlog ("Reselecting on %s change (Select:%d)\n", SHADOW_FILE, mtcInv.inotify_shadow_file_fd );
+                    }
+                }
+            }
+        }
+
+        // timeUtil_sched_sample ( );
+
+        daemon_signal_hdlr ();
+
+        /* If the timer is no longer active and we are in DOR mode
+         * then exit DOR mode. We do it here instead of */
+        if (( mtcInv.dor_mode_active == true ) && ( mtcInv.mtcTimer_dor.tid == NULL ))
+        {
+            ilog ("DOR mode disable\n");
+            mtcInv.dor_mode_active = false ;
+        }
+    }
+    daemon_exit ();
+}
+
+/**
+ * Push daemon state to the log file.
+ *
+ * Emits the memory-log banner and then dumps, in this order: the
+ * maintenance timer memory log together with the node info, the
+ * command done queue, the command work queue and finally the full
+ * per-host state. daemon_dump_membuf is called after each group so
+ * that the accumulated memory logs are written out and the log list
+ * is cleared before the next group is collected.
+ */
+void daemon_dump_info ( void )
+{
+    daemon_dump_membuf_banner ();
+
+    /* group 1: timers and node summary */
+    mtcTimer_mem_log ();
+    mtcInv.print_node_info ();
+
+    daemon_dump_membuf (); /* write mem_logs to log file and clear log list */
+
+    /* group 2: command done queue */
+    //mtcInv.doneQueue_dump_all ();
+    mtcInv.mtcCmd_doneQ_dump_all ();
+
+    daemon_dump_membuf (); /* write mem_logs to log file and clear log list */
+
+    /* group 3: command work queue */
+    //mtcInv.workQueue_dump_all ();
+    mtcInv.mtcCmd_workQ_dump_all ();
+
+    daemon_dump_membuf (); /* write mem_logs to log file and clear log list */
+
+    /* group 4: full state of every host */
+    mtcInv.memDumpAllState ();
+
+    daemon_dump_membuf (); /* write mem_logs to log file and clear log list */
+}
+
+/* Fixed 100 byte constant buffer handed out by daemon_stream_info ;
+ * only the leading "eieio\n" string is initialized explicitly. */
+const char MY_DATA [100] = { "eieio\n" } ;
+
+/**
+ * Stream info hook.
+ *
+ * @return pointer to the first character of the constant MY_DATA buffer.
+ */
+const char * daemon_stream_info ( void )
+{
+    return (&MY_DATA[0]);
+}
+/***************************************************************************
+ *                                                                         *
+ *                       Module Test Head                                  *
+ *                                                                         *
+ ***************************************************************************/
+
+extern int 
mtcJsonInv_testhead ( void ); + +/** Teat Head Entry */ +int daemon_run_testhead ( void ) +{ + int rc = PASS; + + mtc_config.testmode = true ; + + nodeLinkClass * mtcInv_testhead_ptr = new nodeLinkClass ; + + printf ("\n\n"); + printf (TESTHEAD_BAR); + + printf ("| Node Class Test Head - Private and Public Member Functions\n"); + printf (TESTHEAD_BAR); + for ( int i = 0 ; i < 11 ; i++ ) + { + if ( mtcInv_testhead_ptr->testhead ( i+1 ) ) + { + FAILED_STR ; + rc = FAIL ; + } + else + PASSED ; + } + + printf (TESTHEAD_BAR); + printf ("| Maintenance Timer Test Head\n"); + printf (TESTHEAD_BAR); + return (rc); +} + +int send_event ( string & hostname, unsigned int event_cmd, iface_enum iface ) +{ + UNUSED(hostname) ; + UNUSED(event_cmd) ; + UNUSED(iface); + return PASS ; +} + diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeFsm.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeFsm.cpp new file mode 100755 index 00000000..493b356e --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeFsm.cpp @@ -0,0 +1,477 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/*************************************************************************** + * + * @file + * Wind River CGTS Platform Node Maintenance "Finite State Machine" + * + * Description: This FSM follows the X.731 specification. + * + * The FSM manages nodes based on the following three perspectives + * + * Administrative: action taken on node (mtc_nodeAdministrative_action_type) + * Operational : state of the node mtc_nodeOperational_state_type) + * Availability : status of current node state (mtc_nodeAvailability_status_type) + * + */ + +using namespace std; + +#define __AREA__ "fsm" + +#include "nodeClass.h" +#include "tokenUtil.h" +#include "mtcNodeFsm.h" +#include "mtcInvApi.h" +#include "mtcNodeMsg.h" +#include "mtcNodeHdlrs.h" /* for ... 
mtcTimer_handl */
+
+/**
+ * Run one pass of the maintenance FSM against a single host.
+ *
+ * Each pass first services the work that is common to every host
+ * (mnfa timer ring, clear task request, work queue, command queue,
+ * thread, board management, offline and online handlers and the
+ * token refresh), then the delete and config actions and the
+ * in-service test, and finally dispatches to one handler through a
+ * single if / else-if chain keyed on the admin action and the
+ * admin-oper-avail state. The first matching branch wins, so the
+ * order of the branches is significant.
+ *
+ * @param node_ptr the host to service
+ *
+ * @return FAIL  if node_ptr is NULL
+ *         RETRY if the command handler asked for a retry
+ *         PASS  on the early exits (delete, auto power-on and the
+ *               unsupported state auto-correction)
+ *         otherwise rc, which is PASS unless the command queue was
+ *         serviced, in which case it is the cmd_handler return code.
+ */
+int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr )
+{
+    int rc = PASS ;
+
+    keyToken_type * __token_ptr = tokenUtil_get_ptr();
+
+    if ( node_ptr == NULL )
+    {
+        slog ("Null Node Pointer\n");
+        return FAIL ;
+    }
+
+    /* if the multi-Node-Failure Avoidance timer rang then run its recovery handler */
+    if ( mtcTimer_mnfa.ring == true )
+    {
+        mtcTimer_mnfa.ring = false ;
+        mnfa_exit ( true );
+    }
+
+    /* handle clear task request */
+    if ( node_ptr->clear_task == true )
+    {
+        mtcInvApi_update_task ( node_ptr, "" );
+        node_ptr->clear_task = false ;
+    }
+
+    /* Service the libEvent work queue */
+    workQueue_process ( node_ptr ) ;
+
+    /* Service the maintenance command queue if there are commands waiting */
+    if ( node_ptr->mtcCmd_work_fifo.size())
+    {
+        rc = nodeLinkClass::cmd_handler ( node_ptr );
+        if ( rc == RETRY )
+        {
+            /* skip the rest of this pass for this host */
+            return (rc);
+        }
+    }
+
+    /* Monitor and Manage active threads */
+    thread_handler ( node_ptr->ipmitool_thread_ctrl, node_ptr->ipmitool_thread_info );
+
+    /* manage the host connected state and board management alarms */
+    nodeLinkClass::bm_handler ( node_ptr );
+
+    /*
+     * Always run the offline handler
+     *
+     * - does nothing unless in fault handling mode
+     * - looks for offline state during fault handling
+     */
+    nodeLinkClass::offline_handler ( node_ptr );
+
+    /*
+     * Always run the online handler.
+     *
+     * - handles offline/online state transitions based on periodic audit
+     *   with mtcAlive debouncing
+     */
+    nodeLinkClass::online_handler ( node_ptr );
+
+    /* Token refresh: renew when the token timer rang ; a pending
+     * 'delay' request forces the next refresh to happen in 5 seconds */
+    if ( this->mtcTimer_token.ring == true )
+    {
+        dlog ("%s renewing token\n", my_hostname.c_str());
+
+        /* this is a blocking call */
+        tokenUtil_new_token ( tokenEvent, my_hostname );
+
+        if ( __token_ptr->delay == true )
+        {
+            __token_ptr->delay = false ;
+            mtcTimer_start( mtcTimer_token, mtcTimer_handler, 5 );
+        }
+        else
+        {
+            mtcTimer_start ( mtcTimer_token, mtcTimer_handler, token_refresh_rate );
+        }
+    }
+    else if ( __token_ptr->delay == true )
+    {
+        ilog ( "Token Refresh in 5 seconds\n" );
+        if ( mtcTimer_token.tid )
+            mtcTimer_stop ( mtcTimer_token );
+
+        __token_ptr->delay = false ;
+
+        /* force refresh of token in 5 seconds */
+        mtcTimer_start( mtcTimer_token, mtcTimer_handler, 5 );
+    }
+
+    /* Delete is serviced ahead of everything that follows and ends the pass */
+    if ( node_ptr->adminAction == MTC_ADMIN_ACTION__DELETE )
+    {
+        flog ("%s -> Delete Action\n", node_ptr->hostname.c_str());
+        nodeLinkClass::delete_handler ( node_ptr );
+        return (PASS);
+    }
+
+
+    /* Run the config FSM if one of the password config actions is set.
+     * We keep this as a separate action unto itself so that
+     * mtce can continue to service all other actions for the
+     * same host while it handles configuration commands */
+    if (( node_ptr->configAction == MTC_CONFIG_ACTION__INSTALL_PASSWD ) ||
+        ( node_ptr->configAction == MTC_CONFIG_ACTION__CHANGE_PASSWD ) ||
+        ( node_ptr->configAction == MTC_CONFIG_ACTION__CHANGE_PASSWD_AGAIN ))
+    {
+        nodeLinkClass::cfg_handler ( node_ptr );
+    }
+
+    /****************************************************************************
+     * In-Service Test: run the in-service test handler for an unlocked-enabled
+     * host that is available or degraded, whatever its admin action.
+     * This block of code was added to resolve an issue. With this change:
+     * the insv_test_handler gets run as soon as a host's main function is enabled.
+     ****************************************************************************
+     */
+    if (( node_ptr->adminState  == MTC_ADMIN_STATE__UNLOCKED ) &&
+        ( node_ptr->operState   == MTC_OPER_STATE__ENABLED ) &&
+        ((node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) ||
+         (node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )))
+    {
+        // flog ("%s -> insv_test_handler\n", node_ptr->hostname.c_str());
+        nodeLinkClass::insv_test_handler ( node_ptr );
+    }
+
+    /****************************************************************************
+     * Add Host Services:
+     *
+     * This is the head of the dispatch chain ; every 'else if' below
+     * belongs to it and only the first matching branch runs.
+     ****************************************************************************
+     */
+    if ( node_ptr->adminAction == MTC_ADMIN_ACTION__ADD )
+    {
+        flog ("%s -> Add Action\n", node_ptr->hostname.c_str());
+        nodeLinkClass::add_handler ( node_ptr );
+    }
+
+
+    /****************************************************************************
+     * Healthy Enabled Running Host with no action pending:
+     * only the out-of-service test handler is run
+     ****************************************************************************
+     */
+    else if (( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) &&
+             ( node_ptr->adminState  == MTC_ADMIN_STATE__UNLOCKED ) &&
+             ( node_ptr->operState   == MTC_OPER_STATE__ENABLED ) &&
+             ((node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) ||
+              (node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )))
+    {
+        // flog ("%s -> oos_test_handler\n", node_ptr->hostname.c_str());
+        nodeLinkClass::oos_test_handler ( node_ptr );
+    }
+
+    /* Powercycle Host: Run the Powercycle handler for this action */
+    else if ( node_ptr->adminAction == MTC_ADMIN_ACTION__POWERCYCLE )
+    {
+        nodeLinkClass::powercycle_handler ( node_ptr );
+    }
+
+    /****************************************************************************
+     * Reset Host: Run the Reset handler for this Reset Action on Locked Host
+     *
+     * NOTE(review): the admin state is not tested here so this branch
+     * also runs for an unlocked host.
+     ****************************************************************************
+     */
+    else if ( node_ptr->adminAction == MTC_ADMIN_ACTION__RESET )
+    {
+        flog ("%s -> Reset Action\n", node_ptr->hostname.c_str());
+        nodeLinkClass::reset_handler ( node_ptr );
+        nodeLinkClass::oos_test_handler ( node_ptr );
+    }
+
+    /****************************************************************************
+     * Reboot Host: Run the Reboot handler for this Reboot Action on Locked Host
+     *
+     * NOTE(review): the admin state is not tested here either.
+     ****************************************************************************
+     */
+    else if ( node_ptr->adminAction == MTC_ADMIN_ACTION__REBOOT )
+    {
+        flog ("%s -> Reboot Action\n", node_ptr->hostname.c_str());
+        nodeLinkClass::reboot_handler ( node_ptr );
+    }
+
+    /****************************************************************************
+     * Recovering Host: Run Enable handler for an unlocked-enabled-failed host
+     * with no action pending, or for an explicit Enable action
+     ****************************************************************************
+     */
+    else if ((( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) &&
+              ( node_ptr->adminState  == MTC_ADMIN_STATE__UNLOCKED ) &&
+              ( node_ptr->operState   == MTC_OPER_STATE__ENABLED ) &&
+              ( node_ptr->availStatus == MTC_AVAIL_STATUS__FAILED )) ||
+             ( node_ptr->adminAction == MTC_ADMIN_ACTION__ENABLE))
+    {
+        flog ("%s -> Run Enable Handler\n", node_ptr->hostname.c_str());
+        nodeLinkClass::enable_handler ( node_ptr );
+    }
+
+    /* Locked-disabled host with no action pending (offline, online,
+     * offduty or powered-off) ; only the out-of-service test handler is run */
+    else if (( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) &&
+             ( node_ptr->adminState  == MTC_ADMIN_STATE__LOCKED ) &&
+             ( node_ptr->operState   == MTC_OPER_STATE__DISABLED ) &&
+             (( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFLINE ) ||
+              ( node_ptr->availStatus == MTC_AVAIL_STATUS__ONLINE ) ||
+              ( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFDUTY ) ||
+              ( node_ptr->availStatus == MTC_AVAIL_STATUS__POWERED_OFF )))
+    {
+        flog ("%s -> Run OOS Test Handler\n", node_ptr->hostname.c_str());
+        nodeLinkClass::oos_test_handler ( node_ptr );
+    }
+
+    /****************************************************************************
+     * Recovering Host: Run Recovery handler for the Recover action on an
+     * unlocked host
+     ****************************************************************************
+     */
+    else if (( node_ptr->adminAction == MTC_ADMIN_ACTION__RECOVER ) &&
+             ( node_ptr->adminState  == MTC_ADMIN_STATE__UNLOCKED ))
+    {
+        flog ("%s -> Run Recovery\n", node_ptr->hostname.c_str());
+        nodeLinkClass::recovery_handler ( node_ptr );
+    }
+
+    /****************************************************************************
+     * Recovering Host: Run Enable handler for an unlocked-disabled host that is
+     * failed, intest, offline or online and has no action pending
+     ****************************************************************************
+     */
+    else if ( ( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) &&
+              ( node_ptr->adminState  == MTC_ADMIN_STATE__UNLOCKED ) &&
+              ( node_ptr->operState   == MTC_OPER_STATE__DISABLED ) &&
+              (( node_ptr->availStatus == MTC_AVAIL_STATUS__FAILED ) ||
+               ( node_ptr->availStatus == MTC_AVAIL_STATUS__INTEST ) ||
+               ( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFLINE ) ||
+               ( node_ptr->availStatus == MTC_AVAIL_STATUS__ONLINE )))
+    {
+        flog ("%s -> Run Enable\n", node_ptr->hostname.c_str());
+        nodeLinkClass::enable_handler ( node_ptr );
+    }
+
+    /* Try and recover an accidentally powered off host */
+    else if (( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) &&
+             ( node_ptr->adminState  == MTC_ADMIN_STATE__UNLOCKED ) &&
+             ( node_ptr->availStatus == MTC_AVAIL_STATUS__POWERED_OFF ) &&
+             ( node_ptr->hwmon_powercycle.attempts == 0 ) &&
+             ( node_ptr->hwmon_powercycle.state == RECOVERY_STATE__INIT ))
+    {
+        ilog ("%s auto-poweron for unlocked host\n", node_ptr->hostname.c_str());
+        adminActionChange ( node_ptr, MTC_ADMIN_ACTION__POWERON );
+
+        /* FSM sanity check below will reject this operation, need exit now */
+        return (PASS);
+    }
+
+    /****************************************************************************
+     * Unlock Host: Run Enable handler for the Unlock Action
+     ***************************************************************************/
+    else if ( node_ptr->adminAction == MTC_ADMIN_ACTION__UNLOCK )
+    {
+        flog ("%s -> Unlock Action\n", node_ptr->hostname.c_str());
+
+        /* Proceed to unlock host */
+        nodeLinkClass::enable_handler ( node_ptr );
+    }
+
+    /****************************************************************************
+     * Run the Subfunction FSM, usually after the ADD or at the end of the enable
+     * in a small system.
+     ****************************************************************************/
+    else if ( node_ptr->adminAction == MTC_ADMIN_ACTION__ENABLE_SUBF )
+    {
+        flog ("%s -> Running SubFunction Enable handler (%d)\n",
+                  node_ptr->hostname.c_str(),
+                  node_ptr->handlerStage.enable );
+
+        nodeLinkClass::enable_subf_handler ( node_ptr );
+    }
+
+    /****************************************************************************
+     * Lock Host: Run Disable handler for the Lock and Force-Lock Actions
+     ****************************************************************************
+     */
+    else if (( node_ptr->adminAction == MTC_ADMIN_ACTION__LOCK ) ||
+             ( node_ptr->adminAction == MTC_ADMIN_ACTION__FORCE_LOCK ))
+    {
+        // flog ("%s -> Lock Action\n", node_ptr->hostname.c_str());
+        nodeLinkClass::disable_handler ( node_ptr );
+    }
+
+    /****************************************************************************
+     * Semantic Handling: Reject Recovery Actions Against In-Service Host
+     *
+     * NOTE(review): the Reset and Reboot actions are already consumed,
+     * regardless of admin state, by their own branches earlier in this
+     * chain. As coded, only Power-Off and Reinstall of an unlocked host
+     * can reach this branch and the inner Reset clause below cannot be
+     * entered. Confirm whether that ordering is intended.
+     ****************************************************************************
+     */
+    else if (( node_ptr->adminState  == MTC_ADMIN_STATE__UNLOCKED ) &&
+             (( node_ptr->adminAction == MTC_ADMIN_ACTION__POWEROFF ) ||
+              ( node_ptr->adminAction == MTC_ADMIN_ACTION__RESET ) ||
+              ( node_ptr->adminAction == MTC_ADMIN_ACTION__REBOOT ) ||
+              ( node_ptr->adminAction == MTC_ADMIN_ACTION__REINSTALL )))
+    {
+        flog ("%s -> OOS Action Check\n", node_ptr->hostname.c_str());
+
+        /* TEMPORARY: To allow reset of unlocked host for fault insertion. */
+        if ( node_ptr->adminAction == MTC_ADMIN_ACTION__RESET )
+        {
+            wlog ("%s Allowing Reset of unlocked host for FIT\n", node_ptr->hostname.c_str());
+
+            /* compare returns non-zero when the names differ ; i.e. not self */
+            if ( node_ptr->hostname.compare(nodeLinkClass::my_hostname))
+            {
+                nodeLinkClass::reset_handler ( node_ptr );
+            }
+            else
+            {
+                wlog ("%s Cowardly avoiding reset of self\n", node_ptr->hostname.c_str());
+                adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE );
+
+                /* Clear the UI task since we are not really resetting */
+                mtcInvApi_update_task ( node_ptr, "" );
+            }
+        }
+        else
+        {
+            elog ("%s Administrative '%s' Operation Rejected\n",
+                      node_ptr->hostname.c_str(),
+                      get_adminAction_str (node_ptr->adminAction) );
+
+            elog ("%s Cannot perform out-of-service action against in-service host\n",
+                      node_ptr->hostname.c_str());
+            adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE );
+
+            /* Clear the UI task since the rejected action is not performed */
+            mtcInvApi_update_task ( node_ptr, "" );
+        }
+    }
+
+    /****************************************************************************
+     * Reload Host: Run the Reload handler to Nuke the disk on Locked Host
+     * (Reinstall of an unlocked host was rejected by the branch above)
+     ****************************************************************************
+     */
+    else if ( node_ptr->adminAction == MTC_ADMIN_ACTION__REINSTALL )
+    {
+        flog ("%s -> Reload Action\n", node_ptr->hostname.c_str());
+        nodeLinkClass::reinstall_handler ( node_ptr );
+    }
+
+    /****************************************************************************
+     * Locked CPE Simplex Host with no action pending:
+     * run both the in-service and the out-of-service test handlers
+     ****************************************************************************
+     */
+    else if (( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX ) &&
+             ( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) &&
+             ( node_ptr->adminState  == MTC_ADMIN_STATE__LOCKED ))
+    {
+        nodeLinkClass::insv_test_handler ( node_ptr );
+        nodeLinkClass::oos_test_handler ( node_ptr );
+    }
+
+    /****************************************************************************
+     * Power-Off Host:
+     * (Power-Off of an unlocked host was rejected by the semantic check above)
+     ****************************************************************************
+     */
+    else if ( node_ptr->adminAction == MTC_ADMIN_ACTION__POWEROFF )
+    {
+        flog ("%s -> Power-Off Action\n", node_ptr->hostname.c_str());
+        nodeLinkClass::power_handler ( node_ptr );
+        nodeLinkClass::oos_test_handler ( node_ptr );
+    }
+
+    /****************************************************************************
+     * Power-On Host:
+     ****************************************************************************
+     */
+    else if ( node_ptr->adminAction == MTC_ADMIN_ACTION__POWERON )
+    {
+        flog ("%s -> Power-On Action\n", node_ptr->hostname.c_str());
+        nodeLinkClass::power_handler ( node_ptr );
+        nodeLinkClass::oos_test_handler ( node_ptr );
+    }
+
+    /****************************************************************************
+     * Swact Host Services:
+     ****************************************************************************
+     */
+    else if (( node_ptr->adminAction == MTC_ADMIN_ACTION__SWACT ) ||
+             ( node_ptr->adminAction == MTC_ADMIN_ACTION__FORCE_SWACT ))
+
+    {
+        flog ("%s -> Swact Action\n", node_ptr->hostname.c_str());
+        nodeLinkClass::swact_handler ( node_ptr );
+    }
+
+    /***** DEGRADED Cases *******/
+
+    /* Handle the degrade action
+     *
+     * NOTE(review): this condition is a subset of the 'no action pending,
+     * unlocked-enabled, available or degraded' branch near the top of the
+     * chain, so that earlier branch always matches first. */
+    else if (( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) &&
+             ( node_ptr->adminState  == MTC_ADMIN_STATE__UNLOCKED ) &&
+             ( node_ptr->operState   == MTC_OPER_STATE__ENABLED ) &&
+             ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ))
+    {
+        /* We do nothing, the in service test catches this */
+        // flog ("%s -> Degrade Recovery\n", node_ptr->hostname.c_str());
+        ; // nodeLinkClass::degrade_handler ( node_ptr );
+    }
+
+    /* No branch matched: log the case, then force the host into a
+     * known state and action so that a later pass can recover it */
+    else
+    {
+        if (( node_ptr->adminState  >= MTC_ADMIN_STATES ) ||
+            ( node_ptr->operState   >= MTC_OPER_STATES ) ||
+            ( node_ptr->availStatus >= MTC_AVAIL_STATUS ))
+        {
+            /* at least one state value is out of range ; log the raw numbers */
+            elog ("Unhandled FSM Case: %s %d-%d-%d\n",
+                      node_ptr->hostname.c_str(),
+                      node_ptr->adminState,
+                      node_ptr->operState,
+                      node_ptr->availStatus );
+        }
+        else
+        {
+            wlog ("Unsupported FSM State: %s Action:%s %s-%s-%s ; auto-correcting ...\n",
+                      node_ptr->hostname.c_str(),
+                      get_adminAction_str ( node_ptr->adminAction ),
+                      adminState_enum_to_str (node_ptr->adminState).c_str(),
+                      operState_enum_to_str (node_ptr->operState).c_str(),
+                      availStatus_enum_to_str (node_ptr->availStatus).c_str());
+
+        }
+        /* Unlocked state overrides unsupported oper-avail states
+         * Try to recover the host */
+        if ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED )
+        {
+            /* Reset the state in the database for these error states */
+            node_ptr->adminState  = MTC_ADMIN_STATE__UNLOCKED ;
+            node_ptr->operState   = MTC_OPER_STATE__DISABLED ;
+            node_ptr->availStatus = MTC_AVAIL_STATUS__ONLINE ;
+            mtcInvApi_update_states ( node_ptr, "unlocked", "disabled" , "online" );
+
+            /* Force the action */
+            adminActionChange ( node_ptr , MTC_ADMIN_ACTION__UNLOCK );
+
+        }
+        else
+        {
+            /* Reset the state in the database for these error states */
+            node_ptr->adminState  = MTC_ADMIN_STATE__LOCKED ;
+            node_ptr->operState   = MTC_OPER_STATE__DISABLED ;
+            node_ptr->availStatus = MTC_AVAIL_STATUS__OFFLINE ;
+            mtcInvApi_update_states ( node_ptr, "locked", "disabled" , "offline" );
+
+            /* Force the action */
+            adminActionChange ( node_ptr , MTC_ADMIN_ACTION__FORCE_LOCK );
+        }
+        return (PASS);
+    }
+
+    return (rc) ;
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeFsm.h b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeFsm.h
new file mode 100755
index 00000000..2a0ae5fa
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeFsm.h
@@ -0,0 +1,38 @@
+#ifndef __INCLUDE_MTCNODEFSM_HH__
+#define __INCLUDE_MTCNODEFSM_HH__
+/*
+ * Copyright (c) 2013-2014 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+/**
+ * @file
+ * Wind River CGTS Platform Node Maintenance "Finite State Machine"
+ * Interface, Types and Definitions. 
+ */
+
+#include
+#include
+
+using namespace std;
+
+#include "nodeClass.h"
+
+/** Maintenance FSM Testing Support
+ *
+ * The test head sets test mode, provisions a test node and
+ * then proceeds to set up the node's X.731 states and calls
+ * the fsm to run against that node. The FSM and handlers
+ * are coded with test case clauses that transition the
+ * fsmtest word
+ * test
+ */
+
+/** Maintenance FSM test head interface */
+int mtcNodeFsm_testhead ( nodeLinkClass * obj_ptr );
+
+int mtc_fsm_run ( nodeLinkClass * obj_ptr );
+
+#endif /* __INCLUDE_MTCNODEFSM_HH__ */
diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.cpp
new file mode 100755
index 00000000..8b5a2f46
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.cpp
@@ -0,0 +1,6828 @@
+/*
+ * Copyright (c) 2013-2016 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+/****************************************************************************
+ * @file
+ * Wind River CGTS Platform Node "Handlers" Implementation
+ *
+ * Description: This file contains the handlers that implement the X.731 FSM.
+
+ * Interfaces:
+ *
+ * nodeLinkClass::timer_handler
+ * nodeLinkClass::enable_handler
+ * nodeLinkClass::disable_handler
+ * nodeLinkClass::delete_handler
+ * nodeLinkClass::degrade_handler
+ * nodeLinkClass::reset_handler
+ * nodeLinkClass::reinstall_handler
+ * nodeLinkClass::event_handler
+ * nodeLinkClass::power_handler
+ * nodeLinkClass::recovery_handler
+ * nodeLinkClass::cfg_handler
+
+ ****************************************************************************/
+
+using namespace std;
+
+#define __AREA__ "hdl"
+
+#include "nodeBase.h" /* for ... basic definitions */
+#include "mtcAlarm.h" /* for ... mtcAlarm_ */
+#include "nodeTimers.h" /* for ... mtcTimer_start/stop */
+
+#include "jsonUtil.h" /* for ... 
jsonApi_array_value */ +#include "tokenUtil.h" +#include "regexUtil.h" /* for ... regexUtil_pattern_match */ + +#include "nodeClass.h" /* All base stuff */ +#include "ipmiUtil.h" /* for ... power and reset support */ + +#include "mtcNodeMsg.h" /* for ... send_mtc_cmd */ +#include "mtcInvApi.h" /* for ... SYSINV API */ +#include "mtcSmgrApi.h" /* for ... SM API */ +#include "mtcVimApi.h" /* for ... VIm API */ + +#include "daemon_ini.h" /* for ... ini_parse */ +#include "daemon_common.h" + + +#define LOAD_NODETYPE_TIMERS \ + if ( is_controller(node_ptr) ) \ + { \ + node_ptr->mtcalive_timeout = daemon_get_cfg_ptr()->controller_mtcalive_timeout ; \ + } \ + else \ + { \ + node_ptr->mtcalive_timeout = daemon_get_cfg_ptr()->compute_mtcalive_timeout ; \ + } \ + this->goenabled_timeout = daemon_get_cfg_ptr()->goenabled_timeout + 3 ; \ + // Adding 3 seconds to the timeout so that the agent timeout is a + // little longer than the client. + +/************************************************************* + * + * Name : calc_reset_prog_timeout + * + * Purpose : Calculate the overall reset progression timeout + * + * ***********************************************************/ +int nodeLinkClass::calc_reset_prog_timeout ( struct nodeLinkClass::node * node_ptr, + int retries ) +{ + /* for the management interface */ + int to = MTC_RESET_PROG_OFFLINE_TIMEOUT ; + + /* and add on for the bmc interface if its provisioned */ + if ( node_ptr->bm_provisioned == true ) + to += MTC_RESET_PROG_OFFLINE_TIMEOUT ; + + /* add a small buffer */ + to += (MTC_ENABLED_TIMER*4) ; + + /* factor in the number of retries */ + to *= (retries+1) ; + + ilog ("%s Reboot/Reset progression has %d sec 'wait for offline' timeout\n", + node_ptr->hostname.c_str(), to ); + ilog ("%s ... sources - mgmnt:Yes infra:%s bmc:%s\n", + node_ptr->hostname.c_str(), + infra_network_provisioned ? "Yes" : "No", + node_ptr->bm_provisioned ? 
"Yes" : "No" ); + return (to); +} + +void mtcTimer_handler ( int sig, siginfo_t *si, void *uc); + +/* Looks up the timer ID and asserts the corresponding node's ringer */ +void nodeLinkClass::timer_handler ( int sig, siginfo_t *si, void *uc) +{ + struct nodeLinkClass::node * node_ptr ; + timer_t * tid_ptr = (void**)si->si_value.sival_ptr ; + + /* Avoid compiler errors/warnings for parms we must + * have but currently do nothing with */ + sig=sig ; uc = uc ; + + if ( !(*tid_ptr) ) + { + // tlog ("Called with a NULL Timer ID\n"); + return ; + } + + /* Is this an offline timer */ + node_ptr = get_offline_timer ( *tid_ptr ); + if ( node_ptr ) + { + // tlog ("%s offline timer ring\n", node_ptr->hostname.c_str()); + mtcTimer_stop_int_safe ( node_ptr->offline_timer ); + node_ptr->offline_timer.ring = true ; + return ; + } + + /* Is this TID a mtcAlive timer TID ? */ + node_ptr = get_mtcAlive_timer ( *tid_ptr ); + if ( node_ptr ) + { + // tlog ("%s MtcAlive 'offline' timer ring\n", node_ptr->hostname.c_str()); + mtcTimer_stop_int_safe ( node_ptr->mtcAlive_timer ); + node_ptr->mtcAlive_timer.ring = true ; + return ; + } + + /* Is this TID a for the command FSM */ + node_ptr = get_mtcCmd_timer ( *tid_ptr ); + if ( node_ptr ) + { + // tlog ("%s Mtc Command FSM timer ring\n", node_ptr->hostname.c_str()); + mtcTimer_stop_int_safe ( node_ptr->mtcCmd_timer ); + node_ptr->mtcCmd_timer.ring = true ; + return ; + } + + /* Is this TID a inservice test timer TID ? */ + node_ptr = get_insvTestTimer ( *tid_ptr ); + if ( node_ptr ) + { + // tlog ("%s Insv Test timer ring\n", node_ptr->hostname.c_str()); + mtcTimer_stop_int_safe ( node_ptr->insvTestTimer ); + node_ptr->insvTestTimer.ring = true ; + return ; + } + + /* Is this TID a out-of-service test timer TID ? 
*/ + node_ptr = get_oosTestTimer ( *tid_ptr ); + if ( node_ptr ) + { + // tlog ("%s Oos Test timer ring\n", node_ptr->hostname.c_str()); + mtcTimer_stop_int_safe ( node_ptr->oosTestTimer ); + node_ptr->oosTestTimer.ring = true ; + return ; + } + + /* Is this TID a swact timer TID ? */ + node_ptr = get_mtcSwact_timer ( *tid_ptr ); + if ( node_ptr ) + { + // tlog ("%s Swact Timer ring\n", node_ptr->hostname.c_str()); + mtcTimer_stop_int_safe ( node_ptr->mtcSwact_timer ); + node_ptr->mtcSwact_timer.ring = true ; + return ; + } + + /* Dead Office Recovery Mode Timer */ + if ( *tid_ptr == mtcTimer_dor.tid ) + { + mtcTimer_stop_int_safe ( mtcTimer_dor ); + mtcTimer_dor.ring = true ; + return ; + } + + /* Multi-Node Failure Avoidance Timer ? */ + if ( *tid_ptr == mtcTimer_mnfa.tid ) + { + // tlog ("%s Mnfa timer ring\n", mtcTimer_mnfa.hostname.c_str()); + mtcTimer_stop_int_safe ( mtcTimer_mnfa ); + mtcTimer_mnfa.ring = true ; + return ; + } + + /* is base mtc timer */ + if ( *tid_ptr == mtcTimer.tid ) + { + // tlog ("%s Mtc timer ring\n", mtcTimer.hostname.c_str()); + mtcTimer_stop_int_safe ( mtcTimer ); + mtcTimer.ring = true ; + return ; + } + + /* is uptime refresh timer ? */ + if ( *tid_ptr == mtcTimer_uptime.tid ) + { + // tlog ("%s Uptime 'refresh' timer ring\n", mtcTimer_uptime.hostname.c_str()); + mtcTimer_stop_int_safe ( mtcTimer_uptime ); + mtcTimer_uptime.ring = true ; + + /* This timer provides self corrective action handler as a secondary service + * Currently it looks for the following ... + * + * 1. 
Stuck libevent smgrEvent.mutex gate and frees it after 5 uptime intervals + * + **/ + if ( smgrEvent.mutex ) + { + daemon_config_type * cfg_ptr = daemon_get_cfg_ptr(); + + /* Clear this mutex flag if stuck for more than 5 minutes */ + if ( ++smgrEvent.stuck > ((cfg_ptr->swact_timeout/60)+1)) + { + // wlog ("Swact Mutex found stuck and has been auto cleared\n"); + smgrEvent.stuck = 0 ; + smgrEvent.mutex = false ; + } + } + else + { + /* Clear the stuck count */ + smgrEvent.stuck = 0 ; + } + return ; + } + /* is keystone token refresh timer ? */ + if (( *tid_ptr == mtcTimer_token.tid ) ) + { + // tlog ("%s Token 'refresh' timer ring\n", mtcTimer_token.hostname.c_str()); + mtcTimer_stop_int_safe ( mtcTimer_token ); + mtcTimer_token.ring = true ; + return ; + } + + /* is the http request timer ? */ + node_ptr = get_http_timer ( *tid_ptr ); + if ( node_ptr ) + { + // tlog ("%s Http timer ring\n", node_ptr->http_timer.hostname.c_str()); + mtcTimer_stop_int_safe ( node_ptr->http_timer ); + node_ptr->http_timer.ring = true ; + + if ( node_ptr->http_timer.mutex == true ) + node_ptr->http_timer.error = true ; + + return ; + } + + /* get the node */ + node_ptr = get_mtcTimer_timer ( *tid_ptr ); + if ( node_ptr ) + { + // tlog ("%s Timer ring\n", node_ptr->hostname.c_str()); + mtcTimer_stop_int_safe ( node_ptr->mtcTimer ); + node_ptr->mtcTimer.ring = true ; + return ; + } + + /* Is this TID a config timer TID ? */ + node_ptr = get_mtcConfig_timer ( *tid_ptr ); + if ( node_ptr ) + { + // tlog ("%s Config Timer ring\n", node_ptr->hostname.c_str()); + mtcTimer_stop_int_safe ( node_ptr->mtcConfig_timer ); + node_ptr->mtcConfig_timer.ring = true ; + return ; + } + + /* is the thread timer ? */ + node_ptr = get_thread_timer ( *tid_ptr ); + if ( node_ptr ) + { + mtcTimer_stop_int_safe ( node_ptr->ipmitool_thread_ctrl.timer ); + node_ptr->ipmitool_thread_ctrl.timer.ring = true ; + return ; + } + + /* is the ping timer ? 
*/ + node_ptr = get_ping_timer ( *tid_ptr ); + if ( node_ptr ) + { + /* is this the bm ping timer */ + if ( *tid_ptr == node_ptr->bm_ping_info.timer.tid ) + { + mtcTimer_stop_int_safe ( node_ptr->bm_ping_info.timer ); + node_ptr->bm_ping_info.timer.ring = true ; + return ; + } + /* there may be other ping timers introduced later */ + } + + /* is the bmc handler timer ? */ + node_ptr = get_bm_timer ( *tid_ptr ); + if ( node_ptr ) + { + /* is this the bm ping timer */ + if ( *tid_ptr == node_ptr->bm_timer.tid ) + { + mtcTimer_stop_int_safe ( node_ptr->bm_timer ); + node_ptr->bm_timer.ring = true ; + return ; + } + } + + /* is the bmc handler timer ? */ + node_ptr = get_bmc_access_timer ( *tid_ptr ); + if ( node_ptr ) + { + /* is this the bm ping timer */ + if ( *tid_ptr == node_ptr->bmc_access_timer.tid ) + { + mtcTimer_stop_int_safe ( node_ptr->bmc_access_timer ); + node_ptr->bmc_access_timer.ring = true ; + return ; + } + } + + /* is the host services handler timer ? */ + node_ptr = get_host_services_timer ( *tid_ptr ); + if ( node_ptr ) + { + /* is this the bm ping timer */ + if ( *tid_ptr == node_ptr->host_services_timer.tid ) + { + mtcTimer_stop_int_safe ( node_ptr->host_services_timer ); + node_ptr->host_services_timer.ring = true ; + return ; + } + } + + node_ptr = get_powercycle_recovery_timer ( *tid_ptr ); + if ( node_ptr ) + { + if (( *tid_ptr == node_ptr->hwmon_powercycle.recovery_timer.tid ) ) + { + if ( node_ptr->hwmon_powercycle.attempts ) + { + tlog ("%s powercycle monitor completed successfully after attempt %d\n", + node_ptr->hostname.c_str(), + node_ptr->hwmon_powercycle.attempts); + } + + recovery_ctrl_init ( node_ptr->hwmon_powercycle ); + + if (( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) && + ( node_ptr->availStatus != MTC_AVAIL_STATUS__POWERED_OFF )) + { + node_ptr->clear_task = true ; + } + + /* cancel the timer */ + mtcTimer_stop_int_safe ( node_ptr->hwmon_powercycle.recovery_timer ); + + node_ptr->hwmon_powercycle.recovery_timer.ring 
= true ; + + return ; + } + } + + node_ptr = get_powercycle_control_timer ( *tid_ptr ); + if ( node_ptr ) + { + if (( *tid_ptr == node_ptr->hwmon_powercycle.control_timer.tid ) ) + { + /* cancel the timer */ + mtcTimer_stop_int_safe ( node_ptr->hwmon_powercycle.control_timer ); + + node_ptr->hwmon_powercycle.control_timer.ring = true ; + + return ; + } + } + + /* Is this TID a reset recovery timer TID ? */ + node_ptr = get_reset_recovery_timer ( *tid_ptr ); + if ( node_ptr ) + { + if (( *tid_ptr == node_ptr->hwmon_reset.recovery_timer.tid ) ) + { + tlog ("%s clearing hwmon reset holdoff timer\n", + node_ptr->hostname.c_str()); + + recovery_ctrl_init ( node_ptr->hwmon_reset ); + + mtcTimer_stop_int_safe ( node_ptr->hwmon_reset.recovery_timer ); + + node_ptr->hwmon_reset.recovery_timer.ring = true ; + return ; + } + } + + /* Is this TID a reset control timer TID ? */ + node_ptr = get_reset_control_timer ( *tid_ptr ); + if ( node_ptr ) + { + if (( *tid_ptr == node_ptr->hwmon_reset.control_timer.tid ) ) + { + tlog ("%s ringing hwmon reset control timer\n", + node_ptr->hostname.c_str()); + + mtcTimer_stop_int_safe ( node_ptr->hwmon_reset.control_timer ); + + node_ptr->hwmon_reset.control_timer.ring = true ; + + return ; + } + } + + /* cancel the timer by tid */ + mtcTimer_stop_tid_int_safe ( tid_ptr ); +} + +/* Inventory Object wrapper - does a node lookup and calls the timer handler */ +void mtcTimer_handler ( int sig, siginfo_t *si, void *uc) +{ + nodeLinkClass * object_ptr = get_mtcInv_ptr() ; + object_ptr->timer_handler ( sig, si, uc ); +} + +/** Responsible for recovering a host into its enabled state + * + * Steps: availibility is either unavailable or failed or intest if previous enable failed + * 1. enable Start + * operational = disabled + * 2. Notify VM Manager (signal) + * 3. send disabled message to heartbeat service (message) + * 4. reboot host (message) + * availability = intest + * 5. wait for mtc alive (timer) + * 6. wait for go enabled (timer) + * 7. 
send enabled message to heartbeat service (message) + * 8. change state to enabled + * availability - available + */ + +int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) +{ + int rc = PASS ; + + if ( THIS_HOST ) + { + /****************************************************************** + * + * Intercept the unlock action for self. + * 1. change the admin state to unlocked, + * 2. send a lazy reboot and + * 3. wait for the reboot + * + ******************************************************************/ + if ( node_ptr->adminAction == MTC_ADMIN_ACTION__UNLOCK ) + { + bool aio = false ; + if ( SIMPLEX_CPE_SYSTEM ) + aio = true ; + else + aio = false ; + + mtcInvApi_update_states_now ( node_ptr, "unlocked", "disabled" , "offline", "disabled", "offline" ); + mtcInvApi_update_task_now ( node_ptr, aio ? MTC_TASK_CPE_SX_UNLOCK_MSG : MTC_TASK_SELF_UNLOCK_MSG ); + + wlog ("%s unlocking %s with reboot\n", + my_hostname.c_str(), + aio ? "Simplex System" : "Active Controller" ); + + /* should not return */ + return ( lazy_graceful_fs_reboot ( node_ptr )); + } + } + + switch ( (int)node_ptr->handlerStage.enable ) + { + case MTC_ENABLE__FAILURE: + { + /************************************************************** + * Failure of thr active controller has special handling. + * + * Condition 1: While there is no in-service backup controller + * to swact to. In this case the ctive controller + * - is only degraded to avoid a system outage. + * - the CPE subfunction is failed + * - compute SubFunction Alarm is raised + * - Enable alarm is raised + * - A process monitor alarm may also be raised if + * the failure was that of a critical process. + * + * Condition 2: While there is another controller to Swact to. + * In this case the active conroller is failed + * and maintenance will trigger SM to Swact and + * the failing active controller will get + * auto-recovered by the takeover controller. 
+ * + * Condition 3: AIO Simplex failures can request thresholded + * auto-recovery. In doing so maintenance will + * increment the count in an auto recovery counter + * file and self reboot if that count does not exceed + * the auto recovery threshold. After 3 retries the + * threshold is exceeded and then maiantenance stops + * self rebooting and enters the state specified by + * condition 1 above. + * + ***************************************************************/ + bool degrade_only = false ; + + elog ("%s Main Enable FSM (from failed)\n", node_ptr->hostname.c_str()); + + mtcTimer_reset ( node_ptr->mtcTimer ); + + /* Stop heartbeat */ + send_hbs_command ( node_ptr->hostname, MTC_CMD_STOP_HOST ); + for ( int iface = 0 ; iface < MAX_IFACES ; iface++ ) + { + hbs_minor_clear ( node_ptr, (iface_enum)iface ); + } + + node_ptr->cmdReq = MTC_CMD_NONE ; + node_ptr->cmdRsp = MTC_CMD_NONE ; + node_ptr->cmdRsp_status = 0 ; + + /* Raise Critical Enable Alarm */ + alarm_enabled_failure ( node_ptr ); + + /* Handle active controller failures */ + if ( THIS_HOST ) + { + /* Don't fail the only controller, degrade instead */ + degrade_only = true ; + + /* If the inactive controller is enabled then tru to swact to it. + * SM will reject till its eady, until then just run degraded */ + if ( is_inactive_controller_main_insv() == true ) + { + wlog ("%s has critical failure\n", node_ptr->hostname.c_str()); + wlog ("%s ... 
requesting swact to in-service inactive controller\n", node_ptr->hostname.c_str()); + + mtcInvApi_update_task_now ( node_ptr, MTC_TASK_FAILED_SWACT_REQ ); + + /* Inform the VIM of the failure */ + mtcVimApi_state_change ( node_ptr, VIM_HOST_FAILED, 3 ); + + /* ask SM to swact to the backup controller */ + mtcSmgrApi_request ( node_ptr, CONTROLLER_SWACT, 0 ); + + for ( int i = 0 ; i < SMGR_MAX_RETRIES ; i++ ) + { + daemon_signal_hdlr (); + sleep (1); + + /* Try and receive the response */ + if ( mtcHttpUtil_receive ( nodeLinkClass::smgrEvent ) != RETRY ) + { + wlog ("%s SM Swact Request Response: %s\n", + node_ptr->hostname.c_str(), + smgrEvent.response.c_str()); + break ; + } + } + if ( nodeLinkClass::smgrEvent.active == true ) + { + slog ("%s freeing smgrEvent activity state\n", node_ptr->hostname.c_str()); + nodeLinkClass::smgrEvent.active = false ; + } + + /* if we get here then proceed to delay for another swact attempt */ + enableStageChange ( node_ptr, MTC_ENABLE__FAILURE_SWACT_WAIT ); + + /* force ourselves into the enable handler */ + if (( node_ptr->adminAction != MTC_ADMIN_ACTION__ENABLE) && + ( node_ptr->adminAction != MTC_ADMIN_ACTION__SWACT) && + ( node_ptr->adminAction != MTC_ADMIN_ACTION__LOCK) && + ( node_ptr->adminAction != MTC_ADMIN_ACTION__FORCE_LOCK)) + { + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__ENABLE ); + } + + /* Wait 30 seconds before trying the Swact again */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_SECS_30 ); + break ; + } + else + { + this->autorecovery_enabled = true ; + + /* use thresholded auto recovery for simplext failure case */ + manage_autorecovery ( node_ptr ); + + if ( this->autorecovery_disabled == false ) + { + wlog ("%s has critical failure.\n", node_ptr->hostname.c_str()); + wlog ("%s ... downgrading to degrade with auto recovery disabled\n", node_ptr->hostname.c_str()); + wlog ("%s ... 
to avoid disabling only enabled controller\n", node_ptr->hostname.c_str()); + this->autorecovery_disabled = true ; + } + + if ( CPE_SYSTEM ) + { + /* Raise Critical Compute Function Alarm */ + alarm_compute_failure ( node_ptr , FM_ALARM_SEVERITY_CRITICAL ); + } + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); + } + } + + /* Start fresh the next time we enter graceful recovery handler */ + node_ptr->graceful_recovery_counter = 0 ; + node_ptr->health_threshold_counter = 0 ; + + if ( CPE_SYSTEM ) + { + node_ptr->inservice_failed_subf = true ; + subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED, + MTC_AVAIL_STATUS__FAILED ); + } + + if ( degrade_only == true ) + { + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__DEGRADED ); + } + else + { + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__DISABLED, + MTC_AVAIL_STATUS__FAILED ); + } + + /* Inform the VIM of the failure */ + mtcVimApi_state_change ( node_ptr, VIM_HOST_FAILED, 3 ); + + /* if we get here in controller simplex mode then go degraded + * if we are not already degraded. Otherwise, fail. */ + if ( THIS_HOST && ( is_inactive_controller_main_insv() == false )) + { + /* autorecovery must be disabled */ + if (( node_ptr->adminState != MTC_ADMIN_STATE__UNLOCKED ) || + ( node_ptr->operState != MTC_OPER_STATE__ENABLED ) || + ( node_ptr->availStatus != MTC_AVAIL_STATUS__DEGRADED)) + { + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__DEGRADED ); + } + /* adminAction state is already changed to NONE. 
*/ + } + else + { + enableStageChange ( node_ptr, MTC_ENABLE__FAILURE_WAIT ); + } + + break; + } + case MTC_ENABLE__FAILURE_SWACT_WAIT: + { + if (( node_ptr->operState != MTC_OPER_STATE__ENABLED ) || + ( node_ptr->availStatus != MTC_AVAIL_STATUS__DEGRADED )) + { + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__DEGRADED ); + } + + /* wait for the swact or to re-try MTC_ENABLE_FAILURE and likely + * try the swact request again */ + if ( node_ptr->mtcTimer.ring == true ) + { + enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); + } + break ; + } + case MTC_ENABLE__FAILURE_WAIT: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) == false ) + { + break ; + } + /* Stop the enable sequence if the locked now; + * this might occur if the unlock failed from inventory */ + if ( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED ) + { + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); + mtcInvApi_update_task ( node_ptr, "" ); + } + enableStageChange ( node_ptr, MTC_ENABLE__START ); + node_ptr->mtcTimer.ring = false ; + break ; + /* Fall through */ + } + case MTC_ENABLE__START: + { + manage_dor_recovery ( node_ptr, FM_ALARM_SEVERITY_CLEAR ); + + plog ("%s Main Enable FSM (from start)%s\n", + node_ptr->hostname.c_str(), + node_ptr->was_dor_recovery_mode ? 
" (from DOR)" : "" ); + + /* clear all the past enable failure bools */ + clear_main_failed_bools ( node_ptr ); + clear_subf_failed_bools ( node_ptr ); + clear_hostservices_ctls ( node_ptr ); + + /* Clear all degrade flags except for the HWMON one */ + clear_host_degrade_causes ( node_ptr->degrade_mask ); + node_ptr->degraded_resources_list.clear(); + + /* Purge this hosts work and done queues */ + workQueue_purge ( node_ptr ); + doneQueue_purge ( node_ptr ); + mtcCmd_workQ_purge ( node_ptr ); + mtcCmd_doneQ_purge ( node_ptr ); + + /* Assert the mtc alive gate */ + node_ptr->mtcAlive_gate = true ; + + node_ptr->mtcAlive_online = false ; + node_ptr->mtcAlive_offline = true ; + node_ptr->health_threshold_counter = 0 ; + node_ptr->graceful_recovery_counter = 0 ; + node_ptr->http_retries_cur = 0 ; + node_ptr->insv_test_count = 0 ; + node_ptr->mnfa_graceful_recovery = false ; + + node_ptr->goEnabled = false ; + node_ptr->goEnabled_subf = false ; + + mtc_nodeAvailStatus_enum availStatus_temp = node_ptr->availStatus ; + switch ( node_ptr->availStatus ) + { + case MTC_AVAIL_STATUS__INTEST: + case MTC_AVAIL_STATUS__FAILED: + + /* enable auto recovery if the inactive controller + * is out of service */ + if (( is_controller (node_ptr) ) && ( NOT_THIS_HOST )) + this->autorecovery_enabled = true ; + + /* fall through */ + + case MTC_AVAIL_STATUS__DEGRADED: + case MTC_AVAIL_STATUS__AVAILABLE: + { + if (( is_active_controller ( node_ptr->hostname )) && + ( is_inactive_controller_main_insv() == false )) + { + wlog ("%s recovering active controller from %s-%s-%s\n", + node_ptr->hostname.c_str(), + get_adminState_str(node_ptr->adminState).c_str(), + get_operState_str(node_ptr->operState).c_str(), + get_availStatus_str(node_ptr->availStatus).c_str()); + + mtcInvApi_update_task ( node_ptr, "" ); + + /* Special case */ + // alarm_enabled_clear ( node_ptr, false ); + + //mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__CONFIG ); + //node_ptr->alarms[MTC_ALARM_ID__CONFIG] = 
FM_ALARM_SEVERITY_CLEAR ; + + //allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + // MTC_OPER_STATE__ENABLED, + // MTC_AVAIL_STATUS__DEGRADED ); + + // adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); + + // return (PASS); + } + else + { + alarm_enabled_failure ( node_ptr ); + + if ( node_ptr->availStatus != MTC_AVAIL_STATUS__FAILED ) + { + if ( node_ptr->operState != MTC_OPER_STATE__DISABLED ) + { + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__STATUSCHANGE_FAILED ); + } + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__DISABLED, + MTC_AVAIL_STATUS__FAILED ); + } + } + break ; + } + /* Lets make any availability state corrections */ + + case MTC_AVAIL_STATUS__OFFDUTY: + case MTC_AVAIL_STATUS__ONLINE: + availStatus_temp = MTC_AVAIL_STATUS__ONLINE; + break ; + case MTC_AVAIL_STATUS__OFFLINE: + case MTC_AVAIL_STATUS__NOT_INSTALLED: + availStatus_temp = MTC_AVAIL_STATUS__OFFLINE; + break ; + + default: + slog ("Unknown availability state (%d)\n", availStatus_temp); + break ; + } + + /* Never send a disable request to SM for this controller + * or SM will shut us down. 
*/ + if ( is_controller ( node_ptr ) && NOT_THIS_HOST ) + { + mtcSmgrApi_request ( node_ptr, + CONTROLLER_DISABLED, + SMGR_MAX_RETRIES ); + } + rc = allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__DISABLED, + availStatus_temp ); + + if (( rc != PASS ) && ( node_ptr->adminAction == MTC_ADMIN_ACTION__UNLOCK )) + { + allStateChange ( node_ptr, MTC_ADMIN_STATE__LOCKED, + MTC_OPER_STATE__DISABLED, + availStatus_temp ); + + mtcInvApi_update_task ( node_ptr, MTC_TASK_UNLOCK_FAILED ); + + elog ("%s 'unlock' failed by System Inventory (rc:%d)\n", + node_ptr->hostname.c_str(), rc ) ; + + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, 15 ); + enableStageChange ( node_ptr, MTC_ENABLE__FAILURE_WAIT ); + break ; + } + + if ( NOT_THIS_HOST ) + { + /* lets stop heartbeat */ + enableStageChange ( node_ptr, MTC_ENABLE__HEARTBEAT_STOP_CMD ); + } + else + { + /* skip over the reset part as that was taken care and we are + * in the reboot recovery phase now. Look for the mtcAlive */ + + /* In self-enable we don't need to purge mtcAlive just need + * to wait for one more. Assum,e offline, not online and open + * the mtcAlive gate. 
*/ + node_ptr->mtcAlive_gate = false ; + node_ptr->mtcAlive_online = false ; + node_ptr->mtcAlive_offline = true ; + /* set mtcAlive timeout */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_SECS_30 ) ; + + /* timer is started ok so we can do the stage transition */ + enableStageChange ( node_ptr, MTC_ENABLE__MTCALIVE_WAIT ); + } + break ; + } + + case MTC_ENABLE__HEARTBEAT_STOP_CMD: + { + /* Stop heartbeat */ + send_hbs_command ( node_ptr->hostname, MTC_CMD_STOP_HOST ); + + /* Clear the minor and failkure flags if it is set for this host */ + for ( int iface = 0 ; iface < MAX_IFACES ; iface++ ) + { + hbs_minor_clear ( node_ptr, (iface_enum)iface ); + node_ptr->heartbeat_failed[iface] = false ; + } + + /* now reset/reboot the node by running reset progression */ + enableStageChange ( node_ptr, MTC_ENABLE__RESET_PROGRESSION ); + + break ; + } + + case MTC_ENABLE__RECOVERY_TIMER: + { + /* start the recovery wait timer */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RECOVERY_TIMEOUT ); + ilog ("%s Delaying Recovery for %d seconds\n", + node_ptr->hostname.c_str(),MTC_RECOVERY_TIMEOUT); + + enableStageChange ( node_ptr, MTC_ENABLE__RECOVERY_WAIT ); + + break ; + } + + case MTC_ENABLE__RECOVERY_WAIT: + { + if ( node_ptr->mtcTimer.ring == true ) + { + enableStageChange ( node_ptr, MTC_ENABLE__RESET_PROGRESSION ); + + node_ptr->mtcTimer.ring = false ; + } + if ( node_ptr->availStatus != MTC_AVAIL_STATUS__FAILED ) + { + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__FAILED ); + } + break; + } + case MTC_ENABLE__RESET_PROGRESSION: + { + int overall_timeout = 0 ; + + plog ("%s reboot\n", node_ptr->hostname.c_str() ); + + /* Health will get updated in the first + * mtcAlive message after reset */ + node_ptr->health = NODE_HEALTH_UNKNOWN ; + + node_ptr->mtcCmd_work_fifo.clear(); + mtcCmd_init ( node_ptr->cmd ); + node_ptr->cmd.stage = MTC_CMD_STAGE__START ; + node_ptr->cmd.cmd = MTC_OPER__RESET_PROGRESSION ; + node_ptr->cmd.parm1 = 0 ; /* retries 
*/ + node_ptr->cmd.task = true ; /* send task updates */ + node_ptr->mtcCmd_work_fifo.push_front(node_ptr->cmd); + + /* calculate the overall timeout period taking into account + * all the reboot/reset sources that will be tried */ + overall_timeout = calc_reset_prog_timeout ( node_ptr , node_ptr->cmd.parm1 ) ; + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, overall_timeout ) ; + enableStageChange ( node_ptr, MTC_ENABLE__RESET_WAIT ); + + break ; + } + case MTC_ENABLE__RESET_WAIT: + { + /* Wait or reset progression FSM to complete */ + if ( node_ptr->mtcTimer.ring == true ) + { + wlog ("%s Reset Progression Timeout\n", node_ptr->hostname.c_str()); + + /* trigger some delay before another attempt */ + enableStageChange ( node_ptr, MTC_ENABLE__RECOVERY_TIMER ); + + /* if we timeout then remove the reset progression command + * and cleanup the done queue ; just in case */ + if ( node_ptr->mtcCmd_done_fifo.size() ) + node_ptr->mtcCmd_done_fifo.pop_front(); + if ( node_ptr->mtcCmd_work_fifo.size() ) + node_ptr->mtcCmd_work_fifo.pop_front(); + } + else if ( node_ptr->mtcCmd_done_fifo.size() ) + { + mtcTimer_reset ( node_ptr->mtcTimer ); + + node_ptr->mtcCmd_done_fifo_ptr = + node_ptr->mtcCmd_done_fifo.begin(); + if ( node_ptr->mtcCmd_done_fifo_ptr->status != PASS ) + { + wlog ("%s Reset Unsuccessful (retries:%d) (rc:%d)\n", + node_ptr->hostname.c_str(), + node_ptr->cmd.parm1, + node_ptr->mtcCmd_done_fifo_ptr->status ); + + /* trigger some delay before another attempt */ + enableStageChange ( node_ptr, MTC_ENABLE__RECOVERY_TIMER ); + } + else /* ... 
we got the reset or reboot */ + { + /* Set the FSM task state to booting */ + mtcInvApi_update_task ( node_ptr, MTC_TASK_BOOTING ); + enableStageChange ( node_ptr, MTC_ENABLE__INTEST_START ); + } + /* Remove the reset progression command now that it is done */ + node_ptr->mtcCmd_done_fifo.pop_front(); + } + break ; + } + + case MTC_ENABLE__INTEST_START: + { + plog ("%s Booting (timeout: %d secs) (%d)\n", + node_ptr->hostname.c_str(), + node_ptr->mtcalive_timeout, + node_ptr->node_unlocked_counter); + + node_ptr->cmdReq = MTC_CMD_NONE ; + node_ptr->cmdRsp = MTC_CMD_NONE ; + node_ptr->unknown_health_reported = false ; + node_ptr->mtcAlive_online = false ; + node_ptr->mtcAlive_offline = true ; + node_ptr->goEnabled = false ; + + clear_service_readies ( node_ptr ); + + /* Set uptime to zero in mtce and in the database */ + node_ptr->uptime_save = 0 ; + set_uptime ( node_ptr, 0 , false ); + + /* start the timer that waits for MTC READY */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, node_ptr->mtcalive_timeout ); + + node_ptr->mtcAlive_purge = 0 ; + + /* timer is started ok so we can do the stage transition */ + enableStageChange ( node_ptr, MTC_ENABLE__MTCALIVE_PURGE ); + + break ; + } + case MTC_ENABLE__MTCALIVE_PURGE: + { + node_ptr->mtcAlive_purge += 1 ; + + if ( node_ptr->mtcAlive_purge >= 20 ) + { + /* open gate */ + node_ptr->mtcAlive_gate = false ; + + node_ptr->mtcAlive_purge = 0 ; + /* timer is started ok so we can do the stage transition */ + enableStageChange ( node_ptr, MTC_ENABLE__MTCALIVE_WAIT ); + } +#ifdef WANT_PURGE_LOG + else + { + dlog2 ("%s purging (%d) ...\n", + node_ptr->hostname.c_str(), + node_ptr->mtcAlive_purge ); + } +#endif + /* Clear out any mtcAlive messages that may + * have come in while we were purging */ + node_ptr->mtcAlive_online = false ; + node_ptr->mtcAlive_offline = true ; + break ; + } + case MTC_ENABLE__MTCALIVE_WAIT: + { + /* search for the mtc alive message */ + if ( node_ptr->mtcAlive_online == true ) + { + 
mtcTimer_reset ( node_ptr->mtcTimer ); + + /* Check to see if the host is/got configured correctly */ + if ( (node_ptr->mtce_flags & MTC_FLAG__I_AM_CONFIGURED) == 0 ) + { + elog ("%s configuration incomplete or failed (oob:%x:%x)\n", + node_ptr->hostname.c_str(), + node_ptr->mtce_flags, + MTC_FLAG__I_AM_CONFIGURED); + + /* raise an alarm for the failure of the config */ + alarm_config_failure ( node_ptr ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_MAIN_CONFIG_FAIL ); + enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); + } + else + { + plog ("%s is MTCALIVE (uptime:%d)\n", node_ptr->hostname.c_str(), node_ptr->uptime ); + + /* Set the node mtcAlive timer to configured value. + * This will revert bact to normal timeout after any first + * unlock value that may be in effect. */ + LOAD_NODETYPE_TIMERS ; + + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__STATUSCHANGE_ONLINE ); + node_ptr->offline_log_reported = false ; + node_ptr->online_log_reported = true ; + + /* Request Out-Of--Service test execution */ + send_mtc_cmd ( node_ptr->hostname, MTC_REQ_MAIN_GOENABLED, MGMNT_INTERFACE ); + + /* now officially in the In-Test state */ + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__INTEST ); + + /* O.K. 
Clear the alive */ + node_ptr->mtcAlive_online = false ; + + /* Go to the goEnabled stage */ + enableStageChange ( node_ptr, MTC_ENABLE__GOENABLED_TIMER ); + + mtcInvApi_update_task ( node_ptr, MTC_TASK_TESTING ); + } + break ; + } + else if ( mtcTimer_expired ( node_ptr->mtcTimer) ) + { + elog ("%s Timeout waiting for MTCALIVE\n", node_ptr->hostname.c_str()); + + /* raise an alarm for the enable failure */ + alarm_enabled_failure ( node_ptr ); + + /* go back and issue reboot again */ + enableStageChange ( node_ptr, MTC_ENABLE__RESET_PROGRESSION ); + + if ( node_ptr->availStatus != MTC_AVAIL_STATUS__FAILED ) + { + /* no longer In-Test ; we are 'Failed' again" */ + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__FAILED ); + } + + /* Set the FSM task state to init failed */ + mtcInvApi_update_task ( node_ptr, MTC_TASK_BOOT_FAIL ); + + break ; + } + else if ( node_ptr->mtcAlive_gate == true ) + { + slog ("%s mtcAlive gate unexpectedly set, correcting ...\n", + node_ptr->hostname.c_str()); + + node_ptr->mtcAlive_gate = false ; + } + + /* wait some more */ + break ; + } + + case MTC_ENABLE__GOENABLED_TIMER: + { + mtcTimer_start ( node_ptr->mtcTimer, + mtcTimer_handler, this->goenabled_timeout); + + ilog ("%s waiting for GOENABLED (timeout: %d secs)\n", + node_ptr->hostname.c_str(), this->goenabled_timeout ); + + node_ptr->goEnabled = false ; + + /* start waiting fhr the ENABLE READY message */ + enableStageChange ( node_ptr, MTC_ENABLE__GOENABLED_WAIT ); + + break ; + } + case MTC_ENABLE__GOENABLED_WAIT: + { + /* The healthy code comes from the host in the mtcAlive message. + * This 'if' clause was introduced to detected failure of host + * without having to wait for the GOENABLED phase to timeout. + * + * This case is particularly important in the DOR case where + * computes may have come up and fail to run their manifests + * and sit there in an unconfigured state. We don't want them to + * be gracefully recovered to enabled in that case. 
Instead + * we want to recover the card through a reset as quickly as + * possible. */ + if ( node_ptr->health == NODE_UNHEALTHY ) + { + elog ("%s is UNHEALTHY\n", node_ptr->hostname.c_str()); + + mtcTimer_reset ( node_ptr->mtcTimer ); + this->force_full_enable ( node_ptr ); + } + /* search for the Go Enable message */ + else if ( node_ptr->goEnabled_failed == true ) + { + elog ("%s got GOENABLED Failed\n", node_ptr->hostname.c_str()); + mtcTimer_reset ( node_ptr->mtcTimer ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_INTEST_FAIL ); + enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); + } + /* search for the Go Enable message */ + else if ( node_ptr->goEnabled == true ) + { + mtcTimer_reset ( node_ptr->mtcTimer ); + plog ("%s got GOENABLED\n", node_ptr->hostname.c_str()); + // plog ("%s main configured OK\n", node_ptr->hostname.c_str()); + + /* O.K. clearing the state now that we got it */ + node_ptr->goEnabled = false ; + + mtcInvApi_update_task ( node_ptr, MTC_TASK_INITIALIZING ); + + /* ok. great, got the go-enabled message, lets move on */ + enableStageChange ( node_ptr, MTC_ENABLE__HOST_SERVICES_START ); + } + else if ( mtcTimer_expired ( node_ptr->mtcTimer )) + { + elog ("%s has GOENABLED Timeout\n", node_ptr->hostname.c_str()); + ilog ("%s ... 
the out-of-service tests took too long to complete\n", + node_ptr->hostname.c_str()); + + mtcInvApi_update_task ( node_ptr, MTC_TASK_INTEST_FAIL_TO_ ); + node_ptr->mtcTimer.ring = false ; + + /* raise an alarm for the enable failure */ + alarm_enabled_failure ( node_ptr ); + + /* go back and issue reboot again */ + enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); + + /* no longer In-Test ; we are 'Failed' again" */ + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__FAILED ); + } + else + { + ; /* wait some more */ + } + break ; + } + + case MTC_ENABLE__HOST_SERVICES_START: + { + bool start = true ; + + plog ("%s Starting Host Services\n", node_ptr->hostname.c_str()); + if ( this->launch_host_services_cmd ( node_ptr, start ) != PASS ) + { + node_ptr->hostservices_failed = true ; + + elog ("%s %s failed ; launch\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.name.c_str()); + + mtcInvApi_update_task ( node_ptr, MTC_TASK_START_SERVICE_FAIL ); + enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); + } + else + { + mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLING ); + + /* Only run hardware monitor if board management is provisioned */ + if ( node_ptr->bm_provisioned == true ) + { + send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST ); + } + + enableStageChange ( node_ptr, MTC_ENABLE__HOST_SERVICES_WAIT ); + } + break ; + } + + case MTC_ENABLE__HOST_SERVICES_WAIT: + { + /* Wait for host services to complete - pass or fail. + * The host_services_handler manages timeout. */ + rc = this->host_services_handler ( node_ptr ); + if ( rc == RETRY ) + { + /* wait for the mtcClient's response ... 
*/ + break ; + } + else if ( rc != PASS ) + { + node_ptr->hostservices_failed = true ; + /* distinguish 'timeout' from other 'execution' failures */ + if ( rc == FAIL_TIMEOUT ) + { + elog ("%s %s failed ; timeout\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.name.c_str()); + + mtcInvApi_update_task ( node_ptr, + MTC_TASK_START_SERVICE_TO ); + } + else + { + elog ("%s %s failed ; rc:%d\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.name.c_str(), + rc); + + mtcInvApi_update_task ( node_ptr, + MTC_TASK_START_SERVICE_FAIL ); + } + enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); + } + else /* success path */ + { + /* heartbeat is only started now in a normal system. */ + if (( LARGE_SYSTEM ) && ( NOT_THIS_HOST )) + { + /* allow the fsm to wait for up to 1 minute for the + * hbsClient's ready event before starting heartbeat + * test. */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_MINS_1 ); + enableStageChange ( node_ptr, MTC_ENABLE__HEARTBEAT_WAIT ); + } + else + { + /* The heartbeat soak will be started in the enable + * subfunction handler in the combined system. */ + enableStageChange ( node_ptr, MTC_ENABLE__STATE_CHANGE ); + } + } + break ; + } + + case MTC_ENABLE__HEARTBEAT_WAIT: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + wlog ("%s hbsClient ready event timeout\n", node_ptr->hostname.c_str()); + } + else if ( node_ptr->hbsClient_ready == false ) + { + break ; + } + else + { + mtcTimer_reset ( node_ptr->mtcTimer ); + } + + plog ("%s Starting %d sec Heartbeat Soak (with%s)\n", + node_ptr->hostname.c_str(), + MTC_HEARTBEAT_SOAK_BEFORE_ENABLE, + node_ptr->hbsClient_ready ? 
" ready event" : "out ready event" ); + + /* Start Monitoring Services - heartbeat, process and hardware */ + send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST ); + + /* allow heartbeat to run for 10 seconds before we declare enable */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_HEARTBEAT_SOAK_BEFORE_ENABLE ); + enableStageChange ( node_ptr, MTC_ENABLE__HEARTBEAT_SOAK ); + + break ; + } + case MTC_ENABLE__HEARTBEAT_SOAK: + { + if ( node_ptr->mtcTimer.ring == true ) + { + plog ("%s heartbeating\n", node_ptr->hostname.c_str() ); + /* if heartbeat is not working then we will + * never get here and enable the host */ + enableStageChange ( node_ptr, MTC_ENABLE__STATE_CHANGE ); + } + break ; + } + case MTC_ENABLE__STATE_CHANGE: + { + /* Check the work queue complete and done status's */ + mtcInvApi_force_task ( node_ptr, "" ); + + if ( node_ptr->degrade_mask ) + { + /* Allow host to enable in the degraded state */ + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__DEGRADED ); + } + else + { + /* Set node as unlocked-enabled */ + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__AVAILABLE ); + } + + /* Now that we have posted the unlocked-enabled-available state we need + * to force the final part of the enable sequence through */ + if ( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) + { + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__ENABLE ); + } + + /* Start a timer that failed enable if the work queue + * does not empty or if commands in the done queue have failed */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, work_queue_timeout ); + + enableStageChange ( node_ptr, MTC_ENABLE__WORKQUEUE_WAIT ); + + break ; + } + case MTC_ENABLE__WORKQUEUE_WAIT: + { + bool fail = false ; + rc = workQueue_done ( node_ptr ); + if ( rc == RETRY ) + { + /* wait longer */ + break ; + } + else if ( rc == FAIL_WORKQ_TIMEOUT ) + { + elog ("%s enable 
failed ; Enable workQueue timeout, purging ...\n", node_ptr->hostname.c_str()); + mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLE_WORK_TO ); + fail = true ; + } + else if ( rc != PASS ) + { + elog ("%s Enable failed ; Enable doneQueue has failed commands\n", node_ptr->hostname.c_str()); + mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLE_WORK_FAIL ); + fail = true ; + } + else if ( NOT_THIS_HOST ) + { + /* Loop over the heartbeat interfaces and fail the Enable if any of them are failing */ + for ( int i = 0 ; i < MAX_IFACES ; i++ ) + { + if ( node_ptr->heartbeat_failed[i] == true ) + { + elog ("%s Enable failure due to %s Network *** Heartbeat Loss ***\n", + node_ptr->hostname.c_str(), + get_iface_name_str ((iface_enum)i)); + + fail = true ; + mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLE_FAIL_HB ); + } + } + } + + if ( fail == false ) + { + /* Go enabled */ + enableStageChange ( node_ptr, MTC_ENABLE__ENABLED ); + } + else + { + workQueue_purge ( node_ptr ); + enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); + } + + mtcTimer_reset ( node_ptr->mtcTimer ); + + break ; + } + case MTC_ENABLE__ENABLED: + { + if ( is_controller(node_ptr) ) + { + /* Defer telling SM the controller state if + * this is a CPE and this is the only controller */ + if ( CPE_SYSTEM && ( num_controllers_enabled() > 0 )) + { + wlog ("%s deferring SM enable notification till subfunction-enable complete\n", + node_ptr->hostname.c_str()); + } + else + { + mtc_cmd_enum cmd = CONTROLLER_ENABLED ; + + /* Override cmd of ENABLED if action is UNLOCK */ + if ( node_ptr->adminAction == MTC_ADMIN_ACTION__UNLOCK ) + { + cmd = CONTROLLER_UNLOCKED ; + } + + if ( mtcSmgrApi_request ( node_ptr, cmd, SMGR_MAX_RETRIES ) != PASS ) + { + wlog ("%s Failed to send 'unlocked-enabled' to HA Service Manager (%d) ; enabling anyway\n", + node_ptr->hostname.c_str(), cmd ); + } + } + } + + alarm_enabled_clear ( node_ptr, false ); + + mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__CONFIG ); + 
node_ptr->alarms[MTC_ALARM_ID__CONFIG] = FM_ALARM_SEVERITY_CLEAR ; + node_ptr->degrade_mask &= ~DEGRADE_MASK_CONFIG ; + + enableStageChange ( node_ptr, MTC_ENABLE__START ); + + if ( CPE_SYSTEM ) + { + ilog ("%s running compute sub-function enable handler\n", node_ptr->hostname.c_str()); + mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLING_SUBF ); + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__ENABLE_SUBF ); + } + else + { + + node_ptr->enabled_count++ ; + + /* Inform the VIM that this host is enabled */ + mtcVimApi_state_change ( node_ptr, VIM_HOST_ENABLED, 3 ); + + plog ("%s is ENABLED%s\n", node_ptr->hostname.c_str(), + node_ptr->was_dor_recovery_mode ? " (from DOR)" : ""); + node_ptr->dor_recovery_mode = false ; + node_ptr->was_dor_recovery_mode = false ; + node_ptr->http_retries_cur = 0 ; + + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); + + node_ptr->health_threshold_counter = 0 ; + } + + break ; + } + + default: + rc = FAIL_BAD_CASE ; + } + return (rc); +} + +int recovery_state_gate = -1 ; + +/* Graceful Recovery handler + * ------------------------- + * Tries to recover a failed host back in service + * - auto recovery if it only disappeared for 5 seconds + * - avoiding a double reset if it was gone for longer or was known to reset */ +int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr ) +{ + int rc = PASS ; + + if ( node_ptr->recoveryStage != recovery_state_gate ) + { + recovery_state_gate = node_ptr->recoveryStage ; + } + switch ( (int)node_ptr->recoveryStage ) + { + case MTC_RECOVERY__FAILURE: + { + if ( node_ptr->mtcTimer.ring == false ) + { + break ; + } + recoveryStageChange ( node_ptr, MTC_RECOVERY__START ); + node_ptr->mtcTimer.ring = false ; + + break ; + } + + case MTC_RECOVERY__START: + { + /* Purge this hosts work queues */ + mtcCmd_workQ_purge ( node_ptr ); + mtcCmd_doneQ_purge ( node_ptr ); + + node_ptr->http_retries_cur = 0 ; + node_ptr->unknown_health_reported = false ; + + plog ("%s %sGraceful Recovery 
(uptime was %d)\n", + node_ptr->hostname.c_str(), + node_ptr->mnfa_graceful_recovery ? "MNFA " : "", + node_ptr->uptime ); + + /* Cancel any outstanding timers */ + mtcTimer_reset ( node_ptr->mtcTimer ); + + /* clear all the past enable failure bools */ + clear_main_failed_bools ( node_ptr ); + clear_subf_failed_bools ( node_ptr ); + clear_hostservices_ctls ( node_ptr ); + + /* Disable the heartbeat service for Graceful Recovery */ + send_hbs_command ( node_ptr->hostname, MTC_CMD_STOP_HOST ); + + /* Clear the minor and failure flags if it is set for this host */ + for ( int iface = 0 ; iface < MAX_IFACES ; iface++ ) + { + hbs_minor_clear ( node_ptr, (iface_enum)iface ); + node_ptr->heartbeat_failed[iface] = false ; + } + + /* Have we reached the maximum allowed fast recovery attempts. + * + * If we have then force the full enable by + * 1. clearing the recovery action + * 2. Setting the node operational state to Disabled + * 3. Setting the Enable action + */ + if ( ++node_ptr->graceful_recovery_counter > MTC_MAX_FAST_ENABLES ) + { + /* gate off further mtcAlive messaging timme the offline + * handler runs. This prevents stale messages from making it + * in and prolong the offline detection time */ + node_ptr->mtcAlive_gate = true ; + + elog ("%s Graceful Recovery Failed (retries=%d)\n", + node_ptr->hostname.c_str(), node_ptr->graceful_recovery_counter ); + + /* This forces exit from the recover handler and entry into the + * enable_handler via FAILED availability state and no aciton. 
*/ + nodeLinkClass::force_full_enable ( node_ptr ); + + break ; + } + else + { + /* TODO: Consider taking this log out as writing to the database + * during a fast graceful recovery might no be the best idea */ + if ( node_ptr->graceful_recovery_counter > 1 ) + mtcInvApi_update_task ( node_ptr, "Graceful Recovery Retry" ); + else + mtcInvApi_update_task ( node_ptr, "Graceful Recovery"); + + recoveryStageChange ( node_ptr, MTC_RECOVERY__REQ_MTCALIVE ) ; + } + + break ; + } + case MTC_RECOVERY__REQ_MTCALIVE: + { + /* Clear any recent mtcAlive notification ; start a new :) */ + node_ptr->mtcAlive_online = false ; + + /* Clear any recent goEnable notification ; start a new :) */ + node_ptr->goEnabled = false ; + + /* Save the node's last recorded uptime and request mtcAlive from + * seemingly failed host. Uptime is saved because when the next + * mtcAlive comes it the uptime will be over written and we need + * it to compare as a dicision point later on in recovery handling */ + node_ptr->uptime_save = node_ptr->uptime ; + + /* A host is considered failed if it goes away for more + * than a Loss Of Communication Recovery Timeout specified as mtc.ini + * configuration option 'loc_recovery_timeout' time in seconds. */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, loc_recovery_timeout ); + + ilog ("%s requesting mtcAlive with %d sec timeout\n", + node_ptr->hostname.c_str(), loc_recovery_timeout); + + recoveryStageChange ( node_ptr, MTC_RECOVERY__REQ_MTCALIVE_WAIT ) ; + + break ; + } + case MTC_RECOVERY__REQ_MTCALIVE_WAIT: + { + if ( node_ptr->mtcAlive_online == true ) + { + manage_dor_recovery ( node_ptr, FM_ALARM_SEVERITY_CLEAR ); + + mtcTimer_stop ( node_ptr->mtcTimer ); + + ilog ("%s got requested mtcAlive%s\n", + node_ptr->hostname.c_str(), + node_ptr->was_dor_recovery_mode ? 
" (DOR)" : "" ); + + /* Check to see if the host is/got configured correctly */ + if ( (node_ptr->mtce_flags & MTC_FLAG__I_AM_CONFIGURED) == 0 ) + { + elog ("%s Not Configured (Graceful Recovery)\n", node_ptr->hostname.c_str()); + + /* raise an alarm for the failure of the config */ + alarm_config_failure ( node_ptr ); + force_full_enable ( node_ptr ); + break ; + } + + /* Check to see if the host is/got configured correctly */ + else if ( (node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY) ) + { + elog ("%s Configuration Failure (Graceful Recovery)\n", node_ptr->hostname.c_str()); + + /* raise an alarm for the failure of the config */ + alarm_config_failure ( node_ptr ); + force_full_enable ( node_ptr ); + break ; + } + + else if ( node_ptr->mnfa_graceful_recovery == true ) + { + if ( node_ptr->uptime > MTC_MINS_10 ) + { + /* did not reboot case */ + wlog ("%s Connectivity Recovered ; host did not reset\n", node_ptr->hostname.c_str()); + wlog ("%s ... continuing with MNFA graceful recovery\n", node_ptr->hostname.c_str()); + wlog ("%s ... with no affect to host services\n", node_ptr->hostname.c_str()); + + /* allow the fsm to wait for up to 1 minute for the + * hbsClient's ready event before starting heartbeat + * test. */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_MINS_1 ); + recoveryStageChange ( node_ptr, MTC_RECOVERY__HEARTBEAT_START ) ; + } + else + { + /* did reboot case */ + wlog ("%s Connectivity Recovered ; host has reset\n", node_ptr->hostname.c_str()); + ilog ("%s ... continuing with MNFA graceful recovery\n", node_ptr->hostname.c_str()); + ilog ("%s ... without additional reboot %s\n", + node_ptr->hostname.c_str(), node_ptr->bm_ip.empty() ? "or reset" : "" ); + + /* now officially in the In-Test state */ + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__INTEST ); + + /* O.K. 
Clear the alive */ + node_ptr->mtcAlive_online = false ; + + /* Go to the goEnabled stage */ + recoveryStageChange ( node_ptr, MTC_RECOVERY__GOENABLED_TIMER ); + + if ( node_ptr->alarms[MTC_ALARM_ID__ENABLE] != FM_ALARM_SEVERITY_CRITICAL ) + { + mtcAlarm_critical ( node_ptr->hostname, MTC_ALARM_ID__ENABLE ); + node_ptr->alarms[MTC_ALARM_ID__ENABLE] = FM_ALARM_SEVERITY_CRITICAL ; + } + break ; + } + } + else if (( node_ptr->uptime_save ) && ( node_ptr->uptime >= node_ptr->uptime_save )) + { + /* did not reboot case */ + wlog ("%s Connectivity Recovered ; host did not reset%s\n", + node_ptr->hostname.c_str(), + node_ptr->was_dor_recovery_mode ? " (DOR)" : "" ); + + wlog ("%s ... continuing with graceful recovery\n", node_ptr->hostname.c_str()); + wlog ("%s ... with no affect to host services\n", node_ptr->hostname.c_str()); + + /* allow the fsm to wait for up to 1 minute for the + * hbsClient's ready event before starting heartbeat + * test. */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_MINS_1 ); + recoveryStageChange ( node_ptr, MTC_RECOVERY__HEARTBEAT_START ) ; + } + else + { + wlog ("%s Connectivity Recovered ; host has reset\n", node_ptr->hostname.c_str()); + ilog ("%s ... continuing%sgraceful recovery ; (OOB: %08x)\n", + node_ptr->hostname.c_str(), + node_ptr->was_dor_recovery_mode ? " (DOR) " : " ", + node_ptr->mtce_flags); + ilog ("%s ... without additional reboot %s (uptime:%d)\n", + node_ptr->hostname.c_str(), + node_ptr->bm_ip.empty() ? 
"or reset" : "", + node_ptr->uptime ); + + /* now officially in the In-Test state */ + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__INTEST ); + + /* Go to the goEnabled stage */ + recoveryStageChange ( node_ptr, MTC_RECOVERY__GOENABLED_TIMER ); + + if ( node_ptr->alarms[MTC_ALARM_ID__ENABLE] != FM_ALARM_SEVERITY_CRITICAL ) + { + mtcAlarm_critical ( node_ptr->hostname, MTC_ALARM_ID__ENABLE ); + node_ptr->alarms[MTC_ALARM_ID__ENABLE] = FM_ALARM_SEVERITY_CRITICAL ; + } + } + } + /* A timer ring indicates that the host is not up */ + else if ( node_ptr->mtcTimer.ring == true ) + { + /* So now this means the node is failed + * we need to stop services and transition into + * a longer 'waiting' for the asynchronous mtcAlive + * that should come as part of the automatic reboot + * Steps are + * 1. Stop Services + * 2. Create mtcAlive timer + * 2a. MtcAlive indicating reset ; run start services and recover + * 2b. MtcAlive indicating no reset ; force full enable + * 2c MtcAlive Timeout: force full enable + */ + wlog ("%s Loss Of Communication for %d seconds ; disabling host%s\n", + node_ptr->hostname.c_str(), + loc_recovery_timeout, + node_ptr->dor_recovery_mode ? " (DOR)" : "" ); + wlog ("%s ... stopping host services\n", node_ptr->hostname.c_str()); + wlog ("%s ... continuing with graceful recovery\n", node_ptr->hostname.c_str()); + + /* clear all mtc flags. 
Will be updated on the next/first + * mtcAlive message upon recovery */ + node_ptr->mtce_flags = 0 ; + + /* Set node as unlocked-disabled-failed */ + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__DISABLED, + MTC_AVAIL_STATUS__FAILED ); + + if ( CPE_SYSTEM ) + { + subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED, + MTC_AVAIL_STATUS__FAILED ); + } + + /* Inform the VIM that this host has failed */ + mtcVimApi_state_change ( node_ptr, VIM_HOST_FAILED, 3 ); + + if ( node_ptr->alarms[MTC_ALARM_ID__ENABLE] != FM_ALARM_SEVERITY_CRITICAL ) + { + mtcAlarm_critical ( node_ptr->hostname, MTC_ALARM_ID__ENABLE ); + node_ptr->alarms[MTC_ALARM_ID__ENABLE] = FM_ALARM_SEVERITY_CRITICAL ; + } + /* Clear all degrade flags except for the HWMON one */ + clear_host_degrade_causes ( node_ptr->degrade_mask ); + node_ptr->degraded_resources_list.clear(); + + if ( is_controller(node_ptr) ) + { + if ( mtcSmgrApi_request ( node_ptr, CONTROLLER_DISABLED , SMGR_MAX_RETRIES ) != PASS ) + { + wlog ("%s Failed to send 'unlocked-disabled' to HA Service Manager\n", + node_ptr->hostname.c_str() ); + } + } + recoveryStageChange ( node_ptr, MTC_RECOVERY__MTCALIVE_TIMER ); + } + break ; + } + case MTC_RECOVERY__MTCALIVE_TIMER: + { + int timeout = 0 ; + + /* Set the FSM task state to booting */ + node_ptr->uptime = 0 ; + mtcInvApi_update_task ( node_ptr, MTC_TASK_RECOVERY_WAIT ); + + start_offline_handler ( node_ptr ); + + timeout = node_ptr->mtcalive_timeout ; + + /* Only try and issue in-line recovery reboot or reset if + * NOT in Dead Office Recovery (DOR) mode. */ + if ( node_ptr->dor_recovery_mode == false ) + { + /* If the infrastructure network is provisioned then try + * and issue a reset over it to expedite the recovery + * for the case where the management heartbeat has + * failed but the infra has not. 
+ * Keeping it simple by just issuing the command and not looping on it */ + if (( node_ptr->infra_ip.length () > 5 ) && + ( node_ptr->heartbeat_failed[MGMNT_IFACE] == true ) && + ( node_ptr->heartbeat_failed[INFRA_IFACE] == false )) + { + ilog ("%s issuing one time graceful recovery reboot over infra network\n", node_ptr->hostname.c_str()); + send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, INFRA_INTERFACE ) ; + } + + if ((node_ptr->bm_provisioned) && (node_ptr->bm_accessible)) + { + ilog ("%s issuing one time board management graceful recovery reset\n", node_ptr->hostname.c_str()); + + rc = ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__POWER_RESET ); + if ( rc ) + { + wlog ("%s board management reset failed\n", node_ptr->hostname.c_str()); + } + else + { + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + recoveryStageChange ( node_ptr, MTC_RECOVERY__RESET_RECV_WAIT ); + break ; + } + } + else + { + wlog ("%s cannot issue Reset\n", node_ptr->hostname.c_str() ); + wlog ("%s ... board management not provisioned or accessible\n", node_ptr->hostname.c_str() ); + } + } + else + { + /* Just allow Graceful Recovery to take its course. */ + /* Load configured mtcAlive and goEnabled timers */ + LOAD_NODETYPE_TIMERS ; + + /* load the mtcAlive timeout to accommodate for dor recovery */ + timeout = node_ptr->mtcalive_timeout + daemon_get_cfg_ptr()->dor_recovery_timeout_ext ; + } + + /* start the timer that waits for MTCALIVE */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, timeout ); + + plog ("%s %s (%d secs)%s(uptime was %d) \n", + node_ptr->hostname.c_str(), + MTC_TASK_RECOVERY_WAIT, + timeout, + node_ptr->dor_recovery_mode ?
" (DOR) " : " " , + node_ptr->uptime_save ); + + clear_service_readies ( node_ptr ); + + recoveryStageChange ( node_ptr, MTC_RECOVERY__MTCALIVE_WAIT ); + break ; + } + + case MTC_RECOVERY__RESET_RECV_WAIT: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer )) + { + rc = ipmi_command_recv ( node_ptr ); + if ( rc == RETRY ) + { + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RETRY_WAIT ); + break ; + } + + if ( rc ) + { + elog ("%s Reset command failed\n", node_ptr->hostname.c_str()); + } + else + { + ilog ("%s is Resetting\n", node_ptr->hostname.c_str()); + } + + /* start the timer that waits for MTCALIVE */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, node_ptr->mtcalive_timeout ); + + plog ("%s %s (%d secs) (uptime was %d)\n", + node_ptr->hostname.c_str(), + MTC_TASK_RECOVERY_WAIT, + node_ptr->mtcalive_timeout, + node_ptr->uptime_save ); + + clear_service_readies ( node_ptr ); + + recoveryStageChange ( node_ptr, MTC_RECOVERY__MTCALIVE_WAIT ); + } + break ; + } + case MTC_RECOVERY__MTCALIVE_WAIT: + { + /* search for the mtc alive message */ + if ( node_ptr->mtcAlive_online == true ) + { + mtcTimer_stop ( node_ptr->mtcTimer ); + + manage_dor_recovery ( node_ptr, FM_ALARM_SEVERITY_CLEAR ); + + /* If a saved uptime exists and the host's current uptime is not + * smaller than it then the host has not reset even though its + * services were disabled ; force a full enable (with reset) to + * prevent VM duplication. Logical AND, not bitwise, is required */ + if (( node_ptr->uptime_save ) && ( node_ptr->uptime >= node_ptr->uptime_save )) + { + ilog ("%s regained MTCALIVE from host that did not reboot (uptime:%d)\n", + node_ptr->hostname.c_str(), node_ptr->uptime ); + ilog ("%s ... uptimes before:%d after:%d\n", node_ptr->hostname.c_str(), node_ptr->uptime_save, node_ptr->uptime ); + ilog ("%s ... exiting graceful recovery\n", node_ptr->hostname.c_str()); + ilog ("%s ...
forcing full enable with reset\n", node_ptr->hostname.c_str()); + + nodeLinkClass::force_full_enable ( node_ptr ); + } + /* Check to see if the host is/got configured */ + else if ( (node_ptr->mtce_flags & MTC_FLAG__I_AM_CONFIGURED) == 0 ) + { + elog ("%s Not Configured (Graceful Recovery)\n", node_ptr->hostname.c_str()); + + /* raise an alarm for the failure of the config */ + alarm_config_failure ( node_ptr ); + force_full_enable ( node_ptr ); + break ; + } + + /* Check to see if the host is/got configured correctly */ + else if ( (node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY) ) + { + elog ("%s Configuration Failure (Graceful Recovery)\n", node_ptr->hostname.c_str()); + + /* raise an alarm for the failure of the config */ + alarm_config_failure ( node_ptr ); + force_full_enable ( node_ptr ); + break ; + } + else + { + ilog ("%s regained MTCALIVE from host that has rebooted (uptime curr:%d save:%d)\n", + node_ptr->hostname.c_str(), node_ptr->uptime, node_ptr->uptime_save ); + ilog ("%s ... continuing with graceful recovery %s\n", + node_ptr->hostname.c_str(), + node_ptr->dor_recovery_mode ? "(DOR)" : " "); + ilog ("%s ... without additional reboot %s\n", + node_ptr->hostname.c_str(), node_ptr->bm_ip.empty() ? "or reset" : "" ); + + /* now officially in the In-Test state */ + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__INTEST ); + + /* O.K. 
Clear the alive */ + node_ptr->mtcAlive_online = false ; + + /* Go to the goEnabled stage */ + recoveryStageChange ( node_ptr, MTC_RECOVERY__GOENABLED_TIMER ); + } + break ; + } + else if ( node_ptr->mtcTimer.ring == true ) + { + manage_dor_recovery ( node_ptr, FM_ALARM_SEVERITY_CLEAR ); + + /* Set the FSM task state to init failed */ + mtcInvApi_update_task ( node_ptr, "Graceful Recovery Failed" ); + + node_ptr->mtcTimer.ring = false ; + + elog ("%s has MTCALIVE Timeout\n", node_ptr->hostname.c_str()); + + nodeLinkClass::force_full_enable ( node_ptr ); + + break ; + } + else if (( node_ptr->availStatus == MTC_AVAIL_STATUS__POWERED_OFF ) && + ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && + ( node_ptr->bm_provisioned == true ) && + ( node_ptr->bm_accessible == true ) && + ( node_ptr->hwmon_powercycle.state == RECOVERY_STATE__INIT ) && + ( thread_idle ( node_ptr->ipmitool_thread_ctrl )) && + ( node_ptr->ipmitool_thread_info.command != IPMITOOL_THREAD_CMD__POWER_ON )) + { + ilog ("%s powering on unlocked powered off host\n", node_ptr->hostname.c_str()); + if ( ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__POWER_ON ) != PASS ) + { + node_ptr->ipmitool_thread_ctrl.done = true ; + thread_kill ( node_ptr->ipmitool_thread_ctrl , node_ptr->ipmitool_thread_info ) ; + } + } + else if (( node_ptr->availStatus == MTC_AVAIL_STATUS__POWERED_OFF ) && + ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && + ( node_ptr->bm_provisioned == true ) && + ( node_ptr->bm_accessible == true ) && + ( node_ptr->hwmon_powercycle.state == RECOVERY_STATE__INIT ) && + ( thread_done ( node_ptr->ipmitool_thread_ctrl )) && + ( node_ptr->ipmitool_thread_info.command == IPMITOOL_THREAD_CMD__POWER_ON )) + { + if ( ipmi_command_recv ( node_ptr ) == PASS ) + { + ilog ("%s powered on\n", node_ptr->hostname.c_str()); + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__OFFLINE ); + } + } + else if ( node_ptr->mtcAlive_gate == true ) + { + slog ("%s mtcAlive gate unexpectedly set, 
auto-correcting ...\n", + node_ptr->hostname.c_str()); + + node_ptr->mtcAlive_gate = false ; + } + + /* wait some more */ + break ; + } + case MTC_RECOVERY__GOENABLED_TIMER: + { + node_ptr->goEnabled = false ; + + /* See if the host is there and already in the go enabled state */ + send_mtc_cmd ( node_ptr->hostname, MTC_REQ_MAIN_GOENABLED, MGMNT_INTERFACE ); + + /* start the reboot timer - is cought in the mtc alive case */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, this->goenabled_timeout ); + + /* ok time started */ + ilog ("%s waiting for GOENABLED ; with %d sec timeout\n", + node_ptr->hostname.c_str(), + this->goenabled_timeout ); + + + /* Default to unknown health */ + node_ptr->health = NODE_HEALTH_UNKNOWN ; + + /* start waiting fhr the ENABLE READY message */ + recoveryStageChange ( node_ptr, MTC_RECOVERY__GOENABLED_WAIT ); + break ; + } + case MTC_RECOVERY__GOENABLED_WAIT: + { + /* The healthy code comes from the host in the mtcAlive message. + * This 'if' clause was introduced to detected failure of host + * without having to wait for the GOENABLED phase to timeout. + * + * This case is particularly important in the DOR case where + * computes may have come up and fail to run their manifests + * and sit there in an unconfigured state. We don't want them to + * be gracefully recovered to enabled in that case. Instead + * we want to recover the card through a reset as quickly as + * possible. 
*/ + if ( node_ptr->health == NODE_UNHEALTHY ) + { + elog ("%s is UNHEALTHY\n", node_ptr->hostname.c_str()); + mtcTimer_reset ( node_ptr->mtcTimer ); + this->force_full_enable ( node_ptr ); + } + /* search for the Go Enable message */ + else if ( node_ptr->goEnabled_failed == true ) + { + elog ("%s got GOENABLED Failed\n", node_ptr->hostname.c_str()); + mtcTimer_reset ( node_ptr->mtcTimer ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_INTEST_FAIL ); + this->force_full_enable ( node_ptr ); + } + + /* search for the Go Enable message */ + else if ( node_ptr->goEnabled == true ) + { + plog ("%s got GOENABLED (Graceful Recovery)\n", node_ptr->hostname.c_str()); + mtcTimer_reset ( node_ptr->mtcTimer ); + + /* O.K. clearing the state now that we got it */ + node_ptr->goEnabled = false ; + + recoveryStageChange ( node_ptr, MTC_RECOVERY__HOST_SERVICES_START ); + } + else if ( node_ptr->mtcTimer.ring == true ) + { + elog ("%s has GOENABLED Timeout\n", node_ptr->hostname.c_str()); + + node_ptr->mtcTimer.ring = false ; + + this->force_full_enable ( node_ptr ); + } + break; + } + + case MTC_RECOVERY__HOST_SERVICES_START: + { + bool start = true ; + + plog ("%s Starting Host Services\n", node_ptr->hostname.c_str()); + if ( this->launch_host_services_cmd ( node_ptr, start ) != PASS ) + { + elog ("%s %s failed ; launch\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.name.c_str()); + node_ptr->hostservices_failed = true ; + this->force_full_enable ( node_ptr ); + } + else + { + recoveryStageChange ( node_ptr, MTC_RECOVERY__HOST_SERVICES_WAIT ); + } + break ; + } + case MTC_RECOVERY__HOST_SERVICES_WAIT: + { + /* Wait for host services to complete - pass or fail. + * The host_services_handler manages timeout. */ + rc = this->host_services_handler ( node_ptr ); + if ( rc == RETRY ) + { + /* wait for the mtcClient's response ... 
*/ + break ; + } + else if ( rc != PASS ) + { + node_ptr->hostservices_failed = true ; + if ( rc == FAIL_TIMEOUT ) + { + elog ("%s %s failed ; timeout\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.name.c_str()); + + mtcInvApi_update_task ( node_ptr, + MTC_TASK_START_SERVICE_TO ); + } + else + { + elog ("%s %s failed ; rc=%d\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.name.c_str(), + rc); + + mtcInvApi_update_task ( node_ptr, + MTC_TASK_START_SERVICE_FAIL ); + } + this->force_full_enable ( node_ptr ); + } + else /* success path */ + { + /* The active controller would never get/be here but + * if it did then just fall through to change state. */ + if ( CPE_SYSTEM ) + { + /* Here we need to run the sub-fnction goenable and start + * host services if this is the other controller in a AIO + * system. */ + if ( NOT_THIS_HOST ) + { + /* start a timer that waits for the /var/run/.compute_config_complete flag */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_COMPUTE_CONFIG_TIMEOUT ); + + /* We will come back to MTC_RECOVERY__HEARTBEAT_START + * after we enable the compute subfunction */ + recoveryStageChange ( node_ptr, MTC_RECOVERY__CONFIG_COMPLETE_WAIT ); + } + else + { + recoveryStageChange ( node_ptr, MTC_RECOVERY__STATE_CHANGE ); + } + } + /* Otherwise in a normal system and not the active controller, + * just start the heartbeat soak */ + else if ( NOT_THIS_HOST ) + { + /* allow the fsm to wait for up to 1 minute for the + * hbsClient's ready event before starting heartbeat + * test. 
*/ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_MINS_1 ); + recoveryStageChange ( node_ptr, MTC_RECOVERY__HEARTBEAT_START ); + } + else + { + recoveryStageChange ( node_ptr, MTC_RECOVERY__STATE_CHANGE ); + } + } + break ; + } + case MTC_RECOVERY__CONFIG_COMPLETE_WAIT: + { + /* look for file */ + if ( node_ptr->mtce_flags & MTC_FLAG__SUBF_CONFIGURED ) + { + plog ("%s-compute configured\n", node_ptr->hostname.c_str()); + + mtcTimer_reset ( node_ptr->mtcTimer ); + + recoveryStageChange ( node_ptr, MTC_RECOVERY__SUBF_GOENABLED_TIMER ); + } + + /* timeout handling */ + else if ( node_ptr->mtcTimer.ring == true ) + { + elog ("%s-compute configuration timeout\n", node_ptr->hostname.c_str()); + + mtcInvApi_update_task ( node_ptr, MTC_TASK_RECOVERY_FAIL ); + nodeLinkClass::force_full_enable ( node_ptr ); + } + else + { + ; /* wait longer */ + } + break ; + } + case MTC_RECOVERY__SUBF_GOENABLED_TIMER: + { + ilog ("%s-compute running out-of-service tests\n", node_ptr->hostname.c_str()); + + /* See if the host is there and already in the go enabled state */ + send_mtc_cmd ( node_ptr->hostname, MTC_REQ_SUBF_GOENABLED, MGMNT_INTERFACE ); + + /* start the reboot timer - is cought in the mtc alive case */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, this->goenabled_timeout ); + + node_ptr->goEnabled_subf = false ; + + /* start waiting for the GOENABLED message */ + recoveryStageChange ( node_ptr, MTC_RECOVERY__SUBF_GOENABLED_WAIT ); + + break ; + } + case MTC_RECOVERY__SUBF_GOENABLED_WAIT: + { + /* search for the Go Enable message */ + if ( node_ptr->goEnabled_failed_subf == true ) + { + elog ("%s-compute one or more out-of-service tests failed\n", node_ptr->hostname.c_str()); + mtcTimer_reset ( node_ptr->mtcTimer ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_RECOVERY_FAIL ); + this->force_full_enable ( node_ptr ); + } + + /* search for the Go Enable message */ + else if ( node_ptr->goEnabled_subf == true ) + { + /* stop the timer */ + mtcTimer_reset 
( node_ptr->mtcTimer ); + + plog ("%s-compute passed out-of-service tests\n", node_ptr->hostname.c_str()); + + /* O.K. clearing the state now that we got it */ + node_ptr->goEnabled_subf = false ; + + /* ok. great, got the go-enabled message, lets move on */ + recoveryStageChange ( node_ptr, MTC_RECOVERY__SUBF_SERVICES_START ); + } + else if ( node_ptr->mtcTimer.ring == true ) + { + elog ("%s-compute out-of-service test execution timeout\n", node_ptr->hostname.c_str()); + node_ptr->mtcTimer.ring = false ; + mtcInvApi_update_task ( node_ptr, MTC_TASK_RECOVERY_FAIL ); + this->force_full_enable ( node_ptr ); + } + else + { + ; /* wait some more */ + } + break ; + } + + case MTC_RECOVERY__SUBF_SERVICES_START: + { + bool start = true ; + bool subf = true ; + + plog ("%s-compute Starting Host Services\n", node_ptr->hostname.c_str()); + + if ( this->launch_host_services_cmd ( node_ptr, start, subf ) != PASS ) + { + elog ("%s-compute %s failed ; launch\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.name.c_str()); + node_ptr->hostservices_failed_subf = true ; + mtcInvApi_update_task ( node_ptr, MTC_TASK_RECOVERY_FAIL ); + this->force_full_enable ( node_ptr ); + } + else + { + recoveryStageChange ( node_ptr, MTC_RECOVERY__SUBF_SERVICES_WAIT ); + } + break ; + } + case MTC_RECOVERY__SUBF_SERVICES_WAIT: + { + /* Wait for host services to complete - pass or fail. + * The host_services_handler manages timeout. */ + rc = this->host_services_handler ( node_ptr ); + if ( rc == RETRY ) + { + /* wait for the mtcClient's response ... 
*/ + break ; + } + else if ( rc != PASS ) + { + node_ptr->hostservices_failed_subf = true ; + if ( rc == FAIL_TIMEOUT ) + { + elog ("%s-compute %s failed ; timeout\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.name.c_str()); + + mtcInvApi_update_task ( node_ptr, + MTC_TASK_START_SERVICE_TO ); + } + else + { + elog ("%s-compute %s failed ; rc=%d\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.name.c_str(), + rc); + + mtcInvApi_update_task ( node_ptr, + MTC_TASK_START_SERVICE_FAIL ); + } + this->force_full_enable ( node_ptr ); + } + else /* success path */ + { + /* allow the fsm to wait for up to 1 minute for the + * hbsClient's ready event before starting heartbeat + * test. */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_MINS_1 ); + recoveryStageChange ( node_ptr, MTC_RECOVERY__HEARTBEAT_START ); + } + break ; + } + case MTC_RECOVERY__HEARTBEAT_START: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + wlog ("%s hbsClient ready event timeout\n", node_ptr->hostname.c_str()); + } + else if ( node_ptr->hbsClient_ready == false ) + { + break ; + } + else + { + mtcTimer_reset ( node_ptr->mtcTimer ); + } + + plog ("%s Starting %d sec Heartbeat Soak (with%s)\n", + node_ptr->hostname.c_str(), + MTC_HEARTBEAT_SOAK_BEFORE_ENABLE, + node_ptr->hbsClient_ready ? 
" ready event" : "out ready event" ); + + /* Enable the heartbeat service for Graceful Recovery */ + send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST ); + + /* allow heartbeat to run for 10 seconds before we declare enable */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_HEARTBEAT_SOAK_BEFORE_ENABLE ); + + /* if heartbeat is not working then we will + * never get here and enable the host */ + recoveryStageChange ( node_ptr, MTC_RECOVERY__HEARTBEAT_SOAK ); + + break ; + } + case MTC_RECOVERY__HEARTBEAT_SOAK: + { + if ( node_ptr->mtcTimer.ring == true ) + { + /* if heartbeat is not working then we will + * never get here and enable the host */ + recoveryStageChange ( node_ptr, MTC_RECOVERY__STATE_CHANGE ); + } + break ; + } + case MTC_RECOVERY__STATE_CHANGE: + { + if ( CPE_SYSTEM ) + { + /* Set node as unlocked-enabled */ + subfStateChange ( node_ptr, MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__AVAILABLE ); + } + + if ( node_ptr->degrade_mask ) + { + /* Allow host to enable in the degraded state */ + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__DEGRADED ); + } + else + { + /* Set node as unlocked-enabled */ + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__AVAILABLE ); + } + + /* Only run hardware monitor board management is provisioned */ + if ( node_ptr->bm_provisioned == true ) + { + send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST ); + } + + /* Inform the VIM that this host is enabled */ + mtcVimApi_state_change ( node_ptr, VIM_HOST_ENABLED, 3 ); + + /* Start a timer that failed enable if the work queue + * does not empty or if commands in the done queue have failed */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, work_queue_timeout ); + + mtcInvApi_force_task ( node_ptr, "" ); + + recoveryStageChange ( node_ptr, MTC_RECOVERY__WORKQUEUE_WAIT ) ; + break ; + } + case MTC_RECOVERY__WORKQUEUE_WAIT: + { + rc = 
workQueue_done ( node_ptr ); + if ( rc == RETRY ) + { + /* wait longer */ + break ; + } + else if ( rc == PASS ) + { + /* Start Graceful Recovery */ + recoveryStageChange ( node_ptr, MTC_RECOVERY__ENABLE_START ) ; + break ; + } + else if ( rc == FAIL_WORKQ_TIMEOUT ) + { + wlog ("%s Graceful Recovery failed ; workQueue empty timeout, purging ...\n", node_ptr->hostname.c_str()); + workQueue_purge ( node_ptr ); + } + else if ( rc != PASS ) + { + wlog ("%s Graceful Recovery failed ; doneQueue contains failed commands\n", node_ptr->hostname.c_str()); + } + mtcInvApi_update_task ( node_ptr, MTC_TASK_RECOVERY_FAIL ); + nodeLinkClass::force_full_enable ( node_ptr ); + break ; + } + case MTC_RECOVERY__ENABLE_START: + { + /* Create the recovery enable timer. This timer is short. + * A node need to stay enabled with the hartbeat service + * running for a period of time before declaring it enabled */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_HEARTBEAT_SOAK_BEFORE_ENABLE ); + + recoveryStageChange ( node_ptr, MTC_RECOVERY__ENABLE_WAIT ) ; + break; + } + case MTC_RECOVERY__ENABLE_WAIT: + { + /* When this timer fires the host has been up for enough time */ + if ( node_ptr->mtcTimer.ring == true ) + { + if ( is_controller(node_ptr) ) + { + if ( mtcSmgrApi_request ( node_ptr, + CONTROLLER_ENABLED, + SMGR_MAX_RETRIES ) != PASS ) + { + wlog ("%s Failed to send 'unlocked-disabled' to HA Service Manager ; allowing enable\n", + node_ptr->hostname.c_str()); + } + } + /* Node Has Recovered */ + node_ptr->graceful_recovery_counter = 0 ; + recoveryStageChange ( node_ptr, MTC_RECOVERY__START ); + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); + node_ptr->health_threshold_counter = 0 ; + node_ptr->enabled_count++ ; + node_ptr->http_retries_cur = 0 ; + + doneQueue_purge ( node_ptr ); + if ( node_ptr->was_dor_recovery_mode ) + { + report_dor_recovery ( node_ptr , "is ENABLED" ); + } + else + { + plog ("%s is ENABLED (Gracefully Recovered)\n", + 
node_ptr->hostname.c_str());
+                }
+                alarm_enabled_clear ( node_ptr, false );
+            }
+            break ;
+        }
+        default:
+        {
+            rc = FAIL_BAD_CASE ;
+            break ;
+        }
+    }
+    return (rc);
+}
+
+/*
+ * Start Stop Host Services Handler
+ * --------------------------------
+ * Polls the outcome of the host services command described by
+ * node_ptr->host_services_req. Intended to be called repeatedly by a
+ * stage handler until it stops returning RETRY.
+ *
+ * Returns PASS           - done fifo has an entry, work fifo is empty and
+ *                          host_services_req.status is PASS
+ *         RETRY          - the work fifo still holds entries ; keep waiting
+ *         FAIL_TIMEOUT   - host_services_timer expired ; both mtcCmd
+ *                          queues are purged
+ *         FAIL_BAD_STATE - work and done fifos are both empty
+ *         FAIL_OPERATION - command completed with a non-PASS status
+ *         FAIL_BAD_PARM  - node_ptr is NULL or host_services_req.cmd is
+ *                          not a host services command
+ *
+ */
+int nodeLinkClass::host_services_handler ( struct nodeLinkClass::node * node_ptr )
+{
+    int rc = FAIL ;
+
+    if ( node_ptr && ( is_host_services_cmd ( node_ptr->host_services_req.cmd ) == true ))
+    {
+        /* Handle command overall umbrella timeout */
+        if ( mtcTimer_expired ( node_ptr->host_services_timer ) )
+        {
+            elog ("%s %s timeout\n",
+                      node_ptr->hostname.c_str(),
+                      node_ptr->host_services_req.name.c_str());
+
+            /* treat as command failure */
+            mtcCmd_workQ_purge ( node_ptr );
+            mtcCmd_doneQ_purge ( node_ptr );
+            rc = FAIL_TIMEOUT ;
+        }
+
+        /* Both the done and work fifos are empty ; there is neither a
+         * pending nor a completed command to wait on so report it as
+         * a bad state rather than waiting for the timeout. */
+        else if (( node_ptr->mtcCmd_done_fifo.size() == 0 ) &&
+                 ( node_ptr->mtcCmd_work_fifo.size() == 0 ))
+        {
+            mtcTimer_reset ( node_ptr->host_services_timer );
+            slog ("%s %s command missing\n",
+                      node_ptr->hostname.c_str(),
+                      node_ptr->host_services_req.name.c_str());
+            rc = FAIL_BAD_STATE ;
+        }
+
+        /* look for 'done' case - pass and failed */
+        else if (( node_ptr->mtcCmd_done_fifo.size() != 0 ) &&
+                 ( node_ptr->mtcCmd_work_fifo.size() == 0 ))
+        {
+            mtcTimer_reset ( node_ptr->host_services_timer );
+            if ( node_ptr->host_services_req.status == PASS )
+            {
+                ilog ("%s %s completed\n",
+                          node_ptr->hostname.c_str(),
+                          node_ptr->host_services_req.name.c_str());
+                rc = PASS ;
+            }
+            else
+            {
+                wlog ("%s %s ; rc:%d\n",
+                          node_ptr->hostname.c_str(),
+                          node_ptr->host_services_req.status_string.c_str(),
+                          node_ptr->host_services_req.status);
+
+                rc = FAIL_OPERATION ;
+            }
+            /* Purge the done command fifo now that we have consumed the result.
+             * The work fifo is already empty or we would not be in this case */
+            mtcCmd_doneQ_purge ( node_ptr );
+        }
+        /* still working ... the work fifo is not empty */
+        else
+        {
+            /* wait longer */
+            rc = RETRY ;
+        }
+    }
+    else
+    {
+        /* This branch is also taken when node_ptr is NULL, so the
+         * pointer must not be dereferenced unconditionally here.
+         * NOTE(review): the value logged is cmd.parm1 while the check
+         * above is on host_services_req.cmd - confirm which is intended. */
+        slog ("%s invalid host services command (%d)\n",
+                  node_ptr ? node_ptr->hostname.c_str() : "(null)",
+                  node_ptr ? node_ptr->cmd.parm1 : 0 );
+
+        rc = FAIL_BAD_PARM ;
+    }
+
+    return (rc);
+}
+
+
+/* Disable handler
+ * ---------------
+ * Stage driven algorithm that puts a node into the operationally
+ * disabled state. Each call services the single stage currently held in
+ * node_ptr->handlerStage.disable and is expected to be called again
+ * until the MTC_DISABLE__DISABLED stage has run.
+ *
+ * Stages handled:
+ *
+ *  START               - purge queues, clear failure flags, request a
+ *                        power-on if the host is powered off and its
+ *                        board management is provisioned and accessible,
+ *                        apply the force-lock state change, stop the
+ *                        heartbeat (NOT_THIS_HOST only) and launch the
+ *                        stop host services command
+ *  DIS_SERVICES_WAIT   - wait for host_services_handler to finish ;
+ *                        failures are logged but do not stop the disable
+ *  HANDLE_POWERON_SEND - retry the power-on request when the timer expires
+ *  HANDLE_POWERON_RECV - collect the power-on request result
+ *  HANDLE_FORCE_LOCK   - for force-lock queue a reset progression command,
+ *                        otherwise go straight to TASK_STATE_UPDATE
+ *  RESET_HOST_WAIT     - wait for the reset progression to complete, the
+ *                        host to go offline, or the timer to expire
+ *  TASK_STATE_UPDATE   - send MTC_MSG_LOCKED to the host, set the
+ *                        locked-disabled state and inform the VIM
+ *  WORKQUEUE_WAIT      - wait for the work queue to drain
+ *  DISABLED            - final cleanup ; stage returns to START and the
+ *                        admin action is cleared
+ *
+ * Returns PASS, FAIL_BAD_CASE for an unknown stage, or the last return
+ * code collected from ipmi_command_send/recv, host_services_handler or
+ * workQueue_done (including RETRY while waiting).
+ */
+int nodeLinkClass::disable_handler ( struct nodeLinkClass::node * node_ptr )
+{
+    int rc = PASS ;
+
+    switch ( (int)node_ptr->handlerStage.disable )
+    {
+        case MTC_DISABLE__START:
+        {
+            mtcTimer_reset ( node_ptr->mtcTimer );
+
+            /* Purge this hosts work and done queues */
+            workQueue_purge    ( node_ptr );
+            doneQueue_purge    ( node_ptr );
+            mtcCmd_workQ_purge ( node_ptr );
+            mtcCmd_doneQ_purge ( node_ptr );
+
+            /* clear all the enable failure bools */
+            clear_main_failed_bools ( node_ptr );
+            clear_subf_failed_bools ( node_ptr );
+            clear_hostservices_ctls ( node_ptr );
+
+            /* Default next stage ; the power-on path below overrides it */
+            disableStageChange ( node_ptr, MTC_DISABLE__DIS_SERVICES_WAIT) ;
+
+            stop_offline_handler ( node_ptr );
+
+            if (( node_ptr->bm_provisioned == true ) &&
+                ( node_ptr->bm_accessible == true ) &&
+                ( node_ptr->availStatus == MTC_AVAIL_STATUS__POWERED_OFF ))
+            {
+                rc = ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__POWER_ON );
+                if ( rc )
+                {
+                    /* send failed ; retry it from the POWERON_SEND stage */
+                    mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY );
+                    disableStageChange ( node_ptr, MTC_DISABLE__HANDLE_POWERON_SEND) ;
+                }
+                else
+                {
+                    mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY );
+                    disableStageChange ( node_ptr, MTC_DISABLE__HANDLE_POWERON_RECV) ;
+                }
+
+                if ( rc == PASS )
+                {
+                    ilog ("%s Power On request sent\n", node_ptr->hostname.c_str());
+                }
+            }
+
+            if ( node_ptr->adminAction == MTC_ADMIN_ACTION__FORCE_LOCK )
+            {
+                mtc_nodeAvailStatus_enum locked_status = MTC_AVAIL_STATUS__OFFLINE ;
+                plog ("%s Administrative 'force-lock' Operation\n", node_ptr->hostname.c_str());
+
+                /* If the host was inservice then set its locked state as ONLINE for now.
+                 * Otherwise its defaulted to offline */
+                if (( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) ||
+                    ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED  ) ||
+                    ( node_ptr->availStatus == MTC_AVAIL_STATUS__INTEST    ) ||
+                    ( node_ptr->availStatus == MTC_AVAIL_STATUS__FAILED    ))
+                {
+                    locked_status = MTC_AVAIL_STATUS__ONLINE ;
+                }
+
+                /* The admin state is only set to locked later, in the
+                 * TASK_STATE_UPDATE stage */
+                allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED,
+                                           MTC_OPER_STATE__DISABLED,
+                                           locked_status );
+
+                if ( CPE_SYSTEM )
+                {
+                    subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED,
+                                                locked_status );
+                }
+            }
+            else
+            {
+                plog ("%s Administrative 'lock' Operation\n", node_ptr->hostname.c_str());
+            }
+
+            /* reset retries counter in prep for next stage */
+            node_ptr->retries = 0 ;
+            node_ptr->http_retries_cur = 0 ;
+            node_ptr->pmond_ready = false ;
+
+            /* Clear all degrade flags except for the HWMON one */
+            clear_host_degrade_causes ( node_ptr->degrade_mask );
+            node_ptr->degraded_resources_list.clear();
+
+            if ( is_controller(node_ptr) )
+            {
+                mtcInvApi_update_task ( node_ptr, MTC_TASK_DISABLE_CONTROL );
+            }
+            // else
+            // {
+            //     consider putting in the host type
+            // }
+
+            if ( NOT_THIS_HOST )
+            {
+                /* Disable path for Controllers */
+                if ( is_controller(node_ptr) )
+                {
+                    if ( mtcSmgrApi_request ( node_ptr,
+                                              CONTROLLER_LOCKED,
+                                              SMGR_MAX_RETRIES ) != PASS )
+                    {
+                        wlog ("%s Failed to send 'locked-disabled' to HA Service Manager\n",
+                                  node_ptr->hostname.c_str() );
+                    }
+                }
+
+                /* Clear the minor flag if it is set for this host */
+                for ( int iface = 0 ; iface < MAX_IFACES ; iface++ )
+                {
+                    hbs_minor_clear ( node_ptr, (iface_enum)iface );
+                }
+
+                /* Turn off Heartbeat to that host */
+                send_hbs_command ( node_ptr->hostname, MTC_CMD_STOP_HOST );
+            }
+
+            /* If the stage is still MTC_DISABLE__DIS_SERVICES_WAIT then the
+             * host should already be powered on so lets send the stop
+             * services command */
+            if ( node_ptr->handlerStage.disable == MTC_DISABLE__DIS_SERVICES_WAIT )
+            {
+                bool start = false ;
+                if ( this->launch_host_services_cmd ( node_ptr, start ) != PASS )
+                {
+                    wlog ("%s %s failed ; launch\n",
+                              node_ptr->hostname.c_str(),
+                              node_ptr->host_services_req.name.c_str());
+
+                    /* proceed to handle force lock if the launch fails */
+                    disableStageChange ( node_ptr, MTC_DISABLE__HANDLE_FORCE_LOCK );
+                }
+            }
+            break ;
+        }
+        case MTC_DISABLE__DIS_SERVICES_WAIT:
+        {
+            /* manage host services stop command to this target */
+            rc = this->host_services_handler ( node_ptr );
+            if ( rc == RETRY )
+            {
+                break ;
+            }
+            else if ( rc != PASS )
+            {
+                /* A failed stop is logged only ; the disable carries on */
+                if ( rc == FAIL_TIMEOUT )
+                {
+                    wlog ("%s %s failed ; timeout\n",
+                              node_ptr->hostname.c_str(),
+                              node_ptr->host_services_req.name.c_str());
+                }
+                else
+                {
+                    wlog ("%s %s failed ; rc:%d\n",
+                              node_ptr->hostname.c_str(),
+                              node_ptr->host_services_req.name.c_str(),
+                              rc);
+                }
+            }
+            disableStageChange ( node_ptr, MTC_DISABLE__HANDLE_FORCE_LOCK) ;
+            break ;
+        }
+        case MTC_DISABLE__HANDLE_POWERON_SEND:
+        {
+            if ( mtcTimer_expired ( node_ptr->mtcTimer ))
+            {
+                rc = ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__POWER_ON );
+                if ( rc )
+                {
+                    elog ("%s failed to send Power On request\n", node_ptr->hostname.c_str());
+                    disableStageChange ( node_ptr, MTC_DISABLE__HANDLE_FORCE_LOCK) ;
+                }
+                else
+                {
+                    mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY );
+                    disableStageChange ( node_ptr, MTC_DISABLE__HANDLE_POWERON_RECV) ;
+                }
+            }
+            break ;
+        }
+        case MTC_DISABLE__HANDLE_POWERON_RECV:
+        {
+            if ( mtcTimer_expired ( node_ptr->mtcTimer ))
+            {
+                rc = ipmi_command_recv ( node_ptr );
+                if ( rc == RETRY )
+                {
+                    mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RETRY_WAIT );
+                    break ;
+                }
+                if ( rc )
+                {
+                    elog ("%s auto power-on failed\n", node_ptr->hostname.c_str());
+                }
+                else
+                {
+                    ilog ("%s is Powering On\n", node_ptr->hostname.c_str());
+                }
+                disableStageChange ( node_ptr, MTC_DISABLE__HANDLE_FORCE_LOCK) ;
+            }
+            break ;
+        }
+        case MTC_DISABLE__HANDLE_FORCE_LOCK:
+        {
+            /* If this is a force lock against a compute then we have to reset it */
+            if ( node_ptr->adminAction == MTC_ADMIN_ACTION__FORCE_LOCK )
+            {
+                /* Stop the timer if it is active coming into this case */
+                mtcTimer_reset ( node_ptr->mtcTimer );
+
+                /* purge in support of retries */
+                mtcCmd_doneQ_purge ( node_ptr );
+                mtcCmd_workQ_purge ( node_ptr );
+
+                ilog ("%s Issuing Force-Lock Reset\n", node_ptr->hostname.c_str());
+                mtcCmd_init ( node_ptr->cmd );
+                node_ptr->cmd.stage = MTC_CMD_STAGE__START ;
+                node_ptr->cmd.cmd   = MTC_OPER__RESET_PROGRESSION ;
+                node_ptr->cmd.parm1 = 2 ; /* 2 retries */
+                node_ptr->mtcCmd_work_fifo.push_back(node_ptr->cmd);
+
+                /* Umbrella timeout ; twice the time of the first attempt
+                 * plus the parm1 retries */
+                int timeout = ((MTC_RESET_PROG_TIMEOUT*(node_ptr->cmd.parm1+1))*2) ;
+                mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, timeout ) ;
+
+                mtcInvApi_update_task ( node_ptr, MTC_TASK_DISABLE_FORCE );
+
+                /* Force instance evacuation */
+                disableStageChange ( node_ptr, MTC_DISABLE__RESET_HOST_WAIT );
+            }
+            else
+            {
+                disableStageChange ( node_ptr, MTC_DISABLE__TASK_STATE_UPDATE ) ;
+            }
+            break ;
+        }
+        case MTC_DISABLE__RESET_HOST_WAIT:
+        {
+            /* Check for the operation timeout - should not occur */
+            if ( node_ptr->mtcTimer.ring == true )
+            {
+                wlog ("%s Reset Progression Timeout ; aborting ...\n", node_ptr->hostname.c_str());
+
+                /* Purge this hosts work and done queues */
+                mtcCmd_doneQ_purge ( node_ptr );
+                mtcCmd_workQ_purge ( node_ptr );
+
+                /* aborting after timeout ; need to avoid a stuck FSM
+                 * reset progression already did retries */
+                mtcInvApi_update_task ( node_ptr, MTC_TASK_REBOOT_TIMEOUT );
+
+                disableStageChange ( node_ptr, MTC_DISABLE__TASK_STATE_UPDATE );
+            }
+
+            /* Handle the case where the done fifo is empty ; avoid the segfault */
+            else if ( node_ptr->mtcCmd_done_fifo.size() == 0 )
+            {
+                /* Should never get here but .....
+                 * Handle the case where the work queue is also empty.
+                 * Avoid stuck FSM */
+                if ( node_ptr->mtcCmd_work_fifo.size() == 0 )
+                {
+                    slog ("%s unexpected empty work queue ; trying reboot/reset again\n",
+                              node_ptr->hostname.c_str() );
+
+                    /* reset progression failed so try again */
+                    disableStageChange ( node_ptr, MTC_DISABLE__HANDLE_FORCE_LOCK );
+                }
+                else
+                {
+                    ; /* typical wait path - wait some more */
+                }
+            }
+            else
+            {
+                /* TODO: Future: get the specific command rather than just the head */
+                node_ptr->mtcCmd_done_fifo_ptr = node_ptr->mtcCmd_done_fifo.begin();
+
+                /* defensive programming ; the iterator came from the done
+                 * fifo so it must be compared against the done fifo's
+                 * end(), never against another container's end() */
+                if ( node_ptr->mtcCmd_done_fifo_ptr != node_ptr->mtcCmd_done_fifo.end())
+                {
+                    /* exit reset progression and any retries once the host is offline */
+                    if ( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFLINE )
+                    {
+                        mtcTimer_stop ( node_ptr->mtcTimer );
+                        stop_offline_handler ( node_ptr );
+                        disableStageChange ( node_ptr, MTC_DISABLE__TASK_STATE_UPDATE ) ;
+                    }
+                    else if ( node_ptr->mtcCmd_done_fifo_ptr->cmd != MTC_OPER__RESET_PROGRESSION )
+                    {
+                        slog ("%s purging front entry of done cmdQueue\n",
+                                  node_ptr->hostname.c_str());
+
+                        /* reset progression failed so try again */
+                        disableStageChange ( node_ptr, MTC_DISABLE__HANDLE_FORCE_LOCK );
+                    }
+                    else
+                    {
+                        ilog ("%s host still not offline ; trying reboot/reset again ....\n", node_ptr->hostname.c_str() );
+
+                        /* reset progression failed so try again */
+                        disableStageChange ( node_ptr, MTC_DISABLE__HANDLE_FORCE_LOCK );
+                    }
+                }
+                else
+                {
+                    slog ("%s unexpected empty work queue ; trying force lock\n", node_ptr->hostname.c_str() );
+
+                    /* reset progression failed so try again */
+                    disableStageChange ( node_ptr, MTC_DISABLE__HANDLE_FORCE_LOCK );
+                }
+            }
+            break ;
+        }
+
+        case MTC_DISABLE__TASK_STATE_UPDATE:
+        {
+            mtc_nodeAvailStatus_enum avail ;
+
+            /* Tell the host that it is locked */
+            send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, MGMNT_INTERFACE );
+            if ( infra_network_provisioned )
+            {
+                send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, INFRA_INTERFACE );
+            }
+
+            /* Change the oper and avail states in the database */
+            if (( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFLINE ) ||
+                ( node_ptr->availStatus == MTC_AVAIL_STATUS__FAILED ) ||
+                ( node_ptr->availStatus == MTC_AVAIL_STATUS__POWERED_OFF ))
+            {
+                avail = MTC_AVAIL_STATUS__OFFLINE ;
+            }
+            else
+            {
+                avail = MTC_AVAIL_STATUS__ONLINE ;
+            }
+            allStateChange ( node_ptr, MTC_ADMIN_STATE__LOCKED, MTC_OPER_STATE__DISABLED, avail );
+            mtcInvApi_subf_states (node_ptr,"disabled",get_availStatus_str(avail));
+
+            /* Inform the VIM that this host is disabled */
+            mtcVimApi_state_change ( node_ptr, VIM_HOST_DISABLED, 3 );
+
+            /* Inform the VIM that the dataports are offline */
+            update_dport_states (node_ptr, MTC_EVENT_AVS_OFFLINE );
+            mtcVimApi_state_change ( node_ptr, VIM_DPORT_OFFLINE, 3 );
+
+            /* Start a timer that waits for the work queue to complete */
+            mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, work_queue_timeout );
+            disableStageChange( node_ptr, MTC_DISABLE__WORKQUEUE_WAIT );
+
+            break ;
+        }
+        case MTC_DISABLE__WORKQUEUE_WAIT:
+        {
+            rc = workQueue_done ( node_ptr );
+            if ( rc == RETRY )
+            {
+                /* wait longer */
+                break ;
+            }
+            else if ( rc == FAIL_WORKQ_TIMEOUT )
+            {
+                wlog ("%s Disable warning ; workQueue empty timeout, purging ...\n", node_ptr->hostname.c_str());
+                workQueue_purge ( node_ptr );
+            }
+            else if ( rc != PASS )
+            {
+                wlog ("%s Disable warning ; doneQueue contained failed commands\n", node_ptr->hostname.c_str());
+            }
+            /* work queue problems are warnings only ; finish the disable */
+            disableStageChange( node_ptr, MTC_DISABLE__DISABLED );
+            break ;
+        }
+        case MTC_DISABLE__DISABLED:
+        {
+            /* Stop the timer if it is active coming into this case */
+            mtcTimer_reset ( node_ptr->mtcTimer );
+
+            /* This will get updated during the next
+             * mtcLive message from this blade */
+            node_ptr->health = NODE_HEALTH_UNKNOWN ;
+
+            /* Set the lock alarm */
+            if (( node_ptr->adminAction == MTC_ADMIN_ACTION__LOCK ) ||
+                ( node_ptr->adminAction == MTC_ADMIN_ACTION__FORCE_LOCK ))
+            {
+                mtcAlarm_warning ( node_ptr->hostname, MTC_ALARM_ID__LOCK );
+                node_ptr->alarms[MTC_ALARM_ID__LOCK] = FM_ALARM_SEVERITY_WARNING ;
+            }
+
+            /* open the mtcAlive gate while we are disabled */
+            node_ptr->mtcAlive_gate = false ;
+
+            /* Re-arm the FSM for the next disable request */
+            disableStageChange( node_ptr, MTC_DISABLE__START );
+            adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE );
+
+            node_ptr->mtcCmd_work_fifo.clear();
+            node_ptr->mtcCmd_done_fifo.clear();
+            node_ptr->http_retries_cur = 0 ;
+
+            /***** Powercycle FSM Stuff *****/
+
+            recovery_ctrl_init ( node_ptr->hwmon_reset );
+            recovery_ctrl_init ( node_ptr->hwmon_powercycle );
+
+            /* Load configured mtcAlive and goEnabled timers */
+            LOAD_NODETYPE_TIMERS ;
+
+            mtcInvApi_force_task ( node_ptr, "" );
+
+            plog ("%s Disable Complete\n", node_ptr->hostname.c_str());
+
+            break ;
+        }
+
+        default:
+        {
+            elog ("%s Bad Case (%d)\n", node_ptr->hostname.c_str(),
+                                         node_ptr->handlerStage.disable );
+            rc = FAIL_BAD_CASE ;
+        }
+    }
+    return (rc);
+}
+
+/* Uptime handler
+ * ---------------*/
+int nodeLinkClass::uptime_handler ( void )
+{
+    /* Service uptime refresh timer */
+    if ( this->mtcTimer_uptime.ring == true )
+    {
+        int rc = PASS ;
+        unsigned int uptime = 0;
+
+        /* Send uptime valies to inventory */
+        for ( this->host = this->hostname_inventory.begin () ;
+              this->host != this->hostname_inventory.end () ;
+              this->host++ )
+        {
+            bool do_uptime_update = false ;
+            string hostname = "" ;
+
+            hostname.append( this->host->c_str()) ;
+
+            /* only update every 5 minutes after being up for an hour */
+            uptime = this->get_uptime ( hostname ) ;
+            if ( uptime < 3600 )
+            {
+                do_uptime_update = true ;
+            }
+            else
+            {
+                int ctr = this->get_uptime_refresh_ctr ( hostname );
+
+                /* Update uptime only every 5 minutes after the
+                 * host has been up for more than one hour */
+                if (( uptime > 3600 ) && ( (ctr*(this->uptime_period)) >= MTC_MINS_5 ))
+                {
+                    do_uptime_update = true ;
+                }
+                else
+                {
+                    this->set_uptime_refresh_ctr ( hostname , (ctr+1) ) ;
+                }
+            }
+            /* Handle update if required */
+            if (( rc != PASS ) && (
do_uptime_update == true ))
+            {
+                wlog ("%s Uptime refresh bypassed due to previous error\n", hostname.c_str());
+            }
+            else if (( do_uptime_update == true ) || ( uptime == 0 ))
+            {
+                /* Sent uptime update request.
+                 * But exit this iteration if we get an error as we
+                 * don't want to stall mtce for all hosts on such a
+                 * simple operation */
+
+                // ilog ("%s - %d\n", hostname.c_str(), uptime );
+                if ( uptime == 0 )
+                {
+                    this->set_uptime ( hostname, uptime , false ) ;
+                }
+                else
+                {
+                    this->set_uptime ( hostname, uptime , true ) ;
+                }
+            }
+        }
+        /* Re-Start the uptime timer */
+        mtcTimer_start ( this->mtcTimer_uptime, mtcTimer_handler,
+                       (this->uptime_period+(rand()%10)));
+    }
+    return PASS ;
+}
+
+/* Offline handler
+ * ---------------
+ * Stage driven audit that decides when a host has gone offline.
+ *
+ * Each audit cycle clears the mtcAlive_mgmnt / mtcAlive_infra flags,
+ * requests a mtcAlive from the host and waits offline_period msecs.
+ * If neither flag was set during that period the miss is counted ; once
+ * the count exceeds offline_threshold the host's availability is changed
+ * to offline, the VIM is informed and the audit returns to idle.
+ *
+ * Stages handled (node_ptr->offlineStage):
+ *
+ *  IDLE          - nothing to do
+ *  START         - reset counters and timer, then fall through to
+ *  SEND_MTCALIVE - request mtcAlive and start the offline_timer
+ *  WAIT          - evaluate the result when the offline_timer expires
+ *
+ * Always returns PASS.
+ */
+int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr )
+{
+    switch ( (int)node_ptr->offlineStage )
+    {
+        case MTC_OFFLINE__IDLE:
+        {
+            return (PASS) ; /* typical path */
+        }
+        case MTC_OFFLINE__START:
+        {
+            node_ptr->mtcAlive_mgmnt = false ;
+            node_ptr->mtcAlive_infra = false ;
+
+            node_ptr->offline_search_count = 0 ;
+
+            mtcTimer_reset ( node_ptr->offline_timer );
+            ilog ("%s starting %d msec offline audit (%s-%s)\n",
+                      node_ptr->hostname.c_str(),
+                      offline_period,
+                      operState_enum_to_str(node_ptr->operState).c_str(),
+                      availStatus_enum_to_str(node_ptr->availStatus).c_str());
+
+            node_ptr->offlineStage = MTC_OFFLINE__SEND_MTCALIVE ;
+            /* fall through on start */
+        }
+        case MTC_OFFLINE__SEND_MTCALIVE:
+        {
+            alog2 ("%s searching for offline (%s-%s)\n",
+                       node_ptr->hostname.c_str(),
+                       operState_enum_to_str(node_ptr->operState).c_str(),
+                       availStatus_enum_to_str(node_ptr->availStatus).c_str());
+
+            /* Clear the flags so that only a mtcAlive received during
+             * the coming period counts as the host being seen */
+            node_ptr->mtcAlive_gate  = false ;
+            node_ptr->mtcAlive_mgmnt = false ;
+            node_ptr->mtcAlive_infra = false ;
+
+            /* Request a mtcAlive from host from Mgmnt and Infra (if provisioned) */
+            send_mtc_cmd ( node_ptr->hostname, MTC_REQ_MTCALIVE, MGMNT_INTERFACE );
+            if ( infra_network_provisioned )
+            {
+                send_mtc_cmd ( node_ptr->hostname, MTC_REQ_MTCALIVE, INFRA_INTERFACE );
+            }
+
+            /* reload the timer */
+            mtcTimer_start_msec ( node_ptr->offline_timer, mtcTimer_handler, offline_period );
+
+            node_ptr->offlineStage = MTC_OFFLINE__WAIT ;
+
+            break ;
+        }
+        case MTC_OFFLINE__WAIT:
+        {
+            /* be sure the mtcAlive gate is open */
+            node_ptr->mtcAlive_gate = false ;
+            if ( mtcTimer_expired ( node_ptr->offline_timer ) == true )
+            {
+                if ( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFLINE )
+                {
+                    /* something else already changed the host to offline */
+                    plog ("%s offline (external)\n", node_ptr->hostname.c_str());
+                    node_ptr->offlineStage = MTC_OFFLINE__IDLE ;
+                }
+                else if ( node_ptr->operState == MTC_OPER_STATE__ENABLED )
+                {
+                    slog ("%s offline search while 'enabled' ; invalid\n", node_ptr->hostname.c_str());
+                    node_ptr->offlineStage = MTC_OFFLINE__IDLE ;
+                }
+                else if ( !node_ptr->mtcAlive_mgmnt && !node_ptr->mtcAlive_infra )
+                {
+                    /* no mtcAlive on either interface this period */
+                    if ( ++node_ptr->offline_search_count > offline_threshold )
+                    {
+                        node_ptr->mtcAlive_online = false ;
+
+                        plog ("%s going offline ; (threshold (%d msec * %d)\n",
+                                  node_ptr->hostname.c_str(),
+                                  offline_period,
+                                  offline_threshold );
+
+                        availStatusChange ( node_ptr, MTC_AVAIL_STATUS__OFFLINE );
+
+                        /* Inform the VIM that this host is offline */
+                        mtcVimApi_state_change ( node_ptr, VIM_HOST_OFFLINE, 1 );
+
+                        node_ptr->offlineStage = MTC_OFFLINE__IDLE ;
+                    }
+                    else
+                    {
+                        alog ("%s missed mtcAlive %d of %d times\n",
+                                  node_ptr->hostname.c_str(),
+                                  node_ptr->offline_search_count,
+                                  offline_threshold );
+                    }
+                }
+                else
+                {
+                    /* Getting here means at least one of the mtcAlive
+                     * flags is set ; the host is still talking to us.
+                     * Note that the miss count is not reset here. */
+                    node_ptr->mtcAlive_online = true ;
+                    ilog ("%s still seeing mtcAlive (%c:%c)\n",
+                              node_ptr->hostname.c_str(),
+                              node_ptr->mtcAlive_mgmnt ? 'Y' : 'n',
+                              node_ptr->mtcAlive_infra ? 'Y' : 'n');
+                }
+
+                if ( node_ptr->offlineStage == MTC_OFFLINE__IDLE )
+                {
+                    ilog ("%s exiting offline handling\n", node_ptr->hostname.c_str());
+                }
+                else
+                {
+                    /* run another audit cycle */
+                    node_ptr->offlineStage = MTC_OFFLINE__SEND_MTCALIVE ;
+                }
+            }
+            break ;
+        }
+        default:
+        {
+            slog ("%s unexpected stage ; correcting to idle\n",
+                      node_ptr->hostname.c_str());
+
+            node_ptr->offlineStage = MTC_OFFLINE__IDLE ;
+        }
+    }
+    return (PASS);
+}
+
+/* Online handler
+ * ---------------
+ * Stage driven audit that tracks the online/offline availability of a
+ * host based on the mtcAlive_online flag sampled every online_period.
+ *
+ * Nothing is done for hosts whose availability status is available,
+ * degraded, offduty, intest or not-installed.
+ *
+ * Stages handled (node_ptr->onlineStage):
+ *
+ *  START    - clear the mtcAlive_online flag and miss count, start timer
+ *  RETRYING - restart the timer without clearing the miss count
+ *  WAITING  - when the timer rings, update inventory to 'offline' after
+ *             more than MTC_OFFLINE_MISSES consecutive misses, or to
+ *             'online' when mtcAlive was seen ; then start over
+ *
+ * Always returns PASS.
+ */
+int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr )
+{
+    int rc = PASS ;
+
+    /* don't need to manage the offline or online state
+     * for the following availability states */
+    if (( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) ||
+        ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ) ||
+        ( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFDUTY ) ||
+        ( node_ptr->availStatus == MTC_AVAIL_STATUS__INTEST ) ||
+        ( node_ptr->availStatus == MTC_AVAIL_STATUS__NOT_INSTALLED ))
+    {
+        return (PASS);
+    }
+
+    switch ( (int)node_ptr->onlineStage )
+    {
+        case MTC_ONLINE__START:
+        {
+            alog3 ("%s Online Handler (%d)\n",
+                       node_ptr->hostname.c_str(),
+                       node_ptr->onlineStage );
+
+            if ( node_ptr->mtcAlive_gate == true )
+            {
+                alog ("%s mtcAlive gate unexpectedly set, correcting ...\n",
+                          node_ptr->hostname.c_str());
+
+                node_ptr->mtcAlive_gate = false ;
+            }
+
+            /* Start each audit cycle with the online flag and the miss
+             * count cleared */
+            node_ptr->mtcAlive_online = false ;
+            node_ptr->mtcAlive_misses = 0 ;
+
+            /* Start mtcAlive message timer */
+            mtcTimer_start ( node_ptr->mtcAlive_timer, mtcTimer_handler, online_period );
+            node_ptr->onlineStage = MTC_ONLINE__WAITING ;
+            break ;
+        }
+        case MTC_ONLINE__RETRYING:
+        {
+            /* Start mtcAlive message timer */
+            mtcTimer_start ( node_ptr->mtcAlive_timer, mtcTimer_handler, online_period );
+            node_ptr->onlineStage = MTC_ONLINE__WAITING ;
+            break ;
+        }
+        case MTC_ONLINE__WAITING:
+        {
+            if ( node_ptr->mtcAlive_timer.ring == false )
+                break ;
+
+            alog ("%s mtcAlive [%s] [ misses:%d]\n",
+                      node_ptr->hostname.c_str(),
+                      node_ptr->mtcAlive_online ? "Yes" : "No",
+                      node_ptr->mtcAlive_misses );
+
+            if ( node_ptr->mtcAlive_online == false )
+            {
+                node_ptr->mtcAlive_hits = 0 ;
+                if ( node_ptr->mtcAlive_misses++ > MTC_OFFLINE_MISSES )
+                {
+                    /* Too many consecutive misses ; if the host is not
+                     * already offline or powered off then take it offline */
+                    if (( node_ptr->availStatus != MTC_AVAIL_STATUS__OFFLINE ) &&
+                        ( node_ptr->availStatus != MTC_AVAIL_STATUS__POWERED_OFF ))
+                    {
+                        ilog ("%s mtcAlive lost ; going 'offline'\n",
+                                  node_ptr->hostname.c_str());
+
+                        clear_service_readies ( node_ptr );
+
+                        /* otherwise change state */
+                        mtcInvApi_update_state(node_ptr, MTC_JSON_INV_AVAIL,"offline" );
+                        if ( CPE_SYSTEM )
+                        {
+                            mtcInvApi_update_state(node_ptr, MTC_JSON_INV_AVAIL_SUBF,"offline" );
+                        }
+
+                        /* Inform the VIM that this host is offline */
+                        mtcVimApi_state_change ( node_ptr, VIM_HOST_OFFLINE, 1 );
+                    }
+                }
+                else
+                {
+                    /* handle retries < MTC_OFFLINE_MISSES */
+                    node_ptr->mtcAlive_timer.ring = false ;
+                    node_ptr->onlineStage = MTC_ONLINE__RETRYING ;
+                    break ;
+                }
+            }
+            else
+            {
+                bool gate_online = false ;
+
+                /* if we are getting counts then the node is up so change status */
+                if ( node_ptr->availStatus != MTC_AVAIL_STATUS__ONLINE )
+                {
+                    node_ptr->mtcAlive_hits++ ;
+                    if ( node_ptr->availStatus == MTC_AVAIL_STATUS__POWERED_OFF )
+                    {
+                        /* need MTC_MTCALIVE_HITS_TO_GO_ONLINE mtcAlive hits
+                         * before we allow a powered-off host to go online */
+                        if ( node_ptr->mtcAlive_hits < MTC_MTCALIVE_HITS_TO_GO_ONLINE )
+                        {
+                            gate_online = true ;
+                            dlog ("%s ... %d\n", node_ptr->hostname.c_str(), node_ptr->mtcAlive_hits );
+                        }
+                    }
+
+                    if ( gate_online == false )
+                    {
+                        ilog ("%s mtcAlive ; going 'online'\n",
+                                  node_ptr->hostname.c_str());
+
+                        mtcInvApi_update_state ( node_ptr, MTC_JSON_INV_AVAIL, "online" );
+                        if ( CPE_SYSTEM )
+                        {
+                            mtcInvApi_update_state ( node_ptr, MTC_JSON_INV_AVAIL_SUBF, "online" );
+                        }
+                    }
+                }
+            }
+
+            /* While the host is locked ... */
+            if ( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED )
+            {
+                /* ... keep the 'host locked' file on this host refreshed while in the locked state
+                 * ... only the management interface is used ; the infra send is disabled */
+                send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, MGMNT_INTERFACE );
+                // send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, INFRA_INTERFACE );
+            }
+
+            /* Start over */
+            node_ptr->mtcAlive_timer.ring = false ;
+            node_ptr->onlineStage = MTC_ONLINE__START ;
+            break ;
+        }
+        default:
+            node_ptr->onlineStage = MTC_ONLINE__START ;
+    }
+    return (rc);
+}
+
+
+/* Controller Swact Handler
+ * ------------------------
+ * Using a REST API into HA Service Manager through Inventory, this handler
+ * is responsible for quering for active services on the specified
+ * controller and then if services are found to be running , requesting
+ * migration of those active services away from that controller */
+
+#define SWACT_DONE \
+{ \
+    if ( node_ptr->mtcSwact_timer.tid ) \
+    { \
+        mtcTimer_stop ( node_ptr->mtcSwact_timer ); \
+    } \
+    mtcTimer_start ( node_ptr->mtcSwact_timer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY ); \
+    node_ptr->swactStage = MTC_SWACT__DONE ; \
+}
+
+#define SWACT_FAIL_THRESHOLD (3)
+#define SWACT_RETRY_THRESHOLD (10)
+#define SWACT_FAIL_MSEC_DELAY (250)
+#define SWACT_RECV_MSEC_DELAY (50)
+#define SWACT_POLL_DELAY (10)
+#define SWACT_TIMEOUT_DELAY
(50) + +int nodeLinkClass::swact_handler ( struct nodeLinkClass::node * node_ptr ) +{ + int rc = PASS ; + + if ( daemon_is_file_present ( PLATFORM_SIMPLEX_MODE ) == true ) + { + slog ("%s rejecting Swact request in simplex mode\n", node_ptr->hostname.c_str()); + node_ptr->swactStage = MTC_SWACT__START ; + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); + return (PASS); + } + switch ( (int)node_ptr->swactStage ) + { + /* Start / Init Stage */ + case MTC_SWACT__START: + { + plog ("%s Administrative SWACT Requested\n", node_ptr->hostname.c_str() ); + + /* Cleanup and init the swact timer - start fresh */ + if ( node_ptr->mtcSwact_timer.tid ) + { + wlog ("%s Cancelling outstanding Swact timer\n", node_ptr->hostname.c_str()); + mtcTimer_stop ( node_ptr->mtcSwact_timer ); + } + mtcTimer_init ( node_ptr->mtcSwact_timer ); + + /* reset error / control Counters to zero */ + nodeLinkClass::smgrEvent.count = 0 ; + nodeLinkClass::smgrEvent.fails = 0 ; + nodeLinkClass::smgrEvent.cur_retries = 0 ; + + /* Empty the event message strings */ + nodeLinkClass::smgrEvent.payload = "" ; + nodeLinkClass::smgrEvent.response = "" ; + + /* Post a user message 'Swact: Request' and + * then delay to allow it to be displayed */ + mtcInvApi_force_task ( node_ptr, MTC_TASK_SWACT_REQUEST ); + mtcTimer_start ( node_ptr->mtcSwact_timer, mtcTimer_handler, (MTC_TASK_UPDATE_DELAY/2) ); + node_ptr->swactStage = MTC_SWACT__QUERY ; + break ; + } + + /* Handle and threshold all Query Failures */ + case MTC_SWACT__QUERY_FAIL: + { + if ( ++nodeLinkClass::smgrEvent.fails >= SWACT_FAIL_THRESHOLD ) + { + wlog ("%s Query Services Failed: Max Retries (max:%d)\n", + node_ptr->hostname.c_str(), nodeLinkClass::smgrEvent.fails); + mtcInvApi_update_task ( node_ptr, MTC_TASK_SWACT_FAIL_QUERY); + SWACT_DONE ; + } + else + { + wlog ("%s Query Services: Retrying (cnt:%d)\n", + node_ptr->hostname.c_str(), nodeLinkClass::smgrEvent.fails); + mtcTimer_start_msec ( node_ptr->mtcSwact_timer, mtcTimer_handler, 
SWACT_FAIL_MSEC_DELAY ); + node_ptr->swactStage = MTC_SWACT__QUERY ; + } + break ; + } + + /* Query Services on this host */ + case MTC_SWACT__QUERY: + { + if ( node_ptr->mtcSwact_timer.ring == true ) + { + rc = mtcSmgrApi_request ( node_ptr, CONTROLLER_QUERY, 0 ); + if ( rc ) + { + nodeLinkClass::smgrEvent.status = rc ; + node_ptr->swactStage = MTC_SWACT__QUERY_FAIL ; + } + else + { + /* Ok, we got a successful send request ; + * delay a bit and check for the response */ + nodeLinkClass::smgrEvent.cur_retries = 0 ; + nodeLinkClass::smgrEvent.fails = 0 ; + mtcTimer_start_msec ( node_ptr->mtcSwact_timer, mtcTimer_handler, SWACT_RECV_MSEC_DELAY ); + node_ptr->swactStage = MTC_SWACT__QUERY_RECV ; + } + } + break ; + } + + case MTC_SWACT__QUERY_RECV: + { + if ( node_ptr->mtcSwact_timer.ring == true ) + { + /* Try and receive the response */ + rc = mtcHttpUtil_receive ( nodeLinkClass::smgrEvent ); + if ( rc == RETRY ) + { + if ( ++nodeLinkClass::smgrEvent.cur_retries > SWACT_RETRY_THRESHOLD ) + { + wlog ("%s Too many receive retries (cnt:%d)\n", + node_ptr->hostname.c_str(), nodeLinkClass::smgrEvent.cur_retries ); + rc = FAIL ; + } + else + { + mtcTimer_start_msec ( node_ptr->mtcSwact_timer, mtcTimer_handler, SWACT_RECV_MSEC_DELAY ); + break ; + } + } + if (( rc != PASS ) && ( rc != RETRY )) + { + elog ("%s Service Query Failed: Receive Error (rc:%d)\n", + node_ptr->hostname.c_str(), rc ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_SWACT_FAILED); + SWACT_DONE ; + } + else + { + /* Parse through the response - no retries on response string errors */ + bool active = false ; + rc = mtcSmgrApi_service_state ( nodeLinkClass::smgrEvent, active ); + if ( rc ) + { + /* Setup common error message for the user*/ + ilog ("%s Swact: Service Query Failed\n", node_ptr->hostname.c_str()); + mtcInvApi_update_task ( node_ptr, MTC_TASK_SWACT_FAILED); + SWACT_DONE ; + } + else if ( active == true ) + { + /* O.K. 
We need to Swact */ + nodeLinkClass::smgrEvent.fails = 0 ; + nodeLinkClass::smgrEvent.cur_retries = 0 ; + node_ptr->swactStage = MTC_SWACT__SWACT ; + + /* Tell the user what we are doing */ + mtcInvApi_force_task ( node_ptr, MTC_TASK_SWACT_INPROGRESS ); + } + else + { + /* If not true then somehow we are being asked to + * Swact a controller that is not running any services */ + ilog ("%s %s\n", node_ptr->hostname.c_str(), MTC_TASK_SWACT_NOSERVICE); + mtcInvApi_update_task ( node_ptr, MTC_TASK_SWACT_NOSERVICE); + SWACT_DONE ; + } + } + } + break ; + } + + /* Phase 2: Perform Swact */ + case MTC_SWACT__SWACT: + { + rc = mtcSmgrApi_request ( node_ptr, CONTROLLER_SWACT, 0 ); + if ( rc ) + { + /* Abort after SWACT_FAIL_THRESHOLD retries - verified */ + if ( ++nodeLinkClass::smgrEvent.fails >= SWACT_FAIL_THRESHOLD ) + { + elog ( "%s Swact: Failed Request (rc:%d) (max:%d)\n", + node_ptr->hostname.c_str(), rc, + nodeLinkClass::smgrEvent.fails); + + mtcInvApi_update_task ( node_ptr, MTC_TASK_SWACT_FAILED ); + SWACT_DONE ; + } + else + { + elog ( "%s Swact: Retrying Request (rc:%d) (cnt:%d)\n", + node_ptr->hostname.c_str(), rc, + nodeLinkClass::smgrEvent.fails); + } + } + else + { + plog ("%s Swact: In Progress\n", node_ptr->hostname.c_str()); + nodeLinkClass::smgrEvent.status = PASS ; + nodeLinkClass::smgrEvent.fails = 0 ; + nodeLinkClass::smgrEvent.cur_retries = 0 ; + mtcTimer_start_msec ( node_ptr->mtcSwact_timer, mtcTimer_handler, SWACT_RECV_MSEC_DELAY ); + node_ptr->swactStage = MTC_SWACT__SWACT_RECV ; + } + break ; + } + + case MTC_SWACT__SWACT_RECV: + { + if ( node_ptr->mtcSwact_timer.ring == true ) + { + /* Try and receive the response */ + rc = mtcHttpUtil_receive ( nodeLinkClass::smgrEvent ); + if ( rc == RETRY ) + { + if ( ++nodeLinkClass::smgrEvent.cur_retries > SWACT_RETRY_THRESHOLD ) + { + wlog ("%s Too many receive retries (cnt:%d)\n", + node_ptr->hostname.c_str(), nodeLinkClass::smgrEvent.cur_retries ); + rc = FAIL ; + } + else + { + mtcTimer_start_msec ( 
node_ptr->mtcSwact_timer, mtcTimer_handler, SWACT_RECV_MSEC_DELAY ); + break ; + } + } + if (( rc != PASS ) && ( rc != RETRY )) + { + elog ("%s Swact Failed: Receive Error (rc:%d)\n", + node_ptr->hostname.c_str(), rc ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_SWACT_FAILED); + SWACT_DONE ; + } + else + { + mtcTimer_start ( node_ptr->mtcSwact_timer, mtcTimer_handler, MTC_SWACT_POLL_TIMER ); + mtcSmgrApi_request ( node_ptr, CONTROLLER_QUERY, 0 ); + node_ptr->swactStage = MTC_SWACT__SWACT_POLL ; + } + } + break ; + } + + case MTC_SWACT__SWACT_POLL: + { + if ( node_ptr->mtcSwact_timer.ring == true ) + { + if (++nodeLinkClass::smgrEvent.count >= + (nodeLinkClass::swact_timeout/MTC_SWACT_POLL_TIMER)) + { + elog ("%s Swact Failed: Timeout\n", node_ptr->hostname.c_str()); + mtcInvApi_update_task ( node_ptr, MTC_TASK_SWACT_TIMEOUT); + SWACT_DONE ; + } + rc = mtcHttpUtil_receive ( smgrEvent ); + if ( rc != RETRY ) + { + bool active = true ; + mtcSmgrApi_service_state ( smgrEvent, active ); + if ( active == false ) + { + dlog ("%s %s\n",node_ptr->hostname.c_str(), MTC_TASK_SWACT_COMPLETE ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_SWACT_COMPLETE ); + SWACT_DONE ; + break ; + } + else + { + mtcSmgrApi_request ( node_ptr, CONTROLLER_QUERY, 0 ); + } + } + else + { + plog ("%s Swact: In-Progress\n", node_ptr->hostname.c_str()); + } + mtcTimer_start ( node_ptr->mtcSwact_timer, mtcTimer_handler, MTC_SWACT_POLL_TIMER ); + } + break ; + } + case MTC_SWACT__DONE: + { + /* Wait for the done timer to expire. + * When it does ; exit the SWACT FSM after clearing + * the task and setting it back to the start. 
*/ + if ( node_ptr->mtcSwact_timer.ring == true ) + { + mtcHttpUtil_free_conn ( nodeLinkClass::smgrEvent ); + mtcInvApi_force_task ( node_ptr, ""); + nodeLinkClass::smgrEvent.active = false ; + nodeLinkClass::smgrEvent.mutex = false ; + node_ptr->mtcSwact_timer.ring = false ; + node_ptr->swactStage = MTC_SWACT__START ; + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); + if ( smgrEvent.status ) + { + wlog ("%s Swact: Failed\n", node_ptr->hostname.c_str()); + + } + else + { + plog ("%s Swact: Completed\n", node_ptr->hostname.c_str()); + } + } + break; + } + + default: + node_ptr->swactStage = MTC_SWACT__START ; + } + return (rc); +} + +/* Reset Handler + * ------------ + * Issue a reset to a host */ +int nodeLinkClass::reset_handler ( struct nodeLinkClass::node * node_ptr ) +{ + int rc = PASS ; + switch ( node_ptr->resetStage ) + { + case MTC_RESET__FAIL: + { + elog ("%s Reset failed ; aborting after max retries\n", node_ptr->hostname.c_str()); + mtcInvApi_update_task ( node_ptr, MTC_TASK_RESET_FAIL); + mtcTimer_reset ( node_ptr->mtcTimer ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY ); + resetStageChange ( node_ptr , MTC_RESET__FAIL_WAIT ); + break ; + } + case MTC_RESET__FAIL_WAIT: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + node_ptr->mtcTimer.ring = false ; + resetStageChange ( node_ptr , MTC_RESET__DONE ); + + recovery_ctrl_init ( node_ptr->hwmon_reset ); ; + mtcTimer_reset ( node_ptr->hwmon_reset.recovery_timer ); + } + break ; + } + case MTC_RESET__START: + { + plog ("%s Administrative 'Reset' Action\n", node_ptr->hostname.c_str()); + mtcInvApi_update_task ( node_ptr, "Reset Requested" ); + node_ptr->retries = 0 ; + + start_offline_handler ( node_ptr ); + + if ( hostUtil_is_valid_ip_addr (node_ptr->bm_ip ) == false ) + { + /** + * New working provisioning is learned by from the + * dnsmasq.bmc_hosts file changes through inotify watch so + * it is entirely possible that the retries in this fsm + * 
eventually succeed. + **/ + wlog ("%s bm_ip (%s) is invalid (%d) \n", + node_ptr->hostname.c_str(), + node_ptr->bm_ip.c_str(), + rc ); + resetStageChange ( node_ptr , MTC_RESET__FAIL ); + break ; + } + node_ptr->power_action_retries = MTC_RESET_ACTION_RETRY_COUNT ; + /* the fall through is intentional */ + } + case MTC_RESET__REQ_SEND: + { + node_ptr->power_action_retries--; + + /* Handle loss of connectivity over retries */ + if ( node_ptr->bm_provisioned == false ) + { + elog ("%s BMC not provisioned\n", node_ptr->hostname.c_str() ); + mtcInvApi_force_task ( node_ptr, MTC_TASK_BMC_NOT_PROV ); + resetStageChange ( node_ptr , MTC_RESET__FAIL ); + break ; + } + + if ( node_ptr->bm_accessible == false ) + { + wlog ("%s Power Off request rejected ; BMC not accessible ; retry in %d seconds \n", + node_ptr->hostname.c_str(), + MTC_POWER_ACTION_RETRY_DELAY); + + mtcTimer_reset ( node_ptr->mtcTimer ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + resetStageChange ( node_ptr , MTC_RESET__QUEUE ); + break ; + } + + else + { + rc = ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__POWER_RESET ); + + if ( rc ) + { + wlog ("%s Reset request failed (%d)\n", node_ptr->hostname.c_str(), rc ); + resetStageChange ( node_ptr , MTC_RESET__QUEUE ); + } + else + { + blog ("%s Reset requested\n", node_ptr->hostname.c_str()); + resetStageChange ( node_ptr , MTC_RESET__RESP_WAIT ); + } + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + } + break ; + } + + case MTC_RESET__RESP_WAIT: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + rc = ipmi_command_recv ( node_ptr ); + if ( rc == RETRY ) + { + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RETRY_WAIT ); + break ; + } + + if ( rc ) + { + elog ("%s Reset command failed (rc:%d)\n", node_ptr->hostname.c_str(), rc ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + resetStageChange ( node_ptr, 
MTC_RESET__QUEUE ); + } + else + { + ilog ("%s is Resetting\n", node_ptr->hostname.c_str()); + mtcInvApi_update_task ( node_ptr, "Resetting: waiting for offline" ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RESET_TO_OFFLINE_TIMEOUT ); + resetStageChange ( node_ptr, MTC_RESET__OFFLINE_WAIT ); + } + } + break ; + } + + case MTC_RESET__QUEUE: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + node_ptr->mtcTimer.ring = false ; + if ( node_ptr->power_action_retries > 0 ) + { + char buffer[64] ; + int attempts = MTC_RESET_ACTION_RETRY_COUNT - node_ptr->power_action_retries ; + snprintf ( buffer, 64, MTC_TASK_RESET_QUEUE, attempts, MTC_RESET_ACTION_RETRY_COUNT); + mtcInvApi_update_task ( node_ptr, buffer); + + /* check the thread error status if thetre is one */ + if ( node_ptr->ipmitool_thread_info.status ) + { + wlog ("%s ... %s (rc:%d)\n", node_ptr->hostname.c_str(), + node_ptr->ipmitool_thread_info.status_string.c_str(), + node_ptr->ipmitool_thread_info.status ); + } + + resetStageChange ( node_ptr , MTC_RESET__REQ_SEND ); + } + else + { + resetStageChange ( node_ptr , MTC_RESET__FAIL ); + } + } + break ; + } + + case MTC_RESET__OFFLINE_WAIT: + { + if ( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFLINE ) + { + if (node_ptr->mtcTimer.tid) + mtcTimer_stop ( node_ptr->mtcTimer ); + + plog ("%s Reset Successful\n", node_ptr->hostname.c_str()); + resetStageChange ( node_ptr , MTC_RESET__DONE ); + } + else if ( node_ptr->mtcTimer.ring == true ) + { + elog ("%s Reset operation timeout - host did not go offline\n", node_ptr->hostname.c_str()); + resetStageChange ( node_ptr , MTC_RESET__FAIL ); + } + break ; + } + + case MTC_RESET__DONE: + default: + { + mtcTimer_reset ( node_ptr->mtcTimer ); + + hwmon_recovery_monitor ( node_ptr, MTC_EVENT_HWMON_RESET ); + + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE ); + + mtcInvApi_force_task ( node_ptr, "" ); + + clear_service_readies ( node_ptr ); + + plog ("%s Reset Completed\n", 
node_ptr->hostname.c_str()); + break ; + } + } + return (PASS); +} + +/* Reinstall handler + * -------------- + * Manage reinstall operations for a locked-disabled host */ +int nodeLinkClass::reinstall_handler ( struct nodeLinkClass::node * node_ptr ) +{ + switch ( node_ptr->reinstallStage ) + { + case MTC_REINSTALL__START: + { + int host_reinstall_wait_timer = node_ptr->mtcalive_timeout + node_reinstall_timeout ; + node_ptr->retries = host_reinstall_wait_timer / MTC_REINSTALL_WAIT_TIMER ; + + start_offline_handler ( node_ptr ); + + node_ptr->cmdReq = MTC_CMD_WIPEDISK ; + + plog ("%s Administrative Reinstall Requested\n", node_ptr->hostname.c_str()); + if ( send_mtc_cmd ( node_ptr->hostname, MTC_CMD_WIPEDISK, MGMNT_INTERFACE ) != PASS ) + { + elog ("Failed to send 'reinstall' request to %s\n", node_ptr->hostname.c_str()); + reinstallStageChange ( node_ptr , MTC_REINSTALL__FAIL ); + } + else + { + node_ptr->cmdRsp = MTC_CMD_NONE ; + + if ( node_ptr->mtcTimer.tid ) + { + mtcTimer_stop ( node_ptr->mtcTimer ); + } + + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_CMD_RSP_TIMEOUT ); + + ilog ("%s waiting for REINSTALL ACK \n", node_ptr->hostname.c_str() ); + + reinstallStageChange ( node_ptr , MTC_REINSTALL__RESP_WAIT ); + } + break ; + } + case MTC_REINSTALL__RESP_WAIT: + { + if ( node_ptr->cmdRsp != MTC_CMD_WIPEDISK ) + { + if ( node_ptr->mtcTimer.ring == true ) + { + elog ("%s REINSTALL ACK Timeout\n", + node_ptr->hostname.c_str()); + + reinstallStageChange ( node_ptr , MTC_REINSTALL__FAIL ); + } + } + else + { + /* declare successful reinstall request */ + plog ("%s REINSTALL Request Succeeded\n", node_ptr->hostname.c_str()); + + mtcTimer_stop ( node_ptr->mtcTimer ); + + /* We need to wait for the host to go offline */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RESET_TO_OFFLINE_TIMEOUT ); + + /* Wait for the host to go offline */ + reinstallStageChange ( node_ptr , MTC_REINSTALL__OFFLINE_WAIT ); + } + break ; + } + case 
MTC_REINSTALL__OFFLINE_WAIT: + { + if ( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFLINE ) + { + mtcTimer_stop ( node_ptr->mtcTimer ); + + clear_service_readies ( node_ptr ); + + ilog ("%s Reinstall Progress: host is offline ; waiting for host to come back\n", node_ptr->hostname.c_str()); + + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_REINSTALL_WAIT_TIMER ); + reinstallStageChange ( node_ptr , MTC_REINSTALL__ONLINE_WAIT ); + } + else if ( node_ptr->mtcTimer.ring == true ) + { + elog ("%s offline timeout - reinstall failed\n", node_ptr->hostname.c_str()); + reinstallStageChange ( node_ptr , MTC_REINSTALL__FAIL ); + } + break ; + } + case MTC_REINSTALL__ONLINE_WAIT: + { + if ( node_ptr->mtcTimer.ring == true ) + { + if ( node_ptr->availStatus == MTC_AVAIL_STATUS__ONLINE ) + { + mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_SUCCESS); + mtcTimer_stop ( node_ptr->mtcTimer ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY ); + reinstallStageChange ( node_ptr , MTC_REINSTALL__MSG_DISPLAY ); + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__STATUSCHANGE_REINSTALL_COMPLETE ); + } + else + { + if ( --node_ptr->retries < 0 ) + { + elog ("%s online timeout - reinstall failed\n", node_ptr->hostname.c_str()); + reinstallStageChange ( node_ptr , MTC_REINSTALL__FAIL ); + } + else + { + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_REINSTALL_WAIT_TIMER ); + } + } + } + break; + } + case MTC_REINSTALL__FAIL: + { + mtcInvApi_update_task ( node_ptr, MTC_TASK_REINSTALL_FAIL); + mtcTimer_stop ( node_ptr->mtcTimer ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY ); + reinstallStageChange ( node_ptr , MTC_REINSTALL__MSG_DISPLAY ); + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__STATUSCHANGE_REINSTALL_FAILED ); + break ; + } + case MTC_REINSTALL__MSG_DISPLAY: + { + if ( node_ptr->mtcTimer.ring == true ) + { + node_ptr->mtcTimer.ring = false ; + reinstallStageChange ( node_ptr , 
MTC_REINSTALL__DONE ); + } + break ; + } + case MTC_REINSTALL__DONE: + default: + { + plog ("%s Reinstall Completed\n", node_ptr->hostname.c_str()); + + /* Default timeout values */ + LOAD_NODETYPE_TIMERS ; + + mtcTimer_stop ( node_ptr->mtcTimer ); + + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE ); + + recovery_ctrl_init ( node_ptr->hwmon_reset ); + recovery_ctrl_init ( node_ptr->hwmon_powercycle ); + + mtcInvApi_force_task ( node_ptr, "" ); + break ; + } + } + return (PASS); +} + +/* Reboot handler + * -------------- + * Manage reinstall operations for a disabled host */ +int nodeLinkClass::reboot_handler ( struct nodeLinkClass::node * node_ptr ) +{ + // ilog ("%s Administrative 'reboot' Action (%d)\n", node_ptr->hostname.c_str(), node_ptr->resetProgStage ); + + switch ( node_ptr->resetProgStage ) + { + case MTC_RESETPROG__START: + { + plog ("%s Administrative Reboot Requested\n", node_ptr->hostname.c_str() ); + + /* start with a clean command slate */ + mtcCmd_doneQ_purge ( node_ptr ); + mtcCmd_workQ_purge ( node_ptr ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_RESET_PROG ); + if ( node_ptr->adminAction != MTC_ADMIN_ACTION__REBOOT ) + { + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_AUTO_REBOOT ); + } + node_ptr->retries = 0 ; + + /* If this is a simplex all-in-one system then issue the lazy reboot and just wait */ + if ( THIS_HOST ) + { + mtcInvApi_update_task_now ( node_ptr, "Please stand-by while the active controller gracefully reboots" ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_MINS_2 ) ; + node_ptr->resetProgStage = MTC_RESETPROG__WAIT ; + + /* Launch a backup sysreq thread */ + fork_sysreq_reboot ( daemon_get_cfg_ptr()->failsafe_shutdown_delay ); + + /* Tell SM we are unhealthy so that it shuts down all its services */ + daemon_log ( SMGMT_UNHEALTHY_FILE, "Active Controller Reboot request" ); + send_mtc_cmd ( node_ptr->hostname, MTC_CMD_LAZY_REBOOT, MGMNT_INTERFACE ) ; + } + else + { + node_ptr->resetProgStage = 
MTC_RESETPROG__REBOOT ; + } + break ; + } + case MTC_RESETPROG__REBOOT: + { + #define REBOOT_RETRIES (0) + node_ptr->mtcCmd_work_fifo.clear(); + mtcCmd_init ( node_ptr->cmd ); + node_ptr->cmd.stage = MTC_CMD_STAGE__START ; + node_ptr->cmd.cmd = MTC_OPER__RESET_PROGRESSION ; + node_ptr->cmd.parm1 = REBOOT_RETRIES ; /* retries */ + node_ptr->cmd.task = false ; /* send task updates */ + node_ptr->mtcCmd_work_fifo.push_front(node_ptr->cmd); + + mtcTimer_reset ( node_ptr->mtcTimer ); + + /* calculate the overall timeout period taking into account + * all the reboot/reset sources that will be tried */ + int overall_timeout = calc_reset_prog_timeout ( node_ptr, REBOOT_RETRIES ) ; + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, overall_timeout ) ; + node_ptr->resetProgStage = MTC_RESETPROG__WAIT ; + + break ; + } + case MTC_RESETPROG__WAIT: + { + /* Look for the command handler FSM timeout and abor in that case */ + if ( node_ptr->mtcTimer.ring == true ) + { + ilog ("%s reboot (progression) timeout\n", node_ptr->hostname.c_str()); + + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY ) ; + mtcInvApi_force_task ( node_ptr, MTC_TASK_REBOOT_ABORT ); + node_ptr->resetProgStage = MTC_RESETPROG__FAIL ; + } + else if ( THIS_HOST ) + { + ; /* wait for the reboot or FSM timeout */ + } + else if ( node_ptr->mtcCmd_work_fifo.empty()) + { + slog ("%s unexpected empty cmd queue\n", node_ptr->hostname.c_str()); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY ) ; + mtcInvApi_force_task ( node_ptr, MTC_TASK_REBOOT_ABORT ); + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); + node_ptr->resetProgStage = MTC_RESETPROG__FAIL ; + } + else + { + node_ptr->mtcCmd_work_fifo_ptr = node_ptr->mtcCmd_work_fifo.begin() ; + if ( node_ptr->mtcCmd_work_fifo_ptr->stage == MTC_CMD_STAGE__DONE ) + { + if ( node_ptr->mtcTimer.tid ) + mtcTimer_stop ( node_ptr->mtcTimer ); + + if ( node_ptr->mtcCmd_work_fifo_ptr->status == PASS ) + { + 
plog ("%s Reboot Completed\n", node_ptr->hostname.c_str() ); + node_ptr->mtcTimer.ring = true ; + node_ptr->resetProgStage = MTC_RESETPROG__FAIL ; /* not really fail but use its clean up function */ + } + else if ( ++node_ptr->retries <= 5 ) + { + char buffer[255] ; + snprintf ( buffer, 255, MTC_TASK_REBOOT_FAIL_RETRY, node_ptr->retries, 5 ); + wlog ("%s %s\n", node_ptr->hostname.c_str(), buffer ); + mtcInvApi_update_task ( node_ptr, buffer ); + if ( node_ptr->mtcCmd_done_fifo.size() ) + node_ptr->mtcCmd_done_fifo.pop_front(); + node_ptr->resetProgStage = MTC_RESETPROG__REBOOT ; + } + else + { + wlog ("%s %s\n", node_ptr->hostname.c_str(), MTC_TASK_REBOOT_ABORT ); + if ( node_ptr->mtcCmd_done_fifo.size() ) + node_ptr->mtcCmd_done_fifo.pop_front(); + mtcInvApi_force_task ( node_ptr, MTC_TASK_REBOOT_ABORT ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY ) ; + node_ptr->resetProgStage = MTC_RESETPROG__FAIL ; + } + } + break ; + } + case MTC_RESETPROG__FAIL: + { + if ( node_ptr->mtcTimer.ring == true ) + { + if ( !node_ptr->mtcCmd_work_fifo.empty() ) + node_ptr->mtcCmd_work_fifo.pop_front(); + if ( !node_ptr->mtcCmd_work_fifo.empty() ) + mtcCmd_workQ_purge ( node_ptr ); + + if ( !node_ptr->mtcCmd_done_fifo.empty() ) + node_ptr->mtcCmd_done_fifo.pop_front(); + if ( !node_ptr->mtcCmd_done_fifo.empty() ) + mtcCmd_doneQ_purge ( node_ptr ); + + mtcInvApi_force_task ( node_ptr, "" ); + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); + node_ptr->resetProgStage = MTC_RESETPROG__START ; + } + } + break ; + } + default: + { + slog ("%s unsupported reboot stage (%d) ; clearing action\n", + node_ptr->hostname.c_str(), + node_ptr->resetProgStage ); + + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE ); + } + } + + return (PASS); +} + +/* Power Handler + * ----------------- */ +int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr ) +{ + int rc = PASS ; + switch ( node_ptr->powerStage ) + { + case MTC_POWEROFF__FAIL: 
+ { + elog ("%s Power-Off failed ; aborting after max retries\n", node_ptr->hostname.c_str()); + mtcInvApi_update_task ( node_ptr, MTC_TASK_POWEROFF_FAIL); + mtcTimer_reset ( node_ptr->mtcTimer ) ; + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY ); + powerStageChange ( node_ptr , MTC_POWEROFF__FAIL_WAIT ); + break ; + } + case MTC_POWEROFF__FAIL_WAIT: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + node_ptr->mtcTimer.ring = false ; + powerStageChange ( node_ptr , MTC_POWER__DONE ); + } + break ; + } + case MTC_POWEROFF__START: + { + plog ("%s Administrative 'Power-Off' Action\n", node_ptr->hostname.c_str()); + mtcInvApi_force_task ( node_ptr, "Power-Off Requested" ); + + start_offline_handler ( node_ptr ); + + if ( hostUtil_is_valid_ip_addr (node_ptr->bm_ip ) == false ) + { + /** + * New working provisioning is learned by from the + * dnsmasq.bmc_hosts file changes through inotify watch so + * it is entirely possible that the retries in this fsm + * eventually succeed. 
+ **/ + wlog ("%s bm_ip (%s) is invalid (%d) \n", + node_ptr->hostname.c_str(), + node_ptr->bm_ip.c_str(), + rc ); + } + else + { + ; // send_hwmon_command ( node_ptr->hostname, MTC_CMD_STOP_HOST ); + } + + node_ptr->power_action_retries = MTC_POWER_ACTION_RETRY_COUNT ; + //the fall through to MTC_POWEROFF__REQ_SEND is intentional + } + case MTC_POWEROFF__REQ_SEND: + { + node_ptr->power_action_retries--; + + /* Handle loss of connectivity over retries */ + if ( node_ptr->bm_provisioned == false ) + { + elog ("%s BMC not provisioned\n", node_ptr->hostname.c_str()); + mtcInvApi_force_task ( node_ptr, MTC_TASK_BMC_NOT_PROV ); + powerStageChange ( node_ptr , MTC_POWEROFF__FAIL ); + break ; + } + + if ( node_ptr->bm_accessible == false ) + { + wlog ("%s Power Off request rejected ; BMC not accessible ; retry in %d seconds\n", + node_ptr->hostname.c_str(), + MTC_POWER_ACTION_RETRY_DELAY); + + mtcTimer_reset ( node_ptr->mtcTimer ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + powerStageChange ( node_ptr , MTC_POWEROFF__QUEUE ); + break ; + } + + else + { + rc = ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__POWER_OFF ); + if ( rc ) + { + wlog ("%s Power-Off request failed (%d)\n", node_ptr->hostname.c_str(), rc ); + powerStageChange ( node_ptr , MTC_POWEROFF__QUEUE ); + } + else + { + blog ("%s Power-Off requested\n", node_ptr->hostname.c_str()); + powerStageChange ( node_ptr , MTC_POWEROFF__RESP_WAIT ); + } + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + } + break ; + } + + case MTC_POWEROFF__RESP_WAIT: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + rc = ipmi_command_recv ( node_ptr ); + if ( rc == RETRY ) + { + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RETRY_WAIT ); + break ; + } + + if ( rc ) + { + elog ("%s Power-Off command failed\n", node_ptr->hostname.c_str()); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY 
); + powerStageChange ( node_ptr , MTC_POWEROFF__QUEUE ); + } + else + { + ilog ("%s is Powering Off\n", node_ptr->hostname.c_str() ); + mtcInvApi_update_task ( node_ptr, "Powering Off" ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY ); + powerStageChange ( node_ptr , MTC_POWEROFF__DONE ); + node_ptr->power_on = false ; + } + } + break ; + } + case MTC_POWEROFF__QUEUE: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + node_ptr->mtcTimer.ring = false ; + if ( node_ptr->power_action_retries > 0 ) + { + char buffer[255] ; + int attempts = MTC_POWER_ACTION_RETRY_COUNT - node_ptr->power_action_retries ; + snprintf ( buffer, 255, MTC_TASK_POWEROFF_QUEUE, attempts, MTC_POWER_ACTION_RETRY_COUNT); + mtcInvApi_update_task ( node_ptr, buffer); + + /* check the thread error status if thetre is one */ + if ( node_ptr->ipmitool_thread_info.status ) + { + wlog ("%s ... %s (rc:%d)\n", node_ptr->hostname.c_str(), + node_ptr->ipmitool_thread_info.status_string.c_str(), + node_ptr->ipmitool_thread_info.status ); + } + powerStageChange ( node_ptr , MTC_POWEROFF__REQ_SEND ); + } + else + { + powerStageChange ( node_ptr , MTC_POWEROFF__FAIL ); + } + } + break ; + } + case MTC_POWEROFF__DONE: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + plog ("%s Power-Off Completed\n", node_ptr->hostname.c_str()); + + stop_offline_handler ( node_ptr ); + + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__POWERED_OFF ); + + powerStageChange ( node_ptr , MTC_POWER__DONE ); + } + break ; + } + + /* ----------------------- */ + /* POWER ON Group of Cases */ + /* ----------------------- */ + + case MTC_POWERON__FAIL: + { + elog ("%s Power-On failed ; aborting after max retries\n", node_ptr->hostname.c_str()); + mtcInvApi_update_task ( node_ptr, MTC_TASK_POWERON_FAIL); + mtcTimer_reset ( node_ptr->mtcTimer ) ; + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY ); + powerStageChange ( node_ptr , MTC_POWERON__FAIL_WAIT ); + 
break ; + } + case MTC_POWERON__FAIL_WAIT: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + node_ptr->mtcTimer.ring = false ; + powerStageChange ( node_ptr , MTC_POWER__DONE ); + } + break ; + } + case MTC_POWERON__START: + { + plog ("%s Administrative 'Power-On' Action\n", node_ptr->hostname.c_str()); + mtcInvApi_update_task ( node_ptr, "Power-On Requested" ); + + if ( hostUtil_is_valid_ip_addr ( node_ptr->bm_ip ) == false ) + { + /** + * New working provisioning is learned by from the + * dnsmasq.bmc_hosts file changes through inotify watch so + * it is entirely possible that the retries in this fsm + * eventually succeed. + **/ + wlog ("%s bm_ip (%s) is invalid (%d) \n", + node_ptr->hostname.c_str(), + node_ptr->bm_ip.c_str(), + rc ); + } + + node_ptr->power_action_retries = MTC_POWER_ACTION_RETRY_COUNT ; + powerStageChange ( node_ptr , MTC_POWERON__POWER_STATUS ); + //the fall through to MTC_POWERON__REQ_SEND is intentional + } + case MTC_POWERON__POWER_STATUS: + { + if ( node_ptr->bm_accessible == false ) + { + wlog ("%s Power On request rejected ; BMC not accessible ; retry in %d seconds\n", + node_ptr->hostname.c_str(), + MTC_POWER_ACTION_RETRY_DELAY); + + node_ptr->power_action_retries-- ; + mtcTimer_reset ( node_ptr->mtcTimer ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + powerStageChange ( node_ptr , MTC_POWERON__QUEUE ); + break ; + } + + rc = ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__POWER_STATUS ) ; + if ( rc ) + { + node_ptr->power_action_retries-- ; + powerStageChange ( node_ptr , MTC_POWERON__QUEUE ); + } + else + { + powerStageChange ( node_ptr , MTC_POWERON__POWER_STATUS_WAIT ); + } + mtcTimer_reset ( node_ptr->mtcTimer ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY ); + break ; + } + case MTC_POWERON__POWER_STATUS_WAIT: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + rc = ipmi_command_recv ( node_ptr ); + if ( rc == RETRY ) + { + 
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RETRY_WAIT ); + } + else if ( rc == PASS ) + { + if ( node_ptr->ipmitool_thread_info.data.find (IPMITOOL_POWER_ON_STATUS) != std::string::npos ) + { + ilog ("%s power is already on ; no action required\n", node_ptr->hostname.c_str()); + node_ptr->power_on = true ; + mtcInvApi_update_task ( node_ptr, "Power Already On" ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY ); + powerStageChange ( node_ptr , MTC_POWERON__DONE ); + } + else + { + ilog ("%s power is off ; powering on ...\n", node_ptr->hostname.c_str() ); + powerStageChange ( node_ptr , MTC_POWERON__REQ_SEND ); + } + } + else + { + powerStageChange ( node_ptr , MTC_POWERON__POWER_STATUS ); + } + } + break ; + } + case MTC_POWERON__REQ_SEND: + { + node_ptr->power_action_retries--; + + /* Ensure that mtce is updated with the latest board + * management ip address for this host */ + if ( node_ptr->bm_provisioned == false ) + { + elog ("%s BMC not provisioned or accessible (%d:%d)\n", + node_ptr->hostname.c_str(), + node_ptr->bm_provisioned, + node_ptr->bm_accessible ); + + powerStageChange ( node_ptr , MTC_POWERON__FAIL ); + break ; + } + + if ( node_ptr->bm_accessible == false ) + { + wlog ("%s Power-On will fail ; not accessible to BMC ; retry in %d seconds \n", + node_ptr->hostname.c_str(), MTC_POWER_ACTION_RETRY_DELAY); + + mtcTimer_reset ( node_ptr->mtcTimer ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + powerStageChange ( node_ptr , MTC_POWERON__QUEUE ); + break ; + } + else + { + rc = ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__POWER_ON ); + if ( rc ) + { + wlog ("%s Power-On request failed (%d)\n", + node_ptr->hostname.c_str(), rc ); + + mtcTimer_reset ( node_ptr->mtcTimer ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + powerStageChange ( node_ptr , MTC_POWERON__QUEUE ); + } + else + { + blog ("%s Power-On 
requested\n", node_ptr->hostname.c_str()); + + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY ); + + powerStageChange ( node_ptr , MTC_POWERON__RESP_WAIT ); + } + } + break ; + } + case MTC_POWERON__RESP_WAIT: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + rc = ipmi_command_recv ( node_ptr ); + if ( rc == RETRY ) + { + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_RETRY_WAIT ); + break ; + } + + if ( rc ) + { + elog ("%s Power-On command failed\n", node_ptr->hostname.c_str()); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + powerStageChange ( node_ptr , MTC_POWERON__QUEUE ); + } + else + { + ilog ("%s is Powering On\n", node_ptr->hostname.c_str() ); + mtcInvApi_update_task ( node_ptr, "Powering On" ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY ); + powerStageChange ( node_ptr , MTC_POWERON__DONE ); + node_ptr->power_on = true ; + } + } + break ; + } + case MTC_POWERON__QUEUE: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + node_ptr->mtcTimer.ring = false ; + if ( node_ptr->power_action_retries > 0 ) + { + char buffer[64] ; + int attempts = MTC_POWER_ACTION_RETRY_COUNT - node_ptr->power_action_retries ; + snprintf ( buffer, 64, MTC_TASK_POWERON_QUEUE, attempts, MTC_POWER_ACTION_RETRY_COUNT); + mtcInvApi_update_task ( node_ptr, buffer); + + /* check the thread error status if thetre is one */ + if ( node_ptr->ipmitool_thread_info.status ) + { + wlog ("%s ... 
%s (rc:%d)\n", node_ptr->hostname.c_str(), + node_ptr->ipmitool_thread_info.status_string.c_str(), + node_ptr->ipmitool_thread_info.status ); + } + + powerStageChange ( node_ptr , MTC_POWERON__POWER_STATUS ); + } + else + { + powerStageChange ( node_ptr , MTC_POWERON__FAIL ); + } + } + break ; + } + case MTC_POWERON__DONE: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + plog ("%s Power-On Completed\n", node_ptr->hostname.c_str()); + + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__OFFLINE ); + + // send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST ); + + powerStageChange ( node_ptr , MTC_POWER__DONE ); + } + break ; + } + + case MTC_POWER__DONE: + default: + { + mtcTimer_reset ( node_ptr->mtcTimer ); + + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE ); + + recovery_ctrl_init ( node_ptr->hwmon_reset ); + recovery_ctrl_init ( node_ptr->hwmon_powercycle ); + + mtcInvApi_force_task ( node_ptr, "" ); + break ; + } + } + return (PASS); +} + + +/* Power Cycle Handler + * ------------------- */ +int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr ) +{ + int rc = PASS ; + + if ( node_ptr->bm_accessible == false ) + { + wlog ("%s 'powercycle' abort ; not accessible to BMC\n", node_ptr->hostname.c_str() ); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__FAIL ); + } + + /* Manage max retries */ + if ( node_ptr->hwmon_powercycle.retries >= MAX_POWERCYCLE_STAGE_RETRIES ) + { + wlog ("%s 'powercycle' abort ; max retries reached\n", node_ptr->hostname.c_str() ); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__FAIL ); + } + + /* Manage max retries */ + if ( node_ptr->hwmon_powercycle.queries >= MAX_POWERCYCLE_QUERY_RETRIES ) + { + wlog ("%s power state query retries exceeded ; failing current iteration\n", node_ptr->hostname.c_str()); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__FAIL ); + } + + switch ( node_ptr->powercycleStage ) + { + case MTC_POWERCYCLE__FAIL: + { + mtcTimer_reset ( 
node_ptr->hwmon_powercycle.control_timer ); + + wlog ("%s entering 'powercycle' failed stage ATTEMPT: %d\n", + node_ptr->hostname.c_str() , + node_ptr->hwmon_powercycle.attempts ); + + /* Note: hwmon will continue to send powercycle requests to restart once it is accessible */ + + /* TODO: RELEASE NOTE: Node may be left in the disabled state + * - need to track power state and raise logs or alarms if host is stuck in power off state. + * - The ipmitool update does add tracking of the power state but does not introduce the alarm */ + + // send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST ); + + /* Let the next event perform anothe rpower-cycle retry */ + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE ); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__DONE ); + + mtcInvApi_update_task ( node_ptr, MTC_TASK_POWERCYCLE_FAIL , node_ptr->hwmon_powercycle.attempts ); + + hwmon_recovery_monitor ( node_ptr, MTC_EVENT_HWMON_POWERCYCLE ); + + break ; + } + case MTC_POWERCYCLE__START: + { + switch ( node_ptr->subStage ) + { + case MTC_SUBSTAGE__START: + { + if ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) + { + ilog ("%s failing host for powercycle\n", node_ptr->hostname.c_str() ); + alarm_enabled_failure ( node_ptr ); + + /* Set node as unlocked-disabled-failed */ + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__DISABLED, + MTC_AVAIL_STATUS__FAILED ); + } + ilog ("%s is %s-%s-%s\n", node_ptr->hostname.c_str(), + get_adminState_str (node_ptr->adminState).c_str(), + get_operState_str (node_ptr->operState).c_str(), + get_availStatus_str(node_ptr->availStatus).c_str()); + + node_ptr->hwmon_powercycle.state = RECOVERY_STATE__ACTION ; + + node_ptr->hwmon_powercycle.attempts++ ; + + mtcTimer_reset ( node_ptr->hwmon_powercycle.control_timer ); + + /*********************************************************************************** + * + * Perminent Power-Down Case + * ------------------------- + * If we exceed the maximum power 
cycle attempt retries then we + * give up and power the unit down and leave it that way. + * + ***********************************************************************************/ + if ( node_ptr->hwmon_powercycle.attempts > MAX_POWERCYCLE_ATTEMPT_RETRIES ) + { + ilog ("%s -------------------------------------------------------------\n", node_ptr->hostname.c_str()); + wlog ("%s critical event is persistent ; too many failed attempts (%d)\n", + node_ptr->hostname.c_str(), node_ptr->hwmon_powercycle.attempts ); + ilog ("%s -------------------------------------------------------------\n", node_ptr->hostname.c_str()); + + /* terminate any in progress work, likely auto recovery if unlocked, for this host */ + mtcTimer_reset ( node_ptr->mtcCmd_timer ); + mtcCmd_workQ_purge ( node_ptr ); + mtcCmd_doneQ_purge ( node_ptr ); + + // node_ptr->powercycle_completed = true ; + node_ptr->hwmon_powercycle.retries = 0 ; + node_ptr->hwmon_powercycle.queries = 0 ; + mtcInvApi_update_task ( node_ptr, MTC_TASK_POWERCYCLE_DOWN, node_ptr->hwmon_powercycle.attempts ); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWEROFF ); + } + else + { + wlog ("%s starting 'powercycle' recovery ATTEMPT: %d\n", + node_ptr->hostname.c_str(), + node_ptr->hwmon_powercycle.attempts ); + + // send_hwmon_command ( node_ptr->hostname, MTC_CMD_STOP_HOST); + mtcInvApi_update_task ( node_ptr, MTC_TASK_POWERCYCLE_HOST, node_ptr->hwmon_powercycle.attempts ); + + node_ptr->hwmon_powercycle.retries = 0 ; /* remove for back to back power cycles */ + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, 1 ); + subStageChange ( node_ptr, MTC_SUBSTAGE__SEND ); + } + break ; + } + + /* Query current power state */ + case MTC_SUBSTAGE__SEND: + { + if ( mtcTimer_expired ( node_ptr->hwmon_powercycle.control_timer ) ) + { + int delay = MTC_IPMITOOL_REQUEST_DELAY ; + ilog ("%s querying current power state\n", node_ptr->hostname.c_str()); + + rc = ipmi_command_send ( node_ptr, 
IPMITOOL_THREAD_CMD__POWER_STATUS ); + if ( rc ) + { + node_ptr->hwmon_powercycle.retries++ ; + wlog ("%s failed to send 'power state query' ; retrying %d of %d\n", + node_ptr->hostname.c_str(), + node_ptr->hwmon_powercycle.retries, + MAX_POWERCYCLE_STAGE_RETRIES ); + + node_ptr->hwmon_powercycle.queries++ ; + + /* Retry the send */ + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + } + else + { + node_ptr->hwmon_powercycle.queries = 0 ; + subStageChange ( node_ptr, MTC_SUBSTAGE__RECV ); + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, delay ); + } + } + break ; + } + + /* Interpret current power state query */ + case MTC_SUBSTAGE__RECV: + { + if ( mtcTimer_expired ( node_ptr->hwmon_powercycle.control_timer ) ) + { + rc = ipmi_command_recv ( node_ptr ); + if ( rc == RETRY ) + { + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_RETRY_WAIT ); + break ; + } + + if ( rc ) + { + node_ptr->hwmon_powercycle.retries++ ; + elog ("%s 'power query' command failed ; retrying %d or %d\n", + node_ptr->hostname.c_str(), + node_ptr->hwmon_powercycle.retries, + MAX_POWERCYCLE_STAGE_RETRIES ); + + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + subStageChange ( node_ptr, MTC_SUBSTAGE__SEND ); + } + else + { + bool on = false ; + + ilog ("%s Power Status: %s\n", + node_ptr->hostname.c_str(), + node_ptr->ipmitool_thread_info.data.c_str()); + + if ( node_ptr->ipmitool_thread_info.data.find ( IPMITOOL_POWER_ON_STATUS ) != std::string::npos ) + { + on = true ; + } + if ( rc == PASS ) + { + /* maintain current power state */ + node_ptr->power_on = on ; + + if ( on == true ) + { + ilog ("%s invoking 'powerdown' phase\n", node_ptr->hostname.c_str()); + + subStageChange ( node_ptr, MTC_SUBSTAGE__DONE ); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWEROFF ); + } + else + { + wlog ("%s is already 
powered-off ; starting powercycle with power-on\n", node_ptr->hostname.c_str() ); + subStageChange ( node_ptr, MTC_SUBSTAGE__DONE ); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWERON ); + } + } + else + { + /* TODO: use FAIL handler */ + node_ptr->hwmon_powercycle.retries = MAX_POWERCYCLE_STAGE_RETRIES+1 ; + // powercycleStageChange ( node_ptr, MTC_POWERCYCLE__FAIL ); + elog ("%s failed to query power status ; aborting powercycle action\n", + node_ptr->hostname.c_str()); + } + } + } + break ; + } + default: + { + slog ("%s %s.%s stage\n", node_ptr->hostname.c_str(), + get_powercycleStages_str(node_ptr->powercycleStage).c_str(), + get_subStages_str(node_ptr->subStage).c_str()); + + subStageChange ( node_ptr, MTC_SUBSTAGE__DONE ); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__START ); + break ; + } + } + break ; + } + + case MTC_POWERCYCLE__POWEROFF: + { + int delay = MTC_IPMITOOL_REQUEST_DELAY ; + + /* Stop heartbeat if we are powering off the host */ + send_hbs_command ( node_ptr->hostname, MTC_CMD_STOP_HOST ); + + rc = ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__POWER_OFF ); + if ( rc ) + { + elog ("%s failed to send power-off command to BMC (%d)\n", + node_ptr->hostname.c_str(), + rc ); + + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__FAIL ); + } + else + { + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, delay ); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWEROFF_CMND_WAIT ); + } + break ; + } + case MTC_POWERCYCLE__POWEROFF_CMND_WAIT: + { + if ( mtcTimer_expired ( node_ptr->hwmon_powercycle.control_timer ) ) + { + rc = ipmi_command_recv ( node_ptr ); + if ( rc == RETRY ) + { + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_RETRY_WAIT ); + break ; + } + + if ( rc ) + { + elog ("%s power-off command failed (rc:%d:%d)\n", + node_ptr->hostname.c_str(), + rc , node_ptr->ipmitool_thread_info.status); + + if ( node_ptr->ipmitool_thread_info.status ) + { + wlog ("%s 
... %s\n", + node_ptr->hostname.c_str(), + node_ptr->ipmitool_thread_info.status_string.c_str()); + } + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__FAIL ); + } + else + { + ilog ("%s waiting up to %d seconds for 'offline'\n", node_ptr->hostname.c_str(), MTC_POWEROFF_TO_OFFLINE_TIMEOUT ); + + /* Set the power-off timeout */ + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, + mtcTimer_handler, + MTC_POWEROFF_TO_OFFLINE_TIMEOUT ); + + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWEROFF_WAIT ); + } + } + break ; + } + case MTC_POWERCYCLE__POWEROFF_WAIT: + { + if (( node_ptr->availStatus == MTC_AVAIL_STATUS__POWERED_OFF ) || + ( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFLINE )) + { + /* since the host is powered down lets reflect that in the database */ + node_ptr->uptime = 0 ; + mtcInvApi_update_uptime ( node_ptr, node_ptr->uptime ); + + clear_service_readies ( node_ptr ); + + mtcTimer_reset ( node_ptr->hwmon_powercycle.control_timer ); + + if ( node_ptr->hwmon_powercycle.attempts > MAX_POWERCYCLE_ATTEMPT_RETRIES ) + { + wlog ("%s -------------------------------------------------------------------\n", + node_ptr->hostname.c_str() ); + wlog ("%s ... Leaving server POWERED DOWN to protect hardware from damage ...\n", + node_ptr->hostname.c_str() ); + wlog ("%s -------------------------------------------------------------------\n", + node_ptr->hostname.c_str() ); + + /* Cancelling the recovery timer prevents auto-recovery. + * Recovery must be through manual actions. */ + mtcTimer_reset ( node_ptr->hwmon_powercycle.recovery_timer ); + node_ptr->hwmon_powercycle.state = RECOVERY_STATE__BLOCKED ; + + /* Block Auto-Recovery Path + * ------------------------ + * If we have reached the max retries and are unlocked then + * leave the powercycle action active so that the enable + * and graceful recovery handlers don't recover this host. 
+ * ------------------------- + * Manual action is required to recover a host that has + * exceeded the maximum powercycle retries */ + if ( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED ) + { + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); + } + + /* While the node_ptr->hwmon_powercycle.control_timer is + * inactive the MTC_POWERCYCLE__DONE stagwe is a NOOP + * thereby keeping us doing nothing till the next manual + * action */ + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__DONE ); + } + else if ( node_ptr->availStatus == MTC_AVAIL_STATUS__POWERED_OFF ) + { + ilog ("%s already powered-off, skipping cool-off\n", node_ptr->hostname.c_str()); + mtcTimer_reset ( node_ptr->hwmon_powercycle.control_timer ); + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, 10 ); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWERON ); + } + else + { + ilog ("%s waiting %d seconds before power-on ; cool down time\n", + node_ptr->hostname.c_str(), + MTC_POWERCYCLE_COOLDOWN_DELAY ); + + node_ptr->hwmon_powercycle.holdoff = MTC_POWERCYCLE_COOLDOWN_DELAY/60 ; + + /* Set the power-off timeout */ + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, COMMAND_DELAY ); + + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__COOLOFF ); + node_ptr->hwmon_powercycle.state = RECOVERY_STATE__COOLOFF ; + } + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__POWERED_OFF ); + } + + /* handle timeout case */ + else if ( mtcTimer_expired ( node_ptr->hwmon_powercycle.control_timer ) ) + { + /* TODO: manage the retry count */ + elog ("%s timeout waiting for 'offline' state ; retrying ...\n", node_ptr->hostname.c_str() ); + + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWEROFF ); + } + break ; + } + + case MTC_POWERCYCLE__COOLOFF: + { + if ( mtcTimer_expired ( node_ptr->hwmon_powercycle.control_timer ) ) + { + mtcInvApi_update_task ( node_ptr, + MTC_TASK_POWERCYCLE_COOL, + node_ptr->hwmon_powercycle.attempts, + 
node_ptr->hwmon_powercycle.holdoff); + ilog ("%s Power-Cycle cool-off (%d minutes remaining)\n", + node_ptr->hostname.c_str(), + node_ptr->hwmon_powercycle.holdoff ); + + if ( node_ptr->hwmon_powercycle.holdoff > 1 ) + { + node_ptr->hwmon_powercycle.holdoff-- ; + } + else + { + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWERON ); + node_ptr->hwmon_powercycle.state = RECOVERY_STATE__ACTION ; + } + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_MINS_1 ); + } + break ; + } + + case MTC_POWERCYCLE__POWERON: + { + if ( mtcTimer_expired ( node_ptr->hwmon_powercycle.control_timer ) ) + { + int delay = MTC_IPMITOOL_REQUEST_DELAY ; + clog ("%s %s stage\n", node_ptr->hostname.c_str(), + get_powercycleStages_str(node_ptr->powercycleStage).c_str()); + + if ( node_ptr->bm_accessible == false ) + { + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, + mtcTimer_handler, + MTC_POWERCYCLE_COOLDOWN_DELAY ); + + wlog ("%s not accessible ; waiting another %d seconds before power-on\n", + node_ptr->hostname.c_str(), + MTC_POWERCYCLE_COOLDOWN_DELAY ); + } + rc = ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__POWER_ON ); + if ( rc ) + { + elog ("%s failed to send power-on command to BMC (%d)\n", + node_ptr->hostname.c_str(), + rc ); + + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__FAIL ); + } + else + { + ilog ("%s Power-On requested\n", node_ptr->hostname.c_str() ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_POWERCYCLE_ON, node_ptr->hwmon_powercycle.attempts ); + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, delay ); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWERON_CMND_WAIT ); + } + } + break ; + } + case MTC_POWERCYCLE__POWERON_CMND_WAIT: + { + if ( mtcTimer_expired ( node_ptr->hwmon_powercycle.control_timer ) ) + { + rc = ipmi_command_recv ( node_ptr ); + if ( rc == RETRY ) + { + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_RETRY_WAIT ); + break 
; + } + + if ( rc ) + { + wlog ("%s Power-On request failed (rc:%d)\n", node_ptr->hostname.c_str(), rc ); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__FAIL ); + } + else + { + ilog ("%s Power-On response: %s\n", + node_ptr->hostname.c_str(), + node_ptr->ipmitool_thread_info.data.c_str() ); + + /* Give the power on request time to execute */ + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_CMD_RSP_TIMEOUT ); + + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__OFFLINE ); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWERON_VERIFY ); + } + } + break ; + } + case MTC_POWERCYCLE__POWERON_VERIFY: + { + if ( mtcTimer_expired ( node_ptr->hwmon_powercycle.control_timer ) ) + { + rc = ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__POWER_STATUS ); + if ( rc ) + { + wlog ("%s Power-On command failed (rc:%d)\n", node_ptr->hostname.c_str(), rc ); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__FAIL ); + } + else + { + wlog ("%s power status query requested\n", node_ptr->hostname.c_str() ); + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY ); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWERON_VERIFY_WAIT ); + } + } + break ; + } + case MTC_POWERCYCLE__POWERON_VERIFY_WAIT: + { + if ( mtcTimer_expired ( node_ptr->hwmon_powercycle.control_timer ) ) + { + bool on = false ; + + rc = ipmi_command_recv ( node_ptr ); + if ( rc == RETRY ) + { + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_RETRY_WAIT ); + break ; + } + if ( rc == PASS ) + { + if ( node_ptr->ipmitool_thread_info.data.find (IPMITOOL_POWER_ON_STATUS) != std::string::npos ) + { + on = true ; + } + } + + ilog ("%s power state query result: %s\n", + node_ptr->hostname.c_str(), + node_ptr->ipmitool_thread_info.data.c_str() ); + + if (( rc == PASS ) && ( on == true )) + { + node_ptr->power_on = true ; + ilog ("%s is Powered On - waiting for 'online' (%d sec timeout)\n", + 
node_ptr->hostname.c_str(), + MTC_POWERON_TO_ONLINE_TIMEOUT); + + mtcInvApi_update_task ( node_ptr, MTC_TASK_POWERCYCLE_BOOT, node_ptr->hwmon_powercycle.attempts ); + + /* Set the online timeout */ + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_POWERON_TO_ONLINE_TIMEOUT ); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWERON_WAIT ); + } + else + { + wlog ("%s Power-On failed or did not occur ; retrying (rc:%d:%d)\n", node_ptr->hostname.c_str(), rc, on ); + node_ptr->power_on = false ; + mtcInvApi_update_task ( node_ptr, MTC_TASK_POWERCYCLE_RETRY, node_ptr->hwmon_powercycle.attempts ); + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_BM_POWERON_TIMEOUT ); + node_ptr->hwmon_powercycle.queries++ ; + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWERON ); + break ; + } + } + break ; + } + case MTC_POWERCYCLE__POWERON_WAIT: + { + if ( node_ptr->availStatus == MTC_AVAIL_STATUS__ONLINE ) + { + ilog ("%s online (after powercycle)\n", node_ptr->hostname.c_str()); + + node_ptr->hwmon_powercycle.holdoff = MTC_POWERCYCLE_BACK2BACK_DELAY/60 ; + + mtcTimer_reset ( node_ptr->hwmon_powercycle.control_timer ); + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, 1 ); + node_ptr->hwmon_powercycle.state = RECOVERY_STATE__HOLDOFF ; + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__HOLDOFF ); + } + else if ( node_ptr->hwmon_powercycle.control_timer.ring == true ) + { + elog ("%s timeout waiting for 'online' state\n", node_ptr->hostname.c_str() ); + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__FAIL ); + } + break ; + } + case MTC_POWERCYCLE__HOLDOFF: + { + if ( node_ptr->hwmon_powercycle.control_timer.ring == true ) + { + mtcInvApi_update_task ( node_ptr, + MTC_TASK_POWERCYCLE_HOLD, + node_ptr->hwmon_powercycle.attempts, + node_ptr->hwmon_powercycle.holdoff); + ilog ("%s Power-Cycle hold-off (%d minutes remaining) (uptime:%d)\n", + node_ptr->hostname.c_str(), + 
node_ptr->hwmon_powercycle.holdoff, + node_ptr->uptime ); + + if ( node_ptr->hwmon_powercycle.holdoff > 1 ) + { + node_ptr->hwmon_powercycle.holdoff--; + } + else + { + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__DONE ); + } + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_MINS_1 ); + } + break ; + } + case MTC_POWERCYCLE__DONE: + { + if ( node_ptr->hwmon_powercycle.control_timer.ring == true ) + { + mtcInvApi_update_task ( node_ptr, "" ); + + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE ); + node_ptr->addStage = MTC_ADD__START ; + + hwmon_recovery_monitor ( node_ptr, MTC_EVENT_HWMON_POWERCYCLE ); + + enableStageChange ( node_ptr, MTC_ENABLE__START ); + recoveryStageChange ( node_ptr, MTC_RECOVERY__START); /* reset the fsm */ + disableStageChange ( node_ptr, MTC_DISABLE__START); /* reset the fsm */ + + plog ("%s Power-Cycle Completed (uptime:%d)\n", node_ptr->hostname.c_str(), node_ptr->uptime ); + } + break ; + } + + default: + { + powercycleStageChange ( node_ptr, MTC_POWERCYCLE__DONE ); + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE ); + break ; + } + } + return (rc); +} + +/* Delete Handler + * ----------------- */ +int nodeLinkClass::delete_handler ( struct nodeLinkClass::node * node_ptr ) +{ + switch ( node_ptr->delStage ) + { + case MTC_DEL__START: + { + ilog ("%s Delete Operation Started (%s)\n", node_ptr->hostname.c_str(), node_ptr->uuid.c_str()); + node_ptr->retries = 0 ; + send_mtc_cmd ( node_ptr->hostname, MTC_CMD_WIPEDISK, MGMNT_INTERFACE ) ; + + if ( node_ptr->bm_provisioned == true ) + { + set_bm_prov ( node_ptr, false); + } + + if ( node_ptr->ipmitool_thread_ctrl.stage != THREAD_STAGE__IDLE ) + { + int delay = THREAD_POST_KILL_WAIT ; + thread_kill ( node_ptr->ipmitool_thread_ctrl , node_ptr->ipmitool_thread_info ) ; + + ilog ("%s thread active ; sending kill ; waiting %d seconds\n", + node_ptr->hostname.c_str(), delay ); + mtcTimer_reset ( node_ptr->mtcTimer ); + mtcTimer_start ( 
node_ptr->mtcTimer, mtcTimer_handler, delay ); + node_ptr->delStage = MTC_DEL__WAIT ; + } + else + { + node_ptr->delStage = MTC_DEL__DONE ; + } + + + /* Send delete commands to monitor services */ + send_hbs_command ( node_ptr->hostname, MTC_CMD_DEL_HOST ); + send_hwmon_command ( node_ptr->hostname, MTC_CMD_DEL_HOST ); + send_guest_command ( node_ptr->hostname, MTC_CMD_DEL_HOST ); + + /* Clear all the alarms for this host and generate a costomer delete log */ + alarmUtil_clear_all ( node_ptr->hostname ); + mtcAlarm_log ( node_ptr->hostname, MTC_LOG_ID__COMMAND_DELETE ); + + break ; + } + case MTC_DEL__WAIT: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + if ( node_ptr->ipmitool_thread_ctrl.stage != THREAD_STAGE__IDLE ) + { + if ( node_ptr->retries++ < 3 ) + { + wlog ("%s still waiting on active thread ; sending another kill signal (try %d or %d)\n", + node_ptr->hostname.c_str(), node_ptr->retries, 3 ); + + thread_kill ( node_ptr->ipmitool_thread_ctrl, node_ptr->ipmitool_thread_info ) ; + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, THREAD_POST_KILL_WAIT ); + break ; + } + else + { + elog ("%s thread refuses to stop ; giving up ...\n", + node_ptr->hostname.c_str()); + } + } + node_ptr->delStage = MTC_DEL__DONE ; + } + break ; + } + default: + case MTC_DEL__DONE: + { + dlog ("%s delete almost done !!\n", node_ptr->hostname.c_str()); + adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE ); + del_host ( node_ptr->hostname ); + this->host_deleted = true ; + break ; + } + } + return (PASS); +} + +
+/* Add Handler
+ * -----------------
+ * Services one step of the per-host 'add' FSM selected by
+ * node_ptr->addStage. Stage progression as coded below:
+ *
+ *   START / START_DELAY -> CLEAR_TASK -> START_SERVICES
+ *     -> MTC_SERVICES -> WORKQUEUE_WAIT -> DONE
+ *
+ * DONE clears the admin action and rearms addStage to MTC_ADD__START.
+ *
+ * Param  : node_ptr - host being added.
+ * Returns: PASS for every stage except MTC_ADD__WORKQUEUE_WAIT, where the
+ *          workQueue_done status (including RETRY) is returned as is. */
+int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
+{
+    int rc = PASS ;
+
+    switch ( node_ptr->addStage )
+    {
+        case MTC_ADD__START:
+        case MTC_ADD__START_DELAY: /* handled identically to MTC_ADD__START */
+        {
+            bool timer_set = false ; /* true once a swact task-display timer is started below */
+            plog ("%s Host Add\n", node_ptr->hostname.c_str());
+
+            /* Request a mtcAlive message ; gives us uptime ; don't trust what is in the database */
+            node_ptr->uptime = 0 ;
+            send_mtc_cmd ( node_ptr->hostname, MTC_REQ_MTCALIVE, MGMNT_INTERFACE );
+
+            ilog ("%s %s %s-%s-%s (%s)\n",
+                node_ptr->hostname.c_str(),
+                node_ptr->ip.c_str(),
+                adminState_enum_to_str (node_ptr->adminState).c_str(),
+                operState_enum_to_str (node_ptr->operState).c_str(),
+                availStatus_enum_to_str(node_ptr->availStatus).c_str(),
+
+                node_ptr->uuid.length() ? node_ptr->uuid.c_str() : "" );
+
+            if ( CPE_SYSTEM )
+            {
+                /* Force the subfunction to disabled-offline while the
+                 * CONFIG_COMPLETE_COMPUTE flag file is absent */
+                if ( daemon_is_file_present ( CONFIG_COMPLETE_COMPUTE ) == false )
+                {
+                    if ( node_ptr->operState_subf != MTC_OPER_STATE__DISABLED )
+                    {
+                        subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED, MTC_AVAIL_STATUS__OFFLINE );
+                    }
+                }
+                ilog ("%s-%s %s-%s-%s\n",
+                    node_ptr->hostname.c_str(),
+                    node_ptr->subfunction_str.c_str(),
+                    adminState_enum_to_str (node_ptr->adminState).c_str(),
+                    operState_enum_to_str (node_ptr->operState_subf).c_str(),
+                    availStatus_enum_to_str(node_ptr->availStatus_subf).c_str());
+            }
+
+            /* NOTE(review): only the inventory record is updated to
+             * 'available' here ; node_ptr->availStatus itself is left as
+             * degraded - confirm that is intended. */
+            if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
+                ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) &&
+                ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ))
+            {
+                wlog ("%s Add with availability status 'unlocked-enabled-%s' ; overriding to 'available'\n",
+                          node_ptr->hostname.c_str(),
+                          availStatus_enum_to_str(node_ptr->availStatus).c_str());
+                mtcInvApi_update_state ( node_ptr, "availability", "available" );
+            }
+
+            /* Read the current severity of this host's Enable alarm ; it
+             * drives the alarm handling that follows */
+            EFmAlarmSeverityT sev = mtcAlarm_state ( node_ptr->hostname,
+                                                     MTC_ALARM_ID__ENABLE);
+
+            if ( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED )
+            {
+                node_ptr->alarms[MTC_ALARM_ID__LOCK] = FM_ALARM_SEVERITY_WARNING ;
+
+                /* If the node is locked then the Enable alarm
+                 * should not be present */
+                if ( sev != FM_ALARM_SEVERITY_CLEAR )
+                {
+                    mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__ENABLE );
+                    sev = FM_ALARM_SEVERITY_CLEAR ;
+                }
+            }
+
+            /* Manage enable alarm over process restart.
+             *
+             * - clear the alarm in the active controller case
+             * - maintain the alarm, set degrade state in MAJOR and CRIT cases
+             * - clear alarm for all other severities.
+             */
+            if ( THIS_HOST )
+            {
+                if ( sev != FM_ALARM_SEVERITY_CLEAR )
+                {
+                    mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__ENABLE );
+                }
+            }
+            else
+            {
+                if (( sev == FM_ALARM_SEVERITY_CRITICAL ) ||
+                    ( sev == FM_ALARM_SEVERITY_MAJOR ))
+                {
+                    node_ptr->alarms[MTC_ALARM_ID__ENABLE] = sev ;
+                    node_ptr->degrade_mask |= DEGRADE_MASK_ENABLE ;
+                }
+                else if ( sev != FM_ALARM_SEVERITY_CLEAR )
+                {
+                    mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__ENABLE );
+                }
+            }
+
+            if ( is_controller(node_ptr) )
+            {
+                /* Controller state to report to SM ; disabled unless the
+                 * host is unlocked-enabled or locked */
+                mtc_cmd_enum state = CONTROLLER_DISABLED ;
+
+                if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
+                    ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) &&
+                    (( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) ||
+                     ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )))
+                {
+                    state = CONTROLLER_UNLOCKED ;
+                }
+                else if ( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED )
+                {
+                    state = CONTROLLER_LOCKED ;
+                }
+
+                if ( THIS_HOST )
+                {
+                    nodeLinkClass::set_active_controller_hostname(node_ptr->hostname);
+                    /* A swact-in-progress task left on this host is
+                     * replaced with the 'no complete' task for 20 secs */
+                    if ( !node_ptr->task.compare(MTC_TASK_SWACT_INPROGRESS) )
+                    {
+                        ilog ("%s %s\n",node_ptr->hostname.c_str(), MTC_TASK_SWACT_NO_COMPLETE);
+                        mtcInvApi_update_task ( node_ptr, MTC_TASK_SWACT_NO_COMPLETE);
+                        mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, 20 );
+                        timer_set = true ;
+                    }
+                }
+                else
+                {
+                    nodeLinkClass::set_inactive_controller_hostname(node_ptr->hostname);
+
+                    /* A swact-in-progress task left on the other controller
+                     * is replaced with the 'complete' task for 10 secs */
+                    if ( !node_ptr->task.compare(MTC_TASK_SWACT_INPROGRESS) )
+                    {
+                        ilog ("%s %s\n",node_ptr->hostname.c_str(), MTC_TASK_SWACT_COMPLETE );
+
+                        /* Work Around for issue: */
+                        mtcInvApi_update_uptime ( node_ptr, node_ptr->uptime );
+
+                        mtcInvApi_update_task ( node_ptr, MTC_TASK_SWACT_COMPLETE );
+                        mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, 10 );
+                        timer_set = true ;
+                    }
+                }
+
+                /*************************************************************
+                 * Don't send a disable to SM if we are in simplex and locked.
+                 * This will cause SM to shut down all services.
+                 *
+                 * Including a hostname check just in case simplex mode
+                 * is ever or still true with a second controller provisioned
+                 * but not unlocked. Defensive code.
+                 *
+                 * TODO: This should exist in AIO. Without it services will
+                 *       not be running if you lock controller and then
+                 *       reboot while this controller is disabled.
+                 */
+                if (( THIS_HOST ) &&
+                    ( is_inactive_controller_main_insv() == false ) &&
+                    ( node_ptr->operState == MTC_OPER_STATE__DISABLED ))
+                {
+                    ilog ("%s recovering from %s-disabled\n",
+                              node_ptr->hostname.c_str(),
+                              get_adminState_str (node_ptr->adminState).c_str());
+                }
+                else
+                {
+                    mtcSmgrApi_request ( node_ptr, state , SWACT_FAIL_THRESHOLD );
+                }
+            }
+
+            if ( daemon_get_cfg_ptr()->debug_level & 1 )
+                nodeLinkClass::host_print (node_ptr);
+
+            /* No swact task timer was started ; mark the timer as rung so
+             * that MTC_ADD__CLEAR_TASK does not wait on it */
+            if ( timer_set == false )
+            {
+                node_ptr->mtcTimer.ring = true ;
+            }
+            node_ptr->addStage = MTC_ADD__CLEAR_TASK ;
+            break ;
+        }
+
+        case MTC_ADD__CLEAR_TASK:
+        {
+            if ( is_controller(node_ptr) )
+            {
+                /* controllers hold here until the timer set (or faked)
+                 * by the START stage has rung */
+                if ( node_ptr->mtcTimer.ring == true )
+                {
+                    if ( !node_ptr->task.empty () )
+                    {
+                        mtcInvApi_force_task ( node_ptr, "" );
+                    }
+                }
+                else
+                {
+                    break ;
+                }
+            }
+            else
+            {
+                /* do it immediately for all other server types */
+                mtcInvApi_force_task ( node_ptr, "" );
+            }
+            /* default retries counter to zero before START_SERVICES */
+            node_ptr->retries = 0 ;
+            node_ptr->addStage = MTC_ADD__START_SERVICES ;
+            break ;
+        }
+
+        case MTC_ADD__START_SERVICES:
+        {
+            /* Only flag the request here ; nothing is sent in this stage */
+            if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
+                ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) &&
+                (( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) ||
+                 ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )))
+            {
+                ilog ("%s scheduling start host services\n",
+                          node_ptr->hostname.c_str());
+
+                node_ptr->start_services_needed = true ;
+                node_ptr->start_services_retries = 0 ;
+            }
+
+            node_ptr->addStage = MTC_ADD__MTC_SERVICES ;
+            break ;
+        }
+        case MTC_ADD__MTC_SERVICES:
+        {
+            if ( node_ptr->operState == MTC_OPER_STATE__ENABLED )
+            {
+                /* Inform the VIM that this host is enabled */
+                mtcVimApi_state_change ( node_ptr, VIM_HOST_ENABLED, 3 );
+            }
+            else
+            {
+                /* Otherwise report it as failed or disabled */
+                if ( node_ptr->availStatus == MTC_AVAIL_STATUS__FAILED )
+                {
+                    mtcVimApi_state_change ( node_ptr, VIM_HOST_FAILED, 3 );
+                }
+                else
+                {
+                    mtcVimApi_state_change ( node_ptr, VIM_HOST_DISABLED, 3 );
+                }
+            }
+
+            send_hbs_command ( node_ptr->hostname, MTC_CMD_ADD_HOST );
+
+            /* Add this host to other maintenance services */
+            if (( ! SIMPLEX_CPE_SYSTEM ) && ( node_ptr->bm_provisioned ))
+            {
+                send_hwmon_command ( node_ptr->hostname, MTC_CMD_ADD_HOST );
+            }
+            if ( ( CPE_SYSTEM ) || ( is_compute (node_ptr) == true ))
+            {
+                send_guest_command ( node_ptr->hostname, MTC_CMD_ADD_HOST );
+            }
+
+            /* Start the work queue wait timer ; MTC_ADD__WORKQUEUE_WAIT
+             * handles the work queue timeout and failed done queue
+             * command results and then stops this timer */
+            mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, work_queue_timeout );
+
+            node_ptr->addStage = MTC_ADD__WORKQUEUE_WAIT ;
+            break ;
+        }
+        case MTC_ADD__WORKQUEUE_WAIT:
+        {
+            rc = workQueue_done ( node_ptr );
+            if ( rc == RETRY )
+            {
+                /* wait longer */
+                break ;
+            }
+            else if ( rc == FAIL_WORKQ_TIMEOUT )
+            {
+                wlog ("%s Add failed ; workQueue empty timeout, purging ...\n", node_ptr->hostname.c_str());
+                workQueue_purge ( node_ptr );
+            }
+            else if ( rc != PASS )
+            {
+                wlog ("%s Add failed ; doneQueue contains failed commands\n", node_ptr->hostname.c_str());
+            }
+
+            /* Stop the work queue wait timer ; note that the add carries
+             * on to DONE even after the failures logged above, while rc
+             * keeps the failure code that is returned to the caller */
+            mtcTimer_reset ( node_ptr->mtcTimer );
+
+            /* Only start heartbeat on this add operation if host is
+             * already unlocked and enabled and not the active controller */
+            if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
+                ( node_ptr->operState == MTC_OPER_STATE__ENABLED ))
+            {
+                if (( NOT_THIS_HOST ) && ( LARGE_SYSTEM ))
+                {
+                    send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST );
+                }
+            }
+
+            /* Only run hardware monitor if the bm ip is provisioned */
+            if (( hostUtil_is_valid_bm_type ( node_ptr->bm_type )) &&
+                ( hostUtil_is_valid_ip_addr ( node_ptr->bm_ip )))
+            {
+                set_bm_prov ( node_ptr, true ) ;
+                send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST );
+            }
+
+            /* handle coming out of the ADD in a degraded state */
+            if (( node_ptr->degrade_mask != 0 ) &&
+                (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
+                 ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) &&
+                 ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )))
+            {
+                availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
+            }
+
+            node_ptr->mtcAlive_gate = false ;
+            node_ptr->addStage = MTC_ADD__DONE ;
+            break;
+        }
+        case MTC_ADD__DONE:
+        default:
+        {
+            adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE );
+
+            /* Send sysinv the wrsroot password hash
+             * and aging data as an install command */
+            if ( SIMPLEX && THIS_HOST &&
+                ( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED ))
+            {
+                node_ptr->configStage = MTC_CONFIG__START ;
+                node_ptr->configAction = MTC_CONFIG_ACTION__INSTALL_PASSWD ;
+            }
+
+            /* Clear the BM alarm, and its cached severity, for hosts
+             * with a provisioned bm */
+            if (( ! SIMPLEX_CPE_SYSTEM ) &&
+                ( node_ptr->bm_provisioned == true ))
+            {
+                mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__BM );
+                node_ptr->alarms[MTC_ALARM_ID__BM] = FM_ALARM_SEVERITY_CLEAR ;
+            }
+
+            /* Special Add handling for the AIO system */
+            if ( CPE_SYSTEM )
+            {
+                if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
+                    ( node_ptr->operState == MTC_OPER_STATE__ENABLED ))
+                {
+                    /* If AIO and unlocked-enabled and not this host then
+                     * start the heartbeat service
+                     * (inactive controller case) */
+                    if ( NOT_THIS_HOST )
+                    {
+                        send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST );
+                    }
+
+                    /* In AIO if in DOR mode and the host is unlocked enabled
+                     * we need to run the subfunction handler and request
+                     * to start host services. */
+                    if ( this->dor_mode_active )
+                    {
+                        node_ptr->start_services_needed_subf = true ;
+                        adminActionChange ( node_ptr , MTC_ADMIN_ACTION__ENABLE_SUBF );
+                    }
+                }
+            }
+
+            node_ptr->addStage = MTC_ADD__START; /* rearm the FSM for the next add */
+            plog ("%s Host Add Completed (uptime:%d)\n", node_ptr->hostname.c_str(), node_ptr->uptime );
+            node_ptr->add_completed = true ;
+            break ;
+        }
+    }
+    return (rc);
+}
+
+int nodeLinkClass::bm_handler ( struct nodeLinkClass::node * node_ptr ) +{ + /* Call the bmc ssh connection monitor if this node's bm is provisioned */ + if ( node_ptr->bm_provisioned == true ) + { + if (( node_ptr->bm_accessible == true ) && ( node_ptr->bm_ping_info.ok == false )) + { + wlog ("%s bmc access lost\n", node_ptr->hostname.c_str()); + + /* remove the mc info file in case there is a firmware + * upgrade in progress. hwmond reads it and get + * the bmc fw version from it */ + string mc_info_filename = IPMITOOL_OUTPUT_DIR ; + mc_info_filename.append(node_ptr->hostname); + mc_info_filename.append(IPMITOOL_MC_INFO_FILE_SUFFIX); + daemon_remove_file ( mc_info_filename.data() ); + + thread_kill ( node_ptr->ipmitool_thread_ctrl, node_ptr->ipmitool_thread_info ); + + bmc_access_data_init ( node_ptr ); + + ipmiUtil_mc_info_init ( node_ptr->mc_info ); + + node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ; + + /* start a timer that will raise the BM Access alarm + * if we are not accessible by the time it expires */ + plog ("%s bmc access timer started (%d secs)\n", node_ptr->hostname.c_str(), MTC_MINS_2); + mtcTimer_reset ( node_ptr->bmc_access_timer ); + mtcTimer_start ( node_ptr->bmc_access_timer, mtcTimer_handler, MTC_MINS_2 ); + } + + /* This block queries and logs BMC Info and last Reset Cause */ + if (( node_ptr->bm_accessible == false ) && + ( node_ptr->bm_ping_info.ok == true ) && + (( node_ptr->mc_info_query_done == false ) || + ( node_ptr->reset_cause_query_done == false ) || + ( node_ptr->power_status_query_done == false )) && + ( mtcTimer_expired (node_ptr->bm_timer ) ==
true )) + { + int rc = PASS ; + if (( node_ptr->mc_info_query_active == false ) && + ( node_ptr->mc_info_query_done == false )) + { + if ( ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__MC_INFO ) != PASS ) + { + elog ("%s %s send failed\n", + node_ptr->hostname.c_str(), + getIpmiCmd_str(node_ptr->ipmitool_thread_info.command)); + mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + } + else + { + dlog ("%s %s\n", node_ptr->hostname.c_str(), + getIpmiCmd_str(node_ptr->ipmitool_thread_info.command)); + mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_RETRY_WAIT ); + node_ptr->mc_info_query_active = true ; + } + } + else if (( node_ptr->mc_info_query_active == true ) && + ( node_ptr->mc_info_query_done == false)) + { + if ( ( rc = ipmi_command_recv ( node_ptr ) ) == RETRY ) + { + mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_RETRY_WAIT ); + } + else if ( rc != PASS ) + { + /* this error is reported by the ipmi receive driver ... 
+ * blog ("%s %s command failed\n", node_ptr->hostname.c_str(), + * getIpmiCmd_str(node_ptr->ipmitool_thread_info.command)); + */ + node_ptr->mc_info_query_active = false ; + node_ptr->ipmitool_thread_ctrl.done = true ; + mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + } + else + { + node_ptr->mc_info_query_active = false ; + node_ptr->mc_info_query_done = true ; + node_ptr->ipmitool_thread_ctrl.done = true ; + ipmiUtil_mc_info_load ( node_ptr->hostname, node_ptr->ipmitool_thread_info.data.data(), node_ptr->mc_info ); + } + } + else if (( node_ptr->mc_info_query_active == false ) && + ( node_ptr->mc_info_query_done == true )) + { + if (( node_ptr->reset_cause_query_active == false ) && + ( node_ptr->reset_cause_query_done == false )) + { + if ( ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__RESTART_CAUSE ) != PASS ) + { + elog ("%s %s send failed\n", node_ptr->hostname.c_str(), + getIpmiCmd_str(node_ptr->ipmitool_thread_info.command)); + mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + } + else + { + dlog ("%s %s\n", node_ptr->hostname.c_str(), + getIpmiCmd_str(node_ptr->ipmitool_thread_info.command)); + mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_RETRY_WAIT ); + node_ptr->reset_cause_query_active = true ; + } + } + else if (( node_ptr->reset_cause_query_active == true ) && + ( node_ptr->reset_cause_query_done == false )) + { + if ( ( rc = ipmi_command_recv ( node_ptr ) ) == RETRY ) + { + mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_RETRY_WAIT ); + } + else if ( rc != PASS ) + { + elog ("%s %s command failed\n", node_ptr->hostname.c_str(), + getIpmiCmd_str(node_ptr->ipmitool_thread_info.command)); + node_ptr->reset_cause_query_active = false ; + mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + node_ptr->ipmitool_thread_ctrl.done = true ; + } + else + { + node_ptr->reset_cause_query_active = false ; + 
node_ptr->reset_cause_query_done = true ; + node_ptr->ipmitool_thread_ctrl.done = true ; + ilog ("%s %s\n", node_ptr->hostname.c_str(), + node_ptr->ipmitool_thread_info.data.c_str()); + } + node_ptr->ipmitool_thread_ctrl.done = true ; + } + else if (( node_ptr->mc_info_query_done == true ) && + ( node_ptr->reset_cause_query_done == true ) && + ( node_ptr->power_status_query_done == false )) + { + if ( node_ptr->power_status_query_active == false ) + { + if ( ipmi_command_send ( node_ptr, IPMITOOL_THREAD_CMD__POWER_STATUS ) != PASS ) + { + elog ("%s %s send failed\n", node_ptr->hostname.c_str(), + getIpmiCmd_str(node_ptr->ipmitool_thread_info.command)); + mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + } + else + { + dlog ("%s %s\n", node_ptr->hostname.c_str(), + getIpmiCmd_str(node_ptr->ipmitool_thread_info.command)); + mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_RETRY_WAIT ); + node_ptr->power_status_query_active = true ; + } + } + else if ( node_ptr->power_status_query_done == false ) + { + if ( ( rc = ipmi_command_recv ( node_ptr ) ) == RETRY ) + { + mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_RETRY_WAIT ); + } + else if ( rc ) + { + node_ptr->power_status_query_active = false ; + mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_POWER_ACTION_RETRY_DELAY ); + node_ptr->ipmitool_thread_ctrl.done = true ; + } + else + { + node_ptr->power_status_query_active = false ; + node_ptr->power_status_query_done = true ; + node_ptr->ipmitool_thread_ctrl.done = true ; + node_ptr->ipmitool_thread_info.command = 0 ; + node_ptr->bm_accessible = true ; + /* all three bmc queries are done ; access is established */ + mtcTimer_reset ( node_ptr->bmc_access_timer ); + + ilog ("%s %s\n", node_ptr->hostname.c_str(), + node_ptr->ipmitool_thread_info.data.c_str()); + plog ("%s bmc is accessible\n", node_ptr->hostname.c_str()); + + if ( node_ptr->ipmitool_thread_info.data.find (IPMITOOL_POWER_OFF_STATUS) != std::string::npos ) + { + if
( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED ) + { + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__POWERED_OFF ); + } + else + { + wlog ("%s is powered off while in the unlocked state\n", node_ptr->hostname.c_str()); + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__POWERED_OFF ); + } + } + } + node_ptr->ipmitool_thread_ctrl.done = true ; + } + } + } + } + if ( node_ptr->bm_ping_info.ok == false ) + { + /* Auto correct key ping information ; should never occur but if it does ... */ + if (( node_ptr->bm_ping_info.hostname.empty()) || ( node_ptr->bm_ping_info.ip.empty())) + { + /* if the bm ip is not yet learned then this log will flood */ + //slog ("%s host ping info missing ; (%d:%d)\n", + // node_ptr->hostname.c_str(), + // node_ptr->bm_ping_info.hostname.empty(), + // node_ptr->bm_ping_info.ip.empty()); + node_ptr->bm_ping_info.hostname = node_ptr->hostname ; + node_ptr->bm_ping_info.ip = node_ptr->bm_ip ; + } + } + + /* don't run the ping monitor if the ip address is invalid */ + if ( hostUtil_is_valid_ip_addr ( node_ptr->bm_ping_info.ip ) == true ) + { + pingUtil_acc_monitor ( node_ptr->bm_ping_info ); + } + + /* Manage the Board Management Access Alarm */ + if (( node_ptr->bm_accessible == false ) && + ( mtcTimer_expired ( node_ptr->bmc_access_timer ) == true )) + { + node_ptr->bm_ping_info.ok = false ; + + node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ; + + /* start a timer that will raise the BM Access alarm + * if we are not accessible by the time it expires */ + plog ("%s bmc access timer started (%d secs)\n", node_ptr->hostname.c_str(), MTC_MINS_2); + mtcTimer_reset ( node_ptr->bmc_access_timer ); + mtcTimer_start ( node_ptr->bmc_access_timer, mtcTimer_handler, MTC_MINS_2 ); + + if ( node_ptr->alarms[MTC_ALARM_ID__BM] == FM_ALARM_SEVERITY_CLEAR ) + { + mtcAlarm_warning ( node_ptr->hostname, MTC_ALARM_ID__BM ); + node_ptr->alarms[MTC_ALARM_ID__BM] = FM_ALARM_SEVERITY_WARNING ; + } + } + + /* if BMs are accessible then see if we need to
clear the Major BM Alarm. */ + else if (( node_ptr->alarms[MTC_ALARM_ID__BM] != FM_ALARM_SEVERITY_CLEAR ) && + ( node_ptr->mc_info_query_done == true ) && + ( node_ptr->reset_cause_query_done == true ) && + ( node_ptr->power_status_query_done == true )) + { + mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__BM ); + node_ptr->alarms[MTC_ALARM_ID__BM] = FM_ALARM_SEVERITY_CLEAR ; + } + } + else + { + if ( node_ptr->alarms[MTC_ALARM_ID__BM] != FM_ALARM_SEVERITY_CLEAR ) + { + mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__BM ); + node_ptr->alarms[MTC_ALARM_ID__BM] = FM_ALARM_SEVERITY_CLEAR ; + } + } + + return (PASS); +} + + +int nodeLinkClass::oos_test_handler ( struct nodeLinkClass::node * node_ptr ) +{ + switch (node_ptr->oosTestStage) + { + case MTC_OOS_TEST__LOAD_NEXT_TEST: + { + oosTestStageChange ( node_ptr, MTC_OOS_TEST__START_WAIT ); + break ; + } + case MTC_OOS_TEST__START_WAIT: + { + /* Monitor timer errors */ + mtcTimer_dump_data (); + + // blog ("%s Inservice Test Period %d secs\n", node_ptr->hostname.c_str(), oos_test_period); + mtcTimer_start ( node_ptr->oosTestTimer, mtcTimer_handler, oos_test_period ); + oosTestStageChange ( node_ptr, MTC_OOS_TEST__WAIT ); + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__CORRUPT_TOKEN, node_ptr->hostname )) + tokenUtil_fail_token (); + + else if ( daemon_want_fit ( FIT_CODE__STUCK_TASK, node_ptr->hostname )) + mtcInvApi_update_task ( node_ptr, MTC_TASK_SWACT_INPROGRESS); + + else if ( daemon_want_fit ( FIT_CODE__STOP_HOST_SERVICES, node_ptr->hostname )) + { + bool start = false ; + this->launch_host_services_cmd ( node_ptr, start ); + } + else if ( daemon_want_fit ( FIT_CODE__START_HOST_SERVICES, node_ptr->hostname )) + { + if (( node_ptr->start_services_needed == false ) && + ( node_ptr->start_services_running_main == false )) + { + node_ptr->start_services_needed = true ; + node_ptr->start_services_retries = 0 ; + } + else + { + ilog ("%s start host services (FIT) rejected (%d:%d)\n", + 
node_ptr->hostname.c_str(), + node_ptr->start_services_needed, + node_ptr->start_services_running_main); + } + } +#endif + + + /* Avoid forcing the states to the database when on the first & second pass. + * This is because it is likely we just read all the states and + * if coming out of a DOR or a SWACT we don't need to un-necessarily + * produce that extra sysinv traffic. + * Also, no point forcing the states while there is an admin action + * or enable or graceful recovery going on as well because state changes + * are being done in the FSM already */ + if (( node_ptr->oos_test_count > 1 ) && + ( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) && + ( !node_ptr->handlerStage.raw ) && + ( !node_ptr->recoveryStage )) + { + /* Change the oper and avail states in the database */ + allStateChange ( node_ptr, node_ptr->adminState, + node_ptr->operState, + node_ptr->availStatus ); + } + +#ifdef WANT_CLEAR_ALARM_AUDIT + + /* TODO: Obsolete with new Alarm Strategy */ + /* Self Correct Stuck Failure Alarms */ + if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && + ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && + (( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) || + ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ))) + { + if ( node_ptr->alarms[MTC_ALARM_ID__CONFIG] != FM_ALARM_SEVERITY_CLEAR ) + { + mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__CONFIG ); + node_ptr->alarms[MTC_ALARM_ID__CONFIG] = FM_ALARM_SEVERITY_CLEAR ; + } + alarm_enabled_clear ( node_ptr , false); + } +#endif + /* Make sure the locked status on the host itself is set */ + if (( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED ) && + ( node_ptr->operState == MTC_OPER_STATE__DISABLED ) && + ( node_ptr->availStatus == MTC_AVAIL_STATUS__ONLINE ) && + ( !(node_ptr->mtce_flags & MTC_FLAG__I_AM_LOCKED) )) + { + ilog ("%s setting 'locked' status\n", node_ptr->hostname.c_str()); + + /* Tell the host that it is locked */ + send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, 
MGMNT_INTERFACE); + } + + if (( daemon_is_file_present ( MTC_CMD_FIT__GOENABLE_AUDIT )) && + ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && + ( node_ptr->operState == MTC_OPER_STATE__ENABLED )) + { + /* Request Out-Of-Service test execution */ + send_mtc_cmd ( node_ptr->hostname, MTC_REQ_MAIN_GOENABLED, MGMNT_INTERFACE ); + if ( node_ptr->operState_subf == MTC_OPER_STATE__ENABLED) + { + send_mtc_cmd ( node_ptr->hostname, MTC_REQ_SUBF_GOENABLED, MGMNT_INTERFACE ); + } + } + + break ; + } + case MTC_OOS_TEST__WAIT: + { + if ( node_ptr->oosTestTimer.ring == true ) + { + oosTestStageChange ( node_ptr, MTC_OOS_TEST__DONE ); + } + break ; + } + case MTC_OOS_TEST__DONE: + default: + { + node_ptr->oos_test_count++ ; + oosTestStageChange ( node_ptr, MTC_OOS_TEST__LOAD_NEXT_TEST ); + + /* clear out the retry counter periodically */ + node_ptr->http_retries_cur = 0 ; + + break ; + } + } + return (PASS); +} + + + +int local_counter = 0 ; + +int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr ) +{ + switch (node_ptr->insvTestStage) + { + case MTC_INSV_TEST__START: + { + mtcTimer_reset ( node_ptr->insvTestTimer ); + + /* Run the inservice test more frequently while + * start_services_needed is true and we are not + * in failure retry mode */ + if (( node_ptr->start_services_needed == true ) && + ( node_ptr->hostservices_failed == false ) && + ( node_ptr->hostservices_failed_subf == false )) + { + mtcTimer_start ( node_ptr->insvTestTimer, mtcTimer_handler, MTC_SECS_2 ); + } + else + { + mtcTimer_start ( node_ptr->insvTestTimer, mtcTimer_handler, insv_test_period ); + } + insvTestStageChange ( node_ptr, MTC_INSV_TEST__WAIT ); + break ; + } + case MTC_INSV_TEST__WAIT: + { + if ( node_ptr->insvTestTimer.ring == true ) + { + insvTestStageChange ( node_ptr, MTC_INSV_TEST__RUN ); + } + /* manage degrade state and alarms */ + if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && + ( node_ptr->operState == MTC_OPER_STATE__ENABLED )) + { +
/************************************************************ + * Manage In-Service Alarms * + ***********************************************************/ + + /* Manage Inservice Enable Alarm */ + if ( node_ptr->hostservices_failed ) + { + alarm_insv_failure ( node_ptr ); + } + else + { + alarm_insv_clear ( node_ptr, false ); + } + + /* Manage Compute Subfunction Failure Alarm */ + if ( node_ptr->hostservices_failed_subf ) + { + alarm_compute_failure ( node_ptr, FM_ALARM_SEVERITY_MAJOR ); + } + else + { + alarm_compute_clear ( node_ptr, false ); + } + + /************************************************************ + * Manage host degrade based on degrade mask * + ***********************************************************/ + if (( node_ptr->degrade_mask == 0 ) && + ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )) + { + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE ); + } + + /* expected degrade audit */ + else if (( node_ptr->degrade_mask ) && + ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )) + { + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED ); + } + } + break ; + } + case MTC_INSV_TEST__RUN: + { + +#ifdef WANT_FIT_TESTING + + daemon_load_fit (); + + if ( daemon_want_fit ( FIT_CODE__UNLOCK_HOST, node_ptr->hostname ) ) + { + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__UNLOCK ); + } + + if ( daemon_want_fit ( FIT_CODE__LOCK_HOST, node_ptr->hostname ) ) + { + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__LOCK ); + } + + if ( daemon_want_fit ( FIT_CODE__FORCE_LOCK_HOST, node_ptr->hostname ) ) + { + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__FORCE_LOCK ); + } + + if (( daemon_want_fit ( FIT_CODE__DO_NOTHING_THREAD, node_ptr->hostname )) || + ( daemon_want_fit ( FIT_CODE__STRESS_THREAD , node_ptr->hostname ))) + { + node_ptr->ipmitool_thread_ctrl.stage = THREAD_STAGE__IGNORE ; + node_ptr->ipmitool_thread_ctrl.id = true ; + node_ptr->ipmitool_thread_info.id = true ; + node_ptr->ipmitool_thread_info.command = 
IPMITOOL_THREAD_CMD__POWER_STATUS ; + + /* Update / Setup the BMC access credentials */ + node_ptr->thread_extra_info.bm_ip = node_ptr->bm_ip ; + node_ptr->thread_extra_info.bm_un = node_ptr->bm_un ; + node_ptr->thread_extra_info.bm_pw = node_ptr->bm_pw ; + node_ptr->thread_extra_info.bm_type = node_ptr->bm_type ; + node_ptr->ipmitool_thread_info.extra_info_ptr = &node_ptr->thread_extra_info ; + if ( thread_launch_thread ( mtcThread_ipmitool, &node_ptr->ipmitool_thread_info ) == 0 ) + { + slog ("%s FIT launching mtcThread_ipmitool power query thread ; launch failed\n", node_ptr->hostname.c_str()); + } + else + { + slog ("%s FIT launching mtcThread_ipmitool power query thread\n", node_ptr->hostname.c_str()); + } + node_ptr->ipmitool_thread_ctrl.done = true ; + } + +#endif + + /* Manage active controller auto recovery bool. + * If the inactive controller is inservice then disable + * controller autorecovery. Otherwise enable it but in this case + * don't change the disable bool as that is used to gate auto + * recovery once the threshold is reached */ + if ( is_controller ( node_ptr ) && NOT_THIS_HOST ) + { + if (( this->autorecovery_enabled == true ) && + ( node_ptr->operState == MTC_OPER_STATE__ENABLED )) + { + autorecovery_clear ( CONTROLLER_0 ); + autorecovery_clear ( CONTROLLER_1 ); + this->autorecovery_enabled = false ; + this->autorecovery_disabled = false ; + } + else if (( this->autorecovery_enabled == false ) && + ( node_ptr->operState != MTC_OPER_STATE__ENABLED )) + { + this->autorecovery_enabled = true ; + } + } + + /* Monitor the health of the host - no pass file */ + if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && + ( node_ptr->operState == MTC_OPER_STATE__ENABLED )) + { + /************************************************************ + * Prevent the start host services from running while in DOR + ***********************************************************/ + if ( node_ptr->dor_recovery_mode == true ) + { + /* wait longer for the host to
boot up */ + wlog ("%s DOR recovery active ; waiting on host\n", + node_ptr->hostname.c_str()); + } + else if ( this->dor_mode_active == true ) + { + ilog_throttled ( this->dor_mode_active_log_throttle, 20, + "DOR mode active\n"); + } + + /************************************************************* + * Handle Start Host Services if its posted for execution + ************************************************************/ + else if ( node_ptr->start_services_needed == true ) + { + /* If Main Start Host Services is not already running then launch it */ + if (( node_ptr->start_services_running_main == false ) && + ( node_ptr->start_services_running_subf == false )) + { + bool start = true ; + if ( this->launch_host_services_cmd ( node_ptr , start ) != PASS ) + { + node_ptr->hostservices_failed = true ; + node_ptr->start_services_retries++ ; + } + else + { + node_ptr->start_services_running_main = true ; + } + } + /* Handle start host services response for both main and + * subfunction levels */ + else + { + /* Wait for host services to complete - pass or fail. + * The host_services_handler manages timeout. */ + int rc = this->host_services_handler ( node_ptr ); + if ( rc == RETRY ) + { + /* wait for the mtcClient's response ... 
*/ + break ; + } + + node_ptr->start_services_running_main = false ; + + if ( rc != PASS ) + { + + /* set the correct failed flag */ + if ( node_ptr->start_services_needed_subf == true ) + { + node_ptr->start_services_running_subf = false ; + node_ptr->hostservices_failed_subf = true ; + } + else + { + node_ptr->hostservices_failed = true ; + } + + node_ptr->start_services_retries++ ; + + wlog ("%s %s request failed ; (retry %d)\n", + node_ptr->hostname.c_str(), + node_ptr->host_services_req.name.c_str(), + node_ptr->start_services_retries); + } + else /* success path */ + { + /* clear the correct fail flag */ + if (( node_ptr->start_services_needed_subf == true ) && + ( node_ptr->start_services_running_subf == true )) + { + node_ptr->start_services_needed_subf = false ; + node_ptr->start_services_running_subf = false ; + node_ptr->hostservices_failed_subf = false ; + } + else + { + node_ptr->hostservices_failed = false ; + } + + /************************************************* + * Handle running the subfunction start compute + * host services command as a background operation + * after the controller start result has come in + * as a PASS. 
+ ************************************************/ + if ( node_ptr->start_services_needed_subf == true ) + { + bool start = true ; + bool subf = node_ptr->start_services_needed_subf ; + if ( this->launch_host_services_cmd ( node_ptr, start, subf ) != PASS ) + { + node_ptr->hostservices_failed_subf = true ; + + /* try again on next audit */ + node_ptr->start_services_retries++ ; + } + else + { + node_ptr->start_services_running_subf = true ; + } + } + else + { + /* All host service scripts pass ; done */ + clear_hostservices_ctls ( node_ptr ); + node_ptr->hostservices_failed_subf = false ; + node_ptr->hostservices_failed = false ; + } + } + } + } + + if ( NOT_THIS_HOST ) + { + if ((( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) || + ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )) && + (!(node_ptr->mtce_flags & MTC_FLAG__I_AM_HEALTHY) && + !(node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY))) + { + if ( node_ptr->unknown_health_reported == false ) + { + wlog ( "%s has UNKNOWN HEALTH\n", node_ptr->hostname.c_str()); + node_ptr->unknown_health_reported = true ; + } + } + } + } + + /** Manage the subfunction goenabled alarm over a mtcAgent restart + * In the restart case the subfunction fsm enable handler is not run so + * we try to detect the missing goenabled_subf flag as an inservice test. + * + * Only in CPE type + * - clear the alarm if the issue goes away - + * i.e. the goenabled tests eventually pass. Today + * they are not re-run in the background but someday they may be + * - raise the alarm and go degraded if the goEnabled_subf flag is not set + * and we have only a single enabled controller (which must be this one) + * and the alarm is not already raised.
+ **/ + if ( CPE_SYSTEM ) + { + if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && + ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && + ( node_ptr->mtce_flags & MTC_FLAG__SUBF_CONFIGURED )) /* handle initial install case */ + { + if (( node_ptr->goEnabled_subf == true ) && + ( node_ptr->inservice_failed_subf == false ) && + ( node_ptr->goEnabled_failed_subf == false ) && + ( node_ptr->hostservices_failed_subf == false )) + { + if ( node_ptr->alarms[MTC_ALARM_ID__CH_COMP] != FM_ALARM_SEVERITY_CLEAR ) + { + alarm_compute_clear ( node_ptr, false ); + ilog ("%s cleared alarm %s due to failure recovery (degrade:%x)\n", + node_ptr->hostname.c_str(), + mtcAlarm_getId_str(MTC_ALARM_ID__CH_COMP).c_str(), + node_ptr->degrade_mask); + + + if ( node_ptr->degrade_mask == 0 ) + { + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__AVAILABLE ); + + subfStateChange ( node_ptr, MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__AVAILABLE ); + + /* Inform the VIM that this host is enabled */ + mtcVimApi_state_change ( node_ptr, VIM_HOST_ENABLED, 3 ); + } + } + } + /* + * Send out-of-service test command and wait for the + * next audit interval to see the result. + * + * node_ptr->goEnabled_subf == true is pass + * node_ptr->goEnabled_subf_failed == true is fail + * + **/ + if (( node_ptr->operState_subf == MTC_OPER_STATE__DISABLED ) && + ( this->autorecovery_disabled == false ) && + ( node_ptr->start_services_needed == false )) + { + if (( node_ptr->adminAction != MTC_ADMIN_ACTION__ENABLE_SUBF ) && + ( node_ptr->adminAction != MTC_ADMIN_ACTION__ENABLE )) + { + if (( node_ptr->inservice_failed_subf == false ) && + ( node_ptr->hostservices_failed_subf == false )) + { + ilog ("%s-compute ... 
running recovery enable\n", node_ptr->hostname.c_str()); + + alarm_compute_clear ( node_ptr, true ); + + enableStageChange ( node_ptr, MTC_ENABLE__START ); + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__ENABLE_SUBF ); + } + else + { + ilog ("%s-compute subfunction is unlocked-disabled (non-operational)\n", + node_ptr->hostname.c_str()); + } + } + else + { + ilog ("%s-compute ... waiting on current goEnable completion\n", node_ptr->hostname.c_str() ); + } + } + } + /* Only raise this alarm while in simplex */ + if (( num_controllers_enabled() < 2 ) && + (( node_ptr->goEnabled_failed_subf == true ) || + ( node_ptr->inservice_failed_subf == true ) || + ( node_ptr->hostservices_failed_subf == true ))) + { + if ( node_ptr->alarms[MTC_ALARM_ID__CH_COMP] == FM_ALARM_SEVERITY_CLEAR ) + { + wlog ("%s insv test detected subfunction failure ; degrading host\n", + node_ptr->hostname.c_str()); + + alarm_compute_failure ( node_ptr , FM_ALARM_SEVERITY_MAJOR ); + + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__DEGRADED ); + + subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED, + MTC_AVAIL_STATUS__FAILED ); + + } + } + } + + /* Monitor the health of the host - no pass file */ + if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && + ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && + (( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) || + ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ))) + { + /* Manage asserting degrade due to Software Management */ + if (( node_ptr->mtce_flags & MTC_FLAG__SM_DEGRADED ) && + ( !(node_ptr->degrade_mask & DEGRADE_MASK_SM))) + { + /* set the SM degrade flag in the mask */ + node_ptr->degrade_mask |= DEGRADE_MASK_SM ; + + ilog ("%s sm degrade\n", node_ptr->hostname.c_str()); + + /* degrade the host if not already degraded */ + if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) + { + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED ); + } + } + + /* 
Manage de-asserting degrade due to Software Management */ + if ((!(node_ptr->mtce_flags & MTC_FLAG__SM_DEGRADED)) && + (node_ptr->degrade_mask & DEGRADE_MASK_SM)) + { + /* clear the SM degrade flag */ + node_ptr->degrade_mask &= ~DEGRADE_MASK_SM ; + + ilog ("%s sm degrade clear\n", node_ptr->hostname.c_str()); + + /* if the degrade mask is now clear then consider clearing the degrade state */ + if ( node_ptr->degrade_mask == 0 ) + { + /* ... but only if we are degraded */ + if ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ) + { + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE ); + } + } + } + + if ( node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY) + { + /* not healthy .... */ + if ( THIS_HOST ) + { + /* initial config is complete and last manifest apply failed ... */ + if (( daemon_is_file_present ( CONFIG_COMPLETE_FILE )) && + ( daemon_is_file_present ( CONFIG_FAIL_FILE ))) + { + wlog_throttled ( node_ptr->health_threshold_counter, (MTC_UNHEALTHY_THRESHOLD*3), "%s is UNHEALTHY\n", node_ptr->hostname.c_str()); + if ( node_ptr->health_threshold_counter >= MTC_UNHEALTHY_THRESHOLD ) + { + node_ptr->degrade_mask |= DEGRADE_MASK_CONFIG ; + if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) + { + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED ); + } + + /* threshold is reached so raise the config alarm if it is not already raised */ + if ( node_ptr->alarms[MTC_ALARM_ID__CONFIG] != FM_ALARM_SEVERITY_CRITICAL ) + { + mtcAlarm_critical ( node_ptr->hostname, MTC_ALARM_ID__CONFIG ); + node_ptr->alarms[MTC_ALARM_ID__CONFIG] = FM_ALARM_SEVERITY_CRITICAL ; + } + } + } + } + else + { + if ( ++node_ptr->health_threshold_counter >= MTC_UNHEALTHY_THRESHOLD ) + { + elog ( "%s is UNHEALTHY failed ; forcing re-enabled\n", + node_ptr->hostname.c_str()); + + force_full_enable ( node_ptr ) ; + } + else + { + wlog ( "%s is UNHEALTHY (cnt:%d)\n", + node_ptr->hostname.c_str(), + node_ptr->health_threshold_counter ); + } + } + } + else + { + 
node_ptr->health_threshold_counter = 0 ; + } + } + + node_ptr->insv_test_count++ ; + insvTestStageChange ( node_ptr, MTC_INSV_TEST__START ); + + break ; + } + default: + { + node_ptr->insv_test_count++ ; + insvTestStageChange ( node_ptr, MTC_INSV_TEST__START ); + break ; + } + } + return (PASS); +} + + +int nodeLinkClass::cfg_handler ( struct nodeLinkClass::node * node_ptr ) +{ + int rc = PASS ; + switch (node_ptr->configStage ) + { + case MTC_CONFIG__START: + { + ilog ("%s Starting a %s:%s shadow entry change check\n", + node_ptr->hostname.c_str(), + SHADOW_FILE, + USERNAME_ROOT ); + + /* Post the show command with a catch-all timeout timer */ + rc = mtcInvApi_cfg_show ( node_ptr->hostname ) ; + if ( rc ) + { + elog ("%s Config SHOW command failed\n", node_ptr->hostname.c_str() ); + configStageChange ( node_ptr, MTC_CONFIG__FAILURE ); + } + else + { + mtcTimer_start ( node_ptr->mtcConfig_timer, mtcTimer_handler, (sysinv_timeout+1) ); + configStageChange ( node_ptr, MTC_CONFIG__SHOW ); + } + break ; + } + case MTC_CONFIG__SHOW: + { + /* timeout yet ? */ + if ( node_ptr->mtcConfig_timer.ring == true ) + { + elog ("%s timeout\n", node_ptr->cfgEvent.log_prefix.c_str()); + configStageChange ( node_ptr, MTC_CONFIG__TIMEOUT ); + break ; + } + + /* done Yet ? 
*/ + rc = doneQueue_dequeue ( node_ptr->cfgEvent ) ; + if ( rc == RETRY ) + { + /* Still waiting */ + break ; + } + else if ( rc == PASS ) + { + string temp_value = "" ; + mtcTimer_stop ( node_ptr->mtcConfig_timer ); + node_ptr->cfgEvent.value = "" ; + node_ptr->cfgEvent.uuid = "" ; + if (( rc = jsonUtil_get_array_idx ( (char*)node_ptr->cfgEvent.response.data(), "iusers", 0 , temp_value )) == PASS ) + { + jsonUtil_get_key_val ( (char*)temp_value.data(), "root_sig", node_ptr->cfgEvent.value); + } + + if ( node_ptr->cfgEvent.value.empty() || + !node_ptr->cfgEvent.value.compare("null") || rc ) + { + elog ("%s null or missing 'root_sig' value (%d:%s)\n", + node_ptr->cfgEvent.service.c_str(), rc, + node_ptr->cfgEvent.value.empty() ? "empty" : node_ptr->cfgEvent.value.c_str()); + + node_ptr->cfgEvent.status = FAIL_INVALID_DATA ; + configStageChange ( node_ptr, MTC_CONFIG__FAILURE ); + break; + } + + ilog ("%s root_sig:%s\n", node_ptr->cfgEvent.log_prefix.c_str(), + node_ptr->cfgEvent.value.c_str()); + + dlog ("Database Signature: %s\n", node_ptr->cfgEvent.value.c_str()); + + /* + * generate a md5 signature for this user's Shadow entry. + * We will do so for the entire entry as either the password + * or the password age may change and we need to track and notify + * for both. 
+ */ + char cfgInfo[1024] = {0}; + node_ptr->cfgEvent.key = get_shadow_signature ( (char*)SHADOW_FILE , (char*)USERNAME_ROOT, + &cfgInfo[0], sizeof(cfgInfo)); + node_ptr->cfgEvent.information = cfgInfo; + + if ( node_ptr->cfgEvent.key.empty() ) + { + elog ("failed to get md5sum of username '%s' from '%s'\n", USERNAME_ROOT, SHADOW_FILE ); + node_ptr->cfgEvent.status = FAIL_INVALID_DATA ; + configStageChange ( node_ptr, MTC_CONFIG__FAILURE ); + break ; + } + + dlog ("File Signature : %s\n", node_ptr->cfgEvent.key.c_str()); + if ( node_ptr->cfgEvent.key.compare(node_ptr->cfgEvent.value)) + { + bool install = false ; + if ( node_ptr->configAction == MTC_CONFIG_ACTION__INSTALL_PASSWD ) + { + install = true ; + ilog ("%s shadow file hash and aging ... install config\n", USERNAME_ROOT ); + } + else + { + ilog ("%s shadow entry has changed ... updating config\n", USERNAME_ROOT ); + ilog ("... old signature - %s\n", node_ptr->cfgEvent.value.c_str()); + ilog ("... new signature - %s\n", node_ptr->cfgEvent.key.c_str()); + } + + if ((rc = jsonUtil_get_array_idx ( (char*)node_ptr->cfgEvent.response.data(), "iusers", 0 , temp_value )) == PASS ) + { + jsonUtil_get_key_val ( (char*)temp_value.data(), "uuid", node_ptr->cfgEvent.uuid); + } + + if ( rc || node_ptr->cfgEvent.uuid.empty() || !node_ptr->cfgEvent.uuid.compare("null")) + { + elog ("%s null or missing reconfig 'uuid' (%d:%s)\n", + node_ptr->cfgEvent.service.c_str(), rc, + node_ptr->cfgEvent.uuid.empty() ?
"empty" : node_ptr->cfgEvent.uuid.c_str()); + return ( FAIL_INVALID_DATA ); /* NOTE(review): unlike the other error paths in this stage this returns without a configStageChange, so configStage stays MTC_CONFIG__SHOW with mtcConfig_timer already stopped - confirm this is intended */ + } + ilog ("%s uuid:%s\n", + node_ptr->cfgEvent.log_prefix.c_str(), + node_ptr->cfgEvent.uuid.c_str()); + + /* Post the modify command */ + rc = mtcInvApi_cfg_modify ( node_ptr->hostname, install ) ; + if ( rc ) + { + elog ("%s Config MODIFY command failed\n", node_ptr->hostname.c_str() ); + configStageChange ( node_ptr, MTC_CONFIG__FAILURE ); + } + else + { + mtcTimer_start ( node_ptr->mtcConfig_timer, mtcTimer_handler, (sysinv_timeout+1) ); + configStageChange ( node_ptr, MTC_CONFIG__MODIFY ); + } + } + else + { + ilog ("%s shadow entry has not changed (%s)\n", + USERNAME_ROOT, node_ptr->cfgEvent.key.c_str()); + configStageChange ( node_ptr, MTC_CONFIG__DONE ); + } + } + else + { + elog ("%s failed (%d:%d)\n", node_ptr->cfgEvent.log_prefix.c_str(), rc, + node_ptr->cfgEvent.status ); + configStageChange ( node_ptr, MTC_CONFIG__FAILURE ); + } + break ; + } + + case MTC_CONFIG__MODIFY: + { + /* timeout yet ? */ + if ( node_ptr->mtcConfig_timer.ring == true ) + { + elog ("%s timeout\n", node_ptr->cfgEvent.log_prefix.c_str()); + configStageChange ( node_ptr, MTC_CONFIG__TIMEOUT ); + break ; + } + + /* done Yet ?
*/ + rc = doneQueue_dequeue ( node_ptr->cfgEvent ) ; + if ( rc == RETRY ) + { + /* Still waiting */ + break ; + } + else if ( rc == PASS ) + { + mtcTimer_stop ( node_ptr->mtcConfig_timer ); + if ( node_ptr->cfgEvent.response_len ) + { + configStageChange ( node_ptr, MTC_CONFIG__VERIFY ); + } + else + { + elog ("%s modify without response (%d:%d)\n", + node_ptr->cfgEvent.log_prefix.c_str(), rc, + node_ptr->cfgEvent.status ); + configStageChange ( node_ptr, MTC_CONFIG__FAILURE ); + } + } + else + { + elog ("%s modify failed (%d:%d)\n", + node_ptr->cfgEvent.log_prefix.c_str(), rc, + node_ptr->cfgEvent.status ); + configStageChange ( node_ptr, MTC_CONFIG__FAILURE ); + } + break ; + } + case MTC_CONFIG__VERIFY: + { + node_ptr->cfgEvent.value = "" ; + rc = jsonUtil_get_key_val ( (char*)node_ptr->cfgEvent.response.data(), + "root_sig", node_ptr->cfgEvent.value); + if ( node_ptr->cfgEvent.value.empty() || + !node_ptr->cfgEvent.value.compare("null") || rc ) + { + elog ("%s null or missing 'root_sig' value (%d:%s)\n", + node_ptr->cfgEvent.service.c_str(), rc, + node_ptr->cfgEvent.value.empty() ? "empty" : node_ptr->cfgEvent.value.c_str()); + + node_ptr->cfgEvent.status = FAIL_INVALID_DATA ; + configStageChange ( node_ptr, MTC_CONFIG__FAILURE ); + break; + } + + if ( node_ptr->cfgEvent.key.compare(node_ptr->cfgEvent.value)) + { + elog ("%s root_sig modify compare failed\n", + node_ptr->cfgEvent.log_prefix.c_str()); + wlog ("... database signature - %s\n", node_ptr->cfgEvent.value.c_str()); + wlog ("... 
file signature - %s\n", node_ptr->cfgEvent.key.c_str()); + + configStageChange ( node_ptr, MTC_CONFIG__FAILURE ); + } + else + { + ilog ("%s modify succeeded\n", node_ptr->cfgEvent.log_prefix.c_str()); + configStageChange ( node_ptr, MTC_CONFIG__DONE ); + } + break ; + } + case MTC_CONFIG__FAILURE: + { + elog ("%s Command Failure\n", node_ptr->cfgEvent.log_prefix.c_str()); + + /* Call to remove this command from the work queue ; if it exists */ + workQueue_del_cmd ( node_ptr, node_ptr->cfgEvent.sequence ); + + configStageChange ( node_ptr, MTC_CONFIG__DONE ); + break ; + } + case MTC_CONFIG__TIMEOUT: + { + elog ("%s Command Timeout\n", node_ptr->cfgEvent.log_prefix.c_str()); + + /* Call to remove this command from the work queue ; if it exists */ + workQueue_del_cmd ( node_ptr, node_ptr->cfgEvent.sequence ); + + node_ptr->oper_failures++ ; + mtcHttpUtil_free_conn ( node_ptr->cfgEvent ); + mtcHttpUtil_free_base ( node_ptr->cfgEvent ); + + configStageChange ( node_ptr, MTC_CONFIG__DONE ); + break ; + } + case MTC_CONFIG__DONE: + default: + { + if (( node_ptr->configAction == MTC_CONFIG_ACTION__INSTALL_PASSWD ) || + ( node_ptr->configAction == MTC_CONFIG_ACTION__CHANGE_PASSWD )) + { + /* We are done */ + node_ptr->configAction = MTC_CONFIG_ACTION__NONE ; + } + if ( node_ptr->configAction == MTC_CONFIG_ACTION__CHANGE_PASSWD_AGAIN ) + { + /* Run the FSM again */ + node_ptr->configAction = MTC_CONFIG_ACTION__CHANGE_PASSWD ; + } + node_ptr->configStage = MTC_CONFIG__START ; + break ; + } + } + return (PASS); +} diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.h b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.h new file mode 100755 index 00000000..95b300cd --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.h @@ -0,0 +1,24 @@ +#ifndef __INCLUDE_MTCNODEHDLRS_H__ +#define __INCLUDE_MTCNODEHDLRS_H__ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Controller Maintenance + * + * JSON Utility Header + */ + +#include +#include + +using namespace std; + +void mtcTimer_handler ( int sig, siginfo_t *si, void *uc); + +#endif /* __INCLUDE_MTCNODEHDLRS_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeMnfa.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeMnfa.cpp new file mode 100644 index 00000000..efff678b --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeMnfa.cpp @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Node Maintenance + * "Multi-Node-Failure Avoidance feature utility implementation" + * + */ + +#include +#include +#include +#include +#include +#include + +using namespace std; + +#include "nodeBase.h" +#include "nodeClass.h" +#include "nodeTimers.h" +#include "mtcNodeHdlrs.h" + +/* create a log of all the hosts that are in the mnfa pool */ +void log_mnfa_pool ( std::list & mnfa_awol_list ) +{ + std::list::iterator mnfa_awol_ptr ; + string pool_list = "" ; + for ( mnfa_awol_ptr = mnfa_awol_list.begin() ; + mnfa_awol_ptr != mnfa_awol_list.end() ; + mnfa_awol_ptr++ ) + { + pool_list.append (" "); + pool_list.append (mnfa_awol_ptr->data()); + } + ilog ("MNFA POOL:%s\n", pool_list.c_str()); +} + +/******************************************************************************* + * + * Name : mnfa_calculate_threshold + * + * Description: Calculates and returns the mnfa threshold based + * on enabled hosts. + * + * Auto corrects the value to a min number. + * + * Calculate the multi-node failure avoidance handling threshold + * This is the number of hosts than need to fail simultaneously + * in order to trigger mode ; i.e. 
mnfa_active=true + * + *******************************************************************************/ +int nodeLinkClass::mnfa_calculate_threshold ( string hostname ) +{ + int mnfa_enabled_nodes = enabled_nodes (); + + /* Calculate the threshold */ + if ( mnfa_threshold_type == MNFA_PERCENT ) + mnfa_threshold = mnfa_enabled_nodes / mnfa_threshold_percent ; + else + mnfa_threshold = mnfa_threshold_number ; + + /* Don't allow the multi-node failure avoidance + * to ever be 1 or we would never fail a host */ + if ( mnfa_threshold < mnfa_threshold_number ) + { + ilog ("%s MNFA threshold rounded to %d from %d\n", + hostname.c_str(), + mnfa_threshold_number, + mnfa_enabled_nodes / mnfa_threshold_percent ); + mnfa_threshold = mnfa_threshold_number ; + } + + if ( mnfa_awol_list.size() ) + { + log_mnfa_pool ( mnfa_awol_list ); + } + return (mnfa_threshold); +} + +/***************************************************************************** + * + * Name : mnfa_add_host + * + * Description: Add a failed host the the mnfa count and manage + * the failed list + * + *****************************************************************************/ +void nodeLinkClass::mnfa_add_host ( struct nodeLinkClass::node * node_ptr , iface_enum iface ) +{ + if ( node_ptr->hbs_minor[iface] == false ) + { + bool enter = false ; + bool added = false ; + + node_ptr->hbs_minor[iface] = true ; + node_ptr->hbs_minor_count[iface]++ ; + mnfa_host_count[iface]++; + + /* if we are active then add the node to the awol list */ + if ( mnfa_active == true ) + { + /* once we are mnfa_active we need to give all the + * hbs_minor=true hosts a graceful recovery token + * mnfa_graceful_recovery = true and add to the awol list */ + node_ptr->mnfa_graceful_recovery = true ; + added = true ; + mnfa_awol_list.push_back(node_ptr->hostname); + mnfa_awol_list.unique(); + } + else if (( mnfa_active == false ) && + ( mnfa_host_count[iface] >= mnfa_calculate_threshold( node_ptr->hostname ))) + { + enter = true ; + } + + 
ilog ("%s MNFA %s (%s) %d enabled hosts (threshold:%d) (%d:%s:%d) (%d:%s:%d)\n", + node_ptr->hostname.c_str(), + added ? "added to pool" : "new candidate", + get_iface_name_str(iface), + enabled_nodes(), + mnfa_threshold, + mnfa_host_count[MGMNT_IFACE], + get_iface_name_str(MGMNT_IFACE), + node_ptr->hbs_minor_count[MGMNT_IFACE], + mnfa_host_count[INFRA_IFACE], + get_iface_name_str(INFRA_IFACE), + node_ptr->hbs_minor_count[INFRA_IFACE]); + + if ( enter == true ) + { + mnfa_enter (); + } + } +} + +/***************************************************************************** + * + * Name : mnfa_recover_host + * + * Description: Recover a host that may or may not be in the mnfa + * pool by sending it into the graceful recover FSM. + * + *****************************************************************************/ +void nodeLinkClass::mnfa_recover_host ( struct nodeLinkClass::node * node_ptr ) +{ + if ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ) + { + if ( node_ptr->degrade_mask == 0 ) + { + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE ); + } + } + + if ( node_ptr->mnfa_graceful_recovery == true ) + { + /* Restart the heartbeat for this recovered host */ + // send_hbs_command ( node_ptr->hostname, MTC_RESTART_HBS ); + + if ( node_ptr->adminAction != MTC_ADMIN_ACTION__RECOVER ) + { + ilog ("%s graceful recovery from MNFA\n", node_ptr->hostname.c_str()); + recoveryStageChange ( node_ptr, MTC_RECOVERY__START ); + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__RECOVER ); + } + else + { + wlog ("%s already gracefully recovering\n", node_ptr->hostname.c_str() ); + } + } +} + +/**************************************************************************** + * + * Name : mnfa_enter + * + * Description: Perform the operations required to enter mnfa mode + * + * These include ... + * + * 1. Send the backoff command to heartbeat service. This tells the + * heartbeat service to send heartbeat requests less frequently. + * + * 2. Set mode active + * + * 3. 
Store all the hosts that have failed into the mnfa_awol_list
+ *
+ * 4. Give each enabled host with hbs_minor=true the
+ *    mnfa_graceful_recovery token
+ *
+ * 5. Start the MNFA Auto-Recovery timer with time based on the config
+ *    setting mnfa_recovery_timeout
+ *
+ ****************************************************************************/
+void nodeLinkClass::mnfa_enter ( void )
+{
+    wlog ("MNFA ENTER --> Entering Multi-Node Failure Avoidance\n");
+    mtcAlarm_log ( active_controller_hostname , MTC_LOG_ID__EVENT_MNFA_ENTER );
+    mnfa_active = true ;
+
+    send_hbs_command ( my_hostname, MTC_BACKOFF_HBS );
+
+    /* Handle the case where we are already trying to recover from a
+     * previous mnfa but the failure case occurs again. If that
+     * happens we need to cancel the timer that will issue
+     * the period recovery command. */
+    if ( mtcTimer_mnfa.tid )
+        mtcTimer_stop ( mtcTimer_mnfa );
+
+    /* Loop through inventory and put each enabled host that is
+     * in the hbs_minor state into the awol list.
+     *
+     * The 'ptr != NULL' test covers an empty inventory ; the
+     * original loop dereferenced head unconditionally. */
+    for ( struct node * ptr = head ; ptr != NULL ; ptr = ptr->next )
+    {
+        if ((( ptr->hbs_minor[MGMNT_IFACE] == true ) ||
+             ( ptr->hbs_minor[INFRA_IFACE] == true )) &&
+             ( ptr->operState == MTC_OPER_STATE__ENABLED ))
+        {
+            /* Give all the hosts in the mnfa list a graceful
+             * recovery token mnfa_graceful_recovery = true
+             * basically a get out of double reset free card */
+            ptr->mnfa_graceful_recovery = true ;
+
+            /* list::unique() only drops *adjacent* duplicates, so a
+             * trailing unique() call cannot guarantee one entry per
+             * host. Remove any existing entry before appending. */
+            mnfa_awol_list.remove    ( ptr->hostname );
+            mnfa_awol_list.push_back ( ptr->hostname );
+        }
+        if (( ptr->next == NULL ) || ( ptr == tail ))
+            break ;
+    }
+
+    /* Start the timer that will eventually send the MTC_RECOVER_HBS command */
+    wlog ("MNFA Auto-Recovery in %d seconds\n", mnfa_recovery_timeout);
+    mtcTimer_start ( mtcTimer_mnfa, mtcTimer_handler, mnfa_recovery_timeout);
+}
+
+/****************************************************************************
+ *
+ * Name       : mnfa_exit
+ *
+ * Description: Perform the operations required to exit mnfa mode
+ *              These include ... 
+ * + * 1. manage mnfa counters/oms + * + * 2. disable mnfa mode (mnfa_active = false) + * + * 3. Start the heartbeat recovery timer. This is a timer that + * adds a bit of debounce to the recovery. + * After MTC_MNFA_RECOVERY_TIMER time period mtce will send + * a command to the heartbeat service commanding it to + * re-instate the default/runtime heartbeat period. + * + * 4. Loop through all the enabled inventory and clear the heartbeat + * degrade conditions and issue a heartbeat restart to any + * hosts that remain in the hbs_minor state. + * + * if ( force == true ) + * The mnfa_recovery_timeout has expired + * All hosts in the awol list are forced failed and into the + * enable_handler FSM. + * else + * The mnfa recovery threshold has crossed + * Send all enabled hosts in the hbs_minor=true state into the + * graceful recovery FSM + * + ****************************************************************************/ +void nodeLinkClass::mnfa_exit ( bool force ) +{ + if ( mnfa_active == true ) + { + wlog ("MNFA EXIT <-- Exiting Multi-Node Failure Avoidance %s\n", + force ? "(Auto-Recover)" : ""); + + mtcAlarm_log ( active_controller_hostname , MTC_LOG_ID__EVENT_MNFA_EXIT ); + mnfa_occurances++ ; + mnfa_active = false ; + + if ( force == true ) + { + elog ("... MNFA %d sec timeout - forcing full enable on ... \n", + mnfa_recovery_timeout); + + log_mnfa_pool ( mnfa_awol_list ); + } + + /* Loop through inventory and recover each host that + * remains in the hbs_minor state. + * Clear heartbeat degrades */ + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ((( ptr->hbs_minor[INFRA_IFACE] == true ) || + ( ptr->hbs_minor[MGMNT_IFACE] == true )) && + ( ptr->operState == MTC_OPER_STATE__ENABLED )) + { + ptr->hbs_minor[MGMNT_IFACE] = false ; + ptr->hbs_minor[INFRA_IFACE] = false ; + + if ( force == true ) + { + elog ("... 
%s failed ; auto-recovering\n", + ptr->hostname.c_str()); + + /* Set node as failed */ + availStatusChange ( ptr, MTC_AVAIL_STATUS__FAILED ); + enableStageChange ( ptr, MTC_ENABLE__START ); + adminActionChange ( ptr, MTC_ADMIN_ACTION__NONE ); + } + else + { + if ( ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ) + { + if ( ptr->degrade_mask == 0 ) + { + availStatusChange ( ptr, MTC_AVAIL_STATUS__AVAILABLE ); + } + } + + if ( ptr->adminAction != MTC_ADMIN_ACTION__RECOVER ) + { + recoveryStageChange ( ptr, MTC_RECOVERY__START ); + adminActionChange ( ptr, MTC_ADMIN_ACTION__RECOVER ); + } + else + { + wlog ("%s already gracefully recovering\n", ptr->hostname.c_str() ); + } + } + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + + /* Stop the ... failure -> full enable ... window timer if it is active */ + if ( mtcTimer_mnfa.tid ) + mtcTimer_stop ( mtcTimer_mnfa ); + + /* Start the timer that will eventually send the MTC_RECOVER_HBS command */ + mtcTimer_start ( mtcTimer_mnfa, mtcTimer_handler, MTC_MNFA_RECOVERY_TIMER ); + } + else + { + send_hbs_command ( my_hostname, MTC_RECOVER_HBS ); + } + + mnfa_host_count[MGMNT_IFACE] = 0 ; + mnfa_host_count[INFRA_IFACE] = 0 ; + mnfa_awol_list.clear(); +} diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeMsg.h b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeMsg.h new file mode 100755 index 00000000..c93afad0 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeMsg.h @@ -0,0 +1,139 @@ +#ifndef __INCLUDE_MTCNODEMSG_HH__ +#define __INCLUDE_MTCNODEMSG_HH__ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Node Maintenance "Messaging" + * + */ + +#include +#include +#include + +using namespace std; + +#include "nodeBase.h" +#include "nodeUtil.h" /* for ... 
msgSock_type */
+#include "msgClass.h"
+
+
+/*************************************************************************
+ * Common Service Messaging Stuff
+ *************************************************************************
+
+        +----------*                          +-----------*
+        *          * --- agent_addr --->      *           *
+        * mtcAgent *                          * mtcClient *
+        *          *                          *           *
+        *          * <-- client_addr --->     *           *
+        *          *                          *           *
+  +-->  *          * ---+                     *           *
+  |     +----------*    |                     +-----------*
+  |                     |
+ event                config
+  |                     |
+  |     +----------*    |                     +-----------*
+  +---  *          * <--+                     *           *
+        * hbsAgent *                          * hbsClient *
+        *          * -- multicast req -->     *           *
+        *          *                          *           *
+        *          * <-- hb pulse resp --     *           *
+        *          *                          *           *
+        +----------*                          +-----------*        */
+
+/* Shorthand for the generic socket address pointer type */
+#define SA struct sockaddr*
+
+
+#define MTC_AGENT_RX_BUFF_SIZE (MAX_NODES*MAX_MSG)
+
+#define MAX_RX_MSG_BATCH (20)
+
+/** Maintenance messaging socket control structure */
+typedef struct
+{
+    /** These sockets define the maintenance system msging. */
+
+    /** UDP sockets used by the mtcAgent to transmit and receive
+     *  maintenance commands to the client (compute) node and
+     *  receive the compute node reply in the receive direction */
+    msgClassSock* mtc_agent_tx_socket ;       /**< tx to mtc client mgmnt   */
+    msgClassSock* mtc_agent_infra_tx_socket;  /**< tx to mtc client infra   */
+    msgClassSock* mtc_agent_rx_socket ;       /**< rx from mtc client mgmnt */
+    msgClassSock* mtc_agent_infra_rx_socket;  /**< rx from mtc client infra */
+    int           mtc_agent_port ;            /**< the agent rx port number */
+
+    struct sockaddr_in agent_addr;            /**< socket attributes struct */
+    int           mtc_agent_rx_socket_size ;
+    int           mtc_agent_infra_rx_socket_size ;
+
+    /** UDP sockets used by the mtcClient to receive maintenance
+      * commands from and transmit replies to the mtcAgent */
+    msgClassSock* mtc_client_rx_socket ;       /**< rx from controller       */
+    msgClassSock* mtc_client_tx_socket ;       /**< tx to controller mgmnt   */
+    msgClassSock* mtc_client_infra_tx_socket ; /**< tx to controller infra   */
+    msgClassSock* mtc_client_infra_rx_socket ; /**< rx from controller infra */
+    int           mtc_cmd_port ;               /**< mtc command port number  */
+    struct sockaddr_in mtc_cmd_addr ;          /**< socket attributes mgmnt  */
+
+
+
+    /***************************************************************/
+
+    /** Event Receive Interface - (UDP over 'lo') */
+    int           mtc_event_rx_port ;  /**< mtc event receive port */
+    msgClassSock* mtc_event_rx_sock ;  /**< ... socket             */
+
+    /** UDP Mtc to Hbs command port */
+    int           mtc_to_hbs_port ;    /**< hbs command port       */
+    msgClassSock* mtc_to_hbs_sock ;    /**< ... socket             */
+
+
+    /** UDP Hardware Monitor Command Port */
+    int           hwmon_cmd_port ;     /**< hwmon command port     */
+    msgClassSock* hwmon_cmd_sock ;     /**< ... socket             */
+
+    /** UDP Logger Port */
+    msgSock_type  mtclogd ;            /**< messaging into mtclogd */
+
+    /* For select dispatch */
+    struct timeval waitd ;
+    fd_set readfds;
+
+    /** Active Monitor Socket */
+    int amon_socket ;
+
+    /** Resource Monitor Socket */
+    int rmon_socket ;
+
+    /* NOTE(review): presumably track receipt of the main / sub-function
+     * go-enabled reply acks ; not demonstrated in this header - confirm */
+    bool main_go_enabled_reply_ack ;
+    bool subf_go_enabled_reply_ack ;
+
+    int netlink_sock ; /* netlink socket */
+    int ioctl_sock ;   /* general ioctl socket */
+
+
+} mtc_socket_type ;
+
+
+mtc_socket_type * get_sockPtr ( void );
+int send_mtc_msg ( mtc_socket_type * sock_ptr, int cmd, string who_i_am );
+int send_mtcAlive_msg ( mtc_socket_type * sock_ptr, string identity, int interface );
+
+int recv_mtc_reply_noblock ( void );
+
+int send_mtc_cmd ( string & hostname, int cmd, int interface );
+int mtc_service_command ( mtc_socket_type * sock_ptr , int interface );
+int mtc_set_availStatus ( string & hostname, mtc_nodeAvailStatus_enum status );
+int service_rmon_inbox ( mtc_socket_type * sock_ptr, int & rmon_code, string & resource_name );
+int mtce_send_event ( mtc_socket_type * sock_ptr, int cmd , const char * mtce_name_ptr );
+int mtc_infra_init ( mtc_socket_type * sock_ptr , char * iface );
+string get_who_i_am ( void );
+
+#endif
diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcSmgrApi.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcSmgrApi.cpp
new file 
mode 100644
index 00000000..99c3ba4f
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcSmgrApi.cpp
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 2013, 2015 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+  * @file
+  * Wind River CGTS Platform Controller Maintenance
+  * Access to Service Manager via REST API Interface.
+  *
+  */
+
+#ifdef __AREA__
+#undef __AREA__
+#endif
+#define __AREA__ "mgr"
+
+#include "nodeClass.h"      /* for ... maintenance class nodeLinkClass */
+#include "mtcSmgrApi.h"     /* for ... this module header              */
+#include "jsonUtil.h"       /* for ... jsonUtil_get_key_val            */
+
+/*******************************************/
+/* Internal Private utilities and handlers */
+/*******************************************/
+
+/* Handles the response (or timeout) of a Service Manager request
+ * that was issued through 'smgrEvent'.
+ *
+ * req - the http request ; NULL is treated as a request timeout
+ * arg - cast to the 'struct event_base *' whose loop is broken on exit
+ *
+ * On every path the connection and base are freed and
+ * smgrEvent.active is cleared before breaking out of the event loop. */
+void nodeLinkClass::mtcSmgrApi_handler ( struct evhttp_request *req, void *arg )
+{
+    if ( ! req )
+    {
+        elog ("%s %s %s Request Timeout (%d)\n",
+                   smgrEvent.hostname.c_str(),
+                   smgrEvent.service.c_str(),
+                   smgrEvent.operation.c_str(),
+                   smgrEvent.timeout);
+
+        smgrEvent.status = FAIL_TIMEOUT ;
+        goto mtcSmgrApi_handler_out ;
+    }
+
+    mtcHttpUtil_status ( smgrEvent );
+    if ( smgrEvent.status != PASS )
+    {
+        /* a failed status is only logged ; the response is still read */
+        wlog ("%s '%s' HTTP %s Request Failed (%d)\n",
+                   smgrEvent.hostname.c_str(),
+                   smgrEvent.service.c_str(),
+                   smgrEvent.operation.c_str(),
+                   smgrEvent.status);
+    }
+
+    smgrEvent.response.clear();
+    /* warn only when the read reports non-zero AND nothing was loaded */
+    if (( mtcHttpUtil_get_response ( smgrEvent )) && ( smgrEvent.response.empty() ))
+    {
+        wlog ("%s failed to get a response\n", smgrEvent.hostname.c_str() );
+    }
+
+mtcSmgrApi_handler_out:
+
+    mtcHttpUtil_free_conn ( smgrEvent );
+    mtcHttpUtil_free_base ( smgrEvent );
+
+    smgrEvent.active = false ;
+
+    /* This is needed to get out of the loop */
+    event_base_loopbreak((struct event_base *)arg);
+}
+
+/*
+ * Name       : mtcSmgrApi_request
+ *
+ * Description: Submit any of the following requests to
+ *              system management against the specified
+ *              
controller hostname.
+ *
+ *   General Operation    - Event Lib Request
+ *   -------------------    ---------------------------------
+ *   CONTROLLER_QUERY     - SMGR_QUERY_SWACT   (non-blocking)
+ *   CONTROLLER_SWACT     - SMGR_START_SWACT   (non-blocking)
+ *   CONTROLLER_DISABLED  - SMGR_HOST_DISABLED (    blocking)
+ *   CONTROLLER_ENABLED   - SMGR_HOST_ENABLED  (    blocking)
+ *   CONTROLLER_LOCKED    - SMGR_HOST_LOCKED   (    blocking)
+ *   CONTROLLER_UNLOCKED  - SMGR_HOST_UNLOCKED (    blocking)
+ *
+ * Notes      : Retries are ignored for non-blocking operations
+ *
+ * Returns    : PASS without sending anything in CPE simplex mode,
+ *              RETRY if a Service Manager request is already active,
+ *              the mtcHttpUtil_event_init error if that call fails,
+ *              FAIL_BAD_CASE for an unsupported operation,
+ *              otherwise the result of the (last) api request.
+ */
+int nodeLinkClass::mtcSmgrApi_request ( struct nodeLinkClass::node * node_ptr, mtc_cmd_enum operation, int retries )
+{
+    int count = 0 ;
+    int rc = PASS ;
+    string operation_string = "unknown" ;
+
+    if ( system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX )
+    {
+        dlog ("%s simpex mode ; SM '%d' request not sent\n", node_ptr->hostname.c_str(), operation );
+        return ( PASS );
+    }
+
+    /* only one Service Manager request can be in flight at a time */
+    if ( smgrEvent.active == true )
+    {
+        wlog ("%s Service Manager %s Request - In-Progress (retry)\n",
+                  node_ptr->hostname.c_str(),
+                  smgrEvent.operation.c_str());
+
+        smgrEvent.status = FAIL_MUTEX_ERROR ;
+        return (RETRY);
+    }
+
+    rc = mtcHttpUtil_event_init ( &smgrEvent,
+                                   my_hostname,
+                                   "mtcSmgrApi_request",
+                                   hostUtil_getServiceIp  (SERVICE_SMGR),
+                                   hostUtil_getServicePort(SERVICE_SMGR));
+    if ( rc )
+    {
+        elog ("%s failed to allocate libEvent memory (%d)\n", node_ptr->hostname.c_str(), rc );
+        return (rc);
+    }
+    /* Set the common context of this new operation */
+    smgrEvent.status   = RETRY ;
+    smgrEvent.hostname = node_ptr->hostname ;
+    smgrEvent.uuid     = get_uuid ( node_ptr->hostname );
+
+    /* Build the request address ; requests are blocking unless the
+     * operation specific setup below says otherwise */
+    smgrEvent.address = MTC_SMGR_LABEL ;
+    smgrEvent.address.append(node_ptr->hostname);
+    smgrEvent.token.url = smgrEvent.address ;
+    smgrEvent.blocking = true ;
+
+    /* common start of the json payload ; each operation appends the rest */
+    smgrEvent.payload = "{\"origin\":\"mtce\"," ;
+
+    if ( operation == CONTROLLER_QUERY )
+    {
+        smgrEvent.operation = "Query" ;
+        smgrEvent.request = SMGR_QUERY_SWACT ;
+        /* NOTE(review): availStatus is not used in this branch */
+        string availStatus = availStatus_enum_to_str (get_availStatus (node_ptr->hostname));
+        smgrEvent.blocking = false ;
+        smgrEvent.payload = "" ;
+        ilog ("%s sending 'query services' request to HA Service Manager\n",
+                  smgrEvent.hostname.c_str());
+
+        return ( mtcHttpUtil_api_request ( smgrEvent )) ;
+    }
+    else if ( operation == CONTROLLER_SWACT )
+    {
+        smgrEvent.operation = "Swact" ;
+        smgrEvent.request = SMGR_START_SWACT ;
+        string availStatus = availStatus_enum_to_str (get_availStatus (node_ptr->hostname));
+        smgrEvent.blocking = false ;
+        smgrEvent.payload.append ("\"action\":\"swact\",");
+        smgrEvent.payload.append ("\"admin\":\"unlocked\",");
+        smgrEvent.payload.append ("\"oper\":\"enabled\",");
+        smgrEvent.payload.append ("\"avail\":\"");
+        smgrEvent.payload.append (availStatus);
+        smgrEvent.payload.append ("\"}");
+        ilog ("%s sending 'swact' request to HA Service Manager\n",
+                  smgrEvent.hostname.c_str());
+
+        return ( mtcHttpUtil_api_request ( smgrEvent )) ;
+    }
+    else if ( operation == CONTROLLER_DISABLED )
+    {
+        operation_string = "disabled" ;
+        smgrEvent.operation = "Disable" ;
+        smgrEvent.request = SMGR_HOST_DISABLED ;
+
+        string availStatus = availStatus_enum_to_str (get_availStatus (node_ptr->hostname));
+        string adminState  = adminState_enum_to_str  (get_adminState  (node_ptr->hostname));
+        string adminAction = adminAction_enum_to_str (get_adminAction (node_ptr->hostname));
+
+        smgrEvent.payload.append ("\"action\":\"");
+        /* any admin action other than "lock" is reported as "event" */
+        if (adminAction.compare("lock"))
+            adminAction = "event" ;
+        smgrEvent.payload.append (adminAction);
+        smgrEvent.payload.append ("\",");
+
+        smgrEvent.payload.append ("\"admin\":\"");
+        smgrEvent.payload.append (adminState);
+        smgrEvent.payload.append ("\",");
+
+        smgrEvent.payload.append ("\"oper\":\"disabled\",");
+
+        smgrEvent.payload.append ("\"avail\":\"");
+        smgrEvent.payload.append (availStatus);
+        smgrEvent.payload.append ("\"}");
+
+        ilog ("%s sending '%s-disabled' request to HA Service Manager\n",
+                  smgrEvent.hostname.c_str(), adminState.c_str() );
+    }
+    else if ( operation == CONTROLLER_ENABLED )
+    {
+        smgrEvent.request = SMGR_HOST_ENABLED ;
+        smgrEvent.operation = "Enable" ;
+        operation_string = "enabled" ;
+
+        string availStatus = availStatus_enum_to_str (get_availStatus (node_ptr->hostname));
+        string adminState  = adminState_enum_to_str  (get_adminState  (node_ptr->hostname));
+        string adminAction = adminAction_enum_to_str (get_adminAction (node_ptr->hostname));
+
+        smgrEvent.payload.append ("\"action\":\"");
+        /* any admin action other than "unlock" is reported as "event" */
+        if (adminAction.compare("unlock"))
+            adminAction = "event" ;
+        smgrEvent.payload.append (adminAction);
+        smgrEvent.payload.append ("\",");
+
+        smgrEvent.payload.append ("\"admin\":\"");
+        smgrEvent.payload.append (adminState);
+        smgrEvent.payload.append ("\",");
+
+        smgrEvent.payload.append ("\"oper\":\"enabled\",");
+
+        smgrEvent.payload.append ("\"avail\":\"");
+        smgrEvent.payload.append (availStatus);
+        smgrEvent.payload.append ("\"}");
+
+        ilog ("%s sending 'unlocked-enabled' request to HA Service Manager\n",
+                  smgrEvent.hostname.c_str());
+    }
+    else if ( operation == CONTROLLER_LOCKED )
+    {
+        operation_string = "locked" ;
+        smgrEvent.request = SMGR_HOST_LOCKED ;
+        smgrEvent.operation = "Lock" ;
+        smgrEvent.payload.append ("\"action\":\"lock\",");
+        smgrEvent.payload.append ("\"admin\":\"locked\",");
+        smgrEvent.payload.append ("\"oper\":\"disabled\",");
+        smgrEvent.payload.append ("\"avail\":\"online\"}");
+
+        ilog ("%s sending 'locked-disabled' request to HA Service Manager\n",
+                  smgrEvent.hostname.c_str());
+
+    }
+    else if ( operation == CONTROLLER_UNLOCKED )
+    {
+        operation_string = "unlocked" ;
+        smgrEvent.request = SMGR_HOST_UNLOCKED ;
+        smgrEvent.operation = "Unlock" ;
+        smgrEvent.payload.append ("\"action\":\"unlock\",");
+        smgrEvent.payload.append ("\"admin\":\"unlocked\",");
+        smgrEvent.payload.append ("\"oper\":\"enabled\",");
+        smgrEvent.payload.append ("\"avail\":\"available\"}");
+
+        ilog ("%s sending 'unlocked-enabled' request to Service Manager\n",
+                  smgrEvent.hostname.c_str());
+    }
+    else
+    {
+        /* NOTE(review): returns after mtcHttpUtil_event_init without any
+         * free call ; confirm whether the event needs freeing here */
+        return (FAIL_BAD_CASE);
+    }
+    /* Blocking operations ; the request is always sent at least once and
+     * is repeated until it passes or 'count' reaches 'retries' */
+    do
+    {
+        rc = mtcHttpUtil_api_request ( smgrEvent ) ;
+
+        /* 'not found' is accepted as success for disabled / locked */
+        if ((( operation == CONTROLLER_DISABLED ) ||
+             ( operation == CONTROLLER_LOCKED )) &&
+             ( rc == HTTP_NOTFOUND ))
+        {
+            dlog ("%s Service Management (%d)\n", node_ptr->hostname.c_str(), rc );
+            rc = PASS ;
+        }
+        if ( rc != PASS )
+        {
+            count++ ;
+            wlog ("%s failed sending '%s' state to SM (rc:%d) ... retrying (cnt:%d)\n",
+                      node_ptr->hostname.c_str(), operation_string.c_str(),
+                      rc, count );
+        }
+    } while ( ( rc != PASS ) && ( count < retries ) ) ;
+
+    if ( rc )
+    {
+        elog ("%s failed sending '%s' state to Service Management failed (%d) ; giving up (cnt:%d)\n",
+                  node_ptr->hostname.c_str(),
+                  operation_string.c_str(),
+                  rc , count );
+    }
+    else
+    {
+        ilog ("%s is '%s' to Service Management\n", node_ptr->hostname.c_str(), operation_string.c_str());
+    }
+    return ( rc );
+}
+
+/* Parse a Service Manager query response held in 'event.response'.
+ *
+ * 'swactable_services' is first set to false. PASS is returned only when
+ * the response carries the expected keys, names event.hostname and has a
+ * "yes" or "no" swactable_services value ; FAIL otherwise. */
+int mtcSmgrApi_service_state ( libEvent & event , bool & swactable_services )
+{
+    int rc = FAIL ;
+
+    swactable_services = false ;
+
+    if ( event.response.empty() )
+    {
+        elog ("%s Query Service State Failed - Empty Response\n",
+                  event.hostname.c_str());
+        return (rc);
+    }
+    else
+    {
+        /* O.K. 
Lets look at the response - load it into the strings */
+        string origin = "" ;
+        string hn     = "" ;
+        string yesno  = "" ;
+        string origin_key = "origin" ;
+        string hostname_key = "hostname" ;
+        string swactable_services_key = "swactable_services" ;
+        /*
+         *  {
+         *      "origin"            : "sm "
+         *      "admin"             : "unlocked | locked "          don't care
+         *      "oper"              : "enabled | disabled "         don't care
+         *      "avail"             : "any availability state "     don't care
+         *      "hostname"          : "controller-0 | controller-1"
+         *      "active_services"   : "yes | no "
+         *      "swactable_services": "yes | no "
+         *  }
+         */
+
+        int rc1 = jsonUtil_get_key_val((char*)event.response.c_str(),
+                                       hostname_key, hn );
+        int rc2 = jsonUtil_get_key_val((char*)event.response.c_str(),
+                                       origin_key, origin );
+        int rc3 = jsonUtil_get_key_val((char*)event.response.c_str(),
+                                       swactable_services_key, yesno );
+        if (( rc1 != PASS ) ||
+            ( rc2 != PASS ) ||
+            ( rc3 != PASS ) )
+        {
+            elog ("%s Query Services Failed - Read Key:Values (hn:%d or:%d yn:%d)\n",
+                      event.hostname.c_str(),
+                      rc1, rc2, rc3 );
+        }
+        /* the response must be about the host that was queried */
+        else if ( event.hostname.compare (hn) )
+        {
+            elog ("%s Query Services Failed - Wrong Controller (%s)\n",
+                      event.hostname.c_str(),
+                      hn.c_str());
+        }
+        else if ( !yesno.compare("yes") )
+        {
+            swactable_services = true ;
+            rc = PASS ;
+        }
+        else if ( !yesno.compare("no") )
+        {
+            swactable_services = false ;
+            rc = PASS ;
+        }
+        else
+        {
+            /* neither "yes" nor "no" ; rc stays FAIL */
+            elog ("%s Query Services Failed - %s service state (%s:%s)\n",
+                      event.hostname.c_str(),
+                      yesno.c_str(),
+                      hn.c_str(),
+                      yesno.c_str());
+        }
+    }
+    return (rc);
+}
+
+/* The Service Manager request handler wrapper abstracted from nodeLinkClass ;
+ * forwards to the mtcSmgrApi_handler method of the object that
+ * get_mtcInv_ptr() returns */
+void mtcSmgrApi_Handler ( struct evhttp_request *req, void *arg )
+{
+    get_mtcInv_ptr()->mtcSmgrApi_handler ( req , arg );
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcSmgrApi.h b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcSmgrApi.h
new file mode 100644
index 00000000..27318b9e
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcSmgrApi.h
@@ -0,0 
+1,51 @@
+#ifndef __INCLUDE_MTCSMHAAPI_H__
+#define __INCLUDE_MTCSMHAAPI_H__
+/*
+ * Copyright (c) 2013, 2015 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+#include
+#include
+
+// #include "mtcHttpUtil.h"
+#include "httpUtil.h"
+
+ /**
+  * @file
+  * Wind River CGTS Platform Controller Maintenance
+  *
+  * Service Manager HA API Header
+  *
+  * This module offers init and cleanup utils along with
+  * a single request based utility
+  *
+  *   int mtcSmgrApi_request ( node_ptr, operation, retries )
+  *
+  * Operations:
+  *
+  *   HA Service Manager  - Command Descriptions
+  *   ------------------    ------------------------------
+  *   CONTROLLER_LOCKED   - specified controller is locked
+  *   CONTROLLER_UNLOCKED - specified controller is unlocked
+  *   CONTROLLER_DISABLED - specified controller is unlocked-disabled
+  *   CONTROLLER_ENABLED  - specified controller is unlocked-enabled
+  *   CONTROLLER_SWACT    - swact services away from specified controller
+  *   CONTROLLER_QUERY    - query active services on specified controller
+  *
+  */
+
+#define MTC_SMGR_LABEL "/v1/servicenode/"
+#define MTC_SMGR_ADDR  "localhost"
+
+/** Initializes the module */
+int mtcSmgrApi_init ( string ip, int port );
+
+/** Frees the module's dynamically allocated resources */
+void mtcSmgrApi_fini ( void );
+/** Parses a query response ; the bool reports its "swactable_services" */
+int mtcSmgrApi_service_state ( libEvent & event, bool & active_services );
+
+#endif /* __INCLUDE_MTCSMHAAPI_H__ */
diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcSubfHdlrs.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcSubfHdlrs.cpp
new file mode 100644
index 00000000..a6e6ed10
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcSubfHdlrs.cpp
@@ -0,0 +1,659 @@
+/*
+ * Copyright (c) 2013-2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/**************************************************************************** + * @file + * Wind River CGTS Platform Node "Handlers" Implementation + * + * Description: This file contains the handlers that implement the X.731 FSM. + + * Interfaces: + * + * nodeLinkClass::subf_enable_handler + * nodeLinkClass:: disable_handler + * nodeLinkClass:: delete_handler + * nodeLinkClass::degrade_handler + * nodeLinkClass::reset_handler + * nodeLinkClass::event_handler + * nodeLinkClass::recovery_handler + + ****************************************************************************/ + +using namespace std; + +#define __AREA__ "hdl" + +#include "nodeClass.h" /* All base stuff */ +#include "mtcAlarm.h" /* for ... mtcAlarm_ */ +#include "mtcNodeMsg.h" /* for ... send_mtc_cmd */ +#include "nodeTimers.h" /* for ... mtcTimer_start/stop */ +#include "jsonUtil.h" /* for ... jsonApi_array_value */ +#include "mtcNodeHdlrs.h" /* for ... mtcTimer_handler */ +#include "mtcInvApi.h" /* for ... SYSINV API */ +#include "mtcSmgrApi.h" /* for ... SM API */ +#include "mtcVimApi.h" /* for ... 
VIM API */ + +#include "daemon_common.h" +/* Compute subfunction enable FSM ; each call runs the single stage held in node_ptr->handlerStage.enable. Returns PASS unless the stage stored another code in rc (result of workQueue_done or host_services_handler, or FAIL_BAD_CASE for an unknown stage). */ +int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) +{ + int rc = PASS ; + + /* Setup the log prefix ; <hostname>-compute */ + string name = node_ptr->hostname ; + name.append("-compute"); +/* simplex is true for a SIMPLEX system, or for this host when either is_inactive_controller_main_insv() or is_inactive_controller_subf_insv() reports false */ + bool simplex = false ; + if (( SIMPLEX ) || + (( THIS_HOST ) && + (( this->is_inactive_controller_main_insv() == false ) || + ( this->is_inactive_controller_subf_insv() == false )))) + { + simplex = true ; + } + + switch ( (int)node_ptr->handlerStage.enable ) + { + case MTC_ENABLE__FAILURE_WAIT: + { + if ( node_ptr->mtcTimer.ring == true ) + { + wlog ("%s workQueue empty timeout, purging ...\n", name.c_str()); + } + else + { + /* give the work queues some time to complete */ + rc = workQueue_done ( node_ptr ); + if ( rc == RETRY ) + { + /* wait longer */ + break ; + } + } + workQueue_purge ( node_ptr ); + doneQueue_purge ( node_ptr ); + + enableStageChange ( node_ptr, MTC_ENABLE__START ); + + /* avoid failing this controller if there is no inactive to + * take over and avoid thrashing back and forth if the sub + * function on the inactive is disabled */ + if ( simplex ) + { + /* if autorecovery is enabled then handle it that way. 
*/ + if ( this->autorecovery_enabled == true ) + { + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); + enableStageChange ( node_ptr, MTC_ENABLE__START ); + + manage_autorecovery ( node_ptr ); + } + + wlog ("%s is ENABLED-degraded (failed subfunction)\n", name.c_str()); + } + else + { + /* if there is another controller enabled then just force a full enable of this one */ + force_full_enable ( node_ptr ) ; + } + break ; + } + + case MTC_ENABLE__START: + { + plog ("%s Subf Enable FSM (from start)\n", name.c_str()); + + node_ptr->unknown_health_reported = false ; + node_ptr->goEnabled_failed_subf = false ; + + /* load compute subfunction alarm state */ + EFmAlarmSeverityT sev = mtcAlarm_state ( node_ptr->hostname, + MTC_ALARM_ID__CH_COMP); + if ( sev != FM_ALARM_SEVERITY_CLEAR ) + { + node_ptr->alarms[MTC_ALARM_ID__CH_COMP] = sev ; + node_ptr->degrade_mask |= DEGRADE_MASK_SUBF; + } + + /* start a timer that waits for the /var/run/.compute_config_complete flag */ + mtcTimer_reset ( node_ptr->mtcTimer ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_COMPUTE_CONFIG_TIMEOUT ); + + enableStageChange ( node_ptr, MTC_ENABLE__CONFIG_COMPLETE_WAIT ); + break ; + } + + /* Wait for the CONFIG_COMPLETE_COMPUTE flag file that indicates + * that the compute part of the combo-blade init is finished */ + case MTC_ENABLE__CONFIG_COMPLETE_WAIT: + { + /* look for the subfunction configured bit in the reported mtce_flags */ + if ( node_ptr->mtce_flags & MTC_FLAG__SUBF_CONFIGURED ) + { + mtcTimer_reset (node_ptr->mtcTimer); + plog ("%s Subf Configured OK\n", name.c_str()); + enableStageChange ( node_ptr, MTC_ENABLE__GOENABLED_TIMER ); + alarm_config_clear ( node_ptr ); + } + else if ( node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY ) + { + mtcTimer_reset (node_ptr->mtcTimer); + elog ("%s configuration failed (oob:%x:%x)\n", + name.c_str(), + node_ptr->mtce_flags, + MTC_FLAG__I_AM_NOT_HEALTHY); + + alarm_config_failure ( node_ptr ); + + if ( simplex ) + 
mtcInvApi_update_task ( node_ptr, MTC_TASK_SUBF_CONFIG_FAIL_ ); 
+ else + mtcInvApi_update_task ( node_ptr, MTC_TASK_SUBF_CONFIG_FAIL ); + + enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED ); + } + + /* timeout handling */ + else if ( node_ptr->mtcTimer.ring == true ) + { + elog ("%s configuration timeout (%d secs)\n", + name.c_str(), + MTC_COMPUTE_CONFIG_TIMEOUT ); + + alarm_config_failure ( node_ptr ); + + if ( simplex ) + mtcInvApi_update_task ( node_ptr, MTC_TASK_SUBF_CONFIG_TO_ ); + else + mtcInvApi_update_task ( node_ptr, MTC_TASK_SUBF_CONFIG_TO ); + + enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED ); + } + else + { + ; /* wait longer */ + } + break ; + } + + case MTC_ENABLE__GOENABLED_TIMER: + { + /***************************************************************** + * + * issue: subfunction go-enable patching script fails and + * maintenance reboots the active controller when no-reboot + * patching maintenance in CPE. + * + * The fix is to avoid running the subfunction go-enabled tests + * on self while patching. + * + ****************************************************************/ + if (( THIS_HOST ) && + (( daemon_is_file_present ( PATCHING_IN_PROG_FILE )) || + ( daemon_is_file_present ( NODE_IS_PATCHED_FILE )))) + { + ilog ("%s skipping out-of-service tests while self patching\n", name.c_str()); + + /* set the goenabled complete flag */ + daemon_log ( GOENABLED_SUBF_PASS, "out-of-service tests skipped due to patching"); + node_ptr->goEnabled_failed_subf = false ; + + alarm_compute_clear ( node_ptr, true ); + + /* ok. 
great, got the go-enabled message, lets move on */ + enableStageChange ( node_ptr, MTC_ENABLE__HOST_SERVICES_START ); + break ; + } + ilog ("%s running out-of-service tests\n", name.c_str()); + + /* See if the host is there and already in the go enabled state */ + send_mtc_cmd ( node_ptr->hostname, MTC_REQ_SUBF_GOENABLED, MGMNT_INTERFACE ); + + /* start the go-enabled timeout timer ; expiry is handled in the GOENABLED_WAIT stage */ + mtcTimer_reset ( node_ptr->mtcTimer ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, this->goenabled_timeout ); + + /* start waiting for the ENABLE READY message */ + enableStageChange ( node_ptr, MTC_ENABLE__GOENABLED_WAIT ); + + node_ptr->goEnabled_subf = false ; + node_ptr->goEnabled_failed_subf = false ; + + break ; + } + case MTC_ENABLE__GOENABLED_WAIT: + { + /* check for a go-enabled failure report */ + if ( node_ptr->goEnabled_failed_subf == true ) + { + mtcTimer_reset ( node_ptr->mtcTimer ); + elog ("%s one or more out-of-service tests failed\n", name.c_str()); + + mtcInvApi_update_task ( node_ptr, simplex ? MTC_TASK_INTEST_FAIL_ : MTC_TASK_INTEST_FAIL ); + + /* Need thresholded auto recovery for this failure mode */ + if ( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX ) + this->autorecovery_enabled = true ; + + enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED ); + } + + /* search for the Go Enable message */ + else if ( node_ptr->goEnabled_subf == true ) + { + mtcTimer_reset ( node_ptr->mtcTimer ); + + alarm_enabled_clear ( node_ptr, false ); + + plog ("%s passed out-of-service tests\n", name.c_str()); + + /* O.K. clearing the state now that we got it */ + // node_ptr->goEnabled_subf = true ; + node_ptr->goEnabled_failed_subf = false ; + + /* ok. great, got the go-enabled message, lets move on */ + + if ( node_ptr->start_services_needed_subf == true ) + { + /* If the add_handler set start_services_needed_subf to + * true then we bypass inline execution and allow it to + * be serviced as a scheduled background operation. 
*/ + enableStageChange ( node_ptr, MTC_ENABLE__HEARTBEAT_CHECK ); + } + else + { + enableStageChange ( node_ptr, MTC_ENABLE__HOST_SERVICES_START ); + } + break ; + } + + else if ( node_ptr->mtcTimer.ring == true ) + { + elog ("%s out-of-service test execution timeout\n", name.c_str()); + + mtcInvApi_update_task ( node_ptr, simplex ? MTC_TASK_INTEST_FAIL_TO_ : MTC_TASK_INTEST_FAIL_TO ); + + /* Need thresholded auto recovery for this failure mode */ + if ( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX ) + this->autorecovery_enabled = true ; + + enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED ); + } + else + { + ; /* wait some more */ + } + break ; + } + case MTC_ENABLE__HOST_SERVICES_START: + { + bool start = true ; + bool subf = true ; + + plog ("%s %s host services\n", + name.c_str(), + node_ptr->start_services_needed_subf ? "scheduling start compute" : + "starting compute"); + + if ( node_ptr->start_services_needed_subf == true ) + { + bool force = true ; + + /* If the add_handler set start_services_needed_subf to + * true then we bypass inline execution and allow it to + * be serviced as a scheduled background operation. */ + enableStageChange ( node_ptr, MTC_ENABLE__HEARTBEAT_CHECK ); + alarm_compute_clear ( node_ptr, force ); + } + + else if ( launch_host_services_cmd ( node_ptr, start, subf ) != PASS ) + { + node_ptr->hostservices_failed_subf = true ; + + wlog ("%s %s failed ; launch\n", + name.c_str(), + node_ptr->host_services_req.name.c_str()); + + /* Need thresholded auto recovery for this failure mode */ + if ( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX ) + this->autorecovery_enabled = true ; + + enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED ); + } + else + { + enableStageChange ( node_ptr, MTC_ENABLE__HOST_SERVICES_WAIT ); + } + break ; + } + + case MTC_ENABLE__HOST_SERVICES_WAIT: + { + /* Wait for host services to complete - pass or fail. + * The host_services_handler manages timeout. 
*/ + rc = host_services_handler ( node_ptr ); + if ( rc == RETRY ) + { + /* wait for the mtcClient's response ... */ + break ; + } + else if ( rc != PASS ) + { + /* Need thresholded auto recovery for this failure mode */ + if ( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX ) + this->autorecovery_enabled = true ; + + node_ptr->hostservices_failed_subf = true ; + if ( rc == FAIL_TIMEOUT ) + { + elog ("%s %s failed ; timeout\n", + name.c_str(), + node_ptr->host_services_req.name.c_str()); + + /* Report "Enabling Compute Service Timeout" to sysinv/horizon */ + mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLING_SUBF_TO ); + } + else + { + elog ("%s %s failed ; rc:%d\n", + name.c_str(), + node_ptr->host_services_req.name.c_str(), + rc); + + /* Report "Enabling Compute Service Failed" to sysinv/horizon */ + mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLING_SUBF_FAIL ); + } + enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED ); + } + else /* success path */ + { + alarm_compute_clear ( node_ptr, true ); + node_ptr->hostservices_failed_subf = false ; + enableStageChange ( node_ptr, MTC_ENABLE__HEARTBEAT_CHECK ); + } + break ; + } + case MTC_ENABLE__HEARTBEAT_CHECK: + { + if ( THIS_HOST ) + { + enableStageChange ( node_ptr, MTC_ENABLE__STATE_CHANGE ); + } + else + { + /* allow the fsm to wait for up to 1 minute for the + * hbsClient's ready event before starting heartbeat + * test. */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_MINS_1 ); + enableStageChange ( node_ptr, MTC_ENABLE__HEARTBEAT_WAIT ); + } + + break ; + } + case MTC_ENABLE__HEARTBEAT_WAIT: + { + if ( mtcTimer_expired ( node_ptr->mtcTimer ) ) + { + wlog ("%s hbsClient ready event timeout\n", name.c_str()); + } + else if ( node_ptr->hbsClient_ready == false ) + { + break ; + } + else + { + mtcTimer_reset ( node_ptr->mtcTimer ); + } + + plog ("%s Starting %d sec Heartbeat Soak (with%s)\n", + name.c_str(), + MTC_HEARTBEAT_SOAK_BEFORE_ENABLE, + node_ptr->hbsClient_ready ? 
" ready event" : "out ready event" ); + + /* Start Monitoring Services - heartbeat, process and hardware */ + send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST ); + + /* allow heartbeat to run for MTC_HEARTBEAT_SOAK_BEFORE_ENABLE seconds before we declare enable */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_HEARTBEAT_SOAK_BEFORE_ENABLE ); + enableStageChange ( node_ptr, MTC_ENABLE__HEARTBEAT_SOAK ); + break ; + } + case MTC_ENABLE__HEARTBEAT_SOAK: + { + if ( node_ptr->mtcTimer.ring == true ) + { + plog ("%s heartbeating\n", name.c_str() ); + /* if heartbeat is not working then we will + * never get here and enable the host */ + enableStageChange ( node_ptr, MTC_ENABLE__STATE_CHANGE ); + } + break ; + } + case MTC_ENABLE__STATE_CHANGE: + { + /* Check the work queue complete and done status's */ + mtcInvApi_force_task ( node_ptr, "" ); + + /* Now that we have posted the unlocked-enabled-available state we need + * to force the final part of the enable sequence through */ + if ( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) + { + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__ENABLE ); + } + enableStageChange ( node_ptr, MTC_ENABLE__WORKQUEUE_WAIT ); + + /* Start a timer that fails the enable if the work queue + * does not empty or if commands in the done queue have failed */ + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, work_queue_timeout ); + + break ; + } + case MTC_ENABLE__WORKQUEUE_WAIT: + { + bool fail = false ; + rc = workQueue_done ( node_ptr ); + if ( rc == RETRY ) + { + /* wait longer */ + break ; + } + else if ( rc == FAIL_WORKQ_TIMEOUT ) + { + elog ("%s enable failed ; Enable workQueue timeout, purging ...\n", name.c_str()); + + mtcInvApi_update_task ( node_ptr, simplex ? MTC_TASK_ENABLE_WORK_TO_ : MTC_TASK_ENABLE_WORK_TO ); + + fail = true ; + } + else if ( rc != PASS ) + { + elog ("%s enable failed ; Enable doneQueue has failed commands\n", name.c_str()); + + mtcInvApi_update_task ( node_ptr, simplex ? 
MTC_TASK_ENABLE_WORK_FAIL_ : MTC_TASK_ENABLE_WORK_FAIL ); + + fail = true ; + } + else if ( this->system_type != SYSTEM_TYPE__CPE_MODE__SIMPLEX ) + { + /* Loop over the heartbeat interfaces and fail the Enable if any of them are failing */ + for ( int i = 0 ; i < MAX_IFACES ; i++ ) + { + if ( node_ptr->heartbeat_failed[i] == true ) + { + elog ("%s Enable failure due to %s Network *** Heartbeat Loss ***\n", + name.c_str(), + get_iface_name_str ((iface_enum)i)); + + mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLE_FAIL_HB ); + fail = true ; + } + } + } + if ( fail == true ) + { + enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED ); + break ; + } + else + { + if ( node_ptr->dor_recovery_mode || node_ptr->was_dor_recovery_mode ) + { + node_ptr->dor_recovery_mode = false ; + node_ptr->was_dor_recovery_mode = true ; + } + + if (( node_ptr->alarms[MTC_ALARM_ID__CH_COMP] != FM_ALARM_SEVERITY_CLEAR ) || + ( node_ptr->alarms[MTC_ALARM_ID__ENABLE] != FM_ALARM_SEVERITY_CLEAR ) || + ( node_ptr->alarms[MTC_ALARM_ID__CONFIG] != FM_ALARM_SEVERITY_CLEAR )) + { + wlog ("%s enable to degraded migration due to alarm [%d:%d:%d]\n", + name.c_str(), + node_ptr->alarms[MTC_ALARM_ID__CH_COMP], + node_ptr->alarms[MTC_ALARM_ID__ENABLE], + node_ptr->alarms[MTC_ALARM_ID__CONFIG] ); + + enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED ); + } + else if ( node_ptr->degrade_mask ) + { + enableStageChange ( node_ptr, MTC_ENABLE__DEGRADED ); + } + else + { + enableStageChange ( node_ptr, MTC_ENABLE__ENABLED ); + } + } + break ; + } + case MTC_ENABLE__ENABLED: + { + bool force = true ; + + /* Set node as unlocked-enabled-available */ + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__AVAILABLE ); + + subfStateChange ( node_ptr, MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__AVAILABLE ); + + node_ptr->subf_enabled = true ; + node_ptr->inservice_failed_subf = false ; + if ( node_ptr->was_dor_recovery_mode ) + { + report_dor_recovery ( node_ptr , "is 
ENABLED" ); + } + else + { + plog ("%s is ENABLED\n", name.c_str()); + } + + /* already cleared if true so no need to do it again */ + if ( node_ptr->start_services_needed_subf != true ) + { + alarm_compute_clear ( node_ptr, force ); + } + + enableStageChange ( node_ptr, MTC_ENABLE__DONE ); + + break ; + } + /* Allow the host to come up in the degraded state */ + case MTC_ENABLE__DEGRADED: + { + if ( node_ptr->alarms[MTC_ALARM_ID__CH_COMP] == FM_ALARM_SEVERITY_CLEAR ) + { + subfStateChange ( node_ptr, MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__AVAILABLE ); + } + else + { + subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED, + MTC_AVAIL_STATUS__FAILED ); + } + + /* Set node as unlocked-enabled-degraded */ + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__DEGRADED ); + + if ( node_ptr->was_dor_recovery_mode ) + { + report_dor_recovery ( node_ptr , "is ENABLED-degraded" ); + } + else + { + wlog ("%s is ENABLED-degraded\n", name.c_str()); + } + enableStageChange ( node_ptr, MTC_ENABLE__DONE ); + + break ; + } + /* Fail the subfunction ; the host itself is set unlocked-enabled-degraded and the FSM moves on to FAILURE_WAIT */ + case MTC_ENABLE__SUBF_FAILED: + { + subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED, + MTC_AVAIL_STATUS__FAILED ); + + /* Set node as unlocked-enabled-degraded */ + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__DEGRADED ); + + if ( node_ptr->was_dor_recovery_mode ) + { + report_dor_recovery ( node_ptr , "is DISABLED-failed" ); + } + else + { + elog ("%s is DISABLED-failed (subfunction failed)\n", + name.c_str() ); + } + this->dor_mode_active = false ; + + alarm_compute_failure ( node_ptr , FM_ALARM_SEVERITY_CRITICAL ) ; + + /* Start the timer that bounds how long the FAILURE_WAIT stage + * waits for the work queue to complete before purging it */ + mtcTimer_reset ( node_ptr->mtcTimer ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, 
work_queue_timeout ); + enableStageChange ( node_ptr, 
MTC_ENABLE__FAILURE_WAIT ); + + break ; + } + case MTC_ENABLE__DONE: + { + mtcTimer_reset ( node_ptr->mtcTimer ); + + /* Override cmd of ENABLED if action is UNLOCK */ + mtc_cmd_enum cmd = CONTROLLER_ENABLED ; + if ( node_ptr->adminAction == MTC_ADMIN_ACTION__UNLOCK ) + { + cmd = CONTROLLER_UNLOCKED ; + } + + mtcSmgrApi_request ( node_ptr, cmd, SMGR_MAX_RETRIES ); + mtcVimApi_state_change ( node_ptr, VIM_HOST_ENABLED, 3 ); + + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); + enableStageChange ( node_ptr, MTC_ENABLE__START ); + + node_ptr->enabled_count++ ; + node_ptr->health_threshold_counter = 0 ; + + node_ptr->was_dor_recovery_mode = false ; + node_ptr->dor_recovery_mode = false ; + this->dor_mode_active = false ; + + mtcInvApi_force_task ( node_ptr, "" ); + break ; + } + default: + rc = FAIL_BAD_CASE ; + } + return (rc); +} diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcThreads.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcThreads.cpp new file mode 100644 index 00000000..0e7c4acd --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcThreads.cpp @@ -0,0 +1,385 @@ + +/* + * Copyright (c) 2016-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Node Maintenance - Threading implementation" + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define gettid() syscall(SYS_gettid) + +using namespace std; + +#include "daemon_common.h" + +#include "nodeBase.h" +#include "hostUtil.h" /* for ... hostUtil_mktmpfile */ +#include "nodeUtil.h" +#include "threadUtil.h" +#include "ipmiUtil.h" /* for ... IPMITOOL_CMD_FILE_SUFFIX ... */ +#include "mtcThreads.h" /* for ... IPMITOOL_THREAD_CMD__RESET ... 
*/ +/* Thread entry point ; arg is a thread_info_type. Runs the single ipmitool command selected by info_ptr->command using the bm_ip/bm_un/bm_pw values in info_ptr->extra_info_ptr, reports the outcome through info_ptr->status, status_string and data, then leaves via pthread_exit(&info_ptr->status). */ +void * mtcThread_ipmitool ( void * arg ) +{ + thread_info_type * info_ptr ; + thread_extra_info_type * extra_ptr ; + + /* Pointer Error Detection and Handling */ + if ( !arg ) + { + slog ("*** ipmitool thread called with null arg pointer *** corruption\n"); + return NULL ; + } + + /* cast pointers from arg */ + info_ptr = (thread_info_type*)arg ; + extra_ptr = (thread_extra_info_type*)info_ptr->extra_info_ptr ; + + info_ptr->pw_file_fd = 0 ; + + /* Set cancellation option so that a delete operation can + * kill this thread immediately */ + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL ); + + if ( daemon_want_fit ( FIT_CODE__DO_NOTHING_THREAD, info_ptr->hostname )) + { + info_ptr->progress++ ; + info_ptr->runcount++ ; + pthread_exit (&info_ptr->status ); + return NULL ; + } + + /* allow the parent to confirm thread id */ + info_ptr->id = pthread_self() ; + if ( extra_ptr != NULL ) + { + int rc = PASS ; + string command = "" ; + string response = "" ; /* NOTE(review): assigned in the switch below but never read in this function */ + + switch ( info_ptr->command ) + { + case IPMITOOL_THREAD_CMD__POWER_RESET: + { + command = IPMITOOL_POWER_RESET_CMD ; + response = IPMITOOL_POWER_RESET_RESP ; + break ; + } + case IPMITOOL_THREAD_CMD__POWER_ON: + { + command = IPMITOOL_POWER_ON_CMD ; + response = IPMITOOL_POWER_ON_RESP ; + break ; + } + case IPMITOOL_THREAD_CMD__POWER_OFF: + { + command = IPMITOOL_POWER_OFF_CMD ; + response = IPMITOOL_POWER_OFF_RESP ; + break ; + } + case IPMITOOL_THREAD_CMD__POWER_CYCLE: + { + command = IPMITOOL_POWER_CYCLE_CMD ; + response = IPMITOOL_POWER_CYCLE_RESP ; + break ; + } 
+ case IPMITOOL_THREAD_CMD__POWER_STATUS: + { + command = IPMITOOL_POWER_STATUS_CMD ; + break ; + } + case IPMITOOL_THREAD_CMD__RESTART_CAUSE: + { + command = IPMITOOL_RESTART_CAUSE_CMD ; + break ; + } + case IPMITOOL_THREAD_CMD__MC_INFO: + { + command = IPMITOOL_MC_INFO_CMD ; + break ; + } + default: + { + rc = info_ptr->status = FAIL_BAD_CASE ; + info_ptr->data = "unsupported 
command: " ; + info_ptr->data.append(itos(info_ptr->command)); + break ; + } + } + + if ( rc == PASS ) + { + bool bypass_ipmitool_request = false ; + + dlog_t ("%s '%s' command\n", info_ptr->log_prefix, command.c_str()); + + /* create the password file ; name prefix is IPMITOOL_OUTPUT_DIR.<program>-<hostname>- */ + string password_tempfile = IPMITOOL_OUTPUT_DIR ; + password_tempfile.append(".") ; + password_tempfile.append(program_invocation_short_name); + password_tempfile.append("-"); + password_tempfile.append(info_ptr->hostname); + password_tempfile.append("-"); + + info_ptr->pw_file_fd = hostUtil_mktmpfile (info_ptr->hostname, + password_tempfile, + info_ptr->password_file, + extra_ptr->bm_pw ); + + if ( info_ptr->pw_file_fd <= 0 ) + { + info_ptr->status_string = "failed to get an open temporary password filedesc" ; + info_ptr->status = FAIL_FILE_CREATE ; + goto ipmitool_thread_done ; + } + + if ( info_ptr->pw_file_fd > 0) + close (info_ptr->pw_file_fd); + info_ptr->pw_file_fd = 0 ; + + if ( info_ptr->password_file.empty() ) + { + info_ptr->status_string = "failed to get a temporary password filename" ; + info_ptr->status = FAIL_FILE_CREATE ; + goto ipmitool_thread_done ; + } + + dlog_t ("%s password file: %s\n", info_ptr->log_prefix, info_ptr->password_file.c_str()); + + + /* create the output filename ; IPMITOOL_OUTPUT_DIR + hostname + a per-command suffix */ + string ipmitool_datafile = IPMITOOL_OUTPUT_DIR ; + ipmitool_datafile.append(info_ptr->hostname); + + if ( info_ptr->command == IPMITOOL_THREAD_CMD__MC_INFO ) + { + ipmitool_datafile.append(IPMITOOL_MC_INFO_FILE_SUFFIX); + } + else if ( info_ptr->command == IPMITOOL_THREAD_CMD__RESTART_CAUSE ) + { + ipmitool_datafile.append(IPMITOOL_RESTART_CAUSE_FILE_SUFFIX); + } + else if ( info_ptr->command == IPMITOOL_THREAD_CMD__POWER_STATUS ) + { + ipmitool_datafile.append(IPMITOOL_POWER_STATUS_FILE_SUFFIX); + } + else + { + ipmitool_datafile.append(IPMITOOL_CMD_FILE_SUFFIX); + } + + dlog_t ("%s datafile:%s\n", 
info_ptr->hostname.c_str(), ipmitool_datafile.c_str()); + + /************** Create the ipmitool request 
**************/ + string ipmitool_request = + ipmiUtil_create_request ( command, + extra_ptr->bm_ip, + extra_ptr->bm_un, + info_ptr->password_file, + ipmitool_datafile ); + +/* Fault insertion : when the FIT directory exists, a matching FIT file or FIT code bypasses the real ipmitool request */ + if ( daemon_is_file_present ( MTC_CMD_FIT__DIR ) == true ) + { + if (( command == IPMITOOL_MC_INFO_CMD ) && + ( daemon_is_file_present ( MTC_CMD_FIT__MC_INFO ))) + { + bypass_ipmitool_request = true ; + rc = PASS ; + } + else if (( command == IPMITOOL_POWER_STATUS_CMD ) && + ( daemon_is_file_present ( MTC_CMD_FIT__POWER_STATUS ))) + { + bypass_ipmitool_request = true ; + rc = PASS ; + } + else if (( command == IPMITOOL_RESTART_CAUSE_CMD ) && + ( daemon_is_file_present ( MTC_CMD_FIT__RESTART_CAUSE ))) + { + bypass_ipmitool_request = true ; + rc = PASS ; + } + else if ((( command == IPMITOOL_POWER_RESET_CMD ) || + ( command == IPMITOOL_POWER_OFF_CMD ) || + ( command == IPMITOOL_POWER_ON_CMD ) || + ( command == IPMITOOL_POWER_CYCLE_CMD )) && + ( daemon_is_file_present ( MTC_CMD_FIT__POWER_CMD ))) + { + bypass_ipmitool_request = true ; + rc = PASS ; + } + else if ( daemon_want_fit ( FIT_CODE__AVOID_N_FAIL_IPMITOOL_REQUEST, info_ptr->hostname )) + { + slog ("%s FIT FIT_CODE__AVOID_N_FAIL_IPMITOOL_REQUEST\n", info_ptr->hostname.c_str()); + bypass_ipmitool_request = true ; + rc = FAIL_FIT ; + } + else if ( daemon_want_fit ( FIT_CODE__STRESS_THREAD, info_ptr->hostname )) + { + slog ("%s FIT FIT_CODE__STRESS_THREAD\n", info_ptr->hostname.c_str()); + bypass_ipmitool_request = true ; + rc = PASS ; + } + } + + dlog_t ("%s %s", info_ptr->hostname.c_str(), ipmitool_request.c_str()); /* ERIC */ + + if ( ! 
bypass_ipmitool_request ) + { + daemon_remove_file ( ipmitool_datafile.data() ) ; + + nodeUtil_latency_log ( info_ptr->hostname, NODEUTIL_LATENCY_MON_START, 0 ); + rc = system ( ipmitool_request.data()) ; + if ( rc != PASS ) + { + wlog_t ("%s ipmitool system call failed (%d:%d:%m)\n", info_ptr->hostname.c_str(), rc, errno ); + } + nodeUtil_latency_log ( info_ptr->hostname, "ipmitool system call", 1000 ); + } + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__THREAD_TIMEOUT, info_ptr->hostname ) ) + { + for ( ; ; ) + { + sleep (1) ; + pthread_signal_handler ( info_ptr ); + } + } + if ( daemon_want_fit ( FIT_CODE__THREAD_SEGFAULT, info_ptr->hostname ) ) + { + daemon_do_segfault(); + } +#endif + /* clean-up ; the temporary password file is removed as soon as the request has run */ + if ( info_ptr->pw_file_fd > 0 ) + close(info_ptr->pw_file_fd); + info_ptr->pw_file_fd = 0 ; + + unlink(info_ptr->password_file.data()); + daemon_remove_file ( info_ptr->password_file.data() ) ; + info_ptr->password_file.clear(); + + if ( rc != PASS ) + { + info_ptr->status_string = "failed ipmitool command : " ; + info_ptr->status_string.append(getIpmiCmd_str(info_ptr->command)); + info_ptr->status = FAIL_SYSTEM_CALL ; + + if ( ipmitool_request.length () ) + { + string _temp = ipmitool_request ; + size_t pos1 = _temp.find ("-f", 0) ; + size_t pos2 = _temp.find (" > ", 0) ; + + if (( pos1 != std::string::npos ) && ( pos2 != std::string::npos )) + { + /* don't log the password filename */ + wlog_t ("%s ... %s%s\n", + info_ptr->hostname.c_str(), + _temp.substr(0,pos1).c_str(), + _temp.substr(pos2).c_str()); + } + else + { + wlog_t ("%s ... 
%s\n", + info_ptr->hostname.c_str(), + ipmitool_request.c_str()); + } + } + } + else + { + bool ipmitool_datafile_present = false ; + + /* look for the output data file ; checked once a second for up to 10 tries */ + for ( int i = 0 ; i < 10 ; i++ ) + { + pthread_signal_handler ( info_ptr ); + if ( daemon_is_file_present ( ipmitool_datafile.data() )) + { + ipmitool_datafile_present = true ; + break ; + } + info_ptr->progress++ ; + sleep (1); + } + + if ( ipmitool_datafile_present ) + { + if ( info_ptr->command == IPMITOOL_THREAD_CMD__MC_INFO ) + { + /* tell the main process the name of the file containing the mc info data */ + info_ptr->data = ipmitool_datafile ; + info_ptr->status_string = "pass" ; + info_ptr->status = PASS ; + } + else + { + info_ptr->data = daemon_read_file (ipmitool_datafile.data()) ; + info_ptr->status_string = "pass" ; + info_ptr->status = PASS ; + } + } + else + { + info_ptr->status_string = "command did not produce output file ; timeout" ; + info_ptr->status = FAIL_FILE_ACCESS ; + } + } + } + } + else + { + info_ptr->status_string = "null 'extra info' pointer" ; + info_ptr->status = FAIL_NULL_POINTER ; + goto ipmitool_thread_done ; + } + +ipmitool_thread_done: + + if ( info_ptr->pw_file_fd > 0) + close (info_ptr->pw_file_fd); + info_ptr->pw_file_fd = 0 ; + + if ( ! 
info_ptr->password_file.empty() ) + { + unlink(info_ptr->password_file.data()); + daemon_remove_file ( info_ptr->password_file.data() ) ; + info_ptr->password_file.clear(); + } + + pthread_signal_handler ( info_ptr ); + + if ( info_ptr->status ) + { + dlog_t ("%s exit with (rc:%d)\n", + info_ptr->log_prefix, + info_ptr->status); + } + + info_ptr->progress++ ; + info_ptr->runcount++ ; + pthread_exit (&info_ptr->status ); + return NULL ; +} diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcThreads.h b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcThreads.h new file mode 100644 index 00000000..e3ebd93c --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcThreads.h @@ -0,0 +1,29 @@ +#ifndef __INCLUDE_MTCTHREAD_HH__ +#define __INCLUDE_MTCTHREAD_HH__ + +/* + * Copyright (c) 2013-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Node Maintenance "Thread Header" + * Header and Maintenance API + */ + +typedef struct +{ + string bm_ip ; + string bm_un ; + string bm_pw ; + string bm_type ; + +} thread_extra_info_type ; + +void * mtcThread_ipmitool ( void * ); +void * mtcThread_ipmitool_test ( void * arg ); + +#endif // __INCLUDE_MTCTHREAD_HH__ diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcVimApi.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcVimApi.cpp new file mode 100644 index 00000000..761d8e3b --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcVimApi.cpp @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Controller Maintenance + * Access to Service Manager via REST API Interface. + * + */ + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "vim" + +using namespace std; + +#include "nodeBase.h" /* for ... common definitions */ +#include "nodeUtil.h" /* for ... 
common utilities */ +#include "nodeClass.h" /* for ... maintenance class nodeLinkClass */ +#include "mtcVimApi.h" /* for ... this module header */ +#include "jsonUtil.h" /* for ... jsonUtil_get_key_val */ + +/*******************************************/ +/* Internal Private utilities and handlers */ +/*******************************************/ + +/* Response handler ; logs the event's response string and returns PASS */ +int mtcVimApi_handler ( libEvent & event ) +{ + jlog ("%s Response:%s\n", event.log_prefix.c_str(), event.response.c_str()); + + return (PASS); +} +/* Returns the state payload for 'hostname'. http_status_code is set to HTTP_NOTFOUND when the host is unknown and is left untouched otherwise. */ +string nodeLinkClass::mtcVimApi_state_get ( string hostname, int & http_status_code ) +{ + string payload = "" ; + + /* payload + * { + * { "availability":"", + * "operational":"", + * "administrative":"", + * "data_ports_oper":"", + * "data_ports_avail":"", + * "subfunction_oper":"", + * "subfunction_avail":"", + * }, + * "uuid":"", + * "hostname":"", + * "subfunctions":"", + * "personality":"" + * } + **/ + + nodeLinkClass::node * node_ptr = getNode ( hostname ) ; + if ( !node_ptr ) + { + payload.append (" \"status\" : \"fail\""); /* NOTE(review): no opening brace is added here although a closing one is appended below - confirm the caller supplies it */ + payload.append (",\"reason\" : \"not found\""); + payload.append (",\"action\" : \"undetermined\""); + payload.append ("}"); + + http_status_code = HTTP_NOTFOUND ; + return ( payload ); + } + payload = ("{\"") ; + payload.append (MTC_JSON_INV_ADMIN); + payload.append ("\":\""); + payload.append (adminState_enum_to_str(node_ptr->adminState)); + + payload.append ("\",\""); + + payload.append (MTC_JSON_INV_OPER); + payload.append ("\":\""); + payload.append (operState_enum_to_str(node_ptr->operState )); + + payload.append ("\",\""); + + payload.append (MTC_JSON_INV_AVAIL); + payload.append ("\":\""); + 
payload.append (availStatus_enum_to_str(node_ptr->availStatus )); + payload.append ("\","); + + /* Only add these fields if we have received messages from the vswitch + * ; they are skipped while the data port avail status is still MTC_AVAIL_STATUS__OFFDUTY */ + if ( node_ptr->availStatus_dport != MTC_AVAIL_STATUS__OFFDUTY ) + { 
payload.append ("\"data_ports_oper\":\""); + payload.append (operState_enum_to_str(node_ptr->operState_dport)); + payload.append ("\","); + + payload.append ("\"data_ports_avail\":\""); + payload.append (availStatus_enum_to_str(node_ptr->availStatus_dport)); + payload.append ("\","); + } + + payload.append ("\"subfunction_oper\":\""); + payload.append (operState_enum_to_str(node_ptr->operState_subf)); + payload.append ("\","); + + payload.append ("\"subfunction_avail\":\""); + payload.append (availStatus_enum_to_str(node_ptr->availStatus_subf)); + payload.append ("\"},"); + + payload.append ("\"hostname\":\""); + payload.append (hostname); + payload.append ("\","); + + payload.append ("\"uuid\":\""); + payload.append (node_ptr->uuid); + payload.append ("\","); + + payload.append ("\"subfunctions\":\""); + payload.append (node_ptr->functions); + payload.append ("\","); + + payload.append ("\"personality\":\""); + payload.append (node_ptr->function_str); + payload.append ("\"}"); + + return (payload); +} + +/* curl -i -X GET -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2112/v1/systems */ +/* Fills 'response' with {"hosts":[{"uuid":..,"hostname":..,"uptime":..},...]} for every host whose add has completed. Returns HTTP_OK, or MTC_HTTP_ACCEPTED when at least one host is still being added. An empty inventory yields {"hosts":[]}. 
*/ +int nodeLinkClass::mtcVimApi_system_info ( string & response ) +{ + int http_status_code = HTTP_OK ; + bool first = true ; /* true until the first host object has been emitted */ + response = "{\"hosts\":[" ; /* always start a fresh list so the closing "]}" below is balanced */ + for ( struct node * ptr = head ; ptr != NULL ; ptr = ptr->next ) + { + dlog ("%s %6d %s\n", ptr->uuid.c_str(), ptr->uptime, ptr->hostname.c_str() ); + + if ( ptr->add_completed == true ) + { + /* separate from the previously emitted host ; never leaves a trailing comma */ + if ( first == false ) + response.append(","); + first = false ; + response.append("{\"uuid\":\""); + response.append(ptr->uuid); + response.append("\",\"hostname\":\""); + response.append(ptr->hostname); + response.append("\",\"uptime\":"); + response.append(itos(ptr->uptime)); + response.append("}"); + } + else + { + http_status_code = MTC_HTTP_ACCEPTED ; + } + if ( ptr == tail ) + { + break ; + } + } + response.append("]}"); + jlog ("Response: %s", 
response.c_str() ); + return (http_status_code); +} + +/** + * Name : mtcVimApi_state_change + * + * Description: Submit any of the following requests to + * the VIM against the specified hostname. + * + * Operations: + * + * VIM Commands - Descriptions + * ------------- ------------------------------ + * HOST_DISABLE - Inform VIM that this host is now Mtce-Disabled + * HOST_ENABLE - Inform VIM that this host is now Mtce-Enabled + * HOST_FAIL - Inform VIM that this host has failed and + * undergoing auto recovery + * + */ +int nodeLinkClass::mtcVimApi_state_change ( struct nodeLinkClass::node * node_ptr, + libEvent_enum request, + int retries ) +{ + int http_status_code = HTTP_OK ; + string type ="host" ; + mtcHttpUtil_event_init ( &node_ptr->httpReq, + node_ptr->hostname, + "mtcVimApi_state_change", + hostUtil_getServiceIp ( SERVICE_VIM ), + hostUtil_getServicePort ( SERVICE_VIM )); + + /* Set the host context */ + node_ptr->httpReq.hostname = node_ptr->hostname ; + node_ptr->httpReq.uuid = node_ptr->uuid; + node_ptr->httpReq.cur_retries = 0 ; + node_ptr->httpReq.max_retries = retries ; + node_ptr->httpReq.active = true ; + node_ptr->httpReq.noncritical = false ; + switch ( request ) + { + case VIM_HOST_DISABLED: + node_ptr->httpReq.request = request ; + node_ptr->httpReq.operation = VIM_HOST__DISABLED ; + break ; + case VIM_HOST_ENABLED: + node_ptr->httpReq.request = request ; + node_ptr->httpReq.operation = VIM_HOST__ENABLED ; + break ; + case VIM_HOST_OFFLINE: + node_ptr->httpReq.request = request ; + node_ptr->httpReq.operation = VIM_HOST__OFFLINE; + break ; + case VIM_HOST_FAILED: + node_ptr->httpReq.request = request ; + node_ptr->httpReq.operation = VIM_HOST__FAILED ; + break ; + case VIM_DPORT_OFFLINE: + type = "data port"; + node_ptr->httpReq.request = request ; + node_ptr->httpReq.operation = "offline" ; + break ; + case VIM_DPORT_CLEARED: + type = "data port"; + node_ptr->httpReq.request = request ; + node_ptr->httpReq.operation = "clear" ; + break ; + 
case VIM_DPORT_DEGRADED: + type = "data port"; + node_ptr->httpReq.request = request ; + node_ptr->httpReq.operation = "major" ; + break ; + case VIM_DPORT_FAILED: + type = "data port"; + node_ptr->httpReq.request = request ; + node_ptr->httpReq.operation = "critical" ; + break ; + default: + return (FAIL_BAD_PARM) ; + } + + node_ptr->httpReq.payload = "{\"state-change\": " ; + node_ptr->httpReq.payload.append (mtcVimApi_state_get ( node_ptr->hostname , http_status_code )); + + if (( request == VIM_HOST_FAILED ) || ( request == VIM_DPORT_FAILED )) + { + elog ("%s %s\n", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str()); + } + else + { + ilog ("%s sending '%s' state change to vim (%s)", + node_ptr->hostname.c_str(), + type.c_str(), + node_ptr->httpReq.operation.c_str()); + dlog ("%s %s\n", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str()); + } + + return(workQueue_enqueue( node_ptr->httpReq)); +} diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcVimApi.h b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcVimApi.h new file mode 100644 index 00000000..7382f483 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcVimApi.h @@ -0,0 +1,49 @@ +#ifndef __INCLUDE_MTCVIMAPI_H__ +#define __INCLUDE_MTCVIMAPI_H__ +/* + * Copyright (c) 2013, 2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +#include +#include + +#include "mtcHttpUtil.h" + + /** + * @file + * Wind River CGTS Platform Maintenance VIM Support + * + * Virtual Infrastructure Manager (VIM) API Header + * + * This module offers init and cleanup utils along with + * a single request based utility to inform VIM of Host Failures and when + * mtce transitions a host into the Enabled or Disabled State due to Sysinv + * Lock and Unlock requests that complete. 
+ * + * int mtcVimApi_state_change ( node_ptr, operation, retries ) + * + * Operations: + * + * VIM Commands - Descriptions + * ------------- ------------------------------ + * HOST_DISABLE - Inform VIM that this host is now Mtce-Disabled + * HOST_ENABLE - Inform VIM that this host is now Mtce-Enabled + * HOST_FAIL - Inform VIM that this host has failed and + * undergoing auto recovery + * + */ + +#define MTC_VIM_LABEL "/nfvi-plugins/v1/hosts/" + +/** Initializes the module */ +int mtcVimApi_init ( string ip, int port ); + +/** Frees the module's dynamically allocated resources */ +void mtcVimApi_fini ( void ); + +int mtcVimApi_handler ( libEvent & event ); + +#endif /* __INCLUDE_MTCVIMAPI_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcWorkQueue.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcWorkQueue.cpp new file mode 100644 index 00000000..fdbed85c --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcWorkQueue.cpp @@ -0,0 +1,868 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Controller Maintenance HTTP Utilities. + * + * Public Interfaces: tied to the nodeLinkClass + * + * nodeLinkClass::workQueue_enqueue + * nodeLinkClass::doneQueue_dequeue + * + * + * Private Helper Utilities: + * + * _get_work_state_str + * _get_event_log_prefix_string + * + * + */ + +using namespace std; + +#include "nodeClass.h" /* for ... maintenance class nodeLinkClass */ +#include "mtcHttpUtil.h" /* this module header */ +#include "mtcNodeHdlrs.h" /* for ... mtcTimer_handl */ +#include "nodeUtil.h" /* for ... 
common Node Utilities */ + +#define QUEUE_OVERLOAD (40) + +string _get_work_state_str ( httpStages_enum state ) +{ + if ( state == HTTP__TRANSMIT ) return ("Tx "); + else if ( state == HTTP__RECEIVE ) return (" Rx"); + else if ( state == HTTP__FAILURE ) return (" Er "); + else if ( state == HTTP__RECEIVE_WAIT ) return ("Wait"); + else + { + elog ("Invalid Http Work Queue State: %d\n", state ); + return ("----"); + } +} + + +void nodeLinkClass::workQueue_dump ( struct nodeLinkClass::node * node_ptr ) +{ + if ( node_ptr->libEvent_work_fifo.size() ) + { + syslog ( LOG_INFO, "\n"); + syslog ( LOG_INFO, "+------+-------+--------------+---------+---------------+----------------------+\n"); + syslog ( LOG_INFO, "| Mode | Seq | Hostname | Service | Request | IP Address Port | Payload ...\n"); + syslog ( LOG_INFO, "+------+-------+--------------+---------+---------------+----------------------+\n"); + for ( node_ptr->libEvent_work_fifo_ptr = node_ptr->libEvent_work_fifo.begin(); + node_ptr->libEvent_work_fifo_ptr != node_ptr->libEvent_work_fifo.end(); + node_ptr->libEvent_work_fifo_ptr ++ ) + { + syslog ( LOG_INFO, "| %-4s | %5d | %-12s | %-7s | %-13s | %15s:%d | %s\n", + _get_work_state_str(node_ptr->libEvent_work_fifo_ptr->state).c_str(), + node_ptr->libEvent_work_fifo_ptr->sequence, + node_ptr->libEvent_work_fifo_ptr->hostname.c_str(), + node_ptr->libEvent_work_fifo_ptr->service.c_str(), + node_ptr->libEvent_work_fifo_ptr->operation.c_str(), + node_ptr->libEvent_work_fifo_ptr->ip.c_str(), + node_ptr->libEvent_work_fifo_ptr->port, + node_ptr->libEvent_work_fifo_ptr->payload.c_str()); + } + syslog ( LOG_INFO, "+------+-------+--------------+---------+--------------+----------------------+\n"); + } + else + { + dlog ("%s work queue is empty\n", node_ptr->hostname.c_str()); + } +} + +void nodeLinkClass::workQueue_dump_all ( void ) +{ + struct node * ptr = static_cast(NULL) ; + + /* check for empty list condition */ + if ( head == NULL ) + { + syslog ( LOG_INFO, "No 
inventory\n"); + return ; + } + /* Now search the node list */ + for ( ptr = head ; ptr != NULL ; ptr = ptr->next ) + { + workQueue_dump ( ptr ); + } +} + +void nodeLinkClass::doneQueue_dump ( struct nodeLinkClass::node * node_ptr ) +{ + if ( node_ptr->libEvent_done_fifo.size() ) + { + for ( node_ptr->libEvent_done_fifo_ptr = node_ptr->libEvent_done_fifo.begin(); + node_ptr->libEvent_done_fifo_ptr != node_ptr->libEvent_done_fifo.end(); + node_ptr->libEvent_done_fifo_ptr ++ ) + { + syslog ( LOG_INFO, "%15s httpReq doneQueue:%5d - %s '%s' -> Status:%d\n", + node_ptr->libEvent_done_fifo_ptr->hostname.c_str(), + node_ptr->libEvent_done_fifo_ptr->sequence, + node_ptr->libEvent_done_fifo_ptr->service.c_str(), + node_ptr->libEvent_done_fifo_ptr->operation.c_str(), + node_ptr->libEvent_done_fifo_ptr->status ); + } + } +} + +void nodeLinkClass::doneQueue_dump_all ( void ) +{ + struct node * ptr = static_cast(NULL) ; + + /* check for empty list condition */ + if ( head == NULL ) + { + syslog ( LOG_INFO, "\nNo inventory\n"); + return ; + } + /* Now search the node list */ + for ( ptr = head ; ptr != NULL ; ptr = ptr->next ) + { + doneQueue_dump ( ptr ); + doneQueue_purge ( ptr ); + } +} + + +/* *********************************************************************** + * + * Name : nodeLinkClass::workQueue_enqueue + * + * Description: Adds the next sequence number to the supplied event + * reference, creates a log prefix based on the event's + * hostname, service, operation and sequence number + * (to avoid repeated recreation) and then copies that + * event to the work queue. + * + * @param event is a reference to the callers libEvent. + * @return an integer with value of PASS. 
+ * + * ************************************************************************/ + +int nodeLinkClass::workQueue_enqueue ( libEvent & event ) +{ + char seq_str[64] ; + memset ( &seq_str[0], 0 , 64 ); + + GET_NODE_PTR(event.hostname) ; + + event.sequence = node_ptr->oper_sequence++ ; + sprintf ( &seq_str[0], "%d", event.sequence ); + + event.log_prefix = event.hostname ; + event.log_prefix.append (" "); + event.log_prefix.append (event.service) ; + //event.log_prefix.append (" '"); + //event.log_prefix.append (event.operation) ; + //event.log_prefix.append ("' seq:"); + event.log_prefix.append (" seq:"); + event.log_prefix.append (seq_str) ; + + node_ptr->libEvent_work_fifo.push_back(event); + + qlog ("%s Enqueued\n", event.log_prefix.c_str()); + + return (PASS) ; +} + +/* *********************************************************************** + * + * Name : nodeLinkClass::doneQueue_dequeue + * + * Description: Searches the done queue for the event matching the supplied + * event reference , specifically the sequence number. If found + * it pulls the execution status information and then proceeds + * to remove it from the done queue. + * + * If the event is found then the event status is returned. + * if not found then a RETRY is returned. + * If the done event status is RETRY then a FAIL is returned since + * it should not be on the done queue with a retry status. + * + * @param event is a reference to the callers libEvent. 
+ * @return an integer with values of PASS, FAIL, RETRY + * + * ************************************************************************/ + +int nodeLinkClass::doneQueue_dequeue ( libEvent & event ) +{ + int rc = FAIL ; + bool found = false ; + GET_NODE_PTR(event.hostname) ; + for ( node_ptr->libEvent_done_fifo_ptr = node_ptr->libEvent_done_fifo.begin(); + node_ptr->libEvent_done_fifo_ptr != node_ptr->libEvent_done_fifo.end(); + node_ptr->libEvent_done_fifo_ptr++ ) + { + if ( node_ptr->libEvent_done_fifo_ptr->sequence == event.sequence ) + { + ilog ("%s fetched from done queue\n", + node_ptr->libEvent_done_fifo_ptr->log_prefix.c_str()); + + /* get on the response data */ + event.http_status = node_ptr->libEvent_done_fifo_ptr->http_status ; + event.status = node_ptr->libEvent_done_fifo_ptr->status ; + event.active = node_ptr->libEvent_done_fifo_ptr->active ; + event.value = node_ptr->libEvent_done_fifo_ptr->value ; + event.result = node_ptr->libEvent_done_fifo_ptr->result ; + event.response = node_ptr->libEvent_done_fifo_ptr->response ; + event.response_len= node_ptr->libEvent_done_fifo_ptr->response_len ; + + node_ptr->libEvent_done_fifo.erase(node_ptr->libEvent_done_fifo_ptr); + found = true ; + if ( event.status == RETRY ) + { + slog ("%s over riding rety to fail\n", + node_ptr->libEvent_done_fifo_ptr->log_prefix.c_str() ); + event.status = FAIL ; + } + rc = event.status ; + break ; + } + } + if ( found == false ) + { + qlog ("%s not found in done queue\n", event.log_prefix.c_str()); + rc = RETRY ; + } + return (rc); +} + +/* *********************************************************************** + * + * Name : nodeLinkClass::workQueue_process + * + * Description: This is a Per Host Finite State Machine (FSM) that + * processes the work queue for the supplied host's + * node pointer. 
+ * + * Constructs: + * + * node_ptr->libEvent_work_fifo - the current work queue/fifo + * node_ptr->libEvent_done_fifo - queue/fifo of completed requests + * + * Operations: + * + * requests are added to the libEvent_work_fifo with workQueue_enqueue. + * requests are removed from the libEvent_done_fifo with workQueue_dequeue. + * + * Behavior: + * + * In process libEvents are copied from the callers work queue to + * its thisReq. + * + * Completed events including execution status are copied to the host's + * done fifo. + * + * Failed events may be retried up to max_retries as specified by + * the callers libEvent. + * + * @param event is a reference to the callers libEvent. + * + * @return an integer with values of PASS, FAIL, RETRY + * + * ************************************************************************/ + +int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr ) +{ + int rc = PASS ; + + /* handle purging the done queue on the last */ + if ( node_ptr->libEvent_done_fifo.size() > 0 ) + { + /* Manage the done queue so that it does not grow forever + * if command producers do not come back and dequeue their + * responses */ + if ( node_ptr->libEvent_done_fifo.size() > 10 ) + { + qlog ("%s Done Queue has %ld elements\n", + node_ptr->hostname.c_str(), + node_ptr->libEvent_done_fifo.size()); + + /* TODO: look at the status of the commands and print a log of those that failed */ + + /* Remove the first 8 - its a fifo the first ones at the front are the oldest */ + for ( int i=0 ; i < 8 ; i++ ) + { + node_ptr->libEvent_done_fifo.pop_front(); + } + qlog ("%s Done Queue has %ld elements remaining\n", + node_ptr->hostname.c_str(), + node_ptr->libEvent_done_fifo.size()); + } + } + + if ( node_ptr->libEvent_work_fifo.empty() ) + { + // qlog_throttled ( node_ptr->no_work_log_throttle, 300, + // "%s Idle ... 
\n", + // node_ptr->hostname.c_str()); + node_ptr->no_work_log_throttle = 0 ; + return (PASS); + } + + if ( daemon_get_cfg_ptr()->debug_work & 8 ) + { + // workQueue_print ( node_ptr ) ; + syslog ( LOG_INFO, "\n"); + syslog ( LOG_INFO, "+------+-------+--------------+---------+---------------+-----+----------------------+\n"); + syslog ( LOG_INFO, "| Mode | Seq | Hostname | Service | Request | Tmo | IP Address Port | payload ...\n"); + syslog ( LOG_INFO, "+------+-------+--------------+---------+---------------+-----+----------------------+\n"); + for ( node_ptr->libEvent_work_fifo_ptr = node_ptr->libEvent_work_fifo.begin(); + node_ptr->libEvent_work_fifo_ptr != node_ptr->libEvent_work_fifo.end(); + node_ptr->libEvent_work_fifo_ptr ++ ) + { + syslog ( LOG_INFO, "| %-4s | %5d | %-12s | %-7s | %-13s | %3d | %15s:%d | %s\n", + _get_work_state_str(node_ptr->libEvent_work_fifo_ptr->state).c_str(), + node_ptr->libEvent_work_fifo_ptr->sequence, + node_ptr->libEvent_work_fifo_ptr->hostname.c_str(), + node_ptr->libEvent_work_fifo_ptr->service.c_str(), + node_ptr->libEvent_work_fifo_ptr->operation.c_str(), + node_ptr->libEvent_work_fifo_ptr->timeout, + node_ptr->libEvent_work_fifo_ptr->ip.c_str(), + node_ptr->libEvent_work_fifo_ptr->port, + node_ptr->libEvent_work_fifo_ptr->payload.c_str()); + } + syslog ( LOG_INFO, "+------+-------+--------------+---------+--------------+-----+----------------------+\n"); + } + + + int size = node_ptr->libEvent_work_fifo.size() ; + if ( size > QUEUE_OVERLOAD ) + { + elog ( "%s work queue overload ; clearing %d entries\n", node_ptr->hostname.c_str(), size ); + workQueue_purge ( node_ptr ); + return (FAIL); + } + + if ( node_ptr->libEvent_work_fifo.empty() ) + { + slog ("%s unexpected empty 'libEvent_work_fifo_ptr' (should have %d elements)\n", + node_ptr->hostname.c_str(), size ); + workQueue_purge ( node_ptr ); + return (FAIL_NULL_POINTER); + } + + node_ptr->libEvent_work_fifo_ptr = node_ptr->libEvent_work_fifo.begin(); + switch ( 
node_ptr->libEvent_work_fifo_ptr->state ) + { + case HTTP__TRANSMIT: + { + node_ptr->thisReq = node_ptr->libEvent_work_fifo.front(); + + qlog ("%s Transmitted\n", node_ptr->thisReq.log_prefix.c_str() ); + + rc = mtcHttpUtil_api_request ( node_ptr->thisReq ) ; + if ( rc ) + { + node_ptr->libEvent_work_fifo_ptr->state = + node_ptr->thisReq.state = HTTP__FAILURE ; + } + else + { + node_ptr->libEvent_work_fifo_ptr->state = + node_ptr->thisReq.state = HTTP__RECEIVE_WAIT ; + + if ( node_ptr->http_timer.tid ) + mtcTimer_stop ( node_ptr->http_timer ); + rc = mtcTimer_start_msec ( node_ptr->http_timer, mtcTimer_handler, HTTP_RECEIVE_WAIT_MSEC ); + if ( rc != PASS ) + { + elog ("%s failed to start http command timer ; failing command\n", node_ptr->thisReq.log_prefix.c_str()); + node_ptr->libEvent_work_fifo_ptr->state = + node_ptr->thisReq.state = HTTP__FAILURE ; + } + } + break ; + } + case HTTP__RECEIVE_WAIT: + { + if ( node_ptr->http_timer.ring == true ) + { + if ( node_ptr->http_timer.error == true ) + { + slog ("%s timer handler ran while still in start utility ; handled ...\n", node_ptr->thisReq.log_prefix.c_str()); + node_ptr->http_timer.error = false ; + } + + if (( node_ptr->http_timer._guard != 0x12345678 ) || ( node_ptr->http_timer.guard_ != 0x77654321 )) + { + slog ("%s timer struct guard barrier detected corruption\n", node_ptr->thisReq.log_prefix.c_str()); + } + node_ptr->http_timer.ring = false ; + node_ptr->libEvent_work_fifo_ptr->state = + node_ptr->thisReq.state = HTTP__RECEIVE ; + } + break ; + } + case HTTP__RECEIVE: + { + /* Try and receive the response */ + if ( node_ptr->thisReq.base == NULL ) + { + slog ("%s has unexpected null HTTP request base pointer\n", + node_ptr->thisReq.log_prefix.c_str()); + + node_ptr->libEvent_work_fifo_ptr->state = + node_ptr->thisReq.state = HTTP__FAILURE ; + break ; + } + + int msec_timeout = (node_ptr->thisReq.timeout*1000); + int wait_time = (++node_ptr->thisReq.rx_retry_cnt)*HTTP_RECEIVE_WAIT_MSEC ; + + rc = 
mtcHttpUtil_receive ( node_ptr->thisReq ); + if ( rc == RETRY ) + { + node_ptr->libEvent_work_fifo_ptr->state = + node_ptr->thisReq.state = HTTP__RECEIVE_WAIT ; + mtcTimer_start_msec ( node_ptr->http_timer, mtcTimer_handler, HTTP_RECEIVE_WAIT_MSEC ); + + if ((wait_time > (msec_timeout/4)) && ( node_ptr->thisReq.low_wm == false ) ) + { + qlog1 ("%s reached lower (1/4) timeout watermark (%d msec)\n", + node_ptr->thisReq.log_prefix.c_str(), wait_time ); + node_ptr->libEvent_work_fifo_ptr->low_wm = node_ptr->thisReq.low_wm = true ; + break ; + } + else if ((wait_time > (msec_timeout/2)) && ( node_ptr->thisReq.med_wm == false )) + { + qlog ("%s reached mid (1/2) timeout watermark (%d msec)\n", + node_ptr->thisReq.log_prefix.c_str(), wait_time); + node_ptr->libEvent_work_fifo_ptr->med_wm = node_ptr->thisReq.med_wm = true ; + break ; + } + else if (( wait_time > ((msec_timeout/4)*3)) && ( node_ptr->thisReq.high_wm == false )) + { + wlog ("%s reached high (3/4) timeout watermark (%d msec)\n", + node_ptr->thisReq.log_prefix.c_str(), wait_time ); + node_ptr->libEvent_work_fifo_ptr->high_wm = node_ptr->thisReq.high_wm = true ; + break ; + } + else + { + /* Only print every 16 starting with 2 */ + if ( (node_ptr->thisReq.rx_retry_cnt & 0xF) == 2 ) + { + qlog ("%s rx_retry_cnt:%d\n", + node_ptr->thisReq.log_prefix.c_str(), + node_ptr->thisReq.rx_retry_cnt ); + } + break ; + } + } + if ( rc != PASS ) + { + node_ptr->libEvent_work_fifo_ptr->state = + node_ptr->thisReq.state = HTTP__FAILURE ; + } + else + { + if ( node_ptr->thisReq.cur_retries ) + { + ilog ("%s Completed (after %d retries) (took %d of %d msecs)\n", + node_ptr->thisReq.log_prefix.c_str(), + node_ptr->thisReq.cur_retries, wait_time, + node_ptr->thisReq.timeout*1000); + } + else + { + qlog ("%s Completed (took %d of %d msecs)\n", + node_ptr->thisReq.log_prefix.c_str(), + wait_time, + node_ptr->thisReq.timeout*1000); + } + node_ptr->thisReq.exec_time_msec = wait_time ; + + node_ptr->thisReq.rx_retry_cnt = 0 ; + + 
mtcHttpUtil_free_conn ( node_ptr->thisReq ); + mtcHttpUtil_free_base ( node_ptr->thisReq ); + + /* Don't add success responses to non-critical commands like + * "update uptime" and "update task" to the done queue */ + if ( !node_ptr->thisReq.noncritical ) + { + /* Copy done event to the done queue */ + node_ptr->libEvent_done_fifo.push_back(node_ptr->thisReq); + + } + /* Pop that done event off the work queue */ + node_ptr->libEvent_work_fifo.pop_front(); + } + break ; + } + case HTTP__FAILURE: + { + bool want_retry = false ; + + mtcHttpUtil_free_conn ( node_ptr->thisReq ); + mtcHttpUtil_free_base ( node_ptr->thisReq ); + + node_ptr->http_retries_cur++ ; + node_ptr->thisReq.cur_retries++ ; + + if ( node_ptr->thisReq.noncritical == true ) + { + if ( node_ptr->thisReq.cur_retries > node_ptr->thisReq.max_retries ) + { + node_ptr->oper_failures++ ; + + wlog ("%s retry conjestion abort of non-critical command (%d:%d)\n", + node_ptr->thisReq.log_prefix.c_str(), + node_ptr->thisReq.cur_retries, + node_ptr->thisReq.max_retries ); + + /* Pop this aborted event off the work queue */ + node_ptr->libEvent_work_fifo.pop_front(); + } + else + { + want_retry = true ; + } + } + /* other wise its critical and we are going for the retries */ + else if ( node_ptr->thisReq.cur_retries >= node_ptr->thisReq.max_retries ) + { + node_ptr->oper_failures++ ; + elog ("%s Failed (rc:%d) - (%d of %d) (work->%s) (Critical:%s) (Total Fails:%d)\n", + node_ptr->thisReq.log_prefix.c_str(), + node_ptr->thisReq.status, + node_ptr->thisReq.cur_retries, + node_ptr->thisReq.max_retries, + node_ptr->thisReq.noncritical ? "drop" : "done", + node_ptr->thisReq.noncritical ? 
"No" : "Yes", + node_ptr->oper_failures ); + + if ( node_ptr->thisReq.noncritical == false ) + { + /* Copy done event to the done queue */ + node_ptr->libEvent_done_fifo.push_back(node_ptr->thisReq); + } + /* Pop that done event off the work queue */ + node_ptr->libEvent_work_fifo.pop_front(); + } + else + { + want_retry = true ; + } + + if ( want_retry ) + { + wlog ("%s Failed (rc:%d) - (%d of %d) (Timeout=%d) (Critical:%s)\n", + node_ptr->thisReq.log_prefix.c_str(), + node_ptr->thisReq.status, + node_ptr->thisReq.cur_retries, + node_ptr->thisReq.max_retries, + node_ptr->thisReq.timeout, + node_ptr->thisReq.noncritical ? "No" : "Yes" ); + + node_ptr->thisReq.response.clear(); + + node_ptr->thisReq.status = PASS ; + node_ptr->thisReq.http_status = 0 ; + node_ptr->thisReq.active = false ; + node_ptr->thisReq.response_len= 0 ; + + /* + * If this is an inventory request ... + * + * 1. Init the inv struct + * 2. increase the timeout if is a critical command + * + * */ + if ( node_ptr->thisReq.service.find("mtcInvApi") != std::string::npos ) + { + node_inv_init ( node_ptr->thisReq.inv_info ) ; + if ( node_ptr->thisReq.noncritical == false ) + { + int temp = node_ptr->libEvent_work_fifo_ptr->timeout ; + + /* + * Increase and update the timeout value for critical commands + * in hope that it will succeed on he next go around. 
+ */ + node_ptr->libEvent_work_fifo_ptr->timeout += get_mtcInv_ptr()->sysinv_timeout ; + dlog ("%s timeout extended from %d to %d secs\n", + node_ptr->thisReq.log_prefix.c_str(), temp, + node_ptr->libEvent_work_fifo_ptr->timeout ); + } + } + + /* Save the retry count */ + node_ptr->libEvent_work_fifo_ptr->cur_retries = + node_ptr->thisReq.cur_retries ; + + node_ptr->libEvent_work_fifo_ptr->state = + node_ptr->thisReq.state = HTTP__TRANSMIT ; + } + break ; + } + default: + { + slog ("%s Bad libEvent work state (%d) ; clearing work/done queue\n", + node_ptr->hostname.c_str(), + node_ptr->libEvent_work_fifo_ptr->state ); + node_ptr->libEvent_work_fifo.clear(); + node_ptr->libEvent_done_fifo.clear(); + rc = FAIL_BAD_CASE ; + } + } + return (rc) ; +} + +/* *********************************************************************** + * + * Name : nodeLinkClass::workQueue_del_cmd + * + * Description: To handle the pathalogical case where an event seems to + * have timed out at the callers level then this interface + * can be called to delete it from the work queue. + * + * @param node_ptr so that the hosts work queue can be found + * @param sequence to specify the specific sequence number to remove + * @return always PASS since there is nothing the caller can or needs + * to do if the command is not present. 
+ * + */ +int nodeLinkClass::workQueue_del_cmd ( struct nodeLinkClass::node * node_ptr, int sequence ) +{ + bool found = false ; + for ( node_ptr->libEvent_work_fifo_ptr = node_ptr->libEvent_work_fifo.begin(); + node_ptr->libEvent_work_fifo_ptr != node_ptr->libEvent_work_fifo.end(); + node_ptr->libEvent_work_fifo_ptr++ ) + { + if ( node_ptr->libEvent_work_fifo_ptr->sequence == sequence ) + { + wlog ("%s force removed from work queue\n", + node_ptr->libEvent_work_fifo_ptr->log_prefix.c_str()); + node_ptr->libEvent_work_fifo.erase(node_ptr->libEvent_work_fifo_ptr); + found = true ; + break ; + } + } + if ( found == false ) + { + wlog ("%s command Seq:%d not found in work queue\n", + node_ptr->hostname.c_str(), sequence ); + } + return(PASS); +} + +/* *********************************************************************** + * + * Name : nodeLinkClass::doneQueue_purge + * + * Description: Removes all items from the done queue. + * + * Returns a failure, the sequence number of the first command + * in the done queue that did not PASS. + * + */ +int nodeLinkClass::doneQueue_purge ( struct nodeLinkClass::node * node_ptr ) +{ + int rc = PASS ; + int size = node_ptr->libEvent_done_fifo.size() ; + if ( size ) + { + int index = 0 ; + for ( node_ptr->libEvent_done_fifo_ptr = node_ptr->libEvent_done_fifo.begin(); + node_ptr->libEvent_done_fifo_ptr != node_ptr->libEvent_done_fifo.end(); + node_ptr->libEvent_done_fifo_ptr++ ) + { + index++ ; + if ( node_ptr->libEvent_done_fifo_ptr->status ) + { + /* Don't report noncritical command failure status. + * Such commands might be "update uptime" and "update task" + * and we don't want them to fail operations */ + if ( !node_ptr->libEvent_done_fifo_ptr->noncritical ) + { + elog ("%s critical operation failed (rc:%d)\n", + node_ptr->libEvent_done_fifo_ptr->log_prefix.c_str(), + node_ptr->libEvent_done_fifo_ptr->status); + + if ( ! node_ptr->libEvent_done_fifo_ptr->payload.empty() ) + { + elog ("%s ... 
%s\n", node_ptr->hostname.c_str(), + node_ptr->libEvent_done_fifo_ptr->payload.c_str()); + } + + /* Save sequence of first failed priority command */ + if ( rc == PASS ) + { + rc = node_ptr->libEvent_done_fifo_ptr->sequence ; + } + } + else + { + wlog ("%s noncritical operation failed (rc:%d)\n", + node_ptr->libEvent_done_fifo_ptr->log_prefix.c_str(), + node_ptr->libEvent_done_fifo_ptr->status); + + if ( ! node_ptr->libEvent_done_fifo_ptr->payload.empty() ) + { + wlog ("%s ... %s\n", node_ptr->hostname.c_str(), + node_ptr->libEvent_done_fifo_ptr->payload.c_str()); + } + } + } + } + if ( rc == PASS ) + { + qlog ("%s all (%d) priority queued operations passed (qlog)\n", node_ptr->hostname.c_str(), size ); + } + + qlog ("%s purging %d items from doneQueue\n", node_ptr->hostname.c_str(), size ); + node_ptr->libEvent_done_fifo.clear(); + } + return (rc); +} + +/* *********************************************************************** + * + * Name : nodeLinkClass::workQueue_purge + * + * Description: Removes all items from the work queue. + * + */ +int nodeLinkClass::workQueue_purge ( struct nodeLinkClass::node * node_ptr ) +{ + int size = node_ptr->libEvent_work_fifo.size() ; + if ( size ) + { + /* TODO: find out how to force close a connection. + * Don't free the connection if it is in the receiving state or + * we might get a segfault + * There is only ever one connection open at a time for a specific host + * so its only 'thisReq' we need to worry about. 
*/ + if ( node_ptr->libEvent_work_fifo_ptr->state != HTTP__RECEIVE ) + { + mtcHttpUtil_free_conn ( node_ptr->thisReq ); + mtcHttpUtil_free_base ( node_ptr->thisReq ); + } + + wlog ("%s purging %d items from workQueue\n", node_ptr->hostname.c_str(), size ); + for ( node_ptr->libEvent_work_fifo_ptr = node_ptr->libEvent_work_fifo.begin(); + node_ptr->libEvent_work_fifo_ptr != node_ptr->libEvent_work_fifo.end(); + node_ptr->libEvent_work_fifo_ptr++ ) + { + if ( node_ptr->libEvent_work_fifo_ptr->state == HTTP__TRANSMIT ) + { + wlog ("%s ... was not executed\n", + node_ptr->libEvent_work_fifo_ptr->log_prefix.c_str()); + } + else + { + wlog ("%s ... did not complete (%s)\n", + node_ptr->libEvent_work_fifo_ptr->log_prefix.c_str(), + _get_work_state_str(node_ptr->libEvent_work_fifo_ptr->state).c_str()); + } + } + + node_ptr->libEvent_work_fifo.clear(); + } + else + { + qlog ("%s all work done\n", node_ptr->hostname.c_str()); + } + + // node_ptr->libEvent_work_fifo_ptr->state = HTTP__TRANSMIT ; + return (PASS); +} + +int nodeLinkClass::workQueue_done ( struct nodeLinkClass::node * node_ptr ) +{ + int rc = PASS ; + + /* have we timed out waiting the the work queue tom deplete */ + if ( node_ptr->mtcTimer.ring == true ) + { + qlog ( "%s Ring handler\n" , node_ptr->hostname.c_str()); + + node_ptr->mtcTimer.ring = false ; + /* Search through work queue and don't fail if the + * only requests remaining is an uptime */ + for ( node_ptr->libEvent_work_fifo_ptr = node_ptr->libEvent_work_fifo.begin(); + node_ptr->libEvent_work_fifo_ptr != node_ptr->libEvent_work_fifo.end(); + node_ptr->libEvent_work_fifo_ptr++ ) + { + /* Don't report work queue timeout if there are only noncritical + * commands left in the work queue. 
Such commands might be + * "update uptime" and "update task" */ + if ( !node_ptr->libEvent_work_fifo_ptr->noncritical ) + { + rc = FAIL_WORKQ_TIMEOUT ; + } + } + if ( rc != PASS ) + { + elog ("%s timeout on work queue complete\n", node_ptr->hostname.c_str()); + } + } + + /* We still doing enable work ? */ + else if ( node_ptr->libEvent_work_fifo.size () == 0 ) + { + qlog ( "%s Empty Work Queue\n" , node_ptr->hostname.c_str()); + + /* O.K. the work queue is done - cancel the timer */ + if ( node_ptr->mtcTimer.tid ) + mtcTimer_stop ( node_ptr->mtcTimer ); + + /* Error logs are generated inside */ + rc = doneQueue_purge ( node_ptr ); + } + else + { + qlog ( "%s Retry\n" , node_ptr->hostname.c_str()); + + rc = RETRY ; + } + +#ifdef WANT_FIT_TESTING + if (( rc == PASS ) && ( daemon_want_fit ( FIT_CODE__WORK_QUEUE, node_ptr->hostname ))) + rc = FAIL_FIT ; +#endif + + return (rc); +} + + +/* *********************************************************************** + * + * Name : nodeLinkClass::workQueue_present + * + * Description: Checks to see if this libEvent is in the work queue. + * + * @return true if present otherwise false + * + */ +bool nodeLinkClass::workQueue_present ( libEvent & event ) +{ + nodeLinkClass::node * node_ptr = this->getNode (event.hostname); + if ( node_ptr != NULL ) + { + if ( node_ptr->libEvent_work_fifo.size() ) + { + for ( node_ptr->libEvent_work_fifo_ptr = node_ptr->libEvent_work_fifo.begin(); + node_ptr->libEvent_work_fifo_ptr != node_ptr->libEvent_work_fifo.end(); + node_ptr->libEvent_work_fifo_ptr++ ) + { + if ( node_ptr->libEvent_work_fifo_ptr->sequence == event.sequence ) + { + qlog ("%s ... found in work queue\n", node_ptr->libEvent_work_fifo_ptr->log_prefix.c_str()); + return (true); + } + } + } + } + + wlog ("%s ... 
not found in work queue\n", event.log_prefix.c_str()); + return (false); +} diff --git a/mtce-common/cgts-mtce-common-1.0/mtclog/Makefile b/mtce-common/cgts-mtce-common-1.0/mtclog/Makefile new file mode 100644 index 00000000..0ad09ef2 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/mtclog/Makefile @@ -0,0 +1,43 @@ +# +# Copyright (c) 2015 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +SRCS = mtclog.cpp +OBJS = $(SRCS:.cpp=.o) +LDLIBS = -lstdc++ -ldaemon -lcommon -lrt -lcrypto +INCLUDES = -I../daemon -I../common -I../maintenance -I. +CCFLAGS = -g -O2 -Wall -Wextra -Werror + +STATIC_ANALYSIS_TOOL = cppcheck +STATIC_ANALYSIS_TOOL_EXISTS = $(shell [[ -e `which $(STATIC_ANALYSIS_TOOL)` ]] && echo 1 || echo 0) + +# all: common daemon build clean_common +all: common build clean_common + +.cpp.o: + $(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@ + +static_analysis: +ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1) + $(STATIC_ANALYSIS_TOOL) --language=c++ --enable=warning -U__AREA__ -DWANT_FIT_TESTING *.cpp *.h +else + echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..." +endif + +build: clean static_analysis $(OBJS) + $(CXX) $(CCFLAGS) $(OBJS) -L../daemon -L../common $(LDLIBS) -o mtclogd + +common: + ( cd ../common ; make clean ; make lib VER=$(VER) VER_MJR=$(VER_MJR)) + +daemon: + ( cd ../daemon ; make clean ; make lib VER=$(VER) VER_MJR=$(VER_MJR)) + +clean_common: + @rm -f ../common/*.o + @rm -f ../daemon/*.o + +clean: clean_common + @rm -f $(OBJ) mtclogd *.o *.a diff --git a/mtce-common/cgts-mtce-common-1.0/mtclog/mtclog.cpp b/mtce-common/cgts-mtce-common-1.0/mtclog/mtclog.cpp new file mode 100644 index 00000000..38c33e15 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/mtclog/mtclog.cpp @@ -0,0 +1,338 @@ +/* + * Copyright (c) 2015-2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform maintenance Log daemon + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +#include "daemon_ini.h" +#include "daemon_common.h" +#include "daemon_option.h" + +#include "nodeBase.h" /* for ... service module header */ +#include "nodeTimers.h" /* Timer Service */ +#include "nodeUtil.h" /* Common Utilities */ +#include "nodeMacro.h" /* for ... CREATE_NONBLOCK_INET_UDP_RX_SOCKET */ +// #include "mtcNodeMsg.h" /* Common Messaging */ + +string my_hostname = "" ; +string my_local_ip = "" ; +string my_float_ip = "" ; + +static daemon_config_type _config ; +daemon_config_type * daemon_get_cfg_ptr () { return &_config ; } + +static msgSock_type log_sock ; + +/* Cleanup exit handler */ +void daemon_exit ( void ) +{ + fflush (stdout); + fflush (stderr); + + if ( log_sock.sock > 0 ) + { + close(log_sock.sock); + log_sock.sock = 0 ; + } + + exit (0); +} + +/******************************************************************* + * Module Utilities * + ******************************************************************/ +/* SIGCHLD handler support - for waitpid */ +void daemon_sigchld_hdlr ( void ) +{ + dlog("Received SIGCHLD ...\n"); +} + +/***************************************************************************** + * + * Name : _config_handler + * + * Purpose : Read specified config options + * + *****************************************************************************/ +static int _config_handler ( void * user, + const char * section, + const char * name, + const char * value) +{ + daemon_config_type* config_ptr = (daemon_config_type*)user; + + if (MATCH("client", "daemon_log_port")) + { + config_ptr->daemon_log_port = atoi(value); + // ilog ("Log port = %d\n", atoi(value)); + } + return (PASS); +} + 
+/***************************************************************************** + * + * Name : daemon_configure + * + * Purpose : Run the config handler against a specified config file. + * + *****************************************************************************/ +int daemon_configure ( void ) +{ + int rc = PASS ; + + if (ini_parse( MTCE_CONF_FILE, _config_handler, &_config) < 0) + { + elog("Can't load '%s'\n", MTCE_CONF_FILE ); + } + + get_debug_options ( MTCE_CONF_FILE, &_config ); + + /* This ensures any link aggregation interface overrides the physical */ + _config.mgmnt_iface = daemon_get_iface_master ( _config.mgmnt_iface ); + ilog("Mgmnt Iface : %s\n", _config.mgmnt_iface ); + + bool waiting_msg = false ; + for ( ;; ) + { + get_ip_addresses ( my_hostname, my_local_ip , my_float_ip ); + if ( my_float_ip.empty() || my_local_ip.empty() ) + { + if ( waiting_msg == false ) + { + ilog ("Waiting on ip address config ...\n"); + waiting_msg = true ; + + /* Flush the init data */ + fflush (stdout); + fflush (stderr); + } + mtcWait_secs (3); + } + else + { + break ; + } + } + + ilog("Logger Port : %d\n", _config.daemon_log_port ); + return (rc); +} + + +/****************************/ +/* Initialization Utilities */ +/****************************/ + +/* Setup the daemon messaging interfaces/sockets */ +int socket_init ( void ) +{ + int rc = FAIL ; + int port = _config.daemon_log_port ; + CREATE_NONBLOCK_INET_UDP_RX_SOCKET ( LOOPBACK_IP, + port, + log_sock.sock, + log_sock.addr, + log_sock.port, + log_sock.len, + "daemon log receiver", + rc ); + if ( rc ) + { + printf ("Failed to setup the daemon log reciver port %d\n", _config.daemon_log_port ); + } + return (rc); +} + +/* The main heartbeat service loop */ +int daemon_init ( string iface, string nodetype ) +{ + int rc = PASS ; + + /* Not used by this daemon */ + UNUSED(nodetype); + /* init the control struct */ + my_hostname = "" ; + my_local_ip = "" ; + my_float_ip = "" ; + + /* Assign interface to config */ + 
_config.mgmnt_iface = (char*)iface.data() ; + + if ( daemon_files_init ( ) != PASS ) + { + elog ("Pid, log or other files could not be opened\n"); + return ( FAIL_FILES_INIT ) ; + } + + /* Bind signal handlers */ + if ( daemon_signal_init () != PASS ) + { + elog ("daemon_signal_init failed\n"); + return ( FAIL_SIGNAL_INIT ) ; + } + + /************************************************************************ + * There is no point continuing with init ; i.e. running daemon_configure, + * initializing sockets and trying to query for an ip address until the + * daemon's configuration requirements are met. Here we wait for those + * flag files to be present before continuing. + ************************************************************************ + * Wait for /etc/platform/.initial_config_complete & /var/run/.goenabled */ + daemon_wait_for_file ( CONFIG_COMPLETE_FILE , 0); + daemon_wait_for_file ( GOENABLED_MAIN_READY , 0); + + /* Configure the daemon */ + if ( (rc = daemon_configure ( )) != PASS ) + { + elog ("Daemon service configuration failed (rc:%i)\n", rc ); + rc = FAIL_DAEMON_CONFIG ; + } + + /* Setup the messaging sockets */ + else if ( (rc = socket_init ( )) != PASS ) + { + elog ("socket initialization failed (rc:%d)\n", rc ); + rc = FAIL_SOCKET_INIT ; + } + + return (rc); +} + +/* **************************************************** + * Start the service + * ****************************************************/ +void daemon_service_run ( void ) +{ + int rc = PASS ; + int count = 0 ; + + /* For select dispatch */ + struct timeval waitd ; + fd_set readfds; + + /* Wait for config complete indicated by presence + * of /etc/platform/.initial_config_complete */ + struct stat p ; + memset ( &p, 0 , sizeof(struct stat)); + do + { + stat (CONFIG_COMPLETE_FILE, &p); + mtcWait_secs (2); + wlog_throttled ( count, 60, "Waiting for %s\n", CONFIG_COMPLETE_FILE); + + /* The CONFIG_COMPLETE file may be empty so don't look at size, + * look at the node and dev ids as non-zero 
instead */ + } while ((p.st_ino == 0 ) || (p.st_dev == 0)) ; + + /* Run daemon main loop */ + for ( ; ; ) + { + if ( log_sock.sock <= 0 ) + { + daemon_exit (); + } + + /* Initialize the timeval struct to wait for 1 mSec */ + waitd.tv_sec = 0; + waitd.tv_usec = (SOCKET_WAIT*5); + FD_ZERO(&readfds); + FD_SET(log_sock.sock, &readfds); + + /* Call select() and wait only up to SOCKET_WAIT */ + rc = select( log_sock.sock+1, &readfds, NULL, NULL, &waitd); + /* If the select time out expired then */ + if (( rc < 0 ) || ( rc == 0 )) + { + /* Check to see if the select call failed. */ + /* ... but filter Interrupt signal */ + if (( rc < 0 ) && ( errno != EINTR )) + { + elog ("Select Failed (rc:%d) %s\n", errno, strerror(errno)); + } + } + else + { + if (FD_ISSET(log_sock.sock, &readfds)) + { + int bytes ; + log_message_type log ; + unsigned int len = sizeof(log_sock.addr); + + + /* Look for maintenance command messages */ + memset (&log, 0, sizeof(log_message_type)); + bytes = recvfrom ( log_sock.sock, + (char*)&log, sizeof(log_message_type), 0, + (struct sockaddr *) &log_sock.addr, &len ); + if ( bytes > 0 ) + { + if ( strnlen ( &log.hostname[0], MAX_HOST_NAME_SIZE )) + { + if ( strnlen ( &log.filename[0], MAX_FILENAME_LEN )) + { + if ( strnlen ( &log.logbuffer[0], MAX_LOG_MSG )) + { + char temp_buf [20] ; + strncpy ( temp_buf, &log.logbuffer[0], 19 ); + temp_buf[19] = '\0' ; + dlog ("%s %s [%s]\n", &log.hostname[0], &log.filename[0], &temp_buf[0] ); + daemon_log ( &log.filename[0], &log.logbuffer[0] ); + } + } + } + } + } + } + daemon_signal_hdlr (); + } + daemon_exit (); +} + + +/* Push daemon state to log file */ +void daemon_dump_info ( void ) +{ + daemon_dump_membuf_banner (); + daemon_dump_membuf(); +} + +const char MY_DATA [100] = { "eieio\n" } ; +const char * daemon_stream_info ( void ) +{ + return (&MY_DATA[0]); +} + +/** Teat Head Entry */ +int daemon_run_testhead ( void ) +{ + ilog ("Empty test head.\n"); + return (PASS); +} diff --git 
a/mtce-common/cgts-mtce-common-1.0/pmon/Makefile b/mtce-common/cgts-mtce-common-1.0/pmon/Makefile new file mode 100755 index 00000000..2453621b --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/pmon/Makefile @@ -0,0 +1,42 @@ +# +# Copyright (c) 2014-2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +SRCS = pmonAlarm.cpp pmonInit.cpp pmonMsg.cpp pmonHdlr.cpp pmonFsm.cpp +OBJS = $(SRCS:.cpp=.o) +LDLIBS = -lstdc++ -ldaemon -lcommon -lrt -lcrypto -lfmcommon -ljson-c +INCLUDES = -I../daemon -I../common -I../hostw -I. +CCFLAGS = -g -O2 -Wall -Wextra -Werror + +STATIC_ANALYSIS_TOOL = cppcheck +STATIC_ANALYSIS_TOOL_EXISTS = $(shell [[ -e `which $(STATIC_ANALYSIS_TOOL)` ]] && echo 1 || echo 0) + +all: clean static_analysis common daemon build clean_common + +.cpp.o: + $(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@ + +static_analysis: +ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1) + $(STATIC_ANALYSIS_TOOL) --language=c++ --enable=warning -U__AREA__ *.cpp *.h +else + echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..." +endif + +build: clean static_analysis $(OBJS) + $(CXX) $(CCFLAGS) $(OBJS) -L../daemon -L../common $(LDLIBS) -o pmond + +common: + ( cd ../common ; make clean ; make lib VER=$(VER) VER_MJR=$(VER_MJR)) + +daemon: + ( cd ../daemon ; make clean ; make lib VER=$(VER) VER_MJR=$(VER_MJR)) + +clean_common: + @rm -f ../common/*.o + @rm -f ../daemon/*.o + +clean: clean_common + @rm -f $(OBJ) pmond *.o *.a diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/pmon.h b/mtce-common/cgts-mtce-common-1.0/pmon/pmon.h new file mode 100755 index 00000000..69fbc24e --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/pmon/pmon.h @@ -0,0 +1,655 @@ +/* + * Copyright (c) 2014-2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/* + * This implements the CGCS process Monitor ; /usr/local/bin/pmond + * + * Call trace is as follows: + * daemon_init + * pmon_timer_init + * pmon_hdlr_init + * daemon_files_init + * daemon_signal_init + * daemon_configure + * ini_parse + * get_debug_options + * get_iface_macaddr + * get_iface_address + * get_iface_hostname + * socket_init + * pmon_msg_init + * event_port_init + * pulse_port_init + * + * daemon_service_run + * wait for goenable signal + * pmon_send_event ( READY ) + * pmon_service + * _config_dir_load + * _config_files_load + * _forever + * service_events + * pmon_send_pulse + * pmon_send_hostwd + * + * This daemon waits for a "goenabled" signal and then reads all the process + * configuration files in /etc/pmon.d and begins monitoring them accordingly. + * A process configuration file is expected to contain the following information + * ... + * ... + * ... + * + * But who watches the watcher ? Well there is a built-in mechanism for that. + * A 'failing' or 'not running' Process Monitor Daemon (pmond) will lead to a + * degrade condition for that host. + * + * Here is how it works ... + * + * Step 1: pmond is in inittab so that it will be respawned if it dies. + * + * Step 2: While running pmond periodically sends a pulse message to the + * local heartbeat Client (hbsClient). + * + * Note: The hbsClient pulse response message has a flags field with 1 + * bit dedicated to indicate the presence of the pmond on that host. + * + * Step 3: Every time the hbsClient receives a pmond pulse message it sets + * the pmond bit in the flags field of its pulse response. + * + * Note: So if the pmond dies it stops sending its pulse message, so the + * pmond bit in the pulse response flags will not be set. + * + * Step 4: The heartbeat agent (hbsAgent) looks at the pulse response flags. + * For every response that does not contain a pmond flag it increments + * the pmond 'missing' counter for that host. 
+ * + * Step 5: Every time it sees the pmond flag it clears the counter. + * If that counter reaches PMOND_MISSING_THRESHOLD then that host + * is set to degraded. The degrade condition is cleared as soon + * as a single pmond flag is observed. + * + */ + /** + * @file + * Wind River CGCS Platform Process Monitor Service Header + */ + +#include +#include +#include +#include /* for .. signaling */ +#include /* for .. close and usleep */ +#include /* for .. system */ +#include /* for config dir reading */ +#include /* for the list of conf file names */ +#include /* for ... syslog */ +#include /* for ... waitpid */ +#include /* for ... time */ +#include /* for program control header */ +#include /* */ +#include /* for ... socket */ +#include /* for ... domain socket type */ +#include /* for ... UDP socket type */ +#include +#include +#include +#include /* for hostent */ +#include +#include +#include /* for CLD_xxxx si_codes */ + +using namespace std; + +/* external header APIs */ +#include "fmAPI.h" /* for ... 
EFmAlarmSeverityT */ + +#include "nodeBase.h" +#include "daemon_ini.h" /* Ini Parser Header */ +#include "daemon_common.h" /* Common definitions and types for daemons */ +#include "daemon_option.h" /* Common options for daemons */ +#include "nodeTimers.h" /* maintenance timer utilities start/stop */ +#include "nodeUtil.h" /* common utilities */ +#include "msgClass.h" + +/** + * @addtogroup pmon_base + * @{ + */ + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "mon" + +#ifndef UNUSED +#define UNUSED(_x_) ((void) _x_) +#endif + +#define AMON_MAGIC_NUM 0x12345678 + +#define CONFIG_DIR ((const char *)"/etc/pmon.d") + +#define PMON_CLEAR (0) +#define PMON_ASSERT (1) +#define PMON_LOG (2) + +#define PMON_MAX_ALARMS (100) + +/* Notification of Death Of Arbitrary Process */ + +/* New PRCTL Flag + * + * Set/get notification for task state changes */ +#define PR_DO_NOTIFY_TASK_STATE 17 + +/* This is the data structure for requestion process death + * (and other state change) information. Sig of -1 means + * query, sig of 0 means deregistration, positive sig means + * that you want to set it. sig and events are value-result + * and will be updated with the previous values on every + * successful call. + */ +struct task_state_notify_info +{ + pid_t pid ; + int sig ; + unsigned int events; +}; + +/* The "events" bits in the struct correspond to the si_code values in the siginfo_t struct + * that would normally be sent along with a SIGCHLD to the parent process. + * The bits are mapped like this: + * + * 1 << (info->si_code & 0xFF) + +The possible si_code values are defined in /usr/include/bits/siginfo.h and are: + +enum +{ + CLD_EXITED = 1, Child has exited. + CLD_KILLED, Child was killed. + CLD_DUMPED, Child terminated abnormally. + CLD_TRAPPED, Traced child has trapped. + CLD_STOPPED, Child has stopped. + CLD_CONTINUED Stopped child has continued. 
+}; +*/ + +/* So declare a corresponding set of constants: */ + +#define MON_EXITED 0x2 +#define MON_KILLED 0x4 +#define MON_DUMPED 0x8 +#define MON_TRAPPED 0x10 +#define MON_STOPPED 0x20 +#define MON_CONTINUED 0x40 + +typedef enum +{ + PMOND_RECOVERY_METHOD__SYSVINIT = 0, + PMOND_RECOVERY_METHOD__SYSTEMD = 1, +} recovery_method_type ; + +/* + * Used to mark a configured process + * This aids in freeing duped memory over a process re-config + */ +#define PMOND_INIT_CHECK (0xBABE ) + +/* Note: Any addition to this struct requires explicit + * init in daemon_init. + * Cannot memset a struct contianing a string type. + **/ +typedef struct +{ + system_type_enum system_type ; /**< store the system type */ + system_state_enum system_state; /**< current system state */ + /* iface attributes ; hostname, ip and mac address */ + char my_hostname [MAX_HOST_NAME_SIZE+1]; + string my_macaddr ; /**< MAC address of event port */ + string my_address ; /**< IP address daemon is running on */ + int pulse_period ; /**< send_pulse interval in millisecs */ + int processes ; /**< Number of Monitored Processes */ + bool run_audit ; /**< Forces get_events audit to run */ + struct sigaction info ; /**< This daemon signal action struct */ + struct sigaction prev ; /**< Action handler that was replaced */ + /**< This is put back on the exit */ + bool event_mode ; /**< true=event mode ; false=polling */ + int fd ; /**< inotify file descriptor */ + int wd ; /**< inotify watch descriptor */ + + unsigned int nodetype ; + unsigned int function ; + unsigned int subfunction ; + + recovery_method_type recovery_method ; /**< How processes are recovered */ + bool reload_config ; + bool patching_in_progress ; + +} pmon_ctrl_type ; +void pmon_set_ctrl_ptr ( pmon_ctrl_type * ctrl_ptr ); + +#define PMON_RT_SIGNAL (SIGRTMIN+1) +#define PMON_EVENT_FLAGS ( MON_EXITED | MON_KILLED ) + // MON_DUMPED + // MON_STOPPED + // MON_TRAPPED + +int setup_signal_handler ( int rt_signal_num ); + +#define MAX_CONFIG_LEN 
(256) +#define MAX_PROCESSES (100) +#define MAX_STATUS_ERROR_TEXT_LEN 128 +#define MAX_COMMAND_LEN (512) + +/* Daemon Config Bit Masks */ +#define CONFIG_AUDIT_PERIOD 0x1 +#define CONFIG_TX_PORT 0x2 +#define CONFIG_RX_PORT 0x4 +#define CONFIG_PULSE_PORT 0x8 +#define CONFIG_MULTICAST 0x10 +#define CONFIG_START_DELAY 0x20 +#define CONFIG_HOSTWD_PERIOD 0x40 +#define CONFIG_CMD_PORT 0x80 + +/** Daemon Config Mask */ +#define CONFIG_MASK (CONFIG_AUDIT_PERIOD |\ + CONFIG_TX_PORT |\ + CONFIG_RX_PORT |\ + CONFIG_PULSE_PORT |\ + CONFIG_START_DELAY |\ + CONFIG_CMD_PORT |\ + CONFIG_HOSTWD_PERIOD) + +/* Monitored Process Config Bit Mask */ +#define CONF_PROCESS (0x0001) +#define CONF_SCRIPT (0x0002) +#define CONF_STYLE (0x0004) +#define CONF_PIDFILE (0x0008) +#define CONF_RESTARTS (0x0010) +#define CONF_SEVERITY (0x0020) +#define CONF_INTERVAL (0x0040) +#define CONF_DEBOUNCE (0x0080) +#define CONF_STARTTIME (0x0100) +#define CONF_MODE (0x0200) +#define CONF_STATUS_ARG (0x0400) +#define CONF_START_ARG (0x0800) +#define CONF_TIMEOUT (0x1000) +#define CONF_THRESHOLD (0x2000) +#define CONF_PERIOD (0x4000) +#define CONF_PORT (0x8000) + + +/* Monitored Passive Process Config Mask */ +#define CONF_MASK (CONF_PROCESS | \ + CONF_SCRIPT | \ + CONF_STYLE | \ + CONF_PIDFILE | \ + CONF_SEVERITY | \ + CONF_RESTARTS | \ + CONF_INTERVAL | \ + CONF_DEBOUNCE) + +/* Monitored Active Process Config Mask */ +#define CONF_AMON_MASK (CONF_PORT | \ + CONF_PERIOD | \ + CONF_TIMEOUT | \ + CONF_THRESHOLD) + +/* Monitored Status Process Config Mask */ +#define CONF_STATUS_MON_MASK (CONF_PROCESS | \ + CONF_SCRIPT | \ + CONF_STYLE | \ + CONF_SEVERITY | \ + CONF_RESTARTS | \ + CONF_INTERVAL | \ + CONF_PERIOD | \ + CONF_TIMEOUT | \ + CONF_START_ARG | \ + CONF_STATUS_ARG) + +#define SEVERITY_CLEAR 0 +#define SEVERITY_MINOR 1 +#define SEVERITY_MAJOR 2 +#define SEVERITY_CRITICAL 3 + +#define PMON_RESTART_WAIT (10) +#define MINORLOG_THRESHOLD (20) +#define PIDWAIT_THRESHOLD (15) +#define MAX_RESPAWN_SECS (5) + 
+typedef enum +{ + PMON_STAGE__START = 0, + PMON_STAGE__MANAGE = 1, + PMON_STAGE__RESPAWN = 2, + PMON_STAGE__MONITOR_WAIT = 3, + PMON_STAGE__MONITOR = 4, + PMON_STAGE__RESTART_WAIT = 5, + PMON_STAGE__IGNORE = 6, + PMON_STAGE__FINISH = 7, + PMON_STAGE__POLLING = 8, + PMON_STAGE__START_WAIT = 9, + PMON_STAGE__TIMER_WAIT =10, + PMON_STAGE__STAGES =11, +} passiveStage_enum ; + +typedef enum +{ + ACTIVE_STAGE__IDLE, + ACTIVE_STAGE__START_MONITOR, + ACTIVE_STAGE__PULSE_REQUEST, + ACTIVE_STAGE__REQUEST_WAIT, + ACTIVE_STAGE__PULSE_RESPONSE, + ACTIVE_STAGE__GAP_SETUP, + ACTIVE_STAGE__GAP_WAIT, + ACTIVE_STAGE__FAILED, + ACTIVE_STAGE__DEBOUNCE_SETUP, + ACTIVE_STAGE__DEBOUNCE, + ACTIVE_STAGE__FINISH, + ACTIVE_STAGE__STAGES, +} activeStage_enum ; + +/** Status monitoring states */ +typedef enum +{ + STATUS_STAGE__BEGIN, + STATUS_STAGE__EXECUTE_STATUS, + STATUS_STAGE__EXECUTE_STATUS_WAIT, + STATUS_STAGE__EXECUTE_START, + STATUS_STAGE__EXECUTE_START_WAIT, + STATUS_STAGE__INTERVAL_WAIT, + STATUS_STAGE__STAGES, +} statusStage_enum ; + +#define AMON_MAX_LEN (100) +typedef struct +{ + int tx_sock ; /**< socket to monitored process */ + int tx_port ; /**< port to monitored process */ + struct sockaddr_in tx_addr ; /**< process socket attributes */ + char tx_buf[AMON_MAX_LEN]; /**< Server receive buffer */ + socklen_t len ; /**< Socket Length */ +} active_mon_socket_type ; + +/* Process Specific Monitor Configuration - Static and Dynamic Data */ +typedef struct +{ + unsigned short init_check; /**< checksum of the process config */ + unsigned int mask ; /**< Passive monitor config read mask */ + passiveStage_enum stage ; /**< Passive monitor FSM stage control */ + + /* Config Items */ + const char * process ; /**< The name of the process to monitor */ + const char * service ; /**< The name of the service to monitor + This is used in centos systemd and + when it comes to respawning processes + this takes precidence if it is not null */ + const char * script ; /**< Path to and restart 
script filename */ + const char * style ; /**< recovery method ; lsb, ocf, systemd */ + const char * pidfile ; /**< The path to process pidfile */ + const char * severity ; /**< Process failure severity + critical : host is failed + major : host is degraded + minor : log is generated */ + + unsigned int restarts ; /**< Number of back to back unsuccessful + restarts before severity assertion */ + + unsigned int interval ; /**< Number of seconds to wait between + back-to-back unsuccessful restarts */ + + unsigned int debounce ; /**< Number of seconds the process needs + to run before declaring it as running + O.K. after a restart. Time after + which back-to-back restart count is + cleared. */ + + unsigned int startuptime ; /**< Seconds to wait after process start + before starting the debounce monitor*/ + + const char * mode ; /**< Monitor mode passive or active. + Passive mode is always performed and + assumed if setting is not specified */ + + const char * subfunction ; /**< contains a string specifying the subfunction + of the host */ + + bool quorum ; /**< Whether or not the process is in the + system health quorum (for host watchdog) */ + + bool quorum_failure ; /**< flag indicating that a quorum + process has failed. 
Implements a + single audit debounce for quorum + process failures */ + + bool quorum_unrecoverable ; /**< flag indicating that a quorum + process has been declared unrecoverable */ + + bool full_init_reqd ; /**< Whether or not we should wait for full + goenabled tests passing before we should + try to restart process */ + + /** Passive Monitoring Dynamic Data */ + /* ------------------------------- */ + bool passive_monitoring ; /**< set true when being monitored */ + struct mtc_timer * pt_ptr ; /**< fsm and handler process timer pointer */ + + /** holds the alarm severity state of CLEAR, MINOR, MAJOR, CRITICAL */ + EFmAlarmSeverityT alarm_severity ; + bool restart ; + bool failed ; + bool ignore ; /**< ignore this process ; debug purposes */ + bool stopped ; /**< process was stopped by command */ + unsigned int restarts_cnt ; /**< back to back restarts count */ + unsigned int debounce_cnt ; /**< running monitor debounce count */ + unsigned int minorlog_cnt ; /**< track minor log count for thresholding */ + unsigned int pidwait_cnt ; /**< throttle pidwait logs indicating that + spawned child has not exited yet + preventing respawn of new process */ + + bool sigchld_rxed ; /**< Child respawn exit received */ + unsigned int stage_cnt ; /**< general stage specific count */ + unsigned int failed_cnt ; /**< number of times process has failed */ + int child_pid ; /**< Restart scriptm chile process ID (obs) */ + int pid ; /**< The PID of the this process */ + int sev ; /**< Translated severity code; MAJ,MIN,CRIT */ + int status ; /**< exit status */ + + /* Active Monitoring Config Members */ + /* -------------------------------- */ + bool active_monitoring ; /**< true if active monitoring enabled */ + + unsigned int amask ; /**< Active monitoring config mask */ + int port ; /**< Heartbeat period in seconds */ + + /* period and timeout is also used in Status Monitoring */ + unsigned int period ; /**< Heartbeat period in seconds */ + unsigned int timeout ; /**< Heartbeat 
timeout in seconds */ + + unsigned int threshold ; /**< Number of back to back heartbeat + failures before action */ + + /** Active Monitoring UNIX Domain socket */ + /* ------------------------------------ */ + active_mon_socket_type msg ; /**< Active monitoring messaging interface */ + + /* Active Monitoring Dynamic Data */ + /* ------------------------------ */ + activeStage_enum active_stage ; /**< Passive Monitor FSM Stage Control */ + bool active_failed ; /**< Active monitoring failed signal */ + unsigned int tx_sequence ; /**< outgoing sequence number */ + unsigned int rx_sequence ; /**< incoming sequence number */ + bool waiting ; /**< waiting for response */ + + unsigned int pulse_count ; /**< running pulse count */ + unsigned int b2b_miss_peak ; /**< max number of back to back misses */ + unsigned int b2b_miss_count ; /**< current back to back miss count */ + unsigned int afailed_count ; /**< total active mon'ing failed count */ + unsigned int recv_err_cnt ; /**< counts the receive errors */ + unsigned int send_err_cnt ; /**< counts the transmit errors */ + unsigned int mesg_err_cnt ; /**< response message error count */ + unsigned int mesg_err_peak ; /**< response message error count */ + unsigned int adebounce_cnt ; /**< active monitor debounce counter */ + bool active_debounce ; /**< true = in active mon'ing debounce */ + bool active_response ; /**< set true on first active response */ + + time_debug_type time_start ; /**< launch start time */ + time_debug_type time_stop ; /**< launch stop time */ + time_delta_type time_delta ; /**< launch execution time */ + + /* Status Monitoring */ + const char * start_arg ; /**< start argument for the script */ + const char * status_arg ; /**< status argument for the script */ + const char * status_failure_text_file; /**< path to status failure text file */ + + unsigned int status_mask ; /**< Status monitoring config mask */ + statusStage_enum status_stage ; /**< Status Monitor FSM Stage Control */ + + bool 
status_monitoring ; /**< true if status monitoring */ + bool status_failed ; + bool was_failed ; /**< indicates the process was in the failed state */ + + #define AUDIT_EVENT_SEND_REFESH_THRESHOLD (3) + int audit_alarm_refresh_count ; /**< audit event send refresh counter */ + + const char * recovery_method ; /**< the process/service recovery method */ +} process_config_type ; +process_config_type * get_process_config_ptr ( int index ); +process_config_type * get_process_config_ptr ( string process ); + +int pmon_process_config ( void * user, + const char * section, + const char * name, + const char * value); + + +/* pmonHdlr.cpp API */ +void pmon_timer_init( void ); +int pmon_hdlr_init ( pmon_ctrl_type * ctrl_ptr ); +void pmon_hdlr_fini ( pmon_ctrl_type * ctrl_ptr ); +void pmon_service ( pmon_ctrl_type * ctrl_ptr ); + +/* pmonMsg.cpp API */ +int pmon_inbox_init ( void ); +void pmon_service_inbox ( void ); + +/** Daemon Service messaging socket control structure */ +typedef struct +{ + /** PMON Command Receive Interface - (UDP over 'lo') */ + msgClassSock* cmd_sock; /**< receive pmon commands socket */ + int cmd_port; /**< command receive port */ + + /** UDP socket used to send pmond events to maintenance */ + msgClassSock* event_sock; /**< Tx Event Socket */ + int event_port; /**< Tx Event Port number */ + + /** UDP Inet "Im Alive" pulse message interface */ + int pulse_port; /**< Pmon I'm Alive Pulse Port */ + msgClassSock* pulse_sock; /**< Pmon I'm Alive Pulse Sock */ + mtc_message_type pulse ; /**< Static pulse message */ + int msg_len ; /**< Pulse message length */ + + /** UDP socket used to send pmond events to maintenance */ + int amon_sock; /**< Active monitor Rx Socket */ + int amon_port; /**< Active Monitor Rx Port */ + struct sockaddr_in amon_addr; /**< Active Monitor Attributes */ + + /** Unix socket used to send pmond status to hostwd */ + int hostwd_sock; + char* hostwd_path; + struct sockaddr_un hostwd_addr; + +// msgClassSock* mtclogd_sock; /**< 
sage to mtclogd */ + msgSock_type mtclogd ; + +} pmon_socket_type ; +pmon_socket_type * pmon_getSock_ptr ( void ); + +pmon_ctrl_type * get_ctrl_ptr ( void ) ; + +void pmon_msg_init ( void ); +void pmon_msg_fini ( void ); + +int pulse_port_init ( void ) ; +int event_port_init ( const char * iface , int port ); +int amon_port_init ( int port ); +int hostwd_port_init ( void ); + +int pmon_send_event ( unsigned int event_cmd , process_config_type * ptr ); + +void close_process_socket ( process_config_type * ptr ); +int open_process_socket ( process_config_type * ptr ); + + +void manage_process_failure ( process_config_type * ptr ); +int register_process ( process_config_type * ptr ); +int unregister_process ( process_config_type * ptr ); +int respawn_process ( process_config_type * ptr ); +int get_process_pid ( process_config_type * ptr ); +bool process_running ( process_config_type * ptr ); +int manage_alarm ( process_config_type * ptr, int action ); +int process_config_load ( process_config_type * ptr, const char * config_file_ptr); +bool want_degrade_clear ( void ) ; +string get_status_failure_text ( process_config_type * ptr ); +void kill_running_child ( process_config_type * ptr ); +bool kill_running_process ( int pid ); + +int amon_service_inbox ( int processes ); + +/** Process monitor timer handler */ +void pmon_timer_handler ( int sig, siginfo_t *si, void *uc); + +/** FSM Handler for Passive Monitoring */ +int pmon_passive_handler ( process_config_type * ptr ); + +/** Passive Monitoring FSM Stage Transition Utility */ +int passiveStageChange ( process_config_type * ptr , passiveStage_enum newStage ); +const char * get_pmonStage_str ( process_config_type * ptr ); + +/** FSM Handler for Active Monitoring */ +int pmon_active_handler ( process_config_type * ptr ); + +/** Active Monitoring FSM Stage Transition Utility */ +int activeStageChange ( process_config_type * ptr , activeStage_enum newStage ); +const char * get_amonStage_str ( process_config_type * ptr ); + 
+int amon_send_request ( process_config_type * ptr ); + +/** FSM Handler for Status Monitoring */ +int pmon_status_handler ( process_config_type * ptr ); + +/** Status Monitoring FSM Stage Transition Utility */ +int statusStageChange ( process_config_type * ptr , statusStage_enum newStage ); + +/** Status Monitoring Commands */ +int execute_status_command ( process_config_type * ptr ); +int execute_start_command ( process_config_type * ptr ); + +void quorum_process_failure ( process_config_type * ptr ); + +#define PMON_MIN_ACTIVE_PERIOD (1) +#define PMON_MAX_ACTIVE_PERIOD (120) + +#define PMON_MIN_START_DELAY (1) +#define PMON_MAX_START_DELAY (120) + +#define PMON_MIN_AUDIT_PERIOD (50) +#define PMON_MAX_AUDIT_PERIOD (999) + +int pmon_send_pulse ( void ) ; +int pmon_send_hostwd ( void ) ; + +/** Message versions */ +#define MTC_MSG_VERSION_15_12_GA_PMON (1) +#define MTC_MSG_REVISION_15_12_GA_PMON (1) + + +/** + * @} pmon_base + */ diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/pmonAlarm.cpp b/mtce-common/cgts-mtce-common-1.0/pmon/pmonAlarm.cpp new file mode 100644 index 00000000..5fcc0a4f --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/pmon/pmonAlarm.cpp @@ -0,0 +1,365 @@ +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud 'Process Monitor' Alarm Module + */ + +#include +#include +#include +#include +#include + +using namespace std; + +#ifdef __AREA__ +#undef __AREA__ +#endif +#define __AREA__ "alm" + +#include "daemon_common.h" /* */ + +#include "nodeBase.h" /* */ +#include "nodeTimers.h" /* */ +#include "nodeUtil.h" /* */ +#include "pmonAlarm.h" /* for ... 
this module header */ +#include "pmon.h" + +alarmUtil_type alarm_list[PMON_ALARM_ID__LAST] ; + +void pmonAlarm_init ( void ) +{ + alarmUtil_type * ptr ; + + /** Process Failure Alarm ****************************************************/ + + ptr = &alarm_list[PMON_ALARM_ID__PMOND]; + memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT))); + snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", PMOND_ALARM_ID); + + ptr->name = "process failure" ; + ptr->instc_prefix = "process=" ; + + ptr->critl_reason = ""; + ptr->minor_reason = ""; + ptr->major_reason = ""; + ptr->clear_reason = ""; + + ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL ; + ptr->alarm.probable_cause = FM_ALARM_CAUSE_UNKNOWN; + ptr->alarm.inhibit_alarms = FM_FALSE; + ptr->alarm.service_affecting = FM_TRUE ; + ptr->alarm.suppression = FM_TRUE ; + + ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */ + ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */ + + snprintf (ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, + "If problem consistently occurs after Host is locked and unlocked then " + "contact next level of support for root cause analysis and recovery."); +} + +string _getIdentity ( pmon_alarm_id_enum id ) +{ + switch ( id ) + { + case PMON_ALARM_ID__PMOND :return (PMOND_ALARM_ID); + default: return ("200.000"); + } +} + +string pmonAlarm_getId_str ( pmon_alarm_id_enum id ) +{ + return(_getIdentity(id)); +} + +string _getInstance ( pmon_alarm_id_enum id ) +{ + if ( id < PMON_ALARM_ID__LAST ) + { + return(alarm_list[id].instc_prefix); + } + return (""); +} + +EFmAlarmSeverityT pmonAlarm_state ( string hostname, pmon_alarm_id_enum id ) +{ + string identity = _getIdentity(id) ; + string instance = _getInstance(id) ; + return ( alarmUtil_query ( hostname, identity, instance)); +} + +/****************************************************************************** + * + * Name : manage_queried_alarms + * + * Description: query FM for all the existing process monitor alarms and 
build + * up the callers 'saved_alarm_list' with those process names and + * corresponding severity. + * + * Assumptions: If the hostname is passed in as not empty then assume the clear + * is requested. + * + * Updates : callers saved_alarm_list + * + ******************************************************************************/ + +void manage_queried_alarms ( list & saved_alarm_list, string hostname ) +{ + saved_alarm_list.clear(); + + /** + * Query all the pmon alarms and if there is an alarm for a + * process that is functioing properly then clear the alarm. + **/ + SFmAlarmDataT * alarm_list_ptr = (SFmAlarmDataT*) malloc ((sizeof(SFmAlarmDataT)*PMON_MAX_ALARMS)); + if ( alarm_list_ptr ) + { + if ( alarmUtil_query_identity ( pmonAlarm_getId_str(PMON_ALARM_ID__PMOND), alarm_list_ptr, PMON_MAX_ALARMS ) == PASS ) + { + for ( int i = 0 ; i < PMON_MAX_ALARMS ; ++i ) + { + /* loop over each active alarm and maintain its activity state */ + if ( strnlen ((alarm_list_ptr+i)->entity_instance_id , MAX_FILENAME_LEN ) ) + { + int rc ; + AlarmFilter alarm_filter ; + SFmAlarmDataT alarm_query ; + memset(&alarm_query, 0, sizeof(alarm_query)); + memset(&alarm_filter, 0, sizeof(alarm_filter)); + + snprintf ( &alarm_filter.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", PMOND_ALARM_ID ); + snprintf ( &alarm_filter.entity_instance_id[0], FM_MAX_BUFFER_LENGTH, "%s", (alarm_list_ptr+i)->entity_instance_id ); + + if (( rc = fm_get_fault ( &alarm_filter, &alarm_query )) == FM_ERR_OK ) + { + string entity = alarm_filter.entity_instance_id ; + size_t pos = entity.find("process="); + if ( pos != std::string::npos ) + { + string pn = entity.substr(pos+strlen("process=")); + ilog ("%s alarm is %s (process:%s)\n", alarm_filter.entity_instance_id, + alarmUtil_getSev_str(alarm_query.severity).c_str(), pn.c_str()); + + /* filter out 'process=pmond' as that alarm is handled by hbsAgent */ + if ( pn.compare("pmond") ) + { + if ( !hostname.empty() ) + { + pmonAlarm_clear ( hostname, 
PMON_ALARM_ID__PMOND, pn ); + } + else + { + active_process_alarms_type this_alarm ; + this_alarm.process = pn ; + this_alarm.severity = alarm_query.severity ; + saved_alarm_list.push_front ( this_alarm ); + } + } + } + } + else + { + ilog ("fm_get_fault failed (rc:%d)\n", rc ); + } + } + else + { + dlog2 ("last entry %d\n", i); + break ; + } + } + } + } +} + +/************************* A L A R M I N G **************************/ + +/* Clear the specified hosts's maintenance alarm */ +int pmonAlarm_clear ( string hostname, pmon_alarm_id_enum id, string process ) +{ + if ( id < PMON_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + instance.append(process); + + ilog ("%s clearing '%s' %s alarm (%s.%s)\n", + hostname.c_str(), + process.c_str(), + alarm_list[id].name.c_str(), + identity.c_str(), + instance.c_str()); + + snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s '%s' process has been successfully recovered and is now functioning properly.", + hostname.data(), process.data()); + + return ( alarmUtil_clear ( hostname, identity, instance, alarm_list[id].alarm )); + } + return (FAIL_BAD_PARM); +} + +/** Assert a specified hosts's mtce alarm with a CRITICAL severity level */ +int pmonAlarm_critical ( string hostname, pmon_alarm_id_enum id, string process ) +{ + if ( id < PMON_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + instance.append(process); + + elog ("%s setting critical '%s' %s alarm (%s.%s)\n", + hostname.c_str(), + process.c_str(), + alarm_list[id].name.c_str(), + identity.c_str(), + instance.c_str()); + + snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s critical '%s' process has failed and could not be auto-recovered gracefully. " + "Auto-recovery progression by host reboot is required and in progress. 
" + "Manual Lock and Unlock may be required if auto-recovery is unsuccessful.", + hostname.data(), process.data()); + + return ( alarmUtil_critical ( hostname, identity, instance, alarm_list[id].alarm )); + } + return (FAIL_BAD_PARM); +} + +/** Assert a specified host's mtce alarm with a MAJOR severity level */ +int pmonAlarm_major ( string hostname, pmon_alarm_id_enum id, string process ) +{ + if ( id < PMON_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + instance.append(process); + + wlog ("%s setting major '%s' %s alarm (%s.%s)\n", + hostname.c_str(), + process.c_str(), + alarm_list[id].name.c_str(), + identity.c_str(), + instance.c_str()); + + snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s is degraded due to the failure of its '%s' process. " + "Auto recovery of this major process is in progress.", + hostname.data(), process.data()); + + return ( alarmUtil_major ( hostname, identity, instance, alarm_list[id].alarm )); + } + return (FAIL_BAD_PARM); +} + +/** Assert a specified host's mtce alarm with a MINOR severity level */ +int pmonAlarm_minor ( string hostname, pmon_alarm_id_enum id, string process, int restarts ) +{ + if ( id < PMON_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + instance.append(process); + + wlog ("%s setting minor '%s' %s alarm (%s.%s)\n", + hostname.c_str(), + process.c_str(), + alarm_list[id].name.c_str(), + identity.c_str(), + instance.c_str()); + + snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s '%s' process has failed. %s", + hostname.data(), process.data(), + ((restarts == 0) ? "Manual recovery is required." : "Auto recovery in progress." 
) ); + + return ( alarmUtil_minor ( hostname, identity, instance, alarm_list[id].alarm )); + } + return (FAIL_BAD_PARM); +} + +/*************************** L O G G I N G **********************************/ + +/** Create a CRITICAL maintenance log */ +int pmonAlarm_critical_log ( string hostname, pmon_alarm_id_enum id, string process ) +{ + if ( id < PMON_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + instance.append(process); + + elog ("%s creating critical '%s' %s log (%s.%s)\n", + hostname.c_str(), + process.c_str(), + alarm_list[id].name.c_str(), + identity.c_str(), + instance.c_str()); + + snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s critical '%s' process has failed and could not be auto-recovered gracefully.", + hostname.data(), process.data()); + + return ( alarmUtil_critical_log ( hostname, identity, instance, alarm_list[id].alarm )); + } + return (FAIL_BAD_PARM); +} + +/** Create a MAJOR maintenance log */ +int pmonAlarm_major_log ( string hostname, pmon_alarm_id_enum id, string process ) +{ + if ( id < PMON_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + instance.append(process); + + wlog ("%s creating major '%s' %s log (%s.%s)\n", + hostname.c_str(), + process.c_str(), + alarm_list[id].name.c_str(), + identity.c_str(), + instance.c_str()); + + snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s is degraded due to the failure of its '%s' process. 
" + "Auto recovery of this major process is in progress.", + hostname.data(), process.data()); + + return ( alarmUtil_major_log ( hostname, identity, instance, alarm_list[id].alarm )); + } + return (FAIL_BAD_PARM); +} + +/** Create a MINOR maintenance log */ +int pmonAlarm_minor_log ( string hostname, pmon_alarm_id_enum id, string process, int restarts ) +{ + if ( id < PMON_ALARM_ID__LAST ) + { + string identity = _getIdentity(id); + string instance = _getInstance(id); + instance.append(process); + + wlog ("%s creating minor '%s' %s log (%s.%s)\n", + hostname.c_str(), + process.c_str(), + alarm_list[id].name.c_str(), + identity.c_str(), + instance.c_str()); + + snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, + "%s '%s' process has failed. %s", + hostname.data(), process.data(), + ((restarts == 0) ? "Manual recovery is required." : "Auto recovery in progress.") ); + + return ( alarmUtil_minor_log ( hostname, identity, instance, alarm_list[id].alarm )); + } + return (FAIL_BAD_PARM); +} diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/pmonAlarm.h b/mtce-common/cgts-mtce-common-1.0/pmon/pmonAlarm.h new file mode 100644 index 00000000..79414e1c --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/pmon/pmonAlarm.h @@ -0,0 +1,64 @@ +#ifndef __PMONALARM_H__ +#define __PMONALARM_H__ + +/* + * Copyright (c) 2015-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Titanium Cloud Process Monitor 'Alarm' Header + */ + +#include +#include +#include +#include +#include + +using namespace std; + +#include "alarmUtil.h" /* for .. 
/** Alarm Abstract Reference IDs
 *
 *  The values are used as indexes into the alarm table that
 *  pmonAlarm.cpp declares as alarm_list[PMON_ALARM_ID__LAST]. */
typedef enum
{
    PMON_ALARM_ID__PMOND = 0, /* the 'process failure' alarm entry               */
    PMON_ALARM_ID__LAST = 1,  /* table size ; ids must be below this to be valid */
} pmon_alarm_id_enum ;
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform Process Monitor Service + * Passive and Active Monitoring FSMs. + */ + +#include "pmon.h" +#include "alarmUtil.h" /* for ... alarmUtil_getSev_str */ + +#define WARNING_THROTTLE (10) + +const char passiveStages_str [PMON_STAGE__STAGES][32] = +{ + "Start", + "Manage", + "Respawn", + "Monitor-Wait", + "Monitor-Process", + "Restart-Wait", + "Ignore-Process", + "Handler-Finish", + "Subfunction-Polling", + "Monitor-Start-Wait", +} ; + +const char statusStages_str [STATUS_STAGE__STAGES][32] = +{ + "Begin", + "Execute-Status", + "Execute-Status-Wait", + "Execute-Start", + "Execute-Start-Wait", + "Interval-Wait" +} ; + +int statusStageChange ( process_config_type * ptr , statusStage_enum newStage ) +{ + if (( newStage < STATUS_STAGE__STAGES ) && + ( ptr->status_stage < STATUS_STAGE__STAGES )) + { + clog ("%s %s -> %s (%d->%d)\n", + ptr->process, + statusStages_str[ptr->status_stage], + statusStages_str[newStage], + ptr->status_stage, newStage); + ptr->status_stage = newStage ; + return (PASS); + } + else + { + slog ("%s Invalid Stage (now:%d new:%d)\n", ptr->process, ptr->status_stage, newStage ); + ptr->status_stage = STATUS_STAGE__BEGIN ; + return (FAIL); + } +} + +int passiveStageChange ( process_config_type * ptr , passiveStage_enum newStage ) +{ + if (( newStage < PMON_STAGE__STAGES ) && + ( ptr->stage < PMON_STAGE__STAGES )) + { + clog ("%s %s -> %s (%d->%d)\n", + ptr->process, + passiveStages_str[ptr->stage], + passiveStages_str[newStage], + ptr->stage, newStage); + ptr->stage = newStage ; + return (PASS); + } + else + { + slog ("%s Invalid Stage (now:%d new:%d)\n", + ptr->process, ptr->stage, newStage ); + ptr->stage = PMON_STAGE__FINISH ; + return (FAIL); + } +} + +const char * get_pmonStage_str ( process_config_type * ptr ) +{ + if ( ptr->stage < PMON_STAGE__STAGES ) + { + return ( &passiveStages_str[ptr->stage][0] ) ; + } + return (NULL); +} + +const char 
/* Active Monitoring Handler
 * --------------------------
 * Runs one pass of the active (pulse request / response) monitoring FSM
 * for the specified process.
 *
 * Stage flow as implemented below:
 *
 *   START_MONITOR  -> open the process socket, then GAP_SETUP
 *   GAP_SETUP      -> start the 'period' timer, then GAP_WAIT
 *   GAP_WAIT       -> on timer ring go to PULSE_REQUEST
 *   PULSE_REQUEST  -> send the request ; on success start the 'timeout'
 *                     timer and go to PULSE_RESPONSE, otherwise count a
 *                     miss and go to GAP_SETUP or FAILED
 *   PULSE_RESPONSE -> validate the response sequence, manage debounce and
 *                     on timer ring either issue the next request or
 *                     declare FAILED once the miss threshold is reached
 *   FAILED         -> record the failure and call manage_process_failure
 *
 * Returns PASS unless open_process_socket fails in START_MONITOR, in which
 * case that call's return code is returned. */
int pmon_active_handler ( process_config_type * ptr )
{
    int rc = PASS ;

    if ( ptr->active_stage < ACTIVE_STAGE__STAGES )
    {
        dlog2 ("%s Active-%s Stage %d\n",
                ptr->process,
                activeStages_str[ptr->active_stage],
                ptr->active_stage );
    }
    else
    {
        /* out-of-range stage ; activeStageChange logs and corrects it */
        activeStageChange ( ptr, ACTIVE_STAGE__PULSE_REQUEST );
    }

    switch ( ptr->active_stage )
    {
        case ACTIVE_STAGE__IDLE:
        {
            break ;
        }
        case ACTIVE_STAGE__START_MONITOR:
        {
            rc = open_process_socket ( ptr );
            if ( rc != PASS )
            {
                ptr->active_failed = true ;
                elog ("%s 'open_process_socket' failed (%d)\n", ptr->process , rc );
                manage_process_failure ( ptr );
            }

            /* the FSM proceeds to GAP_SETUP whether or not the open passed */
            activeStageChange ( ptr, ACTIVE_STAGE__GAP_SETUP );
            break ;
        }
        case ACTIVE_STAGE__GAP_SETUP:
        {
            /* restart the timer for the gap between pulse requests */
            if ( ptr->pt_ptr->tid )
            {
                mtcTimer_stop ( ptr->pt_ptr );
            }
            mtcTimer_start ( ptr->pt_ptr, pmon_timer_handler, ptr->period );
            activeStageChange ( ptr, ACTIVE_STAGE__GAP_WAIT );
            break ;
        }
        case ACTIVE_STAGE__GAP_WAIT:
        {
            if ( ptr->pt_ptr->ring == true )
            {
                activeStageChange ( ptr, ACTIVE_STAGE__PULSE_REQUEST );
            }
            break ;
        }
        case ACTIVE_STAGE__FAILED:
        {
            ptr->active_response = false ;
            ptr->active_failed = true ;
            ptr->afailed_count++ ;
            ptr->b2b_miss_count = 0 ;
            if ( ptr->pt_ptr->tid )
                mtcTimer_stop ( ptr->pt_ptr );

            manage_process_failure ( ptr );

            /* Stage change is handled elsewhere */
            break ;
        }
        case ACTIVE_STAGE__REQUEST_WAIT:
        {
            if ( ptr->pt_ptr->ring == true )
            {
                activeStageChange ( ptr, ACTIVE_STAGE__PULSE_REQUEST );
            }
            break ;
        }
        case ACTIVE_STAGE__PULSE_REQUEST:
        {
            /* set before sending ; cleared again if the send fails */
            ptr->waiting = true ;
            if ( amon_send_request ( ptr ) != PASS )
            {
                ptr->waiting = false ;
                ptr->send_err_cnt++ ;
                wlog ("%s pulse request send failed (%d:%d)\n",
                          ptr->process,
                          ptr->b2b_miss_count,
                          ptr->send_err_cnt );

                /* a failed send counts as a back-to-back miss */
                if ( ++ptr->b2b_miss_count >= ptr->threshold )
                {
                    activeStageChange ( ptr, ACTIVE_STAGE__FAILED );
                }
                else
                {
                    activeStageChange ( ptr, ACTIVE_STAGE__GAP_SETUP );
                }
            }
            else
            {
                /* request sent ; start the response timeout timer */
                ptr->pulse_count++ ;
                mtcTimer_start ( ptr->pt_ptr, pmon_timer_handler, ptr->timeout );
                activeStageChange ( ptr, ACTIVE_STAGE__PULSE_RESPONSE );
            }
            break ;
        }
        case ACTIVE_STAGE__PULSE_RESPONSE:
        {
            /* a non-zero rx_sequence is treated as 'a response has arrived' */
            if ( ptr->rx_sequence != 0 )
            {
                /* handle the first response */
                if ( ptr->active_response == false )
                {
                    ptr->active_response = true ;
                }

                if ( ptr->rx_sequence != ptr->tx_sequence )
                {
                    ptr->b2b_miss_count++ ;
                    ptr->mesg_err_cnt++ ;
                    wlog ( "%s out-of-sequence response (%d:%d)\n",
                        ptr->process ,
                        ptr->tx_sequence,
                        ptr->rx_sequence);

                    /* NOTE(review): there is no 'break' after this change to
                     * FAILED. If the timer has already rung on this pass, the
                     * unconditional change to PULSE_REQUEST further below
                     * looks like it overwrites the FAILED stage - confirm
                     * whether that is intended. */
                    if ( ptr->b2b_miss_count >= ptr->threshold )
                    {
                        activeStageChange ( ptr, ACTIVE_STAGE__FAILED );
                    }
                }
                else
                {
                    /* good response ; record the peak and reset the miss count */
                    if ( ptr->b2b_miss_count > ptr->b2b_miss_peak )
                        ptr->b2b_miss_peak = ptr->b2b_miss_count ;

                    ptr->b2b_miss_count = 0 ;
                    if ( ptr->active_debounce == true )
                    {
                        ilog ("%s is healthy (debouncing)\n", ptr->process );
                    }
                    else
                    {
                        mlog2 ("%s is healthy\n", ptr->process );
                    }
                }
                /* manage active monitoring debounce */
                if ( ptr->active_debounce == true )
                {
                    /* debounce completes after period+1 handled responses,
                     * at which point the failure state and alarm are cleared */
                    if ( ++ptr->adebounce_cnt >= ((ptr->period+1)) )
                    {
                        ilog ("%s Debounced (%d)\n", ptr->process, ptr->pid );
                        ptr->active_debounce = false;
                        ptr->adebounce_cnt = 0 ;
                        ptr->restarts_cnt = 0 ;
                        ptr->quorum_failure = false;
                        ptr->quorum_unrecoverable = false;

                        clear_amon_counts ( ptr );
                        ptr->active_failed = false ;
                        manage_alarm ( ptr , PMON_CLEAR );
                    }
                }
                /* consume the response */
                ptr->rx_sequence = 0 ;

                /* Wait out the remaining part of the period */
                ptr->waiting = false ;
            }

            if ( ptr->pt_ptr->ring == true )
            {
                /* Are we still waiting for a response ? */
                if ( ptr->waiting == true )
                {
                    /* handle case where response is delayed due to goenabled */
                    if ( ptr->full_init_reqd &&
                         !( is_goenabled ( get_ctrl_ptr()->nodetype, true ) )
                       )
                    {
                        /* we don't expect a response... do nothing to wait
                           another loop */
                    }
                    else
                    {
                        ptr->recv_err_cnt++ ;
                        ptr->b2b_miss_count++ ;
                        elog ("%s missing pulse response (Miss:%d) (%d:%d)\n",
                               ptr->process,
                               ptr->b2b_miss_count,
                               ptr->tx_sequence,
                               ptr->rx_sequence);

                        if ( ptr->b2b_miss_count >= ptr->threshold )
                        {
                            /*****************************************************
                             * Only fail active heartbeating after MTC_MINS_3 of
                             * never having received a response.
                             *
                             * This condition is added to address an issue
                             * reported where the kernel takes a
                             * long time to timeout on external dns nameservers
                             * after a DOR when the system is isolated from the
                             * external network.
                             *
                             * NOTE(review): the expression below divides by
                             * ptr->period ; presumably the config loader
                             * rejects a zero period - verify.
                             ****************************************************/
                            if (( ptr->active_response == false ) &&
                                ( ptr->period < MTC_MINS_3 ) &&
                                ( ptr->b2b_miss_count < (MTC_MINS_3/ptr->period )))
                            {
                                ; /* more forgiving startup handling */
                            }
                            else
                            {
                                /* leave the switch now so the FAILED stage is
                                 * not replaced by PULSE_REQUEST below */
                                activeStageChange ( ptr, ACTIVE_STAGE__FAILED );
                                break ;
                            }
                        }
                    }
                }
                /* timer rang ; issue the next pulse request */
                activeStageChange ( ptr, ACTIVE_STAGE__PULSE_REQUEST );
                break ;
            }
            else if ( ptr->waiting == false )
            {
                ; /* got the data ; just wait out the timer */
            }
            break ;
        }
        default:
        {
            /* unhandled stage value ; restart from the gap setup stage */
            activeStageChange ( ptr, ACTIVE_STAGE__GAP_SETUP );
            break ;
        }
    }
    return (rc);
}
ctrl_ptr->patching_in_progress = false ; + } + + ; /* fall through and just change state at the bottom */ + } + /* Handle Critical processes. + * Critical with 0 restarts + * Critical with # restarts + * Req'ts: + * 1. Avoid re-reporting the event + * 2. Send minor for first occurance + * 3. Try restarts if it supports it + * 4. Stay in this stage once the max restarts has been reached. + */ + else if ( ptr->sev == SEVERITY_CRITICAL ) + { + /* handle the No-restarts case */ + /* Go straight to event assertion */ + if ( ptr->restarts == 0 ) + { + manage_alarm ( ptr, PMON_ASSERT ); + + /* Send critical notification */ + pmon_send_event ( MTC_EVENT_PMON_CRIT, ptr ); + + wlog ("%s auto-restart disabled\n", ptr->process ); + passiveStageChange ( ptr, PMON_STAGE__IGNORE ) ; + + /* if process is in quorum, and we're not trying to restart + * it, we declare the quorum failed */ + if ( ptr->quorum ) + { + quorum_process_failure ( ptr ); + } + break ; + } + else if ( ptr->restarts_cnt >= ptr->restarts ) + { + manage_alarm ( ptr, PMON_ASSERT ); + + /* Send critical notification */ + pmon_send_event ( MTC_EVENT_PMON_CRIT, ptr ); + + ptr->restarts_cnt = 0 ; + ilog ("%s allowing auto-restart of failed critical process\n", ptr->process); + + /* if process is in quorum, and we haven't been able to + * restart it, we declare the quorum failed */ + if ( ptr->quorum ) + { + quorum_process_failure ( ptr ); + } + + /* Note: the above clear or restarts_cnt and commented break below + * forces pmond to try and continue to recover the failing + * critical process if for some reason the host does not + * go through a reboot */ + /* avoid stage change below and wait for the reboot */ + // break ; + } + else + { + /* Send a restart log to maintenance on the first restart only */ + if ( ptr->restarts_cnt == 0 ) + { + manage_alarm ( ptr, PMON_LOG ); + } + + /* Try and recover if the process is critical but + * supports some number of restart attempts first */ + } + } + /* Send a log on the 
first restart */ + /* */ + /* Note: This clause needs to be before the next one */ + /* to handle the restarts = 0 case */ + else if (( ptr->restarts_cnt == 0 ) && ( ptr->restarts != 0 )) + { + ilog ("%s Sending Log Event to Maintenance\n", ptr->process ); + + /* Send a log on the first one or every time + * we start a fresh restart cycle */ + manage_alarm ( ptr, PMON_LOG ); + } + else if (( ptr->restarts_cnt == 0 ) && ( ptr->restarts == 0 )) + { + /* Auto recovery is disable, generate a log and raise a minor alarm */ + + wlog ("%s Sending Log Event to Maintenance\n", ptr->process ); + manage_alarm ( ptr, PMON_LOG ); + + manage_alarm ( ptr, PMON_ASSERT ); + + wlog ("%s Auto-Restart Disabled ... but monitoring for recovery\n", ptr->process ); + + /* if process is in quorum, and we're not trying to + * restart it, we declare the quorum failed */ + if ( ptr->quorum ) + { + ptr->quorum_failure = true; + quorum_process_failure ( ptr ); + } + + passiveStageChange ( ptr, PMON_STAGE__IGNORE ) ; + break ; + } + /* Manage notification based on restart + * threshold for non-critical processes */ + else if ( ptr->restarts_cnt >= ptr->restarts ) + { + /* Restart threshold reached ; sending event to maintenance */ + manage_alarm ( ptr, PMON_ASSERT ); + + /* Start the counts again */ + ptr->restarts_cnt = 0 ; + ptr->debounce_cnt = 0 ; + + /* if process is in quorum, and we haven't been able to + * restart it, we declare the quorum failed */ + if ( ptr->quorum ) + { + quorum_process_failure ( ptr ); + } + } + passiveStageChange ( ptr, PMON_STAGE__RESPAWN ) ; + break ; + } + + /* Spawn the process */ + case PMON_STAGE__RESPAWN: + { + ilog ("%s stability period (%d secs)\n", ptr->process, ptr->debounce ); + + /* Restart the process */ + respawn_process ( ptr ) ; + + /* Start the monitor debounce timer. 
*/ + if ( ptr->pt_ptr->tid ) mtcTimer_stop ( ptr->pt_ptr ); + mtcTimer_start ( ptr->pt_ptr, pmon_timer_handler, ptr->startuptime ); + passiveStageChange ( ptr, PMON_STAGE__MONITOR_WAIT ) ; + + /* Don't wait for the debounce timer to take this process out of 'commanded restart' mode. + * Do it now, otherwise tight patch loop stress testing might fail */ + if ( ptr->restart == true ) + { + ilog ("%s exit manual restart request mode\n", ptr->process ) + ptr->restart = false ; + } + break ; + } + + /* Give the process time to startup + * before trying to monitor it */ + case PMON_STAGE__MONITOR_WAIT: + { + /* Give the process time to start */ + if ( ptr->pt_ptr->ring == true ) + { + if (( !ptr->sigchld_rxed ) || ( !ptr->child_pid ) || ( ptr->status )) + { + if ( ptr->child_pid == 0 ) + { + elog ("%s spawn has null child pid\n", ptr->process ); + } + else if ( ptr->sigchld_rxed == false ) + { + elog ("%s spawn timeout (%d)\n", ptr->process, ptr->child_pid ); + } + else if ( ptr->status != PASS ) + { + elog ("%s spawn failed (rc:%d) (%d)\n", ptr->process, ptr->status, ptr->child_pid ); + } + kill_running_child ( ptr ) ; + + /* we had a startup timeout ; do restart */ + mtcTimer_start( ptr->pt_ptr, pmon_timer_handler, ptr->interval ); + passiveStageChange ( ptr, PMON_STAGE__RESTART_WAIT ) ; + } + else + { + /* clear the monitor debounce counter */ + ptr->debounce_cnt = 0 ; + + /* Start debounce monitor phase */ + passiveStageChange ( ptr, PMON_STAGE__MONITOR ) ; + // ilog ("%s Monitor Start ...\n", ptr->process); + process_running ( ptr ); + ilog ("%s Monitor (%d)\n", ptr->process, ptr->pid ); + } + + ptr->sigchld_rxed = false ; + } + break ; + } + /* Monitor the newly respawned process */ + case PMON_STAGE__MONITOR: + { + /* The process needs to stay running for x seconds before + * clearing any assertion or declaring that this restart + * attempt was successful */ + + /* The process should be running. 
+ * If not then cancel the timer and start over through + * the RESTART_WAIT stage which ensures that we manage + * back to back restarts properly */ + if ( ! process_running ( ptr ) ) + { + wlog ("%s Respawn Monitor Failed (%d of %d), retrying in (%d secs)\n", + ptr->process, + ptr->restarts_cnt, + ptr->restarts, + ptr->interval); + + passiveStageChange ( ptr, PMON_STAGE__TIMER_WAIT ) ; + } + else if ( ptr->pt_ptr->ring == true ) + { + if ( ++ptr->debounce_cnt >= ptr->debounce ) + { + /* We made it through the monitor debounce + * period so lets finish up */ + ilog ("%s Stable (%d)\n", ptr->process, ptr->pid ); + passiveStageChange ( ptr, PMON_STAGE__FINISH ) ; + } + /* else continue to monitor the freshly respawned process */ + else + { + /* Start the monitor timer again since + * the debounce period is not over */ + mtcTimer_start ( ptr->pt_ptr, pmon_timer_handler, 1 ); + dlog ("%s Debounce Monitor (TID:%p)\n", ptr->process, ptr->pt_ptr->tid ); + } + } + break ; + } + + case PMON_STAGE__TIMER_WAIT: + { + if ( mtcTimer_expired ( ptr->pt_ptr ) ) + { + /* if restart interval is zero then just ring the timer right away */ + if ( ptr->interval == 0 ) + { + ptr->pt_ptr->ring = true ; + } + else + { + /* Now we are in the restart wait phase */ + mtcTimer_start( ptr->pt_ptr, pmon_timer_handler, ptr->interval ); + } + + kill_running_child ( ptr ) ; + passiveStageChange ( ptr, PMON_STAGE__RESTART_WAIT ) ; + } + else + { + dlog ("%s debounce timer wait\n", ptr->process); + } + break ; + } + /* Lets wait a bit before we try another restart */ + case PMON_STAGE__RESTART_WAIT: + { + if ( ptr->pt_ptr->ring == true ) + { + /* Force the immediate (re)start */ + passiveStageChange ( ptr, PMON_STAGE__MANAGE) ; + } + break ; + } + /* A state that leaves a process failed but takes it out of + * that failed state if it auto recovers on its own or + * through external means */ + case PMON_STAGE__IGNORE: + { + int pid ; + if ((pid = get_process_pid ( ptr ))) + { + int result = kill 
(pid, 0 ); + if ( result == 0 ) + { + /* allow process recovery if it is started outside pmond */ + if ( ptr->stopped == true ) + ptr->stopped = false ; + + passiveStageChange ( ptr, PMON_STAGE__FINISH ); + } + } + break ; + } + case PMON_STAGE__FINISH: + { + kill_running_child ( ptr ) ; + ilog ("%s Recovered (%d)\n", ptr->process, ptr->pid ); + + /* Set all counts to default state ; + * Even if they may have already been :) */ + ptr->failed = false ; + ptr->debounce_cnt = 0 ; + + passiveStageChange ( ptr, PMON_STAGE__START ) ; + + /* Register the new process with the kernel */ + register_process ( ptr ); + + if ( !ptr->active_monitoring ) + { + ptr->restarts_cnt = 0 ; + /* It's possible that a restart succeeded even after the + * max restarts threshold was reached (and we thought things + * were dead, so we marked quorum processes as unrecoverable) + */ + if ( ptr->quorum ) + { + ptr->quorum_failure = false; + ptr->quorum_unrecoverable = false; + } + + manage_alarm ( ptr, PMON_CLEAR ); + } + /* Recover Active monitoring , + * event clear will occur in the active monitoring + * FSM after it passes the debouce cycle */ + else + { + /* Open the process's active monitoring + * socket if it was or is closed */ + if ( ptr->msg.tx_sock == 0 ) + open_process_socket ( ptr ); + + /* Clear sequence*/ + ptr->tx_sequence = 0 ; + ptr->rx_sequence = 0 ; + + /* Clear active monitoring state controls */ + // ptr->active_failed = false ; + ptr->waiting = false ; + + /* Set the active monitor debounce flag and clear its counter */ + ptr->active_debounce = true ; + ptr->adebounce_cnt = 0 ; + + activeStageChange ( ptr, ACTIVE_STAGE__PULSE_REQUEST ) ; + } + rc = PASS ; + break ; + } + /****************************************************************************** + * + * This polling stage was introduced for the introduction of the 2-Server + * configuration, aka combo blade/host. 
+ * + * /etc/pmon.d/ files that declare + * + * subfunction = compute + * or + * subfunction = storage + * + * .. are not immediately monitored by pmond on startup. + * + * Instead, pmond will wait for the specified subfunction config complete + * file to be present before starting to monitor that process. + * + * This stage is here to manage that delayed monitoring startup of + * subfunction dependent processes. + * + *******************************************************************************/ + case PMON_STAGE__POLLING: + { + if ( ptr->pt_ptr->ring == false ) + { + break ; + } + else + { + string config_filename = "" ; + pmon_ctrl_type * ctrl_ptr = get_ctrl_ptr() ; + if ( ptr->subfunction ) + { + if ( !strcmp (ptr->subfunction, "compute" ) ) + { + config_filename = CONFIG_COMPLETE_COMPUTE ; + } + else if ( !strcmp (ptr->subfunction, "storage" ) ) + { + config_filename = CONFIG_COMPLETE_STORAGE ; + } + /******************************************************** + * issue: processes that set the subfunction to + * 'last-config' get a dependency override in + * the AIO system. Such processes need to be + * monitored only after the last configuration + * step. Right now that is compute in aio. 
+ * + ********************************************************/ + else if (( ctrl_ptr->system_type != SYSTEM_TYPE__NORMAL ) && + ( !strcmp (ptr->subfunction, "last-config" ))) + { + config_filename = CONFIG_COMPLETE_COMPUTE ; + dlog ("%s dependency over-ride ; will wait for %s\n", + ptr->process, + config_filename.c_str()); + } + } + + if ( config_filename.empty() ) + { + passiveStageChange ( ptr, PMON_STAGE__IGNORE ); + elog ("%s is subfunction polling with no subfunction ; ignoring\n", ptr->process ); + } + else + { + bool start_monitoring = true; + string waiting_for = ""; + + if ( daemon_is_file_present ( config_filename.data() ) != true ) + { + start_monitoring = false; + waiting_for = config_filename; + } + else if ( !strcmp (ptr->subfunction, "compute" ) ) + { + if ( daemon_is_file_present ( DISABLE_COMPUTE_SERVICES ) == true ) + { + /* Compute services are disabled - do not start monitoring */ + start_monitoring = false; + waiting_for = DISABLE_COMPUTE_SERVICES; + } + } + + mtcTimer_reset ( ptr->pt_ptr ); + if ( start_monitoring == true ) + { + ptr->passive_monitoring = true ; + + /* check for startup failures from alarm query. 
*/ + if ( ptr->failed == true ) + { + /* manage the process if its in the failed state */ + passiveStageChange ( ptr, PMON_STAGE__MANAGE ); + } + else + { + ilog ("monitor start of %s in %d seconds\n", ptr->process, daemon_get_cfg_ptr()->start_delay ); + mtcTimer_start ( ptr->pt_ptr, pmon_timer_handler, daemon_get_cfg_ptr()->start_delay ); + passiveStageChange ( ptr, PMON_STAGE__START_WAIT ); + } + } + else + { + mtcTimer_start ( ptr->pt_ptr, pmon_timer_handler, 3 ); + wlog_throttled ( ptr->stage_cnt, 500, "%s monitoring is waiting on %s\n", + ptr->process, waiting_for.c_str()); + } + } + } + break ; + } + case PMON_STAGE__START_WAIT: + { + if ( ptr->pt_ptr->ring == true ) + { + ilog ("%s process monitoring started\n", ptr->process ); + register_process ( ptr ); + if ( ptr->active_monitoring == true ) + { + if ( open_process_socket ( ptr ) != PASS ) + { + elog ("%s failed to open process socket\n", + ptr->process ); + } + } + passiveStageChange ( ptr, PMON_STAGE__MANAGE ); + } + break ; + } + + default: + { + slog ("%s Invalid stage (%d)\n", ptr->process, ptr->stage ); + + /* Default to finish for invalid case. 
+ * If there is an issue then it will be detected */ + passiveStageChange ( ptr, PMON_STAGE__FINISH ); + } + } + return (rc); +} + +/* Status Monitoring Handler + * -------------------------- + * Monitors a process with status command */ +int pmon_status_handler ( process_config_type * ptr ) +{ + if ( ptr->status_stage >= STATUS_STAGE__STAGES ) + { + wlog ("%s Invalid status_stage (%d) ; correcting\n", ptr->process, ptr->status_stage ); + statusStageChange ( ptr, STATUS_STAGE__BEGIN); + } + + switch ( ptr->status_stage ) + { + // First state + case STATUS_STAGE__BEGIN: + { + mtcTimer_start ( ptr->pt_ptr, pmon_timer_handler, ptr->period ); + dlog ("%s start period timer %p\n", ptr->process, ptr->pt_ptr->tid ); + statusStageChange ( ptr, STATUS_STAGE__EXECUTE_STATUS ); + break ; + } + + // Execute the status command + case STATUS_STAGE__EXECUTE_STATUS: + { + if ( ptr->pt_ptr->ring == true ) //wake up from period + { + ptr->status = PASS; + mtcTimer_start ( ptr->pt_ptr, pmon_timer_handler, ptr->timeout ); + dlog ("%s start the status command timer %p\n", ptr->process, ptr->pt_ptr->tid ); + + // Execute the status call + int rc = execute_status_command(ptr); + if (rc != PASS) + { + elog ("%s execute_status_command returned a failure (%d)\n", ptr->process, rc); + ptr->status = rc; + } + + statusStageChange ( ptr, STATUS_STAGE__EXECUTE_STATUS_WAIT ); + } + break ; + } + + // Wait for the status command to finish and process results + case STATUS_STAGE__EXECUTE_STATUS_WAIT: + { + // Give the command time to execute. 
The daemon_sigchld_hdlr will force + // a ring when the command execute successfully or returns a failure + if ( (ptr->pt_ptr->ring == true) || (ptr->status != PASS ) ) + { + // Stop timer if we had one + if ( ptr->pt_ptr->tid ) + { + dlog ("%s stop the status command timer %p\n", ptr->process, ptr->pt_ptr->tid ); + mtcTimer_stop( ptr->pt_ptr); + } + ptr->pt_ptr->ring = false; + + if (( !ptr->sigchld_rxed ) || ( !ptr->child_pid ) || (ptr->status != PASS)) + { + if ( ptr->child_pid == 0 ) + { + elog ("%s status command has null child pid\n", ptr->process ); + } + else if ( ptr->sigchld_rxed == false ) + { + elog ("%s status command execution timed out (%d)\n", ptr->process, ptr->child_pid ); + kill_running_process ( ptr->child_pid ); + } + + elog ("%s status returned a failure (rc:%d) ; process(es) start pending\n", ptr->process, ptr->status ); + + // Go to execute start state since we do not know the status of the process + ptr->status_failed = true; + ptr->was_failed = true ; + statusStageChange ( ptr, STATUS_STAGE__EXECUTE_START ); + } + else + { + // Status reports everything is ok, reset variables + dlog ("%s status command was successful\n", ptr->process); + ptr->restarts_cnt = 0; + + if ( ptr->failed == true ) + { + manage_alarm ( ptr, PMON_CLEAR ); + } + ptr->status_failed = false; + ptr->failed = false; + statusStageChange ( ptr, STATUS_STAGE__BEGIN ); + + } + + ptr->child_pid = 0; + ptr->sigchld_rxed = false; + } + break; + } + + // Interval wait time before doing a start again if the start + // had previously failed + case STATUS_STAGE__INTERVAL_WAIT: + { + if (ptr->pt_ptr->ring == true) + { + statusStageChange ( ptr, STATUS_STAGE__EXECUTE_START ); + } + break ; + } + + // Execute the start command + case STATUS_STAGE__EXECUTE_START: + { + ptr->status = PASS; + mtcTimer_start ( ptr->pt_ptr, pmon_timer_handler, ptr->timeout ); + dlog ("%s start the start command timer %p\n", ptr->process, ptr->pt_ptr->tid ); + + int rc = execute_start_command (ptr); + if 
(rc != PASS) + { + elog ("%s execute_start_command returned a failure (%d)\n", ptr->process, rc); + ptr->status = rc; + } + statusStageChange ( ptr, STATUS_STAGE__EXECUTE_START_WAIT ); + break; + } + + // Wait for the start command to finish and process results + case STATUS_STAGE__EXECUTE_START_WAIT: + { + // Give the command time to execute. The daemon_sigchld_hdlr will force + // a ring when the command execute successfully or returns a failure + if ( (ptr->pt_ptr->ring == true) || (ptr->status != PASS) ) + { + // stop timer if we had one + if ( ptr->pt_ptr->tid ) + { + dlog ("%s stop the start command timer %p\n", ptr->process, ptr->pt_ptr->tid ); + mtcTimer_stop( ptr->pt_ptr); + } + ptr->pt_ptr->ring = false; + + // If the status had failed then ptr->status_failed will be set to true. Status failure + // will also cause restarts count increment, alarm and degrade state + if (( !ptr->sigchld_rxed ) || ( !ptr->child_pid ) || ( ptr->status ) || (ptr->status_failed)) + { + if ( ptr->child_pid == 0 ) + { + elog ("%s start command has null child pid\n", ptr->process ); + } + else if ( ptr->sigchld_rxed == false ) + { + elog ("%s start command execution timed out (%d)\n", ptr->process, ptr->child_pid ); + kill_running_process ( ptr->child_pid ); + } + else if ( ptr->status != PASS ) + { + elog ("%s start command returned a failure (rc:%d)\n", ptr->process, ptr->status); + } + + /* Send a log on the first failure */ + if (( ptr->restarts_cnt == 0 ) && ( ptr->restarts != 0 ) ) + { + wlog ("%s Sending Log Event to Maintenance\n", ptr->process ); + + /* Send a log on the first one or every time we start a fresh restart cycle */ + manage_alarm ( ptr, PMON_LOG ); + } + + /* Manage notification based on restart */ + else if ( ptr->restarts_cnt >= ptr->restarts ) + { + wlog ("%s Failure threshold (%d) reached ; alarming\n", ptr->process, ptr->restarts ); + manage_alarm ( ptr, PMON_ASSERT ); + ptr->failed = true; // this is used to degrade un-degrade the host + 
ptr->restarts_cnt = 0 ; + } + else + { + wlog ("%s has %d of %d failures ; retrying ...\n", + ptr->process, + ptr->restarts_cnt, + ptr->restarts ); + } + + ptr->restarts_cnt++; + ptr->failed_cnt++ ; + + //only want to check for status false on first restart iteration so reset the flag + ptr->status_failed = false; + + // Go to interval state only if start failed otherwise we want to check + // the process status again + if (( !ptr->sigchld_rxed ) || ( !ptr->child_pid ) || ( ptr->status )) + { + // In here because the start failed + + // Wait the interval time and then execute a start command again + mtcTimer_start ( ptr->pt_ptr, pmon_timer_handler, ptr->interval ); + dlog ("%s start interval timer %p\n", ptr->process, ptr->pt_ptr->tid ); + statusStageChange ( ptr, STATUS_STAGE__INTERVAL_WAIT ); + } + else + { + // In here because status failed but start was successful + + wlog ("%s start command was successful ; here because status had failed\n", ptr->process); + statusStageChange ( ptr, STATUS_STAGE__BEGIN ); + } + } + else + { + // Start was successful + wlog ("%s start command was successful\n", ptr->process); + statusStageChange ( ptr, STATUS_STAGE__BEGIN ); + } + + ptr->child_pid = 0; + ptr->sigchld_rxed = false; + } + break; + } + + default: + { + elog ("%s invalid status_stage (%d)\n", ptr->process, ptr->status_stage ); + + /* Default to first state for invalid case. there is an issue then it will be detected */ + statusStageChange ( ptr, STATUS_STAGE__BEGIN ); + } + } + return (PASS); +} diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/pmonHdlr.cpp b/mtce-common/cgts-mtce-common-1.0/pmon/pmonHdlr.cpp new file mode 100644 index 00000000..eb1b56a1 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/pmon/pmonHdlr.cpp @@ -0,0 +1,2070 @@ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform Process Monitor Service Handler + */ + +#include <libgen.h> /* dirname */ + +using namespace std; + +#include "pmon.h" +#include "nodeEvent.h" /* for ... set_inotify_watch, set_inotify_close */ +#include "nodeTimers.h" /* for ... mtcTimer_init */ +#include "alarmUtil.h" /* for ... alarmUtil_getSev_str */ +#include "pmonAlarm.h" /* for ... PMON_ALARM_ID__PMOND */ + +/* Preserve a local copy of a pointer to the control struct to + * avoid having to publish a get utility prototype into pmon.h */ +static pmon_ctrl_type * _pmon_ctrl_ptr = NULL ; + +void pmon_set_ctrl_ptr ( pmon_ctrl_type * ctrl_ptr ) +{ + /* Save the control pointer */ + _pmon_ctrl_ptr = ctrl_ptr ; +} + +/* pmonTimer_audit - get_events periodic audit timer */ +static struct mtc_timer pmonTimer_audit ; +static struct mtc_timer pmonTimer_degrade ; +static struct mtc_timer pmonTimer_pulse ; +static struct mtc_timer pmonTimer_hostwd ; +static struct mtc_timer ptimer[MAX_PROCESSES] ; + +/** List of config files */ +std::list<string> config_files ; +std::list<string>::iterator string_iter_ptr ; + +/* If there is an alarm in the list that matches one in the process list + * then update that process with its severity and failed state. + * If there is a process in the saved list that is not in the process list + * then clear its alarm as it is no longer valid. + */ +void manage_process_alarms ( list & _list, + process_config_type * const ptr, + int const processes ); + +static process_config_type process_config[MAX_PROCESSES] ; + +/* lookup process control by index and return its pointer if found. + * Returns NULL when no control struct is registered or index is out of range */ +process_config_type * get_process_config_ptr ( int index ) +{ + if (( _pmon_ctrl_ptr ) && ( index >= 0 ) && ( index < _pmon_ctrl_ptr->processes )) + return ( &process_config[index] ); + return ( NULL ); +} + +/* lookup process control by name and return its pointer if found.
+ * Otherwise if not found return NULL */ +process_config_type * get_process_config_ptr ( string process ) +{ + if ( _pmon_ctrl_ptr ) + { + for ( int i = 0 ; i < _pmon_ctrl_ptr->processes ; i++ ) + { + if ( process.compare(process_config[i].process) == 0 ) + { + dlog ("%s process found\n", process.c_str()); + return (&process_config[i]); + } + } + } + wlog ("%s process not found in control list\n", process.c_str()); + return (NULL); +} + +#define _MAX_LEN_ (MAX_FILE_SIZE*2) + +/******************************************************************* + * Process Dump Support * + ******************************************************************* + * * + * Utilities that add specific config lines to the dump list * + * * + ******************************************************************/ + +/* Log hostname, ip, mac and pulse period */ +void mem_log_ctrl ( pmon_ctrl_type * ptr ) +{ + #define MAX_LEN 500 + char str[MAX_LEN] ; + snprintf (&str[0], MAX_LEN, "%s %s %s Pulse Rate:%d msecs\n", + &ptr->my_hostname[0], + ptr->my_address.c_str(), + ptr->my_macaddr.c_str(), + ptr->pulse_period ); + mem_log(str); +} + +/* Log process specific controls ; argument order must follow the labels in the format string */ +void mem_log_process ( process_config_type * ptr ) +{ + #define MAX_LEN 500 + char str[MAX_LEN] ; + snprintf (&str[0], MAX_LEN, "%-25s [%5d] %8s Restarts:%u Interval:%u Debounce:%u Startuptime:%u\n", + ptr->process, + ptr->pid, + ptr->severity, + ptr->restarts, + ptr->interval, + ptr->debounce, + ptr->startuptime); + mem_log(str); +} + +/* Log process specific state */ +void mem_log_pstate ( process_config_type * ptr ) +{ + #define MAX_LEN 500 + char str[MAX_LEN] ; + snprintf (&str[0], MAX_LEN, " Passive: %10s (%d) Failed:%s Restart:%s FCount:%2u subFunc:%s Severity:%s %s %s\n", + get_pmonStage_str(ptr), + ptr->stage, + ptr->failed ? "true " : "false", + ptr->restart ? "true " : "false", + ptr->failed_cnt, + ptr->subfunction ? ptr->subfunction : "None", + alarmUtil_getSev_str(ptr->alarm_severity).c_str(), + ptr->ignore ?
"ignored" : "", + ptr->stopped ? "stopped" : "" ); + mem_log(str); +} + +/* Log process specific active monitor controls */ +void mem_log_aconfig ( process_config_type * ptr ) +{ + #define MAX_LEN 500 + char str[MAX_LEN] ; + snprintf (&str[0], MAX_LEN, " Active : %10s (%d) Pulses:%2u Seq:%2u Period:%2u Timeout:%2u Thld:%2u %s\n", + get_amonStage_str(ptr), + ptr->active_stage, + ptr->pulse_count, + ptr->tx_sequence, + ptr->period, + ptr->timeout, + ptr->threshold, + ptr->waiting ? "... waiting" : ""); + mem_log(str); +} + +/* Log process specific active monitor state */ +void mem_log_astate ( process_config_type * ptr ) +{ + #define MAX_LEN 500 + char str[MAX_LEN] ; + snprintf (&str[0], MAX_LEN, " Stats - Failed:%s Count:%2u b2bp:%2u b2bc:%2u rxer:%2u txer:%2u msge:%2u msgp:%2u\n", + ptr->active_failed ? "true " : "false", + ptr->afailed_count, + ptr->b2b_miss_peak, + ptr->b2b_miss_count, + ptr->recv_err_cnt, + ptr->send_err_cnt, + ptr->mesg_err_cnt, + ptr->mesg_err_peak); + mem_log(str); +} + +/* Push daemon state to log file */ +void daemon_dump_info ( void ) +{ + if ( _pmon_ctrl_ptr ) + { + daemon_dump_membuf_banner(); + mem_log_ctrl ( _pmon_ctrl_ptr ); + daemon_dump_membuf(); + for ( int i = 0 ; i < _pmon_ctrl_ptr->processes ; i++ ) + { + process_config_type * ptr = get_process_config_ptr(i); + mem_log ('\n'); + mem_log_process ( ptr ); + mem_log_pstate ( ptr ); + if ( ptr->active_monitoring ) + { + mem_log_aconfig ( ptr ); + mem_log_astate ( ptr ); + } + } + daemon_dump_membuf(); + } +} + +/******************************************************************* + * Module Initialize and Finalizes Interfaces * + ******************************************************************/ + +/* Initial init of timers. 
*/ +/* Not run on a sighup */ +void pmon_timer_init ( void ) +{ + mtcTimer_init ( pmonTimer_audit, _pmon_ctrl_ptr->my_hostname, "audit" ) ; + mtcTimer_init ( pmonTimer_pulse, _pmon_ctrl_ptr->my_hostname, "pulse" ) ; + mtcTimer_init ( pmonTimer_hostwd , _pmon_ctrl_ptr->my_hostname, "hostwd" ) ; + mtcTimer_init ( pmonTimer_degrade, _pmon_ctrl_ptr->my_hostname, "degrade audit" ); + + for ( int i = 0 ; i < MAX_PROCESSES ; i++ ) + { + /* Bind the process timer to the process struct */ + process_config[i].pt_ptr = &ptimer[i] ; + + /* Init the timer for this process */ + mtcTimer_init ( process_config[i].pt_ptr, _pmon_ctrl_ptr->my_hostname, "process" ) ; + } +} + +void _process_death_hdlr ( int sig_num, siginfo_t * info_ptr, void * context_ptr ); + +/* Register realtime signal handler with the kernel ; returns 0 on success or FAIL_SIGNAL_INIT */ +int signal_hdlr_init ( int sig_num ) +{ + int rc ; + + memset (&_pmon_ctrl_ptr->info, 0, sizeof(_pmon_ctrl_ptr->info)); + memset (&_pmon_ctrl_ptr->prev, 0, sizeof(_pmon_ctrl_ptr->prev)); /* size each struct by itself */ + + _pmon_ctrl_ptr->info.sa_sigaction = _process_death_hdlr ; + _pmon_ctrl_ptr->info.sa_flags = (SA_NOCLDSTOP | SA_NOCLDWAIT | SA_SIGINFO) ; + + rc = sigaction ( sig_num, &_pmon_ctrl_ptr->info , &_pmon_ctrl_ptr->prev ); + if ( rc ) + { + elog("Registering : Realtime Signal %d - (%d) (%s)\n", + sig_num, errno, strerror(errno)); + rc = FAIL_SIGNAL_INIT ; + } + else + { + ilog("Registering : Realtime Signal %d\n", sig_num); + } + return (rc) ; +} + +/* + * Init the handler + * - Must support re-init that might occur over a SIGHUP + **/ +int pmon_hdlr_init ( pmon_ctrl_type * ctrl_ptr ) +{ + int rc ; + + /* Save the control pointer */ + _pmon_ctrl_ptr = ctrl_ptr ; + + /* Force running of the audit at the very start */ + _pmon_ctrl_ptr->run_audit = true ; + + rc = signal_hdlr_init ( PMON_RT_SIGNAL ); + + /* Log the control setting going into the main loop */ + mem_log_ctrl ( _pmon_ctrl_ptr ) ; + + /* init the inotify file descriptors */ + _pmon_ctrl_ptr->fd = 0 ; + _pmon_ctrl_ptr->wd = 0 ; +
return (rc) ; +} + + +/* Module Cleanup */ +void pmon_hdlr_fini ( pmon_ctrl_type * ctrl_ptr ) +{ + for ( int i = 0 ; i < _pmon_ctrl_ptr->processes ; i++ ) + { + /* Close any active monitoring sockets */ + close_process_socket ( &process_config[i] ); + } + + /* Turn off inotify */ + set_inotify_close ( ctrl_ptr->fd, ctrl_ptr->wd ); +} + +void manage_process_failure ( process_config_type * ptr ) +{ + /******************************************************************* + * The next 2 'if' clauses try to prevent raising alarms for + * process failure detections while the host is shutting down. + *******************************************************************/ + + /* When handling a process failure check to see if we are already in + * the stopping state. + * If not, then query the current system state and save it. */ + if ( _pmon_ctrl_ptr->system_state != MTC_SYSTEM_STATE__STOPPING ) + { + /* update current state */ + _pmon_ctrl_ptr->system_state = get_system_state(); + } + + /* Ignore process failures while in stopping (i.e. shutdown) mode */ + if ( _pmon_ctrl_ptr->system_state == MTC_SYSTEM_STATE__STOPPING ) + { + /* don't report process failures during system shutdown. 
*/ + wlog ("%s terminated by system shutdown (pid:%d) ; ignoring\n", + ptr->process , ptr->pid ); + ptr->ignore = true ; + return ; + } + + /* Should not need this clause */ + if ( ptr->stopped == true ) + { + slog ("%s process is in the stopped state\n", ptr->process); + } + + elog ("%s failed (%d) (p:%d a:%d)\n", ptr->process, ptr->pid, ptr->failed, ptr->active_failed); + passiveStageChange ( ptr, PMON_STAGE__MANAGE) ; + + if ( ptr->failed == false ) + { + ptr->failed = true ; + ptr->restart = false ; + // pmon_send_event ( MTC_EVENT_PMON_LOG, ptr ) ; + } + + /* TODO: Consider clearing active_failed flag regardless */ + if ( ptr->active_monitoring == true ) + { + activeStageChange ( ptr, ACTIVE_STAGE__PULSE_REQUEST ) ; + ptr->active_failed = true ; + } +} + +/* + * Manage process config strdup memory over a config/reconfig. + * On reconfig ; the PMOND_INIT_CHECK should be set and for each + * config pointer that is not null ; free the memory. + */ +void init_process_config_memory ( void ) +{ + for ( int i = 0 ; i < MAX_PROCESSES ; i++ ) + { + if ( process_config[i].init_check == PMOND_INIT_CHECK ) + { + if ( process_config[i].process ) free ((void*)process_config[i].process); + if ( process_config[i].service ) free ((void*)process_config[i].service); + if ( process_config[i].script ) free ((void*)process_config[i].script); + if ( process_config[i].style ) free ((void*)process_config[i].style); + if ( process_config[i].pidfile ) free ((void*)process_config[i].pidfile); + if ( process_config[i].severity ) free ((void*)process_config[i].severity); + if ( process_config[i].mode ) free ((void*)process_config[i].mode); + if ( process_config[i].start_arg ) free ((void*)process_config[i].start_arg); + if ( process_config[i].status_arg) free ((void*)process_config[i].status_arg); + + if ( process_config[i].status_failure_text_file) free ((void*)process_config[i].status_failure_text_file); + if ( process_config[i].subfunction ) free ((void*)process_config[i].subfunction);
+ if ( process_config[i].recovery_method ) free ((void*)process_config[i].recovery_method); + } + /* init the process config memory ; now that we have freed past strdup allocations*/ + memset ( (char*)&process_config[i], 0, sizeof(process_config_type)); + } +} + +/* Read and load process monitor configuration from + * all the process config files from /etc/pmon.d */ +void load_processes ( void ) +{ + list saved_alarm_list ; + + int rc = PASS ; + + /* 1. Free timers, + * 2. shutdown sockets + * 3. track processes with raised alarms + */ + for ( int i = 0 ; i < _pmon_ctrl_ptr->processes ; i++ ) + { + if ( process_config[i].pt_ptr->tid ) + { + mtcTimer_stop ( process_config[i].pt_ptr ); + } + close_process_socket ( &process_config[i] ); + } + + /* Query fm for existing pmon process alarms and + * for each that is found store their 'name' and + * 'severity' in the passed in saved list */ + manage_queried_alarms ( saved_alarm_list ); + + /* init the process config memory */ + init_process_config_memory (); + + /* Default to event mode */ + _pmon_ctrl_ptr->event_mode = true ; + + /* Start with zero processes */ + _pmon_ctrl_ptr->processes = 0 ; + + /* Read in the list of config files and their contents */ + load_filenames_in_dir ( CONFIG_DIR, config_files ) ; + + ilog ("Loading Process Configurations\n"); + ilog ("--------------------------------------------------------------\n"); + + /* Run Maintenance on Inventory */ + for ( string_iter_ptr = config_files.begin () ; + string_iter_ptr != config_files.end () ; + ++string_iter_ptr ) + { + process_config_type * ptr = &process_config[_pmon_ctrl_ptr->processes] ; + rc = process_config_load ( ptr, string_iter_ptr->data() ); + if ( rc ) + { + if ( _pmon_ctrl_ptr->processes < MAX_PROCESSES ) memset ((char*)ptr, 0, sizeof(process_config_type)); /* ptr is one past the table when it is full ; never wipe that */ + } + else + { + /* stages for passive and active monitoring are initially set + * inside the process_config_load */ + _pmon_ctrl_ptr->processes++ ; + ptr->init_check = PMOND_INIT_CHECK ; + } + } + + pmon_send_event (
MTC_EVENT_PMON_CLEAR, &process_config[0] ) ; + + ilog ("Registering Processes With Kernel\n"); + ilog ("---------------------------------------------------------------\n"); + + /* Register all the processes with the kernel */ + for ( int i = 0 ; i < _pmon_ctrl_ptr->processes ; i++ ) + { + process_config[i].restart= false; + process_config[i].failed = false; + if ( process_config[i].status_monitoring ) + { + process_config[i].status_stage = STATUS_STAGE__BEGIN ; + } + else if ( process_config[i].stage == PMON_STAGE__MANAGE ) + { + register_process ( &process_config[i] ); + if ( process_config[i].active_monitoring == true ) + { + if ( open_process_socket ( &process_config[i] ) != PASS ) + { + elog ("%s failed to open process socket\n", + process_config[i].process ); + } + } + } + } + _pmon_ctrl_ptr->reload_config = false ; + + /* If there were process alarms that existed over the reload + * then ensure that those processes are updated with that information. */ + if ( saved_alarm_list.size () ) + { + ilog ("there are %ld active alarms over reload\n", saved_alarm_list.size()); + manage_process_alarms ( saved_alarm_list, &process_config[0], _pmon_ctrl_ptr->processes ); + } +} + + +/* Looks up the timer ID and asserts the corresponding ringer */ +void pmon_timer_handler ( int sig, siginfo_t *si, void *uc) +{ + timer_t * tid_ptr = (void**)si->si_value.sival_ptr ; + + /* Avoid compiler errors/warnings for parms we must + * have but currently do nothing with */ + UNUSED(sig); + UNUSED(uc); + + if ( !(*tid_ptr) ) + { + return ; + } + + else if ( *tid_ptr == pmonTimer_pulse.tid ) + { + pmonTimer_pulse.ring = true ; + } + else if ( *tid_ptr == pmonTimer_degrade.tid ) + { + mtcTimer_stop_int_safe ( pmonTimer_degrade ); + pmonTimer_degrade.ring = true ; + _pmon_ctrl_ptr->patching_in_progress = false ; + } + else if ( *tid_ptr == pmonTimer_audit.tid ) + { + mtcTimer_stop_int_safe ( pmonTimer_audit ); + pmonTimer_audit.ring = true ; + } + /* is host watchdog pmon timer */ + else 
if ( *tid_ptr == pmonTimer_hostwd.tid ) + { + pmonTimer_hostwd.ring = true ; + + /* we do not stop the timer; instead let it auto-restart */ + } + else + { + bool found = false ; + for ( int i = 0 ; i < _pmon_ctrl_ptr->processes ; i++ ) + { + if ( *tid_ptr == process_config[i].pt_ptr->tid ) + { + mtcTimer_stop_int_safe ( process_config[i].pt_ptr ); + process_config[i].pt_ptr->ring = true ; + found = true ; + break ; + } + } + if ( !found ) + { + //wlog ("Unknown timer\n"); + /* try and cleanup by stopping this unknown timer via its tid */ + mtcTimer_stop_tid_int_safe (tid_ptr); + } + } +} + +/***************************************************************************** + * + * Name : process_config_load + * + * Purpose : Load the content of a config file + * + *****************************************************************************/ +int process_config_load (process_config_type * pc_ptr, const char * config_file_ptr ) +{ + char service_name_buf [_MAX_LEN_] ; + memset (service_name_buf,0, sizeof(service_name_buf)); + + if ( _pmon_ctrl_ptr->processes >= MAX_PROCESSES ) + { + wlog ("Cannot Monitor more than %d processes\n", MAX_PROCESSES ); + return (FAIL); + } + + /* Read the process config file */ + pc_ptr->mask = 0 ; + pc_ptr->amask = 0 ; + pc_ptr->status_mask = 0 ; + pc_ptr->status_monitoring = false; + pc_ptr->passive_monitoring = false; + pc_ptr->audit_alarm_refresh_count = 0 ; + + if (ini_parse( config_file_ptr, pmon_process_config, pc_ptr) < 0) + { + elog("Read Failure : %s\n", config_file_ptr ); + return (FAIL); + } + + /* Set some defaults just in case they were not specified */ + if ( !pc_ptr->mode ) + { + pc_ptr->mode = strdup("Passive") ; + } + if ( !pc_ptr->startuptime ) + { + pc_ptr->startuptime = PMON_MIN_START_DELAY ; + } + + /* default recovery method to process init script */ + snprintf ( &service_name_buf[0], _MAX_LEN_, "%s", pc_ptr->script ); + + /* Print error logs if there is no recovery method present for this service/process */ + if ( 
_pmon_ctrl_ptr->recovery_method == PMOND_RECOVERY_METHOD__SYSTEMD ) + { + /* If the config file does not specify a service name + * then the service name defaults to the process name */ + if ( ! pc_ptr->service ) + { + snprintf ( &service_name_buf[0], _MAX_LEN_, "%s/%s.service", SYSTEMD_SERVICE_FILE_DIR, pc_ptr->process ); + if ( daemon_is_file_present ( service_name_buf ) == false ) + { + if ( daemon_is_file_present ( pc_ptr->script ) == false ) + { + /* print a log if we have no recovery method */ + wlog ("%s has no recovery method\n", pc_ptr->process ); + wlog ("... neither %s nor %s exist\n", service_name_buf, pc_ptr->script ); + } + } + } + else + { + snprintf ( &service_name_buf[0], _MAX_LEN_, "%s/%s.service", SYSTEMD_SERVICE_FILE_DIR, pc_ptr->service ); + if ( daemon_is_file_present ( service_name_buf ) == false ) + { + /* print a log if we have no recovery method */ + wlog ("%s service has no recovery method\n", pc_ptr->service ); + wlog ("... %s does not exist\n", service_name_buf ); + } + } + } + + update_config_option ( &pc_ptr->recovery_method , service_name_buf ); + + if ( !strcmp ( pc_ptr->mode, "status" ) ) + { + pc_ptr->status_monitoring = true; + + if (( pc_ptr->status_mask == CONF_STATUS_MON_MASK ) && + ( pc_ptr->process[0] != '\0' ) && + ( pc_ptr->severity[0] != '\0')) + { + dlog1 ("Config File : %s\n", string_iter_ptr->c_str()); + + if ( !strcmp ( pc_ptr->severity, "critical" )) + { + pc_ptr->sev = SEVERITY_CRITICAL ; + } + else if ( !strcmp ( pc_ptr->severity, "major" )) + { + pc_ptr->sev = SEVERITY_MAJOR ; + } + else if ( !strcmp ( pc_ptr->severity, "minor" )) + { + pc_ptr->sev = SEVERITY_MINOR ; + } + else + { + wlog ("%s has invalid severity ; ignoring\n", pc_ptr->process ); + pc_ptr->ignore = true ; /* ignore is a flag ; a strdup'ed string here was never freed */ + } + + /* Bind the process timer to the process struct */ + pc_ptr->pt_ptr = &ptimer[_pmon_ctrl_ptr->processes] ; + + /* set the timer service owner to the process name */ + pc_ptr->pt_ptr->service = pc_ptr->process ; +
pc_ptr->restarts_cnt = 0 ; + pc_ptr->pid = 0 ; + pc_ptr->child_pid = 0 ; + pc_ptr->restart = false ; + pc_ptr->failed = false ; + pc_ptr->status_failed = false ; + pc_ptr->was_failed = false ; + pc_ptr->sigchld_rxed = false ; + + ilog ("%7s Mon : %-27s %-8s\n", pc_ptr->mode, + pc_ptr->process, + pc_ptr->ignore ? "ignored" : pc_ptr->severity); + pc_ptr->status_stage = STATUS_STAGE__BEGIN ; + } + else + { + wlog ("Status Parse Failure: %s\n", string_iter_ptr->c_str()); + wlog ("Status Mask Expected: %x Detected: %x\n", CONF_STATUS_MON_MASK, pc_ptr->status_mask ); + return (FAIL); + } + + return (PASS); + } + + + if (( pc_ptr->mask == CONF_MASK ) && + ( pc_ptr->process[0] != '\0' ) && + ( pc_ptr->severity[0] != '\0')) + { + dlog1 ("Config File : %s\n", string_iter_ptr->c_str()); + + if ( !strcmp ( pc_ptr->severity, "critical" )) + { + pc_ptr->sev = SEVERITY_CRITICAL ; + } + else if ( !strcmp ( pc_ptr->severity, "major" )) + { + pc_ptr->sev = SEVERITY_MAJOR ; + } + else if ( !strcmp ( pc_ptr->severity, "minor" )) + { + pc_ptr->sev = SEVERITY_MINOR ; + } + else + { + wlog ("%s has invalid severity ; ignoring\n", pc_ptr->process ); + pc_ptr->ignore = true ; /* ignore is a flag ; a strdup'ed string here was never freed */ + } + + /* Bind the process timer to the process struct */ + pc_ptr->pt_ptr = &ptimer[_pmon_ctrl_ptr->processes] ; + + /* Init the timer for this process */ + mtcTimer_init ( pc_ptr->pt_ptr ) ; + pc_ptr->pt_ptr->hostname = pc_ptr->process ; + pc_ptr->pt_ptr->service = pc_ptr->process ; + + pc_ptr->restarts_cnt = 0 ; + pc_ptr->debounce_cnt = 0 ; + pc_ptr->pid = 0 ; + pc_ptr->child_pid = 0 ; + pc_ptr->restart = false ; + pc_ptr->failed = false ; + pc_ptr->sigchld_rxed = false ; + pc_ptr->stopped = false ; + + pc_ptr->alarm_severity = FM_ALARM_SEVERITY_CLEAR ; + + if (( _pmon_ctrl_ptr->system_type != SYSTEM_TYPE__NORMAL ) && + ( pc_ptr->subfunction != NULL )) + { + /* subfunction process monitoring is deferred until + * that subfunction init is complete */ + ilog ("%7s Def : %-30s %-8s - %s (%s)\n",
pc_ptr->mode, + pc_ptr->process, + pc_ptr->ignore ? "ignored" : pc_ptr->severity, service_name_buf, + pc_ptr->subfunction); + /* defer subfunction processes to the FSM to get enabled */ + pc_ptr->stage = PMON_STAGE__POLLING ; + pc_ptr->pt_ptr->ring = true ; + } + else + { + /* if not a subfunction then monitoring defaults + * to true immediately */ + pc_ptr->passive_monitoring = true ; + + ilog ("%7s Mon : %-30s %-8s - %s\n", pc_ptr->mode, + pc_ptr->process, + pc_ptr->ignore ? "ignored" : pc_ptr->severity, service_name_buf); + pc_ptr->stage = PMON_STAGE__MANAGE ; + } + // mem_log_process ( pc_ptr ); + } + else + { + wlog ("Parse Failure: %s\n", string_iter_ptr->c_str()); + wlog ("Mask Expected: %x Detected: %x\n", CONF_MASK, pc_ptr->mask ); + return (FAIL); + } + + if ( !strcmp ( pc_ptr->mode, "active" ) ) + { + if ( pc_ptr->amask == CONF_AMON_MASK ) + { + if (( pc_ptr->period == 0 ) || + ( pc_ptr->period > PMON_MAX_ACTIVE_PERIOD )) + { + elog ("%s monitor period out-of-range (%d secs), setting to max\n", + pc_ptr->process, + pc_ptr->period ); + + pc_ptr->period = PMON_MAX_ACTIVE_PERIOD ; + } + if ( pc_ptr->timeout > pc_ptr->period ) + { + elog ("%s monitor 'timeout' longer than 'period' (%d:%d secs), rounding down\n", + pc_ptr->process, + pc_ptr->timeout, + pc_ptr->period ); + + pc_ptr->timeout = pc_ptr->period ; + } + + /* Init the active component */ + pc_ptr->active_stage = ACTIVE_STAGE__PULSE_REQUEST ; + pc_ptr->active_monitoring = true ; + pc_ptr->active_failed = false ; + pc_ptr->pulse_count = 0 ; + pc_ptr->b2b_miss_peak = 0 ; + pc_ptr->b2b_miss_count = 0 ; + } + else + { + wlog ("%s Parse Failure\n", string_iter_ptr->c_str()); + wlog ("%s Active Mask Expected: %x Detected: %x\n", + pc_ptr->process, + CONF_AMON_MASK, + pc_ptr->amask ); + return (FAIL); + } + } + return (PASS); +} + +int get_process_pid ( process_config_type * ptr ) +{ + int pid = 0 ; + FILE * pid_file_stream = fopen ( ptr->pidfile, "r" ); + if ( pid_file_stream != NULL ) + { + int num = 
fscanf ( pid_file_stream, "%d", &pid); + if ( num != 1 ) + { + wlog ("fscanf failed to read pid from %s\n", ptr->pidfile ); + } + fclose (pid_file_stream); + } + return (pid); +} + +/* search the process list for a matching child_pid first and then for a + * matching process pid ; return that process or NULL if neither matches */ +process_config_type * find_parent_process ( int child_pid ) +{ + for ( int i = 0 ; i < _pmon_ctrl_ptr->processes ; i++ ) + { + if ( process_config[i].child_pid == child_pid ) + { + return (&process_config[i]); + } + } + /* look based on PID */ + for ( int i = 0 ; i < _pmon_ctrl_ptr->processes ; i++ ) + { + if ( process_config[i].pid == child_pid ) + { + return (&process_config[i]); + } + } + + return (NULL); +} + +/* audit the monitored processes ; return false while any failed process + * still holds a major or critical alarm severity, otherwise return true */ +bool want_degrade_clear ( void ) +{ + int i ; + bool clear = true ; + for ( i = 0 ; i < _pmon_ctrl_ptr->processes ; i++ ) + { + /* Don't report current or previous status on + * processes that are not being monitored */ + if (( !process_config[i].passive_monitoring ) && + ( !process_config[i].status_monitoring )) + { + continue ; + } + if (( process_config[i].failed == true ) || ( process_config[i].active_failed == true )) + { + if (( process_config[i].alarm_severity == FM_ALARM_SEVERITY_MAJOR ) || + ( process_config[i].alarm_severity == FM_ALARM_SEVERITY_CRITICAL )) + { + wlog ("%s is still failed '%s' ; degrade assert\n", + process_config[i].process, + alarmUtil_getSev_str(process_config[i].alarm_severity).c_str()); + + /* Resend the process event to maintenance every threshold count */ + if ( ++process_config[i].audit_alarm_refresh_count > AUDIT_EVENT_SEND_REFESH_THRESHOLD ) + { + process_config[i].audit_alarm_refresh_count = 0 ; + if ( process_config[i].alarm_severity == FM_ALARM_SEVERITY_MAJOR ) + pmon_send_event ( MTC_EVENT_PMON_MAJOR, &process_config[i] ) ; + else + pmon_send_event ( MTC_EVENT_PMON_CRIT, &process_config[i] ) ; + } + clear = false ; + } + } + } +
return (clear); +} + +static char unknown_process[] = "unknown process" ; +bool kill_running_process ( int pid ) +{ + bool rc = false ; + if ( pid ) + { + int result = kill ( pid, 0 ); + if ( result == 0 ) + { + char * proc_name_ptr = &unknown_process[0] ; + + result = kill ( pid, SIGKILL ); + process_config_type * ptr = find_parent_process ( pid ) ; + if ( ptr ) + { + proc_name_ptr = (char*)ptr->process ; + } + if ( ptr && ( result == 0 ) ) + { + if ( daemon_is_file_present ( ptr->pidfile ) ) + { + if ( get_process_pid ( ptr ) == pid ) + { + ilog ("%s removing stale pidfile (%d) %s\n", ptr->process, pid, ptr->pidfile ); + daemon_remove_file ( ptr->pidfile ); + } + } + wlog ("%s kill succeeded (%d)\n", proc_name_ptr, pid ); + rc = true ; + } + else + { + wlog ("%s kill failed or process not running (%d)\n", proc_name_ptr, pid ); + } + } + } + else + { + wlog ("%s cannot kill pid %d\n", unknown_process, pid); + } + return (rc); +} + +/* if the child (startup script) pid is still running then kill it */ +void kill_running_child ( process_config_type * ptr ) +{ + if ( ptr->child_pid ) + { + if ( kill_running_process ( ptr->child_pid ) == true ) + { + wlog ("%s start script still running (%d) ; killed\n", ptr->process, ptr->child_pid ); + } + ptr->child_pid = 0 ; + } +} + +bool process_running ( process_config_type * ptr ) +{ + int pid = get_process_pid ( ptr ); + if ( pid ) + { + int result = kill (pid, 0 ); + ptr->pid = pid ; + if ( result == 0 ) + { + if (( ptr->pid != 0 ) && ( ptr->pid != pid )) + { + wlog ("%s pid changed (was:%d now:%d)\n", + ptr->process , + ptr->pid, + pid); + + ptr->pid = 0 ; + return (false); + } + else if (( ptr->pid == 0 ) && ( pid )) + { + ilog ("%s Running (%d)\n", ptr->process, ptr->pid); + } + else + { + dlog1 ("%s Running (%d) (%d)\n", ptr->process, pid, ptr->pid ); + } + return (true) ; + } + else + { + dlog ("%s process not running (kill 0 result:%d) (get_process_pid:%d)\n", ptr->process, result, pid ); + } + } + else + { + ilog ("%s 
process not running\n", ptr->process ); + } + ptr->pid = 0 ; + return (false); +} + +/* Temporary till we get kernel event */ +void _get_events ( void ) +{ + int pid = 0 ; + for ( int i = 0 ; i < _pmon_ctrl_ptr->processes ; i++ ) + { + bool running = false ; + + if ( process_config[i].ignore == true ) + { + process_config[i].failed = false ; + process_config[i].restart= false ; + continue ; + } + + else if ( process_config[i].stage == PMON_STAGE__POLLING ) + { + continue ; + } + + else if ( process_config[i].status_monitoring ) + { + continue ; + } + + /* Skip already failed processes */ + else if ( process_config[i].failed == false ) + { + if ((pid = get_process_pid ( &process_config[i] ))) + { + int result = kill (pid, 0 ); + process_config[i].pid = pid ; + if ( result == 0 ) + { + dlog3 ("%s (%d) is running\n", process_config[i].process, pid); + running = true ; + } + else + { + dlog ("%s (%d) not running (%d:%d) (%s)\n", + process_config[i].process, pid, + result, errno, strerror(errno)) ; + } + } + else + { + dlog ("%s Pid (unknown) - no pidfile\n", process_config[i].process ) + } + + /* If not running then fail the process + * to trigger auto-recovery */ + if ( running == false ) + { + wlog ("%s Not Running\n", process_config[i].process ); + + manage_process_failure ( &process_config[i] ); + } + } + } + + /* turn off the audit */ + _pmon_ctrl_ptr->run_audit = false ; +} + +/* This is the data structure for requestion process death + * (and other state change) information. Sig of -1 means + * query, sig of 0 means deregistration, positive sig means + * that you want to set it. sig and events are value-result + * and will be updated with the previous values on every + * successful call. 
*/ + +int unregister_process ( process_config_type * ptr ) +{ + dlog1 ("%s pid %d\n", ptr->process, ptr->pid ); + if ( ptr->pid ) + { + struct task_state_notify_info info ; + info.pid = ptr->pid ; + info.sig = 0 ; + info.events = PMON_EVENT_FLAGS ; + if ( prctl (PR_DO_NOTIFY_TASK_STATE, &info )) + { + wlog ("%s failed to unregister process %d\n", ptr->process, ptr->pid ); + } + else + { + ilog ("%s unregistered (%d)\n", ptr->process, ptr->pid ); + } + } + return (PASS); +} + +int register_process ( process_config_type * ptr ) +{ + int pid = get_process_pid ( ptr ); + if ( pid ) + { + ptr->pid = pid ; + ptr->restart= false ; + if (( _pmon_ctrl_ptr->event_mode ) && ( !ptr->ignore )) + { + struct task_state_notify_info info ; + info.pid = pid ; + info.sig = PMON_RT_SIGNAL ; + info.events = PMON_EVENT_FLAGS; + if ( prctl (PR_DO_NOTIFY_TASK_STATE, &info ) ) + { + elog ("%s failed to register pid:%d (%d) (%s)\n", ptr->process, pid, errno, strerror(errno)); + if ( errno == EINVAL ) + { + _pmon_ctrl_ptr->event_mode = false ; + wlog ( "%s Switching to Polling mode\n", ptr->process); + } + else + { + ptr->failed = true ; + } + } + else + { + ilog ("%s Registered (%d)\n", ptr->process , pid ); + ptr->failed = false ; + passiveStageChange ( ptr, PMON_STAGE__MANAGE ) ; + if ( ptr->active_monitoring == false ) + { + manage_alarm ( ptr, PMON_CLEAR ); + } + } + } + /* Don't 'else' because event mode might + * change in the above clause */ + if ( _pmon_ctrl_ptr->event_mode == false ) + { + wlog ("%s Registered (%d) in polling mode\n", + ptr->process , pid); + if ( process_running ( ptr ) == false ) + { + ptr->failed = true ; + } + else + { + ptr->failed = false ; + manage_alarm ( ptr, PMON_CLEAR ); + passiveStageChange ( ptr, PMON_STAGE__MANAGE ) ; + } + } + } + else + { + ilog ("%s is not running\n", ptr->process ); + ptr->failed = true ; + } + + if ( ptr->failed ) + { + manage_process_failure ( ptr ); + return (FAIL); + } + else + { + return (PASS); + } +} + + +/* This respawns a 
process through the 'script' string from the process config file. + * The pmond log files are first closed so their fd's are not duped to the child. + * The syslog facility is used to log child messages to user.log + * The waitpid interface is used to manage acknowledging the exit of the child process */ + +#define PMOND_EXECV_ARGS (4) + +int respawn_process ( process_config_type * ptr ) +{ + pid_t pid ; + + int rc = PASS ; + bool restart = false ; + + if ( process_running ( ptr ) == true ) + { + ilog ("%s restart of running process\n", ptr->process ); + restart = true ; + } + + /* Handle the case where the process is running but the known pid suggests its not. + * Do this by quering by processname and if it returns a valid PID then kill it before + * we start managing its death */ + pid = get_pid_by_name_pipe ( ptr->process ) ; + if ( pid ) + { + /* Note: We could just go with this new PID ; update the struct and such + * but that could be a bit risky ; instead we kill and restart. */ + kill_running_process ( pid ); + } + + unregister_process ( ptr ); + + ptr->restarts_cnt++ ; + + /* default restart result and ponitoring controls */ + ptr->status = RETRY ; /* keep looking */ + ptr->pidwait_cnt = 0 ; /* TODO: should be a timer .... start count */ + ptr->sigchld_rxed = false ; /* sigchild handler did not run */ + + /* Fork the daemon to trigger the process specific restart */ + ptr->child_pid = pid = fork () ; + if (pid == 0) + { + /* execv arg list */ + char * argv[PMOND_EXECV_ARGS] ; + for ( int i = 0 ; i < PMOND_EXECV_ARGS ; i++ ) argv[i] = NULL ; + + char recovery_cmd[_MAX_LEN_] ; + + bool close_file_descriptors = true ; + if ( setup_child ( close_file_descriptors ) != PASS ) + exit(EXIT_FAILURE); + + signal (SIGCHLD, SIG_DFL); + + openlog ((char*)ptr->process, LOG_PID, LOG_USER ); + + /* Default File Creation Mask */ + umask(022); + + memset (recovery_cmd,0,sizeof(recovery_cmd)); + + ilog ("Service:%s\n", ptr->service ? 
ptr->service : "unknown"); + + #define SYSTEMCTL_CMD "/usr/bin/systemctl" + #define RESTART_CMD "restart" + #define START_CMD "start" + if ( get_ctrl_ptr()->recovery_method == PMOND_RECOVERY_METHOD__SYSTEMD ) + { + + /* systemd recovery method - if the service is specified then it takes precidence */ + if ( ptr->service ) + sprintf ( &recovery_cmd[0], "%s", ptr->service ); + else + sprintf ( &recovery_cmd[0], "%s", ptr->process ); + + argv[0] = (char*)&SYSTEMCTL_CMD ; /* path to executable */ + argv[1] = (char*)&RESTART_CMD ; /* the recovery command */ + argv[2] = &recovery_cmd[0] ; /* the process name */ + } + else + { + /* init script method */ + snprintf( &recovery_cmd[0], _MAX_LEN_, "%s", ptr->script ) ; + argv[0] = &recovery_cmd[0] ; /* path to script */ + argv[1] = (restart ? (char*)&RESTART_CMD : (char*)&START_CMD) ; /* the process name */ + } + + rc = execv(argv[0], argv ); + if ( 0 > rc ) + { + syslog ( LOG_WARNING, "%s recovery failed with method '%s': (%s %s %s) (%d:%m)\n", + ptr->process, + ptr->recovery_method, + argv[0], + argv[1], + argv[2] ? "" : argv[2] , + errno ); + } + else + { + syslog ( LOG_INFO, "%s recovered witb method '%s': (%s %s %s)\n", + ptr->process, + ptr->recovery_method, + argv[0], + argv[1], + argv[2] ? "" : argv[2] ); + } + + closelog(); + exit (rc); + } + if ( pid == -1 ) + { + elog ("%s fork failed (%s)\n", ptr->process , strerror(errno)); + + /* TODO: Consider making this a critical fault + * after 100 retries. + * All possibilities based on man page are + * due to resource limitations and if that does + * not resolve in 100 retries then it probably will never. 
+ **/ + return (FAIL); + } + + gettime ( ptr->time_start ); + + ilog ("%s Spawn (%d) fork\n", ptr->process, ptr->child_pid ); + + return (PASS); +} + +/***************************************************************************** + * + * Name : execute_start_command + * + * Purpose : execute start script command + + *****************************************************************************/ +int execute_start_command(process_config_type * ptr) +{ + pid_t child_pid; + + wlog("%s process(es) start\n", ptr->process); + + dlog ("Main Pid:%d \n", getpid() ); + + ptr->sigchld_rxed = false ; /* sigchild handler did not run */ + + ptr->child_pid = child_pid = fork (); + if (child_pid == 0) + { + dlog ("Child Pid:%d \n", getpid() ); + + char* argv[] = { basename((char*)ptr->script), (char*)ptr->start_arg, NULL}; + char cmd[MAX_FILE_SIZE] ; + memset (cmd,0,sizeof(cmd)); + + snprintf ( &cmd[0], MAX_FILE_SIZE, "%s", ptr->script); + + bool close_file_descriptors = true ; + if ( setup_child ( close_file_descriptors ) != PASS ) + { + exit(255); + } + + /* Set child to ignore child exit */ + signal (SIGCHLD, SIG_DFL); + + /* Setup the exec arguement */ + int res = execv(cmd, argv); + elog ( "Failed to run %s return code:%d error:%s\n", cmd, res, strerror(errno) ); + exit (255); + } + + if ( child_pid == -1 ) + { + elog ("Fork failed (%s)\n", strerror(errno)); + return (FAIL); + } + + gettime ( ptr->time_start ); + + return (PASS); +} + +/***************************************************************************** + * + * Name : execute_status_command + * + * Purpose : execute status script command + + *****************************************************************************/ +int execute_status_command (process_config_type * ptr) +{ + pid_t child_pid; + + dlog("%s process(es) status query\n", ptr->process); + dlog ("Main Pid:%d \n", getpid() ); + + ptr->sigchld_rxed = false ; /* sigchild handler did not run */ + + ptr->child_pid = child_pid = fork (); + if (child_pid == 0) 
+ { + dlog ("Child Pid:%d \n", getpid() ); + + char* argv[] = {basename((char*)ptr->script), (char*)ptr->status_arg, NULL}; + char cmd[MAX_FILE_SIZE] ; + memset (cmd,0,sizeof(cmd)); + + snprintf ( &cmd[0], MAX_FILE_SIZE, "%s", ptr->script); + + bool close_file_descriptors = true ; + if ( setup_child ( close_file_descriptors ) != PASS ) + { + exit(255); + } + + /* Set child to ignore child exit */ + signal (SIGCHLD, SIG_DFL); + + /* Setup the exec arguement */ + int res = execv(cmd, argv); + elog ( "Failed to run %s return code:%d error:%s\n", cmd, res, strerror(errno) ); + exit (255); + } + + if ( child_pid == -1 ) + { + elog ("Fork failed (%s)\n", strerror(errno)); + return (FAIL); + } + + gettime ( ptr->time_start ); + + return (PASS); +} + +void daemon_sigchld_hdlr ( void ) +{ + pid_t tpid = 0 ; + bool found = 0 ; + int status = 0 ; + + dlog("Received SIGCHLD ...\n"); + + while ( 0 < ( tpid = waitpid ( -1, &status, WNOHANG | WUNTRACED ))) + { + process_config_type * process_ptr = find_parent_process ( tpid ) ; + if ( process_ptr ) + { + process_ptr->sigchld_rxed = true ; + + if (WIFEXITED(status)) + { + if ( process_ptr->status_monitoring == false ) + { + dlog ("%s spawn script exited properly (%d)\n", process_ptr->process, tpid ); + } + else + { + /* with status mode we do not need to wait for a timeout since we got a response */ + /* force a ring */ + process_ptr->pt_ptr->ring = true; + } + + gettime ( process_ptr->time_stop ); + timedelta ( process_ptr->time_start, + process_ptr->time_stop, + process_ptr->time_delta ); + + /* only print log if there is an error */ + process_ptr->status = WEXITSTATUS(status) ; + + if ( process_ptr->status ) + { + if ( process_ptr->status_monitoring == false ) + { + dlog ("%s spawn failed (rc:%d:%x) (%ld.%03ld secs)\n", + process_ptr->process, + process_ptr->status, + process_ptr->status, + process_ptr->time_delta.secs, + process_ptr->time_delta.msecs/1000); + } + } + else + { + if ( process_ptr->status_monitoring == false ) + 
{ + /* only print this log if the spawn time took longer than 1 second */ + if ( process_ptr->time_delta.secs ) + { + ilog ("%s spawned in %ld.%03ld secs\n", + process_ptr->process, + process_ptr->time_delta.secs, + process_ptr->time_delta.msecs/1000); + } + } + } + } + else if (WIFSIGNALED(status)) + { + process_ptr->status = FAIL ; + wlog ("%s test uncaught signal\n", process_ptr->process ); + } + else if (WIFSTOPPED(status)) + { + process_ptr->status = FAIL ; + wlog ("%s test stopped.\n", process_ptr->process ); + } + } + else + { + dlog ("parent process for PID:%d lookup failed ; reaped likely after timeout\n", tpid ); + return ; + } + } + if ( ( tpid > 0 ) && ( found == false ) ) + { + wlog ("PID:%d found no corresponding process\n", tpid ); + } +} + +int manage_alarm ( process_config_type * ptr, int action ) +{ + int rc = PASS ; + + pmon_ctrl_type * ctrl_ptr = get_ctrl_ptr () ; + + string processInfo = ptr->process; + // check for extra text + if((ptr->status_monitoring ) && (ptr->status_failure_text_file)) + { + string extra_text = get_status_failure_text(ptr); + if(!extra_text.empty()) + { + processInfo.append(" ("); + processInfo.append(extra_text); + processInfo.append(")"); + } + } + + if ( action == PMON_CLEAR ) + { + if ( ptr->alarm_severity != FM_ALARM_SEVERITY_CLEAR ) + { + ilog ("%s from '%s' to 'clear'\n", ptr->process, alarmUtil_getSev_str(ptr->alarm_severity).c_str()); + pmonAlarm_clear ( ctrl_ptr->my_hostname, PMON_ALARM_ID__PMOND, processInfo ); + ptr->alarm_severity = FM_ALARM_SEVERITY_CLEAR ; + } + ptr->failed = false ; + } + else if ( action == PMON_LOG ) + { + /* CGTS 4010: Pmon logs and alarm ID should not be identical. + * Choice was made to not raise pmon logs for process + * failures. 
If we do in the future then we should + * use a different number from 200.006 + * pmonAlarm_minor_log ( ctrl_ptr->my_hostname, PMON_ALARM_ID__PMOND, processInfo, ptr->restarts ); + */ + ilog ("%s process has failed ; %s\n", ptr->process, + (ptr->restarts == 0) ? "Manual recovery is required." : "Auto recovery in progress."); + + /* Unlike the above call to pmonAlarm_minor_log, this call only creates a log entry in mtcAgent.log */ + pmon_send_event ( MTC_EVENT_PMON_LOG, ptr ) ; + } + else + { + if ( ptr->restart == true ) + { + /* handle as error now rather than command */ + ptr->restart = false ; + } + switch ( ptr->sev ) + { + case SEVERITY_CRITICAL: + { + wlog ("%s Critical Assert\n", ptr->process ); + ptr->failed = true ; + if ( ptr->alarm_severity != FM_ALARM_SEVERITY_CRITICAL ) + { + pmonAlarm_critical ( ctrl_ptr->my_hostname, PMON_ALARM_ID__PMOND, processInfo ); + ptr->alarm_severity = FM_ALARM_SEVERITY_CRITICAL ; + } + break ; + } + case SEVERITY_MAJOR: + { + wlog ("%s Major Assert\n", ptr->process ); + ptr->failed = true ; + if ( ptr->alarm_severity != FM_ALARM_SEVERITY_MAJOR ) + { + pmonAlarm_major ( ctrl_ptr->my_hostname, PMON_ALARM_ID__PMOND, processInfo ); + ptr->alarm_severity = FM_ALARM_SEVERITY_MAJOR ; + } + break ; + } + case SEVERITY_MINOR: + { + wlog ("%s Minor Assert\n", ptr->process ); + ptr->failed = true ; + if ( ptr->alarm_severity != FM_ALARM_SEVERITY_MINOR ) + { + pmonAlarm_minor ( ctrl_ptr->my_hostname, PMON_ALARM_ID__PMOND, processInfo, ptr->restarts ); + ptr->alarm_severity = FM_ALARM_SEVERITY_MINOR ; + } + break ; + } + default: + { + slog ("%s has Invalid Severity", ptr->process); + ptr->sev = SEVERITY_CLEAR ; + ptr->failed = false ; + rc = RETRY ; + break ; + } + } + } + return (rc); +} + + +/********************************************************************************* + * + * Name : _process_death_hdlr + * + * Purpose : Handle realtime signal events from "Notification of death + * of arbitrary process" (NODOAP) service in the 
kernel. + * + * Description: This handler is bound into the kernel with signal_hdlr_init + * Monitored processes are registered with the NODOAP feature + * + * 1. when service starts + * 2. after a process is re-spawned and deemed stable and recovered + * + * The kernel passes the pid of the dead process in through info_ptr->si_pid. + * This handler searches the process list for that pid. If found then it triggers + * that process to be recovered by the fsm. if that process for some crazy reason + * is already in the failed state then this handler deferrs to allowing the fsm + * to complete. + * + * If the pid is not found in the process control structure then the pidfiles + * are searched. if the process is not fould in that secondary search then the + * handler forces the get_events audit to run as a catch all. + * + * Note: The _get_events audit already runs periodically but at a much slower rate. + * + * Update: emacdona: commented out debug logs as we should not be logging + * in a signal handler + * + */ +void _process_death_hdlr ( int sig_num, siginfo_t * info_ptr, void * context_ptr ) +{ + UNUSED(context_ptr); + UNUSED(sig_num) ; + + if ( info_ptr ) + { + process_config_type * ptr = &process_config[0] ; + bool found = false ; + dlog ("Sig:%d Pid:%d Code:%d Exit:%d\n", + info_ptr->si_signo, + info_ptr->si_pid, + info_ptr->si_code, + info_ptr->si_status ); + + for ( int i = 0 ; i < _pmon_ctrl_ptr->processes ; i++ ) + { + ptr = &process_config[i] ; + + if ( ptr->pid == info_ptr->si_pid ) + { + found = true ; + + if ( ptr->failed != true ) + { + ptr->failed = true ; + manage_process_failure ( ptr ); + } + break ; + } + } + if ( !found ) + { + for ( int i = 0 ; i < _pmon_ctrl_ptr->processes ; i++ ) + { + int pid ; + ptr = &process_config[i] ; + + if ((pid = get_process_pid ( ptr ))) + { + if ( pid == info_ptr->si_pid ) + { + found = true ; + if ( ptr->failed != true ) + { + /* One notification from the kernel is all we need */ + manage_process_failure ( ptr ); + 
} + break ; + } + } + } + } + if ( !found ) + { + /* Failed to find process for pid */ + /* Forcing _get_events audit */ + _pmon_ctrl_ptr->run_audit = true ; + } + } + else + { + /* Handler called with NULL siginfo pointer */ + /* Forcing _get_events audit */ + _pmon_ctrl_ptr->run_audit = true ; + } +} + +/************************************************************************ + * + * Name : manage_process_alarms + * + * Description: This interface manages process alarms over a process + * configuration reload + * + * Steps: + * + * 1. Loop over each item in the list and mark the process as failed + * with the specified severity level. + * + * 2. If the process is not found then clear its alarm as it is no + * longer a valid process in the new profile and we don't want a + * lingering stuck alarm. + * + *************************************************************************/ + +void manage_process_alarms ( list & _list, + process_config_type * const ptr, + int const processes ) +{ + /* get out if the list is empty ; should not have been called if + * empty but ... just in case */ + if ( ! _list.empty() ) + { + list::iterator _iter_ptr ; + + /* loop over the list ... */ + for ( _iter_ptr=_list.begin(); _iter_ptr!=_list.end(); ++_iter_ptr ) + { + /* for each item assum it is not found */ + bool found = false ; + + /* try and find this process in the new process profile */ + for ( int i = 0 ; i < processes ; i++ ) + { + if ( ! _iter_ptr->process.compare((ptr+i)->process) ) + { + /* If the process is found then mark it as failed and update its severity. + * At this point we then assume that there is an alarm raised for this process. 
*/ + found = true ; + + (ptr+i)->failed = false ; + wlog ("%s process was failed critical ; clearing existing alarm\n", _iter_ptr->process.c_str() ); + pmonAlarm_clear ( get_ctrl_ptr()->my_hostname, PMON_ALARM_ID__PMOND, _iter_ptr->process ); + } + } + + /* if not found then just clear the alarm */ + if ( found == false) + { + wlog ("%s process alarm clear ; not in current process profile\n", _iter_ptr->process.c_str() ); + pmonAlarm_clear ( get_ctrl_ptr()->my_hostname, PMON_ALARM_ID__PMOND, _iter_ptr->process ); + } + } + } +} + +void pmon_service ( pmon_ctrl_type * ctrl_ptr ) +{ + std::list socks ; + struct timeval waitd; + fd_set readfds; + int select_fail_count = 0 ; + int flush_thld = 0 ; + int rc = PASS ; + int shutdown_log_throttle = 0; + + /* iNotify stuff */ + bool inotify_fault = false ; + + daemon_config_type * cfg_ptr = daemon_get_cfg_ptr (); + pmon_socket_type * sock_ptr = pmon_getSock_ptr (); + int select_timeout = (cfg_ptr->audit_period*100); + int audit_period = (cfg_ptr->audit_period/10); + int pulse_period = cfg_ptr->audit_period ; + int hostwd_period = (cfg_ptr->hostwd_update_period); + int degrade_period = (cfg_ptr->audit_period/50); + + if ( audit_period == 0 ) audit_period = 10 ; + if ( degrade_period == 0 ) degrade_period = 10 ; + + ilog ("Starting to monitor processes\n"); + pmon_send_hostwd ( ); + + /* Load and register generic processes - not subfunction processes */ + load_processes (); + + /* Setup inotify to watch CONFIG_DIR */ + if ( set_inotify_watch ( CONFIG_DIR, ctrl_ptr->fd, ctrl_ptr->wd ) ) + inotify_fault = true ; + + socks.clear(); + socks.push_front (sock_ptr->cmd_sock->getFD()); + socks.push_front (sock_ptr->event_sock->getFD()); + socks.push_front (sock_ptr->amon_sock); + socks.sort(); + + ilog ("Starting 'Audit' timer (%d secs)\n", audit_period ); + mtcTimer_start ( pmonTimer_audit, pmon_timer_handler, audit_period ); + + ilog ("Starting 'Degrade Audit' timer (%d secs)\n", degrade_period ); + mtcTimer_start ( 
pmonTimer_degrade, pmon_timer_handler, degrade_period ); + + ilog ("Starting 'Pulse' timer (%d secs)\n", pulse_period ); + mtcTimer_start_msec ( pmonTimer_pulse, pmon_timer_handler, pulse_period ); + + ilog ("Starting 'Host Watchdog' timer (%d secs)\n", hostwd_period ); + mtcTimer_start ( pmonTimer_hostwd, pmon_timer_handler, hostwd_period ); + + for ( ; ; ) + { + /* Accomodate for hup reconfig */ + select_timeout = (cfg_ptr->audit_period*100); + audit_period = (cfg_ptr->audit_period/10); + degrade_period = (cfg_ptr->audit_period/50); + + if ( audit_period < 1 ) audit_period = 10 ; + if ( degrade_period < 1 ) degrade_period = 10 ; + + daemon_signal_hdlr (); + + /* Initialize the master fd_set */ + FD_ZERO(&readfds); + if ( sock_ptr->cmd_sock->getFD() ) + { + FD_SET(sock_ptr->cmd_sock->getFD(), &readfds); + } + if ( sock_ptr->event_sock->getFD() ) + { + FD_SET(sock_ptr->event_sock->getFD(), &readfds); + } + if ( sock_ptr->amon_sock ) + { + FD_SET(sock_ptr->amon_sock, &readfds); + } + + waitd.tv_sec = 0; + waitd.tv_usec = select_timeout ; + + /* This is used as a delay up to select_timeout */ + rc = select( socks.back()+1, &readfds, NULL, NULL, &waitd); + /* If the select time out expired then */ + if (( rc < 0 ) || ( rc == 0 )) + { + /* Check to see if the select call failed. */ + /* ... but filter Interrupt signal */ + if (( rc < 0 ) && ( errno != EINTR )) + { + wlog_throttled ( select_fail_count, 20, + "Socket Select Failed (rc:%d) %s \n", + errno, strerror(errno)); + } + } + else + { + if ( FD_ISSET(sock_ptr->cmd_sock->getFD(), &readfds)) + { + pmon_service_inbox (); + } + + if (FD_ISSET(sock_ptr->amon_sock, &readfds)) + { + amon_service_inbox ( _pmon_ctrl_ptr->processes ); + } + } + + if (pmonTimer_pulse.ring == true ) + { + pmonTimer_pulse.ring = false ; + /* Send a I'm Alive message to the pulse interface */ + /* Robustness Update: Added an event_mode bool that will + * be true if the kernel supports notification of death + * of arbitrary process patch. 
If that feature is not present + * then allow pmon to operate but in a degraded state. Eventually + * we can turn this into a customer alarm/log. + * Degrade is acheived by not sending the pulses to the watcher. + */ + if ( ctrl_ptr->event_mode == true ) + { + pmon_send_pulse ( ); + } + } + + /* Avoid pmond thrashing trying to recover processes during + * system shutdown. */ + if ( _pmon_ctrl_ptr->system_state == MTC_SYSTEM_STATE__STOPPING ) + { + wlog_throttled ( shutdown_log_throttle, 500, + "process monitoring disabled during system shutdown\n"); + usleep (500); + continue ; + } + if ( shutdown_log_throttle ) shutdown_log_throttle = 0 ; + + if ( inotify_fault == false ) + { + if ( get_inotify_events ( ctrl_ptr->fd ) == true ) + { + if ( _pmon_ctrl_ptr->reload_config == false ) + { + _pmon_ctrl_ptr->reload_config = true ; + ilog ("Setting config reload flag\n"); + + /* Hijack the audit timer for the next period for config reload */ + if ( pmonTimer_degrade.tid ) + { + mtcTimer_stop (pmonTimer_degrade); + } + if ( daemon_is_file_present ( PATCHING_IN_PROG_FILE ) == true ) + { + _pmon_ctrl_ptr->patching_in_progress = true ; + wlog ("Patching in progress ; delaying config reload by 30 secs...\n"); + mtcTimer_start ( pmonTimer_degrade, pmon_timer_handler, (degrade_period + 30) ); + } + else + { + mtcTimer_start ( pmonTimer_degrade, pmon_timer_handler, degrade_period ); + } + } + } + } + + if ( pmonTimer_hostwd.ring == true ) + { + if ( ctrl_ptr->event_mode == true ) + { + pmon_send_hostwd ( ); + pmonTimer_hostwd.ring = false; + } + } + + /* Run Get Events by audit timer */ + if (pmonTimer_audit.ring == true ) + { + _get_events (); + mtcTimer_start ( pmonTimer_audit, pmon_timer_handler, audit_period ); + } + + /* Run the degrade set/clear by audit */ + if (pmonTimer_degrade.ring == true ) + { + /* run the degrade clear audit */ + if ( want_degrade_clear () == true ) + { + dlog ("sending degrade clear\n"); + pmon_send_event ( MTC_EVENT_PMON_CLEAR, &process_config[0] ) 
; + } + else + { + dlog ("sending degrade assert\n"); + // pmon_send_event ( MTC_EVENT_PMON_MAJOR, &process_config[0] ) ; + } + + /* Check for config reload state request */ + if ( _pmon_ctrl_ptr->reload_config == true ) + { + /* But defer it while there is a process in the + * manually requested restart state */ + bool restart_request_active = false ; + for ( int i = 0 ; i < ctrl_ptr->processes ; i++ ) + { + if ( process_config[i].restart == true ) + { + /* Added as fix */ + wlog ("deferring process config reload to next audit\n"); + wlog ("... while manual restart of '%s' is in progress\n", + process_config[i].process ); + restart_request_active = true ; + break ; + } + } + if ( restart_request_active == false ) + { + load_processes (); + } + } + mtcTimer_start ( pmonTimer_degrade, pmon_timer_handler, degrade_period ); + } + + /* Get_events run by forced audit or not in event mode */ + else if (( ctrl_ptr->run_audit == true ) || + ( ctrl_ptr->event_mode == false )) + { + _get_events ( ); + } + + /* Monitor Processes */ + for ( int i = 0 ; i < ctrl_ptr->processes ; i++ ) + { + /* Allow a process to be ignored */ + if ( process_config[i].ignore == true ) + { + process_config[i].failed = false ; + process_config[i].active_failed = false ; + + /* Handle process auto recovery from stopped state */ + if (( process_config[i].pt_ptr->ring == true ) && ( process_config[i].stopped == true )) + { + elog ("%s process was stopped but never restarted ; auto recovery in progress\n", process_config[i].process ); + process_config[i].stopped = false ; + process_config[i].ignore = false ; + passiveStageChange ( &process_config[i], PMON_STAGE__MANAGE ); + } + continue ; + } + else if ( process_config[i].status_monitoring ) + { + pmon_status_handler ( &process_config[i] ); + } + else if (( process_config[i].stage == PMON_STAGE__POLLING ) || + ( process_config[i].stage == PMON_STAGE__START_WAIT ) || + ( process_config[i].restart == true ) || + ( process_config[i].failed == true )) + { 
+ /* Run the FSM for this failed process */ + pmon_passive_handler ( &process_config[i] ) ; + } + else if ( process_config[i].active_monitoring ) + { + // if ( process_config[i].active_failed == false ) + if ( process_config[i].failed == false ) + { + pmon_active_handler ( &process_config[i] ); + } + else + { + elog ("%s Failed Active Monitoring ... recovering.\n", process_config[i].process ); + manage_process_failure ( &process_config[i]) ; + } + } + } + + /* Debugging */ + if (daemon_get_cfg_ptr()->debug_level & 1 ) + { + char proc_mask [MAX_PROCESSES*2] ; + bool somefailed = false ; + memset (&proc_mask[0], 0, sizeof(proc_mask)); + for ( int x = 0 , y = 0 ; x < ctrl_ptr->processes ; x++, y+=2 ) + { + if ( process_config[x].failed ) + { + proc_mask[y] = '1' ; + somefailed = true ; + } + else + proc_mask[y] = '0' ; + proc_mask[y+1] = ' ' ; + } + if ( somefailed ) + { + alog ( "Process Mask: %s\n", &proc_mask[0] ); + } + } + + /* Support the log flush config option */ + if ( cfg_ptr->flush ) + { + if ( ++flush_thld > cfg_ptr->flush ) + { + flush_thld = 0 ; + fflush (stdout); + fflush (stderr); + } + } + + } +} + +string get_status_failure_text ( process_config_type * ptr ) +{ + string extra_text(""); + if(( ptr->status_failure_text_file != NULL ) && + ( ptr->status_failure_text_file[0] != '\0')) + { + FILE * status_text_file_stream = + fopen ( ptr->status_failure_text_file, "r" ); + if ( status_text_file_stream == NULL ) + { + wlog (" Failed to get extra alam text from file %s\n", + ptr->status_failure_text_file ); + } + else + { + char buffer[MAX_STATUS_ERROR_TEXT_LEN]; + if ( fgets(buffer, MAX_STATUS_ERROR_TEXT_LEN, + status_text_file_stream) != NULL) + { + extra_text = buffer; + } + fclose(status_text_file_stream); + } + } + return extra_text; +} + +/**************************************************************************** + * + * Name : quorum_process_failure + * + * Description: manage debounce and log report of quorum process failure + * + * Warnings : 
Only call this when there is a quorum process faiure + * that has exceeded the threshold count. + * + ****************************************************************************/ +void quorum_process_failure ( process_config_type * ptr ) +{ + wlog ("%s quorum process %s\n", + ptr->process, + ptr->quorum_failure ? "unrecoverable" : "failed" ); + + if ( ptr->quorum_failure == true ) + { + ptr->quorum_unrecoverable = true; + } + else + { + ptr->quorum_failure = true; + } +} + diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/pmonInit.cpp b/mtce-common/cgts-mtce-common-1.0/pmon/pmonInit.cpp new file mode 100644 index 00000000..6837100b --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/pmon/pmonInit.cpp @@ -0,0 +1,581 @@ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform Process Monitor Service Header + */ + +#include "pmon.h" +#include "alarmUtil.h" /* for ... alarmUtil_getSev_str and alarmUtil_query_identity */ +#include "pmonAlarm.h" /* for ... PMON_ALARM_ID__PMOND */ + +/* Process Monitor Configuration File */ +#define CONFIG_FILE ((const char *)"/etc/mtc/pmond.conf") + +/* Process Monitor Control Structure */ +static pmon_ctrl_type pmon_ctrl ; +pmon_ctrl_type * get_ctrl_ptr ( void ) { return (&pmon_ctrl); } + + +/** Daemon Configuration Structure + * - Allocation and get pointer + * @see daemon_common.h for daemon_config_type struct format. 
*/ +static daemon_config_type pmon_config ; +daemon_config_type * daemon_get_cfg_ptr () { return &pmon_config ; } + +/* Cleanup exit handler */ +void daemon_exit ( void ) +{ + pmon_msg_fini (); + pmon_hdlr_fini ( &pmon_ctrl ); + daemon_files_fini (); + daemon_dump_info (); + exit (0); +} + + +/* Startup config read */ +static int pmon_config_handler ( void * user, + const char * section, + const char * name, + const char * value) +{ + daemon_config_type* config_ptr = (daemon_config_type*)user; + + if (MATCH("config", "audit_period")) + { + config_ptr->audit_period = atoi(value); + config_ptr->mask |= CONFIG_AUDIT_PERIOD ; + } + else if (MATCH("config", "pmon_cmd_port")) + { + config_ptr->pmon_cmd_port = atoi(value); + config_ptr->mask |= CONFIG_CMD_PORT ; + } + else if (MATCH("config", "pmon_event_port")) + { + config_ptr->pmon_event_port = atoi(value); + config_ptr->mask |= CONFIG_TX_PORT ; + } + else if (MATCH("config", "pmon_amon_port")) + { + config_ptr->pmon_amon_port = atoi(value); + config_ptr->mask |= CONFIG_RX_PORT ; + } + else if (MATCH("config", "pmon_pulse_port")) + { + config_ptr->pmon_pulse_port = atoi(value); + config_ptr->mask |= CONFIG_PULSE_PORT ; + } + else if (MATCH("config", "audit_period")) + { + config_ptr->audit_period = atoi(value); + config_ptr->mask |= CONFIG_AUDIT_PERIOD ; + } + else if (MATCH("config", "hostwd_update_period")) + { + config_ptr->hostwd_update_period = atoi(value); + config_ptr->mask |= CONFIG_HOSTWD_PERIOD ; + } + else if (MATCH("timeouts", "start_delay")) + { + config_ptr->start_delay = atoi(value); + config_ptr->mask |= CONFIG_START_DELAY ; + } + return (PASS); +} + + +/***************************************************************************** + * + * Name : pmon_load_config + * + * Purpose : Read process config file settings into the daemon configuration + * + *****************************************************************************/ +int pmon_process_config ( void * user, + const char * section, + const char * 
name, + const char * value) +{ + int rc = FAIL ; /* stays FAIL only for a key that no branch below recognizes while mode is "status" */ + process_config_type * ptr = (process_config_type*)user; + + if (MATCH("process", "process")) + { + ptr->mask |= CONF_PROCESS ; + ptr->status_mask |= CONF_PROCESS ; + ptr->process = strdup(value); + dlog1 ("Process : %s\n", ptr->process ); + rc = PASS ; + } + else if (MATCH("process", "service")) /* same mutually exclusive key chain as every other key */ + { + ptr->service = strdup(value); + dlog1 ("Service : %s\n", ptr->service ); + rc = PASS ; + } + else if (MATCH("process", "script")) + { + ptr->mask |= CONF_SCRIPT ; + ptr->status_mask |= CONF_SCRIPT ; + ptr->script = strdup(value); + dlog1 ("Script : %s\n", ptr->script ); rc = PASS ; /* recognized key ; report PASS like every other branch */ + } + else if (MATCH("process", "style")) + { + ptr->mask |= CONF_STYLE ; + ptr->status_mask |= CONF_STYLE ; + ptr->style = strdup(value); + dlog1 ("Style : %s\n", ptr->style ); + rc = PASS ; + } + else if (MATCH("process", "pidfile")) + { + ptr->mask |= CONF_PIDFILE ; + ptr->pidfile = strdup(value); + dlog1 ("Pid File : %s\n", ptr->pidfile ); + rc = PASS ; + } + else if (MATCH("process", "severity")) + { + ptr->mask |= CONF_SEVERITY ; + ptr->status_mask |= CONF_SEVERITY ; + ptr->severity = strdup(value); + dlog1 ("Severity : %s\n", ptr->severity ); + rc = PASS ; + } + else if (MATCH("process", "restarts")) + { + ptr->mask |= CONF_RESTARTS ; + ptr->status_mask |= CONF_RESTARTS ; + ptr->restarts = atoi(value); + dlog1 ("Restarts : %d\n", ptr->restarts ); + rc = PASS ; + } + else if (MATCH("process", "interval")) + { + ptr->mask |= CONF_INTERVAL ; + ptr->status_mask |= CONF_INTERVAL ; + ptr->interval = atoi(value); + dlog1 ("Interval : %d\n", ptr->interval ); + rc = PASS ; + } + else if (MATCH("process", "debounce")) + { + ptr->mask |= CONF_DEBOUNCE ; + ptr->debounce = atoi(value); + dlog1 ("Debounce : %d\n", ptr->debounce ); + rc = PASS ; + } + else if (MATCH("process", "startuptime")) + { + ptr->startuptime = atoi(value); + dlog1 ("Startuptime: %d\n", ptr->startuptime ); + rc = PASS ; + } + else if (MATCH("process", "subfunction")) + { + ptr->subfunction = 
strdup(value); + dlog1 ("Subfunction: %s\n", ptr->subfunction ); + rc = PASS ; + } + + else if (MATCH("process", "mode")) + { + // ptr->mask |= CONF_MODE ; + ptr->mode = strdup(value); + if (( strcmp(ptr->mode, "active" )) && + ( strcmp(ptr->mode, "passive" )) && + ( strcmp(ptr->mode, "status" ))) + { + ptr->ignore = true ; + dlog1 ("Mode : ignore\n"); + } + else + { + dlog1 ("Mode : %s\n", ptr->mode ); + } + rc = PASS ; + } + else if (MATCH("process", "quorum")) + { + if (atoi(value) > 0) + { + ptr->quorum = true; + } + dlog1 ("Quorum : %d\n", (int) ptr->quorum ); + rc = PASS ; + } + else if (MATCH("process", "full_init_reqd")) + { + if (atoi(value) > 0) + { + ptr->full_init_reqd = true; + } + dlog1 ("Full_init_reqd : %d\n", (int) ptr->full_init_reqd ); /* log the flag parsed here, not the quorum flag */ + rc = PASS ; + } + if (( ptr->mode != NULL ) && ( !strcmp(ptr->mode, "active" ))) /* keys below are only honoured once mode has been parsed as "active" */ + { + if (MATCH("process", "port")) + { + ptr->amask |= CONF_PORT ; + ptr->port = atoi(value); + dlog1 ("Active Port: %d\n", ptr->port ); + rc = PASS ; + } + else if (MATCH("process", "period")) + { + ptr->amask |= CONF_PERIOD ; + ptr->period = atoi(value); + dlog1 ("Period : %d\n", ptr->period ); + rc = PASS ; + } + else if (MATCH("process", "timeout")) + { + ptr->amask |= CONF_TIMEOUT ; + ptr->timeout = atoi(value); + dlog1 ("Timeout : %d\n", ptr->timeout ); + rc = PASS ; + } + else if (MATCH("process", "threshold")) + { + ptr->amask |= CONF_THRESHOLD ; + ptr->threshold = atoi(value); + dlog1 ("Threshold : %d\n", ptr->threshold ); + rc = PASS ; + } + } + + if (( ptr->mode != NULL ) && ( !strcmp(ptr->mode, "status" ))) /* keys below are only honoured once mode has been parsed as "status" */ + { + if (MATCH("process", "period")) + { + ptr->status_mask |= CONF_PERIOD ; + ptr->period = atoi(value); + dlog1 ("Period : %d\n", ptr->period ); + rc = PASS ; + } + else if (MATCH("process", "timeout")) + { + ptr->status_mask |= CONF_TIMEOUT ; + ptr->timeout = atoi(value); + dlog1 ("Timeout : %d\n", ptr->timeout ); + rc = PASS ; + } + else if (MATCH("process", "status_arg")) + { + ptr->status_mask |= CONF_STATUS_ARG ; + 
ptr->status_arg = strdup(value); + dlog1 ("status script argument : %s\n", ptr->status_arg ); + rc = PASS ; + } + else if (MATCH("process", "start_arg")) + { + ptr->status_mask |= CONF_START_ARG ; + ptr->start_arg = strdup(value); + dlog1 ("start script argument : %s\n", ptr->start_arg ); + rc = PASS ; + } + else if (MATCH("process", "status_failure_text")) + { + ptr->status_failure_text_file = strdup(value); + dlog1 ("Status error text file : %s\n", ptr->status_failure_text_file); + rc = PASS ; + } + } + + else + rc = PASS ; + + return (rc); +} + +/***************************************************************************** + * + * Name : daemon_configure + * + * Purpose : Read process config file settings into the daemon configuration + * + *****************************************************************************/ +int daemon_configure ( void ) +{ + int rc = PASS ; + + if (ini_parse( CONFIG_FILE, pmon_config_handler, &pmon_config) < 0) + { + elog("Can't load '%s'\n", CONFIG_FILE ); + } + + get_debug_options ( CONFIG_FILE, &pmon_config ); + + /* Verify loaded config against an expected mask + * as an ini file fault detection method */ + if ( pmon_config.mask != CONFIG_MASK ) + { + elog ("Error: Agent configuration failed (%x)\n", + ((-1 ^ pmon_config.mask) & CONFIG_MASK)); + return (FAIL_INI_CONFIG); + } + + /* This ensures any link aggregation interface overrides the physical */ + pmon_config.mgmnt_iface = daemon_get_iface_master ( pmon_config.mgmnt_iface ); + + /* Log the startup settings */ + ilog("Interface : %s\n", pmon_config.mgmnt_iface ); + ilog("Event Port : %d\n", pmon_config.pmon_event_port ); + + get_iface_macaddr ( pmon_config.mgmnt_iface, pmon_ctrl.my_macaddr ); + get_iface_address ( pmon_config.mgmnt_iface, pmon_ctrl.my_address, true ); + get_hostname (&pmon_ctrl.my_hostname[0], MAX_HOST_NAME_SIZE ); + + /* Manage the daemon pulse period setting - ensure in bound values */ + if ( pmon_config.audit_period < PMON_MIN_AUDIT_PERIOD ) + { + wlog 
("Pulse Period: %d msecs (rounded up)\n", + PMON_MIN_AUDIT_PERIOD ); + pmon_ctrl.pulse_period = PMON_MIN_AUDIT_PERIOD ; + } + else if ( pmon_config.audit_period > PMON_MAX_AUDIT_PERIOD ) + { + wlog ("Pulse Period: %d msecs (rounded down)\n", + PMON_MAX_AUDIT_PERIOD ); + pmon_ctrl.pulse_period = PMON_MAX_AUDIT_PERIOD ; + } + else + { + pmon_ctrl.pulse_period = pmon_config.audit_period ; + ilog("Pulse Period: %d\n", pmon_ctrl.pulse_period ); + } + + /* Manage the daemon pulse period setting - ensure in bound values */ + if ( pmon_config.start_delay < PMON_MIN_START_DELAY ) + { + wlog ("Start Delay : %d msecs (rounded up)\n", + PMON_MIN_AUDIT_PERIOD ); + pmon_config.start_delay = PMON_MIN_START_DELAY ; + } + else if ( pmon_config.start_delay > PMON_MAX_START_DELAY ) + { + wlog ("Start Delay : %d msecs (rounded down)\n", + PMON_MAX_AUDIT_PERIOD ); + pmon_config.start_delay = PMON_MAX_START_DELAY ; + } + else + { + ilog("Start Delay : %d\n", pmon_config.start_delay ); + } + + if ( (rc = pmon_hdlr_init (&pmon_ctrl)) != PASS ) + { + elog ("pmon_hdlt_init failed\n"); + rc = FAIL_HDLR_INIT ; + } + ilog ("Function : %d\n", pmon_ctrl.function ); + ilog ("SubFunction : %d\n", pmon_ctrl.subfunction ); + + pmon_ctrl.reload_config = true ; + pmon_ctrl.patching_in_progress = false ; + + return (rc); +} + + +/****************************/ +/* Initialization Utilities */ +/****************************/ + +/* Setup the daemon messaging interfaces/sockets */ +int socket_init ( void ) +{ + pmon_msg_init ( ); + + /* Setup the pmon event port. + * This is the port that pmon sends events + * to maintenance on */ + int rc = event_port_init ( pmon_config.mgmnt_iface , + pmon_config.pmon_event_port ); + + /* Setup the pmon autonomout pulse port. + * This is the port that pmon sends i'm alive messages + * to the hbsClient - the watcher of the watcher */ + if ( rc == PASS ) + { + rc = pulse_port_init ( ); + } + + /* Init the avtive monitor receive port. 
+ * This is the port that all active monitored + * processes send their responses on */ + if ( rc == PASS ) + { + rc = amon_port_init ( pmon_config.pmon_amon_port ); + } + + /* Setup the pmon hostwd connection. + * This lets pmon commuicate essential process info to the + * host watchdog process */ + if ( rc == PASS ) + { + rc = hostwd_port_init ( ); + } + + pmon_inbox_init ( ); + + return (rc); +} + +/* The main heartbeat service loop */ +int daemon_init ( string iface, string nodetype_str ) +{ + int rc = PASS ; + + /* init the control struct */ + memset ( &pmon_ctrl.my_hostname[0], 0, sizeof(pmon_ctrl.my_hostname)); + pmon_ctrl.my_macaddr = "" ; + pmon_ctrl.my_address = "" ; + pmon_ctrl.pulse_period = PMON_MAX_AUDIT_PERIOD ; + pmon_ctrl.processes = 0 ; + pmon_ctrl.system_type = daemon_system_type (); + + /* sets in pmonHdlr.cpp */ + pmon_set_ctrl_ptr ( &pmon_ctrl ); + + pmonAlarm_init (); + + /* Assign interface to config */ + pmon_config.mgmnt_iface = (char*)iface.data() ; + + if ( daemon_files_init ( ) != PASS ) + { + elog ("Pid, log or other files could not be opened\n"); + return ( FAIL_FILES_INIT ) ; + } + + /* Bind signal handlers */ + if ( daemon_signal_init () != PASS ) + { + elog ("daemon_signal_init failed\n"); + return ( FAIL_SIGNAL_INIT ); + } + + if ( set_host_functions ( nodetype_str, &pmon_ctrl.nodetype, &pmon_ctrl.function, &pmon_ctrl.subfunction ) != PASS ) + { + elog ("failed to extract nodetype\n"); + return ( FAIL_NODETYPE ); + } + + /************************************************************************ + * There is no point continuing with init ; i.e. running daemon_configure, + * initializing sockets and trying to query for an ip address until the + * daemon's configuration requirements are met. Here we wait for those + * flag files to be present before continuing. 
+ ************************************************************************ + * Wait for /etc/platform/.initial_config_complete & /var/run/.goenabled */ + daemon_wait_for_file ( CONFIG_COMPLETE_FILE , 0); + daemon_wait_for_file ( GOENABLED_MAIN_READY , 0); + + /* Configure the daemon */ + if ( (rc = daemon_configure ( )) != PASS ) + { + elog ("Daemon service configuration failed (rc:%i)\n", rc ); + rc = FAIL_DAEMON_CONFIG ; + } + + /* Setup the messaging sockets */ + else if ( (rc = socket_init ( )) != PASS ) + { + elog ("socket initialization failed (rc:%d)\n", rc ); + rc = FAIL_SOCKET_INIT ; + } + else + { + /* Init the pmon service timers */ + pmon_timer_init (); + } + + /* + * Setup the recovery method based on the O/S + * + * WRL - SYSVINIT + * CENTOS - SYSTEMD + * + **/ + if ( daemon_is_file_present ( CENTOS_RELEASE_FILE ) ) + { + pmon_ctrl.recovery_method = PMOND_RECOVERY_METHOD__SYSTEMD ; + pmon_ctrl.system_state = get_system_state(); + } + else + { + pmon_ctrl.recovery_method = PMOND_RECOVERY_METHOD__SYSVINIT ; + } + ilog ("Recovery Method: %s\n", pmon_ctrl.recovery_method ? "systemd via systemctl" : "sysvinit via script" ); + return (rc); +} + +/* Start the service + * + * 1. Wait for host config (install) complete + * 2. Wait for goenable + * 3. Do startup delay + * 4. 
run the pmon service inside pmonHdlr.cpp + * + */ +void daemon_service_run ( void ) +{ + int rc = PASS ; + + process_config_type dummy_process ; + memset ( (char*)&dummy_process, 0, (sizeof(process_config_type))); + dummy_process.process = strdup("pmond"); + + ilog ("Transmitting: 'monitor ready event'\n" ); + do + { + rc = pmon_send_event ( MTC_EVENT_MONITOR_READY, &dummy_process ) ; + if ( rc == RETRY ) + { + mtcWait_secs ( 2 ); + } + if ( rc == FAIL ) + { + elog ("Failed to Send READY event (rc=%d)\n", rc ); + elog ("Trying to provide service anyway\n"); + } + } while ( rc == RETRY ) ; + + /* Wait a few seconds after go enabled to + * allow the rest of init to finish before + * starting to process monitor */ + ilog ("Delaying %d seconds to allow other processes to start\n", pmon_config.start_delay); + for ( int i = 0 ; i < pmon_config.start_delay ; i++ ) + { + mtcWait_secs ( 1 ); + pmon_send_pulse ( ); + } + + pmon_service ( &pmon_ctrl ); + daemon_exit (); +} + + + +const char MY_DATA [100] = { "eieio\n" } ; +const char * daemon_stream_info ( void ) +{ + return (&MY_DATA[0]); +} + +/** Teat Head Entry */ +int daemon_run_testhead ( void ) +{ + ilog ("Empty test head.\n"); + return (PASS); +} diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/pmonMsg.cpp b/mtce-common/cgts-mtce-common-1.0/pmon/pmonMsg.cpp new file mode 100644 index 00000000..4b388b01 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/pmon/pmonMsg.cpp @@ -0,0 +1,788 @@ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform Process Monitor Service Messaging + */ + +#include +#include + +#include "pmon.h" +#include "nodeMacro.h" +#include "pmonAlarm.h" /* for ... 
this module's alarm utilities / defs */ +#include "hostwMsg.h" +#include "jsonUtil.h" + +/* Where to send events */ +string mtcAgent_ip = "" ; + +static int pulse_log_threshold ; + +/** + * Messaging Socket Control Struct - The allocated struct + */ +static pmon_socket_type pmon_sock; +pmon_socket_type * pmon_getSock_ptr ( void ) +{ + return ( &pmon_sock ); +} + +msgSock_type * get_mtclogd_sockPtr ( void ) +{ + return (&pmon_sock.mtclogd); +} + +/*********************************************************************/ +/* Setup pmon broadcast transmit port/socket */ +/*********************************************************************/ +int event_port_init ( const char * iface , int port ) +{ + int rc = PASS ; + + /* Load the event port number */ + pmon_sock.event_port = port ; + + mtcAgent_ip = getipbyname ( CONTROLLER ); + ilog ("ControllerIP: %s\n", mtcAgent_ip.c_str()); + + + pmon_sock.event_sock = new msgClassTx(mtcAgent_ip.c_str(), port, IPPROTO_UDP, iface); + if (pmon_sock.event_sock->return_status!=PASS) + { + elog ("Failed to setup pmond to mtce transmit event port %d\n", port ); + return (pmon_sock.event_sock->return_status) ; + } + + return rc; +} + + +/* Setup the Unix Domain Transmit Pulse Socket */ +int pulse_port_init ( void ) +{ + int rc = PASS ; + int port = daemon_get_cfg_ptr()->pmon_pulse_port; + pmon_sock.pulse_sock = new msgClassTx(LOOPBACK_IP, port, IPPROTO_UDP); + + if (pmon_sock.pulse_sock->return_status!=PASS) return (pmon_sock.pulse_sock->return_status) ; + + snprintf (&pmon_sock.pulse.hdr[0], MSG_HEADER_SIZE, "%s", get_pmond_pulse_header()); + pmon_sock.msg_len = ((sizeof(mtc_message_type))-(BUF_SIZE)); + return (rc); +} + +/* Setup the Unix Host Watchdog Socket */ +#define _THROTTLE_LEVEL (5) +int hostwd_port_init ( void ) +{ + int rc = FAIL ; + int fail_count = 0 ; + memset(&pmon_sock.hostwd_addr, 0, sizeof(pmon_sock.hostwd_addr)); + while (rc == FAIL) + { + int len; + int connected; + pmon_sock.hostwd_sock = socket(AF_UNIX, SOCK_DGRAM, 
0); + + if (pmon_sock.hostwd_sock <= 0) { + if ( fail_count++ > _THROTTLE_LEVEL ) { + wlog("Could not connect to create hostwd socket - will retry\n"); + } + sleep(1); + continue; + } + + /* Set up the socket address */ + memset (&pmon_sock.hostwd_addr, 0, sizeof(pmon_sock.hostwd_addr)); + pmon_sock.hostwd_addr.sun_family = AF_UNIX; + + /* Unix abstract namespace takes a string that starts with a NULL + * as the identifier. Thus, we need a pointer to byte[1] of the + * sockaddr_un.sun_path (a char array) + */ + strncpy( &(pmon_sock.hostwd_addr.sun_path[1]), + HOSTW_UNIX_SOCKNAME, + UNIX_PATH_MAX-1); + len = sizeof(pmon_sock.hostwd_addr); + + connected = connect( pmon_sock.hostwd_sock, (sockaddr*) &pmon_sock.hostwd_addr, + len); + if (connected == -1) { + if ( fail_count++ > _THROTTLE_LEVEL ) { + wlog("Could not connect to hostwd port - will retry\n"); + } + close(pmon_sock.hostwd_sock); + pmon_sock.hostwd_sock = 0; + sleep(1); + } else { + rc = PASS; + } + } + return (rc); +} + +/* Build a message for host watchdog, and send it */ +int pmon_send_hostwd ( void ) +{ + if (pmon_sock.hostwd_sock) + { + mtc_message_type msg; + int bytes; + int i; + + memset(&msg, 0, sizeof(msg)); + + memcpy (&msg.hdr, get_cmd_req_msg_header(), MSG_HEADER_SIZE); + msg.ver = MTC_CMD_VERSION; + msg.rev = MTC_CMD_REVISION; + msg.cmd = MTC_CMD_NONE; /* All good - take no action */ + + for (i = 0; i < get_ctrl_ptr()->processes; i++) + { + process_config_type * pProcess = get_process_config_ptr (i); + + if (pProcess->quorum && pProcess->quorum_unrecoverable) + { + ilog ("%s unrecoverable ; reporting this to host watchdog\n", pProcess->process); + snprintf ( (char*) &(msg.buf), + BUF_SIZE - 1, + "PMON detected %s failure", + pProcess->process); + + msg.cmd = MTC_EVENT_PMON_CRIT; /* things are bad */ + } + } + + bytes = sendto( pmon_sock.hostwd_sock, + (char*)&msg, + sizeof(msg), + 0, /* flags */ + (struct sockaddr *) &pmon_sock.hostwd_addr, + sizeof(pmon_sock.hostwd_addr) ); + + if (bytes == 
sizeof(msg)) + { + return (PASS); + } + else + { + elog("Error sending message to host watchdog -- error %d (%s)\n", + errno, strerror(errno)); + return (FAIL); + } + } + return (FAIL); +} + +/****************************/ +/* Initialization Utilities */ +/****************************/ + +/* Init the messaging socket control structure + * The following messaging interfaces use this structure and + * are initialized separately + * + * pulse_port_init - port that pmon I'm alive messages are transmitted on + * event_port_init - port that pmon sends events to mtce on + * amon_port_init - aggrigated active process monitor receive port + * + * */ +void pmon_msg_init ( void ) +{ + memset(&pmon_sock, 0, sizeof(pmon_sock)); + pulse_log_threshold = 0 ; +} + +void pmon_msg_fini ( void ) +{ + /* Close the pmond pulse socket */ + if ( pmon_sock.cmd_sock ) + delete (pmon_sock.cmd_sock ); + if ( pmon_sock.event_sock ) + delete (pmon_sock.event_sock); + if (pmon_sock.pulse_sock) + delete pmon_sock.pulse_sock; + if ( pmon_sock.amon_sock ) + close (pmon_sock.amon_sock); + if ( pmon_sock.hostwd_sock ) + close (pmon_sock.hostwd_sock); +} + +/* Initialize the command receive port + * Its a LO interface only */ +int pmon_inbox_init ( void ) +{ + pmon_sock.cmd_sock = new msgClassRx(LOOPBACK_IP,daemon_get_cfg_ptr()->pmon_cmd_port,IPPROTO_UDP); + if ( pmon_sock.cmd_sock ) + { + pmon_sock.cmd_port = daemon_get_cfg_ptr()->pmon_cmd_port ; + pmon_sock.cmd_sock->sock_ok(true); + return (PASS); + } + return (FAIL); +} + +int pmon_send_pulse ( void ) +{ + #define LOG_THROTTLE 1000 + + int bytes = pmon_sock.pulse_sock->write((char*)&pmon_sock.pulse, pmon_sock.msg_len); + + if ( bytes <= 0 ) + { + /* Force reconnect attempt on next go around */ + elog ("Cannot sendto hbsClient (bytes=%d) (%d:%s)\n", + bytes , errno, strerror(errno)); + } + else + { + ilog_throttled ( pulse_log_threshold, LOG_THROTTLE, "sent health pulse - %d bytes (throttled:%d)\n", bytes, LOG_THROTTLE ); + + dlog3 ( "Pulse: %s 
(%d:%d)\n", &pmon_sock.pulse.hdr[0], + pmon_sock.pulse_sock->get_dst_addr()->getSockLen(), bytes ); + } + return (PASS) ; +} + + +int pmon_send_event ( unsigned int event_cmd , process_config_type * ptr ) +{ + mtc_message_type event ; + + int rc = PASS ; + int bytes = 0 ; + + /* Don't report events while we are in reset mode */ + if ( daemon_is_file_present ( NODE_RESET_FILE ) ) + return ( PASS ); + + memset (&event, 0 , sizeof(mtc_message_type)); + + if (( event_cmd == MTC_EVENT_MONITOR_READY) || + ( event_cmd == MTC_EVENT_PMON_LOG) || + ( event_cmd == MTC_EVENT_PMON_MINOR) || + ( event_cmd == MTC_EVENT_PMON_MAJOR) || + ( event_cmd == MTC_EVENT_PMON_CRIT ) || + ( event_cmd == MTC_EVENT_PMON_CLEAR )) + { + pmon_ctrl_type * ctrl_ptr = get_ctrl_ptr () ; + + snprintf ( &event.hdr[0], MSG_HEADER_SIZE, "%s", get_mtce_event_header()); + + /* Set the version/revision for PMON messages. */ + event.ver = MTC_MSG_VERSION_15_12_GA_PMON ; + event.rev = MTC_MSG_REVISION_15_12_GA_PMON ; + + if ( ptr->process ) + { + /* We don't use the buffer for pmon events to remove it from the size */ + bytes = ((sizeof(mtc_message_type))-(BUF_SIZE-MAX_FILENAME_LEN)); + + snprintf( &event.buf[0], MAX_PROCESS_NAME_LEN, "%s", ptr->process ); + + /* Put the process function in parm zero of the event message */ + event.num = 1 ; + event.parm[0] = ctrl_ptr->nodetype ; /* default to node type */ + + if ( event_cmd == MTC_EVENT_PMON_CLEAR ) + { + dlog ("pmond degrade clear\n" ); + snprintf( &event.buf[0], MAX_PROCESS_NAME_LEN, "%s", "pmond" ); + } + else if (( event_cmd == MTC_EVENT_PMON_CRIT ) || + ( event_cmd == MTC_EVENT_PMON_MAJOR )) + { + wlog ("%s caused degrade assert\n", ptr->process ); + } + else if ( event_cmd == MTC_EVENT_PMON_MINOR ) + { + slog ("degrade does not apply to minor\n" ); + rc = FAIL_BAD_CASE ; + } + + /* override with subfunction case */ + if (( ctrl_ptr->subfunction != 0 ) && + ( ctrl_ptr->subfunction != ctrl_ptr->function )) + { + if ( ptr->subfunction != NULL ) + { + 
string temp = ptr->subfunction ; + event.parm[0]= get_host_function_mask (temp) ; + if ( ( event_cmd == MTC_EVENT_PMON_MINOR) || + ( event_cmd == MTC_EVENT_PMON_MAJOR) || + ( event_cmd == MTC_EVENT_PMON_LOG) || + ( event_cmd == MTC_EVENT_PMON_CRIT ) ) + { + mlog ("%s process failed\n", ptr->process ); + } + else if (( event_cmd == MTC_EVENT_PMON_CLEAR ) && ( ptr->was_failed == true )) + { + ilog ("%s process recovered\n", ptr->process ); + ptr->was_failed = false ; + } + } + } + } + } + else if ( event_cmd == MTC_EVENT_LOOPBACK ) + { + snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_loopback_header()); + + /* We don't use the buffer for pmon events to remove it from the size */ + bytes = ((sizeof(mtc_message_type))-(BUF_SIZE)); + } + else + { + elog ("Unsupported process monitor event (%d)\n", event_cmd ); + return ( FAIL_BAD_CASE ); + } + + event.cmd = event_cmd ; + + print_mtc_message ( LOCALHOST, MTC_CMD_TX, event, get_iface_name_str(MGMNT_INTERFACE), false ); + + /* Send the event */ + if ((rc = pmon_sock.event_sock->write((char*)&event.hdr[0], bytes)) != bytes ) + { + elog ("Message send failed. 
(%d)\n", rc); + elog ("Message: %d bytes to <%s:%d>\n", bytes, + pmon_sock.event_sock->get_dst_addr()->toString(), + pmon_sock.event_sock->get_dst_addr()->getPort()); + } + else + { + string severity = get_event_str ( event.cmd ); + mlog ("Sending '%s' event for process '%s' to %s:%d (bytes:%d)\n", + severity.c_str(), event.buf, + pmon_sock.event_sock->get_dst_addr()->toString(), + pmon_sock.event_sock->get_dst_addr()->getPort(), bytes); + rc = PASS ; + } + return rc ; +} + +/************************************************************************** + * + * ********** A C T I V E M O N I T O R I N G ********** + * + *************************************************************************/ + + +int amon_port_init ( int port ) +{ + int val = 1 ; + int rc = FAIL ; + if ( port ) + { + pmon_sock.amon_port = port ; + pmon_sock.amon_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if ( 0 >= pmon_sock.amon_sock ) + return (-errno); + + if ( setsockopt ( pmon_sock.amon_sock , SOL_SOCKET, SO_REUSEADDR, &val, sizeof(int)) == -1 ) + { + wlog ( "amon: failed to set active monitor socket as re-useable (%d:%s)\n", + errno, strerror(errno)); + } + + /* Set socket to be non-blocking. 
*/ + rc = ioctl(pmon_sock.amon_sock, FIONBIO, (char *)&val); + if ( 0 > rc ) + { + elog ("Failed to set amon socket non-blocking\n"); + } + + /* Setup with localhost ip */ + memset(&pmon_sock.amon_addr, 0, sizeof(struct sockaddr_in)); + pmon_sock.amon_addr.sin_family = AF_INET ; + // pmon_sock.amon_addr.sin_addr.s_addr = htonl(INADDR_ANY); + pmon_sock.amon_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + pmon_sock.amon_addr.sin_port = htons(pmon_sock.amon_port) ; + + /* bind socket to the receive addr */ + if ( bind ( pmon_sock.amon_sock, (const struct sockaddr *)&pmon_sock.amon_addr, sizeof(struct sockaddr_in)) == -1 ) + { int bind_errno = errno ; /* capture now ; the elog and close calls below may overwrite errno */ + elog ( "failed to bind to rx socket with port %d (%d:%s)\n", port, bind_errno, strerror(bind_errno) ); + close (pmon_sock.amon_sock); + pmon_sock.amon_sock = 0 ; + return (-bind_errno); + } + else + { + rc = PASS ; + } + } + else + { + elog ("No port specified\n"); + } + + return (rc) ; +} + +/* Open the UDP transmit socket used to send active monitor requests to this process ; returns PASS, FAIL when no port is configured, or -errno when socket() fails */ +int open_process_socket ( process_config_type * ptr ) +{ + int rc = FAIL ; + + /* Prop the port number into the message struct */ + if ( ptr->port ) + ptr->msg.tx_port = ptr->port ; + + if ( ptr->msg.tx_port ) + { + /* if the sock is already open then close it first */ + if ( ptr->msg.tx_sock ) + { + wlog ("%s open on already open socket %d, closing first\n", + ptr->process, ptr->msg.tx_sock ); + close (ptr->msg.tx_sock); + } + ptr->msg.tx_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if ( 0 >= ptr->msg.tx_sock ) + return (-errno); + + /* Setup with localhost ip */ + memset(&ptr->msg.tx_addr, 0, sizeof(struct sockaddr_in)); + ptr->msg.tx_addr.sin_family = AF_INET ; + ptr->msg.tx_addr.sin_addr.s_addr = inet_addr(LOOPBACK_IP); + ptr->msg.tx_addr.sin_port = htons(ptr->msg.tx_port) ; + + rc = PASS ; + } + else + { + elog ("%s has no port specified\n", ptr->process ); + } + return (rc) ; +} + +/* Close the process transmit socket, if open, and forget its descriptor */ +void close_process_socket ( process_config_type * ptr ) +{ + if ( ptr->msg.tx_sock ) + { close ( ptr->msg.tx_sock ); ptr->msg.tx_sock = 0 ; } /* zero it so open_process_socket does not close a stale, possibly reused, fd */ +} + +int amon_service_inbox ( int processes ) +{ + #define 
MAX_T 100 + int count = 0 ; + int bytes = 0 ; + char buf[AMON_MAX_LEN] ; + socklen_t len = sizeof(struct sockaddr_in) ; + do + { + char str[AMON_MAX_LEN] ; + unsigned int magic = 0 ; + int seq = 0 ; + memset ( buf,0,sizeof(buf)); + memset ( str,0,sizeof(str)); + bytes = recvfrom( pmon_sock.amon_sock, buf, AMON_MAX_LEN, 0, (struct sockaddr *)&pmon_sock.amon_addr, &len); + if ( bytes > 0 ) + { + sscanf ( buf, "%99s %8x %d", str, &magic, &seq ); + if ( AMON_MAGIC_NUM == (magic ^ -1) ) + { + if ( str[0] != '\0' ) + { + bool found = false ; + for ( int i = 0 ; i < processes ; i++ ) + { + process_config_type * ptr = get_process_config_ptr (i); + if ( ptr != NULL ) + { + if ( ! strcmp ( str, ptr->process ) ) + { + found = true ; + alog ( "%s %x %d (found)\n", ptr->process, magic, seq ); + if ( ptr->waiting == true ) + { + ptr->rx_sequence = seq ; + } + else + { + wlog_throttled ( count , MAX_T, "%s unexpected monitor pulse\n", ptr->process ); + } + break ; + } + } + } + if ( !found ) + { + wlog_throttled (count, MAX_T, "Unexpected message (%s)\n", buf ); + } + } + else + { + wlog ("Null string !\n"); + } + } + else + { + wlog_throttled ( count, MAX_T, "message with invalid magic number (%x)\n", magic ); + } + } + else if (( 0 > bytes ) && ( errno != EINTR ) && ( errno != EAGAIN )) + { + wlog_throttled ( count , MAX_T, "receive error (%d:%s)\n", errno, strerror(errno)); + } + } while ( bytes > 0 ) ; + + /* Put the messages in he structs */ + return (PASS); +} + + +int amon_send_request ( process_config_type * ptr ) +{ + int rc ; + + ptr->rx_sequence = 0 ; + memset ( ptr->msg.tx_buf, 0, sizeof(ptr->msg.tx_buf)); + sprintf ( ptr->msg.tx_buf, "%s %x %u", ptr->process, AMON_MAGIC_NUM, ++ptr->tx_sequence ) ; + + rc = sendto ( ptr->msg.tx_sock, + ptr->msg.tx_buf , + strlen ( ptr->msg.tx_buf), 0, + (struct sockaddr *) &ptr->msg.tx_addr, + sizeof(struct sockaddr_in)); + if ( 0 >= rc ) + { + elog ("%s sendto error (%d:%s) (%s) (%s)\n", + ptr->process, + errno , strerror(errno), + 
ptr->msg.tx_buf,
+               inet_ntoa(ptr->msg.tx_addr.sin_addr));
+    }
+    else
+    {
+        mlog3 ("%s\n", &ptr->msg.tx_buf[0] );
+        rc = PASS ;
+    }
+    return (rc);
+}
+
+
+#define MAX_COMMANDS (4)
+
+/******************************************************************
+ *
+ * Handle pmon command request messages
+ *
+ * Supports a max MAX_COMMANDS number of queued messages per call
+ * Checks for valid command string and content before action
+ *
+ * message format : json string { key:value , key:value }
+ *
+ * supported requests:
+ *
+ *   { "command":"start",   "process":"<name>" }
+ *   { "command":"stop",    "process":"<name>" }
+ *   { "command":"restart", "process":"<name>" }
+ *
+ * Behavior summary:
+ *
+ *  - Reads up to MAX_COMMAND_LEN bytes per message from the command
+ *    socket and requires a null terminator inside the buffer.
+ *  - Requests that name a process in "status" monitoring mode are
+ *    rejected for all three commands.
+ *  - A "restart" of "pmond" itself calls daemon_exit().
+ *  - Requests for a process that has no process_config_type entry
+ *    are logged and dropped.
+ *  - Returns immediately when the socket read reports EAGAIN ;
+ *    otherwise loops until a zero byte read or MAX_COMMANDS reads.
+ *
+ *****************************************************************/
+void pmon_service_inbox ( void )
+{
+    int bytes = 0 ;
+    int retries = 0 ;
+    do
+    {
+        /* Receive command messages */
+        char cmd_buf[MAX_COMMAND_LEN] ;
+
+        memset ( &cmd_buf, 0, sizeof(cmd_buf));
+
+        bytes = pmon_sock.cmd_sock->read((char*)&cmd_buf, MAX_COMMAND_LEN );
+        if ( bytes > 0 )
+        {
+            if ( bytes <= MAX_COMMAND_LEN )
+            {
+                /* The buffer was zeroed above, so a message shorter than
+                 * MAX_COMMAND_LEN is always terminated ; a message that
+                 * fills the whole buffer with non-null bytes is rejected. */
+                bool string_is_terminated = false ;
+                for ( int i = 0 ; i < MAX_COMMAND_LEN ; i++ )
+                {
+                    if ( cmd_buf[i] == '\0' )
+                    {
+                        string_is_terminated = true ;
+                        break ;
+                    }
+                }
+                if ( string_is_terminated )
+                {
+                    string command ;
+                    string process ;
+
+                    mlog1 ("rx <- %s\n", cmd_buf );
+                    int rc1 = jsonUtil_get_key_val ( &cmd_buf[0], "command", command );
+                    int rc2 = jsonUtil_get_key_val ( &cmd_buf[0], "process", process );
+                    mlog ("cmd:%s process:%s\n", command.c_str(), process.c_str());
+                    if ( rc1 || rc2 )
+                    {
+                        ilog ("failed to parse command request.\n");
+                        ilog ("... expecting: command:, process:\n");
+                        wlog ("... received : %s\n", cmd_buf );
+                    }
+                    else
+                    {
+                        if ( (!command.compare("none")) || (!process.compare("none")))
+                        {
+                            wlog ("one or more invalid command request key:value pairs\n");
+                            wlog ("... command:%s process:%s\n",
+                                       command.c_str(),
+                                       process.c_str());
+                        }
+                        /* handle start command
+                         *  - get the pointer to the specified process
+                         *  - if its in the stopped state then take it out of that state
+                         *  - inject it into the respawn phase of the passive monitor FSM
+                         */
+                        else if ( !command.compare("start"))
+                        {
+                            process_config_type * ptr = get_process_config_ptr ( process );
+                            ilog ("%s process 'start' request\n", process.c_str());
+                            if ( ptr != NULL )
+                            {
+                                if ( strcmp ( ptr->mode, "status" ) == 0 )
+                                {
+                                    wlog ("%s process-start rejected\n", process.c_str());
+                                    wlog ("%s ... status monitoring mode 'start' not supported\n", process.c_str());
+                                }
+                                else if ( ptr->stopped == true )
+                                {
+                                    mtcTimer_reset ( ptr->pt_ptr );
+                                    ptr->failed  = true  ; // so get_events will ignore it till process respawn is complete
+                                    ptr->stopped = false ; // take the process out of the stopped state
+                                    ptr->ignore  = false ; // have the fsm stop ignoring the process ; start respawn
+                                    passiveStageChange ( ptr, PMON_STAGE__RESPAWN );
+                                }
+                                else
+                                {
+                                    wlog ("%s process is not in the stopped state ; start request ignored\n", ptr->process );
+                                }
+                            }
+                            else
+                            {
+                                /* same handling as the restart path below */
+                                wlog ("%s process-start ; cannot execute, process not found\n",
+                                          process.c_str());
+                            }
+                        }
+                        /* handle stop command
+                         *  - get the pointer to the specified process
+                         *  - unregister the process to avoid a kernel notification
+                         *  - kill the process
+                         *  - put it in the ignored state
+                         *  - put it in the stopped state
+                         *  - reinitialize its active monitoring states and stats
+                         *  - start the auto recovery timer
+                         */
+                        else if ( !command.compare("stop"))
+                        {
+                            process_config_type * ptr = get_process_config_ptr ( process );
+                            ilog ("%s process 'stop' request\n", process.c_str());
+                            if ( ptr != NULL )
+                            {
+                                if ( strcmp ( ptr->mode, "status" ) == 0 )
+                                {
+                                    wlog ("%s process-stop rejected\n", process.c_str());
+                                    wlog ("%s ... status monitoring mode 'stop' not supported\n", process.c_str());
+                                }
+                                else if ( ptr->stopped == true )
+                                {
+                                    wlog ("%s process is already stopped ; stop request ignored\n", ptr->process );
+                                }
+                                else
+                                {
+                                    int auto_recovery_timeout = MTC_MINS_30 ;
+                                    unregister_process ( ptr );
+                                    kill_running_process ( ptr->pid );
+                                    ptr->stopped = true ;
+                                    ptr->ignore  = true ;
+                                    passiveStageChange ( ptr, PMON_STAGE__IGNORE ) ; /* as a backup */
+                                    if ( !strcmp ( ptr->mode, "active" ) )
+                                    {
+                                        activeStageChange ( ptr , ACTIVE_STAGE__IDLE );
+                                    }
+
+                                    mtcTimer_reset ( ptr->pt_ptr );
+                                    /* Start a recovery timer */
+                                    mtcTimer_start ( ptr->pt_ptr, pmon_timer_handler, auto_recovery_timeout );
+                                    ilog ("%s process 'stopped' by request ; auto restart in %d seconds\n",
+                                              ptr->process,
+                                              auto_recovery_timeout);
+                                }
+                            }
+                            else
+                            {
+                                /* same handling as the restart path below */
+                                wlog ("%s process-stop ; cannot execute, process not found\n",
+                                          process.c_str());
+                            }
+                        }
+                        /* handle restart command */
+                        else if ( !command.compare("restart"))
+                        {
+                            /* handle this 'pmond' process restart request */
+                            if ( !process.compare("pmond") )
+                            {
+                                ilog ("%s self-restart ; by request\n", process.c_str());
+                                /* process is auto restarted by systemd in centos or inittab in WRL */
+                                daemon_exit ();
+                            }
+                            else
+                            {
+                                process_config_type * ptr = get_process_config_ptr ( process );
+                                ilog ("%s process 'restart' request\n", process.c_str());
+                                if ( ptr != NULL )
+                                {
+                                    if ( strcmp ( ptr->mode, "status" ) == 0 )
+                                    {
+                                        wlog ("%s process-restart rejected\n", process.c_str());
+                                        wlog ("%s ... status monitoring mode restart not supported\n", process.c_str());
+                                    }
+                                    else if ( ptr->restart == false )
+                                    {
+                                        ilog ("%s process-restart ; by request\n", process.c_str());
+                                        ptr->restart = true ;
+                                        if ( ptr->stopped )
+                                        {
+                                            ptr->stopped = false ;
+                                            ptr->ignore = false ;
+                                        }
+                                        passiveStageChange ( ptr, PMON_STAGE__MANAGE );
+                                    }
+                                    else
+                                    {
+                                        ilog ("%s process-restart ; in progress\n",
+                                                  process.c_str());
+                                    }
+                                }
+                                else
+                                {
+                                    wlog ("%s process-restart ; cannot execute, process not found\n",
+                                              process.c_str());
+                                }
+                            }
+                        }
+                        else
+                        {
+                            wlog ("unsupported command:%s for process:%s\n",
+                                      command.c_str(), process.c_str());
+                        }
+                    } /* end else */
+                }
+                else
+                {
+                    wlog ("badly formed command request (%d) (not null terminated)\n", bytes );
+                }
+            }
+            else
+            {
+                ; /* message too big ; do not log to protect against DOS attack */
+            }
+        }
+        else if ( bytes < 0 )
+        {
+            if ( errno == EAGAIN )
+            {
+                return ;
+            }
+            else
+            {
+                wlog ("command socket read error (%d:%d:%m)\n", bytes, errno );
+            }
+        }
+        retries++ ;
+    } while ( ( bytes != 0 ) && ( retries < MAX_COMMANDS ) ) ;
+}
+
diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/acpid.conf b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/acpid.conf
new file mode 100644
index 00000000..7d5740c4
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/acpid.conf
@@ -0,0 +1,17 @@
+[process]
+process = acpid
+service = acpid
+pidfile = /var/run/acpid.pid
+script = /etc/init.d/acpid
+style = lsb ; ocf or lsb
+severity = minor ; minor, major, critical
+restarts = 3 ; restart retries before error assertion
+interval = 5 ; number of seconds to wait between restarts
+debounce = 20 ; number of seconds that a process needs to remain
+ ; running before degrade is removed and retry count
+ ; is cleared.
+startuptime = 5 ; Seconds to wait after process start before starting the debounce monitor +mode = passive ; Monitoring mode: passive (default) or active + ; passive: process death monitoring (default: always) + ; active : heartbeat monitoring, i.e. request / response messaging + ; ignore : do not monitor or stop monitoring diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/ntpd.conf b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/ntpd.conf new file mode 100644 index 00000000..568a89ee --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/ntpd.conf @@ -0,0 +1,20 @@ +[process] +process = ntpd +service = ntpd +pidfile = /var/run/ntp.pid +script = /etc/init.d/ntpd +style = lsb ; ocf or lsb +severity = minor ; minor, major, critical +restarts = 0 ; restart retries before error assertion +interval = 10 ; number of seconds to wait between restarts +debounce = 10 ; number of seconds that a process needs to remain + ; running before degrade is removed and retry count + ; is cleared. +; These settings will generate a log only without attempting to restart +; pmond will put the process into an ignore state after failure. + +startuptime = 180 ; Seconds to wait after process start before starting the debounce monitor +mode = passive ; Monitoring mode: passive (default) or active + ; passive: process death monitoring (default: always) + ; active : heartbeat monitoring, i.e. request / response messaging + ; ignore : do not monitor or stop monitoring diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon new file mode 100644 index 00000000..0b028808 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon @@ -0,0 +1,85 @@ +#! /bin/sh +# +# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. 
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+#
+# chkconfig: 2345 95 95
+#
+### BEGIN INIT INFO
+# Provides:          pmon
+# Default-Start:     3 5
+# Default-Stop:      0 1 2 6
+# Short-Description: process Monitor daemon
+### END INIT INFO
+
+# echo "7:3:respawn:/usr/local/bin/pmond" >> /etc/inittab
+
+. /etc/init.d/functions
+
+DAEMON_NAME="pmond"
+DAEMON="/usr/local/bin/${DAEMON_NAME}"
+IFACE=""
+
+if [ ! -e "$DAEMON" ] ; then
+    logger "$DAEMON is missing"
+    exit 1
+fi
+
+RETVAL=0
+
+PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin
+export PATH
+
+case "$1" in
+    start)
+        echo -n "Starting $DAEMON_NAME: "
+        # Prevent multiple starts ; an already running daemon is reported as OK
+        if [ -n "`pidof ${DAEMON_NAME}`" ] ; then
+            echo "OK"
+            exit $RETVAL
+        fi
+        start-stop-daemon --start -b -x "${DAEMON}" --
+        RETVAL=$?
+        if [ $RETVAL -eq 0 ] ; then
+            echo "OK"
+        else
+            echo "FAIL"
+        fi
+        ;;
+
+    stop)
+        echo -n "Stopping ${DAEMON_NAME}: "
+        if [ -n "`pidof ${DAEMON_NAME}`" ] ; then
+            killproc ${DAEMON_NAME}
+        fi
+        echo "OK"
+        ;;
+
+    restart)
+        $0 stop
+        $0 start
+        ;;
+
+    status)
+        pid=`pidof ${DAEMON_NAME}`
+        RETVAL=$?
+        if [ ${RETVAL} -eq 0 ] ; then
+            echo "${DAEMON_NAME} is running"
+        else
+            echo "${DAEMON_NAME} is NOT running"
+        fi
+        ;;
+
+    condrestart)
+        [ -f /var/lock/subsys/${DAEMON_NAME} ] && $0 restart
+        ;;
+
+    *)
+        echo "usage: $0 { start | stop | status | restart | condrestart }"
+        ;;
+esac
+
+exit $RETVAL
diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon-restart b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon-restart
new file mode 100755
index 00000000..bef8b41f
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon-restart
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2013-2016 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+#
+# This utility is primarily used by no reboot patching for process restart
+#
+# This script sends a JSON string containing the restart command
+# and ${1} as the specified process name to pmond over the loopback
+# interface on the pmon_cmd_port read from /etc/mtc/pmond.conf
+#
+# Linux Standard Base (LSB) Error Codes
+RETVAL=0
+GENERIC_ERROR=1
+INVALID_ARGS=2
+UNSUPPORTED_FEATURE=3
+NOT_INSTALLED=5
+NOT_RUNNING=7
+
+PROTOCOL="UDP4-DATAGRAM"
+ADDRESS="127.0.0.1"
+
+socat_exec=`(which socat) 2> /dev/null`
+
+process=${1}
+
+if [ -z "${socat_exec}" ] ; then
+    logger "Error: $0 cannot find socat exec"
+    exit ${NOT_INSTALLED}
+fi
+
+if [ -n "${process}" ] ; then
+    # hostwd is not a pmon monitored process
+    if [ "${process}" == "hostwd" ] ; then
+        systemctl restart hostw
+    else
+        port=$(awk '{if ($1 == "pmon_cmd_port") { print $3; }}' /etc/mtc/pmond.conf)
+        echo "{\"command\":\"restart\", \"process\":\"${process}\"}" | socat - ${PROTOCOL}:${ADDRESS}:${port}
+    fi
+else
+    logger "Error: $0 called with no process specified"
+    RETVAL=${INVALID_ARGS}
+fi
+
+exit ${RETVAL}
diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon-start b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon-start
new file mode 100755
index 00000000..590d0b5d
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon-start
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2013-2016 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+#
+# This utility is primarily used by no reboot patching for process start
+#
+# This script sends a JSON string containing the start command
+# and ${1} as the specified process name to pmond over the loopback
+# interface on the pmon_cmd_port read from /etc/mtc/pmond.conf
+#
+# Linux Standard Base (LSB) Error Codes
+RETVAL=0
+GENERIC_ERROR=1
+INVALID_ARGS=2
+UNSUPPORTED_FEATURE=3
+NOT_INSTALLED=5
+NOT_RUNNING=7
+
+PROTOCOL="UDP4-DATAGRAM"
+ADDRESS="127.0.0.1"
+
+socat_exec=`(which socat) 2> /dev/null`
+
+process=${1}
+
+if [ -z "${socat_exec}" ] ; then
+    logger "Error: $0 cannot find socat exec"
+    exit ${NOT_INSTALLED}
+fi
+
+if [ -n "${process}" ] ; then
+    # hostwd is not a pmon monitored process ; it is handled through systemd
+    if [ "${process}" == "hostwd" ] ; then
+        systemctl restart hostw
+    else
+        port=$(awk '{if ($1 == "pmon_cmd_port") { print $3; }}' /etc/mtc/pmond.conf)
+        echo "{\"command\":\"start\", \"process\":\"${process}\"}" | socat - ${PROTOCOL}:${ADDRESS}:${port}
+    fi
+else
+    logger "Error: $0 called with no process specified"
+    RETVAL=${INVALID_ARGS}
+fi
+
+exit ${RETVAL}
diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon-stop b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon-stop
new file mode 100755
index 00000000..a881a35d
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon-stop
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Copyright (c) 2013-2016 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+#
+# This utility is primarily used by no reboot patching for process stop
+#
+# This script sends a JSON string containing the stop command
+# and ${1} as the specified process name to pmond over the loopback
+# interface on the pmon_cmd_port read from /etc/mtc/pmond.conf
+#
+# Linux Standard Base (LSB) Error Codes
+RETVAL=0
+GENERIC_ERROR=1
+INVALID_ARGS=2
+UNSUPPORTED_FEATURE=3
+NOT_INSTALLED=5
+NOT_RUNNING=7
+
+PROTOCOL="UDP4-DATAGRAM"
+ADDRESS="127.0.0.1"
+
+socat_exec=`(which socat) 2> /dev/null`
+
+process=${1}
+
+if [ -z "${socat_exec}" ] ; then
+    logger "Error: $0 cannot find socat exec"
+    exit ${NOT_INSTALLED}
+fi
+
+if [ -n "${process}" ] ; then
+    # hostwd is not a pmon monitored process ; NOTE(review): the stop path restarts hostw rather than stopping it - confirm this is intended
+    if [ "${process}" == "hostwd" ] ; then
+        systemctl restart hostw
+    else
+        port=$(awk '{if ($1 == "pmon_cmd_port") { print $3; }}' /etc/mtc/pmond.conf)
+        echo "{\"command\":\"stop\", \"process\":\"${process}\"}" | socat - ${PROTOCOL}:${ADDRESS}:${port}
+    fi
+else
+    logger "Error: $0 called with no process specified"
+    RETVAL=${INVALID_ARGS}
+fi
+
+exit ${RETVAL}
diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon-test.sh b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon-test.sh
new file mode 100755
index 00000000..1b86a6b5
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon-test.sh
@@ -0,0 +1,236 @@
+#!/bin/bash
+
+#
+# Copyright (c) 2016 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+#
+###########################################################################
+#
+# This is a pmon test script that is not packaged into the load.
+# It is used for feature regression testing.
+#
+# Test options are
+#  - restart
+#  - future ....
+#
+#
+# Restart Test Example:
+#
+# process                        operation status [before pid]:[after pid]
+#
+# controller-0:~# /home/wrsroot/pmon-test.sh restart
+#
+# acpid                          restart PASSED [651]:[19095]
+# ceilometer-polling             restart PASSED [13844]:[22519]
+# fsmond                         restart PASSED [8719]:[26343]
+# guestServer                    restart PASSED [8710]:[29108]
+# hbsClient                      restart PASSED [8729]:[31248]
+# host_agent                     restart PASSED [13840]:[630]
+# io-monitor-manager             restart PASSED [11398]:[3713]
+# libvirtd                       restart PASSED [3841]:[7323]
+# logmgmt                        restart PASSED [2443]:[10701]
+# mtcClient                      restart PASSED [8735]:[13749]
+# mtclogd                        restart PASSED [8681]:[15771]
+# neutron-dhcp-agent             restart PASSED [10911]:[23967]
+# neutron-metadata-agent         restart PASSED [11051]:[27210]
+# neutron-sriov-nic-agent        restart PASSED [11092]:[30551]
+# nova-compute                   restart PASSED [14776]:[1109]
+# ntpd does not support restart
+# rmond                          restart PASSED [13893]:[5129]
+# sm-api                         restart PASSED [8896]:[8460]
+# skipping 'sm' process
+# sm-eru                         restart PASSED [8904]:[10993]
+# sm-watchdog                    restart PASSED [8695]:[14621]
+# skipping 'sshd' process
+# sw-patch-agent                 restart PASSED [2740]:[17461]
+# sw-patch-controller-daemon     restart PASSED [2558]:[21336]
+# sysinv-agent                   restart PASSED [2757]:[25128]
+# syslog-ng                      restart PASSED [684]:[28125]
+# vswitch does not support restart
+
+############################################################################
+. /etc/nova/openrc
+
+# Linux Standard Base (LSB) Error Codes
+RETVAL=0
+GENERIC_ERROR=1
+INVALID_ARGS=2
+UNSUPPORTED_FEATURE=3
+NOT_INSTALLED=5
+NOT_RUNNING=7
+
+trap ctrl_c INT
+
+function ctrl_c(){
+echo "Exiting ..."
+exit 0
+}
+
+DEBUG=false
+
+function dlog {
+    if [ ${DEBUG} == true ] ; then
+        echo "Debug: $1"
+    fi
+}
+
+# defaults
+restarts=1
+debounce=10
+startuptime=40
+factor=10
+
+printf "\n"
+
+# Loop over all the files in pmon.d dir and include pmond itself
+FILES=/etc/pmon.d/*
+#for file in "pmond" ${FILES}
+for file in ${FILES}
+do
+    if [ "${file}" == "pmond" ] ; then
+        process=${file}
+        pidfile="/var/run/pmond.pid"
+    else
+        restarts=`cat ${file} | grep ^restarts | cut -f2 -d'=' | cut -f2 -d' '`
+        process=`cat ${file} | grep ^process | cut -f2 -d'=' | cut -f2 -d' '`
+    fi
+
+
+    printf "%-30s %s - " "${process}" "${1}"
+
+    # Avoid testing certain processes
+    # ceph - pmond does not support
+    if [ "$process" == "ceph" -o "$process" == "vswitch" ] ; then
+        echo "${process} ${1} is not supported ... skipping"
+        continue
+    fi
+
+    if [ "$process" == "sshd" -a "${TERM}" == "xterm" ] ; then
+        echo "${process} ${1} not supported in xterm mode ; need to run on console"
+        continue
+    fi
+
+
+    debounce=`cat ${file} | grep ^debounce | cut -f2 -d'=' | cut -f2 -d' '`
+    if [ -z "${debounce}" ] ; then
+        debounce=10
+    fi
+
+    startuptime=`cat ${file} | grep ^startuptime | cut -f2 -d'=' | cut -f2 -d' '`
+    if [ -z "${startuptime}" ] ; then
+        startuptime=10
+    fi
+
+    pidfile=`cat ${file} | grep ^pidfile | cut -f2 -d'=' | cut -f2 -d' '`
+
+    dlog "supports $restarts restarts debounce:$debounce startuptime:$startuptime pidfile:$pidfile"
+
+    if [ -z "$restarts" -o -z "$process" -o -z "$pidfile" -o -z "$debounce" -o -z "$startuptime" ] ; then
+        printf "FAILED to parse ${file} - $restarts:restarts debounce:$debounce startuptime:$startuptime pidfile:$pidfile\n"
+        continue
+    fi
+
+    # Read the 'before' pid for this process up front so that every
+    # operation (not just restart) verifies against this iteration's pid
+    # rather than a value left over from the previous process.
+    pid1=""
+    if [ -f "${pidfile}" ] ; then
+        pid1=`head -1 ${pidfile}`
+    fi
+
+    if [ "$1" == "restart" ] ; then
+        if [ ! -f ${pidfile} ] ; then
+            printf "${1} FAILED ... pidfile missing (${pidfile})\n"
+            if [ "${process}" == "pmond" ] ; then
+                exit ${GENERIC_ERROR}
+            else
+                continue
+            fi
+        fi
+        pid1=`head -1 ${pidfile}`
+        kill -0 ${pid1}
+        if [ $? -eq 0 ] ; then
+            pmon-restart ${process}
+        else
+            # nothing was restarted so there is nothing to verify below
+            echo "FAILED - process not Running"
+            continue
+        fi
+    elif [ "$1" == "stop" ] ; then
+        debounce=1
+        startuptime=1
+        factor=1
+        # Not all processes can be stopped
+        if [ "$process" == "pmond" -o "$process" == "sm" ] ; then
+            echo "${process} stop not supported ... skipping"
+            continue
+        elif [ "$process" == "hbsClient" -a ! -e "/var/run/.node_locked" ] ; then
+            echo "${process} refusing to stop of heartbeat client on inservice host"
+            continue
+        else
+            pmon-stop ${process}
+        fi
+    elif [ "${1}" == "start" ] ; then
+        if [ "$process" == "pmond" -o "$process" == "sm" ] ; then
+            echo "${process} start not supported ... skipping"
+            continue
+        else
+            factor=1
+            pmon-start ${process}
+            sleep 1
+            s=`tail -2 /var/log/pmond.log | grep "$process process is not in the stopped state"`
+            if [ ! -z "${s}" ] ; then
+                echo "FAILED not in stopped state"
+                continue
+            fi
+        fi
+    elif [ "${1}" == "kill" ] ; then
+        if [ -z "${pid1}" ] ; then
+            echo "FAILED - no pid to kill (${pidfile})"
+            continue
+        fi
+        kill -9 ${pid1}
+        sleep 2
+    elif [ "${1}" == "alarm" ] ; then
+        printf "action not yet supported\n"
+        continue
+    elif [ "${1}" == "critical" ] ; then
+        printf "action not yet supported\n"
+        continue
+    else
+        printf "\n\nError:\nInvalid operation '${1}' specified\n"
+        printf "... must be restart, stop, start, kill, alarm, or critical\n"
+        printf "\n"
+        exit ${UNSUPPORTED_FEATURE}
+    fi
+
+    sleeptime=$((debounce + startuptime + factor))
+    sleep $sleeptime
+    if [ "$1" == "stop" ] ; then
+        if [ -e "${pidfile}" ] ; then
+            # some processes are auto restarted by systemd
+            if [ "${process}" != "mtcClient" -a "${process}" != "syslog-ng" ] ; then
+                echo "FAILED - pidfile still present"
+            fi
+        else
+            kill -0 ${pid1} 2> /dev/null
+            if [ $? -eq 0 ] ; then
+                echo "FAILED - process is still running ($pid1)"
+            else
+                pid2=`/usr/sbin/pidof ${process}`
+                if [ -z "${pid2}" ] ; then
+                    echo "PASSED - process is stopped"
+                else
+                    echo "FAILED - process is running again ($pid2)"
+                fi
+            fi
+        fi
+    else
+        pid2=`head -1 ${pidfile}`
+        if [ "$pid1" != "$pid2" ] ; then
+            kill -0 ${pid2}
+            if [ $? -eq 0 ] ; then
+                sleep 10
+                pid3=`head -1 ${pidfile}`
+                if [ "$pid2" != "$pid3" ] ; then
+                    echo "FAILED - pid changed"
+                else
+                    printf "PASSED [%5d]:[%5d]\n" "${pid1}" "${pid2}"
+                fi
+            else
+                echo "FAILED - no process"
+            fi
+        else
+            echo "FAILED - process not stopped"
+        fi
+    fi
+done
+exit ${RETVAL}
diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon.logrotate b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon.logrotate
new file mode 100755
index 00000000..ea151b26
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon.logrotate
@@ -0,0 +1,16 @@
+#daily
+nodateext
+
+/var/log/pmond.log
+{
+    nodateext
+    size 10M
+    start 1
+    missingok
+    rotate 20
+    compress
+    sharedscripts
+    postrotate
+        systemctl reload syslog-ng > /dev/null 2>&1 || true
+    endscript
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon.service b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon.service
new file mode 100644
index 00000000..8cad6359
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmon.service
@@ -0,0 +1,24 @@
+[Unit]
+Description=Titanium Cloud Maintenance Process Monitor
+After=config.service
+# The following thirdparty service files are not modified by Titanium Cloud,
+# so add "After" clauses here rather than "Before=pmon.service" to those
+After=sshd.service ntpd.service acpid.service syslog-ng.service
+Before=hostw.service
+
+[Service]
+Type=forking
+ExecStart=/etc/rc.d/init.d/pmon start
+ExecStop=/etc/rc.d/init.d/pmon stop
+ExecReload=/etc/rc.d/init.d/pmon reload
+PIDFile=/var/run/pmond.pid
+KillMode=process
+
+# Failure handling
+TimeoutStartSec=10s
+TimeoutStopSec=10s
+Restart=always +RestartSec=2 + +[Install] +WantedBy=multi-user.target diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmond.conf b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmond.conf new file mode 100755 index 00000000..7a3afff1 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/pmond.conf @@ -0,0 +1,38 @@ +; CGTS Process Monitor Configuration File +[config] ; Configuration +audit_period = 500 ; Period in milliseconds (50 > 999) + ; Anything outside this range turns + ; off the process monitor pulse events + ; +hostwd_update_period = 30 ; Period in seconds between update messages from + ; PMON to host watchdog + ; +pmon_cmd_port = 2117 ; Receive Command port +pmon_event_port = 2101 ; Transmit Event Port +pmon_pulse_port = 2109 ; I'm Alive pulse port +pmon_amon_port = 2200 ; Active Process Monitor Receive Port + +daemon_log_port = 2121 ; daemon logger port + +[defaults] + +[timeouts] +start_delay = 30 ; secs after goenable before monitoring + ; managed range 1 .. 
120 seconds + +[features] + +[debug] ; SIGHUP to reload +debug_timer = 0 ; enable(1) or disable(0) timer logs (tlog) +debug_msg = 0 ; enable(1) or disable(0) message logs (mlog) +debug_state = 0 ; enable(1) or disable(0) state change logs (clog) +debug_level = 0 ; decimal mask 0..15 (8,4,2,1) +debug_all = 0 ; set all debug labels to the specified value + +flush = 1 ; enable(1) or disable(0) force log flush (main loop) +flush_thld = 5 ; if enabled - force flush after this number of loops + +debug_event = none ; Not used +debug_filter = none ; Not used +stress_test = 0 ; In-Service Stress test number +fit_code = 0 ; codes are in nodeBase.h -> fit_code_enum diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/sshd.conf b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/sshd.conf new file mode 100644 index 00000000..80c507a7 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/sshd.conf @@ -0,0 +1,17 @@ +[process] +process = sshd +service = sshd +pidfile = /var/run/sshd.pid +script = /etc/init.d/sshd +style = lsb ; ocf or lsb +severity = minor ; minor, major, critical +restarts = 10 ; restart retries before error assertion +interval = 5 ; number of seconds to wait between restarts +debounce = 20 ; number of seconds that a process needs to remain + ; running before degrade is removed and retry count + ; is cleared. +startuptime = 5 ; Seconds to wait after process start before starting the debounce monitor +mode = passive ; Monitoring mode: passive (default) or active + ; passive: process death monitoring (default: always) + ; active : heartbeat monitoring, i.e. 
request / response messaging + ; ignore : do not monitor or stop monitoring diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/syslog-ng.conf b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/syslog-ng.conf new file mode 100644 index 00000000..5bb48129 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/syslog-ng.conf @@ -0,0 +1,17 @@ +[process] +process = syslog-ng +service = syslog-ng +pidfile = /var/run/syslog-ng/syslog-ng.pid +script = /etc/init.d/syslog +style = lsb ; ocf or lsb +severity = minor ; minor, major, critical +restarts = 2 ; restart retries before error assertion +interval = 2 ; number of seconds to wait between restarts +debounce = 20 ; number of seconds that a process needs to remain + ; running before degrade is removed and retry count + ; is cleared. +startuptime = 5 ; Seconds to wait after process start before starting the debounce monitor +mode = passive ; Monitoring mode: passive (default) or active + ; passive: process death monitoring (default: always) + ; active : heartbeat monitoring, i.e. request / response messaging + ; ignore : do not monitor or stop monitoring diff --git a/mtce-common/cgts-mtce-common-1.0/public/Makefile b/mtce-common/cgts-mtce-common-1.0/public/Makefile new file mode 100644 index 00000000..2e020030 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/public/Makefile @@ -0,0 +1,51 @@ +# +# Copyright (c) 2014-2015 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +SRCS = amon.c +OBJS = $(SRCS:.c=.o) +INCLUDES = -I. 
+LDFLAGS = -shared +CFLAGS = -fPIC -g -O2 -Wall -Wextra -Werror +TARGET_LIB = libamon.so +lib: build +ifeq (,$(shell which ${CC})) +CC=gcc +endif + +STATIC_ANALYSIS_TOOL = cppcheck +STATIC_ANALYSIS_TOOL_EXISTS = $(shell [[ -e `which $(STATIC_ANALYSIS_TOOL)` ]] && echo 1 || echo 0) + +.c.o: + $(CC) $(INCLUDES) $(CFLAGS) $(EXTRACCFLAGS) -c $< -o $@ + +static_analysis: +ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1) + $(STATIC_ANALYSIS_TOOL) --language=c++ --enable=warning -U__AREA__ -DWANT_FIT_TESTING *.cpp *.h +else + echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..." +endif + +build: static_analysis ${TARGET_LIB} + +${TARGET_LIB}: ${TARGET_LIB}.${VER_MJR} + ln -sf $^ $@ + +${TARGET_LIB}.${VER_MJR}: ${TARGET_LIB}.${VER} + ln -sf $^ $@ + +${TARGET_LIB}.${VER}: $(OBJS) + $(CC) ${LDFLAGS} -Wl,-soname,${TARGET_LIB}.${VER_MJR} -o $@ $^ + +$(SRCS:.c=.d):%.d:%.c + $(CC) $(CFLAGS) -MM $< >$@ + +include $(SRCS:.c=.d) + +clean: + rm -f *.o + rm -f *.so + rm -f *.so.* + rm -f *.d diff --git a/mtce-common/cgts-mtce-common-1.0/public/amon.c b/mtce-common/cgts-mtce-common-1.0/public/amon.c new file mode 100644 index 00000000..81e56333 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/public/amon.c @@ -0,0 +1,303 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River Active (Process) Monitoring Implementation. + * See amon.h for API. 
+ *
+ **/
+
+#include "amon.h"
+#include <string.h>     /* for ... memset, memcpy, strlen, strcmp, strncmp */
+
+/* Pass code */
+#ifndef PASS
+#define PASS (0)
+#endif
+
+/* Fail Code */
+#ifndef FAIL
+#define FAIL (1)
+#endif
+
+/* Retry Code */
+#ifndef RETRY
+#define RETRY (2)
+#endif
+
+/* maximum string and socket endpoint path length */
+#define AMON_MAX_LEN (100)
+
+/* initialization signature to gate functional
+ * api calls made prior to initialization */
+#define INIT_SIG (0xdeafdead)
+
+#define TXPORT 2200
+
+/** Control Structure */
+typedef struct
+{
+    unsigned int init ;              /**< Init signature                     */
+
+    int          rx_sock ;           /**< inet pulse request rx socket       */
+    int          rx_port ;           /**< inet pulse request rx port number  */
+    struct sockaddr_in rx_addr ;     /**< inet pulse request rx attributes   */
+    char         rx_buf[AMON_MAX_LEN] ; /**< pulse request message           */
+
+    int          tx_sock ;           /**< inet pulse response tx socket      */
+    int          tx_port ;           /**< inet pulse response tx port number */
+    struct sockaddr_in tx_addr ;     /**< inet pulse response tx attributes  */
+    char         tx_buf[AMON_MAX_LEN] ; /**< pulse response message          */
+
+    char         name[AMON_MAX_LEN] ; /**< name of process using this instance */
+
+    bool         debug_mode ;        /**< debug mode if true                 */
+    int          fit_code ;          /**< fit code MAGIC, SEQ, PROCESS       */
+} active_mon_socket_type ;
+
+/* Instance Control Structure - Per Process Private Data */
+static active_mon_socket_type amon ;
+
+/**
+ * Open the loopback UDP sockets used for active monitoring.
+ *
+ *  - rx socket is bound to 127.0.0.1:port and receives pulse requests
+ *  - tx socket targets 127.0.0.1:TXPORT and carries pulse responses
+ *
+ * If /var/run/<process_name>.debug exists, debug logging is enabled and
+ * the first word of that file may select a fault insertion (FIT) mode.
+ *
+ * Returns the rx socket descriptor on success, -ESRCH for a null
+ * process name, otherwise the negated errno of the failing call.
+ * Process names longer than AMON_MAX_LEN-1 characters are truncated.
+ */
+int active_monitor_initialize ( const char * process_name_ptr, int port )
+{
+    struct stat p ;
+    int val = 1 ;
+    int err = 0 ;
+    memset ( &amon, 0, sizeof(amon));
+    memset ( &p, 0 , sizeof(struct stat));
+
+    /* Reject a null name before it is logged, formatted or copied */
+    if ( process_name_ptr == NULL )
+    {
+        syslog ( LOG_INFO, "active_monitor_initialize called with null process name");
+        return (-ESRCH);
+    }
+
+    syslog ( LOG_INFO , "%s is actively Monitored over lo:%d:%x\n", process_name_ptr, port, port );
+
+    /* amon.name is borrowed as scratch space for the debug file path ;
+     * bounded so an oversized process name cannot overrun the buffer */
+    snprintf ( amon.name, AMON_MAX_LEN, "/var/run/%s.debug", process_name_ptr );
+
+    stat ( amon.name, &p ) ;
+    if ((p.st_ino != 0 ) && (p.st_dev != 0))
+    {
+        amon.debug_mode = true ;
+        syslog ( LOG_INFO, "Enabling Active Monitor Debug Mode\n");
+        if ( p.st_size )
+        {
+            FILE * filename = fopen ( amon.name, "rb" ) ;
+            if ( filename != NULL )
+            {
+                memset ( &amon.name, 0, AMON_MAX_LEN);
+                if ( fgets ( amon.name, 20, filename ) != NULL )
+                {
+                    if ( !strncmp ( amon.name, FIT_MAGIC_STRING, strlen (FIT_MAGIC_STRING)))
+                    {
+                        amon.fit_code = FIT_MAGIC ;
+                        syslog ( LOG_INFO, "Enabling FIT on 'magic calculation'\n");
+                    }
+                    else if ( !strncmp ( amon.name, FIT_SEQUENCE_STRING, strlen(FIT_SEQUENCE_STRING)))
+                    {
+                        amon.fit_code = FIT_SEQ ;
+                        syslog ( LOG_INFO, "Enabling FIT on 'sequence number'\n");
+                    }
+                    else if ( !strncmp ( amon.name, FIT_PROCESS_STRING, strlen(FIT_PROCESS_STRING)))
+                    {
+                        amon.fit_code = FIT_PROCESS ;
+                        syslog ( LOG_INFO, "Enabling FIT on 'process name'\n");
+                    }
+                    else
+                    {
+                        syslog ( LOG_INFO, "Unsupported FIT string (%s)\n", amon.name );
+                    }
+                }
+                fclose (filename);
+            }
+            else
+            {
+                syslog ( LOG_INFO, "Failed to open %s\n", amon.name);
+            }
+        }
+    }
+
+    /* Replace the scratch path with the real process name ; snprintf
+     * truncates and always null terminates */
+    memset ( amon.name, 0, AMON_MAX_LEN);
+    snprintf ( amon.name, AMON_MAX_LEN, "%s", process_name_ptr );
+
+    /*******************************************************/
+    /*        Create and Setup Inet Receive Socket         */
+    /*******************************************************/
+    amon.rx_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+    if ( 0 >= amon.rx_sock )
+    {
+        err = errno ;
+        amon.rx_sock = 0 ; /* keep the 'zero means not open' convention */
+        return (-err);
+    }
+
+    if ( setsockopt ( amon.rx_sock , SOL_SOCKET, SO_REUSEADDR, &val, sizeof(int)) == -1 )
+    {
+        syslog ( LOG_INFO, "%s failed to set socket as re-useable (%d:%s)\n",
+                 process_name_ptr, errno, strerror(errno));
+    }
+
+    /* Setup with localhost ip */
+    memset(&amon.rx_addr, 0, sizeof(struct sockaddr_in));
+    amon.rx_addr.sin_family = AF_INET ;
+    amon.rx_addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+    amon.rx_addr.sin_port = htons(port) ;
+    amon.rx_port = port ;
+
+    /* bind socket to the receive addr */
+    if ( bind ( amon.rx_sock, (const struct sockaddr *)&amon.rx_addr, sizeof(struct sockaddr_in)) == -1 )
+    {
+        err = errno ; /* capture before syslog/close can change it */
+        syslog ( LOG_ERR, "failed to bind to rx socket with port %d\n", port );
+        close (amon.rx_sock);
+        amon.rx_sock = 0 ;
+        return (-err);
+    }
+
+    /*******************************************************/
+    /*       Create and Setup Inet Transmit Socket         */
+    /*******************************************************/
+    amon.tx_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+    if ( 0 >= amon.tx_sock )
+    {
+        err = errno ;
+        amon.tx_sock = 0 ;
+        /* do not leak the already bound receive socket */
+        close (amon.rx_sock);
+        amon.rx_sock = 0 ;
+        return (-err);
+    }
+
+    /* Setup with localhost ip */
+    memset(&amon.tx_addr, 0, sizeof(struct sockaddr_in));
+    amon.tx_addr.sin_family = AF_INET ;
+    amon.tx_addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+    amon.tx_addr.sin_port = htons(TXPORT) ;
+    amon.tx_port = TXPORT ;
+
+    /*******************************************************************/
+
+    /* Set init sig */
+    amon.init = INIT_SIG ;
+
+    /* Return the socket descriptor */
+    return (amon.rx_sock);
+}
+
+/* Return the receive socket descriptor for use in a select set.
+ * Logs a warning, and still returns the stored value, when the
+ * library is not initialized or the socket is not open. */
+int active_monitor_get_sel_obj ( void )
+{
+    if (( amon.init != INIT_SIG ) || ( amon.rx_sock <= 0 ))
+    {
+        syslog (LOG_WARNING , "'%s' called with invalid init (sock:%d)\n",
+                 __FUNCTION__, amon.rx_sock);
+    }
+
+    return (amon.rx_sock);
+}
+
+/* Receive a pulse request and send back the pulse response.
+ *
+ * The request is "<process> <magic hex> <sequence>". The response
+ * echoes the process name and sequence number with the magic number
+ * bit inverted, unless a fault insertion mode corrupts one field.
+ *
+ * Returns PASS when a response was sent, -EPERM when called before
+ * initialize, -EAGAIN when the receive was interrupted or had nothing
+ * to read, or the negated errno of a failed recvfrom/sendto.
+ * A request addressed to another process name is logged and the
+ * received byte count is returned, as before.
+ */
+int active_monitor_dispatch ( void )
+{
+    int msg_cnt = 0 ;
+    int rc = RETRY ;
+    int err = 0 ;
+    socklen_t len ;
+
+    if ( amon.init != INIT_SIG )
+    {
+        syslog (LOG_WARNING , "'%s' called with invalid init\n", __FUNCTION__ );
+        return (-EPERM);
+    }
+
+    do
+    {
+        /* count every pass so the retry loop is bounded */
+        msg_cnt++ ;
+
+        /* value-result argument ; must be the size of the inet address
+         * buffer actually passed in and reset before each call */
+        len = sizeof(struct sockaddr_in);
+
+        /* read at most AMON_MAX_LEN-1 bytes so that the zeroed buffer
+         * always stays null terminated for the string parsing below */
+        memset ( amon.rx_buf, 0 , AMON_MAX_LEN );
+        rc = recvfrom ( amon.rx_sock, amon.rx_buf, AMON_MAX_LEN-1, 0,
+                        (struct sockaddr *)&amon.rx_addr, &len);
+        if ( rc == -1 )
+        {
+            err = errno ;
+            if (( err != EINTR ) && ( err != EAGAIN ))
+            {
+                syslog ( LOG_WARNING, "amon:%s 'recvfrom' %s.%d failed (%d:%s)\n",
+                         __FUNCTION__, inet_ntoa(amon.rx_addr.sin_addr), amon.rx_port,
+                         err, strerror(err));
+                rc = -err ;
+            }
+            else
+            {
+                rc = -EAGAIN ;
+            }
+        }
+
+        /* Otherwise we got a message */
+        else
+        {
+            /* Small Song and Dance
+             *
+             * Invert magic number and maintain the sequence number
+             *
+             **/
+            char str[AMON_MAX_LEN] ;
+            unsigned int magic = 0 ;
+            int seq = 0 ;
+
+            memset (str, 0, AMON_MAX_LEN );
+
+            /* field width 99 is AMON_MAX_LEN-1 ; keep the two in step */
+            sscanf ( amon.rx_buf, "%99s %8x %d", str, &magic, &seq );
+
+            /* Fault Insertion Controls */
+            if ( amon.fit_code == FIT_PROCESS )
+            {
+                str[0] = 'x' ;
+                str[1] = 'x' ;
+            }
+            if ( amon.fit_code == FIT_SEQ )
+            {
+                seq-- ;
+            }
+            if ( amon.fit_code != FIT_MAGIC )
+            {
+                magic = magic ^ -1 ;
+            }
+
+            /* bounded format ; output is truncated rather than overrun */
+            memset ( amon.tx_buf, 0 , AMON_MAX_LEN );
+            snprintf( amon.tx_buf, AMON_MAX_LEN, "%s %8x %d", str, magic, seq );
+
+            if ( strcmp ( str, amon.name ) )
+            {
+                syslog ( LOG_ERR, "recv message for wrong process: %s:%s (%d)\n", str, amon.name, seq );
+            }
+            else
+            {
+                if ( amon.debug_mode )
+                {
+                    syslog ( LOG_INFO, "recv: %s (%d)\n", amon.rx_buf, seq );
+                    syslog ( LOG_INFO, "send: %s (%d)\n", amon.tx_buf, seq );
+                }
+                rc = sendto ( amon.tx_sock, &amon.tx_buf[0], strlen(amon.tx_buf), 0,
+                              (struct sockaddr *) &amon.tx_addr, sizeof(struct sockaddr_in));
+                if ( rc == -1 )
+                {
+                    err = errno ;
+                    syslog ( LOG_WARNING, "amon:%s 'sendto' %s.%d failed (%d:%s)\n",
+                             __FUNCTION__, inet_ntoa(amon.tx_addr.sin_addr), TXPORT, err, strerror(err));
+                    syslog ( LOG_WARNING, "amon:%s:%s -> %s\n",
+                             __FUNCTION__, amon.rx_buf, amon.tx_buf );
+                    rc = -err ;
+                }
+                else
+                {
+                    rc = PASS ;
+                }
+            }
+        }
+    } while (( rc == RETRY ) && ( msg_cnt < 5 )) ;
+    return (rc);
+}
+
+/* Close both sockets and clear the init signature so that later
+ * api calls are gated and a repeated finalize does not close a
+ * descriptor that may since have been reused. */
+void active_monitor_finalize ( void )
+{
+    if ( amon.tx_sock )
+    {
+        close (amon.tx_sock);
+        amon.tx_sock = 0 ;
+    }
+    if ( amon.rx_sock )
+    {
+        close (amon.rx_sock);
+        amon.rx_sock = 0 ;
+    }
+    amon.init = 0 ;
+}
diff --git a/mtce-common/cgts-mtce-common-1.0/public/amon.h b/mtce-common/cgts-mtce-common-1.0/public/amon.h
new file mode 100644
index 00000000..e5becec0
--- /dev/null
+++ b/mtce-common/cgts-mtce-common-1.0/public/amon.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2014, 2016 Wind River Systems, Inc.
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+  * @file
+  * Wind River CGCS Platform Active Process Monitor Library Header
+  */
+
+#include <stdio.h>      /* for ...
snprintf */ +#include /* for ... unlink, close and usleep */ +#include /* for ... socket */ +#include /* for ... domain socket type */ +#include /* for ... inet socket type */ +#include /* for ... inet_addr, inet_ntoa macro */ +#include /* for ... syslog */ +#include /* for ... EINTR, errno, strerror */ +#include /* for ... true and false */ +#include /* for ... file stat */ + + +/** + * @addtogroup active_monitor_library + * @{ + * + * This is a convenience module in support of actively monitoring process + * health within a carrier grade processing environment. + * + * Packaged as a shared library that processes can link to. + * + * This module provides four simple interfaces that provide the following general functions + * + * - open an abstract socket interface for active monitoring messaging + * - return the socket file descriptor for event driven selection + * - service events on socket + * - close the socket when done + * + * *Interfaces including work flow are* + * + * Init: + * + * active_monitor_initialize ( "hbsClient" , port ); + * + * Setup event driven handling: + * + * int active_monitor_socket = active_monitor_get_sel_obj(); + * FD_SET( active_monitor_socket, &readfds); + * + * Main loop: + * + * if ( FD_ISSET(active_monitor_socket, &readfds)) + * active_monitor_dispatch (); + * + * Exit: + * + * active_monitor_finalize (); + * + */ + +/** Initialize the library and open the messaging socket(s). + * + * Creates socket and binds to named endpoint. + * + * Prints status or errors to syslog. + * + * @param process_name_ptr - char pointer to string containing monitored process name + * @param port - integer specifying the port number this process is listening on + * + * @returns The socket file descriptor on success or the negated + * standard Linux error number (errno) from socket(2) or bind(2) + * + **/ +int active_monitor_initialize ( const char * process_name_ptr, int port ); + +/** Supplies the messaging socket file descriptor. 
+ * + * @returns The created socket file descriptor for event driven select + * or zero if initialize was not called or there was an error creating + * the socket. + * + **/ +int active_monitor_get_sel_obj ( void ); + +/** The work horse of this library. + * + * This interface services the receive, implements the sanity algorithm + * on the received message and sends back a sane response. + * + * @returns Zero on success or the negated form of any standard connect(2), + * sendto(2) or recvfrom(2) error code, as well as + * + * - -EPERM : if called prior to initialize. + * - -EAGAIN : if no message to receive. + * + * */ +int active_monitor_dispatch ( void ); + +/** Close the socket */ +void active_monitor_finalize ( void ); + +/** Debug mode is enabled if the following file is found during initialize + * + * /var/run/.debug + * + * Fault Insertion Mode is enabled if the first word of line one + * of this file contains one of the following words + * + * sequence - corrupt the sequence number returned + * magic - corrupt the magic number returned + * process - corrupt the process name returned + * + */ + +/** Fault Insertion Mode Strings */ +#define FIT_MAGIC_STRING "magic" +#define FIT_SEQUENCE_STRING "sequence" +#define FIT_PROCESS_STRING "process" + +/** Fault Insertion Codes */ +#define FIT_NONE 0 +#define FIT_MAGIC 1 +#define FIT_SEQ 2 +#define FIT_PROCESS 3 + +/** + * @} active_monitor_library + */ diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/Makefile b/mtce-common/cgts-mtce-common-1.0/rmon/Makefile new file mode 100755 index 00000000..fb46a7ba --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/Makefile @@ -0,0 +1,40 @@ +# +# Copyright (c) 2014-2017 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +SRCS = rmonInit.cpp rmonMsg.cpp rmonHdlr.cpp rmonHttp.cpp interfaceHdlr.cpp rmonFM.cpp remoteloggingHdlr.cpp thinmetaHdlr.cpp +OBJS = $(SRCS:.cpp=.o) +LDLIBS = -lstdc++ -ldaemon -lcommon -lfmcommon -lrmonapi -lrt -levent -lcrypto -luuid -lpthread -ljson-c +INCLUDES = -I../daemon -I../common -IrmonApi/ -I. -I.. +CCFLAGS = -g -O2 -Wall -Werror -Wextra + +STATIC_ANALYSIS_TOOL = cppcheck +STATIC_ANALYSIS_TOOL_EXISTS = $(shell [[ -e `which $(STATIC_ANALYSIS_TOOL)` ]] && echo 1 || echo 0) + +all: clean static_analysis common daemon build + +.cpp.o: + $(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@ + +static_analysis: +ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1) + $(STATIC_ANALYSIS_TOOL) --language=c++ --enable=warning -U__AREA__ -DWANT_FIT_TESTING *.cpp *.h +else + echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..." +endif + +build: clean static_analysis $(OBJS) + $(CXX) $(CCFLAGS) $(OBJS) -L../daemon -L../common -LrmonApi/ $(LDLIBS) -o rmond + +lib: build $(OBJS) + +common: + ( cd ../common && $(MAKE) lib VER=$(VER) VER_MJR=$(VER_MJR)) + +daemon: + ( cd ../daemon && $(MAKE) lib VER=$(VER) VER_MJR=$(VER_MJR)) + +clean: + @rm -f $(OBJS) rmond *.o *.a diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/interfaceHdlr.cpp b/mtce-common/cgts-mtce-common-1.0/rmon/interfaceHdlr.cpp new file mode 100644 index 00000000..57f9494d --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/interfaceHdlr.cpp @@ -0,0 +1,1183 @@ +/* + * Copyright (c) 2013-2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGCS Platform Resource Monitor Interface Handler + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "rmon.h" /* rmon header file */ +#include +#include "nlEvent.h" /* for ... 
get_netlink_events */ +#include +#include + +/* Used to set interface alarms through the FM API */ +static SFmAlarmDataT alarmData; + +/* Used to set port one alarms through the FM API */ +static SFmAlarmDataT alarmDataPOne; + +/* Used to set port two alarms through the FM API */ +static SFmAlarmDataT alarmDataPTwo; + +const char rmonStages_str [RMON_STAGE__STAGES][32] = +{ + "Handler-Init", + "Handler-Start", + "Manage-Restart", + "Monitor-Wait", + "Monitor-Resource", + "Restart-Wait", + "Ignore-Resource", + "Handler-Finish", + "Failed-Resource", + "Failed-Resource-clr", +} ; + +/***************************************************************************** + * + * Name : interfaceResourceStageChange + * + * Purpose : Put an interface resource in the requested stage for use by the + * interface resource handler + * + ******************************************** + **********************************/ +int interfaceResourceStageChange ( interface_resource_config_type * ptr , rmonStage_enum newStage ) +{ + if (( newStage < RMON_STAGE__STAGES ) && + ( ptr->stage < RMON_STAGE__STAGES )) + { + clog ("%s %s -> %s (%d->%d)\n", + ptr->resource, + rmonStages_str[ptr->stage], + rmonStages_str[newStage], + ptr->stage, newStage); + ptr->stage = newStage ; + return (PASS); + } + else + { + slog ("%s Invalid Stage (now:%d new:%d)\n", + ptr->resource, ptr->stage, newStage ); + ptr->stage = RMON_STAGE__FINISH ; + return (FAIL); + } +} + +/************************************************************ + * + * Name : get_iflink_interface + * + * Purpose : get the ifname of the linked parent interface + ***********************************************************/ +string get_iflink_interface (string ifname) +{ + + string iflink_file = INTERFACES_DIR + ifname + "/iflink"; + + ifstream finIflink ( iflink_file.c_str() ); + string iflink_line; + string ret = ""; + char iface_buffer [INTERFACE_NAME_LEN] = ""; + int iflink = -1; + + if (finIflink.is_open()) + { + + while ( getline (finIflink, 
iflink_line) ) { + iflink = atoi(iflink_line.c_str()); + } + finIflink.close(); + + if_indextoname (iflink, iface_buffer); + + if (iface_buffer[0] != '\0') + ret = iface_buffer; + + } + return ret; +} + + +/***************************************************************************** + * + * Name : init_physical_interfaces + * + * Purpose : Map an interface (mgmt, oam or infra) to a physical port + * Fills interface_one/interface_two/bond and sets interface_used/lagged. + *****************************************************************************/ +void init_physical_interfaces ( interface_resource_config_type * ptr ) +{ + FILE * pFile; + char line_buf[50]; + string str; + string physical_interface = ""; + enum interface_type { single, vlan, bond }; + interface_type ifaceType = single; // default assumption + + memset(ptr->interface_one, 0, sizeof(ptr->interface_one)); + memset(ptr->interface_two, 0, sizeof(ptr->interface_two)); + memset(ptr->bond, 0, sizeof(ptr->bond)); + + if ( strcmp(ptr->resource, MGMT_INTERFACE_NAME) == 0 ) + { + str = MGMT_INTERFACE_FULLNAME; + } + else if ( strcmp(ptr->resource, INFRA_INTERFACE_NAME) == 0 ) + { + str = INFRA_INTERFACE_FULLNAME; + } + else if ( strcmp(ptr->resource, OAM_INTERFACE_NAME) == 0 ) + { + str = OAM_INTERFACE_FULLNAME; + } + + + pFile = fopen (PLATFORM_DIR , "r"); + /* get the physical interface */ + if (pFile != NULL) + { + ifstream fin( PLATFORM_DIR ); + string line; + while ( getline( fin, line ) ) + { + /* an empty key would match every line, so an unknown resource must not search */ + if ( !str.empty() && ( line.find(str) != string::npos )) + { + stringstream ss( line ); + getline( ss, physical_interface, '=' ); // token = string before = + getline( ss, physical_interface, '=' ); // token = string after = + + // determine the interface type + string uevent_interface_file = INTERFACES_DIR + + physical_interface + "/uevent"; + ifstream finUevent( uevent_interface_file.c_str() ); + + // if we cannot locate this file the interface type cannot + // be resolved, so monitoring of this resource is disabled + // (interface_used = false) and the function returns early; + // no naming-convention 
fallback is attempted. + if (!finUevent) { + elog ("Cannot find uevent interface file (%s) to " + "resolve interface type for resource %s. " + "Disabling monitoring\n" , + uevent_interface_file.c_str(), ptr->resource); + + ptr->interface_used = false; + fclose(pFile); + return; + } + else { // proceed with uevent method + string line; + ifaceType = single; + while( getline( finUevent, line ) ) + { + if ( line.find ("DEVTYPE") == 0 ) + { + if ( line.find ("=vlan") != string::npos ) + ifaceType = vlan; + else if ( line.find ("=bond") != string::npos ) + ifaceType = bond; + break; + } + } + } + + switch (ifaceType) { + case single: + snprintf(ptr->interface_one, + sizeof(ptr->interface_one), + "%s", physical_interface.c_str()); + ilog("Interface : %s : %s \n", + ptr->interface_one, ptr->resource ); + break; + case bond: + snprintf(ptr->bond, + sizeof(ptr->bond), + "%s", physical_interface.c_str()); + ilog("Bond Interface : %s : %s \n", + ptr->bond, ptr->resource ); + break; + case vlan: + ilog("VLAN Interface : %s : %s \n", + physical_interface.c_str(), ptr->resource); + // if it is a VLAN interface, we need + // to determine its parent interface, + // which may be a single interface or + // a bonded interface + string parent_interface = get_iflink_interface(physical_interface); + if (!parent_interface.empty()) { + + dlog ("Parent interface for VLAN : %s\n", + parent_interface.c_str()); + + physical_interface = parent_interface; + + string uevent_parent_file = INTERFACES_DIR + + parent_interface + "/uevent"; + + ifstream finUevent2( uevent_parent_file.c_str() ); + string line; + bool bond_configured = false; + while( getline( finUevent2, line ) ) + { + // if this uevent does not have a DEVTYPE + // then it's a single interface. If this + // does have a DEVTYPE then check explicitly + // for bond. Since we don't allow vlan over + // vlan, for all other DEVTYPEs, assume + // this is a single interface. 
+ if ( (line.find ("DEVTYPE") == 0) && + (line.find ("=bond") != string::npos) ) { + + ilog ("Parent interface of VLAN interface " + "resolved as Bond\n"); + bond_configured = true; + break; + } + } + if (!bond_configured) { + snprintf(ptr->interface_one, + sizeof(ptr->interface_one), + "%s", parent_interface.c_str()); + } + } + break; + } // end of switch + break; + } + } + fclose(pFile); + } + + /* Lagged interface */ + if ((ptr->interface_one[0] == '\0') && (!physical_interface.empty())) + { + + string lagged_interface_file = INTERFACES_DIR + + physical_interface + "/bonding/slaves"; + + ifstream finTwo( lagged_interface_file.c_str() ); + if (!finTwo) { + elog ("Cannot find bond interface file (%s) to " + "resolve slave interfaces\n", lagged_interface_file.c_str()); + } + else { + string line; + while( getline( finTwo, line ) ) + { + snprintf(line_buf, sizeof(line_buf), "%s", line.c_str()); /* bounded: the slave list may exceed line_buf */ + // the slave interfaces are listed as enXYYY enXYYY... + // starting with the primary. Read all other slaves + // as interface_two + sscanf(line_buf, "%19s %19s", ptr->interface_one, ptr->interface_two); + ilog("%s interface: %s, interface two: %s \n", ptr->resource, + ptr->interface_one, ptr->interface_two); + break; + } + } + } + + if ( ptr->interface_one[0] == '\0' ) + { + ptr->interface_used = false; + } + else + { + ptr->interface_used = true; + if ( ptr->interface_two[0] == '\0' ) + { + /* this is not a lagged interface */ + ptr->lagged = false; + } else { + /* this is a lagged interface */ + ptr->lagged = true; + } + } +} + +/***************************************************************************** + * + * Name : service_resource_state + * + * Purpose : Set the interface resource in the correct state for the interface + * resource handler + * + *****************************************************************************/ +void service_resource_state ( interface_resource_config_type * ptr ) +{ + + if (ptr->lagged == true) + { + /* the lagged interface is initialized */ + if ((ptr->resource_value == 
INTERFACE_UP) && (ptr->resource_value_lagged == INTERFACE_UP) && + (ptr->failed == true )) + { + /* If both interfaces are up and there is a fault, it needs to be cleared */ + ptr->sev = SEVERITY_CLEARED; + interfaceResourceStageChange ( ptr, RMON_STAGE__FINISH ); + } + else if ((((ptr->resource_value == INTERFACE_UP) && (ptr->resource_value_lagged == INTERFACE_DOWN)) || + ((ptr->resource_value_lagged == INTERFACE_UP) && (ptr->resource_value == INTERFACE_DOWN))) && + (ptr->sev != SEVERITY_MAJOR)) + { + /* if exactly one interface failed it is a major condition */ + + if (ptr->sev == SEVERITY_CRITICAL) + { + /* need to clear port alarm but not interface alarm */ + interfaceResourceStageChange ( ptr, RMON_STAGE__FAILED_CLR ); + } + else + { + interfaceResourceStageChange ( ptr, RMON_STAGE__MANAGE); + } + + ptr->failed = true; + ptr->sev = SEVERITY_MAJOR; + } + else if (((ptr->resource_value == INTERFACE_DOWN) && (ptr->resource_value_lagged == INTERFACE_DOWN)) && + (ptr->sev != SEVERITY_CRITICAL)) + { + /* both lagged interfaces failed, this is a critical condition */ + ptr->failed = true; + ptr->sev = SEVERITY_CRITICAL ; + interfaceResourceStageChange ( ptr, RMON_STAGE__MANAGE); + } + } + else + { + /* interface is not lagged */ + if ( (ptr->resource_value == INTERFACE_DOWN) && (ptr->sev != SEVERITY_CRITICAL) ) + { + /* the interface has failed */ + ptr->failed = true; + ptr->sev = SEVERITY_CRITICAL ; + interfaceResourceStageChange ( ptr, RMON_STAGE__MANAGE); + } + else if ((ptr->resource_value == INTERFACE_UP) && (ptr->failed == true )) + { + /* If the interface is up and there is a fault, it needs to be cleared */ + ptr->sev = SEVERITY_CLEARED; + interfaceResourceStageChange ( ptr, RMON_STAGE__FINISH ); + } + } +} + +/***************************************************************************** + * + * Name : get_link_state + * + * Purpose : Check to see if the current interface link is up or down + * Returns : PASS with *running_ptr set from IFF_RUNNING, FAIL otherwise + 
*****************************************************************************/ +int get_link_state ( int ioctl_socket, char iface[20], bool * running_ptr ) +{ + static int get_link_state_throttle = 0 ; /* static so the count survives between calls and the failure log can be throttled */ + struct ifreq if_data; + int rc = FAIL ; + + if (( iface == NULL ) || ( iface[0] == '\0' )) + { + elog ("Null interface name\n"); + return ( rc ) ; + } + + memset( &if_data, 0, sizeof(if_data) ); + snprintf( if_data.ifr_name, sizeof(if_data.ifr_name), "%s", iface ); /* bounded copy: never write past ifr_name */ + if( 0 <= ioctl( ioctl_socket, SIOCGIFFLAGS, &if_data ) ) + { + if( if_data.ifr_flags & IFF_RUNNING ) + { + *running_ptr = true; + } + else + { + *running_ptr = false; + } + + /* reset log flood gate counter */ + get_link_state_throttle = 0 ; + + rc = PASS ; + } + else + { + wlog_throttled (get_link_state_throttle, 100, + "Failed to get %s (%s) interface state (%d:%s)\n", + iface, if_data.ifr_name, errno, strerror(errno)); + } + return ( rc ); +} + +/***************************************************************************** + * + * Name : service_interface_events + * + * Purpose : Service state changes for monitored interfaces + * + *****************************************************************************/ +int service_interface_events ( int nl_socket , int ioctl_socket ) +{ + list links_gone_down ; + list links_gone_up ; + list::iterator iter_curr_ptr ; + rmon_ctrl_type * _rmon_ctrl_ptr; + interface_resource_config_type * ptr; + + _rmon_ctrl_ptr = get_rmon_ctrl_ptr(); + if ( get_netlink_events ( nl_socket, links_gone_down, links_gone_up ) ) + { + for (int i=0; i<_rmon_ctrl_ptr->interface_resources; i++) + { + ptr = get_interface_ptr(i); + if ( ptr->interface_used == true ) + { + + bool running = false ; + + if ( !links_gone_down.empty() ) + { + /* Look at the down list */ + for ( iter_curr_ptr = links_gone_down.begin(); + iter_curr_ptr != links_gone_down.end() ; + iter_curr_ptr++ ) + { + if ( strcmp ( ptr->interface_one, iter_curr_ptr->c_str()) == 0 ) + { + wlog ("link %s is down\n", ptr->interface_one ); + + if ( get_link_state ( ioctl_socket, 
iter_curr_ptr->c_str(), &running ) == PASS ) + { + wlog ("%s is down (oper:%s)\n", + iter_curr_ptr->c_str(), + running ? "up" : "down" ); + if (!running) + { + ptr->resource_value = INTERFACE_DOWN; + } + else + { + ptr->resource_value = INTERFACE_UP; + } + } + else + { + wlog ("%s is down (driver query failed)\n", iter_curr_ptr->c_str() ); + ptr->resource_value = INTERFACE_DOWN; + } + } + + if (ptr->lagged == true) + { + if ( strcmp ( ptr->interface_two, iter_curr_ptr->c_str()) == 0 ) + { + wlog ("link %s is down\n", ptr->interface_two); + + if ( get_link_state ( ioctl_socket, iter_curr_ptr->c_str(), &running ) == PASS ) + { + wlog ("%s is down (oper:%s)\n", + iter_curr_ptr->c_str(), + running ? "up" : "down" ); + if (!running) + { + ptr->resource_value_lagged = INTERFACE_DOWN; + } + else + { + ptr->resource_value_lagged = INTERFACE_UP; + } + } + else + { + wlog ("%s is down (driver query failed)\n", iter_curr_ptr->c_str() ); + ptr->resource_value_lagged = INTERFACE_DOWN; + } + } + if ( strcmp ( ptr->bond, iter_curr_ptr->c_str()) == 0 ) + { + wlog ("bond: %s is down\n", ptr->bond); + //ptr->resource_value_lagged = INTERFACE_DOWN; + //ptr->resource_value = INTERFACE_DOWN; + } + } + } + } + if ( !links_gone_up.empty() ) + { + //wlog ("one or more links have dropped\n"); + /* Look at the up list */ + for ( iter_curr_ptr = links_gone_up.begin(); + iter_curr_ptr != links_gone_up.end() ; + iter_curr_ptr++ ) + { + + if ( strcmp ( ptr->interface_one, iter_curr_ptr->c_str()) == 0 ) + { + wlog ("link %s is up\n", ptr->interface_one ); + + if ( get_link_state ( ioctl_socket, iter_curr_ptr->c_str(), &running ) == PASS ) + { + wlog ("%s is up (oper:%s)\n", + iter_curr_ptr->c_str(), + running ? 
"up" : "down" ); + if (!running) + { + ptr->resource_value = INTERFACE_DOWN; + } + else + { + ptr->resource_value = INTERFACE_UP; + } + } + else + { + wlog ("%s is down(driver query failed)\n", iter_curr_ptr->c_str() ); + ptr->resource_value = INTERFACE_DOWN; + } + } + if (ptr->lagged == true) + { + if ( strcmp ( ptr->interface_two, iter_curr_ptr->c_str()) == 0 ) + { + wlog ("link %s is up\n", ptr->interface_two ); + + if ( get_link_state ( ioctl_socket, iter_curr_ptr->c_str(), &running ) == PASS ) + { + wlog ("%s is up (oper:%s)\n", + iter_curr_ptr->c_str(), + running ? "up" : "down" ); + if (!running) + { + ptr->resource_value_lagged = INTERFACE_DOWN; + } + else + { + ptr->resource_value_lagged = INTERFACE_UP; + } + } + else + { + wlog ("%s is down (driver query failed)\n", iter_curr_ptr->c_str() ); + ptr->resource_value_lagged = INTERFACE_DOWN; + } + } + if ( strcmp ( ptr->bond, iter_curr_ptr->c_str()) == 0 ) + { + wlog ("bond: %s is up\n", ptr->bond); + //ptr->resource_value_lagged = INTERFACE_UP; + //ptr->resource_value = INTERFACE_UP; + } + } + } + } + /* set the states for the interface handler */ + service_resource_state( ptr ); + } + } + } + + return (PASS); +} + +/***************************************************************************** + * + * Name : interface_alarming_init + * + * Purpose : Initializes any previously raised interface alarms if rmon is restarted + * + *****************************************************************************/ +void interface_alarming_init ( interface_resource_config_type * ptr) +{ + AlarmFilter alarmFilter; + ptr->failed = false; + rmon_ctrl_type * _rmon_ctrl_ptr; + + _rmon_ctrl_ptr = get_rmon_ctrl_ptr(); + + /* handle active alarms for the interface ports */ + SFmAlarmDataT *intf_alarms = NULL; + unsigned int num_intf_alarms = 0; + strcpy(alarmFilter.alarm_id, ptr->alarm_id_port); + strcpy(alarmFilter.entity_instance_id, _rmon_ctrl_ptr->my_hostname); + strcat(alarmFilter.entity_instance_id, ".port="); + + if 
(rmon_fm_get(&alarmFilter, &intf_alarms, &num_intf_alarms) == FM_ERR_OK) + { + bool intf_one_found = false; + bool intf_two_found = false; + SFmAlarmDataT *a = intf_alarms; + + for( unsigned int i = 0; i < num_intf_alarms; i++, a++ ) + { + /* only handle specific port alarm */ + if (strncmp(a->alarm_id, ptr->alarm_id_port, sizeof(a->alarm_id)) == 0) + { + /* check interface port one alarm */ + if (!intf_one_found && ptr->interface_one[0] != '\0') + { + if (strstr(a->entity_instance_id, ptr->interface_one)) + { + ptr->failed = true; + intf_one_found = true; + } + } + + /* check interface port two alarm */ + if (!intf_two_found && ptr->interface_two[0] != '\0') + { + if (strstr(a->entity_instance_id, ptr->interface_two)) + { + ptr->failed = true; + intf_two_found = true; + } + } + + /* clear this alarm as it is no longer valid as the interface ports have + changed. NOTE(review): the found flags are cumulative across alarms, so a non-matching alarm is kept once an earlier alarm matched - confirm this is intended */ + if (!intf_one_found && !intf_two_found) + { + ilog("clearing alarm %s", a->entity_instance_id); + strcpy(alarmFilter.entity_instance_id, a->entity_instance_id); + rmon_fm_clear (&alarmFilter); + } + } + } + + free(intf_alarms); + } + + /* handle interface alarm */ + SFmAlarmDataT *active_alarm = (SFmAlarmDataT*) malloc (sizeof (SFmAlarmDataT)); + strcpy(alarmFilter.alarm_id, ptr->alarm_id); + strcpy(alarmFilter.entity_instance_id, _rmon_ctrl_ptr->my_hostname); + strcat(alarmFilter.entity_instance_id, ".interface="); + strcat(alarmFilter.entity_instance_id, ptr->resource); + + if ((active_alarm != NULL) && (fm_get_fault( &alarmFilter, active_alarm) == FM_ERR_OK)) /* never hand a failed malloc to fm_get_fault */ + { + if (active_alarm != NULL) + { + ptr->failed = true; + } + } + + free(active_alarm); + + /* + * If the interface is DOWN, and neither a port + * nor an interface alarm is found for that interface, + * then that implies that the interface was DOWN before + * RMON came up. 
Consider that as a failed case as well + */ + if (ptr->interface_used && !ptr->failed && + (ptr->resource_value == INTERFACE_DOWN || + ptr->resource_value_lagged == INTERFACE_DOWN)) { + ilog("Interface %s has initial state DOWN. Marked as failed\n", + ptr->resource); + ptr->failed = true; + } + + /* service interface resource */ + if (ptr->failed) + { + ptr->alarm_raised = true; + service_resource_state ( ptr ); + } +} + +/***************************************************************************** + * + * Name : _set_alarm_defaults + * + * Purpose : Set the defaults for the interface and port alarms + * *****************************************************************************/ +void _set_alarm_defaults( interface_resource_config_type * ptr, rmon_ctrl_type * _rmon_ctrl_ptr ) +{ + /* common data for all alarm messages */ + + /* Interface alarms */ + strcpy(alarmData.uuid, ""); + strcpy(alarmData.entity_type_id ,"system.host"); + strcpy(alarmData.entity_instance_id, _rmon_ctrl_ptr->my_hostname); + strcat(alarmData.entity_instance_id, ".interface="); + alarmData.alarm_state = FM_ALARM_STATE_SET; + alarmData.alarm_type = FM_ALARM_OPERATIONAL; + alarmData.probable_cause = FM_ALARM_CAUSE_UNKNOWN; + snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action), + "Check cabling and far-end port configuration and status on adjacent equipment."); + alarmData.timestamp = 0; + alarmData.service_affecting = FM_TRUE; + alarmData.suppression = FM_TRUE; + strcpy(alarmData.alarm_id, ptr->alarm_id); + + /* Port One alarms */ + strcpy(alarmDataPOne.uuid, ""); + strcpy(alarmDataPOne.entity_type_id ,"system.host"); + strcpy(alarmDataPOne.entity_instance_id, _rmon_ctrl_ptr->my_hostname); + strcat(alarmDataPOne.entity_instance_id, ".port="); + alarmDataPOne.alarm_state = FM_ALARM_STATE_SET; + alarmDataPOne.alarm_type = FM_ALARM_OPERATIONAL; + alarmDataPOne.probable_cause = FM_ALARM_CAUSE_UNKNOWN; + snprintf(alarmDataPOne.proposed_repair_action , 
sizeof(alarmDataPOne.proposed_repair_action), + "Check cabling and far-end port configuration and status on adjacent equipment."); + alarmDataPOne.timestamp = 0; + alarmDataPOne.service_affecting = FM_TRUE; + alarmDataPOne.suppression = FM_TRUE; + strcpy(alarmDataPOne.alarm_id, ptr->alarm_id_port); + + /* Port Two alarms */ + strcpy(alarmDataPTwo.uuid, ""); + strcpy(alarmDataPTwo.entity_type_id ,"system.host"); + strcpy(alarmDataPTwo.entity_instance_id, _rmon_ctrl_ptr->my_hostname); + strcat(alarmDataPTwo.entity_instance_id, ".port="); + alarmDataPTwo.alarm_state = FM_ALARM_STATE_SET; + alarmDataPTwo.alarm_type = FM_ALARM_OPERATIONAL; + alarmDataPTwo.probable_cause = FM_ALARM_CAUSE_UNKNOWN; + snprintf(alarmDataPTwo.proposed_repair_action , sizeof(alarmDataPTwo.proposed_repair_action), + "Check cabling and far-end port configuration and status on adjacent equipment."); + alarmDataPTwo.timestamp = 0; + alarmDataPTwo.service_affecting = FM_TRUE; + alarmDataPTwo.suppression = FM_TRUE; + strcpy(alarmDataPTwo.alarm_id, ptr->alarm_id_port); +} + +/***************************************************************************** + * + * Name : interface_handler + * + * Purpose : Handle the failed interfaces and raise alarms through + * the FM API as well as sending events to registered clients + *****************************************************************************/ +void interface_handler( interface_resource_config_type * ptr ) +{ + #define MAX_CLEAR_COUNT (10) + AlarmFilter alarmFilter; + bool portOne = false; + bool portTwo = false; + + rmon_ctrl_type * _rmon_ctrl_ptr; + + _rmon_ctrl_ptr = get_rmon_ctrl_ptr(); + + if ( ptr->stage < RMON_STAGE__STAGES ) + { + dlog2 ("%s %s Stage %d\n", ptr->resource, rmonStages_str[ptr->stage], ptr->stage ); + } + else + { + interfaceResourceStageChange ( ptr, RMON_STAGE__FINISH ); + } + + switch ( ptr->stage ) + { + case RMON_STAGE__START: + { + dlog ( "%s failed:%d set_cnt:%d debounce_cnt:%d\n", + ptr->resource, + ptr->failed, + 
ptr->count, + ptr->debounce_cnt); + break ; + } + case RMON_STAGE__MANAGE: + + + { + /* sets alarms if thresholds are crossed */ + if (ptr->alarm_status == ALARM_ON) + { + + _set_alarm_defaults( ptr, _rmon_ctrl_ptr ); + + /* Interface and Port alarming */ + if (strcmp(ptr->resource, OAM_INTERFACE_NAME) == 0) + { + if ( ptr->sev == SEVERITY_CRITICAL ) + { + alarmData.severity = FM_ALARM_SEVERITY_CRITICAL; + ilog ("'OAM' Interface failed. \n"); + snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), + "'OAM' Interface failed."); + } + else if ( ptr->sev == SEVERITY_MAJOR ) + { + alarmData.severity = FM_ALARM_SEVERITY_MAJOR; + ilog ("'OAM' Interface degraded. \n"); + snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), + "'OAM' Interface degraded."); + } + + if ((ptr->interface_one[0] != '\0') && (ptr->resource_value == INTERFACE_DOWN)) + { + portOne = true; + ilog ("'OAM' Port failed. \n"); + snprintf(alarmDataPOne.reason_text, sizeof(alarmDataPOne.reason_text), + "'OAM' Port failed."); + /* Set port name in entity instance ID */ + strcat(alarmDataPOne.entity_instance_id, ptr->interface_one); + + } + if ((ptr->interface_two[0] != '\0') && (ptr->resource_value_lagged == INTERFACE_DOWN)) + { + portTwo = true; + ilog ("'OAM' Port failed. \n"); + snprintf(alarmDataPTwo.reason_text, sizeof(alarmDataPTwo.reason_text), + "'OAM' Port failed."); + /* Set port name in entity instance ID */ + strcat(alarmDataPTwo.entity_instance_id, ptr->interface_two); + } + } + else if (strcmp(ptr->resource, MGMT_INTERFACE_NAME) == 0) + { + if ( ptr->sev == SEVERITY_CRITICAL ) + { + alarmData.severity = FM_ALARM_SEVERITY_CRITICAL; + ilog ("'MGMT' Interface failed. \n"); + snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), + "'MGMT' Interface failed."); + } + else if ( ptr->sev == SEVERITY_MAJOR ) + { + alarmData.severity = FM_ALARM_SEVERITY_MAJOR; + ilog ("'MGMT' Interface degraded. 
\n"); + snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), + "'MGMT' Interface degraded."); + } + + if ((ptr->interface_one[0] != '\0') && (ptr->resource_value == INTERFACE_DOWN)) + { + portOne = true; + ilog ("'MGMT' Port failed. \n"); + snprintf(alarmDataPOne.reason_text, sizeof(alarmDataPOne.reason_text), + "'MGMT' Port failed."); + /* Set port name in entity instance ID */ + strcat(alarmDataPOne.entity_instance_id, ptr->interface_one); + + } + if ((ptr->interface_two[0] != '\0') && (ptr->resource_value_lagged == INTERFACE_DOWN)) + { + portTwo = true; + ilog ("'MGMT' Port failed. \n"); + snprintf(alarmDataPTwo.reason_text, sizeof(alarmDataPTwo.reason_text), + "'MGMT' Port failed."); + /* Set port name in entity instance ID */ + strcat(alarmDataPTwo.entity_instance_id, ptr->interface_two); + } + } + else if (strcmp(ptr->resource, INFRA_INTERFACE_NAME) == 0) + { + if ( ptr->sev == SEVERITY_CRITICAL ) + { + alarmData.severity = FM_ALARM_SEVERITY_CRITICAL; + ilog ("'INFRA' Interface failed. \n"); + snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), + "'INFRA' Interface failed."); + } + else if ( ptr->sev == SEVERITY_MAJOR ) + { + alarmData.severity = FM_ALARM_SEVERITY_MAJOR; + ilog ("'INFRA' Interface degraded. \n"); + snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), + "'INFRA' Interface degraded."); + } + + if ((ptr->interface_one[0] != '\0') && (ptr->resource_value == INTERFACE_DOWN)) + { + portOne = true; + ilog ("'INFRA' Port failed. \n"); + snprintf(alarmDataPOne.reason_text, sizeof(alarmDataPOne.reason_text), + "'INFRA' Port failed."); + /* Set port name in entity instance ID */ + strcat(alarmDataPOne.entity_instance_id, ptr->interface_one); + + } + if ((ptr->interface_two[0] != '\0') && (ptr->resource_value_lagged == INTERFACE_DOWN)) + { + portTwo = true; + ilog ("'INFRA' Port failed. 
\n"); + snprintf(alarmDataPTwo.reason_text, sizeof(alarmDataPTwo.reason_text), + "'INFRA' Port failed."); + /* Set port name in entity instance ID */ + strcat(alarmDataPTwo.entity_instance_id, ptr->interface_two); + } + } + snprintf(ptr->errorMsg, sizeof(ptr->errorMsg), + "%s major_threshold_set",ptr->resource); + + /* Set interface name in entity instance ID */ + strcat(alarmData.entity_instance_id, ptr->resource); + + dlog("Creating Interface Alarm: %s \n", ptr->resource); + if (rmon_fm_set(&alarmData, NULL) == FM_ERR_OK ) + { + ilog("Alarm created for resource: %s \n", ptr->resource); + ptr->alarm_raised = true; + } + else + { + ilog("Alarm create for resource: %s failed \n", ptr->resource); + } + + + if (portOne) + { + alarmDataPOne.severity = FM_ALARM_SEVERITY_MAJOR; + dlog("Creating Port One Alarm: %s \n", ptr->resource); + if (rmon_fm_set(&alarmDataPOne, NULL) == FM_ERR_OK ) + { + ilog("Alarm created for resource: %s port one \n", ptr->resource); + } else + { + ilog("Alarm create for resource: %s port one failed \n", ptr->resource); + } + } + + if (portTwo) + { + alarmDataPTwo.severity = FM_ALARM_SEVERITY_MAJOR; + dlog("Creating Port Two Alarm: %s \n", ptr->resource); + if (rmon_fm_set(&alarmDataPTwo, NULL) == FM_ERR_OK ) + { + ilog("Alarm created for resource: %s port two \n", ptr->resource); + } else + { + ilog("Alarm create for resource: %s port two failed \n", ptr->resource); + } + } + + + if (ptr->alarm_raised) + { + if ((_rmon_ctrl_ptr->clients > 0) && (ptr->failed_send < MAX_FAIL_SEND)) + { + if ( send_interface_msg ( ptr, _rmon_ctrl_ptr->clients ) != PASS ) + { + wlog ("%s request send failed \n", ptr->resource); + ptr->failed_send++; + } + else + { + ptr->failed_send = 0; + } + interfaceResourceStageChange ( ptr, RMON_STAGE__MONITOR_WAIT ); + } + else + { + ptr->failed_send = 0; + interfaceResourceStageChange ( ptr, RMON_STAGE__MONITOR_WAIT ); + } + } + } + else + { + interfaceResourceStageChange ( ptr, RMON_STAGE__FINISH ); + } + break; + } + + 
case RMON_STAGE__MONITOR_WAIT: + { + if ((_rmon_ctrl_ptr->clients > 0) && (ptr->failed_send < MAX_FAIL_SEND) && (ptr->failed_send > 0)) + { + if ( send_interface_msg ( ptr, _rmon_ctrl_ptr->clients ) != PASS ) + { + wlog ("%s request send failed \n", ptr->resource); + ptr->failed_send++; + } + else + { + ptr->failed_send = 0; + } + } + break; + } + + case RMON_STAGE__FAILED_CLR: + { + /* clear raised port alarms if one port comes back up */ + if (ptr->alarm_raised) + { + SFmAlarmDataT *active_alarm = (SFmAlarmDataT*) malloc (sizeof (SFmAlarmDataT)); + strcpy(alarmFilter.alarm_id, ptr->alarm_id_port); + + if (ptr->interface_one[0] != '\0') + { + strcpy(alarmFilter.entity_instance_id, _rmon_ctrl_ptr->my_hostname); + strcat(alarmFilter.entity_instance_id, ".port="); + strcat(alarmFilter.entity_instance_id, ptr->interface_one); + + if (fm_get_fault( &alarmFilter, active_alarm) == FM_ERR_OK) + { + if (active_alarm != NULL) + { + if ( ptr->resource_value == INTERFACE_UP ) + { + if (rmon_fm_clear(&alarmFilter) == FM_ERR_OK) + { + ilog ("Cleared alarms for port one, interface: %s \n", ptr->resource); + } + else + { + ilog ("Failed to cleared alarms for port one, interface: %s \n", ptr->resource); + } + } + } + } + } + + if (ptr->interface_two[0] != '\0') + { + strcpy(alarmFilter.entity_instance_id, _rmon_ctrl_ptr->my_hostname); + strcat(alarmFilter.entity_instance_id, ".port="); + strcat(alarmFilter.entity_instance_id, ptr->interface_two); + + if (fm_get_fault( &alarmFilter, active_alarm) == FM_ERR_OK) + { + if (active_alarm != NULL) + { + if ( ptr->resource_value_lagged == INTERFACE_UP ) + { + if (rmon_fm_clear(&alarmFilter) == FM_ERR_OK) + { + ilog ("Cleared alarms for port two, interface: %s \n", ptr->resource); + } + else + { + ilog ("Failed to cleared alarms for port two, interface: %s \n", ptr->resource); + } + } + } + } + } + free(active_alarm); + } + + interfaceResourceStageChange ( ptr, RMON_STAGE__MANAGE); + break; + } + + case RMON_STAGE__FINISH: + { + + if 
((ptr->alarm_status == ALARM_ON) && (ptr->alarm_raised)) + { + strcpy(alarmFilter.alarm_id, ptr->alarm_id_port); + + if (ptr->interface_one[0] != '\0') + { + /* clear port one alarm */ + strcpy(alarmFilter.entity_instance_id,_rmon_ctrl_ptr->my_hostname); + strcat(alarmFilter.entity_instance_id, ".port="); + strcat(alarmFilter.entity_instance_id, ptr->interface_one); + + EFmErrorT err = rmon_fm_clear(&alarmFilter); + if ((err == FM_ERR_OK) || (err == FM_ERR_ENTITY_NOT_FOUND)) + { + ilog ("Cleared alarms for port one, interface: %s \n", ptr->resource); + } + else + { + ilog ("Failed to cleared alarm for port one, interface: %s (rc:%d)\n", ptr->resource, err); + } + } + if (ptr->interface_two[0] != '\0') + { + /* clear port two alarm */ + strcpy(alarmFilter.entity_instance_id,_rmon_ctrl_ptr->my_hostname); + strcat(alarmFilter.entity_instance_id, ".port="); + strcat(alarmFilter.entity_instance_id, ptr->interface_two); + + EFmErrorT err = rmon_fm_clear(&alarmFilter); + if ((err == FM_ERR_OK) || (err == FM_ERR_ENTITY_NOT_FOUND)) + { + ilog ("Cleared alarms for port two, interface: %s \n", ptr->resource); + } + else + { + elog ("Failed to cleared alarms for port two, interface: %s (rc:%d)\n", ptr->resource, err ); + } + } + + /* clear interface alarm */ + strcpy(alarmFilter.alarm_id, ptr->alarm_id); + strcpy(alarmFilter.entity_instance_id,_rmon_ctrl_ptr->my_hostname); + strcat(alarmFilter.entity_instance_id, ".interface="); + strcat(alarmFilter.entity_instance_id, ptr->resource); + + EFmErrorT err = rmon_fm_clear(&alarmFilter); + if ((err == FM_ERR_OK) || (err == FM_ERR_ENTITY_NOT_FOUND)) + { + ilog ("Cleared alarms for interface: %s \n", ptr->resource); + snprintf(ptr->errorMsg, sizeof(ptr->errorMsg), + "%s cleared_alarms_for_resource:", ptr->resource); + + if ((_rmon_ctrl_ptr->clients > 0) && (ptr->failed_send < MAX_FAIL_SEND)) + { + while (( send_interface_msg ( ptr, _rmon_ctrl_ptr->clients ) != PASS ) && + ( ptr->failed_send < MAX_FAIL_SEND )) + { + wlog ("%s request 
send failed \n", ptr->resource); + ptr->failed_send++; + } + ptr->alarm_raised = false; + ptr->failed_send = 0; + ptr->failed = false ; + ptr->count = 0; + ptr->sev = SEVERITY_CLEARED ; + ptr->stage = RMON_STAGE__START ; + } + else + { + ptr->alarm_raised = false; + ptr->failed_send = 0; + ptr->failed = false ; + ptr->count = 0 ; + ptr->sev = SEVERITY_CLEARED ; + ptr->stage = RMON_STAGE__START ; + } + } + else + { + wlog ("%s alarm clear failed (rc:%d)\n", ptr->resource, err); + } + } + else + { + ptr->failed_send = 0; + ptr->failed = false ; + ptr->count = 0 ; + ptr->sev = SEVERITY_CLEARED ; + ptr->stage = RMON_STAGE__START ; + } + break ; + } + + default: + { + slog ("%s Invalid stage (%d)\n", ptr->resource, ptr->stage ); + + /* Default to finish for invalid case. + * If there is an issue then it will be detected */ + interfaceResourceStageChange ( ptr, RMON_STAGE__FINISH ); + } + } +} diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/remoteloggingHdlr.cpp b/mtce-common/cgts-mtce-common-1.0/rmon/remoteloggingHdlr.cpp new file mode 100644 index 00000000..0a8e3eba --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/remoteloggingHdlr.cpp @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2013-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River Titanium Cloud Platform remote logging Monitor Handler + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for storing dynamic resource names */ +#include +#include +#include "rmon.h" /* rmon header file */ +#include "rmonHttp.h" /* for rmon HTTP libEvent utilties */ +#include "rmonApi.h" +#include +#include +#include +#include +#include +#include +#include /* for ... RTMGRP_LINK */ +#include "nlEvent.h" /* for ... open_netlink_socket */ +#include "nodeEvent.h" /* for inotify */ +#include /* for ... 
json-c json string parsing */ +#include "jsonUtil.h" +#include +#include +#include + +static libEvent_type remoteLoggingAudit; // for system remotelogging-show + +static inline SFmAlarmDataT +create_remoteLogging_controller_connectivity_alarm (SFmAlarmDataT data, + AlarmFilter filter) +{ + snprintf (data.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", + filter.alarm_id); + data.alarm_state = FM_ALARM_STATE_SET; + snprintf(data.entity_type_id, FM_MAX_BUFFER_LENGTH, "system.host"); + snprintf(data.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", + filter.entity_instance_id); + data.severity = FM_ALARM_SEVERITY_MINOR; + snprintf(data.reason_text, sizeof(data.reason_text), + "Controller cannot establish connection with remote logging server."); + data.alarm_type = FM_ALARM_COMM; + data.probable_cause = FM_ALARM_COMM_SUBSYS_FAILURE; + data.service_affecting = FM_FALSE; + snprintf(data.proposed_repair_action, sizeof(data.proposed_repair_action), + "Ensure Remote Log Server IP is reachable from Controller through " + "OAM interface; otherwise contact next level of support."); + return data; +} + +// alarm data for all remote loggin alarms +static SFmAlarmDataT alarmData; + +int rmonHdlr_remotelogging_query (resource_config_type * ptr); + +// this is used to create a buffer to store output from a command +// that gets the connection status of a port. 
+// the command filters the /proc/net/tcp(udp) files leaving only the status +// you generally expect a 1 character integer value for the status +#define CONNECTION_STATUS_COMMAND_OUTPUT_BUFFER_SIZE 8 + +/***************************************************************************** + * + * Name : rmonHdlr_remotelogging_handler + * + * Purpose : Handles the remote logging response message + * + *****************************************************************************/ +void rmonHdlr_remotelogging_handler ( struct evhttp_request *req, void *arg ) +{ + + if ( !req ) + { + elog (" Request Timeout\n"); + remoteLoggingAudit.status = FAIL_TIMEOUT ; + goto _remote_logging_handler_done ; + } + + remoteLoggingAudit.status = rmonHttpUtil_status ( remoteLoggingAudit ) ; + if ( remoteLoggingAudit.status == HTTP_NOTFOUND ) + { + + goto _remote_logging_handler_done ; + } + else if ( remoteLoggingAudit.status != PASS ) + { + dlog (" remote logging HTTP Request Failed (%d)\n", + remoteLoggingAudit.status); + + goto _remote_logging_handler_done ; + } + + if ( rmonHttpUtil_get_response ( remoteLoggingAudit ) != PASS ) + goto _remote_logging_handler_done ; + +_remote_logging_handler_done: + /* This is needed to get out of the loop */ + event_base_loopbreak((struct event_base *)arg); +} + +/***************************************************************************** + * + * Name : rmonHdlr_remotelogging_query + * + * Purpose : Send a HTTP remotelogging show request + * + *****************************************************************************/ + +int rmonHdlr_remotelogging_query (resource_config_type * ptr) +{ + + // we want this handler to run once every 5 minutes + // rmon currently runs once every 30 seconds + static bool first_execution = true; + static int exec_counter = 9; + exec_counter = (exec_counter + 1) % 10; + if(exec_counter != 0) + { + return 0; + } + // extract the ip and port for the remote logging server + FILE* pFile; + string remote_ip_address = ""; + 
string remote_port = ""; + string transport_type = ""; + string line; + bool feature_enabled = false; + + std::ifstream syslog_config("/etc/syslog-ng/syslog-ng.conf"); + // look for this line in the config file: + // destination remote_log_server {tcp("128.224.140.219" port(514));}; + while(std::getline(syslog_config, line)) + { + // include remotelogging.conf is present if the feature is enabled + if(line.find("@include \"remotelogging.conf\"") == 0) + { + feature_enabled = true; + } + if(line.find("destination remote_log_server") != 0) + { + continue; + } + int start = line.find("{") + 1; + int end = line.find("(", start + 1); + transport_type= line.substr(start, end - start); + start = line.find("\"") + 1; + end = line.find("\"", start + 1); + remote_ip_address = line.substr(start, end - start); + start = line.find("port(") + 5; + end = line.find(")", start + 1); + remote_port = line.substr(start, end - start); + } + + syslog_config.close(); + + // cleanup of any alarms if the remotelogging feature is not enabled + // this is important for when users turn off the remote logging feature when an alarm is active + // if the line containing this information is not in config, remote logging is not used + if(remote_ip_address.empty() || remote_port.empty() || transport_type.empty() || !feature_enabled) + { + // currently, only controllers raise alarms + if(is_controller()) + { + // clear any raised alarms + if(ptr->alarm_raised) + { + rmon_ctrl_type* _rmon_ctrl_ptr; + _rmon_ctrl_ptr = get_rmon_ctrl_ptr(); + AlarmFilter alarmFilter; + snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, REMOTE_LOGGING_CONTROLLER_CONNECTIVITY_ALARM_ID); + snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, + "%s", _rmon_ctrl_ptr->my_hostname); + int rc; + if ((rc = rmon_fm_clear(&alarmFilter)) != FM_ERR_OK) + { + wlog ("Failed to clear stale remotelogging connectivity alarm for" + "entity instance id: %s error: %d", + alarmFilter.entity_instance_id, rc); + } + else + { + 
ptr->alarm_raised = false; + } + } + } + return 0; + } + + // construct the remote logging server IP string + // the files being looked at(/proc/net/tcp(udp)) uses hex values, so convert the + // string we got from the config file to that format + // - convert all numbers to hex and hex to capitals + // reverse ordering of the "ipv4" values + std::stringstream config_ip(remote_ip_address); // the ip string from the config file + std::stringstream proc_file_ip; // the ip string formatted to compare to /proc/net/tcp(udp) + int ipv = 4; + + // IPv4 + if(remote_ip_address.find(".") != string::npos) + { + // ipv4 example: config file 10.10.10.45, /proc/net/tcp 2D0A0A0A + int a, b, c, d; + char trash; + config_ip >> a >> trash >> b >> trash >> c >> trash >> d; + proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << d; + proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << c; + proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << b; + proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << a; + proc_file_ip << ":"; + proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(4) << atoi(remote_port.c_str()); + } + // IPv6 + else if (remote_ip_address.find(":") != string::npos) + { + ipv = 6; + // ipv6 example: config file 0:0:0:0:ffff:0:80e0:8d6c , /proc/net/tcp6 0000000000000000FFFF00006C8D0E080 + int a, b, c, d; + char trash; + // first, the hex that are in the same order from config to /proc/net/... 
+ for(int i = 0; i < 6; i++) + { + config_ip >> std::hex >> a >> trash; + proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(4) << a; + } + + // now the hex that needs to be re ordered + config_ip >> std::hex >> a >> trash >> c; + b = (a & 0xFF); + a = (a >> 8); + d = (c & 0xFF); + c = (c >> 8); + + proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << d; + proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << c; + proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << b; + proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(2) << a; + proc_file_ip << ":"; + proc_file_ip << std::hex << std::uppercase << std::setfill('0') << std::setw(4) << atoi(remote_port.c_str()); + } + // garbage + else + { + wlog("Unrecognized ip format in syslog config file\n"); + } + + string connection_check_filename; + if(transport_type == "tcp") + { + connection_check_filename = "tcp"; + } + else if (transport_type == "udp") + { + connection_check_filename = "udp"; + } + // todo: eventually we will have TLS as a transport type and potentially others + else + { + wlog("Unrecognized remote logging transport type: %s \n", transport_type.c_str()); + } + + if(ipv == 6) + { + connection_check_filename = connection_check_filename + "6"; + } + + std::string command = "cat /proc/net/" + connection_check_filename +" | awk '{print $3 \" \" $4}' | grep " \ + + proc_file_ip.str() + " | awk '{print $2}'"; + if(!(pFile = popen(command.c_str(), "r"))) + { + elog ("Failed to execute command for getting remotelogging tcp port status"); + } + else + { + char cmd_output[CONNECTION_STATUS_COMMAND_OUTPUT_BUFFER_SIZE]; + int connection_status = 0; + rmon_ctrl_type* _rmon_ctrl_ptr; + _rmon_ctrl_ptr = get_rmon_ctrl_ptr(); + AlarmFilter alarmFilter; + SFmAlarmDataT active_alarm; + + memset(cmd_output, 0, CONNECTION_STATUS_COMMAND_OUTPUT_BUFFER_SIZE); + fgets((char*) &cmd_output, 
CONNECTION_STATUS_COMMAND_OUTPUT_BUFFER_SIZE, pFile); + pclose(pFile); + std::stringstream s(cmd_output); + s >> std::hex >> connection_status; + + snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, REMOTE_LOGGING_CONTROLLER_CONNECTIVITY_ALARM_ID); + snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, + "%s", _rmon_ctrl_ptr->my_hostname); + + if(first_execution) + { + if (fm_get_fault (&alarmFilter, &active_alarm) == FM_ERR_OK) + { + ptr->alarm_raised = true; + } + else + { + ptr->alarm_raised = false; + } + } + if(connection_status == 1) + { + if(is_controller()) + { + // connection is established, clear the alarm + if(ptr->alarm_raised) + { + int rc; + if ((rc = rmon_fm_clear(&alarmFilter)) != FM_ERR_OK) + { + wlog ("Failed to clear stale remotelogging connectivity alarm for" + "entity instance id: %s error: %d", + alarmFilter.entity_instance_id, rc); + } + else + { + ptr->alarm_raised = false; + } + } + } + } + else + { + if(is_controller()) + { + // connection is not established, raise an alarm + if (!ptr->alarm_raised) + { + int rc; + alarmData = \ + create_remoteLogging_controller_connectivity_alarm(alarmData, + alarmFilter); + + if ((rc = rmon_fm_set(&alarmData, NULL)) != FM_ERR_OK) + { + wlog("Failed to create alarm %s for entity instance id: %s" + "error: %d \n", REMOTE_LOGGING_CONTROLLER_CONNECTIVITY_ALARM_ID, + alarmData.entity_instance_id, (int) rc); + } + else + { + ptr->alarm_raised = true; + } + } + } + else + { + elog ("%s cannot connect to remote log server", _rmon_ctrl_ptr->my_hostname); + } + } + } + + return 0; +} + diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmon.h b/mtce-common/cgts-mtce-common-1.0/rmon/rmon.h new file mode 100755 index 00000000..f376d9f2 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmon.h @@ -0,0 +1,852 @@ +#ifndef __INCLUDE_RMON_HH__ +#define __INCLUDE_RMON_HH__ +/* + * Copyright (c) 2013-2017 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/* + * This implements the CGCS Resource Monitor ; /usr/local/bin/rmond + * The Resource monitor or rmon is a utility to provide: cpu, memory and + * filesystem usage and alarm stats both to the user and to registered client + * processes on the host it is running on. + * + * Call trace is as follows: + * daemon_init + * rmon_timer_init + * rmon_hdlr_init + * daemon_files_init + * daemon_signal_init + * daemon_configure + * ini_parse + * get_debug_options + * get_iface_macaddr + * get_iface_address + * get_iface_hostname + * socket_init + * rmon_msg_init + * setup_tx_port + * + * daemon_service_run + * wait for goenable signal + * rmon_send_event ( READY ) + * rmon_service + * _config_dir_load + * _config_files_load + * _forever + * service_events: + * _get_events every audit period seconds + * resource_handler handles the resource values and sends + * alarm messages through fm api to set or clear resource + * thresholds as well as notifying registered clients through + * the rmon client api. + * + * + * + * + * This daemon waits for a "goenabled" signal and then reads all the resource + * configuration files in: /etc/rmon.d and begins monitoring them accordingly. + * A resource configuration file is expected to contain the following information: + * + * [resource] + * resource = ; name of resource being monitored + * debounce = ; number of seconds to wait before degrade clear + * severity = ; minor, major, critical + * minor_threshold = ; minor resource utilization threshold + * major_threshold = ; major resource utilization threshold + * critical_threshold = ; critical resource utilization threshold + * num_tries = ; number of tries before the alarm is raised or cleared + * alarm_on = ; dictates whether maintenance gets alarms from rmon + * 1 for on, 0 for off + * + * Here is how it works ... + * + * Every audit period seconds the resources defined in the config files get + * monitored. If the resource i.e.
CPU usage crosses a threshold: + * (minor, major or critical) count times an alarm is raised and a message is sent to + * all clients registered for the resource. If the resource usage drops below + * that threshold count times, the alarms are cleared and a message is sent to + * all registered clients in order to clear the alarm. The audit period as well as + * other rmon config options are specified in the /etc/mtc/rmond.conf file with + * the following (example) information: + * + * ; CGTS Resource Monitor Configuration File + * [config] ; Configuration + * audit_period = 10 ; Resource polling period in seconds (1 - 120) + * + * rmon_tx_port = 2101 ; Transmit Event and Command Reply Port + * per_node = 0 ; enable (1) or disable (0) memory checking per processor node + * rmon_api_port = 2300 ; Resource Monitor API Receive Port + * + * [defaults] + * + * [timeouts] + * start_delay = 10 ; managed range 1 .. 120 seconds + * + * [features] + * + * [debug] ; SIGHUP to reload + * debug_timer = 0 ; enable(1) or disable(0) timer logs (tlog) + * debug_msg = 0 ; enable(1) or disable(0) message logs (mlog) + * debug_state = 0 ; enable(1) or disable(0) state change logs (clog) + * debug_level = 0 ; decimal mask 0..15 (8,4,2,1) + * + * flush = 0 ; enable(1) or disable(0) force log flush (main loop) + * flush_thld = 2 ; if enabled - force flush after this number of loops + * + * debug_event = none ; Not used + * debug_filter = none ; Not used + * stress_test = 0 ; In-Service Stress test number + * + * To check the alarms that are raised the command: + * system alarm-list can be used.
Rmon alarms have the following codes: + * + * 100.101: CPU usage threshold crossed + * 100.102: vSwitch CPU usage threshold crossed + * 100.103: Memory usage threshold crossed + * 100.104: Filesystem usage threshold crossed + * + * To register your process for rmon notifications using the rmon client api + * please see the files: rmon_api.h for usage of the api as well as: + * rmon_api_client_test.cpp and rmon_api_client_test.h for an example + * implementation for your process. + * + */ + /** + * @file + * Wind River CGCS Platform Resource Monitor Service Header + */ + +#include +#include +#include +#include +#include +#include +#include +#include /* for hostent */ +#include +#include +#include +#include +#include +#include +#include /* for close and usleep */ +#include +#include /* for round */ +#include "nodeBase.h" +#include "daemon_ini.h" /* Ini Parser Header */ +#include "daemon_common.h" /* Common definitions and types for daemons */ +#include "daemon_option.h" /* Common options for daemons */ +#include "nodeTimers.h" /* maintenance timer utilities start/stop */ +#include "nodeUtil.h" /* common utilities */ +#include "tokenUtil.h" /* for ... keystone_config_handler */ +#include "fmAPI.h" +#include "httpUtil.h" /* for ... libEvent */ +extern "C" +{ + #include "rmon_api.h" /* for ... 
resource monitoring utilities */
+}
+/**
+ * @addtogroup RMON_base
+ * @{
+ */
+using namespace std;
+
+/* Log area tag for this daemon ; any earlier definition is replaced */
+#ifdef __AREA__
+#undef __AREA__
+#endif
+#define __AREA__ "mon"
+
+/* openstack Identity version */
+#define OS_IDENTITY_VERSION_PREFIX ((const char *)"/v3")
+
+/* Config and resource files used by rmon
+ * Note: despite the _DIR suffix, COMPUTE_VSWITCH_DIR and PLATFORM_DIR (below)
+ * hold the path of a .conf file, not of a directory. */
+#define CONFIG_DIR ((const char *)"/etc/rmon.d")
+#define INT_CONFIG_DIR ((const char *)"/etc/rmon_interfaces.d")
+#define COMPUTE_VSWITCH_DIR ((const char *)"/etc/nova/compute_extend.conf")
+#define COMPUTE_RESERVED_CONF ((const char *)"/etc/nova/compute_reserved.conf")
+#define DYNAMIC_FS_FILE ((const char *)"/etc/rmonfiles.d/dynamic.conf")
+#define STATIC_FS_FILE ((const char *)"/etc/rmonfiles.d/static.conf")
+
+// this path is different in Wind River Linux vs. CentOS.
+// For the latter, we shall look specifically within
+// the bonding interface device directory
+#define INTERFACES_DIR ((const char *)"/sys/class/net/")
+#define PLATFORM_DIR ((const char *)"/etc/platform/platform.conf")
+
+#define MOUNTS_DIR ((const char *)"/proc/mounts")
+#define COMPUTE_CONFIG_PASS ((const char *)"/var/run/.config_pass")
+#define COMPUTE_CONFIG_FAIL ((const char *)"/var/run/.config_fail")
+#define RMON_FILES_DIR ((const char *)"/etc/rmonfiles.d")
+#define NTPQ_QUERY_SCRIPT ((const char *)"query_ntp_servers.sh")
+
+/* Constant search keys used to update rmon resource usage */
+#define CPU_RESOURCE_NAME ((const char *)"Platform CPU Usage")
+#define V_CPU_RESOURCE_NAME ((const char *)"vSwitch CPU Usage")
+#define MEMORY_RESOURCE_NAME ((const char *)"Platform Memory Usage")
+#define FS_RESOURCE_NAME ((const char *)"Platform Filesystem Usage")
+#define INSTANCE_RESOURCE_NAME ((const char *)"Platform Nova Instances")
+#define V_MEMORY_RESOURCE_NAME ((const char *)"vSwitch Memory Usage")
+#define V_PORT_RESOURCE_NAME ((const char *)"vSwitch Port Usage")
+#define V_INTERFACE_RESOURCE_NAME ((const char *)"vSwitch Interface Usage")
+#define V_LACP_INTERFACE_RESOURCE_NAME ((const char *)"vSwitch LACP Interface Usage")
+#define V_OVSDB_RESOURCE_NAME ((const char *)"vSwitch OVSDB Usage")
+#define V_NETWORK_RESOURCE_NAME ((const char *)"vSwitch Network Usage")
+#define V_OPENFLOW_RESOURCE_NAME ((const char *)"vSwitch Openflow Usage")
+#define V_CINDER_THINPOOL_RESOURCE_NAME ((const char *)"Cinder LVM Thinpool Usage")
+#define V_NOVA_THINPOOL_RESOURCE_NAME ((const char *)"Nova LVM Thinpool Usage")
+#define REMOTE_LOGGING_RESOURCE_NAME ((const char *)"Remote Logging Connectivity")
+
+/* dynamic resources used for thin provisioning monitoring */
+#define CINDER_VOLUMES ((const char *)"cinder-volumes")
+#define NOVA_LOCAL ((const char *)"nova-local")
+
+/* NOTE(review): presumably the request string and the response flag file of
+ * the dynamic file system registration - confirm against the users */
+#define RMON_RESOURCE_NOT ((const char *)"read_dynamic_file_system")
+#define RESPONSE_RMON_RESOURCE_NOT ((const char *)"/var/run/.dynamicfs_registered")
+
+#define POSTGRESQL_FS_PATH ((const char *)"/var/lib/postgresql")
+
+#define RESOURCE_DISABLE (0)
+
+/* Thin provisioning metadata monitoring */
+#define THINMETA_FSM_RETRY 3
+#define THINMETA_CONFIG_SECTION "thinpool_metadata"
+#define THINMETA_DEFAULT_CRITICAL_THRESHOLD 0 // feature is disabled by default
+#define THINMETA_DEFAULT_ALARM_ON 1 // alarm is enabled
+#define THINMETA_DEFAULT_AUTOEXTEND_ON 1 // autoextend is enabled (only if monitoring is enabled!)
+#define THINMETA_DEFAULT_AUTOEXTEND_BY 20 // autoextend by 20%, same as example in /etc/lvm/lvm.conf
+#define THINMETA_DEFAULT_AUTOEXTEND_PERCENT 1 // autoextend by a percentage
+#define THINMETA_DEFAULT_AUDIT_PERIOD 10 // seconds to perform audit, same as LVM (broken) audit of lvmetad
+#define THINMETA_RESULT_BUFFER_SIZE (1024) // result for lvm commands may be bigger than default BUFFER_SIZE
+#define THINMETA_INVALID_NAME ((const char *) "invalid name!")
+
+/* Constant search keys used to update rmon interface usage */
+#define MGMT_INTERFACE_NAME ((const char *)"mgmt")
+#define INFRA_INTERFACE_NAME ((const char *)"infra")
+#define OAM_INTERFACE_NAME ((const char *)"oam")
+#define MGMT_INTERFACE_FULLNAME ((const char *)"management_interface")
+#define OAM_INTERFACE_FULLNAME ((const char *)"oam_interface")
+#define INFRA_INTERFACE_FULLNAME ((const char *)"infrastructure_interface")
+
+ /* Daemon Config Constants
+  * These are OR'ed together into CONF_MASK further down. */
+#define CONFIG_AUDIT_PERIOD 1
+#define PM_AUDIT_PERIOD 15
+#define NTP_AUDIT_PERIOD 600 //10 minutes
+#define NTPQ_CMD_TIMEOUT 60 //1 minute
+#define CONFIG_TX_PORT 2
+#define CONFIG_RX_PORT 4
+#define CONFIG_CRITICAL_THR 5
+#define CONFIG_NODE 12
+#define CONFIG_START_DELAY 20
+
+/* rmon resource default percent thresholds */
+#define DEFAULT_MINOR (80)
+#define DEFAULT_MAJOR (90)
+#define DEFAULT_CRITICAL (95)
+#define UNUSED_CRITICAL (101)
+
+/* processor node0 default memory thresholds */
+#define DEFAULT_MINOR_ABS_NODE0 (512)
+#define DEFAULT_MAJOR_ABS_NODE0 (307)
+#define DEFAULT_CRITICAL_ABS_NODE0 (102)
+#define UNUSED_CRITICAL_ABS_NODE0 (0)
+
+/* processor node1 default memory thresholds */
+#define DEFAULT_MINOR_ABS_NODE1 (0)
+#define DEFAULT_MAJOR_ABS_NODE1 (0)
+#define DEFAULT_CRITICAL_ABS_NODE1 (0)
+
+/* absolute threshold array index */
+#define RMON_MINOR_IDX (0)
+#define RMON_MAJOR_IDX (1)
+#define RMON_CRITICAL_IDX (2)
+
+/* Default startup settings */
+#define DEFAULT_NUM_TRIES (2) /* Number of tries before an alarm is set or cleared */
+#define DEFAULT_ALARM_STATUS (1) /* Alarms are on by default */
+#define DEFAULT_PERCENT (1) /* Percentage thresholds are used by default */
+#define PERCENT_USED (1) /* Percent is used for the resource */
+#define PERCENT_UNUSED (0) /* Absolute values are used for the resource */
+#define DYNAMIC_ALARM (1) /* Filesystem alarm is a dynamic alarm, persisting among nodes */
+#define STATIC_ALARM (2) /* Filesystem alarm is a local, static resource */
+#define STANDARD_ALARM (3) /* Alarm is not a filesystem alarm */
+
+#define HUGEPAGES_NODE 0 /* 0 or 1 for per hugepages node memory stats */
+#define PROCESSOR_NODE 0 /* 0 or 1 for per processor node memory stats */
+#define ALARM_OFF 0 /* Do not notify maintenance if alarm off */
+#define ALARM_ON 1 /* Notify maintenance if alarm on */
+#define PASS (0)
+#define FAIL (1)
+
+/* Monitored Resource severity levels */
+#define SEVERITY_MINOR 0
+#define SEVERITY_MAJOR 1
+#define SEVERITY_CRITICAL 2
+#define SEVERITY_CLEARED 3
+#define MINORLOG_THRESHOLD (20)
+#define PROCLOSS_THRESHOLD (5)
+#define MAX_RESOURCES (100)
+#define MAX_FILESYSTEMS (100)
+#define MAX_BASE_CPU (100)
+
+#define DEGRADE_CLEAR_MSG ((const char *)("cleared_degrade_for_resource"))
+
+/* File System Custom Thresholds */
+#define TMPFS_MINOR (8)
+#define TMPFS_MAJOR (6)
+#define TMPFS_CRITICAL (4)
+#define BOOTFS_MINOR (200)
+#define BOOTFS_MAJOR (100)
+#define BOOTFS_CRITICAL (50)
+
+/* Upper bound on consecutive failed client sends (see uses of failed_send) */
+#define MAX_FAIL_SEND (10)
+#define MAX_SWACT_COUNT (10)
+
+/* Percent thresholds Database monitoring */
+#define FS_MINOR (70)
+#define FS_MAJOR (80)
+#define FS_CRITICAL (90)
+
+/* Resource Alarm ids */
+#define CPU_ALARM_ID ((const char *)"100.101")
+#define V_CPU_ALARM_ID ((const char *)"100.102")
+#define MEMORY_ALARM_ID ((const char *)"100.103")
+#define FS_ALARM_ID ((const char *)"100.104")
+#define INSTANCE_ALARM_ID ((const char *)"100.105")
+#define OAM_PORT_ALARM_ID ((const char *)"100.106")
+#define OAM_ALARM_ID ((const char *)"100.107")
+#define MGMT_PORT_ALARM_ID ((const char *)"100.108")
+#define MGMT_ALARM_ID ((const char *)"100.109")
+#define INFRA_PORT_ALARM_ID ((const char *)"100.110")
+#define INFRA_ALARM_ID ((const char *)"100.111")
+#define VRS_PORT_ALARM_ID ((const char *)"100.112") //used for HP branch only
+#define VRS_ALARM_ID ((const char *)"100.113") //used for HP branch only
+#define NTP_ALARM_ID ((const char *)"100.114")
+#define V_MEMORY_ALARM_ID ((const char *)"100.115")
+#define V_CINDER_THINPOOL_ALARM_ID ((const char *)"100.116")
+#define V_NOVA_THINPOOL_ALARM_ID ((const char *)"100.117")
+#define THINMETA_ALARM_ID ((const char *)"800.103")
+
+// ripped from fm-api constants for Neutron AVS alarms
+// being moved over to RMON
+#define V_PORT_ALARM_ID ((const char *)"300.001")
+#define V_INTERFACE_ALARM_ID ((const char *)"300.002")
+
+// remote logging alarm ID
+#define REMOTE_LOGGING_CONTROLLER_CONNECTIVITY_ALARM_ID ((const char *)"100.118")
+
+// SDN specific alarms
+#define V_OPENFLOW_CONTROLLER_ALARM_ID ((const char *)"300.012")
+#define V_OPENFLOW_NETWORK_ALARM_ID ((const char *)"300.013")
+#define V_OVSDB_MANAGER_ALARM_ID ((const char *)"300.014")
+#define V_OVSDB_ALARM_ID ((const char *)"300.015")
+
+#define INTERFACE_NAME_LEN (10)
+#define INTERFACE_UP (1)
+#define INTERFACE_DOWN (0)
+#define MAX_CLIENTS (100)
+#define RMON_MAX_LEN (100)
+#define MOUNTED (1)
+#define NOT_MOUNTED (0)
+#define MTC_EVENT_RMON_READY (0x0f0f0f0f)
+
+#define NTP_ERROR (255)
+
+/** Daemon Config Mask */
+/* NOTE(review): the values OR'ed here (1, 15, 600, 60, 12, 20, 2, 4, 5) are
+ * not distinct bit flags, so the result cannot tell which individual items
+ * were seen - confirm how this mask is consumed. */
+#define CONF_MASK (CONFIG_AUDIT_PERIOD |\
+                   PM_AUDIT_PERIOD |\
+                   NTP_AUDIT_PERIOD |\
+                   NTPQ_CMD_TIMEOUT |\
+                   CONFIG_NODE |\
+                   CONFIG_START_DELAY |\
+                   CONFIG_TX_PORT |\
+                   CONFIG_RX_PORT |\
+                   CONFIG_CRITICAL_THR)
+
+/* CONF_PORT, CONF_PERIOD, CONF_TIMEOUT and CONF_THRESHOLD are not defined in
+ * this part of the header. */
+#define CONF_RMON_API_MASK (CONF_PORT | \
+                            CONF_PERIOD | \
+                            CONF_TIMEOUT | \
+                            CONF_THRESHOLD)
+
+
+
+#define RMON_MIN_START_DELAY (1)
+#define RMON_MAX_START_DELAY (120)
+
+#define RMON_MIN_AUDIT_PERIOD (10) /* Minimum audit period for resource if none specified */
+#define
RMON_MAX_AUDIT_PERIOD (120) /* Maximum audit period for resource if none specified */
+
+#define RMON_MIN_PM_PERIOD (60) /* Minimum pm period for resource if none specified */
+#define RMON_MAX_PM_PERIOD (600) /* Maximum pm period for resource if none specified */
+
+#define RMON_MIN_NTP_AUDIT_PERIOD (10) /* Minimum audit period for resource if none specified */
+#define RMON_MAX_NTP_AUDIT_PERIOD (1200) /* Maximum audit period for resource if none specified */
+
+/* Monitored Resource Config Bit Mask */
+#define CONF_RESOURCE (0x01)
+#define CONF_STYLE (0x04)
+#define CONF_SEVERITY (0x20)
+#define CONF_INTERVAL (0x40)
+#define CONF_DEBOUNCE (0x80)
+
+/* Usual buffer sizes */
+#define RATE_THROTTLE (6)
+#define BUFFER_SIZE (128)
+/* Monitored Resource stages for resource handler fsm
+ * NOTE(review): RMON_STAGE__STAGES is presumably the stage count, not a
+ * stage of its own - confirm. */
+typedef enum
+{
+    RMON_STAGE__INIT,
+    RMON_STAGE__START,
+    RMON_STAGE__MANAGE,
+    RMON_STAGE__MONITOR_WAIT,
+    RMON_STAGE__MONITOR,
+    RMON_STAGE__RESTART_WAIT,
+    RMON_STAGE__IGNORE,
+    RMON_STAGE__FINISH,
+    RMON_STAGE__FAILED,
+    RMON_STAGE__FAILED_CLR,
+    RMON_STAGE__STAGES,
+} rmonStage_enum ;
+
+/* Stages of the ntp query fsm
+ * NOTE(review): NTP_STAGE__STAGES is presumably the stage count - confirm. */
+typedef enum
+{
+    NTP_STAGE__BEGIN,
+    NTP_STAGE__EXECUTE_NTPQ,
+    NTP_STAGE__EXECUTE_NTPQ_WAIT,
+    NTP_STAGE__STAGES,
+} ntpStage_enum ;
+
+ /* The return values from the ntpq query */
+typedef enum
+{
+    NTP_OK = 0, /* All NTP servers are reachable and one is selected */
+    NTP_NOT_PROVISIONED = 1, /* No NTP servers are provisioned */
+    NTP_NONE_REACHABLE = 2, /* None of the NTP servers are reachable */
+    NTP_SOME_REACHABLE = 3, /* Some NTP servers are reachable and one selected */
+    NTP_SOME_REACHABLE_NONE_SELECTED = 4 /* Some NTP servers are reachable but none is selected, will treat it as none reachable */
+} NTPQueryStatus;
+
+/* Generic kind of a monitored resource ; stored in the res_type member of
+ * the resource config structure below */
+typedef enum
+{
+    RESOURCE_TYPE__UNKNOWN,
+    RESOURCE_TYPE__FILESYSTEM_USAGE,
+    RESOURCE_TYPE__MEMORY_USAGE,
+    RESOURCE_TYPE__CPU_USAGE,
+    RESOURCE_TYPE__DATABASE_USAGE,
+    RESOURCE_TYPE__NETWORK_USAGE,
+    RESOURCE_TYPE__PORT,
+    RESOURCE_TYPE__INTERFACE,
+    RESOURCE_TYPE__CONNECTIVITY,
+} resType_enum ;
+
+/* Structure to store memory stats (KiB)
+ * NOTE(review): most member names match /proc/meminfo keys, so the values
+ * are presumably read from there - confirm against the code that fills it. */
+typedef struct
+{
+    unsigned long int MemTotal;
+    unsigned long int MemFree;
+    unsigned long int Buffers;
+    unsigned long int Cached;
+    unsigned long int SlabReclaimable;
+    unsigned long int CommitLimit;
+    unsigned long int Committed_AS;
+    unsigned long int HugePages_Total;
+    unsigned long int HugePages_Free;
+    unsigned long int FilePages;
+    unsigned long int Hugepagesize;
+    unsigned long int AnonPages;
+} memoryinfo;
+
+/* Size in bytes of tx_buf in the socket structure below */
+#define RMON_API_MAX_LEN (100)
+/* Transmit side socket state used to reach a monitored process
+ * NOTE(review): tx_buf is described as a receive buffer although every
+ * member carries the tx_ prefix - confirm which direction it serves. */
+typedef struct
+{
+    int tx_sock ; /**< socket to monitored process */
+    int tx_port ; /**< port to monitored process */
+    struct sockaddr_in tx_addr ; /**< process socket attributes */
+    char tx_buf[RMON_API_MAX_LEN]; /**< Server receive buffer */
+    socklen_t len ; /**< Socket Length */
+} rmon_api_socket_type ;
+
+typedef struct
+{
+
+    /* Config Items */
+    unsigned int mask ;
+    resType_enum res_type ; /* specifies the generic resource type */
+    const char * resource ; /* The name of the Resource being monitored */
+    const char * severity ; /* MINOR, MAJOR or CRITICAL for each resource */
+    unsigned int debounce ; /* Period to wait before clearing alarms */
+    unsigned int minor_threshold; /* % Value for minor threshold crossing */
+    unsigned int major_threshold; /* % Value for major threshold crossing */
+    unsigned int critical_threshold; /* % Value for critical threshold crossing */
+    unsigned int minor_threshold_abs_node0; /* Absolute value for minor threshold crossing processor node 0 */
+    unsigned int major_threshold_abs_node0; /* Absolute value for major threshold crossing processor node 0 */
+    unsigned int critical_threshold_abs_node0; /* Absolute value for critical threshold crossing processor node 0 */
+    unsigned int minor_threshold_abs_node1; /* Absolute value for minor threshold crossing processor node 1 */
+    unsigned int major_threshold_abs_node1; /* Absolute value for major threshold crossing processor node 1 */
+ unsigned int critical_threshold_abs_node1; /* Absolute value for critical threshold crossing processor node 1 */ + unsigned int num_tries ; /* Number of times a resource has to be in + failed or cleared state before sending alarm */ + unsigned int alarm_status ; /* 1 or 0. If it is 0 threshold crossing alarms are not sent */ + unsigned int percent ; /* 1 or 0. If it is 1, the percentage is used, otherwise if 0, + the absolute value is used for thresholds crossing values */ + unsigned int alarm_type; /* standard, dynamic or static */ + + /* Dynamic Data */ + const char * type ; + const char * device ; + int i ; /* timer array index */ + unsigned int debounce_cnt ; /* running monitor debounce count */ + unsigned int minorlog_cnt ; /* track minor log count for thresholding */ + unsigned int count ; /* track the number of times the condition has been occured */ + bool failed ; /* track if the resource needs to be serviced by the resource handler */ + double resource_value ; /* Usage for the Linux blades: controller, compute and storage */ + double resource_prev ; /* the previous resource_value */ + int sev ; /* The severity of the failed resource */ + rmonStage_enum stage ; /* The stage the resource is in within the resource handler fsm */ + char alarm_id[FM_MAX_BUFFER_LENGTH] ; /* Used by FM API, type of alarm being raised */ + char errorMsg[ERR_SIZE]; + rmon_api_socket_type msg; + bool alarm_raised ; + int failed_send ; /* The number of times the rmon api failed to send a message */ + int mounted ; /* 1 or 0 depending on if the dynamic fs resource is mounted */ + int socket_id ; /* socket id corresponding to a physical processor */ + int response_error_log_throttle; /* log throttle counter for error in receiving response for resource info */ + int parse_error_log_throttle ; /* log throttle counter for failing to parse resource info */ + int key_error_log_throttle ; /* log throttle counter for failing to obtain resource info */ + int resource_monitor_throttle ; /* 
log throttle for the this resource being monitored */ +} resource_config_type ; + +typedef struct +{ + + /* Config Items */ + unsigned int mask ; + const char * resource ; /* The name of the Resource being monitored */ + const char * severity ; /* MINOR, MAJOR or CRITICAL for each resource */ + unsigned int debounce ; /* Period to wait before clearing alarms */ + unsigned int num_tries ; /* Number of times a resource has to be in + failed or cleared state before sending alarm */ + unsigned int alarm_status ; /* 1 or 0. If it is 0 threshold crossing alarms are not sent */ + + /* Dynamic Data */ + int i ; /* timer array index */ + char interface_one[20] ; /* primary interface */ + char interface_two[20] ; /* second interface if lagged */ + char bond[20] ; /* bonded interface name */ + bool lagged ; /* Lagged interface=true or not=false */ + unsigned int debounce_cnt ; /* running monitor debounce count */ + unsigned int minorlog_cnt ; /* track minor log count for thresholding */ + unsigned int count ; /* track the number of times the condition has been occured */ + bool failed ; /* track if the resource needs to be serviced by the resource handler */ + int resource_value ; /* 1 if the interface is up and 0 if it is down */ + int resource_value_lagged ; /* 1 if the interface is up and 0 if it is down for lagged interfaces */ + int sev ; /* The severity of the failed resource */ + rmonStage_enum stage ; /* The stage the resource is in within the resource handler fsm */ + char int_name[INTERFACE_NAME_LEN] ; /* Name of the tracked interface ex: eth1 */ + char alarm_id[FM_MAX_BUFFER_LENGTH] ; /* Used by FM API, type of alarm being raised */ + char alarm_id_port[FM_MAX_BUFFER_LENGTH] ; /* Used by FM API, type of alarm being raised for the ports */ + char errorMsg[ERR_SIZE]; + rmon_api_socket_type msg; + bool link_up_and_running; /* whether the interface is up or down initially */ + bool interface_used; /* true if the interface is configured */ + bool alarm_raised; + int 
failed_send; /* The number of times the rmon api failed to send a message */ + + +} interface_resource_config_type ; + +typedef struct +{ + + /* Config Items */ + const char * vg_name ; /* LVM Volume Group name */ + const char * thinpool_name ; /* LVM Thin Pool in VG to monitor */ + unsigned int critical_threshold ; /* critical alarm threshold percentage for metadata utilization, + 0 to disable monitoring*/ + unsigned int alarm_on ; /* 1 or 0. 1 to enable critical alarm, 0 to disable it */ + unsigned int autoextend_on ; /* 1 or 0. 1 to first try extending the metadata before + raising alarm, 0 for autoextend off */ + unsigned int autoextend_by ; /* autoextend by percentage or absolute value in MiB */ + unsigned int autoextend_percent ; /* use percent or MiB in autoexent_by */ + unsigned int audit_period ; /* frequency at which resources are polled, in seconds */ + + /* Dynamic Data */ + bool section_exists ; /* will be 1 if [THINMDA_CONFIG_SECTION] section is defined in + configuration file */ + double resource_value ; /* metadata usage percent */ + double resource_prev ; /* the previous value */ + bool alarm_raised ; /* track if alarm is raised to avoid re-raising */ + bool first_run ; /* to check for state consistency on first run */ + rmonStage_enum stage ; /* The stage the resource is in within the resource handler fsm */ + +} thinmeta_resource_config_type; + +/** Daemon Service messaging socket control structure **/ +typedef struct +{ + int rmon_tx_sock; /**< RMON API Tx Socket */ + int rmon_tx_port; /**< RMON API Tx Port */ + struct sockaddr_in rmon_tx_addr; /**< RMON API Tx Address */ + int rmon_rx_sock; /**< RMON API Rx Socket */ + int rmon_rx_port; /**< RMON API Rx Port */ + struct sockaddr_in rmon_rx_addr; /**< RMON API Rx Address */ + int netlink_sock; /**< Netlink event socket */ + int ioctl_sock; + msgSock_type mtclogd ; +} rmon_socket_type ; +rmon_socket_type * rmon_getSock_ptr ( void ); + +typedef struct +{ char resource[50]; + char 
registered_not[NOT_SIZE] ; /* The api notification the client has registerted for */ + char client_name[NOT_SIZE] ; /* The api notification the client has registerted for */ + + /** RMON API socket */ + /* ------------------------------------ */ + rmon_api_socket_type msg ; /**< Resource monitoring messaging interface */ + + /* RMON API Dynamic Data */ + /* ------------------------------ */ + bool resource_failed ; /**< resource monitoring failed signal */ + unsigned int tx_sequence ; /**< outgoing sequence number */ + unsigned int rx_sequence ; /**< incoming sequence number */ + bool waiting ; /**< waiting for response */ + int port ; + unsigned int msg_count ;/**< running pulse count */ + unsigned int b2b_miss_peak ; /**< max number of back to back misses */ + unsigned int b2b_miss_count ; /**< current back to back miss count */ + unsigned int afailed_count ; /**< total resouce mon'ing failed count */ + unsigned int recv_err_cnt ; /**< counts the receive errors */ + unsigned int send_err_cnt ; /**< counts the transmit errors */ + unsigned int send_msg_count ; /**< number of messages sent */ + unsigned int mesg_err_cnt ; /**< response message error count */ + unsigned int mesg_err_peak ; /**< response message error count */ + unsigned int adebounce_cnt ; /**< resource monitor debounce counter */ + bool resource_debounce ; /**< true = in resource mon'ing debounce */ + rmon_socket_type rx_sock ; /* rx socket for that client */ + +} registered_clients; + +void rmon_msg_init ( void ); +void rmon_msg_fini ( void ); +int setup_tx_port ( const char * iface , const char * mcast , int port ); +int rmon_send_event ( unsigned int event_cmd , const char * process_name_ptr ); + + +/* Note: Any addition to this struct requires explicit + * init in daemon_init. + * Cannot memset a struct contianing a string type. 
+ **/ +typedef struct +{ + /* iface attributes ; hostname, ip, audit period and mac address */ + char my_hostname [MAX_HOST_NAME_SIZE+1]; + string my_macaddr ; + int audit_period ; /* Frequency at which resources are polled */ + int pm_period ; /* Frequency at which ceilometer PM's are created */ + int ntp_audit_period; /* Frequency at which we check if the NTP servers are still reachable */ + int ntpq_cmd_timeout; /* Max amount of time in seconds to wait for the ntpq command to complete */ + string my_address ; + int resources ; /**< Number of Monitored resources */ + int interface_resources ; /**< Number of monitored interface resources */ + int thinmeta_resources; /**< Number of monitored thinpool metadata resources */ + int per_node ; /* Memory checking per node enabled: 1 or disabled: 0 */ + int clients ; + int rmon_critical_thr ; + int fd; /* Used for inotify */ + int wd; /* Used for inotify */ + + unsigned int function ; + unsigned int subfunction ; + unsigned int nodetype ; + +} rmon_ctrl_type ; + +bool is_controller ( void ); + +/* Init tx message */ +void rmon_msg_init ( void ); + +/* Delete tx message */ +void rmon_msg_fini ( void ); + +/* Initizialize the settings from the rmond.conf file */ +int rmon_hdlr_init ( rmon_ctrl_type * ctrl_ptr ); + +/* Initialize the timers */ +void rmon_timer_init( void ); + +/* Service client register and deregister requests + * when rmon was not alive */ +void rmon_alive_notification (int & clients); + +/* Service inbox when rmon is born */ +int rmon_service_file_inbox ( int clients, char buf[RMON_MAX_LEN], bool add=true ); + +/* rmon_api functions */ +int rmon_service_inbox ( int clients ); + +/* Send set or clear alarm notification to registered clients */ +int rmon_send_request ( resource_config_type * ptr, int clients); + +/* send rmon interface resource set and clear alarm messages to registered client processes */ +int send_interface_msg ( interface_resource_config_type * ptr, int clients); + +/* Init rmon api tx 
and rx ports */ +int rmon_port_init ( int tx_port ); + +/* Main loop to poll and handle resource monitoring */ +void rmon_service (rmon_ctrl_type * ctrl_ptr); + +/* Update the number of registered clients */ +void update_total_clients (int total_clients); + +/* Add a registered client to the list of clients */ +void add_registered_client (registered_clients client); + +/* Read in the per resource specific thresholds */ +int rmon_resource_config ( void * user, + const char * section, + const char * name, + const char * value); + +/* Read in the per interface resource specific values */ +int rmon_interface_config ( void * user, + const char * section, + const char * name, + const char * value); + +/* Read in LVM Thinpool metadata resource specific values */ +int rmon_thinmeta_config ( void * user, + const char * section, + const char * name, + const char * value); + +/* Returns a registered client at a given index */ +registered_clients * get_registered_clients_ptr ( int index ); + +/* read the dynamic file systems file and send a response back */ +void process_dynamic_fs_file(); + +/* send the notification that the file has been read */ +int rmon_resource_response ( int clients ); + +/* Updates the interface data structure with the state (up or down) of the interface */ +void check_interface_status( interface_resource_config_type * ptr ); + +/* Check if the node is a compute node */ +bool check_compute(); + +/* Handle failed platform interfaces */ +void interface_handler( interface_resource_config_type * ptr ); + +/* Handle LVM thinpool metadata usage */ +int thinmeta_handler( thinmeta_resource_config_type * ptr ); + +/* Compute the thinpool metadata usage for a specific LVM thinpool */ +int calculate_metadata_usage(thinmeta_resource_config_type * ptr); + +/* Returns the reference to the rmon control pointer */ +rmon_ctrl_type * get_rmon_ctrl_ptr (); + +/* Initialize LVM Thin Pool Metadata monitoring */ +void thinmeta_init(thinmeta_resource_config_type * res, struct 
mtc_timer * timers, int count); + +/* Clears any previously raised interface alarms if rmon is restarted */ +void interface_alarming_init ( interface_resource_config_type * ptr ); + +/* Map an interface (mgmt, oam or infra) to a physical port */ +void init_physical_interfaces ( interface_resource_config_type * ptr ); + +/* returns true if the link is up for the specified interface */ +int get_link_state ( int ioctl_socket, char iface[20], bool * running_ptr ); + +/* Service state changes for monitored interfaces */ +int service_interface_events ( int nl_socket , int ioctl_socket ); + +/* Set the interface resource in the correct state for the interface resource handler */ +void service_resource_state ( interface_resource_config_type * ptr ); + +/* Get the interface resource by index */ +interface_resource_config_type * get_interface_ptr ( int index ); + +/* Get the resource by index */ +resource_config_type * get_resource_ptr ( int index ); + +/* Resource monitor handler cleanup */ +void rmon_hdlr_fini ( rmon_ctrl_type * ctrl_ptr ); + +void build_entity_instance_id ( resource_config_type *ptr, char *entity_instance_id); + +/* Resource monitor FM interface */ +void rmon_fm_init ( void ); +void rmon_fm_handler ( void ); +EFmErrorT rmon_fm_set ( const SFmAlarmDataT *alarm, fm_uuid_t *fm_uuid ); +EFmErrorT rmon_fm_clear ( AlarmFilter *alarmFilter ); +EFmErrorT rmon_fm_get ( AlarmFilter *alarmFilter, SFmAlarmDataT **alarm, unsigned int *num_alarm ); + +/* Save dynamic memory resource (both system memory and AVS memory) */ +int save_dynamic_mem_resource ( string resource_name, string criticality, + double r_value, int percent, int abs_values[3], + const char * alarm_id, int socket_id /*=0*/ ); + +/* Resource failure processing for percentage based thresholds */ +void process_failures ( resource_config_type * ptr ); +/* Resource failure processing for absolute based thresholds */ +void process_failures_absolute ( resource_config_type * ptr ); + + +// convert Severity 
level into literal definition +static inline string FmAlarmSeverity_to_string(EFmAlarmSeverityT severity) +{ + switch (severity) { + case FM_ALARM_SEVERITY_CLEAR: + return "clear"; + case FM_ALARM_SEVERITY_WARNING: + return "warning"; + case FM_ALARM_SEVERITY_MINOR: + return "minor"; + case FM_ALARM_SEVERITY_MAJOR: + return "major"; + case FM_ALARM_SEVERITY_CRITICAL: + return "critical"; + default: + return "unknown"; /* a std::string must never be constructed from NULL */ + } +} + +/**************************************************************************** + * + * Name : log_value + * + * Purpose : Log resource state values while avoiding log flooding for + * trivial fluctuations. + * + * Description: Recommends whether the current resource state value should + * be logged based on current, previous and step values. + * + * Caller should not generate such log if a false is returned. + * + * A true is returned if the current and previous resource values differ + * by +/- step amount. + * + * The caller specifies the step that can be overridden by a smaller value + * in rmond.conf:log_step value. + * + * If step is zero then a true is always returned in support of a debug mode + * where we get the current reading as a log on every audit. + * + * The caller's previous value is updated to current whenever true is returned. + * + ****************************************************************************/ + +/* a default step value ; change of + or - 5 triggers log */ +#define DEFAULT_LOG_VALUE_STEP (5) + +bool log_value ( double & current, double & previous, int step ); + + +#endif /* __INCLUDE_RMON_HH__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi.h b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi.h new file mode 100644 index 00000000..70225e14 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi.h @@ -0,0 +1,50 @@ +#ifndef __INCLUDE_RMONAPI_H__ +#define __INCLUDE_RMONAPI_H__ +/* + * Copyright (c) 2013, 2017 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +#include +#include + +#include "rmonHttp.h" + + /** + * @file + * Wind River CGTS Platform + * + * rmon API Header + + */ + +#define RMON_PUT_VSWITCH_OPER_LABEL "v1" +#define RMON_PUT_VSWITCH "/engine/stats" + + + +/* Poll request is a GET operation that looks like this ... + * + * http://localhost:9000/v1/engine/stats + * The following defines are used to help construct that request + * + */ + + +/** Initializes the module */ +int rmonApi_init ( string ip, int port ); + +/** Frees the module's dynamically allocated resources */ +void rmonApi_fini ( void ); + + +/**remote logging service request handlers */ + +void rmonHdlr_remotelogging_handler ( struct evhttp_request *req, void *arg ); +int rmonHdlr_remotelogging_query (resource_config_type * ptr); + +/**ceilometer sample create request handlers */ +void rmonHdlr_ceilometer_handler ( struct evhttp_request *req, void *arg ); +#endif diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/Makefile b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/Makefile new file mode 100644 index 00000000..08ee7b38 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/Makefile @@ -0,0 +1,51 @@ +# +# Copyright (c) 2014-2015 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +SRCS = rmon_api.c +OBJS = $(SRCS:.c=.o) +INCLUDES = -I. +LDFLAGS = -shared +CFLAGS = -fPIC -g -O2 -Wall -Wextra -Werror +TARGET_LIB = librmonapi.so +lib: build +ifeq (,$(shell which ${CC})) +CC=gcc +endif + +STATIC_ANALYSIS_TOOL = cppcheck +STATIC_ANALYSIS_TOOL_EXISTS = $(shell [[ -e `which $(STATIC_ANALYSIS_TOOL)` ]] && echo 1 || echo 0) + +.c.o: + $(CC) $(INCLUDES) $(CFLAGS) $(EXTRACCFLAGS) -c $< -o $@ + +static_analysis: +ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1) + $(STATIC_ANALYSIS_TOOL) --language=c++ --enable=warning -U__AREA__ -DWANT_FIT_TESTING *.cpp *.h +else + echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..." 
+endif + +build: static_analysis ${TARGET_LIB} + +${TARGET_LIB}: ${TARGET_LIB}.${VER_MJR} + ln -sf $^ $@ + +${TARGET_LIB}.${VER_MJR}: ${TARGET_LIB}.${VER} + ln -sf $^ $@ + +${TARGET_LIB}.${VER}: $(OBJS) + $(CC) ${LDFLAGS} -Wl,-soname,${TARGET_LIB}.${VER_MJR} -o $@ $^ + +$(SRCS:.c=.d):%.d:%.c + $(CC) $(CFLAGS) -MM $< >$@ + +include $(SRCS:.c=.d) + +clean: + rm -f *.o + rm -f *.so + rm -f *.so.* + rm -f *.d diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/rmon_api.c b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/rmon_api.c new file mode 100644 index 00000000..95d01989 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/rmon_api.c @@ -0,0 +1,475 @@ +/* + * Copyright (c) 2013, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform Resource Monitor Client Notification API Library + * See rmon_api.h for API header. + * + **/ + +#include "rmon_api.h" +#include + +/* Pass code */ +#ifndef PASS +#define PASS (0) +#endif + +/* Fail Code */ +#ifndef FAIL +#define FAIL (1) +#endif + +/* Retry Code */ +#ifndef RETRY +#define RETRY (2) +#endif + +/* maximum string and socket endpoint path length */ +#define RMON_MAX_LEN (100) + +/* initialization signature to gate functional + * api calls made prior to initialization */ +#define INIT_SIG (0xffffdead) + +/* rmon default messaging port */ +#define RMONTXPORT 2300 + +/** Control Structure */ +typedef struct +{ + unsigned int init ; /**< Init signature */ + + int client_rx_sock ; /**< inet pulse request rx socket */ + int client_rx_port ; /**< inet pulse request rx port number */ + struct sockaddr_in client_rx_addr ; /**< inet pulse request rx attributes */ + char client_rx_buf[RMON_MAX_LEN] ; + + int rmon_tx_sock ; /**< inet pulse response tx socket */ + int rmon_tx_port ; /**< inet pulse response tx port number */ + struct sockaddr_in rmon_tx_addr ; /**< inet pulse response tx attributes */ + char rmon_tx_buf[RMON_MAX_LEN] ; + + int 
rmon_rx_sock ; /**< inet pulse response rx socket */ + int rmon_rx_port ; /**< inet pulse response rx port number */ + struct sockaddr_in rmon_rx_addr ; /**< inet pulse response rx attributes */ + char rmon_rx_buf[RMON_MAX_LEN] ; + + char name[RMON_MAX_LEN] ; /**< name of process using this instance */ + + bool debug_mode ; /**< debug mode if true */ + int fit_code ; /**< fit code MAGIC, SEQ, PROCESS */ +} resource_mon_socket_type ; + +/* Instance Control Structure - Per Process Private Data */ +static resource_mon_socket_type rmon ; +/* Mutex For sending client process information to rmon */ +pthread_mutex_t client_mutex; + +int remove_rmon_client( const char * process_name_ptr, int socket ); + +int add_rmon_client ( const char * process_name_ptr, int port , const char * registration, int rx_port); + +int resource_monitor_initialize ( const char * process_name_ptr, int port , const char * registration); + +int resource_monitor_deregister( const char * process_name_ptr, int socket ); + +int resource_monitor_get_sel_obj ( void ); + +int remove_rmon_client( const char * process_name_ptr, int socket ); + +void resource_monitor_finalize (); + +int create_tx_socket(); + +/* Create and Setup Inet Transmit Socket + * return PASS (0) on success + * -# on kernel call error + * non-zero on internal error + * + **/ +int create_tx_socket( int rx_port ) +{ + int val = 1 ; + int ok = 1 ; + + rmon.rmon_tx_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if ( 0 >= rmon.rmon_tx_sock ) + { + syslog ( LOG_ERR, "create_tx_socket failed to create 'tx' socket (%d:%m)", errno ); + return (-errno); + } + + if ( setsockopt ( rmon.rmon_tx_sock , SOL_SOCKET, SO_REUSEADDR, &val, sizeof(int)) == -1 ) + { + syslog ( LOG_WARNING, "create_tx_socket failed to set 'tx' socket as reusable (%d:%m)", errno ); + } + + /* Setup with localhost ip */ + memset(&rmon.rmon_tx_addr, 0, sizeof(struct sockaddr_in)); + rmon.rmon_tx_addr.sin_family = AF_INET ; + rmon.rmon_tx_addr.sin_addr.s_addr = 
inet_addr("127.0.0.1"); + rmon.rmon_tx_addr.sin_port = htons(RMONTXPORT) ; + rmon.rmon_tx_port = RMONTXPORT ; + + /* Set socket to be non-blocking. */ + int rc = ioctl(rmon.rmon_tx_sock, FIONBIO, (char *)&ok); + if ( 0 > rc ) + { + syslog ( LOG_WARNING, "create_tx_socket failed to set 'tx' socket as non-blocking (%d:%m)\n", errno ); + } + + /* if the sock is already open then close it first */ + if ( rmon.rmon_rx_sock ) + { + close (rmon.rmon_rx_sock); + } + + rmon.rmon_rx_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if ( 0 >= rmon.rmon_rx_sock ) + { + syslog ( LOG_WARNING, "create_rx_socket failed (%d:%m)\n", errno ); + return (-errno); + } + if ( setsockopt ( rmon.rmon_rx_sock , SOL_SOCKET, SO_REUSEADDR, &val, sizeof(int)) == -1 ) + { + syslog ( LOG_WARNING, "create_tx_socket failed to set 'rx' socket as reusable (%d:%m)", errno ); + } + + /* Setup with localhost ip */ + memset(&rmon.rmon_rx_addr, 0, sizeof(struct sockaddr_in)); + rmon.rmon_rx_addr.sin_family = AF_INET ; + rmon.rmon_rx_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + rmon.rmon_rx_addr.sin_port = htons(rx_port) ; + rmon.rmon_rx_port = rx_port ; + + /* Set socket to be non-blocking. 
*/ + rc = ioctl(rmon.rmon_rx_sock, FIONBIO, (char *)&ok); + if ( 0 > rc ) + { + syslog ( LOG_ERR, "create_tx_socket failed to set 'rx' socket as non-blocking (%d:%m)\n", errno ); + return -errno; + } + /* bind socket to the receive addr */ + if ( bind ( rmon.rmon_rx_sock, (const struct sockaddr *)&rmon.rmon_rx_addr, sizeof(struct sockaddr_in)) == -1 ) + { + syslog ( LOG_ERR, "failed to bind rmon 'rx' socket with port %d (%d:%m)\n", rx_port, errno ); + close (rmon.rmon_rx_sock); + rmon.rmon_rx_sock = 0 ; + return -errno; + } + return PASS; +} + +/* Register a client with rmon: opens the client's localhost rx socket plus the rmon tx/rx sockets ; returns the client rx socket descriptor or 0 on failure */ +int add_rmon_client ( const char * process_name_ptr, int port , const char * registration, int rx_port) +{ + struct stat p ; + int val = 1 ; + memset ( &rmon, 0, sizeof(rmon)); + memset ( &p, 0 , sizeof(struct stat)); + + if ( registration == NULL ) + { + syslog ( LOG_INFO, "resource_monitor_initialize called with null registration info"); + return (0); + } + + syslog ( LOG_INFO , "Add Client '%s' to rmon (port:%d)\n", registration, port ); + + snprintf ( rmon.name, sizeof(rmon.name), "/var/run/%s.rmon", process_name_ptr ? process_name_ptr : "" ); /* bounded and NULL-safe ; the name is only NULL-checked further down */ + + stat ( rmon.name, &p ) ; + if ((p.st_ino != 0 ) && (p.st_dev != 0)) + { + rmon.debug_mode = true ; + syslog ( LOG_INFO, "Enabling resource Monitor Debug Mode\n"); + if ( p.st_size ) + { + FILE * filename = fopen ( rmon.name, "rb" ) ; + if ( filename != NULL ) + { + memset ( &rmon.name, 0, RMON_MAX_LEN); + if ( fgets ( rmon.name, 20, filename ) != NULL ) + { + if ( !strncmp ( rmon.name, FIT_MAGIC_STRING, strlen (FIT_MAGIC_STRING))) + { + rmon.fit_code = FIT_MAGIC ; + syslog ( LOG_INFO, "Enabling FIT on 'magic calculation'\n"); + } + else if ( !strncmp ( rmon.name, FIT_SEQUENCE_STRING, strlen(FIT_SEQUENCE_STRING))) + { + rmon.fit_code = FIT_SEQ ; + syslog ( LOG_INFO, "Enabling FIT on 'sequence number'\n"); + } + else if ( !strncmp ( rmon.name, FIT_PROCESS_STRING, strlen(FIT_PROCESS_STRING))) + { + rmon.fit_code = FIT_PROCESS ; + syslog ( LOG_INFO, "Enabling FIT on 'process name'\n"); + } + else + { + 
syslog ( LOG_INFO, "Unsupported FIT string (%s)\n", rmon.name ); + } + } + fclose (filename); + } + else + { + syslog ( LOG_INFO, "Failed to open %s\n", rmon.name); + } + } + } + /* Init the control struct - includes all members */ + memset ( rmon.name, 0, RMON_MAX_LEN); + + if ( process_name_ptr ) + { + snprintf ( rmon.name, sizeof(rmon.name), "%s", process_name_ptr ) ; /* bounded copy ; a name of RMON_MAX_LEN bytes or more is truncated instead of overflowing rmon.name */ + } + else + { + syslog ( LOG_INFO, "resource_monitor_initialize called with null process name"); + return (0); + } + + /*******************************************************/ + /* Create and Setup Inet Receive Socket */ + /*******************************************************/ + rmon.client_rx_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if ( 0 >= rmon.client_rx_sock ) + { + syslog ( LOG_INFO, "add_rmon_client error:1"); + return (0); + } + + if ( setsockopt ( rmon.client_rx_sock , SOL_SOCKET, SO_REUSEADDR, &val, sizeof(int)) == -1 ) + { + syslog ( LOG_INFO, "%s failed to set socket as re-useable (%d:%s)\n", + process_name_ptr, errno, strerror(errno)); + } + + /* Setup with localhost ip */ + memset(&rmon.client_rx_addr, 0, sizeof(struct sockaddr_in)); + rmon.client_rx_addr.sin_family = AF_INET ; + rmon.client_rx_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + rmon.client_rx_addr.sin_port = htons(port) ; + rmon.client_rx_port = port ; + + /* bind socket to the receive addr */ + if ( bind ( rmon.client_rx_sock, (const struct sockaddr *)&rmon.client_rx_addr, sizeof(struct sockaddr_in)) == -1 ) + { + syslog ( LOG_ERR, "failed to bind to rx socket with port %d\n", port ); + close (rmon.client_rx_sock); + rmon.client_rx_sock = 0 ; + return (0); + + } + + int rc = create_tx_socket ( rx_port ); + + if (rc != PASS ) + { + syslog ( LOG_ERR, "add_rmon_client failed to create_tx_socket (rc:%d)", rc ); + return (0); + } + if ((registration != NULL) && (rc == PASS)) + { + int bytes = 0; +#ifdef WANT_CLIENT_REGISTER_SOCKET_SEND + socklen_t len = sizeof(struct sockaddr_in) ; + + /* client registering, 
send rmon the resources registered for */ + memset(rmon.rmon_tx_buf, 0, sizeof(rmon.rmon_tx_buf)); + snprintf(rmon.rmon_tx_buf, sizeof(rmon.rmon_tx_buf), "%s %s %d", process_name_ptr, registration, port); + bytes = sendto ( rmon.rmon_tx_sock, &rmon.rmon_tx_buf, strlen(rmon.rmon_tx_buf), 0, + (struct sockaddr *) &rmon.rmon_tx_addr, sizeof(struct sockaddr_in)); + fd_set readfds; + struct timeval waitd; + bytes = 0; + + FD_ZERO(&readfds); + FD_SET(rmon.rmon_rx_sock, &readfds); + + waitd.tv_sec = WAIT_DELAY; + waitd.tv_usec = 0; + /* This is used as a delay up to select_timeout */ + select(FD_SETSIZE, &readfds, NULL, NULL, &waitd); + + if (FD_ISSET(rmon.rmon_rx_sock, &readfds)) + { + /* wait for the response from rmon to verify that the client is registered */ + memset(rmon.rmon_rx_buf, 0, sizeof(rmon.rmon_rx_buf)); + rmon.rmon_rx_buf[0] = 0; + bytes = recvfrom( rmon.rmon_rx_sock, rmon.rmon_rx_buf, RMON_MAX_LEN, 0, (struct sockaddr *)&rmon.rmon_rx_addr, &len ); + } +#endif + if (bytes <= 0) { + /* no response, write the client name and notification to a file for later use */ + FILE * pFile; + memset(rmon.rmon_rx_buf, 0, sizeof(rmon.rmon_rx_buf)); + snprintf(rmon.rmon_rx_buf, sizeof(rmon.rmon_rx_buf), "%s %s %d", process_name_ptr, registration, port); + pFile = fopen (RMON_API_REG_DIR , "a+"); + if ( pFile ) + { + // take out a writer lock on this file to + // ensure that no other entity is writing to it + // at this time + int lock = flock(fileno(pFile), LOCK_EX); + if (lock < 0) { + syslog (LOG_ERR, "Failed to get exclusive lock on" + " '%s' (errno: %d)", RMON_API_REG_DIR, errno); + } else { + fprintf(pFile, "%s\n", rmon.rmon_rx_buf); + fflush(pFile); // push the buffered record to the file while the lock is still held + flock(fileno(pFile), LOCK_UN); + } + fclose(pFile); // always close ; the handle used to leak when the lock could not be taken + } + else + { + syslog ( LOG_ERR, "Failed to open '%s'\n", RMON_API_REG_DIR ); + } + } + else + { + syslog ( LOG_INFO, "add_rmon_client send message succeeded"); + } + + /* Set init sig */ + rmon.init = INIT_SIG ; + + /* Return the socket descriptor */ + return 
(rmon.client_rx_sock); + } + else + { + syslog ( LOG_ERR, "Failed register due to previous failure\n"); + } + return (0); +} + +int rmon_notification ( const char * notification_name ) +{ + int port = RMONTXPORT; + int rc; + + /* send the message to check the dynamic file */ + memset(rmon.rmon_tx_buf, 0, sizeof(rmon.rmon_tx_buf)); + snprintf(rmon.rmon_tx_buf, sizeof(rmon.rmon_tx_buf), "%s %s %d", notification_name, RESOURCE_NOT, port); + rc = sendto ( rmon.rmon_tx_sock, &rmon.rmon_tx_buf, strlen(rmon.rmon_tx_buf), 0, + (struct sockaddr *) &rmon.rmon_tx_addr, sizeof(struct sockaddr_in)); + return rc; +} + +int resource_monitor_initialize ( const char * process_name_ptr, int port , const char * registration) +{ + /* use a mutex to prevent multiple clients from registering at once */ + int clt_rx_sock; + int rx_port = port - 1; + + pthread_mutex_lock(&client_mutex); + clt_rx_sock = add_rmon_client(process_name_ptr, port , registration, rx_port ); + pthread_mutex_unlock(&client_mutex); + + return clt_rx_sock; + +} + +int resource_monitor_deregister( const char * process_name_ptr, int socket ) +{ + /* use a mutex to prevent multiple clients from de-registering at once */ + int rc; + pthread_mutex_lock(&client_mutex); + rc = remove_rmon_client(process_name_ptr, socket); + pthread_mutex_unlock(&client_mutex); + + return rc; +} + + +/* return the client receive socket fd for use with select(); logs a warning if the library is not initialized */ +int resource_monitor_get_sel_obj ( void ) +{ + if (( rmon.init != INIT_SIG ) || ( rmon.client_rx_sock <= 0 )) + { + syslog (LOG_WARNING , "'%s' called with invalid init (sock:%d)\n", + __FUNCTION__, rmon.client_rx_sock); + } + + return (rmon.client_rx_sock); +} +/* deregister a client: notify rmon, record the request in the deregister file if rmon does not reply, then close 'socket'; always returns PASS */ +int remove_rmon_client( const char * process_name_ptr, int socket ) +{ + int rc; + int port = RMONTXPORT; + int bytes; + socklen_t len = sizeof(struct sockaddr_in); + + /* client deregistering, send rmon the client process name */ + memset(rmon.rmon_tx_buf, 0, sizeof(rmon.rmon_tx_buf)); + snprintf(rmon.rmon_tx_buf, sizeof(rmon.rmon_tx_buf), "%s %s %d", process_name_ptr, 
CLR_CLIENT, port); + rc = sendto ( rmon.rmon_tx_sock, &rmon.rmon_tx_buf, strlen(rmon.rmon_tx_buf), 0, + (struct sockaddr *) &rmon.rmon_tx_addr, sizeof(struct sockaddr_in)); + sleep(WAIT_DELAY); + /* wait for the response from rmon to verify that the client is de-registered */ + memset(rmon.rmon_rx_buf, 0, sizeof(rmon.rmon_rx_buf)); + rmon.rmon_rx_buf[0] = 0; + bytes = recvfrom( rmon.rmon_rx_sock, rmon.rmon_rx_buf, RMON_MAX_LEN, 0, (struct sockaddr *)&rmon.rmon_rx_addr, &len); + + if ((bytes <= 0) || (rmon.rmon_rx_buf[0] == 0)) { + /* no response, write the client name and clear request to a file for later use */ + FILE * pFile; + memset(rmon.rmon_rx_buf, 0, sizeof(rmon.rmon_rx_buf)); + snprintf(rmon.rmon_rx_buf, sizeof(rmon.rmon_rx_buf), "%s %s %d", + process_name_ptr, CLR_CLIENT, port); + pFile = fopen (RMON_API_DEREG_DIR , "a+"); + if (pFile) { + // take out a writer lock on this file to + // ensure that no other entity is writing to it + // at this time + int lock = flock(fileno(pFile), LOCK_EX); + if (lock < 0) { + syslog (LOG_ERR, "Failed to get exclusive lock on" + " '%s' (errno: %d)", RMON_API_DEREG_DIR, errno); + } else{ + fprintf(pFile, "%s\n", rmon.rmon_rx_buf); + // release the lock + flock(fileno(pFile), LOCK_UN); + } + fclose(pFile); /* close on both paths so the stream is not leaked when the lock fails */ + } else { + syslog (LOG_ERR, "Failed to open '%s'\n", + RMON_API_DEREG_DIR ); + } + } + + if ( socket ) + { + /* close the client receive port */ + close (socket); + } + rc = PASS ; + + return rc; +} + +/* close the rmon ports */ +void resource_monitor_finalize () +{ + if ( rmon.rmon_tx_sock ) + { + close (rmon.rmon_tx_sock); + } + if ( rmon.rmon_rx_sock ) + { + close (rmon.rmon_rx_sock); + } + +} diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/rmon_api.h b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/rmon_api.h new file mode 100644 index 00000000..eea51d28 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/rmon_api.h @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2014, 2016 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform Resource Monitor Client Notification API Library Header + */ + +#include /* for ... snprintf */ +#include /* for ... unlink, close and usleep */ +#include /* for ... socket */ +#include /* for ... domain socket type */ +#include /* for ... inet socket type */ +#include /* for ... inet_addr, inet_ntoa macro */ +#include /* for ... syslog */ +#include /* for ... EINTR, errno, strerror */ +#include /* for ... true and false */ +#include /* for ... file stat */ +#include /* for ... mutual exclusion */ +#include +#include +/** + + * This is a convenience module in support of resource monitoring notifications to + * client processes + * + * Packaged as a shared library that processes can link to. + * + * This module provides four simple interfaces that provide the following general functions + * + * - open an abstract socket interface for resource monitoring messaging + * - return the socket file descriptor for event driven selection + * - service events on socket + * - close the socket when done + * + * *Interfaces including work flow are* + * + * Init: + * + * resource_monitor_initialize ( "testClient" , 2302, CPU_USAGE ); + * + * Setup event driven handling: + * + * int resource_monitor_socket = resource_monitor_get_sel_obj(); + * FD_SET( resource_monitor_socket, &readfds); + * + * Main loop: + * + * if ( FD_ISSET(resource_monitor_socket, &readfds)) + * resource_monitor_dispatch (); + * + * Exit: + * + * resource_monitor_deregister("testClient", resource_monitor_socket); + * + */ + +/** Initialize the library and open the messaging socket(s). + * + * Creates socket and binds to named endpoint. + * + * Prints status or errors to syslog. 
+ * + * @param process_name_ptr - char pointer to string containing monitored process name + * @param port - integer specifying the port number this process is listening on + * + * @returns The socket file descriptor on success or negative version of + * standard Linux error numbers (errno) codes from socket(2) or bind(2) + * + **/ + + +/* Notification resource types */ +#define CPU_USAGE ((const char *)"cpuUsage") +#define MEMORY_USAGE ((const char *)"memoryUsage") +#define FS_USAGE ((const char *)"fsUsage") +#define ALL_USAGE ((const char *)"allUsage") +#define CLR_CLIENT ((const char *)"clearClient") +#define RESOURCE_NOT ((const char *)"resourceNotification") +#define NOT_SIZE (100) +#define ERR_SIZE (100) +#define MAX_ERR_CNT (5) +/** Supplies the messaging socket file descriptor. + * + * @returns The created socket file descriptor for event driven select + * or zero if initialize was not called or there was an error creating + * the socket. A notification message is sent to rmon to tell it that a new client + * is registering for a notification of type resource. From then on, rmon will send + * alarm set and clear messages for that resource to the process until it deregisters. 
+ **/ +int resource_monitor_initialize ( const char * process_name_ptr, int port, const char * resource ); + + +int rmon_notification ( const char * notification_name ); + +/* returns the client socket fd */ +int resource_monitor_get_sel_obj ( void ); + +/** Close the rmon tx socket */ +void resource_monitor_finalize ( void ); + +/** Debug mode is enabled if the following file is found during initialize + * + * /var/run/.debug + * + * Fault Insertion Mode is enabled if the first word of line one + * of this file contains one of the following words + * + * sequence - corrupt the sequence number returned + * magic - corrupt the magic number returned + * process - corrupt the process name returned + * + */ + + +/* Deregister a client process from rmon notifications */ +int resource_monitor_deregister( const char * process_name_ptr, int socket ); + +/** Fault Insertion Mode Strings */ +#define FIT_MAGIC_STRING "magic" +#define FIT_SEQUENCE_STRING "sequence" +#define FIT_PROCESS_STRING "process" + +/** Fault Insertion Codes */ +#define FIT_NONE 0 +#define FIT_MAGIC 1 +#define FIT_SEQ 2 +#define FIT_PROCESS 3 +#define WAIT_DELAY (3) +#define PASS (0) +#define FAIL (1) + +/* location of file for registering clients */ +#define RMON_API_REG_DIR ((const char *)"/etc/rmonapi.d/register.txt") +/* location of file for deregistering clients */ +#define RMON_API_DEREG_DIR ((const char *)"/etc/rmonapi.d/deregister.txt") +/* location of file for the current registered clients */ +#define RMON_API_ACTIVE_DIR ((const char *)"/etc/rmonapi.d/active.txt") diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/rmon_nodeMacro.h b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/rmon_nodeMacro.h new file mode 100644 index 00000000..421c717a --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/rmon_nodeMacro.h @@ -0,0 +1,76 @@ +/* +* Copyright (c) 2013-2014 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* +*/ + + +#define CREATE_REUSABLE_INET_UDP_TX_SOCKET(ip, port, s, a, p, l, n, rc) \ +{ \ + int on = 1 ; \ + s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); \ + if ( 0 >= s ) \ + { \ + printf ("failed to create '%s' socket (%d:%s)\n", n, errno, strerror(errno)); \ + rc = FAIL_SOCKET_CREATE ; \ + } \ + else if ( setsockopt ( s , SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int)) == -1 ) \ + { \ + printf ("failed to make '%s' socket re-useable (%d:%s)\n", n, errno, strerror(errno)); \ + close(s); \ + s = 0 ; \ + rc = FAIL_SOCKET_OPTION ; \ + } \ + else \ + { \ + memset(&a, 0, sizeof(struct sockaddr_in)); \ + l = sizeof(a); \ + p = port ; \ + a.sin_family = AF_INET ; \ + a.sin_addr.s_addr = inet_addr(ip); \ + a.sin_port = htons(p) ; \ + } \ +} + +#define CREATE_NONBLOCK_INET_UDP_RX_SOCKET(ip, port, s, a, p, l, n, rc) \ +{ \ + int on = 1 ; \ + s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); \ + if ( 0 >= s ) \ + { \ + printf ("failed to create '%s' socket (%d:%s)\n", n, errno, strerror(errno)); \ + rc = FAIL_SOCKET_CREATE ; \ + } \ + else if ( setsockopt ( s , SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int)) == -1 ) \ + { \ + printf ("failed to make '%s' socket re-useable (%d:%s)\n", n, errno, strerror(errno)); \ + close(s); \ + s = 0 ; \ + rc = FAIL_SOCKET_OPTION ; \ + } \ + else if ( 0 > ioctl(s, FIONBIO, (char *)&on)) \ + { \ + printf ("failed to set '%s' socket non-blocking (%d:%s)\n", n, errno, strerror(errno)); \ + close(s); \ + s = 0 ; \ + rc = FAIL_SOCKET_NOBLOCK ; \ + } \ + else \ + { \ + memset(&a, 0, sizeof(struct sockaddr_in)); \ + l = sizeof(a); \ + p = port ; \ + a.sin_family = AF_INET ; \ + a.sin_addr.s_addr = inet_addr(ip); \ + a.sin_port = htons(p) ; \ + if ( bind ( s, (const struct sockaddr *)&a, sizeof(struct sockaddr_in)) == -1 ) \ + { \ + printf ( "failed to bind '%s' socket with port %d\n", n, p ); \ + close (s); \ + s = 0 ; \ + rc = -errno; \ + } \ + printf ("Listening on '%s' socket %s port %d\n", n, 
inet_ntoa(a.sin_addr), p); \ + } \ +} diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/tests/Makefile b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/tests/Makefile new file mode 100644 index 00000000..66091098 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/tests/Makefile @@ -0,0 +1,22 @@ +# +# Copyright (c) 2014 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +SRCS = rmon_api_client_test.cpp +OBJS = $(SRCS:.cpp=.o) +LDLIBS = -lstdc++ -lrmonapi +INCLUDES = -I../ -I. +CCFLAGS = -g -O2 -Wall -Wextra -Werror + +all: build + +.cpp.o: + $(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@ + +build: $(OBJS) + $(CXX) $(CCFLAGS) $(OBJS) -L../ -L/usr/lib64/ $(LDLIBS) -o rmond_api_test + +clean: + @rm -f $(OBJ) rmond_api_test *.o *.a diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/tests/rmon_api_client_test.cpp b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/tests/rmon_api_client_test.cpp new file mode 100644 index 00000000..fb7400e0 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/tests/rmon_api_client_test.cpp @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2014-2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Resource Monitor API Test Client + */ +/* + *This simulates a test client process to test out the rmon client notification + *api. To run: ./rmond_api_test + *If left blank it runs with the default port: 2302 and default process name. When testing + *with more than one client test process, these values must be entered. For help: + *./rmond_api_test --help + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +#include "../rmon_nodeMacro.h" /* for ... 
CREATE_NONBLOCK_INET_UDP_RX_SOCKET */ +#include "rmon_api_client_test.h" + +extern "C" +{ + #include "../rmon_api.h" +} +#define MAX_HOST_NAME_SIZE (100) +#define FAIL_SOCKET_INIT -1 +#define FAIL_SOCKET_CREATE -2 +#define PASS 0 +#define FAIL 1 +#define LOOPBACK_IP "127.0.0.1" +#define RX_PORT 2302 +static char my_hostname [MAX_HOST_NAME_SIZE+1]; + + +/** + * Messaging Socket Control Struct - The allocated struct + * @see rmon_api_client_test.h for rmon_socket_type struct format. + */ +static rmon_socket_type rmon_sock ; +static rmon_socket_type * sock_ptr ; + +/** Client Config mask */ +#define CONFIG_CLIENT_MASK (CONFIG_AGENT_PORT |\ + CONFIG_CLIENT_API_PORT |\ + CONFIG_CLIENT_PORT) + + + + +/****************************/ +/* Initialization Utilities */ +/****************************/ + +/* Initialize the unicast api response message */ +/* One time thing ; tx same message all the time. */ +int rmon_message_init ( void ) +{ + /* Build the transmit api response message */ + memset ( &sock_ptr->tx_message, 0, sizeof (rmon_message_type)); + memcpy ( &sock_ptr->tx_message.m[RMON_HEADER_SIZE], my_hostname, strlen(my_hostname)); + return (PASS); +} + +int rmon_socket_init ( int port, const char * process_name ) +{ + + int on = 1 ; + int rc = PASS ; + + + /***********************************************************/ + /* Setup the RMON API Message Receive Socket */ + /***********************************************************/ + + CREATE_NONBLOCK_INET_UDP_RX_SOCKET ( LOOPBACK_IP, + port, + rmon_sock.rmon_api_sock, + rmon_sock.rmon_api_addr, + rmon_sock.rmon_api_port, + rmon_sock.rmon_api_len, + "rmon api socket receive", + rc ); + if ( rc ) return (rc) ; + + /* Open the active monitoring socket */ + rmon_sock.rmon_socket = resource_monitor_initialize ( process_name, port, ALL_USAGE ); + printf("Resource Monitor API Socket %d\n", rmon_sock.rmon_socket ); + if ( 0 > rmon_sock.rmon_socket ) + rmon_sock.rmon_socket = 0 ; + + /* Make the resource monitor api socket 
non-blocking */ + rc = ioctl(rmon_sock.rmon_socket, FIONBIO, (char *)&on); + if ( 0 > rc ) + { + printf("Failed to set rmon socket non-blocking (%d:%m)\n", errno ); + return (FAIL_SOCKET_NOBLOCK); + } + + + return (PASS); +} + +int daemon_init (int port, const char * process_name ) +{ + int rc = PASS ; + + /* Initialize socket construct and pointer to it */ + memset ( &rmon_sock, 0, sizeof(rmon_sock)); + sock_ptr = &rmon_sock ; + + /* Setup the resmon api rx messaging sockets */ + if ( (rc = rmon_socket_init (port, process_name)) != PASS ) + { + printf ("socket initialization failed (rc:%d)\n", rc ); + rc = FAIL_SOCKET_INIT; + } + return (rc); +} +/* Drain pending messages from the rmon socket; deregisters the client and returns PASS once a 'cleared_alarms_for_resource:' message is seen, otherwise returns FAIL or the deregister result */ +#define RMON_MAX_LEN (100) +int client_service_inbox ( const char * process_name) +{ + #define MAX_T 100 + int bytes = 0 ; + char buf[RMON_MAX_LEN] ; + socklen_t len = sizeof(struct sockaddr_in) ; + char str[RMON_MAX_LEN]; + int sequence = 0; + int rc = FAIL; + + do + { + memset ( buf,0,RMON_MAX_LEN); + memset ( str,0,RMON_MAX_LEN); + + bytes = recvfrom( rmon_sock.rmon_socket, buf, RMON_MAX_LEN-1, 0, (struct sockaddr *)&rmon_sock.client_sockAddr, &len); + if ( bytes > 0 ) + { + /* buf was zeroed and at most RMON_MAX_LEN-1 bytes were read so it is NUL terminated; %99s keeps str (RMON_MAX_LEN bytes) in bounds */ + sscanf ( buf, "%99s %d", str, &sequence ); + if ( str[0] != '\0' ) + { + printf("%s \n",str); + + if (strstr(str, "cleared_alarms_for_resource:") != NULL) { + /* Sleep for 10 secs */ + sleep (10); + rc = resource_monitor_deregister( process_name, rmon_sock.rmon_socket ); + if ( rc == PASS ) { + printf("deregistered test client\n"); + break; + } + } + } + else + { + printf("Null string !\n"); + } + + } + else if (( 0 > bytes ) && ( errno != EINTR ) && ( errno != EAGAIN )) + { + printf("problem with test client recv \n"); + } + } while ( bytes > 0 ) ; + + return rc; +} + +#define MAX_LEN 300 +int main ( int argc, char *argv[] ) +{ + int rc = 0 ; + int port = RX_PORT; + const char * process_name = PROCESS_NAME; + + if ((argc > 1) && (strcmp(argv[1],"--help") == 0)) { + printf("usage: ./rmond_api_test \n"); + return 0; + } + else if (argc > 1) { + port 
= atoi(argv[1]); + } + if (argc > 2) { + process_name = argv[2]; + } + + + daemon_init(port, process_name); + rc = rmon_message_init(); + if (rc == PASS) { + + printf("socket initialized \n"); + } + + rmon_sock.rmon_socket = resource_monitor_get_sel_obj (); + std::list socks; + socks.clear(); + socks.push_front ( rmon_sock.rmon_socket ); + socks.sort(); + + /* Run test loop forever or until stop condition */ + for ( ; ; ) + { + + /* Initialize the timeval struct */ + rmon_sock.waitd.tv_sec = 20; + rmon_sock.waitd.tv_usec = 0; + + /* Initialize the master fd_set */ + FD_ZERO(&rmon_sock.readfds); + FD_SET(rmon_sock.rmon_socket, &rmon_sock.readfds); + + rc = select( socks.back()+1, + &rmon_sock.readfds, NULL, NULL, + &rmon_sock.waitd); + + /* If the select time out expired then */ + if (( rc < 0 ) || ( rc == 0 )) + { + /* Check to see if the select call failed. */ + /* ... but filter Interrupt signal */ + if (( rc < 0 ) && ( errno != EINTR )) + { + printf ("Socket Select Failed (rc:%d) %s \n", errno, strerror(errno)); + } + } + + + if ( FD_ISSET(rmon_sock.rmon_socket, &rmon_sock.readfds)) + { + printf("Resource Monitor API Select Fired got message from rmon:\n"); + rc = client_service_inbox(process_name); + + if (rc == PASS) { + break; + } + } + } + return 0; +} + diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/tests/rmon_api_client_test.h b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/tests/rmon_api_client_test.h new file mode 100644 index 00000000..4865a76c --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/tests/rmon_api_client_test.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2014 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Resource Monitor API Test Client Header + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** Maximum service fail count before action */ +#define MAX_FAIL_COUNT (1) +/* Message sizing and socket failure codes used by the test client */ +#define RMON_HEADER_SIZE (15) +#define RMON_MAX_MSG (50) +#define FAIL_SOCKET_NOBLOCK (36) +#define FAIL_SOCKET_OPTION (38) + +/* default process name if none is specified */ +#define PROCESS_NAME ((const char *)"testClient") + + +/** Message container type used for the test client's transmit and receive messages */ +typedef struct +{ + /** Message buffer of RMON_MAX_MSG bytes */ + char m [RMON_MAX_MSG]; + + /** Sequence number */ + unsigned int s ; + + /* Fast Lookup Clue Info */ + unsigned int c ; + + /* Status Flags */ + /* bit 0: Process Monitor Status: 1=running + * bit 1: tbd */ + unsigned int f ; + + /* reserved for future use */ + unsigned int r ; + +} rmon_message_type ; + +/** rmon notification api messaging socket control structure */ +typedef struct +{ + + /** Client (compute) socket address attributes + * + * Used by both controller and compute nodes + * when messaging to and from the compute + * node maintenance. 
+ */ + struct sockaddr_in client_addr ; + socklen_t client_addr_len ; + + + /** Unix domain socket used to transmit on-node event messages + * to from other local services such as rmon - heartbeat service */ + int send_event_socket ; + struct sockaddr_un agent_domain ; + socklen_t agent_domain_len ; + + /** rmon api Socket using UDP Inet over 'lo' interface */ + int rmon_api_sock ; /**< receive rmon pulses socket */ + int rmon_api_port ; /**< the port */ + struct sockaddr_in rmon_api_addr ; /**< attributes */ + socklen_t rmon_api_len ; /**< length */ + + int rmon_socket ; /**< Active monitor socket */ + /** The addr and port are stored in the shared librmonapi.so library */ + + struct sockaddr_in client_sockAddr ; /**< Client socket attributes */ + socklen_t agentLen ; /**< Agent socket attr struct len */ + socklen_t clientLen ; /**< Client socket attr struct len */ + int tx_socket ; /**< general transmit socket ID */ + int rx_socket ; /**< general receive socket ID */ + rmon_message_type tx_message ; /**< transmit message */ + rmon_message_type rx_message ; /**< receive message */ + int rmon_client_port ; + int fail_count ; /**< Socket retry thresholding */ + + /* For select dispatch */ + struct timeval waitd ; + fd_set readfds; + msgSock_type mtclogd ; + +} rmon_socket_type ; + + diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/tests/rmond_api_test b/mtce-common/cgts-mtce-common-1.0/rmon/rmonApi/tests/rmond_api_test new file mode 100755 index 0000000000000000000000000000000000000000..2df1080ac9fb9137d4331cc1e39eb1173168e75d GIT binary patch literal 63446 zcmeFad3;pW`9FTo++=bCWMK(}eF9-qfrL$V5+E>vfC&kU3Qo2O3}iDiA)tcDqDDk< z?_$NJtxMHv>xx#u4XakswraJuRBfApt<;se@qNF~a%Zyn>G${f{r>sAIxzQ~XF1P# z&U2pgoaNr!xj3(2fkz0-d_2}{i_ogxITEF=mbLAV64bbr<&y}Oqpoj_ zl_1hmnK9`)Duj=ZO!DKCSE@(UNnP%wE@x6p>u*xiKDLeD5=}4h@r&GZ&K3_UNkT7_aDA>{;ENb@A+5q zvEP1_JUZ|91)K9XJOXm3p9lo@>V$Fy6zmMYvkN@i=j%icyR9>Pw=U$&>p~x27yN(k zg8%d`@cX)uGp-AKs4G0$)tP-JcG2Fmy1+-ekiWMJ{Ec1UXLNxd)dl{RF7!Xzh21!w 
zJ8SQ^UC5c+1%FBx_?x?^_rfmtS9QT(+y(#1F7!Oyg`Dy(>iBGor3~q~m30&;2y(w++?`!<6 zscrEV;#W9QS#kR5j9F8xKwxc6U2Py7DQk!X0#;zr#=x@R+E6$WY?xPF77ho)Rv^D* zNuV;=pwT5u=2h3#220AytAmQxIpv(C#gVB26pWOGsuSrYLy^sajagu>ZL0a7tFZtlI#fsxnv|k@-+9WY$LN%B=dv2>O;i zU$KFtKo$DQLUqB4h!u`BMCz(RuGmx-D2oI)K}q27AGlycLv3B4x~`%u5~{1U%AtrQ z<(5@eHdr;mnu_|(5EP*6d#?T$5Np;{{ttSpO^flwX}D?CuBaN)dpfr;a?ta+a6m_u^#!bc`!pEZ*IUda19?asr@}F9dsl0;Cl`9eqaC~jc z!J4rfa>D&*TM07%IL5S@PYXwG?rwqi*DQvQKuM4#JqPJ;f4%|LC0MV?xM5krlQ$t3 zZ~aljkuld|X=7W*-SY9)8(Kb!TQ^*DBb4f9eWCfUmvExx!GeQ2^a+KMxZ+FFaHM}I zkt>#d8g3jw*kcU`%+Ja3r7~qp&036z^sG`Q&{VBqB!G)A2Cr3=q|q^WEn4EUV(_u@ zr^eu8*L^uLc-Cz`{usPj`;k@{gGa;M&#D-FHy3MJr7?JPl>1p1gYV&DEvr5T55u~j zO)>b>T&!hni@_(y;4hEC_l&{sjlsvx*WZi5_m1JeH3rW$sQKI#gHJO^qz}a4`^MlO zi^2Dc!5@slbFFJWFT~*c8zj;rG57&7__t#417q+j(M9sln5B^f@; zzw2NmzGX3p{vFRc&nTu-ljnkTYQ%h8Q?OS$HG=Ru5?ha+!e_*E!VDp;MJ2J)mkrM4`B|m)=~+7=>yE6 z)mkXwj|p=~wdP3p7~$@Ovn2c;VGf~IpM>8g%%RhoD&f}&b7-_$5`K|z3gHvq0x{xg z!VG1tM~J@a=>-G+J+!@DB)cNVM*i@U?_F6k4}Q z_zJ=t0ena*O zWchiVe&BEZb92krpZI^~ubB0)&u6u?p!Dvfdyvh2tQ6v&qRi$O4}CfG(6IsHwH<$% zGv>AZ(A)C=_C0@MpXW&S?}oP7zs`Pr_ombxhegY$U!OX)>mVdnK+LAHmg^}qZ3O`T z?l@VZ`L#p;9(t&CfOv7odvnIT=uf%hNXm}oXxD+g(6{9}2q-`UHee{hP`hTZ#em;| z^)SBEvk#uTCUTRmk-;r}*mt{^qj86xS_gRtsBiYc zmTHKFR=*kQvs%}(vTtGF+$V2APqqf2&rp}M zs7v#oTW&&SDMPoR2B`L-bKnFF($Wis_D@CFdWzryZaf&5S_c4MwKw}8`OUAL=Wl+` zzvF`wg(dli52m7G|KUU11~EBo^+dgY{_51JPpkG;jY}E2lftd4y^k$~#O406+z-J4 zlKstpZSt&n<}p$k06$%GfTUALwLXs#GW>^5+Tiy$KkYwsY>r6bbyz`7;F=1VY5A76JP4pX+hd@(HUqmMNRZXCQ7wzmhuh^fV^ZW?*U}_X-$dBI~@Or#%7;t{Uh|( z^&EHs;(nRg5{Htle~#jJCpCaywY)G|V@nTN*)mpk+vik%o)+8kF(~tQkDUQ{clz#G zOuu-isWR>-=o{T3W4xA`OnW%HYl&qgaarV$$E zbNWlWFC7&>h6uasU}FzCY;`-}4ku#N3_|o{X#J_cV@OAMfH1DXQul1+?%-#AV(tIoFfql$d5C{+yKoAFb196$L@vV z`zs~(PbvyL*u&ra5f~J-d)8D0(w48-=z}R&?I0&dFI#%-sk7iEoN5MvyzR-%ZJ=5y zJFmi()a6khYPsUGQ>UIfN3ws^G8pWb7E*SuM_UibRmm~XcTav9hAL?OsO4r^U2gNU zDZ3Vur3nRF`hn$mqV#vzai)Lw^|H~G{)v6073Mb|ZJg)7^H%?c0={gBfOK%ud1Q^{r4cLmKy 
zwSLV&<~aTx2UGpij-Y+be=lhMYt>!|^Y88(>E@sIB{nz7P@-VkUsHDe9+oed28jjD z??*;aujW5v_1E$b*xcXzCzST@cqX-_R$A=c$Qb{$fsKDugqE{`%-`LJ^8bt=(9O&| z{)1>A_;);)di;$joG;xRmK$j1zc6(Bvy{G#l}&rM@wk6a<0r z*2kZqZ}&>x>Q`!htHBg_^_B=~na@Iauk71j7+uMRh1x8?lx)R#*FQ3>d0i>&vi6nWqS2 z$F2)jChwF>R_?J*^J*lSj8ct@Y@J zis>UNrJd><>R^Z(pHw=ggV^~SeONHFyQllcjb~Np*sKUBp)d(Wu|YV`SOb>8dpRb)Y>X0)Phw zC;oZr)Ip?K7$Bus9hPFm-iq{cqA;^jMFA&4x)33A_Ci^ap%S|4=BYt7>tsZ+IWRA{F_T?t0gJ*8ClTQ)U z;{$wCVaxGIt=#0)OFi?G(|6eOlYJG*>40;S9jgE&F7DI8cOm&?J#$$P_%4l`&wA!3 zXL$-x3u<<2@plI)wptcedas5&Kjblu(HqRJ-Ul5!tG(Wfj9n;4Tu@jbdosc1tutHo zr;^XMy@381gZ_zod00-2&e1v+BE%09p{MQ2izqNp`PmXL`R&SMB8a55))_|Y( z=^~9Vcs`pG%g1MF_>sq;NX_$k+;8$@p3Cd6K=(Q8fg0v1DnA2MVzuB-5X4`3*it!$xmYA8yOakAZO_Cfl>ETaLuzELOrs4{V1d zJX+IL>HqJq?sM$zs(K?iM`He_)6aG4O;Py1IvuIgDLP%C(-k_c(&UaHd@b^24C zKBCj-b^3-*|EAN=b?U`tl%Kvj9jVhPI$faC6*{fb=|-Jis?!^F`cs`gqSNPf`i4&b zrqj=LDl3n<;PJcZzR}Ce8*3wt;|oHyWes({N#o8OH*susqeNzpQLuFpN+~Z>zEY@1 zWH0T?^#gLG-0@oats0i60>JqOD!Sa&lLvQ@`Qa{{A2ZVBu3DkmTsIcD^@|F)XB;1y zyMW`yjGZ>@U=~h1RP}fg2IKJ%Ph7$)gONB1zh)x536d5+X&44m5W~Cs=qd<^i;Kte zmSBtD=L6w0)Y1LQ8W8O7LrC{M%tFSmdI2*8m+sd~;+!7H-pFjcV-G@*=>9MPud|>0 z!pptCeOn6f&(8tI9s-sgHFq;R6xpQTFF-aS8;_-=CJ*V3807?^WwP%Y79Vao4QOL> z#?2CPEV~5aQl_wm#BC|LYfyPM%S`GCbRM%PV|61v7eMzbkV&!Z^T3y~xGyl>s1`dy zlJFd)lkluGPW%siQ0WdRnwoJ4JUyz0!X{biPk=Wm3lZrkQddKC0+sg}*#v^@t+g}{VU5R#gD-ABMUJYY|~fwI{~ z9+#!wNP=g$N2z!UfK)NbO85Y>YI_alA96Ss!RLAnH3e^nj=g*&xCQxVIkMme^ucTZ z$?ROOlg}(0Z~Kw!olTYOg9u{1C$e8s;`;c;07{xa1W?lLjKWD54r8M^*W^Abm=g;%Zlb(&x&vkxkf?4%PeYIR~OD|2&7;Aosc9@~YcMDE zcj(Hrcn$hfJygM~!NSz@F^JL|!SsK1oqz)rJTf=vW z)G>r7X!rq`U)wz0Mw;jeI|gnpj4}85H%!~3zkfD(u?0nu&j}A zxL$GTD-x0RJ^_W&SCQO%5m?h#7r;W6lY;(BFHttPoJXNl`kG@<#4{YvbiV~5>F50k zp#3(p>nX7h{!{{LT!}H1)_-m`FlqhwQ1`U{S1aV+A4}RbPs`-mf63&!WcpG6>!--% zhGLoAc)m<-x>_dRyHh6Le^@3zcvmJrjH4g-zgfC-{~wh|=$7+ka%;0pZogM1cf2T* zAOBq@KTTne^uIe>CikwA$^Fs;`#*4#gnsdmOn&)>Odk4JCJ!gW25J2tIZY;yE|AG% z%f8{mW$XL_{V}-XN1-JtC8*-j~S%i~iaF&}lMxW~oe`t(VDj*UIGihh%d2 
z_cD3mA2RuM3PNC7{};1l@=}pZUT%=dZ+6S%l^@IG)n{dL z8M;7Rg;3ak8T>>H>gO5o16X&^P*1{0z_u800)8>b=c#}gfCB~j1`YIx^(b^czJvOc z(Hbm@NEqZ17rV$oRj7ham^QfNG;pO2UOrSNE2K9KJ}*Z?Ygfx;{U(`gxEjfj7ctK} z^Do27W$1cLlBv#8CxVtcg=dUX17{`r%PCTj;}iLq@|}{Ea8l6?g)%09O&ihsPCM+l2*tAM}f-)S}wZWIqxMOXl20HxHMIGxTu9i zowBVM%wnrcM8h-hf|_E;*}I_Lh>KAkV_w)j;fF&?PmM-bbZ53KehkL4*nx>}$U5;1 z1iVNggMk$Wzh1lp+}~Y%#&A%rWLjvXxE!%@NWJ(5KsVH+N^0vTe1j^)kl^Vc*b^{4 z4ymFxqjsWW;yGm}G@`@7P!MIHRLM{UI^^&7qFAk{0teStZdp}P#!FOX)VGKNh z8-r>*VgR~61K&X#q*CL>OBvu;CMKcVJYp&+^Id8Ni&$Br2_EU3Efz!8`7FgXanR*5 z3~R*rMBvGAF^HFvp@|R}cC(T)QG*!vHz+M`1@X>UhRs+#J3((*u|okh%FfOq7Vee1YK)i=OOV1)MQFyGCieXwwo{wm6?ZcKCdRGvoGjSWOFKU=o`tZU zzb5{$c#-*6nSVhXVSW~bJHHlRCm=6<0(+K)leE zWf(s66e8k%M1~tcyg4dilHi~6IiGoIFq)-=M?m~lRDx#s)>DoKNeOR)_@}6ZEO9q= zNU#@R-btX6CqX^Gr>;mB$x3n5D*`k$0@eMlzeJbt*^?sKFF(*I;KCt<)gm@N&BQDVtKOGq7Tl-U( zO#BV`f4Cg*|N9WDycO3&JLyke?lJJywj$1mA*WLqcOxt*#)4&DJGSUZm?)M&Rd3BZ z=p89_HCXw#L@`ofd@?!L#PvfRl50CSu4~JcK>`{n7iqW0oe$3wcZ22ODBGPFTi|$$ zVeg^1cQ`~}0+mrn_d&KufPXtL#`S_jiWU&KtBTPjL(7nCEpbP|EmGh=Bk*m{W;Bw# zN%7Ys_KF$cThSG-Dlkl3jTNT%qeOJ2*Z|(9D8G!0*&NcCSk%*T;{jJM^;#Xrb_m+$OB({QZB1#-D>bM|27+-|` z^@s#?N; zibzA?72+81U$`_qv3HzhJ7TPa{^%pe*~-b%k!!ev+zw#gwJ*aw;%wtoY|B~k5>AS? 
z%`kQ8zegi0=ie@EvrYoFC3q_&q{g3FP-&LI&%=P#wl9u8qTW zC1~P7TplCSFJj)yog`h=K2b>eZC9eM zDL{*!Xyjmg6+NR0cqnmXsL^8Ib!OZcmu863+Iq=so8EF6e|hx&((r%#BBQO4CegT7(S{01)GZE>~(vNcQWK@-w< z3hsIZk4}^YXNijt%F96CVI4FB#0qqK5An5T3aBJ}~br z@o%_9`WH$Vk76=NRh+X#GW;n$$!7P9(@B&UU0pU1$>g3T9sxu8VvpqG363o9s*4(Y zXA4Q>xsD=I(rgr6!}>2&MS0xQmb!&aJ*KqdDUecZjyQ@?ntnpzXNe8yp!BQdA)9!U zT=OucWxNTcG6=#nJdTeNwShO3M{_2 zm87MvB$^DnX$Udo7(i0IoqgKx#h{o=b&a|4UKd7qU ziCKHTS>kc(IYUX}VOl#qmAF|Vk-Gj$3F2W}R9C~nVCbt-f_M(6IHWeSL@s9d^v6|k zp3b!)-i$fS_n7Jop4nwQ#GaRaEW3Y(+{OAnmqUW5c}hnb%9RK|3<`C@9^z500I2m4XIg6%OcX1^Ju`tdujZt&z2lc6elzaUDI} zGdv3py9rS%eW>aM9)!w91Ai_``4WUQ6i-H@hRR^0VP)P(O8+Tq5j-=ETFWr}8fy3s zNC`YmZ7%^f#f>4UEOYFk{N&mH2NKRg2IpgAZoHFBmS73_Pw@3~FpD<#+<% zD^^PIBwH%Mnc+NYyDf(N?O9{gWxR*-QkyF*qIMQ*4+D1!LaGPsVc-=8>4sF32B`ienLBWfo1%DP>Pyw}|LC(Mv5>2IKI!c0aCxu{N<#f6hR4^Nn_&cjO(3lj3gZ&CT%_zvohWf{+jC4Qke z4%`V1&Q56s{>ye!Xf|-=CxPq2GuQ(u zu>U}OwMMKfx>R-7tjcVek9bicVaj{MJL@x^2-~X9#xu)}P{ORq%<@b!VfGAUb>HNP z%TVW4tTz|fm_FMF+|OKm#zIi7WZoHY`|J>aS6q&EeRdF@vFtzQo8pLF=5*+KITr4SB-99@2 zYqPAl1YDPsQ)=e+*@Kvk#E*c#znv7C&F!;z0j_7j@oHNx)o0v`%06R8nGL};gvgox zl8DblQC?JdQ}bMETw^~@8$la9?~4kG=Z(TBkC~f(!w`K;@F^Ej*a^pwGvFMHRY^oT zhzqcTLWkKI@S2g6765P0@gci^GKgMh2Cm8H0y{&Tj^>Q~c@^@4_h;3ngST%%n+bm! 
z`L5iM@TRSz@eVI&vq^iKfZ&~6rRxkamg0WK@`BTiLMNA?NHV96g*DJB?S~1kz!>Ri zxBU|=&PJFi?RIAAOzQ^#y5my`InEA3cUGY!ZsI}lM}ha^{FT>eUOu_SCqm?7Hl#fO zK8fY6Uy1DvEbrj?$lsQ4vPxs4@kT>UcdmwB(lY02+!w(>gf4d;~yL4Jq# zro9et!TKHE5B55|Yv*@(x7O?MezDg%9Wl%6@U+VB@ZOo<;Y~2V!`oMWiT65hAj+9M zkF>lFPul$skIeiIPtyFlMxInH7Jo%+N7vm+6MQlTBZv#C0=)84BdsN3nit+sAUVXS z({Kl)7epz<(kIb+PXTSS^cn0EJq2$fi!$BOgPtr^=3KXoL@XT%AM<2!1*6Jj=`vf< z3cHH3Y;l1sgMPQGxPzPxk+-4S<8oe>p6OGt*7 znazC|{kxP8%f;nV0&GJcJ4;OKhKCmN!h>*gL^Z@F#lfg0^ z-(~&K5@y7*u|6mX4q+v8#xMFEB-&FzT5R~3X~W0o6Nyb)5Z zWG?%Pc=};q2HKAdBQs>&WI21}A#gI!YKwSJ(S885Ck!Jq(2Mt+ig>@!eg%j>w51mD zFxUP&kRKa@ly4RBsM)s8Kv=_9wwM{n<9TcmkNNE(KrAs-SzF=j*cv$hi05g3;h$hk zlRpd_>AY3^A?qwh!3=p>i=36rLsoey;)4VB1wih?SJuo7YTm$_zlLq>?*Vb2p-Rie z^hD8I3@`f$kl!#Yva^(NE268dBHnYiKLFELhEdj3!eM}C8MsJAm$Iu8$KtsSe5E=u zUDd>{TKZe~LqZejc-#YpibwTBnfZV-i`MJFh$cGI$bVk&-;NL}$p4w+oQ1%^@ff>rbbj*)YK z3>tzQBUVwkI|yMQw;BR7=y*Q(QpATU>}!Cy*-({R6!G~E`yL=4Zbx|92f{NzzG4Wn z=d2>$F|z*%pIcqZ;<qu?vs z#|)_4loWAKWRC%2zM-1YR*T(ibO%(#CtK`gU|M4sbq7wRS{s1eYX~vJ;iw>%u81-D zp5Q|yFdgpz7V#dUeFvz&Yg10S_9rq3DcAlao*c_pB{MpG79S(yEWt%yVqba8WS<0A z;Y(lwLdAXF*0;UcxH*Gwbfjugx7Zv$mUZ?vo;Qhk;3-VRf=47sd4&%px_RpyF%n~0 zEbWC&H>W1^CmiKzITRs3;wU7X%jtM!pB#-tnWFCly?P*KuKBA|akbXO<61z5^EffT zu9=}drkXWp0rdg7<|C7Ui7qe=nNnL+GE>cL%!*IA3RDaCV*KiE)V3Ay~^ z%jTJl7PgWrLl*IiTUbn@&a9^%KwpHo7mATEliPWrj_sn%25ynzd4= zFQ7m$5ZGvmvZ^~^71!dP#m2_Qhl1@e0y{L3O)#+yQ)oJ|f zD>dY{;tjoxM!Jo6_ETkj?6|ToO&htQEN{G7U3XfvaBZ2=8F&I zClu1;GeFLfWDG98(~uulmSb%&KDP=y8`5+m>j)TYTtr|=EX7z|QnqW#^LXp*(jtvZ zi^g*xBtTa#?V)M+qB`_ZqRkfvs!zyNd>FJ{C;__XB3w&68)>{mdx0@@X^Yl_-2?`= zi~JCEj`-66XujpL6cxet>MGyU>4gl-KRnY!uq}*F?*OMF*cL{xQ3!z*7BVtuT}+Nr zpkzEY;tD|)+p{63dLUYMLa2beo>8fr&G!%Tvrmog7ux9{F5iNJdUW%RgOa!O9nIF( zyoL@X9@7k*9bg{&6S17l|3s8Pz&T`y1>q9iJEN`r`#C0rTUvpwhWpxnY{Cc@3zounNC4ap@+@PpO zx7J=a3}tU*S^Js~w@Aa?L}&{nNyAO)p92t2pwVzYEJt9L6Io@U5wpaNwMt-Q&Mx<+he;S_K(juNQqVBwlqCNrXIfz>GcVzyBOsQcQv{*WV93hA` ze4(TI$dUMw7Gr4-5chrnacU&~ThrW;$agx%l+Ds*a!lD@`rrtv>~xKD%kp)OF=dN1 
zeT^ym_-eQ8x$VpHO^@mmx?bxv6Ro-mB^UwNZD<+aY{`!@7y6K1W`*9+CC)*KpQsXI zx)u6BqZ@$!l@^5l-H@j%XXvvwn1OK>xYSWvD|qGMU4>Q0hr7*R)9%yd`g8|<23+!4 zT`mkZYifIfe-=J}pZ<%3?{Je~s8q+xJ)7tBYYuK{|U(~zskf^XMwA?nR1B@e6i z^SDpltp${V?k!yNPoVfF_lmuM1KguFaKmf34g%{Gi#5*~Hojgla0F0*8{Z9kwX~ZS z(|qz%$7cs`)%bw}b0l(i4?IZ^HN?(Yx@mIh&8LLJTswJ(OG*<~=oMT+MT#(vy=Mrn zdrtuw`7MHncdUe`A@oMhA$^>LZzas<620Rk{1M>BgQw#GHLb)GIz{=qnecO-Pw<&x z3%?0wSubenONMKzxPe^U4c|(xs!}L)8uX65OLIF1MSM`sM{#TuRwkO3cA(>h&4*wnOM44b2C;-k8< zruIyxCdt5>Ixw`WX+PX@o37znQ&YY85;X;mfwty#TwX?X=eOqy{0Wo*k7zoOHh%%E zbmtvf4y9sJrA=6Eqrk;U76PrrV6;`QF*G8(O)XVNEMaga) z$sDb67Mz2riijdU?`Yd|5m4}r#u}@Lf2v>)0&-#-f(k-U*u<(MnhV7Ov|1eElF)tU zMDH9z)@(FH9AAh&8#d7JiDg}!*#&O@n@K=U^K z8ur`e$TZx5<{QRBFgj1#YaR!VT!H(Lsn`Z=fCp?94(93wV5I${l67&*G2I zhu}uwS+*h-0U*Hhyowy;Hg8An0v-;ULiNDnDDS5zRBn|O?GNcU(K=dWVT5CqA=Lm;3xGC^d{Z_gFr z0l)l|?$-qi;gy4*>fE(b5ppDHbJtOmSS zpxOx?EqA>osx}^vXfjvLu&W(uXx>i%!M2kD{DfmK$7jSw2)1*Bg~5 zb(!CpvR{0SvT~#HDQYV<__iAr{w|98$Z)tv7rwy=@rqRoAcf)3H&lnL_Dm*Zk)hO- zjP+HulRBYgo-(rEydI)e}b1o&_yh&j2r0mS*TuZQ2uBsXL!%r8yAB5PK22bhXCwhLz=QBf8ha}o7-!uTN03%e0Ris~ zJ`IVuc(&$Pj3SVnX!Cb&hqLn?avQt=ODbG(*y z@lMg-hx;%XUE8*8y-bV*z+VeRQP{hoyW5 zM?OcEafOA?3~?IfH+y9j@`8t-8O+ItU}l4OBp!bV+#rcO_*6v6!3!8d8DMx{GVm;| z9fPWfCq7^eU^YE%jAZ5Ur(#_I5-a)`Jf{!it8By>By#!%x*Gj8<|0{UiCaeIq)@L@ z3~VxAPBTMXKoOH4LODSP&EN}e@GMd)9N>%z^6z10iLEei{P*Q;dcm_ysRW)w^I>H> zwoJ&1ILH5<)(lbG51#)h zCltXcQkCV3N-B>ho#oRXYkrSqxAZd3H1Y`$)H^&8uSZIkWf`C{m_v`;JPhiTNp)rj zywS=^SucTU-%b4DMM)!OS`8bV-z2j5Bf|{9- zl+at)$=$n?Qvh_5++tJFaf!W!qFt7gOma6N@_RGWJ;TcHJsyI3#I-4(G#cdxCBcnH zf`Kou(gIj0RSS>L?@i(On>nc>K#cusfXE3@&;_M4`) z#+@S%^qJX%)%9mqs*iC$AX@jF9uvCH?13kC$B8*Tlm;_xlHnb&SRcB&%5E4j2<4}Cn*alkjB*<-M~z2w zDStFF4l`ri;_=k)G*`d)2~ytaO30)h6oCj5wLGgB8?}w9bDUdeJeri>oAy$=k8eYX z;^69Voe;&&>`|dbPjr#AV^qW>iKhoBWsH!?ZTO?AOi_5%R$AlCC}Xs? 
zsl8!Cdcib2*CsR5qini<7M>BsQYE5hPSV~+1fKhKIpD~OgIhRj&N$1smtxF8uQ<`6 zmK!Y>Q{UXE>(4=N;p~69>+$nli5R-1<>B>WP-p>SgSF5Mcs$USst0#Igvp_!2HYZy z=6DVt)W>p*T@4`5Cmp3A-YtP=nA*}O$<{9ghZ|VVayig;B>@Lv(NSGAPAmjo`OY#@ z)bJ`+cq&n%Ym`x7IbSol0``^NP0wCQPWHW#vPxTD@}E5pqjFLYRr5KnGRlE^ie4he zd(&L0_PK*acJF0kxp!6fvt>(H$6MLG2f_Q-Fyle=JTI073vJ@D7;N5-*PEa)7Hn5+ z$4}REyJjweS9#P7EL+hXqH*U%s}6g5z!Z!DL9HSa+q{-FQ9Yx~y6&i=m+Z`tsgI27 zrEZY40a#0EQ7simNUPD7U}cbA8v~+59Sx^UP}y4>q2BZW`OjB%V(A*wZio#>JA`Zz zEf)4B*chX_j5QmTzBkP6K@&B_a6^ervE115PLd|QAhtp_^TKG)!O}Qo7 zIz9&_yS+^ZU##??xNSmp6j#jAVrn>TmzezCb9yj9;GqY=3uO&EWe!7w)7LH+K$|ah z0l3R$E&y9y?gCh*HVf4Zy10r1S7?kJ-v+r`s9~(?zcR{V81}T0gQ5K`gnO)CV3{Js zSIJ4Z3JenqUopNh+%P^=TT$Iu863Z9>Xg8gNn@)+wKy7GTiZCkJcOTNk7a2CjITKT z^znGFYDYGg65JH&v`AZIhkC=Cb+!1x_3`*+`Hq^gTCH|lS@;Qg{@i<8WQPij=HtFY z_sU>-BTZi3Fs zUq&*3`Tq&}-J{SfXm6|ME58Rc(Dxl#4kx)%gF@)uB7a>m$sg=efUshSkdLgEfDL$$MMh^C5lWfX;C6*5mXVfl)>+N#i65U?Pw zU`wlmq@Xc@^0F{hL^Dx>%^)Gx9H7+HZPYwcsJ5qW9O|lVtO+)RDgu#)GPp0K!Gy9e zkwqX-6(SGTvPfMGIGgGkDuIAHs!~Plw2#-ZVdx|3cNJF zS{g~(5*^4&P^%FP2mwU(g>Hos8DPs*IV_<)=7~1@6-)Gq^E5fD;)G{js`Ff&$F5w2 z_4CjOa%J0@Tg99cK{f-~7r2XT0(Zk#wChdB(0&Eph2&%jPEg$PJQ zoW1!C3CF1dfK`DwRRFLq5a$+H1;LKj`B;{7ysDsM3OcGFU|rBr1%X&)C$khNYgV0uoN#?scwoFw0ou86i*f!N}O|_o%S*8wL+lKX|&vmnath79PK(`=Cr08A#3%1 z02$6Wr|3d^bJbp_WhXeEx_EnBoW1FVIFB=h8HZ(mYu&rcS&JfhZ^Xshe-;vXbn6S( z?t63WETDgNaTT~;#ly%&fbO~2a|Y^$F2sF|)Z^VBXM61jFJ`yAfuE0!N3h0AND@8f z-no4UPmRp_Ra}BSU}l`xu2?l^n%#YzC&BQU8`1VHSkfnc3^9RK;2TpF=hz=^eF53) z#ms#Gj&I#=zut{9p5AzG?wNMtxO0L1MdBi0UfD?YUnEw^{B}=wBi-E5wEyA}pHpw> z>v-+Kd%+mki!vG87bAP#DrWc2Lbhr>{pRLdWDRi#fxH0pE9^R_%F_dlfFaH868n%42V@iW#wB{~*DUyluphbjgSbTd zMT?#J$i?H&qK9Sk;KV8?8WNEq+w6a)K2&XK8UHJ1f8+{FAXaKU7?nAIi&{8dwsL zzm=E=?sJwdE{;s%3}ncY!l4U-Bvs)D4r}YI>@#Id)4zZx!x!Qjeu6K!5x<`pP*Kf_ z)C9u83SH?WRcR;^Y(NxlK&AEdc_AymuyEN~C1(Ye6|X3?s+vNTLB!KYpdKab1Ld0o z;rg4BM4zbj z@k5Udb(`}FizftJ#Yzf0seMXgZRq?)X^|CW)s0XvybeD($S1xy8w6@2b!8S_VVN!G z49>7n)(V#es>*6Y)tdtm%hk&&K5O3M!2EghWIbK$8_LVhstoF>F_>2vs3< 
zj#v%BwSjQi#-I{yrc*d9f2}jR77GOGs;W>Cd>-yyUkBZTFh`*60s%b{39bz`%tSgH zqRT3)D#KVX7}HvjI;;vB%4)-I6XzF~oK;v-OotAI@#LNrz@lOUjTNb@uI{8^6EPO# zNXnifwbj$S{K3$|#TXI+=@Wb;u%>KNfTPQzL)X+-bBz*`cAq!_ZIK;PznN*cz831L zU*HU2JsQF=LSsUaP+7GU(80b_>+3Lx*zFj)fvTEF)CB>n^)CJ#0L#}%RiI+s28`i| zUY~7j3RjfjZLFwwQ&mF^+E`g#2r2nz1)3T#hy#3AVpukV?eE%eJC3uqevGkJV?|&C zxh%CdayY4g5(um-+Z>KSeL0H4w2+KZYzv){6}NZf$@n$S+6dZ9gW(ZN{1uLCp+LPZ z(Mh}ro{ynXoE4DKr>dc>B2XDx%iWL(Q2B-VIucB!=_2^4)9lGArZIrXXgVp}R92t2 z3}TA219Awc@s4ieBGw8N2P*3VA=#^uI%(WUo$(`gnW&5^!@{!Ml?tt`W0YC2EO+6O zyrm^*&x9;J3=j)9hAM(+Z+YD&>15J6owT)*fy)Gs(_kWoNaYPpHNhGeW79p}*CV0D#M&JiWe!AKC>AlW%qePcvz7urPVnOHwhpB}Za z9Iw9FRz+h&L$J02Q^V$(^15o+7QrWscI%lXww#ItE`=i=659CdcQh{KHV%WM3;`VF z1GRyQFoq0%eHhaPMYWG<7^0jkYue2}-%(GsTaWDYrDrXjTYyPH%?!)tFQG3->Kd!9 z>}(jj976)*4`W$x-Y~np1!WOe+`O}vw42<% z!x&-L82Z_)4myg}BpF@QB@g*AR%T}^O&c}*x>y*^kwUQQ>K zq40+B7%ue2hA6G6L7hBp8V?&_&K}>?Ft%cCMRGx7O;}WnuZdIy z$7AG$aPTpFjLSH zo+{LatFVsfF!e@%KHLf{uWiCFZ3mVG!#IqMJM-JfgM+TdFJq&C(|w75GA^OnxbEo9GDU~J;gC-EEF{OM}ATBs~+)kW5! 
zkuuWISHtW~{HS&qgPLKFQP)J=DTo7ll4uwFxQ)P?G#u=FQQk=mn#8fe(fEJ2hL|QJ zdqZPIM6T8`b)(sA8W%~J^Rf2BG%E)~0R}-JP*K-dE2p(~8c+TXb&Dqkur8E)0&8nN z19dNxcqN@&FHHwW6dX*$r4PskPKrHPWR1h<0Vzkv5bE*J5UshV3N6V2>!5 za-9CK(8fltEP&Z>6Z*nfO>UjIZjno@D!G`@hiq23z8Xs+OCmd)tS95-aBUDPHUw#U z&$|E3$df+UW|l#-va>78u=uN$mI!fK)`U|^Il?ewB5=ZhdS0*sxzXcES_Es`&P*bG zq2szkwYBqs#|F}yBkMviLjXO?2*(`$tQ6fut>siM?a^iu(OuKYnL*c6BZdr)LhE2m zdC*|oQHlD`cYrl=v#J-{P!@Xwiij=)zGE3tgCh;?c6lfo!JbyGF==jd+OD=4W{+fH z8yJu`K)PD(&Xk*cA6Jp?ayMg^RgTCX9h`EiXYX_!CU|zySi7#Sj{7pq&oyQMv@y*j zcjHzoX9R3$!`SQLkAk5e@3TOP>@zwpx9pM1y2c26ox5Neo~7|SSpw4@nA>3rEG8Sc zI1kI9u(qMDu^zWFthG@YSP)blysK6hm*Z3z=> zt}NS3ZDhQlcgj#wvpKM?E*vonz;c&_d z)Ka^+WLX~8xv+L+NFF4}sMMxYrl@#@ely``2@i-&e@6EnQU`@L8chz?)JHbQD3rIb(A^riT3}Cs zVE|b;qLN$pj!m1!fe^EMhGU>Z*fu#uT3g%f&N@@YoO;xlO@TToP~pBqz`|xGy1ME- zY$0SZYIW4LteCrX1Z{|r=Tvf*f-Nh zxt!wMIqLM&(QoqJB+o6eGr#~)H$J%rU|n6$Rc%LUp+4Gj&yyL&&~zLH@}99gV8m9R5x|^d zjUDUWP>$l{Sx>uZV~RSAQwIemy6RcGz3WHAJ!9vB`~vhf4>~v@W3w!`?ug`Ad~)g7 z5ad!o`ILJ|Eswm6H{|+xQqvHuQri&>KCHuOEiM5J6A%43#RnP1)b!o{jLX7;O&yuJ zL0d)!?BCtUz(~g#$4I~)7RP3ci&%K`93RO$PD=xHRmef|;6Lz_5(ET7|ce}kk zPybXu;!Z_{g$rWZck39vErfHDB|Q4usP6tl5-c#0r}V(fJ(KJ0dTuGJ56M}aJ2>Sg zavdtC*0xW6D6f%gTosDGI~idit=22MeKNuG+7^9s(& zTa`DDE#d+dZLF#;TPr)V-8jnTX1`sl_1dp14bDUfnh2Gf(+5_h?gCj2?o9x-(lzH7 z>fj8T@Lo{1RlXSqI3}ESyzy6y?VLQw4=gM=Yi=&y6BTYO599~16>EFbl8S7!)8S zijzm}JgvcrM|EQj0zHEh*O{?i{vBGX!)R)0;t}tAaNTHytAjW!K*X=*2@>MV+8W%& zz~JwCYU5Uin@e*a5O;sk1tHlSe*W>|*QE$U9(I>vgQ= ze_VZ;klN->w3#u~o=n}B(d#?x$G9)6lEGQp3=cWq3?k+U{?+&KnHVvF4qh5{R8TB#TL{^bz0t{EN4wp)-4Fa5~=pS7#P#9C> zR+?)QWlTn0c8s3-%wo^l%10V3Ze#Uvpz-kHdDz4?*6MzX9Vi{AtadiVMw?aD)w`yW z>mS52_jI}A2#_Nt8Z$ap5Gtx@w|Ex4w~25@Rmr-ES3rawIm+ zio7iL8K_BPg|*yd|JH`lKptIfvSqt}?S>cS?(3%U{d5XJCTp}(HO8nUv)}4!j9_8O zzt)RoGc2Z#ZkGF&xqY#G%)9THi<-yTqx0SR{>H)NW~r<*ZsEUkS-_v$V-tpu-m|6d2AuGYN9f53ZUu-2!&+)T}zt5firclULW`*=I& z0IzwaTxaccY3=JYuV(wH>zhrE$sdg3QceDaD9$l?^OiV<=&t1dNS8yrG}%X8)GCuT 
zl2jDwN|xakR*A{R7A)ytEYnEyDmV^6lYhh2qR?dDc2PySYV3Q7`<$=My9aVnCxgRUiGqpOa)+UFTTGPYh<$j z%&Y2lx>Xi_rL%EbJ|f8+ot+;=wHI!Lwz`5=o9sy!RcNvUTtTZ%)-(XsnXLN~H*2-Y zmbnEBO*Y!%4%3%W%JinNicQwk5oh5AgZm5=mDOP$F)!tdxUD(cWKAi^YT44kuR9i8 z)xIFYj(HS?)kzEPadlr=Fn-sC0Fj-?#Y(Y%c zRE~|C$(ouezGR<)j9F0DWMlQ{xI8g$bbLwM!?udAb6h|-Un1K(v?*!KXIsm!a|~b% zXj?0;a|~cqmz)MLntSEu?Ya)*-B{(At6<4>&T$QV;v&~v=NQ09wk>eB*V>3PmbICHp!`Y;TAErAM__*jV(4ZfZB{xESSsGnuao9rPMRbsMFyV{29_Zh%wjYW#dc5{t(&C#ZP=FO4jm6~8QEime0rf9FMd3URM zSEaC4b}Y5Bt5W5z@+(a??4k-y*7TKa)z|-@>aH$EtD_80cL8yMc6Y%_vB(Ej1X8~4 z@}p=Bp0X^kb%6?7*rN4Y_J=|Rmh3JBYNDG+NP5viTiPJ8f~^-c@q(t&NW_FTCUW5g z8ZXc$q$Z{zjXyNTb~S17d1v0|{JiHJerl`f34Am2%=^x~GiT1soS8k(8v%kJj!B~<{sUW+TH9Ov0+yZ)eZ!z}M%cfh8za%~JhN|-3deB2L8JwPwbM%6avnxQ3fEvQF+>s!|pc|&B%N&mP6YovdCf=#D?LV{K3 zpPXRx=>L9m$V0kT$G{I1ls=vQ=MtfPXCV*tdagx2{w^_k`4NdZGE!_W~;^M z)9L+b3~i+Mbxl6>=?vXIHIxjkkY46lJbPxQ^Wz-4=v^N}o9JaWVXDj!$t%%@=p`p* z5M+oZ$=qHIOi?iq5gn0tX`@;o!@ zMNOakfxp!7tLpNoFqM1KCdqK2hVEQ?gPHx^j_5TtUmY6-qrZIy3)>lo)%r5ct~BV&&?Db zkhDqlxl>@}tsM73xaDN!x$=BO^M6|1XG`qiGW17Po~vzKp5fYcoxUsCwP#yjTV40@;7V$&C2s#o`wC@mB zo)fD`2edYIIR&-yoMrL&g?%c|Nz?yYzRV7unDDsT$61NBk8`vel`oUy*#|cLkBU4G zeYjEkKWur<{BG2q|Cc=P`&n&gm(=BC(%QdVr+)3~^5v5!S8z(m*`w9xyl$<|;q095 zzm+EyVXyYbd(`FT>q!lBSBp!^T+ZaXH{azrH|3nu+UKfZb-|yL!o@dL-rY_g{wj4~ zXpgJz9vm5{-n*h>A-?R?5D09l!eG(Aa7VwZN@)Am;q9*4e_%J#3obs{@iFpszI*|d z4&W^5>d?T>t)ig5Jv&ESRlX4bcf9;ncXb;Ah#U0Z8&(J8Yj_Zo)VAYmr#OT}`}%4F z`}+p=;LCXVtJshRh^vOS9tdgB|41Jckslbr`k*(6EbvqH`G!1thlgENcrHNc0a2kR zh4NPXoigwc+_8JxApKi=d*#~{jRCa=G7nOyL3-u8Fhw<`+%33y;8Rcm=cRC6`fFKT zq^@_|oxzEJMK*nX1_AJu#`;}#5kjDP)&wg)l>C6;Gjp{fl-3`qi+ucu>fhx<$&b}v ziKw@)ULBaWrAi#`}sYr2nhy_!nM&lke`t0>wQxKdcMyD?7 z6FLr0I{#_9_BIV?rN{-M+RSC`KZ#h;rSa!RzFI4q|Ej-^_`C;N*}w-f`1{6aR{A`uZILa;x~5TwK`W zqc1M>j${eoAljmisc`Dioq+hXev2Lo)FK@- zF4`GFN&XB^hO(r^`ioRQQvIKs_EVvvx=otDnj6Pbe?g1{kJu^~-5IJM(B$_K&3`n3 zypMW>=a=Zesrrw0QZl$!e`$$+|967?Uu1AgT~@!XMF0LnL8*>6O4G3V>s8;@--aiG z!Wn%IX#(}>Zc%;4)1CBE@Ez8EggEsW9w^a2ej!Ng(Q%0KR=)`mY&(<@x8b!A8a4_h 
z=-Yv5VnOkZ;EZYhpGELX_|NL(_#^#uQT}6Bg8n5wxFHy~I}KjbsfYBkx+o_DZf?wv zig5g8L?J%T9V^7gW1do!&IHGKh)Z4dm}zl-k%CeFiEgx^D4mZx&Ihvc4UXe{7M~Q4 z-&uUJX7O7c$I&c)o8$PH#Ty;Ry)543IG$zkW{gi!8uI>nH`Q_cNk`UW z6yvy(#XsRVUS#px<8dI1e-izAPa5+6dYNUh-%iEWyE`2F;}nehHN&x=&Ehhf&tLVq zS$vSGcRI;1D)sKu2BEy^3{qks>hZ`G?J!m$ROhBRzQxYdu#>^Vbr5G6q*khpSD}{^VK!yvWX# z9>;SuJ@RrJ{4d^vOXT+`e&n_wuj1~R68S#@uTUuEz74zyZxS3|_?|Q#o+9~Kz#9wN z;oZQC$`zI18-YvxaP{pQ+D>)>Z>qZ^?gwb5MJzVG`c`cZs|ly;yXA?=HcY1DAHf{+yY#tNh8vh>&-;m85f^1b?yw|8WU^*5k>% zYd)S^xnA*je*X6Bl5`Tf2Bn!0YF>Y=Qf*7;vY2TJf!5Y67hvZXV_toCV?-~tEgMHV zK_j8muzf=#!y|jQZL10y|9x#6YMmV$aqK$yUUAp}Xd(uNz!?d;c~B=0gRvk7ftq5l z_VCWZUQkl?gVY36_iyNO9OjS7P=;nJ$Pn773Q;&Lf^L!Jj>yRw1E<67&k~5m2HA@UV&oW-L=Kl zB}xiwmlWRy^9~^9mc!JujE>Qf-Vt9lTN=d|m z?9K|rkA$@Po1&}Hlv35UB}-@KpCcg+4m?BIoa2*bvha2Ia!5<>^7V0Z#e(lu;J zv4;>h#SEKBF1a?ti>DtY{a2zT&p@spNm`I++09DV29)~q;r;t-I0|QYa5t#TM-J5X zwM0={cQ>{OYpvDAxFevAR8{Q>vV84`YimOf(c%JER+3FCx&hGT=rI8)d@0Ut5!POR N%j2VW5& +#include "rmon.h" + + +typedef enum +{ + CLR_REQUEST = 0, + SET_REQUEST = 1 + +} fm_req_type_t; + + +typedef struct fm_req_info +{ + fm_req_type_t req_type; + union + { + AlarmFilter alarm_filter; + SFmAlarmDataT alarm; + + } _data; + struct fm_req_info *next; + +} fm_req_info_t; + + +typedef struct fm_req_queue +{ + fm_req_info_t *head; + fm_req_info_t *tail; + pthread_mutex_t mutex; + +} fm_req_queue_t; + + +static fm_req_queue_t fm_req_q; +static void rmon_fm_enq(fm_req_type_t req, void *data); +static void rmon_fm_deq(void); + + +/****************************/ +/* Initialization Utilities */ +/****************************/ +void rmon_fm_init (void) +{ + fm_req_q.head = NULL; + fm_req_q.tail = NULL; + pthread_mutex_init(&fm_req_q.mutex, NULL); +} + +void rmon_fm_fini (void) +{ + pthread_mutex_lock(&fm_req_q.mutex); + fm_req_info_t *i = fm_req_q.head; + while (i != NULL) { + fm_req_info_t *n = i->next; + delete i; + i = n; + } + fm_req_q.head = fm_req_q.tail = NULL; + pthread_mutex_unlock(&fm_req_q.mutex); + 
pthread_mutex_destroy(&fm_req_q.mutex); +} + +/***************************/ +/* handler function */ +/***************************/ +void rmon_fm_handler (void) +{ + while (fm_req_q.head != NULL) { + EFmErrorT err; + fm_req_info_t *fm_req = fm_req_q.head; + if (fm_req->req_type == CLR_REQUEST) { + ilog("clearing alarm %s", fm_req->_data.alarm_filter.entity_instance_id); + err = fm_clear_fault (&fm_req->_data.alarm_filter); + } + else { + ilog("setting alarm %s", fm_req->_data.alarm.entity_instance_id); + err = fm_set_fault (&fm_req->_data.alarm, NULL); + } + + if (err == FM_ERR_NOCONNECT) { + ilog("FM_ERR_NOCONNECT"); + return; + } + + rmon_fm_deq(); + } +} + +/*****************************/ +/* request functions */ +/*****************************/ +EFmErrorT rmon_fm_clear (AlarmFilter *alarmFilter) +{ + EFmErrorT err = FM_ERR_NOCONNECT; + if (fm_req_q.head == NULL) + err = fm_clear_fault (alarmFilter); + if (err == FM_ERR_NOCONNECT) { + ilog("retry clearing alarm %s", alarmFilter->entity_instance_id); + rmon_fm_enq (CLR_REQUEST, (void *) alarmFilter); + return FM_ERR_OK; + } + + return err; +} + +EFmErrorT rmon_fm_set (const SFmAlarmDataT *alarm, fm_uuid_t *fm_uuid) +{ + EFmErrorT err = FM_ERR_NOCONNECT; + if (fm_req_q.head == NULL) + err = fm_set_fault (alarm, fm_uuid); + if (err == FM_ERR_NOCONNECT) { + ilog("retry setting alarm %s", alarm->entity_instance_id); + rmon_fm_enq (SET_REQUEST, (void *) alarm); + return FM_ERR_OK; + } + + return err; +} + +EFmErrorT rmon_fm_get (AlarmFilter *alarmFilter, SFmAlarmDataT **alarm, unsigned int *num_alarm) +{ + unsigned int n = 0; + EFmErrorT err = FM_ERR_NOT_ENOUGH_SPACE; + + while (err == FM_ERR_NOT_ENOUGH_SPACE) { + /* get additional 3 more alarms at a time, as max. 
number of port alarms + is 6 (2 ports per interface: OAM, INFRA, MGMT */ + n += 3; + SFmAlarmDataT *list = (SFmAlarmDataT *) malloc(sizeof(SFmAlarmDataT) * n); + if (list != NULL) { + err = fm_get_faults (&alarmFilter->entity_instance_id, list, &n); + if (err == FM_ERR_OK) { + *alarm = list; + *num_alarm = n; + return FM_ERR_OK; + } + free(list); + } + else { + err = FM_ERR_NOMEM; + } + } + + *alarm = NULL; + *num_alarm = 0; + + return err; +} + + +/****************************/ +/* queue functions */ +/****************************/ +void rmon_fm_deq (void) +{ + pthread_mutex_lock (&fm_req_q.mutex); + fm_req_info_t *fm_req = fm_req_q.head; + if (fm_req->next == NULL) { + fm_req_q.head = fm_req_q.tail = NULL; + } + else { + fm_req_q.head = fm_req->next; + } + pthread_mutex_unlock (&fm_req_q.mutex); + delete fm_req; +} + +void rmon_fm_enq (fm_req_type_t req, void *data) +{ + fm_req_info_t *fm_req = new fm_req_info_t; + fm_req->next = NULL; + fm_req->req_type = req; + if (req == CLR_REQUEST) + fm_req->_data.alarm_filter = *((AlarmFilter *)data); + else + fm_req->_data.alarm = *((SFmAlarmDataT *)data); + pthread_mutex_lock (&fm_req_q.mutex); + if (fm_req_q.tail == NULL) { + fm_req_q.head = fm_req_q.tail = fm_req; + } + else { + fm_req_q.tail->next = fm_req; + fm_req_q.tail = fm_req; + } + pthread_mutex_unlock (&fm_req_q.mutex); +} + diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmonHdlr.cpp b/mtce-common/cgts-mtce-common-1.0/rmon/rmonHdlr.cpp new file mode 100644 index 00000000..71f1bf16 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmonHdlr.cpp @@ -0,0 +1,4914 @@ + /* + * Copyright (c) 2013-2017 Wind River Systems, Inc. 
+* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGCS Platform Resource Monitor Handler + */ +#include "rmon.h" /* rmon header file */ +#include "rmonHttp.h" /* for rmon HTTP libEvent utilties */ +#include "rmonApi.h" /* vswitch calls */ +#include +#include +#include +#include +#include +#include +#include /* for storing dynamic resource names */ +#include +#include +#include +#include +#include +#include +#include +#include /* for ... RTMGRP_LINK */ +#include "nlEvent.h" /* for ... open_netlink_socket */ +#include "nodeEvent.h" /* for inotify */ +#include /* for ... json-c json string parsing */ +#include "jsonUtil.h" +#include "tokenUtil.h" /* for ... tokenUtil_new_token */ + +/* Preserve a local copy of a pointer to the control struct to + * avoid having to publish a get utility prototype into rmon.h */ +static rmon_ctrl_type * _rmon_ctrl_ptr = NULL ; +static interface_resource_config_type interface_resource_config[MAX_RESOURCES] ; +static resource_config_type resource_config[MAX_RESOURCES] ; +static thinmeta_resource_config_type thinmeta_resource_config[MAX_RESOURCES] ; +static registered_clients registered_clt[MAX_CLIENTS]; + +static libEvent_type ceilometerEvent; // for ceilometer REST API request +static libEvent tokenEvent; // for token request + +/* Used to set alarms through the FM API */ +static SFmAlarmDataT alarmData; +static struct mtc_timer rmonTimer_event ; +static struct mtc_timer rmonTimer_pm ; +static struct mtc_timer rmonTimer_ntp ; + +static struct mtc_timer rtimer[MAX_RESOURCES] ; +static struct mtc_timer thinmetatimer[MAX_RESOURCES] ; + +static ntpStage_enum ntp_stage ; /* The stage the ntp is in within the resource handler fsm */ +static int ntp_status ; /* status returned by the ntpq command */ +static int ntp_child_pid ; + +/* for dynamic resources */ +bool modifyingResources = false; +vector criticality_resource; +vector dynamic_resource; +vector types; +vector devices; +vector fs_index; +vector fs_state; 
+ +/** List of config files */ +std::list config_files ; +std::list::iterator string_iter_ptr ; +std::list interface_config_files ; + +/* percent or abs value for fs resources */ +int fs_percent = 0; +int swact_count = 0; + +/* for cpu usage */ +time_t t1, t2; +int num_cpus = 0; +int num_base_cpus = 0; +int included_cpu[MAX_BASE_CPU]; + +static string hostUUID = ""; + +/* Initial cpu time */ +vector cpu_time_initial; +/* Later cpu time */ +vector cpu_time_later; + +void save_fs_resource ( string resource_name, string criticality, + int enabled, int percent, int abs_values[3], + int alarm_type, string type, string device, int mounted ); +void calculate_fs_usage( resource_config_type * ptr ); +void _space_to_underscore (string & str ); + +struct thread_data +{ + pid_t tid; + pid_t pid; + unsigned long long nr_switches_count; + bool thread_running; + double resource_usage; + resource_config_type * resource; +}; + +/* info passed to pthreads */ +struct thread_data t_data; +pthread_t thread; +pthread_mutex_t lock; + +/* strict memory accounting off = 0 or on = 1 */ +int IS_STRICT = 0; + +void mem_log_ctrl ( rmon_ctrl_type * ptr ) +{ +#define MAX_LEN 500 + char str[MAX_LEN] ; + snprintf (&str[0], MAX_LEN, "%s %s %s\n", + &ptr->my_hostname[0], + ptr->my_address.c_str(), + ptr->my_macaddr.c_str() ); + mem_log(str); +} + +void mem_log_resource ( resource_config_type * ptr ) +{ +#define MAX_LEN 500 + char str[MAX_LEN] ; + snprintf (&str[0], MAX_LEN, "Resource:%-15s Sev:%-8s Tries:%u Debounce:%d\n", + ptr->resource, ptr->severity, ptr->count, ptr->debounce); + mem_log(str); +} + +void mem_log_interface_resource ( interface_resource_config_type * ptr ) +{ +#define MAX_LEN 500 + char str[MAX_LEN] ; + snprintf (&str[0], MAX_LEN, "Resource:%-15s Sev:%-8s Debounce:%d\n", + ptr->resource, ptr->severity, ptr->debounce); + mem_log(str); +} + +int _config_dir_load (void); +int _config_files_load (void); + + +const char rmonStages_str [RMON_STAGE__STAGES][32] = +{ + "Handler-Init", + 
"Handler-Start", + "Manage-Restart", + "Monitor-Wait", + "Monitor-Resource", + "Restart-Wait", + "Ignore-Resource", + "Handler-Finish", + "Failed-Resource", + "Failed-Resource-clr", +} ; + +const char ntpStages_str [NTP_STAGE__STAGES][32] = +{ + "Begin", + "Execute-NTPQ", + "Execute-NTPQ-Wait", +} ; + +registered_clients * get_registered_clients_ptr ( int index ) +{ + if ( index <= _rmon_ctrl_ptr->clients ) + return ( ®istered_clt[index] ); + return ( NULL ); +} + +rmon_ctrl_type * get_rmon_ctrl_ptr () +{ + return _rmon_ctrl_ptr; +} + +interface_resource_config_type * get_interface_ptr ( int index ) +{ + if ( index <= _rmon_ctrl_ptr->interface_resources ) + return ( &interface_resource_config[index] ); + return ( NULL ); +} + +resource_config_type * get_resource_ptr ( int index ) +{ + if ( index >= 0 && index <= _rmon_ctrl_ptr->resources ) + return ( &resource_config[index] ); + return NULL; +} + +/***************************************************************************** + * + * Name : get_resource_index + * + * Purpose : Get the resource's index based on the name + * + *****************************************************************************/ +int get_resource_index ( const char *resource_name, int *index ) +{ + for ( int i = 0 ; i < _rmon_ctrl_ptr->resources ; i++ ) + { + if ( strcmp(resource_config[i].resource, resource_name) == 0) + { + *index = i; + return (PASS); + } + } + return (FAIL); +} + +/***************************************************************************** + * + * Name : rmon_hdlr_fini + * + * Purpose : Clean up the resource monitor module + * + *****************************************************************************/ +void rmon_hdlr_fini ( rmon_ctrl_type * ctrl_ptr ) +{ + for ( int i = 0 ; i < ctrl_ptr->resources ; i++ ) + { + // mem_log ('\n'); + mem_log_resource ( &resource_config[i] ); + } + pthread_mutex_destroy(&lock); + /* Turn off inotify */ + //set_inotify_close ( ctrl_ptr->fd, ctrl_ptr->wd ); +} + 
+/***************************************************************************** + * + * Name : resourceStageChange + * + * Purpose : Put a resource in the requested stage for use by the resource handler + * + *****************************************************************************/ +int resourceStageChange ( resource_config_type * ptr , rmonStage_enum newStage ) +{ + if (( newStage < RMON_STAGE__STAGES ) && + ( ptr->stage < RMON_STAGE__STAGES )) + { + clog ("%s %s -> %s (%d->%d)\n", + ptr->resource, + rmonStages_str[ptr->stage], + rmonStages_str[newStage], + ptr->stage, newStage); + ptr->stage = newStage ; + return (PASS); + } + else + { + slog ("%s Invalid Stage (now:%d new:%d)\n", + ptr->resource, ptr->stage, newStage ); + ptr->stage = RMON_STAGE__FINISH ; + return (FAIL); + } +} + +/***************************************************************************** + * + * Name : ntpStageChange + * + * Purpose : Stage change handler for NTP resource + * + *****************************************************************************/ +int ntpStageChange ( ntpStage_enum newStage ) +{ + if ((newStage < NTP_STAGE__STAGES ) && + ( ntp_stage < NTP_STAGE__STAGES )) + { + clog ("NTP %s -> %s (%d->%d)\n", + ntpStages_str[ntp_stage], + ntpStages_str[newStage], + ntp_stage, newStage); + ntp_stage = newStage ; + return (PASS); + } + else + { + slog ("NTP Invalid Stage (now:%d new:%d)\n", ntp_stage, newStage ); + ntp_stage = NTP_STAGE__BEGIN ; + return (FAIL); + } +} + +/***************************************************************************** + * + * Name : _config_files_load + * + * Purpose : Load the content of each config file into resource_config[x] + * + *****************************************************************************/ +int _config_files_load (void) +{ + int i = 0 ; + /* Run Maintenance on Inventory */ + for ( string_iter_ptr = config_files.begin () ; + string_iter_ptr != config_files.end () ; + string_iter_ptr++ ) + { + if ( i >= MAX_RESOURCES ) + { + 
wlog ("Cannot Monitor more than %d resources\n", MAX_RESOURCES ); + break ; + } + /* Read the resource config file */ + resource_config[i].mask = 0 ; + if (ini_parse( string_iter_ptr->data(), rmon_resource_config, + &resource_config[i]) < 0) + { + ilog("Read Failure : %s\n", string_iter_ptr->data() ); + } + + else + { + dlog ("Config File : %s\n", string_iter_ptr->c_str()); + + /* Init the timer for this resource */ + mtcTimer_reset ( rtimer[i] ) ; + rtimer[i].service = resource_config[i].resource ; + + resource_config[i].i = i ; + /* allow to clear an existing alarm if the first reading is good + after reboot + */ + resource_config[i].failed = false ; + resource_config[i].count = 0 ; + resource_config[i].resource_value = 0 ; + resource_config[i].resource_prev = 0 ; + resource_config[i].stage = RMON_STAGE__INIT ; + resource_config[i].sev = SEVERITY_CLEARED ; + resource_config[i].alarm_type = STANDARD_ALARM; + resource_config[i].failed_send = 0; + resource_config[i].alarm_raised = false; + + /* add the alarm ids for the FM API per resource monitored */ + if (strcmp(resource_config[i].resource, CPU_RESOURCE_NAME) == 0) { + /* platform cpu utilization */ + snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, CPU_ALARM_ID); + resource_config[i].res_type = RESOURCE_TYPE__CPU_USAGE ; + } + else if (strcmp(resource_config[i].resource, V_CPU_RESOURCE_NAME) == 0) { + /* vswitch cpu utilization */ + snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, V_CPU_ALARM_ID); + resource_config[i].res_type = RESOURCE_TYPE__CPU_USAGE ; + } + else if (strcmp(resource_config[i].resource, MEMORY_RESOURCE_NAME) == 0) { + /* platform memory utilization */ + snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, MEMORY_ALARM_ID); + resource_config[i].res_type = RESOURCE_TYPE__MEMORY_USAGE ; + } + else if (strcmp(resource_config[i].resource, V_MEMORY_RESOURCE_NAME) == 0) { + /* vswitch memory utilization */ + snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, 
V_MEMORY_ALARM_ID); + resource_config[i].res_type = RESOURCE_TYPE__MEMORY_USAGE ; + } + else if (strcmp(resource_config[i].resource, FS_RESOURCE_NAME) == 0) { + /* platform disk utilization */ + snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, FS_ALARM_ID); + resource_config[i].mounted = MOUNTED; + resource_config[i].res_type = RESOURCE_TYPE__FILESYSTEM_USAGE ; + } + else if (strcmp(resource_config[i].resource, INSTANCE_RESOURCE_NAME) == 0) { + /* platform disk utilization */ + snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, INSTANCE_ALARM_ID); + resource_config[i].res_type = RESOURCE_TYPE__FILESYSTEM_USAGE ; + } + else if (strcmp(resource_config[i].resource, V_CINDER_THINPOOL_RESOURCE_NAME) == 0) { + /* platform virtual thin pool utilization */ + snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, V_CINDER_THINPOOL_ALARM_ID); + resource_config[i].res_type = RESOURCE_TYPE__FILESYSTEM_USAGE ; + } + else if (strcmp(resource_config[i].resource, V_NOVA_THINPOOL_RESOURCE_NAME) == 0) { + /* platform virtual thin pool utilization */ + snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, V_NOVA_THINPOOL_ALARM_ID); + resource_config[i].res_type = RESOURCE_TYPE__FILESYSTEM_USAGE ; + } + else if (strcmp(resource_config[i].resource, V_PORT_RESOURCE_NAME) == 0) { + /* vswitch port utilization */ + snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, + V_PORT_ALARM_ID); + resource_config[i].res_type = RESOURCE_TYPE__PORT ; + } + else if (!strcmp(resource_config[i].resource, V_INTERFACE_RESOURCE_NAME) || + !strcmp(resource_config[i].resource, V_LACP_INTERFACE_RESOURCE_NAME)) { + /* vswitch interface(lacp or otherwise) utilization */ + snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, + V_INTERFACE_ALARM_ID); + resource_config[i].res_type = RESOURCE_TYPE__INTERFACE ; + } + else if (!strcmp(resource_config[i].resource, V_OVSDB_RESOURCE_NAME)) { + /* vswitch OVSDB manager utilization */ + snprintf(resource_config[i].alarm_id, 
FM_MAX_BUFFER_LENGTH, + V_OVSDB_MANAGER_ALARM_ID); + resource_config[i].res_type = RESOURCE_TYPE__DATABASE_USAGE ; + } + else if (!strcmp(resource_config[i].resource, V_OPENFLOW_RESOURCE_NAME)) { + /* vswitch Openflow utilization */ + snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, + V_OPENFLOW_CONTROLLER_ALARM_ID); + resource_config[i].res_type = RESOURCE_TYPE__NETWORK_USAGE ; + } + else if (strcmp(resource_config[i].resource, REMOTE_LOGGING_RESOURCE_NAME) == 0) { + /* remote logging connectivity */ + snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, + REMOTE_LOGGING_CONTROLLER_CONNECTIVITY_ALARM_ID); + resource_config[i].res_type = RESOURCE_TYPE__CONNECTIVITY ; + } + else + { + resource_config[i].res_type = RESOURCE_TYPE__UNKNOWN ; + } + + ilog ("Monitoring %2d: %s (%s)\n", + i, + resource_config[i].resource, + resource_config[i].severity); + mem_log_resource ( &resource_config[i] ); + i++; + + } + + } + + _rmon_ctrl_ptr->resources = i ; + ilog ("Monitoring %d Resources\n", _rmon_ctrl_ptr->resources ); + return (PASS); +} + +/***************************************************************************** + * + * Name : _inter_config_load + * + * Purpose : Load the content of each config file into interface_resource_config[x] + * + *****************************************************************************/ +int _inter_config_load (void) +{ + int i = 0 ; + + for ( string_iter_ptr = interface_config_files.begin () ; + string_iter_ptr != interface_config_files.end () ; + string_iter_ptr++ ) + { + if ( i >= MAX_RESOURCES ) + { + wlog ("Cannot Monitor more than %d resources\n", MAX_RESOURCES ); + break ; + } + + /* Read the interface resource config file */ + resource_config[i].mask = 0 ; + if (ini_parse( string_iter_ptr->data(), rmon_interface_config, + &interface_resource_config[i]) < 0) + { + ilog("Read Failure : %s\n", string_iter_ptr->data() ); + } + + else + { + dlog ("Config File : %s\n", string_iter_ptr->c_str()); + ilog ("Monitoring %2d: 
%s (%s)\n", i, interface_resource_config[i].resource , + interface_resource_config[i].severity ); + + interface_resource_config[i].i = i ; + interface_resource_config[i].failed = false ; + interface_resource_config[i].stage = RMON_STAGE__INIT ; + interface_resource_config[i].sev = SEVERITY_CLEARED ; + interface_resource_config[i].failed_send = 0; + interface_resource_config[i].alarm_raised = false; + + /* add the alarm ids for the FM API per resource monitored */ + if (strcmp(interface_resource_config[i].resource, OAM_INTERFACE_NAME) == 0) { + /* add the alarm id for the FM API per resource monitored */ + snprintf(interface_resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, OAM_ALARM_ID); + snprintf(interface_resource_config[i].alarm_id_port, FM_MAX_BUFFER_LENGTH, OAM_PORT_ALARM_ID); + } + else if (strcmp(interface_resource_config[i].resource, MGMT_INTERFACE_NAME) == 0) { + /* add the alarm id for the FM API per resource monitored */ + snprintf(interface_resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, MGMT_ALARM_ID); + snprintf(interface_resource_config[i].alarm_id_port, FM_MAX_BUFFER_LENGTH, MGMT_PORT_ALARM_ID); + } + else if (strcmp(interface_resource_config[i].resource, INFRA_INTERFACE_NAME) == 0) { + /* add the alarm id for the FM API per resource monitored */ + snprintf(interface_resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, INFRA_ALARM_ID); + snprintf(interface_resource_config[i].alarm_id_port, FM_MAX_BUFFER_LENGTH, INFRA_PORT_ALARM_ID); + } + + mem_log_interface_resource ( &interface_resource_config[i] ); + i++; + + } + } + + _rmon_ctrl_ptr->interface_resources = i ; + ilog ("Monitoring %d Interface Resources\n", _rmon_ctrl_ptr->interface_resources ); + return (PASS); +} + +/***************************************************************************** + * + * Name : _thinmeta_config_load + * + * Purpose : Load the content of each config file into thinmeta_config[x] + * + *****************************************************************************/ 
+int _thinmeta_config_load (void) +{ + int i = 0 ; + + /* Set hard-coded defaults for all structures */ + for ( int j = 0; j < MAX_RESOURCES; j++) + { + thinmeta_resource_config_type * res; + res = &thinmeta_resource_config[i]; + res->critical_threshold = THINMETA_DEFAULT_CRITICAL_THRESHOLD; + res->alarm_on = THINMETA_DEFAULT_ALARM_ON; + res->autoextend_on = THINMETA_DEFAULT_AUTOEXTEND_ON; + res->autoextend_by = THINMETA_DEFAULT_AUTOEXTEND_BY; + res->autoextend_percent = THINMETA_DEFAULT_AUTOEXTEND_PERCENT; + res->audit_period = THINMETA_DEFAULT_AUDIT_PERIOD; + } + + /* Load resources */ + for ( string_iter_ptr = config_files.begin () ; + string_iter_ptr != config_files.end () ; + string_iter_ptr++ ) + { + if ( i >= MAX_RESOURCES ) + { + wlog ("Cannot Monitor more than %d resources\n", MAX_RESOURCES ); + break ; + } + /* Read the resource config file */ + if (ini_parse( string_iter_ptr->data(), rmon_thinmeta_config, + &thinmeta_resource_config[i]) < 0) + { + ilog("Read Failure : %s\n", string_iter_ptr->data() ); + } + else + { + thinmeta_resource_config_type * res; + res = &thinmeta_resource_config[i]; + if (!res->section_exists) + { + dlog3 ("Config File : %s does not have a [%s] section\n", + string_iter_ptr->c_str(), THINMETA_CONFIG_SECTION); + continue; + } + dlog ("Config File : %s\n", string_iter_ptr->c_str()); + + /* validate loaded configuration */ + if (!res->vg_name || !res->thinpool_name) + { + elog("Invalid VG and/or thinpool names for thinpool metadata " + "in config file: %s, disabling monitoring", string_iter_ptr->c_str()); + res->critical_threshold = RESOURCE_DISABLE; + res->vg_name = THINMETA_INVALID_NAME; + res->thinpool_name = THINMETA_INVALID_NAME; + } + else if (res->critical_threshold > 99) + { + elog("Metadata monitoring error in config file: %s. 
Option critical_threshold > 99%%, " + "value in config file: %i, disabling monitoring", + string_iter_ptr->c_str(), res->critical_threshold) + res->critical_threshold = 0; + } + else if (res->alarm_on > 1) + { + elog("Metadata monitoring error in config file: %s. Option alarm_on is NOT boolean, " + "value in config file: %i, disabling monitoring", string_iter_ptr->c_str(), res->alarm_on); + res->critical_threshold = RESOURCE_DISABLE; + } + else if (res->autoextend_on > 1) + { + elog("Metadata monitoring error in config file: %s. Option autoextend_on is NOT boolean, " + "value in config file: %i, disabling monitoring", + string_iter_ptr->c_str(), res->autoextend_on) + res->critical_threshold = RESOURCE_DISABLE; + } + else if (res->autoextend_percent > 1) + { + elog("Metadata monitoring error in config file: %s. Option autoextend_percent is NOT boolean, " + "value in config file: %i, disabling monitoring", + string_iter_ptr->c_str(), res->autoextend_percent) + res->critical_threshold = RESOURCE_DISABLE; + } + else if ((res->autoextend_percent && res->autoextend_by > 100) || + (res->autoextend_on && res->autoextend_by < 1)) + { + elog("Metadata monitoring error in config file: %s. Option autoextend_by not in [1,100] interval, " + "value in config file: %i, disabling monitoring", + string_iter_ptr->c_str(), res->autoextend_by) + res->critical_threshold = RESOURCE_DISABLE; + } + else if ((res->audit_period < 1) || (res->audit_period > 10000)) + { + elog("Metadata monitoring error in config file: %s. 
Option audit_period not in [1,10000] interval, " + "value in config file: %i, disabling monitoring", + string_iter_ptr->c_str(), res->audit_period) + res->critical_threshold = RESOURCE_DISABLE; + } + + ilog ("%s/%s pool metadata monitored; resource index: %2d\n", res->vg_name , + res->thinpool_name, i ); + i++; + } + + } + + _rmon_ctrl_ptr->thinmeta_resources = i ; + ilog ("Monitoring %d Thinpool Metadata Resources\n", _rmon_ctrl_ptr->thinmeta_resources ); + return (PASS); +} + +/***************************************************************************** + * + * Name : rmon_hdlr_init + * + * Purpose : Init the handler but also support re-init that might occur over a SIGHUP + * + *****************************************************************************/ + +#define RMON_TIMER_TYPE__EVENT "event" +#define RMON_TIMER_TYPE__PM "pm" +#define RMON_TIMER_TYPE__NTP "ntp" +#define RMON_TIMER_TYPE__RES "resource" +#define RMON_TIMER_TYPE__THIN "thinpool" + +int rmon_hdlr_init ( rmon_ctrl_type * ctrl_ptr ) +{ + /* Save the control pointer */ + _rmon_ctrl_ptr = ctrl_ptr ; + + mtcTimer_init ( rmonTimer_event, LOCALHOST, RMON_TIMER_TYPE__EVENT) ; + mtcTimer_init ( rmonTimer_pm, LOCALHOST, RMON_TIMER_TYPE__PM ) ; + + if (is_controller()) + mtcTimer_init ( rmonTimer_ntp,LOCALHOST, RMON_TIMER_TYPE__NTP ) ; + + for ( int i = 0 ; i < MAX_RESOURCES ; i++ ) + mtcTimer_init ( rtimer[i], LOCALHOST, RMON_TIMER_TYPE__RES ); + ctrl_ptr->resources = 0 ; + + for ( int i = 0 ; i < MAX_RESOURCES ; i++ ) + mtcTimer_init ( thinmetatimer[i], LOCALHOST, RMON_TIMER_TYPE__THIN ); + ctrl_ptr->thinmeta_resources = 0 ; + + /* Initialize the Resource Monitor Array */ + memset ( (char*)&resource_config[0], 0, sizeof(resource_config_type)*MAX_RESOURCES); + memset ( (char*)&interface_resource_config[0], 0, sizeof(interface_resource_config_type)*MAX_RESOURCES); + memset ( (char*)&thinmeta_resource_config[0], 0, sizeof(interface_resource_config_type)*MAX_RESOURCES); + memset ( (char*)®istered_clt[0], 0, 
sizeof(registered_clients)*MAX_CLIENTS); + + /* Read in the list of config files and their contents */ + load_filenames_in_dir ( CONFIG_DIR, config_files ) ; + /* Read in the list of interface config files and their contents */ + load_filenames_in_dir ( INT_CONFIG_DIR, interface_config_files ) ; + + _thinmeta_config_load(); + _config_files_load (); + _inter_config_load (); + + /* init Thin Metadata Monitoring after config reload - including timers */ + thinmeta_init(thinmeta_resource_config, thinmetatimer, ctrl_ptr->thinmeta_resources); + + /* Log the control setting going into the main loop */ + mem_log_ctrl ( _rmon_ctrl_ptr ); + + /* Initialize instance mount monitoring */ + if (pthread_mutex_init(&lock, NULL) != 0) + { + elog("mutex init failed \n"); + } + + t_data.thread_running = false; + t_data.resource_usage = MOUNTED; + t_data.nr_switches_count = 0; + t_data.pid = getpid(); + + return (PASS) ; +} + +/***************************************************************************** + * + * Name : _set_severity + * + * Purpose : Restores the resource value and the severity of the alarm + * + *****************************************************************************/ +void _set_resource_usage ( string reason_text, resource_config_type * ptr ) +{ + unsigned int found; + string res_val; + size_t last_index; + string temp_val; + char resource_usage[10]; + + /* extract the resource value from the reason text */ + found = reason_text.find_last_of( ' ' ); + temp_val = reason_text.substr(found+1); + last_index = temp_val.find_first_not_of("0123456789"); + res_val = temp_val.substr(0, last_index); + snprintf (resource_usage, sizeof(resource_usage), res_val.c_str()); + sscanf(resource_usage, "%lf", &ptr->resource_value); +} + +/***************************************************************************** + * + * Name : build_entity_instance_id + * + * Purpose : build the alarm's entity_instance_id based on the + * resource type and alarm type. 
+ * + *****************************************************************************/ +void build_entity_instance_id ( resource_config_type *ptr, char *entity_instance_id ) +{ + dlog ("resource name: %s, resource type: %s, alarm type: %d \n", ptr->resource, ptr->type, ptr->alarm_type); + + // Make certain the id is cleared + entity_instance_id[0] = 0; + + if ( ptr->alarm_type == DYNAMIC_ALARM ) + { + if ((ptr->type != NULL) && (strcmp(ptr->type, "lvg") == 0 )) + { + /* This case covers volume groups */ + /* Use host=.volumegroup=type for id*/ + snprintf((char*)entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.volumegroup=%s", _rmon_ctrl_ptr->my_hostname, ptr->resource); + } + else + { + /* Use host=.filesystem=type for id*/ + snprintf(entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.filesystem=%s", _rmon_ctrl_ptr->my_hostname, ptr->resource); + } + } + else if ( ptr->alarm_type == STATIC_ALARM ) + { + /* Use host=.filesystem=type for id*/ + snprintf(entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.filesystem=%s", _rmon_ctrl_ptr->my_hostname, ptr->resource); + } + else if ((ptr->alarm_type == STANDARD_ALARM) && (strstr(ptr->resource, V_MEMORY_RESOURCE_NAME) != NULL)) + { + /* AVS memory */ + snprintf(alarmData.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.processor=%d", _rmon_ctrl_ptr->my_hostname, ptr->socket_id); + } + else if (strstr(ptr->resource, V_CINDER_THINPOOL_RESOURCE_NAME) != NULL) + { + /* Cinder thin pool alarm should not be raised against a specific host */ + /* as the volumes are synced between controllers through drbd. */ + /* Instead we use a common entity instance id for both controllers. 
*/ + snprintf(entity_instance_id, FM_MAX_BUFFER_LENGTH, "host=controller"); + } + else + { + /* Use hostname for alarm */ + snprintf(entity_instance_id, FM_MAX_BUFFER_LENGTH, _rmon_ctrl_ptr->my_hostname); + } + + dlog ("resource %s entity instance id: %s\n", ptr->resource, entity_instance_id); + + return; +} + + +/***************************************************************************** + * + * Name : thinpool_virtual_space_usage_init + * + * Purpose : Determine if we should monitor virtual usage or not: no purpose + * in doing so if thin provisioning is not used. + * + * Params : index - the index of the virtual space resource + * + * Return : None. + * + *****************************************************************************/ +void thinpool_virtual_space_usage_init(int index, + const char *poolName, + const char *poolOwner) { + + if (!poolName or !poolOwner) { + slog ("No poolName or poolOwner provided"); + return; + } + ilog("index = %d, poolName = %s, poolOwner = %s", index, poolName, poolOwner); + + /* Buffer (and its size) for keeping the initial result after executing + the above command. */ + char current_pool_type[BUFFER_SIZE]; + const unsigned int buffer_size = BUFFER_SIZE; + /* The command for seeing if the pool type is thin. 
*/ + char lvm_thin_cmd[BUFFER_SIZE]; + const char *thin_pool_expected_result = NULL; + + MEMSET_ZERO(current_pool_type); + MEMSET_ZERO(lvm_thin_cmd); + + if (strcmp(poolName, "nova-local-pool") == 0) { + const char *nova_thin_pool_expected_result = "thin-pool"; + thin_pool_expected_result = nova_thin_pool_expected_result; + sprintf(lvm_thin_cmd, "lvs --segments | grep \"%s\" | awk '{print $5}'", poolName); + } + else if (strcmp(poolName, "cinder-volumes-pool") == 0) { + const char *cinder_thin_pool_expected_result = "thin"; + thin_pool_expected_result = cinder_thin_pool_expected_result; + sprintf(lvm_thin_cmd, "cat /etc/cinder/cinder.conf | awk -F = '/^lvm_type.*=.*/ { print $2; }' | tail -n 1 | tr -d ' '"); + } + else { + slog("Invalid pool name given."); + return; + } + + /* Result code. */ + int rc; + + /* Execute the command. */ + rc = execute_pipe_cmd(lvm_thin_cmd, current_pool_type, buffer_size); + + /* If the command has been executed successfuly, continue. */ + if (rc == PASS) { + if (current_pool_type != NULL) { + /* If the pool type is not thin, disable the alarm for virtual + usage. */ + ilog("%s current pool type is set to = %s", poolOwner, current_pool_type); + if(strcmp(current_pool_type, thin_pool_expected_result) != 0) { + resource_config[index].alarm_status = ALARM_OFF; + ilog("%s LVM Thinpool Usage alarm off: thin provisioning not used", poolOwner); + } else { + resource_config[index].alarm_status = ALARM_ON; + ilog("%s LVM Thinpool Usage alarm on: thin provisioning used", poolOwner); + } + } + } else { + resource_config[index].alarm_status = ALARM_OFF; + elog("%s LVM Thinpool monitoring state unknown ; alarm disabled (rc:%i)", + poolOwner, rc); + } +} + +/***************************************************************************** + * + * Name : virtual_space_usage_init + * + * Purpose : Determine if we should monitor virtual usage or not: no purpose + * in doing so if thin provisioning is not used. + * + * Return : None. 
+ * + *****************************************************************************/ + +void virtual_space_usage_init(const char* resource_name) { + + ilog ("Initialize thin pools for resource %s\n", resource_name); + int index; + if ( get_resource_index( resource_name, &index ) == PASS ) { + + if (strcmp(resource_name, V_CINDER_THINPOOL_RESOURCE_NAME) == 0) { + thinpool_virtual_space_usage_init(index,"cinder-volumes-pool","Cinder"); + + } else if (strcmp(resource_name, V_NOVA_THINPOOL_RESOURCE_NAME) == 0) { + thinpool_virtual_space_usage_init(index, "nova-local-pool","Nova"); + } + } + else { + wlog ("failed get_resource_index for resource %s\n", resource_name); + } +} + +/***************************************************************************** + * + * Name : rmon_alarming_init + * + * Purpose : Clears any previously raised rmon alarms if rmon is restarted + * + *****************************************************************************/ +void rmon_alarming_init ( resource_config_type * ptr ) +{ + dlog ("resource name: %s, resource type: %s, alarm type: %d \n", ptr->resource, ptr->type, ptr->alarm_type); + + AlarmFilter alarmFilter; + + SFmAlarmDataT *active_alarm = (SFmAlarmDataT*) calloc (1, sizeof (SFmAlarmDataT)); + if (active_alarm == NULL) + { + elog("Failed to allocate memory for SFmAlarmDataT\n"); + return; + } + + build_entity_instance_id (ptr, alarmData.entity_instance_id); + + snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, ptr->alarm_id); + snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, alarmData.entity_instance_id); + + if (fm_get_fault( &alarmFilter, active_alarm) == FM_ERR_OK) + { + if (active_alarm != NULL) { + + string reasonText(active_alarm->reason_text); + /* Set the resource severity */ + ptr->failed = true; + ptr->alarm_raised = true; + ptr->count = ptr->num_tries; + if ( active_alarm->severity == FM_ALARM_SEVERITY_MINOR ) + { + ptr->sev = SEVERITY_MINOR; + } + else if ( active_alarm->severity == 
FM_ALARM_SEVERITY_MAJOR ) + { + ptr->sev = SEVERITY_MAJOR; + if ( ptr->res_type == RESOURCE_TYPE__FILESYSTEM_USAGE ) + { + string err_res_name(ptr->resource); + _space_to_underscore(err_res_name); + + /* clear host degrade for fs usage alarms */ + snprintf(ptr->errorMsg, sizeof(ptr->errorMsg), "%s %s:", + err_res_name.c_str(), + DEGRADE_CLEAR_MSG ); + + rmon_send_request ( ptr, _rmon_ctrl_ptr->clients ); + } + } + else + { + ptr->sev = SEVERITY_CRITICAL; + } + resourceStageChange ( ptr, RMON_STAGE__MONITOR_WAIT ); + + if (strcmp(ptr->resource, INSTANCE_RESOURCE_NAME) != 0) + { + /* Set the resource severity */ + _set_resource_usage( reasonText, ptr ); + ilog ("%s setting previously failed resource alarm id: %s entity_instance_id: %s usage: %0.2f\n", + ptr->resource, ptr->alarm_id, alarmFilter.entity_instance_id, ptr->resource_value); + } + else + { + ilog ("%s setting previously failed resource alarm id: %s entity_instance_id: %s\n", + ptr->resource, ptr->alarm_id, alarmFilter.entity_instance_id); + } + } + } + free(active_alarm); +} + +/***************************************************************************** + * + * Name : send_clear_msg + * + * Purpose : Send a message to all registered clients to set the node to + * available (clear the degrade) + * + *****************************************************************************/ +void send_clear_msg ( int index ) +{ + int count = 0; + AlarmFilter alarmFilter; + + SFmAlarmDataT *active_alarm = (SFmAlarmDataT*) calloc (1, sizeof (SFmAlarmDataT)); + if (active_alarm == NULL) + { + elog("Failed to allocate memory for SFmAlarmDataT\n"); + return; + } + + string err_res_name(resource_config[index].resource); + _space_to_underscore(err_res_name); + snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, resource_config[index].alarm_id); + + build_entity_instance_id (&resource_config[index], alarmData.entity_instance_id); + + snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, alarmData.entity_instance_id); 
+ + /* Notify rmon clients of fault being cleared */ + snprintf(resource_config[index].errorMsg, sizeof(resource_config[index].errorMsg), + "%s cleared_alarms_for_resource:", err_res_name.c_str()); + + /* check if there is an alarm first for this resource. If there is not then the node */ + /* should not be in a degrade state */ + EFmErrorT ret = fm_get_fault( &alarmFilter, active_alarm); + if ( (ret == FM_ERR_OK) && (active_alarm != NULL) ) + { + while (( rmon_send_request ( &resource_config[index], _rmon_ctrl_ptr->clients ) != PASS ) && (count < 3 )) + { + wlog ("%s request send failed \n", resource_config[index].resource); + count++; + } + if (count > 2) + { + wlog ("%s request send failed, count:%d \n", resource_config[index].resource, count); + resource_config[index].failed_send++; + } + if ((resource_config[index].failed_send == MAX_FAIL_SEND) || (count < 3)) + { + /* Reset the values to defaults */ + swact_count = 0; + ilog("Setting resource: %s back to defaults \n", resource_config[index].resource); + resource_config[index].failed = false ; + resource_config[index].alarm_raised = false ; + resource_config[index].count = 0 ; + resource_config[index].sev = SEVERITY_CLEARED ; + resource_config[index].stage = RMON_STAGE__START ; + resource_config[index].failed_send = 0; + } + } + else //alarm not found or error + { + if (ret == FM_ERR_ENTITY_NOT_FOUND) + { + dlog ("Alarm not found for resource: %s entity_instance_id: %s \n", alarmFilter.alarm_id, alarmFilter.entity_instance_id); + } + else + { + wlog ("fm_get_fault failed for resource: %s entity_instance_id: %s err: %d\n", alarmFilter.alarm_id, + alarmFilter.entity_instance_id, ret); + } + + if (active_alarm == NULL) + { + elog("fm_get_fault returned null active_alarm\n"); + } + + swact_count++; + if (swact_count == MAX_SWACT_COUNT) + { + /* Reset the values to defaults */ + while (( rmon_send_request ( &resource_config[index], _rmon_ctrl_ptr->clients ) != PASS ) && (count < 3 )) + { + wlog ("%s request send 
failed \n", resource_config[index].resource); + count++; + } + swact_count = 0; + ilog("Setting resource: %s back to defaults \n", resource_config[index].resource); + resource_config[index].failed = false ; + resource_config[index].alarm_raised = false ; + resource_config[index].count = 0 ; + resource_config[index].sev = SEVERITY_CLEARED ; + resource_config[index].stage = RMON_STAGE__START ; + resource_config[index].failed_send = 0; + } + } + free(active_alarm); +} + +/***************************************************************************** + * + * Name : read_fs_file + * + * Purpose : read the memory mapped dynamic file system file + *****************************************************************************/ +void read_fs_file ( vector & dynamic_resources ) +{ + FILE * pFile; + char buf[MAX_LEN]; + int fd; + string delimiter = ","; + size_t pos; + string token; + struct stat fileInfo; + struct flock fl; + + memset ((char *)&fileInfo, 0 , sizeof(fileInfo)); + + fl.l_whence = SEEK_SET; + fl.l_start = 0; + fl.l_len = 0; + fl.l_pid = getpid(); + + pFile = fopen (DYNAMIC_FS_FILE , "r"); + if (pFile != NULL) { + + fd = fileno(pFile); + /* lock the file */ + fl.l_type = F_RDLCK; + + /* lock the file for read and write */ + fcntl(fd, F_SETLKW, &fl); + + if (fd == -1) + { + elog("Error opening file for reading"); + } + + if (fstat(fd, &fileInfo) == -1) + { + elog("Error getting the file size"); + } + + char *map = static_cast( mmap(0, fileInfo.st_size, PROT_READ, MAP_SHARED, fd, 0)); + if (map == MAP_FAILED) + { + elog("Error mmapping the file"); + } + string str(map); + + snprintf( buf, MAX_LEN, str.c_str()); + /* free the mmapped memory */ + if (munmap(map, fileInfo.st_size) == -1) + { + elog("Error un-mmapping the file"); + } + fclose(pFile); + /* unlock the file */ + fl.l_type = F_UNLCK; + fcntl(fd, F_SETLK, &fl); + + while ((pos = str.find(delimiter)) != string::npos) { + /* separate the resources from the file */ + token = str.substr(0, pos); + 
dynamic_resources.push_back(token); + dlog("reading resource %s \n", token.c_str()); + str.erase(0, pos + delimiter.length()); + } + } +} + +/***************************************************************************** + * + * Name : add_dynamic_fs_resource + * + * Purpose : Add the dynamic file system resources + *****************************************************************************/ +void add_dynamic_fs_resource ( bool send_response ) +{ + char resource[50]; + char temp_resource[50]; + char device [50]; + char mount_point[50]; + char temp_state[20]; + char type [50]; + char buf[200]; + string criticality = "critical"; + vector resource_list; + int absolute_thresholds[3]; + + memset(absolute_thresholds, 0, sizeof(absolute_thresholds)); + fs_index.clear(); + fs_state.clear(); + + /* get a list of all the dynamic fs mounts */ + read_fs_file(resource_list); + + for(std::vector::iterator it = resource_list.begin(); it != resource_list.end(); ++it) + { + string str = *it; + snprintf(buf, sizeof(buf), str.c_str()); + + // For resources without mounts the mount_point will be NULL + memset(&mount_point[0], 0, sizeof(mount_point)); + sscanf(buf, "%49s %19s %49s %49s %49s", temp_resource, temp_state, type, device, mount_point); + string state(temp_state); + + bool found = false; + + if (mount_point[0] != '\0') + { + // for resources with mounts, the resource name is the mount value + snprintf(resource, FM_MAX_BUFFER_LENGTH, mount_point); + } + else + { + // for resources without mounts, the resource name is the device value + snprintf(resource, FM_MAX_BUFFER_LENGTH, device); + } + + /* the dynamic file system is enabled, add it if need be */ + for (int i=0; i<_rmon_ctrl_ptr->resources; i++) + { + if ( strcmp(resource, resource_config[i].resource) == 0) + { + dlog ("resource %s already exists, update the state to %s \n", resource, state.c_str()); + /* resource already exists no need to add it again */ + /* update the state, it may have changed */ + 
fs_index.push_back(i); + fs_state.push_back(state); + found = true; + break; + } + } + + if (!found) // new resource to monitor, lets add it + { + int enabled_resource = ALARM_OFF; + if (strcmp(temp_state,"enabled") == 0) + { + enabled_resource = ALARM_ON; + } + + if (mount_point[0] != '\0') + { + save_fs_resource ( resource, criticality, enabled_resource, fs_percent, absolute_thresholds, DYNAMIC_ALARM, type, device, MOUNTED ); + } + else + { + save_fs_resource ( resource, criticality, enabled_resource, fs_percent, absolute_thresholds, DYNAMIC_ALARM, type, device, NOT_MOUNTED ); + } + + if (enabled_resource == ALARM_ON) { + calculate_fs_usage( &resource_config[_rmon_ctrl_ptr->resources - 1] ); + rmon_alarming_init( &resource_config[_rmon_ctrl_ptr->resources - 1] ); + } + } + } + + if (send_response) + { + ilog ("sending response to dynamic FS add, to the rmon client\n"); + /* let the rmon client know that we are done with the file */ + rmon_resource_response(_rmon_ctrl_ptr->clients); + } +} + +/***************************************************************************** + * + * Name : clear_alarm_for_resource + * + * Purpose : Clear the alarm of the resource passed in + * + *****************************************************************************/ +void clear_alarm_for_resource ( resource_config_type * ptr ) +{ + dlog ("resource name: %s, resource type: %s, alarm type: %d \n", ptr->resource, ptr->type, ptr->alarm_type); + AlarmFilter alarmFilter; + + build_entity_instance_id (ptr, alarmData.entity_instance_id); + + snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, ptr->alarm_id); + snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, alarmData.entity_instance_id); + + int ret = rmon_fm_clear(&alarmFilter); + if (ret == FM_ERR_OK) + { + ilog ("Cleared stale alarm %s for entity instance id: %s", alarmFilter.alarm_id, alarmFilter.entity_instance_id); + } + else if (ret == FM_ERR_ENTITY_NOT_FOUND) + { + dlog ("Stale alarm %s for entity instance id: %s 
was not found", alarmFilter.alarm_id, alarmFilter.entity_instance_id); + } + else + { + wlog ("Failed to clear stale alarm %s for entity instance id: %s error: %d", alarmFilter.alarm_id, alarmFilter.entity_instance_id, ret); + } +} + + +/***************************************************************************** + * + * Name : process_dynamic_fs_file + * + * Purpose : read the dynamic files directory and add the dynamic filesystem + * resources when the file is updated + *****************************************************************************/ +void process_dynamic_fs_file() +{ + int index = 0; + + pthread_mutex_lock(&lock); + modifyingResources = true; + pthread_mutex_unlock(&lock); + + add_dynamic_fs_resource(true); + + pthread_mutex_lock(&lock); + modifyingResources = false; + pthread_mutex_unlock(&lock); + + /* deal with changes of dynamic file system enabled state */ + for (unsigned int i=0; iclients > 0 ) + { + //send a clear degrade node + send_clear_msg(index); + } + + // we need to clear the resource's alarm if there was any set for this resource + clear_alarm_for_resource(&resource_config[index]); + } + else + { + /* There was no active alarm to clear */ + ilog("Setting resource: %s back to defaults \n", resource_config[index].resource); + resource_config[index].alarm_status = ALARM_OFF; + resource_config[index].failed = false; + resource_config[index].alarm_raised = false; + resource_config[index].count = 0 ; + resource_config[index].sev = SEVERITY_CLEARED ; + resource_config[index].stage = RMON_STAGE__START ; + } + } + else if ( strcmp(fs_state.at(i).c_str(), "enabled") == 0 ) + { + // resource has been enabled + if ( resource_config[index].alarm_status == ALARM_OFF ) + { + /* Turn the resource checking back on if it was off */ + resource_config[index].alarm_status = ALARM_ON; + + //reset values + resource_config[index].failed = false; + resource_config[index].alarm_raised = false; + resource_config[index].count = 0 ; + resource_config[index].sev 
= SEVERITY_CLEARED ; + resource_config[index].stage = RMON_STAGE__START ; + + rmon_alarming_init( &resource_config[index] ); + + ilog("%s is now enabled \n", resource_config[index].resource); + if (strcmp(resource_config[index].resource, CINDER_VOLUMES) == 0) + { + virtual_space_usage_init(V_CINDER_THINPOOL_RESOURCE_NAME); + } + if (strcmp(resource_config[index].resource, NOVA_LOCAL) == 0) + { + virtual_space_usage_init(V_NOVA_THINPOOL_RESOURCE_NAME); + } + } + else // alarm aready on (enabled) + { + ilog("%s is already enabled \n", resource_config[index].resource); + } + } + else + { + wlog("%s invalid dynamic file system state: %s \n", resource_config[index].resource, fs_state.at(i).c_str()); + } + } +} + +/***************************************************************************** + * + * Name : process_static_fs_file + * + * Purpose : Reads in the list of static file systems for monitoring + * + *****************************************************************************/ +void process_static_fs_file() +{ + FILE * pFile; + vector mounts; + char buf[MAX_LEN]; + char resource[50]; + char type[50]; + char device[50]; + bool found = false; + int enabled_resource = ALARM_ON; + string criticality = "critical"; + int absolute_thresholds[3] = {0}; + + pFile = fopen (STATIC_FS_FILE , "r"); + if (pFile != NULL) { + ifstream fin( STATIC_FS_FILE ); + string line; + + while( getline( fin, line )) { + /* process each line */ + mounts.push_back(line); + } + fclose(pFile); + + + for(std::vector::iterator it = mounts.begin(); it != mounts.end(); ++it) + { + string str = *it; + snprintf(buf, MAX_LEN, str.c_str()); + sscanf(buf, "%49s %49s %49s %d %d %d", resource, device, type, &absolute_thresholds[0], &absolute_thresholds[1], &absolute_thresholds[2]); + + if (!found) + { + if (fs_percent == PERCENT_USED) + { + /* do not use the absolute thresholds */ + memset(absolute_thresholds, 0, sizeof(absolute_thresholds)); + } + /* add the resource */ + save_fs_resource ( resource, 
criticality, enabled_resource, fs_percent, absolute_thresholds, STATIC_ALARM, type, device, MOUNTED ); + calculate_fs_usage( &resource_config[_rmon_ctrl_ptr->resources - 1] ); + } + } + } + else + { + elog("Error, no static file system file present at: %s\n", STATIC_FS_FILE); + } +} + +/***************************************************************************** + * + * Name : rmon_timer_handler + * + * Purpose : Looks up the timer ID and asserts the corresponding ringer + * + *****************************************************************************/ +void rmon_timer_handler ( int sig, siginfo_t *si, void *uc) +{ + timer_t * tid_ptr = (void**)si->si_value.sival_ptr ; + + /* Avoid compiler errors/warnings for parms we must + * have but currently do nothing with */ + UNUSED(sig); + UNUSED(uc); + + if ( !(*tid_ptr) ) + { + // tlog ("Called with a NULL Timer ID\n"); + return ; + } + + /* is event rmon timer */ + if ( *tid_ptr == rmonTimer_event.tid ) + { + mtcTimer_stop_int_safe ( rmonTimer_event); + rmonTimer_event.ring = true ; + } + + else if ( *tid_ptr == rmonTimer_pm.tid ) + { + mtcTimer_stop_int_safe ( rmonTimer_pm); + rmonTimer_pm.ring = true ; + } + + else if ( (is_controller()) && (*tid_ptr == rmonTimer_ntp.tid) ) + { + mtcTimer_stop_int_safe ( rmonTimer_ntp); + rmonTimer_ntp.ring = true ; + } + + else + { + bool found = false ; + for ( int i = 0 ; i < _rmon_ctrl_ptr->resources ; i++ ) + { + if ( *tid_ptr == rtimer[i].tid ) + { + mtcTimer_stop_int_safe ( rtimer[i] ); + rtimer[i].ring = true ; + found = true ; + break ; + } + } + if ( !found ) + { + for ( int i = 0 ; i < _rmon_ctrl_ptr->thinmeta_resources ; i++ ) + { + if ( *tid_ptr == thinmetatimer[i].tid ) + { + mtcTimer_stop_int_safe ( thinmetatimer[i] ); + thinmetatimer[i].ring = true ; + found = true ; + break ; + } + } + } + if ( !found ) + { + /* try and cleanup by stopping this unknown timer via its tid */ + mtcTimer_stop_tid_int_safe (tid_ptr); + } + } +} + 
+/***************************************************************************** + * + * Name : clear_ntp_alarms + * + * Purpose : Loop through each current alarms and deleted them if the server + * is now reachable or the server no longer is assigned to ntpq + * + *****************************************************************************/ +void clear_ntp_alarms(std::list &non_reachable_ntp_servers, unsigned int alarm_count, SFmAlarmDataT *active_alarms, bool clear_major_alarm) +{ + dlog ("Total NTP alarm_count:%d", alarm_count); + AlarmFilter alarmFilter; + char alarm_to_search[FM_MAX_BUFFER_LENGTH]; + + fm_alarm_id alarm_id; + snprintf(alarm_id, FM_MAX_BUFFER_LENGTH, "%s", NTP_ALARM_ID); + + // clear the major alarms if required + if (clear_major_alarm) + { + snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", NTP_ALARM_ID ); + snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.ntp", _rmon_ctrl_ptr->my_hostname); + + int ret = rmon_fm_clear(&alarmFilter); + if (ret != FM_ERR_OK) + { + if (ret != FM_ERR_ENTITY_NOT_FOUND) + { + wlog ("Failed to clear major alarm %s for entity instance id:%s error:%d", NTP_ALARM_ID, alarmFilter.entity_instance_id, ret); + } + } + else + { + ilog ("Cleared major alarm %s for entity instance id:%s", NTP_ALARM_ID, alarmFilter.entity_instance_id); + } + } + + if (active_alarms == NULL) + { + elog ("Null pointer for active_alarms"); + return; + } + + // clear minor alarms if required + bool found; + std::list::iterator iter; + std::list::iterator iter_bad_list; + + // for each NTP alarms in the system see if it match any of the invalid NTP servers + // if it does not match then the alarm must be removed since that NTP server + // is no longer being monitored or is now valid + for ( unsigned int i = 0; i < alarm_count; i++ ) + { + if ( ((active_alarms+i)->severity) == FM_ALARM_SEVERITY_MINOR ) + { + // Verify that this NTP minor alarm is still valid, This server could no longer exist or is now marked + // 
reachable + dlog ("Verify NTP minor alarm is still valid, entity instance id:%s", (active_alarms+i)->entity_instance_id); + + found = false; + + // check for stale minor alarm + for ( iter = non_reachable_ntp_servers.begin (); iter != non_reachable_ntp_servers.end (); iter++ ) + { + // e.g. host=controller-0.ntp=102.111.2.2 + snprintf(alarm_to_search, FM_MAX_BUFFER_LENGTH, "%s.ntp=%s", _rmon_ctrl_ptr->my_hostname, iter->c_str()); + + dlog ("Non reachable NTP server to search %s", iter->c_str()); + + if (strstr((active_alarms+i)->entity_instance_id, iter->c_str()) != NULL) + { + // server is in non reachable list, do not clear it + found = true; + dlog ("Alarm is still valid %s", iter->c_str()); + break; + } + } + + if (!found) + { + // lets clear it but only if it's this controller's alarm, it could be the peer controller's alarm + if (strstr((active_alarms+i)->entity_instance_id, _rmon_ctrl_ptr->my_hostname) != NULL) + { + snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", NTP_ALARM_ID); + snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", (active_alarms+i)->entity_instance_id); + + if (rmon_fm_clear(&alarmFilter) != FM_ERR_OK) + { + wlog ("Failed to clear minor alarm %s for entity instance id:%s", NTP_ALARM_ID, (active_alarms+i)->entity_instance_id); + } + else + { + ilog ("Cleared minor alarm %s for entity instance id:%s", NTP_ALARM_ID, (active_alarms+i)->entity_instance_id); + } + } + } + } + } +} + +/***************************************************************************** + * + * Name : ntp_query_results + * + * Purpose : Analyze the return code from script query_ntp_servers.sh. 
+ * Create alarms if the servers are non reachable, Clear alarms if they are + * now reachable + * + *****************************************************************************/ +void ntp_query_results (int ntp_query_status ) +{ + dlog ("ntp_query_results ntp_query_status:%d", ntp_query_status); + + std::list non_reachable_ntp_servers; + + // if no NTP servers are provisioned on the system, we still need to clear old NTP + // alarms if there are any. But we do not need to read the tmp server file. + if (ntp_query_status != NTP_NOT_PROVISIONED) + { + // read the temp file which contains a list of reachable and non reachable servers + // this file is the output from the query_ntp_servers.sh script + + const char *server_info = "/tmp/ntpq_server_info"; + FILE *pFile; + pFile = fopen(server_info, "r"); + if (pFile != NULL) + { + const char * delim = ";\n\r"; + char * ip; + char line[500]; + + int pos = 0; + while ( memset(line, 0, sizeof(line)) && (fgets((char*) &line, sizeof(line), pFile) != NULL) ) + { + // the first line in the tmp file is the reachable servers, the second is the non reachable servers + if (pos == 1) + { + for (ip = strtok (line, delim); ip; ip = strtok (NULL, delim)) + { + non_reachable_ntp_servers.push_back(ip); + dlog("Found non reachable NTP servers:%s\n", ip); + } + break; + } + pos++; + } + fclose(pFile); + } + else + { + elog("Failed to open file: %s\n", server_info); + return; + } + } + + // retreive all the current NTP alarms + int rc; + unsigned int max_alarms=75; + fm_alarm_id alarm_id; + snprintf(alarm_id, FM_MAX_BUFFER_LENGTH, "%s", NTP_ALARM_ID); + SFmAlarmDataT *active_alarms = (SFmAlarmDataT*) calloc (max_alarms, sizeof (SFmAlarmDataT)); + if (active_alarms == NULL) + { + elog ("Failed to allocate memory for NTP alarms"); + return; + } + + int ret = fm_get_faults_by_id( &alarm_id, active_alarms, &max_alarms); + if (!(ret == FM_ERR_OK || ret == FM_ERR_ENTITY_NOT_FOUND)) + { + elog ("fm_get_faults_by_id failed trying to retreive all 
the NTP alarms, error:%d", ret); + free(active_alarms); + return; + } + + // Clear alarms if required + + bool clear_major_alarm = false; + bool created_major_alarm = false; + + if ( ntp_query_status == NTP_NOT_PROVISIONED || ntp_query_status == NTP_SOME_REACHABLE || ntp_query_status == NTP_OK ) + { + // We are going to clear the major alarm since there is at least one server selected or + // no servers are provisioned + clear_major_alarm = true; + } + + // fm_get_faults_by_id returns the number of alarms found + if (max_alarms != 0) + { + // verify if alarms need to cleared and clear them + clear_ntp_alarms(non_reachable_ntp_servers, max_alarms, active_alarms, clear_major_alarm); + } + + // There are no NTP servers provisioned so there is no alarms to raise + if (ntp_query_status == NTP_NOT_PROVISIONED) + { + return; + } + + // Raise alarms if required + + // Set up alarms data + AlarmFilter alarmFilter; + snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action), "Monitor and if condition persists, contact next level of support."); + snprintf(alarmData.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", NTP_ALARM_ID); + strcpy(alarmData.uuid, ""); + snprintf(alarmData.entity_type_id, FM_MAX_BUFFER_LENGTH, "ntp"); + alarmData.alarm_state = FM_ALARM_STATE_SET; + alarmData.alarm_type = FM_ALARM_COMM; + alarmData.probable_cause = FM_ALARM_CAUSE_UNKNOWN; + alarmData.timestamp = 0; + alarmData.service_affecting = FM_FALSE; + alarmData.suppression = FM_FALSE; + + // Here we raise the major alarm if required + if (ntp_query_status == NTP_NONE_REACHABLE || ntp_query_status == NTP_SOME_REACHABLE_NONE_SELECTED) + { + wlog("NTP configuration does not contain any valid or reachable NTP servers"); + + // Check if alarm is raised already + snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.ntp", _rmon_ctrl_ptr->my_hostname); + + bool found = false; + for ( unsigned int i = 0; i < max_alarms; i++ ) + { + if ( 
strncmp((active_alarms+i)->entity_instance_id, alarmFilter.entity_instance_id, sizeof((active_alarms+i)->entity_instance_id)) == 0 ) + { + // Alarm already exist + dlog("Alarm %s already raised for entity instance id:%s\n", NTP_ALARM_ID, alarmFilter.entity_instance_id); + found = true; + break; + } + } + + // Alarm does not exist so raise it + if (!found && !created_major_alarm) + { + // Alarm does not exist so raise it + alarmData.severity = FM_ALARM_SEVERITY_MAJOR; + snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), "NTP configuration does not contain any valid or reachable NTP servers."); + snprintf(alarmData.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", alarmFilter.entity_instance_id); + + rc = rmon_fm_set(&alarmData, NULL); + if (rc == FM_ERR_OK ) + { + ilog("Alarm %s created for entity instance id:%s \n", NTP_ALARM_ID, alarmData.entity_instance_id); + created_major_alarm = true; + } + else + { + ilog("Failed to create alarm %s for entity instance id:%s error: %d \n", NTP_ALARM_ID, alarmData.entity_instance_id, (int)rc); + } + } + } + + // Here were raise alarms for individual servers + if (ntp_query_status != NTP_OK) + { + wlog("Some or all of the NTP servers are not reachable"); + std::list::iterator iter; + alarmData.severity = FM_ALARM_SEVERITY_MINOR; + + // Loop through all the non reachable NTP servers + // Check to see if an alarms is lready raised for the server. 
+ // If we do not find an alarm for the server then we raise it + for ( iter = non_reachable_ntp_servers.begin (); iter != non_reachable_ntp_servers.end (); iter++ ) + { + bool found = false; + + // Build the alarm entity instatance id + snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s.ntp=%s", _rmon_ctrl_ptr->my_hostname, iter->c_str()); + + dlog("Search alarms for entity instance id:%s \n", alarmFilter.entity_instance_id); + for ( unsigned int i = 0; i < max_alarms; i++ ) + { + if ( strncmp((active_alarms+i)->entity_instance_id, alarmFilter.entity_instance_id, sizeof((active_alarms+i)->entity_instance_id)) == 0 ) + { + dlog("Alarm %s already raised for entity instance id:%s\n", NTP_ALARM_ID, alarmFilter.entity_instance_id); + found = true; + break; + } + } + + // If the NTP alarm was not found then raise one for this NTP server + if (!found) + { + + snprintf(alarmData.reason_text, sizeof(alarmData.reason_text), "NTP address %s is not a valid or a reachable NTP server.", iter->c_str() ); + snprintf(alarmData.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", alarmFilter.entity_instance_id); + + rc = rmon_fm_set(&alarmData, NULL); + if (rc == FM_ERR_OK ) + { + ilog("Alarm %s created for entity instance id:%s \n", NTP_ALARM_ID, alarmData.entity_instance_id); + } + else + { + ilog("Failed to create alarm %s for entity instance id:%s error:%d \n", NTP_ALARM_ID, alarmData.entity_instance_id, (int)rc); + } + } + } + } + + free(active_alarms); + return; +} + +/***************************************************************************** + * + * Name : query_ntp_servers + * + * Purpose : execute script query_ntp_servers.sh which run the "ntpq -np" + * which query the healths of the NTP servers. The script will return a + * status code and also create a temporate file which will save the list + * of reachable and non reachable NTP servers. 
This temp file is required + * to generate proper alarms + * + *****************************************************************************/ +int query_ntp_servers ( ) +{ + pid_t child_pid; + + dlog ("Main Pid:%d \n", getpid() ); + + ntp_child_pid = child_pid = fork (); + if (child_pid == 0) + { + dlog ("Child Pid:%d \n", getpid() ); + + char* argv[] = {(char*)NTPQ_QUERY_SCRIPT, NULL}; + char cmd[MAX_FILE_SIZE] ; + memset (cmd,0,MAX_FILE_SIZE); + + snprintf ( &cmd[0], MAX_FILE_SIZE, "%s/%s", RMON_FILES_DIR, NTPQ_QUERY_SCRIPT ); + + bool close_file_descriptors = true ; + if ( setup_child ( close_file_descriptors ) != PASS ) + { + exit(NTP_ERROR); + } + + /* Set child to ignore child exit */ + signal (SIGCHLD, SIG_DFL); + + /* Setup the exec arguement */ + int res = execv(cmd, argv); + elog ( "Failed to run %s return code:%d error:%s\n", cmd, res, strerror(errno) ); + exit (NTP_ERROR); + } + + if ( child_pid == -1 ) + { + elog ("Fork failed (%s)\n", strerror(errno)); + + /* TODO: Consider making this a critical fault + * after 100 retries. + * All possibilities based on man page are + * due to resource limitations and if that does + * not resolve in 100 retries then ip probably will never. 
+         **/
+        return (FAIL);
+    }
+
+    return (PASS);
+}
+
+/*****************************************************************************
+ *
+ * Name    : rmonHdlr_ceilometer_handler
+ *
+ * Purpose : Handles the ceilometer sample create response message
+ *
+ * Params  : req - the completed request; a NULL pointer is treated as a
+ *                 request timeout
+ *           arg - cast to struct event_base * and handed to
+ *                 event_base_loopbreak once the status has been recorded
+ *
+ * Updates : ceilometerEvent.status
+ *
+ *****************************************************************************/
+void rmonHdlr_ceilometer_handler( struct evhttp_request *req, void *arg )
+{
+    if ( !req )
+    {
+        elog (" Request Timeout\n");
+        ceilometerEvent.status = FAIL_TIMEOUT;
+    }
+    else
+    {
+        ceilometerEvent.status = rmonHttpUtil_status(ceilometerEvent);
+        if ( ceilometerEvent.status != PASS )
+        {
+            elog ("ceilometer HTTP request Failed (%d)\n", ceilometerEvent.status);
+            rmonHttpUtil_get_response(ceilometerEvent);
+        }
+    }
+
+    /* Success, failure or timeout: always break out of the event loop */
+    event_base_loopbreak((struct event_base *)arg);
+}
+
+/*****************************************************************************
+ *
+ * Name    : generate_ceilometer_pm
+ *
+ * Purpose : Generate ceilometer PMs through the REST API
+ *
+ * Params  : r_id       - value for the "resource_id" field
+ *           m_id       - meter name; used both in the request path
+ *                        (/v2/meters/<m_id>) and as "counter_name"
+ *           m_type     - value for "counter_type"
+ *           m_unit     - value for "counter_unit"
+ *           m_volume   - value for "counter_volume" (sent as a quoted string)
+ *           m_metadata - appended verbatim as the "resource_metadata" value,
+ *                        so the caller must supply a well formed dictionary
+ *
+ * Notes   : None of the arguments are escaped before being placed into the
+ *           payload.
+ *           The request is attempted until it passes or until
+ *           REST_API_RETRY_COUNT failures have been counted; the final
+ *           failure is logged but not returned to the caller.
+ *
+ *****************************************************************************/
+void generate_ceilometer_pm ( string r_id, string m_id, string m_type,
+                              string m_unit, string m_volume,
+                              string m_metadata )
+{
+    int rc = PASS;
+    daemon_config_type * cfg_ptr = daemon_get_cfg_ptr();
+    string command_path="";
+    string host_ip = cfg_ptr->keystone_auth_host;
+    int port = cfg_ptr->ceilometer_port;
+    int count = 0;
+
+    rmonHttpUtil_libEvent_init ( &ceilometerEvent, CEILOMETER_EVENT_SIG, host_ip, port);
+
+    ceilometerEvent.address.append("/v2/meters/");
+    ceilometerEvent.address.append(m_id);
+
+    ceilometerEvent.user_agent = "ceilometerclient.openstack.common.apiclient";
+
+    /* Build the one-element sample list that forms the request body */
+    ceilometerEvent.payload = "[{";
+    ceilometerEvent.payload.append("\"resource_id\":\"");
+    ceilometerEvent.payload.append(r_id);
+    ceilometerEvent.payload.append("\",\"counter_name\":\"");
+    ceilometerEvent.payload.append(m_id);
+    ceilometerEvent.payload.append("\",\"counter_type\":\"");
+    ceilometerEvent.payload.append(m_type);
+    ceilometerEvent.payload.append("\",\"counter_unit\":\"");
+    ceilometerEvent.payload.append(m_unit);
+    ceilometerEvent.payload.append("\",\"counter_volume\":\"");
+    ceilometerEvent.payload.append(m_volume);
+    ceilometerEvent.payload.append("\",\"resource_metadata\":");
+    // the resource metadata is dictionary of key-value pairs
+    ceilometerEvent.payload.append(m_metadata);
+    ceilometerEvent.payload.append("}]");
+    dlog ("Payload is : %s\n", ceilometerEvent.payload.c_str());
+
+    /* The request has to be issued inside the loop. It used to be sent once
+     * ahead of the loop, so a failure only re-logged the same stale result
+     * REST_API_RETRY_COUNT times without ever trying again.
+     * NOTE(review): assumes the event prepared by rmonHttpUtil_libEvent_init
+     * above may be reused for every attempt - confirm. */
+    do
+    {
+        rc = rmonHttpUtil_api_request (CEILOMETER_SAMPLE_CREATE, ceilometerEvent, command_path);
+        if ( rc != PASS )
+        {
+            count++;
+            wlog ("ceilometer failed request (%d) ... retrying (%d)\n", rc, count);
+        }
+        rmonHttpUtil_log_event (ceilometerEvent);
+
+    } while ( ( rc!=PASS ) && ( count < REST_API_RETRY_COUNT ) );
+
+    if ( rc!= PASS )
+    {
+        elog ("ceilometer sample create Failed (%d) (cnt:%d)\n", rc, count);
+    }
+}
+
+/*****************************************************************************
+ *
+ * Name    : clear_rmon_api_counts
+ *
+ * Purpose : Fold the current back-to-back miss and message error counts of
+ *           a registered client into their recorded peaks, then zero the
+ *           miss, send, receive and message error counters.
+ *
+ *****************************************************************************/
+void clear_rmon_api_counts ( registered_clients * ptr )
+{
+    /* Peaks only ever grow; capture them before the counters are zeroed */
+    if ( ptr->b2b_miss_count > ptr->b2b_miss_peak )
+    {
+        ptr->b2b_miss_peak = ptr->b2b_miss_count ;
+    }
+
+    if ( ptr->mesg_err_cnt > ptr->mesg_err_peak )
+    {
+        ptr->mesg_err_peak = ptr->mesg_err_cnt ;
+    }
+
+    ptr->b2b_miss_count = 0 ;
+    ptr->send_err_cnt = 0 ;
+    ptr->recv_err_cnt = 0 ;
+    ptr->mesg_err_cnt = 0 ;
+}
+
+/*****************************************************************************
+ *
+ * Name    : _space_to_underscore
+ *
+ * Purpose : Converts spaces in a string to underscores
+ *
+ * Params  : str - modified in place; every ' ' becomes '_'
+ *
+ *****************************************************************************/
+void _space_to_underscore (string & str )
+{
+    for ( string::size_type pos = 0; pos < str.size(); pos++ )
+    {
+        if ( str[pos] == ' ' )
+        {
+            str[pos] = '_';
+        }
+    }
+}
+
+/*****************************************************************************
+ *
+ * Name    : set_alarm_defaults
+ *
+ * Purpose : Set the defaults for the fm alarms
+ *
*****************************************************************************/
+void set_alarm_defaults ( resource_config_type * ptr )
+{
+    /* Fills the file-level alarmData record with the values shared by every
+     * resource alarm; severity, reason text and repair action are left for
+     * the caller to set. */
+    strcpy(alarmData.uuid, "");
+    /* common data for all alarm messages */
+    snprintf(alarmData.entity_type_id, FM_MAX_BUFFER_LENGTH, "system.host");
+
+    build_entity_instance_id (ptr, alarmData.entity_instance_id);
+
+    alarmData.alarm_state = FM_ALARM_STATE_SET;
+    alarmData.alarm_type = FM_ALARM_OPERATIONAL;
+    alarmData.probable_cause = FM_ALARM_THRESHOLD_CROSSED;
+    alarmData.timestamp = 0;
+    alarmData.service_affecting = FM_FALSE;
+    alarmData.suppression = FM_TRUE;
+
+    /* The alarm id is data, never a format: copy it through "%s" so that a
+     * '%' inside it cannot be interpreted as a conversion specification. */
+    snprintf(alarmData.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", ptr->alarm_id);
+
+}
+
+/*****************************************************************************
+ *
+ * Name    : _set_threshold_reason
+ *
+ * Purpose : Log a threshold crossing and write the matching text into
+ *           alarmData.reason_text.
+ *
+ * Params  : ptr                - resource being reported
+ *           percent_threshold  - reported when ptr->percent is PERCENT_USED
+ *           absolute_threshold - reported (in MB) otherwise
+ *
+ * Notes   : Alarms that are not STANDARD_ALARM are reported as "Filesystem"
+ *           rather than by resource name.
+ *           The thresholds are template parameters so that they reach the
+ *           format functions with the type of the field they came from.
+ *
+ *****************************************************************************/
+template <typename PercentT, typename AbsoluteT>
+static void _set_threshold_reason ( resource_config_type * ptr,
+                                    PercentT               percent_threshold,
+                                    AbsoluteT              absolute_threshold )
+{
+    if ( ptr->percent == PERCENT_USED )
+    {
+        if ( ptr->alarm_type == STANDARD_ALARM )
+        {
+            ilog ("%s threshold exceeded; threshold: %d%%, actual: %.2f%%. \n",
+                   ptr->resource, percent_threshold, ptr->resource_value);
+            snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
+                     "%s threshold exceeded; threshold: %u%%, actual: %.2f%%.",
+                     ptr->resource, percent_threshold, ptr->resource_value);
+        }
+        else
+        {
+            ilog ("Filesystem threshold exceeded; threshold: %d%%, actual: %.2f%%. \n",
+                   percent_threshold, ptr->resource_value);
+            snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
+                     "Filesystem threshold exceeded; threshold: %u%%, actual: %.2f%%.",
+                     percent_threshold, ptr->resource_value);
+        }
+    }
+    else
+    {
+        if ( ptr->alarm_type == STANDARD_ALARM )
+        {
+            ilog ("%s threshold exceeded; threshold: %dMB, remaining value: %.2fMB. \n",
+                   ptr->resource, absolute_threshold, ptr->resource_value);
+            snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
+                     "%s threshold exceeded; threshold: %uMB, remaining value: %.2fMB.",
+                     ptr->resource, absolute_threshold, ptr->resource_value);
+        }
+        else
+        {
+            ilog ("Filesystem threshold exceeded; threshold: %dMB, remaining value: %.2fMB. \n",
+                   absolute_threshold, ptr->resource_value);
+            snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
+                     "Filesystem threshold exceeded; threshold: %uMB, remaining value: %.2fMB.",
+                     absolute_threshold, ptr->resource_value);
+        }
+    }
+}
+
+/*****************************************************************************
+ *
+ * Name    : _reset_resource_state
+ *
+ * Purpose : Put a resource back into its idle, un-alarmed state.
+ *
+ * Notes   : The stage is assigned directly rather than through
+ *           resourceStageChange, as the FINISH stage has always done.
+ *
+ *****************************************************************************/
+static void _reset_resource_state ( resource_config_type * ptr )
+{
+    ptr->alarm_raised = false;
+    ptr->failed_send = 0;
+    ptr->failed = false ;
+    ptr->count = 0 ;
+    ptr->sev = SEVERITY_CLEARED ;
+    ptr->stage = RMON_STAGE__START ;
+}
+
+/*****************************************************************************
+ *
+ * Name    : resource_handler
+ *
+ * Purpose : Handle the failed resources and raise alarms through
+ *           the FM API as well as calling a function to notify registered clients
+ *
+ * Stages  : START        - debug log only
+ *           MANAGE       - with alarming on: build and raise the alarm, send
+ *                          the client request when debounce is non-zero, then
+ *                          move to MONITOR_WAIT once the alarm is raised.
+ *                          With alarming off: move to FINISH.
+ *           IGNORE       - move to FINISH
+ *           MONITOR_WAIT - resend a client request that failed earlier
+ *           FINISH       - clear a raised alarm, notify clients and reset
+ *                          the resource state
+ *
+ * Returns : RETRY unless changed by a stage; the rmon_fm_set result when the
+ *           MANAGE stage raises an alarm; PASS from the FINISH stage.
+ *
+ *****************************************************************************/
+int resource_handler ( resource_config_type * ptr )
+{
+    int rc = RETRY ;
+    AlarmFilter alarmFilter;
+
+    /* errorMsg tokens carry the resource name with spaces replaced */
+    string err_res_name(ptr->resource);
+    _space_to_underscore(err_res_name);
+
+    if ( ptr->stage < RMON_STAGE__STAGES )
+    {
+        dlog2 ("%s %s Stage %d\n", ptr->resource, rmonStages_str[ptr->stage], ptr->stage );
+    }
+    else
+    {
+        resourceStageChange ( ptr, RMON_STAGE__FINISH );
+    }
+
+    switch ( ptr->stage )
+    {
+        case RMON_STAGE__START:
+        {
+            dlog ( "%s failed:%d set_cnt:%d debounce_cnt:%d\n",
+                    ptr->resource,
+                    ptr->failed,
+                    ptr->count,
+                    ptr->debounce_cnt);
+            break ;
+        }
+        case RMON_STAGE__MANAGE:
+        {
+            /* send messages to maintenance if thresholds are crossed */
+            if (ptr->alarm_status != ALARM_ON)
+            {
+                resourceStageChange ( ptr, RMON_STAGE__FINISH );
+                break;
+            }
+
+            /* set up the fm api alarm defaults */
+            set_alarm_defaults( ptr );
+
+            if ( strcmp(ptr->resource, MEMORY_RESOURCE_NAME) == 0 )
+            {
+                snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action),
+                         "Monitor and if condition persists, contact next level of support; may require additional memory on Host.");
+            }
+            else if ( strcmp(ptr->resource, INSTANCE_RESOURCE_NAME) == 0 )
+            {
+                snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action),
+                         "Check Management and Infrastructure Networks and Controller or Storage Nodes.");
+            }
+            else if ((ptr->type != NULL) && (strcmp(ptr->type, "lvg") == 0 ))
+            {
+                snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action),
+                         "Monitor and if condition persists, consider adding additional physical volumes to the volume group.");
+            }
+            else
+            {
+                snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action),
+                         "Monitor and if condition persists, contact next level of support.");
+            }
+
+            /* The reason text always quotes the node0 absolute thresholds,
+             * whichever resource is being reported. */
+            if ( ptr->sev == SEVERITY_MINOR )
+            {
+                alarmData.severity = FM_ALARM_SEVERITY_MINOR;
+                _set_threshold_reason ( ptr, ptr->minor_threshold, ptr->minor_threshold_abs_node0 );
+                snprintf(ptr->errorMsg, sizeof(ptr->errorMsg),
+                         "%s minor_threshold_set", err_res_name.c_str());
+            }
+            else if ( ptr->sev == SEVERITY_MAJOR )
+            {
+                alarmData.severity = FM_ALARM_SEVERITY_MAJOR;
+
+                if (strcmp(ptr->resource, INSTANCE_RESOURCE_NAME) != 0)
+                {
+                    _set_threshold_reason ( ptr, ptr->major_threshold, ptr->major_threshold_abs_node0 );
+                }
+                else
+                {
+                    /* instance alarming is a special case of alarm */
+                    wlog ("No access to remote VM volumes.\n");
+                    snprintf(alarmData.reason_text, sizeof(alarmData.reason_text),
+                             "No access to remote VM volumes.");
+                }
+
+                if ( ptr->res_type == RESOURCE_TYPE__FILESYSTEM_USAGE )
+                {
+                    snprintf(ptr->errorMsg, sizeof(ptr->errorMsg),
+                             "%s %s",err_res_name.c_str(), DEGRADE_CLEAR_MSG );
+                }
+                else
+                {
+                    snprintf(ptr->errorMsg, sizeof(ptr->errorMsg),
+                             "%s major_threshold_set",err_res_name.c_str());
+                }
+            }
+            else if ( ptr->sev == SEVERITY_CRITICAL )
+            {
+                alarmData.severity = FM_ALARM_SEVERITY_CRITICAL;
+                _set_threshold_reason ( ptr, ptr->critical_threshold, ptr->critical_threshold_abs_node0 );
+
+                /* NOTE(review): the critical case reports the major token;
+                 * presumably the receiving side only knows minor/major -
+                 * confirm before changing. */
+                snprintf(ptr->errorMsg, sizeof(ptr->errorMsg),
+                         "%s major_threshold_set",err_res_name.c_str());
+            }
+
+            rc = rmon_fm_set(&alarmData, NULL);
+            if (rc == FM_ERR_OK ) {
+                ilog("%s: %s alarm\n",
+                      ptr->resource,
+                      FmAlarmSeverity_to_string(alarmData.severity).c_str());
+                ptr->alarm_raised = true;
+            } else {
+                ilog("%s: %s alarm failed (rc:%d)\n",
+                      ptr->resource,
+                      FmAlarmSeverity_to_string(alarmData.severity).c_str(),
+                      (int)rc);
+            }
+
+            if (ptr->alarm_raised)
+            {
+                if ((_rmon_ctrl_ptr->clients > 0) && (ptr->failed_send < MAX_FAIL_SEND))
+                {
+                    /* If degrade debounce is non-zero then this
+                     * alarm condition is candidate for host degrade */
+                    if (ptr->debounce)
+                    {
+                        if ( rmon_send_request ( ptr, _rmon_ctrl_ptr->clients ) != PASS )
+                        {
+                            ptr->failed_send++;
+                            wlog ("%s request send failed (count:%d)\n",
+                                      ptr->resource,
+                                      ptr->failed_send );
+                        }
+                        else
+                        {
+                            ptr->failed_send = 0;
+                        }
+                    }
+                }
+                else
+                {
+                    ptr->failed_send = 0;
+                }
+                resourceStageChange ( ptr, RMON_STAGE__MONITOR_WAIT );
+            }
+
+            break;
+        }
+
+        case RMON_STAGE__IGNORE:
+        {
+
+            //nothing to do here, go to the finished stage
+            resourceStageChange ( ptr, RMON_STAGE__FINISH );
+
+            break ;
+        }
+
+        case RMON_STAGE__MONITOR_WAIT:
+        {
+            /* Only resend when an earlier send failed and the failure
+             * budget is not yet used up */
+            if ((_rmon_ctrl_ptr->clients > 0) && (ptr->failed_send < MAX_FAIL_SEND) && (ptr->failed_send > 0))
+            {
+                if ( rmon_send_request ( ptr, _rmon_ctrl_ptr->clients ) != PASS )
+                {
+                    wlog ("%s request send failed \n", ptr->resource);
+                    ptr->failed_send++;
+                }
+                else
+                {
+                    ptr->failed_send = 0;
+                }
+            }
+            break;
+        }
+
+        case RMON_STAGE__FINISH:
+        {
+            if ((ptr->alarm_status == ALARM_ON) && (ptr->alarm_raised))
+            {
+                /* Both filter fields are copied through "%s": the source
+                 * strings are data and must not be used as the format. */
+                snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", ptr->alarm_id);
+
+                build_entity_instance_id (ptr, alarmData.entity_instance_id);
+
+                snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", alarmData.entity_instance_id);
+                ilog ("%s alarm clear\n", ptr->resource );
+
+                /* clear the alarm */
+                EFmErrorT ret = rmon_fm_clear(&alarmFilter);
+                if (( ret == FM_ERR_OK ) || ( ret == FM_ERR_ENTITY_NOT_FOUND ))
+                {
+                    if (ret == FM_ERR_ENTITY_NOT_FOUND)
+                    {
+                        dlog ("%s alarm clear failed, entity '%s' not found",
+                                  ptr->resource, alarmData.entity_instance_id);
+                    }
+
+                    snprintf(ptr->errorMsg, sizeof(ptr->errorMsg), "%s cleared_alarms_for_resource", err_res_name.c_str());
+
+                    /* Clients are only told about a clear that FM really
+                     * performed; keep trying until the send passes or the
+                     * failure budget is used up. */
+                    if ( (_rmon_ctrl_ptr->clients > 0) && ( ptr->failed_send < MAX_FAIL_SEND ) && (ret == FM_ERR_OK) )
+                    {
+                        while (( rmon_send_request ( ptr, _rmon_ctrl_ptr->clients ) != PASS ) &&
+                               ( ptr->failed_send < MAX_FAIL_SEND ))
+                        {
+                            wlog ("%s request send failed \n", ptr->resource);
+                            ptr->failed_send++;
+                        }
+                    }
+
+                    /* The state is reset whether or not clients were notified */
+                    _reset_resource_state ( ptr );
+                }
+                else
+                {
+                    /* State is left alone so the clear is attempted again
+                     * the next time this stage runs */
+                    wlog("%s alarm clear failed, entity '%s' (rc:%d)\n",
+                             ptr->resource,
+                             alarmData.entity_instance_id,
+                             ret);
+                }
+            }
+            else
+            {
+                _reset_resource_state ( ptr );
+            }
+            rc = PASS ;
+            break ;
+        }
+        default:
+        {
+            slog ("%s Invalid stage (%d)\n", ptr->resource, ptr->stage );
+
+            /* Default to finish for invalid case.
+             * If there is an issue then it will be detected */
+            resourceStageChange ( ptr, RMON_STAGE__FINISH );
+            break ;
+        }
+    }
+    return rc;
+}
+
+/*****************************************************************************
+ *
+ * Name    : process_failures
+ *
+ * Purpose : Check whether a percentage resource is to be failed or a failure
+ * threshold is to be cleared by the resource_handler
+ *
+ *****************************************************************************/
+void process_failures ( resource_config_type * ptr )
+{
+    if (ptr->stage == RMON_STAGE__INIT)
+    {
+        /* first time after restart/reboot, clear the alarm if the first reading is good */
+        resourceStageChange ( ptr, RMON_STAGE__START );
+        if (ptr->resource_value < ptr->minor_threshold)
+        {
+            // assuming we left as alarm on last time
+            ptr->alarm_status = ALARM_ON;
+            ptr->alarm_raised = true;
+            ptr->failed = true;
+            ilog("%s Setting the state to FINISH\n", ptr->resource);
+            resourceStageChange ( ptr, RMON_STAGE__FINISH );
+        }
+        // Now we start counting as normal ...
+ } + else + { + if (ptr->failed) + { + /* If the resource is already failed, check to see if it is to be cleared */ + if ((( ptr->sev == SEVERITY_MINOR) && ( ptr->resource_value < ptr->minor_threshold )) || + (( ptr->sev == SEVERITY_MAJOR) && ( ptr->resource_value < ptr->major_threshold )) || + (( ptr->sev == SEVERITY_CRITICAL) && ( ptr->resource_value < ptr->critical_threshold ))) + { + if (ptr->count > ptr->num_tries) + ptr->count = ptr->num_tries; + + if (ptr->count > 0) + ptr->count--; + + if (ptr->count == 0) { + ptr->sev = SEVERITY_CLEARED; + ilog("%s Setting the state to FINISH\n", ptr->resource); + resourceStageChange ( ptr, RMON_STAGE__FINISH ); + } + } + else + { + /* While in failed state, the resource usage must sustain normal level + * num_tries number of times before an alarm can be cleared. Keep incrementing the counter + * as it will be set to num_tries in the above block as soon as resource usage returns to + * normal level.*/ + ptr->count++; + + // rmon needs to send degrade assert message periodically as the + // condition might be cleared by maintenance over controller swact. + // + // added meaning to the debounce config setting. + // must be non-zero to degrade the host. 
+ if ((ptr->alarm_raised) && (ptr->debounce) && + (_rmon_ctrl_ptr->clients > 0)) + { + if ( rmon_send_request ( ptr, _rmon_ctrl_ptr->clients ) != PASS ) + { + ptr->failed_send++ ; + wlog ("%s request send failed (count:%d)\n", + ptr->resource, + ptr->failed_send); + } + else + { + mlog ("%s rmon_send_request ok\n", ptr->resource ); + ptr->failed_send = 0 ; + } + } + else + { + /* typical path for resources that + * - do not degrade host + * - do not raise alarms */ + dlog ("%s: alarm:%d debounce:%d clients:%d\n", + ptr->resource, + (ptr->alarm_raised), + (ptr->debounce), + (_rmon_ctrl_ptr->clients)); + } + } + } + } + + /* Check to see if a resource is over the failure thresholds for: minor, major and critical failures */ + if (( ptr->resource_value >= ptr->minor_threshold ) && + ( ptr->resource_value < ptr->major_threshold ) + && (ptr->sev != SEVERITY_MINOR)) + { + ptr->count++; + if ( ptr->count >= ptr->num_tries) { + ptr->failed = true; + ptr->sev = SEVERITY_MINOR; + resourceStageChange ( ptr, RMON_STAGE__MANAGE); + } + } + + else if (( ptr->resource_value >= ptr->major_threshold ) && + ( ptr->resource_value < ptr->critical_threshold ) + && (ptr->sev != SEVERITY_MAJOR)) + { + ptr->count++; + if ( ptr->count >= ptr->num_tries){ + ptr->failed = true; + ptr->sev = SEVERITY_MAJOR; + resourceStageChange ( ptr, RMON_STAGE__MANAGE); + } + } + else if (( ptr->resource_value >= ptr->critical_threshold )&& + (ptr->sev != SEVERITY_CRITICAL)) + { + ptr->count++; + if (ptr->count >= ptr->num_tries){ + ptr->failed = true; + ptr->sev = SEVERITY_CRITICAL; + resourceStageChange ( ptr, RMON_STAGE__MANAGE); + } + } + else + { + /* if the host experienced a resource blip in the previous audit run and usage + * is now back at the normal level, decrement the count.*/ + if ((!ptr->failed) && (ptr->count > 0)){ + ptr->count--; + dlog("Resource %s is back at the normal level, count is set to %d", ptr->resource, ptr->count); + } + } +} + 
+/*****************************************************************************
+ *
+ * Name    : _process_absolute_thresholds
+ *
+ * Purpose : Apply one set of absolute (remaining amount) thresholds to a
+ *           resource: count towards clearing an existing failure, or count
+ *           towards raising a minor, major or critical one.
+ *
+ * Params  : ptr          - resource under evaluation
+ *           minor_thr    - minor threshold for the node being checked
+ *           major_thr    - major threshold for the node being checked
+ *           critical_thr - critical threshold for the node being checked
+ *
+ * Notes   : Lower readings are worse, so every comparison is the mirror
+ *           image of the percentage based process_failures.
+ *           The thresholds are taken as double because they are only ever
+ *           compared with ptr->resource_value.
+ *           NOTE(review): assumes resource_value is a floating point field
+ *           (it is printed with %.2f elsewhere in this file) - confirm.
+ *
+ *****************************************************************************/
+static void _process_absolute_thresholds ( resource_config_type * ptr,
+                                           double                 minor_thr,
+                                           double                 major_thr,
+                                           double                 critical_thr )
+{
+    if (ptr->failed)
+    {
+        /* If the resource is already failed, check to see if it is to be cleared */
+        if ((( ptr->sev == SEVERITY_MINOR)    && ( ptr->resource_value > minor_thr    )) ||
+            (( ptr->sev == SEVERITY_MAJOR)    && ( ptr->resource_value > major_thr    )) ||
+            (( ptr->sev == SEVERITY_CRITICAL) && ( ptr->resource_value > critical_thr )))
+        {
+            if (ptr->count > ptr->num_tries)
+                ptr->count = ptr->num_tries;
+            if (ptr->count > 0)
+                ptr->count--;
+
+            if (ptr->count == 0) {
+                ptr->sev = SEVERITY_CLEARED;
+                resourceStageChange ( ptr, RMON_STAGE__FINISH );
+            }
+        }
+        else
+        {
+            /* While in failed state, the resource usage must sustain normal level
+             * num_tries number of times before an alarm can be cleared. Keep incrementing the counter
+             * as it will be set to num_tries in the above block as soon as resource usage returns to
+             * normal level.*/
+            ptr->count++;
+        }
+    }
+
+    /* Check to see if a resource is over the failure thresholds for: minor, major and critical failures */
+    if (( ptr->resource_value <= minor_thr ) &&
+        ( ptr->resource_value >  major_thr ) &&
+        ( ptr->sev != SEVERITY_MINOR ))
+    {
+        ptr->count++;
+        if ( ptr->count >= ptr->num_tries){
+            ptr->failed = true;
+            ptr->sev = SEVERITY_MINOR;
+            resourceStageChange ( ptr, RMON_STAGE__MANAGE);
+        }
+    }
+    /* The major band includes the critical threshold itself. It used to
+     * exclude it while the critical band below also excludes it, so a
+     * reading exactly on the critical threshold matched no band and raised
+     * nothing. The critical band is deliberately left strict so that it
+     * cannot fire any more often than it did before. */
+    else if (( ptr->resource_value <= major_thr    ) &&
+             ( ptr->resource_value >= critical_thr ) &&
+             ( ptr->sev != SEVERITY_MAJOR ))
+    {
+        ptr->count++;
+        if ( ptr->count >= ptr->num_tries){
+            ptr->failed = true;
+            ptr->sev = SEVERITY_MAJOR;
+            resourceStageChange ( ptr, RMON_STAGE__MANAGE);
+        }
+    }
+    else if (( ptr->resource_value < critical_thr ) &&
+             ( ptr->sev != SEVERITY_CRITICAL ))
+    {
+        ptr->count++;
+        if (ptr->count >= ptr->num_tries){
+            ptr->failed = true;
+            ptr->sev = SEVERITY_CRITICAL;
+            resourceStageChange ( ptr, RMON_STAGE__MANAGE);
+        }
+    }
+    else
+    {
+        /* if the host experienced a resource blip in the previous audit run and usage
+         * is now back at the normal level, decrement the count.*/
+        if ((!ptr->failed) && (ptr->count > 0)){
+            ptr->count--;
+            dlog("Resource %s is back at the normal level, count is set to %d", ptr->resource, ptr->count);
+        }
+    }
+}
+
+/*****************************************************************************
+ *
+ * Name    : process_failures_absolute
+ *
+ * Purpose : Check whether an absolute resource is to be failed or a
+ *           failure threshold is to be cleared by the resource_handler
+ *
+ * Notes   : The resource named "processor_node1" is judged against the
+ *           node1 thresholds; every other resource uses the node0 set.
+ *
+ *****************************************************************************/
+void process_failures_absolute ( resource_config_type * ptr )
+{
+    if (strcmp(ptr->resource,"processor_node1") == 0)
+    {
+        /* per node memory checking is enabled */
+        _process_absolute_thresholds ( ptr,
+                                       ptr->minor_threshold_abs_node1,
+                                       ptr->major_threshold_abs_node1,
+                                       ptr->critical_threshold_abs_node1 );
+    }
+    else
+    {
+        _process_absolute_thresholds ( ptr,
+                                       ptr->minor_threshold_abs_node0,
+                                       ptr->major_threshold_abs_node0,
+                                       ptr->critical_threshold_abs_node0 );
+    }
+}
+
+/* Record the number of registered clients in the rmon control structure */
+void update_total_clients (int total_clients)
+{
+    _rmon_ctrl_ptr->clients = total_clients;
+}
+
+void add_registered_client (registered_clients client)
+{
+
+
registered_clt[_rmon_ctrl_ptr->clients] = client;
+    ilog("added registered client: %s \n", client.client_name);
+}
+
+/*****************************************************************************
+ *
+ * Name    : add_fs_resource
+ *
+ * Purpose : Add a dynamic or static fs resource by reading
+ *           the: /etc/rmonfiles.d/dynamic.conf file
+ *
+ * Params  : resource_index    - index into dynamic_resource of the name
+ *           criticality_index - index into criticality_resource
+ *           enabled           - stored as the resource's alarm_status
+ *           percent           - PERCENT_USED or PERCENT_UNUSED; selects
+ *                               which family of thresholds is filled in
+ *           abs_values        - minor, major, critical thresholds; a zero
+ *                               in abs_values[0] selects the defaults
+ *           alarm_type        - stored as is; STATIC_ALARM together with
+ *                               MOUNTED inherits the File System Resource
+ *                               thresholds
+ *           types_index       - index into types
+ *           devices_index     - index into devices
+ *           mounted           - stored as is, see alarm_type
+ *
+ * Notes   : The new entry is written at index _rmon_ctrl_ptr->resources,
+ *           which is then incremented. Nothing is added once that count has
+ *           reached MAX_RESOURCES.
+ *           A critical threshold is only set when rmon_critical_thr is 1;
+ *           otherwise it keeps its UNUSED_* initial value.
+ *
+ *****************************************************************************/
+void add_fs_resource ( int resource_index, int criticality_index, int enabled,
+                       int percent, int abs_values[3], int alarm_type,
+                       int types_index, int devices_index, int mounted )
+{
+    /* NOTE(review): the result of get_resource_index is not checked here;
+     * presumably FS_RESOURCE_NAME is always registered first - confirm. */
+    int fs_resource_index;
+    get_resource_index( FS_RESOURCE_NAME, &fs_resource_index );
+
+    int i = _rmon_ctrl_ptr->resources;
+
+    /* Index MAX_RESOURCES itself must be refused as well; this is the same
+     * bound that add_dynamic_mem_resource applies. The previous '>' test let
+     * one entry too many through. */
+    if (i >= MAX_RESOURCES) {
+        wlog ("Cannot Monitor more than %d resources\n", MAX_RESOURCES );
+        return ;
+    }
+
+    /* The entry keeps pointers into strings owned by the storage
+     * containers rather than copies.
+     * NOTE(review): these presumably stay valid while the containers keep
+     * growing - confirm for the string implementation in use. */
+    resource_config[i].resource = dynamic_resource.at(resource_index).c_str();
+    resource_config[i].severity = criticality_resource.at(criticality_index).c_str();
+    resource_config[i].type = types.at(types_index).c_str();
+    resource_config[i].device = devices.at(devices_index).c_str();
+    resource_config[i].critical_threshold = UNUSED_CRITICAL; // initialization
+    resource_config[i].critical_threshold_abs_node0 = UNUSED_CRITICAL_ABS_NODE0;
+
+    resource_config[i].num_tries = DEFAULT_NUM_TRIES;
+    resource_config[i].alarm_status = enabled;
+    resource_config[i].percent = percent;
+    resource_config[i].mounted = mounted;
+    resource_config[i].alarm_type = alarm_type;
+    resource_config[i].debounce = resource_config[fs_resource_index].debounce;
+
+    const bool use_fs_thresholds = ( (alarm_type == STATIC_ALARM) && (mounted == MOUNTED) );
+    const bool use_critical      = ( _rmon_ctrl_ptr->rmon_critical_thr == 1 );
+
+    switch (percent) {
+        // percentage based threshold measure
+        case PERCENT_USED:
+            if (abs_values[0] != 0) {
+                /* Specific percent thresholds are defined for the dynamic resource */
+                resource_config[i].minor_threshold = abs_values[0];
+                resource_config[i].major_threshold = abs_values[1];
+                if (use_critical) {
+                    resource_config[i].critical_threshold = abs_values[2];
+                }
+            }
+            else if (use_fs_thresholds) {
+                // if this is a static mounted file system resource
+                // then use common threshold values provided for the
+                // File System Resource
+                resource_config[i].minor_threshold =
+                    resource_config[fs_resource_index].minor_threshold;
+
+                resource_config[i].major_threshold =
+                    resource_config[fs_resource_index].major_threshold;
+
+                if (use_critical) {
+                    resource_config[i].critical_threshold =
+                        resource_config[fs_resource_index].critical_threshold;
+                }
+                resource_config[i].num_tries =
+                    resource_config[fs_resource_index].num_tries;
+            }
+            else {
+                /* There are no specific percent thresholds for
+                   the dynamic resource, use defaults */
+                resource_config[i].minor_threshold = FS_MINOR;
+                resource_config[i].major_threshold = FS_MAJOR;
+                if (use_critical) {
+                    resource_config[i].critical_threshold = FS_CRITICAL;
+                }
+            }
+            break;
+
+        // absolute value based threshold measure
+        case PERCENT_UNUSED:
+            if (abs_values[0] != 0) {
+                /* Specific absolute value thresholds are specified for the dynamic resource */
+                resource_config[i].minor_threshold_abs_node0 = abs_values[0];
+                resource_config[i].major_threshold_abs_node0 = abs_values[1];
+                if (use_critical) {
+                    resource_config[i].critical_threshold_abs_node0 = abs_values[2];
+                }
+            }
+            else if (use_fs_thresholds) {
+                // if this is a static mounted file system then use common
+                // threshold values provided for the File System Resource
+                resource_config[i].minor_threshold_abs_node0 =
+                    resource_config[fs_resource_index].minor_threshold_abs_node0;
+
+                resource_config[i].major_threshold_abs_node0 =
+                    resource_config[fs_resource_index].major_threshold_abs_node0;
+
+                /* the critical value comes from the default here, not
+                 * from the File System Resource */
+                if (use_critical) {
+                    resource_config[i].critical_threshold_abs_node0 = DEFAULT_CRITICAL_ABS_NODE0;
+                }
+                resource_config[i].num_tries =
+                    resource_config[fs_resource_index].num_tries;
+            }
+            else {
+                /* No specific absolute values were given; use the
+                 * default absolute value thresholds for the dynamic resource */
+                resource_config[i].minor_threshold_abs_node0 = DEFAULT_MINOR_ABS_NODE0;
+                resource_config[i].major_threshold_abs_node0 = DEFAULT_MAJOR_ABS_NODE0;
+                if (use_critical) {
+                    resource_config[i].critical_threshold_abs_node0 = DEFAULT_CRITICAL_ABS_NODE0;
+                }
+            }
+            break;
+    }
+
+    ilog ("Monitoring %2d: %-20s (%s) (%s)\n", i, resource_config[i].resource ,
+           resource_config[i].severity, (enabled ? "enabled" : "disabled") );
+
+    /* Init the timer for this resource */
+    mtcTimer_init ( rtimer[i] ) ;
+
+    rtimer[i].hostname = "localhost" ;
+    rtimer[i].service = resource_config[i].resource ;
+    resource_config[i].i = i;
+    resource_config[i].failed = false ;
+    resource_config[i].count = 0 ;
+    resource_config[i].stage = RMON_STAGE__START ;
+    resource_config[i].sev = SEVERITY_CLEARED ;
+    resource_config[i].failed_send = 0;
+    resource_config[i].alarm_raised = false;
+    resource_config[i].res_type = RESOURCE_TYPE__FILESYSTEM_USAGE ;
+
+    /* add the alarm id for the FM API per resource monitored */
+    snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, "%s", FS_ALARM_ID);
+
+    mem_log_resource ( &resource_config[i] );
+
+    _rmon_ctrl_ptr->resources = i + 1;
+}
+
+/*****************************************************************************
+ *
+ * Name    : save_fs_resource
+ *
+ * Purpose : Loops through resources and only adds a dynamic file system
+ *           resource if it does not yet exist
+ *
+ * Notes   : The name, criticality, type and device strings are appended to
+ *           their storage containers and the resulting indices are handed
+ *           to add_fs_resource together with the remaining arguments.
+ *
+ ******************************************************************************/
+void save_fs_resource ( string resource_name, string criticality,
+                        int enabled, int percent,
+                        int abs_values[3], int alarm_type,
+                        string type, string device, int mounted)
+{
+    /* Nothing to do when a resource of this name is already monitored */
+    for (int k=0; k< _rmon_ctrl_ptr->resources; k++) {
+
+        if (strcmp(resource_config[k].resource, resource_name.c_str()) == 0) {
+            return ;
+        }
+    }
+
+    dlog ("%s(%s) fs resource add in %s state\n", resource_name.c_str(),
+           criticality.c_str(), (enabled) ? "enabled" : "disabled");
+
+    /* add each value to its vector for permanent storage and remember
+     * where it landed */
+    dynamic_resource.push_back(resource_name);
+    size_t resource_index = dynamic_resource.size() - 1;
+
+    criticality_resource.push_back(criticality);
+    size_t criticality_index = criticality_resource.size() - 1;
+
+    types.push_back(type);
+    size_t types_index = types.size() - 1;
+
+    devices.push_back(device);
+    size_t devices_index = devices.size() - 1;
+
+    add_fs_resource ( resource_index, criticality_index, enabled, percent, abs_values, alarm_type, types_index, devices_index, mounted );
+}
+
+/*****************************************************************************
+ *
+ * Name    : add_dynamic_mem_resource
+ *
+ * Purpose : Add a dynamic memory resource at runtime based on the name and criticality.
+ * The resource has both custom or default percent and absolute thresholds.
+ * *****************************************************************************/ +int add_dynamic_mem_resource ( int resource_index, int criticality_index, + double r_value, int percent, int abs_values[3], + const char * alarm_id, int socket_id=0 ) +{ + + int i = _rmon_ctrl_ptr->resources; + int new_index = i; + if (i >= MAX_RESOURCES) { + wlog ("Cannot Monitor more than %d resources\n", MAX_RESOURCES ); + } + else { + + resource_config[i].resource = dynamic_resource.at(resource_index).c_str(); + resource_config[i].severity = criticality_resource.at(criticality_index).c_str(); + + if ((percent == 1) && (abs_values[0] == 0)) { + /* There are no specific percent thresholds for the dynamic resource, use defaults */ + resource_config[i].minor_threshold = DEFAULT_MINOR; + resource_config[i].major_threshold = DEFAULT_MAJOR; + if (_rmon_ctrl_ptr->rmon_critical_thr == 1) { + resource_config[i].critical_threshold = DEFAULT_CRITICAL; + } else { + resource_config[i].critical_threshold = UNUSED_CRITICAL; + } + } + else if ((percent == 1) && (abs_values[0] != 0)) { + /* Specific percent thresholds are defined for the dynamic resource */ + resource_config[i].minor_threshold = abs_values[0]; + resource_config[i].major_threshold = abs_values[1]; + if (_rmon_ctrl_ptr->rmon_critical_thr == 1) { + resource_config[i].critical_threshold = abs_values[2]; + } else { + resource_config[i].critical_threshold = UNUSED_CRITICAL; + } + } + + if ((percent == 0) && (abs_values[0] == 0)) { + /* If the percent thresholds are selected use the default thresholds for the absolute + * value thresholds for the dynamic resource */ + resource_config[i].minor_threshold_abs_node0 = DEFAULT_MINOR_ABS_NODE0; + resource_config[i].major_threshold_abs_node0 = DEFAULT_MAJOR_ABS_NODE0; + if (_rmon_ctrl_ptr->rmon_critical_thr == 1) { + resource_config[i].critical_threshold_abs_node0 = DEFAULT_CRITICAL_ABS_NODE0; + } else { + resource_config[i].critical_threshold_abs_node0 = UNUSED_CRITICAL_ABS_NODE0; + } + 
resource_config[i].minor_threshold_abs_node1 = DEFAULT_MINOR_ABS_NODE1; + resource_config[i].major_threshold_abs_node1 = DEFAULT_MAJOR_ABS_NODE1; + resource_config[i].critical_threshold_abs_node1 = DEFAULT_CRITICAL_ABS_NODE1; + } + else if ((percent == 0) && (abs_values[0] != 0)) { + /* Specific absolute value thresholds are specified for the dynamic resource */ + resource_config[i].minor_threshold_abs_node0 = abs_values[0]; + resource_config[i].major_threshold_abs_node0 = abs_values[1]; + if (_rmon_ctrl_ptr->rmon_critical_thr == 1) { + resource_config[i].critical_threshold_abs_node0 = abs_values[2]; + } else { + resource_config[i].critical_threshold_abs_node0 = UNUSED_CRITICAL_ABS_NODE0; + } + resource_config[i].minor_threshold_abs_node1 = DEFAULT_MINOR_ABS_NODE1; + resource_config[i].major_threshold_abs_node1 = DEFAULT_MAJOR_ABS_NODE1; + resource_config[i].critical_threshold_abs_node1 = DEFAULT_CRITICAL_ABS_NODE1; + } + + resource_config[i].num_tries = DEFAULT_NUM_TRIES; + resource_config[i].alarm_status = DEFAULT_ALARM_STATUS; + resource_config[i].percent = percent; + + ilog ("Monitoring %2d: Dynamic Resource- %s (%s)\n", i, resource_config[i].resource , + resource_config[i].severity ); + + /* Init the timer for this resource */ + mtcTimer_init ( rtimer[i] ) ; + + rtimer[i].hostname = "localhost" ; + rtimer[i].service = resource_config[i].resource ; + resource_config[i].i = i; + resource_config[i].failed = false ; + resource_config[i].count = 0 ; + resource_config[i].resource_value = r_value ; + resource_config[i].resource_prev = r_value ; + resource_config[i].stage = RMON_STAGE__START ; + resource_config[i].sev = SEVERITY_CLEARED ; + resource_config[i].alarm_type = STANDARD_ALARM; + resource_config[i].failed_send = 0; + resource_config[i].alarm_raised = false; + resource_config[i].socket_id = socket_id; + + /* add the alarm id for the FM API per resource monitored */ + snprintf(resource_config[i].alarm_id, FM_MAX_BUFFER_LENGTH, alarm_id); + + mem_log_resource ( 
&resource_config[i] ); + i++; + _rmon_ctrl_ptr->resources = i; + } + return new_index; +} + +/***************************************************************************** + * + * Name : save_dynamic_mem_resource + * + * Purpose : Loops through resources and only adds a memory resource if it does not yet + * exist + ******************************************************************************/ +int save_dynamic_mem_resource ( string resource_name, string criticality, + double r_value, int percent, int abs_values[3], + const char * alarm_id, int socket_id=0 ) +{ + + size_t resource_index; + size_t criticality_index; + bool newResource = true; + int updated_index; + + for (int k=0; k< _rmon_ctrl_ptr->resources; k++) { + + if (strcmp(resource_config[k].resource, resource_name.c_str()) == 0) { + resource_config[k].resource_value= + resource_config[k].resource_prev = r_value; + updated_index = k; + newResource = false; + break; + } + } + + if (newResource == true) { + dynamic_resource.push_back(resource_name); + resource_index = dynamic_resource.size() - 1; + /* add the criticality value to a vector for permenant storage */ + criticality_resource.push_back(criticality); + criticality_index = criticality_resource.size() - 1; + updated_index = add_dynamic_mem_resource(resource_index, criticality_index, + r_value, percent, abs_values, + alarm_id, socket_id); + rmon_alarming_init( &resource_config[updated_index] ); + resource_config[updated_index].resource_prev = + resource_config[updated_index].resource_value= r_value; + } + return updated_index; +} + +/***************************************************************************** + * + * Name : calculate_fs_usage + * + * Purpose : Calculate the file system usage as a percentage or an absolute value + * for the number of MiB remaining overall and in a specific fs. The calculation + * is done by executing the df command and getting the response for each type + * of filesystem being monitored. 
+ *****************************************************************************/ +void calculate_fs_usage ( resource_config_type * ptr ) +{ + dlog("%s, is mounted resource: %d is enabled: %d\n", ptr->resource, ptr->mounted, ptr->alarm_status); + + FILE *pFile; + int last_index; + char fsLine[128]; + char buf[200]; + double fsUsage = 0; + char mounted_on[50], file_system[50], capacity[10]; + unsigned long long size, used, available; + string res_val; + double cap_percent; + double MiB = 1024.0; + double free_units = 0; + double usage_percents = 0; + double total_units = 0; + + if (ptr->mounted == MOUNTED) + { + if (strcmp(ptr->resource, FS_RESOURCE_NAME) == 0) + { + // We do not calculate the total for filesystem + // Resource FS_RESOURCE_NAME represents the total filesystem + return; + } + else + { + snprintf(buf, sizeof(buf), "timeout 2 df -T -P --local %s 2>/dev/null", ptr->resource); + } + + /* convert output of "df -P" from KiB to MiB */ + if(!(pFile = popen(buf, "r"))) + { + elog("Error, command df is not executed on resource: %s\n", ptr->resource); + } + else + { + while (memset(fsLine, 0, sizeof(fsLine)) && (fgets((char*) &fsLine, sizeof(fsLine), pFile) != NULL)) + { + sscanf(fsLine, "%49s %*s %llu %llu %llu %9s %49s", file_system, &size, &used, &available, capacity, mounted_on); + if (strcmp(mounted_on, ptr->resource) == 0) + { + string temp_val(capacity); + // exclude percentage (%) sign + last_index = temp_val.find_first_not_of("0123456789"); + res_val = temp_val.substr(0, last_index); + snprintf(capacity, sizeof(capacity), res_val.c_str()); + sscanf(capacity, "%lf", &cap_percent); + + if (ptr->percent == PERCENT_USED) + { + fsUsage = cap_percent; + ptr->resource_value = fsUsage; + if ( log_value ( ptr->resource_value, + ptr->resource_prev, + DEFAULT_LOG_VALUE_STEP ) ) + { + plog("filesystem: %s usage: %.2f%%\n", + ptr->resource, ptr->resource_value); + } + } + else + { + fsUsage = (double) (((100 - cap_percent) / 100) * size); + fsUsage = fsUsage / MiB; 
+ ptr->resource_value = fsUsage; + if ( log_value ( ptr->resource_value, + ptr->resource_prev, + DEFAULT_LOG_VALUE_STEP ) ) + { + plog("filesystem: %s has %f (MiB) (free)\n", + ptr->resource, ptr->resource_value); + } + } + + // The size of the file system is 2X the user specified size to allow upgrades. + // Currently we are alarming on the used size but instead the alarming should be based on used size /2. + // As a result there is no indication to the user that they have may have eaten into the reserved space + // for upgrades resulting in an aborted upgrade. + if (strcmp(mounted_on, POSTGRESQL_FS_PATH) == 0) + { + ptr->resource_value = ptr->resource_value / 2; + } + } + } + } + pclose(pFile); + } + else if(strcmp(ptr->resource, NOVA_LOCAL) == 0) + { + /*rmon queries the thin pool usage if the volume group is nova-local*/ + snprintf(buf, sizeof(buf), "timeout 2 lvdisplay -C --noheadings --nosuffix -o data_percent --units m " + "/dev/nova-local/nova-local-pool 2>/dev/null"); + + if(!(pFile = popen(buf, "r"))) + { + elog("Error, command lvdisplay free units is not executed \n"); + } + else + { + while (memset(fsLine, 0, sizeof(fsLine)) && (fgets((char*) &fsLine, sizeof(fsLine), pFile) != NULL)) + { + usage_percents = atof(fsLine); + } + pclose(pFile); + } + ptr->resource_value = usage_percents; + if ( log_value ( ptr->resource_value, + ptr->resource_prev, + DEFAULT_LOG_VALUE_STEP )) + { + plog("filesystem: %s, usage: %f%% \n", ptr->resource, ptr->resource_value); + } + } + else if(strcmp(ptr->resource, CINDER_VOLUMES) == 0) + { + /*rmon queries the thin pool usage if the volume group is cinder-volumes*/ + snprintf(buf, sizeof(buf), "timeout 2 lvdisplay -C --noheadings --nosuffix -o data_percent --units m " + "/dev/cinder-volumes/cinder-volumes-pool 2>/dev/null"); + + if(!(pFile = popen(buf, "r"))) + { + elog("Error, command lvdisplay free units is not executed \n"); + } + else + { + while (memset(fsLine, 0, sizeof(fsLine)) && (fgets((char*) &fsLine, 
sizeof(fsLine), pFile) != NULL)) + { + usage_percents = atof(fsLine); + } + pclose(pFile); + } + ptr->resource_value = usage_percents; + if ( log_value ( ptr->resource_value, + ptr->resource_prev, + DEFAULT_LOG_VALUE_STEP )) + { + plog("filesystem: %s, usage: %.2f%% \n", ptr->resource, ptr->resource_value); + } + } + else + { + /* for the unmounted dynamic file system resources, use the vgdisplay command to get vg free units */ + snprintf(buf, sizeof(buf), "timeout 2 vgdisplay -C --noheadings --nosuffix -o vg_free --units m %s 2>/dev/null", ptr->resource); + + if(!(pFile = popen(buf, "r"))) + { + elog("Error, command vgdisplay free units is not executed \n"); + } + else + { + while (memset(fsLine, 0, sizeof(fsLine)) && (fgets((char*) &fsLine, sizeof(fsLine), pFile) != NULL)) + { + free_units = atof(fsLine); + } + pclose(pFile); + } + + /* for the unmounted dynamic file system resources, use the vgdisplay command to get vg size */ + snprintf(buf, sizeof(buf), "timeout 2 vgdisplay -C --noheadings --nosuffix -o vg_size --units m %s 2>/dev/null", ptr->resource ); + + if(!(pFile = popen(buf, "r"))) + { + elog("Error, command vgdisplay total units is not executed \n"); + } + else + { + while (memset(fsLine, 0, sizeof(fsLine)) && (fgets((char*) &fsLine, sizeof(fsLine), pFile) != NULL)) + { + total_units = atof(fsLine); + } + pclose(pFile); + } + + if ( ptr->percent == PERCENT_USED ) + { + if (total_units != 0) + { + ptr->resource_value = (double) (( (total_units - free_units) / total_units ) * 100); + } + else + { + ptr->resource_value = 0; + } + if ( log_value ( ptr->resource_value, + ptr->resource_prev, + DEFAULT_LOG_VALUE_STEP )) + { + plog("volume-group: %s, usage: %.2f%%\n", ptr->resource, ptr->resource_value); + } + } + else + { + ptr->resource_value = free_units; + if ( log_value ( ptr->resource_value, + ptr->resource_prev, + DEFAULT_LOG_VALUE_STEP )) + { + plog("volume-group: %s, %.2f (MiB) free\n", ptr->resource, ptr->resource_value); + } + } + } +} + 
+/***************************************************************************** + * + * Name : init_memory_checking + * + * Purpose : Get the memory accounting used either 0: overcommit or 1: strict + *****************************************************************************/ +void init_memory_accounting() +{ + + const char *strict_memory_file = "/proc/sys/vm/overcommit_memory"; + + ifstream mem_file ( strict_memory_file ); + string strict_line; + + if (mem_file.is_open()) + { + + while ( getline (mem_file, strict_line) ) { + IS_STRICT = atoi(strict_line.c_str()); + } + mem_file.close(); + + } + +} + +/***************************************************************************** + * + * Name : thinpool_calcVirtUsage + * + * Purpose : Obtain the percentage of the used virtual space in thin + * provisioning. + * + * Params : index - the index of the monitored resource (virtual space) + * + * Return : PASS/FAIL + * + *****************************************************************************/ +int thinpool_calcVirtUsage(int index, + const char *poolName, + const char *poolOwner, + const char *allocParam) { + + /* Initialize the variables used in calculating the virtual usage. */ + double provisioned_capacity = 0; + double total_capacity = 0; + double allocation_ratio = 1; + double ratio = 0; + double MiB = 1024.0; + + /* Buffer (and its size) for keeping the initial result after executing + the above commands. */ + char result[BUFFER_SIZE]; + const unsigned int buffer_size = BUFFER_SIZE; + + /* Return code. */ + int rc; + + /* Save the necessary commands for obtaining the information about virtual + thin pool usage: provisioned capacity, total capacity and maximum + oversubscription ratio. 
*/ + const char *provisioned_capacity_cmd = NULL; + const char *allocation_ratio_cmd = NULL; + char total_capacity_cmd[BUFFER_SIZE]; + + snprintf(total_capacity_cmd, sizeof(total_capacity_cmd), + "lvs --units m --segments | grep \"%s\" | awk '{print $6}' | sed '$s/.$//'", + poolName); + + if (strcmp (poolOwner, "Cinder") == 0) { + const char *cinder_provisioned_capacity_cmd ="lvs --units m | grep \"volume-[.]*\" | awk '{ sum+=$4} END {print sum}'"; + const char *cinder_allocation_ratio_cmd = "cat /etc/cinder/cinder.conf | grep \"^max_over_subscription_ratio\" | cut -d '=' -f 2"; + provisioned_capacity_cmd = cinder_provisioned_capacity_cmd; + allocation_ratio_cmd = cinder_allocation_ratio_cmd; + } else if (strcmp (poolOwner, "Nova") == 0) { + const char *nova_provisioned_capacity_cmd = "lvs --units m | grep \"[.]*_disk\" | awk '{ sum+=$4} END {print sum}'"; + provisioned_capacity_cmd = nova_provisioned_capacity_cmd; + } + /* Determine the provisioned capacity. */ + rc = execute_pipe_cmd(provisioned_capacity_cmd, result, buffer_size); + if (rc != PASS) { + wlog("%s LVM Thinpool ; unable to query provisioned capacity (rc:%i)", + poolOwner, rc); + return (FAIL); + } + provisioned_capacity = atof(result); + dlog("%s LVM Thinpool provisioned capacity is %f", poolOwner, provisioned_capacity); + + /* If the threshold is of percentage type, then also determine the total + thin pool capacity and the max oversubscription ratio. 
*/ + rc = execute_pipe_cmd(total_capacity_cmd, result, buffer_size); + if (rc != PASS) { + elog("%s LVM Thinpool ; unable to query total capacity (rc:%i)", + poolOwner, rc); + return (FAIL); + } + total_capacity = atof(result); + dlog("%s LVM Thinpool total capacity is %f", + poolOwner, total_capacity); + + if (strcmp (poolOwner, "Cinder") == 0) { + rc = execute_pipe_cmd(allocation_ratio_cmd, result, buffer_size); + if (rc != PASS) { + elog("%s LVM Thinpool %s ratio could not be determined (rc:%i)", + allocParam, poolOwner, rc); + return (FAIL); + } + allocation_ratio = atof(result); + } else if (strcmp (poolOwner, "Nova") == 0) { + allocation_ratio = 1.0; + } + dlog("%s LVM Thinpool %s is %f", poolOwner, allocParam, allocation_ratio); + + /* If the allocation_ratio is 0 or hasn't been found, its default + value should be 1. */ + if (allocation_ratio == 0) + allocation_ratio = 1; + + /* Compute the current virtual space usage of the thin pool. */ + if (total_capacity != 0){ + ratio = provisioned_capacity / (total_capacity * allocation_ratio) * 100; + } else { + /*3 minutes (30 sec * rate_throttle = 180 sec)*/ + /* Change the warning log to a debug log to avoid generating this log in + rmond.log when Cinder is Ceph backended. Once the repackaging of cinder_virtual_resource.conf + and nova_virtual_resource.conf is done, we will change it back to warning log. */ + dlog("%s LVM Thinpool total capacity is 0\n", poolOwner); + return (FAIL); + } + + /* Update the resource value configuration. 
*/ + if (resource_config[index].percent == 1) { + resource_config[index].resource_value = ratio; + if ( log_value ( resource_config[index].resource_value, + resource_config[index].resource_prev, + DEFAULT_LOG_VALUE_STEP )) + { + plog("%s LVM Thinpool Usage: %.2f%%", poolOwner, ratio); + } + } + else { + resource_config[index].resource_value = + ((total_capacity * allocation_ratio) - provisioned_capacity) * MiB; + if ( log_value ( resource_config[index].resource_value, + resource_config[index].resource_prev, + DEFAULT_LOG_VALUE_STEP )) + { + plog("%s LVM Thinpool has %.2f (MiB) free", + poolOwner, + resource_config[index].resource_value); + } + } + return (PASS); +} +/***************************************************************************** + * + * Name : calculate_virtual_space_usage + * + * Purpose : Obtain the percentage of the used virtual space in thin + * provisioning. + * + * Params : index - the index of the monitored resource (virtual space) + * + * Return : PASS/FAIL + * + *****************************************************************************/ +int calculate_virtual_space_usage(int index, const char* constant) { + int rc = 0; + if (strcmp(constant, V_CINDER_THINPOOL_RESOURCE_NAME) == 0) { + rc = thinpool_calcVirtUsage(index, + "cinder-volumes-pool", + "Cinder", + "max_over_subscription_ratio"); + } else if (strcmp(constant, V_NOVA_THINPOOL_RESOURCE_NAME) == 0) { + rc = thinpool_calcVirtUsage(index, + "nova-local-pool", + "Nova", + "disk_allocation_ratio"); + } + + return rc; +} + +/***************************************************************************** + * + * Name : calculate_memory_usage + * + * Purpose : Calculate the memory usage as a percentage or absolute value for the + * number of MiB left. The overall average memory usage as well as the per NUMA + * node memory usage is computed. 
+ *****************************************************************************/ +void calculate_memory_usage( int index ) { + + const char *mem_info = "/proc/meminfo"; + FILE *pFile; + char memoryLine[40]; + char attribute_name[30]; + double memUsage, memUsageHuge; + char *line0 = &memoryLine[0]; + char *line3 = &memoryLine[3]; + char *line10 = &memoryLine[10]; + unsigned long int value; + unsigned long int avail = 0; + unsigned long int memTotal; + int resource_name_size = 100; + string resource_name_huge = "processor_hugepages_"; + string resource_name = "processor_"; + char numa_node[resource_name_size]; + string criticality = "critical"; + double MiB = 1024.0; + int absolute_thresholds[3]; + memoryinfo memInfo; + struct dirent *ent; + DIR *numa_node_dir; + vector numa_files; + vector node_files; + + memset ( (char*)&memInfo, 0, sizeof(memoryinfo)); + + if ((pFile = fopen(mem_info, "r")) == NULL){ + dlog("failed to open: /proc/meminfo \n"); + } + + else { + + while (memset(memoryLine, 0, sizeof(memoryLine)) && (fgets((char*) &memoryLine, sizeof(memoryLine), pFile) != NULL)) { + + if (*line3 == 'T') { + /* match MemTotal */ + value = 0UL; + if (sscanf(memoryLine, "MemTotal: %lu", &value) == 1) { + memInfo.MemTotal = value; + continue; + } + } else if (*line3 == 'F') { + /* match MemFree */ + value = 0UL; + if (sscanf(memoryLine, "MemFree: %lu", &value) == 1) { + memInfo.MemFree = value; + continue; + } + } else if (*line3 == 'f') { + /* match Buffers */ + value = 0UL; + if (sscanf(memoryLine, "Buffers: %lu", &value) == 1) { + memInfo.Buffers = value; + continue; + } + } else if (*line3 == 'h') { + /* match Cached */ + value = 0UL; + if (sscanf(memoryLine, "Cached: %lu", &value) == 1) { + memInfo.Cached = value; + continue; + } + } else if ((*line0 == 'S') && (*line3 == 'c')) { + /* match Slab Reclaimable */ + value = 0UL; + if (sscanf(memoryLine, "SReclaimable: %lu", &value) == 1) { + memInfo.SlabReclaimable = value; + continue; + } + } else if ((*line0 == 'C') 
&& (*line10 == 't')) { + /* match CommitLimit */ + value = 0UL; + if (sscanf(memoryLine, "CommitLimit: %lu", &value) == 1) { + memInfo.CommitLimit = value; + continue; + } + } else if ((*line0 == 'C') && (*line10 == 'A')) { + /* match Committed_AS */ + value = 0UL; + if (sscanf(memoryLine, "Committed_AS: %lu", &value) == 1) { + memInfo.Committed_AS = value; + continue; + } + } else if ((*line0 == 'H') && (*line10 == 'T')) { + /* match Hugepages_Total */ + value = 0UL; + if (sscanf(memoryLine, "HugePages_Total: %lu", &value) == 1) { + memInfo.HugePages_Total = value; + continue; + } + } + else if ((*line0 == 'H') && (*line10 == 'z')) { + /* match Hugepagesize */ + value = 0UL; + if (sscanf(memoryLine, "Hugepagesize: %lu", &value) == 1) { + memInfo.Hugepagesize = value; + continue; + } + } + else if ((*line0 == 'A') && (*line3 == 'n')) { + /* match AnonPages */ + value = 0UL; + if (sscanf(memoryLine, "AnonPages: %lu", &value) == 1) { + memInfo.AnonPages = value; + continue; + } + } + } + fclose(pFile); + } + + avail = memInfo.MemFree + memInfo.Buffers + memInfo.Cached + memInfo.SlabReclaimable; + memTotal = avail + memInfo.AnonPages; + dlog("memTotal: %lu\n", memTotal); + + /* average memory utilization */ + if (IS_STRICT == 1) { + /* strict memory checking enabled */ + if (resource_config[index].percent == 1) { + memUsage = (double) memInfo.Committed_AS / memInfo.CommitLimit; + memUsage = memUsage * 100; + } else { + memUsage = (double) (memInfo.CommitLimit - memInfo.Committed_AS) / MiB; + } + } else { + if (resource_config[index].percent == 1) + { + memUsage = (double) memInfo.AnonPages / memTotal; + memUsage = memUsage * 100; + } else + { + memUsage = (double) avail / MiB; + } + } + resource_config[index].resource_value = memUsage; + if (resource_config[index].percent == 1) + { + if ( log_value ( resource_config[index].resource_value, + resource_config[index].resource_prev, + DEFAULT_LOG_VALUE_STEP )) + { + plog("%s: %.2f%%\n", + resource_config[index].resource, 
memUsage); + } + } + else + { + if ( log_value ( resource_config[index].resource_value, + resource_config[index].resource_prev, + DEFAULT_LOG_VALUE_STEP )) + { + plog("%s: %.2f (MiB) free\n", + resource_config[index].resource, memUsage); + } + } + if ((numa_node_dir= opendir ("/sys/devices/system/node/")) != NULL) { + /* print all the files and directories within directory */ + while ((ent = readdir (numa_node_dir)) != NULL) { + if (strstr(ent->d_name, "node") != NULL) { + numa_files.push_back(ent->d_name); + } + } + closedir (numa_node_dir); + } + + /* loop through all NUMA nodes to get memory usage per NUMA node */ + for (unsigned int p=0; pper_node == 1) { + /* if set to 1 get the per NUMA node memory values */ + memset(absolute_thresholds, 0, sizeof(absolute_thresholds)); + avail = memInfo.MemFree + memInfo.FilePages + memInfo.SlabReclaimable; + memTotal = avail + memInfo.AnonPages; + /* NUMA node memory usage */ + if (resource_config[index].percent == 1) { + memUsage = (double) memInfo.AnonPages / memTotal; + memUsage = memUsage * 100; + dlog("Memory Usage %s: %.2f%% \n", resource_name.c_str(), memUsage); + } else { + memUsage = (double) avail / MiB; + dlog("Memory Available %s: %.2f MB \n", resource_name.c_str(), memUsage); + } + /* initialize a new dynamic resource for the NUMA node if it does not already exist */ + save_dynamic_mem_resource ( resource_name, criticality, memUsage, resource_config[index].percent, + absolute_thresholds, MEMORY_ALARM_ID ); + } + + + if (HUGEPAGES_NODE == 1) { + /* huge pages memory usage for the NUMA node */ + if (memInfo.HugePages_Total != 0){ + if (resource_config[index].percent == 1){ + memUsageHuge = (double) (memInfo.HugePages_Total - memInfo.HugePages_Free) / memInfo.HugePages_Total; + memUsageHuge = memUsageHuge * 100; + dlog("Memory Usage %s: %.2f%% \n", resource_name_huge.c_str(), memUsageHuge); + } else { + memUsageHuge = (double) memInfo.HugePages_Free * (memInfo.Hugepagesize/MiB) ; + dlog("Memory Available %s: %.2f 
MB \n", resource_name_huge.c_str(), memUsageHuge); + } + save_dynamic_mem_resource ( resource_name_huge, criticality, memUsageHuge, resource_config[index].percent, + absolute_thresholds, MEMORY_ALARM_ID ); + } + } + resource_name_huge = "processor_hugepages_"; + resource_name = "processor_"; + } +} + +/***************************************************************************** + * + * Name : get_cpu_time + * + + * Purpose : Parse per-cpu hi-resolution scheduling stats + * + *****************************************************************************/ +int get_cpu_time( unsigned long long * cpu_time ) +{ +#define MAX_STRING_SIZE (19) + + const char *sched_stat = "/proc/schedstat"; + FILE * pFile; + char cpu_line[500]; + unsigned long long value; + int version = 0; + int index = 0; + char cpu_time_len[50]; + + if ((pFile = fopen(sched_stat, "r")) == NULL){ + dlog("failed to open: /proc/schedstat \n"); + return (FAIL); + } + + else { + /* Parse per-cpu hi-resolution scheduling stats */ + while (memset(cpu_line, 0, sizeof(cpu_line)) && (fgets((char*) &cpu_line, sizeof(cpu_line), pFile) != NULL)) { + + if (version != 15){ + /* only version 15 is supported */ + if (sscanf(cpu_line, "version %llu", &value) == 1) { + version = (int) value; + } + } + else if ((strstr(cpu_line, "cpu") != NULL) && (version == 15)) + { + sscanf(cpu_line, "%*s %*s %*s %*s %*s %*s %*s %49s ",cpu_time_len); + if (((unsigned)strlen(cpu_time_len)) < MAX_STRING_SIZE) { + /* get the cpu time values for each cpu which is the 7th field */ + sscanf(cpu_line, "%*s %*s %*s %*s %*s %*s %*s %llu ",&value); + cpu_time[index++] = value; + } + else { + elog("%s exceeded 2^64 for cpu stats cannot calculate cpu usage\n", cpu_time_len); + cpu_time[index++] = 0; + } + } + + } + fclose(pFile); + } + + return (PASS); +} + +/***************************************************************************** + * + * Name : cpu_monitoring_init + * + + * Purpose : Get the base cpu list if running on a compute. 
Also get the number + * of cpus from: /proc/cpuinfo + *****************************************************************************/ +void cpu_monitoring_init() +{ + + string base_cpu=""; + FILE * pFile; + string delimiter = ",", delimiterTwo = "-"; + size_t pos = 0; + string token; + char cpu_line[100]; + const char *cpu_info = "/proc/cpuinfo"; + char processor[20]; + + pFile = fopen (COMPUTE_RESERVED_CONF , "r"); + if (pFile != NULL){ + ilog("File %s is present\n", COMPUTE_RESERVED_CONF); + ifstream fin( COMPUTE_RESERVED_CONF ); + string line; + + while( getline( fin, line ) ) { + /* process each line */ + if( line.find ("PLATFORM_CPU_LIST=") != string::npos ) { + stringstream ss( line ); + getline( ss, base_cpu, '=' ); // token = string before = + getline( ss, base_cpu, '=' ); // token = string after = + ilog("Found PLATFORM_CPU_LIST set to %s in file %s\n", base_cpu.c_str(), COMPUTE_RESERVED_CONF); + } + } + fclose (pFile); + } + + if (base_cpu.compare("") != 0) + { + /* get base cpus if they are available */ + if ((pos = base_cpu.find(delimiter)) != string::npos) { + + /* if the base cpus are listed with a comma, ex: 1,2 */ + base_cpu = base_cpu + delimiter; + while ((pos = base_cpu.find(delimiter)) != string::npos) { + token = base_cpu.substr(0, pos); + included_cpu[num_base_cpus++] = atoi(token.c_str()); + base_cpu.erase(0, pos + delimiter.length()); + } + } else if ((pos = base_cpu.find(delimiterTwo)) != string::npos) { + + /* if the base cpus are listed with a dash, ex: 1-3 */ + base_cpu = base_cpu + delimiterTwo; + token = base_cpu.substr(0, pos); + int first_cpu = atoi(token.c_str()); + base_cpu.erase(0, pos + delimiterTwo.length()); + pos = base_cpu.find(delimiterTwo); + token = base_cpu.substr(0, pos); + int last_cpu = atoi(token.c_str()); + + /* loop through the list of base cpus */ + for (num_base_cpus=0; num_base_cpus<=(last_cpu - first_cpu); num_base_cpus++){ + included_cpu[num_base_cpus++] = first_cpu++; + } + } + + if (num_base_cpus == 0) { + /* 
only one base cpu available */ + included_cpu[num_base_cpus++] = atoi(base_cpu.c_str()); + } + } + + ilog("Number of base CPUs for this node is %d \n", num_base_cpus); + + /* get the number of cpus */ + if ((pFile = fopen(cpu_info, "r")) == NULL){ + wlog("failed to open: /proc/cpuinfo \n"); + } + + else { + + /* Parse per-cpu hi-resolution scheduling stats */ + while (memset(cpu_line, 0, sizeof(cpu_line)) && (fgets((char*) &cpu_line, sizeof(cpu_line), pFile) != NULL)) { + + sscanf(cpu_line, "%19s %*s %*s", processor); + if (strcmp(processor, "processor") == 0) { + num_cpus++; + } + } + fclose(pFile); + } + + ilog("Number of CPUs for this node is %d \n", num_cpus); +} + +/***************************************************************************** + * + * Name : calculate_linux_usage + * + * Purpose : Calculate the cpu usage for Linux cards: controller, compute, storage + * The calculation runs as a delta. The first time the function is called no + * valid cpu calculation occurs. From the second time onwards, the cpu uasge is + * calculated by taking the delta from the previous time the function was called + * + *****************************************************************************/ +int calculate_linux_usage( resource_config_type * ptr ) +{ + + double delta_seconds; + unsigned long long cpu_occupancy[num_cpus]; + unsigned long long cpu_delta_time; + unsigned long long total_avg_cpu = 0; + unsigned int counted_cpu=0; + int rc; + unsigned long long cpu_time[num_cpus]; + + if (cpu_time_initial.size() == 0) { + /* get the cpu time initially if the first cpu time does not exist */ + rc = get_cpu_time( cpu_time ); + /* get the first timestamp */ + time(&t1); + + if (rc != PASS) + { + wlog("Failed get_cpu_time \n"); + return (FAIL); + } + + for (int x=0; xresource_value = 0; + } + else { + /* get the later cpu time if the first cpu time exists */ + rc = get_cpu_time( cpu_time ); + + if (rc != PASS) + { + wlog("Failed get_cpu_time \n"); + return (FAIL); + } + + /* 
get the later timestamp */ + time(&t2); + + for (int x=0; xresource_value = (double) (total_avg_cpu / counted_cpu); + /* clear the old cpu times and set the current times as the old times */ + cpu_time_initial.clear(); + for (int x=0; xresource_value, + ptr->resource_prev, + LINUX_CPU_LOG_VALUE_STEP )) + { + plog("%s: %.2f%% (average)\n", ptr->resource, ptr->resource_value); + } + } + + return (PASS); +} +

/* Read the node UUID from the /etc/platform/platform.conf file.
 *
 * For every line containing "UUID=" the text after the first '=' is stored
 * in hostUUID ; the last such line wins. hostUUID is left untouched when
 * the file cannot be opened. */
void _readUUID ()
{
    const char *platformFile = "/etc/platform/platform.conf";

    /* a single stream is enough to both detect and read the file */
    ifstream fin( platformFile );
    if (!fin.is_open()) {
        return;
    }

    string line;
    while( getline( fin, line ) ) {
        /* process each line */
        if( line.find ("UUID=") != string::npos ) {
            stringstream ss( line );
            getline( ss, hostUUID, '=' ); // token = string before =
            getline( ss, hostUUID, '=' ); // token = string after =
        }
    }
}

/*****************************************************************************
 *
 * Name    : _load_rmon_interfaces
 *
 * Purpose : (Re)initialize the monitored network interfaces, query the
 *           current link state of every interface in use and then
 *           initialize interface alarming.
 *
 * Description: For each used interface the primary physical interface is
 *              queried first and, for lagged interfaces, the second one as
 *              well. A link that cannot be queried is treated as down.
 *
 *****************************************************************************/
void _load_rmon_interfaces ()
{
    rmon_socket_type * sock_ptr = rmon_getSock_ptr ();
    const int interfaces = _rmon_ctrl_ptr->interface_resources ;

    /* initialize interface monitoring */
    for ( int j = 0 ; j < interfaces ; j++ )
    {
        init_physical_interfaces ( &interface_resource_config[j] );
    }

    for ( int i = 0 ; i < interfaces ; i++ )
    {
        if ( interface_resource_config[i].interface_used != true )
        {
            continue ;
        }

        /* set the link state for all the primary physical interfaces */
        if ( get_link_state ( sock_ptr->ioctl_sock, interface_resource_config[i].interface_one, &interface_resource_config[i].link_up_and_running ) )
        {
            interface_resource_config[i].link_up_and_running = false ;
            interface_resource_config[i].resource_value = INTERFACE_DOWN;
            wlog ("Failed to query %s operational state ; defaulting to down\n", interface_resource_config[i].interface_one) ;
        }
        else
        {
            ilog ("%s link is: %s\n", interface_resource_config[i].interface_one, interface_resource_config[i].link_up_and_running ? "Up" : "Down" );
            if (interface_resource_config[i].link_up_and_running)
            {
                interface_resource_config[i].resource_value = INTERFACE_UP;
            }
            else
            {
                interface_resource_config[i].resource_value = INTERFACE_DOWN;
                interface_resource_config[i].failed = true;
            }
        }

        if (interface_resource_config[i].lagged != true)
        {
            continue ;
        }

        /* set the link state for all the lagged physical interfaces ;
         * note that link_up_and_running is shared with the primary query */
        if ( get_link_state ( sock_ptr->ioctl_sock, interface_resource_config[i].interface_two, &interface_resource_config[i].link_up_and_running ) )
        {
            interface_resource_config[i].link_up_and_running = false ;
            /* record the default the log message announces, as is done for
             * the primary interface */
            interface_resource_config[i].resource_value_lagged = INTERFACE_DOWN;
            wlog ("Failed to query %s operational state ; defaulting to down\n", interface_resource_config[i].interface_two) ;
        }
        else
        {
            ilog ("%s link is: %s\n", interface_resource_config[i].interface_two, interface_resource_config[i].link_up_and_running ? "Up" : "Down" );
            if (interface_resource_config[i].link_up_and_running)
            {
                interface_resource_config[i].resource_value_lagged = INTERFACE_UP;
            }
            else
            {
                interface_resource_config[i].resource_value_lagged = INTERFACE_DOWN;
                interface_resource_config[i].failed = true;
            }
        }
    }

    for ( int j = 0 ; j < interfaces ; j++ )
    {
        interface_alarming_init ( &interface_resource_config[j] );
    }
}

+/***************************************************************************** + * + * Name : resource_stall_monitor + * + * Purpose : Detects stalls in the resource monitoring threads + ******************************************************************************/ +int resource_stall_monitor ( resource_config_type * ptr, pid_t tid, pid_t pid) +{ + #define MAX_SCHEDSTAT_LEN (128) + char file_path [MAX_FILENAME_LEN] ; + char schedstat [MAX_SCHEDSTAT_LEN] ; + FILE * fp ; + int rc = PASS; + unsigned long long nr_switches_old = t_data.nr_switches_count; + + snprintf ( &file_path[0], MAX_FILENAME_LEN, "/proc/%d/task/%d/schedstat", pid, tid ); + fp = fopen (file_path, "r" ); + if ( fp ) + { + /* check to see if the thread is stalled */ + memset ( schedstat, 0 , MAX_SCHEDSTAT_LEN ); + if ( fgets ( &schedstat[0], MAX_SCHEDSTAT_LEN, fp) != NULL) + { + if ( sscanf ( schedstat, "%*s %*s %llu", &t_data.nr_switches_count) >= 1 ) + { + dlog ("%s: nr_count: %llu, nr_count_old: %llu \n", ptr->resource, t_data.nr_switches_count, nr_switches_old); + if ((nr_switches_old != t_data.nr_switches_count) && (ptr->failed)) + { + /* Clear the stall monitor alarm */ + ilog("%s thread has unstalled \n", ptr->resource); + ptr->sev = SEVERITY_CLEARED; + t_data.nr_switches_count = 0; + resourceStageChange ( ptr, RMON_STAGE__FINISH ); + } + } + else + { + wlog ("Failed to get schedstat from (%s)\n", file_path); + rc = FAIL; + } + } + else + { + wlog ("failed to read from (%s)\n", file_path ); + rc = FAIL; + } + fclose(fp); + } + else + { + wlog ("Failed to open
(%s)\n", file_path); + rc = FAIL; + } + + if ((((nr_switches_old == t_data.nr_switches_count) && (ptr->sev != SEVERITY_MAJOR))) || + (rc == FAIL)) + { + /* thread has stalled raise alarm */ + elog("%s thread has stalled \n", ptr->resource); + ptr->sev = SEVERITY_MAJOR; + ptr->failed = true; + resourceStageChange ( ptr, RMON_STAGE__MANAGE ); + } + + return rc; +} + +/***************************************************************************** + * + * Name : check_instance_file + * + * Purpose : Thread spawned by rmon to check if: /etc/nova/instances is mounted. + * It needs to be a thread because of NFS hang issues. + * + *****************************************************************************/ +void *check_instance_file(void *threadarg) +{ + struct thread_data *res_data; + FILE * pFile; + FILE *testFile; + string line; + struct stat p; + const char *instances_dir = "/etc/nova/instances"; + const char *test_file = "/etc/nova/instances/.rmon_test"; + + res_data = (struct thread_data *) threadarg; + + pthread_mutex_lock(&lock); + res_data->thread_running = true; + res_data->tid = syscall(SYS_gettid); + pthread_mutex_unlock(&lock); + + dlog("%s process id: %d, thread id: %d \n", res_data->resource->resource, res_data->pid, res_data->tid); + res_data->resource_usage = NOT_MOUNTED; + pFile = fopen (MOUNTS_DIR , "r"); + + /* query /proc/mounts and make sure the /etc/nova/instances file system is there */ + if (pFile != NULL) + { + ifstream fin( MOUNTS_DIR ); + while( getline( fin, line ) ) + { + /* process each line */ + if( line.find (instances_dir) != string::npos ) + { + /* the mount is present */ + res_data->resource_usage = MOUNTED; + break; + } + } + fclose (pFile); + } + + if ( res_data->resource_usage == MOUNTED ) + { + /* put the test file in and check that it is accessible */ + testFile = fopen(test_file, "w"); + if (testFile != NULL) + { + fclose (testFile); + if( remove( test_file ) != 0 ) + { + elog("Failure in removing rmond test file: %s \n", 
test_file); + } + } + else + { + res_data->resource_usage = NOT_MOUNTED; + } + } + + if (res_data->resource_usage == NOT_MOUNTED) + { + /* fail the resource */ + stat (COMPUTE_CONFIG_PASS, &p); + if ((p.st_ino != 0 ) || (p.st_dev != 0)) + { + pthread_mutex_lock(&lock); + if (res_data->resource->sev != SEVERITY_MAJOR) + { + res_data->resource->sev = SEVERITY_MAJOR; + res_data->resource->failed = true; + resourceStageChange ( res_data->resource, RMON_STAGE__MANAGE ); + } + pthread_mutex_unlock(&lock); + } + } + else if ((res_data->resource_usage == MOUNTED) && (res_data->resource->failed)) + { + pthread_mutex_lock(&lock); + res_data->resource->sev = SEVERITY_CLEARED; + resourceStageChange ( res_data->resource, RMON_STAGE__FINISH ); + pthread_mutex_unlock(&lock); + } + + pthread_mutex_lock(&lock); + res_data->thread_running = false; + pthread_mutex_unlock(&lock); + + pthread_exit(NULL); +} + + +/***************************************************************************** + * + * Name : postPMs + * + * Purpose : create samples for each resource in Ceilometer + * + *****************************************************************************/ +int _postPMs () +{ + char meta_data[MAX_LEN]; + if ( hostUUID.empty() ) + { + /* keep trying to get the host UUID if it is not present */ + _readUUID(); + } + + if ( !hostUUID.empty() ) + { + // indicate the platform hostname as metadata for all resources + char *hoststring = strdup(_rmon_ctrl_ptr->my_hostname); + if (hoststring) { + char *host = strtok(hoststring,"="); + host = strtok(NULL, "="); + snprintf(&meta_data[0], MAX_LEN, "{\"host\":\"%s\"}", host); + free(hoststring); + } + + for ( int i = 0 ; i < _rmon_ctrl_ptr->resources ; i++ ) + { + ostringstream strs; + strs << resource_config[i].resource_value ; + string res_val = strs.str(); + + if (strcmp(resource_config[i].resource, CPU_RESOURCE_NAME) == 0) { + /* cpu resource pm */ + generate_ceilometer_pm ( hostUUID, "platform.cpu.util", "delta", "%", + res_val, 
string(meta_data) ); + } + else if (strcmp(resource_config[i].resource, MEMORY_RESOURCE_NAME) == 0) { + /* memory resource pm */ + if (resource_config[i].percent == 1) { + generate_ceilometer_pm ( hostUUID, "platform.mem.util", "delta", "%", + res_val, string(meta_data) ); + } else { + generate_ceilometer_pm ( hostUUID, "platform.mem.util", "gauge", "MB", + res_val, string(meta_data) ); + } + } + else if (strcmp(resource_config[i].resource, FS_RESOURCE_NAME) == 0) { + /* filesystem resource pm */ + if (resource_config[i].percent == 1) { + generate_ceilometer_pm ( hostUUID, "platform.fs.util", "delta", "%", + res_val, string(meta_data) ); + } else { + generate_ceilometer_pm ( hostUUID, "platform.fs.util", "gauge", "MB", + res_val, string(meta_data) ); + } + } + } // end of resource loop + } + return (PASS); +} + +/***************************************************************************** + * + * Name : _get_events + * + * Purpose : query each resource and extract the required usage values + * + *****************************************************************************/ + +extern bool is_cpe ( void ); +extern bool is_compute ( void ); + +void _get_events (void) +{ + int rc; + string v_cpu; + FILE * pFile; + + if ( _rmon_ctrl_ptr->clients == 0 ) + { + wlog ("Monitoring with no registered clients\n"); + } + + for ( int i = 0 ; i < _rmon_ctrl_ptr->resources ; i++ ) + { + const char *resource = resource_config[i].resource; + ilog_throttled ( resource_config[i].resource_monitor_throttle, 120, + "Monitoring '%s'\n", + resource ); + + if (strcmp(resource, CPU_RESOURCE_NAME) == 0) + { + /* linux cards: controller, compute and storage cpu utilization */ + rc = calculate_linux_usage( &resource_config[i] ); + if ( rc == PASS ) + { + /* get if the resource is failed to be used by resource handler */ + process_failures ( &resource_config[i]); + } + } + else if (!strcmp(resource, V_CPU_RESOURCE_NAME) || + !strcmp(resource, V_MEMORY_RESOURCE_NAME) || + !strcmp(resource, 
V_PORT_RESOURCE_NAME) || + !strcmp(resource, V_INTERFACE_RESOURCE_NAME) || + !strcmp(resource, V_LACP_INTERFACE_RESOURCE_NAME) || + !strcmp(resource, V_OVSDB_RESOURCE_NAME) || + !strcmp(resource, V_NETWORK_RESOURCE_NAME) || + !strcmp(resource, V_OPENFLOW_RESOURCE_NAME)) + { + /* ensure that configuration has completed before computing + * vswitch resource utilization */ + if ( !daemon_is_file_present ( CONFIG_COMPLETE_COMPUTE ) ) + continue ; + + pFile = fopen (COMPUTE_VSWITCH_DIR , "r"); + if (pFile != NULL){ + fclose (pFile); + } + else + { + wlog ("%s failed to open %s\n", resource, COMPUTE_VSWITCH_DIR); + } + } + else if (strstr(resource_config[i].resource, V_MEMORY_RESOURCE_NAME) != NULL) + { + /* vswitch memory with specific sockets */ + /* skip these ones as they are already taken care of above */ + } + else if(strcmp(resource, REMOTE_LOGGING_RESOURCE_NAME) == 0) + { + rmonHdlr_remotelogging_query(&resource_config[i]); + } + else if (strcmp(resource, INSTANCE_RESOURCE_NAME) == 0) + { + /* do not perform this check if we are not on a compute node. 
+ * its not valid on storage not combo load */ + if ( !is_compute () ) + continue ; + + if ( !daemon_is_file_present ( CONFIG_COMPLETE_COMPUTE ) ) + continue ; + + /* nova instances mount check */ + pFile = fopen (COMPUTE_VSWITCH_DIR , "r"); + if (pFile != NULL) + { + rc = PASS ; + pthread_mutex_lock(&lock); + if (!t_data.thread_running) + { + pthread_attr_t attr ; + t_data.resource = &resource_config[i]; + pthread_attr_init (&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + /* launch a thread to monitor the /etc/nova/instances mount */ + rc = pthread_create(&thread, &attr, check_instance_file, (void *) &t_data); + if (rc) + { + elog("%s ERROR; return code from pthread_create() is %d\n", + resource, rc); + } + pthread_attr_destroy (&attr); + } + else + { + /* If thread is still running check that it is not stalled */ + resource_stall_monitor(&resource_config[i], t_data.tid, t_data.pid); + } + pthread_mutex_unlock(&lock); + fclose (pFile); + } + } + else if (strcmp(resource, MEMORY_RESOURCE_NAME) == 0) { + /* memory utilization */ + calculate_memory_usage(i); + /* get if the resource is failed to be used by resource handler */ + if (resource_config[i].percent == PERCENT_USED) { + process_failures ( &resource_config[i]); + } else { + process_failures_absolute ( &resource_config[i]); + } + + } + else if ((strcmp(resource, V_CINDER_THINPOOL_RESOURCE_NAME) == 0) && + (resource_config[i].alarm_status == ALARM_ON)) { + /* virtual thin pool space utilization */ + rc = calculate_virtual_space_usage(i, V_CINDER_THINPOOL_RESOURCE_NAME); + /* only check resource for fail and clear if it is active */ + if (rc == PASS) { + if (resource_config[i].percent == PERCENT_USED) { + /* get if the resource is failed to be used by resource handler */ + process_failures (&resource_config[i]); + } else { + process_failures_absolute (&resource_config[i]); + } + } + } + else if ((strcmp(resource, V_NOVA_THINPOOL_RESOURCE_NAME) == 0) && + (resource_config[i].alarm_status 
== ALARM_ON)){ + /* do not perform this check if we are not on a compute node. + * its not valid on storage not combo load */ + if ( !is_compute () && !is_cpe () ) + continue ; + + if ( !daemon_is_file_present ( CONFIG_COMPLETE_COMPUTE ) ) + continue ; + + /* virtual thin pool space utilization */ + rc = calculate_virtual_space_usage(i, V_NOVA_THINPOOL_RESOURCE_NAME); + /* only check resource for fail and clear if it is active */ + if (rc == PASS) { + if (resource_config[i].percent == PERCENT_USED) { + /* get if the resource is failed to be used by resource handler */ + process_failures (&resource_config[i]); + } else { + process_failures_absolute (&resource_config[i]); + } + } + } + else if (strcmp(resource, FS_RESOURCE_NAME) == 0) { + /* file system utilization */ + /* do nothing as we calculate individual file system location and not the total */ + } + else { + /* dynamic file system resource */ + + pthread_mutex_lock(&lock); + if ((resource_config[i].alarm_status == ALARM_ON) && (modifyingResources == false)) + { + /* only calculate the resource usage if file systems aren't being added */ + calculate_fs_usage( &resource_config[i] ); + + /* only check resource for fail and clear if it is active */ + if (resource_config[i].percent == PERCENT_USED) { + /* get if the resource is failed to be used by resource handler */ + process_failures ( &resource_config[i]); + } else { + process_failures_absolute ( &resource_config[i]); + } + } + else if ((resource_config[i].alarm_status == ALARM_OFF) && (modifyingResources == false) + && (resource_config[i].failed == true)) + { + //send a clear message + send_clear_msg(i); + + // we need to clear the resource's alarm if there was any set for this resource + clear_alarm_for_resource(&resource_config[i]); + } + pthread_mutex_unlock(&lock); + } + } // end of rmon resources + + /* + * since interface resources are event based resourcs, i.e. 
+ * they would only be called when netlink socket reports a + * link state event, we need to run a periodic audit on them + * as part of RMON event audit. + * This audit shall resend interface degrade statuses to maintaince + * if interface is in failed state + */ + for ( int j = 0; j < _rmon_ctrl_ptr->interface_resources; j++ ) + { + if ( interface_resource_config[j].interface_used && + interface_resource_config[j].failed == true ) + { + send_interface_msg ( &interface_resource_config[j], + _rmon_ctrl_ptr->clients ); + } + } +} + +int kill_running_process ( int pid ) +{ + int result = kill ( pid, 0 ); + if ( result == 0 ) + { + result = kill ( pid, SIGKILL ); + if ( result == 0 ) + { + wlog ("NTP process kill succeeded (%d)\n", pid ); + } + else + { + elog ("NTP process kill failed (%d)\n", pid ); + } + } + return (PASS); +} + +/* SIGCHLD handler support - for waitpid */ +static bool rmon_sigchld_received = false ; +void daemon_sigchld_hdlr ( void ) +{ + dlog("Received SIGCHLD ...\n"); + + int status = 0; + pid_t tpid = 0; + + while ( 0 < ( tpid = waitpid ( -1, &status, WNOHANG | WUNTRACED ))) + { + dlog("NTP query script returned WIFEXITED:%d and WEXITSTATUS:%d for pid:%d\n", WIFEXITED(status), WEXITSTATUS(status), tpid); + + if (tpid == ntp_child_pid) + { + rmon_sigchld_received = true ; + + /* no need to wait for a timeout since we got a response, force a ring */ + rmonTimer_ntp.ring = true; + ntp_status = WEXITSTATUS(status); + } + else + { + dlog ("PID:%d lookup failed ; reaped likely after timeout\n", tpid ); + ntp_status = NTP_ERROR; + } + } +} + +int ntp_audit_handler ( ) +{ + if ( ntp_stage >= NTP_STAGE__STAGES ) + { + wlog ("Invalid ntp_stage (%d) ; correcting\n", ntp_stage ); + ntpStageChange ( NTP_STAGE__BEGIN); + } + + switch ( ntp_stage ) + { + // First state + case NTP_STAGE__BEGIN: + { + mtcTimer_start ( rmonTimer_ntp, rmon_timer_handler, _rmon_ctrl_ptr->ntp_audit_period ); + dlog ("Start NTP period timer (%d secs) %p\n", 
_rmon_ctrl_ptr->ntp_audit_period, rmonTimer_ntp.tid); + ntpStageChange ( NTP_STAGE__EXECUTE_NTPQ ); + break ; + } + + // Execute the ntpq command + case NTP_STAGE__EXECUTE_NTPQ: + { + if ( rmonTimer_ntp.ring == true ) //wake up from NTP period + { + ntp_status = PASS; + mtcTimer_start ( rmonTimer_ntp, rmon_timer_handler, _rmon_ctrl_ptr->ntpq_cmd_timeout ); + dlog ("Start NTPQ command timer (%d secs) %p\n", _rmon_ctrl_ptr->ntpq_cmd_timeout, rmonTimer_ntp.tid); + + // Execute the ntpq command + int rc = query_ntp_servers(); + if (rc != PASS) + { + elog ("NTP execute_status_command returned a failure (%d)\n", rc); + ntp_status = NTP_ERROR; + } + + ntpStageChange ( NTP_STAGE__EXECUTE_NTPQ_WAIT ); + } + break ; + } + + // Wait for the ntpq command to finish and process results + case NTP_STAGE__EXECUTE_NTPQ_WAIT: + { + // Give the command time to execute. The daemon_sigchld_hdlr will force + // a ring when the command execute successfully or returns a failure + if ( ( rmonTimer_ntp.ring == true) || (ntp_status == NTP_ERROR ) ) + { + // Stop the NTP timer if still running + if ( rmonTimer_ntp.tid ) + { + mtcTimer_stop ( rmonTimer_ntp ); + } + + if (( !rmon_sigchld_received) || (ntp_status == NTP_ERROR)) + { + if ( rmon_sigchld_received == false ) + { + elog ("NTPQ command execution timed out (pid:%d)\n", ntp_child_pid ); + } + + elog ("NTPQ returned an execution failure (rc:%d) (pid:%d)\n", ntp_status, ntp_child_pid); + if (ntp_child_pid != 0) + { + kill_running_process ( ntp_child_pid ); + } + } + else + { + dlog ("NTPQ command was successful ; analyzing results\n"); + ntp_query_results(ntp_status); + } + + ntpStageChange ( NTP_STAGE__BEGIN ); + ntp_child_pid = 0; + rmon_sigchld_received = false; + } + break; + } + + default: + { + elog ("NTP invalid ntp_stage (%d)\n", ntp_stage ); + + /* Default to first state for invalid case. 
there is an issue then it will be detected */ + ntpStageChange ( NTP_STAGE__BEGIN ); + } + } + return (PASS); +} + + +/***************************************************************************** + * + * Name : rmon_service + * + * Purpose : main loop for monitoring resources + * + *****************************************************************************/ +void rmon_service (rmon_ctrl_type * ctrl_ptr) +{ + fd_set readfds; + struct timeval waitd; + std::list socks; + rmon_socket_type * sock_ptr = rmon_getSock_ptr (); + + /* initialize FM handler */ + rmon_fm_init(); + + /* ignore SIGPIPE on swacts */ + signal(SIGPIPE, SIG_IGN); + + /* initialize the memory accounting: either Strict or OOM */ + init_memory_accounting(); + /* initialize the cpu monitoring defaults */ + cpu_monitoring_init(); + _readUUID(); + /* Start an event timer for the interval of the resources being monitored */ + ilog ("Starting 'Event Monitor' timer (%d secs) \n", ctrl_ptr->audit_period); + mtcTimer_start ( rmonTimer_event, rmon_timer_handler, 1 ); + + ilog ("Starting 'PM Monitor' timer (%d secs) \n", ctrl_ptr->pm_period); + mtcTimer_start ( rmonTimer_pm, rmon_timer_handler,ctrl_ptr->pm_period); + + if (is_controller()) + { + ntp_stage = NTP_STAGE__BEGIN; + } + + /* Get an Authentication Token */ + ilog ("%s Requesting initial token\n", ctrl_ptr->my_hostname ); + tokenEvent.status = tokenUtil_new_token ( tokenEvent, ctrl_ptr->my_hostname ); + if ( tokenEvent.status != PASS ) + { + elog ("Failed to get authentication token (%d)\n", tokenEvent.status); + if ( tokenEvent.base ) + { + slog ("%s token base:%p\n", + ctrl_ptr->my_hostname, + tokenEvent.base); + } + } + + /* service all the register and deregister requests in the queue */ + rmon_alive_notification( _rmon_ctrl_ptr->clients ); + + ilog ("registered clients: %d\n", _rmon_ctrl_ptr->clients); + + /* Initialize the resource specific configuration */ + for (int j=0; j<_rmon_ctrl_ptr->resources; j++) + { + if ( 
strcmp(resource_config[j].resource, FS_RESOURCE_NAME) == 0 ) { + /* determine whether percent or absolute values are used */ + /* determine if virtual thin pool memory usage alarm should be on or off */ + fs_percent = resource_config[j].percent; + } + } + /* add the static filesystem resources */ + process_static_fs_file(); + /* initialize the resource alarms */ + for (int j=0; j<_rmon_ctrl_ptr->resources; j++) + { + rmon_alarming_init ( &resource_config[j] ); + } + + /* add any dynamic resources from before */ + add_dynamic_fs_resource(false); + + /* Clear any stale dynamic alarms that can be caused by dynamic resources. */ + /* An alarm become stale for example if it was raised against a local volumn group (lvg) and */ + /* later on the lvg is deleted. The node will come up and the lvg resource will not longer exist and */ + /* it's related alarms not refreshed. Dynamic alarms are any alarms which it's resource can be */ + /* provisioned. */ + + AlarmFilter alarmFilter; + unsigned int max_alarms=75; + char alarm_to_search[FM_MAX_BUFFER_LENGTH]; + + fm_alarm_id alarm_id; + snprintf(alarm_id, FM_MAX_BUFFER_LENGTH, FS_ALARM_ID); + + SFmAlarmDataT *active_alarms = (SFmAlarmDataT*) calloc (max_alarms, sizeof (SFmAlarmDataT)); + if (active_alarms != NULL) + { + /* get all the current alarms with id of FS_ALARM_ID which are alarms related to the file system */ + /* fm_get_faults_by_id returns the number of alarms found */ + if (fm_get_faults_by_id( &alarm_id, active_alarms, &max_alarms) == FM_ERR_OK) + { + bool found = false; + for ( unsigned int i = 0; i < max_alarms; i++ ) + { + /* only get the 100.104 alarms */ + if ((strncmp((active_alarms+i)->alarm_id, FS_ALARM_ID, sizeof((active_alarms+i)->alarm_id)) == 0) + && (strstr((active_alarms+i)->entity_instance_id, _rmon_ctrl_ptr->my_hostname) != NULL) ) + { + found = false; + for (int j=0; j<_rmon_ctrl_ptr->resources; j++) + { + /* since we build the entity_instance_id with multiple data we must recreate it */ + 
snprintf(alarm_to_search, FM_MAX_BUFFER_LENGTH, "%s.volumegroup=%s", _rmon_ctrl_ptr->my_hostname, resource_config[j].resource); + if (strncmp(alarm_to_search, (active_alarms+i)->entity_instance_id, sizeof(alarm_to_search)) == 0) + { + found = true; + break; + } + + snprintf(alarm_to_search, FM_MAX_BUFFER_LENGTH, "%s.filesystem=%s", _rmon_ctrl_ptr->my_hostname, resource_config[j].resource); + if (strncmp(alarm_to_search, (active_alarms+i)->entity_instance_id, sizeof(alarm_to_search)) == 0) + { + found = true; + break; + } + + // We found the resource but lets check if the alarm is enable for it, if it's not + // we want to clear that alarm + if (found) + { + if (resource_config[j].alarm_status == ALARM_OFF) + { + found = false; + } + } + } + if (!found) + { + /* the alarm did not match any current resources so let's clear it */ + snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, (active_alarms+i)->alarm_id ); + snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, (active_alarms+i)->entity_instance_id); + + ilog ("Clearing stale alarm %s for entity instance id: %s", (active_alarms+i)->alarm_id, (active_alarms+i)->entity_instance_id); + + if (rmon_fm_clear(&alarmFilter) != FM_ERR_OK) + { + wlog ("Failed to clear stale alarm for entity instance id: %s", (active_alarms+i)->entity_instance_id); + } + } + } + } + } + free(active_alarms); + } + else + { + elog ("Failed to allocate memory for clearing stale dynamic alarms"); + } + + if (( sock_ptr->ioctl_sock = open_ioctl_socket ( )) <= 0 ) + { + elog ("Failed to create ioctl socket"); + } + + /* Not monitoring address changes RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR */ + if (( sock_ptr->netlink_sock = open_netlink_socket ( RTMGRP_LINK )) <= 0 ) + { + elog ("Failed to create netlink listener socket"); + } + + /* load the current interfaces for monitoring */ + _load_rmon_interfaces(); + + socks.clear(); + socks.push_front (sock_ptr->rmon_tx_sock); + socks.push_front (sock_ptr->netlink_sock); + socks.sort(); + + 
for (;;) { + /* Accomodate for hup reconfig */ + FD_ZERO(&readfds); + FD_SET(sock_ptr->rmon_tx_sock, &readfds); + FD_SET(sock_ptr->netlink_sock, &readfds); + waitd.tv_sec = 0; + waitd.tv_usec = SOCKET_WAIT ; + tokenUtil_log_refresh (); + + /* This is used as a delay up to select timeout ; SOCKET_WAIT */ + select( socks.back()+1, &readfds, NULL, NULL, &waitd); + if (FD_ISSET(sock_ptr->rmon_tx_sock, &readfds)) + { + _rmon_ctrl_ptr->clients = rmon_service_inbox ( _rmon_ctrl_ptr->clients ); + } + else if (FD_ISSET(sock_ptr->netlink_sock, &readfds)) + { + dlog ("netlink socket fired\n"); + if ( service_interface_events ( sock_ptr->netlink_sock, sock_ptr->ioctl_sock ) != PASS ) + { + elog ("service_interface_events failed \n"); + } + } + + /* Manage the health of the resources */ + if ( rmonTimer_event.ring == true ) + { + // restart the audit period timer + mtcTimer_start ( rmonTimer_event, rmon_timer_handler, ctrl_ptr->audit_period ); + /* service all the register and deregister requests in the queue */ + rmon_alive_notification( _rmon_ctrl_ptr->clients ); + _get_events ( ); + } + + if ( rmonTimer_pm.ring == true ) + { + mtcTimer_start ( rmonTimer_pm, rmon_timer_handler, ctrl_ptr->pm_period ); + tokenUtil_token_refresh ( tokenEvent, ctrl_ptr->my_hostname ); + _postPMs(); + } + + //We only monitor the NTP servers on the controller node + if ( is_controller() ) + { + ntp_audit_handler (); + } + + /* loop through all the resource timers waiting for a ring */ + for ( int j = 0 ; j < ctrl_ptr->resources ; j++ ) + { + if (resource_config[j].failed == true) { + /* Run the FSM for this failed resource */ + resource_handler ( &resource_config[j]); + } + } + + /* loop through all the interface resources */ + for ( int j = 0 ; j < ctrl_ptr->interface_resources ; j++ ) + { + if (interface_resource_config[j].failed == true) { + /* Run the FSM for this failed interface */ + interface_handler ( &interface_resource_config[j] ); + } + } + + /* loop thorough all the LVM thinpool 
metadata resources waiting for a ring */ + for ( int j = 0; j < ctrl_ptr->thinmeta_resources; j++ ) + { + if (thinmeta_resource_config[j].critical_threshold) { + // a threshold of 0 disables monitoring + if (thinmetatimer[j].ring == true) { + // restart the audit period timer + mtcTimer_start ( thinmetatimer[j], rmon_timer_handler, + thinmeta_resource_config[j].audit_period ); + dlog("%s/%s running audit (resource index: %i)", + thinmeta_resource_config[j].vg_name, + thinmeta_resource_config[j].thinpool_name, j) + /* Handle resource */ + int k; + for (k = THINMETA_FSM_RETRY; k > 0; k--) { + // call again the FSM in case it instructs us to RETRY + if(thinmeta_handler(&thinmeta_resource_config[j]) != RETRY) { + break; + } + } + if (k == 0) { + dlog("%s/%s too many state changes in FSM at: %i stage!", + thinmeta_resource_config[j].vg_name, + thinmeta_resource_config[j].thinpool_name, + thinmeta_resource_config[j].stage); + } + } + } + } + + /* handle RMON FM interface */ + rmon_fm_handler (); + + daemon_signal_hdlr (); + } + +} + +/**************************************************************************** + * + * Name : log_value + * + * Purpose : Log resource state values while avoiding log flodding for + * trivial fluxuations. + * + * Description: Recommends whether the current resource state value should + * be logged based on current, previous and step values. + * + * Caller should not generate such log if a false is returned. + * + * A true is returned if the currrent and previous resource values differ + * by +/- step amount. + * + * The caller specifies the step that can be overridden by a smaller value + * in rmond.conf:log_step value. + * + * If step is zero then a true is always returned in support of a debug mode + * where we get the current reading as a log on every audit. + * + * The callers previous value is updated to current whenever true is returned. 
+ * + ****************************************************************************/ +bool log_value ( double & current, double & previous, int step ) +{ + /* Support step override for debug purposes + * Allows for more frequent logging */ + int _step = daemon_get_cfg_ptr()->log_step ; + + /* a lower value from the conf file takes precidence */ + if ( _step > step ) + _step = step ; + + if (( round(current) >= ( round(previous) + _step )) || + ( round(current) <= ( round(previous) - _step ))) + { + previous = current ; + return true ; + } + return false ; +} diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmonHttp.cpp b/mtce-common/cgts-mtce-common-1.0/rmon/rmonHttp.cpp new file mode 100644 index 00000000..cc40fc38 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmonHttp.cpp @@ -0,0 +1,674 @@ +/* + * Copyright (c) 2013, 2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform rmon HTTP Utilities. + * + */ + +#include +#include +#include +#include /* for .. close and usleep */ +#include /* for .. system */ +#include /* for config dir reading */ +#include /* for the list of conf file names */ +#include /* for ... syslog */ +#include /* for ... waitpid */ +#include "rmon.h" +#include "tokenUtil.h" /* for ... tokenUtil_get_ptr */ +using namespace std; + +//#include "nodeClass.h" /* for ... 
maintenance class nodeLinkClass */ +#include "rmonHttp.h" /* this module header */ +//#include "rmonJsonUtil.h" /* Json Utilities */ +#include "rmonApi.h" + + +extern void rmonHdlr_remotelogging_handler ( struct evhttp_request *req, void *arg ); +extern void rmonHdlr_ceilometer_handler( struct evhttp_request *req, void *arg ); + +void rmonHttpUtil_free_base ( libEvent_type & event ); + + +static node_inv_type default_inv ; + +/***************************************************************************** + * + * Name : rmonHttpUtil_libEvent_init + * + * Purpose : Initialize the libEvent message for the HTTP request + * + *****************************************************************************/ +int rmonHttpUtil_libEvent_init ( libEvent_type *ptr , + string service, + string ip, + int port ) +{ + ptr->type = EVHTTP_REQ_GET ; /* request type GET/PUT/PATCH etc */ + + /* Characteristics */ + ptr->ip = ip ; + ptr->port = port ; + ptr->hostname = "default" ; + + /* Controls */ + ptr->status = FAIL ; /* The handler must run to make this PASS */ + ptr->active = false ; + ptr->mutex = false ; + ptr->stuck = 0 ; + ptr->found = false ; + ptr->count = 0 ; + //ptr->stage = 0 ; + ptr->result = "" ; + ptr->timeout = 0 ; + + /* Personality */ + ptr->service = service ; + ptr->request = RMON_SERVICE_NONE ; + + /* Execution Data */ + ptr->entity_path.clear() ; + ptr->entity_path_next.clear() ; + ptr->address.clear(); + ptr->payload.clear(); + ptr->response.clear(); + ptr->user_agent.clear(); + + /* Better to access a default struct than a bad pointer */ + ptr->inv_info_ptr = &default_inv ; + + /* Check for memory leaks */ + if ( ptr->base ) + { + slog ("rmon http base memory leak avoidance (%p) fixme !!\n", ptr->base); + event_base_free(ptr->base); + } + /* Create event base - like opening a socket */ + ptr->base = event_base_new(); + if ( ! 
ptr->base ) + { + elog ("Failed to create '%s' libEvent (event_base_new)\n", + ptr->service.c_str()); + + return(FAIL_EVENT_BASE) ; + } + return (PASS); +} + + +void rmonHttpUtil_start_timer ( libEvent_type & event ) +{ + clock_gettime (CLOCK_MONOTONIC, &event.start_ts ); +} + +void rmonHttpUtil_stop_timer ( libEvent_type & event ) +{ + clock_gettime (CLOCK_MONOTONIC, &event.stop_ts ); +} + +/* *********************************************************************** + * + * Name : rmonHttpUtil_free_conn + * + * Description: Free an event's connection memory if it exists. + * + * ************************************************************************/ +void rmonHttpUtil_free_conn ( libEvent_type & event ) +{ + if ( event.conn ) + { + dlog ("rmond Free Connection (%p)\n", event.conn ); + evhttp_connection_free ( event.conn ); + event.conn = NULL ; + } + else + { + wlog ("rmond Already Freed Connection\n"); + } +} + +/* *********************************************************************** + * + * Name : rmonHttpUtil_free_base + * + * Description: Free an event's base memory if it exists. 
+ * + * ************************************************************************/ +void rmonHttpUtil_free_base ( libEvent_type & event ) +{ + /* Free the base */ + if ( event.base ) + { + dlog ("rmond Free Base (%p)\n", event.base ); + + event_base_free(event.base); + event.base = NULL ; + if ( event.conn ) + { + dlog ("rmond Free Connection (%p) --------- along with base\n", event.conn ); + evhttp_connection_free ( event.conn ); + event.conn = NULL ; + } + } + else + { + wlog ("rmond Already Freed Event Base\n"); + } +} + +/***************************************************************************** + * + * Name : rmonHttpUtil_connect_new + * + * Purpose : generic HTTP Conect utility + * + *****************************************************************************/ +int rmonHttpUtil_connect_new ( libEvent_type & event ) +{ + if ( event.base ) + { + /* Open an http connection to specified IP and port */ + event.conn = evhttp_connection_base_new ( event.base, NULL, + event.ip.c_str(), + event.port ); + if ( event.conn ) + { + dlog("connect successfull \n"); + return(PASS) ; + } + else + { + elog ("Failed to create http connection (evhttp_connection_base_new)\n"); + return (FAIL_CONNECT); + } + } + else + { + elog ("Null Event base\n"); + return (FAIL_EVENT_BASE); + } +} + +/* generic HTTP Conect utility */ +int rmonHttpUtil_request_new ( libEvent_type & event, + void(*hdlr)(struct evhttp_request *, void *)) +{ + int rc = PASS ; + + /* make a new request and bind the event handler to it */ + event.req = evhttp_request_new( hdlr , event.base ); + if ( ! event.req ) + { + dlog ("call to 'evhttp_request_new' returned NULL\n"); + rc = FAIL ; + } + + return (rc); +} + +/* Fill in the output buffer */ +/* return of 0 or -1 are errors */ +int rmonHttpUtil_payload_add ( libEvent_type & event ) +{ + int rc = PASS ; + + /* Returns the output buffer. */ + event.buf = evhttp_request_get_output_buffer ( event.req ); + + /* Check for no buffer */ + if ( ! 
event.buf ) + { + elog ("evhttp_request_get_output_buffer returned null (%p)\n", event.req ); + rc = FAIL ; + } + else + { + /* write the payload into the buffer */ + rc = evbuffer_add_printf ( event.buf, "%s", event.payload.c_str()); + if ( rc == -1 ) + { + elog ("evbuffer_add_printf returned error (-1)\n"); + rc = FAIL ; + } + else if ( rc == 0 ) + { + elog ("no data added to output buffer (len=0)\n"); + rc = FAIL ; + } + else + { + rc = PASS ; + } + } + return (rc); +} + +/* get the output buffer length and convert it to a string that is returned */ +string rmonHttpUtil_payload_len ( libEvent_type * ptr ) +{ + string body_len ; + char len_str[10] ; + int len = evbuffer_get_length ( ptr->req->output_buffer ) ; + if (( len == -1 ) || ( len == 0 )) + { + body_len = "" ; + } + else + { + memset ( &len_str[0], 0 , 10 ); + sprintf ( &len_str[0], "%d", len ); + body_len = len_str ; + dlog3 ("%s Buffer Len:%s\n", ptr->hostname.c_str(), body_len.c_str() ); + } + return ( body_len ); +} + +int rmonHttpUtil_header_add ( libEvent_type * ptr, http_headers_type * hdrs_ptr ) +{ + int rc = PASS ; + + if ( hdrs_ptr->entries > MAX_HEADERS ) + { + elog ("%s Too many headers (%d:%d)\n", + ptr->hostname.c_str(), MAX_HEADERS, hdrs_ptr->entries ); + return FAIL ; + } + for ( int i = 0 ; i < hdrs_ptr->entries ; i++ ) + { + /* Add the header */ + rc = evhttp_add_header( ptr->req->output_headers, + hdrs_ptr->entry[i].key.c_str() , + hdrs_ptr->entry[i].value.c_str()); + if ( rc ) + { + elog ("evhttp_add_header returned failure (%d:%s:%s)\n", rc, + hdrs_ptr->entry[i].key.c_str(), + hdrs_ptr->entry[i].value.c_str()); + rc = FAIL ; + break ; + } + } + return (rc); +} + +/***************************************************************************** + * + * Name : rmonHttpUtil_request_make + * + * Purpose : Make the HTTP request + * + *****************************************************************************/ +int rmonHttpUtil_request_make ( libEvent_type * ptr, + enum evhttp_cmd_type type, 
+ string path ) +{ + return (evhttp_make_request( ptr->conn, ptr->req, type, path.data())); +} + +/***************************************************************************** + * + * Name : rmonHttpUtil_status + * + * Purpose : Get the status of the HTTP request + * + *****************************************************************************/ +int rmonHttpUtil_status ( libEvent_type & event ) +{ + int rc = PASS ; + + event.status = evhttp_request_get_response_code (event.req); + switch (event.status) + { + case HTTP_OK: + case 201: + case 202: + case 203: + case 204: + { + dlog3 ("%s HTTP_OK (%d)\n", event.hostname.c_str(), event.status ); + event.status = PASS ; + break; + } + case 401: + { + /* Authentication error - refresh the token */ + rc = RETRY ; + break ; + } + case 0: + { + dlog ("%s Status: 0\n", event.hostname.c_str()); + event.status = FAIL_HTTP_ZERO_STATUS ; + rc = FAIL_HTTP_ZERO_STATUS ; + break ; + } + default: + { + dlog ("%s Status: %d\n", event.hostname.c_str(), event.status ); + rc = event.status ; + break; + } + } + return (rc); +} + + + +/***************************************************************************** + * + * Name : rmonHttpUtil_api_request + * + * Purpose : Issue a HTTP REST API Request + * + *****************************************************************************/ +#define URL_LEN 200 +int rmonHttpUtil_api_request ( rmon_libEvent_enum request, + libEvent_type & event, + string command_path ) + +{ + http_headers_type hdrs ; + enum evhttp_cmd_type type = EVHTTP_REQ_PUT ; + int timeout = 1 ; + int hdr_entry = 0 ; + string payload = "" ; + int rc = FAIL ; + void(*handler)(struct evhttp_request *, void *) = NULL ; + + if ( request == REMOTE_LOGGING_REQUEST ) + { + /* Bind the handler for the request */ + handler = &rmonHdlr_remotelogging_handler ; + + /* The type of HTTP request */ + type = EVHTTP_REQ_GET ; + + /* set the timeout */ + timeout = HTTP_REMOTELOGGING_TIMEOUT ; + } + + else if ( request == 
CEILOMETER_SAMPLE_CREATE ) + { + /* Bind the handler for the request */ + handler = &rmonHdlr_ceilometer_handler ; + + /* The type of HTTP request */ + type = EVHTTP_REQ_POST ; + + /* set the timeout */ + timeout = HTTP_CEILOMETER_TIMEOUT ; + } + + else + { + slog ("%s Unsupported Request (%d)\n", event.hostname.c_str(), request); + return (FAIL_BAD_CASE); + } + + /* Establish connection */ + if ( rmonHttpUtil_connect_new ( event )) + { + return (FAIL_CONNECT); + } + + /* Create request */ + if ( rmonHttpUtil_request_new ( event, handler )) + { + return (FAIL_REQUEST_NEW); + } + + if ( type == EVHTTP_REQ_POST ) + { + /* Add payload to the output buffer but only for POST request. */ + if ( rmonHttpUtil_payload_add ( event ) ) + { + event.status = FAIL_PAYLOAD_ADD; + return ( event.status ); + } + if ( daemon_get_cfg_ptr()->debug_json ) + { + if ((!string_contains(event.payload,"token")) && + (!string_contains(event.payload,"assword"))) + { + jlog ("%s Payload : %s\n", event.hostname.c_str(), + event.payload.c_str() ); + } + else + { + jlog ("%s Payload : ... 
contains private content ...\n", + event.hostname.c_str()); + } + } + } + + if ( !command_path.empty() ) + { + event.address = command_path ; + dlog (" Address : %s\n", event.address.c_str()); + } + + /* Build the HTTP Header */ + hdrs.entry[hdr_entry].key = "Host" ; + hdrs.entry[hdr_entry].value = event.ip ; + hdr_entry++; + hdrs.entry[hdr_entry].key = "X-Auth-Project-Id" ; + hdrs.entry[hdr_entry].value = "admin"; + hdr_entry++; + + + hdrs.entry[hdr_entry].key = "Content-Type" ; + hdrs.entry[hdr_entry].value = "application/json" ; + hdr_entry++; + hdrs.entry[hdr_entry].key = "Accept" ; + hdrs.entry[hdr_entry].value = "application/json" ; + hdr_entry++; + + if ( request == CEILOMETER_SAMPLE_CREATE ) + { + hdrs.entry[hdr_entry].key = "User-Agent" ; + hdrs.entry[hdr_entry].value = event.user_agent ; + hdr_entry++; + + hdrs.entry[hdr_entry].key = "X-Auth-Token" ; + hdrs.entry[hdr_entry].value = tokenUtil_get_ptr()->token ; + hdr_entry++; + } + + hdrs.entry[hdr_entry].key = "Connection" ; + hdrs.entry[hdr_entry].value = "close" ; + hdr_entry++; + hdrs.entries = hdr_entry ; + + /* Add the headers */ + if ( rmonHttpUtil_header_add ( &event, &hdrs )) + { + return (FAIL_HEADER_ADD); + } + + rc = rmonHttpUtil_request_make ( &event, type, event.address.data() ); + + if ( rc == PASS ) + { + /* Send the message with timeout */ + evhttp_connection_set_timeout(event.req->evcon, timeout); + event_base_dispatch(event.base); + rmonHttpUtil_free_conn ( event ); + rmonHttpUtil_free_base ( event ); + + return(event.status) ; + } + elog ("%s Call to 'evhttp_make_request' failed (rc:%d)\n", + event.hostname.c_str(), rc); + + return (FAIL_MAKE_REQUEST); +} + +/***************************************************************************** + * + * Name : rmonHttpUtil_receive + * + * Purpose : Get the HTTP request response into a libEvent object + * + *****************************************************************************/ +int rmonHttpUtil_receive ( libEvent_type & event ) +{ + /* 
Send the request but don't wait for the response */ + // int rc = event_base_loop(event.base, EVLOOP_NONBLOCK) ; + int rc = event_base_loop(event.base, EVLOOP_ONCE) ; + switch ( rc ) + { + case PASS: /* 0 */ + { + /* Set in-progress flag */ + if ( event.active == false ) + { + /* look at the reported handler status */ + if ( event.status != PASS ) + rc = event.status ; + + rmonHttpUtil_log_event ( event ); + } + else + { + rc = RETRY ; + } + break ; + } + case 1: + { + dlog ("%s %s No Events Pending (1)\n", + event.hostname.c_str(), + event.service.c_str()); + rc = FAIL ; + break ; + } + case -1: + { + event.active = false ; + elog ("%s %s Failed event_base_loop (-1)\n", + event.hostname.c_str(), + event.service.c_str()); + rc = FAIL ; + break ; + } + default: + { + event.active = false ; + slog ("%s %s Failed event_base_loop - Unexpected Return (%d)\n", + event.hostname.c_str(), + event.service.c_str(), rc ); + rc = FAIL ; + break ; + } + } + return (rc); +} + +/* Get the length of the json response + * Deal with oversized messages. + * + * Get the length of the buffer so we can + * allocate one big enough to copy too. 
+ */ +int rmonHttpUtil_get_length ( libEvent_type & event ) +{ + event.response_len = evbuffer_get_length (event.req->input_buffer); + if ( event.response_len == 0 ) + { + dlog ("%s %s Request Failed - Zero Length Response\n", + event.hostname.c_str(), + event.service.c_str()); + event.status = FAIL_JSON_ZERO_LEN ; + } + else + { + event.status = PASS ; + } + return ( event.status ); +} + +/* Load the response string into the event struct */ +int rmonHttpUtil_get_response ( libEvent_type & event ) +{ + if ( rmonHttpUtil_get_length ( event ) == PASS ) + { + size_t real_len ; + + /* Get a stack buffer, zero it, copy to it and terminate it */ + char * stack_buf_ptr = (char*)malloc (event.response_len+1); + memset ( stack_buf_ptr, 0, event.response_len+1 ); + real_len = evbuffer_remove( event.req->input_buffer, stack_buf_ptr, + event.response_len); + + if ( real_len != event.response_len ) + { + wlog ("%s %s Length differs from removed length (%ld:%ld)\n", + event.hostname.c_str(), + event.service.c_str(), + event.response_len, + real_len ); + } + + /* Terminate the buffer , this is where the +1 above is required. 
+ * Without it there is memory corruption reported by Linux */ + *(stack_buf_ptr+event.response_len) = '\0'; + + /* Store the response */ + event.response = stack_buf_ptr ; + dlog ("%s Response: %s\n", event.hostname.c_str(), event.response.c_str()); + + free (stack_buf_ptr); + } + return ( event.status ); +} + +/***************************************************************************** + * + * Name : rmonHttpUtil_log_event + * + * Purpose : Log the HTTP event + * + *****************************************************************************/ +void rmonHttpUtil_log_event ( libEvent_type & event ) +{ + string event_sig = daemon_get_cfg_ptr()->debug_event ; + + dlog3 ("Event Signature (%s)\n", event_sig.c_str()); + if ( !event_sig.compare(event.service) || (event.status)) + { + if ( !event.address.empty() ) + { + log_event ("%s %s Address : %s\n", event.hostname.c_str(), event_sig.c_str(), event.address.c_str()); + } + if (!event.payload.empty()) + { + if ((!string_contains(event.payload,"token")) && + (!string_contains(event.payload,"assword"))) + { + log_event ("%s %s Payload : %s\n", event.hostname.c_str(), event_sig.c_str(), event.payload.c_str()); + } + else + { + log_event ("%s %s Payload : ... contains private content ...\n", event.hostname.c_str(), event_sig.c_str()); + } + } + if ( !event.response.empty() ) + { + if ((!string_contains(event.payload,"token")) && + (!string_contains(event.payload,"assword"))) + { + log_event ("%s %s Response: %s\n", event.hostname.c_str(), event_sig.c_str(), event.response.c_str()); + } + else + { + log_event ("%s %s Response: ... 
contains private content ...\n", event.hostname.c_str(), event_sig.c_str()); + } + } + } +} diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmonHttp.h b/mtce-common/cgts-mtce-common-1.0/rmon/rmonHttp.h new file mode 100644 index 00000000..c9c95029 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmonHttp.h @@ -0,0 +1,161 @@ +#ifndef __INCLUDE_rmonHTTPUTIL_H__ +#define __INCLUDE_rmonHTTPUTIL_H__ +/* + * Copyright (c) 2013, 2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform rmon + * + * libevent HTTP support utilities and control structure support header + */ + +#include /* for ... string */ +#include /* for ... http libevent client */ + +using namespace std; + +/** Maximum libevent response message size in bytes. */ +#define MAX_EVENT_LEN (16384) + +#define HTTP_VSWITCH_TIMEOUT (10) +#define HTTP_REMOTELOGGING_TIMEOUT (10) +#define HTTP_CEILOMETER_TIMEOUT (10) + +#define VSWITCH_EVENT_SIG "vswitchEvent" +#define REMOTE_LOGGING_EVENT_SIG "remoteLoggingEvent" +#define CEILOMETER_EVENT_SIG "ceilometerEvent" + +/** Request Type Enums for the common rmonHttpUtil_request utility */ +typedef enum { + RMON_SERVICE_NONE, + VSWITCH_REQUEST, + REMOTE_LOGGING_REQUEST, + CEILOMETER_SAMPLE_CREATE +} rmon_libEvent_enum ; + +/** Local event control structure for REST API services + * + * Keystone and Inventory + * + */ +typedef struct +{ + /** Execution Controls */ + bool mutex ; /**< single operation at a time */ + bool active ; /**< true if waiting on response */ + int stuck ; /**< Count mutex active stuck state */ + int status ; /**< Execution Status */ + string result ; /**< Command specific result str */ + bool found ; /**< true if query was found */ + int timeout ; /**< Request timeout */ + int count ; /**< retry recover counter */ + int fails ; /**< fail counter */ + int retries ; /**< retry counter ; for receive */ + string service ; /**< Service being executed */ + string hostname ; 
/**< Target hostname */ + string uuid ; /**< The UUID for this request */ + string ip ; /**< Server IP address */ + rmon_libEvent_enum request ; + int port ; /**< Server port number */ + string user_agent ; /**< set the User-Agent header */ + + enum evhttp_cmd_type type; /**< HTTP Request Type ; PUT/GET */ + struct event_base *base; /**< libEvent API service base */ + struct evhttp_connection *conn; /**< HTTP connection ptr */ + struct evhttp_request *req ; /**< HTTP request ptr */ + struct evbuffer *buf ; /**< HTTP output buffer ptr */ + struct evbuffer_ptr evp ; /**< HTTP output buffer ptr */ + + /** Timestamps used to measure the responsiveness of REST API */ + struct timespec start_ts ; /**< Request Dispatch Timestamp */ + struct timespec stop_ts ; /**< Response Handler Timestamp */ + + string entity_path ; /**< HTTP entity request string */ + string entity_path_next ; /**< next entity request string */ + + /** Result Info */ + node_inv_type * inv_info_ptr ; /**< Inventory data pointer */ + string address ; /**< http url address */ + string payload ; /**< the request's payload */ + size_t response_len ; /**< the json response length */ + string response ; /**< the json response string */ +} libEvent_type; + +int rmonHttpUtil_libEvent_init ( libEvent_type * ptr , + string service, + string ip, + int port ); + +void rmonHttpUtil_start_timer ( libEvent_type & event ); +void rmonHttpUtil_stop_timer ( libEvent_type & event ); +void rmonHttpUtil_log_time ( libEvent_type & event ); + + +/** Maximum number of headers that can be added to an HTTP message. */ +#define MAX_HEADERS (10) + +#if 0 +/** A header entry type. */ +typedef struct +{ + string key ; /**< the header label. */ + string value ; /**< the header value. */ +} http_header_entry_type; + +/** The header entry table. */ +typedef struct +{ + int entries ; /**< Number of entries in the header table. */ + http_header_entry_type entry[MAX_HEADERS]; /**< entry array. 
*/ +} http_headers_type ; +#endif + +/** Add payload to the HTTP message body. */ +int rmonHttpUtil_payload_add ( libEvent_type & event ); + +/** Add all headers in header table to the HTTP connection message. */ +int rmonHttpUtil_header_add ( libEvent_type * ptr, http_headers_type * hdrs_ptr ); + +/** Create an HTTP request. */ +int rmonHttpUtil_request_make ( libEvent_type * ptr, enum evhttp_cmd_type type, string path ); + +/** Open a connection to an HTTP server. */ +int rmonHttpUtil_connect_new ( libEvent_type & event ); + +/** Get a new HTTP request pointer. */ +int rmonHttpUtil_request_new ( libEvent_type & event, + void(*hdlr)(struct evhttp_request *, void *)); + +/** Common REST API Request Utility */ +int rmonHttpUtil_api_request ( rmon_libEvent_enum request, + libEvent_type & event, + string command_path ); + +/** Common REST API Request Utility */ +int rmonHttpUtil_request ( libEvent_type & event , bool block, + void(*hdlr)(struct evhttp_request *, void *)); + +/** Common REST API Receive Utility for non-blocking requests */ +int rmonHttpUtil_receive ( libEvent_type & event ); + +/** HTTP response status checker */ +int rmonHttpUtil_status ( libEvent_type & event ); + +/** TODO: FIXME: Get the payload string length. */ +string rmonHttpUtil_payload_len ( libEvent_type * ptr ); + +/** Get the length of the json response */ +int rmonHttpUtil_get_length ( libEvent_type & event ); + +/** Load the json response into the event struct */ +int rmonHttpUtil_get_response ( libEvent_type & event ); + +/** print event filtered event */ +void rmonHttpUtil_log_event ( libEvent_type & event ); + +#endif /* __INCLUDE_rmonHTTPUTIL_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmonInit.cpp b/mtce-common/cgts-mtce-common-1.0/rmon/rmonInit.cpp new file mode 100644 index 00000000..78dca1c8 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmonInit.cpp @@ -0,0 +1,616 @@ +/* + * Copyright (c) 2013-2017 Wind River Systems, Inc. 
+*
+* SPDX-License-Identifier: Apache-2.0
+*
+ */
+
+ /**
+ * @file
+ * Wind River CGCS Platform Resource Monitor Service Initialization
+ */
+
+#include "rmon.h"
+
+/* File definitions */
+#define CONFIG_FILE ((const char *)"/etc/mtc/rmond.conf")
+
+
+/* The daemon's control structure ; handed to the handler init/fini and
+ * service calls in this file */
+static rmon_ctrl_type rmon_ctrl ;
+rmon_ctrl_type * get_ctrlPtr ( void ) ;
+
+/* The daemon's configuration ; filled in by the ini handlers below */
+static daemon_config_type rmon_config ;
+daemon_config_type * daemon_get_cfg_ptr () { return &rmon_config ; }
+
+/* Cleanup exit handler ; shuts down messaging and the resource handler,
+ * dumps daemon info, closes the daemon files and exits with status 0 */
+void daemon_exit ( void )
+{
+    rmon_msg_fini ();
+    rmon_hdlr_fini ( &rmon_ctrl );
+    daemon_dump_info ();
+    daemon_files_fini ();
+    exit (0);
+}
+
+/* true when this host's function is compute with no subfunction */
+bool is_compute ( void )
+{
+    if (( rmon_ctrl.function == COMPUTE_TYPE ) && ( rmon_ctrl.subfunction == CGTS_NODE_NULL ))
+        return (true);
+    else
+        return (false);
+}
+
+/* true when this host's function is controller ; any subfunction */
+bool is_controller ( void )
+{
+    if ( rmon_ctrl.function == CONTROLLER_TYPE )
+        return (true);
+    else
+        return (false);
+}
+
+/* true when this host is a controller with a compute subfunction */
+bool is_cpe ( void )
+{
+    if (( rmon_ctrl.function == CONTROLLER_TYPE ) && ( rmon_ctrl.subfunction == COMPUTE_TYPE ))
+        return (true);
+    else
+        return (false);
+}
+
+/*****************************************************************************
+ *
+ * Name    : rmon_config_handler
+ *
+ * Purpose : Startup config read from file: rmond.conf
+ *
+ * Params  : user    - the daemon_config_type to fill in
+ *           section - ini section of the current entry
+ *           name    - ini key of the current entry
+ *           value   - ini value of the current entry ; parsed with atoi
+ *
+ * Returns : always PASS ; unrecognized entries are ignored
+ *
+ * Notes   : Each recognized key except log_step also sets a bit in
+ *           config_ptr->mask ; daemon_configure compares that mask
+ *           against CONF_MASK. rmon_tx_port and rmon_api_tx_port set the
+ *           same CONFIG_TX_PORT bit.
+ *
+ *****************************************************************************/
+static int rmon_config_handler ( void * user,
+                                 const char * section,
+                                 const char * name,
+                                 const char * value)
+{
+    daemon_config_type* config_ptr = (daemon_config_type*)user;
+
+    if (MATCH("config", "audit_period"))
+    {
+        config_ptr->audit_period = atoi(value);
+        config_ptr->mask |= CONFIG_AUDIT_PERIOD ;
+    }
+    else if (MATCH("config", "pm_period"))
+    {
+        config_ptr->pm_period = atoi(value);
+        config_ptr->mask |= PM_AUDIT_PERIOD ;
+    }
+    else if (MATCH("config", "ntp_audit_period"))
+    {
+        config_ptr->ntp_audit_period = atoi(value);
+        config_ptr->mask |= NTP_AUDIT_PERIOD ;
+    }
+    else if (MATCH("config", "ntpq_cmd_timeout"))
+    {
+        config_ptr->ntpq_cmd_timeout = atoi(value);
+        config_ptr->mask |= NTPQ_CMD_TIMEOUT ;
+    }
+    else if (MATCH("config", "rmon_tx_port"))
+    {
+        config_ptr->rmon_tx_port = atoi(value);
+        config_ptr->mask |= CONFIG_TX_PORT ;
+    }
+    else if (MATCH("config", "per_node"))
+    {
+        config_ptr->per_node = atoi(value);
+        config_ptr->mask |= CONFIG_NODE ;
+    }
+    else if (MATCH("timeouts", "start_delay"))
+    {
+        config_ptr->start_delay = atoi(value);
+        config_ptr->mask |= CONFIG_START_DELAY ;
+    }
+    else if (MATCH("config", "rmon_api_tx_port"))
+    {
+        /* NOTE(review): shares the CONFIG_TX_PORT bit with rmon_tx_port,
+         * so the mask cannot tell which of the two was loaded - confirm
+         * this is intended */
+        config_ptr->rmon_api_tx_port = atoi(value);
+        config_ptr->mask |= CONFIG_TX_PORT ;
+    }
+    else if (MATCH("config", "critical_threshold"))
+    {
+        config_ptr->rmon_critical_thr = atoi(value);
+        config_ptr->mask |= CONFIG_CRITICAL_THR ;
+    }
+    else if (MATCH("config", "log_step"))
+    {
+        /* optional ; no mask bit */
+        config_ptr->log_step = atoi(value);
+    }
+    return (PASS);
+}
+
+/*****************************************************************************
+ *
+ * Name    : rmon_interface_config
+ *
+ * Purpose : Read interface resource config file settings into the daemon configuration
+ *
+ * Params  : user    - the interface_resource_config_type to fill in
+ *           section, name, value - the current ini entry
+ *
+ * Returns : PASS when the entry is a recognized [resource] key,
+ *           FAIL otherwise
+ *
+ * Notes   : resource and severity are stored as strdup copies ; this
+ *           function does not free a previously stored value.
+ *
+ *****************************************************************************/
+int rmon_interface_config ( void * user,
+                            const char * section,
+                            const char * name,
+                            const char * value)
+{
+    int rc = FAIL ;
+    interface_resource_config_type * ptr = (interface_resource_config_type*)user;
+
+    if (MATCH("resource", "resource"))
+    {
+        ptr->mask |= CONF_RESOURCE ;
+        ptr->resource = strdup(value);
+        dlog ("Resource : %s\n", ptr->resource);
+        rc = PASS ;
+    }
+    else if (MATCH("resource", "severity"))
+    {
+        ptr->mask |= CONF_SEVERITY ;
+        ptr->severity = strdup(value);
+        dlog ("Severity : %s\n", ptr->severity );
+        rc = PASS ;
+    }
+    else if (MATCH("resource", "debounce"))
+    {
+        /* A zero value prevents degrade accompanying any alarm */
+        ptr->mask |= CONF_DEBOUNCE ;
+        ptr->debounce = atoi(value);
+        dlog ("Debounce : %d\n", ptr->debounce );
+        rc = PASS ;
+    }
+    else if (MATCH("resource", "num_tries"))
+    {
+        ptr->num_tries = atoi(value);
+        dlog ("Number of Tries : %d\n", ptr->num_tries );
+        rc = PASS ;
+    }
+    else if (MATCH("resource", "alarm_on"))
+    {
+        ptr->alarm_status= atoi(value);
+        dlog ("Resource Alarm Status : %d\n", ptr->alarm_status);
+        rc = PASS ;
+    }
+
+
+    return (rc);
+}
+
+/*****************************************************************************
+ *
+ * Name    : rmon_thinmeta_config
+ *
+ * Purpose : Read thinpool metadata resource config file settings into the
+ *           daemon configuration
+ *
+ * Params  : user    - the thinmeta_resource_config_type to fill in
+ *           section, name, value - the current ini entry
+ *
+ * Returns : PASS when the entry is a recognized key of the
+ *           THINMETA_CONFIG_SECTION section, FAIL otherwise
+ *
+ *****************************************************************************/
+int rmon_thinmeta_config ( void * user,
+                           const char * section,
+                           const char * name,
+                           const char * value)
+{
+    int rc = FAIL ;
+    thinmeta_resource_config_type * ptr = (thinmeta_resource_config_type*)user;
+
+    /* NOTE(review): the section name is spelled out here while the key
+     * matches below use THINMETA_CONFIG_SECTION - presumably the same
+     * string ; confirm */
+    if(strcmp(section, "thinpool_metadata") == 0)
+    {
+        // This configuration item has the thinpool metadata section
+        ptr->section_exists = true;
+    }
+
+    if (MATCH(THINMETA_CONFIG_SECTION, "vg_name"))
+    {
+        ptr->vg_name = strdup(value);
+        dlog ("Thinpool VG Name : %s\n", ptr->vg_name);
+        rc = PASS ;
+    }
+    else if (MATCH(THINMETA_CONFIG_SECTION, "thinpool_name"))
+    {
+        ptr->thinpool_name = strdup(value);
+        dlog ("Thinpool Thinpool Name : %s\n", ptr->thinpool_name);
+        rc = PASS ;
+    }
+    else if (MATCH(THINMETA_CONFIG_SECTION, "critical_threshold"))
+    {
+        ptr->critical_threshold = atoi(value);
+        dlog ("Thinpool Critical Alarm Threshold : %d%%\n", ptr->critical_threshold);
+        rc = PASS ;
+    }
+    else if (MATCH(THINMETA_CONFIG_SECTION, "alarm_on"))
+    {
+        ptr->alarm_on = atoi(value);
+        dlog ("Thinpool Metadata alarm_on : %s\n", ptr->alarm_on? "On": "Off");
+        rc = PASS ;
+    }
+    else if (MATCH(THINMETA_CONFIG_SECTION, "autoextend_on"))
+    {
+        ptr->autoextend_on = atoi(value);
+        dlog ("Thinpool Metadata autoextend : %s\n", ptr->autoextend_on? "On": "Off");
+        rc = PASS ;
+    }
+    /* NOTE(review): key is spelled "autoexent_by" while the field is
+     * autoextend_by - verify against the shipped config file before
+     * changing either side */
+    else if (MATCH(THINMETA_CONFIG_SECTION, "autoexent_by"))
+    {
+        ptr->autoextend_by = atoi(value);
+        dlog ("Metadata Autoextend by : %d\n", ptr->autoextend_by);
+        rc = PASS ;
+    }
+    else if (MATCH(THINMETA_CONFIG_SECTION, "autoextend_percent"))
+    {
+        ptr->autoextend_percent = atoi(value);
+        dlog ("Thinpool Metadata Autoextend by : %s\n",
+               ptr->autoextend_percent? "percents": "absolute value (MiB)");
+        rc = PASS ;
+    }
+    else if (MATCH(THINMETA_CONFIG_SECTION, "audit_period"))
+    {
+        ptr->audit_period = atoi(value);
+        dlog ("Metadata Audit Period : %ds\n", ptr->audit_period);
+        rc = PASS ;
+    }
+
+    return (rc);
+}
+
+/*****************************************************************************
+ *
+ * Name    : rmon_resource_config
+ *
+ * Purpose : Read resource config file settings into the daemon configuration
+ *
+ * Params  : user    - the resource_config_type to fill in
+ *           section, name, value - the current ini entry
+ *
+ * Returns : PASS when the entry is a recognized [resource] key,
+ *           FAIL otherwise
+ *
+ * Notes   : Only resource, severity and debounce set a bit in ptr->mask.
+ *           resource and severity are stored as strdup copies.
+ *
+ *****************************************************************************/
+int rmon_resource_config ( void * user,
+                           const char * section,
+                           const char * name,
+                           const char * value)
+{
+    int rc = FAIL ;
+    resource_config_type * ptr = (resource_config_type*)user;
+
+    if (MATCH("resource", "resource"))
+    {
+        ptr->mask |= CONF_RESOURCE ;
+        ptr->resource = strdup(value);
+        dlog ("Resource : %s\n", ptr->resource);
+        rc = PASS ;
+    }
+    else if (MATCH("resource", "severity"))
+    {
+        ptr->mask |= CONF_SEVERITY ;
+        ptr->severity = strdup(value);
+        dlog ("Severity : %s\n", ptr->severity );
+        rc = PASS ;
+    }
+    else if (MATCH("resource", "debounce"))
+    {
+        ptr->mask |= CONF_DEBOUNCE ;
+        ptr->debounce = atoi(value);
+        dlog ("Debounce : %d\n", ptr->debounce );
+        rc = PASS ;
+    }
+    /* percentage thresholds */
+    else if (MATCH("resource", "minor_threshold"))
+    {
+        ptr->minor_threshold = atoi(value);
+        dlog ("Minor Threshold : %d\n", ptr->minor_threshold );
+        rc = PASS ;
+    }
+    else if (MATCH("resource", "major_threshold"))
+    {
+        ptr->major_threshold = atoi(value);
+        dlog ("Major Threshold : %d\n", ptr->major_threshold );
+        rc = PASS ;
+    }
+    else if (MATCH("resource", "critical_threshold"))
+    {
+        ptr->critical_threshold = atoi(value);
+        dlog ("Critical Threshold : %d\n", ptr->critical_threshold );
+        rc = PASS ;
+    }
+    /* absolute thresholds ; one set per node (0 and 1) */
+    else if (MATCH("resource", "minor_threshold_abs_node0"))
+    {
+        ptr->minor_threshold_abs_node0 = atoi(value);
+        dlog ("Minor Threshold Absolute Node 0 : %d\n", ptr->minor_threshold_abs_node0 );
+        rc = PASS ;
+    }
+    else if (MATCH("resource", "major_threshold_abs_node0"))
+    {
+        ptr->major_threshold_abs_node0 = atoi(value);
+        dlog ("Major Threshold Absolute Node 0 : %d\n", ptr->major_threshold_abs_node0 );
+        rc = PASS ;
+    }
+    else if (MATCH("resource", "critical_threshold_abs_node0"))
+    {
+        ptr->critical_threshold_abs_node0 = atoi(value);
+        dlog ("Critical Threshold Absolute Node 0 : %d\n", ptr->critical_threshold_abs_node0 );
+        rc = PASS ;
+    }
+    else if (MATCH("resource", "minor_threshold_abs_node1"))
+    {
+        ptr->minor_threshold_abs_node1 = atoi(value);
+        dlog ("Minor Threshold Absolute Node 1 : %d\n", ptr->minor_threshold_abs_node1 );
+        rc = PASS ;
+    }
+    else if (MATCH("resource", "major_threshold_abs_node1"))
+    {
+        ptr->major_threshold_abs_node1 = atoi(value);
+        dlog ("Major Threshold Absolute Node 1 : %d\n", ptr->major_threshold_abs_node1 );
+        rc = PASS ;
+    }
+    else if (MATCH("resource", "critical_threshold_abs_node1"))
+    {
+        ptr->critical_threshold_abs_node1 = atoi(value);
+        dlog ("Critical Threshold Absolute Node 1 : %d\n", ptr->critical_threshold_abs_node1 );
+        rc = PASS ;
+    }
+    else if (MATCH("resource", "num_tries"))
+    {
+        ptr->num_tries = atoi(value);
+        dlog ("Number of Tries : %d\n", ptr->num_tries );
+        rc = PASS ;
+    }
+    else if (MATCH("resource", "alarm_on"))
+    {
+        ptr->alarm_status= atoi(value);
+        dlog ("Resource Alarm Status : %d\n", ptr->alarm_status);
+        rc = PASS ;
+    }
+    else if (MATCH("resource", "percent"))
+    {
+        ptr->percent= atoi(value);
+        dlog ("Resource Percent : %d\n", ptr->percent);
+        rc = PASS ;
+    }
+
+    return (rc);
+}
+
+/*****************************************************************************
+ *
+ * Name    : daemon_configure
+ *
+ * Purpose : Read process config file settings into the daemon configuration
+ *
+ * Returns : PASS,
+ *           FAIL_LOAD_INI if either ini file cannot be parsed,
+ *           FAIL_INI_CONFIG if the loaded settings do not produce the
+ *           expected configuration mask.
+ *
+ * Notes   : The audit, pm and ntp audit periods are clamped to their
+ *           RMON_MIN_xxx / RMON_MAX_xxx bounds before being copied into
+ *           the control structure.
+ *
+ *****************************************************************************/
+int daemon_configure ( void )
+{
+    int rc = PASS ;
+
+    if (ini_parse( CONFIG_FILE, rmon_config_handler, &rmon_config) < 0)
+    {
+        elog("Can't load '%s'\n", CONFIG_FILE );
+        return (FAIL_LOAD_INI);
+    }
+
+    if (ini_parse(MTCE_INI_FILE, keystone_config_handler, &rmon_config) < 0)
+    {
+        elog ("Can't load '%s'\n", MTCE_INI_FILE );
+        return (FAIL_LOAD_INI);
+    }
+
+    get_debug_options ( CONFIG_FILE, &rmon_config );
+
+    /* Verify loaded config against an expected mask
+     * as an ini file fault detection method */
+    if ( rmon_config.mask != CONF_MASK )
+    {
+        /* the logged value holds the expected bits that are missing */
+        elog ("Error: Agent configuration failed (%x)\n",
+               ((-1 ^ rmon_config.mask) & CONF_MASK));
+        return (FAIL_INI_CONFIG);
+    }
+
+    /* Manage the daemon pulse period setting - ensure in bound values */
+    if ( rmon_config.audit_period < RMON_MIN_AUDIT_PERIOD )
+    {
+        rmon_ctrl.audit_period = RMON_MIN_AUDIT_PERIOD ;
+    }
+    else if ( rmon_config.audit_period > RMON_MAX_AUDIT_PERIOD )
+    {
+        rmon_ctrl.audit_period = RMON_MAX_AUDIT_PERIOD ;
+    }
+    else
+    {
+        rmon_ctrl.audit_period = rmon_config.audit_period ;
+    }
+    ilog("Event Audit Period: %d secs\n", rmon_ctrl.audit_period );
+    rmon_ctrl.rmon_critical_thr = rmon_config.rmon_critical_thr;
+
+    /* Manage the ceilometer pm period setting - ensure in bound values */
+    if ( rmon_config.pm_period < RMON_MIN_PM_PERIOD )
+    {
+        rmon_ctrl.pm_period = RMON_MIN_PM_PERIOD ;
+    }
+    else if ( rmon_config.pm_period > RMON_MAX_PM_PERIOD )
+    {
+        rmon_ctrl.pm_period = RMON_MAX_PM_PERIOD ;
+    }
+    else
+    {
+        rmon_ctrl.pm_period = rmon_config.pm_period ;
+    }
+    ilog("PM Audit Period: %d\n", rmon_ctrl.pm_period );
+
+    /* Manage the NTP query pulse period setting - ensure in bound values */
+    if ( rmon_config.ntp_audit_period < RMON_MIN_NTP_AUDIT_PERIOD )
+    {
+        rmon_ctrl.ntp_audit_period = RMON_MIN_NTP_AUDIT_PERIOD ;
+    }
+    else if ( rmon_config.ntp_audit_period > RMON_MAX_NTP_AUDIT_PERIOD )
+    {
+        rmon_ctrl.ntp_audit_period = RMON_MAX_NTP_AUDIT_PERIOD ;
+    }
+    else
+    {
+        rmon_ctrl.ntp_audit_period = rmon_config.ntp_audit_period ;
+    }
+    ilog("NTP Audit Period: %d secs\n", rmon_ctrl.ntp_audit_period );
+
+
+    // NTPQ Command timeout
+    if ( rmon_config.ntpq_cmd_timeout >= rmon_ctrl.ntp_audit_period )
+    {
+        /* NOTE(review): NTPQ_CMD_TIMEOUT is also the mask bit set by
+         * rmon_config_handler for this key - confirm that its value is
+         * the intended default in seconds */
+        rmon_ctrl.ntpq_cmd_timeout = NTPQ_CMD_TIMEOUT ;
+
+        /* report the rejected (configured) value, not the forced one */
+        wlog("NTPQ command timeout (%d secs) should be less than ntp_audit_period (%d secs) ; forcing default\n",
+              rmon_config.ntpq_cmd_timeout, rmon_ctrl.ntp_audit_period );
+    }
+    else
+    {
+        rmon_ctrl.ntpq_cmd_timeout = rmon_config.ntpq_cmd_timeout ;
+    }
+    ilog("NTPQ command timeout: %d secs\n", rmon_ctrl.ntpq_cmd_timeout );
+
+    rmon_ctrl.per_node = rmon_config.per_node;
+
+    return (rc);
+}
+
+/****************************/
+/* Initialization Utilities */
+/****************************/
+
+/* Construct the messaging sockets *
+ * 1. receive socket (mtc_client_rx_socket) *
+ * 2. transmit socket (mtc_client_tx_socket) *
+ *
+ * Returns the result of rmon_port_init */
+int socket_init ( void )
+{
+    int rc;
+
+    rmon_msg_init ( );
+    /* Init the resource monitor api tx port.
+     * This is the port that the rmon client api uses to
+     * inform rmon of any registering or deregistering client
+     * processes */
+    rc = rmon_port_init ( rmon_config.rmon_api_tx_port );
+
+    return (rc);
+}
+
+/*****************************************************************************
+ *
+ * Name    : daemon_init
+ *
+ * Purpose : initialize the daemon and sockets
+ *
+ * Params  : iface        - management interface name
+ *           nodetype_str - node type string ; decoded into the control
+ *                          structure's nodetype, function and subfunction
+ *
+ * Returns : PASS, or the FAIL_xxx code of the first step that failed.
+ *           A configuration failure now ends initialization instead of
+ *           being overwritten by the result of the following steps.
+ *
+ * Notes   : Blocks until the initial config complete and go-enabled flag
+ *           files exist.
+ *
+ *****************************************************************************/
+int daemon_init ( string iface, string nodetype_str )
+{
+    int rc = PASS ;
+    char temp_hostname [MAX_HOST_NAME_SIZE+1];
+
+    /* init the control struct */
+    memset ( &rmon_ctrl.my_hostname[0], 0, MAX_HOST_NAME_SIZE+1);
+    memset ( &temp_hostname[0], 0, MAX_HOST_NAME_SIZE+1);
+    rmon_ctrl.my_macaddr = "" ;
+    rmon_ctrl.my_address = "" ;
+    rmon_ctrl.resources = 0 ;
+    rmon_ctrl.clients = 0 ;
+
+    /* Assign interface to config ; a private copy is kept because 'iface'
+     * is a by-value parameter whose storage ends when this call returns.
+     * The copy lives for the lifetime of the daemon. */
+    rmon_config.mgmnt_iface = strdup ( iface.c_str() );
+    if ( rmon_config.mgmnt_iface == NULL )
+    {
+        elog ("failed to allocate management interface name\n");
+        return ( FAIL );
+    }
+
+    if ( daemon_files_init ( ) != PASS )
+    {
+        elog ("Pid, log or other files could not be opened\n");
+        return ( FAIL_FILES_INIT ) ;
+    }
+
+    if ( set_host_functions ( nodetype_str, &rmon_ctrl.nodetype, &rmon_ctrl.function, &rmon_ctrl.subfunction ) != PASS )
+    {
+        elog ("failed to extract nodetype\n");
+        return ( FAIL_NODETYPE );
+    }
+
+    /* Bind signal handlers */
+    if ( daemon_signal_init () != PASS )
+    {
+        elog ("daemon_signal_init failed\n");
+        return ( FAIL_SIGNAL_INIT );
+    }
+
+    /************************************************************************
+     * There is no point continuing with init ; i.e. running daemon_configure,
+     * initializing sockets and trying to query for an ip address until the
+     * daemon's configuration requirements are met. Here we wait for those
+     * flag files to be present before continuing.
+     ************************************************************************
+     * Wait for /etc/platform/.initial_config_complete & /var/run/goenabled */
+    daemon_wait_for_file ( CONFIG_COMPLETE_FILE , 0);
+    daemon_wait_for_file ( GOENABLED_MAIN_PASS , 0);
+
+    /* Configure the daemon ; nothing below is meaningful without it */
+    if ( (rc = daemon_configure ( )) != PASS )
+    {
+        elog ("Daemon service configuration failed (rc:%i)\n", rc );
+        return ( FAIL_DAEMON_CONFIG );
+    }
+
+    /* This ensures any link aggregation interface overrides the physical */
+    rmon_config.mgmnt_iface = daemon_get_iface_master ( rmon_config.mgmnt_iface );
+
+    /* Log the startup settings */
+    ilog("Interface : %s\n", rmon_config.mgmnt_iface );
+    ilog("TX Interface: %d\n", rmon_config.rmon_tx_port );
+
+    get_iface_macaddr ( rmon_config.mgmnt_iface, rmon_ctrl.my_macaddr );
+    get_iface_address ( rmon_config.mgmnt_iface, rmon_ctrl.my_address , true );
+    get_iface_hostname ( rmon_config.mgmnt_iface, &temp_hostname[0] );
+
+    /* "host=<hostname>" ; bounded to the size the buffer was cleared with
+     * so that a full length hostname cannot overrun it */
+    snprintf ( &rmon_ctrl.my_hostname[0], MAX_HOST_NAME_SIZE+1,
+               "host=%s", temp_hostname );
+
+    if ( (rc = rmon_hdlr_init (&rmon_ctrl)) != PASS )
+    {
+        elog ("rmon_hdlr_init failed (rc:%d)\n", rc );
+        rc = FAIL_HDLR_INIT ;
+    }
+
+    /* Setup the messaging sockets */
+    else if ( (rc = socket_init ( )) != PASS )
+    {
+        elog ("socket initialization failed (rc:%d)\n", rc );
+        rc = FAIL_SOCKET_INIT ;
+    }
+
+    return (rc);
+}
+
+/*****************************************************************************
+ *
+ * Name : daemon_service_run
+ *
+ * Purpose : The main rmon service launch
+ *
+ * Waits for initial config complete and then go enabled pass flag files
+ * before starting resource monitoring.
+ * + *****************************************************************************/ +void daemon_service_run ( void ) +{ + rmon_service ( &rmon_ctrl ); /* hand the daemon's control struct to rmon_service() */ + daemon_exit (); /* executed only after rmon_service() returns */ +} + +/* Push daemon state to log file: dumps the membuf banner, then the membuf itself */ +void daemon_dump_info ( void ) +{ + daemon_dump_membuf_banner (); + daemon_dump_membuf(); +} + +const char MY_DATA [100] = { "eieio\n" } ; /* fixed text handed out by daemon_stream_info() */ +const char * daemon_stream_info ( void ) +{ + return (&MY_DATA[0]); +} + +/***************************************************************************** + * + * Name : daemon_run_testhead + * + * Purpose : Run the rmon test suite by sending alarms to maintenance + * (To be used in Sprint 11 for testing) + * + *****************************************************************************/ +int daemon_run_testhead ( void ) +{ + /* Not implemented yet: no test is run and FAIL is always returned */ + return (FAIL); +} + + diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmonMsg.cpp b/mtce-common/cgts-mtce-common-1.0/rmon/rmonMsg.cpp new file mode 100644 index 00000000..a87738db --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmonMsg.cpp @@ -0,0 +1,777 @@ +/* + * Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGCS Platform Resource Monitor Messaging API + * This class implements a server that accepts client processes + * registering and deregistering for rmon notifications. This class + * also implements a send function to send alarm messages and clear + * messages to the clients registered for a particular resource. 
+ */ + +#include /* for config dir reading */ +#include /* for the list of conf file names */ +#include +#include +#include +#include +#include "rmon.h" +#include "rmonApi/rmon_nodeMacro.h" + + +/** + * Messaging Socket Control Struct - The allocated struct + */ + +static rmon_socket_type rmon_sock; +rmon_socket_type * rmon_getSock_ptr ( void ) +{ + return ( &rmon_sock ); +} + +msgSock_type * get_mtclogd_sockPtr ( void ) +{ + return (&rmon_sock.mtclogd); +} + +/****************************/ +/* Initialization Utilities */ +/****************************/ + +/* Init the messaging socket control structure + * The following messaging interfaces use this structure and + * are initialized separately + * */ + +void rmon_msg_init ( void ) +{ + memset(&rmon_sock, 0, sizeof(rmon_sock)); +} + +void rmon_msg_fini ( void ) +{ + if ( rmon_sock.rmon_tx_sock ) { + close (rmon_sock.rmon_tx_sock); + } if ( rmon_sock.rmon_rx_sock ) { + close (rmon_sock.rmon_rx_sock); + } if ( rmon_sock.netlink_sock ) { + close (rmon_sock.netlink_sock); + } if ( rmon_sock.ioctl_sock ) { + close (rmon_sock.ioctl_sock); + } +} + + /* Create the default rmon api UDP socket and bind it to 127.0.0.1:tx_port (the port provided in + /etc/rmond.conf). Returns the FIONBIO ioctl result once bound, -errno if socket() or bind() fails, FAIL when tx_port is 0 */ +int rmon_port_init ( int tx_port ) +{ + int val = 1 ; + int rc = FAIL ; + if ( tx_port ) + { + rmon_sock.rmon_tx_port = tx_port ; + + rmon_sock.rmon_tx_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if ( 0 >= rmon_sock.rmon_tx_sock ) + return (-errno); + + if ( setsockopt ( rmon_sock.rmon_tx_sock , SOL_SOCKET, SO_REUSEADDR, &val, sizeof(int)) == -1 ) + { + wlog ( "rmon: failed to set rmon api tx socket as re-useable (%d:%m)\n", errno ); + } + + /* Set socket to be non-blocking. 
*/ + rc = ioctl(rmon_sock.rmon_tx_sock, FIONBIO, (char *)&val); + if ( 0 > rc ) + { + elog ("Failed to set rmon tx socket non-blocking\n"); + } + + /* Setup with localhost ip */ + memset(&rmon_sock.rmon_tx_addr, 0, sizeof(struct sockaddr_in)); + rmon_sock.rmon_tx_addr.sin_family = AF_INET ; + // rmon_sock.rmon_addr.sin_addr.s_addr = htonl(INADDR_ANY); + rmon_sock.rmon_tx_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + rmon_sock.rmon_tx_addr.sin_port = htons(rmon_sock.rmon_tx_port) ; + + /* bind socket to the receive addr */ + if ( bind ( rmon_sock.rmon_tx_sock, (const struct sockaddr *)&rmon_sock.rmon_tx_addr, sizeof(struct sockaddr_in)) == -1 ) + { + rc = -errno ; /* latch errno first: elog() and close() below may overwrite it */ + elog ( "failed to bind to 'tx' socket with port %d (%d:%m)\n", tx_port, errno ); + close (rmon_sock.rmon_tx_sock); + rmon_sock.rmon_tx_sock = 0 ; /* mark closed ; falls through to the common return (rc) */ + } + } + else + { + elog ("No tx port specified\n"); + } + + return (rc) ; +} + +/* Open a socket for a new client process */ +int open_resource_socket ( char str[RMON_MAX_LEN], char registered_not[RMON_MAX_LEN], int port ) +{ + int rc = FAIL ; + int on = 1; + registered_clients clt; + + memset((char*)&clt, 0, sizeof(clt)); + strcpy(clt.registered_not, registered_not); + + clt.rx_sock.rmon_rx_port = port - 1 ; + clt.rx_sock.rmon_rx_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + + if ( 0 >= clt.rx_sock.rmon_rx_sock ) + { + elog ("failed to open 'rx' socket (%d:%m)", errno ); + return (-errno); + } + + if ( setsockopt ( clt.rx_sock.rmon_rx_sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(int)) == -1 ) + { + wlog ( "rmon: failed to set rmon api rx socket as re-useable (%d:%m)\n", errno); + } + + /* Set socket to be non-blocking. 
*/ + rc = ioctl(clt.rx_sock.rmon_rx_sock, FIONBIO, (char *)&on); + if ( 0 > rc ) + { + elog ("Failed to set rmon rx socket non-blocking\n"); + } + + /* Setup with localhost ip */ + memset(&clt.rx_sock.rmon_rx_addr, 0, sizeof(struct sockaddr_in)); + clt.rx_sock.rmon_rx_addr.sin_family = AF_INET ; + // rmon_sock.rmon_addr.sin_addr.s_addr = htonl(INADDR_ANY); + clt.rx_sock.rmon_rx_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + clt.rx_sock.rmon_rx_addr.sin_port = htons(clt.rx_sock.rmon_rx_port) ; + clt.port = port; + strcpy(clt.client_name, str); + + /* Prop the port numnber into the message struct */ + if ( clt.port ) { + clt.msg.tx_port = clt.port ; + } + + if ( clt.msg.tx_port ) + { + /* if the sock is already open then close it first */ + if ( clt.msg.tx_sock ) + { + wlog ("%s open on already open socket %d, closing first\n", + clt.client_name, clt.msg.tx_sock ); + close (clt.msg.tx_sock); + } + clt.msg.tx_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if ( 0 >= clt.msg.tx_sock ) + { + elog ("failed to open 'tx' socket (%d:%m)", errno ); + return (-errno); + } + + /* Setup with localhost ip */ + memset(&clt.msg.tx_addr, 0, sizeof(struct sockaddr_in)); + clt.msg.tx_addr.sin_family = AF_INET ; + clt.msg.tx_addr.sin_addr.s_addr = inet_addr(LOOPBACK_IP); + clt.msg.tx_addr.sin_port = htons(clt.msg.tx_port) ; + + /* Make the resource monitor client api socket non-blocking */ + rc = ioctl(clt.msg.tx_sock , FIONBIO, (char *)&on); + if ( 0 > rc ) + { + elog("Failed to set rmon socket non-blocking\n"); + } + + add_registered_client(clt); + + rc = PASS ; + } + else + { + elog ("%s has no port specified\n", clt.client_name ); + } + return (rc) ; +} + +/* close the client process socket */ +void close_resource_socket ( registered_clients * ptr ) +{ + if ( ptr->msg.tx_sock ) { + close ( ptr->msg.tx_sock ); + } +} + +/* remove a client from the list of registered clients */ +int delete_client ( int clients, int index ) +{ + /* close the client socket first */ + 
close_resource_socket( get_registered_clients_ptr(index)); + if (index == (clients -1 )) { + registered_clients *tmp_ptr = get_registered_clients_ptr(index); + memset(tmp_ptr, 0 , sizeof(*tmp_ptr)); + } + else { + + for (int j = index; j < (clients - 1); j++) + { + registered_clients * clt = get_registered_clients_ptr(j); + registered_clients * cltTwo = get_registered_clients_ptr(j+1); + *clt = *cltTwo; + cltTwo = clt; + } + } + clients--; + ilog("deleted registered client, %d clients left \n",clients); + return clients; +} + +void send_response( char message[RMON_MAX_LEN], registered_clients * clt ) +{ + int rc; + /* send a message to the rmon api to tell it the client is registered or deregistered */ + rc = sendto ( clt->rx_sock.rmon_rx_sock, + &message , + strlen ( message ), 0, + (struct sockaddr *) &clt->rx_sock.rmon_rx_addr, + sizeof(struct sockaddr_in)); + + if ( 0 >= rc ) + { + elog ("sendto error (%d:%s)\n", errno , strerror(errno)); + } + +} + +/* Send the outstanding registration and deregistration messages to rmon */ +void rmon_alive_notification (int & clients) +{ + FILE * pFile; + char * line = NULL; + size_t len = RMON_MAX_LEN; + ssize_t read; + vector active_clients; + vector dereg_clients; + char buf[RMON_MAX_LEN]; + + active_clients.clear(); + dereg_clients.clear(); + + /* service deregister requests in queue */ + pFile = fopen (RMON_API_DEREG_DIR , "r"); + if (pFile != NULL) { + // take out a reader lock on this file incase another + // entity is exclusively writing to it at this time + flock(fileno(pFile), LOCK_SH); + while ((read = getline(&line, &len, pFile)) != -1) { + clients = rmon_service_file_inbox(clients, line, false ); + string str(line, find(line, line + len, '\0')); + /* add the deregistered clients to the list to avoid relaunching them */ + dereg_clients.push_back(str); + } + // release shared lock + flock(fileno(pFile), LOCK_UN); + fclose(pFile); + } + + /* In the case that rmon restarts or rmon_alive_notifaction() + * is called 
periodically, and the clients have not re-registered, + * then attempt registration from active.txt, ONLY for clients + * that are not already in the registered_client framework + */ + pFile = fopen (RMON_API_ACTIVE_DIR , "r"); + if (pFile != NULL) { + // take out a reader lock on this file incase another + // entity is exclusively writing to it at this time + flock(fileno(pFile), LOCK_SH); + ifstream fin( RMON_API_ACTIVE_DIR ); + string readLine; + + while (getline(fin, readLine)) { + if ((dereg_clients.empty()) || + (find(dereg_clients.begin(), dereg_clients.end(), readLine) == dereg_clients.end())) { + /* only add a previously active client if it has not de-registered */ + active_clients.push_back(readLine); + } + } + // release shared lock + flock(fileno(pFile), LOCK_UN); + fclose(pFile); + } + // remove(RMON_API_ACTIVE_DIR); + for (unsigned int i=0; i active_clients; + vector new_active_clients; + char lineBuf[NOT_SIZE]; + + active_clients.clear(); + new_active_clients.clear(); + + pFile = fopen (RMON_API_ACTIVE_DIR , "r"); + + if (pFile != NULL) { + // take out a reader lock which will block + // if a writer has exclusive access to this + // file. 
+ flock(fileno(pFile), LOCK_SH); + ifstream fin( RMON_API_ACTIVE_DIR ); + string readLine; + + while (getline(fin, readLine)) { + active_clients.push_back(readLine); + } + // release shared lock + flock(fileno(pFile), LOCK_UN); + fclose(pFile); + } + + for (unsigned int i=0; iclient_name, str) == 0 ) + { + found = true; + if (strcmp(CLR_CLIENT, registered_not) == 0) { + /* the client process wants to deregister, delete it and close it's socket */ + remove_active_client(clt->client_name); + total_clients = delete_client(clients, j); + break; + } + break; + } + } + + /* only add a client process if it is not already added */ + if (!found) { + + ilog("registering client \n"); + if ( str[0] != '\0' ) + { + rc = open_resource_socket(str, registered_not, port); + if (rc == FAIL) { + wlog("resource client port open failed \n"); + } else if (rc==PASS) { + + total_clients++; + if ( add == true ) + { + /* Add the client to the active clients file */ + pFile = fopen (RMON_API_ACTIVE_DIR , "a+"); + if (pFile) + { + // take out a writer lock on this file to + // ensure that no other entity is writing to it + // at this time + int lock = flock(fileno(pFile), LOCK_EX); + if (lock < 0) + { + elog("Failed to get an exclusive on" + " '%s' (errno: %d)", RMON_API_ACTIVE_DIR, errno); + } + else + { + ilog ("adding %s to %s\n", active_buf, RMON_API_ACTIVE_DIR ); + fprintf(pFile, "%s\n", active_buf); + + // release the lock + flock(fileno(pFile), LOCK_UN); + } + fclose(pFile); + } + else + { + elog("Failed to open file %s", RMON_API_ACTIVE_DIR); + } + } + else + { + dlog ("avoid adding duplicate entry\n"); + } + } + } + else + { + wlog ("Null string !\n"); + } + } + + return total_clients; +} + +/* Service client registration and deregistration requests from select */ +int rmon_service_inbox ( int clients ) +{ + #define MAX_T (3) + int count = 0 ; + int bytes = 0 ; + char buf[RMON_MAX_LEN] ; + char active_buf[RMON_MAX_LEN] ; + socklen_t len = sizeof(struct sockaddr_in) ; + int rc; + 
unsigned int port = 0 ; + bool found = false; + int total_clients = clients; + char str[RMON_MAX_LEN] ; + char registered_not[RMON_MAX_LEN]; + FILE * pFile; + + memset ( buf,0,RMON_MAX_LEN); + memset ( str,0,RMON_MAX_LEN); + bytes = recvfrom( rmon_sock.rmon_tx_sock, buf, RMON_MAX_LEN, 0, (struct sockaddr *)&rmon_sock.rmon_tx_addr, &len); + if ( bytes > 0 ) + { + sscanf ( buf, "%99s %99s %u", str, registered_not, &port ); //RMON_MAX_LEN is defined as 100 + strcpy( active_buf, buf ); + + if ( strcmp(str, RMON_RESOURCE_NOT) != 0 ) + { + for (int j=0; jclient_name, str) == 0 ) { + + found = true; + memset ( buf,0,RMON_MAX_LEN ); + strcpy( buf, "client_already_registered"); + send_response(buf, clt); + + if (strcmp(CLR_CLIENT, registered_not) == 0) { + /* the client process wants to deregister, delete it and close it's socket */ + total_clients = delete_client(clients, j); + memset ( buf,0,RMON_MAX_LEN); + strcpy( buf, "deregistered_client"); + send_response(buf, clt); + break; + } + break; + } + } + + /* only add a client process if it is not already added */ + if (!found) { + + ilog("registering client \n"); + if ( str[0] != '\0' ) + { + rc = open_resource_socket(str, registered_not, port); + + if (rc == FAIL) { + dlog("resource client port open failed \n"); + } else if (rc==PASS) { + + memset ( buf,0,RMON_MAX_LEN ); + strcpy( buf, "registered_client"); + registered_clients * clt = get_registered_clients_ptr(clients); + send_response(buf, clt); + total_clients++; + /* Add the client to the active clients file */ + pFile = fopen (RMON_API_ACTIVE_DIR , "a+"); + if (pFile) + { + // take out a writer lock on this file to + // ensure that no other entity is writing to it + // at this time + int lock = flock(fileno(pFile), LOCK_EX); + if (lock < 0) + { + elog("Failed to get an exclusive on" + " '%s' (errno: %d)", RMON_API_ACTIVE_DIR, errno); + } + else + { + fprintf(pFile, "%s\n", active_buf); + // release the lock + flock(fileno(pFile), LOCK_UN); + } + fclose(pFile); + } + 
else + { + elog("Failed to open file %s", RMON_API_ACTIVE_DIR); + } + } + } + else + { + wlog ("Null string !\n"); + } + } + } + else if ( strcmp(str, RMON_RESOURCE_NOT) == 0 ) { + /* read the dynamic file systems file and send a response back */ + process_dynamic_fs_file(); + } + } + else if (( 0 > bytes ) && ( errno != EINTR ) && ( errno != EAGAIN )) + { + wlog_throttled ( count , MAX_T, "receive error (%d:%s)\n", errno, strerror(errno)); + } + + return total_clients; +} + +/* send resource response */ +int rmon_resource_response ( int clients ) +{ + int rc = FAIL ; + + for (int j=0; jwaiting = true; + if(( strcmp(clt->registered_not, RMON_RESOURCE_NOT) == 0)) { + /* only send to clients that are registered for the rmon api updates */ + clt->rx_sequence = 0 ; + memset ( clt->msg.tx_buf, 0, RMON_MAX_LEN ); + strcpy( clt->msg.tx_buf, "done_reading_dynamic_file_systems") ; + dlog("sending: %s on socket: %d bytes: %lu \n", clt->msg.tx_buf, clt->msg.tx_sock, strlen(clt->msg.tx_buf)); + rc = sendto (clt->msg.tx_sock, + clt->msg.tx_buf , + strlen ( clt->msg.tx_buf), 0, + (struct sockaddr *) &clt->msg.tx_addr, + sizeof(struct sockaddr_in)); + if ( rc < 0 ) + { + elog ("%s sendto error (%d:%s) (%s) (%s)\n", + clt->client_name, + errno , strerror(errno), + clt->msg.tx_buf, + inet_ntoa(clt->msg.tx_addr.sin_addr)); + clt->send_err_cnt++ ; + } + else + { + mlog ("%s\n", &clt->msg.tx_buf[0] ); + clt->waiting = false; + clt->send_err_cnt = 0; + clt->send_msg_count++ ; + rc = PASS ; + } + /* + * In certain rare instances, the UDP response packet + * sent back to the rmon client (over localhost), may + * be lost, resulting in the rmon client waiting indefinately + * (or until timeout). 
As a fail-safe, we will also set an + * the acknowledgement flag file that the client can + * look at on timeout + */ + daemon_log(RESPONSE_RMON_RESOURCE_NOT, ""); + } + } + return (rc); +} + +/* send rmon resource set and clear alarm messages to registered client processes */ +int rmon_send_request ( resource_config_type * ptr, int clients) +{ + dlog("%s, number of clients: %d\n", ptr->resource, clients); + int rc = FAIL ; + int total_clients = clients; + + for (int j=0; jwaiting = true; + + dlog("registered notification client: %s\n", clt->registered_not); + if(( strcmp(clt->registered_not, ptr->resource) == 0) || ( strcmp(clt->registered_not, ALL_USAGE) == 0)) { + /* only send to clients that are registered for the resource type in question */ + clt->rx_sequence = 0 ; + memset ( clt->msg.tx_buf, 0, RMON_MAX_LEN ); + sprintf ( clt->msg.tx_buf, "%s %u", ptr->errorMsg, ++clt->tx_sequence ) ; + mlog( "%s sending: %s on socket: %d bytes: %lu\n", + ptr->resource, + clt->msg.tx_buf, + clt->msg.tx_sock, + strlen(clt->msg.tx_buf)); + rc = sendto (clt->msg.tx_sock, + clt->msg.tx_buf , + strlen ( clt->msg.tx_buf), 0, + (struct sockaddr *) &clt->msg.tx_addr, + sizeof(struct sockaddr_in)); + if ( rc < 0 ) + { + elog ("%s %s sendto error (%d:%s) rc: (%d) (%s) (%s)\n", + ptr->resource, + clt->client_name, + errno , strerror(errno), + rc, + clt->msg.tx_buf, + inet_ntoa(clt->msg.tx_addr.sin_addr)); + clt->send_err_cnt++ ; + if (clt->send_err_cnt >= MAX_ERR_CNT) { + /* assume the client process is killed, deregister the client */ + ilog("%s client process: %s is not responding, deregistering it \n", ptr->resource, clt->client_name); + total_clients = delete_client(clients, j); + update_total_clients(total_clients); + } + } + else + { + mlog ("%s %s\n", ptr->resource, &clt->msg.tx_buf[0] ); + clt->waiting = false; + clt->send_err_cnt = 0; + clt->send_msg_count++ ; + rc = PASS ; + } + } + } + return (rc); +} + +/* send rmon interface resource set and clear alarm messages to 
registered client processes */ +int send_interface_msg ( interface_resource_config_type * ptr, int clients) +{ + int rc = FAIL ; + int total_clients = clients; + + for (int j=0; jwaiting = true; + if(( strcmp(clt->registered_not, ptr->resource) == 0) || ( strcmp(clt->registered_not, ALL_USAGE) == 0)) { + /* only send to clients that are registered for the resource type in question */ + clt->rx_sequence = 0 ; + memset ( clt->msg.tx_buf, 0, RMON_MAX_LEN ); + sprintf ( clt->msg.tx_buf, "%s %u", ptr->errorMsg, ++clt->tx_sequence ) ; + mlog("sending: %s on socket: %d bytes: %lu\n", + clt->msg.tx_buf, + clt->msg.tx_sock, + strlen(clt->msg.tx_buf)); + rc = sendto (clt->msg.tx_sock, + clt->msg.tx_buf , + strlen ( clt->msg.tx_buf), 0, + (struct sockaddr *) &clt->msg.tx_addr, + sizeof(struct sockaddr_in)); + if ( 0 >= rc ) + { + elog ("%s sendto error (%d:%s) (%s) (%s)\n", + clt->client_name, + errno , strerror(errno), + clt->msg.tx_buf, + inet_ntoa(clt->msg.tx_addr.sin_addr)); + clt->send_err_cnt++ ; + if (clt->send_err_cnt >= MAX_ERR_CNT) { + /* assume the client process is killed, deregister the client */ + ilog("client process: %s is not responding, deregistering it \n", clt->client_name); + total_clients = delete_client(clients, j); + update_total_clients(total_clients); + } + } + else + { + mlog ("%s\n", &clt->msg.tx_buf[0] ); + clt->waiting = false; + clt->send_err_cnt = 0; + clt->send_msg_count++ ; + rc = PASS ; + } + } + } + return (rc); +} diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmon_resource_notify/Makefile b/mtce-common/cgts-mtce-common-1.0/rmon/rmon_resource_notify/Makefile new file mode 100644 index 00000000..3aa3451b --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmon_resource_notify/Makefile @@ -0,0 +1,32 @@ +# +# Copyright (c) 2014-2015 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +SRCS = rmon_resource_notify.cpp +OBJS = $(SRCS:.cpp=.o) +LDLIBS = -lstdc++ -lrmonapi -lrt +INCLUDES = -I../../common -I../../daemon -I../ -I. +CCFLAGS = -g -O2 -Wall -Wextra -Werror + +STATIC_ANALYSIS_TOOL = cppcheck +STATIC_ANALYSIS_TOOL_EXISTS = $(shell [[ -e `which $(STATIC_ANALYSIS_TOOL)` ]] && echo 1 || echo 0) + +all: build + +.cpp.o: + $(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@ + +static_analysis: +ifeq ($(STATIC_ANALYSIS_TOOL_EXISTS), 1) + $(STATIC_ANALYSIS_TOOL) --language=c++ --enable=warning -U__AREA__ -DWANT_FIT_TESTING *.cpp *.h +else + echo "Warning: '$(STATIC_ANALYSIS_TOOL)' static analysis tool not installed ; bypassing ..." +endif + +build: clean static_analysis $(OBJS) + $(CXX) $(CCFLAGS) $(OBJS) -L../../common -L../../daemon -L../rmonApi/ $(LDLIBS) -o rmon_resource_notify + +clean: + @rm -f $(OBJ) rmon_resource_notify *.o *.a diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmon_resource_notify/rmon_resource_notify.cpp b/mtce-common/cgts-mtce-common-1.0/rmon/rmon_resource_notify/rmon_resource_notify.cpp new file mode 100644 index 00000000..5509fb2e --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmon_resource_notify/rmon_resource_notify.cpp @@ -0,0 +1,550 @@ +/* + * Copyright (c) 2013-2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River CGTS Platform Resource Monitor Resource Notify + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +#include "../rmonApi/rmon_nodeMacro.h" /* for ... 
CREATE_NONBLOCK_INET_UDP_RX_SOCKET */ +#include "rmon_resource_notify.h" + +extern "C" +{ +#include "../rmonApi/rmon_api.h" +} +#define LOOPBACK_IP "127.0.0.1" +#define RX_PORT (2304) + +static char my_hostname [MAX_HOST_NAME_SIZE+1]; +static rmon_socket_type rmon_sock ; +static rmon_socket_type * sock_ptr ; + +/** Client Config mask */ +#define CONFIG_CLIENT_MASK (CONFIG_AGENT_PORT |\ + CONFIG_CLIENT_API_PORT |\ + CONFIG_CLIENT_PORT) + +/****************************/ +/* Initialization Utilities */ +/****************************/ + +/* Initialize the unicast api response message */ +/* One time thing ; tx same message all the time. */ +int rmon_message_init ( void ) +{ + /* Build the transmit api response message */ + memset ( &sock_ptr->tx_message, 0, sizeof (rmon_message_type)); + memcpy ( &sock_ptr->tx_message.m[RMON_HEADER_SIZE], my_hostname, strlen(my_hostname)); + return (PASS); +} + +int rmon_socket_init ( int port, const char * process_name ) +{ + + int on = 1 ; + int rc = PASS ; + + CREATE_NONBLOCK_INET_UDP_RX_SOCKET ( LOOPBACK_IP, + port, + rmon_sock.rmon_api_sock, + rmon_sock.rmon_api_addr, + rmon_sock.rmon_api_port, + rmon_sock.rmon_api_len, + "rmon api socket receive", + rc ); + if ( rc ) return (rc) ; + + /* Open the monitoring socket */ + rmon_sock.rmon_socket = resource_monitor_initialize ( process_name, port, RMON_RESOURCE_NOT ); + //ilog("Resource Monitor API Socket %d\n", rmon_sock.rmon_socket); + if ( 0 > rmon_sock.rmon_socket ) + { + close_syslog(); + return (FAIL); + } + + /* Make the socket non-blocking */ + rc = ioctl(rmon_sock.rmon_socket, FIONBIO, (char *)&on); + if ( 0 > rc ) + { + //elog("Failed to set rmon socket non-blocking (%d:%m)\n", errno ); + return (FAIL_SOCKET_NOBLOCK); + } + return (PASS); +} + + +int daemon_init (int port, const char * process_name ) +{ + int rc = PASS ; + + /* Initialize socket construct and pointer to it */ + memset ( &rmon_sock, 0, sizeof(rmon_sock)); + sock_ptr = &rmon_sock ; + + /* Setup the resmon api 
rx messaging sockets */ + if ( (rc = rmon_socket_init (port, process_name)) != PASS ) + { + // elog("socket initialization failed (rc:%d)\n", rc); + rc = FAIL_SOCKET_INIT; + } + return (rc); +} + +#define RMON_MAX_LEN (100) +int client_service_inbox () +{ +#define MAX_T 100 + int bytes = 0 ; + char buf[RMON_MAX_LEN] ; + socklen_t len = sizeof(struct sockaddr_in) ; + char str[RMON_MAX_LEN]; + int rc = FAIL; + + do + { + memset ( buf,0,RMON_MAX_LEN); + memset ( str,0,RMON_MAX_LEN); + + bytes = recvfrom( rmon_sock.rmon_socket, buf, RMON_MAX_LEN, 0, (struct sockaddr *)&rmon_sock.client_sockAddr, &len); + if ( bytes > 0 ) + { + sscanf ( buf, "%99s", str); + if ( str[0] != '\0' ) + { + if ( strcmp(str, RMON_DONE) == 0) + { + return (PASS); + } + } + return (FAIL); + } + else if (( 0 > bytes ) && ( errno != EINTR ) && ( errno != EAGAIN )) + { + //ilog("problem with test client recv \n"); + } + } while ( bytes > 0 ) ; + + return rc; +} + +/* Maximum length of the dynamic resource list */ +#define DYNAMIC_RESOURCE_MAX (1024) + +int main ( int argc, char *argv[] ) +{ + int rc; + int port = RX_PORT; + const char * process_name = PROCESS_NAME; + char res_name[30]; + char state[20]; + char mount[50]; + char type[20]; + char device[50]; + char volume_group[50]; + string delimiter = ","; + unsigned long long timeout = DEFAULT_RESPONSE_TIMEOUT; + char dynamic_res[DYNAMIC_RESOURCE_MAX]; + char resource_name [50]; + struct stat fileInfo; + struct timespec start, stop; + struct flock fl; + int fd; + bool toNotify = false; + vector dynamic_resources; + size_t pos; + string token; + + open_syslog(); + + memset ((char *)&fileInfo, 0 , sizeof(fileInfo)); + memset(&res_name[0], 0, sizeof(res_name)); + memset(&state[0], 0, sizeof(state)); + memset(&mount[0], 0, sizeof(mount)); + memset(&type[0], 0, sizeof(type)); + memset(&device[0], 0, sizeof(device)); + memset(&volume_group[0], 0, sizeof(volume_group)); + + fl.l_whence = SEEK_SET; + fl.l_start = 0; + fl.l_len = 0; + fl.l_pid = getpid(); 
+ + if ((argc > 1) && (strcmp(argv[1],"--help") == 0)) { + printf("usage: rmon_resource_notify " + "--resource-name " + "--resource-state " + "--resource-type " + "--device " + "--mount-point " + "--volume_group " + "--timeout \n"); + close_syslog(); + return FAIL; + } + + for (int i=0; i(mmap(0, fileInfo.st_size, PROT_READ, MAP_SHARED, fd, 0)); + if (map == MAP_FAILED) + { + close(fd); + // elog("Error mmapping the file"); + close_syslog(); + return (FAIL); + } + string oldFile(map); + /* extract the resource name */ + sscanf(dynamic_res, "%49s", resource_name); + string newResource(resource_name); + string updatedResource(dynamic_res); + dynamic_resources.clear(); + + if ( oldFile.find(updatedResource) == string::npos ) + { + if ( oldFile.find(newResource) != string::npos ) + { + /* the resource exists, update it in the file */ + while ((pos = oldFile.find(delimiter)) != string::npos) + { + /* separate the resources from the file */ + token = oldFile.substr(0, pos); + if (token.find(newResource) == string::npos) + { + dynamic_resources.push_back(token); + } + oldFile.erase(0, pos + delimiter.length()); + } + oldFile = ""; + for (unsigned int i=0; i(mmap(0, textsize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)); + if (map == MAP_FAILED) + { + close(fd); + // elog("Error mmapping the file"); + close_syslog(); + return (FAIL); + } + + /* write the resource into memory */ + memcpy(map, text.c_str(), textsize); + + /* write the updated list to the disk */ + if (msync(map, textsize, MS_SYNC) == -1) + { + ; // elog("Could not sync the file to disk"); + } + + + /* free the mmapped memory */ + if (munmap(map, textsize) == -1) + { + /* unlock the file */ + fl.l_type = F_UNLCK; + fcntl(fd, F_SETLK, &fl); + // elog("Error un-mmapping the file"); + close_syslog(); + return (FAIL); + } + } + close(fd); + /* unlock the file */ + fl.l_type = F_UNLCK; + fcntl(fd, F_SETLK, &fl); + if (!toNotify) + { + close_syslog(); + return (PASS); + } + + /* Check to see if rmond is running */ + 
rc = system("pidof rmond"); + if (WEXITSTATUS(rc) != 0) + { + return (PASS); + } + + rc = daemon_init(port, process_name); + if (rc == PASS) { + + if( clock_gettime( CLOCK_MONOTONIC, &start) == -1 ) { + // elog("clock gettime \n" ); + close_syslog(); + return (FAIL); + } + + rmon_message_init(); + rmon_sock.rmon_socket = resource_monitor_get_sel_obj (); + std::list socks; + socks.clear(); + socks.push_front ( rmon_sock.rmon_socket ); + socks.sort(); + // remove the rmon resource notify flag file + // as this will be reset by rmon + remove (RESPONSE_RMON_RESOURCE_NOT); + /* signal to rmon that the dynamic file has been written */ + rc = rmon_notification ( RMON_RESOURCE_NOT ); + + for ( ; ; ) + { + /* Initialize the timeval struct */ + rmon_sock.waitd.tv_sec = 0; + rmon_sock.waitd.tv_usec = SELECT_TIMEOUT * 100; + + /* Initialize the master fd_set */ + FD_ZERO(&rmon_sock.readfds); + FD_SET(rmon_sock.rmon_socket, &rmon_sock.readfds); + + rc = select( socks.back()+1, + &rmon_sock.readfds, NULL, NULL, + &rmon_sock.waitd); + + /* If the select time out expired then */ + if (( rc < 0 ) || ( rc == 0 )) + { + /* Check to see if the select call failed. */ + /* ... but filter Interrupt signal */ + if (( rc < 0 ) && ( errno != EINTR )) + { + //ilog("Socket Select Failed (rc:%d) %s \n", errno, strerror(errno)); + } + } + + if ( FD_ISSET(rmon_sock.rmon_socket, &rmon_sock.readfds)) + { + rc = client_service_inbox(); + + if (rc == PASS) { + close_syslog(); + return PASS; + } + } + + if ( clock_gettime( CLOCK_MONOTONIC, &stop) == -1 ) { + // elog("clock gettime\n"); + return (FAIL); + } + unsigned long delta = (stop.tv_sec - start.tv_sec) * 1000 + (stop.tv_nsec - start.tv_nsec) / 1000000; + if (delta > timeout) + { + /* we exceeded the timeout. + * It may have happened that rmon + * sent its acknowledgment but that response + * got lost. 
In that case check for the flag file + * as a last ditch effort + */ + if (access(RESPONSE_RMON_RESOURCE_NOT, F_OK) != -1) { + close_syslog() + return (PASS); + } + close_syslog(); + return (FAIL); + } + } + } + close_syslog(); + return FAIL; +} + diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmon_resource_notify/rmon_resource_notify.h b/mtce-common/cgts-mtce-common-1.0/rmon/rmon_resource_notify/rmon_resource_notify.h new file mode 100644 index 00000000..74dbba2d --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmon_resource_notify/rmon_resource_notify.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2013-2015 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + + /** + * @file + * Wind River CGTS Platform Resource Monitor Resource Notify Header + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "nodeBase.h" +#include "nodeUtil.h" + +/** Maximum service fail count before action */ +#define MAX_FAIL_COUNT (1) + +#define RMON_HEADER_SIZE (15) +#define RMON_MAX_MSG (50) +#define MAX_COUNT (3) +#define SELECT_TIMEOUT (100) +#define DEFAULT_RESPONSE_TIMEOUT (120 * 1000) // 2mins + +/* default process name if none is specified */ +#define PROCESS_NAME ((const char *)"rmonResourceNotify") + +#define RMON_DONE ((const char *)"done_reading_dynamic_file_systems") +#define DYNAMIC_FS_FILE ((const char *)"/etc/rmonfiles.d/dynamic.conf") +#define RMON_RESOURCE_NOT ((const char *)"read_dynamic_file_system") +#define RESPONSE_RMON_RESOURCE_NOT ((const char *)"/var/run/.dynamicfs_registered") + +typedef struct +{ + /** Message buffer */ + char m [RMON_MAX_MSG]; + + /** Sequence number */ + unsigned int s ; + + /* Fast Lookup Clue Info */ + unsigned int c ; + + /* Status Flags */ + unsigned int f ; + + /* reserved for future use */ + unsigned int r ; + +} rmon_message_type ; + +/** rmon resource notify socket control structure */ 
+typedef struct +{ + + struct sockaddr_in client_addr ; + socklen_t client_addr_len ; + + /** Unix domain socket used to transmit on-node event messages + * to from other local services such as rmon */ + int send_event_socket ; + struct sockaddr_un agent_domain ; + socklen_t agent_domain_len ; + + /** rmon api Socket using UDP Inet over 'lo' interface */ + int rmon_api_sock ; /**< receive rmon pulses socket */ + int rmon_api_port ; /**< the port */ + struct sockaddr_in rmon_api_addr ; /**< attributes */ + socklen_t rmon_api_len ; /**< length */ + + int rmon_socket ; /**< Active monitor socket */ + /** The addr and port are stored in the shared librmonapi.so library */ + + struct sockaddr_in client_sockAddr ; /**< Client socket attributes */ + socklen_t agentLen ; /**< Agent socket attr struct len */ + socklen_t clientLen ; /**< Client socket attr struct len */ + int tx_socket ; /**< general transmit socket ID */ + int rx_socket ; /**< general receive socket ID */ + rmon_message_type tx_message ; /**< transmit message */ + rmon_message_type rx_message ; /**< receive message */ + int rmon_client_port ; + int fail_count ; /**< Socket retry thresholding */ + + /* For select dispatch */ + struct timeval waitd ; + fd_set readfds; + msgSock_type mtclogd ; + +} rmon_socket_type ; + + diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/cinder_virtual_resource.conf b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/cinder_virtual_resource.conf new file mode 100644 index 00000000..03d19dff --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/cinder_virtual_resource.conf @@ -0,0 +1,27 @@ +[resource] +resource = Cinder LVM Thinpool Usage +debounce = 20 ; number of seconds to wait before degrade clear +severity = critical ; minor, major, critical +minor_threshold = 70 ; minor Cinder LVM Thinpool utilization threshold percentage +major_threshold = 80 ; major Cinder LVM Thinpool utilization threshold percentage +critical_threshold = 90 ; critical Cinder LVM 
Thinpool utilization threshold percentage (use 101 if unused) +minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0 +major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0 +critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 (use 0 if unused) +minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1 +major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1 +critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1 +num_tries = 2 ; number of tries before the alarm is raised +alarm_on = 1 ; 1 for alarm on, 0 for alarm off +percent = 1 ; 1 for percentage used, 0 for absolute value (Cinder LVM Thinpool available in MiB) (default is 1) + +[thinpool_metadata] +vg_name = cinder-volumes ; name of parent VG +thinpool_name = cinder-volumes-pool ; name of LV Thin Pool to monitor +critical_threshold = 80 ; critical alarm threshold percentage for metadata utilization +alarm_on = 1 ; 1 to raise critical alarm on threshold, 0 not to raise alarm +autoextend_on = 1 ; 1 to first try extending the metadata on threshold, 0 for autoextend off. 
+autoexent_by = 20 ; autoextend by a percentage or absolute value in MiB +autoextend_percent = 1 ; percent or MiB +audit_period = 10 ; interval to perform the audit + diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/cpu_resource.conf b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/cpu_resource.conf new file mode 100644 index 00000000..dc0ab8dd --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/cpu_resource.conf @@ -0,0 +1,16 @@ +[resource] +resource = Platform CPU Usage +debounce = 20 ; number of seconds to wait before degrade clear +severity = critical ; minor, major, critical +minor_threshold = 80 ; minor cpu utilization threshold percentage +major_threshold = 90 ; major cpu utilization threshold percentage +critical_threshold = 95 ; critical cpu utilization threshold percentage (use 101 if unused) +minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0 +major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0 +critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 +minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1 +major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1 +critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1 +num_tries = 2 ; number of tries before the alarm is raised +alarm_on = 1 ; 1 for alarm on, 0 for alarm off +percent = 1 ; Always use 1 for this resource (thresholds by percentage) diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/filesystem_resource.conf b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/filesystem_resource.conf new file mode 100644 index 00000000..e8496b54 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/filesystem_resource.conf @@ -0,0 +1,16 @@ +[resource] +resource = Platform Filesystem Usage +debounce = 20 ; number of seconds to wait before degrade clear +severity = critical ; 
minor, major, critical +minor_threshold = 70 ; minor filesystem utilization threshold percentage +major_threshold = 80 ; major filesystem utilization threshold percentage +critical_threshold = 90 ; critical filesystem utilization threshold percentage (use 101 if unused) +minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0 +major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0 +critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 (use 0 if unused) +minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1 +major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1 +critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1 +num_tries = 2 ; number of tries before the alarm is raised +alarm_on = 1 ; 1 for alarm on, 0 for alarm off +percent = 1 ; 1 for percentage used, 0 for absolute value (file system available in MiB) (default is 1) diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/infrastructure_resource.conf b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/infrastructure_resource.conf new file mode 100644 index 00000000..ca0956ac --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/infrastructure_resource.conf @@ -0,0 +1,6 @@ +[resource] +resource = infra +debounce = 20 ; number of seconds to wait before degrade clear +severity = critical ; minor, major, critical +num_tries = 2 ; number of tries before the alarm is raised +alarm_on = 1 ; 1 for alarm on, 0 for alarm off diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/instance_resource.conf b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/instance_resource.conf new file mode 100644 index 00000000..0b2d004c --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/instance_resource.conf @@ -0,0 +1,16 @@ +[resource] +resource = Platform Nova Instances +debounce = 20 ; number of seconds to wait 
before degrade clear +severity = critical ; minor, major, critical +minor_threshold = 80 ; minor memory utilization threshold percentage +major_threshold = 90 ; major memory utilization threshold percentage +critical_threshold = 95 ; critical memory utilization threshold percentage (use 101 if unused) +minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0 +major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0 +critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 (use 0 if unused) +minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1 +major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1 +critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1 +num_tries = 2 ; number of tries before the alarm is raised +alarm_on = 1 ; 1 for alarm on, 0 for alarm off +percent = 1 ; 1 for percentage used, 0 for absolute value (memory available in MiB) (default is 1) diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/management_resource.conf b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/management_resource.conf new file mode 100644 index 00000000..f7f3a513 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/management_resource.conf @@ -0,0 +1,6 @@ +[resource] +resource = mgmt +debounce = 20 ; number of seconds to wait before degrade clear +severity = critical ; minor, major, critical +num_tries = 2 ; number of tries before the alarm is raised +alarm_on = 1 ; 1 for alarm on, 0 for alarm off diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/memory_resource.conf b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/memory_resource.conf new file mode 100644 index 00000000..926e28cd --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/memory_resource.conf @@ -0,0 +1,16 @@ +[resource] +resource = Platform Memory Usage +debounce = 20 ; number of seconds to 
wait before degrade clear +severity = critical ; minor, major, critical +minor_threshold = 70 ; minor memory utilization threshold percentage +major_threshold = 80 ; major memory utilization threshold percentage +critical_threshold = 90 ; critical memory utilization threshold percentage (use 101 if unused) +minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0 +major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0 +critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 (use 0 if unused) +minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1 +major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1 +critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1 +num_tries = 2 ; number of tries before the alarm is raised +alarm_on = 1 ; 1 for alarm on, 0 for alarm off +percent = 1 ; 1 for percentage used, 0 for absolute value (memory available in MiB) (default is 1) diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/nova_virtual_resource.conf b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/nova_virtual_resource.conf new file mode 100644 index 00000000..da968712 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/nova_virtual_resource.conf @@ -0,0 +1,16 @@ +[resource] +resource = Nova LVM Thinpool Usage +debounce = 20 ; number of seconds to wait before degrade clear +severity = critical ; minor, major, critical +minor_threshold = 70 ; minor Nova LVM Thinpool utilization threshold percentage +major_threshold = 80 ; major Nova LVM Thinpool utilization threshold percentage +critical_threshold = 90 ; critical Nova LVM Thinpool utilization threshold percentage (use 101 if unused) +minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0 +major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0 +critical_threshold_abs_node0 = 
102 ; absolute critical threshold value MiB processor node 0 (use 0 if unused) +minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1 +major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1 +critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1 +num_tries = 2 ; number of tries before the alarm is raised +alarm_on = 1 ; 1 for alarm on, 0 for alarm off +percent = 1 ; 1 for percentage used, 0 for absolute value (Nova LVM Thinpool available in MiB) (default is 1) diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/oam_resource.conf b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/oam_resource.conf new file mode 100644 index 00000000..d1b8692f --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/oam_resource.conf @@ -0,0 +1,6 @@ +[resource] +resource = oam +debounce = 20 ; number of seconds to wait before degrade clear +severity = critical ; minor, major, critical +num_tries = 2 ; number of tries before the alarm is raised +alarm_on = 1 ; 1 for alarm on, 0 for alarm off diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/query_ntp_servers.sh b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/query_ntp_servers.sh new file mode 100644 index 00000000..f48768b6 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/query_ntp_servers.sh @@ -0,0 +1,179 @@ +#!/bin/bash +# +# Copyright (c) 2015-2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Return values; +# 0 - All NTP servers are reachable and one is selected +# 1 - No NTP servers are provisioned +# 2 - None of the NTP servers are reachable +# 3 - Some NTP servers reachable and one is selected +# 4 - Some NTP servers reachable but none is selected +# +# This script execute ntpq -np to determined which provisioned servers +# reachable. A server will be considered reachable only when the Tally Code +# is a * or a +. 
Also the controller node will not be considered a reachable +# server +# +# Here is an example of the ntpq command output +# remote refid st t when poll reach delay offset jitter +# ============================================================================== +# +192.168.204.104 206.108.0.133 2 u 203 1024 377 0.226 -3.443 1.137 +# +97.107.129.217 200.98.196.212 2 u 904 1024 377 21.677 5.577 0.624 +# 192.95.27.155 24.150.203.150 2 u 226 1024 377 15.867 0.381 1.124 +# -97.107.129.217 200.98.196.212 2 u 904 1024 377 21.677 5.577 0.624 +# *182.95.27.155 24.150.203.150 2 u 226 1024 377 15.867 0.381 1.124 +# +# The temporary file "/tmp/ntpq_server_info" will look something like this. +# - The first line contains all the external NTP servers configured (it excludes the +# peer controller). +# - The second line lists all the unreachable external NTP servers. It also excludes +# the peer controller. +# +# more /tmp/ntpq_server_info +# 10.10.10.42;10.10.10.43;10.10.10.44; +# 10.10.10.43; +# +# This file is re-created every time this script is run. It is used by the caller of +# the script to get more detail regarding the NTP servers status. 
+# +# This script logs to user.log + +NTP_OK=0 +NTP_NOT_PROVISIONED=1 +NTP_NONE_REACHABLE=2 +NTP_SOME_REACHABLE=3 +NTP_SOME_REACHABLE_NONE_SELECTED=4 + +# is it the ip address of a controller node (returns 1 if it is, 0 otherwise) +isController () +{ + host=$1 + res=$(echo $(grep $host /etc/hosts) | grep controller) + + if [[ "$res" != "" ]] ; then + logger -p info -t $0 "$host is a controller" + return 1 + else + return 0 + fi +} + +# set up output file +ntpq_server_info="/tmp/ntpq_server_info" +rm -f $ntpq_server_info + +# find out if there are any servers provisioned +server_count=$(cat /etc/ntp.conf | awk '{print $1;}' | grep -c '^server') +bad_server_count=0 + +# exit if there are no servers provisioned +if [ $server_count -eq 0 ]; then + logger -p info -t $0 "No NTP servers are provisioned (1)" + exit $NTP_NOT_PROVISIONED +fi + +# query the ntp servers with ntpq +ntpres="$(ntpq -pn)" + +echo -e "\n Results from 'ntpq -pn' command \n $ntpres \n" | logger -p info -t $0 + +# keep for debugging +# the first argument is a filename and used instead of using ntpq +#if [ "$1" ]; then +# ntpres=$(cat $1) +#fi + +# keep for debugging: hard code the filename instead of passing it as an argument +#ntpres=$(cat "/home/wrsroot/test") +#server_count=3 +# + +# remove the header lines +server_list=$( echo "$ntpres" | tail -n +3 ) + +# loop through and find non reachable servers +# a server is reachable when it is prepended with a "*" or a "+" + +SAVEIFS=$IFS +IFS='' + +# list all provisioned servers and save in temp file +while read line +do + server=$(echo $line | awk '{print $1;}') + if [[ "$line" != " "* ]] ; then + # if the first char is not a space then remove it e.g +159.203.31.244 + server=$(echo $server| cut -c 2-) + fi + + # add provisioned ntp server to temp file if not the controller + isController $server + if [[ "$?" 
== 0 ]]; then + echo -n $server";" >> $ntpq_server_info + fi +done < <(echo "$server_list") + +echo >> $ntpq_server_info + +# list all non reachable ntp servers and save in temp file +while read line +do + if [[ "$line" != "*"* ]] && [[ "$line" != "+"* ]] ;then + + server=$(echo $line | awk '{print $1;}') + if [[ "$line" != " "* ]] ; then + # if the first char is not a space then remove it e.g +159.203.31.244 + server=$(echo $server| cut -c 2-) + fi + + # add the non reachable ntp servers to temp file is its not a controller + isController $server + if [[ "$?" == 0 ]]; then + ((bad_server_count++)) + echo -n $server";" >> $ntpq_server_info + fi + fi +done < <(echo "$server_list") +IFS=$SAVEIFS + +# +logger -p info -t $0 Total number of provisioned servers $server_count +logger -p info -t $0 Total number of unreachable servers $bad_server_count +# + +# check if there is a "*" which represent a selected server +# there should be only one but handling multiple +selected=$(echo "$server_list" | grep -c '^*') + +if [ "$bad_server_count" -eq 0 ];then + if [ $selected -eq 0 ]; then + logger -p info -t $0 "All external NTP servers are reachable but none is selected (4)" + exit $NTP_SOME_REACHABLE_NONE_SELECTED + else + logger -p info -t $0 "All external NTP servers are reachable and one is selected (0)" + exit $NTP_OK + fi +fi + +# it does not matter if the peer controller is the server selected, if all the +# external NTP servers are not reachable then we return NTP_NONE_REACHABLE +if [ "$bad_server_count" -eq "$server_count" ];then + logger -p info -t $0 "None of the external NTP servers are reachable (2)" + exit $NTP_NONE_REACHABLE +fi + +if [ "$bad_server_count" -lt "$server_count" ];then + if [ $selected -eq 0 ]; then + # this is probably not a valid scenario + logger -p info -t $0 "Some external NTP servers are reachable but none is selected (4)" + exit $NTP_SOME_REACHABLE_NONE_SELECTED + else + logger -p info -t $0 "Some external NTP servers are not reachable and 
one selected (3)" + exit $NTP_SOME_REACHABLE + fi +fi + +logger -p err -t $0 "Should not exit here" +exit -1 \ No newline at end of file diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/remotelogging_resource.conf b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/remotelogging_resource.conf new file mode 100644 index 00000000..e8540c42 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/remotelogging_resource.conf @@ -0,0 +1,6 @@ +[resource] +resource = Remote Logging Connectivity +debounce = 20 ; number of seconds to wait before degrade clear +severity = minor ; minor, major, critical +num_tries = 2 ; number of tries before the alarm is raised +alarm_on = 1 ; 1 for alarm on, 0 for alarm off diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon new file mode 100644 index 00000000..241b4dea --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon @@ -0,0 +1,97 @@ +#! /bin/sh +# +# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# chkconfig: 2345 95 95 +# +### BEGIN INIT INFO +# Provides: rmon +# Default-Start: 3 5 +# Default-Stop: 0 1 2 6 +# Short-Description: Resource Monitor daemon +### END INIT INFO + +# echo "7:3:respawn:/usr/local/bin/rmond" >> /etc/inittab + +. /etc/init.d/functions + +DAEMON_NAME="rmond" +DAEMON="/usr/local/bin/${DAEMON_NAME}" +IFACE="" + +if [ ! -e "$DAEMON" ] ; then + logger "$DAEMON is missing" + exit 1 +fi + +RETVAL=0 + +PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin +export PATH + +case "$1" in + start) + # Prevent multiple starts + if [ -e /var/lock/subsys/${DAEMON_NAME} ] ; then + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + exit $RETVAL + else + rm -f /var/lock/subsys/${DAEMON_NAME} + fi + fi + echo -n "Starting $DAEMON_NAME: " + start-stop-daemon --start -b -x ${DAEMON} -- + RETVAL=$? 
+ if [ $RETVAL -eq 0 ] ; then + echo "OK" + touch /var/lock/subsys/${DAEMON_NAME} + else + echo "FAIL" + fi + ;; + + stop) + echo -n "Stopping ${DAEMON_NAME}: " + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + killproc ${DAEMON_NAME} + fi + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + echo "FAIL" + else + echo "OK" + + # remove lockfile + rm -f /var/lock/subsys/${DAEMON_NAME} + fi + ;; + + restart) + $0 stop + sleep 1 + $0 start + ;; + + status) + pid=`pidof ${DAEMON_NAME}` + RETVAL=$? + if [ ${RETVAL} -eq 0 ] ; then + echo "${DAEMON_NAME} is running" + else + echo "${DAEMON_NAME} is NOT running" + fi + ;; + + condrestart) + [ -f /var/lock/subsys/${DAEMON_NAME} ] && $0 restart + ;; + + *) + echo "usage: $0 { start | stop | status | restart | condrestart | status }" + ;; +esac + +exit $RETVAL diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon.conf b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon.conf new file mode 100644 index 00000000..b0df981d --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon.conf @@ -0,0 +1,21 @@ +[process] ; The block label +process = rmond ; The name of the process to be monitored. +service = rmon ; The name of the service known to systemd +pidfile = /var/run/rmond.pid ; The path to process pidfile +script = /etc/init.d/rmon ; The path and restart script file name +style = lsb ; The type of script file. Only lsb is supported +severity = major ; Process failure severity + ; critical : host is failed + ; major : host is degraded + ; minor : log is generated +restarts = 3 ; Number of back to back unsuccessful restarts before severity assertion +interval = 10 ; Number of seconds to wait between back-to-back unsuccessful restarts +debounce = 20 ; Number of seconds the process needs to run before declaring + ; it as running O.K. after a restart. + ; Time after which back-to-back restart count is cleared. 
+startuptime = 5 ; Seconds to wait after process start before starting the debounce monitor +mode = passive ; Monitoring mode: passive (default) or active + ; passive: process death monitoring (default: always) + ; active: heartbeat monitoring, i.e. request / response messaging + + diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon.logrotate b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon.logrotate new file mode 100755 index 00000000..88d75c98 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon.logrotate @@ -0,0 +1,42 @@ +#daily +nodateext + +/var/log/rmond.log +{ + nodateext + size 100M + start 1 + missingok + rotate 20 + compress + sharedscripts + postrotate + systemctl reload syslog-ng > /dev/null 2>&1 || true + endscript +} + +/var/log/rmond_notify.log +{ + size 10M + start 1 + missingok + rotate 10 + compress + sharedscripts + postrotate + systemctl reload syslog-ng > /dev/null 2>&1 || true + endscript +} + +/var/log/rmond.dump +{ + size 10M + start 1 + rotate 10 + compress + missingok + sharedscripts + postrotate + systemctl reload syslog-ng > /dev/null 2>&1 || true + endscript +} diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon.service b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon.service new file mode 100644 index 00000000..5fd53ed3 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon.service @@ -0,0 +1,23 @@ +[Unit] +Description=Titanium Cloud Maintenance Resource Monitor +After=network.target syslog.service config.service +Before=pmon.service + +[Service] +Type=forking +ExecStart=/etc/rc.d/init.d/rmon start +ExecStop=/etc/rc.d/init.d/rmon stop +ExecReload=/etc/rc.d/init.d/rmon reload +PIDFile=/var/run/rmond.pid + +# Failure handling +TimeoutStartSec=10s +TimeoutStopSec=10s + +# Auto recovery by pmond +Restart=no +RestartSec=10 + +[Install] +WantedBy=multi-user.target + diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon_reload_on_cpe.sh 
b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon_reload_on_cpe.sh new file mode 100644 index 00000000..2548c9ab --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmon_reload_on_cpe.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# +# Copyright (c) 2015-2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +source "/etc/init.d/log_functions.sh" + +# is it a compute subfunction on a CPE system +isCompute () +{ + [ -f /etc/platform/platform.conf ] || return 0 + res=$(grep "subfunction" /etc/platform/platform.conf | grep "controller,compute" | wc -l) + + if [ "$res" -eq 0 ] ; then + return 0 + else + return 1 + fi +} + +# only reload rmon if it is a CPE system +isCompute + +if [[ "$?" -eq 0 ]]; then + log "Cannot run on a non CPE system." + exit 0 +fi + +if [ ! -f /var/run/.compute_config_complete ]; then + log "Cannot run prior to compute configuration complete." + exit 0 +fi + +################################################################################################# +# Temporarily switch this to a process kill instead of reload due to a problem found +# in the rmon config reload handling. A clone Jira was created to track the fix that will migrate +# this back to a reload. 
+################################################################################################# +# rc=`pkill -hup rmond` +# log "rmond config reload (rc=$rc)" + +/usr/local/sbin/pmon-restart rmond +logger "requesting graceful rmon restart in goenabled test on cpe" + +exit 0 diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmond.conf b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmond.conf new file mode 100755 index 00000000..c5f5689f --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/rmond.conf @@ -0,0 +1,34 @@ +; CGTS Resource Monitor Configuration File +[config] ; Configuration +audit_period = 30 ; Resource polling period in seconds (1 - 120) +pm_period = 300 ; Period For posting PMs to Ceilometer +ntp_audit_period = 600 ; Resource polling period for querying NTP servers in seconds (10 - 1200) +ntpq_cmd_timeout = 60 ; Max amount of time in seconds to wait for the ntpq command to complete + +rmon_tx_port = 2101 ; Transmit Event and Command Reply Port +per_node = 0 ; enable (1) or disable (0) memory checking per processor node +rmon_api_tx_port = 2300 ; Resource Monitor API tx Port +critical_threshold = 1 ; enable (1) or disable (0) critical threshold checking + +log_step = 20 ; create resource value logs when readings cross this +/- value threshold + ; - this represents highest step for all resources + ; - resources can override to smaller step + +[timeouts] +start_delay = 10 ; managed range 1 .. 
120 seconds + +[features] + +[debug] ; SIGHUP to reload +debug_timer = 0 ; enable(1) or disable(0) timer logs (tlog) +debug_msg = 0 ; enable(1) or disable(0) message logs (mlog) +debug_state = 0 ; enable(1) or disable(0) state change logs (clog) +debug_level = 0 ; decimal mask 0..15 (8,4,2,1) +debug_all = 0 ; + +flush = 1 ; enable(1) or disable(0) force log flush (main loop) +flush_thld = 5 ; if enabled - force flush after this number of loops + +debug_event = none ; Not used +debug_filter = none ; Not used +stress_test = 0 ; In-Service Stress test number diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/virtual_resource.conf b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/virtual_resource.conf new file mode 100644 index 00000000..7031d113 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/virtual_resource.conf @@ -0,0 +1,16 @@ +[resource] +resource = Cinder LVM Backend Usage +debounce = 20 ; number of seconds to wait before degrade clear +severity = critical ; minor, major, critical +minor_threshold = 70 ; minor Cinder LVM Backend threshold percentage +major_threshold = 80 ; major Cinder LVM Backend utilization threshold percentage +critical_threshold = 90 ; critical Cinder LVM Backend utilization threshold percentage (use 101 if unused) +minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0 +major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0 +critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 (use 0 if unused) +minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1 +major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1 +critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1 +num_tries = 2 ; number of tries before the alarm is raised +alarm_on = 1 ; 1 for alarm on, 0 for alarm off +percent = 1 ; 1 for percentage used, 0 for absolute value 
(Cinder LVM Backend available in MiB) (default is 1) diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/thinmetaHdlr.cpp b/mtce-common/cgts-mtce-common-1.0/rmon/thinmetaHdlr.cpp new file mode 100644 index 00000000..c7184bf8 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/rmon/thinmetaHdlr.cpp @@ -0,0 +1,396 @@ +/* + * Copyright (c) 2017 Wind River Systems, Inc. +* +* SPDX-License-Identifier: Apache-2.0 +* + */ + +/** + * @file + * Wind River Titanium Cloud Platform, LVM Thinpool Metadata Monitor Handler + */ +#include "rmon.h" /* rmon header file */ + +/* Used to set interface alarms through the FM API */ +static SFmAlarmDataT alarmData; + +/******************************************************************************* + * + * Name : thinmeta_init + * + * Purpose : Set the first-run flag and timer defaults for each thinpool resource + * + * *****************************************************************************/ +void thinmeta_init(thinmeta_resource_config_type * res, struct mtc_timer * timers, int count) { + if (count > MAX_RESOURCES) { + elog("Thinpool metadata resource 'count' is: %i, maximum number or resources is: %i, " + "initializing count to max!", + count, MAX_RESOURCES); + count = MAX_RESOURCES; + } + + for (int i = 0; i < count; i++) { + + /* Mark first execution after reloading the configuration */ + res[i].first_run = true; + + /* Init timer defaults for this resource */ + mtcTimer_init ( timers[i] ) ; + timers[i].hostname = "localhost" ; + timers[i].service = res[i].thinpool_name ; + timers[i].ring = true; // set it to true for the initial run + + } +} + +/******************************************************************************* + * + * Name : _build_entity_instance_id + * + * Purpose : Build the entity instance id needed by our alarm + * + * *****************************************************************************/ +void _build_entity_instance_id(thinmeta_resource_config_type * ptr, char * entity) { + if (!entity) { + elog("%s/%s pool 
alarm failed to create entity instance id, 'entity' is NULL!", + ptr->vg_name, ptr->thinpool_name); + return; + } + rmon_ctrl_type * _rmon_ctrl_ptr; + _rmon_ctrl_ptr = get_rmon_ctrl_ptr(); + + snprintf(entity, sizeof(alarmData.entity_instance_id), + "%s.lvmthinpool=%s/%s", _rmon_ctrl_ptr->my_hostname, ptr->vg_name, ptr->thinpool_name); +} + +/******************************************************************************* + * + * Name : _set_thinmeta_alarm + * + * Purpose : Set or clears the threshold alarm + * + * *****************************************************************************/ +void _set_thinmeta_alarm( thinmeta_resource_config_type * ptr) +{ + strcpy(alarmData.uuid, ""); + strcpy(alarmData.entity_type_id ,"system.host"); + _build_entity_instance_id(ptr, alarmData.entity_instance_id); + alarmData.alarm_state = FM_ALARM_STATE_SET; + alarmData.alarm_type = FM_ALARM_OPERATIONAL; + alarmData.probable_cause = FM_ALARM_STORAGE_PROBLEM; + if ( ptr->autoextend_on ) { + snprintf(alarmData.reason_text , sizeof(alarmData.reason_text), + "Metadata usage for LVM thin pool %s/%s " + "exceeded threshold and automatic extension failed; " + "threshold: %u%%, actual: %.2f%%.", + ptr->vg_name, ptr->thinpool_name, + ptr->critical_threshold, ptr->resource_value); + snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action), + "Increase Storage Space Allotment for Cinder on the 'lvm' backend. " + "Consult System Administration Manual for more details. " + "If problem persists, contact next level of support."); + } + else { + snprintf(alarmData.reason_text , sizeof(alarmData.reason_text), + "Metadata usage for LVM thin pool %s/%s exceeded threshold; " + "threshold: %u%%, actual: %.2f%%.", + ptr->vg_name, ptr->thinpool_name, ptr->critical_threshold, ptr->resource_value); + snprintf(alarmData.proposed_repair_action , sizeof(alarmData.proposed_repair_action), + "Extend the metadata LV with 'lvextend --poolmetadatasize " + "+M %s/%s'. 
" + "Consult System Administration Manual for more details. " + "If problem persists, contact next level of support.", + ptr->vg_name, ptr->thinpool_name); + } + alarmData.timestamp = 0; + alarmData.service_affecting = FM_FALSE; + alarmData.suppression = FM_TRUE; + alarmData.severity = FM_ALARM_SEVERITY_CRITICAL; + strcpy(alarmData.alarm_id, THINMETA_ALARM_ID); + + dlog("%s/%s pool exceeding usage threshold, raising alarm\n", ptr->vg_name, ptr->thinpool_name); + int ret = rmon_fm_set(&alarmData, NULL) == FM_ERR_OK; + if (ret == FM_ERR_OK || ret == FM_ERR_ALARM_EXISTS) { + if (!ptr->alarm_raised) { + // log only once to avoid filling logs + ilog("%s/%s pool exceeding usage threshold, alarm raised", ptr->vg_name, ptr->thinpool_name); + ptr->alarm_raised = true; + } + } + else { + elog("Creation of alarm %s for entity instance id: %s failed. Error: %d \n", + alarmData.alarm_id, alarmData.entity_instance_id, ret); + ptr->alarm_raised = false; + } +} + +/***************************************************************************** + * + * Name : _clear_thinmeta_alarm + * + * Purpose : Clear the alarm of the resource passed in + * + *****************************************************************************/ +void _clear_thinmeta_alarm ( thinmeta_resource_config_type * ptr ) +{ + dlog ("%s/%s below threshold, clearing alarm\n", ptr->vg_name, ptr->thinpool_name); + AlarmFilter alarmFilter; + + _build_entity_instance_id (ptr, alarmData.entity_instance_id); + + snprintf(alarmFilter.alarm_id, FM_MAX_BUFFER_LENGTH, THINMETA_ALARM_ID); + snprintf(alarmFilter.entity_instance_id, FM_MAX_BUFFER_LENGTH, alarmData.entity_instance_id); + + int ret = rmon_fm_clear(&alarmFilter); + if (ret == FM_ERR_OK) { + ilog ("Cleared stale alarm %s for entity instance id: %s", + alarmFilter.alarm_id, alarmFilter.entity_instance_id); + ptr->alarm_raised = false; + } + else if (ret == FM_ERR_ENTITY_NOT_FOUND) { + if (!ptr->first_run) { + wlog ("Alarm %s for entity instance id: %s was not found", 
+ alarmFilter.alarm_id, alarmFilter.entity_instance_id); + } + ptr->alarm_raised = false; + } + else { + elog ("Failed to clear stale alarm %s for entity instance id: %s error: %d", + alarmFilter.alarm_id, alarmFilter.entity_instance_id, ret); + ptr->alarm_raised = true; + } +} + +/***************************************************************************** + * + * Name : is_pool_ready + * + * Purpose : Check if an LVM Thin Pool is configured + * Return : true when the lvs query exits with status 0, false otherwise + * + *****************************************************************************/ +bool is_pool_ready(thinmeta_resource_config_type * ptr) { + char result[BUFFER_SIZE]; + int rc = PASS; + char cmd[BUFFER_SIZE]; + snprintf(cmd, sizeof(cmd), "timeout 2 lvs --noheadings -o vg_name,lv_name --separator / %s/%s", + ptr->vg_name, ptr->thinpool_name); + rc = execute_pipe_cmd(cmd, result, sizeof(result)); + if (rc == 5 || rc == 1) { // ECMD_FAILED or ECMD_PROCESSED + // pool or VG was not found or not ready + return false; + } + else if (rc) { + // unexpected error + elog("%s/%s pool config query failed", ptr->vg_name, ptr->thinpool_name); + wlog("...cmd: '%s' exit status: %i result: '%s'", cmd, rc, result); + return false; + } + return true; +} + +/***************************************************************************** + * + * Name : calculate_metadata_usage + * + * Purpose : Obtain the percentage of used metadata space for a thin pool + * in thin provisioning.
+ * Return : PASS/FAIL + * + *****************************************************************************/ +int calculate_metadata_usage(thinmeta_resource_config_type * ptr) { + char result[BUFFER_SIZE]; + int rc = PASS; + char meta_usage_cmd[BUFFER_SIZE]; + + snprintf(meta_usage_cmd, sizeof(meta_usage_cmd), + "set -o pipefail; timeout 2 lvs -o metadata_percent --noheadings %s/%s | tr -d ' '", + ptr->vg_name, ptr->thinpool_name); + rc = execute_pipe_cmd(meta_usage_cmd, result, sizeof(result)); + if (rc == 1) { // ECMD_PROCESSED + // sometimes lvs command fail to process, not critical just retry in this case + dlog("%s/%s pool metadata usage query failed\n", ptr->vg_name, ptr->thinpool_name); + dlog("...cmd: '%s' exit status: %i result: '%s'\n", meta_usage_cmd, rc, result); + rc = execute_pipe_cmd(meta_usage_cmd, result, sizeof(result)); + } + if (rc != PASS) { + elog("%s/%s pool metadata usage query failed", ptr->vg_name, ptr->thinpool_name); + wlog("...cmd: '%s' exit status: %i result: '%s'", meta_usage_cmd, rc, result); + return (FAIL); + } + ptr->resource_value = atof(result); + if ( log_value ( ptr->resource_value, + ptr->resource_prev, + DEFAULT_LOG_VALUE_STEP ) ) + { + plog("%s/%s pool metadata usage is: %.2f%%\n", + ptr->vg_name, ptr->thinpool_name, ptr->resource_value); + } + return rc; +} + +/***************************************************************************** + * + * Name : extend_thinpool_metadata + * + * Purpose : Extend the Logical Volume used by LVM Thin Pool metadata + * Return : PASS/FAIL + * + *****************************************************************************/ +int extend_thinpool_metadata(thinmeta_resource_config_type * ptr) { + char result[THINMETA_RESULT_BUFFER_SIZE]; + int rc = PASS; + char cmd[BUFFER_SIZE]; + + dlog(">>> ptr->autoextend_percent: %i", ptr->autoextend_percent); + dlog("%s/%s pool, extending metadata by %i%s\n", ptr->vg_name, ptr->thinpool_name, + ptr->autoextend_by, ptr->autoextend_percent? 
"%": "MiB"); + if (ptr->autoextend_percent) { + char meta_lv_name[BUFFER_SIZE]; + /* Get metadata LV name + * 'lvextend --poolmetadatasize' parameter is only allowed in MiB not percents. + * For percent we need to rely on 'lvextend -l...%LV', but we first have to get + * the real name of the metadata LV */ + snprintf(cmd, sizeof(cmd), + "set -o pipefail; timeout 2 lvs %s/%s -o metadata_lv --noheadings | " + "tr -d '[] '", + ptr->vg_name, ptr->thinpool_name); + rc = execute_pipe_cmd(cmd, meta_lv_name, sizeof(meta_lv_name)); + if (rc != PASS) { + elog("%s/%s pool metadata name query failed. Aborting auto extend.", + ptr->vg_name, ptr->thinpool_name); + return (FAIL); + } + dlog("%s/%s pool metadata LV name is: %s\n", + ptr->vg_name, ptr->thinpool_name, meta_lv_name); + /* Extend metadata cmd*/ + snprintf(cmd, sizeof(cmd), + "timeout 10 lvextend -l +%u%%LV %s/%s", + ptr->autoextend_by, ptr->vg_name, meta_lv_name); + } + else { + /* Extend metadata cmd*/ + snprintf(cmd, sizeof(cmd), + "timeout 10 lvextend --poolmetadatasize +%uM %s/%s", + ptr->autoextend_by, ptr->vg_name, ptr->thinpool_name); + } + rc = execute_pipe_cmd(cmd, result, sizeof(result)); + if (rc != PASS) { + dlog("%s/%s pool metadata size extension failed\n", ptr->vg_name, ptr->thinpool_name); + dlog("...cmd: '%s' exit status: %i result: '%s'\n", cmd, rc, result); + return (FAIL); + } + return rc; +} + +/***************************************************************************** + * + * Name : thinmeta_handler + * + * Purpose : Handle the metadata usage and raise alarms through the FM API + * + *****************************************************************************/ +int thinmeta_handler( thinmeta_resource_config_type * ptr ) { + if (!ptr) { + elog ("Function called with NULL pointer!"); + return (PASS); + } + switch ( ptr->stage ) { + case RMON_STAGE__INIT: + { + /* Check if pool is ready */ + dlog("%s/%s pool config query", ptr->vg_name, ptr->thinpool_name); + if (!is_pool_ready(ptr)) { + 
ilog("%s/%s pool not ready, monitoring will be resumed when ready", + ptr->vg_name, ptr->thinpool_name); + ptr->stage = RMON_STAGE__MONITOR_WAIT; + } + else { + dlog("%s/%s pool ready", ptr->vg_name, ptr->thinpool_name); + ptr->stage = RMON_STAGE__MONITOR; + return (RETRY); // execute next stage immediately + } + break; + } + case RMON_STAGE__MONITOR_WAIT: + { + /* Waiting for pool to be ready*/ + if (is_pool_ready(ptr)) { + ilog("%s/%s pool ready, starting monitoring", + ptr->vg_name, ptr->thinpool_name); + ptr->stage = RMON_STAGE__MONITOR; + return (RETRY); // execute next stage immediately + } + break; + } + case RMON_STAGE__MONITOR: + { + dlog("%s/%s pool metadata usage monitoring", ptr->vg_name, ptr->thinpool_name); + /* calculate usage. The first time we calculate thinpool meta + * usage is to get the baseline resource value, if it exceeds + * the critical threshold and if the resource configuration + * allows us to autoextend thinpools then we do an extend + * operation and then check again if our thinpool usage has + * fallen below the critical watermark. */ + if(calculate_metadata_usage(ptr) == FAIL) { + ptr->stage = RMON_STAGE__INIT; + return (RETRY); // execute next stage immediately + break; + } + + /* act on thresholds */ + if((ptr->alarm_raised || ptr->first_run) && + ptr->resource_value < ptr->critical_threshold) { + // clear alarm + _clear_thinmeta_alarm(ptr); + } + else if(ptr->resource_value >= ptr->critical_threshold) { + if (ptr->autoextend_on) { + // Extend metadata + // Retry at each pass (failures are fast) till successful, in case + // our VG is extended on the fly and we suddenly get enough space. + // Log operation and error only once to avoid filling log file. + if(!ptr->alarm_raised) { + ilog("%s/%s pool metadata will be extended by: %i%s", + ptr->vg_name, ptr->thinpool_name, + ptr->autoextend_by, ptr->autoextend_percent? 
"%": "MiB"); + } + if(extend_thinpool_metadata(ptr) == PASS) { + // after extension recalculate metadata usage + if(calculate_metadata_usage(ptr) == FAIL) { + // this was successful < 1s ago, should not happen! + elog("%s/%s pool second metadata usage calculation failed!", + ptr->vg_name, ptr->thinpool_name); + } + } + else { + if(!ptr->alarm_raised) { + elog("%s/%s pool metadata extension failed ", + ptr->vg_name, ptr->thinpool_name); + } + } + } + if ((ptr->resource_value >= ptr->critical_threshold) && // resource_value may change + ptr->alarm_on) { + // raise alarm (if autoextend is disabled or failed) + _set_thinmeta_alarm(ptr); + } + else if (ptr->alarm_on && (ptr->alarm_raised || ptr->first_run)) { + // condition also needed if alarm existed prior to rmon startup + _clear_thinmeta_alarm(ptr); + } + } + /* Mark first run as complete */ + ptr->first_run = false; + break; + } + default: + { + slog ("%s/%s Invalid stage (%d)\n", ptr->vg_name, ptr->thinpool_name, ptr->stage); + /* Default to init for invalid case */ + ptr->stage = RMON_STAGE__INIT; + return (RETRY); // execute next stage immediately + } + } + return (PASS); +} diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/config b/mtce-common/cgts-mtce-common-1.0/scripts/config new file mode 100644 index 00000000..4d8d092b --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/config @@ -0,0 +1,37 @@ +#!/bin/bash +# +# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +# Controller Config Complete emulation file +RETVAL=0 # exit status reported at the end; set here so it is never inherited from the environment +case "$1" in + start) + #touch /etc/platform/.initial_controller_config_complete + #touch /etc/platform/.initial_config_complete + #touch /var/run/.controller_config_complete + #touch /var/run/.config_pass + ;; + + stop) + #rm -f /etc/platform/.initial_controller_config_complete + #rm -f /etc/platform/.initial_config_complete + #rm -f /var/run/.controller_config_complete + #rm -f /var/run/.config_pass + ;; + + restart) + ;; + + status) + ;; + + *) + echo "usage: $0 { start | stop | status | restart }" + exit 1 + ;; +esac + +exit $RETVAL diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/config.service b/mtce-common/cgts-mtce-common-1.0/scripts/config.service new file mode 100644 index 00000000..f4fc0a52 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/config.service @@ -0,0 +1,13 @@ +[Unit] +Description=controller config emulation service +After=network.target +Before=sshd.service + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/etc/init.d/config start +ExecStop=/etc/init.d/config stop + +[Install] +WantedBy=multi-user.target diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/dmemchk.sh b/mtce-common/cgts-mtce-common-1.0/scripts/dmemchk.sh new file mode 100755 index 00000000..6781b624 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/dmemchk.sh @@ -0,0 +1,358 @@ +#!/bin/bash +################################################################################ +# +# Copyright (c) 2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +################################################################################ +# +# Description: Displays memory usage information to check for memory leaks.
+# +# Behaviour : The script takes in a list of commands whose processes you want +# monitored and then finds their process IDs, and uses that to find +# their current Resident Set Size (RSS) using ps, and looks up their +# Proportional Set Size (PSS) in /proc//smaps. Only the initial +# process run by the system is monitored; child processes and other +# instances of these processes are ignored. +# +# This script is to be run on a controller node, and requires that it be run +# with sudo privileges or else it may not have access to /proc//smaps for +# each of the desired processes. +# +# The script should be run with the following options: +# sudo ./memchk -t [#] --C [commands] +# Where following -t the time in seconds with which you want this script to repeat +# should be indicated. The default if no time is specified is 3600 seconds (1 hour). +# Following --C all arguments are viewed as commands you wish to be monitored. +# Each command should be separated by a space. +# e.g. sudo ./memchk -t 1800 --C command1 command2 command3 +# +# Error logs can be found in /tmp/memchk_err.log +# Standard output can be found in /tmp/memchk_out.log +# +################################################################################ + +changeP=() +changeR=() +rss=() +firstP=() +lastP=() +firstR=() +lastR=() +leaking=() +leakFlag=0 +flag=0 +period=0 +s1=() # Holds behaviour of most recent sample (does not change if present behaviour continues) +s2=() # Holds type of the previously observed behaviour that is different from the present behaviour +commands=() +trend=() +baseline=() # Sum of all RSS values for a given PID +count=() # Number of times RSS values have been sampled for a given PID +increasing=() +decreasing=() +stable=() +pattern=() # Stores a string indicating the present pattern + + +function trapCalled { + echo $'\nReceived trap signal' >&2 + exit +} + +trap trapCalled SIGHUP SIGINT SIGTERM + +function helpMessage { + echo 
"--------------------------------------------------------------------------------------" + echo "Memory Leak And Information Tracking Tool" + echo "" + echo "Usage:" + echo "" + echo "sudo ./memchk.sh --C [commands]" + echo "" + echo " -t ... time in seconds with which to run this script. No time" + echo " specified will result in the default of 3600s (1 hour)." + echo " --C ... space delimited list of commands to monitor. This option" + echo " must be the last one entered." + echo " --help | -h ... this info" + echo "" + echo "Note: This script must be run using sudo. If it is not, access to the memory information" + echo " of a given process may not be allowed by the system. PSS info is obtained from" + echo " /procs//smaps" + echo "" + echo " Error logs can be found in /tmp/memchk_err.log" + echo " Standard output can be found in /tmp/memchk_out.log" + echo "" + echo "" + echo "Examples:" + echo "" + echo "sudo memchk -t 60 --C mtcClient mtcAgent ... Check PSS and RSS values of the processes belonging to mtcClient" + echo " and mtcAgent every 60 seconds (1 minute)" + echo "sudo memchk -t 3600 --C pmond rmond hwmond ... Check PSS and RSS values of pmond, rmond and hwmond every 3600s (1h)" + echo "sudo memchl --C pmond rmond hwmond ... Check PSS and RSS values of commands using default period of 3600s (1h)" + echo "--------------------------------------------------------------------------------------" + exit 0 +} + +# Prints information on suspected leaking process +function memLeak { + printf "\n" >&2 + printf '%0.1s' "*"{1..150} >&2 + # Iterates over all keys in the array. 
+ for proc in ${!leaking[@]} + do + printf "\nPossible mem leak in: %s PID: %s Current RSS: %s Orig RSS: %s Current PSS: %s Orig PSS: %s\n" \ + ${leaking[proc]} $proc ${rss[proc]} ${firstR[proc]} ${lastP[proc]} ${firstP[proc]} >&2 + done + printf '%0.1s' "*"{1..150} >&2 + printf "\n" >&2 +} + +if [ $UID -ne 0 ]; then + echo $'\nWarning: Memchk must be run as \'root\' user to access PSS memory information' + echo $'Use the -h option for help\n' + exit 1 +fi + +if [ $# -eq 0 ]; then + echo $'\nNo commands specified\nPlease try again and enter a command whose memory you would like to monitor' + echo $'Use the -h option for help\n' + exit 1 +fi + +exec > >(tee /tmp/NEWmemchk_out.log) 2> >(tee /tmp/NEWmemchk_err.log >&2) + +# Cycles through commandline arguments to make sure valid input was received and +# to assign values correctly +while [[ $# > 0 ]]; do + key="$1" + + case $key in + + # To make this more user-friendly, instead of having the user enter the period in seconds, consider using + # 'shopt -s extglob' and 'if [[ $2 = +([0-9])m ]];' to check if the user entered 15m as a period to indicate 15 minutes etc. + # Modify the regex for seconds and hours as well, then multiply value as necessary to convert into seconds for script + -t) + period="$2" + shift + ;; + + --C) + shift + if [ "$#" -eq "0" ]; then + printf "Error: No commands specified.\n" + exit 1 + fi + for c in "$@"; do + commands+=("$1") + shift + done + ;; + + -h|--help) + helpMessage + ;; + + *) + printf "\nUnknown argument passed: %s\n" $key + printf "Use the -h option for help\n" + exit 1 + esac + shift +done + +# Makes sure period has a positive value +if [ "$period" -le "0" ]; then + period=3600 + printf "You have entered an invalid period. Period has been set to 3600 seconds.\n" +# The rate of kB/h has been hard-coded into the table, if values greater than or equal to 1 hour are used, the table +# will not show an accurate representation in the change in usage over time. 
There are various accuracy issues in +# modifying the code to display data to match your chosen period. Consider this and modify accordingly. +elif [ "$period" -lt "3600" ]; then + printf "\nWARNING: You have chosen a period that is less than 1 hour. The rate of change in the table is displayed in kB/h, keep this in mind when reviewing results.\n" +fi + +while true; do + # Prints header for columns + printf "\n%15s | %8s | Leak | %10s | %13s | %8s | %8s | %8s | %13s | %8s | %8s | %8s | Period: %-${#period}ss\n" \ + "Cmd" "PID" "Trend" "Change in RSS" "RSS" "Orig RSS" "Prev RSS" "Change in PSS" "PSS" "Orig PSS" "Prev PSS" "$period" >&1 + padding=$(printf '%0.1s' "-"{1..180}) + printf '%*.*s' 0 $((156 + ${#period} )) "$padding" # Prints line of hyphens of variable size depending on the number of characters in period. + # Cycles through each of the originally entered commands. This list does not change. + for cmd in ${commands[@]} + do + # Finds all the PIDs associated with each command (commands may have more than one instance) + procs="$(pgrep $cmd)" + + # The number of processes may change on each loop. Keep this in mind if expanding or reusing this script. + for pid in ${procs[@]} + do + # In smaps the PSS value is located 3 lines below the line containing the process name. This works by setting + # the awk variable comm to contain the same value as cmd, the file is then searched for the string pattern + # contained in comm (cmd) and the PSS value associated with each instance of comm is summed and then printed. + pss=$(awk -v comm="$cmd" '$0 ~ comm {getline;getline;getline;sum += $2;} END {print sum}' /proc/"$pid"/smaps) + # obtains the RSS value of the indicated process + rssCurrent=$(ps -p "$pid" --no-header -o rss) + lastR[pid]="${rss[pid]}" + + # Child processes may exist ephemerally, as a result they may be added to our list of PIDs, but no longer + # exist when we try to read their associated files in /proc/. 
This makes sure the file exists and that the + # parent process is 1. If the parent process ID is not 1 then the process in question is a child process + # and we do not care about its memory usage (for the purposes of this specific script). The continue + # statement will return us to the for-loop and begin running for the next pid. + if [ -f "/proc/$pid/status" ] && [ "$(awk '$0 ~ "PPid:" {print $2}' /proc/"$pid"/status)" -ne "1" ]; then continue; fi + + # This checks that neither rssCurrent nor pss have empty values due to a child process being generated + # and then killed off before its values could be read. Root occasionally generates a child process of + # one of the monitored commands so the above if-statement doesn't exclude it because the PPID is 1. + if [ -z "$rssCurrent" ] || [ -z "$pss" ]; then continue; fi + + # Sets initial values for PSS and RSS. NA is set instead of 0 because using numbers could lead to false + # or inaccurate information. A PID first seen on a later pass (empty firstR) is initialized here as well. + if [ "$flag" -ne "1" ] || [ -z "${firstR[pid]}" ]; then + firstP[pid]="$pss" + lastP[pid]="NA" + rss[pid]="$rssCurrent" + firstR[pid]="${rss[pid]}" + lastR[pid]="NA" + s1[pid]="" + s2[pid]="" + trend[pid]=0 + increasing[pid]=0 + decreasing[pid]=0 + stable[pid]=0 + count[pid]=0 + baseline[pid]=0 + fi + + # In the event of a memory leak (the RSS value increasing), an X is placed in the 'Leak' column of the + # printed table. The PID of the process is also added to an array to be sent to the memLeak function + # once all of the commands' processes have been checked. A flag indicating that a possible leak has + # been detected is also set. + if [ "${rss[pid]}" -lt "$rssCurrent" ]; then + lastR[pid]="${rss[pid]}" + rss[pid]="$rssCurrent" + leaking[pid]="$cmd" + leak[pid]="X" + let leakFlag=1 + fi + + # Calculates the changes in PSS and RSS usage over time.
If this is the first run and there is no + # previous value with which to compare against, delta is set to 0, where delta is the change over + # time. + if [ "${lastP[pid]}" = "NA" ]; then changeP[$pid]=0; deltaP=0.000; + else changeP[pid]="$((changeP[$pid] + $pss - lastP[$pid]))"; deltaP=$(awk -v chP="${changeP[$pid]}" -v hrs="${hours}" -v t="${period}" 'BEGIN {printf "%.3f", (chP/(hrs*t))*3600; exit(0)}'); + fi + + if [ "${lastR[pid]}" = "NA" ]; then changeR[$pid]=0; deltaR=0.000; + else changeR[pid]="$((changeR[$pid] + rss[$pid] - lastR[$pid]))"; deltaR=$(awk -v chR="${changeR[$pid]}" -v hrs="${hours}" -v t="${period}" 'BEGIN {printf "%.3f", (chR/(hrs*t))*3600; exit(0)}'); + fi + + # The below if-else block seeks to determine gradual sustained patterns of RSS usage over time to determine if the memory usage is gradually + # increasing throughout the lifespan of the process (possible memory leak) or not. Non-gradual usage changes can be due to dynamic reallocation + # and such 'eratic' behaviour is not indicative of any overall trends. + # NOTE: If you would like to do this properly and determine whether or not such patterns exist by evaluating the RSS usage accross the entire + # lifespan of the script, consider the following method: + # Take the (RSS, time) value-pairs and make an augmented matrix and then use Gaussian elimination to solve the matrix and use the remaining + # values as the coefficients to create a least-squares parabola, which you can then find the first derivative of to determine the rate of + # change -- which will indicate increasing or decreasing behaviour at your current point relative to nearby datapoints and the behaviour + # of the rest of your graph (function). + # To do this consider using the python package numpy for matrix math and for derivatives. The issue with this method is finding a way to + # pipe data to a python script and have it return to the bash script. 
Because piping is usually done asynchronously, you may find that there + # are issues with having the values returned and printed properly in the bash script. This math can also be done in bash, but will take + # considerable effort. + #--------------------------------------------------------------------------------------------------------------------------------------------------- + # This part checks to see that an established trend is being maintained. + # It first checks that trend[pid] is greater than or equal to 3 because the else-block below increments the trend number such that when the same + # pattern e.g. an increase in RSS that exceeds the baseline average occurs 3 times in a row, it establishes that there exists a trend of RSS increasing. + # The existence of a pattern or 'trend' is gauged by whether the same behaviour has occured 3 times in a row, and that it continues to occur without + # the opposite behaviour happening. For example, if a trend of increasing RSS has been observed, and on the next sample the RSS value is found to + # be below the baseline average, this indicates that a decrease has occured and thus the trend has been broken, and a new trend must be established. + # If the current trend is either increasing or decreasing, the RSS value can be equal to the baseline average for two consecutive samples without + # the trend being broken. However, if the RSS value is equal to the baseline average for 3 consecutive samples, this indicates a new trend of the + # RSS value reaching a stable value, and the current trend of increasing or decreasing is broken; that is to say, only a trend of 'stable' will be + # permitted; if an increase or decrease is observed, all temporary and trend values will be reset, and the code will enter the else-block below and + # attempt to establish a new trend from scratch. 
The reason for this behaviour is that trends are determined by observing a behaviour three + # consecutive times, if a stable behaviour starts turning into an increasing or decreasing behaviour, the else-block is entered to wait for a new + # behaviour to be established. + # A trend cannot change immediately from increasing to decreasing. This is done to avoid representing erratic behaviour as a long-term pattern. + # An increasing or decreasing trend must change to 'none' -- no trend observed -- before the opposite trend can be declared. + # The baseline average is the RSS values for a PID from each sample added together and divided by the number of samples that have taken place. + let count[pid]+=1 + let baseline[pid]+="$rssCurrent" + avg=$(awk -v b="${baseline[pid]}" -v c="${count[pid]}" 'BEGIN {printf "%.0f", (b/c); exit(0)}') + if [ "${trend[pid]}" -ge "3" ]; then + if [ "${rss[pid]}" -gt "$avg" ] && ([ "${s1[pid]}" = "increasing" ] || ([ "${s1[pid]}" != "decreasing" ] && [ "${s2[pid]}" != "decreasing" ]) && [ "${stable[pid]}" -ne "3" ]); then + if [ "${s1[pid]}" != "increasing" ]; then + s2[pid]="${s1[pid]}" + s1[pid]="increasing" + fi + elif [ "${rss[pid]}" -eq "$avg" ]; then + if [ "${s1[pid]}" != "stable" ]; then + stable[pid]=0 + s2[pid]="${s1[pid]}" + s1[pid]="stable" + fi + let stable[pid]+=1 + # Counted once per sample so three consecutive stable samples give exactly 3 (the value the -ne "3" guards test) + elif [ "${rss[pid]}" -lt "$avg" ] && ([ "${s1[pid]}" = "decreasing" ] || ([ "${s1[pid]}" != "increasing" ] && [ "${s2[pid]}" != "increasing" ]) && [ "${stable[pid]}" -ne "3" ]); then + if [ "${s1[pid]}" != "decreasing" ]; then + s2[pid]="${s1[pid]}" + s1[pid]="decreasing" + fi + else + s1[pid]="" + s2[pid]="" + trend[pid]=0 + increasing[pid]=0 + decreasing[pid]=0 + stable[pid]=0 + fi + # This else-block is used to establish whether or not a trend has been established.
It waits for a pattern of the RSS value of a PID to increase, + # decrease, or remain stable relative to the baseline average three times in a row before it will declare that a trend exists. This is to avoid + # viewing erratic increases and decreases in RSS as gradual increases or decreases in the system's (process') RSS usage. + else + if [ "${count[pid]}" -gt "0" ]; then + if [ "${rss[pid]}" -gt "$avg" ]; then + let trend[pid]+=1 + let increasing[pid]+=1 + s1[pid]="increasing" + elif [ "${rss[pid]}" -eq "$avg" ]; then + let trend[pid]+=1 + let stable[pid]+=1 + s1[pid]="stable" + elif [ "${rss[pid]}" -lt "$avg" ]; then + let trend[pid]+=1 + let decreasing[pid]+=1 + s1[pid]="decreasing" + fi + if [ "${increasing[pid]}" -gt "0" ] && [ "${decreasing[pid]}" -gt "0" ]; then + increasing[pid]=0 + decreasing[pid]=0 + stable[pid]=0 + trend[pid]=0 + fi + fi + fi + + if [ "${trend[pid]}" -ge "3" ]; then pattern[pid]="${s1[pid]}"; else pattern[pid]="none"; fi # Sets the trend variable for printing if a trend exists + + + + printf "\n%15s | %8s | %2s | %10s | %8s kB/h | %8s | %8s | %8s | %8s kB/h | %8s | %8s | %8s |" \ + $cmd $pid "${leak[pid]}" "${pattern[pid]}" $deltaR ${rss[pid]} ${firstR[pid]} ${lastR[pid]} $deltaP $pss ${firstP[pid]} ${lastP[pid]} >&1 + + lastP[pid]="$pss" + leak[pid]="" # Resets the indicator in the 'Leak' column + done + done + + if [ "$leakFlag" -eq "1" ]; then memLeak; fi # Calls the mem leak function if flag is set (it reads the global 'leaking' array) + leaking=() # Clear the array holding PIDs of processes with potential leaks + let leakFlag=0 + let hours+=1 # Pass counter (one per period) used in calculating delta + let flag=1 # Flag indicating that first run has completed so we no longer have to set values of 'NA' + echo $'\n' + sleep "$period" +done diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/goenabled b/mtce-common/cgts-mtce-common-1.0/scripts/goenabled new file mode 100644 index 00000000..b91f4b79 --- /dev/null +++
b/mtce-common/cgts-mtce-common-1.0/scripts/goenabled @@ -0,0 +1,186 @@ +#!/bin/bash +# +# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# chkconfig: 2345 98 2 +# +### BEGIN INIT INFO +# Provides: goenabled +# Default-Start: 3 5 +# Default-Stop: 0 1 2 6 +# Short-Description: Maintenance Client "Go Enable" script +### END INIT INFO + +GOENABLED_PATH=${GOENABLED_PATH:-"/etc/goenabled.d"} +GOENABLED_FILE=${GOENABLED_FILE:-"/var/run/.goenabled"} +GOENABLED_TAG=${GOENABLED_TAG:-"GOENABLED"} + +RETVAL=0 + +################################################################################ +# Log message to syslog (arguments are quoted so they are not word-split or globbed) +################################################################################ +function log +{ + logger -t "${GOENABLED_TAG}" "$@" +} + +################################################################################ +# Utility function to print the status of a command result +################################################################################ +function print_status() +{ + if [ "$1" -eq "0" ]; then + echo "[ OK ]" + else + echo "[FAILED]" + fi +} + +################################################################################ +# Run goenabled scripts to check system status +################################################################################ +function goenabled_check() +{ + if [ -d "${GOENABLED_PATH}" ]; then + run-parts "${GOENABLED_PATH}" 2>&1 | logger -t "${GOENABLED_TAG}" + RET=${PIPESTATUS[0]} + if [ ${RET} -ne 0 ]; then + return ${RET} + fi + fi + + return 0 +} + + +################################################################################ +# Write goenabled state file +################################################################################ +function goenabled_enable_ready() +{ + echo "`date`: `hostname` : Ready to Run GoEnabled Scripts" > ${GOENABLED_FILE} + RET=$?
+ if [ ${RET} -ne 0 ]; then + log "Failed to write state file ${GOENABLED_FILE}" + return ${RET} + fi + + log "enabled" + + return 0 +} + + +################################################################################ +# Remove goenabled state file +################################################################################ +function goenabled_disable() +{ + rm -f ${GOENABLED_FILE} + RET=$? + if [ ${RET} -ne 0 ]; then + log "Failed to remove state file ${GOENABLED_FILE}" + return ${RET} + fi + + log "disabled" + + return 0 +} + +################################################################################ +# Start Action +################################################################################ +function start() +{ + echo -n "Goenabled Ready: " + +# goenabled_check +# RETVAL=$? +# if [ "$RETVAL" -ne "0" ]; then +# log "Go enabled check failed" +# print_status $RETVAL +# return +# fi + + goenabled_enable_ready + RETVAL=$? + if [ "$RETVAL" -ne "0" ]; then + log "Go enabled failed" + print_status $RETVAL + return + fi + + print_status $RETVAL +} + +################################################################################ +# Stop Action +################################################################################ +function stop() +{ + echo -n "Stopping goenabled: " + + goenabled_disable + RETVAL=$? + if [ "$RETVAL" -ne "0" ]; then + log "Go disabled failed" + print_status $RETVAL + return + fi + + print_status $RETVAL +} + +################################################################################ +# Status Action +################################################################################ +function status() +{ + echo -n "Checking goenabled: " + + goenabled_check + RETVAL=$? 
+ if [ "$RETVAL" -ne "0" ]; then + print_status $RETVAL + return + fi + + print_status $RETVAL +} + + +################################################################################ +# Main Entry +################################################################################ + +case "$1" in + start) + start + ;; + + stop) + stop + ;; + + restart) + stop + start + ;; + + status) + status + ;; + + *) + echo "usage: $0 { start | stop | status | restart }" + exit 1 + ;; +esac + +exit $RETVAL diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/goenabled.service b/mtce-common/cgts-mtce-common-1.0/scripts/goenabled.service new file mode 100644 index 00000000..8f233902 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/goenabled.service @@ -0,0 +1,13 @@ +[Unit] +Description=Titanium Cloud Maintenance Goenable Ready +After=config.service +Before=pmon.service + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/etc/init.d/goenabled start +ExecStop=/etc/init.d/goenabled stop + +[Install] +WantedBy=multi-user.target diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/hbsAgent b/mtce-common/cgts-mtce-common-1.0/scripts/hbsAgent new file mode 100755 index 00000000..d1a2f4fa --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/hbsAgent @@ -0,0 +1,555 @@ +#!/bin/sh +# +# Copyright (c) 2013-2017 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# Support: www.windriver.com +# +# Purpose: This resource agent manages +# +# .... the Titanium Cloud Host Heartbeat Service Daemon +# +# RA Spec: +# +# http://www.opencf.org/cgi-bin/viewcvs.cgi/specs/ra/resource-agent-api.txt?rev=HEAD +# +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + + +####################################################################### + +# Fill in some defaults if no values are specified +OCF_RESKEY_binary_default="hbsAgent" +OCF_RESKEY_config_default="/etc/mtc.ini" +OCF_RESKEY_dbg_default="false" +OCF_RESKEY_logging_default="true" +OCF_RESKEY_user_default="admin" +OCF_RESKEY_pid_default="/var/run/hbsAgent.pid" +OCF_RESKEY_state_default="active" + + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_logging=${OCF_RESKEY_logging_default}} +: ${OCF_RESKEY_dbg=${OCF_RESKEY_dbg_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} +: ${OCF_RESKEY_state=${OCF_RESKEY_state_default}} + +mydaemon="/usr/local/bin/${OCF_RESKEY_binary}" +statusfile="/var/run/${OCF_RESKEY_binary}.info" +virtualhostfile="/var/run/virtual.host" +facterexec="/usr/bin/facter" + +####################################################################### + +usage() { + cat < + + +0.7 + + +This 'hbsAgent_ra' is an OCF Compliant Resource Agent that manages start, stop +and in-service monitoring of the Host Heartbeat Agent Process on Wind River's +Titanium Cloud in an active mode. + + + +Manages the Titanium Cloud's Heartbeat (hbsAgent) Daemon. + + + + + + + +state = standby ... run heartbeat daemon in 'standby' mode (default) +state = active ... run heartbeat daemon in 'active' mode + +Heartbeat Activity State Option + + + + + + +This option is used to direct the hbsAgent dameon log stream. + +logging = true ... /var/log/hbsAgent.log (default) +logging = false ... /dev/null + +See also debug option which sets the verbosity of logging. + +Service Logging Control Option + + + + + + +dbg = false ... info, warn and err logs sent to output stream (default) +dbg = true ... 
Additional debug logs are also sent to the output stream + +Service Debug Control Option + + + + + + + + + + + + + + +END + return ${OCF_SUCCESS} +} + +hbsAgent_validate() { + + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "hbsAgent:validate" + fi + + check_binary "/usr/local/bin/${OCF_RESKEY_binary}" + check_binary "/usr/local/bin/mtcAgent" + check_binary pidof + + if [ ! -f ${OCF_RESKEY_config} ] ; then + msg="${OCF_RESKEY_binary} ini file missing ${OCF_RESKEY_config}" + ocf_log err "${msg}" + return ${OCF_ERR_CONFIGURED} + fi + + return ${OCF_SUCCESS} +} + +hbsAgent_status () { + + proc="hbsAgent:status" + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "${proc}" + fi + + # remove the status file before we request a new one + rm -f ${statusfile} + + # Verify the pid file exists as part of status + for ((loop=0;loop<3;loop++)) { + if [ -f ${OCF_RESKEY_pid} ] ; then + break + else + sleep 1 + fi + } + + # See if the daemon is running + if [ -e ${OCF_RESKEY_pid} ] ; then + + # get the pid from the pidfile + pid=`cat ${OCF_RESKEY_pid}` + + # get the pid list + pids=`pidof ${OCF_RESKEY_binary}` + + # get the number of pids as 'pidn' + pidn=`pidof ${OCF_RESKEY_binary} | wc -w` + + # check for a pid list of more than one or a pid that + # does not match what is in the pidfile. + if [ "${pidn}" != "1" -o "${pids}" != "${pid}" ] ; then + + # create a warning log indicating the actions about to be taken + ocf_log warn "${proc} Warning ; pid mismatch [${pid}:${pids}:${pidn}] ; killing those not in pidfile" + + # handle the case where there are multiple processes running + if [ ${pidn} -gt 1 ] ; then + + # loop over the list and kill all the processes whose + # pid does not match the pid in the pidfile.
+ ocf_log info "${proc} PID: ${pids}" + pidlist=(${pids}) + for p in "${pidlist[@]}" ; do + if [ "${p}" != "${pid}" ] ; then + ocf_log info "${proc} killing duplicate instance [${p}]" + kill -9 ${p} + else + ocf_log info "${proc} keeping pidfile instance [${p}]" + fi + done + else + # handle the case where the running pid and the pidfile are a mismatch + hbsAgent_stop + return ${OCF_NOT_RUNNING} + fi + fi + else + # check to see if the process is running without a PIDfile. + # Any monitored process that is running with no pidfile should + # be killed + pid=`pidof ${OCF_RESKEY_binary}` + if [ "$pid" != "" ] ; then + kill -0 ${pid} 2> /dev/null + if [ $? -eq 0 ] ; then + # if there is no pidfile but the process is running + # then proceed to kill all that are running and state + # there is no process running. This handles both the + # active and inactive cases + ocf_log err "${proc} is running [$pid] with no pidfile ; force killing all" + hbsAgent_stop + fi + fi + # this without the above 'if' taken is the typical inactive case success path + return ${OCF_NOT_RUNNING} + fi + + kill -0 $pid 2> /dev/null + if [ $?
-eq 0 ] ; then + + log_sig="${OCF_RESKEY_binary} [$pid] In-Service Active Monitor Test" + + # Ask the daemon to produce status + ocf_run kill -s USR1 $pid + + # Wait for the response + for ((loop=0;loop<10;loop++)) { + sleep 1 + if [ -f ${statusfile} ] ; then + + ocf_log info "${log_sig} Passed ($loop)" + return ${OCF_SUCCESS} + + elif [ $loop -eq 5 ] ; then + + # send the signal again + ocf_run kill -s USR1 $pid + + pid_stat=`cat /proc/${pid}/stat` + ocf_log notice "${log_sig} is slow to respond" + ocf_log notice "$pid_stat" + + elif [ $loop -eq 8 ] ; then + + pid_stat=`cat /proc/${pid}/stat` + ocf_log warn "${log_sig} is very slow to respond" + ocf_log warn "$pid_stat" + + fi + } + log_procfs + ocf_log err "${log_sig} Failed" + return ${OCF_ERR_GENERIC} + fi + return ${OCF_NOT_RUNNING} +} + +hbsAgent_monitor () { + + proc="hbsAgent:monitor" + + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "${proc}" + fi + + # Uncomment if you want the monitor function to force-pass + # return ${OCF_SUCCESS} + + if [ -e ${OCF_RESKEY_pid} ] ; then + pid=`cat ${OCF_RESKEY_pid}` + if [ "$pid" != "" ] ; then + kill -0 $pid 2> /dev/null + if [ $? -ne 0 ] ; then + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "${proc} called while ${OCF_RESKEY_binary} not running." + fi + return ${OCF_NOT_RUNNING} + fi + fi + fi + + hbsAgent_status + return $? +} + +hbsAgent_start () { + + local rc + + start_proc="hbsAgent:start" + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "${start_proc}" + fi + + # Uncomment if you want the start function to force-pass without starting + # return ${OCF_SUCCESS} + + # if there is an instance running already then stop it. + # if it can't be stopped then return a failure. + pid=`pidof ${OCF_RESKEY_binary}` + if [ "$pid" != "" ] ; then + hbsAgent_stop + rc=$? 
+ # if the stop failed then I fail the start ; + # not permitted to start a duplicate process + if [ ${rc} -ne ${OCF_SUCCESS} ] ; then + ocf_log info "${start_proc} failed ; was unable to stop all existing instances rc:${rc}" + return ${rc} + fi + fi + + val=`${facterexec} is_virtual` + if [ "${val}" = "true" ] ; then + echo "virtual host" > ${virtualhostfile} + fi + + if [ ${OCF_RESKEY_state} = "active" ] ; then + RUN_OPT_STATE="-a" + else + RUN_OPT_STATE="" + fi + + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + RUN_OPT_DEBUG="-d debug" + else + RUN_OPT_DEBUG="" + fi + + if [ ${OCF_RESKEY_logging} = "true" ] ; then + RUN_OPT_LOG="-l" + else + RUN_OPT_LOG="" + fi + + ${mydaemon} ${RUN_OPT_STATE} ${RUN_OPT_LOG} ${RUN_OPT_DEBUG} + rc=$? + + # verify it was started and set return code appropriately + if [ $rc -eq ${OCF_SUCCESS} ] ; then + for ((loop=0;loop<3;loop++)) { + if [ -f ${OCF_RESKEY_pid} ] ; then + break + else + ocf_log info "${start_proc} waiting ... loop=${loop}" + sleep 1 + fi + } + + pid=`cat ${OCF_RESKEY_pid}` + kill -0 $pid 2> /dev/null + if [ $? -ne 0 ] ; then + rc=${OCF_FAILED_MASTER} + else + if [ ! -f ${statusfile} ] ; then + ocf_log info "hbsAgent: Startup Health Test Failed - missing info" + rc=${OCF_ERR_GENERIC} + fi + fi + else + ocf_log info "${start_proc} failed ${mydaemon} daemon rc=${rc}" + rc=${OCF_ERR_GENERIC} + fi + + # Record success or failure and return status + if [ ${rc} -eq $OCF_SUCCESS ] ; then + msg="${start_proc}ed pid=${pid}" + ocf_log info "${msg}" + else + msg="${start_proc} failed rc=${rc}" + ocf_log err "${msg}" + rc=${OCF_NOT_RUNNING} + fi + return $rc +} + +hbsAgent_confirm_stop () { + + proc="hbsAgent:confirm_stop" + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "${proc}" + fi + + rc=0 + pid=`pidof ${OCF_RESKEY_binary}` + kill -0 ${pid} 2> /dev/null + if [ $?
-eq 0 ] ; then + ocf_log info "${proc} 'kill -9 ${pid}'" + kill -9 ${pid} + ocf_log info "${proc}ped (by emergency kill -9 ${pid})" + sleep 1 + fi + + pid=`pidof ${OCF_RESKEY_binary}` + kill -0 ${pid} 2> /dev/null + if [ $? -eq 0 ] ; then + msg="${proc} unable kill [$pid] instance of ${OCF_RESKEY_binary}" + ocf_log err "${msg}" + rc=1 + fi + + rm -f ${OCF_RESKEY_pid} + return $rc +} + +hbsAgent_stop () { + + proc="hbsAgent:stop" + + # See if the process is running (by pidof, not by pidfile) + + pid=`pidof ${OCF_RESKEY_binary}` + ocf_log info "${proc} [${pid}]" + kill -0 ${pid} 2> /dev/null + if [ $? -ne 0 ] ; then + ocf_log info "${proc} called while already stopped (no process)" + hbsAgent_confirm_stop + if [ $? -ne 0 ] ; then + return ${OCF_FAILED_MASTER} + else + return ${OCF_SUCCESS} + fi + fi + + # try to kill 3 times before giving up + MAX=3 + for ((loop=0;loop<$MAX;loop++)) { + + # start with the pidfile + if [ -f ${OCF_RESKEY_pid} ] ; then + + pid=`cat ${OCF_RESKEY_pid}` + + # if pidfile is not empty then kill by -int + if [ "$pid" != "" ] ; then + + kill -0 ${pid} 2> /dev/null + if [ $? -eq 0 ] ; then + ocf_log info "${proc}ping [$pid] (by sigint on loop ${loop})" + kill -int ${pid} + sleep 1 + fi + fi + fi + + # break out if the process is stopped + pid=`pidof ${OCF_RESKEY_binary}` + kill -0 ${pid} 2> /dev/null + if [ $? -ne 0 ] ; then + break + fi + } + + pid=`pidof ${OCF_RESKEY_binary}` + if [ "$pid" != "" ] ; then + #ocf_log info "${proc} [pid:$pid] going into confirm_stop" + hbsAgent_confirm_stop + rc=$? + pid=`pidof ${OCF_RESKEY_binary}` + #ocf_log info "${proc} [pid:$pid] coming from confirm_stop" + if [ ${rc} -ne 0 ] ; then + return ${OCF_FAILED_MASTER} + else + return ${OCF_SUCCESS} + fi + else + return ${OCF_SUCCESS} + fi +} + +hbsAgent_reload () { + + local rc + + proc="hbsAgent:reload" + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "${proc}" + fi + + hbsAgent_stop + rc=$?
+ if [ $rc -eq ${OCF_SUCCESS} ] ; then + msg="${proc}ed" + ocf_log info "${mgs}" + fi + fi + + if [ ${rc} -ne ${OCF_SUCCESS} ] ; then + msg="${OCF_RESKEY_binary}: failed to restart rc=${rc}" + ocf_log info "${mgs}" + fi + + return ${rc} +} + + +case ${__OCF_ACTION} in + meta-data) meta_data + exit ${OCF_SUCCESS} + ;; + usage|help) usage + exit ${OCF_SUCCESS} + ;; +esac + +ocf_log info "hbsAgent:${__OCF_ACTION} action" + +# Anything except meta-data and help must pass validation +hbsAgent_validate || exit $? + +case ${__OCF_ACTION} in + + start) hbsAgent_start + ;; + stop) hbsAgent_stop + ;; + status) hbsAgent_status + ;; + reload) hbsAgent_reload + ;; + monitor) hbsAgent_monitor + ;; + validate-all) hbsAgent_validate + ;; + *) usage + exit ${OCF_ERR_UNIMPLEMENTED} + ;; +esac diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/hbsClient b/mtce-common/cgts-mtce-common-1.0/scripts/hbsClient new file mode 100644 index 00000000..f493c0df --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/hbsClient @@ -0,0 +1,105 @@ +#! /bin/sh +# +# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# chkconfig: 2345 95 95 +# +### BEGIN INIT INFO +# Provides: hbsClient +# Default-Start: 3 5 +# Default-Stop: 0 1 2 6 +# Short-Description: Heartbeat Client Daemon +### END INIT INFO + +. /etc/init.d/functions + +DAEMON_NAME="hbsClient" +DAEMON="/usr/local/bin/${DAEMON_NAME}" +PIDFILE="/var/run/${DAEMON_NAME}.pid" +IFACE="" + +# Linux Standard Base (LSB) Error Codes +RETVAL=0 +GENERIC_ERROR=1 +INVALID_ARGS=2 +UNSUPPORTED_FEATURE=3 +NOT_INSTALLED=5 +NOT_RUNNING=7 + +if [ ! 
-e "${DAEMON}" ] ; then + logger "${DAEMON} is missing" + exit ${NOT_INSTALLED} +fi + +RETVAL=0 + +PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin +export PATH + +case "$1" in + start) + # The heartbeat client runs on all nodes at nice -11 + logger "Starting ${DAEMON_NAME}" + echo -n "Starting ${DAEMON_NAME}: " + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + echo -n "is already running " + RETVAL=0 + else + start-stop-daemon --start -n -11 -b -x ${DAEMON} -- -l + RETVAL=$? + fi + if [ ${RETVAL} -eq 0 ] ; then + pid=`pidof ${DAEMON_NAME}` + echo "OK" + logger "${DAEMON} (${pid})" + else + echo "FAIL" + RETVAL=${GENERIC_ERROR} + fi + ;; + + stop) + logger "Stopping ${DAEMON_NAME}" + echo -n "Stopping ${DAEMON_NAME}: " + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + killproc ${DAEMON_NAME} + fi + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + echo "FAIL" + RETVAL=${NOT_RUNNING} + else + echo "OK" + fi + rm -f ${PIDFILE} + ;; + + restart) + $0 stop + $0 start + ;; + + status) + pid=`pidof ${DAEMON_NAME}` + RETVAL=$? 
+ if [ ${RETVAL} -eq 0 ] ; then + echo "${DAEMON_NAME} is running" + else + echo "${DAEMON_NAME} is NOT running" + RETVAL=${NOT_RUNNING} + fi + ;; + + condrestart) + $0 restart + ;; + + *) + echo "usage: $0 { start | stop | status | restart | condrestart | status }" + ;; +esac + +exit ${RETVAL} diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/hbsClient.conf b/mtce-common/cgts-mtce-common-1.0/scripts/hbsClient.conf new file mode 100644 index 00000000..333c1597 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/hbsClient.conf @@ -0,0 +1,30 @@ +[process] +process = hbsClient +service = hbsClient +script = /etc/init.d/hbsClient +style = lsb +pidfile = /var/run/hbsClient.pid +severity = major ; minor, major, critical +restarts = 1 ; restart retries before error assertion +interval = 1 ; number of seconds to wait between restarts +debounce = 10 ; number of seconds that a process needs to remain + ; running before degrade is removed and retry count + ; is cleared. +startuptime = 1 ; Seconds to wait after process start before starting the debounce monitor +mode = active ; Monitoring mode: passive (default) or active + ; passive: process death monitoring (default: always) + ; active : heartbeat monitoring, i.e. 
request / response messaging + ; ignore : do not monitor or stop monitoring +quorum = 1 ; process is in the host watchdog quorum +full_init_reqd = 1 ; indicates that full goenabled tests should pass + ; before we try to restart process + +subfunction = last-config ; if AIO then run monitor only after last config is run + +; Active Monitoring Options + +port = 2202 +period = 5 ; monitor period in seconds +timeout = 4 ; Messaging timeout period in seconds, must be shorter than period +threshold = 5 ; Number of back to back heartbeat failures before action + diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/hbsClient.service b/mtce-common/cgts-mtce-common-1.0/scripts/hbsClient.service new file mode 100644 index 00000000..9a31fc77 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/hbsClient.service @@ -0,0 +1,23 @@ +[Unit] +Description=Titanium Cloud Maintenance Heartbeat Client +After=network.target syslog.service config.service +Before=pmon.service mtcClient.service + +[Service] +Type=forking +ExecStart=/etc/rc.d/init.d/hbsClient start +ExecStop=/etc/rc.d/init.d/hbsClient stop +ExecReload=/etc/rc.d/init.d/hbsClient reload +PIDFile=/var/run/hbsClient.pid + +# Failure handling +TimeoutStartSec=10s +TimeoutStopSec=10s + +# process recovery is handled by pmond +Restart=no +RestartSec=5 + +[Install] +WantedBy=multi-user.target + diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/hwclock.service b/mtce-common/cgts-mtce-common-1.0/scripts/hwclock.service new file mode 100644 index 00000000..49199574 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/hwclock.service @@ -0,0 +1,13 @@ +[Unit] +Description=hwclock setup +After=network.target acpid.service +Before=config.service + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/etc/init.d/hwclock.sh start +ExecStop=/etc/init.d/hwclock.sh stop + +[Install] +WantedBy=multi-user.target diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/hwclock.sh
b/mtce-common/cgts-mtce-common-1.0/scripts/hwclock.sh new file mode 100755 index 00000000..e77fda5d --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/hwclock.sh @@ -0,0 +1,92 @@ +#!/bin/sh + +# +### BEGIN INIT INFO +# Provides: hwclock +# Required-Start: +# Required-Stop: $local_fs +# Default-Start: S +# Default-Stop: 0 6 +# Short-Description: Set system clock +# Description: Set system clock to hardware clock, according to the UTC +# setting in /etc/default/rcS (see also rcS(5)). +### END INIT INFO +# +# WARNING: If your hardware clock is not in UTC/GMT, this script +# must know the local time zone. This information is +# stored in /etc/localtime. This might be a problem if +# your /etc/localtime is a symlink to something in +# /usr/share/zoneinfo AND /usr isn't in the root +# partition! The workaround is to define TZ either +# in /etc/default/rcS, or in the proper place below. + +# Source function library. +. /etc/init.d/functions + +[ ! -x /sbin/hwclock ] && exit 0 + +[ -f /etc/default/rcS ] && . /etc/default/rcS + +[ "$UTC" = "yes" ] && tz="--utc" || tz="--localtime" +case "$1" in + start) + if [ "$VERBOSE" != no ] + then + echo "System time was `date`." + echo "Setting the System Clock using the Hardware Clock as reference..." + fi + + if [ "$HWCLOCKACCESS" != no ] + then + if [ -z "$TZ" ] + then + hwclock $tz --hctosys + else + TZ="$TZ" hwclock $tz --hctosys + fi + fi + + if [ "$VERBOSE" != no ] + then + echo "System Clock set. System local time is now `date`." + fi + ;; + stop|restart|reload|force-reload) + # + # Updates the Hardware Clock with the System Clock time. + # This will *override* any changes made to the Hardware Clock. + # + # WARNING: If you disable this, any changes to the system + # clock will not be carried across reboots. + # + if [ "$VERBOSE" != no ] + then + echo "Saving the System Clock time to the Hardware Clock..." 
+ fi + if [ "$HWCLOCKACCESS" != no ] + then + hwclock $tz --systohc + fi + if [ "$VERBOSE" != no ] + then + echo "Hardware Clock updated to `date`." + fi + exit 0 + ;; + show) + if [ "$HWCLOCKACCESS" != no ] + then + hwclock $tz --show + fi + ;; + status) + status hwclock + exit $? + ;; + *) + echo "Usage: hwclock.sh {start|stop|status|show|reload|restart}" >&2 + echo " start sets kernel (system) clock from hardware (RTC) clock" >&2 + echo " stop and reload set hardware (RTC) clock from kernel (system) clock" >&2 + exit 1 + ;; +esac diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/mgmtlinkup b/mtce-common/cgts-mtce-common-1.0/scripts/mgmtlinkup new file mode 100644 index 00000000..3cffbd01 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/mgmtlinkup @@ -0,0 +1,78 @@ +#! /bin/sh +# +# Copyright (c) 2013-2014 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# chkconfig: 2345 95 95 +# +### BEGIN INIT INFO +# Provides: mgmtlinkup +# Default-Start: 3 5 +# Default-Stop: 0 1 2 6 +# Short-Description: Loop trying to bring up the mgmt interface +### END INIT INFO + +.
/etc/init.d/functions + +PLATFORM_CONF="/etc/platform/platform.conf" + +IFACE="" + +# Linux Standard Base (LSB) Error Codes +RETVAL=0 +GENERIC_ERROR=1 +INVALID_ARGS=2 +UNSUPPORTED_FEATURE=3 +NOT_INSTALLED=5 +NOT_RUNNING=7 + +PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin +export PATH + +LINK_UP_RETRIES=10 +LINK_UP_INTERVAL=5 + +case "$1" in + start) + echo -n "$0 : " + if [ -f ${PLATFORM_CONF} ] ; then + while [ "${LINK_UP_RETRIES}" -gt 0 ] ; do + IFACE=`cat ${PLATFORM_CONF} | grep management_interface | cut -f2 -d'='` + if [ "${IFACE}" != "" ] ; then + if ip link show ${IFACE} | grep -sq 'state UP'; then + logger "${IFACE} is up" + echo "OK $IFACE is up" + break + fi + logger "trying to bring ${IFACE} up" + ip link set dev ${IFACE} up + sleep ${LINK_UP_INTERVAL} + LINK_UP_RETRIES=`expr ${LINK_UP_RETRIES} - 1` + fi + done + if [ "${LINK_UP_RETRIES}" -eq 0 ] ; then + echo "FAILED to bring $IFACE up" + fi + fi + ;; + + restart) + start + ;; + + status) + ;; + + condrestart) + $0 restart + ;; + + *) + echo "usage: $0 { start | stop | status | restart | condrestart | status }" + ;; +esac + +exit ${RETVAL} diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/mtc.conf b/mtce-common/cgts-mtce-common-1.0/scripts/mtc.conf new file mode 100644 index 00000000..06a181ac --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/mtc.conf @@ -0,0 +1,170 @@ +; CGTS Maintenance Service config file +[agent] ; Agent Configuration +scheduling_priority = 1 ; Range of 1 .. 99 is acceptable + ; +hbs_minor_threshold = 4 ; Heartbeat minor threshold count. + ; Specifies the number of consecutive + ; heartbeat misses that result in a + ; minor notification to maintenance. 
+ +hbs_calibrate_threshold = 7 ; number of hosts before calibration kicks in +hbs_calibrate_period_factor = 200 ; x for each host over hbs_calibrate_threshold +hbs_calibrate_minor_factor = 20 ; x for each host over hbs_calibrate_threshold +hbs_calibrate_degrade_factor = 21 ; x for each host over hbs_calibrate_threshold +hbs_calibrate_fail_factor = 30 ; x for each host over hbs_calibrate_threshold + +offline_period = 100 ; number of msecs to wait for each offline audit +offline_threshold = 46 ; number of back to back mtcAlive requests missed + ; 100:46 will yield a typical 5 sec holdoff from + ; failed to offline + +mnfa_threshold_type = number ; Two different types are supported + ; 'number' or 'percent' of simultaneous + ; failures that enable multi-node + ; failure avoidance handling + ; +mnfa_threshold_percent = 10 ; if ( mnfa_threshold_type == percent ) +mnfa_threshold_number = 3 ; if ( mnfa_threshold_type == number ) + +inventory_port = 6385 ; The Inventory Port Number +keystone_port = 5000 ; The Keystone Port Number +ha_port = 7777 ; The HA Port Number +mtc_agent_port = 2101 ; OBS: ........ Active Controller Maintenance Rx Port +mtc_rx_mgmnt_port = 2101 ; Active Controller Maintenance Mgmnt Network Rx Port +mtc_rx_infra_port = 2111 ; Active Controller Maintenance Infra Network Rx Port +hbs_agent_mgmnt_port = 2103 ; Management Interface Heartbeat Pulse Response Rx Port +hbs_agent_infra_port = 2113 ; Infrastructure Interface Heartbeat Pulse Response Rx Port +infra_agent_port = 2110 ; Agent Command Response RX Port +mtc_to_hbs_cmd_port = 2104 ; Mtc to Hbs Command Port Number +mtc_to_guest_cmd_port = 2108 ; Mtc to guestAgent Command port +hbs_to_mtc_event_port = 2107 ; Hbs to Mtc Event Port Number +inv_event_port = 2112 ; The Inventory Event Port Number + +token_refresh_rate = 1200 ; Authentication token refresh rate in seconds. + ; A value of zero means no refresh.
+ ; range is 0 or 600-22800 +autorecovery_threshold = 3 ; The number of times maintenance will try to + ; auto recover a critically failed controller + ; while there is no backup controllers to fail + ; over to before giving up. + +api_retries = 10 ; number of API retries b4 failure + +[client] ; Client Configuration +scheduling_priority = 99 ; realtime scheduling; range of 1 .. 99 is acceptable +mtc_rx_mgmnt_port = 2105 ; Client Maintenance Command Rx Port +mtc_rx_infra_port = 2115 ; Client Maintenance Command Rx Port +hbs_client_mgmnt_port = 2106 ; Management Interface Heartbeat Pulse Request Rx Port +hbs_client_infra_port = 2116 ; Infrastructure Interface Heartbeat Pulse Request Rx Port +hwmon_cmd_port = 2114 ; hwmond Command Rx Port Number +pmon_pulse_port = 2109 ; Process Monitor I'm Alive pulse Port Rx Port +rmon_event_port = 2302 ; Resource Monitor Event Port Rx Port +sched_delay_threshold = 300 ; scheduler delay time in msecs that will trigger + ; a scheduler history dump +daemon_log_port = 2121 ; daemon logger port +mtcalarm_req_port = 2122 ; + +[timeouts] ; configurable maintenance timeout values in seconds + +failsafe_shutdown_delay = 120; + +goenabled_timeout = 600 ; Time (seconds) that Mtce waits for + ; a hosts's goenabled message + ; after which it fails the enable. + ; Value boosted from 300 to 600 to handle LO DOR + +host_services_timeout = 300 ; Time (seconds) that the mtcClient waits for + ; all the host services scripts to complete. + ; Introduced in support of new monitored + ; implementation. + +node_reinstall_timeout = 2400 ; Timeout in seconds for a node reinstall. + ; There is no retry mechanism, the admin will be + ; notified that the reinstall failed. 
+ +loc_recovery_timeout = 5 ; Loss Of Communication Recovery Timeout + ; the max number of seconds that a host can be in + ; loss of communication state without failing the unit + +mnfa_recovery_timeout = 60 ; Multi-Node-Failure Avoidance Recovery Threshold + ; Similar to the LOC above for graceful recovery + ; hosts that have LOC for longer than this time in + ; seconds are failed and sent into the enable_handler + ; FSM while those that recover before this period are + ; sent into the graceful recovery_handler FSM. + +dor_mode_timeout = 20 ; The default base time in seconds for how long + ; maintenance DOR mode is active. This number + ; is extended by the number of enabled hosts. +dor_recovery_timeout_ext = 1800 ; Dor timeout extension. An extra time in seconds + ; that is added to the host specific recovery time + ; making the overall host's dor recovery timeout. + +swact_timeout = 120 ; Seconds Mtce waits for HA Service SWACT before failing + ; the swact operation + +sysinv_timeout = 20 ; timeout in seconds for sysinv REST requests +sysinv_noncrit_timeout = 10 ; timeout for non-critical sysinv REST requests +work_queue_timeout = 200 ; time to wait at the end of an action handler for all the + ; posted work items to be completed +uptime_period = 60 ; Mtce refresh of uptime in first hour. 
Not implemented +online_period = 7 ; Mtce refresh of locked availability status + +insv_test_period = 10 ; Time in seconds between in-service tests +oos_test_period = 300 ; Time in seconds between out-of-service tests + +audit_period = 50 ; Audit period + +[debug] ; SIGHUP to reload +debug_timer = 0 ; enable(1) or disable(0) timer logs (tlog) +debug_json = 0 ; enable(1) or disable(0) Json logs (jlog) +debug_fsm = 0 ; enable(1) or disable(0) fsm logs (flog) +debug_http = 0 ; enable(1) or disable(0) http logs (hlog) +debug_alive = 0 ; enable(1) or disable(0) mtcAlive logs (alog) +debug_bm = 0 ; enable(1) or disable(0) board management (blog) +debug_msg = 0 ; enable(1) or disable(0) message logs (mlog) +debug_state = 0 ; enable(1) or disable(0) state change logs (clog) +debug_work = 0 ; enable(1) or disable(0) work queue trace logs (qlog) +debug_level = 0 ; decimal mask 0..15 (8,4,2,1) and 16 for mem logging +debug_all = 0 ; enable full tracing + +flush = 1 ; enable(1) or disable(0) force log flush (main loop) +flush_thld = 5 ; if enabled - force flush after this number of loops +latency_thld = 500 ; scheduling latency log threshold ; msec +debug_event = none ; string name of HTTP API to trace +debug_filter = none ; filter string (not used yet) + +infra_degrade_only = 0 ; Only degrade for infra failures +testmode = 0 ; set to 1 to enable test mode +testmask = 0 ; bit field of out-of-service stress tests +fit_code = 0 ; codes are in nodeBase.h -> fit_code_enum +fit_host = none ; the hostname to apply the fit to + +stall_pmon_thld = 1250 ; number of missed pmond pulses before we enter monitor mode + ; 1200 pulses * 50 msec select timeout is 60 seconds +stall_poll_period = 20 ; number of seconds between monitor periods + +stall_mon_period = 120 ; consecutive seconds that processes do not show forward + ; progress before we perform recovery action +stall_mon_start_delay = 600 ; number of seconds after the hbsClient is started before + ; the stall monitor will start 
monitoring +stall_rec_thld = 2 ; How many processes have to be failed for + ; recovery action to take effect + ; A poll period failure for a process is constituted by the + ; inability to get pid, read scheduler stats or the stats + ; not changing during that period + ; 10 of the 11 audits need to fail befor we declare that + ; the process has failed. + ; In this case with 5 monitored processes and this threshold + ; set to 4 then if 4 or more of the 5 processes fail all + ; but 1 of the audits then we take recovery action + +; A list of up to 7 processes to monitor. +; Use 'none' a key word for an unused process timeslot. +mon_process_1 = pmond +mon_process_2 = fsmond +mon_process_3 = none +mon_process_4 = none +mon_process_5 = none +mon_process_6 = none +mon_process_7 = none diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/mtc.ini b/mtce-common/cgts-mtce-common-1.0/scripts/mtc.ini new file mode 100644 index 00000000..8001e41f --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/mtc.ini @@ -0,0 +1,14 @@ +; Packstack Managed Maintenance Service Config file +[agent] +keystone_auth_host = localhost ; keystone auth url +keystone_auth_uri = none ; keystone auth uri +multicast = 239.1.1.2 ; Heartbeat Multicast Address +heartbeat_period = 100 ; Heartbeat period in milliseconds +heartbeat_failure_threshold = 10 ; Heartbeat failure threshold count. +heartbeat_degrade_threshold = 6 ; Heartbeat degrade threshold count. + +[timeouts] +compute_boot_timeout = 720 ; The max time (seconds) that Mtce waits for the mtcAlive +controller_boot_timeout = 1200 ; message after which it will time out and fail the host. + + diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/mtcAgent b/mtce-common/cgts-mtce-common-1.0/scripts/mtcAgent new file mode 100755 index 00000000..6e75ace2 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/mtcAgent @@ -0,0 +1,521 @@ +#!/bin/sh +# +# Copyright (c) 2013-2017 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# Support: www.windriver.com +# +# Purpose: This resource agent manages +# +# .... the Titanium Cloud Controller Maintenance Daemon +# +# RA Spec: +# +# http://www.opencf.org/cgi-bin/viewcvs.cgi/specs/ra/resource-agent-api.txt?rev=HEAD +# +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + + +####################################################################### + +# Fill in some defaults if no values are specified +OCF_RESKEY_binary_default="mtcAgent" +OCF_RESKEY_config_default="/etc/mtc.ini" +OCF_RESKEY_dbg_default="false" +OCF_RESKEY_logging_default="true" +OCF_RESKEY_mode_default="normal" +OCF_RESKEY_user_default="admin" +OCF_RESKEY_pid_default="/var/run/mtcAgent.pid" +OCF_RESKEY_state_default="standby" + + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_logging=${OCF_RESKEY_logging_default}} +: ${OCF_RESKEY_dbg=${OCF_RESKEY_dbg_default}} +: ${OCF_RESKEY_mode=${OCF_RESKEY_mode_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} +: ${OCF_RESKEY_state=${OCF_RESKEY_state_default}} + +mydaemon="/usr/local/bin/${OCF_RESKEY_binary}" +statusfile="/var/run/${OCF_RESKEY_binary}.info" + +####################################################################### + +usage() { + cat < + + +1.0 + + +This 'mtcAgent' is an OCF Compliant Resource Agent that manages start, stop +and in-service monitoring of the Host Maintenance Process on Wind River's +Titanium Cloud in the active mode. + + + +Manages the Titanium Cloud's Maintenance (mtcAgent) Daemon. + + + + + + + +state = standby ... run maintenance daemon in 'standby' mode (default) +state = active ... run maintenance daemon in 'active' mode + +Maintenance Activity State Option + + + + + +mode = normal ... 
run maintenance daemon in 'normal' mode (default) +mode = passive ... run maintenance daemon in 'passive' mode + +Maintenance Mode Option + + + + + + +This option is used to direct the mtcAgent dameon log stream. + +logging = true ... /var/log/mtcAgent.log (default) +logging = false ... /dev/null + +See also debug option which sets the verbosity of logging. + +Service Logging Control Option + + + + + + +dbg = false ... info, warn and err logs sent to output stream (default) +dbg = true ... Additional dbg logs are also sent to the output stream + +Service Debug Control Option + + + + + + + + + + + + + + +END + return ${OCF_SUCCESS} +} + +mtcAgent_validate() { + + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "mtcAgent:validate" + fi + + check_binary "/usr/local/bin/${OCF_RESKEY_binary}" + check_binary "/usr/local/bin/hbsAgent" + check_binary "/usr/local/bin/mtcClient" + check_binary "/usr/local/bin/hbsClient" + check_binary sysinv-api + check_binary pidof + + if [ ! -f ${OCF_RESKEY_config} ] ; then + msg="${OCF_RESKEY_binary} ini file missing ${OCF_RESKEY_config}" + ocf_log err "${msg}" + return ${OCF_ERR_CONFIGURED} + fi + + return ${OCF_SUCCESS} +} + +function log_procfs() +{ + pid=`cat ${OCF_RESKEY_pid}` + PROCSCHEDFILE="/proc/$pid/sched" + if [ -r $PROCSCHEDFILE ] ; then + PROCSCHED="$(cat $PROCSCHEDFILE 2>&1)" + echo "$PROCSCHED" | while read line; do + ocf_log info "sched: ${line}" + done + fi + + PROCSTACKFILE="/proc/$pid/stack" + echo "stack file: $PROCSTACKFILE" + if [ -r $PROCSTACKFILE ] ; then + PROCSTACK="$(cat $PROCSTACKFILE 2>&1)" + echo "$PROCSTACK" | while read line; do + ocf_log info "stack: ${line}" + done + fi +} + +# total worst case timeout of this status check is 13 seconds. +# This is 2 seconds under SM's default 15 second timeout. 
+mtcAgent_status () { + + proc="mtcAgent:status" + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "mtcAgent:status" + fi + + # remove the status file before we request a new + rm -f ${statusfile} + + # Verify the pid file exists as part of status + for ((loop=0;loop<3;loop++)) { + if [ -f ${OCF_RESKEY_pid} ] ; then + break + else + sleep 1 + fi + } + + # See if the daemon is running + pid=`cat ${OCF_RESKEY_pid}` + kill -0 $pid 2> /dev/null + if [ $? -eq 0 ] ; then + + log_sig="${OCF_RESKEY_binary} In-Service Active Monitor Test" + + # Ask the daemon to produce status + ocf_run kill -s USR1 $pid + + # Wait for the response + for ((loop=0;loop<10;loop++)) { + sleep 1 + if [ -f ${statusfile} ] ; then + + ocf_log info "${log_sig} Passed ($loop)" + return ${OCF_SUCCESS} + + elif [ $loop -eq 5 ] ; then + + # send the signal again + ocf_run kill -s USR1 $pid + + pid_stat=`cat /proc/${pid}/stat` + ocf_log notice "${log_sig} is slow to respond" + ocf_log notice "$pid_stat" + + elif [ $loop -eq 8 ] ; then + + pid_stat=`cat /proc/${pid}/stat` + ocf_log warn "${log_sig} is very slow to respond" + ocf_log warn "$pid_stat" + + fi + } + log_procfs + ocf_log err "${log_sig} Failed" + return ${OCF_ERR_GENERIC} + fi + return ${OCF_NOT_RUNNING} +} + +mtcAgent_monitor () { + + proc="mtcAgent:monitor" + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "${proc}" + fi + + # Uncomment if you want the monitor function to force-pass + # return ${OCF_SUCCESS} + + pid=`cat ${OCF_RESKEY_pid}` + kill -0 $pid 2> /dev/null + if [ $? -ne 0 ] ; then + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "${proc} called while ${OCF_RESKEY_binary} not running." + fi + return ${OCF_NOT_RUNNING} + fi + + mtcAgent_status + return $? 
+} + + +mtcAgent_start () { + + local rc + + start_proc="mtcAgent:start" + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "${start_proc}" + fi + + # Uncomment if you want the start function to force-pass without starting + # return ${OCF_SUCCESS} + + # If running then issue a ping test + pid=`cat ${OCF_RESKEY_pid}` + kill -0 $pid 2> /dev/null + if [ $? -eq 0 ] ; then + mtcAgent_status + rc=$? + if [ $rc -ne ${OCF_SUCCESS} ] ; then + msg="${start_proc} ping test failed rc=${rc}" + ocf_log err "${msg}" + mtcAgent_stop + else + # Spec says to return success if process is already running for start + pid=`cat ${OCF_RESKEY_pid}` + kill -0 $pid 2> /dev/null + if [ $? -eq 0 ] ; then + ocf_log info "${start_proc} called while ${OCF_RESKEY_binary} is already running" + return ${OCF_SUCCESS} + fi + fi + fi + + # should not be running now or error + pid=`cat ${OCF_RESKEY_pid}` + kill -0 $pid 2> /dev/null + if [ $? -eq 0 ] ; then + msg="${start_proc} cannot kill off existing instance of ${OCF_RESKEY_binary}" + ocf_log err "${msg}" + return ${OCF_RUNNING_MASTER} + fi + + rm -f ${statusfile} + + if [ ${OCF_RESKEY_state} = "active" ] ; then + RUN_OPT_STATE="-a" + else + RUN_OPT_STATE="" + fi + + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + RUN_OPT_DEBUG="-d debug" + else + RUN_OPT_DEBUG="" + fi + + if [ ${OCF_RESKEY_mode} = "passive" ] ; then + RUN_OPT_MODE="-p" + else + RUN_OPT_MODE="" + fi + + if [ ${OCF_RESKEY_logging} = "true" ] ; then + RUN_OPT_LOG="-l" + else + RUN_OPT_LOG="" + fi + + # default PID to null + pid="" + + # Try to Start the daemon + ${mydaemon} ${RUN_OPT_STATE} ${RUN_OPT_LOG} ${RUN_OPT_MODE} ${RUN_OPT_DEBUG} + rc=$? + + # verify it was started and set return code appropriately + if [ $rc -eq ${OCF_SUCCESS} ] ; then + # Verify the pid file exists as part of status + for ((loop=0;loop<3;loop++)) { + if [ -f ${OCF_RESKEY_pid} ] ; then + break + else + ocf_log info "${start_proc} waiting ... 
loop=${loop}" + sleep 1 + fi + } + + pid=`cat ${OCF_RESKEY_pid}` + # ocf_log info "PID:$pid" + kill -0 $pid 2> /dev/null + if [ $? -ne 0 ] ; then + rc=${OCF_FAILED_MASTER} + else + if [ ! -f ${statusfile} ] ; then + ocf_log info "mtcAgent: Startup Health Test Failed - missing info" + rc=${OCF_ERR_GENERIC} + fi + fi + else + ocf_log info "${start_proc} failed ${mydaemon} daemon rc=${rc}" + rc=${OCF_ERR_GENERIC} + fi + + # Record success or failure and return status + if [ ${rc} -eq $OCF_SUCCESS ] ; then + msg="${start_proc}ed pid=${pid}" + ocf_log info "${msg}" + else + msg="${start_proc} failed rc=${rc}" + ocf_log err "${msg}" + rc=${OCF_NOT_RUNNING} + fi + return $rc +} + +mtcAgent_confirm_stop () { + + proc="mtcAgent:confirm_stop" + ocf_log info "${proc}" + + pid=`pidof ${OCF_RESKEY_binary}` + kill -0 ${pid} 2> /dev/null + if [ $? -eq 0 ] ; then + ocf_log info "${proc} 'kill -9 ${pid}'" + kill -9 ${pid} + ocf_log info "${proc}ed (by emergency kill -9 ${pid})" + sleep 1 + fi + rm -f ${OCF_RESKEY_pid} +} + +mtcAgent_stop () { + + proc="mtcAgent:stop" + + # See if the process is running (looked up by name with pidof) + + pid=`pidof ${OCF_RESKEY_binary}` + ocf_log info "${proc} PID:${pid}" + kill -0 ${pid} 2> /dev/null + if [ $? -ne 0 ] ; then + ocf_log info "${proc} called while already stopped (no process)" + mtcAgent_confirm_stop + return ${OCF_SUCCESS} + fi + + MAX=3 + for ((loop=0;loop<$MAX;loop++)) { + + # verify stop with pidfile + if [ -f ${OCF_RESKEY_pid} ] ; then + + pid=`cat ${OCF_RESKEY_pid}` + + # if the pid file is empty we are done + if [ "${pid}" = "" ] ; then + ocf_log info "${proc}ped (by -int)" + break + + # otherwise signal the recorded pid by -int until it is gone + else + + kill -0 ${pid} 2> /dev/null + if [ $?
-ne 0 ] ; then + ocf_log info "${proc}ped (by pid)" + break + else + ocf_log info "${proc}ping (by -int - loop:${loop})" + kill -int ${pid} + sleep 1 + fi + fi + fi + } + mtcAgent_confirm_stop + return ${OCF_SUCCESS} +} + +mtcAgent_reload () { + + local rc + + proc="mtcAgent:reload" + if [ ${OCF_RESKEY_dbg} = "true" ] ; then + ocf_log info "${proc}" + fi + + mtcAgent_stop + rc=$? + if [ $rc -eq ${OCF_SUCCESS} ] ; then + #sleep 1 + mtcAgent_start + rc=$? + if [ $rc -eq ${OCF_SUCCESS} ] ; then + msg="${proc}ed" + ocf_log info "${msg}" + fi + fi + + if [ ${rc} -ne ${OCF_SUCCESS} ] ; then + msg="${OCF_RESKEY_binary}: failed to restart rc=${rc}" + ocf_log info "${msg}" + fi + + return ${rc} +} + +case ${__OCF_ACTION} in + meta-data) meta_data + exit ${OCF_SUCCESS} + ;; + usage|help) usage + exit ${OCF_SUCCESS} + ;; +esac + +ocf_log info "mtcAgent:${__OCF_ACTION} action" + +# Anything except meta-data and help must pass validation +mtcAgent_validate || exit $? + +case ${__OCF_ACTION} in + start) mtcAgent_start + ;; + stop) mtcAgent_stop + ;; + status) mtcAgent_status + ;; + reload) mtcAgent_reload + ;; + monitor) mtcAgent_monitor + ;; + validate-all) mtcAgent_validate + ;; + *) usage + exit ${OCF_ERR_UNIMPLEMENTED} + ;; +esac diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/mtcClient b/mtce-common/cgts-mtce-common-1.0/scripts/mtcClient new file mode 100644 index 00000000..5accff32 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/mtcClient @@ -0,0 +1,118 @@ +#! /bin/sh +# +# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# chkconfig: 2345 95 95 +# +### BEGIN INIT INFO +# Provides: mtcClient +# Default-Start: 3 5 +# Default-Stop: 0 1 2 6 +# Short-Description: Maintenance Client Daemon +### END INIT INFO + +.
/etc/init.d/functions + +DAEMON_NAME="mtcClient" +DAEMON="/usr/local/bin/${DAEMON_NAME}" +PIDFILE="/var/run/${DAEMON_NAME}.pid" +PLATFORM_CONF="/etc/platform/platform.conf" + +IFACE="" + +# Linux Standard Base (LSB) Error Codes +RETVAL=0 +GENERIC_ERROR=1 +INVALID_ARGS=2 +UNSUPPORTED_FEATURE=3 +NOT_INSTALLED=5 +NOT_RUNNING=7 + +PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin +export PATH + +if [ ! -e "${DAEMON}" ] ; then + logger "${DAEMON} is missing" + exit ${NOT_INSTALLED} +fi + +# Note: Commenting out this check and replace it with a change to the +# mtcClient service file to have it auto restarted by systemd if it fails. +# This check or recovery action has been seen to stall the startup script +# longer than 10 seconds. +# +# if [ -f ${PLATFORM_CONF} ] ; then +# IFACE=`cat ${PLATFORM_CONF} | grep management_interface | cut -f2 -d'='` +# if [ "${IFACE}" != "" ] ; then +# if ip link show $IFACE | grep -sq 'state DOWN'; then +# ip link set dev $IFACE up +# fi +# fi +# fi + +case "$1" in + start) + logger "Starting ${DAEMON_NAME}" + echo -n "Starting ${DAEMON_NAME}: " + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + echo -n "is already running " + RETVAL=0 + else + start-stop-daemon --start -b -x ${DAEMON} -- -l + RETVAL=$? + fi + if [ ${RETVAL} -eq 0 ] ; then + pid=`pidof ${DAEMON_NAME}` + echo "OK" + logger "${DAEMON} (${pid})" + else + echo "FAIL" + RETVAL=${GENERIC_ERROR} + fi + ;; + + stop) + logger "Stopping ${DAEMON_NAME}" + echo -n "Stopping ${DAEMON_NAME}: " + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + killproc ${DAEMON_NAME} + fi + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + echo "FAIL" + RETVAL=${NOT_RUNNING} + else + echo "OK" + fi + rm -f ${PIDFILE} + ;; + + restart) + $0 stop + $0 start + ;; + + status) + pid=`pidof ${DAEMON_NAME}` + RETVAL=$? 
+ if [ ${RETVAL} -eq 0 ] ; then + echo "${DAEMON_NAME} is running" + else + echo "${DAEMON_NAME} is NOT running" + RETVAL=${NOT_RUNNING} + fi + ;; + + condrestart) + $0 restart + ;; + + *) + echo "usage: $0 { start | stop | status | restart | condrestart | status }" + ;; +esac + +exit ${RETVAL} diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/mtcClient.conf b/mtce-common/cgts-mtce-common-1.0/scripts/mtcClient.conf new file mode 100644 index 00000000..174bd4c3 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/mtcClient.conf @@ -0,0 +1,26 @@ +[process] +process = mtcClient +service = mtcClient +pidfile = /var/run/mtcClient.pid +script = /etc/init.d/mtcClient +style = lsb ; ocf or lsb +severity = major ; minor, major, critical +restarts = 1 ; restart retries before error assertion +interval = 1 ; number of seconds to wait between restarts +debounce = 10 ; number of seconds that a process needs to remain + ; running before degrade is removed and retry count + ; is cleared. +startuptime = 1 ; Seconds to wait after process start before starting the debounce monitor +mode = active ; Monitoring mode: passive (default) or active + ; passive: process death monitoring (default: always) + ; active : heartbeat monitoring, i.e. 
request / response messaging + ; ignore : do not monitor or stop monitoring +quorum = 1 ; process is in the host watchdog quorum + +; Active Monitoring Options + +port = 2201 +period = 5 ; monitor period in seconds +timeout = 4 ; Messaging timeout period in seconds, must be shorter than period +threshold = 5 ; Number of back to back heartbeat failures before action + diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/mtcClient.service b/mtce-common/cgts-mtce-common-1.0/scripts/mtcClient.service new file mode 100644 index 00000000..28c0c4f1 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/mtcClient.service @@ -0,0 +1,23 @@ +[Unit] +Description=Titanium Cloud Maintenance Command Handler Client +After=network.target syslog.service config.service +Before=pmon.service + +[Service] +Type=forking +ExecStart=/etc/rc.d/init.d/mtcClient start +ExecStop=/etc/rc.d/init.d/mtcClient stop +ExecReload=/etc/rc.d/init.d/mtcClient reload +PIDFile=/var/run/mtcClient.pid +KillMode=process +SendSIGKILL=no + +# Process recovery is handled by pmond if its running. +# Delay 10 seconds to give pmond a chance to recover +# before systemd kicks in to do it as a backup plan. +Restart=always +RestartSec=10 + +[Install] +WantedBy=multi-user.target + diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/mtcTest b/mtce-common/cgts-mtce-common-1.0/scripts/mtcTest new file mode 100644 index 00000000..2bb2805c --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/mtcTest @@ -0,0 +1,38 @@ +#! /bin/sh +# +# Copyright (c) 2013-2014 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +# + +banner="Host Services" + +case "$1" in + start) + logger "${banner} $1: OK" + ;; + + stop) + logger "${banner} $1: OK" + ;; + + restart) + $0 stop + $0 start + ;; + + status) + logger "${banner} $1: OK" + ;; + + condrestart) + $0 restart + ;; + + *) + echo "usage: $0 { start | stop | status | restart | condrestart | status }" + ;; +esac +exit 0 diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/mtce.logrotate b/mtce-common/cgts-mtce-common-1.0/scripts/mtce.logrotate new file mode 100644 index 00000000..17842c8a --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/mtce.logrotate @@ -0,0 +1,59 @@ +#daily + +# Apply all these options to all the logs +nodateext +start 1 +compress +notifempty +missingok +sharedscripts +postrotate + systemctl reload syslog-ng > /dev/null 2>&1 || true +endscript + +/var/log/mtcAgent.log +{ + size 100M + rotate 10 +} + +/var/log/hbsAgent.log +{ + size 20M + rotate 5 +} + +/var/log/mtcClient.log +{ + size 20M + rotate 5 +} + +/var/log/hbsClient.log +{ + size 20M + rotate 5 +} + +/var/log/mtclogd.log +{ + size 10M + rotate 5 +} + +/var/log/mtcAgent_api.log +{ + size 20M + rotate 5 +} + +/var/log/mtcAgent_event.log +{ + size 20M + rotate 5 +} +/var/log/mtcAgent_alarm.log +{ + size 10M + rotate 5 +} diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/mtcinit b/mtce-common/cgts-mtce-common-1.0/scripts/mtcinit new file mode 100755 index 00000000..8ff9bbb5 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/mtcinit @@ -0,0 +1,129 @@ +#! /bin/sh +# +# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + + +. 
/etc/init.d/functions + +MTCCLIENT_NAME="mtcClient" +MTCCLIENT="/usr/local/bin/${MTCCLIENT_NAME}" + +HBSCLIENT_NAME="hbsClient" +HBSCLIENT="/usr/local/bin/${HBSCLIENT_NAME}" + +IFACE="" + +# Linux Standard Base (LSB) Error Codes +RETVAL=0 +GENERIC_ERROR=1 +INVALID_ARGS=2 +UNSUPPORTED_FEATURE=3 +NOT_INSTALLED=5 +NOT_RUNNING=7 + +if [ ! -e "$MTCCLIENT" ] ; then + logger "$MTCCLIENT is missing" + exit ${NOT_INSTALLED} +fi + +if [ ! -e "$HBSCLIENT" ] ; then + logger "$HBSCLIENT is missing" + exit ${NOT_INSTALLED} +fi + + + +PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin +export PATH + +case "$1" in + start) + # The mtcClient runs on all nodes + echo -n "Starting ${MTCCLIENT_NAME}: " + if [ -n "`pidof ${MTCCLIENT_NAME}`" ] ; then + echo -n "is already running " + RETVAL=0 + else + start-stop-daemon --start -b -x ${MTCCLIENT} -- -l ${IFACE} + RETVAL=$? + fi + if [ ${RETVAL} -eq 0 ] ; then + pid=`pidof ${MTCCLIENT_NAME}` + echo "OK" + logger "${MTCCLIENT} (${pid})" + else + echo "FAIL" + RETVAL=${GENERIC_ERROR} + fi + + # The heartbeat client runs on all nodes at nice -11 + echo -n "Starting ${HBSCLIENT_NAME}: " + if [ -n "`pidof ${HBSCLIENT_NAME}`" ] ; then + echo -n "is already running " + RETVAL=0 + else + start-stop-daemon --start -N -11 -b -x ${HBSCLIENT} -- -l ${IFACE} + RETVAL=$? 
+ fi + if [ ${RETVAL} -eq 0 ] ; then + pid=`pidof ${HBSCLIENT_NAME}` + echo "OK" + logger "${HBSCLIENT} (${pid})" + else + echo "FAIL" + RETVAL=${GENERIC_ERROR} + fi + ;; + + stop) + echo " " + echo -n "Stopping ${MTCCLIENT_NAME}: " + if [ -n "`pidof ${MTCCLIENT_NAME}`" ] ; then + killproc ${MTCCLIENT_NAME} + fi + if [ -n "`pidof ${MTCCLIENT_NAME}`" ] ; then + echo "FAIL" + RETVAL=${NOT_RUNNING} + else + echo "OK" + fi + rm -f /var/run/${MTCCLIENT_NAME}.pid + + + echo -n "Stopping ${HBSCLIENT_NAME}: " + if [ -n "`pidof ${HBSCLIENT_NAME}`" ] ; then + killproc ${HBSCLIENT_NAME} + fi + if [ -n "`pidof ${HBSCLIENT_NAME}`" ] ; then + echo "FAIL" + RETVAL=${NOT_RUNNING} + else + echo "OK" + fi + rm -f /var/run/${HBSCLIENT_NAME}.pid + + ;; + + restart) + $0 stop + sleep 1 + $0 start + ;; + + status) + RETVAL=0 + ;; + + condrestart) + $0 restart + ;; + + *) + echo "usage: $0 { start | stop | status | restart | condrestart | status }" + ;; +esac + +exit $RETVAL diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/mtclog b/mtce-common/cgts-mtce-common-1.0/scripts/mtclog new file mode 100644 index 00000000..ba102f90 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/mtclog @@ -0,0 +1,94 @@ +#! /bin/sh +# +# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# This script starts and stops a mtclogd Daemon process + +. /etc/init.d/functions + +DAEMON_NAME="mtclogd" +DAEMON="/usr/local/bin/${DAEMON_NAME}" +PIDFILE="/var/run/${DAEMON_NAME}.pid" +PLATFORM_CONF="/etc/platform/platform.conf" + +# Linux Standard Base (LSB) Error Codes +RETVAL=0 +GENERIC_ERROR=1 +INVALID_ARGS=2 +UNSUPPORTED_FEATURE=3 +NOT_INSTALLED=5 +NOT_RUNNING=7 + +PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin +export PATH + +if [ ! 
-e "${DAEMON}" ] ; then + logger "${DAEMON} is missing" + exit ${NOT_INSTALLED} +fi + +case "$1" in + start) + logger "Starting ${DAEMON_NAME}" + echo -n "Starting ${DAEMON_NAME}: " + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + echo -n "is already running " + RETVAL=0 + else + start-stop-daemon --start -b -x ${DAEMON} -- -l + RETVAL=$? + fi + if [ ${RETVAL} -eq 0 ] ; then + pid=`pidof ${DAEMON_NAME}` + echo "OK" + logger "${DAEMON} (${pid})" + else + echo "FAIL" + RETVAL=${GENERIC_ERROR} + fi + ;; + + stop) + logger "Stopping ${DAEMON_NAME}" + echo -n "Stopping ${DAEMON_NAME}: " + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + killproc ${DAEMON_NAME} + fi + if [ -n "`pidof ${DAEMON_NAME}`" ] ; then + echo "FAIL" + RETVAL=${NOT_RUNNING} + else + echo "OK" + fi + rm -f ${PIDFILE} + ;; + + restart) + $0 stop + $0 start + ;; + + status) + pid=`pidof ${DAEMON_NAME}` + RETVAL=$? + if [ ${RETVAL} -eq 0 ] ; then + echo "${DAEMON_NAME} is running" + else + echo "${DAEMON_NAME} is NOT running" + RETVAL=${NOT_RUNNING} + fi + ;; + + condrestart) + $0 restart + ;; + + *) + echo "usage: $0 { start | stop | status | restart | condrestart | status }" + ;; +esac + +exit ${RETVAL} diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/mtclog.service b/mtce-common/cgts-mtce-common-1.0/scripts/mtclog.service new file mode 100644 index 00000000..c7ec2e0c --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/mtclog.service @@ -0,0 +1,23 @@ +[Unit] +Description=Titanium Cloud Maintenance Logger +After=network.target syslog.service config.service +Before=pmon.service + +[Service] +Type=forking +ExecStart=/etc/rc.d/init.d/mtclog start +ExecStop=/etc/rc.d/init.d/mtclog stop +ExecReload=/etc/rc.d/init.d/mtclog reload +PIDFile=/var/run/mtclogd.pid + +# Failure handling +TimeoutStartSec=10s +TimeoutStopSec=10s + +# process recovery is handled by pmond +Restart=no +RestartSec=5 + +[Install] +WantedBy=multi-user.target + diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/mtclogd.conf 
b/mtce-common/cgts-mtce-common-1.0/scripts/mtclogd.conf new file mode 100755 index 00000000..1ee07e66 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/mtclogd.conf @@ -0,0 +1,17 @@ +[process] +process = mtclogd +service = mtclog +pidfile = /var/run/mtclogd.pid +script = /etc/init.d/mtclog +style = lsb ; ocf or lsb +severity = minor ; minor, major, critical +restarts = 3 ; restart retries before error assertion +interval = 1 ; number of seconds to wait between restarts +debounce = 3 ; number of seconds that a process needs to remain + ; running before degrade is removed and retry count + ; is cleared. +startuptime = 1 ; Seconds to wait after process start before starting the debounce monitor +mode = passive ; Monitoring mode: passive (default) or active + ; passive: process death monitoring (default: always) + ; active : heartbeat monitoring, i.e. request / response messaging + ; ignore : do not monitor or stop monitoring diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/runservices b/mtce-common/cgts-mtce-common-1.0/scripts/runservices new file mode 100755 index 00000000..b6708467 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/runservices @@ -0,0 +1,189 @@ +#!/bin/bash +# +# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# chkconfig: 2345 98 2 +# +### BEGIN INIT INFO +# Provides: runservices +# Default-Start: 3 5 +# Default-Stop: 0 1 2 6 +# Short-Description: Maintenance "Run Host Services" script +### END INIT INFO + +RUNSERVICES_PATH=${RUNSERVICES_PATH:-"/etc/services.d"} +RUNSERVICES_FILE=${RUNSERVICES_FILE:-"/var/run/runservices"} +RUNSERVICES_LOG=${RUNSERVICES_LOG:-"/var/log/runservices.log"} +RUNSERVICES_TAG=${RUNSERVICES_TAG:-"RUNSERVICES"} + +RETVAL=0 + +################################################################################ +# Log message to syslog +################################################################################ +function log +{ + logger -t ${RUNSERVICES_TAG} $@ +} + +################################################################################ +# Utility function to print the status of a command result +################################################################################ +function print_status() +{ + if [ "$1" -eq "0" ]; then + echo "[ OK ]" + else + echo "[FAILED]" + fi +} + +################################################################################ +# Run runservices scripts to check system status +################################################################################ +function runservices() +{ + if [ -d ${RUNSERVICES_PATH} ]; then + run-parts ${RUNSERVICES_PATH} -a ${1} 2>&1 | logger -t ${RUNSERVICES_TAG} + RET=${PIPESTATUS[0]} + if [ ${RET} -ne 0 ]; then + return ${RET} + fi + fi + + return 0 +} + + +################################################################################ +# Write runservices state file +################################################################################ +function runservices_enable() +{ + echo "`date`: `hostname` : All host services passed !" > ${RUNSERVICES_FILE} + RET=$? 
+ if [ ${RET} -ne 0 ]; then + log "Failed to write state file ${RUNSERVICES_FILE}" + return ${RET} + fi + + log "enabled" + + return 0 +} + + +################################################################################ +# Remove runservices state file +################################################################################ +function runservices_disable() +{ + rm -f ${RUNSERVICES_FILE} + RET=$? + if [ ${RET} -ne 0 ]; then + log "Failed to remove state file ${RUNSERVICES_FILE}" + return ${RET} + fi + + log "disabled" + + return 0 +} + +################################################################################ +# Start Action +################################################################################ +function start() +{ + echo -n "Starting Host Services: " + + runservices "start" + RETVAL=$? + if [ "$RETVAL" -ne "0" ]; then + log "Run Services check failed" + print_status $RETVAL + return + fi + + runservices_enable + RETVAL=$? + if [ "$RETVAL" -ne "0" ]; then + log "One or more Host Services failed" + print_status $RETVAL + return + fi + + print_status $RETVAL +} + +################################################################################ +# Stop Action +################################################################################ +function stop() +{ + echo -n "Stopping Host Services: " + + runservices "stop" + + runservices_disable + RETVAL=$? + if [ "$RETVAL" -ne "0" ]; then + log "Run Services stop failed" + print_status $RETVAL + return + fi + + print_status $RETVAL +} + +################################################################################ +# Status Action (passes when the runservices state file exists) +################################################################################ +function status() +{ + echo -n "Checking runservices: " + + test -f ${RUNSERVICES_FILE} + RETVAL=$?
+ if [ "$RETVAL" -ne "0" ]; then + print_status $RETVAL + return + fi + + print_status $RETVAL +} + + +################################################################################ +# Main Entry +################################################################################ + +case "$1" in + start) + start + ;; + + stop) + stop + ;; + + restart) + stop + start + ;; + + status) + status + ;; + + *) + echo "usage: $0 { start | stop | status | restart }" + exit 1 + ;; +esac + +exit $RETVAL diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/runservices.service b/mtce-common/cgts-mtce-common-1.0/scripts/runservices.service new file mode 100644 index 00000000..f29a5b32 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/runservices.service @@ -0,0 +1,12 @@ +[Unit] +Description=Titanium Cloud Run Host Services +After=network.target syslog.service + +[Service] +Type=simple +ExecStart=/etc/rc.d/init.d/runservices start +ExecStop=/etc/rc.d/init.d/runservices stop + +[Install] +WantedBy=multi-user.target + diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/sched_trace b/mtce-common/cgts-mtce-common-1.0/scripts/sched_trace new file mode 100644 index 00000000..3e8e0be1 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/sched_trace @@ -0,0 +1,54 @@ +#! /bin/sh +# +# Copyright (c) 2014 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# chkconfig: 2345 95 95 +# +### BEGIN INIT INFO +# Provides: scheduler ftrace +# Default-Start: 3 5 +# Default-Stop: 0 1 2 6 +# Short-Description: Start stop scheduler trace on cpu0 +### END INIT INFO + +PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin +export PATH + + +case "$1" in + start) + # Seems to default to enabled already, but enable it anyway. + echo 1 > /proc/sys/kernel/ftrace_enabled + + cd /sys/kernel/debug/tracing + + # This file doesn't exist in newer kernels. + echo 1 > tracing_enabled + + # Enable the nop tracer. + echo nop > current_tracer + + # Enable the ones we want. 
+ echo "sched_switch sched_wakeup sched_wakeup_new" >> set_event + + # Limit tracing to cpu0 + echo 1 > tracing_cpumask + + # Enable tracing + echo 1 > tracing_on + ;; + + stop) + # Disable tracing (absolute path: this branch never changes directory) + echo 0 > /sys/kernel/debug/tracing/tracing_on + + ;; + + *) + echo "usage: $0 { start | stop }" + ;; +esac diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/sensor_hp360_v1_ilo_v4.profile b/mtce-common/cgts-mtce-common-1.0/scripts/sensor_hp360_v1_ilo_v4.profile new file mode 100644 index 00000000..e655af66 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/sensor_hp360_v1_ilo_v4.profile @@ -0,0 +1,267 @@ +################################################################################### +# +# This file contains the sensor profile for the following board +# +# "ProLiant DL360 Gen9" with "iLO 4 Standard" +# +################################################################################### +# +# Please refer to sensor_integration_profile.README for a detailed +# explanation of the format and hierarchy of this file and how to +# develop a file like this for sensor integration for new servers.
+# +#################################################################################### + +[SERVER] + +targets = Targets:system1:map1 +name_cmd = show /system1 name +bmc_cmd = show /system1 name +name = ProLiant DL360 Gen9 +bmc = iLO 4 Standard +info = show /system1 name +dump_cmd = show /system1 name # show -a + + +group_types = COOLING,POWER,TEMPERATURE + +[LOGS] +show_brief = show /system1/log1 +show_detail = show -a /system1/log1 + +[INFO] +cmds = 7 +cmd1 = show /system1/bootconfig1 +cmd2 = show -a /system1/bootconfig1 +cmd3 = show -a /system1/firmware1 +cmd4 = show -a /system1/memory* +cmd5 = show -a /system1/swid* +cmd6 = show -a /system1/cpu* +cmd7 = show -a /system1/slot* + +[CMDS] +reset = reset /system1 +poweron = start /system1 +poweroff = stop /system1 +powerquery = show /system1 + + +################################################################################## +# +# COOLING sensor management +# ----------------------------- +# +################################################################################## + +[COOLING] +groups = FANS1 + +[FANS1] +group = server fans + +# sensor attributes for this group +sensortype = fan +datatype = discrete +interval = 100 ; seconds +unit = RPM + +cmd = show -a /system1/fan* + +ignore = na +pass = ok +minor = na +major = Bad +critical = critical + +#detail = show -a /system1/fan* +#health_label = HealthState +#status_label = OperationalStatus + +health = HealthState:Ok,na,Bad,Failed +status = OperationalStatus:Ok,na,Bad,Failed + +sensors = fan1,fan2,fan3,fan4,fan5,fan6,fan7,fan8 + +[fan1] +name = Fan Block 1 +cmd = show /system1/fan1 +[fan2] +name = Fan Block 2 +cmd = show /system1/fan2 +[fan3] +name = Fan Block 3 +cmd = show /system1/fan3 +[fan4] +name = Fan Block 4 +cmd = show /system1/fan4 +[fan5] +name = Fan Block 5 +cmd = show /system1/fan5 +[fan6] +name = Fan Block 6 +cmd = show /system1/fan6 +[fan7] +name = Fan Block 7 +cmd = show /system1/fan7 +[fan8] +name = Fan Block 8 +cmd = show /system1/fan8 + 
+################################################################################## +# +# TEMPERATURE sensor management +# ----------------------------- +# +################################################################################## + +[TEMPERATURE] +groups = TEMPERATURE1 + +[TEMPERATURE1] +group = server temperature + +cmd = show -a /system1/sensor* + +sensortype = temperature +datatype = analog +interval = 30 ; seconds +unit = Celsius + +health = HealthState:Ok,na,Bad,Failed +status = OperationalStatus:Ok,na,Bad,Failed + +sensors = sensor1,sensor2,sensor3,sensor4,sensor5,sensor6,sensor7,sensor8,sensor9,sensor10,sensor11,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21,sensor22,sensor23,sensor24,sensor25,sensor26,sensor27,sensor28,sensor29,sensor30,sensor31,sensor32,sensor33,sensor34,sensor35,sensor36,sensor37,sensor38,sensor39,sensor40,sensor41,sensor42 + +upper_minor = 38 +upper_major = 42 +upper_fatal = 46 + +[sensor1] +name = 01-Inlet Ambient +[sensor2] +name = 02-CPU 1 +[sensor3] +name = 03-CPU 2 +[sensor4] +name = 04-P1 DIMM 1-6 +[sensor5] +name = 05-P1 DIMM 7-12 +[sensor6] +name = 06-P2 DIMM 1-6 +[sensor7] +name = 07-P2 DIMM 7-12 +[sensor8] +name = 08-P1 Mem Zone +[sensor9] +name = 09-P1 Mem Zone +[sensor10] +name = 10-P2 Mem Zone +[sensor11] +name = 11-P2 Mem Zone +[sensor12] +name = 12-HD Max +[sensor13] +name = 13-Chipset 1 +[sensor14] +name = 14-Chipset1 Zone +[sensor15] +name = 15-P/S 1 Inlet +[sensor16] +name = 16-P/S 1 Zone +[sensor17] +name = 17-P/S 2 Inlet +[sensor18] +name = 18-P/S 2 Zone +[sensor19] +name = 19-PCI #1 +[sensor20] +name = 20-PCI #2 +[sensor21] +name = 21-VR P1 +[sensor22] +name = 22-VR P2 +[sensor23] +name = 23-VR P1 Mem +[sensor24] +name = 24-VR P1 Mem +[sensor25] +name = 25-VR P2 Mem +[sensor26] +name = 26-VR P2 Mem +[sensor27] +name = 27-VR P1Mem Zone +[sensor28] +name = 28-VR P1Mem Zone +[sensor29] +name = 29-VR P2Mem Zone +[sensor30] +name = 30-VR P2Mem Zone +[sensor31] +name = 
31-HD Controller +[sensor32] +name = 32-HD Cntlr Zone +[sensor33] +name = 33-PCI 1 Zone +[sensor34] +name = 34-PCI 1 Zone +[sensor35] +name = 35-LOM Card +[sensor36] +name = 36-PCI 2 Zone +[sensor37] +name = 37-System Board +[sensor38] +name = 38-System Board +[sensor39] +name = 39-Sys Exhaust +[sensor40] +name = 40-Sys Exhaust +[sensor41] +name = 41-Sys Exhaust +[sensor42] +name = 42-SuperCAP Max + +################################################################################## +# +# POWER sensor management +# ----------------------- +# +################################################################################## + +[POWER] + +groups = POWER1 + +[POWER1] + +group = server power + +sensortype = power +datatype = discrete +interval = 100 ; seconds + + +# rule = (?:ElementName| +cmd = show -a /system1/powersupply* + +ignore = na +pass = ok +minor = na +major = Bad +critical = Failed + +health = HealthState:Good,na,Bad,Failed +status = OperationalStatus:Ok,na,Bad,Failed + +sensors = powersupply1,powersupply2 + +[powersupply1] +key = powersupply1 +cmd = show /system1/powersupply1 +name = Power Supply + +[powersupply2] +key = powersupply2 +cmd = show /system1/powersupply2 +name = Power Supply +# 380 name = System diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/sensor_hp380_v1_ilo_v4.profile b/mtce-common/cgts-mtce-common-1.0/scripts/sensor_hp380_v1_ilo_v4.profile new file mode 100644 index 00000000..0b23d3e5 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/sensor_hp380_v1_ilo_v4.profile @@ -0,0 +1,265 @@ +################################################################################### +# +# This file contains the sensor profile for the following board +# +# "ProLiant DL380 Gen9" with "iLO 4 Standard" +# +################################################################################### +# +# Please refer to sensor_integration_profile.README for a detailed +# explanation of the format and hierarchy of this file and how to +# develop a file
like this for sensor integration for new servers. +# +#################################################################################### + +[SERVER] + +targets = Targets:system1:map1 +name_cmd = show /system1 name +bmc_cmd = show /system1 name +name = ProLiant DL380 Gen9 +bmc = iLO 4 Standard +info = show /system1 name +dump_cmd = show /system1 name # show -a + +group_types = COOLING,POWER,TEMPERATURE + +[LOGS] +show_brief = show /system1/log1 +show_detail = show -a /system1/log1 + +[INFO] +cmds = 7 +cmd1 = show /system1/bootconfig1 +cmd2 = show -a /system1/bootconfig1 +cmd3 = show -a /system1/firmware1 +cmd4 = show -a /system1/memory* +cmd5 = show -a /system1/swid* +cmd6 = show -a /system1/cpu* +cmd7 = show -a /system1/slot* + +[CMDS] +reset = reset /system1 +poweron = start /system1 +poweroff = stop /system1 +powerquery = show /system1 + +################################################################################## +# +# COOLING sensor management +# ----------------------------- +# +################################################################################## + +[COOLING] +groups = FANS1 + +[FANS1] +group = server fans + +# sensor attributes for this group +sensortype = fan +datatype = discrete +interval = 100 ; seconds +unit = RPM + +cmd = show -a /system1/fan* + +ignore = na +pass = ok +minor = na +major = Bad +critical = critical + +#detail = show -a /system1/fan* +#health_label = HealthState +#status_label = OperationalStatus + +health = HealthState:Ok,na,Bad,Failed +status = OperationalStatus:Ok,na,Bad,Failed + +sensors = fan1,fan2,fan3,fan4,fan5,fan6,fan7,fan8 + +[fan1] +name = Fan Block 1 +cmd = show /system1/fan1 +[fan2] +name = Fan Block 2 +cmd = show /system1/fan2 +[fan3] +name = Fan Block 3 +cmd = show /system1/fan3 +[fan4] +name = Fan Block 4 +cmd = show /system1/fan4 +[fan5] +name = Fan Block 5 +cmd = show /system1/fan5 +[fan6] +name = Fan Block 6 +cmd = show /system1/fan6 +[fan7] +name = Fan Block 7 +cmd = show /system1/fan7 +[fan8] 
+name = Fan Block 8 +cmd = show /system1/fan8 + +################################################################################## +# +# TEMPERATURE sensor management +# ----------------------------- +# +################################################################################## + +[TEMPERATURE] +groups = TEMPERATURE1 + +[TEMPERATURE1] +group = server temperature + +cmd = show -a /system1/sensor* + +sensortype = temperature +datatype = analog +interval = 30 ; seconds +unit = Celsius + +health = HealthState:Ok,na,Bad,Failed +status = OperationalStatus:Ok,na,Bad,Failed + +sensors = sensor1,sensor2,sensor3,sensor4,sensor5,sensor6,sensor7,sensor8,sensor9,sensor10,sensor11,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21,sensor22,sensor23,sensor24,sensor25,sensor26,sensor27,sensor28,sensor29,sensor30,sensor31,sensor32,sensor33,sensor34,sensor35,sensor36,sensor37,sensor38,sensor39,sensor40,sensor41,sensor42 + +upper_minor = 38 +upper_major = 42 +upper_fatal = 46 + +[sensor1] +name = 01-Inlet Ambient +[sensor2] +name = 02-CPU 1 +[sensor3] +name = 03-CPU 2 +[sensor4] +name = 04-P1 DIMM 1-6 +[sensor5] +name = 05-P1 DIMM 7-12 +[sensor6] +name = 06-P2 DIMM 1-6 +[sensor7] +name = 07-P2 DIMM 7-12 +[sensor8] +name = 08-P1 Mem Zone +[sensor9] +name = 09-P1 Mem Zone +[sensor10] +name = 10-P2 Mem Zone +[sensor11] +name = 11-P2 Mem Zone +[sensor12] +name = 12-HD Max +[sensor13] +name = 13-Chipset 1 +[sensor14] +name = 14-Chipset1 Zone +[sensor15] +name = 15-P/S 1 Inlet +[sensor16] +name = 16-P/S 1 Zone +[sensor17] +name = 17-P/S 2 Inlet +[sensor18] +name = 18-P/S 2 Zone +[sensor19] +name = 19-PCI #1 +[sensor20] +name = 20-PCI #2 +[sensor21] +name = 21-VR P1 +[sensor22] +name = 22-VR P2 +[sensor23] +name = 23-VR P1 Mem +[sensor24] +name = 24-VR P1 Mem +[sensor25] +name = 25-VR P2 Mem +[sensor26] +name = 26-VR P2 Mem +[sensor27] +name = 27-VR P1Mem Zone +[sensor28] +name = 28-VR P1Mem Zone +[sensor29] +name = 29-VR P2Mem Zone 
+[sensor30] +name = 30-VR P2Mem Zone +[sensor31] +name = 31-HD Controller +[sensor32] +name = 32-HD Cntlr Zone +[sensor33] +name = 33-PCI 1 Zone +[sensor34] +name = 34-PCI 1 Zone +[sensor35] +name = 35-LOM Card +[sensor36] +name = 36-PCI 2 Zone +[sensor37] +name = 37-System Board +[sensor38] +name = 38-System Board +[sensor39] +name = 39-Sys Exhaust +[sensor40] +name = 40-Sys Exhaust +[sensor41] +name = 41-Sys Exhaust +[sensor42] +name = 42-SuperCAP Max + +################################################################################## +# +# POWER sensor management +# ----------------------- +# +################################################################################## + +[POWER] + +groups = POWER1 + +[POWER1] + +group = server power + +sensortype = power +datatype = discrete +interval = 100 ; seconds + + +# rule = (?:ElementName| +cmd = show -a /system1/powersupply* + +ignore = na +pass = ok +minor = na +major = Bad +critical = Failed + +health = HealthState:Good,na,Bad,Failed +status = OperationalStatus:Ok,na,Bad,Failed + +sensors = powersupply1,powersupply2 + +[powersupply1] +key = powersupply1 +cmd = show /system1/powersupply1 +name = Power Supply + +[powersupply2] +key = powersupply2 +cmd = show /system1/powersupply2 +name = Power Supply +# 380 name = System diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/sensor_integration_profile.README b/mtce-common/cgts-mtce-common-1.0/scripts/sensor_integration_profile.README new file mode 100644 index 00000000..55c181ea --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/sensor_integration_profile.README @@ -0,0 +1,456 @@ +################################################################################# +# +# This is a sensor profile file for the following server +# +# All commands are SMASH CLP commands +# +# The SMASH CLP Spec is here - http://dmtf.org/standards/smash +# +################################################################################# + +[SERVER] + +targets = Targets:SP:SYS
+name_cmd = show /SYS +bmc_cmd = show /SP +name = Quanta Computer +bmc = Quanta(TM) Embedded Lights Out Manager +info = show /SYS +dump_cmd = show /SP ; there is no dump, this just prints the bmc version + +################################################################################# +# +# A sensor profile file is ini/config format that organizes sensors in groups +# using a hierarchy of [labels] and 'variables = value' +# +# [SERVER] +# targets +# name_cmd +# bmc_cmd +# name +# bmc +# dump_cmd +# group_types +# +## Detailed Explanation ... +# +# [SERVER] +# dump_cmd = command to read all the sensors and server info +# name_cmd = command to get the server name +# bmc_cmd = command to get the bmc name +# name = 'string name for the server' ; server name if available +# bmc = 'string name for the bmc' ; a string that can be queried from +# the bmc using the show command. +# targets = Targets:system1:map1 ; a list of SMASH command targets +# ; delimited by : +# +# Example: show +# +# Targets" +# system1" +# map1" +# Properties" +# Verbs" +# cd version exit show" +# +# group_types = COOLING,POWER,VOLTAGE,TEMPERATURE ; comma delimited list of sensor type labels +# +# Generic commands used to pull data from the BMC +# +# [LOGS] - read the SEL +# +# [CMDS] - reset, power on and power off commands +# +# reset = reset /SYS +# poweron = start /SYS +# poweroff = stop /SYS +# +# [INFO] - log board info like firmware versions, switch settings etc. +# +# [FIT] - force failure of a specific sensor +# +# hostname = compute-1 +# sensor = fan1 +# severity = critical +# +############################################################################### +# +# Next Level of Hierarchy is each label in the group_types.
3 Examples are used +# +# [COOLING] ; [SERVER]: +# +# groups = FANS1,FANS2 ; Here we have 2 groups, FANS1 and FANS2 +# +# [FANS1] +# +# group = server fans ; the group name that shows in the GUI +# cmd = show -a /system1/fan* ; the smash command that reads the sensor +# sensortype = fan ; or power,temperature,voltage,etc +# datatype = discrete ; discrete or analog +# interval = 100 ; seconds +# sensors = FAN1,FAN2,FAN3 ; abstract list of labels; one for each sensor +# +# [FAN1] +# name = +# cmd = +# +# [FAN2] +# name = +# cmd = +# +# [FAN3] +# name = +# cmd = +# +# [FANS2] +# ... another group with same format as FANS1 +# +# +## sensor attributes for this group + +# +# +# +# +#### Next Group Type +# +# [POWER] +# groups +# +# sensors +# [VOLTAGE] +# List of sensor types. +# +# Each group type contains one or more groups and one or more sensors. +# +# Here is the hierarchy model +# - targets - a key word list of targets delimited by ':' +# - name - string read from the target that represents the server +# - bmc - string read from the target that represents the bmc +# - group_types[i] - list of functional group types +# - groups[j] - list of sensor groups within this type +# - group_name - the name of the group that shows in the GUI +# - sensors[k] - list of sensors within this group +# +# Other key labels include +# +# +################################################################################# + +# This file contains the sensor profile for the following SERVER name + +group_types = COOLING,POWER,TEMPERATURE,VOLTAGE + +# show commands for brief and detailed log dumps (if applicable) +[LOGS] +show_brief = na +show_detail = na + +# Show commands for server information (if applicable) +[INFO] +cmds = 0 + +[CMDS] +reset = reset /SYS +poweron = start /SYS +poweroff = stop /SYS + +[FIT] +hostname = none +sensor = none +severity = none + +################################################################################## +# Server Power
+################################################################################## +[POWER] +# abstract labels for each group if this group type +groups = POWER1 + +[POWER1] +# The name of the group that will show up in the GUI +group = server power + +# sensor attributes for this group +sensortype = power +datatype = discrete +interval = 100 ; seconds + +# the command that will read the sensors in this group +cmd = show /SYS/powerSupply + +# list of abstract labels for the sensors in this group +sensors = PSU12,PSU1,PSU2 + +# the individual sensors in this group +[PSU12] +name = PSU Redundancy + +[PSU1] +name = PSU1 Status + +[PSU2] +name = PSU2 Status + +################################################################################## +# FAN sensor management +################################################################################## +[COOLING] +# abstract labels for each group if this group type +groups = FANS1,FANS2 + +[FANS1] +# The name of the group that will show up in the GUI +group = server fans + +# sensor attributes for this group +sensortype = fan +datatype = discrete +interval = 120 ; seconds + +# the commands that will read the sensors in this group +cmd = show /SYS/fan + +# TODO: the sensor reading rules +ignore = na +pass = ok +minor = na +major = nonCritical +critical = critical + +# status output delimiter +delimitor = = + +# list of abstract labels for the sensors in this group +sensors = FANS1_1,FANS1_2,FANS1_3,FANS1_4,FANS1_5,FANS1_6,FANS1_7,FANS1_8,FANS1_9,FANS1_10,FANS1_11,FANS1_12 + +# the individual sensors in this group +[FANS1_1] +name = Fan_SYS0_1 + +[FANS1_2] +name = Fan_SYS0_2 + +[FANS1_3] +name = Fan_SYS1_1 + +[FANS1_4] +name = Fan_SYS1_2 + +[FANS1_5] +name = Fan_SYS2_1 +parser_ut = + +[FANS1_6] +name = Fan_SYS2_2 + +[FANS1_7] +name = Fan_SYS3_1 + +[FANS1_8] +name = Fan_SYS3_2 + +[FANS1_9] +name = Fan_SYS4_1 + +[FANS1_10] +name = Fan_SYS4_2 + +[FANS1_11] +name = Fan_SYS5_1 + +[FANS1_12] +name = Fan_SYS5_2 + + 
+############################## +# FANS2 Grouping +############################## +[FANS2] +# The name of the group that will show up in the GUI +group = power supply fans + +# sensor attributes for this group +sensortype = fan +datatype = discrete +interval = 120 ; seconds + +# the commands that will read the sensors in this group +cmd = show /SYS/fan + +# TODO: the sensor reading rules +ignore = na +pass = ok +minor = na +major = nonCritical +critical = critical + +# Status output delimiter +delimitor = = + +# list of abstract labels for the sensors in this group +sensors = FANS2_1,FANS2_2 + +# the individual sensors in this group +[FANS2_1] +name = Fan_PSU1 + +[FANS2_2] +name = Fan_PSU2 + +################################################################################## +# +# TEMPERATURE sensor management +# ----------------------------- +# +# sensors .. The list of sensor labels in this group delimited by commas. +# Each sensor has its own label for unique name and thresholds +# +################################################################################## +[TEMPERATURE] +# abstract labels for each group of this group type +groups = TEMPERATURE1 + +# TEMPERATURE sensor group 1 (sensors that have similar attributes) +[TEMPERATURE1] + +# The name of the group that will show up in the GUI +group = server temperature + +# sensor attributes for this group +sensortype = temperature +datatype = discrete +interval = 34 ; seconds +unit = Celsius + +# the commands that will read the sensors in this group +cmd = show /SYS/temperature + +# Status output delimiter +delimitor = = + +# TODO: the sensor reading rules +ignore = na +pass = ok +minor = na +major = nonCritical +critical = critical + +# list of abstract labels for the sensors in this group +sensors = TEMP1,TEMP2,TEMP3,TEMP4,TEMP5,TEMP6,TEMP7,TEMP8,TEMP9,TEMP10,TEMP11,TEMP12,TEMP13,TEMP14,TEMP15,TEMP16,TEMP17,TEMP18,TEMP19,TEMP20,TEMP21,TEMP22,TEMP23,TEMP24 + +# the individual sensors in this group +[TEMP1]
+name = PCH Thermal Trip +[TEMP2] +name = MB Thermal Trip +[TEMP3] +name = Temp_CPU0 +[TEMP4] +name = Temp_CPU1 +[TEMP5] +name = Temp_VR_CPU0 +[TEMP6] +name = Temp_VR_CPU1 +[TEMP7] +name = Temp_DIMM_AB +[TEMP8] +name = Temp_DIMM_CD +[TEMP9] +name = Temp_DIMM_EF +[TEMP10] +name = Temp_DIMM_GH +[TEMP11] +name = Temp_VR_DIMM_AB +[TEMP12] +name = Temp_VR_DIMM_CD +[TEMP13] +name = Temp_VR_DIMM_EF +[TEMP14] +name = Temp_VR_DIMM_GH +[TEMP15] +name = Temp_Ambient_FP +[TEMP16] +name = Temp_PCI_Area +[TEMP17] +name = Temp_PCI_Inlet1 +[TEMP18] +name = Temp_PCI_Inlet2 +[TEMP19] +name = Temp_PCH +[TEMP20] +name = Temp_Outlet +[TEMP21] +name = Temp_HBA_LSI +[TEMP22] +name = Temp_OCP +[TEMP23] +name = Temp_PSU1 +[TEMP24] +name = Temp_PSU2 + +################################################################################## +# +# VOLTAGE sensor management +# ----------------------------- +# +# groups ... Allows management of voltage sensors by group with similar +# characteristics. +# +################################################################################## + +[VOLTAGE] +# abstract labels for each group if this group type +groups = VOLTAGE1 + +[VOLTAGE1] +# The name of the group that will show up in the GUI +group = server voltage + +# sensor attributes for this group +sensortype = voltage +datatype = discrete +interval = 300 ; seconds + +# the commands that will read the sensors in this group +cmd = show /SYS/voltage + +# TODO: the sensor reading rules +ignore = na +pass = ok +minor = na +major = nonCritical +critical = critical + +# list of abstract labels for the sensors in this group +sensors = VOLT1,VOLT2,VOLT3,VOLT4,VOLT5,VOLT6,VOLT7,VOLT8,VOLT9,VOLT10,VOLT11,VOLT12,VOLT13,VOLT14 + +# the individual sensors in this group +[VOLT1] +name = Volt_VR_CPU0 +[VOLT2] +name = Volt_VR_CPU1 +[VOLT3] +name = Volt_P5V +[VOLT4] +name = Volt_P5V_AUX +[VOLT5] +name = Volt_P3V3 +[VOLT6] +name = Volt_P1V05 +[VOLT7] +name = Volt_P1V8_AUX +[VOLT8] +name = Volt_P12V +[VOLT9] +name = 
Volt_P3V3_AUX +[VOLT10] +name = Volt_VR_DIMM_AB +[VOLT11] +name = Volt_VR_DIMM_CD +[VOLT12] +name = Volt_VR_DIMM_EF +[VOLT13] +name = Volt_VR_DIMM_GH +[VOLT14] +name = Volt_P3V_BAT diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/sensor_quanta_v1_ilo_v4.profile b/mtce-common/cgts-mtce-common-1.0/scripts/sensor_quanta_v1_ilo_v4.profile new file mode 100644 index 00000000..7e05e31a --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/sensor_quanta_v1_ilo_v4.profile @@ -0,0 +1,312 @@ +################################################################################### +# +# This file contains the sensor profile for the following board +# +# "Quanta Computer" with "Quanta(TM) Embedded Lights Out Manager ; v3.29" +# +################################################################################### +# +# Please refer to sensor_integration_profile.README for a detailed +# explanation of the format and hierarchy of this file and how to +# develop a file like this for sensor integration for new servers.
+# +#################################################################################### + +[SERVER] + +targets = Targets:SP:SYS +name_cmd = show /SYS +bmc_cmd = show /SP +bmv_ver = v3.29 +name = Quanta Computer +bmc = Quanta(TM) Embedded Lights Out Manager +info = show /SYS +dump_cmd = show /SP ; there is no dump, this just prints the bmc version + +group_types = COOLING,POWER,TEMPERATURE,VOLTAGE + +[CMDS] +reset = reset /SYS +poweron = start /SYS +poweroff = stop /SYS +powerquery = show /SYS + +[FIT] +hostname = none +sensor = none +severity = none + +################################################################################## +# +# POWER sensor management +# ----------------------------- +# +################################################################################## + +[POWER] +groups = POWER1 + +[POWER1] +group = server power + +sensortype = power +datatype = discrete +interval = 100 ; seconds + +cmd = show /SYS/powerSupply + +sensors = PSU12,PSU1,PSU2 + +[PSU12] +name = PSU Redundancy + +[PSU1] +name = PSU1 Status + +[PSU2] +name = PSU2 Status + +################################################################################## +# +# COOLING sensor management +# ----------------------------- +# +################################################################################## + +[COOLING] +groups = FANS1,FANS2 + +############################## +# COOLING:FANS1 Grouping +############################## + +[FANS1] +group = server fans + +sensortype = fan +datatype = discrete +interval = 120 ; seconds + +cmd = show /SYS/fan + +# TODO: the sensor reading rules +ignore = na +pass = ok +minor = na +major = nonCritical +critical = critical + +delimitor = = + +sensors = FANS1_1,FANS1_2,FANS1_3,FANS1_4,FANS1_5,FANS1_6,FANS1_7,FANS1_8,FANS1_9,FANS1_10,FANS1_11,FANS1_12 + +[FANS1_1] +name = Fan_SYS0_1 + +[FANS1_2] +name = Fan_SYS0_2 + +[FANS1_3] +name = Fan_SYS1_1 + +[FANS1_4] +name = Fan_SYS1_2 + +[FANS1_5] +name = Fan_SYS2_1 + +[FANS1_6] +name = Fan_SYS2_2 
+ +[FANS1_7] +name = Fan_SYS3_1 + +[FANS1_8] +name = Fan_SYS3_2 + +[FANS1_9] +name = Fan_SYS4_1 + +[FANS1_10] +name = Fan_SYS4_2 + +[FANS1_11] +name = Fan_SYS5_1 + +[FANS1_12] +name = Fan_SYS5_2 + + +############################## +# COOLING:FANS2 Grouping +############################## +[FANS2] +# The name of the group that will show up in the GUI +group = power supply fans + +# sensor attributes for this group +sensortype = fan +datatype = discrete +interval = 120 ; seconds + +# the commands that will read the sensors in this group +cmd = show /SYS/fan + +# TODO: the sensor reading rules +ignore = na +pass = ok +minor = na +major = nonCritical +critical = critical + +# Status output delimiter +delimitor = = + +# list of abstract labels for the sensors in this group +sensors = FANS2_1,FANS2_2 + +# the individual sensors in this group +[FANS2_1] +name = Fan_PSU1 + +[FANS2_2] +name = Fan_PSU2 + +################################################################################## +# +# TEMPERATURE sensor management +# ----------------------------- +# +################################################################################## +[TEMPERATURE] +groups = TEMPERATURE1 + +[TEMPERATURE1] + +group = server temperature + +sensortype = temperature +datatype = discrete +interval = 34 ; seconds +unit = Celsius + +cmd = show /SYS/temperature + +delimitor = = + +# TODO: the sensor reading rules +ignore = na +pass = ok +minor = na +major = nonCritical +critical = critical + +sensors = TEMP3,TEMP4,TEMP5,TEMP6,TEMP7,TEMP8,TEMP9,TEMP10,TEMP11,TEMP12,TEMP13,TEMP14,TEMP15,TEMP16,TEMP17,TEMP18,TEMP19,TEMP20,TEMP21,TEMP23,TEMP24 + +# the individual sensors in this group +[TEMP1] +name = PCH Thermal Trip +[TEMP2] +name = MB Thermal Trip +[TEMP3] +name = Temp_CPU0 +[TEMP4] +name = Temp_CPU1 +[TEMP5] +name = Temp_VR_CPU0 +[TEMP6] +name = Temp_VR_CPU1 +[TEMP7] +name = Temp_DIMM_AB +[TEMP8] +name = Temp_DIMM_CD +[TEMP9] +name = Temp_DIMM_EF +[TEMP10] +name = Temp_DIMM_GH +[TEMP11] +name 
= Temp_VR_DIMM_AB +[TEMP12] +name = Temp_VR_DIMM_CD +[TEMP13] +name = Temp_VR_DIMM_EF +[TEMP14] +name = Temp_VR_DIMM_GH +[TEMP15] +name = Temp_Ambient_FP +[TEMP16] +name = Temp_PCI_Area +[TEMP17] +name = Temp_PCI_Inlet1 +[TEMP18] +name = Temp_PCI_Inlet2 +[TEMP19] +name = Temp_PCH +[TEMP20] +name = Temp_Outlet +[TEMP21] +name = Temp_HBA_LSI +[TEMP22] +name = Temp_OCP +[TEMP23] +name = Temp_PSU1 +[TEMP24] +name = Temp_PSU2 + +################################################################################## +# +# VOLTAGE sensor management +# ----------------------------- +# +################################################################################## + +[VOLTAGE] +groups = VOLTAGE1 + +[VOLTAGE1] +group = server voltage + +sensortype = voltage +datatype = discrete +interval = 300 ; seconds + +cmd = show /SYS/voltage + +# TODO: the sensor reading rules +ignore = na +pass = ok +minor = na +major = nonCritical +critical = critical + +sensors = VOLT1,VOLT2,VOLT3,VOLT4,VOLT5,VOLT6,VOLT7,VOLT8,VOLT9,VOLT10,VOLT11,VOLT12,VOLT13,VOLT14 + +# the individual sensors in this group +[VOLT1] +name = Volt_VR_CPU0 +[VOLT2] +name = Volt_VR_CPU1 +[VOLT3] +name = Volt_P5V +[VOLT4] +name = Volt_P5V_AUX +[VOLT5] +name = Volt_P3V3 +[VOLT6] +name = Volt_P1V05 +[VOLT7] +name = Volt_P1V8_AUX +[VOLT8] +name = Volt_P12V +[VOLT9] +name = Volt_P3V3_AUX +[VOLT10] +name = Volt_VR_DIMM_AB +[VOLT11] +name = Volt_VR_DIMM_CD +[VOLT12] +name = Volt_VR_DIMM_EF +[VOLT13] +name = Volt_VR_DIMM_GH +[VOLT14] +name = Volt_P3V_BAT diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/store_trace b/mtce-common/cgts-mtce-common-1.0/scripts/store_trace new file mode 100644 index 00000000..62730af2 --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/store_trace @@ -0,0 +1,59 @@ +#!/bin/bash + +# Copyright (c) 2014 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + + +# We've been called because someone wants to dump the kernel debug trace file to +# persistent storage. 
+# + +# This uses the "flock" binary to ensure that exactly one copy of this script +# runs on the system at a time. Taken from the flock man page. +# Basically it takes a nonblocking exclusive lock on the script itself. +[ "${FLOCKER}" != "$0" ] && exec env FLOCKER="$0" flock -en "$0" "$0" "$@" || : + + +# The expected behaviour is that the caller has disabled tracing already, but we +# are expected to re-enable it after we're done copying the data. + + +LOGFILE=kernel_trace_log + + +# Disable tracing just in case the caller forgot to do it. +echo 0 > /sys/kernel/debug/tracing/tracing_on + +# +cd /var/log + +if [ ! -d tracing ] +then + # This is the first trace log, make the directory. + mkdir tracing +else + # Handle log rotation. We'll keep up to 5 trace logs. + for i in {4..0} + do + mv tracing/${LOGFILE}.$i tracing/${LOGFILE}.$((i+1)) + done +fi + +# Copy trace file and fsync it when done. +cp /sys/kernel/debug/tracing/trace tracing/${LOGFILE}.0 + +# Wipe the trace file. +echo > /sys/kernel/debug/tracing/trace + +# Enable tracing. +echo 1 > /sys/kernel/debug/tracing/tracing_on + +# Fsync the logged trace file. This uses a custom helper app. +fsync tracing/${LOGFILE}.0 + +# Fsync the directory to flush metadata. +fsync tracing + + diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/stress_ras.sh b/mtce-common/cgts-mtce-common-1.0/scripts/stress_ras.sh new file mode 100644 index 00000000..37b7496a --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/stress_ras.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# +# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# This test script soaks the mtcAgent and hbsAgent start and stop operations. 
+# Start Condition: Agents are already running + +primary_resource="sysinv-api" +proc="Platform RA Soak:" +count=0 +delay_list="1 2 3 4 5 6 7 8 9 10 11 12 13 14 15" + + +while true +do + +logger "$proc Stopping Platform Resource Agents ----------" +crm resource stop $primary_resource +for delay in $delay_list +do + sleep 1 + echo -n "." +done +echo "" + +status=`crm resource status` +for service in "sysinv-api" "sysinv-conductor" "sysinv-agent" "mtcAgent" "hbsAgent" +do + status_tmp=`echo "$status" | grep $service | cut -f2 -d')'` + if [ "$status_tmp" != " Stopped " ] ; then + echo "$proc ($count) Stop $service Failed <$status_tmp>" + sleep 5 + crm resource status + exit 0 + else + echo "$proc ($count) Stop O.K. for $service" + fi +done +logger "$proc Stop O.K. -------------------------------" + +logger "$proc Starting Platform Resource Agents ----------" +crm resource start $primary_resource +for delay in $delay_list +do + sleep 1 + echo -n "." +done +echo "" + +status=`crm resource status` +for service in "sysinv-api" "sysinv-conductor" "sysinv-agent" "mtcAgent" "hbsAgent" +do + status_tmp=`echo "$status" | grep $service | cut -f2 -d')'` + if [ "$status_tmp" != " Started " ] ; then + echo "$proc ($count) Start $service Failed <$status_tmp>" + sleep 5 + crm resource status + exit 0 + else + echo "$proc ($count) Start O.K. for $service" + fi +done +logger "$proc Start O.K. ------------------------------" + +count=`expr $count + 1` +mtc=`cat /var/run/mtcAgent.pid` +hbs=`cat /var/run/hbsAgent.pid` +echo "$mtc:`pidof mtcAgent` <:> $hbs:`pidof hbsAgent`" +done diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/stress_swact.sh b/mtce-common/cgts-mtce-common-1.0/scripts/stress_swact.sh new file mode 100644 index 00000000..ced0d7ee --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/stress_swact.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# +# Copyright (c) 2013-2014 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# This test script soaks controller swact operations by repeatedly moving the primary resource between controller-1 and controller-0. +# Start Condition: Agents are already running + +primary_resource="svr_vip" +proc="Openstack Swact Soak:" +count=0 +delay_list="1 2 3 4 5 6 7 8 9 10 11 12 13 14 15" + + +while true +do + +logger "$proc Swacting to Controller 1 ----------" +crm resource move $primary_resource controller-1 +for delay in $delay_list +do + sleep 10 + echo -n "." + crm status +done + +logger "$proc Swacting to Controller 0 ----------" +crm resource move $primary_resource controller-0 +for delay in $delay_list +do + sleep 10 + echo -n "." + crm status +done + +done diff --git a/mtce-common/cgts-mtce-common-1.0/scripts/wipedisk b/mtce-common/cgts-mtce-common-1.0/scripts/wipedisk new file mode 100755 index 00000000..2b8a811d --- /dev/null +++ b/mtce-common/cgts-mtce-common-1.0/scripts/wipedisk @@ -0,0 +1,118 @@ +#! /bin/bash +# +# Copyright (c) 2013-2017 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + + +usage () +{ + echo "Usage: `basename $0` [-h|--force]" + echo "Erases the master boot record on the hard drive." + echo "WARNING: All data on this hard drive will be lost." + echo "" + echo "Options:" + echo " -h display this help" + echo " --force do not ask for confirmation" + exit 1 +} + +OPTS=`getopt -o h -l force -- "$@"` +if [ $? != 0 ] +then + exit 1 +fi + +eval set -- "$OPTS" + +while true ; do + case "$1" in + -h) usage; shift;; + --force) FORCE=1; shift;; + --) shift; break;; + esac +done + +if [ $# != 0 ] +then + echo "Invalid argument. Use -h for help."
+ exit 1 +fi + +declare WIPE_HDD= + +# Only wipe the rootfs and boot device disks +rootfs_part=$(df --output=source / | tail -1) +rootfs=$(readlink -f $(find -L /dev/disk/by-path/ -samefile $rootfs_part | sed 's/-part[0-9]*'//)) + +boot_disk_part=$(df --output=source /boot | tail -1) +boot_disk=$(readlink -f $(find -L /dev/disk/by-path/ -samefile $boot_disk_part | sed 's/-part[0-9]*'//)) + +WIPE_HDD=$rootfs +if [ "$rootfs" != "$boot_disk" ] +then + WIPE_HDD="$WIPE_HDD $boot_disk" +fi + +# Due to dynamic partitioning, volume groups can have PVs across multiple disks. +# When deleting the rootfs, we should also delete all PVs (across all disks) that +# are part of volume groups that are also present on the rootfs. +rootfs_vgs=$(pvdisplay -C --separator ' | ' -o pv_name,vg_name | grep $rootfs | awk '{print $3}' | sort -u) + +pvs_to_delete="" + +for vg in $rootfs_vgs +do + pv=$(pvdisplay --select "vg_name=$vg" | awk '/PV Name/{print $3}') + pvs_to_delete="$pvs_to_delete $pv" +done + +WIPE_HDD="$pvs_to_delete $WIPE_HDD" + +if [ ! $FORCE ] +then + echo "This will result in the loss of all data on the hard drives and" + echo "will require this node to be re-installed." + echo "The following disks will be wiped:" + for dev in $WIPE_HDD + do + echo " $dev" + done | sort + echo + read -p "Are you absolutely sure? [y/n] " -r + if [[ ! $REPLY =~ ^[Yy]$ ]] + then + echo "Aborted" + exit 1 + fi + read -p "Type 'wipediskscompletely' to confirm: " -r + if [[ ! $REPLY = "wipediskscompletely" ]] + then + echo "Aborted" + exit 1 + fi +fi + +for dev in $WIPE_HDD +do + if [[ -e $dev ]] + then + echo "Wiping $dev..." + wipefs -f -a $dev + + # Clearing previous GPT tables or LVM data + # Delete the first few bytes at the start and end of the partition. This is required with + # GPT partitions, they save partition info at the start and the end of the block. 
+ dd if=/dev/zero of=$dev bs=512 count=34 + dd if=/dev/zero of=$dev bs=512 count=34 seek=$((`blockdev --getsz $dev` - 34)) + fi +done + +if [[ -z $WIPE_HDD ]] +then + echo "No disks were detected." +else + sync + echo "The disk(s) have been wiped." +fi diff --git a/mtce-compute/LICENSE b/mtce-compute/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/mtce-compute/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/mtce-compute/PKG-INFO b/mtce-compute/PKG-INFO new file mode 100644 index 00000000..bca1a752 --- /dev/null +++ b/mtce-compute/PKG-INFO @@ -0,0 +1,12 @@ +Metadata-Version: 1.1 +Name: cgts-mtce-compute +Version: 1.0 +Summary: Titanium Cloud Platform Compute Node Maintenance Package +Home-page: +Author: Windriver +Author-email: info@windriver.com +License: Apache-2.0 + +Description: Maintenance support files for compute-only node type + +Platform: UNKNOWN diff --git a/mtce-compute/centos/build_srpm.data b/mtce-compute/centos/build_srpm.data new file mode 100644 index 00000000..fc8c9cca --- /dev/null +++ b/mtce-compute/centos/build_srpm.data @@ -0,0 +1,3 @@ +SRC_DIR=$CGCS_BASE/mwa-beas/mtce-compute/cgts-mtce-compute-1.0/scripts +COPY_LIST="$SRC_DIR/*" +TIS_PATCH_VER=4 diff --git a/mtce-compute/centos/cgts-mtce-compute.spec b/mtce-compute/centos/cgts-mtce-compute.spec new file mode 100644 index 00000000..4e1e7700 --- /dev/null +++ b/mtce-compute/centos/cgts-mtce-compute.spec @@ -0,0 +1,94 @@ +%define local_etc_pmond %{_sysconfdir}/pmon.d +%define local_etc_goenabledd %{_sysconfdir}/goenabled.d +%define local_etc_nova %{_sysconfdir}/nova + +%define debug_package %{nil} + +Name: cgts-mtce-compute +Version: 1.0 +Release: %{tis_patch_ver}%{?_tis_dist} +Summary: Compute Node Maintenance Package + +Group: base +License: Apache-2.0 +Packager: Wind River +URL: unknown + +Source0: 
%{name}-%{version}.tar.gz +Source1: goenabled +Source2: e_nova-init +Source3: nova-cleanup +Source4: nova-startup +Source11: nova-cleanup.conf +Source12: nova-compute.conf +Source14: libvirtd.conf +Source21: nova-goenabled.sh +Source22: virt-support-goenabled.sh +Source31: goenabled-compute.service +Source32: e_nova-init.service + +BuildRequires: systemd +BuildRequires: systemd-devel +Requires: bash +Requires: /bin/systemctl +Requires: qemu-kvm-ev + +%description +Maintenance support files for compute-only node type + +%prep +%setup + +%build + +%install + +# Compute-Only Init Scripts (source group x) +install -m 755 -p -D %{SOURCE1} %{buildroot}/etc/init.d/goenabledCompute +install -m 755 -p -D %{SOURCE2} %{buildroot}/etc/init.d/e_nova-init +install -m 755 -p -D %{SOURCE3} %{buildroot}/etc/init.d/nova-cleanup +install -m 755 -p -D %{SOURCE4} %{buildroot}/etc/init.d/nova-startup + +# Compute-Only Process Monitor Config files (source group 1x) +install -m 755 -d %{buildroot}%{local_etc_pmond} +install -m 755 -d %{buildroot}%{local_etc_nova} +install -m 644 -p -D %{SOURCE11} %{buildroot}%{local_etc_nova}/nova-cleanup.conf +install -m 644 -p -D %{SOURCE12} %{buildroot}%{local_etc_nova}/nova-compute.conf +install -m 644 -p -D %{SOURCE14} %{buildroot}%{local_etc_pmond}/libvirtd.conf + +# Compute-Only Go Enabled Test (source group 2x) +install -m 755 -d %{buildroot}%{local_etc_goenabledd} +install -m 755 -p -D %{SOURCE21} %{buildroot}%{local_etc_goenabledd}/nova-goenabled.sh +install -m 755 -p -D %{SOURCE22} %{buildroot}%{local_etc_goenabledd}/virt-support-goenabled.sh + +# Install to systemd (source group 3x) +install -m 644 -p -D %{SOURCE31} %{buildroot}%{_unitdir}/goenabled-compute.service +install -m 644 -p -D %{SOURCE32} %{buildroot}%{_unitdir}/e_nova-init.service + +%post +/bin/systemctl enable goenabled-compute.service +/bin/systemctl enable e_nova-init.service +/bin/systemctl enable qemu_clean.service + +%files +%license LICENSE + +%defattr(-,root,root,-) + 
+/etc/init.d/goenabledCompute +/etc/init.d/e_nova-init +/etc/init.d/nova-cleanup +/etc/init.d/nova-startup + +%{local_etc_nova}/nova-cleanup.conf +%{local_etc_nova}/nova-compute.conf +%{local_etc_pmond}/libvirtd.conf + +%{local_etc_goenabledd}/nova-goenabled.sh +%{local_etc_goenabledd}/virt-support-goenabled.sh + +%{_unitdir}/goenabled-compute.service +%{_unitdir}/e_nova-init.service + +%clean +rm -rf $RPM_BUILD_ROOT diff --git a/mtce-compute/cgts-mtce-compute-1.0/scripts/LICENSE b/mtce-compute/cgts-mtce-compute-1.0/scripts/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/mtce-compute/cgts-mtce-compute-1.0/scripts/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/mtce-compute/cgts-mtce-compute-1.0/scripts/e_nova-init b/mtce-compute/cgts-mtce-compute-1.0/scripts/e_nova-init
new file mode 100644
index 00000000..d186fd78
--- /dev/null
+++ b/mtce-compute/cgts-mtce-compute-1.0/scripts/e_nova-init
@@ -0,0 +1,159 @@
+#! /bin/bash
+#
+# Copyright (c) 2015-2017 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+#
+### BEGIN INIT INFO
+# Provides: nova-init
+# Default-Start: 3 5
+# Default-Stop: 0 1 2 6
+# Short-Description: Cleanup pmon.d nova linkages
+### END INIT INFO
+
+# Platform paths and flags. The flag paths tested below are expected to come
+# from here; they are quoted so an empty value cannot collapse a "[ -f ]" test.
+. /usr/bin/tsconfig
+
+# Linux Standard Base (LSB) Error Codes
+SUCCESS=0
+GENERIC_ERROR=1
+INVALID_ARGS=2
+
+NOVA_INIT_TAG=${NOVA_INIT_TAG:-"NOVA_INIT"}
+# Log all arguments as one syslog message (local1.info, tagged NOVA_INIT_TAG).
+function log
+{
+    logger -p local1.info -t "${NOVA_INIT_TAG}" "$@"
+}
+
+NOVA_RUN="/var/run/nova"
+# Flag files kept under NOVA_RUN.
+NOVA_INIT_FAILED="${NOVA_RUN}/.nova_init_failed"
+NOVA_CLEANUP_DONE_FILE="${NOVA_RUN}/.nova_cleanup_done"
+NOVA_COMPUTE_ENABLED="${NOVA_RUN}/.nova_compute_enabled"
+
+NOVA_CLEANUP_PID_FILE="${NOVA_RUN}/nova-cleanup.pid"
+# pmond configuration files and their symlink locations in /etc/pmon.d.
+NOVA_COMPUTE_PMOND_TARGET="/etc/nova/nova-compute.conf"
+NOVA_COMPUTE_PMOND_SYMLINK="/etc/pmon.d/nova-compute.conf"
+
+NOVA_CLEANUP_PMOND_TARGET="/etc/nova/nova-cleanup.conf"
+NOVA_CLEANUP_PMOND_SYMLINK="/etc/pmon.d/nova-cleanup.conf"
+
+case "$1" in
+    start)
+        log "Start"
+
+        mkdir -p "${NOVA_RUN}"
+        chown nova:root "${NOVA_RUN}"
+
+        # Assume we failed, and clear when we make it to nova-cleanup phase.
+        touch "${NOVA_INIT_FAILED}"
+        # Remove flag files, pid file and pmon symlink left by a previous run.
+        if [ -f "${NOVA_COMPUTE_ENABLED}" ]
+        then
+            rm "${NOVA_COMPUTE_ENABLED}"
+        fi
+
+        if [ -f "${NOVA_CLEANUP_PID_FILE}" ]
+        then
+            rm "${NOVA_CLEANUP_PID_FILE}"
+        fi
+
+        if [ -f "${NOVA_CLEANUP_DONE_FILE}" ]
+        then
+            rm "${NOVA_CLEANUP_DONE_FILE}"
+        fi
+
+        if [ -e "${NOVA_COMPUTE_PMOND_SYMLINK}" ]
+        then
+            rm "${NOVA_COMPUTE_PMOND_SYMLINK}"
+        fi
+
+        # Do not continue if the host has not been configured. We don't
+        # want to run nova-compute before its config file has been updated.
+        if [ ! -f "${INITIAL_COMPUTE_CONFIG_COMPLETE}" ]
+        then
+            log "Initial compute configuration is not complete"
+            exit ${GENERIC_ERROR}
+        fi
+
+        # Do not continue if the compute services are disabled.
+        if [ -f "${VOLATILE_DISABLE_COMPUTE_SERVICES}" ]
+        then
+            log "Compute services are disabled, nothing to do"
+            rm "${NOVA_INIT_FAILED}"
+            exit ${SUCCESS}
+        fi
+
+        if ! [ -e "${NOVA_CLEANUP_PMOND_SYMLINK}" ]
+        then
+            ln -s "${NOVA_CLEANUP_PMOND_TARGET}" "${NOVA_CLEANUP_PMOND_SYMLINK}"
+            if [ $? -ne 0 ]
+            then
+                log "Failed to create nova-cleanup symbolic link"
+                exit ${GENERIC_ERROR}
+            fi
+        fi
+
+        # There is interaction and timing between: e_nova-init, nova-startup,
+        # nova-cleanup, nova-compute, nova-goenabled. It is much simpler to
+        # let nova-goenabled detect when service is actually enabled.
+        # Clear the init failure flag prior to nova-cleanup.
+        rm "${NOVA_INIT_FAILED}"
+
+        # Do in the background because of the long execution time.
+        log "Start Nova-Cleanup"
+        # nova-cleanup is SIGKILLed after 300s; wait at most ~45s for its pid file.
+        MAX_PID_WAIT=$((SECONDS+45))
+        timeout --signal KILL 300s /etc/init.d/nova-cleanup start &
+
+        while ! [ -f "${NOVA_CLEANUP_PID_FILE}" ]
+        do
+            log "Waiting for Nova-Cleanup to start"
+
+            if [ "${SECONDS}" -gt "${MAX_PID_WAIT}" ]
+            then
+                log "Timeout waiting for Nova-Cleanup pid"
+                exit ${GENERIC_ERROR}
+            fi
+            sleep 1
+        done
+
+        NOVA_CLEANUP_PID=$(cat "${NOVA_CLEANUP_PID_FILE}")
+        log "Nova-Cleanup pid ${NOVA_CLEANUP_PID} started"
+
+        log "Finished"
+        ;;
+
+    stop)
+        log "Stop"
+
+        # Remove this just in case...
+        if [ -f "${NOVA_INIT_FAILED}" ]
+        then
+            rm "${NOVA_INIT_FAILED}"
+        fi
+
+        # Stop pmon monitoring of nova-compute
+        if [ -e "${NOVA_COMPUTE_PMOND_SYMLINK}" ]
+        then
+            rm "${NOVA_COMPUTE_PMOND_SYMLINK}"
+        fi
+
+        # Stop nova-compute
+        /etc/init.d/nova-compute stop
+
+        log "Finished"
+        ;;
+
+    *)
+        echo "Usage: $0 {start|stop}"
+        exit ${INVALID_ARGS}
+        ;;
+esac
+
+exit ${SUCCESS}
diff --git a/mtce-compute/cgts-mtce-compute-1.0/scripts/e_nova-init.service b/mtce-compute/cgts-mtce-compute-1.0/scripts/e_nova-init.service
new file mode 100644
index 00000000..94fb6326
--- /dev/null
+++ b/mtce-compute/cgts-mtce-compute-1.0/scripts/e_nova-init.service
@@ -0,0 +1,13 @@
+[Unit]
+Description=Titanium Cloud Nova Init
+After=network.target config.service compute-config-gate.service
+Before=goenabled-compute.service
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+ExecStart=/etc/init.d/e_nova-init start
+ExecStop=/etc/init.d/e_nova-init stop
+
+[Install]
+WantedBy=multi-user.target
diff --git
a/mtce-compute/cgts-mtce-compute-1.0/scripts/goenabled b/mtce-compute/cgts-mtce-compute-1.0/scripts/goenabled
new file mode 100644
index 00000000..5cfec8e4
--- /dev/null
+++ b/mtce-compute/cgts-mtce-compute-1.0/scripts/goenabled
@@ -0,0 +1,187 @@
+#!/bin/bash
+#
+# Copyright (c) 2013-2014 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+#
+# chkconfig: 2345 98 2
+#
+### BEGIN INIT INFO
+# Provides: goenabled
+# Default-Start: 3 5
+# Default-Stop: 0 1 2 6
+# Short-Description: Maintenance Client "Go Enable" script
+### END INIT INFO
+
+# Platform paths and flags
+. /usr/bin/tsconfig
+# The three defaults below may be overridden from the environment.
+GOENABLED_PATH=${GOENABLED_PATH:-"/etc/goenabled.d"}
+GOENABLED_FILE=${GOENABLED_FILE:-"/var/run/.goenabled_subf"}
+GOENABLED_TAG=${GOENABLED_TAG:-"GOENABLED"}
+# Exit status of the requested action; set by start / stop / status.
+RETVAL=0
+
+################################################################################
+# Log all arguments as one syslog message tagged GOENABLED_TAG
+################################################################################
+function log
+{
+    logger -t "${GOENABLED_TAG}" "$@"
+}
+
+################################################################################
+# Print "[ OK ]" when the status in $1 is 0, "[FAILED]" otherwise
+################################################################################
+function print_status()
+{
+    if [ "$1" -eq "0" ]; then
+        echo "[ OK ]"
+    else
+        echo "[FAILED]"
+    fi
+}
+
+################################################################################
+# Run the scripts in GOENABLED_PATH (output to syslog); return run-parts' status
+################################################################################
+function goenabled_check()
+{
+    if [ -d "${GOENABLED_PATH}" ]; then
+        run-parts "${GOENABLED_PATH}" 2>&1 | logger -t "${GOENABLED_TAG}"
+        RET=${PIPESTATUS[0]}
+        if [ "${RET}" -ne 0 ]; then
+            return "${RET}"
+        fi
+    fi
+
+    return 0
+}
+
+
+################################################################################
+# Write goenabled state file; returns 0 on success, the write status on failure
+################################################################################
+function goenabled_enable_ready()
+{
+    echo "$(date): $(hostname) : Ready to Run GoEnabled Scripts" > "${GOENABLED_FILE}"
+    RET=$?
+    if [ "${RET}" -ne 0 ]; then
+        log "Failed to write state file ${GOENABLED_FILE}"
+        return "${RET}"
+    fi
+
+    log "enabled"
+
+    return 0
+}
+
+
+################################################################################
+# Remove goenabled state file; returns 0 on success, the rm status on failure
+################################################################################
+function goenabled_disable()
+{
+    rm -f "${GOENABLED_FILE}"
+    RET=$?
+    if [ "${RET}" -ne 0 ]; then
+        log "Failed to remove state file ${GOENABLED_FILE}"
+        return "${RET}"
+    fi
+
+    log "disabled"
+
+    return 0
+}
+
+################################################################################
+# Start Action: write the state file, set RETVAL, print [ OK ] / [FAILED]
+################################################################################
+function start()
+{
+    echo -n "Goenabled Ready: "
+
+    goenabled_enable_ready
+    RETVAL=$?
+    if [ "$RETVAL" -ne "0" ]; then
+        log "Go enabled failed"
+        print_status "$RETVAL"
+        return
+    fi
+
+    print_status "$RETVAL"
+}
+
+################################################################################
+# Stop Action: remove the state file, set RETVAL, print [ OK ] / [FAILED]
+################################################################################
+function stop()
+{
+    echo -n "Stopping goenabled: "
+
+    goenabled_disable
+    RETVAL=$?
+    if [ "$RETVAL" -ne "0" ]; then
+        log "Go disabled failed"
+        print_status "$RETVAL"
+        return
+    fi
+
+    print_status "$RETVAL"
+}
+
+################################################################################
+# Status Action: run the goenabled checks, set RETVAL, print [ OK ] / [FAILED]
+################################################################################
+function status()
+{
+    echo -n "Checking goenabled: "
+
+    goenabled_check
+    RETVAL=$?
+    if [ "$RETVAL" -ne "0" ]; then
+        log "Go enabled check failed"
+    fi
+
+    # Printed on both paths; RETVAL carries the result to the final exit.
+    print_status "$RETVAL"
+}
+
+
+################################################################################
+# Main Entry
+################################################################################
+# The flag path is quoted so an empty or unset value cannot collapse the test.
+# Don't run this till compute is configured; exits with RETVAL, which is still 0.
+if [ ! -e "$VOLATILE_COMPUTE_CONFIG_COMPLETE" ] ; then
+    log "Compute is not configured"
+    exit $RETVAL
+fi
+
+case "$1" in
+    start)
+        start
+        ;;
+
+    stop)
+        stop
+        ;;
+
+    restart)
+        stop
+        start
+        ;;
+
+    status)
+        status
+        ;;
+
+    *)
+        echo "usage: $0 { start | stop | status | restart }"
+        exit 1
+        ;;
+esac
+
+exit $RETVAL
diff --git a/mtce-compute/cgts-mtce-compute-1.0/scripts/goenabled-compute.service b/mtce-compute/cgts-mtce-compute-1.0/scripts/goenabled-compute.service
new file mode 100644
index 00000000..94e3c3e7
--- /dev/null
+++ b/mtce-compute/cgts-mtce-compute-1.0/scripts/goenabled-compute.service
@@ -0,0 +1,12 @@
+[Unit]
+Description=Titanium Cloud Maintenance Compute Goenable Ready
+After=config.service compute-config-gate.service
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+ExecStart=/etc/init.d/goenabledCompute start
+ExecStop=/etc/init.d/goenabledCompute stop
+
+[Install]
+WantedBy=multi-user.target
diff --git a/mtce-compute/cgts-mtce-compute-1.0/scripts/libvirtd.conf b/mtce-compute/cgts-mtce-compute-1.0/scripts/libvirtd.conf
new file mode 100644
index 00000000..1176cc42
--- /dev/null
+++ b/mtce-compute/cgts-mtce-compute-1.0/scripts/libvirtd.conf
@@ -0,0 +1,25 @@
+[process]
+service = libvirtd
+process = libvirtd
+pidfile = /var/run/libvirtd.pid
+script = /etc/init.d/libvirtd
+style = lsb ; ocf or lsb
+severity = critical ; minor, major, critical
+restarts = 3 ; restarts before error assertion
+interval = 5 ; number of seconds to wait between restarts
+debounce = 20 ; number of seconds that a process needs to remain
+ ; running before degrade is removed and retry count
+ ; is cleared.
+startuptime = 5 ; Seconds to wait after process start before starting the debounce monitor +mode = passive ; Monitoring mode: passive (default) or active + ; passive: process death monitoring (default: always) + ; active : heartbeat monitoring, i.e. request / response messaging + ; ignore : do not monitor or stop monitoring +subfunction = compute ; Optional label. + ; Manage this process in the context of a combo host subfunction + ; Choices: compute or storage. + ; when specified pmond will wait for + ; /var/run/.compute_config_complete or + ; /var/run/.storage_config_complete + ; ... before managing this process with the specified subfunction + ; Excluding this label will cause this process to be managed by default on startup diff --git a/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-cleanup b/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-cleanup new file mode 100644 index 00000000..921e9577 --- /dev/null +++ b/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-cleanup @@ -0,0 +1,200 @@ +#! /bin/bash +# +# Copyright (c) 2015-2017 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +# Linux Standard Base (LSB) Error Codes +SUCCESS=0 +GENERIC_ERROR=1 +INVALID_ARGS=2 + +NOVA_CLEANUP_TAG=${NOVA_CLEANUP_TAG:-"NOVA_CLEANUP"} + + +################################################################################ +# Log message to user.log +################################################################################ +function log +{ + logger -p local1.info -t ${NOVA_CLEANUP_TAG} $@ +} + + +################################################################################ +# Exit abnormally +################################################################################ +function exit_with_error +{ + touch ${NOVA_CLEANUP_FAIL_FILE} + exit ${GENERIC_ERROR} +} + + +################################################################################ +# Run Nova-Cleanup +################################################################################ +nova_clean_up() +{ + if [ -f ${NOVA_CLEANUP_FAIL_FILE} ] + then + rm -f ${NOVA_CLEANUP_FAIL_FILE} + fi + + if [ ! -f "/etc/platform/.initial_config_complete" ] + then + log "Initial configuration is not complete, nothing to do" + return + fi + + HOST_NAME=$(hostname) + + timeout --signal KILL 15s /usr/bin/nfv-notify -n controller -p 30004 \ + -t booting -d "{\"hostname\": \"${HOST_NAME}\"}" > /dev/null 2>&1 + RET=$? 
+ if [ ${RET} -eq 0 ] + then + # Controller is done cleaning up + log "Notified controller, controller is done cleaning up" + + elif [ ${RET} -eq 254 ] + then + # Controller notified, but is not yet done cleaning up + log "Notified controller, controller is not done cleaning up" + touch ${NOVA_CLEANUP_FAIL_FILE} + + else + log "Failed to notify controller, error=${RET}" + touch ${NOVA_CLEANUP_FAIL_FILE} + fi +} + + +################################################################################ +# Main Entry +################################################################################ +NOVA_RUN="/var/run/nova" + +NOVA_CLEANUP_FAIL_FILE="${NOVA_RUN}/.nova_cleanup_fail" +NOVA_CLEANUP_DONE_FILE="${NOVA_RUN}/.nova_cleanup_done" + +NOVA_COMPUTE_PID_FILE="${NOVA_RUN}/nova-compute.pid" +NOVA_CLEANUP_PID_FILE="${NOVA_RUN}/nova-cleanup.pid" + +NOVA_COMPUTE_PMOND_TARGET="/etc/nova/nova-compute.conf" +NOVA_COMPUTE_PMOND_SYMLINK="/etc/pmon.d/nova-compute.conf" + +NOVA_CLEANUP_PMOND_TARGET="/etc/nova/nova-cleanup.conf" +NOVA_CLEANUP_PMOND_SYMLINK="/etc/pmon.d/nova-cleanup.conf" + +case "$1" in + start) + log "Start" + + mkdir -p ${NOVA_RUN} + chown nova:root ${NOVA_RUN} + + echo "$$" > ${NOVA_CLEANUP_PID_FILE} + if [ -f /etc/centos-release -a ! -L ${NOVA_COMPUTE_PID_FILE} ]; then + ln -sf /sys/fs/cgroup/systemd/system.slice/nova-compute.service/tasks ${NOVA_COMPUTE_PID_FILE} + fi + sleep 5 + + # Try for up to 240 seconds (MAX_CONNECT_WAIT) to reach the controller + MAX_CONNECT_WAIT=$((SECONDS+240)) + + FOUND=0 + while [ ${SECONDS} -lt ${MAX_CONNECT_WAIT} ] + do + ping -c 1 controller > /dev/null 2>&1 || ping6 -c 1 controller > /dev/null 2>&1 + if [ $? -eq 0 ] + then + log "Connected to controller" + FOUND=1 + break + fi + sleep 1 + done + + if [ ${FOUND} -eq 0 ] + then + # Controller is not yet available, just exit + log "Failed to connect to the controller" + exit ${GENERIC_ERROR} + fi + + log "Connected to controller, starting cleanup" + while : + do + nova_clean_up + if [ !
-f ${NOVA_CLEANUP_FAIL_FILE} ] + then + log "Cleanup complete" + + if ! [ -e $NOVA_COMPUTE_PMOND_SYMLINK ] + then + ln -s $NOVA_COMPUTE_PMOND_TARGET $NOVA_COMPUTE_PMOND_SYMLINK + if [ $? -ne 0 ] + then + log "Failed to create nova-compute symbolic link" + exit ${GENERIC_ERROR} + fi + fi + + if [ -e $NOVA_CLEANUP_PMOND_SYMLINK ] + then + rm $NOVA_CLEANUP_PMOND_SYMLINK + fi + + log "PMOND symlinks setup complete" + + touch ${NOVA_CLEANUP_DONE_FILE} + + break + fi + + log "Sleeping for 10 seconds" + sleep 10 + done + + # Start Nova-Compute here to speed up the recovery of the + # compute, otherwise we will have to wait for pmond to start it. + log "Nova-Compute restart" + + MAX_PID_WAIT=$((SECONDS+10)) + timeout --signal KILL 10s /etc/init.d/nova-compute restart > /dev/null 2>&1 + + while ! [ -f ${NOVA_COMPUTE_PID_FILE} ] + do + log "Waiting for Nova-Compute to start" + + if [ ${SECONDS} -gt ${MAX_PID_WAIT} ] + then + log "Timeout waiting for Nova-Compute pid" + exit ${GENERIC_ERROR} + fi + sleep 1 + done + + NOVA_COMPUTE_PID=$(head -1 ${NOVA_COMPUTE_PID_FILE}) + log "Nova-Compute pid ${NOVA_COMPUTE_PID} started" + + if [ -f ${NOVA_CLEANUP_PID_FILE} ] + then + rm ${NOVA_CLEANUP_PID_FILE} + fi + + log "Finished" + ;; + + stop) + ;; + + *) + echo "Usage: $0 {start|stop}" + exit ${INVALID_ARGS} + ;; +esac + +exit ${SUCCESS} diff --git a/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-cleanup.conf b/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-cleanup.conf new file mode 100644 index 00000000..e1a719dd --- /dev/null +++ b/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-cleanup.conf @@ -0,0 +1,25 @@ +[process] +process = nova-cleanup +service = nova-cleanup +pidfile = /var/run/nova/nova-cleanup.pid +script = /etc/init.d/nova-cleanup +style = lsb ; ocf or lsb +severity = critical ; minor, major, critical +restarts = 3 ; restarts before error assertion +interval = 5 ; number of seconds to wait between restarts +debounce = 360 ; number of seconds that a process needs to 
remain + ; running before degrade is removed and retry count + ; is cleared. +startuptime = 15 ; Seconds to wait after process start before starting the debounce monitor +mode = passive ; Monitoring mode: passive (default) or active + ; passive: process death monitoring (default: always) + ; active : heartbeat monitoring, i.e. request / response messaging + ; ignore : do not monitor or stop monitoring +subfunction = compute ; Optional label. + ; Manage this process in the context of a combo host subfunction + ; Choices: compute or storage. + ; when specified pmond will wait for + ; /var/run/.compute_config_complete or + ; /var/run/.storage_config_complete + ; ... before managing this process with the specified subfunction + ; Excluding this label will cause this process to be managed by default on startup diff --git a/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-compute.conf b/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-compute.conf new file mode 100644 index 00000000..0d808957 --- /dev/null +++ b/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-compute.conf @@ -0,0 +1,25 @@ +[process] +process = nova-compute +service = nova-compute +pidfile = /var/run/nova/nova-compute.pid +script = /etc/init.d/nova-startup +style = lsb ; ocf or lsb +severity = critical ; minor, major, critical +restarts = 3 ; restarts before error assertion +interval = 5 ; number of seconds to wait between restarts +debounce = 20 ; number of seconds that a process needs to remain + ; running before degrade is removed and retry count + ; is cleared. +startuptime = 15 ; Seconds to wait after process start before starting the debounce monitor +mode = passive ; Monitoring mode: passive (default) or active + ; passive: process death monitoring (default: always) + ; active : heartbeat monitoring, i.e. request / response messaging + ; ignore : do not monitor or stop monitoring +subfunction = compute ; Optional label. 
+ ; Manage this process in the context of a combo host subfunction + ; Choices: compute or storage. + ; when specified pmond will wait for + ; /var/run/.compute_config_complete or + ; /var/run/.storage_config_complete + ; ... before managing this process with the specified subfunction + ; Excluding this label will cause this process to be managed by default on startup diff --git a/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-goenabled.sh b/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-goenabled.sh new file mode 100644 index 00000000..e391b6d1 --- /dev/null +++ b/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-goenabled.sh @@ -0,0 +1,80 @@ +#! /bin/bash +# +# Copyright (c) 2015-2017 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +### CONFIG_START +# +# Timeout: 300 +# +### CONFIG_END + +# Platform paths and flags +. /usr/bin/tsconfig + +# Linux Standard Base (LSB) Error Codes +SUCCESS=0 +GENERIC_ERROR=1 + +NOVA_GOENABLED_TAG=${NOVA_GOENABLED_TAG:-"NOVA_GOENABLED"} + +function log +{ + logger -p local1.info -t ${NOVA_GOENABLED_TAG} $@ +} + +NOVA_RUN="/var/run/nova" +NOVA_INIT_FAILED="${NOVA_RUN}/.nova_init_failed" +NOVA_COMPUTE_ENABLED="${NOVA_RUN}/.nova_compute_enabled" +NOVA_ADVANCE_ENABLED="/var/run/.nova_timer_advance_enabled" + +case "$1" in + start) + if [ -e ${VOLATILE_COMPUTE_CONFIG_COMPLETE} ] && [ ! 
-e ${VOLATILE_DISABLE_COMPUTE_SERVICES} ] + then + log "Start" + + if [ -e ${NOVA_INIT_FAILED} ] + then + log "Nova-Init check FAILED" + exit ${GENERIC_ERROR} + fi + + log "Nova-Init check PASSED" + + while : + do + if [ -e ${NOVA_ADVANCE_ENABLED} ] + then + log "Nova setup timer advance PASSED" + break + fi + + sleep 1 + done + + while : + do + if [ -e ${NOVA_COMPUTE_ENABLED} ] + then + log "Nova-Compute service enabled PASSED" + break + fi + + sleep 1 + done + + log "Finished" + fi + ;; + + stop) + ;; + + *) + ;; +esac + +exit ${SUCCESS} diff --git a/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-init.service b/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-init.service new file mode 100644 index 00000000..8ae07ac0 --- /dev/null +++ b/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-init.service @@ -0,0 +1,13 @@ +[Unit] +Description=Titanium Cloud Nova Init +After=network.target config.service +Before=goenabled-compute.service + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/etc/init.d/nova-init start +ExecStop=/etc/init.d/nova-init stop + +[Install] +WantedBy=multi-user.target diff --git a/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-startup b/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-startup new file mode 100644 index 00000000..ac0a265e --- /dev/null +++ b/mtce-compute/cgts-mtce-compute-1.0/scripts/nova-startup @@ -0,0 +1,111 @@ +#! /bin/bash +# +# Copyright (c) 2015-2017 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +# Linux Standard Base (LSB) Error Codes +SUCCESS=0 +GENERIC_ERROR=1 +INVALID_ARGS=2 + +NOVA_STARTUP_TAG=${NOVA_STARTUP_TAG:-"NOVA_STARTUP"} + +function log +{ + logger -p local1.info -t ${NOVA_STARTUP_TAG} $@ +} + +NOVA_RUN="/var/run/nova" +NOVA_CLEANUP_DONE_FILE="${NOVA_RUN}/.nova_cleanup_done" +NOVA_COMPUTE_ENABLED="${NOVA_RUN}/.nova_compute_enabled" + +NOVA_COMPUTE_PID_FILE="${NOVA_RUN}/nova-compute.pid" +NOVA_CLEANUP_PID_FILE="${NOVA_RUN}/nova-cleanup.pid" + +NOVA_COMPUTE_PMOND_TARGET="/etc/nova/nova-compute.conf" +NOVA_COMPUTE_PMOND_SYMLINK="/etc/pmon.d/nova-compute.conf" + +NOVA_CLEANUP_PMOND_TARGET="/etc/nova/nova-cleanup.conf" +NOVA_CLEANUP_PMOND_SYMLINK="/etc/pmon.d/nova-cleanup.conf" + +case "$1" in + start) + log "Start" + + mkdir -p ${NOVA_RUN} + chown nova:root ${NOVA_RUN} + + if [ -f ${NOVA_COMPUTE_ENABLED} ] + then + rm ${NOVA_COMPUTE_ENABLED} + fi + + if [ -f ${NOVA_CLEANUP_PID_FILE} ] + then + NOVA_CLEANUP_PID=$(cat ${NOVA_CLEANUP_PID_FILE}) + log "About to kill ${NOVA_CLEANUP_PID}" + kill -9 ${NOVA_CLEANUP_PID} + log "Killed ${NOVA_CLEANUP_PID}" + rm ${NOVA_CLEANUP_PID_FILE} + fi + + if [ -f ${NOVA_CLEANUP_DONE_FILE} ] + then + log "Nova-Cleanup already complete" + + if [ -e $NOVA_CLEANUP_PMOND_SYMLINK ] + then + rm $NOVA_CLEANUP_PMOND_SYMLINK + fi + + log "Nova-Compute restart" + + # Do in the foreground because the nova-compute script wipes + # and recreates the pid file for nova-compute. Otherwise + # you could be looking at a stale pid file. + timeout --signal KILL 10s /etc/init.d/nova-compute restart > /dev/null 2>&1 + while ! [ -f ${NOVA_COMPUTE_PID_FILE} ] + do + log "Waiting for Nova-Compute to start" + sleep 1 + done + + NOVA_COMPUTE_PID=$(head -1 ${NOVA_COMPUTE_PID_FILE}) + log "Nova-Compute pid ${NOVA_COMPUTE_PID} started" + + else + log "Nova-Cleanup is needed" + + # Do in the background because of the long execution time. 
+ timeout --signal KILL 300s /etc/init.d/nova-cleanup start & + + while ! [ -f ${NOVA_CLEANUP_PID_FILE} ] + do + log "Waiting for Nova-Cleanup to start" + sleep 1 + done + + NOVA_CLEANUP_PID=$(cat ${NOVA_CLEANUP_PID_FILE}) + log "Nova-Cleanup pid ${NOVA_CLEANUP_PID} started" + fi + + log "Finished" + ;; + + stop) + ;; + + restart|force-reload|reload) + stop + start + ;; + + *) + echo "Usage: $0 {start|stop|restart|force-reload|reload}" + exit $INVALID_ARGS + ;; +esac + +exit $SUCCESS diff --git a/mtce-compute/cgts-mtce-compute-1.0/scripts/virt-support-goenabled.sh b/mtce-compute/cgts-mtce-compute-1.0/scripts/virt-support-goenabled.sh new file mode 100755 index 00000000..1a508b6c --- /dev/null +++ b/mtce-compute/cgts-mtce-compute-1.0/scripts/virt-support-goenabled.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# +# Copyright (c) 2017 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# virtualization support "goenabled" check. +PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin + +NAME=$(basename $0) + +function LOG() +{ + logger "$NAME: $*" +} + +if [ ${UID} -ne 0 ]; then + LOG "Error: Need sudo/root permission." + exit 1 +fi + +# Evaluate various virtualization related facts. 
+product_name=$(dmidecode -s system-product-name 2>/dev/null) +OPTS="product_name:${product_name}, " +is_virtual=$(/usr/bin/facter is_virtual 2>/dev/null) +OPTS+="is_virtual:${is_virtual}, " +host_type=$(/usr/bin/facter virtual 2>/dev/null) +OPTS+="host_type:${host_type}, " +cpu_has_vmx=$(grep -w -q vmx /proc/cpuinfo && echo "true" || echo "false") +OPTS+="cpu_has_vmx:${cpu_has_vmx}, " +dev_kvm_exists=$([[ -e /dev/kvm ]] && echo "true" || echo "false") +OPTS+="dev_kvm_exists:${dev_kvm_exists}, " +nested_virt=$(cat /sys/module/kvm_intel/parameters/nested 2>/dev/null || echo "false") +OPTS+="nested_virt:${nested_virt}, " +hardware_virt_supported=$(virt-host-validate qemu 2>/dev/null | grep -q -w -e FAIL && echo "false" || echo "true") +OPTS+="hardware_virt_supported:${hardware_virt_supported}" +REASONS=$(virt-host-validate qemu 2>/dev/null | grep -w -e FAIL) + +# Check that virtualization is supported on the hardware. It is sufficient just to +# check the output of virt-host-validate. Additional facts are gathered for +# information. +# Notes: +# - virt-host-validate checks that /dev/kvm exists and that the 'vmx' CPU flag is present +# - it is also possible to check whether VT-x is enabled in the BIOS by reading +# the Intel MSR register, but checking /dev/kvm is sufficient +# - the 'vmx' CPU flag indicates whether virtualization can be supported +# - on emulated systems such as VirtualBox or QEMU, vmx is not required +# - if vmx is enabled on QEMU, it can also support nested virtualization + +if [ "${host_type}" = "physical" ] && [ "${hardware_virt_supported}" == "false" ] +then + LOG "Virtualization is not supported: ${OPTS}. Failing goenabled check." + LOG "Failure reasons:"$'\n'"${REASONS}" + exit 1 +fi + +LOG "Virtualization is supported: ${OPTS}."
+exit 0 diff --git a/mtce-control/LICENSE b/mtce-control/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/mtce-control/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/mtce-control/PKG-INFO b/mtce-control/PKG-INFO new file mode 100644 index 00000000..ef610055 --- /dev/null +++ b/mtce-control/PKG-INFO @@ -0,0 +1,12 @@ +Metadata-Version: 1.1 +Name: cgts-mtce-control +Version: 1.0 +Summary: Titanium Cloud Platform Controller Node Maintenance Package +Home-page: +Author: Windriver +Author-email: info@windriver.com +License: Apache-2.0 + +Description: Maintenance support files for control-only node type + +Platform: UNKNOWN diff --git a/mtce-control/centos/build_srpm.data b/mtce-control/centos/build_srpm.data new file mode 100644 index 00000000..65f97c20 --- /dev/null +++ b/mtce-control/centos/build_srpm.data @@ -0,0 +1,4 @@ +SRC_DIR=$CGCS_BASE/mwa-beas/mtce-control/cgts-mtce-control-1.0/scripts +COPY_LIST="$SRC_DIR/* \ + $PKG_BASE/LICENSE" +TIS_PATCH_VER=4 diff --git a/mtce-control/centos/cgts-mtce-control.spec b/mtce-control/centos/cgts-mtce-control.spec new file mode 100644 index 00000000..75c0f9eb --- /dev/null +++ b/mtce-control/centos/cgts-mtce-control.spec @@ -0,0 +1,58 @@ +%define local_etc_pmond /%{_sysconfdir}/pmond.d +%define local_etc_goenabledd /%{_sysconfdir}/goenabled.d + +%define debug_package %{nil} + +Name: cgts-mtce-control +Version: 1.0 +Release: %{tis_patch_ver}%{?_tis_dist} +Summary: Titanium Cloud Platform Controller Node Maintenance Package + +Group: base +License: Apache-2.0 +Packager: Wind River +URL: unknown + +Source0: %{name}-%{version}.tar.gz +Source1: goenabled +Source2: LICENSE + +BuildRequires: systemd +BuildRequires: systemd-devel +Requires: bash +Requires: /bin/systemctl +Requires: lighttpd +Requires: qemu-kvm-ev + +%description +Maintenance support files for controller-only node type + +%prep +%setup + +%build + +%install + +install -m 755 -d %{buildroot}%{local_etc} + +# Controller-Only Process Monitor Config files +install -m 755 -d %{buildroot}%{local_etc_pmond} + +# Controller-Only Go Enabled Test +install -m 755 -d %{buildroot}%{local_etc_goenabledd} + + +%post +if [ $1 -eq 1 ] ; 
then + /bin/systemctl enable lighttpd.service + /bin/systemctl enable qemu_clean.service +fi +exit 0 + +%files +%license LICENSE +%defattr(-,root,root,-) + +%clean +rm -rf $RPM_BUILD_ROOT diff --git a/mtce-control/cgts-mtce-control-1.0/scripts/LICENSE b/mtce-control/cgts-mtce-control-1.0/scripts/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/mtce-control/cgts-mtce-control-1.0/scripts/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/mtce-control/cgts-mtce-control-1.0/scripts/goenabled b/mtce-control/cgts-mtce-control-1.0/scripts/goenabled new file mode 100644 index 00000000..4b613b5c --- /dev/null +++ b/mtce-control/cgts-mtce-control-1.0/scripts/goenabled @@ -0,0 +1,52 @@ +#! /bin/sh +# +# Copyright (c) 2013-2014 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# chkconfig: 2345 98 2 +# +### BEGIN INIT INFO +# Provides: goenabled +# Default-Start: 3 5 +# Default-Stop: 0 1 2 6 +# Short-Description: Maintenance Client "Go Enable" script +### END INIT INFO + +RETVAL=0 + +case "$1" in + start) + echo "`date`: `hostname` : Go Enabled" > /var/run/goenabled + ;; + + stop) + rm -f /var/run/goenabled + ;; + + restart) + $0 stop + $0 start + ;; + + status) + ;; + + condrestart) + $0 restart + ;; + + *) + echo "usage: $0 { start | stop | status | restart | condrestart | status }" + ;; +esac + +exit $RETVAL + + + + + +exit 0 diff --git a/mtce-storage/LICENSE b/mtce-storage/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/mtce-storage/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document.
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/mtce-storage/PKG-INFO b/mtce-storage/PKG-INFO new file mode 100644 index 00000000..d8bc4f28 --- /dev/null +++ b/mtce-storage/PKG-INFO @@ -0,0 +1,12 @@ +Metadata-Version: 1.1 +Name: cgts-mtce-storage +Version: 1.0 +Summary: CGTS Platform Storage Node Maintenance Package +Home-page: +Author: Windriver +Author-email: info@windriver.com +License: Apache-2.0 + +Description: Maintenance support files for storage-only node type + +Platform: UNKNOWN diff --git a/mtce-storage/centos/build_srpm.data b/mtce-storage/centos/build_srpm.data new file mode 100644 index 00000000..45426db7 --- /dev/null +++ b/mtce-storage/centos/build_srpm.data @@ -0,0 +1,4 @@ +SRC_DIR=$CGCS_BASE/mwa-beas/mtce-storage/cgts-mtce-storage-1.0/scripts +COPY_LIST="$SRC_DIR/* \ + $PKG_BASE/LICENSE" +TIS_PATCH_VER=3 diff --git a/mtce-storage/centos/cgts-mtce-storage.spec b/mtce-storage/centos/cgts-mtce-storage.spec new file mode 100644 index 00000000..870e39af --- /dev/null +++ b/mtce-storage/centos/cgts-mtce-storage.spec @@ -0,0 +1,61 @@ +%define local_etc_pmond /%{_sysconfdir}/pmond.d +%define local_etc_goenabledd /%{_sysconfdir}/goenabled.d +%define local_etc_servicesd /%{_sysconfdir}/services.d + +Name: cgts-mtce-storage +Version: 1.0 +Release: %{tis_patch_ver}%{?_tis_dist} +Summary: Titanium Cloud Platform Storage Node Maintenance Package + +Group: base +License: Apache-2.0 +Packager: Wind River +URL: unknown + +Source0: %{name}-%{version}.tar.gz +Source1: goenabled +Source2: goenabled-storage.service +Source3: LICENSE + +BuildRequires: systemd +BuildRequires: systemd-devel +Requires: bash +Requires: /bin/systemctl + +%description +Maintenance support files for storage-only node type + +%prep + +%build + +%install + + +# Storage-Only Init Scripts +install -m 755 -p -D %{SOURCE1} %{buildroot}/etc/init.d/goenabledStorage + +# Storage-Only Process Monitor Config files +install -m 755 -d %{buildroot}%{local_etc_pmond} + +# Storage-Only Go Enabled Tests +install -m 755 -d 
%{buildroot}%{local_etc_goenabledd} + +# Storage-Only Services +install -m 755 -d %{buildroot}%{local_etc_servicesd}/storage + +# Install systemd dir +install -m 644 -p -D %{SOURCE2} %{buildroot}%{_unitdir}/goenabled-storage.service + +%post +/bin/systemctl enable goenabled-storage.service + +%files + +%defattr(-,root,root,-) + +/etc/init.d/goenabledStorage +%{_unitdir}/goenabled-storage.service + +%clean +rm -rf $RPM_BUILD_ROOT diff --git a/mtce-storage/cgts-mtce-storage-1.0/scripts/LICENSE b/mtce-storage/cgts-mtce-storage-1.0/scripts/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/mtce-storage/cgts-mtce-storage-1.0/scripts/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/mtce-storage/cgts-mtce-storage-1.0/scripts/goenabled b/mtce-storage/cgts-mtce-storage-1.0/scripts/goenabled
new file mode 100644
index 00000000..e8b7f94c
--- /dev/null
+++ b/mtce-storage/cgts-mtce-storage-1.0/scripts/goenabled
@@ -0,0 +1,175 @@
+#!/bin/bash
+#
+# Copyright (c) 2013-2014 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+GOENABLED_PATH=${GOENABLED_PATH:-"/etc/goenabled.d"}
+GOENABLED_FILE=${GOENABLED_FILE:-"/var/run/.goenabled_storage"}
+GOENABLED_TAG=${GOENABLED_TAG:-"GOENABLED"}
+STORAGE_CONFIG_COMPLETE="/var/run/.storage_config_complete"
+
+RETVAL=0
+
+################################################################################
+# Log all arguments to syslog as one message tagged ${GOENABLED_TAG}
+################################################################################
+function log
+{
+    logger -t "${GOENABLED_TAG}" "$@"
+}
+
+################################################################################
+# Print "[ OK ]" when $1 is 0, "[FAILED]" otherwise
+################################################################################
+function print_status()
+{
+    if [ "$1" -eq "0" ]; then
+        echo "[ OK ]"
+    else
+        echo "[FAILED]"
+    fi
+}
+
+################################################################################
+# Run the scripts in ${GOENABLED_PATH}; return run-parts' status (0 if no dir)
+################################################################################
+function goenabled_check()
+{
+    if [ -d "${GOENABLED_PATH}" ]; then
+        run-parts "${GOENABLED_PATH}" 2>&1 | logger -t "${GOENABLED_TAG}"
+        RET=${PIPESTATUS[0]}
+        if [ ${RET} -ne 0 ]; then
+            return ${RET}
+        fi
+    fi
+
+    return 0
+}
+
+
+################################################################################
+# Write goenabled state file; returns non-zero if the write fails
+################################################################################
+function goenabled_enable_ready()
+{
+    echo "$(date): $(hostname) : Ready to Run GoEnabled Scripts" > "${GOENABLED_FILE}"
+    RET=$?
+    if [ ${RET} -ne 0 ]; then
+        log "Failed to write state file ${GOENABLED_FILE}"
+        return ${RET}
+    fi
+
+    log "enabled"
+
+    return 0
+}
+
+
+################################################################################
+# Remove goenabled state file; returns non-zero if rm fails
+################################################################################
+function goenabled_disable()
+{
+    rm -f "${GOENABLED_FILE}"
+    RET=$?
+    if [ ${RET} -ne 0 ]; then
+        log "Failed to remove state file ${GOENABLED_FILE}"
+        return ${RET}
+    fi
+
+    log "disabled"
+
+    return 0
+}
+
+################################################################################
+# Start Action: create the state file, record the result in RETVAL
+################################################################################
+function start()
+{
+    echo -n "Goenabled Ready: "
+
+    goenabled_enable_ready
+    RETVAL=$?
+    if [ "$RETVAL" -ne "0" ]; then
+        log "Go enabled failed"
+        print_status $RETVAL
+        return
+    fi
+
+    print_status $RETVAL
+}
+
+################################################################################
+# Stop Action: remove the state file, record the result in RETVAL
+################################################################################
+function stop()
+{
+    echo -n "Stopping goenabled: "
+
+    goenabled_disable
+    RETVAL=$?
+    if [ "$RETVAL" -ne "0" ]; then
+        log "Go disabled failed"
+        print_status $RETVAL
+        return
+    fi
+
+    print_status $RETVAL
+}
+
+################################################################################
+# Status Action: run the goenabled checks, record the result in RETVAL
+################################################################################
+function status()
+{
+    echo -n "Checking goenabled: "
+
+    goenabled_check
+    RETVAL=$?
+    if [ "$RETVAL" -ne "0" ]; then
+        print_status $RETVAL
+        return
+    fi
+
+    print_status $RETVAL
+}
+
+
+################################################################################
+# Main Entry
+################################################################################
+
+# Don't run this till storage is configured (RETVAL is still 0 here)
+if [ ! -e "$STORAGE_CONFIG_COMPLETE" ] ; then
+    logger "Storage is not configured"
+    exit $RETVAL
+fi
+
+case "$1" in
+    start)
+        start
+        ;;
+
+    stop)
+        stop
+        ;;
+
+    restart|reload)
+        stop
+        start
+        ;;
+
+    status)
+        status
+        ;;
+
+    *)
+        echo "usage: $0 { start | stop | status | restart | reload }"
+        exit 1
+        ;;
+esac
+
+exit $RETVAL
diff --git a/mtce-storage/cgts-mtce-storage-1.0/scripts/goenabled-storage.service b/mtce-storage/cgts-mtce-storage-1.0/scripts/goenabled-storage.service
new file mode 100644
index 00000000..6a837e5e
--- /dev/null
+++ b/mtce-storage/cgts-mtce-storage-1.0/scripts/goenabled-storage.service
@@ -0,0 +1,13 @@
+[Unit]
+Description=Titanium Cloud Maintenance Storage Goenable Ready
+After=config.service
+Before=pmon.service
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+ExecStart=/etc/init.d/goenabledStorage start
+ExecStop=/etc/init.d/goenabledStorage stop
+
+[Install]
+WantedBy=multi-user.target
diff --git a/mwa-beas.map b/mwa-beas.map
new file mode 100644
index 00000000..b2ff8475
--- /dev/null
+++ b/mwa-beas.map
@@ -0,0 +1,7 @@
+cgcs/recipes-installer|installer
+cgcs/common-bsp/files|bsp-files
+cgcs/recipes-devtools/platform-kickstarts|kickstart
+cgcs/middleware/mtce/recipes-common/cgts-mtce-common|mtce-common
+cgcs/middleware/mtce/recipes-compute/cgts-mtce-compute|mtce-compute +cgcs/middleware/mtce/recipes-control/cgts-mtce-control|mtce-control +cgcs/middleware/mtce/recipes-storage/cgts-mtce-storage|mtce-storage