metal/mtce-common/src/common/returnCodes.h
Eric MacDonald 8a223f395d Mtce: Add heartbeat cluster information for SM query
This is part one of a two-part HA Improvements feature that introduces
the collection of heartbeat health at the system level.

The full feature is intended to provide service management (SM)
with the last 2 seconds of maintenance's heartbeat health view that
is reflective of each controller's connectivity to each host
including its peer controller.

The heartbeat cluster summary information is additional information
for SM to draw on when needing to make a choice of which controller
is healthier, if/when to switch over and to ultimately avoid split
brain scenarios in a two controller system.

Feature Behavior: A common heartbeat cluster data structure is
introduced and published to the sysroot for SM. The heartbeat
service populates and maintains a local copy of this structure
with data that reflects the responsiveness for each monitored
network of all the monitored hosts for the last 20 heartbeat
periods. Mtce sends the current cluster summary to SM upon request.

General flow of cluster feature wrt hbsAgent:

  hbs_cluster_init: general data init
  hbs_cluster_nums: set controller and network numbers
  forever:

    select:
      hbs_cluster_add / hbs_cluster_del: - add/del hosts from mtcAgent
      hbs_sm_handler -> hbs_cluster_send: - send cluster to SM

    heartbeating:
      hbs_cluster_append: add controller cluster to pulse request
      hbs_cluster_update: get controller cluster data from pulse responses
      hbs_cluster_save: save other controller cluster view in cluster vault
      hbs_cluster_log: log cluster state changes (clog)

Test Plan:

  PASS: Verify compute system install
  PASS: Verify storage system install
  PASS: Verify cluster data ; all members of structure
  PASS: Verify storage-0 state management
  PASS: Verify add of second controller
  PASS: Verify add of storage-0 node
  PASS: Verify behavior over Swact
  PASS: Verify lock/unlock of second controller ; overall behavior
  PASS: Verify lock/unlock of storage-0 ; overall behavior
  PASS: Verify lock/unlock of storage-1 ; overall behavior
  PASS: Verify lock/unlock of compute nodes ; overall behavior
  PASS: Verify heartbeat failure and recovery of compute node
  PASS: Verify heartbeat failure and recovery of storage-0
  PASS: Verify heartbeat failure and recovery of controller
  PASS: Verify delete of controller node
  PASS: Verify delete of storage-0
  PASS: Verify delete of compute node
  PASS: Verify cluster when controller-1 active / controller-0 disabled
  PASS: Verify MNFA and recovery handling
  PASS: Verify handling in presence of multiple failure conditions
  PASS: Verify hbsAgent memory leak soak test with continuous SM query.
  PASS: Verify active controller-1 infra network failure behavior.
  PASS: Verify inactive controller-1 infra network failure behavior.

Change-Id: I4154287f6dcf5249be5ab3180f2752ab47c5da3c
Story: 2003576
Task: 24907
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
2018-10-05 22:47:17 +00:00

148 lines
4.5 KiB
C

/*
 * NOTE(review): the guard name below begins with a double underscore;
 * identifiers of that form are reserved for the implementation by the
 * C standard. It is left unchanged here because other files may test it.
 */
#ifndef __INCLUDE_RETURNCODES_H__
#define __INCLUDE_RETURNCODES_H__
/*
* Copyright (c) 2013, 2016 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
*/
/**
* @file
* Wind River CGTS Platform Common Return Code Definition
*
* Every macro in this header expands to a parenthesized integer literal.
* PASS is 0; all other codes are non-zero. Values are unique within this
* header but are not contiguous: see the notes next to the gaps below.
* Because these are preprocessor macros they are also usable in #if
* expressions.
*/
/** Common Return Codes */
#define PASS (0)
#define FAIL (1)
#define RETRY (2)
#define FAIL_BAD_PARM (3)
#define FAIL_BAD_CASE (4)
#define FAIL_LOAD_INI (5)
#define FAIL_INI_CONFIG (6)
#define FAIL_GET_HOSTNAME (7)
#define FAIL_HOSTNAME_LOOKUP (8)
#define FAIL_PID_OPEN (9)
#define FAIL_LOG_OPEN (10)
#define FAIL_FILE_WRITE (11)
#define FAIL_TIMER_SET_ACTION (12)
#define FAIL_TIMER_SET (13)
#define FAIL_TIMER_CREATE (14)
#define FAIL_TIMER_STOP (15)
#define FAIL_TIMEOUT (16)
#define FAIL_JSON_TOO_LONG (17)
#define FAIL_JSON_PARSE (18)
#define FAIL_JSON_ZERO_LEN (19)
#define FAIL_STRING_EMPTY (20)
#define FAIL_FILES_INIT (21)
#define FAIL_HOSTNAME_SETUP (22)
#define FAIL_DAEMON_CONFIG (23)
#define FAIL_MESSAGE_INIT (24)
#define FAIL_SIGNAL_INIT (25)
#define FAIL_DAEMON_INIT (26)
#define FAIL_NO_IP_SUPPORT (27)
#define FAIL_SELF_IDENTIFY (28)
#define FAIL_HTTP_ZERO_STATUS (29)
#define FAIL_BAD_MCAST (30)
#define FAIL_SOCKET_SELECT (31)
#define FAIL_SOCKET_CREATE (32)
#define FAIL_SOCKET_INIT (33)
#define FAIL_SOCKET_BIND (34)
#define FAIL_SOCKET_OPTION (35)
#define FAIL_SOCKET_NOBLOCK (36)
#define FAIL_SOCKET_FLAGS (37)
#define FAIL_SOCKET_LISTEN (38)
#define FAIL_MCAST_SETUP (39)
#define FAIL_MSG_UNDERRUN (40)
#define FAIL_MSG_HEADER (41)
#define FAIL_TO_RECEIVE (42)
#define FAIL_CONNECT (43)
#define FAIL_TO_TRANSMIT (44)
#define FAIL_INVALID_OPERATION (45)
#define FAIL_HOSTADDR_LOOKUP (46)
#define FAIL_SOCKET_SENDTO (47)
#define FAIL_EVENT_NEW (48)
#define FAIL_EVENT_BASE (49)
#define FAIL_DATABASE_DOWN (50)
#define FAIL_HEADER_ADD (51)
#define FAIL_PAYLOAD_ADD (52)
#define FAIL_REQUEST (53)
#define FAIL_INV_QUERY (54)
#define FAIL_REQUEST_NEW (55)
#define FAIL_MAKE_REQUEST (56)
#define FAIL_MUTEX_ERROR (57)
#define FAIL_UNKNOWN_HOSTNAME (58)
#define FAIL_TOKEN_GET (59)
#define FAIL_DEL_UNLOCKED (60)
#define FAIL_DEL_UNKNOWN (61)
#define FAIL_UNIT_ACTIVE (62)
#define FAIL_ADMIN_ACTION (63)
#define FAIL_RESET_CONTROL (64)
#define FAIL_POWER_CONTROL (65)
#define FAIL_OPER_INPROGRESS (66)
#define FAIL_SWACT_INPROGRESS (67)
#define FAIL_SWACT_NOINSVMATE (68)
#define FAIL_NODETYPE (69)
#define FAIL_RESET_POWEROFF (70)
#define FAIL_INVALID_DATA (71)
#define FAIL_BAD_STATE (72)
#define FAIL_KEY_VALUE_PARSE (73)
#define FAIL_DATA_SIZE (74)
#define FAIL_NOT_FOUND (75)
#define FAIL_WORKQ_TIMEOUT (76)
#define FAIL_HTTP_DELETE (77)
#define FAIL_AUTHENTICATION (78)
#define FAIL_RETRY (79) /* get out of a retry loop */
#define FAIL_HDLR_INIT (80)
#define FAIL_TOKEN_URL (81)
#define FAIL_LOW_STORAGE (82)
#define FAIL_NEED_DUPLEX (83)
#define FAIL_NEED_STORAGE_MON (84)
#define FAIL_READ_FILES (85)
#define FAIL_FILE_ACCESS (86)
#define FAIL_RESERVED_NAME (87)
#define FAIL_INVALID_HOSTNAME (88)
#define FAIL_INVALID_UUID (89)
#define FAIL_NOT_CONNECTED (90)
#define FAIL_BM_PROVISION_ERR (91)
#define FAIL_DUP_HOSTNAME (92)
#define FAIL_DUP_IPADDR (93)
#define FAIL_DUP_MACADDR (94)
#define FAIL____UNUSED____95 (95) /* placeholder name; presumably keeps 95 reserved - confirm */
#define FAIL_LOCATE_KEY_VALUE (96)
#define FAIL_JSON_OBJECT (97)
#define FAIL_EXTERNAL_API (98)
#define FAIL_PATCH_INPROGRESS (99)
#define FAIL_NO_INFRA_PROV (100)
#define FAIL_PATCHED_NOREBOOT (101)
#define FAIL_NOT_SUPPORTED (102)
#define FAIL_NO_DATA (103)
#define FAIL_FILE_OPEN (104)
#define FAIL_FILE_CREATE (105)
#define FAIL_DIR_CREATE (106)
#define FAIL_OPERATION (107)
#define FAIL_SYSTEM_CALL (108)
#define FAIL_CREATE_OPERATION (109)
/*
 * Numbering is not contiguous from here on: 111, 113 and 115-118 have no
 * macro in this header.
 */
#define FAIL_THREAD_CREATE (110)
#define FAIL_THREAD_LAUNCH (112)
#define FAIL_THREAD_RUNNING (114)
#define FAIL_THREAD_EXIT (119)
#define FAIL_NOT_ACCESSIBLE (120)
#define FAIL_NOT_ACTIVE (121)
#define FAIL_OUT_OF_RANGE (122)
#define FAIL_NO_CMD_ACK (123)
#define FAIL_FORK_HANDLING (124)
/* 125-253 and 255 have no macro in this header. */
#define FAIL_FIT (254)
/*
 * NOTE(review): 256 does not fit in 8 bits. If any of these codes is ever
 * used as a process exit status, this one would be truncated to 0, which
 * equals PASS - confirm it is only used as an in-process return value.
 */
#define FAIL_NULL_POINTER (256)
#endif /* __INCLUDE_RETURNCODES_H__ */