Adding Kubernetes alarm type for PV migration errors during AIO-SX to AIO-DX migration

This commit adds a new alarm type for Kubernetes Persistent Volume
patching errors during AIO-SX to AIO-DX migration.

Partial-Bug: 1927224
Signed-off-by: Pedro Henrique Linhares <PedroHenriqueLinhares.Silva@windriver.com>
Change-Id: I8f64280394999249c829372d1748a9c26fdb9ced
This commit is contained in:
Pedro Henrique Linhares 2021-05-06 18:41:57 -03:00 committed by Pedro Linhares
parent df29c7c103
commit 3280e6cd5b
2 changed files with 26 additions and 2 deletions

View File

@ -33,7 +33,7 @@ FM_ENTITY_TYPE_INSTANCE = 'instance'
# Entity-type identifier strings (part of the FM_ENTITY_TYPE_* list).
FM_ENTITY_TYPE_CLUSTER = 'cluster'
FM_ENTITY_TYPE_NTP = 'ntp'
FM_ENTITY_TYPE_ML2DRIVER = 'ml2driver'
FM_ENTITY_TYPE_PV = 'pv'
# 'kubernetes' is the key used in the 850.001 alarm's Entity_Instance_ID
# ("kubernetes=PV-migration-failed").
FM_ENTITY_TYPE_K8S = 'kubernetes'
FM_ENTITY_TYPE_BGP_PEER = "bgp-peer"
FM_ENTITY_TYPE_STORAGE_BACKEND = 'storage_backend'
FM_ENTITY_TYPE_IMAGE_CONVERSION = 'fs_name'
@ -60,6 +60,7 @@ ALARM_GROUP_LICENSING = "600"
# Alarm group prefixes: numeric strings that are concatenated with a
# ".NNN" suffix to form a full alarm ID (e.g. ALARM_GROUP_K8S + ".001").
ALARM_GROUP_VM = "700"
ALARM_GROUP_APPLICATION = "750"
ALARM_GROUP_STORAGE = "800"
# Group prefix for Kubernetes alarms (850.NNN).
ALARM_GROUP_K8S = "850"
ALARM_GROUP_SW_MGMT = "900"
# General Alarm id
@ -104,10 +105,12 @@ FM_ALARM_ID_STORAGE_IMAGE = ALARM_GROUP_STORAGE + ".002"
# Storage alarm IDs: each is the storage group prefix plus a ".NNN" suffix.
FM_ALARM_ID_STORAGE_CEPH_FREE_SPACE = ALARM_GROUP_STORAGE + ".003"
FM_ALARM_ID_STORAGE_CINDER_IO_BUILDING = ALARM_GROUP_STORAGE + ".100"
FM_ALARM_ID_STORAGE_CINDER_IO_LIMITING = ALARM_GROUP_STORAGE + ".101"
FM_ALARM_ID_STORAGE_PV_FAILED = ALARM_GROUP_STORAGE + ".102"
# Alarm .103 is reserved for LVM thin pool metadata alarm
FM_ALARM_ID_STORAGE_BACKEND_FAILED = ALARM_GROUP_STORAGE + ".104"
# Kubernetes Resource Alarms
# Built from the Kubernetes group prefix plus ".001"; pairs with the
# "Persistent Volume Migration Error" alarm definition (850.001).
FM_ALARM_ID_K8S_RESOURCE_PV = ALARM_GROUP_K8S + ".001"
# Host-Services alarm id
FM_ALARM_ID_HOST_SERVICES_FAILED = ALARM_GROUP_HOST_SERVICES + ".001"

View File

@ -2993,6 +2993,27 @@
Management_Affecting_Severity: major
Degrade_Affecting_Severity: none
#---------------------------------------------------------------------------
# KUBERNETES
#---------------------------------------------------------------------------
# 850.001 - raised when patching Kubernetes Persistent Volumes fails during
# AIO-SX to AIO-DX migration. All fields below are nested under the alarm ID.
850.001:
    Type: Alarm
    Description: Persistent Volume Migration Error
    Entity_Instance_ID: kubernetes=PV-migration-failed
    Severity: major
    Proposed_Repair_Action: "Manually execute /usr/bin/ceph_k8s_update_monitors.sh
                             to confirm PVs are updated, then lock/unlock to clear
                             alarms. If problem persists, contact next level of
                             support."
    Maintenance_Action:
    Inhibit_Alarms:
    Alarm_Type: processing-error
    Probable_Cause: communication-subsystem-failure
    Service_Affecting: False
    Suppression: False
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: none
#---------------------------------------------------------------------------
# SOFTWARE