Host compute service failure alarm removal
Remove the stale alarm 270.001 (Host compute service failure) that is raised by the VIM. This might be an old reference to nova, and it has likely not been in use since stx. Test Plan: PASS: Verify that, with a load without the changes (i.e. without the alarm removal), the event log in platform.log shows an entry for the 270.001 alarm. PASS: Verify that, with a load containing the alarm-removal changes, the event log in platform.log does not show an entry for the 270.001 alarm. Depends-On: https://review.opendev.org/c/starlingx/fault/+/872603 Closes-Bug: 2004744 Change-Id: Icafb079fc2b58fb4126ac325804901ebd3f8f66e Signed-off-by: Vanathi.Selvaraju <vanathi.selvaraju@windriver.com>
This commit is contained in:
parent
0df5aaaf5d
commit
65bbbe1f0d
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2015-2021 Wind River Systems, Inc.
|
||||
# Copyright (c) 2015-2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@ -24,8 +24,6 @@ DLOG = debug.debug_get_logger('nfv_plugins.alarm_handlers.fm')
|
||||
_fm_alarm_id_mapping = dict([
|
||||
(alarm_objects_v1.ALARM_TYPE.MULTI_NODE_RECOVERY_MODE,
|
||||
fm_constants.FM_ALARM_ID_VM_MULTI_NODE_RECOVERY_MODE),
|
||||
(alarm_objects_v1.ALARM_TYPE.HOST_SERVICES_FAILED,
|
||||
fm_constants.FM_ALARM_ID_HOST_SERVICES_FAILED),
|
||||
(alarm_objects_v1.ALARM_TYPE.INSTANCE_FAILED,
|
||||
fm_constants.FM_ALARM_ID_VM_FAILED),
|
||||
(alarm_objects_v1.ALARM_TYPE.INSTANCE_SCHEDULING_FAILED,
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2015-2021 Wind River Systems, Inc.
|
||||
# Copyright (c) 2015-2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@ -32,8 +32,6 @@ _fm_event_id_mapping = dict([
|
||||
fm_constants.FM_LOG_ID_HOST_SERVICES_ENABLED),
|
||||
(event_log_objects_v1.EVENT_ID.HOST_SERVICES_DISABLED,
|
||||
fm_constants.FM_LOG_ID_HOST_SERVICES_DISABLED),
|
||||
(event_log_objects_v1.EVENT_ID.HOST_SERVICES_FAILED,
|
||||
fm_constants.FM_LOG_ID_HOST_SERVICES_FAILED),
|
||||
(event_log_objects_v1.EVENT_ID.HYPERVISOR_STATE_CHANGE,
|
||||
fm_constants.FM_LOG_ID_HYPERVISOR_STATE_CHANGE),
|
||||
(event_log_objects_v1.EVENT_ID.INSTANCE_RENAMED,
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2015-2016 Wind River Systems, Inc.
|
||||
# Copyright (c) 2015-2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@ -7,8 +7,6 @@ from nfv_common.alarm import * # noqa: F401,F403
|
||||
|
||||
from nfv_vim.alarm._general import clear_general_alarm # noqa: F401
|
||||
from nfv_vim.alarm._general import raise_general_alarm # noqa: F401
|
||||
from nfv_vim.alarm._host import host_clear_alarm # noqa: F401
|
||||
from nfv_vim.alarm._host import host_raise_alarm # noqa: F401
|
||||
from nfv_vim.alarm._instance import instance_clear_alarm # noqa: F401
|
||||
from nfv_vim.alarm._instance import instance_manage_alarms # noqa: F401
|
||||
from nfv_vim.alarm._instance import instance_raise_alarm # noqa: F401
|
||||
|
@ -1,134 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2015-2016 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
import uuid
|
||||
|
||||
from nfv_common import alarm
|
||||
|
||||
|
||||
# Alarm Template Definitions
|
||||
# *** Don't add a period to the end of reason_text, these are not sentences.
|
||||
_alarm_templates = {
    # Critical alarm reported against the compute services of a host.
    # The 'entity' and 'reason_text' strings are %-format templates that
    # are filled in from a mapping providing 'host_name' and
    # 'additional_text'.
    alarm.ALARM_TYPE.HOST_SERVICES_FAILED: dict(
        entity_type="host.services",
        entity="host=%(host_name)s.services=compute",
        event_type=alarm.ALARM_EVENT_TYPE.PROCESSING_ERROR_ALARM,
        severity=alarm.ALARM_SEVERITY.CRITICAL,
        probable_cause=alarm.ALARM_PROBABLE_CAUSE.UNKNOWN,
        reason_text="Host %(host_name)s compute services failure"
                    "%(additional_text)s",
        repair_action="Wait for host services recovery to complete; if "
                      "problem persists contact next level of support",
        # No template is returned for the tenant alarm context
        exclude_alarm_context=[alarm.ALARM_CONTEXT.TENANT],
    ),
}
|
||||
|
||||
|
||||
def _alarm_template_get(alarm_type, alarm_context):
    """
    Looks up the alarm template for an alarm type in a given alarm context

    Returns None when the alarm type has no template or when the alarm
    context is listed in the template's 'exclude_alarm_context'. Otherwise
    returns a new dict holding the template fields, with any per-context
    overrides found under 'alarm_context_data' applied on top.
    """
    # Fields copied from the base template, in the order they are read
    field_names = ('entity_type', 'entity', 'event_type', 'severity',
                   'probable_cause', 'reason_text', 'repair_action')

    try:
        base = _alarm_templates[alarm_type]
    except KeyError:
        return None

    if alarm_context in base['exclude_alarm_context']:
        return None

    template = {name: base[name] for name in field_names}

    per_context = base.get('alarm_context_data', None)
    if per_context is None or alarm_context not in per_context:
        return template

    # Only the fields present in the context-specific data are overridden
    overrides = per_context[alarm_context]
    for name in field_names:
        if name in overrides:
            template[name] = overrides[name]

    return template
|
||||
|
||||
|
||||
def _alarm_raise(alarm_type, alarm_context, template, data):
    """
    Builds the alarm data from a template and raises the alarm

    The 'entity' and 'reason_text' template strings are %-formatted with
    the given data mapping. A new random uuid identifies the alarm. The
    alarm data that was raised is returned to the caller.
    """
    new_uuid = uuid.uuid4()

    entity_type = template['entity_type']
    entity = template['entity'] % data
    event_type = template['event_type']
    probable_cause = template['probable_cause']
    severity = template['severity']
    trend = alarm.ALARM_TREND_INDICATION.NO_CHANGE
    reason_text = template['reason_text'] % data
    repair_action = template['repair_action']

    raised = alarm.AlarmData(new_uuid, alarm_type, alarm_context,
                             entity_type, entity, event_type,
                             probable_cause, severity, trend,
                             reason_text, repair_action)

    alarm.alarm_raise(new_uuid, raised)
    return raised
|
||||
|
||||
|
||||
def host_raise_alarm(host, alarm_type, additional_text=None,
                     alarm_context=None):
    """
    Raise alarms against the host

    host: object whose 'name' attribute is substituted into the alarm
          entity and reason text
    alarm_type: key used to look up the alarm template
    additional_text: optional text appended to the alarm reason text; None
                     is treated as an empty string
    alarm_context: currently ignored, the admin context is always used

    Returns the list of alarm data raised, which is empty when no template
    applies to the alarm type in the admin context.
    """
    data = dict()
    data['host_name'] = host.name
    # The reason text is built with "%(additional_text)s", which would
    # render a None value as the literal string "None"
    data['additional_text'] = \
        '' if additional_text is None else additional_text

    alarm_list = list()

    # For now, override alarm context to be the admin only. Because of this
    # override the context can never be None here, so only the
    # single-context path is needed.
    alarm_context = alarm.ALARM_CONTEXT.ADMIN

    template = _alarm_template_get(alarm_type, alarm_context)
    if template is not None:
        alarm_data = _alarm_raise(alarm_type, alarm_context, template,
                                  data)
        alarm_list.append(alarm_data)

    return alarm_list
|
||||
|
||||
|
||||
def host_clear_alarm(alarm_list):
    """
    Clear each alarm in the given list of alarm data

    Every entry is cleared by its 'alarm_uuid'. The list itself is not
    modified.
    """
    for raised in alarm_list:
        alarm.alarm_clear(raised.alarm_uuid)
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2015-2018 Wind River Systems, Inc.
|
||||
# Copyright (c) 2015-2023 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
@ -14,7 +14,6 @@ from nfv_common.helpers import Singleton
|
||||
|
||||
from nfv_vim.objects._object import ObjectData
|
||||
|
||||
from nfv_vim import alarm
|
||||
from nfv_vim import event_log
|
||||
from nfv_vim import host_fsm
|
||||
from nfv_vim import nfvi
|
||||
@ -124,7 +123,6 @@ class Host(ObjectData):
|
||||
HOST_SERVICE_STATE.ENABLED if self.is_enabled() else \
|
||||
HOST_SERVICE_STATE.DISABLED
|
||||
|
||||
self._alarms = list()
|
||||
self._events = list()
|
||||
|
||||
@property
|
||||
@ -730,7 +728,6 @@ class Host(ObjectData):
|
||||
"""
|
||||
NFVI Host Delete
|
||||
"""
|
||||
alarm.host_clear_alarm(self._alarms)
|
||||
self._fsm.handle_event(host_fsm.HOST_EVENT.DELETE)
|
||||
|
||||
def periodic_timer(self):
|
||||
@ -774,43 +771,11 @@ class Host(ObjectData):
|
||||
if HOST_SERVICE_STATE.ENABLED == host_service_state:
|
||||
self._events = event_log.host_issue_log(
|
||||
self, event_log.EVENT_ID.HOST_SERVICES_ENABLED)
|
||||
alarm.host_clear_alarm(self._alarms)
|
||||
self._alarms[:] = list()
|
||||
|
||||
elif HOST_SERVICE_STATE.DISABLED == host_service_state:
|
||||
# Always log the disabled compute service
|
||||
self._events = event_log.host_issue_log(
|
||||
self, event_log.EVENT_ID.HOST_SERVICES_DISABLED)
|
||||
# Clear any previous alarms for this host
|
||||
alarm.host_clear_alarm(self._alarms)
|
||||
self._alarms[:] = list()
|
||||
# Alarm the disabled compute service if the host is still
|
||||
# enabled and is not being locked. Alarm it as a failure.
|
||||
if self.nfvi_host_is_enabled():
|
||||
if reason is None:
|
||||
additional_text = ''
|
||||
else:
|
||||
additional_text = ", %s" % reason
|
||||
self._alarms = alarm.host_raise_alarm(
|
||||
self, alarm.ALARM_TYPE.HOST_SERVICES_FAILED,
|
||||
additional_text=additional_text)
|
||||
|
||||
elif HOST_SERVICE_STATE.FAILED == host_service_state:
|
||||
if reason is None:
|
||||
additional_text = ''
|
||||
else:
|
||||
additional_text = ", %s" % reason
|
||||
|
||||
self._events = event_log.host_issue_log(
|
||||
self, event_log.EVENT_ID.HOST_SERVICES_FAILED,
|
||||
additional_text=additional_text)
|
||||
# Clear any previous alarms for this host
|
||||
alarm.host_clear_alarm(self._alarms)
|
||||
self._alarms[:] = list()
|
||||
# Alarm the failed compute service
|
||||
self._alarms = alarm.host_raise_alarm(
|
||||
self, alarm.ALARM_TYPE.HOST_SERVICES_FAILED,
|
||||
additional_text=additional_text)
|
||||
|
||||
def nfvi_host_upgrade_status(self, upgrade_inprogress, recover_instances):
|
||||
"""
|
||||
|
Loading…
Reference in New Issue
Block a user