sw-manager patch-strategy failed to install due to timeout
As part of this fix, the alarm-clear wait step checks for the stale alarm 750.006 for 30 minutes. If the alarm is still not cleared, the patch strategy ignores the alarm. In the current case, alarm 750.006 is not getting cleared, so the patch strategy times out. Test Plan: PASSED: Applying a patch - On a DX system, create and apply a patch strategy with an uncleared alarm present in fm alarm-list (for test purposes, 100.103 - Memory threshold alarm was used). After 30 minutes the alarm was ignored and the patch strategy was successfully applied. PASSED: Removing a patch - On a DX system, create and apply a patch strategy with an uncleared alarm present in fm alarm-list (for test purposes, 100.103 - Memory threshold alarm was used). After 30 minutes the alarm was ignored and the patch strategy was successfully applied. TODO: test with the actual alarm 750.006 in the lab setup. Closes-Bug: 2059305 Change-Id: I7ebaf5a24fa45a7e45f3af7e5ca588ce3ee06156 Signed-off-by: Vanathi.Selvaraju <vanathi.selvaraju@windriver.com>
This commit is contained in:
parent
791463f67f
commit
1d2e429800
@ -992,8 +992,9 @@ class UpdateControllerHostsMixin(object):
|
||||
# OSDs configured, but the alarms should clear quickly in
|
||||
# that case so this will not delay the update strategy.
|
||||
stage.add_step(strategy.WaitAlarmsClearStep(
|
||||
timeout_in_secs=30 * 60,
|
||||
ignore_alarms=self._ignore_alarms))
|
||||
timeout_in_secs=40 * 60,
|
||||
ignore_alarms=self._ignore_alarms,
|
||||
ignore_alarms_conditional=self._ignore_alarms_conditional))
|
||||
else:
|
||||
# Less time required if host is not rebooting
|
||||
stage.add_step(strategy.SystemStabilizeStep(
|
||||
@ -1004,7 +1005,8 @@ class UpdateControllerHostsMixin(object):
|
||||
host_list = [local_host]
|
||||
stage = strategy.StrategyStage(strategy_stage_name)
|
||||
stage.add_step(strategy.QueryAlarmsStep(
|
||||
True, ignore_alarms=self._ignore_alarms))
|
||||
True, ignore_alarms=self._ignore_alarms,
|
||||
ignore_alarms_conditional=self._ignore_alarms_conditional))
|
||||
if reboot:
|
||||
stage.add_step(strategy.SwactHostsStep(host_list))
|
||||
stage.add_step(strategy.LockHostsStep(host_list))
|
||||
@ -1025,8 +1027,9 @@ class UpdateControllerHostsMixin(object):
|
||||
# OSDs configured, but the alarms should clear quickly in
|
||||
# that case so this will not delay the update strategy.
|
||||
stage.add_step(strategy.WaitAlarmsClearStep(
|
||||
timeout_in_secs=30 * 60,
|
||||
ignore_alarms=self._ignore_alarms))
|
||||
timeout_in_secs=40 * 60,
|
||||
ignore_alarms=self._ignore_alarms,
|
||||
ignore_alarms_conditional=self._ignore_alarms_conditional))
|
||||
else:
|
||||
# Less time required if host is not rebooting
|
||||
stage.add_step(strategy.SystemStabilizeStep(
|
||||
@ -1297,8 +1300,9 @@ class UpdateWorkerHostsMixin(object):
|
||||
for host in hosts_to_lock + hosts_to_reboot]):
|
||||
# Multiple personality nodes that need to wait for OSDs to sync:
|
||||
stage.add_step(strategy.WaitAlarmsClearStep(
|
||||
timeout_in_secs=30 * 60,
|
||||
ignore_alarms=self._ignore_alarms))
|
||||
timeout_in_secs=40 * 60,
|
||||
ignore_alarms=self._ignore_alarms,
|
||||
ignore_alarms_conditional=self._ignore_alarms_conditional))
|
||||
else:
|
||||
if any([host.openstack_control or host.openstack_compute
|
||||
for host in hosts_to_lock + hosts_to_reboot]):
|
||||
@ -1393,9 +1397,13 @@ class SwPatchStrategy(SwUpdateStrategy,
|
||||
'100.119', # PTP alarm for SyncE
|
||||
'900.701', # Node tainted
|
||||
]
|
||||
IGNORE_ALARMS_CONDITIONAL = {'100.103': 1800}
|
||||
self._ignore_alarms += IGNORE_ALARMS
|
||||
self._single_controller = single_controller
|
||||
|
||||
# This is only for patch strategy to ignore 750.006 alarm when it becomes stale
|
||||
self._ignore_alarms_conditional = IGNORE_ALARMS_CONDITIONAL
|
||||
|
||||
# initialize the variables required by the mixins
|
||||
# ie: self._nfvi_sw_patches, self._nfvi_sw_patch_hosts
|
||||
self.initialize_mixin()
|
||||
@ -1409,7 +1417,8 @@ class SwPatchStrategy(SwUpdateStrategy,
|
||||
stage = strategy.StrategyStage(
|
||||
strategy.STRATEGY_STAGE_NAME.SW_PATCH_QUERY)
|
||||
stage.add_step(
|
||||
strategy.QueryAlarmsStep(ignore_alarms=self._ignore_alarms))
|
||||
strategy.QueryAlarmsStep(ignore_alarms=self._ignore_alarms,
|
||||
ignore_alarms_conditional=self._ignore_alarms_conditional))
|
||||
stage.add_step(strategy.QuerySwPatchesStep())
|
||||
stage.add_step(strategy.QuerySwPatchHostsStep())
|
||||
self.build_phase.add_stage(stage)
|
||||
|
@ -1912,13 +1912,17 @@ class QueryAlarmsStep(strategy.StrategyStep):
|
||||
"""
|
||||
Query Alarms - Strategy Step
|
||||
"""
|
||||
def __init__(self, fail_on_alarms=False, ignore_alarms=None):
|
||||
def __init__(self, fail_on_alarms=False, ignore_alarms=None, ignore_alarms_conditional=None):
|
||||
super(QueryAlarmsStep, self).__init__(
|
||||
STRATEGY_STEP_NAME.QUERY_ALARMS, timeout_in_secs=60)
|
||||
if ignore_alarms is None:
|
||||
ignore_alarms = []
|
||||
self._fail_on_alarms = fail_on_alarms
|
||||
self._ignore_alarms = ignore_alarms
|
||||
# For ignoring 750.006 alarm for patch strategy
|
||||
if ignore_alarms_conditional is None:
|
||||
ignore_alarms_conditional = []
|
||||
self._ignore_alarms_conditional = ignore_alarms_conditional
|
||||
|
||||
@coroutine
|
||||
def _query_alarms_callback(self, fm_service):
|
||||
@ -1940,7 +1944,8 @@ class QueryAlarmsStep(strategy.StrategyStep):
|
||||
"%s - uuid %s due to relaxed alarm "
|
||||
"strictness" % (nfvi_alarm.alarm_id,
|
||||
nfvi_alarm.alarm_uuid))
|
||||
elif nfvi_alarm.alarm_id not in self._ignore_alarms:
|
||||
elif (nfvi_alarm.alarm_id not in self._ignore_alarms and
|
||||
nfvi_alarm.alarm_id not in self._ignore_alarms_conditional):
|
||||
DLOG.warn("Alarm: %s" % nfvi_alarm.alarm_id)
|
||||
nfvi_alarms.append(nfvi_alarm)
|
||||
else:
|
||||
@ -2106,7 +2111,8 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
||||
"""
|
||||
Alarm Wait - Strategy Step
|
||||
"""
|
||||
def __init__(self, timeout_in_secs=300, first_query_delay_in_secs=60, ignore_alarms=None):
|
||||
def __init__(self, timeout_in_secs=300, first_query_delay_in_secs=60, ignore_alarms=None,
|
||||
ignore_alarms_conditional=None):
|
||||
super(WaitAlarmsClearStep, self).__init__(
|
||||
STRATEGY_STEP_NAME.WAIT_ALARMS_CLEAR, timeout_in_secs=timeout_in_secs)
|
||||
self._first_query_delay_in_secs = first_query_delay_in_secs
|
||||
@ -2115,12 +2121,17 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
||||
self._ignore_alarms = ignore_alarms
|
||||
self._wait_time = 0
|
||||
self._query_inprogress = False
|
||||
if ignore_alarms_conditional is None:
|
||||
ignore_alarms_conditional = {}
|
||||
self._ignore_alarms_conditional = ignore_alarms_conditional
|
||||
|
||||
@coroutine
|
||||
def _query_alarms_callback(self):
|
||||
"""
|
||||
Query Alarms Callback
|
||||
"""
|
||||
from datetime import datetime
|
||||
|
||||
response = (yield)
|
||||
DLOG.debug("Query-Alarms callback response=%s." % response)
|
||||
|
||||
@ -2138,6 +2149,25 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
||||
"strictness" % (nfvi_alarm.alarm_id,
|
||||
nfvi_alarm.alarm_uuid))
|
||||
elif nfvi_alarm.alarm_id not in self._ignore_alarms:
|
||||
# For ignoring 750.006 alarm for patch strategy
|
||||
if nfvi_alarm.alarm_id in self._ignore_alarms_conditional:
|
||||
format_string = "%Y-%m-%dT%H:%M:%S.%f"
|
||||
alarm_timestamp = nfvi_alarm.timestamp
|
||||
alarm_timestamp_obj = datetime.strptime(
|
||||
alarm_timestamp, format_string)
|
||||
current_time = datetime.now()
|
||||
time_in_sec = (
|
||||
current_time - alarm_timestamp_obj).total_seconds()
|
||||
|
||||
# Ignore 750.006 alarm if present for more than 30 mins(1800s)
|
||||
if self._ignore_alarms_conditional[nfvi_alarm.alarm_id] < int(time_in_sec):
|
||||
self._ignore_alarms.append(
|
||||
list(self._ignore_alarms_conditional.keys())[0])
|
||||
else:
|
||||
nfvi_alarms.append(nfvi_alarm)
|
||||
else:
|
||||
nfvi_alarms.append(nfvi_alarm)
|
||||
|
||||
nfvi_alarms.append(nfvi_alarm)
|
||||
else:
|
||||
DLOG.debug("Ignoring alarm %s - uuid %s" %
|
||||
@ -2145,6 +2175,9 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
||||
self.strategy.nfvi_alarms = nfvi_alarms
|
||||
|
||||
if self.strategy.nfvi_alarms:
|
||||
for alarm in self.strategy.nfvi_alarms:
|
||||
if alarm['alarm_id'] == list(self._ignore_alarms_conditional.keys())[0]:
|
||||
self.strategy.nfvi_alarms.remove(alarm)
|
||||
# Keep waiting for alarms to clear
|
||||
pass
|
||||
else:
|
||||
@ -2193,6 +2226,7 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
||||
super(WaitAlarmsClearStep, self).from_dict(data)
|
||||
self._first_query_delay_in_secs = data['first_query_delay_in_secs']
|
||||
self._ignore_alarms = data['ignore_alarms']
|
||||
self._ignore_alarms_conditional = data['ignore_alarms_conditional']
|
||||
self._wait_time = 0
|
||||
self._query_inprogress = False
|
||||
return self
|
||||
@ -2207,6 +2241,7 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
||||
data['entity_uuids'] = list()
|
||||
data['first_query_delay_in_secs'] = self._first_query_delay_in_secs
|
||||
data['ignore_alarms'] = self._ignore_alarms
|
||||
data['ignore_alarms_conditional'] = self._ignore_alarms_conditional
|
||||
return data
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user