sw-manager patch-strategy failed to install due to timeout

As part of this fix, a new parameter, ignore_alarms_conditional,
is added, which holds the list of stale alarms that need to be
ignored after 30mins.
Alarm clear wait step checks for stale alarm 750.006 for
30mins. If the alarm is still not cleared, patch-strategy
ignores the alarm.
Now, since the stale alarms are monitored for 30mins,
the overall alarm clear timeout is increased to 2400sec.

In the current case alarm 750.006 is not getting cleared
and also it is not part of the ignore alarm list
so the patch-strategy times out.

Test Plan:
PASSED: Applying a patch - On DX system(VM),
Create and apply patch strategy,
fm alarm-list to have an uncleared alarm(for test purpose
100.103 - Memory threshold alarm was used). After 30mins
alarm was ignored and patch strategy successfully applied.
PASSED: Removing a patch - On DX system(VM),
Create and apply patch strategy,
fm alarm-list to have an uncleared alarm(for test purpose
100.103 - Memory threshold alarm was used). After 30mins
alarm was ignored and patch strategy successfully applied.
PASSED: On DX system(lab), 4 consecutive patch orchestrations
successfully applied. 750.006 - stale alarm tested.
PASSED: On DX system, create and apply strategy,
with an alarm existing on the system (not part of ignore list),
the strategy would wait for 40mins before timing out.

Closes-Bug: 2059305
Change-Id: I7ebaf5a24fa45a7e45f3af7e5ca588ce3ee06156
Signed-off-by: Vanathi.Selvaraju <vanathi.selvaraju@windriver.com>
This commit is contained in:
Vanathi.Selvaraju 2024-03-27 15:53:55 -04:00
parent 8dfb971980
commit 3df717a0b7
4 changed files with 98 additions and 46 deletions

View File

@ -2990,7 +2990,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-1']}, 'entity_names': ['controller-1']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
{'name': 'sw-patch-controllers', {'name': 'sw-patch-controllers',
@ -3008,7 +3008,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-0']}, 'entity_names': ['controller-0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
] ]
@ -3100,7 +3100,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-1']}, 'entity_names': ['controller-1']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
{'name': 'sw-patch-controllers', {'name': 'sw-patch-controllers',
@ -3118,7 +3118,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-0']}, 'entity_names': ['controller-0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
] ]
@ -3224,7 +3224,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'start-instances', {'name': 'start-instances',
'entity_names': ['test_instance_0']}, 'entity_names': ['test_instance_0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
{'name': 'sw-patch-worker-hosts', {'name': 'sw-patch-worker-hosts',
@ -3246,7 +3246,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'start-instances', {'name': 'start-instances',
'entity_names': ['test_instance_1']}, 'entity_names': ['test_instance_1']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800} 'timeout': 2400}
] ]
}, },
] ]
@ -3349,7 +3349,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'start-instances', {'name': 'start-instances',
'entity_names': ['test_instance_0']}, 'entity_names': ['test_instance_0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800} 'timeout': 2400}
] ]
}, },
{'name': 'sw-patch-worker-hosts', {'name': 'sw-patch-worker-hosts',
@ -3371,7 +3371,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'start-instances', {'name': 'start-instances',
'entity_names': ['test_instance_1']}, 'entity_names': ['test_instance_1']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800} 'timeout': 2400}
] ]
}, },
] ]
@ -3426,7 +3426,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-0']}, 'entity_names': ['controller-0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800} 'timeout': 2400}
] ]
}, },
{'name': 'sw-patch-worker-hosts', {'name': 'sw-patch-worker-hosts',
@ -3444,7 +3444,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-1']}, 'entity_names': ['controller-1']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800} 'timeout': 2400}
] ]
}, },
] ]
@ -3499,7 +3499,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-0']}, 'entity_names': ['controller-0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800} 'timeout': 2400}
] ]
}, },
{'name': 'sw-patch-worker-hosts', {'name': 'sw-patch-worker-hosts',
@ -3517,7 +3517,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-1']}, 'entity_names': ['controller-1']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800} 'timeout': 2400}
] ]
}, },
] ]
@ -3594,7 +3594,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'start-instances', {'name': 'start-instances',
'entity_names': ['test_instance_0']}, 'entity_names': ['test_instance_0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
{'name': 'sw-patch-worker-hosts', {'name': 'sw-patch-worker-hosts',
@ -3616,7 +3616,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'start-instances', {'name': 'start-instances',
'entity_names': ['test_instance_1']}, 'entity_names': ['test_instance_1']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800} 'timeout': 2400}
] ]
}, },
{'name': 'sw-patch-worker-hosts', {'name': 'sw-patch-worker-hosts',
@ -3758,7 +3758,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'start-instances', {'name': 'start-instances',
'entity_names': ['test_instance_0']}, 'entity_names': ['test_instance_0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800} 'timeout': 2400}
] ]
}, },
{'name': 'sw-patch-worker-hosts', {'name': 'sw-patch-worker-hosts',
@ -3780,7 +3780,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'start-instances', {'name': 'start-instances',
'entity_names': ['test_instance_1']}, 'entity_names': ['test_instance_1']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800} 'timeout': 2400}
] ]
}, },
{'name': 'sw-patch-worker-hosts', {'name': 'sw-patch-worker-hosts',
@ -3878,7 +3878,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-0']}, 'entity_names': ['controller-0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800} 'timeout': 2400}
] ]
}, },
{'name': 'sw-patch-worker-hosts', {'name': 'sw-patch-worker-hosts',
@ -3896,7 +3896,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-1']}, 'entity_names': ['controller-1']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800} 'timeout': 2400}
] ]
}, },
{'name': 'sw-patch-worker-hosts', {'name': 'sw-patch-worker-hosts',
@ -4014,7 +4014,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-0']}, 'entity_names': ['controller-0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
] ]
@ -4072,7 +4072,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'start-instances', {'name': 'start-instances',
'entity_names': ['test_instance_0']}, 'entity_names': ['test_instance_0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
] ]
@ -4123,7 +4123,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-0']}, 'entity_names': ['controller-0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800} 'timeout': 2400}
] ]
}, },
] ]

View File

@ -136,7 +136,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-0']}, 'entity_names': ['controller-0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
] ]
@ -188,7 +188,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-0']}, 'entity_names': ['controller-0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
{ {
@ -207,7 +207,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-1']}, 'entity_names': ['controller-1']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
] ]
@ -276,7 +276,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-0']}, 'entity_names': ['controller-0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
{ {
@ -295,7 +295,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-1']}, 'entity_names': ['controller-1']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
{ {
@ -433,7 +433,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-0']}, 'entity_names': ['controller-0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
{ {
@ -452,7 +452,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-1']}, 'entity_names': ['controller-1']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
{ {
@ -573,7 +573,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-0']}, 'entity_names': ['controller-0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
{ {
@ -763,7 +763,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['controller-0']}, 'entity_names': ['controller-0']},
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 1800}, 'timeout': 2400},
] ]
}, },
{ {

View File

@ -971,7 +971,8 @@ class UpdateControllerHostsMixin(object):
host_list = [host] host_list = [host]
stage = strategy.StrategyStage(strategy_stage_name) stage = strategy.StrategyStage(strategy_stage_name)
stage.add_step(strategy.QueryAlarmsStep( stage.add_step(strategy.QueryAlarmsStep(
True, ignore_alarms=self._ignore_alarms)) True, ignore_alarms=self._ignore_alarms,
ignore_alarms_conditional=self._ignore_alarms_conditional))
if reboot: if reboot:
stage.add_step(strategy.SwactHostsStep(host_list)) stage.add_step(strategy.SwactHostsStep(host_list))
stage.add_step(strategy.LockHostsStep(host_list)) stage.add_step(strategy.LockHostsStep(host_list))
@ -992,8 +993,9 @@ class UpdateControllerHostsMixin(object):
# OSDs configured, but the alarms should clear quickly in # OSDs configured, but the alarms should clear quickly in
# that case so this will not delay the update strategy. # that case so this will not delay the update strategy.
stage.add_step(strategy.WaitAlarmsClearStep( stage.add_step(strategy.WaitAlarmsClearStep(
timeout_in_secs=30 * 60, timeout_in_secs=40 * 60,
ignore_alarms=self._ignore_alarms)) ignore_alarms=self._ignore_alarms,
ignore_alarms_conditional=self._ignore_alarms_conditional))
else: else:
# Less time required if host is not rebooting # Less time required if host is not rebooting
stage.add_step(strategy.SystemStabilizeStep( stage.add_step(strategy.SystemStabilizeStep(
@ -1004,7 +1006,8 @@ class UpdateControllerHostsMixin(object):
host_list = [local_host] host_list = [local_host]
stage = strategy.StrategyStage(strategy_stage_name) stage = strategy.StrategyStage(strategy_stage_name)
stage.add_step(strategy.QueryAlarmsStep( stage.add_step(strategy.QueryAlarmsStep(
True, ignore_alarms=self._ignore_alarms)) True, ignore_alarms=self._ignore_alarms,
ignore_alarms_conditional=self._ignore_alarms_conditional))
if reboot: if reboot:
stage.add_step(strategy.SwactHostsStep(host_list)) stage.add_step(strategy.SwactHostsStep(host_list))
stage.add_step(strategy.LockHostsStep(host_list)) stage.add_step(strategy.LockHostsStep(host_list))
@ -1025,8 +1028,9 @@ class UpdateControllerHostsMixin(object):
# OSDs configured, but the alarms should clear quickly in # OSDs configured, but the alarms should clear quickly in
# that case so this will not delay the update strategy. # that case so this will not delay the update strategy.
stage.add_step(strategy.WaitAlarmsClearStep( stage.add_step(strategy.WaitAlarmsClearStep(
timeout_in_secs=30 * 60, timeout_in_secs=40 * 60,
ignore_alarms=self._ignore_alarms)) ignore_alarms=self._ignore_alarms,
ignore_alarms_conditional=self._ignore_alarms_conditional))
else: else:
# Less time required if host is not rebooting # Less time required if host is not rebooting
stage.add_step(strategy.SystemStabilizeStep( stage.add_step(strategy.SystemStabilizeStep(
@ -1105,7 +1109,8 @@ class UpdateStorageHostsMixin(object):
for host_list in host_lists: for host_list in host_lists:
stage = strategy.StrategyStage(strategy_stage_name) stage = strategy.StrategyStage(strategy_stage_name)
stage.add_step(strategy.QueryAlarmsStep( stage.add_step(strategy.QueryAlarmsStep(
True, ignore_alarms=self._ignore_alarms)) True, ignore_alarms=self._ignore_alarms,
ignore_alarms_conditional=self._ignore_alarms_conditional))
if reboot: if reboot:
stage.add_step(strategy.LockHostsStep(host_list)) stage.add_step(strategy.LockHostsStep(host_list))
# Add the action step for these hosts (patch, etc..) # Add the action step for these hosts (patch, etc..)
@ -1227,7 +1232,8 @@ class UpdateWorkerHostsMixin(object):
stage = strategy.StrategyStage(strategy_stage_name) stage = strategy.StrategyStage(strategy_stage_name)
stage.add_step(strategy.QueryAlarmsStep( stage.add_step(strategy.QueryAlarmsStep(
True, ignore_alarms=self._ignore_alarms)) True, ignore_alarms=self._ignore_alarms,
ignore_alarms_conditional=self._ignore_alarms_conditional))
if reboot: if reboot:
if 1 == len(host_list): if 1 == len(host_list):
@ -1297,8 +1303,9 @@ class UpdateWorkerHostsMixin(object):
for host in hosts_to_lock + hosts_to_reboot]): for host in hosts_to_lock + hosts_to_reboot]):
# Multiple personality nodes that need to wait for OSDs to sync: # Multiple personality nodes that need to wait for OSDs to sync:
stage.add_step(strategy.WaitAlarmsClearStep( stage.add_step(strategy.WaitAlarmsClearStep(
timeout_in_secs=30 * 60, timeout_in_secs=40 * 60,
ignore_alarms=self._ignore_alarms)) ignore_alarms=self._ignore_alarms,
ignore_alarms_conditional=self._ignore_alarms_conditional))
else: else:
if any([host.openstack_control or host.openstack_compute if any([host.openstack_control or host.openstack_compute
for host in hosts_to_lock + hosts_to_reboot]): for host in hosts_to_lock + hosts_to_reboot]):
@ -1393,9 +1400,13 @@ class SwPatchStrategy(SwUpdateStrategy,
'100.119', # PTP alarm for SyncE '100.119', # PTP alarm for SyncE
'900.701', # Node tainted '900.701', # Node tainted
] ]
IGNORE_ALARMS_CONDITIONAL = {'750.006': 1800}
self._ignore_alarms += IGNORE_ALARMS self._ignore_alarms += IGNORE_ALARMS
self._single_controller = single_controller self._single_controller = single_controller
# This is only for patch strategy to ignore 750.006 alarm when it becomes stale
self._ignore_alarms_conditional = IGNORE_ALARMS_CONDITIONAL
# initialize the variables required by the mixins # initialize the variables required by the mixins
# ie: self._nfvi_sw_patches, self._nfvi_sw_patch_hosts # ie: self._nfvi_sw_patches, self._nfvi_sw_patch_hosts
self.initialize_mixin() self.initialize_mixin()
@ -1409,7 +1420,8 @@ class SwPatchStrategy(SwUpdateStrategy,
stage = strategy.StrategyStage( stage = strategy.StrategyStage(
strategy.STRATEGY_STAGE_NAME.SW_PATCH_QUERY) strategy.STRATEGY_STAGE_NAME.SW_PATCH_QUERY)
stage.add_step( stage.add_step(
strategy.QueryAlarmsStep(ignore_alarms=self._ignore_alarms)) strategy.QueryAlarmsStep(ignore_alarms=self._ignore_alarms,
ignore_alarms_conditional=self._ignore_alarms_conditional))
stage.add_step(strategy.QuerySwPatchesStep()) stage.add_step(strategy.QuerySwPatchesStep())
stage.add_step(strategy.QuerySwPatchHostsStep()) stage.add_step(strategy.QuerySwPatchHostsStep())
self.build_phase.add_stage(stage) self.build_phase.add_stage(stage)
@ -2370,7 +2382,7 @@ class SystemConfigUpdateStrategy(SwUpdateStrategy,
] ]
self._ignore_alarms += IGNORE_ALARMS self._ignore_alarms += IGNORE_ALARMS
self._single_controller = single_controller self._single_controller = single_controller
self._ignore_alarms_conditional = None
# initialize the variables required by the mixins # initialize the variables required by the mixins
self.initialize_mixin() self.initialize_mixin()
@ -3325,7 +3337,7 @@ class KubeUpgradeStrategy(SwUpdateStrategy,
] ]
# self._ignore_alarms is declared in parent class # self._ignore_alarms is declared in parent class
self._ignore_alarms += IGNORE_ALARMS self._ignore_alarms += IGNORE_ALARMS
self._ignore_alarms_conditional = None
# to_version and single_controller MUST be serialized # to_version and single_controller MUST be serialized
self._to_version = to_version self._to_version = to_version
self._single_controller = single_controller self._single_controller = single_controller

View File

@ -1912,13 +1912,17 @@ class QueryAlarmsStep(strategy.StrategyStep):
""" """
Query Alarms - Strategy Step Query Alarms - Strategy Step
""" """
def __init__(self, fail_on_alarms=False, ignore_alarms=None): def __init__(self, fail_on_alarms=False, ignore_alarms=None, ignore_alarms_conditional=None):
super(QueryAlarmsStep, self).__init__( super(QueryAlarmsStep, self).__init__(
STRATEGY_STEP_NAME.QUERY_ALARMS, timeout_in_secs=60) STRATEGY_STEP_NAME.QUERY_ALARMS, timeout_in_secs=60)
if ignore_alarms is None: if ignore_alarms is None:
ignore_alarms = [] ignore_alarms = []
self._fail_on_alarms = fail_on_alarms self._fail_on_alarms = fail_on_alarms
self._ignore_alarms = ignore_alarms self._ignore_alarms = ignore_alarms
# For ignoring 750.006 alarm for patch strategy
if ignore_alarms_conditional is None:
ignore_alarms_conditional = {}
self._ignore_alarms_conditional = ignore_alarms_conditional
@coroutine @coroutine
def _query_alarms_callback(self, fm_service): def _query_alarms_callback(self, fm_service):
@ -1940,7 +1944,8 @@ class QueryAlarmsStep(strategy.StrategyStep):
"%s - uuid %s due to relaxed alarm " "%s - uuid %s due to relaxed alarm "
"strictness" % (nfvi_alarm.alarm_id, "strictness" % (nfvi_alarm.alarm_id,
nfvi_alarm.alarm_uuid)) nfvi_alarm.alarm_uuid))
elif nfvi_alarm.alarm_id not in self._ignore_alarms: elif (nfvi_alarm.alarm_id not in self._ignore_alarms and
nfvi_alarm.alarm_id not in self._ignore_alarms_conditional):
DLOG.warn("Alarm: %s" % nfvi_alarm.alarm_id) DLOG.warn("Alarm: %s" % nfvi_alarm.alarm_id)
nfvi_alarms.append(nfvi_alarm) nfvi_alarms.append(nfvi_alarm)
else: else:
@ -1982,6 +1987,7 @@ class QueryAlarmsStep(strategy.StrategyStep):
super(QueryAlarmsStep, self).from_dict(data) super(QueryAlarmsStep, self).from_dict(data)
self._fail_on_alarms = data['fail_on_alarms'] self._fail_on_alarms = data['fail_on_alarms']
self._ignore_alarms = data['ignore_alarms'] self._ignore_alarms = data['ignore_alarms']
self._ignore_alarms_conditional = data['ignore_alarms_conditional']
return self return self
def as_dict(self): def as_dict(self):
@ -1994,6 +2000,7 @@ class QueryAlarmsStep(strategy.StrategyStep):
data['entity_uuids'] = list() data['entity_uuids'] = list()
data['fail_on_alarms'] = self._fail_on_alarms data['fail_on_alarms'] = self._fail_on_alarms
data['ignore_alarms'] = self._ignore_alarms data['ignore_alarms'] = self._ignore_alarms
data['ignore_alarms_conditional'] = self._ignore_alarms_conditional
return data return data
@ -2106,7 +2113,8 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
""" """
Alarm Wait - Strategy Step Alarm Wait - Strategy Step
""" """
def __init__(self, timeout_in_secs=300, first_query_delay_in_secs=60, ignore_alarms=None): def __init__(self, timeout_in_secs=300, first_query_delay_in_secs=60, ignore_alarms=None,
ignore_alarms_conditional=None):
super(WaitAlarmsClearStep, self).__init__( super(WaitAlarmsClearStep, self).__init__(
STRATEGY_STEP_NAME.WAIT_ALARMS_CLEAR, timeout_in_secs=timeout_in_secs) STRATEGY_STEP_NAME.WAIT_ALARMS_CLEAR, timeout_in_secs=timeout_in_secs)
self._first_query_delay_in_secs = first_query_delay_in_secs self._first_query_delay_in_secs = first_query_delay_in_secs
@ -2115,12 +2123,17 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
self._ignore_alarms = ignore_alarms self._ignore_alarms = ignore_alarms
self._wait_time = 0 self._wait_time = 0
self._query_inprogress = False self._query_inprogress = False
if ignore_alarms_conditional is None:
ignore_alarms_conditional = {}
self._ignore_alarms_conditional = ignore_alarms_conditional
@coroutine @coroutine
def _query_alarms_callback(self): def _query_alarms_callback(self):
""" """
Query Alarms Callback Query Alarms Callback
""" """
from datetime import datetime
response = (yield) response = (yield)
DLOG.debug("Query-Alarms callback response=%s." % response) DLOG.debug("Query-Alarms callback response=%s." % response)
@ -2138,6 +2151,26 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
"strictness" % (nfvi_alarm.alarm_id, "strictness" % (nfvi_alarm.alarm_id,
nfvi_alarm.alarm_uuid)) nfvi_alarm.alarm_uuid))
elif nfvi_alarm.alarm_id not in self._ignore_alarms: elif nfvi_alarm.alarm_id not in self._ignore_alarms:
# For ignoring 750.006 alarm for patch strategy
if nfvi_alarm.alarm_id in self._ignore_alarms_conditional:
format_string = "%Y-%m-%dT%H:%M:%S.%f"
alarm_timestamp = nfvi_alarm.timestamp
alarm_timestamp_obj = datetime.strptime(
alarm_timestamp, format_string)
current_time = datetime.now()
time_in_sec = (
current_time - alarm_timestamp_obj).total_seconds()
# Ignore 750.006 alarm, if present for 30 mins(1800s)
if self._ignore_alarms_conditional[nfvi_alarm.alarm_id] < int(time_in_sec):
ignore_alarm_list = list(self._ignore_alarms_conditional.keys())
for alarm_ignore in ignore_alarm_list:
if alarm_ignore == nfvi_alarm.alarm_id:
self._ignore_alarms.append(alarm_ignore)
else:
nfvi_alarms.append(nfvi_alarm)
else:
nfvi_alarms.append(nfvi_alarm)
nfvi_alarms.append(nfvi_alarm) nfvi_alarms.append(nfvi_alarm)
else: else:
DLOG.debug("Ignoring alarm %s - uuid %s" % DLOG.debug("Ignoring alarm %s - uuid %s" %
@ -2145,6 +2178,11 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
self.strategy.nfvi_alarms = nfvi_alarms self.strategy.nfvi_alarms = nfvi_alarms
if self.strategy.nfvi_alarms: if self.strategy.nfvi_alarms:
ignore_alarm_list = list(self._ignore_alarms_conditional.keys())
for alarm in self.strategy.nfvi_alarms:
for remove_alarm in ignore_alarm_list:
if alarm['alarm_id'] == remove_alarm:
self.strategy.nfvi_alarms.remove(alarm)
# Keep waiting for alarms to clear # Keep waiting for alarms to clear
pass pass
else: else:
@ -2193,6 +2231,7 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
super(WaitAlarmsClearStep, self).from_dict(data) super(WaitAlarmsClearStep, self).from_dict(data)
self._first_query_delay_in_secs = data['first_query_delay_in_secs'] self._first_query_delay_in_secs = data['first_query_delay_in_secs']
self._ignore_alarms = data['ignore_alarms'] self._ignore_alarms = data['ignore_alarms']
self._ignore_alarms_conditional = data['ignore_alarms_conditional']
self._wait_time = 0 self._wait_time = 0
self._query_inprogress = False self._query_inprogress = False
return self return self
@ -2207,6 +2246,7 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
data['entity_uuids'] = list() data['entity_uuids'] = list()
data['first_query_delay_in_secs'] = self._first_query_delay_in_secs data['first_query_delay_in_secs'] = self._first_query_delay_in_secs
data['ignore_alarms'] = self._ignore_alarms data['ignore_alarms'] = self._ignore_alarms
data['ignore_alarms_conditional'] = self._ignore_alarms_conditional
return data return data