sw-manager patch-strategy failed to install due to timeout
As part of this fix, a new parameter, ignore_alarms_conditional, is added, which lists the stale alarms that need to be ignored after 30 minutes. The alarm clear wait step checks for the stale alarm 750.006 for 30 minutes. If the alarm is still not cleared, the patch-strategy ignores the alarm. Since the stale alarms are now monitored for 30 minutes, the overall alarm clear timeout is increased to 2400 seconds. In the current case, alarm 750.006 is not getting cleared and it is also not part of the ignore alarm list, so the patch-strategy times out. Test Plan: PASSED: Applying a patch - On a DX system (VM), create and apply a patch strategy with fm alarm-list showing an uncleared alarm (for test purposes, 100.103 - Memory threshold alarm was used). After 30 minutes the alarm was ignored and the patch strategy was successfully applied. PASSED: Removing a patch - On a DX system (VM), create and apply a patch strategy with fm alarm-list showing an uncleared alarm (for test purposes, 100.103 - Memory threshold alarm was used). After 30 minutes the alarm was ignored and the patch strategy was successfully applied. PASSED: On a DX system (lab), 4 consecutive patch orchestrations were successfully applied; the stale alarm 750.006 was tested. PASSED: On a DX system, create and apply a strategy with an alarm existing on the system (not part of the ignore list); the strategy waits for 40 minutes before timing out. Closes-Bug: 2059305 Change-Id: I7ebaf5a24fa45a7e45f3af7e5ca588ce3ee06156 Signed-off-by: Vanathi.Selvaraju <vanathi.selvaraju@windriver.com>
This commit is contained in:
parent
8dfb971980
commit
3df717a0b7
@ -2990,7 +2990,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-1']},
|
'entity_names': ['controller-1']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{'name': 'sw-patch-controllers',
|
{'name': 'sw-patch-controllers',
|
||||||
@ -3008,7 +3008,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-0']},
|
'entity_names': ['controller-0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
@ -3100,7 +3100,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-1']},
|
'entity_names': ['controller-1']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{'name': 'sw-patch-controllers',
|
{'name': 'sw-patch-controllers',
|
||||||
@ -3118,7 +3118,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-0']},
|
'entity_names': ['controller-0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
@ -3224,7 +3224,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'start-instances',
|
{'name': 'start-instances',
|
||||||
'entity_names': ['test_instance_0']},
|
'entity_names': ['test_instance_0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{'name': 'sw-patch-worker-hosts',
|
{'name': 'sw-patch-worker-hosts',
|
||||||
@ -3246,7 +3246,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'start-instances',
|
{'name': 'start-instances',
|
||||||
'entity_names': ['test_instance_1']},
|
'entity_names': ['test_instance_1']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800}
|
'timeout': 2400}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
@ -3349,7 +3349,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'start-instances',
|
{'name': 'start-instances',
|
||||||
'entity_names': ['test_instance_0']},
|
'entity_names': ['test_instance_0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800}
|
'timeout': 2400}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{'name': 'sw-patch-worker-hosts',
|
{'name': 'sw-patch-worker-hosts',
|
||||||
@ -3371,7 +3371,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'start-instances',
|
{'name': 'start-instances',
|
||||||
'entity_names': ['test_instance_1']},
|
'entity_names': ['test_instance_1']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800}
|
'timeout': 2400}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
@ -3426,7 +3426,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-0']},
|
'entity_names': ['controller-0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800}
|
'timeout': 2400}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{'name': 'sw-patch-worker-hosts',
|
{'name': 'sw-patch-worker-hosts',
|
||||||
@ -3444,7 +3444,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-1']},
|
'entity_names': ['controller-1']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800}
|
'timeout': 2400}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
@ -3499,7 +3499,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-0']},
|
'entity_names': ['controller-0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800}
|
'timeout': 2400}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{'name': 'sw-patch-worker-hosts',
|
{'name': 'sw-patch-worker-hosts',
|
||||||
@ -3517,7 +3517,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-1']},
|
'entity_names': ['controller-1']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800}
|
'timeout': 2400}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
@ -3594,7 +3594,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'start-instances',
|
{'name': 'start-instances',
|
||||||
'entity_names': ['test_instance_0']},
|
'entity_names': ['test_instance_0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{'name': 'sw-patch-worker-hosts',
|
{'name': 'sw-patch-worker-hosts',
|
||||||
@ -3616,7 +3616,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'start-instances',
|
{'name': 'start-instances',
|
||||||
'entity_names': ['test_instance_1']},
|
'entity_names': ['test_instance_1']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800}
|
'timeout': 2400}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{'name': 'sw-patch-worker-hosts',
|
{'name': 'sw-patch-worker-hosts',
|
||||||
@ -3758,7 +3758,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'start-instances',
|
{'name': 'start-instances',
|
||||||
'entity_names': ['test_instance_0']},
|
'entity_names': ['test_instance_0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800}
|
'timeout': 2400}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{'name': 'sw-patch-worker-hosts',
|
{'name': 'sw-patch-worker-hosts',
|
||||||
@ -3780,7 +3780,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'start-instances',
|
{'name': 'start-instances',
|
||||||
'entity_names': ['test_instance_1']},
|
'entity_names': ['test_instance_1']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800}
|
'timeout': 2400}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{'name': 'sw-patch-worker-hosts',
|
{'name': 'sw-patch-worker-hosts',
|
||||||
@ -3878,7 +3878,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-0']},
|
'entity_names': ['controller-0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800}
|
'timeout': 2400}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{'name': 'sw-patch-worker-hosts',
|
{'name': 'sw-patch-worker-hosts',
|
||||||
@ -3896,7 +3896,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-1']},
|
'entity_names': ['controller-1']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800}
|
'timeout': 2400}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{'name': 'sw-patch-worker-hosts',
|
{'name': 'sw-patch-worker-hosts',
|
||||||
@ -4014,7 +4014,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-0']},
|
'entity_names': ['controller-0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
@ -4072,7 +4072,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'start-instances',
|
{'name': 'start-instances',
|
||||||
'entity_names': ['test_instance_0']},
|
'entity_names': ['test_instance_0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
@ -4123,7 +4123,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-0']},
|
'entity_names': ['controller-0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800}
|
'timeout': 2400}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
@ -136,7 +136,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-0']},
|
'entity_names': ['controller-0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
@ -188,7 +188,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-0']},
|
'entity_names': ['controller-0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -207,7 +207,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-1']},
|
'entity_names': ['controller-1']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
@ -276,7 +276,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-0']},
|
'entity_names': ['controller-0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -295,7 +295,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-1']},
|
'entity_names': ['controller-1']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -433,7 +433,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-0']},
|
'entity_names': ['controller-0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -452,7 +452,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-1']},
|
'entity_names': ['controller-1']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -573,7 +573,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-0']},
|
'entity_names': ['controller-0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -763,7 +763,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase
|
|||||||
{'name': 'unlock-hosts',
|
{'name': 'unlock-hosts',
|
||||||
'entity_names': ['controller-0']},
|
'entity_names': ['controller-0']},
|
||||||
{'name': 'wait-alarms-clear',
|
{'name': 'wait-alarms-clear',
|
||||||
'timeout': 1800},
|
'timeout': 2400},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -971,7 +971,8 @@ class UpdateControllerHostsMixin(object):
|
|||||||
host_list = [host]
|
host_list = [host]
|
||||||
stage = strategy.StrategyStage(strategy_stage_name)
|
stage = strategy.StrategyStage(strategy_stage_name)
|
||||||
stage.add_step(strategy.QueryAlarmsStep(
|
stage.add_step(strategy.QueryAlarmsStep(
|
||||||
True, ignore_alarms=self._ignore_alarms))
|
True, ignore_alarms=self._ignore_alarms,
|
||||||
|
ignore_alarms_conditional=self._ignore_alarms_conditional))
|
||||||
if reboot:
|
if reboot:
|
||||||
stage.add_step(strategy.SwactHostsStep(host_list))
|
stage.add_step(strategy.SwactHostsStep(host_list))
|
||||||
stage.add_step(strategy.LockHostsStep(host_list))
|
stage.add_step(strategy.LockHostsStep(host_list))
|
||||||
@ -992,8 +993,9 @@ class UpdateControllerHostsMixin(object):
|
|||||||
# OSDs configured, but the alarms should clear quickly in
|
# OSDs configured, but the alarms should clear quickly in
|
||||||
# that case so this will not delay the update strategy.
|
# that case so this will not delay the update strategy.
|
||||||
stage.add_step(strategy.WaitAlarmsClearStep(
|
stage.add_step(strategy.WaitAlarmsClearStep(
|
||||||
timeout_in_secs=30 * 60,
|
timeout_in_secs=40 * 60,
|
||||||
ignore_alarms=self._ignore_alarms))
|
ignore_alarms=self._ignore_alarms,
|
||||||
|
ignore_alarms_conditional=self._ignore_alarms_conditional))
|
||||||
else:
|
else:
|
||||||
# Less time required if host is not rebooting
|
# Less time required if host is not rebooting
|
||||||
stage.add_step(strategy.SystemStabilizeStep(
|
stage.add_step(strategy.SystemStabilizeStep(
|
||||||
@ -1004,7 +1006,8 @@ class UpdateControllerHostsMixin(object):
|
|||||||
host_list = [local_host]
|
host_list = [local_host]
|
||||||
stage = strategy.StrategyStage(strategy_stage_name)
|
stage = strategy.StrategyStage(strategy_stage_name)
|
||||||
stage.add_step(strategy.QueryAlarmsStep(
|
stage.add_step(strategy.QueryAlarmsStep(
|
||||||
True, ignore_alarms=self._ignore_alarms))
|
True, ignore_alarms=self._ignore_alarms,
|
||||||
|
ignore_alarms_conditional=self._ignore_alarms_conditional))
|
||||||
if reboot:
|
if reboot:
|
||||||
stage.add_step(strategy.SwactHostsStep(host_list))
|
stage.add_step(strategy.SwactHostsStep(host_list))
|
||||||
stage.add_step(strategy.LockHostsStep(host_list))
|
stage.add_step(strategy.LockHostsStep(host_list))
|
||||||
@ -1025,8 +1028,9 @@ class UpdateControllerHostsMixin(object):
|
|||||||
# OSDs configured, but the alarms should clear quickly in
|
# OSDs configured, but the alarms should clear quickly in
|
||||||
# that case so this will not delay the update strategy.
|
# that case so this will not delay the update strategy.
|
||||||
stage.add_step(strategy.WaitAlarmsClearStep(
|
stage.add_step(strategy.WaitAlarmsClearStep(
|
||||||
timeout_in_secs=30 * 60,
|
timeout_in_secs=40 * 60,
|
||||||
ignore_alarms=self._ignore_alarms))
|
ignore_alarms=self._ignore_alarms,
|
||||||
|
ignore_alarms_conditional=self._ignore_alarms_conditional))
|
||||||
else:
|
else:
|
||||||
# Less time required if host is not rebooting
|
# Less time required if host is not rebooting
|
||||||
stage.add_step(strategy.SystemStabilizeStep(
|
stage.add_step(strategy.SystemStabilizeStep(
|
||||||
@ -1105,7 +1109,8 @@ class UpdateStorageHostsMixin(object):
|
|||||||
for host_list in host_lists:
|
for host_list in host_lists:
|
||||||
stage = strategy.StrategyStage(strategy_stage_name)
|
stage = strategy.StrategyStage(strategy_stage_name)
|
||||||
stage.add_step(strategy.QueryAlarmsStep(
|
stage.add_step(strategy.QueryAlarmsStep(
|
||||||
True, ignore_alarms=self._ignore_alarms))
|
True, ignore_alarms=self._ignore_alarms,
|
||||||
|
ignore_alarms_conditional=self._ignore_alarms_conditional))
|
||||||
if reboot:
|
if reboot:
|
||||||
stage.add_step(strategy.LockHostsStep(host_list))
|
stage.add_step(strategy.LockHostsStep(host_list))
|
||||||
# Add the action step for these hosts (patch, etc..)
|
# Add the action step for these hosts (patch, etc..)
|
||||||
@ -1227,7 +1232,8 @@ class UpdateWorkerHostsMixin(object):
|
|||||||
stage = strategy.StrategyStage(strategy_stage_name)
|
stage = strategy.StrategyStage(strategy_stage_name)
|
||||||
|
|
||||||
stage.add_step(strategy.QueryAlarmsStep(
|
stage.add_step(strategy.QueryAlarmsStep(
|
||||||
True, ignore_alarms=self._ignore_alarms))
|
True, ignore_alarms=self._ignore_alarms,
|
||||||
|
ignore_alarms_conditional=self._ignore_alarms_conditional))
|
||||||
|
|
||||||
if reboot:
|
if reboot:
|
||||||
if 1 == len(host_list):
|
if 1 == len(host_list):
|
||||||
@ -1297,8 +1303,9 @@ class UpdateWorkerHostsMixin(object):
|
|||||||
for host in hosts_to_lock + hosts_to_reboot]):
|
for host in hosts_to_lock + hosts_to_reboot]):
|
||||||
# Multiple personality nodes that need to wait for OSDs to sync:
|
# Multiple personality nodes that need to wait for OSDs to sync:
|
||||||
stage.add_step(strategy.WaitAlarmsClearStep(
|
stage.add_step(strategy.WaitAlarmsClearStep(
|
||||||
timeout_in_secs=30 * 60,
|
timeout_in_secs=40 * 60,
|
||||||
ignore_alarms=self._ignore_alarms))
|
ignore_alarms=self._ignore_alarms,
|
||||||
|
ignore_alarms_conditional=self._ignore_alarms_conditional))
|
||||||
else:
|
else:
|
||||||
if any([host.openstack_control or host.openstack_compute
|
if any([host.openstack_control or host.openstack_compute
|
||||||
for host in hosts_to_lock + hosts_to_reboot]):
|
for host in hosts_to_lock + hosts_to_reboot]):
|
||||||
@ -1393,9 +1400,13 @@ class SwPatchStrategy(SwUpdateStrategy,
|
|||||||
'100.119', # PTP alarm for SyncE
|
'100.119', # PTP alarm for SyncE
|
||||||
'900.701', # Node tainted
|
'900.701', # Node tainted
|
||||||
]
|
]
|
||||||
|
IGNORE_ALARMS_CONDITIONAL = {'750.006': 1800}
|
||||||
self._ignore_alarms += IGNORE_ALARMS
|
self._ignore_alarms += IGNORE_ALARMS
|
||||||
self._single_controller = single_controller
|
self._single_controller = single_controller
|
||||||
|
|
||||||
|
# This is only for patch strategy to ignore 750.006 alarm when it becomes stale
|
||||||
|
self._ignore_alarms_conditional = IGNORE_ALARMS_CONDITIONAL
|
||||||
|
|
||||||
# initialize the variables required by the mixins
|
# initialize the variables required by the mixins
|
||||||
# ie: self._nfvi_sw_patches, self._nfvi_sw_patch_hosts
|
# ie: self._nfvi_sw_patches, self._nfvi_sw_patch_hosts
|
||||||
self.initialize_mixin()
|
self.initialize_mixin()
|
||||||
@ -1409,7 +1420,8 @@ class SwPatchStrategy(SwUpdateStrategy,
|
|||||||
stage = strategy.StrategyStage(
|
stage = strategy.StrategyStage(
|
||||||
strategy.STRATEGY_STAGE_NAME.SW_PATCH_QUERY)
|
strategy.STRATEGY_STAGE_NAME.SW_PATCH_QUERY)
|
||||||
stage.add_step(
|
stage.add_step(
|
||||||
strategy.QueryAlarmsStep(ignore_alarms=self._ignore_alarms))
|
strategy.QueryAlarmsStep(ignore_alarms=self._ignore_alarms,
|
||||||
|
ignore_alarms_conditional=self._ignore_alarms_conditional))
|
||||||
stage.add_step(strategy.QuerySwPatchesStep())
|
stage.add_step(strategy.QuerySwPatchesStep())
|
||||||
stage.add_step(strategy.QuerySwPatchHostsStep())
|
stage.add_step(strategy.QuerySwPatchHostsStep())
|
||||||
self.build_phase.add_stage(stage)
|
self.build_phase.add_stage(stage)
|
||||||
@ -2370,7 +2382,7 @@ class SystemConfigUpdateStrategy(SwUpdateStrategy,
|
|||||||
]
|
]
|
||||||
self._ignore_alarms += IGNORE_ALARMS
|
self._ignore_alarms += IGNORE_ALARMS
|
||||||
self._single_controller = single_controller
|
self._single_controller = single_controller
|
||||||
|
self._ignore_alarms_conditional = None
|
||||||
# initialize the variables required by the mixins
|
# initialize the variables required by the mixins
|
||||||
self.initialize_mixin()
|
self.initialize_mixin()
|
||||||
|
|
||||||
@ -3325,7 +3337,7 @@ class KubeUpgradeStrategy(SwUpdateStrategy,
|
|||||||
]
|
]
|
||||||
# self._ignore_alarms is declared in parent class
|
# self._ignore_alarms is declared in parent class
|
||||||
self._ignore_alarms += IGNORE_ALARMS
|
self._ignore_alarms += IGNORE_ALARMS
|
||||||
|
self._ignore_alarms_conditional = None
|
||||||
# to_version and single_controller MUST be serialized
|
# to_version and single_controller MUST be serialized
|
||||||
self._to_version = to_version
|
self._to_version = to_version
|
||||||
self._single_controller = single_controller
|
self._single_controller = single_controller
|
||||||
|
@ -1912,13 +1912,17 @@ class QueryAlarmsStep(strategy.StrategyStep):
|
|||||||
"""
|
"""
|
||||||
Query Alarms - Strategy Step
|
Query Alarms - Strategy Step
|
||||||
"""
|
"""
|
||||||
def __init__(self, fail_on_alarms=False, ignore_alarms=None):
|
def __init__(self, fail_on_alarms=False, ignore_alarms=None, ignore_alarms_conditional=None):
|
||||||
super(QueryAlarmsStep, self).__init__(
|
super(QueryAlarmsStep, self).__init__(
|
||||||
STRATEGY_STEP_NAME.QUERY_ALARMS, timeout_in_secs=60)
|
STRATEGY_STEP_NAME.QUERY_ALARMS, timeout_in_secs=60)
|
||||||
if ignore_alarms is None:
|
if ignore_alarms is None:
|
||||||
ignore_alarms = []
|
ignore_alarms = []
|
||||||
self._fail_on_alarms = fail_on_alarms
|
self._fail_on_alarms = fail_on_alarms
|
||||||
self._ignore_alarms = ignore_alarms
|
self._ignore_alarms = ignore_alarms
|
||||||
|
# For ignoring 750.006 alarm for patch strategy
|
||||||
|
if ignore_alarms_conditional is None:
|
||||||
|
ignore_alarms_conditional = {}
|
||||||
|
self._ignore_alarms_conditional = ignore_alarms_conditional
|
||||||
|
|
||||||
@coroutine
|
@coroutine
|
||||||
def _query_alarms_callback(self, fm_service):
|
def _query_alarms_callback(self, fm_service):
|
||||||
@ -1940,7 +1944,8 @@ class QueryAlarmsStep(strategy.StrategyStep):
|
|||||||
"%s - uuid %s due to relaxed alarm "
|
"%s - uuid %s due to relaxed alarm "
|
||||||
"strictness" % (nfvi_alarm.alarm_id,
|
"strictness" % (nfvi_alarm.alarm_id,
|
||||||
nfvi_alarm.alarm_uuid))
|
nfvi_alarm.alarm_uuid))
|
||||||
elif nfvi_alarm.alarm_id not in self._ignore_alarms:
|
elif (nfvi_alarm.alarm_id not in self._ignore_alarms and
|
||||||
|
nfvi_alarm.alarm_id not in self._ignore_alarms_conditional):
|
||||||
DLOG.warn("Alarm: %s" % nfvi_alarm.alarm_id)
|
DLOG.warn("Alarm: %s" % nfvi_alarm.alarm_id)
|
||||||
nfvi_alarms.append(nfvi_alarm)
|
nfvi_alarms.append(nfvi_alarm)
|
||||||
else:
|
else:
|
||||||
@ -1982,6 +1987,7 @@ class QueryAlarmsStep(strategy.StrategyStep):
|
|||||||
super(QueryAlarmsStep, self).from_dict(data)
|
super(QueryAlarmsStep, self).from_dict(data)
|
||||||
self._fail_on_alarms = data['fail_on_alarms']
|
self._fail_on_alarms = data['fail_on_alarms']
|
||||||
self._ignore_alarms = data['ignore_alarms']
|
self._ignore_alarms = data['ignore_alarms']
|
||||||
|
self._ignore_alarms_conditional = data['ignore_alarms_conditional']
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def as_dict(self):
|
def as_dict(self):
|
||||||
@ -1994,6 +2000,7 @@ class QueryAlarmsStep(strategy.StrategyStep):
|
|||||||
data['entity_uuids'] = list()
|
data['entity_uuids'] = list()
|
||||||
data['fail_on_alarms'] = self._fail_on_alarms
|
data['fail_on_alarms'] = self._fail_on_alarms
|
||||||
data['ignore_alarms'] = self._ignore_alarms
|
data['ignore_alarms'] = self._ignore_alarms
|
||||||
|
data['ignore_alarms_conditional'] = self._ignore_alarms_conditional
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
@ -2106,7 +2113,8 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
|||||||
"""
|
"""
|
||||||
Alarm Wait - Strategy Step
|
Alarm Wait - Strategy Step
|
||||||
"""
|
"""
|
||||||
def __init__(self, timeout_in_secs=300, first_query_delay_in_secs=60, ignore_alarms=None):
|
def __init__(self, timeout_in_secs=300, first_query_delay_in_secs=60, ignore_alarms=None,
|
||||||
|
ignore_alarms_conditional=None):
|
||||||
super(WaitAlarmsClearStep, self).__init__(
|
super(WaitAlarmsClearStep, self).__init__(
|
||||||
STRATEGY_STEP_NAME.WAIT_ALARMS_CLEAR, timeout_in_secs=timeout_in_secs)
|
STRATEGY_STEP_NAME.WAIT_ALARMS_CLEAR, timeout_in_secs=timeout_in_secs)
|
||||||
self._first_query_delay_in_secs = first_query_delay_in_secs
|
self._first_query_delay_in_secs = first_query_delay_in_secs
|
||||||
@ -2115,12 +2123,17 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
|||||||
self._ignore_alarms = ignore_alarms
|
self._ignore_alarms = ignore_alarms
|
||||||
self._wait_time = 0
|
self._wait_time = 0
|
||||||
self._query_inprogress = False
|
self._query_inprogress = False
|
||||||
|
if ignore_alarms_conditional is None:
|
||||||
|
ignore_alarms_conditional = {}
|
||||||
|
self._ignore_alarms_conditional = ignore_alarms_conditional
|
||||||
|
|
||||||
@coroutine
|
@coroutine
|
||||||
def _query_alarms_callback(self):
|
def _query_alarms_callback(self):
|
||||||
"""
|
"""
|
||||||
Query Alarms Callback
|
Query Alarms Callback
|
||||||
"""
|
"""
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
response = (yield)
|
response = (yield)
|
||||||
DLOG.debug("Query-Alarms callback response=%s." % response)
|
DLOG.debug("Query-Alarms callback response=%s." % response)
|
||||||
|
|
||||||
@ -2138,6 +2151,26 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
|||||||
"strictness" % (nfvi_alarm.alarm_id,
|
"strictness" % (nfvi_alarm.alarm_id,
|
||||||
nfvi_alarm.alarm_uuid))
|
nfvi_alarm.alarm_uuid))
|
||||||
elif nfvi_alarm.alarm_id not in self._ignore_alarms:
|
elif nfvi_alarm.alarm_id not in self._ignore_alarms:
|
||||||
|
# For ignoring 750.006 alarm for patch strategy
|
||||||
|
if nfvi_alarm.alarm_id in self._ignore_alarms_conditional:
|
||||||
|
format_string = "%Y-%m-%dT%H:%M:%S.%f"
|
||||||
|
alarm_timestamp = nfvi_alarm.timestamp
|
||||||
|
alarm_timestamp_obj = datetime.strptime(
|
||||||
|
alarm_timestamp, format_string)
|
||||||
|
current_time = datetime.now()
|
||||||
|
time_in_sec = (
|
||||||
|
current_time - alarm_timestamp_obj).total_seconds()
|
||||||
|
# Ignore 750.006 alarm, if present for 30 mins(1800s)
|
||||||
|
if self._ignore_alarms_conditional[nfvi_alarm.alarm_id] < int(time_in_sec):
|
||||||
|
ignore_alarm_list = list(self._ignore_alarms_conditional.keys())
|
||||||
|
for alarm_ignore in ignore_alarm_list:
|
||||||
|
if alarm_ignore == nfvi_alarm.alarm_id:
|
||||||
|
self._ignore_alarms.append(alarm_ignore)
|
||||||
|
else:
|
||||||
|
nfvi_alarms.append(nfvi_alarm)
|
||||||
|
else:
|
||||||
|
nfvi_alarms.append(nfvi_alarm)
|
||||||
|
|
||||||
nfvi_alarms.append(nfvi_alarm)
|
nfvi_alarms.append(nfvi_alarm)
|
||||||
else:
|
else:
|
||||||
DLOG.debug("Ignoring alarm %s - uuid %s" %
|
DLOG.debug("Ignoring alarm %s - uuid %s" %
|
||||||
@ -2145,6 +2178,11 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
|||||||
self.strategy.nfvi_alarms = nfvi_alarms
|
self.strategy.nfvi_alarms = nfvi_alarms
|
||||||
|
|
||||||
if self.strategy.nfvi_alarms:
|
if self.strategy.nfvi_alarms:
|
||||||
|
ignore_alarm_list = list(self._ignore_alarms_conditional.keys())
|
||||||
|
for alarm in self.strategy.nfvi_alarms:
|
||||||
|
for remove_alarm in ignore_alarm_list:
|
||||||
|
if alarm['alarm_id'] == remove_alarm:
|
||||||
|
self.strategy.nfvi_alarms.remove(alarm)
|
||||||
# Keep waiting for alarms to clear
|
# Keep waiting for alarms to clear
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
@ -2193,6 +2231,7 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
|||||||
super(WaitAlarmsClearStep, self).from_dict(data)
|
super(WaitAlarmsClearStep, self).from_dict(data)
|
||||||
self._first_query_delay_in_secs = data['first_query_delay_in_secs']
|
self._first_query_delay_in_secs = data['first_query_delay_in_secs']
|
||||||
self._ignore_alarms = data['ignore_alarms']
|
self._ignore_alarms = data['ignore_alarms']
|
||||||
|
self._ignore_alarms_conditional = data['ignore_alarms_conditional']
|
||||||
self._wait_time = 0
|
self._wait_time = 0
|
||||||
self._query_inprogress = False
|
self._query_inprogress = False
|
||||||
return self
|
return self
|
||||||
@ -2207,6 +2246,7 @@ class WaitAlarmsClearStep(strategy.StrategyStep):
|
|||||||
data['entity_uuids'] = list()
|
data['entity_uuids'] = list()
|
||||||
data['first_query_delay_in_secs'] = self._first_query_delay_in_secs
|
data['first_query_delay_in_secs'] = self._first_query_delay_in_secs
|
||||||
data['ignore_alarms'] = self._ignore_alarms
|
data['ignore_alarms'] = self._ignore_alarms
|
||||||
|
data['ignore_alarms_conditional'] = self._ignore_alarms_conditional
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user