Add wait for alarms to clear to SW patch strategy unlock hosts step

This appends the WaitAlarmsClearStep after the UnlockHostsStep on controller
hosts when the stx-openstack application is installed, for SwPatchStrategy.
The new step periodically queries alarms until they clear, allowing the
system to stabilize. If stx-openstack is not installed, the strategy still
does a one-minute wait using the existing SystemStabilizeStep.

Change-Id: I6dbc4c6032a3bb9d160df79d46630a81960cbb37
Closes-Bug: 1893124
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
This commit is contained in:
Jim Gauld 2020-08-26 09:16:27 -04:00
parent a274a40529
commit 4c36f911c9
4 changed files with 278 additions and 34 deletions

View File

@ -2681,8 +2681,118 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15},
{'name': 'unlock-hosts',
'entity_names': ['controller-1']},
{'name': 'wait-alarms-clear',
'timeout': 600},
]
},
{'name': 'sw-patch-controllers',
'total_steps': 7,
'steps': [
{'name': 'query-alarms'},
{'name': 'swact-hosts',
'entity_names': ['controller-0']},
{'name': 'lock-hosts',
'entity_names': ['controller-0']},
{'name': 'sw-patch-hosts',
'entity_names': ['controller-0']},
{'name': 'system-stabilize',
'timeout': 60}
'timeout': 15},
{'name': 'unlock-hosts',
'entity_names': ['controller-0']},
{'name': 'wait-alarms-clear',
'timeout': 600},
]
},
]
}
sw_update_testcase.validate_strategy_persists(strategy)
sw_update_testcase.validate_phase(apply_phase, expected_results)
# Test no reboot patches
strategy = create_sw_patch_strategy(
controller_apply_type=SW_UPDATE_APPLY_TYPE.SERIAL
)
strategy._add_controller_strategy_stages(controllers=controller_hosts,
reboot=False)
apply_phase = strategy.apply_phase.as_dict()
expected_results = {
'total_stages': 2,
'stages': [
{'name': 'sw-patch-controllers',
'total_steps': 3,
'steps': [
{'name': 'query-alarms'},
{'name': 'sw-patch-hosts',
'entity_names': ['controller-1']},
{'name': 'system-stabilize',
'timeout': 30}
]
},
{'name': 'sw-patch-controllers',
'total_steps': 3,
'steps': [
{'name': 'query-alarms'},
{'name': 'sw-patch-hosts',
'entity_names': ['controller-0']},
{'name': 'system-stabilize',
'timeout': 30}
]
},
]
}
sw_update_testcase.validate_strategy_persists(strategy)
sw_update_testcase.validate_phase(apply_phase, expected_results)
def test_sw_patch_strategy_controller_stages_serial_openstack_not_installed(self):
"""
Test the sw_patch strategy add controller strategy stages:
- serial apply
- test both reboot and no reboot cases
Verify:
- patch mate controller first
"""
self.create_host('controller-0', openstack_installed=False)
self.create_host('controller-1', openstack_installed=False)
controller_hosts = []
for host in self._host_table.values():
if HOST_PERSONALITY.CONTROLLER in host.personality:
controller_hosts.append(host)
# Test reboot patches
strategy = create_sw_patch_strategy(
controller_apply_type=SW_UPDATE_APPLY_TYPE.SERIAL
)
strategy._add_controller_strategy_stages(controllers=controller_hosts,
reboot=True)
apply_phase = strategy.apply_phase.as_dict()
expected_results = {
'total_stages': 2,
'stages': [
{'name': 'sw-patch-controllers',
'total_steps': 7,
'steps': [
{'name': 'query-alarms'},
{'name': 'swact-hosts',
'entity_names': ['controller-1']},
{'name': 'lock-hosts',
'entity_names': ['controller-1']},
{'name': 'sw-patch-hosts',
'entity_names': ['controller-1']},
{'name': 'system-stabilize',
'timeout': 15},
{'name': 'unlock-hosts',
'entity_names': ['controller-1']},
{'name': 'system-stabilize',
'timeout': 60},
]
},
{'name': 'sw-patch-controllers',
@ -2700,7 +2810,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'unlock-hosts',
'entity_names': ['controller-0']},
{'name': 'system-stabilize',
'timeout': 60}
'timeout': 60},
]
},
]
@ -2805,8 +2915,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'entity_names': ['controller-0']},
{'name': 'start-instances',
'entity_names': ['test_instance_0']},
{'name': 'system-stabilize',
'timeout': 60},
{'name': 'wait-alarms-clear',
'timeout': 600},
]
},
{'name': 'sw-patch-worker-hosts',
@ -2827,8 +2937,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'entity_names': ['controller-1']},
{'name': 'start-instances',
'entity_names': ['test_instance_1']},
{'name': 'system-stabilize',
'timeout': 60}
{'name': 'wait-alarms-clear',
'timeout': 600}
]
},
]
@ -2930,8 +3040,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'entity_names': ['controller-0']},
{'name': 'start-instances',
'entity_names': ['test_instance_0']},
{'name': 'system-stabilize',
'timeout': 60}
{'name': 'wait-alarms-clear',
'timeout': 600}
]
},
{'name': 'sw-patch-worker-hosts',
@ -2952,8 +3062,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'entity_names': ['controller-1']},
{'name': 'start-instances',
'entity_names': ['test_instance_1']},
{'name': 'system-stabilize',
'timeout': 60}
{'name': 'wait-alarms-clear',
'timeout': 600}
]
},
]
@ -3007,8 +3117,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15},
{'name': 'unlock-hosts',
'entity_names': ['controller-0']},
{'name': 'system-stabilize',
'timeout': 60}
{'name': 'wait-alarms-clear',
'timeout': 600}
]
},
{'name': 'sw-patch-worker-hosts',
@ -3025,8 +3135,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15},
{'name': 'unlock-hosts',
'entity_names': ['controller-1']},
{'name': 'system-stabilize',
'timeout': 60}
{'name': 'wait-alarms-clear',
'timeout': 600}
]
},
]
@ -3102,8 +3212,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'entity_names': ['controller-0']},
{'name': 'start-instances',
'entity_names': ['test_instance_0']},
{'name': 'system-stabilize',
'timeout': 60},
{'name': 'wait-alarms-clear',
'timeout': 600},
]
},
{'name': 'sw-patch-worker-hosts',
@ -3124,8 +3234,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'entity_names': ['controller-1']},
{'name': 'start-instances',
'entity_names': ['test_instance_1']},
{'name': 'system-stabilize',
'timeout': 60}
{'name': 'wait-alarms-clear',
'timeout': 600}
]
},
{'name': 'sw-patch-worker-hosts',
@ -3266,8 +3376,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'entity_names': ['controller-0']},
{'name': 'start-instances',
'entity_names': ['test_instance_0']},
{'name': 'system-stabilize',
'timeout': 60}
{'name': 'wait-alarms-clear',
'timeout': 600}
]
},
{'name': 'sw-patch-worker-hosts',
@ -3288,8 +3398,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'entity_names': ['controller-1']},
{'name': 'start-instances',
'entity_names': ['test_instance_1']},
{'name': 'system-stabilize',
'timeout': 60}
{'name': 'wait-alarms-clear',
'timeout': 600}
]
},
{'name': 'sw-patch-worker-hosts',
@ -3369,7 +3479,6 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
reboot=True)
apply_phase = strategy.apply_phase.as_dict()
expected_results = {
'total_stages': 4,
'stages': [
@ -3387,8 +3496,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15},
{'name': 'unlock-hosts',
'entity_names': ['controller-0']},
{'name': 'system-stabilize',
'timeout': 60}
{'name': 'wait-alarms-clear',
'timeout': 600}
]
},
{'name': 'sw-patch-worker-hosts',
@ -3405,8 +3514,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15},
{'name': 'unlock-hosts',
'entity_names': ['controller-1']},
{'name': 'system-stabilize',
'timeout': 60}
{'name': 'wait-alarms-clear',
'timeout': 600}
]
},
{'name': 'sw-patch-worker-hosts',
@ -3581,8 +3690,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'entity_names': ['controller-0']},
{'name': 'start-instances',
'entity_names': ['test_instance_0']},
{'name': 'system-stabilize',
'timeout': 60},
{'name': 'wait-alarms-clear',
'timeout': 600},
]
},
]
@ -3632,8 +3741,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15},
{'name': 'unlock-hosts',
'entity_names': ['controller-0']},
{'name': 'system-stabilize',
'timeout': 60}
{'name': 'wait-alarms-clear',
'timeout': 600}
]
},
]

View File

@ -32,4 +32,5 @@ from nfv_vim.strategy._strategy_steps import UpgradeActivateStep # noqa: F401
from nfv_vim.strategy._strategy_steps import UpgradeCompleteStep # noqa: F401
from nfv_vim.strategy._strategy_steps import UpgradeHostsStep # noqa: F401
from nfv_vim.strategy._strategy_steps import UpgradeStartStep # noqa: F401
from nfv_vim.strategy._strategy_steps import WaitAlarmsClearStep # noqa: F401
from nfv_vim.strategy._strategy_steps import WaitDataSyncStep # noqa: F401

View File

@ -610,7 +610,14 @@ class SwPatchStrategy(SwUpdateStrategy):
stage.add_step(strategy.SystemStabilizeStep(
timeout_in_secs=MTCE_DELAY))
stage.add_step(strategy.UnlockHostsStep(host_list))
stage.add_step(strategy.SystemStabilizeStep())
if host.openstack_control:
# Wait extra time for services to go enabled and
# alarms to clear.
stage.add_step(strategy.WaitAlarmsClearStep(
timeout_in_secs=10 * 60,
ignore_alarms=self._ignore_alarms))
else:
stage.add_step(strategy.SystemStabilizeStep())
else:
# Less time required if host is not rebooting
stage.add_step(strategy.SystemStabilizeStep(
@ -632,7 +639,14 @@ class SwPatchStrategy(SwUpdateStrategy):
stage.add_step(strategy.SystemStabilizeStep(
timeout_in_secs=MTCE_DELAY))
stage.add_step(strategy.UnlockHostsStep(host_list))
stage.add_step(strategy.SystemStabilizeStep())
if host.openstack_control:
# Wait extra time for services to go enabled and
# alarms to clear.
stage.add_step(strategy.WaitAlarmsClearStep(
timeout_in_secs=10 * 60,
ignore_alarms=self._ignore_alarms))
else:
stage.add_step(strategy.SystemStabilizeStep())
else:
# Less time required if host is not rebooting
stage.add_step(strategy.SystemStabilizeStep(
@ -847,7 +861,15 @@ class SwPatchStrategy(SwUpdateStrategy):
stage.add_step(strategy.StartInstancesStep(
instance_list))
stage.add_step(strategy.SystemStabilizeStep())
if any(host.openstack_control for host in hosts_to_lock) or \
any(host.openstack_control for host in hosts_to_reboot):
# Wait extra time for services to go enabled
# and alarms to clear.
stage.add_step(strategy.WaitAlarmsClearStep(
timeout_in_secs=10 * 60,
ignore_alarms=self._ignore_alarms))
else:
stage.add_step(strategy.SystemStabilizeStep())
else:
# Less time required if host is not rebooting
stage.add_step(strategy.SystemStabilizeStep(

View File

@ -43,6 +43,7 @@ class StrategyStepNames(Constants):
START_INSTANCES = Constant('start-instances')
QUERY_ALARMS = Constant('query-alarms')
WAIT_DATA_SYNC = Constant('wait-data-sync')
WAIT_ALARMS_CLEAR = Constant('wait-alarms-clear')
QUERY_SW_PATCHES = Constant('query-sw-patches')
QUERY_SW_PATCH_HOSTS = Constant('query-sw-patch-hosts')
QUERY_FW_UPDATE_HOST = Constant('query-fw-update-host')
@ -1656,6 +1657,114 @@ class WaitDataSyncStep(strategy.StrategyStep):
return data
class WaitAlarmsClearStep(strategy.StrategyStep):
    """
    Alarm Wait - Strategy Step

    Waits for alarms to clear.  Once the first-query delay has elapsed
    (measured from the first host audit event handled by this step),
    alarms are queried on host audit events.  The step completes with
    SUCCESS when no alarms remain after filtering, and with FAILED when
    the alarm query does not complete.
    """

    def __init__(self, timeout_in_secs=300, first_query_delay_in_secs=60,
                 ignore_alarms=None):
        """
        Create the alarm wait step

        :param timeout_in_secs: step timeout handed to the base step
        :param first_query_delay_in_secs: seconds that must elapse after
            the first host audit event before alarms are first queried
        :param ignore_alarms: alarm ids that do not hold up completion;
            None is treated as an empty list
        """
        super(WaitAlarmsClearStep, self).__init__(
            STRATEGY_STEP_NAME.WAIT_ALARMS_CLEAR,
            timeout_in_secs=timeout_in_secs)
        self._first_query_delay_in_secs = first_query_delay_in_secs
        if ignore_alarms is None:
            ignore_alarms = []
        self._ignore_alarms = ignore_alarms
        # Monotonic timestamp (ms) of the first audit; 0 means not started
        self._wait_time = 0
        # True while an alarm query is outstanding
        self._query_inprogress = False

    @coroutine
    def _query_alarms_callback(self):
        """
        Query Alarms Callback

        Receives the alarm query response, filters the returned alarms and
        completes the step when none are left.
        """
        response = (yield)
        DLOG.debug("Query-Alarms callback response=%s." % response)

        # The outstanding query has returned; a later audit may issue
        # another one.
        self._query_inprogress = False

        if not response['completed']:
            # Unable to retrieve alarms
            result = strategy.STRATEGY_STEP_RESULT.FAILED
            self.stage.step_complete(result, "")
            return

        if self.strategy is None:
            # No strategy to record the alarms against.  Never read
            # attributes of a missing strategy; keep waiting instead.
            return

        # Loop-invariant: relaxed restrictions let non-management
        # affecting alarms through.
        relaxed = (self.strategy._alarm_restrictions ==
                   strategy.STRATEGY_ALARM_RESTRICTION_TYPES.RELAXED)

        nfvi_alarms = list()
        for nfvi_alarm in response['result-data']:
            if relaxed and nfvi_alarm.mgmt_affecting == 'False':
                DLOG.warn("Ignoring non-management affecting alarm "
                          "%s - uuid %s due to relaxed alarm "
                          "strictness" % (nfvi_alarm.alarm_id,
                                          nfvi_alarm.alarm_uuid))
            elif nfvi_alarm.alarm_id not in self._ignore_alarms:
                nfvi_alarms.append(nfvi_alarm)
            else:
                DLOG.debug("Ignoring alarm %s - uuid %s" %
                           (nfvi_alarm.alarm_id, nfvi_alarm.alarm_uuid))

        self.strategy.nfvi_alarms = nfvi_alarms

        if not self.strategy.nfvi_alarms:
            # Alarms have all cleared
            result = strategy.STRATEGY_STEP_RESULT.SUCCESS
            self.stage.step_complete(result, "")
        # Otherwise keep waiting for alarms to clear

    def apply(self):
        """
        Alarm Wait

        Returns WAIT; completion is driven by later host audit events.
        """
        DLOG.info("Step (%s) apply." % self._name)
        return strategy.STRATEGY_STEP_RESULT.WAIT, ""

    def handle_event(self, event, event_data=None):
        """
        Handle Host events

        On a host audit event, starts the delay timer if needed and issues
        an alarm query once the first-query delay has elapsed and no query
        is already outstanding.  Returns True for host audit events and
        False for any other event.
        """
        from nfv_vim import nfvi

        DLOG.debug("Step (%s) handle event (%s)." % (self._name, event))

        if event == STRATEGY_EVENT.HOST_AUDIT:
            if self._wait_time == 0:
                self._wait_time = timers.get_monotonic_timestamp_in_ms()

            now_ms = timers.get_monotonic_timestamp_in_ms()
            secs_expired = (now_ms - self._wait_time) / 1000

            # Wait before checking alarms for first time
            delay_elapsed = self._first_query_delay_in_secs <= secs_expired
            if delay_elapsed and not self._query_inprogress:
                self._query_inprogress = True
                nfvi.nfvi_get_alarms(self._query_alarms_callback())
            return True

        return False

    def from_dict(self, data):
        """
        Returns the alarm wait step object initialized using the given
        dictionary

        The delay timer and the query-in-progress flag are reset rather
        than restored from the dictionary.
        """
        super(WaitAlarmsClearStep, self).from_dict(data)
        self._first_query_delay_in_secs = data['first_query_delay_in_secs']
        self._ignore_alarms = data['ignore_alarms']
        self._wait_time = 0
        self._query_inprogress = False
        return self

    def as_dict(self):
        """
        Represent the alarm wait step as a dictionary

        The step has no target entities, so the entity fields are empty.
        """
        data = super(WaitAlarmsClearStep, self).as_dict()
        data['entity_type'] = ''
        data['entity_names'] = list()
        data['entity_uuids'] = list()
        data['first_query_delay_in_secs'] = self._first_query_delay_in_secs
        data['ignore_alarms'] = self._ignore_alarms
        return data
class QuerySwPatchesStep(strategy.StrategyStep):
"""
Query Software Patches - Strategy Step
@ -2484,6 +2593,9 @@ def strategy_step_rebuild_from_dict(data):
elif STRATEGY_STEP_NAME.WAIT_DATA_SYNC == data['name']:
step_obj = object.__new__(WaitDataSyncStep)
elif STRATEGY_STEP_NAME.WAIT_ALARMS_CLEAR == data['name']:
step_obj = object.__new__(WaitAlarmsClearStep)
elif STRATEGY_STEP_NAME.QUERY_SW_PATCHES == data['name']:
step_obj = object.__new__(QuerySwPatchesStep)