Add wait for alarms to clear to SW patch strategy unlock hosts step
This appends the WaitAlarmsClearStep after the UnlockHostsStep on controller hosts for SwPatchStrategy when the stx-openstack application is installed. The new step periodically queries alarms and waits for them to clear, allowing the system to stabilize. If stx-openstack is not installed, a one-minute wait is performed with the existing SystemStabilizeStep instead. Change-Id: I6dbc4c6032a3bb9d160df79d46630a81960cbb37 Closes-Bug: 1893124 Signed-off-by: Jim Gauld <james.gauld@windriver.com>
This commit is contained in:
parent
a274a40529
commit
4c36f911c9
|
@ -2681,8 +2681,118 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'timeout': 15},
|
||||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-1']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600},
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-controllers',
|
||||
'total_steps': 7,
|
||||
'steps': [
|
||||
{'name': 'query-alarms'},
|
||||
{'name': 'swact-hosts',
|
||||
'entity_names': ['controller-0']},
|
||||
{'name': 'lock-hosts',
|
||||
'entity_names': ['controller-0']},
|
||||
{'name': 'sw-patch-hosts',
|
||||
'entity_names': ['controller-0']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60}
|
||||
'timeout': 15},
|
||||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-0']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600},
|
||||
]
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
sw_update_testcase.validate_strategy_persists(strategy)
|
||||
sw_update_testcase.validate_phase(apply_phase, expected_results)
|
||||
|
||||
# Test no reboot patches
|
||||
strategy = create_sw_patch_strategy(
|
||||
controller_apply_type=SW_UPDATE_APPLY_TYPE.SERIAL
|
||||
)
|
||||
|
||||
strategy._add_controller_strategy_stages(controllers=controller_hosts,
|
||||
reboot=False)
|
||||
|
||||
apply_phase = strategy.apply_phase.as_dict()
|
||||
|
||||
expected_results = {
|
||||
'total_stages': 2,
|
||||
'stages': [
|
||||
{'name': 'sw-patch-controllers',
|
||||
'total_steps': 3,
|
||||
'steps': [
|
||||
{'name': 'query-alarms'},
|
||||
{'name': 'sw-patch-hosts',
|
||||
'entity_names': ['controller-1']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 30}
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-controllers',
|
||||
'total_steps': 3,
|
||||
'steps': [
|
||||
{'name': 'query-alarms'},
|
||||
{'name': 'sw-patch-hosts',
|
||||
'entity_names': ['controller-0']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 30}
|
||||
]
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
sw_update_testcase.validate_strategy_persists(strategy)
|
||||
sw_update_testcase.validate_phase(apply_phase, expected_results)
|
||||
|
||||
def test_sw_patch_strategy_controller_stages_serial_openstack_not_installed(self):
|
||||
"""
|
||||
Test the sw_patch strategy add controller strategy stages:
|
||||
- serial apply
|
||||
- test both reboot and no reboot cases
|
||||
Verify:
|
||||
- patch mate controller first
|
||||
"""
|
||||
self.create_host('controller-0', openstack_installed=False)
|
||||
self.create_host('controller-1', openstack_installed=False)
|
||||
|
||||
controller_hosts = []
|
||||
for host in self._host_table.values():
|
||||
if HOST_PERSONALITY.CONTROLLER in host.personality:
|
||||
controller_hosts.append(host)
|
||||
|
||||
# Test reboot patches
|
||||
strategy = create_sw_patch_strategy(
|
||||
controller_apply_type=SW_UPDATE_APPLY_TYPE.SERIAL
|
||||
)
|
||||
|
||||
strategy._add_controller_strategy_stages(controllers=controller_hosts,
|
||||
reboot=True)
|
||||
|
||||
apply_phase = strategy.apply_phase.as_dict()
|
||||
|
||||
expected_results = {
|
||||
'total_stages': 2,
|
||||
'stages': [
|
||||
{'name': 'sw-patch-controllers',
|
||||
'total_steps': 7,
|
||||
'steps': [
|
||||
{'name': 'query-alarms'},
|
||||
{'name': 'swact-hosts',
|
||||
'entity_names': ['controller-1']},
|
||||
{'name': 'lock-hosts',
|
||||
'entity_names': ['controller-1']},
|
||||
{'name': 'sw-patch-hosts',
|
||||
'entity_names': ['controller-1']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 15},
|
||||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-1']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60},
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-controllers',
|
||||
|
@ -2700,7 +2810,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-0']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60}
|
||||
'timeout': 60},
|
||||
]
|
||||
},
|
||||
]
|
||||
|
@ -2805,8 +2915,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'entity_names': ['controller-0']},
|
||||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_0']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600},
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -2827,8 +2937,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'entity_names': ['controller-1']},
|
||||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_1']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60}
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
]
|
||||
},
|
||||
]
|
||||
|
@ -2930,8 +3040,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'entity_names': ['controller-0']},
|
||||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_0']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60}
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -2952,8 +3062,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'entity_names': ['controller-1']},
|
||||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_1']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60}
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
]
|
||||
},
|
||||
]
|
||||
|
@ -3007,8 +3117,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'timeout': 15},
|
||||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-0']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60}
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3025,8 +3135,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'timeout': 15},
|
||||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-1']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60}
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
]
|
||||
},
|
||||
]
|
||||
|
@ -3102,8 +3212,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'entity_names': ['controller-0']},
|
||||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_0']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600},
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3124,8 +3234,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'entity_names': ['controller-1']},
|
||||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_1']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60}
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3266,8 +3376,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'entity_names': ['controller-0']},
|
||||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_0']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60}
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3288,8 +3398,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'entity_names': ['controller-1']},
|
||||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_1']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60}
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3369,7 +3479,6 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
reboot=True)
|
||||
|
||||
apply_phase = strategy.apply_phase.as_dict()
|
||||
|
||||
expected_results = {
|
||||
'total_stages': 4,
|
||||
'stages': [
|
||||
|
@ -3387,8 +3496,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'timeout': 15},
|
||||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-0']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60}
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3405,8 +3514,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'timeout': 15},
|
||||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-1']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60}
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3581,8 +3690,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'entity_names': ['controller-0']},
|
||||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_0']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600},
|
||||
]
|
||||
},
|
||||
]
|
||||
|
@ -3632,8 +3741,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'timeout': 15},
|
||||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-0']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60}
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
]
|
||||
},
|
||||
]
|
||||
|
|
|
@ -32,4 +32,5 @@ from nfv_vim.strategy._strategy_steps import UpgradeActivateStep # noqa: F401
|
|||
from nfv_vim.strategy._strategy_steps import UpgradeCompleteStep # noqa: F401
|
||||
from nfv_vim.strategy._strategy_steps import UpgradeHostsStep # noqa: F401
|
||||
from nfv_vim.strategy._strategy_steps import UpgradeStartStep # noqa: F401
|
||||
from nfv_vim.strategy._strategy_steps import WaitAlarmsClearStep # noqa: F401
|
||||
from nfv_vim.strategy._strategy_steps import WaitDataSyncStep # noqa: F401
|
||||
|
|
|
@ -610,7 +610,14 @@ class SwPatchStrategy(SwUpdateStrategy):
|
|||
stage.add_step(strategy.SystemStabilizeStep(
|
||||
timeout_in_secs=MTCE_DELAY))
|
||||
stage.add_step(strategy.UnlockHostsStep(host_list))
|
||||
stage.add_step(strategy.SystemStabilizeStep())
|
||||
if host.openstack_control:
|
||||
# Wait extra time for services to go enabled and
|
||||
# alarms to clear.
|
||||
stage.add_step(strategy.WaitAlarmsClearStep(
|
||||
timeout_in_secs=10 * 60,
|
||||
ignore_alarms=self._ignore_alarms))
|
||||
else:
|
||||
stage.add_step(strategy.SystemStabilizeStep())
|
||||
else:
|
||||
# Less time required if host is not rebooting
|
||||
stage.add_step(strategy.SystemStabilizeStep(
|
||||
|
@ -632,7 +639,14 @@ class SwPatchStrategy(SwUpdateStrategy):
|
|||
stage.add_step(strategy.SystemStabilizeStep(
|
||||
timeout_in_secs=MTCE_DELAY))
|
||||
stage.add_step(strategy.UnlockHostsStep(host_list))
|
||||
stage.add_step(strategy.SystemStabilizeStep())
|
||||
if host.openstack_control:
|
||||
# Wait extra time for services to go enabled and
|
||||
# alarms to clear.
|
||||
stage.add_step(strategy.WaitAlarmsClearStep(
|
||||
timeout_in_secs=10 * 60,
|
||||
ignore_alarms=self._ignore_alarms))
|
||||
else:
|
||||
stage.add_step(strategy.SystemStabilizeStep())
|
||||
else:
|
||||
# Less time required if host is not rebooting
|
||||
stage.add_step(strategy.SystemStabilizeStep(
|
||||
|
@ -847,7 +861,15 @@ class SwPatchStrategy(SwUpdateStrategy):
|
|||
stage.add_step(strategy.StartInstancesStep(
|
||||
instance_list))
|
||||
|
||||
stage.add_step(strategy.SystemStabilizeStep())
|
||||
if any(host.openstack_control for host in hosts_to_lock) or \
|
||||
any(host.openstack_control for host in hosts_to_reboot):
|
||||
# Wait extra time for services to go enabled
|
||||
# and alarms to clear.
|
||||
stage.add_step(strategy.WaitAlarmsClearStep(
|
||||
timeout_in_secs=10 * 60,
|
||||
ignore_alarms=self._ignore_alarms))
|
||||
else:
|
||||
stage.add_step(strategy.SystemStabilizeStep())
|
||||
else:
|
||||
# Less time required if host is not rebooting
|
||||
stage.add_step(strategy.SystemStabilizeStep(
|
||||
|
|
|
@ -43,6 +43,7 @@ class StrategyStepNames(Constants):
|
|||
START_INSTANCES = Constant('start-instances')
|
||||
QUERY_ALARMS = Constant('query-alarms')
|
||||
WAIT_DATA_SYNC = Constant('wait-data-sync')
|
||||
WAIT_ALARMS_CLEAR = Constant('wait-alarms-clear')
|
||||
QUERY_SW_PATCHES = Constant('query-sw-patches')
|
||||
QUERY_SW_PATCH_HOSTS = Constant('query-sw-patch-hosts')
|
||||
QUERY_FW_UPDATE_HOST = Constant('query-fw-update-host')
|
||||
|
@ -1656,6 +1657,114 @@ class WaitDataSyncStep(strategy.StrategyStep):
|
|||
return data
|
||||
|
||||
|
||||
class WaitAlarmsClearStep(strategy.StrategyStep):
    """
    Alarm Wait - Strategy Step

    Repeatedly queries alarms on host-audit events and completes with
    SUCCESS once a query returns no alarms other than ignored ones.  The
    step completes with FAILED when an alarm query does not complete.
    """
    def __init__(self, timeout_in_secs=300, first_query_delay_in_secs=60,
                 ignore_alarms=None):
        """
        Create the alarm wait step

        timeout_in_secs is forwarded to the base strategy step.
        first_query_delay_in_secs is how long to wait, measured from the
        first host-audit event handled, before the first alarm query.
        ignore_alarms is a list of alarm ids that never block completion
        (defaults to an empty list).
        """
        super(WaitAlarmsClearStep, self).__init__(
            STRATEGY_STEP_NAME.WAIT_ALARMS_CLEAR,
            timeout_in_secs=timeout_in_secs)
        self._first_query_delay_in_secs = first_query_delay_in_secs
        if ignore_alarms is None:
            ignore_alarms = []
        self._ignore_alarms = ignore_alarms
        # Monotonic timestamp (ms) of the first host-audit event; 0 means
        # that no host-audit event has been handled yet.
        self._wait_time = 0
        # True while an alarm query has been issued and not yet answered,
        # so that only one query is outstanding at a time.
        self._query_inprogress = False

    @coroutine
    def _query_alarms_callback(self):
        """
        Query Alarms Callback

        Receives the alarm query response, stores the alarms that are not
        ignored on the strategy and completes the step when none remain.
        """
        response = (yield)
        DLOG.debug("Query-Alarms callback response=%s." % response)

        # Allow the next host-audit event to issue a new query
        self._query_inprogress = False

        if response['completed']:
            if self.strategy is not None:
                nfvi_alarms = []
                for nfvi_alarm in response['result-data']:
                    if (self.strategy._alarm_restrictions ==
                            strategy.STRATEGY_ALARM_RESTRICTION_TYPES.RELAXED and
                            nfvi_alarm.mgmt_affecting == 'False'):
                        DLOG.warn("Ignoring non-management affecting alarm "
                                  "%s - uuid %s due to relaxed alarm "
                                  "strictness" % (nfvi_alarm.alarm_id,
                                                  nfvi_alarm.alarm_uuid))
                    elif nfvi_alarm.alarm_id not in self._ignore_alarms:
                        nfvi_alarms.append(nfvi_alarm)
                    else:
                        DLOG.debug("Ignoring alarm %s - uuid %s" %
                                   (nfvi_alarm.alarm_id, nfvi_alarm.alarm_uuid))
                self.strategy.nfvi_alarms = nfvi_alarms

                # Evaluate the result only under the strategy guard so a
                # missing strategy cannot raise AttributeError here.
                if self.strategy.nfvi_alarms:
                    # Keep waiting for alarms to clear
                    pass
                else:
                    # Alarms have all cleared
                    result = strategy.STRATEGY_STEP_RESULT.SUCCESS
                    self.stage.step_complete(result, "")
            else:
                # Without a strategy the alarms cannot be filtered or
                # stored; leave the step waiting.
                DLOG.debug("Step (%s) has no strategy, alarms not "
                           "evaluated." % self._name)
        else:
            # Unable to retrieve alarms
            result = strategy.STRATEGY_STEP_RESULT.FAILED
            self.stage.step_complete(result, "")

    def apply(self):
        """
        Alarm Wait

        Returns a WAIT result; the alarm queries are driven from
        handle_event.
        """
        DLOG.info("Step (%s) apply." % self._name)
        return strategy.STRATEGY_STEP_RESULT.WAIT, ""

    def handle_event(self, event, event_data=None):
        """
        Handle Host events

        On a host-audit event, issues an alarm query once the first query
        delay has elapsed and no query is outstanding.  Returns True when
        the event is a host-audit event, otherwise False.
        """
        from nfv_vim import nfvi

        DLOG.debug("Step (%s) handle event (%s)." % (self._name, event))

        if event == STRATEGY_EVENT.HOST_AUDIT:
            if 0 == self._wait_time:
                # First host-audit event: start measuring the delay
                self._wait_time = timers.get_monotonic_timestamp_in_ms()

            now_ms = timers.get_monotonic_timestamp_in_ms()
            secs_expired = (now_ms - self._wait_time) / 1000
            # Wait before checking alarms for first time
            if (self._first_query_delay_in_secs <= secs_expired and
                    not self._query_inprogress):
                self._query_inprogress = True
                nfvi.nfvi_get_alarms(self._query_alarms_callback())
            return True

        return False

    def from_dict(self, data):
        """
        Returns the alarm wait step object initialized using the given
        dictionary
        """
        super(WaitAlarmsClearStep, self).from_dict(data)
        self._first_query_delay_in_secs = data['first_query_delay_in_secs']
        self._ignore_alarms = data['ignore_alarms']
        # Runtime-only state is not persisted; start over after a rebuild
        self._wait_time = 0
        self._query_inprogress = False
        return self

    def as_dict(self):
        """
        Represent the alarm wait step as a dictionary
        """
        data = super(WaitAlarmsClearStep, self).as_dict()
        # This step is not tied to any particular entity
        data['entity_type'] = ''
        data['entity_names'] = list()
        data['entity_uuids'] = list()
        data['first_query_delay_in_secs'] = self._first_query_delay_in_secs
        data['ignore_alarms'] = self._ignore_alarms
        return data
|
||||
|
||||
|
||||
class QuerySwPatchesStep(strategy.StrategyStep):
|
||||
"""
|
||||
Query Software Patches - Strategy Step
|
||||
|
@ -2484,6 +2593,9 @@ def strategy_step_rebuild_from_dict(data):
|
|||
elif STRATEGY_STEP_NAME.WAIT_DATA_SYNC == data['name']:
|
||||
step_obj = object.__new__(WaitDataSyncStep)
|
||||
|
||||
elif STRATEGY_STEP_NAME.WAIT_ALARMS_CLEAR == data['name']:
|
||||
step_obj = object.__new__(WaitAlarmsClearStep)
|
||||
|
||||
elif STRATEGY_STEP_NAME.QUERY_SW_PATCHES == data['name']:
|
||||
step_obj = object.__new__(QuerySwPatchesStep)
|
||||
|
||||
|
|
Loading…
Reference in New Issue