Increase timeout to wait for alarms to clear.
This handles the case when OSDs are deployed on controller nodes and patching fails after a controller is unlocked while there is an ongoing HEALTH_WARN from Ceph.

Closes-Bug: 1907259
Signed-off-by: Andrei Grosu <andrei.grosu@windriver.com>
Change-Id: Ibc71987049bc1040ca2c3c8db72bbac74cb35457
This commit is contained in:
parent
6642d504ae
commit
9b79211a3c
|
@ -2962,7 +2962,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-1']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600},
|
||||
'timeout': 1800},
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-controllers',
|
||||
|
@ -2980,7 +2980,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-0']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600},
|
||||
'timeout': 1800},
|
||||
]
|
||||
},
|
||||
]
|
||||
|
@ -3071,8 +3071,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'timeout': 15},
|
||||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-1']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 1800},
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-controllers',
|
||||
|
@ -3089,8 +3089,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'timeout': 15},
|
||||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-0']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 1800},
|
||||
]
|
||||
},
|
||||
]
|
||||
|
@ -3196,7 +3196,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_0']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600},
|
||||
'timeout': 1800},
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3218,7 +3218,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_1']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
'timeout': 1800}
|
||||
]
|
||||
},
|
||||
]
|
||||
|
@ -3321,7 +3321,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_0']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
'timeout': 1800}
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3343,7 +3343,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_1']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
'timeout': 1800}
|
||||
]
|
||||
},
|
||||
]
|
||||
|
@ -3398,7 +3398,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-0']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
'timeout': 1800}
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3416,7 +3416,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-1']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
'timeout': 1800}
|
||||
]
|
||||
},
|
||||
]
|
||||
|
@ -3470,8 +3470,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'timeout': 15},
|
||||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-0']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60}
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 1800}
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3488,8 +3488,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'timeout': 15},
|
||||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-1']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60}
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 1800}
|
||||
]
|
||||
},
|
||||
]
|
||||
|
@ -3566,7 +3566,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_0']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600},
|
||||
'timeout': 1800},
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3588,7 +3588,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_1']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
'timeout': 1800}
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3730,7 +3730,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_0']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
'timeout': 1800}
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3752,7 +3752,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_1']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
'timeout': 1800}
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3850,7 +3850,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-0']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
'timeout': 1800}
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3868,7 +3868,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-1']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
'timeout': 1800}
|
||||
]
|
||||
},
|
||||
{'name': 'sw-patch-worker-hosts',
|
||||
|
@ -3985,8 +3985,8 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
'timeout': 15},
|
||||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-0']},
|
||||
{'name': 'system-stabilize',
|
||||
'timeout': 60},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 1800},
|
||||
]
|
||||
},
|
||||
]
|
||||
|
@ -4044,7 +4044,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'start-instances',
|
||||
'entity_names': ['test_instance_0']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600},
|
||||
'timeout': 1800},
|
||||
]
|
||||
},
|
||||
]
|
||||
|
@ -4095,7 +4095,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
|
|||
{'name': 'unlock-hosts',
|
||||
'entity_names': ['controller-0']},
|
||||
{'name': 'wait-alarms-clear',
|
||||
'timeout': 600}
|
||||
'timeout': 1800}
|
||||
]
|
||||
},
|
||||
]
|
||||
|
|
|
@ -610,14 +610,14 @@ class SwPatchStrategy(SwUpdateStrategy):
|
|||
stage.add_step(strategy.SystemStabilizeStep(
|
||||
timeout_in_secs=MTCE_DELAY))
|
||||
stage.add_step(strategy.UnlockHostsStep(host_list))
|
||||
if host.openstack_control:
|
||||
# Wait extra time for services to go enabled and
|
||||
# alarms to clear.
|
||||
stage.add_step(strategy.WaitAlarmsClearStep(
|
||||
timeout_in_secs=10 * 60,
|
||||
ignore_alarms=self._ignore_alarms))
|
||||
else:
|
||||
stage.add_step(strategy.SystemStabilizeStep())
|
||||
# After controller node(s) are unlocked, we need extra time to
|
||||
# allow the OSDs to go back in sync and the storage related
|
||||
# alarms to clear. Note: not all controller nodes will have
|
||||
# OSDs configured, but the alarms should clear quickly in
|
||||
# that case so this will not delay the patch strategy.
|
||||
stage.add_step(strategy.WaitAlarmsClearStep(
|
||||
timeout_in_secs=30 * 60,
|
||||
ignore_alarms=self._ignore_alarms))
|
||||
else:
|
||||
# Less time required if host is not rebooting
|
||||
stage.add_step(strategy.SystemStabilizeStep(
|
||||
|
@ -639,14 +639,14 @@ class SwPatchStrategy(SwUpdateStrategy):
|
|||
stage.add_step(strategy.SystemStabilizeStep(
|
||||
timeout_in_secs=MTCE_DELAY))
|
||||
stage.add_step(strategy.UnlockHostsStep(host_list))
|
||||
if host.openstack_control:
|
||||
# Wait extra time for services to go enabled and
|
||||
# alarms to clear.
|
||||
stage.add_step(strategy.WaitAlarmsClearStep(
|
||||
timeout_in_secs=10 * 60,
|
||||
ignore_alarms=self._ignore_alarms))
|
||||
else:
|
||||
stage.add_step(strategy.SystemStabilizeStep())
|
||||
# After controller node(s) are unlocked, we need extra time to
|
||||
# allow the OSDs to go back in sync and the storage related
|
||||
# alarms to clear. Note: not all controller nodes will have
|
||||
# OSDs configured, but the alarms should clear quickly in
|
||||
# that case so this will not delay the patch strategy.
|
||||
stage.add_step(strategy.WaitAlarmsClearStep(
|
||||
timeout_in_secs=30 * 60,
|
||||
ignore_alarms=self._ignore_alarms))
|
||||
else:
|
||||
# Less time required if host is not rebooting
|
||||
stage.add_step(strategy.SystemStabilizeStep(
|
||||
|
@ -860,18 +860,29 @@ class SwPatchStrategy(SwUpdateStrategy):
|
|||
self._default_instance_action:
|
||||
stage.add_step(strategy.StartInstancesStep(
|
||||
instance_list))
|
||||
|
||||
if any(host.openstack_control or host.openstack_compute for host in hosts_to_lock) or \
|
||||
any(host.openstack_control or host.openstack_compute for host in hosts_to_reboot):
|
||||
# Wait extra time for services to go enabled
|
||||
# and alarms to clear.
|
||||
# After controller node(s) are unlocked, we need extra time to
|
||||
# allow the OSDs to go back in sync and the storage related
|
||||
# alarms to clear. Note: not all controller nodes will have
|
||||
# OSDs configured, but the alarms should clear quickly in
|
||||
# that case so this will not delay the patch strategy.
|
||||
if any([HOST_PERSONALITY.CONTROLLER in host.personality
|
||||
for host in hosts_to_lock + hosts_to_reboot]):
|
||||
# Multiple personality nodes that need to wait for OSDs to sync:
|
||||
stage.add_step(strategy.WaitAlarmsClearStep(
|
||||
timeout_in_secs=10 * 60,
|
||||
timeout_in_secs=30 * 60,
|
||||
ignore_alarms=self._ignore_alarms))
|
||||
else:
|
||||
stage.add_step(strategy.SystemStabilizeStep())
|
||||
if any([host.openstack_control or host.openstack_compute
|
||||
for host in hosts_to_lock + hosts_to_reboot]):
|
||||
# Hosts with openstack that just need to wait for services to start up:
|
||||
stage.add_step(strategy.WaitAlarmsClearStep(
|
||||
timeout_in_secs=10 * 60,
|
||||
ignore_alarms=self._ignore_alarms))
|
||||
else:
|
||||
# Worker host without multiple personalities or openstack:
|
||||
stage.add_step(strategy.SystemStabilizeStep())
|
||||
else:
|
||||
# Less time required if host is not rebooting
|
||||
# Less time required if host is not rebooting:
|
||||
stage.add_step(strategy.SystemStabilizeStep(
|
||||
timeout_in_secs=NO_REBOOT_DELAY))
|
||||
self.apply_phase.add_stage(stage)
|
||||
|
|
Loading…
Reference in New Issue