Add host stabilization wait for post-live-migration cleanup

This adds a WaitHostStabilizeTaskWork routine that is called after all VMs
have migrated off the host during host-disable. Post-live-migration cleanup
on the source compute host occurs after the nova database instances table
is updated with the destination host and a task_state of None. This source
cleanup period has been observed to take up to 3 seconds and covers the
disk, neutron ports, migration record, and console. Nova does not provide a
deterministic way to indicate that live migration, including the cleanup,
has completed. This update gives a sufficient wait (10 seconds) for
post-live-migration cleanup to complete at the source before the host is
disabled and pods are shut down.

Change-Id: Id6d500b627dea8057807bd7dfa07899bd205d3e6
Closes-Bug: 1892885
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
commit c4429fd67a
parent 8e266832a9
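For context on why a fixed wait is used rather than a status poll: the nova servers API already reports the move before the source-side cleanup finishes, so polling it cannot tell the VIM when the source host is actually quiet. A minimal sketch of such an (insufficient) check follows; looks_migrated() is a hypothetical helper written only for illustration, taking a server record as returned by the nova compute API:

def looks_migrated(server, destination_host):
    # True as soon as nova shows the destination host and task_state None --
    # but that happens *before* the source host has cleaned up disk, neutron
    # ports, the migration record, and the console, so this check alone
    # cannot signal that post-live-migration cleanup is complete.
    return (server.get('OS-EXT-SRV-ATTR:host') == destination_host and
            server.get('OS-EXT-STS:task_state') is None)

Because no later, authoritative signal is exposed, the change below waits a fixed 10 seconds instead, which comfortably covers the roughly 3 seconds of cleanup observed in practice.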
@@ -1825,3 +1825,35 @@ class NotifyInstancesHostDisabledTaskWork(state_machine.StateTaskWork):
                 self._host.uuid, self._host.name, self._callback())
 
         return handled
+
+
+class WaitHostStabilizeTaskWork(state_machine.StateTaskWork):
+    """
+    Wait Host Stabilize Task Work
+    """
+    def __init__(self, task, host, timeout_in_secs=60):
+        super(WaitHostStabilizeTaskWork, self).__init__(
+            'wait-host-stabilize_%s' % host.name, task,
+            timeout_in_secs=timeout_in_secs)
+        self._host_reference = weakref.ref(host)
+
+    @property
+    def _host(self):
+        """
+        Returns the host
+        """
+        host = self._host_reference()
+        return host
+
+    def timeout(self):
+        """
+        Timeout is expected, so override to pass
+        """
+        return state_machine.STATE_TASK_WORK_RESULT.SUCCESS, empty_reason
+
+    def run(self):
+        """
+        Run wait host stabilize
+        """
+        DLOG.verbose("Wait-Host-Stabilize for %s." % self._host.name)
+        return state_machine.STATE_TASK_WORK_RESULT.WAIT, empty_reason
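The new class uses a timeout-is-success pattern: run() returns WAIT so the task work simply sits pending, and because there is nothing to poll, timeout() is overridden to report SUCCESS so the timer expiring counts as normal completion rather than a failure. The following self-contained sketch reproduces that pattern; SimpleWaitWork and run_with_timeout are hypothetical stand-ins, not part of nfv_vim, whose real scheduler drives the task work through its state machine:

import time

WAIT = 'wait'
SUCCESS = 'success'


class SimpleWaitWork(object):
    """Stand-in for a task work item whose timeout is the expected outcome."""

    def __init__(self, timeout_in_secs=10):
        self.timeout_in_secs = timeout_in_secs

    def run(self):
        # Nothing to poll, so ask the scheduler to keep this work pending.
        return WAIT

    def timeout(self):
        # The timer expiring is the expected outcome, so report success.
        return SUCCESS


def run_with_timeout(work):
    """Toy scheduler: let the work wait, then treat its timeout as the result."""
    deadline = time.monotonic() + work.timeout_in_secs
    result = work.run()
    while result == WAIT and time.monotonic() < deadline:
        time.sleep(0.1)  # a real scheduler would dispatch other events here
    return work.timeout() if result == WAIT else result


print(run_with_timeout(SimpleWaitWork(timeout_in_secs=1)))  # 'success' after ~1 second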
@@ -29,6 +29,7 @@ from nfv_vim.host_fsm._host_task_work import NotifyInstancesHostDisablingTaskWork
 from nfv_vim.host_fsm._host_task_work import QueryHypervisorTaskWork
 from nfv_vim.host_fsm._host_task_work import WaitHostServicesCreatedTaskWork
 from nfv_vim.host_fsm._host_task_work import WaitHostServicesDisabledTaskWork
+from nfv_vim.host_fsm._host_task_work import WaitHostStabilizeTaskWork
 
 DLOG = debug.debug_get_logger('nfv_vim.state_machine.host_task')
 
@@ -235,6 +236,10 @@ class DisableHostTask(state_machine.StateTask):
             task_work_list.append(NotifyHostDisabledTaskWork(
                 self, host, objects.HOST_SERVICES.NETWORK))
         task_work_list.append(NotifyInstancesHostDisabledTaskWork(self, host))
+        # Wait for latent post-live-migration cleanup
+        if host.host_service_configured(objects.HOST_SERVICES.COMPUTE):
+            task_work_list.append(WaitHostStabilizeTaskWork(
+                self, host, timeout_in_secs=10))
         if host.host_service_configured(objects.HOST_SERVICES.CONTAINER):
             # Only disable the container services if the host is being locked
             # (or is already locked) and we are not running in a single
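A note on the wiring above: the wait is appended only when the host has the compute service configured, and it sits between the instance-disabled notification and the container-service disable that shuts pods down, which is exactly the window the commit message describes. Because timeout() reports SUCCESS, every compute-host disable now pauses a flat 10 seconds (overriding the class default of 60); there is no early exit when cleanup finishes sooner, which is the trade-off for nova exposing no completion signal. A condensed, illustrative view of the new tail of the work list, assuming the imports shown in the earlier hunk plus from nfv_vim import objects (that exact import path is an assumption):

from nfv_vim import objects
from nfv_vim.host_fsm._host_task_work import NotifyInstancesHostDisabledTaskWork
from nfv_vim.host_fsm._host_task_work import WaitHostStabilizeTaskWork


def append_stabilize_wait(task, host, task_work_list):
    # Instances have been notified that the host is disabled...
    task_work_list.append(NotifyInstancesHostDisabledTaskWork(task, host))
    if host.host_service_configured(objects.HOST_SERVICES.COMPUTE):
        # ...then give source-side live-migration cleanup a fixed 10 s grace
        # period before container services are disabled and pods shut down.
        task_work_list.append(WaitHostStabilizeTaskWork(
            task, host, timeout_in_secs=10))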