Add host stabilization wait for post-live-migration cleanup

This adds WaitHostStabilizeTaskWork routine to be called after
all VMs have migrated from the host during host-disable.

Post-live-migration cleanup at the compute source host occurs
after the nova database instances table gets updated with the
destination host and task_state None. This source cleanup period
has been observed to take up to 3 seconds for: disk, neutron
ports, migration record, and console.

Nova does not provide a deterministic way to indicate that
live-migration, including the cleanup, has completed. This update
gives a sufficient wait (i.e., 10 seconds) for post-live-migration
cleanup to complete at the source before the host is disabled and
pods are shut down.

Change-Id: Id6d500b627dea8057807bd7dfa07899bd205d3e6
Closes-Bug: 1892885
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
This commit is contained in:
Jim Gauld 2020-08-25 10:23:24 -04:00
parent 8e266832a9
commit c4429fd67a
2 changed files with 37 additions and 0 deletions

View File

@ -1825,3 +1825,35 @@ class NotifyInstancesHostDisabledTaskWork(state_machine.StateTaskWork):
self._host.uuid, self._host.name, self._callback()) self._host.uuid, self._host.name, self._callback())
return handled return handled
class WaitHostStabilizeTaskWork(state_machine.StateTaskWork):
    """
    Wait Host Stabilize Task Work

    A task work that performs no action of its own: run() always reports
    WAIT and timeout() reports SUCCESS, so the work item is only considered
    done through its timeout path.
    """
    def __init__(self, task, host, timeout_in_secs=60):
        """
        Create the wait task work for the given host

        task: the owning task, handed through to the base class
        host: the host being waited on; only a weak reference is kept
        timeout_in_secs: timeout handed through to the base class
        """
        work_name = 'wait-host-stabilize_%s' % host.name
        super(WaitHostStabilizeTaskWork, self).__init__(
            work_name, task, timeout_in_secs=timeout_in_secs)
        # Weak reference, so this task work does not keep the host alive.
        self._host_reference = weakref.ref(host)

    @property
    def _host(self):
        """
        Returns the referenced host (None if it has been garbage collected)
        """
        return self._host_reference()

    def timeout(self):
        """
        The timeout is the expected way for this work to end, so report
        success rather than a failure
        """
        # NOTE(review): presumably invoked by the state machine once
        # timeout_in_secs has elapsed - confirm against StateTaskWork.
        outcome = state_machine.STATE_TASK_WORK_RESULT.SUCCESS
        return outcome, empty_reason

    def run(self):
        """
        Start the wait; nothing is done here beyond logging and asking to wait
        """
        DLOG.verbose("Wait-Host-Stabilize for %s." % self._host.name)
        outcome = state_machine.STATE_TASK_WORK_RESULT.WAIT
        return outcome, empty_reason

View File

@ -29,6 +29,7 @@ from nfv_vim.host_fsm._host_task_work import NotifyInstancesHostDisablingTaskWor
from nfv_vim.host_fsm._host_task_work import QueryHypervisorTaskWork from nfv_vim.host_fsm._host_task_work import QueryHypervisorTaskWork
from nfv_vim.host_fsm._host_task_work import WaitHostServicesCreatedTaskWork from nfv_vim.host_fsm._host_task_work import WaitHostServicesCreatedTaskWork
from nfv_vim.host_fsm._host_task_work import WaitHostServicesDisabledTaskWork from nfv_vim.host_fsm._host_task_work import WaitHostServicesDisabledTaskWork
from nfv_vim.host_fsm._host_task_work import WaitHostStabilizeTaskWork
DLOG = debug.debug_get_logger('nfv_vim.state_machine.host_task') DLOG = debug.debug_get_logger('nfv_vim.state_machine.host_task')
@ -235,6 +236,10 @@ class DisableHostTask(state_machine.StateTask):
task_work_list.append(NotifyHostDisabledTaskWork( task_work_list.append(NotifyHostDisabledTaskWork(
self, host, objects.HOST_SERVICES.NETWORK)) self, host, objects.HOST_SERVICES.NETWORK))
task_work_list.append(NotifyInstancesHostDisabledTaskWork(self, host)) task_work_list.append(NotifyInstancesHostDisabledTaskWork(self, host))
# Wait for latent post-live-migration cleanup
if host.host_service_configured(objects.HOST_SERVICES.COMPUTE):
task_work_list.append(WaitHostStabilizeTaskWork(
self, host, timeout_in_secs=10))
if host.host_service_configured(objects.HOST_SERVICES.CONTAINER): if host.host_service_configured(objects.HOST_SERVICES.CONTAINER):
# Only disable the container services if the host is being locked # Only disable the container services if the host is being locked
# (or is already locked) and we are not running in a single # (or is already locked) and we are not running in a single