From c4429fd67a3e4bc669d5fbc18921eb6be79fc214 Mon Sep 17 00:00:00 2001 From: Jim Gauld Date: Tue, 25 Aug 2020 10:23:24 -0400 Subject: [PATCH] Add host stabilization wait for post-live-migration cleanup This adds a WaitHostStabilizeTaskWork routine to be called after all VMs have migrated from the host during host-disable. Post-live-migration cleanup at the compute source host occurs after the nova database instances table gets updated with the destination host and task_state None. This source cleanup period has been observed to take up to 3 seconds for: disk, neutron ports, migration record, and console. Nova does not provide a deterministic way to indicate that live-migration, including the cleanup, has completed. This update gives a sufficient wait (i.e., 10 seconds) for post-live-migration to complete at the source before the host is disabled and pods are shut down. Change-Id: Id6d500b627dea8057807bd7dfa07899bd205d3e6 Closes-Bug: 1892885 Signed-off-by: Jim Gauld --- .../nfv_vim/host_fsm/_host_task_work.py | 32 +++++++++++++++++++ nfv/nfv-vim/nfv_vim/host_fsm/_host_tasks.py | 5 +++ 2 files changed, 37 insertions(+) diff --git a/nfv/nfv-vim/nfv_vim/host_fsm/_host_task_work.py b/nfv/nfv-vim/nfv_vim/host_fsm/_host_task_work.py index 010b1817..7d25b664 100755 --- a/nfv/nfv-vim/nfv_vim/host_fsm/_host_task_work.py +++ b/nfv/nfv-vim/nfv_vim/host_fsm/_host_task_work.py @@ -1825,3 +1825,35 @@ class NotifyInstancesHostDisabledTaskWork(state_machine.StateTaskWork): self._host.uuid, self._host.name, self._callback()) return handled + + +class WaitHostStabilizeTaskWork(state_machine.StateTaskWork): + """ + Wait Host Stabilize Task Work + """ + def __init__(self, task, host, timeout_in_secs=60): + super(WaitHostStabilizeTaskWork, self).__init__( + 'wait-host-stabilize_%s' % host.name, task, + timeout_in_secs=timeout_in_secs) + self._host_reference = weakref.ref(host) + + @property + def _host(self): + """ + Returns the host + """ + host = self._host_reference() + return host + + def 
timeout(self): + """ + Timeout is expected, so override to pass + """ + return state_machine.STATE_TASK_WORK_RESULT.SUCCESS, empty_reason + + def run(self): + """ + Run wait host stabilize + """ + DLOG.verbose("Wait-Host-Stabilize for %s." % self._host.name) + return state_machine.STATE_TASK_WORK_RESULT.WAIT, empty_reason diff --git a/nfv/nfv-vim/nfv_vim/host_fsm/_host_tasks.py b/nfv/nfv-vim/nfv_vim/host_fsm/_host_tasks.py index b31972c3..8f34d3c0 100755 --- a/nfv/nfv-vim/nfv_vim/host_fsm/_host_tasks.py +++ b/nfv/nfv-vim/nfv_vim/host_fsm/_host_tasks.py @@ -29,6 +29,7 @@ from nfv_vim.host_fsm._host_task_work import NotifyInstancesHostDisablingTaskWor from nfv_vim.host_fsm._host_task_work import QueryHypervisorTaskWork from nfv_vim.host_fsm._host_task_work import WaitHostServicesCreatedTaskWork from nfv_vim.host_fsm._host_task_work import WaitHostServicesDisabledTaskWork +from nfv_vim.host_fsm._host_task_work import WaitHostStabilizeTaskWork DLOG = debug.debug_get_logger('nfv_vim.state_machine.host_task') @@ -235,6 +236,10 @@ class DisableHostTask(state_machine.StateTask): task_work_list.append(NotifyHostDisabledTaskWork( self, host, objects.HOST_SERVICES.NETWORK)) task_work_list.append(NotifyInstancesHostDisabledTaskWork(self, host)) + # Wait for latent post-live-migration cleanup + if host.host_service_configured(objects.HOST_SERVICES.COMPUTE): + task_work_list.append(WaitHostStabilizeTaskWork( + self, host, timeout_in_secs=10)) if host.host_service_configured(objects.HOST_SERVICES.CONTAINER): # Only disable the container services if the host is being locked # (or is already locked) and we are not running in a single