Merge "Wait for pods to terminate when host is locked"

Author: Zuul (committed by Gerrit Code Review)
Date: 2019-10-08 20:21:42 +00:00
Commit: c5f174263a

8 changed files with 327 additions and 1 deletion

View File

@@ -1 +1 @@
-TIS_PATCH_VER=76
+TIS_PATCH_VER=77

View File

@@ -175,3 +175,26 @@ def mark_all_pods_not_ready(node_name, reason):
                    pod.metadata.namespace))
                break
    return


def get_terminating_pods(node_name):
    """
    Get all pods on a node that are terminating
    """
    # Get the client.
    kube_client = get_client()

    # Retrieve the pods on the specified node.
    response = kube_client.list_namespaced_pod(
        "", field_selector="spec.nodeName=%s" % node_name)

    terminating_pods = list()
    pods = response.items
    if pods is not None:
        for pod in pods:
            # The presence of the deletion_timestamp indicates the pod is
            # terminating.
            if pod.metadata.deletion_timestamp is not None:
                terminating_pods.append(pod.metadata.name)

    return Result(','.join(terminating_pods))
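For callers outside this file, a minimal usage sketch (the node name and the print handling are illustrative, not part of this commit). The returned Result exposes the joined pod names as result_data, which the unit tests later in this diff rely on:

    # Hypothetical usage sketch -- assumes kubernetes_client is importable,
    # as in the unit tests below.
    result = kubernetes_client.get_terminating_pods("compute-0")
    if result.result_data:
        # Comma-separated names of pods still terminating on the node.
        print("terminating: %s" % result.result_data)
    else:
        print("no pods terminating")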

View File

@@ -2110,6 +2110,36 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
            callback.send(response)
            callback.close()

    def get_terminating_pods(self, future, host_name, callback):
        """
        Get list of terminating pods on a host
        """
        response = dict()
        response['completed'] = False
        response['reason'] = ''

        try:
            future.set_timeouts(config.CONF.get('nfvi-timeouts', None))

            future.work(kubernetes_client.get_terminating_pods, host_name)
            future.result = (yield)

            if not future.result.is_complete():
                DLOG.error("Kubernetes get_terminating_pods failed, operation "
                           "did not complete, host_name=%s" % host_name)
                return

            response['result-data'] = future.result.data
            response['completed'] = True

        except Exception as e:
            DLOG.exception("Caught exception while trying to get "
                           "terminating pods on %s, error=%s." % (host_name, e))

        finally:
            callback.send(response)
            callback.close()

    def host_rest_api_get_handler(self, request_dispatch):
        """
        Host Rest-API GET handler callback
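The method is a coroutine: future.work schedules the kubernetes_client call on a worker, the framework sends the worker's result back in at (yield), and the caller only ever sees the response dict delivered through callback. A sketch of what a consumer of that dict might do (illustrative only; the field names are the ones set above):

    # Illustrative consumer of the response dict built above.
    def handle_response(response):
        if not response['completed']:
            # 'reason' stays empty unless a failure path filled it in.
            print("get_terminating_pods failed: %s" % response['reason'])
        elif response['result-data']:
            print("still terminating: %s" % response['result-data'])
        else:
            print("no terminating pods")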

View File

@@ -310,3 +310,125 @@ class TestNFVPluginsK8SMarkAllPodsNotReady(testcase.NFVTestCase):
        self.mock_patch_namespaced_pod_status.assert_called_with(
            "test-pod-ready", "test-namespace-1", mock.ANY)
        self.mock_patch_namespaced_pod_status.assert_called_once()


@mock.patch('kubernetes.config.load_kube_config', mock_load_kube_config)
class TestNFVPluginsK8SGetTerminatingPods(testcase.NFVTestCase):

    list_namespaced_pod_result = {
        'test-node-1': kubernetes.client.V1PodList(
            api_version="v1",
            items=[
                kubernetes.client.V1Pod(
                    api_version="v1",
                    kind="Pod",
                    metadata=kubernetes.client.V1ObjectMeta(
                        name="test-pod-not-terminating",
                        namespace="test-namespace-1",
                        deletion_timestamp=None)
                ),
                kubernetes.client.V1Pod(
                    api_version="v1",
                    kind="Pod",
                    metadata=kubernetes.client.V1ObjectMeta(
                        name="test-pod-terminating",
                        namespace="test-namespace-1",
                        deletion_timestamp="2019-10-03T16:54:25Z")
                ),
                kubernetes.client.V1Pod(
                    api_version="v1",
                    kind="Pod",
                    metadata=kubernetes.client.V1ObjectMeta(
                        name="test-pod-not-terminating-2",
                        namespace="test-namespace-1",
                        deletion_timestamp=None)
                )
            ]
        ),
        'test-node-2': kubernetes.client.V1PodList(
            api_version="v1",
            items=[
                kubernetes.client.V1Pod(
                    api_version="v1",
                    kind="Pod",
                    metadata=kubernetes.client.V1ObjectMeta(
                        name="test-pod-not-terminating",
                        namespace="test-namespace-1",
                        deletion_timestamp=None)
                ),
                kubernetes.client.V1Pod(
                    api_version="v1",
                    kind="Pod",
                    metadata=kubernetes.client.V1ObjectMeta(
                        name="test-pod-not-terminating-2",
                        namespace="test-namespace-1",
                        deletion_timestamp=None)
                )
            ]
        ),
        'test-node-3': kubernetes.client.V1PodList(
            api_version="v1",
            items=[
                kubernetes.client.V1Pod(
                    api_version="v1",
                    kind="Pod",
                    metadata=kubernetes.client.V1ObjectMeta(
                        name="test-pod-not-terminating",
                        namespace="test-namespace-1",
                        deletion_timestamp=None)
                ),
                kubernetes.client.V1Pod(
                    api_version="v1",
                    kind="Pod",
                    metadata=kubernetes.client.V1ObjectMeta(
                        name="test-pod-terminating",
                        namespace="test-namespace-1",
                        deletion_timestamp="2019-10-03T16:54:25Z")
                ),
                kubernetes.client.V1Pod(
                    api_version="v1",
                    kind="Pod",
                    metadata=kubernetes.client.V1ObjectMeta(
                        name="test-pod-terminating-2",
                        namespace="test-namespace-1",
                        deletion_timestamp="2019-10-03T16:55:25Z")
                )
            ]
        )
    }

    def setUp(self):
        super(TestNFVPluginsK8SGetTerminatingPods, self).setUp()

        def mock_list_namespaced_pod(obj, namespace, field_selector=""):
            node_name = field_selector.split('spec.nodeName=', 1)[1]
            return self.list_namespaced_pod_result[node_name]

        self.mocked_list_namespaced_pod = mock.patch(
            'kubernetes.client.CoreV1Api.list_namespaced_pod',
            mock_list_namespaced_pod)
        self.mocked_list_namespaced_pod.start()

    def tearDown(self):
        super(TestNFVPluginsK8SGetTerminatingPods, self).tearDown()
        self.mocked_list_namespaced_pod.stop()

    def test_get_terminating_with_terminating(self):
        result = kubernetes_client.get_terminating_pods("test-node-1")
        assert result.result_data == 'test-pod-terminating'

    def test_get_terminating_no_terminating(self):
        result = kubernetes_client.get_terminating_pods("test-node-2")
        assert result.result_data == ''

    def test_get_terminating_with_two_terminating(self):
        result = kubernetes_client.get_terminating_pods("test-node-3")
        assert result.result_data == \
            'test-pod-terminating,test-pod-terminating-2'

View File

@@ -981,6 +981,144 @@ class DisableHostServicesTaskWork(state_machine.StateTaskWork):
        return state_machine.STATE_TASK_WORK_RESULT.WAIT, empty_reason


class WaitHostServicesDisabledTaskWork(state_machine.StateTaskWork):
    """
    Wait Host Services Disabled Task Work
    """
    def __init__(self, task, host, service):
        super(WaitHostServicesDisabledTaskWork, self).__init__(
            'wait-host-services-disabled_%s_%s' % (host.name, service), task,
            timeout_in_secs=180)
        self._host_reference = weakref.ref(host)
        self._service = service
        self._query_inprogress = False
        self._start_timestamp = None

    @property
    def _host(self):
        """
        Returns the host
        """
        host = self._host_reference()
        return host

    def timeout(self):
        """
        Handle task work timeout
        """
        if self._host.is_force_lock():
            DLOG.info("Wait-Host-Services-Disabled timeout for %s, "
                      "force-locking, passing." % self._host.name)
            return state_machine.STATE_TASK_WORK_RESULT.SUCCESS, empty_reason

        if not self._host.has_reason():
            self._host.update_failure_reason(
                "Terminating pods on disabled host %s timed out." %
                self._host.name)
        return state_machine.STATE_TASK_WORK_RESULT.TIMED_OUT, empty_reason

    @coroutine
    def _get_callback(self):
        """
        Callback for get terminating pods
        """
        response = (yield)
        self._query_inprogress = False

        if self.task is not None:
            DLOG.verbose("Get-Terminating-Pods callback for service: "
                         "%s %s, response=%s." %
                         (self._service, self._host.name, response))
            if response['completed']:
                if response['result-data'] != '':
                    DLOG.info("Get-Terminating-Pods callback for %s, "
                              "pods %s terminating" %
                              (self._service, response['result-data']))
                    return

                # An empty response means no terminating pods exist.
                DLOG.info("Get-Terminating-Pods callback for %s, "
                          "no pods are terminating" % self._service)
                self.task.task_work_complete(
                    state_machine.STATE_TASK_WORK_RESULT.SUCCESS,
                    empty_reason)
            else:
                DLOG.info("Get-Terminating-Pods callback for %s, "
                          "failed" % self._host.name)

    @coroutine
    def _extend_callback(self):
        """
        Callback for host services disable extend
        """
        response = (yield)
        if response['completed']:
            DLOG.info("Extended host services disable timeout for host %s."
                      % self._host.name)
            self._host.disable_extend_timestamp = \
                timers.get_monotonic_timestamp_in_ms()
        else:
            DLOG.error("Failed to extend host services disable timeout for "
                       "host %s." % self._host.name)

    def run(self):
        """
        Run wait host services disabled
        """
        from nfv_vim import objects

        DLOG.verbose("Wait-Host-Services-Disabled for %s for service %s." %
                     (self._host.name, self._service))

        now_ms = timers.get_monotonic_timestamp_in_ms()
        self._host.disable_extend_timestamp = now_ms
        self._start_timestamp = now_ms

        if self._service == objects.HOST_SERVICES.CONTAINER:
            # We will wait for a bit before doing our first query to ensure
            # kubernetes has had time to start terminating the pods.
            return state_machine.STATE_TASK_WORK_RESULT.WAIT, empty_reason
        else:
            reason = ("Trying to wait for unknown host service %s" %
                      self._service)
            DLOG.error(reason)
            self._host.update_failure_reason(reason)
            return state_machine.STATE_TASK_WORK_RESULT.FAILED, reason

    def handle_event(self, event, event_data=None):
        """
        Handle events while waiting for host services to be disabled
        """
        from nfv_vim import objects

        handled = False

        if HOST_EVENT.PERIODIC_TIMER == event:
            if self._service == objects.HOST_SERVICES.CONTAINER:
                if not self._query_inprogress:
                    now_ms = timers.get_monotonic_timestamp_in_ms()
                    elapsed_secs = (now_ms - self._start_timestamp) / 1000
                    # Wait 10s before doing our first query
                    if 10 <= elapsed_secs:
                        DLOG.verbose("Wait-Host-Services-Disabled for %s for "
                                     "service %s. Doing query." %
                                     (self._host.name, self._service))
                        self._query_inprogress = True
                        nfvi.nfvi_get_terminating_pods(self._host.name,
                                                       self._get_callback())
            handled = True

        elif HOST_EVENT.AUDIT == event:
            now_ms = timers.get_monotonic_timestamp_in_ms()
            elapsed_secs = (now_ms - self._host.disable_extend_timestamp) / 1000
            if 120 <= elapsed_secs:
                nfvi.nfvi_notify_host_services_disable_extend(
                    self._host.uuid, self._host.name, self._extend_callback())

        return handled


class NotifyHostServicesEnabledTaskWork(state_machine.StateTaskWork):
    """
    Notify Host Services Enabled Task Work
View File

@@ -28,6 +28,7 @@ from nfv_vim.host_fsm._host_task_work import NotifyInstancesHostDisabledTaskWork
from nfv_vim.host_fsm._host_task_work import NotifyInstancesHostDisablingTaskWork
from nfv_vim.host_fsm._host_task_work import QueryHypervisorTaskWork
from nfv_vim.host_fsm._host_task_work import WaitHostServicesCreatedTaskWork
from nfv_vim.host_fsm._host_task_work import WaitHostServicesDisabledTaskWork

DLOG = debug.debug_get_logger('nfv_vim.state_machine.host_task')

@@ -245,6 +246,8 @@ class DisableHostTask(state_machine.StateTask):
            if not sw_mgmt_director.single_controller:
                task_work_list.append(DisableHostServicesTaskWork(
                    self, host, objects.HOST_SERVICES.CONTAINER))
                task_work_list.append(WaitHostServicesDisabledTaskWork(
                    self, host, objects.HOST_SERVICES.CONTAINER))

        task_work_list.append(notify_host_services_task(
            self, host, force_pass=True))
        if host.host_service_configured(objects.HOST_SERVICES.COMPUTE):
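The net effect on the disable flow: when container services are configured (and this is not a single-controller system), the host lock now inserts a wait step between disabling container services and notifying the remaining host services. An abridged sketch of the resulting ordering, with the surrounding entries assumed rather than shown in this hunk:

    # Abridged, assumed ordering of the disable task work list after this
    # change (only the entries relevant to this diff are shown).
    task_work_list = [
        DisableHostServicesTaskWork(task, host, objects.HOST_SERVICES.CONTAINER),
        WaitHostServicesDisabledTaskWork(task, host, objects.HOST_SERVICES.CONTAINER),
        notify_host_services_task(task, host, force_pass=True),
        # ... compute and remaining services follow
    ]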

View File

@@ -101,6 +101,7 @@ from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_get_hosts # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_get_logs # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_get_system_info # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_get_system_state # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_get_terminating_pods # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_get_upgrade # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_lock_host # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_notify_host_failed # noqa: F401

View File

@@ -278,6 +278,15 @@ def nfvi_get_alarm_history(start_period, end_period, callback):
    return cmd_id


def nfvi_get_terminating_pods(host_name, callback):
    """
    Get terminating pods
    """
    cmd_id = _infrastructure_plugin.invoke_plugin('get_terminating_pods',
                                                  host_name, callback=callback)
    return cmd_id


def nfvi_register_host_add_callback(callback):
    """
    Register for host add notifications
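As the host FSM change above shows, the callback argument is a primed coroutine that later receives the response dict built by the plugin. A minimal caller sketch; the coroutine decorator's import path is an assumption, not confirmed by this diff:

    from nfv_common.helpers import coroutine  # import path assumed

    @coroutine
    def _pods_callback():
        # Receives the response dict sent by the infrastructure plugin.
        response = (yield)
        if response['completed']:
            print("terminating pods: %r" % response['result-data'])

    cmd_id = nfvi_get_terminating_pods("compute-0", _pods_callback())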