diff --git a/kuryr_kubernetes/controller/drivers/namespace_subnet.py b/kuryr_kubernetes/controller/drivers/namespace_subnet.py index f8913e201..65f782ee8 100644 --- a/kuryr_kubernetes/controller/drivers/namespace_subnet.py +++ b/kuryr_kubernetes/controller/drivers/namespace_subnet.py @@ -56,6 +56,9 @@ class NamespacePodSubnetDriver(default_subnet.DefaultPodSubnetDriver): try: ns = kubernetes.get('%s/namespaces/%s' % (constants.K8S_API_BASE, namespace)) + except exceptions.K8sResourceNotFound: + LOG.warning("Namespace %s not found", namespace) + raise except exceptions.K8sClientException: LOG.exception("Kubernetes Client Exception.") raise exceptions.ResourceNotReady(namespace) diff --git a/kuryr_kubernetes/controller/drivers/network_policy_security_groups.py b/kuryr_kubernetes/controller/drivers/network_policy_security_groups.py index becd72e0e..5febe1d4b 100644 --- a/kuryr_kubernetes/controller/drivers/network_policy_security_groups.py +++ b/kuryr_kubernetes/controller/drivers/network_policy_security_groups.py @@ -532,12 +532,9 @@ class NetworkPolicySecurityGroupsDriver(base.PodSecurityGroupsDriver): return crd_selectors def create_namespace_sg_rules(self, namespace): - kubernetes = clients.get_kubernetes_client() ns_name = namespace['metadata']['name'] LOG.debug("Creating sg rule for namespace: %s", ns_name) crd_selectors = [] - namespace = kubernetes.get( - '{}/namespaces/{}'.format(constants.K8S_API_BASE, ns_name)) knp_crds = driver_utils.get_kuryrnetpolicy_crds() for crd in knp_crds.get('items'): crd_selector = crd['spec'].get('podSelector') diff --git a/kuryr_kubernetes/controller/handlers/lbaas.py b/kuryr_kubernetes/controller/handlers/lbaas.py index 701a654a6..ffbd6c106 100644 --- a/kuryr_kubernetes/controller/handlers/lbaas.py +++ b/kuryr_kubernetes/controller/handlers/lbaas.py @@ -306,7 +306,11 @@ class LoadBalancerHandler(k8s_base.ResourceEventHandler): def _add_new_members(self, endpoints, lbaas_state, lbaas_spec): changed = False - self._sync_lbaas_sgs(endpoints, lbaas_state, lbaas_spec) + try: + self._sync_lbaas_sgs(endpoints, lbaas_state, lbaas_spec) + except k_exc.K8sResourceNotFound: + LOG.debug("The svc has been deleted while processing the endpoints" + " update. No need to add new members.") lsnr_by_id = {l.id: l for l in lbaas_state.listeners} pool_by_lsnr_port = {(lsnr_by_id[p.listener_id].protocol, diff --git a/kuryr_kubernetes/controller/handlers/namespace.py b/kuryr_kubernetes/controller/handlers/namespace.py index 64c6e15db..4d691415c 100644 --- a/kuryr_kubernetes/controller/handlers/namespace.py +++ b/kuryr_kubernetes/controller/handlers/namespace.py @@ -120,8 +120,9 @@ class NamespaceHandler(k8s_base.ResourceEventHandler): net_crd = self._add_kuryrnet_crd(ns_name, net_crd_spec) self._set_net_crd(namespace, net_crd) self._drv_sg.create_namespace_sg_rules(namespace) - except exceptions.K8sClientException: - LOG.exception("Kubernetes client exception. Rolling back " + except (exceptions.K8sClientException, + exceptions.K8sResourceNotFound): + LOG.exception("Kuryrnet CRD creation failed. Rolling back " "resources created for the namespace.") self._drv_subnets.rollback_network_resources(net_crd_spec, ns_name) if net_crd_sg.get('sgId'): diff --git a/kuryr_kubernetes/controller/handlers/pod_label.py b/kuryr_kubernetes/controller/handlers/pod_label.py index df08714f2..6c3ff9860 100644 --- a/kuryr_kubernetes/controller/handlers/pod_label.py +++ b/kuryr_kubernetes/controller/handlers/pod_label.py @@ -20,6 +20,7 @@ from kuryr_kubernetes import clients from kuryr_kubernetes import constants from kuryr_kubernetes.controller.drivers import base as drivers from kuryr_kubernetes.controller.drivers import utils as driver_utils +from kuryr_kubernetes import exceptions as k_exc from kuryr_kubernetes.handlers import k8s_base LOG = logging.getLogger(__name__) @@ -64,7 +65,11 @@ class PodLabelHandler(k8s_base.ResourceEventHandler): project_id = self._drv_project.get_project(pod) security_groups = self._drv_sg.get_security_groups(pod, project_id) self._drv_vif_pool.update_vif_sgs(pod, security_groups) - self._set_pod_labels(pod, current_pod_labels) + try: + self._set_pod_labels(pod, current_pod_labels) + except k_exc.K8sResourceNotFound: + LOG.debug("Pod already deleted, no need to retry.") + return if oslo_cfg.CONF.octavia_defaults.enforce_sg_rules: services = driver_utils.get_services() diff --git a/kuryr_kubernetes/controller/handlers/vif.py b/kuryr_kubernetes/controller/handlers/vif.py index db2a7d106..6739c529b 100644 --- a/kuryr_kubernetes/controller/handlers/vif.py +++ b/kuryr_kubernetes/controller/handlers/vif.py @@ -94,7 +94,7 @@ class VIFHandler(k8s_base.ResourceEventHandler): if not state: try: subnets = self._drv_subnets.get_subnets(pod, project_id) - except n_exc.NotFound: + except (n_exc.NotFound, k_exc.K8sResourceNotFound): LOG.warning("Subnet does not exists. If namespace driver is " "used, probably the namespace for the pod is " "already deleted. So this pod does not need to " diff --git a/kuryr_kubernetes/handlers/retry.py b/kuryr_kubernetes/handlers/retry.py index 91bd12299..2501004bd 100644 --- a/kuryr_kubernetes/handlers/retry.py +++ b/kuryr_kubernetes/handlers/retry.py @@ -20,6 +20,7 @@ from neutronclient.common import exceptions as n_exc from oslo_log import log as logging from oslo_utils import excutils +from kuryr_kubernetes import clients from kuryr_kubernetes import exceptions from kuryr_kubernetes.handlers import base from kuryr_kubernetes import utils @@ -48,10 +49,31 @@ class Retry(base.EventHandler): self._exceptions = exceptions self._timeout = timeout self._interval = interval + self._k8s = clients.get_kubernetes_client() def __call__(self, event): deadline = time.time() + self._timeout for attempt in itertools.count(1): + if event.get('type') in ['MODIFIED', 'ADDED']: + obj = event.get('object') + if obj: + try: + obj_link = obj['metadata']['selfLink'] + except KeyError: + LOG.debug("Skipping object check as it does not have " + "selfLink: %s", obj) + else: + try: + self._k8s.get(obj_link) + except exceptions.K8sResourceNotFound: + LOG.debug("There is no need to process the " + "retry as the object %s has already " + "been deleted.", obj_link) + return + except exceptions.K8sClientException: + LOG.debug("Kubernetes client error getting the " + "object. Continuing with handler " + "execution.") try: self._handler(event) break diff --git a/kuryr_kubernetes/tests/unit/handlers/test_retry.py b/kuryr_kubernetes/tests/unit/handlers/test_retry.py index 4e1be39d0..658df5b19 100644 --- a/kuryr_kubernetes/tests/unit/handlers/test_retry.py +++ b/kuryr_kubernetes/tests/unit/handlers/test_retry.py @@ -17,8 +17,10 @@ import fixtures import mock import time +from kuryr_kubernetes import exceptions from kuryr_kubernetes.handlers import retry as h_retry from kuryr_kubernetes.tests import base as test_base +from kuryr_kubernetes.tests.unit import kuryr_fixtures as k_fix class _EX1(Exception): @@ -42,6 +44,11 @@ class TestRetryHandler(test_base.TestCase): f_time = self.useFixture(fixtures.MockPatch('time.time')) f_time.mock.return_value = self.now + self.k8s = self.useFixture(k_fix.MockK8sClient()).client + f_k8s = self.useFixture(fixtures.MockPatch( + 'kuryr_kubernetes.clients.get_kubernetes_client')) + f_k8s.mock.return_value = self.k8s + @mock.patch('random.randint') @mock.patch('time.sleep') def test_should_not_sleep(self, m_sleep, m_randint): @@ -87,10 +94,27 @@ class TestRetryHandler(test_base.TestCase): m_handler = mock.Mock() m_count.return_value = list(range(1, 5)) retry = h_retry.Retry(m_handler) + event = {'type': 'DELETED'} - retry(mock.sentinel.event) + retry(event) - m_handler.assert_called_once_with(mock.sentinel.event) + m_handler.assert_called_once_with(event) + m_sleep.assert_not_called() + + @mock.patch('itertools.count') + @mock.patch.object(h_retry.Retry, '_sleep') + def test_call_outdated_event(self, m_sleep, m_count): + m_handler = mock.Mock() + m_count.return_value = list(range(1, 5)) + obj = {'metadata': {'selfLink': mock.sentinel.selflink}} + event = {'type': 'MODIFIED', 'object': obj} + self.k8s.get.side_effect = exceptions.K8sResourceNotFound(obj) + + retry = h_retry.Retry(m_handler) + retry(event) + + self.k8s.get.assert_called_once_with(obj['metadata']['selfLink']) + m_handler.assert_not_called() m_sleep.assert_not_called() @mock.patch('itertools.count') @@ -100,7 +124,7 @@ class TestRetryHandler(test_base.TestCase): timeout = 10 deadline = self.now + timeout failures = [_EX1()] * (attempts - 1) - event = mock.sentinel.event + event = {'type': 'DELETED'} m_handler = mock.Mock() m_handler.side_effect = failures + [None] m_sleep.return_value = 1 @@ -121,7 +145,7 @@ class TestRetryHandler(test_base.TestCase): timeout = 10 deadline = self.now + timeout failures = [_EX1(), _EX1(), _EX11()] - event = mock.sentinel.event + event = {'type': 'DELETED'} m_handler = mock.Mock() m_handler.side_effect = failures m_sleep.side_effect = [1] * (attempts - 1) + [0] diff --git a/kuryr_kubernetes/utils.py b/kuryr_kubernetes/utils.py index a6a27b722..2e26e9ac3 100644 --- a/kuryr_kubernetes/utils.py +++ b/kuryr_kubernetes/utils.py @@ -40,7 +40,7 @@ VALID_MULTI_POD_POOLS_OPTS = {'noop': ['neutron-vif', 'neutron': ['neutron-vif'], 'nested': ['nested-vlan'], } -DEFAULT_TIMEOUT = 180 +DEFAULT_TIMEOUT = 500 DEFAULT_INTERVAL = 3 subnet_caching_opts = [