From e2e63cfc4d5506277b53510701c97fb9f57d0430 Mon Sep 17 00:00:00 2001 From: Luis Tomas Bolivar Date: Fri, 14 Jun 2019 14:07:26 +0200 Subject: [PATCH] Ensure kuryrnet does not perform multiple repopulations This patch makes use of the KuryrNet CRD spec to ensure pools population actions on a new namespace only happen once. Closes-Bug: 1833032 Change-Id: Ia561833d594c55c17a9dc1a588d39bf3410cdf81 --- .../controller/drivers/network_policy.py | 5 ++-- .../drivers/network_policy_security_groups.py | 17 +++++++----- kuryr_kubernetes/controller/drivers/utils.py | 15 ++++++++++- .../controller/handlers/kuryrnet.py | 27 ++++++++++++++++--- .../controller/handlers/namespace.py | 5 ++++ .../test_network_policy_security_groups.py | 18 ++++++------- .../unit/controller/handlers/test_kuryrnet.py | 5 +++- 7 files changed, 69 insertions(+), 23 deletions(-) diff --git a/kuryr_kubernetes/controller/drivers/network_policy.py b/kuryr_kubernetes/controller/drivers/network_policy.py index 260c67620..c7b25832a 100644 --- a/kuryr_kubernetes/controller/drivers/network_policy.py +++ b/kuryr_kubernetes/controller/drivers/network_policy.py @@ -114,8 +114,9 @@ class NetworkPolicyDriver(base.NetworkPolicyDriver): e_rule["security_group_rule"]["id"] = sgr_id # Annotate kuryrnetpolicy CRD with current policy and ruleset pod_selector = policy['spec'].get('podSelector') - driver_utils.patch_kuryr_crd(crd, i_rules, e_rules, pod_selector, - np_spec=policy['spec']) + driver_utils.patch_kuryrnetworkpolicy_crd(crd, i_rules, e_rules, + pod_selector, + np_spec=policy['spec']) if existing_pod_selector != pod_selector: return existing_pod_selector diff --git a/kuryr_kubernetes/controller/drivers/network_policy_security_groups.py b/kuryr_kubernetes/controller/drivers/network_policy_security_groups.py index eb2cc3792..ff35ed7f2 100644 --- a/kuryr_kubernetes/controller/drivers/network_policy_security_groups.py +++ b/kuryr_kubernetes/controller/drivers/network_policy_security_groups.py @@ -442,8 +442,9 @@ class NetworkPolicySecurityGroupsDriver(base.PodSecurityGroupsDriver): e_matched, e_rules = _parse_rules('egress', crd, pod=pod) if i_matched or e_matched: - driver_utils.patch_kuryr_crd(crd, i_rules, - e_rules, crd_selector) + driver_utils.patch_kuryrnetworkpolicy_crd(crd, i_rules, + e_rules, + crd_selector) crd_pod_selectors.append(crd_selector) return crd_pod_selectors @@ -463,8 +464,9 @@ class NetworkPolicySecurityGroupsDriver(base.PodSecurityGroupsDriver): egress_rule_list, "egress", pod_ip) if i_matched or e_matched: - driver_utils.patch_kuryr_crd(crd, i_rules, e_rules, - crd_selector) + driver_utils.patch_kuryrnetworkpolicy_crd(crd, i_rules, + e_rules, + crd_selector) crd_pod_selectors.append(crd_selector) return crd_pod_selectors @@ -492,7 +494,7 @@ class NetworkPolicySecurityGroupsDriver(base.PodSecurityGroupsDriver): egress_rule_list, "egress", ns_name) if i_matched or e_matched: - driver_utils.patch_kuryr_crd( + driver_utils.patch_kuryrnetworkpolicy_crd( crd, i_rules, e_rules, crd_selector) def create_namespace_sg_rules(self, namespace): @@ -511,8 +513,9 @@ class NetworkPolicySecurityGroupsDriver(base.PodSecurityGroupsDriver): 'egress', crd, namespace=namespace) if i_matched or e_matched: - driver_utils.patch_kuryr_crd(crd, i_rules, - e_rules, crd_selector) + driver_utils.patch_kuryrnetworkpolicy_crd(crd, i_rules, + e_rules, + crd_selector) def update_namespace_sg_rules(self, namespace): LOG.debug("Updating sg rule for namespace: %s", diff --git a/kuryr_kubernetes/controller/drivers/utils.py b/kuryr_kubernetes/controller/drivers/utils.py index 6aa290b8f..db3387f40 100644 --- a/kuryr_kubernetes/controller/drivers/utils.py +++ b/kuryr_kubernetes/controller/drivers/utils.py @@ -218,7 +218,20 @@ def delete_security_group_rule(security_group_rule_id): raise -def patch_kuryr_crd(crd, i_rules, e_rules, pod_selector, np_spec=None): +def patch_kuryrnet_crd(crd, populated=True): + kubernetes = clients.get_kubernetes_client() + crd_name = crd['metadata']['name'] + LOG.debug('Patching KuryrNet CRD %s' % crd_name) + try: + kubernetes.patch_crd('spec', crd['metadata']['selfLink'], + {'populated': populated}) + except k_exc.K8sClientException: + LOG.exception('Error updating kuryrnet CRD %s', crd_name) + raise + + +def patch_kuryrnetworkpolicy_crd(crd, i_rules, e_rules, pod_selector, + np_spec=None): kubernetes = clients.get_kubernetes_client() crd_name = crd['metadata']['name'] if not np_spec: diff --git a/kuryr_kubernetes/controller/handlers/kuryrnet.py b/kuryr_kubernetes/controller/handlers/kuryrnet.py index 34f8b3c2e..bb4a7be80 100644 --- a/kuryr_kubernetes/controller/handlers/kuryrnet.py +++ b/kuryr_kubernetes/controller/handlers/kuryrnet.py @@ -16,6 +16,8 @@ from oslo_log import log as logging from kuryr_kubernetes import constants from kuryr_kubernetes.controller.drivers import base as drivers +from kuryr_kubernetes.controller.drivers import utils as driver_utils +from kuryr_kubernetes import exceptions from kuryr_kubernetes.handlers import k8s_base from kuryr_kubernetes import utils @@ -42,18 +44,37 @@ class KuryrNetHandler(k8s_base.ResourceEventHandler): def on_added(self, kuryrnet_crd): namespace = kuryrnet_crd['metadata']['annotations'].get( 'namespaceName') + subnet_id = kuryrnet_crd['spec'].get('subnetId') + if kuryrnet_crd['spec'].get('populated'): + LOG.debug("Subnet %s already populated", subnet_id) + return + # NOTE(ltomasbo): using namespace name instead of object as it is not # required project_id = self._drv_project.get_project(namespace) - subnet_id = kuryrnet_crd['spec'].get('subnetId') subnets = self._drv_subnets.get_namespace_subnet(namespace, subnet_id) sg_id = kuryrnet_crd['spec'].get('sgId', []) nodes = utils.get_nodes_ips() + # NOTE(ltomasbo): Patching the kuryrnet_crd here instead of after + # populate_pool method to ensure initial repopulation is not happening + # twice upon unexpected problems, such as neutron failing to + # transition the ports to ACTIVE or being too slow replying. + # In such case, even though the repopulation actions got triggered, + # the pools will not get the ports loaded (as they are not ACTIVE) + # and new population actions may be triggered if the controller was + # restarted before performing the populated=true patching. + driver_utils.patch_kuryrnet_crd(kuryrnet_crd, populated=True) # TODO(ltomasbo): Skip the master node where pods are not usually # allocated. for node_ip in nodes: LOG.debug("Populating subnet pool %s at node %s", subnet_id, node_ip) - self._drv_vif_pool.populate_pool(node_ip, project_id, subnets, - sg_id) + try: + self._drv_vif_pool.populate_pool(node_ip, project_id, subnets, + sg_id) + except exceptions.ResourceNotReady: + # Ensure the repopulation is retriggered if the system was not + # yet ready to perform the repopulation actions + driver_utils.patch_kuryrnet_crd(kuryrnet_crd, populated=False) + raise diff --git a/kuryr_kubernetes/controller/handlers/namespace.py b/kuryr_kubernetes/controller/handlers/namespace.py index 88a1af5ff..748af0555 100644 --- a/kuryr_kubernetes/controller/handlers/namespace.py +++ b/kuryr_kubernetes/controller/handlers/namespace.py @@ -177,6 +177,11 @@ class NamespaceHandler(k8s_base.ResourceEventHandler): kubernetes = clients.get_kubernetes_client() net_crd_name = "ns-" + namespace spec = {k: v for k, v in net_crd_spec.items()} + # NOTE(ltomasbo): To know if the subnet has bee populated with pools. + # This is only needed by the kuryrnet handler to skip actions. But its + # addition does not have any impact if not used + spec['populated'] = False + net_crd = { 'apiVersion': 'openstack.org/v1', 'kind': 'KuryrNet', diff --git a/kuryr_kubernetes/tests/unit/controller/drivers/test_network_policy_security_groups.py b/kuryr_kubernetes/tests/unit/controller/drivers/test_network_policy_security_groups.py index 5ba52c1de..6b1895df5 100644 --- a/kuryr_kubernetes/tests/unit/controller/drivers/test_network_policy_security_groups.py +++ b/kuryr_kubernetes/tests/unit/controller/drivers/test_network_policy_security_groups.py @@ -408,14 +408,14 @@ class TestNetworkPolicySecurityGroupsDriver(test_base.TestCase): self.assertEqual(matched, False) @mock.patch('kuryr_kubernetes.controller.drivers.utils.' - 'patch_kuryr_crd') + 'patch_kuryrnetworkpolicy_crd') @mock.patch('kuryr_kubernetes.controller.drivers.utils.' 'get_kuryrnetpolicy_crds') @mock.patch('kuryr_kubernetes.controller.drivers.utils.' 'delete_security_group_rule') @mock.patch('kuryr_kubernetes.controller.drivers.utils.get_pod_ip') def test_delete_sg_rules(self, m_get_pod_ip, m_delete_sg_rule, - m_get_knp_crds, m_patch_kuryr_crd): + m_get_knp_crds, m_patch_kuryrnetworkpolicy_crd): crd = self._crd_with_rule i_rule = crd['spec'].get('ingressSgRules')[0] sgr_id = i_rule['security_group_rule'].get('id') @@ -435,7 +435,7 @@ class TestNetworkPolicySecurityGroupsDriver(test_base.TestCase): m_get_knp_crds.assert_called_once() m_get_pod_ip.assert_called_once_with(pod) m_delete_sg_rule.assert_called_once_with(sgr_id) - m_patch_kuryr_crd.assert_called_with( + m_patch_kuryrnetworkpolicy_crd.assert_called_with( crd, i_rules, e_rules, crd['spec'].get('podSelector')) @mock.patch('kuryr_kubernetes.config.CONF') @@ -529,13 +529,13 @@ class TestNetworkPolicySecurityGroupsDriver(test_base.TestCase): self.assertEqual([str(self._sg_id), str(self._sg_id2)], resp) @mock.patch('kuryr_kubernetes.controller.drivers.utils.' - 'patch_kuryr_crd') + 'patch_kuryrnetworkpolicy_crd') @mock.patch('kuryr_kubernetes.controller.drivers.utils.' 'delete_security_group_rule') @mock.patch('kuryr_kubernetes.controller.drivers.utils.' 'get_kuryrnetpolicy_crds') def test_delete_namespace_sg_rule(self, m_get_knp_crd, m_delete_sg_rule, - m_patch_kuryr_crd): + m_patch_kuryrnetworkpolicy_crd): cls = network_policy_security_groups.NetworkPolicySecurityGroupsDriver m_driver = mock.MagicMock(spec=cls) i_rule = get_matched_crd_obj()['spec']['ingressSgRules'][0] @@ -547,17 +547,17 @@ class TestNetworkPolicySecurityGroupsDriver(test_base.TestCase): m_get_knp_crd.assert_called_once() m_delete_sg_rule.assert_called_once_with(sg_rule_id) - m_patch_kuryr_crd.assert_called_once() + m_patch_kuryrnetworkpolicy_crd.assert_called_once() @mock.patch('kuryr_kubernetes.controller.drivers.utils.' - 'patch_kuryr_crd') + 'patch_kuryrnetworkpolicy_crd') @mock.patch('kuryr_kubernetes.controller.drivers.utils.' 'delete_security_group_rule') @mock.patch('kuryr_kubernetes.controller.drivers.utils.' 'get_kuryrnetpolicy_crds') def test_delete_namespace_sg_rule_no_match(self, m_get_knp_crd, m_delete_sg_rule, - m_patch_kuryr_crd): + m_patch_kuryrnetworkpolicy_crd): cls = network_policy_security_groups.NetworkPolicySecurityGroupsDriver m_driver = mock.MagicMock(spec=cls) @@ -568,7 +568,7 @@ class TestNetworkPolicySecurityGroupsDriver(test_base.TestCase): m_get_knp_crd.assert_called_once() m_delete_sg_rule.assert_not_called() - m_patch_kuryr_crd.assert_not_called() + m_patch_kuryrnetworkpolicy_crd.assert_not_called() @mock.patch('kuryr_kubernetes.controller.drivers.' 'network_policy_security_groups._create_sg_rule') diff --git a/kuryr_kubernetes/tests/unit/controller/handlers/test_kuryrnet.py b/kuryr_kubernetes/tests/unit/controller/handlers/test_kuryrnet.py index b1f1430cd..6c303a371 100644 --- a/kuryr_kubernetes/tests/unit/controller/handlers/test_kuryrnet.py +++ b/kuryr_kubernetes/tests/unit/controller/handlers/test_kuryrnet.py @@ -16,6 +16,7 @@ import mock from kuryr_kubernetes.controller.drivers import base as drivers from kuryr_kubernetes.controller.drivers import namespace_subnet as subnet_drv +from kuryr_kubernetes.controller.drivers import utils as driver_utils from kuryr_kubernetes.controller.drivers import vif_pool from kuryr_kubernetes.controller.handlers import kuryrnet from kuryr_kubernetes.tests import base as test_base @@ -76,8 +77,9 @@ class TestKuryrNetHandler(test_base.TestCase): self.assertEqual(subnet_driver, handler._drv_subnets) self.assertEqual(vif_pool_driver, handler._drv_vif_pool) + @mock.patch.object(driver_utils, 'patch_kuryrnet_crd') @mock.patch.object(utils, 'get_nodes_ips') - def test_on_added(self, m_get_nodes_ips): + def test_on_added(self, m_get_nodes_ips, m_patch_kn_crd): m_get_nodes_ips.return_value = ['node-ip'] kuryrnet.KuryrNetHandler.on_added(self._handler, self._kuryrnet_crd) @@ -90,3 +92,4 @@ class TestKuryrNetHandler(test_base.TestCase): self._project_id, self._subnets, []) + m_patch_kn_crd.assert_called_once()