From e18ced4d5cc3b5d0f31ffe6c8b24b24f70909285 Mon Sep 17 00:00:00 2001 From: Lingxian Kong Date: Thu, 8 Nov 2018 22:03:26 +1300 Subject: [PATCH] Delete Octavia loadbalancers for fedora atomic k8s driver For k8s cluster, the loadbalancers created for LoadBalancer type services should be deleted before the cluster deletion. Change-Id: I75f44187b7be7d0ffb6a8f195f755de4b1564335 Closes-Bug: #1712062 --- magnum/common/exception.py | 5 + magnum/common/octavia.py | 91 +++++++++++ magnum/conf/cluster.py | 4 + magnum/drivers/common/driver.py | 7 + magnum/drivers/heat/driver.py | 4 +- magnum/drivers/k8s_fedora_atomic_v1/driver.py | 13 ++ magnum/tests/unit/common/test_octavia.py | 141 ++++++++++++++++++ .../handlers/test_cluster_conductor.py | 25 +++- 8 files changed, 286 insertions(+), 4 deletions(-) create mode 100644 magnum/common/octavia.py create mode 100644 magnum/tests/unit/common/test_octavia.py diff --git a/magnum/common/exception.py b/magnum/common/exception.py index e4c77068fd..4f4b5ab4f9 100755 --- a/magnum/common/exception.py +++ b/magnum/common/exception.py @@ -391,3 +391,8 @@ class FederationAlreadyExists(Conflict): class MemberAlreadyExists(Conflict): message = _("A cluster with UUID %(uuid)s is already a member of the" "federation %(federation_name)s.") + + +class PreDeletionFailed(Conflict): + message = _("Failed to pre-delete resources for cluster %(cluster_uuid)s, " + "error: %(msg)s.") diff --git a/magnum/common/octavia.py b/magnum/common/octavia.py new file mode 100644 index 0000000000..8bff56737b --- /dev/null +++ b/magnum/common/octavia.py @@ -0,0 +1,91 @@ +# Copyright 2018 Catalyst Cloud Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from oslo_config import cfg +from oslo_log import log as logging +import re +import time + +from magnum.common import clients +from magnum.common import exception + +LOG = logging.getLogger(__name__) +CONF = cfg.CONF + + +def wait_for_lb_deleted(octavia_client, deleted_lbs): + """Wait for the loadbalancers to be deleted. + + Load balancer deletion API in Octavia is asynchronous so that the called + needs to wait if it wants to guarantee the load balancer to be deleted. + The timeout is necessary to avoid waiting infinitely. + """ + timeout = CONF.cluster.pre_delete_lb_timeout + start_time = time.time() + + while True: + lbs = octavia_client.load_balancer_list().get("loadbalancers", []) + lbIDs = set( + [lb["id"] + for lb in lbs if lb["provisioning_status"] != "DELETED"] + ) + if not (deleted_lbs & lbIDs): + break + + if (time.time() - timeout) > start_time: + raise Exception("Timeout waiting for the load balancers " + "%s to be deleted." % deleted_lbs) + + time.sleep(1) + + +def delete_loadbalancers(context, cluster): + """Delete loadbalancers for k8s service. + + This method only works for the k8s cluster with + cloud-provider-openstack manager or controller-manager patched with + this PR: + https://github.com/kubernetes/cloud-provider-openstack/pull/223 + """ + pattern = (r'Kubernetes external service \w+ from cluster %s$' % + cluster.uuid) + valid_status = ["ACTIVE", "ERROR", "PENDING_DELETE", "DELETED"] + + try: + o_client = clients.OpenStackClients(context).octavia() + lbs = o_client.load_balancer_list().get("loadbalancers", []) + + candidates = set() + invalids = set() + for lb in lbs: + if re.match(pattern, lb["description"]): + if lb["provisioning_status"] not in valid_status: + invalids.add(lb["id"]) + continue + if lb["provisioning_status"] in ["ACTIVE", "ERROR"]: + LOG.debug("Deleting load balancer %s for cluster %s", + lb["id"], cluster.uuid) + o_client.load_balancer_delete(lb["id"], cascade=True) + candidates.add(lb["id"]) + + if invalids: + raise Exception("Cannot delete load balancers %s in transitional " + "status." % invalids) + if not candidates: + return + + wait_for_lb_deleted(o_client, candidates) + except Exception as e: + raise exception.PreDeletionFailed(cluster_uuid=cluster.uuid, + msg=str(e)) diff --git a/magnum/conf/cluster.py b/magnum/conf/cluster.py index 015f258412..1b55addcca 100644 --- a/magnum/conf/cluster.py +++ b/magnum/conf/cluster.py @@ -42,6 +42,10 @@ cluster_def_opts = [ default="/var/lib/magnum/certificate-cache", help='Explicitly specify the temporary directory to hold ' 'cached TLS certs.'), + cfg.IntOpt('pre_delete_lb_timeout', + default=60, + help=_('The timeout in seconds to wait for the load balancers' + 'to be deleted.')), ] diff --git a/magnum/drivers/common/driver.py b/magnum/drivers/common/driver.py index eb4fe69832..811b35e693 100644 --- a/magnum/drivers/common/driver.py +++ b/magnum/drivers/common/driver.py @@ -169,6 +169,13 @@ class Driver(object): raise NotImplementedError("Subclasses must implement " "'update_cluster'.") + def pre_delete_cluster(self, context, cluster): + """Delete cloud resources before deleting the cluster. + + Specific driver could implement this method as needed. + """ + return None + @abc.abstractmethod def delete_cluster(self, context, cluster): raise NotImplementedError("Subclasses must implement " diff --git a/magnum/drivers/heat/driver.py b/magnum/drivers/heat/driver.py index d498469c5e..51af553345 100755 --- a/magnum/drivers/heat/driver.py +++ b/magnum/drivers/heat/driver.py @@ -35,7 +35,6 @@ from magnum.drivers.common import driver from magnum.i18n import _ from magnum.objects import fields - LOG = logging.getLogger(__name__) @@ -107,6 +106,9 @@ class HeatDriver(driver.Driver): self._update_stack(context, cluster, scale_manager, rollback) def delete_cluster(self, context, cluster): + self.pre_delete_cluster(context, cluster) + + LOG.info("Starting to delete cluster %s", cluster.uuid) self._delete_stack(context, clients.OpenStackClients(context), cluster) def _create_stack(self, context, osc, cluster, cluster_create_timeout): diff --git a/magnum/drivers/k8s_fedora_atomic_v1/driver.py b/magnum/drivers/k8s_fedora_atomic_v1/driver.py index c4d365565d..6bbe334505 100644 --- a/magnum/drivers/k8s_fedora_atomic_v1/driver.py +++ b/magnum/drivers/k8s_fedora_atomic_v1/driver.py @@ -12,10 +12,16 @@ # License for the specific language governing permissions and limitations # under the License. +from oslo_log import log as logging + +from magnum.common import keystone +from magnum.common import octavia from magnum.drivers.common import k8s_monitor from magnum.drivers.heat import driver from magnum.drivers.k8s_fedora_atomic_v1 import template_def +LOG = logging.getLogger(__name__) + class Driver(driver.HeatDriver): @@ -38,3 +44,10 @@ class Driver(driver.HeatDriver): # the scale_manager. # https://bugs.launchpad.net/magnum/+bug/1746510 return None + + def pre_delete_cluster(self, context, cluster): + """Delete cloud resources before deleting the cluster.""" + if keystone.is_octavia_enabled(): + LOG.info("Starting to delete loadbalancers for cluster %s", + cluster.uuid) + octavia.delete_loadbalancers(context, cluster) diff --git a/magnum/tests/unit/common/test_octavia.py b/magnum/tests/unit/common/test_octavia.py new file mode 100644 index 0000000000..8ba3c84fed --- /dev/null +++ b/magnum/tests/unit/common/test_octavia.py @@ -0,0 +1,141 @@ +# Copyright 2018 Catalyst Cloud Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import mock + +from magnum.common import exception +from magnum.common import octavia +from magnum import objects +from magnum.tests import base +from magnum.tests.unit.db import utils + + +class OctaviaTest(base.TestCase): + def setUp(self): + super(OctaviaTest, self).setUp() + + cluster_dict = utils.get_test_cluster(node_count=1) + self.cluster = objects.Cluster(self.context, **cluster_dict) + + @mock.patch('magnum.common.clients.OpenStackClients') + def test_delete_loadbalancers(self, mock_clients): + mock_lbs = { + "loadbalancers": [ + { + "id": "fake_id_1", + "description": "Kubernetes external service " + "ad3080723f1c211e88adbfa163ee1203 from " + "cluster %s" % self.cluster.uuid, + "name": "fake_name_1", + "provisioning_status": "ACTIVE" + }, + { + "id": "fake_id_2", + "description": "Kubernetes external service " + "a9f9ba08cf28811e89547fa163ea824f from " + "cluster %s" % self.cluster.uuid, + "name": "fake_name_2", + "provisioning_status": "ERROR" + }, + ] + } + mock_octavie_client = mock.MagicMock() + mock_octavie_client.load_balancer_list.side_effect = [ + mock_lbs, {"loadbalancers": []} + ] + osc = mock.MagicMock() + mock_clients.return_value = osc + osc.octavia.return_value = mock_octavie_client + + octavia.delete_loadbalancers(self.context, self.cluster) + + calls = [ + mock.call("fake_id_1", cascade=True), + mock.call("fake_id_2", cascade=True) + ] + mock_octavie_client.load_balancer_delete.assert_has_calls(calls) + + @mock.patch('magnum.common.clients.OpenStackClients') + def test_delete_loadbalancers_with_invalid_lb(self, mock_clients): + osc = mock.MagicMock() + mock_clients.return_value = osc + mock_octavie_client = mock.MagicMock() + osc.octavia.return_value = mock_octavie_client + + mock_lbs = { + "loadbalancers": [ + { + "id": "fake_id_1", + "description": "Kubernetes external service " + "ad3080723f1c211e88adbfa163ee1203 from " + "cluster %s" % self.cluster.uuid, + "name": "fake_name_1", + "provisioning_status": "ACTIVE" + }, + { + "id": "fake_id_2", + "description": "Kubernetes external service " + "a9f9ba08cf28811e89547fa163ea824f from " + "cluster %s" % self.cluster.uuid, + "name": "fake_name_2", + "provisioning_status": "PENDING_UPDATE" + }, + ] + } + mock_octavie_client.load_balancer_list.return_value = mock_lbs + + self.assertRaises( + exception.PreDeletionFailed, + octavia.delete_loadbalancers, + self.context, + self.cluster + ) + mock_octavie_client.load_balancer_delete.assert_called_once_with( + "fake_id_1", cascade=True) + + @mock.patch('magnum.common.clients.OpenStackClients') + def test_delete_loadbalancers_timeout(self, mock_clients): + osc = mock.MagicMock() + mock_clients.return_value = osc + mock_octavie_client = mock.MagicMock() + osc.octavia.return_value = mock_octavie_client + + mock_lbs = { + "loadbalancers": [ + { + "id": "fake_id_1", + "description": "Kubernetes external service " + "ad3080723f1c211e88adbfa163ee1203 from " + "cluster %s" % self.cluster.uuid, + "name": "fake_name_1", + "provisioning_status": "ACTIVE" + }, + { + "id": "fake_id_2", + "description": "Kubernetes external service " + "a9f9ba08cf28811e89547fa163ea824f from " + "cluster %s" % self.cluster.uuid, + "name": "fake_name_2", + "provisioning_status": "ACTIVE" + }, + ] + } + mock_octavie_client.load_balancer_list.return_value = mock_lbs + + self.assertRaises( + exception.PreDeletionFailed, + octavia.delete_loadbalancers, + self.context, + self.cluster + ) diff --git a/magnum/tests/unit/conductor/handlers/test_cluster_conductor.py b/magnum/tests/unit/conductor/handlers/test_cluster_conductor.py index 2ece35cd9c..69a2ac4cd2 100644 --- a/magnum/tests/unit/conductor/handlers/test_cluster_conductor.py +++ b/magnum/tests/unit/conductor/handlers/test_cluster_conductor.py @@ -463,8 +463,10 @@ class TestHandler(db_base.DbTestCase): @patch('magnum.conductor.handlers.cluster_conductor.cert_manager') @patch('magnum.common.clients.OpenStackClients') @patch('magnum.drivers.common.driver.Driver.get_driver') - def test_cluster_delete(self, mock_driver, mock_openstack_client_class, - cert_manager): + @patch('magnum.common.keystone.is_octavia_enabled') + def test_cluster_delete(self, mock_octavia, mock_driver, + mock_openstack_client_class, cert_manager): + mock_octavia.return_value = False mock_driver.return_value = k8s_atomic_dr.Driver() osc = mock.MagicMock() mock_openstack_client_class.return_value = osc @@ -490,9 +492,11 @@ class TestHandler(db_base.DbTestCase): @patch('magnum.conductor.handlers.cluster_conductor.cert_manager') @patch('magnum.common.clients.OpenStackClients') @patch('magnum.drivers.common.driver.Driver.get_driver') - def test_cluster_delete_conflict(self, mock_driver, + @patch('magnum.common.keystone.is_octavia_enabled') + def test_cluster_delete_conflict(self, mock_octavia, mock_driver, mock_openstack_client_class, cert_manager): + mock_octavia.return_value = False mock_driver.return_value = k8s_atomic_dr.Driver() osc = mock.MagicMock() mock_openstack_client_class.return_value = osc @@ -514,3 +518,18 @@ class TestHandler(db_base.DbTestCase): taxonomy.OUTCOME_FAILURE, notifications[1].payload['outcome']) self.assertEqual( 0, cert_manager.delete_certificates_from_cluster.call_count) + + @patch('magnum.drivers.common.driver.Driver.get_driver') + @patch('magnum.common.clients.OpenStackClients') + @patch('magnum.common.keystone.is_octavia_enabled') + @patch('magnum.common.octavia.delete_loadbalancers') + def test_cluster_delete_with_lb(self, mock_delete_lb, mock_octavia, + mock_clients, mock_driver): + mock_octavia.return_value = True + mock_driver.return_value = k8s_atomic_dr.Driver() + + self.handler.cluster_delete(self.context, self.cluster.uuid) + + notifications = fake_notifier.NOTIFICATIONS + self.assertEqual(1, len(notifications)) + self.assertEqual(1, mock_delete_lb.call_count)