[K8S] Delete all related load balancers before deleting cluster

When deleting a cluster, Magnum currently only deletes the load balancers
for Kubernetes services/ingresses before deleting the Heat stack. Stack
deletion removes resources in dependency order, which means that for
Octavia resources the members are deleted first, then the pools, the
listeners, and finally the load balancer. The whole process is error-prone,
especially because the Octavia controller needs to talk to the amphora for
each API call before the load balancer is deleted; if any step fails, the
whole deletion operation fails.
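For context, a rough sketch of the kind of piecewise, dependency-ordered
teardown described above, expressed with python-octaviaclient calls (a
sketch only, not part of this patch; the helper name and the dict shapes
are assumptions):

    # Illustrative sketch: a call-per-resource teardown in dependency order.
    # `client` is assumed to be a python-octaviaclient OctaviaAPI instance;
    # every call below is a separate Octavia operation that can fail mid-way.
    def delete_lb_piecewise(client, lb):
        # Members first, then pools, then listeners, and only then the
        # load balancer itself.
        for pool_ref in lb.get("pools", []):
            members = client.member_list(pool_ref["id"]).get("members", [])
            for member in members:
                client.member_delete(pool_ref["id"], member["id"])
            client.pool_delete(pool_ref["id"])
        for listener_ref in lb.get("listeners", []):
            client.listener_delete(listener_ref["id"])
        client.load_balancer_delete(lb["id"])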

Octavia provides a cascade deletion API[1] for load balancers, which
deletes all related resources in a single API call and doesn't involve any
communication between the Octavia controller and the amphora instance.
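With cascade deletion the whole teardown collapses into one call. A minimal
sketch, using the same python-octaviaclient method Magnum already wraps
(the session/endpoint plumbing and the helper name are assumptions):

    # Minimal sketch: one cascade DELETE instead of a per-resource teardown.
    # Magnum obtains an equivalent client via
    # clients.OpenStackClients(context).octavia().
    from octaviaclient.api.v2 import octavia as octavia_api

    def cascade_delete_lb(session, endpoint, lb_id):
        client = octavia_api.OctaviaAPI(session=session,
                                        service_type='load-balancer',
                                        endpoint=endpoint)
        # Deletes the load balancer together with its listeners, pools and
        # members in a single API call.
        client.load_balancer_delete(lb_id, cascade=True)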

This patch deletes the api/etcd load balancers (if applicable) as well
before deleting the Heat stack, making the cluster deletion process more
robust.
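A condensed sketch of how the api/etcd load balancers are located before
the stack is removed (the helper name is an assumption; the real code is in
the octavia helper hunk below):

    # Condensed sketch of the lookup this patch performs: find the load
    # balancer resources in the (nested) Heat stack, then resolve them in
    # Octavia. The helper name is an assumption.
    from osc_lib import exceptions as osc_exc

    LB_RESOURCE_TYPE = "Magnum::Optional::Neutron::LBaaS::LoadBalancer"

    def find_api_etcd_lbs(heat_client, octavia_client, stack_id):
        lbs = []
        resources = heat_client.resources.list(
            stack_id, nested_depth=2, filters={"type": LB_RESOURCE_TYPE})
        for res in resources:
            try:
                lbs.append(octavia_client.load_balancer_show(
                    res.physical_resource_id))
            except osc_exc.NotFound:
                # The stack may still reference a load balancer that is
                # already gone.
                continue
        return lbs

The load balancers found this way are then cascade-deleted before the Heat
stack deletion starts.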

[1]: https://docs.openstack.org/api-ref/load-balancer/v2/index.html?expanded=remove-a-load-balancer-detail#remove-a-load-balancer

story: 2007657
task: 39743
Change-Id: Ibe8f788559d0977475d0991fc99ad91ccfd7dca7
Lingxian Kong 2020-04-02 23:45:32 +13:00
parent c32c7e03bf
commit 33cc92efe2
4 changed files with 96 additions and 89 deletions

@@ -47,8 +47,8 @@ def delete_floatingip(context, fix_port_id, cluster):
id = fips["floatingips"][0]["id"]
if re.match(pattern, desc):
LOG.debug("Deleting floating ip %s for cluster %s", id,
cluster.uuid)
LOG.info("Deleting floating ip %s for cluster %s", id,
cluster.uuid)
n_client.delete_floatingip(id)
except Exception as e:
raise exception.PreDeletionFailed(cluster_uuid=cluster.uuid,

@@ -11,13 +11,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo_config import cfg
from oslo_log import log as logging
import re
import time
from osc_lib import exceptions as osc_exc
from oslo_config import cfg
from oslo_log import log as logging
from magnum.common import clients
from magnum.common import context as magnum_context
from magnum.common import exception
from magnum.common import neutron
@@ -51,48 +53,72 @@ def wait_for_lb_deleted(octavia_client, deleted_lbs):
time.sleep(1)
def _delete_loadbalancers(context, lbs, cluster, octavia_client,
remove_fip=False, cascade=True):
candidates = set()
for lb in lbs:
status = lb["provisioning_status"]
if status not in ["PENDING_DELETE", "DELETED"]:
LOG.info("Deleting load balancer %s for cluster %s",
lb["id"], cluster.uuid)
octavia_client.load_balancer_delete(lb["id"], cascade=cascade)
candidates.add(lb["id"])
if remove_fip:
neutron.delete_floatingip(context, lb["vip_port_id"], cluster)
return candidates
def delete_loadbalancers(context, cluster):
"""Delete loadbalancers for kubernetes resources.
"""Delete loadbalancers for the cluster.
This method only works for the k8s cluster with
cloud-provider-openstack manager or controller-manager patched with
this PR:
https://github.com/kubernetes/cloud-provider-openstack/pull/223
The load balancers created for kubernetes services and ingresses are
deleted.
The following load balancers are deleted:
- The load balancers created for Kubernetes services and ingresses in
the Kubernetes cluster.
- The load balancers created for Kubernetes API and etcd for HA cluster.
"""
pattern = (r'Kubernetes .+ from cluster %s' % cluster.uuid)
valid_status = ["ACTIVE", "ERROR", "PENDING_DELETE", "DELETED"]
lb_resource_type = "Magnum::Optional::Neutron::LBaaS::LoadBalancer"
adm_ctx = magnum_context.get_admin_context()
adm_clients = clients.OpenStackClients(adm_ctx)
user_clients = clients.OpenStackClients(context)
candidates = set()
try:
o_client = clients.OpenStackClients(context).octavia()
lbs = o_client.load_balancer_list().get("loadbalancers", [])
octavia_client_adm = adm_clients.octavia()
heat_client = user_clients.heat()
octavia_client = user_clients.octavia()
candidates = set()
invalids = set()
for lb in lbs:
if re.match(pattern, lb["description"]):
if lb["provisioning_status"] not in valid_status:
invalids.add(lb["id"])
continue
if lb["provisioning_status"] in ["ACTIVE", "ERROR"]:
# Delete VIP floating ip if needed.
neutron.delete_floatingip(context, lb["vip_port_id"],
cluster)
# Get load balancers created for service/ingress
lbs = octavia_client.load_balancer_list().get("loadbalancers", [])
lbs = [lb for lb in lbs if re.match(pattern, lb["description"])]
deleted = _delete_loadbalancers(context, lbs, cluster,
octavia_client_adm, remove_fip=True)
candidates.update(deleted)
LOG.debug("Deleting load balancer %s for cluster %s",
lb["id"], cluster.uuid)
o_client.load_balancer_delete(lb["id"], cascade=True)
candidates.add(lb["id"])
# Get load balancers created for Kubernetes api/etcd
lbs = []
lb_resources = heat_client.resources.list(
cluster.stack_id, nested_depth=2,
filters={"type": lb_resource_type})
for lb_res in lb_resources:
lb_id = lb_res.physical_resource_id
try:
lb = octavia_client.load_balancer_show(lb_id)
lbs.append(lb)
except osc_exc.NotFound:
continue
deleted = _delete_loadbalancers(context, lbs, cluster,
octavia_client_adm, remove_fip=False)
candidates.update(deleted)
if invalids:
raise Exception("Cannot delete load balancers %s in transitional "
"status." % invalids)
if not candidates:
return
wait_for_lb_deleted(o_client, candidates)
wait_for_lb_deleted(octavia_client, candidates)
except Exception as e:
raise exception.PreDeletionFailed(cluster_uuid=cluster.uuid,
msg=str(e))

@@ -21,6 +21,11 @@ from magnum.tests import base
from magnum.tests.unit.db import utils
class TestHeatLBResource(object):
def __init__(self, physical_resource_id):
self.physical_resource_id = physical_resource_id
class OctaviaTest(base.TestCase):
def setUp(self):
super(OctaviaTest, self).setUp()
@@ -58,86 +63,57 @@ class OctaviaTest(base.TestCase):
},
]
}
mock_octavie_client = mock.MagicMock()
mock_octavie_client.load_balancer_list.side_effect = [
mock_octavia_client = mock.MagicMock()
mock_octavia_client.load_balancer_list.side_effect = [
mock_lbs, {"loadbalancers": []}
]
mock_octavia_client.load_balancer_show.return_value = {
'id': 'heat_lb_id',
'provisioning_status': 'ACTIVE'
}
mock_heat_client = mock.MagicMock()
mock_heat_client.resources.list.return_value = [
TestHeatLBResource('heat_lb_id')
]
osc = mock.MagicMock()
mock_clients.return_value = osc
osc.octavia.return_value = mock_octavie_client
osc.octavia.return_value = mock_octavia_client
osc.heat.return_value = mock_heat_client
octavia.delete_loadbalancers(self.context, self.cluster)
calls = [
mock.call("fake_id_1", cascade=True),
mock.call("fake_id_2", cascade=True)
mock.call("fake_id_2", cascade=True),
mock.call("heat_lb_id", cascade=True)
]
mock_octavie_client.load_balancer_delete.assert_has_calls(calls)
mock_octavia_client.load_balancer_delete.assert_has_calls(calls)
@mock.patch('magnum.common.clients.OpenStackClients')
def test_delete_loadbalancers_no_candidate(self, mock_clients):
mock_lbs = {
"loadbalancers": []
}
mock_octavie_client = mock.MagicMock()
mock_octavie_client.load_balancer_list.return_value = mock_lbs
mock_octavia_client = mock.MagicMock()
mock_octavia_client.load_balancer_list.return_value = mock_lbs
osc = mock.MagicMock()
mock_clients.return_value = osc
osc.octavia.return_value = mock_octavie_client
osc.octavia.return_value = mock_octavia_client
octavia.delete_loadbalancers(self.context, self.cluster)
self.assertFalse(mock_octavie_client.load_balancer_delete.called)
@mock.patch("magnum.common.neutron.delete_floatingip")
@mock.patch('magnum.common.clients.OpenStackClients')
def test_delete_loadbalancers_with_invalid_lb(self, mock_clients,
mock_delete_fip):
osc = mock.MagicMock()
mock_clients.return_value = osc
mock_octavie_client = mock.MagicMock()
osc.octavia.return_value = mock_octavie_client
mock_lbs = {
"loadbalancers": [
{
"id": "fake_id_1",
"description": "Kubernetes external service "
"ad3080723f1c211e88adbfa163ee1203 from "
"cluster %s" % self.cluster.uuid,
"name": "fake_name_1",
"provisioning_status": "ACTIVE",
"vip_port_id": "c17c1a6e-1868-11e9-84cd-00224d6b7bc1"
},
{
"id": "fake_id_2",
"description": "Kubernetes external service "
"a9f9ba08cf28811e89547fa163ea824f from "
"cluster %s" % self.cluster.uuid,
"name": "fake_name_2",
"provisioning_status": "PENDING_UPDATE",
"vip_port_id": "b4ca07d1-a31e-43e2-891a-7d14f419f342"
},
]
}
mock_octavie_client.load_balancer_list.return_value = mock_lbs
self.assertRaises(
exception.PreDeletionFailed,
octavia.delete_loadbalancers,
self.context,
self.cluster
)
mock_octavie_client.load_balancer_delete.assert_called_once_with(
"fake_id_1", cascade=True)
self.assertFalse(mock_octavia_client.load_balancer_delete.called)
@mock.patch("magnum.common.neutron.delete_floatingip")
@mock.patch('magnum.common.clients.OpenStackClients')
def test_delete_loadbalancers_timeout(self, mock_clients, mock_delete_fip):
osc = mock.MagicMock()
mock_clients.return_value = osc
mock_octavie_client = mock.MagicMock()
osc.octavia.return_value = mock_octavie_client
mock_octavia_client = mock.MagicMock()
osc.octavia.return_value = mock_octavia_client
mock_lbs = {
"loadbalancers": [
@@ -161,7 +137,7 @@ class OctaviaTest(base.TestCase):
},
]
}
mock_octavie_client.load_balancer_list.return_value = mock_lbs
mock_octavia_client.load_balancer_list.return_value = mock_lbs
self.assertRaises(
exception.PreDeletionFailed,

@@ -0,0 +1,5 @@
features:
- |
Magnum now cascade deletes all the load balancers before deleting the
cluster, not only including load balancers for the cluster services and
ingresses, but also those for Kubernetes API/etcd endpoints.