
[K8S] Delete all related load balancers before deleting cluster

When deleting a cluster, Magnum currently only deletes the load
balancers for Kubernetes services/ingresses before deleting the Heat
stack. Stack deletion removes resources in dependency order, which
means that, for Octavia resources, the member is deleted first, then
the pool, the listener, and finally the load balancer. The whole
process is error-prone: the Octavia controller has to talk to the
amphora for each API call before the load balancer itself is deleted,
and if any step fails, the whole deletion operation fails.
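
To illustrate the fragility, here is a rough sketch of that
dependency-ordered teardown written against python-octaviaclient-style
calls; teardown_stepwise, the _wait_for_lb_active helper and all of the
IDs are illustrative placeholders rather than Magnum code, and the exact
client method signatures should be treated as assumptions:

    import time


    def _wait_for_lb_active(octavia_client, lb_id):
        # Hypothetical helper: poll until the load balancer settles again.
        while True:
            lb = octavia_client.load_balancer_show(lb_id)
            if lb["provisioning_status"] == "ACTIVE":
                return
            if lb["provisioning_status"] == "ERROR":
                raise Exception("Load balancer %s went into ERROR" % lb_id)
            time.sleep(1)


    def teardown_stepwise(octavia_client, lb_id, listener_id, pool_id,
                          member_id):
        # The order Heat follows: member -> pool -> listener -> load
        # balancer. Every step drives an amphora update and can fail
        # independently, aborting the whole sequence.
        octavia_client.member_delete(pool_id, member_id)
        _wait_for_lb_active(octavia_client, lb_id)
        octavia_client.pool_delete(pool_id)
        _wait_for_lb_active(octavia_client, lb_id)
        octavia_client.listener_delete(listener_id)
        _wait_for_lb_active(octavia_client, lb_id)
        octavia_client.load_balancer_delete(lb_id)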

Octavia provides a cascade deletion API[1] for the load balancer, which
deletes all the related resources in a single API call and doesn't
involve communication between the Octavia controller and the amphora
instance.
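
As a minimal sketch of what the cascade call looks like from Magnum's
side (lb_id and the helper function are placeholders; the octavia()
client factory and the cascade=True flag are the ones already used in
magnum/common/octavia.py):

    from magnum.common import clients


    def cascade_delete(context, lb_id):
        # One call removes the listener/pool/member tree together with the
        # load balancer itself, so there is no per-resource amphora
        # round-trip to get wrong.
        octavia_client = clients.OpenStackClients(context).octavia()
        octavia_client.load_balancer_delete(lb_id, cascade=True)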

This patch deletes the api/etcd load balancers (if applicable) before
deleting the Heat stack, making the cluster deletion process more
robust.

[1]: https://docs.openstack.org/api-ref/load-balancer/v2/index.html?expanded=remove-a-load-balancer-detail#remove-a-load-balancer

story: 2007657
task: 39743
Change-Id: Ibe8f788559d0977475d0991fc99ad91ccfd7dca7
(cherry picked from commit 33cc92efe23057aad30ec167364e2930faef82a2)
changes/85/738185/1
Lingxian Kong, committed by Bharat Kunwar
parent commit bcffb630d3
4 changed files with 98 additions and 91 deletions
  1. magnum/common/neutron.py (+2, -2)
  2. magnum/common/octavia.py (+61, -35)
  3. magnum/tests/unit/common/test_octavia.py (+30, -54)
  4. releasenotes/notes/pre-delete-all-loadbalancers-350a69ec787e11ea.yaml (+5, -0)

magnum/common/neutron.py (+2, -2)

@@ -47,8 +47,8 @@ def delete_floatingip(context, fix_port_id, cluster):
         id = fips["floatingips"][0]["id"]
 
         if re.match(pattern, desc):
-            LOG.debug("Deleting floating ip %s for cluster %s", id,
-                      cluster.uuid)
+            LOG.info("Deleting floating ip %s for cluster %s", id,
+                     cluster.uuid)
             n_client.delete_floatingip(id)
     except Exception as e:
         raise exception.PreDeletionFailed(cluster_uuid=cluster.uuid,


magnum/common/octavia.py (+61, -35)

@@ -11,13 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import re
+import time
 
+from osc_lib import exceptions as osc_exc
 from oslo_config import cfg
 from oslo_log import log as logging
-import re
-import time
 
 from magnum.common import clients
+from magnum.common import context as magnum_context
 from magnum.common import exception
 from magnum.common import neutron

@@ -51,48 +53,72 @@ def wait_for_lb_deleted(octavia_client, deleted_lbs):
         time.sleep(1)
 
 
-def delete_loadbalancers(context, cluster):
-    """Delete loadbalancers for kubernetes resources.
+def _delete_loadbalancers(context, lbs, cluster, octavia_client,
+                          remove_fip=False, cascade=True):
+    candidates = set()
+
+    for lb in lbs:
+        status = lb["provisioning_status"]
+        if status not in ["PENDING_DELETE", "DELETED"]:
+            LOG.info("Deleting load balancer %s for cluster %s",
+                     lb["id"], cluster.uuid)
+            octavia_client.load_balancer_delete(lb["id"], cascade=cascade)
+            candidates.add(lb["id"])
 
-    This method only works for the k8s cluster with
-    cloud-provider-openstack manager or controller-manager patched with
-    this PR:
-    https://github.com/kubernetes/cloud-provider-openstack/pull/223
+            if remove_fip:
+                neutron.delete_floatingip(context, lb["vip_port_id"], cluster)
 
-    The load balancers created for kubernetes services and ingresses are
-    deleted.
+    return candidates
+
+
+def delete_loadbalancers(context, cluster):
+    """Delete loadbalancers for the cluster.
+
+    The following load balancers are deleted:
+    - The load balancers created for Kubernetes services and ingresses in
+      the Kubernetes cluster.
+    - The load balancers created for Kubernetes API and etcd for HA cluster.
     """
     pattern = (r'Kubernetes .+ from cluster %s' % cluster.uuid)
-    valid_status = ["ACTIVE", "ERROR", "PENDING_DELETE", "DELETED"]
+    lb_resource_type = "Magnum::Optional::Neutron::LBaaS::LoadBalancer"
+
+    adm_ctx = magnum_context.get_admin_context()
+    adm_clients = clients.OpenStackClients(adm_ctx)
+    user_clients = clients.OpenStackClients(context)
+    candidates = set()
 
     try:
-        o_client = clients.OpenStackClients(context).octavia()
-        lbs = o_client.load_balancer_list().get("loadbalancers", [])
-
-        candidates = set()
-        invalids = set()
-        for lb in lbs:
-            if re.match(pattern, lb["description"]):
-                if lb["provisioning_status"] not in valid_status:
-                    invalids.add(lb["id"])
-                    continue
-                if lb["provisioning_status"] in ["ACTIVE", "ERROR"]:
-                    # Delete VIP floating ip if needed.
-                    neutron.delete_floatingip(context, lb["vip_port_id"],
-                                              cluster)
-
-                    LOG.debug("Deleting load balancer %s for cluster %s",
-                              lb["id"], cluster.uuid)
-                    o_client.load_balancer_delete(lb["id"], cascade=True)
-                    candidates.add(lb["id"])
-
-        if invalids:
-            raise Exception("Cannot delete load balancers %s in transitional "
-                            "status." % invalids)
+        octavia_client_adm = adm_clients.octavia()
+        heat_client = user_clients.heat()
+        octavia_client = user_clients.octavia()
+
+        # Get load balancers created for service/ingress
+        lbs = octavia_client.load_balancer_list().get("loadbalancers", [])
+        lbs = [lb for lb in lbs if re.match(pattern, lb["description"])]
+        deleted = _delete_loadbalancers(context, lbs, cluster,
+                                        octavia_client_adm, remove_fip=True)
+        candidates.update(deleted)
+
+        # Get load balancers created for Kubernetes api/etcd
+        lbs = []
+        lb_resources = heat_client.resources.list(
+            cluster.stack_id, nested_depth=2,
+            filters={"type": lb_resource_type})
+        for lb_res in lb_resources:
+            lb_id = lb_res.physical_resource_id
+            try:
+                lb = octavia_client.load_balancer_show(lb_id)
+                lbs.append(lb)
+            except osc_exc.NotFound:
+                continue
+        deleted = _delete_loadbalancers(context, lbs, cluster,
+                                        octavia_client_adm, remove_fip=False)
+        candidates.update(deleted)
 
         if not candidates:
             return
 
-        wait_for_lb_deleted(o_client, candidates)
+        wait_for_lb_deleted(octavia_client, candidates)
     except Exception as e:
         raise exception.PreDeletionFailed(cluster_uuid=cluster.uuid,
                                           msg=str(e))
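
For context, the function above is invoked from the driver's pre-delete
hook before the Heat stack delete is issued. A sketch of that calling
site, written from memory of the Magnum Heat driver and therefore
approximate (the class name here is an illustrative stand-in):

    from oslo_log import log as logging

    from magnum.common import keystone
    from magnum.common import octavia

    LOG = logging.getLogger(__name__)


    class HeatK8sDriver(object):  # illustrative stand-in for the real driver
        def pre_delete_cluster(self, context, cluster):
            """Delete cloud resources before deleting the cluster."""
            if keystone.is_octavia_enabled():
                LOG.info("Starting to delete loadbalancers for cluster %s",
                         cluster.uuid)
                octavia.delete_loadbalancers(context, cluster)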

magnum/tests/unit/common/test_octavia.py (+30, -54)

@@ -21,6 +21,11 @@ from magnum.tests import base
 from magnum.tests.unit.db import utils
 
 
+class TestHeatLBResource(object):
+    def __init__(self, physical_resource_id):
+        self.physical_resource_id = physical_resource_id
+
+
 class OctaviaTest(base.TestCase):
     def setUp(self):
         super(OctaviaTest, self).setUp()
@@ -58,86 +63,57 @@ class OctaviaTest(base.TestCase):
                 },
             ]
         }
-        mock_octavie_client = mock.MagicMock()
-        mock_octavie_client.load_balancer_list.side_effect = [
+        mock_octavia_client = mock.MagicMock()
+        mock_octavia_client.load_balancer_list.side_effect = [
             mock_lbs, {"loadbalancers": []}
         ]
+        mock_octavia_client.load_balancer_show.return_value = {
+            'id': 'heat_lb_id',
+            'provisioning_status': 'ACTIVE'
+        }
+
+        mock_heat_client = mock.MagicMock()
+        mock_heat_client.resources.list.return_value = [
+            TestHeatLBResource('heat_lb_id')
+        ]
+
         osc = mock.MagicMock()
         mock_clients.return_value = osc
-        osc.octavia.return_value = mock_octavie_client
+        osc.octavia.return_value = mock_octavia_client
+        osc.heat.return_value = mock_heat_client
 
         octavia.delete_loadbalancers(self.context, self.cluster)
 
         calls = [
             mock.call("fake_id_1", cascade=True),
-            mock.call("fake_id_2", cascade=True)
+            mock.call("fake_id_2", cascade=True),
+            mock.call("heat_lb_id", cascade=True)
         ]
-        mock_octavie_client.load_balancer_delete.assert_has_calls(calls)
+        mock_octavia_client.load_balancer_delete.assert_has_calls(calls)
 
     @mock.patch('magnum.common.clients.OpenStackClients')
     def test_delete_loadbalancers_no_candidate(self, mock_clients):
         mock_lbs = {
             "loadbalancers": []
         }
-        mock_octavie_client = mock.MagicMock()
-        mock_octavie_client.load_balancer_list.return_value = mock_lbs
-        osc = mock.MagicMock()
-        mock_clients.return_value = osc
-        osc.octavia.return_value = mock_octavie_client
-
-        octavia.delete_loadbalancers(self.context, self.cluster)
-
-        self.assertFalse(mock_octavie_client.load_balancer_delete.called)
+        mock_octavia_client = mock.MagicMock()
+        mock_octavia_client.load_balancer_list.return_value = mock_lbs
 
-    @mock.patch("magnum.common.neutron.delete_floatingip")
-    @mock.patch('magnum.common.clients.OpenStackClients')
-    def test_delete_loadbalancers_with_invalid_lb(self, mock_clients,
-                                                  mock_delete_fip):
         osc = mock.MagicMock()
         mock_clients.return_value = osc
-        mock_octavie_client = mock.MagicMock()
-        osc.octavia.return_value = mock_octavie_client
+        osc.octavia.return_value = mock_octavia_client
 
-        mock_lbs = {
-            "loadbalancers": [
-                {
-                    "id": "fake_id_1",
-                    "description": "Kubernetes external service "
-                                   "ad3080723f1c211e88adbfa163ee1203 from "
-                                   "cluster %s" % self.cluster.uuid,
-                    "name": "fake_name_1",
-                    "provisioning_status": "ACTIVE",
-                    "vip_port_id": "c17c1a6e-1868-11e9-84cd-00224d6b7bc1"
-                },
-                {
-                    "id": "fake_id_2",
-                    "description": "Kubernetes external service "
-                                   "a9f9ba08cf28811e89547fa163ea824f from "
-                                   "cluster %s" % self.cluster.uuid,
-                    "name": "fake_name_2",
-                    "provisioning_status": "PENDING_UPDATE",
-                    "vip_port_id": "b4ca07d1-a31e-43e2-891a-7d14f419f342"
-                },
-            ]
-        }
-        mock_octavie_client.load_balancer_list.return_value = mock_lbs
+        octavia.delete_loadbalancers(self.context, self.cluster)
 
-        self.assertRaises(
-            exception.PreDeletionFailed,
-            octavia.delete_loadbalancers,
-            self.context,
-            self.cluster
-        )
-        mock_octavie_client.load_balancer_delete.assert_called_once_with(
-            "fake_id_1", cascade=True)
+        self.assertFalse(mock_octavia_client.load_balancer_delete.called)
 
     @mock.patch("magnum.common.neutron.delete_floatingip")
     @mock.patch('magnum.common.clients.OpenStackClients')
     def test_delete_loadbalancers_timeout(self, mock_clients, mock_delete_fip):
         osc = mock.MagicMock()
         mock_clients.return_value = osc
-        mock_octavie_client = mock.MagicMock()
-        osc.octavia.return_value = mock_octavie_client
+        mock_octavia_client = mock.MagicMock()
+        osc.octavia.return_value = mock_octavia_client
 
         mock_lbs = {
             "loadbalancers": [
@@ -161,7 +137,7 @@ class OctaviaTest(base.TestCase):
                 },
             ]
         }
-        mock_octavie_client.load_balancer_list.return_value = mock_lbs
+        mock_octavia_client.load_balancer_list.return_value = mock_lbs
 
         self.assertRaises(
             exception.PreDeletionFailed,
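
The new TestHeatLBResource stub above only needs a physical_resource_id
attribute because that is the single attribute the production code reads
from each Heat resource before switching to the Octavia API. A minimal
sketch of that discovery step, with a placeholder function name and
stack_id, mirroring the logic added to magnum/common/octavia.py:

    LB_RESOURCE_TYPE = "Magnum::Optional::Neutron::LBaaS::LoadBalancer"


    def find_stack_lb_ids(heat_client, stack_id):
        # Each matching Heat resource wraps one Octavia load balancer;
        # its physical_resource_id is the load balancer UUID.
        resources = heat_client.resources.list(
            stack_id, nested_depth=2, filters={"type": LB_RESOURCE_TYPE})
        return [res.physical_resource_id for res in resources]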


releasenotes/notes/pre-delete-all-loadbalancers-350a69ec787e11ea.yaml (+5, -0)

@@ -0,0 +1,5 @@
+features:
+  - |
+    Magnum now cascade deletes all the load balancers before deleting the
+    cluster, not only including load balancers for the cluster services and
+    ingresses, but also those for Kubernetes API/etcd endpoints.
