NSXV: protect against stale LB bindings

When a stale load balancer binding remains in the Neutron DB after the
load balancer has been deleted, it causes FWaaS failures.

To protect against this, we take the following measures:
- Use try-except to skip an LB edge firewall rule that is missing from
the backend while performing FWaaS transactions.
- Delete the LB binding when deleting the router, if routers are used as
the LB platform.

Change-Id: I3ab60093e3ac8ce6ff1d3557622745484d43b759
This commit is contained in:
Kobi Samoray 2021-03-14 15:35:46 +02:00
parent 253c1d147c
commit 8f6ecd0c5c
2 changed files with 36 additions and 12 deletions

View File

@ -12,6 +12,7 @@
# License for the specific language governing permissions and limitations
# under the License.
from oslo_config import cfg
from oslo_log import log as logging
from neutron_lib import constants as n_consts
@ -138,14 +139,30 @@ class RouterExclusiveDriver(router_driver.RouterBaseDriver):
context, router_id, gw_info, force_update=True)
def delete_router(self, context, router_id):
    """Delete an exclusive router and clean up what is tied to its edge.

    Steps, in order:
    1. Look up the router's edge ID and availability zone name.
    2. If a metadata proxy handler is configured, clean up the metadata
       proxy state for this router's edge.
    3. If routers are used as the LBaaS platform, remove any load
       balancer bindings still recorded for the edge.
    4. Delete the logical router through the edge manager.

    :param context: request context; ``context.session`` is used for the
        load balancer binding DB calls
    :param router_id: ID of the router being deleted
    """
    # The neutron router was already deleted, so we cannot get the AZ
    # from it. Get it from the router-bindings DB. The lookup is done
    # once here because both cleanup steps below need its result
    # (az_name for the metadata proxy, edge_id for the LB bindings).
    edge_id, az_name = self.plugin._get_edge_id_and_az_by_rtr_id(
        context, router_id)

    if self.plugin.metadata_proxy_handler:
        md_proxy = self.plugin.get_metadata_proxy_handler(az_name)
        if md_proxy:
            md_proxy.cleanup_router_edge(context, router_id)

    # When LBs are hosted on router edges, there are cases where LB binding
    # remains even though the LB is gone. While the router is protected
    # from deletion when an LB exists, there's no proper cleanup for this
    # binding entry so we clean it up here
    if cfg.CONF.nsxv.use_routers_as_lbaas_platform:
        lb_bindings = nsxv_db.get_nsxv_lbaas_loadbalancer_binding_by_edge(
            context.session, edge_id)
        for lb_binding in lb_bindings:
            LOG.warning("Deleting stale LB binding for LB %s on edge %s",
                        lb_binding['loadbalancer_id'],
                        edge_id)
            nsxv_db.del_nsxv_lbaas_loadbalancer_binding(
                context.session, lb_binding['loadbalancer_id'])

    self.edge_manager.delete_lrouter(context, router_id, dist=False)
def update_routes(self, context, router_id, nexthop):

View File

@ -4343,16 +4343,23 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin,
lb_rules = nsxv_db.get_nsxv_lbaas_loadbalancer_binding_by_edge(
context.session, edge_id)
for rule in lb_rules:
vsm_rule = self.nsx_v.vcns.get_firewall_rule(
edge_id, rule['edge_fw_rule_id'])[1]
lb_fw_rule = {
'action': edge_firewall_driver.FWAAS_ALLOW,
'enabled': vsm_rule['enabled'],
'destination_ip_address': vsm_rule['destination']['ipAddress'],
'name': vsm_rule['name'],
'ruleId': vsm_rule['ruleId']
}
fw_rules.append(lb_fw_rule)
vsm_rule = None
try:
vsm_rule = self.nsx_v.vcns.get_firewall_rule(
edge_id, rule['edge_fw_rule_id'])[1]
except vsh_exc.ResourceNotFound:
LOG.error("LB firewall rule %s for edge %s exists in DB but "
"not in backend", rule['edge_fw_rule_id'], edge_id)
if vsm_rule:
lb_fw_rule = {
'action': edge_firewall_driver.FWAAS_ALLOW,
'enabled': vsm_rule['enabled'],
'destination_ip_address':
vsm_rule['destination']['ipAddress'],
'name': vsm_rule['name'],
'ruleId': vsm_rule['ruleId']
}
fw_rules.append(lb_fw_rule)
fw = {'firewall_rule_list': fw_rules}
try: