From fb6fc96afc1439e3c9680710865f73f67909946d Mon Sep 17 00:00:00 2001
From: Kobi Samoray
Date: Tue, 24 Aug 2021 16:37:03 +0300
Subject: [PATCH] NSXV status for out of sync LBs

When a LB exists in Octavia DB, but missing in NSX, its status is never
updated.
The following addresses this condition by collecting the LBs from
Octavia and validating them periodically.

Change-Id: I3c42a934a47532968c65aaeade8210364167f35a
---
Review note (ignored by git-am): in octavia_listener.py the error log
for a failed get_active_loadbalancers() call used the float conversion
'%e' with an exception argument, which makes the logging call itself
fail to format. It is corrected to '%s' below; no line counts change.

 vmware_nsx/plugins/nsx_v/plugin.py            |  6 +-
 .../nsx_v/implementation/loadbalancer_mgr.py  | 57 +++++++++++++++++++
 .../services/lbaas/octavia/octavia_driver.py  | 10 +++-
 .../lbaas/octavia/octavia_listener.py         | 27 +++++++++
 4 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/vmware_nsx/plugins/nsx_v/plugin.py b/vmware_nsx/plugins/nsx_v/plugin.py
index 435624e332..e117d4d131 100644
--- a/vmware_nsx/plugins/nsx_v/plugin.py
+++ b/vmware_nsx/plugins/nsx_v/plugin.py
@@ -386,7 +386,8 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin,
         self.octavia_stats_collector = (
             octavia_listener.NSXOctaviaStatisticsCollector(
                 self,
-                self._get_octavia_stats_getter()))
+                self._get_octavia_stats_getter(),
+                self._get_octavia_status_getter()))
 
     def init_complete(self, resource, event, trigger, payload=None):
         with locking.LockManager.get_lock('plugin-init-complete'):
@@ -446,6 +447,9 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin,
     def _get_octavia_stats_getter(self):
         return listener_mgr.stats_getter
 
+    def _get_octavia_status_getter(self):
+        return loadbalancer_mgr.status_getter
+
     def _validate_nsx_version(self):
         ver = self.nsx_v.vcns.get_version()
         if version.LooseVersion(ver) < version.LooseVersion('6.2.3'):
diff --git a/vmware_nsx/services/lbaas/nsx_v/implementation/loadbalancer_mgr.py b/vmware_nsx/services/lbaas/nsx_v/implementation/loadbalancer_mgr.py
index 423abc08fc..efa5f2362d 100644
--- a/vmware_nsx/services/lbaas/nsx_v/implementation/loadbalancer_mgr.py
+++ b/vmware_nsx/services/lbaas/nsx_v/implementation/loadbalancer_mgr.py
@@ -30,6 +30,7 @@ from vmware_nsx.plugins.nsx_v.vshield.common import (
     constants as vcns_const)
 from vmware_nsx.plugins.nsx_v.vshield.common import exceptions as nsxv_exc
 from vmware_nsx.services.lbaas import base_mgr
+from vmware_nsx.services.lbaas import lb_const
 from vmware_nsx.services.lbaas.nsx_v import lbaas_common as lb_common
 from vmware_nsx.services.lbaas.octavia import constants as oct_const
 
@@ -228,3 +229,59 @@ class EdgeLoadBalancerManagerFromDict(base_mgr.EdgeLoadbalancerBaseManager):
 
     def validate_availability_zone(self, availability_zone_metadata):
         return None
+
+
+def status_getter(context, core_plugin):
+    lb_statuses = []
+    lsn_statuses = []
+    pool_statuses = []
+    member_statuses = []
+
+    lb_bindings = nsxv_db.get_nsxv_lbaas_loadbalancer_bindings(context.session)
+    for lb_binding in lb_bindings:
+        lb_status = lb_const.ONLINE
+        try:
+            _, lb_stats = core_plugin.nsx_v.vcns.get_loadbalancer_statistics(
+                lb_binding['edge_id'])
+
+            for vs in lb_stats.get('virtualServer', []):
+                if vs['name'].startswith('vip_'):
+                    vs_id = vs['name'].replace('vip_', '')
+                    vs_status = lb_const.OFFLINE
+                    if vs['status'] == 'OPEN':
+                        vs_status = lb_const.ONLINE
+                    lsn_statuses.append({
+                        'id': vs_id, 'operating_status': vs_status})
+
+            for pool in lb_stats.get('pool', []):
+                if pool['name'].startswith('pool_'):
+                    pool_id = pool['name'].replace('pool_', '')
+                    pool_status = lb_const.OFFLINE
+                    if pool['status'] == 'UP':
+                        pool_status = lb_const.ONLINE
+                    pool_statuses.append({
+                        'id': pool_id,
+                        'operating_status': pool_status})
+
+                    for member in pool.get('member', []):
+                        if member['name'].startswith('member-'):
+                            member_status = lb_const.OFFLINE
+                            if member['status'] == 'UP':
+                                member_status = lb_const.ONLINE
+                            member_statuses.append({
+                                'pool_id': pool_id,
+                                'member_ip': member.get('ipAddress'),
+                                'operating_status': member_status})
+
+        except Exception as e:
+            lb_status = lb_const.OFFLINE
+            LOG.error('Failed to fetch loadbalancer status from edge %s with '
+                      'exception %s', lb_binding['edge_id'], e)
+
+        lb_statuses.append({'id': lb_binding['loadbalancer_id'],
+                            'operating_status': lb_status})
+
+    return {lb_const.LOADBALANCERS: lb_statuses,
+            lb_const.LISTENERS: lsn_statuses,
+            lb_const.POOLS: pool_statuses,
+            lb_const.MEMBERS: member_statuses}
diff --git a/vmware_nsx/services/lbaas/octavia/octavia_driver.py b/vmware_nsx/services/lbaas/octavia/octavia_driver.py
index a79c5954bb..3b10ed23ac 100644
--- a/vmware_nsx/services/lbaas/octavia/octavia_driver.py
+++ b/vmware_nsx/services/lbaas/octavia/octavia_driver.py
@@ -30,8 +30,8 @@ from octavia.db import api as db_apis
 from octavia.db import repositories
 from octavia_lib.api.drivers import driver_lib
 from octavia_lib.api.drivers import exceptions
-
 from octavia_lib.api.drivers import provider_base as driver_base
+from octavia_lib.common import constants as o_const
 
 from vmware_nsx.services.lbaas import lb_const
 from vmware_nsx.services.lbaas.octavia import constants as d_const
@@ -724,6 +724,14 @@ class NSXOctaviaDriverEndpoint(driver_lib.DriverLibrary):
             LOG.error("Failed to update Octavia listener statistics. "
                       "Stats %s, Error %s", statistics, e.fault_string)
 
+    @log_helpers.log_method_call
+    def get_active_loadbalancers(self, ctxt):
+        # refresh the driver lib session
+        self.db_session = db_apis.get_session()
+        lbs, _ = self.repositories.load_balancer.get_all(
+            self.db_session, provisioning_status=o_const.ACTIVE)
+        return [lb.id for lb in lbs]
+
 
 @log_helpers.log_method_call
 def vmware_nsx_provider_agent(exit_event):
diff --git a/vmware_nsx/services/lbaas/octavia/octavia_listener.py b/vmware_nsx/services/lbaas/octavia/octavia_listener.py
index a044cb02ee..6976f29fbd 100644
--- a/vmware_nsx/services/lbaas/octavia/octavia_listener.py
+++ b/vmware_nsx/services/lbaas/octavia/octavia_listener.py
@@ -30,9 +30,11 @@ from oslo_log import log as logging
 import oslo_messaging as messaging
 from oslo_messaging.rpc import dispatcher
 
+from vmware_nsx.services.lbaas import lb_const
 from vmware_nsx.services.lbaas.octavia import constants
 
 LOG = logging.getLogger(__name__)
+STATUS_CHECKER_COUNT = 10
 
 
 class NSXOctaviaListener(object):
@@ -304,6 +306,10 @@ class NSXOctaviaListenerEndpoint(object):
         kw = {'status': status}
         self.client.cast({}, 'update_loadbalancer_status', **kw)
 
+    def get_active_loadbalancers(self):
+        kw = {}
+        return self.client.call({}, 'get_active_loadbalancers', **kw)
+
     @log_helpers.log_method_call
     def loadbalancer_create(self, ctxt, loadbalancer):
         ctx = neutron_context.Context(None, loadbalancer['project_id'])
@@ -729,6 +735,7 @@ class NSXOctaviaStatisticsCollector(object):
         self.core_plugin = core_plugin
         self.listener_stats_getter = listener_stats_getter
         self.loadbalancer_status_getter = loadbalancer_status_getter
+        self.status_checker_counter = 0
         if cfg.CONF.octavia_stats_interval:
             eventlet.spawn_n(self.thread_runner,
                              cfg.CONF.octavia_stats_interval)
@@ -761,4 +768,24 @@ class NSXOctaviaStatisticsCollector(object):
         if self.loadbalancer_status_getter:
             loadbalancer_status = self.loadbalancer_status_getter(
                 context, self.core_plugin)
+
+            if self.status_checker_counter == 0:
+                self.status_checker_counter = STATUS_CHECKER_COUNT
+                octavia_lb_ids = []
+                try:
+                    octavia_lb_ids = endpoint.get_active_loadbalancers()
+                except Exception as e:
+                    LOG.error('Fetching loadbalancer list from Octavia failed '
+                              'with error %s', e)
+                if octavia_lb_ids:
+                    nsx_lb_ids = [
+                        lb['id'] for lb in
+                        loadbalancer_status[lb_const.LOADBALANCERS]]
+                    missing_ids = list(set(octavia_lb_ids) - set(nsx_lb_ids))
+                    loadbalancer_status[lb_const.LOADBALANCERS] += [
+                        {'id': lb_id, 'operating_status': lb_const.OFFLINE}
+                        for lb_id in missing_ids]
+            else:
+                self.status_checker_counter -= 1
+
             endpoint.update_loadbalancer_status(loadbalancer_status)