NSXV status for out of sync LBs

When an LB exists in the Octavia DB but is missing in NSX, its status is never
updated.
The following addresses this condition by collecting the LBs from
Octavia and validating them periodically.

Change-Id: I3c42a934a47532968c65aaeade8210364167f35a
This commit is contained in:
Kobi Samoray 2021-08-24 16:37:03 +03:00
parent f6b1fbc455
commit b6ee3dc92f
4 changed files with 97 additions and 2 deletions

View File

@ -388,7 +388,8 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin,
self.octavia_stats_collector = (
octavia_listener.NSXOctaviaStatisticsCollector(
self,
self._get_octavia_stats_getter()))
self._get_octavia_stats_getter(),
self._get_octavia_status_getter()))
def init_complete(self, resource, event, trigger, payload=None):
with locking.LockManager.get_lock('plugin-init-complete'):
@ -448,6 +449,9 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin,
def _get_octavia_stats_getter(self):
    # Return the callable the Octavia statistics collector uses to pull
    # listener statistics; resolution is deferred to keep the collector
    # decoupled from the listener manager module.
    return listener_mgr.stats_getter
def _get_octavia_status_getter(self):
    # Return the callable the Octavia statistics collector uses to fetch
    # loadbalancer operating statuses from the NSX backend (used to flag
    # LBs that exist in Octavia but are gone from NSX).
    return loadbalancer_mgr.status_getter
def _validate_nsx_version(self):
ver = self.nsx_v.vcns.get_version()
if version.LooseVersion(ver) < version.LooseVersion('6.2.3'):

View File

@ -310,3 +310,59 @@ def _get_edge_loadbalancer_statistics(vcns, edge_id):
stats['total_connections'] += pool_stats.get('totalSessions', 0)
return stats
def status_getter(context, core_plugin):
    """Collect operating statuses of all NSX-backed loadbalancer objects.

    Walks every LBaaS loadbalancer binding, queries the backing NSX edge
    for its statistics, and derives ONLINE/OFFLINE statuses for the
    loadbalancer itself and its listeners, pools and members.

    :param context: neutron request context (provides the DB session).
    :param core_plugin: core plugin exposing the NSX-V vcns client.
    :returns: dict keyed by lb_const.LOADBALANCERS / LISTENERS / POOLS /
        MEMBERS, each mapping to a list of {'id'/'pool_id'...,
        'operating_status'} entries.
    """
    statuses = {lb_const.LOADBALANCERS: [],
                lb_const.LISTENERS: [],
                lb_const.POOLS: [],
                lb_const.MEMBERS: []}
    bindings = nsxv_db.get_nsxv_lbaas_loadbalancer_bindings(context.session)
    for binding in bindings:
        edge_id = binding['edge_id']
        # Assume ONLINE; any failure talking to the edge flips it OFFLINE.
        lb_operating = lb_const.ONLINE
        try:
            _, edge_stats = (
                core_plugin.nsx_v.vcns.get_loadbalancer_statistics(edge_id))
            for vs in edge_stats.get('virtualServer', []):
                vs_name = vs['name']
                # Only virtual servers created by the plugin carry the
                # 'vip_<listener-id>' naming scheme.
                if not vs_name.startswith('vip_'):
                    continue
                statuses[lb_const.LISTENERS].append({
                    'id': vs_name.replace('vip_', ''),
                    'operating_status': (lb_const.ONLINE
                                         if vs['status'] == 'OPEN'
                                         else lb_const.OFFLINE)})
            for pool in edge_stats.get('pool', []):
                pool_name = pool['name']
                # Plugin-managed pools are named 'pool_<pool-id>'.
                if not pool_name.startswith('pool_'):
                    continue
                pool_id = pool_name.replace('pool_', '')
                statuses[lb_const.POOLS].append({
                    'id': pool_id,
                    'operating_status': (lb_const.ONLINE
                                         if pool['status'] == 'UP'
                                         else lb_const.OFFLINE)})
                for member in pool.get('member', []):
                    if not member['name'].startswith('member-'):
                        continue
                    statuses[lb_const.MEMBERS].append({
                        'pool_id': pool_id,
                        'member_ip': member.get('ipAddress'),
                        'operating_status': (lb_const.ONLINE
                                             if member['status'] == 'UP'
                                             else lb_const.OFFLINE)})
        except Exception as e:
            lb_operating = lb_const.OFFLINE
            LOG.error('Failed to fetch loadbalancer status from edge %s with '
                      'exception %s', edge_id, e)
        statuses[lb_const.LOADBALANCERS].append(
            {'id': binding['loadbalancer_id'],
             'operating_status': lb_operating})
    return statuses

View File

@ -30,8 +30,8 @@ from octavia.db import api as db_apis
from octavia.db import repositories
from octavia_lib.api.drivers import driver_lib
from octavia_lib.api.drivers import exceptions
from octavia_lib.api.drivers import provider_base as driver_base
from octavia_lib.common import constants as o_const
from vmware_nsx.services.lbaas import lb_const
from vmware_nsx.services.lbaas.octavia import constants as d_const
@ -724,6 +724,14 @@ class NSXOctaviaDriverEndpoint(driver_lib.DriverLibrary):
LOG.error("Failed to update Octavia listener statistics. "
"Stats %s, Error %s", statistics, e.fault_string)
@log_helpers.log_method_call
def get_active_loadbalancers(self, ctxt):
    """Return the ids of all Octavia loadbalancers in ACTIVE state.

    :param ctxt: RPC context (unused; present for the RPC dispatch
        signature).
    :returns: list of loadbalancer id strings.
    """
    # Refresh the driver-lib DB session so the query reflects the
    # current Octavia database state.
    self.db_session = db_apis.get_session()
    active_lbs, _ = self.repositories.load_balancer.get_all(
        self.db_session, provisioning_status=o_const.ACTIVE)
    lb_ids = []
    for loadbalancer in active_lbs:
        lb_ids.append(loadbalancer.id)
    return lb_ids
@log_helpers.log_method_call
def vmware_nsx_provider_agent(exit_event):

View File

@ -30,9 +30,11 @@ from oslo_log import log as logging
import oslo_messaging as messaging
from oslo_messaging.rpc import dispatcher
from vmware_nsx.services.lbaas import lb_const
from vmware_nsx.services.lbaas.octavia import constants
LOG = logging.getLogger(__name__)
STATUS_CHECKER_COUNT = 10
class NSXOctaviaListener(object):
@ -304,6 +306,10 @@ class NSXOctaviaListenerEndpoint(object):
kw = {'status': status}
self.client.cast({}, 'update_loadbalancer_status', **kw)
def get_active_loadbalancers(self):
    """RPC-call the Octavia side for ids of ACTIVE loadbalancers.

    :returns: whatever the 'get_active_loadbalancers' RPC endpoint
        returns (a list of loadbalancer ids).
    """
    # Synchronous call (not cast) — the collector needs the result.
    return self.client.call({}, 'get_active_loadbalancers')
@log_helpers.log_method_call
def loadbalancer_create(self, ctxt, loadbalancer):
ctx = neutron_context.Context(None, loadbalancer['project_id'])
@ -729,6 +735,7 @@ class NSXOctaviaStatisticsCollector(object):
self.core_plugin = core_plugin
self.listener_stats_getter = listener_stats_getter
self.loadbalancer_status_getter = loadbalancer_status_getter
self.status_checker_counter = 0
if cfg.CONF.octavia_stats_interval:
eventlet.spawn_n(self.thread_runner,
cfg.CONF.octavia_stats_interval)
@ -761,4 +768,24 @@ class NSXOctaviaStatisticsCollector(object):
if self.loadbalancer_status_getter:
loadbalancer_status = self.loadbalancer_status_getter(
context, self.core_plugin)
if self.status_checker_counter == 0:
self.status_checker_counter = STATUS_CHECKER_COUNT
octavia_lb_ids = []
try:
octavia_lb_ids = endpoint.get_active_loadbalancers()
except Exception as e:
LOG.error('Fetching loadbalancer list from Octavia failed '
'with error %e', e)
if octavia_lb_ids:
nsx_lb_ids = [
lb['id'] for lb in
loadbalancer_status[lb_const.LOADBALANCERS]]
missing_ids = list(set(octavia_lb_ids) - set(nsx_lb_ids))
loadbalancer_status[lb_const.LOADBALANCERS] += [
{'id': lb_id, 'operating_status': lb_const.OFFLINE}
for lb_id in missing_ids]
else:
self.status_checker_counter -= 1
endpoint.update_loadbalancer_status(loadbalancer_status)