Merge "Disable keepalive if single backend is configured"

This commit is contained in:
Zuul 2020-02-29 20:31:00 +00:00 committed by Gerrit Code Review
commit 505996439f
4 changed files with 35 additions and 9 deletions

View File

@ -239,7 +239,8 @@ class NsxClientTestCase(NsxLibTestCase):
nsx_api_managers=nsx_api_managers or [NSX_MANAGER],
plugin_scope=PLUGIN_SCOPE,
plugin_tag=PLUGIN_TAG,
plugin_ver=PLUGIN_VER)
plugin_ver=PLUGIN_VER,
cluster_unavailable_retry=True)
super(NsxClientTestCase.MockNSXClusteredAPI, self).__init__(
nsxlib_config)

View File

@ -392,6 +392,7 @@ class ClusteredAPITestCase(nsxlib_testcase.NsxClientTestCase):
max_attempts = 3
api = self.mock_nsx_clustered_api(nsx_api_managers=conf_managers,
max_attempts=max_attempts)
api.nsxlib_config.cluster_unavailable_retry = False
api._validate = mock.Mock()
eps = list(api._endpoints.values())

View File

@ -460,6 +460,7 @@ class ClusteredAPI(object):
self._http_provider = http_provider
self._keepalive_interval = keepalive_interval
self._print_keepalive = 0
def _init_cluster(*args, **kwargs):
self._init_endpoints(providers,
@ -511,13 +512,18 @@ class ClusteredAPI(object):
break
eventlet.sleep(0.5)
for endpoint in self._endpoints.values():
# dynamic loop for each endpoint to ensure connectivity
loop = loopingcall.DynamicLoopingCall(
self._endpoint_keepalive, endpoint)
loop.start(initial_delay=self._keepalive_interval,
periodic_interval_max=self._keepalive_interval,
stop_on_exception=False)
if len(self._endpoints) > 1:
# We don't monitor connectivity when only one endpoint is
# configured, since there is no alternative to querying this
# single backend. If the endpoint was down, we can tolerate an
# extra roundtrip to validate connectivity.
for endpoint in self._endpoints.values():
# dynamic loop for each endpoint to ensure connectivity
loop = loopingcall.DynamicLoopingCall(
self._endpoint_keepalive, endpoint)
loop.start(initial_delay=self._keepalive_interval,
periodic_interval_max=self._keepalive_interval,
stop_on_exception=False)
LOG.debug("Done initializing API endpoint(s). "
"API cluster health: %s", self.health)
@ -526,6 +532,13 @@ class ClusteredAPI(object):
delta = datetime.datetime.now() - endpoint.last_updated
if delta.seconds >= self._keepalive_interval:
# TODO(boden): backoff on validation failure
if self._print_keepalive % 10 == 0:
# Print keepalive debug message once every 10 probes
LOG.debug("Running keepalive probe for cluster endpoint "
"'%(ep)s' ",
{'ep': endpoint})
self._print_keepalive += 1
self._validate(endpoint)
return self._keepalive_interval
return self._keepalive_interval - delta.seconds

View File

@ -85,7 +85,11 @@ class NsxLibConfig(object):
:param cluster_unavailable_retry: If True, skip fatal errors when no
endpoint in the NSX management cluster is
available to serve a request, and retry
the request instead.
the request instead. This setting
cannot be False if a single endpoint is
configured in the cluster, since there
will be no keepalive probes in this
case.
-- Additional parameters which are relevant only for the Policy manager:
:param allow_passthrough: If True, use nsx manager api for cases which are
@ -152,6 +156,13 @@ class NsxLibConfig(object):
self.realization_max_attempts = realization_max_attempts
self.realization_wait_sec = realization_wait_sec
if len(nsx_api_managers) == 1 and not self.cluster_unavailable_retry:
LOG.warning("When only one endpoint is provided, keepalive probes "
" are disabled. For the system to be able to recover "
" from DOWN state, cluster_unavailable_retry is set "
" to True, overriding provided configuration")
self.cluster_unavailable_retry = True
if dhcp_profile_uuid:
# this is deprecated, and never used.
versionutils.report_deprecated_feature(