Disable keepalive if single backend is configured
Keepalive probing can put extra load on the backend, especially when clients spawn multiple processes. In addition, some deployments use an external load balancer with its own monitoring mechanism, in which case nsxlib probing is redundant. This change avoids keepalive probing when only one backend is configured. If the cluster is DOWN, the connection will always be retried upon the next API call.

Change-Id: If6b5542f0444f5bb72c0d60e90942a7819c5d72e
This commit is contained in:
parent
9812774970
commit
52322508db
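In outline, the behavior this commit introduces looks like the following minimal sketch. The class and method names here are hypothetical stand-ins, not the actual nsxlib API: probes only start when more than one endpoint exists, and a single-endpoint cluster is forced into retry-on-failure mode.

# Hypothetical sketch of the new single-backend behavior.
class SingleBackendAwareCluster(object):
    def __init__(self, endpoints, cluster_unavailable_retry=False):
        self.endpoints = list(endpoints)
        # With one endpoint there are no keepalive probes, so the only
        # way to recover from a DOWN cluster is to retry on each call.
        if len(self.endpoints) == 1:
            cluster_unavailable_retry = True
        self.cluster_unavailable_retry = cluster_unavailable_retry

    def start_keepalive_probes(self, probe):
        # A lone endpoint has no alternative to fail over to, so
        # probing it only adds load without enabling failover.
        if len(self.endpoints) > 1:
            for endpoint in self.endpoints:
                probe(endpoint)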
@@ -239,7 +239,8 @@ class NsxClientTestCase(NsxLibTestCase):
                 nsx_api_managers=nsx_api_managers or [NSX_MANAGER],
                 plugin_scope=PLUGIN_SCOPE,
                 plugin_tag=PLUGIN_TAG,
-                plugin_ver=PLUGIN_VER)
+                plugin_ver=PLUGIN_VER,
+                cluster_unavailable_retry=True)
 
             super(NsxClientTestCase.MockNSXClusteredAPI, self).__init__(
                 nsxlib_config)
@@ -390,6 +390,7 @@ class ClusteredAPITestCase(nsxlib_testcase.NsxClientTestCase):
         max_attempts = 3
         api = self.mock_nsx_clustered_api(nsx_api_managers=conf_managers,
                                           max_attempts=max_attempts)
+        api.nsxlib_config.cluster_unavailable_retry = False
         api._validate = mock.Mock()
         eps = list(api._endpoints.values())
@@ -457,6 +457,7 @@ class ClusteredAPI(object):
 
         self._http_provider = http_provider
         self._keepalive_interval = keepalive_interval
+        self._print_keepalive = 0
 
         def _init_cluster(*args, **kwargs):
             self._init_endpoints(providers,
@@ -508,6 +509,11 @@ class ClusteredAPI(object):
                     break
                 eventlet.sleep(0.5)
 
-        for endpoint in self._endpoints.values():
-            # dynamic loop for each endpoint to ensure connectivity
-            loop = loopingcall.DynamicLoopingCall(
+        if len(self._endpoints) > 1:
+            # We don't monitor connectivity when one endpoint is available,
+            # since there is no alternative to querying this single backend.
+            # If endpoint was down, we can tolerate extra roundtrip to
+            # validate connectivity
+            for endpoint in self._endpoints.values():
+                # dynamic loop for each endpoint to ensure connectivity
+                loop = loopingcall.DynamicLoopingCall(
@@ -523,6 +529,13 @@ class ClusteredAPI(object):
         delta = datetime.datetime.now() - endpoint.last_updated
         if delta.seconds >= self._keepalive_interval:
             # TODO(boden): backoff on validation failure
+            if self._print_keepalive % 10 == 0:
+                # Print keepalive debug message once every 10 probes
+                LOG.debug("Running keepalive probe for cluster endpoint "
+                          "'%(ep)s' ",
+                          {'ep': endpoint})
+            self._print_keepalive += 1
+
             self._validate(endpoint)
             return self._keepalive_interval
         return self._keepalive_interval - delta.seconds
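For context on the return values in the hunk above: assuming the loop driving this callback is the oslo.service DynamicLoopingCall created in the hunk at line 509, the value returned by the callback is the delay, in seconds, before its next invocation. Returning the full interval reschedules a whole period ahead; returning interval minus elapsed wakes the loop exactly when the endpoint goes stale. A standalone sketch, with KEEPALIVE_INTERVAL and probe() as illustrative stand-ins:

# Sketch of dynamic rescheduling with oslo.service's DynamicLoopingCall.
from oslo_service import loopingcall

KEEPALIVE_INTERVAL = 30

def probe():
    # The keepalive code returns KEEPALIVE_INTERVAL after probing, or
    # (KEEPALIVE_INTERVAL - elapsed) to wake up exactly when the
    # endpoint's last_updated timestamp becomes stale.
    return KEEPALIVE_INTERVAL

loop = loopingcall.DynamicLoopingCall(probe)
loop.start(initial_delay=KEEPALIVE_INTERVAL,
           periodic_interval_max=KEEPALIVE_INTERVAL,
           stop_on_exception=False)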
@@ -85,7 +85,11 @@ class NsxLibConfig(object):
     :param cluster_unavailable_retry: If True, skip fatal errors when no
                                       endpoint in the NSX management cluster is
                                       available to serve a request, and retry
-                                      the request instead.
+                                      the request instead. This setting cannot
+                                      be False if a single endpoint is
+                                      configured in the cluster, since there
+                                      will be no keepalive probes in this
+                                      case.
 
     -- Additional parameters which are relevant only for the Policy manager:
     :param allow_passthrough: If True, use nsx manager api for cases which are
@@ -152,6 +156,13 @@ class NsxLibConfig(object):
         self.realization_max_attempts = realization_max_attempts
         self.realization_wait_sec = realization_wait_sec
 
+        if len(nsx_api_managers) == 1 and not self.cluster_unavailable_retry:
+            LOG.warning("When only one endpoint is provided, keepalive probes "
+                        "are disabled. For the system to be able to recover "
+                        "from DOWN state, cluster_unavailable_retry is set "
+                        "to True, overriding provided configuration")
+            self.cluster_unavailable_retry = True
+
         if dhcp_profile_uuid:
             # this is deprecated, and never used.
             versionutils.report_deprecated_feature(
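A hypothetical configuration illustrating the override above. The nsx_api_managers and cluster_unavailable_retry arguments come from this patch; the manager address and credentials are made-up values:

from vmware_nsxlib.v3 import config

# Single manager with retry explicitly disabled: per the hunk above,
# NsxLibConfig logs the warning and forces the flag back to True.
nsxlib_config = config.NsxLibConfig(
    nsx_api_managers=['192.168.0.10'],
    username='admin',
    password='secret',
    cluster_unavailable_retry=False)

assert nsxlib_config.cluster_unavailable_retry is True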