Browse Source

Add retries when loading keystone data and fetching endpoints

We may end up loading lots of nested stacks concurrently with
convergence and those would try to discover endpoints and fetch
access info objects from keystone. This at times results in
ConnectTimeout errors from keystone. We can avoid these errors
by adding some retries.

Also adds retries to client_plugin get_endpoint() calls, which have a
similar issue.

Change-Id: I18cde971248eff5783f97c9e7a60316d7dd93431
Task: 36349
changes/93/678193/3
Rabi Mishra 2 years ago
parent
commit
6fb8ac250a
  1. 13
      heat/common/context.py
  2. 2
      heat/engine/clients/client_plugin.py

13
heat/common/context.py

@ -12,6 +12,7 @@
# under the License.
from keystoneauth1 import access
from keystoneauth1 import exceptions as ksa_exceptions
from keystoneauth1.identity import access as access_plugin
from keystoneauth1.identity import generic
from keystoneauth1 import loading as ks_loading
@ -24,6 +25,7 @@ import oslo_messaging
from oslo_middleware import request_id as oslo_request_id
from oslo_utils import importutils
import six
import tenacity
from heat.common import config
from heat.common import endpoint_utils
@ -51,6 +53,15 @@ TRUSTEE_CONF_GROUP = 'trustee'
ks_loading.register_auth_conf_options(cfg.CONF, TRUSTEE_CONF_GROUP)
# Decorator that retries the wrapped callable when keystoneauth raises
# ConnectFailure or DiscoveryFailure. It makes at most
# client_retry_limit + 1 attempts, waits a random interval of up to
# 2 seconds between attempts, and (reraise=True) propagates the last
# original exception instead of wrapping it in tenacity's RetryError.
# NOTE(review): cfg.CONF.client_retry_limit is evaluated once, when this
# assignment runs, not on each decorated call -- confirm the option is
# registered and holds its configured value by that point.
retry_on_connection_timeout = tenacity.retry(
stop=tenacity.stop_after_attempt(cfg.CONF.client_retry_limit+1),
wait=tenacity.wait_random(max=2),
retry=tenacity.retry_if_exception_type(
(ksa_exceptions.ConnectFailure,
ksa_exceptions.DiscoveryFailure)),
reraise=True)
def list_opts():
trustee_opts = ks_loading.get_auth_common_conf_options()
trustee_opts.extend(ks_loading.get_auth_plugin_conf_options(
@ -288,6 +299,8 @@ class RequestContext(context.RequestContext):
class StoredContext(RequestContext):
@retry_on_connection_timeout
def _load_keystone_data(self):
self._keystone_loaded = True
auth_ref = self.auth_plugin.get_access(self.keystone_session)

2
heat/engine/clients/client_plugin.py

@ -24,6 +24,7 @@ import requests
import six
from heat.common import config
from heat.common import context
from heat.common import exception as heat_exception
cfg.CONF.import_opt('client_retry_limit', 'heat.common.config')
@ -93,6 +94,7 @@ class ClientPlugin(object):
def url_for(self, **kwargs):
keystone_session = self.context.keystone_session
@context.retry_on_connection_timeout
def get_endpoint():
return keystone_session.get_endpoint(**kwargs)

Loading…
Cancel
Save