Add locks to cache and clean up kinit logic

After reviewing reports of multiple CCaches cropping up in logs, we
found an issue in the way novajoin initializes and updates the
cache files containing keytabs. The result was numerous extra cache
files being created and overwritten.

With this change we ensure that the credentials cache is properly
shared across workers and that when new credentials are being
created, the cache files are locked to avoid potential conflicts.

This change also updates DEBUG-level logging to include useful cache
troubleshooting breadcrumbs.

Change-Id: I07e0004f77e0d52ab2a2707c5fe50f48f718b717
Co-Authored-By: Ade Lee <alee@redhat.com>
This commit is contained in:
Harry Rybacki 2019-10-11 09:04:25 -04:00
parent e418762753
commit a71617627a
1 changed files with 35 additions and 4 deletions

View File

@ -15,6 +15,7 @@
import cachetools
import json
import os
import threading
import time
import uuid
@ -53,6 +54,8 @@ CONF = cfg.CONF
LOG = logging.getLogger(__name__)
CCACHE_LOCK = threading.RLock()
class IPANovaJoinBase(object):
@ -63,15 +66,35 @@ class IPANovaJoinBase(object):
self.ntries = CONF.connect_retries
self.retry_delay = CONF.retry_delay
self.initial_backoff = CONF.connect_backoff
self.ccache = "MEMORY:" + str(uuid.uuid4())
LOG.debug("cache: %s", self.ccache)
os.environ['KRB5CCNAME'] = self.ccache
# NOTE(hrybacki): Prevent race conditions for two or more
# IPANovaJoinBase objects overwriting the same ccache
CCACHE_LOCK.acquire()
if self._ipa_client_configured() and not api.isdone('finalize'):
self.ccache = "MEMORY:" + str(uuid.uuid4())
os.environ['KRB5CCNAME'] = self.ccache
(hostname, realm) = self.get_host_and_realm()
LOG.debug("Establishing new ccache for ipalib API...")
kinit_keytab(str('nova/%s@%s' % (hostname, realm)),
CONF.keytab, self.ccache)
api.bootstrap(context='novajoin')
api.finalize()
else:
self.ccache = os.environ['KRB5CCNAME']
CCACHE_LOCK.release()
# NOTE(hrybacki): Functional tests will raise an AttributeError
# when run. This ensures upstream gates run while
# still dumping useful debug logs under normal
# operations.
try:
LOG.debug("Cache: %s -- PID: %s -- API hash: %s",
self.ccache, os.getpid(), str(hash(api)))
except AttributeError:
LOG.debug("Failed to access ccache in IPANovaJoinBase init. If "
"this happened outside of a functional test run, "
"please investigate further.")
self.batch_args = list()
self.backoff = self.initial_backoff
@ -158,15 +181,23 @@ class IPANovaJoinBase(object):
errors.TicketExpired,
errors.KerberosError) as e:
tries += 1
LOG.debug("[%s] kinit again: %s", message_id, e)
# pylint: disable=no-member
LOG.debug("[%s] kinit new ccache in get_connection: %s",
message_id, e)
CCACHE_LOCK.acquire()
try:
kinit_keytab(str('nova/%s@%s' %
(api.env.host, api.env.realm)),
CONF.keytab,
self.ccache)
CCACHE_LOCK.release()
LOG.debug("[%s] Cache: %s -- PID: %s -- API hash: %s",
message_id, self.ccache, os.getpid(),
str(hash(api)))
except GSSError as e:
LOG.debug("[%s] kinit failed: %s", message_id, e)
CCACHE_LOCK.release()
if self.backoff:
self.__backoff(message_id)
except errors.NetworkError: