From a71617627ab0e7c671b90ef2b1aafd6eddceb077 Mon Sep 17 00:00:00 2001 From: Harry Rybacki Date: Fri, 11 Oct 2019 09:04:25 -0400 Subject: [PATCH] Add locks to cache and cleanup kinit logic After reviewing reports of multiple CCaches cropping up in logs, we found an issue in the way novajoin is initiating and updating cache files containing credentials obtained from the keytab. The result was numerous extra cache files being created and overwritten. With this change we ensure that the credentials cache is properly shared across workers and that when new credentials are being created, the cache files are locked to avoid potential conflicts. Also updates DEBUG-level logging to include useful cache troubleshooting breadcrumbs. Change-Id: I07e0004f77e0d52ab2a2707c5fe50f48f718b717 Co-Authored-By: Ade Lee --- novajoin/ipa.py | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/novajoin/ipa.py b/novajoin/ipa.py index 4319af4..2e6a0f3 100644 --- a/novajoin/ipa.py +++ b/novajoin/ipa.py @@ -15,6 +15,7 @@ import cachetools import json import os +import threading import time import uuid @@ -53,6 +54,8 @@ CONF = cfg.CONF LOG = logging.getLogger(__name__) +CCACHE_LOCK = threading.RLock() + class IPANovaJoinBase(object): @@ -63,15 +66,35 @@ class IPANovaJoinBase(object): self.ntries = CONF.connect_retries self.retry_delay = CONF.retry_delay self.initial_backoff = CONF.connect_backoff - self.ccache = "MEMORY:" + str(uuid.uuid4()) - LOG.debug("cache: %s", self.ccache) - os.environ['KRB5CCNAME'] = self.ccache + + # NOTE(hrybacki): Prevent race conditions for two or more + # IPANovaJoinBase objects overwriting the same ccache + CCACHE_LOCK.acquire() if self._ipa_client_configured() and not api.isdone('finalize'): + self.ccache = "MEMORY:" + str(uuid.uuid4()) + os.environ['KRB5CCNAME'] = self.ccache (hostname, realm) = self.get_host_and_realm() + LOG.debug("Establishing new ccache for ipalib API...") kinit_keytab(str('nova/%s@%s' % (hostname, realm)), CONF.keytab, 
self.ccache) api.bootstrap(context='novajoin') api.finalize() + else: + self.ccache = os.environ['KRB5CCNAME'] + CCACHE_LOCK.release() + + # NOTE(hrybacki): Functional tests will raise an AttributeError + # when run. This ensures upstream gates run while + # still dumping useful debug logs under normal + # operations. + try: + LOG.debug("Cache: %s -- PID: %s -- API hash: %s", + self.ccache, os.getpid(), str(hash(api))) + except AttributeError: + LOG.debug("Failed to access ccache in IPANovaJoinBase init. If " + "this happened outside of a functional test run, " + "please investigate further.") + self.batch_args = list() self.backoff = self.initial_backoff @@ -158,15 +181,23 @@ class IPANovaJoinBase(object): errors.TicketExpired, errors.KerberosError) as e: tries += 1 - LOG.debug("[%s] kinit again: %s", message_id, e) + # pylint: disable=no-member + LOG.debug("[%s] kinit new ccache in get_connection: %s", + message_id, e) + CCACHE_LOCK.acquire() try: kinit_keytab(str('nova/%s@%s' % (api.env.host, api.env.realm)), CONF.keytab, self.ccache) + CCACHE_LOCK.release() + LOG.debug("[%s] Cache: %s -- PID: %s -- API hash: %s", + message_id, self.ccache, os.getpid(), + str(hash(api))) except GSSError as e: LOG.debug("[%s] kinit failed: %s", message_id, e) + CCACHE_LOCK.release() if self.backoff: self.__backoff(message_id) except errors.NetworkError: