Fix a race condition in the hash ring code

We're handling hash rings and updated_at differently: one
is stored on the class level, the other - on instance. Apparently,
there is a race there, resulting in updated_at never updated.
Store hash rings and updated_at in one tuple, so that they're
always loaded and stored together.

Also remove double loading of the hash ring in _get_ring that
could contribute to the problem.

Change-Id: Ib659014e07549ae3d5ec7e69da318301f5994ca8
(cherry picked from commit d3872cfcdd)
This commit is contained in:
Dmitry Tantsur 2020-09-25 14:09:43 +02:00
parent 24e44c9d22
commit 18bd79dbcc
3 changed files with 24 additions and 16 deletions

View File

@ -29,12 +29,11 @@ LOG = log.getLogger(__name__)
class HashRingManager(object):
_hash_rings = None
_hash_rings = (None, 0)
_lock = threading.Lock()
def __init__(self, use_groups=True, cache=True):
self.dbapi = dbapi.get_instance()
self.updated_at = time.time()
self.use_groups = use_groups
self.cache = cache
@ -48,19 +47,19 @@ class HashRingManager(object):
# Hot path, no lock. Using a local variable to avoid races with code
# changing the class variable.
hash_rings = self.__class__._hash_rings
if hash_rings is not None and self.updated_at >= limit:
hash_rings, updated_at = self.__class__._hash_rings
if hash_rings is not None and updated_at >= limit:
return hash_rings
with self._lock:
if self.__class__._hash_rings is None or self.updated_at < limit:
hash_rings, updated_at = self.__class__._hash_rings
if hash_rings is None or updated_at < limit:
LOG.debug('Rebuilding cached hash rings')
rings = self._load_hash_rings()
self.__class__._hash_rings = rings
self.updated_at = time.time()
hash_rings = self._load_hash_rings()
self.__class__._hash_rings = hash_rings, time.time()
LOG.debug('Finished rebuilding hash rings, available drivers '
'are %s', ', '.join(rings))
return self.__class__._hash_rings
'are %s', ', '.join(hash_rings))
return hash_rings
def _load_hash_rings(self):
rings = {}
@ -77,7 +76,7 @@ class HashRingManager(object):
def reset(cls):
with cls._lock:
LOG.debug('Resetting cached hash rings')
cls._hash_rings = None
cls._hash_rings = (None, 0)
def get_ring(self, driver_name, conductor_group):
try:
@ -95,13 +94,14 @@ class HashRingManager(object):
def _get_ring(self, driver_name, conductor_group):
# There are no conductors, temporary failure - 503 Service Unavailable
if not self.ring:
ring = self.ring # a property, don't load twice
if not ring:
raise exception.TemporaryFailure()
try:
if self.use_groups:
return self.ring['%s:%s' % (conductor_group, driver_name)]
return self.ring[driver_name]
return ring['%s:%s' % (conductor_group, driver_name)]
return ring[driver_name]
except KeyError:
raise exception.DriverNotFound(
_("The driver '%s' is unknown.") % driver_name)

View File

@ -112,7 +112,10 @@ class HashRingManagerTestCase(db_base.DbTestCase):
# since there is an active conductor for the requested hardware type.
self.assertEqual(1, len(ring))
self.ring_manager.updated_at = time.time() - 31
self.ring_manager.__class__._hash_rings = (
self.ring_manager.__class__._hash_rings[0],
time.time() - 31
)
ring = self.ring_manager.get_ring('hardware-type', '')
self.assertEqual(2, len(ring))
@ -121,7 +124,7 @@ class HashRingManagerTestCase(db_base.DbTestCase):
use_groups=self.use_groups)
ring = ring_mgr.ring
self.assertIsNotNone(ring)
self.assertIsNone(hash_ring.HashRingManager._hash_rings)
self.assertEqual((None, 0), hash_ring.HashRingManager._hash_rings)
class HashRingManagerWithGroupsTestCase(HashRingManagerTestCase):

View File

@ -0,0 +1,5 @@
---
fixes:
- |
Fixes a potential race in the hash ring code that could result in the
hash rings never updated after their initial load.