Fix infinite recursion in getRelatedChanges
It's possible to have a loop in change relationships (whether accidentally or intentionally). The current method of getting related changes in order to prune the change cache does not handle that. This reworks that method to be safe for recursion. During development, it was observed that the set of relevant changes was actually a mixture of cache keys (which are objects) and cache references (which are string representations of the same). This change also refactors the collection of relevant cache keys into a method which can be tested on its own to verify we get keys of a consistent type. Change-Id: Ie76f9b19f10b053f5c72bdaf7be3efc445b53639
This commit is contained in:
parent
51d4fe5587
commit
e4f1b9b65e
@ -0,0 +1,5 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
A bug which prevented change cache cleanup (and therefore caused
|
||||
ZooKeeper usage to grow without limits) was fixed.
|
@ -48,6 +48,7 @@ from tests.base import (
|
||||
TestConnectionRegistry,
|
||||
FIXTURE_DIR,
|
||||
)
|
||||
from zuul.zk.change_cache import ChangeKey
|
||||
from zuul.zk.layout import LayoutState
|
||||
|
||||
EMPTY_LAYOUT_STATE = LayoutState("", "", 0)
|
||||
@ -1409,6 +1410,15 @@ class TestScheduler(ZuulTestCase):
|
||||
sched.maintainConnectionCache()
|
||||
self.assertEqual(len(_getCachedChanges()), 2)
|
||||
|
||||
# Test this method separately to make sure we are getting
|
||||
# cache keys of the correct type, since we don't do run-time
|
||||
# validation.
|
||||
relevant = sched._gatherConnectionCacheKeys()
|
||||
self.assertEqual(len(relevant), 2)
|
||||
for k in relevant:
|
||||
if not isinstance(k, ChangeKey):
|
||||
raise RuntimeError("Cache key %s is not a ChangeKey" % repr(k))
|
||||
|
||||
self.hold_jobs_in_queue = False
|
||||
self.executor_api.release()
|
||||
self.waitUntilSettled()
|
||||
|
@ -3672,8 +3672,8 @@ class Ref(object):
|
||||
def isUpdateOf(self, other):
|
||||
return False
|
||||
|
||||
def getRelatedChanges(self, sched):
|
||||
return set()
|
||||
def getRelatedChanges(self, sched, related):
|
||||
pass
|
||||
|
||||
def updatesConfig(self, tenant):
|
||||
tpc = tenant.project_configs.get(self.project.canonical_name)
|
||||
@ -3879,17 +3879,21 @@ class Change(Branch):
|
||||
return True
|
||||
return False
|
||||
|
||||
def getRelatedChanges(self, sched):
|
||||
related = set()
|
||||
for reference in self.needs_changes:
|
||||
related.add(reference)
|
||||
for reference in self.needed_by_changes:
|
||||
related.add(reference)
|
||||
def getRelatedChanges(self, sched, related):
|
||||
"""Recursively update a set of related changes
|
||||
|
||||
:arg Scheduler sched: The scheduler instance
|
||||
:arg set related: The cache keys of changes which have been inspected
|
||||
so far. Will be updated with additional changes by this method.
|
||||
"""
|
||||
related.add(self.cache_stat.key)
|
||||
for reference in itertools.chain(self.needs_changes,
|
||||
self.needed_by_changes):
|
||||
key = ChangeKey.fromReference(reference)
|
||||
source = sched.connections.getSource(key.connection_name)
|
||||
change = source.getChangeByKey(key)
|
||||
related.update(change.getRelatedChanges(sched))
|
||||
return related
|
||||
if key not in related:
|
||||
source = sched.connections.getSource(key.connection_name)
|
||||
change = source.getChangeByKey(key)
|
||||
change.getRelatedChanges(sched, related)
|
||||
|
||||
def getSafeAttributes(self):
|
||||
return Attributes(project=self.project,
|
||||
|
@ -1616,18 +1616,20 @@ class Scheduler(threading.Thread):
|
||||
else:
|
||||
pipeline.state = pipeline.STATE_NORMAL
|
||||
|
||||
def maintainConnectionCache(self):
|
||||
def _gatherConnectionCacheKeys(self):
|
||||
relevant = set()
|
||||
self.log.debug("Starting connection cache maintenance")
|
||||
with self.layout_lock:
|
||||
for tenant in self.abide.tenants.values():
|
||||
for pipeline in tenant.layout.pipelines.values():
|
||||
self.log.debug("Gather relevant cache items for: %s %s",
|
||||
tenant.name, pipeline.name)
|
||||
for item in pipeline.getAllItems():
|
||||
relevant.add(item.change.cache_stat.key)
|
||||
relevant.update(
|
||||
item.change.getRelatedChanges(self))
|
||||
item.change.getRelatedChanges(self, relevant)
|
||||
return relevant
|
||||
|
||||
def maintainConnectionCache(self):
|
||||
self.log.debug("Starting connection cache maintenance")
|
||||
relevant = self._gatherConnectionCacheKeys()
|
||||
|
||||
# We'll only remove changes older than `max_age` from the cache, as
|
||||
# it may take a while for an event that was processed by a connection
|
||||
|
@ -85,6 +85,13 @@ class ChangeKey:
|
||||
msg = self.reference.encode('utf8')
|
||||
self._hash = hashlib.sha256(msg).hexdigest()
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self.reference)
|
||||
|
||||
def __eq__(self, other):
|
||||
return (isinstance(other, ChangeKey) and
|
||||
self.reference == other.reference)
|
||||
|
||||
@classmethod
|
||||
def fromReference(cls, data):
|
||||
data = json.loads(data)
|
||||
|
Loading…
x
Reference in New Issue
Block a user