From a102da225cc505dc4106136dc85eb7a1a643a23f Mon Sep 17 00:00:00 2001
From: "James E. Blair"
Date: Fri, 3 Dec 2021 18:14:40 -0800
Subject: [PATCH] WIP: move relevant change expansion to change cache cleanup

The following is possible:

* Change 1 is updated in Gerrit
* Change 2, which Depends-On change 1, is updated
* Change 3, which Depends-On change 2, is updated
* A long time passes
* Changes 2 and 3 are updated again
* A short time passes
* Change 1 is pruned from the cache because it hasn't been updated in
  2 hours.  Changes 2 and 3 remain since they were recently updated.
* Change 3 is updated
* The driver sees that 3 depends on 2 and looks up 2
* The driver finds 2 in the cache and stops (it does not update 2 and
  therefore will not re-add 1 to the cache)
* Change 3 is added to a pipeline
* Pipeline processing fails because it cannot resolve change 1

To correct this, once we have decided which changes are too old and
should be removed, and have reduced that set by the set of changes in
a pipeline, find the changes related to the remaining candidates and
use them to further reduce the set to prune.

In other words, move the related-change expansion from outside the
cache prune method to inside it, so that we expand the network of
changes inside the cache, not just the network of changes in the
pipeline.

Change-Id: I9a029bc92cf2eecaff7df3598a6c6993d85978a8
TODO: see if this can be tested.
---
 zuul/scheduler.py       |  2 --
 zuul/zk/change_cache.py | 15 ++++++++++++++-
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/zuul/scheduler.py b/zuul/scheduler.py
index 56e2f76e77..f5ee092a57 100644
--- a/zuul/scheduler.py
+++ b/zuul/scheduler.py
@@ -1757,8 +1757,6 @@ class Scheduler(threading.Thread):
                 change_keys = pipeline.change_list.getChangeKeys()
                 relevant_changes = pipeline.manager.resolveChangeKeys(
                     change_keys)
-                for change in relevant_changes:
-                    change.getRelatedChanges(self, relevant)
         return relevant
 
     def maintainConnectionCache(self):
diff --git a/zuul/zk/change_cache.py b/zuul/zk/change_cache.py
index a5b8b89dd6..ab5859e7a5 100644
--- a/zuul/zk/change_cache.py
+++ b/zuul/zk/change_cache.py
@@ -206,6 +206,9 @@ class AbstractChangeCache(ZooKeeperSimpleBase, Iterable, abc.ABC):
         return key, data['data_uuid']
 
     def prune(self, relevant, max_age=3600):  # 1h
+        # Relevant is the list of changes directly in a pipeline.
+        # This method will take care of expanding that out to each
+        # change's network of related changes.
         self.log.debug("Pruning cache")
         cutoff_time = time.time() - max_age
         outdated_versions = dict()
@@ -220,7 +223,17 @@ class AbstractChangeCache(ZooKeeperSimpleBase, Iterable, abc.ABC):
             # Save the version we examined so we can make sure to only
             # delete that version.
             outdated_versions[cache_stat.key] = cache_stat.version
-        to_prune = set(outdated_versions.keys()) - set(relevant)
+        to_prune = set(outdated_versions.keys()) - relevant
+        sched = self.connection.sched
+        # Related changes may have localized networks; keep them
+        # together even if one member hasn't been updated in a while.
+        # Only when the entire network hasn't been modified in max_age
+        # will any change in it be removed.
+        for key in to_prune:
+            source = sched.connections.getSource(key.connection_name)
+            change = source.getChangeByKey(key)
+            change.getRelatedChanges(sched, relevant)
+        to_prune = to_prune - relevant
         for key in to_prune:
             self.delete(key, outdated_versions[key])
         self.log.debug("Done pruning cache")
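
A minimal standalone sketch (not Zuul code) of the set arithmetic the new
prune() performs.  FakeChange, get_related_changes(), and the plain cache /
last_modified dicts are hypothetical stand-ins for Zuul's cached changes,
change.getRelatedChanges(), and the ZooKeeper-backed cache; the sketch
assumes the related-change walk is recursive and follows the dependency
network in both directions:

import time

class FakeChange:
    """Hypothetical stand-in for a cached change; not a Zuul class."""

    def __init__(self, key, related=None):
        self.key = key
        # Keys of the changes in this change's dependency network
        # (both the Depends-On and needed-by directions).
        self.related = related or []

    def get_related_changes(self, cache, relevant):
        # Recursively add every member of this change's network to
        # `relevant` (the stand-in for change.getRelatedChanges()).
        for key in self.related:
            if key in relevant:
                continue
            relevant.add(key)
            other = cache.get(key)
            if other is not None:
                other.get_related_changes(cache, relevant)

def prune(cache, last_modified, relevant, max_age=3600):
    """Return the cache keys that would be deleted."""
    cutoff = time.time() - max_age
    outdated = {k for k, mtime in last_modified.items() if mtime < cutoff}
    # Start with old entries that no pipeline references directly.
    to_prune = outdated - relevant
    # Expand each candidate's network into `relevant`; a stale entry
    # survives if the walk reaches it from a recently used change.
    for key in to_prune:
        change = cache.get(key)
        if change is not None:
            change.get_related_changes(cache, relevant)
    return to_prune - relevant

# The commit-message scenario: 3 Depends-On 2 Depends-On 1, change 1 is
# stale, changes 2 and 3 are fresh, and change 3 sits in a pipeline.
now = time.time()
cache = {
    1: FakeChange(1, related=[2]),
    2: FakeChange(2, related=[1, 3]),
    3: FakeChange(3, related=[2]),
}
last_modified = {1: now - 7200, 2: now - 60, 3: now - 60}
print(prune(cache, last_modified, relevant={3}))  # -> set(); change 1 is kept

In this scenario the expansion reaches change 1 through change 2, so the
whole Depends-On chain stays cached and enqueuing change 3 can still resolve
change 1.  For the TODO, one possible regression test would mirror the same
scenario end to end: build the three-change Depends-On chain in the test
Gerrit, age only change 1's cache entry past max_age, run the cache
maintenance, and assert that change 1 is still resolvable when change 3 is
enqueued.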