From cee7a4e2486e58acf385424b8ae3786543daf886 Mon Sep 17 00:00:00 2001 From: Clark Boylan Date: Mon, 18 Feb 2019 11:38:16 -0800 Subject: [PATCH] Switch to LRU based sha to PR cache This updates the github driver to cache PRs by sha using cachetool's LRUCache. We make this change because we need to cache closed PRs so can't rely on the action of closing a PR to remove the PR from the cache. Since we don't have a good method of evicting entries we fall back to LRU with a reasonable cache size (2k commits). Change-Id: I5fb6c8b33f9eed221a8b84e537f02e7dccf2d2df --- requirements.txt | 1 + tests/fakegithub.py | 4 +++- tests/unit/test_github_driver.py | 13 +++++++++---- zuul/driver/github/githubconnection.py | 23 ++++++++++++++++++----- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/requirements.txt b/requirements.txt index a7a7779daa..bcfb3c762e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,6 +19,7 @@ sqlalchemy alembic cryptography>=1.6 cachecontrol +cachetools pyjwt iso8601 psutil diff --git a/tests/fakegithub.py b/tests/fakegithub.py index 93d280c442..a074e6adc4 100644 --- a/tests/fakegithub.py +++ b/tests/fakegithub.py @@ -251,7 +251,9 @@ class FakeRepository(object): } return FakeResponse(data) - def pull_requests(self, state=None): + def pull_requests(self, state=None, sort=None, direction=None): + # sort and direction are unused currently, but present to match + # real world call signatures. pulls = [] for pull in self.data.pull_requests.values(): if pull.project != self.name: diff --git a/tests/unit/test_github_driver.py b/tests/unit/test_github_driver.py index 7e5db6bbec..40a6207d5e 100644 --- a/tests/unit/test_github_driver.py +++ b/tests/unit/test_github_driver.py @@ -1336,8 +1336,13 @@ class TestGithubShaCache(BaseTestCase): } cache.update('foo/bar', pr_dict) self.assertEqual(cache.get('foo/bar', '123456'), set({1})) - pr_dict['state'] = 'closed' - cache.update('foo/bar', pr_dict) + + # Create 4096 entries so original falls off. + for x in range(0, 4096): + pr_dict['head']['sha'] = str(x) + cache.update('foo/bar', pr_dict) + cache.get('foo/bar', str(x)) + self.assertEqual(cache.get('foo/bar', '123456'), set()) def testMultiInsert(self): @@ -1382,7 +1387,7 @@ class TestGithubShaCache(BaseTestCase): self.assertEqual(cache.get('bar/foo', '789'), set()) self.assertEqual(cache.get('foo/bar', '789'), set()) - def testNoUpdate(self): + def testClosedPRRemains(self): cache = GithubShaCache() pr_dict = { 'head': { @@ -1392,4 +1397,4 @@ class TestGithubShaCache(BaseTestCase): 'state': 'closed', } cache.update('foo/bar', pr_dict) - self.assertEqual(cache.get('bar/foo', '123456'), set()) + self.assertEqual(cache.get('foo/bar', '123456'), set({1})) diff --git a/zuul/driver/github/githubconnection.py b/zuul/driver/github/githubconnection.py index 04bd5b2d48..94c989e062 100644 --- a/zuul/driver/github/githubconnection.py +++ b/zuul/driver/github/githubconnection.py @@ -29,6 +29,7 @@ import cherrypy import cachecontrol from cachecontrol.cache import DictCache from cachecontrol.heuristics import BaseHeuristic +import cachetools import iso8601 import jwt import requests @@ -75,14 +76,21 @@ class GithubShaCache(object): self.projects = {} def update(self, project_name, pr): - project_cache = self.projects.setdefault(project_name, {}) + project_cache = self.projects.setdefault( + project_name, + # Cache up to 4k shas for each project + # Note we cache the actual sha for a PR and the + # merge_commit_sha so we make this fairly large. + cachetools.LRUCache(4096) + ) sha = pr['head']['sha'] number = pr['number'] cached_prs = project_cache.setdefault(sha, set()) - if pr['state'] == 'open': + cached_prs.add(number) + merge_commit_sha = pr.get('merge_commit_sha') + if merge_commit_sha: + cached_prs = project_cache.setdefault(merge_commit_sha, set()) cached_prs.add(number) - else: - cached_prs.discard(number) def get(self, project_name, sha): project_cache = self.projects.get(project_name, {}) @@ -1200,7 +1208,12 @@ class GithubConnection(BaseConnection): github = self.getGithubClient(project_name) owner, project = project_name.split('/') repo = github.repository(owner, project) - for pr in repo.pull_requests(state='open'): + for pr in repo.pull_requests(state='open', + # We sort by updated from oldest to newest + # as that will prefer more recently + # PRs in our LRU cache. + sort='updated', + direction='asc'): pr_dict = pr.as_dict() self._sha_pr_cache.update(project_name, pr_dict) if pr.head.sha != sha: