diff --git a/requirements.txt b/requirements.txt index a7a7779daa..bcfb3c762e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,6 +19,7 @@ sqlalchemy alembic cryptography>=1.6 cachecontrol +cachetools pyjwt iso8601 psutil diff --git a/tests/fakegithub.py b/tests/fakegithub.py index 93d280c442..a074e6adc4 100644 --- a/tests/fakegithub.py +++ b/tests/fakegithub.py @@ -251,7 +251,9 @@ class FakeRepository(object): } return FakeResponse(data) - def pull_requests(self, state=None): + def pull_requests(self, state=None, sort=None, direction=None): + # sort and direction are unused currently, but present to match + # real world call signatures. pulls = [] for pull in self.data.pull_requests.values(): if pull.project != self.name: diff --git a/tests/unit/test_github_driver.py b/tests/unit/test_github_driver.py index 7e5db6bbec..40a6207d5e 100644 --- a/tests/unit/test_github_driver.py +++ b/tests/unit/test_github_driver.py @@ -1336,8 +1336,13 @@ class TestGithubShaCache(BaseTestCase): } cache.update('foo/bar', pr_dict) self.assertEqual(cache.get('foo/bar', '123456'), set({1})) - pr_dict['state'] = 'closed' - cache.update('foo/bar', pr_dict) + + # Create 4096 entries so original falls off. + for x in range(0, 4096): + pr_dict['head']['sha'] = str(x) + cache.update('foo/bar', pr_dict) + cache.get('foo/bar', str(x)) + self.assertEqual(cache.get('foo/bar', '123456'), set()) def testMultiInsert(self): @@ -1382,7 +1387,7 @@ class TestGithubShaCache(BaseTestCase): self.assertEqual(cache.get('bar/foo', '789'), set()) self.assertEqual(cache.get('foo/bar', '789'), set()) - def testNoUpdate(self): + def testClosedPRRemains(self): cache = GithubShaCache() pr_dict = { 'head': { @@ -1392,4 +1397,4 @@ class TestGithubShaCache(BaseTestCase): 'state': 'closed', } cache.update('foo/bar', pr_dict) - self.assertEqual(cache.get('bar/foo', '123456'), set()) + self.assertEqual(cache.get('foo/bar', '123456'), set({1})) diff --git a/zuul/driver/github/githubconnection.py b/zuul/driver/github/githubconnection.py index 04bd5b2d48..94c989e062 100644 --- a/zuul/driver/github/githubconnection.py +++ b/zuul/driver/github/githubconnection.py @@ -29,6 +29,7 @@ import cherrypy import cachecontrol from cachecontrol.cache import DictCache from cachecontrol.heuristics import BaseHeuristic +import cachetools import iso8601 import jwt import requests @@ -75,14 +76,21 @@ class GithubShaCache(object): self.projects = {} def update(self, project_name, pr): - project_cache = self.projects.setdefault(project_name, {}) + project_cache = self.projects.setdefault( + project_name, + # Cache up to 4k shas for each project + # Note we cache the actual sha for a PR and the + # merge_commit_sha so we make this fairly large. + cachetools.LRUCache(4096) + ) sha = pr['head']['sha'] number = pr['number'] cached_prs = project_cache.setdefault(sha, set()) - if pr['state'] == 'open': + cached_prs.add(number) + merge_commit_sha = pr.get('merge_commit_sha') + if merge_commit_sha: + cached_prs = project_cache.setdefault(merge_commit_sha, set()) cached_prs.add(number) - else: - cached_prs.discard(number) def get(self, project_name, sha): project_cache = self.projects.get(project_name, {}) @@ -1200,7 +1208,12 @@ class GithubConnection(BaseConnection): github = self.getGithubClient(project_name) owner, project = project_name.split('/') repo = github.repository(owner, project) - for pr in repo.pull_requests(state='open'): + for pr in repo.pull_requests(state='open', + # We sort by updated from oldest to newest + # as that will prefer more recently + # PRs in our LRU cache. + sort='updated', + direction='asc'): pr_dict = pr.as_dict() self._sha_pr_cache.update(project_name, pr_dict) if pr.head.sha != sha: