Switch getPullBySha to using the search api

We currently cache the sha to pull request mapping locally in order to
be able to map status events to pull requests. This cache is not
effective in some cases. For instance, on merge a pull request changes
its sha, so the new sha is not known by the cache. Some projects like
Ansible run post-merge jobs on a PR, which in turn emit status events
for the now unknown shas. These events miss the cache, and we need to
sweep through all pull requests of the project to find the correct one.
This can
better be solved by using the search api.

However, the search API is more restricted: it allows only 30 requests
per minute per installation. Based on local testing it is hard, but not
impossible, to reach that limit, so we need to handle hitting it
gracefully. Luckily, the rate-limit response contains all the
information we need to do so.

Change-Id: Idb6c64caa9f948111a58135754174d898ca1528c
This commit is contained in:
Tobias Henkel 2019-06-14 22:17:05 +02:00
parent 4a053ab53a
commit a3e4e83974
No known key found for this signature in database
GPG Key ID: 03750DEC158E5FA2
2 changed files with 25 additions and 22 deletions

View File

@ -283,6 +283,10 @@ class FakeIssue(object):
def pull_request(self):
    # Expose the wrapped fake PR through the FakePull test double,
    # mirroring github3.py's Issue.pull_request() accessor.
    # NOTE(review): FakePull is a project-local fake defined elsewhere
    # in this test module.
    return FakePull(self._fake_pull_request)
@property
def number(self):
    """Pull request number of the underlying fake pull request."""
    fake_pr = self._fake_pull_request
    return fake_pr.number
class FakeFile(object):
def __init__(self, filename):
@ -449,6 +453,14 @@ class FakeGithubClient(object):
def tokenize(s):
    # Break the search query into word tokens (letters, digits and
    # underscore); punctuation and whitespace act as separators.
    return re.findall(r'\w+', s)
def query_is_sha(s):
    """Return a truthy match when the query is a full 40-char commit sha.

    The previous pattern ``re.match(r'[a-z0-9]{40}', s)`` had two
    defects: the character class accepted any lowercase letter (g-z are
    not hex digits), and ``re.match`` only anchors at the start, so a
    longer string with a sha-like 40-char prefix was misclassified as a
    sha query.  Restrict to hex digits and require a full match.
    """
    return re.fullmatch(r'[0-9a-f]{40}', s)
if query_is_sha(query):
return (FakeIssueSearchResult(FakeIssue(pr))
for pr in self._data.pull_requests.values()
if pr.head_sha == query)
parts = tokenize(query)
terms = set()
results = []
@ -471,4 +483,4 @@ class FakeGithubClient(object):
issue = FakeIssue(pr)
results.append(FakeIssueSearchResult(issue))
return results
return iter(results)

View File

@ -1280,7 +1280,7 @@ class GithubConnection(BaseConnection):
def getPull(self, project_name, number, event=None):
log = get_annotated_logger(self.log, event)
github = self.getGithubClient(project_name)
github = self.getGithubClient(project_name, zuul_event_id=event)
owner, proj = project_name.split('/')
for retry in range(5):
try:
@ -1344,6 +1344,8 @@ class GithubConnection(BaseConnection):
def getPullBySha(self, sha, project_name, event):
log = get_annotated_logger(self.log, event)
# Serve from the cache if existing
cached_pr_numbers = self._sha_pr_cache.get(project_name, sha)
if len(cached_pr_numbers) > 1:
raise Exception('Multiple pulls found with head sha %s' % sha)
@ -1352,31 +1354,20 @@ class GithubConnection(BaseConnection):
pr_body, pr_obj = self.getPull(project_name, pr, event)
return pr_body
pulls = []
github = self.getGithubClient(project_name)
owner, repository = project_name.split('/')
repo = github.repository(owner, repository)
for pr in repo.pull_requests(state='open',
# We sort by updated from oldest to newest
# as that will prefer more recently
# PRs in our LRU cache.
sort='updated',
direction='asc'):
pr_dict = pr.as_dict()
self._sha_pr_cache.update(project_name, pr_dict)
if pr.head.sha != sha:
continue
if pr_dict in pulls:
continue
pulls.append(pr_dict)
github = self.getGithubClient(project_name, zuul_event_id=event)
issues = list(github.search_issues(sha))
log.debug('Got PR on project %s for sha %s', project_name, sha)
if len(pulls) > 1:
if len(issues) > 1:
raise Exception('Multiple pulls found with head sha %s' % sha)
if len(pulls) == 0:
if len(issues) == 0:
return None
return pulls.pop()
pr_body, pr_obj = self.getPull(
project_name, issues.pop().issue.number, event)
self._sha_pr_cache.update(project_name, pr_body)
return pr_body
def getPullReviews(self, pr_obj, project, number, event):
log = get_annotated_logger(self.log, event)