Implement a cache of github change objects

This adds a cache of github change objects (PullRequests with associated
data). As an event comes in, the cache is checked for a matching change,
and the data is updated. If no change exists a new one is created and
"updated". When the change queue is processed, the change in question is
pulled from the cache without updating it, saving numerous API calls.
As events come in, the very same object is updated with new data, so
that when the change queue is processed it has the latest known data for
a given change.

This will drastically reduce the number of API calls that were made via
the github API, as well as set things up nicely for switching over to
GraphQL as it becomes possible.

Change-Id: If99505c7bcfebe0916eeb8e4ccaa47cf97f2cc9a
Story: 2000774
Task: 4610
This commit is contained in:
Jesse Keating 2017-06-08 15:08:27 -07:00
parent 9b019d8c03
commit 61040e7ccb
3 changed files with 50 additions and 41 deletions

View File

@ -949,7 +949,8 @@ class FakeGithubConnection(githubconnection.GithubConnection):
'repo': {
'full_name': pr.project
}
}
},
'files': pr.files
}
return data
@ -960,10 +961,6 @@ class FakeGithubConnection(githubconnection.GithubConnection):
pr = prs[0]
return self.getPull(pr.project, pr.number)
def getPullFileNames(self, project, number):
    """Return the ``files`` attribute of the fake pull request ``number``.

    ``number`` is mapped to index ``number - 1`` of
    ``self.pull_requests``. ``project`` is accepted but not used.
    """
    index = number - 1
    return self.pull_requests[index].files
def _getPullReviews(self, owner, project, number):
pr = self.pull_requests[number - 1]
return pr.reviews

View File

@ -118,6 +118,12 @@ class GithubWebhookListener():
event = None
if event:
if event.change_number:
project = self.connection.source.getProject(event.project_name)
self.connection._getChange(project,
event.change_number,
event.patch_number,
refresh=True)
event.project_hostname = self.connection.canonical_hostname
self.connection.logEvent(event)
self.connection.sched.addEvent(event)
@ -463,28 +469,20 @@ class GithubConnection(BaseConnection):
if change not in relevant:
del self._change_cache[key]
def getChange(self, event):
def getChange(self, event, refresh=False):
"""Get the change representing an event."""
project = self.source.getProject(event.project_name)
if event.change_number:
change = PullRequest(event.project_name)
change.project = project
change.number = event.change_number
change = self._getChange(project, event.change_number,
event.patch_number, refresh=refresh)
change.refspec = event.refspec
change.branch = event.branch
change.url = event.change_url
change.updated_at = self._ghTimestampToDate(event.updated_at)
change.patchset = event.patch_number
change.files = self.getPullFileNames(project, change.number)
change.title = event.title
change.status = self._get_statuses(project, event.patch_number)
change.reviews = self.getPullReviews(project, change.number)
change.source_event = event
change.open = self.getPullOpen(event.project_name, change.number)
change.is_current_patchset = self.getIsCurrent(event.project_name,
change.number,
event.patch_number)
change.is_current_patchset = (change.pr.get('head').get('sha') ==
event.patch_number)
elif event.ref:
change = Ref(project)
change.ref = event.ref
@ -497,6 +495,38 @@ class GithubConnection(BaseConnection):
change = Ref(project)
return change
def _getChange(self, project, number, patchset, refresh=False):
key = '%s/%s/%s' % (project.name, number, patchset)
change = self._change_cache.get(key)
if change and not refresh:
return change
if not change:
change = PullRequest(project.name)
change.project = project
change.number = number
change.patchset = patchset
self._change_cache[key] = change
try:
self._updateChange(change)
except Exception:
if key in self._change_cache:
del self._change_cache[key]
raise
return change
def _updateChange(self, change):
self.log.info("Updating %s" % (change,))
change.pr = self.getPull(change.project.name, change.number)
change.files = change.pr.get('files')
change.title = change.pr.get('title')
change.open = change.pr.get('state') == 'open'
change.status = self._get_statuses(change.project,
change.patchset)
change.reviews = self.getPullReviews(change.project,
change.number)
return change
def getGitUrl(self, project):
if self.git_ssh_key:
return 'ssh://git@%s/%s.git' % (self.git_host, project)
@ -535,7 +565,9 @@ class GithubConnection(BaseConnection):
def getPull(self, project_name, number):
    """Fetch a pull request and return it as a dict that includes its files.

    :param project_name: Project name in ``owner/repo`` form; it is split
        on ``/`` to obtain the owner and repository.
    :param number: Pull request number.
    :returns: The pull request's ``as_dict()`` data with an added
        ``'files'`` key holding the filename of every entry returned by
        the pull request's ``files()``.
    """
    github = self.getGithubClient(project_name)
    owner, proj = project_name.split('/')
    # Fetch the pull request object once and reuse it for both the dict
    # form and the file listing, instead of requesting it twice.
    probj = github.pull_request(owner, proj, number)
    pr = probj.as_dict()
    pr['files'] = [f.filename for f in probj.files()]
    log_rate_limit(self.log, github)
    return pr
@ -578,14 +610,6 @@ class GithubConnection(BaseConnection):
return None
return pulls.pop()
def getPullFileNames(self, project, number):
    """Return the filenames of the files in pull request ``number``.

    ``project.name`` is split on ``/`` to obtain the owner and
    repository used to look up the pull request.
    """
    client = self.getGithubClient(project)
    owner, repo = project.name.split('/')
    pull = client.pull_request(owner, repo, number)
    names = []
    for changed in pull.files():
        names.append(changed.filename)
    log_rate_limit(self.log, client)
    return names
def getPullReviews(self, project, number):
owner, proj = project.name.split('/')
@ -723,14 +747,6 @@ class GithubConnection(BaseConnection):
pull_request.remove_label(label)
log_rate_limit(self.log, github)
def getPullOpen(self, project, number):
    """Return True when the pull request's ``state`` is ``'open'``."""
    state = self.getPull(project, number).get('state')
    return state == 'open'
def getIsCurrent(self, project, number, sha):
    """Return True when ``sha`` equals the pull request's head sha."""
    head = self.getPull(project, number).get('head')
    return head.get('sha') == sha
def getPushedFileNames(self, event):
files = set()
for c in event.commits:

View File

@ -58,8 +58,8 @@ class GithubSource(BaseSource):
"""Called after configuration has been processed."""
pass
def getChange(self, event, refresh=False):
    """Return the change for ``event`` by delegating to the connection.

    :param event: The event to look up a change for.
    :param refresh: Forwarded to the connection's ``getChange``;
        defaults to False so callers passing only ``event`` keep working.
    """
    # A single definition replaces the two consecutive ``getChange``
    # definitions, of which the first was shadowed by the second.
    return self.connection.getChange(event, refresh=refresh)
def getProject(self, name):
p = self.connection.getProject(name)
@ -87,10 +87,6 @@ class GithubSource(BaseSource):
"""Get the git-web url for a project."""
return self.connection.getGitwebUrl(project, sha)
def getPullFiles(self, project, number):
    """Return the pull request's filenames as reported by the connection."""
    filenames = self.connection.getPullFileNames(project, number)
    return filenames
def _ghTimestampToDate(self, timestamp):
return time.strptime(timestamp, '%Y-%m-%dT%H:%M:%SZ')