Look for depends-on lines in dependency searches

Prior to this change we looked for the current change/PR's URL anywhere
in any other change/PR's message body. This meant any cross-referencing
of URLs would trigger further lookups to determine whether there was a
real dependency there. Restrict the search so that it also requires the
Depends-On string, which limits the number of spidering queries that
must be done.

This is particularly useful for the github driver because queries are
expensive there and may be rate limited.

Change-Id: Ie49fe1a72dc844b14003d942684fd3d2a9478d21
This commit is contained in:
Clark Boylan 2019-12-05 16:58:44 -08:00
parent 45d96e945f
commit 9453df6936
4 changed files with 31 additions and 16 deletions

View File

@ -485,6 +485,15 @@ class FakeGithubClient(object):
return FakePull(fake_pr)
def search_issues(self, query):
def tokenize(s):
    """Split a fake GitHub search query into its OR-separated terms.

    The query is expected to look like
    '"Depends-On: <url>" OR "Depends-On: <url>" type:pr is:open in:body'.
    The trailing search qualifiers are removed when present, the
    remainder is split on the standalone ``OR`` operator, and the
    surrounding whitespace and double quotes are stripped from each term.

    :param s: the search query string.
    :returns: list of non-empty search terms, in query order.
    """
    # Tokenize with handling for quoted substrings.
    # Bit hacky and needs PDA, but our current inputs are
    # constrained enough that this should work.
    suffix = " type:pr is:open in:body"
    # Only drop the qualifiers if they are really there; slicing blindly
    # would chop the tail off a query that lacks them.
    if s.endswith(suffix):
        s = s[:-len(suffix)]
    # Split on the space-delimited operator so that an "OR" embedded in
    # a term (e.g. a URL containing "FORK") does not break the term up.
    terms = (part.strip().strip('"') for part in s.split(' OR '))
    # An empty term is a substring of every body, so never return one.
    return [term for term in terms if term]
def query_is_sha(s):
return re.match(r'[a-z0-9]{40}', s)
@ -495,24 +504,23 @@ class FakeGithubClient(object):
# Non-SHA queries are of the form:
#
# '<url> OR <url> OR ... type:pr is:open in:body'
# '"Depends-On: <url>" OR "Depends-On: <url>"
# OR ... type:pr is:open in:body'
#
# For the tests it is currently enough to simply check for the
# existence of the URLs in the PR body.
urls = [u for u in (s.strip() for s in query.split())
if not re.match(r'(OR|(type|is|in):.+)', u)]
# existence of the Depends-On strings in the PR body.
tokens = tokenize(query)
terms = set(tokens)
results = []
for pr in self._data.pull_requests.values():
if not pr.body:
body = ""
else:
body = pr.body
for url in urls:
if url in body:
for term in terms:
if term in body:
issue = FakeIssue(pr)
results.append(FakeIssueSearchResult(issue))
# No need to continue checking other URLs
break
return iter(results)

View File

@ -49,7 +49,8 @@ class GerritChange(Change):
self.branch = data['branch']
self.url = data['url']
urlparse = urllib.parse.urlparse(connection.baseurl)
baseurl = "%s%s" % (urlparse.netloc, urlparse.path)
baseurl = "%s://%s%s" % (urlparse.scheme, urlparse.netloc,
urlparse.path)
baseurl = baseurl.rstrip('/')
self.uris = [
'%s/%s' % (baseurl, self.number),
@ -91,7 +92,8 @@ class GerritChange(Change):
def updateFromHTTP(self, data, connection):
urlparse = urllib.parse.urlparse(connection.baseurl)
baseurl = "%s%s" % (urlparse.netloc, urlparse.path)
baseurl = "%s://%s%s" % (urlparse.scheme, urlparse.netloc,
urlparse.path)
baseurl = baseurl.rstrip('/')
current_revision = data['revisions'][data['current_revision']]
if self.patchset is None:
@ -99,7 +101,7 @@ class GerritChange(Change):
self.project = connection.source.getProject(data['project'])
self.id = data['change_id']
self.branch = data['branch']
self.url = '%s://%s/%s' % (urlparse.scheme, baseurl, self.number)
self.url = '%s/%s' % (baseurl, self.number)
self.uris = [
'%s/%s' % (baseurl, self.number),
'%s/#/c/%s' % (baseurl, self.number),

View File

@ -84,7 +84,7 @@ class GerritSource(BaseSource):
return changes
queries = set()
for uri in change.uris:
queries.add('message:{%s}' % uri)
queries.add('message:{Depends-On: %s}' % uri)
query = '(' + ' OR '.join(queries) + ')'
results = self.connection.simpleQuery(query)
seen = set()

View File

@ -1052,7 +1052,8 @@ class GithubConnection(BaseConnection):
change.url = self.getPullUrl(
event.project_name, event.change_number)
change.uris = [
'%s/%s/pull/%s' % (self.server, project, change.number),
'https://%s/%s/pull/%s' % (
self.server, project, change.number),
]
change.source_event = event
change.is_current_patchset = (change.pr.get('head').get('sha') ==
@ -1159,7 +1160,11 @@ class GithubConnection(BaseConnection):
installation_projects.add(project_name)
keys = set()
pattern = ' OR '.join(change.uris)
# TODO: Max of 5 OR operators can be used per query and
# query can be max of 256 characters long
# If making changes to this pattern you may need to update
# tests/fakegithub.py
pattern = ' OR '.join(['"Depends-On: %s"' % x for x in change.uris])
query = '%s type:pr is:open in:body' % pattern
# Repeat the search for each installation id (project)
for installation_project in installation_projects:
@ -1247,8 +1252,8 @@ class GithubConnection(BaseConnection):
change.pr.get('updated_at'))
change.url = change.pr.get('url')
change.uris = [
'%s/%s/pull/%s' % (self.server, change.project.name,
change.number),
'https://%s/%s/pull/%s' % (
self.server, change.project.name, change.number),
]
if self.sched: