Look for depends-on lines in dependency searches
Prior to this change we looked for the current change/PR's URL in any other change/PR's message body. This meant that any cross-referencing of URLs would trigger further lookups to determine whether there was a real dependency. Restrict the search to also require the Depends-On string, which limits the number of spidering queries that must be done. This is particularly useful for the GitHub driver because queries are expensive there and may be rate limited.
Change-Id: Ie49fe1a72dc844b14003d942684fd3d2a9478d21
This commit is contained in:
parent
45d96e945f
commit
9453df6936
|
@ -485,6 +485,15 @@ class FakeGithubClient(object):
|
|||
return FakePull(fake_pr)
|
||||
|
||||
def search_issues(self, query):
|
||||
def tokenize(s):
|
||||
# Tokenize with handling for quoted substrings.
|
||||
# This is a bit hacky and really needs a PDA, but our current inputs are
|
||||
# constrained enough that this should work.
|
||||
s = s[:-len(" type:pr is:open in:body")]
|
||||
OR_split = [x.strip() for x in s.split('OR')]
|
||||
tokens = [x.strip('"') for x in OR_split]
|
||||
return tokens
|
||||
|
||||
def query_is_sha(s):
|
||||
return re.match(r'[a-z0-9]{40}', s)
|
||||
|
||||
|
@ -495,24 +504,23 @@ class FakeGithubClient(object):
|
|||
|
||||
# Non-SHA queries are of the form:
|
||||
#
|
||||
# '<url> OR <url> OR ... type:pr is:open in:body'
|
||||
# '"Depends-On: <url>" OR "Depends-On: <url>"
|
||||
# OR ... type:pr is:open in:body'
|
||||
#
|
||||
# For the tests it is currently enough to simply check for the
|
||||
# existence of the URLs in the PR body.
|
||||
urls = [u for u in (s.strip() for s in query.split())
|
||||
if not re.match(r'(OR|(type|is|in):.+)', u)]
|
||||
|
||||
# existence of the Depends-On strings in the PR body.
|
||||
tokens = tokenize(query)
|
||||
terms = set(tokens)
|
||||
results = []
|
||||
for pr in self._data.pull_requests.values():
|
||||
if not pr.body:
|
||||
body = ""
|
||||
else:
|
||||
body = pr.body
|
||||
for url in urls:
|
||||
if url in body:
|
||||
for term in terms:
|
||||
if term in body:
|
||||
issue = FakeIssue(pr)
|
||||
results.append(FakeIssueSearchResult(issue))
|
||||
# No need to continue checking the remaining search terms
|
||||
break
|
||||
|
||||
return iter(results)
|
||||
|
|
|
@ -49,7 +49,8 @@ class GerritChange(Change):
|
|||
self.branch = data['branch']
|
||||
self.url = data['url']
|
||||
urlparse = urllib.parse.urlparse(connection.baseurl)
|
||||
baseurl = "%s%s" % (urlparse.netloc, urlparse.path)
|
||||
baseurl = "%s://%s%s" % (urlparse.scheme, urlparse.netloc,
|
||||
urlparse.path)
|
||||
baseurl = baseurl.rstrip('/')
|
||||
self.uris = [
|
||||
'%s/%s' % (baseurl, self.number),
|
||||
|
@ -91,7 +92,8 @@ class GerritChange(Change):
|
|||
|
||||
def updateFromHTTP(self, data, connection):
|
||||
urlparse = urllib.parse.urlparse(connection.baseurl)
|
||||
baseurl = "%s%s" % (urlparse.netloc, urlparse.path)
|
||||
baseurl = "%s://%s%s" % (urlparse.scheme, urlparse.netloc,
|
||||
urlparse.path)
|
||||
baseurl = baseurl.rstrip('/')
|
||||
current_revision = data['revisions'][data['current_revision']]
|
||||
if self.patchset is None:
|
||||
|
@ -99,7 +101,7 @@ class GerritChange(Change):
|
|||
self.project = connection.source.getProject(data['project'])
|
||||
self.id = data['change_id']
|
||||
self.branch = data['branch']
|
||||
self.url = '%s://%s/%s' % (urlparse.scheme, baseurl, self.number)
|
||||
self.url = '%s/%s' % (baseurl, self.number)
|
||||
self.uris = [
|
||||
'%s/%s' % (baseurl, self.number),
|
||||
'%s/#/c/%s' % (baseurl, self.number),
|
||||
|
|
|
@ -84,7 +84,7 @@ class GerritSource(BaseSource):
|
|||
return changes
|
||||
queries = set()
|
||||
for uri in change.uris:
|
||||
queries.add('message:{%s}' % uri)
|
||||
queries.add('message:{Depends-On: %s}' % uri)
|
||||
query = '(' + ' OR '.join(queries) + ')'
|
||||
results = self.connection.simpleQuery(query)
|
||||
seen = set()
|
||||
|
|
|
@ -1052,7 +1052,8 @@ class GithubConnection(BaseConnection):
|
|||
change.url = self.getPullUrl(
|
||||
event.project_name, event.change_number)
|
||||
change.uris = [
|
||||
'%s/%s/pull/%s' % (self.server, project, change.number),
|
||||
'https://%s/%s/pull/%s' % (
|
||||
self.server, project, change.number),
|
||||
]
|
||||
change.source_event = event
|
||||
change.is_current_patchset = (change.pr.get('head').get('sha') ==
|
||||
|
@ -1159,7 +1160,11 @@ class GithubConnection(BaseConnection):
|
|||
installation_projects.add(project_name)
|
||||
|
||||
keys = set()
|
||||
pattern = ' OR '.join(change.uris)
|
||||
# TODO: Max of 5 OR operators can be used per query and
|
||||
# query can be max of 256 characters long
|
||||
# If making changes to this pattern you may need to update
|
||||
# tests/fakegithub.py
|
||||
pattern = ' OR '.join(['"Depends-On: %s"' % x for x in change.uris])
|
||||
query = '%s type:pr is:open in:body' % pattern
|
||||
# Repeat the search for each installation id (project)
|
||||
for installation_project in installation_projects:
|
||||
|
@ -1247,8 +1252,8 @@ class GithubConnection(BaseConnection):
|
|||
change.pr.get('updated_at'))
|
||||
change.url = change.pr.get('url')
|
||||
change.uris = [
|
||||
'%s/%s/pull/%s' % (self.server, change.project.name,
|
||||
change.number),
|
||||
'https://%s/%s/pull/%s' % (
|
||||
self.server, change.project.name, change.number),
|
||||
]
|
||||
|
||||
if self.sched:
|
||||
|
|
Loading…
Reference in New Issue