Support cross-source dependencies

Additional tests and docs in later patches.

Change-Id: I3b86a1e3dd507fa5e584680fb6c86d35f9ff3e23
Story: 2001334
Task: 5885
This commit is contained in:
James E. Blair
2018-01-02 14:20:17 -08:00
parent 1c2023c108
commit 0e4c791c7b
22 changed files with 429 additions and 125 deletions

View File

@@ -442,8 +442,19 @@ class GerritConnection(BaseConnection):
# In case this change is already in the history we have a
# cyclic dependency and don't need to update ourselves again
# as this gets done in a previous frame of the call stack.
# NOTE(jeblair): I don't think it's possible to hit this case
# anymore as all paths hit the change cache first.
# NOTE(jeblair): The only case where this can still be hit is
# when we get an event for a change with no associated
# patchset; for instance, when the gerrit topic is changed.
# In that case, we will update change 1234,None, which will be
# inserted into the cache as its own entry, but then we will
# resolve the patchset before adding it to the history list,
# then if there are dependencies, we can walk down and then
# back up to the version of this change with a patchset which
# will match the history list but will have bypassed the
# change cache because the previous object had a patchset of
# None. All paths hit the change cache first. To be able to
# drop history, we need to resolve the patchset on events with
# no patchsets before adding the entry to the change cache.
if (history and change.number and change.patchset and
(change.number, change.patchset) in history):
self.log.debug("Change %s is in history" % (change,))
@@ -461,6 +472,11 @@ class GerritConnection(BaseConnection):
change.project = self.source.getProject(data['project'])
change.branch = data['branch']
change.url = data['url']
change.uris = [
'%s/%s' % (self.server, change.number),
'%s/#/c/%s' % (self.server, change.number),
]
max_ps = 0
files = []
for ps in data['patchSets']:
@@ -481,6 +497,7 @@ class GerritConnection(BaseConnection):
change.open = data['open']
change.status = data['status']
change.owner = data['owner']
change.message = data['commitMessage']
if change.is_merged:
# This change is merged, so we don't need to look any further
@@ -494,7 +511,8 @@ class GerritConnection(BaseConnection):
history = history[:]
history.append((change.number, change.patchset))
needs_changes = []
needs_changes = set()
git_needs_changes = []
if 'dependsOn' in data:
parts = data['dependsOn'][0]['ref'].split('/')
dep_num, dep_ps = parts[3], parts[4]
@@ -505,8 +523,11 @@ class GerritConnection(BaseConnection):
# already merged. So even if it is "ABANDONED", we should not
# ignore it.
if (not dep.is_merged) and dep not in needs_changes:
needs_changes.append(dep)
git_needs_changes.append(dep)
needs_changes.add(dep)
change.git_needs_changes = git_needs_changes
compat_needs_changes = []
for record in self._getDependsOnFromCommit(data['commitMessage'],
change):
dep_num = record['number']
@@ -516,10 +537,12 @@ class GerritConnection(BaseConnection):
(change, dep_num, dep_ps))
dep = self._getChange(dep_num, dep_ps, history=history)
if dep.open and dep not in needs_changes:
needs_changes.append(dep)
change.needs_changes = needs_changes
compat_needs_changes.append(dep)
needs_changes.add(dep)
change.compat_needs_changes = compat_needs_changes
needed_by_changes = []
needed_by_changes = set()
git_needed_by_changes = []
if 'neededBy' in data:
for needed in data['neededBy']:
parts = needed['ref'].split('/')
@@ -527,9 +550,13 @@ class GerritConnection(BaseConnection):
self.log.debug("Updating %s: Getting git-needed change %s,%s" %
(change, dep_num, dep_ps))
dep = self._getChange(dep_num, dep_ps, history=history)
if dep.open and dep.is_current_patchset:
needed_by_changes.append(dep)
if (dep.open and dep.is_current_patchset and
dep not in needed_by_changes):
git_needed_by_changes.append(dep)
needed_by_changes.add(dep)
change.git_needed_by_changes = git_needed_by_changes
compat_needed_by_changes = []
for record in self._getNeededByFromCommit(data['id'], change):
dep_num = record['number']
dep_ps = record['currentPatchSet']['number']
@@ -543,9 +570,13 @@ class GerritConnection(BaseConnection):
refresh = (dep_num, dep_ps) not in history
dep = self._getChange(
dep_num, dep_ps, refresh=refresh, history=history)
if dep.open and dep.is_current_patchset:
needed_by_changes.append(dep)
change.needed_by_changes = needed_by_changes
if (dep.open and dep.is_current_patchset
and dep not in needed_by_changes):
compat_needed_by_changes.append(dep)
needed_by_changes.add(dep)
change.compat_needed_by_changes = compat_needed_by_changes
self.sched.onChangeUpdated(change)
return change

View File

@@ -12,12 +12,15 @@
# License for the specific language governing permissions and limitations
# under the License.
import re
import urllib
import logging
import voluptuous as vs
from zuul.source import BaseSource
from zuul.model import Project
from zuul.driver.gerrit.gerritmodel import GerritRefFilter
from zuul.driver.util import scalar_or_list, to_list
from zuul.lib.dependson import find_dependency_headers
class GerritSource(BaseSource):
@@ -44,6 +47,59 @@ class GerritSource(BaseSource):
def getChange(self, event, refresh=False):
return self.connection.getChange(event, refresh)
change_re = re.compile(r"/(\#\/c\/)?(\d+)[\w]*")
def getChangeByURL(self, url):
try:
parsed = urllib.parse.urlparse(url)
except ValueError:
return None
m = self.change_re.match(parsed.path)
if not m:
return None
try:
change_no = int(m.group(2))
except ValueError:
return None
query = "change:%s" % (change_no,)
results = self.connection.simpleQuery(query)
if not results:
return None
change = self.connection._getChange(
results[0]['number'], results[0]['currentPatchSet']['number'])
return change
def getChangesDependingOn(self, change, projects):
queries = set()
for uri in change.uris:
queries.add('message:%s' % uri)
query = '(' + ' OR '.join(queries) + ')'
results = self.connection.simpleQuery(query)
seen = set()
changes = []
for result in results:
for match in find_dependency_headers(result['commitMessage']):
found = False
for uri in change.uris:
if uri in match:
found = True
break
if not found:
continue
key = (result['number'], result['currentPatchSet']['number'])
if key in seen:
continue
seen.add(key)
change = self.connection._getChange(
result['number'], result['currentPatchSet']['number'])
changes.append(change)
return changes
def getCachedChanges(self):
for x in self.connection._change_cache.values():
for y in x.values():
yield y
def getProject(self, name):
p = self.connection.getProject(name)
if not p:

View File

@@ -38,6 +38,15 @@ class GitSource(BaseSource):
def getChange(self, event, refresh=False):
return self.connection.getChange(event, refresh)
def getChangeByURL(self, url):
return None
def getChangesDependingOn(self, change, projects):
return []
def getCachedChanges(self):
return []
def getProject(self, name):
p = self.connection.getProject(name)
if not p:

View File

@@ -658,6 +658,9 @@ class GithubConnection(BaseConnection):
change = self._getChange(project, event.change_number,
event.patch_number, refresh=refresh)
change.url = event.change_url
change.uris = [
'%s/%s/pull/%s' % (self.server, project, change.number),
]
change.updated_at = self._ghTimestampToDate(event.updated_at)
change.source_event = event
change.is_current_patchset = (change.pr.get('head').get('sha') ==
@@ -699,58 +702,72 @@ class GithubConnection(BaseConnection):
raise
return change
def _getDependsOnFromPR(self, body):
prs = []
seen = set()
def getChangesDependingOn(self, change, projects):
changes = []
if not change.uris:
return changes
for match in self.depends_on_re.findall(body):
if match in seen:
self.log.debug("Ignoring duplicate Depends-On: %s" % (match,))
continue
seen.add(match)
# Get the github url
url = match.rsplit()[-1]
# break it into the parts we need
_, org, proj, _, num = url.rsplit('/', 4)
# Get a pull object so we can get the head sha
pull = self.getPull('%s/%s' % (org, proj), int(num))
prs.append(pull)
# Get a list of projects with unique installation ids
installation_ids = set()
installation_projects = set()
return prs
if projects:
# We only need to find changes in projects in the supplied
# ChangeQueue. Find all of the github installations for
# all of those projects, and search using each of them, so
# that if we get the right results based on the
# permissions granted to each of the installations. The
# common case for this is likely to be just one
# installation -- change queues aren't likely to span more
# than one installation.
for project in projects:
installation_id = self.installation_map.get(project)
if installation_id not in installation_ids:
installation_ids.add(installation_id)
installation_projects.add(project)
else:
# We aren't in the context of a change queue and we just
# need to query all installations. This currently only
# happens if certain features of the zuul trigger are
# used; generally it should be avoided.
for project, installation_id in self.installation_map.items():
if installation_id not in installation_ids:
installation_ids.add(installation_id)
installation_projects.add(project)
def _getNeededByFromPR(self, change):
prs = []
seen = set()
# This shouldn't return duplicate issues, but code as if it could
# This leaves off the protocol, but looks for the specific GitHub
# hostname, the org/project, and the pull request number.
pattern = 'Depends-On %s/%s/pull/%s' % (self.server,
change.project.name,
change.number)
keys = set()
pattern = ' OR '.join(change.uris)
query = '%s type:pr is:open in:body' % pattern
# FIXME(tobiash): find a way to query this for different installations
github = self.getGithubClient(change.project.name)
for issue in github.search_issues(query=query):
pr = issue.issue.pull_request().as_dict()
if not pr.get('url'):
continue
if issue in seen:
continue
# the issue provides no good description of the project :\
org, proj, _, num = pr.get('url').split('/')[-4:]
self.log.debug("Found PR %s/%s/%s needs %s/%s" %
(org, proj, num, change.project.name,
change.number))
prs.append(pr)
seen.add(issue)
# Repeat the search for each installation id (project)
for installation_project in installation_projects:
github = self.getGithubClient(installation_project)
for issue in github.search_issues(query=query):
pr = issue.issue.pull_request().as_dict()
if not pr.get('url'):
continue
# the issue provides no good description of the project :\
org, proj, _, num = pr.get('url').split('/')[-4:]
proj = pr.get('base').get('repo').get('full_name')
sha = pr.get('head').get('sha')
key = (proj, num, sha)
if key in keys:
continue
self.log.debug("Found PR %s/%s needs %s/%s" %
(proj, num, change.project.name,
change.number))
keys.add(key)
self.log.debug("Ran search issues: %s", query)
log_rate_limit(self.log, github)
self.log.debug("Ran search issues: %s", query)
log_rate_limit(self.log, github)
return prs
for key in keys:
(proj, num, sha) = key
project = self.source.getProject(proj)
change = self._getChange(project, int(num), patchset=sha)
changes.append(change)
return changes
def _updateChange(self, change, history=None):
# If this change is already in the history, we have a cyclic
# dependency loop and we do not need to update again, since it
# was done in a previous frame.
@@ -770,10 +787,8 @@ class GithubConnection(BaseConnection):
change.reviews = self.getPullReviews(change.project,
change.number)
change.labels = change.pr.get('labels')
change.body = change.pr.get('body')
# ensure body is at least an empty string
if not change.body:
change.body = ''
# ensure message is at least an empty string
change.message = change.pr.get('body') or ''
if history is None:
history = []
@@ -781,38 +796,7 @@ class GithubConnection(BaseConnection):
history = history[:]
history.append((change.project.name, change.number))
needs_changes = []
# Get all the PRs this may depend on
for pr in self._getDependsOnFromPR(change.body):
proj = pr.get('base').get('repo').get('full_name')
pull = pr.get('number')
self.log.debug("Updating %s: Getting dependent "
"pull request %s/%s" %
(change, proj, pull))
project = self.source.getProject(proj)
dep = self._getChange(project, pull,
patchset=pr.get('head').get('sha'),
history=history)
if (not dep.is_merged) and dep not in needs_changes:
needs_changes.append(dep)
change.needs_changes = needs_changes
needed_by_changes = []
for pr in self._getNeededByFromPR(change):
proj = pr.get('base').get('repo').get('full_name')
pull = pr.get('number')
self.log.debug("Updating %s: Getting needed "
"pull request %s/%s" %
(change, proj, pull))
project = self.source.getProject(proj)
dep = self._getChange(project, pull,
patchset=pr.get('head').get('sha'),
history=history)
if not dep.is_merged:
needed_by_changes.append(dep)
change.needed_by_changes = needed_by_changes
self.sched.onChangeUpdated(change)
return change

View File

@@ -37,7 +37,8 @@ class PullRequest(Change):
self.labels = []
def isUpdateOf(self, other):
if (hasattr(other, 'number') and self.number == other.number and
if (self.project == other.project and
hasattr(other, 'number') and self.number == other.number and
hasattr(other, 'patchset') and self.patchset != other.patchset and
hasattr(other, 'updated_at') and
self.updated_at > other.updated_at):

View File

@@ -12,6 +12,8 @@
# License for the specific language governing permissions and limitations
# under the License.
import re
import urllib
import logging
import time
import voluptuous as v
@@ -61,6 +63,38 @@ class GithubSource(BaseSource):
def getChange(self, event, refresh=False):
return self.connection.getChange(event, refresh)
change_re = re.compile(r"/(.*?)/(.*?)/pull/(\d+)[\w]*")
def getChangeByURL(self, url):
try:
parsed = urllib.parse.urlparse(url)
except ValueError:
return None
m = self.change_re.match(parsed.path)
if not m:
return None
org = m.group(1)
proj = m.group(2)
try:
num = int(m.group(3))
except ValueError:
return None
pull = self.connection.getPull('%s/%s' % (org, proj), int(num))
if not pull:
return None
proj = pull.get('base').get('repo').get('full_name')
project = self.getProject(proj)
change = self.connection._getChange(
project, num,
patchset=pull.get('head').get('sha'))
return change
def getChangesDependingOn(self, change, projects):
return self.connection.getChangesDependingOn(change, projects)
def getCachedChanges(self):
return self.connection._change_cache.values()
def getProject(self, name):
p = self.connection.getProject(name)
if not p:

View File

@@ -90,7 +90,18 @@ class ZuulDriver(Driver, TriggerInterface):
if not hasattr(change, 'needed_by_changes'):
self.log.debug(" %s does not support dependencies" % type(change))
return
for needs in change.needed_by_changes:
# This is very inefficient, especially on systems with large
# numbers of github installations. This can be improved later
# with persistent storage of dependency information.
needed_by_changes = set(change.needed_by_changes)
for source in self.sched.connections.getSources():
self.log.debug(" Checking source: %s", source)
needed_by_changes.update(
source.getChangesDependingOn(change, None))
self.log.debug(" Following changes: %s", needed_by_changes)
for needs in needed_by_changes:
self._createParentChangeEnqueuedEvent(needs, pipeline)
def _createParentChangeEnqueuedEvent(self, change, pipeline):