zuul/zuul/driver/gitlab/gitlabsource.py
James E. Blair df220cd4d6 Populate missing change cache entries
The drivers are expected to populate the change cache before
passing trigger events to the scheduler so that all the difficult
work is done outside the main loop.  Further, the cache cleanup
is designed to accommodate this so that events in-flight don't have
their change cache entries removed early.

However, at several points since moving the change cache into ZK,
programming errors have caused us to encounter enqueued changes
without entries in the cache.  This usually causes Zuul to abort
pipeline processing and is unrecoverable.

We should continue to address all instances of those since they
represent Zuul not working as designed.  However, it would be nice
if Zuul was able to recover from this.

To that end, this change allows missing changes to be added to the
change cache.

That is primarily accomplished by adjusting the Source.getChange
method to accept a ChangeKey instead of an Event.  Events are only
available when the triggering event happens, whereas a ChangeKey
is available when loading the pipeline state.

A ChangeKey represents the minimal distinguishing characteristics
of a change, and so can be used in all cases.  Some drivers obtain
extra information from events, so we still pass it into the getChange
method if available, but it's entirely optional -- we should still
get a workable Change object whether or not it's supplied.

Ref (and derived: Branch, Tag) objects currently only store their
newrev attribute in the ChangeKey, however we need to be able to
create Ref objects with an oldrev as well.  Since the old and new
revs of a Ref are not inherent to the ref but rather the generating
event, we can't get that from the source system.  So we need to
extend the ChangeKey object to include that.  Adding an extra
attribute is troublesome since the ChangeKey is not a ZKObject and
therefore doesn't have access to the model api version.  However,
it's not too much of a stretch to say that the "revision" field
(which like all ChangeKey fields is driver-dependent) should include
the old and new revs.  Therefore, in these cases the field is
upgraded in a backwards compatible way to include old and newrev
in the standard "old..new" git encoding format.  We also need to
support "None" since that is a valid value in Zuul.

So that we can continue to identify cache errors, any time we encounter
a change key that is not in the cache and we also don't have an
event object, we log an error.

Almost all of this commit is the refactor to accept change keys
instead of events in getChange.  The functional change to populate
the cache if it's missing basically consists of just removing
getChangeByKey and replacing it with getChange.  A test which deletes
the cache midway through is added.

Change-Id: I4252bea6430cd434dbfaacd583db584cc796dfaa
2022-02-17 13:14:23 -08:00

177 lines
6.0 KiB
Python

# Copyright 2019 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import logging
import re
import urllib
import urllib.parse

from zuul.driver.gitlab.gitlabmodel import GitlabRefFilter
from zuul.driver.util import scalar_or_list, to_list
from zuul.model import Project
from zuul.source import BaseSource
from zuul.zk.change_cache import ChangeKey
class GitlabSource(BaseSource):
    """Zuul source backed by a GitLab connection.

    Most operations are delegated to ``self.connection``; operations this
    driver does not provide raise ``NotImplementedError``.
    """

    name = 'gitlab'
    log = logging.getLogger("zuul.source.GitlabSource")

    def __init__(self, driver, connection, config=None):
        """Initialize the source using the connection's canonical hostname."""
        hostname = connection.canonical_hostname
        super().__init__(driver, connection, hostname, config)
        # Captures the project path and the merge request number from the
        # path of a merge request URL; the "/-/" path segment is optional.
        self.change_re = re.compile(r"/(.*?)/(?:-/)?merge_requests/(\d+)")

    def getRefSha(self, project, ref):
        """Return a sha for a given project ref."""
        raise NotImplementedError()

    def waitForRefSha(self, project, ref, old_sha=''):
        """Block until a ref shows up in a given project."""
        raise NotImplementedError()

    def isMerged(self, change, head=None):
        """Determine if change is merged.

        A change without a number is reported as merged; otherwise the
        change's own ``is_merged`` attribute is returned.  ``head`` is
        accepted but not used here.
        """
        if not change.number:
            return True
        return change.is_merged

    def canMerge(self, change, allow_needs, event=None, allow_refresh=False):
        """Determine if change can merge.

        A change without a number is reported as mergeable; otherwise the
        decision is delegated to the connection.  ``allow_refresh`` is
        accepted but not used here.
        """
        if not change.number:
            return True
        return self.connection.canMerge(change, allow_needs, event=event)

    def postConfig(self):
        """Called after configuration has been processed."""
        raise NotImplementedError()

    def getChangeKey(self, event):
        """Build the ChangeKey for the change an event refers to.

        Events with a ``change_number`` produce a ``MergeRequest`` key made
        of the change number and patch number.  Other events are keyed by
        their ref as ``Tag``, ``Branch`` or ``Ref``, with the revision
        encoded as ``"<oldrev>..<newrev>"``.

        Returns None, after logging a warning, when the event has neither a
        change number nor a ref.
        """
        connection_name = self.connection.connection_name
        if event.change_number:
            return ChangeKey(connection_name, event.project_name,
                             'MergeRequest',
                             str(event.change_number),
                             str(event.patch_number))

        # oldrev/newrev are formatted as-is, so a None rev becomes "None".
        revision = f'{event.oldrev}..{event.newrev}'
        if event.ref and event.ref.startswith('refs/tags/'):
            tag = event.ref[len('refs/tags/'):]
            return ChangeKey(connection_name, event.project_name,
                             'Tag', tag, revision)
        if event.ref and event.ref.startswith('refs/heads/'):
            branch = event.ref[len('refs/heads/'):]
            return ChangeKey(connection_name, event.project_name,
                             'Branch', branch, revision)
        if event.ref:
            return ChangeKey(connection_name, event.project_name,
                             'Ref', event.ref, revision)

        # Log the event (not this source object) so the warning identifies
        # what could not be keyed.
        self.log.warning("Unable to format change key for %s", event)
        return None

    def getChange(self, change_key, refresh=False, event=None):
        """Return the change for ``change_key`` via the connection."""
        return self.connection.getChange(change_key, refresh=refresh,
                                         event=event)

    def getChangeByURL(self, url, event):
        """Return the change for a merge request URL, or None.

        None is returned when the URL cannot be parsed, its path does not
        match the merge request pattern, or the connection returns no
        merge request for the project and number.
        """
        try:
            parsed = urllib.parse.urlparse(url)
        except ValueError:
            return None
        m = self.change_re.match(parsed.path)
        if not m:
            return None
        project_name = m.group(1)
        try:
            num = int(m.group(2))
        except ValueError:
            return None
        mr = self.connection.getMR(project_name, num)
        if not mr:
            return None
        # The merge request's 'sha' is used as the revision part of the key.
        change_key = ChangeKey(self.connection.connection_name, project_name,
                               'MergeRequest',
                               str(num), mr['sha'])
        change = self.connection._getChange(change_key, event=event)
        return change

    def getChangesDependingOn(self, change, projects, tenant):
        """Return the connection's result for changes depending on change."""
        return self.connection.getChangesDependingOn(
            change, projects, tenant)

    def getCachedChanges(self):
        """Yield each item from the connection's change cache."""
        yield from self.connection._change_cache

    def getProject(self, name):
        """Return the project called ``name``.

        If the connection does not know the project, a new ``Project`` is
        created, added to the connection, and returned.
        """
        p = self.connection.getProject(name)
        if not p:
            p = Project(name, self)
            self.connection.addProject(p)
        return p

    def getProjectBranches(self, project, tenant, min_ltime=-1):
        """Return the project's branches as reported by the connection."""
        return self.connection.getProjectBranches(project, tenant, min_ltime)

    def getProjectBranchCacheLtime(self):
        """Return the ``ltime`` of the connection's branch cache."""
        return self.connection._branch_cache.ltime

    def getProjectOpenChanges(self, project):
        """Get the open changes for a project."""
        raise NotImplementedError()

    def updateChange(self, change, history=None):
        """Update information for a change."""
        raise NotImplementedError()

    def getGitUrl(self, project):
        """Get the git url for a project."""
        return self.connection.getGitUrl(project)

    def getGitwebUrl(self, project, sha=None):
        """Get the git-web url for a project."""
        raise NotImplementedError()

    def getRequireFilters(self, config):
        """Return a one-element list with a GitlabRefFilter for ``config``.

        The filter is built from the ``open``, ``merged``, ``approved`` and
        ``labels`` entries of ``config``; ``labels`` is passed through
        ``to_list``.
        """
        f = GitlabRefFilter(
            connection_name=self.connection.connection_name,
            open=config.get('open'),
            merged=config.get('merged'),
            approved=config.get('approved'),
            labels=to_list(config.get('labels')),
        )
        return [f]

    def getRejectFilters(self, config):
        """Not implemented for this source."""
        raise NotImplementedError()

    def getRefForChange(self, change):
        """Not implemented for this source."""
        raise NotImplementedError()

    def setChangeAttributes(self, change, **attrs):
        """Update attributes of ``change`` through the connection."""
        return self.connection.updateChangeAttributes(change, **attrs)
# Require model
def getRequireSchema():
    """Return the mapping of ``require`` option names to expected types.

    ``open``, ``merged`` and ``approved`` map to ``bool``; ``labels`` maps
    to ``scalar_or_list(str)``.
    """
    return {
        'open': bool,
        'merged': bool,
        'approved': bool,
        'labels': scalar_or_list(str),
    }
def getRejectSchema():
    """Return the ``reject`` schema, which is a new empty mapping."""
    return {}