This will encourage a multi-scheduler system to distribute the connection event processing threads among the different schedulers. Change-Id: Ifffc08b067873505ccb0bccc589229c1e744b3be
2199 lines
90 KiB
Python
2199 lines
90 KiB
Python
# Copyright 2011 OpenStack, LLC.
|
|
# Copyright 2012 Hewlett-Packard Development Company, L.P.
|
|
# Copyright 2023-2025 Acme Gating, LLC
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import collections
|
|
import copy
|
|
import datetime
|
|
import enum
|
|
import itertools
|
|
import json
|
|
import logging
|
|
import paramiko
|
|
import pprint
|
|
import re
|
|
import re2
|
|
import requests
|
|
import shlex
|
|
import threading
|
|
import time
|
|
import urllib
|
|
import urllib.parse
|
|
import weakref
|
|
|
|
import cachetools
|
|
from typing import Dict, List
|
|
from uuid import uuid4
|
|
|
|
from opentelemetry import trace
|
|
|
|
from zuul import version as zuul_version
|
|
from zuul.connection import (
|
|
BaseConnection,
|
|
BaseThreadPoolEventConnector,
|
|
ZKBranchCacheMixin,
|
|
ZKChangeCacheMixin,
|
|
)
|
|
from zuul.driver.gerrit.auth import FormAuth
|
|
from zuul.driver.gerrit.gcloudauth import GCloudAuth
|
|
from zuul.driver.gerrit.gerritmodel import GerritChange, GerritTriggerEvent
|
|
from zuul.driver.gerrit.gerriteventssh import GerritSSHEventListener
|
|
from zuul.driver.gerrit.gerriteventchecks import GerritChecksPoller
|
|
from zuul.driver.gerrit.gerriteventkafka import GerritKafkaEventListener
|
|
from zuul.driver.gerrit.gerriteventawskinesis import (
|
|
GerritAWSKinesisEventListener,
|
|
)
|
|
from zuul.driver.gerrit.gerriteventgcloudpubsub import (
|
|
GerritGcloudPubsubEventListener,
|
|
)
|
|
from zuul.driver.git.gitwatcher import GitWatcher
|
|
from zuul.lib import tracing
|
|
from zuul.lib.logutil import get_annotated_logger
|
|
from zuul.model import Ref, Tag, Branch, Project
|
|
from zuul.zk.branch_cache import BranchCache, BranchFlag, BranchInfo
|
|
from zuul.zk.change_cache import (
|
|
AbstractChangeCache,
|
|
ChangeKey,
|
|
ConcurrentUpdateError,
|
|
)
|
|
from zuul.zk.event_queues import ConnectionEventQueue
|
|
|
|
# HTTP timeout in seconds
|
|
TIMEOUT = 30
|
|
# SSH connection timeout
|
|
SSH_TIMEOUT = TIMEOUT
|
|
|
|
# commentSizeLimit default set by Gerrit. Gerrit is a bit
|
|
# vague about what this means, it says
|
|
#
|
|
# Comments which exceed this size will be rejected ... Size
|
|
# computation is approximate and may be off by roughly 1% ...
|
|
# Default is 16k
|
|
#
|
|
# This magic number is int((16 << 10) * 0.98). Robot comments
|
|
# are accounted for separately.
|
|
GERRIT_HUMAN_MESSAGE_LIMIT = 16056
|
|
|
|
|
|
class HTTPConflictException(Exception):
|
|
message = "Received response 409"
|
|
|
|
|
|
class HTTPBadRequestException(Exception):
|
|
pass
|
|
|
|
|
|
class HTTPNotFoundException(Exception):
|
|
pass
|
|
|
|
|
|
class GerritEventProcessingException(Exception):
|
|
pass
|
|
|
|
|
|
class GerritChangeCache(AbstractChangeCache):
|
|
log = logging.getLogger("zuul.driver.GerritChangeCache")
|
|
|
|
CHANGE_TYPE_MAP = {
|
|
"Ref": Ref,
|
|
"Tag": Tag,
|
|
"Branch": Branch,
|
|
"GerritChange": GerritChange,
|
|
}
|
|
|
|
|
|
class ChangeNetworkFuture:
|
|
"""A set of changes related by dependencies"""
|
|
def __init__(self, min_ltime):
|
|
self.changes = set()
|
|
self.event = threading.Event()
|
|
self.query_results = {}
|
|
# We will not accept cached data older than this time
|
|
self.min_ltime = min_ltime
|
|
|
|
def add(self, change_key):
|
|
self.changes.add(change_key)
|
|
|
|
def setComplete(self):
|
|
self.event.set()
|
|
|
|
def wait(self):
|
|
self.event.wait()
|
|
|
|
def hasNumber(self, number):
|
|
for change_key in self.changes:
|
|
if number == change_key.stable_id:
|
|
return True
|
|
return False
|
|
|
|
def addQueryResult(self, number, data):
|
|
existing = self.query_results.get(number)
|
|
existing_time = existing and existing.zuul_query_ltime or 0
|
|
if existing_time < data.zuul_query_ltime:
|
|
self.query_results[number] = data
|
|
|
|
def getQueryResult(self, number):
|
|
data = self.query_results.get(number)
|
|
return data
|
|
|
|
def mergeQueryResults(self, future):
|
|
for (number, data) in future.query_results.items():
|
|
if number not in self.query_results:
|
|
self.query_results[number] = data
|
|
else:
|
|
own_data = self.query_results[number]
|
|
if own_data.zuul_query_ltime < data.zuul_query_ltime:
|
|
self.query_results[number] = data
|
|
|
|
|
|
class ChangeNetworkConflict(Exception):
|
|
"""Raised when two threads are found to be walking the same change graph.
|
|
|
|
The associated ChangeNetworkFuture is the winner of the conflict
|
|
and the caller should wait for it.
|
|
|
|
"""
|
|
|
|
def __init__(self, future):
|
|
self.future = future
|
|
|
|
|
|
class ChangeNetworkManager:
|
|
"""Prevent collisions between multiple threads querying changes"""
|
|
|
|
def __init__(self):
|
|
self.lock = threading.Lock()
|
|
self.futures = []
|
|
self.query_locks = weakref.WeakValueDictionary()
|
|
|
|
def getQueryLock(self, number):
|
|
# TODO: verify that WeakValueDictionary.setdefault is atomic;
|
|
# if so, we can remove this internal locking call.
|
|
with self.lock:
|
|
return self.query_locks.setdefault(number, threading.Lock())
|
|
|
|
def getQueryResult(self, number):
|
|
with self.lock:
|
|
for f in self.futures:
|
|
if f.hasNumber(number):
|
|
data = f.getQueryResult(number)
|
|
if data:
|
|
return data
|
|
|
|
def updateQueryResult(self, number, data):
|
|
with self.lock:
|
|
for f in self.futures:
|
|
if f.hasNumber(number):
|
|
f.addQueryResult(number, data)
|
|
|
|
def permissionToProceed(self, future):
|
|
with self.lock:
|
|
for f in self.futures:
|
|
if f is future:
|
|
continue
|
|
if future.changes.intersection(f.changes):
|
|
# The first thread to request permission for a
|
|
# change always succeeds, second or later threads
|
|
# connected to the same network always lose.
|
|
if future in self.futures:
|
|
self.futures.remove(future)
|
|
# Let the winner benefit from any queries the
|
|
# loser has performed.
|
|
f.mergeQueryResults(future)
|
|
raise ChangeNetworkConflict(f)
|
|
# No conflicts
|
|
if future not in self.futures:
|
|
self.futures.append(future)
|
|
return True
|
|
|
|
def setComplete(self, future):
|
|
if future is None:
|
|
return
|
|
with self.lock:
|
|
try:
|
|
self.futures.remove(future)
|
|
except ValueError:
|
|
pass
|
|
future.setComplete()
|
|
|
|
|
|
class GerritChangeData(object):
|
|
"""Compatability layer for SSH/HTTP
|
|
|
|
This class holds the raw data returned from a change query over
|
|
SSH or HTTP. Most of the work of parsing the data and storing it
|
|
on the change is in the gerritmodel.GerritChange class, however
|
|
this does perform a small amount of parsing of dependencies since
|
|
they are handled outside of that class. This provides an API to
|
|
that data independent of the source.
|
|
|
|
"""
|
|
|
|
SSH = 1
|
|
HTTP = 2
|
|
|
|
def __init__(self, fmt, data, related=None, files=None,
|
|
commentable_files=None, zuul_query_ltime=None):
|
|
self.format = fmt
|
|
self.data = data
|
|
self.files = files
|
|
self.commentable_files = commentable_files
|
|
self.zuul_query_ltime = zuul_query_ltime
|
|
self.submit_type = 'MERGE_IF_NECESSARY'
|
|
|
|
if fmt == self.SSH:
|
|
self.parseSSH(data)
|
|
else:
|
|
self.parseHTTP(data)
|
|
if related:
|
|
self.parseRelatedHTTP(data, related)
|
|
|
|
def parseSSH(self, data):
|
|
self.needed_by = []
|
|
self.depends_on = None
|
|
self.message = data['commitMessage']
|
|
self.current_patchset = str(data['currentPatchSet']['number'])
|
|
self.number = str(data['number'])
|
|
self.id = data['id']
|
|
|
|
if 'dependsOn' in data:
|
|
parts = data['dependsOn'][0]['ref'].split('/')
|
|
self.depends_on = (parts[3], parts[4])
|
|
|
|
for needed in data.get('neededBy', []):
|
|
parts = needed['ref'].split('/')
|
|
self.needed_by.append((parts[3], parts[4]))
|
|
|
|
def parseHTTP(self, data):
|
|
rev = data['revisions'][data['current_revision']]
|
|
self.message = rev['commit']['message']
|
|
self.current_patchset = str(rev['_number'])
|
|
self.number = str(data['_number'])
|
|
self.id = data['change_id']
|
|
|
|
def parseRelatedHTTP(self, data, related):
|
|
self.needed_by = []
|
|
self.depends_on = None
|
|
current_rev = data['revisions'][data['current_revision']]
|
|
self.submit_type = data.get('submit_type', self.submit_type)
|
|
for change in related['changes']:
|
|
# For cherry-pick only, allow the dependency to float to
|
|
# the latest revision since Gerrit will not prohibit
|
|
# merges with dependencies on older versions. We also
|
|
# want Zuul to test the latest patchset if possible, since
|
|
# that is what will end up being merged.
|
|
if self.submit_type == 'CHERRY_PICK':
|
|
ps = change['_current_revision_number']
|
|
else:
|
|
ps = change['_revision_number']
|
|
for parent in current_rev['commit']['parents']:
|
|
if change['commit']['commit'] == parent['commit']:
|
|
self.depends_on = (change['_change_number'], ps)
|
|
break
|
|
else:
|
|
self.needed_by.append((change['_change_number'], ps))
|
|
|
|
|
|
class QueryHistory:
|
|
class Query(enum.Enum):
|
|
SEEN = 1 # Not a real query, just that we've seen the change
|
|
CHANGE = 2 # The main change query
|
|
|
|
def __init__(self):
|
|
self.queries = collections.defaultdict(lambda: dict())
|
|
|
|
def getByKey(self, query, change_key):
|
|
if not isinstance(change_key, ChangeKey):
|
|
raise Exception("Must supply a ChangeKey")
|
|
key = (change_key.stable_id, change_key.revision)
|
|
return self.queries[query].get(key)
|
|
|
|
def add(self, query, change):
|
|
if not isinstance(change, GerritChange):
|
|
raise Exception("Must supply a GerritChange")
|
|
key = (change.number, change.patchset)
|
|
self.queries[query][key] = change
|
|
|
|
|
|
class PeekQueue:
|
|
# If we see events whose timestamp is this long since an event
|
|
# we're waiting for, give up the wait. This is a 10 second propogation
|
|
# delay. Replication timeouts are handled separately and are operator
|
|
# configurable.
|
|
timeout = 10
|
|
|
|
def __init__(self, handler, replication_timeout):
|
|
self.queue = collections.deque()
|
|
self.handler = handler
|
|
self.replication_timeout = replication_timeout
|
|
self.change_merged_cache = cachetools.LRUCache(128)
|
|
|
|
def append(self, event):
|
|
self.queue.append(event)
|
|
|
|
def _getProjectAndRef(self, data):
|
|
# We need to match up ref-updated, change-merged, and patchset-created
|
|
# events to various replication events based on project, ref tuples.
|
|
# Unfortunately gerrit events don't use consistent data structures to
|
|
# present this data across the events. Pull the required info out
|
|
# based on the event type with fallback.
|
|
kind = data.get('type')
|
|
if kind == 'ref-updated':
|
|
refupdate = data.get('refUpdate', {})
|
|
return refupdate.get('project'), refupdate.get('refName')
|
|
elif kind == 'patchset-created':
|
|
change = data.get('change', {})
|
|
patchset = data.get('patchSet', {})
|
|
return change.get('project'), patchset.get('ref')
|
|
elif kind == 'change-merged':
|
|
change = data.get('change', {})
|
|
# change-merged events need to match up the branch name to
|
|
# replication events and not the change ref (it is already
|
|
# replicated and captured by patchset-created events).
|
|
return change.get('project'), data.get('refName')
|
|
elif (project := data.get('project')) and (ref := data.get('ref')):
|
|
return project, ref
|
|
else:
|
|
return None, None
|
|
|
|
def run(self):
|
|
if not self.queue:
|
|
return
|
|
|
|
# Try to do two things:
|
|
# Hold events until they have replicated and
|
|
# Attempt to match ref-updated events with change-merged
|
|
# events.
|
|
ref_replication = collections.defaultdict(list)
|
|
ref_updates = {}
|
|
new_event_list = collections.deque()
|
|
for event in self.queue:
|
|
data = event["payload"]
|
|
kind = data.get('type')
|
|
refupdate = data.get('refUpdate', {})
|
|
ref = refupdate.get('refName')
|
|
inserted = False
|
|
if kind == 'ref-replication-scheduled':
|
|
# Note we can get many ref-replication-scheduled events for
|
|
# a single ref-replication-done event. We can also get
|
|
# overlapping ref-replication-scheduled events for the same
|
|
# ref. For this reason we don't use the -done events and
|
|
# instead rely on counting matching pairs of -scheduled and
|
|
# ref-replicated events. When all have paired up or we timeout
|
|
# the related events are considered valid.
|
|
#
|
|
# Replication events don't use the same refUpdate and refName
|
|
# conventions...
|
|
ref = data.get('ref')
|
|
project = data.get('project')
|
|
ref_replication[(project, ref)].append(event)
|
|
elif kind == 'ref-replicated':
|
|
ref = data.get('ref')
|
|
project = data.get('project')
|
|
replication_events = ref_replication[(project, ref)]
|
|
# TODO(clarkb) Is it necessary to wait for success?
|
|
if (len(replication_events) and
|
|
data.get("status") == "succeeded"):
|
|
# Its possible we start listening when things have already
|
|
# started replicating. In that case we'll empty the
|
|
# replication events list early.
|
|
replication_events.pop()
|
|
elif (kind == 'ref-updated' and
|
|
((not ref.startswith('refs/')) or
|
|
ref.startswith('refs/heads/'))):
|
|
# This is a ref-updated event for a branch, we
|
|
# want to find its change-merged event.
|
|
newrev = refupdate.get('newRev')
|
|
if newrev in self.change_merged_cache:
|
|
# As of writing, this should not happen, but if it
|
|
# does it means gerrit has reversed the order of
|
|
# events!
|
|
del self.change_merged_cache[newrev]
|
|
else:
|
|
ref_updates[newrev] = event
|
|
elif kind == 'change-merged':
|
|
newrev = data.get('newRev')
|
|
if newrev in ref_updates:
|
|
# This is a change-merged event that matches a
|
|
# ref-updated event we're interested in.
|
|
other_event = ref_updates.pop(newrev)
|
|
idx = new_event_list.index(other_event)
|
|
# Put our event immediately before the ref-updated event
|
|
new_event_list.insert(idx, event)
|
|
# Give both events the same earlier ltime so that
|
|
# we don't have ltime going backwards.
|
|
event.zuul_event_ltime = other_event.zuul_event_ltime
|
|
inserted = True
|
|
else:
|
|
self.change_merged_cache[newrev] = True
|
|
if not inserted:
|
|
new_event_list.append(event)
|
|
|
|
while new_event_list:
|
|
event = new_event_list.popleft()
|
|
data = event["payload"]
|
|
project, ref = self._getProjectAndRef(data)
|
|
ok = False
|
|
delay = None
|
|
now = time.time()
|
|
time_since_event = now - event["timestamp"]
|
|
|
|
# First check if replication is up to date.
|
|
if self.replication_timeout <= 0:
|
|
# We are not configured to look at Gerrit replication
|
|
# targets. Ignore replication status.
|
|
ok = True
|
|
elif ref and project and ref_replication[(project, ref)]:
|
|
if time_since_event >= self.replication_timeout:
|
|
# Waited long enough for replication
|
|
del ref_replication[(project, ref)]
|
|
ok = True
|
|
else:
|
|
# If replication hasn't completed wait longer
|
|
ok = False
|
|
# Wait max one second for replication to complete.
|
|
delay = min(self.replication_timeout - time_since_event, 1)
|
|
else:
|
|
# We have replicated or don't have replication events
|
|
# corresponding to our project,ref. Proceed.
|
|
ok = True
|
|
if not ok:
|
|
return delay
|
|
|
|
# Now check if ref-updated and change-merged events need to be
|
|
# reordered
|
|
kind = data.get('type')
|
|
if kind == 'ref-updated':
|
|
refupdate = data.get('refUpdate', {})
|
|
newrev = refupdate.get('newRev')
|
|
if newrev in ref_updates:
|
|
# We're waiting on data for this one
|
|
if time_since_event >= self.timeout:
|
|
# It's been 10 seconds since we saw
|
|
# the event, so it may be missing at this
|
|
# point; release it.
|
|
ok = True
|
|
else:
|
|
# Otherwise, we're still waiting
|
|
ok = False
|
|
delay = min(self.timeout - time_since_event, 1)
|
|
else:
|
|
# Not a branch ref-update
|
|
ok = True
|
|
else:
|
|
# Not a ref-update and not waiting for replication
|
|
ok = True
|
|
if not ok:
|
|
return delay
|
|
|
|
self.queue.remove(event)
|
|
self.handler(event)
|
|
|
|
|
|
class GerritEventConnector(BaseThreadPoolEventConnector):
|
|
"""Move events from Gerrit to the scheduler."""
|
|
|
|
IGNORED_EVENTS = (
|
|
'cache-eviction', # evict-cache plugin
|
|
'fetch-ref-replicated',
|
|
'fetch-ref-replication-scheduled',
|
|
'ref-replication-done'
|
|
)
|
|
|
|
log = logging.getLogger("zuul.GerritEventConnector")
|
|
|
|
def __init__(self, connection, replication_timeout):
|
|
super().__init__(connection)
|
|
self.replication_timeout = replication_timeout
|
|
self._peek_queue = PeekQueue(
|
|
self._peekQueueHandler, replication_timeout)
|
|
|
|
def _getEventProcessor(self, event):
|
|
return GerritEventProcessor(self, event).run
|
|
|
|
def _dispatchEvents(self):
|
|
# This is the first half of the event dispatcher. It reads
|
|
# events from the ssh stream event queue and passes them to a
|
|
# concurrent executor for pre-processing.
|
|
|
|
# This overrides the superclass in order to add the peek queue.
|
|
try:
|
|
peek_ack_refs = [r.ack_ref for r in self._peek_queue.queue]
|
|
event_id_offset = max(
|
|
self.event_queue.eventIdFromAckRef(r)
|
|
for r in list(self._events_in_progress) +
|
|
peek_ack_refs)
|
|
except ValueError:
|
|
event_id_offset = None
|
|
|
|
delay = None
|
|
for event in self.event_queue.iter(event_id_offset):
|
|
if self._shouldStop():
|
|
break
|
|
|
|
self._peek_queue.append(event)
|
|
delay = self._peek_queue.run()
|
|
if delay:
|
|
return delay
|
|
return self._peek_queue.run()
|
|
|
|
def _peekQueueHandler(self, event):
|
|
# Called when the peek queue has decided an event should be processed
|
|
processor = self._getEventProcessor(event)
|
|
future = self._thread_pool.submit(processor)
|
|
|
|
# Events are acknowledged in the event forwarder
|
|
# loop after pre-processing. This way we can
|
|
# ensure that no events are lost.
|
|
self._events_in_progress.add(event.ack_ref)
|
|
self._event_forward_queue.append(future)
|
|
|
|
|
|
class GerritEventProcessor:
|
|
tracer = trace.get_tracer("zuul")
|
|
|
|
def __init__(self, connector, connection_event):
|
|
self.connector = connector
|
|
self.connection = connector.connection
|
|
self.zk_client = self.connection.sched.zk_client
|
|
self.connection_event = connection_event
|
|
self.event_span = tracing.restoreSpanContext(
|
|
self.connection_event.get("span_context"))
|
|
logger = logging.getLogger("zuul.GerritEventProcessor")
|
|
self.zuul_event_id = connection_event["zuul_event_id"]
|
|
self.log = get_annotated_logger(logger, self.zuul_event_id)
|
|
self.events = []
|
|
|
|
def run(self):
|
|
if self.connector._stopped:
|
|
return
|
|
|
|
try:
|
|
attributes = {"rel": "GerritEvent"}
|
|
link = trace.Link(self.event_span.get_span_context(),
|
|
attributes=attributes)
|
|
with self.tracer.start_as_current_span(
|
|
"GerritEventProcessing", links=[link]):
|
|
self.events = self._handleEvent(self.connection_event)
|
|
except GerritEventProcessingException as e:
|
|
self.log.warning("Skipping event due to %s", e)
|
|
except Exception:
|
|
self.log.exception("Skipping event due to:")
|
|
return self.events, self.connection_event
|
|
|
|
def _handleEvent(self, connection_event):
|
|
timestamp = connection_event["timestamp"]
|
|
data = connection_event["payload"]
|
|
event = GerritTriggerEvent.fromGerritEventDict(
|
|
data, timestamp, self.connection, self.zuul_event_id)
|
|
event_type = data.get("type")
|
|
# Recheck events as we may have only cared about replication events
|
|
# in preprocessing.
|
|
if not self.connection.shouldProcessEvent(event_type, event, False):
|
|
self.log.debug("Event did not match pre-filters after "
|
|
"preprocessing %s", event)
|
|
return []
|
|
min_change_ltime = self.zk_client.getCurrentLtime()
|
|
|
|
# In order to perform connection hygene actions like those
|
|
# below, the preFilter method must pass relevant events
|
|
# through to get to this point.
|
|
if event.default_branch_changed:
|
|
self.log.debug('Updating default branch for %s to %s',
|
|
event.project_name, event.branch)
|
|
self.connection._branch_cache.setProjectDefaultBranch(
|
|
event.project_name, event.branch)
|
|
if event._accountfield_unknown:
|
|
self.log.warning("Received unrecognized event type '%s' "
|
|
"from Gerrit. Can not get account information." %
|
|
(event.type,))
|
|
if event._branch_ref_update:
|
|
self.connection.clearConnectionCacheOnBranchEvent(event)
|
|
|
|
change = self._getChange(event, min_change_ltime)
|
|
if (change and change.patchset and
|
|
event.change_number and event.patch_number is None):
|
|
event.patch_number = str(change.patchset)
|
|
|
|
return [event]
|
|
|
|
def _getChange(self, event, min_change_ltime):
|
|
# Grab the change if we are managing the project or if it exists in the
|
|
# cache as it may be a dependency
|
|
change = None
|
|
if event.change_number:
|
|
refresh = True
|
|
change_key = self.connection.source.getChangeKey(event)
|
|
change = self.connection._change_cache.get(change_key)
|
|
if change is None:
|
|
refresh = False
|
|
for tenant in self.connection.sched.abide.tenants.values():
|
|
# TODO(fungi): it would be better to have some simple means
|
|
# of inferring the hostname from the connection, or at
|
|
# least split this into separate method arguments, rather
|
|
# than assembling and passing in a baked string.
|
|
if (None, None) != tenant.getProject('/'.join((
|
|
self.connection.canonical_hostname,
|
|
event.project_name))):
|
|
refresh = True
|
|
break
|
|
else:
|
|
# We have a cache entry for this change Get the
|
|
# query ltime for the cache entry; if it's after the
|
|
# event ltime, we don't need to refresh.
|
|
if (change.zuul_query_ltime and
|
|
change.zuul_query_ltime > min_change_ltime):
|
|
refresh = False
|
|
|
|
if refresh:
|
|
# Call _getChange for the side effect of updating the
|
|
# cache. Note that this modifies Change objects outside
|
|
# the main thread.
|
|
# NOTE(jhesketh): Ideally we'd just remove the change from the
|
|
# cache to denote that it needs updating. However the change
|
|
# object is already used by Items and hence BuildSets etc. and
|
|
# we need to update those objects by reference so that they
|
|
# have the correct/new information and also avoid hitting
|
|
# gerrit multiple times.
|
|
change = self.connection._getChange(
|
|
change_key, refresh=True, event=event,
|
|
allow_key_update=True, change=change,
|
|
update_if_older_than=min_change_ltime)
|
|
return change
|
|
|
|
|
|
class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
|
|
driver_name = 'gerrit'
|
|
log = logging.getLogger("zuul.GerritConnection")
|
|
tracer = trace.get_tracer("zuul")
|
|
iolog = logging.getLogger("zuul.GerritConnection.io")
|
|
depends_on_re = re.compile(r"^Depends-On: (I[0-9a-f]{40})\s*$",
|
|
re.MULTILINE | re.IGNORECASE)
|
|
refname_bad_sequences = re2.compile(
|
|
r"[ \\*\[?:^~\x00-\x1F\x7F]|" # Forbidden characters
|
|
r"@{|\.\.|\.$|^@$|/$|^/|//+") # everything else we can check with re2
|
|
is_merged_replication_timeout = 300
|
|
replication_retry_interval = 5
|
|
_poller_class = GerritChecksPoller
|
|
_ref_watcher_class = GitWatcher
|
|
ref_watcher_poll_interval = 60
|
|
submit_retry_backoff = 10
|
|
|
|
EVENT_SOURCE_NONE = 'none'
|
|
EVENT_SOURCE_STREAM_EVENTS = 'stream-events'
|
|
EVENT_SOURCE_KAFKA = 'kafka'
|
|
EVENT_SOURCE_KINESIS = 'kinesis'
|
|
EVENT_SOURCE_GCLOUD_PUBSUB = 'gcloudpubsub'
|
|
|
|
def __init__(self, driver, connection_name, connection_config):
|
|
super(GerritConnection, self).__init__(driver, connection_name,
|
|
connection_config)
|
|
if 'server' not in self.connection_config:
|
|
raise Exception('server is required for gerrit connections in '
|
|
'%s' % self.connection_name)
|
|
if 'user' not in self.connection_config:
|
|
raise Exception('user is required for gerrit connections in '
|
|
'%s' % self.connection_name)
|
|
|
|
self.user = self.connection_config.get('user')
|
|
self.server = self.connection_config.get('server')
|
|
self.ssh_server = self.connection_config.get('ssh_server', self.server)
|
|
self.canonical_hostname = self.connection_config.get(
|
|
'canonical_hostname', self.server)
|
|
self.port = int(self.connection_config.get('port', 29418))
|
|
self.keyfile = self.connection_config.get('sshkey', None)
|
|
self.keepalive = int(self.connection_config.get('keepalive', 60))
|
|
self.max_dependencies = self.connection_config.get(
|
|
'max_dependencies', None)
|
|
if self.max_dependencies is not None:
|
|
self.max_dependencies = int(self.max_dependencies)
|
|
self.event_source = self.EVENT_SOURCE_NONE
|
|
# TODO(corvus): Document this when the checks api is stable;
|
|
# it's not useful without it.
|
|
enable_stream_events = self.connection_config.get(
|
|
'stream_events', True)
|
|
if enable_stream_events in [
|
|
'true', 'True', '1', 1, 'TRUE', True]:
|
|
self.event_source = self.EVENT_SOURCE_STREAM_EVENTS
|
|
if self.connection_config.get('kafka_bootstrap_servers', None):
|
|
self.event_source = self.EVENT_SOURCE_KAFKA
|
|
elif self.connection_config.get('aws_kinesis_region', None):
|
|
self.event_source = self.EVENT_SOURCE_KINESIS
|
|
elif self.connection_config.get('gcloud_pubsub_project', None):
|
|
self.event_source = self.EVENT_SOURCE_GCLOUD_PUBSUB
|
|
self.replication_timeout = int(self.connection_config.get(
|
|
'replication_timeout', 0))
|
|
|
|
# Thread for whatever event source we use
|
|
self.event_thread = None
|
|
# Next two are only used by checks plugin
|
|
self.poller_thread = None
|
|
self.ref_watcher_thread = None
|
|
self.client = None
|
|
self.watched_checkers = []
|
|
self.project_checker_map = {}
|
|
self.watched_event_filters = []
|
|
self.watched_event_filters_by_tenant = {}
|
|
self.watched_event_filters_lock = threading.Lock()
|
|
self.version = (0, 0, 0)
|
|
self.submit_whole_topic = None
|
|
self.ssh_timeout = SSH_TIMEOUT
|
|
|
|
self.baseurl = self.connection_config.get(
|
|
'baseurl', 'https://%s' % self.server).rstrip('/')
|
|
default_gitweb_url_template = '{baseurl}/gitweb?' \
|
|
'p={project.name}.git;' \
|
|
'a=commitdiff;h={sha}'
|
|
url_template = self.connection_config.get('gitweb_url_template',
|
|
default_gitweb_url_template)
|
|
self.gitweb_url_template = url_template
|
|
|
|
self.change_network_manager = ChangeNetworkManager()
|
|
self.projects = {}
|
|
self.gerrit_event_connector = None
|
|
self.source = driver.getSource(self)
|
|
|
|
self.session = None
|
|
self.password = self.connection_config.get('password', None)
|
|
self.git_over_ssh = self.connection_config.get('git_over_ssh', False)
|
|
self.auth_type = self.connection_config.get('auth_type', None)
|
|
self.anonymous_git = False
|
|
if self.password or self.auth_type == 'gcloud_service':
|
|
self.verify_ssl = self.connection_config.get('verify_ssl', True)
|
|
if self.verify_ssl not in ['true', 'True', '1', 1, 'TRUE']:
|
|
self.verify_ssl = False
|
|
self.user_agent = 'Zuul/%s %s' % (
|
|
zuul_version.release_string,
|
|
requests.utils.default_user_agent())
|
|
self.session = requests.Session()
|
|
if self.auth_type == 'digest':
|
|
authclass = requests.auth.HTTPDigestAuth
|
|
elif self.auth_type == 'form':
|
|
authclass = FormAuth
|
|
elif self.auth_type == 'gcloud_service':
|
|
authclass = GCloudAuth
|
|
# The executors in google cloud may not have access
|
|
# to the gerrit account credentials, so just use
|
|
# anonymous http access for git
|
|
self.anonymous_git = True
|
|
else:
|
|
authclass = requests.auth.HTTPBasicAuth
|
|
self.auth = authclass(self.user, self.password)
|
|
|
|
def setWatchedCheckers(self, checkers_to_watch):
|
|
# TODO: This is not safe for multiple tenants
|
|
# TODO: This does not remove checkers when tenants are deleted
|
|
self.log.debug("Setting watched checkers to %s", checkers_to_watch)
|
|
self.watched_checkers = set()
|
|
self.project_checker_map = {}
|
|
schemes_to_watch = set()
|
|
uuids_to_watch = set()
|
|
for x in checkers_to_watch:
|
|
if 'scheme' in x:
|
|
schemes_to_watch.add(x['scheme'])
|
|
if 'uuid' in x:
|
|
uuids_to_watch.add(x['uuid'])
|
|
if schemes_to_watch:
|
|
# get a list of all configured checkers
|
|
try:
|
|
configured_checkers = self.get('plugins/checks/checkers/')
|
|
except Exception:
|
|
self.log.exception("Unable to get checkers")
|
|
configured_checkers = []
|
|
|
|
# filter it through scheme matches in checkers_to_watch
|
|
for checker in configured_checkers:
|
|
if checker['status'] != 'ENABLED':
|
|
continue
|
|
checker_scheme, checker_id = checker['uuid'].split(':')
|
|
repo = checker['repository']
|
|
repo = self.canonical_hostname + '/' + repo
|
|
# map scheme matches to project names
|
|
if checker_scheme in schemes_to_watch:
|
|
repo_checkers = self.project_checker_map.setdefault(
|
|
repo, set())
|
|
repo_checkers.add(checker['uuid'])
|
|
self.watched_checkers.add(checker['uuid'])
|
|
# add uuids from checkers_to_watch
|
|
for x in uuids_to_watch:
|
|
self.watched_checkers.add(x)
|
|
|
|
def setWatchedEventFilters(self, tenant_name, filters):
|
|
# TODO: This does not remove filters when tenants are deleted
|
|
self.log.debug("Setting watched event filters for %s to %s",
|
|
tenant_name, filters)
|
|
with self.watched_event_filters_lock:
|
|
self.watched_event_filters_by_tenant[tenant_name] = filters
|
|
new_filters = set()
|
|
# Build a set of unique filters across all tenants
|
|
for tenant_filters in \
|
|
self.watched_event_filters_by_tenant.values():
|
|
new_filters.update(tenant_filters)
|
|
self.log.debug("Setting watched event filters to %s",
|
|
new_filters)
|
|
self.watched_event_filters = new_filters
|
|
|
|
def toDict(self):
|
|
d = super().toDict()
|
|
d.update({
|
|
"baseurl": self.baseurl,
|
|
"canonical_hostname": self.canonical_hostname,
|
|
"server": self.server,
|
|
"ssh_server": self.ssh_server,
|
|
"port": self.port,
|
|
})
|
|
return d
|
|
|
|
def url(self, path):
|
|
return self.baseurl + '/a/' + path
|
|
|
|
def get(self, path):
|
|
url = self.url(path)
|
|
self.log.debug('GET: %s' % (url,))
|
|
r = self.session.get(
|
|
url,
|
|
verify=self.verify_ssl,
|
|
auth=self.auth, timeout=TIMEOUT,
|
|
headers={'User-Agent': self.user_agent})
|
|
self.iolog.debug('Received: %s %s' % (r.status_code, r.text,))
|
|
if r.status_code == 409:
|
|
raise HTTPConflictException()
|
|
elif r.status_code == 404:
|
|
raise HTTPNotFoundException()
|
|
elif r.status_code != 200:
|
|
raise Exception("Received response %s" % (r.status_code,))
|
|
ret = None
|
|
if r.text and len(r.text) > 4:
|
|
try:
|
|
ret = json.loads(r.text[4:])
|
|
except Exception:
|
|
self.log.exception(
|
|
"Unable to parse result %s from post to %s" %
|
|
(r.text, url))
|
|
raise
|
|
return ret
|
|
|
|
def post(self, path, data):
|
|
url = self.url(path)
|
|
self.log.debug('POST: %s' % (url,))
|
|
self.log.debug('data: %s' % (data,))
|
|
r = self.session.post(
|
|
url, data=json.dumps(data).encode('utf8'),
|
|
verify=self.verify_ssl,
|
|
auth=self.auth, timeout=TIMEOUT,
|
|
headers={'Content-Type': 'application/json;charset=UTF-8',
|
|
'User-Agent': self.user_agent})
|
|
self.iolog.debug('Received: %s %s' % (r.status_code, r.text,))
|
|
if r.status_code == 409:
|
|
raise HTTPConflictException()
|
|
if r.status_code == 400:
|
|
raise HTTPBadRequestException('Received response 400: %s' % r.text)
|
|
elif r.status_code != 200:
|
|
raise Exception("Received response %s: %s" % (
|
|
r.status_code, r.text))
|
|
ret = None
|
|
if r.text and len(r.text) > 4:
|
|
try:
|
|
ret = json.loads(r.text[4:])
|
|
except Exception:
|
|
self.log.exception(
|
|
"Unable to parse result %s from post to %s" %
|
|
(r.text, url))
|
|
raise
|
|
return ret
|
|
|
|
def getProject(self, name: str) -> Project:
|
|
return self.projects.get(name)
|
|
|
|
def addProject(self, project: Project) -> None:
|
|
self.projects[project.name] = project
|
|
|
|
def getChange(self, change_key, refresh=False, event=None):
|
|
if change_key.connection_name != self.connection_name:
|
|
return None
|
|
if change_key.change_type == 'GerritChange':
|
|
return self._getChange(change_key, refresh=refresh, event=event)
|
|
elif change_key.change_type == 'Tag':
|
|
return self._getTag(change_key, refresh=refresh, event=event)
|
|
elif change_key.change_type == 'Branch':
|
|
return self._getBranch(change_key, refresh=refresh, event=event)
|
|
elif change_key.change_type == 'Ref':
|
|
return self._getRef(change_key, refresh=refresh, event=event)
|
|
|
|
def _checkMaxDependencies(self, change, history):
|
|
if change and history:
|
|
history.add(history.Query.SEEN, change)
|
|
if (self.max_dependencies is not None and
|
|
len(history.queries[history.Query.SEEN]) >
|
|
self.max_dependencies):
|
|
raise GerritEventProcessingException(
|
|
f"Change {change} has too many dependencies")
|
|
|
|
def _getChange(self, change_key, refresh=False, history=None,
|
|
network_future=None, event=None,
|
|
allow_key_update=False, change=None,
|
|
update_if_older_than=None):
|
|
# Ensure number and patchset are str
|
|
if change is None:
|
|
# We may be called from the event handler which has
|
|
# already gotten a change from the cache but thinks it may
|
|
# need to be updated.
|
|
change = self._change_cache.get(change_key)
|
|
self._checkMaxDependencies(change, history)
|
|
if change and not refresh:
|
|
return change
|
|
if not change:
|
|
change = GerritChange(None)
|
|
change.number = change_key.stable_id
|
|
change.patchset = change_key.revision
|
|
self._checkMaxDependencies(change, history)
|
|
network_start = network_future is None
|
|
while True:
|
|
if network_future is None:
|
|
network_future = ChangeNetworkFuture(update_if_older_than)
|
|
network_future.add(change_key)
|
|
try:
|
|
self.change_network_manager.permissionToProceed(network_future)
|
|
return self._updateChange(change_key, change, event, history,
|
|
network_future, allow_key_update)
|
|
except ChangeNetworkConflict as e:
|
|
if network_start:
|
|
# This is the top of the stack. We wait for the
|
|
# future and try again.
|
|
e.future.wait()
|
|
self.change_network_manager.setComplete(network_future)
|
|
# Start a new future but copy over the query cache
|
|
# results from the winner. In case we decide that
|
|
# the winning network did not update our change(s)
|
|
# recently enough, we may still be able to benefit
|
|
# from some of the previously run queries.
|
|
network_future = ChangeNetworkFuture(update_if_older_than)
|
|
network_future.query_results = e.future.query_results
|
|
change = self._change_cache.get(change_key) or change
|
|
continue
|
|
else:
|
|
raise
|
|
finally:
|
|
if network_start:
|
|
self.change_network_manager.setComplete(network_future)
|
|
|
|
def _getTag(self, change_key, refresh=False, event=None):
|
|
tag = change_key.stable_id
|
|
change = self._change_cache.get(change_key)
|
|
if change:
|
|
if refresh:
|
|
self._change_cache.updateChangeWithRetry(
|
|
change_key, change, lambda c: None)
|
|
return change
|
|
project = self.source.getProject(change_key.project_name)
|
|
change = Tag(project)
|
|
change.tag = tag
|
|
change.ref = f'refs/tags/{tag}'
|
|
change.oldrev = change_key.oldrev
|
|
change.newrev = change_key.newrev
|
|
change.url = self._getWebUrl(project, sha=change.newrev)
|
|
try:
|
|
self._change_cache.set(change_key, change)
|
|
except ConcurrentUpdateError:
|
|
change = self._change_cache.get(change_key)
|
|
return change
|
|
|
|
def _getBranch(self, change_key, refresh=False, event=None):
|
|
branch = change_key.stable_id
|
|
change = self._change_cache.get(change_key)
|
|
if change:
|
|
if refresh:
|
|
self._change_cache.updateChangeWithRetry(
|
|
change_key, change, lambda c: None)
|
|
return change
|
|
project = self.source.getProject(change_key.project_name)
|
|
change = Branch(project)
|
|
change.branch = branch
|
|
change.ref = f'refs/heads/{branch}'
|
|
change.oldrev = change_key.oldrev
|
|
change.newrev = change_key.newrev
|
|
change.url = self._getWebUrl(project, sha=change.newrev)
|
|
try:
|
|
self._change_cache.set(change_key, change)
|
|
except ConcurrentUpdateError:
|
|
change = self._change_cache.get(change_key)
|
|
return change
|
|
|
|
def _getRef(self, change_key, refresh=False, event=None):
|
|
change = self._change_cache.get(change_key)
|
|
if change:
|
|
if refresh:
|
|
self._change_cache.updateChangeWithRetry(
|
|
change_key, change, lambda c: None)
|
|
return change
|
|
project = self.source.getProject(change_key.project_name)
|
|
change = Ref(project)
|
|
change.ref = change_key.stable_id
|
|
change.oldrev = change_key.oldrev
|
|
change.newrev = change_key.newrev
|
|
change.url = self._getWebUrl(project, sha=change.newrev)
|
|
try:
|
|
self._change_cache.set(change_key, change)
|
|
except ConcurrentUpdateError:
|
|
change = self._change_cache.get(change_key)
|
|
return change
|
|
|
|
def _getDependsOnFromCommit(self, message, change, event):
|
|
log = get_annotated_logger(self.log, event)
|
|
records = []
|
|
seen = set()
|
|
for match in self.depends_on_re.findall(message):
|
|
if match in seen:
|
|
log.debug("Ignoring duplicate Depends-On: %s", match)
|
|
continue
|
|
seen.add(match)
|
|
query = "change:%s" % (match,)
|
|
log.debug("Updating %s: Running query %s to find needed changes",
|
|
change, query)
|
|
records.extend(self.simpleQuery(query, event=event))
|
|
return [(x.number, x.current_patchset) for x in records]
|
|
|
|
def _getNeededByFromCommit(self, change_id, change, event):
|
|
log = get_annotated_logger(self.log, event)
|
|
records = []
|
|
seen = set()
|
|
query = 'message:{%s}' % change_id
|
|
log.debug("Updating %s: Running query %s to find changes needed-by",
|
|
change, query)
|
|
results = self.simpleQuery(query, event=event)
|
|
for result in results:
|
|
for match in self.depends_on_re.findall(
|
|
result.message):
|
|
if match != change_id:
|
|
continue
|
|
# Note: This is not a ChangeCache ChangeKey
|
|
key = (result.number, result.current_patchset)
|
|
if key in seen:
|
|
continue
|
|
log.debug("Updating %s: Found change %s,%s "
|
|
"needs %s from commit",
|
|
change, key[0], key[1], change_id)
|
|
seen.add(key)
|
|
records.append(result)
|
|
return [(x.number, x.current_patchset) for x in records]
|
|
|
|
def _updateChange(self, key, change, event, history, network_future,
|
|
allow_key_update=False):
|
|
log = get_annotated_logger(self.log, event)
|
|
|
|
# In case this change is already in the history we have a
|
|
# cyclic dependency and don't need to update ourselves again
|
|
# as this gets done in a previous frame of the call stack.
|
|
# NOTE(jeblair): The only case where this can still be hit is
|
|
# when we get an event for a change with no associated
|
|
# patchset; for instance, when the gerrit topic is changed.
|
|
# In that case, we will update change 1234,None, which will be
|
|
# inserted into the cache as its own entry, but then we will
|
|
# resolve the patchset before adding it to the history list,
|
|
# then if there are dependencies, we can walk down and then
|
|
# back up to the version of this change with a patchset which
|
|
# will match the history list but will have bypassed the
|
|
# change cache because the previous object had a patchset of
|
|
# None. All paths hit the change cache first. To be able to
|
|
# drop history, we need to resolve the patchset on events with
|
|
# no patchsets before adding the entry to the change cache.
|
|
if history and change.number and change.patchset:
|
|
history_change = history.getByKey(history.Query.CHANGE, key)
|
|
if history_change:
|
|
log.debug("Change %s is in history", change)
|
|
return history_change
|
|
|
|
if (network_future.min_ltime and
|
|
change.zuul_query_ltime and
|
|
change.zuul_query_ltime > network_future.min_ltime):
|
|
# The change was updated in another thread while we were
|
|
# processing this change network.
|
|
log.debug("Change %s is up to date", change)
|
|
return change
|
|
|
|
log.info("Updating %s", change)
|
|
data = self.queryChange(change.number, event=event,
|
|
min_ltime=network_future.min_ltime)
|
|
# Do a local update without updating the cache so that we can
|
|
# reference this change when we recurse for dependencies.
|
|
change.update(data, {}, self)
|
|
|
|
# Get the dependencies for this change, and recursively update
|
|
# dependent changes (recursively calling this method).
|
|
if not change.is_merged:
|
|
extra = self._updateChangeDependencies(
|
|
log, key, change, data, event, history, network_future)
|
|
else:
|
|
extra = {}
|
|
|
|
# Actually update this change in the change cache.
|
|
def _update_change(c):
|
|
return c.update(data, extra, self)
|
|
|
|
change = self._change_cache.updateChangeWithRetry(
|
|
key, change, _update_change, allow_key_update=allow_key_update)
|
|
|
|
return change
|
|
|
|
def _updateChangeDependencies(self, log, key, change, data, event,
|
|
history, network_future):
|
|
if history is None:
|
|
history = QueryHistory()
|
|
history.add(history.Query.CHANGE, change)
|
|
|
|
needs_changes = set()
|
|
git_needs_changes = []
|
|
if data.depends_on is not None:
|
|
dep_num, dep_ps = data.depends_on
|
|
log.debug("Updating %s: Getting git-dependent change %s,%s",
|
|
change, dep_num, dep_ps)
|
|
dep_key = ChangeKey(self.connection_name, None,
|
|
'GerritChange', str(dep_num), str(dep_ps))
|
|
dep = self._getChange(dep_key, history=history,
|
|
network_future=network_future,
|
|
event=event)
|
|
# This is a git commit dependency. So we only ignore it if it is
|
|
# already merged. So even if it is "ABANDONED", we should not
|
|
# ignore it.
|
|
|
|
if (not dep.is_merged) and dep not in needs_changes:
|
|
git_needs_changes.append(dep_key.reference)
|
|
needs_changes.add(dep_key.reference)
|
|
|
|
compat_needs_changes = []
|
|
for (dep_num, dep_ps) in self._getDependsOnFromCommit(
|
|
data.message, change, event):
|
|
log.debug("Updating %s: Getting commit-dependent "
|
|
"change %s,%s", change, dep_num, dep_ps)
|
|
dep_key = ChangeKey(self.connection_name, None,
|
|
'GerritChange', str(dep_num), str(dep_ps))
|
|
dep = self._getChange(dep_key, history=history,
|
|
network_future=network_future,
|
|
event=event)
|
|
if dep.open and dep not in needs_changes:
|
|
compat_needs_changes.append(dep_key.reference)
|
|
needs_changes.add(dep_key.reference)
|
|
|
|
needed_by_changes = set()
|
|
git_needed_by_changes = []
|
|
for (dep_num, dep_ps) in data.needed_by:
|
|
try:
|
|
log.debug("Updating %s: Getting git-needed change %s,%s",
|
|
change, dep_num, dep_ps)
|
|
dep_key = ChangeKey(self.connection_name, None,
|
|
'GerritChange', str(dep_num), str(dep_ps))
|
|
dep = self._getChange(dep_key, history=history,
|
|
network_future=network_future,
|
|
event=event)
|
|
if (dep.open and dep.is_current_patchset and
|
|
dep not in needed_by_changes):
|
|
git_needed_by_changes.append(dep_key.reference)
|
|
needed_by_changes.add(dep_key.reference)
|
|
except ChangeNetworkConflict:
|
|
raise
|
|
# We ignore GerritEventProcessingExceptions (e.g. when exceeding
|
|
# max_dependencies) here because they are not important on the
|
|
# "needed_by" side of the dependency chain. Those are already
|
|
# checked for on the "needs" side.
|
|
except Exception:
|
|
log.exception("Failed to get git-needed change %s,%s",
|
|
dep_num, dep_ps)
|
|
|
|
compat_needed_by_changes = []
|
|
for (dep_num, dep_ps) in self._getNeededByFromCommit(
|
|
data.id, change, event):
|
|
try:
|
|
log.debug("Updating %s: Getting commit-needed change %s,%s",
|
|
change, dep_num, dep_ps)
|
|
# Because a commit needed-by may be a cross-repo
|
|
# dependency, cause that change to refresh so that it will
|
|
# reference the latest patchset of its Depends-On (this
|
|
# change). In case the dep is already in history we already
|
|
# refreshed this change so refresh is not needed in this case.
|
|
dep_key = ChangeKey(self.connection_name, None,
|
|
'GerritChange', str(dep_num), str(dep_ps))
|
|
refresh = not history.getByKey(history.Query.CHANGE, dep_key)
|
|
dep = self._getChange(
|
|
dep_key, refresh=refresh, history=history,
|
|
network_future=network_future,
|
|
event=event)
|
|
if (dep.open and dep.is_current_patchset
|
|
and dep not in needed_by_changes):
|
|
compat_needed_by_changes.append(dep_key.reference)
|
|
needed_by_changes.add(dep_key.reference)
|
|
except ChangeNetworkConflict:
|
|
raise
|
|
# We ignore GerritEventProcessingExceptions (e.g. when exceeding
|
|
# max_dependencies) here because they are not important on the
|
|
# "needed_by" side of the dependency chain. Those are already
|
|
# checked for on the "needs" side.
|
|
except Exception:
|
|
log.exception("Failed to get commit-needed change %s,%s",
|
|
dep_num, dep_ps)
|
|
|
|
return dict(
|
|
git_needs_changes=git_needs_changes,
|
|
compat_needs_changes=compat_needs_changes,
|
|
git_needed_by_changes=git_needed_by_changes,
|
|
compat_needed_by_changes=compat_needed_by_changes,
|
|
)
|
|
|
|
def isMerged(self, change, head=None):
|
|
self.log.debug("Checking if change %s is merged" % change)
|
|
if not change.number:
|
|
self.log.debug("Change has no number; considering it merged")
|
|
# Good question. It's probably ref-updated, which, ah,
|
|
# means it's merged.
|
|
return True
|
|
|
|
data = self.queryChange(change.number)
|
|
# Note: in the case of a cherry-pick there will be a new
|
|
# patchset (i.e., change.patchset might be 3 while
|
|
# data.current_patchset would be 4). We could add a new
|
|
# change to the cache here, but that would be more expensive.
|
|
# Also, the ssh-query code path with cherry-pick does not have
|
|
# a way of determining the current patchset for dependencies,
|
|
# so in that case, it is useful for any changes still pointing
|
|
# at this old version to think it is merged.
|
|
key = ChangeKey(self.connection_name, None,
|
|
'GerritChange', str(change.number),
|
|
str(change.patchset))
|
|
|
|
def _update_change(c):
|
|
c.update(data, {}, self)
|
|
|
|
self._change_cache.updateChangeWithRetry(key, change, _update_change)
|
|
|
|
if change.is_merged:
|
|
self.log.debug("Change %s is merged" % (change,))
|
|
else:
|
|
self.log.debug("Change %s is not merged" % (change,))
|
|
if not head:
|
|
return change.is_merged
|
|
if not change.is_merged:
|
|
return False
|
|
|
|
ref = 'refs/heads/' + change.branch
|
|
self.log.debug("Waiting for %s to appear in git repo" % (change))
|
|
if not hasattr(change, '_ref_sha'):
|
|
self.log.error("Unable to confirm change %s in git repo: "
|
|
"the change has not been reported; "
|
|
"this pipeline may be misconfigured "
|
|
"(check for multiple Gerrit connections)." %
|
|
(change,))
|
|
return False
|
|
|
|
if self._waitForRefSha(change.project, ref, change._ref_sha):
|
|
self.log.debug("Change %s is in the git repo" %
|
|
(change))
|
|
return True
|
|
self.log.debug("Change %s did not appear in the git repo" %
|
|
(change))
|
|
return False
|
|
|
|
def _waitForRefSha(self, project: Project,
|
|
ref: str, old_sha: str='') -> bool:
|
|
# Wait for the ref to show up in the repo
|
|
start = time.time()
|
|
while time.time() - start < self.is_merged_replication_timeout:
|
|
sha = self.getRefSha(project, ref)
|
|
if old_sha != sha:
|
|
return True
|
|
time.sleep(self.replication_retry_interval)
|
|
return False
|
|
|
|
def getRefSha(self, project: Project, ref: str) -> str:
|
|
refs = {} # type: Dict[str, str]
|
|
try:
|
|
refs = self.getInfoRefs(project)
|
|
except Exception:
|
|
self.log.exception("Exception looking for ref %s" %
|
|
ref)
|
|
sha = refs.get(ref, '')
|
|
return sha
|
|
|
|
def canMerge(self, change, allow_needs, event=None):
|
|
log = get_annotated_logger(self.log, event)
|
|
if not change.number:
|
|
log.debug("Change has no number; considering it merged")
|
|
# Good question. It's probably ref-updated, which, ah,
|
|
# means it's merged.
|
|
return True
|
|
if change.wip:
|
|
return False
|
|
missing_labels = change.missing_labels - set(allow_needs)
|
|
if missing_labels:
|
|
self.log.debug("Unable to merge due to "
|
|
"missing labels: %s", missing_labels)
|
|
return False
|
|
for sr in change.submit_requirements:
|
|
if sr.get('status') == 'UNSATISFIED':
|
|
# Otherwise, we don't care and should skip.
|
|
|
|
# We're going to look at each unsatisfied submit
|
|
# requirement, and if one of the involved labels is an
|
|
# "allow_needs" label, we will assume that Zuul may be
|
|
# able to take an action which can cause the
|
|
# requirement to be satisfied, and we will ignore it.
|
|
# Otherwise, it is likely a requirement that Zuul can
|
|
# not alter in which case the requirement should stand
|
|
# and block merging.
|
|
result = sr.get("submittability_expression_result", {})
|
|
expression = result.get("expression", '')
|
|
expr_contains_allow = False
|
|
for allow in allow_needs:
|
|
if f'label:{allow}' in expression:
|
|
expr_contains_allow = True
|
|
break
|
|
if not expr_contains_allow:
|
|
self.log.debug("Unable to merge due to "
|
|
"submit requirement: %s", sr)
|
|
return False
|
|
return True
|
|
|
|
def getProjectOpenChanges(self, project: Project) -> List[GerritChange]:
|
|
# This is a best-effort function in case Gerrit is unable to return
|
|
# a particular change. It happens.
|
|
query = "project:{%s} status:open" % (project.name,)
|
|
self.log.debug("Running query %s to get project open changes" %
|
|
(query,))
|
|
data = self.simpleQuery(query)
|
|
changes = [] # type: List[GerritChange]
|
|
for record in data:
|
|
try:
|
|
change_key = ChangeKey(self.connection_name, None,
|
|
'GerritChange',
|
|
str(record.number),
|
|
str(record.current_patchset))
|
|
changes.append(self._getChange(change_key))
|
|
except Exception:
|
|
self.log.exception("Unable to query change %s",
|
|
record.number)
|
|
return changes
|
|
|
|
@staticmethod
|
|
def _checkRefFormat(refname: str) -> bool:
|
|
# These are the requirements for valid ref names as per
|
|
# man git-check-ref-format
|
|
parts = refname.split('/')
|
|
return \
|
|
(GerritConnection.refname_bad_sequences.search(refname) is None and
|
|
len(parts) > 1 and
|
|
not any(part.startswith('.') or part.endswith('.lock')
|
|
for part in parts))
|
|
|
|
def _getProjectBranchesRequiredFlags(
|
|
self, exclude_unprotected, exclude_locked):
|
|
return BranchFlag.PRESENT
|
|
|
|
def _filterProjectBranches(
|
|
self, branch_infos, exclude_unprotected, exclude_locked):
|
|
return branch_infos
|
|
|
|
def _fetchProjectBranches(self, project, required_flags):
|
|
refs = self.getInfoRefs(project)
|
|
heads = [str(k[len('refs/heads/'):]) for k in refs
|
|
if k.startswith('refs/heads/') and
|
|
GerritConnection._checkRefFormat(k)]
|
|
branch_infos = [BranchInfo(h, present=True) for h in heads]
|
|
self.log.debug("Fetched %s branches for %s", len(heads), project)
|
|
return BranchFlag.PRESENT, branch_infos
|
|
|
|
def _fetchProjectDefaultBranch(self, project):
|
|
if not self.session:
|
|
return 'master'
|
|
head = None
|
|
for attempt in range(1, 4):
|
|
try:
|
|
head = self.get(
|
|
'projects/%s/HEAD' % (
|
|
urllib.parse.quote(project.name, safe=''),
|
|
))
|
|
if head.startswith('refs/heads/'):
|
|
head = head[len('refs/heads/'):]
|
|
return head
|
|
except HTTPNotFoundException:
|
|
self.log.exception("Unable to get HEAD for %s",
|
|
project)
|
|
return head
|
|
except Exception as e:
|
|
if attempt >= 3:
|
|
self.log.exception("Unable to get HEAD for %s",
|
|
project)
|
|
return head
|
|
self.log.warning("Unable to get HEAD for %s, will retry: %s",
|
|
project, e)
|
|
time.sleep(1)
|
|
|
|
def isBranchProtected(self, project_name, branch_name,
|
|
zuul_event_id=None):
|
|
# TODO: This could potentially be expanded to do something
|
|
# with user-specific branches.
|
|
return True
|
|
|
|
def shouldProcessEvent(self, event_type, event, replication):
|
|
valid_events = event._branch_ref_update or \
|
|
event.default_branch_changed or \
|
|
event.change_number
|
|
if replication:
|
|
valid_events = valid_events or \
|
|
event_type == "ref-replication-scheduled" or \
|
|
event_type == "ref-replicated"
|
|
if not (valid_events):
|
|
# Check if the events match specific filters and are
|
|
# valid.
|
|
for event_filter in self.watched_event_filters:
|
|
r = event_filter.preFilter(event)
|
|
if r:
|
|
break
|
|
else:
|
|
return False
|
|
return True
|
|
|
|
def addEvent(self, data):
|
|
# NOTE(mnaser): Certain plugins fire events which end up causing
|
|
# an unrecognized event log *and* a traceback if they
|
|
# do not contain full project information, we skip them
|
|
# here to keep logs clean.
|
|
event_type = data.get('type')
|
|
if event_type in GerritEventConnector.IGNORED_EVENTS:
|
|
return
|
|
# Due to notedb, an high percentage of all events Zuul
|
|
# processes are ref-updated of the /meta ref, and that is
|
|
# unlikely to be used in Zuul. Skip those here so that we
|
|
# reduce traffic on the event queue.
|
|
if event_type == 'ref-updated':
|
|
refname = data.get('refUpdate', {}).get('refName', '')
|
|
if (refname.startswith('refs/changes/') and
|
|
refname.endswith('/meta')):
|
|
return
|
|
|
|
# Partially realize a GerritTriggerEvent with enough
|
|
# information to determine branches/refs/etc.
|
|
event = GerritTriggerEvent.fromGerritEventDict(
|
|
data, None, self, None)
|
|
|
|
# If the event communicates replication info or might trigger
|
|
# reconfiguration actions we keep it; otherwise check to see if
|
|
# it matches our pre-filters:
|
|
if not self.shouldProcessEvent(event_type, event, True):
|
|
self.log.debug("Event did not match pre-filters %s", event)
|
|
return
|
|
|
|
event_uuid = uuid4().hex
|
|
attributes = {
|
|
"zuul_event_id": event_uuid,
|
|
}
|
|
# Gerrit events don't have an event id that could be used to globally
|
|
# identify this event in the system so we have to generate one.
|
|
with self.tracer.start_span(
|
|
"GerritEvent", attributes=attributes) as span:
|
|
event = {
|
|
"timestamp": time.time(),
|
|
"zuul_event_id": event_uuid,
|
|
"span_context": tracing.getSpanContext(span),
|
|
"payload": data,
|
|
}
|
|
self.event_queue.put(event)
|
|
|
|
def review(self, item, change, message, submit, labels, checks_api,
|
|
notify, file_comments, phase1, phase2, zuul_event_id=None):
|
|
if self.session:
|
|
meth = self.review_http
|
|
else:
|
|
meth = self.review_ssh
|
|
return meth(item, change, message, submit, labels, checks_api, notify,
|
|
file_comments, phase1, phase2,
|
|
zuul_event_id=zuul_event_id)
|
|
|
|
def review_ssh(self, item, change, message, submit, labels, checks_api,
|
|
notify, file_comments, phase1, phase2, zuul_event_id=None):
|
|
log = get_annotated_logger(self.log, zuul_event_id)
|
|
if checks_api:
|
|
log.error("Zuul is configured to report to the checks API, "
|
|
"but no HTTP password is present for the connection "
|
|
"in the configuration file.")
|
|
project = change.project.name
|
|
cmd = 'gerrit review --project %s' % project
|
|
if notify:
|
|
cmd += ' --notify %s' % shlex.quote(notify)
|
|
if phase1:
|
|
if message:
|
|
b_len = len(message.encode('utf-8'))
|
|
if b_len >= GERRIT_HUMAN_MESSAGE_LIMIT:
|
|
log.info("Message truncated %d > %d" %
|
|
(b_len, GERRIT_HUMAN_MESSAGE_LIMIT))
|
|
message = ("%s... (truncated)" %
|
|
message[:GERRIT_HUMAN_MESSAGE_LIMIT - 20])
|
|
cmd += ' --message %s' % shlex.quote(message)
|
|
for key, val in labels.items():
|
|
if val is True:
|
|
cmd += ' --%s' % key
|
|
else:
|
|
cmd += ' --label %s=%s' % (key, val)
|
|
if self.version >= (2, 13, 0):
|
|
cmd += ' --tag autogenerated:zuul:%s' % (
|
|
item.manager.pipeline.name)
|
|
if phase2 and submit:
|
|
cmd += ' --submit'
|
|
changeid = '%s,%s' % (change.number, change.patchset)
|
|
cmd += ' %s' % changeid
|
|
out, err = self._ssh(cmd, zuul_event_id=zuul_event_id)
|
|
return err
|
|
|
|
def report_checks(self, log, item, change, changeid, checkinfo):
|
|
checkinfo = checkinfo.copy()
|
|
uuid = checkinfo.pop('uuid', None)
|
|
scheme = checkinfo.pop('scheme', None)
|
|
if uuid is None:
|
|
uuids = self.project_checker_map.get(
|
|
change.project.canonical_name, set())
|
|
for u in uuids:
|
|
if u.split(':')[0] == scheme:
|
|
uuid = u
|
|
break
|
|
if uuid is None:
|
|
log.error("Unable to find matching checker for %s %s",
|
|
item, checkinfo)
|
|
return
|
|
|
|
def fmt(t):
|
|
return str(datetime.datetime.fromtimestamp(t))
|
|
|
|
if item.enqueue_time:
|
|
checkinfo['started'] = fmt(item.enqueue_time)
|
|
if item.report_time:
|
|
checkinfo['finished'] = fmt(item.report_time)
|
|
url = item.formatItemUrl()
|
|
if url:
|
|
checkinfo['url'] = url
|
|
if checkinfo:
|
|
for x in range(1, 4):
|
|
try:
|
|
self.post('changes/%s/revisions/%s/checks/%s' %
|
|
(changeid, change.commit, uuid),
|
|
checkinfo)
|
|
break
|
|
except HTTPConflictException:
|
|
log.exception("Conflict submitting check data to gerrit.")
|
|
break
|
|
except HTTPBadRequestException:
|
|
log.exception(
|
|
"Bad request submitting check data to gerrit.")
|
|
break
|
|
except Exception as e:
|
|
log.exception("Error submitting check data to gerrit on "
|
|
"attempt %s: %s", x, e)
|
|
time.sleep(x * self.submit_retry_backoff)
|
|
|
|
def review_http(self, item, change, message, submit, labels,
|
|
checks_api, notify, file_comments, phase1, phase2,
|
|
zuul_event_id=None):
|
|
changeid = "%s~%s~%s" % (
|
|
urllib.parse.quote(str(change.project), safe=''),
|
|
urllib.parse.quote(str(change.branch), safe=''),
|
|
change.id)
|
|
log = get_annotated_logger(self.log, zuul_event_id)
|
|
b_len = len(message.encode('utf-8'))
|
|
if b_len >= GERRIT_HUMAN_MESSAGE_LIMIT:
|
|
log.info("Message truncated %d > %d" %
|
|
(b_len, GERRIT_HUMAN_MESSAGE_LIMIT))
|
|
message = ("%s... (truncated)" %
|
|
message[:GERRIT_HUMAN_MESSAGE_LIMIT - 20])
|
|
data = dict(strict_labels=False)
|
|
if notify:
|
|
data['notify'] = notify
|
|
if phase1:
|
|
data['message'] = message
|
|
if change.is_current_patchset:
|
|
if labels:
|
|
data['labels'] = labels
|
|
if file_comments:
|
|
if self.version >= (2, 15, 0):
|
|
file_comments = copy.deepcopy(file_comments)
|
|
url = item.formatItemUrl()
|
|
for comments in itertools.chain(
|
|
file_comments.values()):
|
|
for comment in comments:
|
|
comment['robot_id'] = 'zuul'
|
|
comment['robot_run_id'] = \
|
|
item.current_build_set.uuid
|
|
if url:
|
|
comment['url'] = url
|
|
data['robot_comments'] = file_comments
|
|
else:
|
|
data['comments'] = file_comments
|
|
if self.version >= (2, 13, 0):
|
|
data['tag'] = 'autogenerated:zuul:%s' % (
|
|
item.manager.pipeline.name)
|
|
if checks_api:
|
|
self.report_checks(log, item, change, changeid, checks_api)
|
|
if (message or data.get('labels') or data.get('comments')
|
|
or data.get('robot_comments')):
|
|
for x in range(1, 4):
|
|
try:
|
|
self.post('changes/%s/revisions/%s/review' %
|
|
(changeid, change.commit),
|
|
data)
|
|
break
|
|
except HTTPConflictException:
|
|
log.exception("Conflict submitting data to gerrit.")
|
|
break
|
|
except HTTPBadRequestException:
|
|
log.exception(
|
|
"Bad request submitting check data to gerrit.")
|
|
break
|
|
except Exception as e:
|
|
log.exception(
|
|
"Error submitting data to gerrit "
|
|
"on attempt %s: %s",
|
|
x, e)
|
|
time.sleep(x * self.submit_retry_backoff)
|
|
if phase2 and change.is_current_patchset and submit:
|
|
for x in range(1, 4):
|
|
try:
|
|
self.post('changes/%s/submit' % (changeid,), {})
|
|
break
|
|
except HTTPConflictException:
|
|
log.info("Conflict submitting data to gerrit, "
|
|
"change may already be merged")
|
|
break
|
|
except HTTPBadRequestException:
|
|
log.exception(
|
|
"Bad request submitting check data to gerrit.")
|
|
break
|
|
except Exception as e:
|
|
log.exception(
|
|
"Error submitting data to gerrit on attempt %s: %s",
|
|
x, e)
|
|
time.sleep(x * self.submit_retry_backoff)
|
|
|
|
def queryChangeSSH(self, number, event=None):
|
|
args = '--all-approvals --comments --commit-message'
|
|
args += ' --current-patch-set --dependencies --files'
|
|
args += ' --patch-sets --submit-records'
|
|
cmd = 'gerrit query --format json %s change:%s' % (args, number)
|
|
out, err = self._ssh(cmd)
|
|
if not out:
|
|
return False
|
|
lines = out.split('\n')
|
|
if not lines:
|
|
return False
|
|
data = json.loads(lines[0])
|
|
if not data:
|
|
return False
|
|
iolog = get_annotated_logger(self.iolog, event)
|
|
iolog.debug("Received data from Gerrit query: \n%s",
|
|
pprint.pformat(data))
|
|
return data
|
|
|
|
def queryChangeHTTP(self, number, event=None):
|
|
query = ('changes/%s?o=DETAILED_ACCOUNTS&o=CURRENT_REVISION&'
|
|
'o=CURRENT_COMMIT&o=CURRENT_FILES&o=LABELS&'
|
|
'o=DETAILED_LABELS&o=ALL_REVISIONS' % (number,))
|
|
if self.version >= (3, 5, 0):
|
|
query += '&o=SUBMIT_REQUIREMENTS'
|
|
data = self.get(query)
|
|
related = self.get('changes/%s/revisions/%s/related' % (
|
|
number, data['current_revision']))
|
|
|
|
files_query = 'changes/%s/revisions/%s/files' % (
|
|
number, data['current_revision'])
|
|
|
|
commentable_files_query = None
|
|
# We query with parent=1 to get the list of files compared to
|
|
# the merge target, unless this is the initial commit to the
|
|
# repository (there are no parents).
|
|
if data['revisions'][data['current_revision']]['commit']['parents']:
|
|
files_query += '?parent=1'
|
|
if len(data['revisions'][data['current_revision']][
|
|
'commit']['parents']) > 1:
|
|
# If there is more than one parent, we also query the
|
|
# list of files on this specific commit so we know
|
|
# upon which we can leave comments.
|
|
commentable_files_query = 'changes/%s/revisions/%s/files' % (
|
|
number, data['current_revision'])
|
|
|
|
if commentable_files_query:
|
|
commentable_files = self.get(commentable_files_query)
|
|
else:
|
|
commentable_files = None
|
|
|
|
files = self.get(files_query)
|
|
return data, related, files, commentable_files
|
|
|
|
def queryChange(self, number, event=None, min_ltime=None):
|
|
lock = self.change_network_manager.getQueryLock(number)
|
|
with lock:
|
|
if min_ltime:
|
|
data = self.change_network_manager.getQueryResult(number)
|
|
if data and data.zuul_query_ltime > min_ltime:
|
|
return data
|
|
|
|
for attempt in range(1, 4):
|
|
# Get a query ltime -- any events before this point should be
|
|
# included in our change data.
|
|
zuul_query_ltime = self.sched.zk_client.getCurrentLtime()
|
|
try:
|
|
if self.session:
|
|
data, related, files, commentable_files =\
|
|
self.queryChangeHTTP(
|
|
number, event=event)
|
|
ret = GerritChangeData(
|
|
GerritChangeData.HTTP,
|
|
data, related, files, commentable_files,
|
|
zuul_query_ltime=zuul_query_ltime)
|
|
else:
|
|
data = self.queryChangeSSH(number, event=event)
|
|
ret = GerritChangeData(
|
|
GerritChangeData.SSH, data,
|
|
zuul_query_ltime=zuul_query_ltime)
|
|
self.change_network_manager.updateQueryResult(number, ret)
|
|
return ret
|
|
except HTTPNotFoundException as e:
|
|
# do not retry on 404 results, instead skip further event
|
|
# processing since we can't load a corresponding change
|
|
raise GerritEventProcessingException(
|
|
f"Did not find change for number {number}") from e
|
|
except Exception as e:
|
|
if attempt >= 3:
|
|
raise
|
|
# The internet is a flaky place try again.
|
|
self.log.warning(
|
|
"Error querying change %s, will retry: %s",
|
|
number, e)
|
|
time.sleep(1)
|
|
|
|
def simpleQuerySSH(self, query, event=None):
|
|
def _query_chunk(query, event):
|
|
args = '--commit-message --current-patch-set'
|
|
|
|
cmd = 'gerrit query --format json %s %s' % (
|
|
args, query)
|
|
out, err = self._ssh(cmd)
|
|
if not out:
|
|
return False
|
|
lines = out.split('\n')
|
|
if not lines:
|
|
return False
|
|
|
|
# filter out blank lines
|
|
data = [json.loads(line) for line in lines
|
|
if line.startswith('{')]
|
|
|
|
# check last entry for more changes
|
|
more_changes = None
|
|
if 'moreChanges' in data[-1]:
|
|
more_changes = data[-1]['moreChanges']
|
|
|
|
# we have to remove the statistics line
|
|
del data[-1]
|
|
|
|
if not data:
|
|
return False, more_changes
|
|
iolog = get_annotated_logger(self.iolog, event)
|
|
iolog.debug("Received data from Gerrit query: \n%s",
|
|
pprint.pformat(data))
|
|
return data, more_changes
|
|
|
|
# gerrit returns 500 results by default, so implement paging
|
|
# for large projects like nova
|
|
alldata = []
|
|
chunk, more_changes = _query_chunk(query, event)
|
|
while chunk:
|
|
alldata.extend(chunk)
|
|
if more_changes is None:
|
|
# continue sortKey based (before Gerrit 2.9)
|
|
resume = "resume_sortkey:'%s'" % chunk[-1]["sortKey"]
|
|
elif more_changes:
|
|
# continue moreChanges based (since Gerrit 2.9)
|
|
resume = "-S %d" % len(alldata)
|
|
else:
|
|
# no more changes
|
|
break
|
|
|
|
chunk, more_changes = _query_chunk(
|
|
"%s %s" % (query, resume), event)
|
|
return alldata
|
|
|
|
def simpleQueryHTTP(self, query, event=None):
|
|
iolog = get_annotated_logger(self.iolog, event)
|
|
changes = []
|
|
sortkey = ''
|
|
done = False
|
|
offset = 0
|
|
query = urllib.parse.quote(query, safe='')
|
|
while not done:
|
|
# We don't actually want to limit to 500, but that's the
|
|
# server-side default, and if we don't specify this, we
|
|
# won't get a _more_changes flag.
|
|
q = ('changes/?n=500%s&o=CURRENT_REVISION&o=CURRENT_COMMIT&'
|
|
'q=%s' % (sortkey, query))
|
|
iolog.debug('Query: %s', q)
|
|
batch = self.get(q)
|
|
iolog.debug("Received data from Gerrit query: \n%s",
|
|
pprint.pformat(batch))
|
|
done = True
|
|
if batch:
|
|
changes += batch
|
|
if '_more_changes' in batch[-1]:
|
|
done = False
|
|
if '_sortkey' in batch[-1]:
|
|
sortkey = '&N=%s' % (batch[-1]['_sortkey'],)
|
|
else:
|
|
offset += len(batch)
|
|
sortkey = '&start=%s' % (offset,)
|
|
return changes
|
|
|
|
def simpleQuery(self, query, event=None):
|
|
if self.session:
|
|
# None of the users of this method require dependency
|
|
# data, so we only perform the change query and omit the
|
|
# related changes query.
|
|
alldata = self.simpleQueryHTTP(query, event=event)
|
|
return [GerritChangeData(GerritChangeData.HTTP, data)
|
|
for data in alldata]
|
|
else:
|
|
alldata = self.simpleQuerySSH(query, event=event)
|
|
return [GerritChangeData(GerritChangeData.SSH, data)
|
|
for data in alldata]
|
|
|
|
def _uploadPack(self, project):
|
|
if self.session and not self.git_over_ssh:
|
|
url = ('%s/%s/info/refs?service=git-upload-pack' %
|
|
(self.baseurl, project.name))
|
|
for attempt in range(1, 4):
|
|
try:
|
|
r = self.session.get(
|
|
url,
|
|
verify=self.verify_ssl,
|
|
auth=self.auth, timeout=TIMEOUT,
|
|
headers={'User-Agent': self.user_agent})
|
|
self.iolog.debug('Received: %s %s',
|
|
r.status_code, r.text)
|
|
if r.status_code == 409:
|
|
raise HTTPConflictException()
|
|
if r.status_code == 404:
|
|
raise HTTPNotFoundException()
|
|
elif r.status_code != 200:
|
|
raise Exception("Received response %s" % (
|
|
r.status_code,))
|
|
except HTTPNotFoundException:
|
|
raise
|
|
except Exception as e:
|
|
if attempt >= 3:
|
|
self.log.exception(
|
|
"Error getting refs for %s:",
|
|
project)
|
|
raise
|
|
self.log.warning(
|
|
"Error getting refs for %s, will retry: %s",
|
|
project, e)
|
|
time.sleep(1)
|
|
out = r.text[r.text.find('\n') + 5:]
|
|
else:
|
|
cmd = "git-upload-pack %s" % project.name
|
|
out, err = self._ssh(cmd, "0000")
|
|
return out
|
|
|
|
def _open(self):
|
|
if self.client:
|
|
# Paramiko needs explicit closes, its possible we will open even
|
|
# with an unclosed client so explicitly close here.
|
|
self.client.close()
|
|
try:
|
|
client = paramiko.SSHClient()
|
|
client.load_system_host_keys()
|
|
client.set_missing_host_key_policy(paramiko.WarningPolicy())
|
|
# SSH banner, handshake, and auth timeouts default to 15
|
|
# seconds, so we only set the socket timeout here.
|
|
client.connect(self.ssh_server,
|
|
username=self.user,
|
|
port=self.port,
|
|
key_filename=self.keyfile,
|
|
timeout=self.ssh_timeout)
|
|
transport = client.get_transport()
|
|
transport.set_keepalive(self.keepalive)
|
|
self.client = client
|
|
except Exception:
|
|
client.close()
|
|
self.client = None
|
|
raise
|
|
|
|
def _ssh(self, command, stdin_data=None, zuul_event_id=None):
|
|
log = get_annotated_logger(self.log, zuul_event_id)
|
|
if not self.client:
|
|
self._open()
|
|
|
|
max_attempts = 2
|
|
for x in range(max_attempts):
|
|
try:
|
|
log.debug("SSH command:\n%s", command)
|
|
stdin, stdout, stderr = self.client.exec_command(
|
|
command, timeout=TIMEOUT)
|
|
break
|
|
except Exception:
|
|
self._open()
|
|
if x + 1 >= max_attempts:
|
|
raise
|
|
|
|
if stdin_data:
|
|
stdin.write(stdin_data)
|
|
|
|
out = stdout.read().decode('utf-8')
|
|
self.iolog.debug("SSH received stdout:\n%s" % out)
|
|
|
|
ret = stdout.channel.recv_exit_status()
|
|
log.debug("SSH exit status: %s", ret)
|
|
|
|
err = stderr.read().decode('utf-8')
|
|
if err.strip():
|
|
log.debug("SSH received stderr:\n%s", err)
|
|
|
|
if ret:
|
|
log.debug("SSH received stdout:\n%s", out)
|
|
raise Exception("Gerrit error executing %s" % command)
|
|
return (out, err)
|
|
|
|
def getInfoRefs(self, project: Project) -> Dict[str, str]:
|
|
try:
|
|
# Encode the UTF-8 data back to a byte array, as the size of
|
|
# each record in the pack is in bytes, and so the slicing must
|
|
# also be done on a byte-basis.
|
|
data = self._uploadPack(project).encode("utf-8")
|
|
except Exception:
|
|
self.log.error("Cannot get references from %s" % project)
|
|
raise # keeps error information
|
|
ret = {}
|
|
read_advertisement = False
|
|
i = 0
|
|
while i < len(data):
|
|
if len(data) - i < 4:
|
|
raise Exception("Invalid length in info/refs")
|
|
plen = int(data[i:i + 4], 16)
|
|
i += 4
|
|
# It's the length of the packet, including the 4 bytes of the
|
|
# length itself, unless it's null, in which case the length is
|
|
# not included.
|
|
if plen > 0:
|
|
plen -= 4
|
|
if len(data) - i < plen:
|
|
raise Exception("Invalid data in info/refs")
|
|
# Once the pack data is sliced, we can safely decode it back
|
|
# into a (UTF-8) string.
|
|
line = data[i:i + plen].decode("utf-8")
|
|
i += plen
|
|
if not read_advertisement:
|
|
read_advertisement = True
|
|
continue
|
|
if plen == 0:
|
|
# The terminating null
|
|
continue
|
|
line = line.strip()
|
|
revision, ref = line.split()
|
|
ret[ref] = revision
|
|
return ret
|
|
|
|
def getGitUrl(self, project: Project) -> str:
|
|
if self.anonymous_git:
|
|
url = ('%s/%s' % (self.baseurl, project.name))
|
|
elif self.session and not self.git_over_ssh:
|
|
baseurl = list(urllib.parse.urlparse(self.baseurl))
|
|
# Make sure we escape '/' symbols, otherwise git's url
|
|
# parser will think the username is a hostname.
|
|
baseurl[1] = '%s:%s@%s' % (
|
|
urllib.parse.quote(self.user, safe=''),
|
|
urllib.parse.quote(self.password, safe=''),
|
|
baseurl[1])
|
|
baseurl = urllib.parse.urlunparse(baseurl)
|
|
url = ('%s/a/%s' % (baseurl, project.name))
|
|
else:
|
|
url = 'ssh://%s@%s:%s/%s' % (self.user, self.ssh_server, self.port,
|
|
project.name)
|
|
return url
|
|
|
|
def _getWebUrl(self, project: Project, sha: str=None) -> str:
|
|
return self.gitweb_url_template.format(
|
|
baseurl=self.baseurl,
|
|
project=project.getSafeAttributes(),
|
|
sha=sha)
|
|
|
|
def _getRemoteVersion(self):
|
|
version = self.get('config/server/version')
|
|
base = version.split('-')[0]
|
|
parts = base.split('.')
|
|
major = minor = micro = 0
|
|
if len(parts) > 0:
|
|
major = int(parts[0])
|
|
if len(parts) > 1:
|
|
minor = int(parts[1])
|
|
if len(parts) > 2:
|
|
micro = int(parts[2])
|
|
self.version = (major, minor, micro)
|
|
self.log.info("Remote version is: %s (parsed as %s)" %
|
|
(version, self.version))
|
|
|
|
def _getRemoteInfo(self):
|
|
info = self.get('config/server/info')
|
|
change_info = info.get('change', {})
|
|
self.submit_whole_topic = change_info.get('submit_whole_topic', False)
|
|
self.log.info("Remote submitWholeTopic: %s", self.submit_whole_topic)
|
|
|
|
def refWatcherCallback(self, data):
|
|
event = {
|
|
'type': 'ref-updated',
|
|
'refUpdate': {
|
|
'project': data['project'],
|
|
'refName': data['ref'],
|
|
'oldRev': data['oldrev'],
|
|
'newRev': data['newrev'],
|
|
}
|
|
}
|
|
self.addEvent(event)
|
|
|
|
def onLoad(self, zk_client, component_registry):
|
|
self.log.debug("Starting Gerrit Connection/Watchers")
|
|
if self.session:
|
|
try:
|
|
self._getRemoteVersion()
|
|
except Exception:
|
|
self.log.exception("Unable to determine remote Gerrit version")
|
|
try:
|
|
self._getRemoteInfo()
|
|
except Exception:
|
|
self.log.exception("Unable to fetch remote Gerrit info")
|
|
|
|
# Set the project branch cache to read only if no scheduler is
|
|
# provided to prevent fetching the branches from the connection.
|
|
self.read_only = not self.sched
|
|
|
|
self.log.debug('Creating Zookeeper branch cache')
|
|
self._branch_cache = BranchCache(zk_client, self,
|
|
component_registry)
|
|
|
|
self.log.info("Creating Zookeeper event queue")
|
|
if self.sched:
|
|
component_info = self.sched.component_info
|
|
else:
|
|
component_info = None
|
|
self.event_queue = ConnectionEventQueue(
|
|
zk_client, self.connection_name, component_info)
|
|
|
|
# If the connection was not loaded by a scheduler, but by e.g.
|
|
# zuul-web, we want to stop here.
|
|
if not self.sched:
|
|
return
|
|
|
|
self.log.debug('Creating Zookeeper change cache')
|
|
self._change_cache = GerritChangeCache(zk_client, self)
|
|
|
|
self.startEventSourceThread()
|
|
# TODO: This is only for the checks plugin and can be removed
|
|
# when checks support is removed. Until then, we always start
|
|
# this thread, but if no checks are configured, it remains
|
|
# idle.
|
|
self.startPollerThread()
|
|
self.startEventConnector()
|
|
|
|
def onStop(self):
|
|
self.log.debug("Stopping Gerrit Connection/Watchers")
|
|
self.stopEventSourceThread()
|
|
self.stopPollerThread()
|
|
self.stopRefWatcherThread()
|
|
self.stopEventConnector()
|
|
if self._change_cache:
|
|
self._change_cache.stop()
|
|
|
|
def getEventQueue(self):
|
|
return getattr(self, "event_queue", None)
|
|
|
|
def stopEventSourceThread(self):
|
|
if self.event_thread:
|
|
self.event_thread.stop()
|
|
self.event_thread.join()
|
|
|
|
def startEventSourceThread(self):
|
|
if self.event_source == self.EVENT_SOURCE_STREAM_EVENTS:
|
|
self.startSSHListener()
|
|
elif self.event_source == self.EVENT_SOURCE_KAFKA:
|
|
self.startKafkaListener()
|
|
elif self.event_source == self.EVENT_SOURCE_KINESIS:
|
|
self.startAWSKinesisListener()
|
|
elif self.event_source == self.EVENT_SOURCE_GCLOUD_PUBSUB:
|
|
self.startGcloudPubsubListener()
|
|
else:
|
|
self.log.warning("No gerrit event source configured")
|
|
self.startRefWatcherThread()
|
|
if self.event_thread:
|
|
self.event_thread.start()
|
|
|
|
def startSSHListener(self):
|
|
self.log.info("Starting SSH event stream client")
|
|
self.event_thread = GerritSSHEventListener(
|
|
self, self.connection_config)
|
|
|
|
def startKafkaListener(self):
|
|
self.log.info("Starting Kafka consumer")
|
|
self.event_thread = GerritKafkaEventListener(
|
|
self, self.connection_config)
|
|
|
|
def startAWSKinesisListener(self):
|
|
self.log.info("Starting AWS Kinesis consumer")
|
|
self.event_thread = GerritAWSKinesisEventListener(
|
|
self, self.connection_config)
|
|
|
|
def startGcloudPubsubListener(self):
|
|
self.log.info("Starting gcloud pubsub consumer")
|
|
self.event_thread = GerritGcloudPubsubEventListener(
|
|
self, self.connection_config)
|
|
|
|
def startPollerThread(self):
|
|
if self.session is not None:
|
|
self.poller_thread = self._poller_class(self)
|
|
self.poller_thread.start()
|
|
else:
|
|
self.log.info(
|
|
"%s: Gerrit Poller is disabled because no "
|
|
"HTTP authentication is defined",
|
|
self.connection_name)
|
|
|
|
def stopPollerThread(self):
|
|
if self.poller_thread:
|
|
self.poller_thread.stop()
|
|
self.poller_thread.join()
|
|
|
|
def stopRefWatcherThread(self):
|
|
if self.ref_watcher_thread:
|
|
self.ref_watcher_thread.stop()
|
|
self.ref_watcher_thread.join()
|
|
|
|
def startRefWatcherThread(self):
|
|
self.ref_watcher_thread = self._ref_watcher_class(
|
|
self,
|
|
self.baseurl,
|
|
self.ref_watcher_poll_interval,
|
|
self.refWatcherCallback,
|
|
election_name="ref-watcher")
|
|
self.ref_watcher_thread.start()
|
|
|
|
def startEventConnector(self):
|
|
self.gerrit_event_connector = GerritEventConnector(
|
|
self, self.replication_timeout)
|
|
self.gerrit_event_connector.start()
|
|
|
|
def stopEventConnector(self):
|
|
if self.gerrit_event_connector:
|
|
self.gerrit_event_connector.stop()
|