# Copyright 2012 Hewlett-Packard Development Company, L.P.
# Copyright 2021-2025 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import abc
import copy
import hashlib
import itertools
import json
import logging
import math
import threading
import time
import textwrap
import types
import urllib.parse
from collections import OrderedDict, defaultdict, namedtuple, UserDict
from enum import StrEnum
from functools import partial, total_ordering
from uuid import uuid4

import re2
import jsonpath_rw
from cachetools.func import lru_cache
from kazoo.exceptions import NodeExistsError, NoNodeError
from opentelemetry import trace

from zuul import change_matcher
from zuul.exceptions import (
    SEVERITY_ERROR,
    SEVERITY_WARNING,
    OIDCIssuerNotAllowedError,
    LabelForbiddenError,
    MaxOIDCTTLError,
    MaxTimeoutError,
    NodesetNotFoundError,
    ProjectNotFoundError,
    ProjectNotPermittedError,
    UnknownConnection,
)
from zuul.lib.re2util import filter_allowed_disallowed
from zuul.lib import tracing
from zuul.lib import yamlutil as yaml
from zuul.lib.capabilities import capabilities_registry
from zuul.lib.config import get_default
from zuul.lib.jsonutil import json_dumps
from zuul.lib.logutil import get_annotated_logger
from zuul.lib.result_data import get_artifacts_from_result_data
from zuul.lib.varnames import check_varnames
from zuul.zk import zkobject
from zuul.zk.blob_store import BlobStore
from zuul.zk.change_cache import ChangeKey
from zuul.zk.components import COMPONENT_REGISTRY

MERGER_MERGE = 1            # "git merge"
MERGER_MERGE_RESOLVE = 2    # "git merge -s resolve"
MERGER_CHERRY_PICK = 3      # "git cherry-pick"
MERGER_SQUASH_MERGE = 4     # "git merge --squash"
MERGER_REBASE = 5           # "git rebase"
MERGER_MERGE_RECURSIVE = 6  # "git merge -s recursive"
MERGER_MERGE_ORT = 7        # "git merge -s ort"

MERGER_MAP = {
    'merge': MERGER_MERGE,
    'merge-resolve': MERGER_MERGE_RESOLVE,
    'merge-recursive': MERGER_MERGE_RECURSIVE,
    'merge-ort': MERGER_MERGE_ORT,
    'cherry-pick': MERGER_CHERRY_PICK,
    'squash-merge': MERGER_SQUASH_MERGE,
    'rebase': MERGER_REBASE,
}
ALL_MERGE_MODES = list(MERGER_MAP.values())

PRECEDENCE_NORMAL = 0
PRECEDENCE_LOW = 1
PRECEDENCE_HIGH = 2

PRECEDENCE_MAP = {
    None: PRECEDENCE_NORMAL,
    'low': PRECEDENCE_LOW,
    'normal': PRECEDENCE_NORMAL,
    'high': PRECEDENCE_HIGH,
}

PRIORITY_MAP = {
    PRECEDENCE_NORMAL: 200,
    PRECEDENCE_LOW: 300,
    PRECEDENCE_HIGH: 100,
}

# Request states
STATE_REQUESTED = 'requested'
STATE_FULFILLED = 'fulfilled'
STATE_FAILED = 'failed'
REQUEST_STATES = set([STATE_REQUESTED,
                      STATE_FULFILLED,
                      STATE_FAILED])

# Node states
STATE_BUILDING = 'building'
STATE_TESTING = 'testing'
STATE_READY = 'ready'
STATE_IN_USE = 'in-use'
STATE_USED = 'used'
STATE_HOLD = 'hold'
STATE_DELETING = 'deleting'
NODE_STATES = set([STATE_BUILDING,
                   STATE_TESTING,
                   STATE_READY,
                   STATE_IN_USE,
                   STATE_USED,
                   STATE_HOLD,
                   STATE_DELETING])

# Workspace scheme
SCHEME_GOLANG = 'golang'
SCHEME_FLAT = 'flat'
SCHEME_UNIQUE = 'unique'
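
# An illustrative sketch of how the tables above relate (shown as a
# hypothetical interactive session; not part of the module): merge mode
# constants round-trip through MERGER_MAP and get_merge_mode_name()
# (defined below), and PRIORITY_MAP deliberately inverts the ordering,
# which suggests that lower priority numbers are dispatched first.
#
#   >>> MERGER_MAP['squash-merge'] == MERGER_SQUASH_MERGE
#   True
#   >>> get_merge_mode_name(MERGER_SQUASH_MERGE)
#   'squash-merge'
#   >>> PRIORITY_MAP[PRECEDENCE_HIGH] < PRIORITY_MAP[PRECEDENCE_NORMAL]
#   True
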
def add_debug_line(debug_messages, msg, indent=0):
    if debug_messages is None:
        return
    if indent:
        indent = ' ' * indent
    else:
        indent = ''
    debug_messages.append(indent + msg)


def get_merge_mode_name(merge_mode):
    "Look up the merge mode name given the constant"
    for k, v in MERGER_MAP.items():
        if v == merge_mode:
            return k


def filter_severity(error_list, errors=True, warnings=True):
    return [e for e in error_list
            if (
                (errors and e.severity == SEVERITY_ERROR) or
                (warnings and e.severity == SEVERITY_WARNING)
            )]


class QuotaInformation:

    def __init__(self, default=0, **kw):
        '''
        Initialize the quota information with some values.

        Attributes not supplied are initialized to default, which will
        typically be 0 or math.inf, the latter indicating an infinite
        limit.

        :param default: The default value to use for any attribute not
                        supplied (usually 0 or math.inf).
        '''

        self.quota = {}
        for k, v in kw.items():
            self.quota[k] = v
        self.default = default

    def __eq__(self, other):
        return (isinstance(other, QuotaInformation) and
                self.default == other.default and
                self.quota == other.quota)

    def _get_default(self, value, default):
        return value if value is not None else default

    def _add_subtract(self, other, add=True):
        for resource in other.quota.keys():
            self.quota.setdefault(resource, self.default)

        for resource in self.quota.keys():
            other_value = other.quota.get(resource, other.default)
            if add:
                self.quota[resource] += other_value
            else:
                self.quota[resource] -= other_value

    def copy(self):
        return QuotaInformation(self.default, **self.quota)

    def subtract(self, other):
        self._add_subtract(other, add=False)

    def add(self, other):
        self._add_subtract(other, True)

    def min(self, other):
        for resource, theirs in other.quota.items():
            ours = self.quota.get(resource, self.default)
            self.quota[resource] = min(ours, theirs)

    def nonNegative(self):
        for resource, value in self.quota.items():
            if value < 0:
                return False
        return True

    def getResources(self):
        '''Return resources value to register in ZK node'''
        return self.quota

    def __str__(self):
        return str(self.quota)


class QueryCacheEntry:
    def __init__(self, ltime, results):
        self.ltime = ltime
        self.results = results


class QueryCache:
    """Cache query information while processing dependencies"""

    def __init__(self, zk_client):
        self.zk_client = zk_client
        self.ltime = 0
        self.clear(0)

    def clear(self, ltime):
        self.ltime = ltime
        self.topic_queries = {}

    def clearIfOlderThan(self, event):
        if not hasattr(event, "zuul_event_ltime"):
            return
        ltime = event.zuul_event_ltime
        if ltime > self.ltime:
            ltime = self.zk_client.getCurrentLtime()
            self.clear(ltime)


class MergeOp:
    """A class representing a merge operation, returned by the merger to
    tell the user what was done."""

    def __init__(self, cmd=None, timestamp=None, comment=None, path=None):
        self.cmd = cmd
        self.timestamp = timestamp
        self.comment = comment
        self.path = path

    def toDict(self):
        ret = {}
        for k in ['cmd', 'timestamp', 'comment', 'path']:
            v = getattr(self, k)
            if v is not None:
                ret[k] = v
        return ret
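
# A minimal sketch of QuotaInformation arithmetic (hypothetical resource
# values, shown as an interactive session; not part of the module):
#
#   >>> limit = QuotaInformation(default=math.inf, cores=8, ram=16384)
#   >>> used = QuotaInformation(cores=2, ram=4096)
#   >>> limit.subtract(used)
#   >>> str(limit)
#   {'cores': 6, 'ram': 12288}
#   >>> limit.nonNegative()
#   True
#
# Resources missing from one side fall back to that side's default, so a
# math.inf default models "no limit" for unknown resources.
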
class ZuulMark:
    # The yaml mark class differs between the C and python versions.
    # The C version does not provide a snippet, and also appears to
    # lose data under some circumstances.
    def __init__(self, start_mark, end_mark, stream):
        self.name = start_mark.name
        self.index = start_mark.index
        self.line = start_mark.line
        self.end_line = end_mark.line
        self.end_index = end_mark.index
        self.column = start_mark.column
        self.end_column = end_mark.column
        self.snippet = stream[start_mark.index:end_mark.index]

    def __str__(self):
        return ' in "{name}", line {line}, column {column}'.format(
            name=self.name,
            line=self.line + 1,
            column=self.column + 1,
        )

    def __eq__(self, other):
        if not isinstance(other, ZuulMark):
            return False
        return (self.line == other.line and
                self.snippet == other.snippet)

    line_snippet_context = 4

    def getLineSnippet(self, line):
        start = max(line - self.line - self.line_snippet_context, 0)
        end = start + (self.line_snippet_context * 2) + 1
        all_lines = self.snippet.splitlines()
        lines = all_lines[start:end]
        if start > 0:
            lines.insert(0, '...')
        if end < len(all_lines):
            lines.append('...')
        return '\n'.join(lines)

    def getLineLocation(self, line):
        return ' in "{name}", line {line}'.format(
            name=self.name,
            line=line + 1,
        )

    def serialize(self):
        return {
            "name": self.name,
            "index": self.index,
            "line": self.line,
            "end_line": self.end_line,
            "end_index": self.end_index,
            "column": self.column,
            "end_column": self.end_column,
            "snippet": self.snippet,
        }

    @classmethod
    def deserialize(cls, data):
        o = cls.__new__(cls)
        o.__dict__.update(data)
        return o


class ConfigurationErrorKey(object):
    """A class which attempts to uniquely identify configuration errors
    based on their file location.  It's not perfect, but it's usually
    sufficient to determine whether we should show an error to a user.
    """

    # Note: this class is serialized to ZK via ConfigurationErrorList,
    # ensure that it serializes and deserializes appropriately.

    def __init__(self, context, mark, error_text):
        self.context = context
        self.mark = mark
        self.error_text = error_text
        elements = []
        if context:
            elements.extend([
                context.project_canonical_name,
                context.branch,
                context.path,
            ])
        else:
            elements.extend([None, None, None])
        if mark:
            elements.extend([
                mark.line,
                mark.snippet,
            ])
        else:
            elements.extend([None, None])
        elements.append(error_text)
        hasher = hashlib.sha256()
        hasher.update(json.dumps(elements, sort_keys=True).encode('utf8'))
        self._hash = hasher.hexdigest()

    def serialize(self):
        return {
            "context": self.context and self.context.serialize(),
            "mark": self.mark and self.mark.serialize(),
            "error_text": self.error_text,
            "_hash": self._hash,
        }

    @classmethod
    def deserialize(cls, data):
        data.update({
            "context": data["context"] and SourceContext.deserialize(
                data["context"]),
            "mark": data["mark"] and ZuulMark.deserialize(data["mark"]),
        })
        o = cls.__new__(cls)
        o.__dict__.update(data)
        return o

    def __hash__(self):
        return hash(self._hash)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __eq__(self, other):
        if not isinstance(other, ConfigurationErrorKey):
            return False
        return (self.context == other.context and
                self.mark == other.mark and
                self.error_text == other.error_text)
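
# A sketch of how the key above deduplicates errors: two errors hash
# identically when they share project, branch, path, line, snippet, and
# message, regardless of which reconfiguration produced them.  (Values
# below are hypothetical; shown as an interactive session.)
#
#   >>> k1 = ConfigurationErrorKey(None, None, "syntax error")
#   >>> k2 = ConfigurationErrorKey(None, None, "syntax error")
#   >>> k1 == k2, hash(k1) == hash(k2)
#   (True, True)
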
class ConfigurationError(object):
    """A configuration error"""

    # Note: this class is serialized to ZK via ConfigurationErrorList,
    # ensure that it serializes and deserializes appropriately.

    def __init__(self, context, mark, error,
                 short_error=None, severity=None, name=None):
        self.error = error
        self.short_error = short_error
        self.severity = severity or SEVERITY_ERROR
        self.name = name or 'Unknown'
        self.key = ConfigurationErrorKey(context, mark, self.error)

    def serialize(self):
        return {
            "error": self.error,
            "short_error": self.short_error,
            "key": self.key.serialize(),
            "severity": self.severity,
            "name": self.name,
        }

    @classmethod
    def deserialize(cls, data):
        data["key"] = ConfigurationErrorKey.deserialize(data["key"])
        o = cls.__new__(cls)
        o.__dict__.update(data)
        return o

    def __ne__(self, other):
        return not self.__eq__(other)

    def __eq__(self, other):
        if not isinstance(other, ConfigurationError):
            return False
        return (self.error == other.error and
                self.short_error == other.short_error and
                self.key == other.key and
                self.severity == other.severity and
                self.name == other.name)


class ConfigurationErrorList(zkobject.ShardedZKObject):
    """A list of configuration errors.

    BuildSets may have zero or one of these.
    """

    def __repr__(self):
        return '<ConfigurationErrorList>'

    def getPath(self):
        return self._path

    def serialize(self, context):
        data = {
            "errors": [e.serialize() for e in self.errors],
        }
        return json.dumps(data, sort_keys=True).encode("utf8")

    def deserialize(self, raw, context, extra=None):
        data = super().deserialize(raw, context)
        data.update({
            "errors": [ConfigurationError.deserialize(d)
                       for d in data["errors"]],
        })
        return data


class LoadingErrors(object):
    """A configuration error accumulator attached to a layout object
    """
    def __init__(self):
        self.errors = []
        self.error_keys = set()

    def makeError(self, context, mark, error,
                  short_error=None, severity=None, name=None):
        e = ConfigurationError(context, mark, error,
                               short_error=short_error,
                               severity=severity,
                               name=name)
        self.addError(e)

    def addError(self, error):
        self.errors.append(error)
        self.error_keys.add(error.key)

    def __getitem__(self, index):
        return self.errors[index]

    def __len__(self):
        return len(self.errors)


class RequirementsError(Exception):
    """A job's requirements were not met."""
    pass


class JobConfigurationError(Exception):
    """A job has an invalid configuration.

    These are expected user errors when freezing a job graph.
    """
    pass
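
# A sketch of the accumulator above (hypothetical error message, shown
# as an interactive session; not part of the module):
#
#   >>> errors = LoadingErrors()
#   >>> errors.makeError(None, None, "bad stanza",
#   ...                  severity=SEVERITY_WARNING)
#   >>> len(errors), len(filter_severity(errors.errors, warnings=False))
#   (1, 0)
#
# filter_severity() lets reporters drop warnings while keeping errors,
# or vice versa.
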
""" pass class TemplateNotFoundError(JobConfigurationError): """A project referenced a template that does not exist.""" pass class NoMatchingParentError(JobConfigurationError): """A job referenced a parent, but that parent had no variants which matched the current change.""" pass class JobNotDefinedError(JobConfigurationError): """A job was not defined.""" pass class SecretNotFoundError(JobConfigurationError): """A job referenced a semaphore that does not exist.""" pass class Attributes(object): """A class to hold attributes for string formatting.""" def __init__(self, **kw): setattr(self, '__dict__', kw) def toDict(self): return self.__dict__ class ConfigObject: def __init__(self): super().__init__() self.source_context = None self.start_mark = None class Pipeline(object): """A configuration that ties together triggers, reporters and managers Trigger A description of which events should be processed Manager Responsible for enqueing and dequeing Changes Reporter Communicates success and failure results somewhere """ STATE_NORMAL = 'normal' STATE_ERROR = 'error' def __init__(self, name): self.name = name self.allow_other_connections = True self.connections = [] self.source_context = None self.start_mark = None self.description = None self.failure_message = None self.merge_conflict_message = None self.success_message = None self.footer_message = None self.enqueue_message = None self.start_message = None self.dequeue_message = None self.post_review = False self.dequeue_on_new_patchset = True self.ignore_dependencies = False self.manager_name = None self.precedence = PRECEDENCE_NORMAL self.supercedes = [] self.triggers = [] self.enqueue_actions = [] self.start_actions = [] self.success_actions = [] self.failure_actions = [] self.merge_conflict_actions = [] self.no_jobs_actions = [] self.disabled_actions = [] self.dequeue_actions = [] self.disable_at = None self.window = None self.window_floor = None self.window_ceiling = None self.window_increase_type = None self.window_increase_factor = None self.window_decrease_type = None self.window_decrease_factor = None self.ref_filters = [] self.event_filters = [] @property def actions(self): return ( self.enqueue_actions + self.start_actions + self.success_actions + self.failure_actions + self.merge_conflict_actions + self.no_jobs_actions + self.disabled_actions + self.dequeue_actions ) def __repr__(self): return '' % self.name def getSafeAttributes(self): return Attributes(name=self.name) def validateReferences(self, layout): # Verify that references to other objects in the layout are # valid. for pipeline in self.supercedes: if not layout.pipeline_managers.get(pipeline): raise Exception( 'The pipeline "{this}" supercedes an unknown pipeline ' '{other}.'.format( this=self.name, other=pipeline)) class PipelineState(zkobject.ZKObject): def __init__(self): super().__init__() self._set( state=Pipeline.STATE_NORMAL, queues=[], old_queues=[], consecutive_failures=0, disabled=False, layout_uuid=None, # Local pipeline manager reference (not persisted in Zookeeper) manager=None, _read_only=False, ) def _lateInitData(self): # If we're initializing the object on our initial refresh, # reset the data to this. return dict( state=Pipeline.STATE_NORMAL, queues=[], old_queues=[], consecutive_failures=0, disabled=False, layout_uuid=self.manager.tenant.layout.uuid, ) @classmethod def fromZK(klass, context, path, manager, **kw): obj = klass() obj._set(manager=manager, **kw) # Bind the state to the manager, so child objects can access # the the full pipeline state. 
    @classmethod
    def fromZK(klass, context, path, manager, **kw):
        obj = klass()
        obj._set(manager=manager, **kw)
        # Bind the state to the manager, so child objects can access
        # the full pipeline state.
        manager.state = obj
        obj._load(context, path=path)
        return obj

    @classmethod
    def create(cls, manager, old_state=None):
        # If we are resetting an existing pipeline, we will have an
        # old_state, so just clean up the object references there and
        # let the next refresh handle updating any data.
        # TODO: This apparently hasn't been called in some time; fix.
        if old_state:
            old_state._resetObjectRefs()
            return old_state

        # Otherwise, we are initializing a pipeline that we haven't
        # seen before.  It still might exist in ZK, but since we
        # haven't seen it, we don't have any object references to
        # clean up.  We can just start with a clean object, set the
        # manager reference, and let the next refresh deal with
        # whether there might be any data in ZK.
        state = cls()
        state._set(manager=manager)
        return state

    def _resetObjectRefs(self):
        # Update the pipeline references on the queue objects.
        for queue in self.queues + self.old_queues:
            queue.manager = self.manager

    def getPath(self):
        if hasattr(self, '_path'):
            return self._path
        return self.pipelinePath(self.manager)

    @classmethod
    def pipelinePath(cls, manager):
        safe_tenant = urllib.parse.quote_plus(manager.tenant.name)
        safe_pipeline = urllib.parse.quote_plus(manager.pipeline.name)
        return f"/zuul/tenant/{safe_tenant}/pipeline/{safe_pipeline}"

    @classmethod
    def parsePath(cls, path):
        """Return path components for use by the REST API"""
        root, safe_tenant, pipeline, safe_pipeline = path.rsplit('/', 3)
        return (urllib.parse.unquote_plus(safe_tenant),
                urllib.parse.unquote_plus(safe_pipeline))

    def _dirtyPath(self):
        return f'{self.getPath()}/dirty'

    def isDirty(self, client):
        return bool(client.exists(self._dirtyPath()))

    def setDirty(self, client):
        try:
            client.create(self._dirtyPath())
        except NodeExistsError:
            pass

    def clearDirty(self, client):
        try:
            client.delete(self._dirtyPath())
        except NoNodeError:
            pass

    def removeOldQueue(self, context, queue):
        if queue in self.old_queues:
            with self.activeContext(context):
                self.old_queues.remove(queue)

    def addQueue(self, queue):
        with self.activeContext(self.manager.current_context):
            self.queues.append(queue)

    def getQueue(self, project_cname, branch):
        # Queues might be branch specific so match with branch
        for queue in self.queues:
            if queue.matches(project_cname, branch):
                return queue
        return None

    def removeQueue(self, queue):
        if queue in self.queues:
            with self.activeContext(self.manager.current_context):
                self.queues.remove(queue)
            queue.delete(self.manager.current_context)

    def promoteQueue(self, queue):
        if queue not in self.queues:
            return
        with self.activeContext(self.manager.current_context):
            self.queues.remove(queue)
            self.queues.insert(0, queue)

    def getAllItems(self, include_old=False):
        items = []
        for shared_queue in self.queues:
            items.extend(shared_queue.queue)
        if include_old:
            for shared_queue in self.old_queues:
                items.extend(shared_queue.queue)
        return items

    def serialize(self, context):
        if self._read_only:
            raise RuntimeError(
                "Attempt to serialize read-only pipeline state")
        data = {
            "state": self.state,
            "consecutive_failures": self.consecutive_failures,
            "disabled": self.disabled,
            "queues": [q.getPath() for q in self.queues],
            "old_queues": [q.getPath() for q in self.old_queues],
            "layout_uuid": self.layout_uuid,
        }
        return json.dumps(data, sort_keys=True).encode("utf8")
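
    # A sketch of the path helpers above (hypothetical tenant name,
    # shown as an interactive session): quote_plus() keeps names with
    # special characters from adding extra path components, and
    # parsePath() reverses pipelinePath().
    #
    #   >>> PipelineState.parsePath('/zuul/tenant/a%2Fb/pipeline/check')
    #   ('a/b', 'check')
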
    def refresh(self, context, read_only=False):
        # Set read_only to True to indicate that we should avoid
        # "resetting" the pipeline state if the layout has changed.
        # This is so that we can refresh the object in circumstances
        # where we haven't verified that our local layout matches
        # what's in ZK.

        # Notably, this need not prevent us from performing the
        # initialization below if necessary.  The case of the object
        # being brand new in ZK supersedes our worry that our old copy
        # might be out of date since our old copy is, itself, brand
        # new.
        self._set(_read_only=read_only)
        try:
            return super().refresh(context)
        except NoNodeError:
            # If the object doesn't exist we will receive a
            # NoNodeError.  This happens because the postConfig call
            # creates this object without holding the pipeline lock,
            # so it can't determine whether or not it exists in ZK.
            # We do hold the pipeline lock here, so if we get this
            # error, we know we're initializing the object, and we
            # should write it to ZK.

            # Note that typically this code is not used since
            # currently other objects end up creating the pipeline
            # path in ZK first.  It is included in case that ever
            # changes.  Currently the empty byte-string code path in
            # deserialize() is used instead.
            context.log.warning("Initializing pipeline state for %s; "
                                "this is expected only for new pipelines",
                                self.manager.pipeline.name)
            self._set(**self._lateInitData())
            self.internalCreate(context)
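
    # A hypothetical read-only use of refresh() above: a consumer that
    # does not hold the pipeline lock (zuul-web style) can pass
    # read_only=True so that a layout mismatch never resets the queues:
    #
    #   state.refresh(context, read_only=True)
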
    def deserialize(self, raw, context, extra=None):
        # We may have old change objects in the pipeline cache, so
        # make sure they are the same objects we would get from the
        # source change cache.
        self.manager.clearCache()

        # If the object doesn't exist we will get back an empty byte
        # string.  This happens because the postConfig call creates
        # this object without holding the pipeline lock, so it can't
        # determine whether or not it exists in ZK.  We do hold the
        # pipeline lock here, so if we get the empty byte string, we
        # know we're initializing the object.  In that case, we should
        # initialize the layout id to the current layout.  Nothing
        # else needs to be set.
        if raw == b'':
            context.log.warning("Initializing pipeline state for %s; "
                                "this is expected only for new pipelines",
                                self.manager.pipeline.name)
            return self._lateInitData()

        data = super().deserialize(raw, context)

        if not self._read_only:
            # Skip this check if we're in a context where we want to
            # read the state without updating it (in case we're not
            # certain that the layout is up to date).
            if data['layout_uuid'] != self.manager.tenant.layout.uuid:
                # The tenant layout has updated since our last state;
                # we need to reset the state.
                data = dict(
                    state=Pipeline.STATE_NORMAL,
                    queues=[],
                    old_queues=data["old_queues"] + data["queues"],
                    consecutive_failures=0,
                    disabled=False,
                    layout_uuid=self.manager.tenant.layout.uuid,
                )

        existing_queues = {
            q.getPath(): q for q in self.queues + self.old_queues
        }

        # Restore the old queues first, so that in case an item is
        # already in one of the new queues the item(s) ahead/behind
        # pointers are corrected when restoring the new queues.
        old_queues = []
        for queue_path in data["old_queues"]:
            queue = existing_queues.get(queue_path)
            if queue:
                queue.refresh(context)
            else:
                queue = ChangeQueue.fromZK(context, queue_path,
                                           manager=self.manager)
            old_queues.append(queue)

        queues = []
        for queue_path in data["queues"]:
            queue = existing_queues.get(queue_path)
            if queue:
                queue.refresh(context)
            else:
                queue = ChangeQueue.fromZK(context, queue_path,
                                           manager=self.manager)
            queues.append(queue)

        if hasattr(self.manager, "change_queue_managers"):
            # Clear out references to old queues
            for cq_manager in self.manager.change_queue_managers:
                cq_manager.created_for_branches.clear()

            # Add queues to matching change queue managers
            for queue in queues:
                project_cname, branch = queue.project_branches[0]
                for cq_manager in self.manager.change_queue_managers:
                    managed_projects = {
                        p.canonical_name for p in cq_manager.projects
                    }
                    if project_cname in managed_projects:
                        cq_manager.created_for_branches[branch] = queue
                        break

        data.update({
            "queues": queues,
            "old_queues": old_queues,
        })
        return data

    def cleanup(self, context):
        pipeline_path = self.getPath()
        try:
            all_items = set(context.client.get_children(
                f"{pipeline_path}/item"))
        except NoNodeError:
            all_items = set()

        known_item_objs = self.getAllItems(include_old=True)
        known_items = {i.uuid for i in known_item_objs}
        items_referenced_by_builds = set()
        for i in known_item_objs:
            build_set = i.current_build_set
            # Drop some attributes from local objects to save memory
            build_set._set(_files=None,
                           _merge_repo_state=None,
                           _extra_repo_state=None,
                           _repo_state=RepoState())
            job_graph = build_set.job_graph
            if not job_graph:
                continue
            for job in job_graph.getJobs():
                build = build_set.getBuild(job)
                if build:
                    items_referenced_by_builds.add(
                        build.build_set.item.uuid)
        stale_items = all_items - known_items - items_referenced_by_builds
        for item_uuid in stale_items:
            self.manager.log.debug("Cleaning up stale item %s", item_uuid)
            context.client.delete(
                QueueItem.itemPath(pipeline_path, item_uuid),
                recursive=True)

        try:
            all_queues = set(context.client.get_children(
                f"{pipeline_path}/queue"))
        except NoNodeError:
            all_queues = set()

        known_queues = {q.uuid for q in (*self.old_queues, *self.queues)}
        stale_queues = all_queues - known_queues
        for queue_uuid in stale_queues:
            self.manager.log.debug("Cleaning up stale queue %s",
                                   queue_uuid)
            context.client.delete(
                ChangeQueue.queuePath(pipeline_path, queue_uuid),
                recursive=True)
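
# The stale-object cleanup above reduces to set arithmetic over item
# UUIDs (hypothetical values):
#
#   all_items = {'i1', 'i2', 'i3'}      # children found in ZK
#   known = {'i1'}                      # items still in a queue
#   by_builds = {'i2'}                  # referenced by retained builds
#   stale = all_items - known - by_builds   # -> {'i3'} gets deleted
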
class PipelineChangeList(zkobject.ShardedZKObject):
    """A list of change references within a pipeline

    This is used by the scheduler to quickly decide if events which
    otherwise don't match the pipeline triggers should be nevertheless
    forwarded to the pipeline.

    It is also used to maintain the connection cache.
    """
    # We can read from this object without locking, and since it's
    # sharded, that may produce an error.  If that happens, don't
    # delete the object, just retry.
    delete_on_error = False

    def __init__(self):
        super().__init__()
        self._set(
            changes=[],
            _change_keys=[],
        )

    def refresh(self, context, allow_init=True):
        # Set allow_init to false to indicate that we don't hold the
        # lock and we should not try to initialize the object in ZK if
        # it does not exist.
        try:
            self._retry(context, super().refresh,
                        context, max_tries=5)
        except NoNodeError:
            # If the object doesn't exist we will receive a
            # NoNodeError.  This happens because the postConfig call
            # creates this object without holding the pipeline lock,
            # so it can't determine whether or not it exists in ZK.
            # We do hold the pipeline lock here, so if we get this
            # error, we know we're initializing the object, and
            # we should write it to ZK.
            if allow_init:
                context.log.warning(
                    "Initializing pipeline change list for %s; "
                    "this is expected only for new pipelines",
                    self.manager.pipeline.name)
                self.internalCreate(context)
            else:
                # If we're called from a context where we can't
                # initialize the change list, re-raise the exception.
                raise

    def getPath(self):
        return self.getChangeListPath(self.manager)

    @classmethod
    def getChangeListPath(cls, manager):
        pipeline_path = manager.state.getPath()
        return pipeline_path + '/change_list'

    @classmethod
    def create(cls, manager):
        # This object may or may not exist in ZK, but we aren't using
        # any of that data here.  We can just start with a clean
        # object, set the manager reference, and let the next refresh
        # deal with whether there might be any data in ZK.
        change_list = cls()
        change_list._set(manager=manager)
        return change_list

    def serialize(self, context):
        data = {
            "changes": self.changes,
        }
        return json.dumps(data, sort_keys=True).encode("utf8")

    def deserialize(self, raw, context, extra=None):
        data = super().deserialize(raw, context)
        change_keys = []
        # We must have a dictionary with a 'changes' key; otherwise we
        # may be reading immediately after truncating.  Allow the
        # KeyError exception to propagate in that case.
        for ref in data['changes']:
            change_keys.append(ChangeKey.fromReference(ref))
        data['_change_keys'] = change_keys
        return data

    def setChangeKeys(self, context, change_keys):
        change_refs = [key.reference for key in change_keys]
        if change_refs == self.changes:
            return
        self.updateAttributes(context, changes=change_refs)
        self._set(_change_keys=change_keys)

    def getChangeKeys(self):
        return self._change_keys


class PipelineSummary(zkobject.ShardedZKObject):

    log = logging.getLogger("zuul.PipelineSummary")
    truncate_on_create = True
    delete_on_error = False

    def __init__(self):
        super().__init__()
        self._set(
            status={},
        )

    def getPath(self):
        return f"{PipelineState.pipelinePath(self.manager)}/status"

    def update(self, context, zuul_globals):
        status = self.manager.formatStatusJSON(
            zuul_globals.websocket_url)
        self.updateAttributes(context, status=status)

    def serialize(self, context):
        data = {
            "status": self.status,
        }
        return json.dumps(data, sort_keys=True).encode("utf8")

    def refresh(self, context):
        # Ignore exceptions and just re-use the previous state.  This
        # might happen in case the sharded status data is truncated
        # while zuul-web tries to read it.
        try:
            super().refresh(context)
        except NoNodeError:
            self.log.warning("No pipeline summary found "
                             "(may not be created yet)")
        except Exception:
            self.log.exception("Failed to refresh data")
        return self.status
class ChangeQueue(zkobject.ZKObject):
    """A ChangeQueue contains Changes to be processed for related projects.

    A Pipeline with a DependentPipelineManager has multiple parallel
    ChangeQueues shared by different projects.  For instance, there may
    be a ChangeQueue shared by interrelated projects foo and bar, and a
    second queue for independent project baz.

    A Pipeline with an IndependentPipelineManager puts every Change
    into its own ChangeQueue.

    The ChangeQueue Window is inspired by TCP windows and controls how
    many Changes in a given ChangeQueue will be considered active and
    ready to be processed.  If a Change succeeds, the Window is
    increased by `window_increase_factor`.  If a Change fails, the
    Window is decreased by `window_decrease_factor`.

    A ChangeQueue may be a dynamically created queue, which may be
    removed from a DependentPipelineManager once empty.
    """
    def __init__(self):
        super().__init__()
        self._set(
            uuid=uuid4().hex,
            manager=None,
            name="",
            project_branches=[],
            _jobs=set(),
            queue=[],
            window=0,
            window_floor=1,
            window_ceiling=math.inf,
            window_increase_type="linear",
            window_increase_factor=1,
            window_decrease_type="exponential",
            window_decrease_factor=2,
            dynamic=False,
        )

    def serialize(self, context):
        data = {
            "uuid": self.uuid,
            "name": self.name,
            "project_branches": self.project_branches,
            "_jobs": list(self._jobs),
            "queue": [i.getPath() for i in self.queue],
            "window": self.window,
            "window_floor": self.window_floor,
            "window_ceiling": self.window_ceiling,
            "window_increase_type": self.window_increase_type,
            "window_increase_factor": self.window_increase_factor,
            "window_decrease_type": self.window_decrease_type,
            "window_decrease_factor": self.window_decrease_factor,
            "dynamic": self.dynamic,
        }
        return json.dumps(data, sort_keys=True).encode("utf8")

    def deserialize(self, raw, context, extra=None):
        data = super().deserialize(raw, context)

        existing_items = {}
        for item in self.queue:
            existing_items[item.getPath()] = item

        items_by_path = OrderedDict()
        # This is a tuple of (x, Future), where x is None if no action
        # needs to be taken, or a string to indicate which kind of job
        # it was.  This structure allows us to execute async ZK reads
        # and perform local data updates in order.
        tpe_jobs = []
        tpe = context.executor[ChangeQueue]
        for item_path in data["queue"]:
            item = existing_items.get(item_path)
            items_by_path[item_path] = item
            if item:
                tpe_jobs.append((None, tpe.submit(item.refresh, context)))
            else:
                tpe_jobs.append(('item', tpe.submit(
                    QueueItem.fromZK, context, item_path,
                    queue=self)))

        for (kind, future) in tpe_jobs:
            result = future.result()
            if kind == 'item':
                items_by_path[result.getPath()] = result

        # Resolve ahead/behind references between queue items
        for item in items_by_path.values():
            # After a re-enqueue we might have references to items
            # outside the current queue.  We will resolve those
            # references to None for the item ahead or simply exclude
            # it in the list of items behind.
            # The pipeline manager will take care of correcting the
            # references on the next queue iteration.
            item._set(
                item_ahead=items_by_path.get(item._item_ahead),
                items_behind=[items_by_path[p]
                              for p in item._items_behind
                              if p in items_by_path])

        data.update({
            "_jobs": set(data["_jobs"]),
            "queue": list(items_by_path.values()),
            "project_branches": [tuple(pb)
                                 for pb in data["project_branches"]],
        })
        return data

    def getPath(self):
        pipeline_path = self.manager.state.getPath()
        return self.queuePath(pipeline_path, self.uuid)

    @classmethod
    def queuePath(cls, pipeline_path, queue_uuid):
        return f"{pipeline_path}/queue/{queue_uuid}"

    @property
    def zk_context(self):
        return self.manager.current_context

    def __repr__(self):
        return '<ChangeQueue %s: %s>' % (self.manager.pipeline.name,
                                         self.name)

    def getJobs(self):
        return self._jobs
    def addProject(self, project, branch):
        """
        Adds a project-branch combination to the queue.

        The queue will match exactly this combination.  If the caller
        doesn't care about branches it can supply None (but must supply
        None as well when matching).
        """
        project_branch = (project.canonical_name, branch)
        if project_branch not in self.project_branches:
            with self.activeContext(self.zk_context):
                self.project_branches.append(project_branch)

    def matches(self, project_cname, branch):
        return (project_cname, branch) in self.project_branches

    def enqueueChanges(self, changes, event, span_info=None,
                       enqueue_time=None):
        if enqueue_time is None:
            enqueue_time = time.time()

        if event:
            event_ref_cache_key = None
            if isinstance(event, EventInfo):
                event_ref_cache_key = event.ref
            elif getattr(event, 'orig_ref', None):
                event_ref_cache_key = event.orig_ref
            elif hasattr(event, 'canonical_project_name'):
                trusted, project = self.manager.tenant.getProject(
                    event.canonical_project_name)
                if project:
                    change_key = project.source.getChangeKey(event)
                    event_ref_cache_key = change_key.reference
            else:
                # We handle promote, enqueue, and trigger events
                # above; it's unclear what other unhandled event would
                # cause an enqueue, but if it happens, log and
                # continue.
                self.manager.log.warning(
                    "Unable to identify triggering ref from event %s",
                    event)
            event_info = EventInfo.fromEvent(event, event_ref_cache_key)
        else:
            event_info = None

        item = QueueItem.new(self.zk_context,
                             queue=self,
                             changes=changes,
                             event=event_info,
                             span_info=span_info,
                             enqueue_time=enqueue_time)
        self.enqueueItem(item)
        return item

    def enqueueItem(self, item):
        item._set(queue=self)
        if self.queue:
            item.updateAttributes(self.zk_context,
                                  item_ahead=self.queue[-1])
            with item.item_ahead.activeContext(self.zk_context):
                item.item_ahead.items_behind.append(item)
        with self.activeContext(self.zk_context):
            self.queue.append(item)
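
    # enqueueItem() above maintains a doubly-linked list: the new
    # item's item_ahead points at the previous queue tail, and the
    # tail's items_behind gains the new item.  A sketch with
    # hypothetical items a and b:
    #
    #   queue.enqueueItem(a)   # a.item_ahead is None
    #   queue.enqueueItem(b)   # b.item_ahead is a; a.items_behind == [b]
    #
    # dequeueItem() below splices an item out by reconnecting its
    # neighbors before deleting it from ZooKeeper.
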
    def dequeueItem(self, item):
        if item in self.queue:
            with self.activeContext(self.zk_context):
                self.queue.remove(item)
        if item.item_ahead:
            with item.item_ahead.activeContext(self.zk_context):
                item.item_ahead.items_behind.remove(item)
                item.item_ahead.items_behind.extend(item.items_behind)
        for item_behind in item.items_behind:
            item_behind.updateAttributes(self.zk_context,
                                         item_ahead=item.item_ahead)

        item.delete(self.zk_context)
        # We use the dequeue time for stats reporting, but the queue
        # item will no longer be in Zookeeper at this point.
        item._set(dequeue_time=time.time())

    def moveItem(self, item, item_ahead):
        if item.item_ahead == item_ahead:
            return False

        # Remove from current location
        if item.item_ahead:
            with item.item_ahead.activeContext(self.zk_context):
                item.item_ahead.items_behind.remove(item)
                item.item_ahead.items_behind.extend(item.items_behind)
        for item_behind in item.items_behind:
            item_behind.updateAttributes(
                self.zk_context,
                item_ahead=item.item_ahead)

        # Add to new location
        item.updateAttributes(
            self.zk_context,
            item_ahead=item_ahead,
            items_behind=[])
        if item.item_ahead:
            with item.item_ahead.activeContext(self.zk_context):
                item.item_ahead.items_behind.append(item)
        return True

    def isActionable(self, item):
        if not self.window:
            return True
        return item in self.queue[:self.window]

    def increaseWindowSize(self):
        if not self.window:
            return
        with self.activeContext(self.zk_context):
            if self.window_increase_type == 'linear':
                self.window = min(
                    self.window_ceiling,
                    self.window + self.window_increase_factor)
            elif self.window_increase_type == 'exponential':
                self.window = min(
                    self.window_ceiling,
                    self.window * self.window_increase_factor)

    def decreaseWindowSize(self):
        if not self.window:
            return
        with self.activeContext(self.zk_context):
            if self.window_decrease_type == 'linear':
                self.window = max(
                    self.window_floor,
                    self.window - self.window_decrease_factor)
            elif self.window_decrease_type == 'exponential':
                self.window = max(
                    self.window_floor,
                    int(self.window / self.window_decrease_factor))
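
# A worked sketch of the window arithmetic above, using the defaults
# (linear increase by 1, exponential decrease by 2, floor 1):
#
#   window = 8
#   window = min(math.inf, window + 1)   # success -> 9
#   window = max(1, int(window / 2))     # failure -> 4
#
# Repeated failures halve the active window down to the floor, while
# successes grow it back one change at a time, much like TCP congestion
# control.
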
class Project(object):
    """A Project represents a git repository such as openstack/nova."""

    # NOTE: Projects should only be instantiated via a Source object
    # so that they are associated with and cached by their Connection.
    # This makes a Project instance a unique identifier for a given
    # project from a given source.

    def __init__(self, name, source, foreign=False):
        self.name = name
        self.source = source
        self.connection_name = source.connection.connection_name
        self.canonical_hostname = source.canonical_hostname
        self.canonical_name = source.canonical_hostname + '/' + name
        self.private_secrets_key = None
        self.public_secrets_key = None
        self.private_ssh_key = None
        self.public_ssh_key = None
        # Foreign projects are those referenced in dependencies of
        # layout projects; this should matter when deciding whether to
        # enqueue their changes.
        # TODOv3 (jeblair): re-add support for foreign projects if needed
        self.foreign = foreign

    def __str__(self):
        return self.name

    def __repr__(self):
        return '<Project %s>' % (self.name)

    def getSafeAttributes(self):
        return Attributes(name=self.name)

    def toDict(self):
        d = {}
        d['name'] = self.name
        d['connection_name'] = self.connection_name
        d['canonical_name'] = self.canonical_name
        return d


class ApiRoot(ConfigObject):
    def __init__(self, default_auth_realm=None):
        super().__init__()
        self.default_auth_realm = default_auth_realm
        self.access_rules = []

    def __ne__(self, other):
        return not self.__eq__(other)

    def __eq__(self, other):
        if not isinstance(other, ApiRoot):
            return False
        return (self.default_auth_realm == other.default_auth_realm and
                self.access_rules == other.access_rules)

    def __repr__(self):
        return f'<ApiRoot {self.default_auth_realm}>'


class ImageBuildArtifact(zkobject.LockableZKObject):
    ROOT = "/zuul/images"
    IMAGES_PATH = "artifacts"
    LOCKS_PATH = "locks"

    class State(StrEnum):
        READY = "ready"
        DELETING = "deleting"

    STATES = set([
        State.READY,
        State.DELETING,
    ])

    def __init__(self):
        super().__init__()
        self._set(
            uuid=None,  # A random UUID for the image build artifact
            canonical_name=None,
            name=None,  # For validation builds
            project_canonical_name=None,  # For validation builds
            project_branch=None,  # For validation builds
            build_tenant_name=None,  # For validation builds
            build_uuid=None,  # The UUID of the build job
            format=None,
            md5sum=None,
            sha256=None,
            url=None,
            timestamp=None,
            validated=None,
            _state=None,
            state_time=None,
            # Attributes that are not serialized
            lock=None,
            is_locked=False,
        )

    @property
    def state(self):
        return self._state

    @state.setter
    def state(self, value):
        if value not in self.STATES:
            raise TypeError("'%s' is not a valid state" % value)
        self._state = value
        self.state_time = time.time()

    def __repr__(self):
        return f"<ImageBuildArtifact {self.uuid}>"

    def getPath(self):
        return f"{self.ROOT}/{self.IMAGES_PATH}/{self.uuid}"

    def getLockPath(self):
        return f"{self.ROOT}/{self.LOCKS_PATH}/{self.uuid}"

    def serialize(self, context):
        data = dict(
            uuid=self.uuid,
            name=self.name,
            canonical_name=self.canonical_name,
            project_canonical_name=self.project_canonical_name,
            project_branch=self.project_branch,
            build_tenant_name=self.build_tenant_name,
            build_uuid=self.build_uuid,
            format=self.format,
            md5sum=self.md5sum,
            sha256=self.sha256,
            url=self.url,
            timestamp=self.timestamp,
            validated=self.validated,
            _state=self._state,
            state_time=self.state_time,
        )
        return json.dumps(data, sort_keys=True).encode("utf-8")
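
# A sketch of the guarded state transition above (hypothetical object):
#
#   artifact.state = ImageBuildArtifact.State.DELETING  # ok; also
#                                                       # stamps state_time
#   artifact.state = 'bogus'  # raises TypeError: not a valid state
#
# Routing assignment through the property setter keeps state_time in
# sync with every transition.
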
class ImageUpload(zkobject.LockableZKObject):
    ROOT = "/zuul/image-uploads"
    UPLOADS_PATH = "uploads"
    LOCKS_PATH = "locks"

    class State(StrEnum):
        READY = "ready"
        DELETING = "deleting"
        PENDING = "pending"
        UPLOADING = "uploading"

    STATES = set([
        State.READY,
        State.DELETING,
        State.PENDING,
        State.UPLOADING,
    ])

    def __init__(self):
        super().__init__()
        self._set(
            uuid=None,  # A random UUID for the image upload
            canonical_name=None,
            artifact_uuid=None,  # The UUID of the ImageBuildArtifact
            endpoint_name=None,
            providers=None,
            config_hash=None,
            external_id=None,
            timestamp=None,
            validated=None,
            _state=None,
            state_time=None,
            # Attributes that are not serialized
            lock=None,
            is_locked=False,
        )

    @property
    def state(self):
        return self._state

    @state.setter
    def state(self, value):
        if value not in self.STATES:
            raise TypeError("'%s' is not a valid state" % value)
        self._state = value
        self.state_time = time.time()

    def __repr__(self):
        return f"<ImageUpload {self.uuid}>"

    def getPath(self):
        return f"{self.ROOT}/{self.UPLOADS_PATH}/{self.uuid}"

    def getLockPath(self):
        return f"{self.ROOT}/{self.LOCKS_PATH}/{self.uuid}"

    def serialize(self, context):
        data = dict(
            uuid=self.uuid,
            canonical_name=self.canonical_name,
            artifact_uuid=self.artifact_uuid,
            endpoint_name=self.endpoint_name,
            providers=self.providers,
            config_hash=self.config_hash,
            external_id=self.external_id,
            timestamp=self.timestamp,
            validated=self.validated,
            _state=self._state,
            state_time=self.state_time,
        )
        return json.dumps(data, sort_keys=True).encode("utf-8")


class Image(ConfigObject):
    """A zuul or cloud image.

    Images are associated with labels and providers.
    """

    def __init__(self, name, image_type, description):
        super().__init__()
        self.name = name
        self.type = image_type
        self.description = description

    @property
    def canonical_name(self):
        return '/'.join([
            urllib.parse.quote_plus(
                self.source_context.project_canonical_name),
            urllib.parse.quote_plus(self.name),
        ])

    def __repr__(self):
        return '<Image %s>' % (self.name,)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __eq__(self, other):
        if not isinstance(other, Image):
            return False
        return (self.name == other.name and
                self.type == other.type and
                self.description == other.description)

    @property
    def project_canonical_name(self):
        return self.source_context.project_canonical_name

    @property
    def branch(self):
        return self.source_context.branch

    def toDict(self):
        return {
            'project_canonical_name': self.project_canonical_name,
            'name': self.name,
            'branch': self.branch,
            'type': self.type,
            'description': self.description,
        }

    def toConfig(self):
        return {
            'project_canonical_name': self.project_canonical_name,
            'name': self.name,
            'branch': self.branch,
            'type': self.type,
            'description': self.description,
        }


class Flavor(ConfigObject):
    """A node flavor.

    Flavors are associated with provider-specific instance types.
    """

    def __init__(self, name, description):
        super().__init__()
        self.name = name
        self.description = description

    @property
    def canonical_name(self):
        return '/'.join([
            urllib.parse.quote_plus(
                self.source_context.project_canonical_name),
            urllib.parse.quote_plus(self.name),
        ])

    def __repr__(self):
        return '<Flavor %s>' % (self.name,)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __eq__(self, other):
        if not isinstance(other, Flavor):
            return False
        return (self.name == other.name and
                self.description == other.description)

    def toDict(self):
        sc = self.source_context
        return {
            'project_canonical_name': sc.project_canonical_name,
            'name': self.name,
            'description': self.description,
        }

    def toConfig(self):
        sc = self.source_context
        return {
            'project_canonical_name': sc.project_canonical_name,
            'name': self.name,
            'description': self.description,
        }
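
# Image, Flavor, and Label share the same canonical-name scheme: each
# component is URL-quoted so the "/" separator stays unambiguous.  A
# hypothetical example (interactive session):
#
#   >>> urllib.parse.quote_plus('review.example.com/acme/images')
#   'review.example.com%2Facme%2Fimages'
#
# so an Image named "debian" in that project has the canonical name
# "review.example.com%2Facme%2Fimages/debian".
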
""" def __init__(self, name, image, flavor, description, min_ready, max_ready_age): super().__init__() self.name = name self.image = image self.flavor = flavor self.description = description self.min_ready = min_ready self.max_ready_age = max_ready_age @property def canonical_name(self): return '/'.join([ urllib.parse.quote_plus( self.source_context.project_canonical_name), urllib.parse.quote_plus(self.name), ]) def __repr__(self): return '