zuul/zuul/model.py
# Copyright 2012 Hewlett-Packard Development Company, L.P.
# Copyright 2021-2024 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
from collections import OrderedDict, defaultdict, namedtuple, UserDict
import copy
import json
import hashlib
import logging
import math
from functools import partial, total_ordering
import threading
import re2
import time
from uuid import uuid4
import urllib.parse
import textwrap
import types
import itertools
from kazoo.exceptions import NodeExistsError, NoNodeError
from cachetools.func import lru_cache
from opentelemetry import trace
from zuul.lib import yamlutil as yaml
from zuul.lib.varnames import check_varnames
import jsonpath_rw
from zuul import change_matcher
from zuul.lib.config import get_default
from zuul.lib.result_data import get_artifacts_from_result_data
from zuul.lib.logutil import get_annotated_logger
from zuul.lib.capabilities import capabilities_registry
from zuul.lib.jsonutil import json_dumps
from zuul.lib import tracing
from zuul.zk import zkobject
from zuul.zk.blob_store import BlobStore
from zuul.zk.change_cache import ChangeKey
from zuul.zk.components import COMPONENT_REGISTRY
from zuul.exceptions import (
SEVERITY_ERROR,
SEVERITY_WARNING,
NodesetNotFoundError,
)
MERGER_MERGE = 1 # "git merge"
MERGER_MERGE_RESOLVE = 2 # "git merge -s resolve"
MERGER_CHERRY_PICK = 3 # "git cherry-pick"
MERGER_SQUASH_MERGE = 4 # "git merge --squash"
MERGER_REBASE = 5 # "git rebase"
MERGER_MERGE_RECURSIVE = 6 # "git merge -s recursive"
MERGER_MERGE_ORT = 7 # "git merge -s ort"
MERGER_MAP = {
'merge': MERGER_MERGE,
'merge-resolve': MERGER_MERGE_RESOLVE,
'merge-recursive': MERGER_MERGE_RECURSIVE,
'merge-ort': MERGER_MERGE_ORT,
'cherry-pick': MERGER_CHERRY_PICK,
'squash-merge': MERGER_SQUASH_MERGE,
'rebase': MERGER_REBASE,
}
ALL_MERGE_MODES = list(MERGER_MAP.values())
PRECEDENCE_NORMAL = 0
PRECEDENCE_LOW = 1
PRECEDENCE_HIGH = 2
PRECEDENCE_MAP = {
None: PRECEDENCE_NORMAL,
'low': PRECEDENCE_LOW,
'normal': PRECEDENCE_NORMAL,
'high': PRECEDENCE_HIGH,
}
PRIORITY_MAP = {
PRECEDENCE_NORMAL: 200,
PRECEDENCE_LOW: 300,
PRECEDENCE_HIGH: 100,
}
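# Illustrative note (a sketch of how these maps relate): a precedence keyword
# from configuration is normalized through PRECEDENCE_MAP and can then be
# translated to a queue priority via PRIORITY_MAP, where high precedence gets
# the smallest number, e.g.:
#
#   precedence = PRECEDENCE_MAP.get('high')   # -> PRECEDENCE_HIGH
#   priority = PRIORITY_MAP[precedence]       # -> 100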
# Request states
STATE_REQUESTED = 'requested'
STATE_FULFILLED = 'fulfilled'
STATE_FAILED = 'failed'
REQUEST_STATES = set([STATE_REQUESTED,
STATE_FULFILLED,
STATE_FAILED])
# Node states
STATE_BUILDING = 'building'
STATE_TESTING = 'testing'
STATE_READY = 'ready'
STATE_IN_USE = 'in-use'
STATE_USED = 'used'
STATE_HOLD = 'hold'
STATE_DELETING = 'deleting'
NODE_STATES = set([STATE_BUILDING,
STATE_TESTING,
STATE_READY,
STATE_IN_USE,
STATE_USED,
STATE_HOLD,
STATE_DELETING])
# Workspace scheme
SCHEME_GOLANG = 'golang'
SCHEME_FLAT = 'flat'
SCHEME_UNIQUE = 'unique'
def add_debug_line(debug_messages, msg, indent=0):
if debug_messages is None:
return
if indent:
indent = ' ' * indent
else:
indent = ''
debug_messages.append(indent + msg)
def get_merge_mode_name(merge_mode):
"Look up the merge mode name given the constant"
for k, v in MERGER_MAP.items():
if v == merge_mode:
return k
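# For example, get_merge_mode_name(MERGER_SQUASH_MERGE) returns
# 'squash-merge'; an unrecognized constant falls through and the function
# returns None.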
def filter_severity(error_list, errors=True, warnings=True):
return [e for e in error_list
if (
(errors and e.severity == SEVERITY_ERROR) or
(warnings and e.severity == SEVERITY_WARNING)
)]
class QueryCache:
"""Cache query information while processing dependencies"""
def __init__(self, zk_client):
self.zk_client = zk_client
self.ltime = 0
self.clear(0)
def clear(self, ltime):
self.ltime = ltime
self.topic_queries = {}
def clearIfOlderThan(self, event):
if not hasattr(event, "zuul_event_ltime"):
return
ltime = event.zuul_event_ltime
if ltime > self.ltime:
ltime = self.zk_client.getCurrentLtime()
self.clear(ltime)
class MergeOp:
def __init__(self, cmd=None, timestamp=None, comment=None, path=None):
"""A class representing a merge operation, returned by the merger to
tell the user what was done."""
self.cmd = cmd
self.timestamp = timestamp
self.comment = comment
self.path = path
def toDict(self):
ret = {}
for k in ['cmd', 'timestamp', 'comment', 'path']:
v = getattr(self, k)
if v is not None:
ret[k] = v
return ret
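    # Illustrative sketch (not executed; values are hypothetical): the merger
    # can record an operation and report it so the repo state can later be
    # reconstructed manually, e.g.
    #
    #   op = MergeOp(cmd=['git', 'merge', 'FETCH_HEAD'],
    #                comment='merging change 12345,1')
    #   op.toDict()  # -> {'cmd': [...], 'comment': 'merging change 12345,1'}
    #                # unset fields (timestamp, path) are omitted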
class ZuulMark:
# The yaml mark class differs between the C and python versions.
# The C version does not provide a snippet, and also appears to
# lose data under some circumstances.
def __init__(self, start_mark, end_mark, stream):
self.name = start_mark.name
self.index = start_mark.index
self.line = start_mark.line
self.end_line = end_mark.line
self.end_index = end_mark.index
self.column = start_mark.column
self.end_column = end_mark.column
self.snippet = stream[start_mark.index:end_mark.index]
def __str__(self):
return ' in "{name}", line {line}, column {column}'.format(
name=self.name,
line=self.line + 1,
column=self.column + 1,
)
def __eq__(self, other):
if not isinstance(other, ZuulMark):
return False
return (self.line == other.line and
self.snippet == other.snippet)
line_snippet_context = 4
def getLineSnippet(self, line):
start = max(line - self.line - self.line_snippet_context, 0)
end = start + (self.line_snippet_context * 2) + 1
all_lines = self.snippet.splitlines()
lines = all_lines[start:end]
if start > 0:
lines.insert(0, '...')
if end < len(all_lines):
lines.append('...')
return '\n'.join(lines)
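    # Worked example of the windowing above: for a mark whose snippet starts
    # at file line 0 and an error at line 10, start = max(10 - 0 - 4, 0) = 6
    # and end = 15, so all_lines[6:15] keeps the error line plus four lines
    # of context on either side, with '...' markers added where the snippet
    # was truncated.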
def getLineLocation(self, line):
return ' in "{name}", line {line}'.format(
name=self.name,
line=line + 1,
)
def serialize(self):
return {
"name": self.name,
"index": self.index,
"line": self.line,
"end_line": self.end_line,
"end_index": self.end_index,
"column": self.column,
"end_column": self.end_column,
"snippet": self.snippet,
}
@classmethod
def deserialize(cls, data):
o = cls.__new__(cls)
o.__dict__.update(data)
return o
class ConfigurationErrorKey(object):
"""A class which attempts to uniquely identify configuration errors
based on their file location. It's not perfect, but it's usually
sufficient to determine whether we should show an error to a user.
"""
# Note: this class is serialized to ZK via ConfigurationErrorList,
# ensure that it serializes and deserializes appropriately.
def __init__(self, context, mark, error_text):
self.context = context
self.mark = mark
self.error_text = error_text
elements = []
if context:
elements.extend([
context.project_canonical_name,
context.branch,
context.path,
])
else:
elements.extend([None, None, None])
if mark:
elements.extend([
mark.line,
mark.snippet,
])
else:
elements.extend([None, None])
elements.append(error_text)
hasher = hashlib.sha256()
hasher.update(json.dumps(elements, sort_keys=True).encode('utf8'))
self._hash = hasher.hexdigest()
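    # The key is effectively a sha256 over (project, branch, path, line,
    # snippet, error_text) serialized as JSON, so the same error reported
    # again from an unchanged location hashes to the same value.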
def serialize(self):
return {
"context": self.context and self.context.serialize(),
"mark": self.mark and self.mark.serialize(),
"error_text": self.error_text,
"_hash": self._hash,
}
@classmethod
def deserialize(cls, data):
data.update({
"context": data["context"] and SourceContext.deserialize(
data["context"]),
"mark": data["mark"] and ZuulMark.deserialize(data["mark"]),
})
o = cls.__new__(cls)
o.__dict__.update(data)
return o
def __hash__(self):
return hash(self._hash)
def __ne__(self, other):
return not self.__eq__(other)
def __eq__(self, other):
if not isinstance(other, ConfigurationErrorKey):
return False
return (self.context == other.context and
self.mark == other.mark and
self.error_text == other.error_text)
class ConfigurationError(object):
"""A configuration error"""
# Note: this class is serialized to ZK via ConfigurationErrorList,
# ensure that it serializes and deserializes appropriately.
def __init__(self, context, mark, error, short_error=None,
severity=None, name=None):
self.error = error
self.short_error = short_error
self.severity = severity or SEVERITY_ERROR
self.name = name or 'Unknown'
self.key = ConfigurationErrorKey(context, mark, self.error)
def serialize(self):
return {
"error": self.error,
"short_error": self.short_error,
"key": self.key.serialize(),
"severity": self.severity,
"name": self.name,
}
@classmethod
def deserialize(cls, data):
data["key"] = ConfigurationErrorKey.deserialize(data["key"])
data['severity'] = data['severity']
data['name'] = data['name']
o = cls.__new__(cls)
o.__dict__.update(data)
return o
def __ne__(self, other):
return not self.__eq__(other)
def __eq__(self, other):
if not isinstance(other, ConfigurationError):
return False
return (self.error == other.error and
self.short_error == other.short_error and
self.key == other.key and
self.severity == other.severity and
self.name == other.name)
class ConfigurationErrorList(zkobject.ShardedZKObject):
"""A list of configuration errors.
BuildSets may have zero or one of these.
"""
def __repr__(self):
return '<ConfigurationErrorList>'
def getPath(self):
return self._path
def serialize(self, context):
data = {
"errors": [e.serialize() for e in self.errors],
}
return json.dumps(data, sort_keys=True).encode("utf8")
def deserialize(self, raw, context):
data = super().deserialize(raw, context)
data.update({
"errors": [ConfigurationError.deserialize(d)
for d in data["errors"]],
})
return data
class LoadingErrors(object):
"""A configuration errors accumalator attached to a layout object
"""
def __init__(self):
self.errors = []
self.error_keys = set()
def makeError(self, context, mark, error, short_error=None,
severity=None, name=None):
e = ConfigurationError(context, mark, error,
short_error=short_error,
severity=severity,
name=name)
self.addError(e)
def addError(self, error):
self.errors.append(error)
self.error_keys.add(error.key)
def __getitem__(self, index):
return self.errors[index]
def __len__(self):
return len(self.errors)
class NoMatchingParentError(Exception):
"""A job referenced a parent, but that parent had no variants which
matched the current change."""
pass
class TemplateNotFoundError(Exception):
"""A project referenced a template that does not exist."""
pass
class RequirementsError(Exception):
"""A job's requirements were not met."""
pass
class Attributes(object):
"""A class to hold attributes for string formatting."""
def __init__(self, **kw):
setattr(self, '__dict__', kw)
def toDict(self):
return self.__dict__
class Freezable(object):
"""A mix-in class so that an object can be made immutable"""
def __init__(self):
super(Freezable, self).__setattr__('_frozen', False)
def freeze(self):
"""Make this object immutable"""
def _freezelist(l):
for i, v in enumerate(l):
if isinstance(v, Freezable):
if not v._frozen:
v.freeze()
elif isinstance(v, dict):
l[i] = _freezedict(v)
elif isinstance(v, list):
l[i] = _freezelist(v)
return tuple(l)
def _freezedict(d):
for k, v in list(d.items()):
if isinstance(v, Freezable):
if not v._frozen:
v.freeze()
elif isinstance(v, dict):
d[k] = _freezedict(v)
elif isinstance(v, list):
d[k] = _freezelist(v)
return types.MappingProxyType(d)
_freezedict(self.__dict__)
# Ignore return value from freezedict because __dict__ can't
# be a mappingproxy.
self._frozen = True
@staticmethod
def thaw(data):
"""Thaw the supplied dictionary"""
def _thawlist(l):
l = list(l)
for i, v in enumerate(l):
if isinstance(v, (types.MappingProxyType, dict)):
l[i] = _thawdict(v)
elif isinstance(v, (tuple, list)):
l[i] = _thawlist(v)
return l
def _thawdict(d):
d = dict(d)
for k, v in list(d.items()):
if isinstance(v, (types.MappingProxyType, dict)):
d[k] = _thawdict(v)
elif isinstance(v, (tuple, list)):
d[k] = _thawlist(v)
return d
return _thawdict(data)
def __setattr__(self, name, value):
if self._frozen:
raise Exception("Unable to modify frozen object %s" %
(repr(self),))
super(Freezable, self).__setattr__(name, value)
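    # Illustrative sketch (not executed) of the freeze/thaw round trip with a
    # hypothetical subclass:
    #
    #   class Example(Freezable):
    #       def __init__(self):
    #           super().__init__()
    #           self.data = {'key': ['a', 'b']}
    #
    #   e = Example()
    #   e.freeze()            # nested dict -> mappingproxy, list -> tuple
    #   e.other = 1           # raises: unable to modify frozen object
    #   e.data['key'] = 'x'   # raises TypeError (mappingproxy is read-only)
    #   Freezable.thaw(e.data)  # -> plain mutable {'key': ['a', 'b']}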
class ConfigObject(Freezable):
def __init__(self):
super().__init__()
self.source_context = None
self.start_mark = None
class Pipeline(object):
"""A configuration that ties together triggers, reporters and managers
Trigger
A description of which events should be processed
Manager
        Responsible for enqueuing and dequeuing Changes
Reporter
Communicates success and failure results somewhere
"""
STATE_NORMAL = 'normal'
STATE_ERROR = 'error'
def __init__(self, name, tenant):
self.name = name
# Note that pipelines are not portable across tenants (new
# pipeline objects must be made when a tenant is
# reconfigured). A pipeline requires a tenant in order to
# reach the currently active layout for that tenant.
self.tenant = tenant
self.allow_other_connections = True
self.connections = []
self.source_context = None
self.start_mark = None
self.description = None
self.failure_message = None
self.merge_conflict_message = None
self.success_message = None
self.footer_message = None
self.enqueue_message = None
self.start_message = None
self.dequeue_message = None
self.post_review = False
self.dequeue_on_new_patchset = True
self.ignore_dependencies = False
self.manager = None
self.relative_priority_queues = {}
self.precedence = PRECEDENCE_NORMAL
self.supercedes = []
self.triggers = []
self.enqueue_actions = []
self.start_actions = []
self.success_actions = []
self.failure_actions = []
self.merge_conflict_actions = []
self.no_jobs_actions = []
self.disabled_actions = []
self.dequeue_actions = []
self.disable_at = None
self.window = None
self.window_floor = None
self.window_ceiling = None
self.window_increase_type = None
self.window_increase_factor = None
self.window_decrease_type = None
self.window_decrease_factor = None
self.state = None
self.change_list = None
# Only used by the unit tests for assertions
self._exception_count = 0
@property
def queues(self):
return self.state.queues
@property
def actions(self):
return (
self.enqueue_actions +
self.start_actions +
self.success_actions +
self.failure_actions +
self.merge_conflict_actions +
self.no_jobs_actions +
self.disabled_actions +
self.dequeue_actions
)
def __repr__(self):
return '<Pipeline %s>' % self.name
def getSafeAttributes(self):
return Attributes(name=self.name)
def validateReferences(self, layout):
# Verify that references to other objects in the layout are
# valid.
for pipeline in self.supercedes:
if not layout.pipelines.get(pipeline):
raise Exception(
'The pipeline "{this}" supercedes an unknown pipeline '
'{other}.'.format(
this=self.name,
other=pipeline))
def setManager(self, manager):
self.manager = manager
def addQueue(self, queue):
with self.state.activeContext(self.manager.current_context):
self.queues.append(queue)
def getQueue(self, project_cname, branch):
# Queues might be branch specific so match with branch
for queue in self.queues:
if queue.matches(project_cname, branch):
return queue
return None
def getRelativePriorityQueue(self, project):
for queue in self.relative_priority_queues.values():
if project in queue:
return queue
return [project]
def setRelativePriorityQueues(self, queues):
self.state.updateAttributes(self.manager.current_context,
relative_priority_queues=queues)
def removeQueue(self, queue):
if queue in self.queues:
with self.state.activeContext(self.manager.current_context):
self.queues.remove(queue)
queue.delete(self.manager.current_context)
def promoteQueue(self, queue):
if queue not in self.queues:
return
with self.state.activeContext(self.manager.current_context):
self.queues.remove(queue)
self.queues.insert(0, queue)
def getChangesInQueue(self):
changes = []
for shared_queue in self.queues:
changes.extend([x.change for x in shared_queue.queue])
return changes
def getAllItems(self, include_old=False):
items = []
for shared_queue in self.queues:
items.extend(shared_queue.queue)
if include_old:
for shared_queue in self.state.old_queues:
items.extend(shared_queue.queue)
return items
def formatStatusJSON(self, websocket_url=None):
j_pipeline = dict(name=self.name,
description=self.description,
state=self.state.state,
manager=self.manager.type)
j_pipeline['triggers'] = [
{'driver': t.driver.name} for t in self.triggers
]
j_queues = []
j_pipeline['change_queues'] = j_queues
for queue in self.queues:
j_queue = dict(name=queue.name)
j_queues.append(j_queue)
j_queue['heads'] = []
j_queue['window'] = queue.window
if queue.project_branches and queue.project_branches[0][1]:
j_queue['branch'] = queue.project_branches[0][1]
else:
j_queue['branch'] = None
j_changes = []
for e in queue.queue:
if not e.item_ahead:
if j_changes:
j_queue['heads'].append(j_changes)
j_changes = []
j_changes.append(e.formatJSON(websocket_url))
if (len(j_changes) > 1 and
(j_changes[-2]['remaining_time'] is not None) and
(j_changes[-1]['remaining_time'] is not None)):
j_changes[-1]['remaining_time'] = max(
j_changes[-2]['remaining_time'],
j_changes[-1]['remaining_time'])
if j_changes:
j_queue['heads'].append(j_changes)
return j_pipeline
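    # Illustrative sketch of the structure returned above (values are
    # hypothetical):
    #
    #   {'name': 'gate', 'description': '...', 'state': 'normal',
    #    'manager': 'dependent',
    #    'triggers': [{'driver': 'gerrit'}],
    #    'change_queues': [
    #        {'name': 'integrated', 'window': 20, 'branch': None,
    #         'heads': [[<item JSON>, ...], ...]}]}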
class PipelineState(zkobject.ZKObject):
def __init__(self):
super().__init__()
self._set(
state=Pipeline.STATE_NORMAL,
queues=[],
old_queues=[],
consecutive_failures=0,
disabled=False,
layout_uuid=None,
# Local pipeline reference (not persisted in Zookeeper)
pipeline=None,
_read_only=False,
)
def _lateInitData(self):
# If we're initializing the object on our initial refresh,
# reset the data to this.
return dict(
state=Pipeline.STATE_NORMAL,
queues=[],
old_queues=[],
consecutive_failures=0,
disabled=False,
layout_uuid=self.pipeline.tenant.layout.uuid,
)
@classmethod
def fromZK(klass, context, path, pipeline, **kw):
obj = klass()
obj._set(pipeline=pipeline, **kw)
# Bind the state to the pipeline, so child objects can access
        # the full pipeline state.
pipeline.state = obj
obj._load(context, path=path)
return obj
@classmethod
def create(cls, pipeline, old_state=None):
# If we are resetting an existing pipeline, we will have an
# old_state, so just clean up the object references there and
# let the next refresh handle updating any data.
if old_state:
old_state._resetObjectRefs()
return old_state
# Otherwise, we are initializing a pipeline that we haven't
# seen before. It still might exist in ZK, but since we
# haven't seen it, we don't have any object references to
# clean up. We can just start with a clean object, set the
# pipeline reference, and let the next refresh deal with
# whether there might be any data in ZK.
state = cls()
state._set(pipeline=pipeline)
return state
def _resetObjectRefs(self):
# Update the pipeline references on the queue objects.
for queue in self.queues + self.old_queues:
queue.pipeline = self.pipeline
def getPath(self):
if hasattr(self, '_path'):
return self._path
return self.pipelinePath(self.pipeline)
@classmethod
def pipelinePath(cls, pipeline):
safe_tenant = urllib.parse.quote_plus(pipeline.tenant.name)
safe_pipeline = urllib.parse.quote_plus(pipeline.name)
return f"/zuul/tenant/{safe_tenant}/pipeline/{safe_pipeline}"
@classmethod
def parsePath(self, path):
"""Return path components for use by the REST API"""
root, safe_tenant, pipeline, safe_pipeline = path.rsplit('/', 3)
return (urllib.parse.unquote_plus(safe_tenant),
urllib.parse.unquote_plus(safe_pipeline))
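    # For example, pipeline "check" in tenant "example" is stored at
    # "/zuul/tenant/example/pipeline/check", and parsePath() on that path
    # returns ('example', 'check').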
def _dirtyPath(self):
return f'{self.getPath()}/dirty'
def isDirty(self, client):
return bool(client.exists(self._dirtyPath()))
def setDirty(self, client):
try:
client.create(self._dirtyPath())
except NodeExistsError:
pass
def clearDirty(self, client):
try:
client.delete(self._dirtyPath())
except NoNodeError:
pass
def removeOldQueue(self, context, queue):
if queue in self.old_queues:
with self.activeContext(context):
self.old_queues.remove(queue)
def serialize(self, context):
if self._read_only:
raise RuntimeError("Attempt to serialize read-only pipeline state")
data = {
"state": self.state,
"consecutive_failures": self.consecutive_failures,
"disabled": self.disabled,
"queues": [q.getPath() for q in self.queues],
"old_queues": [q.getPath() for q in self.old_queues],
"layout_uuid": self.layout_uuid,
}
return json.dumps(data, sort_keys=True).encode("utf8")
def refresh(self, context, read_only=False):
# Set read_only to True to indicate that we should avoid
# "resetting" the pipeline state if the layout has changed.
# This is so that we can refresh the object in circumstances
# where we haven't verified that our local layout matches
# what's in ZK.
# Notably, this need not prevent us from performing the
# initialization below if necessary. The case of the object
        # being brand new in ZK supersedes our worry that our old copy
# might be out of date since our old copy is, itself, brand
# new.
self._set(_read_only=read_only)
try:
return super().refresh(context)
except NoNodeError:
# If the object doesn't exist we will receive a
# NoNodeError. This happens because the postConfig call
# creates this object without holding the pipeline lock,
# so it can't determine whether or not it exists in ZK.
# We do hold the pipeline lock here, so if we get this
# error, we know we're initializing the object, and we
# should write it to ZK.
# Note that typically this code is not used since
# currently other objects end up creating the pipeline
# path in ZK first. It is included in case that ever
# changes. Currently the empty byte-string code path in
# deserialize() is used instead.
context.log.warning("Initializing pipeline state for %s; "
"this is expected only for new pipelines",
self.pipeline.name)
self._set(**self._lateInitData())
self.internalCreate(context)
def deserialize(self, raw, context):
# We may have old change objects in the pipeline cache, so
# make sure they are the same objects we would get from the
# source change cache.
self.pipeline.manager.clearCache()
# If the object doesn't exist we will get back an empty byte
# string. This happens because the postConfig call creates
# this object without holding the pipeline lock, so it can't
# determine whether or not it exists in ZK. We do hold the
# pipeline lock here, so if we get the empty byte string, we
# know we're initializing the object. In that case, we should
# initialize the layout id to the current layout. Nothing
# else needs to be set.
if raw == b'':
context.log.warning("Initializing pipeline state for %s; "
"this is expected only for new pipelines",
self.pipeline.name)
return self._lateInitData()
data = super().deserialize(raw, context)
if not self._read_only:
# Skip this check if we're in a context where we want to
# read the state without updating it (in case we're not
# certain that the layout is up to date).
if data['layout_uuid'] != self.pipeline.tenant.layout.uuid:
# The tenant layout has updated since our last state; we
# need to reset the state.
data = dict(
state=Pipeline.STATE_NORMAL,
queues=[],
old_queues=data["old_queues"] + data["queues"],
consecutive_failures=0,
disabled=False,
layout_uuid=self.pipeline.tenant.layout.uuid,
)
existing_queues = {
q.getPath(): q for q in self.queues + self.old_queues
}
# Restore the old queues first, so that in case an item is
# already in one of the new queues the item(s) ahead/behind
# pointers are corrected when restoring the new queues.
old_queues = []
for queue_path in data["old_queues"]:
queue = existing_queues.get(queue_path)
if queue:
queue.refresh(context)
else:
queue = ChangeQueue.fromZK(context, queue_path,
pipeline=self.pipeline)
old_queues.append(queue)
queues = []
for queue_path in data["queues"]:
queue = existing_queues.get(queue_path)
if queue:
queue.refresh(context)
else:
queue = ChangeQueue.fromZK(context, queue_path,
pipeline=self.pipeline)
queues.append(queue)
if hasattr(self.pipeline.manager, "change_queue_managers"):
# Clear out references to old queues
for cq_manager in self.pipeline.manager.change_queue_managers:
cq_manager.created_for_branches.clear()
# Add queues to matching change queue managers
for queue in queues:
project_cname, branch = queue.project_branches[0]
for cq_manager in self.pipeline.manager.change_queue_managers:
managed_projects = {
p.canonical_name for p in cq_manager.projects
}
if project_cname in managed_projects:
cq_manager.created_for_branches[branch] = queue
break
data.update({
"queues": queues,
"old_queues": old_queues,
})
return data
def _getKnownItems(self):
items = []
for queue in (*self.old_queues, *self.queues):
items.extend(queue.queue)
return items
def cleanup(self, context):
pipeline_path = self.getPath()
try:
all_items = set(context.client.get_children(
f"{pipeline_path}/item"))
except NoNodeError:
all_items = set()
known_item_objs = self._getKnownItems()
known_items = {i.uuid for i in known_item_objs}
items_referenced_by_builds = set()
for i in known_item_objs:
build_set = i.current_build_set
# Drop some attributes from local objects to save memory
build_set._set(_files=None,
_merge_repo_state=None,
_extra_repo_state=None,
_repo_state=RepoState())
job_graph = build_set.job_graph
if not job_graph:
continue
for job in job_graph.getJobs():
build = build_set.getBuild(job)
if build:
items_referenced_by_builds.add(build.build_set.item.uuid)
stale_items = all_items - known_items - items_referenced_by_builds
for item_uuid in stale_items:
self.pipeline.manager.log.debug("Cleaning up stale item %s",
item_uuid)
context.client.delete(QueueItem.itemPath(pipeline_path, item_uuid),
recursive=True)
try:
all_queues = set(context.client.get_children(
f"{pipeline_path}/queue"))
except NoNodeError:
all_queues = set()
known_queues = {q.uuid for q in (*self.old_queues, *self.queues)}
stale_queues = all_queues - known_queues
for queue_uuid in stale_queues:
self.pipeline.manager.log.debug("Cleaning up stale queue %s",
queue_uuid)
context.client.delete(
ChangeQueue.queuePath(pipeline_path, queue_uuid),
recursive=True)
class PipelineChangeList(zkobject.ShardedZKObject):
"""A list of change references within a pipeline
This is used by the scheduler to quickly decide if events which
    otherwise don't match the pipeline triggers should nevertheless be
    forwarded to the pipeline.
It is also used to maintain the connection cache.
"""
# We can read from this object without locking, and since it's
# sharded, that may produce an error. If that happens, don't
# delete the object, just retry.
delete_on_error = False
def __init__(self):
super().__init__()
self._set(
changes=[],
_change_keys=[],
)
def refresh(self, context, allow_init=True):
# Set allow_init to false to indicate that we don't hold the
# lock and we should not try to initialize the object in ZK if
# it does not exist.
try:
self._retry(context, super().refresh,
context, max_tries=5)
except NoNodeError:
# If the object doesn't exist we will receive a
# NoNodeError. This happens because the postConfig call
# creates this object without holding the pipeline lock,
# so it can't determine whether or not it exists in ZK.
# We do hold the pipeline lock here, so if we get this
# error, we know we're initializing the object, and
# we should write it to ZK.
if allow_init:
context.log.warning(
"Initializing pipeline change list for %s; "
"this is expected only for new pipelines",
self.pipeline.name)
self.internalCreate(context)
else:
# If we're called from a context where we can't
# initialize the change list, re-raise the exception.
raise
def getPath(self):
return self.getChangeListPath(self.pipeline)
@classmethod
def getChangeListPath(cls, pipeline):
pipeline_path = pipeline.state.getPath()
return pipeline_path + '/change_list'
@classmethod
def create(cls, pipeline):
        # This object may or may not exist in ZK, but we aren't using any
        # of that data here. We can just start with a clean object, set
# the pipeline reference, and let the next refresh deal with
# whether there might be any data in ZK.
change_list = cls()
change_list._set(pipeline=pipeline)
return change_list
def serialize(self, context):
data = {
"changes": self.changes,
}
return json.dumps(data, sort_keys=True).encode("utf8")
def deserialize(self, raw, context):
data = super().deserialize(raw, context)
change_keys = []
# We must have a dictionary with a 'changes' key; otherwise we
# may be reading immediately after truncating. Allow the
        # KeyError exception to propagate in that case.
for ref in data['changes']:
change_keys.append(ChangeKey.fromReference(ref))
data['_change_keys'] = change_keys
return data
def setChangeKeys(self, context, change_keys):
change_refs = [key.reference for key in change_keys]
if change_refs == self.changes:
return
self.updateAttributes(context, changes=change_refs)
self._set(_change_keys=change_keys)
def getChangeKeys(self):
return self._change_keys
class PipelineSummary(zkobject.ShardedZKObject):
log = logging.getLogger("zuul.PipelineSummary")
truncate_on_create = True
delete_on_error = False
def __init__(self):
super().__init__()
self._set(
status={},
)
def getPath(self):
return f"{PipelineState.pipelinePath(self.pipeline)}/status"
def update(self, context, zuul_globals):
status = self.pipeline.formatStatusJSON(zuul_globals.websocket_url)
self.updateAttributes(context, status=status)
def serialize(self, context):
data = {
"status": self.status,
}
return json.dumps(data, sort_keys=True).encode("utf8")
def refresh(self, context):
# Ignore exceptions and just re-use the previous state. This
# might happen in case the sharded status data is truncated
# while zuul-web tries to read it.
try:
super().refresh(context)
except NoNodeError:
self.log.warning("No pipeline summary found "
"(may not be created yet)")
except Exception:
self.log.exception("Failed to refresh data")
return self.status
class ChangeQueue(zkobject.ZKObject):
"""A ChangeQueue contains Changes to be processed for related projects.
A Pipeline with a DependentPipelineManager has multiple parallel
    ChangeQueues shared by different projects. For instance, there may be a
ChangeQueue shared by interrelated projects foo and bar, and a second queue
for independent project baz.
A Pipeline with an IndependentPipelineManager puts every Change into its
own ChangeQueue.
    The ChangeQueue Window is inspired by TCP windows and controls how many
Changes in a given ChangeQueue will be considered active and ready to
be processed. If a Change succeeds, the Window is increased by
`window_increase_factor`. If a Change fails, the Window is decreased by
`window_decrease_factor`.
A ChangeQueue may be a dynamically created queue, which may be removed
from a DependentPipelineManager once empty.
"""
def __init__(self):
super().__init__()
self._set(
uuid=uuid4().hex,
pipeline=None,
name="",
project_branches=[],
_jobs=set(),
queue=[],
window=0,
window_floor=1,
window_ceiling=math.inf,
window_increase_type="linear",
window_increase_factor=1,
window_decrease_type="exponential",
window_decrease_factor=2,
dynamic=False,
)
def serialize(self, context):
data = {
"uuid": self.uuid,
"name": self.name,
"project_branches": self.project_branches,
"_jobs": list(self._jobs),
"queue": [i.getPath() for i in self.queue],
"window": self.window,
"window_floor": self.window_floor,
"window_ceiling": self.window_ceiling,
"window_increase_type": self.window_increase_type,
"window_increase_factor": self.window_increase_factor,
"window_decrease_type": self.window_decrease_type,
"window_decrease_factor": self.window_decrease_factor,
"dynamic": self.dynamic,
}
return json.dumps(data, sort_keys=True).encode("utf8")
def deserialize(self, raw, context):
data = super().deserialize(raw, context)
existing_items = {}
for item in self.queue:
existing_items[item.getPath()] = item
items_by_path = OrderedDict()
# This is a tuple of (x, Future), where x is None if no action
# needs to be taken, or a string to indicate which kind of job
# it was. This structure allows us to execute async ZK reads
# and perform local data updates in order.
tpe_jobs = []
tpe = context.executor[ChangeQueue]
for item_path in data["queue"]:
item = existing_items.get(item_path)
items_by_path[item_path] = item
if item:
tpe_jobs.append((None, tpe.submit(item.refresh, context)))
else:
tpe_jobs.append(('item', tpe.submit(
QueueItem.fromZK, context, item_path,
queue=self)))
for (kind, future) in tpe_jobs:
result = future.result()
if kind == 'item':
items_by_path[result.getPath()] = result
# Resolve ahead/behind references between queue items
for item in items_by_path.values():
# After a re-enqueue we might have references to items
# outside the current queue. We will resolve those
# references to None for the item ahead or simply exclude
# it in the list of items behind.
# The pipeline manager will take care of correcting the
# references on the next queue iteration.
item._set(
item_ahead=items_by_path.get(item._item_ahead),
items_behind=[items_by_path[p] for p in item._items_behind
if p in items_by_path])
data.update({
"_jobs": set(data["_jobs"]),
"queue": list(items_by_path.values()),
"project_branches": [tuple(pb) for pb in data["project_branches"]],
})
return data
def getPath(self):
pipeline_path = self.pipeline.state.getPath()
return self.queuePath(pipeline_path, self.uuid)
@classmethod
def queuePath(cls, pipeline_path, queue_uuid):
return f"{pipeline_path}/queue/{queue_uuid}"
@property
def zk_context(self):
return self.pipeline.manager.current_context
def __repr__(self):
return '<ChangeQueue %s: %s>' % (self.pipeline.name, self.name)
def getJobs(self):
return self._jobs
def addProject(self, project, branch):
"""
Adds a project branch combination to the queue.
The queue will match exactly this combination. If the caller doesn't
care about branches it can supply None (but must supply None as well
when matching)
"""
project_branch = (project.canonical_name, branch)
if project_branch not in self.project_branches:
with self.activeContext(self.zk_context):
self.project_branches.append(project_branch)
def matches(self, project_cname, branch):
return (project_cname, branch) in self.project_branches
def enqueueChanges(self, changes, event, span_info=None,
enqueue_time=None):
if enqueue_time is None:
enqueue_time = time.time()
if event:
event_ref_cache_key = None
if isinstance(event, EventInfo):
event_ref_cache_key = event.ref
elif getattr(event, 'orig_ref', None):
event_ref_cache_key = event.orig_ref
elif hasattr(event, 'canonical_project_name'):
trusted, project = self.pipeline.tenant.getProject(
event.canonical_project_name)
if project:
change_key = project.source.getChangeKey(event)
event_ref_cache_key = change_key.reference
else:
# We handle promote, enqueue, and trigger events
# above; it's unclear what other unhandled event would
# cause an enqueue, but if it happens, log and
# continue.
self.pipeline.manager.log.warning(
"Unable to identify triggering ref from event %s",
event)
event_info = EventInfo.fromEvent(event, event_ref_cache_key)
else:
event_info = None
item = QueueItem.new(self.zk_context,
queue=self,
changes=changes,
event=event_info,
span_info=span_info,
enqueue_time=enqueue_time)
self.enqueueItem(item)
return item
def enqueueItem(self, item):
item._set(queue=self)
if self.queue:
item.updateAttributes(self.zk_context, item_ahead=self.queue[-1])
with item.item_ahead.activeContext(self.zk_context):
item.item_ahead.items_behind.append(item)
with self.activeContext(self.zk_context):
self.queue.append(item)
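    # Queue items form a doubly-linked list: enqueueItem() points the new
    # item's item_ahead at the current tail and appends it to that item's
    # items_behind, while dequeueItem() and moveItem() below splice those
    # references back together when an item leaves or moves.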
def dequeueItem(self, item):
if item in self.queue:
with self.activeContext(self.zk_context):
self.queue.remove(item)
if item.item_ahead:
with item.item_ahead.activeContext(self.zk_context):
item.item_ahead.items_behind.remove(item)
item.item_ahead.items_behind.extend(item.items_behind)
for item_behind in item.items_behind:
item_behind.updateAttributes(self.zk_context,
item_ahead=item.item_ahead)
item.delete(self.zk_context)
# We use the dequeue time for stats reporting, but the queue
# item will no longer be in Zookeeper at this point.
item._set(dequeue_time=time.time())
def moveItem(self, item, item_ahead):
if item.item_ahead == item_ahead:
return False
# Remove from current location
if item.item_ahead:
with item.item_ahead.activeContext(self.zk_context):
item.item_ahead.items_behind.remove(item)
item.item_ahead.items_behind.extend(item.items_behind)
for item_behind in item.items_behind:
item_behind.updateAttributes(
self.zk_context,
item_ahead=item.item_ahead)
# Add to new location
item.updateAttributes(
self.zk_context,
item_ahead=item_ahead,
items_behind=[])
if item.item_ahead:
with item.item_ahead.activeContext(self.zk_context):
item.item_ahead.items_behind.append(item)
return True
def isActionable(self, item):
if not self.window:
return True
return item in self.queue[:self.window]
def increaseWindowSize(self):
if not self.window:
return
with self.activeContext(self.zk_context):
if self.window_increase_type == 'linear':
self.window = min(
self.window_ceiling,
self.window + self.window_increase_factor)
elif self.window_increase_type == 'exponential':
self.window = min(
self.window_ceiling,
self.window * self.window_increase_factor)
def decreaseWindowSize(self):
if not self.window:
return
with self.activeContext(self.zk_context):
if self.window_decrease_type == 'linear':
self.window = max(
self.window_floor,
self.window - self.window_decrease_factor)
elif self.window_decrease_type == 'exponential':
self.window = max(
self.window_floor,
int(self.window / self.window_decrease_factor))
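    # Worked example with the defaults above (window sizes are hypothetical):
    # with window_decrease_type='exponential' and factor 2, a failure on a
    # window of 20 shrinks it to max(window_floor, int(20 / 2)) == 10, while
    # each later success grows it linearly by window_increase_factor (1)
    # toward window_ceiling.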
class Project(object):
"""A Project represents a git repository such as openstack/nova."""
# NOTE: Projects should only be instantiated via a Source object
# so that they are associated with and cached by their Connection.
# This makes a Project instance a unique identifier for a given
# project from a given source.
def __init__(self, name, source, foreign=False):
self.name = name
self.source = source
self.connection_name = source.connection.connection_name
self.canonical_hostname = source.canonical_hostname
self.canonical_name = source.canonical_hostname + '/' + name
self.private_secrets_key = None
self.public_secrets_key = None
self.private_ssh_key = None
self.public_ssh_key = None
        # Foreign projects are those referenced in dependencies of
        # layout projects; this should matter when deciding whether
        # to enqueue their changes.
# TODOv3 (jeblair): re-add support for foreign projects if needed
self.foreign = foreign
def __str__(self):
return self.name
def __repr__(self):
return '<Project %s>' % (self.name)
def getSafeAttributes(self):
return Attributes(name=self.name)
def toDict(self):
d = {}
d['name'] = self.name
d['connection_name'] = self.connection_name
d['canonical_name'] = self.canonical_name
return d
class ApiRoot(ConfigObject):
def __init__(self, default_auth_realm=None):
super().__init__()
self.default_auth_realm = default_auth_realm
self.access_rules = []
def __ne__(self, other):
return not self.__eq__(other)
def __eq__(self, other):
if not isinstance(other, ApiRoot):
return False
        return (self.default_auth_realm == other.default_auth_realm and
                self.access_rules == other.access_rules)
def __repr__(self):
return f'<ApiRoot realm={self.default_auth_realm}>'
class Node(ConfigObject):
"""A single node for use by a job.
This may represent a request for a node, or an actual node
provided by Nodepool.
"""
def __init__(self, name, label):
super(Node, self).__init__()
self.name = name
self.label = label
self.id = None
self.lock = None
self.hold_job = None
self.comment = None
self.user_data = None
# Attributes from Nodepool
self._state = 'unknown'
self.state_time = time.time()
self.host_id = None
self.interface_ip = None
self.public_ipv4 = None
self.private_ipv4 = None
self.public_ipv6 = None
self.private_ipv6 = None
self.connection_port = 22
self.connection_type = None
self.slot = None
self._keys = []
self.az = None
self.provider = None
self.region = None
self.username = None
self.hold_expiration = None
self.resources = None
self.allocated_to = None
self.attributes = {}
self.tenant_name = None
self.requestor = None
@property
def state(self):
return self._state
@state.setter
def state(self, value):
if value not in NODE_STATES:
raise TypeError("'%s' is not a valid state" % value)
self._state = value
self.state_time = time.time()
def __repr__(self):
return '<Node %s %s:%s>' % (self.id, self.name, self.label)
def __ne__(self, other):
return not self.__eq__(other)
def __eq__(self, other):
if not isinstance(other, Node):
return False
return (self.name == other.name and
self.label == other.label and
self.id == other.id)
def toDict(self, internal_attributes=False):
d = {}
d["id"] = self.id
d['state'] = self.state
d['hold_job'] = self.hold_job
d['comment'] = self.comment
d['user_data'] = self.user_data
d['tenant_name'] = self.tenant_name
d['requestor'] = self.requestor
for k in self._keys:
d[k] = getattr(self, k)
if internal_attributes:
# These attributes are only useful for the rpc serialization
d['name'] = self.name[0]
d['aliases'] = list(self.name[1:])
d['label'] = self.label
return d
def updateFromDict(self, data):
self._state = data['state']
keys = []
for k, v in data.items():
if k in ['state', 'name', 'aliases']:
continue
keys.append(k)
setattr(self, k, v)
self._keys = keys
@classmethod
def fromDict(cls, data):
aliases = data.get('aliases', [])
node = cls([data["name"]] + aliases, data["label"])
node.updateFromDict(data)
return node
class Group(ConfigObject):
"""A logical group of nodes for use by a job.
A Group is a named set of node names that will be provided to
jobs in the inventory to describe logical units where some subset of tasks
run.
"""
def __init__(self, name, nodes):
super(Group, self).__init__()
self.name = name
self.nodes = nodes
def __repr__(self):
return '<Group %s %s>' % (self.name, str(self.nodes))
def __ne__(self, other):
return not self.__eq__(other)
def __eq__(self, other):
if not isinstance(other, Group):
return False
return (self.name == other.name and
self.nodes == other.nodes)
def toDict(self):
return {
'name': self.name,
'nodes': self.nodes
}
@classmethod
def fromDict(cls, data):
return cls(data["name"], data["nodes"])
class NodeSet(ConfigObject):
"""A set of nodes.
In configuration, NodeSets are attributes of Jobs indicating that
a Job requires nodes matching this description.
They may appear as top-level configuration objects and be named,
    or they may appear anonymously in in-line job definitions.
"""
def __init__(self, name=None):
super(NodeSet, self).__init__()
self.name = name or ''
self.nodes = OrderedDict()
self.groups = OrderedDict()
self.alternatives = []
def __ne__(self, other):
return not self.__eq__(other)
def __eq__(self, other):
if not isinstance(other, NodeSet):
return False
return (self.name == other.name and
self.nodes == other.nodes and
self.groups == other.groups and
self.alternatives == other.alternatives)
def toDict(self):
d = {}
d['name'] = self.name
d['nodes'] = []
for node in self.nodes.values():
d['nodes'].append(node.toDict(internal_attributes=True))
d['groups'] = []
for group in self.groups.values():
d['groups'].append(group.toDict())
d['alternatives'] = []
for alt in self.alternatives:
if isinstance(alt, NodeSet):
d['alternatives'].append(alt.toDict())
else:
d['alternatives'].append(alt)
return d
@classmethod
def fromDict(cls, data):
nodeset = cls(data["name"])
for node in data["nodes"]:
nodeset.addNode(Node.fromDict(node))
for group in data["groups"]:
nodeset.addGroup(Group.fromDict(group))
for alt in data.get('alternatives', []):
            if isinstance(alt, str):
                nodeset.addAlternative(alt)
            else:
                nodeset.addAlternative(NodeSet.fromDict(alt))
return nodeset
def copy(self):
n = NodeSet(self.name)
for name, node in self.nodes.items():
n.addNode(Node(node.name, node.label))
for name, group in self.groups.items():
n.addGroup(Group(group.name, group.nodes[:]))
for alt in self.alternatives:
if isinstance(alt, str):
n.addAlternative(alt)
else:
n.addAlternative(alt.copy())
return n
def addNode(self, node):
for name in node.name:
if name in self.nodes:
raise Exception("Duplicate node in %s" % (self,))
self.nodes[tuple(node.name)] = node
def getNodes(self):
return list(self.nodes.values())
def addGroup(self, group):
if group.name in self.groups:
raise Exception("Duplicate group in %s" % (self,))
self.groups[group.name] = group
def getGroups(self):
return list(self.groups.values())
def addAlternative(self, alt):
self.alternatives.append(alt)
def flattenAlternatives(self, layout):
alts = []
history = []
self._flattenAlternatives(layout, self, alts, history)
return alts
def _flattenAlternatives(self, layout, nodeset,
alternatives, history):
if isinstance(nodeset, str):
# This references an existing named nodeset in the layout.
ns = layout.nodesets.get(nodeset)
if ns is None:
raise NodesetNotFoundError(nodeset)
else:
ns = nodeset
if ns in history:
raise Exception(f'Nodeset cycle detected on "{nodeset}"')
history.append(ns)
if ns.alternatives:
for alt in ns.alternatives:
self._flattenAlternatives(layout, alt, alternatives, history)
else:
alternatives.append(ns)
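    # Illustrative sketch: for a nodeset whose alternatives are the
    # (hypothetical) named nodesets ['fast-nodes', 'slow-nodes'], each name
    # is resolved via layout.nodesets, nested alternatives are recursed
    # into, and the result is a flat, ordered list of concrete NodeSets to
    # try; a self-referencing chain raises the "Nodeset cycle detected"
    # exception instead.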
def validateReferences(self, layout):
self.flattenAlternatives(layout)
def __repr__(self):
if self.name:
name = self.name + ' '
else:
name = ''
return '<NodeSet %s%s>' % (name, list(self.nodes.values()))
def __len__(self):
return len(self.nodes)
class NodeRequest(object):
"""A request for a set of nodes."""
def __init__(self, requestor, build_set_uuid, tenant_name, pipeline_name,
job_uuid, job_name, labels, provider, relative_priority,
event_id=None, span_info=None):
self.requestor = requestor
self.build_set_uuid = build_set_uuid
self.tenant_name = tenant_name
self.pipeline_name = pipeline_name
self.job_uuid = job_uuid
# The requestor doesn't need the job name anymore after moving
# to job UUIDs, but we should keep it in the requestor data,
# since it can be used in Nodepool for dynamic label tags.
self.job_name = job_name
self.labels = labels
self.nodes = []
self._state = STATE_REQUESTED
self.requested_time = time.time()
self.state_time = time.time()
self.created_time = None
self.stat = None
self.relative_priority = relative_priority
self.provider = provider
self.id = None
self._zk_data = {} # Data that we read back from ZK
self.event_id = event_id
self.span_info = span_info
# Zuul internal flags (not stored in ZK so they are not
# overwritten).
self.failed = False
self.canceled = False
def reset(self):
# Reset the node request for re-submission
self._zk_data = {}
# Remove any real node information
self.nodes = []
self.id = None
self.state = STATE_REQUESTED
self.stat = None
self.failed = False
self.canceled = False
@property
def fulfilled(self):
return (self._state == STATE_FULFILLED) and not self.failed
@property
def state(self):
return self._state
@state.setter
def state(self, value):
if value not in REQUEST_STATES:
raise TypeError("'%s' is not a valid state" % value)
self._state = value
self.state_time = time.time()
def __repr__(self):
return '<NodeRequest %s %s>' % (self.id, self.labels)
def toDict(self):
"""
Serialize a NodeRequest so it can be stored in ZooKeeper.
Any additional information must be stored in the requestor_data field,
so Nodepool doesn't strip the information when it fulfills the request.
"""
# Start with any previously read data
d = self._zk_data.copy()
# The requestor_data is opaque to nodepool and won't be touched by
# nodepool when it fulfills the request.
d["requestor_data"] = {
"build_set_uuid": self.build_set_uuid,
"tenant_name": self.tenant_name,
"pipeline_name": self.pipeline_name,
"job_uuid": self.job_uuid,
"job_name": self.job_name,
"span_info": self.span_info,
}
d.setdefault('node_types', self.labels)
d.setdefault('requestor', self.requestor)
d.setdefault('created_time', self.created_time)
d.setdefault('provider', self.provider)
# We might change these
d['state'] = self.state
d['state_time'] = self.state_time
d['relative_priority'] = self.relative_priority
d['event_id'] = self.event_id
d['tenant_name'] = self.tenant_name
return d
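    # Illustrative sketch of the resulting dict (values are hypothetical):
    # Zuul-specific fields travel under 'requestor_data', which Nodepool
    # returns untouched, alongside the fields Nodepool acts on:
    #
    #   {'requestor_data': {'build_set_uuid': '...', 'tenant_name': 'example',
    #                       'pipeline_name': 'check', 'job_uuid': '...',
    #                       'job_name': 'tox', 'span_info': None},
    #    'node_types': ['ubuntu-jammy'], 'requestor': 'zuul',
    #    'state': 'requested', 'relative_priority': 0, ...}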
def updateFromDict(self, data):
self._zk_data = data
self._state = data['state']
self.state_time = data['state_time']
self.relative_priority = data.get('relative_priority', 0)
self.event_id = data['event_id']
# Make sure we don't update tenant_name to 'None'.
# This can happen if nodepool does not report one back and leads
# to errors at other places where we rely on that info.
if 'tenant_name' in data:
self.tenant_name = data['tenant_name']
self.nodes = data.get('nodes', [])
self.created_time = data.get('created_time')
@classmethod
def fromDict(cls, data):
"""Deserialize a NodeRequest from the data in ZooKeeper.
Any additional information must be stored in the requestor_data field,
so Nodepool doesn't strip the information when it fulfills the request.
"""
# The requestor_data contains zuul-specific information which is opaque
# to nodepool and returned as-is when the NodeRequest is fulfilled.
requestor_data = data["requestor_data"]
if requestor_data is None:
requestor_data = {}
request = cls(
requestor=data["requestor"],
build_set_uuid=requestor_data.get("build_set_uuid"),
tenant_name=requestor_data.get("tenant_name"),
pipeline_name=requestor_data.get("pipeline_name"),
job_uuid=requestor_data.get("job_uuid"),
job_name=requestor_data.get("job_name"),
labels=data["node_types"],
provider=data["provider"],
relative_priority=data.get("relative_priority", 0),
span_info=requestor_data.get("span_info"),
)
request.updateFromDict(data)
return request
class Secret(ConfigObject):
"""A collection of private data.
In configuration, Secrets are collections of private data in
key-value pair format. They are defined as top-level
configuration objects and then referenced by Jobs.
"""
def __init__(self, name, source_context):
super(Secret, self).__init__()
self.name = name
self.source_context = source_context
# The secret data may or may not be encrypted. This attribute
# is named 'secret_data' to make it easy to search for and
# spot where it is directly used.
self.secret_data = {}
def __ne__(self, other):
return not self.__eq__(other)
def __eq__(self, other):
if not isinstance(other, Secret):
return False
return (self.name == other.name and
self.source_context == other.source_context and
self.secret_data == other.secret_data)
def areDataEqual(self, other):
return (self.secret_data == other.secret_data)
def __repr__(self):
return '<Secret %s>' % (self.name,)
def _decrypt(self, private_key, secret_data):
# recursive function to decrypt data
if hasattr(secret_data, 'decrypt'):
return secret_data.decrypt(private_key)
if isinstance(secret_data, (dict, types.MappingProxyType)):
decrypted_secret_data = {}
for k, v in secret_data.items():
decrypted_secret_data[k] = self._decrypt(private_key, v)
return decrypted_secret_data
if isinstance(secret_data, (list, tuple)):
decrypted_secret_data = []
for v in secret_data:
decrypted_secret_data.append(self._decrypt(private_key, v))
return decrypted_secret_data
return secret_data
def decrypt(self, private_key):
"""Return a copy of this secret with any encrypted data decrypted.
Note that the original remains encrypted."""
r = Secret(self.name, self.source_context)
r.secret_data = self._decrypt(private_key, self.secret_data)
return r
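    # Illustrative sketch: decrypt() walks nested dicts and lists and calls
    # .decrypt(private_key) on any leaf object that provides it, leaving
    # plain values untouched, so a mixed mapping like
    #   {'api_token': <encrypted blob>, 'region': 'us-east-1'}
    # comes back with only 'api_token' replaced by its decrypted value.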
def serialize(self):
return yaml.encrypted_dump(self.secret_data, default_flow_style=False)
class SecretUse(ConfigObject):
"""A use of a secret in a Job"""
def __init__(self, name, alias):
super(SecretUse, self).__init__()
self.name = name
self.alias = alias
self.pass_to_parent = False
class FrozenSecret(ConfigObject):
"""A frozen secret for use by the executor"""
def __init__(self, connection_name, project_name, name, encrypted_data):
super(FrozenSecret, self).__init__()
self.connection_name = connection_name
self.project_name = project_name
self.name = name
self.encrypted_data = encrypted_data
@staticmethod
@lru_cache(maxsize=1024)
def construct_cached(connection_name, project_name, name, encrypted_data):
"""
        A caching constructor that enables re-use of already existing
FrozenSecret objects.
"""
return FrozenSecret(connection_name, project_name, name,
encrypted_data)
def toDict(self):
# Name is omitted since this is used in a dictionary
return dict(
connection_name=self.connection_name,
project_name=self.project_name,
encrypted_data=self.encrypted_data,
)
class SourceContext(ConfigObject):
"""A reference to the branch of a project in configuration.
Jobs and playbooks reference this to keep track of where they
originate."""
def __init__(self, project_canonical_name, project_name,
project_connection_name, branch, path, trusted,
implied_branch_matchers=None):
super(SourceContext, self).__init__()
self.project_canonical_name = project_canonical_name
self.project_name = project_name
self.project_connection_name = project_connection_name
self.branch = branch
self.path = path
self.trusted = trusted
self.implied_branch_matchers = implied_branch_matchers
self.implied_branches = None
def __str__(self):
return '%s/%s@%s' % (
self.project_name, self.path, self.branch)
def __repr__(self):
return '<SourceContext %s trusted:%s>' % (str(self),
self.trusted)
def __deepcopy__(self, memo):
return self.copy()
def copy(self):
return self.__class__(
self.project_canonical_name, self.project_name,
self.project_connection_name, self.branch, self.path, self.trusted,
self.implied_branch_matchers)
def isSameProject(self, other):
if not isinstance(other, SourceContext):
return False
return (self.project_canonical_name == other.project_canonical_name and
self.trusted == other.trusted)
def __ne__(self, other):
return not self.__eq__(other)
def __eq__(self, other):
if not isinstance(other, SourceContext):
return False
return (self.project_canonical_name == other.project_canonical_name and
self.branch == other.branch and
self.path == other.path and
self.trusted == other.trusted)
def serialize(self):
ibs = None
if self.implied_branches:
ibs = [ibm.serialize() for ibm in self.implied_branches]
return {
"project_canonical_name": self.project_canonical_name,
"project_name": self.project_name,
"project_connection_name": self.project_connection_name,
"branch": self.branch,
"path": self.path,
"trusted": self.trusted,
"implied_branch_matchers": self.implied_branch_matchers,
"implied_branches": ibs,
}
@classmethod
def deserialize(cls, data):
o = cls.__new__(cls)
ibs = data.get('implied_branches')
if ibs:
data['implied_branches'] = []
for matcher_data in ibs:
if matcher_data['implied']:
cls = change_matcher.ImpliedBranchMatcher
else:
cls = change_matcher.BranchMatcher
data['implied_branches'].append(
cls.deserialize(matcher_data))
o.__dict__.update(data)
return o
def toDict(self):
return dict(
project=self.project_name,
branch=self.branch,
path=self.path,
)
class PlaybookContext(ConfigObject):
"""A reference to a playbook in the context of a project.
Jobs refer to objects of this class for their main, pre, and post
playbooks so that we can keep track of which repos and security
contexts are needed in order to run them.
We also keep a list of roles so that playbooks only run with the
roles which were defined at the point the playbook was defined.
"""
def __init__(self, source_context, path, roles, secrets,
semaphores):
super(PlaybookContext, self).__init__()
self.source_context = source_context
self.path = path
self.roles = roles
# The original SecretUse objects describing how the secret
# should be used
self.secrets = secrets
# FrozenSecret objects which contain only the info the
# executor needs
self.frozen_secrets = ()
# The original JobSemaphore objects
self.semaphores = semaphores
# the result of getSemaphoreInfo from semaphore handler
self.frozen_semaphores = ()
def __repr__(self):
return '<PlaybookContext %s %s>' % (self.source_context,
self.path)
def __ne__(self, other):
return not self.__eq__(other)
def __eq__(self, other):
if not isinstance(other, PlaybookContext):
return False
return (self.source_context == other.source_context and
self.path == other.path and
self.roles == other.roles and
self.secrets == other.secrets and
self.semaphores == other.semaphores)
def copy(self):
r = PlaybookContext(self.source_context,
self.path,
self.roles,
self.secrets,
self.semaphores)
return r
def validateReferences(self, layout):
# Verify that references to other objects in the layout are
# valid.
for secret_use in self.secrets:
secret = layout.secrets.get(secret_use.name)
if secret is None:
raise Exception(
'The secret "{name}" was not found.'.format(
name=secret_use.name))
check_varnames({secret_use.alias: ''})
if not secret.source_context.isSameProject(self.source_context):
raise Exception(
"Unable to use secret {name}. Secrets must be "
"defined in the same project in which they "
"are used".format(
name=secret_use.name))
project = layout.tenant.getProject(
self.source_context.project_canonical_name)[1]
# Decrypt a copy of the secret to verify it can be done
secret.decrypt(project.private_secrets_key)
# TODO: if we remove the implicit max=1 semaphore, validate
# references here.
def freezeSemaphores(self, layout, semaphore_handler):
semaphores = []
for job_semaphore in self.semaphores:
info = semaphore_handler.getSemaphoreInfo(job_semaphore)
semaphores.append(info)
self.frozen_semaphores = tuple(semaphores)
def freezeSecrets(self, layout):
secrets = []
for secret_use in self.secrets:
secret = layout.secrets.get(secret_use.name)
secret_name = secret_use.alias
encrypted_secret_data = secret.serialize()
# Use *our* project, not the secret's, because we want to decrypt
# with *our* key.
project = layout.tenant.getProject(
self.source_context.project_canonical_name)[1]
secrets.append(FrozenSecret.construct_cached(
project.connection_name, project.name, secret_name,
encrypted_secret_data))
self.frozen_secrets = tuple(secrets)
def addSecrets(self, frozen_secrets):
current_names = set([s.name for s in self.frozen_secrets])
new_secrets = [s for s in frozen_secrets
if s.name not in current_names]
self.frozen_secrets = self.frozen_secrets + tuple(new_secrets)
def toDict(self, redact_secrets=True):
# Render to a dict to use in passing json to the executor
secrets = {}
for secret in self.frozen_secrets:
if redact_secrets:
secrets[secret.name] = 'REDACTED'
else:
secrets[secret.name] = secret.toDict()
return dict(
connection=self.source_context.project_connection_name,
project=self.source_context.project_name,
branch=self.source_context.branch,
trusted=self.source_context.trusted,
roles=[r.toDict() for r in self.roles],
secrets=secrets,
semaphores=self.frozen_semaphores,
path=self.path)
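    # The dict produced by toDict() above looks roughly like this
    # (all values are illustrative only):
    #   {'connection': 'gerrit', 'project': 'org/project',
    #    'branch': 'master', 'trusted': False,
    #    'roles': [...], 'secrets': {'site_logs': 'REDACTED'},
    #    'semaphores': (...), 'path': 'playbooks/run.yaml'}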
def toSchemaDict(self):
# Render to a dict to use in REST api
d = {
'path': self.path,
            'roles': [r.toDict() for r in self.roles],
'secrets': [{'name': secret.name, 'alias': secret.alias}
for secret in self.secrets],
'semaphores': [{'name': sem.name} for sem in self.semaphores],
}
if self.source_context:
d['source_context'] = self.source_context.toDict()
else:
d['source_context'] = None
return d
class Role(ConfigObject, metaclass=abc.ABCMeta):
"""A reference to an ansible role."""
def __init__(self, target_name):
super(Role, self).__init__()
self.target_name = target_name
@abc.abstractmethod
def __repr__(self):
pass
def __ne__(self, other):
return not self.__eq__(other)
@abc.abstractmethod
def __eq__(self, other):
if not isinstance(other, Role):
return False
return (self.target_name == other.target_name)
@abc.abstractmethod
def toDict(self):
# Render to a dict to use in passing json to the executor
return dict(target_name=self.target_name)
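    # Concrete subclasses extend this dict with their own keys; ZuulRole
    # below adds 'type', 'project_canonical_name', and 'implicit'.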
class ZuulRole(Role):
"""A reference to an ansible role in a Zuul project."""
def __init__(self, target_name, project_canonical_name, implicit=False):
super(ZuulRole, self).__init__(target_name)
self.project_canonical_name = project_canonical_name
self.implicit = implicit
def __repr__(self):
return '<ZuulRole %s %s>' % (self.project_canonical_name,
self.target_name)
def __hash__(self):
return hash(json.dumps(self.toDict(), sort_keys=True))
def __eq__(self, other):
if not isinstance(other, ZuulRole):
return False
# Implicit is not consulted for equality so that we can handle
# implicit to explicit conversions.
return (super(ZuulRole, self).__eq__(other) and
self.project_canonical_name == other.project_canonical_name)
def toDict(self):
# Render to a dict to use in passing json to the executor
d = super(ZuulRole, self).toDict()
d['type'] = 'zuul'
d['project_canonical_name'] = self.project_canonical_name
d['implicit'] = self.implicit
return d
@classmethod
def fromDict(cls, data):
self = cls(data['target_name'],
data['project_canonical_name'],
data['implicit'])
return self
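    # Serialization round-trip sketch (names are illustrative):
    #   role = ZuulRole('common', 'review.example.com/org/zuul-roles')
    #   assert ZuulRole.fromDict(role.toDict()) == role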
class JobData(zkobject.ShardedZKObject):
"""Data or variables for a job.
These can be arbitrarily large, so they are stored as sharded ZK objects.
A hash attribute can be stored on the job object itself to detect
whether the data need to be refreshed.
"""
    # We can always recreate the data if necessary, so go ahead and
    # truncate when we update in order to avoid corrupted data.
truncate_on_create = True
def __repr__(self):
return '<JobData>'
def getPath(self):
return self._path
@classmethod
def new(klass, context, create=True, **kw):
"""Create a new instance and save it in ZooKeeper"""
obj = klass()
kw['hash'] = JobData.getHash(kw['data'])
obj._set(**kw)
if create:
data = obj._trySerialize(context)
obj._save(context, data, create=True)
return obj
@staticmethod
def getHash(data):
hasher = hashlib.sha256()
# Use json_dumps to strip any ZuulMark entries
hasher.update(json_dumps(data, sort_keys=True).encode('utf8'))
return hasher.hexdigest()
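    # The hash is stored next to the offload path in the parent object
    # (see FrozenJob.serialize below) so that readers can skip
    # re-fetching the sharded data when the hash is unchanged.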
def serialize(self, context):
data = {
"data": self.data,
"hash": self.hash,
"_path": self._path,
}
return json_dumps(data, sort_keys=True).encode("utf8")
def __hash__(self):
return hash(self.hash)
def __eq__(self, other):
if not isinstance(other, JobData):
return False
return self.hash == other.hash
class FrozenJob(zkobject.ZKObject):
"""A rendered job definition that will actually be run.
This is the combination of one or more Job variants to produce a
rendered job definition that can be serialized and run by the
executor.
    Most attributes should not be updated once created; the exceptions
    are those which track the current state of the job in the
    pipeline.
"""
# If data/variables are more than 10k, we offload them to another
# object, otherwise we store them on this one.
MAX_DATA_LEN = 10 * 1024
attributes = ('ansible_version',
'ansible_split_streams',
'dependencies',
'inheritance_path',
'name',
'nodeset_alternatives',
'nodeset_index',
'override_branch',
'override_checkout',
'post_timeout',
'required_projects',
'semaphores',
'tags',
'timeout',
'voting',
'queued',
'hold_following_changes',
'waiting_status',
'pre_run',
'run',
'post_run',
'cleanup_run',
'attempts',
'success_message',
'failure_message',
'provides',
'requires',
'workspace_scheme',
'config_hash',
'deduplicate',
'failure_output',
)
job_data_attributes = ('artifact_data',
'extra_variables',
'group_variables',
'host_variables',
'secret_parent_data',
'variables',
'parent_data',
'secrets',
'affected_projects',
)
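    # Values for the names in "attributes" are stored directly in this
    # object's data; values for the names in "job_data_attributes" may
    # be offloaded to separate JobData objects when they are large (see
    # MAX_DATA_LEN above and serialize()/deserialize() below).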
def __init__(self):
super().__init__()
self._set(ref=None,
other_refs=[])
def __repr__(self):
name = getattr(self, 'name', '<UNKNOWN>')
return f'<FrozenJob {name}>'
def isEqual(self, other):
# Compare two frozen jobs to determine whether they are
# effectively equal. The inheritance path will always be
        # different, so it is ignored. But if they otherwise have the
# same attributes, they will probably produce the same
# results.
if not isinstance(other, FrozenJob):
return False
if self.name != other.name:
return False
for k in self.attributes:
if k in ['inheritance_path', 'waiting_status', 'queued']:
continue
if getattr(self, k) != getattr(other, k):
return False
for k in self.job_data_attributes:
if getattr(self, k) != getattr(other, k):
return False
return True
@classmethod
def new(klass, context, **kw):
raise NotImplementedError()
@classmethod
def createInMemory(klass, **kw):
obj = klass()
obj._set(uuid=uuid4().hex)
for k in klass.job_data_attributes:
v = kw.pop(k, None)
kw['_' + k] = v
obj._set(**kw)
return obj
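    # createInMemory stores each job_data attribute under a leading
    # underscore ("_variables", "_secrets", ...); the properties defined
    # further below unwrap them via _getJobData so callers see plain
    # values whether or not the data were offloaded to a JobData object.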
def internalCreate(self, context):
# Convert these to JobData after creation.
job_data_vars = []
for k in self.job_data_attributes:
v = getattr(self, '_' + k)
if v:
# If the value is long, we need to make this a
# JobData; otherwise we can use the value as-is.
# TODO(jeblair): if we apply the same createInMemory
# approach to JobData creation, we can avoid this
# serialization test as well as rewriting the
# frozenjob object below.
v = self._makeJobData(context, k, v, create=False)
self._set(**{'_' + k: v})
if isinstance(v, JobData):
job_data_vars.append(v)
super().internalCreate(context)
# If we need to make any JobData entries, do that now.
for v in job_data_vars:
v.internalCreate(context)
def isBase(self):
return self.parent is None
@classmethod
def jobPath(cls, job_id, parent_path):
return f"{parent_path}/job/{job_id}"
def getPath(self):
return self.jobPath(self.uuid, self.buildset.getPath())
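    # Illustrative resulting path (the buildset path is hypothetical):
    #   <buildset path>/job/<frozen job uuid>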
@property
def all_refs(self):
return [self.ref, *self.other_refs]
def serialize(self, context):
# Ensure that any special handling in this method is matched
# in Job.freezeJob so that FrozenJobs are identical regardless
# of whether they have been deserialized.
data = {
"uuid": self.uuid,
}
for k in self.attributes:
# TODO: Backwards compat handling, remove after 5.0
if k == 'config_hash':
if not hasattr(self, k):
continue
v = getattr(self, k)
if k == 'nodeset_alternatives':
v = [alt.toDict() for alt in v]
elif k == 'dependencies':
# frozenset of JobDependency
v = [dep.toDict() for dep in v]
elif k == 'semaphores':
# list of JobSemaphores
v = [sem.toDict() for sem in v]
elif k in ('provides', 'requires', 'tags'):
v = list(v)
elif k == 'required_projects':
# dict of name->JobProject
v = {project_name: job_project.toDict()
for (project_name, job_project) in v.items()}
data[k] = v
for k in self.job_data_attributes:
v = getattr(self, '_' + k)
if isinstance(v, JobData):
v = {'storage': 'offload', 'path': v.getPath(), 'hash': v.hash}
else:
v = {'storage': 'local', 'data': v}
data[k] = v
data['ref'] = self.ref
data['other_refs'] = self.other_refs
# Use json_dumps to strip any ZuulMark entries
return json_dumps(data, sort_keys=True).encode("utf8")
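    # Each job_data attribute serializes to one of two shapes
    # (values illustrative):
    #   {'storage': 'local', 'data': <the value>}
    #   {'storage': 'offload', 'path': <JobData path>, 'hash': <sha256>}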
def deserialize(self, raw, context):
# Ensure that any special handling in this method is matched
# in Job.freezeJob so that FrozenJobs are identical regardless
# of whether they have been deserialized.
data = super().deserialize(raw, context)
if hasattr(self, 'nodeset_alternatives'):
alts = self.nodeset_alternatives
else:
alts = data.get('nodeset_alternatives', [])
alts = [NodeSet.fromDict(alt) for alt in alts]
data['nodeset_alternatives'] = alts
if hasattr(self, 'dependencies'):
data['dependencies'] = self.dependencies
else:
data['dependencies'] = frozenset(JobDependency.fromDict(dep)
for dep in data['dependencies'])
if hasattr(self, 'semaphores'):
data['semaphores'] = self.semaphores
else:
data['semaphores'] = [JobSemaphore.fromDict(sem)
for sem in data['semaphores']]
if hasattr(self, 'required_projects'):
data['required_projects'] = self.required_projects
else:
data['required_projects'] = {
project_name: JobProject.fromDict(job_project)
for (project_name, job_project)
in data['required_projects'].items()}
data['provides'] = frozenset(data['provides'])
data['requires'] = frozenset(data['requires'])
data['tags'] = frozenset(data['tags'])
for job_data_key in self.job_data_attributes:
job_data = data.pop(job_data_key, None)
if job_data:
# This is a dict which tells us where the actual data is.
if job_data['storage'] == 'local':
# The data are stored locally in this dict
data['_' + job_data_key] = job_data['data']
elif job_data['storage'] == 'offload':
existing_job_data = getattr(self, f"_{job_data_key}", None)
if (getattr(existing_job_data, 'hash', None) ==
job_data['hash']
and job_data['hash'] is not None):
# Re-use the existing object since it's the same
data['_' + job_data_key] = existing_job_data
else:
if job_data['hash'] is None:
context.log.error("JobData hash is None on %s",
self)
# Load the object from ZK
data['_' + job_data_key] = JobData.fromZK(
context, job_data['path'])
else:
data['_' + job_data_key] = None
return data
def _save(self, context, *args, **kw):
# Before saving, update the buildset with the new job version
# so that future readers know to refresh it.
self.buildset.updateJobVersion(context, self)
return super()._save(context, *args, **kw)
def setWaitingStatus(self, status):
if self.waiting_status == status:
return
self.updateAttributes(
self.buildset.item.pipeline.manager.current_context,
waiting_status=status)
def _getJobData(self, name):
val = getattr(self, name, None)
if isinstance(val, JobData):
return val.data
return val
@property
def nodeset(self):
if self.nodeset_alternatives:
return self.nodeset_alternatives[self.nodeset_index]
return None
@property
def parent_data(self):
return self._getJobData('_parent_data')
@property
def secret_parent_data(self):
return self._getJobData('_secret_parent_data')
@property
def artifact_data(self):
return self._getJobData('_artifact_data')
@property
def extra_variables(self):
return self._getJobData('_extra_variables')
@property
def group_variables(self):
return self._getJobData('_group_variables')
@property
def host_variables(self):
return self._getJobData('_host_variables')
@property
def variables(self):
return self._getJobData('_variables')
@property
def secrets(self):
return self._getJobData('_secrets')
@property
def affected_projects(self):
return self._getJobData('_affected_projects')
def getSafeAttributes(self):
return Attributes(name=self.name)
@staticmethod
def updateParentData(parent_data, secret_parent_data, artifact_data,
other_build):
# Update variables, but give the new values priority. If more than one
# parent job returns the same variable, the value from the later job
# in the job graph will take precedence.
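        # For example (illustrative): if an earlier parent produced
        # {'foo': 1} and this (later) build produced {'foo': 2}, the
        # merged parent_data ends up with foo == 2.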
other_vars = other_build.result_data
v = parent_data
v = Job._deepUpdate(v, other_vars)
# To avoid running afoul of checks that jobs don't set zuul
# variables, remove them from parent data here.
v.pop('zuul', None)
# For safety, also drop nodepool and unsafe_vars
v.pop('nodepool', None)
v.pop('unsafe_vars', None)
parent_data = v
secret_other_vars = other_build.secret_result_data
v = secret_parent_data
v = Job._deepUpdate(secret_other_vars, v)
if 'zuul' in v:
del v['zuul']
secret_parent_data = v
artifacts = get_artifacts_from_result_data(other_vars)
artifact_data = artifact_data[:]
for a in artifacts:
# Change here may be any ref type (tag, change, etc)
ref = other_build.build_set.item.getChangeForJob(other_build.job)
a.update({'project': ref.project.name,
'job': other_build.job.name})
# Change is a Branch
if hasattr(ref, 'branch'):
a.update({'branch': ref.branch})
if hasattr(ref, 'number') and hasattr(ref, 'patchset'):
a.update({'change': str(ref.number),
'patchset': ref.patchset})
            # Otherwise the change is a plain ref type (e.g. a tag)
else:
a.update({'ref': ref.ref,
'oldrev': ref.oldrev,
'newrev': ref