jenkins-job-builder/jenkins_jobs/builder.py
Kei YAMAZAKI 604d39305b Fix multibyte character problem
Fixed problem that causes a UnicodeEncodeError when use multi-byte characters in the job.yml.

Change-Id: Ie715c827a794e73fae11cdca079ea80cfb8c280d
2014-01-21 22:34:30 +09:00

551 lines
21 KiB
Python

#!/usr/bin/env python
# Copyright (C) 2012 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Manage jobs in Jenkins server
import os
import hashlib
import yaml
import json
import xml.etree.ElementTree as XML
from xml.dom import minidom
import jenkins
import re
import pkg_resources
import logging
import copy
import itertools
import fnmatch
from jenkins_jobs.errors import JenkinsJobsException
logger = logging.getLogger(__name__)
MAGIC_MANAGE_STRING = "<!-- Managed by Jenkins Job Builder -->"
def deep_format(obj, paramdict):
"""Apply the paramdict via str.format() to all string objects found within
the supplied obj. Lists and dicts are traversed recursively."""
# YAML serialisation was originally used to achieve this, but that places
# limitations on the values in paramdict - the post-format result must
# still be valid YAML (so substituting-in a string containing quotes, for
# example, is problematic).
if isinstance(obj, str):
try:
ret = obj.format(**paramdict)
except KeyError as exc:
missing_key = exc.message
desc = "%s parameter missing to format %s\nGiven: %s" % (
missing_key, obj, paramdict)
raise JenkinsJobsException(desc)
elif isinstance(obj, list):
ret = []
for item in obj:
ret.append(deep_format(item, paramdict))
elif isinstance(obj, dict):
ret = {}
for item in obj:
ret[item.format(**paramdict)] = deep_format(obj[item], paramdict)
else:
ret = obj
return ret
def matches(what, where):
"""
Checks if the given string matches against the given list of glob patterns
:arg str what: String that we want to test if matches
:arg list where: list of glob patters to match
"""
for pattern in where:
if re.match(fnmatch.translate(pattern), what):
return True
return False
class YamlParser(object):
def __init__(self, config=None):
self.registry = ModuleRegistry(config)
self.data = {}
self.jobs = []
def parse(self, fn):
data = yaml.load(open(fn))
if data:
for item in data:
cls, dfn = item.items()[0]
group = self.data.get(cls, {})
if len(item.items()) > 1:
n = None
for k, v in item.items():
if k == "name":
n = v
break
# Syntax error
raise JenkinsJobsException("Syntax error, for item "
"named '{0}'. Missing indent?"
.format(n))
name = dfn['name']
group[name] = dfn
self.data[cls] = group
def getJob(self, name):
job = self.data.get('job', {}).get(name, None)
if not job:
return job
return self.applyDefaults(job)
def getJobGroup(self, name):
return self.data.get('job-group', {}).get(name, None)
def getJobTemplate(self, name):
job = self.data.get('job-template', {}).get(name, None)
if not job:
return job
return self.applyDefaults(job)
def applyDefaults(self, data):
whichdefaults = data.get('defaults', 'global')
defaults = self.data.get('defaults', {}).get(whichdefaults, {})
newdata = {}
newdata.update(defaults)
newdata.update(data)
return newdata
def generateXML(self, jobs_filter=None):
changed = True
while changed:
changed = False
for module in self.registry.modules:
if hasattr(module, 'handle_data'):
if module.handle_data(self):
changed = True
for job in self.data.get('job', {}).values():
if jobs_filter and not matches(job['name'], jobs_filter):
logger.debug("Ignoring job {0}".format(job['name']))
continue
logger.debug("XMLifying job '{0}'".format(job['name']))
job = self.applyDefaults(job)
self.getXMLForJob(job)
for project in self.data.get('project', {}).values():
logger.debug("XMLifying project '{0}'".format(project['name']))
for jobspec in project.get('jobs', []):
if isinstance(jobspec, dict):
# Singleton dict containing dict of job-specific params
jobname, jobparams = jobspec.items()[0]
if not isinstance(jobparams, dict):
jobparams = {}
else:
jobname = jobspec
jobparams = {}
job = self.getJob(jobname)
if job:
# Just naming an existing defined job
continue
# see if it's a job group
group = self.getJobGroup(jobname)
if group:
for group_jobspec in group['jobs']:
if isinstance(group_jobspec, dict):
group_jobname, group_jobparams = \
group_jobspec.items()[0]
if not isinstance(group_jobparams, dict):
group_jobparams = {}
else:
group_jobname = group_jobspec
group_jobparams = {}
job = self.getJob(group_jobname)
if job:
continue
template = self.getJobTemplate(group_jobname)
# Allow a group to override parameters set by a project
d = {}
d.update(project)
d.update(jobparams)
d.update(group)
d.update(group_jobparams)
# Except name, since the group's name is not useful
d['name'] = project['name']
if template:
self.getXMLForTemplateJob(d, template, jobs_filter)
continue
# see if it's a template
template = self.getJobTemplate(jobname)
if template:
d = {}
d.update(project)
d.update(jobparams)
self.getXMLForTemplateJob(d, template, jobs_filter)
else:
raise JenkinsJobsException("Failed to find suitable "
"template named '{0}'"
.format(jobname))
def getXMLForTemplateJob(self, project, template, jobs_filter=None):
dimensions = []
for (k, v) in project.items():
if type(v) == list and k not in ['jobs']:
dimensions.append(zip([k] * len(v), v))
# XXX somewhat hackish to ensure we actually have a single
# pass through the loop
if len(dimensions) == 0:
dimensions = [(("", ""),)]
checksums = set([])
for values in itertools.product(*dimensions):
params = copy.deepcopy(project)
params.update(values)
expanded = deep_format(template, params)
# Keep track of the resulting expansions to avoid
# regenerating the exact same job. Whenever a project has
# different values for a parameter and that parameter is not
# used in the template, we ended up regenerating the exact
# same job.
# To achieve that we serialize the expanded template making
# sure the dict keys are always in the same order. Then we
# record the checksum in an unordered unique set which let
# us guarantee a group of parameters will not be added a
# second time.
uniq = json.dumps(expanded, sort_keys=True)
checksum = hashlib.md5(uniq).hexdigest()
# Lookup the checksum
if checksum not in checksums:
# We also want to skip XML generation whenever the user did
# not ask for that job.
job_name = expanded.get('name')
if jobs_filter and not matches(job_name, jobs_filter):
continue
logger.debug("Generating XML for template job {0}"
" (params {1})".format(
template['name'], params))
self.getXMLForJob(expanded)
checksums.add(checksum)
def getXMLForJob(self, data):
kind = data.get('project-type', 'freestyle')
data["description"] = data.get("description", "") + \
self.get_managed_string()
for ep in pkg_resources.iter_entry_points(
group='jenkins_jobs.projects', name=kind):
Mod = ep.load()
mod = Mod(self.registry)
xml = mod.root_xml(data)
self.gen_xml(xml, data)
job = XmlJob(xml, data['name'])
self.jobs.append(job)
break
def gen_xml(self, xml, data):
for module in self.registry.modules:
if hasattr(module, 'gen_xml'):
module.gen_xml(self, xml, data)
def get_managed_string(self):
# The \n\n is not hard coded, because they get stripped if the
# project does not otherwise have a description.
return "\n\n" + MAGIC_MANAGE_STRING
class ModuleRegistry(object):
entry_points_cache = {}
def __init__(self, config):
self.modules = []
self.modules_by_component_type = {}
self.handlers = {}
self.global_config = config
for entrypoint in pkg_resources.iter_entry_points(
group='jenkins_jobs.modules'):
Mod = entrypoint.load()
mod = Mod(self)
self.modules.append(mod)
self.modules.sort(lambda a, b: cmp(a.sequence, b.sequence))
if mod.component_type is not None:
self.modules_by_component_type[mod.component_type] = mod
def registerHandler(self, category, name, method):
cat_dict = self.handlers.get(category, {})
if not cat_dict:
self.handlers[category] = cat_dict
cat_dict[name] = method
def getHandler(self, category, name):
return self.handlers[category][name]
def dispatch(self, component_type,
parser, xml_parent,
component, template_data={}):
"""This is a method that you can call from your implementation of
Base.gen_xml or component. It allows modules to define a type
of component, and benefit from extensibility via Python
entry points and Jenkins Job Builder :ref:`Macros <macro>`.
:arg string component_type: the name of the component
(e.g., `builder`)
:arg YAMLParser parser: the global YMAL Parser
:arg Element xml_parent: the parent XML element
:arg dict template_data: values that should be interpolated into
the component definition
See :py:class:`jenkins_jobs.modules.base.Base` for how to register
components of a module.
See the Publishers module for a simple example of how to use
this method.
"""
if component_type not in self.modules_by_component_type:
raise JenkinsJobsException("Unknown component type: "
"'{0}'.".format(component_type))
component_list_type = self.modules_by_component_type[component_type] \
.component_list_type
if isinstance(component, dict):
# The component is a sigleton dictionary of name: dict(args)
name, component_data = component.items()[0]
if template_data:
# Template data contains values that should be interpolated
# into the component definition
s = yaml.dump(component_data, default_flow_style=False)
s = s.format(**template_data)
component_data = yaml.load(s)
else:
# The component is a simple string name, eg "run-tests"
name = component
component_data = {}
# Look for a component function defined in an entry point
cache_key = '%s:%s' % (component_list_type, name)
eps = ModuleRegistry.entry_points_cache.get(cache_key)
if eps is None:
eps = list(pkg_resources.iter_entry_points(
group='jenkins_jobs.{0}'.format(component_list_type),
name=name))
if len(eps) > 1:
raise JenkinsJobsException(
"Duplicate entry point found for component type: '{0}',"
"name: '{1}'".format(component_type, name))
elif len(eps) == 1:
ModuleRegistry.entry_points_cache[cache_key] = eps
logger.debug("Cached entry point %s = %s", cache_key,
ModuleRegistry.entry_points_cache[cache_key])
if len(eps) == 1:
func = eps[0].load()
func(parser, xml_parent, component_data)
else:
# Otherwise, see if it's defined as a macro
component = parser.data.get(component_type, {}).get(name)
if component:
for b in component[component_list_type]:
# Pass component_data in as template data to this function
# so that if the macro is invoked with arguments,
# the arguments are interpolated into the real defn.
self.dispatch(component_type,
parser, xml_parent, b, component_data)
else:
raise JenkinsJobsException("Unknown entry point or macro '{0}'"
" for component type: '{1}'.".
format(name, component_type))
class XmlJob(object):
def __init__(self, xml, name):
self.xml = xml
self.name = name
def md5(self):
return hashlib.md5(self.output()).hexdigest()
# Pretty printing ideas from
# http://stackoverflow.com/questions/749796/pretty-printing-xml-in-python
pretty_text_re = re.compile('>\n\s+([^<>\s].*?)\n\s+</', re.DOTALL)
def output(self):
out = minidom.parseString(XML.tostring(self.xml))
out = out.toprettyxml(indent=' ', encoding='utf-8')
return self.pretty_text_re.sub('>\g<1></', out)
class CacheStorage(object):
def __init__(self, jenkins_url, flush=False):
cache_dir = self.get_cache_dir()
# One cache per remote Jenkins URL:
host_vary = re.sub('[^A-Za-z0-9\-\~]', '_', jenkins_url)
self.cachefilename = os.path.join(
cache_dir, 'cache-host-jobs-' + host_vary + '.yml')
if flush or not os.path.isfile(self.cachefilename):
self.data = {}
return
with file(self.cachefilename, 'r') as yfile:
self.data = yaml.load(yfile)
logger.debug("Using cache: '{0}'".format(self.cachefilename))
@staticmethod
def get_cache_dir():
home = os.path.expanduser('~')
if home == '~':
raise OSError('Could not locate home folder')
xdg_cache_home = os.environ.get('XDG_CACHE_HOME') or \
os.path.join(home, '.cache')
path = os.path.join(xdg_cache_home, 'jenkins_jobs')
if not os.path.isdir(path):
os.makedirs(path)
return path
def set(self, job, md5):
self.data[job] = md5
yfile = file(self.cachefilename, 'w')
yaml.dump(self.data, yfile)
yfile.close()
def is_cached(self, job):
if job in self.data:
return True
return False
def has_changed(self, job, md5):
if job in self.data and self.data[job] == md5:
return False
return True
class Jenkins(object):
def __init__(self, url, user, password):
self.jenkins = jenkins.Jenkins(url, user, password)
def update_job(self, job_name, xml):
if self.is_job(job_name):
logger.info("Reconfiguring jenkins job {0}".format(job_name))
self.jenkins.reconfig_job(job_name, xml)
else:
logger.info("Creating jenkins job {0}".format(job_name))
self.jenkins.create_job(job_name, xml)
def is_job(self, job_name):
return self.jenkins.job_exists(job_name)
def get_job_md5(self, job_name):
xml = self.jenkins.get_job_config(job_name)
return hashlib.md5(xml).hexdigest()
def delete_job(self, job_name):
if self.is_job(job_name):
logger.info("Deleting jenkins job {0}".format(job_name))
self.jenkins.delete_job(job_name)
def get_jobs(self):
return self.jenkins.get_jobs()
def is_managed(self, job_name):
xml = self.jenkins.get_job_config(job_name)
try:
out = XML.fromstring(xml)
description = out.find(".//description").text
return description.endswith(MAGIC_MANAGE_STRING)
except (TypeError, AttributeError):
pass
return False
class Builder(object):
def __init__(self, jenkins_url, jenkins_user, jenkins_password,
config=None, ignore_cache=False, flush_cache=False):
self.jenkins = Jenkins(jenkins_url, jenkins_user, jenkins_password)
self.cache = CacheStorage(jenkins_url, flush=flush_cache)
self.global_config = config
self.ignore_cache = ignore_cache
def load_files(self, fn):
if os.path.isdir(fn):
files_to_process = [os.path.join(fn, f)
for f in os.listdir(fn)
if (f.endswith('.yml') or f.endswith('.yaml'))]
else:
files_to_process = [fn]
self.parser = YamlParser(self.global_config)
for in_file in files_to_process:
logger.debug("Parsing YAML file {0}".format(in_file))
self.parser.parse(in_file)
def delete_old_managed(self, keep):
jobs = self.jenkins.get_jobs()
for job in jobs:
if job['name'] not in keep and \
self.jenkins.is_managed(job['name']):
logger.info("Removing obsolete jenkins job {0}"
.format(job['name']))
self.delete_job(job['name'])
else:
logger.debug("Ignoring unmanaged jenkins job %s",
job['name'])
def delete_job(self, glob_name, fn=None):
if fn:
self.load_files(fn)
self.parser.generateXML(glob_name)
jobs = [j.name
for j in self.parser.jobs
if matches(j.name, [glob_name])]
else:
jobs = [glob_name]
for job in jobs:
self.jenkins.delete_job(job)
if(self.cache.is_cached(job)):
self.cache.set(job, '')
def delete_all_jobs(self):
jobs = self.jenkins.get_jobs()
for job in jobs:
self.delete_job(job['name'])
def update_job(self, fn, names=None, output_dir=None):
self.load_files(fn)
self.parser.generateXML(names)
self.parser.jobs.sort(lambda a, b: cmp(a.name, b.name))
for job in self.parser.jobs:
if names and not matches(job.name, names):
continue
if output_dir:
if names:
print job.output()
continue
fn = os.path.join(output_dir, job.name)
logger.debug("Writing XML to '{0}'".format(fn))
f = open(fn, 'w')
f.write(job.output())
f.close()
continue
md5 = job.md5()
if (self.jenkins.is_job(job.name)
and not self.cache.is_cached(job.name)):
old_md5 = self.jenkins.get_job_md5(job.name)
self.cache.set(job.name, old_md5)
if self.cache.has_changed(job.name, md5) or self.ignore_cache:
self.jenkins.update_job(job.name, job.output())
self.cache.set(job.name, md5)
else:
logger.debug("'{0}' has not changed".format(job.name))
return self.parser.jobs