Request multiple folder levels at once in get_all_jobs

On our Jenkins instance with almost a hundred folders, JJB update
stalls for quite a while because it calls get_all_jobs. When invoked
locally at the Jenkins master, it's a matter of seconds; on a fast
broadband link and VPN, it's 2 minutes; and on a train it's easily 10
minutes.

But there's a trick!

    curl \
        --show-error --silent --fail \
        --user : --negotiate \
        --get \
        --data-urlencode \
            tree=jobs\[url\,name\,jobs\[url\,name\,jobs\[url\,name\,\
            jobs\[url\,name\,jobs\[url\,name\,jobs\[url\,name\,\
            jobs\[url\,name\,jobs\[url\,name\,jobs\[url\,name\,\
            jobs\[url\,name\,jobs\]\]\]\]\]\]\]\]\]\] \
        https://jenkins.example.com/api/json

This returns almost instantly.

And it gets better: if we fail to correctly guess the nesting level
necessary, Jenkins returns

    …, "jobs": [{}, {}, …], …

so we can easily detect that we need to recurse deeper.

Change-Id: I7268259149e4bc8939c512a112c7e6ec1908224f
This commit is contained in:
Tomas Janousek 2018-08-03 22:59:50 +02:00
parent 05986a64d7
commit b5a8b7035e
4 changed files with 85 additions and 58 deletions

View File

@ -97,7 +97,8 @@ INFO = 'api/json'
PLUGIN_INFO = 'pluginManager/api/json?depth=%(depth)s'
CRUMB_URL = 'crumbIssuer/api/json'
WHOAMI_URL = 'me/api/json?depth=%(depth)s'
JOBS_QUERY = '?tree=jobs[url,color,name,jobs]'
JOBS_QUERY = '?tree=%s'
JOBS_QUERY_TREE = 'jobs[url,color,name,%s]'
JOB_INFO = '%(folder_url)sjob/%(short_name)s/api/json?depth=%(depth)s'
JOB_NAME = '%(folder_url)sjob/%(short_name)s/api/json?tree=name'
ALL_BUILDS = '%(folder_url)sjob/%(short_name)s/api/json?tree=allBuilds[number,url]'
@ -474,17 +475,21 @@ class Jenkins(object):
raise JenkinsException(
"Could not parse JSON info for job[%s]" % name)
def get_job_info_regex(self, pattern, depth=0, folder_depth=0):
def get_job_info_regex(self, pattern, depth=0, folder_depth=0,
folder_depth_per_request=10):
'''Get a list of jobs information that contain names which match the
regex pattern.
:param pattern: regex pattern, ``str``
:param depth: JSON depth, ``int``
:param folder_depth: folder level depth to search ``int``
:param folder_depth_per_request: Number of levels to fetch at once,
``int``. See :func:`get_all_jobs`.
:returns: List of jobs info, ``list``
'''
result = []
jobs = self.get_all_jobs(folder_depth)
jobs = self.get_all_jobs(folder_depth=folder_depth,
folder_depth_per_request=folder_depth_per_request)
for job in jobs:
if re.search(pattern, job['name']):
result.append(self.get_job_info(job['name'], depth=depth))
@ -942,7 +947,7 @@ class Jenkins(object):
return plugins_data
def get_jobs(self, folder_depth=0, view_name=None):
def get_jobs(self, folder_depth=0, folder_depth_per_request=10, view_name=None):
"""Get list of jobs.
Each job is a dictionary with 'name', 'url', 'color' and 'fullname'
@ -955,6 +960,8 @@ class Jenkins(object):
:param folder_depth: Number of levels to search, ``int``. By default
0, which will limit search to toplevel. None disables the limit.
:param folder_depth_per_request: Number of levels to fetch at once,
``int``. See :func:`get_all_jobs`.
:param view_name: Name of a Jenkins view for which to
retrieve jobs, ``str``. By default, the job list is
not limited to a specific view.
@ -976,9 +983,10 @@ class Jenkins(object):
if view_name:
return self._get_view_jobs(name=view_name)
else:
return self.get_all_jobs(folder_depth=folder_depth)
return self.get_all_jobs(folder_depth=folder_depth,
folder_depth_per_request=folder_depth_per_request)
def get_all_jobs(self, folder_depth=None):
def get_all_jobs(self, folder_depth=None, folder_depth_per_request=10):
"""Get list of all jobs recursively to the given folder depth.
Each job is a dictionary with 'name', 'url', 'color' and 'fullname'
@ -986,46 +994,37 @@ class Jenkins(object):
:param folder_depth: Number of levels to search, ``int``. By default
None, which will search all levels. 0 limits to toplevel.
:param folder_depth_per_request: Number of levels to fetch at once,
``int``. By default 10, which is usually enough to fetch all jobs
using a single request and still easily fits into an HTTP request.
:returns: list of jobs, ``[ { str: str} ]``
.. note::
On instances with many folders it may be more efficient to use the
run_script method to retrieve all jobs instead.
On instances with many folders it would not be efficient to fetch
each folder separately, hence `folder_depth_per_request` levels
are fetched at once using the ``tree`` query parameter::
Example::
?tree=jobs[url,color,name,jobs[...,jobs[...,jobs[...,jobs]]]]
server.run_script(\"\"\"
import groovy.json.JsonBuilder;
If there are more folder levels than the query asks for, Jenkins
returns empty [#]_ objects at the deepest level::
// get all projects excluding matrix configuration
// as they are simply part of a matrix project.
// there may be better ways to get just jobs
items = Jenkins.instance.getAllItems(AbstractProject);
items.removeAll {
it instanceof hudson.matrix.MatrixConfiguration
};
{"name": "folder", "url": "...", "jobs": [{}, {}, ...]}
def json = new JsonBuilder()
def root = json {
jobs items.collect {
[
name: it.name,
url: Jenkins.instance.getRootUrl() + it.getUrl(),
color: it.getIconColor().toString(),
fullname: it.getFullName()
]
}
}
// use json.toPrettyString() if viewing
println json.toString()
\"\"\")
This makes it possible to detect when additional requests are
needed.
.. [#] Actually recent Jenkins includes a ``_class`` field
everywhere, but it's missing the requested fields.
"""
jobs_list = []
jobs_query = 'jobs'
for _ in range(folder_depth_per_request):
jobs_query = JOBS_QUERY_TREE % jobs_query
jobs_query = JOBS_QUERY % jobs_query
jobs = [(0, [], self.get_info(query=JOBS_QUERY)['jobs'])]
jobs_list = []
jobs = [(0, [], self.get_info(query=jobs_query)['jobs'])]
for lvl, root, lvl_jobs in jobs:
if not isinstance(lvl_jobs, list):
lvl_jobs = [lvl_jobs]
@ -1036,13 +1035,16 @@ class Jenkins(object):
if u'fullname' not in job:
job[u'fullname'] = '/'.join(path)
jobs_list.append(job)
if 'jobs' in job: # folder
if 'jobs' in job and isinstance(job['jobs'], list): # folder
if folder_depth is None or lvl < folder_depth:
url_path = ''.join(['/job/' + p for p in path])
jobs.append(
(lvl + 1, path,
self.get_info(url_path,
query=JOBS_QUERY)['jobs']))
children = job['jobs']
# once folder_depth_per_request is reached, Jenkins
# returns empty objects
if any('url' not in child for child in job['jobs']):
url_path = ''.join(['/job/' + p for p in path])
children = self.get_info(url_path,
query=jobs_query)['jobs']
jobs.append((lvl + 1, path, children))
return jobs_list
def copy_job(self, from_name, to_name):
@ -1161,22 +1163,6 @@ class Jenkins(object):
'''Get the number of jobs on the Jenkins server
:returns: Total number of jobs, ``int``
.. note::
On instances with many folders it may be more efficient to use the
run_script method to retrieve the total number of jobs instead.
Example::
# get all projects excluding matrix configuration
# as they are simply part of a matrix project.
server.run_script(
"print(Hudson.instance.getAllItems("
" hudson.model.AbstractProject).count{"
" !(it instanceof hudson.matrix.MatrixConfiguration)"
" })")
'''
return len(self.get_all_jobs())

View File

@ -72,3 +72,19 @@ class JenkinsGetJobsTestBase(JenkinsJobsTestBase):
{'name': 'my_job', 'color': 'blue', 'url': 'http://...'}
]}
]
jobs_in_folder_deep_query = [
{'jobs': [
{'name': 'top_folder', 'url': 'http://...', 'jobs': [
{'name': 'middle_folder', 'url': 'http://...', 'jobs': [
{'name': 'bottom_folder', 'url': 'http://...',
'jobs': [{}, {}]}
]}
]}
]},
# top_folder/middle_folder/bottom_folder jobs
{'jobs': [
{'name': 'my_job1', 'color': 'blue', 'url': 'http://...'},
{'name': 'my_job2', 'color': 'blue', 'url': 'http://...'}
]}
]

View File

@ -18,7 +18,7 @@ class JenkinsGetJobsTest(JenkinsGetJobsTestBase):
job_info_to_return = {u'jobs': jobs}
jenkins_mock.return_value = json.dumps(job_info_to_return)
job_info = self.j.get_jobs()
job_info = self.j.get_jobs(folder_depth_per_request=1)
jobs[u'fullname'] = jobs[u'name']
self.assertEqual(job_info, [jobs])

View File

@ -120,3 +120,28 @@ class JenkinsGetAllJobsTest(JenkinsGetJobsTestBase):
]
self.assertEqual(expected_request_urls,
self.got_request_urls(jenkins_mock))
@patch.object(jenkins.Jenkins, 'jenkins_open')
def test_deep_query(self, jenkins_mock):
jenkins_mock.side_effect = map(
json.dumps, self.jobs_in_folder_deep_query)
jobs_info = self.j.get_all_jobs()
expected_fullnames = [
u"top_folder",
u"top_folder/middle_folder",
u"top_folder/middle_folder/bottom_folder",
u"top_folder/middle_folder/bottom_folder/my_job1",
u"top_folder/middle_folder/bottom_folder/my_job2"
]
self.assertEqual(len(expected_fullnames), len(jobs_info))
got_fullnames = [job[u"fullname"] for job in jobs_info]
self.assertEqual(expected_fullnames, got_fullnames)
expected_request_urls = [
self.make_url('api/json'),
self.make_url('job/top_folder/job/middle_folder/job/bottom_folder/api/json')
]
self.assertEqual(expected_request_urls,
self.got_request_urls(jenkins_mock))