Small script to scrape Zuul job node usage
Zuul now logs, for every project and job, the number of seconds taken
and the number of nodes used. Using this we can scrape the logs to
determine usage rates by project, repo, and job.

Change-Id: Ie790df12e731f363ef7819d11b6d44c28e3d8320
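For example, the sample log line quoted in the script below records 2 nodes
in use for 6241.08 seconds, which the scraper counts as 2 × 6241.08 ≈
12482.16 node-seconds charged to openstack/tripleo-quickstart-extras.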
tools/node_usage.py (new file, 157 lines)
import gzip
import os
import re

import yaml


def get_log_age(path):
    # Order compressed rotations (zuul.log.N.gz) by their rotation index N;
    # anything else (such as the current zuul.log) counts as age 0.
    filename = os.path.basename(path)
    parts = filename.split('.')
    if len(parts) < 4:
        return 0
    else:
        return int(parts[2])
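
# For example (hypothetical path), get_log_age('/var/log/zuul/zuul.log.3.gz')
# returns 3, while the current zuul.log returns 0 and therefore sorts last.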


class LogScraper(object):
    # Example log line
    # 2018-10-26 16:14:47,527 INFO zuul.nodepool: Nodeset <NodeSet two-centos-7-nodes [<Node 0000058431 ('primary',):centos-7>, <Node 0000058468 ('secondary',):centos-7>]> with 2 nodes was in use for 6241.08082151413 seconds for build <Build 530c4ca7af9e44dcb535e7074258e803 of tripleo-ci-centos-7-scenario008-multinode-oooq-container voting:False on <Worker ze05.openstack.org>> for project openstack/tripleo-quickstart-extras  # noqa
    r = re.compile(r'(?P<timestamp>\d+-\d+-\d+ \d\d:\d\d:\d\d,\d\d\d) INFO zuul.nodepool: Nodeset <.*> with (?P<nodes>\d+) nodes was in use for (?P<secs>\d+(.[\d\-e]+)?) seconds for build <Build \w+ of (?P<job>[^\s]+) voting:\w+ on .* for project (?P<repos>[^\s]+)')  # noqa
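
    # Matching the example line above, the named groups come out as:
    #   timestamp: '2018-10-26 16:14:47,527'
    #   nodes:     '2'
    #   secs:      '6241.08082151413'
    #   job:       'tripleo-ci-centos-7-scenario008-multinode-oooq-container'
    #   repos:     'openstack/tripleo-quickstart-extras'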

    def __init__(self):
        # Usage tallies are node-seconds, keyed by repo, job and logical
        # project. The sorted_* lists are populated by the sort/calculate
        # helpers below.
        self.repos = {}
        self.sorted_repos = []
        self.jobs = {}
        self.sorted_jobs = []
        self.total_usage = 0.0
        self.projects = {}
        self.sorted_projects = []
        self.start_time = None
        self.end_time = None

    def scrape_file(self, fn):
        # Rotated logs may be compressed.
        if fn.endswith('.gz'):
            open_f = gzip.open
        else:
            open_f = open
        with open_f(fn, 'rt') as f:
            for line in f:
                # Cheap substring check first; only run the regex against
                # candidate lines.
                if 'nodes was in use for' in line:
                    m = self.r.match(line)
                    if not m:
                        continue
                    g = m.groupdict()
                    repo = g['repos']
                    secs = float(g['secs'])
                    nodes = int(g['nodes'])
                    job = g['job']
                    # Files are scraped oldest first, so the first timestamp
                    # seen opens the reporting period and the last one
                    # closes it.
                    if not self.start_time:
                        self.start_time = g['timestamp']
                    self.end_time = g['timestamp']
                    if repo not in self.repos:
                        self.repos[repo] = {}
                        self.repos[repo]['total'] = 0.0
                    # Usage is measured in node-seconds: node count times
                    # seconds the nodeset was in use.
                    node_time = nodes * secs
                    self.total_usage += node_time
                    self.repos[repo]['total'] += node_time
                    if job not in self.jobs:
                        self.jobs[job] = 0.0
                    if job not in self.repos[repo]:
                        self.repos[repo][job] = 0.0
                    self.jobs[job] += node_time
                    self.repos[repo][job] += node_time

    def list_log_files(self, path='/var/log/zuul'):
        # Return every file whose name starts with zuul.log, oldest first,
        # so timestamps are encountered in chronological order.
        ret = []
        entries = os.listdir(path)
        prefix = os.path.join(path, 'zuul.log')
        for entry in entries:
            entry = os.path.join(path, entry)
            if os.path.isfile(entry) and entry.startswith(prefix):
                ret.append(entry)
        ret.sort(key=get_log_age, reverse=True)
        return ret

    def sort_repos(self):
        for repo in self.repos:
            self.sorted_repos.append((repo, self.repos[repo]['total']))

        self.sorted_repos.sort(key=lambda x: x[1], reverse=True)

    def sort_jobs(self):
        for job, usage in self.jobs.items():
            self.sorted_jobs.append((job, usage))

        self.sorted_jobs.sort(key=lambda x: x[1], reverse=True)

    def calculate_project_usage(self):
        '''Group usage by logical project/effort.

        It is often the case that a single repo doesn't capture the work
        of a logical project or effort. If this is the case in your
        situation you can create a projects.yaml file that groups together
        repos under logical project names to report usage by that logical
        grouping.

        The projects.yaml should be in your current directory and have this
        format:

        project_name:
          deliverables:
            logical_deliverable_name:
              repos:
                - repo1
                - repo2

        project_name2:
          deliverables:
            logical_deliverable_name2:
              repos:
                - repo3
                - repo4
        '''
        if not os.path.exists('projects.yaml'):
            return self.sorted_projects
        with open('projects.yaml') as f:
            # safe_load avoids executing arbitrary YAML tags.
            y = yaml.safe_load(f)
        for name, v in y.items():
            self.projects[name] = 0.0
            for deliverable in v['deliverables'].values():
                for repo in deliverable['repos']:
                    if repo in self.repos:
                        self.projects[name] += self.repos[repo]['total']

        for project, usage in self.projects.items():
            self.sorted_projects.append((project, usage))

        self.sorted_projects.sort(key=lambda x: x[1], reverse=True)
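
# As an illustration of the docstring's projects.yaml format, a hypothetical
# file like
#
#     tripleo:
#       deliverables:
#         quickstart:
#           repos:
#             - openstack/tripleo-quickstart-extras
#
# would fold that repo's node time into a single "tripleo" total.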


scraper = LogScraper()
for fn in scraper.list_log_files():
    scraper.scrape_file(fn)

print('For period from %s to %s' % (scraper.start_time, scraper.end_time))
print('Total node time used: %.2fs' % scraper.total_usage)
print()

scraper.calculate_project_usage()
if scraper.sorted_projects:
    print('Top 20 logical projects by resource usage:')
    for project, total in scraper.sorted_projects[:20]:
        percentage = (total / scraper.total_usage) * 100
        print('%s: %.2fs, %.2f%%' % (project, total, percentage))
    print()

scraper.sort_repos()
print('Top 20 repos by resource usage:')
for repo, total in scraper.sorted_repos[:20]:
    percentage = (total / scraper.total_usage) * 100
    print('%s: %.2fs, %.2f%%' % (repo, total, percentage))
print()

scraper.sort_jobs()
print('Top 20 jobs by resource usage:')
for job, total in scraper.sorted_jobs[:20]:
    percentage = (total / scraper.total_usage) * 100
    print('%s: %.2fs, %.2f%%' % (job, total, percentage))
print()
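
The script can be run directly on a host holding the Zuul scheduler logs (it
reads /var/log/zuul by default), for example as python3 tools/node_usage.py;
when no projects.yaml is present the per-project report is simply skipped.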