'''Small script to scrape Zuul job node usage.

Zuul now logs for every project + job the number of seconds taken and the
number of nodes used. Using this we can scrape the logs to determine usage
rates by project, repo, and job.
'''
# Provenance: this file was introduced as tools/node_usage.py by the patch
#   From 3d6d20ae6a52375d8960da42b4e009d9b567c081 Mon Sep 17 00:00:00 2001
#   From: Clark Boylan
#   Date: Fri, 26 Oct 2018 12:34:52 -0700
#   Subject: [PATCH] Small script to scrape Zuul job node usage
#   Change-Id: Ie790df12e731f363ef7819d11b6d44c28e3d8320

import gzip
import os
import re


def get_log_age(path):
    '''Return the rotation index encoded in a log file name.

    The third dot-separated component of the base name is taken as the
    index, so ``zuul.log.3.gz`` and ``zuul.log.3`` both give 3. Names
    without such a component (``zuul.log``) or with a non-numeric one
    give 0, i.e. they are treated like the current log.

    :param path: path (or bare file name) of the log file.
    :returns: the rotation index as an int, 0 when there is none.
    '''
    parts = os.path.basename(path).split('.')
    if len(parts) < 3:
        return 0
    try:
        return int(parts[2])
    except ValueError:
        # Suffix such as a date or "old": no usable ordering information.
        return 0


class LogScraper(object):
    '''Accumulate node-seconds per repo, per job and per logical project.

    Attributes filled in by the methods below:

    * ``repos``: ``{repo: {'total': secs, job_name: secs, ...}}``
    * ``jobs``: ``{job_name: secs}``
    * ``projects``: ``{project_name: secs}``
    * ``sorted_repos`` / ``sorted_jobs`` / ``sorted_projects``: lists of
      ``(name, secs)`` tuples, largest usage first.
    * ``total_usage``: sum of ``nodes * secs`` over all matched lines.
    * ``start_time`` / ``end_time``: timestamp strings of the first and
      last matched log lines, in the order the lines were scraped.
    '''

    # Example log line (the angle-bracketed reprs are abbreviated here;
    # NOTE(review): their exact content could not be recovered - confirm
    # against a real Zuul log):
    # 2018-10-26 16:14:47,527 INFO zuul.nodepool: Nodeset <NodeSet ... [<Node ...>, <Node ...>]> with 2 nodes was in use for 6241.08082151413 seconds for build <Build ... of JOB voting:... on <Worker ...>> for project openstack/tripleo-quickstart-extras # noqa
    #
    # NOTE(review): the '.' in the ``secs`` group is unescaped. As written
    # it also accepts exponent notation such as '1e-05', so it is kept.
    r = re.compile(
        r'(?P<timestamp>\d+-\d+-\d+ \d\d:\d\d:\d\d,\d\d\d) '
        r'INFO zuul.nodepool: Nodeset <.*> with (?P<nodes>\d+) nodes '
        r'was in use for (?P<secs>\d+(.[\d\-e]+)?) seconds '
        r'for build <Build \w+ of (?P<job>[^\s]+) voting:\w+ on .* '
        r'for project (?P<repos>[^\s]+)')

    def __init__(self):
        self.repos = {}
        self.sorted_repos = []
        self.jobs = {}
        self.sorted_jobs = []
        self.total_usage = 0.0
        self.projects = {}
        self.sorted_projects = []
        self.start_time = None
        self.end_time = None

    def scrape_file(self, fn):
        '''Add the node usage recorded in one log file to the totals.

        :param fn: path of the log file; names ending in ``.gz`` are
            read through gzip, everything else as plain text.
        '''
        open_f = gzip.open if fn.endswith('.gz') else open
        # errors='replace': a stray undecodable byte anywhere in the log
        # must not abort the whole scrape.
        with open_f(fn, 'rt', errors='replace') as f:
            for line in f:
                # Cheap substring test before running the regex.
                if 'nodes was in use for' not in line:
                    continue
                m = self.r.match(line)
                if not m:
                    continue
                g = m.groupdict()
                try:
                    node_time = int(g['nodes']) * float(g['secs'])
                except ValueError:
                    # The loose ``secs`` pattern matched a non-number.
                    continue
                repo = g['repos']
                job = g['job']
                if self.start_time is None:
                    self.start_time = g['timestamp']
                self.end_time = g['timestamp']
                self.total_usage += node_time
                # NOTE(review): a job literally named 'total' would
                # share the per-repo 'total' key.
                repo_usage = self.repos.setdefault(repo, {'total': 0.0})
                repo_usage['total'] += node_time
                repo_usage[job] = repo_usage.get(job, 0.0) + node_time
                self.jobs[job] = self.jobs.get(job, 0.0) + node_time

    def list_log_files(self, path='/var/log/zuul'):
        '''Return the ``zuul.log*`` files in *path*, oldest rotation first.

        :param path: directory to look in.
        :returns: list of file paths ordered by descending
            :func:`get_log_age`, so the current log comes last.
        :raises OSError: if *path* cannot be listed.
        '''
        prefix = os.path.join(path, 'zuul.log')
        # sorted() first so that files of equal age come out in a
        # deterministic order (os.listdir order is arbitrary).
        candidates = (os.path.join(path, e) for e in sorted(os.listdir(path)))
        ret = [c for c in candidates
               if c.startswith(prefix) and os.path.isfile(c)]
        ret.sort(key=get_log_age, reverse=True)
        return ret

    def sort_repos(self):
        '''Rebuild ``sorted_repos`` as (repo, total) pairs, largest first.'''
        self.sorted_repos = sorted(
            ((repo, usage['total']) for repo, usage in self.repos.items()),
            key=lambda x: x[1], reverse=True)

    def sort_jobs(self):
        '''Rebuild ``sorted_jobs`` as (job, usage) pairs, largest first.'''
        self.sorted_jobs = sorted(
            self.jobs.items(), key=lambda x: x[1], reverse=True)

    def calculate_project_usage(self, path='projects.yaml'):
        '''Group usage by logical project/effort

        It is often the case that a single repo doesn't capture the work
        of a logical project or effort. If this is the case in your
        situation you can create a projects.yaml file that groups together
        repos under logical project names to report usage by that logical
        grouping.

        The projects.yaml should be in your current directory (or be
        given via *path*) and have this format:

        project_name:
          deliverables:
            logical_deliverable_name:
              repos:
                - repo1
                - repo2

        project_name2:
          deliverables:
            logical_deliverable_name2:
              repos:
                - repo3
                - repo4

        :param path: location of the grouping file.
        :returns: ``sorted_projects``, a list of (project, usage) pairs,
            largest first; empty when *path* does not exist.
        '''
        # Start from scratch so repeated calls do not duplicate entries.
        self.projects = {}
        self.sorted_projects = []
        if not os.path.exists(path):
            return self.sorted_projects

        # Imported lazily: PyYAML is only needed when a grouping file is
        # actually present.
        import yaml

        with open(path) as f:
            # safe_load: plain yaml.load(f) without a Loader is rejected
            # by PyYAML >= 6 and can construct arbitrary objects on older
            # versions. An empty file loads as None.
            y = yaml.safe_load(f) or {}
        for name, v in y.items():
            usage = 0.0
            for deliverable in v['deliverables'].values():
                for repo in deliverable['repos']:
                    if repo in self.repos:
                        usage += self.repos[repo]['total']
            self.projects[name] = usage

        self.sorted_projects = sorted(
            self.projects.items(), key=lambda x: x[1], reverse=True)
        return self.sorted_projects


def _percent(part, whole):
    '''Return *part* as a percentage of *whole*, 0.0 when *whole* is 0.'''
    return (part / whole) * 100 if whole else 0.0


def _print_top(title, ranked, total_usage, limit=20):
    '''Print *title*, the first *limit* (name, usage) rows, a blank line.'''
    print(title)
    for name, usage in ranked[:limit]:
        print('%s: %.2fs, %.2f%%'
              % (name, usage, _percent(usage, total_usage)))
    print()


def main(log_dir='/var/log/zuul'):
    '''Scrape every zuul.log* file in *log_dir* and print the report.'''
    scraper = LogScraper()
    for fn in scraper.list_log_files(log_dir):
        scraper.scrape_file(fn)

    print('For period from %s to %s'
          % (scraper.start_time, scraper.end_time))
    print('Total node time used: %.2fs' % scraper.total_usage)
    print()

    scraper.calculate_project_usage()
    if scraper.sorted_projects:
        _print_top('Top 20 logical projects by resource usage:',
                   scraper.sorted_projects, scraper.total_usage)

    scraper.sort_repos()
    _print_top('Top 20 repos by resource usage:',
               scraper.sorted_repos, scraper.total_usage)

    scraper.sort_jobs()
    _print_top('Top 20 jobs by resource usage:',
               scraper.sorted_jobs, scraper.total_usage)


if __name__ == '__main__':
    main()