From 5a4db84e5a62cf545bb8331f090feadc4bd510e3 Mon Sep 17 00:00:00 2001 From: Tobias Henkel Date: Sat, 14 Jul 2018 10:23:57 +0200 Subject: [PATCH] Log cpu times of ansible executions We need to be able to compare and discover ansible performance regressions or improvements of ansible. Currently we have no way of detecting changes there other than observing the overall system load of executors. One way to get some metrics is to log the cpu times used by individual ansible runs and the sum of them over the whole job execution. With this one could grab that data from the log and analyse them. Change-Id: Ib0b62299c741533f0d1615f67eced9601498f00d --- zuul/driver/bubblewrap/__init__.py | 3 ++- zuul/driver/nullwrap/__init__.py | 4 ++-- zuul/executor/server.py | 21 +++++++++++++++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/zuul/driver/bubblewrap/__init__.py b/zuul/driver/bubblewrap/__init__.py index 1566f8515d..52db3fc53a 100644 --- a/zuul/driver/bubblewrap/__init__.py +++ b/zuul/driver/bubblewrap/__init__.py @@ -19,6 +19,7 @@ import fcntl import grp import logging import os +import psutil import pwd import shlex import subprocess @@ -52,7 +53,7 @@ class WrappedPopen(object): if fd not in pass_fds: pass_fds.append(fd) kwargs['pass_fds'] = pass_fds - proc = subprocess.Popen(args, *sub_args, **kwargs) + proc = psutil.Popen(args, *sub_args, **kwargs) finally: self.__del__() return proc diff --git a/zuul/driver/nullwrap/__init__.py b/zuul/driver/nullwrap/__init__.py index 178e4c7fcc..2e8f4c1160 100644 --- a/zuul/driver/nullwrap/__init__.py +++ b/zuul/driver/nullwrap/__init__.py @@ -15,7 +15,7 @@ # under the License. import logging -import subprocess +import psutil from zuul.driver import (Driver, WrapperInterface) from zuul.execution_context import BaseExecutionContext @@ -25,7 +25,7 @@ class NullExecutionContext(BaseExecutionContext): log = logging.getLogger("zuul.NullExecutionContext") def getPopen(self, **kwargs): - return subprocess.Popen + return psutil.Popen class NullwrapDriver(Driver, WrapperInterface): diff --git a/zuul/executor/server.py b/zuul/executor/server.py index 535ce70f3f..237a70f41c 100644 --- a/zuul/executor/server.py +++ b/zuul/executor/server.py @@ -636,6 +636,9 @@ class AnsibleJob(object): self.executor_variables_file = None + self.cpu_times = {'user': 0, 'system': 0, + 'children_user': 0, 'children_system': 0} + if self.executor_server.config.has_option('executor', 'variables'): self.executor_variables_file = self.executor_server.config.get( 'executor', 'variables') @@ -952,6 +955,13 @@ class AnsibleJob(object): pre_failed = True break + self.log.debug( + "Overall ansible cpu times: user=%.2f, system=%.2f, " + "children_user=%.2f, children_system=%.2f" % + (self.cpu_times['user'], self.cpu_times['system'], + self.cpu_times['children_user'], + self.cpu_times['children_system'])) + if not pre_failed: ansible_timeout = self.getAnsibleTimeout(time_started, job_timeout) job_status, job_code = self.runAnsiblePlaybook( @@ -1575,6 +1585,17 @@ class AnsibleJob(object): line = line[:1024].rstrip() self.log.debug("Ansible output: %s" % (line,)) self.log.debug("Ansible output terminated") + cpu_times = self.proc.cpu_times() + self.log.debug("Ansible cpu times: user=%.2f, system=%.2f, " + "children_user=%.2f, " + "children_system=%.2f" % + (cpu_times.user, cpu_times.system, + cpu_times.children_user, + cpu_times.children_system)) + self.cpu_times['user'] += cpu_times.user + self.cpu_times['system'] += cpu_times.system + self.cpu_times['children_user'] += cpu_times.children_user + self.cpu_times['children_system'] += cpu_times.children_system ret = self.proc.wait() self.log.debug("Ansible exit code: %s" % (ret,)) finally: