Browse Source

Log cpu times of ansible executions

We need to be able to compare and discover ansible performance
regressions or improvements of ansible. Currently we have no way of
detecting changes there other than observing the overall system load
of executors. One way to get some metrics is to log the cpu times used
by individual ansible runs and the sum of them over the whole job
execution. With this one could grab that data from the log and analyse
them.

Change-Id: Ib0b62299c741533f0d1615f67eced9601498f00d
changes/13/582713/1
Tobias Henkel 3 years ago
parent
commit
5a4db84e5a
No known key found for this signature in database GPG Key ID: 3750DEC158E5FA2
3 changed files with 25 additions and 3 deletions
  1. +2
    -1
      zuul/driver/bubblewrap/__init__.py
  2. +2
    -2
      zuul/driver/nullwrap/__init__.py
  3. +21
    -0
      zuul/executor/server.py

+ 2
- 1
zuul/driver/bubblewrap/__init__.py View File

@ -19,6 +19,7 @@ import fcntl
import grp
import logging
import os
import psutil
import pwd
import shlex
import subprocess
@ -52,7 +53,7 @@ class WrappedPopen(object):
if fd not in pass_fds:
pass_fds.append(fd)
kwargs['pass_fds'] = pass_fds
proc = subprocess.Popen(args, *sub_args, **kwargs)
proc = psutil.Popen(args, *sub_args, **kwargs)
finally:
self.__del__()
return proc


+ 2
- 2
zuul/driver/nullwrap/__init__.py View File

@ -15,7 +15,7 @@
# under the License.
import logging
import subprocess
import psutil
from zuul.driver import (Driver, WrapperInterface)
from zuul.execution_context import BaseExecutionContext
@ -25,7 +25,7 @@ class NullExecutionContext(BaseExecutionContext):
log = logging.getLogger("zuul.NullExecutionContext")
def getPopen(self, **kwargs):
return subprocess.Popen
return psutil.Popen
class NullwrapDriver(Driver, WrapperInterface):


+ 21
- 0
zuul/executor/server.py View File

@ -636,6 +636,9 @@ class AnsibleJob(object):
self.executor_variables_file = None
self.cpu_times = {'user': 0, 'system': 0,
'children_user': 0, 'children_system': 0}
if self.executor_server.config.has_option('executor', 'variables'):
self.executor_variables_file = self.executor_server.config.get(
'executor', 'variables')
@ -952,6 +955,13 @@ class AnsibleJob(object):
pre_failed = True
break
self.log.debug(
"Overall ansible cpu times: user=%.2f, system=%.2f, "
"children_user=%.2f, children_system=%.2f" %
(self.cpu_times['user'], self.cpu_times['system'],
self.cpu_times['children_user'],
self.cpu_times['children_system']))
if not pre_failed:
ansible_timeout = self.getAnsibleTimeout(time_started, job_timeout)
job_status, job_code = self.runAnsiblePlaybook(
@ -1575,6 +1585,17 @@ class AnsibleJob(object):
line = line[:1024].rstrip()
self.log.debug("Ansible output: %s" % (line,))
self.log.debug("Ansible output terminated")
cpu_times = self.proc.cpu_times()
self.log.debug("Ansible cpu times: user=%.2f, system=%.2f, "
"children_user=%.2f, "
"children_system=%.2f" %
(cpu_times.user, cpu_times.system,
cpu_times.children_user,
cpu_times.children_system))
self.cpu_times['user'] += cpu_times.user
self.cpu_times['system'] += cpu_times.system
self.cpu_times['children_user'] += cpu_times.children_user
self.cpu_times['children_system'] += cpu_times.children_system
ret = self.proc.wait()
self.log.debug("Ansible exit code: %s" % (ret,))
finally:


Loading…
Cancel
Save