Log cpu times of ansible executions
We need to be able to compare and discover ansible performance regressions or improvements of ansible. Currently we have no way of detecting changes there other than observing the overall system load of executors. One way to get some metrics is to log the cpu times used by individual ansible runs and the sum of them over the whole job execution. With this one could grab that data from the log and analyse them. Change-Id: Ib0b62299c741533f0d1615f67eced9601498f00d
This commit is contained in:
parent
11709e7044
commit
5a4db84e5a
@ -19,6 +19,7 @@ import fcntl
|
||||
import grp
|
||||
import logging
|
||||
import os
|
||||
import psutil
|
||||
import pwd
|
||||
import shlex
|
||||
import subprocess
|
||||
@ -52,7 +53,7 @@ class WrappedPopen(object):
|
||||
if fd not in pass_fds:
|
||||
pass_fds.append(fd)
|
||||
kwargs['pass_fds'] = pass_fds
|
||||
proc = subprocess.Popen(args, *sub_args, **kwargs)
|
||||
proc = psutil.Popen(args, *sub_args, **kwargs)
|
||||
finally:
|
||||
self.__del__()
|
||||
return proc
|
||||
|
@ -15,7 +15,7 @@
|
||||
# under the License.
|
||||
|
||||
import logging
|
||||
import subprocess
|
||||
import psutil
|
||||
|
||||
from zuul.driver import (Driver, WrapperInterface)
|
||||
from zuul.execution_context import BaseExecutionContext
|
||||
@ -25,7 +25,7 @@ class NullExecutionContext(BaseExecutionContext):
|
||||
log = logging.getLogger("zuul.NullExecutionContext")
|
||||
|
||||
def getPopen(self, **kwargs):
|
||||
return subprocess.Popen
|
||||
return psutil.Popen
|
||||
|
||||
|
||||
class NullwrapDriver(Driver, WrapperInterface):
|
||||
|
@ -636,6 +636,9 @@ class AnsibleJob(object):
|
||||
|
||||
self.executor_variables_file = None
|
||||
|
||||
self.cpu_times = {'user': 0, 'system': 0,
|
||||
'children_user': 0, 'children_system': 0}
|
||||
|
||||
if self.executor_server.config.has_option('executor', 'variables'):
|
||||
self.executor_variables_file = self.executor_server.config.get(
|
||||
'executor', 'variables')
|
||||
@ -952,6 +955,13 @@ class AnsibleJob(object):
|
||||
pre_failed = True
|
||||
break
|
||||
|
||||
self.log.debug(
|
||||
"Overall ansible cpu times: user=%.2f, system=%.2f, "
|
||||
"children_user=%.2f, children_system=%.2f" %
|
||||
(self.cpu_times['user'], self.cpu_times['system'],
|
||||
self.cpu_times['children_user'],
|
||||
self.cpu_times['children_system']))
|
||||
|
||||
if not pre_failed:
|
||||
ansible_timeout = self.getAnsibleTimeout(time_started, job_timeout)
|
||||
job_status, job_code = self.runAnsiblePlaybook(
|
||||
@ -1575,6 +1585,17 @@ class AnsibleJob(object):
|
||||
line = line[:1024].rstrip()
|
||||
self.log.debug("Ansible output: %s" % (line,))
|
||||
self.log.debug("Ansible output terminated")
|
||||
cpu_times = self.proc.cpu_times()
|
||||
self.log.debug("Ansible cpu times: user=%.2f, system=%.2f, "
|
||||
"children_user=%.2f, "
|
||||
"children_system=%.2f" %
|
||||
(cpu_times.user, cpu_times.system,
|
||||
cpu_times.children_user,
|
||||
cpu_times.children_system))
|
||||
self.cpu_times['user'] += cpu_times.user
|
||||
self.cpu_times['system'] += cpu_times.system
|
||||
self.cpu_times['children_user'] += cpu_times.children_user
|
||||
self.cpu_times['children_system'] += cpu_times.children_system
|
||||
ret = self.proc.wait()
|
||||
self.log.debug("Ansible exit code: %s" % (ret,))
|
||||
finally:
|
||||
|
Loading…
x
Reference in New Issue
Block a user