Add memory awareness to system load governor
By default, the executor will unregister from accepting concurrent jobs whenever available system memory drops below 5%. Reclaimable buffers and cache are counted as available memory, not as used memory. Users can tune this threshold up or down as necessary. This is a very conservative default and will likely need tuning once observed in production. Change-Id: Iab6469c0173d9f5635769d4ab0e8034a41355cd4 Signed-off-by: Paul Belanger <pabelanger@redhat.com>
This commit is contained in:
parent
22c5b7155b
commit
1754b2caf0
|
@ -575,6 +575,16 @@ The following sections of ``zuul.conf`` are used by the executor:
|
||||||
The executor will observe system load and determine whether
|
The executor will observe system load and determine whether
|
||||||
to accept more jobs every 30 seconds.
|
to accept more jobs every 30 seconds.
|
||||||
|
|
||||||
|
.. attr:: min_avail_mem
|
||||||
|
:default: 5.0
|
||||||
|
|
||||||
|
This is the minimum percentage of system RAM that must remain available. The
|
||||||
|
executor will stop accepting more than 1 job at a time until
|
||||||
|
more memory is available. The available memory percentage is
|
||||||
|
calculated from the total available memory divided by the
|
||||||
|
total real memory multiplied by 100. Buffers and cache are
|
||||||
|
considered available in the calculation.
|
||||||
|
|
||||||
.. attr:: hostname
|
.. attr:: hostname
|
||||||
:default: hostname of the server
|
:default: hostname of the server
|
||||||
|
|
||||||
|
|
|
@ -27,3 +27,4 @@ pyjwt
|
||||||
iso8601
|
iso8601
|
||||||
aiohttp
|
aiohttp
|
||||||
uvloop;python_version>='3.5'
|
uvloop;python_version>='3.5'
|
||||||
|
psutil
|
||||||
|
|
|
@ -18,6 +18,7 @@ import json
|
||||||
import logging
|
import logging
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
import os
|
import os
|
||||||
|
import psutil
|
||||||
import shutil
|
import shutil
|
||||||
import signal
|
import signal
|
||||||
import shlex
|
import shlex
|
||||||
|
@ -1949,6 +1950,7 @@ class ExecutorServer(object):
|
||||||
''' Apply some heuristics to decide whether or not we should
|
''' Apply some heuristics to decide whether or not we should
|
||||||
be asking for more jobs '''
|
be asking for more jobs '''
|
||||||
load_avg = os.getloadavg()[0]
|
load_avg = os.getloadavg()[0]
|
||||||
|
avail_mem_pct = 100.0 - psutil.virtual_memory().percent
|
||||||
if self.accepting_work:
|
if self.accepting_work:
|
||||||
# Don't unregister if we don't have any active jobs.
|
# Don't unregister if we don't have any active jobs.
|
||||||
if load_avg > self.max_load_avg and self.job_workers:
|
if load_avg > self.max_load_avg and self.job_workers:
|
||||||
|
@ -1956,10 +1958,19 @@ class ExecutorServer(object):
|
||||||
"Unregistering due to high system load {} > {}".format(
|
"Unregistering due to high system load {} > {}".format(
|
||||||
load_avg, self.max_load_avg))
|
load_avg, self.max_load_avg))
|
||||||
self.unregister_work()
|
self.unregister_work()
|
||||||
elif load_avg <= self.max_load_avg:
|
elif avail_mem_pct < self.min_avail_mem:
|
||||||
|
self.log.info(
|
||||||
|
"Unregistering due to low memory {:3.1f}% < {}".format(
|
||||||
|
avail_mem_pct, self.min_avail_mem))
|
||||||
|
self.unregister_work()
|
||||||
|
elif (load_avg <= self.max_load_avg and
|
||||||
|
avail_mem_pct >= self.min_avail_mem):
|
||||||
self.log.info(
|
self.log.info(
|
||||||
"Re-registering as load is within limits {} <= {}".format(
|
"Re-registering as job is within limits "
|
||||||
load_avg, self.max_load_avg))
|
"{} <= {} {:3.1f}% <= {}".format(load_avg,
|
||||||
|
self.max_load_avg,
|
||||||
|
avail_mem_pct,
|
||||||
|
self.min_avail_mem))
|
||||||
self.register_work()
|
self.register_work()
|
||||||
if self.statsd:
|
if self.statsd:
|
||||||
base_key = 'zuul.executor.%s' % self.hostname
|
base_key = 'zuul.executor.%s' % self.hostname
|
||||||
|
|
Loading…
Reference in New Issue