Optionally disable disk_limit_per_job
Our current approach to enforcing the per-job disk limit can be very expensive because it runs 'du' in a loop. With many repos in the cache and many running jobs, this can poison the cache and induce a large amount of I/O load. This can hurt overall performance, especially if Zuul is running on shared storage such as Ceph.

Change-Id: Ic03168e30e0cba4a4adb42eebf4709ceba0d8c3e
This commit is contained in:
parent
d60346181b
commit
0a394ad67e
|
@ -597,7 +597,7 @@ The following sections of ``zuul.conf`` are used by the executor:
|
||||||
This integer is the maximum number of megabytes that any one job
|
This integer is the maximum number of megabytes that any one job
|
||||||
is allowed to consume on disk while it is running. If a job's
|
is allowed to consume on disk while it is running. If a job's
|
||||||
scratch space has more than this much space consumed, it will be
|
scratch space has more than this much space consumed, it will be
|
||||||
aborted.
|
aborted. Set to -1 to disable the limit.
|
||||||
|
|
||||||
.. attr:: trusted_ro_paths
|
.. attr:: trusted_ro_paths
|
||||||
|
|
||||||
|
|
|
@ -65,6 +65,18 @@ class TestDiskAccountant(BaseTestCase):
|
||||||
da.stop()
|
da.stop()
|
||||||
self.assertFalse(da.thread.is_alive())
|
self.assertFalse(da.thread.is_alive())
|
||||||
|
|
||||||
|
def test_disk_accountant_no_limit(self):
|
||||||
|
jobs_dir = tempfile.mkdtemp(
|
||||||
|
dir=os.environ.get("ZUUL_TEST_ROOT", None))
|
||||||
|
cache_dir = tempfile.mkdtemp()
|
||||||
|
executor_server = FakeExecutor()
|
||||||
|
da = DiskAccountant(jobs_dir, -1, executor_server.stopJobByJobDir,
|
||||||
|
cache_dir)
|
||||||
|
da.start()
|
||||||
|
self.assertFalse(da.running)
|
||||||
|
da.stop()
|
||||||
|
self.assertFalse(da.running)
|
||||||
|
|
||||||
def test_cache_hard_links(self):
|
def test_cache_hard_links(self):
|
||||||
root_dir = tempfile.mkdtemp(
|
root_dir = tempfile.mkdtemp(
|
||||||
dir=os.environ.get("ZUUL_TEST_ROOT", None))
|
dir=os.environ.get("ZUUL_TEST_ROOT", None))
|
||||||
|
|
|
@ -148,16 +148,25 @@ class DiskAccountant(object):
|
||||||
self.stop_event.wait(delay_time)
|
self.stop_event.wait(delay_time)
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
|
if self.limit < 0:
|
||||||
|
# No need to start if there is no limit.
|
||||||
|
return
|
||||||
self._running = True
|
self._running = True
|
||||||
self.thread.start()
|
self.thread.start()
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
|
if not self.running:
|
||||||
|
return
|
||||||
self._running = False
|
self._running = False
|
||||||
self.stop_event.set()
|
self.stop_event.set()
|
||||||
# We join here to avoid whitelisting the thread -- if it takes more
|
# We join here to avoid whitelisting the thread -- if it takes more
|
||||||
# than 5s to stop in tests, there's a problem.
|
# than 5s to stop in tests, there's a problem.
|
||||||
self.thread.join(timeout=5)
|
self.thread.join(timeout=5)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def running(self):
|
||||||
|
return self._running
|
||||||
|
|
||||||
|
|
||||||
class Watchdog(object):
|
class Watchdog(object):
|
||||||
def __init__(self, timeout, function, args):
|
def __init__(self, timeout, function, args):
|
||||||
|
|
Loading…
Reference in New Issue