Add min_avail_hdd governor for zuul-executor
Using the zuul.executor.state_dir setting from zuul.conf, we can create a new governor to track the amount of disk space a zuul-executor is using. If the available space drops below min_avail_hdd (default 5.0%), we'll stop accepting jobs until space has been reclaimed by the executor. Change-Id: Ieb446397135ee5b138829cd2440b8c86abbb7d56 Signed-off-by: Paul Belanger <pabelanger@redhat.com>
This commit is contained in:
parent
d1372f8f98
commit
608b22f577
|
@ -559,6 +559,16 @@ The following sections of ``zuul.conf`` are used by the executor:
|
|||
The executor will observe system load and determine whether
|
||||
to accept more jobs every 30 seconds.
|
||||
|
||||
.. attr:: min_avail_hdd
|
||||
:default: 5.0
|
||||
|
||||
This is the minimum percentage of HDD storage available for the
|
||||
:attr:`executor.state_dir` directory. The executor will stop accepting
|
||||
more than 1 job at a time until more HDD storage is available. The
|
||||
available HDD percentage is calculated from the total available
|
||||
disk space divided by the total real storage capacity multiplied by
|
||||
100.
|
||||
|
||||
.. attr:: min_avail_mem
|
||||
:default: 5.0
|
||||
|
||||
|
|
|
@ -446,6 +446,7 @@ class TestExecutorHostname(ZuulTestCase):
|
|||
|
||||
|
||||
class TestGovernor(ZuulTestCase):
|
||||
config_file = 'zuul-executor-hostname.conf'
|
||||
tenant_config_file = 'config/governor/main.yaml'
|
||||
|
||||
@mock.patch('os.getloadavg')
|
||||
|
@ -464,6 +465,33 @@ class TestGovernor(ZuulTestCase):
|
|||
self.executor_server.manageLoad()
|
||||
self.assertFalse(self.executor_server.accepting_work)
|
||||
|
||||
@mock.patch('os.statvfs')
def test_hdd_governor(self, statvfs_mock):
    """Check that the executor stops accepting work when the disk fills.

    ``os.statvfs`` is patched to return a canned filesystem report, first
    with plenty of free blocks and then with very few, and the load
    manager is run after each change.
    """
    stat_key = ('zuul.executor.test-executor-hostname_example_com'
                '.pct_used_hdd')

    class FakeStatvfs(object):
        pass

    fake_fs = FakeStatvfs()
    fake_fs.f_frsize, fake_fs.f_blocks = 4096, 120920708

    # Plenty of free blocks: 20.84% used.
    fake_fs.f_bfree = 95716701
    statvfs_mock.return_value = fake_fs
    self.executor_server.manageLoad()
    self.assertTrue(self.executor_server.accepting_work)
    # The gauge carries the used percentage scaled by 100 (two decimals).
    self.assertReportedStat(stat_key, value='2084', kind='g')

    # Almost no free blocks: 95.27% used.
    fake_fs.f_bfree = 5716701
    statvfs_mock.return_value = fake_fs
    self.executor_server.manageLoad()
    self.assertFalse(self.executor_server.accepting_work)
    self.assertReportedStat(stat_key, value='9527', kind='g')
|
||||
|
||||
def test_pause_governor(self):
|
||||
self.executor_server.manageLoad()
|
||||
self.assertTrue(self.executor_server.accepting_work)
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
# Copyright 2018 Red Hat, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
from zuul.executor.sensors import SensorInterface
|
||||
from zuul.lib.config import get_default
|
||||
|
||||
|
||||
def get_avail_hdd_pct(path):
    """Return the percentage of free blocks on the filesystem at *path*.

    :param path: any path on the filesystem to inspect; passed straight to
        ``os.statvfs``.
    :returns: a float between 0.0 and 100.0; 100.0 when the filesystem
        reports no blocks at all, because no usage can be computed.
    :raises OSError: whatever ``os.statvfs`` raises for *path*.
    """
    s = os.statvfs(path)

    # A filesystem that reports zero total blocks would otherwise raise
    # ZeroDivisionError; treat it as unconstrained rather than as full.
    if not s.f_blocks:
        return 100.0

    # NOTE(review): f_bfree counts blocks reserved for the super-user as
    # free; f_bavail is what unprivileged processes can use -- confirm
    # which one the governor should track.
    used = float(s.f_blocks - s.f_bfree)
    percent = (used / s.f_blocks) * 100

    return (100.0 - percent)
|
||||
|
||||
|
||||
class HDDSensor(SensorInterface):
    """Governor sensor that watches free disk space under the state dir.

    The sensor reports "not ok" when the percentage of free space on the
    filesystem holding ``executor.state_dir`` is below
    ``executor.min_avail_hdd``.
    """
    log = logging.getLogger("zuul.executor.sensor.hdd")

    def __init__(self, config=None):
        """Read the threshold and the monitored path from *config*.

        :param config: configuration object handed to ``get_default``.
            ``executor.min_avail_hdd`` falls back to ``5.0`` (percent) and
            ``executor.state_dir`` falls back to ``/var/lib/zuul``.
        """
        self.min_avail_hdd = float(
            get_default(config, 'executor', 'min_avail_hdd', '5.0'))
        self.state_dir = get_default(
            config, 'executor', 'state_dir', '/var/lib/zuul', expand_user=True)

    def isOk(self):
        """Tell whether enough disk space is available.

        :returns: an ``(ok, message)`` tuple; ``ok`` is ``False`` when the
            available percentage is below ``min_avail_hdd``.
        """
        avail_hdd_pct = get_avail_hdd_pct(self.state_dir)

        if avail_hdd_pct < self.min_avail_hdd:
            return False, "low disk space {:3.1f}% < {}".format(
                avail_hdd_pct, self.min_avail_hdd)

        # This branch is only reached when the available percentage is at
        # or above the minimum, so the message must say ">=".
        return True, "{:3.1f}% >= {}".format(
            avail_hdd_pct, self.min_avail_hdd)

    def reportStats(self, statsd, base_key):
        """Emit the used-disk percentage as a gauge.

        :param statsd: client object exposing ``gauge(key, value)``.
        :param base_key: prefix to which ``.pct_used_hdd`` is appended.
        """
        avail_hdd_pct = get_avail_hdd_pct(self.state_dir)

        # We multiply the percentage by 100 so we can report it to 2 decimal
        # points.
        statsd.gauge(base_key + '.pct_used_hdd',
                     int((100.0 - avail_hdd_pct) * 100))
|
|
@ -40,6 +40,7 @@ import gear
|
|||
import zuul.merger.merger
|
||||
import zuul.ansible.logconfig
|
||||
from zuul.executor.sensors.cpu import CPUSensor
|
||||
from zuul.executor.sensors.hdd import HDDSensor
|
||||
from zuul.executor.sensors.pause import PauseSensor
|
||||
from zuul.executor.sensors.startingbuilds import StartingBuildsSensor
|
||||
from zuul.executor.sensors.ram import RAMSensor
|
||||
|
@ -1874,6 +1875,7 @@ class ExecutorServer(object):
|
|||
cpu_sensor = CPUSensor(config)
|
||||
self.sensors = [
|
||||
cpu_sensor,
|
||||
HDDSensor(config),
|
||||
self.pause_sensor,
|
||||
RAMSensor(config),
|
||||
StartingBuildsSensor(self, cpu_sensor.max_load_avg)
|
||||
|
|
Loading…
Reference in New Issue