Add min_avail_hdd governor for zuul-executor

Using the zuul.executor.state_dir setting from zuul.conf, we can
create a new governor to track the amount of disk space available to a
zuul-executor. If the available space drops below min_avail_hdd
(default 5.0%), we'll stop accepting jobs until space has been
reclaimed by the executor.

Change-Id: Ieb446397135ee5b138829cd2440b8c86abbb7d56
Signed-off-by: Paul Belanger <pabelanger@redhat.com>
This commit is contained in:
Paul Belanger 2018-06-25 17:11:45 -04:00
parent d1372f8f98
commit 608b22f577
No known key found for this signature in database
GPG Key ID: 611A80832067AF38
4 changed files with 94 additions and 0 deletions

View File

@ -559,6 +559,16 @@ The following sections of ``zuul.conf`` are used by the executor:
The executor will observe system load and determine whether
to accept more jobs every 30 seconds.
.. attr:: min_avail_hdd
:default: 5.0
This is the minimum percentage of HDD storage available for the
:attr:`executor.state_dir` directory. The executor will stop accepting
more than 1 job at a time until more HDD storage is available. The
available HDD percentage is calculated as the available disk space
divided by the total storage capacity, multiplied by 100.
.. attr:: min_avail_mem
:default: 5.0

View File

@ -446,6 +446,7 @@ class TestExecutorHostname(ZuulTestCase):
class TestGovernor(ZuulTestCase):
config_file = 'zuul-executor-hostname.conf'
tenant_config_file = 'config/governor/main.yaml'
@mock.patch('os.getloadavg')
@ -464,6 +465,33 @@ class TestGovernor(ZuulTestCase):
self.executor_server.manageLoad()
self.assertFalse(self.executor_server.accepting_work)
@mock.patch('os.statvfs')
def test_hdd_governor(self, statvfs_mock):
    """Exercise the HDD governor against a patched ``os.statvfs``.

    The executor is first shown a mostly empty disk and must keep
    accepting work; it is then shown a nearly full disk and must stop.
    The ``pct_used_hdd`` gauge is checked after each ``manageLoad`` call.
    """
    stat_key = ('zuul.executor.test-executor-hostname_example_com'
                '.pct_used_hdd')

    class FakeStatvfs(object):
        pass

    fake_stat = FakeStatvfs()
    fake_stat.f_frsize = 4096
    fake_stat.f_blocks = 120920708

    # Plenty of free blocks: 20.84% used, so work is still accepted.
    fake_stat.f_bfree = 95716701
    statvfs_mock.return_value = fake_stat
    self.executor_server.manageLoad()
    self.assertTrue(self.executor_server.accepting_work)
    self.assertReportedStat(stat_key, value='2084', kind='g')

    # Very few free blocks: 95.27% used, so work must be refused.
    fake_stat.f_bfree = 5716701
    statvfs_mock.return_value = fake_stat
    self.executor_server.manageLoad()
    self.assertFalse(self.executor_server.accepting_work)
    self.assertReportedStat(stat_key, value='9527', kind='g')
def test_pause_governor(self):
self.executor_server.manageLoad()
self.assertTrue(self.executor_server.accepting_work)

View File

@ -0,0 +1,54 @@
# Copyright 2018 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import logging
import os
from zuul.executor.sensors import SensorInterface
from zuul.lib.config import get_default
def get_avail_hdd_pct(path):
    """Return the percentage of free space on the filesystem holding *path*.

    The value comes from ``os.statvfs``: the share of blocks that are not
    free (``f_blocks - f_bfree``) is computed as a percentage of
    ``f_blocks`` and then subtracted from 100.

    NOTE(review): this reads ``f_bfree`` rather than ``f_bavail``, so
    blocks reserved for privileged users presumably count as free here --
    confirm that is the intended behaviour.
    """
    stats = os.statvfs(path)
    total_blocks = stats.f_blocks
    used_blocks = float(total_blocks - stats.f_bfree)
    used_pct = (used_blocks / total_blocks) * 100
    return 100.0 - used_pct
class HDDSensor(SensorInterface):
    """Governor sensor that watches free disk space under the state dir.

    Two options are read from the ``executor`` section of the config:
    ``min_avail_hdd`` (default ``'5.0'``, converted to float) is the
    minimum free-space percentage, and ``state_dir`` (default
    ``/var/lib/zuul``) is the path whose filesystem is inspected.
    """

    log = logging.getLogger("zuul.executor.sensor.hdd")

    def __init__(self, config=None):
        """Load the threshold and the monitored path from *config*."""
        self.min_avail_hdd = float(
            get_default(config, 'executor', 'min_avail_hdd', '5.0'))
        self.state_dir = get_default(
            config, 'executor', 'state_dir', '/var/lib/zuul',
            expand_user=True)

    def isOk(self):
        """Report whether enough disk space is available.

        Returns a ``(ok, message)`` tuple. ``ok`` is False when the
        available percentage is below ``min_avail_hdd``; ``message``
        describes the comparison that was made.
        """
        avail_hdd_pct = get_avail_hdd_pct(self.state_dir)

        if avail_hdd_pct < self.min_avail_hdd:
            return False, "low disk space {:3.1f}% < {}".format(
                avail_hdd_pct, self.min_avail_hdd)

        # Reaching this point means available >= minimum, so the message
        # must state ">=" (not "<=") to describe the check truthfully.
        return True, "{:3.1f}% >= {}".format(
            avail_hdd_pct, self.min_avail_hdd)

    def reportStats(self, statsd, base_key):
        """Send the used-disk percentage to *statsd* as a gauge.

        The gauge key is *base_key* with ``.pct_used_hdd`` appended.
        """
        avail_hdd_pct = get_avail_hdd_pct(self.state_dir)

        # We multiply the percentage by 100 so we can report it to 2 decimal
        # points.
        statsd.gauge(base_key + '.pct_used_hdd',
                     int((100.0 - avail_hdd_pct) * 100))

View File

@ -40,6 +40,7 @@ import gear
import zuul.merger.merger
import zuul.ansible.logconfig
from zuul.executor.sensors.cpu import CPUSensor
from zuul.executor.sensors.hdd import HDDSensor
from zuul.executor.sensors.pause import PauseSensor
from zuul.executor.sensors.startingbuilds import StartingBuildsSensor
from zuul.executor.sensors.ram import RAMSensor
@ -1874,6 +1875,7 @@ class ExecutorServer(object):
cpu_sensor = CPUSensor(config)
self.sensors = [
cpu_sensor,
HDDSensor(config),
self.pause_sensor,
RAMSensor(config),
StartingBuildsSensor(self, cpu_sensor.max_load_avg)