Add tenant name on NodeRequests for Nodepool
This change adds the tenant name of the current event's context to NodeRequests and exposes it as a new field in ZooKeeper. This prepares for a tenant-aware Nodepool launcher that can enforce per-tenant resource quotas. In addition, Zuul exposes a new statsd metric ``zuul.nodepool.tenant.<tenant>.current_requests`` that drills down the overall current_requests metric per tenant. The corresponding spec can be found here: https://review.opendev.org/c/zuul/zuul/+/788481 Change-Id: I6d47431e939aba2c80f30504b7a48c15f9fc8fb7
This commit is contained in:
parent
fd028206de
commit
282a2ae391
|
@ -305,6 +305,14 @@ These metrics are emitted by the Zuul :ref:`scheduler`:
|
|||
Persistently high values indicate more testing node resources
|
||||
would be helpful.
|
||||
|
||||
.. stat:: tenant.<tenant>.current_requests
|
||||
:type: gauge
|
||||
|
||||
The number of outstanding nodepool requests from Zuul drilled down by
|
||||
<tenant>. If a tenant for a node request cannot be determined, it is
|
||||
reported as ``unknown``. This relates to
|
||||
``zuul.nodepool.current_requests``.
|
||||
|
||||
.. stat:: resources
|
||||
|
||||
Holds metrics about resource usage by tenant or project if resources
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
---
|
||||
features:
|
||||
- |
|
||||
Add the name of the tenant to node requests. This new field is passed to
|
||||
ZooKeeper in preparation for a tenant-aware nodepool. Also, new statsd
|
||||
gauge metrics of current open node requests are exported in the form
|
||||
``zuul.nodepool.tenant.<tenant>.current_requests``. This metric tracks the
|
||||
currently open node requests per tenant. It drills down the overall
|
||||
``zuul.nodepool.current_requests`` metric.
|
|
@ -35,12 +35,14 @@ class TestNodepoolIntegration(BaseTestCase):
|
|||
self.addCleanup(self.zk_client.disconnect)
|
||||
self.zk_client.connect()
|
||||
self.hostname = socket.gethostname()
|
||||
# empty tenants dict
|
||||
self.tenants = {}
|
||||
|
||||
self.provisioned_requests = []
|
||||
# This class implements the scheduler methods zuul.nodepool
|
||||
# needs, so we pass 'self' as the scheduler.
|
||||
self.nodepool = zuul.nodepool.Nodepool(
|
||||
self.zk_client, self.hostname, self.statsd, self)
|
||||
self.zk_client, self.hostname, self.statsd, self.tenants, self)
|
||||
|
||||
def waitForRequests(self):
|
||||
# Wait until all requests are complete.
|
||||
|
|
|
@ -42,12 +42,14 @@ class TestNodepool(BaseTestCase):
|
|||
self.addCleanup(self.zk_client.disconnect)
|
||||
self.zk_client.connect()
|
||||
self.hostname = 'nodepool-test-hostname'
|
||||
# empty tenants dict
|
||||
self.tenants = {}
|
||||
|
||||
self.provisioned_requests = []
|
||||
# This class implements the scheduler methods zuul.nodepool
|
||||
# needs, so we pass 'self' as the scheduler.
|
||||
self.nodepool = zuul.nodepool.Nodepool(
|
||||
self.zk_client, self.hostname, self.statsd, self)
|
||||
self.zk_client, self.hostname, self.statsd, self.tenants, self)
|
||||
|
||||
self.fake_nodepool = FakeNodepool(self.zk_chroot_fixture)
|
||||
self.addCleanup(self.fake_nodepool.stop)
|
||||
|
|
|
@ -432,6 +432,9 @@ class TestScheduler(ZuulTestCase):
|
|||
'zuul.nodepool.requests.fulfilled.size.1', value='1', kind='c')
|
||||
self.assertReportedStat(
|
||||
'zuul.nodepool.current_requests', value='1', kind='g')
|
||||
self.assertReportedStat(
|
||||
'zuul.nodepool.tenant.tenant-one.current_requests', value='1',
|
||||
kind='g')
|
||||
self.assertReportedStat(
|
||||
'zuul.executors.online', value='1', kind='g')
|
||||
self.assertReportedStat(
|
||||
|
|
|
@ -801,6 +801,10 @@ class NodeRequest(object):
|
|||
self.relative_priority = relative_priority
|
||||
self.provider = self._getPausedParentProvider()
|
||||
self.id = None
|
||||
if build_set is not None:
|
||||
self.tenant = build_set.item.pipeline.tenant.name
|
||||
else:
|
||||
self.tenant = None
|
||||
self._zk_data = {} # Data that we read back from ZK
|
||||
if event is not None:
|
||||
self.event_id = event.zuul_event_id
|
||||
|
@ -874,6 +878,7 @@ class NodeRequest(object):
|
|||
d['state_time'] = self.state_time
|
||||
d['relative_priority'] = self.relative_priority
|
||||
d['event_id'] = self.event_id
|
||||
d['tenant'] = self.tenant
|
||||
return d
|
||||
|
||||
def updateFromDict(self, data):
|
||||
|
|
|
@ -33,9 +33,10 @@ def subtract_resources(target, source):
|
|||
class Nodepool(object):
|
||||
log = logging.getLogger('zuul.nodepool')
|
||||
|
||||
def __init__(self, zk_client, hostname, statsd, scheduler=None):
|
||||
def __init__(self, zk_client, hostname, statsd, tenants, scheduler=None):
|
||||
self.hostname = hostname
|
||||
self.statsd = statsd
|
||||
self.tenants = tenants
|
||||
# TODO (felix): Remove the scheduler parameter once the nodes are
|
||||
# locked on the executor side.
|
||||
self.sched = scheduler
|
||||
|
@ -55,6 +56,7 @@ class Nodepool(object):
|
|||
# timer zuul.nodepool.requests.(fulfilled|failed).<label>
|
||||
# timer zuul.nodepool.requests.(fulfilled|failed).<size>
|
||||
# gauge zuul.nodepool.current_requests
|
||||
# gauge zuul.nodepool.tenant.<tenant>.current_requests
|
||||
if not self.statsd:
|
||||
return
|
||||
pipe = self.statsd.pipeline()
|
||||
|
@ -79,6 +81,27 @@ class Nodepool(object):
|
|||
if dt:
|
||||
pipe.timing(key + '.size.%s' % len(request.nodeset.nodes), dt)
|
||||
pipe.gauge('zuul.nodepool.current_requests', len(self.requests))
|
||||
|
||||
# count the current requests of all tenants
|
||||
# first get all currently configured tenants
|
||||
tenant_requests = defaultdict(int)
|
||||
for tenant_name in self.tenants.keys():
|
||||
tenant_requests[tenant_name] = 0
|
||||
|
||||
for r in self.requests.values():
|
||||
# (might be None, we report them separately as 'unknown')
|
||||
tenant_name = r.tenant if r.tenant else 'unknown'
|
||||
tenant_requests[tenant_name] += 1
|
||||
|
||||
# export current_requests stats per tenant
|
||||
for tenant, request_count in tenant_requests.items():
|
||||
# the custom statsd clients' format is not supported for pipelines
|
||||
# therefore call _format_stat here manually.
|
||||
stats_key = self.statsd._format_stat(
|
||||
'zuul.nodepool.tenant.{tenant}.current_requests',
|
||||
tenant=tenant)
|
||||
pipe.gauge(stats_key, request_count)
|
||||
|
||||
pipe.send()
|
||||
|
||||
def emitStatsResources(self):
|
||||
|
|
|
@ -220,7 +220,8 @@ class Scheduler(threading.Thread):
|
|||
self.executor = ExecutorClient(self.config, self)
|
||||
self.merger = self._merger_client_class(self.config, self)
|
||||
self.nodepool = nodepool.Nodepool(
|
||||
self.zk_client, self.hostname, self.statsd, self)
|
||||
self.zk_client, self.hostname, self.statsd,
|
||||
self.abide.tenants, self)
|
||||
|
||||
def start(self):
|
||||
super(Scheduler, self).start()
|
||||
|
|
Loading…
Reference in New Issue