Adjust test_autohold to catch stats errors
This associates resources with only the held node and validates that we emit stats that:

* Include the node while it is running a job
* Do not include the node after it is complete and is held

Change-Id: I832005dacd5e8c2ca00840fd5976834e88cfdf98
This commit is contained in:
parent
919c5a3654
commit
c22c63ab5f
|
@ -215,6 +215,11 @@ def registerProjects(source_name, client, config):
|
|||
client.addProjectByName(project)
|
||||
|
||||
|
||||
class StatException(Exception):
    """Signal that a statsd expectation in the test helpers was not met.

    Raised by ``assertReportedStat`` when a key is missing from the
    reported stats or when a subkey is set under an existing
    gauge/counter key, and by ``assertUnReportedStat`` when a key that
    should be absent is found.  Having a dedicated type lets callers
    tell these failures apart from unrelated exceptions.
    """
|
||||
|
||||
|
||||
class GerritDriverMock(GerritDriver):
|
||||
def __init__(self, registry, changes: Dict[str, Dict[str, Change]],
|
||||
upstream_root: str, additional_event_queues, poller_events,
|
||||
|
@ -5426,7 +5431,7 @@ class ZuulTestCase(BaseTestCase):
|
|||
self.assertEqual(self.getZKTree(client.WAITER_ROOT), [])
|
||||
self.assertEqual(self.getZKTree(client.LOCK_ROOT), [])
|
||||
|
||||
def assertReportedStat(self, key, value=None, kind=None):
|
||||
def assertReportedStat(self, key, value=None, kind=None, timeout=5):
|
||||
"""Check statsd output
|
||||
|
||||
Check statsd return values. A ``value`` should specify a
|
||||
|
@ -5443,6 +5448,8 @@ class ZuulTestCase(BaseTestCase):
|
|||
- ``ms`` timing
|
||||
- ``s`` set
|
||||
|
||||
:arg int timeout: How long to wait for the stat to appear
|
||||
|
||||
:returns: The value
|
||||
"""
|
||||
|
||||
|
@ -5450,7 +5457,7 @@ class ZuulTestCase(BaseTestCase):
|
|||
self.assertNotEqual(kind, None)
|
||||
|
||||
start = time.time()
|
||||
while time.time() < (start + 5):
|
||||
while time.time() <= (start + timeout):
|
||||
# Note our fake statsd just queues up results in a queue.
|
||||
# We just keep going through them until we find one that
|
||||
# matches, or fail out. If statsd pipelines are used,
|
||||
|
@ -5480,7 +5487,7 @@ class ZuulTestCase(BaseTestCase):
|
|||
already_set_keys.update([k])
|
||||
for k in already_set_keys:
|
||||
if key != k and key.startswith(k):
|
||||
raise Exception(
|
||||
raise StatException(
|
||||
"Key %s is a gauge/counter and "
|
||||
"we are trying to set subkey %s" % (k, key))
|
||||
|
||||
|
@ -5513,7 +5520,16 @@ class ZuulTestCase(BaseTestCase):
|
|||
return s_value
|
||||
time.sleep(0.1)
|
||||
|
||||
raise Exception("Key %s not found in reported stats" % key)
|
||||
raise StatException("Key %s not found in reported stats" % key)
|
||||
|
||||
def assertUnReportedStat(self, key, value=None, kind=None):
    """Assert that a stat has NOT been reported.

    This is the inverse of ``assertReportedStat``: the same lookup is
    performed with ``timeout=0`` so that we do not wait for the stat
    to show up.

    :arg str key: The statsd key to look for.
    :arg str value: Optional value the stat must carry to count as a
        match (passed through to ``assertReportedStat``).
    :arg str kind: Optional statsd kind (passed through to
        ``assertReportedStat``).

    :returns: None if the stat was not found.
    :raises StatException: If the stat was found in the reported stats.
    """
    # Any StatException from assertReportedStat is treated as "not
    # reported"; that includes its gauge/counter subkey error, not
    # only the "not found" case.
    # NOTE(review): with timeout=0 the polling loop in
    # assertReportedStat is bounded by ``time.time() <= start``; if
    # the clock advances between the two calls the loop body may
    # never run and this check would pass without inspecting any
    # stats -- confirm that at least one pass is guaranteed.
    try:
        value = self.assertReportedStat(key, value=value,
                                        kind=kind, timeout=0)
    except StatException:
        return
    raise StatException("Key %s found in reported stats: %s" %
                        (key, value))
|
||||
|
||||
def assertBuilds(self, builds):
|
||||
"""Assert that the running builds are as described.
|
||||
|
|
|
@ -1922,13 +1922,6 @@ class TestScheduler(ZuulTestCase):
|
|||
client = zuul.rpcclient.RPCClient('127.0.0.1',
|
||||
self.gearman_server.port)
|
||||
self.addCleanup(client.shutdown)
|
||||
# Set resources so we can examine the code path for updating
|
||||
# the stats on autohold.
|
||||
self.fake_nodepool.resources = {
|
||||
'cores': 2,
|
||||
'ram': 1024,
|
||||
'instances': 1,
|
||||
}
|
||||
r = client.autohold('tenant-one', 'org/project', 'project-test2',
|
||||
"", "", "reason text", 1)
|
||||
self.assertTrue(r)
|
||||
|
@ -1966,17 +1959,40 @@ class TestScheduler(ZuulTestCase):
|
|||
break
|
||||
self.assertIsNone(held_node)
|
||||
|
||||
self.hold_jobs_in_queue = True
|
||||
# Hold in build to check the stats
|
||||
self.executor_server.hold_jobs_in_build = True
|
||||
|
||||
# Now test that failed jobs are autoheld
|
||||
|
||||
# Set resources only for this node so we can examine the code
|
||||
# path for updating the stats on autohold.
|
||||
self.fake_nodepool.resources = {
|
||||
'cores': 2,
|
||||
'ram': 1024,
|
||||
'instances': 1,
|
||||
}
|
||||
# Some convenience variables for checking these stats.
|
||||
tenant_ram_stat = 'zuul.nodepool.resources.tenant.tenant-one.ram'
|
||||
project_ram_stat = ('zuul.nodepool.resources.project.'
|
||||
'review_example_com/org/project.ram')
|
||||
|
||||
B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B')
|
||||
self.executor_server.failJob('project-test2', B)
|
||||
self.fake_gerrit.addEvent(B.getPatchsetCreatedEvent(1))
|
||||
|
||||
self.waitUntilSettled()
|
||||
|
||||
# Get the build request object
|
||||
build = list(self.scheds.first.sched.executor.builds.values())[0]
|
||||
|
||||
self.hold_jobs_in_queue = False
|
||||
self.executor_api.release()
|
||||
# We should report using the held node's resources
|
||||
self.assertReportedStat(tenant_ram_stat, value='1024', kind='g')
|
||||
self.assertReportedStat(project_ram_stat, value='1024', kind='g')
|
||||
self.assertUnReportedStat(tenant_ram_stat, value='0', kind='g')
|
||||
self.assertUnReportedStat(project_ram_stat, value='0', kind='g')
|
||||
|
||||
self.executor_server.hold_jobs_in_build = False
|
||||
self.executor_server.release()
|
||||
self.waitUntilSettled()
|
||||
|
||||
self.assertEqual(B.data['status'], 'NEW')
|
||||
|
@ -2009,7 +2025,14 @@ class TestScheduler(ZuulTestCase):
|
|||
self.assertEqual(1, len(request2.nodes))
|
||||
self.assertEqual(1, len(request2.nodes[0]["nodes"]))
|
||||
|
||||
# We should now report that we no longer use the nodes resources
|
||||
self.assertReportedStat(tenant_ram_stat, value='1024', kind='g')
|
||||
self.assertReportedStat(project_ram_stat, value='1024', kind='g')
|
||||
self.assertReportedStat(tenant_ram_stat, value='0', kind='g')
|
||||
self.assertReportedStat(project_ram_stat, value='0', kind='g')
|
||||
|
||||
# Another failed change should not hold any more nodes
|
||||
self.fake_nodepool.resources = {}
|
||||
C = self.fake_gerrit.addFakeChange('org/project', 'master', 'C')
|
||||
self.executor_server.failJob('project-test2', C)
|
||||
self.fake_gerrit.addEvent(C.getPatchsetCreatedEvent(1))
|
||||
|
@ -2036,15 +2059,6 @@ class TestScheduler(ZuulTestCase):
|
|||
self.assertEqual(3, len(node_states))
|
||||
self.assertEqual([zuul.model.STATE_USED] * 3, node_states)
|
||||
|
||||
# The resources should be reported
|
||||
self.assertReportedStat(
|
||||
'zuul.nodepool.resources.tenant.tenant-one.ram',
|
||||
value='1024', kind='g')
|
||||
self.assertReportedStat(
|
||||
'zuul.nodepool.resources.project.'
|
||||
'review_example_com/org/project.ram',
|
||||
value='1024', kind='g')
|
||||
|
||||
@simple_layout('layouts/autohold.yaml')
|
||||
def test_autohold_info(self):
|
||||
client = zuul.rpcclient.RPCClient('127.0.0.1',
|
||||
|
|
Loading…
Reference in New Issue