Implement 'max-ready-age' handling

Clean up nodes that have exceeded their max-ready-age.

Change-Id: I3054d8a1ac9041059215fa20dbb4f1d4d12f8765
This commit is contained in:
Simon Westphahl 2024-10-08 12:40:50 +02:00
parent e8f0a37e1f
commit 3c7c70faae
No known key found for this signature in database
6 changed files with 81 additions and 14 deletions

View File

@ -3,6 +3,7 @@
image: debian
flavor: normal
min-ready: 1
max-ready-age: 900
- provider:
name: aws-eu-central-1-main

View File

@ -68,6 +68,13 @@ class LauncherBaseTestCase(ZuulTestCase):
self.mock_aws.stop()
super().tearDown()
def _nodes_by_label(self):
nodes = self.launcher.api.nodes_cache.getItems()
nodes_by_label = defaultdict(list)
for node in nodes:
nodes_by_label[node.label].append(node)
return nodes_by_label
class TestLauncher(LauncherBaseTestCase):
debian_return_data = {
@ -732,6 +739,46 @@ class TestMinReadyLauncher(LauncherBaseTestCase):
self.assertGreaterEqual(len(nodes), 3)
self.assertLessEqual(len(nodes), 5)
def test_max_ready_age(self):
# Check that a ready node whose expiry time has passed is removed
# from the node cache and that the 'debian-emea' label is backed by
# exactly one node again afterwards.
#
# Phase 1: poll the node cache until the expected number of nodes
# exists and all of them are in the READY state.
for _ in iterate_timeout(60, "nodes to be ready"):
nodes = self.launcher.api.nodes_cache.getItems()
# Since we are randomly picking a provider to fill the
# min-ready slots we might end up with 3-5 nodes
# depending on the choice of providers.
if not 3 <= len(nodes) <= 5:
continue
if all(n.state == n.State.READY for n in nodes):
break
self.waitUntilSettled()
# Re-read the cache after settling and re-check the node count.
nodes = self.launcher.api.nodes_cache.getItems()
self.assertGreaterEqual(len(nodes), 3)
self.assertLessEqual(len(nodes), 5)
nodes_by_label = self._nodes_by_label()
self.assertEqual(1, len(nodes_by_label['debian-emea']))
# Keep a reference to the single 'debian-emea' node; it is the one
# that gets expired below.
node = nodes_by_label['debian-emea'][0]
# NOTE(review): presumably a ZK context that is not bound to a
# lock -- confirm against createZKContext().
ctx = self.createZKContext(None)
# Phase 2: force the node to be expired.  An expiry_time of 1 is a
# timestamp far in the past, so it is smaller than any current
# time.time() value.  The lock is released in all cases via finally.
try:
node.acquireLock(ctx)
node.updateAttributes(ctx, expiry_time=1)
finally:
node.releaseLock()
# Phase 3: poll until the expired node is gone from the cache, the
# node count is back in the expected range and all nodes are READY.
for _ in iterate_timeout(60, "node to be cleaned up"):
nodes = self.launcher.api.nodes_cache.getItems()
if node in nodes:
continue
if not 3 <= len(nodes) <= 5:
continue
if all(n.state == n.State.READY for n in nodes):
break
self.waitUntilSettled()
# The expired node was not found in the cache above, so the single
# 'debian-emea' node counted here must be a different one.
nodes_by_label = self._nodes_by_label()
self.assertEqual(1, len(nodes_by_label['debian-emea']))
class TestMinReadyTenantVariant(LauncherBaseTestCase):
tenant_config_file = "config/launcher-min-ready/tenant-variant.yaml"
@ -769,10 +816,7 @@ class TestMinReadyTenantVariant(LauncherBaseTestCase):
nodes = self.launcher.api.nodes_cache.getItems()
self.assertEqual(5, len(nodes))
nodes_by_label = defaultdict(list)
for node in nodes:
nodes_by_label[node.label].append(node)
nodes_by_label = self._nodes_by_label()
self.assertEqual(4, len(nodes_by_label['debian-normal']))
debian_normal_cfg_hashes = {
n.label_config_hash for n in nodes_by_label['debian-normal']
@ -806,10 +850,7 @@ class TestMinReadyTenantVariant(LauncherBaseTestCase):
nodes = self.launcher.api.nodes_cache.getItems()
self.assertEqual(5, len(nodes))
nodes_by_label = defaultdict(list)
for node in nodes:
nodes_by_label[node.label].append(node)
nodes_by_label = self._nodes_by_label()
self.assertEqual(1, len(nodes_by_label['debian-emea']))
self.assertEqual(4, len(nodes_by_label['debian-normal']))
debian_normal_cfg_hashes = {

View File

@ -467,6 +467,7 @@ class LabelParser(object):
vs.Required('flavor'): str,
'description': str,
'min-ready': int,
'max-ready-age': int,
}
schema = vs.Schema(label)
@ -479,7 +480,8 @@ class LabelParser(object):
self.schema(conf)
label = model.Label(conf['name'], conf['image'], conf['flavor'],
conf.get('description'), conf.get('min-ready'))
conf.get('description'), conf.get('min-ready'),
conf.get('max-ready-age'))
label.source_context = conf.get('_source_context')
label.start_mark = conf.get('_start_mark')
label.freeze()
@ -3004,7 +3006,7 @@ class TenantParser(object):
with parse_context.accumulator.catchErrors():
label.validateReferences(shadow_layout)
for section in shadow_layout.sections.values():
with parse_context.errorContext(stanza='section', conf=label):
with parse_context.errorContext(stanza='section', conf=section):
with parse_context.accumulator.catchErrors():
section.validateReferences(shadow_layout)
# Add providers to the shadow (or real) layout

View File

@ -362,6 +362,8 @@ class Launcher:
for node in list(ready_nodes.get(label.name, [])):
if node.is_locked:
continue
if node.hasExpired():
continue
for provider in self.tenant_providers[request.tenant_name]:
if provider.connection_name != node.connection_name:
continue
@ -447,6 +449,9 @@ class Launcher:
tags = provider.getNodeTags(
self.system.system_id, label, node_uuid, provider, request)
node_class = provider.driver.getProviderNodeClass()
expiry_time = None
if label.max_ready_age:
expiry_time = time.time() + label.max_ready_age
node = node_class.new(
ctx,
uuid=node_uuid,
@ -454,6 +459,7 @@ class Launcher:
label_config_hash=label.config_hash,
request_id=request.uuid,
zuul_event_id=request.zuul_event_id,
expiry_time=expiry_time,
connection_name=provider.connection_name,
tenant_name=request.tenant_name,
provider=provider.canonical_name,
@ -530,7 +536,8 @@ class Launcher:
self.wake_event.set()
# Mark outdated nodes w/o a request for cleanup
if not request and not self._hasProvider(node):
if not request and (
node.hasExpired() or not self._hasProvider(node)):
state = node.State.OUTDATED
log.debug("Marking node %s as %s", node, state)
with self.createZKContext(node._lock, self.log) as ctx:
@ -563,6 +570,8 @@ class Launcher:
if node.request_id:
request_exists = bool(self.api.getNodesetRequest(node.request_id))
return not request_exists
elif node.hasExpired():
return True
elif not self._hasProvider(node):
# We no longer have a provider that uses the given node
return True
@ -649,6 +658,9 @@ class Launcher:
tags = provider.getNodeTags(
self.system.system_id, label, node_uuid)
node_class = provider.driver.getProviderNodeClass()
expiry_time = None
if label.max_ready_age:
expiry_time = time.time() + label.max_ready_age
with self.createZKContext(None, self.log) as ctx:
node = node_class.new(
ctx,
@ -658,6 +670,7 @@ class Launcher:
request_id=None,
connection_name=provider.connection_name,
zuul_event_id=uuid.uuid4().hex,
expiry_time=expiry_time,
tenant_name=None,
provider=None,
tags=tags,

View File

@ -1658,13 +1658,15 @@ class Label(ConfigObject):
Labels are associated with provider-specific instance types.
"""
def __init__(self, name, image, flavor, description, min_ready):
def __init__(self, name, image, flavor, description, min_ready,
max_ready_age):
super().__init__()
self.name = name
self.image = image
self.flavor = flavor
self.description = description
self.min_ready = min_ready
self.max_ready_age = max_ready_age
@property
def canonical_name(self):
@ -1687,7 +1689,8 @@ class Label(ConfigObject):
self.image == other.image and
self.flavor == other.flavor and
self.description == other.description and
self.min_ready == other.min_ready)
self.min_ready == other.min_ready and
self.max_ready_age == other.max_ready_age)
def toDict(self):
sc = self.source_context
@ -1698,6 +1701,7 @@ class Label(ConfigObject):
'flavor': self.flavor,
'description': self.description,
'min_ready': self.min_ready,
'max_ready_age': self.max_ready_age,
}
def validateReferences(self, layout):
@ -2454,6 +2458,7 @@ class ProviderNode(zkobject.PolymorphicZKObjectMixin,
uuid=uuid4().hex,
request_id=None,
zuul_event_id=None,
expiry_time=None,
state=self.State.REQUESTED,
label="",
label_config_hash=None,
@ -2513,6 +2518,7 @@ class ProviderNode(zkobject.PolymorphicZKObjectMixin,
uuid=self.uuid,
request_id=self.request_id,
zuul_event_id=self.zuul_event_id,
expiry_time=self.expiry_time,
state=self.state,
label=self.label,
label_config_hash=self.label_config_hash,
@ -2525,6 +2531,9 @@ class ProviderNode(zkobject.PolymorphicZKObjectMixin,
)
return json.dumps(data, sort_keys=True).encode("utf-8")
def hasExpired(self):
    """Return whether this node has outlived its expiry time.

    A falsy ``expiry_time`` (``None`` or ``0``) means that no expiry
    was set for the node, so it never expires.

    :returns: ``True`` if ``expiry_time`` is set and lies before the
        current ``time.time()``, ``False`` otherwise.
    """
    # Coerce to a real bool: the bare ``and`` expression would leak
    # ``None``/``0`` to callers when no expiry time is set.
    return bool(self.expiry_time) and self.expiry_time < time.time()
def getDriverData(self):
return dict()

View File

@ -32,7 +32,8 @@ base_label = vs.Schema({
Optional('image'): Nullable(str),
Optional('flavor'): Nullable(str),
Optional('tags', default=dict): {str: str},
Optional('min_ready', default=0): int
Optional('min_ready', default=0): int,
Optional('max_ready_age', default=0): int,
})
# Label attributes that are common to any kind of ssh-based driver.