Implement 'max-ready-age' handling
Clean up ready nodes that have exceeded their label's max-ready-age.

Change-Id: I3054d8a1ac9041059215fa20dbb4f1d4d12f8765
This commit is contained in:
parent
e8f0a37e1f
commit
3c7c70faae
@ -3,6 +3,7 @@
|
||||
image: debian
|
||||
flavor: normal
|
||||
min-ready: 1
|
||||
max-ready-age: 900
|
||||
|
||||
- provider:
|
||||
name: aws-eu-central-1-main
|
||||
|
@ -68,6 +68,13 @@ class LauncherBaseTestCase(ZuulTestCase):
|
||||
self.mock_aws.stop()
|
||||
super().tearDown()
|
||||
|
||||
def _nodes_by_label(self):
    """Group the launcher's cached nodes by label.

    Reads every item from ``self.launcher.api.nodes_cache`` and buckets
    it under its ``label`` attribute.

    Returns:
        A ``defaultdict(list)`` mapping label to the list of cached
        nodes with that label.  Looking up a label that has no nodes
        yields an empty list instead of raising ``KeyError``.
    """
    nodes_by_label = defaultdict(list)
    for node in self.launcher.api.nodes_cache.getItems():
        nodes_by_label[node.label].append(node)
    return nodes_by_label
|
||||
|
||||
|
||||
class TestLauncher(LauncherBaseTestCase):
|
||||
debian_return_data = {
|
||||
@ -732,6 +739,46 @@ class TestMinReadyLauncher(LauncherBaseTestCase):
|
||||
self.assertGreaterEqual(len(nodes), 3)
|
||||
self.assertLessEqual(len(nodes), 5)
|
||||
|
||||
def test_max_ready_age(self):
    """Check that a ready node past its expiry time is cleaned up.

    The test waits for the min-ready nodes to become ready, forces the
    single 'debian-emea' node to be expired by rewriting its
    ``expiry_time``, and then waits until that node has disappeared
    from the cache and the set of nodes is fully ready again with
    exactly one 'debian-emea' node.
    """
    for _ in iterate_timeout(60, "nodes to be ready"):
        nodes = self.launcher.api.nodes_cache.getItems()
        # Since we are randomly picking a provider to fill the
        # min-ready slots we might end up with 3-5 nodes
        # depending on the choice of providers.
        if not 3 <= len(nodes) <= 5:
            continue
        if all(n.state == n.State.READY for n in nodes):
            break

    self.waitUntilSettled()
    nodes = self.launcher.api.nodes_cache.getItems()
    self.assertGreaterEqual(len(nodes), 3)
    self.assertLessEqual(len(nodes), 5)

    nodes_by_label = self._nodes_by_label()
    self.assertEqual(1, len(nodes_by_label['debian-emea']))
    node = nodes_by_label['debian-emea'][0]

    ctx = self.createZKContext(None)
    # Acquire the lock before entering the try block so that a failing
    # acquire is reported as-is instead of being followed (and possibly
    # masked) by a release of a lock that was never taken.
    node.acquireLock(ctx)
    try:
        # A timestamp of 1 (one second after the epoch) is always in
        # the past, so the node counts as expired from now on.
        node.updateAttributes(ctx, expiry_time=1)
    finally:
        node.releaseLock()

    for _ in iterate_timeout(60, "node to be cleaned up"):
        nodes = self.launcher.api.nodes_cache.getItems()
        # Keep waiting while the expired node is still in the cache.
        if node in nodes:
            continue
        if not 3 <= len(nodes) <= 5:
            continue
        if all(n.state == n.State.READY for n in nodes):
            break

    self.waitUntilSettled()
    nodes_by_label = self._nodes_by_label()
    self.assertEqual(1, len(nodes_by_label['debian-emea']))
|
||||
|
||||
|
||||
class TestMinReadyTenantVariant(LauncherBaseTestCase):
|
||||
tenant_config_file = "config/launcher-min-ready/tenant-variant.yaml"
|
||||
@ -769,10 +816,7 @@ class TestMinReadyTenantVariant(LauncherBaseTestCase):
|
||||
nodes = self.launcher.api.nodes_cache.getItems()
|
||||
self.assertEqual(5, len(nodes))
|
||||
|
||||
nodes_by_label = defaultdict(list)
|
||||
for node in nodes:
|
||||
nodes_by_label[node.label].append(node)
|
||||
|
||||
nodes_by_label = self._nodes_by_label()
|
||||
self.assertEqual(4, len(nodes_by_label['debian-normal']))
|
||||
debian_normal_cfg_hashes = {
|
||||
n.label_config_hash for n in nodes_by_label['debian-normal']
|
||||
@ -806,10 +850,7 @@ class TestMinReadyTenantVariant(LauncherBaseTestCase):
|
||||
nodes = self.launcher.api.nodes_cache.getItems()
|
||||
self.assertEqual(5, len(nodes))
|
||||
|
||||
nodes_by_label = defaultdict(list)
|
||||
for node in nodes:
|
||||
nodes_by_label[node.label].append(node)
|
||||
|
||||
nodes_by_label = self._nodes_by_label()
|
||||
self.assertEqual(1, len(nodes_by_label['debian-emea']))
|
||||
self.assertEqual(4, len(nodes_by_label['debian-normal']))
|
||||
debian_normal_cfg_hashes = {
|
||||
|
@ -467,6 +467,7 @@ class LabelParser(object):
|
||||
vs.Required('flavor'): str,
|
||||
'description': str,
|
||||
'min-ready': int,
|
||||
'max-ready-age': int,
|
||||
}
|
||||
schema = vs.Schema(label)
|
||||
|
||||
@ -479,7 +480,8 @@ class LabelParser(object):
|
||||
self.schema(conf)
|
||||
|
||||
label = model.Label(conf['name'], conf['image'], conf['flavor'],
|
||||
conf.get('description'), conf.get('min-ready'))
|
||||
conf.get('description'), conf.get('min-ready'),
|
||||
conf.get('max-ready-age'))
|
||||
label.source_context = conf.get('_source_context')
|
||||
label.start_mark = conf.get('_start_mark')
|
||||
label.freeze()
|
||||
@ -3004,7 +3006,7 @@ class TenantParser(object):
|
||||
with parse_context.accumulator.catchErrors():
|
||||
label.validateReferences(shadow_layout)
|
||||
for section in shadow_layout.sections.values():
|
||||
with parse_context.errorContext(stanza='section', conf=label):
|
||||
with parse_context.errorContext(stanza='section', conf=section):
|
||||
with parse_context.accumulator.catchErrors():
|
||||
section.validateReferences(shadow_layout)
|
||||
# Add providers to the shadow (or real) layout
|
||||
|
@ -362,6 +362,8 @@ class Launcher:
|
||||
for node in list(ready_nodes.get(label.name, [])):
|
||||
if node.is_locked:
|
||||
continue
|
||||
if node.hasExpired():
|
||||
continue
|
||||
for provider in self.tenant_providers[request.tenant_name]:
|
||||
if provider.connection_name != node.connection_name:
|
||||
continue
|
||||
@ -447,6 +449,9 @@ class Launcher:
|
||||
tags = provider.getNodeTags(
|
||||
self.system.system_id, label, node_uuid, provider, request)
|
||||
node_class = provider.driver.getProviderNodeClass()
|
||||
expiry_time = None
|
||||
if label.max_ready_age:
|
||||
expiry_time = time.time() + label.max_ready_age
|
||||
node = node_class.new(
|
||||
ctx,
|
||||
uuid=node_uuid,
|
||||
@ -454,6 +459,7 @@ class Launcher:
|
||||
label_config_hash=label.config_hash,
|
||||
request_id=request.uuid,
|
||||
zuul_event_id=request.zuul_event_id,
|
||||
expiry_time=expiry_time,
|
||||
connection_name=provider.connection_name,
|
||||
tenant_name=request.tenant_name,
|
||||
provider=provider.canonical_name,
|
||||
@ -530,7 +536,8 @@ class Launcher:
|
||||
self.wake_event.set()
|
||||
|
||||
# Mark outdated nodes w/o a request for cleanup
|
||||
if not request and not self._hasProvider(node):
|
||||
if not request and (
|
||||
node.hasExpired() or not self._hasProvider(node)):
|
||||
state = node.State.OUTDATED
|
||||
log.debug("Marking node %s as %s", node, state)
|
||||
with self.createZKContext(node._lock, self.log) as ctx:
|
||||
@ -563,6 +570,8 @@ class Launcher:
|
||||
if node.request_id:
|
||||
request_exists = bool(self.api.getNodesetRequest(node.request_id))
|
||||
return not request_exists
|
||||
elif node.hasExpired():
|
||||
return True
|
||||
elif not self._hasProvider(node):
|
||||
# We no longer have a provider that use the given node
|
||||
return True
|
||||
@ -649,6 +658,9 @@ class Launcher:
|
||||
tags = provider.getNodeTags(
|
||||
self.system.system_id, label, node_uuid)
|
||||
node_class = provider.driver.getProviderNodeClass()
|
||||
expiry_time = None
|
||||
if label.max_ready_age:
|
||||
expiry_time = time.time() + label.max_ready_age
|
||||
with self.createZKContext(None, self.log) as ctx:
|
||||
node = node_class.new(
|
||||
ctx,
|
||||
@ -658,6 +670,7 @@ class Launcher:
|
||||
request_id=None,
|
||||
connection_name=provider.connection_name,
|
||||
zuul_event_id=uuid.uuid4().hex,
|
||||
expiry_time=expiry_time,
|
||||
tenant_name=None,
|
||||
provider=None,
|
||||
tags=tags,
|
||||
|
@ -1658,13 +1658,15 @@ class Label(ConfigObject):
|
||||
Labels are associated with provider-specific instance types.
|
||||
"""
|
||||
|
||||
def __init__(self, name, image, flavor, description, min_ready):
|
||||
def __init__(self, name, image, flavor, description, min_ready,
|
||||
max_ready_age):
|
||||
super().__init__()
|
||||
self.name = name
|
||||
self.image = image
|
||||
self.flavor = flavor
|
||||
self.description = description
|
||||
self.min_ready = min_ready
|
||||
self.max_ready_age = max_ready_age
|
||||
|
||||
@property
|
||||
def canonical_name(self):
|
||||
@ -1687,7 +1689,8 @@ class Label(ConfigObject):
|
||||
self.image == other.image and
|
||||
self.flavor == other.flavor and
|
||||
self.description == other.description and
|
||||
self.min_ready == other.min_ready)
|
||||
self.min_ready == other.min_ready and
|
||||
self.max_ready_age == other.max_ready_age)
|
||||
|
||||
def toDict(self):
|
||||
sc = self.source_context
|
||||
@ -1698,6 +1701,7 @@ class Label(ConfigObject):
|
||||
'flavor': self.flavor,
|
||||
'description': self.description,
|
||||
'min_ready': self.min_ready,
|
||||
'max_ready_age': self.max_ready_age,
|
||||
}
|
||||
|
||||
def validateReferences(self, layout):
|
||||
@ -2454,6 +2458,7 @@ class ProviderNode(zkobject.PolymorphicZKObjectMixin,
|
||||
uuid=uuid4().hex,
|
||||
request_id=None,
|
||||
zuul_event_id=None,
|
||||
expiry_time=None,
|
||||
state=self.State.REQUESTED,
|
||||
label="",
|
||||
label_config_hash=None,
|
||||
@ -2513,6 +2518,7 @@ class ProviderNode(zkobject.PolymorphicZKObjectMixin,
|
||||
uuid=self.uuid,
|
||||
request_id=self.request_id,
|
||||
zuul_event_id=self.zuul_event_id,
|
||||
expiry_time=self.expiry_time,
|
||||
state=self.state,
|
||||
label=self.label,
|
||||
label_config_hash=self.label_config_hash,
|
||||
@ -2525,6 +2531,9 @@ class ProviderNode(zkobject.PolymorphicZKObjectMixin,
|
||||
)
|
||||
return json.dumps(data, sort_keys=True).encode("utf-8")
|
||||
|
||||
def hasExpired(self):
    """Tell whether this node's expiry time has passed.

    Returns:
        ``True`` if ``expiry_time`` is set (truthy) and lies before the
        current ``time.time()``; ``False`` otherwise, including when no
        expiry time is set (``None`` or ``0``).
    """
    # Coerce to a real boolean: a bare ``and`` would leak ``None`` or
    # ``0`` to callers whenever no expiry time is configured.
    return bool(self.expiry_time) and self.expiry_time < time.time()
|
||||
|
||||
def getDriverData(self):
    """Return this node's driver data as a dict.

    This implementation has nothing to report and returns a new, empty
    dict on every call.
    """
    # NOTE(review): presumably overridden by driver-specific node
    # classes to add their own fields -- confirm against the drivers.
    return {}
|
||||
|
||||
|
@ -32,7 +32,8 @@ base_label = vs.Schema({
|
||||
Optional('image'): Nullable(str),
|
||||
Optional('flavor'): Nullable(str),
|
||||
Optional('tags', default=dict): {str: str},
|
||||
Optional('min_ready', default=0): int
|
||||
Optional('min_ready', default=0): int,
|
||||
Optional('max_ready_age', default=0): int,
|
||||
})
|
||||
|
||||
# Label attributes that are common to any kind of ssh-based driver.
|
||||
|
Loading…
x
Reference in New Issue
Block a user