Improve default min/max node count behaviour when autoscaling is enabled
This change fixes an issue where creating a cluster with 2 or more nodes in the default worker group and with `auto_scaling_enabled=true` but with neither `{min,max}_node_count` labels set would lead to a situation where the default worker group would only have 1 node instead of 2. The reason for this seems to be that the `min_node_count` and `max_node_count` values which are passed through to CAPI take priority over the initial `node_count` field, and the min count currently defaults to 1. In these cases, the expected user behaviour is to have `node_count` == `min_node_count` == `max_node_count` and for autoscaling to be disabled on the default node group. Change-Id: I65e65bf64c97cf5ab659d6d7d158c32692a63348
This commit is contained in:
parent
957ce61b3c
commit
318b1269ef
@ -593,7 +593,7 @@ class Driver(driver.Driver):
|
||||
cluster, nodegroup
|
||||
)
|
||||
auto_scale_args = {}
|
||||
if auto_scale and max_nodes is not None:
|
||||
if auto_scale and min_nodes != max_nodes:
|
||||
auto_scale_args["autoscale"] = "true"
|
||||
auto_scale_args["machineCountMin"] = min_nodes
|
||||
auto_scale_args["machineCountMax"] = max_nodes
|
||||
@ -646,22 +646,37 @@ class Driver(driver.Driver):
|
||||
|
||||
def _get_node_counts(self, cluster, nodegroup):
|
||||
|
||||
# ClusterAPI provider OpenStack (CAPO) doesn't
|
||||
# support scale to zero yet
|
||||
min_nodes = max(1, nodegroup.min_node_count)
|
||||
max_nodes = nodegroup.max_node_count
|
||||
# NOTE(scott): In CAPI MachineDeployment resources created by the
|
||||
# capi-helm-charts, the `replicas` field is omitted when autoscaling
|
||||
# is enabled (since we're relinquishing control over node count to
|
||||
# the autoscaler) so if a user creates a nodegroup where only
|
||||
# node_count is provided and min/max are not, we need to be careful
|
||||
# about setting the default min/max values to equal node_count.
|
||||
min_nodes = nodegroup.node_count
|
||||
max_nodes = nodegroup.node_count
|
||||
|
||||
if nodegroup.min_node_count is not None:
|
||||
min_nodes = nodegroup.min_node_count
|
||||
if nodegroup.max_node_count is not None:
|
||||
max_nodes = nodegroup.max_node_count
|
||||
|
||||
# If min/max node counts are not defined on the default
|
||||
# worker group then fall back to equivalent cluster labels
|
||||
if self._is_default_worker_nodegroup(cluster, nodegroup):
|
||||
# Magnum seems to set min_node_count = 1 on default group
|
||||
# but we still want to be able to override that with labels
|
||||
if min_nodes is None or min_nodes == 1:
|
||||
min_nodes = self._get_label_int(cluster, "min_node_count", 1)
|
||||
if not max_nodes:
|
||||
max_nodes = self._get_label_int(
|
||||
cluster, "max_node_count", min_nodes
|
||||
)
|
||||
# NOTE(scott): Magnum seems to set min_node_count = 1
|
||||
# on the default group so treat this as if it were None
|
||||
if nodegroup.min_node_count == 1:
|
||||
min_nodes = nodegroup.node_count
|
||||
|
||||
# We still want to be able to override the default node
|
||||
# group values with labels for consistent behaviour with
|
||||
# Magnum Heat driver.
|
||||
min_nodes = self._get_label_int(
|
||||
cluster, "min_node_count", min_nodes
|
||||
)
|
||||
max_nodes = self._get_label_int(
|
||||
cluster, "max_node_count", max_nodes
|
||||
)
|
||||
|
||||
return min_nodes, max_nodes
|
||||
|
||||
@ -675,6 +690,8 @@ class Driver(driver.Driver):
|
||||
)
|
||||
|
||||
if min_nodes is not None:
|
||||
# ClusterAPI Provider OpenStack (CAPO)
|
||||
# doesn't support scale to zero yet.
|
||||
if min_nodes < 1:
|
||||
raise exception.NodeGroupInvalidInput(
|
||||
message="Min node count must be greater than "
|
||||
|
@ -2922,7 +2922,7 @@ class ClusterAPIDriverTest(base.DbTestCase):
|
||||
id=456,
|
||||
name="autoscale-group",
|
||||
node_count=1,
|
||||
min_node_count=0,
|
||||
min_node_count=1,
|
||||
max_node_count=3,
|
||||
)
|
||||
self.cluster_obj.nodegroups.append(auto_scale_nodegroup)
|
||||
@ -3143,3 +3143,49 @@ class ClusterAPIDriverTest(base.DbTestCase):
|
||||
],
|
||||
disk_size_configuration_value,
|
||||
)
|
||||
|
||||
@mock.patch.object(
|
||||
driver.Driver,
|
||||
"_storageclass_definitions",
|
||||
return_value=mock.ANY,
|
||||
)
|
||||
@mock.patch.object(driver.Driver, "_validate_allowed_flavor")
|
||||
@mock.patch.object(neutron, "get_network", autospec=True)
|
||||
@mock.patch.object(
|
||||
driver.Driver, "_ensure_certificate_secrets", autospec=True
|
||||
)
|
||||
@mock.patch.object(driver.Driver, "_create_appcred_secret", autospec=True)
|
||||
@mock.patch.object(kubernetes.Client, "load", autospec=True)
|
||||
@mock.patch.object(driver.Driver, "_get_image_details", autospec=True)
|
||||
@mock.patch.object(helm.Client, "install_or_upgrade", autospec=True)
|
||||
def test_nodegroup_node_count_min_max_equal(
|
||||
self,
|
||||
mock_install,
|
||||
mock_image,
|
||||
mock_load,
|
||||
mock_appcred,
|
||||
mock_certs,
|
||||
mock_get_net,
|
||||
mock_validate_allowed_flavor,
|
||||
mock_storageclasses,
|
||||
):
|
||||
# When autoscaling is enabled but no min/max node counts are
|
||||
# provided for the default node group, we want autoscaling to
|
||||
# be disabled on the default node group.
|
||||
self.cluster_obj.labels = {"auto_scaling_enabled": "true"}
|
||||
mock_image.return_value = (
|
||||
"imageid1",
|
||||
"1.27.4",
|
||||
"ubuntu",
|
||||
)
|
||||
self.driver.create_cluster(self.context, self.cluster_obj, 10)
|
||||
helm_install_values = mock_install.call_args[0][3]
|
||||
helm_values_default_ng = next(
|
||||
ng
|
||||
for ng in helm_install_values["nodeGroups"]
|
||||
if ng["name"] == self.cluster_obj.default_ng_worker.name
|
||||
)
|
||||
self.assertEqual(helm_values_default_ng.get("autoscale"), None)
|
||||
self.assertEqual(helm_values_default_ng.get("machineCountMin"), None)
|
||||
self.assertEqual(helm_values_default_ng.get("machineCountMax"), None)
|
||||
self.assertEqual(helm_values_default_ng.get("machineCount"), 3)
|
||||
|
@ -0,0 +1,8 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Fixes an issue where creating a cluster with 2 or more
|
||||
nodes in the default worker group and with `auto_scaling_enabled=true`
|
||||
but with neither `{min,max}_node_count` labels set would lead
|
||||
to a situation where the default worker group would only have 1
|
||||
node instead of 2.
|
Loading…
x
Reference in New Issue
Block a user