Merge "Allow autoscaling of non-default node groups"
This commit is contained in:
commit
1f1edec328
@ -584,30 +584,20 @@ class Driver(driver.Driver):
|
||||
def _get_autoheal_enabled(self, cluster):
|
||||
return self._get_label_bool(cluster, "auto_healing_enabled", True)
|
||||
|
||||
def _get_autoscale(self, cluster, nodegroup):
|
||||
auto_scale = self._get_label_bool(
|
||||
cluster, "auto_scaling_enabled", False
|
||||
def _get_autoscale_enabled(self, cluster):
|
||||
return self._get_label_bool(cluster, "auto_scaling_enabled", False)
|
||||
|
||||
def _get_autoscale_values(self, cluster, nodegroup):
|
||||
auto_scale = self._get_autoscale_enabled(cluster)
|
||||
min_nodes, max_nodes = self._validate_allowed_node_counts(
|
||||
cluster, nodegroup
|
||||
)
|
||||
if auto_scale:
|
||||
auto_scale_args = dict(autoscale="true")
|
||||
min_nodes = max(1, nodegroup.min_node_count)
|
||||
max_nodes = self._get_label_int(
|
||||
cluster, "max_node_count", min_nodes
|
||||
)
|
||||
if min_nodes > nodegroup.node_count:
|
||||
raise exception.MagnumException(
|
||||
message="min_node_count must be less than or equal to "
|
||||
"default-worker nodegroup node_count."
|
||||
)
|
||||
auto_scale_args = {}
|
||||
if auto_scale and max_nodes is not None:
|
||||
auto_scale_args["autoscale"] = "true"
|
||||
auto_scale_args["machineCountMin"] = min_nodes
|
||||
if max_nodes < min_nodes:
|
||||
raise exception.MagnumException(
|
||||
message="max_node_count must be greater than or "
|
||||
"equal to min_node_count"
|
||||
)
|
||||
auto_scale_args["machineCountMax"] = max_nodes
|
||||
return auto_scale_args
|
||||
return auto_scale
|
||||
return auto_scale_args
|
||||
|
||||
def _get_k8s_keystone_auth_enabled(self, cluster):
|
||||
return self._get_label_bool(cluster, "keystone_auth_enabled", False)
|
||||
@ -651,6 +641,58 @@ class Driver(driver.Driver):
|
||||
f"Minimum {CONF.capi_helm.minimum_flavor_ram} MB required."
|
||||
)
|
||||
|
||||
def _is_default_worker_nodegroup(self, cluster, nodegroup):
|
||||
return cluster.default_ng_worker.id == nodegroup.id
|
||||
|
||||
def _get_node_counts(self, cluster, nodegroup):
|
||||
|
||||
# ClusterAPI provider OpenStack (CAPO) doesn't
|
||||
# support scale to zero yet
|
||||
min_nodes = max(1, nodegroup.min_node_count)
|
||||
max_nodes = nodegroup.max_node_count
|
||||
|
||||
# If min/max node counts are not defined on the default
|
||||
# worker group then fall back to equivalent cluster labels
|
||||
if self._is_default_worker_nodegroup(cluster, nodegroup):
|
||||
# Magnum seems to set min_node_count = 1 on default group
|
||||
# but we still want to be able to override that with labels
|
||||
if min_nodes is None or min_nodes == 1:
|
||||
min_nodes = self._get_label_int(cluster, "min_node_count", 1)
|
||||
if not max_nodes:
|
||||
max_nodes = self._get_label_int(
|
||||
cluster, "max_node_count", min_nodes
|
||||
)
|
||||
|
||||
return min_nodes, max_nodes
|
||||
|
||||
def _validate_allowed_node_counts(self, cluster, nodegroup):
|
||||
min_nodes, max_nodes = self._get_node_counts(cluster, nodegroup)
|
||||
|
||||
LOG.debug(
|
||||
f"Checking if node group {nodegroup.name} has valid "
|
||||
f"node count parameters (count, min, max) = "
|
||||
f"{(nodegroup.node_count, min_nodes, max_nodes)}"
|
||||
)
|
||||
|
||||
if min_nodes is not None:
|
||||
if min_nodes < 1:
|
||||
raise exception.NodeGroupInvalidInput(
|
||||
message="Min node count must be greater than "
|
||||
"or equal to 1 for all node groups."
|
||||
)
|
||||
if min_nodes > nodegroup.node_count:
|
||||
raise exception.NodeGroupInvalidInput(
|
||||
message="Min node count must be less than "
|
||||
"or equal to current node count"
|
||||
)
|
||||
if max_nodes is not None and max_nodes < min_nodes:
|
||||
raise exception.NodeGroupInvalidInput(
|
||||
message="Max node count must be greater than "
|
||||
"or equal to min node count"
|
||||
)
|
||||
|
||||
return min_nodes, max_nodes
|
||||
|
||||
def _get_csi_cinder_availability_zone(self, cluster):
|
||||
return self._label(
|
||||
cluster,
|
||||
@ -763,13 +805,9 @@ class Driver(driver.Driver):
|
||||
machineFlavor=ng.flavor_id,
|
||||
machineCount=ng.node_count,
|
||||
)
|
||||
# Assume first nodegroup is default-worker.
|
||||
if not nodegroup_set:
|
||||
auto_scale = self._get_autoscale(cluster, ng)
|
||||
if auto_scale:
|
||||
nodegroup_item = helm.mergeconcat(
|
||||
nodegroup_item, auto_scale
|
||||
)
|
||||
if self._get_autoscale_enabled(cluster):
|
||||
values = self._get_autoscale_values(cluster, ng)
|
||||
nodegroup_item = helm.mergeconcat(nodegroup_item, values)
|
||||
nodegroup_set.append(nodegroup_item)
|
||||
return nodegroup_set
|
||||
|
||||
@ -1048,6 +1086,8 @@ class Driver(driver.Driver):
|
||||
def create_nodegroup(self, context, cluster, nodegroup):
|
||||
nodegroup.status = fields.ClusterStatus.CREATE_IN_PROGRESS
|
||||
self._validate_allowed_flavor(context, nodegroup.flavor_id)
|
||||
if self._get_autoscale_enabled(cluster):
|
||||
self._validate_allowed_node_counts(cluster, nodegroup)
|
||||
nodegroup.save()
|
||||
|
||||
self._update_helm_release(context, cluster)
|
||||
@ -1055,6 +1095,8 @@ class Driver(driver.Driver):
|
||||
def update_nodegroup(self, context, cluster, nodegroup):
|
||||
nodegroup.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
|
||||
self._validate_allowed_flavor(context, nodegroup.flavor_id)
|
||||
if self._get_autoscale_enabled(cluster):
|
||||
self._validate_allowed_node_counts(cluster, nodegroup)
|
||||
nodegroup.save()
|
||||
|
||||
self._update_helm_release(context, cluster)
|
||||
|
@ -10,6 +10,7 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
from unittest import mock
|
||||
from uuid import uuid4
|
||||
|
||||
from magnum.common import exception
|
||||
from magnum.common import neutron
|
||||
@ -2702,7 +2703,7 @@ class ClusterAPIDriverTest(base.DbTestCase):
|
||||
)
|
||||
self.assertRaises(
|
||||
exception.MagnumException,
|
||||
self.driver._get_autoscale,
|
||||
self.driver._get_autoscale_values,
|
||||
self.cluster_obj,
|
||||
self.cluster_obj.nodegroups[0],
|
||||
)
|
||||
@ -2755,10 +2756,9 @@ class ClusterAPIDriverTest(base.DbTestCase):
|
||||
helm_install_values["nodeGroups"][0]["autoscale"],
|
||||
auto_scale_labels["auto_scaling_enabled"],
|
||||
)
|
||||
# min_node_count is hardcode to max(1, ng.min_node_count)
|
||||
self.assertEqual(
|
||||
helm_install_values["nodeGroups"][0]["machineCountMin"],
|
||||
self.cluster_obj.nodegroups[0].min_node_count,
|
||||
auto_scale_labels["min_node_count"],
|
||||
)
|
||||
self.assertEqual(
|
||||
helm_install_values["nodeGroups"][0]["machineCountMax"],
|
||||
@ -2821,3 +2821,163 @@ class ClusterAPIDriverTest(base.DbTestCase):
|
||||
"machineCountMax",
|
||||
helm_install_values["nodeGroups"][0],
|
||||
)
|
||||
|
||||
@mock.patch.object(
|
||||
driver.Driver,
|
||||
"_storageclass_definitions",
|
||||
return_value=mock.ANY,
|
||||
)
|
||||
@mock.patch.object(driver.Driver, "_validate_allowed_flavor")
|
||||
@mock.patch.object(neutron, "get_network", autospec=True)
|
||||
@mock.patch.object(
|
||||
driver.Driver, "_ensure_certificate_secrets", autospec=True
|
||||
)
|
||||
@mock.patch.object(driver.Driver, "_create_appcred_secret", autospec=True)
|
||||
@mock.patch.object(kubernetes.Client, "load", autospec=True)
|
||||
@mock.patch.object(driver.Driver, "_get_image_details", autospec=True)
|
||||
@mock.patch.object(helm.Client, "install_or_upgrade", autospec=True)
|
||||
def test_nodegroup_node_count_validation(
|
||||
self,
|
||||
mock_install,
|
||||
mock_image,
|
||||
mock_load,
|
||||
mock_appcred,
|
||||
mock_certs,
|
||||
mock_get_net,
|
||||
mock_validate_allowed_flavor,
|
||||
mock_storageclasses,
|
||||
):
|
||||
auto_scale_labels = dict(
|
||||
auto_scaling_enabled="true", min_node_count=2, max_node_count=6
|
||||
)
|
||||
self.cluster_obj.labels = auto_scale_labels
|
||||
|
||||
mock_image.return_value = (
|
||||
"imageid1",
|
||||
"1.27.4",
|
||||
"ubuntu",
|
||||
)
|
||||
|
||||
# Create cluster with extra node groups
|
||||
ng = obj_utils.create_test_nodegroup(
|
||||
self.context, min_node_count=3, max_node_count=2
|
||||
)
|
||||
self.cluster_obj.nodegroups.append(ng)
|
||||
self.assertRaises(
|
||||
exception.NodeGroupInvalidInput,
|
||||
self.driver.create_cluster,
|
||||
self.context,
|
||||
self.cluster_obj,
|
||||
"not-used",
|
||||
)
|
||||
|
||||
@mock.patch.object(
|
||||
driver.Driver,
|
||||
"_storageclass_definitions",
|
||||
return_value=mock.ANY,
|
||||
)
|
||||
@mock.patch.object(driver.Driver, "_validate_allowed_flavor")
|
||||
@mock.patch.object(neutron, "get_network", autospec=True)
|
||||
@mock.patch.object(
|
||||
driver.Driver, "_ensure_certificate_secrets", autospec=True
|
||||
)
|
||||
@mock.patch.object(driver.Driver, "_create_appcred_secret", autospec=True)
|
||||
@mock.patch.object(kubernetes.Client, "load", autospec=True)
|
||||
@mock.patch.object(driver.Driver, "_get_image_details", autospec=True)
|
||||
@mock.patch.object(helm.Client, "install_or_upgrade", autospec=True)
|
||||
def test_create_extra_nodegroup_auto_scale_values(
|
||||
self,
|
||||
mock_install,
|
||||
mock_image,
|
||||
mock_load,
|
||||
mock_appcred,
|
||||
mock_certs,
|
||||
mock_get_net,
|
||||
mock_validate_allowed_flavor,
|
||||
mock_storageclasses,
|
||||
):
|
||||
auto_scale_labels = dict(
|
||||
auto_scaling_enabled="true", min_node_count=2, max_node_count=6
|
||||
)
|
||||
self.cluster_obj.labels = auto_scale_labels
|
||||
|
||||
mock_image.return_value = (
|
||||
"imageid1",
|
||||
"1.27.4",
|
||||
"ubuntu",
|
||||
)
|
||||
|
||||
# Create cluster with extra node groups
|
||||
non_auto_scale_nodegroup = obj_utils.create_test_nodegroup(
|
||||
self.context,
|
||||
uuid=uuid4(),
|
||||
id=123,
|
||||
name="non-autoscale-group",
|
||||
node_count=1,
|
||||
)
|
||||
self.cluster_obj.nodegroups.append(non_auto_scale_nodegroup)
|
||||
auto_scale_nodegroup = obj_utils.create_test_nodegroup(
|
||||
self.context,
|
||||
uuid=uuid4(),
|
||||
id=456,
|
||||
name="autoscale-group",
|
||||
node_count=1,
|
||||
min_node_count=0,
|
||||
max_node_count=3,
|
||||
)
|
||||
self.cluster_obj.nodegroups.append(auto_scale_nodegroup)
|
||||
self.driver.create_cluster(self.context, self.cluster_obj, 10)
|
||||
for ng in self.cluster_obj.nodegroups:
|
||||
print(ng)
|
||||
|
||||
# Unpack some values for asserting against
|
||||
helm_install_values = mock_install.call_args[0][3]
|
||||
helm_node_groups = helm_install_values["nodeGroups"]
|
||||
helm_values_default_nodegroup = [
|
||||
ng
|
||||
for ng in helm_node_groups
|
||||
if ng["name"] == self.cluster_obj.default_ng_worker.name
|
||||
]
|
||||
helm_values_autoscale_nodegroup = [
|
||||
ng
|
||||
for ng in helm_node_groups
|
||||
if ng["name"] == auto_scale_nodegroup.name
|
||||
]
|
||||
helm_values_non_autoscale_nodegroup = [
|
||||
ng
|
||||
for ng in helm_node_groups
|
||||
if ng["name"] == non_auto_scale_nodegroup.name
|
||||
]
|
||||
|
||||
# Check that node groups were passed into Helm values correctly
|
||||
self.assertEqual(len(helm_values_default_nodegroup), 1)
|
||||
self.assertEqual(len(helm_values_autoscale_nodegroup), 1)
|
||||
self.assertEqual(len(helm_values_non_autoscale_nodegroup), 1)
|
||||
|
||||
# Check default node group values
|
||||
ng = helm_values_default_nodegroup[0]
|
||||
self.assertEqual(ng["autoscale"], "true")
|
||||
self.assertEqual(
|
||||
ng["machineCountMin"], auto_scale_labels["min_node_count"]
|
||||
)
|
||||
self.assertEqual(
|
||||
ng["machineCountMax"], auto_scale_labels["max_node_count"]
|
||||
)
|
||||
|
||||
# Check extra autoscaling node group values
|
||||
# NOTE: CAPO doesn't support scale to zero so
|
||||
# min node count should be max(1, ng.min_node_count)
|
||||
ng = helm_values_autoscale_nodegroup[0]
|
||||
self.assertEqual(ng["autoscale"], "true")
|
||||
self.assertEqual(
|
||||
ng["machineCountMin"], max(1, auto_scale_nodegroup.min_node_count)
|
||||
)
|
||||
self.assertEqual(
|
||||
ng["machineCountMax"], auto_scale_nodegroup.max_node_count
|
||||
)
|
||||
|
||||
# Check extra non-autoscaling node group values
|
||||
ng = helm_values_non_autoscale_nodegroup[0]
|
||||
self.assertEqual(ng.get("autoscale"), None)
|
||||
self.assertEqual(ng.get("machineCountMin"), None)
|
||||
self.assertEqual(ng.get("machineCountMax"), None)
|
||||
|
@ -0,0 +1,10 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Adds support for autoscaling of non-default worker node groups. The min/max
|
||||
node count properties on each node group are passed to the autoscaler to
|
||||
determine autoscaling behaviour. If the target cluster has the
|
||||
`auto_scaling_enabled` cluster label set to `true` then any node groups with
|
||||
both `min_node_count`` and `max_node_count` properties set will be allowed
|
||||
to autoscale between these min and max node counts depending on the cluster's
|
||||
current workload.
|
Loading…
x
Reference in New Issue
Block a user