Cluster API: use helm client in driver

This adds the initial create and delete of clusters via the
helm charts. However, this references an app cred secret
we have not yet created. We also do not yet monitor the progress
of the create or delete; we just set the cluster into Error.
Fixes for this will come in later patches.

Note we default to using the openstack-cluster chart from
this repo:
https://stackhpc.github.io/capi-helm-charts

We hope this helm chart will eventually move into the
ownership of the magnum project.

Users can modify the default helm chart values by creating
their own charts, and changing the configuration to point
to their own customized helm chart.
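
For example, a deployment could point the driver at its own chart
via magnum.conf (a sketch using the option names added below; the
repo URL and version are illustrative):

    [capi_driver]
    helm_chart_repo = https://example.com/my-capi-charts
    helm_chart_name = openstack-cluster
    helm_chart_version = 1.2.3

The chart version can also be overridden per cluster via the
capi_helm_chart_version label on the cluster template.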

story: 2009780

Change-Id: I1c0c7e734788fe126a9cd173913150f7a9cca6fc
John Garbutt 2023-04-25 09:08:21 +01:00
parent dd60e56fb0
commit 6d930c8e6e
5 changed files with 428 additions and 17 deletions


@@ -301,6 +301,10 @@ class OSDistroFieldNotFound(ResourceNotFound):
code = 400
class KubeVersionPropertyNotFound(Invalid):
message = _("Image %(image_id)s does not have a kube_version property.")
class X509KeyPairNotFound(ResourceNotFound):
message = _("A key pair %(x509keypair)s could not be found.")


@@ -24,9 +24,48 @@ capi_driver_opts = [
"Path to a kubeconfig file for a management cluster,"
"for use in the Cluster API driver. "
"Defaults to the environment variable KUBECONFIG, "
"or if not defined ~/.kube/config"
"or if not defined ~/.kube/config "
"Note we only use the default context within the "
"kubeconfig file."
),
)
),
cfg.StrOpt(
"magnum_namespace_suffix",
default="magnum",
help=(
"Resources for each openstack cluster are created in a "
"separate namespace within the CAPI Management cluster "
"specified by the configuration: capi_driver.kubeconfig_file "
"You should modify this suffix when two magnum deployments "
"want to share a single CAPI management cluster."
),
),
    # TODO(johngarbutt): move this helm chart into magnum ownership
cfg.StrOpt(
"helm_chart_repo",
default="https://stackhpc.github.io/capi-helm-charts",
help=(
"Reference to the helm chart repository for "
"the cluster API driver."
),
),
cfg.StrOpt(
"helm_chart_name",
default="openstack-cluster",
help=(
"Name of the helm chart to use from the repo specified "
"by the config: capi_driver.helm_chart_repo"
),
),
cfg.StrOpt(
"helm_chart_version",
default="0.1.1-dev.0.main.221",
help=(
"Version of the helm chart specified "
"by the config: capi_driver.helm_chart_repo "
"and capi_driver.helm_chart_name"
),
),
]


@@ -10,12 +10,20 @@
# License for the specific language governing permissions and limitations
# under the License.
import re
from oslo_log import log as logging
from magnum.api import utils as api_utils
from magnum.common import clients
from magnum.common import exception
from magnum.common import short_id
from magnum import conf
from magnum.drivers.cluster_api import helm
from magnum.drivers.common import driver
LOG = logging.getLogger(__name__)
CONF = conf.CONF
class Driver(driver.Driver):
@@ -36,8 +44,164 @@ class Driver(driver.Driver):
def update_cluster_status(self, context, cluster):
raise NotImplementedError("don't support update_cluster_status yet")
def _namespace(self, cluster):
# We create clusters in a project-specific namespace
# To generate the namespace, first sanitize the project id
project_id = re.sub("[^a-z0-9]", "", cluster.project_id.lower())
suffix = CONF.capi_driver.magnum_namespace_suffix
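        # e.g. project_id "123-456F" with the default suffix "magnum"
        # gives the namespace "magnum-123456f"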
return f"{suffix}-{project_id}"
def _label(self, cluster, key, default):
all_labels = helm.mergeconcat(
cluster.cluster_template.labels, cluster.labels
)
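        # NOTE: later arguments to mergeconcat win, so cluster labels
        # take precedence over cluster template labels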
if not all_labels:
return default
raw = all_labels.get(key, default)
# NOTE(johngarbutt): filtering untrusted user input
return re.sub(r"[^a-z0-9\.\-\/]+", "", raw)
def _get_chart_version(self, cluster):
version = cluster.cluster_template.labels.get(
"capi_helm_chart_version", CONF.capi_driver.helm_chart_version
)
# NOTE(johngarbutt): filtering untrusted user input
return re.sub(r"[^a-z0-9\.\-]+", "", version)
def _sanitised_name(self, name, suffix=None):
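        # e.g. ("123-456_Fab!!_1!!", "asdf") becomes "123-456-fab-1-asdf"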
return re.sub(
"[^a-z0-9]+",
"-",
(f"{name}-{suffix}" if suffix else name).lower(),
)
def _get_kube_version(self, image):
# The image should have a property containing the Kubernetes version
kube_version = image.get("kube_version")
if not kube_version:
raise exception.KubeVersionPropertyNotFound(image_id=image.id)
return kube_version.lstrip("v")
def _get_image_details(self, context, image_identifier):
osc = clients.OpenStackClients(context)
image = api_utils.get_openstack_resource(
osc.glance().images, image_identifier, "images"
)
return image.id, self._get_kube_version(image)
def _get_app_cred_name(self, cluster):
return self._sanitised_name(
self._get_chart_release_name(cluster), "cloud-credentials"
)
def _get_monitoring_enabled(self, cluster):
mon_label = self._label(cluster, "monitoring_enabled", "")
        # NOTE(mkjpryor) default off, like the heat driver,
        # as it requires cinder and takes a while
return mon_label == "true"
def _get_kube_dash_enabled(self, cluster):
kube_dash_label = self._label(cluster, "kube_dashboard_enabled", "")
# NOTE(mkjpryor) default on, like the heat driver
return kube_dash_label != "false"
def _update_helm_release(self, context, cluster):
cluster_template = cluster.cluster_template
image_id, kube_version = self._get_image_details(
context, cluster_template.image_id
)
values = {
"kubernetesVersion": kube_version,
"machineImageId": image_id,
# TODO(johngarbutt): need to generate app creds
"cloudCredentialsSecretName": self._get_app_cred_name(cluster),
# TODO(johngarbutt): need to respect requested networks
"clusterNetworking": {
"internalNetwork": {
"nodeCidr": self._label(
cluster, "fixed_subnet_cidr", "10.0.0.0/24"
),
}
},
"apiServer": {
"enableLoadBalancer": True,
"loadBalancerProvider": self._label(
cluster, "octavia_provider", "amphora"
),
},
"controlPlane": {
"machineFlavor": cluster.master_flavor_id,
"machineCount": cluster.master_count,
},
"addons": {
"monitoring": {
"enabled": self._get_monitoring_enabled(cluster)
},
"kubernetesDashboard": {
"enabled": self._get_kube_dash_enabled(cluster)
},
# TODO(mkjpryor): can't enable ingress until code exists to
# remove the load balancer
"ingress": {"enabled": False},
},
"nodeGroups": [
{
"name": self._sanitised_name(ng.name),
"machineFlavor": ng.flavor_id,
"machineCount": ng.node_count,
}
for ng in cluster.nodegroups
if ng.role != "master"
],
}
if cluster_template.dns_nameserver:
dns_nameservers = cluster_template.dns_nameserver.split(",")
values["clusterNetworking"]["dnsNameservers"] = dns_nameservers
if cluster.keypair:
values["machineSSHKeyName"] = cluster.keypair
chart_version = self._get_chart_version(cluster)
self._helm_client.install_or_upgrade(
self._get_chart_release_name(cluster),
CONF.capi_driver.helm_chart_name,
values,
repo=CONF.capi_driver.helm_chart_repo,
version=chart_version,
namespace=self._namespace(cluster),
)
def _generate_release_name(self, cluster):
if cluster.stack_id:
return
        # Make sure there are no duplicate names,
        # by generating a 12 character random id
random_bit = short_id.generate_id()
base_name = self._sanitised_name(cluster.name)
        # valid release names are at most 53 chars long
        # and the random id is 12 characters,
        # but we also use this name to derive hostnames
trimmed_name = base_name[:30]
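        # i.e. 30 chars + "-" + 12 character id = 43 chars in total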
# Save the full name, so users can rename in the API
cluster.stack_id = f"{trimmed_name}-{random_bit}".lower()
# be sure to save this before we use it
cluster.save()
def _get_chart_release_name(self, cluster):
return cluster.stack_id
def create_cluster(self, context, cluster, cluster_create_timeout):
raise NotImplementedError("don't support create yet")
LOG.info("Starting to create cluster %s", cluster.uuid)
        # we generate this name (on the initial create call only)
        # so we avoid issues with duplicate cluster names,
        # and it makes renaming clusters in the API possible
self._generate_release_name(cluster)
self._update_helm_release(context, cluster)
def update_cluster(
self, context, cluster, scale_manager=None, rollback=False
@@ -45,7 +209,15 @@ class Driver(driver.Driver):
raise NotImplementedError("don't support update yet")
def delete_cluster(self, context, cluster):
raise NotImplementedError("don't support delete yet")
LOG.info("Starting to delete cluster %s", cluster.uuid)
# Begin the deletion of the cluster resources by uninstalling the
# Helm release
# Note that this just marks the resources for deletion - it does not
# wait for the resources to be deleted
self._helm_client.uninstall_release(
self._get_chart_release_name(cluster),
namespace=self._namespace(cluster),
)
def resize_cluster(
self,


@@ -338,6 +338,7 @@ def get_nodegroups_for_cluster(**kw):
node_count=kw.get('node_count', 3),
status=kw.get('worker_status', 'CREATE_COMPLETE'),
status_reason=kw.get('worker_reason', 'Completed successfully'),
flavor_id=kw.get('flavor_id', None),
image_id=kw.get('image_id', 'test_image')
)


@@ -9,11 +9,18 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from unittest import mock
from magnum.common import exception
from magnum import conf
from magnum.drivers.cluster_api import driver
from magnum.drivers.cluster_api import helm
from magnum import objects
from magnum.tests.unit.db import base
from magnum.tests.unit.objects import utils as obj_utils
CONF = conf.CONF
class ClusterAPIDriverTest(base.DbTestCase):
def setUp(self):
@@ -24,6 +31,7 @@ class ClusterAPIDriverTest(base.DbTestCase):
name="cluster_example_$A",
master_flavor_id="flavor_small",
flavor_id="flavor_medium",
stack_id="cluster-example-a-111111111111",
)
def test_provides(self):
@@ -40,21 +48,208 @@ class ClusterAPIDriverTest(base.DbTestCase):
self.cluster_obj,
)
def test_namespace(self):
self.cluster_obj.project_id = "123-456F"
namespace = self.driver._namespace(self.cluster_obj)
self.assertEqual("magnum-123456f", namespace)
def test_label_return_default(self):
result = self.driver._label(self.cluster_obj, "foo", "bar")
self.assertEqual("bar", result)
def test_label_return_template(self):
self.cluster_obj.cluster_template.labels = dict(foo=42)
result = self.driver._label(self.cluster_obj, "foo", "bar")
self.assertEqual("42", result)
def test_label_return_cluster(self):
self.cluster_obj.labels = dict(foo=41)
self.cluster_obj.cluster_template.labels = dict(foo=42)
result = self.driver._label(self.cluster_obj, "foo", "bar")
self.assertEqual("41", result)
def test_sanitised_name_no_suffix(self):
self.assertEqual(
"123-456fab", self.driver._sanitised_name("123-456Fab")
)
def test_sanitised_name_with_suffix(self):
self.assertEqual(
"123-456-fab-1-asdf",
self.driver._sanitised_name("123-456_Fab!!_1!!", "asdf"),
)
self.assertEqual(
"123-456-fab-1-asdf",
self.driver._sanitised_name("123-456_Fab-1", "asdf"),
)
def test_get_kube_version_raises(self):
mock_image = mock.Mock()
mock_image.get.return_value = None
mock_image.id = "myid"
e = self.assertRaises(
exception.KubeVersionPropertyNotFound,
self.driver._get_kube_version,
mock_image,
)
self.assertEqual(
"Image myid does not have a kube_version property.", str(e)
)
mock_image.get.assert_called_once_with("kube_version")
def test_get_kube_version_works(self):
mock_image = mock.Mock()
mock_image.get.return_value = "v1.27.9"
result = self.driver._get_kube_version(mock_image)
self.assertEqual("1.27.9", result)
mock_image.get.assert_called_once_with("kube_version")
@mock.patch("magnum.common.clients.OpenStackClients")
@mock.patch("magnum.api.utils.get_openstack_resource")
def test_get_image_details(self, mock_get, mock_osc):
mock_image = mock.Mock()
mock_image.get.return_value = "v1.27.9"
mock_image.id = "myid"
mock_get.return_value = mock_image
id, version = self.driver._get_image_details(
self.context, "myimagename"
)
self.assertEqual("1.27.9", version)
self.assertEqual("myid", id)
mock_image.get.assert_called_once_with("kube_version")
mock_get.assert_called_once_with(mock.ANY, "myimagename", "images")
    def test_get_chart_release_name_length(self):
self.cluster_obj.stack_id = "foo"
result = self.driver._get_chart_release_name(self.cluster_obj)
self.assertEqual("foo", result)
def test_generate_release_name_skip(self):
self.cluster_obj.stack_id = "foo"
self.driver._generate_release_name(self.cluster_obj)
self.assertEqual("foo", self.cluster_obj.stack_id)
def test_generate_release_name_generates(self):
self.cluster_obj.stack_id = None
self.cluster_obj.name = "a" * 77
self.driver._generate_release_name(self.cluster_obj)
first = self.cluster_obj.stack_id
self.assertEqual(43, len(first))
self.assertTrue(self.cluster_obj.name[:30] in first)
self.cluster_obj.stack_id = None
self.driver._generate_release_name(self.cluster_obj)
second = self.cluster_obj.stack_id
self.assertNotEqual(first, second)
self.assertEqual(43, len(second))
self.assertTrue(self.cluster_obj.name[:30] in second)
def test_get_monitoring_enabled_from_template(self):
self.cluster_obj.cluster_template.labels["monitoring_enabled"] = "true"
result = self.driver._get_monitoring_enabled(self.cluster_obj)
self.assertTrue(result)
def test_get_kube_dash_enabled_from_template(self):
self.cluster_obj.cluster_template.labels[
"kube_dashboard_enabled"
] = "false"
result = self.driver._get_kube_dash_enabled(self.cluster_obj)
self.assertFalse(result)
def test_get_chart_version_from_config(self):
version = self.driver._get_chart_version(self.cluster_obj)
self.assertEqual(CONF.capi_driver.helm_chart_version, version)
def test_get_chart_version_from_template(self):
self.cluster_obj.cluster_template.labels[
"capi_helm_chart_version"
] = "1.42.0"
version = self.driver._get_chart_version(self.cluster_obj)
self.assertEqual("1.42.0", version)
@mock.patch.object(driver.Driver, "_get_image_details")
@mock.patch.object(helm.Client, "install_or_upgrade")
def test_create_cluster(
self,
mock_install,
mock_image,
):
mock_image.return_value = ("imageid1", "1.27.4")
self.cluster_obj.keypair = "kp1"
self.driver.create_cluster(self.context, self.cluster_obj, 10)
app_cred_name = "cluster-example-a-111111111111-cloud-credentials"
mock_install.assert_called_once_with(
"cluster-example-a-111111111111",
"openstack-cluster",
{
"kubernetesVersion": "1.27.4",
"machineImageId": "imageid1",
"cloudCredentialsSecretName": app_cred_name,
"clusterNetworking": {
"internalNetwork": {"nodeCidr": "10.0.0.0/24"},
"dnsNameservers": ["8.8.1.1"],
},
"apiServer": {
"enableLoadBalancer": True,
"loadBalancerProvider": "amphora",
},
"controlPlane": {
"machineFlavor": "flavor_small",
"machineCount": 3,
},
"addons": {
"monitoring": {"enabled": False},
"kubernetesDashboard": {"enabled": True},
"ingress": {"enabled": False},
},
"nodeGroups": [
{
"name": "test-worker",
"machineFlavor": "flavor_medium",
"machineCount": 3,
}
],
"machineSSHKeyName": "kp1",
},
repo=CONF.capi_driver.helm_chart_repo,
version=CONF.capi_driver.helm_chart_version,
namespace="magnum-fakeproject",
)
@mock.patch.object(helm.Client, "uninstall_release")
def test_delete_cluster(self, mock_uninstall):
self.driver.delete_cluster(self.context, self.cluster_obj)
mock_uninstall.assert_called_once_with(
"cluster-example-a-111111111111", namespace="magnum-fakeproject"
)
def test_update_cluster(self):