From 619976fe436496424dac4bd8745afed069766377 Mon Sep 17 00:00:00 2001
From: Guillaume Boutry
Date: Tue, 16 Jan 2024 17:20:14 +0100
Subject: [PATCH] Implement sunbeam-clusterd

Sunbeam-clusterd can scale up and down. Currently, the external address
is bound to the peers relation.

Exposes the action `get-credentials`, which returns the URL. In the long
term, it will return the credentials needed to access clusterd securely.

Change-Id: I8b91efe6d96198f5ad3634b9747161225381ded6
---
 .gitignore                                    |   1 +
 charms/sunbeam-clusterd/CONTRIBUTING.md       |  34 ++
 charms/sunbeam-clusterd/LICENSE               | 202 ++++++++++
 charms/sunbeam-clusterd/README.md             |  46 +++
 charms/sunbeam-clusterd/charmcraft.yaml       |  54 +++
 charms/sunbeam-clusterd/rebuild               |   3 +
 charms/sunbeam-clusterd/requirements.txt      |  18 +
 charms/sunbeam-clusterd/src/charm.py          | 345 ++++++++++++++++++
 charms/sunbeam-clusterd/src/clusterd.py       | 173 +++++++++
 .../sunbeam-clusterd/src/relation_handlers.py | 211 +++++++++++
 .../sunbeam-clusterd/tests/unit/__init__.py   |  15 +
 .../sunbeam-clusterd/tests/unit/test_charm.py |  96 +++++
 common.sh                                     |   7 +
 playbooks/zaza-func-test.yaml                 |   8 +-
 render_bundles.py                             |   2 +-
 roles/charm-publish/defaults/main.yaml        |   1 +
 roles/collect-run-data/tasks/k8s.yaml         |  44 +++
 roles/collect-run-data/tasks/main.yaml        |  47 +--
 roles/lxd-cloud/tasks/main.yaml               |  79 ++++
 test-requirements.txt                         |   1 +
 .../sunbeam/charm_tests/clusterd/__init__.py  |   0
 .../sunbeam/charm_tests/clusterd/tests.py     | 172 +++++++++
 tests/machine/smoke.yaml.j2                   |  22 ++
 tests/machine/tests.yaml                      |  25 ++
 tox.ini                                       |  21 +-
 zuul.d/jobs.yaml                              |  48 +++
 zuul.d/project-templates.yaml                 |   6 +
 zuul.d/zuul.yaml                              |   4 +
 28 files changed, 1629 insertions(+), 56 deletions(-)
 create mode 100644 charms/sunbeam-clusterd/CONTRIBUTING.md
 create mode 100644 charms/sunbeam-clusterd/LICENSE
 create mode 100644 charms/sunbeam-clusterd/README.md
 create mode 100644 charms/sunbeam-clusterd/charmcraft.yaml
 create mode 100644 charms/sunbeam-clusterd/rebuild
 create mode 100644 charms/sunbeam-clusterd/requirements.txt
 create mode 100755 charms/sunbeam-clusterd/src/charm.py
 create mode 100644 charms/sunbeam-clusterd/src/clusterd.py
 create mode 100644 charms/sunbeam-clusterd/src/relation_handlers.py
 create mode 100644 charms/sunbeam-clusterd/tests/unit/__init__.py
 create mode 100644 charms/sunbeam-clusterd/tests/unit/test_charm.py
 create mode 100644 roles/collect-run-data/tasks/k8s.yaml
 create mode 100644 roles/lxd-cloud/tasks/main.yaml
 create mode 100644 tests/local/zaza/sunbeam/charm_tests/clusterd/__init__.py
 create mode 100644 tests/local/zaza/sunbeam/charm_tests/clusterd/tests.py
 create mode 100644 tests/machine/smoke.yaml.j2
 create mode 100644 tests/machine/tests.yaml

diff --git a/.gitignore b/.gitignore
index 66aec3e4..97977aef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,4 @@ __pycache__/
 *.auth
 *.pub
 *.token
+tests/*/bundles/
diff --git a/charms/sunbeam-clusterd/CONTRIBUTING.md b/charms/sunbeam-clusterd/CONTRIBUTING.md
new file mode 100644
index 00000000..20e88bcc
--- /dev/null
+++ b/charms/sunbeam-clusterd/CONTRIBUTING.md
@@ -0,0 +1,34 @@
+# Contributing
+
+To make contributions to this charm, you'll need a working [development setup](https://juju.is/docs/sdk/dev-setup).
+
+You can create an environment for development with `tox`:
+
+```shell
+tox devenv -e integration
+source venv/bin/activate
+```
+
+## Testing
+
+This project uses `tox` for managing test environments.
There are some pre-configured environments +that can be used for linting and formatting code when you're preparing contributions to the charm: + +```shell +tox run -e format # update your code according to linting rules +tox run -e lint # code style +tox run -e static # static type checking +tox run -e unit # unit tests +tox run -e integration # integration tests +tox # runs 'format', 'lint', 'static', and 'unit' environments +``` + +## Build the charm + +Build the charm in this git repository using: + +```shell +charmcraft pack +``` + + + +[contributors-guide]: https://opendev.org/openstack/sunbeam-charms/src/branch/main/charms/sunbeam-clusterd/CONTRIBUTING.md +[juju-docs-actions]: https://juju.is/docs/juju/manage-actions +[juju-docs-config-apps]: https://juju.is/docs/configuring-applications +[lp-bugs-charm-sunbeam-clusterd]: https://bugs.launchpad.net/sunbeam-charms/+filebug diff --git a/charms/sunbeam-clusterd/charmcraft.yaml b/charms/sunbeam-clusterd/charmcraft.yaml new file mode 100644 index 00000000..4a16f9b3 --- /dev/null +++ b/charms/sunbeam-clusterd/charmcraft.yaml @@ -0,0 +1,54 @@ +# This file configures Charmcraft. +# See https://juju.is/docs/sdk/charmcraft-config for guidance. + +name: sunbeam-clusterd + +type: charm + +title: Sunbeam Clusterd + +summary: A juju charm to run sunbeam clusterd + +description: | + Manage sunbeam clusterd deployment + +# (Required for 'charm' type) +bases: + - build-on: + - name: ubuntu + channel: "22.04" + run-on: + - name: ubuntu + channel: "22.04" + +parts: + charm: + build-packages: + - git + - libffi-dev + - libssl-dev + - pkg-config + - rustc + - cargo + charm-binary-python-packages: + - cryptography + - jsonschema + - jinja2 + +peers: + peers: + interface: clusterd-peer + +actions: + get-credentials: + description: | + Return information necessary to connect to clusterd. + +config: + options: + snap-channel: + default: "2023.2/edge" + type: string + debug: + default: False + type: boolean diff --git a/charms/sunbeam-clusterd/rebuild b/charms/sunbeam-clusterd/rebuild new file mode 100644 index 00000000..2358b681 --- /dev/null +++ b/charms/sunbeam-clusterd/rebuild @@ -0,0 +1,3 @@ +# This file is used to trigger a build. +# Change uuid to trigger a new build. +886a46db-8f02-4271-a825-91153f83c579 diff --git a/charms/sunbeam-clusterd/requirements.txt b/charms/sunbeam-clusterd/requirements.txt new file mode 100644 index 00000000..7939a110 --- /dev/null +++ b/charms/sunbeam-clusterd/requirements.txt @@ -0,0 +1,18 @@ +# This file is managed centrally by release-tools and should not be modified +# within individual charm repos. See the 'global' dir contents for available +# choices of *requirements.txt files for OpenStack Charms: +# https://github.com/openstack-charmers/release-tools +# + +cryptography +jinja2 +jsonschema +pydantic<2.0 +lightkube +lightkube-models +ops +requests # Apache 2 +requests-unixsocket # Apache 2 +urllib3<1.27,>=1.21.1 # MIT +tenacity + diff --git a/charms/sunbeam-clusterd/src/charm.py b/charms/sunbeam-clusterd/src/charm.py new file mode 100755 index 00000000..115028b1 --- /dev/null +++ b/charms/sunbeam-clusterd/src/charm.py @@ -0,0 +1,345 @@ +#!/usr/bin/env python3 + +# Copyright 2024 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""Sunbeam Clusterd Charm.
+
+This charm manages a clusterd deployment. Clusterd is a service storing
+all the metadata about a sunbeam deployment.
+"""
+
+import logging
+from pathlib import (
+    Path,
+)
+
+import clusterd
+import ops.framework
+import ops_sunbeam.charm as sunbeam_charm
+import requests
+import tenacity
+from charms.operator_libs_linux.v2 import (
+    snap,
+)
+from ops.main import (
+    main,
+)
+from ops_sunbeam.relation_handlers import (
+    RelationHandler,
+)
+from relation_handlers import (
+    ClusterdNewNodeEvent,
+    ClusterdNodeAddedEvent,
+    ClusterdPeerHandler,
+    ClusterdRemoveNodeEvent,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class SunbeamClusterdCharm(sunbeam_charm.OSBaseOperatorCharm):
+    """Charm the service."""
+
+    _state = ops.StoredState()
+    service_name = "sunbeam-clusterd"
+    clusterd_port = 7000
+
+    def __init__(self, framework: ops.Framework) -> None:
+        """Run constructor."""
+        super().__init__(framework)
+        self._state.set_default(channel="config", departed=False)
+        self.framework.observe(self.on.install, self._on_install)
+        self.framework.observe(self.on.stop, self._on_stop)
+        self.framework.observe(
+            self.on.get_credentials_action, self._on_get_credentials_action
+        )
+        self._clusterd = clusterd.ClusterdClient(
+            Path("/var/snap/openstack/common/state/control.socket")
+        )
+
+    def get_relation_handlers(
+        self, handlers: list[RelationHandler] | None = None
+    ) -> list[RelationHandler]:
+        """Setup charm relation handlers."""
+        handlers = handlers or []
+        if self.can_add_handler("peers", handlers):
+            self.peers = ClusterdPeerHandler(
+                self,
+                "peers",
+                self.configure_charm,
+                "peers" in self.mandatory_relations,
+            )
+            handlers.append(self.peers)
+        return super().get_relation_handlers(handlers)
+
+    def _on_install(self, event: ops.InstallEvent) -> None:
+        """Handle install event."""
+        try:
+            self.ensure_snap_present()
+        except TimeoutError:
+            logger.debug("Snap installation failed, retrying.")
+            event.defer()
+            return
+        self.clusterd_ready()
+
+    def _on_stop(self, event: ops.StopEvent) -> None:
+        """Handle stop event."""
+        try:
+            self._clusterd.shutdown()
+        except clusterd.ClusterdUnavailableError:
+            logger.debug("Clusterd not available, skipping shutdown.")
+        snap.SnapCache()["openstack"].stop()
+
+    def _on_get_credentials_action(self, event: ops.ActionEvent) -> None:
+        """Handle get-credentials action."""
+        if not self.peers.interface.state.joined:
+            event.fail("Clusterd not joined yet")
+            return
+
+        event.set_results(
+            {
+                "url": "https://"
+                + self._binding_address()
+                + ":"
+                + str(self.clusterd_port)
+            }
+        )
+
+    def _binding_address(self) -> str:
+        """Return the binding address."""
+        relation = self.model.get_relation("peers")
+
+        if relation is None:
+            raise ValueError("Missing relation peers")
+
+        binding = self.model.get_binding(relation)
+
+        if binding is None:
+            raise ValueError("Missing binding peers")
+
+        if binding.network.bind_address is None:
+            raise ValueError("Missing binding address")
+
+        return str(binding.network.bind_address)
+
+    def ensure_snap_present(self):
+        """Install/refresh snap if needed."""
+        config =
self.model.config.get + snap_channel = config("snap-channel") + + try: + cache = snap.SnapCache() + openstack = cache["openstack"] + if not openstack.present or snap_channel != openstack.channel: + openstack.ensure(snap.SnapState.Latest, channel=snap_channel) + self._state.channel = openstack.channel + self.set_workload_version() + except snap.SnapError as e: + logger.error( + "An exception occurred when installing snap. Reason: %s", + e.message, + ) + + def set_workload_version(self): + """Set workload version.""" + cache = snap.SnapCache() + openstack = cache["openstack"] + if not openstack.present: + return + version = openstack.channel + f"(rev {openstack.revision})" + self.unit.set_workload_version(version) + + def configure_app_leader(self, event: ops.EventBase): + """Configure leader unit.""" + if not self.clusterd_ready(): + logger.debug("Clusterd not ready yet.") + event.defer() + return + if not self.is_leader_ready(): + self.bootstrap_cluster() + self.peers.interface.state.joined = True + super().configure_app_leader(event) + if isinstance(event, ClusterdNewNodeEvent): + self.add_node_to_cluster(event) + + def configure_app_non_leader(self, event: ops.EventBase): + """Configure non-leader unit.""" + super().configure_app_non_leader(event) + if isinstance(event, ClusterdNodeAddedEvent): + self.join_node_to_cluster(event) + + def configure_unit(self, event: ops.EventBase): + """Configure unit.""" + super().configure_unit(event) + self.ensure_snap_present() + if isinstance(event, ClusterdRemoveNodeEvent): + self.remove_node_from_cluster(event) + config = self.model.config.get + snap_data = { + "daemon.debug": config("debug", False), + } + self.set_snap_data(snap_data) + + def set_snap_data(self, snap_data: dict): + """Set snap data on local snap.""" + cache = snap.SnapCache() + openstack = cache["openstack"] + new_settings = {} + for k in sorted(snap_data.keys()): + try: + if snap_data[k] != openstack.get(k): + new_settings[k] = snap_data[k] + except snap.SnapError: + # Trying to retrieve an unset parameter results in a snapError + # so assume the snap.SnapError means there is missing config + # that needs setting. 
+ new_settings[k] = snap_data[k] + if new_settings: + logger.debug(f"Applying new snap settings {new_settings}") + openstack.set(new_settings, typed=True) + else: + logger.debug("Snap settings do not need updating") + + @tenacity.retry( + stop=tenacity.stop_after_attempt(10), + retry=( + tenacity.retry_if_exception_type(clusterd.ClusterdUnavailableError) + | tenacity.retry_if_not_result(lambda result: result) + ), + after=tenacity.after_log(logger, logging.WARNING), + wait=tenacity.wait_exponential(multiplier=1, min=1, max=30), + ) + def clusterd_ready(self) -> bool: + """Check whether clusterd is ready.""" + if not self._clusterd.ready(): + return False + return True + + def bootstrap_cluster(self): + """Bootstrap the cluster.""" + logger.info("Bootstrapping the cluster") + self._clusterd.bootstrap( + self.unit.name.replace("/", "-"), + self._binding_address() + ":" + str(self.clusterd_port), + ) + + def add_node_to_cluster(self, event: ClusterdNewNodeEvent) -> None: + """Generate token for node joining.""" + if event.unit is None: + logger.debug("No unit to add") + return + unit_key = f"{event.unit.name}.join_token" + if self.peers.get_app_data(unit_key): + logger.debug(f"Already generated token for {event.unit.name}") + return + + try: + token = self._clusterd.generate_token( + event.unit.name.replace("/", "-") + ) + except requests.exceptions.HTTPError as e: + if e.response is not None and e.response.status_code >= 500: + logger.error(f"Clusterd error: {str(e)}") + logger.debug("Failed to generate token, retrying.") + event.defer() + return + raise e + self.peers.set_app_data({unit_key: token}) + + def remove_node_from_cluster(self, event: ClusterdRemoveNodeEvent) -> None: + """Remove node from cluster.""" + if event.departing_unit is None: + logger.debug("No unit to remove") + return + + self_departing = event.departing_unit.name == self.unit.name + departing_key = f"{event.departing_unit.name}.join_token" + unit_name = event.departing_unit.name.replace("/", "-") + + logger.debug(f"Departing unit: {event.departing_unit.name}") + try: + logger.debug(f"Removing member {unit_name}") + self._clusterd.remove_node(unit_name, allow_not_found=True) + except clusterd.ClusterdUnavailableError as e: + if "Remote end closed connection without response" in str(e): + logger.debug( + "Forwarded request failed, most likely because member was leader" + " and this member was removed." + ) + return + if self_departing: + logger.debug( + "Happened during self removal, ignoring. Error: %s", e + ) + return + except requests.exceptions.HTTPError as e: + is_503 = e.response is not None and e.response.status_code == 503 + if self_departing and is_503: + logger.debug( + "Clusterd is not initialized, most likely because" + " leader has already removed this unit from clusterd." 
+ " Error: %s", + e.response.text, + ) + return + raise e + finally: + departing_key = f"{event.departing_unit.name}.join_token" + if self.unit.is_leader(): + self.peers.interface._app_data_bag.pop( + departing_key, + None, + ) + + def join_node_to_cluster(self, event: ClusterdNodeAddedEvent) -> None: + """Join node to cluster.""" + token = self.peers.get_app_data(f"{self.unit.name}.join_token") + if token is None: + logger.warning("No token found for unit %s", self.unit.name) + return + member = self.unit.name.replace("/", "-") + if not self.peers.interface.state.joined: + self._clusterd.join( + member, + self._binding_address() + ":" + str(self.clusterd_port), + token, + ) + self.peers.interface.state.joined = True + self.peers.set_unit_data({"joined": "true"}) + + self.status.set(ops.WaitingStatus("Waiting for clusterd role")) + is_role_set = self._wait_until_role_set(member) + if not is_role_set: + logger.debug("Member %s is still pending", member) + event.defer() + return + self.status.set(ops.ActiveStatus()) + + @tenacity.retry( + wait=tenacity.wait_fixed(5), + stop=tenacity.stop_after_delay(300), + retry=tenacity.retry_if_not_result(lambda result: result), + ) + def _wait_until_role_set(self, name: str) -> bool: + member = self._clusterd.get_member(name) + role = member.get("role") + logger.debug(f"Member {name} role: {role}") + if role == "PENDING": + return False + return True + + +if __name__ == "__main__": # pragma: nocover + main(SunbeamClusterdCharm) diff --git a/charms/sunbeam-clusterd/src/clusterd.py b/charms/sunbeam-clusterd/src/clusterd.py new file mode 100644 index 00000000..6a733cf6 --- /dev/null +++ b/charms/sunbeam-clusterd/src/clusterd.py @@ -0,0 +1,173 @@ +# Copyright 2024 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Clusterd client talking over unix socket.""" + +import json +import logging +from pathlib import ( + Path, +) +from urllib.parse import ( + quote, +) + +import requests +import requests_unixsocket +from requests_unixsocket import ( + DEFAULT_SCHEME, +) + +logger = logging.getLogger(__name__) + + +class ClusterdUnavailableError(Exception): + """Raised when the cluster is unavailable.""" + + +class ClusterdClient: + """A client for interacting with the remote client API.""" + + def __init__(self, socket_path: Path): + self._socket_path = socket_path + self._session = requests.sessions.Session() + self._session.mount( + requests_unixsocket.DEFAULT_SCHEME, + requests_unixsocket.UnixAdapter(), + ) + + def _request(self, method: str, path: str, **kwargs) -> dict: + if path.startswith("/"): + path = path[1:] + netloc = quote(str(self._socket_path), safe="") + url = f"{DEFAULT_SCHEME}{netloc}/{path}" + try: + logging.debug("[%s] %s, args=%s", method, url, kwargs) + response = self._session.request(method=method, url=url, **kwargs) + logging.debug("Response(%s) = %s", response, response.text) + except requests.exceptions.ConnectionError as e: + msg = str(e) + if "FileNotFoundError" in msg: + raise ClusterdUnavailableError( + "Sunbeam Cluster socket not found, is clusterd running ?" + " Check with 'snap services openstack.clusterd'", + ) from e + raise ClusterdUnavailableError(msg) + except requests.exceptions.HTTPError as e: + if e.response is not None: + logger.debug( + f"HTTPError: {e.response.status_code}, {e.response.text}" + ) + if e.response.status_code == 503: + raise ClusterdUnavailableError(str(e)) from e + raise e + response.raise_for_status() + return response.json() + + def _get(self, path, **kwargs): + kwargs.setdefault("allow_redirects", True) + return self._request("get", path, **kwargs) + + def _post(self, path, data=None, json=None, **kwargs): + return self._request("post", path, data=data, json=json, **kwargs) + + def _delete(self, path, **kwargs): + return self._request("delete", path, **kwargs) + + def ready(self) -> bool: + """Is the cluster ready.""" + try: + self._get("cluster/1.0/ready") + except ClusterdUnavailableError: + return False + return True + + def shutdown(self): + """Shutdown local clusterd.""" + try: + self._post("cluster/control/shutdown") + except requests.exceptions.HTTPError as e: + if e.response is None: + raise e + is_500 = e.response.status_code == 500 + is_closed_anyway = ( + "but connection was closed anyway" in e.response.text + ) + if is_500 and is_closed_anyway: + logger.debug("Clusterd shutdown") + return + raise e + + def bootstrap(self, name: str, address: str): + """Bootstrap clusterd.""" + data = {"bootstrap": True, "address": address, "name": name} + self._post("/cluster/control", data=json.dumps(data)) + + def join(self, name: str, address: str, token: str) -> None: + """Join node to the micro cluster. + + Verified the token with the list of saved tokens and + joins the node with the given name and address. 
+ """ + data = {"join_token": token, "address": address, "name": name} + self._post("cluster/control", data=json.dumps(data)) + + def get_members(self) -> list[dict]: + """Get cluster members.""" + cluster = self._get("/cluster/1.0/cluster")["metadata"] + return cluster + + def get_member(self, name) -> dict: + """Get cluster member.""" + for member in self.get_members(): + if member["name"] == name: + return member + raise ValueError(f"Member {name} not found") + + def remove_node(self, name: str, allow_not_found: bool = True): + """Delete node.""" + try: + self._delete(f"/cluster/1.0/cluster/{name}") + except requests.exceptions.HTTPError as e: + if e.response is None: + raise e + if e.response.status_code == 404 and allow_not_found: + logger.debug(f"Node {name} not found") + return + is_500 = e.response.status_code == 500 + remote_not_found = is_500 and ( + "No remote exists with the given name" in e.response.text + ) + no_dqlite_member = ( + is_500 + and "No dqlite cluster member exists with the given name" + in e.response.text + ) + delete_with_url = ( + is_500 and f"cluster/1.0/cluster/{name}" in e.response.text + ) + not_found = remote_not_found or no_dqlite_member or delete_with_url + if not_found and allow_not_found: + logger.debug(f"Node {name} not found") + return + raise e + + def generate_token(self, name: str) -> str: + """Generate token for the node. + + Generate a new token for the node with name. + """ + data = {"name": name} + result = self._post("/cluster/1.0/tokens", data=json.dumps(data)) + return str(result["metadata"]) diff --git a/charms/sunbeam-clusterd/src/relation_handlers.py b/charms/sunbeam-clusterd/src/relation_handlers.py new file mode 100644 index 00000000..335d8b1d --- /dev/null +++ b/charms/sunbeam-clusterd/src/relation_handlers.py @@ -0,0 +1,211 @@ +# Copyright 2024 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Clusterd relation definition.""" + +import logging +from typing import ( + Callable, +) + +import ops +import ops_sunbeam.charm as sunbeam_charm +import ops_sunbeam.interfaces as sunbeam_interfaces +import ops_sunbeam.relation_handlers as sunbeam_rhandlers + +logger = logging.getLogger(__name__) + + +class ClusterdNewNodeEvent(ops.RelationEvent): + """charm runs add-node in response to this event, passes join URL back.""" + + +class ClusterdNodeAddedEvent(ops.RelationEvent): + """charm runs join in response to this event using supplied join URL.""" + + +class ClusterdRemoveNodeEvent(ops.RelationDepartedEvent): + """charm runs remove-node to this event.""" + + +class ClusterdEvents(ops.ObjectEvents): + """Events related to Clusterd.""" + + add_node = ops.EventSource(ClusterdNewNodeEvent) + node_added = ops.EventSource(ClusterdNodeAddedEvent) + remove_node = ops.EventSource(ClusterdRemoveNodeEvent) + + +class ClusterdPeers(sunbeam_interfaces.OperatorPeers): + """Interface for the clusterd peers relation.""" + + on = ClusterdEvents() + + def __init__( + self, charm: sunbeam_charm.OSBaseOperatorCharm, relation_name: str + ) -> None: + """Run constructor.""" + super().__init__(charm, relation_name) + + self.state.set_default(joined=False) + self.framework.observe( + charm.on[relation_name].relation_departed, self.on_departed + ) + + def _event_args(self, relation_event, **kwargs): + return dict( + relation=relation_event.relation, + app=relation_event.app, + unit=relation_event.unit, + **kwargs, + ) + + def on_created(self, event: ops.RelationCreatedEvent) -> None: + """Handle relation created event.""" + + def on_changed(self, event: ops.RelationChangedEvent) -> None: + """Handle relation changed event.""" + keys = [ + key + for key in self.get_all_app_data().keys() + if key.endswith(".join_token") + ] + if event.unit and self.model.unit.is_leader(): + if not keys: + logger.debug("We are the seed node.") + # The seed node is implicitly joined, so there's no need to emit an event. + self.state.joined = True + + if f"{event.unit.name}.join_token" in keys: + logger.debug(f"Already added {event.unit.name} to the cluster") + return + + logger.debug("Emitting add_node event") + self.on.add_node.emit(**self._event_args(event)) + else: + # Node already joined as member of cluster + if self.state.joined: + logger.debug(f"Node {self.model.unit.name} already joined") + return + + # Join token not yet generated for this node + if f"{self.model.unit.name}.join_token" not in keys: + logger.debug( + f"Join token not yet generated for node {self.model.unit.name}" + ) + return + + # TOCHK: Can we pull app data and unit data and emit node_added events based on them + # do we need to save joined in unit data which might trigger relation-changed event? + logger.debug("Emitting node_added event") + event_args = self._event_args(event) + event_args["unit"] = self.model.unit + self.on.node_added.emit(**event_args) + + def on_joined(self, event: ops.RelationChangedEvent) -> None: + """Handle relation joined event.""" + # Do nothing or raise an event to charm? 
+ pass + + def on_departed(self, event: ops.RelationDepartedEvent) -> None: + """Handle relation departed event.""" + if event.departing_unit is None: + logger.debug("Don't know which unit is leaving") + return + + logger.debug("Emitting remove_node event") + self.on.remove_node.emit( + **self._event_args( + event, + departing_unit_name=event.departing_unit.name, + ) + ) + + +class ClusterdPeerHandler(sunbeam_rhandlers.BasePeerHandler): + """Base handler for managing a peers relation.""" + + interface: ClusterdPeers + + def __init__( + self, + charm: ops.charm.CharmBase, + relation_name: str, + callback_f: Callable, + mandatory: bool = False, + ): + """Run constructor.""" + super().__init__(charm, relation_name, callback_f, mandatory) + + def setup_event_handler(self) -> ops.Object: + """Configure event handlers for peer relation.""" + logger.debug("Setting up peer event handler") + peer_int = ClusterdPeers(self.charm, self.relation_name) # type: ignore + + self.framework.observe(peer_int.on.add_node, self._on_add_node) + self.framework.observe(peer_int.on.node_added, self._on_node_added) + self.framework.observe(peer_int.on.remove_node, self._on_remove_node) + + return peer_int + + def _on_add_node(self, event: ClusterdNewNodeEvent): + if not self.model.unit.is_leader(): + logger.debug("Ignoring Add node event as this is not leader unit") + return + + if not self.is_leader_ready(): + logger.debug( + "Add node event, deferring the event as leader not ready" + ) + event.defer() + return + + self.callback_f(event) + + def _on_node_added(self, event: ClusterdNodeAddedEvent): + if self.model.unit.name != event.unit.name: + logger.debug( + "Ignoring Node Added event, event received on other node" + ) + return + + self.callback_f(event) + + def _on_remove_node(self, event: ClusterdRemoveNodeEvent): + """Emit remove_node event. + + Emit remove_node event on both the leader and the departing unit. + Sometimes, juju might remove the unit before the leader unit gets notified. + Clusterd does not like a member node lost before a removal. + """ + if event.departing_unit is None: + logger.debug("Don't know which unit is leaving") + return + + unit = self.model.unit + if not unit.is_leader() and unit.name != event.departing_unit.name: + logger.debug( + "Ignoring Remove node event as this is not leader unit" + " or departing unit." + ) + return + + if not self.is_leader_ready(): + logger.debug( + "Remove node event, deferring the event as leader not ready" + ) + event.defer() + return + + self.callback_f(event) diff --git a/charms/sunbeam-clusterd/tests/unit/__init__.py b/charms/sunbeam-clusterd/tests/unit/__init__.py new file mode 100644 index 00000000..19f143b5 --- /dev/null +++ b/charms/sunbeam-clusterd/tests/unit/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2024 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Unit tests for charm.""" diff --git a/charms/sunbeam-clusterd/tests/unit/test_charm.py b/charms/sunbeam-clusterd/tests/unit/test_charm.py new file mode 100644 index 00000000..6eb81d7e --- /dev/null +++ b/charms/sunbeam-clusterd/tests/unit/test_charm.py @@ -0,0 +1,96 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. +# +# Learn more about testing at: https://juju.is/docs/sdk/testing + +"""Unit tests.""" + +import pathlib + +import charm +import ops +import ops.testing as testing +import ops_sunbeam.test_utils as test_utils +import yaml +from mock import ( + patch, +) + + +class _SunbeamClusterdCharm(charm.SunbeamClusterdCharm): + """Clusterd test charm.""" + + def __init__(self, framework): + """Setup event logging.""" + self.seen_events = [] + super().__init__(framework) + + +charmcraft = ( + pathlib.Path(__file__).parents[2] / "charmcraft.yaml" +).read_text() +config = yaml.dump(yaml.safe_load(charmcraft)["config"]) +actions = yaml.dump(yaml.safe_load(charmcraft)["actions"]) + + +class TestCharm(test_utils.CharmTestCase): + """Test the charm.""" + + PATCHES = ["snap", "clusterd"] + + def setUp(self): + """Setup charm and harness.""" + super().setUp(charm, self.PATCHES) + self.harness = testing.Harness( + _SunbeamClusterdCharm, + meta=charmcraft, + config=config, + actions=actions, + ) + ensure_snap_present = patch( + "charm.SunbeamClusterdCharm.ensure_snap_present" + ) + self.ensure_snap_present = ensure_snap_present.start() + self.addCleanup(ensure_snap_present.stop) + self.addCleanup(self.harness.cleanup) + + def initial_setup(self): + """Common setup code for charm tests.""" + self.harness.add_network("10.0.0.10") + self.harness.begin_with_initial_hooks() + + def test_initial_bootstrap(self): + """Test charm is bootstrapped.""" + self.initial_setup() + self.harness.set_leader() + self.harness.charm.on.config_changed.emit() + + self.assertEqual(self.harness.charm.unit.status, ops.ActiveStatus()) + self.ensure_snap_present.assert_called() + self.harness.charm._clusterd.bootstrap.assert_called_once() + + def test_initial_bootstrap_no_leader(self): + """Test charm is bootstrapped.""" + self.initial_setup() + self.harness.charm.on.config_changed.emit() + self.assertEqual( + self.harness.charm.unit.status, + ops.WaitingStatus("(workload) Leader not ready"), + ) + self.harness.charm._clusterd.bootstrap.assert_not_called() + + def test_config(self): + """Test config update.""" + self.initial_setup() + self.harness.set_leader() + self.harness.update_config({"snap-channel": "edge"}) + self.ensure_snap_present.assert_called() + + def test_get_credentials(self): + """Test get credentials action.""" + self.initial_setup() + self.harness.set_leader() + self.harness.charm.on.config_changed.emit() + + output = self.harness.run_action("get-credentials") + self.assertEqual({"url": "https://10.0.0.10:7000"}, output.results) diff --git a/common.sh b/common.sh index e3dc65a8..d101e8bf 100644 --- a/common.sh +++ b/common.sh @@ -129,6 +129,10 @@ EXTERNAL_OPENSTACK_HYPERVISOR_LIBS=( "tls_certificates_interface" ) +EXTERNAL_SUNBEAM_CLUSTERD_LIBS=( + "operator_libs_linux" +) + EXTERNAL_OVN_CENTRAL_LIBS=( "tls_certificates_interface" ) @@ -273,6 +277,7 @@ declare -A INTERNAL_LIBS=( [octavia-k8s]=${INTERNAL_NEUTRON_LIBS[@]} [openstack-exporter-k8s]=${INTERNAL_KEYSTONE_LIBS[@]} [openstack-hypervisor]=${INTERNAL_OPENSTACK_HYPERVISOR_LIBS[@]} + [sunbeam-clusterd]=${NULL_ARRAY[@]} [sunbeam-machine]=${NULL_ARRAY[@]} [ovn-central-k8s]=${INTERNAL_OVN_CENTRAL_LIBS[@]} 
[ovn-relay-k8s]=${INTERNAL_OVN_CENTRAL_LIBS[@]} @@ -299,6 +304,7 @@ declare -A EXTERNAL_LIBS=( [octavia-k8s]=${EXTERNAL_OCTAVIA_LIBS[@]} [openstack-exporter-k8s]=${EXTERNAL_OPENSTACK_EXPORTER_LIBS[@]} [openstack-hypervisor]=${EXTERNAL_OPENSTACK_HYPERVISOR_LIBS[@]} + [sunbeam-clusterd]=${EXTERNAL_SUNBEAM_CLUSTERD_LIBS[@]} [sunbeam-machine]=${NULL_ARRAY[@]} [ovn-central-k8s]=${EXTERNAL_OVN_CENTRAL_LIBS[@]} [ovn-relay-k8s]=${EXTERNAL_OVN_RELAY_LIBS[@]} @@ -325,6 +331,7 @@ declare -A CONFIG_TEMPLATES=( [octavia-k8s]=${CONFIG_TEMPLATES_OCTAVIA[@]} [openstack-exporter-k8s]=${NULL_ARRAY[@]} [openstack-hypervisor]=${NULL_ARRAY[@]} + [sunbeam-clusterd]=${NULL_ARRAY[@]} [sunbeam-machine]=${NULL_ARRAY[@]} [ovn-central-k8s]=${NULL_ARRAY[@]} [ovn-relay-k8s]=${NULL_ARRAY[@]} diff --git a/playbooks/zaza-func-test.yaml b/playbooks/zaza-func-test.yaml index 41e5c315..6d3f0c40 100644 --- a/playbooks/zaza-func-test.yaml +++ b/playbooks/zaza-func-test.yaml @@ -1,6 +1,10 @@ - hosts: all roles: - ensure-tox - - use-docker-mirror - - microk8s-cloud + - role: use-docker-mirror + when: env_type == 'k8s' + - role: microk8s-cloud + when: env_type == 'k8s' + - role: lxd-cloud + when: env_type == 'lxd' - zaza-func-test diff --git a/render_bundles.py b/render_bundles.py index 4a10bddf..d8bae7a3 100644 --- a/render_bundles.py +++ b/render_bundles.py @@ -34,7 +34,7 @@ from jinja2 import ( FileSystemLoader, ) -test_directories = [ dir_.name for dir_ in list(Path("tests").glob('*')) ] +test_directories = [dir_.name for dir_ in list(Path("tests").glob('*')) if dir_.name != "local"] built_charms = glob.glob("*.charm") context = { charm.rstrip(".charm").replace("-", "_"): True for charm in built_charms diff --git a/roles/charm-publish/defaults/main.yaml b/roles/charm-publish/defaults/main.yaml index 8537cc85..48c7fd89 100644 --- a/roles/charm-publish/defaults/main.yaml +++ b/roles/charm-publish/defaults/main.yaml @@ -22,3 +22,4 @@ publish_channels: openstack-exporter-k8s: latest/edge openstack-hypervisor: latest/edge sunbeam-machine: latest/edge + sunbeam-clusterd: latest/edge diff --git a/roles/collect-run-data/tasks/k8s.yaml b/roles/collect-run-data/tasks/k8s.yaml new file mode 100644 index 00000000..bc306150 --- /dev/null +++ b/roles/collect-run-data/tasks/k8s.yaml @@ -0,0 +1,44 @@ +- name: collect microk8s inspection report + args: + executable: /bin/bash + shell: | + cp /var/snap/microk8s/current/inspection-report-*.tar.gz "{{ zuul.project.src_dir }}/log/" + failed_when: false +- name: debug describe pods + args: + executable: /bin/bash + shell: | + set -o pipefail + MODEL="$(juju models --format=json | jq -r '.models[]["short-name"]' | grep '^zaza-')" + microk8s.kubectl describe -n $MODEL pods > {{ zuul.project.src_dir }}/log/describe-pods.txt + CONTROLLER_MODEL="$(microk8s.kubectl get ns | grep controller | awk '{print $1}')" + microk8s.kubectl describe -n $CONTROLLER_MODEL pods > {{ zuul.project.src_dir }}/log/describe-controller-pods.txt + exit 0 +- name: Collect var logs + args: + executable: /bin/bash + shell: | + set -o pipefail + MODEL_NAME=$(juju models --format=json | jq -r '.models[]["short-name"]' | grep '^zaza-') + UNITS=$(juju status --format oneline | awk '{print $2}' | sed -e 's!:!!' 
| grep -Ev '^$' | paste -s -d' ') + for UNIT_NAME in $UNITS; do + POD_NAME=$(echo $UNIT_NAME | sed -e 's!/!-!') + CONTAINERS=$(microk8s.kubectl get pods -n $MODEL_NAME $POD_NAME -o jsonpath='{.spec.containers[*].name}' | sed -e 's/charm //') + for CONTAINER in $CONTAINERS; do + juju ssh --container $CONTAINER -m $MODEL_NAME $UNIT_NAME "tar zcf /tmp/logs.tgz /var/log/" + juju scp --container $CONTAINER -m $MODEL_NAME $UNIT_NAME:/tmp/logs.tgz {{ zuul.project.src_dir }}/log/$POD_NAME-$CONTAINER.tgz + done + done +- name: Collect pods logs + args: + executable: /bin/bash + shell: | + set -o pipefail + LOG_FOLDER={{ zuul.project.src_dir }}/log/pods/ + MODEL_NAME=$(juju models --format=json | jq -r '.models[]["short-name"]' | grep '^zaza-') + mkdir -p $LOG_FOLDER + for pod in $(microk8s.kubectl get pods -n $MODEL_NAME -o=jsonpath='{.items[*].metadata.name}'); + do + echo Collecting logs: $pod + microk8s.kubectl logs --ignore-errors -n $MODEL_NAME --all-containers $pod > $LOG_FOLDER/$pod.log + done diff --git a/roles/collect-run-data/tasks/main.yaml b/roles/collect-run-data/tasks/main.yaml index 81123bc5..c8e2e3c8 100644 --- a/roles/collect-run-data/tasks/main.yaml +++ b/roles/collect-run-data/tasks/main.yaml @@ -8,12 +8,6 @@ path: "{{ zuul.project.src_dir }}/log" state: directory mode: 0755 -- name: collect microk8s inspection report - args: - executable: /bin/bash - shell: | - cp /var/snap/microk8s/current/inspection-report-*.tar.gz "{{ zuul.project.src_dir }}/log/" - failed_when: false - name: debug logs replay args: executable: /bin/bash @@ -23,16 +17,6 @@ juju switch $MODEL juju debug-log --replay > {{ zuul.project.src_dir }}/log/debug-hooks.txt exit 0 -- name: debug describe pods - args: - executable: /bin/bash - shell: | - set -o pipefail - MODEL="$(juju models --format=json | jq -r '.models[]["short-name"]' | grep '^zaza-')" - microk8s.kubectl describe -n $MODEL pods > {{ zuul.project.src_dir }}/log/describe-pods.txt - CONTROLLER_MODEL="$(microk8s.kubectl get ns | grep controller | awk '{print $1}')" - microk8s.kubectl describe -n $CONTROLLER_MODEL pods > {{ zuul.project.src_dir }}/log/describe-controller-pods.txt - exit 0 - name: juju status args: executable: /bin/bash @@ -42,34 +26,6 @@ juju status -m $model > {{ zuul.project.src_dir }}/log/juju-status.$model.txt juju status -m $model --format=yaml > {{ zuul.project.src_dir }}/log/juju-status.$model.yaml done -- name: Collect var logs - args: - executable: /bin/bash - shell: | - set -o pipefail - MODEL_NAME=$(juju models --format=json | jq -r '.models[]["short-name"]' | grep '^zaza-') - UNITS=$(juju status --format oneline | awk '{print $2}' | sed -e 's!:!!' 
| grep -Ev '^$' | paste -s -d' ') - for UNIT_NAME in $UNITS; do - POD_NAME=$(echo $UNIT_NAME | sed -e 's!/!-!') - CONTAINERS=$(microk8s.kubectl get pods -n $MODEL_NAME $POD_NAME -o jsonpath='{.spec.containers[*].name}' | sed -e 's/charm //') - for CONTAINER in $CONTAINERS; do - juju ssh --container $CONTAINER -m $MODEL_NAME $UNIT_NAME "tar zcf /tmp/logs.tgz /var/log/" - juju scp --container $CONTAINER -m $MODEL_NAME $UNIT_NAME:/tmp/logs.tgz {{ zuul.project.src_dir }}/log/$POD_NAME-$CONTAINER.tgz - done - done -- name: Collect pods logs - args: - executable: /bin/bash - shell: | - set -o pipefail - LOG_FOLDER={{ zuul.project.src_dir }}/log/pods/ - MODEL_NAME=$(juju models --format=json | jq -r '.models[]["short-name"]' | grep '^zaza-') - mkdir -p $LOG_FOLDER - for pod in $(microk8s.kubectl get pods -n $MODEL_NAME -o=jsonpath='{.items[*].metadata.name}'); - do - echo Collecting logs: $pod - microk8s.kubectl logs --ignore-errors -n $MODEL_NAME --all-containers $pod > $LOG_FOLDER/$pod.log - done - name: Collect units' info args: executable: /bin/bash @@ -85,6 +41,9 @@ unit_name=$(echo $unit | tr / -) juju show-unit --output="$LOG_FOLDER/$unit_name.yaml" $unit done +- name: Include k8s tasks + include_tasks: k8s.yaml + when: env_type == 'k8s' - name: fetch juju logs synchronize: dest: "{{ zuul.executor.log_root }}" diff --git a/roles/lxd-cloud/tasks/main.yaml b/roles/lxd-cloud/tasks/main.yaml new file mode 100644 index 00000000..46c2cfff --- /dev/null +++ b/roles/lxd-cloud/tasks/main.yaml @@ -0,0 +1,79 @@ +- name: lxd apt packages are not present + apt: + name: + - lxd + - lxd-client + state: absent + purge: true + become: true + +- name: snapd is installed + apt: + name: snapd + become: true + +- name: nftables is installed + apt: + name: nftables + become: true + when: ansible_distribution_release == 'jammy' + +- name: lxd is installed + snap: + name: lxd + channel: "{{ lxd_channel | default('latest/stable') }}" + become: true + +- name: current user is in lxd group + user: + name: "{{ ansible_user }}" + groups: "lxd" + append: true + become: true + +- name: reset ssh connection to apply permissions from new group + meta: reset_connection + +- name: initialize lxd + command: + cmd: lxd init --auto + +- name: allow packets from lxd bridge + command: nft insert rule filter openstack-INPUT iif lxdbr0 accept + become: true + when: ansible_distribution_release == 'jammy' + +- name: lxd is running and ready + command: + cmd: lxd waitready + +- name: juju is installed + snap: + name: juju + classic: "{{ juju_classic_mode | default(true) }}" + channel: "{{ juju_channel | default('latest/stable') }}" + become: true + +- name: Ensure ~/.local/share directory exist + file: + path: ~/.local/share + state: directory + +- name: juju is bootstrapped on lxd + command: + cmd: juju bootstrap localhost lxd + register: res + failed_when: '"ERROR" in res.stdout and "already exists" not in res.stdout' + +- name: current juju controller is lxd + command: + cmd: juju switch lxd + register: res + changed_when: '"no change" not in res.stderr' + +- name: Collect snap versions + command: snap list + register: snap_out + +- name: Show snap versions + debug: msg="{{ snap_out.stdout }}" diff --git a/test-requirements.txt b/test-requirements.txt index 4cdc9db0..063fac5d 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -15,3 +15,4 @@ netifaces # cinder-ceph-k8s cosl # openstack-exporter git+https://github.com/juju/charm-helpers.git#egg=charmhelpers # cinder-ceph-k8s,glance-k8s,gnocchi-k8s 
git+https://opendev.org/openstack/charm-ops-interface-ceph-client#egg=interface_ceph_client # cinder-ceph-k8s +requests-unixsocket # sunbeam-clusterd diff --git a/tests/local/zaza/sunbeam/charm_tests/clusterd/__init__.py b/tests/local/zaza/sunbeam/charm_tests/clusterd/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/local/zaza/sunbeam/charm_tests/clusterd/tests.py b/tests/local/zaza/sunbeam/charm_tests/clusterd/tests.py new file mode 100644 index 00000000..b7ed2ed2 --- /dev/null +++ b/tests/local/zaza/sunbeam/charm_tests/clusterd/tests.py @@ -0,0 +1,172 @@ +# Copyright (c) 2024 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import logging +import subprocess +from random import shuffle, choice +from typing import Tuple + +import requests +import tenacity +import zaza.model as model +import zaza.openstack.charm_tests.test_utils as test_utils + + +class ClusterdTest(test_utils.BaseCharmTest): + """Charm tests for clusterd.""" + + @classmethod + def setUpClass(cls): + """Run class setup for running tests.""" + super(ClusterdTest, cls).setUpClass( + application_name="sunbeam-clusterd" + ) + + def _get_units(self) -> list[str]: + """Get the units.""" + return [unit.name for unit in model.get_units(self.application_name)] + + def _query_clusterd(self, unit: str, method: str, path: str): + cmd = [ + "juju", + "ssh", + unit, + "sudo", + "curl", + "-s", + "--unix-socket", + "/var/snap/openstack/common/state/control.socket", + "-X", + method, + "http://localhost" + path, + ] + try: + stdout = subprocess.check_output(cmd) + except subprocess.CalledProcessError: + logging.exception("Failed to query clusterd on %s", unit) + self.fail("Failed to query clusterd on {}".format(unit)) + return json.loads(stdout.decode("utf-8")) + + def _add_2_units(self): + model.add_unit(self.application_name, count=2) + model.block_until_unit_count(self.application_name, 3) + model.block_until_all_units_idle() + units = self._get_units() + for unit in units: + model.block_until_unit_wl_status(unit, "active", timeout=60 * 5) + + def test_100_connect_to_clusterd(self): + """Try sending data to an endpoint.""" + action = model.run_action_on_leader( + self.application_name, "get-credentials" + ) + url = action.data["results"]["url"] + "/1.0/config/100_connect" + response = requests.put(url, json={"data": "test"}, verify=False) + response.raise_for_status() + response = requests.get(url, verify=False) + response.raise_for_status() + self.assertEqual( + json.loads(response.json()["metadata"])["data"], "test" + ) + + def test_200_scale_up(self): + """Scale up.""" + self._add_2_units() + + def test_201_scale_down_multiple_units(self): + """Scale down 2 units.""" + units = self._get_units() + shuffle(units) + model.destroy_unit( + self.application_name, *units[:2], wait_disappear=True + ) + model.block_until_all_units_idle() + + units = self._get_units() + for unit in units: + model.block_until_unit_wl_status(unit, "active", timeout=60 * 5) + + def 
test_202_scale_up_again(self): + """Scale back to 3.""" + self._add_2_units() + + def test_203_scale_down_to_2_units(self): + """Scale down to 2 units for voter/spare test.""" + units = self._get_units() + model.destroy_unit( + self.application_name, choice(units), wait_disappear=True + ) + model.block_until_all_units_idle() + + units = self._get_units() + for unit in units: + model.block_until_unit_wl_status(unit, "active", timeout=60 * 5) + + def _wait_for_voter_spare( + self, unit: str, timeout=1800 + ) -> Tuple[str, str]: + """After a scale down of microcluster, it can take a while for the + voter, spare to be elected. This function will wait for these roles + to be elected. + """ + + @tenacity.retry( + wait=tenacity.wait_fixed(10), + stop=tenacity.stop_after_delay(timeout), + retry=tenacity.retry_if_exception_type(ValueError), + ) + def _tenacity_handler() -> Tuple[str, str]: + voter, spare = None, None + output = self._query_clusterd(unit, "GET", "/cluster/1.0/cluster") + metadata = output.get("metadata") + if metadata is None: + logging.warning("No metadata from clusterd, %r", output) + raise ValueError("No metadata from clusterd") + for member in output["metadata"]: + if member["role"] == "voter": + voter = member["name"] + elif member["role"] == "spare": + spare = member["name"] + if voter is None or spare is None: + raise ValueError("No voter or spare found") + return voter, spare + + return _tenacity_handler() + + def test_204_scale_down_voter(self): + """Scale down the voter member. + + When there's only 2 members left, 1 is voter, and 1 is spare. + There has been issues when the voter member is removed. + """ + units = self._get_units() + voter, _ = self._wait_for_voter_spare(units[0]) + for unit in units: + if unit.replace("/", "-") == voter: + model.destroy_unit( + self.application_name, + unit, + wait_disappear=True, + ) + units.remove(unit) + break + else: + self.fail("No unit found for voter {}".format(voter)) + model.block_until_all_units_idle() + model.block_until_unit_wl_status(units[0], "active", timeout=60 * 5) + output = self._query_clusterd(units[0], "GET", "/cluster/1.0/cluster") + self.assertEqual(output["status_code"], 200) + self.assertEqual(len(output["metadata"]), 1) diff --git a/tests/machine/smoke.yaml.j2 b/tests/machine/smoke.yaml.j2 new file mode 100644 index 00000000..21fdc1e6 --- /dev/null +++ b/tests/machine/smoke.yaml.j2 @@ -0,0 +1,22 @@ +applications: + sunbeam-machine: + {% if sunbeam_machine is defined and sunbeam_machine is sameas true -%} + charm: ../../../sunbeam-machine.charm + {% else -%} + charm: ch:sunbeam-machine + channel: 2023.2/edge + {% endif -%} + base: ubuntu@22.04 + scale: 1 + sunbeam-clusterd: + {% if sunbeam_clusterd is defined and sunbeam_clusterd is sameas true -%} + charm: ../../../sunbeam-clusterd.charm + {% else -%} + charm: ch:sunbeam-clusterd + channel: 2023.2/edge + {% endif -%} + base: ubuntu@22.04 + scale: 1 + options: + # untrusted endpoints only on this branch + snap-channel: 2023.2/edge/maas diff --git a/tests/machine/tests.yaml b/tests/machine/tests.yaml new file mode 100644 index 00000000..6cc71ada --- /dev/null +++ b/tests/machine/tests.yaml @@ -0,0 +1,25 @@ +gate_bundles: + - smoke +smoke_bundles: + - smoke +configure: + - zaza.charm_tests.noop.setup.basic_setup +tests: + - zaza.sunbeam.charm_tests.clusterd.tests.ClusterdTest +tests_options: + trust: + - smoke + ignore_hard_deploy_errors: + - smoke + + tempest: + default: + smoke: True + +target_deploy_status: + sunbeam-machine: + workload-status: active + 
workload-status-message-regex: '^$' + sunbeam-clusterd: + workload-status: active + workload-status-message-regex: '^$' diff --git a/tox.ini b/tox.ini index 2d2868ac..bfe7be59 100644 --- a/tox.ini +++ b/tox.ini @@ -75,19 +75,22 @@ commands = [testenv:func-noop] basepython = python3 deps = - git+https://github.com/openstack-charmers/zaza.git@libjuju-3.1#egg=zaza - git+https://github.com/openstack-charmers/zaza-openstack-tests.git#egg=zaza.openstack - git+https://opendev.org/openstack/tempest.git#egg=tempest + git+https://github.com/openstack-charmers/zaza.git@libjuju-3.1#egg=zaza + git+https://github.com/openstack-charmers/zaza-openstack-tests.git#egg=zaza.openstack + git+https://opendev.org/openstack/tempest.git#egg=tempest commands = - functest-run-suite --help + functest-run-suite --help [testenv:func] basepython = python3 deps = {[testenv:func-noop]deps} +passenv = + PYTHONPATH setenv = - TEST_MODEL_SETTINGS = automatically-retry-hooks=true - TEST_MAX_RESOLVE_COUNT = 5 + TEST_MODEL_SETTINGS = automatically-retry-hooks=true + TEST_MAX_RESOLVE_COUNT = 5 + PYTHONPATH = {toxinidir}/tests/local:{env:PYTHONPATH} commands = - python3 render_bundles.py - # Example: functest-run-suite --keep-model --smoke --test-directory=tests/set1 - functest-run-suite --keep-model {posargs} + python3 render_bundles.py + # Example: functest-run-suite --keep-model --smoke --test-directory=tests/set1 + functest-run-suite --keep-model {posargs} diff --git a/zuul.d/jobs.yaml b/zuul.d/jobs.yaml index 7d0ba34e..5dafbad5 100644 --- a/zuul.d/jobs.yaml +++ b/zuul.d/jobs.yaml @@ -274,6 +274,18 @@ - rebuild vars: charm: sunbeam-machine +- job: + name: charm-build-sunbeam-clusterd + description: Build sunbeam-clusterd charm + run: playbooks/charm/build.yaml + timeout: 3600 + match-on-config-updates: false + files: + - ops-sunbeam/ops_sunbeam/* + - charms/sunbeam-clusterd/* + - rebuild + vars: + charm: sunbeam-clusterd - job: name: func-test-core @@ -435,6 +447,29 @@ - charm-build-openstack-exporter-k8s - charm-build-keystone-k8s test_dir: tests/misc +- job: + name: func-test-machine + description: | + Zaza smoke test for sunbeam-machine, sunbeam-clusterd charms. + timeout: 3600 + run: playbooks/zaza-func-test.yaml + post-run: playbooks/collect-run-data.yaml + dependencies: + - name: charm-build-sunbeam-machine + soft: true + - name: charm-build-sunbeam-clusterd + soft: true + files: + - ops-sunbeam/ops_sunbeam/* + - charms/sunbeam-machine/* + - charms/sunbeam-clusterd/* + - rebuild + vars: + env_type: lxd + charm_jobs: + - charm-build-sunbeam-machine + - charm-build-sunbeam-clusterd + test_dir: tests/machine - job: name: publish-charm-aodh-k8s @@ -734,3 +769,16 @@ secrets: - charmhub_token timeout: 3600 + +- job: + name: publish-charm-sunbeam-clusterd + description: | + Publish sunbeam-clusterd built in gate pipeline. 
+ run: playbooks/charm/publish.yaml + files: + - ops-sunbeam/ops_sunbeam/* + - charms/sunbeam-clusterd/* + - rebuild + secrets: + - charmhub_token + timeout: 3600 diff --git a/zuul.d/project-templates.yaml b/zuul.d/project-templates.yaml index 36fa9ce6..d7398edf 100644 --- a/zuul.d/project-templates.yaml +++ b/zuul.d/project-templates.yaml @@ -82,6 +82,8 @@ nodeset: ubuntu-jammy - charm-build-sunbeam-machine: nodeset: ubuntu-jammy + - charm-build-sunbeam-clusterd: + nodeset: ubuntu-jammy gate: fail-fast: true jobs: @@ -131,6 +133,8 @@ nodeset: ubuntu-jammy - charm-build-sunbeam-machine: nodeset: ubuntu-jammy + - charm-build-sunbeam-clusterd: + nodeset: ubuntu-jammy - project-template: name: charm-publish-jobs @@ -184,3 +188,5 @@ nodeset: ubuntu-jammy - publish-charm-sunbeam-machine: nodeset: ubuntu-jammy + - publish-charm-sunbeam-clusterd: + nodeset: ubuntu-jammy diff --git a/zuul.d/zuul.yaml b/zuul.d/zuul.yaml index 9a2d37a6..2e165101 100644 --- a/zuul.d/zuul.yaml +++ b/zuul.d/zuul.yaml @@ -14,9 +14,12 @@ nodeset: ubuntu-focal - func-test-misc: nodeset: ubuntu-focal + - func-test-machine: + nodeset: ubuntu-jammy vars: juju_channel: 3.2/stable juju_classic_mode: false + env_type: k8s microk8s_channel: 1.28-strict/stable microk8s_classic_mode: false charmcraft_channel: 2.x/stable @@ -45,3 +48,4 @@ openstack-exporter-k8s: 2023.2/edge openstack-hypervisor: 2023.2/edge sunbeam-machine: 2023.2/edge + sunbeam-clusterd: 2023.2/edge
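
The functional test above exercises the clusterd key/value endpoint through the URL returned by `get-credentials`; the same check can be run by hand once the charm is deployed. A minimal sketch, mirroring `test_100_connect_to_clusterd` — the `CLUSTERD_URL` environment variable and the `demo` key are illustrative, with the URL taken from `juju run sunbeam-clusterd/leader get-credentials` (Juju 3.x):

```python
import json
import os

import requests

# URL returned by the get-credentials action, e.g. https://10.0.0.10:7000
url = os.environ["CLUSTERD_URL"] + "/1.0/config/demo"

# clusterd currently serves HTTPS with an untrusted certificate, hence verify=False.
requests.put(url, json={"data": "test"}, verify=False).raise_for_status()

response = requests.get(url, verify=False)
response.raise_for_status()

# clusterd wraps the stored value in a JSON string under "metadata".
print(json.loads(response.json()["metadata"])["data"])  # prints: test
```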