diff --git a/charms/openstack-hypervisor/.sunbeam-build.yaml b/charms/openstack-hypervisor/.sunbeam-build.yaml index fa7a0d75..eb23a7ef 100644 --- a/charms/openstack-hypervisor/.sunbeam-build.yaml +++ b/charms/openstack-hypervisor/.sunbeam-build.yaml @@ -2,7 +2,6 @@ external-libraries: - charms.data_platform_libs.v0.data_interfaces - charms.grafana_agent.v0.cos_agent - charms.operator_libs_linux.v2.snap - - charms.operator_libs_linux.v1.systemd - charms.rabbitmq_k8s.v0.rabbitmq - charms.traefik_k8s.v2.ingress - charms.tls_certificates_interface.v3.tls_certificates diff --git a/charms/openstack-hypervisor/src/charm.py b/charms/openstack-hypervisor/src/charm.py index a3172319..e7577446 100755 --- a/charms/openstack-hypervisor/src/charm.py +++ b/charms/openstack-hypervisor/src/charm.py @@ -26,6 +26,7 @@ import io import logging import os import secrets +import shutil import socket import string import subprocess @@ -63,9 +64,6 @@ from charms.nova_k8s.v0.nova_service import ( NovaConfigChangedEvent, NovaServiceGoneAwayEvent, ) -from charms.operator_libs_linux.v1.systemd import ( - service_running, -) from cryptography import ( x509, ) @@ -544,9 +542,85 @@ class HypervisorOperatorCharm(sunbeam_charm.OSBaseOperatorCharm): else: logger.debug("Snap settings do not need updating") + def _clear_system_ovs_datapaths(self): + logger.info("Clearing system OVS datapaths.") + + if not shutil.which("ovs-dpctl"): + logger.info( + "ovs-dpctl not found, skipped clearing system OVS datapaths." + ) + return + + result = subprocess.run( + ["ovs-dpctl", "dump-dps"], + capture_output=True, + text=True, + check=True, + ) + datapaths = result.stdout.strip().split("\n") + for datapath in datapaths: + logger.info("Removing OVS datapath: %s", datapath) + subprocess.run(["ovs-dpctl", "del-dp", datapath], check=True) + + def _disable_system_ovs(self) -> bool: + """Disable deb installed OVS. + + OVS crashes if there are multiple conflicting installations, as such + we are going to mask system OVS services and use the snap based + installation from "openstack-hypervisor" instead. + + OVS bridges and bonds defined in MAAS will be included in the Netplan + configuration. We expect Netplan to contain the following in + order to work with snap based installations: + https://github.com/canonical/netplan/pull/549 + + Note that any configuration defined in the system OVS db that's + not set in Netplan will be lost. + + Returns a boolean, stating if any changes were made. + """ + ovs_services = [ + "openvswitch-switch.service", + "ovs-vswitchd.service", + "ovsdb-server.service", + "ovs-record-hostname.service", + ] + + changes_made = False + + for service_name in ovs_services: + unit_info = utils.get_systemd_unit_status(service_name) + if not unit_info: + logger.debug("%s unit not found.", service_name) + continue + + if unit_info["active_state"] != "inactive": + logging.info("Stopping unit: %s", service_name) + subprocess.run(["systemctl", "stop", service_name], check=True) + changes_made = True + else: + logger.debug("%s unit already stopped.", service_name) + + if unit_info["load_state"] != "masked": + logging.info("Masking unit: %s", service_name) + subprocess.run(["systemctl", "mask", service_name], check=True) + changes_made = True + else: + logger.debug("%s unit already masked.", service_name) + + if changes_made: + self._clear_system_ovs_datapaths() + + return changes_made + def ensure_snap_present(self): """Install snap if it is not already present.""" config = self.model.config.get + + # If we've just disabled the system OVS services, reapply + # the netplan configuration to the snap based installation. + netplan_apply_needed = self._disable_system_ovs() + try: cache = self.get_snap_cache() hypervisor = cache["openstack-hypervisor"] @@ -556,6 +630,14 @@ class HypervisorOperatorCharm(sunbeam_charm.OSBaseOperatorCharm): snap.SnapState.Latest, channel=config("snap-channel") ) self._connect_to_epa_orchestrator() + + # Netplan expects the "ovs-vsctl" alias in order to pick up the + # snap installation. The other aliases are there for consistency + # and convenience. + hypervisor.alias("ovs-vsctl", "ovs-vsctl") + hypervisor.alias("ovs-appctl", "ovs-appctl") + hypervisor.alias("ovs-dpctl", "ovs-dpctl") + hypervisor.alias("ovs-ofctl", "ovs-ofctl") except (snap.SnapError, snap.SnapNotFoundError) as e: logger.error( "An exception occurred when installing openstack-hypervisor. Reason: %s", @@ -566,6 +648,14 @@ class HypervisorOperatorCharm(sunbeam_charm.OSBaseOperatorCharm): "openstack-hypervisor installation failed" ) + if netplan_apply_needed: + logger.info("System OVS services masked, reapplying netplan.") + subprocess.run(["netplan", "apply"], check=True) + else: + logger.debug( + "No OVS changes made, skipping netplan configuration." + ) + def _connect_to_epa_orchestrator(self): """Connect openstack-hypervisor snap plug to epa-orchestrator snap slot.""" cache = self.get_snap_cache() @@ -598,21 +688,8 @@ class HypervisorOperatorCharm(sunbeam_charm.OSBaseOperatorCharm): """Return snap cache.""" return snap.SnapCache() - def check_system_services(self) -> None: - """Check if system services are in desired state.""" - if service_running("openvswitch-switch.service"): - logger.error( - "OpenVSwitch service is running, please stop it before proceeding. " - "OpenVSwitch is managed by the openstack-hypervisor snap and will " - "conflict with the snap's operation." - ) - raise sunbeam_guard.BlockedExceptionError( - "Breaking: OpenVSwitch service is running on the host." - ) - def configure_unit(self, event) -> None: """Run configuration on this unit.""" - self.check_system_services() self.check_leader_ready() self.check_relation_handlers_ready(event) config = self.model.config.get diff --git a/charms/openstack-hypervisor/src/utils.py b/charms/openstack-hypervisor/src/utils.py index 75bde856..c7259584 100644 --- a/charms/openstack-hypervisor/src/utils.py +++ b/charms/openstack-hypervisor/src/utils.py @@ -18,6 +18,7 @@ import collections +import json import logging import os import subprocess @@ -162,3 +163,34 @@ def get_cpu_numa_architecture() -> dict: numa_nodes[int(numa_node_id)].append(int(core_id)) return dict(numa_nodes) + + +def get_systemd_unit_status(unit_name: str) -> dict[str, str] | None: + """Get Systemd unit status information.""" + result = subprocess.run( + [ + "systemctl", + "list-units", + "--all", + "-o", + "json", + "--no-pager", + unit_name, + ], + capture_output=True, + text=True, + check=True, + ) + units = json.loads(result.stdout) + + for unit_info in units: + name = unit_info["unit"] + if name != unit_name: + continue + return { + "name": name, + "description": unit_info["description"], + "load_state": unit_info["load"], + "active_state": unit_info["active"], + "substate": unit_info["sub"], + } diff --git a/charms/openstack-hypervisor/tests/unit/test_charm.py b/charms/openstack-hypervisor/tests/unit/test_charm.py index ad8aa5cf..b5f0ce70 100644 --- a/charms/openstack-hypervisor/tests/unit/test_charm.py +++ b/charms/openstack-hypervisor/tests/unit/test_charm.py @@ -32,7 +32,6 @@ import jsonschema import ops import ops.testing import ops_sunbeam.test_utils as test_utils -from ops_sunbeam import guard as sunbeam_guard class _HypervisorOperatorCharm(charm.HypervisorOperatorCharm): @@ -65,7 +64,6 @@ class TestCharm(test_utils.CharmTestCase): "get_local_ip_by_default_route", "subprocess", "ConsulNotifyRequirer", - "service_running", "epa_client", ] @@ -73,7 +71,6 @@ class TestCharm(test_utils.CharmTestCase): """Setup OpenStack Hypervisor tests.""" super().setUp(charm, self.PATCHES) self.patch_obj(os, "system") - self.service_running.return_value = False self.snap.SnapError = Exception self.consul_notify_mock = MagicMock() @@ -477,20 +474,6 @@ class TestCharm(test_utils.CharmTestCase): with self.assertRaises(snap.SnapError): self.harness.charm._connect_to_epa_orchestrator() - def test_check_system_services_raises_when_ovs_running(self): - """Test check_system_services raises BlockedExceptionError if OVS is running.""" - self.service_running.return_value = True - self.harness.begin() - with self.assertRaises(sunbeam_guard.BlockedExceptionError): - self.harness.charm.check_system_services() - - def test_check_system_services_passes_when_ovs_not_running(self): - """Test check_system_services does nothing if OVS is not running.""" - self.service_running.return_value = False - self.harness.begin() - # Should not raise - self.harness.charm.check_system_services() - @contextlib.contextmanager def _mock_dpdk_settings_file(self, dpdk_yaml): with tempfile.NamedTemporaryFile(mode="w") as f: @@ -751,3 +734,105 @@ network: self.harness.charm._epa_client.allocate_hugepages.assert_called_once_with( charm.EPA_ALLOCATION_OVS_DPDK_HUGEPAGES, 2, 1024 * 1024, 0 ) + + @mock.patch("shutil.which") + def test_clear_system_ovs_datapaths(self, mock_which): + """Test clearing system ovs datapaths.""" + self.harness.begin() + + mock_which.return_value = "/usr/bin/ovs-dpctl" + self.subprocess.run.side_effect = [ + mock.Mock(stdout="dp1\ndp2"), + mock.Mock(stdout=None), + mock.Mock(stdout=None), + ] + + self.harness.charm._clear_system_ovs_datapaths() + + self.subprocess.run.assert_has_calls( + [ + mock.call( + ["ovs-dpctl", "dump-dps"], + capture_output=True, + text=True, + check=True, + ), + mock.call(["ovs-dpctl", "del-dp", "dp1"], check=True), + mock.call(["ovs-dpctl", "del-dp", "dp2"], check=True), + ] + ) + + @mock.patch("shutil.which") + def test_clear_system_ovs_datapaths_missing_ovs_dpctl(self, mock_which): + """Test clearing system ovs datapaths, no ovs-dpctl found.""" + self.harness.begin() + + mock_which.return_value = None + self.harness.charm._clear_system_ovs_datapaths() + self.subprocess.run.assert_not_called() + + @mock.patch("utils.get_systemd_unit_status") + @mock.patch.object( + charm.HypervisorOperatorCharm, "_clear_system_ovs_datapaths" + ) + def test_disable_system_ovs(self, mock_clear_datapaths, mock_get_status): + """Test disabling system ovs services.""" + self.harness.begin() + + mock_get_status.side_effect = [ + { + "name": "openvswitch-switch.service", + "load_state": "masked", + "active_state": "active", + "substate": "running", + }, + { + "name": "ovs-vswitchd.service", + "load_state": "loaded", + "active_state": "inactive", + "substate": "dead", + }, + { + "name": "ovsdb-server.service", + "load_state": "loaded", + "active_state": "active", + "substate": "running", + }, + None, + ] + + ret_val = self.harness.charm._disable_system_ovs() + self.assertTrue(ret_val) + + self.subprocess.run.assert_has_calls( + [ + mock.call( + ["systemctl", "stop", "openvswitch-switch.service"], + check=True, + ), + mock.call( + ["systemctl", "mask", "ovs-vswitchd.service"], check=True + ), + mock.call( + ["systemctl", "stop", "ovsdb-server.service"], check=True + ), + mock.call( + ["systemctl", "mask", "ovsdb-server.service"], check=True + ), + ] + ) + mock_clear_datapaths.assert_called_once_with() + + @mock.patch("utils.get_systemd_unit_status") + @mock.patch.object( + charm.HypervisorOperatorCharm, "_clear_system_ovs_datapaths" + ) + def test_disable_system_ovs_missing( + self, mock_clear_datapaths, mock_get_status + ): + """Test disabling system ovs, no services found.""" + self.harness.begin() + mock_get_status.return_value = None + + ret_val = self.harness.charm._disable_system_ovs() + self.assertFalse(ret_val) diff --git a/charms/openstack-hypervisor/tests/unit/test_utils.py b/charms/openstack-hypervisor/tests/unit/test_utils.py new file mode 100644 index 00000000..52974cfc --- /dev/null +++ b/charms/openstack-hypervisor/tests/unit/test_utils.py @@ -0,0 +1,67 @@ +# Copyright 2023 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for Openstack hypervisor utils.""" + +import unittest +from unittest import ( + mock, +) + +import utils + + +class TestOpenstackHypervisorUtils(unittest.TestCase): + """Tests for Openstack hypervisor utils.""" + + @mock.patch("subprocess.run") + def test_get_systemd_unit_status_not_found(self, mock_run): + """Test retrieving inexistent systemd unit status.""" + mock_run.return_value.stdout = "[]" + + self.assertIsNone(utils.get_systemd_unit_status("test.service")) + + @mock.patch("subprocess.run") + def test_get_systemd_unit_status(self, mock_run): + """Test retrieving systemd unit status.""" + mock_run.return_value.stdout = ( + '[{"unit":"ovs-vswitchd.service","load":"masked",' + '"active":"inactive","sub":"dead",' + '"description":"ovs-vswitchd.service"}]' + ) + + exp_out = { + "name": "ovs-vswitchd.service", + "load_state": "masked", + "active_state": "inactive", + "substate": "dead", + "description": "ovs-vswitchd.service", + } + out = utils.get_systemd_unit_status("ovs-vswitchd.service") + + self.assertEqual(exp_out, out) + mock_run.assert_called_once_with( + [ + "systemctl", + "list-units", + "--all", + "-o", + "json", + "--no-pager", + "ovs-vswitchd.service", + ], + capture_output=True, + text=True, + check=True, + ) diff --git a/libs/external/lib/charms/operator_libs_linux/v1/systemd.py b/libs/external/lib/charms/operator_libs_linux/v1/systemd.py deleted file mode 100644 index cdcbad6a..00000000 --- a/libs/external/lib/charms/operator_libs_linux/v1/systemd.py +++ /dev/null @@ -1,288 +0,0 @@ -# Copyright 2021 Canonical Ltd. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""Abstractions for stopping, starting and managing system services via systemd. - -This library assumes that your charm is running on a platform that uses systemd. E.g., -Centos 7 or later, Ubuntu Xenial (16.04) or later. - -For the most part, we transparently provide an interface to a commonly used selection of -systemd commands, with a few shortcuts baked in. For example, service_pause and -service_resume with run the mask/unmask and enable/disable invocations. - -Example usage: - -```python -from charms.operator_libs_linux.v0.systemd import service_running, service_reload - -# Start a service -if not service_running("mysql"): - success = service_start("mysql") - -# Attempt to reload a service, restarting if necessary -success = service_reload("nginx", restart_on_failure=True) -``` -""" - -__all__ = [ # Don't export `_systemctl`. (It's not the intended way of using this lib.) - "SystemdError", - "daemon_reload", - "service_disable", - "service_enable", - "service_failed", - "service_pause", - "service_reload", - "service_restart", - "service_resume", - "service_running", - "service_start", - "service_stop", -] - -import logging -import subprocess - -logger = logging.getLogger(__name__) - -# The unique Charmhub library identifier, never change it -LIBID = "045b0d179f6b4514a8bb9b48aee9ebaf" - -# Increment this major API version when introducing breaking changes -LIBAPI = 1 - -# Increment this PATCH version before using `charmcraft publish-lib` or reset -# to 0 if you are raising the major API version -LIBPATCH = 4 - - -class SystemdError(Exception): - """Custom exception for SystemD related errors.""" - - -def _systemctl(*args: str, check: bool = False) -> int: - """Control a system service using systemctl. - - Args: - *args: Arguments to pass to systemctl. - check: Check the output of the systemctl command. Default: False. - - Returns: - Returncode of systemctl command execution. - - Raises: - SystemdError: Raised if calling systemctl returns a non-zero returncode and check is True. - """ - cmd = ["systemctl", *args] - logger.debug(f"Executing command: {cmd}") - try: - proc = subprocess.run( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - bufsize=1, - encoding="utf-8", - check=check, - ) - logger.debug( - f"Command {cmd} exit code: {proc.returncode}. systemctl output:\n{proc.stdout}" - ) - return proc.returncode - except subprocess.CalledProcessError as e: - raise SystemdError( - f"Command {cmd} failed with returncode {e.returncode}. systemctl output:\n{e.stdout}" - ) - - -def service_running(service_name: str) -> bool: - """Report whether a system service is running. - - Args: - service_name: The name of the service to check. - - Return: - True if service is running/active; False if not. - """ - # If returncode is 0, this means that is service is active. - return _systemctl("--quiet", "is-active", service_name) == 0 - - -def service_failed(service_name: str) -> bool: - """Report whether a system service has failed. - - Args: - service_name: The name of the service to check. - - Returns: - True if service is marked as failed; False if not. - """ - # If returncode is 0, this means that the service has failed. - return _systemctl("--quiet", "is-failed", service_name) == 0 - - -def service_start(*args: str) -> bool: - """Start a system service. - - Args: - *args: Arguments to pass to `systemctl start` (normally the service name). - - Returns: - On success, this function returns True for historical reasons. - - Raises: - SystemdError: Raised if `systemctl start ...` returns a non-zero returncode. - """ - return _systemctl("start", *args, check=True) == 0 - - -def service_stop(*args: str) -> bool: - """Stop a system service. - - Args: - *args: Arguments to pass to `systemctl stop` (normally the service name). - - Returns: - On success, this function returns True for historical reasons. - - Raises: - SystemdError: Raised if `systemctl stop ...` returns a non-zero returncode. - """ - return _systemctl("stop", *args, check=True) == 0 - - -def service_restart(*args: str) -> bool: - """Restart a system service. - - Args: - *args: Arguments to pass to `systemctl restart` (normally the service name). - - Returns: - On success, this function returns True for historical reasons. - - Raises: - SystemdError: Raised if `systemctl restart ...` returns a non-zero returncode. - """ - return _systemctl("restart", *args, check=True) == 0 - - -def service_enable(*args: str) -> bool: - """Enable a system service. - - Args: - *args: Arguments to pass to `systemctl enable` (normally the service name). - - Returns: - On success, this function returns True for historical reasons. - - Raises: - SystemdError: Raised if `systemctl enable ...` returns a non-zero returncode. - """ - return _systemctl("enable", *args, check=True) == 0 - - -def service_disable(*args: str) -> bool: - """Disable a system service. - - Args: - *args: Arguments to pass to `systemctl disable` (normally the service name). - - Returns: - On success, this function returns True for historical reasons. - - Raises: - SystemdError: Raised if `systemctl disable ...` returns a non-zero returncode. - """ - return _systemctl("disable", *args, check=True) == 0 - - -def service_reload(service_name: str, restart_on_failure: bool = False) -> bool: - """Reload a system service, optionally falling back to restart if reload fails. - - Args: - service_name: The name of the service to reload. - restart_on_failure: - Boolean indicating whether to fall back to a restart if the reload fails. - - Returns: - On success, this function returns True for historical reasons. - - Raises: - SystemdError: Raised if `systemctl reload|restart ...` returns a non-zero returncode. - """ - try: - return _systemctl("reload", service_name, check=True) == 0 - except SystemdError: - if restart_on_failure: - return service_restart(service_name) - else: - raise - - -def service_pause(service_name: str) -> bool: - """Pause a system service. - - Stops the service and prevents the service from starting again at boot. - - Args: - service_name: The name of the service to pause. - - Returns: - On success, this function returns True for historical reasons. - - Raises: - SystemdError: Raised if service is still running after being paused by systemctl. - """ - _systemctl("disable", "--now", service_name) - _systemctl("mask", service_name) - - if service_running(service_name): - raise SystemdError(f"Attempted to pause {service_name!r}, but it is still running.") - - return True - - -def service_resume(service_name: str) -> bool: - """Resume a system service. - - Re-enable starting the service again at boot. Start the service. - - Args: - service_name: The name of the service to resume. - - Returns: - On success, this function returns True for historical reasons. - - Raises: - SystemdError: Raised if service is not running after being resumed by systemctl. - """ - _systemctl("unmask", service_name) - _systemctl("enable", "--now", service_name) - - if not service_running(service_name): - raise SystemdError(f"Attempted to resume {service_name!r}, but it is not running.") - - return True - - -def daemon_reload() -> bool: - """Reload systemd manager configuration. - - Returns: - On success, this function returns True for historical reasons. - - Raises: - SystemdError: Raised if `systemctl daemon-reload` returns a non-zero returncode. - """ - return _systemctl("daemon-reload", check=True) == 0