From f1a02a267432a76f200d76b4e5807ea6357853d8 Mon Sep 17 00:00:00 2001 From: Billy Olsen Date: Sun, 17 Apr 2022 19:25:47 -0700 Subject: [PATCH] Add cloud-compute relation for relating to compute nodes Adds the cloud-compute relation and fixes up cell creation when relating to compute nodes. Signed-off-by: Billy Olsen --- charms/nova-k8s/charmcraft.yaml | 8 + charms/nova-k8s/config.yaml | 6 +- .../v0/cloud_compute.py | 356 ++++++++++++++++++ charms/nova-k8s/src/charm.py | 177 ++++++++- charms/nova-k8s/src/templates/nova.conf.j2 | 2 +- charms/nova-k8s/unit_tests/test_nova_charm.py | 5 +- 6 files changed, 545 insertions(+), 9 deletions(-) create mode 100644 charms/nova-k8s/lib/charms/sunbeam_nova_compute_operator/v0/cloud_compute.py diff --git a/charms/nova-k8s/charmcraft.yaml b/charms/nova-k8s/charmcraft.yaml index 08ce6ba5..e57d6d9f 100644 --- a/charms/nova-k8s/charmcraft.yaml +++ b/charms/nova-k8s/charmcraft.yaml @@ -7,7 +7,15 @@ bases: - name: "ubuntu" channel: "20.04" parts: + update-certificates: + plugin: nil + override-build: | + apt update + apt install -y ca-certificates + update-ca-certificates + charm: + after: [update-certificates] build-packages: - git - libffi-dev diff --git a/charms/nova-k8s/config.yaml b/charms/nova-k8s/config.yaml index 830481f0..43d3ee02 100644 --- a/charms/nova-k8s/config.yaml +++ b/charms/nova-k8s/config.yaml @@ -4,19 +4,19 @@ options: description: Enable debug logging. type: boolean os-admin-hostname: - default: nova.juju + default: description: | The hostname or address of the admin endpoints that should be advertised in the nova image provider. type: string os-internal-hostname: - default: nova.juju + default: description: | The hostname or address of the internal endpoints that should be advertised in the nova image provider. type: string os-public-hostname: - default: nova.juju + default: description: | The hostname or address of the internal endpoints that should be advertised in the nova image provider. 
diff --git a/charms/nova-k8s/lib/charms/sunbeam_nova_compute_operator/v0/cloud_compute.py b/charms/nova-k8s/lib/charms/sunbeam_nova_compute_operator/v0/cloud_compute.py new file mode 100644 index 00000000..ec83554d --- /dev/null +++ b/charms/nova-k8s/lib/charms/sunbeam_nova_compute_operator/v0/cloud_compute.py @@ -0,0 +1,356 @@ +"""CloudCompute Provides and Requires module. + + +This library contains the Requires and Provides classes for handling +the cloud-compute interface. + +Import `CloudComputeRequires` in your charm, with the charm object and the +relation name: + - self + - "cloud-compute" + +The following events are also available to respond to: + - connected + - ready + - goneaway + +A basic example showing the usage of this relation follows: + +``` +from charms.sunbeam_nova_compute_operator.v0.cloud_compute import +CloudComputeRequires + +class CloudComputeClientCharm(CharmBase): + def __init__(self, *args): + super().__init__(*args) + # CloudCompute Requires + self.cloud_compute = CloudComputeRequires( + self, "cloud-compute", + service = "my-service", + region = "region", + ) + self.framework.observe( + self.cloud_compute.on.compute_nodes_connected, + self._on_cloud_compute_connected) + self.framework.observe( + self.cloud_compute.on.compute_nodes_ready, + self._on_cloud_compute_ready) + self.framework.observe( + self.cloud_compute.on.compute_nodes_goneaway, + self._on_cloud_compute_goneaway) + + def _on_cloud_compute_connected(self, event): + '''React to the CloudComputeConnectedEvent event. + + This event happens when a CloudCompute relation is added to the + model before information has been provided + ''' + # Do something before the relation is complete + pass + + def _on_cloud_compute_ready(self, event): + '''React to the CloudComputeReadyEvent event. + + The CloudCompute interface will use the provided config for the + request to the cloud compute. + ''' + # CloudCompute Relation is ready. Do something with the completed + # relation. 
+ pass + + def _on_cloud_compute_goneaway(self, event): + '''React to the CloudComputeGoneAwayEvent event. + + This event happens when a CloudCompute relation is removed. + ''' + # CloudCompute Relation has goneaway. shutdown services or suchlike + pass +``` +""" + +# ops.model supplies the Relation type used in annotations in this module +import ops.model + +# The unique Charmhub library identifier, never change it +LIBID = "44d8650223f143489276f00b1298c2da" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 1 + +import logging + +from ops.framework import ( + StoredState, + EventBase, + ObjectEvents, + EventSource, + Object, +) + +from ops.charm import CharmBase +from typing import Union + + +logger = logging.getLogger(__name__) + + +class CloudComputeConnectedEvent(EventBase): + """CloudCompute connected Event.""" + + pass + + +class CloudComputeReadyEvent(EventBase): + """CloudCompute ready for use Event.""" + + def __init__(self, handle, relation_name, relation_id, hostname, + availability_zone): + super().__init__(handle) + self.relation_name = relation_name + self.relation_id = relation_id + self.hostname = hostname + self.availability_zone = availability_zone + + def snapshot(self): + return { + 'relation_name': self.relation_name, + 'relation_id': self.relation_id, + 'hostname': self.hostname, + 'availability_zone': self.availability_zone, + } + + def restore(self, snapshot): + super().restore(snapshot) + self.relation_name = snapshot['relation_name'] + self.relation_id = snapshot['relation_id'] + self.hostname = snapshot['hostname'] + self.availability_zone = snapshot['availability_zone'] + + +class CloudComputeGoneAwayEvent(EventBase): + """CloudCompute relation has gone-away Event""" + + pass + + +class CloudComputeEvents(ObjectEvents): + """Events class for `on`""" + + compute_nodes_connected = 
EventSource(CloudComputeConnectedEvent) + compute_nodes_ready = EventSource(CloudComputeReadyEvent) + compute_nodes_goneaway = EventSource(CloudComputeGoneAwayEvent) + + +class CloudComputeRequires(Object): + """ + CloudComputeRequires class + """ + + on = CloudComputeEvents() + _stored = StoredState() + + def __init__(self, charm, relation_name: str): + super().__init__(charm, relation_name) + self.charm = charm + self.relation_name = relation_name + self.framework.observe( + self.charm.on[relation_name].relation_changed, + self._on_cloud_compute_relation_changed, + ) + self.framework.observe( + self.charm.on[relation_name].relation_departed, + self._on_cloud_compute_relation_broken, + ) + self.framework.observe( + self.charm.on[relation_name].relation_broken, + self._on_cloud_compute_relation_broken, + ) + + def _on_cloud_compute_relation_changed(self, event): + """CloudCompute relation changed.""" + logger.debug('cloud-compute requires on_changed') + try: + unit_relation_data = event.relation.data[event.unit] + hostname = unit_relation_data.get('hostname') + availability_zone = unit_relation_data.get('availability_zone') + + if not hostname or not availability_zone: + logger.debug('Missing hostname or availability zone. Waiting ' + 'to raise event until ready') + return + + # TODO(wolsen) Need to get the migration auth type and credentials. 
+ self.on.compute_nodes_ready.emit( + event.relation.name, + event.relation.id, + hostname, + availability_zone, + ) + except AttributeError: + logger.exception('Error when emitting event.') + raise + + def _on_cloud_compute_relation_broken(self, event): + """CloudCompute relation broken.""" + logging.debug("CloudCompute on_broken") + self.on.compute_nodes_goneaway.emit() + + def set_controller_info( + self, region: str, cross_az_attach: bool = False, + volume_service: str = 'cinder', network_manager: str = 'neutron', + ) -> None: + """Set controller information for the compute-nodes.""" + if not self.model.unit.is_leader(): + logging.debug('Not leader, leader will send information') + return + + logging.debug('Broadcasting controller information to all ' + f'{self.relation_name} relations.') + for relation in self.framework.model.relations.get(self.relation_name): + app_data = relation.data[self.charm.app] + app_data['network-manager'] = network_manager + app_data['region'] = region + app_data['cross-az-attach'] = str(cross_az_attach) + app_data['volume-service'] = volume_service + + +class HasCloudComputeClientsEvent(EventBase): + """Has CloudComputeClients Event.""" + + def __init__(self, handle, relation_name, relation_id): + super().__init__(handle) + self.relation_name = relation_name + self.relation_id = relation_id + + def snapshot(self): + return { + "relation_id": self.relation_id, + "relation_name": self.relation_name, + } + + def restore(self, snapshot): + super().restore(snapshot) + self.relation_name = snapshot["relation_name"] + self.relation_id = snapshot["relation_id"] + + +class ReadyCloudComputeClientsEvent(EventBase): + """CloudComputeClients Ready Event.""" + + def __init__(self, handle, relation_name, relation_id): + super().__init__(handle) + self.relation_name = relation_name + self.relation_id = relation_id + + def snapshot(self): + return { + "relation_name": self.relation_name, + "relation_id": self.relation_id, + } + + def restore(self, 
snapshot): + super().restore(snapshot) + self.relation_name = snapshot["relation_name"] + self.relation_id = snapshot["relation_id"] + + +class CloudComputeClientsGoneAway(EventBase): + """CloudComputeClients gone away Event.""" + + pass + + +class CloudComputeClientEvents(ObjectEvents): + """Events class for `on`""" + + has_cloud_compute_clients = EventSource(HasCloudComputeClientsEvent) + ready_cloud_compute_clients = EventSource(ReadyCloudComputeClientsEvent) + cloud_compute_clients_gone = EventSource(CloudComputeClientsGoneAway) + + +class CloudComputeProvides(Object): + """ + CloudComputeProvides class + """ + + on = CloudComputeClientEvents() + _stored = StoredState() + + def __init__(self, charm: CharmBase, relation_name: str): + super().__init__(charm, relation_name) + self.charm = charm + self.relation_name = relation_name + self.framework.observe( + self.charm.on[relation_name].relation_joined, + self._on_cloud_compute_relation_joined, + ) + self.framework.observe( + self.charm.on[relation_name].relation_changed, + self._on_cloud_compute_relation_changed, + ) + self.framework.observe( + self.charm.on[relation_name].relation_broken, + self._on_cloud_compute_relation_broken, + ) + + def _on_cloud_compute_relation_joined(self, event): + """Handle CloudCompute joined.""" + logging.debug(f'cloud-compute joined event for {event.relation.name},' + f' {event.relation.id}') + self.on.has_cloud_compute_clients.emit( + event.relation.name, + event.relation.id, + ) + + def _on_cloud_compute_relation_changed(self, event): + """Handle CloudCompute changed.""" + logging.debug("cloud-compute on_changed") + self.on.ready_cloud_compute_clients.emit( + event.relation.name, + event.relation.id, + ) + + def _on_cloud_compute_relation_broken(self, event): + """Handle CloudCompute broken.""" + logging.debug("CloudComputeProvides on_departed") + self.on.cloud_compute_clients_gone.emit() + + def set_compute_node_info(self, relation_name: int, relation_id: str, + hostname: str, 
availability_zone: str): + logging.debug(f"Setting compute node information for {relation_name}," + f" {relation_id}") + relation = self.framework.model.get_relation(relation_name, + relation_id) + + unit_data = relation.data[self.charm.unit] + unit_data['hostname'] = hostname + unit_data['availability_zone'] = availability_zone + + @property + def _cloud_compute_rel(self) -> ops.model.Relation: + return self.framework.model.get_relation(self.relation_name) + + def _get_remote_app_data(self, key: str) -> Union[str, bool, int, None]: + relation = self._cloud_compute_rel + data = relation.data[relation.app] + return data.get(key) + + @property + def network_manager(self): + return self._get_remote_app_data('network-manager') + + @property + def volume_service(self): + return self._get_remote_app_data('volume-service') + + @property + def region(self): + return self._get_remote_app_data('region') + + @property + def cross_az_attach(self): + return self._get_remote_app_data('cross-az-attach') diff --git a/charms/nova-k8s/src/charm.py b/charms/nova-k8s/src/charm.py index 49a9df15..6715623f 100755 --- a/charms/nova-k8s/src/charm.py +++ b/charms/nova-k8s/src/charm.py @@ -6,16 +6,21 @@ This charm provide Nova services as part of an OpenStack deployment import logging import uuid +from typing import Callable from typing import List import ops.framework from ops.main import main +from ops.pebble import ExecError import advanced_sunbeam_openstack.charm as sunbeam_charm import advanced_sunbeam_openstack.core as sunbeam_core import advanced_sunbeam_openstack.container_handlers as sunbeam_chandlers +import advanced_sunbeam_openstack.relation_handlers as sunbeam_rhandlers import advanced_sunbeam_openstack.config_contexts as sunbeam_ctxts +import charms.sunbeam_nova_compute_operator.v0.cloud_compute as cloud_compute + logger = logging.getLogger(__name__) NOVA_SCHEDULER_CONTAINER = "nova-scheduler" @@ -98,6 +103,60 @@ class 
NovaConductorPebbleHandler(sunbeam_chandlers.ServicePebbleHandler): 'nova')] +class CloudComputeRequiresHandler(sunbeam_rhandlers.RelationHandler): + """Handles the cloud-compute relation on the requires side.""" + + def __init__( + self, + charm: ops.charm.CharmBase, + relation_name: str, + region: str, + callback_f: Callable, + ): + """Creates a new CloudComputeRequiresHandler that handles initial + events from the relation and invokes the provided callbacks based on + the event raised. + + :param charm: the Charm class the handler is for + :type charm: ops.charm.CharmBase + :param relation_name: the relation the handler is bound to + :type relation_name: str + :param region: the region the nova services are configured for + :type region: str + :param callback_f: the function to call when the nodes are connected + :type callback_f: Callable + """ + self.region = region + super().__init__(charm, relation_name, callback_f) + + def setup_event_handler(self): + """Configure event handlers for the cloud-compute service relation.""" + logger.debug("Setting up cloud-compute event handler") + compute_service = cloud_compute.CloudComputeRequires( + self.charm, + self.relation_name, + ) + self.framework.observe( + compute_service.on.compute_nodes_connected, + self._compute_nodes_connected + ) + self.framework.observe( + compute_service.on.compute_nodes_ready, + self._compute_nodes_connected + ) + return compute_service + + def _compute_nodes_connected(self, event) -> None: + """Handles cloud-compute change events.""" + # Ready is only emitted when the interface considers + # that the relation is complete (indicated by an availability zone) + self.callback_f(event) + + @property + def ready(self) -> bool: + return True + + class NovaOperatorCharm(sunbeam_charm.OSBaseOperatorAPICharm): """Charm the service.""" @@ -110,7 +169,10 @@ class NovaOperatorCharm(sunbeam_charm.OSBaseOperatorAPICharm): db_sync_cmds = [ ['sudo', '-u', 'nova', 'nova-manage', 'api_db', 'sync'], ['sudo', 
'-u', 'nova', 'nova-manage', 'cell_v2', 'map_cell0'], - ['sudo', '-u', 'nova', 'nova-manage', 'db', 'sync']] + ['sudo', '-u', 'nova', 'nova-manage', 'db', 'sync'], + ['sudo', '-u', 'nova', 'nova-manage', 'cell_v2', 'create_cell', + '--name', 'cell1', '--verbose'], + ] @property def service_conf(self) -> str: @@ -171,6 +233,24 @@ class NovaOperatorCharm(sunbeam_charm.OSBaseOperatorAPICharm): self.configure_charm)]) return pebble_handlers + def get_relation_handlers( + self, handlers: List[sunbeam_rhandlers.RelationHandler] = None + ) -> List[sunbeam_rhandlers.RelationHandler]: + """ + + :param handlers: + :return: + """ + handlers = super().get_relation_handlers(handlers) + self.compute_nodes = CloudComputeRequiresHandler( + self, + 'cloud-compute', + self.model.config['region'], + self.register_compute_nodes, + ) + handlers.append(self.compute_nodes) + return handlers + @property def config_contexts(self) -> List[sunbeam_ctxts.ConfigContext]: """Generate list of configuration adapters for the charm.""" @@ -193,18 +273,106 @@ class NovaOperatorCharm(sunbeam_charm.OSBaseOperatorAPICharm): self.leader_set( {self.shared_metadata_secret_key: str(uuid.uuid1())}) + def register_compute_nodes(self, event: ops.framework.EventBase) -> None: + """Register compute nodes when the event is received. + + :param event: the event that new compute nodes are available. 
+ :type event: ops.framework.EventBase + :return: None + """ + logger.debug("register_compute_nodes event received") + if not self.bootstrapped(): + logger.debug("Event received while not bootstrapped, deferring") + event.defer() + return + + if not self.unit.is_leader(): + logger.debug("Unit is not the current leader") + return + + handler = self.get_named_pebble_handler(NOVA_CONDUCTOR_CONTAINER) + # TODO(wolsen) make sure the container is there to run the command in + # if not handler.service_ready: + # logger.info(f'Container {NOVA_CONDUCTOR_CONTAINER} is not ready, ' + # 'deferring') + # event.defer() + # return + + self.compute_nodes.interface.set_controller_info( + region=self.model.config['region'], + cross_az_attach=False, + ) + + try: + logger.debug('Discovering hosts for cell1') + cell1_uuid = self.get_cell_uuid('cell1') + cmd = ['nova-manage', 'cell_v2', 'discover_hosts', '--cell_uuid', + cell1_uuid, '--verbose'] + handler.execute(cmd, exception_on_error=True) + except ExecError: + logger.exception('Failed to discover hosts for cell1') + raise + + def get_cell_uuid(self, cell, fatal=True): + """Returns the cell UUID from the name + + :param cell: string cell name i.e. 'cell1' + :returns: string cell uuid + """ + logger.debug(f'listing cells for {cell}') + cells = self.get_cells() + cell_info = cells.get(cell) + if not cell_info: + if fatal: + raise Exception(f"Cell {cell} not found") + return None + + return cell_info['uuid'] + + def get_cells(self): + """Returns the cells configured in the environment. 
+ + :returns: dict containing the cell information + :rtype: dict + """ + logger.info("Getting details of cells") + cells = {} + cmd = ['sudo', 'nova-manage', 'cell_v2', 'list_cells', '--verbose'] + handler = self.get_named_pebble_handler(NOVA_CONDUCTOR_CONTAINER) + try: + out = handler.execute(cmd, exception_on_error=True) + except ExecError: + logger.exception('list_cells failed') + raise + + for line in out.split('\n'): + columns = line.split('|') + if len(columns) < 2: + continue + columns = [c.strip() for c in columns] + try: + uuid.UUID(columns[2].strip()) + cells[columns[1]] = { + 'uuid': columns[2], + 'amqp': columns[3], + 'db': columns[4]} + except ValueError: + pass + + return cells + def configure_charm(self, event: ops.framework.EventBase) -> None: if not self.peers.ready: return metadata_secret = self.get_shared_metadatasecret() if metadata_secret: - logging.debug("Found metadata secret in leader DB") + logger.debug("Found metadata secret in leader DB") else: if self.unit.is_leader(): - logging.debug("Creating metadata secret") + logger.debug("Creating metadata secret") self.set_shared_metadatasecret() else: - logging.debug("Metadata secret not ready") + logger.debug("Metadata secret not ready") return super().configure_charm(event) @@ -213,6 +381,7 @@ class NovaXenaOperatorCharm(NovaOperatorCharm): openstack_release = 'xena' + if __name__ == "__main__": # Note: use_juju_for_storage=True required per # https://github.com/canonical/operator/issues/506 diff --git a/charms/nova-k8s/src/templates/nova.conf.j2 b/charms/nova-k8s/src/templates/nova.conf.j2 index 2a6dbefc..2909f0ed 100644 --- a/charms/nova-k8s/src/templates/nova.conf.j2 +++ b/charms/nova-k8s/src/templates/nova.conf.j2 @@ -4510,7 +4510,7 @@ region = {{ options.region }} # Periodic task interval. For more information, refer to the documentation. 
# (integer value) # Minimum value: -1 -#discover_hosts_in_cells_interval = -1 +discover_hosts_in_cells_interval = 30 # # The maximum number of placement results to request. For more information, diff --git a/charms/nova-k8s/unit_tests/test_nova_charm.py b/charms/nova-k8s/unit_tests/test_nova_charm.py index 8b50e181..8ed85291 100644 --- a/charms/nova-k8s/unit_tests/test_nova_charm.py +++ b/charms/nova-k8s/unit_tests/test_nova_charm.py @@ -67,7 +67,10 @@ class TestNovaOperatorCharm(test_utils.CharmTestCase): ['a2ensite', 'wsgi-nova-api'], ['sudo', '-u', 'nova', 'nova-manage', 'api_db', 'sync'], ['sudo', '-u', 'nova', 'nova-manage', 'cell_v2', 'map_cell0'], - ['sudo', '-u', 'nova', 'nova-manage', 'db', 'sync']] + ['sudo', '-u', 'nova', 'nova-manage', 'db', 'sync'], + ['sudo', '-u', 'nova', 'nova-manage', 'cell_v2', 'create_cell', + '--name', 'cell1', '--verbose'], + ] for cmd in setup_cmds: self.assertIn(cmd, self.container_calls.execute['nova-api']) self.assertEqual(