From 721a45485326c377c156a4dc4d9420d82dca8720 Mon Sep 17 00:00:00 2001 From: Hemanth Nakkina Date: Thu, 19 Oct 2023 14:00:14 +0530 Subject: [PATCH] stop services when mandatory integrations are removed Stop all the services running in the container when mandatory integrations are removed. Currently the relation data is not cleared during processing of gone away events. This is a bug in juju reported at [1]. Due to this the relation is still considered as ready. Add workaround to check the event and mark the relation to not ready if the event is gone away event of the relation. Catch database relation-broken event and block the application. [1] https://bugs.launchpad.net/juju/+bug/2024583 Change-Id: I80a0120d08b79561c13996c7a0f055824a1d5336 --- ops-sunbeam/ops_sunbeam/charm.py | 143 ++++++++++++++++-- ops-sunbeam/ops_sunbeam/container_handlers.py | 14 ++ .../ops_sunbeam/ovn/relation_handlers.py | 2 + ops-sunbeam/ops_sunbeam/relation_handlers.py | 19 +++ ops-sunbeam/tests/unit_tests/test_core.py | 66 ++++++-- 5 files changed, 224 insertions(+), 20 deletions(-) diff --git a/ops-sunbeam/ops_sunbeam/charm.py b/ops-sunbeam/ops_sunbeam/charm.py index de8cf27c..c4dd6d6d 100644 --- a/ops-sunbeam/ops_sunbeam/charm.py +++ b/ops-sunbeam/ops_sunbeam/charm.py @@ -35,6 +35,8 @@ import urllib from typing import ( List, Mapping, + Optional, + Set, ) import ops.charm @@ -234,9 +236,12 @@ class OSBaseOperatorCharm(ops.charm.CharmBase): ): raise sunbeam_guard.WaitingExceptionError("Leader not ready") - def check_relation_handlers_ready(self): + def check_relation_handlers_ready(self, event: ops.framework.EventBase): """Check all relation handlers are ready.""" - if not self.relation_handlers_ready(): + not_ready_relations = self.get_mandatory_relations_not_ready(event) + if not_ready_relations: + logger.info(f"Relations {not_ready_relations} incomplete") + self.stop_services(not_ready_relations) raise sunbeam_guard.WaitingExceptionError( "Not all relations are ready" ) @@ -252,7 +257,7 @@ class OSBaseOperatorCharm(ops.charm.CharmBase): def configure_unit(self, event: ops.framework.EventBase) -> None: """Run configuration on this unit.""" self.check_leader_ready() - self.check_relation_handlers_ready() + self.check_relation_handlers_ready(event) self._state.unit_bootstrapped = True def configure_app_leader(self, event): @@ -293,6 +298,10 @@ class OSBaseOperatorCharm(ops.charm.CharmBase): self.bootstrap_status.set(ActiveStatus()) self.post_config_setup() + def stop_services(self, relation: Optional[Set[str]] = None) -> None: + """Stop all running services.""" + # Machine charms should implement this function if required. + @property def supports_peer_relation(self) -> bool: """Whether the charm support the peers relation.""" @@ -354,22 +363,125 @@ class OSBaseOperatorCharm(ops.charm.CharmBase): # charms should handle the event if required pass - def relation_handlers_ready(self) -> bool: - """Determine whether all relations are ready for use.""" + def check_broken_relations( + self, relations: set, event: ops.framework.EventBase + ) -> set: + """Return all broken relations on given set of relations.""" + broken_relations = set() + + # Check for each relation if the event is gone away event. + # lazy import the events + # Note: Ceph relation not handled as there is no gone away event. + for relation in relations: + _is_broken = False + match relation: + case "database" | "api-database" | "cell-database": + from ops.charm import ( + RelationBrokenEvent, + ) + + if isinstance(event, RelationBrokenEvent): + _is_broken = True + case "ingress-public" | "ingress-internal": + from charms.traefik_k8s.v2.ingress import ( + IngressPerAppRevokedEvent, + ) + + if isinstance(event, IngressPerAppRevokedEvent): + _is_broken = True + case "identity-service": + from charms.keystone_k8s.v1.identity_service import ( + IdentityServiceGoneAwayEvent, + ) + + if isinstance(event, IdentityServiceGoneAwayEvent): + _is_broken = True + case "amqp": + from charms.rabbitmq_k8s.v0.rabbitmq import ( + RabbitMQGoneAwayEvent, + ) + + if isinstance(event, RabbitMQGoneAwayEvent): + _is_broken = True + case "certificates": + from charms.tls_certificates_interface.v1.tls_certificates import ( + CertificateExpiredEvent, + ) + + if isinstance(event, CertificateExpiredEvent): + _is_broken = True + case "ovsdb-cms": + from charms.ovn_central_k8s.v0.ovsdb import ( + OVSDBCMSGoneAwayEvent, + ) + + if isinstance(event, OVSDBCMSGoneAwayEvent): + _is_broken = True + case "identity-credentials": + from charms.keystone_k8s.v0.identity_credentials import ( + IdentityCredentialsGoneAwayEvent, + ) + + if isinstance(event, IdentityCredentialsGoneAwayEvent): + _is_broken = True + case "identity-ops": + from charms.keystone_k8s.v0.identity_resource import ( + IdentityOpsProviderGoneAwayEvent, + ) + + if isinstance(event, IdentityOpsProviderGoneAwayEvent): + _is_broken = True + case "gnocchi-db": + from charms.gnocchi_k8s.v0.gnocchi_service import ( + GnocchiServiceGoneAwayEvent, + ) + + if isinstance(event, GnocchiServiceGoneAwayEvent): + _is_broken = True + case "ceph-access": + from charms.cinder_ceph_k8s.v0.ceph_access import ( + CephAccessGoneAwayEvent, + ) + + if isinstance(event, CephAccessGoneAwayEvent): + _is_broken = True + case "dns-backend": + from charms.designate_bind_k8s.v0.bind_rndc import ( + BindRndcGoneAwayEvent, + ) + + if isinstance(event, BindRndcGoneAwayEvent): + _is_broken = True + + if _is_broken: + broken_relations.add(relation) + + return broken_relations + + def get_mandatory_relations_not_ready( + self, event: ops.framework.EventBase + ) -> Set[str]: + """Get mandatory relations that are not ready for use.""" ready_relations = { handler.relation_name for handler in self.relation_handlers if handler.mandatory and handler.ready } + + # The relation data for broken relations are not cleared during + # processing of gone away event. This is a temporary workaround + # to mark broken relations as not ready. + # The workaround can be removed once the below bug is resolved + # https://bugs.launchpad.net/juju/+bug/2024583 + # https://github.com/canonical/operator/issues/940 + broken_relations = self.check_broken_relations(ready_relations, event) + ready_relations = ready_relations.difference(broken_relations) + not_ready_relations = self.mandatory_relations.difference( ready_relations ) - if len(not_ready_relations) != 0: - logger.info(f"Relations {not_ready_relations} incomplete") - return False - - return True + return not_ready_relations def contexts(self) -> sunbeam_core.OPSCharmContexts: """Construct context for rendering templates.""" @@ -495,10 +607,19 @@ class OSBaseOperatorCharmK8S(OSBaseOperatorCharm): "Container service not ready" ) + def stop_services(self, relation: Optional[Set[str]] = None) -> None: + """Stop all running services.""" + for ph in self.pebble_handlers: + if ph.pebble_ready: + logging.debug( + f"Stopping all services in container {ph.container_name}" + ) + ph.stop_all() + def configure_unit(self, event: ops.framework.EventBase) -> None: """Run configuration on this unit.""" self.check_leader_ready() - self.check_relation_handlers_ready() + self.check_relation_handlers_ready(event) self.open_ports() self.init_container_services() self.check_pebble_handlers_ready() diff --git a/ops-sunbeam/ops_sunbeam/container_handlers.py b/ops-sunbeam/ops_sunbeam/container_handlers.py index 36c472a7..1d9e3437 100644 --- a/ops-sunbeam/ops_sunbeam/container_handlers.py +++ b/ops-sunbeam/ops_sunbeam/container_handlers.py @@ -302,6 +302,20 @@ class PebbleHandler(ops.charm.Object): ) container.restart(service_name) + def stop_all(self) -> None: + """Stop services in container.""" + container = self.charm.unit.get_container(self.container_name) + if not container.can_connect(): + logger.debug( + f"Container {self.container_name} not ready, no need to stop" + ) + return + + services = container.get_services() + if services: + logger.debug("Stopping all services") + container.stop(*services.keys()) + class ServicePebbleHandler(PebbleHandler): """Container handler for containers which manage a service.""" diff --git a/ops-sunbeam/ops_sunbeam/ovn/relation_handlers.py b/ops-sunbeam/ops_sunbeam/ovn/relation_handlers.py index 8d860fc4..7e2965f5 100644 --- a/ops-sunbeam/ops_sunbeam/ovn/relation_handlers.py +++ b/ops-sunbeam/ops_sunbeam/ovn/relation_handlers.py @@ -554,6 +554,8 @@ class OVSDBCMSRequiresHandler( """Handle OVSDB CMS change events.""" self.callback_f(event) if self.mandatory: + logger.debug("ovsdb-cms integration removed, stop services") + self.charm.stop_services({self.relation_name}) self.status.set(BlockedStatus("integration missing")) @property diff --git a/ops-sunbeam/ops_sunbeam/relation_handlers.py b/ops-sunbeam/ops_sunbeam/relation_handlers.py index 69b40f86..29e87bae 100644 --- a/ops-sunbeam/ops_sunbeam/relation_handlers.py +++ b/ops-sunbeam/ops_sunbeam/relation_handlers.py @@ -300,6 +300,14 @@ class DBHandler(RelationHandler): getattr(db.on, f"{alias}_endpoints_changed"), self._on_database_updated, ) + + # Gone away events are not handled by the database interface library. + # So handle them in this class + self.framework.observe( + self.charm.on[self.relation_name].relation_broken, + self._on_database_relation_broken, + ) + # this will be set to self.interface in parent class return db @@ -315,6 +323,13 @@ class DBHandler(RelationHandler): logger.info(f"Received data: {display_data}") self.callback_f(event) + def _on_database_relation_broken( + self, event: ops.framework.EventBase + ) -> None: + """Handle database gone away event.""" + if self.mandatory: + self.status.set(BlockedStatus("integration missing")) + def get_relation_data(self) -> dict: """Load the data from the relation for consumption in the handler.""" if len(self.interface.relations) > 0: @@ -1178,6 +1193,8 @@ class IdentityResourceRequiresHandler(RelationHandler): """Handles provider_goneaway event.""" logger.info("Keystone provider not available process any requests") self.callback_f(event) + if self.mandatory: + self.status.set(BlockedStatus("integration missing")) def _on_response_available(self, event) -> None: """Handles response available events.""" @@ -1248,6 +1265,8 @@ class CeilometerServiceRequiresHandler(RelationHandler): """Handle gone_away event.""" logger.debug("Ceilometer service relation is departed/broken") self.callback_f(event) + if self.mandatory: + self.status.set(BlockedStatus("integration missing")) @property def ready(self) -> bool: diff --git a/ops-sunbeam/tests/unit_tests/test_core.py b/ops-sunbeam/tests/unit_tests/test_core.py index 91266b69..52e8647d 100644 --- a/ops-sunbeam/tests/unit_tests/test_core.py +++ b/ops-sunbeam/tests/unit_tests/test_core.py @@ -41,6 +41,7 @@ class TestOSBaseOperatorCharm(test_utils.CharmTestCase): """Charm test class setup.""" self.container_calls = test_utils.ContainerCalls() super().setUp(sunbeam_charm, self.PATCHES) + self.mock_event = mock.MagicMock() self.harness = test_utils.get_harness( test_charms.MyCharm, test_charms.CHARM_METADATA, @@ -57,7 +58,12 @@ class TestOSBaseOperatorCharm(test_utils.CharmTestCase): def test_relation_handlers_ready(self) -> None: """Test relation handlers are ready.""" - self.assertTrue(self.harness.charm.relation_handlers_ready()) + self.assertSetEqual( + self.harness.charm.get_mandatory_relations_not_ready( + self.mock_event + ), + set(), + ) class TestOSBaseOperatorCharmK8S(test_utils.CharmTestCase): @@ -76,6 +82,7 @@ class TestOSBaseOperatorCharmK8S(test_utils.CharmTestCase): charm_config=test_charms.CHARM_CONFIG, initial_charm_config=test_charms.INITIAL_CHARM_CONFIG, ) + self.mock_event = mock.MagicMock() self.harness.begin() self.addCleanup(self.harness.cleanup) @@ -100,7 +107,12 @@ class TestOSBaseOperatorCharmK8S(test_utils.CharmTestCase): def test_relation_handlers_ready(self) -> None: """Test relation handlers are ready.""" - self.assertTrue(self.harness.charm.relation_handlers_ready()) + self.assertSetEqual( + self.harness.charm.get_mandatory_relations_not_ready( + self.mock_event + ), + set(), + ) class _TestOSBaseOperatorAPICharm(test_utils.CharmTestCase): @@ -113,6 +125,7 @@ class _TestOSBaseOperatorAPICharm(test_utils.CharmTestCase): self.container_calls = test_utils.ContainerCalls() super().setUp(sunbeam_charm, self.PATCHES) + self.mock_event = mock.MagicMock() self.harness = test_utils.get_harness( charm_to_test, test_charms.API_CHARM_METADATA, @@ -345,11 +358,21 @@ class TestOSBaseOperatorAPICharm(_TestOSBaseOperatorAPICharm): # Add all mandatory relations and test relation_handlers_ready db_rel_id = test_utils.add_base_db_relation(self.harness) test_utils.add_db_relation_credentials(self.harness, db_rel_id) - self.assertFalse(self.harness.charm.relation_handlers_ready()) + self.assertSetEqual( + self.harness.charm.get_mandatory_relations_not_ready( + self.mock_event + ), + {"identity-service", "ingress-public", "amqp"}, + ) amqp_rel_id = test_utils.add_base_amqp_relation(self.harness) test_utils.add_amqp_relation_credentials(self.harness, amqp_rel_id) - self.assertFalse(self.harness.charm.relation_handlers_ready()) + self.assertSetEqual( + self.harness.charm.get_mandatory_relations_not_ready( + self.mock_event + ), + {"ingress-public", "identity-service"}, + ) identity_rel_id = test_utils.add_base_identity_service_relation( self.harness @@ -357,7 +380,12 @@ class TestOSBaseOperatorAPICharm(_TestOSBaseOperatorAPICharm): test_utils.add_identity_service_relation_response( self.harness, identity_rel_id ) - self.assertFalse(self.harness.charm.relation_handlers_ready()) + self.assertSetEqual( + self.harness.charm.get_mandatory_relations_not_ready( + self.mock_event + ), + {"ingress-public"}, + ) ingress_rel_id = test_utils.add_ingress_relation( self.harness, "public" @@ -372,7 +400,12 @@ class TestOSBaseOperatorAPICharm(_TestOSBaseOperatorAPICharm): test_utils.add_ceph_access_relation_response( self.harness, ceph_access_rel_id ) - self.assertTrue(self.harness.charm.relation_handlers_ready()) + self.assertSetEqual( + self.harness.charm.get_mandatory_relations_not_ready( + self.mock_event + ), + set(), + ) # Add an optional relation and test if relation_handlers_ready # returns True @@ -382,12 +415,22 @@ class TestOSBaseOperatorAPICharm(_TestOSBaseOperatorAPICharm): test_utils.add_ingress_relation_data( self.harness, optional_rel_id, "internal" ) - self.assertTrue(self.harness.charm.relation_handlers_ready()) + self.assertSetEqual( + self.harness.charm.get_mandatory_relations_not_ready( + self.mock_event + ), + set(), + ) # Remove a mandatory relation and test if relation_handlers_ready # returns False self.harness.remove_relation(ingress_rel_id) - self.assertFalse(self.harness.charm.relation_handlers_ready()) + self.assertSetEqual( + self.harness.charm.get_mandatory_relations_not_ready( + self.mock_event + ), + {"ingress-public"}, + ) # Add the mandatory relation back and retest relation_handlers_ready ingress_rel_id = test_utils.add_ingress_relation( @@ -396,7 +439,12 @@ class TestOSBaseOperatorAPICharm(_TestOSBaseOperatorAPICharm): test_utils.add_ingress_relation_data( self.harness, ingress_rel_id, "public" ) - self.assertTrue(self.harness.charm.relation_handlers_ready()) + self.assertSetEqual( + self.harness.charm.get_mandatory_relations_not_ready( + self.mock_event + ), + set(), + ) def test_add_explicit_port(self): """Test add_explicit_port method."""