From 1a35d6dc81b96b34f1b5251540afcbe62f2fb0d2 Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Mon, 26 Mar 2018 16:40:00 -0700 Subject: [PATCH] Fix health manager edge case with zombie amphora There is an edge case where an amphora may have been deleted, marked deleted in our database, but still be running in nova. One example is an instance stuck in nova "deleting" status. These can still report health heartbeats, but we will see them as failed as they are reporting load balancing configuration that we do not have a record for. This will lead to a failover to repair the amphora, which will re-attempt the delete and reset the amphora health record, which can lead to another failover attempt. This patch will log and leave the amphora as "busy" if a failover is attempted on an amphora we show as "DELETED". This will prevent repeated failover attempts on "DELETED" amphora that are still alive in the compute system. It also fixes a mis-named mock object in a failover test. Change-Id: I3397ffacf8e08964ecd4b47f2353542b6bc57645 --- .../controller/worker/controller_worker.py | 10 +++++++ .../worker/test_controller_worker.py | 28 ++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/octavia/controller/worker/controller_worker.py b/octavia/controller/worker/controller_worker.py index ae6de4802a..be37e67af7 100644 --- a/octavia/controller/worker/controller_worker.py +++ b/octavia/controller/worker/controller_worker.py @@ -50,6 +50,7 @@ class ControllerWorker(base_taskflow.BaseTaskFlowEngine): self._l7rule_flows = l7rule_flows.L7RuleFlows() self._amphora_repo = repo.AmphoraRepository() + self._amphora_health_repo = repo.AmphoraHealthRepository() self._health_mon_repo = repo.HealthMonitorRepository() self._lb_repo = repo.LoadBalancerRepository() self._listener_repo = repo.ListenerRepository() @@ -656,6 +657,15 @@ class ControllerWorker(base_taskflow.BaseTaskFlowEngine): constants.LOADBALANCER_ID: amp.load_balancer_id, constants.BUILD_TYPE_PRIORITY: 
priority, } + if amp.status == constants.DELETED: + LOG.warning('Amphora %s is marked DELETED in the database but ' + 'was submitted for failover. Marking it busy in the ' + 'amphora health table to exclude it from health ' + 'checks and skipping the failover.', amp.id) + self._amphora_health_repo.update(db_apis.get_session(), amp.id, + busy=True) + return + if (CONF.house_keeping.spare_amphora_pool_size == 0) and ( CONF.nova.enable_anti_affinity is False): LOG.warning("Failing over amphora with no spares pool may " diff --git a/octavia/tests/unit/controller/worker/test_controller_worker.py b/octavia/tests/unit/controller/worker/test_controller_worker.py index 5eabaf7827..432e16f92c 100644 --- a/octavia/tests/unit/controller/worker/test_controller_worker.py +++ b/octavia/tests/unit/controller/worker/test_controller_worker.py @@ -1108,7 +1108,7 @@ class TestControllerWorker(base.TestCase): @mock.patch('octavia.db.repositories.LoadBalancerRepository.update') def test_failover_amphora(self, mock_update, - mock_get_update_listener_flow, + mock_get_failover_flow, mock_api_get_session, mock_dyn_log_listener, mock_taskflow_load, @@ -1140,6 +1140,32 @@ class TestControllerWorker(base.TestCase): mock_update.assert_called_with('TEST', LB_ID, provisioning_status=constants.ACTIVE) + @mock.patch('octavia.db.repositories.AmphoraHealthRepository.update') + def test_failover_deleted_amphora(self, + mock_update, + mock_api_get_session, + mock_dyn_log_listener, + mock_taskflow_load, + mock_pool_repo_get, + mock_member_repo_get, + mock_l7rule_repo_get, + mock_l7policy_repo_get, + mock_listener_repo_get, + mock_lb_repo_get, + mock_health_mon_repo_get, + mock_amp_repo_get): + + mock_taskflow_load.reset_mock() + mock_amphora = mock.MagicMock() + mock_amphora.id = AMP_ID + mock_amphora.status = constants.DELETED + + cw = controller_worker.ControllerWorker() + cw._perform_amphora_failover(mock_amphora, 10) + + mock_update.assert_called_with('TEST', AMP_ID, busy=True) + 
mock_taskflow_load.assert_not_called() + @mock.patch('octavia.controller.worker.' 'controller_worker.ControllerWorker._perform_amphora_failover') @mock.patch('octavia.db.repositories.LoadBalancerRepository.update')