neutron-lbaas haproxy agent prevent vif unplug when failover occurs

When lbaas fails over after an agent is unresponsive, the dead
agent on coming up should not unplug the vif port, if the lbaas
is active on other agent and when failover is configured.

This patch fixes the problem.

Story: #2003672
Change-Id: I76c38b20eb72c1dba0a0a2a140bbe77053aa3ed0
This commit is contained in:
Swaminathan Vasudevan 2018-06-28 11:56:39 -07:00 committed by Michael Johnson
parent 2552fb1e5d
commit 72399374b2
4 changed files with 29 additions and 19 deletions

View File

@ -159,7 +159,7 @@ class LbaasAgentManager(periodic_task.PeriodicTasks):
ready_instances = set(self.plugin_rpc.get_ready_devices())
for deleted_id in known_instances - ready_instances:
self._destroy_loadbalancer(deleted_id)
self._destroy_loadbalancer(deleted_id, resync=True)
for loadbalancer_id in ready_instances:
self._reload_loadbalancer(loadbalancer_id)
@ -168,7 +168,7 @@ class LbaasAgentManager(periodic_task.PeriodicTasks):
LOG.exception('Unable to retrieve ready devices')
self.needs_resync = True
self.remove_orphans()
self.remove_orphans(resync=True)
def _get_driver(self, loadbalancer_id):
if loadbalancer_id not in self.instance_mapping:
@ -198,10 +198,11 @@ class LbaasAgentManager(periodic_task.PeriodicTasks):
loadbalancer_id)
self.needs_resync = True
def _destroy_loadbalancer(self, lb_id):
def _destroy_loadbalancer(self, lb_id, resync=False):
driver = self._get_driver(lb_id)
try:
driver.undeploy_instance(lb_id, delete_namespace=True)
driver.undeploy_instance(lb_id, delete_namespace=True,
resync=resync)
del self.instance_mapping[lb_id]
self.plugin_rpc.loadbalancer_destroyed(lb_id)
except Exception:
@ -209,12 +210,13 @@ class LbaasAgentManager(periodic_task.PeriodicTasks):
lb_id)
self.needs_resync = True
def remove_orphans(self):
def remove_orphans(self, resync=False):
for driver_name in self.device_drivers:
lb_ids = [lb_id for lb_id in self.instance_mapping
if self.instance_mapping[lb_id] == driver_name]
try:
self.device_drivers[driver_name].remove_orphans(lb_ids)
self.device_drivers[driver_name].remove_orphans(lb_ids,
resync=resync)
except NotImplementedError:
pass # Not all drivers will support this

View File

@ -156,8 +156,13 @@ class HaproxyNSDriver(agent_device_driver.AgentDeviceDriver):
pids_path=pid_path,
pid_file=pid_data)
pm.disable()
# unplug the ports
if loadbalancer_id in self.deployed_loadbalancers:
# Before unplugging the port check if the LBaas
# is being active and see if it is a resync
# or failover is configured
resync = kwargs.get('resync', False)
failover_state = cfg.CONF.allow_automatic_lbaas_agent_failover
if (loadbalancer_id in self.deployed_loadbalancers and
not (resync and failover_state)):
self._unplug(namespace,
self.deployed_loadbalancers[loadbalancer_id].vip_port)
@ -181,7 +186,7 @@ class HaproxyNSDriver(agent_device_driver.AgentDeviceDriver):
if loadbalancer_id in self.deployed_loadbalancers:
del self.deployed_loadbalancers[loadbalancer_id]
def remove_orphans(self, known_loadbalancer_ids):
def remove_orphans(self, known_loadbalancer_ids, resync=False):
if not os.path.exists(self.state_path):
return
@ -189,7 +194,8 @@ class HaproxyNSDriver(agent_device_driver.AgentDeviceDriver):
if lb_id not in known_loadbalancer_ids)
for lb_id in orphans:
if self.exists(lb_id):
self.undeploy_instance(lb_id, cleanup_namespace=True)
self.undeploy_instance(lb_id, cleanup_namespace=True,
resync=resync)
def get_stats(self, loadbalancer_id):
socket_path = self._get_state_file_path(loadbalancer_id,

View File

@ -98,8 +98,9 @@ class TestManager(base.BaseTestCase):
reload.assert_has_calls([mock.call(i) for i in reloaded],
any_order=True)
destroy.assert_has_calls([mock.call(i) for i in destroyed],
any_order=True)
destroy.assert_has_calls(
[mock.call(i, resync=True) for i in destroyed],
any_order=True)
self.assertFalse(self.mgr.needs_resync)
def test_sync_state_all_known(self):
@ -180,7 +181,7 @@ class TestManager(base.BaseTestCase):
self.mgr._destroy_loadbalancer(lb_id)
self.driver_mock.undeploy_instance.assert_called_once_with(
lb_id, delete_namespace=True)
lb_id, delete_namespace=True, resync=False)
self.assertNotIn(lb_id, self.mgr.instance_mapping)
self.rpc_mock.loadbalancer_destroyed.assert_called_once_with(lb_id)
self.assertFalse(self.mgr.needs_resync)
@ -193,7 +194,7 @@ class TestManager(base.BaseTestCase):
self.mgr._destroy_loadbalancer(lb_id)
self.driver_mock.undeploy_instance.assert_called_once_with(
lb_id, delete_namespace=True)
lb_id, delete_namespace=True, resync=False)
self.assertIn(lb_id, self.mgr.instance_mapping)
self.assertFalse(self.rpc_mock.loadbalancer_destroyed.called)
self.assertTrue(self.log.exception.called)
@ -204,15 +205,16 @@ class TestManager(base.BaseTestCase):
self.mgr._get_driver, 'unknown')
def test_remove_orphans(self):
self.mgr.remove_orphans()
self.driver_mock.remove_orphans.assert_called_once_with(['1', '2'])
self.mgr.remove_orphans(resync=False)
self.driver_mock.remove_orphans.assert_called_once_with(
['1', '2'], resync=False)
def test_agent_disabled(self):
payload = {'admin_state_up': False}
self.mgr.agent_updated(mock.Mock(), payload)
self.driver_mock.undeploy_instance.assert_has_calls(
[mock.call('1', delete_namespace=True),
mock.call('2', delete_namespace=True)],
[mock.call('1', delete_namespace=True, resync=False),
mock.call('2', delete_namespace=True, resync=False)],
any_order=True
)

View File

@ -142,7 +142,7 @@ class TestHaproxyNSDriver(base.BaseTestCase):
list_dir.assert_called_once_with(self.driver.state_path)
self.driver.exists.assert_called_once_with('lb2')
self.driver.undeploy_instance.assert_called_once_with(
'lb2', cleanup_namespace=True)
'lb2', cleanup_namespace=True, resync=False)
def test_get_stats(self):
# Shamelessly stolen from v1 namespace driver tests.