neutron-lbaas haproxy agent prevent vif unplug when failover occurs

When a load balancer fails over because its LBaaS agent became
unresponsive, the dead agent, on coming back up, should not unplug
the VIF port if the load balancer is now active on another agent and
automatic failover is configured.

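This patch fixes the problem by passing a resync flag from the agent
manager's state sync and orphan removal down to the haproxy namespace
driver, which skips unplugging the VIP port when resync is set and
allow_automatic_lbaas_agent_failover is enabled.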

Story: #2003672
Change-Id: I76c38b20eb72c1dba0a0a2a140bbe77053aa3ed0
This commit is contained in:
Swaminathan Vasudevan 2018-06-28 11:56:39 -07:00 committed by Michael Johnson
parent 2552fb1e5d
commit 72399374b2
4 changed files with 29 additions and 19 deletions

View File

@ -159,7 +159,7 @@ class LbaasAgentManager(periodic_task.PeriodicTasks):
ready_instances = set(self.plugin_rpc.get_ready_devices()) ready_instances = set(self.plugin_rpc.get_ready_devices())
for deleted_id in known_instances - ready_instances: for deleted_id in known_instances - ready_instances:
self._destroy_loadbalancer(deleted_id) self._destroy_loadbalancer(deleted_id, resync=True)
for loadbalancer_id in ready_instances: for loadbalancer_id in ready_instances:
self._reload_loadbalancer(loadbalancer_id) self._reload_loadbalancer(loadbalancer_id)
@ -168,7 +168,7 @@ class LbaasAgentManager(periodic_task.PeriodicTasks):
LOG.exception('Unable to retrieve ready devices') LOG.exception('Unable to retrieve ready devices')
self.needs_resync = True self.needs_resync = True
self.remove_orphans() self.remove_orphans(resync=True)
def _get_driver(self, loadbalancer_id): def _get_driver(self, loadbalancer_id):
if loadbalancer_id not in self.instance_mapping: if loadbalancer_id not in self.instance_mapping:
@ -198,10 +198,11 @@ class LbaasAgentManager(periodic_task.PeriodicTasks):
loadbalancer_id) loadbalancer_id)
self.needs_resync = True self.needs_resync = True
def _destroy_loadbalancer(self, lb_id): def _destroy_loadbalancer(self, lb_id, resync=False):
driver = self._get_driver(lb_id) driver = self._get_driver(lb_id)
try: try:
driver.undeploy_instance(lb_id, delete_namespace=True) driver.undeploy_instance(lb_id, delete_namespace=True,
resync=resync)
del self.instance_mapping[lb_id] del self.instance_mapping[lb_id]
self.plugin_rpc.loadbalancer_destroyed(lb_id) self.plugin_rpc.loadbalancer_destroyed(lb_id)
except Exception: except Exception:
@ -209,12 +210,13 @@ class LbaasAgentManager(periodic_task.PeriodicTasks):
lb_id) lb_id)
self.needs_resync = True self.needs_resync = True
def remove_orphans(self): def remove_orphans(self, resync=False):
for driver_name in self.device_drivers: for driver_name in self.device_drivers:
lb_ids = [lb_id for lb_id in self.instance_mapping lb_ids = [lb_id for lb_id in self.instance_mapping
if self.instance_mapping[lb_id] == driver_name] if self.instance_mapping[lb_id] == driver_name]
try: try:
self.device_drivers[driver_name].remove_orphans(lb_ids) self.device_drivers[driver_name].remove_orphans(lb_ids,
resync=resync)
except NotImplementedError: except NotImplementedError:
pass # Not all drivers will support this pass # Not all drivers will support this

View File

@ -156,8 +156,13 @@ class HaproxyNSDriver(agent_device_driver.AgentDeviceDriver):
pids_path=pid_path, pids_path=pid_path,
pid_file=pid_data) pid_file=pid_data)
pm.disable() pm.disable()
# unplug the ports # Before unplugging the port check if the LBaas
if loadbalancer_id in self.deployed_loadbalancers: # is being active and see if it is a resync
# or failover is configured
resync = kwargs.get('resync', False)
failover_state = cfg.CONF.allow_automatic_lbaas_agent_failover
if (loadbalancer_id in self.deployed_loadbalancers and
not (resync and failover_state)):
self._unplug(namespace, self._unplug(namespace,
self.deployed_loadbalancers[loadbalancer_id].vip_port) self.deployed_loadbalancers[loadbalancer_id].vip_port)
@ -181,7 +186,7 @@ class HaproxyNSDriver(agent_device_driver.AgentDeviceDriver):
if loadbalancer_id in self.deployed_loadbalancers: if loadbalancer_id in self.deployed_loadbalancers:
del self.deployed_loadbalancers[loadbalancer_id] del self.deployed_loadbalancers[loadbalancer_id]
def remove_orphans(self, known_loadbalancer_ids): def remove_orphans(self, known_loadbalancer_ids, resync=False):
if not os.path.exists(self.state_path): if not os.path.exists(self.state_path):
return return
@ -189,7 +194,8 @@ class HaproxyNSDriver(agent_device_driver.AgentDeviceDriver):
if lb_id not in known_loadbalancer_ids) if lb_id not in known_loadbalancer_ids)
for lb_id in orphans: for lb_id in orphans:
if self.exists(lb_id): if self.exists(lb_id):
self.undeploy_instance(lb_id, cleanup_namespace=True) self.undeploy_instance(lb_id, cleanup_namespace=True,
resync=resync)
def get_stats(self, loadbalancer_id): def get_stats(self, loadbalancer_id):
socket_path = self._get_state_file_path(loadbalancer_id, socket_path = self._get_state_file_path(loadbalancer_id,

View File

@ -98,8 +98,9 @@ class TestManager(base.BaseTestCase):
reload.assert_has_calls([mock.call(i) for i in reloaded], reload.assert_has_calls([mock.call(i) for i in reloaded],
any_order=True) any_order=True)
destroy.assert_has_calls([mock.call(i) for i in destroyed], destroy.assert_has_calls(
any_order=True) [mock.call(i, resync=True) for i in destroyed],
any_order=True)
self.assertFalse(self.mgr.needs_resync) self.assertFalse(self.mgr.needs_resync)
def test_sync_state_all_known(self): def test_sync_state_all_known(self):
@ -180,7 +181,7 @@ class TestManager(base.BaseTestCase):
self.mgr._destroy_loadbalancer(lb_id) self.mgr._destroy_loadbalancer(lb_id)
self.driver_mock.undeploy_instance.assert_called_once_with( self.driver_mock.undeploy_instance.assert_called_once_with(
lb_id, delete_namespace=True) lb_id, delete_namespace=True, resync=False)
self.assertNotIn(lb_id, self.mgr.instance_mapping) self.assertNotIn(lb_id, self.mgr.instance_mapping)
self.rpc_mock.loadbalancer_destroyed.assert_called_once_with(lb_id) self.rpc_mock.loadbalancer_destroyed.assert_called_once_with(lb_id)
self.assertFalse(self.mgr.needs_resync) self.assertFalse(self.mgr.needs_resync)
@ -193,7 +194,7 @@ class TestManager(base.BaseTestCase):
self.mgr._destroy_loadbalancer(lb_id) self.mgr._destroy_loadbalancer(lb_id)
self.driver_mock.undeploy_instance.assert_called_once_with( self.driver_mock.undeploy_instance.assert_called_once_with(
lb_id, delete_namespace=True) lb_id, delete_namespace=True, resync=False)
self.assertIn(lb_id, self.mgr.instance_mapping) self.assertIn(lb_id, self.mgr.instance_mapping)
self.assertFalse(self.rpc_mock.loadbalancer_destroyed.called) self.assertFalse(self.rpc_mock.loadbalancer_destroyed.called)
self.assertTrue(self.log.exception.called) self.assertTrue(self.log.exception.called)
@ -204,15 +205,16 @@ class TestManager(base.BaseTestCase):
self.mgr._get_driver, 'unknown') self.mgr._get_driver, 'unknown')
def test_remove_orphans(self): def test_remove_orphans(self):
self.mgr.remove_orphans() self.mgr.remove_orphans(resync=False)
self.driver_mock.remove_orphans.assert_called_once_with(['1', '2']) self.driver_mock.remove_orphans.assert_called_once_with(
['1', '2'], resync=False)
def test_agent_disabled(self): def test_agent_disabled(self):
payload = {'admin_state_up': False} payload = {'admin_state_up': False}
self.mgr.agent_updated(mock.Mock(), payload) self.mgr.agent_updated(mock.Mock(), payload)
self.driver_mock.undeploy_instance.assert_has_calls( self.driver_mock.undeploy_instance.assert_has_calls(
[mock.call('1', delete_namespace=True), [mock.call('1', delete_namespace=True, resync=False),
mock.call('2', delete_namespace=True)], mock.call('2', delete_namespace=True, resync=False)],
any_order=True any_order=True
) )

View File

@ -142,7 +142,7 @@ class TestHaproxyNSDriver(base.BaseTestCase):
list_dir.assert_called_once_with(self.driver.state_path) list_dir.assert_called_once_with(self.driver.state_path)
self.driver.exists.assert_called_once_with('lb2') self.driver.exists.assert_called_once_with('lb2')
self.driver.undeploy_instance.assert_called_once_with( self.driver.undeploy_instance.assert_called_once_with(
'lb2', cleanup_namespace=True) 'lb2', cleanup_namespace=True, resync=False)
def test_get_stats(self): def test_get_stats(self):
# Shamelessly stolen from v1 namespace driver tests. # Shamelessly stolen from v1 namespace driver tests.