Add a lock to prevent race during detach/attach of interface
Only allow one detach/attach at a time with the same pattern instance-port_id in order to avoid race condition when multiple detach/attach are run concurrently. When multiple detach run concurrently on a specific instance-port_id, manager consider many of them as valid because info_cache still contains the port and info_cache is refreshed only once the first request complete. So during this gap of time, while the first request accomplishes the task, all subsequent requests are destined to fail and log a warning [1] in different location of code, depending on the outcome of the first request. The Issue is that all those caught requests finally run a deallocate_port_for_instance which will unbind the port. This may cause a race condition, because a successful attach can pass between those unbind, and be silently unbound, resulting in an infrastructure/DB inconsistency. [1] 'Detaching interface %(mac)s failed because the device is no longer found on the guest.' Closes-Bug: #1892870 Change-Id: Iea5969d0bd16dc9a6f1ba950224b0115e466ce66
This commit is contained in:
parent
d09fade87e
commit
39831c5599
@ -7481,6 +7481,19 @@ class ComputeManager(manager.Manager):
|
||||
def attach_interface(self, context, instance, network_id, port_id,
|
||||
requested_ip, tag):
|
||||
"""Use hotplug to add an network adapter to an instance."""
|
||||
lockname = 'interface-%s-%s' % (instance.uuid, port_id)
|
||||
|
||||
@utils.synchronized(lockname)
|
||||
def do_attach_interface(context, instance, network_id, port_id,
|
||||
requested_ip, tag):
|
||||
return self._attach_interface(context, instance, network_id,
|
||||
port_id, requested_ip, tag)
|
||||
|
||||
return do_attach_interface(context, instance, network_id, port_id,
|
||||
requested_ip, tag)
|
||||
|
||||
def _attach_interface(self, context, instance, network_id, port_id,
|
||||
requested_ip, tag):
|
||||
if not self.driver.capabilities.get('supports_attach_interface',
|
||||
False):
|
||||
raise exception.AttachInterfaceNotSupported(
|
||||
@ -7540,6 +7553,18 @@ class ComputeManager(manager.Manager):
|
||||
@wrap_instance_fault
|
||||
def detach_interface(self, context, instance, port_id):
|
||||
"""Detach a network adapter from an instance."""
|
||||
lockname = 'interface-%s-%s' % (instance.uuid, port_id)
|
||||
|
||||
@utils.synchronized(lockname)
|
||||
def do_detach_interface(context, instance, port_id):
|
||||
self._detach_interface(context, instance, port_id)
|
||||
|
||||
do_detach_interface(context, instance, port_id)
|
||||
|
||||
def _detach_interface(self, context, instance, port_id):
|
||||
# NOTE(aarents): we need to refresh info cache from DB here,
|
||||
# as previous detach/attach lock holder just updated it.
|
||||
compute_utils.refresh_info_cache_for_instance(context, instance)
|
||||
network_info = instance.info_cache.network_info
|
||||
condemned = None
|
||||
for vif in network_info:
|
||||
|
@ -10285,11 +10285,15 @@ class ComputeAPITestCase(BaseTestCase):
|
||||
network_id = nwinfo[0]['network']['id']
|
||||
port_id = nwinfo[0]['id']
|
||||
req_ip = '1.2.3.4'
|
||||
lock_name = 'interface-%s-%s' % (instance.uuid, port_id)
|
||||
mock_allocate = mock.Mock(return_value=nwinfo)
|
||||
self.compute.network_api.allocate_port_for_instance = mock_allocate
|
||||
|
||||
with mock.patch.dict(self.compute.driver.capabilities,
|
||||
supports_attach_interface=True):
|
||||
with test.nested(
|
||||
mock.patch.dict(self.compute.driver.capabilities,
|
||||
supports_attach_interface=True),
|
||||
mock.patch('oslo_concurrency.lockutils.lock')
|
||||
) as (cap, mock_lock):
|
||||
vif = self.compute.attach_interface(self.context,
|
||||
instance,
|
||||
network_id,
|
||||
@ -10304,6 +10308,9 @@ class ComputeAPITestCase(BaseTestCase):
|
||||
action='interface_attach', phase='start'),
|
||||
mock.call(self.context, instance, self.compute.host,
|
||||
action='interface_attach', phase='end')])
|
||||
mock_lock.assert_called_once_with(lock_name, mock.ANY, mock.ANY,
|
||||
mock.ANY, delay=mock.ANY, do_log=mock.ANY, fair=mock.ANY,
|
||||
semaphores=mock.ANY)
|
||||
return nwinfo, port_id
|
||||
|
||||
@mock.patch.object(compute_utils, 'notify_about_instance_action')
|
||||
@ -10401,6 +10408,7 @@ class ComputeAPITestCase(BaseTestCase):
|
||||
self.context, uuids.info_cache_instance)
|
||||
instance.info_cache.network_info = network_model.NetworkInfo.hydrate(
|
||||
nwinfo)
|
||||
lock_name = 'interface-%s-%s' % (instance.uuid, port_id)
|
||||
|
||||
port_allocation = {uuids.rp1: {'NET_BW_EGR_KILOBIT_PER_SEC': 10000}}
|
||||
with test.nested(
|
||||
@ -10409,8 +10417,9 @@ class ComputeAPITestCase(BaseTestCase):
|
||||
'remove_resources_from_instance_allocation'),
|
||||
mock.patch.object(self.compute.network_api,
|
||||
'deallocate_port_for_instance',
|
||||
return_value=([], port_allocation))) as (
|
||||
mock_remove_alloc, mock_deallocate):
|
||||
return_value=([], port_allocation)),
|
||||
mock.patch('oslo_concurrency.lockutils.lock')) as (
|
||||
mock_remove_alloc, mock_deallocate, mock_lock):
|
||||
self.compute.detach_interface(self.context, instance, port_id)
|
||||
|
||||
mock_deallocate.assert_called_once_with(
|
||||
@ -10424,6 +10433,9 @@ class ComputeAPITestCase(BaseTestCase):
|
||||
action='interface_detach', phase='start'),
|
||||
mock.call(self.context, instance, self.compute.host,
|
||||
action='interface_detach', phase='end')])
|
||||
mock_lock.assert_called_once_with(lock_name, mock.ANY, mock.ANY,
|
||||
mock.ANY, delay=mock.ANY, do_log=mock.ANY, fair=mock.ANY,
|
||||
semaphores=mock.ANY)
|
||||
|
||||
@mock.patch('nova.compute.manager.LOG.log')
|
||||
def test_detach_interface_failed(self, mock_log):
|
||||
|
@ -2586,8 +2586,9 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
|
||||
|
||||
@mock.patch.object(compute_utils, 'EventReporter')
|
||||
@mock.patch.object(compute_utils, 'add_instance_fault_from_exc')
|
||||
@mock.patch.object(compute_utils, 'refresh_info_cache_for_instance')
|
||||
@mock.patch.object(self.compute, '_set_instance_obj_error_state')
|
||||
def do_test(meth, add_fault, event):
|
||||
def do_test(meth, refresh, add_fault, event):
|
||||
self.assertRaises(exception.PortNotFound,
|
||||
self.compute.detach_interface,
|
||||
self.context, f_instance, 'port_id')
|
||||
@ -2596,6 +2597,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
|
||||
event.assert_called_once_with(
|
||||
self.context, 'compute_detach_interface', CONF.host,
|
||||
f_instance.uuid, graceful_exit=False)
|
||||
refresh.assert_called_once_with(self.context, f_instance)
|
||||
|
||||
do_test()
|
||||
|
||||
@ -2603,8 +2605,9 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
|
||||
@mock.patch.object(compute_utils, 'EventReporter')
|
||||
@mock.patch.object(compute_utils, 'add_instance_fault_from_exc')
|
||||
@mock.patch.object(compute_utils, 'notify_about_instance_action')
|
||||
def test_detach_interface_instance_not_found(self, mock_notify, mock_fault,
|
||||
mock_event, mock_log):
|
||||
@mock.patch.object(compute_utils, 'refresh_info_cache_for_instance')
|
||||
def test_detach_interface_instance_not_found(self, mock_refresh,
|
||||
mock_notify, mock_fault, mock_event, mock_log):
|
||||
nw_info = network_model.NetworkInfo([
|
||||
network_model.VIF(uuids.port_id)])
|
||||
info_cache = objects.InstanceInfoCache(network_info=nw_info,
|
||||
@ -2617,6 +2620,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
|
||||
self.assertRaises(exception.InterfaceDetachFailed,
|
||||
self.compute.detach_interface,
|
||||
self.context, instance, uuids.port_id)
|
||||
mock_refresh.assert_called_once_with(self.context, instance)
|
||||
self.assertEqual(1, mock_log.call_count)
|
||||
self.assertEqual(logging.DEBUG, mock_log.call_args[0][0])
|
||||
|
||||
|
8
releasenotes/notes/bug-1892870-eb894956bf04713d.yaml
Normal file
8
releasenotes/notes/bug-1892870-eb894956bf04713d.yaml
Normal file
@ -0,0 +1,8 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Resolve a race condition that may occur during concurrent
|
||||
``interface detach/attach``, resulting in an interface accidentally unbind
|
||||
after attached. See `bug 1892870`_ for more details.
|
||||
|
||||
.. _bug 1892870: https://bugs.launchpad.net/nova/+bug/1892870
|
Loading…
Reference in New Issue
Block a user