Don't interrupt device loop for missing device

If a device is removed in the middle of
treat_devices_added_updated it can cause failures which
will force all of the other unaffected devices to be
reprocessed. When they are reprocessed it will cause
the ports on the server side to go back into BUILD which
can cause failures in tests expecting ACTIVE ports.

This patch adds an exception handler that checks whether the
device is still present. If it is missing, the exception is
suppressed, since the device will be treated as a removed
device on the next iteration.

Closes-Bug: #1605090
Change-Id: Ia774a7c1578f5aca71f3b706e47819b3fdc1cce2
This commit is contained in:
Kevin Benton 2016-07-20 19:07:49 -07:00
parent 99cbcba48c
commit e7e8a4e938
2 changed files with 52 additions and 3 deletions

View File

@ -15,6 +15,7 @@
# under the License.
import collections
import contextlib
import sys
import time
@ -23,6 +24,7 @@ from oslo_config import cfg
from oslo_log import log as logging
from oslo_service import loopingcall
from oslo_service import service
from oslo_utils import excutils
from osprofiler import profiler
from neutron._i18n import _LE, _LI
@ -216,7 +218,15 @@ class CommonAgentLoop(service.Service):
return True
for device_details in devices_details_list:
device = device_details['device']
self._process_device_if_exists(device_details)
# no resync is needed
return False
def _process_device_if_exists(self, device_details):
# ignore exceptions from devices that disappear because they will
# be handled as removed in the next iteration
device = device_details['device']
with self._ignore_missing_device_exceptions(device):
LOG.debug("Port %s added", device)
if 'port_id' in device_details:
@ -295,8 +305,16 @@ class CommonAgentLoop(service.Service):
self.ext_manager.handle_port(self.context, device_details)
else:
LOG.info(_LI("Device %s not defined on plugin"), device)
# no resync is needed
return False
@contextlib.contextmanager
def _ignore_missing_device_exceptions(self, device):
    """Swallow errors from the wrapped block if ``device`` has disappeared.

    Any ``Exception`` raised inside the ``with`` body is re-raised unless
    ``device`` is absent from ``self.mgr.get_all_devices()``; in that case
    the exception is suppressed and a debug message is logged.

    :param device: identifier looked up in ``self.mgr.get_all_devices()``.
    """
    try:
        yield
    except Exception:
        # save_and_reraise_exception re-raises on exit unless ``reraise``
        # is cleared inside the block.
        with excutils.save_and_reraise_exception() as saved_exc:
            device_still_present = device in self.mgr.get_all_devices()
            if not device_still_present:
                saved_exc.reraise = False
                LOG.debug("%s was removed during processing.", device)
def treat_devices_removed(self, devices):
resync = False

View File

@ -17,6 +17,7 @@
import mock
from neutron_lib import constants
from oslo_config import cfg
import testtools
from neutron.agent.linux import bridge_lib
from neutron.common import constants as n_const
@ -448,6 +449,36 @@ class TestCommonAgentLoop(base.BaseTestCase):
agent.treat_devices_added_updated(set(['tap1']))
set_arp.assert_called_with(mock_details['device'], mock_details)
def test__process_device_if_exists_missing_intf(self):
    """A failure while processing a device that is gone is suppressed."""
    mock_details = {'device': 'dev123',
                    'port_id': 'port123',
                    'network_id': 'net123',
                    'admin_state_up': True,
                    'network_type': 'vlan',
                    'segmentation_id': 100,
                    'physical_network': 'physnet1',
                    'device_owner': constants.DEVICE_OWNER_NETWORK_PREFIX}
    self.agent.mgr = mock.Mock()
    # the manager reports no devices, i.e. 'dev123' has disappeared
    self.agent.mgr.get_all_devices.return_value = []
    self.agent.mgr.plug_interface.side_effect = RuntimeError()
    # must not raise: the error is attributed to the missing device
    self.agent._process_device_if_exists(mock_details)
    # Guard against a vacuous pass: _ignore_missing_device_exceptions
    # consults get_all_devices when an exception reaches it, so this
    # confirms the failure was actually triggered and then suppressed.
    self.assertTrue(self.agent.mgr.get_all_devices.called)
def test__process_device_if_exists_error(self):
    """A failure while processing a device that still exists propagates."""
    details = {
        'device': 'dev123',
        'port_id': 'port123',
        'network_id': 'net123',
        'admin_state_up': True,
        'network_type': 'vlan',
        'segmentation_id': 100,
        'physical_network': 'physnet1',
        'device_owner': constants.DEVICE_OWNER_NETWORK_PREFIX,
    }
    manager = mock.Mock()
    manager.get_all_devices.return_value = ['dev123']
    manager.plug_interface.side_effect = RuntimeError()
    self.agent.mgr = manager
    # the manager still reports the device, so the error is not suppressed
    with testtools.ExpectedException(RuntimeError):
        self.agent._process_device_if_exists(details)
def test_set_rpc_timeout(self):
self.agent.stop()
for rpc_client in (self.agent.plugin_rpc.client,