Check tap bridge timestamps to detect local changes

During a quick instance rebuild on Nova, it may remove a tap
interface and then plug it in within the agent polling window.
In this scenario the agent will not realize a device has changed
and will therefore not ask the server for info an subsequently
update its status. This will prevent the notification from being
sent back to Nova that the vif plugging has finished so the
VM will never resume state.

This adds a new timestamp collection method to the common agent
manager interface for devices that is used by the common agent
loop to determine if a device has locally changed.

The linux bridge implementation of it checks the timestamps on the
tap interface's 'bridge' directory, which will change whenever
the tap is added to bridge.

Closes-Bug: #1531862
Change-Id: If172470e907848556b6a8aff13520f94245919bb
This commit is contained in:
Kevin Benton 2016-03-09 23:47:35 -08:00
parent 48649d83aa
commit 7afbd3a6b8
7 changed files with 125 additions and 19 deletions

View File

@ -40,6 +40,13 @@ def is_bridged_interface(interface):
return os.path.exists(BRIDGE_PORT_FS_FOR_DEVICE % interface)
def get_interface_bridged_time(interface):
try:
return os.stat(BRIDGE_PORT_FS_FOR_DEVICE % interface).st_mtime
except OSError:
pass
def get_bridge_names():
return os.listdir(BRIDGE_FS)

View File

@ -137,6 +137,16 @@ class CommonAgentManagerBase(object):
:return: set -- the set of all devices e.g. ['tap1', 'tap2']
"""
@abc.abstractmethod
def get_devices_modified_timestamps(self, devices):
"""Get a dictionary of modified timestamps by device
The devices passed in are expected to be the same format that
get_all_devices returns.
:return: dict -- A dictionary of timestamps keyed by device
"""
@abc.abstractmethod
def get_extension_driver_type(self):
"""Get the agent extension driver type.

View File

@ -320,6 +320,17 @@ class CommonAgentLoop(service.Service):
self.mgr.delete_arp_spoofing_protection(devices)
return resync
@staticmethod
def _get_devices_locally_modified(timestamps, previous_timestamps):
"""Returns devices with previous timestamps that do not match new.
If a device did not have a timestamp previously, it will not be
returned because this means it is new.
"""
return {device for device, timestamp in timestamps.items()
if previous_timestamps.get(device) and
timestamp != previous_timestamps.get(device)}
def scan_devices(self, previous, sync):
device_info = {}
@ -333,12 +344,26 @@ class CommonAgentLoop(service.Service):
previous = {'added': set(),
'current': set(),
'updated': set(),
'removed': set()}
'removed': set(),
'timestamps': {}}
# clear any orphaned ARP spoofing rules (e.g. interface was
# manually deleted)
if self.prevent_arp_spoofing:
self.mgr.delete_unreferenced_arp_protection(current_devices)
# check to see if any devices were locally modified based on their
# timestamps changing since the previous iteration. If a timestamp
# doesn't exist for a device, this calculation is skipped for that
# device.
device_info['timestamps'] = self.mgr.get_devices_modified_timestamps(
current_devices)
locally_updated = self._get_devices_locally_modified(
device_info['timestamps'], previous['timestamps'])
if locally_updated:
LOG.debug("Adding locally changed devices to updated set: %s",
locally_updated)
updated_devices |= locally_updated
if sync:
# This is the first iteration, or the previous one had a problem.
# Re-add all existing devices.

View File

@ -551,6 +551,9 @@ class LinuxBridgeManager(amb.CommonAgentManagerBase):
device.link.delete()
LOG.debug("Done deleting interface %s", interface)
def get_devices_modified_timestamps(self, devices):
return {d: bridge_lib.get_interface_bridged_time(d) for d in devices}
def get_all_devices(self):
devices = set()
for device in bridge_lib.get_bridge_names():

View File

@ -121,6 +121,11 @@ class MacvtapManager(amb.CommonAgentManagerBase):
"Agent terminated!"))
sys.exit(1)
def get_devices_modified_timestamps(self, devices):
# TODO(kevinbenton): this should be implemented to detect
# rapid Nova instance rebuilds.
return {}
def get_all_devices(self):
devices = set()
all_device_names = os.listdir(MACVTAP_FS)

View File

@ -42,6 +42,16 @@ class BridgeLibTestCase(base.BaseSudoTestCase):
def test_get_bridge_names(self):
self.assertIn(self.bridge.name, bridge_lib.get_bridge_names())
def test_get_interface_bridged_time(self):
port = self.port_fixture.br_port
t1 = bridge_lib.get_interface_bridged_time(port)
self.bridge.delif(port)
self.bridge.addif(port)
t2 = bridge_lib.get_interface_bridged_time(port)
self.assertIsNotNone(t1)
self.assertIsNotNone(t2)
self.assertGreater(t2, t1)
def test_get_interface_bridge(self):
bridge = bridge_lib.BridgeDevice.get_interface_bridge(
self.port_fixture.br_port.name)

View File

@ -149,10 +149,22 @@ class TestCommonAgentLoop(base.BaseTestCase):
agent.treat_devices_removed(devices)
de_arp.assert_called_with(devices)
def test__get_devices_locally_modified(self):
new_ts = {1: 1000, 2: 2000, 3: 3000}
old_ts = {1: 10, 2: 2000, 4: 900}
# 3 and 4 are not returned because 3 is a new device and 4 is a
# removed device
self.assertEqual(
set([1]),
self.agent._get_devices_locally_modified(new_ts, old_ts))
def _test_scan_devices(self, previous, updated,
fake_current, expected, sync):
fake_current, expected, sync,
fake_ts_current=None):
self.agent.mgr = mock.Mock()
self.agent.mgr.get_all_devices.return_value = fake_current
self.agent.mgr.get_devices_modified_timestamps.return_value = (
fake_ts_current or {})
self.agent.rpc_callbacks.get_and_clear_updated_devices.return_value =\
updated
@ -163,28 +175,49 @@ class TestCommonAgentLoop(base.BaseTestCase):
previous = {'current': set([1, 2]),
'updated': set(),
'added': set(),
'removed': set()}
'removed': set(),
'timestamps': {}}
fake_current = set([1, 2])
updated = set()
expected = {'current': set([1, 2]),
'updated': set(),
'added': set(),
'removed': set()}
'removed': set(),
'timestamps': {}}
self._test_scan_devices(previous, updated, fake_current, expected,
sync=False)
def test_scan_devices_timestamp_triggers_updated(self):
previous = {'current': set([1, 2]),
'updated': set(),
'added': set(),
'removed': set(),
'timestamps': {2: 600}}
fake_current = set([1, 2])
updated = set()
expected = {'current': set([1, 2]),
'updated': set([2]),
'added': set(),
'removed': set(),
'timestamps': {2: 1000}}
self._test_scan_devices(previous, updated, fake_current, expected,
sync=False, fake_ts_current={2: 1000})
def test_scan_devices_added_removed(self):
previous = {'current': set([1, 2]),
'updated': set(),
'added': set(),
'removed': set()}
'removed': set(),
'timestamps': {}}
fake_current = set([2, 3])
updated = set()
expected = {'current': set([2, 3]),
'updated': set(),
'added': set([3]),
'removed': set([1])}
'removed': set([1]),
'timestamps': {}}
self._test_scan_devices(previous, updated, fake_current, expected,
sync=False)
@ -193,13 +226,15 @@ class TestCommonAgentLoop(base.BaseTestCase):
previous = {'current': set([2, 3]),
'updated': set(),
'added': set(),
'removed': set([1])}
'removed': set([1]),
'timestamps': {}}
fake_current = set([2, 3])
updated = set()
expected = {'current': set([2, 3]),
'updated': set(),
'added': set([2, 3]),
'removed': set([1])}
'removed': set([1]),
'timestamps': {}}
self._test_scan_devices(previous, updated, fake_current, expected,
sync=True)
@ -208,7 +243,8 @@ class TestCommonAgentLoop(base.BaseTestCase):
previous = {'current': set([2, 3]),
'updated': set(),
'added': set(),
'removed': set([1])}
'removed': set([1]),
'timestamps': {}}
# Device 2 disappeared.
fake_current = set([3])
updated = set()
@ -216,7 +252,8 @@ class TestCommonAgentLoop(base.BaseTestCase):
expected = {'current': set([3]),
'updated': set(),
'added': set([3]),
'removed': set([1, 2])}
'removed': set([1, 2]),
'timestamps': {}}
self._test_scan_devices(previous, updated, fake_current, expected,
sync=True)
@ -225,13 +262,15 @@ class TestCommonAgentLoop(base.BaseTestCase):
previous = {'current': set([1, 2]),
'updated': set(),
'added': set(),
'removed': set()}
'removed': set(),
'timestamps': {}}
fake_current = set([1, 2])
updated = set([1])
expected = {'current': set([1, 2]),
'updated': set([1]),
'added': set(),
'removed': set()}
'removed': set(),
'timestamps': {}}
self._test_scan_devices(previous, updated, fake_current, expected,
sync=False)
@ -240,13 +279,15 @@ class TestCommonAgentLoop(base.BaseTestCase):
previous = {'current': set([1, 2]),
'updated': set(),
'added': set(),
'removed': set()}
'removed': set(),
'timestamps': {}}
fake_current = set([1, 2])
updated = set([3])
expected = {'current': set([1, 2]),
'updated': set(),
'added': set(),
'removed': set()}
'removed': set(),
'timestamps': {}}
self._test_scan_devices(previous, updated, fake_current, expected,
sync=False)
@ -256,7 +297,8 @@ class TestCommonAgentLoop(base.BaseTestCase):
'current': set([1, 2]),
'updated': set(),
'added': set(),
'removed': set()
'removed': set(),
'timestamps': {}
}
# Device 2 disappeared.
fake_current = set([1])
@ -266,7 +308,8 @@ class TestCommonAgentLoop(base.BaseTestCase):
'current': set([1]),
'updated': set(),
'added': set(),
'removed': set([2])
'removed': set([2]),
'timestamps': {}
}
self._test_scan_devices(
previous, updated, fake_current, expected, sync=False
@ -276,13 +319,15 @@ class TestCommonAgentLoop(base.BaseTestCase):
previous = {'current': set([1, 2]),
'updated': set([1]),
'added': set(),
'removed': set()}
'removed': set(),
'timestamps': {}}
fake_current = set([1, 2])
updated = set([2])
expected = {'current': set([1, 2]),
'updated': set([1, 2]),
'added': set([1, 2]),
'removed': set()}
'removed': set(),
'timestamps': {}}
self._test_scan_devices(previous, updated, fake_current, expected,
sync=True)
@ -295,7 +340,8 @@ class TestCommonAgentLoop(base.BaseTestCase):
expected = {'current': set([1, 2]),
'updated': set(),
'added': set([1, 2]),
'removed': set()}
'removed': set(),
'timestamps': {}}
self._test_scan_devices(previous, updated, fake_current, expected,
sync=False)
self.agent.mgr.delete_unreferenced_arp_protection.assert_called_with(