OVS agent set max number of attempts to sync failed devices
If a device fails more than MAX_DEVICE_RETRIES, the agent won't try to sync it anymore and will log an error message. Partially-Implements: blueprint restructure-l2-agent Change-Id: Ie37e9197573870fb2e73370b30e41fc2be7bcf78
This commit is contained in:
parent
584faf47e2
commit
a27f017292
|
@ -1762,6 +1762,64 @@ class OVSNeutronAgent(sg_rpc.SecurityGroupAgentRpcCallbackMixin,
|
|||
return (port_info, ancillary_port_info, consecutive_resyncs,
|
||||
ports_not_ready_yet)
|
||||
|
||||
def _remove_devices_not_to_retry(self, failed_devices,
|
||||
failed_ancillary_devices,
|
||||
devices_not_to_retry,
|
||||
ancillary_devices_not_to_retry):
|
||||
"""This method removes the devices that exceeded the number of retries
|
||||
from failed_devices and failed_ancillary_devices
|
||||
|
||||
"""
|
||||
for event in ['added', 'removed']:
|
||||
failed_devices[event] = (
|
||||
failed_devices[event] - devices_not_to_retry[event])
|
||||
failed_ancillary_devices[event] = (
|
||||
failed_ancillary_devices[event] -
|
||||
ancillary_devices_not_to_retry[event])
|
||||
|
||||
def _get_devices_not_to_retry(self, failed_devices,
|
||||
failed_ancillary_devices,
|
||||
failed_devices_retries_map):
|
||||
"""Return the devices not to retry and update the retries map"""
|
||||
new_failed_devices_retries_map = {}
|
||||
devices_not_to_retry = {}
|
||||
ancillary_devices_not_to_retry = {}
|
||||
|
||||
def _increase_retries(devices_set):
|
||||
devices_not_to_retry = set()
|
||||
for dev in devices_set:
|
||||
retries = failed_devices_retries_map.get(dev, 0)
|
||||
if retries >= constants.MAX_DEVICE_RETRIES:
|
||||
devices_not_to_retry.add(dev)
|
||||
LOG.warning(_LW(
|
||||
"Device %(dev)s failed for %(times)s times and won't "
|
||||
"be retried anymore"), {
|
||||
'dev': dev, 'times': constants.MAX_DEVICE_RETRIES})
|
||||
else:
|
||||
new_failed_devices_retries_map[dev] = retries + 1
|
||||
return devices_not_to_retry
|
||||
|
||||
for event in ['added', 'removed']:
|
||||
devices_not_to_retry[event] = _increase_retries(
|
||||
failed_devices[event])
|
||||
ancillary_devices_not_to_retry[event] = _increase_retries(
|
||||
failed_ancillary_devices[event])
|
||||
|
||||
return (new_failed_devices_retries_map, devices_not_to_retry,
|
||||
ancillary_devices_not_to_retry)
|
||||
|
||||
def update_retries_map_and_remove_devs_not_to_retry(
|
||||
self, failed_devices, failed_ancillary_devices,
|
||||
failed_devices_retries_map):
|
||||
(new_failed_devices_retries_map, devices_not_to_retry,
|
||||
ancillary_devices_not_to_retry) = self._get_devices_not_to_retry(
|
||||
failed_devices, failed_ancillary_devices,
|
||||
failed_devices_retries_map)
|
||||
self._remove_devices_not_to_retry(
|
||||
failed_devices, failed_ancillary_devices, devices_not_to_retry,
|
||||
ancillary_devices_not_to_retry)
|
||||
return new_failed_devices_retries_map
|
||||
|
||||
def rpc_loop(self, polling_manager=None):
|
||||
if not polling_manager:
|
||||
polling_manager = polling.get_polling_manager(
|
||||
|
@ -1778,7 +1836,7 @@ class OVSNeutronAgent(sg_rpc.SecurityGroupAgentRpcCallbackMixin,
|
|||
ports_not_ready_yet = set()
|
||||
failed_devices = {'added': set(), 'removed': set()}
|
||||
failed_ancillary_devices = {'added': set(), 'removed': set()}
|
||||
|
||||
failed_devices_retries_map = {}
|
||||
while self._check_and_handle_signal():
|
||||
if self.fullsync:
|
||||
LOG.info(_LI("rpc_loop doing a full sync."))
|
||||
|
@ -1893,6 +1951,10 @@ class OVSNeutronAgent(sg_rpc.SecurityGroupAgentRpcCallbackMixin,
|
|||
ancillary_ports = ancillary_port_info['current']
|
||||
|
||||
polling_manager.polling_completed()
|
||||
failed_devices_retries_map = (
|
||||
self.update_retries_map_and_remove_devs_not_to_retry(
|
||||
failed_devices, failed_ancillary_devices,
|
||||
failed_devices_retries_map))
|
||||
# Keep this flag in the last line of "try" block,
|
||||
# so we can sure that no other Exception occurred.
|
||||
ovs_restarted = False
|
||||
|
|
|
@ -551,6 +551,32 @@ class TestOvsNeutronAgent(object):
|
|||
vif_port_set, registered_ports, port_tags_dict=port_tags_dict)
|
||||
self.assertEqual(expected, actual)
|
||||
|
||||
def test_update_retries_map_and_remove_devs_not_to_retry(self):
|
||||
failed_devices_retries_map = {
|
||||
'device_not_to_retry': constants.MAX_DEVICE_RETRIES,
|
||||
'device_to_retry': 2,
|
||||
'ancillary_not_to_retry': constants.MAX_DEVICE_RETRIES,
|
||||
'ancillary_to_retry': 1}
|
||||
failed_devices = {
|
||||
'added': set(['device_not_to_retry']),
|
||||
'removed': set(['device_to_retry', 'new_device'])}
|
||||
failed_ancillary_devices = {'added': set(['ancillary_to_retry']),
|
||||
'removed': set(['ancillary_not_to_retry'])}
|
||||
expected_failed_devices_retries_map = {
|
||||
'device_to_retry': 3, 'new_device': 1, 'ancillary_to_retry': 2}
|
||||
(new_failed_devices_retries_map, devices_not_to_retry,
|
||||
ancillary_devices_not_t_retry) = self.agent._get_devices_not_to_retry(
|
||||
failed_devices, failed_ancillary_devices,
|
||||
failed_devices_retries_map)
|
||||
self.agent._remove_devices_not_to_retry(
|
||||
failed_devices, failed_ancillary_devices, devices_not_to_retry,
|
||||
ancillary_devices_not_t_retry)
|
||||
self.assertIn('device_to_retry', failed_devices['removed'])
|
||||
self.assertNotIn('device_not_to_retry', failed_devices['added'])
|
||||
self.assertEqual(
|
||||
expected_failed_devices_retries_map,
|
||||
new_failed_devices_retries_map)
|
||||
|
||||
def test_bind_devices(self):
|
||||
devices_up = ['tap1']
|
||||
devices_down = ['tap2']
|
||||
|
|
Loading…
Reference in New Issue