From 48a2221648f490540fdf1ee099d39b8e5230f053 Mon Sep 17 00:00:00 2001
From: Kevin Benton
Date: Wed, 13 Aug 2014 04:31:45 -0700
Subject: [PATCH] Big Switch: Retry on 503 errors from backend

Retries requests to the backend controller up to 3 additional
times with 3 seconds in between each request if a 503 service
unavailable message was returned.

The scenarios that return 503 messages from floodlight are
normally short lived locks for things like OpenStack
synchronization or upgrade blocks. Retrying should work in
the majority of cases.

Closes-Bug: #1357105
Change-Id: Ifacd3a384cfc797ba6d6af5f3c8649c333473259
---
 neutron/plugins/bigswitch/servermanager.py    | 16 +++++++++++----
 .../unit/bigswitch/test_servermanager.py      | 20 +++++++++++++++++++
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/neutron/plugins/bigswitch/servermanager.py b/neutron/plugins/bigswitch/servermanager.py
index bc070c2e75a..db4c377882a 100644
--- a/neutron/plugins/bigswitch/servermanager.py
+++ b/neutron/plugins/bigswitch/servermanager.py
@@ -35,6 +35,7 @@ import httplib
 import os
 import socket
 import ssl
+import time
 import weakref
 
 import eventlet
@@ -72,6 +73,8 @@ ORCHESTRATION_SERVICE_ID = 'Neutron v2.0'
 HASH_MATCH_HEADER = 'X-BSN-BVS-HASH-MATCH'
 # error messages
 NXNETWORK = 'NXVNS'
+HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT = 3
+HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL = 3
 
 
 class RemoteRestError(exceptions.NeutronException):
@@ -417,10 +420,15 @@ class ServerPool(object):
         good_first = sorted(self.servers, key=lambda x: x.failed)
         first_response = None
         for active_server in good_first:
-            ret = active_server.rest_call(action, resource, data, headers,
-                                          timeout,
-                                          reconnect=self.always_reconnect,
-                                          hash_handler=hash_handler)
+            for x in range(HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT + 1):
+                ret = active_server.rest_call(action, resource, data, headers,
+                                              timeout,
+                                              reconnect=self.always_reconnect,
+                                              hash_handler=hash_handler)
+                if ret[0] != httplib.SERVICE_UNAVAILABLE:
+                    break
+                time.sleep(HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL)
+
             # If inconsistent, do a full synchronization
             if ret[0] == httplib.CONFLICT:
                 if not self.get_topo_function:
diff --git a/neutron/tests/unit/bigswitch/test_servermanager.py b/neutron/tests/unit/bigswitch/test_servermanager.py
index ef9e4af2395..5255cd9c097 100644
--- a/neutron/tests/unit/bigswitch/test_servermanager.py
+++ b/neutron/tests/unit/bigswitch/test_servermanager.py
@@ -373,6 +373,26 @@ class ServerManagerTests(test_rp.BigSwitchProxyPluginV2TestCase):
         self.assertFalse(pl.servers.server_failure((404,),
                                                    ignore_codes=[404]))
 
+    def test_retry_on_unavailable(self):
+        pl = manager.NeutronManager.get_plugin()
+        with contextlib.nested(
+            mock.patch(SERVERMANAGER + '.ServerProxy.rest_call',
+                       return_value=(httplib.SERVICE_UNAVAILABLE, 0, 0, 0)),
+            mock.patch(SERVERMANAGER + '.time.sleep')
+        ) as (srestmock, tmock):
+            # making a call should trigger retries with sleeps in between
+            pl.servers.rest_call('GET', '/', '', None, [])
+            rest_call = [mock.call('GET', '/', '', None, False, reconnect=True,
+                                   hash_handler=mock.ANY)]
+            rest_call_count = (
+                servermanager.HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT + 1)
+            srestmock.assert_has_calls(rest_call * rest_call_count)
+            sleep_call = [mock.call(
+                servermanager.HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL)]
+            # should sleep 1 less time than the number of calls
+            sleep_call_count = rest_call_count - 1
+            tmock.assert_has_calls(sleep_call * sleep_call_count)
+
     def test_conflict_triggers_sync(self):
         pl = manager.NeutronManager.get_plugin()
         with mock.patch(