Big Switch: Retry on 503 errors from backend

Retries requests to the backend controller up to 3
additional times, with 3 seconds between each attempt,
if a 503 Service Unavailable response was returned.
The scenarios that return 503 responses from Floodlight
are normally short-lived locks for things like OpenStack
synchronization or upgrade blocks. Retrying should work
in the majority of cases.

Closes-Bug: #1357105
Change-Id: Ifacd3a384cfc797ba6d6af5f3c8649c333473259
This commit is contained in:
Kevin Benton 2014-08-13 04:31:45 -07:00
parent 9ddc2267a8
commit 48a2221648
2 changed files with 32 additions and 4 deletions

View File

@ -35,6 +35,7 @@ import httplib
import os import os
import socket import socket
import ssl import ssl
import time
import weakref import weakref
import eventlet import eventlet
@ -72,6 +73,8 @@ ORCHESTRATION_SERVICE_ID = 'Neutron v2.0'
HASH_MATCH_HEADER = 'X-BSN-BVS-HASH-MATCH' HASH_MATCH_HEADER = 'X-BSN-BVS-HASH-MATCH'
# error messages # error messages
NXNETWORK = 'NXVNS' NXNETWORK = 'NXVNS'
# Number of additional attempts made against a server after a request
# comes back with 503 Service Unavailable (total tries = this value + 1).
HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT = 3
# Seconds passed to time.sleep() after a 503 response before retrying.
HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL = 3
class RemoteRestError(exceptions.NeutronException): class RemoteRestError(exceptions.NeutronException):
@ -417,10 +420,15 @@ class ServerPool(object):
good_first = sorted(self.servers, key=lambda x: x.failed) good_first = sorted(self.servers, key=lambda x: x.failed)
first_response = None first_response = None
for active_server in good_first: for active_server in good_first:
ret = active_server.rest_call(action, resource, data, headers, for x in range(HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT + 1):
timeout, ret = active_server.rest_call(action, resource, data, headers,
reconnect=self.always_reconnect, timeout,
hash_handler=hash_handler) reconnect=self.always_reconnect,
hash_handler=hash_handler)
if ret[0] != httplib.SERVICE_UNAVAILABLE:
break
time.sleep(HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL)
# If inconsistent, do a full synchronization # If inconsistent, do a full synchronization
if ret[0] == httplib.CONFLICT: if ret[0] == httplib.CONFLICT:
if not self.get_topo_function: if not self.get_topo_function:

View File

@ -373,6 +373,26 @@ class ServerManagerTests(test_rp.BigSwitchProxyPluginV2TestCase):
self.assertFalse(pl.servers.server_failure((404,), self.assertFalse(pl.servers.server_failure((404,),
ignore_codes=[404])) ignore_codes=[404]))
def test_retry_on_unavailable(self):
    """Verify that 503 responses from the backend are retried.

    ``ServerProxy.rest_call`` is patched to always answer with
    ``httplib.SERVICE_UNAVAILABLE`` and ``time.sleep`` is patched out so
    the test does not actually wait between attempts.
    """
    plugin = manager.NeutronManager.get_plugin()
    unavailable = (httplib.SERVICE_UNAVAILABLE, 0, 0, 0)
    proxy_patch = mock.patch(SERVERMANAGER + '.ServerProxy.rest_call',
                             return_value=unavailable)
    sleep_patch = mock.patch(SERVERMANAGER + '.time.sleep')
    with contextlib.nested(proxy_patch, sleep_patch) as (srestmock, tmock):
        # a single pool-level call should fan out into retried proxy calls
        plugin.servers.rest_call('GET', '/', '', None, [])

        # the initial attempt plus the configured number of retries
        attempts = servermanager.HTTP_SERVICE_UNAVAILABLE_RETRY_COUNT + 1
        expected_rest_calls = [
            mock.call('GET', '/', '', None, False, reconnect=True,
                      hash_handler=mock.ANY)
            for _ in range(attempts)]
        srestmock.assert_has_calls(expected_rest_calls)

        # one sleep is expected between each pair of consecutive attempts,
        # i.e. one fewer sleep than the number of calls
        pause = servermanager.HTTP_SERVICE_UNAVAILABLE_RETRY_INTERVAL
        expected_sleeps = [mock.call(pause) for _ in range(attempts - 1)]
        tmock.assert_has_calls(expected_sleeps)
def test_conflict_triggers_sync(self): def test_conflict_triggers_sync(self):
pl = manager.NeutronManager.get_plugin() pl = manager.NeutronManager.get_plugin()
with mock.patch( with mock.patch(