JIRA VIRT-2986: Added an exponential backoff strategy for RabbitMQ connection failures in the callback method of the nova verifier.

Change-Id: I8a1c0d14a28e3f0f8f46ba15f14b84dc35fe10ee
This commit is contained in:
Isham Ibrahim 2017-05-11 13:15:02 +05:30
parent 0c4b90f1c3
commit 1b1a5cc4c9
3 changed files with 17 additions and 3 deletions

View File

@ -6,6 +6,7 @@
"enable_notifications": true,
"validation_level": "all",
"flavor_field_name": "instance_type_id",
"exponential_backoff_limit": 15,
"rabbit": {
"durable_queue": false,
"host": "10.0.0.1",

View File

@ -22,6 +22,8 @@ import signal
import sys
import time
import multiprocessing
import random
import librabbitmq
from django.db import transaction
from stacktach import message_service
@ -169,7 +171,7 @@ class Verifier(object):
if signal_number == signal.SIGUSR1:
info = """
%s verifier:
PID: %s Parent PID:
PID: %s Parent PID: %s
Last watchdog check: %s
# of items processed: %s
""" % (self.exchange(), os.getpid(), os.getppid(),
@ -215,7 +217,8 @@ class Verifier(object):
"librabbitmq", self.config.virtual_host()) as conn:
def callback(result):
attempt = 0
while attempt < 2:
retry_limit = self.config.get_exponential_limit()
while attempt < retry_limit:
self.stats['timestamp'] = self._utcnow()
try:
(verified, exist) = result
@ -225,7 +228,7 @@ class Verifier(object):
routing_keys=routing_keys)
break
except exceptions.ObjectDoesNotExist:
if attempt < 1:
if attempt < retry_limit-1:
logger.warn("ObjectDoesNotExist in callback, "
"attempting to reconnect and try "
"again.")
@ -234,12 +237,19 @@ class Verifier(object):
else:
logger.error("ObjectDoesNotExist in callback "
"again, giving up.")
# Avoiding unnecessary sleep()
break
except librabbitmq.ConnectionError as e:
logger.error("ConnectionEror found while trying to connect to RabbitMQ. \
Attempting the {}th time.".format(attempt))
except Exception, e:
msg = "ERROR in Callback %s: %s" % (exchange_name,
e)
logger.exception(msg)
break
attempt += 1
# Exponentially timed backoff
time.sleep((2 ** attempt) / 1000.0 + (random.randint(0, 1000) / 1000.0))
self.stats['timestamp'] = self._utcnow()
total = self.stats.get('total_processed', 0) + 1
self.stats['total_processed'] = total

View File

@ -110,3 +110,6 @@ def batchsize():
def flavor_field_name():
    """Return the value stored under the 'flavor_field_name' config key."""
    field_name = config['flavor_field_name']
    return field_name
def get_exponential_limit():
    """Return the configured 'exponential_backoff_limit' value.

    Falls back to 10 when the key is not present in the config.
    """
    fallback_limit = 10
    return config.get('exponential_backoff_limit', fallback_limit)