From 1b1a5cc4c903add7a6ce08db54c9e3a5316e2096 Mon Sep 17 00:00:00 2001 From: Isham Ibrahim Date: Thu, 11 May 2017 13:15:02 +0530 Subject: [PATCH] JIRA VIRT-2986. Added an exponential backoff strategy for RabbitMQ connection failure in the callback method of the nova verifier. Change-Id: I8a1c0d14a28e3f0f8f46ba15f14b84dc35fe10ee --- etc/sample_stacktach_verifier_config.json | 1 + verifier/base_verifier.py | 16 +++++++++++++--- verifier/config.py | 3 +++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/etc/sample_stacktach_verifier_config.json b/etc/sample_stacktach_verifier_config.json index da997ec..6076252 100644 --- a/etc/sample_stacktach_verifier_config.json +++ b/etc/sample_stacktach_verifier_config.json @@ -6,6 +6,7 @@ "enable_notifications": true, "validation_level": "all", "flavor_field_name": "instance_type_id", + "exponential_backoff_limit": 15, "rabbit": { "durable_queue": false, "host": "10.0.0.1", diff --git a/verifier/base_verifier.py b/verifier/base_verifier.py index 30424f4..ea89746 100644 --- a/verifier/base_verifier.py +++ b/verifier/base_verifier.py @@ -22,6 +22,8 @@ import signal import sys import time import multiprocessing +import random +import librabbitmq from django.db import transaction from stacktach import message_service @@ -169,7 +171,7 @@ class Verifier(object): if signal_number == signal.SIGUSR1: info = """ %s verifier: - PID: %s Parent PID: + PID: %s Parent PID: %s Last watchdog check: %s # of items processed: %s """ % (self.exchange(), os.getpid(), os.getppid(), @@ -215,7 +217,8 @@ class Verifier(object): "librabbitmq", self.config.virtual_host()) as conn: def callback(result): attempt = 0 - while attempt < 2: + retry_limit = self.config.get_exponential_limit() + while attempt < retry_limit: self.stats['timestamp'] = self._utcnow() try: (verified, exist) = result @@ -225,7 +228,7 @@ class Verifier(object): routing_keys=routing_keys) break except exceptions.ObjectDoesNotExist: - if attempt < 1: + if attempt < 
retry_limit-1: logger.warn("ObjectDoesNotExist in callback, " "attempting to reconnect and try " "again.") @@ -234,12 +237,19 @@ class Verifier(object): else: logger.error("ObjectDoesNotExist in callback " "again, giving up.") + # Avoiding unnecessary sleep() + break + except librabbitmq.ConnectionError as e: + logger.error("ConnectionEror found while trying to connect to RabbitMQ. \ + Attempting the {}th time.".format(attempt)) except Exception, e: msg = "ERROR in Callback %s: %s" % (exchange_name, e) logger.exception(msg) break attempt += 1 + # Exponentially timed backoff + time.sleep((2 ** attempt) / 1000.0 + (random.randint(0, 1000) / 1000.0)) self.stats['timestamp'] = self._utcnow() total = self.stats.get('total_processed', 0) + 1 self.stats['total_processed'] = total diff --git a/verifier/config.py b/verifier/config.py index 695e1e3..5c9ab3f 100644 --- a/verifier/config.py +++ b/verifier/config.py @@ -110,3 +110,6 @@ def batchsize(): def flavor_field_name(): return config['flavor_field_name'] + +def get_exponential_limit(): + return config.get('exponential_backoff_limit', 10)