diff --git a/nova/flags.py b/nova/flags.py index 07b469bc..121b9ca2 100644 --- a/nova/flags.py +++ b/nova/flags.py @@ -196,6 +196,8 @@ DEFINE_integer('rabbit_port', 5672, 'rabbit port') DEFINE_string('rabbit_userid', 'guest', 'rabbit userid') DEFINE_string('rabbit_password', 'guest', 'rabbit password') DEFINE_string('rabbit_virtual_host', '/', 'rabbit virtual host') +DEFINE_integer('rabbit_retry_interval', 10, 'rabbit connection retry interval') +DEFINE_integer('rabbit_max_retries', 12, 'rabbit connection attempts') DEFINE_string('control_exchange', 'nova', 'the main exchange to connect to') DEFINE_string('cc_host', '127.0.0.1', 'ip of api server') DEFINE_integer('cc_port', 8773, 'cloud controller port') diff --git a/nova/rpc.py b/nova/rpc.py index 9938b083..86a29574 100644 --- a/nova/rpc.py +++ b/nova/rpc.py @@ -38,8 +38,8 @@ from nova import fakerabbit from nova import flags from nova import context -FLAGS = flags.FLAGS +FLAGS = flags.FLAGS LOG = logging.getLogger('amqplib') LOG.setLevel(logging.DEBUG) @@ -83,19 +83,24 @@ class Consumer(messaging.Consumer): Contains methods for connecting the fetch method to async loops """ def __init__(self, *args, **kwargs): - self.failed_connection = False - - while True: + for i in xrange(FLAGS.rabbit_max_retries): + if i > 0: + time.sleep(FLAGS.rabbit_retry_interval) try: super(Consumer, self).__init__(*args, **kwargs) + self.failed_connection = False break except: # Catching all because carrot sucks - logging.exception("AMQP server on %s:%d is unreachable. " \ - "Trying again in 30 seconds." % ( - FLAGS.rabbit_host, - FLAGS.rabbit_port)) - time.sleep(30) - continue + logging.exception("AMQP server on %s:%d is unreachable." \ + " Trying again in %d seconds." % ( + FLAGS.rabbit_host, + FLAGS.rabbit_port, + FLAGS.rabbit_retry_interval)) + self.failed_connection = True + if self.failed_connection: + logging.exception("Unable to connect to AMQP server" \ + " after %d tries. Shutting down." % FLAGS.rabbit_max_retries) + sys.exit(1) def fetch(self, no_ack=None, auto_ack=None, enable_callbacks=False): """Wraps the parent fetch with some logic for failed connections""" @@ -103,7 +108,7 @@ class Consumer(messaging.Consumer): # refactored into some sort of connection manager object try: if self.failed_connection: - # NOTE(vish): conn is defined in the parent class, we can + # NOTE(vish): connection is defined in the parent class, we can # recreate it as long as we create the backend too # pylint: disable-msg=W0201 self.connection = Connection.recreate()