From a4dd6b6f06d222f49bd0d2582dfe0f2925a1638f Mon Sep 17 00:00:00 2001
From: Adam Gandelman
Date: Wed, 28 Mar 2012 18:52:41 -0700
Subject: [PATCH] Ensure a functional database connection

Allow retrying database connection in get_engine() at an interval.
Resolves the issue of nova components erroring at startup if a
database connection is unavailable, particularly at boot.

Borrowed from a similar commit to glance,
(https://review.openstack.org/#change,5552).

Fixes Bug #959426 for nova.

Update:
 * Properly return an engine (fixes tests)
 * Setting sql_max_retries to -1 will retry infinitely
 * Bumped options count in nova.conf.sample
 * i18n log warning
 * Add note to flag help about -1 == infinite
 * Pep8 fix
 * Setting sql_max_retries to 0 now fails immediately instead of
   retrying forever
 * Close the probe connection once connect() succeeds

Change-Id: Id34eda9e0bad6b477a74e9a7d3575e513e6291d5
---
 etc/nova/nova.conf.sample     |  4 +++-
 nova/db/sqlalchemy/session.py | 43 +++++++++++++++++++++++++++++++++++++++++--
 nova/flags.py                 |  4 ++++
 3 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/etc/nova/nova.conf.sample b/etc/nova/nova.conf.sample
index ea4e014a9fb4..b2607a9c2f9f 100644
--- a/etc/nova/nova.conf.sample
+++ b/etc/nova/nova.conf.sample
@@ -215,6 +215,8 @@
 # sql_connection="sqlite:///$state_path/$sqlite_db"
 ###### (IntOpt) timeout before idle sql connections are reaped
 # sql_idle_timeout=3600
+###### (IntOpt) maximum db connection retries during startup. (setting -1 implies an infinite retry count)
+# sql_max_retries=10
 ###### (IntOpt) interval between retries of opening a sql connection
 # sql_retry_interval=10
 ###### (StrOpt) the filename to use with sqlite
@@ -1105,4 +1107,4 @@
 ###### (StrOpt) The ZFS path under which to create zvols for volumes.
 # san_zfs_volume_base="rpool/"
 
-# Total option count: 466
+# Total option count: 467
diff --git a/nova/db/sqlalchemy/session.py b/nova/db/sqlalchemy/session.py
index eeadd9611010..3f206323efa0 100644
--- a/nova/db/sqlalchemy/session.py
+++ b/nova/db/sqlalchemy/session.py
@@ -22,9 +22,8 @@ import time
 
 import sqlalchemy.interfaces
 import sqlalchemy.orm
-from sqlalchemy.exc import DisconnectionError
+from sqlalchemy.exc import DisconnectionError, OperationalError
 from sqlalchemy.pool import NullPool, StaticPool
-import time
 
 import nova.exception
 import nova.flags as flags
@@ -81,6 +80,19 @@ class MySQLPingListener(object):
                 raise
 
 
+def is_db_connection_error(args):
+    """Return True if error in connecting to db.
+
+    :param args: error message string, as passed by get_engine() from
+                 ``e.args[0]`` of a caught OperationalError.
+    """
+    # NOTE(adam_g): This is currently MySQL specific and needs to be extended
+    #               to support Postgres and others.
+    # 2002/2003: cannot connect to server; 2006: server has gone away.
+    conn_err_codes = ('2002', '2003', '2006')
+    return any(err_code in args for err_code in conn_err_codes)
+
+
 def get_engine():
     """Return a SQLAlchemy engine."""
     global _ENGINE
@@ -114,6 +126,33 @@ def get_engine():
 
         _ENGINE = sqlalchemy.create_engine(FLAGS.sql_connection, **engine_args)
 
+        try:
+            # Probe connectivity; close() returns the connection to the pool.
+            _ENGINE.connect().close()
+        except OperationalError as e:
+            if not is_db_connection_error(e.args[0]):
+                raise
+
+            remaining = FLAGS.sql_max_retries
+            if remaining == -1:
+                remaining = 'infinite'
+            elif remaining <= 0:
+                # No retries requested: fail now rather than decrementing
+                # past zero and retrying forever.
+                raise
+            while True:
+                msg = _('SQL connection failed. %s attempts left.')
+                LOG.warn(msg % remaining)
+                if remaining != 'infinite':
+                    remaining -= 1
+                time.sleep(FLAGS.sql_retry_interval)
+                try:
+                    _ENGINE.connect().close()
+                    break
+                except OperationalError as e:
+                    if (remaining != 'infinite' and remaining == 0) or \
+                       not is_db_connection_error(e.args[0]):
+                        raise
     return _ENGINE
 
 
diff --git a/nova/flags.py b/nova/flags.py
index 430abe67ee80..652d3465d7c0 100644
--- a/nova/flags.py
+++ b/nova/flags.py
@@ -325,6 +325,10 @@ global_opts = [
     cfg.IntOpt('sql_idle_timeout',
                default=3600,
                help='timeout before idle sql connections are reaped'),
+    cfg.IntOpt('sql_max_retries',
+               default=10,
+               help='maximum db connection retries during startup. '
+                    '(setting -1 implies an infinite retry count)'),
     cfg.IntOpt('sql_retry_interval',
                default=10,
                help='interval between retries of opening a sql connection'),