Fixed race condition in the Restore workflow

When an instance is restored from a backup, the root password reset step now checks that the mysqld daemon is up before killing the temporary mysqld process. This ensures that the root password is always reset successfully. Fixes bug 1187958. Change-Id: If5a5c0293c0dc51c3f118abbcc52c3fb0d1d3cfa
2013-06-05 19:07:03 -07:00 · 2013-06-05 19:07:03 -07:00 · 44c0dba4c6
commit 44c0dba4c6
parent c387692d50
3 changed files with 41 additions and 6 deletions
--- a/reddwarf/guestagent/manager/mysql.py
+++ b/reddwarf/guestagent/manager/mysql.py
@ -6,6 +6,7 @@ from reddwarf.guestagent import volume
 from reddwarf.guestagent.manager.mysql_service import MySqlAppStatus
 from reddwarf.guestagent.manager.mysql_service import MySqlAdmin
 from reddwarf.guestagent.manager.mysql_service import MySqlApp
 from reddwarf.instance import models as rd_models
 from reddwarf.openstack.common import log as logging
 from reddwarf.openstack.common.gettextutils import _
 from reddwarf.openstack.common import periodic_task
@ -65,10 +66,16 @@ class Manager(periodic_task.PeriodicTasks):
    def is_root_enabled(self, context):
        """Return the result of ``MySqlAdmin().is_root_enabled()``.

        ``context`` is accepted but not used by this method.
        """
        admin = MySqlAdmin()
        return admin.is_root_enabled()
-    def _perform_restore(self, backup_id, context, restore_location):
+    def _perform_restore(self, backup_id, context, restore_location, app):
        LOG.info(_("Restoring database from backup %s" % backup_id))
        try:
            backup.restore(context, backup_id, restore_location)
-        LOG.info(_("Restored database"))
+        except Exception as e:
            LOG.error(e)
            LOG.error("Error performing restore from backup %s", backup_id)
            app.status.set_status(rd_models.ServiceStatuses.FAILED)
            raise
        LOG.info(_("Restored database successfully"))
    def prepare(self, context, databases, memory_mb, users, device_path=None,
                mount_point=None, backup_id=None):
@ -96,7 +103,7 @@ class Manager(periodic_task.PeriodicTasks):
                app.start_mysql()
        app.install_if_needed()
        if backup_id:
-            self._perform_restore(backup_id, context, CONF.mount_point)
+            self._perform_restore(backup_id, context, CONF.mount_point, app)
        LOG.info(_("Securing mysql now."))
        app.secure(memory_mb)
        if backup_id and MySqlAdmin().is_root_enabled():
--- a/reddwarf/guestagent/strategies/restore/base.py
+++ b/reddwarf/guestagent/strategies/restore/base.py
@ -14,7 +14,9 @@
 #    under the License.
 #
 from reddwarf.guestagent.strategy import Strategy
-from reddwarf.common import cfg, utils
+from reddwarf.common import cfg
 from reddwarf.common import exception
 from reddwarf.common import utils
 from reddwarf.openstack.common import log as logging
 from eventlet.green import subprocess
 import tempfile
@ -25,12 +27,26 @@ import glob
 LOG = logging.getLogger(__name__)
 CONF = cfg.CONF
 CHUNK_SIZE = CONF.backup_chunk_size
 RESET_ROOT_RETRY_TIMEOUT = 100
 RESET_ROOT_SLEEP_INTERVAL = 10
 RESET_ROOT_MYSQL_COMMAND = """
 UPDATE mysql.user SET Password=PASSWORD('') WHERE User='root';
 FLUSH PRIVILEGES;
 """
 def mysql_is_running():
    """Report whether ``/usr/bin/mysqladmin ping`` currently succeeds.

    The command is run through ``utils.execute_with_timeout`` with sudo.
    A ``ProcessExecutionError`` is reported as ``False`` instead of being
    raised, so the function can be used as a polling predicate.

    Returns True when the command completes without a
    ProcessExecutionError, False otherwise.
    """
    try:
        # Only success or failure of the call matters; its output is unused.
        utils.execute_with_timeout(
            "/usr/bin/mysqladmin",
            "ping", run_as_root=True, root_helper="sudo")
    except exception.ProcessExecutionError:
        LOG.info("Waiting for mysqld daemon to start")
        return False
    LOG.info("The mysqld daemon is up and running.")
    return True
 class RestoreError(Exception):
    """Error running the Backup Command."""
@ -115,10 +131,21 @@ class RestoreRunner(Strategy):
        try:
            i = child.expect(['Starting mysqld daemon'])
            if i == 0:
-                LOG.info("Root password reset successfully!")
+                LOG.info("Starting mysqld daemon")
        except pexpect.TIMEOUT as e:
            LOG.error("wait_and_close_proc failed: %s" % e)
        finally:
            try:
                # There is a race condition here where we kill mysqld before
                # the init file has been executed. We need to ensure mysqld
                # is up.
                utils.poll_until(mysql_is_running,
                                 sleep_time=RESET_ROOT_SLEEP_INTERVAL,
                                 time_out=RESET_ROOT_RETRY_TIMEOUT)
            except exception.PollTimeOut:
                raise RestoreError("Reset root password failed: "
                                   "mysqld did not start!")
            LOG.info("Root password reset successfully!")
            LOG.info("Cleaning up the temp mysqld process...")
            child.delayafterclose = 1
            child.delayafterterminate = 1
--- a/reddwarf/guestagent/strategies/restore/impl.py
+++ b/reddwarf/guestagent/strategies/restore/impl.py
@ -43,6 +43,7 @@ class InnoBackupEx(base.RestoreRunner):
    is_zipped = True
    restore_cmd = 'sudo xbstream -x -C %(restore_location)s'
    prepare_cmd = ('sudo innobackupex --apply-log %(restore_location)s '
                   '--defaults-file=%(restore_location)s/backup-my.cnf '
                   '--ibbackup xtrabackup 2>/tmp/innoprepare.log')
    def _pre_restore(self):