From 44c0dba4c6404dee3be2d2d7e9aeb70ea59a6647 Mon Sep 17 00:00:00 2001 From: Nikhil Manchanda Date: Wed, 5 Jun 2013 19:07:03 -0700 Subject: [PATCH] Fixed race condition in the Restore workflow When an instance is restored from a backup, during the step when the root password is being reset, we now check to ensure that the mysqld daemon is up, before we kill the process. This ensures that the root password is always reset successfully. Fixes bug 1187958 Change-Id: If5a5c0293c0dc51c3f118abbcc52c3fb0d1d3cfa --- reddwarf/guestagent/manager/mysql.py | 15 ++++++--- .../guestagent/strategies/restore/base.py | 31 +++++++++++++++++-- .../guestagent/strategies/restore/impl.py | 1 + 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/reddwarf/guestagent/manager/mysql.py b/reddwarf/guestagent/manager/mysql.py index bde6576a73..e912509a58 100644 --- a/reddwarf/guestagent/manager/mysql.py +++ b/reddwarf/guestagent/manager/mysql.py @@ -6,6 +6,7 @@ from reddwarf.guestagent import volume from reddwarf.guestagent.manager.mysql_service import MySqlAppStatus from reddwarf.guestagent.manager.mysql_service import MySqlAdmin from reddwarf.guestagent.manager.mysql_service import MySqlApp +from reddwarf.instance import models as rd_models from reddwarf.openstack.common import log as logging from reddwarf.openstack.common.gettextutils import _ from reddwarf.openstack.common import periodic_task @@ -65,10 +66,16 @@ class Manager(periodic_task.PeriodicTasks): def is_root_enabled(self, context): return MySqlAdmin().is_root_enabled() - def _perform_restore(self, backup_id, context, restore_location): + def _perform_restore(self, backup_id, context, restore_location, app): LOG.info(_("Restoring database from backup %s" % backup_id)) - backup.restore(context, backup_id, restore_location) - LOG.info(_("Restored database")) + try: + backup.restore(context, backup_id, restore_location) + except Exception as e: + LOG.error(e) + LOG.error("Error performing restore from backup %s", 
backup_id) + app.status.set_status(rd_models.ServiceStatuses.FAILED) + raise + LOG.info(_("Restored database successfully")) def prepare(self, context, databases, memory_mb, users, device_path=None, mount_point=None, backup_id=None): @@ -96,7 +103,7 @@ class Manager(periodic_task.PeriodicTasks): app.start_mysql() app.install_if_needed() if backup_id: - self._perform_restore(backup_id, context, CONF.mount_point) + self._perform_restore(backup_id, context, CONF.mount_point, app) LOG.info(_("Securing mysql now.")) app.secure(memory_mb) if backup_id and MySqlAdmin().is_root_enabled(): diff --git a/reddwarf/guestagent/strategies/restore/base.py b/reddwarf/guestagent/strategies/restore/base.py index 67ec350922..8e8f5fb615 100644 --- a/reddwarf/guestagent/strategies/restore/base.py +++ b/reddwarf/guestagent/strategies/restore/base.py @@ -14,7 +14,9 @@ # under the License. # from reddwarf.guestagent.strategy import Strategy -from reddwarf.common import cfg, utils +from reddwarf.common import cfg +from reddwarf.common import exception +from reddwarf.common import utils from reddwarf.openstack.common import log as logging from eventlet.green import subprocess import tempfile @@ -25,12 +27,26 @@ import glob LOG = logging.getLogger(__name__) CONF = cfg.CONF CHUNK_SIZE = CONF.backup_chunk_size +RESET_ROOT_RETRY_TIMEOUT = 100 +RESET_ROOT_SLEEP_INTERVAL = 10 RESET_ROOT_MYSQL_COMMAND = """ UPDATE mysql.user SET Password=PASSWORD('') WHERE User='root'; FLUSH PRIVILEGES; """ +def mysql_is_running(): + try: + out, err = utils.execute_with_timeout( + "/usr/bin/mysqladmin", + "ping", run_as_root=True, root_helper="sudo") + LOG.info("The mysqld daemon is up and running.") + return True + except exception.ProcessExecutionError: + LOG.info("Waiting for mysqld daemon to start") + return False + + class RestoreError(Exception): """Error running the Backup Command.""" @@ -115,10 +131,21 @@ class RestoreRunner(Strategy): try: i = child.expect(['Starting mysqld daemon']) if i == 0: - 
LOG.info("Root password reset successfully!") + LOG.info("Starting mysqld daemon") except pexpect.TIMEOUT as e: LOG.error("wait_and_close_proc failed: %s" % e) finally: + try: + # There is a race condition here where we kill mysqld before + # the init file is executed. We need to ensure mysqld is up. + utils.poll_until(mysql_is_running, + sleep_time=RESET_ROOT_SLEEP_INTERVAL, + time_out=RESET_ROOT_RETRY_TIMEOUT) + except exception.PollTimeOut: + raise RestoreError("Reset root password failed: " + "mysqld did not start!") + + LOG.info("Root password reset successfully!") LOG.info("Cleaning up the temp mysqld process...") child.delayafterclose = 1 child.delayafterterminate = 1 diff --git a/reddwarf/guestagent/strategies/restore/impl.py b/reddwarf/guestagent/strategies/restore/impl.py index 906898dd20..26e33d3c15 100644 --- a/reddwarf/guestagent/strategies/restore/impl.py +++ b/reddwarf/guestagent/strategies/restore/impl.py @@ -43,6 +43,7 @@ class InnoBackupEx(base.RestoreRunner): is_zipped = True restore_cmd = 'sudo xbstream -x -C %(restore_location)s' prepare_cmd = ('sudo innobackupex --apply-log %(restore_location)s ' + '--defaults-file=%(restore_location)s/backup-my.cnf ' '--ibbackup xtrabackup 2>/tmp/innoprepare.log') def _pre_restore(self):