Fixed race condition in the Restore workflow

When an instance is restored from a backup, the root password reset step
now checks that the mysqld daemon is up before we kill the process. This
ensures that the root password is always reset successfully.

Fixes bug 1187958

Change-Id: If5a5c0293c0dc51c3f118abbcc52c3fb0d1d3cfa
This commit is contained in:
Nikhil Manchanda 2013-06-05 19:07:03 -07:00
parent c387692d50
commit 44c0dba4c6
3 changed files with 41 additions and 6 deletions

View File

@ -6,6 +6,7 @@ from reddwarf.guestagent import volume
from reddwarf.guestagent.manager.mysql_service import MySqlAppStatus
from reddwarf.guestagent.manager.mysql_service import MySqlAdmin
from reddwarf.guestagent.manager.mysql_service import MySqlApp
from reddwarf.instance import models as rd_models
from reddwarf.openstack.common import log as logging
from reddwarf.openstack.common.gettextutils import _
from reddwarf.openstack.common import periodic_task
@ -65,10 +66,16 @@ class Manager(periodic_task.PeriodicTasks):
def is_root_enabled(self, context):
return MySqlAdmin().is_root_enabled()
def _perform_restore(self, backup_id, context, restore_location):
def _perform_restore(self, backup_id, context, restore_location, app):
LOG.info(_("Restoring database from backup %s" % backup_id))
backup.restore(context, backup_id, restore_location)
LOG.info(_("Restored database"))
try:
backup.restore(context, backup_id, restore_location)
except Exception as e:
LOG.error(e)
LOG.error("Error performing restore from backup %s", backup_id)
app.status.set_status(rd_models.ServiceStatuses.FAILED)
raise
LOG.info(_("Restored database successfully"))
def prepare(self, context, databases, memory_mb, users, device_path=None,
mount_point=None, backup_id=None):
@ -96,7 +103,7 @@ class Manager(periodic_task.PeriodicTasks):
app.start_mysql()
app.install_if_needed()
if backup_id:
self._perform_restore(backup_id, context, CONF.mount_point)
self._perform_restore(backup_id, context, CONF.mount_point, app)
LOG.info(_("Securing mysql now."))
app.secure(memory_mb)
if backup_id and MySqlAdmin().is_root_enabled():

View File

@ -14,7 +14,9 @@
# under the License.
#
from reddwarf.guestagent.strategy import Strategy
from reddwarf.common import cfg, utils
from reddwarf.common import cfg
from reddwarf.common import exception
from reddwarf.common import utils
from reddwarf.openstack.common import log as logging
from eventlet.green import subprocess
import tempfile
@ -25,12 +27,26 @@ import glob
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
CHUNK_SIZE = CONF.backup_chunk_size
RESET_ROOT_RETRY_TIMEOUT = 100
RESET_ROOT_SLEEP_INTERVAL = 10
RESET_ROOT_MYSQL_COMMAND = """
UPDATE mysql.user SET Password=PASSWORD('') WHERE User='root';
FLUSH PRIVILEGES;
"""
def mysql_is_running():
    """Report whether ``mysqladmin ping`` succeeds against mysqld.

    Executes ``/usr/bin/mysqladmin ping`` via ``utils.execute_with_timeout``
    with ``run_as_root=True`` and ``root_helper="sudo"``. Takes no arguments
    so it can be handed directly to ``utils.poll_until`` as the predicate.

    :returns: True if the command completes without raising, False if it
              raises ``exception.ProcessExecutionError``.
    """
    try:
        # Only success or failure matters here; the command's output is
        # deliberately not captured.
        utils.execute_with_timeout(
            "/usr/bin/mysqladmin",
            "ping", run_as_root=True, root_helper="sudo")
    except exception.ProcessExecutionError:
        LOG.info("Waiting for mysqld daemon to start")
        return False
    LOG.info("The mysqld daemon is up and running.")
    return True
class RestoreError(Exception):
"""Error running the Backup Command."""
@ -115,10 +131,21 @@ class RestoreRunner(Strategy):
try:
i = child.expect(['Starting mysqld daemon'])
if i == 0:
LOG.info("Root password reset successfully!")
LOG.info("Starting mysqld daemon")
except pexpect.TIMEOUT as e:
LOG.error("wait_and_close_proc failed: %s" % e)
finally:
try:
# There is a race condition here where we kill mysqld before
# the init file has been executed. We need to ensure mysqld is up.
utils.poll_until(mysql_is_running,
sleep_time=RESET_ROOT_SLEEP_INTERVAL,
time_out=RESET_ROOT_RETRY_TIMEOUT)
except exception.PollTimeOut:
raise RestoreError("Reset root password failed: "
"mysqld did not start!")
LOG.info("Root password reset successfully!")
LOG.info("Cleaning up the temp mysqld process...")
child.delayafterclose = 1
child.delayafterterminate = 1

View File

@ -43,6 +43,7 @@ class InnoBackupEx(base.RestoreRunner):
is_zipped = True
restore_cmd = 'sudo xbstream -x -C %(restore_location)s'
prepare_cmd = ('sudo innobackupex --apply-log %(restore_location)s '
'--defaults-file=%(restore_location)s/backup-my.cnf '
'--ibbackup xtrabackup 2>/tmp/innoprepare.log')
def _pre_restore(self):