Fixed race condition in the Restore workflow
When an instance is restored from a backup, the root password is reset as one of the restore steps. During that step, we now check that the mysqld daemon is up before we kill the process, which ensures that the root password is always reset successfully. Fixes bug 1187958. Change-Id: If5a5c0293c0dc51c3f118abbcc52c3fb0d1d3cfa
This commit is contained in:
parent
c387692d50
commit
44c0dba4c6
@ -6,6 +6,7 @@ from reddwarf.guestagent import volume
|
||||
from reddwarf.guestagent.manager.mysql_service import MySqlAppStatus
|
||||
from reddwarf.guestagent.manager.mysql_service import MySqlAdmin
|
||||
from reddwarf.guestagent.manager.mysql_service import MySqlApp
|
||||
from reddwarf.instance import models as rd_models
|
||||
from reddwarf.openstack.common import log as logging
|
||||
from reddwarf.openstack.common.gettextutils import _
|
||||
from reddwarf.openstack.common import periodic_task
|
||||
@ -65,10 +66,16 @@ class Manager(periodic_task.PeriodicTasks):
|
||||
def is_root_enabled(self, context):
|
||||
return MySqlAdmin().is_root_enabled()
|
||||
|
||||
def _perform_restore(self, backup_id, context, restore_location):
|
||||
def _perform_restore(self, backup_id, context, restore_location, app):
|
||||
LOG.info(_("Restoring database from backup %s" % backup_id))
|
||||
backup.restore(context, backup_id, restore_location)
|
||||
LOG.info(_("Restored database"))
|
||||
try:
|
||||
backup.restore(context, backup_id, restore_location)
|
||||
except Exception as e:
|
||||
LOG.error(e)
|
||||
LOG.error("Error performing restore from backup %s", backup_id)
|
||||
app.status.set_status(rd_models.ServiceStatuses.FAILED)
|
||||
raise
|
||||
LOG.info(_("Restored database successfully"))
|
||||
|
||||
def prepare(self, context, databases, memory_mb, users, device_path=None,
|
||||
mount_point=None, backup_id=None):
|
||||
@ -96,7 +103,7 @@ class Manager(periodic_task.PeriodicTasks):
|
||||
app.start_mysql()
|
||||
app.install_if_needed()
|
||||
if backup_id:
|
||||
self._perform_restore(backup_id, context, CONF.mount_point)
|
||||
self._perform_restore(backup_id, context, CONF.mount_point, app)
|
||||
LOG.info(_("Securing mysql now."))
|
||||
app.secure(memory_mb)
|
||||
if backup_id and MySqlAdmin().is_root_enabled():
|
||||
|
@ -14,7 +14,9 @@
|
||||
# under the License.
|
||||
#
|
||||
from reddwarf.guestagent.strategy import Strategy
|
||||
from reddwarf.common import cfg, utils
|
||||
from reddwarf.common import cfg
|
||||
from reddwarf.common import exception
|
||||
from reddwarf.common import utils
|
||||
from reddwarf.openstack.common import log as logging
|
||||
from eventlet.green import subprocess
|
||||
import tempfile
|
||||
@ -25,12 +27,26 @@ import glob
|
||||
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
CHUNK_SIZE = CONF.backup_chunk_size
# Upper bound handed to utils.poll_until as `time_out` while waiting for
# mysqld to come up during the root password reset.
# NOTE(review): presumably seconds -- confirm against utils.poll_until.
RESET_ROOT_RETRY_TIMEOUT = 100
# Pause handed to utils.poll_until as `sleep_time` between successive
# mysql_is_running checks.
# NOTE(review): presumably seconds -- confirm against utils.poll_until.
RESET_ROOT_SLEEP_INTERVAL = 10
# SQL that sets the password of every 'root' row in mysql.user to the empty
# string and then reloads the grant tables.
# NOTE(review): presumably fed to mysqld through an init file during
# restore -- the consumer is not visible here; confirm.
RESET_ROOT_MYSQL_COMMAND = """
UPDATE mysql.user SET Password=PASSWORD('') WHERE User='root';
FLUSH PRIVILEGES;
"""
|
||||
|
||||
|
||||
def mysql_is_running():
    """Report whether the local mysqld daemon answers ``mysqladmin ping``.

    Runs ``/usr/bin/mysqladmin ping`` through ``sudo`` using
    ``utils.execute_with_timeout``. The function takes no arguments and
    returns a boolean, so it can be used directly as the predicate for
    ``utils.poll_until``.

    Returns:
        True when the command completes without raising
        ``exception.ProcessExecutionError``; False when that exception is
        raised.
    """
    try:
        # The command's output is not needed: failure is signalled by the
        # exception, so the return value is deliberately discarded.
        utils.execute_with_timeout(
            "/usr/bin/mysqladmin",
            "ping", run_as_root=True, root_helper="sudo")
    except exception.ProcessExecutionError:
        LOG.info("Waiting for mysqld daemon to start")
        return False
    LOG.info("The mysqld daemon is up and running.")
    return True
|
||||
|
||||
|
||||
class RestoreError(Exception):
    """Error raised when a restore step fails.

    Raised in this module when the root password reset cannot complete
    because mysqld did not start before polling timed out.
    """
|
||||
|
||||
@ -115,10 +131,21 @@ class RestoreRunner(Strategy):
|
||||
try:
|
||||
i = child.expect(['Starting mysqld daemon'])
|
||||
if i == 0:
|
||||
LOG.info("Root password reset successfully!")
|
||||
LOG.info("Starting mysqld daemon")
|
||||
except pexpect.TIMEOUT as e:
|
||||
LOG.error("wait_and_close_proc failed: %s" % e)
|
||||
finally:
|
||||
try:
|
||||
# There is a race condition here where we kill mysqld before
|
||||
# the init file has been executed. We need to ensure mysqld is up.
|
||||
utils.poll_until(mysql_is_running,
|
||||
sleep_time=RESET_ROOT_SLEEP_INTERVAL,
|
||||
time_out=RESET_ROOT_RETRY_TIMEOUT)
|
||||
except exception.PollTimeOut:
|
||||
raise RestoreError("Reset root password failed: "
|
||||
"mysqld did not start!")
|
||||
|
||||
LOG.info("Root password reset successfully!")
|
||||
LOG.info("Cleaning up the temp mysqld process...")
|
||||
child.delayafterclose = 1
|
||||
child.delayafterterminate = 1
|
||||
|
@ -43,6 +43,7 @@ class InnoBackupEx(base.RestoreRunner):
|
||||
is_zipped = True
|
||||
restore_cmd = 'sudo xbstream -x -C %(restore_location)s'
|
||||
prepare_cmd = ('sudo innobackupex --apply-log %(restore_location)s '
|
||||
'--defaults-file=%(restore_location)s/backup-my.cnf '
|
||||
'--ibbackup xtrabackup 2>/tmp/innoprepare.log')
|
||||
|
||||
def _pre_restore(self):
|
||||
|
Loading…
Reference in New Issue
Block a user