Add a hook for restore process to check if successful

1. This is the same solution as the one used for the backup process. Previously we only checked the stderr of the restore command, but this method is not reliable, and the changes in percona-xtrabackup 2.4.11 (https://jira.percona.com/browse/PXB-1542) cause the gate jobs to fail when creating a slave MySQL instance. The new hook is currently only used by the InnoBackupEx restore runner, which checks the exit status and stderr output of xbstream. 2. With [1] merged, DIB_CLOUD_IMAGES becomes more flexible, but it breaks the trovestack image build; we now need to specify a more detailed path in DIB_CLOUD_IMAGES to get the appropriate content. [1]: https://review.openstack.org/#/c/568697/ Co-Authored-By: Zhao Chao <zhaochao1984@gmail.com> Co-Authored-By: zhanggang <zhanggang@cmss.chinamobile.com> Co-Authored-By: jiansong <jian.song@easystack.cn> Closes-Bug: #1771990 Change-Id: Ibb54d6b5953a775be339fb991a0771961d27eba2
2018-05-18 20:18:41 +08:00 · 2018-05-18 20:18:41 +08:00 · 43b5807f2d
parent f51e726f64
commit 43b5807f2d
4 changed files with 88 additions and 9 deletions
--- a/integration/scripts/functions_qemu
+++ b/integration/scripts/functions_qemu
@ -34,7 +34,7 @@ function build_vm() {
    ARCH=amd64
    if [ $DISTRO == 'ubuntu' ]; then
        export DIB_RELEASE=$RELEASE
-        export DIB_CLOUD_IMAGES=cloud-images.ubuntu.com
+        export DIB_CLOUD_IMAGES=cloud-images.ubuntu.com/$DIB_RELEASE/current
        export DIB_USE_HWE_KERNEL
        ARCH=$(dpkg --print-architecture)
    elif [ $DISTRO == 'fedora' ]; then
--- a/trove/guestagent/strategies/restore/base.py
+++ b/trove/guestagent/strategies/restore/base.py
@ -53,6 +53,7 @@ class RestoreRunner(Strategy):

    def __init__(self, storage, **kwargs):
        self.storage = storage
+        self.process = None
        self.location = kwargs.pop('location')
        self.checksum = kwargs.pop('checksum')
        self.restore_location = kwargs.get('restore_location')
@ -80,15 +81,17 @@ class RestoreRunner(Strategy):

    def _unpack(self, location, checksum, command):
        stream = self.storage.load(location, checksum)
-        process = subprocess.Popen(command, shell=True,
-                                   stdin=subprocess.PIPE,
-                                   stderr=subprocess.PIPE)
+        self.process = subprocess.Popen(command, shell=True,
+                                        stdin=subprocess.PIPE,
+                                        stderr=subprocess.PIPE)
        content_length = 0
        for chunk in stream:
-            process.stdin.write(chunk)
+            self.process.stdin.write(chunk)
            content_length += len(chunk)
-        process.stdin.close()
-        utils.raise_if_process_errored(process, RestoreError)
+        self.process.stdin.close()
+        utils.raise_if_process_errored(self.process, RestoreError)
+        if not self.check_process():
+            raise RestoreError
        LOG.debug("Restored %s bytes from stream.", content_length)

        return content_length
@ -104,3 +107,7 @@ class RestoreRunner(Strategy):
    @property
    def unzip_cmd(self):
        return 'gzip -d -c | ' if self.is_zipped else ''
+
+    def check_process(self):
+        """Hook for subclasses to check the restore process for errors."""
+        return True
--- a/trove/guestagent/strategies/restore/mysql_impl.py
+++ b/trove/guestagent/strategies/restore/mysql_impl.py
@ -180,7 +180,8 @@ class MySQLDump(base.RestoreRunner, MySQLRestoreMixin):
 class InnoBackupEx(base.RestoreRunner, MySQLRestoreMixin):
    """Implementation of Restore Strategy for InnoBackupEx."""
    __strategy_name__ = 'innobackupex'
-    base_restore_cmd = 'sudo xbstream -x -C %(restore_location)s'
+    base_restore_cmd = ('sudo xbstream -x -C %(restore_location)s'
+                        ' 2>/tmp/xbstream_extract.log')
    base_prepare_cmd = ('sudo innobackupex'
                        ' --defaults-file=%(restore_location)s/backup-my.cnf'
                        ' --ibbackup=xtrabackup'
@ -229,6 +230,43 @@ class InnoBackupEx(base.RestoreRunner, MySQLRestoreMixin):
        for f in files:
            os.unlink(f)

+    def check_process(self):
+        """Check whether the xbstream restore succeeded.
+
+        Waits for ``self.process`` to exit and then scans the stderr log
+        at /tmp/xbstream_extract.log for unexpected output.
+
+        :returns: True if xbstream exited with 0 and logged nothing but
+                  blank or known non-error lines, False otherwise.
+        """
+        # The exit status alone is not sufficient because xbstream has a
+        # bug when creating new files:
+        # https://jira.percona.com/browse/PXB-1542
+        # so stderr is checked too, ignoring known non-error lines such as
+        # "encryption: using gcrypt x.x.x". After PXB-1542 is fixed, we
+        # could just check the exit status.
+        LOG.debug('Checking return code of xbstream restore process.')
+        return_code = self.process.wait()
+        if return_code != 0:
+            LOG.error('xbstream exited with %s', return_code)
+            return False
+
+        LOG.debug('Checking xbstream restore process stderr output.')
+        # str.startswith() accepts a tuple of candidate prefixes.
+        ignored_prefixes = ('encryption: using gcrypt ',)
+        with open('/tmp/xbstream_extract.log', 'r') as xbstream_log:
+            for line in xbstream_log:
+                # Skip empty lines and known non-error log lines.
+                if not line.strip() or line.startswith(ignored_prefixes):
+                    continue
+
+                # Anything else is treated as a failure.
+                LOG.error('xbstream restore failed with: %s',
+                          line.rstrip('\n'))
+                return False
+
+        return True
+

 class InnoBackupExIncremental(InnoBackupEx):
    __strategy_name__ = 'innobackupexincremental'
--- a/trove/tests/unittests/guestagent/test_backups.py
+++ b/trove/tests/unittests/guestagent/test_backups.py
@ -95,7 +95,8 @@ SQLDUMP_BACKUP_RAW = ("mysqldump --all-databases %(extra_opts)s "
 SQLDUMP_BACKUP = SQLDUMP_BACKUP_RAW % {'extra_opts': ''}
 SQLDUMP_BACKUP_EXTRA_OPTS = (SQLDUMP_BACKUP_RAW %
                             {'extra_opts': '--events --routines --triggers'})
-XTRA_RESTORE_RAW = "sudo xbstream -x -C %(restore_location)s"
+XTRA_RESTORE_RAW = ("sudo xbstream -x -C %(restore_location)s"
+                    " 2>/tmp/xbstream_extract.log")
 XTRA_RESTORE = XTRA_RESTORE_RAW % {'restore_location': '/var/lib/mysql/data'}
 XTRA_INCR_PREPARE = ("sudo innobackupex"
                     " --defaults-file=/var/lib/mysql/data/backup-my.cnf"
@ -1135,3 +1136,36 @@ class CouchDBRestoreTests(trove_testtools.TestCase):
        self.restore_runner.post_restore = mock.Mock()
        self.assertRaises(exception.ProcessExecutionError,
                          self.restore_runner.restore)
+
+
+class MySQLRestoreTests(trove_testtools.TestCase):
+
+    def setUp(self):
+        super(MySQLRestoreTests, self).setUp()
+
+        self.restore_runner = utils.import_class(
+            RESTORE_XTRA_CLS)(
+                'swift', location='http://some.where',
+                checksum='True_checksum',
+                restore_location='/tmp/somewhere')
+
+    def tearDown(self):
+        super(MySQLRestoreTests, self).tearDown()
+
+    def test_restore_success(self):
+        expected_content_length = 123
+        self.restore_runner._run_restore = mock.Mock(
+            return_value=expected_content_length)
+        self.restore_runner.pre_restore = mock.Mock()
+        self.restore_runner.post_restore = mock.Mock()
+        actual_content_length = self.restore_runner.restore()
+        self.assertEqual(
+            expected_content_length, actual_content_length)
+
+    def test_restore_failed_due_to_run_restore(self):
+        self.restore_runner.pre_restore = mock.Mock()
+        self.restore_runner._run_restore = mock.Mock(
+            side_effect=restoreBase.RestoreError('Error'))
+        self.restore_runner.post_restore = mock.Mock()
+        self.assertRaises(restoreBase.RestoreError,
+                          self.restore_runner.restore)