From f8e13a883d961a4a5ba1cf6f557a96ebe6f540ad Mon Sep 17 00:00:00 2001 From: Jan Hartkopf Date: Tue, 16 Jan 2024 15:34:55 +0100 Subject: [PATCH] Ceph: Catch more failure conditions on volume backup This fixes issues for volume backups with the Ceph driver where failures of the first process ("rbd export-diff") were not caught. Instead, only the return code of the second process ("rbd import-diff") was recognized. This change also preserves the stderr that was lost previously in order to ease debugging. Closes-Bug: 2031897 Co-Authored-By: Pete Zaitcev Change-Id: I53b573bfff64e7460ef34f1355d3a9d52a8879f9 Signed-off-by: Jan Hartkopf --- cinder/backup/drivers/ceph.py | 56 +++++++++++-------- .../unit/backup/drivers/test_backup_ceph.py | 11 +++- ...e-failure-conditions-d2ec640f5ff8051c.yaml | 10 ++++ 3 files changed, 50 insertions(+), 27 deletions(-) create mode 100644 releasenotes/notes/ceph-catch-more-failure-conditions-d2ec640f5ff8051c.yaml diff --git a/cinder/backup/drivers/ceph.py b/cinder/backup/drivers/ceph.py index 0e66c4a05e4..cbe1eef21d4 100644 --- a/cinder/backup/drivers/ceph.py +++ b/cinder/backup/drivers/ceph.py @@ -47,6 +47,7 @@ import json import os import re import subprocess +import tempfile import time from typing import Dict, List, Optional, Tuple @@ -618,33 +619,40 @@ class CephBackupDriver(driver.BackupDriver): LOG.debug("Piping cmd1='%s' into...", ' '.join(cmd1)) LOG.debug("cmd2='%s'", ' '.join(cmd2)) - try: - p1 = subprocess.Popen(cmd1, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=True) - except OSError as e: - LOG.error("Pipe1 failed - %s ", e) - raise + with tempfile.TemporaryFile() as errfile: - # NOTE(dosaboy): ensure that the pipe is blocking. This is to work - # around the case where evenlet.green.subprocess is used which seems to - # use a non-blocking pipe. - assert p1.stdout is not None - flags = fcntl.fcntl(p1.stdout, fcntl.F_GETFL) & (~os.O_NONBLOCK) - fcntl.fcntl(p1.stdout, fcntl.F_SETFL, flags) + try: + p1 = subprocess.Popen(cmd1, stdout=subprocess.PIPE, + stderr=errfile, + close_fds=True) + except OSError as e: + LOG.error("Pipe1 failed - %s ", e) + raise - try: - p2 = subprocess.Popen(cmd2, stdin=p1.stdout, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=True) - except OSError as e: - LOG.error("Pipe2 failed - %s ", e) - raise + # NOTE(dosaboy): ensure that the pipe is blocking. This is to work + # around the case where evenlet.green.subprocess is used which + # seems to use a non-blocking pipe. + assert p1.stdout is not None + flags = fcntl.fcntl(p1.stdout, fcntl.F_GETFL) & (~os.O_NONBLOCK) + fcntl.fcntl(p1.stdout, fcntl.F_SETFL, flags) - p1.stdout.close() - stdout, stderr = p2.communicate() - return p2.returncode, stderr + try: + p2 = subprocess.Popen(cmd2, stdin=p1.stdout, + stdout=subprocess.PIPE, + stderr=errfile, + close_fds=True) + except OSError as e: + LOG.error("Pipe2 failed - %s ", e) + raise + + p1.stdout.close() + p2.communicate() + p1.wait() + + errfile.seek(0) + px_stderr = errfile.read() + + return p1.returncode or p2.returncode, px_stderr def _rbd_diff_transfer(self, src_name: str, src_pool: str, dest_name: str, dest_pool: str, diff --git a/cinder/tests/unit/backup/drivers/test_backup_ceph.py b/cinder/tests/unit/backup/drivers/test_backup_ceph.py index b5bef257aa4..8d8cc9f0b2a 100644 --- a/cinder/tests/unit/backup/drivers/test_backup_ceph.py +++ b/cinder/tests/unit/backup/drivers/test_backup_ceph.py @@ -157,6 +157,10 @@ class BackupCephTestCase(test.TestCase): self.callstack.append('communicate') return retval + def wait(mock_inst): + self.callstack.append('wait') + return retval + subprocess.Popen.side_effect = MockPopen def setUp(self): @@ -522,7 +526,8 @@ class BackupCephTestCase(test.TestCase): 'popen_init', 'write', 'stdout_close', - 'communicate'], self.callstack) + 'communicate', + 'wait'], self.callstack) self.assertFalse(mock_full_backup.called) self.assertFalse(mock_get_backup_snaps.called) @@ -1412,8 +1417,8 @@ class BackupCephTestCase(test.TestCase): mock_fcntl.return_value = 0 self._setup_mock_popen(['out', 'err']) self.service._piped_execute(['foo'], ['bar']) - self.assertEqual(['popen_init', 'popen_init', - 'stdout_close', 'communicate'], self.callstack) + self.assertEqual(['popen_init', 'popen_init', 'stdout_close', + 'communicate', 'wait'], self.callstack) @common_mocks def test_restore_metdata(self): diff --git a/releasenotes/notes/ceph-catch-more-failure-conditions-d2ec640f5ff8051c.yaml b/releasenotes/notes/ceph-catch-more-failure-conditions-d2ec640f5ff8051c.yaml new file mode 100644 index 00000000000..4bb20a377e1 --- /dev/null +++ b/releasenotes/notes/ceph-catch-more-failure-conditions-d2ec640f5ff8051c.yaml @@ -0,0 +1,10 @@ +--- +fixes: + - | + `Bug #2031897 `_: Fixed + issues for volume backups with the Ceph driver where failures of the first + process ("rbd export-diff") were not caught. Instead, only the return code + of the second process ("rbd import-diff") was recognized. + + This change also preserves the stderr that was lost previously + in order to ease debugging.