Ceph: Catch more failure conditions on volume backup

This fixes issues for volume backups with the Ceph driver
where failures of the first process ("rbd export-diff") were
not caught. Instead, only the return code of the second
process ("rbd import-diff") was recognized.

This change also preserves the stderr that was lost previously
in order to ease debugging.

Closes-Bug: 2031897
Co-Authored-By: Pete Zaitcev <zaitcev@kotori.zaitcev.us>
Change-Id: I53b573bfff64e7460ef34f1355d3a9d52a8879f9
Signed-off-by: Jan Hartkopf <jhartkopf@inovex.de>
This commit is contained in:
Jan Hartkopf 2024-01-16 15:34:55 +01:00
parent e38f3b799b
commit f8e13a883d
3 changed files with 50 additions and 27 deletions

View File

@ -47,6 +47,7 @@ import json
import os import os
import re import re
import subprocess import subprocess
import tempfile
import time import time
from typing import Dict, List, Optional, Tuple from typing import Dict, List, Optional, Tuple
@ -618,33 +619,40 @@ class CephBackupDriver(driver.BackupDriver):
LOG.debug("Piping cmd1='%s' into...", ' '.join(cmd1)) LOG.debug("Piping cmd1='%s' into...", ' '.join(cmd1))
LOG.debug("cmd2='%s'", ' '.join(cmd2)) LOG.debug("cmd2='%s'", ' '.join(cmd2))
try: with tempfile.TemporaryFile() as errfile:
p1 = subprocess.Popen(cmd1, stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
close_fds=True)
except OSError as e:
LOG.error("Pipe1 failed - %s ", e)
raise
# NOTE(dosaboy): ensure that the pipe is blocking. This is to work try:
# around the case where evenlet.green.subprocess is used which seems to p1 = subprocess.Popen(cmd1, stdout=subprocess.PIPE,
# use a non-blocking pipe. stderr=errfile,
assert p1.stdout is not None close_fds=True)
flags = fcntl.fcntl(p1.stdout, fcntl.F_GETFL) & (~os.O_NONBLOCK) except OSError as e:
fcntl.fcntl(p1.stdout, fcntl.F_SETFL, flags) LOG.error("Pipe1 failed - %s ", e)
raise
try: # NOTE(dosaboy): ensure that the pipe is blocking. This is to work
p2 = subprocess.Popen(cmd2, stdin=p1.stdout, # around the case where evenlet.green.subprocess is used which
stdout=subprocess.PIPE, # seems to use a non-blocking pipe.
stderr=subprocess.PIPE, assert p1.stdout is not None
close_fds=True) flags = fcntl.fcntl(p1.stdout, fcntl.F_GETFL) & (~os.O_NONBLOCK)
except OSError as e: fcntl.fcntl(p1.stdout, fcntl.F_SETFL, flags)
LOG.error("Pipe2 failed - %s ", e)
raise
p1.stdout.close() try:
stdout, stderr = p2.communicate() p2 = subprocess.Popen(cmd2, stdin=p1.stdout,
return p2.returncode, stderr stdout=subprocess.PIPE,
stderr=errfile,
close_fds=True)
except OSError as e:
LOG.error("Pipe2 failed - %s ", e)
raise
p1.stdout.close()
p2.communicate()
p1.wait()
errfile.seek(0)
px_stderr = errfile.read()
return p1.returncode or p2.returncode, px_stderr
def _rbd_diff_transfer(self, src_name: str, src_pool: str, def _rbd_diff_transfer(self, src_name: str, src_pool: str,
dest_name: str, dest_pool: str, dest_name: str, dest_pool: str,

View File

@ -157,6 +157,10 @@ class BackupCephTestCase(test.TestCase):
self.callstack.append('communicate') self.callstack.append('communicate')
return retval return retval
def wait(mock_inst):
self.callstack.append('wait')
return retval
subprocess.Popen.side_effect = MockPopen subprocess.Popen.side_effect = MockPopen
def setUp(self): def setUp(self):
@ -522,7 +526,8 @@ class BackupCephTestCase(test.TestCase):
'popen_init', 'popen_init',
'write', 'write',
'stdout_close', 'stdout_close',
'communicate'], self.callstack) 'communicate',
'wait'], self.callstack)
self.assertFalse(mock_full_backup.called) self.assertFalse(mock_full_backup.called)
self.assertFalse(mock_get_backup_snaps.called) self.assertFalse(mock_get_backup_snaps.called)
@ -1412,8 +1417,8 @@ class BackupCephTestCase(test.TestCase):
mock_fcntl.return_value = 0 mock_fcntl.return_value = 0
self._setup_mock_popen(['out', 'err']) self._setup_mock_popen(['out', 'err'])
self.service._piped_execute(['foo'], ['bar']) self.service._piped_execute(['foo'], ['bar'])
self.assertEqual(['popen_init', 'popen_init', self.assertEqual(['popen_init', 'popen_init', 'stdout_close',
'stdout_close', 'communicate'], self.callstack) 'communicate', 'wait'], self.callstack)
@common_mocks @common_mocks
def test_restore_metdata(self): def test_restore_metdata(self):

View File

@ -0,0 +1,10 @@
---
fixes:
- |
`Bug #2031897 <https://bugs.launchpad.net/cinder/+bug/2031897>`_: Fixed
issues for volume backups with the Ceph driver where failures of the first
process ("rbd export-diff") were not caught. Instead, only the return code
of the second process ("rbd import-diff") was recognized.
This change also preserves the stderr that was lost previously
in order to ease debugging.