Ceph: Catch more failure conditions on volume backup

This fixes issues for volume backups with the Ceph driver
where failures of the first process ("rbd export-diff") were
not caught. Instead, only the return code of the second
process ("rbd import-diff") was recognized.

This change also preserves the stderr that was lost previously
in order to ease debugging.

Closes-Bug: 2031897
Co-Authored-By: Pete Zaitcev <zaitcev@kotori.zaitcev.us>
Change-Id: I53b573bfff64e7460ef34f1355d3a9d52a8879f9
Signed-off-by: Jan Hartkopf <jhartkopf@inovex.de>
This commit is contained in:
Jan Hartkopf 2024-01-16 15:34:55 +01:00
parent e38f3b799b
commit f8e13a883d
3 changed files with 50 additions and 27 deletions

View File

@ -47,6 +47,7 @@ import json
import os
import re
import subprocess
import tempfile
import time
from typing import Dict, List, Optional, Tuple
@ -618,17 +619,19 @@ class CephBackupDriver(driver.BackupDriver):
LOG.debug("Piping cmd1='%s' into...", ' '.join(cmd1))
LOG.debug("cmd2='%s'", ' '.join(cmd2))
with tempfile.TemporaryFile() as errfile:
try:
p1 = subprocess.Popen(cmd1, stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stderr=errfile,
close_fds=True)
except OSError as e:
LOG.error("Pipe1 failed - %s ", e)
raise
# NOTE(dosaboy): ensure that the pipe is blocking. This is to work
# around the case where evenlet.green.subprocess is used which seems to
# use a non-blocking pipe.
# around the case where evenlet.green.subprocess is used which
# seems to use a non-blocking pipe.
assert p1.stdout is not None
flags = fcntl.fcntl(p1.stdout, fcntl.F_GETFL) & (~os.O_NONBLOCK)
fcntl.fcntl(p1.stdout, fcntl.F_SETFL, flags)
@ -636,15 +639,20 @@ class CephBackupDriver(driver.BackupDriver):
try:
p2 = subprocess.Popen(cmd2, stdin=p1.stdout,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stderr=errfile,
close_fds=True)
except OSError as e:
LOG.error("Pipe2 failed - %s ", e)
raise
p1.stdout.close()
stdout, stderr = p2.communicate()
return p2.returncode, stderr
p2.communicate()
p1.wait()
errfile.seek(0)
px_stderr = errfile.read()
return p1.returncode or p2.returncode, px_stderr
def _rbd_diff_transfer(self, src_name: str, src_pool: str,
dest_name: str, dest_pool: str,

View File

@ -157,6 +157,10 @@ class BackupCephTestCase(test.TestCase):
self.callstack.append('communicate')
return retval
def wait(mock_inst):
self.callstack.append('wait')
return retval
subprocess.Popen.side_effect = MockPopen
def setUp(self):
@ -522,7 +526,8 @@ class BackupCephTestCase(test.TestCase):
'popen_init',
'write',
'stdout_close',
'communicate'], self.callstack)
'communicate',
'wait'], self.callstack)
self.assertFalse(mock_full_backup.called)
self.assertFalse(mock_get_backup_snaps.called)
@ -1412,8 +1417,8 @@ class BackupCephTestCase(test.TestCase):
mock_fcntl.return_value = 0
self._setup_mock_popen(['out', 'err'])
self.service._piped_execute(['foo'], ['bar'])
self.assertEqual(['popen_init', 'popen_init',
'stdout_close', 'communicate'], self.callstack)
self.assertEqual(['popen_init', 'popen_init', 'stdout_close',
'communicate', 'wait'], self.callstack)
@common_mocks
def test_restore_metdata(self):

View File

@ -0,0 +1,10 @@
---
fixes:
- |
`Bug #2031897 <https://bugs.launchpad.net/cinder/+bug/2031897>`_: Fixed
issues for volume backups with the Ceph driver where failures of the first
process ("rbd export-diff") were not caught. Instead, only the return code
of the second process ("rbd import-diff") was recognized.
This change also preserves the stderr that was lost previously
in order to ease debugging.