Fix race condition bug during live_snapshot

During live_snapshot creation, when nova starts a block_rebase
operation in libvirt, it is possible that the block job has not
yet started, in which case libvirt's blockJobInfo method returns
status.end = 0 and status.cur = 0. Because cur == end, the libvirt
driver treats the job as finished and does not wait for the block
rebase operation to complete, so the created snapshot is corrupted.

This patch adds a check that status.end != 0 before reporting that
the job has finished.

Change-Id: I45ac06eae0b1949f746dae305469718649bfcf23
Closes-Bug: #1530275
This commit is contained in:
Sławek Kapłoński 2016-08-31 20:28:36 +00:00
parent dd44096a04
commit 6b20239a5d
3 changed files with 22 additions and 5 deletions

View File

@ -14284,7 +14284,7 @@ class LibvirtConnTestCase(test.NoDBTestCase):
mock_dom.XMLDesc.return_value = xmldoc mock_dom.XMLDesc.return_value = xmldoc
mock_dom.isPersistent.return_value = True mock_dom.isPersistent.return_value = True
mock_dom.blockJobInfo.return_value = {} mock_dom.blockJobInfo.return_value = {'cur': 100, 'end': 100}
drvr._swap_volume(guest, srcfile, dstfile, 1) drvr._swap_volume(guest, srcfile, dstfile, 1)
@ -17557,7 +17557,8 @@ class LibvirtVolumeSnapshotTestCase(test.NoDBTestCase):
flags=fakelibvirt.VIR_DOMAIN_BLOCK_COMMIT_RELATIVE) flags=fakelibvirt.VIR_DOMAIN_BLOCK_COMMIT_RELATIVE)
domain.blockJobInfo('vda', flags=0).AndReturn({'cur': 1, 'end': 1000}) domain.blockJobInfo('vda', flags=0).AndReturn({'cur': 1, 'end': 1000})
domain.blockJobInfo('vda', flags=0).AndReturn({}) domain.blockJobInfo('vda', flags=0).AndReturn({'cur': 1000,
'end': 1000})
self.mox.ReplayAll() self.mox.ReplayAll()

View File

@ -622,7 +622,16 @@ class GuestBlockTestCase(test.NoDBTestCase):
'vda', "foo", "top", 0, 'vda', "foo", "top", 0,
flags=fakelibvirt.VIR_DOMAIN_BLOCK_COMMIT_RELATIVE) flags=fakelibvirt.VIR_DOMAIN_BLOCK_COMMIT_RELATIVE)
def test_wait_for_job(self): def test_wait_for_job_cur_end_zeros(self):
self.domain.blockJobInfo.return_value = {
"type": 4,
"bandwidth": 18,
"cur": 0,
"end": 0}
in_progress = self.gblock.wait_for_job()
self.assertTrue(in_progress)
def test_wait_for_job_current_lower_than_end(self):
self.domain.blockJobInfo.return_value = { self.domain.blockJobInfo.return_value = {
"type": 4, "type": 4,
"bandwidth": 18, "bandwidth": 18,
@ -631,6 +640,7 @@ class GuestBlockTestCase(test.NoDBTestCase):
in_progress = self.gblock.wait_for_job() in_progress = self.gblock.wait_for_job()
self.assertTrue(in_progress) self.assertTrue(in_progress)
def test_wait_for_job_finished(self):
self.domain.blockJobInfo.return_value = { self.domain.blockJobInfo.return_value = {
"type": 4, "type": 4,
"bandwidth": 18, "bandwidth": 18,
@ -639,6 +649,7 @@ class GuestBlockTestCase(test.NoDBTestCase):
in_progress = self.gblock.wait_for_job() in_progress = self.gblock.wait_for_job()
self.assertFalse(in_progress) self.assertFalse(in_progress)
def test_wait_for_job_clean(self):
self.domain.blockJobInfo.return_value = {"type": 0} self.domain.blockJobInfo.return_value = {"type": 0}
in_progress = self.gblock.wait_for_job(wait_for_job_clean=True) in_progress = self.gblock.wait_for_job(wait_for_job_clean=True)
self.assertFalse(in_progress) self.assertFalse(in_progress)

View File

@ -695,11 +695,13 @@ class BlockDevice(object):
Libvirt may return either cur==end or an empty dict when Libvirt may return either cur==end or an empty dict when
the job is complete, depending on whether the job has been the job is complete, depending on whether the job has been
cleaned up by libvirt yet, or not. cleaned up by libvirt yet, or not.
It can also return end=0 if qemu has not yet started the block
operation.
:param abort_on_error: Whether to stop process and raise NovaException :param abort_on_error: Whether to stop process and raise NovaException
on error (default: False) on error (default: False)
:param wait_for_job_clean: Whether to force wait to ensure job is :param wait_for_job_clean: Whether to force wait to ensure job is
finished (see bug: LP#1119173) finished (see bug: RH Bugzilla#1119173)
:returns: True if still in progress :returns: True if still in progress
False if completed False if completed
@ -714,7 +716,10 @@ class BlockDevice(object):
if wait_for_job_clean: if wait_for_job_clean:
job_ended = status.job == 0 job_ended = status.job == 0
else: else:
job_ended = status.cur == status.end # NOTE(slaweq): because of bug in libvirt, which is described in
# http://www.redhat.com/archives/libvir-list/2016-September/msg00017.html
# if status.end == 0 job is not started yet so it is not finished
job_ended = status.end != 0 and status.cur == status.end
return not job_ended return not job_ended