Fix race condition bug during live_snapshot
During live_snapshot creation, when nova starts block_rebase operation in libvirt there is possibility that block_job is not yet started and libvirt blockJobInfo method will return status.end = 0 and status.cur = 0. In such case libvirt driver does not wait to finish block rebase operation and that causes a problem because created snapshot is corrupted. This patch adds check if status.end != 0 to return information that job is already finished. Change-Id: I45ac06eae0b1949f746dae305469718649bfcf23 Closes-Bug: #1530275
This commit is contained in:
parent
dd44096a04
commit
6b20239a5d
@ -14284,7 +14284,7 @@ class LibvirtConnTestCase(test.NoDBTestCase):
|
|||||||
|
|
||||||
mock_dom.XMLDesc.return_value = xmldoc
|
mock_dom.XMLDesc.return_value = xmldoc
|
||||||
mock_dom.isPersistent.return_value = True
|
mock_dom.isPersistent.return_value = True
|
||||||
mock_dom.blockJobInfo.return_value = {}
|
mock_dom.blockJobInfo.return_value = {'cur': 100, 'end': 100}
|
||||||
|
|
||||||
drvr._swap_volume(guest, srcfile, dstfile, 1)
|
drvr._swap_volume(guest, srcfile, dstfile, 1)
|
||||||
|
|
||||||
@ -17557,7 +17557,8 @@ class LibvirtVolumeSnapshotTestCase(test.NoDBTestCase):
|
|||||||
flags=fakelibvirt.VIR_DOMAIN_BLOCK_COMMIT_RELATIVE)
|
flags=fakelibvirt.VIR_DOMAIN_BLOCK_COMMIT_RELATIVE)
|
||||||
|
|
||||||
domain.blockJobInfo('vda', flags=0).AndReturn({'cur': 1, 'end': 1000})
|
domain.blockJobInfo('vda', flags=0).AndReturn({'cur': 1, 'end': 1000})
|
||||||
domain.blockJobInfo('vda', flags=0).AndReturn({})
|
domain.blockJobInfo('vda', flags=0).AndReturn({'cur': 1000,
|
||||||
|
'end': 1000})
|
||||||
|
|
||||||
self.mox.ReplayAll()
|
self.mox.ReplayAll()
|
||||||
|
|
||||||
|
@ -622,7 +622,16 @@ class GuestBlockTestCase(test.NoDBTestCase):
|
|||||||
'vda', "foo", "top", 0,
|
'vda', "foo", "top", 0,
|
||||||
flags=fakelibvirt.VIR_DOMAIN_BLOCK_COMMIT_RELATIVE)
|
flags=fakelibvirt.VIR_DOMAIN_BLOCK_COMMIT_RELATIVE)
|
||||||
|
|
||||||
def test_wait_for_job(self):
|
def test_wait_for_job_cur_end_zeros(self):
|
||||||
|
self.domain.blockJobInfo.return_value = {
|
||||||
|
"type": 4,
|
||||||
|
"bandwidth": 18,
|
||||||
|
"cur": 0,
|
||||||
|
"end": 0}
|
||||||
|
in_progress = self.gblock.wait_for_job()
|
||||||
|
self.assertTrue(in_progress)
|
||||||
|
|
||||||
|
def test_wait_for_job_current_lower_than_end(self):
|
||||||
self.domain.blockJobInfo.return_value = {
|
self.domain.blockJobInfo.return_value = {
|
||||||
"type": 4,
|
"type": 4,
|
||||||
"bandwidth": 18,
|
"bandwidth": 18,
|
||||||
@ -631,6 +640,7 @@ class GuestBlockTestCase(test.NoDBTestCase):
|
|||||||
in_progress = self.gblock.wait_for_job()
|
in_progress = self.gblock.wait_for_job()
|
||||||
self.assertTrue(in_progress)
|
self.assertTrue(in_progress)
|
||||||
|
|
||||||
|
def test_wait_for_job_finished(self):
|
||||||
self.domain.blockJobInfo.return_value = {
|
self.domain.blockJobInfo.return_value = {
|
||||||
"type": 4,
|
"type": 4,
|
||||||
"bandwidth": 18,
|
"bandwidth": 18,
|
||||||
@ -639,6 +649,7 @@ class GuestBlockTestCase(test.NoDBTestCase):
|
|||||||
in_progress = self.gblock.wait_for_job()
|
in_progress = self.gblock.wait_for_job()
|
||||||
self.assertFalse(in_progress)
|
self.assertFalse(in_progress)
|
||||||
|
|
||||||
|
def test_wait_for_job_clean(self):
|
||||||
self.domain.blockJobInfo.return_value = {"type": 0}
|
self.domain.blockJobInfo.return_value = {"type": 0}
|
||||||
in_progress = self.gblock.wait_for_job(wait_for_job_clean=True)
|
in_progress = self.gblock.wait_for_job(wait_for_job_clean=True)
|
||||||
self.assertFalse(in_progress)
|
self.assertFalse(in_progress)
|
||||||
|
@ -695,11 +695,13 @@ class BlockDevice(object):
|
|||||||
Libvirt may return either cur==end or an empty dict when
|
Libvirt may return either cur==end or an empty dict when
|
||||||
the job is complete, depending on whether the job has been
|
the job is complete, depending on whether the job has been
|
||||||
cleaned up by libvirt yet, or not.
|
cleaned up by libvirt yet, or not.
|
||||||
|
It can also return end=0 if qemu has not yet started the block
|
||||||
|
operation.
|
||||||
|
|
||||||
:param abort_on_error: Whether to stop process and raise NovaException
|
:param abort_on_error: Whether to stop process and raise NovaException
|
||||||
on error (default: False)
|
on error (default: False)
|
||||||
:param wait_for_job_clean: Whether to force wait to ensure job is
|
:param wait_for_job_clean: Whether to force wait to ensure job is
|
||||||
finished (see bug: LP#1119173)
|
finished (see bug: RH Bugzilla#1119173)
|
||||||
|
|
||||||
:returns: True if still in progress
|
:returns: True if still in progress
|
||||||
False if completed
|
False if completed
|
||||||
@ -714,7 +716,10 @@ class BlockDevice(object):
|
|||||||
if wait_for_job_clean:
|
if wait_for_job_clean:
|
||||||
job_ended = status.job == 0
|
job_ended = status.job == 0
|
||||||
else:
|
else:
|
||||||
job_ended = status.cur == status.end
|
# NOTE(slaweq): because of bug in libvirt, which is described in
|
||||||
|
# http://www.redhat.com/archives/libvir-list/2016-September/msg00017.html
|
||||||
|
# if status.end == 0 job is not started yet so it is not finished
|
||||||
|
job_ended = status.end != 0 and status.cur == status.end
|
||||||
|
|
||||||
return not job_ended
|
return not job_ended
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user