Quiesce boot from volume instances during live snapshot

With this patch, nova automatically quiesces active boot-from-volume
instances when volume snapshots are created by the image-create command.
After the volume snapshots are completed, the instance is unquiesced.

If the boot volume has the 'os_require_quiesce=yes' image metadata,
snapshotting is aborted when quiescing fails. Otherwise, quiescing is
skipped on failure and snapshotting continues.

To use this feature with KVM instances, qemu-guest-agent must be
installed in the guest and the image metadata must have the
'hw_qemu_guest_agent=yes' property.

Change-Id: Ic43dfa441fad8fefaa41b1db43ad19b15dc483c3
Implements: blueprint quiesced-image-snapshots-with-qemu-guest-agent
This commit is contained in:
Tomoki Sekiyama 2014-12-02 16:52:43 -05:00
parent 6ae54e73f0
commit d04b8a3a21
9 changed files with 174 additions and 7 deletions

View File

@ -2222,6 +2222,21 @@ class API(base.Base):
properties['root_device_name'] = instance['root_device_name']
properties.update(extra_properties or {})
quiesced = False
if instance['vm_state'] == vm_states.ACTIVE:
try:
self.compute_rpcapi.quiesce_instance(context, instance)
quiesced = True
except (exception.InstanceQuiesceNotSupported,
exception.NovaException, NotImplementedError) as err:
if strutils.bool_from_string(properties.get(
'os_require_quiesce')):
raise
else:
LOG.info(_LI('Skipping quiescing instance: '
'%(reason)s.'), {'reason': err},
context=context, instance=instance)
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
context, instance['uuid'])
@ -2247,6 +2262,9 @@ class API(base.Base):
mapping.append(mapping_dict)
if quiesced:
self.compute_rpcapi.unquiesce_instance(context, instance, mapping)
# NOTE (ndipanov): Remove swap/ephemerals from mappings as they will be
# in the block_device_mapping for the new image.
image_mappings = properties.get('mappings')

View File

@ -77,6 +77,7 @@ from nova.network.security_group import openstack_driver
from nova import objects
from nova.objects import base as obj_base
from nova.openstack.common import log as logging
from nova.openstack.common import loopingcall
from nova.openstack.common import periodic_task
from nova import paths
from nova import rpc
@ -591,7 +592,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
class ComputeManager(manager.Manager):
"""Manages the running instances from creation to destruction."""
target = messaging.Target(version='3.38')
target = messaging.Target(version='3.39')
# How long to wait in seconds before re-issuing a shutdown
# signal to an instance during power off. The overall
@ -6327,3 +6328,51 @@ class ComputeManager(manager.Manager):
instance.cleaned = True
with utils.temporary_mutation(context, read_deleted='yes'):
instance.save()
@messaging.expected_exceptions(exception.InstanceQuiesceNotSupported,
                               exception.NovaException,
                               NotImplementedError)
@wrap_exception()
def quiesce_instance(self, context, instance):
    """Ask the virt driver to quiesce an instance on this host.

    The image metadata passed to the driver is looked up from the
    instance's ``image_ref`` using an elevated copy of ``context``.

    :param context: request context; an elevated copy is used for all calls
    :param instance: the instance to quiesce
    """
    admin_context = context.elevated()
    image_meta = compute_utils.get_image_metadata(
        admin_context, self.image_api, instance.image_ref, instance)
    self.driver.quiesce(admin_context, instance, image_meta)
def _wait_for_snapshots_completion(self, context, mapping):
    """Block until no snapshot listed in ``mapping`` is 'creating'.

    Entries whose ``source_type`` is not 'snapshot' are ignored. The
    remaining entries are handled one after another: each snapshot is
    polled through the volume API every 0.5 seconds until its status
    is anything other than 'creating'.

    :param context: request context passed to the volume API
    :param mapping: iterable of block device mapping dicts
    """
    def _poll(entry):
        # Looked up on every poll, exactly when the status is needed.
        snapshot = self.volume_api.get_snapshot(
            context, entry['snapshot_id'])
        if snapshot.get('status') != 'creating':
            raise loopingcall.LoopingCallDone()

    for entry in mapping:
        if entry.get('source_type') != 'snapshot':
            continue
        poller = loopingcall.FixedIntervalLoopingCall(_poll, entry)
        poller.start(interval=0.5).wait()
@messaging.expected_exceptions(exception.InstanceQuiesceNotSupported,
                               exception.NovaException,
                               NotImplementedError)
@wrap_exception()
def unquiesce_instance(self, context, instance, mapping=None):
    """Unquiesce an instance on this host.

    When a snapshot image mapping is supplied, wait for those snapshots
    to complete before unquiescing. Any exception raised while waiting
    is logged and otherwise ignored, so the driver is still asked to
    unquiesce the instance.

    :param context: request context; an elevated copy is used for all calls
    :param instance: the instance to unquiesce
    :param mapping: optional list of block device mapping dicts
    """
    admin_context = context.elevated()
    if mapping:
        try:
            self._wait_for_snapshots_completion(admin_context, mapping)
        except Exception as wait_error:
            # Best effort: a failed wait must not leave the guest frozen.
            LOG.exception(_LE("Exception while waiting completion of "
                              "volume snapshots: %s"),
                          wait_error, instance=instance)
    image_meta = compute_utils.get_image_metadata(
        admin_context, self.image_api, instance.image_ref, instance)
    self.driver.unquiesce(admin_context, instance, image_meta)

View File

@ -282,6 +282,7 @@ class ComputeAPI(object):
* 3.37 - Add clean_shutdown to stop, resize, rescue, shelve, and
shelve_offload
* 3.38 - Add clean_shutdown to prep_resize
* 3.39 - Add quiesce_instance and unquiesce_instance methods
'''
VERSION_ALIASES = {
@ -977,6 +978,19 @@ class ComputeAPI(object):
block_device_mapping=block_device_mapping, node=node,
limits=limits)
def quiesce_instance(self, ctxt, instance):
    """Call 'quiesce_instance' on the instance's compute host.

    Uses a blocking ``call`` pinned to RPC version 3.39 and returns
    whatever the remote side returns.
    """
    target_host = _compute_host(None, instance)
    client = self.client.prepare(server=target_host, version='3.39')
    return client.call(ctxt, 'quiesce_instance', instance=instance)
def unquiesce_instance(self, ctxt, instance, mapping=None):
    """Cast 'unquiesce_instance' to the instance's compute host.

    Sent as a ``cast`` pinned to RPC version 3.39, so nothing is
    returned to the caller. ``mapping`` is forwarded unchanged.
    """
    target_host = _compute_host(None, instance)
    client = self.client.prepare(server=target_host, version='3.39')
    client.cast(ctxt, 'unquiesce_instance',
                instance=instance, mapping=mapping)
class SecurityGroupAPI(object):
'''Client side of the security group rpc API.

View File

@ -212,8 +212,8 @@ def _get_unused_letter(used_letters):
def get_image_metadata(context, image_api, image_id_or_uri, instance):
image_system_meta = {}
# In case of boot from volume, image_id_or_uri may be None
if image_id_or_uri is not None:
# In case of boot from volume, image_id_or_uri may be None or ''
if image_id_or_uri is not None and image_id_or_uri != '':
# If the base image is still available, get its metadata
try:
image = image_api.get(context, image_id_or_uri)

View File

@ -972,6 +972,13 @@ class ServerActionsControllerTestV21(test.TestCase):
root_device_name='/dev/vda')
self.stubs.Set(db, 'instance_get_by_uuid', instance)
self.mox.StubOutWithMock(self.controller.compute_api.compute_rpcapi,
'quiesce_instance')
self.controller.compute_api.compute_rpcapi.quiesce_instance(
mox.IgnoreArg(), mox.IgnoreArg()).AndRaise(
exception.InstanceQuiesceNotSupported(instance_id='fake',
reason='test'))
volume = dict(id=_fake_id('a'),
size=1,
host='fake',
@ -1051,6 +1058,13 @@ class ServerActionsControllerTestV21(test.TestCase):
root_device_name='/dev/vda')
self.stubs.Set(db, 'instance_get_by_uuid', instance)
self.mox.StubOutWithMock(self.controller.compute_api.compute_rpcapi,
'quiesce_instance')
self.controller.compute_api.compute_rpcapi.quiesce_instance(
mox.IgnoreArg(), mox.IgnoreArg()).AndRaise(
exception.InstanceQuiesceNotSupported(instance_id='fake',
reason='test'))
fake_metadata = {'test_key1': 'test_value1',
'test_key2': 'test_value2'}
volume = dict(id=_fake_id('a'),

View File

@ -7193,6 +7193,24 @@ class ComputeTestCase(BaseTestCase):
self.assertEqual('ide', vol_bdm.disk_bus)
self.assertEqual('disk', vol_bdm.device_type)
@mock.patch.object(cinder.API, 'get_snapshot')
def test_quiesce(self, mock_snapshot_get):
    """An instance can be quiesced, then unquiesced after snapshots finish."""
    instance = self._create_fake_instance_obj()
    snapshot_ids = ('fake-id1', 'fake-id2')
    snapshot_mapping = [
        {'source_type': 'snapshot', 'snapshot_id': snap_id}
        for snap_id in snapshot_ids]
    # Each snapshot reports 'creating' once and 'available' afterwards,
    # so unquiesce has to poll every snapshot twice before it proceeds.
    mock_snapshot_get.side_effect = [{'status': 'creating'},
                                     {'status': 'available'},
                                     {'status': 'creating'},
                                     {'status': 'available'}]

    self.compute.run_instance(self.context, instance, {}, {}, [], None,
                              None, True, None, False)
    self.compute.quiesce_instance(self.context, instance)
    self.compute.unquiesce_instance(self.context, instance,
                                    snapshot_mapping)
    self.compute.terminate_instance(self.context, instance, [], [])

    for snap_id in snapshot_ids:
        mock_snapshot_get.assert_any_call(mock.ANY, snap_id)
    self.assertEqual(4, mock_snapshot_get.call_count)
class ComputeAPITestCase(BaseTestCase):
def setUp(self):

View File

@ -1981,8 +1981,9 @@ class _ComputeAPIUnitTestMixIn(object):
self._test_snapshot_and_backup(is_snapshot=False,
with_base_ref=True)
def test_snapshot_volume_backed(self):
params = dict(locked=True)
def _test_snapshot_volume_backed(self, quiesce_required, quiesce_fails,
vm_state=vm_states.ACTIVE):
params = dict(locked=True, vm_state=vm_state)
instance = self._create_instance_obj(params=params)
instance['root_device_name'] = 'vda'
@ -2004,6 +2005,13 @@ class _ComputeAPIUnitTestMixIn(object):
'is_public': False
}
quiesced = [False, False]
quiesce_expected = not quiesce_fails and vm_state == vm_states.ACTIVE
if quiesce_required:
image_meta['properties']['os_require_quiesce'] = 'yes'
expect_meta['properties']['os_require_quiesce'] = 'yes'
def fake_get_all_by_instance(context, instance, use_slave=False):
return copy.deepcopy(instance_bdms)
@ -2016,6 +2024,15 @@ class _ComputeAPIUnitTestMixIn(object):
def fake_volume_create_snapshot(context, volume_id, name, description):
return {'id': '%s-snapshot' % volume_id}
def fake_quiesce_instance(context, instance):
if quiesce_fails:
raise exception.InstanceQuiesceNotSupported(
instance_id=instance['uuid'], reason='test')
quiesced[0] = True
def fake_unquiesce_instance(context, instance, mapping=None):
quiesced[1] = True
self.stubs.Set(db, 'block_device_mapping_get_all_by_instance',
fake_get_all_by_instance)
self.stubs.Set(self.compute_api.image_api, 'create',
@ -2024,6 +2041,10 @@ class _ComputeAPIUnitTestMixIn(object):
fake_volume_get)
self.stubs.Set(self.compute_api.volume_api, 'create_snapshot_force',
fake_volume_create_snapshot)
self.stubs.Set(self.compute_api.compute_rpcapi, 'quiesce_instance',
fake_quiesce_instance)
self.stubs.Set(self.compute_api.compute_rpcapi, 'unquiesce_instance',
fake_unquiesce_instance)
# No block devices defined
self.compute_api.snapshot_volume_backed(
@ -2048,6 +2069,9 @@ class _ComputeAPIUnitTestMixIn(object):
self.compute_api.snapshot_volume_backed(
self.context, instance, copy.deepcopy(image_meta), 'test-snapshot')
self.assertEqual(quiesce_expected, quiesced[0])
self.assertEqual(quiesce_expected, quiesced[1])
image_mappings = [{'virtual': 'ami', 'device': 'vda'},
{'device': 'vda', 'virtual': 'ephemeral0'},
{'device': 'vdb', 'virtual': 'swap'},
@ -2058,10 +2082,32 @@ class _ComputeAPIUnitTestMixIn(object):
expect_meta['properties']['mappings'] = [
{'virtual': 'ami', 'device': 'vda'}]
quiesced = [False, False]
# Check that the mappings from the image properties are included
self.compute_api.snapshot_volume_backed(
self.context, instance, copy.deepcopy(image_meta), 'test-snapshot')
self.assertEqual(quiesce_expected, quiesced[0])
self.assertEqual(quiesce_expected, quiesced[1])
def test_snapshot_volume_backed(self):
    """Quiesce is not required and the quiesce call succeeds."""
    self._test_snapshot_volume_backed(quiesce_required=False,
                                      quiesce_fails=False)
def test_snapshot_volume_backed_with_quiesce(self):
    """Quiesce is required and the quiesce call succeeds."""
    self._test_snapshot_volume_backed(quiesce_required=True,
                                      quiesce_fails=False)
def test_snapshot_volume_backed_with_quiesce_skipped(self):
    """Quiesce is not required and the quiesce call fails."""
    self._test_snapshot_volume_backed(quiesce_required=False,
                                      quiesce_fails=True)
def test_snapshot_volume_backed_with_quiesce_exception(self):
    """Quiesce is required and the quiesce call fails: the error is raised."""
    self.assertRaises(exception.NovaException,
                      self._test_snapshot_volume_backed,
                      quiesce_required=True, quiesce_fails=True)
def test_snapshot_volume_backed_with_quiesce_stopped(self):
    """Run the required-and-failing quiesce case on a STOPPED instance."""
    self._test_snapshot_volume_backed(quiesce_required=True,
                                      quiesce_fails=True,
                                      vm_state=vm_states.STOPPED)
def test_volume_snapshot_create(self):
volume_id = '1'
create_info = {'id': 'eyedee'}

View File

@ -558,3 +558,11 @@ class ComputeRpcAPITestCase(test.TestCase):
security_groups=None,
block_device_mapping=None, node='node', limits=[],
version='3.23')
def test_quiesce_instance(self):
    """Check 'quiesce_instance' via the shared helper: 'call', 3.39."""
    expected_kwargs = {'instance': self.fake_instance_obj,
                       'version': '3.39'}
    self._test_compute_api('quiesce_instance', 'call', **expected_kwargs)
def test_unquiesce_instance(self):
    """Check 'unquiesce_instance' via the shared helper: 'cast', 3.39."""
    expected_kwargs = {'instance': self.fake_instance_obj,
                       'mapping': None,
                       'version': '3.39'}
    self._test_compute_api('unquiesce_instance', 'cast', **expected_kwargs)

View File

@ -94,8 +94,8 @@ class fake_snapshot(object):
'deleted': False,
'id': str(id),
'volume_id': volume_id,
'status': 'creating',
'progress': '0%',
'status': 'available',
'progress': '100%',
'volume_size': 1,
'display_name': name,
'display_description': desc,