From d04b8a3a21de20682a6d2bff5879efde5e2bbc0f Mon Sep 17 00:00:00 2001 From: Tomoki Sekiyama Date: Tue, 2 Dec 2014 16:52:43 -0500 Subject: [PATCH] Quiesce boot from volume instances during live snapshot With this patch, nova will automatically quiesce active volume boot instances on creating volume snapshots by the image-create command. After volume snapshots are completed, the instance is unquiesced. If the boot volume has 'os_require_quiesce=yes' image metadata, the snapshotting is aborted when quiescing fails. Otherwise, quiescing is skipped and the snapshotting is continued. To utilize this feature in KVM instances, qemu-guest-agent must be installed in the guest and the image metadata must have the 'hw_qemu_guest_agent=yes' property. Change-Id: Ic43dfa441fad8fefaa41b1db43ad19b15dc483c3 Implements: blueprint quiesced-image-snapshots-with-qemu-guest-agent --- nova/compute/api.py | 18 +++++++ nova/compute/manager.py | 51 ++++++++++++++++++- nova/compute/rpcapi.py | 14 +++++ nova/compute/utils.py | 4 +- .../openstack/compute/test_server_actions.py | 14 +++++ nova/tests/unit/compute/test_compute.py | 18 +++++++ nova/tests/unit/compute/test_compute_api.py | 50 +++++++++++++++++- nova/tests/unit/compute/test_rpcapi.py | 8 +++ nova/tests/unit/fake_volume.py | 4 +- 9 files changed, 174 insertions(+), 7 deletions(-) diff --git a/nova/compute/api.py b/nova/compute/api.py index fcef46d9a4d4..c6c70f70db55 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -2222,6 +2222,21 @@ class API(base.Base): properties['root_device_name'] = instance['root_device_name'] properties.update(extra_properties or {}) + quiesced = False + if instance['vm_state'] == vm_states.ACTIVE: + try: + self.compute_rpcapi.quiesce_instance(context, instance) + quiesced = True + except (exception.InstanceQuiesceNotSupported, + exception.NovaException, NotImplementedError) as err: + if strutils.bool_from_string(properties.get( + 'os_require_quiesce')): + raise + else: + LOG.info(_LI('Skipping quiescing 
instance: ' + '%(reason)s.'), {'reason': err}, + context=context, instance=instance) + bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( context, instance['uuid']) @@ -2247,6 +2262,9 @@ class API(base.Base): mapping.append(mapping_dict) + if quiesced: + self.compute_rpcapi.unquiesce_instance(context, instance, mapping) + # NOTE (ndipanov): Remove swap/ephemerals from mappings as they will be # in the block_device_mapping for the new image. image_mappings = properties.get('mappings') diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 6f8ddeefe104..73cd5738c69e 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -77,6 +77,7 @@ from nova.network.security_group import openstack_driver from nova import objects from nova.objects import base as obj_base from nova.openstack.common import log as logging +from nova.openstack.common import loopingcall from nova.openstack.common import periodic_task from nova import paths from nova import rpc @@ -591,7 +592,7 @@ class ComputeVirtAPI(virtapi.VirtAPI): class ComputeManager(manager.Manager): """Manages the running instances from creation to destruction.""" - target = messaging.Target(version='3.38') + target = messaging.Target(version='3.39') # How long to wait in seconds before re-issuing a shutdown # signal to a instance during power off. 
The overall @@ -6327,3 +6328,51 @@ class ComputeManager(manager.Manager): instance.cleaned = True with utils.temporary_mutation(context, read_deleted='yes'): instance.save() + + @messaging.expected_exceptions(exception.InstanceQuiesceNotSupported, + exception.NovaException, + NotImplementedError) + @wrap_exception() + def quiesce_instance(self, context, instance): + """Quiesce an instance on this host.""" + context = context.elevated() + image_ref = instance.image_ref + image_meta = compute_utils.get_image_metadata( + context, self.image_api, image_ref, instance) + self.driver.quiesce(context, instance, image_meta) + + def _wait_for_snapshots_completion(self, context, mapping): + for mapping_dict in mapping: + if mapping_dict.get('source_type') == 'snapshot': + + def _wait_snapshot(): + snapshot = self.volume_api.get_snapshot( + context, mapping_dict['snapshot_id']) + if snapshot.get('status') != 'creating': + raise loopingcall.LoopingCallDone() + + timer = loopingcall.FixedIntervalLoopingCall(_wait_snapshot) + timer.start(interval=0.5).wait() + + @messaging.expected_exceptions(exception.InstanceQuiesceNotSupported, + exception.NovaException, + NotImplementedError) + @wrap_exception() + def unquiesce_instance(self, context, instance, mapping=None): + """Unquiesce an instance on this host. + + If snapshots' image mapping is provided, it waits until snapshots are + completed before unquiescing. 
+ """ + context = context.elevated() + if mapping: + try: + self._wait_for_snapshots_completion(context, mapping) + except Exception as error: + LOG.exception(_LE("Exception while waiting completion of " + "volume snapshots: %s"), + error, instance=instance) + image_ref = instance.image_ref + image_meta = compute_utils.get_image_metadata( + context, self.image_api, image_ref, instance) + self.driver.unquiesce(context, instance, image_meta) diff --git a/nova/compute/rpcapi.py b/nova/compute/rpcapi.py index 6fa4a1ddec01..b6b56b77f0d8 100644 --- a/nova/compute/rpcapi.py +++ b/nova/compute/rpcapi.py @@ -282,6 +282,7 @@ class ComputeAPI(object): * 3.37 - Add clean_shutdown to stop, resize, rescue, shelve, and shelve_offload * 3.38 - Add clean_shutdown to prep_resize + * 3.39 - Add quiesce_instance and unquiesce_instance methods ''' VERSION_ALIASES = { @@ -977,6 +978,19 @@ class ComputeAPI(object): block_device_mapping=block_device_mapping, node=node, limits=limits) + def quiesce_instance(self, ctxt, instance): + version = '3.39' + cctxt = self.client.prepare(server=_compute_host(None, instance), + version=version) + return cctxt.call(ctxt, 'quiesce_instance', instance=instance) + + def unquiesce_instance(self, ctxt, instance, mapping=None): + version = '3.39' + cctxt = self.client.prepare(server=_compute_host(None, instance), + version=version) + cctxt.cast(ctxt, 'unquiesce_instance', instance=instance, + mapping=mapping) + class SecurityGroupAPI(object): '''Client side of the security group rpc API. 
diff --git a/nova/compute/utils.py b/nova/compute/utils.py index 212c083ce90c..f655876ba858 100644 --- a/nova/compute/utils.py +++ b/nova/compute/utils.py @@ -212,8 +212,8 @@ def _get_unused_letter(used_letters): def get_image_metadata(context, image_api, image_id_or_uri, instance): image_system_meta = {} - # In case of boot from volume, image_id_or_uri may be None - if image_id_or_uri is not None: + # In case of boot from volume, image_id_or_uri may be None or '' + if image_id_or_uri is not None and image_id_or_uri != '': # If the base image is still available, get its metadata try: image = image_api.get(context, image_id_or_uri) diff --git a/nova/tests/unit/api/openstack/compute/test_server_actions.py b/nova/tests/unit/api/openstack/compute/test_server_actions.py index a07ac15a859d..87be01e5146b 100644 --- a/nova/tests/unit/api/openstack/compute/test_server_actions.py +++ b/nova/tests/unit/api/openstack/compute/test_server_actions.py @@ -972,6 +972,13 @@ class ServerActionsControllerTestV21(test.TestCase): root_device_name='/dev/vda') self.stubs.Set(db, 'instance_get_by_uuid', instance) + self.mox.StubOutWithMock(self.controller.compute_api.compute_rpcapi, + 'quiesce_instance') + self.controller.compute_api.compute_rpcapi.quiesce_instance( + mox.IgnoreArg(), mox.IgnoreArg()).AndRaise( + exception.InstanceQuiesceNotSupported(instance_id='fake', + reason='test')) + volume = dict(id=_fake_id('a'), size=1, host='fake', @@ -1051,6 +1058,13 @@ class ServerActionsControllerTestV21(test.TestCase): root_device_name='/dev/vda') self.stubs.Set(db, 'instance_get_by_uuid', instance) + self.mox.StubOutWithMock(self.controller.compute_api.compute_rpcapi, + 'quiesce_instance') + self.controller.compute_api.compute_rpcapi.quiesce_instance( + mox.IgnoreArg(), mox.IgnoreArg()).AndRaise( + exception.InstanceQuiesceNotSupported(instance_id='fake', + reason='test')) + fake_metadata = {'test_key1': 'test_value1', 'test_key2': 'test_value2'} volume = dict(id=_fake_id('a'), diff --git 
a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index 642f4912200f..bc732d0f3bfd 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -7193,6 +7193,24 @@ class ComputeTestCase(BaseTestCase): self.assertEqual('ide', vol_bdm.disk_bus) self.assertEqual('disk', vol_bdm.device_type) + @mock.patch.object(cinder.API, 'get_snapshot') + def test_quiesce(self, mock_snapshot_get): + # ensure instance can be quiesced and unquiesced + instance = self._create_fake_instance_obj() + mapping = [{'source_type': 'snapshot', 'snapshot_id': 'fake-id1'}, + {'source_type': 'snapshot', 'snapshot_id': 'fake-id2'}] + # unquiesce should wait until volume snapshots are completed + mock_snapshot_get.side_effect = [{'status': 'creating'}, + {'status': 'available'}] * 2 + self.compute.run_instance(self.context, instance, {}, {}, [], None, + None, True, None, False) + self.compute.quiesce_instance(self.context, instance) + self.compute.unquiesce_instance(self.context, instance, mapping) + self.compute.terminate_instance(self.context, instance, [], []) + mock_snapshot_get.assert_any_call(mock.ANY, 'fake-id1') + mock_snapshot_get.assert_any_call(mock.ANY, 'fake-id2') + self.assertEqual(4, mock_snapshot_get.call_count) + class ComputeAPITestCase(BaseTestCase): def setUp(self): diff --git a/nova/tests/unit/compute/test_compute_api.py b/nova/tests/unit/compute/test_compute_api.py index 27fd6b8e7a5a..e97a0b39dc41 100644 --- a/nova/tests/unit/compute/test_compute_api.py +++ b/nova/tests/unit/compute/test_compute_api.py @@ -1981,8 +1981,9 @@ class _ComputeAPIUnitTestMixIn(object): self._test_snapshot_and_backup(is_snapshot=False, with_base_ref=True) - def test_snapshot_volume_backed(self): - params = dict(locked=True) + def _test_snapshot_volume_backed(self, quiesce_required, quiesce_fails, + vm_state=vm_states.ACTIVE): + params = dict(locked=True, vm_state=vm_state) instance = self._create_instance_obj(params=params) 
instance['root_device_name'] = 'vda' @@ -2004,6 +2005,13 @@ class _ComputeAPIUnitTestMixIn(object): 'is_public': False } + quiesced = [False, False] + quiesce_expected = not quiesce_fails and vm_state == vm_states.ACTIVE + + if quiesce_required: + image_meta['properties']['os_require_quiesce'] = 'yes' + expect_meta['properties']['os_require_quiesce'] = 'yes' + def fake_get_all_by_instance(context, instance, use_slave=False): return copy.deepcopy(instance_bdms) @@ -2016,6 +2024,15 @@ class _ComputeAPIUnitTestMixIn(object): def fake_volume_create_snapshot(context, volume_id, name, description): return {'id': '%s-snapshot' % volume_id} + def fake_quiesce_instance(context, instance): + if quiesce_fails: + raise exception.InstanceQuiesceNotSupported( + instance_id=instance['uuid'], reason='test') + quiesced[0] = True + + def fake_unquiesce_instance(context, instance, mapping=None): + quiesced[1] = True + self.stubs.Set(db, 'block_device_mapping_get_all_by_instance', fake_get_all_by_instance) self.stubs.Set(self.compute_api.image_api, 'create', @@ -2024,6 +2041,10 @@ class _ComputeAPIUnitTestMixIn(object): fake_volume_get) self.stubs.Set(self.compute_api.volume_api, 'create_snapshot_force', fake_volume_create_snapshot) + self.stubs.Set(self.compute_api.compute_rpcapi, 'quiesce_instance', + fake_quiesce_instance) + self.stubs.Set(self.compute_api.compute_rpcapi, 'unquiesce_instance', + fake_unquiesce_instance) # No block devices defined self.compute_api.snapshot_volume_backed( @@ -2048,6 +2069,9 @@ class _ComputeAPIUnitTestMixIn(object): self.compute_api.snapshot_volume_backed( self.context, instance, copy.deepcopy(image_meta), 'test-snapshot') + self.assertEqual(quiesce_expected, quiesced[0]) + self.assertEqual(quiesce_expected, quiesced[1]) + image_mappings = [{'virtual': 'ami', 'device': 'vda'}, {'device': 'vda', 'virtual': 'ephemeral0'}, {'device': 'vdb', 'virtual': 'swap'}, @@ -2058,10 +2082,32 @@ class _ComputeAPIUnitTestMixIn(object): 
expect_meta['properties']['mappings'] = [ {'virtual': 'ami', 'device': 'vda'}] + quiesced = [False, False] + # Check that the mappgins from the image properties are included self.compute_api.snapshot_volume_backed( self.context, instance, copy.deepcopy(image_meta), 'test-snapshot') + self.assertEqual(quiesce_expected, quiesced[0]) + self.assertEqual(quiesce_expected, quiesced[1]) + + def test_snapshot_volume_backed(self): + self._test_snapshot_volume_backed(False, False) + + def test_snapshot_volume_backed_with_quiesce(self): + self._test_snapshot_volume_backed(True, False) + + def test_snapshot_volume_backed_with_quiesce_skipped(self): + self._test_snapshot_volume_backed(False, True) + + def test_snapshot_volume_backed_with_quiesce_exception(self): + self.assertRaises(exception.NovaException, + self._test_snapshot_volume_backed, True, True) + + def test_snapshot_volume_backed_with_quiesce_stopped(self): + self._test_snapshot_volume_backed(True, True, + vm_state=vm_states.STOPPED) + def test_volume_snapshot_create(self): volume_id = '1' create_info = {'id': 'eyedee'} diff --git a/nova/tests/unit/compute/test_rpcapi.py b/nova/tests/unit/compute/test_rpcapi.py index 4b03f1505b7c..8c135711dc47 100644 --- a/nova/tests/unit/compute/test_rpcapi.py +++ b/nova/tests/unit/compute/test_rpcapi.py @@ -558,3 +558,11 @@ class ComputeRpcAPITestCase(test.TestCase): security_groups=None, block_device_mapping=None, node='node', limits=[], version='3.23') + + def test_quiesce_instance(self): + self._test_compute_api('quiesce_instance', 'call', + instance=self.fake_instance_obj, version='3.39') + + def test_unquiesce_instance(self): + self._test_compute_api('unquiesce_instance', 'cast', + instance=self.fake_instance_obj, mapping=None, version='3.39') diff --git a/nova/tests/unit/fake_volume.py b/nova/tests/unit/fake_volume.py index 13571016ac5d..df3441731796 100644 --- a/nova/tests/unit/fake_volume.py +++ b/nova/tests/unit/fake_volume.py @@ -94,8 +94,8 @@ class fake_snapshot(object): 
'deleted': False, 'id': str(id), 'volume_id': volume_id, - 'status': 'creating', - 'progress': '0%', + 'status': 'available', + 'progress': '100%', 'volume_size': 1, 'display_name': name, 'display_description': desc,