From fb94612294abc0fcf16a01fe1fb8e70260165ed2 Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Thu, 12 Jul 2018 19:13:13 -0400 Subject: [PATCH] Pass request_spec from compute to cell conductor on reschedule During a resize, we were still passing a legacy dict-ified version of a RequestSpec to compute even though we don't need to anymore (that's removed here). And we weren't passing the RequestSpec back from compute to the cell conductor during a reschedule of a resize, which makes conductor have to re-create a stub RequestSpec - which has caused problems in the past (see bug 1774205). This change passes the RequestSpec to the cell conductor on a reschedule of a resize so that conductor can use it and lets us start the timer on removing the compatibility code in conductor (marked with a TODO here). While in here, some really old and non-sensical stuff in compute is modernized and tests are updated as a result. Related to blueprint request-spec-use-by-compute Change-Id: I4244f7dd8fe74565180f73684678027067b4506e --- nova/compute/manager.py | 18 ++++----- nova/compute/rpcapi.py | 20 ++++++++-- nova/conductor/manager.py | 6 ++- nova/conductor/tasks/migrate.py | 20 +++++----- nova/objects/service.py | 4 +- nova/tests/unit/compute/test_compute.py | 38 ++++++++++--------- nova/tests/unit/compute/test_rpcapi.py | 7 +--- .../unit/conductor/tasks/test_migrate.py | 5 +-- nova/tests/unit/conductor/test_conductor.py | 5 +-- 9 files changed, 67 insertions(+), 56 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 8e76700416ff..515927607e4a 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -487,7 +487,7 @@ class ComputeVirtAPI(virtapi.VirtAPI): class ComputeManager(manager.Manager): """Manages the running instances from creation to destruction.""" - target = messaging.Target(version='5.0') + target = messaging.Target(version='5.1') def __init__(self, compute_driver=None, *args, **kwargs): """Load configuration options and connect to the hypervisor.""" @@ -1416,6 +1416,8 @@ class ComputeManager(manager.Manager): LOG.error('Error: %s', exc_info[1], instance_uuid=instance_uuid, exc_info=exc_info) + # TODO(mriedem): This method is confusing and only ever used for resize + # reschedules; remove it and merge into _reschedule_resize_or_reraise. def _reschedule(self, context, request_spec, filter_properties, instance, reschedule_method, method_args, task_state, exc_info=None, host_list=None): @@ -1429,11 +1431,6 @@ class ComputeManager(manager.Manager): instance_uuid=instance_uuid) return - if not request_spec: - LOG.debug("No request spec, will not reschedule", - instance_uuid=instance_uuid) - return - LOG.debug("Re-scheduling %(method)s: attempt %(num)d", {'method': reschedule_method.__name__, 'num': retry['num_attempts']}, instance_uuid=instance_uuid) @@ -1446,7 +1443,8 @@ class ComputeManager(manager.Manager): retry['exc'] = traceback.format_exception_only(exc_info[0], exc_info[1]) - reschedule_method(context, *method_args, host_list=host_list) + reschedule_method(context, *method_args, request_spec=request_spec, + host_list=host_list) return True @periodic_task.periodic_task @@ -4238,7 +4236,7 @@ class ComputeManager(manager.Manager): self._revert_allocation(context, instance, migration) # try to re-schedule the resize elsewhere: exc_info = sys.exc_info() - self._reschedule_resize_or_reraise(context, image, instance, + self._reschedule_resize_or_reraise(context, instance, exc_info, instance_type, request_spec, filter_properties, host_list) finally: @@ -4253,13 +4251,11 @@ class ComputeManager(manager.Manager): context, instance, self.host, fields.NotificationPhase.END, instance_type) - def _reschedule_resize_or_reraise(self, context, image, instance, exc_info, + def _reschedule_resize_or_reraise(self, context, instance, exc_info, instance_type, request_spec, filter_properties, host_list): """Try to re-schedule the resize or re-raise the original error to error out the instance. """ - if not request_spec: - request_spec = {} if not filter_properties: filter_properties = {} diff --git a/nova/compute/rpcapi.py b/nova/compute/rpcapi.py index aa65a03a2598..0c8218c321c4 100644 --- a/nova/compute/rpcapi.py +++ b/nova/compute/rpcapi.py @@ -347,7 +347,9 @@ class ComputeAPI(object): can accept 4.x calls from Pike nodes, and can be pinned to 4.x for Pike compatibility. All new changes should go against 5.x. - * 5.0 - Remove 4.x compatibility + * 5.0 - Remove 4.x compatibility + * 5.1 - Make prep_resize() take a RequestSpec object rather than a + legacy dict. ''' VERSION_ALIASES = { @@ -728,7 +730,15 @@ class ComputeAPI(object): def prep_resize(self, ctxt, instance, image, instance_type, host, migration, request_spec, filter_properties, node, clean_shutdown, host_list): - image_p = jsonutils.to_primitive(image) + # TODO(mriedem): We should pass the ImageMeta object through to the + # compute but that also requires plumbing changes through the resize + # flow for other methods like resize_instance and finish_resize. + image_p = objects_base.obj_to_primitive(image) + # FIXME(sbauza): Serialize/Unserialize the legacy dict because of + # oslo.messaging #1529084 to transform datetime values into strings. + # tl;dr: datetimes in dicts are not accepted as correct values by the + # rpc fake driver. + image_p = jsonutils.loads(jsonutils.dumps(image_p)) msg_args = {'instance': instance, 'instance_type': instance_type, 'image': image_p, @@ -739,7 +749,11 @@ class ComputeAPI(object): 'clean_shutdown': clean_shutdown, 'host_list': host_list} client = self.router.client(ctxt) - version = '5.0' + version = '5.1' + if not client.can_send_version(version): + msg_args['request_spec'] = ( + request_spec.to_legacy_request_spec_dict()) + version = '5.0' cctxt = client.prepare(server=host, version=version) cctxt.cast(ctxt, 'prep_resize', **msg_args) diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py index 01256dd952f7..98f20dcf3e98 100644 --- a/nova/conductor/manager.py +++ b/nova/conductor/manager.py @@ -300,7 +300,11 @@ class ComputeTaskManager(base.Base): # NOTE(sbauza): If a reschedule occurs when prep_resize(), then # it only provides filter_properties legacy dict back to the - # conductor with no RequestSpec part of the payload. + # conductor with no RequestSpec part of the payload for =Stein computes and request_spec is passed back to + # conductor on reschedule. if not request_spec: # Make sure we hydrate a new RequestSpec object with the new flavor # and not the nested one from the instance diff --git a/nova/conductor/tasks/migrate.py b/nova/conductor/tasks/migrate.py index a26a7202cd8a..96f8be760cb3 100644 --- a/nova/conductor/tasks/migrate.py +++ b/nova/conductor/tasks/migrate.py @@ -163,9 +163,8 @@ class MigrationTask(base.TaskBase): return migration def _execute(self): - # TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec - # object in the signature and all the scheduler.utils methods too - legacy_spec = self.request_spec.to_legacy_request_spec_dict() + # TODO(sbauza): Remove once all the scheduler.utils methods accept a + # RequestSpec object in the signature. legacy_props = self.request_spec.to_legacy_filter_properties_dict() scheduler_utils.setup_instance_group(self.context, self.request_spec) # If a target host is set in a requested destination, @@ -285,20 +284,19 @@ class MigrationTask(base.TaskBase): availability_zones.get_host_availability_zone( self.context, host)) - # FIXME(sbauza): Serialize/Unserialize the legacy dict because of - # oslo.messaging #1529084 to transform datetime values into strings. - # tl;dr: datetimes in dicts are not accepted as correct values by the - # rpc fake driver. - legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec)) - LOG.debug("Calling prep_resize with selected host: %s; " "Selected node: %s; Alternates: %s", host, node, self.host_list, instance=self.instance) # RPC cast to the destination host to start the migration process. self.compute_rpcapi.prep_resize( - self.context, self.instance, legacy_spec['image'], + # NOTE(mriedem): Using request_spec.image here is potentially + # dangerous if it is not kept up to date (i.e. rebuild/unshelve); + # seems like the sane thing to do would be to pass the current + # instance.image_meta since that is what MoveClaim will use for + # any NUMA topology claims on the destination host... + self.context, self.instance, self.request_spec.image, self.flavor, host, migration, - request_spec=legacy_spec, filter_properties=legacy_props, + request_spec=self.request_spec, filter_properties=legacy_props, node=node, clean_shutdown=self.clean_shutdown, host_list=self.host_list) diff --git a/nova/objects/service.py b/nova/objects/service.py index 0493de711bd7..0bc82e7b50c9 100644 --- a/nova/objects/service.py +++ b/nova/objects/service.py @@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__) # NOTE(danms): This is the global service version counter -SERVICE_VERSION = 36 +SERVICE_VERSION = 37 # NOTE(danms): This is our SERVICE_VERSION history. The idea is that any @@ -149,6 +149,8 @@ SERVICE_VERSION_HISTORY = ( # Version 36: Indicates that nova-compute supports specifying volume # type when booting a volume-backed server. {'compute_rpc': '5.0'}, + # Version 37: prep_resize takes a RequestSpec object + {'compute_rpc': '5.1'}, ) diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index 370033468d94..e4a8512c351a 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -144,8 +144,10 @@ def unify_instance(instance): class FakeComputeTaskAPI(object): - def resize_instance(self, context, instance, extra_instance_updates, - scheduler_hint, flavor, reservationsi, host_list=None): + def resize_instance(self, ctxt, instance, extra_instance_updates, + scheduler_hint, flavor, reservations=None, + clean_shutdown=True, request_spec=None, + host_list=None): pass @@ -12587,12 +12589,6 @@ class ComputeReschedulingTestCase(BaseTestCase): filter_properties = {} self.assertFalse(self._reschedule(filter_properties=filter_properties)) - def test_reschedule_no_request_spec(self): - # no request spec will also disable re-scheduling. - retry = dict(num_attempts=1) - filter_properties = dict(retry=retry) - self.assertFalse(self._reschedule(filter_properties=filter_properties)) - def test_reschedule_success(self): retry = dict(num_attempts=1) filter_properties = dict(retry=retry) @@ -12638,13 +12634,14 @@ class ComputeRescheduleResizeOrReraiseTestCase(BaseTestCase): self.compute.prep_resize(self.context, image=None, instance=inst_obj, instance_type=self.instance_type, - request_spec={}, + request_spec=mock.sentinel.reqspec, filter_properties={}, migration=mock.Mock(), node=None, clean_shutdown=True, host_list=None) - mock_res.assert_called_once_with(mock.ANY, None, inst_obj, mock.ANY, - self.instance_type, {}, {}, None) + mock_res.assert_called_once_with(mock.ANY, inst_obj, mock.ANY, + self.instance_type, + mock.sentinel.reqspec, {}, None) @mock.patch.object(compute_manager.ComputeManager, "_reschedule") @mock.patch('nova.compute.utils.notify_about_instance_action') @@ -12661,12 +12658,14 @@ class ComputeRescheduleResizeOrReraiseTestCase(BaseTestCase): raise test.TestingException("Original") except Exception: exc_info = sys.exc_info() + reqspec = objects.RequestSpec() self.assertRaises(test.TestingException, self.compute._reschedule_resize_or_reraise, self.context, - None, instance, exc_info, self.instance_type, {}, {}, None) + instance, exc_info, self.instance_type, reqspec, + {}, None) mock_res.assert_called_once_with( - self.context, {}, {}, instance, + self.context, reqspec, {}, instance, self.compute.compute_task_api.resize_instance, method_args, task_states.RESIZE_PREP, exc_info, host_list=None) mock_notify.assert_called_once_with( @@ -12687,12 +12686,14 @@ class ComputeRescheduleResizeOrReraiseTestCase(BaseTestCase): raise test.TestingException("Original") except Exception: exc_info = sys.exc_info() + reqspec = objects.RequestSpec() self.assertRaises(test.TestingException, self.compute._reschedule_resize_or_reraise, self.context, - None, instance, exc_info, self.instance_type, {}, {}, None) + instance, exc_info, self.instance_type, reqspec, + {}, None) mock_res.assert_called_once_with( - self.context, {}, {}, instance, + self.context, reqspec, {}, instance, self.compute.compute_task_api.resize_instance, method_args, task_states.RESIZE_PREP, exc_info, host_list=None) @@ -12710,11 +12711,12 @@ class ComputeRescheduleResizeOrReraiseTestCase(BaseTestCase): exc_info = sys.exc_info() mock_res.return_value = True + reqspec = objects.RequestSpec() self.compute._reschedule_resize_or_reraise( - self.context, None, instance, exc_info, - self.instance_type, {}, {}, None) + self.context, instance, exc_info, + self.instance_type, reqspec, {}, None) - mock_res.assert_called_once_with(self.context, {}, {}, + mock_res.assert_called_once_with(self.context, reqspec, {}, instance, self.compute.compute_task_api.resize_instance, method_args, task_states.RESIZE_PREP, exc_info, host_list=None) diff --git a/nova/tests/unit/compute/test_rpcapi.py b/nova/tests/unit/compute/test_rpcapi.py index 3c29080841fe..a96b9dcf4797 100644 --- a/nova/tests/unit/compute/test_rpcapi.py +++ b/nova/tests/unit/compute/test_rpcapi.py @@ -144,8 +144,6 @@ class ComputeRpcAPITestCase(test.NoDBTestCase): kwargs['cast'] = False else: kwargs['do_cast'] = False - elif method == 'prep_resize' and 'migration' not in expected_args: - del expected_kwargs['migration'] if 'host' in kwargs: host = kwargs['host'] elif 'instances' in kwargs: @@ -385,8 +383,7 @@ class ComputeRpcAPITestCase(test.NoDBTestCase): timeout=1234) def test_prep_resize(self): - expected_args = {'migration': 'migration'} - self._test_compute_api('prep_resize', 'cast', expected_args, + self._test_compute_api('prep_resize', 'cast', instance=self.fake_instance_obj, instance_type=self.fake_flavor_obj, image='fake_image', host='host', @@ -394,7 +391,7 @@ class ComputeRpcAPITestCase(test.NoDBTestCase): filter_properties={'fakeprop': 'fakeval'}, migration='migration', node='node', clean_shutdown=True, host_list=None, - version='5.0') + version='5.1') def test_reboot_instance(self): self.maxDiff = None diff --git a/nova/tests/unit/conductor/tasks/test_migrate.py b/nova/tests/unit/conductor/tasks/test_migrate.py index 6ac7b77e3c51..e756692cb5d0 100644 --- a/nova/tests/unit/conductor/tasks/test_migrate.py +++ b/nova/tests/unit/conductor/tasks/test_migrate.py @@ -94,7 +94,6 @@ class MigrationTaskTestCase(test.NoDBTestCase): self.request_spec.requested_destination) task = self._generate_task() - legacy_request_spec = self.request_spec.to_legacy_request_spec_dict() gmv_mock.return_value = 23 # We just need this hook point to set a uuid on the @@ -120,9 +119,9 @@ class MigrationTaskTestCase(test.NoDBTestCase): return_objects=True, return_alternates=True) selection = self.host_lists[0][0] prep_resize_mock.assert_called_once_with( - self.context, self.instance, legacy_request_spec['image'], + self.context, self.instance, self.request_spec.image, self.flavor, selection.service_host, task._migration, - request_spec=legacy_request_spec, + request_spec=self.request_spec, filter_properties=self.filter_properties, node=selection.nodename, clean_shutdown=self.clean_shutdown, host_list=[]) az_mock.assert_called_once_with(self.context, 'host1') diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py index 00c4ad048899..98019baeeef9 100644 --- a/nova/tests/unit/conductor/test_conductor.py +++ b/nova/tests/unit/conductor/test_conductor.py @@ -2561,7 +2561,6 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase): project_id=self.context.project_id) image = 'fake-image' fake_spec = objects.RequestSpec(image=objects.ImageMeta()) - legacy_request_spec = fake_spec.to_legacy_request_spec_dict() spec_fc_mock.return_value = fake_spec im_mock.return_value = objects.InstanceMapping( @@ -2592,9 +2591,9 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase): select_dest_mock.assert_called_once_with(self.context, fake_spec, [inst_obj.uuid], return_objects=True, return_alternates=True) prep_resize_mock.assert_called_once_with( - self.context, inst_obj, legacy_request_spec['image'], + self.context, inst_obj, fake_spec.image, flavor, hosts[0]['host'], _preallocate_migration.return_value, - request_spec=legacy_request_spec, + request_spec=fake_spec, filter_properties=legacy_filter_props, node=hosts[0]['nodename'], clean_shutdown=True, host_list=[]) notify_mock.assert_called_once_with(self.context, inst_obj.uuid,