Pass request_spec from compute to cell conductor on reschedule

During a resize, we were still passing a legacy dict-ified
version of a RequestSpec to compute even though we don't need
to anymore (that's removed here).

And we weren't passing the RequestSpec back from compute to
the cell conductor during a reschedule of a resize, which makes
conductor have to re-create a stub RequestSpec - which has caused
problems in the past (see bug 1774205).

This change passes the RequestSpec to the cell conductor on a
reschedule of a resize so that conductor can use it and lets
us start the timer on removing the compatibility code in
conductor (marked with a TODO here).

While in here, some really old and non-sensical stuff in compute
is modernized and tests are updated as a result.

Related to blueprint request-spec-use-by-compute

Change-Id: I4244f7dd8fe74565180f73684678027067b4506e
This commit is contained in:
Matt Riedemann 2018-07-12 19:13:13 -04:00
parent f00956352d
commit fb94612294
9 changed files with 67 additions and 56 deletions

View File

@ -487,7 +487,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
class ComputeManager(manager.Manager):
"""Manages the running instances from creation to destruction."""
target = messaging.Target(version='5.0')
target = messaging.Target(version='5.1')
def __init__(self, compute_driver=None, *args, **kwargs):
"""Load configuration options and connect to the hypervisor."""
@ -1416,6 +1416,8 @@ class ComputeManager(manager.Manager):
LOG.error('Error: %s', exc_info[1], instance_uuid=instance_uuid,
exc_info=exc_info)
# TODO(mriedem): This method is confusing and only ever used for resize
# reschedules; remove it and merge into _reschedule_resize_or_reraise.
def _reschedule(self, context, request_spec, filter_properties,
instance, reschedule_method, method_args, task_state,
exc_info=None, host_list=None):
@ -1429,11 +1431,6 @@ class ComputeManager(manager.Manager):
instance_uuid=instance_uuid)
return
if not request_spec:
LOG.debug("No request spec, will not reschedule",
instance_uuid=instance_uuid)
return
LOG.debug("Re-scheduling %(method)s: attempt %(num)d",
{'method': reschedule_method.__name__,
'num': retry['num_attempts']}, instance_uuid=instance_uuid)
@ -1446,7 +1443,8 @@ class ComputeManager(manager.Manager):
retry['exc'] = traceback.format_exception_only(exc_info[0],
exc_info[1])
reschedule_method(context, *method_args, host_list=host_list)
reschedule_method(context, *method_args, request_spec=request_spec,
host_list=host_list)
return True
@periodic_task.periodic_task
@ -4238,7 +4236,7 @@ class ComputeManager(manager.Manager):
self._revert_allocation(context, instance, migration)
# try to re-schedule the resize elsewhere:
exc_info = sys.exc_info()
self._reschedule_resize_or_reraise(context, image, instance,
self._reschedule_resize_or_reraise(context, instance,
exc_info, instance_type, request_spec,
filter_properties, host_list)
finally:
@ -4253,13 +4251,11 @@ class ComputeManager(manager.Manager):
context, instance, self.host,
fields.NotificationPhase.END, instance_type)
def _reschedule_resize_or_reraise(self, context, image, instance, exc_info,
def _reschedule_resize_or_reraise(self, context, instance, exc_info,
instance_type, request_spec, filter_properties, host_list):
"""Try to re-schedule the resize or re-raise the original error to
error out the instance.
"""
if not request_spec:
request_spec = {}
if not filter_properties:
filter_properties = {}

View File

@ -348,6 +348,8 @@ class ComputeAPI(object):
for Pike compatibility. All new changes should go against 5.x.
* 5.0 - Remove 4.x compatibility
* 5.1 - Make prep_resize() take a RequestSpec object rather than a
legacy dict.
'''
VERSION_ALIASES = {
@ -728,7 +730,15 @@ class ComputeAPI(object):
def prep_resize(self, ctxt, instance, image, instance_type, host,
migration, request_spec, filter_properties, node,
clean_shutdown, host_list):
image_p = jsonutils.to_primitive(image)
# TODO(mriedem): We should pass the ImageMeta object through to the
# compute but that also requires plumbing changes through the resize
# flow for other methods like resize_instance and finish_resize.
image_p = objects_base.obj_to_primitive(image)
# FIXME(sbauza): Serialize/Unserialize the legacy dict because of
# oslo.messaging #1529084 to transform datetime values into strings.
# tl;dr: datetimes in dicts are not accepted as correct values by the
# rpc fake driver.
image_p = jsonutils.loads(jsonutils.dumps(image_p))
msg_args = {'instance': instance,
'instance_type': instance_type,
'image': image_p,
@ -739,6 +749,10 @@ class ComputeAPI(object):
'clean_shutdown': clean_shutdown,
'host_list': host_list}
client = self.router.client(ctxt)
version = '5.1'
if not client.can_send_version(version):
msg_args['request_spec'] = (
request_spec.to_legacy_request_spec_dict())
version = '5.0'
cctxt = client.prepare(server=host, version=version)
cctxt.cast(ctxt, 'prep_resize', **msg_args)

View File

@ -300,7 +300,11 @@ class ComputeTaskManager(base.Base):
# NOTE(sbauza): If a reschedule occurs when prep_resize(), then
# it only provides filter_properties legacy dict back to the
# conductor with no RequestSpec part of the payload.
# conductor with no RequestSpec part of the payload for <Stein
# computes.
# TODO(mriedem): We should be able to remove this in Train when we
# only support >=Stein computes and request_spec is passed back to
# conductor on reschedule.
if not request_spec:
# Make sure we hydrate a new RequestSpec object with the new flavor
# and not the nested one from the instance

View File

@ -163,9 +163,8 @@ class MigrationTask(base.TaskBase):
return migration
def _execute(self):
# TODO(sbauza): Remove that once prep_resize() accepts a RequestSpec
# object in the signature and all the scheduler.utils methods too
legacy_spec = self.request_spec.to_legacy_request_spec_dict()
# TODO(sbauza): Remove once all the scheduler.utils methods accept a
# RequestSpec object in the signature.
legacy_props = self.request_spec.to_legacy_filter_properties_dict()
scheduler_utils.setup_instance_group(self.context, self.request_spec)
# If a target host is set in a requested destination,
@ -285,20 +284,19 @@ class MigrationTask(base.TaskBase):
availability_zones.get_host_availability_zone(
self.context, host))
# FIXME(sbauza): Serialize/Unserialize the legacy dict because of
# oslo.messaging #1529084 to transform datetime values into strings.
# tl;dr: datetimes in dicts are not accepted as correct values by the
# rpc fake driver.
legacy_spec = jsonutils.loads(jsonutils.dumps(legacy_spec))
LOG.debug("Calling prep_resize with selected host: %s; "
"Selected node: %s; Alternates: %s", host, node,
self.host_list, instance=self.instance)
# RPC cast to the destination host to start the migration process.
self.compute_rpcapi.prep_resize(
self.context, self.instance, legacy_spec['image'],
# NOTE(mriedem): Using request_spec.image here is potentially
# dangerous if it is not kept up to date (i.e. rebuild/unshelve);
# seems like the sane thing to do would be to pass the current
# instance.image_meta since that is what MoveClaim will use for
# any NUMA topology claims on the destination host...
self.context, self.instance, self.request_spec.image,
self.flavor, host, migration,
request_spec=legacy_spec, filter_properties=legacy_props,
request_spec=self.request_spec, filter_properties=legacy_props,
node=node, clean_shutdown=self.clean_shutdown,
host_list=self.host_list)

View File

@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
# NOTE(danms): This is the global service version counter
SERVICE_VERSION = 36
SERVICE_VERSION = 37
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@ -149,6 +149,8 @@ SERVICE_VERSION_HISTORY = (
# Version 36: Indicates that nova-compute supports specifying volume
# type when booting a volume-backed server.
{'compute_rpc': '5.0'},
# Version 37: prep_resize takes a RequestSpec object
{'compute_rpc': '5.1'},
)

View File

@ -144,8 +144,10 @@ def unify_instance(instance):
class FakeComputeTaskAPI(object):
def resize_instance(self, context, instance, extra_instance_updates,
scheduler_hint, flavor, reservationsi, host_list=None):
def resize_instance(self, ctxt, instance, extra_instance_updates,
scheduler_hint, flavor, reservations=None,
clean_shutdown=True, request_spec=None,
host_list=None):
pass
@ -12587,12 +12589,6 @@ class ComputeReschedulingTestCase(BaseTestCase):
filter_properties = {}
self.assertFalse(self._reschedule(filter_properties=filter_properties))
def test_reschedule_no_request_spec(self):
# no request spec will also disable re-scheduling.
retry = dict(num_attempts=1)
filter_properties = dict(retry=retry)
self.assertFalse(self._reschedule(filter_properties=filter_properties))
def test_reschedule_success(self):
retry = dict(num_attempts=1)
filter_properties = dict(retry=retry)
@ -12638,13 +12634,14 @@ class ComputeRescheduleResizeOrReraiseTestCase(BaseTestCase):
self.compute.prep_resize(self.context, image=None,
instance=inst_obj,
instance_type=self.instance_type,
request_spec={},
request_spec=mock.sentinel.reqspec,
filter_properties={}, migration=mock.Mock(),
node=None,
clean_shutdown=True, host_list=None)
mock_res.assert_called_once_with(mock.ANY, None, inst_obj, mock.ANY,
self.instance_type, {}, {}, None)
mock_res.assert_called_once_with(mock.ANY, inst_obj, mock.ANY,
self.instance_type,
mock.sentinel.reqspec, {}, None)
@mock.patch.object(compute_manager.ComputeManager, "_reschedule")
@mock.patch('nova.compute.utils.notify_about_instance_action')
@ -12661,12 +12658,14 @@ class ComputeRescheduleResizeOrReraiseTestCase(BaseTestCase):
raise test.TestingException("Original")
except Exception:
exc_info = sys.exc_info()
reqspec = objects.RequestSpec()
self.assertRaises(test.TestingException,
self.compute._reschedule_resize_or_reraise, self.context,
None, instance, exc_info, self.instance_type, {}, {}, None)
instance, exc_info, self.instance_type, reqspec,
{}, None)
mock_res.assert_called_once_with(
self.context, {}, {}, instance,
self.context, reqspec, {}, instance,
self.compute.compute_task_api.resize_instance, method_args,
task_states.RESIZE_PREP, exc_info, host_list=None)
mock_notify.assert_called_once_with(
@ -12687,12 +12686,14 @@ class ComputeRescheduleResizeOrReraiseTestCase(BaseTestCase):
raise test.TestingException("Original")
except Exception:
exc_info = sys.exc_info()
reqspec = objects.RequestSpec()
self.assertRaises(test.TestingException,
self.compute._reschedule_resize_or_reraise, self.context,
None, instance, exc_info, self.instance_type, {}, {}, None)
instance, exc_info, self.instance_type, reqspec,
{}, None)
mock_res.assert_called_once_with(
self.context, {}, {}, instance,
self.context, reqspec, {}, instance,
self.compute.compute_task_api.resize_instance, method_args,
task_states.RESIZE_PREP, exc_info, host_list=None)
@ -12710,11 +12711,12 @@ class ComputeRescheduleResizeOrReraiseTestCase(BaseTestCase):
exc_info = sys.exc_info()
mock_res.return_value = True
reqspec = objects.RequestSpec()
self.compute._reschedule_resize_or_reraise(
self.context, None, instance, exc_info,
self.instance_type, {}, {}, None)
self.context, instance, exc_info,
self.instance_type, reqspec, {}, None)
mock_res.assert_called_once_with(self.context, {}, {},
mock_res.assert_called_once_with(self.context, reqspec, {},
instance, self.compute.compute_task_api.resize_instance,
method_args, task_states.RESIZE_PREP, exc_info,
host_list=None)

View File

@ -144,8 +144,6 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
kwargs['cast'] = False
else:
kwargs['do_cast'] = False
elif method == 'prep_resize' and 'migration' not in expected_args:
del expected_kwargs['migration']
if 'host' in kwargs:
host = kwargs['host']
elif 'instances' in kwargs:
@ -385,8 +383,7 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
timeout=1234)
def test_prep_resize(self):
expected_args = {'migration': 'migration'}
self._test_compute_api('prep_resize', 'cast', expected_args,
self._test_compute_api('prep_resize', 'cast',
instance=self.fake_instance_obj,
instance_type=self.fake_flavor_obj,
image='fake_image', host='host',
@ -394,7 +391,7 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
filter_properties={'fakeprop': 'fakeval'},
migration='migration',
node='node', clean_shutdown=True, host_list=None,
version='5.0')
version='5.1')
def test_reboot_instance(self):
self.maxDiff = None

View File

@ -94,7 +94,6 @@ class MigrationTaskTestCase(test.NoDBTestCase):
self.request_spec.requested_destination)
task = self._generate_task()
legacy_request_spec = self.request_spec.to_legacy_request_spec_dict()
gmv_mock.return_value = 23
# We just need this hook point to set a uuid on the
@ -120,9 +119,9 @@ class MigrationTaskTestCase(test.NoDBTestCase):
return_objects=True, return_alternates=True)
selection = self.host_lists[0][0]
prep_resize_mock.assert_called_once_with(
self.context, self.instance, legacy_request_spec['image'],
self.context, self.instance, self.request_spec.image,
self.flavor, selection.service_host, task._migration,
request_spec=legacy_request_spec,
request_spec=self.request_spec,
filter_properties=self.filter_properties, node=selection.nodename,
clean_shutdown=self.clean_shutdown, host_list=[])
az_mock.assert_called_once_with(self.context, 'host1')

View File

@ -2561,7 +2561,6 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
project_id=self.context.project_id)
image = 'fake-image'
fake_spec = objects.RequestSpec(image=objects.ImageMeta())
legacy_request_spec = fake_spec.to_legacy_request_spec_dict()
spec_fc_mock.return_value = fake_spec
im_mock.return_value = objects.InstanceMapping(
@ -2592,9 +2591,9 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
select_dest_mock.assert_called_once_with(self.context, fake_spec,
[inst_obj.uuid], return_objects=True, return_alternates=True)
prep_resize_mock.assert_called_once_with(
self.context, inst_obj, legacy_request_spec['image'],
self.context, inst_obj, fake_spec.image,
flavor, hosts[0]['host'], _preallocate_migration.return_value,
request_spec=legacy_request_spec,
request_spec=fake_spec,
filter_properties=legacy_filter_props,
node=hosts[0]['nodename'], clean_shutdown=True, host_list=[])
notify_mock.assert_called_once_with(self.context, inst_obj.uuid,