diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py
index 58a6311af4cb..88ee72c154f8 100644
--- a/nova/conductor/manager.py
+++ b/nova/conductor/manager.py
@@ -479,7 +479,8 @@ class ComputeTaskManager(base.Base):
                                    exception.HypervisorUnavailable,
                                    exception.InstanceNotRunning,
                                    exception.MigrationPreCheckError,
-                                   exception.LiveMigrationWithOldNovaNotSafe)
+                                   exception.LiveMigrationWithOldNovaNotSafe,
+                                   exception.UnsupportedPolicyException)
     def migrate_server(self, context, instance, scheduler_hint, live, rebuild,
             flavor, block_migration, disk_over_commit, reservations=None,
             clean_shutdown=True):
@@ -521,15 +522,15 @@ class ComputeTaskManager(base.Base):
         quotas = objects.Quotas.from_reservations(context,
                                                   reservations,
                                                   instance=instance)
-        scheduler_utils.setup_instance_group(context, request_spec,
-                                             filter_properties)
         try:
+            scheduler_utils.setup_instance_group(context, request_spec,
+                                                 filter_properties)
             scheduler_utils.populate_retry(filter_properties, instance['uuid'])
             hosts = self.scheduler_client.select_destinations(
                     context, request_spec, filter_properties)
             host_state = hosts[0]
         except exception.NoValidHost as ex:
-            vm_state = instance['vm_state']
+            vm_state = instance.vm_state
             if not vm_state:
                 vm_state = vm_states.ACTIVE
             updates = {'vm_state': vm_state, 'task_state': None}
@@ -544,6 +545,16 @@ class ComputeTaskManager(base.Base):
             else:
                 msg = _("No valid host found for resize")
                 raise exception.NoValidHost(reason=msg)
+        except exception.UnsupportedPolicyException as ex:
+            with excutils.save_and_reraise_exception():
+                vm_state = instance.vm_state
+                if not vm_state:
+                    vm_state = vm_states.ACTIVE
+                updates = {'vm_state': vm_state, 'task_state': None}
+                self._set_vm_state_and_notify(context, instance.uuid,
+                                              'migrate_server',
+                                              updates, ex, request_spec)
+                quotas.rollback()
 
         try:
             scheduler_utils.populate_filter_properties(filter_properties,
@@ -560,7 +571,7 @@ class ComputeTaskManager(base.Base):
                 clean_shutdown=clean_shutdown)
         except Exception as ex:
             with excutils.save_and_reraise_exception():
-                updates = {'vm_state': instance['vm_state'],
+                updates = {'vm_state': instance.vm_state,
                            'task_state': None}
                 self._set_vm_state_and_notify(context, instance.uuid,
                                               'migrate_server',
@@ -607,7 +618,7 @@ class ComputeTaskManager(base.Base):
                 exception.LiveMigrationWithOldNovaNotSafe) as ex:
             with excutils.save_and_reraise_exception():
                 # TODO(johngarbutt) - eventually need instance actions here
-                _set_vm_state(context, instance, ex, instance['vm_state'])
+                _set_vm_state(context, instance, ex, instance.vm_state)
         except Exception as ex:
             LOG.error(_LE('Migration of instance %(instance_id)s to host'
                           ' %(dest)s unexpectedly failed.'),
@@ -624,8 +635,6 @@ class ComputeTaskManager(base.Base):
         # 2.0 of the RPC API.
         request_spec = scheduler_utils.build_request_spec(context, image,
                                                           instances)
-        scheduler_utils.setup_instance_group(context, request_spec,
-                                             filter_properties)
         # TODO(danms): Remove this in version 2.0 of the RPC API
         if (requested_networks and
                 not isinstance(requested_networks,
@@ -642,6 +651,8 @@ class ComputeTaskManager(base.Base):
             filter_properties = dict(filter_properties, instance_type=flavor)
 
         try:
+            scheduler_utils.setup_instance_group(context, request_spec,
+                                                 filter_properties)
             # check retry policy. Rather ugly use of instances[0]...
             # but if we've exceeded max retries... then we really only
             # have a single instance.
@@ -748,7 +759,8 @@ class ComputeTaskManager(base.Base):
                     self.compute_rpcapi.unshelve_instance(
                             context, instance, host, image=image,
                             filter_properties=filter_properties, node=node)
-            except exception.NoValidHost:
+            except (exception.NoValidHost,
+                    exception.UnsupportedPolicyException):
                 instance.task_state = None
                 instance.save()
                 LOG.warning(_LW("No valid host found for unshelve instance"),
@@ -781,9 +793,9 @@ class ComputeTaskManager(base.Base):
             request_spec = scheduler_utils.build_request_spec(context,
                                                               image_ref,
                                                               [instance])
-            scheduler_utils.setup_instance_group(context, request_spec,
-                                                 filter_properties)
             try:
+                scheduler_utils.setup_instance_group(context, request_spec,
+                                                     filter_properties)
                 hosts = self.scheduler_client.select_destinations(context,
                                                                   request_spec,
                                                                   filter_properties)
@@ -796,6 +808,15 @@ class ComputeTaskManager(base.Base):
                             'task_state': None}, ex, request_spec)
                 LOG.warning(_LW("No valid host found for rebuild"),
                             instance=instance)
+            except exception.UnsupportedPolicyException as ex:
+                with excutils.save_and_reraise_exception():
+                    self._set_vm_state_and_notify(context, instance.uuid,
+                            'rebuild_server',
+                            {'vm_state': instance.vm_state,
+                             'task_state': None}, ex, request_spec)
+                    LOG.warning(_LW("Server with unsupported policy "
+                                    "cannot be rebuilt"),
+                                instance=instance)
 
         self.compute_rpcapi.rebuild_instance(context,
                 instance=instance,
diff --git a/nova/exception.py b/nova/exception.py
index b917d3a13933..dc234f414146 100644
--- a/nova/exception.py
+++ b/nova/exception.py
@@ -1832,3 +1832,7 @@ class CPUPinningInvalid(Invalid):
 class ImageCPUPinningForbidden(Invalid):
     msg_fmt = _("Image property 'hw_cpu_policy' is not permitted to override "
                 "CPU pinning policy set against the flavor")
+
+
+class UnsupportedPolicyException(Invalid):
+    msg_fmt = _("ServerGroup policy is not supported: %(reason)s")
diff --git a/nova/scheduler/utils.py b/nova/scheduler/utils.py
index 2ed941fff5e2..c146dcd606d6 100644
--- a/nova/scheduler/utils.py
+++ b/nova/scheduler/utils.py
@@ -285,11 +285,11 @@ def _get_group_details(context, instance_uuid, user_group_hosts=None):
     if (not _SUPPORTS_AFFINITY and 'affinity' in group.policies):
         msg = _("ServerGroupAffinityFilter not configured")
         LOG.error(msg)
-        raise exception.NoValidHost(reason=msg)
+        raise exception.UnsupportedPolicyException(reason=msg)
     if (not _SUPPORTS_ANTI_AFFINITY and 'anti-affinity' in group.policies):
         msg = _("ServerGroupAntiAffinityFilter not configured")
         LOG.error(msg)
-        raise exception.NoValidHost(reason=msg)
+        raise exception.UnsupportedPolicyException(reason=msg)
     group_hosts = set(group.get_hosts(context))
     user_hosts = set(user_group_hosts) if user_group_hosts else set()
     return GroupDetails(hosts=user_hosts | group_hosts,
diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py
index 7a75cad308c2..30c3af199aee 100644
--- a/nova/tests/unit/conductor/test_conductor.py
+++ b/nova/tests/unit/conductor/test_conductor.py
@@ -1378,6 +1378,49 @@ class _BaseTaskTestCase(object):
             block_device_mapping='block_device_mapping',
             legacy_bdm=False)
 
+    @mock.patch('nova.utils.spawn_n')
+    @mock.patch.object(scheduler_utils, 'build_request_spec')
+    @mock.patch.object(scheduler_utils, 'setup_instance_group')
+    @mock.patch.object(conductor_manager.ComputeTaskManager,
+                       '_set_vm_state_and_notify')
+    def test_build_instances_scheduler_group_failure(self, state_mock,
+                                                     sig_mock, bs_mock,
+                                                     spawn_mock):
+        instances = [fake_instance.fake_instance_obj(self.context)
+                     for i in range(2)]
+        image = {'fake-data': 'should_pass_silently'}
+        spec = {'fake': 'specs',
+                'instance_properties': instances[0]}
+
+        # NOTE(gibi): LocalComputeTaskAPI uses eventlet spawn, which makes
+        # mocking hard, so call the target directly instead.
+        spawn_mock.side_effect = lambda f, *a, **k: f(*a, **k)
+        bs_mock.return_value = spec
+        exception = exc.UnsupportedPolicyException(reason='fake-reason')
+        sig_mock.side_effect = exception
+
+        updates = {'vm_state': vm_states.ERROR, 'task_state': None}
+
+        # build_instances() is a cast, we need to wait for it to complete
+        self.useFixture(cast_as_call.CastAsCall(self.stubs))
+
+        self.conductor.build_instances(
+            context=self.context,
+            instances=instances,
+            image=image,
+            filter_properties={},
+            admin_password='admin_password',
+            injected_files='injected_files',
+            requested_networks=None,
+            security_groups='security_groups',
+            block_device_mapping='block_device_mapping',
+            legacy_bdm=False)
+        calls = []
+        for instance in instances:
+            calls.append(mock.call(self.context, instance.uuid,
+                                   'build_instances', updates, exception,
+                                   spec))
+        state_mock.assert_has_calls(calls)
+
     def test_unshelve_instance_on_host(self):
         instance = self._create_fake_instance_obj()
         instance.vm_state = vm_states.SHELVED
@@ -1606,6 +1649,49 @@ class _BaseTaskTestCase(object):
                                           filter_properties)
         self.assertFalse(rebuild_mock.called)
 
+    @mock.patch('nova.utils.spawn_n')
+    @mock.patch.object(conductor_manager.compute_rpcapi.ComputeAPI,
+                       'rebuild_instance')
+    @mock.patch.object(scheduler_utils, 'setup_instance_group')
+    @mock.patch.object(conductor_manager.scheduler_client.SchedulerClient,
+                       'select_destinations')
+    @mock.patch('nova.scheduler.utils.build_request_spec')
+    @mock.patch.object(conductor_manager.ComputeTaskManager,
+                       '_set_vm_state_and_notify')
+    def test_rebuild_instance_with_scheduler_group_failure(self,
+                                                           state_mock,
+                                                           bs_mock,
+                                                           select_dest_mock,
+                                                           sig_mock,
+                                                           rebuild_mock,
+                                                           spawn_mock):
+        inst_obj = self._create_fake_instance_obj()
+        rebuild_args = self._prepare_rebuild_args({'host': None})
+        request_spec = {}
+        bs_mock.return_value = request_spec
+
+        # NOTE(gibi): LocalComputeTaskAPI uses eventlet spawn, which makes
+        # mocking hard, so call the target directly instead.
+        spawn_mock.side_effect = lambda f, *a, **k: f(*a, **k)
+
+        exception = exc.UnsupportedPolicyException(reason='')
+        sig_mock.side_effect = exception
+
+        # rebuild_instance() is a cast, we need to wait for it to complete
+        self.useFixture(cast_as_call.CastAsCall(self.stubs))
+
+        self.assertRaises(exc.UnsupportedPolicyException,
+                          self.conductor.rebuild_instance,
+                          self.context,
+                          inst_obj,
+                          **rebuild_args)
+        updates = {'vm_state': vm_states.ACTIVE, 'task_state': None}
+        state_mock.assert_called_once_with(self.context, inst_obj.uuid,
+                                           'rebuild_server', updates,
+                                           exception, request_spec)
+        self.assertFalse(select_dest_mock.called)
+        self.assertFalse(rebuild_mock.called)
+
 
 class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
     """ComputeTaskManager Tests."""
@@ -1922,6 +2008,44 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
                                           clean_shutdown=True)
         self.assertIn('cold migrate', nvh.message)
 
+    @mock.patch.object(compute_utils, 'get_image_metadata')
+    @mock.patch('nova.scheduler.utils.build_request_spec')
+    @mock.patch.object(scheduler_utils, 'setup_instance_group')
+    @mock.patch.object(conductor_manager.ComputeTaskManager,
+                       '_set_vm_state_and_notify')
+    def test_cold_migrate_no_valid_host_in_group(self,
+                                                 set_vm_mock,
+                                                 sig_mock,
+                                                 brs_mock,
+                                                 image_mock):
+        flavor = flavors.get_flavor_by_name('m1.tiny')
+        inst = fake_instance.fake_db_instance(image_ref='fake-image_ref',
+                                              vm_state=vm_states.STOPPED,
+                                              instance_type_id=flavor['id'])
+        inst_obj = objects.Instance._from_db_object(
+            self.context, objects.Instance(), inst,
+            expected_attrs=[])
+        request_spec = dict(instance_type=dict(extra_specs=dict()),
+                            instance_properties=dict())
+        filter_props = dict(context=None)
+        resvs = 'fake-resvs'
+        image = 'fake-image'
+        exception = exc.UnsupportedPolicyException(reason='')
+
+        image_mock.return_value = image
+        brs_mock.return_value = request_spec
+        sig_mock.side_effect = exception
+
+        self.assertRaises(exc.UnsupportedPolicyException,
+                          self.conductor._cold_migrate, self.context,
+                          inst_obj, flavor, filter_props, [resvs],
+                          clean_shutdown=True)
+
+        updates = {'vm_state': vm_states.STOPPED, 'task_state': None}
+        set_vm_mock.assert_called_once_with(self.context, inst_obj.uuid,
+                                            'migrate_server', updates,
+                                            exception, request_spec)
+
     def test_cold_migrate_exception_host_in_error_state_and_raise(self):
         inst = fake_instance.fake_db_instance(image_ref='fake-image_ref',
                                               vm_state=vm_states.STOPPED)
diff --git a/nova/tests/unit/scheduler/test_scheduler_utils.py b/nova/tests/unit/scheduler/test_scheduler_utils.py
index 38087dc47cf2..0d184c206bc1 100644
--- a/nova/tests/unit/scheduler/test_scheduler_utils.py
+++ b/nova/tests/unit/scheduler/test_scheduler_utils.py
@@ -305,7 +305,7 @@ class SchedulerUtilsTestCase(test.NoDBTestCase):
         ) as (get_group, get_hosts):
             scheduler_utils._SUPPORTS_ANTI_AFFINITY = None
             scheduler_utils._SUPPORTS_AFFINITY = None
-            self.assertRaises(exception.NoValidHost,
+            self.assertRaises(exception.UnsupportedPolicyException,
                               scheduler_utils._get_group_details,
                               self.context, 'fake-uuid')
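Net effect of the patch: a misconfigured scheduler (a server-group affinity or anti-affinity policy requested while the matching filter is not enabled) now surfaces as UnsupportedPolicyException instead of masquerading as NoValidHost, and each conductor path resets the instance's task state and notifies before re-raising, so the server is not left stuck in a transient task state. Below is a minimal, self-contained sketch of that handling pattern; this is not Nova code, and every name and signature in it is an illustrative stand-in.

    class UnsupportedPolicyException(Exception):
        """Stand-in for nova.exception.UnsupportedPolicyException."""

    def setup_instance_group(request_spec):
        # Stand-in for scheduler_utils.setup_instance_group(): with the
        # patch, a missing server-group filter raises
        # UnsupportedPolicyException instead of NoValidHost.
        raise UnsupportedPolicyException(
            "ServerGroupAffinityFilter not configured")

    def migrate_server(instance, notify):
        try:
            setup_instance_group({'instance_properties': instance})
        except UnsupportedPolicyException as ex:
            # Same shape as the conductor's save_and_reraise_exception()
            # blocks: clear the task state, notify, then let the exception
            # propagate so the API layer can report a client-visible fault.
            notify({'vm_state': instance.get('vm_state') or 'active',
                    'task_state': None}, ex)
            raise

    try:
        migrate_server({'vm_state': 'stopped'},
                       lambda updates, ex: print('notified:', updates))
    except UnsupportedPolicyException as ex:
        print('caller sees:', ex)

Because the exception is also added to @messaging.expected_exceptions on migrate_server(), the real conductor serializes it over RPC as an expected fault rather than logging it as an unhandled error.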