From edeeaf9102eccb78f1a2555c7e18c3d706f07639 Mon Sep 17 00:00:00 2001
From: Balazs Gibizer
Date: Mon, 4 Dec 2017 16:18:30 +0100
Subject: [PATCH] Add late server group policy check to rebuild

The affinity and anti-affinity server group policies are enforced by the
scheduler, but two parallel scheduling requests can still end up violating
such a policy. During instance boot a late policy check is performed in the
compute manager to prevent this. This check was missing from the rebuild
path, so two parallel evacuate commands could violate the server group
policy. This patch introduces the late policy check in rebuild to prevent
that situation.

When such a violation is detected during boot, a re-schedule happens. The
rebuild action, however, has no re-schedule implementation, so in this case
the rebuild fails and the evacuation needs to be retried by the user. This
is still better than allowing a parallel evacuation to break the server
group affinity policy.

To make the late policy check possible in the compute manager, the
rebuild_instance compute RPC call is extended with a request_spec parameter.

Co-Authored-By: Richard Zsarnoczai
Change-Id: I752617066bb2167b49239ab9d17b0c89754a3e12
Closes-Bug: #1735407
---
 doc/source/user/cellsv2-layout.rst           |  2 +-
 nova/compute/manager.py                      | 37 +++++++--
 nova/compute/rpcapi.py                       | 11 ++-
 nova/conductor/manager.py                    |  3 +-
 nova/objects/service.py                      |  4 +-
 nova/tests/functional/integrated_helpers.py  | 10 +--
 .../regressions/test_bug_1735407.py          | 37 ++++-----
 nova/tests/functional/test_server_group.py   | 14 ++--
 nova/tests/functional/test_servers.py        |  8 +-
 nova/tests/unit/compute/test_compute_mgr.py  | 81 +++++++++++++++++++
 nova/tests/unit/compute/test_rpcapi.py       |  9 +++
 nova/tests/unit/conductor/test_conductor.py  |  5 +-
 12 files changed, 165 insertions(+), 56 deletions(-)

diff --git a/doc/source/user/cellsv2-layout.rst b/doc/source/user/cellsv2-layout.rst
index 1346ed0b7cc2..224535828e40 100644
--- a/doc/source/user/cellsv2-layout.rst
+++ b/doc/source/user/cellsv2-layout.rst
@@ -319,7 +319,7 @@ following:

 #. Instance reschedules during boot
 #. Instance affinity reporting from the compute nodes to scheduler
-#. The late anti-affinity check
+#. The late anti-affinity check during server create and evacuate
 #. Querying host aggregates from the cell
 #. Attaching a volume and ``[cinder]/cross_az_attach=False``

diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index edc4e985cdae..8486534adce3 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -485,7 +485,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
 class ComputeManager(manager.Manager):
     """Manages the running instances from creation to destruction."""

-    target = messaging.Target(version='4.21')
+    target = messaging.Target(version='4.22')

     # How long to wait in seconds before re-issuing a shutdown
     # signal to an instance during power off. The overall
@@ -2804,7 +2804,7 @@ class ComputeManager(manager.Manager):
                          injected_files, new_pass, orig_sys_metadata,
                          bdms, recreate, on_shared_storage=None,
                          preserve_ephemeral=False, migration=None,
-                         scheduled_node=None, limits=None):
+                         scheduled_node=None, limits=None, request_spec=None):
         """Destroy and re-make this instance.

         A 'rebuild' effectively purges all existing data from the system and
@@ -2834,6 +2834,8 @@ class ComputeManager(manager.Manager):
                            None
         :param limits: Overcommit limits set by the scheduler. If a host was
                        specified by the user, this will be None
+        :param request_spec: a RequestSpec object used to schedule the instance
+
         """
         context = context.elevated()

@@ -2886,11 +2888,22 @@ class ComputeManager(manager.Manager):
                     claim_ctxt, context, instance, orig_image_ref,
                     image_ref, injected_files, new_pass, orig_sys_metadata,
                     bdms, recreate, on_shared_storage, preserve_ephemeral,
-                    migration)
-            except exception.ComputeResourcesUnavailable as e:
-                LOG.debug("Could not rebuild instance on this host, not "
-                          "enough resources available.", instance=instance)
-
+                    migration, request_spec)
+            except (exception.ComputeResourcesUnavailable,
+                    exception.RescheduledException) as e:
+                if isinstance(e, exception.ComputeResourcesUnavailable):
+                    LOG.debug("Could not rebuild instance on this host, not "
+                              "enough resources available.", instance=instance)
+                else:
+                    # RescheduledException is raised by the late server group
+                    # policy check during evacuation if a parallel scheduling
+                    # violated the policy.
+                    # We catch the RescheduledException here but we don't have
+                    # the plumbing to do an actual reschedule so we abort the
+                    # operation.
+                    LOG.debug("Could not rebuild instance on this host, "
+                              "late server group check failed.",
+                              instance=instance)
                 # NOTE(ndipanov): We just abort the build for now and leave a
                 # migration record for potential cleanup later
                 self._set_migration_status(migration, 'failed')
@@ -2949,10 +2962,18 @@ class ComputeManager(manager.Manager):
                              image_ref, injected_files, new_pass,
                              orig_sys_metadata, bdms, recreate,
                              on_shared_storage, preserve_ephemeral,
-                             migration):
+                             migration, request_spec):
         orig_vm_state = instance.vm_state

         if recreate:
+            if request_spec:
+                # NOTE(gibi): Do a late check of server group policy as
+                # parallel scheduling could violate such policy. This will
+                # cause the evacuate to fail as rebuild does not implement
+                # reschedule.
+                hints = self._get_scheduler_hints({}, request_spec)
+                self._validate_instance_group_policy(context, instance, hints)
+
             if not self.driver.capabilities["supports_recreate"]:
                 raise exception.InstanceRecreateNotSupported

diff --git a/nova/compute/rpcapi.py b/nova/compute/rpcapi.py
index 7f62e5b341ba..c001921f41ab 100644
--- a/nova/compute/rpcapi.py
+++ b/nova/compute/rpcapi.py
@@ -340,6 +340,7 @@ class ComputeAPI(object):
         * 4.20 - Add multiattach argument to reserve_block_device_name().
         * 4.21 - prep_resize() now gets a 'host_list' parameter representing
                  potential alternate hosts for retries within a cell.
+        * 4.22 - Add request_spec to rebuild_instance()
     '''

     VERSION_ALIASES = {
@@ -811,7 +812,7 @@ class ComputeAPI(object):
             image_ref, orig_image_ref, orig_sys_metadata, bdms,
             recreate=False, on_shared_storage=False, host=None, node=None,
             preserve_ephemeral=False, migration=None, limits=None,
-            kwargs=None):
+            request_spec=None, kwargs=None):
         # NOTE(edleafe): compute nodes can only use the dict form of limits.
         if isinstance(limits, objects.SchedulerLimits):
             limits = limits.to_dict()
@@ -820,9 +821,13 @@ class ComputeAPI(object):
         extra = {'preserve_ephemeral': preserve_ephemeral,
                  'migration': migration,
                  'scheduled_node': node,
-                 'limits': limits}
-        version = '4.5'
+                 'limits': limits,
+                 'request_spec': request_spec}
+        version = '4.22'
         client = self.router.client(ctxt)
+        if not client.can_send_version(version):
+            version = '4.5'
+            del extra['request_spec']
         if not client.can_send_version(version):
             version = '4.0'
             extra.pop('migration')
diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py
index 6497e0f77e95..2e0fc3e9277f 100644
--- a/nova/conductor/manager.py
+++ b/nova/conductor/manager.py
@@ -996,7 +996,8 @@ class ComputeTaskManager(base.Base):
                     on_shared_storage=on_shared_storage,
                     preserve_ephemeral=preserve_ephemeral,
                     migration=migration,
-                    host=host, node=node, limits=limits)
+                    host=host, node=node, limits=limits,
+                    request_spec=request_spec)

     # TODO(avolkov): move method to bdm
     @staticmethod
diff --git a/nova/objects/service.py b/nova/objects/service.py
index b3587ed0999e..5d28feec623a 100644
--- a/nova/objects/service.py
+++ b/nova/objects/service.py
@@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)


 # NOTE(danms): This is the global service version counter
-SERVICE_VERSION = 28
+SERVICE_VERSION = 29


 # NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@@ -127,6 +127,8 @@ SERVICE_VERSION_HISTORY = (
     {'compute_rpc': '4.20'},
     # Version 28: Adds a 'host_list' parameter to prep_resize()
     {'compute_rpc': '4.21'},
+    # Version 29: Compute RPC version 4.22
+    {'compute_rpc': '4.22'},
 )


diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py
index 8c0bafaf0231..c97557b62068 100644
--- a/nova/tests/functional/integrated_helpers.py
+++ b/nova/tests/functional/integrated_helpers.py
@@ -316,8 +316,8 @@ class InstanceHelperMixin(object):
             self.fail('Timed out waiting for %s failure event. Current '
                       'instance actions: %s' % (event_name, actions))

-    def _wait_for_migration_status(self, server, expected_status):
-        """Waits for a migration record with the given status to be found
+    def _wait_for_migration_status(self, server, expected_statuses):
+        """Waits for a migration record with the given statuses to be found
         for the given server, else the test fails. The migration record,
         if found, is returned.
         """
@@ -325,13 +325,13 @@ class InstanceHelperMixin(object):
         if api is None:
             api = self.api

+        statuses = [status.lower() for status in expected_statuses]
         for attempt in range(10):
             migrations = api.api_get('/os-migrations').body['migrations']
             for migration in migrations:
                 if (migration['instance_uuid'] == server['id'] and
-                        migration['status'].lower() ==
-                        expected_status.lower()):
+                        migration['status'].lower() in statuses):
                     return migration
             time.sleep(0.5)
         self.fail('Timed out waiting for migration with status "%s" for '
-                  'instance: %s' % (expected_status, server['id']))
+                  'instance: %s' % (expected_statuses, server['id']))
diff --git a/nova/tests/functional/regressions/test_bug_1735407.py b/nova/tests/functional/regressions/test_bug_1735407.py
index e222029c92ce..eb321db31d5e 100644
--- a/nova/tests/functional/regressions/test_bug_1735407.py
+++ b/nova/tests/functional/regressions/test_bug_1735407.py
@@ -148,8 +148,8 @@ class TestParallelEvacuationWithServerGroup(
         # that. The only thing that happens after the instance.host is set to
         # the target host is the migration status setting to done. So we have
        # to wait for that to avoid asserting the wrong host below.
-        self._wait_for_migration_status(server1, 'done')
-        self._wait_for_migration_status(server2, 'done')
+        self._wait_for_migration_status(server1, ['done', 'failed'])
+        self._wait_for_migration_status(server2, ['done', 'failed'])

         # get the servers again to have the latest information about their
         # hosts
@@ -158,28 +158,19 @@ class TestParallelEvacuationWithServerGroup(

         # assert that the anti-affinity policy is enforced during the
         # evacuation
-        # NOTE(gibi): This shows bug 1735407 as both instance ends up on the
-        # same host.
-        self.assertEqual(server1['OS-EXT-SRV-ATTR:host'],
-                         server2['OS-EXT-SRV-ATTR:host'])
-        # After the bug 1735407 is fixed the following is expected:
-        # self.assertNotEqual(server1['OS-EXT-SRV-ATTR:host'],
-        #                     server2['OS-EXT-SRV-ATTR:host'])
+        self.assertNotEqual(server1['OS-EXT-SRV-ATTR:host'],
+                            server2['OS-EXT-SRV-ATTR:host'])

         # assert that one of the evacuation was successful and that server is
         # moved to another host and the evacuation of the other server is
         # failed
-        # NOTE(gibi): This shows bug 1735407 as both instance is moved
-        self.assertNotIn(server1['OS-EXT-SRV-ATTR:host'], {'host1', 'host2'})
-        self.assertNotIn(server2['OS-EXT-SRV-ATTR:host'], {'host1', 'host2'})
-        # After fixing the bug 1735407 the following is expected
-        # if server1['status'] == 'ERROR':
-        #     failed_server = server1
-        #     evacuated_server = server2
-        # else:
-        #     failed_server = server2
-        #     evacuated_server = server1
-        # self.assertEqual('ERROR', failed_server['status'])
-        # self.assertNotEqual('host3', failed_server['OS-EXT-SRV-ATTR:host'])
-        # self.assertEqual('ACTIVE', evacuated_server['status'])
-        # self.assertEqual('host3', evacuated_server['OS-EXT-SRV-ATTR:host'])
+        if server1['status'] == 'ERROR':
+            failed_server = server1
+            evacuated_server = server2
+        else:
+            failed_server = server2
+            evacuated_server = server1
+        self.assertEqual('ERROR', failed_server['status'])
+        self.assertNotEqual('host3', failed_server['OS-EXT-SRV-ATTR:host'])
+        self.assertEqual('ACTIVE', evacuated_server['status'])
+        self.assertEqual('host3', evacuated_server['OS-EXT-SRV-ATTR:host'])
diff --git a/nova/tests/functional/test_server_group.py b/nova/tests/functional/test_server_group.py
index 9e8ea76e713d..8fa7546f1470 100644
--- a/nova/tests/functional/test_server_group.py
+++ b/nova/tests/functional/test_server_group.py
@@ -427,7 +427,7 @@ class ServerGroupTestV21(ServerGroupTestBase):
         post = {'evacuate': {'onSharedStorage': False}}
         self.admin_api.post_server_action(servers[1]['id'], post)

-        self._wait_for_migration_status(servers[1], 'done')
+        self._wait_for_migration_status(servers[1], ['done'])
         evacuated_server = self._wait_for_state_change(
             self.admin_api, servers[1], 'ACTIVE')

@@ -453,7 +453,7 @@ class ServerGroupTestV21(ServerGroupTestBase):
         post = {'evacuate': {'onSharedStorage': False}}
         self.admin_api.post_server_action(servers[1]['id'], post)

-        self._wait_for_migration_status(servers[1], 'error')
+        self._wait_for_migration_status(servers[1], ['error'])
         server_after_failed_evac = self._wait_for_state_change(
             self.admin_api, servers[1], 'ERROR')

@@ -477,7 +477,7 @@ class ServerGroupTestV21(ServerGroupTestBase):
         post = {'evacuate': {'onSharedStorage': False}}
         self.admin_api.post_server_action(servers[1]['id'], post)

-        self._wait_for_migration_status(servers[1], 'error')
+        self._wait_for_migration_status(servers[1], ['error'])
         server_after_failed_evac = self._wait_for_state_change(
             self.admin_api, servers[1], 'ERROR')

@@ -624,7 +624,7 @@ class ServerGroupTestV215(ServerGroupTestV21):
         post = {'evacuate': {}}
         self.admin_api.post_server_action(servers[1]['id'], post)

-        self._wait_for_migration_status(servers[1], 'done')
+        self._wait_for_migration_status(servers[1], ['done'])
         evacuated_server = self._wait_for_state_change(
             self.admin_api, servers[1], 'ACTIVE')

@@ -651,7 +651,7 @@ class ServerGroupTestV215(ServerGroupTestV21):
         post = {'evacuate': {}}
         self.admin_api.post_server_action(servers[1]['id'], post)

-        self._wait_for_migration_status(servers[1], 'error')
+        self._wait_for_migration_status(servers[1], ['error'])
         server_after_failed_evac = self._wait_for_state_change(
             self.admin_api, servers[1], 'ERROR')

@@ -675,7 +675,7 @@ class ServerGroupTestV215(ServerGroupTestV21):
         post = {'evacuate': {}}
         self.admin_api.post_server_action(servers[1]['id'], post)

-        self._wait_for_migration_status(servers[1], 'error')
+        self._wait_for_migration_status(servers[1], ['error'])
         server_after_failed_evac = self._wait_for_state_change(
             self.admin_api, servers[1], 'ERROR')

@@ -817,7 +817,7 @@ class ServerGroupTestV215(ServerGroupTestV21):
         post = {'evacuate': {}}
         self.admin_api.post_server_action(servers[1]['id'], post)

-        self._wait_for_migration_status(servers[1], 'done')
+        self._wait_for_migration_status(servers[1], ['done'])
         evacuated_server = self._wait_for_state_change(
             self.admin_api, servers[1], 'ACTIVE')

diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py
index eb45ab5a43d9..0829e6eec0b9 100644
--- a/nova/tests/functional/test_servers.py
+++ b/nova/tests/functional/test_servers.py
@@ -2531,7 +2531,7 @@ class ServerMovingTests(ProviderUsageBaseTestCase):
         # clears the task_state. The migration record status is set to
         # 'error', so that's what we need to look for to know when this
         # is done.
-        migration = self._wait_for_migration_status(server, 'error')
+        migration = self._wait_for_migration_status(server, ['error'])

         # The source_compute should be set on the migration record, but the
         # destination shouldn't be as we never made it to one.
@@ -2591,7 +2591,7 @@ class ServerMovingTests(ProviderUsageBaseTestCase):
         self.api.post_server_action(server['id'], post)
         # The compute manager will put the migration record into error status
         # when pre_live_migration fails, so wait for that to happen.
-        migration = self._wait_for_migration_status(server, 'error')
+        migration = self._wait_for_migration_status(server, ['error'])
         # The _rollback_live_migration method in the compute manager will reset
         # the task_state on the instance, so wait for that to happen.
         server = self._wait_for_server_parameter(
@@ -2859,7 +2859,7 @@ class ServerLiveMigrateForceAndAbort(ProviderUsageBaseTestCase):
         }
         self.api.post_server_action(server['id'], post)

-        migration = self._wait_for_migration_status(server, 'running')
+        migration = self._wait_for_migration_status(server, ['running'])
         self.api.force_complete_migration(server['id'],
                                           migration['id'])

@@ -2902,7 +2902,7 @@ class ServerLiveMigrateForceAndAbort(ProviderUsageBaseTestCase):
         }
         self.api.post_server_action(server['id'], post)

-        migration = self._wait_for_migration_status(server, 'running')
+        migration = self._wait_for_migration_status(server, ['running'])
         self.api.delete_migration(server['id'], migration['id'])

         self._wait_for_server_parameter(self.api, server,
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index e915699e3403..d254c25ed035 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -3913,6 +3913,87 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
         delete_alloc.assert_called_once_with(instance, 'foo',
                                              node_type='destination')

+    @mock.patch('nova.context.RequestContext.elevated')
+    @mock.patch('nova.objects.instance.Instance.drop_migration_context')
+    @mock.patch('nova.objects.instance.Instance.apply_migration_context')
+    @mock.patch('nova.objects.instance.Instance.mutated_migration_context')
+    @mock.patch('nova.objects.BlockDeviceMappingList.get_by_instance_uuid')
+    @mock.patch('nova.network.neutronv2.api.API.'
+                'setup_instance_network_on_host')
+    @mock.patch('nova.network.neutronv2.api.API.setup_networks_on_host')
+    @mock.patch('nova.objects.instance.Instance.save')
+    @mock.patch('nova.compute.utils.notify_about_instance_action')
+    @mock.patch('nova.compute.utils.notify_about_instance_usage')
+    @mock.patch('nova.compute.utils.notify_usage_exists')
+    @mock.patch('nova.objects.instance.Instance.image_meta',
+                new_callable=mock.PropertyMock)
+    @mock.patch('nova.compute.manager.ComputeManager.'
+                '_validate_instance_group_policy')
+    @mock.patch('nova.compute.manager.ComputeManager._set_migration_status')
+    @mock.patch('nova.compute.resource_tracker.ResourceTracker.rebuild_claim')
+    def test_evacuate_late_server_group_policy_check(
+            self, mock_rebuild_claim, mock_set_migration_status,
+            mock_validate_policy, mock_image_meta, mock_notify_exists,
+            mock_notify_legacy, mock_notify, mock_instance_save,
+            mock_setup_networks, mock_setup_intance_network, mock_get_bdms,
+            mock_mutate_migration, mock_appy_migration, mock_drop_migration,
+            mock_context_elevated):
+        instance = fake_instance.fake_instance_obj(self.context)
+        instance.info_cache = None
+        elevated_context = mock.Mock()
+        mock_context_elevated.return_value = elevated_context
+        request_spec = objects.RequestSpec()
+        request_spec.scheduler_hints = {'group': [uuids.group]}
+
+        self.compute.rebuild_instance(
+            self.context, instance, None, None, None, None, None,
+            None, recreate=True, scheduled_node='fake-node',
+            request_spec=request_spec)
+
+        mock_validate_policy.assert_called_once_with(
+            elevated_context, instance, {'group': [uuids.group]})
+
+    @mock.patch('nova.compute.utils.add_instance_fault_from_exc')
+    @mock.patch('nova.compute.resource_tracker.ResourceTracker.'
+                'delete_allocation_for_evacuated_instance')
+    @mock.patch('nova.context.RequestContext.elevated')
+    @mock.patch('nova.objects.instance.Instance.save')
+    @mock.patch('nova.compute.utils.notify_about_instance_action')
+    @mock.patch('nova.compute.utils.notify_about_instance_usage')
+    @mock.patch('nova.compute.manager.ComputeManager.'
+                '_validate_instance_group_policy')
+    @mock.patch('nova.compute.manager.ComputeManager._set_migration_status')
+    @mock.patch('nova.compute.resource_tracker.ResourceTracker.rebuild_claim')
+    def test_evacuate_late_server_group_policy_check_fails(
+            self, mock_rebuild_claim, mock_set_migration_status,
+            mock_validate_policy, mock_notify_legacy, mock_notify,
+            mock_instance_save, mock_context_elevated, mock_delete_allocation,
+            mock_instance_fault):
+        instance = fake_instance.fake_instance_obj(self.context)
+        instance.info_cache = None
+        elevated_context = mock.Mock()
+        mock_context_elevated.return_value = elevated_context
+        request_spec = objects.RequestSpec()
+        request_spec.scheduler_hints = {'group': [uuids.group]}
+
+        exc = exception.RescheduledException(
+            instance_uuid=instance.uuid, reason='policy violation')
+        mock_validate_policy.side_effect = exc
+
+        self.assertRaises(
+            exception.BuildAbortException, self.compute.rebuild_instance,
+            self.context, instance, None, None, None, None, None, None,
+            recreate=True, scheduled_node='fake-node',
+            request_spec=request_spec)
+
+        mock_validate_policy.assert_called_once_with(
+            elevated_context, instance, {'group': [uuids.group]})
+        mock_delete_allocation.assert_called_once_with(
+            instance, 'fake-node', node_type='destination')
+        mock_notify.assert_called_once_with(
+            elevated_context, instance, 'fake-mini', action='rebuild',
+            bdms=None, exception=exc, phase='error')
+
     def test_rebuild_node_not_updated_if_not_recreate(self):
         node = uuidutils.generate_uuid()  # ironic node uuid
         instance = fake_instance.fake_instance_obj(self.context, node=node)
diff --git a/nova/tests/unit/compute/test_rpcapi.py b/nova/tests/unit/compute/test_rpcapi.py
index cb72b22d59bb..1075059156c7 100644
--- a/nova/tests/unit/compute/test_rpcapi.py
+++ b/nova/tests/unit/compute/test_rpcapi.py
@@ -517,6 +517,15 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
                 reboot_type='type')

     def test_rebuild_instance(self):
+        self._test_compute_api('rebuild_instance', 'cast', new_pass='None',
+                injected_files='None', image_ref='None', orig_image_ref='None',
+                bdms=[], instance=self.fake_instance_obj, host='new_host',
+                orig_sys_metadata=None, recreate=True, on_shared_storage=True,
+                preserve_ephemeral=True, migration=None, node=None,
+                limits=None, request_spec=None, version='4.22')
+
+    def test_rebuild_instance_remove_request_spec(self):
+        self.flags(group='upgrade_levels', compute='4.21')
         self._test_compute_api('rebuild_instance', 'cast', new_pass='None',
                 injected_files='None', image_ref='None', orig_image_ref='None',
                 bdms=[], instance=self.fake_instance_obj, host='new_host',
diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py
index 915422976ed1..8e47cb60bcd4 100644
--- a/nova/tests/unit/conductor/test_conductor.py
+++ b/nova/tests/unit/conductor/test_conductor.py
@@ -366,9 +366,6 @@ class _BaseTaskTestCase(object):
             compute_rebuild_args['node'] = node
             compute_rebuild_args['limits'] = limits

-        # Args that are passed in to the method but don't get passed to RPC
-        compute_rebuild_args.pop('request_spec')
-
         return rebuild_args, compute_rebuild_args

     @mock.patch.object(objects.InstanceMapping, 'get_by_instance_uuid')
@@ -1312,6 +1309,7 @@ class _BaseTaskTestCase(object):
         select_dest_mock.assert_called_once_with(self.context, fake_spec,
                 inst_uuids, return_objects=True, return_alternates=False)
         compute_args['host'] = expected_host
+        compute_args['request_spec'] = fake_spec
         rebuild_mock.assert_called_once_with(self.context,
                                              instance=inst_obj,
                                              **compute_args)
@@ -1469,6 +1467,7 @@ class _BaseTaskTestCase(object):
                 fake_spec, [inst_obj.uuid], return_objects=True,
                 return_alternates=False)
         compute_args['host'] = expected_host
+        compute_args['request_spec'] = fake_spec
         rebuild_mock.assert_called_once_with(self.context,
                                              instance=inst_obj,
                                              **compute_args)
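
The following is a minimal, self-contained sketch (not part of the patch, and
not Nova code) of the behaviour the late check introduces: after scheduling,
the destination compute re-validates the server group policy from the request
spec, and because rebuild has no reschedule loop, a detected violation fails
the evacuation instead of retrying it elsewhere. All names in the sketch
(GroupPolicyViolation, validate_group_policy, the simplified rebuild_instance
signature) are hypothetical stand-ins for the ComputeManager plumbing above.

    # Illustrative sketch only; every name below is a hypothetical stand-in.

    class GroupPolicyViolation(Exception):
        """Stand-in for the RescheduledException raised by the late check."""


    def validate_group_policy(group_hosts, target_host, policy):
        # Re-check the server group policy on the destination compute after
        # scheduling, so two evacuations scheduled in parallel cannot both
        # land on the same host for an anti-affinity group.
        if policy == 'anti-affinity' and target_host in group_hosts:
            raise GroupPolicyViolation(
                '%s already hosts a member of the group' % target_host)


    def rebuild_instance(target_host, request_spec):
        group = request_spec.get('group')
        if group:
            try:
                validate_group_policy(group['hosts'], target_host,
                                      group['policy'])
            except GroupPolicyViolation:
                # Rebuild has no reschedule loop, so the evacuation is
                # aborted and has to be retried by the user.
                return 'ERROR'
        return 'ACTIVE'


    spec = {'group': {'policy': 'anti-affinity', 'hosts': {'host3'}}}
    print(rebuild_instance('host3', spec))  # ERROR: parallel evacuation caught
    print(rebuild_instance('host2', spec))  # ACTIVE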