From e4c998e57390c15891131205f7443fee98dde4ee Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Mon, 25 Mar 2019 11:27:57 -0400 Subject: [PATCH] Do not persist RequestSpec.ignore_hosts Change Ic3968721d257a167f3f946e5387cd227a7eeec6c in Newton started setting the RequestSpec.ignore_hosts field to the source instance.host during resize/cold migrate if allow_resize_to_same_host=False in config, which it is by default. Change I8abdf58a6537dd5e15a012ea37a7b48abd726579 also in Newton persists changes to the RequestSpec in conductor in order to save the RequestSpec.flavor for the new flavor. This inadvertently persists the ignore_hosts field as well. Later if you try to evacuate or unshelve the server it will ignore the original source host because of the persisted ignore_hosts value. This is obviously a problem in a small deployment with only a few compute nodes (like an edge deployment). As a result, an evacuation can fail if the only available host is the one being ignored. This change does two things: 1. In order to deal with existing corrupted RequestSpecs in the DB, this change simply makes conductor overwrite RequestSpec.ignore_hosts rather than append during evacuate before calling the scheduler so the current instance host (which is down) is filtered out. This evacuate code dealing with ignore_hosts goes back to Mitaka: I7fe694175bb47f53d281bd62ac200f1c8416682b The test_rebuild_instance_with_request_spec unit test is updated and renamed to actually be doing an evacuate which is what it was intended for, i.e. the host would not change during rebuild. 2. This change makes the RequestSpec no longer persist the ignore_hosts field like several other per-operation fields in the RequestSpec. The only operations that use ignore_hosts are resize (if allow_resize_to_same_host=False), evacuate and live migration, and the field gets reset in each case to ignore the source instance.host. The related functional recreate test is also updated to show the bug is fixed. Note that as part of that, the confirm_migration method in the fake virt driver needed to be implemented otherwise trying to evacuate back to the source host fails with an InstanceExists error since the confirmResize operation did not remove the guest from the source host. Change-Id: I3f488be6f3c399f23ccf2b9ee0d76cd000da0e3e Closes-Bug: #1669054 --- nova/conductor/manager.py | 3 +-- nova/objects/request_spec.py | 14 +++++------ .../regressions/test_bug_1669054.py | 24 ++++++++----------- nova/tests/unit/conductor/test_conductor.py | 13 +++++----- nova/tests/unit/objects/test_request_spec.py | 12 ++++++++-- nova/virt/fake.py | 6 ++++- 6 files changed, 39 insertions(+), 33 deletions(-) diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py index f641a45f57fb..6241a4ec2a73 100644 --- a/nova/conductor/manager.py +++ b/nova/conductor/manager.py @@ -1027,8 +1027,7 @@ class ComputeTaskManager(base.Base): if recreate: # NOTE(sbauza): Augment the RequestSpec object by excluding # the source host for avoiding the scheduler to pick it - request_spec.ignore_hosts = request_spec.ignore_hosts or [] - request_spec.ignore_hosts.append(instance.host) + request_spec.ignore_hosts = [instance.host] # NOTE(sbauza): Force_hosts/nodes needs to be reset # if we want to make sure that the next destination # is not forced to be the original host diff --git a/nova/objects/request_spec.py b/nova/objects/request_spec.py index e9c97e959884..c93da7d13c04 100644 --- a/nova/objects/request_spec.py +++ b/nova/objects/request_spec.py @@ -534,10 +534,10 @@ class RequestSpec(base.NovaObject): # None and we'll lose what is set (but not persisted) on the # object. continue - elif key == 'retry': - # NOTE(takashin): Do not override the 'retry' field - # which is not a persistent. It is not lazy-loadable field. - # If it is not set, set None. + elif key in ('retry', 'ignore_hosts'): + # NOTE(takashin): Do not override the 'retry' or 'ignore_hosts' + # fields which are not persisted. They are not lazy-loadable + # fields. If they are not set, set None. if not spec.obj_attr_is_set(key): setattr(spec, key, None) elif key in spec_obj: @@ -601,10 +601,10 @@ class RequestSpec(base.NovaObject): if 'instance_group' in spec and spec.instance_group: spec.instance_group.members = None spec.instance_group.hosts = None - # NOTE(mriedem): Don't persist retries or requested_destination - # since those are per-request + # NOTE(mriedem): Don't persist retries, requested_destination, + # requested_resources or ignored hosts since those are per-request for excluded in ('retry', 'requested_destination', - 'requested_resources'): + 'requested_resources', 'ignore_hosts'): if excluded in spec and getattr(spec, excluded): setattr(spec, excluded, None) # NOTE(stephenfin): Don't persist network metadata since we have diff --git a/nova/tests/functional/regressions/test_bug_1669054.py b/nova/tests/functional/regressions/test_bug_1669054.py index 47440d6c1680..73b8fcc18503 100644 --- a/nova/tests/functional/regressions/test_bug_1669054.py +++ b/nova/tests/functional/regressions/test_bug_1669054.py @@ -63,21 +63,17 @@ class ResizeEvacuateTestCase(integrated_helpers._IntegratedTestBase, # Disable the host on which the server is now running (host2). host2.stop() self.api.force_down_service('host2', 'nova-compute', forced_down=True) - # Now try to evacuate the server back to the original source compute. - # FIXME(mriedem): This is bug 1669054 where the evacuate fails with - # NoValidHost because the RequestSpec.ignore_hosts field has the - # original source host in it which is the only other available host to - # which we can evacuate the server. req = {'evacuate': {'onSharedStorage': False}} - self.api.post_server_action(server['id'], req, - check_response_status=[500]) - # There should be fault recorded with the server. - server = self._wait_for_state_change(self.api, server, 'ERROR') - self.assertIn('fault', server) - self.assertIn('No valid host was found', server['fault']['message']) - # Assert the RequestSpec.ignore_hosts is still populated. + self.api.post_server_action(server['id'], req) + server = self._wait_for_state_change(self.api, server, 'ACTIVE') + # The evacuate flow in the compute manager is annoying in that it + # sets the instance status to ACTIVE before updating the host, so we + # have to wait for the migration record to be 'done' to avoid a race. + self._wait_for_migration_status(server, ['done']) + self.assertEqual(self.compute.host, server['OS-EXT-SRV-ATTR:host']) + + # Assert the RequestSpec.ignore_hosts field is not populated. reqspec = objects.RequestSpec.get_by_instance_uuid( context.get_admin_context(), server['id']) - self.assertIsNotNone(reqspec.ignore_hosts) - self.assertIn(self.compute.host, reqspec.ignore_hosts) + self.assertIsNone(reqspec.ignore_hosts) diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py index 99dde54c251c..c2d1632ff1be 100644 --- a/nova/tests/unit/conductor/test_conductor.py +++ b/nova/tests/unit/conductor/test_conductor.py @@ -1562,7 +1562,7 @@ class _BaseTaskTestCase(object): **compute_args) @mock.patch('nova.compute.utils.notify_about_instance_rebuild') - def test_rebuild_instance_with_request_spec(self, mock_notify): + def test_evacuate_instance_with_request_spec(self, mock_notify): inst_obj = self._create_fake_instance_obj() inst_obj.host = 'noselect' expected_host = 'thebesthost' @@ -1570,10 +1570,10 @@ class _BaseTaskTestCase(object): expected_limits = None fake_selection = objects.Selection(service_host=expected_host, nodename=expected_node, limits=None) - fake_spec = objects.RequestSpec(ignore_hosts=[]) + fake_spec = objects.RequestSpec(ignore_hosts=[uuids.ignored_host]) rebuild_args, compute_args = self._prepare_rebuild_args( {'host': None, 'node': expected_node, 'limits': expected_limits, - 'request_spec': fake_spec}) + 'request_spec': fake_spec, 'recreate': True}) with test.nested( mock.patch.object(self.conductor_manager.compute_rpcapi, 'rebuild_instance'), @@ -1587,10 +1587,9 @@ class _BaseTaskTestCase(object): self.conductor_manager.rebuild_instance(context=self.context, instance=inst_obj, **rebuild_args) - if rebuild_args['recreate']: - reset_fd.assert_called_once_with() - else: - reset_fd.assert_not_called() + reset_fd.assert_called_once_with() + # The RequestSpec.ignore_hosts field should be overwritten. + self.assertEqual([inst_obj.host], fake_spec.ignore_hosts) select_dest_mock.assert_called_once_with(self.context, fake_spec, [inst_obj.uuid], return_objects=True, return_alternates=False) diff --git a/nova/tests/unit/objects/test_request_spec.py b/nova/tests/unit/objects/test_request_spec.py index 988e4d1b8dbe..7773150970bb 100644 --- a/nova/tests/unit/objects/test_request_spec.py +++ b/nova/tests/unit/objects/test_request_spec.py @@ -575,7 +575,8 @@ class _TestRequestSpecObject(object): fake_spec['instance_uuid']) self.assertEqual(1, req_obj.num_instances) - self.assertEqual(['host2', 'host4'], req_obj.ignore_hosts) + # ignore_hosts is not persisted + self.assertIsNone(req_obj.ignore_hosts) self.assertEqual('fake', req_obj.project_id) self.assertEqual({'hint': ['over-there']}, req_obj.scheduler_hints) self.assertEqual(['host1', 'host3'], req_obj.force_hosts) @@ -599,7 +600,7 @@ class _TestRequestSpecObject(object): jsonutils.loads(changes['spec'])) # primitive fields - for field in ['instance_uuid', 'num_instances', 'ignore_hosts', + for field in ['instance_uuid', 'num_instances', 'project_id', 'scheduler_hints', 'force_hosts', 'availability_zone', 'force_nodes']: self.assertEqual(getattr(req_obj, field), @@ -616,6 +617,7 @@ class _TestRequestSpecObject(object): self.assertIsNone(serialized_obj.instance_group.hosts) self.assertIsNone(serialized_obj.retry) self.assertIsNone(serialized_obj.requested_destination) + self.assertIsNone(serialized_obj.ignore_hosts) def test_create(self): req_obj = fake_request_spec.fake_spec_obj(remove_id=True) @@ -710,6 +712,7 @@ class _TestRequestSpecObject(object): ])) req_obj.retry = expected_retry req_obj.num_instances = 2 + req_obj.ignore_hosts = [uuids.ignored_host] orig_save_in_db = request_spec.RequestSpec._save_in_db with mock.patch.object(request_spec.RequestSpec, '_save_in_db') \ @@ -723,11 +726,13 @@ class _TestRequestSpecObject(object): # 2. requested_destination # 3. requested_resources # 4. retry + # 5. ignore_hosts data = jsonutils.loads(updates['spec'])['nova_object.data'] self.assertNotIn('network_metadata', data) self.assertIsNone(data['requested_destination']) self.assertIsNone(data['requested_resources']) self.assertIsNone(data['retry']) + self.assertIsNone(data['ignore_hosts']) self.assertIsNotNone(data['instance_uuid']) # also we expect that the following fields are not reset after save @@ -735,6 +740,7 @@ class _TestRequestSpecObject(object): # 2. requested_destination # 3. requested_resources # 4. retry + # 5. ignore_hosts self.assertIsNotNone(req_obj.network_metadata) self.assertJsonEqual(expected_network_metadata.obj_to_primitive(), req_obj.network_metadata.obj_to_primitive()) @@ -747,6 +753,8 @@ class _TestRequestSpecObject(object): self.assertIsNotNone(req_obj.retry) self.assertJsonEqual(expected_retry.obj_to_primitive(), req_obj.retry.obj_to_primitive()) + self.assertIsNotNone(req_obj.ignore_hosts) + self.assertEqual([uuids.ignored_host], req_obj.ignore_hosts) def test_save(self): req_obj = fake_request_spec.fake_spec_obj() diff --git a/nova/virt/fake.py b/nova/virt/fake.py index 2eb6e9dd8d7a..0ac43ce8eeee 100644 --- a/nova/virt/fake.py +++ b/nova/virt/fake.py @@ -609,7 +609,11 @@ class FakeDriver(driver.ComputeDriver): block_device_info=block_device_info) def confirm_migration(self, context, migration, instance, network_info): - return + # Confirm migration cleans up the guest from the source host so just + # destroy the guest to remove it from the list of tracked instances + # unless it is a same-host resize. + if migration.source_compute != migration.dest_compute: + self.destroy(context, instance, network_info) def pre_live_migration(self, context, instance, block_device_info, network_info, disk_info, migrate_data):