diff --git a/nova/compute/api.py b/nova/compute/api.py
index a65f66ba3fac..89b72e594c5d 100644
--- a/nova/compute/api.py
+++ b/nova/compute/api.py
@@ -2310,6 +2310,9 @@ class API(base.Base):
 
         src_host = migration.source_compute
 
+        # FIXME(mriedem): If migration.cross_cell_move, we need to also
+        # clean up the instance data from the source cell database.
+
         self._record_action_start(context, instance,
                                   instance_actions.CONFIRM_RESIZE)
 
diff --git a/nova/tests/fixtures.py b/nova/tests/fixtures.py
index 1b6fb4cee3e8..d091c85e7821 100644
--- a/nova/tests/fixtures.py
+++ b/nova/tests/fixtures.py
@@ -1947,6 +1947,14 @@ class CinderFixture(fixtures.Fixture):
                     yield volume_id
                     break
 
+    def attachment_ids_for_instance(self, instance_uuid):
+        attachment_ids = []
+        for volume_id, attachments in self.volume_to_attachment.items():
+            for attachment in attachments.values():
+                if attachment['instance_uuid'] == instance_uuid:
+                    attachment_ids.append(attachment['id'])
+        return attachment_ids
+
     def setUp(self):
         super(CinderFixture, self).setUp()
 
diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py
index dbe1ee2c972f..2b6e12e7e765 100644
--- a/nova/tests/functional/integrated_helpers.py
+++ b/nova/tests/functional/integrated_helpers.py
@@ -426,6 +426,7 @@ class ProviderUsageBaseTestCase(test.TestCase, InstanceHelperMixin):
     expected_fake_driver_capability_traits = set([
         six.u(trait) for trait in [
             os_traits.COMPUTE_IMAGE_TYPE_RAW,
+            os_traits.COMPUTE_DEVICE_TAGGING,
             os_traits.COMPUTE_NET_ATTACH_INTERFACE,
             os_traits.COMPUTE_NET_ATTACH_INTERFACE_WITH_TAG,
             os_traits.COMPUTE_VOLUME_ATTACH_WITH_TAG,
diff --git a/nova/tests/functional/test_cross_cell_migrate.py b/nova/tests/functional/test_cross_cell_migrate.py
new file mode 100644
index 000000000000..bc671819c015
--- /dev/null
+++ b/nova/tests/functional/test_cross_cell_migrate.py
@@ -0,0 +1,389 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from nova import context as nova_context
+from nova import objects
+from nova.scheduler import weights
+from nova.tests import fixtures as nova_fixtures
+from nova.tests.functional import integrated_helpers
+from nova.tests.unit.image import fake as fake_image
+
+
+class HostNameWeigher(weights.BaseHostWeigher):
+    # TestMultiCellMigrate creates host1 in cell1 and host2 in cell2.
+    # Something about migrating from host1 to host2 teases out failures,
+    # which probably has to do with cell1 being the default cell DB in
+    # our base test class setup, so prefer host1 to make the tests
+    # deterministic.
+    _weights = {'host1': 100, 'host2': 50}
+
+    def _weigh_object(self, host_state, weight_properties):
+        # Any undefined host gets no weight.
+        return self._weights.get(host_state.host, 0)
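+
+# NOTE: This works because the scheduler normalizes each weigher's results,
+# multiplies them by the weigher's weight_multiplier and sums across all
+# enabled weighers; with only this weigher enabled, host1 always sorts
+# ahead of host2.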
+
+
+class TestMultiCellMigrate(integrated_helpers.ProviderUsageBaseTestCase):
+    """Tests for cross-cell cold migration (resize)."""
+
+    NUMBER_OF_CELLS = 2
+    compute_driver = 'fake.MediumFakeDriver'
+
+    def setUp(self):
+        # Use our custom weigher defined above to make sure that we have
+        # a predictable scheduling sort order during server create.
+        self.flags(weight_classes=[__name__ + '.HostNameWeigher'],
+                   group='filter_scheduler')
+        super(TestMultiCellMigrate, self).setUp()
+        self.cinder = self.useFixture(nova_fixtures.CinderFixture(self))
+
+        self._enable_cross_cell_resize()
+
+        # Adjust the polling interval and timeout for long RPC calls.
+        self.flags(rpc_response_timeout=1)
+        self.flags(long_rpc_timeout=60)
+
+        # Set up 2 compute services in different cells.
+        self.host_to_cell_mappings = {
+            'host1': 'cell1', 'host2': 'cell2'}
+
+        # TODO(mriedem): Should probably put the hosts in separate AZs so we
+        # can assert the instance.availability_zone value is updated when it
+        # moves.
+        for host in sorted(self.host_to_cell_mappings):
+            self._start_compute(
+                host, cell_name=self.host_to_cell_mappings[host])
+
+    def _enable_cross_cell_resize(self):
+        # Enable the cross-cell resize policy since it defaults to not
+        # allowing anyone to perform that type of operation. For these tests
+        # we'll just allow admins to perform cross-cell resize.
+        # TODO(mriedem): Uncomment this when the policy rule is added and
+        # used in the compute API _allow_cross_cell_resize method. For now
+        # we just stub that method to return True.
+        # self.policy_fixture.set_rules({
+        #     servers_policies.CROSS_CELL_RESIZE:
+        #         base_policies.RULE_ADMIN_API},
+        #     overwrite=False)
+        self.stub_out('nova.compute.api.API._allow_cross_cell_resize',
+                      lambda *a, **kw: True)
+
+    def assertFlavorMatchesAllocation(self, flavor, allocation,
+                                      volume_backed=False):
+        self.assertEqual(flavor['vcpus'], allocation['VCPU'])
+        self.assertEqual(flavor['ram'], allocation['MEMORY_MB'])
+        # Volume-backed instances won't have DISK_GB allocations.
+        if volume_backed:
+            self.assertNotIn('DISK_GB', allocation)
+        else:
+            self.assertEqual(flavor['disk'], allocation['DISK_GB'])
+
+    def assert_instance_fields_match_flavor(self, instance, flavor):
+        self.assertEqual(instance.memory_mb, flavor['ram'])
+        self.assertEqual(instance.vcpus, flavor['vcpus'])
+        self.assertEqual(instance.root_gb, flavor['disk'])
+        self.assertEqual(
+            instance.ephemeral_gb, flavor['OS-FLV-EXT-DATA:ephemeral'])
+
+    def _count_volume_attachments(self, server_id):
+        attachment_ids = self.cinder.attachment_ids_for_instance(server_id)
+        return len(attachment_ids)
+
+    def assert_quota_usage(self, expected_num_instances):
+        limits = self.api.get_limits()['absolute']
+        self.assertEqual(expected_num_instances, limits['totalInstancesUsed'])
+
+    def _create_server(self, flavor, volume_backed=False):
+        """Creates a server and waits for it to be ACTIVE.
+
+        :param flavor: dict form of the flavor to use
+        :param volume_backed: True if the server should be volume-backed
+        :returns: server dict response from the GET /servers/{server_id} API
+        """
+        # Provide a VIF tag for the pre-existing port. Since VIF tags are
+        # stored in the virtual_interfaces table in the cell DB, we want to
+        # make sure those survive the resize to another cell.
+        networks = [{
+            'port': self.neutron.port_1['id'],
+            'tag': 'private'
+        }]
+        image_uuid = fake_image.get_valid_image_id()
+        server = self._build_minimal_create_server_request(
+            self.api, 'test_cross_cell_resize',
+            image_uuid=image_uuid,
+            flavor_id=flavor['id'],
+            networks=networks)
+        # Put a tag on the server to make sure that survives the resize.
+        server['tags'] = ['test']
+        if volume_backed:
+            bdms = [{
+                'boot_index': 0,
+                'uuid': nova_fixtures.CinderFixture.IMAGE_BACKED_VOL,
+                'source_type': 'volume',
+                'destination_type': 'volume',
+                'tag': 'root'
+            }]
+            server['block_device_mapping_v2'] = bdms
+            # We don't need the imageRef for volume-backed servers.
+            server.pop('imageRef', None)
+
+        server = self.api.post_server({'server': server})
+        server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE')
+        # For volume-backed make sure there is one attachment to start.
+        if volume_backed:
+            self.assertEqual(1, self._count_volume_attachments(server['id']),
+                             self.cinder.volume_to_attachment)
+        return server
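+
+    # NOTE: The port and BDM 'tag' values used in _create_server above rely
+    # on device tagging support in the compute driver, which is presumably
+    # why this change also reports supports_device_tagging=True from the
+    # fake driver and expects the COMPUTE_DEVICE_TAGGING trait in
+    # ProviderUsageBaseTestCase.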
+
+    def _resize_and_validate(self, volume_backed=False, stopped=False,
+                             target_host=None):
+        """Creates a server and resizes it to a host in another cell.
+
+        Validates various aspects of the server and its related records
+        (allocations, migrations, actions, VIF tags, etc).
+
+        :param volume_backed: True if the server should be volume-backed,
+            False if image-backed.
+        :param stopped: True if the server should be stopped prior to the
+            resize, False if the server should be ACTIVE.
+        :param target_host: If not None, triggers a cold migration to the
+            specified host.
+        :returns: tuple of:
+            - server response object
+            - source compute node resource provider uuid
+            - target compute node resource provider uuid
+            - old flavor
+            - new flavor
+        """
+        # Create the server.
+        flavors = self.api.get_flavors()
+        old_flavor = flavors[0]
+        server = self._create_server(old_flavor, volume_backed=volume_backed)
+        original_host = server['OS-EXT-SRV-ATTR:host']
+
+        if stopped:
+            # Stop the server before resizing it.
+            self.api.post_server_action(server['id'], {'os-stop': None})
+            self._wait_for_state_change(self.api, server, 'SHUTOFF')
+
+        # Before resizing, make sure quota usage only shows one instance.
+        self.assert_quota_usage(expected_num_instances=1)
+
+        # Resize it, which should migrate the server to the host in the
+        # other cell.
+        new_flavor = flavors[1]
+        body = {'resize': {'flavorRef': new_flavor['id']}}
+        self.api.post_server_action(server['id'], body)
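+        # NOTE: target_host is accepted by this method but not used yet;
+        # presumably a follow-up change will use it to trigger a targeted
+        # cold migration instead of the resize above, along the lines of:
+        #   self.api.post_server_action(
+        #       server['id'], {'migrate': {'host': target_host}})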
+
+        # Wait for the server to be resized and then verify the host has
+        # changed to be the host in the other cell.
+        server = self._wait_for_state_change(self.api, server, 'VERIFY_RESIZE')
+        expected_host = 'host1' if original_host == 'host2' else 'host2'
+        self.assertEqual(expected_host, server['OS-EXT-SRV-ATTR:host'])
+        # Assert that the instance is only listed once by the API (to make
+        # sure it's not listed from both cells). Note that we only get one
+        # because the DB API excludes hidden instances by default (see
+        # instance_get_all_by_filters_sort).
+        servers = self.api.get_servers()
+        self.assertEqual(1, len(servers),
+                         'Unexpected number of servers: %s' % servers)
+        self.assertEqual(expected_host, servers[0]['OS-EXT-SRV-ATTR:host'])
+
+        # And that there is only one migration record.
+        migrations = self.api.api_get(
+            '/os-migrations?instance_uuid=%s' % server['id']
+        ).body['migrations']
+        self.assertEqual(1, len(migrations),
+                         'Unexpected number of migration records: %s' %
+                         migrations)
+        migration = migrations[0]
+        self.assertEqual('finished', migration['status'])
+
+        # There should be two actions, one for the create and one for the
+        # resize, plus a third if the server was stopped first.
+        actions = self.api.api_get(
+            '/servers/%s/os-instance-actions' % server['id']
+        ).body['instanceActions']
+        expected_num_of_actions = 3 if stopped else 2
+        self.assertEqual(expected_num_of_actions, len(actions), actions)
+        # Each action should have events (make sure these were copied from
+        # the source cell to the target cell).
+        for action in actions:
+            detail = self.api.api_get(
+                '/servers/%s/os-instance-actions/%s' % (
+                    server['id'], action['request_id'])).body['instanceAction']
+            self.assertNotEqual(0, len(detail['events']), detail)
+
+        # The tag should still be present on the server.
+        self.assertEqual(1, len(server['tags']),
+                         'Server tags not found in target cell.')
+        self.assertEqual('test', server['tags'][0])
+
+        # Confirm the source node has allocations for the old flavor and the
+        # target node has allocations for the new flavor.
+        source_rp_uuid = self._get_provider_uuid_by_host(original_host)
+        # The source node allocations should be on the migration record.
+        source_allocations = self._get_allocations_by_provider_uuid(
+            source_rp_uuid)[migration['uuid']]['resources']
+        self.assertFlavorMatchesAllocation(
+            old_flavor, source_allocations, volume_backed=volume_backed)
+
+        target_rp_uuid = self._get_provider_uuid_by_host(expected_host)
+        # The target node allocations should be on the instance record.
+        target_allocations = self._get_allocations_by_provider_uuid(
+            target_rp_uuid)[server['id']]['resources']
+        self.assertFlavorMatchesAllocation(
+            new_flavor, target_allocations, volume_backed=volume_backed)
+
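+        # NOTE: nova_context.target_cell below temporarily routes DB access
+        # for the given context to the mapped cell's database, which is what
+        # lets the test inspect the instance record in the target cell
+        # directly.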
+        # The instance record in the target cell DB should have both the old
+        # and new flavor stored on it with the values we expect at this point.
+        target_cell = self.cell_mappings[
+            self.host_to_cell_mappings[expected_host]]
+        admin_context = nova_context.get_admin_context()
+        with nova_context.target_cell(admin_context, target_cell) as cctxt:
+            inst = objects.Instance.get_by_uuid(
+                cctxt, server['id'], expected_attrs=['flavor'])
+            self.assertIsNotNone(
+                inst.old_flavor,
+                'instance.old_flavor not saved in target cell')
+            self.assertIsNotNone(
+                inst.new_flavor,
+                'instance.new_flavor not saved in target cell')
+            self.assertEqual(inst.flavor.flavorid, inst.new_flavor.flavorid)
+            self.assertNotEqual(inst.flavor.flavorid, inst.old_flavor.flavorid)
+            self.assertEqual(old_flavor['id'], inst.old_flavor.flavorid)
+            self.assertEqual(new_flavor['id'], inst.new_flavor.flavorid)
+            # Assert the ComputeManager._set_instance_info fields
+            # are correct after the resize.
+            self.assert_instance_fields_match_flavor(inst, new_flavor)
+
+        # Assert the VIF tag was carried through to the target cell DB.
+        interface_attachments = self.api.get_port_interfaces(server['id'])
+        self.assertEqual(1, len(interface_attachments))
+        self.assertEqual('private', interface_attachments[0]['tag'])
+
+        if volume_backed:
+            # Assert the BDM tag was carried through to the target cell DB.
+            volume_attachments = self.api.get_server_volumes(server['id'])
+            self.assertEqual(1, len(volume_attachments))
+            self.assertEqual('root', volume_attachments[0]['tag'])
+
+        # Make sure the guest is no longer tracked on the source node.
+        source_guest_uuids = (
+            self.computes[original_host].manager.driver.list_instance_uuids())
+        self.assertNotIn(server['id'], source_guest_uuids)
+        # And the guest is on the target node hypervisor.
+        target_guest_uuids = (
+            self.computes[expected_host].manager.driver.list_instance_uuids())
+        self.assertIn(server['id'], target_guest_uuids)
+
+        # The source hypervisor continues to report usage in the hypervisors
+        # API because even though the guest was destroyed there, the instance
+        # resources are still claimed on that node in case the user reverts.
+        self.assert_hypervisor_usage(source_rp_uuid, old_flavor, volume_backed)
+        # The new flavor should show up with resource usage on the target
+        # host.
+        self.assert_hypervisor_usage(target_rp_uuid, new_flavor, volume_backed)
+
+        # While we have a copy of the instance in each cell database, make
+        # sure quota usage only reports 1 (because one copy is hidden).
+        self.assert_quota_usage(expected_num_instances=1)
+
+        # For a volume-backed server, at this point there should be two
+        # volume attachments for the instance: one tracked in the source
+        # cell and one in the target cell.
+        if volume_backed:
+            self.assertEqual(2, self._count_volume_attachments(server['id']),
+                             self.cinder.volume_to_attachment)
+
+        # Assert the expected power state: 1 == RUNNING, 4 == SHUTDOWN
+        # (see nova.compute.power_state).
+        expected_power_state = 4 if stopped else 1
+        self.assertEqual(
+            expected_power_state, server['OS-EXT-STS:power_state'],
+            'Unexpected power state after resize.')
+
+        return server, source_rp_uuid, target_rp_uuid, old_flavor, new_flavor
+
+    def test_resize_confirm_image_backed(self):
+        """Creates an image-backed server in one cell and resizes it to the
+        host in the other cell. The resize is confirmed.
+        """
+        self._resize_and_validate()
+
+        # TODO(mriedem): Confirm the resize and make assertions.
+
+    def test_resize_revert_volume_backed(self):
+        """Tests a volume-backed resize to another cell where the resize
+        is reverted back to the original source cell.
+        """
+        self._resize_and_validate(volume_backed=True)
+
+        # TODO(mriedem): Revert the resize and make assertions.
+
+    def test_delete_while_in_verify_resize_status(self):
+        """Tests that when deleting a server in VERIFY_RESIZE status, the
+        data is cleaned up from both the source and target cells.
+        """
+        server = self._resize_and_validate()[0]
+        self.api.delete_server(server['id'])
+        self._wait_until_deleted(server)
+        # Now list servers to make sure the deleted server does not show up
+        # from the source cell.
+        servers = self.api.get_servers()
+        self.assertEqual(0, len(servers), servers)
+        # FIXME(mriedem): Need to clean up from the source cell in the API
+        # method _confirm_resize_on_deleting(). The above check passes
+        # because the instance is still hidden in the source cell so the API
+        # filters it out.
+        target_host = server['OS-EXT-SRV-ATTR:host']
+        source_host = 'host1' if target_host == 'host2' else 'host2'
+        source_cell = self.cell_mappings[
+            self.host_to_cell_mappings[source_host]]
+        ctxt = nova_context.get_admin_context()
+        with nova_context.target_cell(ctxt, source_cell) as cctxt:
+            # Once the API is fixed this should raise InstanceNotFound.
+            instance = objects.Instance.get_by_uuid(cctxt, server['id'])
+            self.assertTrue(instance.hidden)
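+
+    # NOTE: A fix for the FIXME above would presumably target the source
+    # cell and destroy the hidden instance record there, along the lines of:
+    #   with nova_context.target_cell(ctxt, source_cell) as cctxt:
+    #       objects.Instance.get_by_uuid(cctxt, server['id']).destroy()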
+
+    # TODO(mriedem): Test cold migration with a specified target host in
+    # another cell.
+
+    # TODO(mriedem): Test a cross-cell resize where the source cell has two
+    # hosts so the CrossCellWeigher picks the other host in the source cell
+    # and we do a traditional resize. Add a variant on this where the flavor
+    # being resized to is only available, via aggregate, on the host in the
+    # other cell so the CrossCellWeigher is overruled by the filters.
+
+    # TODO(mriedem): Test a bunch of rollback scenarios.
+
+    # TODO(mriedem): Test re-scheduling when the first host fails the
+    # resize_claim and a subsequent alternative host works, and also the
+    # case that all hosts fail the resize_claim.
+
+    # TODO(mriedem): Test cross-cell anti-affinity group assumptions from
+    # scheduler utils setup_instance_group where it assumes moves are within
+    # the same cell, so:
+    # 0. create 2 hosts in cell1 and 1 host in cell2
+    # 1. create two servers in an anti-affinity group in cell1
+    # 2. migrate one server to cell2
+    # 3. migrate the other server to cell2 - this should fail during
+    #    scheduling because there is already a server from the anti-affinity
+    #    group on the host in cell2 but setup_instance_group code may not
+    #    catch it.
+
+    # TODO(mriedem): Perform a resize with at-capacity computes, meaning
+    # that when we revert we can only fit the instance with the old flavor
+    # back onto the source host in the source cell.
+
+    def test_resize_confirm_from_stopped(self):
+        """Tests resizing and confirming a server that was initially
+        stopped, so it should remain stopped through the resize.
+        """
+        self._resize_and_validate(volume_backed=True, stopped=True)
+        # TODO(mriedem): Confirm the resize and assert the guest remains off.
+
+    # TODO(mriedem): Test a failure scenario where the server is recovered
+    # via rebuild in the source cell.
diff --git a/nova/virt/fake.py b/nova/virt/fake.py
index 2bbae8bd1b8a..d4bbc9da5d55 100644
--- a/nova/virt/fake.py
+++ b/nova/virt/fake.py
@@ -106,6 +106,7 @@ class FakeDriver(driver.ComputeDriver):
         "supports_evacuate": True,
         "supports_migrate_to_same_host": True,
         "supports_attach_interface": True,
+        "supports_device_tagging": True,
         "supports_tagged_attach_interface": True,
         "supports_tagged_attach_volume": True,
         "supports_extend_volume": True,
@@ -642,7 +643,7 @@ class FakeDriver(driver.ComputeDriver):
     def get_volume_connector(self, instance):
         return {'ip': CONF.my_block_storage_ip,
                 'initiator': 'fake',
-                'host': 'fakehost'}
+                'host': self._host}
 
     def get_available_nodes(self, refresh=False):
         return self._nodes