From 5f63ada309f562f3ffe9b75b6f1acf1efb80ee4e Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Fri, 13 Dec 2019 21:18:07 +0100 Subject: [PATCH] Func test for qos live migration reschedule This patch adds extra functional test coverage for live migration with qos ports. It covers live migration with target host specified and re-schedule success and failure cases. There are two non test code changes. 1) In the check_can_live_migrate_destination compute manager method CONF.host was used to determine the destination host. This was replaced with self.host. This change results in identical data in a production environment. But during the functional test the CONF object is global for every compute server running in the environment. This means that when the above call runs on host2 the CONF.host is already set to host3 as that host was started later in the test. Fortunately self.host is set on the compute manager during compute startup so it is correct in the functional test env too. It is unfortunate that we have to change non test code to make the test code work but the global CONF variable is a hard problem to resolve. 2) The InstancePCIRequest is saved in the LiveMigrationTask as the task updates the request for each host it tries to move the instance to. If none of the destinations supports the migration then the rollback() of the task will restore the InstancePCIRequest to its original value as the instance will remain on the source host. 
Change-Id: I351399f02d4d3b3e958a62fe37380577f3056d0d blueprint: support-move-ops-with-qos-ports-ussuri --- nova/compute/manager.py | 2 +- nova/conductor/tasks/live_migrate.py | 10 ++ nova/tests/functional/test_servers.py | 130 +++++++++++++++++- .../unit/conductor/tasks/test_live_migrate.py | 1 + 4 files changed, 137 insertions(+), 6 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index feafa9f523ba..706b699cdfa3 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -7615,7 +7615,7 @@ class ComputeManager(manager.Manager): src_compute_info = obj_base.obj_to_primitive( self._get_compute_info(ctxt, instance.host)) dst_compute_info = obj_base.obj_to_primitive( - self._get_compute_info(ctxt, CONF.host)) + self._get_compute_info(ctxt, self.host)) dest_check_data = self.driver.check_can_live_migrate_destination(ctxt, instance, src_compute_info, dst_compute_info, block_migration, disk_over_commit) diff --git a/nova/conductor/tasks/live_migrate.py b/nova/conductor/tasks/live_migrate.py index 6d430b2718e4..c50131752b3a 100644 --- a/nova/conductor/tasks/live_migrate.py +++ b/nova/conductor/tasks/live_migrate.py @@ -9,6 +9,7 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. +import copy from oslo_log import log as logging import oslo_messaging as messaging @@ -67,6 +68,7 @@ class LiveMigrationTask(base.TaskBase): self._source_cn = None self._held_allocations = None self.network_api = network.API() + self.instance_pci_request = None def _execute(self): self._check_instance_is_active() @@ -82,6 +84,11 @@ class LiveMigrationTask(base.TaskBase): self.instance, self.migration)) + # NOTE(gibi): migrating with qos SRIOV ports will update the PCI + # request so we save the request here so that we can roll that change + # back if the migration fails. 
+ self.instance_pci_request = copy.deepcopy(self.instance.pci_requests) + if not self.destination: # Either no host was specified in the API request and the user # wants the scheduler to pick a destination host, or a host was @@ -151,6 +158,9 @@ class LiveMigrationTask(base.TaskBase): self._source_cn, self.instance, self.migration) + if self.instance_pci_request: + self.instance.pci_requests = self.instance_pci_request + self.instance.save() def _check_instance_is_active(self): if self.instance.power_state not in (power_state.RUNNING, diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index 118cb141c460..7024947f3d4e 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -7249,7 +7249,7 @@ class ServerMoveWithPortResourceRequestTest( self.addCleanup(patcher.stop) patcher.start() - def test_live_migrate_with_qos_port(self): + def test_live_migrate_with_qos_port(self, host=None): # TODO(gibi): remove this when live migration is fully supported and # therefore the check is removed from the api self._turn_off_api_check() @@ -7270,7 +7270,7 @@ class ServerMoveWithPortResourceRequestTest( server['id'], { 'os-migrateLive': { - 'host': None, + 'host': host, 'block_migration': 'auto' } } @@ -7288,10 +7288,130 @@ class ServerMoveWithPortResourceRequestTest( self._delete_server_and_check_allocations( server, qos_normal_port, qos_sriov_port) + def test_live_migrate_with_qos_port_with_target_host(self): + self.test_live_migrate_with_qos_port(host='host2') + + def test_live_migrate_with_qos_port_reschedule_success(self): + # TODO(gibi): remove this when live migration is fully supported and + # therefore the check is removed from the api + self._turn_off_api_check() + + self._start_compute('host3') + compute3_rp_uuid = self._get_provider_uuid_by_host('host3') + self._create_networking_rp_tree('host3', compute3_rp_uuid) + + non_qos_normal_port = self.neutron.port_1 + qos_normal_port = 
self.neutron.port_with_resource_request + qos_sriov_port = self.neutron.port_with_sriov_resource_request + + server = self._create_server_with_ports( + non_qos_normal_port, qos_normal_port, qos_sriov_port) + + # check that the server allocates from the current host properly + self._check_allocation( + server, self.compute1_rp_uuid, non_qos_normal_port, + qos_normal_port, qos_sriov_port, self.flavor_with_group_policy) + + orig_check = nova.virt.fake.FakeDriver.\ + check_can_live_migrate_destination + + def fake_check_can_live_migrate_destination( + context, instance, src_compute_info, dst_compute_info, + block_migration=False, disk_over_commit=False): + if dst_compute_info['host'] == 'host2': + raise exception.MigrationPreCheckError( + reason='test_live_migrate_pre_check_fails') + else: + return orig_check( + context, instance, src_compute_info, dst_compute_info, + block_migration, disk_over_commit) + + with mock.patch('nova.virt.fake.FakeDriver.' + 'check_can_live_migrate_destination', + side_effect=fake_check_can_live_migrate_destination): + self.api.post_server_action( + server['id'], + { + 'os-migrateLive': { + 'host': None, + 'block_migration': 'auto' + } + } + ) + # The first migration attempt was to host2. So we expect that the + # instance lands on host3. 
+ self._wait_for_server_parameter( + server, + {'OS-EXT-SRV-ATTR:host': 'host3', + 'status': 'ACTIVE'}) + + self._check_allocation( + server, compute3_rp_uuid, non_qos_normal_port, + qos_normal_port, qos_sriov_port, self.flavor_with_group_policy) + + self._delete_server_and_check_allocations( + server, qos_normal_port, qos_sriov_port) + + def test_live_migrate_with_qos_port_reschedule_fails(self): + # TODO(gibi): remove this when live migration is fully supported and + # therefore the check is removed from the api + self._turn_off_api_check() + + non_qos_normal_port = self.neutron.port_1 + qos_normal_port = self.neutron.port_with_resource_request + qos_sriov_port = self.neutron.port_with_sriov_resource_request + + server = self._create_server_with_ports( + non_qos_normal_port, qos_normal_port, qos_sriov_port) + + # check that the server allocates from the current host properly + self._check_allocation( + server, self.compute1_rp_uuid, non_qos_normal_port, + qos_normal_port, qos_sriov_port, self.flavor_with_group_policy) + + with mock.patch( + 'nova.virt.fake.FakeDriver.check_can_live_migrate_destination', + side_effect=exception.MigrationPreCheckError( + reason='test_live_migrate_pre_check_fails')): + self.api.post_server_action( + server['id'], + { + 'os-migrateLive': { + 'host': None, + 'block_migration': 'auto' + } + } + ) + # Every migration target host will fail the pre check so + # the conductor will run out of target hosts and the migration + # will fail + self._wait_for_migration_status(server, ['error']) + + # the server will remain on host1 + self._wait_for_server_parameter( + server, + {'OS-EXT-SRV-ATTR:host': 'host1', + 'status': 'ACTIVE'}) + + self._check_allocation( + server, self.compute1_rp_uuid, non_qos_normal_port, + qos_normal_port, qos_sriov_port, self.flavor_with_group_policy) + + # Assert that the InstancePCIRequests also rolled back to point to + # host1 + ctxt = context.get_admin_context() + pci_requests = 
objects.InstancePCIRequests.get_by_instance_uuid( + ctxt, server['id']) + self.assertEqual(1, len(pci_requests.requests)) + self.assertEqual(1, len(pci_requests.requests[0].spec)) + self.assertEqual( + 'host1-ens2', + pci_requests.requests[0].spec[0]['parent_ifname']) + + self._delete_server_and_check_allocations( + server, qos_normal_port, qos_sriov_port) + # TODO(gibi): add tests for live migration cases: - # * with target host - # * re-schedule success - # * re-schedule fail -> pci request cleanup? # * abort / cancel -> dest / pci request cleanup? diff --git a/nova/tests/unit/conductor/tasks/test_live_migrate.py b/nova/tests/unit/conductor/tasks/test_live_migrate.py index 45678c12a129..9eed38b15967 100644 --- a/nova/tests/unit/conductor/tasks/test_live_migrate.py +++ b/nova/tests/unit/conductor/tasks/test_live_migrate.py @@ -60,6 +60,7 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase): self.context, objects.Instance(), db_instance) self.instance.system_metadata = {'image_hw_disk_bus': 'scsi'} self.instance.numa_topology = None + self.instance.pci_requests = None self.destination = "destination" self.block_migration = "bm" self.disk_over_commit = "doc"