From 4c6f3b688126f7ec9ef447611dfe693b23e5046f Mon Sep 17 00:00:00 2001 From: James Parker Date: Wed, 27 Jan 2021 12:51:38 -0500 Subject: [PATCH] Add SR-IOV Live Migration Test Added new test class to test_sriov, SRIOVMigration. Since there are common helper methods between SRIOVMigration and SRIOVNumaAffinity, a new parent class was created for the two test classes, SRIOVBase. SRIOVMigration adds two new test methods, test_sriov_direct_live_migration and test_sriov_macvtap_live_migration, which follow the exact same procedure but use different port types, direct and macvtap respectively. Each test creates a server, migrates it to a new host, and confirms that the xml parameters describing the port are correct and that the number of allocated pci_devices in the pci_devices database table is accurate. It then migrates the server back to the source host and rechecks the same set of criteria. Change-Id: I08b769dc3cfe52329093c5953768b023b0f40b9f --- .../api/compute/test_sriov.py | 468 +++++++++++++----- 1 file changed, 338 insertions(+), 130 deletions(-) diff --git a/whitebox_tempest_plugin/api/compute/test_sriov.py b/whitebox_tempest_plugin/api/compute/test_sriov.py index 6864ad63..87b8209a 100644 --- a/whitebox_tempest_plugin/api/compute/test_sriov.py +++ b/whitebox_tempest_plugin/api/compute/test_sriov.py @@ -26,8 +26,201 @@ CONF = config.CONF LOG = logging.getLogger(__name__) -class SRIOVNumaAffinity(base.BaseWhiteboxComputeTest): +class SRIOVBase(base.BaseWhiteboxComputeTest): + @classmethod + def skip_checks(cls): + super(SRIOVBase, cls).skip_checks() + if getattr(CONF.whitebox_hardware, + 'sriov_physnet', None) is None: + raise cls.skipException('Requires sriov_physnet parameter ' + 'to be set in order to execute test ' + 'cases.') + if getattr(CONF.network_feature_enabled, + 'provider_net_base_segmentation_id', None) is None: + raise cls.skipException('Requires ' + 'provider_net_base_segmentation_id ' + 'parameter to be set in order to execute ' + 'test cases.') + + def 
_get_expected_xml_interface_type(self, port): + """Return expected domain xml interface type based on port vnic_type + + :param port: dictionary with port details + :return xml_vnic_type: the vnic_type as it is expected to be + represented in a guest's XML + """ + vnic_type = port['port']['binding:vnic_type'] + # NOTE: SR-IOV Port binding vnic type has been known to cause confusion + # when mapping the value to the underlying instance XML. A vnic_type + # that is direct is a 'hostdev' or Host device assignment that + # is passing the device directly from the host to the guest. A + # vnic_type that is macvtap, or 'direct' in the guest xml, is using the + # macvtap driver to attach a guest's NIC directly to a specified + # physical interface on the host. + if vnic_type == 'direct': + return 'hostdev' + elif vnic_type == 'macvtap': + return 'direct' + + def _create_sriov_net(self): + """Create an IPv4 L2 vlan network. Physical network provider comes + from sriov_physnet provided in tempest config + + :return net A dictionary describing details about the created network + """ + name_net = data_utils.rand_name(self.__class__.__name__) + vlan_id = \ + CONF.network_feature_enabled.provider_net_base_segmentation_id + physical_net = CONF.whitebox_hardware.sriov_physnet + net_dict = { + 'provider:network_type': 'vlan', + 'provider:physical_network': physical_net, + 'provider:segmentation_id': vlan_id + } + net = self.os_admin.networks_client.create_network( + name=name_net, + **net_dict) + self.addCleanup(self.os_admin.networks_client.delete_network, + net['network']['id']) + return net + + def _create_sriov_subnet(self, network_id): + """Create an IPv4 L2 vlan network. 
Physical network provider comes + from sriov_physnet provided in tempest config + + :param network_id: str, network id subnet will be associated with + :return net A dictionary describing details about the created network + """ + name_subnet = data_utils.rand_name(self.__class__.__name__) + subnet = self.os_admin.subnets_client.create_subnet( + name=name_subnet, + network_id=network_id, + cidr=CONF.network.project_network_cidr, + ip_version=4 + ) + self.addCleanup( + self.os_admin.subnets_client.delete_subnet, + subnet['subnet']['id'] + ) + return subnet + + def _create_sriov_port(self, net, vnic_type): + """Create an sr-iov port based on the provided vnic type + + :param net: dictionary with network details + :param vnic_type: str, representing the vnic type to use with creating + the sriov port, e.g. direct, macvtap, etc. + :return port: dictionary with details about newly created port provided + by neutron ports client + """ + vnic_params = {'binding:vnic_type': vnic_type} + port = self.os_admin.ports_client.create_port( + network_id=net['network']['id'], + **vnic_params) + self.addCleanup(self.os_admin.ports_client.delete_port, + port['port']['id']) + return port + + def _get_xml_interface_device(self, server_id, port_id): + """Returns xml interface element that matches provided port mac + and interface type. It is technically possible to have multiple ports + with the same MAC address in an instance, so method functionality may + break in the future. 
+ + :param server_id: str, id of the instance to analyze + :param port_id: str, port id to request from the ports client + :return xml_network_device: The xml network device element that matches + the port search criteria + """ + port_info = self.os_admin.ports_client.show_port(port_id) + interface_type = self._get_expected_xml_interface_type(port_info) + root = self.get_server_xml(server_id) + mac = port_info['port']['mac_address'] + interface_list = root.findall( + "./devices/interface[@type='%s']/mac[@address='%s'].." + % (interface_type, mac) + ) + self.assertEqual(len(interface_list), 1, 'Expect to find one ' + 'and only one instance of interface but ' + 'instead found %d instances' % + len(interface_list)) + return interface_list[0] + + def _validate_port_xml_vlan_tag(self, port_xml_element, expected_vlan): + """Validates the vlan tag id of the provided xml interface element + + :param port_xml_element: xml interface element to inspect + :param expected_vlan: vlan id expected in the element's vlan tag + """ + interface_vlan = port_xml_element.find("./vlan/tag").get('id', None) + self.assertEqual( + expected_vlan, interface_vlan, 'Interface should have have vlan ' + 'tag %s but instead it is tagged with %s' % + (expected_vlan, interface_vlan)) + + def _get_port_attribute(self, port_id, attribute): + """Get a specific attribute for provided port id + + :param port_id: str the port id to search for + :param attribute: str the attribute or key to check from the returned + port dictionary + :return port_attribute: the requested port attribute value + """ + body = self.os_admin.ports_client.show_port(port_id) + port = body['port'] + return port.get(attribute) + + def _search_pci_devices(self, column, value): + """Returns all pci_device's address, status, and dev_type that match + query criteria. 
+ + :param column: str, the column in the pci_devices table to search + :param value: str, the specific value in the column to query for + return query_match: json, all pci_devices that match specified query + """ + db_client = clients.DatabaseClient() + db = CONF.whitebox_database.nova_cell1_db_name + with db_client.cursor(db) as cursor: + cursor.execute( + 'SELECT address,status,dev_type FROM ' + 'pci_devices WHERE %s = "%s"' % (column, value)) + data = cursor.fetchall() + return data + + def _verify_neutron_port_binding(self, server_id, port_id): + """Verifies db metrics are accurate for the state of the provided + port_id + + :param port_id str, the port id to request from the ports client + :param server_id str, the guest id to check + """ + binding_profile = self._get_port_attribute(port_id, 'binding:profile') + vnic_type = self._get_port_attribute(port_id, 'binding:vnic_type') + pci_info = self._search_pci_devices('instance_uuid', server_id) + for pci_device in pci_info: + self.assertEqual( + "allocated", pci_device['status'], 'Physical function %s is ' + 'in status %s and not in status allocated' % + (pci_device['address'], pci_device['status'])) + self.assertEqual( + pci_device['address'], + binding_profile['pci_slot'], 'PCI device ' + 'information in Nova and and Binding profile information in ' + 'Neutron mismatch') + if vnic_type == 'direct-physical': + self.assertEqual(pci_device['dev_type'], 'type-PF') + else: + # vnic_type direct, macvtap or virtio-forwarder can use VF or + # type pci devices. + self.assertIn(pci_device['dev_type'], ['type-VF', 'type-PCI']) + + +class SRIOVNumaAffinity(SRIOVBase): + + # Test utilizes the optional host parameter for server creation introduced + # in 2.74. It allows the guest to be scheduled to a specific compute host. + # This allows the test to fill NUMA nodes on the same host. 
min_microversion = '2.74' required = {'hw:cpu_policy': 'dedicated', @@ -48,116 +241,23 @@ class SRIOVNumaAffinity(base.BaseWhiteboxComputeTest): 'cases.') if getattr(CONF.whitebox_hardware, 'physnet_numa_affinity', None) is None: - raise cls.skipException('Requires physnet_numa_affinity_map ' - 'parameter to be set in order to execute ' - 'test cases.') - if getattr(CONF.network_feature_enabled, - 'provider_net_base_segmentation_id', None) is None: - raise cls.skipException('Requires ' - 'provider_net_base_segmentation_id ' - 'parameter to be set in order to execute ' - 'test cases.') + raise cls.skipException('Requires physnet_numa_affinity parameter ' + 'to be set in order to execute test ' + 'cases.') if len(CONF.whitebox_hardware.cpu_topology) < 2: raise cls.skipException('Requires 2 or more NUMA nodes to ' 'execute test.') - @classmethod - def setup_clients(cls): - super(SRIOVNumaAffinity, cls).setup_clients() - cls.networks_client = cls.os_admin.networks_client - cls.subnets_client = cls.os_admin.subnets_client - cls.ports_client = cls.os_admin.ports_client - def setUp(self): super(SRIOVNumaAffinity, self).setUp() network = self._create_sriov_net() - self.port_a = self._create_sriov_port(network) - self.port_b = self._create_sriov_port(network) - - def _get_expected_xml_interface_type(self, port): - """Return expected domain xml interface type based on port vnic_type - - :param port: dictionary with port details - :return str: the xml interface type. - """ - vnic_type = port['port']['binding:vnic_type'] - # NOTE: SR-IOV Port binding vnic type has been known to cause confusion - # when mapping the value to the underlying instance XML. A vnic_type - # that is direct is a 'hostdev' or Host device assignment that is - # is passing the device directly from the host to the guest. A - # vnic_type that is macvtap or 'direct' in the guest xml, is using the - # macvtap driver to attach a guests NIC directly to a specified - # physical interface on the host. 
- if vnic_type == 'direct': - return 'hostdev' - elif vnic_type == 'macvtap': - return 'direct' - - def _create_sriov_net(self): - """Create an IPv4 L2 vlan network and subnet. Physical network - provider comes from sriov_physnet provided in tempest config - - :return net A dictionary describing details about the created network - """ - name_net = data_utils.rand_name(self.__class__.__name__) - vlan_id = \ - CONF.network_feature_enabled.provider_net_base_segmentation_id - physical_net = CONF.whitebox_hardware.sriov_physnet - net_dict = { - 'shared': True, - 'provider:network_type': 'vlan', - 'provider:physical_network': physical_net, - 'provider:segmentation_id': vlan_id - } - net = self.networks_client.create_network(name=name_net, - **net_dict) - self.addCleanup(self.networks_client.delete_network, - net['network']['id']) - - subnet = self.subnets_client.create_subnet( - network_id=net['network']['id'], - cidr=CONF.network.project_network_cidr, - ip_version=4 - ) - self.addCleanup( - self.subnets_client.delete_subnet, - subnet['subnet']['id'] - ) - - return net - - def _create_sriov_port(self, net): - """Create an sr-iov port with a vnic_type provided by tempest config - - :param net: dictionary with network details - :return port: dictionary with details about newly created port - """ - vnic_type = {'binding:vnic_type': CONF.network.port_vnic_type} - port = self.ports_client.create_port(network_id=net['network']['id'], - **vnic_type) - self.addCleanup(self.ports_client.delete_port, - port['port']['id']) - return port - - def _get_xml_interface_devices(self, server_id, port, interface_type): - """Returns xml interface element that matches provided port mac - and interface type. It is technically possible to have multiple ports - with the same MAC address in an instance, so method functionality may - break in the future. 
- - :param server_id: str, id of the instance to analyze - :param port: dictionary describing port to find - :param interface_type: str, interface type to look for in the xml - return intf: A list of xml elements that match the port - search criteria - """ - root = self.get_server_xml(server_id) - mac = port['port']['mac_address'] - interface_list = root.findall( - "./devices/interface[@type='%s']/mac[@address='%s'].." - % (interface_type, mac) - ) - return interface_list + self._create_sriov_subnet(network['network']['id']) + self.port_a = self._create_sriov_port( + net=network, + vnic_type=CONF.network.port_vnic_type) + self.port_b = self._create_sriov_port( + net=network, + vnic_type=CONF.network.port_vnic_type) def test_sriov_affinity_preferred(self): """Validate instance will schedule to NUMA without nic affinity @@ -237,21 +337,13 @@ class SRIOVNumaAffinity(base.BaseWhiteboxComputeTest): CONF.network_feature_enabled.provider_net_base_segmentation_id for server, port in zip([server_a, server_b], [self.port_a, self.port_b]): - interface_type = self._get_expected_xml_interface_type(port) - interface_list = self._get_xml_interface_devices( + interface_xml_element = self._get_xml_interface_device( server['id'], - port, - interface_type + port['port']['id'] ) - self.assertEqual(len(interface_list), 1, 'Expect to find one ' - 'and only one instance of interface but ' - 'instead found %d instances' % - len(interface_list)) - intf = interface_list[0] - interface_vlan = intf.find("./vlan/tag").get('id', None) - self.assertEqual(net_vlan, interface_vlan, 'Interface should ' - 'have vlan tag %s but instead it is tagged ' - 'with %s' % (net_vlan, interface_vlan)) + self._validate_port_xml_vlan_tag( + interface_xml_element, + net_vlan) # NOTE(jparker) At this point we have to manually delete both # servers before the config_option() context manager reverts @@ -315,21 +407,11 @@ class SRIOVNumaAffinity(base.BaseWhiteboxComputeTest): # in the xml. 
Its type and vlan should be accurate. net_vlan = \ CONF.network_feature_enabled.provider_net_base_segmentation_id - interface_type = self._get_expected_xml_interface_type(self.port_a) - interface_list = self._get_xml_interface_devices( + interface_xml_element = self._get_xml_interface_device( server_a['id'], - self.port_a, - interface_type + self.port_a['port']['id'] ) - self.assertEqual(len(interface_list), 1, 'Expect to find one and ' - 'only one instance of interface but instead ' - 'found %d instances' % len(interface_list)) - - interface = interface_list[0] - interface_vlan = interface.find("./vlan/tag").get('id', None) - self.assertEqual(net_vlan, interface_vlan, 'Interface should have ' - 'vlan tag %s but instead it is tagged with %s' % - (net_vlan, interface_vlan)) + self._validate_port_xml_vlan_tag(interface_xml_element, net_vlan) # Compare the cpu pin set from server A with the expected PCPU's # from the NUMA Node with affinity to SR-IOV NIC that was gathered @@ -345,3 +427,129 @@ class SRIOVNumaAffinity(base.BaseWhiteboxComputeTest): # server before the config_option() context manager reverts # any config changes it made. This is Nova bug 1836945. self.delete_server(server_a['id']) + + +class SRIOVMigration(SRIOVBase): + + # Test utilizes the optional host parameter for server creation introduced + # in 2.74 to schedule the guest to a specific compute host. This allows the + # test to dictate specific target hosts as the test progresses. 
+ min_microversion = '2.74' + + def setUp(self): + super(SRIOVMigration, self).setUp() + self.network = self._create_sriov_net() + self._create_sriov_subnet(self.network['network']['id']) + + @classmethod + def skip_checks(cls): + super(SRIOVMigration, cls).skip_checks() + if (CONF.compute.min_compute_nodes < 2 or + CONF.whitebox.max_compute_nodes > 2): + raise cls.skipException('Exactly 2 compute nodes required.') + + def _get_pci_status_count(self, status): + """Return the number of pci devices that match the status argument + + :param status: str, value to query from the pci_devices table + :return: int, the number of rows that match the provided status + """ + db_client = clients.DatabaseClient() + db = CONF.whitebox_database.nova_cell1_db_name + with db_client.cursor(db) as cursor: + cursor.execute('select COUNT(*) from pci_devices WHERE ' + 'status = "%s"' % status) + data = cursor.fetchall() + return data[0]['COUNT(*)'] + + def _base_test_live_migration(self, vnic_type): + """Shared test body that performs sr-iov live migration + + :param vnic_type: str, vnic_type to use when creating sr-iov port + """ + net_vlan = \ + CONF.network_feature_enabled.provider_net_base_segmentation_id + hostname1, hostname2 = self.list_compute_hosts() + flavor = self.create_flavor() + + port = self._create_sriov_port( + net=self.network, + vnic_type=vnic_type + ) + + server = self.create_test_server( + clients=self.os_admin, + flavor=flavor['id'], + networks=[{'port': port['port']['id']}], + host=hostname1 + ) + + # Live migrate the server + self.live_migrate(server['id'], hostname2, 'ACTIVE') + + # Search the instance's XML for the SR-IOV network device element based + # on the mac address and binding:vnic_type from port info + interface_xml_element = self._get_xml_interface_device( + server['id'], + port['port']['id'], + ) + + # Validate the vlan tag persisted in instance's XML after migration + self._validate_port_xml_vlan_tag(interface_xml_element, net_vlan) + + # Confirm 
dev_type, allocation status, and pci address information are + # correct in pci_devices table of openstack DB + self._verify_neutron_port_binding( + server['id'], + port['port']['id'] + ) + + # Validate the total allocation of pci devices is one and only one + # after instance migration + pci_allocated_count = self._get_pci_status_count('allocated') + self.assertEqual(pci_allocated_count, 1, 'Total allocated pci devices ' + 'after first migration should be 1 but instead ' + 'is %s' % pci_allocated_count) + + # Migrate server back to the original host + self.live_migrate(server['id'], hostname1, 'ACTIVE') + + # Again find the instance's network device element based on the mac + # address and binding:vnic_type from the port info provided by ports + # client + interface_xml_element = self._get_xml_interface_device( + server['id'], + port['port']['id'], + ) + + # Confirm vlan tag in interface XML, dev_type, allocation status, and + # pci address information are correct in pci_devices table of openstack + # DB after second migration + self._validate_port_xml_vlan_tag(interface_xml_element, net_vlan) + self._verify_neutron_port_binding( + server['id'], + port['port']['id'] + ) + + # Confirm total port allocations still remains one after final + # migration + pci_allocated_count = self._get_pci_status_count('allocated') + self.assertEqual(pci_allocated_count, 1, 'Total allocated pci devices ' + 'after second migration should be 1 but instead ' + 'is %s' % pci_allocated_count) + + # Resource cleanup does not take into effect until all test methods + # for class have finalized. Deleting server to free up port + # allocations so they do not impact other live migration tests from + # this test class. 
+ self.delete_server(server['id']) + + def test_sriov_direct_live_migration(self): + """Verify sriov live migration using direct type ports + """ + self._base_test_live_migration(vnic_type='direct') + + def test_sriov_macvtap_live_migration(self): + """Verify sriov live migration using macvtap type ports + """ + self._base_test_live_migration(vnic_type='macvtap')