Merge "Add functional test for libvirt vgpu reshape"
nova/tests/functional/libvirt/base.py
@@ -57,11 +57,14 @@ class ServersTestBase(base.ServersTestBase):
         # service in the test
         self.flags(compute_driver='libvirt.LibvirtDriver')
 
-    def _get_connection(self, host_info, pci_info=None):
+    def _get_connection(self, host_info, pci_info=None,
+                        libvirt_version=fakelibvirt.FAKE_LIBVIRT_VERSION,
+                        mdev_info=None):
         fake_connection = fakelibvirt.Connection(
             'qemu:///system',
-            version=fakelibvirt.FAKE_LIBVIRT_VERSION,
+            version=libvirt_version,
             hv_version=fakelibvirt.FAKE_QEMU_VERSION,
             host_info=host_info,
-            pci_info=pci_info)
+            pci_info=pci_info,
+            mdev_info=mdev_info)
         return fake_connection

nova/tests/functional/libvirt/test_reshape.py (new file, 200 lines)
@@ -0,0 +1,200 @@
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+import time
+
+import mock
+from oslo_config import cfg
+from oslo_log import log as logging
+
+from nova.tests.functional.libvirt import base
+from nova.tests.unit.virt.libvirt import fakelibvirt
+
+CONF = cfg.CONF
+LOG = logging.getLogger(__name__)
+
+
+class VGPUReshapeTests(base.ServersTestBase):
+    # the minimum libvirt version needed for vgpu support
+    MIN_LIBVIRT_MDEV_SUPPORT = 3004000
+
+    def _wait_for_state_change(self, server, expected_status):
+        for i in range(0, 50):
+            server = self.api.get_server(server['id'])
+            if server['status'] == expected_status:
+                return server
+            time.sleep(.1)
+        self.assertEqual(expected_status, server['status'])
+        return server
+
+    def test_create_servers_with_vgpu(self):
+        """Verify that the vgpu reshape works with the libvirt driver
+
+        1) create two servers with an old tree where the VGPU resource is on
+           the compute provider
+        2) trigger a reshape
+        3) check that the allocations of the servers are still valid
+        4) create another server, now against the new tree
+        """
+
+        # NOTE(gibi): We cannot simply ask the virt driver to create an old
+        # RP tree with vgpu on the root RP as that code path does not exist
+        # any more. So we have to hack a bit. We will create a compute
+        # service without vgpu support to have the compute RP ready, then we
+        # manually add the VGPU resources to that RP in placement. We also
+        # make sure that during the instance claim the virt driver does not
+        # detect the old tree, as that would be a bad time for a reshape.
+        # The reshape happens later, when the compute service is restarted.
+
+        fake_connection = self._get_connection(
+            # We need more RAM or the 3rd server won't be created
+            host_info=fakelibvirt.HostInfo(kB_mem=8192),
+            libvirt_version=self.MIN_LIBVIRT_MDEV_SUPPORT,
+            mdev_info=fakelibvirt.HostMdevDevicesInfo())
+        self.mock_conn.return_value = fake_connection
+
+        # start a compute with vgpu support disabled so the driver will
+        # ignore the content of the above HostMdevDevicesInfo
+        self.flags(enabled_vgpu_types='', group='devices')
+        self.compute = self.start_service('compute', host='compute1')
+
+        # create the VGPU resource in placement manually
+        compute_rp_uuid = self.placement_api.get(
+            '/resource_providers?name=compute1').body[
+            'resource_providers'][0]['uuid']
+        inventories = self.placement_api.get(
+            '/resource_providers/%s/inventories' % compute_rp_uuid).body
+        inventories['inventories']['VGPU'] = {
+            'allocation_ratio': 1.0,
+            'max_unit': 3,
+            'min_unit': 1,
+            'reserved': 0,
+            'step_size': 1,
+            'total': 3}
+        self.placement_api.put(
+            '/resource_providers/%s/inventories' % compute_rp_uuid,
+            inventories)
+
+        # now we boot two servers with vgpu
+        extra_spec = {"resources:VGPU": 1}
+        flavor_id = self._create_flavor(extra_spec=extra_spec)
+
+        server_req = self._build_server(flavor_id)
+
+        # NOTE(gibi): during instance_claim() there is a
+        # driver.update_provider_tree() call that would detect the old tree
+        # and would fail as this is not a good time to reshape. To avoid
+        # that, we temporarily mock out update_provider_tree here.
+        with mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
+                        'update_provider_tree'):
+            created_server1 = self.api.post_server({'server': server_req})
+            server1 = self._wait_for_state_change(created_server1, 'ACTIVE')
+            created_server2 = self.api.post_server({'server': server_req})
+            server2 = self._wait_for_state_change(created_server2, 'ACTIVE')
+
+        # verify that the inventory, usages and allocations are correct
+        # before the reshape
+        compute_inventory = self.placement_api.get(
+            '/resource_providers/%s/inventories' % compute_rp_uuid).body[
+            'inventories']
+        self.assertEqual(3, compute_inventory['VGPU']['total'])
+        compute_usages = self.placement_api.get(
+            '/resource_providers/%s/usages' % compute_rp_uuid).body[
+            'usages']
+        self.assertEqual(2, compute_usages['VGPU'])
+
+        for server in (server1, server2):
+            allocations = self.placement_api.get(
+                '/allocations/%s' % server['id']).body['allocations']
+            # the flavor has disk=10 and ephemeral=10
+            self.assertEqual(
+                {'DISK_GB': 20, 'MEMORY_MB': 2048, 'VCPU': 2, 'VGPU': 1},
+                allocations[compute_rp_uuid]['resources'])
+
+        # enable vgpu support
+        self.flags(
+            enabled_vgpu_types=fakelibvirt.NVIDIA_11_VGPU_TYPE,
+            group='devices')
+        # restart compute, which will trigger a reshape
+        self.restart_compute_service(self.compute)
+
+        # verify that the inventory, usages and allocations are correct
+        # after the reshape
+        compute_inventory = self.placement_api.get(
+            '/resource_providers/%s/inventories' % compute_rp_uuid).body[
+            'inventories']
+        self.assertNotIn('VGPU', compute_inventory)
+
+        # NOTE(sbauza): The two instances will use two different pGPUs.
+        # That said, we need to check all the pGPU inventories to know
+        # which ones are used.
+        usages = {}
+        for pci_device in [fakelibvirt.PGPU1_PCI_ADDR,
+                           fakelibvirt.PGPU2_PCI_ADDR,
+                           fakelibvirt.PGPU3_PCI_ADDR]:
+            gpu_rp_uuid = self.placement_api.get(
+                '/resource_providers?name=compute1_%s' % pci_device).body[
+                'resource_providers'][0]['uuid']
+            gpu_inventory = self.placement_api.get(
+                '/resource_providers/%s/inventories' % gpu_rp_uuid).body[
+                'inventories']
+            self.assertEqual(1, gpu_inventory['VGPU']['total'])
+
+            gpu_usages = self.placement_api.get(
+                '/resource_providers/%s/usages' % gpu_rp_uuid).body[
+                'usages']
+            usages[pci_device] = gpu_usages['VGPU']
+        # make sure that the two instances are using different pGPUs
+        used_devices = [dev for dev, usage in usages.items() if usage == 1]
+        avail_devices = list(set(usages.keys()) - set(used_devices))
+        self.assertEqual(2, len(used_devices))
+
+        for server in [server1, server2]:
+            allocations = self.placement_api.get(
+                '/allocations/%s' % server['id']).body[
+                'allocations']
+            self.assertEqual(
+                {'DISK_GB': 20, 'MEMORY_MB': 2048, 'VCPU': 2},
+                allocations[compute_rp_uuid]['resources'])
+            rp_uuids = list(allocations.keys())
+            # we only have two RPs, the compute RP (the root) and the child
+            # pGPU RP
+            gpu_rp_uuid = (rp_uuids[1] if rp_uuids[0] == compute_rp_uuid
+                           else rp_uuids[0])
+            self.assertEqual(
+                {'VGPU': 1},
+                allocations[gpu_rp_uuid]['resources'])
+
+        # now create one more instance with vgpu against the reshaped tree
+        created_server = self.api.post_server({'server': server_req})
+        server3 = self._wait_for_state_change(created_server, 'ACTIVE')
+
+        # find the pGPU that wasn't used before we created the third
+        # instance; it should have taken the previously available pGPU
+        device = avail_devices[0]
+        gpu_rp_uuid = self.placement_api.get(
+            '/resource_providers?name=compute1_%s' % device).body[
+            'resource_providers'][0]['uuid']
+        gpu_usages = self.placement_api.get(
+            '/resource_providers/%s/usages' % gpu_rp_uuid).body[
+            'usages']
+        self.assertEqual(1, gpu_usages['VGPU'])
+
+        allocations = self.placement_api.get(
+            '/allocations/%s' % server3['id']).body[
+            'allocations']
+        self.assertEqual(
+            {'DISK_GB': 20, 'MEMORY_MB': 2048, 'VCPU': 2},
+            allocations[compute_rp_uuid]['resources'])
+        self.assertEqual(
+            {'VGPU': 1},
+            allocations[gpu_rp_uuid]['resources'])
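To see the reshape at a glance, this is the allocation shape the test asserts for each of the first two servers, before and after the compute restart (illustrative summary only; the UUIDs are placeholders):

    # before: everything, including VGPU, is on the root compute RP
    {'<compute_rp_uuid>': {
        'resources': {'DISK_GB': 20, 'MEMORY_MB': 2048,
                      'VCPU': 2, 'VGPU': 1}}}

    # after: VGPU moved to a child RP named compute1_<pGPU PCI address>
    {'<compute_rp_uuid>': {
        'resources': {'DISK_GB': 20, 'MEMORY_MB': 2048, 'VCPU': 2}},
     '<gpu_rp_uuid>': {
        'resources': {'VGPU': 1}}}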

nova/tests/unit/virt/libvirt/fakelibvirt.py
@@ -172,6 +172,11 @@ VF_DRIVER_NAME = 'ixgbevf'
 VF_SLOT = '10'
 PF_SLOT = '00'
 
+NVIDIA_11_VGPU_TYPE = 'nvidia-11'
+PGPU1_PCI_ADDR = 'pci_0000_06_00_0'
+PGPU2_PCI_ADDR = 'pci_0000_07_00_0'
+PGPU3_PCI_ADDR = 'pci_0000_08_00_0'
+
 
 class FakePciDevice(object):
     pci_dev_template = """<device>
@@ -293,6 +298,57 @@ class HostPciSRIOVDevicesInfo(object):
         return pci_dev
 
 
+class FakeMdevDevice(object):
+    template = """
+    <device>
+      <name>%(dev_name)s</name>
+      <path>/sys/devices/pci0000:00/0000:00:02.0/%(path)s</path>
+      <parent>%(parent)s</parent>
+      <driver>
+        <name>vfio_mdev</name>
+      </driver>
+      <capability type='mdev'>
+        <type id='%(type_id)s'/>
+        <iommuGroup number='12'/>
+      </capability>
+    </device>
+    """
+
+    def __init__(self, dev_name, type_id, parent):
+        self.xml = self.template % {
+            'dev_name': dev_name, 'type_id': type_id,
+            'path': dev_name[len('mdev_'):],
+            'parent': parent}
+
+    def XMLDesc(self, flags):
+        return self.xml
+
+
+class HostMdevDevicesInfo(object):
+    def __init__(self):
+        self.devices = {
+            'mdev_4b20d080_1b54_4048_85b3_a6a62d165c01':
+                FakeMdevDevice(
+                    dev_name='mdev_4b20d080_1b54_4048_85b3_a6a62d165c01',
+                    type_id=NVIDIA_11_VGPU_TYPE, parent=PGPU1_PCI_ADDR),
+            'mdev_4b20d080_1b54_4048_85b3_a6a62d165c02':
+                FakeMdevDevice(
+                    dev_name='mdev_4b20d080_1b54_4048_85b3_a6a62d165c02',
+                    type_id=NVIDIA_11_VGPU_TYPE, parent=PGPU2_PCI_ADDR),
+            'mdev_4b20d080_1b54_4048_85b3_a6a62d165c03':
+                FakeMdevDevice(
+                    dev_name='mdev_4b20d080_1b54_4048_85b3_a6a62d165c03',
+                    type_id=NVIDIA_11_VGPU_TYPE, parent=PGPU3_PCI_ADDR),
+        }
+
+    def get_all_devices(self):
+        return self.devices.keys()
+
+    def get_device_by_name(self, device_name):
+        dev = self.devices[device_name]
+        return dev
+
+
 class HostInfo(object):
 
     def __init__(self, arch=obj_fields.Architecture.X86_64, kB_mem=4096,
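A quick sketch of what the fake exposes, using only the classes added above (illustrative only):

    info = HostMdevDevicesInfo()
    info.get_all_devices()
    # -> the three mdev names, e.g.
    #    'mdev_4b20d080_1b54_4048_85b3_a6a62d165c01'
    dev = info.get_device_by_name(
        'mdev_4b20d080_1b54_4048_85b3_a6a62d165c01')
    dev.XMLDesc(0)  # XML with type nvidia-11 and parent pci_0000_06_00_0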
@@ -705,6 +761,20 @@ class Domain(object):
 
         devices['nics'] = nics_info
 
+        hostdev_info = []
+        hostdevs = device_nodes.findall('./hostdev')
+        for hostdev in hostdevs:
+            address = hostdev.find('./source/address')
+            # NOTE(gibi): only handle mdevs as pci is complicated
+            dev_type = hostdev.get('type')
+            if dev_type == 'mdev':
+                hostdev_info.append({
+                    'type': dev_type,
+                    'model': hostdev.get('model'),
+                    'address_uuid': address.get('uuid')
+                })
+        devices['hostdevs'] = hostdev_info
+
         definition['devices'] = devices
 
         return definition
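For orientation, a standalone snippet (illustrative only, not part of the change) showing what the parsing loop above extracts from a minimal mdev <hostdev> element:

    import xml.etree.ElementTree as etree

    xml = """<devices>
      <hostdev mode='subsystem' type='mdev' model='vfio-pci'>
        <source>
          <address uuid='4b20d080-1b54-4048-85b3-a6a62d165c01'/>
        </source>
      </hostdev>
    </devices>"""

    device_nodes = etree.fromstring(xml)
    hostdev = device_nodes.find('./hostdev')
    address = hostdev.find('./source/address')
    print({'type': hostdev.get('type'),
           'model': hostdev.get('model'),
           'address_uuid': address.get('uuid')})
    # {'type': 'mdev', 'model': 'vfio-pci',
    #  'address_uuid': '4b20d080-1b54-4048-85b3-a6a62d165c01'}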
@@ -844,6 +914,15 @@ class Domain(object):
                      function='0x0'/>
           </interface>''' % nic
 
+        hostdevs = ''
+        for hostdev in self._def['devices']['hostdevs']:
+            hostdevs += '''<hostdev mode='subsystem' type='%(type)s' model='%(model)s'>
+    <source>
+      <address uuid='%(address_uuid)s'/>
+    </source>
+    </hostdev>
+            ''' % hostdev
+
         return '''<domain type='kvm'>
       <name>%(name)s</name>
       <uuid>%(uuid)s</uuid>
@@ -899,6 +978,7 @@ class Domain(object):
         <address type='pci' domain='0x0000' bus='0x00' slot='0x04'
          function='0x0'/>
       </memballoon>
+      %(hostdevs)s
     </devices>
 </domain>''' % {'name': self._def['name'],
                 'uuid': self._def['uuid'],
@@ -906,7 +986,8 @@ class Domain(object):
                 'vcpu': self._def['vcpu'],
                 'arch': self._def['os']['arch'],
                 'disks': disks,
-                'nics': nics}
+                'nics': nics,
+                'hostdevs': hostdevs}
 
     def managedSave(self, flags):
         self._connection._mark_not_running(self)
@@ -995,7 +1076,8 @@ class DomainSnapshot(object):
 
 class Connection(object):
     def __init__(self, uri=None, readonly=False, version=FAKE_LIBVIRT_VERSION,
-                 hv_version=FAKE_QEMU_VERSION, host_info=None, pci_info=None):
+                 hv_version=FAKE_QEMU_VERSION, host_info=None, pci_info=None,
+                 mdev_info=None):
         if not uri or uri == '':
             if allow_default_uri_connection:
                 uri = 'qemu:///session'
@@ -1031,6 +1113,7 @@ class Connection(object):
         self.host_info = host_info or HostInfo()
         self.pci_info = pci_info or HostPciSRIOVDevicesInfo(num_pfs=0,
                                                             num_vfs=0)
+        self.mdev_info = mdev_info or []
 
     def _add_filter(self, nwfilter):
         self._nwfilters[nwfilter._name] = nwfilter
@@ -1439,6 +1522,9 @@ class Connection(object):
         return self.pci_info.get_device_by_name(dev_name)
 
     def nodeDeviceLookupByName(self, name):
+        if name.startswith('mdev'):
+            return self.mdev_info.get_device_by_name(name)
+
         pci_dev = self.pci_info.get_device_by_name(name)
         if pci_dev:
             return pci_dev
@@ -1452,7 +1538,17 @@ class Connection(object):
                                  error_domain=VIR_FROM_NODEDEV)
 
     def listDevices(self, cap, flags):
-        return self.pci_info.get_all_devices()
+        if cap == 'pci':
+            return self.pci_info.get_all_devices()
+        if cap == 'mdev':
+            return self.mdev_info.get_all_devices()
+        if cap == 'mdev_types':
+            # TODO(gibi): We should return something like
+            # https://libvirt.org/drvnodedev.html#MDEVCap but I tried and it
+            # did not work for me.
+            return None
+        else:
+            raise ValueError('Capability "%s" is not supported' % cap)
 
     def baselineCPU(self, cpu, flag):
         """Add new libvirt API."""