Add handling for offlined CPUs to the nova libvirt driver.
When a host system has CPUs that are offlined via CPU hotplug, nova fails to start an instance on the host. Currently the libvirt driver does not check whether the CPUs it selects for running the instance are online or offline. As a result, offline CPUs can become part of the cpuset that is passed to libvirt. Libvirt reports the following error in this case: "libvirtError: Invalid value '8-15,24-31' for 'cpuset.cpus': Invalid argument". With this fix, the nova libvirt driver uses the getCPUMap API in libvirt to determine which CPUs are online or offline. When selecting a CPU set for running an instance, offline CPUs are masked out. Rationale: on server platforms like s390, it is common to have offlined CPUs on a host, as the platform offers capabilities to run multiple host operating systems (e.g. multiple KVM hypervisors / compute nodes). CPUs can be dynamically assigned to the different host operating systems, so it is common to have offlined CPUs on a compute node. Change-Id: I506ebc9608e17e02d807e5002fe867309c22aafc Closes-Bug: #1417144
This commit is contained in:
parent
71d116eaef
commit
0696a5cd5f
@ -827,6 +827,10 @@ class Connection(object):
|
||||
def registerCloseCallback(self, cb, opaque):
    """Accept a close callback and its opaque data; do nothing with them."""
    return None
|
||||
|
||||
def getCPUMap(self):
    """Return a spoofed CPU map that reports 2 CPUs, both online."""
    cpu_count = 2
    # One flag per CPU; True marks the CPU as online.
    online_flags = [True] * cpu_count
    return (cpu_count, online_flags, cpu_count)
|
||||
|
||||
def getCapabilities(self):
|
||||
"""Return spoofed capabilities."""
|
||||
return '''<capabilities>
|
||||
|
@ -1234,7 +1234,9 @@ class LibvirtConnTestCase(test.TestCase):
|
||||
mock.patch.object(
|
||||
random, 'choice', side_effect=lambda cells: cells[0]),
|
||||
mock.patch.object(pci_manager, "get_instance_pci_devs",
|
||||
return_value=[pci_device])):
|
||||
return_value=[pci_device]),
|
||||
mock.patch.object(host.Host, 'get_online_cpus',
|
||||
return_value=set(range(8)))):
|
||||
cfg = conn._get_guest_config(instance_ref, [], {}, disk_info)
|
||||
self.assertIsNone(instance_ref.numa_topology)
|
||||
self.assertEqual(set([2, 3]), cfg.cpuset)
|
||||
@ -1279,6 +1281,8 @@ class LibvirtConnTestCase(test.TestCase):
|
||||
host.Host, "get_capabilities", return_value=caps),
|
||||
mock.patch.object(
|
||||
hardware, 'get_vcpu_pin_set', return_value=set([3])),
|
||||
mock.patch.object(host.Host, 'get_online_cpus',
|
||||
return_value=set(range(8))),
|
||||
mock.patch.object(pci_manager, "get_instance_pci_devs",
|
||||
return_value=[pci_device])):
|
||||
cfg = conn._get_guest_config(instance_ref, [], {}, disk_info)
|
||||
@ -1402,9 +1406,12 @@ class LibvirtConnTestCase(test.TestCase):
|
||||
mock.patch.object(
|
||||
hardware, 'get_vcpu_pin_set', return_value=set([2, 3])),
|
||||
mock.patch.object(
|
||||
random, 'choice', side_effect=lambda cells: cells[0])
|
||||
random, 'choice', side_effect=lambda cells: cells[0]),
|
||||
mock.patch.object(host.Host, 'get_online_cpus',
|
||||
return_value=set(range(8)))
|
||||
) as (has_min_version_mock, get_host_cap_mock,
|
||||
get_vcpu_pin_set_mock, choice_mock):
|
||||
get_vcpu_pin_set_mock, choice_mock,
|
||||
get_online_cpus_mock):
|
||||
cfg = drvr._get_guest_config(instance_ref, [], {}, disk_info)
|
||||
# NOTE(ndipanov): we make sure that pin_set was taken into account
|
||||
# when choosing viable cells
|
||||
@ -1498,7 +1505,9 @@ class LibvirtConnTestCase(test.TestCase):
|
||||
return_value=caps),
|
||||
mock.patch.object(
|
||||
hardware, 'get_vcpu_pin_set',
|
||||
return_value=set([2, 3, 4, 5]))
|
||||
return_value=set([2, 3, 4, 5])),
|
||||
mock.patch.object(host.Host, 'get_online_cpus',
|
||||
return_value=set(range(8))),
|
||||
):
|
||||
cfg = drvr._get_guest_config(instance_ref, [], {}, disk_info)
|
||||
self.assertIsNone(cfg.cpuset)
|
||||
@ -1575,6 +1584,8 @@ class LibvirtConnTestCase(test.TestCase):
|
||||
return_value=True),
|
||||
mock.patch.object(host.Host, "get_capabilities",
|
||||
return_value=caps),
|
||||
mock.patch.object(host.Host, 'get_online_cpus',
|
||||
return_value=set(range(8))),
|
||||
):
|
||||
cfg = drvr._get_guest_config(instance_ref, [], {}, disk_info)
|
||||
self.assertIsNone(cfg.cpuset)
|
||||
@ -1649,7 +1660,9 @@ class LibvirtConnTestCase(test.TestCase):
|
||||
mock.patch.object(host.Host, 'has_min_version',
|
||||
return_value=True),
|
||||
mock.patch.object(host.Host, "get_capabilities",
|
||||
return_value=caps)
|
||||
return_value=caps),
|
||||
mock.patch.object(host.Host, 'get_online_cpus',
|
||||
return_value=set(range(8))),
|
||||
):
|
||||
cfg = conn._get_guest_config(instance_ref, [], {}, disk_info)
|
||||
self.assertIsNone(cfg.cpuset)
|
||||
@ -9543,7 +9556,10 @@ class LibvirtConnTestCase(test.TestCase):
|
||||
mock.patch.object(host.Host, "get_capabilities",
|
||||
return_value=caps),
|
||||
mock.patch.object(
|
||||
hardware, 'get_vcpu_pin_set', return_value=set([0, 1, 3]))
|
||||
hardware, 'get_vcpu_pin_set',
|
||||
return_value=set([0, 1, 3, 4, 5])),
|
||||
mock.patch.object(host.Host, 'get_online_cpus',
|
||||
return_value=set([0, 1, 2, 3, 6])),
|
||||
):
|
||||
got_topo = drvr._get_host_numa_topology()
|
||||
got_topo_dict = got_topo._to_dict()
|
||||
|
@ -4736,6 +4736,11 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
|
||||
cells = []
|
||||
allowed_cpus = hardware.get_vcpu_pin_set()
|
||||
online_cpus = self._host.get_online_cpus()
|
||||
if allowed_cpus:
|
||||
allowed_cpus &= online_cpus
|
||||
else:
|
||||
allowed_cpus = online_cpus
|
||||
|
||||
for cell in topology.cells:
|
||||
cpuset = set(cpu.id for cpu in cell.cpus)
|
||||
@ -4744,9 +4749,8 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
if cpu.siblings else ()
|
||||
for cpu in cell.cpus)
|
||||
))
|
||||
if allowed_cpus:
|
||||
cpuset &= allowed_cpus
|
||||
siblings = [sib & allowed_cpus for sib in siblings]
|
||||
cpuset &= allowed_cpus
|
||||
siblings = [sib & allowed_cpus for sib in siblings]
|
||||
# Filter out singles and empty sibling sets that may be left
|
||||
siblings = [sib for sib in siblings if len(sib) > 1]
|
||||
|
||||
|
@ -585,6 +585,26 @@ class Host(object):
|
||||
|
||||
return doms
|
||||
|
||||
def get_online_cpus(self):
    """Get the set of CPUs that are online on the host

    Method is only used by NUMA code paths which check on
    libvirt version >= 1.0.4. getCPUMap() was introduced in
    libvirt 1.0.0.

    :returns: set of ids (ints) of the CPUs that are online
    :raises: libvirtError on error

    """
    # getCPUMap() yields (number of CPUs, per-CPU online flags,
    # number of online CPUs); the last element is not needed here.
    (cpus, cpu_map, _online) = self.get_connection().getCPUMap()

    # Pair each CPU id with its flag through zip() so that a map which
    # is shorter than the reported CPU count cannot raise IndexError.
    return set(cpu for cpu, is_online in zip(range(cpus), cpu_map)
               if is_online)
|
||||
|
||||
def get_capabilities(self):
|
||||
"""Returns the host capabilities information
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user