From 046b640e2fbc243c6e9c55708d521e53f1485ddf Mon Sep 17 00:00:00 2001 From: Sahid Orentino Ferdjaoui Date: Wed, 17 Aug 2016 06:51:25 -0400 Subject: [PATCH] libvirt: fix incorrect host cpus giving to emulator threads when RT Realtime guarantees in certain operating systems require that the thread that is running the QEMU emulator is pinned to a physical CPU that is *not* the same as any physical CPU that the vCPUs for a realtime guest are pinned to. This patch ensures that the value of the hw:cpu_realtime_mask flavor extraspec property is respected when creating the libvirt configuration XML and sets emulatorpin values to a physical CPU matching the hw:cpu_realtime_mask value. Change-Id: I7f50dde0753b059a690dc50172fee645c94b8e5b Closes-Bug: #1614054 (cherry picked from commit 6683bf9b7dc575ef9516f0cdc395b8da1b81c233) --- nova/tests/unit/virt/libvirt/test_driver.py | 23 +++++++-- nova/tests/unit/virt/test_hardware.py | 22 +++++---- nova/virt/hardware.py | 17 ++----- nova/virt/libvirt/driver.py | 53 +++++++++++++-------- 4 files changed, 68 insertions(+), 47 deletions(-) diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index 54307b410908..0db3680b10e3 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -2741,15 +2741,15 @@ class LibvirtConnTestCase(test.NoDBTestCase): instance_topology = objects.InstanceNUMATopology( cells=[ objects.InstanceNUMACell( - id=1, cpuset=set([0, 1]), + id=2, cpuset=set([0, 1]), memory=1024, pagesize=2048), objects.InstanceNUMACell( - id=2, cpuset=set([2, 3]), + id=3, cpuset=set([2, 3]), memory=1024, pagesize=2048)]) instance_ref = objects.Instance(**self.test_instance) instance_ref.numa_topology = instance_topology image_meta = objects.ImageMeta.from_dict(self.test_image_meta) - flavor = objects.Flavor(memory_mb=2048, vcpus=2, root_gb=496, + flavor = objects.Flavor(memory_mb=2048, vcpus=4, root_gb=496, ephemeral_gb=8128, 
swap=33550336, name='fake', extra_specs={ "hw:cpu_realtime": "yes", @@ -2779,7 +2779,7 @@ class LibvirtConnTestCase(test.NoDBTestCase): return_value=caps), mock.patch.object( hardware, 'get_vcpu_pin_set', - return_value=set([2, 3, 4, 5])), + return_value=set([4, 5, 6, 7])), mock.patch.object(host.Host, 'get_online_cpus', return_value=set(range(8))), ): @@ -2810,8 +2810,21 @@ class LibvirtConnTestCase(test.NoDBTestCase): self.assertEqual(1, len(cfg.cputune.vcpusched)) self.assertEqual("fifo", cfg.cputune.vcpusched[0].scheduler) + + # Ensure vCPUs 0-1 are pinned on host CPUs 4-5 and 2-3 are + # set on host CPUs 6-7 according to the realtime mask ^0-1 + self.assertEqual(set([4, 5]), cfg.cputune.vcpupin[0].cpuset) + self.assertEqual(set([4, 5]), cfg.cputune.vcpupin[1].cpuset) + self.assertEqual(set([6, 7]), cfg.cputune.vcpupin[2].cpuset) + self.assertEqual(set([6, 7]), cfg.cputune.vcpupin[3].cpuset) + + # We ensure that emulator threads are pinned on host CPUs + # 4-5 which are "normal" vCPUs + self.assertEqual(set([4, 5]), cfg.cputune.emulatorpin.cpuset) + + # We ensure that the RT vCPUs are 2-3, set to the host CPUs + # which are 6, 7 self.assertEqual(set([2, 3]), cfg.cputune.vcpusched[0].vcpus) - self.assertEqual(set([0, 1]), cfg.cputune.emulatorpin.cpuset) def test_get_cpu_numa_config_from_instance(self): topology = objects.InstanceNUMATopology(cells=[ diff --git a/nova/tests/unit/virt/test_hardware.py b/nova/tests/unit/virt/test_hardware.py index 694d0eb21d3d..8477792c87c7 100644 --- a/nova/tests/unit/virt/test_hardware.py +++ b/nova/tests/unit/virt/test_hardware.py @@ -2769,30 +2769,32 @@ class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase): class CPURealtimeTestCase(test.NoDBTestCase): def test_success_flavor(self): - flavor = {"extra_specs": {"hw:cpu_realtime_mask": "^1"}} + flavor = objects.Flavor(vcpus=3, memory_mb=2048, + extra_specs={"hw:cpu_realtime_mask": "^1"}) image = objects.ImageMeta.from_dict({}) - rt, em = 
hw.vcpus_realtime_topology(set([0, 1, 2]), flavor, image) + rt = hw.vcpus_realtime_topology(flavor, image) self.assertEqual(set([0, 2]), rt) - self.assertEqual(set([1]), em) def test_success_image(self): - flavor = {"extra_specs": {}} + flavor = objects.Flavor(vcpus=3, memory_mb=2048, + extra_specs={"hw:cpu_realtime_mask": "^1"}) image = objects.ImageMeta.from_dict( {"properties": {"hw_cpu_realtime_mask": "^0-1"}}) - rt, em = hw.vcpus_realtime_topology(set([0, 1, 2]), flavor, image) + rt = hw.vcpus_realtime_topology(flavor, image) self.assertEqual(set([2]), rt) - self.assertEqual(set([0, 1]), em) def test_no_mask_configured(self): - flavor = {"extra_specs": {}} + flavor = objects.Flavor(vcpus=3, memory_mb=2048, + extra_specs={}) image = objects.ImageMeta.from_dict({"properties": {}}) self.assertRaises( exception.RealtimeMaskNotFoundOrInvalid, - hw.vcpus_realtime_topology, set([0, 1, 2]), flavor, image) + hw.vcpus_realtime_topology, flavor, image) def test_mask_badly_configured(self): - flavor = {"extra_specs": {"hw:cpu_realtime_mask": "^0-2"}} + flavor = objects.Flavor(vcpus=3, memory_mb=2048, + extra_specs={"hw:cpu_realtime_mask": "^0-2"}) image = objects.ImageMeta.from_dict({"properties": {}}) self.assertRaises( exception.RealtimeMaskNotFoundOrInvalid, - hw.vcpus_realtime_topology, set([0, 1, 2]), flavor, image) + hw.vcpus_realtime_topology, flavor, image) diff --git a/nova/virt/hardware.py b/nova/virt/hardware.py index 212bde7fdc8a..2954e227125d 100644 --- a/nova/virt/hardware.py +++ b/nova/virt/hardware.py @@ -1097,24 +1097,17 @@ def _get_realtime_mask(flavor, image): return image_mask or flavor_mask -def vcpus_realtime_topology(vcpus_set, flavor, image): - """Partitions vcpus used for realtime and 'normal' vcpus. - - According to a mask specified from flavor or image, returns set of - vcpus configured for realtime scheduler and set running as a - 'normal' vcpus. 
- """ +def vcpus_realtime_topology(flavor, image): + """Determines instance vCPUs used as RT for a given spec""" mask = _get_realtime_mask(flavor, image) if not mask: raise exception.RealtimeMaskNotFoundOrInvalid() - vcpus_spec = format_cpu_spec(vcpus_set) - vcpus_rt = parse_cpu_spec(vcpus_spec + ", " + mask) - vcpus_em = vcpus_set - vcpus_rt - if len(vcpus_rt) < 1 or len(vcpus_em) < 1: + vcpus_rt = parse_cpu_spec("0-%d,%s" % (flavor.vcpus - 1, mask)) + if len(vcpus_rt) < 1: raise exception.RealtimeMaskNotFoundOrInvalid() - return vcpus_rt, vcpus_em + return vcpus_rt def _numa_get_constraints_auto(nodes, flavor): diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index a239135a3988..7136fc274c47 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -3821,12 +3821,28 @@ class LibvirtDriver(driver.ComputeDriver): # Now get the CpuTune configuration from the numa_topology guest_cpu_tune = vconfig.LibvirtConfigGuestCPUTune() guest_numa_tune = vconfig.LibvirtConfigGuestNUMATune() - allpcpus = [] + emupcpus = [] numa_mem = vconfig.LibvirtConfigGuestNUMATuneMemory() numa_memnodes = [vconfig.LibvirtConfigGuestNUMATuneMemNode() for _ in guest_cpu_numa_config.cells] + vcpus_rt = set([]) + wants_realtime = hardware.is_realtime_enabled(flavor) + if wants_realtime: + if not self._host.has_min_version( + MIN_LIBVIRT_REALTIME_VERSION): + raise exception.RealtimePolicyNotSupported() + # Prepare realtime config for libvirt + vcpus_rt = hardware.vcpus_realtime_topology( + flavor, image_meta) + vcpusched = vconfig.LibvirtConfigGuestCPUTuneVCPUSched() + vcpusched.vcpus = vcpus_rt + vcpusched.scheduler = "fifo" + vcpusched.priority = ( + CONF.libvirt.realtime_scheduler_priority) + guest_cpu_tune.vcpusched.append(vcpusched) + for host_cell in topology.cells: for guest_node_id, guest_config_cell in enumerate( guest_cpu_numa_config.cells): @@ -3855,7 +3871,21 @@ class LibvirtDriver(driver.ComputeDriver): pin_cpuset.cpuset = set([pcpu]) else: 
pin_cpuset.cpuset = host_cell.cpuset - allpcpus.extend(pin_cpuset.cpuset) + if not wants_realtime or cpu not in vcpus_rt: + # - If realtime IS NOT enabled, the + # emulator threads are allowed to float + # across all the pCPUs associated with + # the guest vCPUs ("not wants_realtime" + # is true, so we add all pcpus) + # - If realtime IS enabled, then at least + # 1 vCPU is required to be set aside for + # non-realtime usage. The emulator + # threads are allowed to float across the + # pCPUs that are associated with the + # non-realtime VCPUs (the "cpu not in + # vcpus_rt" check deals with this + # filtering) + emupcpus.extend(pin_cpuset.cpuset) guest_cpu_tune.vcpupin.append(pin_cpuset) # TODO(berrange) When the guest has >1 NUMA node, it will @@ -3875,28 +3905,11 @@ class LibvirtDriver(driver.ComputeDriver): # cross NUMA node traffic. This is an area of investigation # for QEMU community devs. emulatorpin = vconfig.LibvirtConfigGuestCPUTuneEmulatorPin() - emulatorpin.cpuset = set(allpcpus) + emulatorpin.cpuset = set(emupcpus) guest_cpu_tune.emulatorpin = emulatorpin # Sort the vcpupin list per vCPU id for human-friendlier XML guest_cpu_tune.vcpupin.sort(key=operator.attrgetter("id")) - if hardware.is_realtime_enabled(flavor): - if not self._host.has_min_version( - MIN_LIBVIRT_REALTIME_VERSION): - raise exception.RealtimePolicyNotSupported() - - vcpus_rt, vcpus_em = hardware.vcpus_realtime_topology( - set(cpu.id for cpu in guest_cpu_tune.vcpupin), - flavor, image_meta) - - vcpusched = vconfig.LibvirtConfigGuestCPUTuneVCPUSched() - vcpusched.vcpus = vcpus_rt - vcpusched.scheduler = "fifo" - vcpusched.priority = ( - CONF.libvirt.realtime_scheduler_priority) - guest_cpu_tune.vcpusched.append(vcpusched) - guest_cpu_tune.emulatorpin.cpuset = vcpus_em - guest_numa_tune.memory = numa_mem guest_numa_tune.memnodes = numa_memnodes