virt: reserved number of mempages on compute host
Users need to be able to mark some number of pages as reserved for third-party components. The most common use case for huge/large pages is NFV. In its current state, the feature cannot guarantee that enough pages remain for OVS-DPDK to run properly on the compute node, which results in instances failing to boot on an otherwise well-selected compute node. OVS-DPDK needs 1 GB hugepages reserved. Since Nova does not take the pages reserved for OVS-DPDK into account, the process is unable to acquire the necessary memory, which results in a failed boot. This commit adds a new option 'reserved_huge_pages' which takes a list of key=value strings selecting on which host NUMA nodes and for which page size a given number of pages should be reserved. It also updates NUMAPagesTopology to contain a reserved memory pages attribute, which helps compute the number of available pages on the host for scheduling/claiming resources. Change-Id: Ie04d6362a4e99dcb2504698fc831a366ba746b44 Closes-Bug: #1543149
This commit is contained in:
parent
e701653bbc
commit
d52ceaf269
@ -13,6 +13,7 @@
|
||||
# under the License.
|
||||
|
||||
from oslo_config import cfg
|
||||
from oslo_config import types
|
||||
|
||||
from nova.conf import paths
|
||||
|
||||
@ -348,6 +349,30 @@ Interdependencies to other options:
|
||||
configured as HVM.
|
||||
""")
|
||||
|
||||
reserved_huge_pages = cfg.MultiOpt(
|
||||
"reserved_huge_pages",
|
||||
item_type=types.Dict,
|
||||
help="""Reserves a number of huge/large memory pages per NUMA host cells
|
||||
|
||||
Possible values:
|
||||
|
||||
* A list of valid key=value which reflect NUMA node ID, page size
|
||||
(Default unit is KiB) and number of pages to be reserved.
|
||||
|
||||
reserved_huge_pages = node=0,size=2048,count=64
|
||||
reserved_huge_pages = node=1,size=1GB,count=1
|
||||
|
||||
In this example we are reserving on NUMA node 0 64 pages of 2MiB
|
||||
and on NUMA node 1 1 page of 1GiB.
|
||||
|
||||
Services which consume this:
|
||||
|
||||
* nova-compute
|
||||
|
||||
Related options:
|
||||
|
||||
* None""")
|
||||
|
||||
|
||||
ALL_OPTS = [vcpu_pin_set,
|
||||
compute_driver,
|
||||
@ -367,7 +392,8 @@ ALL_OPTS = [vcpu_pin_set,
|
||||
image_cache_subdirectory_name,
|
||||
remove_unused_base_images,
|
||||
remove_unused_original_minimum_age_seconds,
|
||||
pointer_model]
|
||||
pointer_model,
|
||||
reserved_huge_pages]
|
||||
|
||||
|
||||
def register_opts(conf):
|
||||
|
@ -2137,3 +2137,9 @@ class BuildRequestNotFound(NotFound):
|
||||
class AttachInterfaceNotSupported(Invalid):
|
||||
msg_fmt = _("Attaching interfaces is not supported for "
|
||||
"instance %(instance)s.")
|
||||
|
||||
|
||||
class InvalidReservedMemoryPagesOption(Invalid):
|
||||
msg_fmt = _("The format of the option 'reserved_huge_pages' is invalid. "
|
||||
"(found '%(conf)s') Please refer to the nova "
|
||||
"config-reference.")
|
||||
|
@ -13,6 +13,7 @@
|
||||
# under the License.
|
||||
|
||||
from oslo_serialization import jsonutils
|
||||
from oslo_utils import versionutils
|
||||
|
||||
from nova import exception
|
||||
from nova.objects import base
|
||||
@ -148,14 +149,23 @@ class NUMACell(base.NovaObject):
|
||||
@base.NovaObjectRegistry.register
|
||||
class NUMAPagesTopology(base.NovaObject):
|
||||
# Version 1.0: Initial version
|
||||
VERSION = '1.0'
|
||||
# Version 1.1: Adds reserved field
|
||||
VERSION = '1.1'
|
||||
|
||||
fields = {
|
||||
'size_kb': fields.IntegerField(),
|
||||
'total': fields.IntegerField(),
|
||||
'used': fields.IntegerField(default=0),
|
||||
'reserved': fields.IntegerField(default=0),
|
||||
}
|
||||
|
||||
def obj_make_compatible(self, primitive, target_version):
|
||||
super(NUMAPagesTopology, self).obj_make_compatible(primitive,
|
||||
target_version)
|
||||
target_version = versionutils.convert_version_to_tuple(target_version)
|
||||
if target_version < (1, 1):
|
||||
primitive.pop('reserved', None)
|
||||
|
||||
def __eq__(self, other):
|
||||
return all_things_equal(self, other)
|
||||
|
||||
@ -165,7 +175,11 @@ class NUMAPagesTopology(base.NovaObject):
|
||||
@property
|
||||
def free(self):
|
||||
"""Returns the number of avail pages."""
|
||||
return self.total - self.used
|
||||
if not self.obj_attr_is_set('reserved'):
|
||||
# In case where an old compute node is sharing resource to
|
||||
# an updated node we must ensure that this property is defined.
|
||||
self.reserved = 0
|
||||
return self.total - self.used - self.reserved
|
||||
|
||||
@property
|
||||
def free_kb(self):
|
||||
|
@ -142,13 +142,20 @@ class _TestNUMA(object):
|
||||
objects.NUMAPagesTopology(
|
||||
size_kb=4, total=1548736, used=0),
|
||||
objects.NUMAPagesTopology(
|
||||
size_kb=2048, total=513, used=0)]) # 1,002G
|
||||
size_kb=2048, total=513, used=0),
|
||||
objects.NUMAPagesTopology(
|
||||
size_kb=1048576, total=4, used=1, reserved=1)])
|
||||
|
||||
pagesize = 2048
|
||||
|
||||
self.assertTrue(cell.can_fit_hugepages(pagesize, 2 ** 20))
|
||||
self.assertFalse(cell.can_fit_hugepages(pagesize, 2 ** 21))
|
||||
self.assertFalse(cell.can_fit_hugepages(pagesize, 2 ** 19 + 1))
|
||||
|
||||
pagesize = 1048576
|
||||
self.assertTrue(cell.can_fit_hugepages(pagesize, 2 ** 20))
|
||||
self.assertTrue(cell.can_fit_hugepages(pagesize, 2 ** 20 * 2))
|
||||
self.assertFalse(cell.can_fit_hugepages(pagesize, 2 ** 20 * 3))
|
||||
|
||||
self.assertRaises(
|
||||
exception.MemoryPageSizeNotSupported,
|
||||
cell.can_fit_hugepages, 12345, 2 ** 20)
|
||||
@ -239,6 +246,13 @@ class _TestNUMA(object):
|
||||
mempages=[pt2])
|
||||
self.assertNotEqual(cell1, cell2)
|
||||
|
||||
def test_reserved_property_not_set(self):
|
||||
p = objects.NUMAPagesTopology(
|
||||
# Leaving 'reserved' unset is equivalent to having received
|
||||
# a NUMAPagesTopology version 1.0 object.
|
||||
size_kb=1024, total=64, used=32)
|
||||
self.assertEqual(32, p.free)
|
||||
|
||||
|
||||
class TestNUMA(test_objects._LocalTest,
|
||||
_TestNUMA):
|
||||
|
@ -1163,7 +1163,7 @@ object_data = {
|
||||
'MonitorMetricList': '1.1-15ecf022a68ddbb8c2a6739cfc9f8f5e',
|
||||
'NotificationPublisher': '1.0-bbbc1402fb0e443a3eb227cc52b61545',
|
||||
'NUMACell': '1.2-74fc993ac5c83005e76e34e8487f1c05',
|
||||
'NUMAPagesTopology': '1.0-c71d86317283266dc8364c149155e48e',
|
||||
'NUMAPagesTopology': '1.1-edab9fa2dc43c117a38d600be54b4542',
|
||||
'NUMATopology': '1.2-c63fad38be73b6afd04715c9c1b29220',
|
||||
'NUMATopologyLimits': '1.0-9463e0edd40f64765ae518a539b9dfd2',
|
||||
'Network': '1.2-a977ab383aa462a479b2fae8211a5dde',
|
||||
|
@ -1367,6 +1367,79 @@ class NUMATopologyTest(test.NoDBTestCase):
|
||||
self.assertEqual(hostusage.cells[2].cpu_usage, 0)
|
||||
self.assertEqual(hostusage.cells[2].memory_usage, 0)
|
||||
|
||||
def _topo_usage_reserved_page_size(self):
|
||||
reserved = hw.numa_get_reserved_huge_pages()
|
||||
hosttopo = objects.NUMATopology(cells=[
|
||||
objects.NUMACell(id=0, cpuset=set([0, 1]), memory=512,
|
||||
cpu_usage=0, memory_usage=0, mempages=[
|
||||
objects.NUMAPagesTopology(
|
||||
size_kb=2048,
|
||||
total=512,
|
||||
used=128,
|
||||
reserved=reserved[0][2048])],
|
||||
siblings=[], pinned_cpus=set([])),
|
||||
objects.NUMACell(id=1, cpuset=set([2, 3]), memory=512,
|
||||
cpu_usage=0, memory_usage=0, mempages=[
|
||||
objects.NUMAPagesTopology(
|
||||
size_kb=1048576,
|
||||
total=5,
|
||||
used=2,
|
||||
reserved=reserved[1][1048576])],
|
||||
siblings=[], pinned_cpus=set([])),
|
||||
])
|
||||
instance1 = objects.InstanceNUMATopology(cells=[
|
||||
objects.InstanceNUMACell(
|
||||
id=0, cpuset=set([0, 1]), memory=256, pagesize=2048),
|
||||
objects.InstanceNUMACell(
|
||||
id=1, cpuset=set([2, 3]), memory=1024, pagesize=1048576),
|
||||
])
|
||||
return hosttopo, instance1
|
||||
|
||||
def test_numa_get_reserved_huge_pages(self):
|
||||
reserved = hw.numa_get_reserved_huge_pages()
|
||||
self.assertEqual({}, reserved)
|
||||
self.flags(reserved_huge_pages=[
|
||||
{'node': 3, 'size': 2048, 'count': 128},
|
||||
{'node': 3, 'size': '1GB', 'count': 4},
|
||||
{'node': 6, 'size': '2MB', 'count': 64},
|
||||
{'node': 9, 'size': '1GB', 'count': 1}])
|
||||
reserved = hw.numa_get_reserved_huge_pages()
|
||||
self.assertEqual({2048: 128, 1048576: 4}, reserved[3])
|
||||
self.assertEqual({2048: 64}, reserved[6])
|
||||
self.assertEqual({1048576: 1}, reserved[9])
|
||||
|
||||
def test_reserved_hugepgaes_success(self):
|
||||
self.flags(reserved_huge_pages=[
|
||||
{'node': 0, 'size': 2048, 'count': 128},
|
||||
{'node': 1, 'size': 1048576, 'count': 1}])
|
||||
hosttopo, instance1 = self._topo_usage_reserved_page_size()
|
||||
hostusage = hw.numa_usage_from_instances(
|
||||
hosttopo, [instance1])
|
||||
|
||||
self.assertEqual(hostusage.cells[0].mempages[0].size_kb, 2048)
|
||||
self.assertEqual(hostusage.cells[0].mempages[0].total, 512)
|
||||
self.assertEqual(hostusage.cells[0].mempages[0].used, 256)
|
||||
# 128 already used + 128 used by instance + 128 reserved
|
||||
self.assertEqual(hostusage.cells[0].mempages[0].free, 128)
|
||||
|
||||
self.assertEqual(hostusage.cells[1].mempages[0].size_kb, 1048576)
|
||||
self.assertEqual(hostusage.cells[1].mempages[0].total, 5)
|
||||
self.assertEqual(hostusage.cells[1].mempages[0].used, 3)
|
||||
# 2 already used + 1 used by instance + 1 reserved
|
||||
self.assertEqual(hostusage.cells[1].mempages[0].free, 1)
|
||||
|
||||
def test_reserved_huge_pages_invalid_format(self):
|
||||
self.flags(reserved_huge_pages=[{'node': 0, 'size': 2048}])
|
||||
self.assertRaises(
|
||||
exception.InvalidReservedMemoryPagesOption,
|
||||
self._topo_usage_reserved_page_size)
|
||||
|
||||
def test_reserved_huge_pages_invalid_value(self):
|
||||
self.flags(reserved_huge_pages=["0:foo:bar"])
|
||||
self.assertRaises(
|
||||
exception.InvalidReservedMemoryPagesOption,
|
||||
self._topo_usage_reserved_page_size)
|
||||
|
||||
def test_topo_usage_none(self):
|
||||
hosttopo = objects.NUMATopology(cells=[
|
||||
objects.NUMACell(id=0, cpuset=set([0, 1]), memory=512,
|
||||
|
@ -1249,6 +1249,36 @@ def numa_fit_instance_to_host(
|
||||
return objects.InstanceNUMATopology(cells=cells)
|
||||
|
||||
|
||||
def numa_get_reserved_huge_pages():
    """Return the huge pages reserved on this host, grouped by NUMA node.

    Based on the compute node option ``reserved_huge_pages``, this
    method returns a well formatted dict of dicts which can be used
    to build NUMATopology.

    Each configured entry must provide the keys ``node``, ``size`` and
    ``count``. ``size`` is either a plain integer, used as-is (KiB), or
    a string with a unit suffix (e.g. ``2MB``, ``1GB``) which is
    converted from bytes to KiB.

    :raises: exception.InvalidReservedMemoryPagesOption if the option
             is not correctly set.

    :returns: a dict keyed by NUMA node id; each value is a dict whose
              keys are page sizes in KiB and whose values are the
              number of pages reserved. An empty dict is returned when
              the option is not set.
    """
    if not CONF.reserved_huge_pages:
        return {}

    # A defaultdict lets several entries for the same NUMA node
    # accumulate into a single per-node dict.
    bucket = collections.defaultdict(dict)
    try:
        for entry in CONF.reserved_huge_pages:
            try:
                pagesize = int(entry['size'])
            except ValueError:
                # Not a bare number: parse the unit-suffixed size and
                # convert bytes to KiB. Floor division keeps the page
                # size an integer on Python 3 as well as Python 2.
                pagesize = strutils.string_to_bytes(
                    entry['size'], return_int=True) // units.Ki
            bucket[int(entry['node'])][pagesize] = int(entry['count'])
    except (ValueError, TypeError, KeyError):
        # Malformed entry (missing key, non-dict item or non-numeric
        # value): report the whole option value back to the operator.
        raise exception.InvalidReservedMemoryPagesOption(
            conf=CONF.reserved_huge_pages)
    return bucket
|
||||
|
||||
|
||||
def _numa_pagesize_usage_from_cell(hostcell, instancecell, sign):
|
||||
topo = []
|
||||
for pages in hostcell.mempages:
|
||||
@ -1258,7 +1288,8 @@ def _numa_pagesize_usage_from_cell(hostcell, instancecell, sign):
|
||||
total=pages.total,
|
||||
used=max(0, pages.used +
|
||||
instancecell.memory * units.Ki /
|
||||
pages.size_kb * sign)))
|
||||
pages.size_kb * sign),
|
||||
reserved=pages.reserved if 'reserved' in pages else 0))
|
||||
else:
|
||||
topo.append(pages)
|
||||
return topo
|
||||
|
@ -1,2 +1,3 @@
|
||||
---
|
||||
prelude: >
|
||||
features:
|
||||
- Adds reserved_huge_pages option to reserve
|
||||
an amount of huge pages for use by third-party components.
|
Loading…
Reference in New Issue
Block a user