virt: reserved number of mempages on compute host

Users need to be able to reserve a number of memory pages for
third-party components.

The most common use case for huge/large pages is NFV. In the current
state of that feature we can't guarantee the necessary amount of pages
to allow OVS-DPDK to run properly on the compute node, which results
in the instance failing to boot on an otherwise well-selected compute
node. OVS-DPDK needs 1 GB hugepages reserved. Since Nova does not take
into account the pages reserved for OVS-DPDK, the process is not able
to acquire the necessary memory, which results in a failed boot.

This commit adds a new option 'reserved_huge_pages' which takes a list
of strings, each selecting a host NUMA node and a page size and giving
the number of pages to reserve there. It also updates
NUMAPagesTopology to contain a reserved memory pages attribute, which
helps compute the number of pages available on the host when
scheduling/claiming resources.

Change-Id: Ie04d6362a4e99dcb2504698fc831a366ba746b44
Closes-Bug: #1543149
This commit is contained in:
Sahid Orentino Ferdjaoui 2016-02-08 09:37:37 -05:00
parent e701653bbc
commit d52ceaf269
8 changed files with 174 additions and 9 deletions

View File

@ -13,6 +13,7 @@
# under the License.
from oslo_config import cfg
from oslo_config import types
from nova.conf import paths
@ -348,6 +349,30 @@ Interdependencies to other options:
configured as HVM.
""")
# The option may be given several times (MultiOpt); each occurrence is
# parsed by oslo.config as one dict (types.Dict), i.e. comma-separated
# key:value pairs. The help text therefore documents the key:value form.
reserved_huge_pages = cfg.MultiOpt(
    "reserved_huge_pages",
    item_type=types.Dict,
    help="""Reserves a number of huge/large memory pages per NUMA host cells
Possible values:
* A list of valid key:value which reflect NUMA node ID, page size
(Default unit is KiB) and number of pages to be reserved.
reserved_huge_pages = node:0,size:2048,count:64
reserved_huge_pages = node:1,size:1GB,count:1
In this example we are reserving on NUMA node 0 64 pages of 2MiB
and on NUMA node 1 1 page of 1GiB.
Services which consume this:
* nova-compute
Related options:
* None""")
ALL_OPTS = [vcpu_pin_set,
compute_driver,
@ -367,7 +392,8 @@ ALL_OPTS = [vcpu_pin_set,
image_cache_subdirectory_name,
remove_unused_base_images,
remove_unused_original_minimum_age_seconds,
pointer_model]
pointer_model,
reserved_huge_pages]
def register_opts(conf):

View File

@ -2137,3 +2137,9 @@ class BuildRequestNotFound(NotFound):
class AttachInterfaceNotSupported(Invalid):
msg_fmt = _("Attaching interfaces is not supported for "
"instance %(instance)s.")
class InvalidReservedMemoryPagesOption(Invalid):
    """Raised when the 'reserved_huge_pages' option cannot be parsed.

    The offending configuration value is passed as ``conf`` and is
    interpolated into the message.
    """

    msg_fmt = _(
        "The format of the option 'reserved_huge_pages' "
        "is invalid. (found '%(conf)s') "
        "Please refer to the nova config-reference."
    )

View File

@ -13,6 +13,7 @@
# under the License.
from oslo_serialization import jsonutils
from oslo_utils import versionutils
from nova import exception
from nova.objects import base
@ -148,14 +149,23 @@ class NUMACell(base.NovaObject):
@base.NovaObjectRegistry.register
class NUMAPagesTopology(base.NovaObject):
# Version 1.0: Initial version
VERSION = '1.0'
# Version 1.1: Adds reserved field
VERSION = '1.1'
fields = {
'size_kb': fields.IntegerField(),
'total': fields.IntegerField(),
'used': fields.IntegerField(default=0),
'reserved': fields.IntegerField(default=0),
}
def obj_make_compatible(self, primitive, target_version):
    """Backlevel ``primitive`` so it matches ``target_version``.

    The 'reserved' field only exists from version 1.1 onwards, so it is
    dropped from the primitive when an older version is requested.

    :param primitive: serialized form of the object, modified in place.
    :param target_version: version string the primitive must conform to.
    """
    super(NUMAPagesTopology, self).obj_make_compatible(primitive,
                                                       target_version)
    requested = versionutils.convert_version_to_tuple(target_version)
    if requested < (1, 1):
        # pop() with a default: the field may legitimately be absent.
        primitive.pop('reserved', None)
def __eq__(self, other):
return all_things_equal(self, other)
@ -165,7 +175,11 @@ class NUMAPagesTopology(base.NovaObject):
@property
def free(self):
"""Returns the number of avail pages."""
return self.total - self.used
if not self.obj_attr_is_set('reserved'):
# In case where an old compute node is sharing resource to
# an updated node we must ensure that this property is defined.
self.reserved = 0
return self.total - self.used - self.reserved
@property
def free_kb(self):

View File

@ -142,13 +142,20 @@ class _TestNUMA(object):
objects.NUMAPagesTopology(
size_kb=4, total=1548736, used=0),
objects.NUMAPagesTopology(
size_kb=2048, total=513, used=0)]) # 1,002G
size_kb=2048, total=513, used=0),
objects.NUMAPagesTopology(
size_kb=1048576, total=4, used=1, reserved=1)])
pagesize = 2048
self.assertTrue(cell.can_fit_hugepages(pagesize, 2 ** 20))
self.assertFalse(cell.can_fit_hugepages(pagesize, 2 ** 21))
self.assertFalse(cell.can_fit_hugepages(pagesize, 2 ** 19 + 1))
pagesize = 1048576
self.assertTrue(cell.can_fit_hugepages(pagesize, 2 ** 20))
self.assertTrue(cell.can_fit_hugepages(pagesize, 2 ** 20 * 2))
self.assertFalse(cell.can_fit_hugepages(pagesize, 2 ** 20 * 3))
self.assertRaises(
exception.MemoryPageSizeNotSupported,
cell.can_fit_hugepages, 12345, 2 ** 20)
@ -239,6 +246,13 @@ class _TestNUMA(object):
mempages=[pt2])
self.assertNotEqual(cell1, cell2)
def test_reserved_property_not_set(self):
    # Leaving 'reserved' unset is similar to having received a
    # version 1.0 NUMAPagesTopology; 'free' must still be computable.
    pages = objects.NUMAPagesTopology(size_kb=1024, total=64, used=32)
    self.assertEqual(32, pages.free)
class TestNUMA(test_objects._LocalTest,
_TestNUMA):

View File

@ -1163,7 +1163,7 @@ object_data = {
'MonitorMetricList': '1.1-15ecf022a68ddbb8c2a6739cfc9f8f5e',
'NotificationPublisher': '1.0-bbbc1402fb0e443a3eb227cc52b61545',
'NUMACell': '1.2-74fc993ac5c83005e76e34e8487f1c05',
'NUMAPagesTopology': '1.0-c71d86317283266dc8364c149155e48e',
'NUMAPagesTopology': '1.1-edab9fa2dc43c117a38d600be54b4542',
'NUMATopology': '1.2-c63fad38be73b6afd04715c9c1b29220',
'NUMATopologyLimits': '1.0-9463e0edd40f64765ae518a539b9dfd2',
'Network': '1.2-a977ab383aa462a479b2fae8211a5dde',

View File

@ -1367,6 +1367,79 @@ class NUMATopologyTest(test.NoDBTestCase):
self.assertEqual(hostusage.cells[2].cpu_usage, 0)
self.assertEqual(hostusage.cells[2].memory_usage, 0)
def _topo_usage_reserved_page_size(self):
    """Build a two-cell host topology and an instance topology to fit.

    The 'reserved' count of each host page pool is read from the
    result of hw.numa_get_reserved_huge_pages(), so the
    'reserved_huge_pages' flag must be set before calling this helper.

    :returns: tuple (host NUMATopology, InstanceNUMATopology)
    """
    reserved = hw.numa_get_reserved_huge_pages()

    # Cell 0 exposes 2 MiB pages, cell 1 exposes 1 GiB pages.
    pages_2m = objects.NUMAPagesTopology(
        size_kb=2048, total=512, used=128,
        reserved=reserved[0][2048])
    pages_1g = objects.NUMAPagesTopology(
        size_kb=1048576, total=5, used=2,
        reserved=reserved[1][1048576])

    host_cell0 = objects.NUMACell(
        id=0, cpuset=set([0, 1]), memory=512,
        cpu_usage=0, memory_usage=0, mempages=[pages_2m],
        siblings=[], pinned_cpus=set([]))
    host_cell1 = objects.NUMACell(
        id=1, cpuset=set([2, 3]), memory=512,
        cpu_usage=0, memory_usage=0, mempages=[pages_1g],
        siblings=[], pinned_cpus=set([]))
    hosttopo = objects.NUMATopology(cells=[host_cell0, host_cell1])

    guest_cell0 = objects.InstanceNUMACell(
        id=0, cpuset=set([0, 1]), memory=256, pagesize=2048)
    guest_cell1 = objects.InstanceNUMACell(
        id=1, cpuset=set([2, 3]), memory=1024, pagesize=1048576)
    instance1 = objects.InstanceNUMATopology(
        cells=[guest_cell0, guest_cell1])

    return hosttopo, instance1
def test_numa_get_reserved_huge_pages(self):
    # With the option left unset nothing is reserved.
    self.assertEqual({}, hw.numa_get_reserved_huge_pages())

    # Sizes are given both as plain KiB numbers and with unit suffixes.
    self.flags(reserved_huge_pages=[
        {'node': 3, 'size': 2048, 'count': 128},
        {'node': 3, 'size': '1GB', 'count': 4},
        {'node': 6, 'size': '2MB', 'count': 64},
        {'node': 9, 'size': '1GB', 'count': 1}])
    reserved = hw.numa_get_reserved_huge_pages()

    expected_by_node = (
        (3, {2048: 128, 1048576: 4}),
        (6, {2048: 64}),
        (9, {1048576: 1}),
    )
    for node, pages in expected_by_node:
        self.assertEqual(pages, reserved[node])
def test_reserved_hugepgaes_success(self):
    # Reserve 128 x 2 MiB pages on node 0 and 1 x 1 GiB page on node 1,
    # then account for one instance spanning both cells.
    self.flags(reserved_huge_pages=[
        {'node': 0, 'size': 2048, 'count': 128},
        {'node': 1, 'size': 1048576, 'count': 1}])
    hosttopo, instance1 = self._topo_usage_reserved_page_size()
    hostusage = hw.numa_usage_from_instances(hosttopo, [instance1])

    small = hostusage.cells[0].mempages[0]
    self.assertEqual(small.size_kb, 2048)
    self.assertEqual(small.total, 512)
    self.assertEqual(small.used, 256)
    # 512 total - (128 already used + 128 used by instance
    # + 128 reserved)
    self.assertEqual(small.free, 128)

    large = hostusage.cells[1].mempages[0]
    self.assertEqual(large.size_kb, 1048576)
    self.assertEqual(large.total, 5)
    self.assertEqual(large.used, 3)
    # 5 total - (2 already used + 1 used by instance + 1 reserved)
    self.assertEqual(large.free, 1)
def test_reserved_huge_pages_invalid_format(self):
    # An entry without the 'count' key must be rejected.
    self.flags(reserved_huge_pages=[{'node': 0, 'size': 2048}])
    with self.assertRaises(exception.InvalidReservedMemoryPagesOption):
        self._topo_usage_reserved_page_size()
def test_reserved_huge_pages_invalid_value(self):
    # A plain string is not a node/size/count mapping and must be
    # rejected.
    self.flags(reserved_huge_pages=["0:foo:bar"])
    with self.assertRaises(exception.InvalidReservedMemoryPagesOption):
        self._topo_usage_reserved_page_size()
def test_topo_usage_none(self):
hosttopo = objects.NUMATopology(cells=[
objects.NUMACell(id=0, cpuset=set([0, 1]), memory=512,

View File

@ -1249,6 +1249,36 @@ def numa_fit_instance_to_host(
return objects.InstanceNUMATopology(cells=cells)
def numa_get_reserved_huge_pages():
    """Return the reserved memory pages configured for this host.

    Parses the compute node option ``reserved_huge_pages`` into a mapping
    which can be used to build a NUMATopology.

    :raises: exception.InvalidReservedMemoryPagesOption if the option is
             not correctly set.
    :returns: a dict keyed by NUMA node id; each value is a dict whose
              keys are page sizes in KiB and whose values are the number
              of pages reserved. An empty dict when the option is unset.
    """
    if not CONF.reserved_huge_pages:
        return {}

    bucket = collections.defaultdict(dict)
    try:
        for entry in CONF.reserved_huge_pages:
            try:
                # A bare number is already a page size in KiB.
                pagesize = int(entry['size'])
            except ValueError:
                # Otherwise expect a unit-suffixed size such as '1GB'.
                # Floor division keeps the page size an integer on
                # Python 3 as well as Python 2.
                pagesize = strutils.string_to_bytes(
                    entry['size'], return_int=True) // units.Ki
            bucket[int(entry['node'])][pagesize] = int(entry['count'])
    except (ValueError, TypeError, KeyError):
        # Missing keys, non-mapping entries and unparsable numbers all
        # indicate a malformed option value.
        raise exception.InvalidReservedMemoryPagesOption(
            conf=CONF.reserved_huge_pages)
    return bucket
def _numa_pagesize_usage_from_cell(hostcell, instancecell, sign):
topo = []
for pages in hostcell.mempages:
@ -1258,7 +1288,8 @@ def _numa_pagesize_usage_from_cell(hostcell, instancecell, sign):
total=pages.total,
used=max(0, pages.used +
instancecell.memory * units.Ki /
pages.size_kb * sign)))
pages.size_kb * sign),
reserved=pages.reserved if 'reserved' in pages else 0))
else:
topo.append(pages)
return topo

View File

@ -1,2 +1,3 @@
---
prelude: >
features:
- Adds the reserved_huge_pages option to reserve a
number of huge pages for use by third-party components.