fix hugepage allocation overflow

The database dump shows that the current "vm_hugepages_nr_2M", which is
derived from the previous "vm_hugepages_possible_2M", is much bigger
than the current "vm_hugepages_possible_2M".

"vm_hugepages_possible_2M" reported by sysinv-agent is calculated using
the platform reserved value from "worker_reserved.conf", while
"worker_reserved.conf" is only updated by puppet after the first unlock.

This patch calculates the initial huge pages based on the current view of
the host memory instead of using the "vm_hugepages_possible_2M" value
reported by the sysinv-agent, which could have been calculated from the
default platform reserved value. In addition, it also takes into account
platform reserved memory changes (made by the user) when performing the
huge pages semantic check.

Change-Id: I686b99728ed2b3572ace39469d479176a6ae55ff
Closes-Bug: 1827258
Signed-off-by: Liu, Tao <Tao.Liu@windriver.com>
Signed-off-by: Bin Yang <bin.yang@intel.com>
This commit is contained in:
Bin Yang 2019-06-27 16:36:41 +08:00
parent e0e342fff3
commit 5e42f69989
2 changed files with 79 additions and 35 deletions

View File

@ -3566,8 +3566,15 @@ class HostController(rest.RestController):
elif m.vm_hugepages_nr_1G:
allocated += constants.MIB_1G * m.vm_hugepages_nr_1G
LOG.debug("MemTotal=%s allocated=%s" % (memtotal, allocated))
if memtotal < allocated:
LOG.info("Memory: Total=%s MiB, Allocated=%s MiB, "
"2M: %s pages %s pages pending, "
"1G: %s pages %s pages pending"
% (memtotal, allocated,
m.vm_hugepages_possible_2M, m.vm_hugepages_nr_2M_pending,
m.vm_hugepages_possible_1G, m.vm_hugepages_nr_1G_pending))
if (memtotal < allocated or
m.vm_hugepages_possible_2M < m.vm_hugepages_nr_2M_pending or
m.vm_hugepages_possible_1G < m.vm_hugepages_nr_1G_pending):
msg = (_("Rejected: Total allocated memory exceeds the total memory of "
"%(host)s numa node %(node)s "
) %
@ -3679,19 +3686,46 @@ class HostController(rest.RestController):
if m.vm_hugepages_nr_1G_pending is not None \
else m.vm_hugepages_nr_1G
hp_possible_mib = (m.node_memtotal_mib -
m.platform_reserved_mib)
vs_mem_mib = (vs_hugepages_nr *
m.vswitch_hugepages_size_mib)
vm_mem_mib = hp_possible_mib - vs_mem_mib
vm_mem_mib_possible = m.vm_hugepages_possible_2M * constants.MIB_2M
LOG.info("host(%s) node(%d): vm_mem_mib=%d,"
"vm_mem_mib_possible (from agent) = %d"
% (ihost['hostname'], node['id'], vm_mem_mib,
vm_mem_mib_possible))
# vm_mem_mib should not be negative
if vm_mem_mib < constants.MIB_2M:
vm_mem_mib = 0
# worker_reserved.conf might have different setting
# during upgrading or patching
if vm_mem_mib > vm_mem_mib_possible:
vm_mem_mib = vm_mem_mib_possible
# Current value might not be suitable after upgrading or
# patching
if vm_hugepages_nr_2M > int((vm_mem_mib * 0.9) /
constants.MIB_2M):
vm_hugepages_nr_2M = int((vm_mem_mib * 0.9) /
constants.MIB_2M)
value.update({'vm_hugepages_nr_2M': vm_hugepages_nr_2M})
# calculate 90% 2M pages if the huge pages have not been
# allocated and the compute label is set
if cutils.has_openstack_compute(labels) and \
vm_hugepages_nr_2M == 0 and \
vm_hugepages_nr_1G == 0 and \
vm_hugepages_nr_2M == 0 and \
vm_hugepages_nr_1G == 0 and \
vm_mem_mib > 0 and \
cutils.is_default_huge_pages_required(ihost):
vm_hugepages_nr_2M = int(m.vm_hugepages_possible_2M * 0.9)
vm_hugepages_nr_2M = int((vm_mem_mib * 0.9) /
constants.MIB_2M)
value.update({'vm_hugepages_nr_2M': vm_hugepages_nr_2M})
vm_hugepages_4K = \
(m.node_memtotal_mib - m.platform_reserved_mib)
vm_hugepages_4K -= \
(vs_hugepages_nr * m.vswitch_hugepages_size_mib)
vm_hugepages_4K = vm_mem_mib
vm_hugepages_4K -= \
(constants.MIB_2M * vm_hugepages_nr_2M)
vm_hugepages_4K -= \

View File

@ -420,8 +420,11 @@ class MemoryController(rest.RestController):
try:
# Semantics checks and update hugepage memory accounting
patch = _check_huge_values(rpc_port, patch,
vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending,
vswitch_hugepages_reqd, vswitch_hugepages_size_mib)
vm_hugepages_nr_2M_pending,
vm_hugepages_nr_1G_pending,
vswitch_hugepages_reqd,
vswitch_hugepages_size_mib,
platform_reserved_mib)
except wsme.exc.ClientSideError as e:
inode = pecan.request.dbapi.inode_get(inode_id=rpc_port.forinodeid)
numa_node = inode.numa_node
@ -519,8 +522,11 @@ def _update(mem_uuid, mem_values):
# Semantics checks and update hugepage memory accounting
mem_values = _check_huge_values(rpc_port, mem_values,
vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending,
vswitch_hugepages_reqd, vswitch_hugepages_size_mib)
vm_hugepages_nr_2M_pending,
vm_hugepages_nr_1G_pending,
vswitch_hugepages_reqd,
vswitch_hugepages_size_mib,
platform_reserved_mib)
# Semantics checks for platform memory
_check_memory(rpc_port, host_id, platform_reserved_mib,
@ -549,16 +555,6 @@ def _check_memory(rpc_port, ihost, platform_reserved_mib=None,
vm_hugepages_nr_2M_pending=None, vm_hugepages_nr_1G_pending=None,
vswitch_hugepages_reqd=None, vswitch_hugepages_size_mib=None):
if platform_reserved_mib:
# Check for invalid characters
try:
val = int(platform_reserved_mib)
except ValueError:
raise wsme.exc.ClientSideError((
"Platform memory must be a number"))
if val < 0:
raise wsme.exc.ClientSideError((
"Platform memory must be greater than zero"))
# Check for lower limit
inode_id = rpc_port['forinodeid']
inode = pecan.request.dbapi.inode_get(inode_id)
@ -636,7 +632,8 @@ def _check_memory(rpc_port, ihost, platform_reserved_mib=None,
def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None,
vm_hugepages_nr_1G=None, vswitch_hugepages_reqd=None,
vswitch_hugepages_size_mib=None):
vswitch_hugepages_size_mib=None,
platform_reserved_mib=None):
if rpc_port['vm_hugepages_use_1G'] == 'False':
vs_hp_size = vswitch_hugepages_size_mib
@ -741,16 +738,33 @@ def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None,
vs_hp_size_mib = constants.MIB_2M
vs_hp_reqd_mib = new_vs_pages * vs_hp_size_mib
# The size of possible hugepages is the size of reported possible
# vm pages + the reported current number of vswitch pages.
# The size of possible hugepages is the node mem total - platform reserved
base_mem_mib = rpc_port['platform_reserved_mib']
if platform_reserved_mib:
# Check for invalid characters
try:
val = int(platform_reserved_mib)
except ValueError:
raise wsme.exc.ClientSideError((
"Platform memory must be a number"))
if val < 0:
raise wsme.exc.ClientSideError((
"Platform memory must be greater than zero"))
base_mem_mib = int(platform_reserved_mib)
hp_possible_mib = rpc_port['node_memtotal_mib'] - base_mem_mib
if vs_hp_size_mib == constants.MIB_2M:
hp_possible_mib = int(
agent_hp_possible_mib = int(
rpc_port.get('vm_hugepages_possible_2M', 0) +
rpc_port.get('vswitch_hugepages_nr', 0)) * vs_hp_size_mib
if hp_possible_mib > agent_hp_possible_mib:
hp_possible_mib = agent_hp_possible_mib
elif vs_hp_size_mib == constants.MIB_1G:
hp_possible_mib = int(
rpc_port.get('vm_hugepages_possible_1G', 0) +
rpc_port.get('vswitch_hugepages_nr', 0)) * vs_hp_size_mib
agent_hp_possible_mib = int(
rpc_port.get('vm_hugepages_possible_1G', 0) +
rpc_port.get('vswitch_hugepages_nr', 0)) * vs_hp_size_mib
if hp_possible_mib > agent_hp_possible_mib:
hp_possible_mib = agent_hp_possible_mib
# Total requested huge pages
hp_requested_mib = vm_hp_2M_reqd_mib + vm_hp_1G_reqd_mib + vs_hp_reqd_mib
@ -762,12 +776,8 @@ def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None,
vm_max_hp_1G = ((hp_possible_mib - vs_hp_reqd_mib - vm_hp_2M_reqd_mib)
/ constants.MIB_1G)
if vm_max_hp_2M < 0:
vm_max_hp_2M = 0
if vm_max_hp_1G < 0:
vm_max_hp_1G = 0
if new_2M_pages > 0 and new_1G_pages > 0:
msg = _("For a requested vSwitch hugepage allocation of %s MiB, "
"max 1G pages is %s when 2M is %s, or "
"max 2M pages is %s when 1G is %s." % (