Fix the host unlock rejection caused by memory check
Historically, the node's total memory was calculated bottom-up, as the sum of the allocated huge pages and the usable Linux memory:

    NODE_TOTAL_MiB = HTOT + (AVAIL + PSS)

This bottom-up calculation introduced variability and complexity. In some cases, the previously allocated memory exceeded the current total memory after AVAIL and PSS had changed.

This update changes NODE_TOTAL_MiB to MemTotal (the node's total usable RAM); therefore:

    Possible Huge Pages Memory = MemTotal - Platform Reserved

This update also adds a constraint that allows only 90% of the Possible Huge Pages Memory to be allocated.

Closes-Bug: 1837749
Change-Id: I2fa1d82f70263bb2a1b93df2a405a979b42ed83a
Signed-off-by: Tao Liu <tao.liu@windriver.com>
This commit is contained in:
parent
8d39303a5b
commit
e2cc0f53ac
@ -16,7 +16,6 @@ from os import listdir
|
||||
from os.path import isfile
|
||||
from os.path import join
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from sysinv.openstack.common import log as logging
|
||||
import tsconfig.tsconfig as tsc
|
||||
@ -105,19 +104,6 @@ class NodeOperator(object):
|
||||
# self._get_free_memory_mb()
|
||||
# self._get_free_memory_nodes_mb()
|
||||
|
||||
def _is_strict(self):
|
||||
with open(os.devnull, "w") as fnull:
|
||||
try:
|
||||
output = subprocess.check_output(
|
||||
["cat", "/proc/sys/vm/overcommit_memory"],
|
||||
stderr=fnull)
|
||||
if int(output) == 2:
|
||||
return True
|
||||
except subprocess.CalledProcessError as e:
|
||||
LOG.info("Failed to check for overcommit, error (%s)",
|
||||
e.output)
|
||||
return False
|
||||
|
||||
def convert_range_string_to_list(self, s):
|
||||
olist = []
|
||||
s = s.strip()
|
||||
@ -438,22 +424,9 @@ class NodeOperator(object):
|
||||
# silently ignore IO errors (eg. file missing)
|
||||
pass
|
||||
|
||||
# Get the free and total memory from meminfo for this node
|
||||
# Get the total memory from meminfo for this node
|
||||
re_node_memtotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
|
||||
re_node_memfree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
|
||||
re_node_filepages = \
|
||||
re.compile(r'^Node\s+\d+\s+\FilePages:\s+(\d+)')
|
||||
re_node_sreclaim = \
|
||||
re.compile(r'^Node\s+\d+\s+\SReclaimable:\s+(\d+)')
|
||||
re_node_commitlimit = \
|
||||
re.compile(r'^Node\s+\d+\s+\CommitLimit:\s+(\d+)')
|
||||
re_node_committed_as = \
|
||||
re.compile(r'^Node\s+\d+\s+\'Committed_AS:\s+(\d+)')
|
||||
|
||||
free_kb = 0 # Free Memory (KB) available
|
||||
total_kb = 0 # Total Memory (KB)
|
||||
limit = 0 # only used in strict accounting
|
||||
committed = 0 # only used in strict accounting
|
||||
|
||||
meminfo = "/sys/devices/system/node/node%d/meminfo" % node
|
||||
try:
|
||||
@ -462,54 +435,13 @@ class NodeOperator(object):
|
||||
match = re_node_memtotal.search(line)
|
||||
if match:
|
||||
total_kb += int(match.group(1))
|
||||
continue
|
||||
match = re_node_memfree.search(line)
|
||||
if match:
|
||||
free_kb += int(match.group(1))
|
||||
continue
|
||||
match = re_node_filepages.search(line)
|
||||
if match:
|
||||
free_kb += int(match.group(1))
|
||||
continue
|
||||
match = re_node_sreclaim.search(line)
|
||||
if match:
|
||||
free_kb += int(match.group(1))
|
||||
continue
|
||||
match = re_node_commitlimit.search(line)
|
||||
if match:
|
||||
limit = int(match.group(1))
|
||||
continue
|
||||
match = re_node_committed_as.search(line)
|
||||
if match:
|
||||
committed = int(match.group(1))
|
||||
continue
|
||||
|
||||
if self._is_strict():
|
||||
free_kb = limit - committed
|
||||
break
|
||||
|
||||
except IOError:
|
||||
# silently ignore IO errors (eg. file missing)
|
||||
pass
|
||||
|
||||
# Calculate PSS
|
||||
pss_mb = 0
|
||||
if node == 0:
|
||||
cmd = 'cat /proc/*/smaps 2>/dev/null | awk \'/^Pss:/ ' \
|
||||
'{a += $2;} END {printf "%d\\n", a/1024.0;}\''
|
||||
try:
|
||||
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
|
||||
shell=True)
|
||||
result = proc.stdout.read().strip()
|
||||
pss_mb = int(result)
|
||||
except subprocess.CalledProcessError as e:
|
||||
LOG.error("Cannot calculate PSS (%s) (%d)", cmd,
|
||||
e.returncode)
|
||||
except OSError as e:
|
||||
LOG.error("Failed to execute (%s) OS error (%d)", cmd,
|
||||
e.errno)
|
||||
|
||||
# need to multiply total_mb by 1024
|
||||
node_total_kb = total_hp_mb * SIZE_KB + free_kb + pss_mb * SIZE_KB
|
||||
node_total_kb = total_kb
|
||||
|
||||
# Read base memory from worker_reserved.conf
|
||||
base_mem_mb = 0
|
||||
@ -553,7 +485,7 @@ class NodeOperator(object):
|
||||
'memtotal_mib': total_hp_mb,
|
||||
'memavail_mib': free_hp_mb,
|
||||
'hugepages_configured': 'True',
|
||||
'node_memtotal_mib': node_total_kb / 1024,
|
||||
'node_memtotal_mib': node_total_kb / SIZE_KB,
|
||||
})
|
||||
|
||||
imemory.append(attr)
|
||||
|
@ -3589,20 +3589,12 @@ class HostController(rest.RestController):
|
||||
m.vswitch_hugepages_size_mib)
|
||||
vm_mem_mib = hp_possible_mib - vs_mem_mib
|
||||
|
||||
vm_mem_mib_possible = m.vm_hugepages_possible_2M * constants.MIB_2M
|
||||
|
||||
LOG.info("host(%s) node(%d): vm_mem_mib=%d,"
|
||||
"vm_mem_mib_possible (from agent) = %d"
|
||||
% (ihost['hostname'], node['id'], vm_mem_mib,
|
||||
vm_mem_mib_possible))
|
||||
% (ihost['hostname'], node['id'], vm_mem_mib))
|
||||
|
||||
# vm_mem_mib should not be negative
|
||||
if vm_mem_mib < constants.MIB_2M:
|
||||
vm_mem_mib = 0
|
||||
# worker_reserved.conf might have different setting
|
||||
# during upgrading or patching
|
||||
if vm_mem_mib > vm_mem_mib_possible:
|
||||
vm_mem_mib = vm_mem_mib_possible
|
||||
# Current value might not be suitable after upgrading or
|
||||
# patching
|
||||
if vm_hugepages_nr_2M > int((vm_mem_mib * 0.9) /
|
||||
@ -3618,7 +3610,7 @@ class HostController(rest.RestController):
|
||||
vm_hugepages_nr_1G == 0 and \
|
||||
vm_mem_mib > 0 and \
|
||||
cutils.is_default_huge_pages_required(ihost):
|
||||
vm_hugepages_nr_2M = int((vm_mem_mib * 0.9) /
|
||||
vm_hugepages_nr_2M = int((hp_possible_mib * 0.9 - vs_mem_mib) /
|
||||
constants.MIB_2M)
|
||||
value.update({'vm_hugepages_nr_2M': vm_hugepages_nr_2M})
|
||||
|
||||
|
@ -755,7 +755,8 @@ def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None,
|
||||
"Platform memory must be greater than zero"))
|
||||
base_mem_mib = int(platform_reserved_mib)
|
||||
|
||||
hp_possible_mib = rpc_port['node_memtotal_mib'] - base_mem_mib
|
||||
# only allow allocating 90% of the possible huge pages memory
|
||||
hp_possible_mib = int((rpc_port['node_memtotal_mib'] - base_mem_mib) * 0.9)
|
||||
|
||||
# Total requested huge pages
|
||||
hp_requested_mib = vm_hp_2M_reqd_mib + vm_hp_1G_reqd_mib + vs_hp_reqd_mib
|
||||
|
Loading…
Reference in New Issue
Block a user