Integrate host configuration into configuration framework
Integrates the following host configuration into the configuration framework:
- Host boot parameters
- CPU reservation
- Process affinity
- Memory huge page allocations

Change-Id: I2259e0e93eefd5ce5000271fa32ecaa8d13fa411
Signed-off-by: Matt Peters <matt.peters@windriver.com>
@@ -35,7 +35,6 @@ Initial compute node hugepages and reserved cpus configuration
# compute init scripts
install -d -m 755 %{buildroot}%{local_etc_initd}
install -p -D -m 755 affine-platform.sh %{buildroot}%{local_etc_initd}/affine-platform.sh
install -p -D -m 755 compute-huge.sh %{buildroot}%{local_etc_initd}/compute-huge.sh

# utility scripts
install -p -D -m 755 cpumap_functions.sh %{buildroot}%{local_etc_initd}/cpumap_functions.sh
@@ -53,7 +52,6 @@ install -p -D -m 755 bin/topology %{buildroot}%{local_bindir}/topology
# compute config data
install -d -m 755 %{buildroot}%{local_etc_nova}
install -p -D -m 755 compute_reserved.conf %{buildroot}%{local_etc_nova}/compute_reserved.conf
install -p -D -m 755 compute_hugepages_total.conf %{buildroot}%{local_etc_nova}/compute_hugepages_total.conf

# goenabled check
install -d -m 755 %{buildroot}%{local_etc_goenabledd}
@@ -62,11 +60,9 @@ install -p -D -m 755 compute-huge-goenabled.sh %{buildroot}%{local_etc_goenabled
# systemd services
install -d -m 755 %{buildroot}%{_unitdir}
install -p -D -m 664 affine-platform.sh.service %{buildroot}%{_unitdir}/affine-platform.sh.service
install -p -D -m 664 compute-huge.sh.service %{buildroot}%{_unitdir}/compute-huge.sh.service

%post
/bin/systemctl enable affine-platform.sh.service >/dev/null 2>&1
/bin/systemctl enable compute-huge.sh.service >/dev/null 2>&1

%clean
rm -rf $RPM_BUILD_ROOT
@@ -79,7 +75,5 @@ rm -rf $RPM_BUILD_ROOT
%{local_etc_initd}/*
%{local_etc_goenabledd}/*
%config(noreplace) %{local_etc_nova}/compute_reserved.conf
%config(noreplace) %{local_etc_nova}/compute_hugepages_total.conf

%{_unitdir}/compute-huge.sh.service
%{_unitdir}/affine-platform.sh.service
@@ -1,7 +1,7 @@
[Unit]
Description=Titanium Cloud Affine Platform
After=syslog.service network.service dbus.service sw-patch.service
Before=compute-huge.sh.service
Before=computeconfig.service

[Service]
Type=oneshot
@@ -17,7 +17,7 @@ source "/etc/init.d/log_functions.sh"
source "/usr/bin/tsconfig"

if [ -e ${VOLATILE_COMPUTE_CONFIG_COMPLETE} -a ! -f ${COMPUTE_HUGE_GOENABLED} ]; then
    log_error "compute-huge.sh CPU configuration check failed. Failing goenabled check."
    log_error "Compute manifest CPU configuration check failed. Failing goenabled check."
    exit 1
fi
File diff suppressed because it is too large
@@ -1,14 +0,0 @@
[Unit]
Description=Titanium Cloud Compute Huge
After=syslog.service network.service affine-platform.sh.service sw-patch.service
Before=sshd.service sw-patch-agent.service sysinv-agent.service

[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=/etc/init.d/compute-huge.sh start
ExecStop=/etc/init.d/compute-huge.sh stop
ExecReload=/etc/init.d/compute-huge.sh restart

[Install]
WantedBy=multi-user.target
@@ -52,7 +52,23 @@ rm -rf ${PUPPET_TMP}
mkdir -p ${PUPPET_TMP}/hieradata
cp /etc/puppet/hieradata/global.yaml ${PUPPET_TMP}/hieradata/global.yaml
cp /etc/puppet/hieradata/${PERSONALITY}.yaml ${PUPPET_TMP}/hieradata/personality.yaml
cp -f ${HIERADATA}/${HOST}.yaml ${PUPPET_TMP}/hieradata/host.yaml

# When the compute node is first booted and goes online, sysinv-agent reports
# host CPU inventory, which triggers the first runtime manifest apply that updates
# the grub. At that point, copying the host file has failed due to a timing issue
# that is not yet fully understood; subsequent retries worked.
if [ "${PERSONALITY}" = "compute" ]; then
    n=0
    until [ $n -ge 3 ]
    do
        cp -f ${HIERADATA}/${HOST}.yaml ${PUPPET_TMP}/hieradata/host.yaml && break
        n=$[$n+1]
        logger -t $0 "Failed to copy /etc/puppet/hieradata/${HOST}.yaml"
        sleep 15
    done
else
    cp -f ${HIERADATA}/${HOST}.yaml ${PUPPET_TMP}/hieradata/host.yaml
fi
cp -f ${HIERADATA}/system.yaml \
    ${HIERADATA}/secure_system.yaml \
    ${HIERADATA}/static.yaml \
@@ -13,6 +13,7 @@ include ::platform::sysctl::compute
include ::platform::dhclient
include ::platform::partitions
include ::platform::lvm::compute
include ::platform::compute
include ::platform::vswitch
include ::platform::network
include ::platform::fstab
@@ -0,0 +1,5 @@
# Returns the current boot parameters
Facter.add(:get_cmdline) do
  setcode "cat /proc/cmdline 2>/dev/null"
end
@@ -0,0 +1,8 @@
# Returns true if this is a Broadwell processor
# Broadwell specific flags (model: 79)
Facter.add("is_broadwell_processor") do
  setcode do
    Facter::Core::Execution.exec('grep -q -E "^model\s+:\s+79$" /proc/cpuinfo')
    $?.exitstatus == 0
  end
end
@@ -0,0 +1,7 @@
# Returns true if 1GB pages are supported
Facter.add("is_gb_page_supported") do
  setcode do
    Facter::Core::Execution.exec('grep -q pdpe1gb /proc/cpuinfo')
    $?.exitstatus == 0
  end
end
@@ -0,0 +1,7 @@
# Returns true if hugetlbfs is enabled
Facter.add("is_hugetlbfs_enabled") do
  setcode do
    Facter::Core::Execution.exec('grep -q hugetlbfs /proc/filesystems')
    $?.exitstatus == 0
  end
end
@@ -0,0 +1,6 @@
# Returns true if per-NUMA node memory attributes are supported on this node
Facter.add("is_per_numa_supported") do
  setcode do
    Dir.exist?('/sys/devices/system/node/node0')
  end
end
@@ -0,0 +1,6 @@
# Returns true if Resource Control is supported on this node
Facter.add("is_resctrl_supported") do
  setcode do
    Dir.exist?('/sys/fs/resctrl')
  end
end
@@ -0,0 +1,4 @@
# Returns number of logical cpus
Facter.add(:number_of_logical_cpus) do
  setcode "cat /proc/cpuinfo 2>/dev/null | awk '/^[pP]rocessor/ { n +=1 } END { print (n>0) ? n : 1}'"
end
@@ -0,0 +1,4 @@
# Returns number of numa nodes
Facter.add(:number_of_numa_nodes) do
  setcode "ls -d /sys/devices/system/node/node* 2>/dev/null | wc -l"
end
@@ -0,0 +1,34 @@
module Puppet::Parser::Functions
  newfunction(:check_grub_config,
              :type => :rvalue,
              :doc => <<-EOD
    This internal function checks whether a list of arguments is configured
    in the current boot args based on the input parameters.

    EOD
  ) do |args|

    func_name = "check_grub_config()"

    raise(Puppet::ParseError, "#{func_name}: Requires 1 argument, " +
          "#{args.size} given") if args.size != 1

    expected = args[0]
    raise(Puppet::ParseError, "#{func_name}: first argument must be a string") \
      unless expected.instance_of? String

    # get the current boot args
    cmd = Facter.value(:get_cmdline)
    cmd_array = cmd.split()

    value = true
    expected.split().each do |element|
      value = cmd_array.include?(element)
      if value == false
        Puppet.debug("#{element} is not present in #{cmd}")
        return value
      end
    end
    value
  end
end
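For reference, the verification above can be sketched outside Puppet. A minimal Python equivalent (hypothetical; not part of this change) of the same check against the kernel command line:

    # Hypothetical sketch: every expected token must appear verbatim in the
    # current boot arguments, mirroring check_grub_config() above.
    def check_boot_args(expected):
        with open('/proc/cmdline') as f:
            cmd_array = f.read().split()
        for element in expected.split():
            if element not in cmd_array:
                print("%s is not present in the boot args" % element)
                return False
        return True

    # e.g. check_boot_args("default_hugepagesz=2M hugepagesz=2M hugepages=0")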
puppet-manifests/src/modules/platform/manifests/compute.pp (new file, 246 lines)
@@ -0,0 +1,246 @@
class platform::compute::grub::params (
  $n_cpus = '',
  $cpu_options = '',
  $m_hugepages = 'hugepagesz=2M hugepages=0',
  $default_pgsz = 'default_hugepagesz=2M',
  $keys = ['kvm-intel.eptad', 'default_hugepagesz', 'hugepagesz', 'hugepages', 'isolcpus', 'nohz_full', 'rcu_nocbs', 'kthread_cpus', 'irqaffinity'],
) {

  if $::is_broadwell_processor {
    $eptad = 'kvm-intel.eptad=0'
  } else {
    $eptad = ''
  }

  if $::is_gb_page_supported {
    $gb_hugepages = "hugepagesz=1G hugepages=$::number_of_numa_nodes"
  } else {
    $gb_hugepages = ''
  }

  $grub_updates = strip("${eptad} ${$gb_hugepages} ${m_hugepages} ${default_pgsz} ${cpu_options}")
}

class platform::compute::grub::update
  inherits ::platform::compute::grub::params {

  notice("Updating grub configuration")

  $to_be_removed = join($keys, " ")
  exec { "Remove the cpu arguments":
    command => "grubby --update-kernel=ALL --remove-args='$to_be_removed'",
  } ->
  exec { "Add the cpu arguments":
    command => "grubby --update-kernel=ALL --args='$grub_updates'",
  }
}

class platform::compute::grub::recovery {

  notice("Update Grub and Reboot")

  class {'platform::compute::grub::update': } -> Exec['reboot-recovery']

  exec { "reboot-recovery":
    command => "reboot",
  }
}

class platform::compute::grub::audit
  inherits ::platform::compute::grub::params {

  if ! str2bool($::is_initial_config_primary) {

    notice("Audit CPU and Grub Configuration")

    $expected_n_cpus = $::number_of_logical_cpus
    $n_cpus_ok = ("$n_cpus" == "$expected_n_cpus")

    $cmd_ok = check_grub_config($grub_updates)

    if $cmd_ok and $n_cpus_ok {
      $ensure = present
      notice("CPU and Boot Argument audit passed.")
    } else {
      $ensure = absent
      if !$cmd_ok {
        notice("Kernel Boot Argument Mismatch")
        include ::platform::compute::grub::recovery
      }
    }

    file { "/var/run/compute_huge_goenabled":
      ensure => $ensure,
      owner  => 'root',
      group  => 'root',
      mode   => '0644',
    }
  }
}

class platform::compute::grub::runtime {
  include ::platform::compute::grub::update
}

# Mounts virtual hugetlbfs filesystems for each supported page size
class platform::compute::hugetlbf {

  if str2bool($::is_hugetlbfs_enabled) {

    $fs_list = generate("/bin/bash", "-c", "ls -1d /sys/kernel/mm/hugepages/hugepages-*")
    $array = split($fs_list, '\n')
    $array.each | String $val | {
      $page_name = generate("/bin/bash", "-c", "basename $val")
      $page_size = strip(regsubst($page_name, 'hugepages-', ''))
      $hugemnt = "/mnt/huge-$page_size"
      $options = "pagesize=${page_size}"

      notice("Mounting hugetlbfs at: $hugemnt")
      exec { "create $hugemnt":
        command => "mkdir -p ${hugemnt}",
        onlyif  => "test ! -d ${hugemnt}",
      } ->
      mount { "${hugemnt}":
        name     => "${hugemnt}",
        device   => 'none',
        fstype   => 'hugetlbfs',
        ensure   => 'mounted',
        options  => "${options}",
        atboot   => 'yes',
        remounts => true,
      }
    }
  }
}

class platform::compute::hugepage::params (
  $nr_hugepages_2M = undef,
  $nr_hugepages_1G = undef,
  $vswitch_2M_pages = '',
  $vswitch_1G_pages = '',
  $vm_4K_pages = '',
  $vm_2M_pages = '',
  $vm_1G_pages = '',
) {}


define allocate_pages (
  $path,
  $page_count,
) {
  exec { "Allocate ${page_count} ${path}":
    command => "echo $page_count > $path",
    onlyif  => "test -f $path",
  }
}

# Allocates HugeTLB memory according to the attributes specified in
# nr_hugepages_2M and nr_hugepages_1G
class platform::compute::allocate
  inherits ::platform::compute::hugepage::params {

  # determine the node file system
  if str2bool($::is_per_numa_supported) {
    $nodefs = '/sys/devices/system/node'
  } else {
    $nodefs = '/sys/kernel/mm'
  }

  if $nr_hugepages_2M != undef {
    $nr_hugepages_2M_array = regsubst($nr_hugepages_2M, '[\(\)\"]', '', 'G').split(' ')
    $nr_hugepages_2M_array.each | String $val | {
      $per_node_2M = $val.split(':')
      if size($per_node_2M) == 3 {
        $node = $per_node_2M[0]
        $page_size = $per_node_2M[1]
        allocate_pages { "Start ${node} ${page_size}":
          path       => "${nodefs}/${node}/hugepages/hugepages-${page_size}/nr_hugepages",
          page_count => $per_node_2M[2],
        }
      }
    }
  }

  if $nr_hugepages_1G != undef {
    $nr_hugepages_1G_array = regsubst($nr_hugepages_1G, '[\(\)\"]', '', 'G').split(' ')
    $nr_hugepages_1G_array.each | String $val | {
      $per_node_1G = $val.split(':')
      if size($per_node_1G) == 3 {
        $node = $per_node_1G[0]
        $page_size = $per_node_1G[1]
        allocate_pages { "Start ${node} ${page_size}":
          path       => "${nodefs}/${node}/hugepages/hugepages-${page_size}/nr_hugepages",
          page_count => $per_node_1G[2],
        }
      }
    }
  }
}

class platform::compute::extend
  inherits ::platform::compute::hugepage::params {

  # nova-compute reads extended nova compute options on init;
  # used with nova accounting
  file { "/etc/nova/compute_extend.conf":
    ensure  => 'present',
    replace => true,
    content => template('platform/compute_extend.conf.erb')
  }
}

# Mount resctrl to allow Cache Allocation Technology per VM
class platform::compute::resctrl {

  if str2bool($::is_resctrl_supported) {
    mount { "/sys/fs/resctrl":
      name     => '/sys/fs/resctrl',
      device   => 'resctrl',
      fstype   => 'resctrl',
      ensure   => 'mounted',
      atboot   => 'yes',
      remounts => true,
    }
  }
}

# Set Power Management QoS resume latency constraints for CPUs.
# The PM QoS resume latency limit is set to shallow C-state for vswitch CPUs.
# All other CPUs are allowed to go to the deepest C-state available.
class platform::compute::pmqos (
  $low_wakeup_cpus = '',
  $hight_wakeup_cpus = '',
) {

  if str2bool($::is_compute_subfunction) and str2bool($::is_lowlatency_subfunction) {

    $script = "/usr/bin/set-cpu-wakeup-latency.sh"

    # Set low wakeup latency (shallow C-state) for vswitch CPUs using PM QoS interface
    exec { "low-wakeup-latency":
      command   => "${script} low ${low_wakeup_cpus}",
      onlyif    => "test -f ${script}",
      logoutput => true,
    }

    # Set high wakeup latency (deep C-state) for non-vswitch CPUs using PM QoS interface
    exec { "high-wakeup-latency":
      command   => "${script} high ${hight_wakeup_cpus}",
      onlyif    => "test -f ${script}",
      logoutput => true,
    }
  }
}

class platform::compute {

  Class[$name] -> Class['::platform::vswitch']
  Class[$name] -> Class['::nova::compute']

  require ::platform::compute::grub::audit
  require ::platform::compute::hugetlbf
  require ::platform::compute::allocate
  require ::platform::compute::pmqos
  require ::platform::compute::resctrl
  require ::platform::compute::extend
}
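The allocate class above consumes hieradata strings of the form '("node0:2048kB:512" "node1:2048kB:512")', which _get_host_hugepage_config generates further down. A minimal Python sketch (hypothetical, for illustration only) of the same parse and the sysfs paths it writes to:

    # Hypothetical sketch of the parsing done by platform::compute::allocate:
    # strip parentheses/quotes, split entries, and map each node:pagesize:count
    # triple to its nr_hugepages sysfs path.
    def parse_hugepage_spec(spec, nodefs='/sys/devices/system/node'):
        entries = spec.replace('(', '').replace(')', '').replace('"', '').split()
        for entry in entries:
            fields = entry.split(':')
            if len(fields) == 3:
                node, page_size, count = fields
                yield ("%s/%s/hugepages/hugepages-%s/nr_hugepages"
                       % (nodefs, node, page_size), int(count))

    for path, count in parse_hugepage_spec('("node0:2048kB:512" "node1:2048kB:512")'):
        print(path, count)
    # /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages 512
    # /sys/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages 512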
@@ -0,0 +1,12 @@
###########################################################################
#
# compute_extend.conf contains compute extended nova options
#
# - This file is managed by Puppet. DO NOT EDIT.
#
###########################################################################
compute_vswitch_2M_pages=<%= @vswitch_2M_pages.gsub!(/\A"|"\Z/, '') %>
compute_vswitch_1G_pages=<%= @vswitch_1G_pages.gsub!(/\A"|"\Z/, '') %>
compute_vm_4K_pages=<%= @vm_4K_pages.gsub!(/\A"|"\Z/, '') %>
compute_vm_2M_pages=<%= @vm_2M_pages.gsub!(/\A"|"\Z/, '') %>
compute_vm_1G_pages=<%= @vm_1G_pages.gsub!(/\A"|"\Z/, '') %>
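The gsub! calls strip the surrounding double quotes that the hieradata values carry. A hypothetical Python illustration of the rendered result:

    # Hypothetical sketch of the ERB quote-stripping: a hieradata value such
    # as '"512,512"' renders without its enclosing double quotes.
    import re

    def strip_quotes(value):
        return re.sub(r'^"|"$', '', value)

    print("compute_vswitch_2M_pages=%s" % strip_quotes('"512,512"'))
    # compute_vswitch_2M_pages=512,512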
@@ -151,6 +151,7 @@ class AgentManager(service.PeriodicService):
        self._notify_subfunctions_alarm_raise = False
        self._tpmconfig_rpc_failure = False
        self._tpmconfig_host_first_apply = False
        self._first_grub_update = False

    def start(self):
        super(AgentManager, self).start()
@@ -316,6 +317,16 @@ class AgentManager(service.PeriodicService):
        except subprocess.CalledProcessError as e:
            LOG.error("subprocess error: (%d)", e.returncode)

    def _force_grub_update(self):
        """ Force update the grub on the first AIO controller after the initial
            config is completed
        """
        if (not self._first_grub_update and
                os.path.isfile(tsc.INITIAL_CONFIG_COMPLETE_FLAG)):
            self._first_grub_update = True
            return True
        return False

    def periodic_tasks(self, context, raise_on_error=False):
        """ Periodic tasks are run at pre-specified intervals. """

@@ -712,11 +723,13 @@ class AgentManager(service.PeriodicService):
                LOG.exception("Sysinv Agent uncaught exception updating inuma.")
                pass

            force_grub_update = self._force_grub_update()
            try:
                # may get duplicate key if already sent on earlier init
                rpcapi.icpus_update_by_ihost(icontext,
                                             ihost['uuid'],
                                             icpus)
                                             icpus,
                                             force_grub_update)
            except RemoteError as e:
                LOG.error("icpus_update_by_ihost RemoteError exc_type=%s" %
                          e.exc_type)
@@ -731,19 +744,21 @@ class AgentManager(service.PeriodicService):
                pass

            imemory = self._inode_operator.inodes_get_imemory()
            try:
                # may get duplicate key if already sent on earlier init
                rpcapi.imemory_update_by_ihost(icontext,
                                               ihost['uuid'],
                                               imemory)
            except RemoteError as e:
                LOG.error("imemory_update_by_ihost RemoteError exc_type=%s" %
                          e.exc_type)
                # Allow the audit to update
                pass
            except:
                LOG.exception("Sysinv Agent exception updating imemory conductor.")
                pass
            if imemory:
                try:
                    # may get duplicate key if already sent on earlier init
                    rpcapi.imemory_update_by_ihost(icontext,
                                                   ihost['uuid'],
                                                   imemory)
                except RemoteError as e:
                    LOG.error("imemory_update_by_ihost RemoteError exc_type=%s" %
                              e.exc_type)
                    # Allow the audit to update
                    pass
                except:
                    LOG.exception("Sysinv Agent exception updating imemory "
                                  "conductor.")
                    pass

            idisk = self._idisk_operator.idisk_get()
            try:
@@ -1283,7 +1298,9 @@ class AgentManager(service.PeriodicService):
        try:
            # runtime manifests cannot be applied without the initial
            # configuration applied
            if not os.path.isfile(tsc.INITIAL_CONFIG_COMPLETE_FLAG):
            force = config_dict.get('force', False)
            if (not force and
                    not os.path.isfile(tsc.INITIAL_CONFIG_COMPLETE_FLAG)):
                return

            personalities = config_dict.get('personalities')
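The last hunk gates runtime manifests on the initial configuration unless the caller forces the apply. A condensed sketch (hypothetical; the flag path below is an assumption for illustration) of the new gating:

    import os

    # Hypothetical condensation of the gating added above: apply only when
    # forced, or once the initial config complete flag exists.
    def should_apply(config_dict,
                     flag='/etc/platform/.initial_config_complete'):
        return config_dict.get('force', False) or os.path.isfile(flag)

    print(should_apply({'force': True}))  # True even before initial config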
@@ -19,18 +19,13 @@ from os import listdir
from os.path import isfile, join
import random
import re
import shlex
import shutil
import signal
import six
import socket
import subprocess
import tempfile


from sysinv.common import exception
from sysinv.common import utils
from sysinv.openstack.common import log as logging
import tsconfig.tsconfig as tsc

LOG = logging.getLogger(__name__)
@@ -97,6 +92,30 @@ class NodeOperator(object):
        # self._get_free_memory_MiB()
        # self._get_free_memory_nodes_MiB()

    def _is_strict(self):
        with open(os.devnull, "w") as fnull:
            try:
                output = subprocess.check_output(
                    ["cat", "/proc/sys/vm/overcommit_memory"],
                    stderr=fnull)
                if int(output) == 2:
                    return True
            except subprocess.CalledProcessError as e:
                LOG.info("Failed to check for overcommit, error (%s)",
                         e.output)
        return False

    def _is_hugepages_allocated(self):
        with open(os.devnull, "w") as fnull:
            try:
                output = subprocess.check_output(
                    ["cat", "/proc/sys/vm/nr_hugepages"], stderr=fnull)
                if int(output) > 0:
                    return True
            except subprocess.CalledProcessError as e:
                LOG.info("Failed to check hugepages, error (%s)", e.output)
        return False

    def convert_range_string_to_list(self, s):
        olist = []
        s = s.strip()
@@ -267,7 +286,7 @@ class NodeOperator(object):
        return [name for name in listdir(dir)
                if os.path.isdir(join(dir, name))]

    def _set_default_avs_hugesize(self, attr):
    def _set_default_avs_hugesize(self):
        '''
        Set the default memory size for avs hugepages when it must fall back to
        2MB pages because there are no 1GB pages. In a virtual environment we
@@ -281,18 +300,10 @@ class NodeOperator(object):
        else:
            avs_hugepages_nr = AVS_REAL_MEMORY_MB / hugepage_size

        memtotal_mib = attr.get('memtotal_mib', 0)
        memavail_mib = attr.get('memavail_mib', 0)
        memtotal_mib = memtotal_mib - (hugepage_size * avs_hugepages_nr)
        memavail_mib = min(memtotal_mib, memavail_mib)

        ## Create a new set of dict attributes
        hp_attr = {'avs_hugepages_size_mib': hugepage_size,
                   'avs_hugepages_nr': avs_hugepages_nr,
                   'avs_hugepages_avail': 0,
                   'vm_hugepages_use_1G': 'False',
                   'memtotal_mib': memtotal_mib,
                   'memavail_mib': memavail_mib}
                   'avs_hugepages_avail': 0}
        return hp_attr

    def _inode_get_memory_hugepages(self):
@@ -303,17 +314,34 @@ class NodeOperator(object):
        '''

        imemory = []
        num_2M_for_1G = 512
        num_4K_for_2M = 512
        Ki = 1024
        SZ_2M_Ki = 2048
        SZ_1G_Ki = 1048576
        controller_min_MB = 6000
        compute_min_MB = 1600
        compute_min_non0_MB = 500

        re_node_MemFreeInit = re.compile(r'^Node\s+\d+\s+\MemFreeInit:\s+(\d+)')
        initial_compute_config_completed = \
            os.path.exists(tsc.INITIAL_COMPUTE_CONFIG_COMPLETE)

        # check if it is the initial report before the huge pages are allocated
        initial_report = not initial_compute_config_completed

        # do not send a report if the initial compute config is completed and
        # the huge pages have not been allocated, i.e. during a subsequent
        # reboot before the manifest allocates the huge pages
        if (initial_compute_config_completed and
                not self._is_hugepages_allocated()):
            return imemory

        for node in range(self.num_nodes):
            attr = {}
            Total_MiB = 0
            Free_MiB = 0
            Total_HP_MiB = 0  # Total memory (MiB) currently configured in HPs
            Free_HP_MiB = 0

            # Check AVS and Libvirt memory
            # Loop through configured hugepage sizes of this node and record
            # total number and number free
            hugepages = "/sys/devices/system/node/node%d/hugepages" % node

            try:
@@ -325,15 +353,14 @@ class NodeOperator(object):
                    # role via size; also from /etc/nova/compute_reserved.conf
                    if sizesplit[1].startswith("1048576kB"):
                        hugepages_role = "avs"
                        size = int(1048576 / 1024)
                        size = int(SZ_1G_Ki / Ki)
                    else:
                        hugepages_role = "vm"
                        size = int(2048 / 1024)
                        size = int(SZ_2M_Ki / Ki)

                    nr_hugepages = 0
                    free_hugepages = 0

                    # files = os.walk(subdir).next()[2]
                    mydir = hugepages + '/' + subdir
                    files = [f for f in listdir(mydir) if isfile(join(mydir, f))]

@@ -345,11 +372,11 @@ class NodeOperator(object):
                        if file.startswith("free_hugepages"):
                            free_hugepages = int(f.readline())

                    Total_HP_MiB = Total_HP_MiB + int(nr_hugepages * size)
                    Free_HP_MiB = Free_HP_MiB + int(free_hugepages * size)

                    # Libvirt hugepages can now be 1G and 2M; can't only look
                    # at 2M pages
                    Total_MiB = Total_MiB + int(nr_hugepages * size)
                    Free_MiB = Free_MiB + int(free_hugepages * size)

                    if hugepages_role == "avs":
                        avs_hugepages_nr = AVS_REAL_MEMORY_MB / size
                        hp_attr = {
@@ -359,18 +386,19 @@ class NodeOperator(object):
                            'vm_hugepages_nr_1G':
                                (nr_hugepages - avs_hugepages_nr),
                            'vm_hugepages_avail_1G': free_hugepages,
                            'vm_hugepages_use_1G': 'True'
                        }
                    else:
                        if len(subdirs) == 1:
                            hp_attr = {
                                'vm_hugepages_nr_2M': (nr_hugepages - 256),
                                'vm_hugepages_avail_2M': free_hugepages,
                            }
                        else:
                            hp_attr = {
                                'vm_hugepages_nr_2M': nr_hugepages,
                                'vm_hugepages_avail_2M': free_hugepages,
                            }
                        hp_attr = self._set_default_avs_hugesize()
                        hp_attr.update({'vm_hugepages_use_1G': 'False'})

                        avs_hugepages_nr = hp_attr.get('avs_hugepages_nr', 0)
                        hp_attr.update({
                            'vm_hugepages_avail_2M': free_hugepages,
                            'vm_hugepages_nr_2M':
                                (nr_hugepages - avs_hugepages_nr)
                        })

                    attr.update(hp_attr)
@@ -378,115 +406,134 @@ class NodeOperator(object):
                # silently ignore IO errors (eg. file missing)
                pass

            # Read the total possible number of libvirt (2M and 1G) hugepages,
            # and total available memory determined by compute-huge.
            hp_pages_2M = []
            hp_pages_1G = []
            tot_memory = []
            huge_total_attrs = {}
            hp_total_info = "/etc/nova/compute_hugepages_total.conf"
            try:
                with open(hp_total_info, 'r') as infile:
                    for line in infile:
                        possible_memorys = line.split("=")
                        if possible_memorys[0] == 'compute_hp_total_2M':
                            hp_pages_2M = map(int, possible_memorys[1].split(','))
                            continue
            # Get the free and total memory from meminfo for this node
            re_node_MemTotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
            re_node_MemFree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
            re_node_FilePages = \
                re.compile(r'^Node\s+\d+\s+\FilePages:\s+(\d+)')
            re_node_SReclaim = \
                re.compile(r'^Node\s+\d+\s+\SReclaimable:\s+(\d+)')
            re_node_CommitLimit = \
                re.compile(r'^Node\s+\d+\s+\CommitLimit:\s+(\d+)')
            re_node_Committed_AS = \
                re.compile(r'^Node\s+\d+\s+\'Committed_AS:\s+(\d+)')

                        elif possible_memorys[0] == 'compute_hp_total_1G':
                            hp_pages_1G = map(int, possible_memorys[1].split(','))
                            continue
            Free_KiB = 0  # Free Memory (KiB) available
            Total_KiB = 0  # Total Memory (KiB)
            limit = 0  # only used in strict accounting
            committed = 0  # only used in strict accounting

                        elif possible_memorys[0] == 'compute_total_MiB':
                            tot_memory = map(int, possible_memorys[1].split(','))
                            continue

            except IOError:
                # silently ignore IO errors (eg. file missing)
                pass

            huge_total_attrs = {
                'vm_hugepages_possible_2M': hp_pages_2M[node],
                'vm_hugepages_possible_1G': hp_pages_1G[node],
            }

            # The remaining VM pages are allocated to 4K pages
            vm_hugepages_2M = attr.get('vm_hugepages_nr_2M')
            vm_hugepages_1G = attr.get('vm_hugepages_nr_1G')

            vm_hugepages_4K = (hp_pages_2M[node] - vm_hugepages_2M)
            if vm_hugepages_1G:
                vm_hugepages_4K -= (vm_hugepages_1G * num_2M_for_1G)

            vm_hugepages_4K = vm_hugepages_4K * num_4K_for_2M

            # Clip 4K pages, just like compute-huge.
            min_4K = 32 * 1024 / 4
            if vm_hugepages_4K < min_4K:
                vm_hugepages_4K = 0

            hp_attrs_4K = {
                'vm_hugepages_nr_4K': vm_hugepages_4K,
            }

            attr.update(huge_total_attrs)
            attr.update(hp_attrs_4K)

            # Include 4K pages in the displayed VM memtotal.
            # Since there is no way to track used VM 4K pages, we treat them
            # as available, but that is bogus.
            vm_4K_MiB = vm_hugepages_4K * 4 / 1024
            Total_MiB += vm_4K_MiB
            Free_MiB += vm_4K_MiB
            self.total_memory_nodes_MiB.append(Total_MiB)
            attroverview = {
                'numa_node': node,
                'memtotal_mib': Total_MiB,
                'memavail_mib': Free_MiB,
                'hugepages_configured': 'True',
            }

            attr.update(attroverview)

            new_attrs = {}
            if 'avs_hugepages_size_mib' not in attr:
                ## No 1GB pages were found so borrow from the VM 2MB pool
                ##
                ## FIXME:
                ## It is unfortunate that memory is categorized as VM or
                ## AVS here on the compute node. It would have been more
                ## flexible if memory parameters were collected and sent
                ## up to the controller without making any decisions about
                ## what the memory was going to be used for. That type of
                ## decision is better left to the controller (or better
                ## yet, to the user)
                new_attrs = self._set_default_avs_hugesize(attr)
            else:
                new_attrs = {'vm_hugepages_use_1G': 'True'}

            attr.update(new_attrs)

            # Get the total memory of the numa node
            memTotal_mib = 0
            meminfo = "/sys/devices/system/node/node%d/meminfo_extra" % node
            meminfo = "/sys/devices/system/node/node%d/meminfo" % node
            try:
                with open(meminfo, 'r') as infile:
                    for line in infile:
                        match = re_node_MemFreeInit.search(line)
                        match = re_node_MemTotal.search(line)
                        if match:
                            memTotal_mib = int(match.group(1))
                            Total_KiB += int(match.group(1))
                            continue
                        match = re_node_MemFree.search(line)
                        if match:
                            Free_KiB += int(match.group(1))
                            continue
                        match = re_node_FilePages.search(line)
                        if match:
                            Free_KiB += int(match.group(1))
                            continue
                        match = re_node_SReclaim.search(line)
                        if match:
                            Free_KiB += int(match.group(1))
                            continue
                        match = re_node_CommitLimit.search(line)
                        if match:
                            limit = int(match.group(1))
                            continue
                        match = re_node_Committed_AS.search(line)
                        if match:
                            committed = int(match.group(1))
                            continue

                if self._is_strict():
                    Free_KiB = limit - committed

            except IOError:
                # silently ignore IO errors (eg. file missing)
                pass

            memTotal_mib /= 1024
            if tot_memory[node]:
                memTotal_mib = tot_memory[node]
            node_attr = {
                'node_memtotal_mib': memTotal_mib,
            }
            attr.update(node_attr)
            # Calculate PSS
            Pss_MiB = 0
            if node == 0:
                cmd = 'cat /proc/*/smaps 2>/dev/null | awk \'/^Pss:/ ' \
                      '{a += $2;} END {printf "%d\\n", a/1024.0;}\''
                try:
                    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                            shell=True)
                    result = proc.stdout.read().strip()
                    Pss_MiB = int(result)
                except subprocess.CalledProcessError as e:
                    LOG.error("Cannot calculate PSS (%s) (%d)", cmd,
                              e.returncode)
                except OSError as e:
                    LOG.error("Failed to execute (%s) OS error (%d)", cmd,
                              e.errno)

            # need to multiply Total_MiB by 1024 to match compute_huge
            node_total_kib = Total_HP_MiB * Ki + Free_KiB + Pss_MiB * Ki

            # Read base memory from compute_reserved.conf
            base_mem_MiB = 0
            with open('/etc/nova/compute_reserved.conf', 'r') as infile:
                for line in infile:
                    if "COMPUTE_BASE_RESERVED" in line:
                        val = line.split("=")
                        base_reserves = val[1].strip('\n')[1:-1]
                        for reserve in base_reserves.split():
                            reserve = reserve.split(":")
                            if reserve[0].strip('"') == "node%d" % node:
                                base_mem_MiB = int(reserve[1].strip('MB'))

            # On small systems, clip memory overhead to more reasonable minimal
            # settings
            if (Total_KiB / Ki - base_mem_MiB) < 1000:
                if node == 0:
                    base_mem_MiB = compute_min_MB
                    if tsc.nodetype == 'controller':
                        base_mem_MiB += controller_min_MB
                else:
                    base_mem_MiB = compute_min_non0_MB

            Eng_KiB = node_total_kib - base_mem_MiB * Ki

            vswitch_mem_kib = (attr.get('avs_hugepages_size_mib', 0) *
                               attr.get('avs_hugepages_nr', 0) * Ki)

            VM_KiB = (Eng_KiB - vswitch_mem_kib)

            max_vm_pages_2M = VM_KiB / SZ_2M_Ki
            max_vm_pages_1G = VM_KiB / SZ_1G_Ki

            attr.update({
                'vm_hugepages_possible_2M': max_vm_pages_2M,
                'vm_hugepages_possible_1G': max_vm_pages_1G,
            })

            # calculate 100% 2M pages if it is the initial report and the huge
            # pages have not been allocated
            if initial_report:
                Total_HP_MiB += int(max_vm_pages_2M * (SZ_2M_Ki / Ki))
                Free_HP_MiB = Total_HP_MiB
                attr.update({
                    'vm_hugepages_nr_2M': max_vm_pages_2M,
                    'vm_hugepages_avail_2M': max_vm_pages_2M,
                    'vm_hugepages_nr_1G': 0
                })

            attr.update({
                'numa_node': node,
                'memtotal_mib': Total_HP_MiB,
                'memavail_mib': Free_HP_MiB,
                'hugepages_configured': 'True',
                'node_memtotal_mib': node_total_kib / 1024,
            })

            imemory.append(attr)
@@ -502,7 +549,6 @@ class NodeOperator(object):
        self.total_memory_MiB = 0

        re_node_MemTotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
        re_node_MemFreeInit = re.compile(r'^Node\s+\d+\s+\MemFreeInit:\s+(\d+)')
        re_node_MemFree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
        re_node_FilePages = re.compile(r'^Node\s+\d+\s+\FilePages:\s+(\d+)')
        re_node_SReclaim = re.compile(r'^Node\s+\d+\s+\SReclaimable:\s+(\d+)')
@@ -538,19 +584,6 @@ class NodeOperator(object):
                # silently ignore IO errors (eg. file missing)
                pass

            # WRS kernel customization to exclude kernel overheads
            meminfo = "/sys/devices/system/node/node%d/meminfo_extra" % node
            try:
                with open(meminfo, 'r') as infile:
                    for line in infile:
                        match = re_node_MemFreeInit.search(line)
                        if match:
                            Total_MiB = int(match.group(1))
                            continue
            except IOError:
                # silently ignore IO errors (eg. file missing)
                pass

            Total_MiB /= 1024
            Free_MiB /= 1024
            self.total_memory_nodes_MiB.append(Total_MiB)
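The _is_strict() helper added above switches the free-memory calculation to commit-based accounting. A hypothetical worked sketch with made-up per-node meminfo values (KiB):

    # Hypothetical sketch: under strict overcommit (overcommit_memory == 2)
    # free memory is CommitLimit - Committed_AS; otherwise it is
    # MemFree + FilePages + SReclaimable, as in the hunks above.
    def node_free_kib(mi, strict):
        if strict:
            return mi['CommitLimit'] - mi['Committed_AS']
        return mi['MemFree'] + mi['FilePages'] + mi['SReclaimable']

    sample = {'MemFree': 8000000, 'FilePages': 1200000,
              'SReclaimable': 300000, 'CommitLimit': 12000000,
              'Committed_AS': 9500000}  # made-up values
    print(node_free_kib(sample, strict=True))   # 2500000
    print(node_free_kib(sample, strict=False))  # 9500000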
@@ -436,8 +436,7 @@ class CPUController(rest.RestController):

        rpc_port.save()

        if (utils.get_system_mode() == constants.SYSTEM_MODE_SIMPLEX and
                action == constants.APPLY_ACTION):
        if action == constants.APPLY_ACTION:
            # perform rpc to conductor to perform config apply
            pecan.request.rpcapi.update_cpu_config(
                pecan.request.context)
@@ -305,9 +305,9 @@ class HostStatesController(rest.RestController):
                     (cpu.uuid, values))
            pecan.request.dbapi.icpu_update(cpu.uuid, values)

        # perform inservice apply if this is a controller in simplex state
        if utils.is_host_simplex_controller(ihost):
            pecan.request.rpcapi.update_cpu_config(pecan.request.context)
        # perform inservice apply
        pecan.request.rpcapi.update_cpu_config(pecan.request.context,
                                               host_uuid)

        return self._get_host_cpus_collection(ihost.uuid)
@@ -3478,6 +3478,46 @@ class HostController(rest.RestController):
                     (ihost['hostname'], values))
            pecan.request.dbapi.imemory_update(m.uuid, values)

    @staticmethod
    def _update_vm_4k_pages(ihost):
        """
        Update VM 4K huge pages.
        """
        ihost_inodes = pecan.request.dbapi.inode_get_by_ihost(ihost['uuid'])

        for node in ihost_inodes:
            mems = pecan.request.dbapi.imemory_get_by_inode(node['id'])
            for m in mems:
                if m.hugepages_configured:
                    vm_hugepages_nr_2M = m.vm_hugepages_nr_2M_pending \
                        if m.vm_hugepages_nr_2M_pending is not None \
                        else m.vm_hugepages_nr_2M
                    vm_hugepages_nr_1G = m.vm_hugepages_nr_1G_pending \
                        if m.vm_hugepages_nr_1G_pending is not None \
                        else m.vm_hugepages_nr_1G

                    vm_hugepages_4K = \
                        (m.node_memtotal_mib - m.platform_reserved_mib)
                    vm_hugepages_4K -= \
                        (m.avs_hugepages_nr * m.avs_hugepages_size_mib)
                    vm_hugepages_4K -= \
                        (constants.MIB_2M * vm_hugepages_nr_2M)
                    vm_hugepages_4K -= \
                        (constants.MIB_1G * vm_hugepages_nr_1G)
                    vm_hugepages_4K = \
                        (constants.NUM_4K_PER_MiB * vm_hugepages_4K)

                    # Clip 4K pages
                    min_4K = 32 * constants.Ki / 4
                    if vm_hugepages_4K < min_4K:
                        vm_hugepages_4K = 0

                    value = {'vm_hugepages_nr_4K': vm_hugepages_4K}
                    LOG.info("Set VM 4K pages for host (%s) node (%d) pages "
                             "(%d)" % (ihost['hostname'], node['id'],
                                       vm_hugepages_4K))
                    pecan.request.dbapi.imemory_update(m.uuid, value)

    @staticmethod
    def _semantic_mtc_check_action(hostupdate, action):
        """
@@ -4739,6 +4779,9 @@ class HostController(rest.RestController):
        if align_2M_memory or align_1G_memory:
            self._align_pending_memory(ihost, align_2M_memory, align_1G_memory)

        # calculate the VM 4K huge pages for nova
        self._update_vm_4k_pages(ihost)

        if cutils.is_virtual() or cutils.is_virtual_compute(ihost):
            mib_platform_reserved_no_io = mib_reserved
            required_platform = \
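A hypothetical worked example of the 4K-page arithmetic in _update_vm_4k_pages, using the constants added in the next hunk (MIB_2M=2, MIB_1G=1024, Ki=1024, NUM_4K_PER_MiB=256) and made-up node values:

    # All inputs below are made-up illustration values (MiB).
    node_memtotal_mib = 16384
    platform_reserved_mib = 1600
    avs_mib = 1 * 1024            # one 1G vswitch page
    nr_2M, nr_1G = 2048, 4        # pending VM hugepage counts

    vm_4k_mib = (node_memtotal_mib - platform_reserved_mib - avs_mib
                 - 2 * nr_2M - 1024 * nr_1G)   # 5568 MiB left for 4K pages
    vm_hugepages_4K = 256 * vm_4k_mib          # 1425408 pages

    # Clip: below 32 MiB worth of 4K pages (32 * 1024 / 4) the count is zeroed.
    if vm_hugepages_4K < 32 * 1024 / 4:
        vm_hugepages_4K = 0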
@@ -206,6 +206,8 @@ REGION_SECONDARY = "External"
# Hugepage sizes in MiB
MIB_2M = 2
MIB_1G = 1024
Ki = 1024
NUM_4K_PER_MiB = 256

# Dynamic IO Resident Set Size (RSS) in MiB per socket
DISK_IO_RESIDENT_SET_SIZE_MIB = 2000
@@ -2553,7 +2553,8 @@ class ConductorManager(service.PeriodicService):
            LOG.info('%9s : %s' % ('thread_id', t))

    def icpus_update_by_ihost(self, context,
                              ihost_uuid, icpu_dict_array):
                              ihost_uuid, icpu_dict_array,
                              force_grub_update=False):
        """Create cpus for an ihost with the supplied data.

        This method allows records for cpus for ihost to be created.
@@ -2561,6 +2562,7 @@ class ConductorManager(service.PeriodicService):
        :param context: an admin context
        :param ihost_uuid: ihost uuid unique id
        :param icpu_dict_array: initial values for cpu objects
        :param force_grub_update: bool value to force grub update
        :returns: pass or fail
        """

@@ -2626,6 +2628,9 @@ class ConductorManager(service.PeriodicService):
                subfunctions=ihost.get('subfunctions'),
                reference='current (unchanged)',
                sockets=cs, cores=cc, threads=ct)
            if ihost.administrative == constants.ADMIN_LOCKED and \
                    force_grub_update:
                self.update_cpu_config(context, ihost_uuid)
            return

        self.print_cpu_topology(hostname=ihost.get('hostname'),
@@ -2679,9 +2684,15 @@ class ConductorManager(service.PeriodicService):
            # info may have already been posted
            pass

        if (utils.is_host_simplex_controller(ihost) and
                ihost.administrative == constants.ADMIN_LOCKED):
            self.update_cpu_config(context)
        # if it is the first controller, wait for the initial config to
        # be completed
        if ((utils.is_host_simplex_controller(ihost) and
                os.path.isfile(tsc.INITIAL_CONFIG_COMPLETE_FLAG)) or
                (not utils.is_host_simplex_controller(ihost) and
                 ihost.administrative == constants.ADMIN_LOCKED)):
            LOG.info("Update CPU grub config, host_uuid (%s), name (%s)"
                     % (ihost_uuid, ihost.get('hostname')))
            self.update_cpu_config(context, ihost_uuid)

        return

@@ -2753,6 +2764,13 @@ class ConductorManager(service.PeriodicService):
                    mem = self.dbapi.imemory_create(forihostid, mem_dict)
                else:
                    for imem in imems:
                        # Include 4K pages in the displayed VM memtotal
                        if imem.vm_hugepages_nr_4K is not None:
                            vm_4K_mib = \
                                (imem.vm_hugepages_nr_4K /
                                 constants.NUM_4K_PER_MiB)
                            mem_dict['memtotal_mib'] += vm_4K_mib
                            mem_dict['memavail_mib'] += vm_4K_mib
                        pmem = self.dbapi.imemory_update(imem['uuid'],
                                                         mem_dict)
        except:
@@ -6689,19 +6707,28 @@ class ConductorManager(service.PeriodicService):
        # discard temporary file
        os.remove(hosts_file_temp)

    def update_cpu_config(self, context):
        """Update the cpu assignment configuration on an AIO system"""
        LOG.info("update_cpu_config")
    def update_cpu_config(self, context, host_uuid):
        """Update the cpu assignment configuration on a host"""

        try:
            hostname = socket.gethostname()
            host = self.dbapi.ihost_get(hostname)
        except Exception as e:
            LOG.warn("Failed to get local host object: %s", str(e))
            return
        command = ['/etc/init.d/compute-huge.sh', 'reload']
        rpcapi = agent_rpcapi.AgentAPI()
        rpcapi.execute_command(context, host_uuid=host.uuid, command=command)
        # only apply the manifest on the host that has the compute subfunction
        host = self.dbapi.ihost_get(host_uuid)
        if constants.COMPUTE in host.subfunctions:
            force = (not utils.is_host_simplex_controller(host))
            LOG.info("update_cpu_config, host uuid: (%s), force: (%s)",
                     host_uuid, str(force))
            personalities = [constants.CONTROLLER, constants.COMPUTE]
            config_uuid = self._config_update_hosts(context,
                                                    personalities,
                                                    host_uuid=host_uuid)
            config_dict = {
                "personalities": personalities,
                "host_uuids": [host_uuid],
                "classes": ['platform::compute::grub::runtime']
            }
            self._config_apply_runtime_manifest(context, config_uuid,
                                                config_dict,
                                                force=force,
                                                host_uuid=host_uuid)

    def _update_resolv_file(self, context, config_uuid, personalities):
        """Generate and update the resolv.conf files on the system"""
@@ -7403,7 +7430,8 @@ class ConductorManager(service.PeriodicService):
                                          context,
                                          config_uuid,
                                          config_dict,
                                          host_uuid=None):
                                          host_uuid=None,
                                          force=False):

        """Apply manifests on all hosts affected by the supplied personalities.
        If host_uuid is set, only update hiera data for that host
@@ -7413,8 +7441,10 @@ class ConductorManager(service.PeriodicService):
        # is not set. If host_uuid is set only update hiera data for that host
        self._config_update_puppet(config_uuid,
                                   config_dict,
                                   host_uuid=host_uuid)
                                   host_uuid=host_uuid,
                                   force=force)

        config_dict.update({'force': force})
        rpcapi = agent_rpcapi.AgentAPI()
        rpcapi.config_apply_runtime_manifest(context,
                                             config_uuid=config_uuid,
@@ -282,7 +282,9 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy):
                                       inuma_dict_array=inuma_dict_array))

    def icpus_update_by_ihost(self, context,
                              ihost_uuid, icpu_dict_array):
                              ihost_uuid, icpu_dict_array,
                              force_grub_update,
                              ):
        """Create cpus for an ihost with the supplied data.

        This method allows records for cpus for ihost to be created.
@@ -290,13 +292,15 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy):
        :param context: an admin context
        :param ihost_uuid: ihost uuid unique id
        :param icpu_dict_array: initial values for cpu objects
        :param force_grub_update: bool value to force grub update
        :returns: pass or fail
        """

        return self.call(context,
                         self.make_msg('icpus_update_by_ihost',
                                       ihost_uuid=ihost_uuid,
                                       icpu_dict_array=icpu_dict_array))
                                       icpu_dict_array=icpu_dict_array,
                                       force_grub_update=force_grub_update))

    def imemory_update_by_ihost(self, context,
                                ihost_uuid, imemory_dict_array):
@@ -834,13 +838,15 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy):
                                       status=status,
                                       error=error))

    def update_cpu_config(self, context):
    def update_cpu_config(self, context, host_uuid):
        """Synchronously, have the conductor update the cpu
        configuration.

        :param context: request context.
        :param host_uuid: host unique uuid
        """
        return self.call(context, self.make_msg('update_cpu_config'))
        return self.call(context, self.make_msg('update_cpu_config',
                                                host_uuid=host_uuid))

    def iconfig_update_by_ihost(self, context,
                                ihost_uuid, imsg_dict):
@@ -4,6 +4,7 @@
# SPDX-License-Identifier: Apache-2.0
#

import collections
import abc
import itertools
import netaddr
@@ -213,3 +214,11 @@ class BasePuppet(object):
            s = "%s-%s" % (rng[0][1], rng[-1][1])
            ranges.append(s)
        return ','.join(ranges)

    def _get_numa_index_list(self, obj):
        """Create map of objects indexed by numa node"""
        obj_lists = collections.defaultdict(list)
        for index, o in enumerate(obj):
            o["_index"] = index
            obj_lists[o.numa_node].append(o)
        return obj_lists
@@ -4,18 +4,18 @@
# SPDX-License-Identifier: Apache-2.0
#

import copy
import itertools
import os

from sysinv.common import constants
from sysinv.common import exception
from sysinv.openstack.common import log as logging
from sysinv.common import utils

from tsconfig import tsconfig

from . import base

LOG = logging.getLogger(__name__)

HOSTNAME_INFRA_SUFFIX = '-infra'

NOVA_UPGRADE_LEVEL_NEWTON = 'newton'
@@ -65,6 +65,8 @@ class PlatformPuppet(base.BasePuppet):
        config.update(self._get_host_sysctl_config(host))
        config.update(self._get_host_drbd_config(host))
        config.update(self._get_host_upgrade_config(host))
        config.update(self._get_host_cpu_config(host))
        config.update(self._get_host_hugepage_config(host))
        return config

    def _get_static_software_config(self):
@@ -480,6 +482,159 @@ class PlatformPuppet(base.BasePuppet):
            })
        return config

    def _get_host_cpu_config(self, host):
        config = {}
        if constants.COMPUTE in utils.get_personalities(host):
            host_cpus = self._get_host_cpu_list(host, threads=True)
            if not host_cpus:
                return config

            host_cpus = sorted(host_cpus, key=lambda c: c.cpu)
            n_cpus = len(host_cpus)
            host_cpu_list = [c.cpu for c in host_cpus]

            platform_cpus = self._get_host_cpu_list(
                host, function=constants.PLATFORM_FUNCTION, threads=True)
            platform_cpus = sorted(platform_cpus, key=lambda c: c.cpu)
            platform_cpu_list = \
                "%s" % ','.join([str(c.cpu) for c in platform_cpus])

            vswitch_cpus = self._get_host_cpu_list(
                host, constants.VSWITCH_FUNCTION, threads=True)
            vswitch_cpus = sorted(vswitch_cpus, key=lambda c: c.cpu)
            vswitch_cpu_list = \
                "%s" % ','.join([str(c.cpu) for c in vswitch_cpus])

            # rcu_nocbs = all cores - platform cores
            rcu_nocbs = copy.deepcopy(host_cpu_list)
            for i in [int(s) for s in platform_cpu_list.split(',')]:
                rcu_nocbs.remove(i)

            # change the CPU list to ranges
            rcu_nocbs_ranges = ""
            for key, group in itertools.groupby(enumerate(rcu_nocbs),
                                                lambda (x, y): y - x):
                group = list(group)
                rcu_nocbs_ranges += "%s-%s," % (group[0][1], group[-1][1])
            rcu_nocbs_ranges = rcu_nocbs_ranges.rstrip(',')

            # non-vswitch CPUs = all cores - vswitch cores
            non_vswitch_cpus = host_cpu_list
            for i in [int(s) for s in vswitch_cpu_list.split(',')]:
                non_vswitch_cpus.remove(i)

            # change the CPU list to ranges
            non_vswitch_cpus_ranges = ""
            for key, group in itertools.groupby(enumerate(non_vswitch_cpus),
                                                lambda (x, y): y - x):
                group = list(group)
                non_vswitch_cpus_ranges += "\"%s-%s\"," % (group[0][1], group[-1][1])

            cpu_options = ""
            if constants.LOWLATENCY in host.subfunctions:
                vswitch_cpu_list_with_quotes = \
                    "\"%s\"" % ','.join([str(c.cpu) for c in vswitch_cpus])
                config.update({
                    'platform::compute::pmqos::low_wakeup_cpus':
                        vswitch_cpu_list_with_quotes,
                    'platform::compute::pmqos::hight_wakeup_cpus':
                        non_vswitch_cpus_ranges.rstrip(',')})
                vswitch_cpu_list = rcu_nocbs_ranges
                cpu_options += "nohz_full=%s " % vswitch_cpu_list

            cpu_options += "isolcpus=%s rcu_nocbs=%s kthread_cpus=%s " \
                           "irqaffinity=%s" % (vswitch_cpu_list,
                                               rcu_nocbs_ranges,
                                               platform_cpu_list,
                                               platform_cpu_list)
            config.update({
                'platform::compute::grub::params::n_cpus': n_cpus,
                'platform::compute::grub::params::cpu_options': cpu_options,
            })
        return config

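The rcu_nocbs/non-vswitch range construction above collapses consecutive CPU ids into "a-b" spans. A small standalone sketch (hypothetical) of the same itertools.groupby trick:

    import itertools

    # Hypothetical sketch: consecutive ids share the same (value - index)
    # difference, so groupby splits the sorted list at every gap.
    def to_ranges(cpus):
        ranges = []
        for _, group in itertools.groupby(enumerate(sorted(cpus)),
                                          lambda p: p[1] - p[0]):
            group = list(group)
            ranges.append("%s-%s" % (group[0][1], group[-1][1]))
        return ",".join(ranges)

    print(to_ranges([1, 2, 3, 6, 7, 10]))  # 1-3,6-7,10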
    def _get_host_hugepage_config(self, host):
        config = {}
        if constants.COMPUTE in utils.get_personalities(host):
            host_memory = self.dbapi.imemory_get_by_ihost(host.id)

            memory_numa_list = self._get_numa_index_list(host_memory)

            hugepages_2Ms = []
            hugepages_1Gs = []
            vswitch_2M_pages = []
            vswitch_1G_pages = []
            vm_4K_pages = []
            vm_2M_pages = []
            vm_1G_pages = []

            for node, memory_list in memory_numa_list.items():

                memory = memory_list[0]
                vswitch_2M_page = 0
                vswitch_1G_page = 0

                vm_hugepages_nr_2M = memory.vm_hugepages_nr_2M_pending \
                    if memory.vm_hugepages_nr_2M_pending is not None \
                    else memory.vm_hugepages_nr_2M
                vm_hugepages_nr_1G = memory.vm_hugepages_nr_1G_pending \
                    if memory.vm_hugepages_nr_1G_pending is not None \
                    else memory.vm_hugepages_nr_1G
                vm_hugepages_nr_4K = memory.vm_hugepages_nr_4K \
                    if memory.vm_hugepages_nr_4K is not None else 0

                total_hugepages_2M = vm_hugepages_nr_2M
                total_hugepages_1G = vm_hugepages_nr_1G

                if memory.avs_hugepages_size_mib == constants.MIB_2M:
                    total_hugepages_2M += memory.avs_hugepages_nr
                    vswitch_2M_page += memory.avs_hugepages_nr
                elif memory.avs_hugepages_size_mib == constants.MIB_1G:
                    total_hugepages_1G += memory.avs_hugepages_nr
                    vswitch_1G_page += memory.avs_hugepages_nr

                vswitch_2M_pages.append(vswitch_2M_page)
                vswitch_1G_pages.append(vswitch_1G_page)

                hugepages_2M = "\"node%d:%dkB:%d\"" % (
                    node, constants.MIB_2M * 1024, total_hugepages_2M)
                hugepages_1G = "\"node%d:%dkB:%d\"" % (
                    node, constants.MIB_1G * 1024, total_hugepages_1G)
                hugepages_2Ms.append(hugepages_2M)
                hugepages_1Gs.append(hugepages_1G)

                vm_4K_pages.append(vm_hugepages_nr_4K)
                vm_2M_pages.append(vm_hugepages_nr_2M)
                vm_1G_pages.append(vm_hugepages_nr_1G)

            nr_hugepages_2Ms = "(%s)" % ' '.join(hugepages_2Ms)
            nr_hugepages_1Gs = "(%s)" % ' '.join(hugepages_1Gs)

            vswitch_2M = "\"%s\"" % ','.join([str(i) for i in vswitch_2M_pages])
            vswitch_1G = "\"%s\"" % ','.join([str(i) for i in vswitch_1G_pages])
            vm_4K = "\"%s\"" % ','.join([str(i) for i in vm_4K_pages])
            vm_2M = "\"%s\"" % ','.join([str(i) for i in vm_2M_pages])
            vm_1G = "\"%s\"" % ','.join([str(i) for i in vm_1G_pages])

            config.update({
                'platform::compute::hugepage::params::nr_hugepages_2M':
                    nr_hugepages_2Ms,
                'platform::compute::hugepage::params::nr_hugepages_1G':
                    nr_hugepages_1Gs,
                'platform::compute::hugepage::params::vswitch_2M_pages':
                    vswitch_2M,
                'platform::compute::hugepage::params::vswitch_1G_pages':
                    vswitch_1G,
                'platform::compute::hugepage::params::vm_4K_pages':
                    vm_4K,
                'platform::compute::hugepage::params::vm_2M_pages':
                    vm_2M,
                'platform::compute::hugepage::params::vm_1G_pages':
                    vm_1G,
            })

        return config

    def _get_drbd_link_speed(self):
        # return infra link speed if provisioned, otherwise mgmt
        try: