enable k8s huge page feature
- record k8s labels for nodes in puppet - enable the k8s huge page feature for worker nodes without the openstack compute label, and disable it otherwise - automatically default huge pages for worker nodes with the openstack compute label; the changes are applied on unlock - do not allocate any huge pages by default for worker nodes without the openstack compute label when vswitch_type is none - when the vswitch type is changed, trigger an update of the grub memory config Story: 2004763 Task: 28880 Change-Id: I7636eeb4773fa3fe32671a6bb2870c2e1074a5fa Signed-off-by: Sun Austin <austin.sun@intel.com>
This commit is contained in:
parent
560b2b6500
commit
9c5bf5771e
@ -52,7 +52,7 @@ class platform::compute::grub::params (
|
||||
$eptad = ''
|
||||
}
|
||||
|
||||
if $::is_gb_page_supported {
|
||||
if $::is_gb_page_supported and $::platform::params::vswitch_type != 'none' {
|
||||
if $g_hugepages != undef {
|
||||
$gb_hugepages = $g_hugepages
|
||||
} else {
|
||||
|
@ -91,8 +91,11 @@ class platform::kubernetes::cgroup
|
||||
}
|
||||
|
||||
class platform::kubernetes::kubeadm {
|
||||
include ::platform::docker::params
|
||||
|
||||
include ::platform::docker::params
|
||||
include ::platform::kubernetes::params
|
||||
|
||||
$host_labels = $::platform::kubernetes::params::host_labels
|
||||
$iptables_file = "net.bridge.bridge-nf-call-ip6tables = 1
|
||||
net.bridge.bridge-nf-call-iptables = 1"
|
||||
|
||||
@ -102,6 +105,21 @@ class platform::kubernetes::kubeadm {
|
||||
$k8s_registry = undef
|
||||
}
|
||||
|
||||
#only set k8s_hugepage true when subfunction is worker and openstack-compute-node is not in host_labels
|
||||
if str2bool($::is_worker_subfunction)
|
||||
and !('openstack-compute-node'
|
||||
in $host_labels) {
|
||||
$k8s_hugepage = true
|
||||
} else {
|
||||
$k8s_hugepage = false
|
||||
}
|
||||
|
||||
# enable extra parameters such as hugepage
|
||||
file { '/etc/sysconfig/kubelet':
|
||||
ensure => file,
|
||||
content => template('platform/kubelet.conf.erb'),
|
||||
}
|
||||
|
||||
# Update iptables config. This is required based on:
|
||||
# https://kubernetes.io/docs/tasks/tools/install-kubeadm
|
||||
# This probably belongs somewhere else - initscripts package?
|
||||
|
@ -0,0 +1,3 @@
|
||||
# Overrides config file for kubelet
|
||||
KUBELET_EXTRA_ARGS=--feature-gates=HugePages=<%= @k8s_hugepage %>
|
||||
|
@ -23,9 +23,6 @@ import tsconfig.tsconfig as tsc
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
# Defines per-socket vswitch memory requirements (in MB)
|
||||
VSWITCH_MEMORY_MB = 1024
|
||||
|
||||
# Defines the size of one kilobyte
|
||||
SIZE_KB = 1024
|
||||
|
||||
@ -386,15 +383,16 @@ class NodeOperator(object):
|
||||
|
||||
vs_hp_nr, vs_hp_size = self._get_vswitch_reserved_memory(
|
||||
node)
|
||||
if vs_hp_nr == 0 or vs_hp_size == 0:
|
||||
vs_hp_nr = VSWITCH_MEMORY_MB // size
|
||||
if vs_hp_size == 0:
|
||||
vs_hp_size = size
|
||||
|
||||
# Libvirt hugepages can be 1G and 2M
|
||||
if size == SIZE_1G_MB:
|
||||
hp_attr = {}
|
||||
if vs_hp_size == size:
|
||||
nr_hugepages -= vs_hp_nr
|
||||
# If the huge pages are not allocated
|
||||
if nr_hugepages != 0:
|
||||
nr_hugepages -= vs_hp_nr
|
||||
hp_attr.update({
|
||||
'vswitch_hugepages_size_mib': vs_hp_size,
|
||||
'vswitch_hugepages_nr': vs_hp_nr,
|
||||
@ -410,15 +408,19 @@ class NodeOperator(object):
|
||||
# No 1G hugepage support.
|
||||
hp_attr = {
|
||||
'vm_hugepages_use_1G': 'False',
|
||||
'vm_hugepages_nr_1G': 0,
|
||||
'vswitch_hugepages_size_mib': vs_hp_size,
|
||||
'vswitch_hugepages_nr': vs_hp_nr,
|
||||
'vswitch_hugepages_avail': 0
|
||||
}
|
||||
if nr_hugepages != 0:
|
||||
nr_hugepages -= vs_hp_nr
|
||||
else:
|
||||
hp_attr = {}
|
||||
if vs_hp_size == size and initial_report is False:
|
||||
# User manually set 2M pages
|
||||
nr_hugepages -= vs_hp_nr
|
||||
if nr_hugepages != 0:
|
||||
nr_hugepages -= vs_hp_nr
|
||||
hp_attr.update({
|
||||
'vswitch_hugepages_size_mib': vs_hp_size,
|
||||
'vswitch_hugepages_nr': vs_hp_nr,
|
||||
@ -546,18 +548,6 @@ class NodeOperator(object):
|
||||
'vm_hugepages_possible_1G': max_vm_pages_1gb,
|
||||
})
|
||||
|
||||
# calculate 90% 2M pages if it is initial report and the huge
|
||||
# pages have not been allocated
|
||||
if initial_report:
|
||||
max_vm_pages_2mb = max_vm_pages_2mb * 0.9
|
||||
total_hp_mb += int(max_vm_pages_2mb * (SIZE_2M_KB / SIZE_KB))
|
||||
free_hp_mb = total_hp_mb
|
||||
attr.update({
|
||||
'vm_hugepages_nr_2M': max_vm_pages_2mb,
|
||||
'vm_hugepages_avail_2M': max_vm_pages_2mb,
|
||||
'vm_hugepages_nr_1G': 0
|
||||
})
|
||||
|
||||
attr.update({
|
||||
'numa_node': node,
|
||||
'memtotal_mib': total_hp_mb,
|
||||
|
@ -89,6 +89,7 @@ from sysinv.common import ceph
|
||||
from sysinv.common import constants
|
||||
from sysinv.common import exception
|
||||
from sysinv.common import utils as cutils
|
||||
from sysinv.helm import common as helm_common
|
||||
from sysinv.openstack.common import log
|
||||
from sysinv.openstack.common import uuidutils
|
||||
from sysinv.openstack.common.gettextutils import _
|
||||
@ -3377,6 +3378,26 @@ class HostController(rest.RestController):
|
||||
"addresses while in SDN mode.")
|
||||
raise wsme.exc.ClientSideError(msg)
|
||||
|
||||
@staticmethod
def _semantic_check_vswitch_type_attributes(ihost):
    """
    Check that the host has the openstack compute label when a vswitch
    type other than 'none' is configured.

    Nothing is checked when the system vswitch type equals
    constants.VSWITCH_TYPE_NONE.  For any other vswitch type the host
    must already carry the openstack compute label before it is
    unlocked.
    NOTE(review): per the original change description the label is
    required because it drives the default 2M huge page allocation --
    confirm against the huge page update logic.

    :param ihost: host being unlocked; 'hostname' is read for the error
                  message and the record is passed to
                  utils.is_openstack_compute() for the label lookup.
    :raises wsme.exc.ClientSideError: if a vswitch type other than
                  'none' is configured and the host does not have the
                  openstack compute label.
    """
    vswitch_type = utils.get_vswitch_type()
    if vswitch_type == constants.VSWITCH_TYPE_NONE:
        return

    # Check whether compute_label has been assigned
    if utils.is_openstack_compute(ihost) is not True:
        # Interpolate AFTER the translation lookup: formatting inside
        # _() would hand gettext an already-expanded string that can
        # never match a msgid in the message catalog.
        raise wsme.exc.ClientSideError(
            _("Can not unlock worker host %s without "
              " %s label if config %s. Action: assign "
              "%s label for this host prior to unlock.")
            % (ihost['hostname'], helm_common.LABEL_COMPUTE_LABEL,
               vswitch_type, helm_common.LABEL_COMPUTE_LABEL))
|
||||
|
||||
@staticmethod
|
||||
def _semantic_check_data_vrs_attributes(ihost):
|
||||
"""
|
||||
@ -3637,16 +3658,27 @@ class HostController(rest.RestController):
|
||||
pecan.request.dbapi.imemory_update(m.uuid, values)
|
||||
|
||||
@staticmethod
|
||||
def _update_vm_4k_pages(ihost):
|
||||
def _update_huge_pages(ihost):
|
||||
"""
|
||||
Update VM 4K huge pages.
|
||||
Update the host huge pages.
|
||||
"""
|
||||
ihost_inodes = pecan.request.dbapi.inode_get_by_ihost(ihost['uuid'])
|
||||
|
||||
labels = pecan.request.dbapi.label_get_by_host(ihost['uuid'])
|
||||
vswitch_type = utils.get_vswitch_type()
|
||||
for node in ihost_inodes:
|
||||
mems = pecan.request.dbapi.imemory_get_by_inode(node['id'])
|
||||
for m in mems:
|
||||
if m.hugepages_configured:
|
||||
value = {}
|
||||
vs_hugepages_nr = m.vswitch_hugepages_nr
|
||||
# allocate the default vswitch huge pages if required
|
||||
if vswitch_type != constants.VSWITCH_TYPE_NONE and \
|
||||
vs_hugepages_nr == 0:
|
||||
vs_hugepages_nr = constants.VSWITCH_MEMORY_MB \
|
||||
// m.vswitch_hugepages_size_mib
|
||||
value.update({'vswitch_hugepages_nr': vs_hugepages_nr})
|
||||
|
||||
vm_hugepages_nr_2M = m.vm_hugepages_nr_2M_pending \
|
||||
if m.vm_hugepages_nr_2M_pending is not None \
|
||||
else m.vm_hugepages_nr_2M
|
||||
@ -3654,10 +3686,18 @@ class HostController(rest.RestController):
|
||||
if m.vm_hugepages_nr_1G_pending is not None \
|
||||
else m.vm_hugepages_nr_1G
|
||||
|
||||
# calculate 90% 2M pages if the huge pages have not been
|
||||
# allocated and the compute label is set
|
||||
if cutils.has_openstack_compute(labels) and \
|
||||
vm_hugepages_nr_2M == 0 and \
|
||||
vm_hugepages_nr_1G == 0:
|
||||
vm_hugepages_nr_2M = m.vm_hugepages_possible_2M * 0.9
|
||||
value.update({'vm_hugepages_nr_2M': vm_hugepages_nr_2M})
|
||||
|
||||
vm_hugepages_4K = \
|
||||
(m.node_memtotal_mib - m.platform_reserved_mib)
|
||||
vm_hugepages_4K -= \
|
||||
(m.vswitch_hugepages_nr * m.vswitch_hugepages_size_mib)
|
||||
(vs_hugepages_nr * m.vswitch_hugepages_size_mib)
|
||||
vm_hugepages_4K -= \
|
||||
(constants.MIB_2M * vm_hugepages_nr_2M)
|
||||
vm_hugepages_4K -= \
|
||||
@ -3670,10 +3710,9 @@ class HostController(rest.RestController):
|
||||
if vm_hugepages_4K < min_4K:
|
||||
vm_hugepages_4K = 0
|
||||
|
||||
value = {'vm_hugepages_nr_4K': vm_hugepages_4K}
|
||||
LOG.info("Set VM 4K pages for host (%s) node (%d) pages "
|
||||
"(%d)" % (ihost['hostname'], node['id'],
|
||||
vm_hugepages_4K))
|
||||
value.update({'vm_hugepages_nr_4K': vm_hugepages_4K})
|
||||
LOG.info("Updating mem values of host(%s) node(%d): %s" %
|
||||
(ihost['hostname'], node['id'], str(value)))
|
||||
pecan.request.dbapi.imemory_update(m.uuid, value)
|
||||
|
||||
@staticmethod
|
||||
@ -5204,6 +5243,7 @@ class HostController(rest.RestController):
|
||||
self._semantic_check_data_interfaces(ihost,
|
||||
kubernetes_config,
|
||||
force_unlock)
|
||||
self._semantic_check_vswitch_type_attributes(ihost)
|
||||
else:
|
||||
# sdn configuration check
|
||||
self._semantic_check_sdn_attributes(ihost)
|
||||
@ -5265,8 +5305,8 @@ class HostController(rest.RestController):
|
||||
if align_2M_memory or align_1G_memory:
|
||||
self._align_pending_memory(ihost, align_2M_memory, align_1G_memory)
|
||||
|
||||
# calculate the VM 4K huge pages for nova
|
||||
self._update_vm_4k_pages(ihost)
|
||||
# update ihost huge pages allocation
|
||||
self._update_huge_pages(ihost)
|
||||
|
||||
if cutils.is_virtual() or cutils.is_virtual_worker(ihost):
|
||||
mib_platform_reserved_no_io = mib_reserved
|
||||
|
@ -33,6 +33,7 @@ from oslo_config import cfg
|
||||
from sysinv.common import constants
|
||||
from sysinv.common import exception
|
||||
from sysinv.common.utils import memoized
|
||||
from sysinv.helm import common as helm_common
|
||||
from sysinv.openstack.common.gettextutils import _
|
||||
from sysinv.openstack.common import log
|
||||
|
||||
@ -255,6 +256,16 @@ def get_vswitch_type():
|
||||
return system.capabilities.get('vswitch_type')
|
||||
|
||||
|
||||
def is_openstack_compute(ihost):
    """Return True if the host has the openstack compute label assigned.

    The labels stored for ihost['uuid'] are scanned for one whose
    label_key equals helm_common.LABEL_COMPUTE_LABEL.  Entries that
    raise AttributeError during the comparison are ignored.
    """
    host_labels = pecan.request.dbapi.label_get_by_host(ihost['uuid'])
    for label in host_labels:
        try:
            matched = helm_common.LABEL_COMPUTE_LABEL == label.label_key
        except AttributeError:
            # Skip this entry and keep scanning the remaining labels.
            continue
        if matched:
            return True
    return False
|
||||
|
||||
|
||||
def get_https_enabled():
|
||||
system = pecan.request.dbapi.isystem_get_one()
|
||||
return system.capabilities.get('https_enabled', False)
|
||||
|
@ -208,6 +208,9 @@ MIB_1G = 1024
|
||||
Ki = 1024
|
||||
NUM_4K_PER_MiB = 256
|
||||
|
||||
# Defines per-socket vswitch memory requirements (in MB)
|
||||
VSWITCH_MEMORY_MB = 1024
|
||||
|
||||
# Dynamic IO Resident Set Size(RSS) in MiB per socket
|
||||
DISK_IO_RESIDENT_SET_SIZE_MIB = 2000
|
||||
DISK_IO_RESIDENT_SET_SIZE_MIB_VBOX = 500
|
||||
|
Loading…
x
Reference in New Issue
Block a user