enable k8s huge page feature

- record k8s labels for nodes in puppet
- enable the k8s huge page feature for worker nodes without the
  openstack compute label, and disable it otherwise.
- automatically allocate default huge pages for worker nodes with the
  openstack compute label; the changes are applied on unlock
- do not allocate any huge pages by default for worker nodes when the
  openstack compute label is not assigned and vswitch_type is
  none.
- when changing the vswitch type, trigger an update of the grub memory config

Story: 2004763
Task: 28880

Change-Id: I7636eeb4773fa3fe32671a6bb2870c2e1074a5fa
Signed-off-by: Sun Austin <austin.sun@intel.com>
This commit is contained in:
Sun Austin 2019-03-18 10:03:23 +08:00
parent 560b2b6500
commit 9c5bf5771e
7 changed files with 95 additions and 30 deletions

View File

@ -52,7 +52,7 @@ class platform::compute::grub::params (
$eptad = ''
}
if $::is_gb_page_supported {
if $::is_gb_page_supported and $::platform::params::vswitch_type != 'none' {
if $g_hugepages != undef {
$gb_hugepages = $g_hugepages
} else {

View File

@ -91,8 +91,11 @@ class platform::kubernetes::cgroup
}
class platform::kubernetes::kubeadm {
include ::platform::docker::params
include ::platform::docker::params
include ::platform::kubernetes::params
$host_labels = $::platform::kubernetes::params::host_labels
$iptables_file = "net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1"
@ -102,6 +105,21 @@ class platform::kubernetes::kubeadm {
$k8s_registry = undef
}
#only set k8s_hugepage true when subfunction is worker and openstack-compute-node is not in host_labels
if str2bool($::is_worker_subfunction)
and !('openstack-compute-node'
in $host_labels) {
$k8s_hugepage = true
} else {
$k8s_hugepage = false
}
# enable extra parameters such as hugepage
file { '/etc/sysconfig/kubelet':
ensure => file,
content => template('platform/kubelet.conf.erb'),
}
# Update iptables config. This is required based on:
# https://kubernetes.io/docs/tasks/tools/install-kubeadm
# This probably belongs somewhere else - initscripts package?

View File

@ -0,0 +1,3 @@
# Overrides config file for kubelet
KUBELET_EXTRA_ARGS=--feature-gates=HugePages=<%= @k8s_hugepage %>

View File

@ -23,9 +23,6 @@ import tsconfig.tsconfig as tsc
LOG = logging.getLogger(__name__)
# Defines per-socket vswitch memory requirements (in MB)
VSWITCH_MEMORY_MB = 1024
# Defines the size of one kilobyte
SIZE_KB = 1024
@ -386,15 +383,16 @@ class NodeOperator(object):
vs_hp_nr, vs_hp_size = self._get_vswitch_reserved_memory(
node)
if vs_hp_nr == 0 or vs_hp_size == 0:
vs_hp_nr = VSWITCH_MEMORY_MB // size
if vs_hp_size == 0:
vs_hp_size = size
# Libvirt hugepages can be 1G and 2M
if size == SIZE_1G_MB:
hp_attr = {}
if vs_hp_size == size:
nr_hugepages -= vs_hp_nr
# If the huge pages are not allocated
if nr_hugepages != 0:
nr_hugepages -= vs_hp_nr
hp_attr.update({
'vswitch_hugepages_size_mib': vs_hp_size,
'vswitch_hugepages_nr': vs_hp_nr,
@ -410,15 +408,19 @@ class NodeOperator(object):
# No 1G hugepage support.
hp_attr = {
'vm_hugepages_use_1G': 'False',
'vm_hugepages_nr_1G': 0,
'vswitch_hugepages_size_mib': vs_hp_size,
'vswitch_hugepages_nr': vs_hp_nr,
'vswitch_hugepages_avail': 0
}
if nr_hugepages != 0:
nr_hugepages -= vs_hp_nr
else:
hp_attr = {}
if vs_hp_size == size and initial_report is False:
# User manually set 2M pages
nr_hugepages -= vs_hp_nr
if nr_hugepages != 0:
nr_hugepages -= vs_hp_nr
hp_attr.update({
'vswitch_hugepages_size_mib': vs_hp_size,
'vswitch_hugepages_nr': vs_hp_nr,
@ -546,18 +548,6 @@ class NodeOperator(object):
'vm_hugepages_possible_1G': max_vm_pages_1gb,
})
# calculate 90% 2M pages if it is initial report and the huge
# pages have not been allocated
if initial_report:
max_vm_pages_2mb = max_vm_pages_2mb * 0.9
total_hp_mb += int(max_vm_pages_2mb * (SIZE_2M_KB / SIZE_KB))
free_hp_mb = total_hp_mb
attr.update({
'vm_hugepages_nr_2M': max_vm_pages_2mb,
'vm_hugepages_avail_2M': max_vm_pages_2mb,
'vm_hugepages_nr_1G': 0
})
attr.update({
'numa_node': node,
'memtotal_mib': total_hp_mb,

View File

@ -89,6 +89,7 @@ from sysinv.common import ceph
from sysinv.common import constants
from sysinv.common import exception
from sysinv.common import utils as cutils
from sysinv.helm import common as helm_common
from sysinv.openstack.common import log
from sysinv.openstack.common import uuidutils
from sysinv.openstack.common.gettextutils import _
@ -3377,6 +3378,26 @@ class HostController(rest.RestController):
"addresses while in SDN mode.")
raise wsme.exc.ClientSideError(msg)
@staticmethod
def _semantic_check_vswitch_type_attributes(ihost):
    """
    Perform a semantic check of the host labels against the vswitch type
    prior to unlocking the host.

    If the configured vswitch type is constants.VSWITCH_TYPE_NONE no check
    is made. For any other vswitch type (e.g. ovs or ovs-dpdk) the host
    must carry the helm_common.LABEL_COMPUTE_LABEL label, since 2M huge
    pages need to be allocated in that case.

    :param ihost: host being unlocked; ihost['hostname'] is used in the
                  error message and the host is passed to
                  utils.is_openstack_compute() for the label lookup.
    :raises wsme.exc.ClientSideError: if a vswitch type is configured and
                  the host does not have the compute label assigned.
    """
    vswitch_type = utils.get_vswitch_type()
    if vswitch_type == constants.VSWITCH_TYPE_NONE:
        return

    # Check whether compute_label has been assigned
    if not utils.is_openstack_compute(ihost):
        # Interpolate *after* _() so the translation lookup is performed on
        # the constant message id rather than on the formatted text.
        msg = _("Can not unlock worker host %s without "
                " %s label if config %s. Action: assign "
                "%s label for this host prior to unlock.") % (
                    ihost['hostname'], helm_common.LABEL_COMPUTE_LABEL,
                    vswitch_type, helm_common.LABEL_COMPUTE_LABEL)
        raise wsme.exc.ClientSideError(msg)
@staticmethod
def _semantic_check_data_vrs_attributes(ihost):
"""
@ -3637,16 +3658,27 @@ class HostController(rest.RestController):
pecan.request.dbapi.imemory_update(m.uuid, values)
@staticmethod
def _update_vm_4k_pages(ihost):
def _update_huge_pages(ihost):
"""
Update VM 4K huge pages.
Update the host huge pages.
"""
ihost_inodes = pecan.request.dbapi.inode_get_by_ihost(ihost['uuid'])
labels = pecan.request.dbapi.label_get_by_host(ihost['uuid'])
vswitch_type = utils.get_vswitch_type()
for node in ihost_inodes:
mems = pecan.request.dbapi.imemory_get_by_inode(node['id'])
for m in mems:
if m.hugepages_configured:
value = {}
vs_hugepages_nr = m.vswitch_hugepages_nr
# allocate the default vswitch huge pages if required
if vswitch_type != constants.VSWITCH_TYPE_NONE and \
vs_hugepages_nr == 0:
vs_hugepages_nr = constants.VSWITCH_MEMORY_MB \
// m.vswitch_hugepages_size_mib
value.update({'vswitch_hugepages_nr': vs_hugepages_nr})
vm_hugepages_nr_2M = m.vm_hugepages_nr_2M_pending \
if m.vm_hugepages_nr_2M_pending is not None \
else m.vm_hugepages_nr_2M
@ -3654,10 +3686,18 @@ class HostController(rest.RestController):
if m.vm_hugepages_nr_1G_pending is not None \
else m.vm_hugepages_nr_1G
# calculate 90% 2M pages if the huge pages have not been
# allocated and the compute label is set
if cutils.has_openstack_compute(labels) and \
vm_hugepages_nr_2M == 0 and \
vm_hugepages_nr_1G == 0:
vm_hugepages_nr_2M = m.vm_hugepages_possible_2M * 0.9
value.update({'vm_hugepages_nr_2M': vm_hugepages_nr_2M})
vm_hugepages_4K = \
(m.node_memtotal_mib - m.platform_reserved_mib)
vm_hugepages_4K -= \
(m.vswitch_hugepages_nr * m.vswitch_hugepages_size_mib)
(vs_hugepages_nr * m.vswitch_hugepages_size_mib)
vm_hugepages_4K -= \
(constants.MIB_2M * vm_hugepages_nr_2M)
vm_hugepages_4K -= \
@ -3670,10 +3710,9 @@ class HostController(rest.RestController):
if vm_hugepages_4K < min_4K:
vm_hugepages_4K = 0
value = {'vm_hugepages_nr_4K': vm_hugepages_4K}
LOG.info("Set VM 4K pages for host (%s) node (%d) pages "
"(%d)" % (ihost['hostname'], node['id'],
vm_hugepages_4K))
value.update({'vm_hugepages_nr_4K': vm_hugepages_4K})
LOG.info("Updating mem values of host(%s) node(%d): %s" %
(ihost['hostname'], node['id'], str(value)))
pecan.request.dbapi.imemory_update(m.uuid, value)
@staticmethod
@ -5204,6 +5243,7 @@ class HostController(rest.RestController):
self._semantic_check_data_interfaces(ihost,
kubernetes_config,
force_unlock)
self._semantic_check_vswitch_type_attributes(ihost)
else:
# sdn configuration check
self._semantic_check_sdn_attributes(ihost)
@ -5265,8 +5305,8 @@ class HostController(rest.RestController):
if align_2M_memory or align_1G_memory:
self._align_pending_memory(ihost, align_2M_memory, align_1G_memory)
# calculate the VM 4K huge pages for nova
self._update_vm_4k_pages(ihost)
# update ihost huge pages allocation
self._update_huge_pages(ihost)
if cutils.is_virtual() or cutils.is_virtual_worker(ihost):
mib_platform_reserved_no_io = mib_reserved

View File

@ -33,6 +33,7 @@ from oslo_config import cfg
from sysinv.common import constants
from sysinv.common import exception
from sysinv.common.utils import memoized
from sysinv.helm import common as helm_common
from sysinv.openstack.common.gettextutils import _
from sysinv.openstack.common import log
@ -255,6 +256,16 @@ def get_vswitch_type():
return system.capabilities.get('vswitch_type')
def is_openstack_compute(ihost):
    """Return True if the host has the openstack compute label assigned.

    The labels recorded for ihost['uuid'] are fetched through the request's
    dbapi and scanned for an entry whose label_key equals
    helm_common.LABEL_COMPUTE_LABEL. Entries for which the comparison
    raises AttributeError are skipped. Returns False when no entry matches.
    """
    host_labels = pecan.request.dbapi.label_get_by_host(ihost['uuid'])
    for label in host_labels:
        try:
            matched = helm_common.LABEL_COMPUTE_LABEL == label.label_key
        except AttributeError:
            # Skip this entry and keep scanning the rest.
            continue
        if matched:
            return True
    return False
def get_https_enabled():
system = pecan.request.dbapi.isystem_get_one()
return system.capabilities.get('https_enabled', False)

View File

@ -208,6 +208,9 @@ MIB_1G = 1024
Ki = 1024
NUM_4K_PER_MiB = 256
# Defines per-socket vswitch memory requirements (in MB)
VSWITCH_MEMORY_MB = 1024
# Dynamic IO Resident Set Size(RSS) in MiB per socket
DISK_IO_RESIDENT_SET_SIZE_MIB = 2000
DISK_IO_RESIDENT_SET_SIZE_MIB_VBOX = 500