Merge "enable k8s huge page feature"

Authored by Zuul on 2019-04-29 15:30:13 +00:00; committed by Gerrit Code Review
commit b182032481
7 changed files with 95 additions and 30 deletions

View File

@@ -52,7 +52,7 @@ class platform::compute::grub::params (
     $eptad = ''
   }

-  if $::is_gb_page_supported {
+  if $::is_gb_page_supported and $::platform::params::vswitch_type != 'none' {
     if $g_hugepages != undef {
       $gb_hugepages = $g_hugepages
     } else {
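
This change makes the boot-time 1G huge page reservation conditional on a
vswitch actually being configured; with vswitch_type set to 'none', huge
page management is left to Kubernetes at runtime. A minimal Python sketch
of the resulting condition (the function and parameter names here are
illustrative, not taken from the manifest):

def wants_boot_time_1g_pages(is_gb_page_supported, vswitch_type):
    # Mirrors the Puppet guard above: reserve 1G pages on the kernel
    # command line only when a vswitch is configured.
    return is_gb_page_supported and vswitch_type != 'none'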

View File

@@ -91,8 +91,11 @@ class platform::kubernetes::cgroup
 }

 class platform::kubernetes::kubeadm {
   include ::platform::docker::params
+  include ::platform::kubernetes::params
+
+  $host_labels = $::platform::kubernetes::params::host_labels

   $iptables_file = "net.bridge.bridge-nf-call-ip6tables = 1
     net.bridge.bridge-nf-call-iptables = 1"
@@ -102,6 +105,21 @@ class platform::kubernetes::kubeadm {
     $k8s_registry = undef
   }

+  # only set k8s_hugepage to true when the subfunction is worker and
+  # openstack-compute-node is not in host_labels
+  if str2bool($::is_worker_subfunction)
+    and !('openstack-compute-node' in $host_labels) {
+    $k8s_hugepage = true
+  } else {
+    $k8s_hugepage = false
+  }
+
+  # enable extra parameters such as hugepages
+  file { '/etc/sysconfig/kubelet':
+    ensure  => file,
+    content => template('platform/kubelet.conf.erb'),
+  }
+
   # Update iptables config. This is required based on:
   # https://kubernetes.io/docs/tasks/tools/install-kubeadm
   # This probably belongs somewhere else - initscripts package?

View File

@@ -0,0 +1,3 @@
+# Overrides config file for kubelet
+KUBELET_EXTRA_ARGS=--feature-gates=HugePages=<%= @k8s_hugepage %>
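
The kubeadm manifest above writes this template to /etc/sysconfig/kubelet,
so the feature gate follows the per-host decision made there. A sketch of
the combined logic, with hypothetical function and parameter names:

def kubelet_extra_args(is_worker_subfunction, host_labels):
    # Enable the HugePages feature gate only on worker hosts that do not
    # carry the openstack-compute-node label, as in the manifest above.
    k8s_hugepage = (is_worker_subfunction and
                    'openstack-compute-node' not in host_labels)
    # Puppet booleans render as 'true'/'false' in the ERB template, e.g.
    # KUBELET_EXTRA_ARGS=--feature-gates=HugePages=true
    return ('KUBELET_EXTRA_ARGS=--feature-gates=HugePages=%s'
            % str(k8s_hugepage).lower())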

View File

@@ -23,9 +23,6 @@ import tsconfig.tsconfig as tsc

 LOG = logging.getLogger(__name__)

-# Defines per-socket vswitch memory requirements (in MB)
-VSWITCH_MEMORY_MB = 1024
-
 # Defines the size of one kilobyte
 SIZE_KB = 1024

@@ -386,15 +383,16 @@ class NodeOperator(object):
             vs_hp_nr, vs_hp_size = self._get_vswitch_reserved_memory(
                 node)

-            if vs_hp_nr == 0 or vs_hp_size == 0:
-                vs_hp_nr = VSWITCH_MEMORY_MB // size
+            if vs_hp_size == 0:
                 vs_hp_size = size

             # Libvirt hugepages can be 1G and 2M
             if size == SIZE_1G_MB:
                 hp_attr = {}
                 if vs_hp_size == size:
-                    nr_hugepages -= vs_hp_nr
+                    # If the huge pages are not allocated
+                    if nr_hugepages != 0:
+                        nr_hugepages -= vs_hp_nr
                     hp_attr.update({
                         'vswitch_hugepages_size_mib': vs_hp_size,
                         'vswitch_hugepages_nr': vs_hp_nr,
@@ -410,15 +408,19 @@ class NodeOperator(object):
                 # No 1G hugepage support.
                 hp_attr = {
                     'vm_hugepages_use_1G': 'False',
+                    'vm_hugepages_nr_1G': 0,
                     'vswitch_hugepages_size_mib': vs_hp_size,
                     'vswitch_hugepages_nr': vs_hp_nr,
                     'vswitch_hugepages_avail': 0
                 }
+                if nr_hugepages != 0:
+                    nr_hugepages -= vs_hp_nr
             else:
                 hp_attr = {}
                 if vs_hp_size == size and initial_report is False:
                     # User manually set 2M pages
-                    nr_hugepages -= vs_hp_nr
+                    if nr_hugepages != 0:
+                        nr_hugepages -= vs_hp_nr
                     hp_attr.update({
                         'vswitch_hugepages_size_mib': vs_hp_size,
                         'vswitch_hugepages_nr': vs_hp_nr,
@@ -546,18 +548,6 @@ class NodeOperator(object):
                 'vm_hugepages_possible_1G': max_vm_pages_1gb,
             })

-            # calculate 90% 2M pages if it is initial report and the huge
-            # pages have not been allocated
-            if initial_report:
-                max_vm_pages_2mb = max_vm_pages_2mb * 0.9
-                total_hp_mb += int(max_vm_pages_2mb * (SIZE_2M_KB / SIZE_KB))
-                free_hp_mb = total_hp_mb
-                attr.update({
-                    'vm_hugepages_nr_2M': max_vm_pages_2mb,
-                    'vm_hugepages_avail_2M': max_vm_pages_2mb,
-                    'vm_hugepages_nr_1G': 0
-                })
-
             attr.update({
                 'numa_node': node,
                 'memtotal_mib': total_hp_mb,
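
The recurring nr_hugepages != 0 guard added in this file keeps the vswitch
reservation from being subtracted before the kernel has allocated any pages
of that size, which would otherwise drive the reported count negative on
the initial inventory report. A small sketch of the accounting (names and
numbers are hypothetical):

def vm_visible_hugepages(nr_hugepages, vs_hp_nr):
    # nr_hugepages: total pages of this size reported by the kernel.
    # vs_hp_nr: pages reserved for the vswitch.
    if nr_hugepages != 0:
        nr_hugepages -= vs_hp_nr
    return nr_hugepages

vm_visible_hugepages(0, 512)     # before allocation: 0, not -512
vm_visible_hugepages(4096, 512)  # after allocation: 3584 pages for VMs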

View File

@@ -89,6 +89,7 @@ from sysinv.common import ceph
 from sysinv.common import constants
 from sysinv.common import exception
 from sysinv.common import utils as cutils
+from sysinv.helm import common as helm_common
 from sysinv.openstack.common import log
 from sysinv.openstack.common import uuidutils
 from sysinv.openstack.common.gettextutils import _
@@ -3377,6 +3378,26 @@ class HostController(rest.RestController):
                   "addresses while in SDN mode.")
             raise wsme.exc.ClientSideError(msg)

+    @staticmethod
+    def _semantic_check_vswitch_type_attributes(ihost):
+        """
+        Check that the openstack-compute-node host label is assigned when
+        the ovs or ovs-dpdk vswitch type is enabled, since 2M huge pages
+        must be allocated; validates the node configuration prior to unlock.
+        """
+        vswitch_type = utils.get_vswitch_type()
+        if vswitch_type == constants.VSWITCH_TYPE_NONE:
+            return
+
+        # Check whether the compute label has been assigned
+        if utils.is_openstack_compute(ihost) is not True:
+            raise wsme.exc.ClientSideError(
+                _("Can not unlock worker host %s without the "
+                  "%s label when the vswitch type is %s. Action: assign "
+                  "the %s label to this host prior to unlock."
+                  % (ihost['hostname'], helm_common.LABEL_COMPUTE_LABEL,
+                     vswitch_type, helm_common.LABEL_COMPUTE_LABEL)))
+
     @staticmethod
     def _semantic_check_data_vrs_attributes(ihost):
         """
@@ -3637,16 +3658,27 @@ class HostController(rest.RestController):
                 pecan.request.dbapi.imemory_update(m.uuid, values)

     @staticmethod
-    def _update_vm_4k_pages(ihost):
+    def _update_huge_pages(ihost):
         """
-        Update VM 4K huge pages.
+        Update the host huge pages.
         """
         ihost_inodes = pecan.request.dbapi.inode_get_by_ihost(ihost['uuid'])
+        labels = pecan.request.dbapi.label_get_by_host(ihost['uuid'])
+        vswitch_type = utils.get_vswitch_type()

         for node in ihost_inodes:
             mems = pecan.request.dbapi.imemory_get_by_inode(node['id'])
             for m in mems:
                 if m.hugepages_configured:
+                    value = {}
+                    vs_hugepages_nr = m.vswitch_hugepages_nr
+                    # allocate the default vswitch huge pages if required
+                    if vswitch_type != constants.VSWITCH_TYPE_NONE and \
+                            vs_hugepages_nr == 0:
+                        vs_hugepages_nr = constants.VSWITCH_MEMORY_MB \
+                            // m.vswitch_hugepages_size_mib
+                        value.update({'vswitch_hugepages_nr': vs_hugepages_nr})
+
                     vm_hugepages_nr_2M = m.vm_hugepages_nr_2M_pending \
                         if m.vm_hugepages_nr_2M_pending is not None \
                         else m.vm_hugepages_nr_2M
@@ -3654,10 +3686,18 @@ class HostController(rest.RestController):
                         if m.vm_hugepages_nr_1G_pending is not None \
                         else m.vm_hugepages_nr_1G

+                    # calculate 90% 2M pages if the huge pages have not
+                    # been allocated and the compute label is set
+                    if cutils.has_openstack_compute(labels) and \
+                            vm_hugepages_nr_2M == 0 and \
+                            vm_hugepages_nr_1G == 0:
+                        vm_hugepages_nr_2M = m.vm_hugepages_possible_2M * 0.9
+                        value.update({'vm_hugepages_nr_2M': vm_hugepages_nr_2M})
+
                     vm_hugepages_4K = \
                         (m.node_memtotal_mib - m.platform_reserved_mib)
                     vm_hugepages_4K -= \
-                        (m.vswitch_hugepages_nr * m.vswitch_hugepages_size_mib)
+                        (vs_hugepages_nr * m.vswitch_hugepages_size_mib)
                     vm_hugepages_4K -= \
                         (constants.MIB_2M * vm_hugepages_nr_2M)
                     vm_hugepages_4K -= \
@@ -3670,10 +3710,9 @@ class HostController(rest.RestController):
                     if vm_hugepages_4K < min_4K:
                         vm_hugepages_4K = 0

-                    value = {'vm_hugepages_nr_4K': vm_hugepages_4K}
-                    LOG.info("Set VM 4K pages for host (%s) node (%d) pages "
-                             "(%d)" % (ihost['hostname'], node['id'],
-                                       vm_hugepages_4K))
+                    value.update({'vm_hugepages_nr_4K': vm_hugepages_4K})
+                    LOG.info("Updating mem values of host(%s) node(%d): %s" %
+                             (ihost['hostname'], node['id'], str(value)))
                     pecan.request.dbapi.imemory_update(m.uuid, value)

     @staticmethod
@@ -5204,6 +5243,7 @@ class HostController(rest.RestController):
             self._semantic_check_data_interfaces(ihost,
                                                  kubernetes_config,
                                                  force_unlock)
+            self._semantic_check_vswitch_type_attributes(ihost)
         else:
             # sdn configuration check
             self._semantic_check_sdn_attributes(ihost)
@@ -5265,8 +5305,8 @@ class HostController(rest.RestController):
         if align_2M_memory or align_1G_memory:
             self._align_pending_memory(ihost, align_2M_memory, align_1G_memory)

-        # calculate the VM 4K huge pages for nova
-        self._update_vm_4k_pages(ihost)
+        # update ihost huge pages allocation
+        self._update_huge_pages(ihost)

         if cutils.is_virtual() or cutils.is_virtual_worker(ihost):
             mib_platform_reserved_no_io = mib_reserved
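
The arithmetic in _update_huge_pages can be sanity-checked with a worked
example. All numbers below are hypothetical, and MIB_2M = 2 and
NUM_4K_PER_MiB = 256 are assumed to match the constants referenced in the
diff:

node_memtotal_mib = 65536      # 64 GiB NUMA node
platform_reserved_mib = 10000  # reserved for platform use
vs_hugepages_nr = 1            # VSWITCH_MEMORY_MB // 1024 for 1G pages
vs_hugepages_size_mib = 1024
vm_hugepages_nr_2M = 24000     # e.g. 90% of the possible 2M pages
vm_hugepages_nr_1G = 0

vm_hugepages_4K_mib = (node_memtotal_mib - platform_reserved_mib
                       - vs_hugepages_nr * vs_hugepages_size_mib
                       - 2 * vm_hugepages_nr_2M       # MIB_2M
                       - 1024 * vm_hugepages_nr_1G)   # MIB_1G
print(vm_hugepages_4K_mib)        # 6512 MiB left over for 4K pages
print(vm_hugepages_4K_mib * 256)  # 1667072 4K pages (NUM_4K_PER_MiB)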

View File

@@ -33,6 +33,7 @@ from oslo_config import cfg
 from sysinv.common import constants
 from sysinv.common import exception
 from sysinv.common.utils import memoized
+from sysinv.helm import common as helm_common
 from sysinv.openstack.common.gettextutils import _
 from sysinv.openstack.common import log

@@ -255,6 +256,16 @@ def get_vswitch_type():
     return system.capabilities.get('vswitch_type')


+def is_openstack_compute(ihost):
+    for obj in pecan.request.dbapi.label_get_by_host(ihost['uuid']):
+        try:
+            if helm_common.LABEL_COMPUTE_LABEL == obj.label_key:
+                return True
+        except AttributeError:
+            pass
+    return False
+
+
 def get_https_enabled():
     system = pecan.request.dbapi.isystem_get_one()
     return system.capabilities.get('https_enabled', False)

View File

@@ -208,6 +208,9 @@ MIB_1G = 1024
 Ki = 1024
 NUM_4K_PER_MiB = 256

+# Defines per-socket vswitch memory requirements (in MB)
+VSWITCH_MEMORY_MB = 1024
+
 # Dynamic IO Resident Set Size(RSS) in MiB per socket
 DISK_IO_RESIDENT_SET_SIZE_MIB = 2000
 DISK_IO_RESIDENT_SET_SIZE_MIB_VBOX = 500
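
The new per-socket default converts into a page count by dividing by the
configured vswitch huge page size, as _update_huge_pages does above:

VSWITCH_MEMORY_MB = 1024
VSWITCH_MEMORY_MB // 1024  # 1 page per socket with 1G vswitch pages
VSWITCH_MEMORY_MB // 2     # 512 pages per socket with 2M vswitch pages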