Merge "enable k8s huge page feature"
This commit is contained in:
commit
b182032481
|
@ -52,7 +52,7 @@ class platform::compute::grub::params (
|
||||||
$eptad = ''
|
$eptad = ''
|
||||||
}
|
}
|
||||||
|
|
||||||
if $::is_gb_page_supported {
|
if $::is_gb_page_supported and $::platform::params::vswitch_type != 'none' {
|
||||||
if $g_hugepages != undef {
|
if $g_hugepages != undef {
|
||||||
$gb_hugepages = $g_hugepages
|
$gb_hugepages = $g_hugepages
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -91,8 +91,11 @@ class platform::kubernetes::cgroup
|
||||||
}
|
}
|
||||||
|
|
||||||
class platform::kubernetes::kubeadm {
|
class platform::kubernetes::kubeadm {
|
||||||
include ::platform::docker::params
|
|
||||||
|
|
||||||
|
include ::platform::docker::params
|
||||||
|
include ::platform::kubernetes::params
|
||||||
|
|
||||||
|
$host_labels = $::platform::kubernetes::params::host_labels
|
||||||
$iptables_file = "net.bridge.bridge-nf-call-ip6tables = 1
|
$iptables_file = "net.bridge.bridge-nf-call-ip6tables = 1
|
||||||
net.bridge.bridge-nf-call-iptables = 1"
|
net.bridge.bridge-nf-call-iptables = 1"
|
||||||
|
|
||||||
|
@ -102,6 +105,21 @@ class platform::kubernetes::kubeadm {
|
||||||
$k8s_registry = undef
|
$k8s_registry = undef
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Only set k8s_hugepage to true when the worker subfunction is enabled and the openstack-compute-node label is not in host_labels
|
||||||
|
if str2bool($::is_worker_subfunction)
|
||||||
|
and !('openstack-compute-node'
|
||||||
|
in $host_labels) {
|
||||||
|
$k8s_hugepage = true
|
||||||
|
} else {
|
||||||
|
$k8s_hugepage = false
|
||||||
|
}
|
||||||
|
|
||||||
|
# enable extra parameters such as hugepage
|
||||||
|
file { '/etc/sysconfig/kubelet':
|
||||||
|
ensure => file,
|
||||||
|
content => template('platform/kubelet.conf.erb'),
|
||||||
|
}
|
||||||
|
|
||||||
# Update iptables config. This is required based on:
|
# Update iptables config. This is required based on:
|
||||||
# https://kubernetes.io/docs/tasks/tools/install-kubeadm
|
# https://kubernetes.io/docs/tasks/tools/install-kubeadm
|
||||||
# This probably belongs somewhere else - initscripts package?
|
# This probably belongs somewhere else - initscripts package?
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
# Overrides config file for kubelet
|
||||||
|
KUBELET_EXTRA_ARGS=--feature-gates=HugePages=<%= @k8s_hugepage %>
|
||||||
|
|
|
@ -23,9 +23,6 @@ import tsconfig.tsconfig as tsc
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Defines per-socket vswitch memory requirements (in MB)
|
|
||||||
VSWITCH_MEMORY_MB = 1024
|
|
||||||
|
|
||||||
# Defines the size of one kilobyte
|
# Defines the size of one kilobyte
|
||||||
SIZE_KB = 1024
|
SIZE_KB = 1024
|
||||||
|
|
||||||
|
@ -386,15 +383,16 @@ class NodeOperator(object):
|
||||||
|
|
||||||
vs_hp_nr, vs_hp_size = self._get_vswitch_reserved_memory(
|
vs_hp_nr, vs_hp_size = self._get_vswitch_reserved_memory(
|
||||||
node)
|
node)
|
||||||
if vs_hp_nr == 0 or vs_hp_size == 0:
|
if vs_hp_size == 0:
|
||||||
vs_hp_nr = VSWITCH_MEMORY_MB // size
|
|
||||||
vs_hp_size = size
|
vs_hp_size = size
|
||||||
|
|
||||||
# Libvirt hugepages can be 1G and 2M
|
# Libvirt hugepages can be 1G and 2M
|
||||||
if size == SIZE_1G_MB:
|
if size == SIZE_1G_MB:
|
||||||
hp_attr = {}
|
hp_attr = {}
|
||||||
if vs_hp_size == size:
|
if vs_hp_size == size:
|
||||||
nr_hugepages -= vs_hp_nr
|
# If the huge pages are not allocated
|
||||||
|
if nr_hugepages != 0:
|
||||||
|
nr_hugepages -= vs_hp_nr
|
||||||
hp_attr.update({
|
hp_attr.update({
|
||||||
'vswitch_hugepages_size_mib': vs_hp_size,
|
'vswitch_hugepages_size_mib': vs_hp_size,
|
||||||
'vswitch_hugepages_nr': vs_hp_nr,
|
'vswitch_hugepages_nr': vs_hp_nr,
|
||||||
|
@ -410,15 +408,19 @@ class NodeOperator(object):
|
||||||
# No 1G hugepage support.
|
# No 1G hugepage support.
|
||||||
hp_attr = {
|
hp_attr = {
|
||||||
'vm_hugepages_use_1G': 'False',
|
'vm_hugepages_use_1G': 'False',
|
||||||
|
'vm_hugepages_nr_1G': 0,
|
||||||
'vswitch_hugepages_size_mib': vs_hp_size,
|
'vswitch_hugepages_size_mib': vs_hp_size,
|
||||||
'vswitch_hugepages_nr': vs_hp_nr,
|
'vswitch_hugepages_nr': vs_hp_nr,
|
||||||
'vswitch_hugepages_avail': 0
|
'vswitch_hugepages_avail': 0
|
||||||
}
|
}
|
||||||
|
if nr_hugepages != 0:
|
||||||
|
nr_hugepages -= vs_hp_nr
|
||||||
else:
|
else:
|
||||||
hp_attr = {}
|
hp_attr = {}
|
||||||
if vs_hp_size == size and initial_report is False:
|
if vs_hp_size == size and initial_report is False:
|
||||||
# User manually set 2M pages
|
# User manually set 2M pages
|
||||||
nr_hugepages -= vs_hp_nr
|
if nr_hugepages != 0:
|
||||||
|
nr_hugepages -= vs_hp_nr
|
||||||
hp_attr.update({
|
hp_attr.update({
|
||||||
'vswitch_hugepages_size_mib': vs_hp_size,
|
'vswitch_hugepages_size_mib': vs_hp_size,
|
||||||
'vswitch_hugepages_nr': vs_hp_nr,
|
'vswitch_hugepages_nr': vs_hp_nr,
|
||||||
|
@ -546,18 +548,6 @@ class NodeOperator(object):
|
||||||
'vm_hugepages_possible_1G': max_vm_pages_1gb,
|
'vm_hugepages_possible_1G': max_vm_pages_1gb,
|
||||||
})
|
})
|
||||||
|
|
||||||
# calculate 90% 2M pages if it is initial report and the huge
|
|
||||||
# pages have not been allocated
|
|
||||||
if initial_report:
|
|
||||||
max_vm_pages_2mb = max_vm_pages_2mb * 0.9
|
|
||||||
total_hp_mb += int(max_vm_pages_2mb * (SIZE_2M_KB / SIZE_KB))
|
|
||||||
free_hp_mb = total_hp_mb
|
|
||||||
attr.update({
|
|
||||||
'vm_hugepages_nr_2M': max_vm_pages_2mb,
|
|
||||||
'vm_hugepages_avail_2M': max_vm_pages_2mb,
|
|
||||||
'vm_hugepages_nr_1G': 0
|
|
||||||
})
|
|
||||||
|
|
||||||
attr.update({
|
attr.update({
|
||||||
'numa_node': node,
|
'numa_node': node,
|
||||||
'memtotal_mib': total_hp_mb,
|
'memtotal_mib': total_hp_mb,
|
||||||
|
|
|
@ -89,6 +89,7 @@ from sysinv.common import ceph
|
||||||
from sysinv.common import constants
|
from sysinv.common import constants
|
||||||
from sysinv.common import exception
|
from sysinv.common import exception
|
||||||
from sysinv.common import utils as cutils
|
from sysinv.common import utils as cutils
|
||||||
|
from sysinv.helm import common as helm_common
|
||||||
from sysinv.openstack.common import log
|
from sysinv.openstack.common import log
|
||||||
from sysinv.openstack.common import uuidutils
|
from sysinv.openstack.common import uuidutils
|
||||||
from sysinv.openstack.common.gettextutils import _
|
from sysinv.openstack.common.gettextutils import _
|
||||||
|
@ -3377,6 +3378,26 @@ class HostController(rest.RestController):
|
||||||
"addresses while in SDN mode.")
|
"addresses while in SDN mode.")
|
||||||
raise wsme.exc.ClientSideError(msg)
|
raise wsme.exc.ClientSideError(msg)
|
||||||
|
|
||||||
|
@staticmethod
def _semantic_check_vswitch_type_attributes(ihost):
    """
    Check the openstack compute label against the configured vswitch type.

    When the system vswitch type is anything other than
    constants.VSWITCH_TYPE_NONE, the host must already carry the
    helm_common.LABEL_COMPUTE_LABEL label before it may be unlocked
    (the original note gives the reason as: 2M hugepages need to be
    allocated when ovs or ovs-dpdk is enabled).

    :param ihost: host record; 'hostname' is read here for the error
                  message and the record is passed on to
                  utils.is_openstack_compute().
    :raises wsme.exc.ClientSideError: a vswitch type is configured and
                  the host does not have the compute label assigned.
    """
    vswitch_type = utils.get_vswitch_type()
    if vswitch_type == constants.VSWITCH_TYPE_NONE:
        # No vswitch configured: the label is not required.
        return

    # Check whether compute_label has been assigned
    if not utils.is_openstack_compute(ihost):
        # Interpolate AFTER the _() lookup: the message catalog is keyed
        # on the raw format string, so formatting first would make the
        # lookup miss and the message could never be translated.
        raise wsme.exc.ClientSideError(
            _("Can not unlock worker host %s without "
              " %s label if config %s. Action: assign "
              "%s label for this host prior to unlock.")
            % (ihost['hostname'], helm_common.LABEL_COMPUTE_LABEL,
               vswitch_type, helm_common.LABEL_COMPUTE_LABEL))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _semantic_check_data_vrs_attributes(ihost):
|
def _semantic_check_data_vrs_attributes(ihost):
|
||||||
"""
|
"""
|
||||||
|
@ -3637,16 +3658,27 @@ class HostController(rest.RestController):
|
||||||
pecan.request.dbapi.imemory_update(m.uuid, values)
|
pecan.request.dbapi.imemory_update(m.uuid, values)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _update_vm_4k_pages(ihost):
|
def _update_huge_pages(ihost):
|
||||||
"""
|
"""
|
||||||
Update VM 4K huge pages.
|
Update the host huge pages.
|
||||||
"""
|
"""
|
||||||
ihost_inodes = pecan.request.dbapi.inode_get_by_ihost(ihost['uuid'])
|
ihost_inodes = pecan.request.dbapi.inode_get_by_ihost(ihost['uuid'])
|
||||||
|
|
||||||
|
labels = pecan.request.dbapi.label_get_by_host(ihost['uuid'])
|
||||||
|
vswitch_type = utils.get_vswitch_type()
|
||||||
for node in ihost_inodes:
|
for node in ihost_inodes:
|
||||||
mems = pecan.request.dbapi.imemory_get_by_inode(node['id'])
|
mems = pecan.request.dbapi.imemory_get_by_inode(node['id'])
|
||||||
for m in mems:
|
for m in mems:
|
||||||
if m.hugepages_configured:
|
if m.hugepages_configured:
|
||||||
|
value = {}
|
||||||
|
vs_hugepages_nr = m.vswitch_hugepages_nr
|
||||||
|
# allocate the default vswitch huge pages if required
|
||||||
|
if vswitch_type != constants.VSWITCH_TYPE_NONE and \
|
||||||
|
vs_hugepages_nr == 0:
|
||||||
|
vs_hugepages_nr = constants.VSWITCH_MEMORY_MB \
|
||||||
|
// m.vswitch_hugepages_size_mib
|
||||||
|
value.update({'vswitch_hugepages_nr': vs_hugepages_nr})
|
||||||
|
|
||||||
vm_hugepages_nr_2M = m.vm_hugepages_nr_2M_pending \
|
vm_hugepages_nr_2M = m.vm_hugepages_nr_2M_pending \
|
||||||
if m.vm_hugepages_nr_2M_pending is not None \
|
if m.vm_hugepages_nr_2M_pending is not None \
|
||||||
else m.vm_hugepages_nr_2M
|
else m.vm_hugepages_nr_2M
|
||||||
|
@ -3654,10 +3686,18 @@ class HostController(rest.RestController):
|
||||||
if m.vm_hugepages_nr_1G_pending is not None \
|
if m.vm_hugepages_nr_1G_pending is not None \
|
||||||
else m.vm_hugepages_nr_1G
|
else m.vm_hugepages_nr_1G
|
||||||
|
|
||||||
|
# calculate 90% 2M pages if the huge pages have not been
|
||||||
|
# allocated and the compute label is set
|
||||||
|
if cutils.has_openstack_compute(labels) and \
|
||||||
|
vm_hugepages_nr_2M == 0 and \
|
||||||
|
vm_hugepages_nr_1G == 0:
|
||||||
|
vm_hugepages_nr_2M = m.vm_hugepages_possible_2M * 0.9
|
||||||
|
value.update({'vm_hugepages_nr_2M': vm_hugepages_nr_2M})
|
||||||
|
|
||||||
vm_hugepages_4K = \
|
vm_hugepages_4K = \
|
||||||
(m.node_memtotal_mib - m.platform_reserved_mib)
|
(m.node_memtotal_mib - m.platform_reserved_mib)
|
||||||
vm_hugepages_4K -= \
|
vm_hugepages_4K -= \
|
||||||
(m.vswitch_hugepages_nr * m.vswitch_hugepages_size_mib)
|
(vs_hugepages_nr * m.vswitch_hugepages_size_mib)
|
||||||
vm_hugepages_4K -= \
|
vm_hugepages_4K -= \
|
||||||
(constants.MIB_2M * vm_hugepages_nr_2M)
|
(constants.MIB_2M * vm_hugepages_nr_2M)
|
||||||
vm_hugepages_4K -= \
|
vm_hugepages_4K -= \
|
||||||
|
@ -3670,10 +3710,9 @@ class HostController(rest.RestController):
|
||||||
if vm_hugepages_4K < min_4K:
|
if vm_hugepages_4K < min_4K:
|
||||||
vm_hugepages_4K = 0
|
vm_hugepages_4K = 0
|
||||||
|
|
||||||
value = {'vm_hugepages_nr_4K': vm_hugepages_4K}
|
value.update({'vm_hugepages_nr_4K': vm_hugepages_4K})
|
||||||
LOG.info("Set VM 4K pages for host (%s) node (%d) pages "
|
LOG.info("Updating mem values of host(%s) node(%d): %s" %
|
||||||
"(%d)" % (ihost['hostname'], node['id'],
|
(ihost['hostname'], node['id'], str(value)))
|
||||||
vm_hugepages_4K))
|
|
||||||
pecan.request.dbapi.imemory_update(m.uuid, value)
|
pecan.request.dbapi.imemory_update(m.uuid, value)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -5204,6 +5243,7 @@ class HostController(rest.RestController):
|
||||||
self._semantic_check_data_interfaces(ihost,
|
self._semantic_check_data_interfaces(ihost,
|
||||||
kubernetes_config,
|
kubernetes_config,
|
||||||
force_unlock)
|
force_unlock)
|
||||||
|
self._semantic_check_vswitch_type_attributes(ihost)
|
||||||
else:
|
else:
|
||||||
# sdn configuration check
|
# sdn configuration check
|
||||||
self._semantic_check_sdn_attributes(ihost)
|
self._semantic_check_sdn_attributes(ihost)
|
||||||
|
@ -5265,8 +5305,8 @@ class HostController(rest.RestController):
|
||||||
if align_2M_memory or align_1G_memory:
|
if align_2M_memory or align_1G_memory:
|
||||||
self._align_pending_memory(ihost, align_2M_memory, align_1G_memory)
|
self._align_pending_memory(ihost, align_2M_memory, align_1G_memory)
|
||||||
|
|
||||||
# calculate the VM 4K huge pages for nova
|
# update ihost huge pages allocation
|
||||||
self._update_vm_4k_pages(ihost)
|
self._update_huge_pages(ihost)
|
||||||
|
|
||||||
if cutils.is_virtual() or cutils.is_virtual_worker(ihost):
|
if cutils.is_virtual() or cutils.is_virtual_worker(ihost):
|
||||||
mib_platform_reserved_no_io = mib_reserved
|
mib_platform_reserved_no_io = mib_reserved
|
||||||
|
|
|
@ -33,6 +33,7 @@ from oslo_config import cfg
|
||||||
from sysinv.common import constants
|
from sysinv.common import constants
|
||||||
from sysinv.common import exception
|
from sysinv.common import exception
|
||||||
from sysinv.common.utils import memoized
|
from sysinv.common.utils import memoized
|
||||||
|
from sysinv.helm import common as helm_common
|
||||||
from sysinv.openstack.common.gettextutils import _
|
from sysinv.openstack.common.gettextutils import _
|
||||||
from sysinv.openstack.common import log
|
from sysinv.openstack.common import log
|
||||||
|
|
||||||
|
@ -255,6 +256,16 @@ def get_vswitch_type():
|
||||||
return system.capabilities.get('vswitch_type')
|
return system.capabilities.get('vswitch_type')
|
||||||
|
|
||||||
|
|
||||||
|
def is_openstack_compute(ihost):
    """Tell whether the host has the openstack compute label assigned.

    The host's labels are fetched through pecan.request.dbapi using
    ihost['uuid']; the host qualifies as soon as one label's label_key
    equals helm_common.LABEL_COMPUTE_LABEL.

    :param ihost: host record providing the 'uuid' key.
    :returns: True if a matching label is found, otherwise False.
    """
    def _is_compute_label(label):
        # A label object without the expected attribute simply does
        # not match; it must not abort the scan of the other labels.
        try:
            return helm_common.LABEL_COMPUTE_LABEL == label.label_key
        except AttributeError:
            return False

    host_labels = pecan.request.dbapi.label_get_by_host(ihost['uuid'])
    return any(_is_compute_label(label) for label in host_labels)
|
||||||
|
|
||||||
|
|
||||||
def get_https_enabled():
|
def get_https_enabled():
|
||||||
system = pecan.request.dbapi.isystem_get_one()
|
system = pecan.request.dbapi.isystem_get_one()
|
||||||
return system.capabilities.get('https_enabled', False)
|
return system.capabilities.get('https_enabled', False)
|
||||||
|
|
|
@ -208,6 +208,9 @@ MIB_1G = 1024
|
||||||
Ki = 1024
|
Ki = 1024
|
||||||
NUM_4K_PER_MiB = 256
|
NUM_4K_PER_MiB = 256
|
||||||
|
|
||||||
|
# Defines per-socket vswitch memory requirements (in MB)
|
||||||
|
VSWITCH_MEMORY_MB = 1024
|
||||||
|
|
||||||
# Dynamic IO Resident Set Size(RSS) in MiB per socket
|
# Dynamic IO Resident Set Size(RSS) in MiB per socket
|
||||||
DISK_IO_RESIDENT_SET_SIZE_MIB = 2000
|
DISK_IO_RESIDENT_SET_SIZE_MIB = 2000
|
||||||
DISK_IO_RESIDENT_SET_SIZE_MIB_VBOX = 500
|
DISK_IO_RESIDENT_SET_SIZE_MIB_VBOX = 500
|
||||||
|
|
Loading…
Reference in New Issue