Merge "enable k8s huge page feature"

Authored by Zuul on 2019-04-29 15:30:13 +00:00; committed by Gerrit Code Review
commit b182032481
7 changed files with 95 additions and 30 deletions

View File

@@ -52,7 +52,7 @@ class platform::compute::grub::params (
     $eptad = ''
   }

-  if $::is_gb_page_supported {
+  if $::is_gb_page_supported and $::platform::params::vswitch_type != 'none' {
     if $g_hugepages != undef {
       $gb_hugepages = $g_hugepages
     } else {
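
This change makes the boot-time 1G huge page reservation conditional on a
vswitch actually being configured; with vswitch_type set to 'none', huge
page management is left to Kubernetes at runtime. A minimal Python sketch
of the resulting condition (the function and parameter names here are
illustrative, not taken from the manifest):

def wants_boot_time_1g_pages(is_gb_page_supported, vswitch_type):
    # Mirrors the Puppet guard above: reserve 1G pages on the kernel
    # command line only when a vswitch is configured.
    return is_gb_page_supported and vswitch_type != 'none'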

View File

@@ -91,8 +91,11 @@ class platform::kubernetes::cgroup
 }

 class platform::kubernetes::kubeadm {
   include ::platform::docker::params
+  include ::platform::kubernetes::params
+
+  $host_labels = $::platform::kubernetes::params::host_labels

   $iptables_file = "net.bridge.bridge-nf-call-ip6tables = 1
     net.bridge.bridge-nf-call-iptables = 1"
@@ -102,6 +105,21 @@ class platform::kubernetes::kubeadm {
     $k8s_registry = undef
   }

+  # only set k8s_hugepage to true when the subfunction is worker and
+  # openstack-compute-node is not in host_labels
+  if str2bool($::is_worker_subfunction)
+    and !('openstack-compute-node' in $host_labels) {
+    $k8s_hugepage = true
+  } else {
+    $k8s_hugepage = false
+  }
+
+  # enable extra parameters such as hugepages
+  file { '/etc/sysconfig/kubelet':
+    ensure  => file,
+    content => template('platform/kubelet.conf.erb'),
+  }
+
   # Update iptables config. This is required based on:
   # https://kubernetes.io/docs/tasks/tools/install-kubeadm
   # This probably belongs somewhere else - initscripts package?

View File

@@ -0,0 +1,3 @@
+# Overrides config file for kubelet
+KUBELET_EXTRA_ARGS=--feature-gates=HugePages=<%= @k8s_hugepage %>
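
The kubeadm manifest above writes this template to /etc/sysconfig/kubelet,
so the feature gate follows the per-host decision made there. A sketch of
the combined logic, with hypothetical function and parameter names:

def kubelet_extra_args(is_worker_subfunction, host_labels):
    # Enable the HugePages feature gate only on worker hosts that do not
    # carry the openstack-compute-node label, as in the manifest above.
    k8s_hugepage = (is_worker_subfunction and
                    'openstack-compute-node' not in host_labels)
    # Puppet booleans render as 'true'/'false' in the ERB template, e.g.
    # KUBELET_EXTRA_ARGS=--feature-gates=HugePages=true
    return ('KUBELET_EXTRA_ARGS=--feature-gates=HugePages=%s'
            % str(k8s_hugepage).lower())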

View File

@@ -23,9 +23,6 @@ import tsconfig.tsconfig as tsc

 LOG = logging.getLogger(__name__)

-# Defines per-socket vswitch memory requirements (in MB)
-VSWITCH_MEMORY_MB = 1024
-
 # Defines the size of one kilobyte
 SIZE_KB = 1024

@@ -386,15 +383,16 @@ class NodeOperator(object):
             vs_hp_nr, vs_hp_size = self._get_vswitch_reserved_memory(
                 node)

-            if vs_hp_nr == 0 or vs_hp_size == 0:
-                vs_hp_nr = VSWITCH_MEMORY_MB // size
+            if vs_hp_size == 0:
                 vs_hp_size = size

             # Libvirt hugepages can be 1G and 2M
             if size == SIZE_1G_MB:
                 hp_attr = {}
                 if vs_hp_size == size:
-                    nr_hugepages -= vs_hp_nr
+                    # If the huge pages are not allocated
+                    if nr_hugepages != 0:
+                        nr_hugepages -= vs_hp_nr
                     hp_attr.update({
                         'vswitch_hugepages_size_mib': vs_hp_size,
                         'vswitch_hugepages_nr': vs_hp_nr,
@@ -410,15 +408,19 @@ class NodeOperator(object):
                 # No 1G hugepage support.
                 hp_attr = {
                     'vm_hugepages_use_1G': 'False',
+                    'vm_hugepages_nr_1G': 0,
                     'vswitch_hugepages_size_mib': vs_hp_size,
                     'vswitch_hugepages_nr': vs_hp_nr,
                     'vswitch_hugepages_avail': 0
                 }
+                if nr_hugepages != 0:
+                    nr_hugepages -= vs_hp_nr
             else:
                 hp_attr = {}
                 if vs_hp_size == size and initial_report is False:
                     # User manually set 2M pages
-                    nr_hugepages -= vs_hp_nr
+                    if nr_hugepages != 0:
+                        nr_hugepages -= vs_hp_nr
                     hp_attr.update({
                         'vswitch_hugepages_size_mib': vs_hp_size,
                         'vswitch_hugepages_nr': vs_hp_nr,
@@ -546,18 +548,6 @@ class NodeOperator(object):
                 'vm_hugepages_possible_1G': max_vm_pages_1gb,
             })

-            # calculate 90% 2M pages if it is initial report and the huge
-            # pages have not been allocated
-            if initial_report:
-                max_vm_pages_2mb = max_vm_pages_2mb * 0.9
-                total_hp_mb += int(max_vm_pages_2mb * (SIZE_2M_KB / SIZE_KB))
-                free_hp_mb = total_hp_mb
-                attr.update({
-                    'vm_hugepages_nr_2M': max_vm_pages_2mb,
-                    'vm_hugepages_avail_2M': max_vm_pages_2mb,
-                    'vm_hugepages_nr_1G': 0
-                })
-
             attr.update({
                 'numa_node': node,
                 'memtotal_mib': total_hp_mb,
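
The recurring nr_hugepages != 0 guard added in this file keeps the vswitch
reservation from being subtracted before the kernel has allocated any pages
of that size, which would otherwise drive the reported count negative on
the initial inventory report. A small sketch of the accounting (names and
numbers are hypothetical):

def vm_visible_hugepages(nr_hugepages, vs_hp_nr):
    # nr_hugepages: total pages of this size reported by the kernel.
    # vs_hp_nr: pages reserved for the vswitch.
    if nr_hugepages != 0:
        nr_hugepages -= vs_hp_nr
    return nr_hugepages

vm_visible_hugepages(0, 512)     # before allocation: 0, not -512
vm_visible_hugepages(4096, 512)  # after allocation: 3584 pages for VMs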

View File

@@ -89,6 +89,7 @@ from sysinv.common import ceph
 from sysinv.common import constants
 from sysinv.common import exception
 from sysinv.common import utils as cutils
+from sysinv.helm import common as helm_common
 from sysinv.openstack.common import log
 from sysinv.openstack.common import uuidutils
 from sysinv.openstack.common.gettextutils import _
@@ -3377,6 +3378,26 @@ class HostController(rest.RestController):
                   "addresses while in SDN mode.")
             raise wsme.exc.ClientSideError(msg)

+    @staticmethod
+    def _semantic_check_vswitch_type_attributes(ihost):
+        """
+        Check that the openstack-compute-node host label is assigned when
+        the ovs or ovs-dpdk vswitch type is enabled, since 2M huge pages
+        must be allocated; validates the node configuration prior to unlock.
+        """
+        vswitch_type = utils.get_vswitch_type()
+        if vswitch_type == constants.VSWITCH_TYPE_NONE:
+            return
+
+        # Check whether the compute label has been assigned
+        if utils.is_openstack_compute(ihost) is not True:
+            raise wsme.exc.ClientSideError(
+                _("Can not unlock worker host %s without the "
+                  "%s label when the vswitch type is %s. Action: assign "
+                  "the %s label to this host prior to unlock."
+                  % (ihost['hostname'], helm_common.LABEL_COMPUTE_LABEL,
+                     vswitch_type, helm_common.LABEL_COMPUTE_LABEL)))
+
     @staticmethod
     def _semantic_check_data_vrs_attributes(ihost):
         """
@@ -3637,16 +3658,27 @@ class HostController(rest.RestController):
                 pecan.request.dbapi.imemory_update(m.uuid, values)

     @staticmethod
-    def _update_vm_4k_pages(ihost):
+    def _update_huge_pages(ihost):
         """
-        Update VM 4K huge pages.
+        Update the host huge pages.
         """
         ihost_inodes = pecan.request.dbapi.inode_get_by_ihost(ihost['uuid'])
+        labels = pecan.request.dbapi.label_get_by_host(ihost['uuid'])
+        vswitch_type = utils.get_vswitch_type()

         for node in ihost_inodes:
             mems = pecan.request.dbapi.imemory_get_by_inode(node['id'])
             for m in mems:
                 if m.hugepages_configured:
+                    value = {}
+                    vs_hugepages_nr = m.vswitch_hugepages_nr
+                    # allocate the default vswitch huge pages if required
+                    if vswitch_type != constants.VSWITCH_TYPE_NONE and \
+                            vs_hugepages_nr == 0:
+                        vs_hugepages_nr = constants.VSWITCH_MEMORY_MB \
+                            // m.vswitch_hugepages_size_mib
+                        value.update({'vswitch_hugepages_nr': vs_hugepages_nr})
+
                     vm_hugepages_nr_2M = m.vm_hugepages_nr_2M_pending \
                         if m.vm_hugepages_nr_2M_pending is not None \
                         else m.vm_hugepages_nr_2M
@@ -3654,10 +3686,18 @@ class HostController(rest.RestController):
                         if m.vm_hugepages_nr_1G_pending is not None \
                         else m.vm_hugepages_nr_1G

+                    # calculate 90% 2M pages if the huge pages have not
+                    # been allocated and the compute label is set
+                    if cutils.has_openstack_compute(labels) and \
+                            vm_hugepages_nr_2M == 0 and \
+                            vm_hugepages_nr_1G == 0:
+                        vm_hugepages_nr_2M = m.vm_hugepages_possible_2M * 0.9
+                        value.update({'vm_hugepages_nr_2M': vm_hugepages_nr_2M})
+
                     vm_hugepages_4K = \
                         (m.node_memtotal_mib - m.platform_reserved_mib)
                     vm_hugepages_4K -= \
-                        (m.vswitch_hugepages_nr * m.vswitch_hugepages_size_mib)
+                        (vs_hugepages_nr * m.vswitch_hugepages_size_mib)
                     vm_hugepages_4K -= \
                         (constants.MIB_2M * vm_hugepages_nr_2M)
                     vm_hugepages_4K -= \
@@ -3670,10 +3710,9 @@ class HostController(rest.RestController):
                     if vm_hugepages_4K < min_4K:
                         vm_hugepages_4K = 0

-                    value = {'vm_hugepages_nr_4K': vm_hugepages_4K}
-                    LOG.info("Set VM 4K pages for host (%s) node (%d) pages "
-                             "(%d)" % (ihost['hostname'], node['id'],
-                                       vm_hugepages_4K))
+                    value.update({'vm_hugepages_nr_4K': vm_hugepages_4K})
+                    LOG.info("Updating mem values of host(%s) node(%d): %s" %
+                             (ihost['hostname'], node['id'], str(value)))
                     pecan.request.dbapi.imemory_update(m.uuid, value)

     @staticmethod
@@ -5204,6 +5243,7 @@ class HostController(rest.RestController):
             self._semantic_check_data_interfaces(ihost,
                                                  kubernetes_config,
                                                  force_unlock)
+            self._semantic_check_vswitch_type_attributes(ihost)
         else:
             # sdn configuration check
             self._semantic_check_sdn_attributes(ihost)
@@ -5265,8 +5305,8 @@ class HostController(rest.RestController):
         if align_2M_memory or align_1G_memory:
             self._align_pending_memory(ihost, align_2M_memory, align_1G_memory)

-        # calculate the VM 4K huge pages for nova
-        self._update_vm_4k_pages(ihost)
+        # update ihost huge pages allocation
+        self._update_huge_pages(ihost)

         if cutils.is_virtual() or cutils.is_virtual_worker(ihost):
             mib_platform_reserved_no_io = mib_reserved
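
The arithmetic in _update_huge_pages can be sanity-checked with a worked
example. All numbers below are hypothetical, and MIB_2M = 2 and
NUM_4K_PER_MiB = 256 are assumed to match the constants referenced in the
diff:

node_memtotal_mib = 65536      # 64 GiB NUMA node
platform_reserved_mib = 10000  # reserved for platform use
vs_hugepages_nr = 1            # VSWITCH_MEMORY_MB // 1024 for 1G pages
vs_hugepages_size_mib = 1024
vm_hugepages_nr_2M = 24000     # e.g. 90% of the possible 2M pages
vm_hugepages_nr_1G = 0

vm_hugepages_4K_mib = (node_memtotal_mib - platform_reserved_mib
                       - vs_hugepages_nr * vs_hugepages_size_mib
                       - 2 * vm_hugepages_nr_2M       # MIB_2M
                       - 1024 * vm_hugepages_nr_1G)   # MIB_1G
print(vm_hugepages_4K_mib)        # 6512 MiB left over for 4K pages
print(vm_hugepages_4K_mib * 256)  # 1667072 4K pages (NUM_4K_PER_MiB)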

View File

@@ -33,6 +33,7 @@ from oslo_config import cfg
 from sysinv.common import constants
 from sysinv.common import exception
 from sysinv.common.utils import memoized
+from sysinv.helm import common as helm_common
 from sysinv.openstack.common.gettextutils import _
 from sysinv.openstack.common import log

@@ -255,6 +256,16 @@ def get_vswitch_type():
     return system.capabilities.get('vswitch_type')


+def is_openstack_compute(ihost):
+    for obj in pecan.request.dbapi.label_get_by_host(ihost['uuid']):
+        try:
+            if helm_common.LABEL_COMPUTE_LABEL == obj.label_key:
+                return True
+        except AttributeError:
+            pass
+    return False
+
+
 def get_https_enabled():
     system = pecan.request.dbapi.isystem_get_one()
     return system.capabilities.get('https_enabled', False)

View File

@@ -208,6 +208,9 @@ MIB_1G = 1024
 Ki = 1024
 NUM_4K_PER_MiB = 256

+# Defines per-socket vswitch memory requirements (in MB)
+VSWITCH_MEMORY_MB = 1024
+
 # Dynamic IO Resident Set Size(RSS) in MiB per socket
 DISK_IO_RESIDENT_SET_SIZE_MIB = 2000
 DISK_IO_RESIDENT_SET_SIZE_MIB_VBOX = 500
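
The new per-socket default converts into a page count by dividing by the
configured vswitch huge page size, as _update_huge_pages does above:

VSWITCH_MEMORY_MB = 1024
VSWITCH_MEMORY_MB // 1024  # 1 page per socket with 1G vswitch pages
VSWITCH_MEMORY_MB // 2     # 512 pages per socket with 2M vswitch pages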