Support a single huge page size for OpenStack worker nodes
Kubernetes only supports a single huge page size per worker node. Prior to Kubernetes 1.15, the huge page feature could be disabled via a feature gate. In Kubernetes 1.15 that feature gate was removed, so huge page support is always enabled in k8s. This update removes the conditional disabling of the huge page feature and enforces the provisioning of a single huge page size per worker. When the vswitch type is set to ovs-dpdk or avs, the application huge page size follows the vswitch huge page size. This update also changes the auto-provisioning of VM huge pages to 1G, since there is no auto-provisioning in a virtual environment. Story: 2006295 Task: 36006 Change-Id: I84d4959b420584fdcdf8a8664a6f4855c08ec989 Signed-off-by: Tao Liu <tao.liu@windriver.com>
This commit is contained in:
parent
c6a18c4833
commit
47735c6ab9
@ -115,11 +115,10 @@ class platform::kubernetes::kubeadm {
|
||||
$k8s_registry = 'k8s.gcr.io'
|
||||
}
|
||||
|
||||
# Configure kubelet hugepage and cpumanager options
|
||||
# Configure kubelet cpumanager options
|
||||
if str2bool($::is_worker_subfunction)
|
||||
and !('openstack-compute-node'
|
||||
in $host_labels) {
|
||||
$k8s_hugepage = true
|
||||
$k8s_cpu_manager_opts = join([
|
||||
'--cpu-manager-policy=static',
|
||||
'--system-reserved-cgroup=/system.slice',
|
||||
@ -129,12 +128,11 @@ class platform::kubernetes::kubeadm {
|
||||
"memory=${k8s_reserved_mem}Mi"])
|
||||
], ' ')
|
||||
} else {
|
||||
$k8s_hugepage = false
|
||||
$k8s_cpu_manager_opts = '--cpu-manager-policy=none'
|
||||
}
|
||||
|
||||
# Enable kubelet extra parameters that are node specific such as
|
||||
# hugepages and cpumanager
|
||||
# cpumanager
|
||||
file { '/etc/sysconfig/kubelet':
|
||||
ensure => file,
|
||||
content => template('platform/kubelet.conf.erb'),
|
||||
|
@ -41,8 +41,6 @@ apiVersion: kubelet.config.k8s.io/v1beta1
|
||||
configMapAndSecretChangeDetectionStrategy: Cache
|
||||
nodeStatusUpdateFrequency: "4s"
|
||||
failSwapOn: false
|
||||
featureGates:
|
||||
HugePages: false
|
||||
cgroupRoot: "/k8s-infra"
|
||||
---
|
||||
kind: KubeProxyConfiguration
|
||||
|
@ -1,2 +1,2 @@
|
||||
# Overrides config file for kubelet
|
||||
KUBELET_EXTRA_ARGS=--node-ip=<%= @node_ip %> --feature-gates=HugePages=<%= @k8s_hugepage %> <%= @k8s_cpu_manager_opts %>
|
||||
KUBELET_EXTRA_ARGS=--node-ip=<%= @node_ip %> <%= @k8s_cpu_manager_opts %>
|
||||
|
@ -3486,9 +3486,9 @@ class HostController(rest.RestController):
|
||||
pending_2M_memory, pending_1G_memory)
|
||||
|
||||
@staticmethod
|
||||
def _check_memory_for_non_openstack(ihost):
|
||||
def _check_memory_for_single_size(ihost):
|
||||
"""
|
||||
Perform memory semantic checks on a non openstack worker.
|
||||
Perform memory semantic checks on a worker node.
|
||||
It restricts the huge page allocation to either a 2M or 1G
|
||||
pool.
|
||||
"""
|
||||
@ -3606,16 +3606,16 @@ class HostController(rest.RestController):
|
||||
constants.MIB_2M)
|
||||
value.update({'vm_hugepages_nr_2M': vm_hugepages_nr_2M})
|
||||
|
||||
# calculate 90% 2M pages if the huge pages have not been
|
||||
# calculate 90% 1G pages if the huge pages have not been
|
||||
# allocated and the compute label is set
|
||||
if cutils.has_openstack_compute(labels) and \
|
||||
vm_hugepages_nr_2M == 0 and \
|
||||
vm_hugepages_nr_1G == 0 and \
|
||||
vm_mem_mib > 0 and \
|
||||
cutils.is_default_huge_pages_required(ihost):
|
||||
vm_hugepages_nr_2M = int((hp_possible_mib * 0.9 - vs_mem_mib) /
|
||||
constants.MIB_2M)
|
||||
value.update({'vm_hugepages_nr_2M': vm_hugepages_nr_2M})
|
||||
vm_hugepages_nr_1G = int((hp_possible_mib * 0.9 - vs_mem_mib) /
|
||||
constants.MIB_1G)
|
||||
value.update({'vm_hugepages_nr_1G': vm_hugepages_nr_1G})
|
||||
|
||||
vm_hugepages_4K = vm_mem_mib
|
||||
vm_hugepages_4K -= \
|
||||
@ -5221,10 +5221,8 @@ class HostController(rest.RestController):
|
||||
# Check if cpu assignments are valid
|
||||
self._semantic_check_worker_cpu_assignments(ihost)
|
||||
|
||||
# for non-openstack worker node, only allow allocating huge pages
|
||||
# for a single size
|
||||
if not utils.is_openstack_compute(ihost):
|
||||
self._check_memory_for_non_openstack(ihost)
|
||||
# only allow allocating huge pages for a single size
|
||||
self._check_memory_for_single_size(ihost)
|
||||
|
||||
# check if the platform reserved memory is valid
|
||||
ihost_inodes = pecan.request.dbapi.inode_get_by_ihost(ihost['uuid'])
|
||||
|
@ -741,6 +741,19 @@ def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None,
|
||||
vs_hp_size_mib = constants.MIB_2M
|
||||
vs_hp_reqd_mib = new_vs_pages * vs_hp_size_mib
|
||||
|
||||
if new_2M_pages != 0 or new_1G_pages != 0:
|
||||
if utils.get_vswitch_type() != constants.VSWITCH_TYPE_NONE:
|
||||
if vs_hp_size_mib == constants.MIB_1G:
|
||||
if new_2M_pages != 0:
|
||||
raise wsme.exc.ClientSideError(_(
|
||||
"Only 1G huge page allocation is supported"))
|
||||
elif new_1G_pages != 0:
|
||||
raise wsme.exc.ClientSideError(_(
|
||||
"Only 2M huge page allocation is supported"))
|
||||
elif new_2M_pages != 0 and new_1G_pages != 0:
|
||||
raise wsme.exc.ClientSideError(_(
|
||||
"Host only supports single huge page size."))
|
||||
|
||||
# The size of possible hugepages is the node mem total - platform reserved
|
||||
base_mem_mib = rpc_port['platform_reserved_mib']
|
||||
if platform_reserved_mib:
|
||||
|
Loading…
Reference in New Issue
Block a user