Remove kubernetes 1.18, 1.19, 1.20 pkgs

The new minimum supported k8s version will be 1.21. This commit removes
the pkg files that were only needed to build the old k8s versions. The
remaining pkgs build successfully. Deployed on AIO-SX and AIO-DX, the
k8s services were running correctly.

Story: 2009859
Task: 44498
Change-Id: Ib39e9d1522a49c5788240781c8edee2bdffbc97a
Signed-off-by: Daniel Safta <daniel.safta@windriver.com>

parent 5f474f3c2e
commit c66d407614
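As a purely illustrative sanity check (the pattern and the idea of running it from the repo root are assumptions, not part of this change), the tree can be searched for leftover references to the dropped versions:

# verify that no stale references to the removed k8s versions remain
grep -rn 'kubernetes-1\.18\.1\|kubernetes-1\.19\.13\|kubernetes-1\.20\.9' . || echo "no stale references"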
@@ -156,15 +156,6 @@ memcached

# kubernetes
kubernetes-unversioned
kubernetes-1.18.1-node
kubernetes-1.18.1-kubeadm
kubernetes-1.18.1-client
kubernetes-1.19.13-node
kubernetes-1.19.13-kubeadm
kubernetes-1.19.13-client
kubernetes-1.20.9-node
kubernetes-1.20.9-kubeadm
kubernetes-1.20.9-client
kubernetes-1.21.8-node
kubernetes-1.21.8-kubeadm
kubernetes-1.21.8-client
@@ -55,9 +55,6 @@ docker/python-docker
kubernetes/containerd
kubernetes/cni/plugins
kubernetes/cni/bond-cni
kubernetes/kubernetes-1.18.1
kubernetes/kubernetes-1.19.13
kubernetes/kubernetes-1.20.9
kubernetes/kubernetes-1.21.8
kubernetes/kubernetes-1.22.5
kubernetes/kubernetes-unversioned
@@ -40,9 +40,6 @@ kexec-tools-2.0.21.tar.xz#kexec-tools-2.0.21#https://www.kernel.org/pub/linux/ut
!kdump-anaconda-addon-003-29-g4c517c5.tar.gz#kexec-tools#https://vault.centos.org/7.6.1810/os/Source/SPackages/kexec-tools-2.0.15-21.el7.src.rpm#https##
keycodemapdb-16e5b07.tar.gz#keycodemapdb#https://github.com/CendioOssman/keycodemapdb/tarball/16e5b0787687d8904dad2c026107409eb9bfcb95#http##
kubernetes-contrib-v1.18.1.tar.gz#kubernetes-contrib-1.18.1#https://github.com/kubernetes-retired/contrib/tarball/89f6948e24578fed2a90a87871b2263729f90ac3#http##
kubernetes-v1.18.1.tar.gz#kubernetes-1.18.1#https://github.com/kubernetes/kubernetes/archive/7879fc12a63337efff607952a323df90cdc7a335.tar.gz#http##
kubernetes-v1.19.13.tar.gz#kubernetes-1.19.13#https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.19.13.tar.gz#http##
kubernetes-v1.20.9.tar.gz#kubernetes-1.20.9#https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.20.9.tar.gz#http##
kubernetes-v1.21.8.tar.gz#kubernetes-1.21.8#https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.21.8.tar.gz#http##
kubernetes-v1.22.5.tar.gz#kubernetes-1.22.5#https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.22.5.tar.gz#http##
kvm-unit-tests.git-4ea7633.tar.bz2#kvm-unit-tests#https://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git/snapshot/kvm-unit-tests-20171020.tar.gz#http##
@@ -1,9 +0,0 @@
The spec file used here was from the kubernetes 1.10.0 src rpm.
The orig file is included to help show modifications made to that
spec file, to help understand which changes were needed and to
assist with future upversioning.

The contrib tarball does not have the same versioning as kubernetes and
there is little activity in that repo.

The version for the contrib tarball is arbitrary.
@@ -1,5 +0,0 @@
VERSION=1.18.1
TAR_NAME=kubernetes
TAR="$TAR_NAME-v$VERSION.tar.gz"
COPY_LIST="${CGCS_BASE}/downloads/$TAR $FILES_BASE/*"
TIS_PATCH_VER=PKG_GITREVCOUNT
@@ -1,44 +0,0 @@
From ee648637dde0394a9e487a47a2c6f33f2e238046 Mon Sep 17 00:00:00 2001
From: Robert Church <robert.church@windriver.com>
Date: Mon, 6 Apr 2020 20:59:53 -0400
Subject: [PATCH] Fix pagesize check to allow for options already ending in 'i'

Commit https://github.com/kubernetes/kubernetes/commit/03ecc20 adds a
pagesize mount option quantity check that appends an 'i' to the pagesize
value.

Based on the current StarlingX configuration the hugepages are mounted
with the following option that already contains an 'i' as a suffix:
pagesize=1Gi.

This temporary patch updates the logic to avoid appending an additional
'i' at the end of the size string. This extra 'i' is not handled by
ParseQuantity() and results is a pod stuck Terminating as the mount
is not removed from the container.

Signed-off-by: Robert Church <robert.church@windriver.com>
---
pkg/volume/emptydir/empty_dir_linux.go | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pkg/volume/emptydir/empty_dir_linux.go b/pkg/volume/emptydir/empty_dir_linux.go
index 63a25dc4ed0..7343c5e510a 100644
--- a/pkg/volume/emptydir/empty_dir_linux.go
+++ b/pkg/volume/emptydir/empty_dir_linux.go
@@ -69,7 +69,12 @@ func getPageSize(path string, mounter mount.Interface) (*resource.Quantity, erro
// NOTE: Adding suffix 'i' as result should be comparable with a medium size.
// pagesize mount option is specified without a suffix,
// e.g. pagesize=2M or pagesize=1024M for x86 CPUs
- pageSize, err := resource.ParseQuantity(strings.TrimPrefix(opt, prefix) + "i")
+ opt_val := strings.TrimPrefix(opt, prefix)
+ val := opt_val
+ if !strings.HasSuffix(opt_val, "i") {
+ val = opt_val + "i"
+ }
+ pageSize, err := resource.ParseQuantity(val)
if err != nil {
return nil, fmt.Errorf("error getting page size from '%s' mount option: %v", opt, err)
}
--
2.16.6
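For context, the mount option this patch tolerates can be seen on a node configured as described above; a minimal illustrative check (the exact mount point is an assumption):

# hugetlbfs mounts expose the pagesize option kubelet parses; per the patch
# description it already carries the 'i' suffix on StarlingX, e.g. pagesize=1Gi
grep hugetlbfs /proc/mounts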
@@ -1,138 +0,0 @@
From c72ad02d7be3edaf17a07bb6b2c40249ba00038e Mon Sep 17 00:00:00 2001
From: Chris Friesen <chris.friesen@windriver.com>
Date: Tue, 21 Apr 2020 16:06:35 -0600
Subject: [PATCH] Fix exclusive CPU allocations being deleted at container
restart

The expectation is that exclusive CPU allocations happen at pod
creation time. When a container restarts, it should not have its
exclusive CPU allocations removed, and it should not need to
re-allocate CPUs.

There are a few places in the current code that look for containers
that have exited and call CpuManager.RemoveContainer() to clean up
the container. This will end up deleting any exclusive CPU
allocations for that container, and if the container restarts within
the same pod it will end up using the default cpuset rather than
what should be exclusive CPUs.

Removing those calls and adding resource cleanup at allocation
time should get rid of the problem.

Signed-off-by: Chris Friesen <chris.friesen@windriver.com>
---
pkg/kubelet/cm/cpumanager/cpu_manager.go | 19 +++++++++----------
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 12 ++++++++++++
pkg/kubelet/cm/internal_container_lifecycle.go | 9 ---------
3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index 08d45c77182..c682f813a8a 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -242,6 +242,9 @@ func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesRe
|
||||
}
|
||||
|
||||
func (m *manager) Allocate(p *v1.Pod, c *v1.Container) error {
|
||||
+ // Garbage collect any stranded resources before allocating CPUs.
|
||||
+ m.removeStaleState()
|
||||
+
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
|
||||
@@ -422,18 +425,14 @@ func (m *manager) reconcileState() (success []reconciledContainer, failure []rec
|
||||
}
|
||||
|
||||
if cstatus.State.Terminated != nil {
|
||||
- // Since the container is terminated, we know it is safe to
|
||||
- // remove it without any reconciliation. Removing the container
|
||||
- // will also remove it from the `containerMap` so that this
|
||||
- // container will be skipped next time around the loop.
|
||||
+ // The container is terminated but we can't call m.RemoveContainer()
|
||||
+ // here because it could remove the allocated cpuset for the container
|
||||
+ // which may be in the process of being restarted. That would result
|
||||
+ // in the container losing any exclusively-allocated CPUs that it
|
||||
+ // was allocated.
|
||||
_, _, err := m.containerMap.GetContainerRef(containerID)
|
||||
if err == nil {
|
||||
- klog.Warningf("[cpumanager] reconcileState: skipping container; already terminated (pod: %s, container id: %s)", pod.Name, containerID)
|
||||
- err := m.RemoveContainer(containerID)
|
||||
- if err != nil {
|
||||
- klog.Errorf("[cpumanager] reconcileState: failed to remove container (pod: %s, container id: %s, error: %v)", pod.Name, containerID, err)
|
||||
- failure = append(failure, reconciledContainer{pod.Name, container.Name, containerID})
|
||||
- }
|
||||
+ klog.Warningf("[cpumanager] reconcileState: ignoring terminated (pod: %s, container id: %s)", pod.Name, containerID)
|
||||
}
|
||||
continue
|
||||
}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
index e806c62e80e..e3e0097cafb 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
@@ -41,6 +41,12 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
)
|
||||
|
||||
+type mockSourcesReady struct{}
|
||||
+
|
||||
+func (s *mockSourcesReady) AddSource(source string) {}
|
||||
+
|
||||
+func (s *mockSourcesReady) AllReady() bool { return false }
|
||||
+
|
||||
type mockState struct {
|
||||
assignments state.ContainerCPUAssignments
|
||||
defaultCPUSet cpuset.CPUSet
|
||||
@@ -277,6 +283,8 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||
podStatusProvider: mockPodStatusProvider{},
|
||||
}
|
||||
|
||||
+ mgr.sourcesReady = &mockSourcesReady{}
|
||||
+
|
||||
pod := makePod("fakePod", "fakeContainer", "2", "2")
|
||||
container := &pod.Spec.Containers[0]
|
||||
err := mgr.Allocate(pod, container)
|
||||
@@ -497,6 +505,8 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
||||
podStatusProvider: mockPodStatusProvider{},
|
||||
}
|
||||
|
||||
+ mgr.sourcesReady = &mockSourcesReady{}
|
||||
+
|
||||
containers := append(
|
||||
testCase.pod.Spec.InitContainers,
|
||||
testCase.pod.Spec.Containers...)
|
||||
@@ -1038,6 +1048,8 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
podStatusProvider: mockPodStatusProvider{},
|
||||
}
|
||||
|
||||
+ mgr.sourcesReady = &mockSourcesReady{}
|
||||
+
|
||||
pod := makePod("fakePod", "fakeContainer", "2", "2")
|
||||
container := &pod.Spec.Containers[0]
|
||||
err := mgr.Allocate(pod, container)
|
||||
diff --git a/pkg/kubelet/cm/internal_container_lifecycle.go b/pkg/kubelet/cm/internal_container_lifecycle.go
|
||||
index 9e243430269..690718e4e68 100644
|
||||
--- a/pkg/kubelet/cm/internal_container_lifecycle.go
|
||||
+++ b/pkg/kubelet/cm/internal_container_lifecycle.go
|
||||
@@ -54,19 +54,10 @@ func (i *internalContainerLifecycleImpl) PreStartContainer(pod *v1.Pod, containe
|
||||
}
|
||||
|
||||
func (i *internalContainerLifecycleImpl) PreStopContainer(containerID string) error {
|
||||
- if i.cpuManager != nil {
|
||||
- return i.cpuManager.RemoveContainer(containerID)
|
||||
- }
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *internalContainerLifecycleImpl) PostStopContainer(containerID string) error {
|
||||
- if i.cpuManager != nil {
|
||||
- err := i.cpuManager.RemoveContainer(containerID)
|
||||
- if err != nil {
|
||||
- return err
|
||||
- }
|
||||
- }
|
||||
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.TopologyManager) {
|
||||
err := i.topologyManager.RemoveContainer(containerID)
|
||||
if err != nil {
|
||||
--
|
||||
2.16.6
|
||||
|
@@ -1,79 +0,0 @@
From 8b765213a4e6d5cd4eecf361dadfec2851f1dd59 Mon Sep 17 00:00:00 2001
From: Chris Friesen <chris.friesen@windriver.com>
Date: Fri, 23 Oct 2020 17:46:10 -0600
Subject: [PATCH] enable support for kubernetes to ignore isolcpus

The normal mechanisms for allocating isolated CPUs do not allow
a mix of isolated and exclusive CPUs in the same container. In
order to allow this in *very* limited cases where the pod spec
is known in advance we will add the ability to disable the normal
isolcpus behaviour.

If the file "/etc/kubernetes/ignore_isolcpus" exists, then kubelet
will basically forget everything it knows about isolcpus and just
treat them like regular CPUs.

The admin user can then rely on the fact that CPU allocation is
deterministic to ensure that the isolcpus they configure end up being
allocated to the correct pods.
---
pkg/kubelet/cm/cpumanager/cpu_manager.go | 9 +++++++++
pkg/kubelet/cm/cpumanager/policy_static.go | 8 ++++++++
2 files changed, 17 insertions(+)

diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
index c682f813..92992991 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
@@ -19,6 +19,7 @@ package cpumanager
import (
"fmt"
"math"
+ "os"
"sync"
"time"
"strings"
@@ -56,6 +57,14 @@ const cpuManagerStateFileName = "cpu_manager_state"

// get the system-level isolated CPUs
func getIsolcpus() cpuset.CPUSet {
+
+ // This is a gross hack to basically turn off awareness of isolcpus to enable
+ // isolated cpus to be allocated to pods the same way as non-isolated CPUs.
+ if _, err := os.Stat("/etc/kubernetes/ignore_isolcpus"); err == nil {
+ klog.Infof("[cpumanager] turning off isolcpus awareness")
+ return cpuset.NewCPUSet()
+ }
+
dat, err := ioutil.ReadFile("/sys/devices/system/cpu/isolated")
if err != nil {
klog.Errorf("[cpumanager] unable to read sysfs isolcpus subdir")
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
index 1913065e..4fb3202f 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
@@ -18,6 +18,7 @@ package cpumanager

import (
"fmt"
+ "os"
"strconv"

v1 "k8s.io/api/core/v1"
@@ -510,6 +511,13 @@ func isKubeInfra(pod *v1.Pod) bool {

// get the isolated CPUs (if any) from the devices associated with a specific container
func (p *staticPolicy) podIsolCPUs(pod *v1.Pod, container *v1.Container) cpuset.CPUSet {
+
+ // This is a gross hack to basically turn off awareness of isolcpus to enable
+ // isolated cpus to be allocated to pods the same way as non-isolated CPUs.
+ if _, err := os.Stat("/etc/kubernetes/ignore_isolcpus"); err == nil {
+ return cpuset.NewCPUSet()
+ }
+
// NOTE: This is required for TestStaticPolicyAdd() since makePod() does
// not create UID. We also need a way to properly stub devicemanager.
if len(string(pod.UID)) == 0 {
--
2.16.6
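The opt-out described in the removed patch is driven entirely by a marker file; a minimal sketch of how it would be toggled and checked (restarting kubelet afterwards is an assumption about how the setting gets picked up):

# make kubelet treat kernel-isolated CPUs like ordinary CPUs
touch /etc/kubernetes/ignore_isolcpus
# the kernel-isolated set kubelet would otherwise honour
cat /sys/devices/system/cpu/isolated
systemctl restart kubelet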
@@ -1,12 +0,0 @@
diff --git a/vendor/golang.org/x/net/http2/transport.go b/vendor/golang.org/x/net/http2/transport.go
index aeac7d8..ec18648 100644
--- a/vendor/golang.org/x/net/http2/transport.go
+++ b/vendor/golang.org/x/net/http2/transport.go
@@ -2404,6 +2404,7 @@ func strSliceContains(ss []string, s string) bool {

type erringRoundTripper struct{ err error }

+func (rt erringRoundTripper) IsHTTP2ErringRoundtripper() {}
func (rt erringRoundTripper) RoundTrip(*http.Request) (*http.Response, error) { return nil, rt.err }

// gzipReader wraps a response body so it can lazily
@ -1,84 +0,0 @@
|
||||
MDSFORMANPAGES="kube-apiserver.md kube-controller-manager.md kube-proxy.md kube-scheduler.md kubelet.md"
|
||||
|
||||
# remove comments from man pages
|
||||
for manpage in ${MDSFORMANPAGES}; do
|
||||
pos=$(grep -n "<\!-- END MUNGE: UNVERSIONED_WARNING -->" ${manpage} | cut -d':' -f1)
|
||||
if [ -n ${pos} ]; then
|
||||
sed -i "1,${pos}{/.*/d}" ${manpage}
|
||||
fi
|
||||
done
|
||||
|
||||
# for each man page add NAME and SYNOPSIS section
|
||||
# kube-apiserver
|
||||
sed -i -s "s/## kube-apiserver/# NAME\nkube-apiserver \- Provides the API for kubernetes orchestration.\n\n# SYNOPSIS\n**kube-apiserver** [OPTIONS]\n/" kube-apiserver.md
|
||||
|
||||
cat << 'EOF' >> kube-apiserver.md
|
||||
# EXAMPLES
|
||||
```
|
||||
/usr/bin/kube-apiserver --logtostderr=true --v=0 --etcd_servers=http://127.0.0.1:4001 --insecure_bind_address=127.0.0.1 --insecure_port=8080 --kubelet_port=10250 --service-cluster-ip-range=10.1.1.0/24 --allow_privileged=false
|
||||
```
|
||||
EOF
|
||||
# kube-controller-manager
|
||||
sed -i -s "s/## kube-controller-manager/# NAME\nkube-controller-manager \- Enforces kubernetes services.\n\n# SYNOPSIS\n**kube-controller-manager** [OPTIONS]\n/" kube-controller-manager.md
|
||||
|
||||
cat << 'EOF' >> kube-controller-manager.md
|
||||
# EXAMPLES
|
||||
```
|
||||
/usr/bin/kube-controller-manager --logtostderr=true --v=0 --master=127.0.0.1:8080
|
||||
```
|
||||
EOF
|
||||
# kube-proxy
|
||||
sed -i -s "s/## kube-proxy/# NAME\nkube-proxy \- Provides network proxy services.\n\n# SYNOPSIS\n**kube-proxy** [OPTIONS]\n/" kube-proxy.md
|
||||
|
||||
cat << 'EOF' >> kube-proxy.md
|
||||
# EXAMPLES
|
||||
```
|
||||
/usr/bin/kube-proxy --logtostderr=true --v=0 --master=http://127.0.0.1:8080
|
||||
```
|
||||
EOF
|
||||
# kube-scheduler
|
||||
sed -i -s "s/## kube-scheduler/# NAME\nkube-scheduler \- Schedules containers on hosts.\n\n# SYNOPSIS\n**kube-scheduler** [OPTIONS]\n/" kube-scheduler.md
|
||||
|
||||
cat << 'EOF' >> kube-scheduler.md
|
||||
# EXAMPLES
|
||||
```
|
||||
/usr/bin/kube-scheduler --logtostderr=true --v=0 --master=127.0.0.1:8080
|
||||
```
|
||||
EOF
|
||||
# kubelet
|
||||
sed -i -s "s/## kubelet/# NAME\nkubelet \- Processes a container manifest so the containers are launched according to how they are described.\n\n# SYNOPSIS\n**kubelet** [OPTIONS]\n/" kubelet.md
|
||||
|
||||
cat << 'EOF' >> kubelet.md
|
||||
# EXAMPLES
|
||||
```
|
||||
/usr/bin/kubelet --logtostderr=true --v=0 --api_servers=http://127.0.0.1:8080 --address=127.0.0.1 --port=10250 --hostname_override=127.0.0.1 --allow-privileged=false
|
||||
```
|
||||
EOF
|
||||
|
||||
# for all man-pages
|
||||
for md in $MDSFORMANPAGES; do
|
||||
# correct section names
|
||||
sed -i -s "s/### Synopsis/# DESCRIPTION/" $md
|
||||
sed -i -s "s/### Options/# OPTIONS/" $md
|
||||
# add header
|
||||
sed -i "s/# NAME/% KUBERNETES(1) kubernetes User Manuals\n# NAME/" $md
|
||||
# modify list of options
|
||||
# options with no value in ""
|
||||
sed -i -r 's/(^ )(-[^":][^":]*)(:)(.*)/\*\*\2\*\*\n\t\4\n/' $md
|
||||
# option with value in ""
|
||||
sed -i -r 's/(^ )(-[^":][^":]*)("[^"]*")(:)(.*)/\*\*\2\3\*\*\n\t\5\n/' $md
|
||||
# options in -s, --long
|
||||
sed -i -r 's/(^ )(-[a-z], -[^":][^":]*)(:)(.*)/\*\*\2\*\*\n\t\4\n/' $md
|
||||
sed -i -r 's/(^ )(-[a-z], -[^":][^":]*)("[^"]*")(:)(.*)/\*\*\2\3\*\*\n\t\5\n/' $md
|
||||
# remove ```
|
||||
sed -i 's/```//' $md
|
||||
# remove all lines starting with ######
|
||||
sed -i 's/^######.*//' $md
|
||||
# modify footer
|
||||
sed -i -r "s/^\[!\[Analytics\].*//" $md
|
||||
# md does not contain section => taking 1
|
||||
name="${md%.md}"
|
||||
go-md2man -in $md -out man/man1/$name.1
|
||||
done
|
||||
|
||||
|
@@ -1,106 +0,0 @@
From 9ff79a463fd4502dd1800198bc0b367e5861baf3 Mon Sep 17 00:00:00 2001
From: Chris Friesen <chris.friesen@windriver.com>
Date: Fri, 28 Aug 2020 21:17:42 -0600
Subject: [PATCH] kubeadm: create platform pods with zero CPU resources

We want to specify zero CPU resources when creating the manifests
for the static platform pods, as a workaround for the lack of
separate resource tracking for platform resources.

We also specify zero CPU resources for the coredns deployment.
manifests.go appears to be the main file for this, not sure if the
others are used by I changed them just in case.
---
cluster/addons/dns/coredns/coredns.yaml.base | 2 +-
cluster/addons/dns/coredns/coredns.yaml.in | 2 +-
cluster/addons/dns/coredns/coredns.yaml.sed | 2 +-
cmd/kubeadm/app/phases/addons/dns/manifests.go | 2 +-
cmd/kubeadm/app/phases/controlplane/manifests.go | 6 +++---
5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/cluster/addons/dns/coredns/coredns.yaml.base b/cluster/addons/dns/coredns/coredns.yaml.base
index a8f0afb5085..45054a29420 100644
--- a/cluster/addons/dns/coredns/coredns.yaml.base
+++ b/cluster/addons/dns/coredns/coredns.yaml.base
@@ -124,7 +124,7 @@ spec:
limits:
memory: __PILLAR__DNS__MEMORY__LIMIT__
requests:
- cpu: 100m
+ cpu: 0
memory: 70Mi
args: [ "-conf", "/etc/coredns/Corefile" ]
volumeMounts:
diff --git a/cluster/addons/dns/coredns/coredns.yaml.in b/cluster/addons/dns/coredns/coredns.yaml.in
index ad65d946095..9b2b183faec 100644
--- a/cluster/addons/dns/coredns/coredns.yaml.in
+++ b/cluster/addons/dns/coredns/coredns.yaml.in
@@ -124,7 +124,7 @@ spec:
limits:
memory: {{ pillar['dns_memory_limit'] }}
requests:
- cpu: 100m
+ cpu: 0
memory: 70Mi
args: [ "-conf", "/etc/coredns/Corefile" ]
volumeMounts:
diff --git a/cluster/addons/dns/coredns/coredns.yaml.sed b/cluster/addons/dns/coredns/coredns.yaml.sed
index 3c86e5749a5..6b0c3388bcd 100644
--- a/cluster/addons/dns/coredns/coredns.yaml.sed
+++ b/cluster/addons/dns/coredns/coredns.yaml.sed
@@ -124,7 +124,7 @@ spec:
limits:
memory: $DNS_MEMORY_LIMIT
requests:
- cpu: 100m
+ cpu: 0
memory: 70Mi
args: [ "-conf", "/etc/coredns/Corefile" ]
volumeMounts:
diff --git a/cmd/kubeadm/app/phases/addons/dns/manifests.go b/cmd/kubeadm/app/phases/addons/dns/manifests.go
index 737d9d97cbe..7a3b2d61f37 100644
--- a/cmd/kubeadm/app/phases/addons/dns/manifests.go
+++ b/cmd/kubeadm/app/phases/addons/dns/manifests.go
@@ -250,7 +250,7 @@ spec:
limits:
memory: 170Mi
requests:
- cpu: 100m
+ cpu: 0
memory: 70Mi
args: [ "-conf", "/etc/coredns/Corefile" ]
volumeMounts:
diff --git a/cmd/kubeadm/app/phases/controlplane/manifests.go b/cmd/kubeadm/app/phases/controlplane/manifests.go
index ae9cd77e259..9222805ecfb 100644
--- a/cmd/kubeadm/app/phases/controlplane/manifests.go
+++ b/cmd/kubeadm/app/phases/controlplane/manifests.go
@@ -57,7 +57,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
Command: getAPIServerCommand(cfg, endpoint),
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeAPIServer)),
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/healthz", int(endpoint.BindPort), v1.URISchemeHTTPS),
- Resources: staticpodutil.ComponentResources("250m"),
+ Resources: staticpodutil.ComponentResources("0"),
Env: kubeadmutil.GetProxyEnvVars(),
}, mounts.GetVolumes(kubeadmconstants.KubeAPIServer),
map[string]string{kubeadmconstants.KubeAPIServerAdvertiseAddressEndpointAnnotationKey: endpoint.String()}),
@@ -68,7 +68,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
Command: getControllerManagerCommand(cfg),
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeControllerManager)),
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS),
- Resources: staticpodutil.ComponentResources("200m"),
+ Resources: staticpodutil.ComponentResources("0"),
Env: kubeadmutil.GetProxyEnvVars(),
}, mounts.GetVolumes(kubeadmconstants.KubeControllerManager), nil),
kubeadmconstants.KubeScheduler: staticpodutil.ComponentPod(v1.Container{
@@ -78,7 +78,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
Command: getSchedulerCommand(cfg),
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeScheduler)),
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS),
- Resources: staticpodutil.ComponentResources("100m"),
+ Resources: staticpodutil.ComponentResources("0"),
Env: kubeadmutil.GetProxyEnvVars(),
}, mounts.GetVolumes(kubeadmconstants.KubeScheduler), nil),
}
--
2.24.2
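To see the effect of the removed kubeadm change on a running controller, the rendered static pod manifests can be inspected; illustrative only, assuming the default kubeadm manifest path:

# platform static pods should request zero CPU
grep -A2 'requests:' /etc/kubernetes/manifests/kube-apiserver.yaml
grep 'cpu:' /etc/kubernetes/manifests/kube-controller-manager.yaml /etc/kubernetes/manifests/kube-scheduler.yaml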
@@ -1,18 +0,0 @@
# Note: This dropin only works with kubeadm and kubelet v1.11+
[Service]
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
# This is a file that "kubeadm init" and "kubeadm join" generates at runtime, populating the KUBELET_KUBEADM_ARGS variable dynamically
EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
# This is a file that the user can use for overrides of the kubelet args as a last resort. Preferably, the user should use
# the .NodeRegistration.KubeletExtraArgs object in the configuration files instead. KUBELET_EXTRA_ARGS should be sourced from this file.
EnvironmentFile=-/etc/sysconfig/kubelet
ExecStart=
ExecStart=/usr/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS
ExecStartPre=-/usr/local/sbin/sanitize_kubelet_reserved_cpus.sh /etc/sysconfig/kubelet
ExecStartPre=-/usr/bin/kubelet-cgroup-setup.sh
ExecStartPost=/bin/bash -c 'echo $MAINPID > /var/run/kubelet.pid;'
ExecStopPost=/bin/rm -f /var/run/kubelet.pid
Restart=always
StartLimitInterval=0
RestartSec=10
@@ -1,132 +0,0 @@
#!/bin/bash
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This script does minimal cgroup setup for kubelet. This creates k8s-infra
# cgroup for a minimal set of resource controllers, and configures cpuset
# attributes to span all online cpus and nodes. This will do nothing if
# the k8s-infra cgroup already exists (i.e., assume already configured).
# NOTE: The creation of directories under /sys/fs/cgroup is volatile, and
# does not persist reboots. The cpuset.mems and cpuset.cpus is later updated
# by puppet kubernetes.pp manifest.
#

# Define minimal path
PATH=/bin:/usr/bin:/usr/local/bin

# Log info message to /var/log/daemon.log
function LOG {
    logger -p daemon.info "$0($$): $@"
}

# Log error message to /var/log/daemon.log
function ERROR {
    logger -s -p daemon.error "$0($$): ERROR: $@"
}

# Create minimal cgroup directories and configure cpuset attributes if required
function create_cgroup {
    local cg_name=$1
    local cg_nodeset=$2
    local cg_cpuset=$3

    local CGROUP=/sys/fs/cgroup
    local CONTROLLERS_AUTO_DELETED=("pids" "hugetlb")
    local CONTROLLERS_PRESERVED=("cpuset" "memory" "cpu,cpuacct" "systemd")
    local cnt=''
    local CGDIR=''
    local RC=0

    # Ensure that these cgroups are created every time as they are auto deleted
    for cnt in ${CONTROLLERS_AUTO_DELETED[@]}; do
        CGDIR=${CGROUP}/${cnt}/${cg_name}
        if [ -d ${CGDIR} ]; then
            LOG "Nothing to do, already configured: ${CGDIR}."
            continue
        fi
        LOG "Creating: ${CGDIR}"
        mkdir -p ${CGDIR}
        RC=$?
        if [ ${RC} -ne 0 ]; then
            ERROR "Creating: ${CGDIR}, rc=${RC}"
            exit ${RC}
        fi
    done

    # These cgroups are preserved so if any of these are encountered additional
    # cgroup setup is not required
    for cnt in ${CONTROLLERS_PRESERVED[@]}; do
        CGDIR=${CGROUP}/${cnt}/${cg_name}
        if [ -d ${CGDIR} ]; then
            LOG "Nothing to do, already configured: ${CGDIR}."
            exit ${RC}
        fi
        LOG "Creating: ${CGDIR}"
        mkdir -p ${CGDIR}
        RC=$?
        if [ ${RC} -ne 0 ]; then
            ERROR "Creating: ${CGDIR}, rc=${RC}"
            exit ${RC}
        fi
    done

    # Customize cpuset attributes
    LOG "Configuring cgroup: ${cg_name}, nodeset: ${cg_nodeset}, cpuset: ${cg_cpuset}"
    CGDIR=${CGROUP}/cpuset/${cg_name}
    local CGMEMS=${CGDIR}/cpuset.mems
    local CGCPUS=${CGDIR}/cpuset.cpus
    local CGTASKS=${CGDIR}/tasks

    # Assign cgroup memory nodeset
    LOG "Assign nodeset ${cg_nodeset} to ${CGMEMS}"
    /bin/echo ${cg_nodeset} > ${CGMEMS}
    RC=$?
    if [ ${RC} -ne 0 ]; then
        ERROR "Unable to write to: ${CGMEMS}, rc=${RC}"
        exit ${RC}
    fi

    # Assign cgroup cpus
    LOG "Assign cpuset ${cg_cpuset} to ${CGCPUS}"
    /bin/echo ${cg_cpuset} > ${CGCPUS}
    RC=$?
    if [ ${RC} -ne 0 ]; then
        ERROR "Assigning: ${cg_cpuset} to ${CGCPUS}, rc=${RC}"
        exit ${RC}
    fi

    # Set file ownership
    chown root:root ${CGMEMS} ${CGCPUS} ${CGTASKS}
    RC=$?
    if [ ${RC} -ne 0 ]; then
        ERROR "Setting owner for: ${CGMEMS}, ${CGCPUS}, ${CGTASKS}, rc=${RC}"
        exit ${RC}
    fi

    # Set file mode permissions
    chmod 644 ${CGMEMS} ${CGCPUS} ${CGTASKS}
    RC=$?
    if [ ${RC} -ne 0 ]; then
        ERROR "Setting mode for: ${CGMEMS}, ${CGCPUS}, ${CGTASKS}, rc=${RC}"
        exit ${RC}
    fi

    return ${RC}
}

if [ $UID -ne 0 ]; then
    ERROR "Require sudo/root."
    exit 1
fi

# Configure default kubepods cpuset to span all online cpus and nodes.
ONLINE_NODESET=$(/bin/cat /sys/devices/system/node/online)
ONLINE_CPUSET=$(/bin/cat /sys/devices/system/cpu/online)

# Configure kubelet cgroup to match cgroupRoot.
create_cgroup 'k8s-infra' ${ONLINE_NODESET} ${ONLINE_CPUSET}

exit $?
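After the script above runs (presumably as the kubelet-cgroup-setup.sh ExecStartPre hook referenced in the drop-in earlier), the result can be verified directly in sysfs:

# confirm the k8s-infra cpuset cgroup exists and spans all online cpus and nodes
cat /sys/fs/cgroup/cpuset/k8s-infra/cpuset.cpus
cat /sys/fs/cgroup/cpuset/k8s-infra/cpuset.mems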
@@ -1,111 +0,0 @@
From 696c016ebaae6c4cfa24fb5a492d20ebde41d7f8 Mon Sep 17 00:00:00 2001
From: Jim Gauld <james.gauld@windriver.com>
Date: Thu, 5 Sep 2019 10:46:58 -0400
Subject: [PATCH 1/6] kubelet cpumanager disable CFS quota throttling for
Guaranteed pods

This disables CFS CPU quota to avoid performance degradation due to
Linux kernel CFS quota implementation. Note that 4.18 kernel attempts
to solve the CFS throttling problem, but there are reports that it is
not completely effective.

This disables CFS quota throttling for Guaranteed pods for both
parent and container cgroups by writing -1 to cgroup cpu.cfs_quota_us.
Disabling has a dramatic latency improvement for HTTP response times.

Signed-off-by: Jim Gauld <james.gauld@windriver.com>
---
pkg/kubelet/cm/cpumanager/cpu_manager.go | 22 ++++++++++++++++++++++
pkg/kubelet/cm/helpers_linux.go | 5 +++++
pkg/kubelet/cm/helpers_linux_test.go | 8 ++++----
3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index 616a620f8ce..c0c440453a9 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -36,6 +36,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/status"
|
||||
+ v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
|
||||
)
|
||||
|
||||
// ActivePodsFunc is a function that returns a list of pods to reconcile.
|
||||
@@ -230,6 +231,14 @@ func (m *manager) AddContainer(p *v1.Pod, c *v1.Container, containerID string) e
|
||||
// Get the CPUs assigned to the container during Allocate()
|
||||
// (or fall back to the default CPUSet if none were assigned).
|
||||
cpus := m.state.GetCPUSetOrDefault(string(p.UID), c.Name)
|
||||
+
|
||||
+ // Guaranteed PODs should not have CFS quota throttle
|
||||
+ if m.policy.Name() == string(PolicyStatic) && v1qos.GetPodQOS(p) == v1.PodQOSGuaranteed {
|
||||
+ err := m.disableContainerCPUQuota(containerID)
|
||||
+ if err != nil {
|
||||
+ klog.Errorf("[cpumanager] AddContainer disable CPU Quota error: %v", err)
|
||||
+ }
|
||||
+ }
|
||||
m.Unlock()
|
||||
|
||||
if !cpus.IsEmpty() {
|
||||
@@ -462,3 +471,16 @@ func (m *manager) updateContainerCPUSet(containerID string, cpus cpuset.CPUSet)
|
||||
CpusetCpus: cpus.String(),
|
||||
})
|
||||
}
|
||||
+
|
||||
+func (m *manager) disableContainerCPUQuota(containerID string) error {
|
||||
+ // Disable CFS CPU quota to avoid performance degradation due to
|
||||
+ // Linux kernel CFS throttle implementation.
|
||||
+ // NOTE: 4.18 kernel attempts to solve CFS throttling problem,
|
||||
+ // but there are reports that it is not completely effective.
|
||||
+ return m.containerRuntime.UpdateContainerResources(
|
||||
+ containerID,
|
||||
+ &runtimeapi.LinuxContainerResources{
|
||||
+ CpuPeriod: 100000,
|
||||
+ CpuQuota: -1,
|
||||
+ })
|
||||
+}
|
||||
diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go
|
||||
index f6a1d519026..8aa6f87ad49 100644
|
||||
--- a/pkg/kubelet/cm/helpers_linux.go
|
||||
+++ b/pkg/kubelet/cm/helpers_linux.go
|
||||
@@ -157,6 +157,11 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64)
|
||||
// determine the qos class
|
||||
qosClass := v1qos.GetPodQOS(pod)
|
||||
|
||||
+ // disable cfs quota for guaranteed pods
|
||||
+ if qosClass == v1.PodQOSGuaranteed {
|
||||
+ cpuQuota = int64(-1)
|
||||
+ }
|
||||
+
|
||||
// build the result
|
||||
result := &ResourceConfig{}
|
||||
if qosClass == v1.PodQOSGuaranteed {
|
||||
diff --git a/pkg/kubelet/cm/helpers_linux_test.go b/pkg/kubelet/cm/helpers_linux_test.go
|
||||
index 56d765fbc22..0c43afe5875 100644
|
||||
--- a/pkg/kubelet/cm/helpers_linux_test.go
|
||||
+++ b/pkg/kubelet/cm/helpers_linux_test.go
|
||||
@@ -63,8 +63,8 @@ func TestResourceConfigForPod(t *testing.T) {
|
||||
burstablePartialShares := MilliCPUToShares(200)
|
||||
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
|
||||
guaranteedShares := MilliCPUToShares(100)
|
||||
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
|
||||
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
|
||||
+ guaranteedQuota := int64(-1)
|
||||
+ guaranteedTunedQuota := int64(-1)
|
||||
memoryQuantity = resource.MustParse("100Mi")
|
||||
cpuNoLimit := int64(-1)
|
||||
guaranteedMemory := memoryQuantity.Value()
|
||||
@@ -283,8 +283,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
||||
burstablePartialShares := MilliCPUToShares(200)
|
||||
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
|
||||
guaranteedShares := MilliCPUToShares(100)
|
||||
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
|
||||
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
|
||||
+ guaranteedQuota := int64(-1)
|
||||
+ guaranteedTunedQuota := int64(-1)
|
||||
memoryQuantity = resource.MustParse("100Mi")
|
||||
cpuNoLimit := int64(-1)
|
||||
guaranteedMemory := memoryQuantity.Value()
|
||||
--
|
||||
2.16.6
|
||||
|
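The effect of the removed patch is observable on a node: the cgroup CFS quota for a Guaranteed pod reads -1 once throttling is disabled. Illustrative check only; the exact kubepods cgroup path depends on the cgroup driver:

# a value of -1 means CFS quota throttling is disabled for that pod/container
find /sys/fs/cgroup/cpu -path '*kubepods*' -name cpu.cfs_quota_us -exec cat {} +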
@@ -1,139 +0,0 @@
From d0e89da9ebcbd9a13051ab5366b6daef2cec9bbe Mon Sep 17 00:00:00 2001
From: Chris Friesen <chris.friesen@windriver.com>
Date: Fri, 27 Sep 2019 14:11:54 -0600
Subject: [PATCH 4/6] kubelet cpumanager infrastructure pods use system
reserved CPUs

This assigns system infrastructure pods to the "reserved" cpuset
to isolate them from the shared pool of CPUs.

Infrastructure pods include any pods that belong to the kube-system,
armada, cert-manager, vault, platform-deployment-manager, portieris,
notification or flux-helm namespaces.

The implementation is a bit simplistic, it is assumed that the
"reserved" cpuset is large enough to handle all infrastructure pods
CPU allocations.

This also prevents infrastucture pods from using Guaranteed resources.

Signed-off-by: Chris Friesen <chris.friesen@windriver.com>
---
pkg/kubelet/cm/cpumanager/policy_static.go | 45 +++++++++++++++++++++++++
pkg/kubelet/cm/cpumanager/policy_static_test.go | 19 ++++++++++-
2 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index e631d5d6a74..e511caf7ab7 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -32,6 +32,11 @@ import (
|
||||
// PolicyStatic is the name of the static policy
|
||||
const PolicyStatic policyName = "static"
|
||||
|
||||
+// Define namespaces used by platform infrastructure pods
|
||||
+var infraNamespaces = [...]string{
|
||||
+ "kube-system", "armada", "cert-manager", "platform-deployment-manager", "portieris", "vault", "notification", "flux-helm",
|
||||
+}
|
||||
+
|
||||
// staticPolicy is a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
@@ -205,6 +210,32 @@ func (p *staticPolicy) assignableCPUs(s state.State) cpuset.CPUSet {
|
||||
}
|
||||
|
||||
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
|
||||
+ // Process infra pods before guaranteed pods
|
||||
+ if isKubeInfra(pod) {
|
||||
+ // Container belongs in reserved pool.
|
||||
+ // We don't want to fall through to the p.guaranteedCPUs() clause below so return either nil or error.
|
||||
+ if _, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
|
||||
+ klog.Infof("[cpumanager] static policy: reserved container already present in state, skipping " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||
+ return nil
|
||||
+ }
|
||||
+
|
||||
+ cpuset := p.reserved
|
||||
+ if cpuset.IsEmpty() {
|
||||
+ // If this happens then someone messed up.
|
||||
+ return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved)
|
||||
+ }
|
||||
+ s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||
+ klog.Infof("[cpumanager] static policy: reserved: AddContainer " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset)
|
||||
+ return nil
|
||||
+ }
|
||||
+
|
||||
+
|
||||
if numCPUs := p.guaranteedCPUs(pod, container); numCPUs != 0 {
|
||||
klog.Infof("[cpumanager] static policy: Allocate (pod: %s, container: %s)", pod.Name, container.Name)
|
||||
// container belongs in an exclusively allocated pool
|
||||
@@ -300,6 +331,10 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int
|
||||
if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
|
||||
return 0
|
||||
}
|
||||
+ // Infrastructure pods use reserved CPUs even if they're in the Guaranteed QoS class
|
||||
+ if isKubeInfra(pod) {
|
||||
+ return 0
|
||||
+ }
|
||||
// Safe downcast to do for all systems with < 2.1 billion CPUs.
|
||||
// Per the language spec, `int` is guaranteed to be at least 32 bits wide.
|
||||
// https://golang.org/ref/spec#Numeric_types
|
||||
@@ -417,3 +452,13 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, req
|
||||
|
||||
return hints
|
||||
}
|
||||
+
|
||||
+// check if a given pod is in a platform infrastructure namespace
|
||||
+func isKubeInfra(pod *v1.Pod) bool {
|
||||
+ for _, namespace := range infraNamespaces {
|
||||
+ if namespace == pod.Namespace {
|
||||
+ return true
|
||||
+ }
|
||||
+ }
|
||||
+ return false
|
||||
+}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
index b2982432c13..04947d28055 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
@@ -747,7 +747,8 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
-
|
||||
+ infraPod := makePod("fakePod", "fakeContainer2", "200m", "200m")
|
||||
+ infraPod.Namespace = "kube-system"
|
||||
testCases := []staticPolicyTestWithResvList{
|
||||
{
|
||||
description: "GuPodSingleCore, SingleSocketHT, ExpectError",
|
||||
@@ -789,6 +790,22 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
expCPUAlloc: true,
|
||||
expCSet: cpuset.NewCPUSet(4, 5),
|
||||
},
|
||||
+ {
|
||||
+ description: "InfraPod, SingleSocketHT, ExpectAllocReserved",
|
||||
+ topo: topoSingleSocketHT,
|
||||
+ numReservedCPUs: 2,
|
||||
+ reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ stAssignments: state.ContainerCPUAssignments{
|
||||
+ "fakePod": map[string]cpuset.CPUSet{
|
||||
+ "fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
+ },
|
||||
+ },
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
|
||||
+ pod: infraPod,
|
||||
+ expErr: nil,
|
||||
+ expCPUAlloc: true,
|
||||
+ expCSet: cpuset.NewCPUSet(0, 1),
|
||||
+ },
|
||||
}
|
||||
|
||||
testExcl := true
|
||||
--
|
||||
2.16.6
|
||||
|
@@ -1,526 +0,0 @@
From de3b9749f765398d4064c3225caa0a960d27eff3 Mon Sep 17 00:00:00 2001
From: Chris Friesen <chris.friesen@windriver.com>
Date: Thu, 9 Apr 2020 12:52:19 -0600
Subject: [PATCH 5/6] kubelet cpumanager introduce concept of isolated CPUs

This introduces the concept of "isolated CPUs", which are CPUs that
have been isolated at the kernel level via the "isolcpus" kernel boot
parameter.

When starting the kubelet process, two separate sets of reserved CPUs
may be specified. With this change CPUs reserved via
'--system-reserved=cpu' will be used for infrastructure pods while the
isolated CPUs should be reserved via '--kube-reserved=cpu' to cause
kubelet to skip over them for "normal" CPU resource tracking. The
kubelet code will double-check that the specified isolated CPUs match
what the kernel exposes in "/sys/devices/system/cpu/isolated".

A plugin (outside the scope of this commit) will expose the isolated
CPUs to kubelet via the device plugin API.

If a pod specifies some number of "isolcpus" resources, the device
manager will allocate them. In this code we check whether such
resources have been allocated, and if so we set the container cpuset to
the isolated CPUs. This does mean that it really only makes sense to
specify "isolcpus" resources for best-effort or burstable pods, not for
guaranteed ones since that would throw off the accounting code. In
order to ensure the accounting still works as designed, if "isolcpus"
are specified for guaranteed pods, the affinity will be set to the
non-isolated CPUs.

Signed-off-by: Chris Friesen <chris.friesen@windriver.com>
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
---
pkg/kubelet/cm/container_manager_linux.go | 1 +
pkg/kubelet/cm/cpumanager/cpu_manager.go | 30 ++++++++-
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 14 +++-
pkg/kubelet/cm/cpumanager/policy_static.go | 86 +++++++++++++++++++++++--
pkg/kubelet/cm/cpumanager/policy_static_test.go | 46 ++++++++++---
5 files changed, 158 insertions(+), 19 deletions(-)

diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go
|
||||
index 13c7176bdc2..e6ffb7a6194 100644
|
||||
--- a/pkg/kubelet/cm/container_manager_linux.go
|
||||
+++ b/pkg/kubelet/cm/container_manager_linux.go
|
||||
@@ -325,6 +325,7 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
|
||||
cm.GetNodeAllocatableReservation(),
|
||||
nodeConfig.KubeletRootDir,
|
||||
cm.topologyManager,
|
||||
+ cm.deviceManager,
|
||||
)
|
||||
if err != nil {
|
||||
klog.Errorf("failed to initialize cpu manager: %v", err)
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index 322a2040a77..08d45c77182 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -21,6 +21,8 @@ import (
|
||||
"math"
|
||||
"sync"
|
||||
"time"
|
||||
+ "strings"
|
||||
+ "io/ioutil"
|
||||
|
||||
cadvisorapi "github.com/google/cadvisor/info/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
@@ -34,6 +36,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/status"
|
||||
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
|
||||
@@ -51,6 +54,25 @@ type policyName string
|
||||
// cpuManagerStateFileName is the file name where cpu manager stores its state
|
||||
const cpuManagerStateFileName = "cpu_manager_state"
|
||||
|
||||
+// get the system-level isolated CPUs
|
||||
+func getIsolcpus() cpuset.CPUSet {
|
||||
+ dat, err := ioutil.ReadFile("/sys/devices/system/cpu/isolated")
|
||||
+ if err != nil {
|
||||
+ klog.Errorf("[cpumanager] unable to read sysfs isolcpus subdir")
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+
|
||||
+ // The isolated cpus string ends in a newline
|
||||
+ cpustring := strings.TrimSuffix(string(dat), "\n")
|
||||
+ cset, err := cpuset.Parse(cpustring)
|
||||
+ if err != nil {
|
||||
+ klog.Errorf("[cpumanager] unable to parse sysfs isolcpus string to cpuset")
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+
|
||||
+ return cset
|
||||
+}
|
||||
+
|
||||
// Manager interface provides methods for Kubelet to manage pod cpus.
|
||||
type Manager interface {
|
||||
// Start is called during Kubelet initialization.
|
||||
@@ -127,7 +149,7 @@ func (s *sourcesReadyStub) AddSource(source string) {}
|
||||
func (s *sourcesReadyStub) AllReady() bool { return true }
|
||||
|
||||
// NewManager creates new cpu manager based on provided policy
|
||||
-func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, numaNodeInfo topology.NUMANodeInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
|
||||
+func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, numaNodeInfo topology.NUMANodeInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store, deviceManager devicemanager.Manager) (Manager, error) {
|
||||
var topo *topology.CPUTopology
|
||||
var policy Policy
|
||||
|
||||
@@ -164,7 +186,11 @@ func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo
|
||||
// NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
|
||||
// This variable is primarily to make testing easier.
|
||||
excludeReserved := true
|
||||
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, excludeReserved)
|
||||
+ // isolCPUs is the set of kernel-isolated CPUs. They should be a subset of specificCPUs or
|
||||
+ // of the CPUs that NewStaticPolicy() will pick if numReservedCPUs is set. It's only in the
|
||||
+ // argument list here for ease of testing, it's really internal to the policy.
|
||||
+ isolCPUs := getIsolcpus()
|
||||
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, isolCPUs, affinity, deviceManager, excludeReserved)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new static policy error: %v", err)
|
||||
}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
index a4d8f13c853..e806c62e80e 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
@@ -38,6 +38,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
)
|
||||
|
||||
type mockState struct {
|
||||
@@ -207,6 +208,7 @@ func makeMultiContainerPod(initCPUs, appCPUs []struct{ request, limit string })
|
||||
}
|
||||
|
||||
func TestCPUManagerAdd(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testExcl := false
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
@@ -222,7 +224,8 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||
},
|
||||
0,
|
||||
cpuset.NewCPUSet(),
|
||||
- topologymanager.NewFakeManager(), testExcl)
|
||||
+ cpuset.NewCPUSet(),
|
||||
+ topologymanager.NewFakeManager(), testDM, testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
updateErr error
|
||||
@@ -476,8 +479,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
||||
}
|
||||
|
||||
testExcl := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testExcl)
|
||||
|
||||
state := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -617,7 +621,8 @@ func TestCPUManagerGenerate(t *testing.T) {
|
||||
}
|
||||
defer os.RemoveAll(sDir)
|
||||
|
||||
- mgr, err := NewManager(testCase.cpuPolicyName, 5*time.Second, machineInfo, nil, cpuset.NewCPUSet(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager())
|
||||
+ testDM, err := devicemanager.NewManagerStub()
|
||||
+ mgr, err := NewManager(testCase.cpuPolicyName, 5*time.Second, machineInfo, nil, cpuset.NewCPUSet(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager(), testDM)
|
||||
if testCase.expectedError != nil {
|
||||
if !strings.Contains(err.Error(), testCase.expectedError.Error()) {
|
||||
t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), testCase.expectedError.Error())
|
||||
@@ -972,6 +977,7 @@ func TestReconcileState(t *testing.T) {
|
||||
// the following tests are with --reserved-cpus configured
|
||||
func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
testExcl := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 4,
|
||||
@@ -986,7 +992,9 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
},
|
||||
1,
|
||||
cpuset.NewCPUSet(0),
|
||||
+ cpuset.NewCPUSet(),
|
||||
topologymanager.NewFakeManager(),
|
||||
+ testDM,
|
||||
testExcl,
|
||||
)
|
||||
testCases := []struct {
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index e511caf7ab7..490e7675679 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -18,6 +18,7 @@ package cpumanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
+ "strconv"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/klog"
|
||||
@@ -27,6 +28,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
)
|
||||
|
||||
// PolicyStatic is the name of the static policy
|
||||
@@ -80,6 +82,10 @@ type staticPolicy struct {
|
||||
topology *topology.CPUTopology
|
||||
// set of CPUs that is not available for exclusive assignment
|
||||
reserved cpuset.CPUSet
|
||||
+ // subset of reserved CPUs with isolcpus attribute
|
||||
+ isolcpus cpuset.CPUSet
|
||||
+ // parent containerManager, used to get device list
|
||||
+ deviceManager devicemanager.Manager
|
||||
// If true, default CPUSet should exclude reserved CPUs
|
||||
excludeReserved bool
|
||||
// topology manager reference to get container Topology affinity
|
||||
@@ -92,7 +98,7 @@ var _ Policy = &staticPolicy{}
|
||||
// NewStaticPolicy returns a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, excludeReserved bool) (Policy, error) {
|
||||
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, isolCPUs cpuset.CPUSet, affinity topologymanager.Store, deviceManager devicemanager.Manager, excludeReserved bool) (Policy, error) {
|
||||
allCPUs := topology.CPUDetails.CPUs()
|
||||
var reserved cpuset.CPUSet
|
||||
if reservedCPUs.Size() > 0 {
|
||||
@@ -113,9 +119,17 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||
|
||||
klog.Infof("[cpumanager] reserved %d CPUs (\"%s\") not available for exclusive assignment", reserved.Size(), reserved)
|
||||
|
||||
+ if !isolCPUs.IsSubsetOf(reserved) {
|
||||
+ klog.Errorf("[cpumanager] isolCPUs %v is not a subset of reserved %v", isolCPUs, reserved)
|
||||
+ reserved = reserved.Union(isolCPUs)
|
||||
+ klog.Warningf("[cpumanager] mismatch isolCPUs %v, force reserved %v", isolCPUs, reserved)
|
||||
+ }
|
||||
+
|
||||
return &staticPolicy{
|
||||
topology: topology,
|
||||
reserved: reserved,
|
||||
+ isolcpus: isolCPUs,
|
||||
+ deviceManager: deviceManager,
|
||||
excludeReserved: excludeReserved,
|
||||
affinity: affinity,
|
||||
}, nil
|
||||
@@ -151,8 +165,8 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
} else {
|
||||
s.SetDefaultCPUSet(allCPUs)
|
||||
}
|
||||
- klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
|
||||
- allCPUs, p.reserved, s.GetDefaultCPUSet())
|
||||
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, isolcpus:%v, default:%v\n",
|
||||
+ allCPUs, p.reserved, p.isolcpus, s.GetDefaultCPUSet())
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -221,12 +235,13 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||
return nil
|
||||
}
|
||||
|
||||
- cpuset := p.reserved
|
||||
+ // TODO: Is the clone actually needed?
|
||||
+ cpuset := p.reserved.Clone().Difference(p.isolcpus)
|
||||
if cpuset.IsEmpty() {
|
||||
// If this happens then someone messed up.
|
||||
return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus " +
|
||||
- "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v",
|
||||
- pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved)
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v, isolcpus:%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved, p.isolcpus)
|
||||
}
|
||||
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||
klog.Infof("[cpumanager] static policy: reserved: AddContainer " +
|
||||
@@ -267,7 +282,37 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||
}
|
||||
}
|
||||
}
|
||||
+ klog.Infof("[cpumanager] guaranteed: AddContainer " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); numCPUS=%d, cpuset=%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, numCPUs, cpuset)
|
||||
+ return nil
|
||||
+ }
|
||||
+
|
||||
+ if isolcpus := p.podIsolCPUs(pod, container); isolcpus.Size() > 0 {
|
||||
+ // container has requested isolated CPUs
|
||||
+ if set, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
|
||||
+ if set.Equals(isolcpus) {
|
||||
+ klog.Infof("[cpumanager] isolcpus container already present in state, skipping " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||
+ return nil
|
||||
+ } else {
|
||||
+ klog.Infof("[cpumanager] isolcpus container state has cpus %v, should be %v" +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||
+ isolcpus, set, pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||
+ }
|
||||
+ }
|
||||
+ // Note that we do not do anything about init containers here.
|
||||
+ // It looks like devices are allocated per-pod based on effective requests/limits
|
||||
+ // and extra devices from initContainers are not freed up when the regular containers start.
|
||||
+ // TODO: confirm this is still true for 1.18
|
||||
+ s.SetCPUSet(string(pod.UID), container.Name, isolcpus)
|
||||
+ klog.Infof("[cpumanager] isolcpus: AddContainer " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, isolcpus)
|
||||
+ return nil
|
||||
}
|
||||
+
|
||||
// container belongs in the shared pool (nothing to do; use default cpuset)
|
||||
return nil
|
||||
}
|
||||
@@ -462,3 +507,32 @@ func isKubeInfra(pod *v1.Pod) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
+
|
||||
+// get the isolated CPUs (if any) from the devices associated with a specific container
|
||||
+func (p *staticPolicy) podIsolCPUs(pod *v1.Pod, container *v1.Container) cpuset.CPUSet {
|
||||
+ // NOTE: This is required for TestStaticPolicyAdd() since makePod() does
|
||||
+ // not create UID. We also need a way to properly stub devicemanager.
|
||||
+ if len(string(pod.UID)) == 0 {
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+ devices := p.deviceManager.GetDevices(string(pod.UID), container.Name)
|
||||
+ for _, dev := range devices {
|
||||
+ // this resource name needs to match the isolcpus device plugin
|
||||
+ if dev.ResourceName == "windriver.com/isolcpus" {
|
||||
+ cpuStrList := dev.DeviceIds
|
||||
+ if len(cpuStrList) > 0 {
|
||||
+ cpuSet := cpuset.NewCPUSet()
|
||||
+ // loop over the list of strings, convert each one to int, add to cpuset
|
||||
+ for _, cpuStr := range cpuStrList {
|
||||
+ cpu, err := strconv.Atoi(cpuStr)
|
||||
+ if err != nil {
|
||||
+ panic(err)
|
||||
+ }
|
||||
+ cpuSet = cpuSet.Union(cpuset.NewCPUSet(cpu))
|
||||
+ }
|
||||
+ return cpuSet
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ return cpuset.NewCPUSet()
|
||||
+}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
index 04947d28055..999ab3c1af0 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
@@ -27,6 +27,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
)
|
||||
|
||||
type staticPolicyTest struct {
|
||||
@@ -45,8 +46,9 @@ type staticPolicyTest struct {
|
||||
}
|
||||
|
||||
func TestStaticPolicyName(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testExcl := false
|
||||
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
|
||||
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testExcl)
|
||||
|
||||
policyName := policy.Name()
|
||||
if policyName != "static" {
|
||||
@@ -56,6 +58,7 @@ func TestStaticPolicyName(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyStart(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
description: "non-corrupted state",
|
||||
@@ -131,7 +134,7 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
|
||||
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testCase.excludeReserved)
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -179,6 +182,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
largeTopoSock0CPUSet := largeTopoSock0Builder.Result()
|
||||
largeTopoSock1CPUSet := largeTopoSock1Builder.Result()
|
||||
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
description: "GuPodSingleCore, SingleSocketHT, ExpectError",
|
||||
@@ -447,7 +451,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testCase.excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -490,6 +494,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyRemove(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
excludeReserved := false
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
@@ -549,7 +554,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -571,6 +576,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
excludeReserved := false
|
||||
testCases := []struct {
|
||||
description string
|
||||
@@ -640,7 +646,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
|
||||
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, excludeReserved)
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: tc.stAssignments,
|
||||
@@ -673,6 +679,7 @@ type staticPolicyTestWithResvList struct {
|
||||
topo *topology.CPUTopology
|
||||
numReservedCPUs int
|
||||
reserved cpuset.CPUSet
|
||||
+ isolcpus cpuset.CPUSet
|
||||
stAssignments state.ContainerCPUAssignments
|
||||
stDefaultCPUSet cpuset.CPUSet
|
||||
pod *v1.Pod
|
||||
@@ -713,9 +720,10 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
},
|
||||
}
|
||||
testExcl := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
|
||||
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testExcl)
|
||||
if !reflect.DeepEqual(err, testCase.expNewErr) {
|
||||
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
|
||||
testCase.description, testCase.expNewErr, err)
|
||||
@@ -755,6 +763,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 1,
|
||||
reserved: cpuset.NewCPUSet(0),
|
||||
+ isolcpus: cpuset.NewCPUSet(),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
stDefaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
||||
@@ -767,6 +776,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ isolcpus: cpuset.NewCPUSet(),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
stDefaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
|
||||
@@ -779,6 +789,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ isolcpus: cpuset.NewCPUSet(),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"fakePod": map[string]cpuset.CPUSet{
|
||||
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
@@ -795,6 +806,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ isolcpus: cpuset.NewCPUSet(),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"fakePod": map[string]cpuset.CPUSet{
|
||||
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
@@ -806,12 +818,30 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
expCPUAlloc: true,
|
||||
expCSet: cpuset.NewCPUSet(0, 1),
|
||||
},
|
||||
+ {
|
||||
+ description: "InfraPod, SingleSocketHT, Isolcpus, ExpectAllocReserved",
|
||||
+ topo: topoSingleSocketHT,
|
||||
+ numReservedCPUs: 2,
|
||||
+ reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ isolcpus: cpuset.NewCPUSet(1),
|
||||
+ stAssignments: state.ContainerCPUAssignments{
|
||||
+ "fakePod": map[string]cpuset.CPUSet{
|
||||
+ "fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
+ },
|
||||
+ },
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
|
||||
+ pod: infraPod,
|
||||
+ expErr: nil,
|
||||
+ expCPUAlloc: true,
|
||||
+ expCSet: cpuset.NewCPUSet(0),
|
||||
+ },
|
||||
}
|
||||
|
||||
testExcl := true
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
|
||||
-
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, testCase.isolcpus, topologymanager.NewFakeManager(), testDM, testExcl)
|
||||
+
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
defaultCPUSet: testCase.stDefaultCPUSet,
|
||||
--
|
||||
2.16.6
|
||||
|
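For reference, the removed patch above wires the device manager into the static CPU manager policy: isolcpus allocations arrive from the device plugin as lists of CPU IDs encoded as strings, and the cpuset handed to platform (reserved) containers is the reserved set minus those isolated CPUs. A rough stand-alone sketch of that bookkeeping, using plain integer sets in place of the kubelet cpuset package (the helper names below are illustrative, not part of the removed patch):

package main

import (
	"fmt"
	"sort"
	"strconv"
)

// parseCPUIDs converts device IDs such as ["3", "5", "7"] into a set of CPUs,
// mirroring what podIsolCPUs does with dev.DeviceIds in the patch above.
func parseCPUIDs(ids []string) (map[int]bool, error) {
	cpus := map[int]bool{}
	for _, id := range ids {
		cpu, err := strconv.Atoi(id)
		if err != nil {
			return nil, fmt.Errorf("bad CPU id %q: %v", id, err)
		}
		cpus[cpu] = true
	}
	return cpus, nil
}

// difference returns the CPUs in a that are not in b, e.g. reserved minus
// isolated, which is the set given to platform (reserved) containers.
func difference(a, b map[int]bool) []int {
	var out []int
	for cpu := range a {
		if !b[cpu] {
			out = append(out, cpu)
		}
	}
	sort.Ints(out)
	return out
}

func main() {
	reserved := map[int]bool{0: true, 1: true}
	isolated, _ := parseCPUIDs([]string{"1"})
	// Platform pods are pinned to the reserved CPUs minus the isolated ones.
	fmt.Println(difference(reserved, isolated)) // [0]
}

In the actual policy the same arithmetic is done with cpuset.CPUSet (Union, Difference, IsSubsetOf), as the hunks above show.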
@ -1,312 +0,0 @@
|
||||
From c109ab23f98b00ee5f98000c760985da967d47a9 Mon Sep 17 00:00:00 2001
|
||||
From: Chris Friesen <chris.friesen@windriver.com>
|
||||
Date: Tue, 1 Oct 2019 00:16:00 -0600
|
||||
Subject: [PATCH 3/6] kubelet cpumanager keep normal containers off reserved
|
||||
CPUs
|
||||
|
||||
When starting the kubelet process, two separate sets of reserved CPUs
|
||||
may be specified. With this change CPUs reserved via '--system-reserved=cpu'
|
||||
or '--kube-reserved=cpu' will be ignored by kubernetes itself. A small
|
||||
tweak to the default CPU affinity ensures that "normal" Kubernetes
|
||||
pods won't run on the reserved CPUs.
|
||||
|
||||
Signed-off-by: Chris Friesen <chris.friesen@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager.go | 5 +++-
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 11 +++++---
|
||||
pkg/kubelet/cm/cpumanager/policy_static.go | 29 ++++++++++++++++----
|
||||
pkg/kubelet/cm/cpumanager/policy_static_test.go | 35 ++++++++++++++++++-------
|
||||
4 files changed, 61 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index c0c440453a9..322a2040a77 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -161,7 +161,10 @@ func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo
|
||||
// exclusively allocated.
|
||||
reservedCPUsFloat := float64(reservedCPUs.MilliValue()) / 1000
|
||||
numReservedCPUs := int(math.Ceil(reservedCPUsFloat))
|
||||
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity)
|
||||
+ // NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
|
||||
+ // This variable is primarily to make testing easier.
|
||||
+ excludeReserved := true
|
||||
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, excludeReserved)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new static policy error: %v", err)
|
||||
}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
index e9c7852c602..a4d8f13c853 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
@@ -207,6 +207,7 @@ func makeMultiContainerPod(initCPUs, appCPUs []struct{ request, limit string })
|
||||
}
|
||||
|
||||
func TestCPUManagerAdd(t *testing.T) {
|
||||
+ testExcl := false
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 4,
|
||||
@@ -221,7 +222,7 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||
},
|
||||
0,
|
||||
cpuset.NewCPUSet(),
|
||||
- topologymanager.NewFakeManager())
|
||||
+ topologymanager.NewFakeManager(), testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
updateErr error
|
||||
@@ -474,8 +475,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
+ testExcl := false
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
|
||||
|
||||
state := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -969,6 +971,7 @@ func TestReconcileState(t *testing.T) {
|
||||
// above test cases are without kubelet --reserved-cpus cmd option
|
||||
// the following tests are with --reserved-cpus configured
|
||||
func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
+ testExcl := false
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 4,
|
||||
@@ -983,7 +986,9 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
},
|
||||
1,
|
||||
cpuset.NewCPUSet(0),
|
||||
- topologymanager.NewFakeManager())
|
||||
+ topologymanager.NewFakeManager(),
|
||||
+ testExcl,
|
||||
+ )
|
||||
testCases := []struct {
|
||||
description string
|
||||
updateErr error
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index da68ed808bd..e631d5d6a74 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -75,6 +75,8 @@ type staticPolicy struct {
|
||||
topology *topology.CPUTopology
|
||||
// set of CPUs that is not available for exclusive assignment
|
||||
reserved cpuset.CPUSet
|
||||
+ // If true, default CPUSet should exclude reserved CPUs
|
||||
+ excludeReserved bool
|
||||
// topology manager reference to get container Topology affinity
|
||||
affinity topologymanager.Store
|
||||
}
|
||||
@@ -85,7 +87,7 @@ var _ Policy = &staticPolicy{}
|
||||
// NewStaticPolicy returns a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store) (Policy, error) {
|
||||
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, excludeReserved bool) (Policy, error) {
|
||||
allCPUs := topology.CPUDetails.CPUs()
|
||||
var reserved cpuset.CPUSet
|
||||
if reservedCPUs.Size() > 0 {
|
||||
@@ -109,6 +111,7 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||
return &staticPolicy{
|
||||
topology: topology,
|
||||
reserved: reserved,
|
||||
+ excludeReserved: excludeReserved,
|
||||
affinity: affinity,
|
||||
}, nil
|
||||
}
|
||||
@@ -136,7 +139,15 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
}
|
||||
// state is empty initialize
|
||||
allCPUs := p.topology.CPUDetails.CPUs()
|
||||
- s.SetDefaultCPUSet(allCPUs)
|
||||
+ if p.excludeReserved {
|
||||
+ // Exclude reserved CPUs from the default CPUSet to keep containers off them
|
||||
+ // unless explicitly affined.
|
||||
+ s.SetDefaultCPUSet(allCPUs.Difference(p.reserved))
|
||||
+ } else {
|
||||
+ s.SetDefaultCPUSet(allCPUs)
|
||||
+ }
|
||||
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
|
||||
+ allCPUs, p.reserved, s.GetDefaultCPUSet())
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -144,9 +155,11 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
// 1. Check if the reserved cpuset is not part of default cpuset because:
|
||||
// - kube/system reserved have changed (increased) - may lead to some containers not being able to start
|
||||
// - user tampered with file
|
||||
- if !p.reserved.Intersection(tmpDefaultCPUset).Equals(p.reserved) {
|
||||
- return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
- p.reserved.String(), tmpDefaultCPUset.String())
|
||||
+ if !p.excludeReserved {
|
||||
+ if !p.reserved.Intersection(tmpDefaultCPUset).Equals(p.reserved) {
|
||||
+ return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
+ p.reserved.String(), tmpDefaultCPUset.String())
|
||||
+ }
|
||||
}
|
||||
|
||||
// 2. Check if state for static policy is consistent
|
||||
@@ -175,6 +188,9 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
}
|
||||
}
|
||||
totalKnownCPUs = totalKnownCPUs.UnionAll(tmpCPUSets)
|
||||
+ if p.excludeReserved {
|
||||
+ totalKnownCPUs = totalKnownCPUs.Union(p.reserved)
|
||||
+ }
|
||||
if !totalKnownCPUs.Equals(p.topology.CPUDetails.CPUs()) {
|
||||
return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
|
||||
p.topology.CPUDetails.CPUs().String(), totalKnownCPUs.String())
|
||||
@@ -229,6 +245,9 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
|
||||
klog.Infof("[cpumanager] static policy: RemoveContainer (pod: %s, container: %s)", podUID, containerName)
|
||||
if toRelease, ok := s.GetCPUSet(podUID, containerName); ok {
|
||||
s.Delete(podUID, containerName)
|
||||
+ if p.excludeReserved {
|
||||
+ toRelease = toRelease.Difference(p.reserved)
|
||||
+ }
|
||||
// Mutate the shared pool, adding released cpus.
|
||||
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
|
||||
}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
index ea2bcf11333..b2982432c13 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
@@ -33,6 +33,7 @@ type staticPolicyTest struct {
|
||||
description string
|
||||
topo *topology.CPUTopology
|
||||
numReservedCPUs int
|
||||
+ excludeReserved bool
|
||||
podUID string
|
||||
containerName string
|
||||
stAssignments state.ContainerCPUAssignments
|
||||
@@ -44,7 +45,8 @@ type staticPolicyTest struct {
|
||||
}
|
||||
|
||||
func TestStaticPolicyName(t *testing.T) {
|
||||
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ testExcl := false
|
||||
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
|
||||
|
||||
policyName := policy.Name()
|
||||
if policyName != "static" {
|
||||
@@ -74,6 +76,15 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
stDefaultCPUSet: cpuset.NewCPUSet(),
|
||||
expCSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
|
||||
},
|
||||
+ {
|
||||
+ description: "empty cpuset exclude reserved",
|
||||
+ topo: topoDualSocketHT,
|
||||
+ numReservedCPUs: 2,
|
||||
+ excludeReserved: true,
|
||||
+ stAssignments: state.ContainerCPUAssignments{},
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(),
|
||||
+ expCSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 7, 8, 9, 10, 11),
|
||||
+ },
|
||||
{
|
||||
description: "reserved cores 0 & 6 are not present in available cpuset",
|
||||
topo: topoDualSocketHT,
|
||||
@@ -120,7 +131,7 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -436,7 +447,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -479,6 +490,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyRemove(t *testing.T) {
|
||||
+ excludeReserved := false
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
description: "SingleSocketHT, DeAllocOneContainer",
|
||||
@@ -537,7 +549,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -559,6 +571,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
+ excludeReserved := false
|
||||
testCases := []struct {
|
||||
description string
|
||||
topo *topology.CPUTopology
|
||||
@@ -627,7 +640,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: tc.stAssignments,
|
||||
@@ -699,9 +712,10 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
expNewErr: fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of 0-1 did not equal 1)"),
|
||||
},
|
||||
}
|
||||
+ testExcl := false
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager())
|
||||
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
|
||||
if !reflect.DeepEqual(err, testCase.expNewErr) {
|
||||
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
|
||||
testCase.description, testCase.expNewErr, err)
|
||||
@@ -741,7 +755,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
numReservedCPUs: 1,
|
||||
reserved: cpuset.NewCPUSet(0),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7),
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
||||
expErr: fmt.Errorf("not enough cpus available to satisfy request"),
|
||||
expCPUAlloc: false,
|
||||
@@ -753,7 +767,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.NewCPUSet(0, 1),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7),
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
|
||||
expErr: nil,
|
||||
expCPUAlloc: true,
|
||||
@@ -769,7 +783,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
},
|
||||
},
|
||||
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 4, 5),
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
|
||||
pod: makePod("fakePod", "fakeContainer3", "2000m", "2000m"),
|
||||
expErr: nil,
|
||||
expCPUAlloc: true,
|
||||
@@ -777,8 +791,9 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
+ testExcl := true
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager())
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
--
|
||||
2.16.6
|
||||
|
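The patch above keeps "normal" pods off the reserved CPUs by seeding the default (shared) cpuset with all CPUs minus the reserved set, and by stripping reserved CPUs from anything released back into the shared pool when a container is removed. A minimal stand-alone sketch of that shared-pool bookkeeping, again with plain integer sets rather than cpuset.CPUSet (illustrative names only):

package main

import (
	"fmt"
	"sort"
)

type cpuSet map[int]bool

// defaultPool mirrors validateState() in the patch: with excludeReserved set,
// the shared pool starts as allCPUs minus the reserved CPUs.
func defaultPool(all, reserved cpuSet) cpuSet {
	pool := cpuSet{}
	for cpu := range all {
		if !reserved[cpu] {
			pool[cpu] = true
		}
	}
	return pool
}

// release mirrors RemoveContainer(): CPUs handed back to the shared pool are
// filtered first so reserved CPUs never leak into it.
func release(pool, toRelease, reserved cpuSet) {
	for cpu := range toRelease {
		if !reserved[cpu] {
			pool[cpu] = true
		}
	}
}

func sorted(s cpuSet) []int {
	var out []int
	for cpu := range s {
		out = append(out, cpu)
	}
	sort.Ints(out)
	return out
}

func main() {
	all := cpuSet{0: true, 1: true, 2: true, 3: true}
	reserved := cpuSet{0: true}
	pool := defaultPool(all, reserved) // [1 2 3]
	delete(pool, 2)                    // pretend CPU 2 went to a guaranteed container
	// The container exits holding CPU 2; CPU 0 is filtered out because it is
	// reserved, so only CPU 2 returns to the shared pool.
	release(pool, cpuSet{0: true, 2: true}, reserved)
	fmt.Println(sorted(pool)) // [1 2 3]
}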
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -1,9 +0,0 @@
The spec file used here was from the kubernetes 1.10.0 src rpm.
The orig file is included to help show modifications made to that
spec file, to help understand which changes were needed and to
assist with future upversioning.

The contrib tarball does not have the same versioning as kubernetes and
there is little activity in that repo.

The version for the contrib tarball is arbitrary.
@ -1,5 +0,0 @@
VERSION=1.19.13
TAR_NAME=kubernetes
TAR="$TAR_NAME-v$VERSION.tar.gz"
COPY_LIST="${CGCS_BASE}/downloads/$TAR $FILES_BASE/*"
TIS_PATCH_VER=PKG_GITREVCOUNT
@ -1,79 +0,0 @@
|
||||
From 81de324764a81969d3a8d7648730fcd54d58cb6e Mon Sep 17 00:00:00 2001
|
||||
From: Chris Friesen <chris.friesen@windriver.com>
|
||||
Date: Fri, 23 Oct 2020 17:46:10 -0600
|
||||
Subject: [PATCH 6/6] enable support for kubernetes to ignore isolcpus
|
||||
|
||||
The normal mechanisms for allocating isolated CPUs do not allow
|
||||
a mix of isolated and exclusive CPUs in the same container. In
|
||||
order to allow this in *very* limited cases where the pod spec
|
||||
is known in advance we will add the ability to disable the normal
|
||||
isolcpus behaviour.
|
||||
|
||||
If the file "/etc/kubernetes/ignore_isolcpus" exists, then kubelet
|
||||
will basically forget everything it knows about isolcpus and just
|
||||
treat them like regular CPUs.
|
||||
|
||||
The admin user can then rely on the fact that CPU allocation is
|
||||
deterministic to ensure that the isolcpus they configure end up being
|
||||
allocated to the correct pods.
|
||||
---
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager.go | 9 +++++++++
|
||||
pkg/kubelet/cm/cpumanager/policy_static.go | 8 ++++++++
|
||||
2 files changed, 17 insertions(+)
|
||||
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index 5e13a6d90c7..bebd0e4fa01 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -19,6 +19,7 @@ package cpumanager
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
+ "os"
|
||||
"sync"
|
||||
"time"
|
||||
"strings"
|
||||
@@ -56,6 +57,14 @@ const cpuManagerStateFileName = "cpu_manager_state"
|
||||
|
||||
// get the system-level isolated CPUs
|
||||
func getIsolcpus() cpuset.CPUSet {
|
||||
+
|
||||
+ // This is a gross hack to basically turn off awareness of isolcpus to enable
|
||||
+ // isolated cpus to be allocated to pods the same way as non-isolated CPUs.
|
||||
+ if _, err := os.Stat("/etc/kubernetes/ignore_isolcpus"); err == nil {
|
||||
+ klog.Infof("[cpumanager] turning off isolcpus awareness")
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+
|
||||
dat, err := ioutil.ReadFile("/sys/devices/system/cpu/isolated")
|
||||
if err != nil {
|
||||
klog.Errorf("[cpumanager] unable to read sysfs isolcpus subdir")
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index 78bb297eb07..1ed4bee5943 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -18,6 +18,7 @@ package cpumanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
+ "os"
|
||||
"strconv"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
@@ -529,6 +530,13 @@ func isKubeInfra(pod *v1.Pod) bool {
|
||||
|
||||
// get the isolated CPUs (if any) from the devices associated with a specific container
|
||||
func (p *staticPolicy) podIsolCPUs(pod *v1.Pod, container *v1.Container) cpuset.CPUSet {
|
||||
+
|
||||
+ // This is a gross hack to basically turn off awareness of isolcpus to enable
|
||||
+ // isolated cpus to be allocated to pods the same way as non-isolated CPUs.
|
||||
+ if _, err := os.Stat("/etc/kubernetes/ignore_isolcpus"); err == nil {
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+
|
||||
// NOTE: This is required for TestStaticPolicyAdd() since makePod() does
|
||||
// not create UID. We also need a way to properly stub devicemanager.
|
||||
if len(string(pod.UID)) == 0 {
|
||||
--
|
||||
2.24.2
|
||||
|
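The removed patch above adds a kill switch: if /etc/kubernetes/ignore_isolcpus exists, kubelet forgets about isolated CPUs and treats them like ordinary ones. A small stand-alone sketch of the same pattern, checking the flag file and otherwise parsing the kernel's isolated-CPU list (range strings such as "3-5,7") with the standard library instead of the cpuset package; the file paths match the patch, while the helper names are illustrative:

package main

import (
	"fmt"
	"os"
	"strconv"
	"strings"
)

// isolatedCPUs returns the kernel-isolated CPUs, or nothing when the
// /etc/kubernetes/ignore_isolcpus flag file tells kubelet to ignore them.
func isolatedCPUs() ([]int, error) {
	if _, err := os.Stat("/etc/kubernetes/ignore_isolcpus"); err == nil {
		return nil, nil // kill switch: pretend there are no isolated CPUs
	}
	data, err := os.ReadFile("/sys/devices/system/cpu/isolated")
	if err != nil {
		return nil, err
	}
	return parseCPUList(strings.TrimSpace(string(data)))
}

// parseCPUList expands a kernel CPU list such as "3-5,7" into [3 4 5 7].
func parseCPUList(s string) ([]int, error) {
	if s == "" {
		return nil, nil
	}
	var cpus []int
	for _, part := range strings.Split(s, ",") {
		bounds := strings.SplitN(part, "-", 2)
		lo, err := strconv.Atoi(bounds[0])
		if err != nil {
			return nil, err
		}
		hi := lo
		if len(bounds) == 2 {
			if hi, err = strconv.Atoi(bounds[1]); err != nil {
				return nil, err
			}
		}
		for cpu := lo; cpu <= hi; cpu++ {
			cpus = append(cpus, cpu)
		}
	}
	return cpus, nil
}

func main() {
	cpus, err := isolatedCPUs()
	fmt.Println(cpus, err)
}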
@ -1,84 +0,0 @@
|
||||
MDSFORMANPAGES="kube-apiserver.md kube-controller-manager.md kube-proxy.md kube-scheduler.md kubelet.md"
|
||||
|
||||
# remove comments from man pages
|
||||
for manpage in ${MDSFORMANPAGES}; do
|
||||
pos=$(grep -n "<\!-- END MUNGE: UNVERSIONED_WARNING -->" ${manpage} | cut -d':' -f1)
|
||||
if [ -n ${pos} ]; then
|
||||
sed -i "1,${pos}{/.*/d}" ${manpage}
|
||||
fi
|
||||
done
|
||||
|
||||
# for each man page add NAME and SYNOPSIS section
|
||||
# kube-apiserver
|
||||
sed -i -s "s/## kube-apiserver/# NAME\nkube-apiserver \- Provides the API for kubernetes orchestration.\n\n# SYNOPSIS\n**kube-apiserver** [OPTIONS]\n/" kube-apiserver.md
|
||||
|
||||
cat << 'EOF' >> kube-apiserver.md
|
||||
# EXAMPLES
|
||||
```
|
||||
/usr/bin/kube-apiserver --logtostderr=true --v=0 --etcd_servers=http://127.0.0.1:4001 --insecure_bind_address=127.0.0.1 --insecure_port=8080 --kubelet_port=10250 --service-cluster-ip-range=10.1.1.0/24 --allow_privileged=false
|
||||
```
|
||||
EOF
|
||||
# kube-controller-manager
|
||||
sed -i -s "s/## kube-controller-manager/# NAME\nkube-controller-manager \- Enforces kubernetes services.\n\n# SYNOPSIS\n**kube-controller-manager** [OPTIONS]\n/" kube-controller-manager.md
|
||||
|
||||
cat << 'EOF' >> kube-controller-manager.md
|
||||
# EXAMPLES
|
||||
```
|
||||
/usr/bin/kube-controller-manager --logtostderr=true --v=0 --master=127.0.0.1:8080
|
||||
```
|
||||
EOF
|
||||
# kube-proxy
|
||||
sed -i -s "s/## kube-proxy/# NAME\nkube-proxy \- Provides network proxy services.\n\n# SYNOPSIS\n**kube-proxy** [OPTIONS]\n/" kube-proxy.md
|
||||
|
||||
cat << 'EOF' >> kube-proxy.md
|
||||
# EXAMPLES
|
||||
```
|
||||
/usr/bin/kube-proxy --logtostderr=true --v=0 --master=http://127.0.0.1:8080
|
||||
```
|
||||
EOF
|
||||
# kube-scheduler
|
||||
sed -i -s "s/## kube-scheduler/# NAME\nkube-scheduler \- Schedules containers on hosts.\n\n# SYNOPSIS\n**kube-scheduler** [OPTIONS]\n/" kube-scheduler.md
|
||||
|
||||
cat << 'EOF' >> kube-scheduler.md
|
||||
# EXAMPLES
|
||||
```
|
||||
/usr/bin/kube-scheduler --logtostderr=true --v=0 --master=127.0.0.1:8080
|
||||
```
|
||||
EOF
|
||||
# kubelet
|
||||
sed -i -s "s/## kubelet/# NAME\nkubelet \- Processes a container manifest so the containers are launched according to how they are described.\n\n# SYNOPSIS\n**kubelet** [OPTIONS]\n/" kubelet.md
|
||||
|
||||
cat << 'EOF' >> kubelet.md
|
||||
# EXAMPLES
|
||||
```
|
||||
/usr/bin/kubelet --logtostderr=true --v=0 --api_servers=http://127.0.0.1:8080 --address=127.0.0.1 --port=10250 --hostname_override=127.0.0.1 --allow-privileged=false
|
||||
```
|
||||
EOF
|
||||
|
||||
# for all man-pages
|
||||
for md in $MDSFORMANPAGES; do
|
||||
# correct section names
|
||||
sed -i -s "s/### Synopsis/# DESCRIPTION/" $md
|
||||
sed -i -s "s/### Options/# OPTIONS/" $md
|
||||
# add header
|
||||
sed -i "s/# NAME/% KUBERNETES(1) kubernetes User Manuals\n# NAME/" $md
|
||||
# modify list of options
|
||||
# options with no value in ""
|
||||
sed -i -r 's/(^ )(-[^":][^":]*)(:)(.*)/\*\*\2\*\*\n\t\4\n/' $md
|
||||
# option with value in ""
|
||||
sed -i -r 's/(^ )(-[^":][^":]*)("[^"]*")(:)(.*)/\*\*\2\3\*\*\n\t\5\n/' $md
|
||||
# options in -s, --long
|
||||
sed -i -r 's/(^ )(-[a-z], -[^":][^":]*)(:)(.*)/\*\*\2\*\*\n\t\4\n/' $md
|
||||
sed -i -r 's/(^ )(-[a-z], -[^":][^":]*)("[^"]*")(:)(.*)/\*\*\2\3\*\*\n\t\5\n/' $md
|
||||
# remove ```
|
||||
sed -i 's/```//' $md
|
||||
# remove all lines starting with ######
|
||||
sed -i 's/^######.*//' $md
|
||||
# modify footer
|
||||
sed -i -r "s/^\[!\[Analytics\].*//" $md
|
||||
# md does not contain section => taking 1
|
||||
name="${md%.md}"
|
||||
go-md2man -in $md -out man/man1/$name.1
|
||||
done
|
||||
|
||||
|
@ -1,108 +0,0 @@
|
||||
From 327b473fd46e0d95a10564ed8b186c6f30549ac7 Mon Sep 17 00:00:00 2001
|
||||
From: Chris Friesen <chris.friesen@windriver.com>
|
||||
Date: Mon, 30 Aug 2021 16:53:49 -0600
|
||||
Subject: [PATCH 5/6] kubeadm: create platform pods with zero CPU resources
|
||||
|
||||
We want to specify zero CPU resources when creating the manifests
|
||||
for the static platform pods, as a workaround for the lack of
|
||||
separate resource tracking for platform resources.
|
||||
|
||||
We also specify zero CPU resources for the coredns deployment.
|
||||
manifests.go appears to be the main file for this, not sure if the
|
||||
others are used but I changed them just in case.
|
||||
|
||||
Signed-off-by: Chris Friesen <chris.friesen@windriver.com>
|
||||
---
|
||||
cluster/addons/dns/coredns/coredns.yaml.base | 2 +-
|
||||
cluster/addons/dns/coredns/coredns.yaml.in | 2 +-
|
||||
cluster/addons/dns/coredns/coredns.yaml.sed | 2 +-
|
||||
cmd/kubeadm/app/phases/addons/dns/manifests.go | 2 +-
|
||||
cmd/kubeadm/app/phases/controlplane/manifests.go | 6 +++---
|
||||
5 files changed, 7 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/cluster/addons/dns/coredns/coredns.yaml.base b/cluster/addons/dns/coredns/coredns.yaml.base
|
||||
index 23ddd0c06a8..634f894f061 100644
|
||||
--- a/cluster/addons/dns/coredns/coredns.yaml.base
|
||||
+++ b/cluster/addons/dns/coredns/coredns.yaml.base
|
||||
@@ -137,7 +137,7 @@ spec:
|
||||
limits:
|
||||
memory: __PILLAR__DNS__MEMORY__LIMIT__
|
||||
requests:
|
||||
- cpu: 100m
|
||||
+ cpu: 0
|
||||
memory: 70Mi
|
||||
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||
volumeMounts:
|
||||
diff --git a/cluster/addons/dns/coredns/coredns.yaml.in b/cluster/addons/dns/coredns/coredns.yaml.in
|
||||
index 55e6f33bd44..15e3d9b601c 100644
|
||||
--- a/cluster/addons/dns/coredns/coredns.yaml.in
|
||||
+++ b/cluster/addons/dns/coredns/coredns.yaml.in
|
||||
@@ -137,7 +137,7 @@ spec:
|
||||
limits:
|
||||
memory: {{ pillar['dns_memory_limit'] }}
|
||||
requests:
|
||||
- cpu: 100m
|
||||
+ cpu: 0
|
||||
memory: 70Mi
|
||||
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||
volumeMounts:
|
||||
diff --git a/cluster/addons/dns/coredns/coredns.yaml.sed b/cluster/addons/dns/coredns/coredns.yaml.sed
|
||||
index aad2a143635..81288fb05b2 100644
|
||||
--- a/cluster/addons/dns/coredns/coredns.yaml.sed
|
||||
+++ b/cluster/addons/dns/coredns/coredns.yaml.sed
|
||||
@@ -137,7 +137,7 @@ spec:
|
||||
limits:
|
||||
memory: $DNS_MEMORY_LIMIT
|
||||
requests:
|
||||
- cpu: 100m
|
||||
+ cpu: 0
|
||||
memory: 70Mi
|
||||
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||
volumeMounts:
|
||||
diff --git a/cmd/kubeadm/app/phases/addons/dns/manifests.go b/cmd/kubeadm/app/phases/addons/dns/manifests.go
|
||||
index cfc13edd9a3..d5c4c192b92 100644
|
||||
--- a/cmd/kubeadm/app/phases/addons/dns/manifests.go
|
||||
+++ b/cmd/kubeadm/app/phases/addons/dns/manifests.go
|
||||
@@ -250,7 +250,7 @@ spec:
|
||||
limits:
|
||||
memory: 170Mi
|
||||
requests:
|
||||
- cpu: 100m
|
||||
+ cpu: 0
|
||||
memory: 70Mi
|
||||
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||
volumeMounts:
|
||||
diff --git a/cmd/kubeadm/app/phases/controlplane/manifests.go b/cmd/kubeadm/app/phases/controlplane/manifests.go
|
||||
index 6e2037c20ef..160a85ddd9b 100644
|
||||
--- a/cmd/kubeadm/app/phases/controlplane/manifests.go
|
||||
+++ b/cmd/kubeadm/app/phases/controlplane/manifests.go
|
||||
@@ -60,7 +60,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
|
||||
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", int(endpoint.BindPort), v1.URISchemeHTTPS),
|
||||
ReadinessProbe: staticpodutil.ReadinessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/readyz", int(endpoint.BindPort), v1.URISchemeHTTPS),
|
||||
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", int(endpoint.BindPort), v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
|
||||
- Resources: staticpodutil.ComponentResources("250m"),
|
||||
+ Resources: staticpodutil.ComponentResources("0"),
|
||||
Env: kubeadmutil.GetProxyEnvVars(),
|
||||
}, mounts.GetVolumes(kubeadmconstants.KubeAPIServer),
|
||||
map[string]string{kubeadmconstants.KubeAPIServerAdvertiseAddressEndpointAnnotationKey: endpoint.String()}),
|
||||
@@ -72,7 +72,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
|
||||
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeControllerManager)),
|
||||
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS),
|
||||
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
|
||||
- Resources: staticpodutil.ComponentResources("200m"),
|
||||
+ Resources: staticpodutil.ComponentResources("0"),
|
||||
Env: kubeadmutil.GetProxyEnvVars(),
|
||||
}, mounts.GetVolumes(kubeadmconstants.KubeControllerManager), nil),
|
||||
kubeadmconstants.KubeScheduler: staticpodutil.ComponentPod(v1.Container{
|
||||
@@ -83,7 +83,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
|
||||
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeScheduler)),
|
||||
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS),
|
||||
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
|
||||
- Resources: staticpodutil.ComponentResources("100m"),
|
||||
+ Resources: staticpodutil.ComponentResources("0"),
|
||||
Env: kubeadmutil.GetProxyEnvVars(),
|
||||
}, mounts.GetVolumes(kubeadmconstants.KubeScheduler), nil),
|
||||
}
|
||||
--
|
||||
2.24.2
|
||||
|
@ -1,18 +0,0 @@
|
||||
# Note: This dropin only works with kubeadm and kubelet v1.11+
|
||||
[Service]
|
||||
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
|
||||
Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
|
||||
# This is a file that "kubeadm init" and "kubeadm join" generates at runtime, populating the KUBELET_KUBEADM_ARGS variable dynamically
|
||||
EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
|
||||
# This is a file that the user can use for overrides of the kubelet args as a last resort. Preferably, the user should use
|
||||
# the .NodeRegistration.KubeletExtraArgs object in the configuration files instead. KUBELET_EXTRA_ARGS should be sourced from this file.
|
||||
EnvironmentFile=-/etc/sysconfig/kubelet
|
||||
ExecStart=
|
||||
ExecStart=/usr/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS
|
||||
ExecStartPre=-/usr/local/sbin/sanitize_kubelet_reserved_cpus.sh /etc/sysconfig/kubelet
|
||||
ExecStartPre=-/usr/bin/kubelet-cgroup-setup.sh
|
||||
ExecStartPost=/bin/bash -c 'echo $MAINPID > /var/run/kubelet.pid;'
|
||||
ExecStopPost=/bin/rm -f /var/run/kubelet.pid
|
||||
Restart=always
|
||||
StartLimitInterval=0
|
||||
RestartSec=10
|
@ -1,132 +0,0 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2019 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# This script does minimal cgroup setup for kubelet. This creates k8s-infra
|
||||
# cgroup for a minimal set of resource controllers, and configures cpuset
|
||||
# attributes to span all online cpus and nodes. This will do nothing if
|
||||
# the k8s-infra cgroup already exists (i.e., assume already configured).
|
||||
# NOTE: The creation of directories under /sys/fs/cgroup is volatile, and
|
||||
# does not persist reboots. The cpuset.mems and cpuset.cpus is later updated
|
||||
# by puppet kubernetes.pp manifest.
|
||||
#
|
||||
|
||||
# Define minimal path
|
||||
PATH=/bin:/usr/bin:/usr/local/bin
|
||||
|
||||
# Log info message to /var/log/daemon.log
|
||||
function LOG {
|
||||
logger -p daemon.info "$0($$): $@"
|
||||
}
|
||||
|
||||
# Log error message to /var/log/daemon.log
|
||||
function ERROR {
|
||||
logger -s -p daemon.error "$0($$): ERROR: $@"
|
||||
}
|
||||
|
||||
# Create minimal cgroup directories and configure cpuset attributes if required
|
||||
function create_cgroup {
|
||||
local cg_name=$1
|
||||
local cg_nodeset=$2
|
||||
local cg_cpuset=$3
|
||||
|
||||
local CGROUP=/sys/fs/cgroup
|
||||
local CONTROLLERS_AUTO_DELETED=("pids" "hugetlb")
|
||||
local CONTROLLERS_PRESERVED=("cpuset" "memory" "cpu,cpuacct" "systemd")
|
||||
local cnt=''
|
||||
local CGDIR=''
|
||||
local RC=0
|
||||
|
||||
# Ensure that these cgroups are created every time as they are auto deleted
|
||||
for cnt in ${CONTROLLERS_AUTO_DELETED[@]}; do
|
||||
CGDIR=${CGROUP}/${cnt}/${cg_name}
|
||||
if [ -d ${CGDIR} ]; then
|
||||
LOG "Nothing to do, already configured: ${CGDIR}."
|
||||
continue
|
||||
fi
|
||||
LOG "Creating: ${CGDIR}"
|
||||
mkdir -p ${CGDIR}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Creating: ${CGDIR}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
done
|
||||
|
||||
# These cgroups are preserved so if any of these are encountered additional
|
||||
# cgroup setup is not required
|
||||
for cnt in ${CONTROLLERS_PRESERVED[@]}; do
|
||||
CGDIR=${CGROUP}/${cnt}/${cg_name}
|
||||
if [ -d ${CGDIR} ]; then
|
||||
LOG "Nothing to do, already configured: ${CGDIR}."
|
||||
exit ${RC}
|
||||
fi
|
||||
LOG "Creating: ${CGDIR}"
|
||||
mkdir -p ${CGDIR}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Creating: ${CGDIR}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
done
|
||||
|
||||
# Customize cpuset attributes
|
||||
LOG "Configuring cgroup: ${cg_name}, nodeset: ${cg_nodeset}, cpuset: ${cg_cpuset}"
|
||||
CGDIR=${CGROUP}/cpuset/${cg_name}
|
||||
local CGMEMS=${CGDIR}/cpuset.mems
|
||||
local CGCPUS=${CGDIR}/cpuset.cpus
|
||||
local CGTASKS=${CGDIR}/tasks
|
||||
|
||||
# Assign cgroup memory nodeset
|
||||
LOG "Assign nodeset ${cg_nodeset} to ${CGMEMS}"
|
||||
/bin/echo ${cg_nodeset} > ${CGMEMS}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Unable to write to: ${CGMEMS}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
|
||||
# Assign cgroup cpus
|
||||
LOG "Assign cpuset ${cg_cpuset} to ${CGCPUS}"
|
||||
/bin/echo ${cg_cpuset} > ${CGCPUS}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Assigning: ${cg_cpuset} to ${CGCPUS}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
|
||||
# Set file ownership
|
||||
chown root:root ${CGMEMS} ${CGCPUS} ${CGTASKS}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Setting owner for: ${CGMEMS}, ${CGCPUS}, ${CGTASKS}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
|
||||
# Set file mode permissions
|
||||
chmod 644 ${CGMEMS} ${CGCPUS} ${CGTASKS}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Setting mode for: ${CGMEMS}, ${CGCPUS}, ${CGTASKS}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
|
||||
return ${RC}
|
||||
}
|
||||
|
||||
if [ $UID -ne 0 ]; then
|
||||
ERROR "Require sudo/root."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Configure default kubepods cpuset to span all online cpus and nodes.
|
||||
ONLINE_NODESET=$(/bin/cat /sys/devices/system/node/online)
|
||||
ONLINE_CPUSET=$(/bin/cat /sys/devices/system/cpu/online)
|
||||
|
||||
# Configure kubelet cgroup to match cgroupRoot.
|
||||
create_cgroup 'k8s-infra' ${ONLINE_NODESET} ${ONLINE_CPUSET}
|
||||
|
||||
exit $?
|
||||
|
@ -1,111 +0,0 @@
|
||||
From 7bae7fe0f023b4c6a49910bf583af7ccc4af9391 Mon Sep 17 00:00:00 2001
|
||||
From: Jim Gauld <james.gauld@windriver.com>
|
||||
Date: Thu, 5 Sep 2019 10:46:58 -0400
|
||||
Subject: [PATCH 1/6] kubelet cpumanager disable CFS quota throttling for
|
||||
Guaranteed pods
|
||||
|
||||
This disables CFS CPU quota to avoid performance degradation due to
the Linux kernel CFS quota implementation. Note that the 4.18 kernel
attempts to solve the CFS throttling problem, but there are reports
that it is not completely effective.
|
||||
|
||||
This disables CFS quota throttling for Guaranteed pods for both
|
||||
parent and container cgroups by writing -1 to cgroup cpu.cfs_quota_us.
|
||||
Disabling has a dramatic latency improvement for HTTP response times.
|
||||
|
||||
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager.go | 22 ++++++++++++++++++++++
|
||||
pkg/kubelet/cm/helpers_linux.go | 5 +++++
|
||||
pkg/kubelet/cm/helpers_linux_test.go | 8 ++++----
|
||||
3 files changed, 31 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index 07ff3d9c860..fc3a247e70b 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -36,6 +36,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/status"
|
||||
+ v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
|
||||
)
|
||||
|
||||
// ActivePodsFunc is a function that returns a list of pods to reconcile.
|
||||
@@ -233,6 +234,14 @@ func (m *manager) AddContainer(p *v1.Pod, c *v1.Container, containerID string) e
|
||||
// Get the CPUs assigned to the container during Allocate()
|
||||
// (or fall back to the default CPUSet if none were assigned).
|
||||
cpus := m.state.GetCPUSetOrDefault(string(p.UID), c.Name)
|
||||
+
|
||||
+ // Guaranteed PODs should not have CFS quota throttle
|
||||
+ if m.policy.Name() == string(PolicyStatic) && v1qos.GetPodQOS(p) == v1.PodQOSGuaranteed {
|
||||
+ err := m.disableContainerCPUQuota(containerID)
|
||||
+ if err != nil {
|
||||
+ klog.Errorf("[cpumanager] AddContainer disable CPU Quota error: %v", err)
|
||||
+ }
|
||||
+ }
|
||||
m.Unlock()
|
||||
|
||||
if !cpus.IsEmpty() {
|
||||
@@ -464,3 +473,16 @@ func (m *manager) updateContainerCPUSet(containerID string, cpus cpuset.CPUSet)
|
||||
CpusetCpus: cpus.String(),
|
||||
})
|
||||
}
|
||||
+
|
||||
+func (m *manager) disableContainerCPUQuota(containerID string) error {
|
||||
+ // Disable CFS CPU quota to avoid performance degradation due to
|
||||
+ // Linux kernel CFS throttle implementation.
|
||||
+ // NOTE: 4.18 kernel attempts to solve CFS throttling problem,
|
||||
+ // but there are reports that it is not completely effective.
|
||||
+ return m.containerRuntime.UpdateContainerResources(
|
||||
+ containerID,
|
||||
+ &runtimeapi.LinuxContainerResources{
|
||||
+ CpuPeriod: 100000,
|
||||
+ CpuQuota: -1,
|
||||
+ })
|
||||
+}
|
||||
diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go
|
||||
index 9b115ab5380..d3185e1e958 100644
|
||||
--- a/pkg/kubelet/cm/helpers_linux.go
|
||||
+++ b/pkg/kubelet/cm/helpers_linux.go
|
||||
@@ -166,6 +166,11 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64)
|
||||
// determine the qos class
|
||||
qosClass := v1qos.GetPodQOS(pod)
|
||||
|
||||
+ // disable cfs quota for guaranteed pods
|
||||
+ if qosClass == v1.PodQOSGuaranteed {
|
||||
+ cpuQuota = int64(-1)
|
||||
+ }
|
||||
+
|
||||
// build the result
|
||||
result := &ResourceConfig{}
|
||||
if qosClass == v1.PodQOSGuaranteed {
|
||||
diff --git a/pkg/kubelet/cm/helpers_linux_test.go b/pkg/kubelet/cm/helpers_linux_test.go
|
||||
index 56d765fbc22..0c43afe5875 100644
|
||||
--- a/pkg/kubelet/cm/helpers_linux_test.go
|
||||
+++ b/pkg/kubelet/cm/helpers_linux_test.go
|
||||
@@ -63,8 +63,8 @@ func TestResourceConfigForPod(t *testing.T) {
|
||||
burstablePartialShares := MilliCPUToShares(200)
|
||||
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
|
||||
guaranteedShares := MilliCPUToShares(100)
|
||||
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
|
||||
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
|
||||
+ guaranteedQuota := int64(-1)
|
||||
+ guaranteedTunedQuota := int64(-1)
|
||||
memoryQuantity = resource.MustParse("100Mi")
|
||||
cpuNoLimit := int64(-1)
|
||||
guaranteedMemory := memoryQuantity.Value()
|
||||
@@ -283,8 +283,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
||||
burstablePartialShares := MilliCPUToShares(200)
|
||||
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
|
||||
guaranteedShares := MilliCPUToShares(100)
|
||||
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
|
||||
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
|
||||
+ guaranteedQuota := int64(-1)
|
||||
+ guaranteedTunedQuota := int64(-1)
|
||||
memoryQuantity = resource.MustParse("100Mi")
|
||||
cpuNoLimit := int64(-1)
|
||||
guaranteedMemory := memoryQuantity.Value()
|
||||
--
|
||||
2.24.2
|
||||
|
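The patch above sidesteps CFS throttling by forcing the CPU quota to -1 (unlimited) for Guaranteed pods instead of deriving it from the CPU limit. A small sketch of the quota arithmetic being replaced, following the MilliCPUToQuota convention visible in the test hunks (quota = milliCPU * period / 1000); the function below is illustrative, not the kubelet helper itself:

package main

import "fmt"

const (
	defaultQuotaPeriod = 100000 // CFS period in microseconds
	milliCPUToCPU      = 1000
)

// quotaForPod mirrors the behaviour the patch introduces: Guaranteed pods get
// an unlimited quota (-1) so they are never CFS-throttled, while everything
// else keeps the usual quota derived from its CPU limit.
func quotaForPod(qosGuaranteed bool, milliCPU, period int64) int64 {
	if qosGuaranteed {
		return -1
	}
	return milliCPU * period / milliCPUToCPU
}

func main() {
	fmt.Println(quotaForPod(false, 200, defaultQuotaPeriod)) // 20000 for a 200m limit
	fmt.Println(quotaForPod(true, 100, defaultQuotaPeriod))  // -1, no throttling
}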
@ -1,139 +0,0 @@
|
||||
From 6f6bbe74fcff0bef6e27a3da4da3f9cdddf45fe6 Mon Sep 17 00:00:00 2001
|
||||
From: Chris Friesen <chris.friesen@windriver.com>
|
||||
Date: Fri, 27 Sep 2019 14:11:54 -0600
|
||||
Subject: [PATCH 3/6] kubelet cpumanager infrastructure pods use system
|
||||
reserved CPUs
|
||||
|
||||
This assigns system infrastructure pods to the "reserved" cpuset
|
||||
to isolate them from the shared pool of CPUs.
|
||||
|
||||
Infrastructure pods include any pods that belong to the kube-system,
|
||||
armada, cert-manager, vault, platform-deployment-manager, portieris,
|
||||
notification or flux-helm namespaces.
|
||||
|
||||
The implementation is a bit simplistic; it is assumed that the
"reserved" cpuset is large enough to handle all infrastructure pods'
CPU allocations.
|
||||
|
||||
This also prevents infrastructure pods from using Guaranteed resources.
|
||||
|
||||
Signed-off-by: Chris Friesen <chris.friesen@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/cpumanager/policy_static.go | 45 +++++++++++++++++++
|
||||
.../cm/cpumanager/policy_static_test.go | 19 +++++++-
|
||||
2 files changed, 63 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index 4dc0e499fc4..ee960519127 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -32,6 +32,11 @@ import (
|
||||
// PolicyStatic is the name of the static policy
|
||||
const PolicyStatic policyName = "static"
|
||||
|
||||
+// Define namespaces used by platform infrastructure pods
|
||||
+var infraNamespaces = [...]string{
|
||||
+ "kube-system", "armada", "cert-manager", "platform-deployment-manager", "portieris", "vault", "notification", "flux-helm",
|
||||
+}
|
||||
+
|
||||
// staticPolicy is a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
@@ -232,6 +237,32 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c
|
||||
}
|
||||
|
||||
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
|
||||
+ // Process infra pods before guaranteed pods
|
||||
+ if isKubeInfra(pod) {
|
||||
+ // Container belongs in reserved pool.
|
||||
+ // We don't want to fall through to the p.guaranteedCPUs() clause below so return either nil or error.
|
||||
+ if _, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
|
||||
+ klog.Infof("[cpumanager] static policy: reserved container already present in state, skipping " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||
+ return nil
|
||||
+ }
|
||||
+
|
||||
+ cpuset := p.reserved
|
||||
+ if cpuset.IsEmpty() {
|
||||
+ // If this happens then someone messed up.
|
||||
+ return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved)
|
||||
+ }
|
||||
+ s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||
+ klog.Infof("[cpumanager] static policy: reserved: AddContainer " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset)
|
||||
+ return nil
|
||||
+ }
|
||||
+
|
||||
+
|
||||
if numCPUs := p.guaranteedCPUs(pod, container); numCPUs != 0 {
|
||||
klog.Infof("[cpumanager] static policy: Allocate (pod: %s, container: %s)", pod.Name, container.Name)
|
||||
// container belongs in an exclusively allocated pool
|
||||
@@ -321,6 +352,10 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int
|
||||
if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
|
||||
return 0
|
||||
}
|
||||
+ // Infrastructure pods use reserved CPUs even if they're in the Guaranteed QoS class
|
||||
+ if isKubeInfra(pod) {
|
||||
+ return 0
|
||||
+ }
|
||||
// Safe downcast to do for all systems with < 2.1 billion CPUs.
|
||||
// Per the language spec, `int` is guaranteed to be at least 32 bits wide.
|
||||
// https://golang.org/ref/spec#Numeric_types
|
||||
@@ -438,3 +473,13 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu
|
||||
|
||||
return hints
|
||||
}
|
||||
+
|
||||
+// check if a given pod is in a platform infrastructure namespace
|
||||
+func isKubeInfra(pod *v1.Pod) bool {
|
||||
+ for _, namespace := range infraNamespaces {
|
||||
+ if namespace == pod.Namespace {
|
||||
+ return true
|
||||
+ }
|
||||
+ }
|
||||
+ return false
|
||||
+}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
index 9c7e4f146ff..5cfd9a8e24e 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
@@ -747,7 +747,8 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
-
|
||||
+ infraPod := makePod("fakePod", "fakeContainer2", "200m", "200m")
|
||||
+ infraPod.Namespace = "kube-system"
|
||||
testCases := []staticPolicyTestWithResvList{
|
||||
{
|
||||
description: "GuPodSingleCore, SingleSocketHT, ExpectError",
|
||||
@@ -789,6 +790,22 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
expCPUAlloc: true,
|
||||
expCSet: cpuset.NewCPUSet(4, 5),
|
||||
},
|
||||
+ {
|
||||
+ description: "InfraPod, SingleSocketHT, ExpectAllocReserved",
|
||||
+ topo: topoSingleSocketHT,
|
||||
+ numReservedCPUs: 2,
|
||||
+ reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ stAssignments: state.ContainerCPUAssignments{
|
||||
+ "fakePod": map[string]cpuset.CPUSet{
|
||||
+ "fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
+ },
|
||||
+ },
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
|
||||
+ pod: infraPod,
|
||||
+ expErr: nil,
|
||||
+ expCPUAlloc: true,
|
||||
+ expCSet: cpuset.NewCPUSet(0, 1),
|
||||
+ },
|
||||
}
|
||||
|
||||
testExcl := true
|
||||
--
|
||||
2.24.2
|
||||
|
@ -1,528 +0,0 @@
|
||||
From 6224d7abbb0b0946b901d1f6923f73415dc545d5 Mon Sep 17 00:00:00 2001
|
||||
From: Chris Friesen <chris.friesen@windriver.com>
|
||||
Date: Mon, 30 Aug 2021 16:44:36 -0600
|
||||
Subject: [PATCH 4/6] kubelet cpumanager introduce concept of isolated CPUs
|
||||
|
||||
This introduces the concept of "isolated CPUs", which are CPUs that
|
||||
have been isolated at the kernel level via the "isolcpus" kernel boot
|
||||
parameter.
|
||||
|
||||
When starting the kubelet process, two separate sets of reserved CPUs
|
||||
may be specified. With this change CPUs reserved via
|
||||
'--system-reserved=cpu' will be used for infrastructure pods while the
|
||||
isolated CPUs should be reserved via '--kube-reserved=cpu' to cause
|
||||
kubelet to skip over them for "normal" CPU resource tracking. The
|
||||
kubelet code will double-check that the specified isolated CPUs match
|
||||
what the kernel exposes in "/sys/devices/system/cpu/isolated".
|
||||
|
||||
A plugin (outside the scope of this commit) will expose the isolated
|
||||
CPUs to kubelet via the device plugin API.
|
||||
|
||||
If a pod specifies some number of "isolcpus" resources, the device
|
||||
manager will allocate them. In this code we check whether such
|
||||
resources have been allocated, and if so we set the container cpuset to
|
||||
the isolated CPUs. This does mean that it really only makes sense to
|
||||
specify "isolcpus" resources for best-effort or burstable pods, not for
|
||||
guaranteed ones since that would throw off the accounting code. In
|
||||
order to ensure the accounting still works as designed, if "isolcpus"
|
||||
are specified for guaranteed pods, the affinity will be set to the
|
||||
non-isolated CPUs.
|
||||
|
||||
Signed-off-by: Chris Friesen <chris.friesen@windriver.com>
|
||||
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/container_manager_linux.go | 1 +
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager.go | 31 ++++++-
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 13 ++-
|
||||
pkg/kubelet/cm/cpumanager/policy_static.go | 86 +++++++++++++++++--
|
||||
.../cm/cpumanager/policy_static_test.go | 46 ++++++++--
|
||||
5 files changed, 157 insertions(+), 20 deletions(-)
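The new getIsolcpus() helper in the hunks below reads /sys/devices/system/cpu/isolated and parses the kernel's CPU-list syntax. Here is a standalone sketch of that read-and-parse step using only the Go standard library, assuming the usual "N-M,K" list format; the kubelet code uses its cpuset package instead, and the fallback example string is purely illustrative.

package main

import (
	"fmt"
	"os"
	"sort"
	"strconv"
	"strings"
)

// parseCPUList converts a kernel CPU list such as "3-5,8" into a set of CPU ids.
func parseCPUList(s string) (map[int]struct{}, error) {
	cpus := make(map[int]struct{})
	s = strings.TrimSpace(s)
	if s == "" {
		return cpus, nil // no isolated CPUs configured
	}
	for _, part := range strings.Split(s, ",") {
		if lo, hi, found := strings.Cut(part, "-"); found {
			start, err1 := strconv.Atoi(lo)
			end, err2 := strconv.Atoi(hi)
			if err1 != nil || err2 != nil {
				return nil, fmt.Errorf("bad range %q", part)
			}
			for cpu := start; cpu <= end; cpu++ {
				cpus[cpu] = struct{}{}
			}
		} else {
			cpu, err := strconv.Atoi(part)
			if err != nil {
				return nil, fmt.Errorf("bad cpu id %q", part)
			}
			cpus[cpu] = struct{}{}
		}
	}
	return cpus, nil
}

func main() {
	// Same file the patched kubelet consults; fall back to an example string
	// so the sketch also runs on machines without isolcpus configured.
	text := "2-5,8"
	if data, err := os.ReadFile("/sys/devices/system/cpu/isolated"); err == nil {
		text = string(data)
	}
	cpus, err := parseCPUList(text)
	if err != nil {
		fmt.Println("parse error:", err)
		return
	}
	var list []int
	for cpu := range cpus {
		list = append(list, cpu)
	}
	sort.Ints(list)
	fmt.Println("isolated CPUs:", list)
}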
|
||||
|
||||
diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go
|
||||
index 72d960879e8..56df7f8b0bf 100644
|
||||
--- a/pkg/kubelet/cm/container_manager_linux.go
|
||||
+++ b/pkg/kubelet/cm/container_manager_linux.go
|
||||
@@ -326,6 +326,7 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
|
||||
cm.GetNodeAllocatableReservation(),
|
||||
nodeConfig.KubeletRootDir,
|
||||
cm.topologyManager,
|
||||
+ cm.deviceManager,
|
||||
)
|
||||
if err != nil {
|
||||
klog.Errorf("failed to initialize cpu manager: %v", err)
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index 281f31a6a0e..5e13a6d90c7 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -21,6 +21,8 @@ import (
|
||||
"math"
|
||||
"sync"
|
||||
"time"
|
||||
+ "strings"
|
||||
+ "io/ioutil"
|
||||
|
||||
cadvisorapi "github.com/google/cadvisor/info/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
@@ -34,6 +36,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/status"
|
||||
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
|
||||
@@ -51,6 +54,25 @@ type policyName string
|
||||
// cpuManagerStateFileName is the file name where cpu manager stores its state
|
||||
const cpuManagerStateFileName = "cpu_manager_state"
|
||||
|
||||
+// get the system-level isolated CPUs
|
||||
+func getIsolcpus() cpuset.CPUSet {
|
||||
+ dat, err := ioutil.ReadFile("/sys/devices/system/cpu/isolated")
|
||||
+ if err != nil {
|
||||
+ klog.Errorf("[cpumanager] unable to read sysfs isolcpus subdir")
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+
|
||||
+ // The isolated cpus string ends in a newline
|
||||
+ cpustring := strings.TrimSuffix(string(dat), "\n")
|
||||
+ cset, err := cpuset.Parse(cpustring)
|
||||
+ if err != nil {
|
||||
+ klog.Errorf("[cpumanager] unable to parse sysfs isolcpus string to cpuset")
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+
|
||||
+ return cset
|
||||
+}
|
||||
+
|
||||
// Manager interface provides methods for Kubelet to manage pod cpus.
|
||||
type Manager interface {
|
||||
// Start is called during Kubelet initialization.
|
||||
@@ -127,7 +149,7 @@ func (s *sourcesReadyStub) AddSource(source string) {}
|
||||
func (s *sourcesReadyStub) AllReady() bool { return true }
|
||||
|
||||
// NewManager creates new cpu manager based on provided policy
|
||||
-func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
|
||||
+func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store, deviceManager devicemanager.Manager) (Manager, error) {
|
||||
var topo *topology.CPUTopology
|
||||
var policy Policy
|
||||
|
||||
@@ -164,8 +186,11 @@ func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo
|
||||
// NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
|
||||
// This variable is primarily to make testing easier.
|
||||
excludeReserved := true
|
||||
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, excludeReserved)
|
||||
-
|
||||
+ // isolCPUs is the set of kernel-isolated CPUs. They should be a subset of specificCPUs or
|
||||
+ // of the CPUs that NewStaticPolicy() will pick if numReservedCPUs is set. It's only in the
|
||||
+ // argument list here for ease of testing, it's really internal to the policy.
|
||||
+ isolCPUs := getIsolcpus()
|
||||
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, isolCPUs, affinity, deviceManager, excludeReserved)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new static policy error: %v", err)
|
||||
}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
index e9fc823130b..f56045e5215 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
@@ -38,6 +38,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
)
|
||||
|
||||
type mockState struct {
|
||||
@@ -207,6 +208,7 @@ func makeMultiContainerPod(initCPUs, appCPUs []struct{ request, limit string })
|
||||
}
|
||||
|
||||
func TestCPUManagerAdd(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testExcl := false
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
@@ -222,7 +224,9 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||
},
|
||||
0,
|
||||
cpuset.NewCPUSet(),
|
||||
+ cpuset.NewCPUSet(),
|
||||
topologymanager.NewFakeManager(),
|
||||
+ testDM,
|
||||
testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
@@ -478,8 +482,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
||||
},
|
||||
}
|
||||
testExcl := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testExcl)
|
||||
|
||||
state := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -631,7 +636,8 @@ func TestCPUManagerGenerate(t *testing.T) {
|
||||
}
|
||||
defer os.RemoveAll(sDir)
|
||||
|
||||
- mgr, err := NewManager(testCase.cpuPolicyName, 5*time.Second, machineInfo, cpuset.NewCPUSet(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager())
|
||||
+ testDM, err := devicemanager.NewManagerStub()
|
||||
+ mgr, err := NewManager(testCase.cpuPolicyName, 5*time.Second, machineInfo, cpuset.NewCPUSet(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager(), testDM)
|
||||
if testCase.expectedError != nil {
|
||||
if !strings.Contains(err.Error(), testCase.expectedError.Error()) {
|
||||
t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), testCase.expectedError.Error())
|
||||
@@ -986,6 +992,7 @@ func TestReconcileState(t *testing.T) {
|
||||
// the following tests are with --reserved-cpus configured
|
||||
func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
testExcl := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 4,
|
||||
@@ -1000,7 +1007,9 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
},
|
||||
1,
|
||||
cpuset.NewCPUSet(0),
|
||||
+ cpuset.NewCPUSet(),
|
||||
topologymanager.NewFakeManager(),
|
||||
+ testDM,
|
||||
testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index ee960519127..78bb297eb07 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -18,6 +18,7 @@ package cpumanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
+ "strconv"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
@@ -27,6 +28,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
)
|
||||
|
||||
// PolicyStatic is the name of the static policy
|
||||
@@ -80,6 +82,10 @@ type staticPolicy struct {
|
||||
topology *topology.CPUTopology
|
||||
// set of CPUs that is not available for exclusive assignment
|
||||
reserved cpuset.CPUSet
|
||||
+ // subset of reserved CPUs with isolcpus attribute
|
||||
+ isolcpus cpuset.CPUSet
|
||||
+ // parent containerManager, used to get device list
|
||||
+ deviceManager devicemanager.Manager
|
||||
// If true, default CPUSet should exclude reserved CPUs
|
||||
excludeReserved bool
|
||||
// topology manager reference to get container Topology affinity
|
||||
@@ -94,7 +100,7 @@ var _ Policy = &staticPolicy{}
|
||||
// NewStaticPolicy returns a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, excludeReserved bool) (Policy, error) {
|
||||
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, isolCPUs cpuset.CPUSet, affinity topologymanager.Store, deviceManager devicemanager.Manager, excludeReserved bool) (Policy, error) {
|
||||
allCPUs := topology.CPUDetails.CPUs()
|
||||
var reserved cpuset.CPUSet
|
||||
if reservedCPUs.Size() > 0 {
|
||||
@@ -114,10 +120,17 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||
}
|
||||
|
||||
klog.Infof("[cpumanager] reserved %d CPUs (\"%s\") not available for exclusive assignment", reserved.Size(), reserved)
|
||||
-
|
||||
+ if !isolCPUs.IsSubsetOf(reserved) {
|
||||
+ klog.Errorf("[cpumanager] isolCPUs %v is not a subset of reserved %v", isolCPUs, reserved)
|
||||
+ reserved = reserved.Union(isolCPUs)
|
||||
+ klog.Warningf("[cpumanager] mismatch isolCPUs %v, force reserved %v", isolCPUs, reserved)
|
||||
+ }
|
||||
+
|
||||
return &staticPolicy{
|
||||
topology: topology,
|
||||
reserved: reserved,
|
||||
+ isolcpus: isolCPUs,
|
||||
+ deviceManager: deviceManager,
|
||||
excludeReserved: excludeReserved,
|
||||
affinity: affinity,
|
||||
cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||
@@ -154,8 +167,8 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
} else {
|
||||
s.SetDefaultCPUSet(allCPUs)
|
||||
}
|
||||
- klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
|
||||
- allCPUs, p.reserved, s.GetDefaultCPUSet())
|
||||
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, isolcpus:%v, default:%v\n",
|
||||
+ allCPUs, p.reserved, p.isolcpus, s.GetDefaultCPUSet())
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -248,12 +261,13 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||
return nil
|
||||
}
|
||||
|
||||
- cpuset := p.reserved
|
||||
+ // TODO: Is the clone actually needed?
|
||||
+ cpuset := p.reserved.Clone().Difference(p.isolcpus)
|
||||
if cpuset.IsEmpty() {
|
||||
// If this happens then someone messed up.
|
||||
return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus " +
|
||||
- "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v",
|
||||
- pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved)
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v, isolcpus:%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved, p.isolcpus)
|
||||
}
|
||||
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||
klog.Infof("[cpumanager] static policy: reserved: AddContainer " +
|
||||
@@ -285,8 +299,37 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||
}
|
||||
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||
p.updateCPUsToReuse(pod, container, cpuset)
|
||||
+ klog.Infof("[cpumanager] guaranteed: AddContainer " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); numCPUS=%d, cpuset=%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, numCPUs, cpuset)
|
||||
+ return nil
|
||||
+ }
|
||||
|
||||
+ if isolcpus := p.podIsolCPUs(pod, container); isolcpus.Size() > 0 {
|
||||
+ // container has requested isolated CPUs
|
||||
+ if set, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
|
||||
+ if set.Equals(isolcpus) {
|
||||
+ klog.Infof("[cpumanager] isolcpus container already present in state, skipping " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||
+ return nil
|
||||
+ } else {
|
||||
+ klog.Infof("[cpumanager] isolcpus container state has cpus %v, should be %v" +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||
+ isolcpus, set, pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||
+ }
|
||||
+ }
|
||||
+ // Note that we do not do anything about init containers here.
|
||||
+ // It looks like devices are allocated per-pod based on effective requests/limits
|
||||
+ // and extra devices from initContainers are not freed up when the regular containers start.
|
||||
+ // TODO: confirm this is still true for 1.19
|
||||
+ s.SetCPUSet(string(pod.UID), container.Name, isolcpus)
|
||||
+ klog.Infof("[cpumanager] isolcpus: AddContainer " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, isolcpus)
|
||||
+ return nil
|
||||
}
|
||||
+
|
||||
// container belongs in the shared pool (nothing to do; use default cpuset)
|
||||
return nil
|
||||
}
|
||||
@@ -483,3 +526,32 @@ func isKubeInfra(pod *v1.Pod) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
+
|
||||
+// get the isolated CPUs (if any) from the devices associated with a specific container
|
||||
+func (p *staticPolicy) podIsolCPUs(pod *v1.Pod, container *v1.Container) cpuset.CPUSet {
|
||||
+ // NOTE: This is required for TestStaticPolicyAdd() since makePod() does
|
||||
+ // not create UID. We also need a way to properly stub devicemanager.
|
||||
+ if len(string(pod.UID)) == 0 {
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+ devices := p.deviceManager.GetDevices(string(pod.UID), container.Name)
|
||||
+ for _, dev := range devices {
|
||||
+ // this resource name needs to match the isolcpus device plugin
|
||||
+ if dev.ResourceName == "windriver.com/isolcpus" {
|
||||
+ cpuStrList := dev.DeviceIds
|
||||
+ if len(cpuStrList) > 0 {
|
||||
+ cpuSet := cpuset.NewCPUSet()
|
||||
+ // loop over the list of strings, convert each one to int, add to cpuset
|
||||
+ for _, cpuStr := range cpuStrList {
|
||||
+ cpu, err := strconv.Atoi(cpuStr)
|
||||
+ if err != nil {
|
||||
+ panic(err)
|
||||
+ }
|
||||
+ cpuSet = cpuSet.Union(cpuset.NewCPUSet(cpu))
|
||||
+ }
|
||||
+ return cpuSet
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ return cpuset.NewCPUSet()
|
||||
+}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
index 5cfd9a8e24e..e3d02d6b819 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
@@ -27,6 +27,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
)
|
||||
|
||||
type staticPolicyTest struct {
|
||||
@@ -45,8 +46,9 @@ type staticPolicyTest struct {
|
||||
}
|
||||
|
||||
func TestStaticPolicyName(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testExcl := false
|
||||
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
|
||||
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testExcl)
|
||||
|
||||
policyName := policy.Name()
|
||||
if policyName != "static" {
|
||||
@@ -56,6 +58,7 @@ func TestStaticPolicyName(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyStart(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
description: "non-corrupted state",
|
||||
@@ -131,7 +134,7 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
|
||||
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testCase.excludeReserved)
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -179,6 +182,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
largeTopoSock0CPUSet := largeTopoSock0Builder.Result()
|
||||
largeTopoSock1CPUSet := largeTopoSock1Builder.Result()
|
||||
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
description: "GuPodSingleCore, SingleSocketHT, ExpectError",
|
||||
@@ -447,7 +451,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testCase.excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -490,6 +494,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyRemove(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
excludeReserved := false
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
@@ -549,7 +554,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -571,6 +576,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
excludeReserved := false
|
||||
testCases := []struct {
|
||||
description string
|
||||
@@ -640,7 +646,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
|
||||
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, excludeReserved)
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: tc.stAssignments,
|
||||
@@ -673,6 +679,7 @@ type staticPolicyTestWithResvList struct {
|
||||
topo *topology.CPUTopology
|
||||
numReservedCPUs int
|
||||
reserved cpuset.CPUSet
|
||||
+ isolcpus cpuset.CPUSet
|
||||
stAssignments state.ContainerCPUAssignments
|
||||
stDefaultCPUSet cpuset.CPUSet
|
||||
pod *v1.Pod
|
||||
@@ -713,9 +720,10 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
},
|
||||
}
|
||||
testExcl := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
|
||||
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testExcl)
|
||||
if !reflect.DeepEqual(err, testCase.expNewErr) {
|
||||
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
|
||||
testCase.description, testCase.expNewErr, err)
|
||||
@@ -755,6 +763,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 1,
|
||||
reserved: cpuset.NewCPUSet(0),
|
||||
+ isolcpus: cpuset.NewCPUSet(),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
stDefaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
||||
@@ -767,6 +776,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ isolcpus: cpuset.NewCPUSet(),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
stDefaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
|
||||
@@ -779,6 +789,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ isolcpus: cpuset.NewCPUSet(),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"fakePod": map[string]cpuset.CPUSet{
|
||||
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
@@ -795,6 +806,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ isolcpus: cpuset.NewCPUSet(),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"fakePod": map[string]cpuset.CPUSet{
|
||||
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
@@ -806,12 +818,30 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
expCPUAlloc: true,
|
||||
expCSet: cpuset.NewCPUSet(0, 1),
|
||||
},
|
||||
+ {
|
||||
+ description: "InfraPod, SingleSocketHT, Isolcpus, ExpectAllocReserved",
|
||||
+ topo: topoSingleSocketHT,
|
||||
+ numReservedCPUs: 2,
|
||||
+ reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ isolcpus: cpuset.NewCPUSet(1),
|
||||
+ stAssignments: state.ContainerCPUAssignments{
|
||||
+ "fakePod": map[string]cpuset.CPUSet{
|
||||
+ "fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
+ },
|
||||
+ },
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
|
||||
+ pod: infraPod,
|
||||
+ expErr: nil,
|
||||
+ expCPUAlloc: true,
|
||||
+ expCSet: cpuset.NewCPUSet(0),
|
||||
+ },
|
||||
}
|
||||
|
||||
testExcl := true
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
|
||||
-
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, testCase.isolcpus, topologymanager.NewFakeManager(), testDM, testExcl)
|
||||
+
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
defaultCPUSet: testCase.stDefaultCPUSet,
|
||||
--
|
||||
2.24.2
|
||||
|
@ -1,314 +0,0 @@
|
||||
From e4625434ce9cec55428ee8600c4a347e75cf8584 Mon Sep 17 00:00:00 2001
|
||||
From: Chris Friesen <chris.friesen@windriver.com>
|
||||
Date: Mon, 30 Aug 2021 15:45:57 -0600
|
||||
Subject: [PATCH 2/6] kubelet cpumanager keep normal containers off reserved
|
||||
CPUs
|
||||
|
||||
When starting the kubelet process, two separate sets of reserved CPUs
|
||||
may be specified. With this change CPUs reserved via '--system-reserved=cpu'
|
||||
or '--kube-reserved=cpu' will be ignored by kubernetes itself. A small
|
||||
tweak to the default CPU affinity ensures that "normal" Kubernetes
|
||||
pods won't run on the reserved CPUs.
|
||||
|
||||
Signed-off-by: Chris Friesen <chris.friesen@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager.go | 6 +++-
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 12 ++++---
|
||||
pkg/kubelet/cm/cpumanager/policy_static.go | 29 ++++++++++++---
|
||||
.../cm/cpumanager/policy_static_test.go | 35 +++++++++++++------
|
||||
4 files changed, 62 insertions(+), 20 deletions(-)
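The excludeReserved behaviour introduced below is essentially a set difference: at state initialization the default (shared) cpuset becomes all online CPUs minus the reserved ones, so ordinary pods never land on reserved cores, and CPUs released by containers are added back minus the reserved set. A small Go sketch of that difference with plain integer slices, as an illustration only; the real code operates on cpuset.CPUSet values.

package main

import (
	"fmt"
	"sort"
)

// difference returns the CPUs in all that are not in reserved,
// i.e. the default (shared) cpuset when excludeReserved is true.
func difference(all, reserved []int) []int {
	skip := make(map[int]bool, len(reserved))
	for _, cpu := range reserved {
		skip[cpu] = true
	}
	var shared []int
	for _, cpu := range all {
		if !skip[cpu] {
			shared = append(shared, cpu)
		}
	}
	sort.Ints(shared)
	return shared
}

func main() {
	allCPUs := []int{0, 1, 2, 3, 4, 5, 6, 7}
	reserved := []int{0, 1} // e.g. CPUs set aside via --system-reserved=cpu
	fmt.Println("default cpuset:", difference(allCPUs, reserved)) // [2 3 4 5 6 7]
}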
|
||||
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index fc3a247e70b..281f31a6a0e 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -161,7 +161,11 @@ func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo
|
||||
// exclusively allocated.
|
||||
reservedCPUsFloat := float64(reservedCPUs.MilliValue()) / 1000
|
||||
numReservedCPUs := int(math.Ceil(reservedCPUsFloat))
|
||||
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity)
|
||||
+ // NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
|
||||
+ // This variable is primarily to make testing easier.
|
||||
+ excludeReserved := true
|
||||
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, excludeReserved)
|
||||
+
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new static policy error: %v", err)
|
||||
}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
index dc0d756f07c..e9fc823130b 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
@@ -207,6 +207,7 @@ func makeMultiContainerPod(initCPUs, appCPUs []struct{ request, limit string })
|
||||
}
|
||||
|
||||
func TestCPUManagerAdd(t *testing.T) {
|
||||
+ testExcl := false
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 4,
|
||||
@@ -221,7 +222,8 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||
},
|
||||
0,
|
||||
cpuset.NewCPUSet(),
|
||||
- topologymanager.NewFakeManager())
|
||||
+ topologymanager.NewFakeManager(),
|
||||
+ testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
updateErr error
|
||||
@@ -475,9 +477,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
||||
cpuset.NewCPUSet(1, 5)},
|
||||
},
|
||||
}
|
||||
-
|
||||
+ testExcl := false
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
|
||||
|
||||
state := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -983,6 +985,7 @@ func TestReconcileState(t *testing.T) {
|
||||
// above test cases are without kubelet --reserved-cpus cmd option
|
||||
// the following tests are with --reserved-cpus configured
|
||||
func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
+ testExcl := false
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 4,
|
||||
@@ -997,7 +1000,8 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
},
|
||||
1,
|
||||
cpuset.NewCPUSet(0),
|
||||
- topologymanager.NewFakeManager())
|
||||
+ topologymanager.NewFakeManager(),
|
||||
+ testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
updateErr error
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index dd4dccbc36e..4dc0e499fc4 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -75,6 +75,8 @@ type staticPolicy struct {
|
||||
topology *topology.CPUTopology
|
||||
// set of CPUs that is not available for exclusive assignment
|
||||
reserved cpuset.CPUSet
|
||||
+ // If true, default CPUSet should exclude reserved CPUs
|
||||
+ excludeReserved bool
|
||||
// topology manager reference to get container Topology affinity
|
||||
affinity topologymanager.Store
|
||||
// set of CPUs to reuse across allocations in a pod
|
||||
@@ -87,7 +89,7 @@ var _ Policy = &staticPolicy{}
|
||||
// NewStaticPolicy returns a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store) (Policy, error) {
|
||||
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, excludeReserved bool) (Policy, error) {
|
||||
allCPUs := topology.CPUDetails.CPUs()
|
||||
var reserved cpuset.CPUSet
|
||||
if reservedCPUs.Size() > 0 {
|
||||
@@ -111,6 +113,7 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||
return &staticPolicy{
|
||||
topology: topology,
|
||||
reserved: reserved,
|
||||
+ excludeReserved: excludeReserved,
|
||||
affinity: affinity,
|
||||
cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||
}, nil
|
||||
@@ -139,7 +142,15 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
}
|
||||
// state is empty initialize
|
||||
allCPUs := p.topology.CPUDetails.CPUs()
|
||||
- s.SetDefaultCPUSet(allCPUs)
|
||||
+ if p.excludeReserved {
|
||||
+ // Exclude reserved CPUs from the default CPUSet to keep containers off them
|
||||
+ // unless explicitly affined.
|
||||
+ s.SetDefaultCPUSet(allCPUs.Difference(p.reserved))
|
||||
+ } else {
|
||||
+ s.SetDefaultCPUSet(allCPUs)
|
||||
+ }
|
||||
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
|
||||
+ allCPUs, p.reserved, s.GetDefaultCPUSet())
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -147,9 +158,11 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
// 1. Check if the reserved cpuset is not part of default cpuset because:
|
||||
// - kube/system reserved have changed (increased) - may lead to some containers not being able to start
|
||||
// - user tampered with file
|
||||
- if !p.reserved.Intersection(tmpDefaultCPUset).Equals(p.reserved) {
|
||||
- return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
- p.reserved.String(), tmpDefaultCPUset.String())
|
||||
+ if !p.excludeReserved {
|
||||
+ if !p.reserved.Intersection(tmpDefaultCPUset).Equals(p.reserved) {
|
||||
+ return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
+ p.reserved.String(), tmpDefaultCPUset.String())
|
||||
+ }
|
||||
}
|
||||
|
||||
// 2. Check if state for static policy is consistent
|
||||
@@ -178,6 +191,9 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
}
|
||||
}
|
||||
totalKnownCPUs = totalKnownCPUs.UnionAll(tmpCPUSets)
|
||||
+ if p.excludeReserved {
|
||||
+ totalKnownCPUs = totalKnownCPUs.Union(p.reserved)
|
||||
+ }
|
||||
if !totalKnownCPUs.Equals(p.topology.CPUDetails.CPUs()) {
|
||||
return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
|
||||
p.topology.CPUDetails.CPUs().String(), totalKnownCPUs.String())
|
||||
@@ -248,6 +264,9 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
|
||||
klog.Infof("[cpumanager] static policy: RemoveContainer (pod: %s, container: %s)", podUID, containerName)
|
||||
if toRelease, ok := s.GetCPUSet(podUID, containerName); ok {
|
||||
s.Delete(podUID, containerName)
|
||||
+ if p.excludeReserved {
|
||||
+ toRelease = toRelease.Difference(p.reserved)
|
||||
+ }
|
||||
// Mutate the shared pool, adding released cpus.
|
||||
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
|
||||
}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
index b4b46c68c17..9c7e4f146ff 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
@@ -33,6 +33,7 @@ type staticPolicyTest struct {
|
||||
description string
|
||||
topo *topology.CPUTopology
|
||||
numReservedCPUs int
|
||||
+ excludeReserved bool
|
||||
podUID string
|
||||
containerName string
|
||||
stAssignments state.ContainerCPUAssignments
|
||||
@@ -44,7 +45,8 @@ type staticPolicyTest struct {
|
||||
}
|
||||
|
||||
func TestStaticPolicyName(t *testing.T) {
|
||||
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ testExcl := false
|
||||
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
|
||||
|
||||
policyName := policy.Name()
|
||||
if policyName != "static" {
|
||||
@@ -74,6 +76,15 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
stDefaultCPUSet: cpuset.NewCPUSet(),
|
||||
expCSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
|
||||
},
|
||||
+ {
|
||||
+ description: "empty cpuset exclude reserved",
|
||||
+ topo: topoDualSocketHT,
|
||||
+ numReservedCPUs: 2,
|
||||
+ excludeReserved: true,
|
||||
+ stAssignments: state.ContainerCPUAssignments{},
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(),
|
||||
+ expCSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 7, 8, 9, 10, 11),
|
||||
+ },
|
||||
{
|
||||
description: "reserved cores 0 & 6 are not present in available cpuset",
|
||||
topo: topoDualSocketHT,
|
||||
@@ -120,7 +131,7 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -436,7 +447,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -479,6 +490,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyRemove(t *testing.T) {
|
||||
+ excludeReserved := false
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
description: "SingleSocketHT, DeAllocOneContainer",
|
||||
@@ -537,7 +549,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -559,6 +571,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
+ excludeReserved := false
|
||||
testCases := []struct {
|
||||
description string
|
||||
topo *topology.CPUTopology
|
||||
@@ -627,7 +640,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: tc.stAssignments,
|
||||
@@ -699,9 +712,10 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
expNewErr: fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of 0-1 did not equal 1)"),
|
||||
},
|
||||
}
|
||||
+ testExcl := false
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager())
|
||||
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
|
||||
if !reflect.DeepEqual(err, testCase.expNewErr) {
|
||||
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
|
||||
testCase.description, testCase.expNewErr, err)
|
||||
@@ -741,7 +755,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
numReservedCPUs: 1,
|
||||
reserved: cpuset.NewCPUSet(0),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7),
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
||||
expErr: fmt.Errorf("not enough cpus available to satisfy request"),
|
||||
expCPUAlloc: false,
|
||||
@@ -753,7 +767,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.NewCPUSet(0, 1),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7),
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
|
||||
expErr: nil,
|
||||
expCPUAlloc: true,
|
||||
@@ -769,7 +783,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
},
|
||||
},
|
||||
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 4, 5),
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
|
||||
pod: makePod("fakePod", "fakeContainer3", "2000m", "2000m"),
|
||||
expErr: nil,
|
||||
expCPUAlloc: true,
|
||||
@@ -777,8 +791,9 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
+ testExcl := true
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager())
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
--
|
||||
2.24.2
|
||||
|
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -1,9 +0,0 @@
|
||||
The spec file used here was from the kubernetes 1.10.0 src rpm.
|
||||
The orig file is included to show the modifications made to that
spec file, to make clear which changes were needed, and to
assist with future upversioning.
|
||||
|
||||
The contrib tarball does not have the same versioning as kubernetes and
|
||||
there is little activity in that repo.
|
||||
|
||||
The version for the contrib tarball is arbitrary.
|
@ -1,5 +0,0 @@
|
||||
VERSION=1.20.9
|
||||
TAR_NAME=kubernetes
|
||||
TAR="$TAR_NAME-v$VERSION.tar.gz"
|
||||
COPY_LIST="${CGCS_BASE}/downloads/$TAR $FILES_BASE/*"
|
||||
TIS_PATCH_VER=PKG_GITREVCOUNT
|
@ -1,79 +0,0 @@
|
||||
From 80fc45845ac260819108a6a6dabb9da7c0fd111f Mon Sep 17 00:00:00 2001
|
||||
From: Chris Friesen <chris.friesen@windriver.com>
|
||||
Date: Fri, 23 Oct 2020 17:46:10 -0600
|
||||
Subject: [PATCH 6/6] enable support for kubernetes to ignore isolcpus
|
||||
|
||||
The normal mechanisms for allocating isolated CPUs do not allow
|
||||
a mix of isolated and exclusive CPUs in the same container. In
|
||||
order to allow this in *very* limited cases where the pod spec
|
||||
is known in advance we will add the ability to disable the normal
|
||||
isolcpus behaviour.
|
||||
|
||||
If the file "/etc/kubernetes/ignore_isolcpus" exists, then kubelet
|
||||
will basically forget everything it knows about isolcpus and just
|
||||
treat them like regular CPUs.
|
||||
|
||||
The admin user can then rely on the fact that CPU allocation is
|
||||
deterministic to ensure that the isolcpus they configure end up being
|
||||
allocated to the correct pods.
|
||||
---
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager.go | 9 +++++++++
|
||||
pkg/kubelet/cm/cpumanager/policy_static.go | 8 ++++++++
|
||||
2 files changed, 17 insertions(+)
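The override added below is nothing more than an existence check on a flag file: if /etc/kubernetes/ignore_isolcpus is present, the isolcpus-aware code paths return an empty set and isolated CPUs are handled like any other CPU. A minimal sketch of that guard, with an illustrative helper name:

package main

import (
	"fmt"
	"os"
)

// ignoreIsolcpus reports whether the admin has asked kubelet to forget
// about kernel-isolated CPUs by creating the flag file.
func ignoreIsolcpus() bool {
	_, err := os.Stat("/etc/kubernetes/ignore_isolcpus")
	return err == nil
}

func main() {
	if ignoreIsolcpus() {
		fmt.Println("isolcpus awareness disabled; treating isolated CPUs as regular CPUs")
	} else {
		fmt.Println("isolcpus awareness enabled")
	}
}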
|
||||
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index 8470431c07c..fd0bdeeee07 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -19,6 +19,7 @@ package cpumanager
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
+ "os"
|
||||
"sync"
|
||||
"time"
|
||||
"strings"
|
||||
@@ -56,6 +57,14 @@ const cpuManagerStateFileName = "cpu_manager_state"
|
||||
|
||||
// get the system-level isolated CPUs
|
||||
func getIsolcpus() cpuset.CPUSet {
|
||||
+
|
||||
+ // This is a gross hack to basically turn off awareness of isolcpus to enable
|
||||
+ // isolated cpus to be allocated to pods the same way as non-isolated CPUs.
|
||||
+ if _, err := os.Stat("/etc/kubernetes/ignore_isolcpus"); err == nil {
|
||||
+ klog.Infof("[cpumanager] turning off isolcpus awareness")
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+
|
||||
dat, err := ioutil.ReadFile("/sys/devices/system/cpu/isolated")
|
||||
if err != nil {
|
||||
klog.Errorf("[cpumanager] unable to read sysfs isolcpus subdir")
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index 4acd5609748..78c5f0f2576 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -18,6 +18,7 @@ package cpumanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
+ "os"
|
||||
"strconv"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
@@ -614,6 +615,13 @@ func isKubeInfra(pod *v1.Pod) bool {
|
||||
|
||||
// get the isolated CPUs (if any) from the devices associated with a specific container
|
||||
func (p *staticPolicy) podIsolCPUs(pod *v1.Pod, container *v1.Container) cpuset.CPUSet {
|
||||
+
|
||||
+ // This is a gross hack to basically turn off awareness of isolcpus to enable
|
||||
+ // isolated cpus to be allocated to pods the same way as non-isolated CPUs.
|
||||
+ if _, err := os.Stat("/etc/kubernetes/ignore_isolcpus"); err == nil {
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+
|
||||
// NOTE: This is required for TestStaticPolicyAdd() since makePod() does
|
||||
// not create UID. We also need a way to properly stub devicemanager.
|
||||
if len(string(pod.UID)) == 0 {
|
||||
--
|
||||
2.17.1
|
||||
|
@ -1,84 +0,0 @@
|
||||
MDSFORMANPAGES="kube-apiserver.md kube-controller-manager.md kube-proxy.md kube-scheduler.md kubelet.md"
|
||||
|
||||
# remove comments from man pages
|
||||
for manpage in ${MDSFORMANPAGES}; do
|
||||
pos=$(grep -n "<\!-- END MUNGE: UNVERSIONED_WARNING -->" ${manpage} | cut -d':' -f1)
|
||||
if [ -n ${pos} ]; then
|
||||
sed -i "1,${pos}{/.*/d}" ${manpage}
|
||||
fi
|
||||
done
|
||||
|
||||
# for each man page add NAME and SYNOPSIS section
|
||||
# kube-apiserver
|
||||
sed -i -s "s/## kube-apiserver/# NAME\nkube-apiserver \- Provides the API for kubernetes orchestration.\n\n# SYNOPSIS\n**kube-apiserver** [OPTIONS]\n/" kube-apiserver.md
|
||||
|
||||
cat << 'EOF' >> kube-apiserver.md
|
||||
# EXAMPLES
|
||||
```
|
||||
/usr/bin/kube-apiserver --logtostderr=true --v=0 --etcd_servers=http://127.0.0.1:4001 --insecure_bind_address=127.0.0.1 --insecure_port=8080 --kubelet_port=10250 --service-cluster-ip-range=10.1.1.0/24 --allow_privileged=false
|
||||
```
|
||||
EOF
|
||||
# kube-controller-manager
|
||||
sed -i -s "s/## kube-controller-manager/# NAME\nkube-controller-manager \- Enforces kubernetes services.\n\n# SYNOPSIS\n**kube-controller-manager** [OPTIONS]\n/" kube-controller-manager.md
|
||||
|
||||
cat << 'EOF' >> kube-controller-manager.md
|
||||
# EXAMPLES
|
||||
```
|
||||
/usr/bin/kube-controller-manager --logtostderr=true --v=0 --master=127.0.0.1:8080
|
||||
```
|
||||
EOF
|
||||
# kube-proxy
|
||||
sed -i -s "s/## kube-proxy/# NAME\nkube-proxy \- Provides network proxy services.\n\n# SYNOPSIS\n**kube-proxy** [OPTIONS]\n/" kube-proxy.md
|
||||
|
||||
cat << 'EOF' >> kube-proxy.md
|
||||
# EXAMPLES
|
||||
```
|
||||
/usr/bin/kube-proxy --logtostderr=true --v=0 --master=http://127.0.0.1:8080
|
||||
```
|
||||
EOF
|
||||
# kube-scheduler
|
||||
sed -i -s "s/## kube-scheduler/# NAME\nkube-scheduler \- Schedules containers on hosts.\n\n# SYNOPSIS\n**kube-scheduler** [OPTIONS]\n/" kube-scheduler.md
|
||||
|
||||
cat << 'EOF' >> kube-scheduler.md
|
||||
# EXAMPLES
|
||||
```
|
||||
/usr/bin/kube-scheduler --logtostderr=true --v=0 --master=127.0.0.1:8080
|
||||
```
|
||||
EOF
|
||||
# kubelet
|
||||
sed -i -s "s/## kubelet/# NAME\nkubelet \- Processes a container manifest so the containers are launched according to how they are described.\n\n# SYNOPSIS\n**kubelet** [OPTIONS]\n/" kubelet.md
|
||||
|
||||
cat << 'EOF' >> kubelet.md
|
||||
# EXAMPLES
|
||||
```
|
||||
/usr/bin/kubelet --logtostderr=true --v=0 --api_servers=http://127.0.0.1:8080 --address=127.0.0.1 --port=10250 --hostname_override=127.0.0.1 --allow-privileged=false
|
||||
```
|
||||
EOF
|
||||
|
||||
# for all man-pages
|
||||
for md in $MDSFORMANPAGES; do
|
||||
# correct section names
|
||||
sed -i -s "s/### Synopsis/# DESCRIPTION/" $md
|
||||
sed -i -s "s/### Options/# OPTIONS/" $md
|
||||
# add header
|
||||
sed -i "s/# NAME/% KUBERNETES(1) kubernetes User Manuals\n# NAME/" $md
|
||||
# modify list of options
|
||||
# options with no value in ""
|
||||
sed -i -r 's/(^ )(-[^":][^":]*)(:)(.*)/\*\*\2\*\*\n\t\4\n/' $md
|
||||
# option with value in ""
|
||||
sed -i -r 's/(^ )(-[^":][^":]*)("[^"]*")(:)(.*)/\*\*\2\3\*\*\n\t\5\n/' $md
|
||||
# options in -s, --long
|
||||
sed -i -r 's/(^ )(-[a-z], -[^":][^":]*)(:)(.*)/\*\*\2\*\*\n\t\4\n/' $md
|
||||
sed -i -r 's/(^ )(-[a-z], -[^":][^":]*)("[^"]*")(:)(.*)/\*\*\2\3\*\*\n\t\5\n/' $md
|
||||
# remove ```
|
||||
sed -i 's/```//' $md
|
||||
# remove all lines starting with ######
|
||||
sed -i 's/^######.*//' $md
|
||||
# modify footer
|
||||
sed -i -r "s/^\[!\[Analytics\].*//" $md
|
||||
# md does not contain section => taking 1
|
||||
name="${md%.md}"
|
||||
go-md2man -in $md -out man/man1/$name.1
|
||||
done
|
||||
|
||||
|
@ -1,108 +0,0 @@
|
||||
From 62575aa6d34c52dffb02535a526f6361cdedb300 Mon Sep 17 00:00:00 2001
|
||||
From: Chris Friesen <chris.friesen@windriver.com>
|
||||
Date: Fri, 3 Sep 2021 18:05:15 -0400
|
||||
Subject: [PATCH 5/6] kubeadm: create platform pods with zero CPU resources
|
||||
|
||||
We want to specify zero CPU resources when creating the manifests
|
||||
for the static platform pods, as a workaround for the lack of
|
||||
separate resource tracking for platform resources.
|
||||
|
||||
We also specify zero CPU resources for the coredns deployment.
|
||||
manifests.go appears to be the main file for this, not sure if the
|
||||
others are used but I changed them just in case.
|
||||
|
||||
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
|
||||
---
|
||||
cluster/addons/dns/coredns/coredns.yaml.base | 2 +-
|
||||
cluster/addons/dns/coredns/coredns.yaml.in | 2 +-
|
||||
cluster/addons/dns/coredns/coredns.yaml.sed | 2 +-
|
||||
cmd/kubeadm/app/phases/addons/dns/manifests.go | 2 +-
|
||||
cmd/kubeadm/app/phases/controlplane/manifests.go | 6 +++---
|
||||
5 files changed, 7 insertions(+), 7 deletions(-)
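For context, the "0" handed to ComponentResources in the hunks below resolves to a zero CPU request on the generated container spec, so the platform static pods no longer count against normal CPU tracking. Below is a self-contained sketch of building an equivalent requests block with the upstream API types (k8s.io/api and k8s.io/apimachinery); the helper name is ours, not kubeadm's.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// zeroCPURequests builds the kind of resource block the patched kubeadm
// manifests produce: a zero CPU request for platform pods.
func zeroCPURequests() v1.ResourceRequirements {
	return v1.ResourceRequirements{
		Requests: v1.ResourceList{
			v1.ResourceCPU: resource.MustParse("0"),
		},
	}
}

func main() {
	req := zeroCPURequests()
	cpu := req.Requests[v1.ResourceCPU]
	fmt.Println("cpu request:", cpu.String()) // prints "0"
}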
|
||||
|
||||
diff --git a/cluster/addons/dns/coredns/coredns.yaml.base b/cluster/addons/dns/coredns/coredns.yaml.base
|
||||
index 460db6317db..30873a81f18 100644
|
||||
--- a/cluster/addons/dns/coredns/coredns.yaml.base
|
||||
+++ b/cluster/addons/dns/coredns/coredns.yaml.base
|
||||
@@ -138,7 +138,7 @@ spec:
|
||||
limits:
|
||||
memory: __DNS__MEMORY__LIMIT__
|
||||
requests:
|
||||
- cpu: 100m
|
||||
+ cpu: 0
|
||||
memory: 70Mi
|
||||
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||
volumeMounts:
|
||||
diff --git a/cluster/addons/dns/coredns/coredns.yaml.in b/cluster/addons/dns/coredns/coredns.yaml.in
index 35fd52f15cd..51d963282ea 100644
--- a/cluster/addons/dns/coredns/coredns.yaml.in
+++ b/cluster/addons/dns/coredns/coredns.yaml.in
@@ -138,7 +138,7 @@ spec:
limits:
memory: 'dns_memory_limit'
requests:
- cpu: 100m
+ cpu: 0
memory: 70Mi
args: [ "-conf", "/etc/coredns/Corefile" ]
volumeMounts:
diff --git a/cluster/addons/dns/coredns/coredns.yaml.sed b/cluster/addons/dns/coredns/coredns.yaml.sed
index ebe0c7182e8..dab87353509 100644
--- a/cluster/addons/dns/coredns/coredns.yaml.sed
+++ b/cluster/addons/dns/coredns/coredns.yaml.sed
@@ -138,7 +138,7 @@ spec:
limits:
memory: $DNS_MEMORY_LIMIT
requests:
- cpu: 100m
+ cpu: 0
memory: 70Mi
args: [ "-conf", "/etc/coredns/Corefile" ]
volumeMounts:
diff --git a/cmd/kubeadm/app/phases/addons/dns/manifests.go b/cmd/kubeadm/app/phases/addons/dns/manifests.go
index 014cbd773c2..18ce45d1e85 100644
--- a/cmd/kubeadm/app/phases/addons/dns/manifests.go
+++ b/cmd/kubeadm/app/phases/addons/dns/manifests.go
@@ -254,7 +254,7 @@ spec:
limits:
memory: 170Mi
requests:
- cpu: 100m
+ cpu: 0
memory: 70Mi
args: [ "-conf", "/etc/coredns/Corefile" ]
volumeMounts:
diff --git a/cmd/kubeadm/app/phases/controlplane/manifests.go b/cmd/kubeadm/app/phases/controlplane/manifests.go
index 8181bea63a4..4c4b4448dd4 100644
--- a/cmd/kubeadm/app/phases/controlplane/manifests.go
+++ b/cmd/kubeadm/app/phases/controlplane/manifests.go
@@ -60,7 +60,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", int(endpoint.BindPort), v1.URISchemeHTTPS),
ReadinessProbe: staticpodutil.ReadinessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/readyz", int(endpoint.BindPort), v1.URISchemeHTTPS),
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", int(endpoint.BindPort), v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
- Resources: staticpodutil.ComponentResources("250m"),
+ Resources: staticpodutil.ComponentResources("0"),
Env: kubeadmutil.GetProxyEnvVars(),
}, mounts.GetVolumes(kubeadmconstants.KubeAPIServer),
map[string]string{kubeadmconstants.KubeAPIServerAdvertiseAddressEndpointAnnotationKey: endpoint.String()}),
@@ -72,7 +72,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeControllerManager)),
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS),
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
- Resources: staticpodutil.ComponentResources("200m"),
+ Resources: staticpodutil.ComponentResources("0"),
Env: kubeadmutil.GetProxyEnvVars(),
}, mounts.GetVolumes(kubeadmconstants.KubeControllerManager), nil),
kubeadmconstants.KubeScheduler: staticpodutil.ComponentPod(v1.Container{
@@ -83,7 +83,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeScheduler)),
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS),
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
- Resources: staticpodutil.ComponentResources("100m"),
+ Resources: staticpodutil.ComponentResources("0"),
Env: kubeadmutil.GetProxyEnvVars(),
}, mounts.GetVolumes(kubeadmconstants.KubeScheduler), nil),
}
--
2.17.1

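The hunks above lower the CoreDNS and static control-plane CPU requests from 100m/250m/200m/100m to 0 so the scheduler reserves no CPU for them. As a point of reference only, here is a minimal Go sketch of what a zero-CPU request object looks like when built with the standard k8s.io/api and k8s.io/apimachinery packages; this is our own illustration, not the kubeadm ComponentResources helper.

    package main

    import (
        "fmt"

        v1 "k8s.io/api/core/v1"
        "k8s.io/apimachinery/pkg/api/resource"
    )

    // zeroCPURequests builds resource requirements equivalent to the
    // "cpu: 0" request used in the patched manifests: no CPU is reserved
    // by the scheduler, while the memory request and limit stay in place.
    func zeroCPURequests(memRequest, memLimit string) v1.ResourceRequirements {
        return v1.ResourceRequirements{
            Requests: v1.ResourceList{
                v1.ResourceCPU:    resource.MustParse("0"),
                v1.ResourceMemory: resource.MustParse(memRequest),
            },
            Limits: v1.ResourceList{
                v1.ResourceMemory: resource.MustParse(memLimit),
            },
        }
    }

    func main() {
        rr := zeroCPURequests("70Mi", "170Mi")
        cpu := rr.Requests[v1.ResourceCPU]
        fmt.Println(cpu.String()) // prints "0"
    }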
@@ -1,18 +0,0 @@
# Note: This dropin only works with kubeadm and kubelet v1.11+
[Service]
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
# This is a file that "kubeadm init" and "kubeadm join" generates at runtime, populating the KUBELET_KUBEADM_ARGS variable dynamically
EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
# This is a file that the user can use for overrides of the kubelet args as a last resort. Preferably, the user should use
# the .NodeRegistration.KubeletExtraArgs object in the configuration files instead. KUBELET_EXTRA_ARGS should be sourced from this file.
EnvironmentFile=-/etc/sysconfig/kubelet
ExecStart=
ExecStart=/usr/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS
ExecStartPre=-/usr/local/sbin/sanitize_kubelet_reserved_cpus.sh /etc/sysconfig/kubelet
ExecStartPre=-/usr/bin/kubelet-cgroup-setup.sh
ExecStartPost=/bin/bash -c 'echo $MAINPID > /var/run/kubelet.pid;'
ExecStopPost=/bin/rm -f /var/run/kubelet.pid
Restart=always
StartLimitInterval=0
RestartSec=10
@@ -1,132 +0,0 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2019 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# This script does minimal cgroup setup for kubelet. This creates k8s-infra
|
||||
# cgroup for a minimal set of resource controllers, and configures cpuset
|
||||
# attributes to span all online cpus and nodes. This will do nothing if
|
||||
# the k8s-infra cgroup already exists (i.e., assume already configured).
|
||||
# NOTE: The creation of directories under /sys/fs/cgroup is volatile, and
|
||||
# does not persist reboots. The cpuset.mems and cpuset.cpus is later updated
|
||||
# by puppet kubernetes.pp manifest.
|
||||
#
|
||||
|
||||
# Define minimal path
|
||||
PATH=/bin:/usr/bin:/usr/local/bin
|
||||
|
||||
# Log info message to /var/log/daemon.log
|
||||
function LOG {
|
||||
logger -p daemon.info "$0($$): $@"
|
||||
}
|
||||
|
||||
# Log error message to /var/log/daemon.log
|
||||
function ERROR {
|
||||
logger -s -p daemon.error "$0($$): ERROR: $@"
|
||||
}
|
||||
|
||||
# Create minimal cgroup directories and configure cpuset attributes if required
|
||||
function create_cgroup {
|
||||
local cg_name=$1
|
||||
local cg_nodeset=$2
|
||||
local cg_cpuset=$3
|
||||
|
||||
local CGROUP=/sys/fs/cgroup
|
||||
local CONTROLLERS_AUTO_DELETED=("pids" "hugetlb")
|
||||
local CONTROLLERS_PRESERVED=("cpuset" "memory" "cpu,cpuacct" "systemd")
|
||||
local cnt=''
|
||||
local CGDIR=''
|
||||
local RC=0
|
||||
|
||||
# Ensure that these cgroups are created every time as they are auto deleted
|
||||
for cnt in ${CONTROLLERS_AUTO_DELETED[@]}; do
|
||||
CGDIR=${CGROUP}/${cnt}/${cg_name}
|
||||
if [ -d ${CGDIR} ]; then
|
||||
LOG "Nothing to do, already configured: ${CGDIR}."
|
||||
continue
|
||||
fi
|
||||
LOG "Creating: ${CGDIR}"
|
||||
mkdir -p ${CGDIR}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Creating: ${CGDIR}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
done
|
||||
|
||||
# These cgroups are preserved so if any of these are encountered additional
|
||||
# cgroup setup is not required
|
||||
for cnt in ${CONTROLLERS_PRESERVED[@]}; do
|
||||
CGDIR=${CGROUP}/${cnt}/${cg_name}
|
||||
if [ -d ${CGDIR} ]; then
|
||||
LOG "Nothing to do, already configured: ${CGDIR}."
|
||||
exit ${RC}
|
||||
fi
|
||||
LOG "Creating: ${CGDIR}"
|
||||
mkdir -p ${CGDIR}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Creating: ${CGDIR}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
done
|
||||
|
||||
# Customize cpuset attributes
|
||||
LOG "Configuring cgroup: ${cg_name}, nodeset: ${cg_nodeset}, cpuset: ${cg_cpuset}"
|
||||
CGDIR=${CGROUP}/cpuset/${cg_name}
|
||||
local CGMEMS=${CGDIR}/cpuset.mems
|
||||
local CGCPUS=${CGDIR}/cpuset.cpus
|
||||
local CGTASKS=${CGDIR}/tasks
|
||||
|
||||
# Assign cgroup memory nodeset
|
||||
LOG "Assign nodeset ${cg_nodeset} to ${CGMEMS}"
|
||||
/bin/echo ${cg_nodeset} > ${CGMEMS}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Unable to write to: ${CGMEMS}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
|
||||
# Assign cgroup cpus
|
||||
LOG "Assign cpuset ${cg_cpuset} to ${CGCPUS}"
|
||||
/bin/echo ${cg_cpuset} > ${CGCPUS}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Assigning: ${cg_cpuset} to ${CGCPUS}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
|
||||
# Set file ownership
|
||||
chown root:root ${CGMEMS} ${CGCPUS} ${CGTASKS}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Setting owner for: ${CGMEMS}, ${CGCPUS}, ${CGTASKS}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
|
||||
# Set file mode permissions
|
||||
chmod 644 ${CGMEMS} ${CGCPUS} ${CGTASKS}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Setting mode for: ${CGMEMS}, ${CGCPUS}, ${CGTASKS}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
|
||||
return ${RC}
|
||||
}
|
||||
|
||||
if [ $UID -ne 0 ]; then
|
||||
ERROR "Require sudo/root."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Configure default kubepods cpuset to span all online cpus and nodes.
|
||||
ONLINE_NODESET=$(/bin/cat /sys/devices/system/node/online)
|
||||
ONLINE_CPUSET=$(/bin/cat /sys/devices/system/cpu/online)
|
||||
|
||||
# Configure kubelet cgroup to match cgroupRoot.
|
||||
create_cgroup 'k8s-infra' ${ONLINE_NODESET} ${ONLINE_CPUSET}
|
||||
|
||||
exit $?
|
||||
|
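The kubelet-cgroup-setup.sh script above only shells out to mkdir, echo, chown and chmod to build the k8s-infra cgroup and seed its cpuset from the kernel's online masks. The cpuset-seeding step can be sketched in Go as follows; this is purely an illustration of the sysfs reads and cgroupfs writes the script performs (it skips the ownership and permission handling), and the helper names are ours.

    package main

    import (
        "fmt"
        "os"
        "strings"
    )

    // readSysfs returns the trimmed contents of a sysfs file such as
    // /sys/devices/system/cpu/online (range strings like "0-5").
    func readSysfs(path string) (string, error) {
        b, err := os.ReadFile(path)
        if err != nil {
            return "", err
        }
        return strings.TrimSpace(string(b)), nil
    }

    // seedCpuset mirrors the script's final step: copy the online node and
    // CPU masks into the cpuset controller of the named cgroup.
    func seedCpuset(cgroupName string) error {
        nodes, err := readSysfs("/sys/devices/system/node/online")
        if err != nil {
            return err
        }
        cpus, err := readSysfs("/sys/devices/system/cpu/online")
        if err != nil {
            return err
        }
        dir := "/sys/fs/cgroup/cpuset/" + cgroupName
        if err := os.MkdirAll(dir, 0o755); err != nil {
            return err
        }
        if err := os.WriteFile(dir+"/cpuset.mems", []byte(nodes), 0o644); err != nil {
            return err
        }
        return os.WriteFile(dir+"/cpuset.cpus", []byte(cpus), 0o644)
    }

    func main() {
        if err := seedCpuset("k8s-infra"); err != nil {
            fmt.Fprintln(os.Stderr, err)
            os.Exit(1)
        }
    }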
@@ -1,111 +0,0 @@
From 019172946c0146eca91d611595866c70a8ed3ddb Mon Sep 17 00:00:00 2001
From: Jim Gauld <james.gauld@windriver.com>
Date: Fri, 3 Sep 2021 14:10:46 -0400
Subject: [PATCH 1/6] kubelet cpumanager disable CFS quota throttling for
 Guaranteed pods

This disables CFS CPU quota to avoid performance degradation due to
Linux kernel CFS quota implementation. Note that 4.18 kernel attempts
to solve the CFS throttling problem, but there are reports that it is
not completely effective.

This disables CFS quota throttling for Guaranteed pods for both
parent and container cgroups by writing -1 to cgroup cpu.cfs_quota_us.
Disabling has a dramatic latency improvement for HTTP response times.

Signed-off-by: Jim Gauld <james.gauld@windriver.com>
---
 pkg/kubelet/cm/cpumanager/cpu_manager.go | 22 ++++++++++++++++++++++
 pkg/kubelet/cm/helpers_linux.go | 5 +++++
 pkg/kubelet/cm/helpers_linux_test.go | 8 ++++----
 3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index 44368efc441..88cfbc1fa83 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -36,6 +36,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/status"
|
||||
+ v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
|
||||
)
|
||||
|
||||
// ActivePodsFunc is a function that returns a list of pods to reconcile.
|
||||
@@ -242,6 +243,14 @@ func (m *manager) AddContainer(p *v1.Pod, c *v1.Container, containerID string) e
|
||||
// Get the CPUs assigned to the container during Allocate()
|
||||
// (or fall back to the default CPUSet if none were assigned).
|
||||
cpus := m.state.GetCPUSetOrDefault(string(p.UID), c.Name)
|
||||
+
|
||||
+ // Guaranteed PODs should not have CFS quota throttle
|
||||
+ if m.policy.Name() == string(PolicyStatic) && v1qos.GetPodQOS(p) == v1.PodQOSGuaranteed {
|
||||
+ err := m.disableContainerCPUQuota(containerID)
|
||||
+ if err != nil {
|
||||
+ klog.Errorf("[cpumanager] AddContainer disable CPU Quota error: %v", err)
|
||||
+ }
|
||||
+ }
|
||||
m.Unlock()
|
||||
|
||||
if !cpus.IsEmpty() {
|
||||
@@ -489,3 +498,16 @@ func (m *manager) GetCPUs(podUID, containerName string) []int64 {
|
||||
}
|
||||
return result
|
||||
}
|
||||
+
|
||||
+func (m *manager) disableContainerCPUQuota(containerID string) error {
|
||||
+ // Disable CFS CPU quota to avoid performance degradation due to
|
||||
+ // Linux kernel CFS throttle implementation.
|
||||
+ // NOTE: 4.18 kernel attempts to solve CFS throttling problem,
|
||||
+ // but there are reports that it is not completely effective.
|
||||
+ return m.containerRuntime.UpdateContainerResources(
|
||||
+ containerID,
|
||||
+ &runtimeapi.LinuxContainerResources{
|
||||
+ CpuPeriod: 100000,
|
||||
+ CpuQuota: -1,
|
||||
+ })
|
||||
+}
|
||||
diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go
|
||||
index 9b115ab5380..d3185e1e958 100644
|
||||
--- a/pkg/kubelet/cm/helpers_linux.go
|
||||
+++ b/pkg/kubelet/cm/helpers_linux.go
|
||||
@@ -166,6 +166,11 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64)
|
||||
// determine the qos class
|
||||
qosClass := v1qos.GetPodQOS(pod)
|
||||
|
||||
+ // disable cfs quota for guaranteed pods
|
||||
+ if qosClass == v1.PodQOSGuaranteed {
|
||||
+ cpuQuota = int64(-1)
|
||||
+ }
|
||||
+
|
||||
// build the result
|
||||
result := &ResourceConfig{}
|
||||
if qosClass == v1.PodQOSGuaranteed {
|
||||
diff --git a/pkg/kubelet/cm/helpers_linux_test.go b/pkg/kubelet/cm/helpers_linux_test.go
|
||||
index 56d765fbc22..0c43afe5875 100644
|
||||
--- a/pkg/kubelet/cm/helpers_linux_test.go
|
||||
+++ b/pkg/kubelet/cm/helpers_linux_test.go
|
||||
@@ -63,8 +63,8 @@ func TestResourceConfigForPod(t *testing.T) {
|
||||
burstablePartialShares := MilliCPUToShares(200)
|
||||
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
|
||||
guaranteedShares := MilliCPUToShares(100)
|
||||
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
|
||||
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
|
||||
+ guaranteedQuota := int64(-1)
|
||||
+ guaranteedTunedQuota := int64(-1)
|
||||
memoryQuantity = resource.MustParse("100Mi")
|
||||
cpuNoLimit := int64(-1)
|
||||
guaranteedMemory := memoryQuantity.Value()
|
||||
@@ -283,8 +283,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
||||
burstablePartialShares := MilliCPUToShares(200)
|
||||
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
|
||||
guaranteedShares := MilliCPUToShares(100)
|
||||
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
|
||||
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
|
||||
+ guaranteedQuota := int64(-1)
|
||||
+ guaranteedTunedQuota := int64(-1)
|
||||
memoryQuantity = resource.MustParse("100Mi")
|
||||
cpuNoLimit := int64(-1)
|
||||
guaranteedMemory := memoryQuantity.Value()
|
||||
--
|
||||
2.17.1
|
||||
|
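The patch above disables the quota through the container runtime by requesting CpuQuota: -1 via UpdateContainerResources. For readers unfamiliar with the underlying mechanism, here is a standalone Go sketch of the equivalent cgroup-v1 write; the container cgroup path below is hypothetical and only for illustration, and this is not the code path the patch uses.

    package main

    import (
        "fmt"
        "os"
    )

    // disableCFSQuota writes -1 to cpu.cfs_quota_us for the given cgroup,
    // which removes the CFS bandwidth limit while leaving cpu.shares alone.
    func disableCFSQuota(cgroupPath string) error {
        quotaFile := cgroupPath + "/cpu.cfs_quota_us"
        // "-1" means "no quota" to the CFS bandwidth controller.
        return os.WriteFile(quotaFile, []byte("-1"), 0o644)
    }

    func main() {
        // Hypothetical container cgroup path, for illustration only.
        path := "/sys/fs/cgroup/cpu/kubepods/pod1234/container5678"
        if err := disableCFSQuota(path); err != nil {
            fmt.Fprintln(os.Stderr, "disable quota:", err)
            os.Exit(1)
        }
    }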
@@ -1,139 +0,0 @@
From 5471fc2f03d1d14ceb250bf98f400cee9feb6983 Mon Sep 17 00:00:00 2001
From: Jim Gauld <james.gauld@windriver.com>
Date: Fri, 3 Sep 2021 15:57:58 -0400
Subject: [PATCH 3/6] kubelet cpumanager infrastructure pods use system
 reserved CPUs

This assigns system infrastructure pods to the "reserved" cpuset
to isolate them from the shared pool of CPUs.

Infrastructure pods include any pods that belong to the kube-system,
armada, cert-manager, vault, platform-deployment-manager, portieris,
notification or flux-helm namespaces.

The implementation is a bit simplistic; it is assumed that the
"reserved" cpuset is large enough to handle all infrastructure pods'
CPU allocations.

This also prevents infrastructure pods from using Guaranteed resources.

Signed-off-by: Jim Gauld <james.gauld@windriver.com>
---
 pkg/kubelet/cm/cpumanager/policy_static.go | 44 +++++++++++++++++++
 .../cm/cpumanager/policy_static_test.go | 19 +++++++-
 2 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index e892d63641b..ab3206c5dc4 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -33,6 +33,11 @@ import (
|
||||
// PolicyStatic is the name of the static policy
|
||||
const PolicyStatic policyName = "static"
|
||||
|
||||
+// Define namespaces used by platform infrastructure pods
|
||||
+var infraNamespaces = [...]string{
|
||||
+ "kube-system", "armada", "cert-manager", "platform-deployment-manager", "portieris", "vault", "notification", "flux-helm",
|
||||
+}
|
||||
+
|
||||
// staticPolicy is a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
@@ -233,6 +238,31 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c
|
||||
}
|
||||
|
||||
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
|
||||
+ // Process infra pods before guaranteed pods
|
||||
+ if isKubeInfra(pod) {
|
||||
+ // Container belongs in reserved pool.
|
||||
+ // We don't want to fall through to the p.guaranteedCPUs() clause below so return either nil or error.
|
||||
+ if _, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
|
||||
+ klog.Infof("[cpumanager] static policy: reserved container already present in state, skipping " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||
+ return nil
|
||||
+ }
|
||||
+
|
||||
+ cpuset := p.reserved
|
||||
+ if cpuset.IsEmpty() {
|
||||
+ // If this happens then someone messed up.
|
||||
+ return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved)
|
||||
+ }
|
||||
+ s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||
+ klog.Infof("[cpumanager] static policy: reserved: AddContainer " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset)
|
||||
+ return nil
|
||||
+ }
|
||||
+
|
||||
if numCPUs := p.guaranteedCPUs(pod, container); numCPUs != 0 {
|
||||
klog.Infof("[cpumanager] static policy: Allocate (pod: %s, container: %s)", format.Pod(pod), container.Name)
|
||||
// container belongs in an exclusively allocated pool
|
||||
@@ -322,6 +352,10 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int
|
||||
if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
|
||||
return 0
|
||||
}
|
||||
+ // Infrastructure pods use reserved CPUs even if they're in the Guaranteed QoS class
|
||||
+ if isKubeInfra(pod) {
|
||||
+ return 0
|
||||
+ }
|
||||
// Safe downcast to do for all systems with < 2.1 billion CPUs.
|
||||
// Per the language spec, `int` is guaranteed to be at least 32 bits wide.
|
||||
// https://golang.org/ref/spec#Numeric_types
|
||||
@@ -524,3 +558,13 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu
|
||||
|
||||
return hints
|
||||
}
|
||||
+
|
||||
+// check if a given pod is in a platform infrastructure namespace
|
||||
+func isKubeInfra(pod *v1.Pod) bool {
|
||||
+ for _, namespace := range infraNamespaces {
|
||||
+ if namespace == pod.Namespace {
|
||||
+ return true
|
||||
+ }
|
||||
+ }
|
||||
+ return false
|
||||
+}
|
||||
\ No newline at end of file
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
index 9c7e4f146ff..5cfd9a8e24e 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
@@ -747,7 +747,8 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
-
|
||||
+ infraPod := makePod("fakePod", "fakeContainer2", "200m", "200m")
|
||||
+ infraPod.Namespace = "kube-system"
|
||||
testCases := []staticPolicyTestWithResvList{
|
||||
{
|
||||
description: "GuPodSingleCore, SingleSocketHT, ExpectError",
|
||||
@@ -789,6 +790,22 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
expCPUAlloc: true,
|
||||
expCSet: cpuset.NewCPUSet(4, 5),
|
||||
},
|
||||
+ {
|
||||
+ description: "InfraPod, SingleSocketHT, ExpectAllocReserved",
|
||||
+ topo: topoSingleSocketHT,
|
||||
+ numReservedCPUs: 2,
|
||||
+ reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ stAssignments: state.ContainerCPUAssignments{
|
||||
+ "fakePod": map[string]cpuset.CPUSet{
|
||||
+ "fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
+ },
|
||||
+ },
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
|
||||
+ pod: infraPod,
|
||||
+ expErr: nil,
|
||||
+ expCPUAlloc: true,
|
||||
+ expCSet: cpuset.NewCPUSet(0, 1),
|
||||
+ },
|
||||
}
|
||||
|
||||
testExcl := true
|
||||
--
|
||||
2.17.1
|
||||
|
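The isKubeInfra() helper introduced in the patch above is a plain namespace match against the infraNamespaces list. A table-driven test sketch for it might look like the following; the test name and cases are our own and are not part of the patch, which only extends policy_static_test.go.

    package cpumanager

    import (
        "testing"

        v1 "k8s.io/api/core/v1"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    )

    // TestIsKubeInfra sketches how the namespace check could be exercised:
    // pods in platform namespaces count as infrastructure, everything else
    // falls through to normal CPU accounting.
    func TestIsKubeInfra(t *testing.T) {
        cases := []struct {
            namespace string
            want      bool
        }{
            {"kube-system", true},
            {"cert-manager", true},
            {"default", false},
            {"my-app", false},
        }
        for _, c := range cases {
            pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: c.namespace}}
            if got := isKubeInfra(pod); got != c.want {
                t.Errorf("isKubeInfra(ns=%q) = %v, want %v", c.namespace, got, c.want)
            }
        }
    }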
@@ -1,526 +0,0 @@
From 05db95e27509e60022a62c1001be2191ba42d2a3 Mon Sep 17 00:00:00 2001
From: Jim Gauld <james.gauld@windriver.com>
Date: Fri, 3 Sep 2021 17:30:31 -0400
Subject: [PATCH 4/6] kubelet cpumanager introduce concept of isolated CPUs

This introduces the concept of "isolated CPUs", which are CPUs that
have been isolated at the kernel level via the "isolcpus" kernel boot
parameter.

When starting the kubelet process, two separate sets of reserved CPUs
may be specified. With this change CPUs reserved via
'--system-reserved=cpu' will be used for infrastructure pods while the
isolated CPUs should be reserved via '--kube-reserved=cpu' to cause
kubelet to skip over them for "normal" CPU resource tracking. The
kubelet code will double-check that the specified isolated CPUs match
what the kernel exposes in "/sys/devices/system/cpu/isolated".

A plugin (outside the scope of this commit) will expose the isolated
CPUs to kubelet via the device plugin API.

If a pod specifies some number of "isolcpus" resources, the device
manager will allocate them. In this code we check whether such
resources have been allocated, and if so we set the container cpuset to
the isolated CPUs. This does mean that it really only makes sense to
specify "isolcpus" resources for best-effort or burstable pods, not for
guaranteed ones since that would throw off the accounting code. In
order to ensure the accounting still works as designed, if "isolcpus"
are specified for guaranteed pods, the affinity will be set to the
non-isolated CPUs.

Signed-off-by: Jim Gauld <james.gauld@windriver.com>
Co-authored-by: Chris Friesen <chris.friesen@windriver.com>
---
 pkg/kubelet/cm/container_manager_linux.go | 1 +
 pkg/kubelet/cm/cpumanager/cpu_manager.go | 31 ++++++-
 pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 13 ++-
 pkg/kubelet/cm/cpumanager/policy_static.go | 84 +++++++++++++++++--
 .../cm/cpumanager/policy_static_test.go | 44 ++++++++--
 5 files changed, 155 insertions(+), 18 deletions(-)

diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go
|
||||
index eeea6a8b7e4..4f250b2a6ca 100644
|
||||
--- a/pkg/kubelet/cm/container_manager_linux.go
|
||||
+++ b/pkg/kubelet/cm/container_manager_linux.go
|
||||
@@ -333,6 +333,7 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
|
||||
cm.GetNodeAllocatableReservation(),
|
||||
nodeConfig.KubeletRootDir,
|
||||
cm.topologyManager,
|
||||
+ cm.deviceManager,
|
||||
)
|
||||
if err != nil {
|
||||
klog.Errorf("failed to initialize cpu manager: %v", err)
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index a0586c7b860..8470431c07c 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -21,6 +21,8 @@ import (
|
||||
"math"
|
||||
"sync"
|
||||
"time"
|
||||
+ "strings"
|
||||
+ "io/ioutil"
|
||||
|
||||
cadvisorapi "github.com/google/cadvisor/info/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
@@ -34,6 +36,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/status"
|
||||
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
|
||||
@@ -51,6 +54,25 @@ type policyName string
|
||||
// cpuManagerStateFileName is the file name where cpu manager stores its state
|
||||
const cpuManagerStateFileName = "cpu_manager_state"
|
||||
|
||||
+// get the system-level isolated CPUs
|
||||
+func getIsolcpus() cpuset.CPUSet {
|
||||
+ dat, err := ioutil.ReadFile("/sys/devices/system/cpu/isolated")
|
||||
+ if err != nil {
|
||||
+ klog.Errorf("[cpumanager] unable to read sysfs isolcpus subdir")
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+
|
||||
+ // The isolated cpus string ends in a newline
|
||||
+ cpustring := strings.TrimSuffix(string(dat), "\n")
|
||||
+ cset, err := cpuset.Parse(cpustring)
|
||||
+ if err != nil {
|
||||
+ klog.Errorf("[cpumanager] unable to parse sysfs isolcpus string to cpuset")
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+
|
||||
+ return cset
|
||||
+}
|
||||
+
|
||||
// Manager interface provides methods for Kubelet to manage pod cpus.
|
||||
type Manager interface {
|
||||
// Start is called during Kubelet initialization.
|
||||
@@ -136,7 +158,7 @@ func (s *sourcesReadyStub) AddSource(source string) {}
|
||||
func (s *sourcesReadyStub) AllReady() bool { return true }
|
||||
|
||||
// NewManager creates new cpu manager based on provided policy
|
||||
-func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
|
||||
+func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store, deviceManager devicemanager.Manager) (Manager, error) {
|
||||
var topo *topology.CPUTopology
|
||||
var policy Policy
|
||||
|
||||
@@ -173,8 +195,11 @@ func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo
|
||||
// NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
|
||||
// This variable is primarily to make testing easier.
|
||||
excludeReserved := true
|
||||
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, excludeReserved)
|
||||
-
|
||||
+ // isolCPUs is the set of kernel-isolated CPUs. They should be a subset of specificCPUs or
|
||||
+ // of the CPUs that NewStaticPolicy() will pick if numReservedCPUs is set. It's only in the
|
||||
+ // argument list here for ease of testing, it's really internal to the policy.
|
||||
+ isolCPUs := getIsolcpus()
|
||||
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, isolCPUs, affinity, deviceManager, excludeReserved)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new static policy error: %v", err)
|
||||
}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
index a155791e75f..7a6ea90b3c5 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
@@ -38,6 +38,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
)
|
||||
|
||||
type mockState struct {
|
||||
@@ -211,6 +212,7 @@ func makeMultiContainerPod(initCPUs, appCPUs []struct{ request, limit string })
|
||||
}
|
||||
|
||||
func TestCPUManagerAdd(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testExcl := false
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
@@ -226,7 +228,9 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||
},
|
||||
0,
|
||||
cpuset.NewCPUSet(),
|
||||
+ cpuset.NewCPUSet(),
|
||||
topologymanager.NewFakeManager(),
|
||||
+ testDM,
|
||||
testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
@@ -483,8 +487,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
||||
}
|
||||
|
||||
testExcl := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testExcl)
|
||||
|
||||
state := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -636,7 +641,8 @@ func TestCPUManagerGenerate(t *testing.T) {
|
||||
}
|
||||
defer os.RemoveAll(sDir)
|
||||
|
||||
- mgr, err := NewManager(testCase.cpuPolicyName, 5*time.Second, machineInfo, cpuset.NewCPUSet(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager())
|
||||
+ testDM, err := devicemanager.NewManagerStub()
|
||||
+ mgr, err := NewManager(testCase.cpuPolicyName, 5*time.Second, machineInfo, cpuset.NewCPUSet(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager(), testDM)
|
||||
if testCase.expectedError != nil {
|
||||
if !strings.Contains(err.Error(), testCase.expectedError.Error()) {
|
||||
t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), testCase.expectedError.Error())
|
||||
@@ -991,6 +997,7 @@ func TestReconcileState(t *testing.T) {
|
||||
// the following tests are with --reserved-cpus configured
|
||||
func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
testExcl := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 4,
|
||||
@@ -1005,7 +1012,9 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
},
|
||||
1,
|
||||
cpuset.NewCPUSet(0),
|
||||
+ cpuset.NewCPUSet(),
|
||||
topologymanager.NewFakeManager(),
|
||||
+ testDM,
|
||||
testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index ab3206c5dc4..4acd5609748 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -18,6 +18,7 @@ package cpumanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
+ "strconv"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
@@ -28,6 +29,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||
"k8s.io/kubernetes/pkg/kubelet/util/format"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
)
|
||||
|
||||
// PolicyStatic is the name of the static policy
|
||||
@@ -81,6 +83,10 @@ type staticPolicy struct {
|
||||
topology *topology.CPUTopology
|
||||
// set of CPUs that is not available for exclusive assignment
|
||||
reserved cpuset.CPUSet
|
||||
+ // subset of reserved CPUs with isolcpus attribute
|
||||
+ isolcpus cpuset.CPUSet
|
||||
+ // parent containerManager, used to get device list
|
||||
+ deviceManager devicemanager.Manager
|
||||
// If true, default CPUSet should exclude reserved CPUs
|
||||
excludeReserved bool
|
||||
// topology manager reference to get container Topology affinity
|
||||
@@ -95,7 +101,7 @@ var _ Policy = &staticPolicy{}
|
||||
// NewStaticPolicy returns a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, excludeReserved bool) (Policy, error) {
|
||||
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, isolCPUs cpuset.CPUSet, affinity topologymanager.Store, deviceManager devicemanager.Manager, excludeReserved bool) (Policy, error) {
|
||||
allCPUs := topology.CPUDetails.CPUs()
|
||||
var reserved cpuset.CPUSet
|
||||
if reservedCPUs.Size() > 0 {
|
||||
@@ -116,9 +122,17 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||
|
||||
klog.Infof("[cpumanager] reserved %d CPUs (\"%s\") not available for exclusive assignment", reserved.Size(), reserved)
|
||||
|
||||
+ if !isolCPUs.IsSubsetOf(reserved) {
|
||||
+ klog.Errorf("[cpumanager] isolCPUs %v is not a subset of reserved %v", isolCPUs, reserved)
|
||||
+ reserved = reserved.Union(isolCPUs)
|
||||
+ klog.Warningf("[cpumanager] mismatch isolCPUs %v, force reserved %v", isolCPUs, reserved)
|
||||
+ }
|
||||
+
|
||||
return &staticPolicy{
|
||||
topology: topology,
|
||||
reserved: reserved,
|
||||
+ isolcpus: isolCPUs,
|
||||
+ deviceManager: deviceManager,
|
||||
excludeReserved: excludeReserved,
|
||||
affinity: affinity,
|
||||
cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||
@@ -155,8 +169,8 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
} else {
|
||||
s.SetDefaultCPUSet(allCPUs)
|
||||
}
|
||||
- klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
|
||||
- allCPUs, p.reserved, s.GetDefaultCPUSet())
|
||||
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, isolcpus:%v, default:%v\n",
|
||||
+ allCPUs, p.reserved, p.isolcpus, s.GetDefaultCPUSet())
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -249,12 +263,12 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||
return nil
|
||||
}
|
||||
|
||||
- cpuset := p.reserved
|
||||
+ cpuset := p.reserved.Clone().Difference(p.isolcpus)
|
||||
if cpuset.IsEmpty() {
|
||||
// If this happens then someone messed up.
|
||||
return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus " +
|
||||
- "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v",
|
||||
- pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved)
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v, isolcpus:%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved, p.isolcpus)
|
||||
}
|
||||
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||
klog.Infof("[cpumanager] static policy: reserved: AddContainer " +
|
||||
@@ -285,8 +299,37 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||
}
|
||||
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||
p.updateCPUsToReuse(pod, container, cpuset)
|
||||
+ klog.Infof("[cpumanager] guaranteed: AddContainer " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); numCPUS=%d, cpuset=%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, numCPUs, cpuset)
|
||||
+ return nil
|
||||
+ }
|
||||
|
||||
+ if isolcpus := p.podIsolCPUs(pod, container); isolcpus.Size() > 0 {
|
||||
+ // container has requested isolated CPUs
|
||||
+ if set, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
|
||||
+ if set.Equals(isolcpus) {
|
||||
+ klog.Infof("[cpumanager] isolcpus container already present in state, skipping " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||
+ return nil
|
||||
+ } else {
|
||||
+ klog.Infof("[cpumanager] isolcpus container state has cpus %v, should be %v" +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||
+ isolcpus, set, pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||
+ }
|
||||
+ }
|
||||
+ // Note that we do not do anything about init containers here.
|
||||
+ // It looks like devices are allocated per-pod based on effective requests/limits
|
||||
+ // and extra devices from initContainers are not freed up when the regular containers start.
|
||||
+ // TODO: confirm this is still true for 1.20
|
||||
+ s.SetCPUSet(string(pod.UID), container.Name, isolcpus)
|
||||
+ klog.Infof("[cpumanager] isolcpus: AddContainer " +
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, isolcpus)
|
||||
+ return nil
|
||||
}
|
||||
+
|
||||
// container belongs in the shared pool (nothing to do; use default cpuset)
|
||||
return nil
|
||||
}
|
||||
@@ -567,4 +610,33 @@ func isKubeInfra(pod *v1.Pod) bool {
|
||||
}
|
||||
}
|
||||
return false
|
||||
+}
|
||||
+
|
||||
+// get the isolated CPUs (if any) from the devices associated with a specific container
|
||||
+func (p *staticPolicy) podIsolCPUs(pod *v1.Pod, container *v1.Container) cpuset.CPUSet {
|
||||
+ // NOTE: This is required for TestStaticPolicyAdd() since makePod() does
|
||||
+ // not create UID. We also need a way to properly stub devicemanager.
|
||||
+ if len(string(pod.UID)) == 0 {
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+ devices := p.deviceManager.GetDevices(string(pod.UID), container.Name)
|
||||
+ for _, dev := range devices {
|
||||
+ // this resource name needs to match the isolcpus device plugin
|
||||
+ if dev.ResourceName == "windriver.com/isolcpus" {
|
||||
+ cpuStrList := dev.DeviceIds
|
||||
+ if len(cpuStrList) > 0 {
|
||||
+ cpuSet := cpuset.NewCPUSet()
|
||||
+ // loop over the list of strings, convert each one to int, add to cpuset
|
||||
+ for _, cpuStr := range cpuStrList {
|
||||
+ cpu, err := strconv.Atoi(cpuStr)
|
||||
+ if err != nil {
|
||||
+ panic(err)
|
||||
+ }
|
||||
+ cpuSet = cpuSet.Union(cpuset.NewCPUSet(cpu))
|
||||
+ }
|
||||
+ return cpuSet
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ return cpuset.NewCPUSet()
|
||||
}
|
||||
\ No newline at end of file
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
index 5cfd9a8e24e..8307aa1e3f0 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
@@ -27,6 +27,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
)
|
||||
|
||||
type staticPolicyTest struct {
|
||||
@@ -45,8 +46,9 @@ type staticPolicyTest struct {
|
||||
}
|
||||
|
||||
func TestStaticPolicyName(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testExcl := false
|
||||
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
|
||||
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testExcl)
|
||||
|
||||
policyName := policy.Name()
|
||||
if policyName != "static" {
|
||||
@@ -56,6 +58,7 @@ func TestStaticPolicyName(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyStart(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
description: "non-corrupted state",
|
||||
@@ -131,7 +134,7 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
|
||||
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testCase.excludeReserved)
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -179,6 +182,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
largeTopoSock0CPUSet := largeTopoSock0Builder.Result()
|
||||
largeTopoSock1CPUSet := largeTopoSock1Builder.Result()
|
||||
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
description: "GuPodSingleCore, SingleSocketHT, ExpectError",
|
||||
@@ -447,7 +451,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testCase.excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -490,6 +494,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyRemove(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
excludeReserved := false
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
@@ -549,7 +554,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -571,6 +576,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
excludeReserved := false
|
||||
testCases := []struct {
|
||||
description string
|
||||
@@ -640,7 +646,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
|
||||
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, excludeReserved)
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: tc.stAssignments,
|
||||
@@ -673,6 +679,7 @@ type staticPolicyTestWithResvList struct {
|
||||
topo *topology.CPUTopology
|
||||
numReservedCPUs int
|
||||
reserved cpuset.CPUSet
|
||||
+ isolcpus cpuset.CPUSet
|
||||
stAssignments state.ContainerCPUAssignments
|
||||
stDefaultCPUSet cpuset.CPUSet
|
||||
pod *v1.Pod
|
||||
@@ -713,9 +720,10 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
},
|
||||
}
|
||||
testExcl := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
|
||||
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testExcl)
|
||||
if !reflect.DeepEqual(err, testCase.expNewErr) {
|
||||
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
|
||||
testCase.description, testCase.expNewErr, err)
|
||||
@@ -755,6 +763,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 1,
|
||||
reserved: cpuset.NewCPUSet(0),
|
||||
+ isolcpus: cpuset.NewCPUSet(),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
stDefaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
||||
@@ -767,6 +776,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ isolcpus: cpuset.NewCPUSet(),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
stDefaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
|
||||
@@ -779,6 +789,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ isolcpus: cpuset.NewCPUSet(),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"fakePod": map[string]cpuset.CPUSet{
|
||||
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
@@ -795,6 +806,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ isolcpus: cpuset.NewCPUSet(),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"fakePod": map[string]cpuset.CPUSet{
|
||||
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
@@ -806,11 +818,29 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
expCPUAlloc: true,
|
||||
expCSet: cpuset.NewCPUSet(0, 1),
|
||||
},
|
||||
+ {
|
||||
+ description: "InfraPod, SingleSocketHT, Isolcpus, ExpectAllocReserved",
|
||||
+ topo: topoSingleSocketHT,
|
||||
+ numReservedCPUs: 2,
|
||||
+ reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ isolcpus: cpuset.NewCPUSet(1),
|
||||
+ stAssignments: state.ContainerCPUAssignments{
|
||||
+ "fakePod": map[string]cpuset.CPUSet{
|
||||
+ "fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
+ },
|
||||
+ },
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
|
||||
+ pod: infraPod,
|
||||
+ expErr: nil,
|
||||
+ expCPUAlloc: true,
|
||||
+ expCSet: cpuset.NewCPUSet(0),
|
||||
+ },
|
||||
}
|
||||
|
||||
testExcl := true
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, testCase.isolcpus, topologymanager.NewFakeManager(), testDM, testExcl)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
--
|
||||
2.17.1
|
||||
|
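The getIsolcpus() helper in the patch above reads /sys/devices/system/cpu/isolated and relies on cpuset.Parse to expand kernel CPU list strings such as "3-5,7". In case that format is unfamiliar, here is a dependency-free sketch of the same expansion; the function name is our own choice and this is not the kubelet cpuset package.

    package main

    import (
        "fmt"
        "strconv"
        "strings"
    )

    // parseCPUList expands a kernel CPU list such as "3-5,7" into the
    // individual CPU ids {3, 4, 5, 7}. An empty string yields no CPUs.
    func parseCPUList(s string) ([]int, error) {
        s = strings.TrimSpace(s)
        if s == "" {
            return nil, nil
        }
        var cpus []int
        for _, part := range strings.Split(s, ",") {
            if lo, hi, found := strings.Cut(part, "-"); found {
                start, err := strconv.Atoi(lo)
                if err != nil {
                    return nil, err
                }
                end, err := strconv.Atoi(hi)
                if err != nil {
                    return nil, err
                }
                for cpu := start; cpu <= end; cpu++ {
                    cpus = append(cpus, cpu)
                }
            } else {
                cpu, err := strconv.Atoi(part)
                if err != nil {
                    return nil, err
                }
                cpus = append(cpus, cpu)
            }
        }
        return cpus, nil
    }

    func main() {
        cpus, err := parseCPUList("3-5,7")
        if err != nil {
            panic(err)
        }
        fmt.Println(cpus) // [3 4 5 7]
    }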
@@ -1,313 +0,0 @@
From c85d0d1a42fc5989f2e989daf46fdedeebf486a4 Mon Sep 17 00:00:00 2001
From: Jim Gauld <james.gauld@windriver.com>
Date: Fri, 3 Sep 2021 15:31:31 -0400
Subject: [PATCH 2/6] kubelet cpumanager keep normal containers off reserved
 CPUs

When starting the kubelet process, two separate sets of reserved CPUs
may be specified. With this change CPUs reserved via '--system-reserved=cpu'
or '--kube-reserved=cpu' will be ignored by kubernetes itself. A small
tweak to the default CPU affinity ensures that "normal" Kubernetes
pods won't run on the reserved CPUs.

Signed-off-by: Jim Gauld <james.gauld@windriver.com>
---
 pkg/kubelet/cm/cpumanager/cpu_manager.go | 6 +++-
 pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 11 ++++--
 pkg/kubelet/cm/cpumanager/policy_static.go | 29 ++++++++++++---
 .../cm/cpumanager/policy_static_test.go | 35 +++++++++++++------
 4 files changed, 62 insertions(+), 19 deletions(-)

diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index 88cfbc1fa83..a0586c7b860 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -170,7 +170,11 @@ func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo
|
||||
// exclusively allocated.
|
||||
reservedCPUsFloat := float64(reservedCPUs.MilliValue()) / 1000
|
||||
numReservedCPUs := int(math.Ceil(reservedCPUsFloat))
|
||||
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity)
|
||||
+ // NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
|
||||
+ // This variable is primarily to make testing easier.
|
||||
+ excludeReserved := true
|
||||
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, excludeReserved)
|
||||
+
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new static policy error: %v", err)
|
||||
}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
index 34b170be234..a155791e75f 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
@@ -211,6 +211,7 @@ func makeMultiContainerPod(initCPUs, appCPUs []struct{ request, limit string })
|
||||
}
|
||||
|
||||
func TestCPUManagerAdd(t *testing.T) {
|
||||
+ testExcl := false
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 4,
|
||||
@@ -225,7 +226,8 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||
},
|
||||
0,
|
||||
cpuset.NewCPUSet(),
|
||||
- topologymanager.NewFakeManager())
|
||||
+ topologymanager.NewFakeManager(),
|
||||
+ testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
updateErr error
|
||||
@@ -480,8 +482,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
+ testExcl := false
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
|
||||
|
||||
state := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -987,6 +990,7 @@ func TestReconcileState(t *testing.T) {
|
||||
// above test cases are without kubelet --reserved-cpus cmd option
|
||||
// the following tests are with --reserved-cpus configured
|
||||
func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
+ testExcl := false
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 4,
|
||||
@@ -1001,7 +1005,8 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
},
|
||||
1,
|
||||
cpuset.NewCPUSet(0),
|
||||
- topologymanager.NewFakeManager())
|
||||
+ topologymanager.NewFakeManager(),
|
||||
+ testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
updateErr error
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index c3309ef7280..e892d63641b 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -76,6 +76,8 @@ type staticPolicy struct {
|
||||
topology *topology.CPUTopology
|
||||
// set of CPUs that is not available for exclusive assignment
|
||||
reserved cpuset.CPUSet
|
||||
+ // If true, default CPUSet should exclude reserved CPUs
|
||||
+ excludeReserved bool
|
||||
// topology manager reference to get container Topology affinity
|
||||
affinity topologymanager.Store
|
||||
// set of CPUs to reuse across allocations in a pod
|
||||
@@ -88,7 +90,7 @@ var _ Policy = &staticPolicy{}
|
||||
// NewStaticPolicy returns a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store) (Policy, error) {
|
||||
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, excludeReserved bool) (Policy, error) {
|
||||
allCPUs := topology.CPUDetails.CPUs()
|
||||
var reserved cpuset.CPUSet
|
||||
if reservedCPUs.Size() > 0 {
|
||||
@@ -112,6 +114,7 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||
return &staticPolicy{
|
||||
topology: topology,
|
||||
reserved: reserved,
|
||||
+ excludeReserved: excludeReserved,
|
||||
affinity: affinity,
|
||||
cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||
}, nil
|
||||
@@ -140,7 +143,15 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
}
|
||||
// state is empty initialize
|
||||
allCPUs := p.topology.CPUDetails.CPUs()
|
||||
- s.SetDefaultCPUSet(allCPUs)
|
||||
+ if p.excludeReserved {
|
||||
+ // Exclude reserved CPUs from the default CPUSet to keep containers off them
|
||||
+ // unless explicitly affined.
|
||||
+ s.SetDefaultCPUSet(allCPUs.Difference(p.reserved))
|
||||
+ } else {
|
||||
+ s.SetDefaultCPUSet(allCPUs)
|
||||
+ }
|
||||
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
|
||||
+ allCPUs, p.reserved, s.GetDefaultCPUSet())
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -148,9 +159,11 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
// 1. Check if the reserved cpuset is not part of default cpuset because:
|
||||
// - kube/system reserved have changed (increased) - may lead to some containers not being able to start
|
||||
// - user tampered with file
|
||||
- if !p.reserved.Intersection(tmpDefaultCPUset).Equals(p.reserved) {
|
||||
- return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
- p.reserved.String(), tmpDefaultCPUset.String())
|
||||
+ if !p.excludeReserved {
|
||||
+ if !p.reserved.Intersection(tmpDefaultCPUset).Equals(p.reserved) {
|
||||
+ return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
+ p.reserved.String(), tmpDefaultCPUset.String())
|
||||
+ }
|
||||
}
|
||||
|
||||
// 2. Check if state for static policy is consistent
|
||||
@@ -179,6 +192,9 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
}
|
||||
}
|
||||
totalKnownCPUs = totalKnownCPUs.UnionAll(tmpCPUSets)
|
||||
+ if p.excludeReserved {
|
||||
+ totalKnownCPUs = totalKnownCPUs.Union(p.reserved)
|
||||
+ }
|
||||
if !totalKnownCPUs.Equals(p.topology.CPUDetails.CPUs()) {
|
||||
return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
|
||||
p.topology.CPUDetails.CPUs().String(), totalKnownCPUs.String())
|
||||
@@ -249,6 +265,9 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
|
||||
klog.Infof("[cpumanager] static policy: RemoveContainer (pod: %s, container: %s)", podUID, containerName)
|
||||
if toRelease, ok := s.GetCPUSet(podUID, containerName); ok {
|
||||
s.Delete(podUID, containerName)
|
||||
+ if p.excludeReserved {
|
||||
+ toRelease = toRelease.Difference(p.reserved)
|
||||
+ }
|
||||
// Mutate the shared pool, adding released cpus.
|
||||
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
|
||||
}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
index b4b46c68c17..9c7e4f146ff 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
@@ -33,6 +33,7 @@ type staticPolicyTest struct {
|
||||
description string
|
||||
topo *topology.CPUTopology
|
||||
numReservedCPUs int
|
||||
+ excludeReserved bool
|
||||
podUID string
|
||||
containerName string
|
||||
stAssignments state.ContainerCPUAssignments
|
||||
@@ -44,7 +45,8 @@ type staticPolicyTest struct {
|
||||
}
|
||||
|
||||
func TestStaticPolicyName(t *testing.T) {
|
||||
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ testExcl := false
|
||||
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
|
||||
|
||||
policyName := policy.Name()
|
||||
if policyName != "static" {
|
||||
@@ -74,6 +76,15 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
stDefaultCPUSet: cpuset.NewCPUSet(),
|
||||
expCSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
|
||||
},
|
||||
+ {
|
||||
+ description: "empty cpuset exclude reserved",
|
||||
+ topo: topoDualSocketHT,
|
||||
+ numReservedCPUs: 2,
|
||||
+ excludeReserved: true,
|
||||
+ stAssignments: state.ContainerCPUAssignments{},
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(),
|
||||
+ expCSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 7, 8, 9, 10, 11),
|
||||
+ },
|
||||
{
|
||||
description: "reserved cores 0 & 6 are not present in available cpuset",
|
||||
topo: topoDualSocketHT,
|
||||
@@ -120,7 +131,7 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -436,7 +447,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -479,6 +490,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyRemove(t *testing.T) {
|
||||
+ excludeReserved := false
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
description: "SingleSocketHT, DeAllocOneContainer",
|
||||
@@ -537,7 +549,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -559,6 +571,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
+ excludeReserved := false
|
||||
testCases := []struct {
|
||||
description string
|
||||
topo *topology.CPUTopology
|
||||
@@ -627,7 +640,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: tc.stAssignments,
|
||||
@@ -699,9 +712,10 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
expNewErr: fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of 0-1 did not equal 1)"),
|
||||
},
|
||||
}
|
||||
+ testExcl := false
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager())
|
||||
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
|
||||
if !reflect.DeepEqual(err, testCase.expNewErr) {
|
||||
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
|
||||
testCase.description, testCase.expNewErr, err)
|
||||
@@ -741,7 +755,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
numReservedCPUs: 1,
|
||||
reserved: cpuset.NewCPUSet(0),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7),
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
||||
expErr: fmt.Errorf("not enough cpus available to satisfy request"),
|
||||
expCPUAlloc: false,
|
||||
@@ -753,7 +767,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.NewCPUSet(0, 1),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7),
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
|
||||
expErr: nil,
|
||||
expCPUAlloc: true,
|
||||
@@ -769,7 +783,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
},
|
||||
},
|
||||
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 4, 5),
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
|
||||
pod: makePod("fakePod", "fakeContainer3", "2000m", "2000m"),
|
||||
expErr: nil,
|
||||
expCPUAlloc: true,
|
||||
@@ -777,8 +791,9 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
+ testExcl := true
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager())
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
--
|
||||
2.17.1
|
||||
|
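The core of the patch above is one set difference: when excludeReserved is true, the default (shared) cpuset becomes allCPUs.Difference(p.reserved), so best-effort and burstable containers never land on the reserved CPUs. A small dependency-free Go sketch of that arithmetic, with example CPU numbers of our own choosing:

    package main

    import (
        "fmt"
        "sort"
    )

    // difference returns the CPUs in all that are not in reserved,
    // mirroring the allCPUs.Difference(p.reserved) call in the patch.
    func difference(all, reserved []int) []int {
        skip := make(map[int]bool, len(reserved))
        for _, cpu := range reserved {
            skip[cpu] = true
        }
        var out []int
        for _, cpu := range all {
            if !skip[cpu] {
                out = append(out, cpu)
            }
        }
        sort.Ints(out)
        return out
    }

    func main() {
        allCPUs := []int{0, 1, 2, 3, 4, 5, 6, 7}
        reserved := []int{0, 1} // e.g. from --system-reserved / --reserved-cpus
        // Default (shared) pool once reserved CPUs are excluded: [2 3 4 5 6 7]
        fmt.Println(difference(allCPUs, reserved))
    }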