From 9a4b6b6a5d903482624f2f4b86041511d3dfa7e4 Mon Sep 17 00:00:00 2001
From: Bart Wensley
Date: Mon, 15 Jul 2019 07:03:46 -0500
Subject: [PATCH] Set TCP keepalive timeouts for cluster network

The TCP keepalive timeouts in pods running on the cluster network
are currently set to the following:
net.ipv4.tcp_keepalive_intvl = 75
net.ipv4.tcp_keepalive_probes = 9
net.ipv4.tcp_keepalive_time = 7200

This means that a dropped TCP connection can take more than two
hours to be removed. That can cause large delays in reacting to
unexpected events like the uncontrolled reboot of a host.

This commit changes the TCP keepalive timeouts for the cluster
network to match the timeouts for the host OS:
net.ipv4.tcp_keepalive_intvl = 1
net.ipv4.tcp_keepalive_probes = 5
net.ipv4.tcp_keepalive_time = 5

Change-Id: I23e2c9a733727e4059ac272e052dca0e6ec4f2e1
Closes-bug: 1836232
Signed-off-by: Bart Wensley
---
 .../templates/multus-cni.yaml.j2 | 53 ++++++++++++-------
 1 file changed, 34 insertions(+), 19 deletions(-)

diff --git a/playbookconfig/src/playbooks/bootstrap/roles/bringup-essential-services/templates/multus-cni.yaml.j2 b/playbookconfig/src/playbooks/bootstrap/roles/bringup-essential-services/templates/multus-cni.yaml.j2
index 847192a1a..f9b7267f0 100644
--- a/playbookconfig/src/playbooks/bootstrap/roles/bringup-essential-services/templates/multus-cni.yaml.j2
+++ b/playbookconfig/src/playbooks/bootstrap/roles/bringup-essential-services/templates/multus-cni.yaml.j2
@@ -6,8 +6,8 @@
 #
 # - The multus CNI configuration file has been explicitly specified to ensure
 # it has a lower lexographic order than the calico CNI configuration file.
-#
 # - The configMap has been modified to work with Calico rather than Flannel
+# - The tuning plugin is used to update sysctl tcp_keepalive timers.
 ---
 apiVersion: apiextensions.k8s.io/v1beta1
 kind: CustomResourceDefinition
@@ -85,24 +85,39 @@ data:
       "delegates": [
         {
           "cniVersion": "0.3.0",
-          "name": "k8s-pod-network",
-          "type": "calico",
-          "masterplugin": true,
-          "log_level": "info",
-          "datastore_type": "kubernetes",
-          "nodename": "__KUBERNETES_NODE_NAME__",
-          "mtu": 1500,
-          "ipam": {
-            "type": "calico-ipam",
-            "assign_ipv4": "{{ "true" if cluster_network_ipv4 else "false" }}",
-            "assign_ipv6": "{{ "true" if cluster_network_ipv6 else "false" }}"
-          },
-          "policy": {
-            "type": "k8s"
-          },
-          "kubernetes": {
-            "kubeconfig": "/etc/cni/net.d/calico-kubeconfig"
-          }
+          "name": "chain",
+          "plugins": [
+            {
+              "cniVersion": "0.3.0",
+              "name": "k8s-pod-network",
+              "type": "calico",
+              "masterplugin": true,
+              "log_level": "info",
+              "datastore_type": "kubernetes",
+              "nodename": "__KUBERNETES_NODE_NAME__",
+              "mtu": 1500,
+              "ipam": {
+                "type": "calico-ipam",
+                "assign_ipv4": "{{ "true" if cluster_network_ipv4 else "false" }}",
+                "assign_ipv6": "{{ "true" if cluster_network_ipv6 else "false" }}"
+              },
+              "policy": {
+                "type": "k8s"
+              },
+              "kubernetes": {
+                "kubeconfig": "/etc/cni/net.d/calico-kubeconfig"
+              }
+            },
+            {
+              "name": "sysctl-tuning",
+              "type": "tuning",
+              "sysctl": {
+                "net.ipv4.tcp_keepalive_intvl": "1",
+                "net.ipv4.tcp_keepalive_probes": "5",
+                "net.ipv4.tcp_keepalive_time": "5"
+              }
+            }
+          ]
         }
       ],
       "kubeconfig": "/etc/cni/net.d/multus.d/multus.kubeconfig"