Stability workarounds for ceph/cgroups issues

- Add stale rbd cleanup fix
- Add cleaning of cgroups mounts

Change-Id: I9950496416fbbb572eb5775032305e18fee1f026
This commit is contained in:
Kaspars Skels 2019-05-01 14:04:38 -05:00
parent e1328ed98b
commit df24b1853b
5 changed files with 279 additions and 0 deletions

View File

@ -0,0 +1,26 @@
---
schema: pegleg/Script/v1
metadata:
  schema: metadata/Document/v1
  name: hanging-cgroup-release
  storagePolicy: cleartext
  layeringDefinition:
    abstract: false
    layer: global
data: |-
  #!/bin/bash
  # Stops running systemd units whose description matches
  # "Kubernetes transient mount" and whose target directory under
  # /var/lib/kubelet/pods/ no longer exists. The number of "tasks" files
  # under /sys/fs/cgroup/*/system.slice is printed before and after so the
  # effect of the cleanup is visible in the log.
  set -ex

  # Print how many "tasks" files exist under the system.slice cgroups.
  cgroup_count() {
    echo "Current cgroup count: $(find /sys/fs/cgroup/*/system.slice -name tasks | wc -l)"
  }

  cgroup_count
  # Stop systemd mount unit that isn't actually mounted
  # (the bare echo only emits a blank separator line)
  echo
  echo "Stopping Kubernetes systemd mount units that are not mounted to the system."
  # Pipeline stages:
  #   1. list running units
  #   2. keep "<run-* unit name> <pod path>" for the transient-mount units
  #   3. print the unit name when the pod path is not an existing directory
  #   4. stop each such unit (-t echoes every command); wc -l prints the
  #      number of output lines and, being the last stage, also decides the
  #      pipeline's exit status, so a failed stop does not trip `set -e`
  systemctl list-units --state=running | \
    sed -rn '/Kubernetes.transient.mount/s,(run-\S+).+(/var/lib/kubelet/pods/.+),\1 \2,p' | \
    xargs -r -L 1 sh -c 'test -d "$2" || echo "$1"' -- | \
    xargs -r -t -L 1 systemctl stop |& wc -l
  cgroup_count

View File

@ -0,0 +1,32 @@
---
schema: pegleg/Script/v1
metadata:
  schema: metadata/Document/v1
  name: rbd-roomba-scanner
  storagePolicy: cleartext
  layeringDefinition:
    abstract: false
    layer: global
data: |-
  #!/bin/bash
  # Force-unmaps rbd devices that have had no filesystem mount for at least
  # 60 seconds: take a snapshot of unmounted rbd devices, wait, then unmap
  # every device that is still unmounted and was already in the snapshot.
  set -ex

  # Snapshot file.
  # don't put it in /tmp where it can be p0wned (???)
  RBD_LIST=/var/run/rbd_list

  # Print /dev/<name> for every lsblk row that starts with "rbd" and whose
  # seventh column is empty (presumably MOUNTPOINT in lsblk's default
  # layout - confirm if lsblk is ever invoked with custom columns).
  list_unmounted_rbds() {
    lsblk | awk '/^rbd/ && $7 == "" { printf "/dev/%s\n", $1 }'
  }

  list_unmounted_rbds > "${RBD_LIST}"
  # wait a while, so we don't catch rbd devices the kubelet is working on mounting
  sleep 60
  # finally, examine rbd devices again and if any were seen previously (60s ago) we will
  # forcefully unmount them if they have no fs mounts
  DATE=$(date)
  rc=0
  for rbd in $(list_unmounted_rbds); do
    # -x/-F: whole-line, fixed-string match. A plain `grep -q $rbd` would
    # let /dev/rbd1 match a recorded /dev/rbd10 and unmap a fresh device.
    if grep -qxF -- "${rbd}" "${RBD_LIST}"; then
      echo "[${DATE}] Unmapping stale RBD $rbd"
      # NOTE(supamatt): rbd unmap -o force will only succeed if there are NO pending I/O
      # A failure is therefore expected from time to time; record it and
      # keep going so `set -e` does not skip the remaining devices or the
      # cleanup of the snapshot file.
      if ! /usr/bin/rbd unmap -o force "${rbd}"; then
        echo "[${DATE}] Failed to unmap RBD $rbd"
        rc=1
      fi
    else
      echo "[${DATE}] Skipping RBD $rbd as it hasn't been stale for at least 60 seconds"
    fi
  done
  rm -f "${RBD_LIST}"
  # Still report a non-zero status when any unmap failed.
  exit "${rc}"

View File

@ -24,6 +24,19 @@ metadata:
path: .images.ucp.divingbell
dest:
path: .values.images
- src:
schema: pegleg/Script/v1
name: rbd-roomba-scanner
path: .
dest:
path: .values.conf.exec.X005-rbd-roomba-scanner.data
- src:
schema: pegleg/Script/v1
name: hanging-cgroup-release
path: .
dest:
path: .values.conf.exec.X005-hanging-cgroup-release.data
data:
chart_name: ucp-divingbell
release: ucp-divingbell
@ -78,6 +91,17 @@ data:
# were restarted. "Failed to add /run/systemd/ask-password to directory
# watch: No space left on device". https://bit.ly/2Mj5qn2 TDP bug 427616
fs.inotify.max_user_watches: '1048576'
exec:
X005-rbd-roomba-scanner:
rerun_policy: always
# 300 = 5 minutes
rerun_interval: 300
timeout: 300
X005-hanging-cgroup-release:
rerun_policy: always
# 3600 = 1 hour
rerun_interval: 3600
timeout: 600
dependencies:
- ucp-divingbell-htk
---

View File

@ -0,0 +1,96 @@
#!/bin/bash
# Hotfix installer: writes a ConfigMap + DaemonSet manifest to
# /tmp/hanging-cgroup-release.yaml and applies it with kubectl run from a
# container. The DaemonSet copies the ConfigMap script onto the host and
# runs it in the namespaces of host PID 1, where it stops stale
# "Kubernetes transient mount" systemd units once an hour.
#
# Environment overrides:
#   KUBECTL_IMAGE  image that provides /kubectl
#   UBUNTU_IMAGE   image used for the DaemonSet container
set -ex

KUBECTL_IMAGE=${KUBECTL_IMAGE:-gcr.io/google-containers/hyperkube-amd64:v1.11.6}
UBUNTU_IMAGE=${UBUNTU_IMAGE:-docker.io/ubuntu:16.04}

# Quoted delimiter: the embedded script is written verbatim, nothing is
# expanded by this shell.
cat > /tmp/hanging-cgroup-release.yaml << 'EOF'
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: hanging-cgroup-release
  namespace: kube-system
  labels:
    hotfix: 'true'
data:
  singleshot.sh: |+
    #!/bin/bash
    set -ex

    # Print how many "tasks" files exist under the system.slice cgroups.
    cgroup_count() {
      echo "Current cgroup count: $(find /sys/fs/cgroup/*/system.slice -name tasks | wc -l)"
    }

    while true; do
      cgroup_count
      # Stop systemd mount unit that isn't actually mounted
      echo
      echo "Stopping Kubernetes systemd mount units that are not mounted to the system."
      # Keep "<run-* unit> <pod path>" pairs, print the unit when the pod
      # path is not an existing directory, then stop each printed unit.
      systemctl list-units --state=running | \
        sed -rn '/Kubernetes.transient.mount/s,(run-\S+).+(/var/lib/kubelet/pods/.+),\1 \2,p' | \
        xargs -r -L 1 sh -c 'test -d "$2" || echo "$1"' -- | \
        xargs -r -t -L 1 systemctl stop |& wc -l
      cgroup_count
      sleep 3600
    done
EOF

# Unquoted delimiter: ${UBUNTU_IMAGE} is expanded by this shell.
cat >> /tmp/hanging-cgroup-release.yaml << EOF
---
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: hanging-cgroup-release
  namespace: kube-system
spec:
  template:
    metadata:
      labels:
        name: hanging-cgroup-release
    spec:
      hostNetwork: true
      hostPID: true
      nodeSelector:
        ucp-control-plane: enabled
      containers:
        - resources:
            requests:
              cpu: 0.1
          securityContext:
            privileged: true
          image: ${UBUNTU_IMAGE}
          name: hanging-cgroup-release
          command: ["/bin/bash", "-cx"]
          # The script is copied to a host path unique to this hotfix.
          # A shared name such as /tmp/singleshot.sh would be overwritten by
          # any other hotfix DaemonSet scheduled on the same node.
          args:
            - >
              cp -p /tmp/singleshot.sh /host/tmp/hanging-cgroup-release.sh;
              nsenter -t 1 -m -u -n -i /tmp/hanging-cgroup-release.sh;
          volumeMounts:
            - name: host
              mountPath: /host
            - name: hanging-cgroup-release
              subPath: singleshot.sh
              mountPath: /tmp/singleshot.sh
      volumes:
        - name: host
          hostPath:
            path: /
        - name: hanging-cgroup-release
          configMap:
            name: hanging-cgroup-release
            defaultMode: 0555
EOF

# /tmp is mounted at /work so kubectl inside the container can read the
# manifest written above.
docker run --rm -i \
  --net host \
  -v /tmp:/work \
  -v /etc/kubernetes/admin:/etc/kubernetes/admin \
  -e KUBECONFIG=/etc/kubernetes/admin/kubeconfig.yaml \
  "${KUBECTL_IMAGE}" \
  /kubectl apply -f /work/hanging-cgroup-release.yaml

101
tools/fixes/rbd-roomba-scanner.sh Executable file
View File

@ -0,0 +1,101 @@
#!/bin/bash
# Hotfix installer: writes a ConfigMap + DaemonSet manifest to
# /tmp/rbd-roomba-scanner.yaml and applies it with kubectl run from a
# container. The DaemonSet copies the ConfigMap script onto the host and
# runs it in the namespaces of host PID 1, where it loops forever
# force-unmapping rbd devices that stayed unmounted for at least 60 seconds.
#
# Environment overrides:
#   KUBECTL_IMAGE  image that provides /kubectl
#   UBUNTU_IMAGE   image used for the DaemonSet container
set -ex

KUBECTL_IMAGE=${KUBECTL_IMAGE:-gcr.io/google-containers/hyperkube-amd64:v1.11.6}
UBUNTU_IMAGE=${UBUNTU_IMAGE:-docker.io/ubuntu:16.04}

# Quoted delimiter: the embedded script is written verbatim, nothing is
# expanded by this shell.
cat > /tmp/rbd-roomba-scanner.yaml << 'EOF'
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: rbd-roomba-scanner
  namespace: ceph
  labels:
    hotfix: 'true'
data:
  singleshot.sh: |+
    #!/bin/bash
    set -ex

    # Print /dev/<name> for every lsblk row that starts with "rbd" and whose
    # seventh column is empty (presumably MOUNTPOINT in lsblk's default
    # layout - confirm if lsblk is ever invoked with custom columns).
    list_unmounted_rbds() {
      lsblk | awk '/^rbd/ && $7 == "" { printf "/dev/%s\n", $1 }'
    }

    while true; do
      # don't put it in /tmp where it can be p0wned (???)
      list_unmounted_rbds > /var/run/rbd_list
      # wait a while, so we don't catch rbd devices the kubelet is working on mounting
      sleep 60
      # finally, examine rbd devices again and if any were seen previously (60s ago) we will
      # forcefully unmount them if they have no fs mounts
      DATE=$(date)
      for rbd in $(list_unmounted_rbds); do
        # -x/-F: whole-line, fixed-string match. A plain `grep -q $rbd` would
        # let /dev/rbd1 match a recorded /dev/rbd10 and unmap a fresh device.
        if grep -qxF -- "$rbd" /var/run/rbd_list; then
          echo "[${DATE}] Unmapping stale RBD $rbd"
          # NOTE(supamatt): rbd unmap -o force will only succeed if there are NO pending I/O
          # A failure is therefore expected from time to time; log it and
          # continue, otherwise `set -e` would end this loop on the first
          # busy device.
          /usr/bin/rbd unmap -o force "$rbd" || \
            echo "[${DATE}] Failed to unmap RBD $rbd, will retry on the next pass"
        else
          echo "[${DATE}] Skipping RBD $rbd as it hasn't been stale for at least 60 seconds"
        fi
      done
      rm -f /var/run/rbd_list
    done
EOF

# Unquoted delimiter: ${UBUNTU_IMAGE} is expanded by this shell.
cat >> /tmp/rbd-roomba-scanner.yaml << EOF
---
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: rbd-roomba-scanner
  namespace: ceph
spec:
  template:
    metadata:
      labels:
        name: rbd-roomba-scanner
    spec:
      hostNetwork: true
      hostPID: true
      nodeSelector:
        openstack-control-plane: enabled
      containers:
        - resources:
            requests:
              cpu: 0.1
          securityContext:
            privileged: true
          image: ${UBUNTU_IMAGE}
          name: rbd-roomba-scanner
          command: ["/bin/bash", "-cx"]
          # The script is copied to a host path unique to this hotfix.
          # A shared name such as /tmp/singleshot.sh would be overwritten by
          # any other hotfix DaemonSet scheduled on the same node.
          args:
            - >
              cp -p /tmp/singleshot.sh /host/tmp/rbd-roomba-scanner.sh;
              nsenter -t 1 -m -u -n -i /tmp/rbd-roomba-scanner.sh;
          volumeMounts:
            - name: host
              mountPath: /host
            - name: rbd-roomba-scanner
              subPath: singleshot.sh
              mountPath: /tmp/singleshot.sh
      volumes:
        - name: host
          hostPath:
            path: /
        - name: rbd-roomba-scanner
          configMap:
            name: rbd-roomba-scanner
            defaultMode: 0555
EOF

# /tmp is mounted at /work so kubectl inside the container can read the
# manifest written above.
docker run --rm -i \
  --net host \
  -v /tmp:/work \
  -v /etc/kubernetes/admin:/etc/kubernetes/admin \
  -e KUBECONFIG=/etc/kubernetes/admin/kubeconfig.yaml \
  "${KUBECTL_IMAGE}" \
  /kubectl apply -f /work/rbd-roomba-scanner.yaml