diff --git a/diskimage-create/README.rst b/diskimage-create/README.rst index 48390aa3af..fde101a97a 100644 --- a/diskimage-create/README.rst +++ b/diskimage-create/README.rst @@ -110,6 +110,7 @@ Command syntax: [-i **ubuntu-minimal** | fedora | centos-minimal | rhel ] [-k ] [-l ] + [-m] [-n] [-o **amphora-x64-haproxy** | ] [-p] @@ -132,6 +133,7 @@ Command syntax: '-i' is the base OS (default: ubuntu-minimal) '-k' is the kernel meta package name, currently only for ubuntu-minimal base OS (default: linux-image-virtual) '-l' is output logfile (default: none) + '-m' enable vCPU pinning optimizations (default: disabled) '-n' disable sshd (default: enabled) '-o' is the output image file name '-p' install amphora-agent from distribution packages (default: disabled)" diff --git a/diskimage-create/diskimage-create.sh b/diskimage-create/diskimage-create.sh index dbb165abce..44049fd1a3 100755 --- a/diskimage-create/diskimage-create.sh +++ b/diskimage-create/diskimage-create.sh @@ -31,6 +31,7 @@ usage() { echo " [-i **ubuntu-minimal** | fedora | centos-minimal | rhel ]" echo " [-k ]" echo " [-l ]" + echo " [-m]" echo " [-n]" echo " [-o **amphora-x64-haproxy.qcow2** | ]" echo " [-p]" @@ -52,6 +53,7 @@ usage() { echo " '-i' is the base OS (default: ubuntu-minimal)" echo " '-k' is the kernel meta package name, currently only for ubuntu-minimal base OS (default: linux-image-virtual)" echo " '-l' is output logfile (default: none)" + echo " '-m' enable vCPU pinning optimizations (default: disabled)" echo " '-n' disable sshd (default: enabled)" echo " '-o' is the output image file name" echo " '-p' install amphora-agent from distribution packages (default: disabled)" @@ -92,7 +94,7 @@ dib_enable_tracing= AMP_LOGFILE="" -while getopts "a:b:c:d:efg:hi:k:l:no:pt:r:s:vw:xy" opt; do +while getopts "a:b:c:d:efg:hi:k:l:mno:pt:r:s:vw:xy" opt; do case $opt in a) AMP_ARCH=$OPTARG @@ -166,6 +168,9 @@ while getopts "a:b:c:d:efg:hi:k:l:no:pt:r:s:vw:xy" opt; do l) AMP_LOGFILE="--logfile=$OPTARG" 
;; + m) + AMP_ENABLE_CPUPINNING=1 + ;; n) AMP_DISABLE_SSHD=1 ;; @@ -253,6 +258,8 @@ if [ "$AMP_BASEOS" = "ubuntu-minimal" ]; then export DIB_UBUNTU_KERNEL=${AMP_KERNEL:-"linux-image-virtual"} fi +AMP_ENABLE_CPUPINNING=${AMP_ENABLE_CPUPINNING:-0} + AMP_DISABLE_SSHD=${AMP_DISABLE_SSHD:-0} AMP_PACKAGE_INSTALL=${AMP_PACKAGE_INSTALL:-0} @@ -476,6 +483,11 @@ AMP_element_sequence="$AMP_element_sequence pip-cache" # Add certificate ramfs element AMP_element_sequence="$AMP_element_sequence certs-ramfs" +# Add cpu-pinning element +if [ "$AMP_ENABLE_CPUPINNING" -eq 1 ]; then + AMP_element_sequence="$AMP_element_sequence cpu-pinning" +fi + # Disable SSHD if requested if [ "$AMP_DISABLE_SSHD" -eq 1 ]; then AMP_element_sequence="$AMP_element_sequence remove-sshd" diff --git a/elements/cpu-pinning/README.rst b/elements/cpu-pinning/README.rst new file mode 100644 index 0000000000..a59432c5f3 --- /dev/null +++ b/elements/cpu-pinning/README.rst @@ -0,0 +1,6 @@ +Element to enable optimizations for vertical scaling + +This element configures the Linux kernel to isolate all but the first +vCPU of the system, so that the isolated vCPUs are used exclusively by HAProxy threads. +It also installs and activates a customized TuneD profile that should further +improve vertical scaling performance. diff --git a/elements/cpu-pinning/element-deps b/elements/cpu-pinning/element-deps new file mode 100644 index 0000000000..483dfd9a67 --- /dev/null +++ b/elements/cpu-pinning/element-deps @@ -0,0 +1,2 @@ +install-static +package-installs diff --git a/elements/cpu-pinning/environment.d/80-kernel-cpu-affinity b/elements/cpu-pinning/environment.d/80-kernel-cpu-affinity new file mode 100644 index 0000000000..198aef0bca --- /dev/null +++ b/elements/cpu-pinning/environment.d/80-kernel-cpu-affinity @@ -0,0 +1,36 @@ +#!/bin/bash + +# +# Copyright Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License.
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + +if [ ${DIB_DEBUG_TRACE:-0} -gt 0 ]; then + set -x +fi + +set -euo pipefail + +case $DISTRO_NAME in + ubuntu | debian | fedora | centos* | rhel* ) + DIB_BOOTLOADER_DEFAULT_CMDLINE+=" irqaffinity=0" + # This will be ignored on single vCPU systems + DIB_BOOTLOADER_DEFAULT_CMDLINE+=" isolcpus=1-N" + export DIB_BOOTLOADER_DEFAULT_CMDLINE + ;; + *) + echo "ERROR: Unsupported distribution $DISTRO_NAME" + exit 1 + ;; +esac diff --git a/elements/cpu-pinning/package-installs.yaml b/elements/cpu-pinning/package-installs.yaml new file mode 100644 index 0000000000..a537247400 --- /dev/null +++ b/elements/cpu-pinning/package-installs.yaml @@ -0,0 +1,3 @@ +irqbalance: + uninstall: True +tuned: diff --git a/elements/cpu-pinning/post-install.d/20-enable-tuned b/elements/cpu-pinning/post-install.d/20-enable-tuned new file mode 100644 index 0000000000..b369b2ca81 --- /dev/null +++ b/elements/cpu-pinning/post-install.d/20-enable-tuned @@ -0,0 +1,11 @@ +#!/bin/bash + +if [ ${DIB_DEBUG_TRACE:-0} -gt 0 ]; then + set -x +fi + +set -euo pipefail + +if [ "$DIB_INIT_SYSTEM" == "systemd" ]; then + systemctl enable $(svc-map tuned) +fi diff --git a/elements/cpu-pinning/post-install.d/30-set-tuned-profile b/elements/cpu-pinning/post-install.d/30-set-tuned-profile new file mode 100644 index 0000000000..347dcbe3b7 --- /dev/null +++ b/elements/cpu-pinning/post-install.d/30-set-tuned-profile @@ -0,0 +1,5 @@ +#!/bin/sh + +# script.sh is installed from static/ as /etc/tuned/amphora/script.sh with +# mode 0644; make it executable so the profile's [script] section can run it. +chmod +x /etc/tuned/amphora/script.sh diff --git a/elements/cpu-pinning/static/etc/tuned/active_profile b/elements/cpu-pinning/static/etc/tuned/active_profile new
file mode 100644 index 0000000000..b6b31fe78a --- /dev/null +++ b/elements/cpu-pinning/static/etc/tuned/active_profile @@ -0,0 +1 @@ +virtual-guest optimize-serial-console amphora diff --git a/elements/cpu-pinning/static/etc/tuned/amphora/script.sh b/elements/cpu-pinning/static/etc/tuned/amphora/script.sh new file mode 100644 index 0000000000..8088595ce0 --- /dev/null +++ b/elements/cpu-pinning/static/etc/tuned/amphora/script.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +# Comment out the line in ...tuned/functions that fails on the amp: +# DISKS_SYS="$(command ls -d1 /sys/block/{sd,cciss,dm-,vd,dasd,xvd}* 2>/dev/null)" +sed -i 's/^DISKS_SYS=/#&/' /usr/lib/tuned/functions +. /usr/lib/tuned/functions + +start() { + setup_kvm_mod_low_latency + disable_ksm + + return "$?" +} + +stop() { + if [ "$1" = "full_rollback" ]; then + teardown_kvm_mod_low_latency + enable_ksm + fi + return "$?" +} + +process "$@" diff --git a/elements/cpu-pinning/static/etc/tuned/amphora/tuned.conf b/elements/cpu-pinning/static/etc/tuned/amphora/tuned.conf new file mode 100644 index 0000000000..b53536a612 --- /dev/null +++ b/elements/cpu-pinning/static/etc/tuned/amphora/tuned.conf @@ -0,0 +1,67 @@ +# +# tuned configuration +# +# Copyright Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License.
+# +[main] +summary=Customized profile for use on Octavia amphorae +include=network-latency + +[variables] +isolated_cores=${f:cpulist_invert:0} +no_balance_cores=${isolated_cores} + +# Fail if isolated_cores are not set +assert1=${f:assertion_non_equal:isolated_cores are set:${isolated_cores}:${isolated_cores_assert_check}} + +# tmpdir +tmpdir=${f:strip:${f:exec:mktemp:-d}} + +isolated_cores_expanded=${f:cpulist_unpack:${isolated_cores}} +isolated_cpumask=${f:cpulist2hex:${isolated_cores_expanded}} +not_isolated_cores_expanded=${f:cpulist_invert:${isolated_cores_expanded}} +isolated_cores_online_expanded=${f:cpulist_online:${isolated_cores}} +not_isolated_cores_online_expanded=${f:cpulist_online:${not_isolated_cores_expanded}} +not_isolated_cpumask=${f:cpulist2hex:${not_isolated_cores_expanded}} +# Make sure no_balance_cores is defined before +# no_balance_cores_expanded is defined, so that child profiles can set +# no_balance_cores directly in the profile (tuned.conf) +no_balance_cores_expanded=${f:cpulist_unpack:${no_balance_cores}} + +# Fail if isolated_cores contains CPUs which are not online +assert2=${f:assertion:isolated_cores contains online CPU(s):${isolated_cores_expanded}:${isolated_cores_online_expanded}} + +[sysctl] +kernel.numa_balancing=0 +kernel.hung_task_timeout_secs = 600 +vm.stat_interval = 10 +# See https://bugzilla.redhat.com/show_bug.cgi?id=1797629 +kernel.timer_migration = 0 + +[sysfs] +/sys/bus/workqueue/devices/writeback/cpumask = ${not_isolated_cpumask} +/sys/devices/virtual/workqueue/cpumask = ${not_isolated_cpumask} +/sys/devices/virtual/workqueue/*/cpumask = ${not_isolated_cpumask} +/sys/devices/system/machinecheck/machinecheck*/ignore_ce = 1 + +[systemd] +cpu_affinity=${not_isolated_cores_expanded} + +[script] +script=${i:PROFILE_DIR}/script.sh + +[scheduler] +isolated_cores=${isolated_cores} +ps_blacklist=.*pmd.*;.*PMD.*;^DPDK;.*qemu-kvm.*;^contrail-vroute$;^lcore-slave-.*;^rte_mp_handle$;^rte_mp_async$;^eal-intr-thread$ diff --git 
a/elements/cpu-pinning/svc-map b/elements/cpu-pinning/svc-map new file mode 100644 index 0000000000..937a5ff9d0 --- /dev/null +++ b/elements/cpu-pinning/svc-map @@ -0,0 +1,4 @@ +tuned: + default: tuned +irqbalance: + default: irqbalance diff --git a/releasenotes/notes/add-cpu-pinning-element-86617303b720d5a9.yaml b/releasenotes/notes/add-cpu-pinning-element-86617303b720d5a9.yaml new file mode 100644 index 0000000000..044ab5e5d3 --- /dev/null +++ b/releasenotes/notes/add-cpu-pinning-element-86617303b720d5a9.yaml @@ -0,0 +1,19 @@ +--- +features: + - | + The new "cpu-pinning" element optimizes the amphora image for better + vertical scaling. When an amphora flavor with multiple vCPUs is used, + the element configures the kernel to isolate (isolcpus) + all vCPUs except the first one. + Furthermore, it uninstalls irqbalance and sets the IRQ affinity to the + first CPU. That way the other CPUs are free to be used by HAProxy + exclusively. A new customized TuneD profile applies some more tweaks + for improving network latency. + This new feature is disabled by default, but can be enabled by running + ``diskimage-create.sh`` with the ``-m`` option or by setting the + ``AMP_ENABLE_CPUPINNING`` environment variable to 1 before running the script. +upgrade: + - | + Amphora vertical scaling optimizations require a new amphora image + build with the optional CPU pinning feature enabled in order + to become effective.