AIO reaffine DRBD tasks during startup

This will speed up the initial DRBD sync on AIO when there is a limited
number of platform cores by reaffining DRBD tasks to use all CPUs.

This enhances the affine-tasks init script to dynamically reaffine
CPU-intensive DRBD tasks. The receiver threads (i.e., drbd_r_*)
may use a full core each. On systems with fast disks, we notice that the
receiver threads and softirq processing get CPU limited by the
number of platform cores configured.

The DRBD receiver tasks are reaffined initially to float across all
cores. This will poll for newly created DRBD resources and reaffine
them as they are found until all DRBD resources have started.

This script waits for sufficient platform readiness criteria. Once the
system is at steady-state, this will ensure that DRBD tasks are
constrained to platform cores and do not run on cores with
VMs/containers. The DRBD configuration file affinity option is left
as-is in case the DRBD kernel threads are restarted for some reason.

Change-Id: I019137ea1cf3736768ad8882bd8d8628cc5c2857
Closes-Bug: 1832781
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
This commit is contained in:
Jim Gauld 2019-07-16 15:35:36 -04:00
parent e186a7ff82
commit 674850130f
1 changed files with 75 additions and 1 deletions

View File

@ -44,7 +44,7 @@
#
################################################################################
# Define minimal path
PATH=/bin:/usr/bin:/usr/local/bin
# NOTE(review): this second assignment supersedes the one above; the only
# difference is the added /usr/sbin entry.
PATH=/bin:/usr/bin:/usr/sbin:/usr/local/bin
# Path of the cpumap helper script; an existing CPUMAP_FUNCTIONS value in the
# environment takes precedence over the default.
CPUMAP_FUNCTIONS=${CPUMAP_FUNCTIONS:-"/etc/init.d/cpumap_functions.sh"}
# Source the helper script only when the file exists.
[[ -e ${CPUMAP_FUNCTIONS} ]] && source ${CPUMAP_FUNCTIONS}
@ -273,6 +273,60 @@ END { printf "%d\n", n; }
return ${PASS}
}
# Report how many DRBD resources have started.
# Outputs: the number of lines in /proc/drbd that contain "cs:", written to
#          stdout as a decimal integer; prints 0 when /proc/drbd cannot be
#          read (i.e., DRBD is not ready).
function number_drbd_resources_started {
    # cat feeds the pipeline so that an unreadable /proc/drbd simply gives awk
    # empty input and the END rule still prints 0.
    cat /proc/drbd 2>/dev/null | \
        awk '/cs:/ { count++ } END { printf "%d\n", count }'
}
# Check criteria for all DRBD resources started.
# Compares the started count reported by number_drbd_resources_started with
# the number of resource names printed by 'drbdadm sh-resources'.
# Globals:  NOT_READY_REASON (written) - cleared on entry; set to a short
#           human-readable reason whenever the check fails.
# Returns:  0 when the started count equals the expected count,
#           1 otherwise (including when nothing has started yet).
function all_drbd_resources_started {
    local PASS=0
    local FAIL=1
    local -i started=0
    local -i resources=0

    # Global variable
    NOT_READY_REASON=""

    # Number of started DRBD resources
    started=$(number_drbd_resources_started)
    if [ "${started}" -eq 0 ]; then
        NOT_READY_REASON="no drbd resources started"
        return ${FAIL}
    fi

    # Number of expected DRBD resources: count whitespace-delimited names
    # across all output lines. Default awk field splitting ignores leading,
    # trailing and repeated whitespace, so padding in the output cannot
    # inflate the count and leave callers waiting forever.
    resources=$(drbdadm sh-resources | \
        awk '{ n += NF } END { printf "%d\n", n }')
    if [ "${started}" -ne "${resources}" ]; then
        NOT_READY_REASON="${started} of ${resources} drbd resources started"
        return ${FAIL}
    fi

    return ${PASS}
}
# Set the CPU affinity of the DRBD receiver threads (drbd_r_*).
# Arguments: $1 - CPU list passed to 'taskset --cpu-list' (e.g. "0-3").
# Outputs:   one LOG message; taskset output is discarded.
# Returns:   status of the last taskset call, or 0 when no thread matched.
function affine_drbd_tasks {
    local CPUS=$1
    local pidlist
    local pid

    LOG "Affine drbd tasks, CPUS=${CPUS}"

    # Affine drbd_r_* threads to all cores. The DRBD receiver threads are
    # particularly CPU intensive. Leave the other DRBD threads alone.
    pidlist=$(pgrep drbd_r_)

    # pidlist is a whitespace-separated string of PIDs; it is expanded
    # unquoted on purpose so that the loop sees one PID per iteration.
    for pid in ${pidlist}; do
        # Best effort: a thread may have exited since pgrep ran.
        taskset --pid --cpu-list "${CPUS}" "${pid}" > /dev/null 2>&1
    done
}
# Return list of reaffineable pids. This includes all processes, but excludes
# kernel threads, vSwitch, and anything in K8S or qemu/kvm.
function reaffineable_pids {
@ -332,6 +386,9 @@ function affine_tasks_to_platform_cores {
taskset --pid --cpu-list ${PLATFORM_CPUS} {} > /dev/null 2>&1
done
# Reaffine drbd_r_* threads to platform cpus
affine_drbd_tasks ${PLATFORM_CPUS}
LOG "Affined ${count} processes to platform cores."
}
@ -381,6 +438,23 @@ function start {
update_cgroup_cpuset_k8s_infra_all
fi
# Wait for all DRBD resources to have started. Affine DRBD tasks
# to float on all cores as we find them.
until all_drbd_resources_started; do
started=$(number_drbd_resources_started)
if [ ${started} -gt 0 ]; then
affine_drbd_tasks ${NONISOL_CPUS}
fi
dt=$(( ${SECONDS} - ${t0} ))
if [ ${dt} -ge ${PRINT_INTERVAL_SECONDS} ]; then
t0=${SECONDS}
LOG "Recovery wait, elapsed ${SECONDS} seconds." \
"Reason: ${NOT_READY_REASON}"
fi
sleep ${INIT_INTERVAL_SECONDS}
done
affine_drbd_tasks ${NONISOL_CPUS}
# Wait until K8s pods have recovered and nova-compute is running
t0=${SECONDS}
until is_k8s_platform_steady_state_ready; do