AIO reaffine DRBD tasks during startup
This speeds up the initial DRBD sync on AIO when there is a limited number of platform cores by reaffining DRBD tasks to use all CPUs. It enhances the affine-tasks init script to dynamically reaffine CPU-intensive DRBD tasks. The receiver threads (i.e., drbd_r_*) may each use a full core. On systems with fast disks, we notice that the receiver threads and softirq processing become CPU-limited by the number of platform cores configured. The DRBD receiver tasks are initially reaffined to float across all cores. The script polls for newly created DRBD resources and reaffines their tasks as they are found, until all DRBD resources have started. This script waits until sufficient platform readiness criteria are met. Once the system is at steady state, it ensures that DRBD tasks are constrained to platform cores and do not run on cores with VMs/containers. The DRBD configuration file affinity option is left as-is in case the DRBD kernel threads are restarted for some reason. Change-Id: I019137ea1cf3736768ad8882bd8d8628cc5c2857 Closes-Bug: 1832781 Signed-off-by: Jim Gauld <james.gauld@windriver.com>
This commit is contained in:
parent
4fe2b6bed5
commit
696f987a17
@ -44,7 +44,7 @@
|
|||||||
#
|
#
|
||||||
################################################################################
|
################################################################################
|
||||||
# Define minimal path
|
# Define minimal path
|
||||||
PATH=/bin:/usr/bin:/usr/local/bin
|
PATH=/bin:/usr/bin:/usr/sbin:/usr/local/bin
|
||||||
|
|
||||||
CPUMAP_FUNCTIONS=${CPUMAP_FUNCTIONS:-"/etc/init.d/cpumap_functions.sh"}
|
CPUMAP_FUNCTIONS=${CPUMAP_FUNCTIONS:-"/etc/init.d/cpumap_functions.sh"}
|
||||||
[[ -e ${CPUMAP_FUNCTIONS} ]] && source ${CPUMAP_FUNCTIONS}
|
[[ -e ${CPUMAP_FUNCTIONS} ]] && source ${CPUMAP_FUNCTIONS}
|
||||||
@ -273,6 +273,60 @@ END { printf "%d\n", n; }
|
|||||||
return ${PASS}
|
return ${PASS}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Get number of DRBD resources started.
#
# Counts the lines of the DRBD status file that contain a "cs:" field.
#
# Arguments: $1 - optional path of the status file to parse
#                 (default: /proc/drbd)
# Outputs:   the number of matching lines on stdout; prints 0 if DRBD is
#            not ready (status file missing, unreadable or without "cs:").
function number_drbd_resources_started {
    local status_file=${1:-/proc/drbd}
    local started=0

    # Number of started DRBD resources.
    # Let awk read the file directly instead of piping it through cat; an
    # absent status file simply leaves the count at 0.
    if [[ -r "${status_file}" ]]; then
        started=$(awk '/cs:/ { n += 1 } END { printf "%d\n", n }' \
            "${status_file}" 2>/dev/null)
    fi

    # Always print a number so that callers can compare it numerically.
    echo "${started:-0}"
}
|
||||||
|
|
||||||
|
# Check criteria for all drbd resources started.
# i.e., the count reported by number_drbd_resources_started must equal the
# number of resource names printed by 'drbdadm sh-resources'.
#
# Globals: NOT_READY_REASON (written) - cleared on entry; set to a short
#          explanation whenever the check fails.
# Returns: 0 when at least one resource has started and the started count
#          matches the configured count, 1 otherwise.
function all_drbd_resources_started {
    local PASS=0
    local FAIL=1
    local -i started=0
    local -i resources=0

    # Global variable
    NOT_READY_REASON=""

    # Number of started DRBD resources
    started=$(number_drbd_resources_started)
    if [ "${started}" -eq 0 ]; then
        NOT_READY_REASON="no drbd resources started"
        return ${FAIL}
    fi

    # Number of expected DRBD resources.
    # Count whitespace-separated names over every output line; splitting on
    # a single whitespace character would count empty fields whenever the
    # names are separated by repeated or trailing blanks.
    resources=$(drbdadm sh-resources | \
        awk '{ n += NF } END { printf "%d\n", n }')
    if [ "${started}" -ne "${resources}" ]; then
        NOT_READY_REASON="${started} of ${resources} drbd resources started"
        return ${FAIL}
    fi

    return ${PASS}
}
|
||||||
|
|
||||||
|
# Set the CPU affinity of the DRBD receiver threads.
#
# Arguments: $1 - CPU list handed to 'taskset --cpu-list'
# Outputs:   one LOG message; taskset output and errors are discarded, so a
#            thread that exits between lookup and taskset is ignored.
function affine_drbd_tasks {
    local CPUS=$1
    local -a pidlist=()
    local pid

    LOG "Affine drbd tasks, CPUS=${CPUS}"

    # Affine drbd_r_* threads to all cores. The DRBD receiver threads are
    # particularly CPU intensive. Leave the other DRBD threads alone.
    # pgrep prints one pid per line; read them into a real array so the loop
    # below iterates without relying on word splitting.
    mapfile -t pidlist < <(pgrep drbd_r_)
    for pid in "${pidlist[@]}"; do
        taskset --pid --cpu-list "${CPUS}" "${pid}" > /dev/null 2>&1
    done
}
|
||||||
|
|
||||||
# Return list of reaffineable pids. This includes all processes, but excludes
|
# Return list of reaffineable pids. This includes all processes, but excludes
|
||||||
# kernel threads, vSwitch, and anything in K8S or qemu/kvm.
|
# kernel threads, vSwitch, and anything in K8S or qemu/kvm.
|
||||||
function reaffineable_pids {
|
function reaffineable_pids {
|
||||||
@ -332,6 +386,9 @@ function affine_tasks_to_platform_cores {
|
|||||||
taskset --pid --cpu-list ${PLATFORM_CPUS} {} > /dev/null 2>&1
|
taskset --pid --cpu-list ${PLATFORM_CPUS} {} > /dev/null 2>&1
|
||||||
done
|
done
|
||||||
|
|
||||||
|
# Reaffine drbd_r_* threads to platform cpus
|
||||||
|
affine_drbd_tasks ${PLATFORM_CPUS}
|
||||||
|
|
||||||
LOG "Affined ${count} processes to platform cores."
|
LOG "Affined ${count} processes to platform cores."
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -381,6 +438,23 @@ function start {
|
|||||||
update_cgroup_cpuset_k8s_infra_all
|
update_cgroup_cpuset_k8s_infra_all
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Wait for all DRBD resources to have started. Affine DRBD tasks
|
||||||
|
# to float on all cores as we find them.
|
||||||
|
until all_drbd_resources_started; do
|
||||||
|
started=$(number_drbd_resources_started)
|
||||||
|
if [ ${started} -gt 0 ]; then
|
||||||
|
affine_drbd_tasks ${NONISOL_CPUS}
|
||||||
|
fi
|
||||||
|
dt=$(( ${SECONDS} - ${t0} ))
|
||||||
|
if [ ${dt} -ge ${PRINT_INTERVAL_SECONDS} ]; then
|
||||||
|
t0=${SECONDS}
|
||||||
|
LOG "Recovery wait, elapsed ${SECONDS} seconds." \
|
||||||
|
"Reason: ${NOT_READY_REASON}"
|
||||||
|
fi
|
||||||
|
sleep ${INIT_INTERVAL_SECONDS}
|
||||||
|
done
|
||||||
|
affine_drbd_tasks ${NONISOL_CPUS}
|
||||||
|
|
||||||
# Wait until K8s pods have recovered and nova-compute is running
|
# Wait until K8s pods have recovered and nova-compute is running
|
||||||
t0=${SECONDS}
|
t0=${SECONDS}
|
||||||
until is_k8s_platform_steady_state_ready; do
|
until is_k8s_platform_steady_state_ready; do
|
||||||
|
Loading…
Reference in New Issue
Block a user