Engineer IRQ affinity to decouple applications from platform

This modifies the affine-platform.sh script to align with the kernel
boot arg irqaffinity setting as engineered by sysinv.

The default IRQ affinity setting is engineered to application cores if
they are configured. PCI IRQs for platform interfaces and NVMe are later
moved to platform cores, so this change effectively decouples platform
from application.

This is particularly noticeable when running RT kernel + kernel-bridge,
using SRIOV pod and significant traffic load. The resultant irq kernel
VF threads do work on cores where they are affined.

Partial-Bug: 1930595
Depends-On: https://review.opendev.org/c/starlingx/config/+/794270

Signed-off-by: Jim Gauld <james.gauld@windriver.com>
Change-Id: Ia74bcef5261fba449478624c1ae5e9ceaa3e54e8
This commit is contained in:
Jim Gauld
2021-06-02 12:03:41 -04:00
parent 931887828b
commit e46f8017d4

View File

@@ -37,7 +37,8 @@ LOG_DEBUG=1
# Affine all running tasks to the CPULIST provided in the first parameter.
################################################################################
function affine_tasks {
local CPULIST=$1
local PLAT_CPULIST=$1
local IRQ_CPULIST=$2
local PIDLIST
local RET=0
@@ -47,20 +48,27 @@ function affine_tasks {
# Calculate platform cores cpumap. Reformat with comma separator every
# 8 hex characters.
PLATFORM_COREMASK=$(cpulist_to_cpumap ${CPULIST} ${N_CPUS} | \
PLATFORM_COREMASK=$(cpulist_to_cpumap ${PLAT_CPULIST} ${N_CPUS} | \
sed ':a;s/\B[0-9A-Fa-f]\{8\}\>/,&/;ta')
log_debug "PLATFORM_CPULIST=${PLAT_CPULIST}, COREMASK=${PLATFORM_COREMASK}"
# Calculate irq cores cpumap. Reformat with comma separator every
# 8 hex characters.
IRQ_COREMASK=$(cpulist_to_cpumap ${IRQ_CPULIST} ${N_CPUS} | \
sed ':a;s/\B[0-9A-Fa-f]\{8\}\>/,&/;ta')
log_debug "IRQ_CPULIST=${IRQ_CPULIST}, COREMASK=${IRQ_COREMASK}"
# Set default IRQ affinity
# NOTE: The following echoes a value to file and redirects the stderr to
# stdout so that potential errors get captured in the variable ERROR.
ERROR=$(echo ${PLATFORM_COREMASK} 2>&1 > /proc/irq/default_smp_affinity)
ERROR=$(echo ${IRQ_COREMASK} 2>&1 > /proc/irq/default_smp_affinity)
RET=$?
if [ ${RET} -ne 0 ]; then
log_error "Failed to set: ${PLATFORM_COREMASK}" \
log_error "Failed to set: ${IRQ_COREMASK}" \
"/proc/irq/default_smp_affinity, err=${ERROR}"
fi
# Affine all PCI/MSI interrupts to platform cores; this overrides
# Affine all PCI/MSI interrupts to IRQ cores; this overrides
# irqaffinity boot arg, since that does not handle IRQs for PCI devices
# on numa nodes that do not intersect with platform cores.
PCIDEVS=/sys/bus/pci/devices
@@ -69,11 +77,12 @@ function affine_tasks {
irqs+=($(ls ${PCIDEVS}/*/msi_irqs 2>/dev/null | grep -E '^[0-9]+$' | xargs))
# flatten list of irqs, removing duplicates
irqs=($(echo ${irqs[@]} | tr ' ' '\n' | sort -nu))
log_debug "Affining all PCI/MSI irqs(${irqs[@]}) with cpus (${CPULIST})"
log_debug "Affining all PCI/MSI irqs(${irqs[@]}) with cpus (${IRQ_CPULIST})"
for i in ${irqs[@]}; do
/bin/bash -c "[[ -e /proc/irq/${i} ]] && echo ${CPULIST} > /proc/irq/${i}/smp_affinity_list" 2>/dev/null
/bin/bash -c "[[ -e /proc/irq/${i} ]] && echo ${IRQ_CPULIST} > /proc/irq/${i}/smp_affinity_list" 2>/dev/null
done
if [[ "$subfunction" == *"worker,lowlatency" ]]; then
log_debug "Affining workqueues with cpus (${PLATFORM_CPULIST})"
# Affine work queues to platform cores
ERROR=$(echo ${PLATFORM_COREMASK} 2>&1 > /sys/devices/virtual/workqueue/cpumask)
RET=$?
@@ -139,8 +148,16 @@ function start {
## Define platform cpulist to be thread siblings of core 0
PLATFORM_CPULIST=$(get_platform_cpu_list)
# Affine all tasks to platform cpulist
affine_tasks ${PLATFORM_CPULIST}
# Obtain current IRQ affinity setting from kernel boot cmdline
if [[ $(</proc/cmdline) =~ irqaffinity=([0-9,-]+) ]]; then
IRQ_CPULIST=${BASH_REMATCH[1]}
else
IRQ_CPULIST=${PLATFORM_CPULIST}
fi
# Affine all tasks to platform cpulist and irqs to irq cpulist.
# NOTE: irq/* tasks inherit /proc/irq/*/smp_affinity_list setting.
affine_tasks ${PLATFORM_CPULIST} ${IRQ_CPULIST}
RET=$?
if [ ${RET} -ne 0 ]; then
log_error "Failed to affine tasks ${PLATFORM_CPULIST}, rc=${RET}"