Fix memory overcommit that caused OOM killer
Parallel package builds use large ramdisks. It's important not to commit too much memory to these ramdisks, or we may push the system into memory exhaustion. At that stage the kernel will invoke the OOM killer, which will likely select our build, or worse, someone else's build, to sacrifice. The current algorithm only considers free memory at the instant the parallel build starts. It does not consider how many other builds are in flight but may not yet have allocated their ramdisks. The other build intends to use the memory, but we see the memory as free and try to use the same memory. The solution is to consider total memory, and the number of builds already running or which might foreseeably start in the near future (share factor), to derive an alternate estimate of memory available. We then allocate the lesser amount. Also fixed some issues with the cleanup of child processes when a newer mockchain-parallel is in use. Closes-Bug: 1917525 Signed-off-by: Scott Little <scott.little@windriver.com> Change-Id: Iab178c6f9acbd5a209d66d0da21f367911f34905
This commit is contained in:
parent
0972ffe246
commit
78be59c758
@ -122,6 +122,10 @@ number_of_users () {
|
|||||||
users | tr ' ' '\n' | sort --uniq | wc -l
|
users | tr ' ' '\n' | sort --uniq | wc -l
|
||||||
}
|
}
|
||||||
|
|
||||||
|
total_mem_gb () {
|
||||||
|
free -g | grep 'Mem:' | awk '{ print $2 }'
|
||||||
|
}
|
||||||
|
|
||||||
available_mem_gb () {
|
available_mem_gb () {
|
||||||
free -g | grep 'Mem:' | awk '{ print $7 }'
|
free -g | grep 'Mem:' | awk '{ print $7 }'
|
||||||
}
|
}
|
||||||
@ -238,26 +242,41 @@ compute_resources () {
|
|||||||
local users=$(number_of_users)
|
local users=$(number_of_users)
|
||||||
if [ $users -lt 1 ]; then users=1; fi
|
if [ $users -lt 1 ]; then users=1; fi
|
||||||
local mem=$(available_mem_gb)
|
local mem=$(available_mem_gb)
|
||||||
|
local total_mem=$(total_mem_gb)
|
||||||
local disk=$(available_disk_gb)
|
local disk=$(available_disk_gb)
|
||||||
local cpus=$(number_of_cpus)
|
local cpus=$(number_of_cpus)
|
||||||
local num_users=$(sqrt $users)
|
local num_users=$(sqrt $users)
|
||||||
local num_build=$(number_of_builds_in_progress)
|
local num_build=$(number_of_builds_in_progress)
|
||||||
num_build=$((num_build+1))
|
num_build=$((num_build+1))
|
||||||
echo "compute_resources: total: cpus=$cpus, mem=$mem, disk=$disk, weight=$weight, num_build=$num_build"
|
echo "compute_resources: total: cpus=$cpus, total_mem=$total_mem, avail_mem=$mem, disk=$disk, weight=$weight, num_build=$num_build"
|
||||||
|
|
||||||
# What fraction of the machine will we use
|
# What fraction of the machine will we use
|
||||||
local share_factor=$num_users
|
local share_factor=$num_users
|
||||||
if [ $share_factor -gt $((MAX_SHARE_FACTOR+num_build-1)) ]; then share_factor=$((MAX_SHARE_FACTOR+num_build-1)); fi
|
if [ $share_factor -gt $((MAX_SHARE_FACTOR+num_build-1)) ]; then share_factor=$((MAX_SHARE_FACTOR+num_build-1)); fi
|
||||||
if [ $share_factor -lt $num_build ]; then share_factor=$num_build; fi
|
if [ $share_factor -lt $num_build ]; then share_factor=$num_build; fi
|
||||||
local mem_share_factor=$((share_factor-num_build))
|
|
||||||
|
# What fraction of free memory can we use.
|
||||||
|
# e.g.
|
||||||
|
# We intend to support 4 concurrent builds (share_factor)
|
||||||
|
# Two builds (excluding ours) are already underway (num_build-1)
|
||||||
|
# So we should be able to support 2 more builds (mem_share_factor)
|
||||||
|
local mem_share_factor=$((share_factor-(num_build-1)))
|
||||||
if [ $mem_share_factor -lt 1 ]; then mem_share_factor=1; fi
|
if [ $mem_share_factor -lt 1 ]; then mem_share_factor=1; fi
|
||||||
|
|
||||||
echo "compute_resources: share_factor=$share_factor mem_share_factor=$mem_share_factor"
|
echo "compute_resources: share_factor=$share_factor mem_share_factor=$mem_share_factor"
|
||||||
|
|
||||||
# What resources are we permitted to use
|
# What resources are we permitted to use
|
||||||
|
# Continuing the example from above ... memory share is the lesser of
|
||||||
|
# - Half the available memory (mem/mem_share_factor)
|
||||||
|
# - A quarter of the total memory (total_mem/share_factor)
|
||||||
local mem_share=$(((mem-MEMORY_RESERVE)/mem_share_factor))
|
local mem_share=$(((mem-MEMORY_RESERVE)/mem_share_factor))
|
||||||
if [ $mem_share -lt 0 ]; then mem_share=0; fi
|
if [ $mem_share -lt 0 ]; then mem_share=0; fi
|
||||||
|
local total_mem_share=$(((total_mem-MEMORY_RESERVE)/share_factor))
|
||||||
|
if [ $total_mem_share -lt 0 ]; then total_mem_share=0; fi
|
||||||
|
if [ $mem_share -gt $total_mem_share ]; then mem_share=$total_mem_share; fi
|
||||||
local disk_share=$((disk/share_factor))
|
local disk_share=$((disk/share_factor))
|
||||||
local cpus_share=$((cpus/share_factor))
|
local cpus_share=$((cpus/share_factor))
|
||||||
|
|
||||||
echo "compute_resources: our share: cpus=$cpus_share, mem=$mem_share, disk=$disk_share"
|
echo "compute_resources: our share: cpus=$cpus_share, mem=$mem_share, disk=$disk_share"
|
||||||
|
|
||||||
# How many build jobs, how many jobs will use tmpfs, and how much mem for each tmpfs
|
# How many build jobs, how many jobs will use tmpfs, and how much mem for each tmpfs
|
||||||
@ -293,7 +312,7 @@ compute_resources () {
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
# Our output is saved in environmnet variables
|
# Our output is saved in environment variables
|
||||||
MOCKCHAIN_RESOURCE_ALLOCATION=$(echo $x | sed 's#^:##')
|
MOCKCHAIN_RESOURCE_ALLOCATION=$(echo $x | sed 's#^:##')
|
||||||
MAX_WORKERS=$workers
|
MAX_WORKERS=$workers
|
||||||
echo "compute_resources: MAX_WORKERS=$MAX_WORKERS, MOCKCHAIN_RESOURCE_ALLOCATION=$MOCKCHAIN_RESOURCE_ALLOCATION"
|
echo "compute_resources: MAX_WORKERS=$MAX_WORKERS, MOCKCHAIN_RESOURCE_ALLOCATION=$MOCKCHAIN_RESOURCE_ALLOCATION"
|
||||||
@ -654,7 +673,7 @@ kill_descendents ()
|
|||||||
|
|
||||||
local relevant_recursive_children="$ME"
|
local relevant_recursive_children="$ME"
|
||||||
local relevant_recursive_promote_children="mock"
|
local relevant_recursive_promote_children="mock"
|
||||||
local relevant_other_children="mockchain-parallel mockchain-parallel-1.3.4 mockchain-parallel-1.4.16"
|
local relevant_other_children="mockchain-parallel mockchain-parallel-1.3.4 mockchain-parallel-1.4.16 mockchain-parallel-2.6 mockchain-parallel-2.7"
|
||||||
|
|
||||||
local recursive_promote_children=$(for relevant_child in $relevant_recursive_promote_children; do pgrep -P $kill_pid $relevant_child; done)
|
local recursive_promote_children=$(for relevant_child in $relevant_recursive_promote_children; do pgrep -P $kill_pid $relevant_child; done)
|
||||||
local recursive_children=$(for relevant_child in $relevant_recursive_children; do pgrep -P $kill_pid $relevant_child; done)
|
local recursive_children=$(for relevant_child in $relevant_recursive_children; do pgrep -P $kill_pid $relevant_child; done)
|
||||||
@ -1181,14 +1200,24 @@ mock_clean_metadata_cfg () {
|
|||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
CMD=$((cat $CFG; \
|
#
|
||||||
grep config_opts\\[\'yum.conf\'\\\] $CFG | \
|
# From mock config, extract the embedded yum/dnf config.
|
||||||
sed 's#\\n#\n#g') | \
|
# Then extract the repo definitions,
|
||||||
grep '^[[]' | \
|
# and convert to a series of yum commands to clean the
|
||||||
grep -v main | \
|
# metadata one repo at a time. e.g.
|
||||||
sed -e 's/[][]//g' -e "s#^#${PKG_MANAGER} --enablerepo=#" -e 's#$# clean metadata#' | \
|
# CMD="yum --disablerepo=* --enablerepo=StxCentos7Distro clean metadata; \
|
||||||
sort -u | \
|
# yum --disablerepo=* --enablerepo=StxCentos7Distro-rt clean metadata;
|
||||||
tr '\n' ';')
|
# ...
|
||||||
|
# "
|
||||||
|
#
|
||||||
|
CMD=$((grep -e config_opts\\[\'yum.conf\'\\\] $CFG \
|
||||||
|
-e config_opts\\[\'dnf.conf\'\\\] $CFG | \
|
||||||
|
sed 's#\\n#\n#g') | \
|
||||||
|
grep '^[[]' | \
|
||||||
|
grep -v main | \
|
||||||
|
sed -e 's/[][]//g' -e "s#^#${PKG_MANAGER} --disablerepo=* --enablerepo=#" -e 's#$# clean metadata#' | \
|
||||||
|
sort -u | \
|
||||||
|
tr '\n' ';')
|
||||||
echo "$MOCK --root $CFG --configdir $(dirname $CFG) --chroot bash -c $CMD" &> $TMP
|
echo "$MOCK --root $CFG --configdir $(dirname $CFG) --chroot bash -c $CMD" &> $TMP
|
||||||
trapwrap_n $CFG $MOCK --root $CFG --configdir $(dirname $CFG) --chroot "bash -c '($CMD)'" &>>$TMP
|
trapwrap_n $CFG $MOCK --root $CFG --configdir $(dirname $CFG) --chroot "bash -c '($CMD)'" &>>$TMP
|
||||||
RC=$?
|
RC=$?
|
||||||
@ -2338,6 +2367,7 @@ if [ $CAREFUL -eq 1 ]; then
|
|||||||
CMD_OPTIONS="$MOCK_PASSTHROUGH --no-cleanup-after"
|
CMD_OPTIONS="$MOCK_PASSTHROUGH --no-cleanup-after"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
CMD_OPTIONS+=" $MOCK_PASSTHROUGH --enable-plugin=package_state"
|
||||||
CMD_OPTIONS+=" --log=$MOCKCHAIN_LOG"
|
CMD_OPTIONS+=" --log=$MOCKCHAIN_LOG"
|
||||||
|
|
||||||
echo "CAREFUL=$CAREFUL"
|
echo "CAREFUL=$CAREFUL"
|
||||||
|
@ -25,7 +25,14 @@
|
|||||||
|
|
||||||
export ME=$(basename "$0")
|
export ME=$(basename "$0")
|
||||||
CMDLINE="$ME $@"
|
CMDLINE="$ME $@"
|
||||||
|
BUILD_RPMS_PARALLEL_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )"
|
||||||
|
|
||||||
|
# Set PKG_MANAGER for our build environment.
|
||||||
|
source "${BUILD_RPMS_PARALLEL_DIR}/pkg-manager-utils.sh"
|
||||||
|
|
||||||
|
|
||||||
|
# Build for distribution. Currently 'centos' is only supported value.
|
||||||
|
export DISTRO="centos"
|
||||||
|
|
||||||
CREATEREPO=$(which createrepo_c)
|
CREATEREPO=$(which createrepo_c)
|
||||||
if [ $? -ne 0 ]; then
|
if [ $? -ne 0 ]; then
|
||||||
@ -42,6 +49,7 @@ if [ ! -d ${LOCAL_REPO} ]; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Make sure we have a dependency cache
|
||||||
DEPENDANCY_DIR="${LOCAL_REPO}/dependancy-cache"
|
DEPENDANCY_DIR="${LOCAL_REPO}/dependancy-cache"
|
||||||
SRPM_DIRECT_REQUIRES_FILE="$DEPENDANCY_DIR/SRPM-direct-requires"
|
SRPM_DIRECT_REQUIRES_FILE="$DEPENDANCY_DIR/SRPM-direct-requires"
|
||||||
SRPM_TRANSITIVE_REQUIRES_FILE="$DEPENDANCY_DIR/SRPM-transitive-requires"
|
SRPM_TRANSITIVE_REQUIRES_FILE="$DEPENDANCY_DIR/SRPM-transitive-requires"
|
||||||
@ -118,7 +126,7 @@ create-no-clean-list () {
|
|||||||
local g
|
local g
|
||||||
|
|
||||||
for g in $install_groups; do
|
for g in $install_groups; do
|
||||||
# Find manditory packages in the group.
|
# Find mandatory packages in the group.
|
||||||
# Discard anything before (and including) 'Mandatory Packages:'
|
# Discard anything before (and including) 'Mandatory Packages:'
|
||||||
# and anything after (and including) 'Optional Packages:'.
|
# and anything after (and including) 'Optional Packages:'.
|
||||||
# Also discard leading spaces or '+' characters.
|
# Also discard leading spaces or '+' characters.
|
||||||
@ -135,7 +143,7 @@ create-no-clean-list () {
|
|||||||
|
|
||||||
while [ $noclean_list_len -gt $noclean_last_list_len ]; do
|
while [ $noclean_list_len -gt $noclean_last_list_len ]; do
|
||||||
noclean_last_list_len=$noclean_list_len
|
noclean_last_list_len=$noclean_list_len
|
||||||
noclean_list=$( (yum -c $MY_YUM_CONF deplist $noclean_list 2>> /dev/null | grep provider: | awk '{ print $2 }' | awk -F . '{ print $1 }'; for p in $noclean_list; do echo $p; done) | sort --uniq)
|
noclean_list=$( (${PKG_MANAGER} -c $MY_YUM_CONF deplist $noclean_list 2>> /dev/null | grep provider: | awk '{ print $2 }' | awk -F . '{ print $1 }'; for p in $noclean_list; do echo $p; done) | sort --uniq)
|
||||||
noclean_list_len=$(echo $noclean_list | wc -w)
|
noclean_list_len=$(echo $noclean_list | wc -w)
|
||||||
done
|
done
|
||||||
|
|
||||||
@ -475,7 +483,7 @@ kill_descendents ()
|
|||||||
|
|
||||||
local relevant_recursive_children="$ME"
|
local relevant_recursive_children="$ME"
|
||||||
local relevant_recursive_promote_children="mock"
|
local relevant_recursive_promote_children="mock"
|
||||||
local relevant_other_children="mockchain-parallel"
|
local relevant_other_children="mockchain-parallel mockchain-parallel-1.3.4 mockchain-parallel-1.4.16 mockchain-parallel-2.6 mockchain-parallel-2.7"
|
||||||
|
|
||||||
local recursive_promote_children=$(for relevant_child in $relevant_recursive_promote_children; do pgrep -P $kill_pid $relevant_child; done)
|
local recursive_promote_children=$(for relevant_child in $relevant_recursive_promote_children; do pgrep -P $kill_pid $relevant_child; done)
|
||||||
local recursive_children=$(for relevant_child in $relevant_recursive_children; do pgrep -P $kill_pid $relevant_child; done)
|
local recursive_children=$(for relevant_child in $relevant_recursive_children; do pgrep -P $kill_pid $relevant_child; done)
|
||||||
@ -964,7 +972,24 @@ mock_clean_metadata_cfg () {
|
|||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
CMD=$((cat $CFG; grep config_opts\\[\'yum.conf\'\\\] $CFG | sed 's#\\n#\n#g') | grep '^[[]' | grep -v main | sed 's/[][]//g' | sed 's#^#yum --enablerepo=#' | sed 's#$# clean metadata#' | sort -u | tr '\n' ';')
|
#
|
||||||
|
# From mock config, extract the embedded yum/dnf config.
|
||||||
|
# Then extract the repo definitions,
|
||||||
|
# and convert to a series of yum commands to clean the
|
||||||
|
# metadata one repo at a time. e.g.
|
||||||
|
# CMD="yum --disablerepo=* --enablerepo=StxCentos7Distro clean metadata; \
|
||||||
|
# yum --disablerepo=* --enablerepo=StxCentos7Distro-rt clean metadata;
|
||||||
|
# ...
|
||||||
|
# "
|
||||||
|
#
|
||||||
|
CMD=$((grep -e config_opts\\[\'yum.conf\'\\\] $CFG \
|
||||||
|
-e config_opts\\[\'dnf.conf\'\\\] $CFG | \
|
||||||
|
sed 's#\\n#\n#g') | \
|
||||||
|
grep '^[[]' | \
|
||||||
|
grep -v main | \
|
||||||
|
sed -e 's/[][]//g' -e "s#^#${PKG_MANAGER} --disablerepo=* --enablerepo=#" -e 's#$# clean metadata#' | \
|
||||||
|
sort -u | \
|
||||||
|
tr '\n' ';')
|
||||||
echo "$MOCK --root $CFG --configdir $(dirname $CFG) --chroot bash -c $CMD" &> $TMP
|
echo "$MOCK --root $CFG --configdir $(dirname $CFG) --chroot bash -c $CMD" &> $TMP
|
||||||
trapwrap_n $CFG $MOCK --root $CFG --configdir $(dirname $CFG) --chroot "bash -c '($CMD)'" &>>$TMP
|
trapwrap_n $CFG $MOCK --root $CFG --configdir $(dirname $CFG) --chroot "bash -c '($CMD)'" &>>$TMP
|
||||||
RC=$?
|
RC=$?
|
||||||
@ -1129,6 +1154,7 @@ clean_yum_cache_cfg () {
|
|||||||
return $RC
|
return $RC
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
clean_yum_cache () {
|
clean_yum_cache () {
|
||||||
echo "${FUNCNAME[0]}: in"
|
echo "${FUNCNAME[0]}: in"
|
||||||
clean_yum_cache_cfg $BUILD_CFG
|
clean_yum_cache_cfg $BUILD_CFG
|
||||||
@ -1249,7 +1275,6 @@ while true ; do
|
|||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
||||||
# Reset variables
|
# Reset variables
|
||||||
if [ -n "$MY_WORKSPACE" ]; then
|
if [ -n "$MY_WORKSPACE" ]; then
|
||||||
export MY_WORKSPACE_TOP=${MY_WORKSPACE_TOP:-$MY_WORKSPACE}
|
export MY_WORKSPACE_TOP=${MY_WORKSPACE_TOP:-$MY_WORKSPACE}
|
||||||
|
Loading…
Reference in New Issue
Block a user