Do not race with locks for the sync/wrapper script

Exit immediately, without taking the lock at all, when the jobs file is
empty or missing. Additionally, limit the time the sync trigger and the
service wrapper scripts may wait when acquiring the lock, and fail if
the lock cannot be acquired within 10 seconds. This prevents other
possible racy scenarios, like deadlocks.

Change-Id: I19ac0c3b43c50be00350fa500ccc2a5fb1ab545c
Signed-off-by: Bogdan Dobrelya <bdobreli@redhat.com>
This commit is contained in:
Bogdan Dobrelya 2020-03-25 09:20:57 +01:00
parent b338e16692
commit 39fe0ab5b9
2 changed files with 10 additions and 8 deletions

View File

@ -32,12 +32,15 @@ function start_service {
$CONTAINER_CMD $@
}
jobs_file="{{ tripleo_systemd_wrapper_service_dir }}/{{ tripleo_systemd_wrapper_service_name }}/processes"
[ -s "$jobs_file" ] || exit 0 # nothing to do, no need for locking, just exit
exec {lock_fd}>/var/lock/{{ tripleo_systemd_wrapper_service_name }}-processes.lock || exit 1
# In case service_wrapper script already locked the commands, we just wait.
flock "$lock_fd"
# In case service_wrapper script already locked the commands, we wait for up to 10 seconds.
flock -w 10 "$lock_fd" || exit 1
IFS=$'\n'
for LINE in $(cat {{ tripleo_systemd_wrapper_service_dir }}/{{ tripleo_systemd_wrapper_service_name }}/processes); do
for LINE in $(cat ${jobs_file}); do
NETNS=$(echo $LINE | awk '{ print $1 }')
IFS=$' ' ARGS=$(echo $LINE | sed -e "s|$NETNS ||" | xargs)
# TODO(emilien) investigate if we should rather run docker/podman ps instead of ps on the host
@ -46,6 +49,6 @@ for LINE in $(cat {{ tripleo_systemd_wrapper_service_dir }}/{{ tripleo_systemd_w
fi
done
# truncate the file so we don't start them again
:> {{ tripleo_systemd_wrapper_service_dir }}/{{ tripleo_systemd_wrapper_service_name }}/processes
:> "$jobs_file"
flock -u "$lock_fd"

View File

@ -7,11 +7,10 @@ ARGS="$@"
NETNS=$(ip netns identify)
exec {lock_fd}>/var/lock/{{ tripleo_systemd_wrapper_service_name }}-processes.lock || exit 1
# In case service_sync script already locked the commands, we just wait.
flock "$lock_fd"
# In case service_sync script already locked the commands, we wait for up to 10 seconds.
flock -w 10 "$lock_fd" || exit 1
echo "$NETNS $ARGS" >> {{ tripleo_systemd_wrapper_service_dir }}/{{ tripleo_systemd_wrapper_service_name }}/processes
# only update the timestamp which fires systemd if there was an update
flock -u "$lock_fd" # prevents locking the processes file, while updating
date > {{ tripleo_systemd_wrapper_service_dir }}/{{ tripleo_systemd_wrapper_service_name }}/processes-timestamp
flock -u "$lock_fd"