From 39fe0ab5b935eb6c6262ed8f2a1fd0fb6e456c10 Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Wed, 25 Mar 2020 09:20:57 +0100 Subject: [PATCH] Do not race with locks for the sync/wrapper script Exit immediately, without taking the lock, when the jobs file is empty or missing. Additionally, limit the time given for the sync trigger and the service wrapper scripts, when acquiring a lock. Then fail, if the lock cannot be acquired within 10 seconds. This prevents other possible racy scenarios, like deadlocks. Change-Id: I19ac0c3b43c50be00350fa500ccc2a5fb1ab545c Signed-off-by: Bogdan Dobrelya --- .../tripleo_systemd_wrapper/templates/service_sync.j2 | 11 +++++++---- .../templates/service_wrapper.j2 | 7 +++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/tripleo_ansible/roles/tripleo_systemd_wrapper/templates/service_sync.j2 b/tripleo_ansible/roles/tripleo_systemd_wrapper/templates/service_sync.j2 index 63b46d5a3..becc0b07a 100644 --- a/tripleo_ansible/roles/tripleo_systemd_wrapper/templates/service_sync.j2 +++ b/tripleo_ansible/roles/tripleo_systemd_wrapper/templates/service_sync.j2 @@ -32,12 +32,15 @@ function start_service { $CONTAINER_CMD $@ } +jobs_file="{{ tripleo_systemd_wrapper_service_dir }}/{{ tripleo_systemd_wrapper_service_name }}/processes" +[ -s "$jobs_file" ] || exit 0 # nothing to do, no need for locking, just exit + exec {lock_fd}>/var/lock/{{ tripleo_systemd_wrapper_service_name }}-processes.lock || exit 1 -# In case service_wrapper script already locked the commands, we just wait. -flock "$lock_fd" +# In case service_wrapper script already locked the commands, we just wait for a 10 sec. 
+flock -w 10 "$lock_fd" || exit 1 IFS=$'\n' -for LINE in $(cat {{ tripleo_systemd_wrapper_service_dir }}/{{ tripleo_systemd_wrapper_service_name }}/processes); do +for LINE in $(cat ${jobs_file}); do NETNS=$(echo $LINE | awk '{ print $1 }') IFS=$' ' ARGS=$(echo $LINE | sed -e "s|$NETNS ||" | xargs) # TODO(emilien) investigate if we should rather run docker/podman ps instead of ps on the host @@ -46,6 +49,6 @@ for LINE in $(cat {{ tripleo_systemd_wrapper_service_dir }}/{{ tripleo_systemd_w fi done # truncate the file so we don't start them again -:> {{ tripleo_systemd_wrapper_service_dir }}/{{ tripleo_systemd_wrapper_service_name }}/processes +:> "$jobs_file" flock -u "$lock_fd" diff --git a/tripleo_ansible/roles/tripleo_systemd_wrapper/templates/service_wrapper.j2 b/tripleo_ansible/roles/tripleo_systemd_wrapper/templates/service_wrapper.j2 index 2ae4eec3a..d6e03a19b 100644 --- a/tripleo_ansible/roles/tripleo_systemd_wrapper/templates/service_wrapper.j2 +++ b/tripleo_ansible/roles/tripleo_systemd_wrapper/templates/service_wrapper.j2 @@ -7,11 +7,10 @@ ARGS="$@" NETNS=$(ip netns identify) exec {lock_fd}>/var/lock/{{ tripleo_systemd_wrapper_service_name }}-processes.lock || exit 1 -# In case service_sync script already locked the commands, we just wait. -flock "$lock_fd" +# In case service_sync script already locked the commands, we just wait for a 10 sec. +flock -w 10 "$lock_fd" || exit 1 echo "$NETNS $ARGS" >> {{ tripleo_systemd_wrapper_service_dir }}/{{ tripleo_systemd_wrapper_service_name }}/processes # only update the timestamp which fires systemd if there was an update +flock -u "$lock_fd" # prevents locking the processes file, while updating date > {{ tripleo_systemd_wrapper_service_dir }}/{{ tripleo_systemd_wrapper_service_name }}/processes-timestamp - -flock -u "$lock_fd"