resolved issue with neutron ha failover cron clobbering other crons

The cron job for the Neutron HA L3 failover is non-blocking. This
makes it possible for many instances of the same job to run at once,
which can cause all kinds of terrible issues. This change aggregates
the jobs into a single script guarded by a lock file, so that only
one instance of the job ever runs at a time.
This commit is contained in:
Kevin Carter 2014-10-23 19:05:36 -05:00
parent eee8b0dcad
commit 851a38d1dc
2 changed files with 87 additions and 5 deletions

View File

@ -26,17 +26,38 @@
name: "{{ inventory_hostname }}" name: "{{ inventory_hostname }}"
register: hashed_name register: hashed_name
# Facts consumed by the Neutron HA cron job script template:
#   do_job     - the shell command line the script runs and sends to logger
#   sleep_time - the argument the script passes to `sleep` after the job
- name: Creating Job Facts
set_fact:
# Sources /root/openrc, then runs neutron-ha-tool.py with --l3-agent-migrate.
do_job: ". /root/openrc && /opt/neutron-ha-tool.py --l3-agent-migrate"
# Taken from the int_value of the previously registered hashed_name result.
sleep_time: "{{ hashed_name.int_value }}"
# Render neutron_ha_tool.sh.j2 to /opt/neutron-ha-tool.sh, owned by root:root
# and executable (0755); this is the path the cron entry runs as its job.
- name: "Drop Neutron HA job script"
template:
src: "neutron_ha_tool.sh.j2"
dest: "/opt/neutron-ha-tool.sh"
owner: "root"
group: "root"
mode: "0755"
- name: Create Neutron HA - name: Create Neutron HA
cron: cron:
name: "{{ item.name }}" name: "neutron-ha-tool"
minute: "*/1" minute: "*/1"
day: "*" day: "*"
hour: "*" hour: "*"
month: "*" month: "*"
state: present state: present
job: "sleep {{ hashed_name.int_value }} && . /root/openrc && /opt/neutron-ha-tool.py {{ item.command }}" job: "/opt/neutron-ha-tool.sh"
user: root user: root
cron_file: "{{ item.name }}" cron_file: "neutron-ha-tool"
# This should be revised in Kilo
# kilo_revision: true
- name: Remove old cronjobs if found.
file:
path: "{{ item }}"
state: "absent"
with_items: with_items:
- { name: "l3_agent_migrate", command: "--l3-agent-migrate" } - /etc/cron.d/replicate_dhcp
- { name: "replicate_dhcp", command: "--replicate-dhcp" } - /etc/cron.d/l3_agent_migrate

View File

@ -0,0 +1,61 @@
#!/usr/bin/env bash
# Copyright 2014, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Exit as soon as a command fails (errexit).
set -o errexit

# Lock file that marks a run of this script as in progress; it holds the PID
# of the run that created it.
LOCKFILE='/var/run/neutron_ha_tool.lock'

# Hand failures caught by the ERR trap to my_trap_handler.
trap 'my_trap_handler' ERR
# ERR trap handler: hands cleanup over to kill_job.
my_trap_handler() {
    kill_job
}
# Release the lock by deleting the lock file.
#
# -f makes this a no-op when the file is already gone, so a repeated or racing
# unlock does not fail and, under `set -e`, abort the script. `--` stops option
# parsing in case LOCKFILE ever starts with a dash.
function unlock() {
    rm -f -- "${LOCKFILE}"
}
# Run the templated job command and send what it prints on stdout to logger.
do_job() {
    local job_output
    # The job's exit status is ignored here; only logger's status decides
    # whether this function fails.
    job_output="$({{ do_job }})" || true
    logger "${job_output}"
}
# Pause for the templated duration handed to sleep.
cooldown() {
    sleep {{ sleep_time }}
}
# Kill the process recorded in the lock file, then release the lock.
#
# The recorded PID is signalled BEFORE the lock file is removed. Removing the
# file first and only then testing for its existence would make the kill
# unreachable and leave a stuck job running.
function kill_job() {
    # Without a lock file there is no recorded process and nothing to release.
    if [ ! -f "${LOCKFILE}" ]; then
        return 0
    fi

    # An unreadable or empty lock file yields an empty PID, which is skipped
    # by the numeric check below.
    PID="$(cat "${LOCKFILE}" 2> /dev/null || true)"

    # Signal only a numeric PID that is still alive and is not this shell.
    # When this runs from the ERR trap of the run that owns the lock, the
    # recorded PID is our own; killing ourselves would leave the lock behind.
    if [[ "${PID}" =~ ^[0-9]+$ ]] && [ "${PID}" -ne "$$" ] \
            && kill -0 "${PID}" 2> /dev/null; then
        # The process may exit between the liveness check and the kill.
        kill -9 "${PID}" 2> /dev/null || true
    fi

    unlock
}
# Entry point: take the lock and run the job, or, when another run already
# holds the lock, clean up after it once the lock has gone stale.
#
# The lock is created under noclobber so that "does the file exist?" and
# "write our PID into it" happen as one step; two runs started together cannot
# both acquire it. The PID is written with a plain redirect, so nothing is
# printed on stdout.
if ( set -o noclobber; echo "$$" > "${LOCKFILE}" ) 2> /dev/null; then
    do_job
    cooldown
    unlock
elif [ -n "$(find "${LOCKFILE}" -mmin +15 2> /dev/null)" ]; then
    # find printed the lock file, i.e. it was last modified more than
    # 15 minutes ago; treat the run that created it as stuck.
    logger "Stale pid found for ${LOCKFILE}. Killing any left over processes and unlocking"
    kill_job
fi