From 851a38d1dc7e506b1967e03c6c4c667b9ece9bf7 Mon Sep 17 00:00:00 2001
From: Kevin Carter
Date: Thu, 23 Oct 2014 19:05:36 -0500
Subject: [PATCH] resolved issue with neutron ha failover cron clobbering other crons

the cron job for the neutron ha l3 failover is non blocking. this makes
it so that it's possible to run many of the same job which can cause all
kinds of terribad issues. this aggregates the jobs into a single script
that has a lock file such that it will only ever run one of the single
jobs.
---
 .../roles/neutron_l3_ha/tasks/main.yml        | 31 ++++++++--
 .../templates/neutron_ha_tool.sh.j2           | 61 +++++++++++++++++++
 2 files changed, 87 insertions(+), 5 deletions(-)
 create mode 100644 rpc_deployment/roles/neutron_l3_ha/templates/neutron_ha_tool.sh.j2

diff --git a/rpc_deployment/roles/neutron_l3_ha/tasks/main.yml b/rpc_deployment/roles/neutron_l3_ha/tasks/main.yml
index 9c0062f140..a883a716d5 100644
--- a/rpc_deployment/roles/neutron_l3_ha/tasks/main.yml
+++ b/rpc_deployment/roles/neutron_l3_ha/tasks/main.yml
@@ -26,17 +26,38 @@
     name: "{{ inventory_hostname }}"
   register: hashed_name
 
+# These are used in the Neutron HA Cron job script
+- name: Creating Job Facts
+  set_fact:
+    do_job: ". /root/openrc && /opt/neutron-ha-tool.py --l3-agent-migrate"
+    sleep_time: "{{ hashed_name.int_value }}"
+
+- name: "Drop Neutron HA job script"
+  template:
+    src: "neutron_ha_tool.sh.j2"
+    dest: "/opt/neutron-ha-tool.sh"
+    owner: "root"
+    group: "root"
+    mode: "0755"
+
 - name: Create Neutron HA
   cron:
-    name: "{{ item.name }}"
+    name: "neutron-ha-tool"
     minute: "*/1"
     day: "*"
     hour: "*"
     month: "*"
     state: present
-    job: "sleep {{ hashed_name.int_value }} && . /root/openrc && /opt/neutron-ha-tool.py {{ item.command }}"
+    job: "/opt/neutron-ha-tool.sh"
     user: root
-    cron_file: "{{ item.name }}"
+    cron_file: "neutron-ha-tool"
+
+# This should be revised in Kilo
+# kilo_revision: true
+- name: Remove old cronjobs if found.
+  file:
+    path: "{{ item }}"
+    state: "absent"
   with_items:
-    - { name: "l3_agent_migrate", command: "--l3-agent-migrate" }
-    - { name: "replicate_dhcp", command: "--replicate-dhcp" }
+    - /etc/cron.d/replicate_dhcp
+    - /etc/cron.d/l3_agent_migrate
diff --git a/rpc_deployment/roles/neutron_l3_ha/templates/neutron_ha_tool.sh.j2 b/rpc_deployment/roles/neutron_l3_ha/templates/neutron_ha_tool.sh.j2
new file mode 100644
index 0000000000..c8d5c68bbe
--- /dev/null
+++ b/rpc_deployment/roles/neutron_l3_ha/templates/neutron_ha_tool.sh.j2
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+
+# Copyright 2014, Rackspace US, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+LOCKFILE="/var/run/neutron_ha_tool.lock"
+
+# On any trapped error, release the lock so the next cron run can proceed
+trap my_trap_handler ERR
+
+function my_trap_handler() {
+  kill_job
+}
+
+function unlock() {
+  rm -f "${LOCKFILE}"  # -f: an already-missing lock is not an error
+}
+
+function do_job() {
+  # Run the templated job command and send its output to syslog
+  logger "$({{ do_job }})"
+}
+
+function cooldown() {
+  # Sleep for the templated number of seconds while still holding the lock
+  sleep {{ sleep_time }}
+}
+
+function kill_job() {
+  # Release the lock, then kill the recorded pid if alive and not this script.
+  PID="$(cat "${LOCKFILE}" 2>/dev/null || true)"
+  unlock
+  if [ "${PID:-$$}" != "$$" ] && kill -0 "${PID}" 2>/dev/null; then
+    kill -9 "${PID}"  # never self-kill: the ERR trap also runs this function
+  fi
+}
+
+if [ ! -f "${LOCKFILE}" ]; then
+  echo $$ | tee "${LOCKFILE}"
+  do_job
+  cooldown
+  unlock
+else
+  if [ "$(find "${LOCKFILE}" -mmin +15)" ]; then
+    logger "Stale pid found for ${LOCKFILE}. Killing any left over processes and unlocking"
+    kill_job
+  fi
+fi