Merge "Add statsd metrics for ansible runs"

This commit is contained in:
Zuul 2018-09-13 00:14:17 +00:00 committed by Gerrit Code Review
commit 93d62f8d71
3 changed files with 58 additions and 1 deletions

View File

@ -1,4 +1,7 @@
iptables_extra_allowed_hosts:
- hostname: bridge.openstack.org
port: 8125
protocol: udp
- hostname: git.openstack.org
port: 8125
protocol: udp

View File

@ -13,7 +13,7 @@
cron:
name: run_all.sh
state: present
job: 'flock -n /var/run/ansible/run_all.lock bash /opt/system-config/run_all.sh >> /var/log/ansible/run_all_cron.log 2>&1'
job: 'flock -n /var/run/ansible/run_all.lock bash /opt/system-config/run_all.sh -c >> /var/log/ansible/run_all_cron.log 2>&1'
minute: "{{ update_cron_interval.minute }}"
hour: "{{ update_cron_interval.hour }}"
day: "{{ update_cron_interval.day }}"

View File

@ -22,6 +22,41 @@ set -e
SYSTEM_CONFIG=/opt/system-config
ANSIBLE_PLAYBOOKS=$SYSTEM_CONFIG/playbooks
# We only send stats if running under cron
UNDER_CRON=0
while getopts ":c" arg; do
case $arg in
c)
UNDER_CRON=1
;;
esac
done
GLOBAL_START_TIME=$(date '+%s')
# Send a timer stat to statsd
# send_timer metric [start_time]
# * uses timer metric bridge.ansible.run_all.<$1>
# * time will be taken from last call of start_timer, or $2 if set
function send_timer {
# Only send stats under cron conditions
if [[ ${UNDER_CRON} != 1 ]]; then
return
fi
local current=$(date '+%s')
local name=$1
local start=${2-$_START_TIME}
local elapsed_ms=$(( (current - start) * 1000 ))
echo "bridge.ansible.run_all.${name}:${elapsed_ms}|ms" | nc -w 1 -u graphite.openstack.org 8125
}
# See send_timer
function start_timer {
_START_TIME=$(date '+%s')
}
echo "--- begin run @ $(date -Is) ---"
# It's possible for connectivity to a server or manifest application to break
@ -33,24 +68,43 @@ set +e
# stuck if they are oomkilled
# Clone system-config and install modules and roles
start_timer
timeout -k 2m 120m ansible-playbook ${ANSIBLE_PLAYBOOKS}/update-system-config.yaml
send_timer update_system_config
# Update the code on bridge
start_timer
timeout -k 2m 120m ansible-playbook ${ANSIBLE_PLAYBOOKS}/bridge.yaml
send_timer bridge
# Run the base playbook everywhere
start_timer
timeout -k 2m 120m ansible-playbook -f 50 ${ANSIBLE_PLAYBOOKS}/base.yaml
send_timer base
# Update the puppet version
start_timer
timeout -k 2m 120m ansible-playbook -f 50 ${ANSIBLE_PLAYBOOKS}/update_puppet_version.yaml
send_timer update_puppet_version
# Run the git/gerrit/zuul sequence, since it's important that they all work together
start_timer
timeout -k 2m 120m ansible-playbook -f 50 ${ANSIBLE_PLAYBOOKS}/remote_puppet_git.yaml
send_timer git
# Run AFS changes separately so we can make sure to only do one at a time
# (turns out quorum is nice to have)
start_timer
timeout -k 2m 120m ansible-playbook -f 1 ${ANSIBLE_PLAYBOOKS}/remote_puppet_afs.yaml
send_timer afs
# Run everything else. We do not care if the other things worked
start_timer
timeout -k 2m 120m ansible-playbook -f 50 ${ANSIBLE_PLAYBOOKS}/remote_puppet_else.yaml
send_timer else
# Send the combined time for everything
send_timer total $GLOBAL_START_TIME
echo "--- end run @ $(date -Is) ---"
echo