f981042733
We were using /var/run/ansible/zuul_reboot.lock to flock around this cron job. Unfortunately, it seems /var/run/ansible does not exist, so the flock command fails. Move the file to /var/run/zuul_reboot.lock to work around this. Note that we want to use /var/run since it is a tmpfs, which means if the server unexpectedly reboots we'll automatically clear the lock. Change-Id: Ib0f4a434cbbf2152722493e80b5cc7a945c1f235
111 lines
3.6 KiB
YAML
111 lines
3.6 KiB
YAML
# This relies on flock -n /var/run/zuul_reboot.lock to ensure
|
|
# we don't run multiple copies of this playbook concurrently.
|
|
|
|
# TODO: stop pulling in the hourly job if we do this
|
|
# Run zuul_pull.yaml ahead of every reboot play below. Going by the play
# name, this is meant to make all services come back up on the same image
# (zuul_pull.yaml itself is not visible here -- confirm what it pulls).
- name: Ensure we are going to restart/reboot on the same image
  import_playbook: zuul_pull.yaml
|
|
|
|
# TODO Do we want to force disabled servers to be rebooted too?
|
|
# Cycle every zuul-executor host that is not in the "disabled" group,
# strictly one host at a time (serial: 1). For each host: run the
# zuul-executor role's "graceful" tasks, upgrade packages, reboot, then run
# the role's "start" tasks.
- name: "Reboot zuul-executors gracefully one at a time"
  hosts: "zuul-executor:!disabled"
  serial: 1
  tasks:
    - name: Gracefully stop the executor
      include_role:
        name: zuul-executor
        tasks_from: graceful

    - name: Upgrade executor server packages
      apt:
        update_cache: true
        # Quoted so the module receives the literal choice string "yes"
        # instead of a YAML boolean that has to be coerced back.
        upgrade: "yes"

    - name: Reboot the executor server
      reboot:

    - name: Start the executor
      include_role:
        name: zuul-executor
        tasks_from: start
|
|
|
|
# Cycle every zuul-merger host that is not in the "disabled" group,
# strictly one host at a time (serial: 1). For each host: run the
# zuul-merger role's "graceful" tasks, upgrade packages, reboot, then run
# the role's "start" tasks.
- name: "Reboot zuul-mergers gracefully one at a time"
  hosts: "zuul-merger:!disabled"
  serial: 1
  tasks:
    - name: Gracefully stop the merger
      include_role:
        name: zuul-merger
        tasks_from: graceful

    - name: Upgrade merger server packages
      apt:
        update_cache: true
        # Quoted so the module receives the literal choice string "yes"
        # instead of a YAML boolean that has to be coerced back.
        upgrade: "yes"

    - name: Reboot the merger server
      reboot:

    - name: Start the merger
      include_role:
        name: zuul-merger
        tasks_from: start
|
|
|
|
# TODO should we do both schedulers with reboots then do the webs without
|
|
# reboots?
|
|
# Cycle every zuul-scheduler host that is not in the "disabled" group,
# strictly one host at a time (serial: 1). For each host: stop the scheduler
# and web processes, upgrade packages, reboot, start both again, then poll
# the Zuul components API until this host's scheduler, web and fingergw
# entries each report the state "running". Because of serial: 1 the next
# host is not touched until the current one has passed all three waits.
- name: "Reboot zuul-schedulers gracefully one at a time"
  hosts: "zuul-scheduler:!disabled"
  serial: 1
  tasks:
    - name: Stop the scheduler process
      include_role:
        name: zuul-scheduler
        tasks_from: stop

    - name: Stop the web processes
      include_role:
        name: zuul-web
        tasks_from: stop

    - name: Upgrade scheduler server packages
      apt:
        update_cache: true
        # Quoted so the module receives the literal choice string "yes"
        # instead of a YAML boolean that has to be coerced back.
        upgrade: "yes"

    - name: Reboot the scheduler server
      reboot:

    - name: Start the scheduler process
      include_role:
        name: zuul-scheduler
        tasks_from: start

    - name: Start the web processes
      include_role:
        name: zuul-web
        tasks_from: start

    # The `until` expressions below are written bare (no "{{ }}"): Ansible
    # conditionals are already evaluated as Jinja expressions. Each one
    # requires an HTTP 200 and exactly one component entry whose hostname
    # equals inventory_hostname, with state "running".
    - name: Wait for scheduler to be running
      uri:
        url: https://zuul.opendev.org/api/components
        method: GET
        return_content: true
      register: components
      # 3 hours (360 retries x 30 second delay)
      retries: 360
      delay: 30
      until: >-
        components.status == 200
        and components.content | from_json | json_query(scheduler_query) | length == 1
        and components.content | from_json | json_query(scheduler_query) | first == 'running'
      vars:
        scheduler_query: "scheduler[?hostname=='{{ inventory_hostname }}'].state"

    - name: Wait for web to be running
      uri:
        url: https://zuul.opendev.org/api/components
        method: GET
        return_content: true
      register: components
      # 3 hours (360 retries x 30 second delay)
      retries: 360
      delay: 30
      until: >-
        components.status == 200
        and components.content | from_json | json_query(web_query) | length == 1
        and components.content | from_json | json_query(web_query) | first == 'running'
      vars:
        web_query: "web[?hostname=='{{ inventory_hostname }}'].state"

    - name: Wait for fingergw to be running
      uri:
        url: https://zuul.opendev.org/api/components
        method: GET
        return_content: true
      register: components
      # 45 minutes (180 retries x 15 second delay)
      retries: 180
      delay: 15
      until: >-
        components.status == 200
        and components.content | from_json | json_query(finger_query) | length == 1
        and components.content | from_json | json_query(finger_query) | first == 'running'
      vars:
        finger_query: "fingergw[?hostname=='{{ inventory_hostname }}'].state"
|