---
# This relies on flock -n /var/run/zuul_reboot.lock to ensure
# we don't run multiple copies of this playbook concurrently.

# TODO: stop pulling in the hourly job if we do this
- name: "Ensure we are going to restart/reboot on the same image"
  import_playbook: zuul_pull.yaml

# TODO Do we want to force disabled servers to be rebooted too?
# Rolling maintenance of the executors. Hosts in the "disabled" group are
# excluded by the host pattern, and serial: 1 makes Ansible run every task
# on one host before it starts on the next.
- hosts: "zuul-executor:!disabled"
  name: "Reboot zuul-executors gracefully one at a time"
  serial: 1
  tasks:
    # Runs the "graceful" task file of the zuul-executor role.
    - name: Gracefully stop the executor
      include_role:
        name: zuul-executor
        tasks_from: graceful

    - name: Upgrade executor server packages
      apt:
        update_cache: true
        # "upgrade" is a string choice (dist/full/no/safe/yes), not a
        # boolean, so the value is quoted to pass the literal "yes".
        upgrade: "yes"
      register: apt_action
      # 20 minute wait for unattended-upgrades to complete
      delay: 30
      retries: 40
      # Keep retrying only while the failure is apt lock contention; success
      # or any other failure ends the retry loop immediately.
      until: >-
        apt_action is success
        or 'Failed to lock apt for exclusive operation' not in apt_action.msg

    - name: Reboot the executor server
      reboot:

    # Runs the "start" task file of the zuul-executor role.
    - name: Start the executor
      include_role:
        name: zuul-executor
        tasks_from: start

# Same sequence as the executor play above, applied to the mergers:
# graceful stop, package upgrade, reboot, start.
- hosts: "zuul-merger:!disabled"
  name: "Reboot zuul-mergers gracefully one at a time"
  serial: 1
  tasks:
    # Runs the "graceful" task file of the zuul-merger role.
    - name: Gracefully stop the merger
      include_role:
        name: zuul-merger
        tasks_from: graceful

    - name: Upgrade merger server packages
      apt:
        update_cache: true
        # "upgrade" is a string choice (dist/full/no/safe/yes), not a
        # boolean, so the value is quoted to pass the literal "yes".
        upgrade: "yes"
      register: apt_action
      # 20 minute wait for unattended-upgrades to complete
      delay: 30
      retries: 40
      # Keep retrying only while the failure is apt lock contention; success
      # or any other failure ends the retry loop immediately.
      until: >-
        apt_action is success
        or 'Failed to lock apt for exclusive operation' not in apt_action.msg

    - name: Reboot the merger server
      reboot:

    # Runs the "start" task file of the zuul-merger role.
    - name: Start the merger
      include_role:
        name: zuul-merger
        tasks_from: start

# TODO should we do both schedulers with reboots then do the webs without
# reboots?
# Rolling maintenance of the scheduler hosts, which also run the web
# processes. Hosts in the "disabled" group are excluded, and serial: 1 makes
# Ansible finish the whole task list (including the waits below) on one host
# before it starts on the next.
- hosts: "zuul-scheduler:!disabled"
  name: "Reboot zuul-schedulers gracefully one at a time"
  serial: 1
  tasks:
    - name: Stop the scheduler process
      include_role:
        name: zuul-scheduler
        tasks_from: stop

    - name: Stop the web processes
      include_role:
        name: zuul-web
        tasks_from: stop

    - name: Upgrade scheduler server packages
      apt:
        update_cache: true
        # "upgrade" is a string choice (dist/full/no/safe/yes), not a
        # boolean, so the value is quoted to pass the literal "yes".
        upgrade: "yes"
      register: apt_action
      # 20 minute wait for unattended-upgrades to complete
      delay: 30
      retries: 40
      # Keep retrying only while the failure is apt lock contention; success
      # or any other failure ends the retry loop immediately.
      until: >-
        apt_action is success
        or 'Failed to lock apt for exclusive operation' not in apt_action.msg

    - name: Reboot the scheduler server
      reboot:

    - name: Start the scheduler process
      include_role:
        name: zuul-scheduler
        tasks_from: start

    - name: Start the web processes
      include_role:
        name: zuul-web
        tasks_from: start

    # The three waits below poll the components API and each require exactly
    # one entry for this host, of the relevant component kind, whose state is
    # "running". The "until" values are bare expressions: Ansible already
    # evaluates "until" as a Jinja conditional, so wrapping it in {{ }} is
    # unnecessary and triggers a templating-delimiter warning.
    - name: Wait for scheduler to be running
      uri:
        url: https://zuul.opendev.org/api/components
        method: GET
        return_content: true
      register: components
      # 3 hours
      retries: 360
      delay: 30
      # The status check comes first so the body is only parsed on HTTP 200.
      until: >-
        components.status == 200
        and components.content | from_json | json_query(scheduler_query) | length == 1
        and components.content | from_json | json_query(scheduler_query) | first == 'running'
      vars:
        scheduler_query: "scheduler[?hostname=='{{ inventory_hostname }}'].state"

    - name: Wait for web to be running
      uri:
        url: https://zuul.opendev.org/api/components
        method: GET
        return_content: true
      register: components
      # 3 hours
      retries: 360
      delay: 30
      # The status check comes first so the body is only parsed on HTTP 200.
      until: >-
        components.status == 200
        and components.content | from_json | json_query(web_query) | length == 1
        and components.content | from_json | json_query(web_query) | first == 'running'
      vars:
        web_query: "web[?hostname=='{{ inventory_hostname }}'].state"

    - name: Wait for fingergw to be running
      uri:
        url: https://zuul.opendev.org/api/components
        method: GET
        return_content: true
      register: components
      # 45 minutes
      retries: 180
      delay: 15
      # The status check comes first so the body is only parsed on HTTP 200.
      until: >-
        components.status == 200
        and components.content | from_json | json_query(finger_query) | length == 1
        and components.content | from_json | json_query(finger_query) | first == 'running'
      vars:
        finger_query: "fingergw[?hostname=='{{ inventory_hostname }}'].state"