From 106ce3267fd1471cee184f697f7cb19c695ab36c Mon Sep 17 00:00:00 2001 From: James Slagle Date: Thu, 12 Dec 2019 18:10:09 -0500 Subject: [PATCH] Use async tasks for long running common tasks Use async with poll: 0 for the long running common tasks including: - puppet host configuration - container-puppet generate config - container starting - container-puppet bootstrap tasks Executing the tasks in this manner and then polling for the results causes all of the tasks to be started in parallel across all the nodes, regardless of the number of ansible forks configured. The behavior will be that ansible will start the task on the first count of nodes that matches the configured forks value. Since poll:0, ansible will immediately move on to the next batch, etc. Effectively, all the tasks are started in parallel just as quickly as ansible can start them. The polling tasks (async_status) will then execute in parallel up to the configured number of forks. As tasks start to finish, ansible moves on to checking the status of the next batch (again, up to the configured number of forks). Since most nodes will configure around the same time, the polling tasks finish roughly at the same time (except for differences in roles, such as controllers taking much longer). This behavior results in a significant performance improvement at scale, or when deploying any number of nodes greater than the configured forks value. Instead of waiting for the first batch to fully complete, all the nodes are started in parallel. For example, if puppet host configuration usually takes 5 minutes per node, and there are 100 nodes, with 25 forks, previously this would have taken 20 minutes. With this patch, it would now take closer to 5 minutes for all 100 nodes, plus some overhead for polling. This more closely matches the behavior previously used with Heat, when all the nodes were operating in "pull" mode in parallel. 
Change-Id: I0c651d127cd2bb179f7592a0519a5fd5064faeb3 --- common/deploy-steps-tasks.yaml | 95 +++++++++++++++++++++++++--------- 1 file changed, 71 insertions(+), 24 deletions(-) diff --git a/common/deploy-steps-tasks.yaml b/common/deploy-steps-tasks.yaml index 82c819dc99..9f7a239339 100644 --- a/common/deploy-steps-tasks.yaml +++ b/common/deploy-steps-tasks.yaml @@ -17,6 +17,8 @@ - host_config - name: Run puppet host configuration for step {{ step }} + async: 3600 + poll: 0 when: enable_puppet|bool shell: >- set -o pipefail; @@ -29,21 +31,32 @@ {{ ansible_check_mode | bool | ternary('--hiera_config /etc/puppet/check-mode/hiera.yaml', '') }} /var/lib/tripleo-config/{{ ansible_check_mode | bool | ternary('check-mode/', '') }}puppet_step_config.pp 2>&1 | logger -s -t puppet-user - changed_when: outputs.rc == 2 - register: outputs - failed_when: false + register: puppet_host_async_result no_log: true tags: - host_config check_mode: no + - name: Wait for puppet host configuration to finish + async_status: + jid: "{{ puppet_host_async_result.ansible_job_id }}" + register: puppet_host_outputs + until: puppet_host_outputs.finished + retries: 1200 + delay: 3 + failed_when: + - (not puppet_host_outputs.finished) or (puppet_host_outputs.rc is defined and puppet_host_outputs.rc not in [0, 2]) + tags: + - host_config + - name: "Debug output for task: Run puppet host configuration for step {{ step }}" debug: - var: outputs.stdout_lines | default([]) | union(outputs.stderr_lines | default([])) + var: puppet_host_outputs.stdout_lines | default([]) | union(puppet_host_outputs.stderr_lines | default([])) when: - enable_puppet | bool - - outputs.rc is defined - failed_when: outputs.rc not in [0, 2] + - puppet_host_outputs.rc is defined + changed_when: puppet_host_outputs.rc == 2 + failed_when: puppet_host_outputs.rc not in [0, 2] tags: - host_config ignore_errors: "{{ ansible_check_mode|bool }}" @@ -66,6 +79,8 @@ ###################################### - name: Run container-puppet 
tasks (generate config) during step {{ step }} + async: 3600 + poll: 0 shell: "{{ python_cmd }} /var/lib/container-puppet/container-puppet.py" environment: NET_HOST: 'true' @@ -81,19 +96,27 @@ CONTAINER_HEALTHCHECK_DISABLED: "{{ container_healthcheck_disabled }}" SHORT_HOSTNAME: "{{ ansible_hostname }}" when: step|int == 1 - changed_when: false check_mode: no - register: outputs - failed_when: false - no_log: true + register: generate_config_async_result + tags: + - container_config + + - name: Wait for container-puppet tasks (generate config) to finish + async_status: + jid: "{{ generate_config_async_result.ansible_job_id }}" + register: generate_config_outputs + until: generate_config_outputs.finished + retries: 1200 + delay: 3 + when: step|int == 1 tags: - container_config - name: "Debug output for task: Run container-puppet tasks (generate config) during step {{ step }}" debug: - var: outputs.stdout_lines | default([]) | union(outputs.stderr_lines | default([])) - when: outputs.rc is defined - failed_when: outputs.rc != 0 + var: generate_config_outputs.stdout_lines | default([]) | union(generate_config_outputs.stderr_lines | default([])) + when: generate_config_outputs.rc is defined + failed_when: generate_config_outputs.rc != 0 tags: - container_config ignore_errors: "{{ ansible_check_mode|bool }}" @@ -135,6 +158,8 @@ when: enable_paunch|default(true) block: - name: Start containers for step {{ step }} using paunch + async: 3600 + poll: 0 environment: TRIPLEO_MINOR_UPDATE: '{{ tripleo_minor_update | default(false) }}' paunch: @@ -146,16 +171,25 @@ healthcheck_disabled: "{{ container_healthcheck_disabled | bool }}" managed_by: "tripleo-{{ tripleo_role_name }}" debug: "{{ enable_debug | bool }}" - register: outputs - failed_when: false + register: start_containers_async_result + tags: + - container_startup_configs + + - name: Wait for containers to start for step {{ step }} using paunch + async_status: + jid: "{{ start_containers_async_result.ansible_job_id }}" 
+ register: start_containers_outputs + until: start_containers_outputs.finished + retries: 1200 + delay: 3 tags: - container_startup_configs - name: "Debug output for task: Start containers for step {{ step }}" debug: - var: outputs.stdout_lines | default([]) | union(outputs.stderr_lines | default([])) - when: outputs.rc is defined - failed_when: outputs.rc != 0 + var: start_containers_outputs.stdout_lines | default([]) | union(start_containers_outputs.stderr_lines | default([])) + when: start_containers_outputs.rc is defined + failed_when: start_containers_outputs.rc != 0 tags: - container_startup_configs @@ -213,6 +247,8 @@ when: host_container_puppet_tasks is defined - name: Run container-puppet tasks (bootstrap tasks) for step {{ step }} + async: 3600 + poll: 0 shell: "{{ python_cmd }} /var/lib/container-puppet/container-puppet.py" environment: CONFIG: /var/lib/container-puppet/{{ ansible_check_mode | bool | ternary('check-mode/', '') }}container-puppet-tasks{{ step }}.json @@ -226,17 +262,28 @@ SHORT_HOSTNAME: "{{ ansible_hostname }}" PROCESS_COUNT: "{{ docker_puppet_process_count }}" when: host_container_puppet_tasks is defined - changed_when: false - register: outputs - failed_when: false + register: bootstrap_tasks_async_result no_log: true tags: - container_config_tasks + - name: Wait for container-puppet tasks (bootstrap tasks) for step {{ step }} to finish + async_status: + jid: "{{ bootstrap_tasks_async_result.ansible_job_id }}" + register: bootstrap_tasks_outputs + until: bootstrap_tasks_outputs.finished + retries: 1200 + delay: 3 + when: host_container_puppet_tasks is defined + tags: + - container_config_tasks + - name: "Debug output for task: Run container-puppet tasks (bootstrap tasks) for step {{ step }}" debug: - var: outputs.stdout_lines | default([]) | union(outputs.stderr_lines | default([])) - when: outputs.rc is defined - failed_when: outputs.rc != 0 + var: bootstrap_tasks_outputs.stdout_lines | default([]) | 
union(bootstrap_tasks_outputs.stderr_lines | default([])) + when: + - host_container_puppet_tasks is defined + - bootstrap_tasks_outputs.rc is defined + failed_when: bootstrap_tasks_outputs.rc != 0 tags: - container_config_tasks