Fix fact gathering with --limit

Prior to this change, when the --limit argument is used, each host in the
limit gathers facts for every other host. This is clearly unnecessary, and
can result in up to (N-1)^2 fact gathers.

This change gathers facts for each host only once. Hosts that are not in
the limit are divided between those that are in the limit, and facts are
gathered via delegation.

This change also factors out the fact gathering logic into a separate
playbook that is imported where necessary.

Change-Id: I923df5af41a7f1b7b0142d0da185a9a0979be543
This commit is contained in:
Mark Goddard 2018-09-17 16:01:52 +01:00
parent 5d8403bdc8
commit 56b4352f9e
3 changed files with 46 additions and 63 deletions

44
ansible/gather-facts.yml Normal file
View File

@ -0,0 +1,44 @@
---
# NOTE(awiddersheim): Gather facts for all hosts as a
# first step since several plays below require them when
# building their configurations. The below 'gather_facts'
# set to 'false' is a bit confusing but this is to avoid
# Ansible gathering facts twice.
- name: Gather facts for all hosts
hosts: all
serial: '{{ kolla_serial|default("0") }}'
gather_facts: false
tasks:
- name: Gather facts
setup:
- name: Group hosts to determine when using --limit
group_by:
key: "all_using_limit_{{ (ansible_play_batch | length) != (groups['all'] | length) }}"
tags: always
# NOTE(pbourke): This case covers deploying subsets of hosts using --limit. The
# limit arg will cause the first play to gather facts only about that node,
# meaning facts such as IP addresses for rabbitmq nodes etc. will be undefined
# in the case of adding a single compute node.
# NOTE(mgoddard): Divide all hosts to be queried between the hosts selected via
# the limit.
- name: Gather facts for all hosts (if using --limit)
hosts: all_using_limit_True
serial: '{{ kolla_serial|default("0") }}'
gather_facts: false
vars:
batch_index: "{{ ansible_play_batch.index(inventory_hostname) }}"
batch_size: "{{ ansible_play_batch | length }}"
# Use a python list slice to divide the group up.
# Syntax: [<start index>:<end index>:<step size>]
delegate_hosts: "{{ groups['all'][batch_index | int::batch_size | int] }}"
tasks:
- name: Gather facts
setup:
delegate_facts: True
delegate_to: "{{ item }}"
with_items: "{{ delegate_hosts }}"
# We gathered facts for all hosts in the batch during the first play.
when: item not in ansible_play_batch
tags: always

View File

@ -1,35 +1,5 @@
---
# NOTE(awiddersheim): Gather facts for all hosts as a
# first step since several plays below require them when
# building their configurations. The below 'gather_facts'
# set to 'false' is a bit confusing but this is to avoid
# Ansible gathering facts twice.
- name: Gather facts for all hosts
hosts: all
serial: '{{ kolla_serial|default("0") }}'
gather_facts: false
tasks:
- setup:
tags: always
# NOTE(pbourke): This case covers deploying subsets of hosts using --limit. The
# limit arg will cause the first play to gather facts only about that node,
# meaning facts such as IP addresses for rabbitmq nodes etc. will be undefined
# in the case of adding a single compute node.
# We don't want to add the delegate parameters to the above play as it will
# result in ((num_nodes-1)^2) number of SSHs when running for all nodes
# which can be very inefficient.
- name: Gather facts for all hosts (if using --limit)
hosts: all
serial: '{{ kolla_serial|default("0") }}'
gather_facts: false
tasks:
- setup:
delegate_facts: True
delegate_to: "{{ item }}"
with_items: "{{ groups['all'] }}"
when:
- (ansible_play_batch | length) != (groups['all'] | length)
- import_playbook: gather-facts.yml
- name: Apply role baremetal
hosts: baremetal

View File

@ -1,36 +1,5 @@
---
# NOTE(awiddersheim): Gather facts for all hosts as a
# first step since several plays below require them when
# building their configurations. The below 'gather_facts'
# set to 'false' is a bit confusing but this is to avoid
# Ansible gathering facts twice.
- name: Gather facts for all hosts
hosts: all
serial: '{{ kolla_serial|default("0") }}'
gather_facts: false
tasks:
- setup:
tags: always
# NOTE(pbourke): This case covers deploying subsets of hosts using --limit. The
# limit arg will cause the first play to gather facts only about that node,
# meaning facts such as IP addresses for rabbitmq nodes etc. will be undefined
# in the case of adding a single compute node.
# We don't want to add the delegate parameters to the above play as it will
# result in ((num_nodes-1)^2) number of SSHs when running for all nodes
# which can be very inefficient.
- name: Gather facts for all hosts (if using --limit)
hosts: all
serial: '{{ kolla_serial|default("0") }}'
gather_facts: false
tasks:
- setup:
delegate_facts: True
delegate_to: "{{ item }}"
with_items: "{{ groups['all'] }}"
when:
- (ansible_play_batch | length) != (groups['all'] | length)
tags: always
- import_playbook: gather-facts.yml
# NOTE(mgoddard): In large environments, even tasks that are skipped can take a
# significant amount of time. This is an optimisation to prevent any tasks