Remove the "max_fail_percentage" option

This option can cause silent failures which are confusing and hard to
track down. While the intention of this was to allow large scale
deployments to succeed in cases where a single node fails due to
transient issues, it has produced more problems in terms of confusion
than it solves. This change removes the option from all production
playbooks.

Change-Id: I1dcbbf5bc8cc66f11dd8ddc22d2a177c5c0f31f1
Signed-off-by: Kevin Carter <kevin.carter@rackspace.com>
(cherry picked from commit c2743f5cca)
Signed-off-by: Kevin Carter <kevin.carter@rackspace.com>
This commit is contained in:
Kevin Carter 2018-02-22 18:00:43 -06:00
parent 5dfcb62f77
commit 2cffc500a5
No known key found for this signature in database
GPG Key ID: 9443251A787B9FB3
29 changed files with 21 additions and 37 deletions

View File

@ -15,7 +15,6 @@
- name: Install ceph mons - name: Install ceph mons
hosts: ceph-mon hosts: ceph-mon
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- include: common-tasks/os-log-dir-setup.yml - include: common-tasks/os-log-dir-setup.yml
@ -132,7 +131,6 @@
- name: Install ceph osds - name: Install ceph osds
hosts: ceph-osd hosts: ceph-osd
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- include: common-tasks/os-log-dir-setup.yml - include: common-tasks/os-log-dir-setup.yml

View File

@ -16,7 +16,6 @@
- name: Install etcd server cluster - name: Install etcd server cluster
hosts: etcd_all hosts: etcd_all
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- include: common-tasks/os-lxc-container-setup.yml - include: common-tasks/os-lxc-container-setup.yml

View File

@ -17,7 +17,6 @@
hosts: galera_all hosts: galera_all
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
serial: 1 serial: 1
max_fail_percentage: 0
user: root user: root
tasks: tasks:
- include: common-tasks/os-log-dir-setup.yml - include: common-tasks/os-log-dir-setup.yml
@ -41,7 +40,6 @@
hosts: galera_all hosts: galera_all
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
serial: 1 serial: 1
max_fail_percentage: 20
user: root user: root
roles: roles:
- role: "galera_server" - role: "galera_server"

View File

@ -39,7 +39,6 @@
- name: Install haproxy - name: Install haproxy
hosts: haproxy hosts: haproxy
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- name: Remove legacy haproxy configuration files - name: Remove legacy haproxy configuration files

View File

@ -20,7 +20,6 @@
- name: Create container(s) - name: Create container(s)
hosts: "{{ container_group|default('all_containers') }}" hosts: "{{ container_group|default('all_containers') }}"
gather_facts: false gather_facts: false
max_fail_percentage: 20
user: root user: root
roles: roles:
- role: "lxc_container_create" - role: "lxc_container_create"

View File

@ -16,7 +16,6 @@
- name: Destroy lxc containers - name: Destroy lxc containers
hosts: "{{ container_group|default('all_containers') }}" hosts: "{{ container_group|default('all_containers') }}"
gather_facts: false gather_facts: false
max_fail_percentage: 20
user: root user: root
tasks: tasks:
- name: Destroy a container - name: Destroy a container

View File

@ -16,7 +16,6 @@
- name: Basic lxc host setup - name: Basic lxc host setup
hosts: "{{ lxc_host_group | default('lxc_hosts')}}" hosts: "{{ lxc_host_group | default('lxc_hosts')}}"
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- include: common-tasks/set-upper-constraints.yml - include: common-tasks/set-upper-constraints.yml

View File

@ -16,7 +16,6 @@
- name: Install memcached - name: Install memcached
hosts: memcached hosts: memcached
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- include: common-tasks/os-lxc-container-setup.yml - include: common-tasks/os-lxc-container-setup.yml

View File

@ -39,7 +39,6 @@
- name: Basic host setup - name: Basic host setup
hosts: "{{ openstack_host_group|default('hosts') }}" hosts: "{{ openstack_host_group|default('hosts') }}"
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- name: Check for a supported Operating System - name: Check for a supported Operating System

View File

@ -16,7 +16,6 @@
- name: Install the aodh components - name: Install the aodh components
hosts: aodh_all hosts: aodh_all
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- include: common-tasks/os-lxc-container-setup.yml - include: common-tasks/os-lxc-container-setup.yml

View File

@ -16,7 +16,6 @@
- name: Installation and setup of barbican - name: Installation and setup of barbican
hosts: barbican_all hosts: barbican_all
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- include: common-tasks/os-lxc-container-setup.yml - include: common-tasks/os-lxc-container-setup.yml

View File

@ -16,7 +16,6 @@
- name: Install the ceilometer components - name: Install the ceilometer components
hosts: ceilometer_all hosts: ceilometer_all
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- include: common-tasks/os-lxc-container-setup.yml - include: common-tasks/os-lxc-container-setup.yml

View File

@ -140,7 +140,6 @@
hosts: cinder_backup,cinder_volume,cinder_scheduler hosts: cinder_backup,cinder_volume,cinder_scheduler
gather_facts: no gather_facts: no
serial: "{{ cinder_backend_serial | default(['1', '100%']) }}" serial: "{{ cinder_backend_serial | default(['1', '100%']) }}"
max_fail_percentage: 20
user: root user: root
environment: "{{ deployment_environment_variables | default({}) }}" environment: "{{ deployment_environment_variables | default({}) }}"
tags: tags:
@ -166,7 +165,6 @@
hosts: cinder_api hosts: cinder_api
gather_facts: no gather_facts: no
serial: "{{ cinder_api_serial | default(['1','100%']) }}" serial: "{{ cinder_api_serial | default(['1','100%']) }}"
max_fail_percentage: 20
user: root user: root
environment: "{{ deployment_environment_variables | default({}) }}" environment: "{{ deployment_environment_variables | default({}) }}"
tags: tags:

View File

@ -81,7 +81,6 @@
- name: Refresh local facts after all software changes are made - name: Refresh local facts after all software changes are made
hosts: glance_all hosts: glance_all
gather_facts: no gather_facts: no
max_fail_percentage: 20
user: root user: root
environment: "{{ deployment_environment_variables | default({}) }}" environment: "{{ deployment_environment_variables | default({}) }}"
tags: tags:

View File

@ -15,7 +15,6 @@
- name: Install Gnocchi components - name: Install Gnocchi components
hosts: gnocchi_all hosts: gnocchi_all
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- include: common-tasks/os-lxc-container-setup.yml - include: common-tasks/os-lxc-container-setup.yml

View File

@ -16,7 +16,6 @@
- name: Install heat server - name: Install heat server
hosts: heat_all hosts: heat_all
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- include: common-tasks/os-lxc-container-setup.yml - include: common-tasks/os-lxc-container-setup.yml

View File

@ -16,7 +16,6 @@
- name: Install horizon server - name: Install horizon server
hosts: horizon_all hosts: horizon_all
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- include: common-tasks/os-lxc-container-setup.yml - include: common-tasks/os-lxc-container-setup.yml

View File

@ -62,7 +62,6 @@
hosts: keystone_all hosts: keystone_all
serial: "{{ keystone_serial | default(['1', '100%']) }}" serial: "{{ keystone_serial | default(['1', '100%']) }}"
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
environment: "{{ deployment_environment_variables | default({}) }}" environment: "{{ deployment_environment_variables | default({}) }}"
tags: tags:
@ -149,7 +148,6 @@
- name: Finalise data migrations if required - name: Finalise data migrations if required
hosts: keystone_all hosts: keystone_all
gather_facts: no gather_facts: no
max_fail_percentage: 20
user: root user: root
environment: "{{ deployment_environment_variables | default({}) }}" environment: "{{ deployment_environment_variables | default({}) }}"
tags: tags:

View File

@ -18,7 +18,6 @@
- name: Install magnum server - name: Install magnum server
hosts: magnum_all hosts: magnum_all
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- include: common-tasks/os-lxc-container-setup.yml - include: common-tasks/os-lxc-container-setup.yml

View File

@ -134,7 +134,6 @@
- name: Refresh local facts after all software changes are made - name: Refresh local facts after all software changes are made
hosts: nova_all hosts: nova_all
gather_facts: no gather_facts: no
max_fail_percentage: 20
user: root user: root
environment: "{{ deployment_environment_variables | default({}) }}" environment: "{{ deployment_environment_variables | default({}) }}"
tags: tags:
@ -180,7 +179,6 @@
hosts: "nova_all:!nova_api_placement:!nova_console" hosts: "nova_all:!nova_api_placement:!nova_console"
gather_facts: no gather_facts: no
serial: "{{ nova_serial | default('100%') }}" serial: "{{ nova_serial | default('100%') }}"
max_fail_percentage: 20
user: root user: root
environment: "{{ deployment_environment_variables | default({}) }}" environment: "{{ deployment_environment_variables | default({}) }}"
tags: tags:
@ -205,7 +203,6 @@
hosts: "nova_api_placement:nova_console" hosts: "nova_api_placement:nova_console"
gather_facts: no gather_facts: no
serial: "{{ nova_api_serial | default(['1', '100%']) }}" serial: "{{ nova_api_serial | default(['1', '100%']) }}"
max_fail_percentage: 20
user: root user: root
environment: "{{ deployment_environment_variables | default({}) }}" environment: "{{ deployment_environment_variables | default({}) }}"
tags: tags:

View File

@ -16,7 +16,6 @@
- name: Install octavia server - name: Install octavia server
hosts: octavia_all hosts: octavia_all
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- include: common-tasks/os-lxc-container-setup.yml - include: common-tasks/os-lxc-container-setup.yml

View File

@ -16,7 +16,6 @@
- name: Installation and setup of Swift - name: Installation and setup of Swift
hosts: swift_all:swift_remote_all hosts: swift_all:swift_remote_all
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- include: common-tasks/os-lxc-container-setup.yml - include: common-tasks/os-lxc-container-setup.yml
@ -59,7 +58,6 @@
- name: Installation and setup of Swift - name: Installation and setup of Swift
hosts: swift_all hosts: swift_all
max_fail_percentage: 20
user: root user: root
roles: roles:
- role: "rsyslog_client" - role: "rsyslog_client"

View File

@ -18,7 +18,6 @@
- name: Synchronisation of swift ring and ssh keys - name: Synchronisation of swift ring and ssh keys
hosts: swift_all:swift_remote_all hosts: swift_all:swift_remote_all
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
roles: roles:
- role: "os_swift" - role: "os_swift"

View File

@ -16,7 +16,6 @@
- name: Installation and setup of Tempest - name: Installation and setup of Tempest
hosts: utility_all[0] hosts: utility_all[0]
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
roles: roles:
- role: "os_tempest" - role: "os_tempest"

View File

@ -17,7 +17,6 @@
hosts: "{{ rabbitmq_host_group | default('rabbitmq_all') }}" hosts: "{{ rabbitmq_host_group | default('rabbitmq_all') }}"
serial: 1 serial: 1
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 0
user: root user: root
pre_tasks: pre_tasks:
- include: common-tasks/os-lxc-container-setup.yml - include: common-tasks/os-lxc-container-setup.yml
@ -38,7 +37,6 @@
# http://www.rabbitmq.com/clustering.html#upgrading # http://www.rabbitmq.com/clustering.html#upgrading
- name: Stop RabbitMQ nodes that are not the upgrader - name: Stop RabbitMQ nodes that are not the upgrader
hosts: "{{ rabbitmq_host_group | default('rabbitmq_all') }}[1:]" hosts: "{{ rabbitmq_host_group | default('rabbitmq_all') }}[1:]"
max_fail_percentage: 0
user: root user: root
tasks: tasks:
- name: "Stop RabbitMQ" - name: "Stop RabbitMQ"
@ -50,7 +48,6 @@
- name: Install rabbitmq server - name: Install rabbitmq server
hosts: "{{ rabbitmq_host_group | default('rabbitmq_all') }}" hosts: "{{ rabbitmq_host_group | default('rabbitmq_all') }}"
serial: 20% serial: 20%
max_fail_percentage: 20
user: root user: root
roles: roles:
- role: "rabbitmq_server" - role: "rabbitmq_server"

View File

@ -16,7 +16,6 @@
- name: Setup repo servers - name: Setup repo servers
hosts: repo_all hosts: repo_all
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:

View File

@ -16,7 +16,6 @@
- name: Install rsyslog - name: Install rsyslog
hosts: rsyslog hosts: rsyslog
gather_facts: "{{ osa_gather_facts | default(True) }}" gather_facts: "{{ osa_gather_facts | default(True) }}"
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:
- include: common-tasks/os-lxc-container-setup.yml - include: common-tasks/os-lxc-container-setup.yml

View File

@ -15,7 +15,6 @@
- name: Setup the utility location(s) - name: Setup the utility location(s)
hosts: utility_all hosts: utility_all
max_fail_percentage: 20
user: root user: root
pre_tasks: pre_tasks:

View File

@ -0,0 +1,21 @@
---
other:
- The `max_fail_percentage` playbook option has been used with the default
playbooks since the first release of the playbooks back in **Icehouse**.
While the intention was to allow large-scale deployments to succeed in
cases where a single node fails due to transient issues, this option has
produced more problems than it solves. If a failure occurs that is transient
in nature but is under the set failure percentage, the playbook will report a
success, which can cause silent failures depending on where the failure
happened. If a deployer finds themselves in this situation, the problems are
then compounded because the tools will report there are no known issues.
To ensure deployers have the best deployment experience and the most
accurate information a change has been made to remove the
`max_fail_percentage` option from all of the default playbooks. The removal
of this option has the side effect of requiring the deployer to skip specific
hosts should one need to be omitted from a run, but has the benefit of
eliminating silent, hard-to-track-down failures. To skip a failing host
for a given playbook run use the `--limit '!$HOSTNAME'` CLI switch for the
specific run. Once the issues have been resolved for the failing host, rerun
the specific playbook without the `--limit` option to ensure everything
is in sync.