tripleo-heat-templates/extraconfig/tasks/major_upgrade_pacemaker.yaml
Michele Baldessari dde12b075f Fix race during major-upgrade-pacemaker step
Currently when we call the major-upgrade step we do the following:
"""
...
if [[ -n $(is_bootstrap_node) ]]; then
    check_clean_cluster
fi
...
if [[ -n $(is_bootstrap_node) ]]; then
    migrate_full_to_ng_ha
fi
...
for service in $(services_to_migrate); do
    manage_systemd_service stop "${service%%-clone}"
    ...
done
"""

The problem with the above code is that it is open to the following race
condition:
1. Code gets run first on a non-bootstrap controller node so we start
stopping a bunch of services
2. Pacemaker notices will notice that services are down and will mark
the service as stopped
3. Code gets run on the bootstrap node (controller-0) and the
check_clean_cluster function will fail and exit
4. Eventually also the script on the non-bootstrap controller node will
timeout and exit because the cluster never shut down (it never actually
started the shutdown because we failed at 3)

Let's make sure we first only call the HA NG migration step as a
separate heat step. Only afterwards we start shutting down the systemd
services on all nodes.

We also need to move the STONITH_STATE variable into a file because it
is being used across two different scripts (1 and 2) and we need to
store that state.

Co-Authored-By: Athlan-Guyot Sofer <sathlang@redhat.com>

Closes-Bug: #1640407
Change-Id: Ifb9b9e633fcc77604cca2590071656f4b2275c60
2016-11-09 14:51:51 +01:00

222 lines
7.5 KiB
YAML

heat_template_version: 2016-10-14
description: 'Upgrade for Pacemaker deployments'
parameters:
servers:
type: json
input_values:
type: json
description: input values for the software deployments
UpgradeLevelNovaCompute:
type: string
description: Nova Compute upgrade level
default: ''
MySqlMajorUpgrade:
type: string
description: Can be auto,yes,no and influences if the major upgrade should do or detect an automatic mysql upgrade
constraints:
- allowed_values: ['auto', 'yes', 'no']
default: 'auto'
IgnoreCephUpgradeWarnings:
type: boolean
default: false
description: If enabled, Ceph upgrade will be forced even though cluster or PGs status is not clean
KeepSaharaServicesOnUpgrade:
type: boolean
default: true
description: Whether to keep Sahara services when upgrading controller nodes from mitaka to newton
resources:
# TODO(jistr): for Mitaka->Newton upgrades and further we can use
# map_merge with input_values instead of feeding params into scripts
# via str_replace on bash snippets
CephMonUpgradeConfig:
type: OS::Heat::SoftwareConfig
properties:
group: script
config:
list_join:
- ''
- - str_replace:
template: |
#!/bin/bash
ignore_ceph_upgrade_warnings='IGNORE_CEPH_UPGRADE_WARNINGS'
params:
IGNORE_CEPH_UPGRADE_WARNINGS: {get_param: IgnoreCephUpgradeWarnings}
- get_file: major_upgrade_ceph_mon.sh
CephMonUpgradeDeployment:
type: OS::Heat::SoftwareDeploymentGroup
properties:
servers: {get_param: [servers, Controller]}
config: {get_resource: CephMonUpgradeConfig}
input_values: {get_param: input_values}
update_policy:
batch_create:
max_batch_size: 1
rolling_update:
max_batch_size: 1
ControllerPacemakerUpgradeConfig_Step1:
type: OS::Heat::SoftwareConfig
properties:
group: script
config:
list_join:
- ''
- - str_replace:
template: |
#!/bin/bash
upgrade_level_nova_compute='UPGRADE_LEVEL_NOVA_COMPUTE'
params:
UPGRADE_LEVEL_NOVA_COMPUTE: {get_param: UpgradeLevelNovaCompute}
- str_replace:
template: |
#!/bin/bash
mariadb_do_major_upgrade='MYSQL_MAJOR_UPGRADE'
params:
MYSQL_MAJOR_UPGRADE: {get_param: MySqlMajorUpgrade}
- get_file: pacemaker_common_functions.sh
- get_file: major_upgrade_check.sh
- get_file: major_upgrade_pacemaker_migrations.sh
- get_file: major_upgrade_controller_pacemaker_1.sh
ControllerPacemakerUpgradeDeployment_Step1:
type: OS::Heat::SoftwareDeploymentGroup
depends_on: CephMonUpgradeDeployment
properties:
servers: {get_param: [servers, Controller]}
config: {get_resource: ControllerPacemakerUpgradeConfig_Step1}
input_values: {get_param: input_values}
BlockStorageUpgradeConfig:
type: OS::Heat::SoftwareConfig
depends_on: ControllerPacemakerUpgradeDeployment_Step1
properties:
group: script
config: {get_file: major_upgrade_block_storage.sh}
BlockStorageUpgradeDeployment:
type: OS::Heat::SoftwareDeploymentGroup
properties:
servers: {get_param: [servers, BlockStorage]}
config: {get_resource: BlockStorageUpgradeConfig}
input_values: {get_param: input_values}
ControllerPacemakerUpgradeConfig_Step2:
type: OS::Heat::SoftwareConfig
properties:
group: script
config:
list_join:
- ''
- - str_replace:
template: |
#!/bin/bash
upgrade_level_nova_compute='UPGRADE_LEVEL_NOVA_COMPUTE'
params:
UPGRADE_LEVEL_NOVA_COMPUTE: {get_param: UpgradeLevelNovaCompute}
- str_replace:
template: |
#!/bin/bash
mariadb_do_major_upgrade='MYSQL_MAJOR_UPGRADE'
params:
MYSQL_MAJOR_UPGRADE: {get_param: MySqlMajorUpgrade}
- get_file: pacemaker_common_functions.sh
- get_file: major_upgrade_check.sh
- get_file: major_upgrade_pacemaker_migrations.sh
- get_file: major_upgrade_controller_pacemaker_2.sh
ControllerPacemakerUpgradeDeployment_Step2:
type: OS::Heat::SoftwareDeploymentGroup
depends_on: BlockStorageUpgradeDeployment
properties:
servers: {get_param: [servers, Controller]}
config: {get_resource: ControllerPacemakerUpgradeConfig_Step2}
input_values: {get_param: input_values}
ControllerPacemakerUpgradeConfig_Step3:
type: OS::Heat::SoftwareConfig
properties:
group: script
config:
list_join:
- ''
- - get_file: pacemaker_common_functions.sh
- get_file: major_upgrade_pacemaker_migrations.sh
- get_file: major_upgrade_controller_pacemaker_3.sh
ControllerPacemakerUpgradeDeployment_Step3:
type: OS::Heat::SoftwareDeploymentGroup
depends_on: ControllerPacemakerUpgradeDeployment_Step2
properties:
servers: {get_param: [servers, Controller]}
config: {get_resource: ControllerPacemakerUpgradeConfig_Step3}
input_values: {get_param: input_values}
ControllerPacemakerUpgradeConfig_Step4:
type: OS::Heat::SoftwareConfig
properties:
group: script
config:
list_join:
- ''
- - get_file: pacemaker_common_functions.sh
- get_file: major_upgrade_pacemaker_migrations.sh
- get_file: major_upgrade_controller_pacemaker_4.sh
ControllerPacemakerUpgradeDeployment_Step4:
type: OS::Heat::SoftwareDeploymentGroup
depends_on: ControllerPacemakerUpgradeDeployment_Step3
properties:
servers: {get_param: [servers, Controller]}
config: {get_resource: ControllerPacemakerUpgradeConfig_Step4}
input_values: {get_param: input_values}
ControllerPacemakerUpgradeConfig_Step5:
type: OS::Heat::SoftwareConfig
properties:
group: script
config:
list_join:
- ''
- - get_file: pacemaker_common_functions.sh
- get_file: major_upgrade_pacemaker_migrations.sh
- get_file: major_upgrade_controller_pacemaker_5.sh
ControllerPacemakerUpgradeDeployment_Step5:
type: OS::Heat::SoftwareDeploymentGroup
depends_on: ControllerPacemakerUpgradeDeployment_Step4
properties:
servers: {get_param: [servers, Controller]}
config: {get_resource: ControllerPacemakerUpgradeConfig_Step5}
input_values: {get_param: input_values}
ControllerPacemakerUpgradeConfig_Step6:
type: OS::Heat::SoftwareConfig
properties:
group: script
config:
list_join:
- ''
- - str_replace:
template: |
#!/bin/bash
keep_sahara_services_on_upgrade='KEEP_SAHARA_SERVICES_ON_UPGRADE'
params:
KEEP_SAHARA_SERVICES_ON_UPGRADE: {get_param: KeepSaharaServicesOnUpgrade}
- get_file: pacemaker_common_functions.sh
- get_file: major_upgrade_pacemaker_migrations.sh
- get_file: major_upgrade_controller_pacemaker_6.sh
ControllerPacemakerUpgradeDeployment_Step6:
type: OS::Heat::SoftwareDeploymentGroup
depends_on: ControllerPacemakerUpgradeDeployment_Step5
properties:
servers: {get_param: [servers, Controller]}
config: {get_resource: ControllerPacemakerUpgradeConfig_Step6}
input_values: {get_param: input_values}