tripleo-heat-templates/deployment/pacemaker/pacemaker-baremetal-puppet.yaml
Jiri Stransky 56a35beb51 Idempotency for system_upgrade_prepare
Partial-Bug: #1831690
Change-Id: I9510fd00755cf33df105ef637d047d3cb87243ae
2019-06-24 12:32:37 +02:00

293 lines
11 KiB
YAML

heat_template_version: rocky
description: >
Pacemaker service configured with Puppet
parameters:
ServiceData:
default: {}
description: Dictionary packing service data
type: json
ServiceNetMap:
default: {}
description: Mapping of service_name -> network name. Typically set
via parameter_defaults in the resource registry. This
mapping overrides those in ServiceNetMapDefaults.
type: json
DefaultPasswords:
default: {}
type: json
RoleName:
default: ''
description: Role name on which the service is applied
type: string
RoleParameters:
default: {}
description: Parameters specific to the role
type: json
EndpointMap:
default: {}
description: Mapping of service endpoint -> protocol. Typically set
via parameter_defaults in the resource registry.
type: json
MonitoringSubscriptionPacemaker:
default: 'overcloud-pacemaker'
type: string
CorosyncIPv6:
default: false
description: Enable IPv6 in Corosync
type: boolean
EnableFencing:
default: false
description: Whether to enable fencing in Pacemaker or not.
type: boolean
PacemakerRemoteAuthkey:
type: string
description: The authkey for the pacemaker remote service.
hidden: true
PcsdPassword:
type: string
description: The password for the 'pcsd' user for pacemaker.
hidden: true
CorosyncSettleTries:
type: number
description: Number of tries for cluster settling. This has the
same default as the pacemaker puppet module. Override
to a smaller value when in need to replace a controller node.
default: 360
FencingConfig:
default: {}
description: |
Pacemaker fencing configuration. The JSON should have
the following structure:
{
"devices": [
{
"agent": "AGENT_NAME",
"host_mac": "HOST_MAC_ADDRESS",
"params": {"PARAM_NAME": "PARAM_VALUE"}
}
]
}
For instance:
{
"devices": [
{
"agent": "fence_xvm",
"host_mac": "52:54:00:aa:bb:cc",
"params": {
"multicast_address": "225.0.0.12",
"port": "baremetal_0",
"manage_fw": true,
"manage_key_file": true,
"key_file": "/etc/fence_xvm.key",
"key_file_password": "abcdef"
}
}
]
}
type: json
PacemakerLoggingSource:
type: json
default:
tag: system.pacemaker
path: /var/log/pacemaker.log,/var/log/cluster/corosync.log
format: >-
/^(?<time>[^ ]*\s*[^ ]* [^ ]*)
\[(?<pid>[^ ]*)\]
(?<host>[^ ]*)
(?<message>.*)$/
UpgradeLeappEnabled:
description: Use Leapp for operating system upgrade
type: boolean
default: true
outputs:
role_data:
description: Role data for the Pacemaker role.
value:
service_name: pacemaker
monitoring_subscription: {get_param: MonitoringSubscriptionPacemaker}
config_settings:
pacemaker::corosync::cluster_name: 'tripleo_cluster'
pacemaker::corosync::manage_fw: false
pacemaker::resource_defaults::defaults:
resource-stickiness: { value: INFINITY }
corosync_token_timeout: 10000
pacemaker::corosync::settle_tries: {get_param: CorosyncSettleTries}
pacemaker::resource::bundle::deep_compare: true
pacemaker::resource::ip::deep_compare: true
pacemaker::resource::ocf::deep_compare: true
tripleo::pacemaker::firewall_rules:
'130 pacemaker tcp':
proto: 'tcp'
dport:
- 2224
- 3121
- 21064
'131 pacemaker udp':
proto: 'udp'
dport: 5405
corosync_ipv6: {get_param: CorosyncIPv6}
tripleo::fencing::config: {get_param: FencingConfig}
enable_fencing: {get_param: EnableFencing}
hacluster_pwd:
yaql:
expression: $.data.passwords.where($ != '').first()
data:
passwords:
- {get_param: PcsdPassword}
- {get_param: [DefaultPasswords, pcsd_password]}
tripleo::profile::base::pacemaker::remote_authkey: {get_param: PacemakerRemoteAuthkey}
service_config_settings:
fluentd:
tripleo_fluentd_groups_pacemaker:
- haclient
tripleo_fluentd_sources_pacemaker:
- {get_param: PacemakerLoggingSource}
step_config: |
include ::tripleo::profile::base::pacemaker
upgrade_tasks:
- name: system_upgrade_prepare step 2
tags:
- never
- system_upgrade_prepare
when: step|int == 2
block:
- name: Check if pcs is present
stat:
path: /usr/sbin/pcs
register: pcs_stat
- name: Stop pacemaker cluster
pacemaker_cluster: state=offline
when: pcs_stat.stat.exists
- name: destroy pacemaker cluster
command: /usr/sbin/pcs cluster destroy
when: pcs_stat.stat.exists
- name: upgrade step 0
when: step|int == 0
vars:
upgrade_leapp_enabled: {get_param: UpgradeLeappEnabled}
block:
- name: Check pacemaker cluster running before upgrade
when: step|int == 0
tags: validation
pacemaker_cluster: state=online check_and_fail=true
async: 30
poll: 4
when: not upgrade_leapp_enabled|bool
- name: upgrade step 1
when:
- step|int == 1
block:
- name: set pacemaker upgrade node facts in a single-node environment
set_fact:
pacemaker_short_node_names_upgraded: "{{ pacemaker_short_node_names }}"
cacheable: no
when: groups['pacemaker'] | length == 1
- name: set pacemaker upgrade node facts from the limit option
set_fact:
pacemaker_short_node_names_upgraded: "{{ pacemaker_short_node_names_upgraded|default([]) + [item.split('.')[0]] }}"
cacheable: no
when: item.split('.')[0] in ansible_limit.split(',')
loop: "{{ pacemaker_short_node_names }}"
- debug:
msg: "Prepare pacemaker upgrade for {{ pacemaker_short_node_names_upgraded }}"
- fail:
msg: >
You can't upgrade pacemaker without staged
upgrade. You need to use the limit option in order
to do so.
when: >-
pacemaker_short_node_names_upgraded is not defined or
pacemaker_short_node_names_upgraded | length == 0
- name: set pacemaker node ips fact from the names fact
set_fact:
# Generate matching IPs for the names, e.g. for these varaible values:
# pacemaker_node_ips: [ "1", "2", "3" ]
# pacemaker_short_node_names: [ "a", "b", "c" ]
# pacemaker_short_node_names_override: [ "b" ]
# it will set:
# pacemaker_node_ips_override: [ "2" ]
pacemaker_node_ips_upgraded: "{{
dict(pacemaker_short_node_names|zip(pacemaker_node_ips))
| dict2items
| selectattr('key', 'in', pacemaker_short_node_names_upgraded)
| map(attribute='value')
| list }}"
cacheable: no
- name: add the pacemaker short name to hiera data for the upgrade.
include_role:
name: tripleo-upgrade-hiera
tasks_from: set.yml
vars:
tripleo_upgrade_key: pacemaker_short_node_names_override
tripleo_upgrade_value: "{{pacemaker_short_node_names_upgraded}}"
- name: add the pacemaker ips to hiera data for the upgrade.
include_role:
name: tripleo-upgrade-hiera
tasks_from: set.yml
vars:
tripleo_upgrade_key: pacemaker_node_ips_override
tripleo_upgrade_value: "{{pacemaker_node_ips_upgraded}}"
- name: remove the extra hiera data needed for the upgrade.
include_role:
name: tripleo-upgrade-hiera
tasks_from: remove.yml
vars:
tripleo_upgrade_key: "{{item}}"
loop:
- pacemaker_short_node_names_override
- pacemaker_node_ips_override
when: pacemaker_short_node_names_upgraded | length == pacemaker_short_node_names | length
- name: upgrade step 2
when: step|int == 2
vars:
upgrade_leapp_enabled: {get_param: UpgradeLeappEnabled}
block:
- name: Stop pacemaker cluster
pacemaker_cluster: state=offline
when: not upgrade_leapp_enabled
- name: upgrade step 4
when: step|int == 4
vars:
upgrade_leapp_enabled: {get_param: UpgradeLeappEnabled}
block:
- name: Start pacemaker cluster
pacemaker_cluster: state=online
when: not upgrade_leapp_enabled
update_tasks:
- name: Check pacemaker cluster running before the minor update
when: step|int == 0 # TODO(marios) disabling validations?
pacemaker_cluster: state=online check_and_fail=true
async: 30
poll: 4
- name: Move virtual IPs to another node before stopping pacemaker
when: step|int == 1
shell: |
CLUSTER_NODE=$(crm_node -n)
echo "Retrieving all the VIPs which are hosted on this node"
VIPS_TO_MOVE=$(crm_mon --as-xml | xmllint --xpath '//resource[@resource_agent = "ocf::heartbeat:IPaddr2" and @role = "Started" and @managed = "true" and ./node[@name = "'${CLUSTER_NODE}'"]]/@id' - | sed -e 's/id=//g' -e 's/"//g')
for v in ${VIPS_TO_MOVE}; do
echo "Moving VIP $v on another node"
pcs resource move $v --wait=300
done
echo "Removing the location constraints that were created to move the VIPs"
for v in ${VIPS_TO_MOVE}; do
echo "Removing location ban for VIP $v"
ban_id=$(cibadmin --query | xmllint --xpath 'string(//rsc_location[@rsc="'${v}'" and @node="'${CLUSTER_NODE}'" and @score="-INFINITY"]/@id)' -)
if [ -n "$ban_id" ]; then
pcs constraint remove ${ban_id}
else
echo "Could not retrieve and clear location constraint for VIP $v" 2>&1
fi
done
- name: Stop pacemaker cluster
when: step|int == 1
pacemaker_cluster: state=offline
- name: Start pacemaker cluster
when: step|int == 4
pacemaker_cluster: state=online