tripleo-heat-templates/deployment/pacemaker/pacemaker-baremetal-puppet.yaml
Jiri Stransky b0519479c1 Add operating system upgrade preparation via Leapp
Also make sure pacemaker cluster is not only stopped but also
destroyed on the node being upgraded. This is a prerequisite for
upgrades via Leapp, otherwise Leapp would assume we expect it to
upgrade the cluster, and it would fail on validation.

Change-Id: I8cf3e4cf294fee3b71473e2343ad16b184fac665
2019-05-30 11:17:31 +00:00

201 lines
7.0 KiB
YAML

heat_template_version: rocky
description: >
Pacemaker service configured with Puppet
parameters:
ServiceData:
default: {}
description: Dictionary packing service data
type: json
ServiceNetMap:
default: {}
description: Mapping of service_name -> network name. Typically set
via parameter_defaults in the resource registry. This
mapping overrides those in ServiceNetMapDefaults.
type: json
DefaultPasswords:
default: {}
type: json
RoleName:
default: ''
description: Role name on which the service is applied
type: string
RoleParameters:
default: {}
description: Parameters specific to the role
type: json
EndpointMap:
default: {}
description: Mapping of service endpoint -> protocol. Typically set
via parameter_defaults in the resource registry.
type: json
MonitoringSubscriptionPacemaker:
default: 'overcloud-pacemaker'
type: string
CorosyncIPv6:
default: false
description: Enable IPv6 in Corosync
type: boolean
EnableFencing:
default: false
description: Whether to enable fencing in Pacemaker or not.
type: boolean
PacemakerRemoteAuthkey:
type: string
description: The authkey for the pacemaker remote service.
hidden: true
PcsdPassword:
type: string
description: The password for the 'pcsd' user for pacemaker.
hidden: true
CorosyncSettleTries:
type: number
description: Number of tries for cluster settling. This has the
same default as the pacemaker puppet module. Override
to a smaller value when in need to replace a controller node.
default: 360
FencingConfig:
default: {}
description: |
Pacemaker fencing configuration. The JSON should have
the following structure:
{
"devices": [
{
"agent": "AGENT_NAME",
"host_mac": "HOST_MAC_ADDRESS",
"params": {"PARAM_NAME": "PARAM_VALUE"}
}
]
}
For instance:
{
"devices": [
{
"agent": "fence_xvm",
"host_mac": "52:54:00:aa:bb:cc",
"params": {
"multicast_address": "225.0.0.12",
"port": "baremetal_0",
"manage_fw": true,
"manage_key_file": true,
"key_file": "/etc/fence_xvm.key",
"key_file_password": "abcdef"
}
}
]
}
type: json
PacemakerLoggingSource:
type: json
default:
tag: system.pacemaker
path: /var/log/pacemaker.log,/var/log/cluster/corosync.log
format: >-
/^(?<time>[^ ]*\s*[^ ]* [^ ]*)
\[(?<pid>[^ ]*)\]
(?<host>[^ ]*)
(?<message>.*)$/
outputs:
role_data:
description: Role data for the Pacemaker role.
value:
service_name: pacemaker
monitoring_subscription: {get_param: MonitoringSubscriptionPacemaker}
config_settings:
pacemaker::corosync::cluster_name: 'tripleo_cluster'
pacemaker::corosync::manage_fw: false
pacemaker::resource_defaults::defaults:
resource-stickiness: { value: INFINITY }
corosync_token_timeout: 10000
pacemaker::corosync::settle_tries: {get_param: CorosyncSettleTries}
pacemaker::resource::bundle::deep_compare: true
pacemaker::resource::ip::deep_compare: true
pacemaker::resource::ocf::deep_compare: true
tripleo::pacemaker::firewall_rules:
'130 pacemaker tcp':
proto: 'tcp'
dport:
- 2224
- 3121
- 21064
'131 pacemaker udp':
proto: 'udp'
dport: 5405
corosync_ipv6: {get_param: CorosyncIPv6}
tripleo::fencing::config: {get_param: FencingConfig}
enable_fencing: {get_param: EnableFencing}
hacluster_pwd:
yaql:
expression: $.data.passwords.where($ != '').first()
data:
passwords:
- {get_param: PcsdPassword}
- {get_param: [DefaultPasswords, pcsd_password]}
tripleo::profile::base::pacemaker::remote_authkey: {get_param: PacemakerRemoteAuthkey}
service_config_settings:
fluentd:
tripleo_fluentd_groups_pacemaker:
- haclient
tripleo_fluentd_sources_pacemaker:
- {get_param: PacemakerLoggingSource}
step_config: |
include ::tripleo::profile::base::pacemaker
upgrade_tasks:
- name: system_upgrade_prepare step 2
tags:
- never
- system_upgrade_prepare
when: step|int == 2
block:
- name: Stop pacemaker cluster
pacemaker_cluster: state=offline
- name: destroy pacemaker cluster
command: /usr/sbin/pcs cluster destroy
- name: Check pacemaker cluster running before upgrade
when: step|int == 0
tags: validation
pacemaker_cluster: state=online check_and_fail=true
async: 30
poll: 4
- name: Stop pacemaker cluster
when: step|int == 2
pacemaker_cluster: state=offline
- name: Start pacemaker cluster
when: step|int == 4
pacemaker_cluster: state=online
update_tasks:
- name: Check pacemaker cluster running before the minor update
when: step|int == 0 # TODO(marios) disabling validations?
pacemaker_cluster: state=online check_and_fail=true
async: 30
poll: 4
- name: Move virtual IPs to another node before stopping pacemaker
when: step|int == 1
shell: |
CLUSTER_NODE=$(crm_node -n)
echo "Retrieving all the VIPs which are hosted on this node"
VIPS_TO_MOVE=$(crm_mon --as-xml | xmllint --xpath '//resource[@resource_agent = "ocf::heartbeat:IPaddr2" and @role = "Started" and @managed = "true" and ./node[@name = "'${CLUSTER_NODE}'"]]/@id' - | sed -e 's/id=//g' -e 's/"//g')
for v in ${VIPS_TO_MOVE}; do
echo "Moving VIP $v on another node"
pcs resource move $v --wait=300
done
echo "Removing the location constraints that were created to move the VIPs"
for v in ${VIPS_TO_MOVE}; do
echo "Removing location ban for VIP $v"
ban_id=$(cibadmin --query | xmllint --xpath 'string(//rsc_location[@rsc="'${v}'" and @node="'${CLUSTER_NODE}'" and @score="-INFINITY"]/@id)' -)
if [ -n "$ban_id" ]; then
pcs constraint remove ${ban_id}
else
echo "Could not retrieve and clear location constraint for VIP $v" 2>&1
fi
done
- name: Stop pacemaker cluster
when: step|int == 1
pacemaker_cluster: state=offline
- name: Start pacemaker cluster
when: step|int == 4
pacemaker_cluster: state=online