2a734381fc
By settings these hiera keys we will actually do a deep comparison
of all pacemaker resources and update them accordingly.
This means that rerunning a deploy against an existing overcloud
will actually detect if a pacemaker resource needs updating and
will update the definition and restart of said resource. The code
will wait for the resource to be up (aka the cluster to settle
after an update).
Related-Bug: #1823849
Depends-On: I62d5662327333e17e1c32a029695b3d3a904ca10
Change-Id: I0b9ed429e6b1bd4a62b4462bf6312782f29b7d6b
(cherry picked from commit 60d75f19e5
)
195 lines
6.7 KiB
YAML
195 lines
6.7 KiB
YAML
heat_template_version: queens
|
|
|
|
description: >
|
|
Pacemaker service configured with Puppet
|
|
|
|
parameters:
|
|
ServiceData:
|
|
default: {}
|
|
description: Dictionary packing service data
|
|
type: json
|
|
ServiceNetMap:
|
|
default: {}
|
|
description: Mapping of service_name -> network name. Typically set
|
|
via parameter_defaults in the resource registry. This
|
|
mapping overrides those in ServiceNetMapDefaults.
|
|
type: json
|
|
DefaultPasswords:
|
|
default: {}
|
|
type: json
|
|
RoleName:
|
|
default: ''
|
|
description: Role name on which the service is applied
|
|
type: string
|
|
RoleParameters:
|
|
default: {}
|
|
description: Parameters specific to the role
|
|
type: json
|
|
EndpointMap:
|
|
default: {}
|
|
description: Mapping of service endpoint -> protocol. Typically set
|
|
via parameter_defaults in the resource registry.
|
|
type: json
|
|
MonitoringSubscriptionPacemaker:
|
|
default: 'overcloud-pacemaker'
|
|
type: string
|
|
CorosyncIPv6:
|
|
default: false
|
|
description: Enable IPv6 in Corosync
|
|
type: boolean
|
|
EnableFencing:
|
|
default: false
|
|
description: Whether to enable fencing in Pacemaker or not.
|
|
type: boolean
|
|
PacemakerRemoteAuthkey:
|
|
type: string
|
|
description: The authkey for the pacemaker remote service.
|
|
hidden: true
|
|
PcsdPassword:
|
|
type: string
|
|
description: The password for the 'pcsd' user for pacemaker.
|
|
hidden: true
|
|
CorosyncSettleTries:
|
|
type: number
|
|
description: Number of tries for cluster settling. This has the
|
|
same default as the pacemaker puppet module. Override
|
|
to a smaller value when in need to replace a controller node.
|
|
default: 360
|
|
FencingConfig:
|
|
default: {}
|
|
description: |
|
|
Pacemaker fencing configuration. The JSON should have
|
|
the following structure:
|
|
{
|
|
"devices": [
|
|
{
|
|
"agent": "AGENT_NAME",
|
|
"host_mac": "HOST_MAC_ADDRESS",
|
|
"params": {"PARAM_NAME": "PARAM_VALUE"}
|
|
}
|
|
]
|
|
}
|
|
For instance:
|
|
{
|
|
"devices": [
|
|
{
|
|
"agent": "fence_xvm",
|
|
"host_mac": "52:54:00:aa:bb:cc",
|
|
"params": {
|
|
"multicast_address": "225.0.0.12",
|
|
"port": "baremetal_0",
|
|
"manage_fw": true,
|
|
"manage_key_file": true,
|
|
"key_file": "/etc/fence_xvm.key",
|
|
"key_file_password": "abcdef"
|
|
}
|
|
}
|
|
]
|
|
}
|
|
type: json
|
|
PacemakerLoggingSource:
|
|
type: json
|
|
default:
|
|
tag: system.pacemaker
|
|
path: /var/log/pacemaker.log,/var/log/cluster/corosync.log
|
|
format: >-
|
|
/^(?<time>[^ ]*\s*[^ ]* [^ ]*)
|
|
\[(?<pid>[^ ]*)\]
|
|
(?<host>[^ ]*)
|
|
(?<message>.*)$/
|
|
EnableLoadBalancer:
|
|
default: true
|
|
description: Whether to deploy a LoadBalancer on the Controller
|
|
type: boolean
|
|
|
|
outputs:
|
|
role_data:
|
|
description: Role data for the Pacemaker role.
|
|
value:
|
|
service_name: pacemaker
|
|
monitoring_subscription: {get_param: MonitoringSubscriptionPacemaker}
|
|
config_settings:
|
|
pacemaker::corosync::cluster_name: 'tripleo_cluster'
|
|
pacemaker::corosync::manage_fw: false
|
|
pacemaker::resource_defaults::defaults:
|
|
resource-stickiness: { value: INFINITY }
|
|
corosync_token_timeout: 10000
|
|
pacemaker::corosync::settle_tries: {get_param: CorosyncSettleTries}
|
|
pacemaker::resource::bundle::deep_compare: true
|
|
pacemaker::resource::ip::deep_compare: true
|
|
pacemaker::resource::ocf::deep_compare: true
|
|
tripleo.pacemaker.firewall_rules:
|
|
'130 pacemaker tcp':
|
|
proto: 'tcp'
|
|
dport:
|
|
- 2224
|
|
- 3121
|
|
- 21064
|
|
'131 pacemaker udp':
|
|
proto: 'udp'
|
|
dport: 5405
|
|
corosync_ipv6: {get_param: CorosyncIPv6}
|
|
tripleo::fencing::config: {get_param: FencingConfig}
|
|
enable_fencing: {get_param: EnableFencing}
|
|
hacluster_pwd:
|
|
yaql:
|
|
expression: $.data.passwords.where($ != '').first()
|
|
data:
|
|
passwords:
|
|
- {get_param: PcsdPassword}
|
|
- {get_param: [DefaultPasswords, pcsd_password]}
|
|
tripleo::profile::base::pacemaker::remote_authkey: {get_param: PacemakerRemoteAuthkey}
|
|
service_config_settings:
|
|
fluentd:
|
|
tripleo_fluentd_groups_pacemaker:
|
|
- haclient
|
|
tripleo_fluentd_sources_pacemaker:
|
|
- {get_param: PacemakerLoggingSource}
|
|
step_config: |
|
|
include ::tripleo::profile::base::pacemaker
|
|
upgrade_tasks:
|
|
- name: Check pacemaker cluster running before upgrade
|
|
when: step|int == 0
|
|
tags: validation
|
|
pacemaker_cluster: state=online check_and_fail=true
|
|
async: 30
|
|
poll: 4
|
|
- name: Stop pacemaker cluster
|
|
when: step|int == 2
|
|
pacemaker_cluster: state=offline
|
|
- name: Start pacemaker cluster
|
|
when: step|int == 4
|
|
pacemaker_cluster: state=online
|
|
update_tasks:
|
|
- name: Check pacemaker cluster running before the minor update
|
|
when: step|int == 0 # TODO(marios) disabling validations?
|
|
pacemaker_cluster: state=online check_and_fail=true
|
|
async: 30
|
|
poll: 4
|
|
- name: Move virtual IPs to another node before stopping pacemaker
|
|
when: step|int == 1
|
|
shell: |
|
|
CLUSTER_NODE=$(crm_node -n)
|
|
echo "Retrieving all the VIPs which are hosted on this node"
|
|
VIPS_TO_MOVE=$(crm_mon --as-xml | xmllint --xpath '//resource[@resource_agent = "ocf::heartbeat:IPaddr2" and @role = "Started" and @managed = "true" and ./node[@name = "'${CLUSTER_NODE}'"]]/@id' - | sed -e 's/id=//g' -e 's/"//g')
|
|
for v in ${VIPS_TO_MOVE}; do
|
|
echo "Moving VIP $v on another node"
|
|
pcs resource move $v --wait=300
|
|
done
|
|
echo "Removing the location constraints that were created to move the VIPs"
|
|
for v in ${VIPS_TO_MOVE}; do
|
|
echo "Removing location ban for VIP $v"
|
|
ban_id=$(cibadmin --query | xmllint --xpath 'string(//rsc_location[@rsc="'${v}'" and @node="'${CLUSTER_NODE}'" and @score="-INFINITY"]/@id)' -)
|
|
if [ -n "$ban_id" ]; then
|
|
pcs constraint remove ${ban_id}
|
|
else
|
|
echo "Could not retrieve and clear location constraint for VIP $v" 2>&1
|
|
fi
|
|
done
|
|
- name: Stop pacemaker cluster
|
|
when: step|int == 1
|
|
pacemaker_cluster: state=offline
|
|
- name: Start pacemaker cluster
|
|
when: step|int == 4
|
|
pacemaker_cluster: state=online
|