7f195ff9a8
This was mainly there as a legacy interface for internal use. Now that we pull the passwords from the existing environment and no longer use it, we can drop this. Reduces the number of Heat resources.

Change-Id: If83d0f3d72a229d737a45b2fd37507dc11a04649
heat_template_version: rocky

description: >
  Pacemaker service configured with Puppet

parameters:
  ServiceData:
    default: {}
    description: Dictionary packing service data
    type: json
  ServiceNetMap:
    default: {}
    description: Mapping of service_name -> network name. Typically set
                 via parameter_defaults in the resource registry. This
                 mapping overrides those in ServiceNetMapDefaults.
    type: json
  RoleName:
    default: ''
    description: Role name on which the service is applied
    type: string
  RoleParameters:
    default: {}
    description: Parameters specific to the role
    type: json
  EndpointMap:
    default: {}
    description: Mapping of service endpoint -> protocol. Typically set
                 via parameter_defaults in the resource registry.
    type: json
  MonitoringSubscriptionPacemaker:
    default: 'overcloud-pacemaker'
    type: string
  CorosyncIPv6:
    default: false
    description: Enable IPv6 in Corosync
    type: boolean
  EnableFencing:
    default: false
    description: Whether to enable fencing in Pacemaker or not.
    type: boolean
  PacemakerTLSPriorities:
    type: string
    description: Pacemaker TLS Priorities
    default: ''
  PacemakerRemoteAuthkey:
    type: string
    description: The authkey for the pacemaker remote service.
    hidden: true
  PcsdPassword:
    type: string
    description: The password for the 'pcsd' user for pacemaker.
    hidden: true
  CorosyncSettleTries:
    type: number
    description: Number of tries for cluster settling. This has the
                 same default as the pacemaker puppet module. Override
                 with a smaller value when you need to replace a controller node.
    default: 360
  FencingConfig:
    default: {}
    description: |
      Pacemaker fencing configuration. The JSON should have
      the following structure:
        {
          "devices": [
            {
              "agent": "AGENT_NAME",
              "host_mac": "HOST_MAC_ADDRESS",
              "params": {"PARAM_NAME": "PARAM_VALUE"}
            }
          ]
        }
      For instance:
        {
          "devices": [
            {
              "agent": "fence_xvm",
              "host_mac": "52:54:00:aa:bb:cc",
              "params": {
                "multicast_address": "225.0.0.12",
                "port": "baremetal_0",
                "manage_fw": true,
                "manage_key_file": true,
                "key_file": "/etc/fence_xvm.key",
                "key_file_password": "abcdef"
              }
            }
          ]
        }
    type: json
  PacemakerLoggingSource:
    type: json
    default:
      tag: system.pacemaker
      file: /var/log/host/pacemaker/pacemaker.log
      startmsg.regex: "^[a-zA-Z]{3} [0-9]{2} [:0-9]{8}"
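      # Reading note: the regex matches syslog-style timestamps such as
      # "Dec 14 01:02:03", which the log collector uses to detect where a
      # new (possibly multi-line) log record begins.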
  ContainerCli:
    type: string
    default: 'podman'
    description: CLI tool used to manage containers.
    constraints:
      - allowed_values: ['docker', 'podman']
  EnableInstanceHA:
    default: false
    description: Whether to enable an Instance HA configuration or not.
                 This setup requires the Compute role to have the
                 PacemakerRemote service added to it.
    type: boolean
  PacemakerBundleOperationTimeout:
    type: string
    default: ''
    description: The timeout for start, monitor and stop operations
                 run by the container resource agent, in seconds.
                 When set to the default '', the timeout comes from
                 pacemaker's default operation timeouts (20s). When
                 set to the default and podman is used, the timeout
                 is forced to 120s.
    constraints:
      - allowed_pattern: "([1-9][0-9]*s)?"
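      # For illustration: the pattern accepts the empty string or a positive
      # integer with an 's' suffix, e.g. '30s' or '120s'; values such as
      # '0s', '30' or '30m' are rejected.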

parameter_groups:
- label: deprecated
  description: |
    The following parameters are deprecated and will be removed. They should not
    be relied on for new deployments. If you have concerns regarding deprecated
    parameters, please contact the TripleO development team on IRC or the
    OpenStack mailing list.
  parameters:
  - CorosyncIPv6

conditions:
  pcmk_tls_priorities_empty: {equals: [{get_param: PacemakerTLSPriorities}, '']}
  pcmk_bundle_op_timeout_empty: {equals: [{get_param: PacemakerBundleOperationTimeout}, '']}
  podman_enabled: {equals: [{get_param: ContainerCli}, 'podman']}
  is_ipv6:
    equals:
      - {get_param: [ServiceData, net_ip_version_map, {get_param: [ServiceNetMap, PacemakerNetwork]}]}
      - 6
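  # is_ipv6 is true when ServiceData's net_ip_version_map records IP version 6
  # for whichever network ServiceNetMap assigns to the Pacemaker service.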

outputs:
  role_data:
    description: Role data for the Pacemaker role.
    value:
      service_name: pacemaker
      monitoring_subscription: {get_param: MonitoringSubscriptionPacemaker}
      firewall_rules:
        '130 pacemaker tcp':
          proto: 'tcp'
          dport:
            - 2224
            - 3121
            - 21064
        '131 pacemaker udp':
          proto: 'udp'
          dport: 5405
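        # Port reference: 2224/tcp is pcsd, 3121/tcp is pacemaker_remote,
        # 21064/tcp is dlm, and 5405/udp carries corosync cluster traffic.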
      config_settings:
        map_merge:
          - pacemaker::corosync::cluster_name: 'tripleo_cluster'
            pacemaker::corosync::manage_fw: false
            pacemaker::resource_defaults::defaults:
              resource-stickiness: { value: INFINITY }
            corosync_token_timeout: 10000
            pacemaker::corosync::settle_tries: {get_param: CorosyncSettleTries}
            pacemaker::resource::bundle::deep_compare: true
            pacemaker::resource::ip::deep_compare: true
            pacemaker::resource::ocf::deep_compare: true
            corosync_ipv6: {if: [is_ipv6, true, false]}
            tripleo::fencing::config: {get_param: FencingConfig}
            tripleo::fencing::deep_compare: true
            enable_fencing: {get_param: EnableFencing}
            hacluster_pwd: {get_param: PcsdPassword}
            tripleo::profile::base::pacemaker::remote_authkey: {get_param: PacemakerRemoteAuthkey}
            tripleo::profile::base::pacemaker::pcsd_bind_addr:
              str_replace:
                template:
                  "%{hiera('$NETWORK')}"
                params:
                  $NETWORK: {get_param: [ServiceNetMap, PacemakerNetwork]}
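            # With PacemakerNetwork mapped to e.g. 'internal_api', str_replace
            # renders "%{hiera('internal_api')}", which Puppet later resolves
            # via hiera to the node's IP on that network.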
          -
            if:
              - pcmk_tls_priorities_empty
              - {}
              - tripleo::pacemaker::tls_priorities: {get_param: PacemakerTLSPriorities}
          -
            if:
              - and:
                - pcmk_bundle_op_timeout_empty
                - not: podman_enabled
              - {}
              - tripleo::profile::base::pacemaker::resource_op_defaults:
                  bundle:
                    name: timeout
                    value:
                      if:
                        - pcmk_bundle_op_timeout_empty
                        - '120s'
                        - {get_param: PacemakerBundleOperationTimeout}
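          # Net effect: an explicit PacemakerBundleOperationTimeout always wins;
          # when it is left empty, podman deployments get a forced '120s' bundle
          # timeout, while docker deployments keep pacemaker's built-in defaults.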
      service_config_settings:
        rsyslog:
          tripleo_logging_sources_pacemaker:
            - {get_param: PacemakerLoggingSource}
      step_config: |
        include tripleo::profile::base::pacemaker
      host_prep_tasks:
        # Need this until https://bugzilla.redhat.com/show_bug.cgi?id=1857247 is fixed
        - name: Make sure python3-novaclient is installed when IHA is enabled
          package:
            name: python3-novaclient
            state: present
          when: {get_param: EnableInstanceHA}
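          # Note: Heat substitutes get_param when it renders the template, so
          # Ansible sees a literal true/false here, not a runtime lookup.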
      upgrade_tasks:
        - name: upgrade step 0
          when: step|int == 0
          block:
            # If performing an upgrade which requires upgrading the operating
            # system, a transfer data step needs to be run. During this step
            # the whole pacemaker cluster is stopped, so we can't check the
            # cluster status. Once the transfer_data step is executed, a flag
            # file is stored. This code checks for that file to decide whether
            # the normal pacemaker upgrade should be skipped: with an operating
            # system upgrade a new cluster is created, and the other nodes are
            # then added to it.
            - name: check flag file existence in destination host
              stat:
                path: "/var/lib/tripleo/transfer-flags/var-lib-mysql"
              register: tripleo_transfer_flag_stat
              become: true
              delegate_to: "{{ mysql_short_bootstrap_node_name }}"
            - name: Set fact cluster_recreate
              set_fact:
                cluster_recreate: "{{ tripleo_transfer_flag_stat.stat.exists|bool }}"
            - name: Check pacemaker cluster running before upgrade
              tags: validation
              pacemaker_cluster: state=online check_and_fail=true
              async: 30
              poll: 4
              when: not cluster_recreate|bool
        - name: Create hiera data to upgrade pacemaker in a stepwise manner.
          when:
            - step|int == 1
            - cluster_recreate|bool
          block:
            - name: set pacemaker upgrade node facts in a single-node environment
              set_fact:
                pacemaker_short_node_names_upgraded: "{{ pacemaker_short_node_names }}"
                cacheable: no
              when: groups['pacemaker'] | length <= 1
            - name: set pacemaker upgrade node facts from the limit option
              set_fact:
                pacemaker_short_node_names_upgraded: "{{ pacemaker_short_node_names_upgraded|default([]) + [item.split('.')[0]] }}"
                cacheable: no
              when:
                - groups['pacemaker'] | length > 1
                - item.split('.')[0] in ansible_limit.split(':')
              loop: "{{ pacemaker_short_node_names | default([]) }}"
            - fail:
                msg: >
                  You can't upgrade pacemaker without a staged upgrade.
                  You need to use the limit option in order to do so.
              when: >-
                pacemaker_short_node_names_upgraded is not defined or
                pacemaker_short_node_names_upgraded | length == 0
            - debug:
                msg: "Prepare pacemaker upgrade for {{ pacemaker_short_node_names_upgraded }}"
            - name: set pacemaker node ips fact from the names fact
              set_fact:
                # Generate matching IPs for the names, e.g. for these variable values:
                #   pacemaker_node_ips: [ "1", "2", "3" ]
                #   pacemaker_short_node_names: [ "a", "b", "c" ]
                #   pacemaker_short_node_names_override: [ "b" ]
                # it will set:
                #   pacemaker_node_ips_override: [ "2" ]
                pacemaker_node_ips_upgraded: "{{
                  dict(pacemaker_short_node_names|zip(pacemaker_node_ips))
                  | dict2items
                  | selectattr('key', 'in', pacemaker_short_node_names_upgraded)
                  | map(attribute='value')
                  | list }}"
                cacheable: no

            - name: add the pacemaker short name to hiera data for the upgrade.
              include_role:
                name: tripleo_upgrade_hiera
                tasks_from: set.yml
              vars:
                tripleo_upgrade_key: pacemaker_short_node_names_override
                tripleo_upgrade_value: "{{pacemaker_short_node_names_upgraded}}"
            - name: add the pacemaker ips to hiera data for the upgrade.
              include_role:
                name: tripleo_upgrade_hiera
                tasks_from: set.yml
              vars:
                tripleo_upgrade_key: pacemaker_node_ips_override
                tripleo_upgrade_value: "{{pacemaker_node_ips_upgraded}}"
            - name: remove the extra hiera data needed for the upgrade.
              include_role:
                name: tripleo_upgrade_hiera
                tasks_from: remove.yml
              vars:
                tripleo_upgrade_key: "{{item}}"
              loop:
                - pacemaker_short_node_names_override
                - pacemaker_node_ips_override
              when: pacemaker_short_node_names_upgraded | length == pacemaker_short_node_names | length
        - name: upgrade step 2
          when: step|int == 2
          block:
            - name: Stop pacemaker cluster
              pacemaker_cluster: state=offline
              when: not cluster_recreate|bool
        - name: upgrade step 4
          when: step|int == 4
          block:
            - name: Start pacemaker cluster
              pacemaker_cluster: state=online
              when: not cluster_recreate|bool
      external_upgrade_tasks:
        - when:
            - step|int == 1
          tags:
            - never
            - system_upgrade_stop_services
            - system_upgrade_transfer_data
          block:
            - name: Stop cluster
              become: true
              shell: |
                set -eu
                # pcs may not be present on every host, so only stop the
                # cluster where the binary actually exists.
                FILE=/usr/sbin/pcs
                if test -f "$FILE"; then
                  /usr/sbin/pcs cluster stop --force
                fi
              delegate_to: "{{ item }}"
              with_items: "{{ groups['pacemaker'] | default([]) }}"
      update_tasks:
        - name: Check pacemaker cluster running before the minor update
          when: step|int == 0  # TODO(marios) disabling validations?
          pacemaker_cluster: state=online check_and_fail=true
          async: 30
          poll: 4
        - name: Move virtual IPs to another node before stopping pacemaker
          when:
            - step|int == 1
            - hostvars[inventory_hostname]["haproxy_node_names"]|default([])|length > 1
          shell: |
            CLUSTER_NODE=$(crm_node -n)
            echo "Retrieving all the VIPs which are hosted on this node"
            VIPS_TO_MOVE=$(crm_mon --as-xml | xmllint --xpath '//resource[@resource_agent = "ocf::heartbeat:IPaddr2" and @role = "Started" and @managed = "true" and ./node[@name = "'${CLUSTER_NODE}'"]]/@id' - | sed -e 's/id=//g' -e 's/"//g')
            for v in ${VIPS_TO_MOVE}; do
              echo "Moving VIP $v to another node"
              pcs resource move $v --wait=300
            done
            echo "Removing the location constraints that were created to move the VIPs"
            for v in ${VIPS_TO_MOVE}; do
              echo "Removing location ban for VIP $v"
              ban_id=$(cibadmin --query | xmllint --xpath 'string(//rsc_location[@rsc="'${v}'" and @node="'${CLUSTER_NODE}'" and @score="-INFINITY"]/@id)' -)
              if [ -n "$ban_id" ]; then
                pcs constraint remove ${ban_id}
              else
                echo "Could not retrieve and clear location constraint for VIP $v" >&2
              fi
            done
        - name: Acquire the cluster shutdown lock to stop pacemaker cluster
          when: step|int == 1
          command: systemd-cat -t ha-shutdown /var/lib/container-config-scripts/pacemaker_mutex_shutdown.sh --acquire
        - name: Stop pacemaker cluster
          when: step|int == 1
          pacemaker_cluster: state=offline
        - name: Start pacemaker cluster
          when: step|int == 4
          pacemaker_cluster: state=online
        - name: Release the cluster shutdown lock
          when: step|int == 4
          command: systemd-cat -t ha-shutdown /var/lib/container-config-scripts/pacemaker_mutex_shutdown.sh --release