heat_template_version: rocky

description: >
  Pacemaker service configured with Puppet

parameters:
  ServiceData:
    default: {}
    description: Dictionary packing service data
    type: json
  ServiceNetMap:
    default: {}
    description: Mapping of service_name -> network name. Typically set
                 via parameter_defaults in the resource registry. This
                 mapping overrides those in ServiceNetMapDefaults.
    type: json
  DefaultPasswords:
    default: {}
    type: json
  RoleName:
    default: ''
    description: Role name on which the service is applied
    type: string
  RoleParameters:
    default: {}
    description: Parameters specific to the role
    type: json
  EndpointMap:
    default: {}
    description: Mapping of service endpoint -> protocol. Typically set
                 via parameter_defaults in the resource registry.
    type: json
  MonitoringSubscriptionPacemaker:
    default: 'overcloud-pacemaker'
    type: string
  CorosyncIPv6:
    default: false
    description: Enable IPv6 in Corosync
    type: boolean
  EnableFencing:
    default: false
    description: Whether to enable fencing in Pacemaker or not.
    type: boolean
  PacemakerTLSPriorities:
    type: string
    description: Pacemaker TLS priorities. Leave empty to keep the default.
    default: ''
  PacemakerRemoteAuthkey:
    type: string
    description: The authkey for the pacemaker remote service.
    hidden: true
  PcsdPassword:
    type: string
    description: The password for the 'pcsd' user for pacemaker.
    hidden: true
  CorosyncSettleTries:
    type: number
    description: Number of tries for cluster settling. This has the same
                 default as the pacemaker puppet module. Override to a
                 smaller value when you need to replace a controller node.
    default: 360
  FencingConfig:
    default: {}
    description: |
      Pacemaker fencing configuration. The JSON should have
      the following structure:
        {
          "devices": [
            {
              "agent": "AGENT_NAME",
              "host_mac": "HOST_MAC_ADDRESS",
              "params": {"PARAM_NAME": "PARAM_VALUE"}
            }
          ]
        }
      For instance:
        {
          "devices": [
            {
              "agent": "fence_xvm",
              "host_mac": "52:54:00:aa:bb:cc",
              "params": {
                "multicast_address": "225.0.0.12",
                "port": "baremetal_0",
                "manage_fw": true,
                "manage_key_file": true,
                "key_file": "/etc/fence_xvm.key",
                "key_file_password": "abcdef"
              }
            }
          ]
        }
    type: json
  PacemakerLoggingSource:
    type: json
    default:
      tag: system.pacemaker
      file: /var/log/pacemaker.log,/var/log/cluster/corosync.log
      startmsg.regex: ^[^ ]*\s*[^ ]* [^ ]* \[[^ ]*\] [^ ]*
  UpgradeLeappEnabled:
    description: Use Leapp for operating system upgrade
    type: boolean
    default: true

conditions:
  pcmk_tls_priorities_empty: {equals: [{get_param: PacemakerTLSPriorities}, '']}
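# Illustrative only: fencing is normally enabled from an environment file
# rather than by editing this template. A minimal sketch, reusing the
# fence_xvm example from the FencingConfig description above (every value
# here is a placeholder, not a shipped default):
#
#   parameter_defaults:
#     EnableFencing: true
#     FencingConfig:
#       devices:
#         - agent: fence_xvm
#           host_mac: "52:54:00:aa:bb:cc"
#           params:
#             multicast_address: 225.0.0.12
#             port: baremetal_0
#             key_file: /etc/fence_xvm.key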
outputs:
  role_data:
    description: Role data for the Pacemaker role.
    value:
      service_name: pacemaker
      monitoring_subscription: {get_param: MonitoringSubscriptionPacemaker}
      config_settings:
        map_merge:
          - pacemaker::corosync::cluster_name: 'tripleo_cluster'
            pacemaker::corosync::manage_fw: false
            pacemaker::resource_defaults::defaults:
              resource-stickiness: { value: INFINITY }
            corosync_token_timeout: 10000
            pacemaker::corosync::settle_tries: {get_param: CorosyncSettleTries}
            pacemaker::resource::bundle::deep_compare: true
            pacemaker::resource::ip::deep_compare: true
            pacemaker::resource::ocf::deep_compare: true
            tripleo.pacemaker.firewall_rules:
              '130 pacemaker tcp':
                proto: 'tcp'
                dport:
                  - 2224
                  - 3121
                  - 21064
              '131 pacemaker udp':
                proto: 'udp'
                dport: 5405
            corosync_ipv6: {get_param: CorosyncIPv6}
            tripleo::fencing::config: {get_param: FencingConfig}
            tripleo::fencing::deep_compare: true
            enable_fencing: {get_param: EnableFencing}
            # Select the first non-empty password: an operator-provided
            # PcsdPassword wins over the generated DefaultPasswords entry.
            hacluster_pwd:
              yaql:
                expression: $.data.passwords.where($ != '').first()
                data:
                  passwords:
                    - {get_param: PcsdPassword}
                    - {get_param: [DefaultPasswords, pcsd_password]}
            tripleo::profile::base::pacemaker::remote_authkey: {get_param: PacemakerRemoteAuthkey}
          - if:
              - pcmk_tls_priorities_empty
              - {}
              - tripleo::pacemaker::tls_priorities: {get_param: PacemakerTLSPriorities}
      service_config_settings:
        rsyslog:
          tripleo_logging_sources_pacemaker:
            - {get_param: PacemakerLoggingSource}
      step_config: |
        include ::tripleo::profile::base::pacemaker
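      # The upgrade step 1 tasks below assume a staged upgrade: each run is
      # limited to a subset of the pacemaker nodes via Ansible's limit
      # option. A hedged sketch of the intended invocation (the node name is
      # a placeholder; exact flags depend on the tripleoclient version):
      #
      #   openstack overcloud upgrade run --limit controller-0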
      upgrade_tasks:
        - name: upgrade step 0
          when: step|int == 0
          vars:
            upgrade_leapp_enabled: {get_param: UpgradeLeappEnabled}
          block:
            - name: Check pacemaker cluster running before upgrade
              tags: validation
              pacemaker_cluster: state=online check_and_fail=true
              async: 30
              poll: 4
              when: not upgrade_leapp_enabled|bool
        - name: upgrade step 1
          when:
            - step|int == 1
          block:
            - name: set pacemaker upgrade node facts in a single-node environment
              set_fact:
                pacemaker_short_node_names_upgraded: "{{ pacemaker_short_node_names }}"
                cacheable: no
              when: groups['pacemaker'] | length <= 1
            - name: set pacemaker upgrade node facts from the limit option
              set_fact:
                pacemaker_short_node_names_upgraded: "{{ pacemaker_short_node_names_upgraded|default([]) + [item.split('.')[0]] }}"
                cacheable: no
              when:
                - groups['pacemaker'] | length > 1
                - item.split('.')[0] in ansible_limit.split(',')
              loop: "{{ pacemaker_short_node_names }}"
            - debug:
                msg: "Prepare pacemaker upgrade for {{ pacemaker_short_node_names_upgraded }}"
            - fail:
                msg: >
                  You can't upgrade pacemaker without a staged upgrade.
                  You need to use the limit option in order to do so.
              when: >-
                pacemaker_short_node_names_upgraded is not defined or
                pacemaker_short_node_names_upgraded | length == 0
            - name: set pacemaker node ips fact from the names fact
              set_fact:
                # Generate the IPs matching the upgraded names, e.g. for these
                # variable values:
                #   pacemaker_node_ips: [ "1", "2", "3" ]
                #   pacemaker_short_node_names: [ "a", "b", "c" ]
                #   pacemaker_short_node_names_upgraded: [ "b" ]
                # it will set:
                #   pacemaker_node_ips_upgraded: [ "2" ]
                pacemaker_node_ips_upgraded: "{{ dict(pacemaker_short_node_names|zip(pacemaker_node_ips)) | dict2items | selectattr('key', 'in', pacemaker_short_node_names_upgraded) | map(attribute='value') | list }}"
                cacheable: no
            - name: add the pacemaker short name to hiera data for the upgrade.
              include_role:
                name: tripleo-upgrade-hiera
                tasks_from: set.yml
              vars:
                tripleo_upgrade_key: pacemaker_short_node_names_override
                tripleo_upgrade_value: "{{ pacemaker_short_node_names_upgraded }}"
            - name: add the pacemaker ips to hiera data for the upgrade.
              include_role:
                name: tripleo-upgrade-hiera
                tasks_from: set.yml
              vars:
                tripleo_upgrade_key: pacemaker_node_ips_override
                tripleo_upgrade_value: "{{ pacemaker_node_ips_upgraded }}"
            - name: remove the extra hiera data needed for the upgrade.
              include_role:
                name: tripleo-upgrade-hiera
                tasks_from: remove.yml
              vars:
                tripleo_upgrade_key: "{{ item }}"
              loop:
                - pacemaker_short_node_names_override
                - pacemaker_node_ips_override
              when: pacemaker_short_node_names_upgraded | length == pacemaker_short_node_names | length
        - name: upgrade step 2
          when: step|int == 2
          vars:
            upgrade_leapp_enabled: {get_param: UpgradeLeappEnabled}
          block:
            - name: Stop pacemaker cluster
              pacemaker_cluster: state=offline
              when: not upgrade_leapp_enabled|bool
        - name: upgrade step 4
          when: step|int == 4
          vars:
            upgrade_leapp_enabled: {get_param: UpgradeLeappEnabled}
          block:
            - name: Start pacemaker cluster
              pacemaker_cluster: state=online
              when: not upgrade_leapp_enabled|bool
      external_upgrade_tasks:
        - when:
            - step|int == 1
          tags:
            - never
            - system_upgrade_stop_services
            - system_upgrade_transfer_data
          block:
            - name: Stop cluster
              become: true
              shell: |
                set -eu
                FILE=/usr/sbin/pcs
                if test -f "$FILE"; then
                  /usr/sbin/pcs cluster stop --force
                fi
              delegate_to: "{{ item }}"
              with_items: "{{ groups['pacemaker'] | default([]) }}"
      update_tasks:
        - name: Check pacemaker cluster running before the minor update
          when: step|int == 0  # TODO(marios) disabling validations?
          pacemaker_cluster: state=online check_and_fail=true
          async: 30
          poll: 4
        - name: Move virtual IPs to another node before stopping pacemaker
          when: step|int == 1
          shell: |
            CLUSTER_NODE=$(crm_node -n)
            echo "Retrieving all the VIPs which are hosted on this node"
            VIPS_TO_MOVE=$(crm_mon --as-xml | xmllint --xpath '//resource[@resource_agent = "ocf::heartbeat:IPaddr2" and @role = "Started" and @managed = "true" and ./node[@name = "'${CLUSTER_NODE}'"]]/@id' - | sed -e 's/id=//g' -e 's/"//g')
            for v in ${VIPS_TO_MOVE}; do
              echo "Moving VIP $v to another node"
              pcs resource move $v --wait=300
            done
            echo "Removing the location constraints that were created to move the VIPs"
            for v in ${VIPS_TO_MOVE}; do
              echo "Removing location ban for VIP $v"
              ban_id=$(cibadmin --query | xmllint --xpath 'string(//rsc_location[@rsc="'${v}'" and @node="'${CLUSTER_NODE}'" and @score="-INFINITY"]/@id)' -)
              if [ -n "$ban_id" ]; then
                pcs constraint remove ${ban_id}
              else
                echo "Could not retrieve and clear location constraint for VIP $v" >&2
              fi
            done
        - name: Stop pacemaker cluster
          when: step|int == 1
          pacemaker_cluster: state=offline
        - name: Start pacemaker cluster
          when: step|int == 4
          pacemaker_cluster: state=online
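      # For reference, the step 1 VIP handling above amounts to running, per
      # VIP, the following pcs commands (resource and constraint names below
      # are placeholders):
      #
      #   pcs resource move ip-192.168.24.10 --wait=300
      #   pcs constraint remove cli-ban-ip-192.168.24.10-on-controller-0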