Composable HA

This commit implements composable HA for the pacemaker profiles.
- Every time a pacemaker resource gets included on a node,
  that node will add a node cluster property with the name of the resource
  (e.g. galera-role=true)
- Add a location rule constraint to force running the resource only
  on the nodes that have that property
- We also make sure that any pacemaker resource/property creation has a
  predefined number of tries (20 by default). The reason for this is
  that within composable HA, it might be possible to get "older CIB"
  errors when another node changed the CIB while we were doing an
  operation on it. Simply retrying fixes this.
- Also make sure that we use the newly introduced
  pacemaker::constraint::order class instead of the older
  pacemaker::constraint::base class. The former uses the push_cib()
  function and hence behaves correctly in case multiple nodes try
  to modify the CIB at the same time.

Change-Id: I63da4f48da14534fd76265764569e76300534472
Depends-On: Ib931adaff43dbc16220a90fb509845178d696402
Depends-On: I8d78cc1b14f0e18e034b979a826bf3cdb0878bae
Depends-On: Iba1017c33b1cd4d56a3ee8824d851b38cfdbc2d3
This commit is contained in:
Michele Baldessari 2016-08-29 22:07:55 +02:00 committed by Emilien Macchi
parent 4a77eb9e98
commit f9efeb1582
9 changed files with 250 additions and 59 deletions

View File

@ -1 +1,7 @@
require 'puppet-openstack_spec_helper/rake_tasks'
# We disable the unquoted node name check because puppet-pacemaker node
# properties make use of attributes called 'node' and puppet-lint breaks on
# them: https://github.com/rodjek/puppet-lint/issues/501
# We are not using site.pp with nodes so this is safe.
PuppetLint.configuration.send('disable_unquoted_node_name')

View File

@ -27,11 +27,35 @@
# (String) IP address on which HAProxy is colocated
# Required
#
# [*location_rule*]
# (optional) Add a location constraint before actually enabling
# the resource. Must be a hash like the following example:
# location_rule => {
# resource_discovery => 'exclusive', # optional
# role => 'master|slave', # optional
# score => 0, # optional
# score_attribute => foo, # optional
# # Multiple expressions can be used
# expression => ['opsrole eq controller']
# }
# Defaults to undef
#
# [*pcs_tries*]
# (Optional) The number of times pcs commands should be retried.
# Defaults to 1
#
# [*ensure*]
# (Boolean) Create all the resources only if true. False won't
# destroy the resources, it will just not create them.
# Defaults to true
define tripleo::pacemaker::haproxy_with_vip($vip_name, $ip_address, $ensure = true) {
#
define tripleo::pacemaker::haproxy_with_vip(
$vip_name,
$ip_address,
$location_rule = undef,
$pcs_tries = 1,
$ensure = true)
{
if($ensure) {
if is_ipv6_address($ip_address) {
$netmask = '64'
@ -40,25 +64,29 @@ define tripleo::pacemaker::haproxy_with_vip($vip_name, $ip_address, $ensure = tr
}
pacemaker::resource::ip { "${vip_name}_vip":
ip_address => $ip_address,
cidr_netmask => $netmask,
ip_address => $ip_address,
cidr_netmask => $netmask,
location_rule => $location_rule,
tries => $pcs_tries,
}
pacemaker::constraint::base { "${vip_name}_vip-then-haproxy":
constraint_type => 'order',
pacemaker::constraint::order { "${vip_name}_vip-then-haproxy":
first_resource => "ip-${ip_address}",
second_resource => 'haproxy-clone',
first_action => 'start',
second_action => 'start',
constraint_params => 'kind=Optional',
require => [Pacemaker::Resource::Service['haproxy'],
Pacemaker::Resource::Ip["${vip_name}_vip"]],
tries => $pcs_tries,
}
pacemaker::constraint::colocation { "${vip_name}_vip-with-haproxy":
source => "ip-${ip_address}",
target => 'haproxy-clone',
score => 'INFINITY',
require => [Pacemaker::Resource::Service['haproxy'],
Pacemaker::Resource::Ip["${vip_name}_vip"]],
source => "ip-${ip_address}",
target => 'haproxy-clone',
score => 'INFINITY',
tries => $pcs_tries,
}
Pacemaker::Resource::Ip["${vip_name}_vip"] ->
Pacemaker::Resource::Service['haproxy'] ->
Pacemaker::Constraint::Order["${vip_name}_vip-then-haproxy"] ->
Pacemaker::Constraint::Colocation["${vip_name}_vip-with-haproxy"]
}
}

View File

@ -27,9 +27,14 @@
# for more details.
# Defaults to hiera('step')
#
# [*pcs_tries*]
# (Optional) The number of times pcs commands should be retried.
# Defaults to hiera('pcs_tries', 20)
#
class tripleo::profile::pacemaker::cinder::backup (
$bootstrap_node = hiera('cinder_backup_short_bootstrap_node_name'),
$step = hiera('step'),
$pcs_tries = hiera('pcs_tries', 20),
) {
Service <| tag == 'cinder::backup' |> {
@ -47,6 +52,15 @@ class tripleo::profile::pacemaker::cinder::backup (
include ::tripleo::profile::base::cinder::backup
if $step >= 2 {
pacemaker::property { 'cinder-backup-role-node-property':
property => 'cinder-backup-role',
value => true,
tries => $pcs_tries,
node => $::hostname,
}
}
if $step >= 3 and $pacemaker_master and hiera('stack_action') == 'UPDATE' {
Cinder_config<||>
~>
@ -55,7 +69,13 @@ class tripleo::profile::pacemaker::cinder::backup (
if $step >= 5 and $pacemaker_master {
pacemaker::resource::service { $::cinder::params::backup_service :
op_params => 'start timeout=200s stop timeout=200s',
op_params => 'start timeout=200s stop timeout=200s',
tries => $pcs_tries,
location_rule => {
resource_discovery => 'exclusive',
score => 0,
expression => ['cinder-backup-role eq true'],
}
}
}

View File

@ -27,9 +27,14 @@
# for more details.
# Defaults to hiera('step')
#
# [*pcs_tries*]
# (Optional) The number of times pcs commands should be retried.
# Defaults to hiera('pcs_tries', 20)
#
class tripleo::profile::pacemaker::cinder::volume (
$bootstrap_node = hiera('cinder_volume_short_bootstrap_node_name'),
$step = hiera('step'),
$pcs_tries = hiera('pcs_tries', 20),
) {
Service <| tag == 'cinder::volume' |> {
hasrestart => true,
@ -46,6 +51,15 @@ class tripleo::profile::pacemaker::cinder::volume (
include ::tripleo::profile::base::cinder::volume
if $step >= 2 {
pacemaker::property { 'cinder-volume-role-node-property':
property => 'cinder-volume-role',
value => true,
tries => $pcs_tries,
node => $::hostname,
}
}
if $step >= 3 and $pacemaker_master and hiera('stack_action') == 'UPDATE' {
Cinder_api_paste_ini<||> ~> Tripleo::Pacemaker::Resource_restart_flag["${::cinder::params::volume_service}"]
Cinder_config<||> ~> Tripleo::Pacemaker::Resource_restart_flag["${::cinder::params::volume_service}"]
@ -54,7 +68,13 @@ class tripleo::profile::pacemaker::cinder::volume (
if $step >= 5 and $pacemaker_master {
pacemaker::resource::service { $::cinder::params::volume_service :
op_params => 'start timeout=200s stop timeout=200s',
op_params => 'start timeout=200s stop timeout=200s',
tries => $pcs_tries,
location_rule => {
resource_discovery => 'exclusive',
score => 0,
expression => ['cinder-volume-role eq true'],
}
}
}

View File

@ -36,11 +36,16 @@
# for more details.
# Defaults to hiera('step')
#
# [*pcs_tries*]
# (Optional) The number of times pcs commands should be retried.
# Defaults to hiera('pcs_tries', 20)
#
class tripleo::profile::pacemaker::database::mysql (
$bootstrap_node = hiera('mysql_short_bootstrap_node_name'),
$bind_address = $::hostname,
$gmcast_listen_addr = hiera('mysql_bind_host'),
$step = hiera('step'),
$pcs_tries = hiera('pcs_tries', 20),
) {
if $::hostname == downcase($bootstrap_node) {
$pacemaker_master = true
@ -118,6 +123,12 @@ class tripleo::profile::pacemaker::database::mysql (
}
if $step >= 2 {
pacemaker::property { 'galera-role-node-property':
property => 'galera-role',
value => true,
tries => $pcs_tries,
node => $::hostname,
}
if $pacemaker_master {
pacemaker::resource::ocf { 'galera' :
ocf_agent_name => 'heartbeat:galera',
@ -125,7 +136,14 @@ class tripleo::profile::pacemaker::database::mysql (
master_params => '',
meta_params => "master-max=${galera_nodes_count} ordered=true",
resource_params => "additional_parameters='--open-files-limit=16384' enable_creation=true wsrep_cluster_address='gcomm://${galera_nodes}'",
require => Class['::mysql::server'],
tries => $pcs_tries,
location_rule => {
resource_discovery => 'exclusive',
score => 0,
expression => ['galera-role eq true'],
},
require => [Class['::mysql::server'],
Pacemaker::Property['galera-role-node-property']],
before => Exec['galera-ready'],
}
exec { 'galera-ready' :

View File

@ -36,11 +36,16 @@
# for when redis is managed by pacemaker. Defaults to hiera('redis_file_limit')
# or 10240 (default in redis systemd limits)
#
# [*pcs_tries*]
# (Optional) The number of times pcs commands should be retried.
# Defaults to hiera('pcs_tries', 20)
#
class tripleo::profile::pacemaker::database::redis (
$bootstrap_node = hiera('redis_short_bootstrap_node_name'),
$enable_load_balancer = hiera('enable_load_balancer', true),
$step = hiera('step'),
$redis_file_limit = hiera('redis_file_limit', 10240),
$pcs_tries = hiera('pcs_tries', 20),
) {
if $::hostname == downcase($bootstrap_node) {
$pacemaker_master = true
@ -71,14 +76,29 @@ class tripleo::profile::pacemaker::database::redis (
}
}
if $step >= 2 and $pacemaker_master {
pacemaker::resource::ocf { 'redis':
ocf_agent_name => 'heartbeat:redis',
master_params => '',
meta_params => 'notify=true ordered=true interleave=true',
resource_params => 'wait_last_known_master=true',
op_params => 'start timeout=200s stop timeout=200s',
require => Class['::redis'],
if $step >= 2 {
pacemaker::property { 'redis-role-node-property':
property => 'redis-role',
value => true,
tries => $pcs_tries,
node => $::hostname,
}
if $pacemaker_master {
pacemaker::resource::ocf { 'redis':
ocf_agent_name => 'heartbeat:redis',
master_params => '',
meta_params => 'notify=true ordered=true interleave=true',
resource_params => 'wait_last_known_master=true',
op_params => 'start timeout=200s stop timeout=200s',
tries => $pcs_tries,
location_rule => {
resource_discovery => 'exclusive',
score => 0,
expression => ['redis-role eq true'],
},
require => [Class['::redis'],
Pacemaker::Property['redis-role-node-property']],
}
}
}
}

View File

@ -31,10 +31,15 @@
# for more details.
# Defaults to hiera('step')
#
# [*pcs_tries*]
# (Optional) The number of times pcs commands should be retried.
# Defaults to hiera('pcs_tries', 20)
#
class tripleo::profile::pacemaker::haproxy (
$bootstrap_node = hiera('haproxy_short_bootstrap_node_name'),
$enable_load_balancer = hiera('enable_load_balancer', true),
$step = hiera('step'),
$pcs_tries = hiera('pcs_tries', 20),
) {
include ::tripleo::profile::base::haproxy
@ -50,56 +55,90 @@ class tripleo::profile::pacemaker::haproxy (
}
}
if $step >= 2 and $pacemaker_master and $enable_load_balancer {
if $step >= 2 and $enable_load_balancer {
pacemaker::property { 'haproxy-role-node-property':
property => 'haproxy-role',
value => true,
tries => $pcs_tries,
node => $::hostname,
}
if $pacemaker_master {
$haproxy_location_rule = {
resource_discovery => 'exclusive',
score => 0,
expression => ['haproxy-role eq true'],
}
# FIXME: we should not have to access tripleo::haproxy class
# parameters here to configure pacemaker VIPs. The configuration
# of pacemaker VIPs could move into puppet-tripleo or we should
# make use of less specific hiera parameters here for the settings.
pacemaker::resource::service { 'haproxy':
op_params => 'start timeout=200s stop timeout=200s',
clone_params => true,
op_params => 'start timeout=200s stop timeout=200s',
clone_params => true,
location_rule => $haproxy_location_rule,
tries => $pcs_tries,
require => Pacemaker::Property['haproxy-role-node-property'],
}
$control_vip = hiera('controller_virtual_ip')
tripleo::pacemaker::haproxy_with_vip { 'haproxy_and_control_vip':
vip_name => 'control',
ip_address => $control_vip,
vip_name => 'control',
ip_address => $control_vip,
location_rule => $haproxy_location_rule,
pcs_tries => $pcs_tries,
require => Pacemaker::Property['haproxy-role-node-property'],
}
$public_vip = hiera('public_virtual_ip')
tripleo::pacemaker::haproxy_with_vip { 'haproxy_and_public_vip':
ensure => $public_vip and $public_vip != $control_vip,
vip_name => 'public',
ip_address => $public_vip,
ensure => $public_vip and $public_vip != $control_vip,
vip_name => 'public',
ip_address => $public_vip,
location_rule => $haproxy_location_rule,
pcs_tries => $pcs_tries,
require => Pacemaker::Property['haproxy-role-node-property'],
}
$redis_vip = hiera('redis_vip')
tripleo::pacemaker::haproxy_with_vip { 'haproxy_and_redis_vip':
ensure => $redis_vip and $redis_vip != $control_vip,
vip_name => 'redis',
ip_address => $redis_vip,
ensure => $redis_vip and $redis_vip != $control_vip,
vip_name => 'redis',
ip_address => $redis_vip,
location_rule => $haproxy_location_rule,
pcs_tries => $pcs_tries,
require => Pacemaker::Property['haproxy-role-node-property'],
}
$internal_api_vip = hiera('internal_api_virtual_ip')
tripleo::pacemaker::haproxy_with_vip { 'haproxy_and_internal_api_vip':
ensure => $internal_api_vip and $internal_api_vip != $control_vip,
vip_name => 'internal_api',
ip_address => $internal_api_vip,
ensure => $internal_api_vip and $internal_api_vip != $control_vip,
vip_name => 'internal_api',
ip_address => $internal_api_vip,
location_rule => $haproxy_location_rule,
pcs_tries => $pcs_tries,
require => Pacemaker::Property['haproxy-role-node-property'],
}
$storage_vip = hiera('storage_virtual_ip')
tripleo::pacemaker::haproxy_with_vip { 'haproxy_and_storage_vip':
ensure => $storage_vip and $storage_vip != $control_vip,
vip_name => 'storage',
ip_address => $storage_vip,
ensure => $storage_vip and $storage_vip != $control_vip,
vip_name => 'storage',
ip_address => $storage_vip,
location_rule => $haproxy_location_rule,
pcs_tries => $pcs_tries,
require => Pacemaker::Property['haproxy-role-node-property'],
}
$storage_mgmt_vip = hiera('storage_mgmt_virtual_ip')
tripleo::pacemaker::haproxy_with_vip { 'haproxy_and_storage_mgmt_vip':
ensure => $storage_mgmt_vip and $storage_mgmt_vip != $control_vip,
vip_name => 'storage_mgmt',
ip_address => $storage_mgmt_vip,
ensure => $storage_mgmt_vip and $storage_mgmt_vip != $control_vip,
vip_name => 'storage_mgmt',
ip_address => $storage_mgmt_vip,
location_rule => $haproxy_location_rule,
pcs_tries => $pcs_tries,
require => Pacemaker::Property['haproxy-role-node-property'],
}
}
}
}

View File

@ -45,6 +45,10 @@
# for more details.
# Defaults to hiera('step')
#
# [*pcs_tries*]
# (Optional) The number of times pcs commands should be retried.
# Defaults to hiera('pcs_tries', 20)
#
class tripleo::profile::pacemaker::manila (
$backend_generic_enabled = hiera('manila_backend_generic_enabled', false),
$backend_netapp_enabled = hiera('manila_backend_netapp_enabled', false),
@ -52,6 +56,7 @@ class tripleo::profile::pacemaker::manila (
$ceph_mds_enabled = hiera('ceph_mds_enabled', false),
$bootstrap_node = hiera('manila_share_short_bootstrap_node_name'),
$step = hiera('step'),
$pcs_tries = hiera('pcs_tries', 20),
) {
if $::hostname == downcase($bootstrap_node) {
$pacemaker_master = true
@ -70,6 +75,15 @@ class tripleo::profile::pacemaker::manila (
include ::tripleo::profile::base::manila::share
if $step >= 2 {
pacemaker::property { 'manila-share-role-node-property':
property => 'manila-share-role',
value => true,
tries => $pcs_tries,
node => $::hostname,
}
}
if $step >= 4 {
# manila generic:
if $backend_generic_enabled {
@ -185,7 +199,13 @@ allow command \"auth get\", allow command \"auth get-or-create\"',
# only manila-share is pacemaker managed, and in a/p
pacemaker::resource::service { $::manila::params::share_service :
op_params => 'start timeout=200s stop timeout=200s',
op_params => 'start timeout=200s stop timeout=200s',
tries => $pcs_tries,
location_rule => {
resource_discovery => 'exclusive',
score => 0,
expression => ['manila-share-role eq true'],
},
}
}

View File

@ -41,11 +41,16 @@
# for more details.
# Defaults to hiera('step')
#
# [*pcs_tries*]
# (Optional) The number of times pcs commands should be retried.
# Defaults to hiera('pcs_tries', 20)
#
class tripleo::profile::pacemaker::rabbitmq (
$bootstrap_node = hiera('rabbitmq_short_bootstrap_node_name'),
$erlang_cookie = hiera('rabbitmq::erlang_cookie'),
$user_ha_queues = hiera('rabbitmq::nr_ha_queues', 0),
$rabbit_nodes = hiera('rabbitmq_node_names'),
$pcs_tries = hiera('pcs_tries', 20),
$step = hiera('step'),
) {
if $::hostname == downcase($bootstrap_node) {
@ -72,22 +77,37 @@ class tripleo::profile::pacemaker::rabbitmq (
}
}
if $step >= 2 and $pacemaker_master {
include ::stdlib
# The default nr of ha queues is ceiling(N/2)
if $user_ha_queues == 0 {
$nr_rabbit_nodes = size($rabbit_nodes)
$nr_ha_queues = $nr_rabbit_nodes / 2 + ($nr_rabbit_nodes % 2)
} else {
$nr_ha_queues = $user_ha_queues
if $step >= 2 {
pacemaker::property { 'rabbitmq-role-node-property':
property => 'rabbitmq-role',
value => true,
tries => $pcs_tries,
node => $::hostname,
}
pacemaker::resource::ocf { 'rabbitmq':
ocf_agent_name => 'heartbeat:rabbitmq-cluster',
resource_params => "set_policy='ha-all ^(?!amq\\.).* {\"ha-mode\":\"exactly\",\"ha-params\":${nr_ha_queues}}'",
clone_params => 'ordered=true interleave=true',
meta_params => 'notify=true',
op_params => 'start timeout=200s stop timeout=200s',
require => Class['::rabbitmq'],
if $pacemaker_master {
include ::stdlib
# The default nr of ha queues is ceiling(N/2)
if $user_ha_queues == 0 {
$nr_rabbit_nodes = size($rabbit_nodes)
$nr_ha_queues = $nr_rabbit_nodes / 2 + ($nr_rabbit_nodes % 2)
} else {
$nr_ha_queues = $user_ha_queues
}
pacemaker::resource::ocf { 'rabbitmq':
ocf_agent_name => 'heartbeat:rabbitmq-cluster',
resource_params => "set_policy='ha-all ^(?!amq\\.).* {\"ha-mode\":\"exactly\",\"ha-params\":${nr_ha_queues}}'",
clone_params => 'ordered=true interleave=true',
meta_params => 'notify=true',
op_params => 'start timeout=200s stop timeout=200s',
tries => $pcs_tries,
location_rule => {
resource_discovery => 'exclusive',
score => 0,
expression => ['rabbitmq-role eq true'],
},
require => [Class['::rabbitmq'],
Pacemaker::Property['rabbitmq-role-node-property']],
}
}
}
}