HA: ensure scaling up galera does not cause promotion errors
During scale up, two galera resources are being updated in the pacemaker cluster. Force a specific ordering in puppet to make sure the galera resource agent always picks up the up-to-date config when it starts new replicas. Closes-Bug: #1892530 Change-Id: Id40ac8c10fd0348ce4fd99ce319dab933312acfa
This commit is contained in:
parent
6cdd43946e
commit
16a6ba465d
23
lib/puppet/functions/pacemaker_bundle_replicas.rb
Normal file
23
lib/puppet/functions/pacemaker_bundle_replicas.rb
Normal file
@ -0,0 +1,23 @@
|
||||
# Custom function to extract the current number of replicas for a pacemaker
|
||||
# resource, as defined in the pacemaker cluster.
|
||||
# Input is the name of a pacemaker bundle resource
|
||||
# Output is the number of replicas for that resource or 0 if not found
|
||||
Puppet::Functions.create_function(:'pacemaker_bundle_replicas') do
  dispatch :pacemaker_bundle_replicas do
    param 'String', :bundle
    return_type 'Integer'
  end

  # Query the pacemaker CIB for the replica count of a bundle resource.
  #
  # @param bundle [String] id of the pacemaker bundle resource to look up
  # @return [Integer] the value of the bundle's replicas attribute, or 0 when
  #   the command pipeline fails or its output is not a plain decimal number
  def pacemaker_bundle_replicas(bundle)
    # the name of the node holding the replicas attribute varies based on the
    # container engine used (podman, docker...), so match via attributes instead
    #
    # NOTE(review): bundle is interpolated verbatim into a shell command line;
    # callers are presumed to pass trusted resource names -- confirm.
    replicas = `cibadmin -Q | xmllint --xpath "string(//bundle[@id='#{bundle}']/*[boolean(@image) and boolean(@run-command)]/@replicas)" -`

    # post-condition: 0 in case the bundle does not exist or an error occurred
    return 0 unless $?.success?

    # The command output may be whitespace only (e.g. a lone trailing newline
    # when the attribute is absent) or otherwise non-numeric. Integer() would
    # raise on such input, so validate explicitly and fall back to 0.
    count = replicas.strip
    count =~ /\A\d+\z/ ? count.to_i : 0
  end
end
|
@ -484,6 +484,7 @@ MYSQL_HOST=localhost\n",
|
||||
storage_maps => merge($storage_maps, $storage_maps_tls),
|
||||
container_backend => $container_backend,
|
||||
tries => $pcs_tries,
|
||||
before => Exec['galera-ready'],
|
||||
}
|
||||
|
||||
pacemaker::resource::ocf { 'galera':
|
||||
@ -499,12 +500,26 @@ MYSQL_HOST=localhost\n",
|
||||
expression => ['galera-role eq true'],
|
||||
},
|
||||
bundle => 'galera-bundle',
|
||||
require => [Class['::mysql::server'],
|
||||
Pacemaker::Resource::Bundle['galera-bundle']],
|
||||
require => [Class['::mysql::server']],
|
||||
before => Exec['galera-ready'],
|
||||
force => $force_ocf,
|
||||
}
|
||||
|
||||
# Resource relation: we normally want the bundle resource to
|
||||
# be run before the OCF one, as the latter depends on the former
|
||||
# at creation time.
|
||||
# However during scale up, both resources change, and the bundle
|
||||
# one shouldn't be updated prior to the OCF one, otherwise
|
||||
# pacemaker could spawn additional replicas before the necessary
|
||||
# info is updated in the OCF resource, which would confuse the
|
||||
# galera resource agent and cause spurious errors.
|
||||
$replicas=pacemaker_bundle_replicas('galera-bundle')
|
||||
if ($replicas > 0) and ($galera_nodes_count > $replicas) {
|
||||
Pacemaker::Resource::Ocf['galera'] -> Pacemaker::Resource::Bundle['galera-bundle']
|
||||
} else {
|
||||
Pacemaker::Resource::Bundle['galera-bundle'] -> Pacemaker::Resource::Ocf['galera']
|
||||
}
|
||||
|
||||
exec { 'galera-ready' :
|
||||
command => '/usr/bin/clustercheck >/dev/null',
|
||||
timeout => 30,
|
||||
|
Loading…
Reference in New Issue
Block a user