HA: ensure scaling up galera does not cause promotion errors
During scale up, two galera resources are being updated in the
pacemaker cluster. Force a specific ordering in puppet to make
sure the galera resource agent always picks up the up-to-date
config when it starts new replicas.
Closes-Bug: #1892530
Change-Id: Id40ac8c10fd0348ce4fd99ce319dab933312acfa
(cherry picked from commit 16a6ba465d)
This commit is contained in:
parent
ffe998180d
commit
e75842e53e
23
lib/puppet/functions/pacemaker_bundle_replicas.rb
Normal file
23
lib/puppet/functions/pacemaker_bundle_replicas.rb
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Custom function to extract the current number of replicas for a pacemaker
# bundle resource, as defined in the pacemaker cluster.
#
# Input is the name of a pacemaker bundle resource.
# Output is the number of replicas for that resource, or 0 if the bundle is
# not found, the query command fails, or the reported value is not a plain
# non-negative decimal integer.
Puppet::Functions.create_function(:'pacemaker_bundle_replicas') do
  dispatch :pacemaker_bundle_replicas do
    param 'String', :bundle
    return_type 'Integer'
  end

  def pacemaker_bundle_replicas(bundle)
    # the name of the node holding the replicas attribute varies based on the
    # container engine used (podman, docker...), so match via attributes instead
    #
    # NOTE(review): `bundle` is interpolated verbatim into a shell command and
    # an XPath expression; callers must only pass trusted resource names.
    replicas = `cibadmin -Q | xmllint --xpath "string(//bundle[@id='#{bundle}']/*[boolean(@image) and boolean(@run-command)]/@replicas)" -`

    # post-condition: 0 in case the bundle does not exist or an error occurred
    return 0 unless $?.success?

    # The command output may carry a trailing newline even when the attribute
    # is missing (presumably xmllint newline-terminates string results -- TODO
    # confirm), so strip it before validating. Anything that is not a plain
    # decimal number maps to 0 instead of raising, which also avoids Integer()
    # interpreting a leading "0" or "0x" as an octal/hex prefix.
    count = replicas.strip
    count =~ /\A\d+\z/ ? count.to_i : 0
  end
end
|
@ -484,6 +484,7 @@ MYSQL_HOST=localhost\n",
|
|||||||
storage_maps => merge($storage_maps, $storage_maps_tls),
|
storage_maps => merge($storage_maps, $storage_maps_tls),
|
||||||
container_backend => $container_backend,
|
container_backend => $container_backend,
|
||||||
tries => $pcs_tries,
|
tries => $pcs_tries,
|
||||||
|
before => Exec['galera-ready'],
|
||||||
}
|
}
|
||||||
|
|
||||||
pacemaker::resource::ocf { 'galera':
|
pacemaker::resource::ocf { 'galera':
|
||||||
@ -499,12 +500,26 @@ MYSQL_HOST=localhost\n",
|
|||||||
expression => ['galera-role eq true'],
|
expression => ['galera-role eq true'],
|
||||||
},
|
},
|
||||||
bundle => 'galera-bundle',
|
bundle => 'galera-bundle',
|
||||||
require => [Class['::mysql::server'],
|
require => [Class['::mysql::server']],
|
||||||
Pacemaker::Resource::Bundle['galera-bundle']],
|
|
||||||
before => Exec['galera-ready'],
|
before => Exec['galera-ready'],
|
||||||
force => $force_ocf,
|
force => $force_ocf,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Resource relation: we normally want the bundle resource to
|
||||||
|
# be run before the OCF one, as the latter depends on the former
|
||||||
|
# at creation time.
|
||||||
|
# However during scale up, both resources change, and the bundle
|
||||||
|
# one shouldn't be updated prior to the OCF one, otherwise
|
||||||
|
# pacemaker could spawn additional replicas before the necessary
|
||||||
|
# info is updated in the OCF resource, which would confuse the
|
||||||
|
# galera resource agent and cause spurious errors.
|
||||||
|
$replicas=pacemaker_bundle_replicas('galera-bundle')
|
||||||
|
if ($replicas > 0) and ($galera_nodes_count > $replicas) {
|
||||||
|
Pacemaker::Resource::Ocf['galera'] -> Pacemaker::Resource::Bundle['galera-bundle']
|
||||||
|
} else {
|
||||||
|
Pacemaker::Resource::Bundle['galera-bundle'] -> Pacemaker::Resource::Ocf['galera']
|
||||||
|
}
|
||||||
|
|
||||||
exec { 'galera-ready' :
|
exec { 'galera-ready' :
|
||||||
command => '/usr/bin/clustercheck >/dev/null',
|
command => '/usr/bin/clustercheck >/dev/null',
|
||||||
timeout => 30,
|
timeout => 30,
|
||||||
|
Loading…
Reference in New Issue
Block a user