From 43e26fe8fc2f439b3266cd2b4dc670bd788f64a2 Mon Sep 17 00:00:00 2001 From: Vladimir Kuklin Date: Thu, 28 Nov 2013 17:09:26 +0400 Subject: [PATCH] Fix start/cleanup race condition Do service cleanup on the particular node if it is not installed on the node. --- .../corosync/lib/facter/pacemaker_hostname.rb | 9 ++++++ .../lib/puppet/provider/service/pacemaker.rb | 7 +++++ .../puppet/corosync/manifests/cleanup.pp | 29 +++++++++++-------- 3 files changed, 33 insertions(+), 12 deletions(-) create mode 100644 deployment/puppet/corosync/lib/facter/pacemaker_hostname.rb diff --git a/deployment/puppet/corosync/lib/facter/pacemaker_hostname.rb b/deployment/puppet/corosync/lib/facter/pacemaker_hostname.rb new file mode 100644 index 0000000000..9c7340e4eb --- /dev/null +++ b/deployment/puppet/corosync/lib/facter/pacemaker_hostname.rb @@ -0,0 +1,9 @@ +# Fact: pacemaker_hostname +# +# Purpose: Return name of the node used by Pacemaker +# +Facter.add(:pacemaker_hostname) do + setcode do + rv = Facter::Util::Resolution.exec('uname -n') + end +end diff --git a/deployment/puppet/corosync/lib/puppet/provider/service/pacemaker.rb b/deployment/puppet/corosync/lib/puppet/provider/service/pacemaker.rb index 131c12f5ce..6e6af6c824 100644 --- a/deployment/puppet/corosync/lib/puppet/provider/service/pacemaker.rb +++ b/deployment/puppet/corosync/lib/puppet/provider/service/pacemaker.rb @@ -10,6 +10,7 @@ Puppet::Type.type(:service).provide :pacemaker, :parent => Puppet::Provider::Cor commands :crm => 'crm' commands :cibadmin => 'cibadmin' commands :crm_attribute => 'crm_attribute' + commands :crm_resource => 'crm_resource' desc "Pacemaker service management." 
@@ -208,6 +209,12 @@ Puppet::Type.type(:service).provide :pacemaker, :parent => Puppet::Provider::Cor last_successful_op = 'start' else last_successful_op = 'stop' + if last_op.attributes['rc-code'].to_i == 5 and node[:uname] == Facter.value(:pacemaker_hostname) + crm_resource('--cleanup','--resource',get_service_name,'--node',Facter.value(:pacemaker_hostname)) + sleep 15 + self.class.get_cib + retry + end end end debug("LAST SUCCESSFUL OP :\n\n #{last_successful_op.inspect}") diff --git a/deployment/puppet/corosync/manifests/cleanup.pp b/deployment/puppet/corosync/manifests/cleanup.pp index 24694262bc..834289c49c 100644 --- a/deployment/puppet/corosync/manifests/cleanup.pp +++ b/deployment/puppet/corosync/manifests/cleanup.pp @@ -14,16 +14,21 @@ # # Copyright 2012 Puppet Labs, LLC. # -define corosync::cleanup () { - Cs_resource <| name == $name |> ~> Exec["crm resource cleanup $name"] +define corosync::cleanup ( + $force = false, + $wait_before = 15 +) { + if $force { + Cs_resource <| name == $name |> ~> Exec["crm resource cleanup $name"] - ##FIXME: we need to create a better way to workaround crm commit <-> cleanup race condition than a simple sleep - #Workaround for hostname bugs with FQDN vs short hostname - exec { "crm resource cleanup $name": - command => "bash -c \"(sleep 5 && crm_resource --resource ${name} --cleanup --node `uname -n`) || :\"", - path => ['/bin', '/usr/bin', '/sbin', '/usr/sbin'], - returns => [0,""], - refreshonly => true, - timeout => 600, - } -} + ##FIXME: we need to create a better way to workaround crm commit <-> cleanup race condition than a simple sleep + #Workaround for hostname bugs with FQDN vs short hostname + exec { "crm resource cleanup $name": + command => "bash -c \"(sleep ${wait_before} && crm_resource --resource ${name} --cleanup --node `uname -n`) || :\"", + path => ['/bin', '/usr/bin', '/sbin', '/usr/sbin'], + returns => [0,""], + refreshonly => true, + timeout => 600, + } + } +}