From 6134af0c6ec651e4feef45acdc5a291beab405cd Mon Sep 17 00:00:00 2001 From: Dmitry Bilunov Date: Tue, 15 Dec 2015 12:29:07 +0300 Subject: [PATCH] Fix detached keystone node epoch mismatch Pacemaker maintains an internal database, which is used for configuration storage. Each update of this database increases a counter, called "epoch", which should have the same value cluster-wide. If an update operation comes to a previously detached node, a conflict will occur. Pacemaker does not allow updating this database on a node whose epoch value is lower than the epoch value of the cluster leader. We should wait for the epoch counter to come into sync by periodically retrying the update command. Closes-Bug: 1494314 Change-Id: I1f242bcd90264ec45da2aaa6bc030f244511761b --- .../lib/puppet/provider/cs_property/crm.rb | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/deployment/puppet/corosync/lib/puppet/provider/cs_property/crm.rb b/deployment/puppet/corosync/lib/puppet/provider/cs_property/crm.rb index ae747d603c..987c1c2bc7 100644 --- a/deployment/puppet/corosync/lib/puppet/provider/cs_property/crm.rb +++ b/deployment/puppet/corosync/lib/puppet/provider/cs_property/crm.rb @@ -10,6 +10,9 @@ Puppet::Type.type(:cs_property).provide(:crm, :parent => Puppet::Provider::Crmsh commands :crm => 'crm' commands :cibadmin => 'cibadmin' + RETRY_COUNT = 100 + RETRY_STEP = 6 + def self.instances block_until_ready @@ -71,6 +74,25 @@ Puppet::Type.type(:cs_property).provide(:crm, :parent => Puppet::Provider::Crmsh @property_hash[:value] = should end + # retry the given command until it runs without errors + # or for RETRY_COUNT times with RETRY_STEP sec step + # print cluster status report on fail + # returns normal command output on success + # @return [String] + def retry_command + (0..RETRY_COUNT).each do + begin + out = yield + rescue Puppet::ExecutionFailure => e + Puppet.debug "Command failed: #{e.message}" + sleep RETRY_STEP + else + return out + end + end 
+ fail "Execution timeout after #{RETRY_COUNT * RETRY_STEP} seconds!" + end + # Flush is triggered on anything that has been detected as being # modified in the property_hash. It generates a temporary file with # the updates that need to be made. The temporary file is then used @@ -82,7 +104,9 @@ Puppet::Type.type(:cs_property).provide(:crm, :parent => Puppet::Provider::Crmsh # clear this on properties, in case it's set from a previous # run of a different corosync type ENV['CIB_shadow'] = nil - crm('configure', 'property', '$id="cib-bootstrap-options"', "#{@property_hash[:name]}=#{@property_hash[:value]}") + retry_command { + crm('configure', 'property', '$id="cib-bootstrap-options"', "#{@property_hash[:name]}=#{@property_hash[:value]}") + } end end end