Files
puppet-pacemaker/lib/pacemaker/wait.rb
Dmitry Ilyin c1c2031c84 Fixes for race condition when adding primitives without OCF present
* Change the service "status" to report "stopped" if the
  primitive has failures on the node.
* Enable "status" failure check.
* Support error detection for missing OCF monitor operations.
* Add operations status debug method
* Add forgotten cib_reset to the wait_for_online

Change-Id: I34fbf8b4a7d2420fb568719f473bc9b40063cc82
2017-02-23 17:45:07 +00:00

239 lines
8.7 KiB
Ruby

module Pacemaker
# functions that can wait for something repeatedly
# polling the system status until the condition is met
module Wait
# Run the given block repeatedly until it succeeds or the attempts run out.
# Every attempt is wrapped in a Timeout of :retry_timeout seconds; any
# StandardError raised by the attempt (a timeout included) is logged through
# `debug` and counted as a failed attempt. A sleep of :retry_step seconds
# follows each failed attempt.
# @param options [Hash] overrides merged on top of `pacemaker_options`;
#   the keys read here are :retry_count, :retry_timeout, :retry_step,
#   :retry_false_is_failure and :retry_fail_on_timeout
# @yield the operation to attempt
# @return [Object, nil] the value of the block on success, or nil when all
#   attempts failed and :retry_fail_on_timeout is not set
# @raise [RuntimeError] when all attempts failed and :retry_fail_on_timeout is set
def retry_block(options = {})
  settings = pacemaker_options.merge(options)
  settings[:retry_count].times do
    begin
      result = Timeout.timeout(settings[:retry_timeout]) { yield }
      # a falsy result only counts as a failure when :retry_false_is_failure is set
      return result if result || !settings[:retry_false_is_failure]
    rescue => error
      debug "Execution failure: #{error.message}"
    end
    sleep settings[:retry_step]
  end
  return unless settings[:retry_fail_on_timeout]
  waited = settings[:retry_count] * settings[:retry_step]
  raise "Execution timeout after #{waited} seconds!"
end
# Block until Pacemaker reports itself online.
# Each attempt resets the CIB via `cib_reset` and then evaluates `online?`;
# the attempts are driven by `retry_block`.
# @param comment [String] optional log tag appended to the first debug
#   message so the calls can be traced
def wait_for_online(comment = nil)
  debug(["Waiting #{max_wait_time} seconds for Pacemaker to become online",
         (" (#{comment})" if comment)].join)
  retry_block do
    cib_reset 'wait_for_online'
    online?
  end
  debug 'Pacemaker is online'
end
# Wait until a primitive has a known (non-nil) status.
# Each attempt resets the CIB via `cib_reset` and queries `primitive_status`;
# the attempts are driven by `retry_block`.
# When a node is given, the status is looked up for that node, so the wait
# matches what the log messages claim. Without a node, `primitive_status`
# is called with the primitive name only.
# NOTE(review): assumes `primitive_status` accepts an optional node as its
# second argument, the same way `primitive_is_running?` is called with one
# elsewhere in this module; confirm against its definition.
# @param primitive [String] primitive name
# @param node [String] on this node if given
def wait_for_status(primitive, node = nil)
  # forward the node only when given so calls without a node stay unchanged
  status_args = node ? [primitive, node] : [primitive]
  message = "Waiting #{max_wait_time} seconds for a known status of '#{primitive}'"
  message += " on node '#{node}'" if node
  debug message
  retry_block do
    cib_reset 'wait_for_status'
    !primitive_status(*status_args).nil?
  end
  message = "Primitive '#{primitive}' has status '#{primitive_status(*status_args)}'"
  message += " on node '#{node}'" if node
  debug message
end
# Wait until a primitive is reported as running.
# Each attempt resets the CIB via `cib_reset` and evaluates
# `primitive_is_running?`; the attempts are driven by `retry_block`.
# @param primitive [String] primitive id
# @param node [String] check on this node if given
def wait_for_start(primitive, node = nil)
  debug(["Waiting #{max_wait_time} seconds for the service '#{primitive}' to start",
         (" on node '#{node}'" if node)].join)
  retry_block do
    cib_reset 'wait_for_start'
    primitive_is_running?(primitive, node)
  end
  debug(["Service '#{primitive}' have started",
         (" on node '#{node}'" if node)].join)
end
# Wait until a primitive is reported as running in the master role.
# Each attempt resets the CIB via `cib_reset` and evaluates
# `primitive_has_master_running?`; the attempts are driven by `retry_block`.
# @param primitive [String] primitive id
# @param node [String] check on this node if given
def wait_for_master(primitive, node = nil)
  # builds the optional " on node '...'" tail shared by both log messages
  node_note = lambda { node ? " on node '#{node}'" : '' }
  debug("Waiting #{max_wait_time} seconds for the service '#{primitive}' to start master" + node_note.call)
  retry_block do
    cib_reset 'wait_for_master'
    primitive_has_master_running?(primitive, node)
  end
  debug("Service '#{primitive}' have started master" + node_note.call)
end
# Wait until a primitive is reported as not running.
# Each attempt resets the CIB via `cib_reset` and evaluates
# `primitive_is_running?`; the attempts are driven by `retry_block`.
# @param primitive [String] primitive id
# @param node [String] check that it is stopped on this node if given
def wait_for_stop(primitive, node = nil)
  debug(["Waiting #{max_wait_time} seconds for the service '#{primitive}' to stop",
         (" on node '#{node}'" if node)].join)
  retry_block do
    cib_reset 'wait_for_stop'
    # only an explicit false satisfies the check; nil or any other value does not
    false.equal?(primitive_is_running?(primitive, node))
  end
  debug(["Service '#{primitive}' was stopped",
         (" on node '#{node}'" if node)].join)
end
# Add a new primitive to the CIB, retrying through `retry_block`.
# When the :cibadmin_idempotency_checks option is enabled, every attempt
# first resets the CIB and stops retrying as soon as the primitive is
# found to exist; otherwise `cibadmin_create` is called directly.
# @param xml [String, REXML::Element] XML block to add
# @param primitive [String] the id of the new primitive
# @param scope [String] XML root scope
def wait_for_primitive_create(xml, primitive, scope = 'resources')
  debug "Waiting #{max_wait_time} seconds for the primitive '#{primitive}' to be created"
  retry_block do
    idempotent = pacemaker_options[:cibadmin_idempotency_checks]
    cib_reset 'wait_for_primitive_create' if idempotent
    # break leaves retry_block at once: the primitive is already there
    break true if idempotent && primitive_exists?(primitive)
    cibadmin_create(xml, scope)
  end
  debug "Primitive '#{primitive}' was created"
end
# Remove a primitive from the CIB, retrying through `retry_block`.
# When the :cibadmin_idempotency_checks option is enabled, every attempt
# first resets the CIB and stops retrying as soon as the primitive is
# found to be absent; otherwise `cibadmin_delete` is called directly.
# @param xml [String, REXML::Element] XML block to remove
# @param primitive [String] the id of the removed primitive
# @param scope [String] XML root scope
def wait_for_primitive_remove(xml, primitive, scope = 'resources')
  debug "Waiting #{max_wait_time} seconds for the primitive '#{primitive}' to be removed"
  retry_block do
    verify_first = pacemaker_options[:cibadmin_idempotency_checks]
    cib_reset 'wait_for_primitive_remove' if verify_first
    # break leaves retry_block at once: the primitive is already gone
    break true if verify_first && !primitive_exists?(primitive)
    cibadmin_delete(xml, scope)
  end
  debug "Primitive '#{primitive}' was removed"
end
# Update a primitive in the CIB by calling `cibadmin_replace`, retrying
# through `retry_block`. No existence check is made before the call.
# @param xml [String, REXML::Element] XML block to update
# @param primitive [String] the id of the updated primitive (used for logging)
# @param scope [String] XML root scope
def wait_for_primitive_update(xml, primitive, scope = 'resources')
  debug "Waiting #{max_wait_time} seconds for the primitive '#{primitive}' to be updated"
  # the replace action is already idempotent, so it is simply retried
  retry_block { cibadmin_replace(xml, scope) }
  debug "Primitive '#{primitive}' was updated"
end
# Add a new constraint to the CIB, retrying through `retry_block`.
# When the :cibadmin_idempotency_checks option is enabled, every attempt
# first resets the CIB and stops retrying as soon as the constraint is
# found to exist; otherwise `cibadmin_create` is called directly.
# @param xml [String, REXML::Element] XML block to add
# @param constraint [String] the id of the new constraint
# @param scope [String] XML root scope
def wait_for_constraint_create(xml, constraint, scope = 'constraints')
  debug "Waiting #{max_wait_time} seconds for the constraint '#{constraint}' to be created"
  retry_block do
    idempotent = pacemaker_options[:cibadmin_idempotency_checks]
    cib_reset 'wait_for_constraint_create' if idempotent
    # break leaves retry_block at once: the constraint is already there
    break true if idempotent && constraint_exists?(constraint)
    cibadmin_create(xml, scope)
  end
  debug "Constraint '#{constraint}' was created"
end
# Remove a constraint from the CIB, retrying through `retry_block`.
# When the :cibadmin_idempotency_checks option is enabled, every attempt
# first resets the CIB and stops retrying as soon as the constraint is
# found to be absent; otherwise `cibadmin_delete` is called directly.
# @param xml [String, REXML::Element] XML block to remove
# @param constraint [String] the id of the removed constraint
# @param scope [String] XML root scope
def wait_for_constraint_remove(xml, constraint, scope = 'constraints')
  debug "Waiting #{max_wait_time} seconds for the constraint '#{constraint}' to be removed"
  retry_block do
    verify_first = pacemaker_options[:cibadmin_idempotency_checks]
    cib_reset 'wait_for_constraint_remove' if verify_first
    # break leaves retry_block at once: the constraint is already gone
    break true if verify_first && !constraint_exists?(constraint)
    cibadmin_delete(xml, scope)
  end
  debug "Constraint '#{constraint}' was removed"
end
# Update a constraint in the CIB by calling `cibadmin_replace`, retrying
# through `retry_block`. No existence check is made before the call.
# @param xml [String, REXML::Element] XML block to update
# @param constraint [String] the id of the updated constraint (used for logging)
# @param scope [String] XML root scope
def wait_for_constraint_update(xml, constraint, scope = 'constraints')
  debug "Waiting #{max_wait_time} seconds for the constraint '#{constraint}' to be updated"
  # the replace action is already idempotent, so it is simply retried
  retry_block { cibadmin_replace(xml, scope) }
  debug "Constraint '#{constraint}' was updated"
end
# Check whether Pacemaker is online and can be worked with.
# The cluster counts as online only when all of these return a truthy value:
# * `dc_version`
# * `dc`
# * `cib_section_node_state`
# The checks run under a Timeout of :retry_timeout seconds (taken from
# `pacemaker_options`). A Puppet::ExecutionFailure or a timeout is logged
# through `debug` and reported as offline instead of being raised.
# @return [TrueClass,FalseClass]
def online?
  Timeout.timeout(pacemaker_options[:retry_timeout]) do
    # short-circuits on the first failed check, in the order listed above
    checks_pass = dc_version && dc && cib_section_node_state
    checks_pass ? true : false
  end
rescue Puppet::ExecutionFailure => failure
  debug "Cluster is offline: #{failure.message}"
  false
rescue Timeout::Error
  debug 'Online check timeout!'
  false
end
end
end