Fixes for new pacemaker versions

* Improve missing cib sections filters
* Change is_online? to use dc-version to determine if CIB is ready
* Cleanup primitives by their full names
* Change node id references to node name as 'id' does not represent
  node name for new pacemaker versions
  (related change: Id84994df3bf2a990d2925b595aa448746db45383
* Switch location add implementation from pcs to cibadmin --patch
  as this provder claims to be crmsh/pcs agnostic and
  to solve problems with cib changes not being synced to other nodes
  then has been applied concurrently.
* Add missing debug info
* Update rspecs

Author: Dmitry Ilyin <dilyin@mirantis.com>
Closes-bug: #1416378

Change-Id: I2185b17286e46086674abe985bfc48260e7c397b
Signed-off-by: Bogdan Dobrelya <bdobrelia@mirantis.com>
This commit is contained in:
Bogdan Dobrelya 2015-01-30 12:43:49 +01:00
parent 537ed9e375
commit c08013d004
5 changed files with 79 additions and 56 deletions

View File

@ -38,28 +38,23 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider
@nodes_structure = nil
end
# get status CIB section
# @return [REXML::Element] at /cib/status
def cib_section_status
REXML::XPath.match cib, '/cib/status'
end
# get lrm_rsc_ops section from lrm_resource section CIB section
# @param lrm_resource [REXML::Element]
# at /cib/status/node_state/lrm[@id="node-name"]/lrm_resources/lrm_resource[@id="resource-name"]/lrm_rsc_op
# @return [REXML::Element]
def cib_section_lrm_rsc_ops(lrm_resource)
return unless lrm_resource.is_a? REXML::Element
REXML::XPath.match lrm_resource, 'lrm_rsc_op'
end
# get node_state CIB section
# @return [REXML::Element] at /cib/status/node_state
def cib_section_nodes_state
REXML::XPath.match cib_section_status, 'node_state'
REXML::XPath.match cib, '//node_state'
end
# get primitives CIB section
# @return [REXML::Element] at /cib/configuration/resources/primitive
# @return [Array<REXML::Element>] at /cib/configuration/resources/primitive
def cib_section_primitives
REXML::XPath.match cib, '//primitive'
end
@ -69,6 +64,7 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider
# at /cib/status/node_state/lrm[@id="node-name"]/lrm_resources/lrm_resource
# @return [REXML::Element]
def cib_section_lrm_resources(lrm)
return unless lrm.is_a? REXML::Element
REXML::XPath.match lrm, 'lrm_resources/lrm_resource'
end
@ -76,8 +72,9 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider
# @param op [Hash<String => String>]
# @return ['start','stop','master',nil]
def operation_status(op)
# skip incomplete ops
return unless op['op-status'] == '0'
# skip pending ops
# we should wait for status for become known
return if op['op-status'] == '-1'
if op['operation'] == 'monitor'
# for monitor operation status is determined by its rc-code
@ -184,6 +181,7 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider
id = resource['id']
next unless id
lrm_rsc_ops = cib_section_lrm_rsc_ops lrm_resource
next unless lrm_rsc_ops
ops = decode_lrm_rsc_ops lrm_rsc_ops
resource.store 'ops', ops
resource.store 'status', determine_primitive_status(ops)
@ -213,13 +211,15 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider
@nodes_structure = {}
cib_section_nodes_state.each do |node_state|
node = attributes_to_hash node_state
id = node['id']
next unless id
node_name = node['uname']
next unless node_name
lrm = node_state.elements['lrm']
next unless lrm
lrm_resources = cib_section_lrm_resources lrm
next unless lrm_resources
resources = decode_lrm_resources lrm_resources
node.store 'primitives', resources
@nodes_structure.store id, node
@nodes_structure.store node_name, node
end
@nodes_structure
end
@ -350,9 +350,9 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider
# cleanup this primitive
# @param primitive [String]
def cleanup_primitive(primitive, node = '')
def cleanup_primitive(primitive, node = nil)
opts = ['--cleanup', "--resource=#{primitive}"]
opts << "--node=#{node}" if ! node.empty?
opts << "--node=#{node}" if node
retry_command {
crm_resource opts
}
@ -395,9 +395,22 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider
# @param node [String] the node's name
# @param score [Numeric,String] score value
def constraint_location_add(primitive, node, score = 100)
id = "#{primitive}_on_#{node}"
id = "#{primitive}-on-#{node}"
xml = <<-EOF
<diff>
<diff-added>
<cib>
<configuration>
<constraints>
<rsc_location id="#{id}" node="#{node}" rsc="#{primitive}" score="#{score}" __crm_diff_marker__="added:top"/>
</constraints>
</configuration>
</cib>
</diff-added>
</diff>
EOF
retry_command {
pcs 'constraint', 'location', 'add', id, primitive, node, score
cibadmin '--patch', '--sync-call', '--xml-text', xml
}
end
@ -600,12 +613,13 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider
# @return [TrueClass,FalseClass]
def is_online?
begin
cibadmin '-Q'
dc_version = crm_attribute '-q', '--type', 'crm_config', '--query', '--name', 'dc-version'
return false unless dc_version
return false if dc_version.empty?
return false unless cib_section_nodes_state
true
rescue Puppet::ExecutionFailure
false
else
true
end
end
@ -650,19 +664,19 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider
Puppet.debug 'Pacemaker is online'
end
# cleanup a primitive and then wait until
# we can get it's status again because
# cleanup blocks operations sections for a while
# wait until we can get a known status of the primitive
# @param primitive [String] primitive name
def cleanup_with_wait(primitive, node = '')
node_msgpart = node.empty? ? '' : " on node '#{node}'"
Puppet.debug "Cleanup primitive '#{primitive}'#{node_msgpart} and wait until cleanup finishes"
cleanup_primitive(primitive, node)
def wait_for_status(primitive, node = nil)
msg = "Wait for known status of '#{primitive}'"
msg += " on node '#{node}'" if node
Puppet.debug msg
retry_block_until_true do
cib_reset
primitive_status(primitive) != nil
end
Puppet.debug "Primitive '#{primitive}' have been cleaned up#{node_msgpart} and is online again"
msg = "Primitive '#{primitive}' has status '#{primitive_status primitive}'"
msg += " on node '#{node}'" if node
Puppet.debug msg
end
# wait for primitive to start
@ -672,6 +686,7 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider
def wait_for_start(primitive, node = nil)
message = "Waiting #{RETRY_COUNT * RETRY_STEP} seconds for service '#{primitive}' to start"
message += " on node '#{node}'" if node
Puppet.debug get_cluster_debug_report
Puppet.debug message
retry_block_until_true do
cib_reset
@ -690,6 +705,7 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider
def wait_for_master(primitive, node = nil)
message = "Waiting #{RETRY_COUNT * RETRY_STEP} seconds for service '#{primitive}' to start master"
message += " on node '#{node}'" if node
Puppet.debug get_cluster_debug_report
Puppet.debug message
retry_block_until_true do
cib_reset
@ -708,6 +724,7 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider
def wait_for_stop(primitive, node = nil)
message = "Waiting #{RETRY_COUNT * RETRY_STEP} seconds for service '#{primitive}' to stop"
message += " on node '#{node}'" if node
Puppet.debug get_cluster_debug_report
Puppet.debug message
retry_block_until_true do
cib_reset

View File

@ -8,6 +8,7 @@ Puppet::Type.type(:service).provide :pacemaker, :parent => Puppet::Provider::Pac
commands :uname => 'uname'
commands :pcs => 'pcs'
commands :crm_resource => 'crm_resource'
commands :crm_attribute => 'crm_attribute'
commands :cibadmin => 'cibadmin'
# hostname of the current node
@ -70,6 +71,13 @@ Puppet::Type.type(:service).provide :pacemaker, :parent => Puppet::Provider::Pac
end
end
# cleanup a primitive and
# wait for cleanup to finish
def cleanup
cleanup_primitive full_name, hostname
wait_for_status name
end
# called by Puppet to determine if the service
# is running on the local node
# @return [:running,:stopped]
@ -88,10 +96,11 @@ Puppet::Type.type(:service).provide :pacemaker, :parent => Puppet::Provider::Pac
Puppet.debug "Call 'start' for Pacemaker service '#{name}' on node '#{hostname}'"
enable unless primitive_is_managed? name
disable_basic_service
constraint_location_add name, hostname
constraint_location_add full_name, hostname
cleanup
unban_primitive name, hostname
start_primitive full_name
start_primitive name
cleanup_with_wait(name, hostname) if primitive_has_failures?(name, hostname)
if primitive_is_multistate? name
Puppet.debug "Choose master start for Pacemaker service '#{name}'"
@ -105,8 +114,8 @@ Puppet::Type.type(:service).provide :pacemaker, :parent => Puppet::Provider::Pac
# called by Puppet to stop the service
def stop
Puppet.debug "Call 'stop' for Pacemaker service '#{name}' on node '#{hostname}'"
cleanup
enable unless primitive_is_managed? name
cleanup_with_wait(name, hostname) if primitive_has_failures?(name, hostname)
if primitive_is_complex? name
Puppet.debug "Choose local stop for Pacemaker service '#{name}' on node '#{hostname}'"

View File

@ -179,7 +179,7 @@ describe Puppet::Provider::Pacemaker_common do
context 'constraints control' do
it 'can add location constraint' do
@class.expects(:pcs).with 'constraint', 'location', 'add', 'myprimitive_on_mynode', 'myprimitive', 'mynode', '200'
@class.expects(:cibadmin).returns(true)
@class.constraint_location_add 'myprimitive', 'mynode', '200'
end
@ -199,11 +199,10 @@ describe Puppet::Provider::Pacemaker_common do
@class.wait_for_online
end
it 'cleanups primitive and waits for it to become online again' do
@class.stubs(:cleanup_primitive).with('myprimitive', 'mynode').returns true
it 'waits for status to become known' do
@class.stubs(:cib_reset).returns true
@class.stubs(:primitive_status).returns 'stopped'
@class.cleanup_with_wait 'myprimitive', 'mynode'
@class.wait_for_status 'myprimitive'
end
it 'waits for the service to start' do

View File

@ -23,7 +23,7 @@ describe Puppet::Type.type(:service).provider(:pacemaker) do
@class.stubs(:cib_reset).returns(true)
@class.stubs(:wait_for_online).returns(true)
@class.stubs(:cleanup_with_wait).returns(true)
@class.stubs(:wait_for_status).returns(true)
@class.stubs(:wait_for_start).returns(true)
@class.stubs(:wait_for_stop).returns(true)
@ -42,6 +42,7 @@ describe Puppet::Type.type(:service).provider(:pacemaker) do
@class.stubs(:ban_primitive).returns(true)
@class.stubs(:start_primitive).returns(true)
@class.stubs(:stop_primitive).returns(true)
@class.stubs(:cleanup_primitive).returns(true)
@class.stubs(:enable).returns(true)
@class.stubs(:disable).returns(true)
@ -95,6 +96,7 @@ describe Puppet::Type.type(:service).provider(:pacemaker) do
@class.expects(:enable).once
@class.start
@class.stubs(:primitive_is_managed?).returns(true)
@class.unstub(:enable)
@class.expects(:enable).never
@class.start
end
@ -104,12 +106,9 @@ describe Puppet::Type.type(:service).provider(:pacemaker) do
@class.start
end
it 'should cleanup a primitive only if there are errors' do
it 'should cleanup a primitive' do
@class.stubs(:primitive_has_failures?).returns(true)
@class.expects(:cleanup_with_wait).once
@class.start
@class.stubs(:primitive_has_failures?).returns(false)
@class.expects(:cleanup_with_wait).never
@class.expects(:cleanup_primitive).with(full_name, hostname).once
@class.start
end
@ -118,13 +117,13 @@ describe Puppet::Type.type(:service).provider(:pacemaker) do
@class.start
end
it 'tries to start the service by its name' do
@class.expects(:start_primitive).with(name)
it 'tries to start the service by its full name' do
@class.expects(:start_primitive).with(full_name)
@class.start
end
it 'adds a location constraint for the service by its name' do
@class.expects(:constraint_location_add).with(name, hostname)
it 'adds a location constraint for the service by its full_name' do
@class.expects(:constraint_location_add).with(full_name, hostname)
@class.start
end
@ -154,22 +153,19 @@ describe Puppet::Type.type(:service).provider(:pacemaker) do
end
context '#stop' do
it 'tries to enable service if it is not enabled to work with it' do
it 'tries to disable service if it is not enabled to work with it' do
@class.stubs(:primitive_is_managed?).returns(false)
@class.expects(:enable).once
@class.start
@class.stop
@class.stubs(:primitive_is_managed?).returns(true)
@class.unstub(:enable)
@class.expects(:enable).never
@class.start
@class.stop
end
it 'should cleanup a primitive only if there are errors' do
@class.stubs(:primitive_has_failures?).returns(true)
@class.expects(:cleanup_with_wait).once
@class.start
@class.stubs(:primitive_has_failures?).returns(false)
@class.expects(:cleanup_with_wait).never
@class.start
it 'should cleanup a primitive on stop' do
@class.expects(:cleanup_primitive).with(full_name, hostname).once.once
@class.stop
end
it 'uses Ban to stop the service and waits for it to stop locally if service is complex' do
@ -190,6 +186,8 @@ describe Puppet::Type.type(:service).provider(:pacemaker) do
context '#restart' do
it 'does not stop or start the service if it is not locally running' do
@class.stubs(:primitive_is_running?).with(name, hostname).returns(false)
@class.unstub(:stop)
@class.unstub(:start)
@class.expects(:stop).never
@class.expects(:start).never
@class.restart