From c08013d004de5b4de8c750e68c65b03e3da40ba9 Mon Sep 17 00:00:00 2001 From: Bogdan Dobrelya Date: Fri, 30 Jan 2015 12:43:49 +0100 Subject: [PATCH] Fixes for new pacemaker versions * Improve missing cib sections filters * Change is_online? to use dc-version to determine if CIB is ready * Cleanup primitives by their full names * Change node id references to node name as 'id' does not represent node name for new pacemaker versions (related change: Id84994df3bf2a990d2925b595aa448746db45383 * Switch location add implementation from pcs to cibadmin --patch as this provder claims to be crmsh/pcs agnostic and to solve problems with cib changes not being synced to other nodes then has been applied concurrently. * Add missing debug info * Update rspecs Author: Dmitry Ilyin Closes-bug: #1416378 Change-Id: I2185b17286e46086674abe985bfc48260e7c397b Signed-off-by: Bogdan Dobrelya --- lib/puppet/provider/pacemaker_common.rb | 73 ++++++++++++------- lib/puppet/provider/service/pacemaker.rb | 15 +++- spec/spec_helper.rb | 2 +- .../puppet/provider/pacemaker_common_spec.rb | 7 +- .../puppet/provider/service/pacemaker_spec.rb | 38 +++++----- 5 files changed, 79 insertions(+), 56 deletions(-) diff --git a/lib/puppet/provider/pacemaker_common.rb b/lib/puppet/provider/pacemaker_common.rb index e9f5d87..75f4090 100644 --- a/lib/puppet/provider/pacemaker_common.rb +++ b/lib/puppet/provider/pacemaker_common.rb @@ -38,28 +38,23 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider @nodes_structure = nil end - # get status CIB section - # @return [REXML::Element] at /cib/status - def cib_section_status - REXML::XPath.match cib, '/cib/status' - end - # get lrm_rsc_ops section from lrm_resource section CIB section # @param lrm_resource [REXML::Element] # at /cib/status/node_state/lrm[@id="node-name"]/lrm_resources/lrm_resource[@id="resource-name"]/lrm_rsc_op # @return [REXML::Element] def cib_section_lrm_rsc_ops(lrm_resource) + return unless lrm_resource.is_a? REXML::Element REXML::XPath.match lrm_resource, 'lrm_rsc_op' end # get node_state CIB section # @return [REXML::Element] at /cib/status/node_state def cib_section_nodes_state - REXML::XPath.match cib_section_status, 'node_state' + REXML::XPath.match cib, '//node_state' end # get primitives CIB section - # @return [REXML::Element] at /cib/configuration/resources/primitive + # @return [Array] at /cib/configuration/resources/primitive def cib_section_primitives REXML::XPath.match cib, '//primitive' end @@ -69,6 +64,7 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider # at /cib/status/node_state/lrm[@id="node-name"]/lrm_resources/lrm_resource # @return [REXML::Element] def cib_section_lrm_resources(lrm) + return unless lrm.is_a? REXML::Element REXML::XPath.match lrm, 'lrm_resources/lrm_resource' end @@ -76,8 +72,9 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider # @param op [Hash String>] # @return ['start','stop','master',nil] def operation_status(op) - # skip incomplete ops - return unless op['op-status'] == '0' + # skip pending ops + # we should wait for status for become known + return if op['op-status'] == '-1' if op['operation'] == 'monitor' # for monitor operation status is determined by its rc-code @@ -184,6 +181,7 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider id = resource['id'] next unless id lrm_rsc_ops = cib_section_lrm_rsc_ops lrm_resource + next unless lrm_rsc_ops ops = decode_lrm_rsc_ops lrm_rsc_ops resource.store 'ops', ops resource.store 'status', determine_primitive_status(ops) @@ -213,13 +211,15 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider @nodes_structure = {} cib_section_nodes_state.each do |node_state| node = attributes_to_hash node_state - id = node['id'] - next unless id + node_name = node['uname'] + next unless node_name lrm = node_state.elements['lrm'] + next unless lrm lrm_resources = cib_section_lrm_resources lrm + next unless lrm_resources resources = decode_lrm_resources lrm_resources node.store 'primitives', resources - @nodes_structure.store id, node + @nodes_structure.store node_name, node end @nodes_structure end @@ -350,9 +350,9 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider # cleanup this primitive # @param primitive [String] - def cleanup_primitive(primitive, node = '') + def cleanup_primitive(primitive, node = nil) opts = ['--cleanup', "--resource=#{primitive}"] - opts << "--node=#{node}" if ! node.empty? + opts << "--node=#{node}" if node retry_command { crm_resource opts } @@ -395,9 +395,22 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider # @param node [String] the node's name # @param score [Numeric,String] score value def constraint_location_add(primitive, node, score = 100) - id = "#{primitive}_on_#{node}" + id = "#{primitive}-on-#{node}" + xml = <<-EOF + + + + + + + + + + + + EOF retry_command { - pcs 'constraint', 'location', 'add', id, primitive, node, score + cibadmin '--patch', '--sync-call', '--xml-text', xml } end @@ -600,12 +613,13 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider # @return [TrueClass,FalseClass] def is_online? begin - cibadmin '-Q' + dc_version = crm_attribute '-q', '--type', 'crm_config', '--query', '--name', 'dc-version' + return false unless dc_version + return false if dc_version.empty? + return false unless cib_section_nodes_state true rescue Puppet::ExecutionFailure false - else - true end end @@ -650,19 +664,19 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider Puppet.debug 'Pacemaker is online' end - # cleanup a primitive and then wait until - # we can get it's status again because - # cleanup blocks operations sections for a while + # wait until we can get a known status of the primitive # @param primitive [String] primitive name - def cleanup_with_wait(primitive, node = '') - node_msgpart = node.empty? ? '' : " on node '#{node}'" - Puppet.debug "Cleanup primitive '#{primitive}'#{node_msgpart} and wait until cleanup finishes" - cleanup_primitive(primitive, node) + def wait_for_status(primitive, node = nil) + msg = "Wait for known status of '#{primitive}'" + msg += " on node '#{node}'" if node + Puppet.debug msg retry_block_until_true do cib_reset primitive_status(primitive) != nil end - Puppet.debug "Primitive '#{primitive}' have been cleaned up#{node_msgpart} and is online again" + msg = "Primitive '#{primitive}' has status '#{primitive_status primitive}'" + msg += " on node '#{node}'" if node + Puppet.debug msg end # wait for primitive to start @@ -672,6 +686,7 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider def wait_for_start(primitive, node = nil) message = "Waiting #{RETRY_COUNT * RETRY_STEP} seconds for service '#{primitive}' to start" message += " on node '#{node}'" if node + Puppet.debug get_cluster_debug_report Puppet.debug message retry_block_until_true do cib_reset @@ -690,6 +705,7 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider def wait_for_master(primitive, node = nil) message = "Waiting #{RETRY_COUNT * RETRY_STEP} seconds for service '#{primitive}' to start master" message += " on node '#{node}'" if node + Puppet.debug get_cluster_debug_report Puppet.debug message retry_block_until_true do cib_reset @@ -708,6 +724,7 @@ class Puppet::Provider::Pacemaker_common < Puppet::Provider def wait_for_stop(primitive, node = nil) message = "Waiting #{RETRY_COUNT * RETRY_STEP} seconds for service '#{primitive}' to stop" message += " on node '#{node}'" if node + Puppet.debug get_cluster_debug_report Puppet.debug message retry_block_until_true do cib_reset diff --git a/lib/puppet/provider/service/pacemaker.rb b/lib/puppet/provider/service/pacemaker.rb index 3139867..bc8fcb0 100644 --- a/lib/puppet/provider/service/pacemaker.rb +++ b/lib/puppet/provider/service/pacemaker.rb @@ -8,6 +8,7 @@ Puppet::Type.type(:service).provide :pacemaker, :parent => Puppet::Provider::Pac commands :uname => 'uname' commands :pcs => 'pcs' commands :crm_resource => 'crm_resource' + commands :crm_attribute => 'crm_attribute' commands :cibadmin => 'cibadmin' # hostname of the current node @@ -70,6 +71,13 @@ Puppet::Type.type(:service).provide :pacemaker, :parent => Puppet::Provider::Pac end end + # cleanup a primitive and + # wait for cleanup to finish + def cleanup + cleanup_primitive full_name, hostname + wait_for_status name + end + # called by Puppet to determine if the service # is running on the local node # @return [:running,:stopped] @@ -88,10 +96,11 @@ Puppet::Type.type(:service).provide :pacemaker, :parent => Puppet::Provider::Pac Puppet.debug "Call 'start' for Pacemaker service '#{name}' on node '#{hostname}'" enable unless primitive_is_managed? name disable_basic_service - constraint_location_add name, hostname + constraint_location_add full_name, hostname + cleanup unban_primitive name, hostname + start_primitive full_name start_primitive name - cleanup_with_wait(name, hostname) if primitive_has_failures?(name, hostname) if primitive_is_multistate? name Puppet.debug "Choose master start for Pacemaker service '#{name}'" @@ -105,8 +114,8 @@ Puppet::Type.type(:service).provide :pacemaker, :parent => Puppet::Provider::Pac # called by Puppet to stop the service def stop Puppet.debug "Call 'stop' for Pacemaker service '#{name}' on node '#{hostname}'" + cleanup enable unless primitive_is_managed? name - cleanup_with_wait(name, hostname) if primitive_has_failures?(name, hostname) if primitive_is_complex? name Puppet.debug "Choose local stop for Pacemaker service '#{name}' on node '#{hostname}'" diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 3d92005..2c6f566 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1 +1 @@ -require 'puppetlabs_spec_helper/module_spec_helper' \ No newline at end of file +require 'puppetlabs_spec_helper/module_spec_helper' diff --git a/spec/unit/puppet/provider/pacemaker_common_spec.rb b/spec/unit/puppet/provider/pacemaker_common_spec.rb index 902cc27..388274e 100644 --- a/spec/unit/puppet/provider/pacemaker_common_spec.rb +++ b/spec/unit/puppet/provider/pacemaker_common_spec.rb @@ -179,7 +179,7 @@ describe Puppet::Provider::Pacemaker_common do context 'constraints control' do it 'can add location constraint' do - @class.expects(:pcs).with 'constraint', 'location', 'add', 'myprimitive_on_mynode', 'myprimitive', 'mynode', '200' + @class.expects(:cibadmin).returns(true) @class.constraint_location_add 'myprimitive', 'mynode', '200' end @@ -199,11 +199,10 @@ describe Puppet::Provider::Pacemaker_common do @class.wait_for_online end - it 'cleanups primitive and waits for it to become online again' do - @class.stubs(:cleanup_primitive).with('myprimitive', 'mynode').returns true + it 'waits for status to become known' do @class.stubs(:cib_reset).returns true @class.stubs(:primitive_status).returns 'stopped' - @class.cleanup_with_wait 'myprimitive', 'mynode' + @class.wait_for_status 'myprimitive' end it 'waits for the service to start' do diff --git a/spec/unit/puppet/provider/service/pacemaker_spec.rb b/spec/unit/puppet/provider/service/pacemaker_spec.rb index 96e4431..9474d4e 100644 --- a/spec/unit/puppet/provider/service/pacemaker_spec.rb +++ b/spec/unit/puppet/provider/service/pacemaker_spec.rb @@ -23,7 +23,7 @@ describe Puppet::Type.type(:service).provider(:pacemaker) do @class.stubs(:cib_reset).returns(true) @class.stubs(:wait_for_online).returns(true) - @class.stubs(:cleanup_with_wait).returns(true) + @class.stubs(:wait_for_status).returns(true) @class.stubs(:wait_for_start).returns(true) @class.stubs(:wait_for_stop).returns(true) @@ -42,6 +42,7 @@ describe Puppet::Type.type(:service).provider(:pacemaker) do @class.stubs(:ban_primitive).returns(true) @class.stubs(:start_primitive).returns(true) @class.stubs(:stop_primitive).returns(true) + @class.stubs(:cleanup_primitive).returns(true) @class.stubs(:enable).returns(true) @class.stubs(:disable).returns(true) @@ -95,6 +96,7 @@ describe Puppet::Type.type(:service).provider(:pacemaker) do @class.expects(:enable).once @class.start @class.stubs(:primitive_is_managed?).returns(true) + @class.unstub(:enable) @class.expects(:enable).never @class.start end @@ -104,12 +106,9 @@ describe Puppet::Type.type(:service).provider(:pacemaker) do @class.start end - it 'should cleanup a primitive only if there are errors' do + it 'should cleanup a primitive' do @class.stubs(:primitive_has_failures?).returns(true) - @class.expects(:cleanup_with_wait).once - @class.start - @class.stubs(:primitive_has_failures?).returns(false) - @class.expects(:cleanup_with_wait).never + @class.expects(:cleanup_primitive).with(full_name, hostname).once @class.start end @@ -118,13 +117,13 @@ describe Puppet::Type.type(:service).provider(:pacemaker) do @class.start end - it 'tries to start the service by its name' do - @class.expects(:start_primitive).with(name) + it 'tries to start the service by its full name' do + @class.expects(:start_primitive).with(full_name) @class.start end - it 'adds a location constraint for the service by its name' do - @class.expects(:constraint_location_add).with(name, hostname) + it 'adds a location constraint for the service by its full_name' do + @class.expects(:constraint_location_add).with(full_name, hostname) @class.start end @@ -154,22 +153,19 @@ describe Puppet::Type.type(:service).provider(:pacemaker) do end context '#stop' do - it 'tries to enable service if it is not enabled to work with it' do + it 'tries to disable service if it is not enabled to work with it' do @class.stubs(:primitive_is_managed?).returns(false) @class.expects(:enable).once - @class.start + @class.stop @class.stubs(:primitive_is_managed?).returns(true) + @class.unstub(:enable) @class.expects(:enable).never - @class.start + @class.stop end - it 'should cleanup a primitive only if there are errors' do - @class.stubs(:primitive_has_failures?).returns(true) - @class.expects(:cleanup_with_wait).once - @class.start - @class.stubs(:primitive_has_failures?).returns(false) - @class.expects(:cleanup_with_wait).never - @class.start + it 'should cleanup a primitive on stop' do + @class.expects(:cleanup_primitive).with(full_name, hostname).once.once + @class.stop end it 'uses Ban to stop the service and waits for it to stop locally if service is complex' do @@ -190,6 +186,8 @@ describe Puppet::Type.type(:service).provider(:pacemaker) do context '#restart' do it 'does not stop or start the service if it is not locally running' do @class.stubs(:primitive_is_running?).with(name, hostname).returns(false) + @class.unstub(:stop) + @class.unstub(:start) @class.expects(:stop).never @class.expects(:start).never @class.restart