Merge "Network problem tolerance puppet status check"

This commit is contained in:
Jenkins 2017-01-06 11:53:16 +00:00 committed by Gerrit Code Review
commit d411a2e0f2
5 changed files with 138 additions and 15 deletions

View File

@ -62,6 +62,7 @@ module Astute
conf[:puppet_start_interval] = 2 # interval between attemps to start puppet
conf[:puppet_retries] = 2 # how many times astute will try to run puppet
conf[:puppet_succeed_retries] = 0 # use this to rerun a puppet task again if it was successful (idempotency)
conf[:puppet_undefined_retries] = 3 # how many times astute will try to get actual status of node before fail
conf[:puppet_module_path] = '/etc/puppet/modules' # where we should find basic modules for puppet
conf[:puppet_noop_run] = false # enable Puppet noop run
conf[:mc_retries] = 10 # MClient tries to call mcagent before failure

View File

@ -56,14 +56,14 @@ module Astute
# Create configured puppet mcollective agent
# @return [Astute::MClient]
def puppetd
def puppetd(timeout=nil, retries=1)
puppetd = MClient.new(
@ctx,
"puppetd",
[@node_id],
_check_result=true,
_timeout=nil,
_retries=1,
_timeout=timeout,
_retries=retries,
_enable_result_logging=false
)
puppetd.on_respond_timeout do |uids|
@ -77,7 +77,7 @@ module Astute
# Run last_run_summary action using mcollective puppet agent
# @return [Hash] return hash with status and resources
def last_run_summary
@summary = puppetd.last_run_summary(
@summary = puppetd(_timeout=10, _retries=6).last_run_summary(
:puppet_noop_run => @options['puppet_noop_run'],
:raw_report => @options['raw_report']
).first[:data]

View File

@ -39,8 +39,6 @@ module Astute
'stopped', 'disabled'
]
FAILED_STATUSES = UNDEFINED_STATUSES + STOPED_STATUSES
def initialize(task, puppet_mclient, options)
@task = task
@retries = options['retries']
@ -48,6 +46,8 @@ module Astute
@puppet_start_interval = options['puppet_start_interval']
@time_observer = TimeObserver.new(options['timeout'])
@succeed_retries = options['succeed_retries']
@undefined_retries = options['undefined_retries']
@original_undefined_retries = options['undefined_retries']
@puppet_mclient = puppet_mclient
end
@ -76,6 +76,8 @@ module Astute
processing_running_task
when 'failed'
processing_error_task
when 'undefined'
processing_undefined_task
end
time_is_up! if should_stop?
@ -169,8 +171,10 @@ module Astute
'successful'
when BUSY_STATUSES.include?(mco_puppet_status)
'running'
when FAILED_STATUSES.include?(mco_puppet_status)
when STOPED_STATUSES.include?(mco_puppet_status)
'failed'
when UNDEFINED_STATUSES.include?(mco_puppet_status)
'undefined'
else
raise StatusValidationError,
"Unknow puppet status: #{mco_puppet_status}"
@ -189,7 +193,7 @@ module Astute
# @return [void]
def log_current_status(status)
message = "#{task_details_for_log}, status: #{status}"
if FAILED_STATUSES.include?(status)
if (UNDEFINED_STATUSES + STOPED_STATUSES).include?(status)
Astute.logger.error message
else
Astute.logger.debug message
@ -197,8 +201,9 @@ module Astute
end
# Process additional action in case of puppet succeed
# @return [String] Task status: successful, failed or running
# @return [String] Task status: successful or running
def processing_succeed_task
reset_undefined_retries!
Astute.logger.debug "Puppet completed within "\
"#{@time_observer.since_start} seconds"
if @succeed_retries > 0
@ -218,8 +223,9 @@ module Astute
end
# Process additional action in case of puppet failed
# @return [String] Task status: successful, failed or running
# @return [String] Task status: failed or running
def processing_error_task
reset_undefined_retries!
if @retries > 0
@retries -= 1
Astute.logger.debug "Puppet on node will be "\
@ -236,12 +242,39 @@ module Astute
end
end
# Process additional action in case of undefined puppet status
# @return [String] Task status: failed or running
def processing_undefined_task
if @undefined_retries > 0
@undefined_retries -= 1
Astute.logger.debug "Puppet on node has undefined status. "\
"#{@undefined_retries} retries remained. "\
"#{task_details_for_log}"
Astute.logger.info "Retrying to check status for following "\
"nodes: #{@puppet_mclient.node_id}"
'running'
else
Astute.logger.error "Node has failed to get status. There is"\
" no more retries for status check. #{task_details_for_log}"
'failed'
end
end
# Process additional action in case of puppet running
# @return [String]: Task status: successful, failed or running
def processing_running_task
reset_undefined_retries!
'running'
end
# Reset undefined retries to original value
# @return [void]
def reset_undefined_retries!
Astute.logger.debug "Reset undefined retries to original "\
"value: #{@original_undefined_retries}"
@undefined_retries = @original_undefined_retries
end
end #PuppetJob
end

View File

@ -47,6 +47,7 @@ module Astute
'timeout' => Astute.config.puppet_timeout,
'puppet_debug' => false,
'succeed_retries' => Astute.config.puppet_succeed_retries,
'undefined_retries' => Astute.config.puppet_undefined_retries,
'raw_report' => Astute.config.puppet_raw_report,
'puppet_noop_run' => Astute.config.puppet_noop_run,
'puppet_start_timeout' => Astute.config.puppet_start_timeout,
@ -67,6 +68,7 @@ module Astute
{
'retries' => @task['parameters']['retries'],
'succeed_retries' => @task['parameters']['succeed_retries'],
'undefined_retries' => @task['parameters']['undefined_retries'],
'timeout' => @task['parameters']['timeout'],
'puppet_start_timeout' => @task['parameters'][
'puppet_start_timeout'],

View File

@ -31,6 +31,7 @@ describe Astute::PuppetJob do
{
'retries' => 1,
'succeed_retries' => 0,
'undefined_retries' => 1,
'timeout' => 1,
'puppet_start_timeout' => 1,
'puppet_start_interval' => 0
@ -71,7 +72,7 @@ describe Astute::PuppetJob do
describe '#task_status=' do
it 'should raise error if status do not support' do
expect {subject.send(:task_status=, 'unknow_status')}.to \
raise_error(StatusValidationError, /unknow_status/)
raise_error(Astute::StatusValidationError, /unknow_status/)
end
end
@ -84,7 +85,7 @@ describe Astute::PuppetJob do
puppet_mclient.expects(:status).returns('unknow_status')
expect {subject.status}.to raise_error(
StatusValidationError, /unknow_status/
Astute::StatusValidationError, /unknow_status/
)
end
end
@ -124,7 +125,7 @@ describe Astute::PuppetJob do
end
it 'should return runing when magent failed but can retry' do
puppet_mclient.expects(:run).twice.returns(true)
puppet_mclient.expects(:run).once.returns(true)
subject.run
puppet_mclient.stubs(:status)
@ -149,13 +150,45 @@ describe Astute::PuppetJob do
expect(subject.status).to eq('successful')
end
it 'should successful if undefined/failed but retry succeed' do
puppet_mclient.stubs(:run).returns(true)
options['undefined_retries'] = 1
options['retries'] = 1
subject.run
puppet_mclient.stubs(:status)
.then.returns('undefined')
.returns('stopped')
.then.returns('undefined')
.then.returns('succeed')
3.times { expect(subject.status).to eq('running') }
expect(subject.status). to eq('successful')
end
it 'should successful if failed but retry succeed' do
puppet_mclient.stubs(:run).returns(true)
options['undefined_retries'] = 0
options['retries'] = 2
subject.run
puppet_mclient.stubs(:status)
.returns('stopped')
.then.returns('stopped')
.then.returns('succeed')
2.times { expect(subject.status).to eq('running') }
expect(subject.status). to eq('successful')
end
it 'should successful if undefined but retry succeed' do
puppet_mclient.stubs(:run).returns(true)
options['undefined_retries'] = 2
options['retries'] = 0
subject.run
puppet_mclient.stubs(:status)
.returns('undefined')
.then.returns('undefined')
.then.returns('succeed')
@ -188,7 +221,7 @@ describe Astute::PuppetJob do
subject.run
puppet_mclient.stubs(:status)
.returns('undefined')
.returns('stopped')
.then.returns('stopped')
expect(subject.status).to eq('running')
@ -210,13 +243,67 @@ describe Astute::PuppetJob do
subject.run
puppet_mclient.stubs(:status)
.returns('undefined')
.returns('stopped')
.then.returns('stopped')
expect(subject.status).to eq('running')
3.times { expect(subject.status). to eq('failed') }
end
end
context 'undefined' do
it 'should return failed if undefined and no more retries' do
puppet_mclient.stubs(:run).returns(true)
subject.run
puppet_mclient.stubs(:status)
.returns('undefined')
.then.returns('undefined')
expect(subject.status).to eq('running')
expect(subject.status).to eq('failed')
end
it 'should return failed if time is over and no result' do
puppet_mclient.stubs(:run).returns(true)
options['timeout'] = 0
subject.run
puppet_mclient.stubs(:status).returns('undefined')
expect(subject.status).to eq('failed')
end
it 'should do nothing if final status set and retries end' do
puppet_mclient.stubs(:run).returns(true)
options['undefined'] = 0
subject.run
puppet_mclient.stubs(:status)
.returns('undefined')
.then.returns('undefined')
expect(subject.status).to eq('running')
3.times { expect(subject.status). to eq('failed') }
end
it 'should reset retries if answer was received' do
puppet_mclient.stubs(:run).returns(true)
options['undefined_retries'] = 1
options['retries'] = 0
subject.run
puppet_mclient.stubs(:status)
.then.returns('undefined')
.returns('running')
.then.returns('undefined')
.then.returns('succeed')
3.times { expect(subject.status).to eq('running') }
expect(subject.status). to eq('successful')
end
end
end
end