Merge "Network problem tolerance puppet status check"
This commit is contained in:
commit
d411a2e0f2
|
@ -62,6 +62,7 @@ module Astute
|
|||
conf[:puppet_start_interval] = 2 # interval between attemps to start puppet
|
||||
conf[:puppet_retries] = 2 # how many times astute will try to run puppet
|
||||
conf[:puppet_succeed_retries] = 0 # use this to rerun a puppet task again if it was successful (idempotency)
|
||||
conf[:puppet_undefined_retries] = 3 # how many times astute will try to get actual status of node before fail
|
||||
conf[:puppet_module_path] = '/etc/puppet/modules' # where we should find basic modules for puppet
|
||||
conf[:puppet_noop_run] = false # enable Puppet noop run
|
||||
conf[:mc_retries] = 10 # MClient tries to call mcagent before failure
|
||||
|
|
|
@ -56,14 +56,14 @@ module Astute
|
|||
|
||||
# Create configured puppet mcollective agent
|
||||
# @return [Astute::MClient]
|
||||
def puppetd
|
||||
def puppetd(timeout=nil, retries=1)
|
||||
puppetd = MClient.new(
|
||||
@ctx,
|
||||
"puppetd",
|
||||
[@node_id],
|
||||
_check_result=true,
|
||||
_timeout=nil,
|
||||
_retries=1,
|
||||
_timeout=timeout,
|
||||
_retries=retries,
|
||||
_enable_result_logging=false
|
||||
)
|
||||
puppetd.on_respond_timeout do |uids|
|
||||
|
@ -77,7 +77,7 @@ module Astute
|
|||
# Run last_run_summary action using mcollective puppet agent
|
||||
# @return [Hash] return hash with status and resources
|
||||
def last_run_summary
|
||||
@summary = puppetd.last_run_summary(
|
||||
@summary = puppetd(_timeout=10, _retries=6).last_run_summary(
|
||||
:puppet_noop_run => @options['puppet_noop_run'],
|
||||
:raw_report => @options['raw_report']
|
||||
).first[:data]
|
||||
|
|
|
@ -39,8 +39,6 @@ module Astute
|
|||
'stopped', 'disabled'
|
||||
]
|
||||
|
||||
FAILED_STATUSES = UNDEFINED_STATUSES + STOPED_STATUSES
|
||||
|
||||
def initialize(task, puppet_mclient, options)
|
||||
@task = task
|
||||
@retries = options['retries']
|
||||
|
@ -48,6 +46,8 @@ module Astute
|
|||
@puppet_start_interval = options['puppet_start_interval']
|
||||
@time_observer = TimeObserver.new(options['timeout'])
|
||||
@succeed_retries = options['succeed_retries']
|
||||
@undefined_retries = options['undefined_retries']
|
||||
@original_undefined_retries = options['undefined_retries']
|
||||
@puppet_mclient = puppet_mclient
|
||||
end
|
||||
|
||||
|
@ -76,6 +76,8 @@ module Astute
|
|||
processing_running_task
|
||||
when 'failed'
|
||||
processing_error_task
|
||||
when 'undefined'
|
||||
processing_undefined_task
|
||||
end
|
||||
|
||||
time_is_up! if should_stop?
|
||||
|
@ -169,8 +171,10 @@ module Astute
|
|||
'successful'
|
||||
when BUSY_STATUSES.include?(mco_puppet_status)
|
||||
'running'
|
||||
when FAILED_STATUSES.include?(mco_puppet_status)
|
||||
when STOPED_STATUSES.include?(mco_puppet_status)
|
||||
'failed'
|
||||
when UNDEFINED_STATUSES.include?(mco_puppet_status)
|
||||
'undefined'
|
||||
else
|
||||
raise StatusValidationError,
|
||||
"Unknow puppet status: #{mco_puppet_status}"
|
||||
|
@ -189,7 +193,7 @@ module Astute
|
|||
# @return [void]
|
||||
def log_current_status(status)
|
||||
message = "#{task_details_for_log}, status: #{status}"
|
||||
if FAILED_STATUSES.include?(status)
|
||||
if (UNDEFINED_STATUSES + STOPED_STATUSES).include?(status)
|
||||
Astute.logger.error message
|
||||
else
|
||||
Astute.logger.debug message
|
||||
|
@ -197,8 +201,9 @@ module Astute
|
|||
end
|
||||
|
||||
# Process additional action in case of puppet succeed
|
||||
# @return [String] Task status: successful, failed or running
|
||||
# @return [String] Task status: successful or running
|
||||
def processing_succeed_task
|
||||
reset_undefined_retries!
|
||||
Astute.logger.debug "Puppet completed within "\
|
||||
"#{@time_observer.since_start} seconds"
|
||||
if @succeed_retries > 0
|
||||
|
@ -218,8 +223,9 @@ module Astute
|
|||
end
|
||||
|
||||
# Process additional action in case of puppet failed
|
||||
# @return [String] Task status: successful, failed or running
|
||||
# @return [String] Task status: failed or running
|
||||
def processing_error_task
|
||||
reset_undefined_retries!
|
||||
if @retries > 0
|
||||
@retries -= 1
|
||||
Astute.logger.debug "Puppet on node will be "\
|
||||
|
@ -236,12 +242,39 @@ module Astute
|
|||
end
|
||||
end
|
||||
|
||||
# Process additional action in case of undefined puppet status
|
||||
# @return [String] Task status: failed or running
|
||||
def processing_undefined_task
|
||||
if @undefined_retries > 0
|
||||
@undefined_retries -= 1
|
||||
Astute.logger.debug "Puppet on node has undefined status. "\
|
||||
"#{@undefined_retries} retries remained. "\
|
||||
"#{task_details_for_log}"
|
||||
Astute.logger.info "Retrying to check status for following "\
|
||||
"nodes: #{@puppet_mclient.node_id}"
|
||||
'running'
|
||||
else
|
||||
Astute.logger.error "Node has failed to get status. There is"\
|
||||
" no more retries for status check. #{task_details_for_log}"
|
||||
'failed'
|
||||
end
|
||||
end
|
||||
|
||||
# Process additional action in case of puppet running
|
||||
# @return [String]: Task status: successful, failed or running
|
||||
def processing_running_task
|
||||
reset_undefined_retries!
|
||||
'running'
|
||||
end
|
||||
|
||||
# Reset undefined retries to original value
|
||||
# @return [void]
|
||||
def reset_undefined_retries!
|
||||
Astute.logger.debug "Reset undefined retries to original "\
|
||||
"value: #{@original_undefined_retries}"
|
||||
@undefined_retries = @original_undefined_retries
|
||||
end
|
||||
|
||||
end #PuppetJob
|
||||
|
||||
end
|
||||
|
|
|
@ -47,6 +47,7 @@ module Astute
|
|||
'timeout' => Astute.config.puppet_timeout,
|
||||
'puppet_debug' => false,
|
||||
'succeed_retries' => Astute.config.puppet_succeed_retries,
|
||||
'undefined_retries' => Astute.config.puppet_undefined_retries,
|
||||
'raw_report' => Astute.config.puppet_raw_report,
|
||||
'puppet_noop_run' => Astute.config.puppet_noop_run,
|
||||
'puppet_start_timeout' => Astute.config.puppet_start_timeout,
|
||||
|
@ -67,6 +68,7 @@ module Astute
|
|||
{
|
||||
'retries' => @task['parameters']['retries'],
|
||||
'succeed_retries' => @task['parameters']['succeed_retries'],
|
||||
'undefined_retries' => @task['parameters']['undefined_retries'],
|
||||
'timeout' => @task['parameters']['timeout'],
|
||||
'puppet_start_timeout' => @task['parameters'][
|
||||
'puppet_start_timeout'],
|
||||
|
|
|
@ -31,6 +31,7 @@ describe Astute::PuppetJob do
|
|||
{
|
||||
'retries' => 1,
|
||||
'succeed_retries' => 0,
|
||||
'undefined_retries' => 1,
|
||||
'timeout' => 1,
|
||||
'puppet_start_timeout' => 1,
|
||||
'puppet_start_interval' => 0
|
||||
|
@ -71,7 +72,7 @@ describe Astute::PuppetJob do
|
|||
describe '#task_status=' do
|
||||
it 'should raise error if status do not support' do
|
||||
expect {subject.send(:task_status=, 'unknow_status')}.to \
|
||||
raise_error(StatusValidationError, /unknow_status/)
|
||||
raise_error(Astute::StatusValidationError, /unknow_status/)
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -84,7 +85,7 @@ describe Astute::PuppetJob do
|
|||
|
||||
puppet_mclient.expects(:status).returns('unknow_status')
|
||||
expect {subject.status}.to raise_error(
|
||||
StatusValidationError, /unknow_status/
|
||||
Astute::StatusValidationError, /unknow_status/
|
||||
)
|
||||
end
|
||||
end
|
||||
|
@ -124,7 +125,7 @@ describe Astute::PuppetJob do
|
|||
end
|
||||
|
||||
it 'should return runing when magent failed but can retry' do
|
||||
puppet_mclient.expects(:run).twice.returns(true)
|
||||
puppet_mclient.expects(:run).once.returns(true)
|
||||
subject.run
|
||||
|
||||
puppet_mclient.stubs(:status)
|
||||
|
@ -149,13 +150,45 @@ describe Astute::PuppetJob do
|
|||
expect(subject.status).to eq('successful')
|
||||
end
|
||||
|
||||
it 'should successful if undefined/failed but retry succeed' do
|
||||
puppet_mclient.stubs(:run).returns(true)
|
||||
options['undefined_retries'] = 1
|
||||
options['retries'] = 1
|
||||
subject.run
|
||||
|
||||
puppet_mclient.stubs(:status)
|
||||
.then.returns('undefined')
|
||||
.returns('stopped')
|
||||
.then.returns('undefined')
|
||||
.then.returns('succeed')
|
||||
|
||||
3.times { expect(subject.status).to eq('running') }
|
||||
expect(subject.status). to eq('successful')
|
||||
end
|
||||
|
||||
it 'should successful if failed but retry succeed' do
|
||||
puppet_mclient.stubs(:run).returns(true)
|
||||
options['undefined_retries'] = 0
|
||||
options['retries'] = 2
|
||||
subject.run
|
||||
|
||||
puppet_mclient.stubs(:status)
|
||||
.returns('stopped')
|
||||
.then.returns('stopped')
|
||||
.then.returns('succeed')
|
||||
|
||||
2.times { expect(subject.status).to eq('running') }
|
||||
expect(subject.status). to eq('successful')
|
||||
end
|
||||
|
||||
it 'should successful if undefined but retry succeed' do
|
||||
puppet_mclient.stubs(:run).returns(true)
|
||||
options['undefined_retries'] = 2
|
||||
options['retries'] = 0
|
||||
subject.run
|
||||
|
||||
puppet_mclient.stubs(:status)
|
||||
.returns('undefined')
|
||||
.then.returns('undefined')
|
||||
.then.returns('succeed')
|
||||
|
||||
|
@ -188,7 +221,7 @@ describe Astute::PuppetJob do
|
|||
subject.run
|
||||
|
||||
puppet_mclient.stubs(:status)
|
||||
.returns('undefined')
|
||||
.returns('stopped')
|
||||
.then.returns('stopped')
|
||||
|
||||
expect(subject.status).to eq('running')
|
||||
|
@ -210,13 +243,67 @@ describe Astute::PuppetJob do
|
|||
subject.run
|
||||
|
||||
puppet_mclient.stubs(:status)
|
||||
.returns('undefined')
|
||||
.returns('stopped')
|
||||
.then.returns('stopped')
|
||||
|
||||
expect(subject.status).to eq('running')
|
||||
3.times { expect(subject.status). to eq('failed') }
|
||||
end
|
||||
end
|
||||
|
||||
context 'undefined' do
|
||||
it 'should return failed if undefined and no more retries' do
|
||||
puppet_mclient.stubs(:run).returns(true)
|
||||
subject.run
|
||||
|
||||
puppet_mclient.stubs(:status)
|
||||
.returns('undefined')
|
||||
.then.returns('undefined')
|
||||
|
||||
expect(subject.status).to eq('running')
|
||||
expect(subject.status).to eq('failed')
|
||||
end
|
||||
|
||||
it 'should return failed if time is over and no result' do
|
||||
puppet_mclient.stubs(:run).returns(true)
|
||||
options['timeout'] = 0
|
||||
subject.run
|
||||
|
||||
puppet_mclient.stubs(:status).returns('undefined')
|
||||
expect(subject.status).to eq('failed')
|
||||
end
|
||||
|
||||
it 'should do nothing if final status set and retries end' do
|
||||
puppet_mclient.stubs(:run).returns(true)
|
||||
options['undefined'] = 0
|
||||
subject.run
|
||||
|
||||
puppet_mclient.stubs(:status)
|
||||
.returns('undefined')
|
||||
.then.returns('undefined')
|
||||
|
||||
expect(subject.status).to eq('running')
|
||||
3.times { expect(subject.status). to eq('failed') }
|
||||
end
|
||||
|
||||
it 'should reset retries if answer was received' do
|
||||
puppet_mclient.stubs(:run).returns(true)
|
||||
options['undefined_retries'] = 1
|
||||
options['retries'] = 0
|
||||
subject.run
|
||||
|
||||
puppet_mclient.stubs(:status)
|
||||
.then.returns('undefined')
|
||||
.returns('running')
|
||||
.then.returns('undefined')
|
||||
.then.returns('succeed')
|
||||
|
||||
3.times { expect(subject.status).to eq('running') }
|
||||
expect(subject.status). to eq('successful')
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue