Move not provisioned nodes to error status

When there are a lot of nodes to provision and we provision
them in chunks, we could fail in the middle due to "Too many
nodes failed to provision". If so, we need to append those
nodes where we did not start provisioning at all to the list
of failed nodes. Otherwise, those nodes will be reported
as 'provisioned' with progress = 100 and rebooted.
But for some reason we bind all nodes to the debian-installer
profile in cobbler before starting provisioning, and when rebooted,
these non-provisioned nodes will fail to boot, because since
7.0 we put empty files where cobbler expects the debian-installer
kernel and initrd files. :-)

Change-Id: I2a401b80614ee7dd5a10931b9b50bcff066f790f
Closes-Bug: #1656269
This commit is contained in:
Vladimir Kozhukalov 2017-01-13 18:16:21 +03:00
parent 1b2bf8eaee
commit 570049ca1f
2 changed files with 59 additions and 5 deletions

View File

@ -44,11 +44,11 @@ module Astute
begin
prepare_nodes(reporter, task_id, engine_attrs, nodes, cobbler)
failed_uids, timeouted_uids = provision_and_watch_progress(reporter,
task_id,
Array.new(nodes),
engine_attrs,
provision_method,
fault_tolerance)
task_id,
Array.new(nodes),
engine_attrs,
provision_method,
fault_tolerance)
rescue => e
Astute.logger.error("Error occured while provisioning:\n#{e.pretty_inspect}")
reporter.report({
@ -151,6 +151,8 @@ module Astute
# Abort early when the combined failed and timed-out uids make
# should_fail(..., fault_tolerance) return true.
if should_fail(failed_uids + timeouted_uids, fault_tolerance)
Astute.logger.debug("Aborting provision. To many nodes failed: #{failed_uids + timeouted_uids}")
Astute.logger.debug("Those nodes where we not yet started provision will be set to error mode")
# Whatever is still left in nodes_to_provision has not been started yet;
# add those uids to failed_uids so they are returned as failed too.
failed_uids += nodes_to_provision.map{ |n| n['uid'] }
return failed_uids, timeouted_uids
end

View File

@ -852,6 +852,58 @@ describe Astute::Provisioner do
@provisioner.provision(@reporter, data['task_uuid'], provision_info, 'image')
end
# Regression example for an aborted chunked provision: with three nodes and
# max_nodes_to_provision = 2, provision_piece is stubbed to report node '2'
# as failed. The single expected report must mark both node '2' and node '3'
# as errored, and node '1' as provisioned.
it 'it should append those nodes where provision not yet started to failed nodes if provision is aborted in the middle' do
  # Stub out collaborators so that only the provisioning flow is exercised.
  Astute::CobblerManager.any_instance.stubs(:add_nodes).returns([])
  @provisioner.stubs(:remove_nodes).returns([])
  @provisioner.stubs(:prepare_nodes).returns([])
  @provisioner.stubs(:unlock_nodes_discovery)
  @provisioner.stubs(:report_about_progress).returns()
  @provisioner.stubs(:node_type)
    .returns([{'uid' => '1', 'node_type' => 'target' }])
  @provisioner.stubs(:provision_piece).returns(['2'])

  # NOTE(review): these settings are written to the shared Astute.config and
  # are not restored here — confirm that other examples reset them.
  Astute.config.provisioning_timeout = 5
  Astute.config.max_nodes_to_provision = 2

  nodes = [
    { 'uid' => '1'},
    { 'uid' => '2'},
    { 'uid' => '3'}
  ]

  # The report expected from the provisioner; the order of the 'nodes'
  # entries is part of the expectation.
  expected_error_msg = {
    'status' => 'error',
    "error"=>"Too many nodes failed to provision",
    'progress' => 100,
    'nodes' => [
      {
        'uid' => '2',
        'status' => 'error',
        'progress' => 100,
        'error_msg' => 'Failed to provision',
        'error_type' => 'provision'
      },
      {
        'uid' => '3',
        'status' => 'error',
        'error_msg' => 'Failed to provision',
        'progress' => 100,
        'error_type' => 'provision'
      },
      {
        'uid' => '1',
        'status' => 'provisioned',
        'progress' => 100
      },
    ]}

  provision_info = {'nodes' => nodes,
                    'engine' => data['engine']}

  @reporter.expects(:report).with(expected_error_msg).once
  @provisioner.provision(@reporter, data['task_uuid'], provision_info, 'image')
end
end
describe '#stop_provision' do