From 64d62086e88460e29f5d9117a1f4c69d391d4bd0 Mon Sep 17 00:00:00 2001 From: Vladimir Kozhukalov Date: Fri, 13 Jan 2017 18:16:21 +0300 Subject: [PATCH] Move not provisioned nodes to error status When there are a lot of nodes to provision and we provision them in chunks, we could fail in the middle due to "Too many nodes failed to provision". If so, we need to append the nodes for which provisioning was never started to the list of failed nodes. Otherwise, those nodes will be reported as 'provisioned' with progress = 100 and rebooted. But for some reason we bind all nodes to the debian-installer profile in cobbler before starting provisioning, and when rebooted these unprovisioned nodes will fail to boot, because since 7.0 we put empty files where cobbler expects the debian-installer kernel and initrd files. :-) Change-Id: I2a401b80614ee7dd5a10931b9b50bcff066f790f Closes-Bug: #1656269 (cherry picked from commit 570049ca1fde98ee09952075f72bc4f28b9d8b71) --- lib/astute/provision.rb | 10 ++++--- spec/unit/provision_spec.rb | 52 +++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 4 deletions(-) diff --git a/lib/astute/provision.rb b/lib/astute/provision.rb index 0ac8ad53..8fdf0f76 100644 --- a/lib/astute/provision.rb +++ b/lib/astute/provision.rb @@ -50,10 +50,10 @@ module Astute begin prepare_nodes(reporter, task_id, engine_attrs, nodes, cobbler) failed_uids, timeouted_uids = provision_and_watch_progress(reporter, - task_id, - Array.new(nodes), - engine_attrs, - fault_tolerance) + task_id, + Array.new(nodes), + engine_attrs, + fault_tolerance) rescue => e Astute.logger.error("Error occured while provisioning:\n#{e.pretty_inspect}") reporter.report({ @@ -155,6 +155,8 @@ module Astute if should_fail(failed_uids + timeouted_uids, fault_tolerance) Astute.logger.debug("Aborting provision. 
To many nodes failed: #{failed_uids + timeouted_uids}") + Astute.logger.debug("Those nodes where we not yet started provision will be set to error mode") + failed_uids += nodes_to_provision.map{ |n| n['uid'] } return failed_uids, timeouted_uids end diff --git a/spec/unit/provision_spec.rb b/spec/unit/provision_spec.rb index 07ef234f..61ba6506 100644 --- a/spec/unit/provision_spec.rb +++ b/spec/unit/provision_spec.rb @@ -821,6 +821,58 @@ describe Astute::Provisioner do @provisioner.provision(@reporter, data['task_uuid'], provision_info) end + + it 'it should append those nodes where provision not yet started to failed nodes if provision is aborted in the middle' do + Astute::CobblerManager.any_instance.stubs(:add_nodes).returns([]) + @provisioner.stubs(:remove_nodes).returns([]) + @provisioner.stubs(:prepare_nodes).returns([]) + @provisioner.stubs(:unlock_nodes_discovery) + Astute.config.provisioning_timeout = 5 + Astute.config.max_nodes_to_provision = 2 + nodes = [ + { 'uid' => '1'}, + { 'uid' => '2'}, + { 'uid' => '3'} + ] + @provisioner.stubs(:report_about_progress).returns() + @provisioner.stubs(:unlock_nodes_discovery) + @provisioner.stubs(:node_type) + .returns([{'uid' => '1', 'node_type' => 'target' }]) + + success_msg = { + 'status' => 'error', + "error"=>"Too many nodes failed to provision", + 'progress' => 100, + 'nodes' => [ + { + 'uid' => '2', + 'status' => 'error', + 'progress' => 100, + 'error_msg' => 'Failed to provision', + 'error_type' => 'provision' + }, + { + 'uid' => '3', + 'status' => 'error', + 'error_msg' => 'Failed to provision', + 'progress' => 100, + 'error_type' => 'provision' + }, + { + 'uid' => '1', + 'status' => 'provisioned', + 'progress' => 100 + }, + + ]} + + @provisioner.stubs(:provision_piece).returns(['2']) + provision_info = {'nodes' => nodes, + 'engine' => data['engine']} + + @reporter.expects(:report).with(success_msg).once + @provisioner.provision(@reporter, data['task_uuid'], provision_info) + end end describe 
'#stop_provision' do