diff --git a/lib/astute/nodes_remover.rb b/lib/astute/nodes_remover.rb index 881a9bb4..80ba31ae 100644 --- a/lib/astute/nodes_remover.rb +++ b/lib/astute/nodes_remover.rb @@ -77,6 +77,26 @@ module Astute [mclient_skipped_nodes, mclient_nodes] end + def get_already_removed_nodes(nodes) + removed_nodes = [] + control_time = {} + + nodes.uids.sort.each_slice(Astute.config[:max_nodes_per_call]) do |part| + control_time.merge!(get_boot_time(part)) + end + + nodes.each do |uid, node| + boot_time = control_time[uid].to_i + next if boot_time.zero? + if node.boot_time + removed_nodes << uid if boot_time != node.boot_time + else + node.boot_time = boot_time + end + end + removed_nodes + end + def remove_nodes(nodes) if nodes.empty? Astute.logger.info "#{@ctx.task_id}: Nodes to remove are not provided. Do nothing." @@ -84,10 +104,19 @@ module Astute end erased_nodes, mclient_nodes = skipped_unskipped_mclient_nodes(nodes) + + removed_nodes = get_already_removed_nodes(mclient_nodes) + removed_nodes.each do |uid| + erased_node = Node.new('uid' => uid) + erased_nodes << erased_node + mclient_nodes.delete(uid) + Astute.logger.info "#{@ctx.task_id}: Node #{uid} is removed already, skipping" + end + responses = mclient_remove_nodes(mclient_nodes) inaccessible_uids = mclient_nodes.uids - responses.map { |response| response[:sender] } inaccessible_nodes = NodesHash.build(inaccessible_uids.map do |uid| - {'uid' => uid, 'error' => 'Node not answered by RPC.'} + {'uid' => uid, 'error' => 'Node not answered by RPC.', 'boot_time' => mclient_nodes[uid][:boot_time]} end) error_nodes = NodesHash.new @@ -136,5 +165,33 @@ module Astute responses.map(&:results) end + def run_shell_without_check(context, node_uids, cmd, timeout=10) + shell = MClient.new( + context, + 'execute_shell_command', + node_uids, + check_result=false, + timeout=timeout + ) + results = shell.execute(:cmd => cmd) + results.inject({}) do |h, res| + Astute.logger.debug( + "#{context.task_id}: cmd: #{cmd}\n" \ + "stdout: #{res.results[:data][:stdout]}\n" \ + "stderr: #{res.results[:data][:stderr]}\n" \ + "exit code: #{res.results[:data][:exit_code]}") + h.merge({res.results[:sender] => res.results[:data][:stdout].chomp}) + end + end + + def get_boot_time(node_uids) + run_shell_without_check( + @ctx, + node_uids, + "stat --printf='%Y' /proc/1", + timeout=10 + ) + end + end end diff --git a/spec/unit/nodes_remover_spec.rb b/spec/unit/nodes_remover_spec.rb index 11d78c61..89639a92 100644 --- a/spec/unit/nodes_remover_spec.rb +++ b/spec/unit/nodes_remover_spec.rb @@ -20,6 +20,7 @@ describe Astute::NodesRemover do let(:nodes) { [{'uid' => '1'}, {'uid' => '2'}] } let(:ctx) { mock_ctx } + let(:ctl_time) { {'1' => '100', '2' => '200'} } let(:mcollective_answer) do [ @@ -30,6 +31,7 @@ describe Astute::NodesRemover do before(:each) do Astute::NodesRemover.any_instance.stubs(:mclient_remove_piece_nodes).returns(mcollective_answer) + Astute::NodesRemover.any_instance.stubs(:run_shell_without_check).returns(ctl_time) end it 'should erase nodes (mbr) and reboot nodes(default)' do @@ -54,7 +56,7 @@ describe Astute::NodesRemover do {'uid' => '3', 'mclient_remove' => false}, {'uid' => '2'}, ], - "inaccessible_nodes" => [{"uid"=>"1", "error"=>"Node not answered by RPC."}] + "inaccessible_nodes" => [{"uid"=>"1", "error"=>"Node not answered by RPC.", "boot_time"=>100}] } ) end @@ -78,8 +80,8 @@ describe Astute::NodesRemover do nr = Astute::NodesRemover.new(ctx, nodes) nr.stubs(:mclient_remove_nodes).with( Astute::NodesHash.build([ - {'uid' => '1'}, - {'uid' => '2', 'mclient_remove' => true} + {'uid' => '1', 'boot_time' => 100}, + {'uid' => '2', 'mclient_remove' => true, 'boot_time' => 200} ]) ).returns(mcollective_answer).once nr.remove @@ -106,8 +108,8 @@ describe Astute::NodesRemover do { "nodes"=>[], "status" => "error", "error_nodes" => [ - {"uid"=>"1", "error"=>"RPC agent 'erase_node' failed. Result:\n{:sender=>\"1\", :statuscode=>1, :data=>{:rebooted=>false}}\n"}, - {"uid"=>"2", "error"=>"RPC agent 'erase_node' failed. Result:\n{:sender=>\"2\", :statuscode=>1, :data=>{:rebooted=>false}}\n"} + {"uid"=>"1", "error"=>"RPC agent 'erase_node' failed. Result:\n{:sender=>\"1\", :statuscode=>1, :data=>{:rebooted=>false}}\n", "boot_time"=>100}, + {"uid"=>"2", "error"=>"RPC agent 'erase_node' failed. Result:\n{:sender=>\"2\", :statuscode=>1, :data=>{:rebooted=>false}}\n", "boot_time"=>200} ] } ) @@ -159,14 +161,32 @@ describe Astute::NodesRemover do { "nodes"=>[], "status" => "error", "error_nodes" => [ - {"uid"=>"1", "error"=>"RPC method 'erase_node' failed with message: Could not reboot"}, - {"uid"=>"2", "error"=>"RPC method 'erase_node' failed with message: Could not reboot"} + {"uid"=>"1", "error"=>"RPC method 'erase_node' failed with message: Could not reboot", "boot_time"=>100}, + {"uid"=>"2", "error"=>"RPC method 'erase_node' failed with message: Could not reboot", "boot_time"=>200} ] } ) end end + context 'nodes fail to send status, but erased and rebooted' do + let(:mcollective_answer) do + [] + end + + let(:ctl_time2) { {} } + let(:ctl_time3) { {'1' => '150', '2' => '250'} } + + it 'should process rebooted nodes as erased' do + Astute::NodesRemover.any_instance.stubs(:mclient_remove_piece_nodes).returns(mcollective_answer) + Astute::NodesRemover.any_instance.stubs(:run_shell_without_check).returns(ctl_time) + .then.returns(ctl_time2).then.returns(ctl_time3) + expect(Astute::NodesRemover.new(ctx, nodes, reboot=true).remove).to eq( + { "nodes"=>[{"uid"=>"1"}, {"uid"=>"2"}] } + ) + end + end + context 'erase node when change node status from bootstrap to provisioning' do let(:mcollective_answer) do [