Zero tolerance for errors on nodes as default behavior

Without this change, a deployment is not marked as failed
when a task on a node fails.

Also initialize the logger of the Deployment support library
early, at the moment the Astute logger is assigned.

Change-Id: Ibcac4569756b34c3c1ac33f68ae203246d94d2a4
Closes-Bug: #1620858
This commit is contained in:
Vladimir Sharshov (warpc) 2016-09-13 18:16:34 +03:00
parent 58dd9d2f2c
commit 67896b9a59
3 changed files with 53 additions and 19 deletions

View File

@ -109,6 +109,7 @@ module Astute
# Stores +logger+ on this module and hands the very same object to
# Deployment::Log, so the support library gets its logger as soon as
# this one is assigned.
#
# @param logger [Object] logger instance to use
def self.logger=(logger)
  Deployment::Log.logger = (@logger = logger)
end
config_file = '/opt/astute/astute.conf'

View File

@ -98,10 +98,7 @@ module Astute
cluster = @cluster_class.new
cluster.node_concurrency.maximum = Astute.config.max_nodes_per_call
cluster.stop_condition { Thread.current[:gracefully_stop] }
cluster.fault_tolerance_groups = tasks_metadata.fetch(
'fault_tolerance_groups',
[]
)
cluster.noop_run = deployment_options.fetch(:noop_run, false)
cluster.debug_run = deployment_options.fetch(:debug, false)
@ -114,6 +111,11 @@ module Astute
tasks_graph,
cluster.node_statuses_transitions
)
setup_fault_tolerance_behavior(
tasks_metadata['fault_tolerance_groups'],
cluster
)
critical_uids = critical_node_uids(cluster.fault_tolerance_groups)
tasks_graph.keys.each do |node_id|
@ -139,7 +141,6 @@ module Astute
def deploy(deployment_options={})
cluster = create_cluster(deployment_options)
dry_run = deployment_options.fetch(:dry_run, false)
Deployment::Log.logger = Astute.logger if Astute.respond_to? :logger
write_graph_to_file(cluster)
result = if dry_run
{:success => true}
@ -168,6 +169,22 @@ module Astute
tasks_graph
end
# Assigns fault tolerance groups to the cluster, appending a catch-all
# group with zero tolerance that holds every non-sync-point node not
# listed in any explicitly declared group.
#
# The caller's array is never modified: a new array is built, so calling
# this repeatedly with the same metadata cannot accumulate duplicate
# default groups.
#
# @param fault_tolerance_groups [Array<Hash>, nil] declared groups, each
#   with 'fault_tolerance', 'name' and 'node_ids' keys; nil means none
# @param cluster [#nodes, #fault_tolerance_groups=] cluster to configure
# @return [Array<Hash>] the groups assigned to the cluster
def setup_fault_tolerance_behavior(fault_tolerance_groups, cluster)
  declared_groups = fault_tolerance_groups || []

  # Cluster node ids are stringified below, so normalize declared ids the
  # same way before comparing; Array() tolerates a group without node_ids.
  covered_ids = declared_groups
    .flat_map { |group| Array(group['node_ids']) }
    .map(&:to_s)
    .uniq

  cluster_ids = cluster.nodes
    .map { |entry| entry[0].to_s }
    .reject { |node_id| sync_point?(node_id) }

  default_group = {
    'fault_tolerance' => 0,
    'name' => 'zero_tolerance_as_default_for_nodes',
    'node_ids' => cluster_ids - covered_ids
  }

  # `+` builds a new array instead of pushing into the caller's one
  cluster.fault_tolerance_groups = declared_groups + [default_group]
end
def setup_fail_behavior(tasks_graph, cluster)
return unless cluster.noop_run
tasks_graph.each do |node_id, tasks|

View File

@ -80,6 +80,7 @@ describe Astute::TaskDeployment do
"null"=> []
}
end
let(:tasks_graph_3) do
{
"null" =>
@ -189,6 +190,35 @@ describe Astute::TaskDeployment do
expect(critical_nodes.size).to eql(3)
end
it 'should support default zero tolerance policy for error on nodes' do
  # Explicitly declared groups; node '2' is a member of none of them
  declared_groups = [
    {'fault_tolerance'=>0, 'name'=>'primary-controller', 'node_ids'=>['1']},
    {'fault_tolerance'=>1, 'name'=>'ceph', 'node_ids'=>['1', '3']},
    {'fault_tolerance'=>1, 'name'=>'ignored_group', 'node_ids'=>[]}
  ]

  # Catch-all group expected to be appended for the uncovered node.
  # 'virtual_sync_node' is expected to be left out of it.
  default_group = {
    'fault_tolerance'=>0,
    'name'=>'zero_tolerance_as_default_for_nodes',
    'node_ids'=>['2']
  }
  expected_groups = declared_groups + [default_group]

  cluster_nodes = [
    ['1', mock('node_1')],
    ['2', mock('node_2')],
    ['3', mock('node_3')],
    ['virtual_sync_node', mock('null')]
  ]

  cluster = mock('cluster')
  cluster.stubs(:nodes).returns(cluster_nodes)
  cluster.expects(:fault_tolerance_groups=).with(expected_groups)

  task_deployment.send(:setup_fault_tolerance_behavior, declared_groups, cluster)
end
it 'should fail offline nodes' do
Astute::TaskPreDeploymentActions.any_instance.stubs(:process)
task_deployment.stubs(:write_graph_to_file)
@ -231,20 +261,6 @@ describe Astute::TaskDeployment do
tasks_directory: tasks_directory)
end
it 'should setup deployment logger' do
  # Stub out the collaborators deploy calls so only logger wiring is observed
  ctx.stubs(:report)
  Astute::TaskPreDeploymentActions.any_instance.stubs(:process)
  Astute::TaskCluster.any_instance.stubs(:run).returns({:success => true})
  task_deployment.stubs(:write_graph_to_file)
  task_deployment.stubs(:fail_offline_nodes).returns([])

  # The Deployment library must receive the Astute logger during deploy
  Deployment::Log.expects(:logger=).with(Astute.logger)

  deployment_options = {
    tasks_metadata: tasks_metadata,
    tasks_graph: tasks_graph,
    tasks_directory: tasks_directory
  }
  task_deployment.deploy(deployment_options)
end
context 'task concurrency' do
let(:task_concurrency) { mock('task_concurrency') }