Merge "Zero tolerance for errors on nodes as default behavior" into stable/mitaka

This commit is contained in:
Jenkins 2016-09-14 13:35:55 +00:00 committed by Gerrit Code Review
commit d019086b4d
3 changed files with 52 additions and 19 deletions

View File

@ -115,6 +115,7 @@ module Astute
# Installs +logger+ as this module's logger and hands the very same object
# to Deployment::Log, so both write to one destination.
#
# @param logger the logger object to install
def self.logger=(logger)
  # Chained assignment: @logger is stored first, then forwarded.
  Deployment::Log.logger = (@logger = logger)
end
config_file = '/opt/astute/astute.conf'

View File

@ -98,10 +98,7 @@ module Astute
cluster = @cluster_class.new
cluster.node_concurrency.maximum = Astute.config.max_nodes_per_call
cluster.stop_condition { Thread.current[:gracefully_stop] }
cluster.fault_tolerance_groups = tasks_metadata.fetch(
'fault_tolerance_groups',
[]
)
cluster.noop_run = deployment_options.fetch(:noop_run, false)
cluster.debug_run = deployment_options.fetch(:debug, false)
@ -114,6 +111,11 @@ module Astute
tasks_graph,
cluster.node_statuses_transitions
)
setup_fault_tolerance_behavior(
tasks_metadata['fault_tolerance_groups'],
cluster
)
critical_uids = critical_node_uids(cluster.fault_tolerance_groups)
tasks_graph.keys.each do |node_id|
@ -139,7 +141,6 @@ module Astute
def deploy(deployment_options={})
cluster = create_cluster(deployment_options)
dry_run = deployment_options.fetch(:dry_run, false)
Deployment::Log.logger = Astute.logger if Astute.respond_to? :logger
write_graph_to_file(cluster)
result = if dry_run
{:success => true}
@ -168,6 +169,22 @@ module Astute
tasks_graph
end
# Assigns fault tolerance groups to the cluster, adding a default group with
# zero tolerance for every node that no supplied group mentions.
#
# Nodes are taken from +cluster.nodes+ (first element of each entry, as a
# string); entries for which +sync_point?+ is true are left out.
#
# @param fault_tolerance_groups [Array<Hash>, nil] groups with the keys
#   'fault_tolerance', 'name' and 'node_ids'; nil is treated as no groups
# @param cluster an object responding to +nodes+ and +fault_tolerance_groups=+
# @return [Array<Hash>] the groups assigned to the cluster
def setup_fault_tolerance_behavior(fault_tolerance_groups, cluster)
  supplied_groups = fault_tolerance_groups || []

  defined_nodes = supplied_groups.map { |g| g['node_ids'] }.flatten.uniq
  all_nodes = cluster.nodes.map { |n| n[0].to_s }.reject { |n| sync_point?(n) }
  undefined_nodes = all_nodes - defined_nodes

  default_group = {
    'fault_tolerance' => 0,
    'name' => 'zero_tolerance_as_default_for_nodes',
    'node_ids' => undefined_nodes
  }

  # Array#+ builds a new array. Appending with << would modify the caller's
  # array and add the default group again on every repeated call.
  cluster.fault_tolerance_groups = supplied_groups + [default_group]
end
def setup_fail_behavior(tasks_graph, cluster)
return unless cluster.noop_run
tasks_graph.each do |node_id, tasks|

View File

@ -189,6 +189,35 @@ describe Astute::TaskDeployment do
expect(critical_nodes.size).to eql(3)
end
it 'should support default zero tolerance policy for error on nodes' do
  # Groups supplied by the caller; node '2' is mentioned by none of them.
  explicit_groups = [
    { 'fault_tolerance' => 0, 'name' => 'primary-controller', 'node_ids' => ['1'] },
    { 'fault_tolerance' => 1, 'name' => 'ceph', 'node_ids' => ['1', '3'] },
    { 'fault_tolerance' => 1, 'name' => 'ignored_group', 'node_ids' => [] }
  ]
  # Extra group expected for the uncovered node; the virtual sync node
  # must not show up in it.
  default_group = {
    'fault_tolerance' => 0,
    'name' => 'zero_tolerance_as_default_for_nodes',
    'node_ids' => ['2']
  }
  # Computed before the call so it is a separate array from the one passed in.
  expected_groups = explicit_groups + [default_group]

  cluster = mock('cluster')
  cluster_nodes = [
    ['1', mock('node_1')],
    ['2', mock('node_2')],
    ['3', mock('node_3')],
    ['virtual_sync_node', mock('null')]
  ]
  cluster.stubs(:nodes).returns(cluster_nodes)
  cluster.expects(:fault_tolerance_groups=).with(expected_groups)

  task_deployment.send(:setup_fault_tolerance_behavior, explicit_groups, cluster)
end
it 'should fail offline nodes' do
Astute::TaskPreDeploymentActions.any_instance.stubs(:process)
task_deployment.stubs(:write_graph_to_file)
@ -231,20 +260,6 @@ describe Astute::TaskDeployment do
tasks_directory: tasks_directory)
end
it 'should setup deployment logger' do
  # Neutralise everything around the deployment run itself.
  Astute::TaskPreDeploymentActions.any_instance.stubs(:process)
  task_deployment.stubs(:write_graph_to_file)
  ctx.stubs(:report)
  task_deployment.stubs(:fail_offline_nodes).returns([])
  Astute::TaskCluster.any_instance.stubs(:run).returns({ success: true })

  # The deployment library must be handed Astute's logger.
  Deployment::Log.expects(:logger=).with(Astute.logger)

  deployment_options = {
    :tasks_metadata => tasks_metadata,
    :tasks_graph => tasks_graph,
    :tasks_directory => tasks_directory
  }
  task_deployment.deploy(deployment_options)
end
context 'task concurrency' do
let(:task_concurrency) { mock('task_concurrency') }