Gracefully stop if tolerance limit exceeded
Several changes:
- support fault tolerance groups;
- support stopping the deployment internally instead of raising in case of error;
- do not show the last run summary debug report from mcollective;
- fix detection of offline nodes before running the deployment;
- support fail-on-error behavior.

Support fault tolerance groups

Nailgun sends fault tolerance groups, which inform Astute about the number of nodes that are allowed to fail in this deployment and about the importance of every node in this task. If the number of errors exceeds the allowed number, the deployment will stop.

Support internal stop of the deployment instead of raise in case of error

Before this change Astute ended processing, marked all nodes as error and did not wait for the puppet processes on the nodes. Now we use the same approach that is used when a deployment is stopped: failed nodes are marked as error, other nodes as skipped (stopped), and ready nodes as ready. Astute will also wait for the current tasks to finish.

Do not show the last run summary debug report from mcollective

At the moment it is not very useful, but it quickly fills the log file and makes debugging difficult.

Fix detection of offline nodes before running the deployment

Astute uses the response from mcollective to detect node availability. If a node does not respond, it is marked as failed. This also supports the fault tolerance mechanism.

Support fail-on-error behavior

From now on, a task whose fail_on_error is set to false is marked as skipped instead of failed in case of error.

Change-Id: Ica2a4ae64b4dfa4f7fccfbc95108d1412c40dc3f
Closes-Bug: #1435610
This commit is contained in:
parent
4df5a45042
commit
5a9f87c080
|
@ -16,3 +16,4 @@ docs/_build
|
|||
# Local raemon copy
|
||||
raemon/
|
||||
|
||||
*.svg
|
||||
|
|
|
@ -75,9 +75,9 @@ module Astute
|
|||
|
||||
deployment_engine = TaskDeployment.new(context)
|
||||
deployment_engine.deploy(
|
||||
deployment_info: deployment_options[:deployment_info],
|
||||
tasks_graph: deployment_options[:tasks_graph],
|
||||
tasks_directory: deployment_options[:tasks_directory],
|
||||
tasks_metadata: deployment_options[:tasks_metadata],
|
||||
dry_run: deployment_options.fetch(:dry_run, false)
|
||||
)
|
||||
ensure
|
||||
|
|
|
@ -85,7 +85,15 @@ module Astute
|
|||
private
|
||||
|
||||
def puppetd
|
||||
puppetd = MClient.new(@ctx, "puppetd", [@node['uid']])
|
||||
puppetd = MClient.new(
|
||||
@ctx,
|
||||
"puppetd",
|
||||
[@node['uid']],
|
||||
_check_result=true,
|
||||
_timeout=nil,
|
||||
_retries=Astute.config.mc_retries,
|
||||
_enable_result_logging=false
|
||||
)
|
||||
puppetd.on_respond_timeout do |uids|
|
||||
nodes = uids.map do |uid|
|
||||
{
|
||||
|
|
|
@ -116,9 +116,9 @@ module Astute
|
|||
reporter,
|
||||
data['args']['task_uuid'],
|
||||
{
|
||||
:deployment_info => data['args'].fetch('deployment_info', []),
|
||||
:tasks_graph => data['args'].fetch('tasks_graph', {}),
|
||||
:tasks_directory => data['args'].fetch('tasks_directory', {}),
|
||||
:tasks_metadata => data['args'].fetch('tasks_metadata', {}),
|
||||
:dry_run => data['args'].fetch('dry_run', false)
|
||||
}
|
||||
)
|
||||
|
|
|
@ -74,6 +74,30 @@ module Astute
|
|||
{}
|
||||
end
|
||||
|
||||
def finished?
|
||||
[:successful, :failed, :skipped].include? @status
|
||||
end
|
||||
|
||||
def successful?
|
||||
@status == :successful
|
||||
end
|
||||
|
||||
def pending?
|
||||
@status == :pending
|
||||
end
|
||||
|
||||
def skipped?
|
||||
@status == :skipped
|
||||
end
|
||||
|
||||
def running?
|
||||
@status == :running
|
||||
end
|
||||
|
||||
def failed?
|
||||
@status == :failed
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Run current task on node, specified in task
|
||||
|
@ -183,44 +207,20 @@ module Astute
|
|||
false
|
||||
end
|
||||
|
||||
def finished?
|
||||
[:successful, :failed, :skipped].include? @status
|
||||
end
|
||||
|
||||
def failed!
|
||||
self.status = :failed
|
||||
time_summary
|
||||
end
|
||||
|
||||
def failed?
|
||||
@status == :failed
|
||||
end
|
||||
|
||||
def running!
|
||||
self.status = :running
|
||||
end
|
||||
|
||||
def running?
|
||||
@status == :running
|
||||
end
|
||||
|
||||
def succeed!
|
||||
self.status = :successful
|
||||
time_summary
|
||||
end
|
||||
|
||||
def successful?
|
||||
@status == :successful
|
||||
end
|
||||
|
||||
def pending?
|
||||
@status == :pending
|
||||
end
|
||||
|
||||
def skipped?
|
||||
@status == :skipped
|
||||
end
|
||||
|
||||
def skipped!
|
||||
self.status = :skipped
|
||||
time_summary
|
||||
|
|
|
@ -15,28 +15,14 @@ require 'fuel_deployment'
|
|||
|
||||
module Astute
|
||||
class TaskCluster < Deployment::Cluster
|
||||
attr_accessor :gracefully_stop_mark
|
||||
|
||||
def stop_condition(&block)
|
||||
self.gracefully_stop_mark = block
|
||||
def hook_post_gracefully_stop(*args)
|
||||
report_new_node_status(args[0])
|
||||
end
|
||||
|
||||
def hook_post_node_poll(*args)
|
||||
gracefully_stop(args[0])
|
||||
end
|
||||
|
||||
# Check if the deployment process should stop
|
||||
# @return [true, false]
|
||||
def gracefully_stop?
|
||||
gracefully_stop_mark ? gracefully_stop_mark.call : false
|
||||
end
|
||||
|
||||
def gracefully_stop(node)
|
||||
if gracefully_stop? && node.ready?
|
||||
node.set_status_skipped
|
||||
node.report_node_status
|
||||
end
|
||||
def report_new_node_status(node)
|
||||
node.report_node_status
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -20,12 +20,10 @@ module Astute
|
|||
@ctx = context
|
||||
end
|
||||
|
||||
def deploy(tasks_graph: {}, tasks_directory: {} , deployment_info: [], dry_run: false)
|
||||
def deploy(tasks_graph: {}, tasks_directory: {} , tasks_metadata: {}, dry_run: false)
|
||||
raise DeploymentEngineError, "Deployment graph was not provided!" if
|
||||
tasks_graph.blank?
|
||||
|
||||
deployment_info, offline_uids = pre_deployment_process(deployment_info)
|
||||
|
||||
support_virtual_node(tasks_graph)
|
||||
unzip_graph(tasks_graph, tasks_directory)
|
||||
|
||||
|
@ -33,12 +31,20 @@ module Astute
|
|||
cluster = TaskCluster.new
|
||||
cluster.node_concurrency.maximum = Astute.config.max_nodes_per_call
|
||||
cluster.stop_condition { Thread.current[:gracefully_stop] }
|
||||
cluster.fault_tolerance_groups = tasks_metadata.fetch(
|
||||
'fault_tolerance_groups',
|
||||
[]
|
||||
)
|
||||
|
||||
offline_uids = fail_offline_nodes(tasks_graph)
|
||||
critical_uids = critical_node_uids(cluster.fault_tolerance_groups)
|
||||
|
||||
tasks_graph.keys.each do |node_id|
|
||||
node = TaskNode.new(node_id, cluster)
|
||||
node.context = @ctx
|
||||
node.set_critical if critical_node_uids(deployment_info).include?(node_id)
|
||||
node.set_status_failed if offline_uids.include? node_id
|
||||
node.set_critical if critical_uids.include?(node_id)
|
||||
node.set_as_sync_point if sync_point?(node_id)
|
||||
node.set_status_failed if offline_uids.include?(node_id)
|
||||
end
|
||||
|
||||
setup_tasks(tasks_graph, cluster)
|
||||
|
@ -57,6 +63,10 @@ module Astute
|
|||
|
||||
private
|
||||
|
||||
def sync_point?(node_id)
|
||||
'virtual_sync_node' == node_id
|
||||
end
|
||||
|
||||
def unzip_graph(tasks_graph, tasks_directory)
|
||||
tasks_graph.each do |node_id, tasks|
|
||||
tasks.each do |task|
|
||||
|
@ -111,34 +121,14 @@ module Astute
|
|||
"non-negative integer, but got #{value}. Please check task #{task}"
|
||||
end
|
||||
|
||||
def pre_deployment_process(deployment_info)
|
||||
return [[],[]] if deployment_info.blank?
|
||||
|
||||
deployment_info, offline_uids = remove_failed_nodes(deployment_info)
|
||||
Astute::TaskPreDeploymentActions.new(deployment_info, @ctx).process
|
||||
[deployment_info, offline_uids]
|
||||
end
|
||||
|
||||
def report_deploy_result(result)
|
||||
if result[:success]
|
||||
if result[:success] && result.fetch(:failed_nodes, []).empty?
|
||||
@ctx.report('status' => 'ready', 'progress' => 100)
|
||||
elsif result[:success] && result.fetch(:failed_nodes, []).present?
|
||||
report_failed_nodes(result)
|
||||
@ctx.report('status' => 'ready', 'progress' => 100)
|
||||
else
|
||||
result[:failed_nodes].each do |node|
|
||||
node_status = {
|
||||
'uid' => node.id,
|
||||
'status' => 'error',
|
||||
'error_type' => 'deploy',
|
||||
'error_msg' => result[:status]
|
||||
}
|
||||
task = result[:failed_tasks].find{ |t| t.node == node }
|
||||
if task
|
||||
node_status.merge!({
|
||||
'deployment_graph_task_name' => task.name,
|
||||
'task_status' => task.status.to_s
|
||||
})
|
||||
end
|
||||
@ctx.report('nodes' => [node_status])
|
||||
end
|
||||
report_failed_nodes(result)
|
||||
@ctx.report(
|
||||
'status' => 'error',
|
||||
'progress' => 100,
|
||||
|
@ -147,6 +137,25 @@ module Astute
|
|||
end
|
||||
end
|
||||
|
||||
def report_failed_nodes(result)
|
||||
result.fetch(:failed_nodes, []).each do |node|
|
||||
node_status = {
|
||||
'uid' => node.id,
|
||||
'status' => 'error',
|
||||
'error_type' => 'deploy',
|
||||
'error_msg' => result[:status]
|
||||
}
|
||||
task = result[:failed_tasks].find{ |t| t.node == node }
|
||||
if task
|
||||
node_status.merge!({
|
||||
'deployment_graph_task_name' => task.name,
|
||||
'task_status' => task.status.to_s
|
||||
})
|
||||
end
|
||||
@ctx.report('nodes' => [node_status])
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
def write_graph_to_file(deployment)
|
||||
return unless Astute.config.enable_graph_file
|
||||
|
@ -180,21 +189,20 @@ module Astute
|
|||
tasks_graph
|
||||
end
|
||||
|
||||
def critical_node_uids(deployment_info)
|
||||
@critical_nodes ||= deployment_info.select{ |n| n['fail_if_error'] }
|
||||
.map{ |n| n['uid'] }.uniq
|
||||
def critical_node_uids(fault_tolerance_groups)
|
||||
return [] unless fault_tolerance_groups
|
||||
critical_nodes = fault_tolerance_groups.inject([]) do |critical_uids, group|
|
||||
critical_uids += group['node_ids'] if group['fault_tolerance'].zero?
|
||||
critical_uids
|
||||
end
|
||||
Astute.logger.info "Critical node #{critical_nodes}" if critical_nodes.present?
|
||||
critical_nodes
|
||||
end
|
||||
|
||||
# Removes nodes which failed to provision
|
||||
def remove_failed_nodes(deployment_info)
|
||||
uids = get_uids_from_deployment_info deployment_info
|
||||
required_uids = critical_node_uids(deployment_info)
|
||||
|
||||
available_uids = detect_available_nodes(uids)
|
||||
offline_uids = uids - available_uids
|
||||
def fail_offline_nodes(tasks_graph)
|
||||
offline_uids = detect_offline_nodes(tasks_graph.keys)
|
||||
if offline_uids.present?
|
||||
# set status for all failed nodes to error
|
||||
nodes = (uids - available_uids).map do |uid|
|
||||
nodes = offline_uids.map do |uid|
|
||||
{'uid' => uid,
|
||||
'status' => 'error',
|
||||
'error_type' => 'provision',
|
||||
|
@ -208,9 +216,7 @@ module Astute
|
|||
'error' => 'Node is not ready for deployment'
|
||||
)
|
||||
|
||||
# check if all required nodes are online
|
||||
# if not, raise error
|
||||
missing_required = required_uids - available_uids
|
||||
missing_required = critical_node_uids(tasks_graph) & offline_uids
|
||||
if missing_required.present?
|
||||
error_message = "Critical nodes are not available for deployment: " \
|
||||
"#{missing_required}"
|
||||
|
@ -218,57 +224,21 @@ module Astute
|
|||
end
|
||||
end
|
||||
|
||||
return remove_offline_nodes(
|
||||
uids,
|
||||
available_uids,
|
||||
deployment_info,
|
||||
offline_uids)
|
||||
offline_uids
|
||||
end
|
||||
|
||||
def remove_offline_nodes(uids, available_uids, deployment_info, offline_uids)
|
||||
if offline_uids.blank?
|
||||
return [deployment_info, offline_uids]
|
||||
end
|
||||
|
||||
Astute.logger.info "Removing nodes which failed to provision: " \
|
||||
"#{offline_uids}"
|
||||
deployment_info = cleanup_nodes_block(deployment_info, offline_uids)
|
||||
deployment_info = deployment_info.select do |node|
|
||||
available_uids.include? node['uid']
|
||||
end
|
||||
|
||||
[deployment_info, offline_uids]
|
||||
end
|
||||
|
||||
def cleanup_nodes_block(deployment_info, offline_uids)
|
||||
return deployment_info if offline_uids.blank?
|
||||
|
||||
nodes = deployment_info.first['nodes']
|
||||
|
||||
# In case of deploy in already existing cluster in nodes block
|
||||
# we will have all cluster nodes. We should remove only missing
|
||||
# nodes instead of stay only available.
|
||||
# Example: deploy 3 nodes, after it deploy 2 nodes.
|
||||
# In 1 of 2 seconds nodes missing, in nodes block we should
|
||||
# contain only 4 nodes.
|
||||
nodes_wthout_missing = nodes.select do |node|
|
||||
!offline_uids.include?(node['uid'])
|
||||
end
|
||||
deployment_info.each { |node| node['nodes'] = nodes_wthout_missing }
|
||||
deployment_info
|
||||
end
|
||||
|
||||
def detect_available_nodes(uids)
|
||||
all_uids = uids.clone
|
||||
def detect_offline_nodes(uids)
|
||||
available_uids = []
|
||||
|
||||
uids.delete('master')
|
||||
uids.delete('virtual_sync_node')
|
||||
# In case of big amount of nodes we should do several calls to be sure
|
||||
# about node status
|
||||
Astute.config[:mc_retries].times.each do
|
||||
Astute.config.mc_retries.times.each do
|
||||
systemtype = Astute::MClient.new(
|
||||
@ctx,
|
||||
"systemtype",
|
||||
all_uids,
|
||||
uids,
|
||||
_check_result=false,
|
||||
10
|
||||
)
|
||||
|
@ -277,22 +247,15 @@ module Astute
|
|||
end
|
||||
|
||||
available_uids += available_nodes.map { |node| node.results[:sender] }
|
||||
all_uids -= available_uids
|
||||
break if all_uids.empty?
|
||||
uids -= available_uids
|
||||
break if uids.empty?
|
||||
|
||||
sleep Astute.config[:mc_retry_interval]
|
||||
sleep Astute.config.mc_retry_interval
|
||||
end
|
||||
|
||||
available_uids
|
||||
Astute.logger.warn "Offline node #{uids}" if uids.present?
|
||||
uids
|
||||
end
|
||||
|
||||
def get_uids_from_deployment_info(deployment_info)
|
||||
top_level_uids = deployment_info.map{ |node| node["uid"] }
|
||||
|
||||
inside_uids = deployment_info.inject([]) do |uids, node|
|
||||
uids += node.fetch('nodes', []).map{ |n| n['uid'] }
|
||||
end
|
||||
top_level_uids | inside_uids
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -37,7 +37,7 @@ module Astute
|
|||
# Please be informed that this code define special method
|
||||
# of Deployment::Node class. We use special method `task`
|
||||
# to manage task status, graph of tasks and nodes.
|
||||
task.status = @task_engine.status
|
||||
task.status = setup_task_status
|
||||
if @task.running?
|
||||
@ctx.report({
|
||||
'nodes' => [{
|
||||
|
@ -49,8 +49,7 @@ module Astute
|
|||
}]
|
||||
})
|
||||
else
|
||||
set_status_online
|
||||
|
||||
setup_node_status
|
||||
report_node_status
|
||||
end
|
||||
end
|
||||
|
@ -70,10 +69,13 @@ module Astute
|
|||
'uid' => id,
|
||||
'status' => deploy_status,
|
||||
'progress' => current_progress_bar,
|
||||
}
|
||||
|
||||
node_status.merge!(
|
||||
'deployment_graph_task_name' => task.name,
|
||||
'task_status' => task.status.to_s,
|
||||
'custom' => @task_engine.summary,
|
||||
}
|
||||
'custom' => @task_engine.summary
|
||||
) if task
|
||||
|
||||
node_status.merge!('error_type' => 'deploy') if
|
||||
deploy_status == 'error'
|
||||
|
@ -83,6 +85,27 @@ module Astute
|
|||
|
||||
private
|
||||
|
||||
# This method supports special task behavior. If a task failed
|
||||
# and we do not think that the deployment should be stopped, Astute
|
||||
# will mark such a task as skipped and will not report an error
|
||||
def setup_task_status
|
||||
if !task.data.fetch('fail_on_error', true) && @task_engine.failed?
|
||||
Astute.logger.warn "Task #{task.name} failed, but marked as skipped "\
|
||||
"because of 'fail on error' behavior"
|
||||
return :skipped
|
||||
end
|
||||
@task_engine.status
|
||||
end
|
||||
|
||||
def setup_node_status
|
||||
if task
|
||||
set_status_failed && return if task.failed?
|
||||
set_status_skipped && return if task.dep_failed?
|
||||
end
|
||||
|
||||
set_status_online
|
||||
end
|
||||
|
||||
def current_progress_bar
|
||||
100 * tasks_finished_count / tasks_total_count
|
||||
end
|
||||
|
|
|
@ -32,15 +32,19 @@ module Deployment
|
|||
@id = id
|
||||
@node_concurrency = Deployment::Concurrency::Counter.new
|
||||
@task_concurrency = Deployment::Concurrency::Group.new
|
||||
@emergency_brake = false
|
||||
end
|
||||
|
||||
include Enumerable
|
||||
include Deployment::Log
|
||||
|
||||
attr_accessor :id
|
||||
attr_accessor :gracefully_stop_mark
|
||||
attr_reader :emergency_brake
|
||||
attr_reader :nodes
|
||||
attr_reader :node_concurrency
|
||||
attr_reader :task_concurrency
|
||||
attr_reader :fault_tolerance_groups
|
||||
|
||||
# Add an existing node object to the cluster
|
||||
# @param [Deployment::Node] node a new node object
|
||||
|
@ -211,6 +215,7 @@ module Deployment
|
|||
hook 'pre_node', node
|
||||
return if node.skipped?
|
||||
node.poll
|
||||
hook 'internal_post_node_poll', node
|
||||
hook 'post_node_poll', node
|
||||
return unless node.ready?
|
||||
ready_task = node.ready_task
|
||||
|
@ -248,35 +253,42 @@ module Deployment
|
|||
def run
|
||||
ready_nodes = each_ready_task.to_a.join ', '
|
||||
info "Starting the deployment process. Starting tasks: #{ready_nodes}"
|
||||
hook 'internal_pre_run'
|
||||
hook 'pre_run'
|
||||
topology_sort
|
||||
result = loop do
|
||||
if all_nodes_are_successful?
|
||||
status = 'All nodes are deployed successfully. Stopping the deployment process!'
|
||||
status = 'All nodes are deployed successfully.'\
|
||||
'Stopping the deployment process!'
|
||||
result = {
|
||||
:success => true,
|
||||
:status => status,
|
||||
}
|
||||
break result
|
||||
end
|
||||
if has_failed_critical_nodes?
|
||||
status = "Critical nodes failed: #{failed_critical_nodes.join ', '}. Stopping the deployment process!"
|
||||
result = {
|
||||
:success => false,
|
||||
:status => status,
|
||||
:failed_nodes => failed_critical_nodes,
|
||||
:failed_tasks => failed_tasks,
|
||||
}
|
||||
break result
|
||||
end
|
||||
gracefully_stop! if has_failed_critical_nodes?
|
||||
|
||||
if all_nodes_are_finished?
|
||||
status = "All nodes are finished. Failed tasks: #{failed_tasks.join ', '} Stopping the deployment process!"
|
||||
result = {
|
||||
status = "All nodes are finished. Failed tasks: "\
|
||||
"#{failed_tasks.join ', '} Stopping the "\
|
||||
"deployment process!"
|
||||
result = if has_failed_critical_nodes?
|
||||
{
|
||||
:success => false,
|
||||
:status => status,
|
||||
:failed_nodes => failed_nodes,
|
||||
:failed_tasks => failed_tasks,
|
||||
}
|
||||
:skipped_nodes => skipped_nodes,
|
||||
:failed_tasks => failed_tasks
|
||||
}
|
||||
else
|
||||
{
|
||||
:success => true,
|
||||
:status => status,
|
||||
:failed_nodes => failed_nodes,
|
||||
:skipped_nodes => skipped_nodes,
|
||||
:failed_tasks => failed_tasks
|
||||
}
|
||||
end
|
||||
break result
|
||||
end
|
||||
# run loop over all nodes
|
||||
|
@ -300,7 +312,7 @@ module Deployment
|
|||
# @return [Array<Deployment::Node>]
|
||||
def failed_critical_nodes
|
||||
critical_nodes.select do |node|
|
||||
node.failed?
|
||||
node.failed? && !node.skipped?
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -315,10 +327,17 @@ module Deployment
|
|||
# @return [Array<Deployment::Node>]
|
||||
def failed_nodes
|
||||
select do |node|
|
||||
node.failed?
|
||||
node.failed? && !node.skipped?
|
||||
end
|
||||
end
|
||||
|
||||
def skipped_nodes
|
||||
select do |node|
|
||||
node.skipped?
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
# Get the list of the failed tasks
|
||||
# @return [Array<Deployment::Task>]
|
||||
def failed_tasks
|
||||
|
@ -476,6 +495,79 @@ digraph "<%= id || 'graph' %>" {
|
|||
end.sort
|
||||
end
|
||||
|
||||
def stop_condition(&block)
|
||||
self.gracefully_stop_mark = block
|
||||
end
|
||||
|
||||
def hook_internal_post_node_poll(*args)
|
||||
gracefully_stop(args[0])
|
||||
validate_fault_tolerance(args[0])
|
||||
end
|
||||
|
||||
def hook_internal_pre_run(*args)
|
||||
return unless has_failed_nodes?
|
||||
failed_nodes.each { |node| validate_fault_tolerance(node) }
|
||||
end
|
||||
|
||||
# Check if the deployment process should stop
|
||||
# @return [true, false]
|
||||
def gracefully_stop?
|
||||
return true if @emergency_brake
|
||||
if gracefully_stop_mark && gracefully_stop_mark.call
|
||||
info "Stop deployment by stop condition (external reason)"
|
||||
@emergency_brake = true
|
||||
end
|
||||
@emergency_brake
|
||||
end
|
||||
|
||||
def gracefully_stop(node)
|
||||
if gracefully_stop? && node.ready?
|
||||
node.set_status_skipped
|
||||
hook 'post_gracefully_stop', node
|
||||
end
|
||||
end
|
||||
|
||||
def gracefully_stop!
|
||||
return if @emergency_brake
|
||||
|
||||
info "Stop deployment by internal reason"
|
||||
@emergency_brake = true
|
||||
end
|
||||
|
||||
def fault_tolerance_groups=(groups=[])
|
||||
@fault_tolerance_groups = groups.select { |group| group['node_ids'].present? }
|
||||
@fault_tolerance_groups.each { |group| group['failed_node_ids'] = [] }
|
||||
debug "Setup fault tolerance groups: #{@fault_tolerance_groups}"
|
||||
end
|
||||
|
||||
def validate_fault_tolerance(node)
|
||||
return if gracefully_stop?
|
||||
|
||||
if node.failed?
|
||||
count_tolerance_fail(node)
|
||||
gracefully_stop! if fault_tolerance_excess?
|
||||
end
|
||||
end
|
||||
|
||||
def count_tolerance_fail(node)
|
||||
@fault_tolerance_groups.select do |g|
|
||||
g['node_ids'].include?(node.name)
|
||||
end.each do |group|
|
||||
debug "Count faild node #{node.name} for group #{group['name']}"
|
||||
group['fault_tolerance'] -= 1
|
||||
group['node_ids'].delete(node.name)
|
||||
group['failed_node_ids'] << node.name
|
||||
end
|
||||
end
|
||||
|
||||
def fault_tolerance_excess?
|
||||
is_failed = @fault_tolerance_groups.select { |group| group['fault_tolerance'] < 0 }
|
||||
return false if is_failed.empty?
|
||||
|
||||
warn "Fault tolerance exceeded the stop conditions #{is_failed}"
|
||||
true
|
||||
end
|
||||
|
||||
# @return [String]
|
||||
def to_s
|
||||
"Cluster[#{id}]"
|
||||
|
|
|
@ -203,7 +203,7 @@ module Deployment
|
|||
task.finished?
|
||||
end
|
||||
if finished
|
||||
debug 'All tasks are finished'
|
||||
debug "All tasks on node #{name} are finished"
|
||||
@tasks_are_finished = true
|
||||
end
|
||||
finished
|
||||
|
@ -220,7 +220,7 @@ module Deployment
|
|||
task.successful? || task.skipped?
|
||||
end
|
||||
if successful
|
||||
debug 'All tasks are successful'
|
||||
debug "All tasks on node #{name} are successful"
|
||||
@tasks_are_successful = true
|
||||
end
|
||||
successful
|
||||
|
@ -236,7 +236,7 @@ module Deployment
|
|||
task.failed?
|
||||
end
|
||||
if failed.any?
|
||||
debug "Found failed tasks: #{failed.map { |t| t.name }.join ', '}"
|
||||
debug "Found failed tasks on node #{name}: #{failed.map { |t| t.name }.join ', '}"
|
||||
@tasks_have_failed = true
|
||||
end
|
||||
failed.any?
|
||||
|
@ -311,7 +311,9 @@ module Deployment
|
|||
def inspect
|
||||
message = "#{self}{"
|
||||
message += "Tasks: #{tasks_finished_count}/#{tasks_total_count}"
|
||||
message += " Finished: #{tasks_are_finished?} Failed: #{tasks_have_failed?} Successful: #{tasks_are_successful?}"
|
||||
message += " Finished: #{tasks_are_finished?}"
|
||||
message += " Failed: #{tasks_have_failed?}"
|
||||
message += " Successful: #{tasks_are_successful?}"
|
||||
message + '}'
|
||||
end
|
||||
end
|
||||
|
|
|
@ -34,6 +34,8 @@ module Deployment
|
|||
ALLOWED_STATUSES = [:online, :busy, :offline, :failed, :successful, :skipped]
|
||||
# A node is considered finished with one of these statuses
|
||||
FINISHED_STATUSES = [:failed, :successful, :skipped]
|
||||
# A node is considered failed with these statuses
|
||||
FAILED_STATUSES = [:failed]
|
||||
|
||||
# @param [String, Symbol] name
|
||||
# @param [Deployment::Cluster] cluster
|
||||
|
@ -61,6 +63,8 @@ module Deployment
|
|||
attr_accessor :id
|
||||
attr_reader :critical
|
||||
alias :critical? :critical
|
||||
attr_reader :sync_point
|
||||
alias :sync_point? :sync_point
|
||||
|
||||
# Set a new status of this node
|
||||
# @param [Symbol, String] value
|
||||
|
@ -83,15 +87,36 @@ module Deployment
|
|||
# Set this node to be a critical node
|
||||
# @return [true]
|
||||
def set_critical
|
||||
debug "Setup #{self} as critical node"
|
||||
self.critical = true
|
||||
end
|
||||
|
||||
# Set this node to be a normal node
|
||||
# @return [false]
|
||||
def set_normal
|
||||
debug "Setup #{self} as normal node"
|
||||
self.critical = false
|
||||
end
|
||||
|
||||
# Set this node as sync point node
|
||||
# @return [true]
|
||||
def set_as_sync_point
|
||||
self.sync_point = true
|
||||
end
|
||||
|
||||
# Unset this node as a sync point (make it a normal node)
|
||||
# @return [false]
|
||||
def unset_as_sync_point
|
||||
self.sync_point = false
|
||||
end
|
||||
|
||||
# Set the sync point property of this node
|
||||
# @param [true, false] value
|
||||
# @return [true, false]
|
||||
def sync_point=(value)
|
||||
@sync_point = !!value
|
||||
end
|
||||
|
||||
# Set this node's Cluster Object
|
||||
# @param [Deployment::Cluster] cluster The new cluster object
|
||||
# @raise [Deployment::InvalidArgument] if the object is not a Node
|
||||
|
@ -169,7 +194,7 @@ module Deployment
|
|||
# or has the failed status
|
||||
# @return [true, false]
|
||||
def failed?
|
||||
status == :failed or tasks_have_failed?
|
||||
FAILED_STATUSES.include? status or tasks_have_failed?
|
||||
end
|
||||
|
||||
# The node has all tasks successful
|
||||
|
@ -181,7 +206,7 @@ module Deployment
|
|||
|
||||
# The node is skipped and will not get any tasks
|
||||
def skipped?
|
||||
status == :skipped
|
||||
status == :skipped #or tasks_have_only_dep_failed?
|
||||
end
|
||||
|
||||
ALLOWED_STATUSES.each do |status|
|
||||
|
@ -235,7 +260,7 @@ module Deployment
|
|||
def inspect
|
||||
message = "#{self}{Status: #{status}"
|
||||
message += " Tasks: #{tasks_finished_count}/#{tasks_total_count}"
|
||||
message += " CurrentTask: #{task.name}" if task
|
||||
message += " CurrentTask: #{task.name}, task status: #{task.status}" if task
|
||||
message + '}'
|
||||
end
|
||||
|
||||
|
|
|
@ -315,6 +315,7 @@ module Deployment
|
|||
# task are failed and set dep_failed status if so.
|
||||
# @return [true, false]
|
||||
def check_for_failed_dependencies
|
||||
return if self.sync_point?
|
||||
return false if FAILED_STATUSES.include? status
|
||||
failed = each_backward_dependency.any? do |task|
|
||||
FAILED_STATUSES.include? task.status
|
||||
|
@ -329,17 +330,28 @@ module Deployment
|
|||
def check_for_ready_dependencies
|
||||
return false unless status == :pending
|
||||
ready = each_backward_dependency.all? do |task|
|
||||
SUCCESS_STATUSES.include? task.status
|
||||
ready_statuses = SUCCESS_STATUSES
|
||||
ready_statuses += FAILED_STATUSES if sync_point?
|
||||
ready_statuses.include? task.status
|
||||
end
|
||||
self.status = :ready if ready
|
||||
ready
|
||||
end
|
||||
|
||||
# Set this task to dep_failed if its node has failed and the task has not been run
|
||||
def check_for_node_status
|
||||
return unless node
|
||||
if Deployment::Node::FAILED_STATUSES.include? node.status and NOT_RUN_STATUSES.include? status
|
||||
self.status = :dep_failed
|
||||
end
|
||||
end
|
||||
|
||||
# Poll direct task dependencies if
|
||||
# the failed or ready status of this task should change
|
||||
def poll_dependencies
|
||||
check_for_ready_dependencies
|
||||
check_for_failed_dependencies
|
||||
check_for_node_status
|
||||
end
|
||||
alias :poll :poll_dependencies
|
||||
|
||||
|
@ -408,6 +420,24 @@ module Deployment
|
|||
FAILED_STATUSES.include? status
|
||||
end
|
||||
|
||||
# This task has not been run because of failed dependencies
|
||||
# @return [true, false]
|
||||
def dep_failed?
|
||||
status == :dep_failed
|
||||
end
|
||||
|
||||
# # This task failed
|
||||
# # @return [true, false]
|
||||
# def abortive?
|
||||
# status == :failed
|
||||
# end
|
||||
|
||||
# This task is a sync point
|
||||
# @return [true, false]
|
||||
def sync_point?
|
||||
self.node.sync_point?
|
||||
end
|
||||
|
||||
# @return [String]
|
||||
def to_s
|
||||
"Task[#{name}/#{node.name}]"
|
||||
|
@ -445,7 +475,7 @@ module Deployment
|
|||
poll_dependencies
|
||||
case status
|
||||
when :pending;
|
||||
:white
|
||||
sync_point? ? :cyan : :white
|
||||
when :ready
|
||||
:yellow
|
||||
when :successful;
|
||||
|
|
|
@ -33,7 +33,7 @@ $LOAD_PATH << lib_dir
|
|||
require 'astute'
|
||||
require 'fuel_deployment'
|
||||
|
||||
Deployment::Log.logger.level = Logger::WARN
|
||||
Deployment::Log.logger.level = Logger::DEBUG
|
||||
|
||||
Dir[File.join(File.dirname(__FILE__), 'unit/fixtures/*.rb')].each { |file| require file }
|
||||
|
||||
|
|
|
@ -200,6 +200,54 @@ describe Deployment::Cluster do
|
|||
expect(subject.has_failed_nodes?).to eq true
|
||||
end
|
||||
|
||||
context 'fault_tolerance_groups' do
|
||||
|
||||
let(:fault_tolerance_groups) do
|
||||
[{
|
||||
"fault_tolerance"=>1,
|
||||
"name"=>"test_group",
|
||||
"node_ids"=>['node2']
|
||||
},
|
||||
{
|
||||
"fault_tolerance"=> 0,
|
||||
"name"=>"test_group2",
|
||||
"node_ids"=>[]
|
||||
}]
|
||||
end
|
||||
|
||||
it 'can find tolerance group' do
|
||||
cluster.fault_tolerance_groups = fault_tolerance_groups
|
||||
task1_1.status = :successful
|
||||
task1_2.status = :successful
|
||||
task2_1.status = :successful
|
||||
task2_2.status = :failed
|
||||
expect(cluster.fault_tolerance_groups).to eq [fault_tolerance_groups.first]
|
||||
end
|
||||
|
||||
it 'can validate tolerance group' do
|
||||
cluster.fault_tolerance_groups = fault_tolerance_groups
|
||||
task1_1.status = :successful
|
||||
task1_2.status = :successful
|
||||
task2_1.status = :failed
|
||||
cluster.validate_fault_tolerance(node1)
|
||||
cluster.validate_fault_tolerance(node2)
|
||||
expect(cluster.fault_tolerance_excess?).to eq false
|
||||
expect(cluster.gracefully_stop?).to eq false
|
||||
end
|
||||
|
||||
it 'can control deploy using tolerance group' do
|
||||
fault_tolerance_groups.first['fault_tolerance'] = 0
|
||||
cluster.fault_tolerance_groups = fault_tolerance_groups
|
||||
task1_1.status = :successful
|
||||
task1_2.status = :successful
|
||||
task2_1.status = :failed
|
||||
cluster.validate_fault_tolerance(node1)
|
||||
cluster.validate_fault_tolerance(node2)
|
||||
expect(cluster.fault_tolerance_excess?).to eq true
|
||||
expect(cluster.gracefully_stop?).to eq true
|
||||
end
|
||||
end
|
||||
|
||||
it 'can find critical nodes' do
|
||||
expect(subject.critical_nodes).to eq([])
|
||||
node1.critical = true
|
||||
|
|
|
@ -245,7 +245,7 @@ describe Deployment::Node do
|
|||
subject.status = :offline
|
||||
expect(subject.inspect).to eq 'Node[node1]{Status: offline Tasks: 0/1}'
|
||||
subject.task = task1
|
||||
expect(subject.inspect).to eq 'Node[node1]{Status: offline Tasks: 0/1 CurrentTask: task1}'
|
||||
expect(subject.inspect).to eq 'Node[node1]{Status: offline Tasks: 0/1 CurrentTask: task1, task status: ready}'
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -25,7 +25,11 @@ describe Astute::Orchestrator do
|
|||
end
|
||||
|
||||
describe '#task_deployment' do
|
||||
let(:deployment_info) { [] }
|
||||
let(:tasks_metadata) do
|
||||
{
|
||||
'fault_tolerance_groups' => []
|
||||
}
|
||||
end
|
||||
|
||||
let(:tasks_graph) do
|
||||
{"1"=>
|
||||
|
@ -63,7 +67,7 @@ describe Astute::Orchestrator do
|
|||
|
||||
it 'should run task deployment' do
|
||||
Astute::TaskDeployment.any_instance.expects(:deploy).with(
|
||||
:deployment_info => deployment_info,
|
||||
:tasks_metadata => tasks_metadata,
|
||||
:tasks_graph => tasks_graph,
|
||||
:tasks_directory => tasks_directory,
|
||||
:dry_run => false
|
||||
|
@ -73,7 +77,7 @@ describe Astute::Orchestrator do
|
|||
@reporter,
|
||||
'task_id',
|
||||
{
|
||||
:deployment_info => deployment_info,
|
||||
:tasks_metadata => tasks_metadata,
|
||||
:tasks_graph => tasks_graph,
|
||||
:tasks_directory => tasks_directory
|
||||
}
|
||||
|
@ -92,7 +96,7 @@ describe Astute::Orchestrator do
|
|||
@reporter,
|
||||
'task_id',
|
||||
{
|
||||
:deployment_info => deployment_info,
|
||||
:tasks_metadata => tasks_metadata,
|
||||
:tasks_graph => tasks_graph,
|
||||
:tasks_directory => tasks_directory
|
||||
}
|
||||
|
|
|
@ -22,23 +22,27 @@ describe Astute::TaskCluster do
|
|||
|
||||
let(:node) { Astute::TaskNode.new('node_name', subject) }
|
||||
|
||||
describe "#hook_post_node_poll" do
|
||||
before(:each) do
|
||||
subject.stubs(:validate_fault_tolerance)
|
||||
end
|
||||
|
||||
describe "#hook_internal_post_node_poll" do
|
||||
it 'should call gracefully_stop with node' do
|
||||
subject.expects(:gracefully_stop).with(node)
|
||||
subject.hook_post_node_poll(node)
|
||||
subject.hook_internal_post_node_poll(node)
|
||||
end
|
||||
end
|
||||
|
||||
describe "#gracefully_stop" do
|
||||
it 'should check if node should be stopped' do
|
||||
subject.expects(:gracefully_stop?).returns(false)
|
||||
subject.hook_post_node_poll(node)
|
||||
subject.hook_internal_post_node_poll(node)
|
||||
end
|
||||
|
||||
it 'should check if node ready' do
|
||||
subject.stop_condition { true }
|
||||
node.expects(:ready?).returns(false)
|
||||
subject.hook_post_node_poll(node)
|
||||
subject.hook_internal_post_node_poll(node)
|
||||
end
|
||||
|
||||
it 'should set node status as skipped if stopped' do
|
||||
|
@ -47,7 +51,7 @@ describe Astute::TaskCluster do
|
|||
node.stubs(:report_node_status)
|
||||
|
||||
node.expects(:set_status_skipped).once
|
||||
subject.hook_post_node_poll(node)
|
||||
subject.hook_internal_post_node_poll(node)
|
||||
end
|
||||
|
||||
it 'should report new node status if stopped' do
|
||||
|
@ -56,7 +60,7 @@ describe Astute::TaskCluster do
|
|||
node.stubs(:set_status_skipped).once
|
||||
|
||||
node.expects(:report_node_status)
|
||||
subject.hook_post_node_poll(node)
|
||||
subject.hook_internal_post_node_poll(node)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -24,13 +24,26 @@ describe Astute::TaskDeployment do
|
|||
ctx
|
||||
end
|
||||
|
||||
let(:deployment_info) do
|
||||
[
|
||||
let(:tasks_metadata) do
|
||||
{
|
||||
'uid' => '1',
|
||||
'fail_if_error' => false
|
||||
'fault_tolerance_groups' =>[
|
||||
{"fault_tolerance"=>0, "name"=>"primary-controller", "node_ids"=>["1"]},
|
||||
{"fault_tolerance"=>1, "name"=>"controller", "node_ids"=>[]},
|
||||
{"fault_tolerance"=>0, "name"=>"cinder", "node_ids"=>[]},
|
||||
{"fault_tolerance"=>0, "name"=>"cinder-block-device", "node_ids"=>[]},
|
||||
{"fault_tolerance"=>1, "name"=>"cinder-vmware", "node_ids"=>[]},
|
||||
{"fault_tolerance"=>0, "name"=>"compute", "node_ids"=>["3", "2"]},
|
||||
{"fault_tolerance"=>1, "name"=>"compute-vmware", "node_ids"=>[]},
|
||||
{"fault_tolerance"=>1, "name"=>"mongo", "node_ids"=>[]},
|
||||
{"fault_tolerance"=>1, "name"=>"primary-mongo", "node_ids"=>[]},
|
||||
{"fault_tolerance"=>1,
|
||||
"name"=>"ceph-osd",
|
||||
"node_ids"=>["3", "2", "5", "4"]},
|
||||
{"fault_tolerance"=>1, "name"=>"base-os", "node_ids"=>[]},
|
||||
{"fault_tolerance"=>1, "name"=>"virt", "node_ids"=>[]},
|
||||
{"fault_tolerance"=>1, "name"=>"ironic", "node_ids"=>[]}
|
||||
]
|
||||
}
|
||||
]
|
||||
end
|
||||
|
||||
let(:tasks_graph) do
|
||||
|
@ -41,7 +54,7 @@ describe Astute::TaskDeployment do
|
|||
"required_for"=>[],
|
||||
"requires"=> [],
|
||||
"id"=>"ironic_post_swift_key",
|
||||
"parameters"=>{}
|
||||
"parameters"=>{},
|
||||
}],
|
||||
"null"=> [{
|
||||
"skipped"=>true,
|
||||
|
@ -74,21 +87,19 @@ describe Astute::TaskDeployment do
|
|||
|
||||
describe '#deploy' do
|
||||
it 'should run deploy' do
|
||||
task_deployment.stubs(:remove_failed_nodes).returns([deployment_info, []])
|
||||
Astute::TaskPreDeploymentActions.any_instance.stubs(:process)
|
||||
task_deployment.stubs(:fail_offline_nodes).returns([])
|
||||
task_deployment.stubs(:write_graph_to_file)
|
||||
ctx.stubs(:report)
|
||||
|
||||
Astute::TaskCluster.any_instance.expects(:run).returns({:success => true})
|
||||
task_deployment.deploy(
|
||||
deployment_info: deployment_info,
|
||||
tasks_metadata: tasks_metadata,
|
||||
tasks_graph: tasks_graph,
|
||||
tasks_directory: tasks_directory)
|
||||
end
|
||||
|
||||
it 'should not raise error if deployment info not provided' do
|
||||
task_deployment.stubs(:remove_failed_nodes).returns([deployment_info, []])
|
||||
Astute::TaskPreDeploymentActions.any_instance.stubs(:process)
|
||||
task_deployment.stubs(:fail_offline_nodes).returns([])
|
||||
task_deployment.stubs(:write_graph_to_file)
|
||||
ctx.stubs(:report)
|
||||
|
||||
|
@ -106,40 +117,33 @@ describe Astute::TaskDeployment do
|
|||
)
|
||||
end
|
||||
|
||||
it 'should run pre deployment task' do
|
||||
task_deployment.stubs(:remove_failed_nodes).returns([deployment_info, []])
|
||||
task_deployment.stubs(:write_graph_to_file)
|
||||
ctx.stubs(:report)
|
||||
Astute::TaskCluster.any_instance.stubs(:run).returns({:success => true})
|
||||
|
||||
pre_deployment = Astute::TaskPreDeploymentActions.new(deployment_info, ctx)
|
||||
Astute::TaskPreDeploymentActions.expects(:new)
|
||||
.with(deployment_info, ctx)
|
||||
.returns(pre_deployment)
|
||||
Astute::TaskPreDeploymentActions.any_instance.expects(:process)
|
||||
task_deployment.deploy(
|
||||
deployment_info: deployment_info,
|
||||
tasks_graph: tasks_graph,
|
||||
tasks_directory: tasks_directory)
|
||||
end
|
||||
|
||||
it 'should support virtual node' do
|
||||
d_t = task_deployment.send(:support_virtual_node, tasks_graph)
|
||||
expect(d_t.keys).to include 'virtual_sync_node'
|
||||
expect(d_t.keys).not_to include 'null'
|
||||
end
|
||||
|
||||
it 'should remove failed nodes' do
|
||||
#TODO(vsharshov): improve remove failed nodes check. Check mcollective
|
||||
it 'should support critical nodes' do
|
||||
critical_nodes = task_deployment.send(
|
||||
:critical_node_uids,
|
||||
tasks_metadata['fault_tolerance_groups']
|
||||
)
|
||||
expect(critical_nodes).to include '1'
|
||||
expect(critical_nodes).to include '2'
|
||||
expect(critical_nodes).to include '3'
|
||||
expect(critical_nodes.size).to eql(3)
|
||||
end
|
||||
|
||||
it 'should fail offline nodes' do
|
||||
Astute::TaskPreDeploymentActions.any_instance.stubs(:process)
|
||||
task_deployment.stubs(:write_graph_to_file)
|
||||
ctx.stubs(:report)
|
||||
|
||||
task_deployment.expects(:remove_failed_nodes).returns([deployment_info, []])
|
||||
task_deployment.expects(:fail_offline_nodes).returns([])
|
||||
|
||||
Astute::TaskCluster.any_instance.stubs(:run).returns({:success => true})
|
||||
task_deployment.deploy(
|
||||
deployment_info: deployment_info,
|
||||
tasks_metadata: tasks_metadata,
|
||||
tasks_graph: tasks_graph,
|
||||
tasks_directory: tasks_directory)
|
||||
end
|
||||
|
@ -148,12 +152,12 @@ describe Astute::TaskDeployment do
|
|||
Astute::TaskPreDeploymentActions.any_instance.stubs(:process)
|
||||
task_deployment.stubs(:write_graph_to_file)
|
||||
ctx.stubs(:report)
|
||||
task_deployment.stubs(:remove_failed_nodes).returns([deployment_info, []])
|
||||
task_deployment.stubs(:fail_offline_nodes).returns([])
|
||||
Astute::TaskCluster.any_instance.stubs(:run).returns({:success => true})
|
||||
|
||||
Astute::TaskCluster.any_instance.expects(:stop_condition)
|
||||
task_deployment.deploy(
|
||||
deployment_info: deployment_info,
|
||||
tasks_metadata: tasks_metadata,
|
||||
tasks_graph: tasks_graph,
|
||||
tasks_directory: tasks_directory)
|
||||
end
|
||||
|
@ -162,12 +166,12 @@ describe Astute::TaskDeployment do
|
|||
Astute::TaskPreDeploymentActions.any_instance.stubs(:process)
|
||||
task_deployment.stubs(:write_graph_to_file)
|
||||
ctx.stubs(:report)
|
||||
task_deployment.stubs(:remove_failed_nodes).returns([deployment_info, []])
|
||||
task_deployment.stubs(:fail_offline_nodes).returns([])
|
||||
Astute::TaskCluster.any_instance.stubs(:run).returns({:success => true})
|
||||
|
||||
Deployment::Log.expects(:logger=).with(Astute.logger)
|
||||
task_deployment.deploy(
|
||||
deployment_info: deployment_info,
|
||||
tasks_metadata: tasks_metadata,
|
||||
tasks_graph: tasks_graph,
|
||||
tasks_directory: tasks_directory)
|
||||
end
|
||||
|
@ -176,10 +180,9 @@ describe Astute::TaskDeployment do
|
|||
let(:task_concurrency) { mock('task_concurrency') }
|
||||
|
||||
before(:each) do
|
||||
Astute::TaskPreDeploymentActions.any_instance.stubs(:process)
|
||||
task_deployment.stubs(:write_graph_to_file)
|
||||
ctx.stubs(:report)
|
||||
task_deployment.stubs(:remove_failed_nodes).returns([deployment_info, []])
|
||||
task_deployment.stubs(:fail_offline_nodes).returns([])
|
||||
Astute::TaskCluster.any_instance.stubs(:run).returns({:success => true})
|
||||
Deployment::Concurrency::Counter.any_instance
|
||||
.stubs(:maximum=).with(
|
||||
|
@ -190,7 +193,7 @@ describe Astute::TaskDeployment do
|
|||
Deployment::Concurrency::Counter.any_instance.expects(:maximum=).with(0).times(5)
|
||||
|
||||
task_deployment.deploy(
|
||||
deployment_info: deployment_info,
|
||||
tasks_metadata: tasks_metadata,
|
||||
tasks_graph: tasks_graph,
|
||||
tasks_directory: tasks_directory)
|
||||
end
|
||||
|
@ -204,7 +207,7 @@ describe Astute::TaskDeployment do
|
|||
.with(1)
|
||||
|
||||
task_deployment.deploy(
|
||||
deployment_info: deployment_info,
|
||||
tasks_metadata: tasks_metadata,
|
||||
tasks_graph: tasks_graph,
|
||||
tasks_directory: tasks_directory)
|
||||
end
|
||||
|
@ -218,7 +221,7 @@ describe Astute::TaskDeployment do
|
|||
.with(7)
|
||||
|
||||
task_deployment.deploy(
|
||||
deployment_info: deployment_info,
|
||||
tasks_metadata: tasks_metadata,
|
||||
tasks_graph: tasks_graph,
|
||||
tasks_directory: tasks_directory)
|
||||
end
|
||||
|
@ -229,7 +232,7 @@ describe Astute::TaskDeployment do
|
|||
.with(0).times(5)
|
||||
|
||||
task_deployment.deploy(
|
||||
deployment_info: deployment_info,
|
||||
tasks_metadata: tasks_metadata,
|
||||
tasks_graph: tasks_graph,
|
||||
tasks_directory: tasks_directory)
|
||||
end
|
||||
|
@ -241,7 +244,7 @@ describe Astute::TaskDeployment do
|
|||
.with(0).times(2)
|
||||
|
||||
expect {task_deployment.deploy(
|
||||
deployment_info: deployment_info,
|
||||
tasks_metadata: tasks_metadata,
|
||||
tasks_graph: tasks_graph,
|
||||
tasks_directory: tasks_directory)}.to raise_error(
|
||||
Astute::DeploymentEngineError, /expect only non-negative integer, but got -4./
|
||||
|
@ -252,15 +255,14 @@ describe Astute::TaskDeployment do
|
|||
|
||||
context 'dry_run' do
|
||||
it 'should not run actual deployment if dry_run is set to True' do
|
||||
task_deployment.stubs(:remove_failed_nodes).returns([deployment_info, []])
|
||||
Astute::TaskPreDeploymentActions.any_instance.stubs(:process)
|
||||
task_deployment.stubs(:fail_offline_nodes).returns([])
|
||||
task_deployment.stubs(:write_graph_to_file)
|
||||
ctx.stubs(:report)
|
||||
|
||||
Astute::TaskCluster.any_instance.expects(:run).never
|
||||
|
||||
task_deployment.deploy(
|
||||
deployment_info: deployment_info,
|
||||
tasks_metadata: tasks_metadata,
|
||||
tasks_graph: tasks_graph,
|
||||
tasks_directory: tasks_directory,
|
||||
dry_run: true)
|
||||
|
@ -277,8 +279,7 @@ describe Astute::TaskDeployment do
|
|||
it 'should setup max nodes per call using config' do
|
||||
Astute.config.max_nodes_per_call = 33
|
||||
|
||||
task_deployment.stubs(:remove_failed_nodes).returns([deployment_info, []])
|
||||
Astute::TaskPreDeploymentActions.any_instance.stubs(:process)
|
||||
task_deployment.stubs(:fail_offline_nodes).returns([])
|
||||
task_deployment.stubs(:write_graph_to_file)
|
||||
ctx.stubs(:report)
|
||||
|
||||
|
@ -293,7 +294,7 @@ describe Astute::TaskDeployment do
|
|||
node_concurrency.expects(:maximum=).with(Astute.config.max_nodes_per_call)
|
||||
|
||||
task_deployment.deploy(
|
||||
deployment_info: deployment_info,
|
||||
tasks_metadata: tasks_metadata,
|
||||
tasks_graph: tasks_graph,
|
||||
tasks_directory: tasks_directory)
|
||||
end
|
||||
|
@ -302,21 +303,18 @@ describe Astute::TaskDeployment do
|
|||
context 'should report final status' do
|
||||
|
||||
it 'succeed status' do
|
||||
Astute::TaskPreDeploymentActions.any_instance.stubs(:process)
|
||||
Astute::TaskCluster.any_instance.stubs(:run).returns({:success => true})
|
||||
task_deployment.stubs(:remove_failed_nodes).returns([deployment_info, []])
|
||||
task_deployment.stubs(:fail_offline_nodes).returns([])
|
||||
task_deployment.stubs(:write_graph_to_file)
|
||||
ctx.expects(:report).with({'status' => 'ready', 'progress' => 100})
|
||||
|
||||
task_deployment.deploy(
|
||||
deployment_info: deployment_info,
|
||||
tasks_metadata: tasks_metadata,
|
||||
tasks_graph: tasks_graph,
|
||||
tasks_directory: tasks_directory)
|
||||
end
|
||||
|
||||
it 'failed status' do
|
||||
Astute::TaskPreDeploymentActions.any_instance.stubs(:process)
|
||||
|
||||
failed_node = mock('node')
|
||||
failed_node.expects(:id).returns('1')
|
||||
|
||||
|
@ -330,7 +328,7 @@ describe Astute::TaskDeployment do
|
|||
:failed_nodes => [failed_node],
|
||||
:failed_tasks => [failed_task],
|
||||
:status => 'Failed because of'})
|
||||
task_deployment.stubs(:remove_failed_nodes).returns([deployment_info, []])
|
||||
task_deployment.stubs(:fail_offline_nodes).returns([])
|
||||
task_deployment.stubs(:write_graph_to_file)
|
||||
ctx.expects(:report).with('nodes' => [{
|
||||
'uid' => '1',
|
||||
|
@ -346,7 +344,7 @@ describe Astute::TaskDeployment do
|
|||
'error' => 'Failed because of'})
|
||||
|
||||
task_deployment.deploy(
|
||||
deployment_info: deployment_info,
|
||||
tasks_metadata: tasks_metadata,
|
||||
tasks_graph: tasks_graph,
|
||||
tasks_directory: tasks_directory)
|
||||
end
|
||||
|
@ -363,8 +361,7 @@ describe Astute::TaskDeployment do
|
|||
it 'should write if disable' do
|
||||
Astute.config.enable_graph_file = false
|
||||
|
||||
task_deployment.stubs(:remove_failed_nodes).returns([deployment_info, []])
|
||||
Astute::TaskPreDeploymentActions.any_instance.stubs(:process)
|
||||
task_deployment.stubs(:fail_offline_nodes).returns([])
|
||||
ctx.stubs(:report)
|
||||
Astute::TaskCluster.any_instance.stubs(:run).returns({:success => true})
|
||||
|
||||
|
@ -374,7 +371,7 @@ describe Astute::TaskDeployment do
|
|||
.yields(file_handle).never
|
||||
|
||||
task_deployment.deploy(
|
||||
deployment_info: deployment_info,
|
||||
tasks_metadata: tasks_metadata,
|
||||
tasks_graph: tasks_graph,
|
||||
tasks_directory: tasks_directory)
|
||||
end
|
||||
|
@ -382,8 +379,7 @@ describe Astute::TaskDeployment do
|
|||
it 'should write graph if enable' do
|
||||
Astute.config.enable_graph_file = true
|
||||
|
||||
task_deployment.stubs(:remove_failed_nodes).returns([deployment_info, []])
|
||||
Astute::TaskPreDeploymentActions.any_instance.stubs(:process)
|
||||
task_deployment.stubs(:fail_offline_nodes).returns([])
|
||||
ctx.stubs(:report)
|
||||
Astute::TaskCluster.any_instance.stubs(:run).returns({:success => true})
|
||||
|
||||
|
@ -393,7 +389,7 @@ describe Astute::TaskDeployment do
|
|||
.yields(file_handle).once
|
||||
|
||||
task_deployment.deploy(
|
||||
deployment_info: deployment_info,
|
||||
tasks_metadata: tasks_metadata,
|
||||
tasks_graph: tasks_graph,
|
||||
tasks_directory: tasks_directory)
|
||||
end
|
||||
|
|
|
@ -247,17 +247,19 @@ describe Astute::TaskNode do
|
|||
Astute::Puppet.any_instance.stubs(:run)
|
||||
end
|
||||
|
||||
context 'mark online' do
|
||||
it 'if task successful' do
|
||||
Astute::Puppet.any_instance.stubs(:status).returns(:successful)
|
||||
context 'mark failed' do
|
||||
it 'if task failed' do
|
||||
Astute::Puppet.any_instance.stubs(:status).returns(:failed)
|
||||
ctx.stubs(:report)
|
||||
task_node.run(task)
|
||||
task_node.poll
|
||||
expect(task_node.status).to eql(:online)
|
||||
expect(task_node.status).to eql(:failed)
|
||||
end
|
||||
end
|
||||
|
||||
it 'if task failed' do
|
||||
Astute::Puppet.any_instance.stubs(:status).returns(:failed)
|
||||
context 'mark online' do
|
||||
it 'if task successful' do
|
||||
Astute::Puppet.any_instance.stubs(:status).returns(:successful)
|
||||
ctx.stubs(:report)
|
||||
task_node.run(task)
|
||||
task_node.poll
|
||||
|
@ -396,7 +398,7 @@ describe Astute::TaskNode do
|
|||
task_node.poll
|
||||
end
|
||||
|
||||
it 'should report deploy progress if task failed and another tasks exists' do
|
||||
it 'should not report deploy progress if task failed and another tasks exists' do
|
||||
Astute::Puppet.any_instance.expects(:status).returns(:failed)
|
||||
task_node.graph.create_task(
|
||||
'second_task',
|
||||
|
@ -404,15 +406,7 @@ describe Astute::TaskNode do
|
|||
)
|
||||
|
||||
task_node.run(task)
|
||||
ctx.expects(:report).with({
|
||||
'nodes' => [{
|
||||
'uid' => 'node_id',
|
||||
'status' => 'deploying',
|
||||
'deployment_graph_task_name' => task.name,
|
||||
'custom' => {},
|
||||
'task_status' => 'failed',
|
||||
'progress' => 50}]
|
||||
})
|
||||
ctx.expects(:report).never
|
||||
task_node.poll
|
||||
end
|
||||
end
|
||||
|
|
|
@ -32,7 +32,7 @@ node1_data = [
|
|||
[11, 13],
|
||||
[12, 13],
|
||||
[13, 9],
|
||||
[9, 14],
|
||||
# [9, 14],
|
||||
[14, 15],
|
||||
]
|
||||
|
||||
|
@ -42,7 +42,7 @@ node2_data = [
|
|||
[0, 3],
|
||||
[3, 4],
|
||||
[4, 5],
|
||||
[5, 6],
|
||||
# [5, 6],
|
||||
[5, 7],
|
||||
[6, 8],
|
||||
]
|
||||
|
@ -54,8 +54,14 @@ cluster.plot = true if options[:plot]
|
|||
node1 = cluster.node_create 'node1', Deployment::TestNode
|
||||
node2 = cluster.node_create 'node2', Deployment::TestNode
|
||||
|
||||
sync_node = cluster.node_create 'sync_node', Deployment::TestNode
|
||||
|
||||
node2.set_critical if options[:critical]
|
||||
|
||||
sync_node.set_as_sync_point
|
||||
|
||||
sync_node.create_task 'sync_task'
|
||||
|
||||
node1_data.each do |task_from, task_to|
|
||||
task_from = node1.graph.create_task "task#{task_from}"
|
||||
task_to = node1.graph.create_task "task#{task_to}"
|
||||
|
@ -74,6 +80,11 @@ node2['task4'].depends node1['task3']
|
|||
node2['task5'].depends node1['task13']
|
||||
node1['task15'].depends node2['task6']
|
||||
|
||||
sync_node['sync_task'].depends node2['task5']
|
||||
sync_node['sync_task'].depends node1['task9']
|
||||
node2['task6'].depends sync_node['sync_task']
|
||||
node1['task14'].depends sync_node['sync_task']
|
||||
|
||||
if options[:plot]
|
||||
cluster.make_image 'start'
|
||||
end
|
||||
|
@ -81,5 +92,5 @@ end
|
|||
if options[:interactive]
|
||||
binding.pry
|
||||
else
|
||||
cluster.run
|
||||
p cluster.run
|
||||
end
|
||||
|
|
|
@ -72,6 +72,9 @@ module Deployment
|
|||
debug "#{task} finished with: #{status}"
|
||||
self.task.status = status
|
||||
self.status = :online
|
||||
|
||||
self.status = :skipped if task.status == :dep_failed
|
||||
self.status = :failed if task.status == :failed
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue