fuel-astute/lib/astute/puppet_task.rb

232 lines
7.0 KiB
Ruby

# Copyright 2014 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
require 'timeout'
module Astute
class PuppetTask
def initialize(ctx, node, retries=1, puppet_manifest=nil, puppet_modules=nil, cwd=nil, timeout=nil)
@ctx = ctx
@node = node
@retries = retries
@puppet_manifest = puppet_manifest || '/etc/puppet/manifests/site.pp'
@puppet_modules = puppet_modules || '/etc/puppet/modules'
@cwd = cwd || '/'
@time_observer = TimeObserver.new(timeout || Astute.config.PUPPET_TIMEOUT)
@prev_summary = nil
@is_hung = false
end
def run
Astute.logger.debug "Waiting for puppet to finish deployment on " \
"node #{@node['uid']} (timeout = #{@time_observer.time_limit} sec)..."
@time_observer.start
@prev_summary ||= puppet_status
puppetd_runonce
end
# expect to run this method with respect of Astute.config.PUPPET_FADE_INTERVAL
def status
raise Timeout::Error unless @time_observer.enough_time?
last_run = puppet_status
status = node_status(last_run)
Astute.logger.debug "Node #{@node['uid']}(#{@node['role']}) status: #{status}"
result = case status
when 'succeed'
processing_succeed_node
when 'running'
processing_running_node
when 'error'
processing_error_node(last_run)
end
#TODO(vsharshov): Should we move it to control module?
@ctx.report_and_update_status('nodes' => [result]) if result
# ready, error or deploying
result.fetch('status', 'deploying')
end
private
def puppetd
puppetd = MClient.new(@ctx, "puppetd", [@node['uid']])
puppetd.on_respond_timeout do |uids|
nodes = uids.map do |uid|
{
'uid' => uid,
'status' => 'error',
'error_type' => 'deploy',
'role' => @node['role']
}
end
@ctx.report_and_update_status('nodes' => nodes)
end
puppetd
end
def puppet_status
puppetd.last_run_summary.first[:data]
end
def puppet_run
puppetd.runonce(
:puppet_debug => true,
:manifest => @puppet_manifest,
:modules => @puppet_modules,
:cwd => @cwd
)
end
def running?(status)
['running'].include? status[:status]
end
def idling?(status)
['idling'].include? status[:status]
end
def stopped?(status)
['stopped', 'disabled'].include? status[:status]
end
def succeed?(status)
status[:status] == 'stopped' &&
status[:resources]['failed'].to_i == 0 &&
status[:resources]['failed_to_restart'].to_i == 0 &&
status[:time]['last_run'] != (@prev_summary && @prev_summary[:time]['last_run'])
end
# Runs puppetd.runonce only if puppet is stopped on the host at the time
# If it isn't stopped, we wait a bit and try again.
# Returns list of nodes uids which appear to be with hung puppet.
def puppetd_runonce
started = Time.now.to_i
while Time.now.to_i - started < Astute.config.PUPPET_FADE_TIMEOUT
status = puppet_status
is_stopped = stopped?(status)
is_idling = idling?(status)
is_running = running?(status)
#Try to kill 'idling' process and run again by 'runonce' call
puppet_run if is_stopped || is_idling
break if !is_running && !is_idling
sleep Astute.config.PUPPET_FADE_INTERVAL
end
if is_running || is_idling
Astute.logger.warn "Following nodes have puppet hung " \
"(#{is_running ? 'running' : 'idling'}): '#{@node['uid']}'"
@is_hung = true
else
@is_hung = false
end
end
def node_status(last_run)
case
when @is_hung
'error'
when succeed?(last_run) && !@is_hung
'succeed'
when (running?(last_run) || idling?(last_run)) && !@is_hung
'running'
when stopped?(last_run) && !succeed?(last_run) && !@is_hung
'error'
else
msg = "Unknow status: " \
"is_hung #{@is_hung}, succeed? #{succeed?(last_run)}, " \
"running? #{running?(last_run)}, stopped? #{stopped?(last_run)}, " \
"idling? #{idling?(last_run)}"
raise msg
end
end
def processing_succeed_node
Astute.logger.debug "Puppet completed within #{@time_observer.stop} seconds"
{ 'uid' => @node['uid'], 'status' => 'ready', 'role' => @node['role'] }
end
def processing_error_node(last_run)
if @retries > 0
@retries -= 1
Astute.logger.debug "Puppet on node #{@node['uid']} will be "\
"restarted. #{@retries} retries remained."
Astute.logger.info "Retrying to run puppet for following error " \
"nodes: #{@node['uid']}"
puppetd_runonce
# We need this magic with prev_summary to reflect new puppetd run statuses..
@prev_summary = last_run
node_report_format('status' => 'deploying')
else
Astute.logger.debug "Node #{@node['uid']} has failed to deploy. " \
"There is no more retries for puppet run."
node_report_format('status' => 'error', 'error_type' => 'deploy')
end
end
def processing_running_node
nodes_to_report = []
begin
# Pass nodes because logs calculation needs IP address of node, not just uid
nodes_progress = @ctx.deploy_log_parser.progress_calculate([@node['uid']], [@node])
if nodes_progress.present?
Astute.logger.debug "Got progress for nodes: #{nodes_progress.inspect}"
# Nodes with progress are running, so they are not included in nodes_to_report yet
nodes_progress.map! { |x| x.merge!('status' => 'deploying', 'role' => @node['role']) }
nodes_to_report = nodes_progress
end
rescue => e
Astute.logger.warn "Some error occurred when parse logs for " \
"nodes progress: #{e.message}, trace: #{e.format_backtrace}"
end
nodes_to_report.first || node_report_format('status' => 'deploying')
end
def node_report_format(add_info={})
add_info.merge('uid' => @node['uid'], 'role' => @node['role'])
end
end #PuppetTask
class TimeObserver
def initialize(timeout)
@timeout = timeout
end
def start
@time_before = Time.now
end
def stop
(Time.now - @time_before).to_i
end
def enough_time?
Time.now - @time_before < time_limit
end
def time_limit
@timeout
end
end #TimeObserver
end