d6eb6f7051
Another changes: - fix unexpected behavior: Astute has sent huge message instead of string with dump file path; - add ability to not log mcollective result; - remove logging duplication of incoming Astute tasks. Change-Id: Idccf9bd3e57b4c160f077df779f386f656830d38 Closes-Bug: #1534206
191 lines
6.2 KiB
Ruby
191 lines
6.2 KiB
Ruby
# -*- coding: utf-8 -*-
|
|
# Copyright 2013 Mirantis, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
|
|
require 'mcollective'
|
|
require 'timeout'
|
|
|
|
module Astute
|
|
|
|
class MClient
|
|
include MCollective::RPC
|
|
|
|
attr_accessor :retries
|
|
|
|
def initialize(
|
|
ctx,
|
|
agent,
|
|
nodes=nil,
|
|
check_result=true,
|
|
timeout=nil,
|
|
retries=Astute.config.mc_retries,
|
|
enable_result_logging=true
|
|
)
|
|
|
|
@task_id = ctx.task_id
|
|
@agent = agent
|
|
@nodes = nodes.map { |n| n.to_s } if nodes
|
|
@check_result = check_result
|
|
# Will be used a minimum of two things: the specified parameter(timeout)
|
|
# and timeout from DDL (10 sec by default if not explicitly specified in DDL)
|
|
# If timeout here is nil will be used value from DDL.
|
|
# Examples:
|
|
# timeout - 10 sec, DDL - 20 sec. Result — 10 sec.
|
|
# timeout - 30 sec, DDL - 20 sec. Result — 20 sec.
|
|
# timeout - 20 sec, DDL - not set. Result — 10 sec.
|
|
@timeout = timeout
|
|
@retries = retries
|
|
@enable_result_logging = enable_result_logging
|
|
initialize_mclient
|
|
end
|
|
|
|
def on_respond_timeout(&block)
|
|
@on_respond_timeout = block
|
|
self
|
|
end
|
|
|
|
def method_missing(method, *args)
|
|
@mc_res = mc_send(method, *args)
|
|
|
|
if method == :discover
|
|
@nodes = args[0][:nodes]
|
|
return @mc_res
|
|
end
|
|
|
|
# Enable if needed. In normal case it eats the screen pretty fast
|
|
log_result(@mc_res, method) if @enable_result_logging
|
|
|
|
check_results_with_retries(method, args) if @check_result
|
|
|
|
@mc_res
|
|
end
|
|
|
|
private
|
|
|
|
def check_results_with_retries(method, args)
|
|
err_msg = ''
|
|
timeout_nodes_count = 0
|
|
# Following error might happen because of misconfiguration, ex. direct_addressing = 1 only on client
|
|
# or.. could be just some hang? Let's retry if @retries is set
|
|
if @mc_res.length < @nodes.length
|
|
# some nodes didn't respond
|
|
retry_index = 1
|
|
while retry_index <= @retries
|
|
sleep rand
|
|
nodes_responded = @mc_res.map { |n| n.results[:sender] }
|
|
not_responded = @nodes - nodes_responded
|
|
Astute.logger.debug "Retry ##{retry_index} to run mcollective agent on nodes: '#{not_responded.join(',')}'"
|
|
mc_send :discover, :nodes => not_responded
|
|
@new_res = mc_send(method, *args)
|
|
|
|
log_result(@new_res, method) if @enable_result_logging
|
|
# @new_res can have some nodes which finally responded
|
|
|
|
@mc_res += @new_res
|
|
break if @mc_res.length == @nodes.length
|
|
retry_index += 1
|
|
end
|
|
if @mc_res.length < @nodes.length
|
|
nodes_responded = @mc_res.map { |n| n.results[:sender] }
|
|
not_responded = @nodes - nodes_responded
|
|
if @on_respond_timeout
|
|
@on_respond_timeout.call not_responded
|
|
else
|
|
err_msg += "MCollective agents '#{@agent}' " \
|
|
"'#{not_responded.join(',')}' didn't respond within the " \
|
|
"allotted time.\n"
|
|
timeout_nodes_count += not_responded.size
|
|
end
|
|
end
|
|
end
|
|
failed = @mc_res.select { |x| x.results[:statuscode] != 0 }
|
|
if failed.any?
|
|
err_msg += "MCollective call failed in agent '#{@agent}', "\
|
|
"method '#{method}', failed nodes: \n"
|
|
failed.each do |n|
|
|
err_msg += "ID: #{n.results[:sender]} - Reason: #{n.results[:statusmsg]}\n"
|
|
end
|
|
end
|
|
if err_msg.present?
|
|
Astute.logger.error err_msg
|
|
expired_size = failed.count { |n| n.results[:statusmsg] == 'execution expired' }
|
|
# Detect TimeOut: 1 condition - fail because of DDL timeout, 2 - fail because of custom timeout
|
|
if (failed.present? && failed.size == expired_size) || (timeout_nodes_count > 0 && failed.empty?)
|
|
raise MClientTimeout, "#{@task_id}: #{err_msg}"
|
|
else
|
|
raise MClientError, "#{@task_id}: #{err_msg}"
|
|
end
|
|
end
|
|
end
|
|
|
|
def mc_send(*args)
|
|
retries = 1
|
|
begin
|
|
@mc.send(*args)
|
|
rescue => ex
|
|
case ex
|
|
when Stomp::Error::NoCurrentConnection
|
|
# stupid stomp cannot recover severed connection
|
|
stomp = MCollective::PluginManager["connector_plugin"]
|
|
stomp.disconnect rescue nil
|
|
stomp.instance_variable_set :@connection, nil
|
|
initialize_mclient
|
|
end
|
|
if retries < 3
|
|
Astute.logger.error "Retrying MCollective call after exception:\n#{ex.pretty_inspect}"
|
|
sleep rand
|
|
retries += 1
|
|
retry
|
|
else
|
|
Astute.logger.error "No more retries for MCollective call after exception: " \
|
|
"#{ex.format_backtrace}"
|
|
raise MClientError, "#{ex.pretty_inspect}"
|
|
end
|
|
end
|
|
end
|
|
|
|
def initialize_mclient
|
|
retries = 1
|
|
begin
|
|
@mc = rpcclient(@agent, :exit_on_failure => false)
|
|
|
|
@mc.timeout = @timeout if @timeout
|
|
@mc.progress = false
|
|
if @nodes
|
|
@mc.discover :nodes => @nodes
|
|
end
|
|
rescue => ex
|
|
if retries < 3
|
|
Astute.logger.error "Retrying RPC client instantiation after exception:\n#{ex.pretty_inspect}"
|
|
sleep 5
|
|
retries += 1
|
|
retry
|
|
else
|
|
Astute.logger.error "No more retries for MCollective client instantiation after exception: " \
|
|
"#{ex.format_backtrace}"
|
|
raise MClientError, "#{ex.pretty_inspect}"
|
|
end
|
|
end
|
|
end
|
|
|
|
def log_result(result, method)
|
|
result.each do |node|
|
|
Astute.logger.debug "#{@task_id}: MC agent '#{node.agent}', method '#{method}', "\
|
|
"results:\n#{node.results.pretty_inspect}"
|
|
end
|
|
end
|
|
end
|
|
end
|