fuel-web/bin/agent
Andrey Danin 1229556755 [bin/agent] Sleep before getting system info
To avoid providing inconsistent info the agent should
make a sleep call before it collects system information.

https://mirantis.jira.com/browse/PRD-2288

Change-Id: I1d015f2bc94c4ee1ee94c8fda6d08ced45c4b07c
2013-10-23 00:24:07 +04:00

507 lines
15 KiB
Ruby
Executable File

#!/usr/bin/env ruby
# Copyright 2013 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
begin
require 'rubygems'
rescue LoadError
end
require 'ohai/system'
require 'json'
require 'httpclient'
require 'logger'
require 'optparse'
require 'yaml'
require 'ipaddr'
require 'rethtool'
# Refuse to run for any effective user other than root (euid 0).
if Process.euid != 0
  puts "You must be root"
  exit 1
end

# Pin the search path used for the external commands this script runs.
ENV['PATH'] = "/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin"

# Path of the YAML file the script consults for the service 'url'.
AGENT_CONFIG = "/etc/nailgun-agent/config.yaml"
# Reads and rewrites the "identity" setting of the MCollective server
# configuration file and restarts the service when the value changes.
class McollectiveConfig
  # logger - object responding to #info; receives progress messages.
  def initialize(logger)
    @logger = logger
    @configfile = '/etc/mcollective/server.cfg'
  end

  # Looks up a "key = value" setting in the configuration file.
  #
  # find_key - exact key name to search for.
  #
  # Returns the value from the last line whose key equals find_key, or nil
  # when no such line exists. Raises whatever File.foreach raises if the
  # file cannot be read.
  def get_config_by_key(find_key)
    found_value = nil
    # The parsing rules below come from mcollective's sources.
    # File.foreach closes the file when iteration ends.
    File.foreach(@configfile) do |raw_line|
      # strip blank spaces, tabs etc off the end of all lines
      line = raw_line.sub(/\s+\z/, "")
      next if line =~ /^#|^$/
      pair = line.match(/(.+?)\s*=\s*(.+)/)
      next unless pair
      found_value = pair[2] if pair[1] == find_key
    end
    found_value
  end

  # Ensures the configuration file contains "identity = <new_id>".
  #
  # new_id - the identity to set; its string form may contain only word
  #          characters, dots and dashes.
  #
  # When the stored identity already equals new_id nothing is written.
  # Otherwise the existing identity line is replaced (or a new one is
  # appended when none exists) and the mcollective init script is run with
  # stop and then start.
  #
  # Raises RuntimeError when new_id contains any other character.
  def replace_identity(new_id)
    # \A and \z anchor the whole string. With ^ and $ a multi-line value
    # would pass as soon as one of its lines is valid, letting extra
    # settings be written into the config file.
    raise 'Identities can only match /\w\.\-/' unless new_id.to_s =~ /\A[\w\.\-]+\z/

    value_from_config = get_config_by_key('identity')
    if value_from_config == new_id.to_s
      @logger.info "MCollective is up to date with identity = #{new_id}"
      return
    end

    # Block form so the handle is closed as soon as the content is read.
    config = File.open(@configfile, "rb") { |f| f.read }
    if value_from_config
      # Key found, but it has other value
      @logger.info "Replacing identity in mcollective server.cfg to new value = '#{new_id}'"
      config.gsub!(/^identity[ =].*$/, "identity = #{new_id}")
    else
      # Key was not found
      @logger.info "Identity in mcollective server.cfg has not been found. Setting to '#{new_id}'"
      config += "\nidentity = #{new_id}\n"
    end
    File.open(@configfile, "w") { |f| f.write(config) }

    # Generally because generic init script for
    # mcollective is broken at least in Ubuntu and
    # altering restart in such way seems better
    # than shipping our own package.
    puts `/etc/init.d/mcollective stop; /etc/init.d/mcollective start`
  end
end
# Gathers hardware, network and OS facts about the local node (from Ohai,
# sysfs, udevadm and dmidecode) and sends them to the "<api_url>/nodes/"
# HTTP endpoint.
class NodeAgent
  # logger - object responding to #debug, #info, #warn and #error.
  # url    - optional API base URL, e.g. "http://10.20.0.2:8000/api". When
  #          omitted, the admin node IP is taken from the "url=" parameter of
  #          /proc/cmdline, falling back to 127.0.0.1.
  #
  # Loads all Ohai plugins as part of construction.
  def initialize(logger, url=nil)
    @logger = logger
    @api_default_address = "localhost"
    @api_default_port = "8000"
    if url
      # Non-destructive chomp: the caller's string is left untouched.
      @api_url = url.chomp('/')
      found = @api_url.match(/\bhttp:\/\/((\d{1,3}\.){3}\d{1,3})/)
      # A URL without a dotted-quad host leaves @api_ip nil rather than
      # crashing; _data then falls back to the addresses reported by Ohai.
      @api_ip = found && found[1]
      @logger.warn("Can't find an IP address in API url '#{@api_url}'") unless @api_ip
    else
      begin
        cmdline = ::File.read("/proc/cmdline")
        @api_ip = cmdline.match(/\burl=http:\/\/((\d{1,3}\.){3}\d{1,3})/)[1]
        @logger.info("Found admin node IP address in kernel cmdline: #{@api_ip}")
      rescue StandardError
        @logger.info("Can't get API url from /proc/cmdline. Will use localhost.")
        @api_ip = "127.0.0.1"
      end
      @api_url = "http://#{@api_ip}:#{@api_default_port}/api"
    end
    @os = Ohai::System.new()
    @os.all_plugins
  end

  # PUTs the node data, wrapped in a one-element JSON array, to the nodes
  # endpoint. Logs an error for a non-2xx status. Returns the HTTP response.
  def put
    headers = {"Content-Type" => "application/json"}
    @logger.debug("Trying to put host info into #{@api_url}")
    res = htclient.put("#{@api_url}/nodes/", [_data].to_json, headers)
    unless (200...300).include?(res.status)
      @logger.error("HTTP PUT failed: #{res.inspect}")
    end
    res
  end

  # POSTs the node data as a JSON object to the nodes endpoint.
  # Returns the HTTP response; the status is left for the caller to check.
  def post
    headers = {"Content-Type" => "application/json"}
    @logger.debug("Trying to create host using #{@api_url}")
    htclient.post("#{@api_url}/nodes/", _data.to_json, headers)
  end

  # Returns a new HTTPClient with 10 second connect/send/receive timeouts.
  def htclient
    client = HTTPClient.new
    client.connect_timeout = 10
    client.send_timeout = 10
    client.receive_timeout = 10 # (mihgen): Nailgun may hang for a while, but 10sec should be enough for him to respond
    client
  end

  # Returns an array with one {:name, :addresses} hash per interface known to
  # Ohai, followed by the default interface and the default gateway entries.
  def _interfaces
    interfaces = @os[:network][:interfaces].map do |name, info|
      { :name => name, :addresses => info["addresses"] }
    end
    interfaces << { "default_interface" => @os["network"]["default_interface"] }
    interfaces << { "default_gateway" => @os["network"]["default_gateway"] }
    interfaces
  end

  # Builds the detailed metadata hash with the keys :system, :interfaces,
  # :cpu, :disks and :memory. The interfaces, cpu spec and disks sections are
  # best-effort: an error in one of them is logged and whatever was collected
  # for that section up to that point is kept.
  def _detailed
    {
      :system => _system_info,
      :interfaces => _detailed_interfaces,
      :cpu => {
        :total => (@os[:cpu][:total].to_i rescue nil),
        :real => (@os[:cpu][:real].to_i rescue nil),
        :spec => _detailed_cpu_spec,
      },
      :disks => _detailed_disks,
      :memory => (_dmi_memory or _ohai_memory),
    }
  end

  # Maps a kernel device name to its /dev/disk/by-path entry.
  #
  # name - device name relative to /dev, e.g. "sda".
  #
  # Returns a string such as "disk/by-path/<link name>", or nil when no link
  # under /dev/disk/by-path points at the device.
  def _disk_path_by_name(name)
    dn = "/dev/disk/by-path"
    # Literal suffix comparison, so characters in the device name are never
    # interpreted as regexp syntax; non-symlink entries are skipped because
    # File.readlink would raise on them.
    basepath = Dir["#{dn}/**?"].find do |f|
      File.symlink?(f) && File.readlink(f).end_with?("/#{name}")
    end
    basepath.split("/")[2..-1].join("/") if basepath
  end

  # Returns the names (as listed in /sys/block) of non-removable block
  # devices whose major number is one of 3, 8, 202, 252, 253 and whose udev
  # DEVPATH does not contain "virtual". The list is computed once and cached;
  # a scan that fails part-way caches nothing.
  #
  # Raises RuntimeError when /sys/block does not exist.
  def physical_data_storage_devices
    return @blocks if @blocks
    raise "Path /sys/block does not exist" unless File.exist?("/sys/block")
    found = []
    Dir["/sys/block/*"].each do |block_device_dir|
      basename_dir = File.basename(block_device_dir)
      properties = _udev_properties(basename_dir)
      removable_file = "/sys/block/#{basename_dir}/removable"
      removable = File.read(removable_file).strip if File.exist?(removable_file)
      # look at https://github.com/torvalds/linux/blob/master/Documentation/devices.txt
      # KVM virtio volumes has code 252 in CentOS, but 253 in Ubuntu
      next unless [3, 8, 202, 252, 253].include?(properties['MAJOR'].to_i) && removable == "0"
      # Exclude LVM volumes (in CentOS - 253, in Ubuntu - 252) using additional check
      found << basename_dir unless properties['DEVPATH'].to_s.include?('virtual')
    end
    @blocks = found
  end

  # True when the DMI product name is exactly "VirtualBox".
  def _is_virtualbox
    @os[:dmi][:system][:product_name] == "VirtualBox" rescue false
  end

  # True for VirtualBox or when Ohai reports the virtualization role "guest".
  def _is_virtual
    _is_virtualbox or @os[:virtualization][:role] == "guest" rescue false
  end

  # Returns the product name on VirtualBox, the upcased virtualization system
  # on other virtual machines, and the DMI manufacturer otherwise; nil when
  # the value is unavailable.
  def _manufacturer
    if _is_virtualbox
      @os[:dmi][:system][:product_name] rescue nil
    elsif _is_virtual
      @os[:virtualization][:system].upcase.strip rescue nil
    else
      @os[:dmi][:system][:manufacturer].strip rescue nil
    end
  end

  # Returns the DMI product name, or nil on virtual machines or when missing.
  def _product_name
    return nil if _is_virtual
    @os[:dmi][:system][:product_name].strip rescue nil
  end

  # Returns the DMI serial number, or nil when missing.
  def _serial
    @os[:dmi][:system][:serial_number].strip rescue nil
  end

  # Returns a hash of system identification facts. Entries whose value is
  # nil, empty or "Not Specified" are dropped.
  def _system_info
    facts = {
      :manufacturer => _manufacturer,
      :serial => _serial,
      :product => _product_name,
      :family => (@os[:dmi][:system][:family].strip rescue nil),
      :version => (@os[:dmi][:system][:version].strip rescue nil),
      :fqdn => (@os[:fqdn].strip rescue @os[:hostname].strip rescue nil),
    }
    facts.delete_if { |key, value| value.nil? or value.empty? or value == "Not Specified" }
  end

  # Converts a size expressed in binary units to bytes.
  #
  # size - Integer amount.
  # unit - "kb", "mb", "gb" or "tb" in any letter case.
  #
  # Returns the number of bytes, or nil for any other unit.
  def _size(size, unit)
    case unit
    when /^kb$/i
      size * 1024
    when /^mb$/i
      size * 1048576
    when /^gb$/i
      size * 1073741824
    when /^tb$/i
      size * 1099511627776
    end
  end

  # Summarises installed memory from dmidecode output.
  #
  # Returns {:devices, :total, :maximum_capacity, :slots} with sizes in
  # bytes, or nil when dmidecode cannot be run, exits with an error or
  # reports no populated memory device.
  def _dmi_memory
    dmi = _dmidecode_output
    return nil unless dmi
    info = {:devices => [], :total => 0, :maximum_capacity => 0, :slots => 0}
    # Records in the output are separated by blank lines.
    dmi.split(/\n\n/).each do |group|
      if group =~ /^Physical Memory Array$/
        capacity = group.match(/^\s*Maximum Capacity:\s+(\d+)\s+(kb|mb|gb|tb)/i)
        info[:maximum_capacity] += _size(capacity[1].to_i, capacity[2]) if capacity
        slots = group.match(/^\s*Number Of Devices:\s+(\d+)/i)
        info[:slots] += slots[1].to_i if slots
      elsif group =~ /^Memory Device$/
        device_info = _dmi_memory_device(group)
        # A record without a numeric size is skipped.
        next unless device_info
        info[:total] += device_info[:size]
        info[:devices].push(device_info)
      end
    end
    info[:total] == 0 ? nil : info
  end

  # Fallback memory summary taken from Ohai's memory total (e.g. "2048kB").
  #
  # Returns {:total => bytes}, or nil when Ohai provides no total string.
  def _ohai_memory
    raw = (@os['memory']['total'] rescue nil)
    return nil unless raw.is_a?(String)
    # Capture the unit explicitly instead of relying on $1 being left behind
    # by a previous substitution.
    unit = raw[/(kb|mb|gb)$/i, 1]
    amount = raw.sub(/(kb|mb|gb)$/i, "").to_i
    {:total => _size(amount, unit)}
  end

  # Returns the IP of the first interface in detailed_data whose network
  # contains the API address, or nil when there is none.
  def _master_ip(detailed_data)
    iface = _master_interface(detailed_data)
    iface && iface[:ip]
  end

  # Returns the MAC of the first interface in detailed_data whose network
  # contains the API address, or nil when there is none.
  def _master_mac(detailed_data)
    iface = _master_interface(detailed_data)
    iface && iface[:mac]
  end

  # Assembles the payload sent to the API: :mac, :ip, :os_platform,
  # :manufacturer, :platform_name, :meta, :is_agent and, when a node state is
  # set, :status. If detailed metadata cannot be built the error is logged
  # and only the basic Ohai values are sent.
  def _data
    res = {
      :mac => (@os[:macaddress] rescue nil),
      :ip => (@os[:ipaddress] rescue nil),
      :os_platform => (@os[:platform] rescue nil)
    }
    begin
      detailed_data = _detailed
      res.merge!({
        :ip => ((_master_ip(detailed_data) or @os[:ipaddress]) rescue nil),
        # Fall back to Ohai's :macaddress, the same attribute used for the
        # initial :mac value above.
        :mac => ((_master_mac(detailed_data) or @os[:macaddress]) rescue nil),
        :manufacturer => _manufacturer,
        :platform_name => _product_name,
        :meta => detailed_data
      })
    rescue StandardError => e
      @logger.error("Error '#{e.message}' in metadata calculation: #{e.backtrace}")
    end
    res[:status] = @node_state if @node_state
    res[:is_agent] = true
    res
  end

  # Sets @node_state to "discover" when the first line of
  # /etc/nailgun_systemtype is "bootstrap"; otherwise leaves it nil.
  def update_state
    @node_state = nil
    return unless File.exist?("/etc/nailgun_systemtype")
    # Block form closes the file; gets returns nil (instead of raising) when
    # the file is empty.
    first_line = File.open("/etc/nailgun_systemtype", "r") { |fl| fl.gets }
    @node_state = "discover" if first_line.to_s.rstrip == "bootstrap"
  end

  private

  # Collects metadata for physical ethernet interfaces.
  def _detailed_interfaces
    collected = []
    (@os[:network][:interfaces] or {} rescue {}).each do |int, intinfo|
      # Send info about physical interfaces only
      next if intinfo[:type] != "eth"
      # Exception: eth0.0(example) have "type" => "eth" but it is not physical interface
      next if int =~ /\d+\.\d+$/
      int_meta = {:name => int}
      (intinfo[:addresses] or {} rescue {}).each do |addr, addrinfo|
        family = (addrinfo[:family] rescue nil)
        if family =~ /lladdr/
          int_meta[:mac] = addr
          begin
            int_info = Rethtool::InterfaceSettings.new(int)
            int_meta[:max_speed] = int_info.best_mode.speed
            speed = int_info.current_mode.speed
            int_meta[:current_speed] = (speed == :unknown ? nil : speed)
          rescue StandardError
            int_meta[:current_speed] = nil
          end
        elsif family =~ /^inet$/
          int_meta[:ip] = addr
          int_meta[:netmask] = addrinfo[:netmask] if addrinfo[:netmask]
        end
      end
      collected << int_meta
    end
    collected
  rescue StandardError => e
    @logger.error("Error '#{e.message}' in gathering interfaces metadata: #{e.backtrace}")
    collected
  end

  # Collects {:frequency, :model} for every numbered CPU entry from Ohai.
  def _detailed_cpu_spec
    collected = []
    (@os[:cpu] or {} rescue {}).each do |cpu, cpuinfo|
      next unless cpu =~ /^[\d]+/ && cpuinfo
      frequency = cpuinfo[:mhz].to_i rescue nil
      # ohai returns current frequency, try to get max if possible
      max_frequency = _cpu_max_frequency(cpu)
      frequency = max_frequency if max_frequency > 0
      collected << {
        :frequency => frequency,
        :model => (cpuinfo[:model_name].gsub(/ +/, " ") rescue nil)
      }
    end
    collected
  rescue StandardError => e
    @logger.error("Error '#{e.message}' in gathering cpu metadata: #{e.backtrace}")
    collected
  end

  # Reads cpufreq's cpuinfo_max_freq for the given CPU and divides it by
  # 1000; returns 0 when the sysfs file cannot be read.
  def _cpu_max_frequency(cpu)
    File.read("/sys/devices/system/cpu/cpu#{cpu}/cpufreq/cpuinfo_max_freq").to_i / 1000
  rescue StandardError
    0
  end

  # Collects {:name, :model, :size, :disk} for every physical data disk.
  def _detailed_disks
    collected = []
    (@os[:block_device] or {} rescue {}).each do |bname, binfo|
      next unless physical_data_storage_devices.include?(bname) && binfo
      dname = bname.gsub(/!/, '/')
      # 512 bytes is the size of one sector by default
      block_size = 512
      fn = "/sys/block/#{bname}/queue/logical_block_size"
      block_size = File.read(fn).to_i if File.exist? fn
      block_size = 512 if block_size == 0
      # NOTE(review): this assumes binfo[:size] is counted in logical blocks;
      # confirm against the unit Ohai reports before relying on it for
      # devices whose logical block size is not 512.
      collected << {
        :name => dname,
        :model => binfo[:model],
        :size => (binfo[:size].to_i * block_size),
        :disk => _disk_path_by_name(dname) || dname
      }
    end
    collected
  rescue StandardError => e
    @logger.error("Error '#{e.message}' in gathering disks metadata: #{e.backtrace}")
    collected
  end

  # Runs udevadm for a block device and returns its properties as a
  # {name => value} hash with the surrounding single quotes removed.
  def _udev_properties(device)
    properties = {}
    output = `udevadm info --query=property --export --name=#{device}`
    # each_line: String is not Enumerable on Ruby 1.9+, so it cannot be
    # folded over directly.
    output.each_line do |raw_property|
      # Limit 2 keeps values that themselves contain "=" intact.
      key, value = raw_property.split('=', 2)
      next if value.nil?
      properties[key.strip] = value.strip.sub(/\A'/, '').sub(/'\z/, '')
    end
    properties
  end

  # Returns the output of dmidecode, or nil when the command cannot be
  # started or does not exit successfully.
  def _dmidecode_output
    output = `/usr/sbin/dmidecode`
    $?.success? ? output : nil
  rescue SystemCallError
    nil
  end

  # Parses one "Memory Device" record of dmidecode output.
  # Returns {:size, :frequency, :type} (the last two only when present), or
  # nil when the record has no numeric size.
  def _dmi_memory_device(group)
    size = group.match(/^\s*Size:\s+(\d+)\s+(kb|mb|gb|tb)/i)
    return nil unless size
    device_info = {:size => _size(size[1].to_i, size[2])}
    # Accept both "MHz" and "MT/s" as the speed unit label.
    speed = group.match(/^\s*Speed:\s+(\d+)\s+(?:MHz|MT\/s)/i)
    device_info[:frequency] = speed[1].to_i if speed
    type = group.match(/^\s*Type:\s+(.*?)$/i)
    device_info[:type] = type[1] if type
    device_info
  end

  # Returns the first interface hash under the "interfaces" key of
  # detailed_data whose ip/netmask network contains @api_ip, or nil.
  def _master_interface(detailed_data)
    detailed_data.each do |key, interfaces|
      next unless key.to_s =~ /^interfaces$/
      interfaces.each do |iface|
        begin
          net = IPAddr.new "#{iface[:ip]}/#{iface[:netmask]}"
          return iface if net.include? @api_ip
        rescue StandardError
          # Interface without a parsable address: try the next one.
        end
      end
    end
    nil
  end
end
# Writes data to filename unless the file already holds exactly that content.
#
# logger   - object responding to #info and #warn.
# filename - path of the file to update.
# data     - String content the file should have.
#
# A failure to write is logged as a warning and not raised. A failure to
# read an existing file is raised to the caller.
def write_data_to_file(logger, filename, data)
  # Read into a method-level local: a variable first assigned inside a
  # File.open block would be block-local and the comparison below could
  # never detect an up-to-date file.
  current = File.exist?(filename) ? File.read(filename) : ''
  if current == data
    logger.info("File '#{filename}' is up to date.")
    return
  end

  begin
    File.open(filename, 'w') { |fo| fo.write(data) }
    logger.info("Wrote data to file '#{filename}'. Data: #{data}")
  rescue StandardError => e
    # Logger's method is #warn; #warning does not exist.
    logger.warn("Can't write data to file '#{filename}'. Reason: #{e.message}")
  end
end
# Entry point: register this node with the API (or update it when it is
# already known) and record the id it was given.
logger = Logger.new(STDOUT)
logger.level = Logger::DEBUG

# The presence of this file disables discovery for the node.
if File.exist?('/var/run/nodiscover')
  logger.info("Discover prevented by /var/run/nodiscover presence.")
  exit 1
end

begin
  logger.info("Trying to load agent config #{AGENT_CONFIG}")
  url = YAML.load_file(AGENT_CONFIG)['url']
  logger.info("Obtained service url from config file: '#{url}'")
rescue StandardError, ScriptError => e
  # Best effort: any problem with the config file means "no url configured".
  # ScriptError is included so that a YAML syntax error is still handled on
  # Ruby versions where it is raised as a ::SyntaxError subclass; signals
  # and SystemExit are deliberately no longer swallowed here.
  logger.info("Could not get url from configuration file: #{e.message}, trying other ways..")
end

# Random delay of 0..29 seconds; it happens before NodeAgent.new, which is
# where system information is collected.
sleep_time = rand(30)
logger.debug("Sleep for #{sleep_time} seconds before sending request")
sleep(sleep_time)

agent = NodeAgent.new(logger, url)
agent.update_state

begin
  post_res = agent.post
  if post_res.status == 409
    # 409 on POST: fall back to updating the node with PUT, whose response
    # body is a JSON array.
    put_res = agent.put
    new_id = JSON.parse(put_res.body)[0]['id']
  elsif post_res.status == 201
    new_id = JSON.parse(post_res.body)['id']
  else
    logger.error post_res.body
    exit 1
  end

  mc_config = McollectiveConfig.new(logger)
  mc_config.replace_identity(new_id)
  write_data_to_file(logger, '/etc/nailgun_uid', new_id.to_s)
rescue => ex
  # NOTE(mihgen): There is no need to retry - cron will do it for us
  logger.error "#{ex.message}\n#{ex.backtrace}"
end