fuel-web/bin/agent
2015-07-09 08:50:09 +00:00

676 lines
21 KiB
Ruby
Executable File

#!/usr/bin/env ruby
# Copyright 2013 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
begin
require 'rubygems'
rescue LoadError
end
require 'ohai/system'
require 'json'
require 'httpclient'
require 'logger'
require 'optparse'
require 'yaml'
require 'ipaddr'
require 'rethtool'
require 'digest'
require 'timeout'
# The agent refuses to run unless the effective user is root.
if Process.euid != 0
  puts "You must be root"
  exit 1
end
# Fixed search path for every external command started by the agent.
ENV['PATH'] = "/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin"

# YAML file that may provide the API 'url'.
AGENT_CONFIG = "/etc/nailgun-agent/config.yaml".freeze

# Major device numbers accepted as data storage devices.
# See https://github.com/torvalds/linux/blob/master/Documentation/devices.txt
# KVM virtio volumes have code 252 in CentOS, but 253 in Ubuntu.
# When changing this list, please also update the device codes here:
# https://github.com/stackforge/fuel-astute/blob/master/mcagents/erase_node.rb#L81
# Frozen so the shared list cannot be modified by accident.
STORAGE_CODES = [3, 8, 65, 66, 67, 68, 69, 70, 71, 104, 105, 106, 107, 108, 109, 110, 111, 202, 252, 253].freeze

# Vendors whose devices are reported even when flagged as removable.
REMOVABLE_VENDORS = [
  "Adaptec", "IBM", "ServeRA",
].freeze
# Builds an order-independent textual form of +body+ for checksumming.
#
# * Hash  - every pair becomes "digest(key)=>digest(value)"; the pairs are
#           sorted and the resulting array is digested again.
# * Array - elements are digested and joined with '|'.
# * other - "ClassName:to_s".
#
# Returns a String.
def digest(body)
  case body
  when Hash
    pairs = body.map { |key, value| "#{digest(key)}=>#{digest(value)}" }
    digest(pairs.sort)
  when Array
    body.map { |item| digest(item) }.join('|')
  else
    "#{body.class}:#{body}"
  end
end
# Returns the SHA1 hex digest of the canonical string produced by #digest
# for +body+.
def createsig(body)
  canonical = digest(body)
  Digest::SHA1.hexdigest(canonical)
end
# Reads and rewrites the 'identity' setting of the MCollective server
# configuration file.
class McollectiveConfig
  # logger - object receiving #info messages.
  def initialize(logger)
    @logger = logger
    @configfile = '/etc/mcollective/server.cfg'
  end

  # Looks up +find_key+ in the configuration file.
  #
  # Lines starting with '#' and empty lines are ignored. When the key occurs
  # several times the last occurrence wins.
  #
  # Returns the value as a String, or nil when the key is absent.
  def get_config_by_key(find_key)
    found_value = nil
    # Block form guarantees the file handle is closed.
    File.open(@configfile, "r") do |file|
      # This parsing is from mcollective's sources
      file.each_line do |raw_line|
        # strip blank spaces, tabs etc off the end of all lines
        line = raw_line.gsub(/\s*$/, "")
        next if line =~ /^#|^$/
        next unless line =~ /(.+?)\s*=\s*(.+)/
        found_value = $2 if $1 == find_key
      end
    end
    found_value
  end

  # Makes sure the config file contains "identity = <new_id>" and restarts
  # the mcollective service when the file had to be changed.
  #
  # Raises RuntimeError when +new_id+ contains anything but word characters,
  # dots and dashes.
  def replace_identity(new_id)
    # \A and \z anchor the whole string. With ^ and $ a value such as
    # "1\nkey = value" would pass the check and inject extra config lines.
    raise 'Identities can only match /\w\.\-/' unless new_id.to_s =~ /\A[\w\.\-]+\z/
    value_from_config = get_config_by_key('identity')
    if value_from_config == new_id.to_s
      @logger.info "MCollective is up to date with identity = #{new_id}"
      return
    end

    config = File.open(@configfile, "rb") { |f| f.read }
    if value_from_config
      # Key found, but it has other value
      @logger.info "Replacing identity in mcollective server.cfg to new value = '#{new_id}'"
      config.gsub!(/^identity[ =].*$/, "identity = #{new_id}")
    else
      # Key was not found: append it
      config += "\nidentity = #{new_id}\n"
      @logger.info "Identity in mcollective server.cfg has not been found. Setting to '#{new_id}'"
    end
    File.open(@configfile, "w") { |f| f.write(config) }
    puts `service mcollective restart`
  end
end
# One offloading mode of a network interface together with its nested
# sub-modes.
class Offloading
  # name - mode name.
  # sub  - Array of nested Offloading objects.
  def initialize(name, sub)
    @name = name
    @sub = sub
  end

  # Serializes the mode as a JSON object with the keys 'name', 'state'
  # (always null) and 'sub'. +options+ is accepted but not used.
  def to_json(options = {})
    payload = { 'name' => @name, 'state' => nil, 'sub' => @sub }
    payload.to_json
  end
end
class NodeAgent
# Prepares the agent: resolves the API url / admin node IP and collects
# system information through ohai.
#
# logger - object receiving log messages.
# url    - optional API url; must contain "http://<IPv4 address>". When nil,
#          the IP is taken from the "url=" kernel command line parameter,
#          falling back to 127.0.0.1.
#
# Raises ArgumentError when +url+ is given but contains no IPv4 address.
def initialize(logger, url=nil)
  @logger = logger
  @api_default_address = "localhost"
  @api_default_port = "8000"
  if url
    # Non-destructive chomp: the caller's string is left untouched and a
    # frozen string is accepted.
    @api_url = url.chomp('/')
    ip_match = @api_url.match(/\bhttp:\/\/((\d{1,3}\.){3}\d{1,3})/)
    unless ip_match
      raise ArgumentError, "API url '#{url}' must look like http://<IPv4 address>[:port]/..."
    end
    @api_ip = ip_match[1]
  else
    begin
      cmdline = ::File.read("/proc/cmdline")
      @api_ip = cmdline.match(/\burl=http:\/\/((\d{1,3}\.){3}\d{1,3})/)[1]
      @logger.info("Found admin node IP address in kernel cmdline: #{@api_ip}")
    rescue
      @logger.info("Can't get API url from /proc/cmdline. Will use localhost.")
      @api_ip = "127.0.0.1"
    end
    @api_url = "http://#{@api_ip}:#{@api_default_port}/api"
  end
  @os = ohai_system_info
end
# Runs all ohai plugins and returns the Ohai::System instance.
#
# The first attempt is limited to 30 seconds. On timeout the block device
# and filesystem plugins are disabled and collection is repeated without a
# time limit.
def ohai_system_info
  collect = lambda do
    ohai = Ohai::System.new()
    ohai.all_plugins
    ohai
  end

  begin
    Timeout::timeout(30) { collect.call }
  rescue Timeout::Error
    # When one of disks is broken, do not collect data about block devices
    # More details: https://bugs.launchpad.net/fuel/+bug/1396086
    Ohai::Config[:disabled_plugins]=['linux::block_device', 'linux::filesystem']
    collect.call
  end
end
# Sends the collected node data with HTTP PUT to "<api url>/nodes/agent/".
# A status outside 200..399 is logged as an error.
#
# Returns the HTTP response object.
def put
  json_headers = {"Content-Type" => "application/json"}
  @logger.debug("Trying to put host info into #{@api_url}")
  response = htclient.put("#{@api_url}/nodes/agent/", _data.to_json, json_headers)
  @logger.debug("Response: status: #{response.status} body: #{response.body}")
  failed = response.status < 200 || response.status >= 400
  @logger.error("HTTP PUT failed: #{response.inspect}") if failed
  response
end
# Sends the collected node data with HTTP POST to "<api url>/nodes/".
#
# Returns the HTTP response object; the status is not checked here.
def post
  @logger.debug("Trying to create host using #{@api_url}")
  json_headers = {"Content-Type" => "application/json"}
  response = htclient.post("#{@api_url}/nodes/", _data.to_json, json_headers)
  @logger.debug("Response: status: #{response.status} body: #{response.body}")
  response
end
# Returns a new HTTPClient whose connect, send and receive timeouts are all
# set to 10 seconds.
def htclient
  HTTPClient.new.tap do |http|
    http.connect_timeout = 10
    http.send_timeout = 10
    # (mihgen): Nailgun may hang for a while, but 10sec should be enough for him to respond
    http.receive_timeout = 10
  end
end
# Returns an Array with one {:name, :addresses} Hash per interface known to
# ohai, followed by a {"default_interface" => ...} and a
# {"default_gateway" => ...} Hash.
def _interfaces
  entries = @os[:network][:interfaces].map do |ifname, details|
    { :name => ifname, :addresses => details["addresses"] }
  end
  entries.push({ "default_interface" => @os["network"]["default_interface"] })
  entries.push({ "default_gateway" => @os["network"]["default_gateway"] })
end
# Transforms a flat list of offloading mode names into a tree of Offloading
# objects.
#
# A name followed by names starting with a tab character becomes the parent
# of those names (one leading character is removed from each child before it
# is parsed recursively).
#
# Note: +offloading_arr+ is consumed (left empty), and at every level the
# resulting objects are in reverse order of their appearance in the input.
#
# Example of the serialized result:
# [{
#   "state":null,
#   "sub":[
#     {"state":null, "sub":[], "name":"tx-checksum-ipv6"},
#     ...........
#   ],
#   "name":"tx-checksumming"
#  },
#  {
#   "state":null,
#   "sub":[],
#   "name":"generic-segmentation-offload"
#  },
#  .............
# ]
def _parse_offloading(offloading_arr)
  modes = []
  until offloading_arr.empty?
    parent = offloading_arr.shift
    children = []
    while offloading_arr.any? && offloading_arr.first.start_with?("\t")
      children.push(offloading_arr.shift[1..-1])
    end
    # Later entries go first, matching the order of the recursive form.
    modes.unshift(Offloading.new(parent, _parse_offloading(children)))
  end
  modes
end
# Builds the detailed hardware description of the node.
#
# Returns a Hash with the keys :system, :interfaces, :cpu, :disks and
# :memory. The interface, cpu and disk sections are gathered independently:
# an error in one of them is logged and leaves that section with whatever
# was collected so far.
def _detailed
detailed_meta = {
:system => _system_info,
:interfaces => [],
:cpu => {
:total => (@os[:cpu][:total].to_i rescue nil),
:real => (@os[:cpu][:real].to_i rescue nil),
:spec => [],
},
:disks => [],
# dmidecode data is preferred; ohai data is used when _dmi_memory gives nil
:memory => (_dmi_memory or _ohai_memory),
}
# MAC of the interface whose network contains the API IP; nil on any error
admin_mac = _master_ip_and_mac[:mac] rescue nil
# --- network interfaces ---
begin
(@os[:network][:interfaces] or {} rescue {}).each do |int, intinfo|
# Send info about physical interfaces only
next if intinfo[:type] !~ /^eth.*/
# Exception: eth0.0(example) have "type" => "eth" but it is not physical interface
next if int =~ /\d+\.\d+$/ or int =~ /vlan\d+$/
# Remove interfaces like eth0.101-hapr, eth1-hapr
next if int =~ /\d+-.+/
int_meta = {:name => int}
int_meta[:state] = intinfo[:state]
(intinfo[:addresses] or {} rescue {}).each do |addr, addrinfo|
if (addrinfo[:family] rescue nil) =~ /lladdr/
# link-layer address: record the MAC and query driver/speed via ethtool ioctl
int_meta[:mac] = addr
int_meta[:pxe] = admin_mac == int_meta[:mac]
begin
int_info = Rethtool::InterfaceSettings.new(int)
int_meta[:driver] = int_info.driver
int_meta[:bus_info] = int_info.bus_info
int_meta[:max_speed] = int_info.best_mode.speed
if int_info.current_mode.speed == :unknown
int_meta[:current_speed] = nil
else
int_meta[:current_speed] = int_info.current_mode.speed
end
rescue
# any Rethtool failure: current speed is reported as unknown
int_meta[:current_speed] = nil
end
elsif (addrinfo[:family] rescue nil) =~ /^inet$/
int_meta[:ip] = addr
int_meta[:netmask] = addrinfo[:netmask] if addrinfo[:netmask]
end
end
begin
# this stuff will put all non-fixed offloading mode into array
# collect names of non-fixed offloading modes
# Example of ethtool -k ethX output:
# tx-checksumming: on
# tx-checksum-ipv4: on
# tx-checksum-ip-generic: off [fixed]
# tx-checksum-ipv6: on
# tx-checksum-fcoe-crc: off [fixed]
# tx-checksum-sctp: on
# scatter-gather: on
# tx-scatter-gather: on
# tx-scatter-gather-fraglist: off [fixed]
# generic-segmentation-offload: on
# Lines containing "Features for" or "fixed" are dropped; of the remaining
# lines only the text before the first ':' is kept.
offloading_data = `ethtool -k #{int}`.split("\n").reject { |offloading|
offloading.include?("Features for") ||
offloading.include?("fixed")
}.map { |offloading|
offloading.split(':')[0]
}
# transform raw data into array of objects
int_meta[:offloading_modes] = _parse_offloading(offloading_data)
rescue
# in case if we have no `ethtool` package installed we should
# return empty array to support nailgun's rest api call
int_meta[:offloading_modes] = []
end
detailed_meta[:interfaces] << int_meta
end
rescue Exception => e
@logger.error("Error '#{e.message}' in gathering interfaces metadata: #{e.backtrace}")
end
# --- CPUs ---
begin
(@os[:cpu] or {} rescue {}).each do |cpu, cpuinfo|
# only entries whose key starts with a digit describe a single CPU
if cpu =~ /^[\d]+/ and cpuinfo
frequency = cpuinfo[:mhz].to_i rescue nil
begin
# ohai returns current frequency, try to get max if possible
# the sysfs value is divided by 1000 (presumably kHz -> MHz; confirm)
max_frequency = `cat /sys/devices/system/cpu/cpu#{cpu}/cpufreq/cpuinfo_max_freq 2>/dev/null`.to_i / 1000
frequency = max_frequency if max_frequency > 0
rescue
end
detailed_meta[:cpu][:spec] << {
:frequency => frequency,
:model => (cpuinfo[:model_name].gsub(/ +/, " ") rescue nil)
}
end
end
rescue Exception => e
@logger.error("Error '#{e.message}' in gathering cpu metadata: #{e.backtrace}")
end
# --- disks (the whole scan is limited to 30 seconds) ---
begin
Timeout::timeout(30) do
@logger.debug("Trying to find block devices")
(@os[:block_device] or {} rescue {}).each do |bname, binfo|
@logger.debug("Found block device: #{bname}")
@logger.debug("Block device info: #{binfo.inspect}")
if physical_data_storage_devices.map{|d| d[:name]}.include?(bname) && binfo
@logger.debug("Block device seems to be physical data storage: #{bname}")
block = physical_data_storage_devices.select{|d| d[:name] == bname}[0]
# removable devices are skipped unless their vendor is in REMOVABLE_VENDORS
if block[:removable] =~ /^1$/ && ! REMOVABLE_VENDORS.include?(binfo[:vendor])
next
end
# sysfs uses '!' where the /dev name has '/'
dname = bname.gsub(/!/, '/')
# 512 bytes is the size of one sector by default
block_size = 512
fn = "/sys/block/#{bname}/queue/logical_block_size"
block_size = File.read(fn).to_i if File.exist? fn
block_size = 512 if block_size == 0
detailed_meta[:disks] << {
:name => dname,
:model => binfo[:model],
:size => (binfo[:size].to_i * block_size),
:disk => block[:disk],
:extra => block[:extra],
:removable => block[:removable]
}
end
end
@logger.debug("Detailed meta disks: #{detailed_meta[:disks].inspect}")
end
rescue Exception => e
@logger.error("Error '#{e.message}' in gathering disks metadata: #{e.backtrace}")
end
detailed_meta
end
# Returns the entries of /dev/disk/by-id whose symlink target ends with
# "/<name>", each without the leading "/dev/" (e.g. "disk/by-id/...").
# The result is an empty Array when nothing matches.
def _disk_id_by_name(name)
  links = Dir["/dev/disk/by-id/**?"]
  matching = links.select do |link|
    File.readlink(link) =~ /\/#{name}$/
  end
  matching.map do |link|
    segments = link.split("/")
    segments[2..-1].join("/")
  end
end
# Returns the first entry of /dev/disk/by-path whose symlink target ends
# with "/<name>", without the leading "/dev/" (e.g. "disk/by-path/..."),
# or nil when nothing matches.
def _disk_path_by_name(name)
  link = Dir["/dev/disk/by-path/**?"].find do |candidate|
    File.readlink(candidate) =~ /\/#{name}$/
  end
  return nil unless link
  link.split("/")[2..-1].join("/")
end
# Returns the list of physical data storage devices found in /sys/block.
#
# Every element is a Hash with the keys :name (sysfs name), :disk
# (by-path name or the device name), :extra (by-id names) and :removable
# (content of the sysfs "removable" file or nil).
#
# The list is computed once and cached in @blocks; an empty result is cached
# as well, and nothing is cached when the scan raises.
#
# Raises RuntimeError when /sys/block does not exist.
def physical_data_storage_devices
  # nil means "not scanned yet"; [] is a valid cached result.
  return @blocks if @blocks
  @logger.debug("Trying to get list of physical devices")
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  raise "Path /sys/block does not exist" unless File.exist?("/sys/block")
  found = []
  Dir["/sys/block/*"].each do |block_device_dir|
    basename_dir = File.basename(block_device_dir)
    # Entries in /sys/block for cciss look like cciss!c0d1 while
    # the entries in /dev look like /dev/cciss/c0d1. udevadm uses
    # the entry in /dev so we need to replace the ! to get a valid
    # device name.
    devname = basename_dir.gsub(/!/, '/')
    @logger.debug("Getting udev properties for device: #{devname}")
    properties = {}
    `udevadm info --query=property --export --name=#{devname}`.split("\n").each do |raw_property|
      # Split on the first '=' only so values containing '=' stay intact;
      # lines without '=' carry no property and are skipped.
      key, value = raw_property.split('=', 2)
      next if value.nil?
      # --export wraps values in single quotes: remove one from each end
      properties[key.strip] = value.strip.chomp("'").reverse.chomp("'").reverse
    end
    @logger.debug("Device #{devname} udev properties: #{properties.inspect}")
    @logger.debug("Trying to find out if device #{devname} is removable or not")
    removable = nil
    removable_file = "/sys/block/#{basename_dir}/removable"
    if File.exist?(removable_file)
      removable = File.open(removable_file) { |f| f.read_nonblock(1024).strip }
    end
    @logger.debug("Device #{devname} removable parameter: #{removable.inspect}")
    next unless STORAGE_CODES.include?(properties['MAJOR'].to_i)
    @logger.debug("Device #{devname} seems to be appropriate")
    # Exclude LVM volumes (in CentOS - 253, in Ubuntu - 252) using additional check
    # (to_s: a missing DEVPATH must not abort the whole scan)
    next if properties['DEVPATH'].to_s.include?('virtual')
    found << {
      :name => basename_dir,
      :disk => _disk_path_by_name(devname) || devname,
      :extra => _disk_id_by_name(devname) || [],
      :removable => removable,
    }
  end
  @logger.debug("Final list of physical devices is: #{found.inspect}")
  @blocks = found
end
# True when the DMI product name reported by ohai is "VirtualBox";
# false otherwise, including when the DMI data cannot be read.
def _is_virtualbox
  @os[:dmi][:system][:product_name] == "VirtualBox"
rescue
  false
end
# True when the node is a VirtualBox machine or ohai reports the
# virtualization role "guest"; false when that data cannot be read.
def _is_virtual
  _is_virtualbox || @os[:virtualization][:role] == "guest"
rescue
  false
end
# Returns the manufacturer name of the node:
# * VirtualBox     - the DMI product name,
# * other virtual  - the upper-cased, stripped virtualization system,
# * physical       - the stripped DMI manufacturer.
# Returns nil when the selected value cannot be read.
def _manufacturer
  reader =
    if _is_virtualbox
      lambda { @os[:dmi][:system][:product_name] }
    elsif _is_virtual
      lambda { @os[:virtualization][:system].upcase.strip }
    else
      lambda { @os[:dmi][:system][:manufacturer].strip }
    end

  begin
    reader.call
  rescue
    nil
  end
end
# Returns the stripped DMI product name for non-virtual nodes; nil for
# virtual nodes or when the value cannot be read.
def _product_name
  return nil if _is_virtual
  begin
    @os[:dmi][:system][:product_name].strip
  rescue
    nil
  end
end
# Returns the stripped DMI serial number, or nil when it cannot be read.
def _serial
  @os[:dmi][:system][:serial_number].strip
rescue
  nil
end
# Returns the unique identifier of the machine: the stripped DMI system
# uuid from the ohai data, or nil when it is absent.
# * for kvm virtual node will contain virsh UUID
# * for physical HW that would be unique chassis id (from BIOS settings)
# * for other hypervisors - not tested
def uuid
  dmi = @os.data.fetch(:dmi, {})
  system_info = dmi.fetch(:system, {})
  raw_uuid = system_info.fetch(:uuid, nil)
  raw_uuid ? raw_uuid.strip : raw_uuid
end
# Returns a Hash describing the system (:manufacturer, :serial, :uuid,
# :product, :family, :version, :fqdn). Entries whose value is nil, empty or
# "Not Specified" are removed. :fqdn falls back to the hostname.
def _system_info
  info = {}
  info[:manufacturer] = _manufacturer
  info[:serial] = _serial
  info[:uuid] = uuid
  info[:product] = _product_name
  info[:family] = (@os[:dmi][:system][:family].strip rescue nil)
  info[:version] = (@os[:dmi][:system][:version].strip rescue nil)
  info[:fqdn] =
    begin
      @os[:fqdn].strip
    rescue
      begin
        @os[:hostname].strip
      rescue
        nil
      end
    end
  info.delete_if do |_key, value|
    value.nil? || value.empty? || value == "Not Specified"
  end
end
# Converts +size+ expressed in +unit+ into bytes.
#
# size - Integer amount.
# unit - "kb", "mb", "gb" or "tb" (case-insensitive).
#
# Returns the size in bytes, or nil for any other unit (including nil).
def _size(size, unit)
  case unit
  when /^kb$/i
    size * 1024
  when /^mb$/i
    size * 1048576
  when /^gb$/i
    size * 1073741824
  when /^tb$/i
    size * 1099511627776
  end
end
# Collects memory information from dmidecode output.
#
# dmi_output - optional, already captured dmidecode output. When nil,
#              /usr/sbin/dmidecode is executed.
#
# Returns a Hash with :devices (Array of {:size, :frequency, :type}),
# :total (bytes), :maximum_capacity (bytes) and :slots, or nil when
# dmidecode is unavailable, fails, or no installed memory is found.
def _dmi_memory(dmi_output = nil)
  if dmi_output.nil?
    begin
      dmi_output = `/usr/sbin/dmidecode`
    rescue SystemCallError
      # The binary is missing or cannot be executed. Returning nil lets the
      # caller use another source instead of aborting.
      return nil
    end
    return nil if $?.to_i != 0
  end

  info = {:devices => [], :total => 0, :maximum_capacity => 0, :slots => 0}
  dmi_output.split(/\n\n/).each do |group|
    if /^Physical Memory Array$/.match(group)
      if /^\s*Maximum Capacity:\s+(\d+)\s+(mb|gb|kb|tb)/i.match(group)
        capacity = _size($1.to_i, $2)
        # _size gives nil for a unit it does not know
        info[:maximum_capacity] += capacity if capacity
      end
      if /^\s*Number Of Devices:\s+(\d+)/i.match(group)
        info[:slots] += $1.to_i
      end
    elsif /^Memory Device$/.match(group)
      # Slots without a numeric size (e.g. "No Module Installed") are skipped
      next unless /^\s*Size:\s+(\d+)\s+(mb|gb|kb|tb)/i.match(group)
      size = _size($1.to_i, $2)
      next if size.nil?
      device_info = {:size => size}
      info[:total] += size
      # The speed is labelled "MHz" or "MT/s" depending on the dmidecode version
      if /^\s*Speed:\s+(\d+)\s+(?:MHz|MT\/s)/i.match(group)
        device_info[:frequency] = $1.to_i
      end
      if /^\s*Type:\s+(.*?)$/i.match(group)
        device_info[:type] = $1
      end
      #if /^\s*Locator:\s+(.*?)$/i.match(group)
      #  device_info[:locator] = $1
      #end
      info[:devices].push(device_info)
    end
  end
  info[:total] == 0 ? nil : info
end
# Returns {:total => bytes} computed from the ohai 'memory'/'total' string
# (a number followed by kb, mb or gb), or nil when that value cannot be
# read. :total is whatever _size returns for the parsed number and unit.
def _ohai_memory
  begin
    amount = @os['memory']['total'].gsub(/(kb|mb|gb)$/i, "").to_i
  rescue
    return nil
  end
  # The unit is the group captured by the gsub above.
  { :total => _size(amount, Regexp.last_match(1)) }
end
# Finds the interface address whose network contains the API IP (@api_ip).
#
# Returns {:ip => address, :mac => link-layer address of that interface}
# for the first match, with :mac set to nil when the interface has no
# 'lladdr' entry. Returns {} when no address matches.
def _master_ip_and_mac
  @os[:network][:interfaces].each do |_, intinfo|
    next unless intinfo.has_key?(:addresses)
    addresses = intinfo[:addresses]
    addresses.each do |addr, addrinfo|
      # Here we need to check family because IPAddr.new with bad
      # data works very slow on some environments
      # https://bugs.launchpad.net/fuel/+bug/1284571
      next unless addrinfo[:family] == 'inet'
      next if (IPAddr.new(addr) rescue nil).nil?
      net = IPAddr.new("#{addr}/#{addrinfo[:netmask]}")
      next unless net.include?(@api_ip)
      # An interface may carry no link-layer address entry; report the IP
      # with a nil MAC instead of failing.
      lladdr = addresses.find { |_, info| info[:family] == 'lladdr' }
      return {:ip => addr, :mac => (lladdr && lladdr[0])}
    end
  end
  {}
end
# Assembles the Hash that is sent to the API.
#
# It always contains :mac, :ip, :os_platform, :is_agent and :agent_checksum.
# When the detailed metadata can be computed, :ip and :mac prefer the values
# of the interface facing the API, and :manufacturer, :platform_name and
# :meta are added; any failure there is logged and the basic values are
# kept. :status is added when a node state is set.
def _data
  res = {}
  res[:mac] = (@os[:macaddress] rescue nil)
  res[:ip] = (@os[:ipaddress] rescue nil)
  res[:os_platform] = (@os[:platform] rescue nil)

  begin
    detailed_data = _detailed
    master_data = _master_ip_and_mac
    # Build the complete set first so that nothing is applied on failure.
    extra = {
      :ip => ((master_data[:ip] || @os[:ipaddress]) rescue nil),
      :mac => ((master_data[:mac] || @os[:macaddress]) rescue nil),
      :manufacturer => _manufacturer,
      :platform_name => _product_name,
      :meta => detailed_data
    }
    res.update(extra)
  rescue Exception => e
    @logger.error("Error '#{e.message}' in metadata calculation: #{e.backtrace}")
  end

  res[:status] = @node_state if @node_state
  res[:is_agent] = true
  # The checksum covers everything added so far.
  res[:agent_checksum] = createsig(res)
  res
end
# Sets @node_state to "discover" when the first line of the system type
# file is "bootstrap"; otherwise @node_state is nil.
#
# systemtype_file - path of the file to inspect
#                   (default: /etc/nailgun_systemtype).
def update_state(systemtype_file = "/etc/nailgun_systemtype")
  @node_state = nil
  return unless File.exist?(systemtype_file)
  # Block form closes the handle; gets gives nil for an empty file where
  # readline would raise EOFError.
  first_line = File.open(systemtype_file, "r") { |f| f.gets }
  @node_state = "discover" if first_line && first_line.rstrip == "bootstrap"
end
end
# Writes +data+ to +filename+ unless the file already holds exactly that
# content.
#
# logger   - object receiving #info and #warn messages.
# filename - path of the file.
# data     - String to store.
#
# A failed write is logged as a warning and not raised.
def write_data_to_file(logger, filename, data)
  # The content is read into a method-level variable. Assigning it inside
  # the File.open block would create a block-local variable, so the
  # comparison below would never see the file content.
  text = File.exist?(filename) ? File.open(filename, 'r') { |fo| fo.read } : ''
  if text == data
    logger.info("File '#{filename}' is up to date.")
    return
  end

  begin
    File.open(filename, 'w') { |fo| fo.write(data) }
    logger.info("Wrote data to file '#{filename}'. Data: #{data}")
  rescue StandardError => e
    # Logger has no #warning method; #warn is the right severity call.
    logger.warn("Can't write data to file '#{filename}'. Reason: #{e.message}")
  end
end
# --- script entry point ---
logger = Logger.new(STDOUT)
# INFO level when /etc/nailgun_uid exists, DEBUG otherwise.
logger.level = File.exist?('/etc/nailgun_uid') ? Logger::INFO : Logger::DEBUG

# random sleep is here to prevent target nodes
# from reporting to master node all at once
sleep_time = rand(30)
logger.debug("Sleep for #{sleep_time} seconds before sending request")
sleep(sleep_time)

if File.exist?('/etc/nailgun-agent/nodiscover')
  logger.info("Discover prevented by /etc/nailgun-agent/nodiscover presence.")
  exit 1
end

# The url stays nil when the config file cannot be used.
begin
  logger.info("Trying to load agent config #{AGENT_CONFIG}")
  url = YAML.load_file(AGENT_CONFIG)['url']
  logger.info("Obtained service url from config file: '#{url}'")
rescue Exception => e
  logger.info("Could not get url from configuration file: #{e.message}, trying other ways..")
end

agent = NodeAgent.new(logger, url)
agent.update_state

begin
  if File.exist?('/etc/nailgun_uid')
    resp = agent.put
    # Handle case when node was removed, but nailgun_uid exist
    resp = agent.post if resp.status == 400
  else
    resp = agent.post
    # We must not log 409 as error, after node is provisioned there will be no
    # /etc/nailgun_uid, it will be created after put request
    resp = agent.put if [409, 403].include?(resp.status)
  end

  if ![201, 200].include?(resp.status)
    logger.error resp.body
    exit 1
  end

  # Store the id assigned by the API for mcollective and for later runs.
  new_id = JSON.parse(resp.body)['id']
  mc_config = McollectiveConfig.new(logger)
  mc_config.replace_identity(new_id)
  write_data_to_file(logger, '/etc/nailgun_uid', new_id.to_s)
rescue => ex
  # NOTE(mihgen): There is no need to retry - cron will do it for us
  logger.error "#{ex.message}\n#{ex.backtrace}"
end