fuel-nailgun-agent/agent

1001 lines
34 KiB
Ruby
Executable File

#!/usr/bin/env ruby
# Copyright 2013 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
begin
require 'rubygems'
rescue LoadError
end
require 'ohai/system'
require 'json'
require 'httpclient'
require 'logger'
require 'optparse'
require 'yaml'
require 'ipaddr'
require 'rethtool'
require 'digest'
require 'timeout'
require 'uri'
# TODO(vsharshov): 'pathname' is only used as a stand-in for File.realpath; drop this require once Ruby 1.8 support is excluded
require 'pathname'
require 'rexml/document'
include REXML
# The agent rewrites files under /etc and restarts services, so it refuses to
# run for anybody but root.
if Process.euid != 0
  puts "You must be root"
  exit 1
end
# Fixed search path for the external commands the agent invokes by bare name.
ENV['PATH'] = "/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin"
# Path of the YAML file with agent settings (a missing file is tolerated by
# NodeAgent#get_settings).
AGENT_CONFIG = "/etc/nailgun-agent/config.yaml".freeze
# Block device major numbers that are treated as data storage.
# Look at https://github.com/torvalds/linux/blob/master/Documentation/devices.txt
# KVM virtio volumes have code 252 in CentOS, but 253 in Ubuntu.
# NVMe has code 259.
# When changing this list, please also update the device codes here:
# https://github.com/stackforge/fuel-astute/blob/master/mcagents/erase_node.rb#L81
STORAGE_CODES = [3, 8, 9, 65, 66, 67, 68, 69, 70, 71, 104, 105, 106, 107, 108, 109, 110, 111, 202, 252, 253, 259].freeze
# Block device vendors that are reported even when the device is flagged as
# removable.
REMOVABLE_VENDORS = [
  "Adaptec", "IBM", "ServeRA",
].freeze
# PCI vendor IDs for Adaptec; removable block devices behind such an adapter
# are reported as well.
REMOVABLE_PCI_VENDORS = [
  "0x1044", "0x9004", "0x9005",
].freeze
# Default data structure for SR-IOV. Frozen so that the shared default cannot
# be modified by accident; take a copy with #dup before changing it.
DEFAULT_SRIOV = {
  "sriov_totalvfs" => 0,
  "available" => false,
  "pci_id" => ""
}.freeze
# Builds a canonical string representation of +body+.
#
# Hashes are turned into a sorted list of "key=>value" strings (so the
# insertion order of the keys does not matter) which is then digested as an
# Array; Arrays are joined with '|'; anything else becomes "ClassName:to_s".
def digest(body)
  case body
  when Hash
    pairs = body.map { |key, value| "#{digest(key)}=>#{digest(value)}" }
    digest(pairs.sort)
  when Array
    body.map { |item| digest(item) }.join('|')
  else
    "#{body.class}:#{body}"
  end
end
# Returns the SHA1 hex signature of the canonical string built by #digest
# for +body+.
def createsig(body)
  canonical = digest(body)
  Digest::SHA1.hexdigest(canonical)
end
# Reads and updates the "identity" setting of the MCollective server config.
class McollectiveConfig
  # logger - object responding to #info, used to report what was done.
  def initialize(logger)
    @logger = logger
    @configfile = '/etc/mcollective/server.cfg'
  end

  # Returns the value of the last "key = value" line whose key equals
  # +find_key+, or nil when the config has no such line.
  def get_config_by_key(find_key)
    found_value = nil
    # This code is from mcollective's sources.
    # The block form makes sure the file handle is closed after parsing.
    File.open(@configfile, "r") do |file|
      file.each_line do |line|
        # strip blank spaces, tabs etc off the end of all lines
        line = line.gsub(/\s*$/, "")
        next if line =~ /^#|^$/
        next unless line =~ /(.+?)\s*=\s*(.+)/
        found_value = $2 if $1 == find_key
      end
    end
    found_value
  end

  # Makes sure the config contains "identity = <new_id>" and restarts the
  # mcollective service when the file had to be changed.
  #
  # Raises RuntimeError when +new_id+ contains anything but word characters,
  # dots and dashes.
  def replace_identity(new_id)
    # check if id complies reqs
    # \A and \z anchor the whole string: with ^ and $ a multi-line value with
    # one valid line would pass and inject extra lines into the config.
    raise 'Identities can only match /\w\.\-/' unless new_id.to_s =~ /\A[\w\.\-]+\z/
    value_from_config = get_config_by_key('identity')
    if value_from_config == new_id.to_s
      @logger.info "MCollective is up to date with identity = #{new_id}"
    else
      config = File.open(@configfile, "rb") { |file| file.read }
      if value_from_config
        # Key found, but it has other value
        @logger.info "Replacing identity in mcollective server.cfg to new value = '#{new_id}'"
        # Matches exactly the lines get_config_by_key recognizes as the
        # identity key, including ones that use tabs before "=".
        config.gsub!(/^identity[ \t]*=.*$/, "identity = #{new_id}")
        File.open(@configfile, "w") { |f| f.write(config) }
      else # if key was not found
        config += "\nidentity = #{new_id}\n"
        @logger.info "Identity in mcollective server.cfg has not been found. Setting to '#{new_id}'"
        File.open(@configfile, "w") { |f| f.write(config) }
      end
      puts `service mcollective restart`
    end
  end
end
# A named offloading mode of a network interface together with its nested
# sub-modes.
class Offloading
  # name - mode name; sub - collection of nested modes.
  def initialize(name, sub)
    @name = name
    @sub = sub
  end

  # Serializes the mode as {"name": ..., "state": null, "sub": [...]}.
  # +options+ is accepted but not used.
  def to_json(options = {})
    payload = {'name' => @name, 'state' => nil, 'sub' => @sub}
    payload.to_json
  end
end
class NodeAgent
API_DEFAULT_ADDRESS = "localhost"
API_DEFAULT_PORT = "8443"
API_LEGACY_PORT = "8000"
# Loads the settings, works out the API endpoint and collects the system
# information (ohai data and NUMA topology) used by the other methods.
#
# logger - logger used for all diagnostics of the agent.
def initialize(logger)
  @logger = logger
  @settings = get_settings()
  configured_host = begin
    # to_s: a missing 'url' setting must lead to the default address rather
    # than to an ArgumentError from URI(nil).
    URI(@settings['url'].to_s).host
  rescue URI::InvalidURIError
    nil
  end
  # `||` instead of `or`: `x = a or b` parses as `(x = a) or b`, so the
  # default address was never actually assigned.
  @api_ip = configured_host || API_DEFAULT_ADDRESS
  scheme, api_port = get_scheme_and_port()
  @api_url = "#{scheme}://#{@api_ip}:#{api_port}/api"
  @logger.info("API URL is #{@api_url}")
  @os = ohai_system_info
  @numa_topology = get_numa_topology
end
# Probes the API host on the default HTTPS port.
#
# Returns ["https", API_DEFAULT_PORT] when the request goes through, or
# ["http", API_LEGACY_PORT] when the connection is refused. Any other error
# raised by the request is not handled here.
def get_scheme_and_port
  htclient.get("https://#{@api_ip}:#{API_DEFAULT_PORT}/")
  ["https", API_DEFAULT_PORT]
rescue Errno::ECONNREFUSED
  @logger.warn("Connection Refused catched when trying connect to HTTPS port. Use plain HTTP")
  ["http", API_LEGACY_PORT]
end
# Transforms a space separated list of "key=value" tokens into a Hash.
# Only the first "=" of a token separates the key from the value.
# For example, line: "initrd=/images/bootstrap/initramfs.img ksdevice=bootif lang= quiet"
# will be transformed into:
# {"initrd"=>"/images/bootstrap/initramfs.img", "ksdevice"=>"bootif", "lang"=>"", "quiet"=>nil}
def string_to_hash(string)
  string.split(' ').each_with_object({}) do |token, parsed|
    name, setting = token.split(/=/, 2)
    parsed[name] = setting
  end
end
# Returns the agent settings: the contents of AGENT_CONFIG overridden by the
# key=value pairs found on the kernel command line. Either source contributes
# nothing when it cannot be read or parsed.
def get_settings
  agent_settings = begin
    YAML.load_file(AGENT_CONFIG)
  rescue StandardError
    {}
  end
  # An empty or scalar-only YAML file does not load as a Hash and so cannot
  # be merged.
  agent_settings = {} unless agent_settings.is_a?(Hash)
  cmdline_settings = begin
    string_to_hash(File.read("/proc/cmdline"))
  rescue StandardError
    {}
  end
  agent_settings.merge(cmdline_settings)
end
# Runs all ohai plugins and returns the populated Ohai::System object.
#
# When the collection does not finish within 30 seconds it is repeated with
# the block device and filesystem plugins disabled:
# when one of disks is broken, do not collect data about block devices.
# More details: https://bugs.launchpad.net/fuel/+bug/1396086
def ohai_system_info
  collect = lambda do
    system = Ohai::System.new()
    system.all_plugins
    system
  end
  begin
    Timeout::timeout(30) { collect.call }
  rescue Timeout::Error
    Ohai::Config[:disabled_plugins]=['linux::block_device', 'linux::filesystem']
    collect.call
  end
end
# Sends the node data as JSON with HTTP PUT to "<api url>/nodes/agent/".
# A status outside of 200..399 is logged as an error.
# Returns the response object.
def put
  @logger.debug("Trying to put host info into #{@api_url}")
  response = htclient.put("#{@api_url}/nodes/agent/", _data.to_json, {"Content-Type" => "application/json"})
  @logger.debug("Response: status: #{response.status} body: #{response.body}")
  failed = response.status < 200 || response.status >= 400
  @logger.error("HTTP PUT failed: #{response.inspect}") if failed
  response
end
# Sends the node data as JSON with HTTP POST to "<api url>/nodes/".
# Returns the response object; the status is left to the caller to check.
def post
  @logger.debug("Trying to create host using #{@api_url}")
  response = htclient.post("#{@api_url}/nodes/", _data.to_json, {"Content-Type" => "application/json"})
  @logger.debug("Response: status: #{response.status} body: #{response.body}")
  response
end
# Returns a new HTTP client with 10 second connect, send and receive timeouts.
# NOTE(review): certificate verification is switched off and TLSv1 is forced
# for SSL connections.
def htclient
  HTTPClient.new.tap do |client|
    client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
    client.ssl_config.ssl_version = :TLSv1
    client.connect_timeout = 10
    client.send_timeout = 10
    # (mihgen): Nailgun may hang for a while, but 10sec should be enough for him to respond
    client.receive_timeout = 10
  end
end
# Returns one {:name, :addresses} entry per network interface known to ohai,
# followed by two single-key entries with the default interface and the
# default gateway.
def _interfaces
  listed = @os[:network][:interfaces].map do |name, info|
    { :name => name, :addresses => info["addresses"] }
  end
  listed + [
    { "default_interface" => @os["network"]["default_interface"] },
    { "default_gateway" => @os["network"]["default_gateway"] },
  ]
end
# Transforms a flat list of offloading mode names into a tree of Offloading
# objects. Names starting with a tab are sub-modes of the closest preceding
# name; the leading tab is removed before they are parsed recursively.
#
# The input array is consumed (left empty) and every level of the result is
# in reverse order of the input.
#
# Example of the result once serialized to JSON:
# [{
# "state":null,
# "sub":[
# {
# "state":null,
# "sub":[],
# "name":"tx-checksum-ipv6"
# },
# ...........
# ],
# "name":"tx-checksumming"
# },
# {
# "state":null,
# "sub":[],
# "name":"generic-segmentation-offload"
# },
# .............
# ]
def _parse_offloading(offloading_arr)
  groups = []
  until offloading_arr.empty?
    head = offloading_arr.shift
    children = []
    children << offloading_arr.shift[1..-1] while offloading_arr.any? && offloading_arr.first.start_with?("\t")
    groups << [head, children]
  end
  groups.reverse.map { |head, children| Offloading.new(head, _parse_offloading(children)) }
end
# Gets information about SR-IOV for specified pci slot
# using 'lspci' utility. Example of output to parse:
# ...
# Capabilities: [160 v1] Single Root I/O Virtualization (SR-IOV)
# IOVCap: Migration-, Interrupt Message Number: 000
# IOVCtl: Enable- Migration- Interrupt- MSE- ARIHierarchy-
# IOVSta: Migration-
# Initial VFs: 8, Total VFs: 8, Number of VFs: 0, Function Dependency Link: 01
# VF offset: 128, stride: 4, Device ID: 1520
# Supported Page Size: 00000553, System Page Size: 00000001
# Region 0: Memory at 0000000090040000 (64-bit, prefetchable)
# Region 3: Memory at 0000000090060000 (64-bit, prefetchable)
# VF Migration: offset: 00000000, BIR: 0
# ...
#
# int          - interface name, used to read the PCI vendor from sysfs.
# int_bus_info - PCI slot of the interface passed to lspci.
#
# Returns a Hash with the keys of DEFAULT_SRIOV. A copy of the defaults is
# returned when the device has no SR-IOV capability or anything goes wrong.
def sriov_info(int, int_bus_info)
  sriov = DEFAULT_SRIOV.dup
  lspci = _get_lspci_info(int_bus_info)
  if lspci.match(/.*Capabilities:.*SR-IOV.*/)
    sriov["available"] = true
    # NOTE(review): one is subtracted from the reported "Total VFs"; the
    # reason is not visible in this file - confirm before changing.
    sriov["sriov_totalvfs"] = lspci.scan(/\s+Total\s+VFs:\s+(\d+)/).last.first.to_i - 1
    vf_vendor = File.read("/sys/class/net/#{int}/device/vendor").chomp.gsub(/^0x/, '')
    # The device ID is hexadecimal (e.g. "10ed"), matching digits only would
    # cut it at the first letter.
    vf_device = lspci.scan(/VF\s+.*\s+Device\s+ID:\s+([0-9a-fA-F]+)/).last.first
    sriov["pci_id"] = "#{vf_vendor}:#{vf_device}"
  end
  sriov
rescue
  # Hand out a copy so that callers never get hold of the shared default.
  DEFAULT_SRIOV.dup
end
# Builds the detailed hardware description of the node.
#
# Returns a Hash with the keys :system, :interfaces, :cpu, :disks, :memory,
# :pci_devices and :numa_topology. Interfaces, cpu specs and disks are
# gathered independently of each other: a failure in one of these sections is
# logged and leaves that section with whatever was collected so far.
def _detailed
  detailed_meta = {
    :system => _system_info,
    :interfaces => [],
    :cpu => {
      :total => (@os[:cpu][:total].to_i rescue nil),
      :real => (@os[:cpu][:real].to_i rescue nil),
      :spec => [],
    },
    :disks => [],
    :memory => (_dmi_memory or _ohai_memory),
    :pci_devices => _get_pci_dev_list,
    :numa_topology => @numa_topology,
  }
  # MAC of the interface facing the master node; used to flag the PXE interface.
  admin_mac = (_master_ip_and_mac[:mac] or @os[:macaddress]) rescue nil
  begin
    (@os[:network][:interfaces] or {} rescue {}).each do |int, intinfo|
      # Send info about physical interfaces only
      next if intinfo[:encapsulation] !~ /^Ethernet.*/
      # Avoid virtual devices like loopback, tunnels, bonding, vlans ...
      # TODO(vsharshov): replace below lines by this string after excluding Ruby 1.8
      # next if File.realpath("/sys/class/net/#{int}") =~ /virtual/
      next if Pathname.new("/sys/class/net/#{int}").realpath.to_s =~ /virtual/
      # Avoid wireless
      next if File.exist?("/sys/class/net/#{int}/phy80211") ||
              File.exist?("/sys/class/net/#{int}/wireless")
      # Skip virtual functions
      # (File.exist? rather than File.exists?: the latter was removed in
      # Ruby 3.2 and would abort the whole interface collection there)
      next if File.exist?("/sys/class/net/#{int}/device/physfn")
      int_meta = {:name => int}
      int_meta[:interface_properties] = {}
      int_meta[:state] = intinfo[:state]
      (intinfo[:addresses] or {} rescue {}).each do |addr, addrinfo|
        if (addrinfo[:family] rescue nil) =~ /lladdr/
          # Get original mac excluding case with empty EEPROM data
          perm_addr = `ethtool -P #{int}`
          begin
            # The look-behind is evaluated at run time so that a Ruby without
            # support for it ends up in the SyntaxError branch below instead
            # of failing to load the whole file.
            re = eval '/(?<=Permanent address: )(?!00(:00){5}).+/'
          rescue SyntaxError
            re = perm_addr.match(/(00(:00){5})+/).nil? ? /[0-9a-f]+(:[0-9a-f]+){5}$/ : nil
          end
          # Fall back to the address reported by ohai when ethtool gave nothing usable.
          int_meta[:mac] = perm_addr.match(re)[0] rescue addr
          int_meta[:pxe] = admin_mac == int_meta[:mac]
          begin
            int_info = Rethtool::InterfaceSettings.new(int)
            int_meta[:driver] = int_info.driver
            int_meta[:bus_info] = int_info.bus_info
            int_meta[:max_speed] = int_info.best_mode.speed
            if int_info.current_mode.speed == :unknown
              int_meta[:current_speed] = nil
            else
              int_meta[:current_speed] = int_info.current_mode.speed
            end
          rescue
            int_meta[:current_speed] = nil
          end
        elsif (addrinfo[:family] rescue nil) =~ /^inet$/
          int_meta[:ip] = addr
          int_meta[:netmask] = addrinfo[:netmask] if addrinfo[:netmask]
        end
      end
      begin
        # this stuff will put all non-fixed offloading mode into array
        # collect names of non-fixed offloading modes
        # Example of ethtool -k ethX output:
        # tx-checksumming: on
        # tx-checksum-ipv4: on
        # tx-checksum-ip-generic: off [fixed]
        # tx-checksum-ipv6: on
        # tx-checksum-fcoe-crc: off [fixed]
        # tx-checksum-sctp: on
        # scatter-gather: on
        # tx-scatter-gather: on
        # tx-scatter-gather-fraglist: off [fixed]
        # generic-segmentation-offload: on
        offloading_data = `ethtool -k #{int}`.split("\n").reject { |offloading|
          offloading.include?("Features for") ||
          offloading.include?("fixed")
        }.map { |offloading|
          offloading.split(':')[0]
        }
        # transform raw data into array of objects
        int_meta[:offloading_modes] = _parse_offloading(offloading_data)
      rescue
        # in case if we have no `ethtool` package installed we should
        # return empty array to support nailgun's rest api call
        int_meta[:offloading_modes] = []
      end
      # Getting SR-IOV info
      int_meta[:interface_properties][:sriov] = sriov_info(int, int_meta[:bus_info])
      detailed_meta[:interfaces] << int_meta
    end
  rescue Exception => e
    @logger.error("Error '#{e.message}' in gathering interfaces metadata: #{e.backtrace}")
  end
  begin
    (@os[:cpu] or {} rescue {}).each do |cpu, cpuinfo|
      # Only the numbered entries describe individual CPUs.
      if cpu =~ /^[\d]+/ and cpuinfo
        frequency = cpuinfo[:mhz].to_i rescue nil
        begin
          # ohai returns current frequency, try to get max if possible
          max_frequency = `cat /sys/devices/system/cpu/cpu#{cpu}/cpufreq/cpuinfo_max_freq 2>/dev/null`.to_i / 1000
          frequency = max_frequency if max_frequency > 0
        rescue
        end
        detailed_meta[:cpu][:spec] << {
          :frequency => frequency,
          :model => (cpuinfo[:model_name].gsub(/ +/, " ") rescue nil)
        }
      end
    end
  rescue Exception => e
    @logger.error("Error '#{e.message}' in gathering cpu metadata: #{e.backtrace}")
  end
  begin
    Timeout::timeout(30) do
      @logger.debug("Trying to find block devices")
      (@os[:block_device] or {} rescue {}).each do |bname, binfo|
        @logger.debug("Found block device: #{bname}")
        @logger.debug("Block device info: #{binfo.inspect}")
        if physical_data_storage_devices.map{|d| d[:name]}.include?(bname) && binfo
          @logger.debug("Block device seems to be physical data storage: #{bname}")
          block = physical_data_storage_devices.select{|d| d[:name] == bname}[0]
          # Removable devices are reported only when either their vendor or
          # the PCI vendor of their adapter is listed as acceptable.
          if block[:removable] =~ /^1$/ && ! REMOVABLE_VENDORS.include?(binfo[:vendor])
            pci_vendor_id = _get_pci_vendor_id(bname)
            @logger.debug("Block device #{bname} is removable. PCI vendor ID: #{pci_vendor_id}")
            unless REMOVABLE_PCI_VENDORS.include?(pci_vendor_id)
              next
            end
            @logger.debug("Block device #{bname} is accepted by PCI vendor ID")
          end
          dname = bname.gsub(/!/, '/')
          # ohai reports the disk size according to /sys/block/#{bname}
          # which is always measured in 512 bytes blocks, no matter what
          # the physical (minimal unit which can be atomically written)
          # or logical (minimal # unit which can be addressed) block sizes are, see
          # http://lxr.free-electrons.com/source/include/linux/types.h?v=4.4#L124
          # http://lxr.free-electrons.com/source/drivers/scsi/sd.c?v=4.4#L2340
          block_size = 512
          detailed_meta[:disks] << {
            :name => dname,
            :model => binfo[:model],
            :size => (binfo[:size].to_i * block_size),
            :disk => block[:disk],
            :extra => block[:extra],
            :removable => block[:removable]
          }
        end
      end
      @logger.debug("Detailed meta disks: #{detailed_meta[:disks].inspect}")
    end
  rescue Exception => e
    @logger.error("Error '#{e.message}' in gathering disks metadata: #{e.backtrace}")
  end
  detailed_meta
end
# Returns the PCI vendor ID (e.g. "0x10b5") of the adapter the block device
# +devname+ is attached to.
#
# Returns nil when the device has no PCI ancestor or when the lookup fails or
# takes longer than 30 seconds (failures are logged).
def _get_pci_vendor_id(devname)
  Timeout::timeout(30) do
    udevadm_walk = {}
    devpath = nil
    # expected output of `udevadm info --attribute-walk --name=#{devname}`:
    #
    # Udevadm info starts with the device specified by the devpath and then
    # walks up the chain of parent devices. It prints for every device
    # found, all possible attributes in the udev rules key format.
    # A rule to match, can be composed by the attributes of the device
    # and the attributes from one single parent device.
    #
    # looking at device '/devices/pci0000:00/0000:00:1e.0/0000:0d:02.0/8:0:0:1/block/sdc':
    # KERNEL=="sdc"
    # SUBSYSTEM=="block"
    # DRIVER==""
    # ATTR{ro}=="0"
    # ATTR{size}=="30881792"
    # ATTR{removable}=="1"
    #
    # looking at parent device '/devices/pci0000:00/0000:00:1e.0/0000:0d:02.0':
    # Disk adapter plugged into PCIe slot, we need its PCI vendor ID
    # KERNELS=="0000:0d:02.0"
    # SUBSYSTEMS=="pci"
    # DRIVERS==""
    # ATTRS{device}=="0x9030"
    # ATTRS{vendor}=="0x10b5"
    #
    # looking at parent device '/devices/pci0000:00/0000:00:1e.0':
    # PCIe slot reported as a PCI bridge device, its PCI vendor ID is NOT what we need
    # KERNELS=="0000:00:1e.0"
    # SUBSYSTEMS=="pci"
    # DRIVERS==""
    # ATTRS{device}=="0x244e"
    # ATTRS{vendor}=="0x8086"
    #
    # looking at parent device '/devices/pci0000:00':
    # KERNELS=="pci0000:00"
    # SUBSYSTEMS==""
    # DRIVERS==""
    `udevadm info --attribute-walk --name=#{devname}`.split("\n").each do |line|
      line.strip!
      next unless line.start_with?('looking', 'KERNEL', 'SUBSYSTEM', 'DRIVER', 'ATTR')
      if line.start_with?('looking')
        # A new device section starts; the path is the quoted part of the line.
        devpath = line.split("'")[1]
        udevadm_walk[devpath] = {}
      else
        key, value = line.split("==").map { |part| part.strip }
        udevadm_walk[devpath][key] = value.gsub(/(^")|("$)/, '')
      end
    end
    # We need a vendor ID of a disk adapter rather than vendor ID of the PCIe slot where it's plugged into.
    # Therefore we should pick the device with SUBSYSTEMS==pci having the longest devpath.
    # For the example given above, vendor ID should be found as '0x10b5'.
    # Next ID of '0x8086' belongs to PCIe slot to which PCIe RAID disk adapter is inserted.
    # Every parent path is a prefix of its children, so the greatest string
    # is also the longest one.
    pci_paths = udevadm_walk.keys.select { |path| udevadm_walk[path]['SUBSYSTEMS'] == 'pci' }
    nearest_pci = pci_paths.max
    # Devices without any PCI ancestor simply have no vendor ID.
    nearest_pci && udevadm_walk[nearest_pci]['ATTRS{vendor}']
  end
rescue => e
  @logger.error("Error '#{e.message}' in obtaining PCI vendor ID: #{e.backtrace}")
  # Make the failure result explicit instead of leaking the logger's return value.
  nil
end
# Returns the /dev/disk/by-id links pointing at device +name+, written
# relative to /dev (e.g. "disk/by-id/<link>"). Empty when there are none.
def _disk_id_by_name(name)
  links = Dir["/dev/disk/by-id/**?"].select { |link| File.readlink(link) =~ /\/#{name}$/ }
  links.map { |link| link.split("/")[2..-1].join("/") }
end
# Returns the first /dev/disk/by-path link pointing at device +name+, written
# relative to /dev (e.g. "disk/by-path/<link>"), or nil when there is none.
def _disk_path_by_name(name)
  link = Dir["/dev/disk/by-path/**?"].find { |candidate| File.readlink(candidate) =~ /\/#{name}$/ }
  return nil unless link
  link.split("/")[2..-1].join("/")
end
# Parses the output of `mdadm --detail`.
#
# Returns a Hash with the recognized header fields (see line_patterns) keyed
# by field name plus 'devices' - the list of component device paths.
# Parsing problems are logged and whatever was parsed so far is returned.
#
# Sample mdadm --detail /dev/md127 output:
# /dev/md127:
# Version : 1.2
# Creation Time : Thu Oct 29 16:12:00 2015
# Raid Level : raid1
# Array Size : 1048000 (1023.61 MiB 1073.15 MB)
# Used Dev Size : 1048000 (1023.61 MiB 1073.15 MB)
# Raid Devices : 2
# Total Devices : 2
# Persistence : Superblock is persistent
#
# Update Time : Sun Nov 1 00:57:31 2015
# State : clean
# Active Devices : 2
# Working Devices : 2
# Failed Devices : 0
# Spare Devices : 0
#
# Name : agordeev:123 (local to host agordeev)
# UUID : 7aa70afc:742a9fa6:45f9f5a1:25a2585f
# Events : 20
#
# Number Major Minor RaidDevice State
# 0 252 2 0 active sync /dev/dm-2
# 1 252 3 1 active sync /dev/dm-3
#
def _parse_md(data)
  md = {}
  begin
    # The optional group makes split return three parts: the header, the
    # matched "State" column title and the component table.
    description, _, components = data.split(/Number\s+Major\s+Minor\s+RaidDevice\s+(State\s+)?/m)
    # 'Container' is needed by _find_fake_raid_mds, which recognizes fake
    # raid arrays by the presence of that field.
    line_patterns = ['Version', 'Raid Level', 'Raid Devices', 'Active Devices',
                     'Spare Devices', 'Failed Devices', 'State', 'UUID', 'Container']
    # The first line only names the array; an empty header yields no lines.
    header_lines = (description.split("\n")[1..-1] rescue nil) || []
    header_lines.each do |line|
      line.strip!
      next if line == ""
      line_patterns.each { |pattern| md[pattern] = line.split(" : ").last if line.start_with?(pattern) }
    end
    md['devices'] = []
    (components.split("\n") rescue []).each do |line|
      line.strip!
      next if line == ""
      # The device path is the last column of a component row.
      md['devices'] << line.split().last
    end
  rescue => e
    @logger.error("Error '#{e.message}' in parsing MD: #{e.backtrace}")
  end
  md
end
# Looks through the md* entries of /sys/block for arrays that mdadm reports
# as members of a container.
#
# Returns two arrays: the names of such arrays and the component devices they
# are built from. Arrays with raid level 'container' are skipped. Errors are
# logged and whatever was found so far is returned.
def _find_fake_raid_mds()
  mds = []
  devices = []
  begin
    names = Dir["/sys/block/*"].map { |dir| File.basename(dir).gsub(/!/, '/') }
    names.select { |name| name.start_with?('md') }.each do |devname|
      md_data = _parse_md(`mdadm --detail /dev/#{devname}`)
      next if md_data['Raid Level'] == 'container'
      next unless md_data.has_key?("Container")
      devices.concat((md_data['devices'] or []))
      mds << devname
    end
  rescue Exception => e
    @logger.error("Error '#{e.message}' in finding fake raid MDs: #{e.backtrace}")
  end
  return mds, devices
end
# Returns the list of block devices regarded as physical data storage, each
# described by a Hash with :name (entry name in /sys/block), :disk (by-path
# link or device name), :extra (by-id links) and :removable.
#
# A non-empty result is cached in @blocks and reused by later calls.
# Raises RuntimeError when /sys/block does not exist.
def physical_data_storage_devices
  @blocks ||= []
  return @blocks unless @blocks.empty?
  @logger.debug("Trying to get list of physical devices")
  # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
  raise "Path /sys/block does not exist" unless File.exist?("/sys/block")
  mds, devices = _find_fake_raid_mds()
  Dir["/sys/block/*"].each do |block_device_dir|
    basename_dir = File.basename(block_device_dir)
    # Entries in /sys/block for cciss look like cciss!c0d1 while
    # the entries in /dev look like /dev/cciss/c0d1. udevadm uses
    # the entry in /dev so we need to replace the ! to get a valid
    # device name.
    devname = basename_dir.gsub(/!/, '/')
    # Skipping MD if it's a container. Also skipping underlying
    # devices from which that container is composed.
    next if devices.include?("/dev/#{devname}")
    next if devname.start_with?('md') && !mds.include?(devname)
    @logger.debug("Getting udev properties for device: #{devname}")
    properties = {}
    `udevadm info --query=property --export --name=#{devname}`.split("\n").each do |raw_property|
      # Only the first "=" separates the key, values may contain "=" too.
      key, value = raw_property.split('=', 2)
      # Lines without "=" carry no property.
      next if value.nil?
      # --export wraps the values into single quotes.
      properties[key.strip] = value.strip.gsub(/\A'|'\z/, '')
    end
    @logger.debug("Device #{devname} udev properties: #{properties.inspect}")
    @logger.debug("Trying to find out if device #{devname} is removable or not")
    removable = nil
    removable_flag = "/sys/block/#{basename_dir}/removable"
    if File.exist?(removable_flag)
      removable = File.open(removable_flag){ |f| f.read_nonblock(1024).strip }
    end
    @logger.debug("Device #{devname} removable parameter: #{removable.inspect}")
    if STORAGE_CODES.include?(properties['MAJOR'].to_i)
      @logger.debug("Device #{devname} seems to be appropriate")
      # Exclude LVM volumes (in CentOS - 253, in Ubuntu - 252) using additional check
      unless properties['DEVPATH'].to_s.include?('virtual/block/dm')
        @blocks << {
          :name => basename_dir,
          :disk => _disk_path_by_name(devname) || devname,
          :extra => _disk_id_by_name(devname) || [],
          :removable => removable,
        }
      end
    end
  end
  @logger.debug("Final list of physical devices is: #{@blocks.inspect}")
  @blocks
end
# True when the DMI product name is "VirtualBox"; false otherwise, including
# the case when the DMI data cannot be read.
def _is_virtualbox
  @os[:dmi][:system][:product_name] == "VirtualBox"
rescue
  false
end
# True for VirtualBox machines and for machines whose virtualization role is
# "guest"; false otherwise, including the case when the data cannot be read.
def _is_virtual
  _is_virtualbox || @os[:virtualization][:role] == "guest"
rescue
  false
end
# Returns the manufacturer of the machine: the DMI product name on
# VirtualBox, the upper-cased virtualization system on other virtual
# machines and the DMI manufacturer otherwise. nil when the value cannot be
# read.
def _manufacturer
  source =
    if _is_virtualbox
      lambda { @os[:dmi][:system][:product_name] }
    elsif _is_virtual
      lambda { @os[:virtualization][:system].upcase.strip }
    else
      lambda { @os[:dmi][:system][:manufacturer].strip }
    end
  begin
    source.call
  rescue
    nil
  end
end
# Returns the stripped DMI product name of a physical machine; nil for
# virtual machines or when the value cannot be read.
def _product_name
  return nil if _is_virtual
  begin
    @os[:dmi][:system][:product_name].strip
  rescue
    nil
  end
end
# Returns the stripped DMI serial number or nil when it cannot be read.
def _serial
  @os[:dmi][:system][:serial_number].strip
rescue
  nil
end
# Returns unique identifier of machine (the stripped DMI system UUID) or nil
# when ohai reports none:
# * for kvm virtual node will contain virsh UUID
# * for physical HW that would be unique chassis id (from BIOS settings)
# * for other hypervisors - not tested
def uuid
  system_section = @os.data.fetch(:dmi, {}).fetch(:system, {})
  raw_uuid = system_section.fetch(:uuid, nil)
  raw_uuid ? raw_uuid.strip : raw_uuid
end
# Returns the general description of the machine. Entries whose value is
# nil, empty or "Not Specified" are left out.
def _system_info
  info = {
    :manufacturer => _manufacturer,
    :serial => _serial,
    :uuid => uuid,
    :runtime_uuid => @settings['runtime_uuid'],
    :product => _product_name,
    :family => (@os[:dmi][:system][:family].strip rescue nil),
    :version => (@os[:dmi][:system][:version].strip rescue nil),
    # the fqdn, else the plain hostname, else nothing
    :fqdn => (@os[:fqdn].strip rescue @os[:hostname].strip rescue nil),
  }
  info.reject { |_key, value| value.nil? || value.empty? || value == "Not Specified" }
end
# Converts +size+ given in +unit+ ("kb", "mb" or "gb", in any letter case)
# to bytes. Returns nil for any other unit.
def _size(size, unit)
  multipliers = [
    [/^kb$/i, 1024],
    [/^mb$/i, 1048576],
    [/^gb$/i, 1073741824],
  ]
  matched = multipliers.find { |pattern, _multiplier| pattern === unit }
  matched && size * matched[1]
end
# Collects memory information from the output of dmidecode.
#
# Returns a Hash with :devices (one entry per populated memory device with
# :size in bytes and, when reported, :frequency and :type), :total (bytes),
# :maximum_capacity (bytes) and :slots. Returns nil when dmidecode cannot be
# run, fails, or reports no memory, so that the caller can fall back to
# another source.
def _dmi_memory
  begin
    dmi = `/usr/sbin/dmidecode`
  rescue SystemCallError => e
    # e.g. dmidecode is not installed; this is not fatal for the agent.
    @logger.warn("Can't run dmidecode. Reason: #{e.message}")
    return nil
  end
  return nil if $?.to_i != 0
  info = {:devices => [], :total => 0, :maximum_capacity => 0, :slots => 0}
  # dmidecode separates the records with empty lines.
  dmi.split(/\n\n/).each do |group|
    if /^Physical Memory Array$/.match(group)
      if /^\s*Maximum Capacity:\s+(\d+)\s+(mb|gb|kb)/i.match(group)
        info[:maximum_capacity] += _size($1.to_i, $2)
      end
      if /^\s*Number Of Devices:\s+(\d+)/i.match(group)
        info[:slots] += $1.to_i
      end
    elsif /^Memory Device$/.match(group)
      device_info = {}
      if /^\s*Size:\s+(\d+)\s+(mb|gb|kb)/i.match(group)
        size = _size($1.to_i, $2)
        device_info[:size] = size
        info[:total] += size
      else
        # No numeric size: skip this device record.
        next
      end
      if /^\s*Speed:\s+(\d+)\s+MHz/i.match(group)
        device_info[:frequency] = $1.to_i
      end
      if /^\s*Type:\s+(.*?)$/i.match(group)
        device_info[:type] = $1
      end
      #if /^\s*Locator:\s+(.*?)$/i.match(group)
      # device_info[:locator] = $1
      #end
      info[:devices].push(device_info)
    end
  end
  info[:total] == 0 ? nil : info
end
# Returns {:total => bytes} computed from the total memory reported by ohai
# (a number with a kB/MB/GB suffix), or nil when that value cannot be read.
def _ohai_memory
  begin
    amount = @os['memory']['total'].gsub(/(kb|mb|gb)$/i, "").to_i
  rescue
    return nil
  end
  # $1 still holds the unit suffix matched by the gsub above.
  { :total => _size(amount, $1) }
end
# Finds the local interface address that shares a network with +local_addr+.
#
# Returns {:ip => <local IPv4 address>, :mac => <lladdr of that interface>}
# for the first match (:mac is nil when the interface reports no lladdr), or
# {} when nothing matches or +local_addr+ is not an IP address.
def _get_ip_mac_pair_for(local_addr)
  # Parse the target once up front; anything that is not an IP address
  # (e.g. a host name) cannot be matched against the local networks.
  target = begin
    IPAddr.new(local_addr.to_s)
  rescue StandardError
    nil
  end
  return {} if target.nil?
  @os[:network][:interfaces].each do |_, intinfo|
    next unless intinfo.has_key?(:addresses)
    addresses = intinfo[:addresses]
    addresses.each do |addr, info|
      # Here we need to check family because IPAddr.new with bad
      # data works very slow on some environments
      # https://bugs.launchpad.net/fuel/+bug/1284571
      next unless info[:family] == 'inet' && !(IPAddr.new(addr) rescue nil).nil?
      net = IPAddr.new("#{addr}/#{info[:netmask]}")
      next unless net.include?(target)
      lladdr = addresses.find { |_, candidate| candidate[:family] == 'lladdr' }
      return {:ip => addr, :mac => (lladdr && lladdr[0])}
    end
  end
  {}
end
# Looks up the local ip/mac pair for the admin/pxe endpoint addresses listed
# in /etc/astute.yaml.
#
# Returns the first non-empty pair found; an empty result when the file is
# missing, is not a Hash or none of the addresses matches.
def _master_ip_and_mac_for_multirack
  return {} unless File.exist?('/etc/astute.yaml')
  conf = YAML::load_file('/etc/astute.yaml')
  return {} unless conf.is_a?(Hash)
  endpoint = conf.fetch('network_scheme', {}).fetch('roles', {}).fetch('admin/pxe', nil)
  admin_ips = conf.fetch('network_scheme', {}).fetch('endpoints', {}).fetch(endpoint, {}).fetch('IP', [])
  pair = {}
  admin_ips.each do |admin_ip|
    pair = _get_ip_mac_pair_for(admin_ip)
    break unless pair.empty?
  end
  pair
end
# Returns the local ip/mac pair on the network of the API host, falling back
# to the multirack lookup when there is none.
def _master_ip_and_mac
  direct = _get_ip_mac_pair_for(@api_ip)
  return direct unless direct.empty?
  _master_ip_and_mac_for_multirack
end
# Builds the payload sent to the API.
#
# It starts with the mac, ip and platform reported by ohai. When the detailed
# metadata can be gathered, the addresses facing the master node, the
# manufacturer, the platform name and the metadata are added in one step;
# otherwise the error is logged and the basic payload is kept. The node
# status (when known), the agent marker and the checksum go in last.
def _data
  payload = {
    :mac => (@os[:macaddress] rescue nil),
    :ip => (@os[:ipaddress] rescue nil),
    :os_platform => (@os[:platform] rescue nil),
  }
  begin
    meta = _detailed
    master = _master_ip_and_mac
    # Built completely before it is applied, so a failure leaves the payload untouched.
    overrides = {
      :ip => (( master[:ip] or @os[:ipaddress]) rescue nil),
      :mac => (( master[:mac] or @os[:macaddress]) rescue nil),
      :manufacturer => _manufacturer,
      :platform_name => _product_name,
      :meta => meta
    }
    payload.update(overrides)
  rescue Exception => e
    @logger.error("Error '#{e.message}' in metadata calculation: #{e.backtrace}")
  end
  payload[:status] = @node_state if @node_state
  payload[:is_agent] = true
  payload[:agent_checksum] = createsig(payload)
  payload
end
# Returns the parsed output of `lshw -json`, or nil when lshw is missing,
# fails or prints something that is not JSON (the reason is logged).
def _get_pci_dev_list
  lshw_path = `which lshw`.chomp
  unless $?.success?
    @logger.warn("Can't find lshw. Reason: 'which lshw' returned exit status #{$?.exitstatus}")
    # Explicit nil: the logger's return value must not end up in the metadata.
    return nil
  end
  data = `#{lshw_path} -json`
  unless $?.success?
    @logger.warn("Can't get data from lshw. Reason: lshw exited with status #{$?.exitstatus}")
    return nil
  end
  JSON.parse(data)
rescue => e
  @logger.warn("Can't get data from lshw. Reason: #{e.message}")
  nil
end
# Builds the NUMA topology of the node from the XML printed by lstopo.
#
# Returns a Hash with :numa_nodes (one {:id, :cpus, :memory} entry per NUMA
# node, or a single entry for the whole machine when no NUMA nodes are
# reported), :supported_hugepages and :distances (rows of latency values;
# [["1.0"]] when lstopo reports none). Returns nil when anything fails (the
# backtrace is logged).
def get_numa_topology
  # Output EXAMPLE:
  # <distances nbobjs="2" relative_depth="1" latency_base="10.000000">
  # <latency value="1.000000"/>
  # <latency value="2.100000"/>
  # <latency value="2.100000"/>
  # <latency value="1.000000"/>
  # </distances>
  # <object type="NUMANode" os_index="0" cpuset="0x3ff003ff" complete_cpuset="0x3ff003ff" online_cpuset="0x3ff003ff" allowed_cpuset="0x3ff003ff" nodeset="0x00000001" complete_nodeset="0x00000001" allowed_nodeset="0x00000001" local_memory="67452473344">
  # <page_type size="4096" count="14370737"/>
  # <page_type size="1073741824" count="8"/>
  xml = `lstopo --no-caches --of xml`
  doc = Document.new(xml)
  result = {:numa_nodes => [], :supported_hugepages => supported_hugepages, :distances => [["1.0"]]}
  doc.elements.each('/topology/object/distances/') do |dist|
    latencies = dist.elements.collect { |latency| latency.attributes['value'] }
    row_size = dist.attributes['nbobjs'].to_i
    # The flat list of latencies is a row_size x row_size matrix.
    result[:distances] = latencies.each_slice(row_size).to_a
  end
  numa_xpath = "/topology/object/object[@type='NUMANode']"
  node_xpath = doc.elements[numa_xpath] ? numa_xpath : "/topology/object[@type='Machine']"
  doc.elements.each(node_xpath) do |node|
    cpus = []
    node.elements.each("object/object/object[@type='PU']") do |pu|
      cpus << pu.attributes['os_index'].to_i
    end
    result[:numa_nodes] << {
      :id => node.attributes['os_index'].to_i,
      :cpus => cpus,
      :memory => node.attributes['local_memory'].to_i,
    }
  end
  result
rescue => e
  @logger.error "Something went wrong with parsing lstopo: #{e.backtrace}"
  nil
end
# Returns the huge page sizes derived from the flags of CPU '0':
# [2048, 1048576] with the 'pdpe1gb' flag, [2048] with 'pse' only and []
# with neither.
def supported_hugepages
  flags = @os[:cpu]['0']['flags']
  if flags.include?('pdpe1gb')
    [2048, 1048576]
  elsif flags.include?('pse')
    [2048]
  else
    []
  end
end
# Returns the output of `lspci -vvv -s <device>`, or an empty string when
# lspci cannot be found or fails (the reason is logged).
def _get_lspci_info(device)
  lspci_path = `which lspci`.chomp
  unless $?.success?
    @logger.warn("Can't find lspci. Reason: 'which lspci' returned exit status #{$?.exitstatus}")
    return ""
  end
  data = `#{lspci_path} -vvv -s #{device}`
  return data if $?.success?
  @logger.warn("Can't get data from lspci. Reason: lspci exited with status #{$?.exitstatus}")
  ""
rescue => e
  @logger.warn("Can't get data from lspci for #{device} slot. Reason: #{e.message}")
  ""
end
# Sets @node_state to "discover" when the first line of
# /etc/nailgun_systemtype says "bootstrap"; otherwise leaves it nil.
def update_state
  @node_state = nil
  return unless File.exist?("/etc/nailgun_systemtype")
  # The block form closes the file; gets returns nil for an empty file where
  # readline would raise EOFError.
  first_line = File.open("/etc/nailgun_systemtype", "r") { |fl| fl.gets }
  system_type = first_line.to_s.rstrip
  @node_state = "discover" if system_type == "bootstrap"
end
end
# Writes +data+ to +filename+ unless the file already holds exactly that
# content.
#
# logger   - object responding to #info and #warn.
# filename - path of the file to keep up to date.
# data     - String that the file must contain.
#
# A failure to write is logged as a warning and not raised.
def write_data_to_file(logger, filename, data)
  # Assigned at method level: a variable first assigned inside a File.open
  # block would be local to that block and invisible to the comparison below.
  text = File.exist?(filename) ? File.read(filename) : ''
  if text == data
    logger.info("File '#{filename}' is up to date.")
    return
  end
  begin
    File.open(filename, 'w') do |fo|
      fo.write(data)
    end
    logger.info("Wrote data to file '#{filename}'. Data: #{data}")
  rescue StandardError => e
    # Logger has #warn; there is no #warning.
    logger.warn("Can't write data to file '#{filename}'. Reason: #{e.message}")
  end
end
# Entry point: registers the node in the API or refreshes its data, then
# stores the node id returned by the API.
logger = Logger.new(STDOUT)
# Nodes that do not have an id yet log in more detail.
logger.level = File.exist?('/etc/nailgun_uid') ? Logger::INFO : Logger::DEBUG
# random sleep is here to prevent target nodes
# from reporting to master node all at once
sleep_time = rand(30)
logger.debug("Sleep for #{sleep_time} seconds before sending request")
sleep(sleep_time)
if File.exist?('/etc/nailgun-agent/nodiscover')
  logger.info("Discover prevented by /etc/nailgun-agent/nodiscover presence.")
  exit 1
end
agent = NodeAgent.new(logger)
agent.update_state
begin
  if File.exist?('/etc/nailgun_uid')
    resp = agent.put
    # Handle case when node was removed, but nailgun_uid exist
    resp = agent.post if resp.status == 400
  else
    resp = agent.post
    # We must not log 409 as error, after node is provisioned there will be no
    # /etc/nailgun_uid, it will be created after put request
    resp = agent.put if [409, 403].include?(resp.status)
  end
  unless [201, 200].include?(resp.status)
    logger.error resp.body
    exit 1
  end
  new_id = JSON.parse(resp.body)['id']
  McollectiveConfig.new(logger).replace_identity(new_id)
  write_data_to_file(logger, '/etc/nailgun_uid', new_id.to_s)
rescue => ex
  # NOTE(mihgen): There is no need to retry - cron will do it for us
  logger.error "#{ex.message}\n#{ex.backtrace}"
end