#!/usr/bin/env ruby

#    Copyright 2013 Mirantis, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

begin
  require 'rubygems'
rescue LoadError
end
require 'ohai/system'
require 'json'
require 'httpclient'
require 'logger'
require 'optparse'
require 'yaml'
require 'ipaddr'
require 'rethtool'
require 'digest'
require 'timeout'
require 'uri'
# TODO(vsharshov): Pathname is only needed for Ruby 1.8 (no File.realpath);
# drop it once Ruby 1.8 support is excluded.
require 'pathname'
require 'rexml/document'
include REXML

unless Process.euid == 0
  puts "You must be root"
  exit 1
end

ENV['PATH'] = "/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin"

AGENT_CONFIG = "/etc/nailgun-agent/config.yaml"

# look at https://github.com/torvalds/linux/blob/master/Documentation/devices.txt
# KVM virtio volumes has code 252 in CentOS, but 253 in Ubuntu
# Please also update the device codes here
# https://github.com/stackforge/fuel-astute/blob/master/mcagents/erase_node.rb#L81
# NVMe has code 259
STORAGE_CODES = [3, 8, 9, 65, 66, 67, 68, 69, 70, 71, 104, 105, 106, 107, 108, 109, 110, 111, 202, 252, 253, 259]

REMOVABLE_VENDORS = [
  "Adaptec", "IBM", "ServeRA",
]

# PCI vendor IDs for Adaptec
REMOVABLE_PCI_VENDORS = [
  "0x1044", "0x9004", "0x9005",
]

# Set default data structure for SR-IOV
DEFAULT_SRIOV = {
  "sriov_totalvfs" => 0,
  "available" => false,
  "pci_id" => ""
}

# Builds a deterministic string representation of a nested structure.
# Hash entries are rendered as "key=>value" and sorted, arrays are joined
# with '|', and every other object becomes "ClassName:to_s".
def digest(body)
  if body.is_a? Hash
    digest body.map { |k, v| [digest(k), digest(v)].join("=>") }.sort
  elsif body.is_a? Array
    body.map { |v| digest v }.join('|')
  else
    [body.class.to_s, body.to_s].join(":")
  end
end

# Returns the SHA1 hex digest of the canonical form produced by #digest.
def createsig(body)
  Digest::SHA1.hexdigest(digest(body))
end

# Reads and updates the 'identity' key of the MCollective server config.
class McollectiveConfig
  def initialize(logger)
    @logger = logger
    @configfile = '/etc/mcollective/server.cfg'
  end

  # Returns the value of +find_key+ in the config file, or nil when the key
  # is absent. When the key occurs several times the last occurrence wins.
  def get_config_by_key(find_key)
    found_value = nil
    # This code is from mcollective's sources.
    # File.foreach closes the file handle once iteration is done.
    File.foreach(@configfile) do |line|
      # strip blank spaces, tabs etc off the end of all lines
      line = line.gsub(/\s*$/, "")
      next if line =~ /^#|^$/
      next unless line =~ /(.+?)\s*=\s*(.+)/
      found_value = $2 if $1 == find_key
    end
    found_value
  end

  # Sets 'identity' in the config to +new_id+ and restarts mcollective,
  # unless the configured identity already equals +new_id+.
  # Raises RuntimeError when +new_id+ contains characters other than
  # word characters, '.' and '-'.
  def replace_identity(new_id)
    # check if id complies reqs; \A and \z anchor the whole string
    # (^ and $ would accept multi-line values)
    raise 'Identities can only match /\w\.\-/' unless new_id.to_s.match(/\A[\w\.\-]+\z/)

    value_from_config = get_config_by_key('identity')

    if value_from_config == new_id.to_s
      @logger.info "MCollective is up to date with identity = #{new_id}"
      return
    end

    config = File.open(@configfile, "rb") { |f| f.read }
    if value_from_config
      # Key found, but it has other value
      @logger.info "Replacing identity in mcollective server.cfg to new value = '#{new_id}'"
      # Same key/value separator rule as the parser in get_config_by_key
      config.gsub!(/^identity\s*=.*$/, "identity = #{new_id}")
    else
      # Key was not found
      config += "\nidentity = #{new_id}\n"
      @logger.info "Identity in mcollective server.cfg has not been found. Setting to '#{new_id}'"
    end
    File.open(@configfile, "w") { |f| f.write(config) }
    puts `service mcollective restart`
  end
end

# One NIC offloading mode with its nested sub-modes; serialized to JSON as
# {"name": ..., "state": null, "sub": [...]}.
class Offloading
  def initialize(name, sub)
    @name, @sub = name, sub
  end

  def to_json(options = {})
    {'name' => @name, 'state' => nil, 'sub' => @sub}.to_json()
  end
end

# Collects hardware information about the node and reports it to the API.
class NodeAgent
  API_DEFAULT_ADDRESS = "localhost"
  API_DEFAULT_PORT = "8443"
  API_LEGACY_PORT = "8000"

  def initialize(logger)
    @logger = logger
    @settings = get_settings()
    @api_ip = api_host
    scheme, api_port = get_scheme_and_port()
    @api_url = "#{scheme}://#{@api_ip}:#{api_port}/api"
    @logger.info("API URL is #{@api_url}")
    @os = ohai_system_info
    @numa_topology = get_numa_topology
    @mpath_devices, @skip_devices = multipath_devices
  end

  # Host part of the configured 'url' setting. Falls back to
  # API_DEFAULT_ADDRESS when the setting is missing, has no host or cannot
  # be parsed.
  def api_host
    URI(@settings['url'].to_s).host || API_DEFAULT_ADDRESS
  rescue URI::InvalidURIError => e
    @logger.warn("Can't parse API url '#{@settings['url']}': #{e.message}. Using #{API_DEFAULT_ADDRESS}")
    API_DEFAULT_ADDRESS
  end

  # Probes the HTTPS port of the API host.
  # Returns ["https", API_DEFAULT_PORT] when the request goes through and
  # ["http", API_LEGACY_PORT] when the connection is refused. Any other
  # error is propagated to the caller.
  def get_scheme_and_port
    htclient.get("https://#{@api_ip}:#{API_DEFAULT_PORT}/")
    ["https", API_DEFAULT_PORT]
  rescue Errno::ECONNREFUSED
    @logger.warn("Connection Refused catched when trying connect to HTTPS port. Use plain HTTP")
    ["http", API_LEGACY_PORT]
  end

  # Transforms a space separated list of key=value pairs into a Hash.
  # For example, line: "initrd=/images/bootstrap/initramfs.img ksdevice=bootif lang= quiet"
  # will be transformed into:
  #   {"initrd"=>"/images/bootstrap/initramfs.img", "ksdevice"=>"bootif", "lang"=>"", "quiet"=>nil}
  def string_to_hash(string)
    hash = {}
    string.split(' ').each do |pair|
      key, value = pair.split(/=/, 2)
      hash[key] = value
    end
    hash
  end

  # Agent settings from AGENT_CONFIG overridden by kernel command line
  # parameters. Unreadable sources are treated as empty.
  def get_settings
    agent_settings = YAML.load_file(AGENT_CONFIG) rescue {}
    # An empty or non-mapping YAML document does not load as a Hash
    agent_settings = {} unless agent_settings.is_a?(Hash)
    cmdline_settings = string_to_hash(File.read("/proc/cmdline")) rescue {}
    agent_settings.merge(cmdline_settings)
  end

  # Runs all ohai plugins (30 seconds limit). On timeout the block device
  # and filesystem plugins are disabled and the collection is repeated.
  def ohai_system_info
    Timeout::timeout(30) do
      os = Ohai::System.new()
      os.all_plugins
      os
    end
  rescue Timeout::Error
    # When one of disks is broken, do not collect data about block devices
    # More details: https://bugs.launchpad.net/fuel/+bug/1396086
    Ohai::Config[:disabled_plugins] = ['linux::block_device', 'linux::filesystem']
    os = Ohai::System.new()
    os.all_plugins
    os
  end

  # Sends node data with HTTP PUT to <api>/nodes/agent/; returns the response.
  def put
    headers = {"Content-Type" => "application/json"}
    @logger.debug("Trying to put host info into #{@api_url}")
    res = htclient.put("#{@api_url}/nodes/agent/", _data.to_json, headers)
    @logger.debug("Response: status: #{res.status} body: #{res.body}")
    if res.status < 200 or res.status >= 400
      @logger.error("HTTP PUT failed: #{res.inspect}")
    end
    res
  end

  # Sends node data with HTTP POST to <api>/nodes/; returns the response.
  def post
    headers = {"Content-Type" => "application/json"}
    @logger.debug("Trying to create host using #{@api_url}")
    res = htclient.post("#{@api_url}/nodes/", _data.to_json, headers)
    @logger.debug("Response: status: #{res.status} body: #{res.body}")
    res
  end

  # Builds a new HTTP client: certificate verification disabled, TLSv1,
  # 10 seconds connect/send/receive timeouts.
  def htclient
    client = HTTPClient.new
    client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
    client.ssl_config.ssl_version = :TLSv1
    client.connect_timeout = 10
    client.send_timeout = 10
    # (mihgen): Nailgun may hang for a while, but 10sec should be enough for him to respond
    client.receive_timeout = 10
    client
  end

  # Interface names with their addresses as reported by ohai, followed by
  # the default interface and default gateway entries.
  def _interfaces
    interfaces = @os[:network][:interfaces].inject([]) do |result, elm|
      result << { :name => elm[0], :addresses => elm[1]["addresses"] }
    end
    interfaces << { "default_interface" => @os["network"]["default_interface"] }
    interfaces << { "default_gateway" => @os["network"]["default_gateway"] }
    interfaces
  end

  # Transforms input array into array of the Offloading objects.
  # Entries starting with a TAB become sub-modes of the preceding entry.
  # NOTE: the input array is consumed (emptied), and the resulting list is
  # in reverse order of the input.
  # Example of the serialized result:
  # [{
  #    "state":null,
  #    "sub":[
  #      {
  #        "state":null,
  #        "sub":[],
  #        "name":"tx-checksum-ipv6"
  #      },
  #      ...........
  #    ],
  #    "name":"tx-checksumming"
  #  },
  #  {
  #    "state":null,
  #    "sub":[],
  #    "name":"generic-segmentation-offload"
  #  },
  #  .............
  # ]
  def _parse_offloading(offloading_arr)
    return [] if offloading_arr.empty?

    inner = []
    current = offloading_arr.shift()
    while offloading_arr.any? && offloading_arr.first().start_with?("\t") do
      inner << offloading_arr.shift()[1..-1]
    end
    res = _parse_offloading(offloading_arr)
    res << Offloading.new(current, _parse_offloading(inner))
  end

  # Gets information about SR-IOV for specified pci slot
  # using 'lspci' utility. Example of output to parse:
  # ...
  # Capabilities: [160 v1] Single Root I/O Virtualization (SR-IOV)
  #     IOVCap: Migration-, Interrupt Message Number: 000
  #     IOVCtl: Enable- Migration- Interrupt- MSE- ARIHierarchy-
  #     IOVSta: Migration-
  #     Initial VFs: 8, Total VFs: 8, Number of VFs: 0, Function Dependency Link: 01
  #     VF offset: 128, stride: 4, Device ID: 10ed
  #     Supported Page Size: 00000553, System Page Size: 00000001
  #     Region 0: Memory at 0000000090040000 (64-bit, prefetchable)
  #     Region 3: Memory at 0000000090060000 (64-bit, prefetchable)
  #     VF Migration: offset: 00000000, BIR: 0
  # ...
  # Returns a Hash shaped like DEFAULT_SRIOV; defaults are returned on any error.
  def sriov_info(int, int_bus_info)
    sriov = DEFAULT_SRIOV.dup
    lspci = _get_lspci_info(int_bus_info)
    if lspci.match(/.*Capabilities:.*SR-IOV.*/)
      sriov["available"] = true
      sriov["sriov_totalvfs"] = lspci.scan(/\s+Total\s+VFs:\s+(\d+)/).last.first.to_i - 1
      vf_vendor = File.read("/sys/class/net/#{int}/device/vendor").chomp.gsub(/^0x/, '')
      vf_device = lspci.scan(/VF\s+.*\s+Device\s+ID:\s+([A-Fa-f0-9]+)/).last.first
      sriov["pci_id"] = "#{vf_vendor}:#{vf_device}"
    end
    sriov
  rescue
    # Hand out a copy so callers never share the mutable constant
    DEFAULT_SRIOV.dup
  end

  # Returns "vendor:device" PCI id of the interface (without 0x prefixes)
  # read from sysfs, or "" when it can not be read.
  def nic_pci_id(int)
    vendor = File.read("/sys/class/net/#{int}/device/vendor").chomp.gsub(/^0x/, '')
    device = File.read("/sys/class/net/#{int}/device/device").chomp.gsub(/^0x/, '')
    "#{vendor}:#{device}"
  rescue
    ""
  end

  # Returns id of the NUMA node whose PCI device list contains
  # +int_bus_info+, or nil when it is unknown.
  def nic_numa_node(int_bus_info)
    numa_node = @numa_topology[:numa_nodes].find { |node| node[:pcidevs].include?(int_bus_info) }
    numa_node[:id].to_i
  rescue
    nil
  end

  # Gathers detailed metadata: system, interfaces, cpu, disks, memory,
  # pci devices and numa topology. Failures in the interfaces, cpu and disks
  # sections are logged and leave that section partially filled.
  # NOTE: `rescue Exception` in this method is a deliberate best-effort
  # choice of the original code and is kept as is.
  def _detailed
    detailed_meta = {
      :system => _system_info,
      :interfaces => [],
      :cpu => {
        :total => (@os[:cpu][:total].to_i rescue nil),
        :real => (@os[:cpu][:real].to_i rescue nil),
        :spec => [],
      },
      :disks => [],
      :memory => (_dmi_memory or _ohai_memory),
      :pci_devices => _get_pci_dev_list,
      :numa_topology => @numa_topology,
    }

    admin_mac = (_master_ip_and_mac[:mac] or @os[:macaddress]) rescue nil
    begin
      (@os[:network][:interfaces] or {} rescue {}).each do |int, intinfo|
        # Send info about physical interfaces only
        next if intinfo[:encapsulation] !~ /^Ethernet.*/
        # Avoid virtual devices like loopback, tunnels, bonding, vlans ...
        # TODO(vsharshov): replace the line below by this one after excluding Ruby 1.8
        # next if File.realpath("/sys/class/net/#{int}") =~ /virtual/
        next if Pathname.new("/sys/class/net/#{int}").realpath.to_s =~ /virtual/
        # Avoid wireless
        next if File.exist?("/sys/class/net/#{int}/phy80211") ||
                File.exist?("/sys/class/net/#{int}/wireless")
        # Skip virtual functions
        next if File.exist?("/sys/class/net/#{int}/device/physfn")

        int_meta = {:name => int}
        int_meta[:interface_properties] = {}
        int_meta[:state] = intinfo[:state]
        (intinfo[:addresses] or {} rescue {}).each do |addr, addrinfo|
          if (addrinfo[:family] rescue nil) =~ /lladdr/
            # Get original mac excluding case with empty EEPROM data
            perm_addr = `ethtool -P #{int}`
            begin
              # Built at runtime (instead of a literal) because regexp engines
              # without look-behind support can not parse this pattern.
              re = Regexp.new('(?<=Permanent address: )(?!00(:00){5}).+')
            rescue SyntaxError, RegexpError
              re = perm_addr.match(/(00(:00){5})+/).nil? ? /[0-9a-f]+(:[0-9a-f]+){5}$/ : nil
            end
            int_meta[:mac] = perm_addr.match(re)[0] rescue addr
            int_meta[:pxe] = admin_mac == int_meta[:mac]
            begin
              int_info = Rethtool::InterfaceSettings.new(int)
              int_meta[:driver] = int_info.driver
              int_meta[:bus_info] = int_info.bus_info
              int_meta[:max_speed] = int_info.best_mode.speed
              if int_info.current_mode.speed == :unknown
                int_meta[:current_speed] = nil
              else
                int_meta[:current_speed] = int_info.current_mode.speed
              end
            rescue
              int_meta[:current_speed] = nil
            end
          elsif (addrinfo[:family] rescue nil) =~ /^inet$/
            int_meta[:ip] = addr
            int_meta[:netmask] = addrinfo[:netmask] if addrinfo[:netmask]
          end
        end

        begin
          # this stuff will put all non-fixed offloading mode into array
          # collect names of non-fixed offloading modes
          # Example of ethtool -k ethX output (sub-modes are TAB-indented):
          # tx-checksumming: on
          #     tx-checksum-ipv4: on
          #     tx-checksum-ip-generic: off [fixed]
          #     tx-checksum-ipv6: on
          #     tx-checksum-fcoe-crc: off [fixed]
          #     tx-checksum-sctp: on
          # scatter-gather: on
          #     tx-scatter-gather: on
          #     tx-scatter-gather-fraglist: off [fixed]
          # generic-segmentation-offload: on
          offloading_data = `ethtool -k #{int}`.split("\n").reject { |offloading|
            offloading.include?("Features for") || offloading.include?("fixed")
          }.map { |offloading|
            offloading.split(':')[0]
          }
          # transform raw data into array of objects
          int_meta[:offloading_modes] = _parse_offloading(offloading_data)
        rescue
          # in case if we have no `ethtool` package installed we should
          # return empty array to support nailgun's rest api call
          int_meta[:offloading_modes] = []
        end

        # Getting SR-IOV info
        int_meta[:interface_properties][:sriov] = sriov_info(int, int_meta[:bus_info])
        # Get PCI-ID
        int_meta[:interface_properties][:pci_id] = nic_pci_id(int)
        # Get numa node
        int_meta[:interface_properties][:numa_node] = nic_numa_node(int_meta[:bus_info])
        detailed_meta[:interfaces] << int_meta
      end
    rescue Exception => e
      @logger.error("Error '#{e.message}' in gathering interfaces metadata: #{e.backtrace}")
    end

    begin
      (@os[:cpu] or {} rescue {}).each do |cpu, cpuinfo|
        if cpu =~ /^[\d]+/ and cpuinfo
          frequency = cpuinfo[:mhz].to_i rescue nil
          begin
            # ohai returns current frequency, try to get max if possible
            max_frequency = `cat /sys/devices/system/cpu/cpu#{cpu}/cpufreq/cpuinfo_max_freq 2>/dev/null`.to_i / 1000
            frequency = max_frequency if max_frequency > 0
          rescue
          end
          detailed_meta[:cpu][:spec] << {
            :frequency => frequency,
            :model => (cpuinfo[:model_name].gsub(/ +/, " ") rescue nil)
          }
        end
      end
    rescue Exception => e
      @logger.error("Error '#{e.message}' in gathering cpu metadata: #{e.backtrace}")
    end

    begin
      Timeout::timeout(30) do
        @logger.debug("Trying to find block devices")
        # ohai reports the disk size according to /sys/block/#{bname}
        # which is always measured in 512 bytes blocks, no matter what
        # the physical (minimal unit which can be atomically written)
        # or logical (minimal unit which can be addressed) block sizes are, see
        # http://lxr.free-electrons.com/source/include/linux/types.h?v=4.4#L124
        # http://lxr.free-electrons.com/source/drivers/scsi/sd.c?v=4.4#L2340
        block_size = 512
        (@os[:block_device] or {} rescue {}).each do |bname, binfo|
          @logger.debug("Found block device: #{bname}")
          @logger.debug("Block device info: #{binfo.inspect}")
          dname = bname.gsub(/!/, '/')
          next if @skip_devices.include?(dname)
          block = physical_data_storage_devices.find { |d| d[:name] == bname }
          if block && binfo
            @logger.debug("Block device seems to be physical data storage: #{bname}")
            if block[:removable] =~ /^1$/ && !REMOVABLE_VENDORS.include?(binfo[:vendor])
              # udevadm expects the /dev name (cciss/c0d1), not the
              # /sys/block name (cciss!c0d1)
              pci_vendor_id = _get_pci_vendor_id(dname)
              @logger.debug("Block device #{bname} is removable. PCI vendor ID: #{pci_vendor_id}")
              next unless REMOVABLE_PCI_VENDORS.include?(pci_vendor_id)
              @logger.debug("Block device #{bname} is accepted by PCI vendor ID")
            end
            detailed_meta[:disks] << {
              :name => dname,
              :model => binfo[:model],
              :size => (binfo[:size].to_i * block_size),
              :disk => block[:disk],
              :extra => block[:extra],
              :removable => block[:removable],
              :paths => nil
            }
          elsif @mpath_devices.has_key?(dname)
            device = @mpath_devices[dname]
            detailed_meta[:disks] << {
              :name => 'mapper/' + device["DM_NAME"],
              :model => binfo[:model],
              :size => (binfo[:size].to_i * block_size),
              :disk => dname,
              :extra => _disk_id_by_name(dname),
              :removable => 0,
              :paths => device["DM_BLKDEVS_USED"].map { |name| _disk_path_by_name(name) }.join(', ')
            }
          end
        end
        @logger.debug("Detailed meta disks: #{detailed_meta[:disks].inspect}")
      end
    rescue Exception => e
      @logger.error("Error '#{e.message}' in gathering disks metadata: #{e.backtrace}")
    end

    detailed_meta
  end

  # Scans device mapper for multipath devices.
  # Returns [devices, members]: +devices+ maps a dm block device name to its
  # parsed dmsetup fields, +members+ lists the block devices those multipath
  # devices are built from. Returns [{}, []] on any error.
  def multipath_devices
    dmsetup = `/sbin/dmsetup info -c --nameprefixes --noheadings -o blkdevname,subsystem,blkdevs_used,name,uuid`
    # Example output:
    # DM_BLKDEVNAME='dm-0':DM_SUBSYSTEM='mpath':DM_BLKDEVS_USED='sdb,sda':DM_NAME='31234567890abcdef':DM_UUID='mpath-31234567890abcdef'
    # DM_BLKDEVNAME='dm-1':DM_SUBSYSTEM='mpath':DM_BLKDEVS_USED='sdc,sdd':DM_NAME='92344567890abcdef':DM_UUID='mpath-92344567890abcdef'
    mpath_devices = {}
    mapping = []
    unless dmsetup.include?("No devices found")
      dmsetup.lines.each do |line|
        device = {}
        line.split(/:/).each do |key_value|
          k, v = key_value.split('=', 2)
          # Blank lines and tokens without '=' carry no value
          next if v.nil?
          device[k] = v.strip().gsub(/'/, '')
        end
        next unless device["DM_SUBSYSTEM"] == 'mpath'
        device["DM_BLKDEVS_USED"] = device["DM_BLKDEVS_USED"].split(',')
        mapping.concat(device["DM_BLKDEVS_USED"])
        mpath_devices[device["DM_BLKDEVNAME"]] = device
      end
    end
    [mpath_devices, mapping]
  rescue => e
    @logger.error("Error '#{e.message}' while scanning for multipath devices.")
    [{}, []]
  end

  # Returns the PCI vendor id (e.g. "0x10b5") of the adapter the block
  # device +devname+ is attached to. Errors are logged; in that case the
  # return value is whatever the logger call returns, which matches no vendor.
  def _get_pci_vendor_id(devname)
    Timeout::timeout(30) do
      udevadm_walk = {}
      devpath = nil
      # expected output of `udevadm info --attribute-walk --name=#{devname}`:
      #
      # Udevadm info starts with the device specified by the devpath and then
      # walks up the chain of parent devices. It prints for every device
      # found, all possible attributes in the udev rules key format.
      # A rule to match, can be composed by the attributes of the device
      # and the attributes from one single parent device.
      #
      #   looking at device '/devices/pci0000:00/0000:00:1e.0/0000:0d:02.0/8:0:0:1/block/sdc':
      #     KERNEL=="sdc"
      #     SUBSYSTEM=="block"
      #     DRIVER==""
      #     ATTR{ro}=="0"
      #     ATTR{size}=="30881792"
      #     ATTR{removable}=="1"
      #
      #   looking at parent device '/devices/pci0000:00/0000:00:1e.0/0000:0d:02.0':
      #     (Disk adapter plugged into PCIe slot, we need its PCI vendor ID)
      #     KERNELS=="0000:0d:02.0"
      #     SUBSYSTEMS=="pci"
      #     DRIVERS==""
      #     ATTRS{device}=="0x9030"
      #     ATTRS{vendor}=="0x10b5"
      #
      #   looking at parent device '/devices/pci0000:00/0000:00:1e.0':
      #     (PCIe slot reported as a PCI bridge device, its PCI vendor ID is NOT what we need)
      #     KERNELS=="0000:00:1e.0"
      #     SUBSYSTEMS=="pci"
      #     DRIVERS==""
      #     ATTRS{device}=="0x244e"
      #     ATTRS{vendor}=="0x8086"
      #
      #   looking at parent device '/devices/pci0000:00':
      #     KERNELS=="pci0000:00"
      #     SUBSYSTEMS==""
      #     DRIVERS==""
      `udevadm info --attribute-walk --name=#{devname}`.split("\n").each do |line|
        line.strip!
        next unless line.start_with?('looking', 'KERNEL', 'SUBSYSTEM', 'DRIVER', 'ATTR')
        if line.start_with?('looking')
          devpath = line.split("'")[1]
          udevadm_walk[devpath] = {}
        else
          key, value = line.split("==").each { |a| a.strip! }
          udevadm_walk[devpath][key] = value.gsub(/(^")|("$)/, '')
        end
      end

      # We need a vendor ID of a disk adapter rather than vendor ID of the PCIe slot where it's plugged into.
      # Therefore we should pick the device with SUBSYSTEMS==pci having the longest devpath.
      # For the example given above, vendor ID should be found as '0x10b5'.
      # Next ID of '0x8086' belongs to PCIe slot to which PCIe RAID disk adapter is inserted.
      devpath = Hash[udevadm_walk.select { |k, v| v['SUBSYSTEMS'] == 'pci' }].keys.max
      udevadm_walk[devpath]['ATTRS{vendor}']
    end
  rescue => e
    @logger.error("Error '#{e.message}' in obtaining PCI vendor ID: #{e.backtrace}")
  end

  # Returns all /dev/disk/by-id links (as "disk/by-id/<link>") pointing to device +name+.
  def _disk_id_by_name(name)
    dn = "/dev/disk/by-id"
    basepath = Dir["#{dn}/**?"].select { |f| /\/#{name}$/.match(File.readlink(f)) }
    basepath.map { |p| p.split("/")[2..-1].join("/") }
  end

  # Returns the first /dev/disk/by-path link (as "disk/by-path/<link>")
  # pointing to device +name+, or nil when there is none.
  def _disk_path_by_name(name)
    dn = "/dev/disk/by-path"
    basepath = Dir["#{dn}/**?"].find { |f| /\/#{name}$/.match(File.readlink(f)) }
    basepath.split("/")[2..-1].join("/") if basepath
  end

  # Parses `mdadm --detail` output into a Hash of selected fields plus
  # 'devices' - the list of component device paths.
  # Sample mdadm --detail /dev/md127 output:
  # /dev/md127:
  #         Version : 1.2
  #   Creation Time : Thu Oct 29 16:12:00 2015
  #      Raid Level : raid1
  #      Array Size : 1048000 (1023.61 MiB 1073.15 MB)
  #   Used Dev Size : 1048000 (1023.61 MiB 1073.15 MB)
  #    Raid Devices : 2
  #   Total Devices : 2
  #     Persistence : Superblock is persistent
  #
  #     Update Time : Sun Nov  1 00:57:31 2015
  #           State : clean
  #  Active Devices : 2
  # Working Devices : 2
  #  Failed Devices : 0
  #   Spare Devices : 0
  #
  #            Name : agordeev:123  (local to host agordeev)
  #            UUID : 7aa70afc:742a9fa6:45f9f5a1:25a2585f
  #          Events : 20
  #
  #     Number   Major   Minor   RaidDevice State
  #        0     252        2        0      active sync   /dev/dm-2
  #        1     252        3        1      active sync   /dev/dm-3
  #
  def _parse_md(data)
    md = {}
    begin
      description, _, components = data.split(/Number\s+Major\s+Minor\s+RaidDevice\s+(State\s+)?/m)
      # 'Container' is required by _find_fake_raid_mds, which tests
      # md_data.has_key?("Container") to recognize container members.
      line_patterns = ['Version', 'Raid Level', 'Raid Devices', 'Active Devices',
                       'Spare Devices', 'Failed Devices', 'State', 'UUID',
                       'Container']
      (description.split("\n")[1..-1] rescue []).each do |line|
        line.strip!
        next if line == ""
        line_patterns.each do |pattern|
          md[pattern] = line.split(" : ").last if line.start_with?(pattern)
        end
      end
      md['devices'] = []
      (components.split("\n") rescue []).each do |line|
        line.strip!
        next if line == ""
        md['devices'] << line.split().last
      end
    rescue Exception => e
      @logger.error("Error '#{e.message}' in parsing MD: #{e.backtrace}")
    end
    md
  end

  # Finds MD arrays that are members of a container.
  # Returns [mds, devices]: names of such MD devices and the component
  # device paths they consist of.
  def _find_fake_raid_mds()
    mds = []
    devices = []
    begin
      Dir["/sys/block/*"].each do |block_device_dir|
        basename_dir = File.basename(block_device_dir)
        devname = basename_dir.gsub(/!/, '/')
        next unless devname.start_with?('md')
        md_data = _parse_md(`mdadm --detail /dev/#{devname}`)
        next if md_data['Raid Level'] == 'container'
        if md_data.has_key?("Container")
          devices.concat((md_data['devices'] or []))
          mds << devname
        end
      end
    rescue Exception => e
      @logger.error("Error '#{e.message}' in finding fake raid MDs: #{e.backtrace}")
    end
    return mds, devices
  end

  # Returns the list of physical data storage devices as Hashes with
  # :name, :disk, :extra and :removable keys. A non-empty result is cached
  # in @blocks. Raises RuntimeError when /sys/block does not exist.
  def physical_data_storage_devices
    @blocks ||= []
    return @blocks unless @blocks.empty?
    @logger.debug("Trying to get list of physical devices")
    raise "Path /sys/block does not exist" unless File.exist?("/sys/block")

    mds, devices = _find_fake_raid_mds()
    Dir["/sys/block/*"].each do |block_device_dir|
      basename_dir = File.basename(block_device_dir)
      # Entries in /sys/block for cciss look like cciss!c0d1 while
      # the entries in /dev look like /dev/cciss/c0d1. udevadm uses
      # the entry in /dev so we need to replace the ! to get a valid
      # device name.
      devname = basename_dir.gsub(/!/, '/')
      # Skipping MD if it's a container. Also skipping underlying
      # devices from which that container is composed.
      next if devices.include?("/dev/#{devname}")
      next if devname.start_with?('md') and not mds.include?(devname)

      @logger.debug("Getting udev properties for device: #{devname}")
      properties = `udevadm info --query=property --export --name=#{devname}`.split("\n").inject({}) do |result, raw_property|
        # Limit 2 keeps '=' characters that belong to the value
        key, value = raw_property.split('=', 2)
        next result if value.nil?
        # Drop one trailing and one leading single quote
        result.update(key.strip => value.strip.chomp("'").reverse.chomp("'").reverse)
      end
      @logger.debug("Device #{devname} udev properties: #{properties.inspect}")

      @logger.debug("Trying to find out if device #{devname} is removable or not")
      removable = nil
      if File.exist?("/sys/block/#{basename_dir}/removable")
        removable = File.open("/sys/block/#{basename_dir}/removable") { |f| f.read_nonblock(1024).strip }
      end
      @logger.debug("Device #{devname} removable parameter: #{removable.inspect}")

      if STORAGE_CODES.include?(properties['MAJOR'].to_i)
        @logger.debug("Device #{devname} seems to be appropriate")
        # Exclude LVM volumes (in CentOS - 253, in Ubuntu - 252) using additional check
        # Exclude any storage device connected through USB by the default
        next if properties['DEVPATH'].to_s.include?('virtual/block/dm') ||
                (properties['ID_BUS'] == 'usb' && !@settings.has_key?("report_usb_block_devices"))
        @blocks << {
          :name => basename_dir,
          :disk => _disk_path_by_name(devname) || devname,
          :extra => _disk_id_by_name(devname) || [],
          :removable => removable,
        }
      end
    end
    @logger.debug("Final list of physical devices is: #{@blocks.inspect}")
    @blocks
  end

  # True when DMI product name is "VirtualBox".
  def _is_virtualbox
    @os[:dmi][:system][:product_name] == "VirtualBox" rescue false
  end

  # True for VirtualBox or when ohai reports virtualization role "guest".
  def _is_virtual
    _is_virtualbox or @os[:virtualization][:role] == "guest" rescue false
  end

  # Manufacturer: product name for VirtualBox, upper-cased virtualization
  # system for other guests, DMI manufacturer otherwise; nil if unavailable.
  def _manufacturer
    if _is_virtualbox
      @os[:dmi][:system][:product_name] rescue nil
    elsif _is_virtual
      @os[:virtualization][:system].upcase.strip rescue nil
    else
      @os[:dmi][:system][:manufacturer].strip rescue nil
    end
  end

  # DMI product name for non-virtual nodes, nil otherwise.
  def _product_name
    unless _is_virtual
      @os[:dmi][:system][:product_name].strip rescue nil
    end
  end

  # DMI serial number or nil.
  def _serial
    @os[:dmi][:system][:serial_number].strip rescue nil
  end

  # Returns unique identifier of machine
  # * for kvm virtual node will contain virsh UUID
  # * for physical HW that would be unique chassis id (from BIOS settings)
  # * for other hypervizors - not tested
  def uuid
    node_uuid = @os.data.fetch(:dmi, {}).fetch(:system, {}).fetch(:uuid, nil)
    node_uuid && node_uuid.strip
  end

  # System description; entries that are nil, empty or "Not Specified" are dropped.
  def _system_info
    {
      :manufacturer => _manufacturer,
      :serial => _serial,
      :uuid => uuid,
      :runtime_uuid => @settings['runtime_uuid'],
      :product => _product_name,
      :family => (@os[:dmi][:system][:family].strip rescue nil),
      :version => (@os[:dmi][:system][:version].strip rescue nil),
      :fqdn => (@os[:fqdn].strip rescue @os[:hostname].strip rescue nil),
    }.delete_if { |key, value| value.nil? or value.empty? or value == "Not Specified" }
  end

  # Converts +size+ given in +unit+ (kb, mb, gb or tb, case insensitive)
  # to bytes. Returns nil for any other unit.
  def _size(size, unit)
    case unit
    when /^kb$/i
      size * 1024
    when /^mb$/i
      size * 1048576
    when /^gb$/i
      size * 1073741824
    when /^tb$/i
      size * 1099511627776
    end
  end

  # Memory information parsed from dmidecode output: devices, total size,
  # maximum capacity and number of slots. Returns nil when dmidecode is
  # unavailable, fails, or reports no populated memory devices.
  def _dmi_memory
    dmi = `/usr/sbin/dmidecode`
    info = {:devices => [], :total => 0, :maximum_capacity => 0, :slots => 0}
    return nil if $?.to_i != 0
    dmi.split(/\n\n/).each do |group|
      if /^Physical Memory Array$/.match(group)
        if /^\s*Maximum Capacity:\s+(\d+)\s+(kb|mb|gb|tb)/i.match(group)
          info[:maximum_capacity] += _size($1.to_i, $2)
        end
        if /^\s*Number Of Devices:\s+(\d+)/i.match(group)
          info[:slots] += $1.to_i
        end
      elsif /^Memory Device$/.match(group)
        device_info = {}
        # Slots without a numeric size are skipped
        next unless /^\s*Size:\s+(\d+)\s+(kb|mb|gb|tb)/i.match(group)
        size = _size($1.to_i, $2)
        device_info[:size] = size
        info[:total] += size
        if /^\s*Speed:\s+(\d+)\s+MHz/i.match(group)
          device_info[:frequency] = $1.to_i
        end
        if /^\s*Type:\s+(.*?)$/i.match(group)
          device_info[:type] = $1
        end
        #if /^\s*Locator:\s+(.*?)$/i.match(group)
        #  device_info[:locator] = $1
        #end
        info[:devices].push(device_info)
      end
    end
    info[:total] == 0 ? nil : info
  rescue Errno::ENOENT, Errno::EACCES => e
    # dmidecode can not be executed: let the caller fall back to _ohai_memory
    @logger.warn("Can't run dmidecode. Reason: #{e.message}")
    nil
  end

  # Total memory in bytes as reported by ohai, or nil when unavailable.
  def _ohai_memory
    info = {}
    # gsub leaves the matched unit suffix in $1
    size = @os['memory']['total'].gsub(/(kb|mb|gb)$/i, "").to_i rescue (return nil)
    info[:total] = _size(size, $1)
    info
  end

  # Finds the local interface whose IPv4 network contains +local_addr+.
  # Returns {:ip => ..., :mac => ...} or {} when nothing matches.
  def _get_ip_mac_pair_for(local_addr)
    @os[:network][:interfaces].each do |_, intinfo|
      next unless intinfo.has_key?(:addresses)
      intinfo[:addresses].each do |k, v|
        # Here we need to check family because IPAddr.new with bad
        # data works very slow on some environments
        # https://bugs.launchpad.net/fuel/+bug/1284571
        if v[:family] == 'inet' && !(IPAddr.new(k) rescue nil).nil?
          net = IPAddr.new("#{k}/#{v[:netmask]}")
          if net.include? local_addr
            mac = intinfo[:addresses].find { |_, info| info[:family] == 'lladdr' }[0]
            return {:ip => k, :mac => mac}
          end
        end
      end
    end
    {}
  end

  # Looks up the admin/pxe endpoint IPs in /etc/astute.yaml and returns the
  # first matching local ip/mac pair, or {} when nothing is found.
  def _master_ip_and_mac_for_multirack
    rv = {}
    if File.exist?('/etc/astute.yaml')
      conf = YAML::load_file('/etc/astute.yaml')
      return {} unless conf.is_a?(Hash)
      e_point_name = conf.fetch('network_scheme', {}).fetch('roles', {}).fetch('admin/pxe', nil)
      e_point_ips = conf.fetch('network_scheme', {}).fetch('endpoints', {}).fetch(e_point_name, {}).fetch('IP', [])
      e_point_ips.each do |admin_ip|
        rv = _get_ip_mac_pair_for(admin_ip)
        break unless rv.empty?
      end
    end
    return rv
  end

  # Local ip/mac pair in the network of the API host, with a fallback to
  # the multirack lookup.
  def _master_ip_and_mac
    rv = _get_ip_mac_pair_for(@api_ip)
    return (rv.empty? ? _master_ip_and_mac_for_multirack : rv)
  end

  # Builds the payload sent to the API, including :agent_checksum computed
  # over the rest of the payload.
  def _data
    res = {
      :mac => (@os[:macaddress] rescue nil),
      :ip => (@os[:ipaddress] rescue nil),
      :os_platform => (@os[:platform] rescue nil),
    }
    begin
      detailed_data = _detailed
      master_data = _master_ip_and_mac
      res.merge!({
        :ip => ((master_data[:ip] or @os[:ipaddress]) rescue nil),
        :mac => ((master_data[:mac] or @os[:macaddress]) rescue nil),
        :manufacturer => _manufacturer,
        :platform_name => _product_name,
        :meta => detailed_data
      })
    rescue Exception => e
      @logger.error("Error '#{e.message}' in metadata calculation: #{e.backtrace}")
    end

    res[:status] = @node_state if @node_state
    res[:is_agent] = true
    res[:agent_checksum] = createsig(res)
    res
  end

  # Parsed `lshw -json` output, or {} when lshw is missing, fails or does
  # not finish within the 'lshw_timeout' setting (60 seconds by default).
  def _get_pci_dev_list
    # The setting may come from the kernel command line as a String
    lshw_timeout = Float(@settings['lshw_timeout'] || 60) rescue 60
    Timeout::timeout(lshw_timeout) do
      lshw_path = `which lshw`.chomp
      if $?.success?
        data = `#{lshw_path} -json`
        return JSON.parse(data) if $?.success?
        @logger.warn("Can't get data from lshw. Reason: lshw exited with status #{$?.exitstatus}")
      else
        @logger.warn("Can't find lshw. Reason: 'which lshw' returned exit status #{$?.exitstatus}")
      end
    end
    {}
  rescue => e
    @logger.warn("Can't get data from lshw. Reason: #{e.message}")
    {}
  end

  # Builds NUMA topology from `lstopo --no-caches --of xml` output.
  # The XML is queried for:
  #   * //distances           - latency matrix ('nbobjs' values per row)
  #   * object type NUMANode  - NUMA nodes (object type Machine is used
  #                             when there are no NUMANode objects)
  #   * nested type PU        - cpus of the node ('os_index')
  #   * nested type PCIDev    - PCI devices of the node ('pci_busid')
  # Returns a Hash with :numa_nodes, :supported_hugepages and :distances,
  # or nil on any error.
  def get_numa_topology
    doc = Document.new `lstopo --no-caches --of xml`
    topology = {:numa_nodes => [], :supported_hugepages => supported_hugepages, :distances => [["1.0"]]}
    doc.elements.each('//distances/') do |dist|
      values = dist.elements.collect { |v| v.attributes['value'] }
      topology[:distances] = values.each_slice(dist.attributes['nbobjs'].to_i).to_a
    end

    numa_node = "//object[@type='NUMANode']"
    element = doc.elements["//object[@type='NUMANode']"] ? numa_node : "//object[@type='Machine']"
    doc.elements.each(element) do |numa|
      struct = {:id => nil, :cpus => [], :memory => nil, :pcidevs => []}
      struct[:id] = numa.attributes['os_index'].to_i
      struct[:memory] = numa.attributes['local_memory'].to_i
      numa.elements.each("#{numa.xpath}//[@type='PU']") do |pu|
        struct[:cpus] << pu.attributes['os_index'].to_i
      end
      numa.elements.each("#{numa.xpath}//[@type='PCIDev']") do |pcidev|
        struct[:pcidevs] << pcidev.attributes['pci_busid']
      end
      topology[:numa_nodes] << struct
    end
    topology
  rescue => e
    @logger.error "Something went wrong with parsing lstopo: #{e.backtrace}"
    nil
  end

  # Huge page sizes chosen from the flags of cpu '0':
  # [2048, 1048576] with 'pdpe1gb', [2048] with 'pse', [] otherwise.
  def supported_hugepages
    return [2048, 1048576] if @os[:cpu]['0']['flags'].include?('pdpe1gb')
    return [2048] if @os[:cpu]['0']['flags'].include?('pse')
    []
  end

  # Returns `lspci -vvv -s <device>` output, or "" when lspci is missing or fails.
  def _get_lspci_info(device)
    lspci_path = `which lspci`.chomp
    if $?.success?
      data = `#{lspci_path} -vvv -s #{device}`
      if $?.success?
        return data
      else
        @logger.warn("Can't get data from lspci. Reason: lspci exited with status #{$?.exitstatus}")
        ""
      end
    else
      @logger.warn("Can't find lspci. Reason: 'which lspci' returned exit status #{$?.exitstatus}")
      ""
    end
  rescue => e
    @logger.warn("Can't get data from lspci for #{device} slot. Reason: #{e.message}")
    ""
  end

  # Sets @node_state to "discover" when the first line of
  # /etc/nailgun_systemtype is "bootstrap"; nil otherwise.
  def update_state
    @node_state = nil
    if File.exist?("/etc/nailgun_systemtype")
      # Block form closes the file; gets returns nil for an empty file
      system_type = File.open("/etc/nailgun_systemtype", "r") { |fl| fl.gets.to_s.rstrip }
      @node_state = "discover" if system_type == "bootstrap"
    end
  end
end

# Writes +data+ to +filename+ unless the file already contains exactly
# +data+. Write errors are logged as warnings and not raised.
def write_data_to_file(logger, filename, data)
  text = File.exist?(filename) ? File.read(filename) : ''

  if text == data
    logger.info("File '#{filename}' is up to date.")
    return
  end

  begin
    File.open(filename, 'w') do |fo|
      fo.write(data)
    end
    logger.info("Wrote data to file '#{filename}'. Data: #{data}")
  rescue StandardError => e
    logger.warn("Can't write data to file '#{filename}'. Reason: #{e.message}")
  end
end

logger = Logger.new(STDOUT)

if File.exist?('/etc/nailgun_uid')
  logger.level = Logger::INFO
else
  logger.level = Logger::DEBUG
end

# random sleep is here to prevent target nodes
# from reporting to master node all at once
sleep_time = rand(30)
logger.debug("Sleep for #{sleep_time} seconds before sending request")
sleep(sleep_time)

if File.exist?('/etc/nailgun-agent/nodiscover')
  logger.info("Discover prevented by /etc/nailgun-agent/nodiscover presence.")
  exit 1
end

agent = NodeAgent.new(logger)
agent.update_state

begin
  unless File.exist?('/etc/nailgun_uid')
    resp = agent.post
    # We must not log 409 as error, after node is provisioned there will be no
    # /etc/nailgun_uid, it will be created after put request
    if [409, 403].include? resp.status
      resp = agent.put
    end
  else
    resp = agent.put
    # Handle case when node was removed, but nailgun_uid exist
    if resp.status == 400
      resp = agent.post
    end
  end

  unless [201, 200].include? resp.status
    logger.error resp.body
    exit 1
  end

  new_id = JSON.parse(resp.body)['id']
  mc_config = McollectiveConfig.new(logger)
  mc_config.replace_identity(new_id)
  write_data_to_file(logger, '/etc/nailgun_uid', new_id.to_s)
rescue => ex
  # NOTE(mihgen): There is no need to retry - cron will do it for us
  logger.error "#{ex.message}\n#{ex.backtrace}"
end