#!/usr/bin/env ruby

#    Copyright 2013 Mirantis, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

begin
  require 'rubygems'
rescue LoadError
end
require 'ohai/system'
require 'json'
require 'httpclient'
require 'logger'
require 'optparse'
require 'yaml'
require 'ipaddr'
require 'rethtool'
require 'digest'
require 'timeout'

unless Process.euid == 0
  puts "You must be root"
  exit 1
end

ENV['PATH'] = "/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin"

AGENT_CONFIG = "/etc/nailgun-agent/config.yaml"

# Major device numbers that are treated as data storage devices.
# Look at https://github.com/torvalds/linux/blob/master/Documentation/devices.txt
# KVM virtio volumes have code 252 in CentOS, but 253 in Ubuntu.
# Please also update the device codes here:
# https://github.com/stackforge/fuel-astute/blob/master/mcagents/erase_node.rb#L81
STORAGE_CODES = [3, 8, 65, 66, 67, 68, 69, 70, 71, 104, 105, 106, 107, 108, 109, 110, 111, 202, 252, 253].freeze

# Vendors whose devices are reported even when sysfs flags them as removable.
REMOVABLE_VENDORS = [
  "Adaptec", "IBM", "ServeRA",
].freeze

# Builds a canonical string for an arbitrarily nested structure.
#
# Hashes are rendered as sorted "key=>value" pairs (so key order does not
# matter), arrays as '|'-joined elements, and everything else as
# "ClassName:to_s".
#
# @param body [Object] value to serialize
# @return [String] canonical representation
def digest(body)
  if body.is_a? Hash
    digest body.map { |k, v| [digest(k), digest(v)].join("=>") }.sort
  elsif body.is_a? Array
    body.map { |v| digest v }.join('|')
  else
    [body.class.to_s, body.to_s].join(":")
  end
end

# @param body [Object] value to sign
# @return [String] SHA1 hex digest of the canonical form produced by #digest
def createsig(body)
  Digest::SHA1.hexdigest(digest(body))
end

# Reads and updates the `identity` setting of the mcollective server config.
class McollectiveConfig
  # @param logger [Logger] destination for informational messages
  def initialize(logger)
    @logger = logger
    @configfile = '/etc/mcollective/server.cfg'
  end

  # Looks up a `key = value` setting in the config file.
  #
  # The parsing rules are taken from mcollective's sources: trailing
  # whitespace is stripped, comment and blank lines are skipped, and the
  # last occurrence of the key wins.
  #
  # @param find_key [String] setting name
  # @return [String, nil] the value, or nil when the key is absent
  def get_config_by_key(find_key)
    found_value = nil
    # File.foreach closes the file when the iteration is over.
    File.foreach(@configfile) do |raw_line|
      # strip blank spaces, tabs etc off the end of all lines
      line = raw_line.rstrip
      next if line =~ /^#|^$/
      if line =~ /(.+?)\s*=\s*(.+)/
        key = $1
        val = $2
        found_value = val if key == find_key
      end
    end
    found_value
  end

  # Sets `identity` in the config file to new_id and restarts mcollective
  # when the stored value differs.
  #
  # @param new_id [#to_s] new identity; may only contain \w, '.' and '-'
  # @raise [RuntimeError] when new_id contains other characters
  def replace_identity(new_id)
    # Check if id complies reqs. \A/\z anchor the whole string: ^/$ match
    # per line and would accept a multi-line value that is then written
    # into the config file.
    raise 'Identities can only match /\w\.\-/' unless new_id.to_s.match(/\A[\w\.\-]+\z/)

    value_from_config = get_config_by_key('identity')

    if value_from_config == new_id.to_s
      @logger.info "MCollective is up to date with identity = #{new_id}"
    else
      config = File.open(@configfile, "rb") { |f| f.read }
      if value_from_config
        # Key found, but it has other value
        @logger.info "Replacing identity in mcollective server.cfg to new value = '#{new_id}'"
        config.gsub!(/^identity[ =].*$/, "identity = #{new_id}")
      else
        # if key was not found
        config += "\nidentity = #{new_id}\n"
        @logger.info "Identity in mcollective server.cfg has not been found. Setting to '#{new_id}'"
      end
      File.open(@configfile, "w") { |f| f.write(config) }
      puts `service mcollective restart`
    end
  end
end

# One offloading mode of a network interface together with its sub-modes.
class Offloading
  # @param name [String] offloading mode name
  # @param sub [Array<Offloading>] nested sub-modes
  def initialize(name, sub)
    @name, @sub = name, sub
  end

  # Serializes as {"name": ..., "state": null, "sub": [...]}.
  # Arguments passed by the JSON generator are forwarded to Hash#to_json.
  def to_json(*args)
    {'name' => @name, 'state' => nil, 'sub' => @sub}.to_json(*args)
  end

  # Stable textual form. #digest falls back to to_s for unknown classes and
  # the default Object#to_s embeds the object address, which would make the
  # agent checksum differ on every run.
  def to_s
    to_json
  end
end

# Collects hardware information about the node and reports it to the
# Nailgun REST API.
class NodeAgent
  # @param logger [Logger]
  # @param url [String, nil] API base URL; when nil, the master IP is read
  #   from the `url=` parameter of /proc/cmdline (falling back to 127.0.0.1)
  #   and HTTPS on port 8443 is probed before falling back to HTTP on 8000
  def initialize(logger, url=nil)
    @logger = logger

    @api_default_address = "localhost"
    @api_default_port = "8443"
    @api_legacy_port = "8000"

    if url
      # Non-mutating chomp: do not modify the caller's string.
      @api_url = url.to_s.chomp('/')
      # Accept both http and https URLs; a URL without a literal IPv4
      # address (e.g. a host name) yields nil instead of raising.
      ip_match = @api_url.match(/\bhttps?:\/\/((\d{1,3}\.){3}\d{1,3})/)
      @api_ip = ip_match && ip_match[1]
      @logger.warn("Can't extract admin node IP address from '#{@api_url}'") unless @api_ip
    else
      begin
        cmdline = ::File.read("/proc/cmdline")
        @api_ip = cmdline.match(/\burl=http:\/\/((\d{1,3}\.){3}\d{1,3})/)[1]
        @logger.info("Found admin node IP address in kernel cmdline: #{@api_ip}")
      rescue
        @logger.info("Can't get API url from /proc/cmdline. Will use localhost.")
        @api_ip = "127.0.0.1"
      end
      begin
        htclient.get("https://#{@api_ip}:#{@api_default_port}/")
        @scheme = "https"
        @api_port = @api_default_port
      rescue Errno::ECONNREFUSED
        @logger.warn("Connection Refused catched when trying connect to HTTPS port. Use plain HTTP")
        @scheme = "http"
        @api_port = @api_legacy_port
      end
      @api_url = "#{@scheme}://#{@api_ip}:#{@api_port}/api"
    end
    @logger.info("API URL is #{@api_url}")
    @os = ohai_system_info
  end

  # Runs all Ohai plugins with a 30 second limit. On timeout the run is
  # repeated with the block device and filesystem plugins disabled.
  #
  # @return [Ohai::System]
  def ohai_system_info
    Timeout::timeout(30) do
      os = Ohai::System.new()
      os.all_plugins
      os
    end
  rescue Timeout::Error
    # When one of disks is broken, do not collect data about block devices
    # More details: https://bugs.launchpad.net/fuel/+bug/1396086
    Ohai::Config[:disabled_plugins] = ['linux::block_device', 'linux::filesystem']
    os = Ohai::System.new()
    os.all_plugins
    os
  end

  # Sends node data with HTTP PUT to <api_url>/nodes/agent/.
  # A status outside 200..399 is logged as an error.
  #
  # @return the HTTP response object
  def put
    headers = {"Content-Type" => "application/json"}
    @logger.debug("Trying to put host info into #{@api_url}")
    res = htclient.put("#{@api_url}/nodes/agent/", _data.to_json, headers)
    @logger.debug("Response: status: #{res.status} body: #{res.body}")
    if res.status < 200 || res.status >= 400
      @logger.error("HTTP PUT failed: #{res.inspect}")
    end
    res
  end

  # Sends node data with HTTP POST to <api_url>/nodes/.
  #
  # @return the HTTP response object
  def post
    headers = {"Content-Type" => "application/json"}
    @logger.debug("Trying to create host using #{@api_url}")
    res = htclient.post("#{@api_url}/nodes/", _data.to_json, headers)
    @logger.debug("Response: status: #{res.status} body: #{res.body}")
    res
  end

  # Builds a new HTTP client with 10 second timeouts.
  #
  # NOTE(review): certificate verification is disabled and the protocol is
  # pinned to TLSv1; kept as is because it has to match the server setup,
  # but both settings deserve a second look.
  #
  # @return [HTTPClient]
  def htclient
    client = HTTPClient.new
    client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
    client.ssl_config.ssl_version = :TLSv1
    client.connect_timeout = 10
    client.send_timeout = 10
    # (mihgen): Nailgun may hang for a while, but 10sec should be enough for him to respond
    client.receive_timeout = 10
    client
  end

  # @return [Array<Hash>] name/addresses of every interface known to Ohai,
  #   followed by the default interface and default gateway entries
  def _interfaces
    interfaces = @os[:network][:interfaces].inject([]) do |result, elm|
      result << { :name => elm[0], :addresses => elm[1]["addresses"] }
    end
    interfaces << { "default_interface" => @os["network"]["default_interface"] }
    interfaces << { "default_gateway" => @os["network"]["default_gateway"] }
    interfaces
  end

  # Transforms a flat list of offloading mode names into a tree of
  # Offloading objects. An entry starting with a tab is a sub-mode of the
  # closest preceding entry; one leading tab is removed per nesting level.
  # The array passed in is consumed (emptied), and because the current
  # entry is appended after the rest has been processed, the result is in
  # reverse input order.
  #
  # Example of the serialized result:
  # [{
  #   "state":null,
  #   "sub":[
  #     {
  #       "state":null,
  #       "sub":[],
  #       "name":"tx-checksum-ipv6"
  #     },
  #     ...........
  #   ],
  #   "name":"tx-checksumming"
  # },
  # {
  #   "state":null,
  #   "sub":[],
  #   "name":"generic-segmentation-offload"
  # },
  # .............
  # ]
  #
  # @param offloading_arr [Array<String>]
  # @return [Array<Offloading>]
  def _parse_offloading(offloading_arr)
    return [] if offloading_arr.empty?

    inner = []
    current = offloading_arr.shift()
    while offloading_arr.any? && offloading_arr.first().start_with?("\t") do
      inner << offloading_arr.shift()[1..-1]
    end
    res = _parse_offloading(offloading_arr)
    res << Offloading.new(current, _parse_offloading(inner))
  end

  # Gathers system, interface, cpu, disk and memory metadata. A failure in
  # the interfaces, cpu or disks section is logged and leaves that section
  # with whatever was collected so far.
  #
  # @return [Hash]
  def _detailed
    detailed_meta = {
      :system => _system_info,
      :interfaces => [],
      :cpu => {
        :total => (@os[:cpu][:total].to_i rescue nil),
        :real => (@os[:cpu][:real].to_i rescue nil),
        :spec => [],
      },
      :disks => [],
      :memory => (_dmi_memory or _ohai_memory),
    }

    admin_mac = (_master_ip_and_mac[:mac] or @os[:macaddress]) rescue nil
    begin
      (@os[:network][:interfaces] || {}).each do |int, intinfo|
        # Send info about physical interfaces only
        next if intinfo[:type] !~ /^eth.*/
        # Exception: eth0.0(example) have "type" => "eth" but it is not physical interface
        next if int =~ /\d+\.\d+$/ || int =~ /vlan\d+$/
        # Remove interfaces like eth0.101-hapr, eth1-hapr
        next if int =~ /\d+-.+/

        int_meta = {:name => int}
        int_meta[:state] = intinfo[:state]
        (intinfo[:addresses] || {}).each do |addr, addrinfo|
          if (addrinfo[:family] rescue nil) =~ /lladdr/
            int_meta[:mac] = addr
            int_meta[:pxe] = admin_mac == int_meta[:mac]
            begin
              int_info = Rethtool::InterfaceSettings.new(int)
              int_meta[:driver] = int_info.driver
              int_meta[:bus_info] = int_info.bus_info
              int_meta[:max_speed] = int_info.best_mode.speed
              current_speed = int_info.current_mode.speed
              int_meta[:current_speed] = current_speed == :unknown ? nil : current_speed
            rescue
              int_meta[:current_speed] = nil
            end
          elsif (addrinfo[:family] rescue nil) =~ /^inet$/
            int_meta[:ip] = addr
            int_meta[:netmask] = addrinfo[:netmask] if addrinfo[:netmask]
          end
        end

        begin
          # this stuff will put all non-fixed offloading mode into array
          # collect names of non-fixed offloading modes
          # Example of ethtool -k ethX output:
          # tx-checksumming: on
          #         tx-checksum-ipv4: on
          #         tx-checksum-ip-generic: off [fixed]
          #         tx-checksum-ipv6: on
          #         tx-checksum-fcoe-crc: off [fixed]
          #         tx-checksum-sctp: on
          # scatter-gather: on
          #         tx-scatter-gather: on
          #         tx-scatter-gather-fraglist: off [fixed]
          # generic-segmentation-offload: on
          offloading_data = `ethtool -k #{int}`.split("\n").reject { |offloading|
            offloading.include?("Features for") || offloading.include?("fixed")
          }.map { |offloading| offloading.split(':')[0] }
          # transform raw data into array of objects
          int_meta[:offloading_modes] = _parse_offloading(offloading_data)
        rescue
          # in case if we have no `ethtool` package installed we should
          # return empty array to support nailgun's rest api call
          int_meta[:offloading_modes] = []
        end

        detailed_meta[:interfaces] << int_meta
      end
    rescue StandardError => e
      @logger.error("Error '#{e.message}' in gathering interfaces metadata: #{e.backtrace}")
    end

    begin
      (@os[:cpu] || {}).each do |cpu, cpuinfo|
        if cpu =~ /^[\d]+/ and cpuinfo
          frequency = cpuinfo[:mhz].to_i rescue nil
          begin
            # ohai returns current frequency, try to get max if possible
            max_frequency = `cat /sys/devices/system/cpu/cpu#{cpu}/cpufreq/cpuinfo_max_freq 2>/dev/null`.to_i / 1000
            frequency = max_frequency if max_frequency > 0
          rescue
          end
          detailed_meta[:cpu][:spec] << {
            :frequency => frequency,
            :model => (cpuinfo[:model_name].gsub(/ +/, " ") rescue nil)
          }
        end
      end
    rescue StandardError => e
      @logger.error("Error '#{e.message}' in gathering cpu metadata: #{e.backtrace}")
    end

    begin
      Timeout::timeout(30) do
        @logger.debug("Trying to find block devices")
        (@os[:block_device] || {}).each do |bname, binfo|
          @logger.debug("Found block device: #{bname}")
          @logger.debug("Block device info: #{binfo.inspect}")
          block = physical_data_storage_devices.find { |d| d[:name] == bname }
          next unless block && binfo

          @logger.debug("Block device seems to be physical data storage: #{bname}")
          # Removable devices are reported only for whitelisted vendors.
          next if block[:removable] =~ /^1$/ && !REMOVABLE_VENDORS.include?(binfo[:vendor])

          dname = bname.gsub(/!/, '/')
          # 512 bytes is the size of one sector by default
          block_size = 512
          fn = "/sys/block/#{bname}/queue/logical_block_size"
          block_size = File.read(fn).to_i if File.exist? fn
          block_size = 512 if block_size == 0
          detailed_meta[:disks] << {
            :name => dname,
            :model => binfo[:model],
            :size => (binfo[:size].to_i * block_size),
            :disk => block[:disk],
            :extra => block[:extra],
            :removable => block[:removable]
          }
        end
        @logger.debug("Detailed meta disks: #{detailed_meta[:disks].inspect}")
      end
    # Timeout::Error is listed explicitly because it has not always been a
    # StandardError subclass in older Ruby versions.
    rescue StandardError, Timeout::Error => e
      @logger.error("Error '#{e.message}' in gathering disks metadata: #{e.backtrace}")
    end

    detailed_meta
  end

  # @param name [String] device name relative to /dev
  # @return [Array<String>] "disk/by-id/..." paths of all links in
  #   /dev/disk/by-id whose target ends with "/<name>"
  def _disk_id_by_name(name)
    dn = "/dev/disk/by-id"
    # Plain suffix comparison: the name is not interpreted as a regexp.
    links = Dir["#{dn}/**?"].select { |f| File.readlink(f).end_with?("/#{name}") }
    links.map { |p| p.split("/")[2..-1].join("/") }
  end

  # @param name [String] device name relative to /dev
  # @return [String, nil] "disk/by-path/..." path of the first link in
  #   /dev/disk/by-path whose target ends with "/<name>", or nil
  def _disk_path_by_name(name)
    dn = "/dev/disk/by-path"
    link = Dir["#{dn}/**?"].find { |f| File.readlink(f).end_with?("/#{name}") }
    link.split("/")[2..-1].join("/") if link
  end

  # Lists block devices from /sys/block whose udev MAJOR number is in
  # STORAGE_CODES and whose DEVPATH does not contain "virtual". The result
  # is computed once and cached, also when it is empty.
  #
  # @return [Array<Hash>] hashes with :name, :disk, :extra and :removable
  # @raise [RuntimeError] when /sys/block does not exist
  def physical_data_storage_devices
    return @blocks if @blocks

    @logger.debug("Trying to get list of physical devices")
    raise "Path /sys/block does not exist" unless File.exist?("/sys/block")

    # Collected locally and published at the end, so an exception half way
    # through never leaves a partial list in the cache.
    blocks = []
    Dir["/sys/block/*"].each do |block_device_dir|
      basename_dir = File.basename(block_device_dir)
      # Entries in /sys/block for cciss look like cciss!c0d1 while
      # the entries in /dev look like /dev/cciss/c0d1. udevadm uses
      # the entry in /dev so we need to replace the ! to get a valid
      # device name.
      devname = basename_dir.gsub(/!/, '/')

      @logger.debug("Getting udev properties for device: #{devname}")
      properties = `udevadm info --query=property --export --name=#{devname}`.split("\n").inject({}) do |result, raw_property|
        # Split on the first '=' only, so values containing '=' stay intact;
        # lines without '=' are skipped.
        key, value = raw_property.split('=', 2)
        next result if value.nil?
        # Drop one trailing and one leading single quote added by --export.
        result.update(key.strip => value.strip.chomp("'").reverse.chomp("'").reverse)
      end
      @logger.debug("Device #{devname} udev properties: #{properties.inspect}")

      @logger.debug("Trying to find out if device #{devname} is removable or not")
      removable = nil
      if File.exist?("/sys/block/#{basename_dir}/removable")
        removable = File.open("/sys/block/#{basename_dir}/removable") { |f| f.read_nonblock(1024).strip }
      end
      @logger.debug("Device #{devname} removable parameter: #{removable.inspect}")

      if STORAGE_CODES.include?(properties['MAJOR'].to_i)
        @logger.debug("Device #{devname} seems to be appropriate")
        # Exclude LVM volumes (in CentOS - 253, in Ubuntu - 252) using additional check
        unless properties['DEVPATH'].to_s.include?('virtual')
          blocks << {
            :name => basename_dir,
            :disk => _disk_path_by_name(devname) || devname,
            :extra => _disk_id_by_name(devname) || [],
            :removable => removable,
          }
        end
      end
    end
    @logger.debug("Final list of physical devices is: #{blocks.inspect}")
    @blocks = blocks
  end

  # @return [Boolean] true when DMI reports the product name "VirtualBox"
  def _is_virtualbox
    @os[:dmi][:system][:product_name] == "VirtualBox" rescue false
  end

  # @return [Boolean] true for VirtualBox or when Ohai reports the
  #   virtualization role "guest"
  def _is_virtual
    _is_virtualbox || (@os[:virtualization][:role] == "guest" rescue false)
  end

  # @return [String, nil] product name on VirtualBox, upcased virtualization
  #   system on other virtual nodes, DMI manufacturer otherwise
  def _manufacturer
    if _is_virtualbox
      @os[:dmi][:system][:product_name] rescue nil
    elsif _is_virtual
      @os[:virtualization][:system].upcase.strip rescue nil
    else
      @os[:dmi][:system][:manufacturer].strip rescue nil
    end
  end

  # @return [String, nil] DMI product name; nil on virtual nodes
  def _product_name
    unless _is_virtual
      @os[:dmi][:system][:product_name].strip rescue nil
    end
  end

  # @return [String, nil] DMI serial number
  def _serial
    @os[:dmi][:system][:serial_number].strip rescue nil
  end

  # Returns unique identifier of machine
  # * for kvm virtual node will contain virsh UUID
  # * for physical HW that would be unique chassis id (from BIOS settings)
  # * for other hypervizors - not tested
  def uuid
    node_uuid = @os.data.fetch(:dmi, {}).fetch(:system, {}).fetch(:uuid, nil)
    node_uuid && node_uuid.strip
  end

  # @return [Hash] system description without nil, empty and
  #   "Not Specified" values
  def _system_info
    {
      :manufacturer => _manufacturer,
      :serial => _serial,
      :uuid => uuid,
      :product => _product_name,
      :family => (@os[:dmi][:system][:family].strip rescue nil),
      :version => (@os[:dmi][:system][:version].strip rescue nil),
      :fqdn => (@os[:fqdn].strip rescue @os[:hostname].strip rescue nil),
    }.delete_if { |key, value| value.nil? or value.empty? or value == "Not Specified" }
  end

  # Converts a size to bytes.
  #
  # @param size [Integer]
  # @param unit [String, nil] "kb", "mb" or "gb" in any letter case
  # @return [Integer, nil] nil for any other unit
  def _size(size, unit)
    case unit
    when /^kb$/i
      size * 1024
    when /^mb$/i
      size * 1048576
    when /^gb$/i
      size * 1073741824
    end
  end

  # Parses `dmidecode` output for memory arrays and memory devices.
  #
  # @return [Hash, nil] :devices, :total, :maximum_capacity and :slots; nil
  #   when dmidecode is missing, fails, or reports no populated devices
  def _dmi_memory
    begin
      dmi = `/usr/sbin/dmidecode`
    rescue Errno::ENOENT
      # No dmidecode binary: let the caller fall back to ohai data.
      return nil
    end
    return nil if $?.to_i != 0

    info = {:devices => [], :total => 0, :maximum_capacity => 0, :slots => 0}
    dmi.split(/\n\n/).each do |group|
      if /^Physical Memory Array$/.match(group)
        if /^\s*Maximum Capacity:\s+(\d+)\s+(mb|gb|kb)/i.match(group)
          info[:maximum_capacity] += _size($1.to_i, $2)
        end
        if /^\s*Number Of Devices:\s+(\d+)/i.match(group)
          info[:slots] += $1.to_i
        end
      elsif /^Memory Device$/.match(group)
        device_info = {}
        if /^\s*Size:\s+(\d+)\s+(mb|gb|kb)/i.match(group)
          size = _size($1.to_i, $2)
          device_info[:size] = size
          info[:total] += size
        else
          # Device without a numeric size: skipped.
          next
        end
        if /^\s*Speed:\s+(\d+)\s+MHz/i.match(group)
          device_info[:frequency] = $1.to_i
        end
        if /^\s*Type:\s+(.*?)$/i.match(group)
          device_info[:type] = $1
        end
        #if /^\s*Locator:\s+(.*?)$/i.match(group)
        #  device_info[:locator] = $1
        #end
        info[:devices].push(device_info)
      end
    end
    if info[:total] == 0
      nil
    else
      info
    end
  end

  # @return [Hash, nil] {:total => bytes} from Ohai's memory total, or nil
  #   when that value is not available
  def _ohai_memory
    raw_total = (@os['memory']['total'] rescue nil)
    return nil unless raw_total.respond_to?(:gsub)

    # Unit suffix extracted explicitly instead of via the implicit $1.
    unit = raw_total[/(kb|mb|gb)$/i, 1]
    size = raw_total.gsub(/(kb|mb|gb)$/i, "").to_i
    {:total => _size(size, unit)}
  end

  # Finds the interface whose IPv4 network contains the API address.
  #
  # @return [Hash] {:ip => ..., :mac => ...} of that interface (:mac is nil
  #   when the interface has no lladdr entry), or {} when nothing matches
  def _master_ip_and_mac
    return {} unless @api_ip

    @os[:network][:interfaces].each do |_, intinfo|
      next unless intinfo.has_key?(:addresses)
      intinfo[:addresses].each do |k, v|
        # Here we need to check family because IPAddr.new with bad
        # data works very slow on some environments
        # https://bugs.launchpad.net/fuel/+bug/1284571
        if v[:family] == 'inet' && !(IPAddr.new(k) rescue nil).nil?
          net = IPAddr.new("#{k}/#{v[:netmask]}")
          if net.include? @api_ip
            lladdr = intinfo[:addresses].find { |_, info| info[:family] == 'lladdr' }
            return {:ip => k, :mac => (lladdr && lladdr[0])}
          end
        end
      end
    end
    {}
  end

  # Builds the payload sent to the API: basic identification, detailed
  # metadata, the node status when known, and a checksum over all of it.
  #
  # @return [Hash]
  def _data
    res = {
      :mac => (@os[:macaddress] rescue nil),
      :ip => (@os[:ipaddress] rescue nil),
      :os_platform => (@os[:platform] rescue nil),
    }
    begin
      detailed_data = _detailed
      master_data = _master_ip_and_mac
      res.merge!({
        :ip => ((master_data[:ip] or @os[:ipaddress]) rescue nil),
        :mac => ((master_data[:mac] or @os[:macaddress]) rescue nil),
        :manufacturer => _manufacturer,
        :platform_name => _product_name,
        :meta => detailed_data
      })
    rescue StandardError => e
      @logger.error("Error '#{e.message}' in metadata calculation: #{e.backtrace}")
    end

    res[:status] = @node_state if @node_state
    res[:is_agent] = true
    res[:agent_checksum] = createsig(res)
    res
  end

  # Sets the node state to "discover" when the first line of
  # /etc/nailgun_systemtype is "bootstrap"; otherwise leaves it nil.
  def update_state
    @node_state = nil
    if File.exist?("/etc/nailgun_systemtype")
      # Block form closes the file; gets returns nil for an empty file
      # instead of raising EOFError.
      system_type = File.open("/etc/nailgun_systemtype", "r") { |fl| fl.gets }.to_s.rstrip
      @node_state = "discover" if system_type == "bootstrap"
    end
  end
end

# Writes data to filename unless the file already holds exactly that
# content. A write failure is logged as a warning and not raised.
#
# @param logger [Logger]
# @param filename [String]
# @param data [String]
def write_data_to_file(logger, filename, data)
  # Read into a method-level variable so the comparison below sees the
  # current file content.
  text = File.exist?(filename) ? File.read(filename) : ''

  if text != data
    begin
      File.open(filename, 'w') do |fo|
        fo.write(data)
      end
      logger.info("Wrote data to file '#{filename}'. Data: #{data}")
    rescue StandardError => e
      logger.warn("Can't write data to file '#{filename}'. Reason: #{e.message}")
    end
  else
    logger.info("File '#{filename}' is up to date.")
  end
end

logger = Logger.new(STDOUT)

if File.exist?('/etc/nailgun_uid')
  logger.level = Logger::INFO
else
  logger.level = Logger::DEBUG
end

# random sleep is here to prevent target nodes
# from reporting to master node all at once
sleep_time = rand(30)
logger.debug("Sleep for #{sleep_time} seconds before sending request")
sleep(sleep_time)

if File.exist?('/etc/nailgun-agent/nodiscover')
  logger.info("Discover prevented by /etc/nailgun-agent/nodiscover presence.")
  exit 1
end

begin
  logger.info("Trying to load agent config #{AGENT_CONFIG}")
  url = YAML.load_file(AGENT_CONFIG)['url']
  logger.info("Obtained service url from config file: '#{url}'")
rescue StandardError => e
  logger.info("Could not get url from configuration file: #{e.message}, trying other ways..")
end

agent = NodeAgent.new(logger, url)
agent.update_state

begin
  unless File.exist?('/etc/nailgun_uid')
    resp = agent.post
    # We must not log 409 as error, after node is provisioned there will be no
    # /etc/nailgun_uid, it will be created after put request
    if [409, 403].include? resp.status
      resp = agent.put
    end
  else
    resp = agent.put
    # Handle case when node was removed, but nailgun_uid exist
    if resp.status == 400
      resp = agent.post
    end
  end
  unless [201, 200].include? resp.status
    logger.error resp.body
    exit 1
  end

  new_id = JSON.parse(resp.body)['id']
  mc_config = McollectiveConfig.new(logger)
  mc_config.replace_identity(new_id)
  write_data_to_file(logger, '/etc/nailgun_uid', new_id.to_s)
rescue => ex
  # NOTE(mihgen): There is no need to retry - cron will do it for us
  logger.error "#{ex.message}\n#{ex.backtrace}"
end