Browse Source

Merge "NUMA topology discovering"

Jenkins 3 years ago
parent
commit
6ae1bb9c64
1 changed files with 50 additions and 0 deletions
  1. 50
    0
      agent

+ 50
- 0
agent View File

@@ -31,6 +31,8 @@ require 'timeout'
31 31
 require 'uri'
32 32
 # TODO(vsharshov): replace below lines by this string after excluding Ruby 1.8
33 33
 require 'pathname'
34
+require 'rexml/document'
35
+include REXML
34 36
 
35 37
 unless Process.euid == 0
36 38
   puts "You must be root"
@@ -147,6 +149,7 @@ class NodeAgent
147 149
     @api_url = "#{scheme}://#{@api_ip}:#{api_port}/api"
148 150
     @logger.info("API URL is #{@api_url}")
149 151
     @os = ohai_system_info
152
+    @numa_topology = get_numa_topology
150 153
   end
151 154
 
152 155
   def get_scheme_and_port
@@ -276,6 +279,7 @@ class NodeAgent
276 279
       :disks => [],
277 280
       :memory => (_dmi_memory or _ohai_memory),
278 281
       :pci_devices => _get_pci_dev_list,
282
+      :numa_topology => @numa_topology,
279 283
     }
280 284
 
281 285
     admin_mac = (_master_ip_and_mac[:mac] or @os[:macaddress]) rescue nil
@@ -808,6 +812,52 @@ class NodeAgent
808 812
     @logger.warn("Can't get data from lshw. Reason: #{e.message}")
809 813
   end
810 814
 
815
# Discovers the NUMA topology of this node by parsing the XML produced by
# `lstopo --no-caches --of xml`.
#
# Output EXAMPLE (fragment of what lstopo prints):
#   <distances nbobjs="2" relative_depth="1" latency_base="10.000000">
#     <latency value="1.000000"/>
#     <latency value="2.100000"/>
#     <latency value="2.100000"/>
#     <latency value="1.000000"/>
#   </distances>
#   <object type="NUMANode" os_index="0" cpuset="0x3ff003ff" ... local_memory="67452473344">
#     <page_type size="4096" count="14370737"/>
#     <page_type size="1073741824" count="8"/>
#
# Returns a Hash with the keys:
#   :numa_nodes          - Array of {:id => Integer, :cpus => [Integer, ...], :memory => Integer},
#                          one entry per NUMANode object; when lstopo reports no NUMANode
#                          objects, the Machine object is used instead.
#   :supported_hugepages - whatever supported_hugepages returns.
#   :distances           - latency "value" attributes (kept as Strings) grouped into rows of
#                          "nbobjs" entries, or nil when there is no <distances> element.
#
# Returns nil, after logging the failure through @logger, when lstopo cannot
# be executed or its output cannot be processed.
def get_numa_topology
  doc = REXML::Document.new(`lstopo --no-caches --of xml`)

  topology = {:numa_nodes => [], :supported_hugepages => supported_hugepages, :distances => nil}

  doc.elements.each('/topology/object/distances') do |dist|
    row_size = dist.attributes['nbobjs'].to_i
    latencies = dist.elements.collect { |latency| latency.attributes['value'] }
    # The flat list of latencies is a row-major nbobjs x nbobjs matrix.
    topology[:distances] = latencies.each_slice(row_size).to_a
  end

  # Fall back to the Machine object when no NUMANode objects are reported.
  numa_node = "/topology/object/object[@type='NUMANode']"
  element = doc.elements[numa_node] ? numa_node : "/topology/object[@type='Machine']"

  doc.elements.each(element) do |numa|
    struct = {
      :id => numa.attributes['os_index'].to_i,
      :cpus => [],
      :memory => numa.attributes['local_memory'].to_i
    }

    # PU objects are looked up exactly three object levels below the node.
    numa.elements.each("object/object/object[@type='PU']") do |pu|
      struct[:cpus] << pu.attributes['os_index'].to_i
    end
    topology[:numa_nodes] << struct
  end
  topology
rescue => e
  # `rescue => e` binds the exception; a bare `rescue e` would evaluate `e`
  # as an exception class and raise NameError from inside the handler.
  @logger.error "Something went wrong with parsing lstopo: #{e.backtrace}"
  nil
end
854
+
855
# Derives the list of huge page sizes from the flags of CPU '0' as stored
# in @os[:cpu]['0']['flags'].
#
# Returns:
#   [2048, 1048576] when the 'pdpe1gb' flag is present,
#   [2048]          when only the 'pse' flag is present,
#   []              otherwise, including when the CPU entry or its flags
#                   are missing, so that absent data does not raise.
def supported_hugepages
  cpus = (@os || {})[:cpu] || {}
  flags = (cpus['0'] || {})['flags'] || []

  return [2048, 1048576] if flags.include?('pdpe1gb')
  return [2048] if flags.include?('pse')
  []
end
860
+
811 861
   def update_state
812 862
     @node_state = nil
813 863
     if File.exist?("/etc/nailgun_systemtype")

Loading…
Cancel
Save