a1a121fbf8
In some rare cases, such as a VM with virtual NUMA nodes, NICs might not be in a NUMA node, and this breaks NUMA topology discovery. Change-Id: I52f52119a5f3175b1723fde291eb26d4f86c8223 Story: 2007105 Task: 38151
266 lines
10 KiB
Python
266 lines
10 KiB
Python
# Copyright 2017 Red Hat, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
# implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import os
|
|
|
|
from oslo_log import log
|
|
import pint
|
|
|
|
from ironic_python_agent import errors
|
|
|
|
LOG = log.getLogger(__name__)


def _build_unit_converter():
    """Create the pint registry used to parse memory sizes.

    The registry is created with ``filename=None`` and then only the four
    units below are defined on it.  ``kB`` and ``KB`` are each declared as
    their own base unit (``= []``); ``MB`` and ``GB`` are expressed in ``KB``.

    NOTE(review): per the pint documentation ``filename=None`` leaves the
    registry without the default unit definitions -- confirm against the
    pint version in use.

    :return: the configured ``pint.UnitRegistry``
    """
    registry = pint.UnitRegistry(filename=None)
    # Order matters: MB and GB refer to KB, so KB must be defined first.
    unit_definitions = (
        'kB = []',
        'KB = []',
        'MB = 1024 KB',
        'GB = 1048576 KB',
    )
    for unit_definition in unit_definitions:
        registry.define(unit_definition)
    return registry


UNIT_CONVERTER = _build_unit_converter()
|
|
|
|
|
|
def get_numa_node_id(numa_node_dir):
    """Provides the NUMA node id from NUMA node directory

    The id is the integer found after the first four characters (the
    ``node`` prefix) of the directory's final path component, e.g.
    ``/sys/devices/system/node/node12`` gives ``12``.  Trailing path
    separators are ignored, so ``.../node12/`` gives ``12`` as well.

    :param numa_node_dir: NUMA node directory
    :raises: IncompatibleNumaFormatError: when unexpected format data
             in NUMA node dir

    :return: NUMA node id
    """
    try:
        # normpath() removes trailing separators; without it basename()
        # of 'node0/' is '' and a valid directory would be rejected.
        node_name = os.path.basename(os.path.normpath(numa_node_dir))
        # Drop the 4-character 'node' prefix and parse what is left.
        return int(node_name[4:])
    except (IOError, ValueError, IndexError) as exc:
        msg = ('Failed to get NUMA node id for %(node)s: '
               '%(error)s' % {'node': numa_node_dir, 'error': exc})
        raise errors.IncompatibleNumaFormatError(msg)
|
|
|
|
|
|
def get_nodes_memory_info(numa_node_dirs):
    """Collect the NUMA nodes memory information.

    For every NUMA node directory the ``meminfo`` file inside it is
    scanned for the first line containing ``MemTotal``; the text after
    the colon on that line is converted with ``UNIT_CONVERTER``.

    The information is returned in the form of::

        "ram": [{"numa_node": <numa_node_id>, "size_kb": <memory_in_kb>}, ...]

    :param numa_node_dirs: A list of NUMA node directories
    :raises: IncompatibleNumaFormatError: when unexpected format data
             in NUMA node

    :return: A list of memory information with NUMA node id
    """
    nodes_ram = []
    for node_dir in numa_node_dirs:
        node_id = get_numa_node_id(node_dir)
        meminfo_path = os.path.join(node_dir, 'meminfo')
        try:
            with open(meminfo_path) as meminfo:
                # First line mentioning MemTotal, or None when the file
                # has no such line (including an empty file).
                total_line = next(
                    (entry for entry in meminfo if 'MemTotal' in entry),
                    None)
                if total_line is None:
                    msg = ('Memory information is not available for '
                           '%(node)s' % {'node': node_dir})
                    raise errors.IncompatibleNumaFormatError(msg)
        except IOError as exc:
            msg = ('Failed to get memory information '
                   'for %(node)s: %(error)s' %
                   {'node': node_dir, 'error': exc})
            raise errors.IncompatibleNumaFormatError(msg)

        try:
            # Sample line: 'Node 0 MemTotal: 1560000 kB' -- keep only the
            # '<number> <unit>' part that follows the colon.
            size_text = total_line.split(":")[1].strip()
            quantity = UNIT_CONVERTER(size_text)
            size_kb = int(quantity.to_base_units().magnitude)
        except (ValueError, IndexError, pint.UndefinedUnitError) as exc:
            msg = ('Failed to get memory information for %(node)s: '
                   '%(error)s' % {'node': node_dir, 'error': exc})
            raise errors.IncompatibleNumaFormatError(msg)

        LOG.debug('Found memory available %d KB in NUMA node %d',
                  size_kb, node_id)
        nodes_ram.append({'numa_node': node_id, 'size_kb': size_kb})
    return nodes_ram
|
|
|
|
|
|
def get_nodes_cores_info(numa_node_dirs):
    """Collect the NUMA nodes cpu's and thread's information.

    NUMA nodes path: /sys/devices/system/node/node<node_id>

    Thread dirs path: /sys/devices/system/node/node<node_id>/cpu<thread_id>

    CPU id file path: /sys/devices/system/node/node<node_id>/cpu<thread_id>/
                      topology/core_id

    The information is returned in the form of::

        "cpus": [
            {
                "cpu": <cpu_id>, "numa_node": <numa_node_id>,
                "thread_siblings": [<list of sibling threads>]
            },
            ...,
        ]

    :param numa_node_dirs: A list of NUMA node directories
    :raises: IncompatibleNumaFormatError: when unexpected format data
             in NUMA node

    :return: A list of cpu information with NUMA node id and thread siblings
    """
    dict_cpus = {}
    for numa_node_dir in numa_node_dirs:
        numa_node_id = get_numa_node_id(numa_node_dir)
        try:
            thread_dirs = os.listdir(numa_node_dir)
        except OSError as exc:
            msg = ('Failed to get list of threads for %(node)s: '
                   '%(error)s' % {'node': numa_node_dir, 'error': exc})
            raise errors.IncompatibleNumaFormatError(msg)

        for thread_dir in thread_dirs:
            # Only sub-directories named cpu<...> describe a thread;
            # every other entry of the node directory is skipped.
            if (not os.path.isdir(os.path.join(numa_node_dir, thread_dir))
                    or not thread_dir.startswith("cpu")):
                continue

            try:
                # The thread id is whatever follows the 'cpu' prefix.
                thread_id = int(thread_dir[3:])
            except (ValueError, IndexError) as exc:
                msg = ('Failed to get cores information for '
                       '%(node)s: %(error)s' %
                       {'node': numa_node_dir, 'error': exc})
                raise errors.IncompatibleNumaFormatError(msg)

            try:
                with open(os.path.join(numa_node_dir, thread_dir, 'topology',
                                       'core_id')) as core_id_file:
                    cpu_id = int(core_id_file.read().strip())
            except (IOError, ValueError) as exc:
                # The original message lacked the space after 'thread',
                # which glued the word to the thread directory name.
                msg = ('Failed to gather cpu_id for thread '
                       '%(thread)s NUMA node %(node)s: %(error)s' %
                       {'thread': thread_dir, 'node': numa_node_dir,
                        'error': exc})
                raise errors.IncompatibleNumaFormatError(msg)

            # CPU and NUMA node together forms a unique value, as cpu_id is
            # specific to a NUMA node
            # NUMA node id and cpu id tuple is used for unique key
            dict_key = numa_node_id, cpu_id
            if dict_key in dict_cpus:
                siblings = dict_cpus[dict_key]['thread_siblings']
                if thread_id not in siblings:
                    siblings.append(thread_id)
            else:
                dict_cpus[dict_key] = {
                    'thread_siblings': [thread_id],
                    'cpu': cpu_id,
                    'numa_node': numa_node_id,
                }
            LOG.debug('Found a thread sibling %d for CPU %d in NUMA node %d',
                      thread_id, cpu_id, numa_node_id)
    return list(dict_cpus.values())
|
|
|
|
|
|
def get_nodes_nics_info(nic_device_path):
    """Collect the NUMA nodes nics information.

    Each entry of ``nic_device_path`` is inspected for a
    ``device/numa_node`` file.  Entries without that file are skipped;
    for the others the integer stored in the file is reported as the
    NUMA node of the interface.

    The information is returned in the form of::

        "nics": [
            {"name": "<network interface name>",
             "numa_node": <numa_node_id>},
            ...,
        ]

    :param nic_device_path: nic device directory path
    :raises: IncompatibleNumaFormatError: when unexpected format data
             in NUMA node

    :return: A list of nics information with NUMA node id
    """
    if not os.path.isdir(nic_device_path):
        msg = ('Failed to get list of NIC\'s, NIC device path '
               'does not exist: %(nic_device_path)s' %
               {'nic_device_path': nic_device_path})
        raise errors.IncompatibleNumaFormatError(msg)

    found_nics = []
    for nic_name in os.listdir(nic_device_path):
        numa_node_file = os.path.join(nic_device_path, nic_name,
                                      'device', 'numa_node')
        # An interface without a numa_node file is not an error; it is
        # simply left out of the result.
        if not os.path.isfile(numa_node_file):
            continue

        try:
            with open(numa_node_file) as node_file:
                node_id = int(node_file.read().strip())
        except (IOError, ValueError) as exc:
            msg = ('Failed to gather NIC\'s for NUMA node %(node)s: '
                   '%(error)s' % {'node': nic_name, 'error': exc})
            raise errors.IncompatibleNumaFormatError(msg)

        LOG.debug('Found a NIC %s in NUMA node %d', nic_name,
                  node_id)
        found_nics.append({'name': nic_name, 'numa_node': node_id})
    return found_nics
|
|
|
|
|
|
def collect_numa_topology_info(data, failures):
    """Collect the NUMA topology information.

    The data is gathered from /sys/devices/system/node/node<X> and
    /sys/class/net/ directories. The information is collected in the form of::

        {
            "numa_topology": {
                "ram": [{"numa_node": <numa_node_id>,
                         "size_kb": <memory_in_kb>}, ...],
                "cpus": [
                    {
                        "cpu": <cpu_id>, "numa_node": <numa_node_id>,
                        "thread_siblings": [<list of sibling threads>]
                    },
                    ...,
                ],
                "nics": [
                    {"name": "<network interface name>",
                     "numa_node": <numa_node_id>},
                    ...,
                ]
            }
        }

    When the NUMA node path is missing, or any of the three collectors
    raises IncompatibleNumaFormatError, a warning is logged and ``data``
    is left without a ``numa_topology`` key.

    :param data: mutable data that we'll send to inspector
    :param failures: AccumulatedFailures object (not used by this function)

    :return: None
    """
    node_root = '/sys/devices/system/node/'
    net_root = '/sys/class/net/'

    if not os.path.isdir(node_root):
        LOG.warning('Failed to get list of NUMA nodes, NUMA node path '
                    'does not exist: %s', node_root)
        return

    # Keep only the sub-directories whose name starts with "node".
    node_dirs = [
        os.path.join(node_root, entry)
        for entry in os.listdir(node_root)
        if os.path.isdir(os.path.join(node_root, entry))
        and entry.startswith("node")
    ]

    try:
        # Evaluated in order: ram, cpus, nics.  The first failure aborts
        # the whole collection, so no partial topology is published.
        topology = {
            'ram': get_nodes_memory_info(node_dirs),
            'cpus': get_nodes_cores_info(node_dirs),
            'nics': get_nodes_nics_info(net_root),
        }
    except errors.IncompatibleNumaFormatError as exc:
        LOG.warning('Failed to get some NUMA information (%s)', exc)
        return

    data['numa_topology'] = topology
|