config/sysinv/sysinv/sysinv/sysinv/agent/node.py

#
# Copyright (c) 2013-2016 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

# vim: tabstop=4 shiftwidth=4 softtabstop=4

# All Rights Reserved.
#

""" inventory numa node Utilities and helper functions."""

import os
from os import listdir
from os.path import isfile, join
import re
import subprocess

from sysinv.openstack.common import log as logging
import tsconfig.tsconfig as tsc

LOG = logging.getLogger(__name__)

# Defines per-socket vswitch memory requirements (in MB)
VSWITCH_MEMORY_MB = 1024

# Defines the size of one kilobyte
SIZE_KB = 1024

# Defines the size of 2 megabytes in kilobyte units
SIZE_2M_KB = 2048

# Defines the size of 1 gigabyte in kilobyte units
SIZE_1G_KB = 1048576

# Defines the size of 2 megabytes in megabyte units
SIZE_2M_MB = int(SIZE_2M_KB / SIZE_KB)

# Defines the size of 1 gigabyte in megabyte units
SIZE_1G_MB = int(SIZE_1G_KB / SIZE_KB)

# Defines the minimum size of memory for a controller node in megabyte units
CONTROLLER_MIN_MB = 6000

# Defines the minimum size of memory for a worker node in megabyte units
COMPUTE_MIN_MB = 1600

# Defines the minimum size of memory for a secondary worker node in megabyte
# units
COMPUTE_MIN_NON_0_MB = 500


class CPU:
    '''Class to encapsulate CPU data for System Inventory'''

    def __init__(self, cpu, numa_node, core, thread,
                 cpu_family=None, cpu_model=None, revision=None):
        '''Construct a Icpu object with the given values.'''

        self.cpu = cpu
        self.numa_node = numa_node
        self.core = core
        self.thread = thread
        self.cpu_family = cpu_family
        self.cpu_model = cpu_model
        self.revision = revision
        # self.allocated_functions = mgmt (usu. 0), vswitch

    def __eq__(self, rhs):
        return (self.cpu == rhs.cpu and
                self.numa_node == rhs.numa_node and
                self.core == rhs.core and
                self.thread == rhs.thread)

    def __ne__(self, rhs):
        return (self.cpu != rhs.cpu or
                self.numa_node != rhs.numa_node or
                self.core != rhs.core or
                self.thread != rhs.thread)

    def __str__(self):
        return "%s [%s] [%s] [%s]" % (self.cpu, self.numa_node,
                                      self.core, self.thread)

    def __repr__(self):
        return "<CPU '%s'>" % str(self)


class NodeOperator(object):
    '''Class to encapsulate CPU operations for System Inventory'''

    def __init__(self):

        self.num_cpus = 0
        self.num_nodes = 0
        self.float_cpuset = 0
        self.total_memory_mb = 0
        self.free_memory_mb = 0
        self.total_memory_nodes_mb = []
        self.free_memory_nodes_mb = []
        self.topology = {}

        # self._get_cpu_topology()
        # self._get_total_memory_mb()
        # self._get_total_memory_nodes_mb()
        # self._get_free_memory_mb()
        # self._get_free_memory_nodes_mb()

    def _is_strict(self):
        with open(os.devnull, "w") as fnull:
            try:
                output = subprocess.check_output(
                    ["cat", "/proc/sys/vm/overcommit_memory"],
                    stderr=fnull)
                if int(output) == 2:
                    return True
            except subprocess.CalledProcessError as e:
                LOG.info("Failed to check for overcommit, error (%s)",
                         e.output)
        return False

    def convert_range_string_to_list(self, s):
        olist = []
        s = s.strip()
        if s:
            for part in s.split(','):
                if '-' in part:
                    a, b = part.split('-')
                    a, b = int(a), int(b)
                    olist.extend(range(a, b + 1))
                else:
                    a = int(part)
                    olist.append(a)
        olist.sort()
        return olist

    def inodes_get_inumas_icpus(self):
        '''Enumerate logical cpu topology based on parsing /proc/cpuinfo
           as function of socket_id, core_id, and thread_id. This updates
           topology.

        :param self
        :updates self.num_cpus- number of logical cpus
        :updates self.num_nodes- number of sockets;maps to number of numa nodes
        :updates self.topology[socket_id][core_id][thread_id] = cpu
        :returns None
        '''
        self.num_cpus = 0
        self.num_nodes = 0
        self.topology = {}

        thread_cnt = {}
        cpu = socket_id = core_id = thread_id = -1
        re_processor = re.compile(r'^[Pp]rocessor\s+:\s+(\d+)')
        re_socket = re.compile(r'^physical id\s+:\s+(\d+)')
        re_core = re.compile(r'^core id\s+:\s+(\d+)')
        re_cpu_family = re.compile(r'^cpu family\s+:\s+(\d+)')
        re_cpu_model = re.compile(r'^model name\s+:\s+(\w+)')

        inumas = []
        icpus = []
        sockets = []

        with open('/proc/cpuinfo', 'r') as infile:
            icpu_attrs = {}

            for line in infile:
                match = re_processor.search(line)
                if match:
                    cpu = int(match.group(1))
                    socket_id = -1
                    core_id = -1
                    thread_id = -1
                    self.num_cpus += 1
                    continue

                match = re_cpu_family.search(line)
                if match:
                    name_value = [s.strip() for s in line.split(':', 1)]
                    name, value = name_value
                    icpu_attrs.update({'cpu_family': value})
                    continue

                match = re_cpu_model.search(line)
                if match:
                    name_value = [s.strip() for s in line.split(':', 1)]
                    name, value = name_value
                    icpu_attrs.update({'cpu_model': value})
                    continue

                match = re_socket.search(line)
                if match:
                    socket_id = int(match.group(1))
                    if socket_id not in sockets:
                        sockets.append(socket_id)
                        attrs = {
                            'numa_node': socket_id,
                            'capabilities': {},
                        }
                        inumas.append(attrs)
                    continue

                match = re_core.search(line)
                if match:
                    core_id = int(match.group(1))

                    if socket_id not in thread_cnt:
                        thread_cnt[socket_id] = {}
                    if core_id not in thread_cnt[socket_id]:
                        thread_cnt[socket_id][core_id] = 0
                    else:
                        thread_cnt[socket_id][core_id] += 1
                    thread_id = thread_cnt[socket_id][core_id]

                    if socket_id not in self.topology:
                        self.topology[socket_id] = {}
                    if core_id not in self.topology[socket_id]:
                        self.topology[socket_id][core_id] = {}

                    self.topology[socket_id][core_id][thread_id] = cpu
                    attrs = {
                        'cpu': cpu,
                        'numa_node': socket_id,
                        'core': core_id,
                        'thread': thread_id,
                        'capabilities': {},
                    }
                    icpu_attrs.update(attrs)
                    icpus.append(icpu_attrs)
                    icpu_attrs = {}
                    continue

        self.num_nodes = len(self.topology.keys())

        # In the case topology not detected, hard-code structures
        if self.num_nodes == 0:
            n_sockets, n_cores, n_threads = (1, int(self.num_cpus), 1)
            self.topology = {}
            for socket_id in range(n_sockets):
                self.topology[socket_id] = {}
                if socket_id not in sockets:
                    sockets.append(socket_id)
                    attrs = {
                        'numa_node': socket_id,
                        'capabilities': {},
                    }
                    inumas.append(attrs)
                for core_id in range(n_cores):
                    self.topology[socket_id][core_id] = {}
                    for thread_id in range(n_threads):
                        self.topology[socket_id][core_id][thread_id] = 0
                        attrs = {
                            'cpu': cpu,
                            'numa_node': socket_id,
                            'core': core_id,
                            'thread': thread_id,
                            'capabilities': {},
                        }
                        icpus.append(attrs)

            # Define Thread-Socket-Core order for logical cpu enumeration
            cpu = 0
            for thread_id in range(n_threads):
                for core_id in range(n_cores):
                    for socket_id in range(n_sockets):
                        if socket_id not in sockets:
                            sockets.append(socket_id)
                            attrs = {
                                'numa_node': socket_id,
                                'capabilities': {},
                            }
                            inumas.append(attrs)
                        self.topology[socket_id][core_id][thread_id] = cpu
                        attrs = {
                            'cpu': cpu,
                            'numa_node': socket_id,
                            'core': core_id,
                            'thread': thread_id,
                            'capabilities': {},
                        }
                        icpus.append(attrs)
                        cpu += 1
            self.num_nodes = len(self.topology.keys())

        LOG.debug("inumas= %s, icpus = %s" % (inumas, icpus))

        return inumas, icpus

    def _get_immediate_subdirs(self, dir):
        return [name for name in listdir(dir)
                if os.path.isdir(join(dir, name))]

    def _inode_get_memory_hugepages(self):
        """Collect hugepage info, including vswitch, and vm.
           Collect platform reserved if config.
        :param self
        :returns list of memory nodes and attributes
        """

        imemory = []

        initial_worker_config_completed = \
            os.path.exists(tsc.INITIAL_WORKER_CONFIG_COMPLETE)

        # check if it is initial report before the huge pages are allocated
        initial_report = not initial_worker_config_completed

        # do not send report if the initial worker config is completed and
        # worker config has not finished, i.e.during subsequent
        # reboot before the manifest allocates the huge pages
        worker_config_completed = \
            os.path.exists(tsc.VOLATILE_WORKER_CONFIG_COMPLETE)
        if (initial_worker_config_completed and
                not worker_config_completed):
            return imemory

        for node in range(self.num_nodes):
            attr = {}
            total_hp_mb = 0  # Total memory (MB) currently configured in HPs
            free_hp_mb = 0

            # Check vswitch and libvirt memory
            # Loop through configured hugepage sizes of this node and record
            # total number and number free
            hugepages = "/sys/devices/system/node/node%d/hugepages" % node

            try:
                subdirs = self._get_immediate_subdirs(hugepages)

                for subdir in subdirs:
                    hp_attr = {}
                    sizesplit = subdir.split('-')
                    if sizesplit[1].startswith("1048576kB"):
                        size = SIZE_1G_MB
                    else:
                        size = SIZE_2M_MB

                    nr_hugepages = 0
                    free_hugepages = 0

                    mydir = hugepages + '/' + subdir
                    files = [f for f in listdir(mydir) if isfile(join(mydir, f))]

                    if files:
                        for file in files:
                            with open(mydir + '/' + file, 'r') as f:
                                if file.startswith("nr_hugepages"):
                                    nr_hugepages = int(f.readline())
                                if file.startswith("free_hugepages"):
                                    free_hugepages = int(f.readline())

                    total_hp_mb = total_hp_mb + int(nr_hugepages * size)
                    free_hp_mb = free_hp_mb + int(free_hugepages * size)

                    # Libvirt hugepages can be 1G and 2M
                    if size == SIZE_1G_MB:
                        vswitch_hugepages_nr = VSWITCH_MEMORY_MB / size
                        hp_attr = {
                            'vswitch_hugepages_size_mib': size,
                            'vswitch_hugepages_nr': vswitch_hugepages_nr,
                            'vswitch_hugepages_avail': 0,
                            'vm_hugepages_nr_1G':
                                (nr_hugepages - vswitch_hugepages_nr),
                            'vm_hugepages_avail_1G': free_hugepages,
                            'vm_hugepages_use_1G': 'True'
                        }
                    else:
                        if len(subdirs) == 1:
                            # No 1G hugepage support.
                            vswitch_hugepages_nr = VSWITCH_MEMORY_MB / size
                            hp_attr = {
                                'vswitch_hugepages_size_mib': size,
                                'vswitch_hugepages_nr': vswitch_hugepages_nr,
                                'vswitch_hugepages_avail': 0,
                            }
                            hp_attr.update({'vm_hugepages_use_1G': 'False'})
                        else:
                            # vswitch will use 1G hugpages
                            vswitch_hugepages_nr = 0

                        hp_attr.update({
                            'vm_hugepages_avail_2M': free_hugepages,
                            'vm_hugepages_nr_2M':
                                (nr_hugepages - vswitch_hugepages_nr)
                        })

                    attr.update(hp_attr)

            except IOError:
                # silently ignore IO errors (eg. file missing)
                pass

            # Get the free and total memory from meminfo for this node
            re_node_memtotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
            re_node_memfree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
            re_node_filepages = \
                re.compile(r'^Node\s+\d+\s+\FilePages:\s+(\d+)')
            re_node_sreclaim = \
                re.compile(r'^Node\s+\d+\s+\SReclaimable:\s+(\d+)')
            re_node_commitlimit = \
                re.compile(r'^Node\s+\d+\s+\CommitLimit:\s+(\d+)')
            re_node_committed_as = \
                re.compile(r'^Node\s+\d+\s+\'Committed_AS:\s+(\d+)')

            free_kb = 0    # Free Memory (KB) available
            total_kb = 0   # Total Memory (KB)
            limit = 0      # only used in strict accounting
            committed = 0  # only used in strict accounting

            meminfo = "/sys/devices/system/node/node%d/meminfo" % node
            try:
                with open(meminfo, 'r') as infile:
                    for line in infile:
                        match = re_node_memtotal.search(line)
                        if match:
                            total_kb += int(match.group(1))
                            continue
                        match = re_node_memfree.search(line)
                        if match:
                            free_kb += int(match.group(1))
                            continue
                        match = re_node_filepages.search(line)
                        if match:
                            free_kb += int(match.group(1))
                            continue
                        match = re_node_sreclaim.search(line)
                        if match:
                            free_kb += int(match.group(1))
                            continue
                        match = re_node_commitlimit.search(line)
                        if match:
                            limit = int(match.group(1))
                            continue
                        match = re_node_committed_as.search(line)
                        if match:
                            committed = int(match.group(1))
                            continue

                if self._is_strict():
                    free_kb = limit - committed

            except IOError:
                # silently ignore IO errors (eg. file missing)
                pass

            # Calculate PSS
            pss_mb = 0
            if node == 0:
                cmd = 'cat /proc/*/smaps 2>/dev/null | awk \'/^Pss:/ ' \
                      '{a += $2;} END {printf "%d\\n", a/1024.0;}\''
                try:
                    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                            shell=True)
                    result = proc.stdout.read().strip()
                    pss_mb = int(result)
                except subprocess.CalledProcessError as e:
                    LOG.error("Cannot calculate PSS (%s) (%d)", cmd,
                              e.returncode)
                except OSError as e:
                    LOG.error("Failed to execute (%s) OS error (%d)", cmd,
                              e.errno)

            # need to multiply total_mb by 1024
            node_total_kb = total_hp_mb * SIZE_KB + free_kb + pss_mb * SIZE_KB

            # Read base memory from worker_reserved.conf
            base_mem_mb = 0
            with open('/etc/platform/worker_reserved.conf', 'r') as infile:
                for line in infile:
                    if "WORKER_BASE_RESERVED" in line:
                        val = line.split("=")
                        base_reserves = val[1].strip('\n')[1:-1]
                        for reserve in base_reserves.split():
                            reserve = reserve.split(":")
                            if reserve[0].strip('"') == "node%d" % node:
                                base_mem_mb = int(reserve[1].strip('MB'))

            # On small systems, clip memory overhead to more reasonable minimal
            # settings
            if (total_kb / SIZE_KB - base_mem_mb) < 1000:
                if node == 0:
                    base_mem_mb = COMPUTE_MIN_MB
                    if tsc.nodetype == 'controller':
                        base_mem_mb += CONTROLLER_MIN_MB
                else:
                    base_mem_mb = COMPUTE_MIN_NON_0_MB

            eng_kb = node_total_kb - base_mem_mb * SIZE_KB

            vswitch_mem_kb = (attr.get('vswitch_hugepages_size_mib', 0) *
                              attr.get('vswitch_hugepages_nr', 0) * SIZE_KB)

            vm_kb = (eng_kb - vswitch_mem_kb)

            max_vm_pages_2mb = vm_kb / SIZE_2M_KB
            max_vm_pages_1gb = vm_kb / SIZE_1G_KB

            attr.update({
                'vm_hugepages_possible_2M': max_vm_pages_2mb,
                'vm_hugepages_possible_1G': max_vm_pages_1gb,
            })

            # calculate 90% 2M pages if it is initial report and the huge
            # pages have not been allocated
            if initial_report:
                max_vm_pages_2mb = max_vm_pages_2mb * 0.9
                total_hp_mb += int(max_vm_pages_2mb * (SIZE_2M_KB / SIZE_KB))
                free_hp_mb = total_hp_mb
                attr.update({
                    'vm_hugepages_nr_2M': max_vm_pages_2mb,
                    'vm_hugepages_avail_2M': max_vm_pages_2mb,
                    'vm_hugepages_nr_1G': 0
                })

            attr.update({
                'numa_node': node,
                'memtotal_mib': total_hp_mb,
                'memavail_mib': free_hp_mb,
                'hugepages_configured': 'True',
                'node_memtotal_mib': node_total_kb / 1024,
            })

            imemory.append(attr)

        return imemory

    def _inode_get_memory_nonhugepages(self):
        '''Collect nonhugepage info, including platform reserved if config.
        :param self
        :returns list of memory nodes and attributes
        '''

        imemory = []
        self.total_memory_mb = 0

        re_node_memtotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
        re_node_memfree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
        re_node_filepages = re.compile(r'^Node\s+\d+\s+\FilePages:\s+(\d+)')
        re_node_sreclaim = re.compile(r'^Node\s+\d+\s+\SReclaimable:\s+(\d+)')

        for node in range(self.num_nodes):
            attr = {}
            total_mb = 0
            free_mb = 0

            meminfo = "/sys/devices/system/node/node%d/meminfo" % node
            try:
                with open(meminfo, 'r') as infile:
                    for line in infile:
                        match = re_node_memtotal.search(line)
                        if match:
                            total_mb += int(match.group(1))
                            continue

                        match = re_node_memfree.search(line)
                        if match:
                            free_mb += int(match.group(1))
                            continue
                        match = re_node_filepages.search(line)
                        if match:
                            free_mb += int(match.group(1))
                            continue
                        match = re_node_sreclaim.search(line)
                        if match:
                            free_mb += int(match.group(1))
                            continue

            except IOError:
                # silently ignore IO errors (eg. file missing)
                pass

            total_mb /= 1024
            free_mb /= 1024
            self.total_memory_nodes_mb.append(total_mb)
            attr = {
                'numa_node': node,
                'memtotal_mib': total_mb,
                'memavail_mib': free_mb,
                'hugepages_configured': 'False',
            }

            imemory.append(attr)

        return imemory

    def inodes_get_imemory(self):
        '''Collect logical memory topology
        :param self
        :returns list of memory nodes and attributes
        '''
        imemory = []

        if os.path.isfile("/etc/platform/worker_reserved.conf"):
            imemory = self._inode_get_memory_hugepages()
        else:
            imemory = self._inode_get_memory_nonhugepages()

        LOG.debug("imemory= %s" % imemory)

        return imemory