548 lines
17 KiB
Python
548 lines
17 KiB
Python
#
|
|
# Copyright (c) 2019-2020 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
############################################################################
|
|
#
|
|
# This file contains common collectd plugin constructs and utilities
|
|
#
|
|
############################################################################
|
|
|
|
import collectd
|
|
import itertools as it
|
|
import json
|
|
import uuid
|
|
import httplib2
|
|
import socket
|
|
import time
|
|
import os
|
|
from oslo_concurrency import processutils
|
|
from fm_api import constants as fm_constants
|
|
import tsconfig.tsconfig as tsc
|
|
|
|
from kubernetes import client
|
|
from kubernetes import config
|
|
from kubernetes.client import Configuration
|
|
import urllib3
|
|
|
|
|
|
# http request constants
|
|
PLUGIN_TIMEOUT = 10
|
|
PLUGIN_HTTP_HEADERS = {'Accept': 'application/json', 'Connection': 'close'}
|
|
|
|
MIN_AUDITS_B4_FIRST_QUERY = 2
|
|
|
|
# Kubernetes client constants
|
|
KUBELET_CONF = '/etc/kubernetes/kubelet.conf'
|
|
SSL_TLS_SUPPRESS = True
|
|
|
|
# Standard units conversion parameters (mebi, kibi)
|
|
# Reference: https://en.wikipedia.org/wiki/Binary_prefix
|
|
Mi = 1048576
|
|
Ki = 1024
|
|
|
|
# Standard units conversion
|
|
ONE_MILLION = 1000000
|
|
ONE_THOUSAND = 1000
|
|
ONE_HUNDRED = 100
|
|
|
|
# cgroup definitions
|
|
CGROUP_ROOT = '/sys/fs/cgroup'
|
|
K8S_ROOT = 'k8s-infra'
|
|
KUBEPODS = 'kubepods'
|
|
|
|
# High level grouping categories
|
|
GROUP_OVERALL = 'overall'
|
|
GROUP_FIRST = 'first'
|
|
GROUP_PODS = 'pods'
|
|
|
|
# Overall cpuacct groupings
|
|
GROUP_TOTAL = 'cgroup-total'
|
|
GROUP_PLATFORM = 'platform'
|
|
GROUP_BASE = 'base'
|
|
GROUP_K8S_SYSTEM = 'kube-system'
|
|
GROUP_K8S_ADDON = 'kube-addon'
|
|
|
|
# Groups included in platform - this excludes apps
|
|
PLATFORM_GROUPS = [GROUP_BASE, GROUP_K8S_SYSTEM]
|
|
OVERALL_GROUPS = [GROUP_PLATFORM, GROUP_K8S_ADDON]
|
|
OVERALL_GROUPS.extend(PLATFORM_GROUPS)
|
|
|
|
# First level cgroups -- these are the groups we know about
|
|
CGROUP_SYSTEM = 'system.slice'
|
|
CGROUP_USER = 'user.slice'
|
|
CGROUP_MACHINE = 'machine.slice'
|
|
CGROUP_DOCKER = 'docker'
|
|
CGROUP_K8S = K8S_ROOT
|
|
|
|
# Groupings by first level cgroup
|
|
BASE_GROUPS = [CGROUP_DOCKER, CGROUP_SYSTEM, CGROUP_USER]
|
|
BASE_GROUPS_EXCLUDE = [CGROUP_K8S, CGROUP_MACHINE]
|
|
|
|
# Groupings of pods by kubernetes namespace
|
|
K8S_NAMESPACE_SYSTEM = ['kube-system']
|
|
K8S_NAMESPACE_ADDON = ['monitor', 'openstack']
|
|
|
|
# Pod parent cgroup name based on annotation.
|
|
# e.g., used by: kube-controller-manager, kube-scheduler, kube-apiserver
|
|
POD_ANNOTATION_KEY = 'kubernetes.io/config.hash'
|
|
|
|
# Worker reserved file and keyname
|
|
RESERVED_CONF = '/etc/platform/worker_reserved.conf'
|
|
RESERVED_MEM_KEY = 'WORKER_BASE_RESERVED'
|
|
RESERVED_CPULIST_KEY = 'PLATFORM_CPU_LIST'
|
|
|
|
# plugin return values
|
|
PLUGIN_PASS = 0
|
|
PLUGIN_FAIL = 1
|
|
|
|
|
|
class PluginObject(object):
|
|
|
|
def __init__(self, plugin, url=""):
|
|
|
|
# static variables set in init_func
|
|
self.plugin = plugin # the name of this plugin
|
|
self.hostname = '' # the name of this host
|
|
self.port = 0 # the port number for this plugin
|
|
self.base_eid = '' # the base entity id host=<hostname>
|
|
self.controller = False # set true if node is controller
|
|
|
|
# dynamic gate variables
|
|
self.virtual = False # set to True if host is virtual
|
|
self._config_complete = False # set to True once config is complete
|
|
self.config_done = False # set true if config_func completed ok
|
|
self.init_complete = False # set true if init_func completed ok
|
|
self.fm_connectivity = False # set true when fm connectivity ok
|
|
|
|
self.alarm_type = fm_constants.FM_ALARM_TYPE_7 # OPERATIONAL
|
|
self.cause = fm_constants.ALARM_PROBABLE_CAUSE_50 # THRESHOLD CROSS
|
|
self.suppression = True
|
|
self.service_affecting = False
|
|
|
|
# dynamic variables set in read_func
|
|
self.usage = float(0) # last usage value recorded as float
|
|
self.value = float(0) # last read value
|
|
self.audits = 0 # number of audit since init
|
|
self.enabled = False # tracks a plugin's enabled state
|
|
self.alarmed = False # tracks the current alarmed state
|
|
self.mode = '' # mode specific to plugin
|
|
|
|
# http and json specific variables
|
|
self.url = url # target url
|
|
self.jresp = None # used to store the json response
|
|
self.resp = ''
|
|
|
|
self.objects = [] # list of plugin specific objects
|
|
self.cmd = '' # plugin specific command string
|
|
|
|
# Log controls
|
|
self.config_logged = False # used to log once the plugin config
|
|
self.error_logged = False # used to prevent log flooding
|
|
self.log_throttle_count = 0 # used to count throttle logs
|
|
self.INIT_LOG_THROTTLE = 10 # the init log throttle threshold
|
|
self.CONFIG_LOG_THROTTLE = 50 # the config log throttle threshold
|
|
self.http_retry_count = 0 # track http error cases
|
|
self.HTTP_RETRY_THROTTLE = 6 # http retry threshold
|
|
self.phase = 0 # tracks current phase; init, sampling
|
|
|
|
collectd.debug("%s Common PluginObject constructor [%s]" %
|
|
(plugin, url))
|
|
|
|
###########################################################################
|
|
#
|
|
# Name : init_completed
|
|
#
|
|
# Description: Declare init completed
|
|
#
|
|
# Parameters : plugin name
|
|
#
|
|
###########################################################################
|
|
|
|
def init_completed(self):
|
|
"""Declare plugin init complete"""
|
|
|
|
collectd.info("%s initialization completed" % self.plugin)
|
|
self.init_complete = True
|
|
|
|
###########################################################################
|
|
#
|
|
# Name : config_complete
|
|
#
|
|
# Description: Test for config complete condition
|
|
#
|
|
# Parameters : plugin name
|
|
#
|
|
# Returns : False if config is not complete
|
|
# True if config is complete
|
|
#
|
|
###########################################################################
|
|
|
|
def config_complete(self):
|
|
"""Test for config complete state"""
|
|
|
|
if self._config_complete is False:
|
|
if tsc.nodetype == 'worker' or 'worker' in tsc.subfunctions:
|
|
flag_file = tsc.VOLATILE_WORKER_CONFIG_COMPLETE
|
|
elif tsc.nodetype == 'storage':
|
|
flag_file = tsc.VOLATILE_STORAGE_CONFIG_COMPLETE
|
|
else:
|
|
flag_file = tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE
|
|
|
|
if os.path.exists(flag_file) is False:
|
|
self._config_complete = False
|
|
self.log_throttle_count += 1
|
|
if self.log_throttle_count > self.CONFIG_LOG_THROTTLE:
|
|
collectd.info("%s configuration check needs retry" %
|
|
self.plugin)
|
|
self.log_throttle_count = 0
|
|
time.sleep(1)
|
|
return False
|
|
else:
|
|
self._config_complete = True
|
|
self.log_throttle_count = 0
|
|
collectd.info("%s configuration completed" % self.plugin)
|
|
|
|
return True
|
|
|
|
###########################################################################
|
|
#
|
|
# Name : gethostname
|
|
#
|
|
# Description: load the hostname
|
|
#
|
|
# Parameters : plugin name
|
|
#
|
|
# Returns : Success - hostname
|
|
# Failure - None
|
|
#
|
|
# Updates : obj.hostname
|
|
#
|
|
###########################################################################
|
|
def gethostname(self):
|
|
"""Fetch the hostname"""
|
|
|
|
# get current hostname
|
|
try:
|
|
hostname = socket.gethostname()
|
|
if hostname:
|
|
return hostname
|
|
except:
|
|
collectd.error("%s failed to get hostname" % self.plugin)
|
|
|
|
return None
|
|
|
|
###########################################################################
|
|
#
|
|
# Name : is_virtual
|
|
#
|
|
# Description: Execute facter command with output filter on 'is_virtual'
|
|
#
|
|
# Parameters : None
|
|
#
|
|
# Returns : True if current host is virtual.
|
|
# False if current host is NOT virtual
|
|
#
|
|
###########################################################################
|
|
def is_virtual(self):
|
|
"""Check for virtual host"""
|
|
|
|
try:
|
|
cmd = '/usr/bin/facter is_virtual'
|
|
res, err = processutils.execute(cmd, shell=True)
|
|
if err:
|
|
return False
|
|
elif res:
|
|
# remove the trailing '\n' with strip()
|
|
if res.strip() == 'true':
|
|
collectd.info("%s %s is virtual" %
|
|
(self.plugin, self.hostname))
|
|
return True
|
|
|
|
except Exception as ex:
|
|
collectd.info("%s failed to execute '/usr/bin/facter' ; %s" %
|
|
self.plugin, ex)
|
|
|
|
return False
|
|
|
|
###########################################################################
|
|
#
|
|
# Name : check_for_fit
|
|
#
|
|
# Description: load FIT data if it is present
|
|
#
|
|
# Fit Format : unit data -> 0 89
|
|
# - instance 0 value 89
|
|
#
|
|
# Parameters : plugin name
|
|
# object to update with fit
|
|
# name in fit file
|
|
# unit
|
|
#
|
|
# Returns : Did a failure occur ?
|
|
# False = no
|
|
# True = yes
|
|
#
|
|
# Updates : self.usage with FIT value if FIT conditions are present
|
|
# and apply
|
|
#
|
|
###########################################################################
|
|
def check_for_fit(self, name, unit):
|
|
"""Load FIT data into usage if it exists"""
|
|
|
|
fit_file = '/var/run/fit/' + name + '_data'
|
|
|
|
if os.path.exists(fit_file):
|
|
valid = False
|
|
with open(fit_file, 'r') as infile:
|
|
for line in infile:
|
|
try:
|
|
inst, val = line.split(' ')
|
|
if int(unit) == int(inst):
|
|
self.usage = float(val)
|
|
valid = True
|
|
|
|
except:
|
|
try:
|
|
val = float(line)
|
|
self.usage = float(val)
|
|
valid = True
|
|
|
|
except:
|
|
collectd.error("%s bad FIT data; ignoring" %
|
|
self.plugin)
|
|
|
|
if valid is True:
|
|
collectd.info("%s %.2f usage (unit %d) (FIT)" %
|
|
(self.plugin, unit, self.usage))
|
|
return False
|
|
|
|
return True
|
|
|
|
###########################################################################
|
|
#
|
|
# Name : make_http_request
|
|
#
|
|
# Description: Issue an http request to the specified URL.
|
|
# Load and return the response
|
|
# Handling execution errors
|
|
#
|
|
# Parameters : self as current context.
|
|
#
|
|
# Optional:
|
|
#
|
|
# url - override the default self url with http address to
|
|
# issue the get request to.
|
|
# to - timeout override
|
|
# hdrs - override use of the default header list
|
|
#
|
|
# Updates : self.jresp with the json string response from the request.
|
|
#
|
|
# Returns : Error indication (True/False)
|
|
# True on success
|
|
# False on error
|
|
#
|
|
###########################################################################
|
|
def make_http_request(self, url=None, to=None, hdrs=None):
|
|
"""Make a blocking HTTP Request and return result"""
|
|
|
|
try:
|
|
|
|
# handle timeout override
|
|
if to is None:
|
|
to = PLUGIN_TIMEOUT
|
|
|
|
# handle url override
|
|
if url is None:
|
|
url = self.url
|
|
|
|
# handle header override
|
|
if hdrs is None:
|
|
hdrs = PLUGIN_HTTP_HEADERS
|
|
|
|
http = httplib2.Http(timeout=to)
|
|
resp = http.request(url, headers=hdrs)
|
|
|
|
except Exception as ex:
|
|
collectd.info("%s http request exception ; %s" %
|
|
(self.plugin, str(ex)))
|
|
return False
|
|
|
|
try:
|
|
collectd.debug("%s Resp: %s" %
|
|
(self.plugin, resp[1]))
|
|
|
|
self.resp = resp[1]
|
|
self.jresp = json.loads(resp[1])
|
|
|
|
except Exception as ex:
|
|
collectd.error("%s http response parse exception ; %s" %
|
|
(self.plugin, str(ex)))
|
|
if len(self.resp):
|
|
collectd.error("%s response: %s" %
|
|
(self.plugin, self.resp))
|
|
return False
|
|
return True
|
|
|
|
|
|
class K8sClient(object):
|
|
|
|
def __init__(self):
|
|
|
|
self._host = socket.gethostname()
|
|
self._kube_client_core = None
|
|
|
|
def _load_kube_config(self):
|
|
|
|
config.load_kube_config(KUBELET_CONF)
|
|
|
|
# WORKAROUND: Turn off SSL/TLS verification
|
|
if SSL_TLS_SUPPRESS:
|
|
# Suppress the "InsecureRequestWarning: Unverified HTTPS request"
|
|
# seen with each kubelet client API call.
|
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
c = Configuration()
|
|
c.verify_ssl = False
|
|
Configuration.set_default(c)
|
|
|
|
def _get_k8sclient_core(self):
|
|
if not self._kube_client_core:
|
|
self._load_kube_config()
|
|
self._kube_client_core = client.CoreV1Api()
|
|
return self._kube_client_core
|
|
|
|
def kube_get_local_pods(self):
|
|
field_selector = 'spec.nodeName=' + self._host
|
|
try:
|
|
api_response = self._get_k8sclient_core().\
|
|
list_pod_for_all_namespaces(
|
|
watch=False,
|
|
field_selector=field_selector)
|
|
return api_response.items
|
|
except Exception as err:
|
|
collectd.error("kube_get_local_pods: %s" % (err))
|
|
raise
|
|
|
|
|
|
class POD_object:
|
|
def __init__(self, uid, name, namespace, qos_class):
|
|
self.uid = uid
|
|
self.name = name
|
|
self.namespace = namespace
|
|
self.qos_class = qos_class
|
|
|
|
def __str__(self):
|
|
return str(self.__class__) + ": " + str(self.__dict__)
|
|
|
|
def __repr__(self):
|
|
return str(self.__class__) + ": " + str(self.__dict__)
|
|
|
|
|
|
def is_uuid_like(val):
|
|
"""Returns validation of a value as a UUID
|
|
|
|
For our purposes, a UUID is a canonical form string:
|
|
aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
|
|
"""
|
|
try:
|
|
return str(uuid.UUID(val)) == val
|
|
except (TypeError, ValueError, AttributeError):
|
|
return False
|
|
|
|
|
|
def get_severity_str(severity):
|
|
"""get string that represents the specified severity"""
|
|
|
|
if severity == fm_constants.FM_ALARM_SEVERITY_CLEAR:
|
|
return "clear"
|
|
elif severity == fm_constants.FM_ALARM_SEVERITY_CRITICAL:
|
|
return "critical"
|
|
elif severity == fm_constants.FM_ALARM_SEVERITY_MAJOR:
|
|
return "major"
|
|
elif severity == fm_constants.FM_ALARM_SEVERITY_MINOR:
|
|
return "minor"
|
|
else:
|
|
return "unknown"
|
|
|
|
|
|
def convert2boolean(v):
|
|
"""Convert an object to boolean result"""
|
|
|
|
if type(v) == bool:
|
|
return v
|
|
if isinstance(v, (int, float)):
|
|
return bool(int(v))
|
|
if isinstance(v, str):
|
|
return v.lower() in ("yes", "true", "t", "1",)
|
|
else:
|
|
return False
|
|
|
|
|
|
def log_dictionary(plugin='', name='', d={}):
|
|
"""Log a line of output for each key-value pair for dictionary d.
|
|
|
|
i.e., d[key] = value
|
|
"""
|
|
|
|
for key in sorted(d.keys()):
|
|
collectd.info('%s: %s: %s = %s'
|
|
% (plugin, name, key, d[key]))
|
|
|
|
|
|
def log_dictionary_nodes(plugin='', name='', d={}):
|
|
"""Log a line of output for each key-value pair for nested dictionary d.
|
|
|
|
i.e., For each node, for each key-value pair: d[node][key] = value
|
|
"""
|
|
|
|
for node in sorted(d.keys()):
|
|
for key, val in sorted(d[node].items()):
|
|
collectd.info('%s: %s: %s %s = %s'
|
|
% (plugin, name, node, key, val))
|
|
|
|
|
|
def walklevel(some_dir, level=1):
|
|
"""Recursively walk directories to a specified level.
|
|
|
|
Provides the same functionality as os.walk(), just limits the walk to a
|
|
specified level of recursion.
|
|
"""
|
|
|
|
some_dir = some_dir.rstrip(os.path.sep)
|
|
assert os.path.isdir(some_dir)
|
|
num_sep = some_dir.count(os.path.sep)
|
|
for root, dirs, files in os.walk(some_dir):
|
|
yield root, dirs, files
|
|
num_sep_this = root.count(os.path.sep)
|
|
if num_sep + level <= num_sep_this:
|
|
del dirs[:]
|
|
|
|
|
|
def range_to_list(csv_range=None):
|
|
"""Convert a string of comma separated ranges into integer list.
|
|
|
|
e.g., '1-3,8-9,15' is converted to [1,2,3,8,9,15]
|
|
"""
|
|
|
|
if not csv_range:
|
|
return []
|
|
ranges = [(lambda L: range(L[0], L[-1] + 1))(map(int, r.split('-')))
|
|
for r in csv_range.split(',')]
|
|
return [y for x in ranges for y in x]
|
|
|
|
|
|
def format_range_set(items):
|
|
"""Generate pretty-printed value of ranges, such as 3-6,12-17."""
|
|
|
|
ranges = []
|
|
for k, iterable in it.groupby(enumerate(sorted(items)),
|
|
lambda x: x[1] - x[0]):
|
|
rng = list(iterable)
|
|
if len(rng) == 1:
|
|
s = str(rng[0][1])
|
|
else:
|
|
s = "%s-%s" % (rng[0][1], rng[-1][1])
|
|
ranges.append(s)
|
|
return ','.join(ranges)
|