Relocated some packages to repo 'monitoring'

List of relocated subdirectories:

monitoring/collectd-extensions
monitoring/influxdb-extensions
tools/monitor-tools
tools/vm-topology

Story: 2006166
Task: 35687
Depends-On: I6c62895f8dda5b8dc4ff56680c73c49f3f3d7935
Depends-On: I665dc7fabbfffc798ad57843eb74dca16e7647a3
Change-Id: Iffacd50340005320540cd9ba1495cde0b2231cd0
Signed-off-by: Scott Little <scott.little@windriver.com>
Depends-On: I14e631137ff5658a54d62ad3d7aa2cd0ffaba6e0
This commit is contained in:
Scott Little 2019-09-04 10:14:29 -04:00 committed by Don Penney
parent 062ec89dbb
commit 3637d66ae4
48 changed files with 2 additions and 12424 deletions

View File

@ -1,10 +0,0 @@
Metadata-Version: 1.1
Name: collectd-extensions
Version: 1.0
Summary: collectd-extensions
Home-page:
Author: Windriver
Author-email: info@windriver.com
License: ASL 2.0
Description: Titanium Cloud collectd extensions
Platform: UNKNOWN

View File

@ -1,25 +0,0 @@
SRC_DIR="$PKG_BASE"
COPY_LIST="$PKG_BASE/src/LICENSE \
$PKG_BASE/src/collectd.conf.pmon \
$PKG_BASE/src/collectd.service \
$PKG_BASE/src/fm_notifier.py \
$PKG_BASE/src/mtce_notifier.py \
$PKG_BASE/src/plugin_common.py \
$PKG_BASE/src/python_plugins.conf \
$PKG_BASE/src/cpu.py \
$PKG_BASE/src/cpu.conf \
$PKG_BASE/src/memory.py \
$PKG_BASE/src/memory.conf \
$PKG_BASE/src/df.conf \
$PKG_BASE/src/ntpq.py \
$PKG_BASE/src/ntpq.conf \
$PKG_BASE/src/interface.py \
$PKG_BASE/src/interface.conf \
$PKG_BASE/src/remotels.py \
$PKG_BASE/src/remotels.conf \
$PKG_BASE/src/ptp.py \
$PKG_BASE/src/ptp.conf \
$PKG_BASE/src/example.py \
$PKG_BASE/src/example.conf"
TIS_PATCH_VER=13

View File

@ -1,110 +0,0 @@
Summary: Titanium Server collectd Package
Name: collectd-extensions
Version: 1.0
Release: 0%{?_tis_dist}.%{tis_patch_ver}
License: ASL 2.0
Group: base
Packager: Wind River <info@windriver.com>
URL: unknown
# create the files tarball
Source0: %{name}-%{version}.tar.gz
Source1: collectd.service
Source2: collectd.conf.pmon
# collectd python plugin files - notifiers
Source3: fm_notifier.py
Source4: mtce_notifier.py
Source5: plugin_common.py
# collectd python plugin files - resource plugins
Source11: cpu.py
Source12: memory.py
Source14: example.py
Source15: ntpq.py
Source16: interface.py
Source17: remotels.py
Source18: ptp.py
# collectd plugin conf files into /etc/collectd.d
Source100: python_plugins.conf
Source101: cpu.conf
Source102: memory.conf
Source103: df.conf
Source104: example.conf
Source105: ntpq.conf
Source106: interface.conf
Source107: remotels.conf
Source108: ptp.conf
BuildRequires: systemd-devel
Requires: systemd
Requires: collectd
Requires: fm-api
Requires: python-httplib2
Requires: python-influxdb
Requires: python-oslo-concurrency
Requires: tsconfig
Requires: /bin/systemctl
%description
Titanium Cloud collectd extensions
%define debug_package %{nil}
%define local_unit_dir %{_sysconfdir}/systemd/system
%define local_plugin_dir %{_sysconfdir}/collectd.d
%define local_python_extensions_dir /opt/collectd/extensions/python
%define local_config_extensions_dir /opt/collectd/extensions/config
%prep
%setup
%build
%install
install -m 755 -d %{buildroot}%{_sysconfdir}
install -m 755 -d %{buildroot}%{local_unit_dir}
install -m 755 -d %{buildroot}%{local_plugin_dir}
install -m 755 -d %{buildroot}%{local_config_extensions_dir}
install -m 755 -d %{buildroot}%{local_python_extensions_dir}
# support files ; service and pmon conf
install -m 644 %{SOURCE1} %{buildroot}%{local_unit_dir}
install -m 600 %{SOURCE2} %{buildroot}%{local_config_extensions_dir}
# collectd python plugin files - notifiers
install -m 700 %{SOURCE3} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE4} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE5} %{buildroot}%{local_python_extensions_dir}
# collectd python plugin files - resource plugins
install -m 700 %{SOURCE11} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE12} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE14} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE15} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE16} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE17} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE18} %{buildroot}%{local_python_extensions_dir}
# collectd plugin conf files into /etc/collectd.d
install -m 600 %{SOURCE100} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE101} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE102} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE103} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE104} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE105} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE106} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE107} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE108} %{buildroot}%{local_plugin_dir}
%clean
rm -rf $RPM_BUILD_ROOT
%files
%defattr(-,root,root,-)
%config(noreplace) %{local_unit_dir}/collectd.service
%{local_plugin_dir}/*
%{local_config_extensions_dir}/*
%{local_python_extensions_dir}/*

View File

@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,18 +0,0 @@
[process]
process = collectd
service = collectd
style = lsb
pidfile = /var/run/collectd.pid
severity = major ; minor, major, critical
restarts = 3 ; restart retries before error assertion
interval = 5 ; number of seconds to wait between restarts
debounce = 10 ; number of seconds that a process needs to remain
; running before degrade is removed and retry count
; is cleared.
startuptime = 3 ; Seconds to wait after process start before starting the debounce monitor
mode = passive ; Monitoring mode: passive (default) or active
; passive: process death monitoring (default: always)
; active : heartbeat monitoring, i.e. request / response messaging
; ignore : do not monitor or stop monitoring
quorum = 0 ; process is in the host watchdog quorum

View File

@ -1,15 +0,0 @@
[Unit]
Description=Collectd statistics daemon and extension services
Documentation=man:collectd(1) man:collectd.conf(5)
Before=pmon.service
After=local-fs.target network-online.target
Requires=local-fs.target network-online.target
[Service]
Type=notify
ExecStart=/usr/sbin/collectd
ExecStartPost=/bin/bash -c 'echo $MAINPID > /var/run/collectd.pid'
ExecStopPost=/bin/rm -f /var/run/collectd.pid
[Install]
WantedBy=multi-user.target

View File

@ -1,22 +0,0 @@
# For stock plugin only
# Uncomment to compare stock to TiS plugin readings
# ---------------------
# <Plugin cpu>
# ReportByCpu false
# ReportByState false
# ValuesPercentage true
# </Plugin>
<Plugin "threshold">
<Plugin "cpu">
<Type "percent">
Instance "used"
Persist true
PersistOK true
WarningMax 90.00
FailureMax 95.00
Hits 2
Invert false
</Type>
</Plugin>
</Plugin>

View File

@ -1,262 +0,0 @@
#
# Copyright (c) 2018-2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
############################################################################
#
# This file is the collectd 'Platform CPU Usage' Monitor.
#
# The Platform CPU Usage is calculated as an averaged percentage of
# platform core usable since the previous sample.
#
# Init Function:
# - if 'worker_reserved.conf exists then query/store PLATFORM_CPU_LIST
#
############################################################################
import os
import time
import collectd
debug = False
PASS = 0
FAIL = 1
PATH = '/proc/cpuinfo'
WORKER_RESERVED_CONF = '/etc/platform/worker_reserved.conf'
PLUGIN = 'platform cpu usage plugin'
# CPU Control class
class CPU:
hostname = "" # hostname for sample notification message
usage = float(0.0) # float value of cpu usage
processors = int(0) # number of processors for all cpus case
cpu_list = [] # list of CPUs to calculate combined usage for
cpu_time = [] # schedstat time for each CPU
cpu_time_last = [] # last schedstat time for each CPU
time_last = float(0.0) # float of the time the last sample was taken
def log_error(self, err_str):
"""Print an error log with plugin name prefixing the log"""
collectd.error("%s %s" % (PLUGIN, err_str))
# Instantiate the class
c = CPU()
# The collectd configuration interface
# collectd needs this defined ; but not used/needed.
def config_func(config):
collectd.info('%s config function' % PLUGIN)
# Get the platform cpu list and number of cpus reported by /proc/cpuinfo
def init_func():
# get current hostname
c.hostname = os.uname()[1]
collectd.info('%s init function for %s' % (PLUGIN, c.hostname))
raw_list = ""
if os.path.exists(WORKER_RESERVED_CONF):
with open(WORKER_RESERVED_CONF, 'r') as infile:
for line in infile:
if 'PLATFORM_CPU_LIST' in line:
val = line.split("=")
raw_list = val[1].strip('\n')[1:-1].strip('"')
break
if raw_list:
# Convert the cpu list fetched from the compute
# reserved file into an integer list.
# Handle mix of number list #,# and number range #-#
split_list = raw_list.split(',')
if debug:
collectd.info('%s split list: %s' % (PLUGIN, split_list))
for cpu in split_list:
if cpu.find('-') == -1:
# add individual cpu # with assumed ',' delimiter
c.cpu_list.append(int(cpu))
else:
# add all in range #-#
cpu_range = cpu.split('-')
if len(cpu_range) == 2:
first = int(cpu_range[0])
last = int(cpu_range[1]) + 1
# add each
for i in list(range(first, last)):
c.cpu_list.append(i)
# with the full CPU list in hand we can now just read their samples
if debug:
collectd.info('%s full cpu list: %s' %
(PLUGIN, c.cpu_list))
try:
f = open('/proc/cpuinfo')
except EnvironmentError as e:
collectd.error(str(e), UserWarning)
else:
if len(c.cpu_list) == 0:
_want_all_cpus = True
else:
_want_all_cpus = False
c.processors = 0
for line in f:
name_value = [s.strip() for s in line.split(':', 1)]
if len(name_value) != 2:
continue
name, value = name_value
if 'rocessor' in name:
if _want_all_cpus is True:
c.cpu_list.append(int(c.processors))
c.processors += 1
collectd.info('%s has found %d cpus total' %
(PLUGIN, c.processors))
collectd.info('%s monitoring %d cpus %s' %
(PLUGIN, len(c.cpu_list), c.cpu_list))
f.close()
# Calculate the CPU usage sample
def read_func():
try:
f = open('/proc/schedstat')
except EnvironmentError as e:
c.log_error('file open failed ; ' + str(e))
return FAIL
else:
# schedstat time for each CPU
c.cpu_time = []
# Loop over each line ...
# get the output version ; only 15 is supported
# get the cpu time from each line staring with 'cpux ....'
for line in f:
# break each line into name/value pairs
line_split = [s.strip() for s in line.split(' ', 1)]
name, value = line_split
# get the output version.
if 'ersion' in name:
try:
c.version = int(value)
except ValueError as e:
c.log_error('got invalid schedstat version ; ' + str(e))
# TODO: Consider exiting here and raising alarm.
# Calling this type of exit will stop the plugin.
# sys._exit()
return FAIL
# only version 15 is supported
if c.version == 15:
if 'cpu' in name:
# get the cpu number for each line
if int(name.replace('cpu', '')) in c.cpu_list:
_in_list = True
else:
_in_list = False
# get cpu time for each cpu that is valid
if len(c.cpu_list) == 0 or _in_list is True:
_schedstat = value
value_split = value.split(' ')
c.cpu_time.append(float(value_split[6]))
if debug:
collectd.info('%s %s schedstat is %s [%s]' %
(PLUGIN, name, value_split[6],
_schedstat))
else:
collectd.error('%s unsupported schedstat version [%d]' %
(PLUGIN, c.version))
return 0
f.close()
# Now that we have the cpu time recorded for each cpu
_time_delta = float(0)
_cpu_count = int(0)
if len(c.cpu_time_last) == 0:
c.time_last = time.time()
if c.cpu_list:
# This is a compute node.
# Do not include vswitch or pinned cpus in calculation.
for cpu in c.cpu_list:
c.cpu_time_last.append(float(c.cpu_time[_cpu_count]))
_cpu_count += 1
if debug:
collectd.info('%s cpu time ; first pass ; %s' %
(PLUGIN, c.cpu_time))
return PASS
else:
_time_this = time.time()
_time_delta = _time_this - c.time_last
c.total_avg_cpu = 0
cpu_occupancy = []
if debug:
collectd.info('%s cpu time ; this pass ; %s -> %s' %
(PLUGIN, c.cpu_time_last, c.cpu_time))
if c.cpu_list:
# This is a compute node.
# Do not include vswitch or pinned cpus in calculation.
for cpu in c.cpu_list:
if cpu >= c.processors:
c.log_error(' got out of range cpu number')
else:
_delta = (c.cpu_time[_cpu_count] - c.cpu_time_last[_cpu_count])
_delta = _delta / 1000000 / _time_delta
cpu_occupancy.append(float((100 * (_delta)) / 1000))
c.total_avg_cpu += cpu_occupancy[_cpu_count]
if debug:
collectd.info('%s cpu %d - count:%d [%s]' %
(PLUGIN, cpu, _cpu_count, cpu_occupancy))
_cpu_count += 1
else:
collectd.info('%s no cpus to monitor' % PLUGIN)
return 0
c.usage = c.total_avg_cpu / _cpu_count
if debug:
collectd.info('%s reports %.2f %% usage (averaged)' %
(PLUGIN, c.usage))
# Prepare for next audit ; mode now to last
# c.cpu_time_last = []
c.cpu_time_last = c.cpu_time
c.time_last = _time_this
# if os.path.exists('/var/run/fit/cpu_data'):
# with open('/var/run/fit/cpu_data', 'r') as infile:
# for line in infile:
# c.usage = float(line)
# collectd.info("%s using FIT data:%.2f" %
# (PLUGIN, c.usage))
# break
# Dispatch usage value to collectd
val = collectd.Values(host=c.hostname)
val.plugin = 'cpu'
val.type = 'percent'
val.type_instance = 'used'
val.dispatch(values=[c.usage])
return 0
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func)

View File

@ -1,41 +0,0 @@
<Plugin df>
ValuesPercentage true
IgnoreSelected false
ReportByDevice false
ReportInodes false
ValuesAbsolute false
MountPoint "/"
MountPoint "/tmp"
MountPoint "/dev"
MountPoint "/dev/shm"
MountPoint "/var/run"
MountPoint "/var/log"
MountPoint "/var/lock"
MountPoint "/boot"
MountPoint "/scratch"
MountPoint "/opt/etcd"
MountPoint "/opt/platform"
MountPoint "/opt/extension"
MountPoint "/var/lib/rabbitmq"
MountPoint "/var/lib/postgresql"
MountPoint "/var/lib/ceph/mon"
MountPoint "/var/lib/docker"
MountPoint "/var/lib/docker-distribution"
MountPoint "/var/lib/kubelet"
MountPoint "/var/lib/nova/instances"
MountPoint "/opt/backups"
</Plugin>
<Plugin "threshold">
<Plugin "df">
<Type "percent_bytes">
Instance "used"
WarningMax 80.00
FailureMax 90.00
Persist true
PersistOK true
Hits 2
Invert false
</Type>
</Plugin>
</Plugin>

View File

@ -1,13 +0,0 @@
<Plugin "threshold">
<Plugin "example">
<Type "percent">
Instance "used"
Persist true
PersistOK true
WarningMax 49.00
FailureMax 74.00
Hits 1
Invert false
</Type>
</Plugin>
</Plugin>

View File

@ -1,73 +0,0 @@
#
# Copyright (c) 2018 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import os
import random
import collectd
PLUGIN = 'random number plugin'
# static variables
# define a class here that will persist over read calls
class ExampleObject:
hostname = ""
plugin_data = ['1', '100']
obj = ExampleObject()
# The config function - called once on collectd process startup
def config_func(config):
"""Configure the plugin"""
for node in config.children:
key = node.key.lower()
val = node.values[0]
if key == 'data':
obj.plugin_data = str(val).split(' ')
collectd.info("%s configured data '%d:%d'" %
(PLUGIN,
int(obj.plugin_data[0]),
int(obj.plugin_data[1])))
return 0
collectd.info('%s config function' % PLUGIN)
return 0
# The init function - called once on collectd process startup
def init_func():
# get current hostname
obj.hostname = os.uname()[1]
return 0
# The sample read function - called on every audit interval
def read_func():
# do the work to create the sample
low = int(obj.plugin_data[0])
high = int(obj.plugin_data[1])
sample = random.randint(low, high)
# Dispatch usage value to collectd
val = collectd.Values(host=obj.hostname)
val.plugin = 'example'
val.type = 'percent'
val.type_instance = 'used'
val.dispatch(values=[sample])
return 0
# register the config, init and read functions
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func)

File diff suppressed because it is too large Load Diff

View File

@ -1,13 +0,0 @@
<Plugin "threshold">
<Plugin "interface">
<Type "percent">
Instance "used"
Persist true
PersistOK true
WarningMin 51
FailureMin 1
# Hits 2
Invert false
</Type>
</Plugin>
</Plugin>

View File

@ -1,981 +0,0 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
############################################################################
#
# This is the Host Interface Monitor plugin for collectd.
#
# Only mgmt, cluster-host and oam interfaces are supported with the following
# mapping specified in /etc/platform/platform.conf
#
# oam - oam_interface | controller | mandatory
# mgmnt - management_interface | all hosts | mandatory
# clstr - cluster_host_interface | any host | optional
#
# This plugin queries the maintenance Link Monitor daemon 'lmon'
# for a link status summary of that hosts configured networks.
#
# This plugin's read_func issues an http GET request to the Link Monitor
# which responds with a json string that represents a complete summary
# of the monitored links, state and the time of the last event or when
# initial status was learned. An example of the Link Monitor response is
#
# {
# "status" : "pass"
# "link_info": [
# { "network":"mgmt",
# "type":"vlan",
# "links": [
# { "name":"enp0s8.1", "state":"Up", "time":"5674323454567" },
# { "name":"enp0s8.2", "state":"Up", "time":"5674323454567" }]
# },
# { "network":"clstr",
# "type":"bond",
# "bond":"bond0",
# "links": [
# { "name":"enp0s9f1", "state":"Down", "time":"5674323454567" },
# { "name":"enp0s9f0", "state":"Up" , "time":"5674323454567" }]
# },
# { "network":"oam",
# "type":"single",
# "links": [
# { "name":"enp0s3", "state":"Up", "time":"5674323454567" }]
# }]
# }
#
# On failure
#
# {
# "status" : "fail ; bad request <or other text based reason>"
# }
#
# This plugin then uses this information to manage interface alarm
# assertion and clear with appropriate severity.
#
# Severity: Interface and Port levels
#
# Alarm Level Minor Major Critical
# ----------- ----- --------------------- ----------------------------
# Interface N/A One of lag pair is Up All Interface ports are Down
# Port N/A Physical Link is Down N/A
#
# Sample Data: represented as % of total links Up for that network interface
#
# 100 or 100% percent used - all links of interface are up.
# 50 or 50% percent used - one of lag pair is Up and the other is Down
# 0 or 0% percent used - all ports for that network are Down
#
############################################################################
import os
import time
import datetime
import collectd
import plugin_common as pc
from fm_api import constants as fm_constants
from fm_api import fm_api
# Fault manager API Object
api = fm_api.FaultAPIsV2()
# name of the plugin - all logs produced by this plugin are prefixed with this
PLUGIN = 'interface plugin'
# Interface Monitoring Interval in seconds
PLUGIN_AUDIT_INTERVAL = 10
# Sample Data 'type' and 'instance' database field values.
PLUGIN_TYPE = 'percent'
PLUGIN_TYPE_INSTANCE = 'usage'
# The Link Status Query URL
PLUGIN_HTTP_URL_PREFIX = 'http://localhost:'
# This plugin's timeout
PLUGIN_HTTP_TIMEOUT = 5
# Specify the link monitor as the maintenance destination service
# full path should look like ; http://localhost:2122/mtce/lmon
PLUGIN_HTTP_URL_PATH = '/mtce/lmon'
# Port and Interface Alarm Identifiers
PLUGIN_OAM_PORT_ALARMID = '100.106' # OAM Network Port
PLUGIN_OAM_IFACE_ALARMID = '100.107' # OAM Network Interface
PLUGIN_MGMT_PORT_ALARMID = '100.108' # Management Network Port
PLUGIN_MGMT_IFACE_ALARMID = '100.109' # Management Network Interface
PLUGIN_CLSTR_PORT_ALARMID = '100.110' # Cluster-host Network Port
PLUGIN_CLSTR_IFACE_ALARMID = '100.111' # Cluster-host Nwk Interface
# List of all alarm identifiers.
ALARM_ID_LIST = [PLUGIN_OAM_PORT_ALARMID,
PLUGIN_OAM_IFACE_ALARMID,
PLUGIN_MGMT_PORT_ALARMID,
PLUGIN_MGMT_IFACE_ALARMID,
PLUGIN_CLSTR_PORT_ALARMID,
PLUGIN_CLSTR_IFACE_ALARMID]
# Monitored Network Name Strings
NETWORK_MGMT = 'mgmt'
NETWORK_CLSTR = 'cluster-host'
NETWORK_OAM = 'oam'
# Port / Interface State strings
LINK_UP = 'Up'
LINK_DOWN = 'Down'
# Alarm control actions
ALARM_ACTION_RAISE = 'raise'
ALARM_ACTION_CLEAR = 'clear'
# Alarm level.
# Ports are the lowest level and represent a physical link
# Interfaces are port groupings in terms of LAG
LEVEL_PORT = 'port'
LEVEL_IFACE = 'interface'
# Run phases
RUN_PHASE__INIT = 0
RUN_PHASE__ALARMS_CLEARED = 1
RUN_PHASE__HTTP_REQUEST_PASS = 2
# Link Object (aka Port or Physical interface) Structure
# and member functions.
class LinkObject:
    """Port (physical link) level state and alarm bookkeeping.

    Tracks one physical port of a monitored network: its name, Up/Down
    state, the time of its last reported state change and the severity
    of any port alarm currently asserted against it.
    """

    def __init__(self, alarm_id):
        self.name = None
        self.state = LINK_UP
        self.timestamp = float(0)
        self.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
        self.alarm_id = alarm_id
        self.state_change = True

        collectd.debug("%s LinkObject constructor: %s" %
                       (PLUGIN, alarm_id))

    ##################################################################
    #
    # Name       : raise_port_alarm
    #
    # Purpose    : Raise a major link/port alarm for this port.
    #
    # Parameters : Network the link is part of.
    #
    # Returns    : False on failure
    #              True  on success
    #
    ##################################################################
    def raise_port_alarm(self, network):
        """Raise a port alarm"""

        # already asserted at major severity ; nothing to do
        if self.severity == fm_constants.FM_ALARM_SEVERITY_MAJOR:
            return True

        if manage_alarm(self.name,
                        network,
                        LEVEL_PORT,
                        ALARM_ACTION_RAISE,
                        fm_constants.FM_ALARM_SEVERITY_MAJOR,
                        self.alarm_id,
                        self.timestamp) is not True:
            return False

        self.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
        collectd.info("%s %s %s port alarm raised" %
                      (PLUGIN, self.name, self.alarm_id))
        return True

    ##################################################################
    #
    # Name       : clear_port_alarm
    #
    # Purpose    : Clear any link/port alarm held against this port.
    #
    # Parameters : Network the link is part of.
    #
    # Returns    : False on failure
    #              True  on success.
    #
    ##################################################################
    def clear_port_alarm(self, network):
        """Clear a port alarm"""

        # nothing asserted ; nothing to do
        if self.severity == fm_constants.FM_ALARM_SEVERITY_CLEAR:
            return True

        if manage_alarm(self.name,
                        network,
                        LEVEL_PORT,
                        ALARM_ACTION_CLEAR,
                        fm_constants.FM_ALARM_SEVERITY_CLEAR,
                        self.alarm_id,
                        self.timestamp) is not True:
            return False

        collectd.info("%s %s %s port alarm cleared" %
                      (PLUGIN, self.name, self.alarm_id))
        self.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
        return True
# Interface (aka Network) Level Object Structure and member functions
class NetworkObject:
    """Interface (network) level state and alarm bookkeeping.

    Owns up to two LinkObjects (link_two is only meaningful for lagged
    configurations) and the severity of any interface level alarm.
    """

    def __init__(self, name):
        self.name = name
        self.sample = 0            # current usage sample (0/50/100 %)
        self.sample_last = 0       # previous dispatched sample
        self.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
        self.degraded = False
        self.timestamp = float(0)  # time of the event that set severity

        # add the respective alarm IDs to each object
        alarm_id = None
        if name == NETWORK_OAM:
            alarm_id = PLUGIN_OAM_PORT_ALARMID
            self.alarm_id = PLUGIN_OAM_IFACE_ALARMID
        elif name == NETWORK_MGMT:
            alarm_id = PLUGIN_MGMT_PORT_ALARMID
            self.alarm_id = PLUGIN_MGMT_IFACE_ALARMID
        elif name == NETWORK_CLSTR:
            alarm_id = PLUGIN_CLSTR_PORT_ALARMID
            self.alarm_id = PLUGIN_CLSTR_IFACE_ALARMID
        else:
            self.alarm_id = ""
            collectd.error("%s unexpected network (%s)" % (PLUGIN, name))

        collectd.debug("%s %s NetworkObject constructor: %s" %
                       (PLUGIN, name, self.alarm_id))

        # NOTE(review): for an unexpected network name link_one/link_two
        # are never created ; only reachable if NETWORKS below were ever
        # extended with an unknown name.
        if alarm_id:
            self.link_one = LinkObject(alarm_id)
            self.link_two = LinkObject(alarm_id)

    ##################################################################
    #
    # Name       : raise_iface_alarm
    #
    # Purpose    : This network object member function used to
    #              raise interface alarms.
    #
    # Parameters : severity - major (one lagged link down) or
    #              critical (all links down) ; clear is rejected.
    #
    # Returns    : False on failure
    #              True  on success
    #
    ##################################################################
    def raise_iface_alarm(self, severity):
        """Raise an interface alarm"""
        if severity == fm_constants.FM_ALARM_SEVERITY_CLEAR:
            # guard: clearing is done by clear_iface_alarm
            collectd.error("%s %s raise alarm called with clear severity" %
                           (PLUGIN, self.name))
            return True
        if self.severity != severity:
            if manage_alarm(self.name,
                            self.name,
                            LEVEL_IFACE,
                            ALARM_ACTION_RAISE,
                            severity,
                            self.alarm_id,
                            self.timestamp) is True:
                self.severity = severity
                collectd.info("%s %s %s %s interface alarm raised" %
                              (PLUGIN,
                               self.name,
                               self.alarm_id,
                               pc.get_severity_str(severity)))
                return True
            else:
                return False
        else:
            return True

    ##################################################################
    #
    # Name       : clear_iface_alarm
    #
    # Purpose    : This network object member function used to
    #              clear interface alarms.
    #
    # Parameters : None
    #
    # Returns    : False on failure
    #              True  on success.
    #
    ##################################################################
    def clear_iface_alarm(self):
        """Clear an interface alarm"""
        if self.severity != fm_constants.FM_ALARM_SEVERITY_CLEAR:
            if manage_alarm(self.name,
                            self.name,
                            LEVEL_IFACE,
                            ALARM_ACTION_CLEAR,
                            fm_constants.FM_ALARM_SEVERITY_CLEAR,
                            self.alarm_id,
                            self.timestamp) is True:
                collectd.info("%s %s %s %s interface alarm cleared" %
                              (PLUGIN,
                               self.name,
                               self.alarm_id,
                               pc.get_severity_str(self.severity)))
                self.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
                return True
            else:
                return False
        else:
            return True

    ######################################################################
    #
    # Name       : manage_iface_alarm
    #
    # Purpose    : clear or raise appropriate severity level interface alarm
    #
    #              Single link  : down -> critical ; up -> clear
    #              Lagged links : one down -> major ; both down -> critical ;
    #                             both up -> clear
    #
    # Returns    : None
    #
    ######################################################################
    def manage_iface_alarm(self):
        # Single Link Config
        if self.link_two.name is None:
            if self.link_one.state == LINK_DOWN:
                if self.severity != fm_constants.FM_ALARM_SEVERITY_CRITICAL:
                    self.timestamp = self.link_one.timestamp
                    self.raise_iface_alarm(
                        fm_constants.FM_ALARM_SEVERITY_CRITICAL)
            elif self.link_one.state == LINK_UP:
                if self.severity != fm_constants.FM_ALARM_SEVERITY_CLEAR:
                    self.clear_iface_alarm()

        # Lagged Link Config
        #
        # The interface level timestamp is updated based on the failed
        # link timestamps
        elif self.link_one.state == LINK_UP and \
                self.link_two.state == LINK_DOWN:
            if self.severity != fm_constants.FM_ALARM_SEVERITY_MAJOR:
                self.timestamp = self.link_two.timestamp
                self.raise_iface_alarm(fm_constants.FM_ALARM_SEVERITY_MAJOR)
        elif self.link_one.state == LINK_DOWN and \
                self.link_two.state == LINK_UP:
            if self.severity != fm_constants.FM_ALARM_SEVERITY_MAJOR:
                self.timestamp = self.link_one.timestamp
                self.raise_iface_alarm(fm_constants.FM_ALARM_SEVERITY_MAJOR)
        elif self.link_one.state == LINK_UP and self.link_two.state == LINK_UP:
            if self.severity != fm_constants.FM_ALARM_SEVERITY_CLEAR:
                self.clear_iface_alarm()
        elif self.link_one.state == LINK_DOWN and \
                self.link_two.state == LINK_DOWN:
            if self.severity != fm_constants.FM_ALARM_SEVERITY_CRITICAL:
                # use the most recent of the two link failure times
                if self.link_one.timestamp > self.link_two.timestamp:
                    self.timestamp = self.link_one.timestamp
                else:
                    self.timestamp = self.link_two.timestamp
                self.raise_iface_alarm(fm_constants.FM_ALARM_SEVERITY_CRITICAL)
# Plugin Control Object
# Shared plugin state used by the config/init/read callbacks below
# (hostname, http url, retry counters, run phase, audit count, ...).
obj = pc.PluginObject(PLUGIN, PLUGIN_HTTP_URL_PREFIX)
# Network Object List - Primary Network/Link Control Object
# One NetworkObject per monitored network.
NETWORKS = [NetworkObject(NETWORK_MGMT),
            NetworkObject(NETWORK_OAM),
            NetworkObject(NETWORK_CLSTR)]
##########################################################################
#
# Name : get_timestamp
#
# Purpose : Convert the long long int microsecond time as string
# that accompany link info from the Link Monitor (lmond)
# and catch exceptions in doing so.
#
# Parameters: lmon_time - long long int as string
#
# Returns : float time that can be consumed by datetime.fromtimestamp
#
# Returns same unit of now time if provided lmon_time is
# invalid.
#
##########################################################################
def get_timestamp(lmon_time):
    """Convert lmon microsecond time string to fm timestamp time.

    :param lmon_time: long long int microsecond time as a string
                      (or number) from the Link Monitor response.
    :returns: float seconds consumable by datetime.fromtimestamp ;
              current time if lmon_time is missing or unparsable.
    """
    if lmon_time:
        try:
            # lmond reports microseconds ; fm wants float seconds
            return float(lmon_time) / 1000000
        except (TypeError, ValueError):
            # only catch conversion errors ; a bare except here would
            # hide unrelated faults
            collectd.error("%s failed to parse timestamp ;"
                           " using current time" % PLUGIN)
    else:
        collectd.error("%s no timestamp ;"
                       " using current time" % PLUGIN)
    return float(time.time())
def dump_network_info(network):
    """Log the specified network info"""

    def _event_time(ts):
        # render a link event timestamp in human readable form
        return datetime.datetime.fromtimestamp(
            float(ts)).strftime('%Y-%m-%d %H:%M:%S')

    # the second link only exists for lagged configurations
    link_two_info = ''
    if network.link_two.name is not None:
        link_two_info = ("; link two '" + network.link_two.name +
                         "' went " + network.link_two.state +
                         " at " + _event_time(network.link_two.timestamp))

    pcnt = '%'
    collectd.info("%s %5s %3d%c ; "
                  "link one '%s' went %s at %s %s" %
                  (PLUGIN,
                   network.name,
                   network.sample,
                   pcnt,
                   network.link_one.name,
                   network.link_one.state,
                   _event_time(network.link_one.timestamp),
                   link_two_info))
#########################################################################
#
# Name : this_hosts_alarm
#
# Purpose : Determine if the supplied eid is for this host.
#
# Description: The eid formats for the alarms managed by this plugin are
#
# host=<hostname>.port=<port_name>
# host=<hostname>.interface=<network_name>
#
# Assumptions: There is no restriction preventing the system
# administrator from creating hostnames with period's ('.')
# in them. Because so the eid cannot simply be split
# around '='s and '.'s. Instead its split around this
# plugins level type '.port' or '.interface'.
#
# Returns : True if hostname is a match
# False otherwise
#
##########################################################################
def this_hosts_alarm(hostname, eid):
    """Check if the specified eid is for this host"""

    # guard against a missing hostname or eid
    if not hostname or not eid:
        return False

    # eid formats handled:
    #   'host=<hostname>.port=<port_name>'
    #   'host=<hostname>.interface=<network_name>'
    try:
        fields = eid.split('=')
        if len(fields) == 3:
            # hostnames may legally contain '.' characters, so split
            # around this plugin's level keywords rather than every '.'
            for level in ('.port', '.interface'):
                host_part = fields[1].split(level)
                if len(host_part) == 2:
                    return host_part[0] == hostname
    except Exception as ex:
        collectd.error("%s failed to parse alarm eid (%s)"
                       " [eid:%s]" % (PLUGIN, str(ex), eid))
    return False
##########################################################################
#
# Name : clear_alarms
#
# Purpose : Clear all interface alarms on process startup.
#
# Description: Called after first successful Link Status query.
#
# Loops over the provided alarm id list querying all alarms
# for each. Any that are raised are precisely cleared.
#
# Prevents stuck alarms over port and interface reconfig.
#
# If the original alarm case still exists the alarm will
# be re-raised with the original link event timestamp that
# is part of the Link Status query response.
#
# Parameters : A list of this plugin's alarm ids
#
# Returns : True on Success
# False on Failure
#
##########################################################################
def clear_alarms(alarm_id_list):
    """Clear alarm state of all plugin alarms"""
    found = False

    # the complete set of alarm ids owned by this plugin
    plugin_alarm_ids = (PLUGIN_OAM_PORT_ALARMID,
                        PLUGIN_OAM_IFACE_ALARMID,
                        PLUGIN_MGMT_PORT_ALARMID,
                        PLUGIN_MGMT_IFACE_ALARMID,
                        PLUGIN_CLSTR_PORT_ALARMID,
                        PLUGIN_CLSTR_IFACE_ALARMID)

    for alarm_id in alarm_id_list:
        try:
            alarms = api.get_faults_by_id(alarm_id)
        except Exception as ex:
            collectd.error("%s 'get_faults_by_id' exception ;"
                           " %s ; %s" %
                           (PLUGIN, alarm_id, ex))
            return False

        if not alarms:
            continue

        for alarm in alarms:
            eid = alarm.entity_instance_id
            # ignore other host alarms
            if this_hosts_alarm(obj.hostname, eid) is False:
                continue
            # ignore alarm ids this plugin does not own
            if alarm_id not in plugin_alarm_ids:
                continue
            try:
                if api.clear_fault(alarm_id, eid) is False:
                    collectd.info("%s %s:%s:%s alarm already cleared" %
                                  (PLUGIN,
                                   alarm.severity,
                                   alarm_id,
                                   eid))
                else:
                    found = True
                    collectd.info("%s %s:%s:%s alarm cleared" %
                                  (PLUGIN,
                                   alarm.severity,
                                   alarm_id,
                                   eid))
            except Exception as ex:
                collectd.error("%s 'clear_fault' exception ; "
                               "%s:%s ; %s" %
                               (PLUGIN, alarm_id, eid, ex))
                return False

    if found is False:
        collectd.info("%s found no startup alarms" % PLUGIN)
    return True
##########################################################################
#
# Name : manage_alarm
#
# Purpose : Raises or clears port and interface alarms based on
# calling parameters.
#
# Returns : True on success
# False on failure
#
##########################################################################
def manage_alarm(name, network, level, action, severity, alarm_id, timestamp):
    """Manage raise and clear of port and interface alarms

    :param name: port or interface name used in the alarm eid.
    :param network: network name ('mgmt', 'oam', 'cluster-host').
    :param level: LEVEL_PORT or LEVEL_IFACE ; selects the eid format.
    :param action: ALARM_ACTION_RAISE or ALARM_ACTION_CLEAR.
    :param severity: fm severity for a raise ; ignored on clear.
    :param alarm_id: fm alarm id to raise or clear.
    :param timestamp: float seconds of the originating link event.
    :returns: True on success ; False on failure.
    """
    ts = datetime.datetime.fromtimestamp(
        float(timestamp)).strftime('%Y-%m-%d %H:%M:%S')

    collectd.debug("%s %s %s %s alarm for %s:%s [%s] %s" % (PLUGIN,
                   severity, level, alarm_id, network, name, action, ts))

    if action == ALARM_ACTION_CLEAR:
        alarm_state = fm_constants.FM_ALARM_STATE_CLEAR
        reason = ''
        repair = ''
    else:
        # reason and repair strings are only needed on alarm assertion
        alarm_state = fm_constants.FM_ALARM_STATE_SET
        reason = "'" + network.upper() + "' " + level
        repair = 'Check cabling and far-end port configuration ' \
                 'and status on adjacent equipment.'

    # build the alarm eid and name string
    if level == LEVEL_PORT:
        eid = 'host=' + obj.hostname + "." + level + '=' + name
        reason += " failed"
    else:
        eid = 'host=' + obj.hostname + "." + level + '=' + network
        # major means one link of a lagged pair is down -> degraded
        if severity == fm_constants.FM_ALARM_SEVERITY_MAJOR:
            reason += " degraded"
        else:
            reason += " failed"

    if alarm_state == fm_constants.FM_ALARM_STATE_CLEAR:
        try:
            # clear_fault returns False when there was nothing to clear
            if api.clear_fault(alarm_id, eid) is False:
                collectd.info("%s %s:%s alarm already cleared" %
                              (PLUGIN, alarm_id, eid))
            else:
                collectd.info("%s %s:%s alarm cleared" %
                              (PLUGIN, alarm_id, eid))
            return True
        except Exception as ex:
            collectd.error("%s 'clear_fault' failed ; %s:%s ; %s" %
                           (PLUGIN, alarm_id, eid, ex))
            return False
    else:
        fault = fm_api.Fault(
            uuid="",
            alarm_id=alarm_id,
            alarm_state=alarm_state,
            entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
            entity_instance_id=eid,
            severity=severity,
            reason_text=reason,
            alarm_type=fm_constants.FM_ALARM_TYPE_7,
            probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN,
            proposed_repair_action=repair,
            service_affecting=True,
            timestamp=ts,
            suppression=True)
        try:
            alarm_uuid = api.set_fault(fault)
        except Exception as ex:
            collectd.error("%s 'set_fault' exception ; %s:%s ; %s" %
                           (PLUGIN, alarm_id, eid, ex))
            return False
        # set_fault returns the alarm uuid on success
        if pc.is_uuid_like(alarm_uuid) is False:
            collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
                           (PLUGIN, alarm_id, eid, alarm_uuid))
            return False
        else:
            return True
# The config function - called once on collectd process startup
def config_func(config):
    """Configure the plugin.

    Learns the Link Monitor (lmond) query port, preferring the value
    in /etc/mtc/lmond.conf and falling back to this plugin's collectd
    config, then completes the Link Status query url in obj.url.

    :param config: collectd config object for this plugin.
    :returns: 0 always (collectd config callback convention).
    """
    # Need to update the Link Status Query URL with the port number.
    url_updated = False

    # The Link Monitor port number is first searched for in
    # the /etc/mtc/lmond.conf file.
    # If its not there then its taken from the plugin config.

    # /etc/mtc/lmond.conf
    fn = '/etc/mtc/lmond.conf'
    if os.path.exists(fn):
        try:
            with open(fn, 'r') as infile:
                for line in infile:
                    if 'lmon_query_port' in line:
                        port_str = line.split()[2]
                        int(port_str)  # validate ; ValueError on junk
                        # add the port
                        obj.url += port_str
                        # add the path /mtce/lmon
                        obj.url += PLUGIN_HTTP_URL_PATH
                        url_updated = "config file"
                        break
        except EnvironmentError as e:
            # fixed: collectd.error takes a single message string ;
            # the second 'UserWarning' argument was a warnings.warn
            # call pattern that does not apply here.
            collectd.error(str(e))
        except (IndexError, ValueError) as e:
            # malformed lmon_query_port line ; fall back to plugin config
            collectd.error("%s failed to parse %s ; %s" %
                           (PLUGIN, fn, str(e)))

    if url_updated is False:
        # Try the config as this might be updated by manifest
        for node in config.children:
            if node.key.lower() != 'port':
                # fixed: only the 'port' value is numeric ; converting
                # every config value with int() crashed on other keys
                continue
            try:
                port = int(node.values[0])
            except (TypeError, ValueError) as e:
                collectd.error("%s invalid port config value ; %s" %
                               (PLUGIN, str(e)))
                continue
            # add the port
            obj.url += str(port)
            # add the path /mtce/lmon
            obj.url += PLUGIN_HTTP_URL_PATH
            url_updated = "manifest"
            break

    if url_updated:
        collectd.info("%s configured by %s [%s]" %
                      (PLUGIN, url_updated, obj.url))
        obj.config_done = True
    else:
        collectd.error("%s config failure ; cannot monitor" %
                       (PLUGIN))
    return 0
# The init function - called once on collectd process startup
def init_func():
    """Init the plugin"""
    if obj.config_done is False:
        collectd.info("%s configuration failed" % PLUGIN)
        # back off hard ; no point retrying an unconfigured plugin
        time.sleep(300)
        return False

    if obj.init_done is True:
        return 0
    if obj.init_ready() is False:
        return 0

    obj.hostname = obj.gethostname()
    obj.init_done = True
    collectd.info("%s initialization complete" % PLUGIN)
    return 0
# The sample read function - called on every audit interval
def read_func():
    """collectd interface monitor plugin read function

    Queries the Link Monitor (lmond) for link status, manages port and
    interface alarms from the response and dispatches a percent 'used'
    sample per provisioned network (100 all links up, 50 one lagged
    link down, 0 all links down).
    """
    if obj.init_done is False:
        init_func()
        return 0

    if obj.phase < RUN_PHASE__ALARMS_CLEARED:

        # clear all alarms on first audit
        #
        # block on fm availability
        #
        # If the existing raised alarms are still valid then
        # they will be re-raised with the same timestamp the
        # original event occurred at once auditing resumes.
        if clear_alarms(ALARM_ID_LIST) is False:
            collectd.error("%s failed to clear existing alarms ; "
                           "retry next audit" % PLUGIN)

            # Don't proceed till we can communicate with FM and
            # clear all existing interface and port alarms.
            return 0
        else:
            obj.phase = RUN_PHASE__ALARMS_CLEARED

    # Throttle HTTP request error retries
    if obj.http_retry_count != 0:
        obj.http_retry_count += 1
        if obj.http_retry_count > obj.HTTP_RETRY_THROTTLE:
            obj.http_retry_count = 0
        return 0

    # Issue query and construct the monitoring object
    success = obj.make_http_request(to=PLUGIN_HTTP_TIMEOUT)

    if success is False:
        obj.http_retry_count += 1
        return 0

    if len(obj.jresp) == 0:
        collectd.error("%s no json response from http request" % PLUGIN)
        obj.http_retry_count += 1
        return 0

    # Check query status
    try:
        if obj.jresp['status'] != 'pass':
            collectd.error("%s link monitor query %s" %
                           (PLUGIN, obj.jresp['status']))
            obj.http_retry_count += 1
            return 0
    except Exception as ex:
        collectd.error("%s http request get reason failed ; %s" %
                       (PLUGIN, str(ex)))
        collectd.info("%s resp:%d:%s" %
                      (PLUGIN, len(obj.jresp), obj.jresp))
        obj.http_retry_count += 1
        return 0

    # log the first query response
    if obj.audits == 0:
        collectd.info("%s Link Status Query Response:%d:\n%s" %
                      (PLUGIN, len(obj.jresp), obj.jresp))

        # uncomment below for debug purposes
        #
        # for network in NETWORKS:
        #     dump_network_info(network)

    # Parse each network's link info ; update per-link state, manage
    # port alarms on state transitions, then let the NetworkObject
    # manage its interface level alarm.
    try:
        link_info = obj.jresp['link_info']
        for network_link_info in link_info:
            collectd.debug("%s parse link info:%s" %
                           (PLUGIN, network_link_info))
            for network in NETWORKS:
                if network.name == network_link_info['network']:
                    links = network_link_info['links']
                    nname = network.name
                    if len(links) > 0:
                        link_one = links[0]

                        # get initial link one name
                        if network.link_one.name is None:
                            network.link_one.name = link_one['name']

                        # timestamp is refreshed every audit so alarms
                        # carry the reported link event time
                        network.link_one.timestamp =\
                            float(get_timestamp(link_one['time']))

                        # load link one state
                        if link_one['state'] == LINK_UP:
                            collectd.debug("%s %s IS Up [%s]" %
                                           (PLUGIN, network.link_one.name,
                                            network.link_one.state))
                            if network.link_one.state != LINK_UP:
                                network.link_one.state_change = True
                                network.link_one.clear_port_alarm(nname)
                            network.link_one.state = LINK_UP
                        else:
                            collectd.debug("%s %s IS Down [%s]" %
                                           (PLUGIN, network.link_one.name,
                                            network.link_one.state))
                            if network.link_one.state == LINK_UP:
                                network.link_one.state_change = True
                                network.link_one.raise_port_alarm(nname)
                            network.link_one.state = LINK_DOWN

                    # second entry only present for lagged configs
                    if len(links) > 1:
                        link_two = links[1]

                        # get initial link two name
                        if network.link_two.name is None:
                            network.link_two.name = link_two['name']

                        network.link_two.timestamp =\
                            float(get_timestamp(link_two['time']))

                        # load link two state
                        if link_two['state'] == LINK_UP:
                            collectd.debug("%s %s IS Up [%s]" %
                                           (PLUGIN, network.link_two.name,
                                            network.link_two.state))
                            if network.link_two.state != LINK_UP:
                                network.link_two.state_change = True
                                network.link_two.clear_port_alarm(nname)
                            network.link_two.state = LINK_UP
                        else:
                            collectd.debug("%s %s IS Down [%s]" %
                                           (PLUGIN, network.link_two.name,
                                            network.link_two.state))
                            if network.link_two.state == LINK_UP:
                                network.link_two.state_change = True
                                network.link_two.raise_port_alarm(nname)
                            network.link_two.state = LINK_DOWN

                    # manage interface alarms
                    network.manage_iface_alarm()

    # NOTE(review): 'ex' is unused and 'obj.resp' (not obj.jresp) is
    # logged here — confirm PluginObject exposes a 'resp' attribute.
    except Exception as ex:
        collectd.error("%s link monitor query parse exception ; %s " %
                       (PLUGIN, obj.resp))

    # handle state changes
    # NOTE(review): link one transitions are only logged when link_two
    # is provisioned (lagged config) — confirm that is intended.
    for network in NETWORKS:
        if network.link_two.name is not None and \
                network.link_one.state_change is True:

            if network.link_one.state == LINK_UP:
                collectd.info("%s %s link one '%s' is Up" %
                              (PLUGIN,
                               network.name,
                               network.link_one.name))
            else:
                collectd.info("%s %s link one '%s' is Down" %
                              (PLUGIN,
                               network.name,
                               network.link_one.name))

        if network.link_two.name is not None and \
                network.link_two.state_change is True:

            if network.link_two.state == LINK_UP:
                collectd.info("%s %s link two '%s' is Up" %
                              (PLUGIN,
                               network.name,
                               network.link_two.name))
            else:
                # NOTE(review): quotes are misplaced in this message
                # ("%s 'is'" instead of "'%s' is") ; left as-is here.
                collectd.info("%s %s link two %s 'is' Down" %
                              (PLUGIN,
                               network.name,
                               network.link_two.name))

    # Dispatch usage value to collectd
    val = collectd.Values(host=obj.hostname)
    val.plugin = 'interface'
    val.type = 'percent'
    val.type_instance = 'used'

    # For each interface [ mgmt, oam, infra ]
    # calculate the percentage used sample
    # sample = 100 % when all its links are up
    # sample = 0   % when all its links are down
    # sample = 50  % when one of a lagged group is down
    for network in NETWORKS:
        if network.link_one.name is not None:

            val.plugin_instance = network.name

            network.sample = 0

            if network.link_two.name is not None:
                # lagged
                if network.link_one.state == LINK_UP:
                    network.sample = 50
                if network.link_two.state == LINK_UP:
                    network.sample += 50
            else:
                if network.link_one.state == LINK_UP:
                    network.sample = 100
            val.dispatch(values=[network.sample])

            if network.link_one.state_change is True or \
                    network.link_two.state_change is True:

                dump_network_info(network)

                network.link_one.state_change = False
                network.link_two.state_change = False

                network.sample_last = network.sample

        else:
            collectd.debug("%s %s network not provisioned" %
                           (PLUGIN, network.name))
    obj.audits += 1

    return 0
# register the config, init and read functions
# read_func is invoked by collectd every PLUGIN_AUDIT_INTERVAL
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func, interval=PLUGIN_AUDIT_INTERVAL)

View File

@ -1,21 +0,0 @@
# For stock plugin only
# Uncomment to compare stock to tiS plugin readings
# ---------------------
# <Plugin memory>
# ValuesAbsolute false
# ValuesPercentage true
# </Plugin>
# Threshold monitoring of the memory plugin's percent/used reading:
#   WarningMax 80 -> warning notification when usage exceeds 80 %
#   FailureMax 90 -> failure notification when usage exceeds 90 %
#   Hits 2        -> two consecutive out-of-range samples required
#   Persist/PersistOK true -> keep notifying while the value remains
#                             out of (or back in) range
<Plugin "threshold">
<Plugin "memory">
<Type "percent">
Instance "used"
Persist true
PersistOK true
WarningMax 80.00
FailureMax 90.00
Hits 2
Invert false
</Type>
</Plugin>
</Plugin>

View File

@ -1,279 +0,0 @@
#
# Copyright (c) 2018-2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
############################################################################
#
# This file is the collectd 'Platform Memory Usage' Monitor.
#
# Platform, per numa node and hugepage memory usage percentages are
# calculated from /proc/meminfo and the per node meminfo files and
# dispatched to collectd on every read interval.
#
# Init Function:
#   - read /proc/sys/vm/overcommit_memory to learn whether strict
#     overcommit accounting applies to the usage calculation
#
############################################################################
import os
import collectd
# set True to log the parsed meminfo values on every read
debug = False
# log prefix strings for the three readings this plugin dispatches
PLUGIN = 'platform memory usage'
PLUGIN_NUMA = 'numa memory usage'
PLUGIN_HUGE = 'hugepage memory usage'
# Memory usage control class ; holds the parsed meminfo values and
# the derived usage sample between reads.
class MEM:
    hostname = ""              # hostname for sample notification message
    cmd = '/proc/meminfo'      # the query command (path of meminfo file)
    value = float(0.0)         # float value of memory usage

    # meminfo values we care about (kB unless noted)
    memTotal_kB = 0
    memFree_kB = 0
    buffers = 0
    cached = 0
    SReclaimable = 0
    CommitLimit = 0
    Committed_AS = 0
    HugePages_Total = 0        # page counts, not kB
    HugePages_Free = 0
    Hugepagesize = 0
    AnonPages = 0
    FilePages = 0

    # derived values
    avail = 0                  # free + buffers + cached + SReclaimable
    total = 0                  # avail + AnonPages
    strict = 0                 # 1 when kernel overcommit_memory == 2
# Instantiate the class
# single module level state object shared by all callbacks below
obj = MEM()
def log_meminfo(plugin, name, meminfo):
    """Log the supplied meminfo

    :param plugin: log prefix string (PLUGIN / PLUGIN_NUMA / ...).
    :param name: label for this dump, e.g. the meminfo file path.
    :param meminfo: object carrying the parsed meminfo attributes.
    """
    # suppressed unless the module level 'debug' flag is set
    if debug is False:
        return

    collectd.info("%s %s" % (plugin, name))
    collectd.info("%s ---------------------------" % plugin)
    collectd.info("%s memTotal_kB    : %f" % (plugin, meminfo.memTotal_kB))
    collectd.info("%s memFree_kB     : %f" % (plugin, meminfo.memFree_kB))
    collectd.info("%s Buffers        : %f" % (plugin, meminfo.buffers))
    collectd.info("%s Cached         : %f" % (plugin, meminfo.cached))
    collectd.info("%s SReclaimable   : %f" % (plugin, meminfo.SReclaimable))
    collectd.info("%s CommitLimit    : %f" % (plugin, meminfo.CommitLimit))
    collectd.info("%s Committed_AS   : %f" % (plugin, meminfo.Committed_AS))
    collectd.info("%s HugePages_Total: %f" % (plugin, meminfo.HugePages_Total))
    collectd.info("%s HugePages_Free : %f" % (plugin, meminfo.HugePages_Free))
    collectd.info("%s Hugepagesize   : %f" % (plugin, meminfo.Hugepagesize))
    collectd.info("%s AnonPages      : %f" % (plugin, meminfo.AnonPages))
def config_func(config):
    """Configure the memory usage plugin"""
    # honour the first 'path' key found in the plugin config, which
    # overrides the default /proc/meminfo query path
    for node in config.children:
        if node.key.lower() == 'path':
            obj.cmd = str(node.values[0])
            collectd.info("%s configured query command: '%s'" %
                          (PLUGIN, obj.cmd))
            return 0

    collectd.info("%s no config command provided ; "
                  "defaulting to '%s'" %
                  (PLUGIN, obj.cmd))
# Load the hostname and kernel memory 'overcommit' setting.
def init_func():
    """Learn the hostname and the kernel memory overcommit setting."""
    # get current hostname
    obj.hostname = os.uname()[1]

    # /proc/sys/vm/overcommit_memory values:
    #   0 - heuristic overcommit
    #   1 - always overcommit
    #   2 - don't overcommit
    # strict accounting (obj.strict=1) applies only for value 2 ;
    # otherwise strict stays at its default of 0
    fn = '/proc/sys/vm/overcommit_memory'
    if os.path.exists(fn):
        with open(fn, 'r') as f:
            for setting in f:
                if int(setting) == 2:
                    obj.strict = 1
                    break

    collectd.info("%s strict:%d" % (PLUGIN, obj.strict))
# Calculate the CPU usage sample
def read_func():
    """Dispatch platform, numa node and hugepage memory usage.

    Parses obj.cmd (/proc/meminfo by default), computes the platform
    memory usage percentage and dispatches it to collectd ; then does
    the same per numa node, adding a hugepage usage sample for nodes
    with hugepage memory provisioned.

    :returns: 0 always (collectd read callback convention).
    """
    # parse the platform meminfo file into a 'name: value' dictionary
    meminfo = {}
    try:
        with open(obj.cmd) as fd:
            for line in fd:
                meminfo[line.split(':')[0]] = line.split(':')[1].strip()
    except EnvironmentError as e:
        # fixed format string ; previously logged the literal text
        # 'str(e)' and never showed the path that failed
        collectd.error("%s unable to read from %s ; %s" %
                       (PLUGIN, obj.cmd, str(e)))
        return 0

    # setup the sample structure
    val = collectd.Values(host=obj.hostname)
    val.type = 'percent'
    val.type_instance = 'used'

    # remove the 'unit' (kB) suffix that might be on some of the lines
    for key in meminfo:
        value_unit = [u.strip() for u in meminfo[key].split(' ', 1)]
        if len(value_unit) == 2:
            meminfo[key] = float(value_unit[0])
        else:
            meminfo[key] = float(meminfo[key])

    obj.memTotal_kB = float(meminfo['MemTotal'])
    obj.memFree_kB = float(meminfo['MemFree'])
    obj.buffers = float(meminfo['Buffers'])
    obj.cached = float(meminfo['Cached'])
    obj.SReclaimable = float(meminfo['SReclaimable'])
    obj.CommitLimit = float(meminfo['CommitLimit'])
    obj.Committed_AS = float(meminfo['Committed_AS'])
    obj.HugePages_Total = float(meminfo['HugePages_Total'])
    obj.HugePages_Free = float(meminfo['HugePages_Free'])
    obj.Hugepagesize = float(meminfo['Hugepagesize'])
    obj.AnonPages = float(meminfo['AnonPages'])

    log_meminfo(PLUGIN, "/proc/meminfo", obj)

    # 'available' approximated as free plus reclaimable cache memory
    obj.avail = float(obj.memFree_kB +
                      obj.buffers +
                      obj.cached +
                      obj.SReclaimable)
    obj.total = float(obj.avail + obj.AnonPages)

    # strict accounting mode measures committed over the commit limit
    if obj.strict == 1:
        obj.value = float(obj.Committed_AS) / float(obj.CommitLimit)
    else:
        obj.value = float(obj.AnonPages) / float(obj.total)
    obj.value = float(obj.value * 100)

    if debug is True:
        collectd.info("%s ---------------------------" % PLUGIN)
        collectd.info("%s memAvail: %d" % (PLUGIN, obj.avail))
        collectd.info("%s memTotal: %d" % (PLUGIN, obj.total))
        collectd.info('%s reports %.2f %% usage' % (PLUGIN, obj.value))

    # Dispatch usage value to collectd
    val.plugin = 'memory'
    val.plugin_instance = 'platform'
    val.dispatch(values=[obj.value])

    #####################################################################
    # Now get the Numa Node Memory Usage
    #####################################################################
    numa_node_files = []
    fn = "/sys/devices/system/node/"
    files = os.listdir(fn)
    for file in files:
        if 'node' in file:
            numa_node_files.append(fn + file + '/meminfo')

    for numa_node in numa_node_files:
        meminfo = {}
        try:
            with open(numa_node) as fd:
                for line in fd:
                    # per node line format: 'Node <n> <Name>: <value> ...'
                    meminfo[line.split()[2][0:-1]] = line.split()[3].strip()

            obj.memFree_kB = float(meminfo['MemFree'])
            obj.FilePages = float(meminfo['FilePages'])
            obj.SReclaimable = float(meminfo['SReclaimable'])
            obj.AnonPages = float(meminfo['AnonPages'])
            obj.HugePages_Total = float(meminfo['HugePages_Total'])
            obj.HugePages_Free = float(meminfo['HugePages_Free'])

            log_meminfo(PLUGIN, numa_node, obj)

            avail = float(obj.memFree_kB +
                          obj.FilePages +
                          obj.SReclaimable)
            total = float(avail + obj.AnonPages)
            obj.value = float(obj.AnonPages) / float(total)
            obj.value = float(obj.value * 100)

            # Dispatch usage value to collectd for this numa node
            val.plugin_instance = numa_node.split('/')[5]
            val.dispatch(values=[obj.value])

            collectd.debug('%s reports %s at %.2f %% usage (%s)' %
                           (PLUGIN_NUMA,
                            val.plugin,
                            obj.value,
                            val.plugin_instance))

            # Numa Node Huge Page Memory Monitoring
            #
            # Only monitor if there is Huge Page Memory
            if obj.HugePages_Total > 0:
                obj.value = \
                    float(obj.HugePages_Total -
                          obj.HugePages_Free) / \
                    float(obj.HugePages_Total)
                obj.value = float(obj.value * 100)

                # Dispatch huge page memory usage value
                # to collectd for this numa node.
                val.plugin_instance = numa_node.split('/')[5] + '_hugepages'
                val.dispatch(values=[obj.value])

                collectd.debug('%s reports %s at %.2f %% usage (%s)' %
                               (PLUGIN_HUGE,
                                val.plugin,
                                obj.value,
                                val.plugin_instance))

        except (EnvironmentError, KeyError, IndexError, ValueError) as e:
            # fixed format string (was logging the literal 'str(e)') and
            # widened to parse errors so one bad node file does not
            # abort the remaining nodes
            collectd.error("%s unable to read from %s ; %s" %
                           (PLUGIN_NUMA, numa_node, str(e)))

    return 0
# register this plugin's config, init and read handlers with collectd
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func)

View File

@ -1,380 +0,0 @@
#
# Copyright (c) 2018-2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#############################################################################
#
# This file is the collectd 'Maintenance' Notifier.
#
# Collects provides information about each event as an object passed to the
# notification handler ; the notification object.
#
# object.host - the hostname
#
# object.plugin - the name of the plugin aka resource
# object.plugin_instance - plugin instance string i.e. say mountpoint
# for df plugin
# object.type, - the unit i.e. percent or absolute
# object.type_instance - the attribute i.e. free, used, etc
#
# object.severity - a integer value 0=OK , 1=warning, 2=failure
# object.message - a log-able message containing the above along
# with the value
#
# This notifier manages requesting mtce to assert or clear its collectd
# host-degrade-cause flag based on notification messages sent from collectd.
#
# Messages to maintenance are throttled ONE_EVERY while this state is the
# same as last state.
#
# Message is sent on every state change
# from clear to assert or
# from assert to clear
#
# See code comments for details.
#
############################################################################
#
# Import list
import os
import socket
import collectd
import tsconfig.tsconfig as tsc
# This plugin name
PLUGIN = 'degrade notifier'
# collectd severity definitions ;
# Note: can't seem to pull them in symbolically with a header
NOTIF_FAILURE = 1
NOTIF_WARNING = 2
NOTIF_OKAY = 4
# default mtce port.
# ... with configuration override
MTCE_CMD_RX_PORT = 2101
# same state message throttle count.
# ... only send the degrade message every 'this' number
#     while the state of assert or clear remains the same.
ONE_EVERY = 10
# plugin/resource names that notifications are matched against
PLUGIN__DF = 'df'
PLUGIN__MEM = 'memory'
PLUGIN__CPU = 'cpu'
PLUGIN__VSWITCH_MEM = 'vswitch_mem'
PLUGIN__VSWITCH_CPU = 'vswitch_cpu'
PLUGIN__VSWITCH_PORT = "vswitch_port"
PLUGIN__VSWITCH_IFACE = "vswitch_iface"
PLUGIN_INTERFACE = 'interface'
PLUGIN__EXAMPLE = 'example'
# The collectd Maintenance Notifier Object
class collectdMtceNotifierObject:
    """Holds the degrade notifier state carried between notifications."""

    def __init__(self, port):
        """collectdMtceNotifierObject Class constructor"""

        # default maintenance port and (unresolved) controller address
        self.port = port
        self.addr = None

        # specifies the protocol family to use when messaging maintenance.
        # if system is IPV6, then that is learned and this 'protocol' is
        # updated with AF_INET6
        self.protocol = socket.AF_INET

        # List of plugin names that require degrade for specified severity.
        self.degrade_list__failure = [
            PLUGIN__DF,
            PLUGIN__MEM,
            PLUGIN__CPU,
            PLUGIN__VSWITCH_MEM,
            PLUGIN__VSWITCH_CPU,
            PLUGIN__VSWITCH_PORT,
            PLUGIN__VSWITCH_IFACE,
            PLUGIN_INTERFACE,
            PLUGIN__EXAMPLE,
        ]
        self.degrade_list__warning = [PLUGIN_INTERFACE]

        # the running list of resources that require degrade.
        # a degrade clear message is sent whenever this list is empty.
        # a degrade assert message is sent whenever this list is not empty.
        self.degrade_list = []

        # throttle down sending of duplicate degrade assert/clear messages
        self.last_state = "undef"
        self.msg_throttle = 0
# Instantiate the mtce_notifier object
# This object persists from notification to notification
obj = collectdMtceNotifierObject(MTCE_CMD_RX_PORT)
def _get_active_controller_ip():
    """Get the active controller host IP"""
    try:
        # resolve the floating 'controller' hostname
        addr_info = socket.getaddrinfo('controller', None)
        obj.addr = addr_info[0][4][0]
    except Exception as ex:
        obj.addr = None
        collectd.error("%s failed to get controller ip ; %s" %
                       (PLUGIN, str(ex)))
        return 0
    collectd.info("%s controller ip: %s" % (PLUGIN, obj.addr))
    return 0
def _df_instance_to_path(df_inst):
"""Convert a df instance name to a mountpoint"""
# df_root is not a dynamic file system. Ignore that one.
if df_inst == 'df_root':
return '/'
else:
# For all others replace all '-' with '/'
return('/' + df_inst[3:].replace('-', '/'))
# This function removes degraded file systems that are no longer present.
def _clear_degrade_for_missing_filesystems():
    """Remove degraded file systems that are no longer mounted or present.

    Fix: iterate over a snapshot of the degrade list. Removing items
    from a list while iterating over it skips the element following
    each removal, so some stale filesystems were missed.

    :returns: 0 always.
    """
    for df_inst in list(obj.degrade_list):

        # Only file system plugins are looked at.
        # File system plugin instance names are prefixed with 'df_'
        # as the first 3 chars in the instance name.
        if df_inst[0:3] != 'df_':
            continue

        path = _df_instance_to_path(df_inst)

        # check the mount point.
        # if the mount point no longer exists then remove
        # this instance from the degrade list.
        if os.path.ismount(path) is False:
            collectd.info("%s clearing degrade for missing %s ; %s" %
                          (PLUGIN, path, obj.degrade_list))
            obj.degrade_list.remove(df_inst)

    return 0
# The collectd configuration interface
#
# Used to configure the maintenance port.
#   key = 'port'
#   val = port number
#
def config_func(config):
    """Configure the maintenance degrade notifier plugin"""

    collectd.debug('%s config function' % PLUGIN)

    for node in config.children:
        key = node.key.lower()
        val = node.values[0]

        # 'port' is the only supported configuration key
        if key != 'port':
            continue

        obj.port = int(val)
        collectd.info("%s configured mtce port: %d" %
                      (PLUGIN, obj.port))
        return 0

    # no port key found ; fall back to the default maintenance port
    obj.port = MTCE_CMD_RX_PORT
    collectd.error("%s no mtce port provided ; defaulting to %d" %
                   (PLUGIN, obj.port))
# Collectd calls this function on startup.
def init_func():
    """Collectd Mtce Notifier Initialization Function"""
    # record this host's name ; it is included in every mtce message
    hostname = os.uname()[1]
    obj.host = hostname
    collectd.info("%s %s:%s sending to mtce port %d" %
                  (PLUGIN, tsc.nodetype, hostname, obj.port))
    collectd.debug("%s init function" % PLUGIN)
# This is the Notifier function that is called by collectd.
#
# Handling steps are
#
#  1. build resource name from notification object.
#  2. check resource against severity lists.
#  3. manage this instance's degrade state.
#  4. send mtcAgent the degrade state message.
#
def _manage_degrade_assert(plugin, resource, degrade_plugins):
    """Add or prune 'resource' on the degrade list for a non-OKAY severity.

    plugin          - the collectd plugin name from the notification
    resource        - '<plugin>_<plugin_instance>' resource name
    degrade_plugins - plugin names whose notifications at this severity
                      should degrade the host

    If the plugin is in degrade_plugins the resource is added to the
    degrade list (unless its df mountpoint is gone). If degrade_plugins
    is empty the resource is removed ; should never occur.
    """
    if degrade_plugins:
        if plugin in degrade_plugins and resource not in obj.degrade_list:
            # handle dynamic filesystems going missing over a swact
            # or unmount and being reported as a transient error by
            # the df plugin. Don't add it to the failed list if the
            # mountpoint is gone.
            add = True
            if plugin == PLUGIN__DF:
                path = _df_instance_to_path(resource)
                add = os.path.ismount(path)
            if add is True:
                collectd.info("%s %s added to degrade list" %
                              (PLUGIN, resource))
                obj.degrade_list.append(resource)
    else:
        # If this severity causes no degrade then make sure this
        # resource is not in the degrade list. Should never occur.
        if resource in obj.degrade_list:
            obj.degrade_list.remove(resource)


def notifier_func(nObject):
    """Collectd Mtce Notifier Handler Function"""

    # Create the resource name from the notifier object.
    # format: <plugin name>_<plugin_instance_name>
    resource = nObject.plugin
    if nObject.plugin_instance:
        resource += "_" + nObject.plugin_instance

    # This block looks at the current notification severity
    # and manages the degrade_list.
    # If the specified plugin name exists in each of the warnings
    # or failure lists and there is a current severity match then
    # add that resource instance to the degrade list.
    # Conversely if this notification is OKAY then make sure this
    # resource instance is not in the degrade list (remove it if it is).
    #
    # Fix: compare severities with '==' rather than 'is' ; the severity
    # values are plain integers and identity comparison only worked by
    # CPython small-int caching.
    if nObject.severity == NOTIF_OKAY:
        if obj.degrade_list and resource in obj.degrade_list:
            obj.degrade_list.remove(resource)

    elif nObject.severity == NOTIF_FAILURE:
        _manage_degrade_assert(nObject.plugin, resource,
                               obj.degrade_list__failure)

    elif nObject.severity == NOTIF_WARNING:
        _manage_degrade_assert(nObject.plugin, resource,
                               obj.degrade_list__warning)

    else:
        collectd.info("%s unsupported severity %d" %
                      (PLUGIN, nObject.severity))
        return 0

    # running counter of notifications.
    obj.msg_throttle += 1

    # Support for Dynamic File Systems
    # --------------------------------
    # Some active controller mounted filesystems can become
    # unmounted under the watch of collectd. This can occur
    # as a result of a Swact. If a 'degrade' is raised at the
    # time an fs disappears then that state can become stuck
    # active until the next Swact. This call handles this case.
    #
    # Audit file system presence every time we get the
    # notification for the root file system.
    # Depending on the root filesystem always being there.
    if nObject.plugin == 'df' \
            and nObject.plugin_instance == 'root' \
            and len(obj.degrade_list):
        _clear_degrade_for_missing_filesystems()

    # If degrade list is empty then a clear state is sent to maintenance.
    # If degrade list is NOT empty then an assert state is sent.
    # For logging and to ease debug the code below creates a list of
    # degraded resource instances to be included in the message to
    # maintenance for mtcAgent to optionally log it.
    resources = ""
    if obj.degrade_list:
        # limit the degraded resource list sent to mtce to 5 entries.
        # Fix: the original slice [0:1:5] (start 0, stop 1, step 5)
        # only ever included the first resource.
        for r in obj.degrade_list[0:5]:
            resources += r + ','
        resources = resources[:-1]
        state = "assert"
    else:
        state = "clear"

    # Message throttling ....
    # Avoid sending the same last state message for up to ONE_EVERY count.
    # Just reduce load on mtcAgent.
    if obj.last_state == state and obj.msg_throttle < ONE_EVERY:
        return 0

    # if the degrade state has changed then log it and proceed
    if obj.last_state != state:
        if obj.last_state != "undef":
            collectd.info("%s degrade %s %s" %
                          (PLUGIN,
                           state,
                           obj.degrade_list))

    # Save state for next time
    obj.last_state = state

    # Clear the message throttle counter
    obj.msg_throttle = 0

    # Send the degrade state ; assert or clear message to mtcAgent.
    # If we get a send failure then log it and set the addr to None
    # so it forces us to refresh the controller address on the next
    # notification.
    try:
        mtce_socket = socket.socket(obj.protocol, socket.SOCK_DGRAM)
        if mtce_socket:
            if obj.addr is None:
                _get_active_controller_ip()
                if obj.addr is None:
                    return 0

            # Create the Maintenance message.
            message = "{\"service\":\"collectd_notifier\","
            message += "\"hostname\":\"" + nObject.host + "\","
            message += "\"degrade\":\"" + state + "\","
            message += "\"resource\":\"" + resources + "\"}"
            collectd.debug("%s: %s" % (PLUGIN, message))

            mtce_socket.settimeout(1.0)
            mtce_socket.sendto(message, (obj.addr, obj.port))
            mtce_socket.close()
        else:
            collectd.error("%s %s failed to open socket (%s)" %
                           (PLUGIN, resource, obj.addr))
    except socket.error as e:
        if e.args[0] == socket.EAI_ADDRFAMILY:
            # Handle IPV4 to IPV6 switchover:
            obj.protocol = socket.AF_INET6
            collectd.info("%s %s ipv6 addressing (%s)" %
                          (PLUGIN, resource, obj.addr))
        else:
            collectd.error("%s %s socket error (%s) ; %s" %
                           (PLUGIN, resource, obj.addr, str(e)))
            # try self correction
            obj.addr = None
            obj.protocol = socket.AF_INET
    return 0
# Register this plugin's config, init and notification handlers with
# the collectd daemon.
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_notification(notifier_func)

View File

@ -1,13 +0,0 @@
# Threshold configuration for the ntpq plugin's 'reachable' sample.
# WarningMin 1 / FailureMin 0 : a value of 0 (no reachable selected
# server) is a failure ; Hits 2 requires two consecutive readings
# before the notification is issued.
<Plugin "threshold">
    <Plugin "ntpq">
        <Type "absolute">
            Instance "reachable"
            Persist true
            PersistOK true
            WarningMin 1
            FailureMin 0
            Hits 2
            Invert false
        </Type>
    </Plugin>
</Plugin>

View File

@ -1,857 +0,0 @@
############################################################################
# Copyright (c) 2018-2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#############################################################################
#
# This is the NTP connectivity monitor plugin for collectd.
#
# This plugin uses the industry standard ntpq exec to query NTP attributes.
#
# This plugin executes 'ntpq -np' to determine which provisioned servers
# are reachable. The ntpq output includes a Tally Code for each server.
# The Tally Code is represented by the first character in each server's
# line item.
#
# The only ntpq output looked at by this plugin are the Tally Codes and
# associated IPs.
#
# Tally Code Summary:
#
# A server is considered reachable only when the Tally Code is a * or a +.
# A server is considered unreachable if the Tally Code is a ' ' (space)
# A server with a '*' Tally Code is the 'selected' server.
#
# Here is an example of the ntpq command output
#
# remote refid st t when poll reach delay offset jitter
# =============================================================================
# +192.168.204.104 206.108.0.133 2 u 203 1024 377 0.226 -3.443 1.137
# +97.107.129.217 200.98.196.212 2 u 904 1024 377 21.677 5.577 0.624
# 192.95.27.155 24.150.203.150 2 u 226 1024 377 15.867 0.381 1.124
# -97.107.129.217 200.98.196.212 2 u 904 1024 377 21.677 5.577 0.624
# *182.95.27.155 24.150.203.150 2 u 226 1024 377 15.867 0.381 1.124
#
# The local controller node is not to be considered a reachable server and is
# never alarmed if it is not reachable.
#
# Normal running modes with no alarms include
#
# 0 - All NTP servers are reachable and one is selected
# 1 - No NTP servers are provisioned
#
# Failure modes that warrant alarms include
#
# 2 - None of the NTP servers are reachable - major alarm
# 3 - Some NTP servers reachable and one is selected - server IP minor alarm
# 4 - Some NTP servers reachable but none is selected - major alarm
#
# None of these failures result in a host being degraded.
#
# This script will only be run on the controller nodes.
#
# This script logs to daemon.log with the 'collectd' process label
#
###############################################################################
import os
import subprocess
import uuid
import collectd
from fm_api import constants as fm_constants
from fm_api import fm_api
import tsconfig.tsconfig as tsc
import socket
# FM API handle used to raise and clear this plugin's alarms
api = fm_api.FaultAPIsV2()

# name used to prefix this plugin's log entries
PLUGIN = 'NTP query plugin'
PLUGIN_INTERVAL = 600          # audit interval in secs
PLUGIN_CONF = '/etc/ntp.conf'  # provisioned servers are read from here
PLUGIN_EXEC = '/usr/sbin/ntpq'  # the ntp query executable
PLUGIN_EXEC_OPTIONS = '-pn'    # print peer list with numeric addresses
PLUGIN_ALARMID = "100.114"     # NTP alarm id
# define a class here that will persist over read calls
class NtpqObject:
    """Plugin state that persists across collectd read_func calls."""

    # static variables set in init
    hostname = ''    # the name of this host
    base_eid = ''    # the eid for the major alarm
    init_complete = False    # set to true once config is complete
    alarm_raised = False     # True when the major alarm is asserted

    # server tracking lists managed by read_func
    server_list_conf = []    # list of servers in the /etc/ntp.conf file
    server_list_ntpq = []    # list of servers in the ntpq -np output
    unreachable_servers = []  # list of unreachable servers
    reachable_servers = []   # list of reachable servers
    selected_server = 'None'  # the ip address of the selected server
    selected_server_save = 'None'  # the last selected server ; note change
    peer_selected = False    # true when peer is selected

    # variables used to raise alarms to FM
    suppression = True
    service_affecting = False
    name = "NTP"
    alarm_type = fm_constants.FM_ALARM_TYPE_1
    cause = fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN
    repair = "Monitor and if condition persists, "
    repair += "contact next level of support."


# This plugin's class object - persists over read calls
obj = NtpqObject()
###############################################################################
#
# Name       : _add_unreachable_server
#
# Description: Add an ip to the unreachable servers tracking list.
#
# Parameters : IP address
#
###############################################################################
def _add_unreachable_server(ip=None):
    """Add ip to unreachable_servers list"""
    # guard against a missing ip
    if not ip:
        collectd.error("%s _add_unreachable_server called with no IP" % PLUGIN)
        return

    # nothing to do when the ip is already tracked
    if ip in obj.unreachable_servers:
        collectd.debug("%s ip '%s' already in unreachable_servers list" %
                       (PLUGIN, ip))
        return

    collectd.debug("%s adding '%s' to unreachable servers list: %s" %
                   (PLUGIN, ip, obj.unreachable_servers))
    obj.unreachable_servers.append(ip)
    collectd.info("%s added '%s' to unreachable servers list: %s" %
                  (PLUGIN, ip, obj.unreachable_servers))
###############################################################################
#
# Name       : _raise_alarm
#
# Description: This private interface is used to raise NTP alarms.
#
# Parameters : Optional IP address
#
# If called with no or empty IP then a generic major alarm is raised.
# If called with an IP then an IP specific minor alarm is raised.
#
# Returns    : Error indication.
#
#              True : is error. FM call failed to set the
#                     alarm and needs to be retried.
#
#              False: no error. FM call succeeds
#
###############################################################################
def _raise_alarm(ip=None):
    """Assert an NTP alarm.

    Fix: honor the documented return contract (True=error, False=success).
    The previous implementation returned 0 on every path, so callers'
    '_raise_alarm(x) is False' success checks never matched ('0 is False'
    is always False) and the unreachable server list was never updated.
    """
    if not ip:
        # Don't re-raise the alarm if its already raised
        if obj.alarm_raised is True:
            return False

        if obj.peer_selected:
            reason = "NTP cannot reach external time source; " \
                     "syncing with peer controller only"
            fm_severity = fm_constants.FM_ALARM_SEVERITY_MINOR
        else:
            reason = "NTP configuration does not contain any valid "
            reason += "or reachable NTP servers."
            fm_severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
        eid = obj.base_eid
    else:
        reason = "NTP address "
        reason += ip
        reason += " is not a valid or a reachable NTP server."
        eid = obj.base_eid + '=' + ip
        fm_severity = fm_constants.FM_ALARM_SEVERITY_MINOR

    try:
        fault = fm_api.Fault(
            alarm_id=PLUGIN_ALARMID,
            alarm_state=fm_constants.FM_ALARM_STATE_SET,
            entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
            entity_instance_id=eid,
            severity=fm_severity,
            reason_text=reason,
            alarm_type=obj.alarm_type,
            probable_cause=obj.cause,
            proposed_repair_action=obj.repair,
            service_affecting=obj.service_affecting,
            suppression=obj.suppression)

        alarm_uuid = api.set_fault(fault)
        if _is_uuid_like(alarm_uuid) is False:
            # Don't _add_unreachable_server list if the fm call failed.
            # That way it will be retried at a later time.
            collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
                           (PLUGIN, PLUGIN_ALARMID, eid, alarm_uuid))
            return True
        else:
            collectd.info("%s raised alarm %s:%s" %
                          (PLUGIN,
                           PLUGIN_ALARMID,
                           eid))
            if ip:
                _add_unreachable_server(ip)
            else:
                obj.alarm_raised = True
    except Exception as ex:
        collectd.error("%s 'set_fault' exception ; %s:%s:%s ; %s" %
                       (PLUGIN,
                        PLUGIN_ALARMID,
                        eid,
                        fm_severity,
                        ex))
        return True
    return False
###############################################################################
#
# Name       : _clear_base_alarm
#
# Description: Clear the NTP base alarm.
#
# Parameters : None
#
# Returns    : True on success, False when the FM clear call raised.
#
###############################################################################
def _clear_base_alarm():
    """Clear the NTP base alarm"""
    try:
        cleared = api.clear_fault(PLUGIN_ALARMID, obj.base_eid)
        if cleared is False:
            # FM reports no such alarm ; treat as already cleared
            collectd.info("%s %s:%s alarm already cleared" %
                          (PLUGIN, PLUGIN_ALARMID, obj.base_eid))
        else:
            collectd.info("%s %s:%s alarm cleared" %
                          (PLUGIN, PLUGIN_ALARMID, obj.base_eid))
        obj.alarm_raised = False
        return True
    except Exception as ex:
        collectd.error("%s 'clear_fault' exception ; %s:%s ; %s" %
                       (PLUGIN,
                        PLUGIN_ALARMID,
                        obj.base_eid,
                        ex))
    return False
###############################################################################
#
# Name       : _remove_ip_from_unreachable_list
#
# Description: Remove the specified IP from the unreachable servers list
#              and clear its alarm if raised.
#
# Parameters : IP address
#
###############################################################################
def _remove_ip_from_unreachable_list(ip):
    """Remove an IP address from the unreachable list and clear its NTP alarms"""

    # nothing to do unless this ip is currently tracked as unreachable
    if not ip or ip not in obj.unreachable_servers:
        return

    eid = obj.base_eid + '=' + ip
    collectd.debug("%s trying to clear alarm %s" % (PLUGIN, eid))
    try:
        # clear the alarm if its asserted
        if api.clear_fault(PLUGIN_ALARMID, eid) is True:
            collectd.info("%s %s:%s alarm cleared " %
                          (PLUGIN, PLUGIN_ALARMID, eid))
        else:
            # alarm does not exist
            collectd.info("%s %s:%s alarm clear" %
                          (PLUGIN, PLUGIN_ALARMID, eid))
        obj.unreachable_servers.remove(ip)
    except Exception as ex:
        collectd.error("%s 'clear_fault' exception ; %s:%s ; %s" %
                       (PLUGIN,
                        PLUGIN_ALARMID,
                        eid,
                        ex))
###############################################################################
#
# Name       : _add_ip_to_ntpq_server_list
#
# Description: Record an ip seen in the current ntpq output. This list is
#              used to detect and handle servers that come and go between
#              readings that might otherwise result in stuck alarms.
#
# Parameters : IP address
#
# Returns    : nothing
#
###############################################################################
def _add_ip_to_ntpq_server_list(ip):
    """Add this IP to the list of servers that ntpq reports against"""
    # ignore duplicates within one reading
    if ip in obj.server_list_ntpq:
        return
    obj.server_list_ntpq.append(ip)
##############################################################################
#
# Name       : _cleanup_stale_servers
#
# Description: Walk the server tracking lists and drop any ip that does not
#              appear in the latest ntpq output, clearing alarms as needed
#              so they cannot become stale.
#
# Parameters : None
#
# Returns    : nothing
#
###############################################################################
def _cleanup_stale_servers():
    """Cleanup the server IP tracking lists"""

    # Fix: iterate over copies. Both loops remove elements from the list
    # being scanned (directly, or via _remove_ip_from_unreachable_list),
    # which would otherwise skip the element following each removal.
    collectd.debug("%s CLEANUP REACHABLE: %s %s" %
                   (PLUGIN, obj.server_list_ntpq, obj.reachable_servers))
    for ip in list(obj.reachable_servers):
        if ip not in obj.server_list_ntpq:
            collectd.info("%s removing missing '%s' server from reachable "
                          "server list" % (PLUGIN, ip))
            obj.reachable_servers.remove(ip)

    collectd.debug("%s CLEANUP UNREACHABLE: %s %s" %
                   (PLUGIN, obj.server_list_ntpq, obj.unreachable_servers))
    for ip in list(obj.unreachable_servers):
        if ip not in obj.server_list_ntpq:
            collectd.info("%s removing missing '%s' server from unreachable "
                          "server list" % (PLUGIN, ip))
            _remove_ip_from_unreachable_list(ip)
###############################################################################
#
# Name       : _get_ntp_servers
#
# Description: Read the list of provisioned ntp servers from the ntp.conf
#              file into server_list_conf. When no servers are provisioned
#              the NTP_NOT_PROVISIONED case is handled by clearing any
#              alarms that may have been raised.
#
# Parameters : None
#
# Returns    : nothing
#
# Updates    : server_list_conf
#
###############################################################################
def _get_ntp_servers():
    """Read the provisioned servers from the ntp conf file"""

    with open(PLUGIN_CONF, 'r') as infile:
        for line in infile:
            # provisioned servers are lines of the form 'server <ip> ...'
            if line.startswith('server '):
                ip = line.rstrip().split(' ')[1]
                if ip not in obj.server_list_conf:
                    obj.server_list_conf.append(ip)

    if len(obj.server_list_conf):
        collectd.info("%s server list: %s" %
                      (PLUGIN, obj.server_list_conf))
    else:
        ##################################################################
        #
        # Handle NTP_NOT_PROVISIONED (1) case
        #
        # There is no alarming for this case.
        # Clear any that may have been raised.
        #
        ##################################################################
        collectd.info("%s NTP Service Disabled ; no provisioned servers" %
                      PLUGIN)

        # clear all alarms
        if obj.alarm_raised:
            _clear_base_alarm()

        if obj.unreachable_servers:
            # Fix: iterate over a copy ; _remove_ip_from_unreachable_list
            # mutates the list being scanned, which would otherwise skip
            # every other entry.
            for ip in list(obj.unreachable_servers):
                _remove_ip_from_unreachable_list(ip)
###############################################################################
#
# Name       : is_controller
#
# Description: Return True if the specified ip is associated with a local
#              controller, as determined from /etc/hosts.
#
# Parameters : IP address
#
# Returns    : True or False
#
###############################################################################
def _is_controller(ip):
    """Returns True if this IP corresponds to one of the controllers"""

    collectd.debug("%s check if '%s' is a controller ip" % (PLUGIN, ip))
    with open('/etc/hosts', 'r') as infile:
        for line in infile:
            # skip over file comment lines prefixed with '#'
            if line[0] == '#':
                continue

            # line format is 'ip' 'name' ....
            fields = line.split()
            if len(fields) < 2:
                continue

            # look for an exact ip match on a line naming a controller
            if fields[0] == ip and 'controller' in line:
                collectd.debug("%s %s is a controller" % (PLUGIN, ip))
                return True

    return False
###############################################################################
#
# Name : _is_ip_address
#
# Description: This private interface returns:
# AF_INET if val is ipv4
# AF_INET6 if val is ipv6
# False if val is not a valid ip address
#
# Parameters : val is a uuid string
#
# Returns : socket.AF_INET for ipv4, socket.AF_INET6 for ipv6
# or False for invalid
#
###############################################################################
def _is_ip_address(val):
try:
socket.inet_pton(socket.AF_INET, val)
return socket.AF_INET
except socket.error:
pass
try:
socket.inet_pton(socket.AF_INET6, val)
return socket.AF_INET6
except socket.error:
pass
return False
###############################################################################
#
# Name : is_uuid_like
#
# Description: This private interface returns a True if the specified value is
# a valid uuid.
#
# Parameters : val is a uuid string
#
# Returns : True or False
#
###############################################################################
def _is_uuid_like(val):
"""Returns validation of a value as a UUID"""
try:
return str(uuid.UUID(val)) == val
except (TypeError, ValueError, AttributeError):
return False
###############################################################################
#
# Name       : config_func
#
# Description: The configuration interface this plugin publishes to collectd.
#
#              collectd calls this interface one time on its process startup
#              when it loads this plugin.
#
#              There are currently no plugin specific configuration options
#              to parse.
#
# Parameters : collectd config object
#
# Returns    : zero
#
###############################################################################
def config_func(config):
    """Configure the plugin"""
    # nothing to parse ; just note that the config callback ran
    msg = '%s config function' % PLUGIN
    collectd.debug(msg)
    return 0
###############################################################################
#
# Name       : init_func
#
# Description: The initialization interface this plugin publishes to collectd.
#
#              collectd calls this interface one time on its process startup
#              when it loads this plugin.
#
#              1. get hostname
#              2. build base entity id for the NTP alarm
#              3. query FM for existing NTP alarms
#                 - base alarm is maintained and state loaded if it exists
#                 - ntp ip minor alarms are cleared on init. This is done to
#                   auto correct ntp server IP address changes over process
#                   restart ; avoid stuck alarms.
#
# Parameters : None
#
# Returns    : zero
#
###############################################################################
def init_func():
    """Initialize plugin state and audit existing FM alarms on startup."""

    # ntp query is for controllers only
    if tsc.nodetype != 'controller':
        return 0

    # do nothing till config is complete.
    # init_func will be called again by read_func once config is complete.
    if os.path.exists(tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE) is False:
        return 0

    # get current hostname
    obj.hostname = os.uname()[1]
    if not obj.hostname:
        collectd.error("%s failed to get hostname" % PLUGIN)
        # non-zero tells collectd this plugin's init failed
        return 1

    # base entity id for this host's NTP alarms
    obj.base_eid = 'host=' + obj.hostname + '.ntp'
    collectd.debug("%s on %s with entity id '%s'" %
                   (PLUGIN, obj.hostname, obj.base_eid))

    # get a list of provisioned ntp servers
    _get_ntp_servers()

    # manage existing alarms.
    try:
        alarms = api.get_faults_by_id(PLUGIN_ALARMID)
    except Exception as ex:
        collectd.error("%s 'get_faults_by_id' exception ; %s ; %s" %
                       (PLUGIN, PLUGIN_ALARMID, ex))
        # init_complete stays False so read_func re-runs init next audit
        return 0

    if alarms:
        for alarm in alarms:
            eid = alarm.entity_instance_id
            # ignore alarms not for this host
            if obj.hostname not in eid:
                continue

            # maintain only the base alarm.
            if alarm.entity_instance_id != obj.base_eid:
                # clear any ntp server specific alarms over process restart
                # this is done to avoid the potential for stuck ntp ip alarms
                collectd.info("%s clearing found startup alarm '%s'" %
                              (PLUGIN, alarm.entity_instance_id))
                try:
                    api.clear_fault(PLUGIN_ALARMID, alarm.entity_instance_id)
                except Exception as ex:
                    collectd.error("%s 'clear_fault' exception ; %s:%s ; %s" %
                                   (PLUGIN,
                                    PLUGIN_ALARMID,
                                    alarm.entity_instance_id,
                                    ex))
                    # abort ; init is re-attempted on the next audit
                    return 0
            else:
                # base alarm found ; adopt its asserted state
                obj.alarm_raised = True
                collectd.info("%s found alarm %s:%s" %
                              (PLUGIN,
                               PLUGIN_ALARMID,
                               alarm.entity_instance_id))

                # ensure the base alarm is cleared if there are no
                # provisioned servers.
                if not obj.server_list_conf:
                    _clear_base_alarm()
    else:
        collectd.info("%s no major startup alarms found" % PLUGIN)

    obj.init_complete = True
    return 0
###############################################################################
#
# Name       : read_func
#
# Description: The sample read interface this plugin publishes to collectd.
#
#              collectd calls this interface every audit interval.
#
#              Runs ntpq -np to query NTP status and manages alarms based on
#              the result.
#
#              See file header (above) for more specific behavioral detail.
#
#              Should only run on a controller ; both controllers.
#
# Parameters : None
#
# Returns    : zero or non-zero on significant error
#
###############################################################################
def read_func():
    """Query ntpq and manage NTP alarms based on the result."""

    # ntp query is for controllers only
    if tsc.nodetype != 'controller':
        return 0

    if obj.init_complete is False:
        # once config is complete the deferred init can be run
        if os.path.exists(tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE) is True:
            collectd.info("%s re-running init" % PLUGIN)
            init_func()
        return 0

    # get a list if provisioned ntp servers
    _get_ntp_servers()

    # nothing to do while there are no provisioned NTP servers
    if len(obj.server_list_conf) == 0:
        return 0

    # Do NTP Query.
    # Robustness fix: a non-zero ntpq exit used to raise out of read_func ;
    # log it and retry on the next audit instead.
    try:
        data = subprocess.check_output([PLUGIN_EXEC, PLUGIN_EXEC_OPTIONS])
    except Exception as ex:
        collectd.error("%s ntpq query failed ; %s" % (PLUGIN, str(ex)))
        return 0

    # Keep this FIT test code but make it commented out for security
    #
    # if os.path.exists('/var/run/fit/ntpq_data'):
    #     data = ''
    #     collectd.info("%s using ntpq FIT data" % PLUGIN)
    #     with open('/var/run/fit/ntpq_data', 'r') as infile:
    #         for line in infile:
    #             data += line

    if not data:
        collectd.error("%s no data from query" % PLUGIN)
        return 0

    # Get the ntp query output into a list of lines
    obj.ntpq = data.split('\n')

    # keep track of changes ; only log on changes
    reachable_list_changed = False
    unreachable_list_changed = False

    # Manage the selected server name
    #
    # save the old value so we can print a log if the selected server changes
    if obj.selected_server:
        obj.selected_server_save = obj.selected_server
    # always assume no selected server ; till its learned
    obj.selected_server = ''

    # start with a fresh empty list for this new run to populate
    obj.server_list_ntpq = []

    # Loop through the ntpq output.
    # Ignore the first 2 lines ; just header data.
    for i in range(2, len(obj.ntpq)):

        # ignore empty or lines that are not long enough
        if len(obj.ntpq[i]) < 10:
            continue

        # log the ntpq output ; minus the 2 lines of header
        collectd.info("NTPQ: %s" % obj.ntpq[i])

        # Unreachable servers are ones whose line start with a space
        ip = ''
        if obj.ntpq[i][0] == ' ':
            # get the ip address
            # example format of line:['', '132.163.4.102', '', '', '.INIT.',
            # get ip from index [1] of the list
            unreachable = obj.ntpq[i].split(' ')[1]
            if unreachable:
                # check to see if its a controller ip
                # we skip over controller ips
                if _is_controller(unreachable) is False:
                    _add_ip_to_ntpq_server_list(unreachable)
                    if unreachable not in obj.unreachable_servers:
                        if _raise_alarm(unreachable) is False:
                            unreachable_list_changed = True
                            # if the FM call to raise the alarm worked then
                            # add this ip to the unreachable list if its not
                            # already in it
                            _add_unreachable_server(unreachable)

        # Reachable servers are ones whose line start with a '+'
        elif obj.ntpq[i][0] == '+':
            # remove the '+' and get the ip
            ip = obj.ntpq[i].split(' ')[0][1:]

        elif obj.ntpq[i][0] == '*':
            # remove the '*' and get the ip
            cols = obj.ntpq[i].split(' ')
            ip = cols[0][1:]
            if ip:
                ip_family = _is_ip_address(ip)
                obj.peer_selected = _is_controller(ip)
                if ip != obj.selected_server and obj.alarm_raised is True:
                    # a new ntp server is selected, old alarm may not be
                    # valid
                    _clear_base_alarm()
                    obj.alarm_raised = False
                if obj.peer_selected is False:
                    if obj.selected_server:
                        # done update the selected server if more selections
                        # are found. go with the first one found.
                        collectd.info("%s additional selected server found"
                                      " '%s'; current selection is '%s'" %
                                      (PLUGIN, ip, obj.selected_server))
                    else:
                        # update the selected server list
                        obj.selected_server = ip
                        collectd.debug("%s selected server is '%s'" %
                                       (PLUGIN, obj.selected_server))
                else:
                    # refer to peer.
                    # Idiom fix: the refid scan used to reuse the outer
                    # loop variable 'i' ; use a private index instead.
                    refid = ''
                    for col in range(1, len(cols)):
                        if cols[col] != '':
                            refid = cols[col]
                            break

                    if refid not in ('', '127.0.0.1') and \
                            not _is_controller(refid) and \
                            socket.AF_INET == ip_family:
                        # ipv4, peer controller refers to a time source
                        # that is not itself or a controller (this node)
                        obj.selected_server = ip
                        collectd.debug("peer controller has a reliable "
                                       "source")

        # anything else is unreachable
        else:
            unreachable = obj.ntpq[i][1:].split(' ')[0]
            if _is_controller(unreachable) is False:
                _add_ip_to_ntpq_server_list(unreachable)
                if unreachable not in obj.unreachable_servers:
                    if _raise_alarm(unreachable) is False:
                        unreachable_list_changed = True
                        # if the FM call to raise the alarm worked then
                        # add this ip to the unreachable list if its not
                        # already in it
                        _add_unreachable_server(unreachable)

        if ip:
            # if the ip is valid then manage it
            if _is_controller(ip) is False:
                _add_ip_to_ntpq_server_list(ip)
                # add the ip to the reachable servers list
                # if its not already there
                if ip not in obj.reachable_servers:
                    obj.reachable_servers.append(ip)
                    reachable_list_changed = True
                # make sure this IP is no longer in the unreachable
                # list and that alarms for it are cleared
                _remove_ip_from_unreachable_list(ip)

    _cleanup_stale_servers()

    if obj.selected_server:
        if obj.selected_server != obj.selected_server_save:
            collectd.info("%s selected server changed from '%s' to '%s'" %
                          (PLUGIN,
                           obj.selected_server_save,
                           obj.selected_server))
            obj.selected_server_save = obj.selected_server
            if obj.alarm_raised is True:
                _clear_base_alarm()

    elif obj.alarm_raised is False:
        if obj.peer_selected:
            collectd.info("%s peer is selected" % PLUGIN)
        else:
            collectd.error("%s no selected server" % PLUGIN)
        if _raise_alarm() is False:
            obj.selected_server_save = 'None'

    # only log and act on changes
    if reachable_list_changed is True:
        if obj.reachable_servers:
            collectd.info("%s reachable servers: %s" %
                          (PLUGIN, obj.reachable_servers))
            if obj.alarm_raised is True:
                if obj.selected_server and obj.reachable_servers:
                    _clear_base_alarm()
        else:
            collectd.error("%s no reachable servers" % PLUGIN)
            _raise_alarm()

    # only log changes
    if unreachable_list_changed is True:
        if obj.unreachable_servers:
            collectd.info("%s unreachable servers: %s" %
                          (PLUGIN, obj.unreachable_servers))
        else:
            collectd.info("%s all servers are reachable" % PLUGIN)

    # The sample published to the database is simply the number
    # of reachable servers if one is selected
    if not obj.selected_server:
        sample = 0
    else:
        sample = len(obj.reachable_servers)

    # Dispatch usage value to collectd
    val = collectd.Values(host=obj.hostname)
    val.plugin = 'ntpq'
    val.type = 'absolute'
    val.type_instance = 'reachable'
    val.dispatch(values=[sample])
    return 0
# register the config, init and read functions with the collectd daemon ;
# the read function runs on its own PLUGIN_INTERVAL (600 sec) audit cycle
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func, interval=PLUGIN_INTERVAL)

View File

@ -1,311 +0,0 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
############################################################################
#
# This file contains common collectd plugin constructs and utilities
#
############################################################################
import collectd
import json
import uuid
import httplib2
import socket
import os
from oslo_concurrency import processutils
from fm_api import constants as fm_constants
import tsconfig.tsconfig as tsc
# http request constants
PLUGIN_TIMEOUT = 10      # http request timeout in seconds
PLUGIN_HTTP_HEADERS = {'Accept': 'application/json', 'Connection': 'close'}

# number of audits a plugin waits before attempting its first http query
MIN_AUDITS_B4_FIRST_QUERY = 2
class PluginObject(object):
    """Common state and helper utilities shared by the collectd plugins.

    Each plugin creates one PluginObject to hold its configuration,
    alarm and sampling state and to reuse the http/json query helpers.
    """

    def __init__(self, plugin, url):
        """Constructor.

        :param plugin: name of the plugin ; used to prefix all its logs
        :param url: target url for http based plugins ; may be empty
        """

        # static variables set in init_func
        self.plugin = plugin             # the name of this plugin
        self.hostname = ''               # the name of this host
        self.port = 0                    # the port number for this plugin
        self.base_eid = ''               # the base entity id host=<hostname>
        self.controller = False          # set true if node is controller

        # dynamic gate variables
        self.virtual = False             # set to True if host is virtual
        self.config_complete = False     # set to True once config is complete
        self.config_done = False         # set true if config_func completed ok
        self.init_done = False           # set true if init_func completed ok
        self.fm_connectivity = False     # set true when fm connectivity ok

        # default alarm meta data used when plugins raise faults
        self.alarm_type = fm_constants.FM_ALARM_TYPE_7     # OPERATIONAL
        self.cause = fm_constants.ALARM_PROBABLE_CAUSE_50  # THRESHOLD CROSS
        self.suppression = True
        self.service_affecting = False

        # dynamic variables set in read_func
        self.usage = float(0)            # last usage value recorded as float
        self.value = float(0)            # last read value
        self.audits = 0                  # number of audit since init
        self.enabled = False             # tracks a plugin's enabled state
        self.alarmed = False             # tracks the current alarmed state
        self.mode = ''                   # mode specific to plugin

        # http and json specific variables
        self.url = url                   # target url
        self.jresp = None                # used to store the json response
        self.resp = ''                   # raw response body of last request
        self.objects = []                # list of plugin specific objects
        self.cmd = ''                    # plugin specific command string

        # Log controls
        self.config_logged = False       # used to log once the plugin config
        self.error_logged = False        # used to prevent log flooding
        self.log_throttle_count = 0      # used to count throttle logs
        self.INIT_LOG_THROTTLE = 10      # the init log throttle threshold
        self.http_retry_count = 0        # track http error cases
        self.HTTP_RETRY_THROTTLE = 6     # http retry threshold
        self.phase = 0                   # tracks current phase; init, sampling

        collectd.debug("%s Common PluginObject constructor [%s]" %
                       (plugin, url))

    def init_ready(self):
        """Test for system init ready state.

        :returns: False until the initial config complete flag file is
                  present ; True once it is. The 'needs retry' log is
                  throttled to one in INIT_LOG_THROTTLE calls.
        """
        if os.path.exists(tsc.INITIAL_CONFIG_COMPLETE_FLAG) is False:
            self.log_throttle_count += 1
            if self.log_throttle_count > self.INIT_LOG_THROTTLE:
                collectd.info("%s initialization needs retry" % self.plugin)
                self.log_throttle_count = 0
            return False
        else:
            self.log_throttle_count = 0
        return True

    def gethostname(self):
        """Fetch the hostname.

        :returns: hostname string on success ; None on failure
        """
        try:
            hostname = socket.gethostname()
            if hostname:
                return hostname
        except Exception:
            collectd.error("%s failed to get hostname" % self.plugin)
        return None

    def is_virtual(self):
        """Check for virtual host.

        Executes 'facter is_virtual' and parses its output.

        :returns: True if this host reports itself as virtual ;
                  False otherwise or on any execution error
        """
        try:
            cmd = '/usr/bin/facter is_virtual'
            res, err = processutils.execute(cmd, shell=True)
            if err:
                return False
            elif res:
                # remove the trailing '\n' with strip()
                if res.strip() == 'true':
                    collectd.info("%s %s is virtual" %
                                  (self.plugin, self.hostname))
                    return True
        except Exception as ex:
            # Fix: the two format arguments must be passed as a tuple.
            # The previous '% self.plugin, ex' raised a TypeError inside
            # this exception handler.
            collectd.info("%s failed to execute '/usr/bin/facter' ; %s" %
                          (self.plugin, ex))
        return False

    def check_for_fit(self, name, unit):
        """Load FIT (fault insertion test) data into self.usage if present.

        FIT file lines are either '<instance> <value>' or a bare value.

        :param name: basename of the fit data file under /var/run/fit
        :param unit: instance number to match in the fit file
        :returns: False when FIT data was loaded into self.usage ;
                  True otherwise (no FIT file, or no usable data)
        """
        fit_file = '/var/run/fit/' + name + '_data'
        if os.path.exists(fit_file):
            valid = False
            with open(fit_file, 'r') as infile:
                for line in infile:
                    try:
                        # '<instance> <value>' form ; only take the value
                        # whose instance matches the requested unit
                        inst, val = line.split(' ')
                        if int(unit) == int(inst):
                            self.usage = float(val)
                            valid = True
                    except Exception:
                        try:
                            # bare value form ; applies to any unit
                            val = float(line)
                            self.usage = float(val)
                            valid = True
                        except Exception:
                            collectd.error("%s bad FIT data; ignoring" %
                                           self.plugin)

            if valid is True:
                # Fix: usage is the float ('%.2f') and unit the integer
                # ('%d') ; the two arguments were previously swapped.
                collectd.info("%s %.2f usage (unit %d) (FIT)" %
                              (self.plugin, self.usage, unit))
                return False
        return True

    def make_http_request(self, url=None, to=None, hdrs=None):
        """Make a blocking HTTP request and store the parsed result.

        :param url: override of self.url for this request
        :param to: timeout override ; defaults to PLUGIN_TIMEOUT
        :param hdrs: header override ; defaults to PLUGIN_HTTP_HEADERS
        :returns: True on success ; False on request or parse error

        Updates self.resp with the raw body and self.jresp with the
        json-decoded response.
        """
        try:
            # handle timeout override
            if to is None:
                to = PLUGIN_TIMEOUT

            # handle url override
            if url is None:
                url = self.url

            # handle header override
            if hdrs is None:
                hdrs = PLUGIN_HTTP_HEADERS

            http = httplib2.Http(timeout=to)
            resp = http.request(url, headers=hdrs)

        except Exception as ex:
            collectd.info("%s http request exception ; %s" %
                          (self.plugin, str(ex)))
            return False

        try:
            collectd.debug("%s Resp: %s" %
                           (self.plugin, resp[1]))

            self.resp = resp[1]
            self.jresp = json.loads(resp[1])

        except Exception as ex:
            collectd.error("%s http response parse exception ; %s" %
                           (self.plugin, str(ex)))
            if len(self.resp):
                collectd.error("%s response: %s" %
                               (self.plugin, self.resp))
            return False
        return True
def is_uuid_like(val):
    """Return True when 'val' is a canonical-form UUID string.

    Canonical form is the lowercase, dashed representation:
    aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
    """
    try:
        parsed = uuid.UUID(val)
    except (TypeError, ValueError, AttributeError):
        return False
    return str(parsed) == val
def get_severity_str(severity):
    """Return the display string for the specified fm severity code."""
    severity_names = {
        fm_constants.FM_ALARM_SEVERITY_CLEAR: "clear",
        fm_constants.FM_ALARM_SEVERITY_CRITICAL: "critical",
        fm_constants.FM_ALARM_SEVERITY_MAJOR: "major",
        fm_constants.FM_ALARM_SEVERITY_MINOR: "minor",
    }
    # any unrecognized code maps to "unknown"
    return severity_names.get(severity, "unknown")

View File

@ -1,15 +0,0 @@
<Plugin "threshold">
<Plugin "ptp">
<Type "time_offset">
Instance "nsec"
Persist true
PersistOK true
WarningMax 1000
FailureMax 1000000
WarningMin -1000
FailureMin -1000000
Hits 2
Invert false
</Type>
</Plugin>
</Plugin>

View File

@ -1,988 +0,0 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
############################################################################
#
# This file is the collectd 'Precision Time Protocol' Service Monitor.
#
# Algorithm:
#
# while not config ; check again
# while not init ; retry
# if startup
# clear all ptp alarms
# if ptp enabled
# if ptp not running
# raise 'process' alarm
# else
# read grand master and current skew
# if not controller and is grand master
# raise 'no lock' alarm
# if skew is out-of-tolerance
# raise out-of-tolerance alarm
#
#
# manage alarm state throught
# retry on alarm state change failures
# only make raise/clear alarm calls on severity state changes
#
############################################################################
import os
import collectd
import subprocess
import tsconfig.tsconfig as tsc
import plugin_common as pc
from fm_api import constants as fm_constants
from fm_api import fm_api
debug = False
# Fault manager API Object
api = fm_api.FaultAPIsV2()
PLUGIN_ALARMID = "100.119"
# name of the plugin - all logs produced by this plugin are prefixed with this
PLUGIN = 'ptp plugin'
# Service name
PTP = 'Precision Time Protocol (PTP)'
# Interface Monitoring Interval in seconds
PLUGIN_AUDIT_INTERVAL = 300
# Sample Data 'type' and 'instance' database field values.
PLUGIN_TYPE = 'time_offset'
PLUGIN_TYPE_INSTANCE = 'nsec'
# Primary PTP service name
PLUGIN_SERVICE = 'ptp4l.service'
# Plugin configuration file
#
# This plugin looks for the timestamping mode in the ptp4l config file.
# time_stamping hardware
#
PLUGIN_CONF_FILE = '/etc/ptp4l.conf'
PLUGIN_CONF_TIMESTAMPING = 'time_stamping'
# Tools used by plugin
SYSTEMCTL = '/usr/bin/systemctl'
ETHTOOL = '/usr/sbin/ethtool'
PLUGIN_STATUS_QUERY_EXEC = '/usr/sbin/pmc'
# Query PTP service administrative (enabled/disabled) state
#
# > systemctl is-enabled ptp4l
# enabled
# > systemctl disable ptp4l
# > systemctl is-enabled ptp4l
# disabled
SYSTEMCTL_IS_ENABLED_OPTION = 'is-enabled'
SYSTEMCTL_IS_ENABLED_RESPONSE = 'enabled'
SYSTEMCTL_IS_DISABLED_RESPONSE = 'disabled'
# Query PTP service activity (active=running / inactive) state
#
# > systemctl is-active ptp4l
# active
# > systemctl stop ptp4l
# > systemctl is-active ptp4l
# inactive
SYSTEMCTL_IS_ACTIVE_OPTION = 'is-active'
SYSTEMCTL_IS_ACTIVE_RESPONSE = 'active'
SYSTEMCTL_IS_INACTIVE_RESPONSE = 'inactive'
# Alarm Cause codes ; used to specify what alarm EID to assert or clear.
ALARM_CAUSE__NONE = 0
ALARM_CAUSE__PROCESS = 1
ALARM_CAUSE__OOT = 2
ALARM_CAUSE__NO_LOCK = 3
ALARM_CAUSE__UNSUPPORTED_HW = 4
ALARM_CAUSE__UNSUPPORTED_SW = 5
ALARM_CAUSE__UNSUPPORTED_LEGACY = 6
# Run Phase
RUN_PHASE__INIT = 0
RUN_PHASE__DISABLED = 1
RUN_PHASE__NOT_RUNNING = 2
RUN_PHASE__SAMPLING = 3
# Clock Sync Out-Of-Tolerance thresholds
OOT_MINOR_THRESHOLD = int(1000)
OOT_MAJOR_THRESHOLD = int(1000000)
# Instantiate the common plugin control object
obj = pc.PluginObject(PLUGIN, "")
# Create an alarm management class
class PTP_alarm_object:
    """Bookkeeping for a single PTP alarm (one per cause, per interface).

    Instances start in the cleared, not-raised state ; the reason,
    repair and entity id strings are filled in by
    create_interface_alarm_objects.
    """

    def __init__(self, interface=None):
        self.interface = interface
        self.alarm = ALARM_CAUSE__NONE
        self.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
        self.cause = fm_constants.ALARM_PROBABLE_CAUSE_50
        self.raised = False
        self.eid = ''
        self.reason = ''
        self.repair = ''
# Plugin specific control class and object.
class PTP_ctrl_object:
    """Plugin-level control state shared across audit cycles."""

    def __init__(self):
        # throttle counter for grand-master related logs
        self.gm_log_throttle = 0
        # references to the host scoped alarm objects ; attached
        # after the alarm objects are created
        self.process_alarm_object = None
        self.oot_alarm_object = None
        self.nolock_alarm_object = None
# module level control object ; its alarm object references are
# attached by create_interface_alarm_objects
ctrl = PTP_ctrl_object()
# Alarm object list, one entry for each interface and alarm cause case
ALARM_OBJ_LIST = []
# UT verification utilities
def assert_all_alarms():
    """UT helper ; raise every alarm in ALARM_OBJ_LIST."""
    for alarm_obj in ALARM_OBJ_LIST:
        raise_alarm(alarm_obj.alarm, alarm_obj.interface, 0)
def clear_all_alarms():
    """UT helper ; clear every alarm in ALARM_OBJ_LIST and log each result."""
    for alarm_obj in ALARM_OBJ_LIST:
        msg = 'cleared' if clear_alarm(alarm_obj.eid) is True else 'clear failed'
        collectd.info("%s %s:%s alarm %s" %
                      (PLUGIN, PLUGIN_ALARMID, alarm_obj.eid, msg))
def print_alarm_object(o):
    """Dump one alarm object's state to the collectd log."""
    collectd.info("%s Interface:%s Cause: %d Severity:%s Raised:%d" %
                  (PLUGIN, o.interface, o.alarm, o.severity, o.raised))
    for label, text in (('Entity', o.eid),
                        ('Reason', o.reason),
                        ('Repair', o.repair)):
        collectd.info("%s %s:[%s]" % (PLUGIN, label, text))
def print_alarm_objects():
    """Dump every known alarm object to the collectd log."""
    for alarm_obj in ALARM_OBJ_LIST:
        print_alarm_object(alarm_obj)
# Interface:Supported Modes dictionary. key:value
#
#   interface:modes
#
# populated by init_func from the [interface] sections of ptp4l.conf ;
# values come from _get_supported_modes
interfaces = {}
#####################################################################
#
# Name : _get_supported_modes
#
# Description: Invoke ethtool -T <interface> and load its
# time stamping capabilities.
#
# hardware, software or legacy.
#
# Parameters : The name of the physical interface to query the
# supported modes for.
#
# Interface Capabilities Output Examples:
#
# vbox prints this as it only supports software timestamping
# software-transmit (SOF_TIMESTAMPING_TX_SOFTWARE)
# software-receive (SOF_TIMESTAMPING_RX_SOFTWARE)
#
# full support output looks like this
# hardware-transmit (SOF_TIMESTAMPING_TX_HARDWARE)
# software-transmit (SOF_TIMESTAMPING_TX_SOFTWARE)
# hardware-receive (SOF_TIMESTAMPING_RX_HARDWARE)
# software-receive (SOF_TIMESTAMPING_RX_SOFTWARE)
# hardware-raw-clock (SOF_TIMESTAMPING_RAW_HARDWARE)
#
# Only legacy support output looks like this
# hardware-raw-clock (SOF_TIMESTAMPING_RAW_HARDWARE)
#
# Provisionable PTP Modes are
# hardware -> hardware-transmit/receive
# software -> software-transmit/receive
# legacy -> hardware-raw-clock
TIMESTAMP_MODE__HW = 'hardware'      # hardware-transmit/receive
TIMESTAMP_MODE__SW = 'software'      # software-transmit/receive
TIMESTAMP_MODE__LEGACY = 'legacy'    # hardware-raw-clock
#
# Returns : a list of supported modes
#
#####################################################################
def _get_supported_modes(interface):
    """Get the supported timestamping modes for the specified interface.

    Parses 'ethtool -T <interface>' output and returns a list built from
    TIMESTAMP_MODE__LEGACY/SW/HW ; None when ethtool produced no output.
    """

    modes = []
    hw_tx = hw_rx = sw_tx = sw_rx = False
    data = subprocess.check_output([ETHTOOL, '-T', interface]).split('\n')
    if not data:
        collectd.info("%s no ethtool output for %s" % (PLUGIN, interface))
        return None

    collectd.debug("%s 'ethtool -T %s' output:%s\n" %
                   (PLUGIN, interface, data))
    in_capabilities = False
    for index, line in enumerate(data):
        collectd.debug("%s data[%d]:%s\n" % (PLUGIN, index, line))
        if 'Capabilities' in line:
            # start of the capabilities list
            in_capabilities = True
        elif in_capabilities is True:
            if 'PTP Hardware Clock' in line:
                # no more modes after this label
                break
            elif 'hardware-transmit' in line:
                hw_tx = True
            elif 'hardware-receive' in line:
                hw_rx = True
            elif 'software-transmit' in line:
                sw_tx = True
            elif 'software-receive' in line:
                sw_rx = True
            elif 'hardware-raw-clock' in line:
                modes.append(TIMESTAMP_MODE__LEGACY)

    # software / hardware modes need both directions advertised
    if sw_tx is True and sw_rx is True:
        modes.append(TIMESTAMP_MODE__SW)
    if hw_tx is True and hw_rx is True:
        modes.append(TIMESTAMP_MODE__HW)

    if modes:
        collectd.debug("%s %s interface PTP capabilities: %s" %
                       (PLUGIN, interface, modes))
    else:
        collectd.info("%s no capabilities advertised for %s" %
                      (PLUGIN, interface))
    return modes
#####################################################################
#
# Name : get_alarm_object
#
# Description: Search the alarm list based on the alarm cause
# code and interface.
#
# Returns : Alarm object if found ; otherwise None
#
#####################################################################
def get_alarm_object(alarm, interface=None):
    """Look up an alarm object by cause code and optional interface.

    :returns: matching alarm object ; None (with a log) when not found
    """
    for candidate in ALARM_OBJ_LIST:
        if candidate.alarm != alarm:
            continue
        # a None interface matches the host scoped alarm objects
        if interface is None or candidate.interface == interface:
            return candidate

    collectd.info("%s alarm object lookup failed ; %d:%s" %
                  (PLUGIN, alarm, interface))
    return None
#####################################################################
#
# Name : clear_alarm
#
# Description: Clear the ptp alarm with the specified entity ID.
#
# Returns : True if operation succeeded
# False if there was an error exception.
#
# Assumptions: Caller can decide to retry based on return status.
#
#####################################################################
def clear_alarm(eid):
    """Clear the ptp alarm with the specified entity ID.

    :returns: True when the clear call completed (including the
              already-cleared case) ; False on an fm exception
    """
    try:
        was_set = api.clear_fault(PLUGIN_ALARMID, eid)
        if was_set is True:
            collectd.info("%s %s:%s alarm cleared" %
                          (PLUGIN, PLUGIN_ALARMID, eid))
        else:
            collectd.info("%s %s:%s alarm already cleared" %
                          (PLUGIN, PLUGIN_ALARMID, eid))
        return True
    except Exception as ex:
        collectd.error("%s 'clear_fault' exception ; %s:%s ; %s" %
                       (PLUGIN, PLUGIN_ALARMID, eid, ex))
        return False
#####################################################################
#
# Name : raise_alarm
#
# Description: Assert a specific PTP alarm based on the alarm cause
# code and interface.
#
# Handle special case cause codes
# Handle failure to raise fault
#
# Assumptions: Short circuited Success return if the alarm is
# already known to be asserted.
#
# Returns : False on Failure
# True on Success
#
#####################################################################
def raise_alarm(alarm_cause, interface=None, data=0):
    """Assert a cause based PTP alarm

    :param alarm_cause: one of the ALARM_CAUSE__ codes
    :param interface: interface name for interface scoped alarms
    :param data: skew sample used to build the OOT reason text
    :returns: True on success or when no raise was needed ;
              False when the fm set_fault call failed
    """
    collectd.debug("%s Raising Alarm %d" % (PLUGIN, alarm_cause))

    alarm = get_alarm_object(alarm_cause, interface)
    if alarm is None:
        # log created for None case in the get_alarm_object util
        return True

    # copy the reason as it might be updated for the OOT,
    # most typical, case.
    reason = alarm.reason

    # Handle some special cases
    #
    if alarm_cause == ALARM_CAUSE__OOT:
        # If this is an out of tolerance alarm then add the
        # out of tolerance reading to the reason string before
        # asserting the alarm.
        #
        # Keep the alarm updated with the latest sample reading
        # and severity even if its already asserted.
        if abs(float(data)) > 100000000000:
            reason += 'more than 100 seconds'
        elif abs(float(data)) > 10000000000:
            reason += 'more than 10 seconds'
        elif abs(float(data)) > 1000000000:
            reason += 'more than 1 second'
        elif abs(float(data)) > 1000000:
            # NOTE(review): '/' truncates under python2 so the reading is
            # reported in whole millisecs — presumably intentional ; confirm
            reason += str(abs(int(data)) / 1000000)
            reason += ' millisecs'
        elif abs(float(data)) > 1000:
            reason += str(abs(int(data)) / 1000)
            reason += ' microsecs'
        else:
            reason += str(float(data))
            reason += ' ' + PLUGIN_TYPE_INSTANCE

    elif alarm.raised is True:
        # If alarm already raised then exit.
        #
        # All other alarms are a Major so there is no need to
        # track a change in severity and update accordingly.
        return True

    elif alarm_cause == ALARM_CAUSE__PROCESS:
        reason = 'Provisioned ' + PTP + ' \'' + obj.mode
        reason += '\' time stamping mode seems to be unsupported by this host'

    try:
        fault = fm_api.Fault(
            alarm_id=PLUGIN_ALARMID,
            alarm_state=fm_constants.FM_ALARM_STATE_SET,
            entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
            entity_instance_id=alarm.eid,
            severity=alarm.severity,
            reason_text=reason,
            alarm_type=obj.alarm_type,
            probable_cause=alarm.cause,
            proposed_repair_action=alarm.repair,
            service_affecting=False,  # obj.service_affecting,
            suppression=True)  # obj.suppression)

        alarm_uuid = api.set_fault(fault)
        if pc.is_uuid_like(alarm_uuid) is False:
            # Don't _add_unreachable_server list if the fm call failed.
            # That way it will be retried at a later time.
            collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
                           (PLUGIN, PLUGIN_ALARMID, alarm.eid, alarm_uuid))
            return False
        else:
            collectd.info("%s %s:%s:%s alarm raised" %
                          (PLUGIN, PLUGIN_ALARMID, alarm.eid, alarm.severity))
            alarm.raised = True
            return True

    except Exception as ex:
        collectd.error("%s 'set_fault' exception ; %s:%s:%s ; %s" %
                       (PLUGIN,
                        PLUGIN_ALARMID,
                        alarm.eid,
                        alarm.severity,
                        ex))
    return False
#####################################################################
#
# Name : create_interface_alarm_objects
#
# Description: Create alarm objects for specified interface
#
#####################################################################
def create_interface_alarm_objects(interface=None):
    """Create alarm objects

    With no interface the host scoped alarm objects (process, OOT,
    no-lock) are created and attached to ctrl ; with an interface the
    three unsupported-timestamping alarm objects for that interface are
    created. All objects are appended to ALARM_OBJ_LIST.
    """
    collectd.debug("%s Alarm Object Create: Interface:%s " %
                   (PLUGIN, interface))

    if interface is None:
        # 'process' alarm ; provisioned mode unsupported / not running
        o = PTP_alarm_object()
        o.alarm = ALARM_CAUSE__PROCESS
        o.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
        o.reason = obj.hostname + ' does not support the provisioned '
        o.reason += PTP + ' mode '
        o.repair = 'Check host hardware reference manual '
        o.repair += 'to verify that the selected PTP mode is supported'
        o.eid = obj.base_eid + '.ptp'
        o.cause = fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN  # 'unknown'
        ALARM_OBJ_LIST.append(o)
        ctrl.process_alarm_object = o

        # out-of-tolerance alarm ; reason is completed with the current
        # skew reading by raise_alarm
        o = PTP_alarm_object()
        o.alarm = ALARM_CAUSE__OOT
        o.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
        o.reason = obj.hostname + ' '
        o.reason += PTP + " clocking is out of tolerance by "
        o.repair = "Check quality of the clocking network"
        o.eid = obj.base_eid + '.ptp=out-of-tolerance'
        o.cause = fm_constants.ALARM_PROBABLE_CAUSE_50  # THRESHOLD CROSS
        ALARM_OBJ_LIST.append(o)
        ctrl.oot_alarm_object = o

        o = PTP_alarm_object()
        # Only applies to storage and worker nodes
        o.alarm = ALARM_CAUSE__NO_LOCK
        o.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
        o.reason = obj.hostname
        o.reason += ' is not locked to remote PTP Grand Master'
        o.repair = 'Check network'
        o.eid = obj.base_eid + '.ptp=no-lock'
        o.cause = fm_constants.ALARM_PROBABLE_CAUSE_51  # timing-problem
        ALARM_OBJ_LIST.append(o)
        ctrl.nolock_alarm_object = o

    else:
        # per-interface 'unsupported timestamping mode' alarms
        o = PTP_alarm_object(interface)
        o.alarm = ALARM_CAUSE__UNSUPPORTED_HW
        o.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
        o.reason = obj.hostname + " '" + interface + "' does not support "
        o.reason += PTP + ' Hardware timestamping'
        o.repair = 'Check host hardware reference manual to verify PTP '
        o.repair += 'Hardware timestamping is supported by this interface'
        o.eid = obj.base_eid + '.ptp=' + interface
        o.eid += '.unsupported=hardware-timestamping'
        o.cause = fm_constants.ALARM_PROBABLE_CAUSE_7  # 'config error'
        ALARM_OBJ_LIST.append(o)

        o = PTP_alarm_object(interface)
        o.alarm = ALARM_CAUSE__UNSUPPORTED_SW
        o.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
        o.reason = obj.hostname + " '" + interface + "' does not support "
        o.reason += PTP + ' Software timestamping'
        o.repair = 'Check host hardware reference manual to verify PTP '
        o.repair += 'Software timestamping is supported by this interface'
        o.eid = obj.base_eid + '.ptp=' + interface
        o.eid += '.unsupported=software-timestamping'
        o.cause = fm_constants.ALARM_PROBABLE_CAUSE_7  # 'config error'
        ALARM_OBJ_LIST.append(o)

        o = PTP_alarm_object(interface)
        o.alarm = ALARM_CAUSE__UNSUPPORTED_LEGACY
        o.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
        o.reason = obj.hostname + " '" + interface + "' does not support "
        o.reason += PTP + " Legacy timestamping"
        o.repair = 'Check host hardware reference manual to verify PTP '
        o.repair += 'Legacy or Raw Clock is supported by this host'
        o.eid = obj.base_eid + '.ptp=' + interface
        o.eid += '.unsupported=legacy-timestamping'
        o.cause = fm_constants.ALARM_PROBABLE_CAUSE_7  # 'config error'
        ALARM_OBJ_LIST.append(o)
#####################################################################
#
# Name : read_timestamp_mode
#
# Description: Refresh the timestamping mode if it changes
#
#####################################################################
def read_timestamp_mode():
    """Refresh obj.mode from the ptp4l config file.

    Logs the mode only when it changes ; errors out and sets obj.mode
    to None when the config file is missing.
    """
    if not os.path.exists(PLUGIN_CONF_FILE):
        collectd.error("%s failed to load ptp4l configuration" % PLUGIN)
        obj.mode = None
        return

    previous_mode = obj.mode
    with open(PLUGIN_CONF_FILE, 'r') as infile:
        for line in infile:
            if PLUGIN_CONF_TIMESTAMPING in line:
                obj.mode = line.split()[1].strip('\n')
                break

    if not obj.mode:
        collectd.error("%s failed to get Timestamping Mode" % PLUGIN)
    elif obj.mode != previous_mode:
        collectd.info("%s Timestamping Mode: %s" %
                      (PLUGIN, obj.mode))
#####################################################################
#
# Name : init_func
#
# Description: The collectd initialization entrypoint for
# this plugin
#
# Assumptions: called only once
#
# Algorithm : check for no
#
#
#####################################################################
def init_func():
    """Plugin initialization.

    Waits for system init-ready, creates the host scoped alarm objects,
    then parses ptp4l.conf for monitored [interface] sections (creating
    per-interface alarm objects and recording their supported
    timestamping modes) and for the provisioned timestamping mode.
    Sets obj.init_done when complete ; re-invoked by read_func until
    it succeeds.
    """
    if obj.init_ready() is False:
        return False

    obj.hostname = obj.gethostname()
    obj.base_eid = 'host=' + obj.hostname

    # Create the interface independent alarm objects.
    create_interface_alarm_objects()

    # load monitored interfaces and supported modes
    if os.path.exists(PLUGIN_CONF_FILE):
        with open(PLUGIN_CONF_FILE, 'r') as infile:
            for line in infile:
                # The PTP interfaces used are specified in the ptp4l.conf
                # file as [interface]. There may be more than one.
                # Presently there is no need to track the function of the
                # interface ; namely mgmnt or oam.
                if line[0] == '[':
                    interface = line.split(']')[0].split('[')[1]
                    if interface and interface != 'global':
                        interfaces[interface] = _get_supported_modes(interface)
                        create_interface_alarm_objects(interface)

                if PLUGIN_CONF_TIMESTAMPING in line:
                    obj.mode = line.split()[1].strip('\n')

        if obj.mode:
            collectd.info("%s Timestamping Mode: %s" %
                          (PLUGIN, obj.mode))
        else:
            collectd.error("%s failed to get Timestamping Mode" % PLUGIN)
    else:
        collectd.error("%s failed to load ptp4l configuration" % PLUGIN)
        obj.mode = None

    for key, value in interfaces.items():
        collectd.info("%s interface %s supports timestamping modes: %s" %
                      (PLUGIN, key, value))

    # remove '# to dump alarm object data
    # print_alarm_objects()

    if tsc.nodetype == 'controller':
        obj.controller = True

    obj.virtual = obj.is_virtual()
    obj.init_done = True
    obj.log_throttle_count = 0
    collectd.info("%s initialization complete" % PLUGIN)
#####################################################################
#
# Name : read_func
#
# Description: The collectd audit entrypoint for PTP Monitoring
#
# Assumptions: collectd calls init_func one time.
#
#
# retry init if needed
# retry fm connect if needed
# check service enabled state
# check service running state
# error -> alarm host=<hostname>.ptp
# check
#
#####################################################################
def read_func():
    """Audit entrypoint ; invoked by collectd every PLUGIN_AUDIT_INTERVAL.

    Re-runs init until done, clears stale startup alarms once FM is
    reachable, tracks the ptp4l service admin (is-enabled) and activity
    (is-active) states, then samples PTP port/clock status with 'pmc'
    and manages the no-lock and out-of-tolerance alarms accordingly.
    Always returns 0.
    """
    # virtual hosts do not monitor ptp
    if obj.virtual is True:
        return 0

    # check and run init until it reports init_done True
    if obj.init_done is False:
        if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
            collectd.info("%s re-running init" % PLUGIN)
        obj.log_throttle_count += 1
        init_func()
        return 0

    # One-time startup alarm cleanup once FM can be queried
    if obj.fm_connectivity is False:
        try:
            # query FM for existing alarms.
            alarms = api.get_faults_by_id(PLUGIN_ALARMID)
        except Exception as ex:
            collectd.error("%s 'get_faults_by_id' exception ;"
                           " %s ; %s" %
                           (PLUGIN, PLUGIN_ALARMID, ex))
            return 0

        if alarms:
            for alarm in alarms:
                collectd.debug("%s found startup alarm '%s'" %
                               (PLUGIN, alarm.entity_instance_id))

                eid = alarm.entity_instance_id
                if eid is None:
                    collectd.error("%s startup alarm query error ; no eid" %
                                   PLUGIN)
                    continue
                # get the hostname host=<hostname>.stuff
                # split over base eid and then
                # compare that to this plugin's base eid
                # ignore alarms not for this host
                if eid.split('.')[0] != obj.base_eid:
                    continue
                else:
                    # load the state of the specific alarm
                    instance = eid.split('.')[1].split('=')
                    if instance[0] == 'ptp':
                        # clear all ptp alarms on process startup
                        # just in case interface names have changed
                        # since the alarm was raised.
                        if clear_alarm(eid) is False:
                            # if we can't clear the alarm now then error out.
                            collectd.error("%s failed to clear startup "
                                           "alarm %s:%s" %
                                           (PLUGIN, PLUGIN_ALARMID, eid))
                            # try again next time around
                            return 0
                        else:
                            collectd.info("%s cleared startup alarm '%s'" %
                                          (PLUGIN, alarm.entity_instance_id))
                    else:
                        if clear_alarm(eid) is False:
                            collectd.error("%s failed to clear invalid PTP "
                                           "alarm %s:%s" %
                                           (PLUGIN, PLUGIN_ALARMID,
                                            alarm.entity_instance_id))
                            return 0
                        else:
                            collectd.info("%s cleared found invalid startup"
                                          " alarm %s:%s" %
                                          (PLUGIN,
                                           PLUGIN_ALARMID,
                                           alarm.entity_instance_id))
        else:
            collectd.info("%s no startup alarms found" % PLUGIN)

        obj.config_complete = True
        obj.fm_connectivity = True
        # assert_all_alarms()

    # This plugin supports PTP in-service state change by checking
    # service state on every audit ; every 5 minutes.
    data = subprocess.check_output([SYSTEMCTL,
                                    SYSTEMCTL_IS_ENABLED_OPTION,
                                    PLUGIN_SERVICE])
    collectd.debug("%s PTP admin state:%s" % (PLUGIN, data.rstrip()))

    if data.rstrip() == SYSTEMCTL_IS_DISABLED_RESPONSE:
        # Service is administratively disabled ; clear any raised alarms
        # Manage execution phase
        if obj.phase != RUN_PHASE__DISABLED:
            obj.phase = RUN_PHASE__DISABLED
            obj.log_throttle_count = 0

        if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
            collectd.info("%s PTP Service Disabled" % PLUGIN)
        obj.log_throttle_count += 1

        for o in ALARM_OBJ_LIST:
            if o.raised is True:
                if clear_alarm(o.eid) is True:
                    o.raised = False
                else:
                    collectd.error("%s %s:%s clear alarm failed "
                                   "; will retry" %
                                   (PLUGIN, PLUGIN_ALARMID, o.eid))
        return 0

    data = subprocess.check_output([SYSTEMCTL,
                                    SYSTEMCTL_IS_ACTIVE_OPTION,
                                    PLUGIN_SERVICE])
    if data.rstrip() == SYSTEMCTL_IS_INACTIVE_RESPONSE:
        # Service is enabled but not running ; raise the 'process' alarm
        # Manage execution phase
        if obj.phase != RUN_PHASE__NOT_RUNNING:
            obj.phase = RUN_PHASE__NOT_RUNNING
            obj.log_throttle_count = 0

        if ctrl.process_alarm_object.alarm == ALARM_CAUSE__PROCESS:
            if ctrl.process_alarm_object.raised is False:
                collectd.error("%s PTP service enabled but not running" %
                               PLUGIN)
                if raise_alarm(ALARM_CAUSE__PROCESS) is True:
                    ctrl.process_alarm_object.raised = True

        # clear all other alarms if the 'process' alarm is raised
        elif ctrl.process_alarm_object.raised is True:
            if clear_alarm(ctrl.process_alarm_object.eid) is True:
                msg = 'cleared'
                ctrl.process_alarm_object.raised = False
            else:
                msg = 'failed to clear'
            collectd.info("%s %s %s:%s" %
                          (PLUGIN, msg, PLUGIN_ALARMID,
                           ctrl.process_alarm_object.eid))
        return 0

    # Handle clearing the 'process' alarm if it is asserted and
    # the process is now running
    if ctrl.process_alarm_object.raised is True:
        if clear_alarm(ctrl.process_alarm_object.eid) is True:
            ctrl.process_alarm_object.raised = False
            collectd.info("%s PTP service enabled and running" % PLUGIN)

    # Auto refresh the timestamping mode in case collectd runs
    # before the ptp manifest or the mode changes on the fly by
    # an in-service manifest.
    # Every 4 audits.
    obj.audits += 1
    if not obj.audits % 4:
        read_timestamp_mode()

    # Manage execution phase
    if obj.phase != RUN_PHASE__SAMPLING:
        obj.phase = RUN_PHASE__SAMPLING
        obj.log_throttle_count = 0

    # Let's read the port status information
    #
    #   sudo /usr/sbin/pmc -u -b 0 'GET PORT_DATA_SET'
    #
    data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC,
                                    '-u', '-b', '0', 'GET PORT_DATA_SET'])

    port_locked = False
    obj.resp = data.split('\n')
    for line in obj.resp:
        if 'portState' in line:
            collectd.debug("%s portState : %s" % (PLUGIN, line.split()[1]))
            port_state = line.split()[1]
            if port_state == 'SLAVE':
                port_locked = True

    # Let's read the clock info, Grand Master sig and skew
    #
    #   sudo /usr/sbin/pmc -u -b 0 'GET TIME_STATUS_NP'
    #
    data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC,
                                    '-u', '-b', '0', 'GET TIME_STATUS_NP'])

    got_master_offset = False
    master_offset = 0
    my_identity = ''
    gm_identity = ''
    gm_present = False
    obj.resp = data.split('\n')
    for line in obj.resp:
        if 'RESPONSE MANAGEMENT TIME_STATUS_NP' in line:
            collectd.debug("%s key : %s" %
                           (PLUGIN, line.split()[0].split('-')[0]))
            my_identity = line.split()[0].split('-')[0]
        if 'master_offset' in line:
            collectd.debug("%s Offset : %s" % (PLUGIN, line.split()[1]))
            master_offset = float(line.split()[1])
            got_master_offset = True
        if 'gmPresent' in line:
            collectd.debug("%s gmPresent : %s" % (PLUGIN, line.split()[1]))
            gm_present = line.split()[1]
        if 'gmIdentity' in line:
            collectd.debug("%s gmIdentity: %s" % (PLUGIN, line.split()[1]))
            gm_identity = line.split()[1]

    # Handle case where this host is the Grand Master
    #   ... or assumes it is.
    if my_identity == gm_identity or port_locked is False:
        if obj.controller is False:
            # Compute and storage nodes should not be the Grand Master
            if ctrl.nolock_alarm_object.raised is False:
                if raise_alarm(ALARM_CAUSE__NO_LOCK, None, 0) is True:
                    ctrl.nolock_alarm_object.raised = True

            # produce a throttled log while this host is not locked to the GM
            if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
                collectd.info("%s %s not locked to remote Grand Master "
                              "(%s)" % (PLUGIN, obj.hostname, gm_identity))
            obj.log_throttle_count += 1

            # No samples for storage and compute nodes that are not
            # locked to a Grand Master
            return 0
        else:
            # Controllers can be a Grand Master ; throttle the log
            if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
                collectd.info("%s %s is Grand Master:%s" %
                              (PLUGIN, obj.hostname, gm_identity))
            obj.log_throttle_count += 1

            # The Grand Master will always be 0 so there is no point
            # creating a sample for it.
            return 0

    # Handle clearing nolock alarm for computes and storage nodes
    elif obj.controller is False:
        if ctrl.nolock_alarm_object.raised is True:
            if clear_alarm(ctrl.nolock_alarm_object.eid) is True:
                ctrl.nolock_alarm_object.raised = False

    # Keep this FIT test code but make it commented out for security
    # if os.path.exists('/var/run/fit/ptp_data'):
    #     master_offset = 0
    #     with open('/var/run/fit/ptp_data', 'r') as infile:
    #         for line in infile:
    #             master_offset = int(line)
    #             got_master_offset = True
    #             collectd.info("%s using ptp FIT data skew:%d" %
    #                           (PLUGIN, master_offset))
    #             break

    # Send sample and Manage the Out-Of-Tolerance alarm
    if got_master_offset is True:

        if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
            collectd.info("%s %s is collecting samples [%5d] "
                          "with Grand Master %s" %
                          (PLUGIN, obj.hostname,
                           float(master_offset), gm_identity))
        obj.log_throttle_count += 1

        # setup the sample structure and dispatch
        val = collectd.Values(host=obj.hostname)
        val.type = PLUGIN_TYPE
        val.type_instance = PLUGIN_TYPE_INSTANCE
        val.plugin = 'ptp'
        val.dispatch(values=[float(master_offset)])

        # Manage the sample OOT alarm severity
        severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
        if abs(master_offset) > OOT_MAJOR_THRESHOLD:
            severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
        elif abs(master_offset) > OOT_MINOR_THRESHOLD:
            severity = fm_constants.FM_ALARM_SEVERITY_MINOR

        # Handle clearing of Out-Of-Tolerance alarm
        if severity == fm_constants.FM_ALARM_SEVERITY_CLEAR:
            if ctrl.oot_alarm_object.raised is True:
                if clear_alarm(ctrl.oot_alarm_object.eid) is True:
                    ctrl.oot_alarm_object.severity = \
                        fm_constants.FM_ALARM_SEVERITY_CLEAR
                    ctrl.oot_alarm_object.raised = False
        else:
            # Special Case:
            # -------------
            # Don't raise minor alarm when in software timestamping mode.
            # Too much skew in software or legacy mode ; alarm would bounce.
            # TODO: Consider making ptp a real time process
            if severity == fm_constants.FM_ALARM_SEVERITY_MINOR \
                    and obj.mode != 'hardware':
                return 0

            # Handle debounce of the OOT alarm.
            # Debounce by 1 for the same severity level.
            if ctrl.oot_alarm_object.severity != severity:
                ctrl.oot_alarm_object.severity = severity

            # This will keep refreshing the alarm text with the current
            # skew value while still debounce on state transitions.
            #
            # Precision ... (PTP) clocking is out of tolerance by 1004 nsec
            #
            elif severity == fm_constants.FM_ALARM_SEVERITY_MINOR:
                # Handle raising the Minor OOT Alarm.
                rc = raise_alarm(ALARM_CAUSE__OOT, None, master_offset)
                if rc is True:
                    ctrl.oot_alarm_object.raised = True

            elif severity == fm_constants.FM_ALARM_SEVERITY_MAJOR:
                # Handle raising the Major OOT Alarm.
                rc = raise_alarm(ALARM_CAUSE__OOT, None, master_offset)
                if rc is True:
                    ctrl.oot_alarm_object.raised = True

            # Record the value that is alarmable
            if severity != fm_constants.FM_ALARM_SEVERITY_CLEAR:
                collectd.info("%s Grand Master ID: %s ; "
                              "HOST ID: %s ; "
                              "GM Present:%s ; "
                              "Skew:%5d" % (PLUGIN,
                                            gm_identity,
                                            my_identity,
                                            gm_present,
                                            master_offset))
    else:
        collectd.info("%s No Clock Sync" % PLUGIN)
    return 0
# register the init and read entrypoints with the collectd daemon ;
# read_func is re-invoked every PLUGIN_AUDIT_INTERVAL seconds
collectd.register_init(init_func)
collectd.register_read(read_func, interval=PLUGIN_AUDIT_INTERVAL)

View File

@ -1,21 +0,0 @@
LoadPlugin python
<Plugin python>
ModulePath "/opt/collectd/extensions/python"
Import "cpu"
<Module "cpu">
Path "/proc/cpuinfo"
</Module>
Import "memory"
<Module "memory">
Path "/proc/meminfo"
</Module>
Import "ntpq"
Import "ptp"
Import "interface"
<Module "interface">
Port 2122
</Module>
Import "remotels"
LogTraces = true
Encoding "utf-8"
</Plugin>

View File

@ -1,13 +0,0 @@
<Plugin "threshold">
<Plugin "remotels">
<Type "absolute">
Instance "reachable"
Persist true
PersistOK true
WarningMin 1
FailureMin 0
Hits 2
Invert false
</Type>
</Plugin>
</Plugin>

View File

@ -1,350 +0,0 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
############################################################################
#
# This is the Remote Logging Server plugin for collectd.
#
# The Remote Logging Server is enabled if /etc/syslog-ng/syslog-ng.conf
# contains '@include remotelogging.conf'
#
# There is no asynchronous notification of remote logging server
# configuration enable/disable state changes. Therefore, each audit
# interval needs to check whether it's enabled or not.
#
# every audit interval ...
#
# read_func:
# check enabled:
# if disabled and alarmed:
# clear alarm
# if enabled:
# get ip and port
# query status
# if connected and alarmed:
# clear alarm
# if not connected and not alarmed:
# raise alarm
#
# system remotelogging-modify --ip_address <ip address>
# --transport tcp
# --enabled True
#
############################################################################
import os
import collectd
import tsconfig.tsconfig as tsc
import plugin_common as pc
from fm_api import constants as fm_constants
from oslo_concurrency import processutils
from fm_api import fm_api
# Fault manager API object used to raise/clear the 100.118 alarm
api = fm_api.FaultAPIsV2()
# name of the plugin - used as the collectd sample 'plugin' field
PLUGIN_NAME = 'remotels'
# all logs produced by this plugin are prefixed with this
PLUGIN = 'remote logging server'
# Interface Monitoring Interval in seconds (collectd read callback period)
PLUGIN_AUDIT_INTERVAL = 60
# Sample Data 'type' and 'instance' database field values.
PLUGIN_TYPE = 'absolute'
PLUGIN_TYPE_INSTANCE = 'reachable'
# Remote Logging Connectivity Alarm ID
PLUGIN_ALARMID = '100.118'
# The file where this plugin learns if remote logging is enabled
SYSLOG_CONF_FILE = '/etc/syslog-ng/syslog-ng.conf'
# Plugin Control Object ; tracks hostname, eid, alarmed/enabled state
obj = pc.PluginObject(PLUGIN, "")
# Raise Remote Logging Server Alarm
def raise_alarm():
    """Assert the remote logging server connectivity alarm.

    Submits a minor, non-service-affecting 100.118 fault against this
    host to the fault manager.  On success the plugin control object
    is flagged as alarmed.
    """
    reason = ('Controller cannot establish connection with '
              'remote logging server.')
    repair = ('Ensure Remote Log Server IP is reachable from '
              'Controller through OAM interface; otherwise '
              'contact next level of support.')
    try:
        alarm_uuid = api.set_fault(fm_api.Fault(
            alarm_id=PLUGIN_ALARMID,
            alarm_state=fm_constants.FM_ALARM_STATE_SET,
            entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
            entity_instance_id=obj.base_eid,
            severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
            reason_text=reason,
            alarm_type=fm_constants.FM_ALARM_TYPE_1,
            probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_6,
            proposed_repair_action=repair,
            service_affecting=False,
            suppression=False))

        # set_fault returns a uuid string on success
        if pc.is_uuid_like(alarm_uuid) is False:
            collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
                           (PLUGIN, PLUGIN_ALARMID,
                            obj.base_eid, alarm_uuid))
        else:
            collectd.info("%s %s:%s alarm raised" %
                          (PLUGIN, PLUGIN_ALARMID, obj.base_eid))
            obj.alarmed = True

    except Exception as ex:
        collectd.error("%s 'set_fault' exception ; %s:%s ; %s " %
                       (PLUGIN, PLUGIN_ALARMID, obj.base_eid, ex))
# Clear remote logging server alarm
def clear_alarm():
    """Clear the remote logging server connectivity alarm.

    Issues a blind clear of the 100.118 alarm for this host.
    Returns True when the clear request completed without exception
    (whether or not an alarm was actually outstanding), else False.
    """
    try:
        cleared = api.clear_fault(PLUGIN_ALARMID, obj.base_eid)
        if cleared is True:
            collectd.info("%s %s:%s alarm cleared" %
                          (PLUGIN, PLUGIN_ALARMID, obj.base_eid))
        else:
            collectd.info("%s %s:%s alarm clear" %
                          (PLUGIN, PLUGIN_ALARMID, obj.base_eid))
        obj.alarmed = False
        return True

    except Exception as ex:
        collectd.error("%s 'clear_fault' exception ; %s:%s ; %s" %
                       (PLUGIN, PLUGIN_ALARMID, obj.base_eid, ex))
        return False
# The config function - called once on collectd process startup
def config_func(config):
    """Configure the plugin (collectd config callback).

    Nothing to parse here ; all configuration is learned during
    normal monitoring.
    """
    obj.config_done = True
    return 0
# The init function - called once on collectd process startup
def init_func():
    """One-time plugin initialization (collectd init callback).

    Only controllers monitor the remote logging server ; other node
    types opt out immediately.  Initialization is retried from the
    read callback until init_ready() reports the system is up.
    """
    # remote logging server monitoring is for controllers only
    if tsc.nodetype != 'controller':
        return 0

    if obj.init_done is False:
        if obj.init_ready() is False:
            return False

        obj.hostname = obj.gethostname()
        obj.base_eid = 'host={}'.format(obj.hostname)
        obj.init_done = True
        collectd.info("%s initialization complete" % PLUGIN)

    return True
# The sample read function - called on every audit interval
def read_func():
    """Remote logging server connectivity audit (collectd read callback).

    Every audit interval on controllers:

      1. Detect whether remote logging is enabled by looking for
         '@include "remotelogging.conf"' in syslog-ng.conf.
      2. If disabled, clear any outstanding alarm and return.
      3. Otherwise parse the remote server protocol, address and port
         from the 'destination remote_log_server' line.
      4. Determine socket connectivity from /proc/net/<tcp|udp> and
         raise or clear the 100.118 alarm accordingly ; both state
         transitions are debounced by one audit.
      5. Dispatch the connectivity sample (1=connected, 0=not).
    """
    # remote logging server monitoring is for controllers only
    if tsc.nodetype != 'controller':
        return 0

    if obj.init_done is False:
        init_func()
        return 0

    # remember previous state so enable/disable transitions can be logged
    current_enabled_state = obj.enabled

    # check to see if remote logging is enabled
    obj.enabled = False  # assume disabled
    if os.path.exists(SYSLOG_CONF_FILE) is True:
        with open(SYSLOG_CONF_FILE, 'r') as infile:
            for line in infile:
                if line.startswith('@include '):
                    service = line.rstrip().split(' ')[1]
                    if service == '"remotelogging.conf"':
                        obj.enabled = True
                        break

    # only log on enable/disable state changes
    if current_enabled_state == obj.enabled:
        logit = False
    else:
        if obj.enabled is False:
            collectd.info("%s is disabled" % PLUGIN)
        else:
            collectd.info("%s is enabled" % PLUGIN)
        logit = True

    # Handle startup case by clearing existing alarm if it's raised.
    # Its runtime cheaper and simpler to issue a blind clear than query.
    if obj.audits == 0:
        if clear_alarm() is False:
            # if clear fails then retry next time
            return 0
        if obj.enabled is False:
            collectd.info("%s is disabled" % PLUGIN)
        obj.audits = 1

    if obj.enabled is False:
        if obj.alarmed is True:
            clear_alarm()
        return 0

    # If we get here then the server is enabled ...
    # Need to query it
    # Get the ip and port from line that looks like this
    #
    #   tag                           proto address        port
    #   ----------------------------- ---   -------------- ---
    #   destination remote_log_server {tcp("128.224.186.65" port(514));};
    #
    address = protocol = port = ''
    with open(SYSLOG_CONF_FILE, 'r') as infile:
        for line in infile:
            if line.startswith('destination remote_log_server'):
                try:
                    if len(line.split('{')) > 1:
                        protocol = line.split('{')[1][0:3]
                        address = line.split('{')[1].split('"')[1]
                        port = line.split('{')[1].split('(')[2].split(')')[0]
                        if not protocol or not address or not port:
                            collectd.error("%s remote log server credentials "
                                           "parse error ; (%s:%s:%s)" %
                                           (PLUGIN, protocol, address, port))
                            return 1
                        else:
                            # line parsed ; move on ...
                            break
                    else:
                        collectd.error("%s remote log server line parse error"
                                       " ; %s" % (PLUGIN, line))
                except Exception as ex:
                    collectd.error("%s remote log server credentials "
                                   "parse exception ; (%s) ; %s" %
                                   (PLUGIN, line, ex))

    # Guard against an enabled config with no parsable destination line.
    # Without this the hex conversion below raises ValueError on int('').
    if not protocol or not address or not port:
        collectd.error("%s no remote log server 'destination' found in %s" %
                       (PLUGIN, SYSLOG_CONF_FILE))
        return 0

    if ':' in address:
        ipv = 6
        # append string '6' (e.g. tcp -> tcp6) ; appending int 6
        # would raise TypeError
        protocol += '6'
        # Monitoring of IPV6 is not currently supported
        return 0
    else:
        ipv = 4

    # This plugin detects server connectivity through its socket status.
    # To get that construct the remote logging server IP string.
    # The files being looked at (/proc/net/tcp(udp)) use hex values,
    # so convert the string caps hex value with reverse ordering of
    # the "ipv4" values
    index = 3
    addr = [0, 0, 0, 0]

    # swap octet order ; /proc/net stores the address little-endian
    for tup in address.split('.'):
        addr[index] = int(tup)
        index -= 1

    # build the CAPs HEX address, each octet zero padded to 2 digits
    UPPER_HEX_IP = ''
    for tup in addr:
        val = hex(int(tup)).split('x')[-1].upper()
        if len(val) == 1:
            UPPER_HEX_IP += '0'
        UPPER_HEX_IP += val

    # append the port, zero padded to 4 hex digits
    UPPER_HEX_IP += ':'
    tmp = hex(int(port)).split('x')[-1].upper()
    for i in range(4 - len(tmp)):
        UPPER_HEX_IP += '0'
    UPPER_HEX_IP += tmp

    # log example tcp:ipv4:128.224.186.65:514 : IP:41BAE080:0202
    collectd.debug("%s %s:ipv%d:%s:%s : IP:%s" %
                   (PLUGIN, protocol, ipv, address, port, UPPER_HEX_IP))

    # extract the socket state column for this server's address:port
    cmd = "cat /proc/net/" + protocol
    cmd += " | awk '{print $3 \" \" $4}' | grep " + UPPER_HEX_IP
    cmd += " | awk '{print $2}'"
    res, err = processutils.execute(cmd, shell=True)
    if err:
        collectd.error("%s processutils error:%s" % (PLUGIN, err))
        # cmd example:
        #   cat /proc/net/tcp | awk '{print $3 " " $4}'
        #                     | grep 41BAE080:0202
        #                     | awk '{print $2}'
        collectd.debug("%s Cmd:%s" % (PLUGIN, cmd))
        return 0

    if res and res.rstrip() == '01':
        # connected state reads 01
        # Example log: Res:[01]
        # clear alarm if
        #  - currently alarmed and
        #  - debounced by 1 ; need 2 connected readings in a row
        if obj.alarmed is True:
            clear_alarm()

        # Only log on state change
        if obj.usage != 1:
            logit = True
        obj.usage = 1
        conn = ''
    else:
        # res typically reads 02 when not connected
        # Example log: Res:[02]
        collectd.debug("%s Res:[%s] " % (PLUGIN, res.rstrip()))

        # raise alarm if
        #  - not already alarmed
        #  - debounced by 1 ; need 2 failures in a row
        if obj.alarmed is False and obj.usage == 0:
            raise_alarm()

        # only log on state change
        if obj.usage == 1 or obj.audits == 1:
            logit = True
        obj.usage = 0
        conn = 'not '

    if logit is True:
        collectd.info("%s is %sconnected [%s ipv%d %s:%s]" %
                      (PLUGIN, conn, protocol, ipv, address, port))

    obj.audits += 1

    # Dispatch usage value to collectd
    val = collectd.Values(host=obj.hostname)
    val.plugin = PLUGIN_NAME
    val.type = PLUGIN_TYPE
    val.type_instance = PLUGIN_TYPE_INSTANCE
    val.dispatch(values=[obj.usage])
    return 0
# register the config, init and read callbacks with the collectd daemon
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func, interval=PLUGIN_AUDIT_INTERVAL)

View File

@ -1,10 +0,0 @@
Metadata-Version: 1.1
Name: influxdb-extensions
Version: 1.0
Summary: influxdb-extensions
Home-page:
Author: Windriver
Author-email: info@windriver.com
License: ASL 2.0
Description: Titanium Cloud influxdb extensions.
Platform: UNKNOWN

View File

@ -1,7 +0,0 @@
SRC_DIR="$PKG_BASE"
COPY_LIST="$PKG_BASE/src/LICENSE \
$PKG_BASE/src/influxdb.conf.pmon \
$PKG_BASE/src/influxdb.service"
TIS_PATCH_VER=2

View File

@ -1,46 +0,0 @@
Summary: Titanium Server influxdb Extensions Package
Name: influxdb-extensions
Version: 1.0
Release: 0%{?_tis_dist}.%{tis_patch_ver}
License: ASL 2.0
Group: base
Packager: Wind River <info@windriver.com>
URL: unknown
# create the files tarball
Source0: %{name}-%{version}.tar.gz
Source1: influxdb.service
Source2: influxdb.conf.pmon
Requires: systemd
Requires: influxdb
Requires: /bin/systemctl
%description
Titanium Cloud influxdb extensions
%define debug_package %{nil}
%define local_unit_dir %{_sysconfdir}/systemd/system
%prep
%setup
%build
%install
install -m 755 -d %{buildroot}%{_sysconfdir}
install -m 755 -d %{buildroot}%{_sysconfdir}/influxdb
install -m 755 -d %{buildroot}%{local_unit_dir}
install -m 644 %{SOURCE1} %{buildroot}%{local_unit_dir}
install -m 600 %{SOURCE2} %{buildroot}%{_sysconfdir}/influxdb
%clean
rm -rf $RPM_BUILD_ROOT
%files
%defattr(-,root,root,-)
%config(noreplace) %{local_unit_dir}/influxdb.service
%{_sysconfdir}/influxdb/*

View File

@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,322 +0,0 @@
### Welcome to the InfluxDB configuration file.
# Once every 24 hours InfluxDB will report anonymous data to m.influxdb.com
# The data includes raft id (random 8 bytes), os, arch, version, and metadata.
# We don't track ip addresses of servers reporting. This is only used
# to track the number of instances running and the versions, which
# is very helpful for us.
# Change this option to true to disable reporting.
reporting-disabled = false
###
### Enterprise registration control
###
[registration]
# enabled = true
# url = "https://enterprise.influxdata.com" # The Enterprise server URL
# token = "" # Registration token for Enterprise server
###
### [meta]
###
### Controls the parameters for the Raft consensus group that stores metadata
### about the InfluxDB cluster.
###
[meta]
dir = "/var/lib/influxdb/meta"
hostname = "localhost"
bind-address = ":8088"
retention-autocreate = true
election-timeout = "1s"
heartbeat-timeout = "1s"
leader-lease-timeout = "500ms"
commit-timeout = "50ms"
cluster-tracing = false
# If enabled, when a Raft cluster loses a peer due to a `DROP SERVER` command,
# the leader will automatically ask a non-raft peer node to promote to a raft
# peer. This only happens if there is a non-raft peer node available to promote.
# This setting only affects the local node, so to ensure it operates correctly, be sure to set
# it in the config of every node.
raft-promotion-enabled = true
###
### [data]
###
### Controls where the actual shard data for InfluxDB lives and how it is
### flushed from the WAL. "dir" may need to be changed to a suitable place
### for your system, but the WAL settings are an advanced configuration. The
### defaults should work for most systems.
###
[data]
dir = "/var/lib/influxdb/data"
# Controls the engine type for new shards. Options are b1, bz1, or tsm1.
# b1 is the 0.9.2 storage engine, bz1 is the 0.9.3 and 0.9.4 engine.
# tsm1 is the 0.9.5 engine and is currenly EXPERIMENTAL. Until 0.9.5 is
# actually released data written into a tsm1 engine may be need to be wiped
# between upgrades.
# engine ="bz1"
# The following WAL settings are for the b1 storage engine used in 0.9.2. They won't
# apply to any new shards created after upgrading to a version > 0.9.3.
max-wal-size = 104857600 # Maximum size the WAL can reach before a flush. Defaults to 100MB.
wal-flush-interval = "10m" # Maximum time data can sit in WAL before a flush.
wal-partition-flush-delay = "2s" # The delay time between each WAL partition being flushed.
# These are the WAL settings for the storage engine >= 0.9.3
wal-dir = "/var/lib/influxdb/wal"
wal-enable-logging = true
# When a series in the WAL in-memory cache reaches this size in bytes it is marked as ready to
# flush to the index
# wal-ready-series-size = 25600
# Flush and compact a partition once this ratio of series are over the ready size
# wal-compaction-threshold = 0.6
# Force a flush and compaction if any series in a partition gets above this size in bytes
# wal-max-series-size = 2097152
# Force a flush of all series and full compaction if there have been no writes in this
# amount of time. This is useful for ensuring that shards that are cold for writes don't
# keep a bunch of data cached in memory and in the WAL.
# wal-flush-cold-interval = "10m"
# Force a partition to flush its largest series if it reaches this approximate size in
# bytes. Remember there are 5 partitions so you'll need at least 5x this amount of memory.
# The more memory you have, the bigger this can be.
# wal-partition-size-threshold = 20971520
# Whether queries should be logged before execution. Very useful for troubleshooting, but will
# log any sensitive data contained within a query.
# query-log-enabled = true
###
### [hinted-handoff]
###
### Controls the hinted handoff feature, which allows nodes to temporarily
### store queued data when one node of a cluster is down for a short period
### of time.
###
[hinted-handoff]
enabled = true
dir = "/var/lib/influxdb/hh"
max-size = 1073741824
max-age = "168h"
retry-rate-limit = 0
# Hinted handoff will start retrying writes to down nodes at a rate of once per second.
# If any error occurs, it will backoff in an exponential manner, until the interval
# reaches retry-max-interval. Once writes to all nodes are successfully completed the
# interval will reset to retry-interval.
retry-interval = "1s"
retry-max-interval = "1m"
# Interval between running checks for data that should be purged. Data is purged from
# hinted-handoff queues for two reasons. 1) The data is older than the max age, or
# 2) the target node has been dropped from the cluster. Data is never dropped until
# it has reached max-age however, for a dropped node or not.
purge-interval = "1h"
###
### [cluster]
###
### Controls non-Raft cluster behavior, which generally includes how data is
### shared across shards.
###
[cluster]
shard-writer-timeout = "10s" # The time within which a shard must respond to write.
write-timeout = "5s" # The time within which a write operation must complete on the cluster.
###
### [retention]
###
### Controls the enforcement of retention policies for evicting old data.
###
[retention]
enabled = true
check-interval = "30m"
###
### [shard-precreation]
###
### Controls the precreation of shards, so they are created before data arrives.
### Only shards that will exist in the future, at time of creation, are precreated.
[shard-precreation]
enabled = true
check-interval = "10m"
advance-period = "30m"
###
### Controls the system self-monitoring, statistics and diagnostics.
###
### The internal database for monitoring data is created automatically if
### if it does not already exist. The target retention within this database
### is called 'monitor' and is also created with a retention period of 7 days
### and a replication factor of 1, if it does not exist. In all cases the
### this retention policy is configured as the default for the database.
[monitor]
store-enabled = true # Whether to record statistics internally.
store-database = "_internal" # The destination database for recorded statistics
store-interval = "10s" # The interval at which to record statistics
###
### [admin]
###
### Controls the availability of the built-in, web-based admin interface. If HTTPS is
### enabled for the admin interface, HTTPS must also be enabled on the [http] service.
###
[admin]
enabled = true
bind-address = ":8083"
https-enabled = false
https-certificate = "/etc/ssl/influxdb.pem"
###
### [http]
###
### Controls how the HTTP endpoints are configured. These are the primary
### mechanism for getting data into and out of InfluxDB.
###
[http]
enabled = true
bind-address = ":8086"
auth-enabled = false
log-enabled = true
write-tracing = false
pprof-enabled = false
https-enabled = false
https-certificate = "/etc/ssl/influxdb.pem"
###
### [[graphite]]
###
### Controls one or many listeners for Graphite data.
###
[[graphite]]
enabled = false
# database = "graphite"
# bind-address = ":2003"
# protocol = "tcp"
# consistency-level = "one"
# name-separator = "."
# These next lines control how batching works. You should have this enabled
# otherwise you could get dropped metrics or poor performance. Batching
# will buffer points in memory if you have many coming in.
# batch-size = 1000 # will flush if this many points get buffered
# batch-pending = 5 # number of batches that may be pending in memory
# batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit
# udp-read-buffer = 0 # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max.
## "name-schema" configures tag names for parsing the metric name from graphite protocol;
## separated by `name-separator`.
## The "measurement" tag is special and the corresponding field will become
## the name of the metric.
## e.g. "type.host.measurement.device" will parse "server.localhost.cpu.cpu0" as
## {
## measurement: "cpu",
## tags: {
## "type": "server",
## "host": "localhost",
## "device": "cpu0"
## }
## }
# name-schema = "type.host.measurement.device"
## If set to true, when the input metric name has more fields than `name-schema` specified,
## the extra fields will be ignored.
## Otherwise an error will be logged and the metric rejected.
# ignore-unnamed = true
###
### [collectd]
###
### Controls the listener for collectd data.
###
[collectd]
enabled = true
bind-address = "127.0.0.1:25826"
database = "collectd"
typesdb = "/usr/share/collectd/types.db"
# These next lines control how batching works. You should have this enabled
# otherwise you could get dropped metrics or poor performance. Batching
# will buffer points in memory if you have many coming in.
# batch-size = 1000 # will flush if this many points get buffered
# batch-pending = 5 # number of batches that may be pending in memory
# batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit
# read-buffer = 0 # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max.
###
### [opentsdb]
###
### Controls the listener for OpenTSDB data.
###
[opentsdb]
enabled = false
# bind-address = ":4242"
# database = "opentsdb"
# retention-policy = ""
# consistency-level = "one"
# tls-enabled = false
# certificate= ""
# These next lines control how batching works. You should have this enabled
# otherwise you could get dropped metrics or poor performance. Only points
# metrics received over the telnet protocol undergo batching.
# batch-size = 1000 # will flush if this many points get buffered
# batch-pending = 5 # number of batches that may be pending in memory
# batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit
###
### [[udp]]
###
### Controls the listeners for InfluxDB line protocol data via UDP.
###
[[udp]]
enabled = false
# bind-address = ""
# database = "udp"
# retention-policy = ""
# These next lines control how batching works. You should have this enabled
# otherwise you could get dropped metrics or poor performance. Batching
# will buffer points in memory if you have many coming in.
# batch-size = 1000 # will flush if this many points get buffered
# batch-pending = 5 # number of batches that may be pending in memory
# batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit
# read-buffer = 0 # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max.
###
### [continuous_queries]
###
### Controls how continuous queries are run within InfluxDB.
###
[continuous_queries]
log-enabled = true
enabled = true
recompute-previous-n = 2
recompute-no-older-than = "10m"
compute-runs-per-interval = 10
compute-no-more-than = "2m"

View File

@ -1,17 +0,0 @@
[process]
process = influxdb
service = influxdb
style = lsb
pidfile = /var/run/influxdb/influxdb.pid
severity = major ; minor, major, critical
restarts = 3 ; restart retries before error assertion
interval = 5 ; number of seconds to wait between restarts
debounce = 10 ; number of seconds that a process needs to remain
; running before degrade is removed and retry count
; is cleared.
startuptime = 3 ; Seconds to wait after process start before starting the debounce monitor
mode = passive ; Monitoring mode: passive (default) or active
; passive: process death monitoring (default: always)
; active : heartbeat monitoring, i.e. request / response messaging
; ignore : do not monitor or stop monitoring
quorum = 0 ; process is in the host watchdog quorum

View File

@ -1,16 +0,0 @@
#daily
nodateext
/var/log/influxdb/influxdb.log
{
size 20M
start 1
missingok
rotate 20
compress
sharedscripts
postrotate
systemctl reload syslog-ng > /dev/null 2>&1 || true
endscript
}

View File

@ -1,25 +0,0 @@
[Unit]
Description=InfluxDB open-source, distributed, time series database
Documentation=https://influxdb.com/docs/
Before=collectd.service
Before=pmon.service
After=local-fs.target network-online.target
Requires=local-fs.target network-online.target
[Service]
User=influxdb
Group=influxdb
LimitNOFILE=65536
Environment='STDOUT=/dev/null'
Environment='STDERR=/var/log/influxdb/influxd.log'
EnvironmentFile=-/etc/default/influxdb
PermissionsStartOnly=true
ExecStartPre=-/usr/bin/mkdir -p /var/run/influxdb
ExecStartPre=-/usr/bin/chown influxdb:influxdb /var/run/influxdb
ExecStart=/bin/sh -c "/usr/bin/influxd -config /etc/influxdb/influxdb.conf -pidfile /var/run/influxdb/influxdb.pid ${INFLUXD_OPTS} >> ${STDOUT} 2>> ${STDERR}"
ExecStopPost=/bin/bash -c 'rm /var/run/influxdb/influxdb.pid'
KillMode=control-group
[Install]
WantedBy=multi-user.target
Alias=influxd.service

View File

@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,2 +0,0 @@
SRC_DIR=scripts
TIS_PATCH_VER=0

View File

@ -1,42 +0,0 @@
# RPM packaging for the monitor-tools host-performance tools
# (memtop, schedtop, occtop).
Summary: Monitor tools package
Name: monitor-tools
Version: 1.0
Release: %{tis_patch_ver}%{?_tis_dist}
License: Apache-2.0
Group: base
Packager: Wind River <info@windriver.com>
URL: unknown
BuildArch: noarch
Source: %name-%version.tar.gz

Requires: initscripts-config

%description
This package contains data collection tools to monitor host performance.
Tools are general purpose engineering and debugging related. Includes
overall memory, cpu occupancy, per-task cpu, per-task scheduling, per-task
io.

%prep
%autosetup

%install
rm -rf $RPM_BUILD_ROOT
%global _buildsubdir %{_builddir}/%{name}-%{version}
install -d %{buildroot}/usr/bin
install %{_buildsubdir}/memtop %{buildroot}/usr/bin
install %{_buildsubdir}/schedtop %{buildroot}/usr/bin
install %{_buildsubdir}/occtop %{buildroot}/usr/bin

%files
%license LICENSE
%defattr(-,root,root,-)
/usr/bin/*

%post
# Append kernel.sched_schedstats=1 to sysctl.conf exactly once.
# BUG FIX: use 'grep -q' so the matched line (or the miss) is not echoed
# to the terminal during package installation, and test grep's exit
# status directly instead of via $?.
if ! grep -q schedstats /etc/sysctl.conf; then
    echo -e "\nkernel.sched_schedstats=1" >> /etc/sysctl.conf
    sysctl -p &>/dev/null
fi
exit 0

View File

@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,344 +0,0 @@
#!/usr/bin/perl
########################################################################
#
# Copyright (c) 2015 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
########################################################################
#
# Description:
# This displays overall memory information per sample period.
# Output includes total, used, avail, per-numa node breakdown of avail
# and free hugepages memory.
#
# Usage: memtop OPTIONS
# memtop [--delay=<seconds>] [--repeat=<num>] [--period=<seconds>] [--help]
#
# Summarize high-level memory usage.
use 5.10.0;
use warnings;
use strict;
use Benchmark ':hireswallclock';
use POSIX qw(strftime);
use Data::Dumper;
use File::Basename;
use File::Spec ();
use Time::HiRes qw(time usleep);
use Carp qw(croak carp);
# IEC and SI constants
use constant SI_k => 1.0E3;
use constant SI_M => 1.0E6;
use constant SI_G => 1.0E9;
use constant Ki => 1024.0;
use constant Mi => 1024.0*1024.0;
use constant Gi => 1024.0*1024.0*1024.0;
# Name of this program
our $TOOLNAME = basename($0);
our $VERSION = "0.1";
# Argument list parameters
our ($arg_debug,
$arg_delay,
$arg_repeat,
$arg_period) = ();
# Globals
# $t_0/$t_1: previous/current hires sample timestamps; $t_final: stop time;
# $is_strict: kernel overcommit accounting mode; $num_nodes: NUMA node count.
our $t_0 = ();
our $t_1 = ();
our $t_elapsed = ();
our $t_final = ();
our $is_strict = ();
our $num_nodes = ();
#-------------------------------------------------------------------------------
# MAIN Program
#-------------------------------------------------------------------------------
# benchmark variables
my ($bd, $b0, $b1);
# Autoflush output
select(STDERR);
$| = 1;
select(STDOUT); # default
$| = 1;
# Parse input arguments and print tool usage if necessary
&parse_memtop_args(
\$::arg_debug,
\$::arg_delay,
\$::arg_repeat,
\$::arg_period,
);
# Print out some debugging information
if (defined $::arg_debug) {
$Data::Dumper::Indent = 1;
}
# Strict vs non-strict memory accounting
$::is_strict = &is_strict();
# Number of numa nodes
$::num_nodes = &num_numa_nodes();
# Print tool header and selected options
printf "%s %s -- ".
"selected options: delay = %.3fs, repeat = %d, period = %.3fs, %s, unit = %s\n",
$::TOOLNAME, $::VERSION,
$::arg_delay, $::arg_repeat, $::arg_period,
$::is_strict ? 'strict' : 'non-strict',
'MiB';
# Capture timestamp
$b0 = new Benchmark;
# Get current hires epoc timestamp
$::t_1 = time();
$::t_final = $::t_1 + $::arg_period;
# Set initial delay
$::t_elapsed = $::arg_delay;
# Main loop
# usleep() interval in microseconds; 600us is subtracted from the nominal
# delay -- NOTE(review): presumably compensating for per-iteration sampling
# overhead; empirically tuned, confirm.
my $delay = SI_M*$::arg_delay - 600.0;
REPEAT_LOOP: for (my $rep=1; $rep <= $::arg_repeat; $rep++) {
# Copy all state variables
$::t_0 = $::t_1;
# Sleep for desired interarrival time
usleep( $delay );
# Current hires epoc timestamp
$::t_1 = time();
# Delta calculation
$::t_elapsed = $::t_1 - $::t_0;
# Print summary
&print_memory(\$::t_1);
# Exit if we have reached period
last if ((defined $::t_final) && ($::t_1 > $::t_final));
}
# Print that tool has finished
print "done\n";
# Capture timestamp and report delta
if (defined $::arg_debug) {
$b1 = new Benchmark; $bd = Benchmark::timediff($b1, $b0);
printf "processing time: %s\n", timestr($bd);
}
exit 0;
################################################################################
# Parse input option arguments
# Parse command-line options into the provided scalar refs.
# Arguments (all scalar refs, aliased onto the package globals via glob
# assignment): \$arg_debug, \$arg_delay, \$arg_repeat, \$arg_period.
# Applies defaults (delay = 1.0s, repeat = 1), derives --repeat from
# --period (or period from delay*repeat), and exits with usage on any
# invalid input.
sub parse_memtop_args {
    (local *::arg_debug,
     local *::arg_delay,
     local *::arg_repeat,
     local *::arg_period,
    ) = @_;

    # Local variables
    my ($fail, $arg_help);

    # Use the Argument processing module
    use Getopt::Long;

    # Process input arguments
    $fail = 0;
    GetOptions(
        "debug:i",  \$::arg_debug,
        "delay=f",  \$::arg_delay,
        "repeat=i", \$::arg_repeat,
        "period=i", \$::arg_period,
        "help|h",   \$arg_help
    ) || GetOptionsMessage();

    # Print help documentation if user has selected --help
    &ListHelp() if (defined $arg_help);

    # Validate options
    if ((defined $::arg_repeat) && (defined $::arg_period)) {
        $fail = 1;
        warn "$::TOOLNAME: Input error: cannot specify both --repeat and --period options.\n";
    }
    if ((defined $::arg_delay) && ($::arg_delay < 0.01)) {
        $fail = 1;
        # BUG FIX: warn() does not interpolate printf-style formats; the
        # original printed a literal '%f' and emitted the value after the
        # trailing newline.  Format the message with sprintf instead.
        warn sprintf("%s: Input error: --delay %f is less than 0.01.\n",
                     $::TOOLNAME, $::arg_delay);
    }
    if (@::ARGV) {
        $fail = 1;
        warn "$::TOOLNAME: Input error: not expecting these options: '@::ARGV'.\n";
    }

    # Set reasonable defaults
    $::arg_delay  ||= 1.0;
    $::arg_repeat ||= 1;
    if ($::arg_period) {
        $::arg_repeat = $::arg_period / $::arg_delay;
    } else {
        $::arg_period = $::arg_delay * $::arg_repeat;
    }

    # Upon missing or invalid options, print usage
    if ($fail == 1) {
        &Usage();
        exit 1;
    }
}
# Print out a warning message and usage
sub GetOptionsMessage {
# Error callback for GetOptions(): report the unparsable command line,
# show the usage summary, and terminate with a non-zero status.
# Never returns.
warn "$::TOOLNAME: Error processing input arguments.\n";
&Usage();
exit 1;
}
# Print out program usage
sub Usage {
    # Emit the one-screen command-line usage summary to STDOUT.
    my $usage = "Usage: $::TOOLNAME OPTIONS\n"
              . " [--delay=<seconds>] [--repeat=<num>] [--period=<seconds>]\n"
              . " [--help]\n"
              . "\n";
    print $usage;
}
# Print tool help
sub ListHelp {
    # Print the long-form help: tool summary, usage block, then one line
    # per option; exits 0 afterwards (used by --help / -h).
    print "$::TOOLNAME -- displays high memory usage at high level\n";
    &Usage();
    print " --delay=<seconds> : output interval (seconds): default: 1.0\n",
          " --repeat=<num> : number of repeat samples: default: 1\n",
          " --period=<seconds> : overall tool duration (seconds): default: --\n",
          " --help : this help\n",
          "\n";
    exit 0;
}
# Print memory summary
sub print_memory {
# Print one timestamped line of system-wide and per-NUMA-node memory
# usage, converted to MiB (the tool header advertises 'unit = MiB').
# Argument: ref to the sample's hires epoch timestamp, aliased to $::t_1.
(local *::t_1) = @_;
# counter
# Persistent sample counter, wrapped modulo 15: the column heading is
# re-printed whenever it comes back around to 1 (i.e. every 15 samples).
our $count;
$::count++; $::count %= 15;
my ($file, $n);
my %mem = ();
my %node = ();
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst);
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($::t_1);
# Fractional part of the hires timestamp expressed as milliseconds.
my $msec = 1000.0*($::t_1 - int($::t_1));
# Process all entries of MEMINFO
$file = '/proc/meminfo';
open(FILE, $file) || die "Cannot open file: $file ($!)";
while($_ = <FILE>) {
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
if (/^(\S+):\s+(\d+)\b/) {
$mem{$1} = $2;
}
}
close(FILE);
# Process all entries of per-Node MEMINFO
# NOTE(review): assumes node numbering is contiguous 0..num_nodes-1;
# confirm on hosts with offlined or sparse NUMA nodes.
for ($n=0; $n < $::num_nodes; $n++) {
$file = sprintf('/sys/devices/system/node/node%d/meminfo', $n);
open(FILE, $file) || die "Cannot open file: $file ($!)";
while($_ = <FILE>) {
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
if (/^Node\s+(\d+)\s+(\S+):\s+(\d+)\b/) {
$node{$1}{$2} = $3;
}
}
close(FILE);
}
# Calculate available memory
# Strict overcommit: headroom against the commit limit; otherwise the
# free + cached + buffers + reclaimable-slab estimate.
if ($::is_strict) {
$mem{'Avail'} = $mem{'CommitLimit'} - $mem{'Committed_AS'};
} else {
$mem{'Avail'} = $mem{'MemFree'} +
$mem{'Cached'} +
$mem{'Buffers'} +
$mem{'SReclaimable'};
}
$mem{'Used'} = $mem{'MemTotal'} - $mem{'Avail'};
$mem{'Anon'} = $mem{'AnonPages'};
for ($n=0; $n < $::num_nodes; $n++) {
$node{$n}{'Avail'} = $node{$n}{'MemFree'} +
$node{$n}{'FilePages'} +
$node{$n}{'SReclaimable'};
# HugePages_Free is a page count; scale by Hugepagesize so its units
# match the other meminfo fields.
$node{$n}{'HFree'} = $node{$n}{'HugePages_Free'} * $mem{'Hugepagesize'};
}
# Print heading every so often
if ($::count == 1) {
printf "%s ".
"%8s %8s %8s %7s %6s %6s %8s %8s %7s %7s %8s %8s",
'yyyy-mm-dd hh:mm:ss.fff',
'Tot', 'Used', 'Free', 'Ca', 'Buf', 'Slab', 'CAS', 'CLim', 'Dirty', 'WBack', 'Anon', 'Avail';
for ($n=0; $n < $::num_nodes; $n++) {
printf " %8s %8s", sprintf('%d:Avail', $n), sprintf('%d:HFree', $n);
}
printf "\n";
}
# Print one line memory summary
# Counters divided by Ki (1024) to convert kB values to MiB for display.
printf "%4d-%02d-%02d %02d:%02d:%02d.%03d ".
"%8.1f %8.1f %8.1f %7.1f %6.1f %6.1f %8.1f %8.1f %7.1f %7.1f %8.1f %8.1f",
1900+$year, 1+$mon, $mday, $hour, $min, $sec, $msec,
$mem{'MemTotal'}/Ki,
$mem{'Used'}/Ki,
$mem{'MemFree'}/Ki,
$mem{'Cached'}/Ki,
$mem{'Buffers'}/Ki,
$mem{'Slab'}/Ki,
$mem{'Committed_AS'}/Ki,
$mem{'CommitLimit'}/Ki,
$mem{'Dirty'}/Ki,
$mem{'Writeback'}/Ki,
$mem{'Anon'}/Ki,
$mem{'Avail'}/Ki;
for ($n=0; $n < $::num_nodes; $n++) {
printf " %8.1f %8.1f", $node{$n}{'Avail'}/Ki, $node{$n}{'HFree'}/Ki;
}
printf "\n";
}
# Determine the number of NUMA memory nodes on this host.
# Preferred source: the /sys/devices/system/node/node<N> directories --
# the same sysfs tree print_memory() reads per-node meminfo from, so the
# count always matches what that routine can actually open.
# Fallback: count distinct "physical id" values in /proc/cpuinfo.  That
# counts CPU sockets, which usually -- but not always -- equals the NUMA
# node count (the original implementation used only this heuristic).
sub num_numa_nodes {
    # sysfs enumerates NUMA nodes directly when available.
    my @sysfs_nodes = glob('/sys/devices/system/node/node[0-9]*');
    return scalar @sysfs_nodes if @sysfs_nodes;

    # Fallback: approximate with the number of physical CPU packages.
    my $file = '/proc/cpuinfo';
    my %nodes = ();
    open(FILE, $file) || die "Cannot open file: $file ($!)";
    while($_ = <FILE>) {
        s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
        if (/^physical\s+id\s+:\s+(\d+)\b/) {
            $nodes{$1} = 1;
        }
    }
    close(FILE);
    return scalar keys %nodes;
}
# Return 1 when the kernel uses strict overcommit accounting
# (vm.overcommit_memory == 2), else 0.  Strict mode switches
# print_memory() to compute Avail as CommitLimit - Committed_AS.
sub is_strict {
    my $value = 0;
    my $file = '/proc/sys/vm/overcommit_memory';
    open(FILE, $file) || die "Cannot open file: $file ($!)";
    $_ = <FILE>;
    # BUG FIX: the original did '$value = /(\d+)/;', which stores the
    # boolean match result (1 or '') rather than the captured digit, so
    # the '== 2' test below could never succeed and strict mode was
    # never detected.  Capture $1 instead.
    $value = $1 if (defined $_ && /(\d+)/);
    close(FILE);
    return ($value == 2) ? 1 : 0;
}
1;

View File

@ -1,592 +0,0 @@
#!/usr/bin/perl
########################################################################
#
# Copyright (c) 2015-2016 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
########################################################################
#
# Description:
# This displays per-core occupancy information per sample period.
# Output includes total occupancy, and per-core occupancy based on
# hi-resolution timings.
#
# Usage: occtop OPTIONS
# [--delay=<seconds>] [--repeat=<num>] [--period=<seconds>]
# [--header=<num>]
# [--help]
use strict;
use warnings;
use Data::Dumper;
use POSIX qw(uname strftime);
use Time::HiRes qw(clock_gettime usleep CLOCK_MONOTONIC CLOCK_REALTIME);
use Benchmark ':hireswallclock';
use Carp qw(croak carp);
# Define toolname
our $TOOLNAME = "occtop";
our $VERSION = "0.1";
# Constants
use constant SI_k => 1.0E3;
use constant SI_M => 1.0E6;
use constant SI_G => 1.0E9;
use constant Ki => 1024.0;
use constant Mi => 1024.0*1024.0;
use constant Gi => 1024.0*1024.0*1024.0;
# Globals
our %percpu_0 = ();
our %percpu_1 = ();
our %D_percpu = ();
our %loadavg = ();
our $D_total = 0.0;
our $tm_0 = 0.0;
our $tm_1 = 0.0;
our $tr_0 = 0.0;
our $tr_1 = 0.0;
our $tm_elapsed = 0.0;
our $tm_final = 0.0;
our $uptime = 0.0;
our $num_cpus = 1;
our $num_tasks = 0;
our $num_blk = 0;
our $print_host = 1;
our $is_schedstat = 1;
our $USER_HZ = 100; # no easy way to get this
our $CLOCK_NS = SI_G / $USER_HZ;
# Argument list parameters
our ($arg_debug,
$arg_delay,
$arg_repeat,
$arg_period,
$arg_header,
) = ();
#-------------------------------------------------------------------------------
# MAIN Program
#
# Samples per-cpu cumulative runtime at a fixed interval and prints one
# occupancy line per sample. State is kept in package globals that the
# helper subs alias via typeglobs.
#-------------------------------------------------------------------------------
my $MIN_DELAY = 0.001;
my $MAX_DELAY = 0.001;
# benchmark variables (tool self-timing, reported at exit)
my ($bd, $b0, $b1);
# Autoflush output on both STDERR and STDOUT
select(STDERR);
$| = 1;
select(STDOUT); # default
$| = 1;
# Parse input arguments and print tool usage if necessary
&parse_occtop_args(
\$::arg_debug,
\$::arg_delay,
\$::arg_repeat,
\$::arg_period,
\$::arg_header,
);
# Print out some debugging information
if (defined $::arg_debug) {
$Data::Dumper::Indent = 1;
}
# Check for schedstat support; fall back to /proc/stat jiffies otherwise
$is_schedstat = -e '/proc/schedstat' ? 1 : 0;
# Print out selected options
printf "selected options: delay = %.3fs, repeat = %d, header = %d, source = %s\n",
$::arg_delay, $::arg_repeat, $::arg_header, $is_schedstat ? 'schedstat' : 'jiffie';
# Capture timestamp for the tool's own processing-time report
$b0 = new Benchmark;
# Get number of logical cpus
&get_num_logical_cpus(\$::num_cpus);
# Get current hires epoc timestamp (monotonic for deltas, realtime for display)
$::tm_1 = clock_gettime(CLOCK_MONOTONIC);
$::tr_1 = clock_gettime(CLOCK_REALTIME);
$::tm_final = $::tm_1 + $::arg_delay*$::arg_repeat;
# Set initial delay
$::tm_elapsed = $::arg_delay;
$MAX_DELAY = $::arg_delay + $MIN_DELAY;
# Get overall per-cpu stats (baseline sample for the first delta)
if ($is_schedstat) {
&read_schedstat(\%::percpu_1);
} else {
&read_stat(\%::percpu_1);
}
# Main loop
REPEAT_LOOP: for (my $repeat=1; $repeat <= $::arg_repeat; $repeat++) {
# copy all state variables: sample "1" becomes previous sample "0"
# NOTE(review): %::tm_0 and %::tr_0 are hashes, distinct from the
# scalars $::tm_0/$::tr_0 actually used below; clearing them looks
# vestigial -- confirm before removing.
%::tm_0 = (); %::tr_0 = (); %::percpu_0 = ();
$::tm_0 = $::tm_1; $::tr_0 = $::tr_1;
foreach my $cpu (keys %::percpu_1) { $::percpu_0{$cpu} = $::percpu_1{$cpu}; }
# estimate sleep delay to achieve desired interarrival by subtracting out
# the measured cpu runtime of the tool.
my $delay = $::arg_delay;
$delay = $MIN_DELAY if ($delay < $MIN_DELAY);
$delay = $MAX_DELAY if ($delay > $MAX_DELAY);
usleep( SI_M*$delay );
# Collect current state
# NOTE(review): assigning the empty list to the scalars $::tm_1/$::tr_1
# just sets them to undef; both are overwritten immediately below.
$::tm_1 = (); $::tr_1 = (); %::percpu_1 = ();
# Get current hires epoc timestamp
$::tm_1 = clock_gettime(CLOCK_MONOTONIC);
$::tr_1 = clock_gettime(CLOCK_REALTIME);
# Get overall per-cpu stats
if ($is_schedstat) {
&read_schedstat(\%::percpu_1);
} else {
&read_stat(\%::percpu_1);
}
# Get current uptime
&get_uptime(\$::uptime);
# Get current loadavg ($::runq is not in the 'our' declarations above;
# fully-qualified package names are strict-safe regardless)
&get_loadavg(\%::loadavg, \$::runq, \$::num_tasks);
# Get current processes blocked
&get_blocked(\$::num_blk);
# Delta calculation: per-cpu runtime (ns -> ms) and occupancy (%) over
# the elapsed monotonic interval
%::D_percpu = ();
$::tm_elapsed = $tm_1 - $tm_0;
foreach my $cpu (keys %::percpu_1) {
$::D_percpu{$cpu}{'runtime'} = ($::percpu_1{$cpu} - $::percpu_0{$cpu})/1.0E6;
if ($::tm_elapsed > 0.0) {
$::D_percpu{$cpu}{'occ'} = 100.0*$D_percpu{$cpu}{'runtime'}/1.0E3/$::tm_elapsed;
} else {
$::D_percpu{$cpu}{'occ'} = 0.0;
}
}
# Print tool header (host/build details printed on first sample only)
if ($repeat == 1) {
&occtop_header(
\$::tr_1,
\$::uptime,
\%::loadavg,
\$::runq,
\$::num_blk,
\$::num_tasks,
\$::print_host,
);
}
# Print one-liner summary
&print_occtop(
\$::tr_1,
\$::num_cpus,
\%::D_percpu,
\$::arg_header,
);
# exit repeat loop if we have exceeded overall time
last if ($::tm_1 > $::tm_final);
} # REPEAT LOOP
# Print that tool has finished
print "done\n";
# Capture timestamp and report delta
$b1 = new Benchmark; $bd = Benchmark::timediff($b1, $b0);
printf "processing time: %s\n", timestr($bd);
exit 0;
#-------------------------------------------------------------------------------
# Parse per-cpu hi-resolution scheduling stats from /proc/schedstat into
# %::percpu (aliased from the caller's hash ref): cpu number -> cumulative
# runtime. Croaks on unsupported schedstat file versions.
sub read_schedstat
{
    (local *::percpu) = @_;
    %::percpu = ();

    my $file = '/proc/schedstat';
    open(my $fh, $file) || croak "Cannot open file: $file ($!)";

    # first two lines carry the format version and a timestamp
    $_ = <$fh>; my ($version) = /^version\s+(\d+)/;
    $_ = <$fh>; my ($timestamp) = /^timestamp\s+(\d+)/;

    if ($version == 15) {
        while (my $line = <$fh>) {
            # version 15: cputime is the 7th field after the cpu label
            if ($line =~ /^cpu(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)\s+/) {
                $::percpu{$1} = $2;
            }
        }
    } else {
        croak "schedstat version: $version method not implemented.";
    }
    close($fh);
}
# Parse per-cpu jiffie stats from /proc/stat into %::percpu (aliased from
# the caller), scaled to nanoseconds; idle time is excluded from the sum.
# NOTE(review): the original header claimed "cputime excludes iowait",
# but $iowt IS included in the busy sum below -- confirm which occupancy
# semantics are intended before changing either the comment or the code.
sub read_stat
{
(local *::percpu) = @_;
my ($cpu, $cputime);
my ($user, $sys, $nice, $idle, $iowt, $hirq, $sirq);
my ($fh, $file);
%::percpu = ();
# parse /proc/stat
$file = '/proc/stat';
open($fh, $file) || croak "Cannot open file: $file ($!)";
LOOP_STAT: while (<$fh>) {
if (/^cpu(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+/) {
# NOTE(review): proc(5) documents the field order as
# user,nice,system,idle,iowait,irq,softirq; the local names $sys and
# $nice appear swapped relative to that -- harmless for the sum.
$cpu =$1; $user = $2; $sys = $3; $nice = $4; $idle = $5; $iowt = $6; $hirq = $7; $sirq = $8;
# scale USER_HZ ticks to nanoseconds ($CLOCK_NS = 1e9 / USER_HZ)
$cputime = $CLOCK_NS * ($user + $sys + $nice + $iowt + $hirq + $sirq);
$::percpu{$cpu} = $cputime;
}
}
close($fh);
}
# Parse load averages, run-queue depth and total task count from
# /proc/loadavg into the caller's variables (aliased via typeglobs).
# All outputs default to zero if the line fails to parse.
sub get_loadavg
{
    # FIX: 'local' was missing on *::num_tasks; without it the glob
    # aliasing persisted past this sub's scope, unlike its two siblings.
    (local *::loadavg, local *::runq, local *::num_tasks) = @_;
    $::loadavg{'1'} = 0.0;
    $::loadavg{'5'} = 0.0;
    $::loadavg{'15'} = 0.0;
    $::runq = 0;
    $::num_tasks = 0;
    my $file = '/proc/loadavg';
    open(my $fh, $file) || croak "Cannot open file: $file ($!)";
    $_ = <$fh>;
    # e.g. "0.07 0.09 0.08 1/440 12345" -> 1/5/15 min averages,
    # running/total tasks, last pid (ignored)
    if (/^(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\/(\d+)\s+\d+/) {
        $::loadavg{'1'} = $1;
        $::loadavg{'5'} = $2;
        $::loadavg{'15'} = $3;
        $::runq = $4;
        $::num_tasks = $5;
    }
    close($fh);
}
# Read the count of currently-blocked processes (the "procs_blocked"
# line of /proc/stat) into the caller's scalar, aliased as $::num_blk.
sub get_blocked
{
    (local *::num_blk) = @_;
    $::num_blk = 0;
    my $file = '/proc/stat';
    open(my $fh, $file) || croak "Cannot open file: $file ($!)";
    while (my $line = <$fh>) {
        $::num_blk = $1 if ($line =~ /^procs_blocked\s+(\d+)/);
    }
    close($fh);
}
# Read system uptime (first field of /proc/uptime, fractional seconds)
# into the caller's scalar, aliased as $::uptime.
sub get_uptime
{
    (local *::uptime) = @_;
    $::uptime = 0.0;
    my $file = '/proc/uptime';
    open(my $fh, $file) || croak "Cannot open file: $file ($!)";
    $_ = <$fh>;
    $::uptime = $1 if (/^(\S+)\s+\S+/);
    close($fh);
}
# Count online logical cpus by tallying "processor : N" lines in
# /proc/cpuinfo; the result is stored in the caller's $::num_cpus.
sub get_num_logical_cpus {
    (local *::num_cpus) = @_;
    $::num_cpus = 0;
    my $file = "/proc/cpuinfo";
    open(my $fh, $file) || croak "Cannot open file: $file ($!)";
    while (my $line = <$fh>) {
        $::num_cpus++ if ($line =~ /^[Pp]rocessor\s+:\s\d+/);
    }
    close($fh);
}
# Print occupancy summary: one line per sample with the realtime
# timestamp, total occupancy and per-cpu occupancy (%). The column
# heading is reprinted every $::arg_header samples.
sub print_occtop {
(local *::tr_1,
local *::num_cpus,
local *::D_percpu,
local *::arg_header,
) = @_;
# counter persists across calls (package variable, not a lexical)
our $count;
$::count++; $::count %= $::arg_header;
$::count = 1 if ($::arg_header == 1);
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst);
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($::tr_1);
# fractional part of the realtime stamp, in milliseconds
my $msec = 1000.0*($::tr_1 - int($::tr_1));
# Print heading every so often
if ($::count == 1) {
printf "%s ".
"%7s ",
'yyyy-mm-dd hh:mm:ss.fff',
'total';
for (my $cpu=0; $cpu < $::num_cpus; $cpu++) {
printf "%5s ", $cpu;
}
print "\n";
}
# Print one summary line; 'total' is the sum of per-cpu occupancies
# (can exceed 100 on multi-cpu hosts)
my $occ_total = 0.0;
for (my $cpu=0; $cpu < $::num_cpus; $cpu++) {
$occ_total += $::D_percpu{$cpu}{'occ'};
}
printf "%4d-%02d-%02d %02d:%02d:%02d.%03d ".
"%7.1f ",
1900+$year, 1+$mon, $mday, $hour, $min, $sec, $msec,
$occ_total;
for (my $cpu=0; $cpu < $::num_cpus; $cpu++) {
printf "%5.1f ", $::D_percpu{$cpu}{'occ'};
}
print "\n";
}
# Print tool header: timestamp, load averages, run-queue depth, blocked
# and total task counts, and uptime. On the first call only (while
# $::print_host is true), also prints host, platform and processor info.
sub occtop_header {
(local *::tr_1,
local *::uptime,
local *::loadavg,
local *::runq,
local *::num_blk,
local *::num_tasks,
local *::print_host,
) = @_;
# process epoch to get current timestamp
my $mm_in_s = 60;
my $hh_in_s = 60*60;
my $dd_in_s = 24*60*60;
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst);
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($::tr_1);
my $msec = 1000.0*($::tr_1 - int($::tr_1));
# convert uptime to elapsed <d>:<hh>:<mm>:<ss>
my ($up, $up_dd, $up_hh, $up_mm, $up_ss);
$up = int($::uptime);
$up_dd = int($up/$dd_in_s);
$up -= $dd_in_s*$up_dd;
$up_hh = int($up/$hh_in_s);
$up -= $hh_in_s*$up_hh;
$up_mm = int($up/$mm_in_s);
$up -= $mm_in_s*$up_mm;
$up_ss = $up;
# e.g.: occtop -- 2014/03/03 02:00:21.357 ldavg:0.07, 0.09, 0.08 runq:1 nproc:440 up:6:13:00:56
printf "%s %s -- ".
"%4d-%02d-%02d %02d:%02d:%02d.%03d ".
"ldavg:%.2f, %.2f, %.2f runq:%d blk:%d nproc:%d ".
"up:%d:%02d:%02d:%02d\n",
$::TOOLNAME, $::VERSION,
1900+$year, 1+$mon, $mday, $hour, $min, $sec, $msec,
$::loadavg{'1'}, $::loadavg{'5'}, $::loadavg{'15'},
$::runq, $::num_blk, $::num_tasks,
$up_dd, $up_hh, $up_mm, $up_ss;
return if (!($::print_host));
# After first print, disable print host information
$::print_host = 0;
# Get host specific information
my ($OSTYPE, $NODENAME, $OSRELEASE, $version, $MACHINE);
($OSTYPE, $NODENAME, $OSRELEASE, $version, $MACHINE) = POSIX::uname();
my ($NODETYPE, $SUBFUNCTION, $BUILDINFO) = ('-', '-', '-');
my ($SW_VERSION, $BUILD_ID) = ('-', '-');
# Get platform nodetype and subfunction; a missing file skips the block
# ('next' works here because a bare labeled block is a one-pass loop)
PLATFORM: {
my $file = "/etc/platform/platform.conf";
open(FILE, $file) || next;
while($_ = <FILE>) {
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
if (/^nodetype=(\S+)/) {
$NODETYPE = $1;
}
if (/^subfunction=(\S+)/) {
$SUBFUNCTION = $1;
}
}
close(FILE);
}
# Get loadbuild info; also best-effort (missing file leaves '-' defaults)
BUILD: {
my $file = "/etc/build.info";
open(FILE, $file) || next;
while($_ = <FILE>) {
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
if (/^SW_VERSION=\"([^"]+)\"/) {
$SW_VERSION = $1;
}
if (/^BUILD_ID=\"([^"]+)\"/) {
$BUILD_ID = $1;
}
}
close(FILE);
}
$BUILDINFO = join(' ', $SW_VERSION, $BUILD_ID);
# Parse /proc/cpuinfo to get specific processor info
my ($n_cpu, $model_name, $cpu_MHz) = (0, '-', 0);
CPUINFO: {
my $file = "/proc/cpuinfo";
open(FILE, $file) || croak "Cannot open file: $file ($!)";
while($_ = <FILE>) {
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
if (/^[Pp]rocessor\s+:\s+\d+/) {
$n_cpu++;
} elsif (/^model name\s+:\s+(.*)$/) {
$_ = $1; s/\s+/ /g;
$model_name = $_;
} elsif (/^cpu MHz\s+:\s+(\S+)/) {
$cpu_MHz = $1;
} elsif (/^bogomips\s+:\s+(\S+)/) {
# fall back to bogomips only when no "cpu MHz" line was seen
$cpu_MHz = $1 if ($cpu_MHz == 0);
}
}
close(FILE);
}
printf " host:%s nodetype:%s subfunction:%s\n",
$NODENAME, $NODETYPE, $SUBFUNCTION;
printf " arch:%s processor:%s speed:%.0f #CPUs:%d\n",
$MACHINE, $model_name, $cpu_MHz, $n_cpu;
printf " %s %s build:%s\n", $OSTYPE, $OSRELEASE, $BUILDINFO;
}
# Parse and validate command line arguments.
#
# Populates the caller's package variables (aliased via typeglobs):
#   arg_debug  - enable debug output when defined
#   arg_delay  - sampling interval in seconds (default 1.0)
#   arg_repeat - number of samples (default 1)
#   arg_period - overall duration; mutually exclusive with --repeat
#   arg_header - header reprint interval in samples (default 15)
# Prints usage and exits on missing/invalid input; prints help and
# exits for --help.
sub parse_occtop_args {
    (local *::arg_debug,
     local *::arg_delay,
     local *::arg_repeat,
     local *::arg_period,
     local *::arg_header,
    ) = @_;

    # Local variables
    my ($fail, $arg_help);

    # Use the Argument processing module
    use Getopt::Long;

    # Print usage if no arguments
    if (!@::ARGV) {
        &Usage();
        exit 0;
    }

    # Process input arguments
    $fail = 0;
    GetOptions(
        "debug:i", \$::arg_debug,
        "delay=f", \$::arg_delay,
        "period=i", \$::arg_period,
        "repeat=i", \$::arg_repeat,
        "header:i", \$::arg_header,
        "help|h", \$arg_help
    ) || GetOptionsMessage();

    # Print help documentation if user has selected --help
    &ListHelp() if (defined $arg_help);

    # Validate options
    if ((defined $::arg_repeat) && (defined $::arg_period)) {
        $fail = 1;
        warn "$::TOOLNAME: Input error: cannot specify both --repeat and --period options.\n";
    }
    if ((defined $::arg_delay) && ($::arg_delay < 0.01)) {
        $fail = 1;
        # BUG FIX: 'warn' is not printf-like; the original passed "%f" and
        # the value as separate list elements, so a literal "%f" was
        # printed. Format the message with sprintf instead.
        warn sprintf("$::TOOLNAME: Input error: --delay %f is less than 0.01.\n",
                     $::arg_delay);
    }
    if (@::ARGV) {
        $fail = 1;
        warn "$::TOOLNAME: Input error: not expecting these options: '@::ARGV'.\n";
    }

    # Set reasonable defaults (note ||= : an explicit 0 also takes the default)
    $::arg_header ||= 15;
    $::arg_delay ||= 1.0;
    $::arg_repeat ||= 1;
    if ($::arg_period) {
        $::arg_repeat = $::arg_period / $::arg_delay;
    } else {
        $::arg_period = $::arg_delay * $::arg_repeat;
    }

    # Upon missing or invalid options, print usage
    if ($fail == 1) {
        &Usage();
        exit 1;
    }
}
# GetOptions failure callback: print a warning and usage, then exit
# with non-zero status.
sub GetOptionsMessage {
warn "$::TOOLNAME: Error processing input arguments.\n";
&Usage();
exit 1;
}
# Print the one-screen command-line usage summary to stdout.
sub Usage {
    print "Usage: $::TOOLNAME OPTIONS\n";
    print " [--delay=<seconds>] [--repeat=<num>] [--period=<seconds>]\n";
    print " [--header=<num>]\n";
    print " [--help]\n";
    print "\n";
}
# Print the detailed tool help (description, usage, option docs),
# then exit successfully.
sub ListHelp {
    print "$::TOOLNAME -- display hi-resolution per-cpu occupancy\n";
    &Usage();
    print "Options: miscellaneous\n";
    print " --delay=<seconds> : output interval (seconds): default: 1.0\n";
    print " --repeat=<num> : number of repeat samples: default: 1\n";
    print " --period=<seconds> : overall tool duration (seconds): default: --\n";
    print " --header=<num> : print header every num samples: default: 15\n";
    print " --help : this help\n";
    exit 0;
}
1;

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +0,0 @@
PACKAGE_NAME=vm-topology
VERSION=1.0
SRC_DIR=$PKG_BASE/$PACKAGE_NAME
TIS_PATCH_VER=1

View File

@ -1,61 +0,0 @@
# RPM spec for the vm-topology tool (python2), which shows compute
# resources and VM topology; also builds a python wheels subpackage.
%global pypi_name vm-topology
Summary: vm_topology
Name: vm-topology
Version: 1.0
Release: %{tis_patch_ver}%{?_tis_dist}
License: Apache-2.0
Group: base
Packager: Wind River <info@windriver.com>
URL: unknown
Source0: %{pypi_name}-%{version}.tar.gz
BuildArch: noarch
BuildRequires: python
BuildRequires: python-setuptools
BuildRequires: python2-pip
BuildRequires: python2-wheel
BuildRequires: python-keyring
BuildRequires: libvirt
Requires: python
Requires: python-keyring
Requires: /usr/bin/env
Requires: libvirt
%description
Show compute resources and VM topology
%prep
# Unpack Source0; -n accounts for the dashed tarball directory name
%autosetup -p 1 -n %{pypi_name}-%{version}
# Remove bundled egg-info
rm -rf %{pypi_name}.egg-info
# Let RPM handle the dependencies
rm -f requirements.txt
%build
# Build with python2 and also produce a wheel for the wheels subpackage
%{__python2} setup.py build
%py2_build_wheel
%install
%{__python2} setup.py install --skip-build --root %{buildroot}
# Stage the built wheels where the wheels subpackage packages them
mkdir -p $RPM_BUILD_ROOT/wheels
install -m 644 dist/*.whl $RPM_BUILD_ROOT/wheels/
%files
%defattr(-,root,root,-)
%license LICENSE
%{_bindir}/vm-topology
%{python2_sitelib}/vm_topology
%{python2_sitelib}/*.egg-info
%package wheels
Summary: %{name} wheels
%description wheels
Contains python wheels for %{name}
%files wheels
/wheels/*

View File

@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,19 +0,0 @@
#
# Copyright (c) 2013-2014 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import setuptools

# Package metadata for the vm-topology tool; installs the vm-topology
# console script backed by vm_topology.exec.vm_topology:main.
setuptools.setup(
    name='vm_topology',
    version='1.0.0',
    description='Show compute resources and VM topology',
    license='Apache-2.0',
    packages=['vm_topology', 'vm_topology.exec'],
    entry_points={
        'console_scripts': [
            'vm-topology = vm_topology.exec.vm_topology:main',
        ],
    },
)

View File

@ -1,5 +0,0 @@
#
# Copyright (c) 2014 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

View File

@ -1,5 +0,0 @@
#
# Copyright (c) 2014 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

File diff suppressed because it is too large Load Diff

View File

@ -102,10 +102,9 @@ deps = -r{toxinidir}/test-requirements.txt
python-daemon==2.1.2
pylint
# There are currenrly 2 python modules with a setup.py file
# There is currently 1 python module with a setup.py file
commands = pylint --rcfile=./pylint.rc \
tools/storage-topology/storage-topology/storage_topology \
tools/vm-topology/vm-topology/vm_topology
tools/storage-topology/storage-topology/storage_topology
[testenv:venv]
basepython = python3