Relocated some packages to repo 'monitoring'

List of relocated subdirectories:
    monitoring/collectd-extensions
    monitoring/influxdb-extensions
    tools/monitor-tools
    tools/vm-topology

Story: 2006166
Task: 35687
Depends-On: I6c62895f8dda5b8dc4ff56680c73c49f3f3d7935
Depends-On: I665dc7fabbfffc798ad57843eb74dca16e7647a3
Change-Id: Iffacd50340005320540cd9ba1495cde0b2231cd0
Signed-off-by: Scott Little <scott.little@windriver.com>
Depends-On: I14e631137ff5658a54d62ad3d7aa2cd0ffaba6e0
parent 062ec89dbb
commit 3637d66ae4
@@ -1,10 +0,0 @@
Metadata-Version: 1.1
Name: collectd-extensions
Version: 1.0
Summary: collectd-extensions
Home-page:
Author: Windriver
Author-email: info@windriver.com
License: ASL 2.0
Description: Titanium Cloud collectd extensions
Platform: UNKNOWN
@@ -1,25 +0,0 @@
SRC_DIR="$PKG_BASE"

COPY_LIST="$PKG_BASE/src/LICENSE \
           $PKG_BASE/src/collectd.conf.pmon \
           $PKG_BASE/src/collectd.service \
           $PKG_BASE/src/fm_notifier.py \
           $PKG_BASE/src/mtce_notifier.py \
           $PKG_BASE/src/plugin_common.py \
           $PKG_BASE/src/python_plugins.conf \
           $PKG_BASE/src/cpu.py \
           $PKG_BASE/src/cpu.conf \
           $PKG_BASE/src/memory.py \
           $PKG_BASE/src/memory.conf \
           $PKG_BASE/src/df.conf \
           $PKG_BASE/src/ntpq.py \
           $PKG_BASE/src/ntpq.conf \
           $PKG_BASE/src/interface.py \
           $PKG_BASE/src/interface.conf \
           $PKG_BASE/src/remotels.py \
           $PKG_BASE/src/remotels.conf \
           $PKG_BASE/src/ptp.py \
           $PKG_BASE/src/ptp.conf \
           $PKG_BASE/src/example.py \
           $PKG_BASE/src/example.conf"
TIS_PATCH_VER=13
@@ -1,110 +0,0 @@
Summary: Titanuim Server collectd Package
Name: collectd-extensions
Version: 1.0
Release: 0%{?_tis_dist}.%{tis_patch_ver}
License: ASL 2.0
Group: base
Packager: Wind River <info@windriver.com>
URL: unknown

# create the files tarball
Source0: %{name}-%{version}.tar.gz
Source1: collectd.service
Source2: collectd.conf.pmon

# collectd python plugin files - notifiers
Source3: fm_notifier.py
Source4: mtce_notifier.py
Source5: plugin_common.py

# collectd python plugin files - resource plugins
Source11: cpu.py
Source12: memory.py
Source14: example.py
Source15: ntpq.py
Source16: interface.py
Source17: remotels.py
Source18: ptp.py

# collectd plugin conf files into /etc/collectd.d
Source100: python_plugins.conf
Source101: cpu.conf
Source102: memory.conf
Source103: df.conf
Source104: example.conf
Source105: ntpq.conf
Source106: interface.conf
Source107: remotels.conf
Source108: ptp.conf

BuildRequires: systemd-devel

Requires: systemd
Requires: collectd
Requires: fm-api
Requires: python-httplib2
Requires: python-influxdb
Requires: python-oslo-concurrency
Requires: tsconfig
Requires: /bin/systemctl

%description
Titanium Cloud collectd extensions

%define debug_package %{nil}
%define local_unit_dir %{_sysconfdir}/systemd/system
%define local_plugin_dir %{_sysconfdir}/collectd.d
%define local_python_extensions_dir /opt/collectd/extensions/python
%define local_config_extensions_dir /opt/collectd/extensions/config

%prep
%setup

%build

%install
install -m 755 -d %{buildroot}%{_sysconfdir}
install -m 755 -d %{buildroot}%{local_unit_dir}
install -m 755 -d %{buildroot}%{local_plugin_dir}
install -m 755 -d %{buildroot}%{local_config_extensions_dir}
install -m 755 -d %{buildroot}%{local_python_extensions_dir}

# support files ; service and pmon conf
install -m 644 %{SOURCE1} %{buildroot}%{local_unit_dir}
install -m 600 %{SOURCE2} %{buildroot}%{local_config_extensions_dir}

# collectd python plugin files - notifiers
install -m 700 %{SOURCE3} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE4} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE5} %{buildroot}%{local_python_extensions_dir}

# collectd python plugin files - resource plugins
install -m 700 %{SOURCE11} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE12} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE14} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE15} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE16} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE17} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE18} %{buildroot}%{local_python_extensions_dir}


# collectd plugin conf files into /etc/collectd.d
install -m 600 %{SOURCE100} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE101} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE102} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE103} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE104} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE105} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE106} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE107} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE108} %{buildroot}%{local_plugin_dir}

%clean
rm -rf $RPM_BUILD_ROOT

%files
%defattr(-,root,root,-)
%config(noreplace) %{local_unit_dir}/collectd.service
%{local_plugin_dir}/*
%{local_config_extensions_dir}/*
%{local_python_extensions_dir}/*
@@ -1,202 +0,0 @@

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
@@ -1,18 +0,0 @@
[process]
process = collectd
service = collectd
style = lsb
pidfile = /var/run/collectd.pid
severity = major       ; minor, major, critical
restarts = 3           ; restart retries before error assertion
interval = 5           ; number of seconds to wait between restarts
debounce = 10          ; number of seconds that a process needs to remain
                       ; running before degrade is removed and retry count
                       ; is cleared.
startuptime = 3        ; Seconds to wait after process start before starting the debounce monitor
mode = passive         ; Monitoring mode: passive (default) or active
                       ; passive: process death monitoring (default: always)
                       ; active : heartbeat monitoring, i.e. request / response messaging
                       ; ignore : do not monitor or stop monitoring
quorum = 0             ; process is in the host watchdog quorum
@@ -1,15 +0,0 @@
[Unit]
Description=Collectd statistics daemon and extension services
Documentation=man:collectd(1) man:collectd.conf(5)
Before=pmon.service
After=local-fs.target network-online.target
Requires=local-fs.target network-online.target

[Service]
Type=notify
ExecStart=/usr/sbin/collectd
ExecStartPost=/bin/bash -c 'echo $MAINPID > /var/run/collectd.pid'
ExecStopPost=/bin/rm -f /var/run/collectd.pid

[Install]
WantedBy=multi-user.target
@@ -1,22 +0,0 @@
# For stock plugin only
# Uncomment to compare stock to tiS plugin readings
# ---------------------
# <Plugin cpu>
#   ReportByCpu false
#   ReportByState false
#   ValuesPercentage true
# </Plugin>

<Plugin "threshold">
    <Plugin "cpu">
        <Type "percent">
            Instance "used"
            Persist true
            PersistOK true
            WarningMax 90.00
            FailureMax 95.00
            Hits 2
            Invert false
        </Type>
    </Plugin>
</Plugin>
@@ -1,262 +0,0 @@
#
# Copyright (c) 2018-2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
############################################################################
#
# This file is the collectd 'Platform CPU Usage' Monitor.
#
# The Platform CPU Usage is calculated as an averaged percentage of
# platform core usable since the previous sample.
#
# Init Function:
#   - if 'worker_reserved.conf exists then query/store PLATFORM_CPU_LIST
#
############################################################################
import os
import time
import collectd

debug = False

PASS = 0
FAIL = 1

PATH = '/proc/cpuinfo'
WORKER_RESERVED_CONF = '/etc/platform/worker_reserved.conf'

PLUGIN = 'platform cpu usage plugin'


# CPU Control class
class CPU:
    hostname = ""            # hostname for sample notification message
    usage = float(0.0)       # float value of cpu usage

    processors = int(0)      # number of processors for all cpus case
    cpu_list = []            # list of CPUs to calculate combined usage for
    cpu_time = []            # schedstat time for each CPU
    cpu_time_last = []       # last schedstat time for each CPU
    time_last = float(0.0)   # float of the time the last sample was taken

    def log_error(self, err_str):
        """Print an error log with plugin name prefixing the log"""

        collectd.error("%s %s" % (PLUGIN, err_str))


# Instantiate the class
c = CPU()


# The collectd configuration interface
# collectd needs this defined ; but not used/needed.
def config_func(config):
    collectd.info('%s config function' % PLUGIN)


# Get the platform cpu list and number of cpus reported by /proc/cpuinfo
def init_func():
    # get current hostname
    c.hostname = os.uname()[1]

    collectd.info('%s init function for %s' % (PLUGIN, c.hostname))

    raw_list = ""
    if os.path.exists(WORKER_RESERVED_CONF):
        with open(WORKER_RESERVED_CONF, 'r') as infile:
            for line in infile:
                if 'PLATFORM_CPU_LIST' in line:
                    val = line.split("=")
                    raw_list = val[1].strip('\n')[1:-1].strip('"')
                    break
    if raw_list:

        # Convert the cpu list fetched from the compute
        # reserved file into an integer list.
        # Handle mix of number list #,# and number range #-#
        split_list = raw_list.split(',')
        if debug:
            collectd.info('%s split list: %s' % (PLUGIN, split_list))
        for cpu in split_list:
            if cpu.find('-') == -1:
                # add individual cpu # with assumed ',' delimiter
                c.cpu_list.append(int(cpu))
            else:
                # add all in range #-#
                cpu_range = cpu.split('-')
                if len(cpu_range) == 2:
                    first = int(cpu_range[0])
                    last = int(cpu_range[1]) + 1
                    # add each
                    for i in list(range(first, last)):
                        c.cpu_list.append(i)

        # with the full CPU list in hand we can now just read their samples
        if debug:
            collectd.info('%s full cpu list: %s' %
                          (PLUGIN, c.cpu_list))

    try:
        f = open('/proc/cpuinfo')
    except EnvironmentError as e:
        collectd.error(str(e), UserWarning)
    else:

        if len(c.cpu_list) == 0:
            _want_all_cpus = True
        else:
            _want_all_cpus = False

        c.processors = 0
        for line in f:
            name_value = [s.strip() for s in line.split(':', 1)]
            if len(name_value) != 2:
                continue

            name, value = name_value
            if 'rocessor' in name:
                if _want_all_cpus is True:
                    c.cpu_list.append(int(c.processors))
                c.processors += 1

        collectd.info('%s has found %d cpus total' %
                      (PLUGIN, c.processors))
        collectd.info('%s monitoring %d cpus %s' %
                      (PLUGIN, len(c.cpu_list), c.cpu_list))
        f.close()


# Calculate the CPU usage sample
def read_func():
    try:
        f = open('/proc/schedstat')
    except EnvironmentError as e:
        c.log_error('file open failed ; ' + str(e))
        return FAIL
    else:
        # schedstat time for each CPU
        c.cpu_time = []

        # Loop over each line ...
        #   get the output version ; only 15 is supported
        #   get the cpu time from each line staring with 'cpux ....'
        for line in f:

            # break each line into name/value pairs
            line_split = [s.strip() for s in line.split(' ', 1)]
            name, value = line_split

            # get the output version.
            if 'ersion' in name:
                try:
                    c.version = int(value)
                except ValueError as e:
                    c.log_error('got invalid schedstat version ; ' + str(e))

                    # TODO: Consider exiting here and raising alarm.
                    #       Calling this type of exit will stop the plugin.
                    # sys._exit()
                    return FAIL

            # only version 15 is supported
            if c.version == 15:
                if 'cpu' in name:
                    # get the cpu number for each line
                    if int(name.replace('cpu', '')) in c.cpu_list:
                        _in_list = True
                    else:
                        _in_list = False

                    # get cpu time for each cpu that is valid
                    if len(c.cpu_list) == 0 or _in_list is True:
                        _schedstat = value
                        value_split = value.split(' ')
                        c.cpu_time.append(float(value_split[6]))
                        if debug:
                            collectd.info('%s %s schedstat is %s [%s]' %
                                          (PLUGIN, name, value_split[6],
                                           _schedstat))
            else:
                collectd.error('%s unsupported schedstat version [%d]' %
                               (PLUGIN, c.version))
                return 0

        f.close()

    # Now that we have the cpu time recorded for each cpu
    _time_delta = float(0)
    _cpu_count = int(0)
    if len(c.cpu_time_last) == 0:
        c.time_last = time.time()
        if c.cpu_list:
            # This is a compute node.
            # Do not include vswitch or pinned cpus in calculation.
            for cpu in c.cpu_list:
                c.cpu_time_last.append(float(c.cpu_time[_cpu_count]))
                _cpu_count += 1
        if debug:
            collectd.info('%s cpu time ; first pass ; %s' %
                          (PLUGIN, c.cpu_time))
        return PASS
    else:
        _time_this = time.time()
        _time_delta = _time_this - c.time_last
        c.total_avg_cpu = 0
        cpu_occupancy = []
        if debug:
            collectd.info('%s cpu time ; this pass ; %s -> %s' %
                          (PLUGIN, c.cpu_time_last, c.cpu_time))

        if c.cpu_list:
            # This is a compute node.
            # Do not include vswitch or pinned cpus in calculation.
            for cpu in c.cpu_list:
                if cpu >= c.processors:
                    c.log_error(' got out of range cpu number')
                else:
                    _delta = (c.cpu_time[_cpu_count] - c.cpu_time_last[_cpu_count])
                    _delta = _delta / 1000000 / _time_delta
                    cpu_occupancy.append(float((100 * (_delta)) / 1000))
                    c.total_avg_cpu += cpu_occupancy[_cpu_count]
                    if debug:
                        collectd.info('%s cpu %d - count:%d [%s]' %
                                      (PLUGIN, cpu, _cpu_count, cpu_occupancy))
                    _cpu_count += 1

        else:
            collectd.info('%s no cpus to monitor' % PLUGIN)
            return 0

        c.usage = c.total_avg_cpu / _cpu_count
        if debug:
            collectd.info('%s reports %.2f %% usage (averaged)' %
                          (PLUGIN, c.usage))

        # Prepare for next audit ; mode now to last
        # c.cpu_time_last = []
        c.cpu_time_last = c.cpu_time
        c.time_last = _time_this

    # if os.path.exists('/var/run/fit/cpu_data'):
    #     with open('/var/run/fit/cpu_data', 'r') as infile:
    #         for line in infile:
    #             c.usage = float(line)
    #             collectd.info("%s using FIT data:%.2f" %
    #                           (PLUGIN, c.usage))
    #             break

    # Dispatch usage value to collectd
    val = collectd.Values(host=c.hostname)
    val.plugin = 'cpu'
    val.type = 'percent'
    val.type_instance = 'used'
    val.dispatch(values=[c.usage])

    return 0


collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func)
@@ -1,41 +0,0 @@
<Plugin df>
    ValuesPercentage true
    IgnoreSelected false
    ReportByDevice false
    ReportInodes false
    ValuesAbsolute false
    MountPoint "/"
    MountPoint "/tmp"
    MountPoint "/dev"
    MountPoint "/dev/shm"
    MountPoint "/var/run"
    MountPoint "/var/log"
    MountPoint "/var/lock"
    MountPoint "/boot"
    MountPoint "/scratch"
    MountPoint "/opt/etcd"
    MountPoint "/opt/platform"
    MountPoint "/opt/extension"
    MountPoint "/var/lib/rabbitmq"
    MountPoint "/var/lib/postgresql"
    MountPoint "/var/lib/ceph/mon"
    MountPoint "/var/lib/docker"
    MountPoint "/var/lib/docker-distribution"
    MountPoint "/var/lib/kubelet"
    MountPoint "/var/lib/nova/instances"
    MountPoint "/opt/backups"
</Plugin>

<Plugin "threshold">
    <Plugin "df">
        <Type "percent_bytes">
            Instance "used"
            WarningMax 80.00
            FailureMax 90.00
            Persist true
            PersistOK true
            Hits 2
            Invert false
        </Type>
    </Plugin>
</Plugin>
@@ -1,13 +0,0 @@
<Plugin "threshold">
    <Plugin "example">
        <Type "percent">
            Instance "used"
            Persist true
            PersistOK true
            WarningMax 49.00
            FailureMax 74.00
            Hits 1
            Invert false
        </Type>
    </Plugin>
</Plugin>
@@ -1,73 +0,0 @@
#
# Copyright (c) 2018 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

import os
import random
import collectd

PLUGIN = 'random number plugin'

# static variables


# define a class here that will persist over read calls
class ExampleObject:
    hostname = ""
    plugin_data = ['1', '100']


obj = ExampleObject()


# The config function - called once on collectd process startup
def config_func(config):
    """Configure the plugin"""

    for node in config.children:
        key = node.key.lower()
        val = node.values[0]

        if key == 'data':
            obj.plugin_data = str(val).split(' ')
            collectd.info("%s configured data '%d:%d'" %
                          (PLUGIN,
                           int(obj.plugin_data[0]),
                           int(obj.plugin_data[1])))
            return 0

    collectd.info('%s config function' % PLUGIN)
    return 0


# The init function - called once on collectd process startup
def init_func():

    # get current hostname
    obj.hostname = os.uname()[1]
    return 0


# The sample read function - called on every audit interval
def read_func():

    # do the work to create the sample
    low = int(obj.plugin_data[0])
    high = int(obj.plugin_data[1])
    sample = random.randint(low, high)

    # Dispatch usage value to collectd
    val = collectd.Values(host=obj.hostname)
    val.plugin = 'example'
    val.type = 'percent'
    val.type_instance = 'used'
    val.dispatch(values=[sample])
    return 0


# register the config, init and read functions
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func)
File diff suppressed because it is too large
@@ -1,13 +0,0 @@
<Plugin "threshold">
    <Plugin "interface">
        <Type "percent">
            Instance "used"
            Persist true
            PersistOK true
            WarningMin 51
            FailureMin 1
            # Hits 2
            Invert false
        </Type>
    </Plugin>
</Plugin>
@@ -1,981 +0,0 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
############################################################################
#
# This is the Host Interface Monitor plugin for collectd.
#
# Only mgmt, cluster-host and oam interfaces are supported with the following
# mapping specified in /etc/platform/platform.conf
#
#   oam - oam_interface            | controller | mandatory
# mgmnt - management_interface     | all hosts  | mandatory
# clstr - cluster_host_interface   | any host   | optional
#
# This plugin queries the maintenance Link Monitor daemon 'lmon'
# for a link status summary of that hosts configured networks.
#
# This plugin's read_func issues an http GET request to the Link Monitor
# which responds with a json string that represents a complete summary
# of the monitored links, state and the time of the last event or when
# initial status was learned. An example of the Link Monitor response is
#
# {
#     "status" : "pass"
#     "link_info": [
#         { "network":"mgmt",
#           "type":"vlan",
#           "links": [
#                 { "name":"enp0s8.1", "state":"Up", "time":"5674323454567" },
#                 { "name":"enp0s8.2", "state":"Up", "time":"5674323454567" }]
#         },
#         { "network":"clstr",
#           "type":"bond",
#           "bond":"bond0",
#           "links": [
#                 { "name":"enp0s9f1", "state":"Down", "time":"5674323454567" },
#                 { "name":"enp0s9f0", "state":"Up" , "time":"5674323454567" }]
#         },
#         { "network":"oam",
#           "type":"single",
#           "links": [
#                 { "name":"enp0s3", "state":"Up", "time":"5674323454567" }]
#         }]
# }
#
# On failure
#
# {
#     "status" : "fail ; bad request <or other text based reason>"
# }
#
# This plugin then uses this information to manage interface alarm
# assertion and clear with appropriate severity.
#
# Severity: Interface and Port levels
#
#  Alarm Level   Minor   Major                   Critical
#  -----------   -----   ---------------------   ----------------------------
#  Interface     N/A     One of lag pair is Up   All Interface ports are Down
#  Port          N/A     Physical Link is Down   N/A
#
# Sample Data: represented as % of total links Up for that network interface
#
#  100 or 100% percent used - all links of interface are up.
#   50 or  50% percent used - one of lag pair is Up and the other is Down
#    0 or   0% percent used - all ports for that network are Down
#
############################################################################

import os
import time
import datetime
import collectd
import plugin_common as pc
from fm_api import constants as fm_constants
from fm_api import fm_api

# Fault manager API Object
api = fm_api.FaultAPIsV2()

# name of the plugin - all logs produced by this plugin are prefixed with this
PLUGIN = 'interface plugin'

# Interface Monitoring Interval in seconds
PLUGIN_AUDIT_INTERVAL = 10

# Sample Data 'type' and 'instance' database field values.
PLUGIN_TYPE = 'percent'
PLUGIN_TYPE_INSTANCE = 'usage'

# The Link Status Query URL
PLUGIN_HTTP_URL_PREFIX = 'http://localhost:'

# This plugin's timeout
PLUGIN_HTTP_TIMEOUT = 5

# Specify the link monitor as the maintenance destination service
# full path should look like ; http://localhost:2122/mtce/lmon
PLUGIN_HTTP_URL_PATH = '/mtce/lmon'

# Port and Interface Alarm Identifiers
PLUGIN_OAM_PORT_ALARMID = '100.106'      # OAM Network Port
PLUGIN_OAM_IFACE_ALARMID = '100.107'     # OAM Network Interface

PLUGIN_MGMT_PORT_ALARMID = '100.108'     # Management Network Port
PLUGIN_MGMT_IFACE_ALARMID = '100.109'    # Management Network Interface

PLUGIN_CLSTR_PORT_ALARMID = '100.110'    # Cluster-host Network Port
PLUGIN_CLSTR_IFACE_ALARMID = '100.111'   # Cluster-host Nwk Interface

# List of all alarm identifiers.
ALARM_ID_LIST = [PLUGIN_OAM_PORT_ALARMID,
                 PLUGIN_OAM_IFACE_ALARMID,
                 PLUGIN_MGMT_PORT_ALARMID,
                 PLUGIN_MGMT_IFACE_ALARMID,
                 PLUGIN_CLSTR_PORT_ALARMID,
                 PLUGIN_CLSTR_IFACE_ALARMID]

# Monitored Network Name Strings
NETWORK_MGMT = 'mgmt'
NETWORK_CLSTR = 'cluster-host'
NETWORK_OAM = 'oam'

# Port / Interface State strings
LINK_UP = 'Up'
LINK_DOWN = 'Down'

# Alarm control actions
ALARM_ACTION_RAISE = 'raise'
ALARM_ACTION_CLEAR = 'clear'

# Alarm level.
# Ports are the lowest level and represent a physical link
# Interfaces are port groupings in terms of LAG
LEVEL_PORT = 'port'
LEVEL_IFACE = 'interface'

# Run phases
RUN_PHASE__INIT = 0
RUN_PHASE__ALARMS_CLEARED = 1
RUN_PHASE__HTTP_REQUEST_PASS = 2

# Link Object (aka Port or Physical interface) Structure
|
||||
# and member functions.
|
||||
class LinkObject:
|
||||
|
||||
def __init__(self, alarm_id):
|
||||
|
||||
self.name = None
|
||||
self.state = LINK_UP
|
||||
self.timestamp = float(0)
|
||||
self.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
|
||||
self.alarm_id = alarm_id
|
||||
self.state_change = True
|
||||
|
||||
collectd.debug("%s LinkObject constructor: %s" %
|
||||
(PLUGIN, alarm_id))
|
||||
|
||||
##################################################################
|
||||
#
|
||||
# Name : raise_port_alarm
|
||||
#
|
||||
# Purpose : This link object member function is used to
|
||||
# raise link/port alarms.
|
||||
#
|
||||
# Parameters : Network the link is part of.
|
||||
#
|
||||
# Returns : False on failure
|
||||
# True on success
|
||||
#
|
||||
##################################################################
|
||||
def raise_port_alarm(self, network):
|
||||
"""Raise a port alarm"""
|
||||
|
||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_MAJOR:
|
||||
|
||||
if manage_alarm(self.name,
|
||||
network,
|
||||
LEVEL_PORT,
|
||||
ALARM_ACTION_RAISE,
|
||||
fm_constants.FM_ALARM_SEVERITY_MAJOR,
|
||||
self.alarm_id,
|
||||
self.timestamp) is True:
|
||||
|
||||
self.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
|
||||
collectd.info("%s %s %s port alarm raised" %
|
||||
(PLUGIN, self.name, self.alarm_id))
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
##################################################################
|
||||
#
|
||||
# Name : clear_port_alarm
|
||||
#
|
||||
# Purpose : This link object member function is used to
|
||||
# clear link/port alarms.
|
||||
#
|
||||
# Parameters : Network the link is part of.
|
||||
#
|
||||
# Returns : False on failure
|
||||
# True on success.
|
||||
#
|
||||
##################################################################
|
||||
def clear_port_alarm(self, network):
|
||||
"""Clear a port alarm"""
|
||||
|
||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_CLEAR:
|
||||
if manage_alarm(self.name,
|
||||
network,
|
||||
LEVEL_PORT,
|
||||
ALARM_ACTION_CLEAR,
|
||||
fm_constants.FM_ALARM_SEVERITY_CLEAR,
|
||||
self.alarm_id,
|
||||
self.timestamp) is True:
|
||||
|
||||
collectd.info("%s %s %s port alarm cleared" %
|
||||
(PLUGIN, self.name, self.alarm_id))
|
||||
self.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
|
||||
# Interface (aka Network) Level Object Structure and member functions
|
||||
class NetworkObject:
|
||||
|
||||
def __init__(self, name):
|
||||
|
||||
self.name = name
|
||||
self.sample = 0
|
||||
self.sample_last = 0
|
||||
self.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
|
||||
self.degraded = False
|
||||
self.timestamp = float(0)
|
||||
|
||||
# add the respective alarm IDs to each object
|
||||
alarm_id = None
|
||||
if name == NETWORK_OAM:
|
||||
alarm_id = PLUGIN_OAM_PORT_ALARMID
|
||||
self.alarm_id = PLUGIN_OAM_IFACE_ALARMID
|
||||
elif name == NETWORK_MGMT:
|
||||
alarm_id = PLUGIN_MGMT_PORT_ALARMID
|
||||
self.alarm_id = PLUGIN_MGMT_IFACE_ALARMID
|
||||
elif name == NETWORK_CLSTR:
|
||||
alarm_id = PLUGIN_CLSTR_PORT_ALARMID
|
||||
self.alarm_id = PLUGIN_CLSTR_IFACE_ALARMID
|
||||
else:
|
||||
self.alarm_id = ""
|
||||
collectd.error("%s unexpected network (%s)" % (PLUGIN, name))
|
||||
|
||||
collectd.debug("%s %s NetworkObject constructor: %s" %
|
||||
(PLUGIN, name, self.alarm_id))
|
||||
|
||||
if alarm_id:
|
||||
self.link_one = LinkObject(alarm_id)
|
||||
self.link_two = LinkObject(alarm_id)
|
||||
|
||||
##################################################################
|
||||
#
|
||||
# Name : raise_iface_alarm
|
||||
#
|
||||
# Purpose : This network object member function used to
|
||||
# raise interface alarms.
|
||||
#
|
||||
# Parameters : None
|
||||
#
|
||||
# Returns : False on failure
|
||||
# True on success
|
||||
#
|
||||
##################################################################
|
||||
def raise_iface_alarm(self, severity):
|
||||
"""Raise an interface alarm"""
|
||||
|
||||
if severity == fm_constants.FM_ALARM_SEVERITY_CLEAR:
|
||||
collectd.error("%s %s raise alarm called with clear severity" %
|
||||
(PLUGIN, self.name))
|
||||
return True
|
||||
|
||||
if self.severity != severity:
|
||||
if manage_alarm(self.name,
|
||||
self.name,
|
||||
LEVEL_IFACE,
|
||||
ALARM_ACTION_RAISE,
|
||||
severity,
|
||||
self.alarm_id,
|
||||
self.timestamp) is True:
|
||||
|
||||
self.severity = severity
|
||||
collectd.info("%s %s %s %s interface alarm raised" %
|
||||
(PLUGIN,
|
||||
self.name,
|
||||
self.alarm_id,
|
||||
pc.get_severity_str(severity)))
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
##################################################################
|
||||
#
|
||||
# Name : clear_iface_alarm
|
||||
#
|
||||
# Purpose : This network object member function used to
|
||||
# clear interface alarms.
|
||||
#
|
||||
# Parameters : None
|
||||
#
|
||||
# Returns : False on failure
|
||||
# True on success.
|
||||
#
|
||||
##################################################################
|
||||
def clear_iface_alarm(self):
|
||||
"""Clear an interface alarm"""
|
||||
|
||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_CLEAR:
|
||||
if manage_alarm(self.name,
|
||||
self.name,
|
||||
LEVEL_IFACE,
|
||||
ALARM_ACTION_CLEAR,
|
||||
fm_constants.FM_ALARM_SEVERITY_CLEAR,
|
||||
self.alarm_id,
|
||||
self.timestamp) is True:
|
||||
|
||||
collectd.info("%s %s %s %s interface alarm cleared" %
|
||||
(PLUGIN,
|
||||
self.name,
|
||||
self.alarm_id,
|
||||
pc.get_severity_str(self.severity)))
|
||||
self.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
######################################################################
|
||||
#
|
||||
# Name : manage_iface_alarm
|
||||
#
|
||||
# Purpose : clear or raise appropriate severity level interface alarm
|
||||
#
|
||||
# Returns : None
|
||||
#
|
||||
######################################################################
|
||||
def manage_iface_alarm(self):
|
||||
# Single Link Config
|
||||
if self.link_two.name is None:
|
||||
if self.link_one.state == LINK_DOWN:
|
||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_CRITICAL:
|
||||
self.timestamp = self.link_one.timestamp
|
||||
self.raise_iface_alarm(
|
||||
fm_constants.FM_ALARM_SEVERITY_CRITICAL)
|
||||
elif self.link_one.state == LINK_UP:
|
||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_CLEAR:
|
||||
self.clear_iface_alarm()
|
||||
|
||||
# Lagged Link Config
|
||||
#
|
||||
# The interface level timestamp is updated based on the failed
|
||||
# link timestamps
|
||||
elif self.link_one.state == LINK_UP and \
|
||||
self.link_two.state == LINK_DOWN:
|
||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_MAJOR:
|
||||
self.timestamp = self.link_two.timestamp
|
||||
self.raise_iface_alarm(fm_constants.FM_ALARM_SEVERITY_MAJOR)
|
||||
|
||||
elif self.link_one.state == LINK_DOWN and \
|
||||
self.link_two.state == LINK_UP:
|
||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_MAJOR:
|
||||
self.timestamp = self.link_one.timestamp
|
||||
self.raise_iface_alarm(fm_constants.FM_ALARM_SEVERITY_MAJOR)
|
||||
|
||||
elif self.link_one.state == LINK_UP and self.link_two.state == LINK_UP:
|
||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_CLEAR:
|
||||
self.clear_iface_alarm()
|
||||
|
||||
elif self.link_one.state == LINK_DOWN and \
|
||||
self.link_two.state == LINK_DOWN:
|
||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_CRITICAL:
|
||||
if self.link_one.timestamp > self.link_two.timestamp:
|
||||
self.timestamp = self.link_one.timestamp
|
||||
else:
|
||||
self.timestamp = self.link_two.timestamp
|
||||
self.raise_iface_alarm(fm_constants.FM_ALARM_SEVERITY_CRITICAL)
|
||||
|
||||
|
||||
# Plugin Control Object
|
||||
obj = pc.PluginObject(PLUGIN, PLUGIN_HTTP_URL_PREFIX)
|
||||
|
||||
|
||||
# Network Object List - Primary Network/Link Control Object
|
||||
NETWORKS = [NetworkObject(NETWORK_MGMT),
|
||||
NetworkObject(NETWORK_OAM),
|
||||
NetworkObject(NETWORK_CLSTR)]
|
||||
|
||||
|
||||
##########################################################################
|
||||
#
|
||||
# Name : get_timestamp
|
||||
#
|
||||
# Purpose : Convert the long long int microsecond time as string
|
||||
# that accompany link info from the Link Monitor (lmond)
|
||||
# and catch exceptions in doing so.
|
||||
#
|
||||
# Parameters: lmon_time - long long int as string
|
||||
#
|
||||
# Returns : float time that can be consumed by datetime.fromtimestamp
|
||||
#
|
||||
# Returns same unit of now time if provided lmon_time is
|
||||
# invalid.
|
||||
#
|
||||
##########################################################################
|
||||
def get_timestamp(lmon_time):
|
||||
"""Convert lmon time to fm timestamp time"""
|
||||
|
||||
if lmon_time:
|
||||
try:
|
||||
return(float(float(lmon_time) / 1000000))
|
||||
except:
|
||||
collectd.error("%s failed to parse timestamp ;"
|
||||
" using current time" % PLUGIN)
|
||||
else:
|
||||
collectd.error("%s no timestamp ;"
|
||||
" using current time" % PLUGIN)
|
||||
|
||||
return(float(time.time()))
|
||||
|
||||
|
||||
def dump_network_info(network):
|
||||
"""Log the specified network info"""
|
||||
|
||||
link_one_event_time = datetime.datetime.fromtimestamp(
|
||||
float(network.link_one.timestamp)).strftime('%Y-%m-%d %H:%M:%S')
|
||||
|
||||
link_two_info = ''
|
||||
if network.link_two.name is not None:
|
||||
link_two_event_time = datetime.datetime.fromtimestamp(
|
||||
float(network.link_two.timestamp)).strftime('%Y-%m-%d %H:%M:%S')
|
||||
|
||||
link_two_info += "; link two '"
|
||||
link_two_info += network.link_two.name
|
||||
link_two_info += "' went " + network.link_two.state
|
||||
link_two_info += " at " + link_two_event_time
|
||||
|
||||
pcnt = '%'
|
||||
|
||||
collectd.info("%s %5s %3d%c ; "
|
||||
"link one '%s' went %s at %s %s" %
|
||||
(PLUGIN,
|
||||
network.name,
|
||||
network.sample,
|
||||
pcnt,
|
||||
network.link_one.name,
|
||||
network.link_one.state,
|
||||
link_one_event_time,
|
||||
link_two_info))
|
||||
|
||||
|
||||
#########################################################################
|
||||
#
|
||||
# Name : this_hosts_alarm
|
||||
#
|
||||
# Purpose : Determine if the supplied eid is for this host.
|
||||
#
|
||||
# Description: The eid formats for the alarms managed by this plugin are
|
||||
#
|
||||
# host=<hostname>.port=<port_name>
|
||||
# host=<hostname>.interface=<network_name>
|
||||
#
|
||||
# Assumptions: There is no restriction preventing the system
|
||||
# administrator from creating hostnames with period's ('.')
|
||||
# in them. Because so the eid cannot simply be split
|
||||
# around '='s and '.'s. Instead its split around this
|
||||
# plugins level type '.port' or '.interface'.
|
||||
#
|
||||
# Returns : True if hostname is a match
|
||||
# False otherwise
|
||||
#
|
||||
##########################################################################
|
||||
def this_hosts_alarm(hostname, eid):
|
||||
"""Check if the specified eid is for this host"""
|
||||
|
||||
if hostname:
|
||||
if eid:
|
||||
# 'host=controller-0.interface=mgmt'
|
||||
try:
|
||||
eid_host = None
|
||||
eid_disected = eid.split('=')
|
||||
if len(eid_disected) == 3:
|
||||
# ['host', 'controller-0.interface', 'mgmt']
|
||||
if len(eid_disected[1].split('.port')) == 2:
|
||||
eid_host = eid_disected[1].split('.port')[0]
|
||||
if eid_host and eid_host == hostname:
|
||||
return True
|
||||
elif len(eid_disected[1].split('.interface')) == 2:
|
||||
eid_host = eid_disected[1].split('.interface')[0]
|
||||
if eid_host and eid_host == hostname:
|
||||
return True
|
||||
except Exception as ex:
|
||||
collectd.error("%s failed to parse alarm eid (%s)"
|
||||
" [eid:%s]" % (PLUGIN, str(ex), eid))
|
||||
|
||||
return False
|
||||
|
||||
|
||||
##########################################################################
|
||||
#
|
||||
# Name : clear_alarms
|
||||
#
|
||||
# Purpose : Clear all interface alarms on process startup.
|
||||
#
|
||||
# Description: Called after first successful Link Status query.
|
||||
#
|
||||
# Loops over the provided alarm id list querying all alarms
|
||||
# for each. Any that are raised are precisely cleared.
|
||||
#
|
||||
# Prevents stuck alarms over port and interface reconfig.
|
||||
#
|
||||
# If the original alarm case still exists the alarm will
|
||||
# be re-raised with the original link event timestamp that
|
||||
# is part of the Link Status query response.
|
||||
#
|
||||
# Parameters : A list of this plugin's alarm ids
|
||||
#
|
||||
# Returns : True on Success
|
||||
# False on Failure
|
||||
#
|
||||
##########################################################################
|
||||
def clear_alarms(alarm_id_list):
|
||||
"""Clear alarm state of all plugin alarms"""
|
||||
found = False
|
||||
for alarm_id in alarm_id_list:
|
||||
|
||||
try:
|
||||
alarms = api.get_faults_by_id(alarm_id)
|
||||
except Exception as ex:
|
||||
collectd.error("%s 'get_faults_by_id' exception ;"
|
||||
" %s ; %s" %
|
||||
(PLUGIN, alarm_id, ex))
|
||||
return False
|
||||
|
||||
if alarms:
|
||||
for alarm in alarms:
|
||||
eid = alarm.entity_instance_id
|
||||
if this_hosts_alarm(obj.hostname, eid) is False:
|
||||
# ignore other host alarms
|
||||
continue
|
||||
|
||||
if alarm_id == PLUGIN_OAM_PORT_ALARMID or \
|
||||
alarm_id == PLUGIN_OAM_IFACE_ALARMID or \
|
||||
alarm_id == PLUGIN_MGMT_PORT_ALARMID or \
|
||||
alarm_id == PLUGIN_MGMT_IFACE_ALARMID or \
|
||||
alarm_id == PLUGIN_CLSTR_PORT_ALARMID or \
|
||||
alarm_id == PLUGIN_CLSTR_IFACE_ALARMID:
|
||||
|
||||
try:
|
||||
if api.clear_fault(alarm_id, eid) is False:
|
||||
collectd.info("%s %s:%s:%s alarm already cleared" %
|
||||
(PLUGIN,
|
||||
alarm.severity,
|
||||
alarm_id,
|
||||
eid))
|
||||
else:
|
||||
found = True
|
||||
collectd.info("%s %s:%s:%s alarm cleared" %
|
||||
(PLUGIN,
|
||||
alarm.severity,
|
||||
alarm_id,
|
||||
eid))
|
||||
except Exception as ex:
|
||||
collectd.error("%s 'clear_fault' exception ; "
|
||||
"%s:%s ; %s" %
|
||||
(PLUGIN, alarm_id, eid, ex))
|
||||
return False
|
||||
if found is False:
|
||||
collectd.info("%s found no startup alarms" % PLUGIN)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
##########################################################################
|
||||
#
|
||||
# Name : manage_alarm
|
||||
#
|
||||
# Purpose : Raises or clears port and interface alarms based on
|
||||
# calling parameters.
|
||||
#
|
||||
# Returns : True on success
|
||||
# False on failure
|
||||
#
|
||||
##########################################################################
|
||||
def manage_alarm(name, network, level, action, severity, alarm_id, timestamp):
|
||||
"""Manage raise and clear of port and interface alarms"""
|
||||
|
||||
ts = datetime.datetime.fromtimestamp(
|
||||
float(timestamp)).strftime('%Y-%m-%d %H:%M:%S')
|
||||
collectd.debug("%s %s %s %s alarm for %s:%s [%s] %s" % (PLUGIN,
|
||||
severity, level, alarm_id, network, name, action, ts))
|
||||
|
||||
if action == ALARM_ACTION_CLEAR:
|
||||
alarm_state = fm_constants.FM_ALARM_STATE_CLEAR
|
||||
reason = ''
|
||||
repair = ''
|
||||
else:
|
||||
# reason ad repair strings are only needed on alarm assertion
|
||||
alarm_state = fm_constants.FM_ALARM_STATE_SET
|
||||
reason = "'" + network.upper() + "' " + level
|
||||
repair = 'Check cabling and far-end port configuration ' \
|
||||
'and status on adjacent equipment.'
|
||||
|
||||
# build the alarm eid and name string
|
||||
if level == LEVEL_PORT:
|
||||
eid = 'host=' + obj.hostname + "." + level + '=' + name
|
||||
reason += " failed"
|
||||
else:
|
||||
eid = 'host=' + obj.hostname + "." + level + '=' + network
|
||||
if severity == fm_constants.FM_ALARM_SEVERITY_MAJOR:
|
||||
reason += " degraded"
|
||||
else:
|
||||
reason += " failed"
|
||||
|
||||
if alarm_state == fm_constants.FM_ALARM_STATE_CLEAR:
|
||||
try:
|
||||
if api.clear_fault(alarm_id, eid) is False:
|
||||
collectd.info("%s %s:%s alarm already cleared" %
|
||||
(PLUGIN, alarm_id, eid))
|
||||
else:
|
||||
collectd.info("%s %s:%s alarm cleared" %
|
||||
(PLUGIN, alarm_id, eid))
|
||||
return True
|
||||
|
||||
except Exception as ex:
|
||||
collectd.error("%s 'clear_fault' failed ; %s:%s ; %s" %
|
||||
(PLUGIN, alarm_id, eid, ex))
|
||||
return False
|
||||
|
||||
else:
|
||||
fault = fm_api.Fault(
|
||||
uuid="",
|
||||
alarm_id=alarm_id,
|
||||
alarm_state=alarm_state,
|
||||
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
|
||||
entity_instance_id=eid,
|
||||
severity=severity,
|
||||
reason_text=reason,
|
||||
alarm_type=fm_constants.FM_ALARM_TYPE_7,
|
||||
probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN,
|
||||
proposed_repair_action=repair,
|
||||
service_affecting=True,
|
||||
timestamp=ts,
|
||||
suppression=True)
|
||||
|
||||
try:
|
||||
alarm_uuid = api.set_fault(fault)
|
||||
except Exception as ex:
|
||||
collectd.error("%s 'set_fault' exception ; %s:%s ; %s" %
|
||||
(PLUGIN, alarm_id, eid, ex))
|
||||
return False
|
||||
|
||||
if pc.is_uuid_like(alarm_uuid) is False:
|
||||
collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
|
||||
(PLUGIN, alarm_id, eid, alarm_uuid))
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
|
||||
# The config function - called once on collectd process startup
|
||||
def config_func(config):
|
||||
"""Configure the plugin"""
|
||||
|
||||
# Need to update the Link Status Query URL with the port number.
|
||||
url_updated = False
|
||||
|
||||
# The Link Monitor port number is first searched for in
|
||||
# the /etc/mtc/lmond.conf file.
|
||||
# If its not there then its taken from the plugin config.
|
||||
|
||||
# /etc/mtc/lmond.conf
|
||||
fn = '/etc/mtc/lmond.conf'
|
||||
if (os.path.exists(fn)):
|
||||
try:
|
||||
with open(fn, 'r') as infile:
|
||||
for line in infile:
|
||||
if 'lmon_query_port' in line:
|
||||
if isinstance(int(line.split()[2]), int):
|
||||
|
||||
# add the port
|
||||
obj.url += line.split()[2]
|
||||
|
||||
# add the path /mtce/lmon
|
||||
obj.url += PLUGIN_HTTP_URL_PATH
|
||||
|
||||
url_updated = "config file"
|
||||
break
|
||||
except EnvironmentError as e:
|
||||
collectd.error(str(e), UserWarning)
|
||||
|
||||
if url_updated is False:
|
||||
# Try the config as this might be updated by manifest
|
||||
for node in config.children:
|
||||
key = node.key.lower()
|
||||
val = int(node.values[0])
|
||||
if key == 'port':
|
||||
if isinstance(int(val), int):
|
||||
|
||||
# add the port
|
||||
obj.url += str(val)
|
||||
|
||||
# add the path /mtce/lmon
|
||||
obj.url += PLUGIN_HTTP_URL_PATH
|
||||
|
||||
url_updated = "manifest"
|
||||
break
|
||||
|
||||
if url_updated:
|
||||
collectd.info("%s configured by %s [%s]" %
|
||||
(PLUGIN, url_updated, obj.url))
|
||||
obj.config_done = True
|
||||
else:
|
||||
collectd.error("%s config failure ; cannot monitor" %
|
||||
(PLUGIN))
|
||||
return 0
|
||||
|
||||
|
||||
# The init function - called once on collectd process startup
|
||||
def init_func():
|
||||
"""Init the plugin"""
|
||||
|
||||
if obj.config_done is False:
|
||||
collectd.info("%s configuration failed" % PLUGIN)
|
||||
time.sleep(300)
|
||||
return False
|
||||
|
||||
if obj.init_done is False:
|
||||
if obj.init_ready() is False:
|
||||
return 0
|
||||
|
||||
obj.hostname = obj.gethostname()
|
||||
obj.init_done = True
|
||||
collectd.info("%s initialization complete" % PLUGIN)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
# The sample read function - called on every audit interval
|
||||
def read_func():
|
||||
"""collectd interface monitor plugin read function"""
|
||||
|
||||
if obj.init_done is False:
|
||||
init_func()
|
||||
return 0
|
||||
|
||||
if obj.phase < RUN_PHASE__ALARMS_CLEARED:
|
||||
|
||||
# clear all alarms on first audit
|
||||
#
|
||||
# block on fm availability
|
||||
#
|
||||
# If the existing raised alarms are still valid then
|
||||
# they will be re-raised with the same timestamp the
|
||||
# original event occurred at once auditing resumes.
|
||||
if clear_alarms(ALARM_ID_LIST) is False:
|
||||
collectd.error("%s failed to clear existing alarms ; "
|
||||
"retry next audit" % PLUGIN)
|
||||
|
||||
# Don't proceed till we can communicate with FM and
|
||||
# clear all existing interface and port alarms.
|
||||
return 0
|
||||
else:
|
||||
obj.phase = RUN_PHASE__ALARMS_CLEARED
|
||||
|
||||
# Throttle HTTP request error retries
|
||||
if obj.http_retry_count != 0:
|
||||
obj.http_retry_count += 1
|
||||
if obj.http_retry_count > obj.HTTP_RETRY_THROTTLE:
|
||||
obj.http_retry_count = 0
|
||||
return 0
|
||||
|
||||
# Issue query and construct the monitoring object
|
||||
success = obj.make_http_request(to=PLUGIN_HTTP_TIMEOUT)
|
||||
|
||||
if success is False:
|
||||
obj.http_retry_count += 1
|
||||
return 0
|
||||
|
||||
if len(obj.jresp) == 0:
|
||||
collectd.error("%s no json response from http request" % PLUGIN)
|
||||
obj.http_retry_count += 1
|
||||
return 0
|
||||
|
||||
# Check query status
|
||||
try:
|
||||
if obj.jresp['status'] != 'pass':
|
||||
collectd.error("%s link monitor query %s" %
|
||||
(PLUGIN, obj.jresp['status']))
|
||||
obj.http_retry_count += 1
|
||||
return 0
|
||||
|
||||
except Exception as ex:
|
||||
collectd.error("%s http request get reason failed ; %s" %
|
||||
(PLUGIN, str(ex)))
|
||||
collectd.info("%s resp:%d:%s" %
|
||||
(PLUGIN, len(obj.jresp), obj.jresp))
|
||||
obj.http_retry_count += 1
|
||||
return 0
|
||||
|
||||
# log the first query response
|
||||
if obj.audits == 0:
|
||||
collectd.info("%s Link Status Query Response:%d:\n%s" %
|
||||
(PLUGIN, len(obj.jresp), obj.jresp))
|
||||
|
||||
# uncomment below for debug purposes
|
||||
#
|
||||
# for network in NETWORKS:
|
||||
# dump_network_info(network)
|
||||
|
||||
try:
|
||||
link_info = obj.jresp['link_info']
|
||||
for network_link_info in link_info:
|
||||
collectd.debug("%s parse link info:%s" %
|
||||
(PLUGIN, network_link_info))
|
||||
for network in NETWORKS:
|
||||
if network.name == network_link_info['network']:
|
||||
links = network_link_info['links']
|
||||
nname = network.name
|
||||
if len(links) > 0:
|
||||
link_one = links[0]
|
||||
|
||||
# get initial link one name
|
||||
if network.link_one.name is None:
|
||||
network.link_one.name = link_one['name']
|
||||
|
||||
network.link_one.timestamp =\
|
||||
float(get_timestamp(link_one['time']))
|
||||
|
||||
# load link one state
|
||||
if link_one['state'] == LINK_UP:
|
||||
collectd.debug("%s %s IS Up [%s]" %
|
||||
(PLUGIN, network.link_one.name,
|
||||
network.link_one.state))
|
||||
if network.link_one.state != LINK_UP:
|
||||
network.link_one.state_change = True
|
||||
network.link_one.clear_port_alarm(nname)
|
||||
network.link_one.state = LINK_UP
|
||||
else:
|
||||
collectd.debug("%s %s IS Down [%s]" %
|
||||
(PLUGIN, network.link_one.name,
|
||||
network.link_one.state))
|
||||
if network.link_one.state == LINK_UP:
|
||||
network.link_one.state_change = True
|
||||
network.link_one.raise_port_alarm(nname)
|
||||
network.link_one.state = LINK_DOWN
|
||||
|
||||
if len(links) > 1:
|
||||
link_two = links[1]
|
||||
|
||||
# get initial link two name
|
||||
if network.link_two.name is None:
|
||||
network.link_two.name = link_two['name']
|
||||
|
||||
network.link_two.timestamp =\
|
||||
float(get_timestamp(link_two['time']))
|
||||
|
||||
# load link two state
|
||||
if link_two['state'] == LINK_UP:
|
||||
collectd.debug("%s %s IS Up [%s]" %
|
||||
(PLUGIN, network.link_two.name,
|
||||
network.link_two.state))
|
||||
if network.link_two.state != LINK_UP:
|
||||
network.link_two.state_change = True
|
||||
network.link_two.clear_port_alarm(nname)
|
||||
network.link_two.state = LINK_UP
|
||||
else:
|
||||
collectd.debug("%s %s IS Down [%s]" %
|
||||
(PLUGIN, network.link_two.name,
|
||||
network.link_two.state))
|
||||
if network.link_two.state == LINK_UP:
|
||||
network.link_two.state_change = True
|
||||
network.link_two.raise_port_alarm(nname)
|
||||
network.link_two.state = LINK_DOWN
|
||||
|
||||
# manage interface alarms
|
||||
network.manage_iface_alarm()
|
||||
|
||||
except Exception as ex:
|
||||
collectd.error("%s link monitor query parse exception ; %s " %
|
||||
(PLUGIN, str(ex)))
|
||||
|
||||
# handle state changes
|
||||
for network in NETWORKS:
|
||||
if network.link_two.name is not None and \
|
||||
network.link_one.state_change is True:
|
||||
|
||||
if network.link_one.state == LINK_UP:
|
||||
collectd.info("%s %s link one '%s' is Up" %
|
||||
(PLUGIN,
|
||||
network.name,
|
||||
network.link_one.name))
|
||||
else:
|
||||
collectd.info("%s %s link one '%s' is Down" %
|
||||
(PLUGIN,
|
||||
network.name,
|
||||
network.link_one.name))
|
||||
|
||||
if network.link_two.name is not None and \
|
||||
network.link_two.state_change is True:
|
||||
|
||||
if network.link_two.state == LINK_UP:
|
||||
collectd.info("%s %s link two '%s' is Up" %
|
||||
(PLUGIN,
|
||||
network.name,
|
||||
network.link_two.name))
|
||||
else:
|
||||
collectd.info("%s %s link two %s 'is' Down" %
|
||||
(PLUGIN,
|
||||
network.name,
|
||||
network.link_two.name))
|
||||
|
||||
# Dispatch usage value to collectd
|
||||
val = collectd.Values(host=obj.hostname)
|
||||
val.plugin = 'interface'
|
||||
val.type = 'percent'
|
||||
val.type_instance = 'used'
|
||||
|
||||
# For each interface [ mgmt, oam, infra ]
|
||||
# calculate the percentage used sample
|
||||
# sample = 100 % when all its links are up
|
||||
# sample = 0 % when all its links are down
|
||||
# sample = 50 % when one of a lagged group is down
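# Illustrative mapping (hypothetical link states):
#   mgmt  lagged, link one up, link two down -> sample = 50
#   oam   single link, link one up           -> sample = 100
#   infra single link, link one down         -> sample = 0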
|
||||
for network in NETWORKS:
|
||||
|
||||
if network.link_one.name is not None:
|
||||
|
||||
val.plugin_instance = network.name
|
||||
|
||||
network.sample = 0
|
||||
|
||||
if network.link_two.name is not None:
|
||||
# lagged
|
||||
|
||||
if network.link_one.state == LINK_UP:
|
||||
network.sample = 50
|
||||
if network.link_two.state == LINK_UP:
|
||||
network.sample += 50
|
||||
else:
|
||||
if network.link_one.state == LINK_UP:
|
||||
network.sample = 100
|
||||
val.dispatch(values=[network.sample])
|
||||
|
||||
if network.link_one.state_change is True or \
|
||||
network.link_two.state_change is True:
|
||||
|
||||
dump_network_info(network)
|
||||
|
||||
network.link_one.state_change = False
|
||||
network.link_two.state_change = False
|
||||
|
||||
network.sample_last = network.sample
|
||||
|
||||
else:
|
||||
collectd.debug("%s %s network not provisioned" %
|
||||
(PLUGIN, network.name))
|
||||
obj.audits += 1
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
# register the config, init and read functions
|
||||
collectd.register_config(config_func)
|
||||
collectd.register_init(init_func)
|
||||
collectd.register_read(read_func, interval=PLUGIN_AUDIT_INTERVAL)
|
@ -1,21 +0,0 @@
|
||||
# For stock plugin only
|
||||
# Uncomment to compare stock to tiS plugin readings
|
||||
# ---------------------
|
||||
# <Plugin memory>
|
||||
# ValuesAbsolute false
|
||||
# ValuesPercentage true
|
||||
# </Plugin>
|
||||
|
||||
<Plugin "threshold">
|
||||
<Plugin "memory">
|
||||
<Type "percent">
|
||||
Instance "used"
|
||||
Persist true
|
||||
PersistOK true
|
||||
WarningMax 80.00
|
||||
FailureMax 90.00
|
||||
Hits 2
|
||||
Invert false
|
||||
</Type>
|
||||
</Plugin>
|
||||
</Plugin>
|
@ -1,279 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2018-2019 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
############################################################################
|
||||
#
|
||||
# This file is the collectd 'Platform Memory Usage' Monitor.
|
||||
#
|
||||
# The Platform Memory Usage is calculated as a percentage of anonymous
|
||||
# memory used relative to the platform memory available.
|
||||
#
|
||||
# Init Function:
|
||||
# - read the hostname and the kernel memory 'overcommit' setting
|
||||
#
|
||||
############################################################################
|
||||
import os
|
||||
import collectd
|
||||
|
||||
debug = False
|
||||
|
||||
PLUGIN = 'platform memory usage'
|
||||
PLUGIN_NUMA = 'numa memory usage'
|
||||
PLUGIN_HUGE = 'hugepage memory usage'
|
||||
|
||||
|
||||
# Memory Control class
|
||||
class MEM:
|
||||
hostname = "" # hostname for sample notification message
|
||||
cmd = '/proc/meminfo' # the query command
|
||||
value = float(0.0) # float value of memory usage
|
||||
|
||||
# meminfo values we care about
|
||||
memTotal_kB = 0
|
||||
memFree_kB = 0
|
||||
buffers = 0
|
||||
cached = 0
|
||||
SReclaimable = 0
|
||||
CommitLimit = 0
|
||||
Committed_AS = 0
|
||||
HugePages_Total = 0
|
||||
HugePages_Free = 0
|
||||
Hugepagesize = 0
|
||||
AnonPages = 0
|
||||
FilePages = 0
|
||||
|
||||
# derived values
|
||||
avail = 0
|
||||
total = 0
|
||||
strict = 0
|
||||
|
||||
|
||||
# Instantiate the class
|
||||
obj = MEM()
|
||||
|
||||
|
||||
def log_meminfo(plugin, name, meminfo):
|
||||
"""Log the supplied meminfo"""
|
||||
|
||||
if debug is False:
|
||||
return
|
||||
|
||||
collectd.info("%s %s" % (plugin, name))
|
||||
collectd.info("%s ---------------------------" % plugin)
|
||||
collectd.info("%s memTotal_kB : %f" % (plugin, meminfo.memTotal_kB))
|
||||
collectd.info("%s memFree_kB : %f" % (plugin, meminfo.memFree_kB))
|
||||
collectd.info("%s Buffers : %f" % (plugin, meminfo.buffers))
|
||||
collectd.info("%s Cached : %f" % (plugin, meminfo.cached))
|
||||
collectd.info("%s SReclaimable : %f" % (plugin, meminfo.SReclaimable))
|
||||
collectd.info("%s CommitLimit : %f" % (plugin, meminfo.CommitLimit))
|
||||
collectd.info("%s Committed_AS : %f" % (plugin, meminfo.Committed_AS))
|
||||
collectd.info("%s HugePages_Total: %f" % (plugin, meminfo.HugePages_Total))
|
||||
collectd.info("%s HugePages_Free : %f" % (plugin, meminfo.HugePages_Free))
|
||||
collectd.info("%s Hugepagesize : %f" % (plugin, meminfo.Hugepagesize))
|
||||
collectd.info("%s AnonPages : %f" % (plugin, meminfo.AnonPages))
|
||||
|
||||
|
||||
def config_func(config):
|
||||
"""Configure the memory usage plugin"""
|
||||
|
||||
for node in config.children:
|
||||
key = node.key.lower()
|
||||
val = node.values[0]
|
||||
|
||||
if key == 'path':
|
||||
obj.cmd = str(val)
|
||||
collectd.info("%s configured query command: '%s'" %
|
||||
(PLUGIN, obj.cmd))
|
||||
return 0
|
||||
|
||||
collectd.info("%s no config command provided ; "
|
||||
"defaulting to '%s'" %
|
||||
(PLUGIN, obj.cmd))
|
||||
|
||||
|
||||
# Load the hostname and kernel memory 'overcommit' setting.
|
||||
def init_func():
|
||||
# get current hostname
|
||||
obj.hostname = os.uname()[1]
|
||||
|
||||
# get strict setting
|
||||
#
|
||||
# a value of 0 means "heuristic overcommit"
|
||||
# a value of 1 means "always overcommit"
|
||||
# a value of 2 means "don't overcommit".
|
||||
#
|
||||
# set strict=1 (true) if the value is 2
|
||||
# otherwise strict=0 (false, the default)
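# A minimal sketch of the resulting usage calculation (see read_func below):
#
#   strict == 1:  usage % = 100 * Committed_AS / CommitLimit
#   strict == 0:  usage % = 100 * AnonPages /
#                           (MemFree + Buffers + Cached + SReclaimable + AnonPages)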
|
||||
|
||||
fn = '/proc/sys/vm/overcommit_memory'
|
||||
if os.path.exists(fn):
|
||||
with open(fn, 'r') as infile:
|
||||
for line in infile:
|
||||
if int(line) == 2:
|
||||
obj.strict = 1
|
||||
break
|
||||
|
||||
collectd.info("%s strict:%d" % (PLUGIN, obj.strict))
|
||||
|
||||
|
||||
# Calculate the memory usage samples
|
||||
def read_func():
|
||||
meminfo = {}
|
||||
try:
|
||||
with open(obj.cmd) as fd:
|
||||
for line in fd:
|
||||
meminfo[line.split(':')[0]] = line.split(':')[1].strip()
|
||||
|
||||
except EnvironmentError as e:
|
||||
collectd.error("%s unable to read from %s ; str(e)" %
|
||||
(PLUGIN, obj.cmd, str(e)))
|
||||
return 0
|
||||
|
||||
# setup the sample structure
|
||||
val = collectd.Values(host=obj.hostname)
|
||||
val.type = 'percent'
|
||||
val.type_instance = 'used'
|
||||
|
||||
# fit_value = 0
|
||||
# if os.path.exists('/var/run/fit/mem_data'):
|
||||
# with open('/var/run/fit/mem_data', 'r') as infile:
|
||||
# for line in infile:
|
||||
# fit_value = float(line)
|
||||
# collectd.info("%s using FIT data:%.2f" %
|
||||
# (PLUGIN, fit_value))
|
||||
# break
|
||||
|
||||
# remove the 'unit' (kB) suffix that might be on some of the lines
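# e.g. a /proc/meminfo entry stored above as 'MemTotal': '16344960 kB'
# (hypothetical value) is reduced here to the float 16344960.0, while a
# unit-less entry such as 'HugePages_Total': '0' is converted directly.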
|
||||
for line in meminfo:
|
||||
# remove the units from the value read
|
||||
value_unit = [u.strip() for u in meminfo[line].split(' ', 1)]
|
||||
if len(value_unit) == 2:
|
||||
value, unit = value_unit
|
||||
meminfo[line] = float(value)
|
||||
else:
|
||||
meminfo[line] = float(meminfo[line])
|
||||
|
||||
obj.memTotal_kB = float(meminfo['MemTotal'])
|
||||
obj.memFree_kB = float(meminfo['MemFree'])
|
||||
obj.buffers = float(meminfo['Buffers'])
|
||||
obj.cached = float(meminfo['Cached'])
|
||||
obj.SReclaimable = float(meminfo['SReclaimable'])
|
||||
obj.CommitLimit = float(meminfo['CommitLimit'])
|
||||
obj.Committed_AS = float(meminfo['Committed_AS'])
|
||||
obj.HugePages_Total = float(meminfo['HugePages_Total'])
|
||||
obj.HugePages_Free = float(meminfo['HugePages_Free'])
|
||||
obj.Hugepagesize = float(meminfo['Hugepagesize'])
|
||||
obj.AnonPages = float(meminfo['AnonPages'])
|
||||
|
||||
log_meminfo(PLUGIN, "/proc/meminfo", obj)
|
||||
|
||||
obj.avail = float(float(obj.memFree_kB) +
|
||||
float(obj.buffers) +
|
||||
float(obj.cached) +
|
||||
float(obj.SReclaimable))
|
||||
obj.total = float(float(obj.avail) +
|
||||
float(obj.AnonPages))
|
||||
|
||||
if obj.strict == 1:
|
||||
obj.value = float(float(obj.Committed_AS) / float(obj.CommitLimit))
|
||||
else:
|
||||
obj.value = float(float(obj.AnonPages) / float(obj.total))
|
||||
obj.value = float(float(obj.value) * 100)
|
||||
|
||||
# if fit_value != 0:
|
||||
# obj.value = fit_value
|
||||
|
||||
if debug is True:
|
||||
collectd.info("%s ---------------------------" % PLUGIN)
|
||||
collectd.info("%s memAvail: %d" % (PLUGIN, obj.avail))
|
||||
collectd.info("%s memTotal: %d" % (PLUGIN, obj.total))
|
||||
collectd.info('%s reports %.2f %% usage' % (PLUGIN, obj.value))
|
||||
|
||||
# Dispatch usage value to collectd
|
||||
val.plugin = 'memory'
|
||||
val.plugin_instance = 'platform'
|
||||
val.dispatch(values=[obj.value])
|
||||
|
||||
#####################################################################
|
||||
# Now get the Numa Node Memory Usage
|
||||
#####################################################################
|
||||
numa_node_files = []
|
||||
fn = "/sys/devices/system/node/"
|
||||
files = os.listdir(fn)
|
||||
for file in files:
|
||||
if 'node' in file:
|
||||
numa_node_files.append(fn + file + '/meminfo')
|
||||
|
||||
for numa_node in numa_node_files:
|
||||
meminfo = {}
|
||||
try:
|
||||
with open(numa_node) as fd:
|
||||
for line in fd:
|
||||
meminfo[line.split()[2][0:-1]] = line.split()[3].strip()
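# e.g. a per-node line 'Node 0 MemFree:  8123456 kB' (hypothetical value)
# yields key 'MemFree' and value '8123456' with the parsing above.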
|
||||
|
||||
obj.memFree_kB = float(meminfo['MemFree'])
|
||||
obj.FilePages = float(meminfo['FilePages'])
|
||||
obj.SReclaimable = float(meminfo['SReclaimable'])
|
||||
obj.AnonPages = float(meminfo['AnonPages'])
|
||||
obj.HugePages_Total = float(meminfo['HugePages_Total'])
|
||||
obj.HugePages_Free = float(meminfo['HugePages_Free'])
|
||||
|
||||
log_meminfo(PLUGIN, numa_node, obj)
|
||||
|
||||
avail = float(float(obj.memFree_kB) +
|
||||
float(obj.FilePages) +
|
||||
float(obj.SReclaimable))
|
||||
total = float(float(avail) +
|
||||
float(obj.AnonPages))
|
||||
obj.value = float(float(obj.AnonPages)) / float(total)
|
||||
obj.value = float(float(obj.value) * 100)
|
||||
|
||||
# if fit_value != 0:
|
||||
# obj.value = fit_value
|
||||
|
||||
# Dispatch usage value to collectd for this numa node
|
||||
val.plugin_instance = numa_node.split('/')[5]
|
||||
val.dispatch(values=[obj.value])
|
||||
|
||||
collectd.debug('%s reports %s at %.2f %% usage (%s)' %
|
||||
(PLUGIN_NUMA,
|
||||
val.plugin,
|
||||
obj.value,
|
||||
val.plugin_instance))
|
||||
|
||||
# Numa Node Huge Page Memory Monitoring
|
||||
#
|
||||
# Only monitor if there is Huge Page Memory
|
||||
if obj.HugePages_Total > 0:
|
||||
obj.value = \
|
||||
float(float(obj.HugePages_Total -
|
||||
obj.HugePages_Free)) / \
|
||||
float(obj.HugePages_Total)
|
||||
obj.value = float(float(obj.value) * 100)
|
||||
|
||||
# if fit_value != 0:
|
||||
# obj.value = fit_value
|
||||
|
||||
# Dispatch huge page memory usage value
|
||||
# to collectd for this numa node.
|
||||
val.plugin_instance = numa_node.split('/')[5] + '_hugepages'
|
||||
val.dispatch(values=[obj.value])
|
||||
|
||||
collectd.debug('%s reports %s at %.2f %% usage (%s)' %
|
||||
(PLUGIN_HUGE,
|
||||
val.plugin,
|
||||
obj.value,
|
||||
val.plugin_instance))
|
||||
|
||||
except EnvironmentError as e:
|
||||
collectd.error("%s unable to read from %s ; str(e)" %
|
||||
(PLUGIN_NUMA, numa_node, str(e)))
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
collectd.register_config(config_func)
|
||||
collectd.register_init(init_func)
|
||||
collectd.register_read(read_func)
|
@ -1,380 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2018-2019 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
#############################################################################
|
||||
#
|
||||
# This file is the collectd 'Maintenance' Notifier.
|
||||
#
|
||||
# Collectd provides information about each event as an object passed to the
|
||||
# notification handler ; the notification object.
|
||||
#
|
||||
# object.host - the hostname
|
||||
#
|
||||
# object.plugin - the name of the plugin aka resource
|
||||
# object.plugin_instance - plugin instance string i.e. say mountpoint
|
||||
# for df plugin
|
||||
# object.type - the unit i.e. percent or absolute
|
||||
# object.type_instance - the attribute i.e. free, used, etc
|
||||
#
|
||||
# object.severity - an integer value ; 1=failure, 2=warning, 4=okay
|
||||
# object.message - a log-able message containing the above along
|
||||
# with the value
|
||||
#
|
||||
# This notifier manages requesting mtce to assert or clear its collectd
|
||||
# host-degrade-cause flag based on notification messages sent from collectd.
|
||||
#
|
||||
# Messages to maintenance are throttled to one in every ONE_EVERY while the
|
||||
# state remains the same as the last state.
|
||||
#
|
||||
# Message is sent on every state change
|
||||
# from clear to assert or
|
||||
# from assert to clear
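# Illustrative timeline with the ONE_EVERY = 10 default below
# (notification counts are hypothetical):
#   10 consecutive 'assert' notifications -> one message sent to mtce
#   an assert -> clear (or clear -> assert) transition -> sent immediately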
|
||||
#
|
||||
# See code comments for details.
|
||||
#
|
||||
############################################################################
|
||||
#
|
||||
# Import list
|
||||
|
||||
import os
|
||||
import socket
|
||||
import collectd
|
||||
import tsconfig.tsconfig as tsc
|
||||
|
||||
# This plugin name
|
||||
PLUGIN = 'degrade notifier'
|
||||
|
||||
# collectd severity definitions ;
|
||||
# Note: can't seem to pull them in symbolically with a header
|
||||
NOTIF_FAILURE = 1
|
||||
NOTIF_WARNING = 2
|
||||
NOTIF_OKAY = 4
|
||||
|
||||
# default mtce port.
|
||||
# ... with configuration override
|
||||
MTCE_CMD_RX_PORT = 2101
|
||||
|
||||
# same state message throttle count.
|
||||
# ... only send the degrade message every 'this' number
|
||||
# while the state of assert or clear remains the same.
|
||||
ONE_EVERY = 10
|
||||
|
||||
PLUGIN__DF = 'df'
|
||||
PLUGIN__MEM = 'memory'
|
||||
PLUGIN__CPU = 'cpu'
|
||||
|
||||
PLUGIN__VSWITCH_MEM = 'vswitch_mem'
|
||||
PLUGIN__VSWITCH_CPU = 'vswitch_cpu'
|
||||
PLUGIN__VSWITCH_PORT = "vswitch_port"
|
||||
PLUGIN__VSWITCH_IFACE = "vswitch_iface"
|
||||
|
||||
|
||||
PLUGIN_INTERFACE = 'interface'
|
||||
PLUGIN__EXAMPLE = 'example'
|
||||
|
||||
|
||||
# The collectd Maintenance Notifier Object
|
||||
class collectdMtceNotifierObject:
|
||||
|
||||
def __init__(self, port):
|
||||
"""collectdMtceNotifierObject Class constructor"""
|
||||
# default maintenance port
|
||||
self.port = port
|
||||
self.addr = None
|
||||
|
||||
# specifies the protocol family to use when messaging maintenance.
|
||||
# if system is IPV6, then that is learned and this 'protocol' is
|
||||
# updated with AF_INET6
|
||||
self.protocol = socket.AF_INET
|
||||
|
||||
# List of plugin names that require degrade for specified severity.
|
||||
self.degrade_list__failure = [PLUGIN__DF,
|
||||
PLUGIN__MEM,
|
||||
PLUGIN__CPU,
|
||||
PLUGIN__VSWITCH_MEM,
|
||||
PLUGIN__VSWITCH_CPU,
|
||||
PLUGIN__VSWITCH_PORT,
|
||||
PLUGIN__VSWITCH_IFACE,
|
||||
PLUGIN_INTERFACE,
|
||||
PLUGIN__EXAMPLE]
|
||||
self.degrade_list__warning = [PLUGIN_INTERFACE]
|
||||
|
||||
# the running list of resources that require degrade.
|
||||
# a degrade clear message is sent whenever this list is empty.
|
||||
# a degrade assert message is sent whenever this list is not empty.
|
||||
self.degrade_list = []
|
||||
|
||||
# throttle down sending of duplicate degrade assert/clear messages
|
||||
self.last_state = "undef"
|
||||
self.msg_throttle = 0
|
||||
|
||||
|
||||
# Instantiate the mtce_notifier object
|
||||
# This object persists from notification to notification
|
||||
obj = collectdMtceNotifierObject(MTCE_CMD_RX_PORT)
|
||||
|
||||
|
||||
def _get_active_controller_ip():
|
||||
"""Get the active controller host IP"""
|
||||
|
||||
try:
|
||||
obj.addr = socket.getaddrinfo('controller', None)[0][4][0]
|
||||
collectd.info("%s controller ip: %s" % (PLUGIN, obj.addr))
|
||||
except Exception as ex:
|
||||
obj.addr = None
|
||||
collectd.error("%s failed to get controller ip ; %s" %
|
||||
(PLUGIN, str(ex)))
|
||||
return 0
|
||||
|
||||
|
||||
def _df_instance_to_path(df_inst):
|
||||
"""Convert a df instance name to a mountpoint"""
|
||||
|
||||
# df_root is not a dynamic file system. Ignore that one.
|
||||
if df_inst == 'df_root':
|
||||
return '/'
|
||||
else:
|
||||
# For all others replace all '-' with '/'
|
||||
return('/' + df_inst[3:].replace('-', '/'))
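# e.g. (hypothetical instances) 'df_var-log' -> '/var/log' and
# 'df_dev-shm' -> '/dev/shm' with the conversion above.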
|
||||
|
||||
|
||||
# This function removes degraded file systems that are no longer present.
|
||||
def _clear_degrade_for_missing_filesystems():
|
||||
"""Remove degraded file systems that are no longer mounted or present"""
|
||||
|
||||
for df_inst in obj.degrade_list:
|
||||
|
||||
# Only file system plugins are looked at.
|
||||
# File system plugin instance names are prefixed with 'df_'
|
||||
# as the first 3 chars in the instance name.
|
||||
if df_inst[0:3] == 'df_':
|
||||
path = _df_instance_to_path(df_inst)
|
||||
|
||||
# check the mount point.
|
||||
# if the mount point no longer exists then remove
|
||||
# this instance from the degrade list.
|
||||
if os.path.ismount(path) is False:
|
||||
collectd.info("%s clearing degrade for missing %s ; %s" %
|
||||
(PLUGIN, path, obj.degrade_list))
|
||||
obj.degrade_list.remove(df_inst)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
# The collectd configuration interface
|
||||
#
|
||||
# Used to configure the maintenance port.
|
||||
# key = 'port'
|
||||
# val = port number
|
||||
#
|
||||
def config_func(config):
|
||||
"""Configure the maintenance degrade notifier plugin"""
|
||||
|
||||
collectd.debug('%s config function' % PLUGIN)
|
||||
for node in config.children:
|
||||
key = node.key.lower()
|
||||
val = node.values[0]
|
||||
|
||||
if key == 'port':
|
||||
obj.port = int(val)
|
||||
collectd.info("%s configured mtce port: %d" %
|
||||
(PLUGIN, obj.port))
|
||||
return 0
|
||||
|
||||
obj.port = MTCE_CMD_RX_PORT
|
||||
collectd.error("%s no mtce port provided ; defaulting to %d" %
|
||||
(PLUGIN, obj.port))
|
||||
|
||||
|
||||
# Collectd calls this function on startup.
|
||||
def init_func():
|
||||
"""Collectd Mtce Notifier Initialization Function"""
|
||||
|
||||
obj.host = os.uname()[1]
|
||||
collectd.info("%s %s:%s sending to mtce port %d" %
|
||||
(PLUGIN, tsc.nodetype, obj.host, obj.port))
|
||||
|
||||
collectd.debug("%s init function" % PLUGIN)
|
||||
|
||||
|
||||
# This is the Notifier function that is called by collectd.
|
||||
#
|
||||
# Handling steps are
|
||||
#
|
||||
# 1. build resource name from notification object.
|
||||
# 2. check resource against severity lists.
|
||||
# 3. manage this instance's degrade state.
|
||||
# 4. send mtcAgent the degrade state message.
|
||||
#
|
||||
def notifier_func(nObject):
|
||||
"""Collectd Mtce Notifier Handler Function"""
|
||||
|
||||
# Create the resource name from the notifier object.
|
||||
# format: <plugin name>_<plugin_instance_name>
|
||||
resource = nObject.plugin
|
||||
if nObject.plugin_instance:
|
||||
resource += "_" + nObject.plugin_instance
|
||||
|
||||
# This block looks at the current notification severity
|
||||
# and manages the degrade_list.
|
||||
# If the specified plugin name exists in each of the warnings
|
||||
# or failure lists and there is a current severity match then
|
||||
# add that resource instance to the degrade list.
|
||||
# Conversely, if this notification is OKAY then make sure this
|
||||
# resource instance is not in the degrade list (remove it if it is)
|
||||
if nObject.severity is NOTIF_OKAY:
|
||||
if obj.degrade_list and resource in obj.degrade_list:
|
||||
obj.degrade_list.remove(resource)
|
||||
|
||||
elif nObject.severity is NOTIF_FAILURE:
|
||||
if obj.degrade_list__failure:
|
||||
if nObject.plugin in obj.degrade_list__failure:
|
||||
if resource not in obj.degrade_list:
|
||||
# handle dynamic filesystems going missing over a swact
|
||||
# or unmount and being reported as a transient error by
|
||||
# the df plugin. Don't add it to the failed list if the
|
||||
# mountpoint is gone.
|
||||
add = True
|
||||
if nObject.plugin == PLUGIN__DF:
|
||||
path = _df_instance_to_path(resource)
|
||||
add = os.path.ismount(path)
|
||||
if add is True:
|
||||
collectd.info("%s %s added to degrade list" %
|
||||
(PLUGIN, resource))
|
||||
obj.degrade_list.append(resource)
|
||||
else:
|
||||
# If severity is failure and no failures cause degrade
|
||||
# then make sure this plugin is not in the degrade list,
|
||||
# Should never occur.
|
||||
if resource in obj.degrade_list:
|
||||
obj.degrade_list.remove(resource)
|
||||
|
||||
elif nObject.severity is NOTIF_WARNING:
|
||||
if obj.degrade_list__warning:
|
||||
if nObject.plugin in obj.degrade_list__warning:
|
||||
if resource not in obj.degrade_list:
|
||||
# handle dynamic filesystems going missing over a swact
|
||||
# or unmount and being reported as a transient error by
|
||||
# the df plugin. Don't add it to the failed list if the
|
||||
# mountpoint is gone.
|
||||
add = True
|
||||
if nObject.plugin == PLUGIN__DF:
|
||||
path = _df_instance_to_path(resource)
|
||||
add = os.path.ismount(path)
|
||||
if add is True:
|
||||
collectd.info("%s %s added to degrade list" %
|
||||
(PLUGIN, resource))
|
||||
obj.degrade_list.append(resource)
|
||||
else:
|
||||
# If severity is warning and no warnings cause degrade
|
||||
# then make sure this plugin is not in the degrade list.
|
||||
# Should never occur..
|
||||
if resource in obj.degrade_list:
|
||||
obj.degrade_list.remove(resource)
|
||||
else:
|
||||
collectd.info("%s unsupported severity %d" %
|
||||
(PLUGIN, nObject.severity))
|
||||
return 0
|
||||
|
||||
# running counter of notifications.
|
||||
obj.msg_throttle += 1
|
||||
|
||||
# Support for Dynamic File Systems
|
||||
# --------------------------------
|
||||
# Some active controller mounted filesystems can become
|
||||
# unmounted under the watch of collectd. This can occur
|
||||
# as a result of a Swact. If a 'degrade' is raised at the
|
||||
# time an fs disappears then that state can become stuck
|
||||
# active until the next Swact. This call handles this case.
|
||||
#
|
||||
# Audit file system presence every time we get the
|
||||
# notification for the root file system.
|
||||
# Depending on the root filesystem always being there.
|
||||
if nObject.plugin == 'df' \
|
||||
and nObject.plugin_instance == 'root' \
|
||||
and len(obj.degrade_list):
|
||||
_clear_degrade_for_missing_filesystems()
|
||||
|
||||
# If degrade list is empty then a clear state is sent to maintenance.
|
||||
# If degrade list is NOT empty then an assert state is sent to maintenance
|
||||
# For logging and to ease debug the code below will create a list of
|
||||
# degraded resource instances to be included in the message to maintenance
|
||||
# for mtcAgent to optionally log it.
|
||||
resources = ""
|
||||
if obj.degrade_list:
|
||||
# loop over the list,
|
||||
# limit the degraded resource list being sent to mtce to 5
|
||||
for r in obj.degrade_list[0:5]:
|
||||
resources += r + ','
|
||||
resources = resources[:-1]
|
||||
state = "assert"
|
||||
else:
|
||||
state = "clear"
|
||||
|
||||
# Message throttling ....
|
||||
|
||||
# Avoid sending the same last state message for up to ONE_EVERY count.
|
||||
# Just reduce load on mtcAgent
|
||||
if obj.last_state == state and obj.msg_throttle < ONE_EVERY:
|
||||
return 0
|
||||
|
||||
# if the degrade state has changed then log it and proceed
|
||||
if obj.last_state != state:
|
||||
if obj.last_state != "undef":
|
||||
collectd.info("%s degrade %s %s" %
|
||||
(PLUGIN,
|
||||
state,
|
||||
obj.degrade_list))
|
||||
|
||||
# Save state for next time
|
||||
obj.last_state = state
|
||||
|
||||
# Clear the message throttle counter
|
||||
obj.msg_throttle = 0
|
||||
|
||||
# Send the degrade state ; assert or clear message to mtcAgent.
|
||||
# If we get a send failure then log it and set the addr to None
|
||||
# so it forces us to refresh the controller address on the next
|
||||
# notification
|
||||
try:
|
||||
mtce_socket = socket.socket(obj.protocol, socket.SOCK_DGRAM)
|
||||
if mtce_socket:
|
||||
if obj.addr is None:
|
||||
_get_active_controller_ip()
|
||||
if obj.addr is None:
|
||||
return 0
|
||||
|
||||
# Create the Maintenance message.
|
||||
message = "{\"service\":\"collectd_notifier\","
|
||||
message += "\"hostname\":\"" + nObject.host + "\","
|
||||
message += "\"degrade\":\"" + state + "\","
|
||||
message += "\"resource\":\"" + resources + "\"}"
|
||||
collectd.debug("%s: %s" % (PLUGIN, message))
|
||||
|
||||
mtce_socket.settimeout(1.0)
|
||||
mtce_socket.sendto(message, (obj.addr, obj.port))
|
||||
mtce_socket.close()
|
||||
else:
|
||||
collectd.error("%s %s failed to open socket (%s)" %
|
||||
(PLUGIN, resource, obj.addr))
|
||||
except socket.error as e:
|
||||
if e.args[0] == socket.EAI_ADDRFAMILY:
|
||||
# Handle IPV4 to IPV6 switchover:
|
||||
obj.protocol = socket.AF_INET6
|
||||
collectd.info("%s %s ipv6 addressing (%s)" %
|
||||
(PLUGIN, resource, obj.addr))
|
||||
else:
|
||||
collectd.error("%s %s socket error (%s) ; %s" %
|
||||
(PLUGIN, resource, obj.addr, str(e)))
|
||||
# try self correction
|
||||
obj.addr = None
|
||||
obj.protocol = socket.AF_INET
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
collectd.register_config(config_func)
|
||||
collectd.register_init(init_func)
|
||||
collectd.register_notification(notifier_func)
|
@ -1,13 +0,0 @@
|
||||
<Plugin "threshold">
|
||||
<Plugin "ntpq">
|
||||
<Type "absolute">
|
||||
Instance "reachable"
|
||||
Persist true
|
||||
PersistOK true
|
||||
WarningMin 1
|
||||
FailureMin 0
|
||||
Hits 2
|
||||
Invert false
|
||||
</Type>
|
||||
</Plugin>
|
||||
</Plugin>
|
@ -1,857 +0,0 @@
|
||||
############################################################################
|
||||
# Copyright (c) 2018-2019 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
#############################################################################
|
||||
#
|
||||
# This is the NTP connectivity monitor plugin for collectd.
|
||||
#
|
||||
# This plugin uses the industry standard ntpq exec to query NTP attributes.
|
||||
#
|
||||
# This plugin executes 'ntpq -np' to determine which provisioned servers
|
||||
# are reachable. The ntpq output includes a Tally Code. The Tally Code is
|
||||
# represented by the first character in each server's line item.
|
||||
#
|
||||
# The only ntpq output looked at by this plugin are the Tally Codes and
|
||||
# associated IPs.
|
||||
#
|
||||
# Tally Code Summary:
|
||||
#
|
||||
# A server is considered reachable only when the Tally Code is a * or a +.
|
||||
# A server is considered unreachable if the Tally Code is a ' ' (space)
|
||||
# A server with a '*' Tally Code is the 'selected' server.
|
||||
#
|
||||
# Here is an example of the ntpq command output
|
||||
#
|
||||
# remote refid st t when poll reach delay offset jitter
|
||||
# =============================================================================
|
||||
# +192.168.204.104 206.108.0.133 2 u 203 1024 377 0.226 -3.443 1.137
|
||||
# +97.107.129.217 200.98.196.212 2 u 904 1024 377 21.677 5.577 0.624
|
||||
# 192.95.27.155 24.150.203.150 2 u 226 1024 377 15.867 0.381 1.124
|
||||
# -97.107.129.217 200.98.196.212 2 u 904 1024 377 21.677 5.577 0.624
|
||||
# *182.95.27.155 24.150.203.150 2 u 226 1024 377 15.867 0.381 1.124
|
||||
#
|
||||
# The local controller node is not to be considered a reachable server and is
|
||||
# never alarmed if it is not reachable.
|
||||
#
|
||||
# Normal running modes with no alarms include
|
||||
#
|
||||
# 0 - All NTP servers are reachable and one is selected
|
||||
# 1 - No NTP servers are provisioned
|
||||
#
|
||||
# Failure modes that warrant alarms include
|
||||
#
|
||||
# 2 - None of the NTP servers are reachable - major alarm
|
||||
# 3 - Some NTP servers reachable and one is selected - server IP minor alarm
|
||||
# 4 - Some NTP servers reachable but none is selected - major alarm
|
||||
#
|
||||
# None of these failures result in a host being degraded.
|
||||
#
|
||||
# This script will only be run on the controller nodes.
|
||||
#
|
||||
# This script logs to daemon.log with the 'collectd' process label
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import uuid
|
||||
import collectd
|
||||
from fm_api import constants as fm_constants
|
||||
from fm_api import fm_api
|
||||
import tsconfig.tsconfig as tsc
|
||||
import socket
|
||||
|
||||
api = fm_api.FaultAPIsV2()
|
||||
|
||||
PLUGIN = 'NTP query plugin'
|
||||
PLUGIN_INTERVAL = 600 # audit interval in secs
|
||||
PLUGIN_CONF = '/etc/ntp.conf'
|
||||
PLUGIN_EXEC = '/usr/sbin/ntpq'
|
||||
PLUGIN_EXEC_OPTIONS = '-pn'
|
||||
PLUGIN_ALARMID = "100.114"
|
||||
|
||||
|
||||
# define a class here that will persist over read calls
|
||||
class NtpqObject:
|
||||
|
||||
# static variables set in init
|
||||
hostname = '' # the name of this host
|
||||
base_eid = '' # the eid for the major alarm
|
||||
init_complete = False # set to true once config is complete
|
||||
alarm_raised = False # True when the major alarm is asserted
|
||||
|
||||
server_list_conf = [] # list of servers in the /etc/ntp.conf file
|
||||
server_list_ntpq = [] # list of servers in the ntpq -np output
|
||||
unreachable_servers = [] # list of unreachable servers
|
||||
reachable_servers = [] # list of reachable servers
|
||||
selected_server = 'None' # the ip address of the selected server
|
||||
selected_server_save = 'None' # the last selected server ; note change
|
||||
peer_selected = False # true when peer is selected
|
||||
|
||||
# variables used to raise alarms to FM
|
||||
suppression = True
|
||||
service_affecting = False
|
||||
name = "NTP"
|
||||
alarm_type = fm_constants.FM_ALARM_TYPE_1
|
||||
cause = fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN
|
||||
repair = "Monitor and if condition persists, "
|
||||
repair += "contact next level of support."
|
||||
|
||||
|
||||
# This plugin's class object - persists over read calls
|
||||
obj = NtpqObject()
|
||||
|
||||
|
||||
###############################################################################
|
||||
#
|
||||
# Name : _add_unreachable_server
|
||||
#
|
||||
# Description: This private interface is used to add an ip to the
|
||||
# unreachable servers list.
|
||||
#
|
||||
# Parameters : IP address
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
def _add_unreachable_server(ip=None):
|
||||
"""Add ip to unreachable_servers list"""
|
||||
|
||||
if ip:
|
||||
if ip not in obj.unreachable_servers:
|
||||
collectd.debug("%s adding '%s' to unreachable servers list: %s" %
|
||||
(PLUGIN, ip, obj.unreachable_servers))
|
||||
|
||||
obj.unreachable_servers.append(ip)
|
||||
|
||||
collectd.info("%s added '%s' to unreachable servers list: %s" %
|
||||
(PLUGIN, ip, obj.unreachable_servers))
|
||||
else:
|
||||
collectd.debug("%s ip '%s' already in unreachable_servers list" %
|
||||
(PLUGIN, ip))
|
||||
else:
|
||||
collectd.error("%s _add_unreachable_server called with no IP" % PLUGIN)
|
||||
|
||||
|
||||
###############################################################################
|
||||
#
|
||||
# Name : _raise_alarm
|
||||
#
|
||||
# Description: This private interface is used to raise NTP alarms.
|
||||
#
|
||||
# Parameters : Optional IP address
|
||||
#
|
||||
# If called with no or empty IP then a generic major alarm is raised.
|
||||
# If called with an IP then an IP specific minor alarm is raised.
|
||||
#
|
||||
# Returns : Error indication.
|
||||
#
|
||||
# True : is error. FM call failed to set the
|
||||
# alarm and needs to be retried.
|
||||
#
|
||||
# False: no error. FM call succeeds
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
def _raise_alarm(ip=None):
|
||||
"""Assert an NTP alarm"""
|
||||
|
||||
if not ip:
|
||||
# Don't re-raise the alarm if it's already raised
|
||||
if obj.alarm_raised is True:
|
||||
return False
|
||||
|
||||
if obj.peer_selected:
|
||||
reason = "NTP cannot reach external time source; " \
|
||||
"syncing with peer controller only"
|
||||
fm_severity = fm_constants.FM_ALARM_SEVERITY_MINOR
|
||||
else:
|
||||
reason = "NTP configuration does not contain any valid "
|
||||
reason += "or reachable NTP servers."
|
||||
fm_severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
|
||||
|
||||
eid = obj.base_eid
|
||||
|
||||
else:
|
||||
reason = "NTP address "
|
||||
reason += ip
|
||||
reason += " is not a valid or a reachable NTP server."
|
||||
eid = obj.base_eid + '=' + ip
|
||||
fm_severity = fm_constants.FM_ALARM_SEVERITY_MINOR
|
||||
|
||||
try:
|
||||
fault = fm_api.Fault(
|
||||
alarm_id=PLUGIN_ALARMID,
|
||||
alarm_state=fm_constants.FM_ALARM_STATE_SET,
|
||||
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
|
||||
entity_instance_id=eid,
|
||||
severity=fm_severity,
|
||||
reason_text=reason,
|
||||
alarm_type=obj.alarm_type,
|
||||
probable_cause=obj.cause,
|
||||
proposed_repair_action=obj.repair,
|
||||
service_affecting=obj.service_affecting,
|
||||
suppression=obj.suppression)
|
||||
|
||||
alarm_uuid = api.set_fault(fault)
|
||||
if _is_uuid_like(alarm_uuid) is False:
|
||||
|
||||
# Don't _add_unreachable_server list if the fm call failed.
|
||||
# That way it will be retried at a later time.
|
||||
collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
|
||||
(PLUGIN, PLUGIN_ALARMID, eid, alarm_uuid))
|
||||
return 0
|
||||
else:
|
||||
collectd.info("%s raised alarm %s:%s" %
|
||||
(PLUGIN,
|
||||
PLUGIN_ALARMID,
|
||||
eid))
|
||||
if ip:
|
||||
_add_unreachable_server(ip)
|
||||
else:
|
||||
obj.alarm_raised = True
|
||||
|
||||
except Exception as ex:
|
||||
collectd.error("%s 'set_fault' exception ; %s:%s:%s ; %s" %
|
||||
(PLUGIN,
|
||||
PLUGIN_ALARMID,
|
||||
eid,
|
||||
fm_severity,
|
||||
ex))
|
||||
return 0
|
||||
|
||||
|
||||
###############################################################################
|
||||
#
|
||||
# Name : _clear_base_alarm
|
||||
#
|
||||
# Description: This private interface is used to clear the NTP base alarm.
|
||||
#
|
||||
# Parameters : None
|
||||
#
|
||||
# Returns : Error indication.
|
||||
#
|
||||
# False: is error. FM call failed to clear the
|
||||
# alarm and needs to be retried.
|
||||
#
|
||||
# True : no error. FM call succeeds
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
def _clear_base_alarm():
|
||||
"""Clear the NTP base alarm"""
|
||||
|
||||
try:
|
||||
if api.clear_fault(PLUGIN_ALARMID, obj.base_eid) is False:
|
||||
collectd.info("%s %s:%s alarm already cleared" %
|
||||
(PLUGIN, PLUGIN_ALARMID, obj.base_eid))
|
||||
else:
|
||||
collectd.info("%s %s:%s alarm cleared" %
|
||||
(PLUGIN, PLUGIN_ALARMID, obj.base_eid))
|
||||
obj.alarm_raised = False
|
||||
return True
|
||||
|
||||
except Exception as ex:
|
||||
collectd.error("%s 'clear_fault' exception ; %s:%s ; %s" %
|
||||
(PLUGIN,
|
||||
PLUGIN_ALARMID,
|
||||
obj.base_eid,
|
||||
ex))
|
||||
return False
|
||||
|
||||
|
||||
###############################################################################
|
||||
#
|
||||
# Name : _remove_ip_from_unreachable_list
|
||||
#
|
||||
# Description: This private interface is used to remove the specified IP
|
||||
# from the unreachable servers list and clear its alarm if raised.
|
||||
#
|
||||
# Parameters : IP address
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
def _remove_ip_from_unreachable_list(ip):
|
||||
"""Remove an IP address from the unreachable list and clear its NTP alarms"""
|
||||
|
||||
# remove from unreachable list if it's there
|
||||
if ip and ip in obj.unreachable_servers:
|
||||
|
||||
eid = obj.base_eid + '=' + ip
|
||||
collectd.debug("%s trying to clear alarm %s" % (PLUGIN, eid))
|
||||
|
||||
try:
|
||||
# clear the alarm if its asserted
|
||||
if api.clear_fault(PLUGIN_ALARMID, eid) is True:
|
||||
collectd.info("%s %s:%s alarm cleared " %
|
||||
(PLUGIN, PLUGIN_ALARMID, eid))
|
||||
else:
|
||||
# alarm does not exist
|
||||
collectd.info("%s %s:%s alarm clear" %
|
||||
(PLUGIN, PLUGIN_ALARMID, eid))
|
||||
|
||||
obj.unreachable_servers.remove(ip)
|
||||
|
||||
except Exception as ex:
|
||||
collectd.error("%s 'clear_fault' exception ; %s:%s ; %s" %
|
||||
(PLUGIN,
|
||||
PLUGIN_ALARMID,
|
||||
eid,
|
||||
ex))
|
||||
|
||||
|
||||
###############################################################################
|
||||
#
|
||||
# Name : _add_ip_to_ntpq_server_list
|
||||
#
|
||||
# Description: This private interface is used to create a list of servers
|
||||
# found in the ntpq output.
|
||||
#
|
||||
# This list is used to detect and handle servers that might come
|
||||
# and go between readings that might otherwise result in stuck
|
||||
# alarms.
|
||||
#
|
||||
# Parameters : IP address
|
||||
#
|
||||
# Returns : nothing
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
def _add_ip_to_ntpq_server_list(ip):
|
||||
"""Add this IP to the list of servers that ntpq reports against"""
|
||||
|
||||
if ip not in obj.server_list_ntpq:
|
||||
obj.server_list_ntpq.append(ip)
|
||||
|
||||
|
||||
##############################################################################
|
||||
#
|
||||
# Name : _cleanup_stale_servers
|
||||
#
|
||||
# Description: This private interface walks through each server tracking list
|
||||
# removing any that it finds that are not in the ntpq server list.
|
||||
#
|
||||
# Alarms are cleared as needed to avoid stale alarms
|
||||
#
|
||||
# Parameters : None
|
||||
#
|
||||
# Returns : nothing
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
def _cleanup_stale_servers():
|
||||
"""Cleanup the server IP tracking lists"""
|
||||
|
||||
collectd.debug("%s CLEANUP REACHABLE: %s %s" %
|
||||
(PLUGIN, obj.server_list_ntpq, obj.reachable_servers))
|
||||
for ip in obj.reachable_servers:
|
||||
if ip not in obj.server_list_ntpq:
|
||||
collectd.info("%s removing missing '%s' server from reachable "
|
||||
"server list" % (PLUGIN, ip))
|
||||
obj.reachable_servers.remove(ip)
|
||||
|
||||
collectd.debug("%s CLEANUP UNREACHABLE: %s %s" %
|
||||
(PLUGIN, obj.server_list_ntpq, obj.unreachable_servers))
|
||||
for ip in obj.unreachable_servers:
|
||||
if ip not in obj.server_list_ntpq:
|
||||
collectd.info("%s removing missing '%s' server from unreachable "
|
||||
"server list" % (PLUGIN, ip))
|
||||
_remove_ip_from_unreachable_list(ip)
|
||||
|
||||
|
||||
###############################################################################
|
||||
#
|
||||
# Name : _get_ntp_servers
|
||||
#
|
||||
# Description: This private interface reads the list of ntp servers from the
|
||||
# ntp.conf file
|
||||
#
|
||||
# Parameters : None
|
||||
#
|
||||
# Returns : nothing
|
||||
#
|
||||
# Updates : server_list_conf
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
def _get_ntp_servers():
|
||||
"""Read the provisioned servers from the ntp conf file"""
|
||||
|
||||
with open(PLUGIN_CONF, 'r') as infile:
|
||||
for line in infile:
|
||||
if line.startswith('server '):
|
||||
ip = line.rstrip().split(' ')[1]
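# e.g. (hypothetical entry) a conf line 'server 192.168.204.104 iburst'
# yields the ip '192.168.204.104' with the split above.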
|
||||
if ip not in obj.server_list_conf:
|
||||
obj.server_list_conf.append(ip)
|
||||
if len(obj.server_list_conf):
|
||||
collectd.info("%s server list: %s" %
|
||||
(PLUGIN, obj.server_list_conf))
|
||||
else:
|
||||
##################################################################
|
||||
#
|
||||
# Handle NTP_NOT_PROVISIONED (1) case
|
||||
#
|
||||
# There is no alarming for this case.
|
||||
# Clear any that may have been raised.
|
||||
#
|
||||
##################################################################
|
||||
collectd.info("%s NTP Service Disabled ; no provisioned servers" %
|
||||
PLUGIN)
|
||||
|
||||
# clear all alarms
|
||||
if obj.alarm_raised:
|
||||
_clear_base_alarm()
|
||||
|
||||
if obj.unreachable_servers:
|
||||
for ip in obj.unreachable_servers:
|
||||
_remove_ip_from_unreachable_list(ip)
|
||||
|
||||
|
||||
###############################################################################
|
||||
#
|
||||
# Name : is_controller
|
||||
#
|
||||
# Description: This private interface returns a True if the specified ip is
|
||||
# associated with a local controller.
|
||||
#
|
||||
# Parameters : IP address
|
||||
#
|
||||
# Returns : True or False
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
def _is_controller(ip):
|
||||
"""Returns True if this IP corresponds to one of the controllers"""
|
||||
|
||||
collectd.debug("%s check if '%s' is a controller ip" % (PLUGIN, ip))
|
||||
with open('/etc/hosts', 'r') as infile:
|
||||
for line in infile:
|
||||
# skip over file comment lines prefixed with '#'
|
||||
if line[0] == '#':
|
||||
continue
|
||||
# line format is 'ip' 'name' ....
|
||||
split_line = line.split()
|
||||
if len(split_line) >= 2:
|
||||
# look for an exact ip match on a line whose host name contains 'controller'
|
||||
if split_line[0] == ip and 'controller' in line:
|
||||
collectd.debug("%s %s is a controller" % (PLUGIN, ip))
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
###############################################################################
|
||||
#
|
||||
# Name : _is_ip_address
|
||||
#
|
||||
# Description: This private interface returns:
|
||||
# AF_INET if val is ipv4
|
||||
# AF_INET6 if val is ipv6
|
||||
# False if val is not a valid ip address
|
||||
#
|
||||
# Parameters : val is a uuid string
|
||||
#
|
||||
# Returns : socket.AF_INET for ipv4, socket.AF_INET6 for ipv6
|
||||
# or False for invalid
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
def _is_ip_address(val):
|
||||
try:
|
||||
socket.inet_pton(socket.AF_INET, val)
|
||||
return socket.AF_INET
|
||||
except socket.error:
|
||||
pass
|
||||
|
||||
try:
|
||||
socket.inet_pton(socket.AF_INET6, val)
|
||||
return socket.AF_INET6
|
||||
except socket.error:
|
||||
pass
|
||||
|
||||
return False
|
||||
|
||||
|
||||
###############################################################################
|
||||
#
|
||||
# Name : is_uuid_like
|
||||
#
|
||||
# Description: This private interface returns a True if the specified value is
|
||||
# a valid uuid.
|
||||
#
|
||||
# Parameters : val is a uuid string
|
||||
#
|
||||
# Returns : True or False
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
def _is_uuid_like(val):
|
||||
"""Returns validation of a value as a UUID"""
|
||||
try:
|
||||
return str(uuid.UUID(val)) == val
|
||||
except (TypeError, ValueError, AttributeError):
|
||||
return False
|
||||
|
||||
|
||||
###############################################################################
|
||||
#
|
||||
# Name : config_func
|
||||
#
|
||||
# Description: The configuration interface this plugin publishes to collectd.
|
||||
#
|
||||
# collectd calls this interface one time on its process startup
|
||||
# when it loads this plugin.
|
||||
#
|
||||
# There are currently no specific configuration options to parse
|
||||
# for this plugin.
|
||||
#
|
||||
# Parameters : collectd config object
|
||||
#
|
||||
# Returns : zero
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
def config_func(config):
|
||||
"""Configure the plugin"""
|
||||
|
||||
collectd.debug('%s config function' % PLUGIN)
|
||||
return 0
|
||||
|
||||
|
||||
###############################################################################
|
||||
#
|
||||
# Name : init_func
|
||||
#
|
||||
# Description: The initialization interface this plugin publishes to collectd.
|
||||
#
|
||||
# collectd calls this interface one time on its process startup
|
||||
# when it loads this plugin.
|
||||
#
|
||||
# 1. get hostname
|
||||
# 2. build base entity id for the NTP alarm
|
||||
# 3. query FM for existing NTP alarms
|
||||
# - base alarm is maintained and state loaded if it exists
|
||||
# - ntp ip minor alarms are cleared on init. This is done to
|
||||
# auto correct ntp server IP address changes over process
|
||||
# restart ; avoid stuck alarms.
|
||||
#
|
||||
# Parameters : None
|
||||
#
|
||||
# Returns : zero
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
def init_func():
|
||||
|
||||
# ntp query is for controllers only
|
||||
if tsc.nodetype != 'controller':
|
||||
return 0
|
||||
|
||||
# do nothing till config is complete.
|
||||
# init_func will be called again by read_func once config is complete.
|
||||
if os.path.exists(tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE) is False:
|
||||
return 0
|
||||
|
||||
# get current hostname
|
||||
obj.hostname = os.uname()[1]
|
||||
if not obj.hostname:
|
||||
collectd.error("%s failed to get hostname" % PLUGIN)
|
||||
return 1
|
||||
|
||||
obj.base_eid = 'host=' + obj.hostname + '.ntp'
|
||||
collectd.debug("%s on %s with entity id '%s'" %
|
||||
(PLUGIN, obj.hostname, obj.base_eid))
|
||||
|
||||
# get a list of provisioned ntp servers
|
||||
_get_ntp_servers()
|
||||
|
||||
# manage existing alarms.
|
||||
try:
|
||||
alarms = api.get_faults_by_id(PLUGIN_ALARMID)
|
||||
|
||||
except Exception as ex:
|
||||
collectd.error("%s 'get_faults_by_id' exception ; %s ; %s" %
|
||||
(PLUGIN, PLUGIN_ALARMID, ex))
|
||||
return 0
|
||||
|
||||
if alarms:
|
||||
for alarm in alarms:
|
||||
eid = alarm.entity_instance_id
|
||||
# ignore alarms not for this host
|
||||
if obj.hostname not in eid:
|
||||
continue
|
||||
|
||||
# maintain only the base alarm.
|
||||
if alarm.entity_instance_id != obj.base_eid:
|
||||
# clear any ntp server specific alarms over process restart
|
||||
# this is done to avoid the potential for stuck ntp ip alarms
|
||||
collectd.info("%s clearing found startup alarm '%s'" %
|
||||
(PLUGIN, alarm.entity_instance_id))
|
||||
try:
|
||||
api.clear_fault(PLUGIN_ALARMID, alarm.entity_instance_id)
|
||||
except Exception as ex:
|
||||
collectd.error("%s 'clear_fault' exception ; %s:%s ; %s" %
|
||||
(PLUGIN,
|
||||
PLUGIN_ALARMID,
|
||||
alarm.entity_instance_id,
|
||||
ex))
|
||||
return 0
|
||||
|
||||
else:
|
||||
obj.alarm_raised = True
|
||||
collectd.info("%s found alarm %s:%s" %
|
||||
(PLUGIN,
|
||||
PLUGIN_ALARMID,
|
||||
alarm.entity_instance_id))
|
||||
|
||||
# ensure the base alarm is cleared if there are no
|
||||
# provisioned servers.
|
||||
if not obj.server_list_conf:
|
||||
_clear_base_alarm()
|
||||
|
||||
else:
|
||||
collectd.info("%s no major startup alarms found" % PLUGIN)
|
||||
|
||||
obj.init_complete = True
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
###############################################################################
|
||||
#
|
||||
# Name : read_func
|
||||
#
|
||||
# Description: The sample read interface this plugin publishes to collectd.
|
||||
#
|
||||
# collectd calls this interface every audit interval.
|
||||
#
|
||||
# Runs ntpq -np to query NTP status and manages alarms based on
|
||||
# the result.
|
||||
#
|
||||
# See file header (above) for more specific behavioral detail.
|
||||
#
|
||||
# Should only run on controllers ; both controllers run this plugin.
|
||||
#
|
||||
# Parameters : None
|
||||
#
|
||||
# Returns : zero or non-zero on significant error
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
def read_func():
|
||||
|
||||
# ntp query is for controllers only
|
||||
if tsc.nodetype != 'controller':
|
||||
return 0
|
||||
|
||||
if obj.init_complete is False:
|
||||
if os.path.exists(tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE) is True:
|
||||
collectd.info("%s re-running init" % PLUGIN)
|
||||
init_func()
|
||||
return 0
|
||||
|
||||
# get a list of provisioned ntp servers
|
||||
_get_ntp_servers()
|
||||
|
||||
# nothing to do while there are no provisioned NTP servers
|
||||
if len(obj.server_list_conf) == 0:
|
||||
return 0
|
||||
|
||||
# Do NTP Query
|
||||
data = subprocess.check_output([PLUGIN_EXEC, PLUGIN_EXEC_OPTIONS])
|
||||
|
||||
# Keep this FIT test code but make it commented out for security
|
||||
#
|
||||
# if os.path.exists('/var/run/fit/ntpq_data'):
|
||||
# data = ''
|
||||
# collectd.info("%s using ntpq FIT data" % PLUGIN)
|
||||
# with open('/var/run/fit/ntpq_data', 'r') as infile:
|
||||
# for line in infile:
|
||||
# data += line
|
||||
|
||||
if not data:
|
||||
collectd.error("%s no data from query" % PLUGIN)
|
||||
return 0
|
||||
|
||||
# Get the ntp query output into a list of lines
|
||||
obj.ntpq = data.split('\n')
|
||||
|
||||
# keep track of changes ; only log on changes
|
||||
reachable_list_changed = False
|
||||
unreachable_list_changed = False
|
||||
|
||||
# Manage the selected server name
|
||||
#
|
||||
# save the old value so we can print a log if the selected server changes
|
||||
if obj.selected_server:
|
||||
obj.selected_server_save = obj.selected_server
|
||||
# always assume no selected server ; until it is learned
|
||||
obj.selected_server = ''
|
||||
|
||||
# start with a fresh empty list for this new run to populate
|
||||
obj.server_list_ntpq = []
|
||||
|
||||
# Loop through the ntpq output.
|
||||
# Ignore the first 2 lines ; just header data.
|
||||
for i in range(2, len(obj.ntpq)):
|
||||
|
||||
# ignore empty or lines that are not long enough
|
||||
if len(obj.ntpq[i]) < 10:
|
||||
continue
|
||||
|
||||
# log the ntpq output ; minus the 2 lines of header
|
||||
collectd.info("NTPQ: %s" % obj.ntpq[i])
|
||||
|
||||
# Unreachable servers are ones whose line start with a space
|
||||
ip = ''
|
||||
if obj.ntpq[i][0] == ' ':
|
||||
# get the ip address
|
||||
# example format of line:['', '132.163.4.102', '', '', '.INIT.',
|
||||
# get ip from index [1] of the list
|
||||
unreachable = obj.ntpq[i].split(' ')[1]
|
||||
if unreachable:
|
||||
# check to see if it's a controller ip
|
||||
# we skip over controller ips
|
||||
if _is_controller(unreachable) is False:
|
||||
_add_ip_to_ntpq_server_list(unreachable)
|
||||
if unreachable not in obj.unreachable_servers:
|
||||
if _raise_alarm(unreachable) is False:
|
||||
unreachable_list_changed = True
|
||||
# if the FM call to raise the alarm worked then
|
||||
# add this ip to the unreachable list if it's not
|
||||
# already in it
|
||||
_add_unreachable_server(unreachable)
|
||||
|
||||
# Reachable servers are ones whose line start with a '+'
|
||||
elif obj.ntpq[i][0] == '+':
|
||||
# remove the '+' and get the ip
|
||||
ip = obj.ntpq[i].split(' ')[0][1:]
|
||||
|
||||
elif obj.ntpq[i][0] == '*':
|
||||
# remove the '*' and get the ip
|
||||
cols = obj.ntpq[i].split(' ')
|
||||
ip = cols[0][1:]
|
||||
if ip:
|
||||
ip_family = _is_ip_address(ip)
|
||||
obj.peer_selected = _is_controller(ip)
|
||||
if ip != obj.selected_server and obj.alarm_raised is True:
|
||||
# a new ntp server is selected, old alarm may not be
|
||||
# valid
|
||||
_clear_base_alarm()
|
||||
obj.alarm_raised = False
|
||||
if obj.peer_selected is False:
|
||||
if obj.selected_server:
|
||||
# don't update the selected server if more selections
|
||||
# are found. go with the first one found.
|
||||
collectd.info("%s additional selected server found"
|
||||
" '%s'; current selection is '%s'" %
|
||||
(PLUGIN, ip, obj.selected_server))
|
||||
else:
|
||||
# update the selected server list
|
||||
obj.selected_server = ip
|
||||
collectd.debug("%s selected server is '%s'" %
|
||||
(PLUGIN, obj.selected_server))
|
||||
else:
|
||||
# refer to peer
|
||||
refid = ''
|
||||
for i in range(1, len(cols)):
|
||||
if cols[i] != '':
|
||||
refid = cols[i]
|
||||
break
|
||||
|
||||
if refid not in ('', '127.0.0.1') and \
|
||||
not _is_controller(refid) and \
|
||||
socket.AF_INET == ip_family:
|
||||
# ipv4 ; the peer controller refers to a time source that is
|
||||
# neither itself nor a controller (this node)
|
||||
obj.selected_server = ip
|
||||
collectd.debug("peer controller has a reliable "
|
||||
"source")
|
||||
|
||||
# anything else is unreachable
|
||||
else:
|
||||
unreachable = obj.ntpq[i][1:].split(' ')[0]
|
||||
if _is_controller(unreachable) is False:
|
||||
_add_ip_to_ntpq_server_list(unreachable)
|
||||
if unreachable not in obj.unreachable_servers:
|
||||
if _raise_alarm(unreachable) is False:
|
||||
unreachable_list_changed = True
|
||||
# if the FM call to raise the alarm worked then
|
||||
# add this ip to the unreachable list if it's not
|
||||
# already in it
|
||||
_add_unreachable_server(unreachable)
|
||||
|
||||
if ip:
|
||||
# if the ip is valid then manage it
|
||||
if _is_controller(ip) is False:
|
||||
_add_ip_to_ntpq_server_list(ip)
|
||||
# add the ip to the reachable servers list
|
||||
# if its not already there
|
||||
if ip not in obj.reachable_servers:
|
||||
obj.reachable_servers.append(ip)
|
||||
reachable_list_changed = True
|
||||
# make sure this IP is no longer in the unreachable
|
||||
# list and that alarms for it are cleared
|
||||
_remove_ip_from_unreachable_list(ip)
|
||||
|
||||
_cleanup_stale_servers()
|
||||
|
||||
if obj.selected_server:
|
||||
if obj.selected_server != obj.selected_server_save:
|
||||
collectd.info("%s selected server changed from '%s' to '%s'" %
|
||||
(PLUGIN,
|
||||
obj.selected_server_save,
|
||||
obj.selected_server))
|
||||
obj.selected_server_save = obj.selected_server
|
||||
if obj.alarm_raised is True:
|
||||
_clear_base_alarm()
|
||||
|
||||
elif obj.alarm_raised is False:
|
||||
if obj.peer_selected:
|
||||
collectd.info("%s peer is selected" % PLUGIN)
|
||||
else:
|
||||
collectd.error("%s no selected server" % PLUGIN)
|
||||
if _raise_alarm() is False:
|
||||
obj.selected_server_save = 'None'
|
||||
|
||||
# only log and act on changes
|
||||
if reachable_list_changed is True:
|
||||
if obj.reachable_servers:
|
||||
collectd.info("%s reachable servers: %s" %
|
||||
(PLUGIN, obj.reachable_servers))
|
||||
if obj.alarm_raised is True:
|
||||
if obj.selected_server and obj.reachable_servers:
|
||||
_clear_base_alarm()
|
||||
else:
|
||||
collectd.error("%s no reachable servers" % PLUGIN)
|
||||
_raise_alarm()
|
||||
|
||||
# only log changes
|
||||
if unreachable_list_changed is True:
|
||||
if obj.unreachable_servers:
|
||||
collectd.info("%s unreachable servers: %s" %
|
||||
(PLUGIN, obj.unreachable_servers))
|
||||
else:
|
||||
collectd.info("%s all servers are reachable" % PLUGIN)
|
||||
|
||||
# The sample published to the database is simply the number
|
||||
# of reachable servers if one is selected
|
||||
if not obj.selected_server:
|
||||
sample = 0
|
||||
else:
|
||||
sample = len(obj.reachable_servers)
|
||||
|
||||
# Dispatch usage value to collectd
|
||||
val = collectd.Values(host=obj.hostname)
|
||||
val.plugin = 'ntpq'
|
||||
val.type = 'absolute'
|
||||
val.type_instance = 'reachable'
|
||||
val.dispatch(values=[sample])
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
# register the config, init and read functions
|
||||
collectd.register_config(config_func)
|
||||
collectd.register_init(init_func)
|
||||
collectd.register_read(read_func, interval=PLUGIN_INTERVAL)
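
# A compact, standalone sketch (not part of the plugin above) of the peer-line
# classification read_func performs on 'ntpq -pn'-style output: '*' marks the
# selected peer, '+' marks other reachable candidates, and anything else
# (including a leading space) is treated as unreachable. Helper and variable
# names here are illustrative only.
def classify_ntpq_peers(output):
    """Return (selected_ip, reachable_ips, unreachable_ips) from ntpq output."""
    selected = ''
    reachable = []
    unreachable = []
    for line in output.split('\n')[2:]:     # skip the two header lines
        if len(line) < 10:                  # ignore empty / short lines
            continue
        tally, rest = line[0], line[1:]
        ip = rest.split(' ')[0]             # address is the first field
        if tally == '*':
            selected = ip
            reachable.append(ip)
        elif tally == '+':
            reachable.append(ip)
        else:
            unreachable.append(ip)
    return selected, reachable, unreachable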
|
@ -1,311 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2019 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
############################################################################
|
||||
#
|
||||
# This file contains common collectd plugin constructs and utilities
|
||||
#
|
||||
############################################################################
|
||||
|
||||
import collectd
|
||||
import json
|
||||
import uuid
|
||||
import httplib2
|
||||
import socket
|
||||
import os
|
||||
from oslo_concurrency import processutils
|
||||
from fm_api import constants as fm_constants
|
||||
import tsconfig.tsconfig as tsc
|
||||
|
||||
# http request constants
|
||||
PLUGIN_TIMEOUT = 10
|
||||
PLUGIN_HTTP_HEADERS = {'Accept': 'application/json', 'Connection': 'close'}
|
||||
|
||||
MIN_AUDITS_B4_FIRST_QUERY = 2
|
||||
|
||||
|
||||
class PluginObject(object):
|
||||
|
||||
def __init__(self, plugin, url):
|
||||
|
||||
# static variables set in init_func
|
||||
self.plugin = plugin # the name of this plugin
|
||||
self.hostname = '' # the name of this host
|
||||
self.port = 0 # the port number for this plugin
|
||||
self.base_eid = '' # the base entity id host=<hostname>
|
||||
self.controller = False # set true if node is controller
|
||||
|
||||
# dynamic gate variables
|
||||
self.virtual = False # set to True if host is virtual
|
||||
self.config_complete = False # set to True once config is complete
|
||||
self.config_done = False # set true if config_func completed ok
|
||||
self.init_done = False # set true if init_func completed ok
|
||||
self.fm_connectivity = False # set true when fm connectivity ok
|
||||
|
||||
self.alarm_type = fm_constants.FM_ALARM_TYPE_7 # OPERATIONAL
|
||||
self.cause = fm_constants.ALARM_PROBABLE_CAUSE_50 # THRESHOLD CROSS
|
||||
self.suppression = True
|
||||
self.service_affecting = False
|
||||
|
||||
# dynamic variables set in read_func
|
||||
self.usage = float(0) # last usage value recorded as float
|
||||
self.value = float(0) # last read value
|
||||
self.audits = 0 # number of audit since init
|
||||
self.enabled = False # tracks a plugin's enabled state
|
||||
self.alarmed = False # tracks the current alarmed state
|
||||
self.mode = '' # mode specific to plugin
|
||||
|
||||
# http and json specific variables
|
||||
self.url = url # target url
|
||||
self.jresp = None # used to store the json response
|
||||
self.resp = ''
|
||||
|
||||
self.objects = [] # list of plugin specific objects
|
||||
self.cmd = '' # plugin specific command string
|
||||
|
||||
# Log controls
|
||||
self.config_logged = False # used to log once the plugin config
|
||||
self.error_logged = False # used to prevent log flooding
|
||||
self.log_throttle_count = 0 # used to count throttle logs
|
||||
self.INIT_LOG_THROTTLE = 10 # the init log throttle threshold
|
||||
self.http_retry_count = 0 # track http error cases
|
||||
self.HTTP_RETRY_THROTTLE = 6 # http retry threshold
|
||||
self.phase = 0 # tracks current phase; init, sampling
|
||||
|
||||
collectd.debug("%s Common PluginObject constructor [%s]" %
|
||||
(plugin, url))
|
||||
|
||||
###########################################################################
|
||||
#
|
||||
# Name : init_ready
|
||||
#
|
||||
# Description: Test for init ready condition
|
||||
#
|
||||
# Parameters : plugin name
|
||||
#
|
||||
# Returns : False if initial config complete is not done
|
||||
# True if initial config complete is done
|
||||
#
|
||||
###########################################################################
|
||||
|
||||
def init_ready(self):
|
||||
"""Test for system init ready state"""
|
||||
|
||||
if os.path.exists(tsc.INITIAL_CONFIG_COMPLETE_FLAG) is False:
|
||||
self.log_throttle_count += 1
|
||||
if self.log_throttle_count > self.INIT_LOG_THROTTLE:
|
||||
collectd.info("%s initialization needs retry" % self.plugin)
|
||||
self.log_throttle_count = 0
|
||||
return False
|
||||
else:
|
||||
self.log_throttle_count = 0
|
||||
|
||||
return True
|
||||
|
||||
###########################################################################
|
||||
#
|
||||
# Name : gethostname
|
||||
#
|
||||
# Description: load the hostname
|
||||
#
|
||||
# Parameters : plugin name
|
||||
#
|
||||
# Returns : Success - hostname
|
||||
# Failure - None
|
||||
#
|
||||
# Updates : obj.hostname
|
||||
#
|
||||
###########################################################################
|
||||
def gethostname(self):
|
||||
"""Fetch the hostname"""
|
||||
|
||||
# get current hostname
|
||||
try:
|
||||
hostname = socket.gethostname()
|
||||
if hostname:
|
||||
return hostname
|
||||
except:
|
||||
collectd.error("%s failed to get hostname" % self.plugin)
|
||||
|
||||
return None
|
||||
|
||||
###########################################################################
|
||||
#
|
||||
# Name : is_virtual
|
||||
#
|
||||
# Description: Execute facter command with output filter on 'is_virtual'
|
||||
#
|
||||
# Parameters : None
|
||||
#
|
||||
# Returns : True if current host is virtual.
|
||||
# False if current host is NOT virtual
|
||||
#
|
||||
###########################################################################
|
||||
def is_virtual(self):
|
||||
"""Check for virtual host"""
|
||||
|
||||
try:
|
||||
cmd = '/usr/bin/facter is_virtual'
|
||||
res, err = processutils.execute(cmd, shell=True)
|
||||
if err:
|
||||
return False
|
||||
elif res:
|
||||
# remove the trailing '\n' with strip()
|
||||
if res.strip() == 'true':
|
||||
collectd.info("%s %s is virtual" %
|
||||
(self.plugin, self.hostname))
|
||||
return True
|
||||
|
||||
except Exception as ex:
|
||||
collectd.info("%s failed to execute '/usr/bin/facter' ; %s" %
|
||||
(self.plugin, ex))
|
||||
|
||||
return False
|
||||
|
||||
###########################################################################
|
||||
#
|
||||
# Name : check_for_fit
|
||||
#
|
||||
# Description: load FIT data if it is present
|
||||
#
|
||||
# Fit Format : unit data -> 0 89
|
||||
# - instance 0 value 89
|
||||
#
|
||||
# Parameters : plugin name
|
||||
# object to update with fit
|
||||
# name in fit file
|
||||
# unit
|
||||
#
|
||||
# Returns : Did a failure occur ?
|
||||
# False = no
|
||||
# True = yes
|
||||
#
|
||||
# Updates : self.usage with FIT value if FIT conditions are present
|
||||
# and apply
|
||||
#
|
||||
###########################################################################
|
||||
def check_for_fit(self, name, unit):
|
||||
"""Load FIT data into usage if it exists"""
|
||||
|
||||
fit_file = '/var/run/fit/' + name + '_data'
|
||||
|
||||
if os.path.exists(fit_file):
|
||||
valid = False
|
||||
with open(fit_file, 'r') as infile:
|
||||
for line in infile:
|
||||
try:
|
||||
inst, val = line.split(' ')
|
||||
if int(unit) == int(inst):
|
||||
self.usage = float(val)
|
||||
valid = True
|
||||
|
||||
except:
|
||||
try:
|
||||
val = float(line)
|
||||
self.usage = float(val)
|
||||
valid = True
|
||||
|
||||
except:
|
||||
collectd.error("%s bad FIT data; ignoring" %
|
||||
self.plugin)
|
||||
|
||||
if valid is True:
|
||||
collectd.info("%s %.2f usage (unit %d) (FIT)" %
|
||||
(self.plugin, self.usage, unit))
|
||||
return False
|
||||
|
||||
return True
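
    # Sketch only: check_for_fit above reads '/var/run/fit/<name>_data' where
    # each line is either '<unit> <value>' or a bare '<value>'. The helper
    # below just writes such a file for manual fault-insertion testing; its
    # name and use are illustrative, not part of the shipped plugin.
    def _write_fit_sample(self, name, unit, value):
        """Create a FIT data file that check_for_fit(name, unit) would consume."""
        with open('/var/run/fit/' + name + '_data', 'w') as outfile:
            outfile.write('%d %s\n' % (unit, value))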
|
||||
|
||||
###########################################################################
|
||||
#
|
||||
# Name : make_http_request
|
||||
#
|
||||
# Description: Issue an http request to the specified URL.
|
||||
# Load and return the response
|
||||
# Handle any execution errors
|
||||
#
|
||||
# Parameters : self as current context.
|
||||
#
|
||||
# Optional:
|
||||
#
|
||||
# url - override the default self url with http address to
|
||||
# issue the get request to.
|
||||
# to - timeout override
|
||||
# hdrs - override use of the default header list
|
||||
#
|
||||
# Updates : self.jresp with the json string response from the request.
|
||||
#
|
||||
# Returns : Error indication (True/False)
|
||||
# True on success
|
||||
# False on error
|
||||
#
|
||||
###########################################################################
|
||||
def make_http_request(self, url=None, to=None, hdrs=None):
|
||||
"""Make a blocking HTTP Request and return result"""
|
||||
|
||||
try:
|
||||
|
||||
# handle timeout override
|
||||
if to is None:
|
||||
to = PLUGIN_TIMEOUT
|
||||
|
||||
# handle url override
|
||||
if url is None:
|
||||
url = self.url
|
||||
|
||||
# handle header override
|
||||
if hdrs is None:
|
||||
hdrs = PLUGIN_HTTP_HEADERS
|
||||
|
||||
http = httplib2.Http(timeout=to)
|
||||
resp = http.request(url, headers=hdrs)
|
||||
|
||||
except Exception as ex:
|
||||
collectd.info("%s http request exception ; %s" %
|
||||
(self.plugin, str(ex)))
|
||||
return False
|
||||
|
||||
try:
|
||||
collectd.debug("%s Resp: %s" %
|
||||
(self.plugin, resp[1]))
|
||||
|
||||
self.resp = resp[1]
|
||||
self.jresp = json.loads(resp[1])
|
||||
|
||||
except Exception as ex:
|
||||
collectd.error("%s http response parse exception ; %s" %
|
||||
(self.plugin, str(ex)))
|
||||
if len(self.resp):
|
||||
collectd.error("%s response: %s" %
|
||||
(self.plugin, self.resp))
|
||||
return False
|
||||
return True
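
# Illustrative usage of make_http_request (a sketch; the URL and the JSON field
# name queried below are assumptions made for this example, not values used by
# any shipped plugin).
def _query_status_example():
    o = PluginObject('example plugin', 'http://localhost:8080/v1/status')
    if o.make_http_request(to=5) is True:
        # on success, jresp holds the decoded JSON response body
        if isinstance(o.jresp, dict):
            return o.jresp.get('status')
    collectd.error("example plugin status query failed")
    return None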
|
||||
|
||||
|
||||
def is_uuid_like(val):
|
||||
"""Returns validation of a value as a UUID
|
||||
|
||||
For our purposes, a UUID is a canonical form string:
|
||||
aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
|
||||
"""
|
||||
try:
|
||||
return str(uuid.UUID(val)) == val
|
||||
except (TypeError, ValueError, AttributeError):
|
||||
return False
|
||||
|
||||
|
||||
def get_severity_str(severity):
|
||||
"""get string that represents the specified severity"""
|
||||
|
||||
if severity == fm_constants.FM_ALARM_SEVERITY_CLEAR:
|
||||
return "clear"
|
||||
elif severity == fm_constants.FM_ALARM_SEVERITY_CRITICAL:
|
||||
return "critical"
|
||||
elif severity == fm_constants.FM_ALARM_SEVERITY_MAJOR:
|
||||
return "major"
|
||||
elif severity == fm_constants.FM_ALARM_SEVERITY_MINOR:
|
||||
return "minor"
|
||||
else:
|
||||
return "unknown"
|
@ -1,15 +0,0 @@
|
||||
<Plugin "threshold">
|
||||
<Plugin "ptp">
|
||||
<Type "time_offset">
|
||||
Instance "nsec"
|
||||
Persist true
|
||||
PersistOK true
|
||||
WarningMax 1000
|
||||
FailureMax 1000000
|
||||
WarningMin -1000
|
||||
FailureMin -1000000
|
||||
Hits 2
|
||||
Invert false
|
||||
</Type>
|
||||
</Plugin>
|
||||
</Plugin>
|
@ -1,988 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2019 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
############################################################################
|
||||
#
|
||||
# This file is the collectd 'Precision Time Protocol' Service Monitor.
|
||||
#
|
||||
# Algorithm:
|
||||
#
|
||||
# while not config ; check again
|
||||
# while not init ; retry
|
||||
# if startup
|
||||
# clear all ptp alarms
|
||||
# if ptp enabled
|
||||
# if ptp not running
|
||||
# raise 'process' alarm
|
||||
# else
|
||||
# read grand master and current skew
|
||||
# if not controller and is grand master
|
||||
# raise 'no lock' alarm
|
||||
# if skew is out-of-tolerance
|
||||
# raise out-of-tolerance alarm
|
||||
#
|
||||
#
|
||||
# manage alarm state throughout
|
||||
# retry on alarm state change failures
|
||||
# only make raise/clear alarm calls on severity state changes
|
||||
#
|
||||
############################################################################
|
||||
import os
|
||||
import collectd
|
||||
import subprocess
|
||||
import tsconfig.tsconfig as tsc
|
||||
import plugin_common as pc
|
||||
from fm_api import constants as fm_constants
|
||||
from fm_api import fm_api
|
||||
|
||||
debug = False
|
||||
|
||||
# Fault manager API Object
|
||||
api = fm_api.FaultAPIsV2()
|
||||
|
||||
PLUGIN_ALARMID = "100.119"
|
||||
|
||||
# name of the plugin - all logs produced by this plugin are prefixed with this
|
||||
PLUGIN = 'ptp plugin'
|
||||
|
||||
# Service name
|
||||
PTP = 'Precision Time Protocol (PTP)'
|
||||
|
||||
# Interface Monitoring Interval in seconds
|
||||
PLUGIN_AUDIT_INTERVAL = 300
|
||||
|
||||
# Sample Data 'type' and 'instance' database field values.
|
||||
PLUGIN_TYPE = 'time_offset'
|
||||
PLUGIN_TYPE_INSTANCE = 'nsec'
|
||||
|
||||
# Primary PTP service name
|
||||
PLUGIN_SERVICE = 'ptp4l.service'
|
||||
|
||||
# Plugin configuration file
|
||||
#
|
||||
# This plugin looks for the timestamping mode in the ptp4l config file.
|
||||
# time_stamping hardware
|
||||
#
|
||||
PLUGIN_CONF_FILE = '/etc/ptp4l.conf'
|
||||
PLUGIN_CONF_TIMESTAMPING = 'time_stamping'
|
||||
|
||||
# Tools used by plugin
|
||||
SYSTEMCTL = '/usr/bin/systemctl'
|
||||
ETHTOOL = '/usr/sbin/ethtool'
|
||||
PLUGIN_STATUS_QUERY_EXEC = '/usr/sbin/pmc'
|
||||
|
||||
# Query PTP service administrative (enabled/disabled) state
|
||||
#
|
||||
# > systemctl is-enabled ptp4l
|
||||
# enabled
|
||||
# > systemctl disable ptp4l
|
||||
# > systemctl is-enabled ptp4l
|
||||
# disabled
|
||||
|
||||
SYSTEMCTL_IS_ENABLED_OPTION = 'is-enabled'
|
||||
SYSTEMCTL_IS_ENABLED_RESPONSE = 'enabled'
|
||||
SYSTEMCTL_IS_DISABLED_RESPONSE = 'disabled'
|
||||
|
||||
# Query PTP service activity (active=running / inactive) state
|
||||
#
|
||||
# > systemctl is-active ptp4l
|
||||
# active
|
||||
# > systemctl stop ptp4l
|
||||
# > systemctl is-active ptp4l
|
||||
# inactive
|
||||
|
||||
SYSTEMCTL_IS_ACTIVE_OPTION = 'is-active'
|
||||
SYSTEMCTL_IS_ACTIVE_RESPONSE = 'active'
|
||||
SYSTEMCTL_IS_INACTIVE_RESPONSE = 'inactive'
|
||||
|
||||
# Alarm Cause codes ; used to specify what alarm EID to assert or clear.
|
||||
ALARM_CAUSE__NONE = 0
|
||||
ALARM_CAUSE__PROCESS = 1
|
||||
ALARM_CAUSE__OOT = 2
|
||||
ALARM_CAUSE__NO_LOCK = 3
|
||||
ALARM_CAUSE__UNSUPPORTED_HW = 4
|
||||
ALARM_CAUSE__UNSUPPORTED_SW = 5
|
||||
ALARM_CAUSE__UNSUPPORTED_LEGACY = 6
|
||||
|
||||
# Run Phase
|
||||
RUN_PHASE__INIT = 0
|
||||
RUN_PHASE__DISABLED = 1
|
||||
RUN_PHASE__NOT_RUNNING = 2
|
||||
RUN_PHASE__SAMPLING = 3
|
||||
|
||||
# Clock Sync Out-Of-Tolerance thresholds
|
||||
OOT_MINOR_THRESHOLD = int(1000)
|
||||
OOT_MAJOR_THRESHOLD = int(1000000)
|
||||
|
||||
# Instantiate the common plugin control object
|
||||
obj = pc.PluginObject(PLUGIN, "")
|
||||
|
||||
|
||||
# Create an alarm management class
|
||||
class PTP_alarm_object:
|
||||
|
||||
def __init__(self, interface=None):
|
||||
|
||||
self.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
|
||||
self.cause = fm_constants.ALARM_PROBABLE_CAUSE_50
|
||||
self.alarm = ALARM_CAUSE__NONE
|
||||
self.interface = interface
|
||||
self.raised = False
|
||||
self.reason = ''
|
||||
self.repair = ''
|
||||
self.eid = ''
|
||||
|
||||
|
||||
# Plugin specific control class and object.
|
||||
class PTP_ctrl_object:
|
||||
|
||||
def __init__(self):
|
||||
|
||||
self.gm_log_throttle = 0
|
||||
self.nolock_alarm_object = None
|
||||
self.process_alarm_object = None
|
||||
self.oot_alarm_object = None
|
||||
|
||||
|
||||
ctrl = PTP_ctrl_object()
|
||||
|
||||
|
||||
# Alarm object list, one entry for each interface and alarm cause case
|
||||
ALARM_OBJ_LIST = []
|
||||
|
||||
|
||||
# UT verification utilities
|
||||
def assert_all_alarms():
|
||||
for o in ALARM_OBJ_LIST:
|
||||
raise_alarm(o.alarm, o.interface, 0)
|
||||
|
||||
|
||||
def clear_all_alarms():
|
||||
for o in ALARM_OBJ_LIST:
|
||||
if clear_alarm(o.eid) is True:
|
||||
msg = 'cleared'
|
||||
else:
|
||||
msg = 'clear failed'
|
||||
collectd.info("%s %s:%s alarm %s" %
|
||||
(PLUGIN, PLUGIN_ALARMID, o.eid, msg))
|
||||
|
||||
|
||||
def print_alarm_object(o):
|
||||
collectd.info("%s Interface:%s Cause: %d Severity:%s Raised:%d" %
|
||||
(PLUGIN,
|
||||
o.interface,
|
||||
o.alarm,
|
||||
o.severity,
|
||||
o.raised))
|
||||
collectd.info("%s Entity:[%s]" % (PLUGIN, o.eid))
|
||||
collectd.info("%s Reason:[%s]" % (PLUGIN, o.reason))
|
||||
collectd.info("%s Repair:[%s]" % (PLUGIN, o.repair))
|
||||
|
||||
|
||||
def print_alarm_objects():
|
||||
for o in ALARM_OBJ_LIST:
|
||||
print_alarm_object(o)
|
||||
|
||||
|
||||
# Interface:Supported Modes dictionary. key:value
|
||||
#
|
||||
# interface:modes
|
||||
#
|
||||
interfaces = {}
|
||||
|
||||
|
||||
#####################################################################
|
||||
#
|
||||
# Name : _get_supported_modes
|
||||
#
|
||||
# Description: Invoke ethtool -T <interface> and load its
|
||||
# time stamping capabilities.
|
||||
#
|
||||
# hardware, software or legacy.
|
||||
#
|
||||
# Parameters : The name of the physical interface to query the
|
||||
# supported modes for.
|
||||
#
|
||||
# Interface Capabilities Output Examples:
|
||||
#
|
||||
# vbox prints this as it only supports software timestamping
|
||||
# software-transmit (SOF_TIMESTAMPING_TX_SOFTWARE)
|
||||
# software-receive (SOF_TIMESTAMPING_RX_SOFTWARE)
|
||||
#
|
||||
# full support output looks like this
|
||||
# hardware-transmit (SOF_TIMESTAMPING_TX_HARDWARE)
|
||||
# software-transmit (SOF_TIMESTAMPING_TX_SOFTWARE)
|
||||
# hardware-receive (SOF_TIMESTAMPING_RX_HARDWARE)
|
||||
# software-receive (SOF_TIMESTAMPING_RX_SOFTWARE)
|
||||
# hardware-raw-clock (SOF_TIMESTAMPING_RAW_HARDWARE)
|
||||
#
|
||||
# Only legacy support output looks like this
|
||||
# hardware-raw-clock (SOF_TIMESTAMPING_RAW_HARDWARE)
|
||||
#
|
||||
# Provisionable PTP Modes are
|
||||
# hardware -> hardware-transmit/receive
|
||||
# software -> software-transmit/receive
|
||||
# legacy -> hardware-raw-clock
|
||||
|
||||
TIMESTAMP_MODE__HW = 'hardware'
|
||||
TIMESTAMP_MODE__SW = 'software'
|
||||
TIMESTAMP_MODE__LEGACY = 'legacy'
|
||||
|
||||
|
||||
#
|
||||
# Returns : a list of supported modes
|
||||
#
|
||||
#####################################################################
|
||||
def _get_supported_modes(interface):
|
||||
"""Get the supported modes for the specified interface"""
|
||||
|
||||
hw_tx = hw_rx = sw_tx = sw_rx = False
|
||||
modes = []
|
||||
data = subprocess.check_output([ETHTOOL, '-T', interface]).split('\n')
|
||||
if data:
|
||||
collectd.debug("%s 'ethtool -T %s' output:%s\n" %
|
||||
(PLUGIN, interface, data))
|
||||
check_for_modes = False
|
||||
for i in range(0, len(data)):
|
||||
collectd.debug("%s data[%d]:%s\n" % (PLUGIN, i, data[i]))
|
||||
if 'Capabilities' in data[i]:
|
||||
|
||||
# start of capabilities list
|
||||
check_for_modes = True
|
||||
|
||||
elif check_for_modes is True:
|
||||
|
||||
if 'PTP Hardware Clock' in data[i]:
|
||||
# no more modes after this label
|
||||
break
|
||||
elif 'hardware-transmit' in data[i]:
|
||||
hw_tx = True
|
||||
elif 'hardware-receive' in data[i]:
|
||||
hw_rx = True
|
||||
elif 'software-transmit' in data[i]:
|
||||
sw_tx = True
|
||||
elif 'software-receive' in data[i]:
|
||||
sw_rx = True
|
||||
elif 'hardware-raw-clock' in data[i]:
|
||||
modes.append(TIMESTAMP_MODE__LEGACY)
|
||||
|
||||
if sw_tx is True and sw_rx is True:
|
||||
modes.append(TIMESTAMP_MODE__SW)
|
||||
|
||||
if hw_tx is True and hw_rx is True:
|
||||
modes.append(TIMESTAMP_MODE__HW)
|
||||
|
||||
if modes:
|
||||
collectd.debug("%s %s interface PTP capabilities: %s" %
|
||||
(PLUGIN, interface, modes))
|
||||
else:
|
||||
collectd.info("%s no capabilities advertised for %s" %
|
||||
(PLUGIN, interface))
|
||||
|
||||
else:
|
||||
collectd.info("%s no ethtool output for %s" % (PLUGIN, interface))
|
||||
return None
|
||||
|
||||
return modes
|
||||
|
||||
|
||||
#####################################################################
|
||||
#
|
||||
# Name : get_alarm_object
|
||||
#
|
||||
# Description: Search the alarm list based on the alarm cause
|
||||
# code and interface.
|
||||
#
|
||||
# Returns : Alarm object if found ; otherwise None
|
||||
#
|
||||
#####################################################################
|
||||
def get_alarm_object(alarm, interface=None):
|
||||
"""Alarm object lookup"""
|
||||
|
||||
for o in ALARM_OBJ_LIST:
|
||||
# print_alarm_object(o)
|
||||
if interface is None:
|
||||
if o.alarm == alarm:
|
||||
return o
|
||||
else:
|
||||
if o.interface == interface:
|
||||
if o.alarm == alarm:
|
||||
return o
|
||||
|
||||
collectd.info("%s alarm object lookup failed ; %d:%s" %
|
||||
(PLUGIN, alarm, interface))
|
||||
return None
|
||||
|
||||
|
||||
#####################################################################
|
||||
#
|
||||
# Name : clear_alarm
|
||||
#
|
||||
# Description: Clear the ptp alarm with the specified entity ID.
|
||||
#
|
||||
# Returns : True if operation succeeded
|
||||
# False if there was an error exception.
|
||||
#
|
||||
# Assumptions: Caller can decide to retry based on return status.
|
||||
#
|
||||
#####################################################################
|
||||
def clear_alarm(eid):
|
||||
"""Clear the ptp alarm with the specified entity ID"""
|
||||
|
||||
try:
|
||||
if api.clear_fault(PLUGIN_ALARMID, eid) is True:
|
||||
collectd.info("%s %s:%s alarm cleared" %
|
||||
(PLUGIN, PLUGIN_ALARMID, eid))
|
||||
else:
|
||||
collectd.info("%s %s:%s alarm already cleared" %
|
||||
(PLUGIN, PLUGIN_ALARMID, eid))
|
||||
return True
|
||||
|
||||
except Exception as ex:
|
||||
collectd.error("%s 'clear_fault' exception ; %s:%s ; %s" %
|
||||
(PLUGIN, PLUGIN_ALARMID, eid, ex))
|
||||
return False
|
||||
|
||||
|
||||
#####################################################################
|
||||
#
|
||||
# Name : raise_alarm
|
||||
#
|
||||
# Description: Assert a specific PTP alarm based on the alarm cause
|
||||
# code and interface.
|
||||
#
|
||||
# Handle special case cause codes
|
||||
# Handle failure to raise fault
|
||||
#
|
||||
# Assumptions: Short circuited Success return if the alarm is
|
||||
# already known to be asserted.
|
||||
#
|
||||
# Returns : False on Failure
|
||||
# True on Success
|
||||
#
|
||||
#####################################################################
|
||||
def raise_alarm(alarm_cause, interface=None, data=0):
|
||||
"""Assert a cause based PTP alarm"""
|
||||
|
||||
collectd.debug("%s Raising Alarm %d" % (PLUGIN, alarm_cause))
|
||||
|
||||
alarm = get_alarm_object(alarm_cause, interface)
|
||||
if alarm is None:
|
||||
# log created for None case in the get_alarm_object util
|
||||
return True
|
||||
|
||||
# copy the reason as it might be updated for the OOT,
|
||||
# most typical, case.
|
||||
reason = alarm.reason
|
||||
|
||||
# Handle some special cases
|
||||
#
|
||||
|
||||
if alarm_cause == ALARM_CAUSE__OOT:
|
||||
# If this is an out of tolerance alarm then add the
|
||||
# out of tolerance reading to the reason string before
|
||||
# asserting the alarm.
|
||||
#
|
||||
# Keep the alarm updated with the latest sample reading
|
||||
# and severity even if its already asserted.
|
||||
if abs(float(data)) > 100000000000:
|
||||
reason += 'more than 100 seconds'
|
||||
elif abs(float(data)) > 10000000000:
|
||||
reason += 'more than 10 seconds'
|
||||
elif abs(float(data)) > 1000000000:
|
||||
reason += 'more than 1 second'
|
||||
elif abs(float(data)) > 1000000:
|
||||
reason += str(abs(int(data)) / 1000000)
|
||||
reason += ' millisecs'
|
||||
elif abs(float(data)) > 1000:
|
||||
reason += str(abs(int(data)) / 1000)
|
||||
reason += ' microsecs'
|
||||
else:
|
||||
reason += str(float(data))
|
||||
reason += ' ' + PLUGIN_TYPE_INSTANCE
|
||||
|
||||
elif alarm.raised is True:
|
||||
# If alarm already raised then exit.
|
||||
#
|
||||
# All other alarms are a Major so there is no need to
|
||||
# track a change in severity and update accordingly.
|
||||
return True
|
||||
|
||||
elif alarm_cause == ALARM_CAUSE__PROCESS:
|
||||
reason = 'Provisioned ' + PTP + ' \'' + obj.mode
|
||||
reason += '\' time stamping mode seems to be unsupported by this host'
|
||||
|
||||
try:
|
||||
fault = fm_api.Fault(
|
||||
alarm_id=PLUGIN_ALARMID,
|
||||
alarm_state=fm_constants.FM_ALARM_STATE_SET,
|
||||
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
|
||||
entity_instance_id=alarm.eid,
|
||||
severity=alarm.severity,
|
||||
reason_text=reason,
|
||||
alarm_type=obj.alarm_type,
|
||||
probable_cause=alarm.cause,
|
||||
proposed_repair_action=alarm.repair,
|
||||
service_affecting=False, # obj.service_affecting,
|
||||
suppression=True) # obj.suppression)
|
||||
|
||||
alarm_uuid = api.set_fault(fault)
|
||||
if pc.is_uuid_like(alarm_uuid) is False:
|
||||
|
||||
# Don't mark the alarm as raised if the fm call failed.
|
||||
# That way it will be retried at a later time.
|
||||
collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
|
||||
(PLUGIN, PLUGIN_ALARMID, alarm.eid, alarm_uuid))
|
||||
return False
|
||||
|
||||
else:
|
||||
collectd.info("%s %s:%s:%s alarm raised" %
|
||||
(PLUGIN, PLUGIN_ALARMID, alarm.eid, alarm.severity))
|
||||
alarm.raised = True
|
||||
return True
|
||||
|
||||
except Exception as ex:
|
||||
collectd.error("%s 'set_fault' exception ; %s:%s:%s ; %s" %
|
||||
(PLUGIN,
|
||||
PLUGIN_ALARMID,
|
||||
alarm.eid,
|
||||
alarm.severity,
|
||||
ex))
|
||||
return False
|
||||
|
||||
|
||||
#####################################################################
|
||||
#
|
||||
# Name : create_interface_alarm_objects
|
||||
#
|
||||
# Description: Create alarm objects for specified interface
|
||||
#
|
||||
#####################################################################
|
||||
def create_interface_alarm_objects(interface=None):
|
||||
"""Create alarm objects"""
|
||||
|
||||
collectd.debug("%s Alarm Object Create: Interface:%s " %
|
||||
(PLUGIN, interface))
|
||||
|
||||
if interface is None:
|
||||
o = PTP_alarm_object()
|
||||
o.alarm = ALARM_CAUSE__PROCESS
|
||||
o.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
|
||||
o.reason = obj.hostname + ' does not support the provisioned '
|
||||
o.reason += PTP + ' mode '
|
||||
o.repair = 'Check host hardware reference manual '
|
||||
o.repair += 'to verify that the selected PTP mode is supported'
|
||||
o.eid = obj.base_eid + '.ptp'
|
||||
o.cause = fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN # 'unknown'
|
||||
ALARM_OBJ_LIST.append(o)
|
||||
ctrl.process_alarm_object = o
|
||||
|
||||
o = PTP_alarm_object()
|
||||
o.alarm = ALARM_CAUSE__OOT
|
||||
o.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
|
||||
o.reason = obj.hostname + ' '
|
||||
o.reason += PTP + " clocking is out of tolerance by "
|
||||
o.repair = "Check quality of the clocking network"
|
||||
o.eid = obj.base_eid + '.ptp=out-of-tolerance'
|
||||
o.cause = fm_constants.ALARM_PROBABLE_CAUSE_50 # THRESHOLD CROSS
|
||||
ALARM_OBJ_LIST.append(o)
|
||||
ctrl.oot_alarm_object = o
|
||||
|
||||
o = PTP_alarm_object()
|
||||
# Only applies to storage and worker nodes
|
||||
o.alarm = ALARM_CAUSE__NO_LOCK
|
||||
o.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
|
||||
o.reason = obj.hostname
|
||||
o.reason += ' is not locked to remote PTP Grand Master'
|
||||
o.repair = 'Check network'
|
||||
o.eid = obj.base_eid + '.ptp=no-lock'
|
||||
o.cause = fm_constants.ALARM_PROBABLE_CAUSE_51 # timing-problem
|
||||
ALARM_OBJ_LIST.append(o)
|
||||
ctrl.nolock_alarm_object = o
|
||||
|
||||
else:
|
||||
o = PTP_alarm_object(interface)
|
||||
o.alarm = ALARM_CAUSE__UNSUPPORTED_HW
|
||||
o.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
|
||||
o.reason = obj.hostname + " '" + interface + "' does not support "
|
||||
o.reason += PTP + ' Hardware timestamping'
|
||||
o.repair = 'Check host hardware reference manual to verify PTP '
|
||||
o.repair += 'Hardware timestamping is supported by this interface'
|
||||
o.eid = obj.base_eid + '.ptp=' + interface
|
||||
o.eid += '.unsupported=hardware-timestamping'
|
||||
o.cause = fm_constants.ALARM_PROBABLE_CAUSE_7 # 'config error'
|
||||
ALARM_OBJ_LIST.append(o)
|
||||
|
||||
o = PTP_alarm_object(interface)
|
||||
o.alarm = ALARM_CAUSE__UNSUPPORTED_SW
|
||||
o.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
|
||||
o.reason = obj.hostname + " '" + interface + "' does not support "
|
||||
o.reason += PTP + ' Software timestamping'
|
||||
o.repair = 'Check host hardware reference manual to verify PTP '
|
||||
o.repair += 'Software timestamping is supported by this interface'
|
||||
o.eid = obj.base_eid + '.ptp=' + interface
|
||||
o.eid += '.unsupported=software-timestamping'
|
||||
o.cause = fm_constants.ALARM_PROBABLE_CAUSE_7 # 'config error'
|
||||
ALARM_OBJ_LIST.append(o)
|
||||
|
||||
o = PTP_alarm_object(interface)
|
||||
o.alarm = ALARM_CAUSE__UNSUPPORTED_LEGACY
|
||||
o.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
|
||||
o.reason = obj.hostname + " '" + interface + "' does not support "
|
||||
o.reason += PTP + " Legacy timestamping"
|
||||
o.repair = 'Check host hardware reference manual to verify PTP '
|
||||
o.repair += 'Legacy or Raw Clock is supported by this host'
|
||||
o.eid = obj.base_eid + '.ptp=' + interface
|
||||
o.eid += '.unsupported=legacy-timestamping'
|
||||
o.cause = fm_constants.ALARM_PROBABLE_CAUSE_7 # 'config error'
|
||||
ALARM_OBJ_LIST.append(o)
|
||||
|
||||
|
||||
#####################################################################
|
||||
#
|
||||
# Name : read_timestamp_mode
|
||||
#
|
||||
# Description: Refresh the timestamping mode if it changes
|
||||
#
|
||||
#####################################################################
|
||||
def read_timestamp_mode():
|
||||
"""Load timestamping mode"""
|
||||
|
||||
if os.path.exists(PLUGIN_CONF_FILE):
|
||||
current_mode = obj.mode
|
||||
with open(PLUGIN_CONF_FILE, 'r') as infile:
|
||||
for line in infile:
|
||||
if PLUGIN_CONF_TIMESTAMPING in line:
|
||||
obj.mode = line.split()[1].strip('\n')
|
||||
break
|
||||
|
||||
if obj.mode:
|
||||
if obj.mode != current_mode:
|
||||
collectd.info("%s Timestamping Mode: %s" %
|
||||
(PLUGIN, obj.mode))
|
||||
else:
|
||||
collectd.error("%s failed to get Timestamping Mode" % PLUGIN)
|
||||
else:
|
||||
collectd.error("%s failed to load ptp4l configuration" % PLUGIN)
|
||||
obj.mode = None
|
||||
|
||||
|
||||
#####################################################################
|
||||
#
|
||||
# Name : init_func
|
||||
#
|
||||
# Description: The collectd initialization entrypoint for
|
||||
# this plugin
|
||||
#
|
||||
# Assumptions: called only once
|
||||
#
|
||||
# Algorithm : check for no
|
||||
#
|
||||
#
|
||||
#####################################################################
|
||||
def init_func():
|
||||
|
||||
if obj.init_ready() is False:
|
||||
return False
|
||||
|
||||
obj.hostname = obj.gethostname()
|
||||
obj.base_eid = 'host=' + obj.hostname
|
||||
|
||||
# Create the interface independent alarm objects.
|
||||
create_interface_alarm_objects()
|
||||
|
||||
# load monitored interfaces and supported modes
|
||||
if os.path.exists(PLUGIN_CONF_FILE):
|
||||
with open(PLUGIN_CONF_FILE, 'r') as infile:
|
||||
for line in infile:
|
||||
# The PTP interfaces used are specified in the ptp4l.conf
|
||||
# file as [interface]. There may be more than one.
|
||||
# Presently there is no need to track the function of the
|
||||
# interface ; namely mgmnt or oam.
|
||||
if line[0] == '[':
|
||||
interface = line.split(']')[0].split('[')[1]
|
||||
if interface and interface != 'global':
|
||||
interfaces[interface] = _get_supported_modes(interface)
|
||||
create_interface_alarm_objects(interface)
|
||||
|
||||
if PLUGIN_CONF_TIMESTAMPING in line:
|
||||
obj.mode = line.split()[1].strip('\n')
|
||||
|
||||
if obj.mode:
|
||||
collectd.info("%s Timestamping Mode: %s" %
|
||||
(PLUGIN, obj.mode))
|
||||
else:
|
||||
collectd.error("%s failed to get Timestamping Mode" % PLUGIN)
|
||||
else:
|
||||
collectd.error("%s failed to load ptp4l configuration" % PLUGIN)
|
||||
obj.mode = None
|
||||
|
||||
for key, value in interfaces.items():
|
||||
collectd.info("%s interface %s supports timestamping modes: %s" %
|
||||
(PLUGIN, key, value))
|
||||
|
||||
# remove the '#' to dump alarm object data
|
||||
# print_alarm_objects()
|
||||
|
||||
if tsc.nodetype == 'controller':
|
||||
obj.controller = True
|
||||
|
||||
obj.virtual = obj.is_virtual()
|
||||
obj.init_done = True
|
||||
obj.log_throttle_count = 0
|
||||
collectd.info("%s initialization complete" % PLUGIN)
|
||||
|
||||
|
||||
#####################################################################
|
||||
#
|
||||
# Name : read_func
|
||||
#
|
||||
# Description: The collectd audit entrypoint for PTP Monitoring
|
||||
#
|
||||
# Assumptions: collectd calls init_func one time.
|
||||
#
|
||||
#
|
||||
# retry init if needed
|
||||
# retry fm connect if needed
|
||||
# check service enabled state
|
||||
# check service running state
|
||||
# error -> alarm host=<hostname>.ptp
|
||||
# check
|
||||
#
|
||||
#####################################################################
|
||||
def read_func():
|
||||
|
||||
if obj.virtual is True:
|
||||
return 0
|
||||
|
||||
# check and run init until it reports init_done True
|
||||
if obj.init_done is False:
|
||||
if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
|
||||
collectd.info("%s re-running init" % PLUGIN)
|
||||
obj.log_throttle_count += 1
|
||||
init_func()
|
||||
return 0
|
||||
|
||||
if obj.fm_connectivity is False:
|
||||
|
||||
try:
|
||||
# query FM for existing alarms.
|
||||
alarms = api.get_faults_by_id(PLUGIN_ALARMID)
|
||||
except Exception as ex:
|
||||
collectd.error("%s 'get_faults_by_id' exception ;"
|
||||
" %s ; %s" %
|
||||
(PLUGIN, PLUGIN_ALARMID, ex))
|
||||
return 0
|
||||
|
||||
if alarms:
|
||||
for alarm in alarms:
|
||||
collectd.debug("%s found startup alarm '%s'" %
|
||||
(PLUGIN, alarm.entity_instance_id))
|
||||
|
||||
eid = alarm.entity_instance_id
|
||||
if eid is None:
|
||||
collectd.error("%s startup alarm query error ; no eid" %
|
||||
PLUGIN)
|
||||
continue
|
||||
|
||||
# get the hostname host=<hostname>.stuff
|
||||
# split over base eid and then
|
||||
# compare that to this plugin's base eid
|
||||
# ignore alarms not for this host
|
||||
if eid.split('.')[0] != obj.base_eid:
|
||||
continue
|
||||
else:
|
||||
# load the state of the specific alarm
|
||||
instance = eid.split('.')[1].split('=')
|
||||
if instance[0] == 'ptp':
|
||||
# clear all ptp alarms on process startup
|
||||
# just in case interface names have changed
|
||||
# since the alarm was raised.
|
||||
if clear_alarm(eid) is False:
|
||||
# if we can't clear the alarm now then error out.
|
||||
collectd.error("%s failed to clear startup "
|
||||
"alarm %s:%s" %
|
||||
(PLUGIN, PLUGIN_ALARMID, eid))
|
||||
# try again next time around
|
||||
return 0
|
||||
else:
|
||||
collectd.info("%s cleared startup alarm '%s'" %
|
||||
(PLUGIN, alarm.entity_instance_id))
|
||||
else:
|
||||
|
||||
if clear_alarm(eid) is False:
|
||||
collectd.error("%s failed to clear invalid PTP "
|
||||
"alarm %s:%s" %
|
||||
(PLUGIN, PLUGIN_ALARMID,
|
||||
alarm.entity_instance_id))
|
||||
return 0
|
||||
else:
|
||||
collectd.info("%s cleared found invalid startup"
|
||||
" alarm %s:%s" %
|
||||
(PLUGIN,
|
||||
PLUGIN_ALARMID,
|
||||
alarm.entity_instance_id))
|
||||
else:
|
||||
collectd.info("%s no startup alarms found" % PLUGIN)
|
||||
|
||||
obj.config_complete = True
|
||||
obj.fm_connectivity = True
|
||||
# assert_all_alarms()
|
||||
|
||||
# This plugin supports PTP in-service state change by checking
|
||||
# service state on every audit ; every 5 minutes.
|
||||
data = subprocess.check_output([SYSTEMCTL,
|
||||
SYSTEMCTL_IS_ENABLED_OPTION,
|
||||
PLUGIN_SERVICE])
|
||||
collectd.debug("%s PTP admin state:%s" % (PLUGIN, data.rstrip()))
|
||||
|
||||
if data.rstrip() == SYSTEMCTL_IS_DISABLED_RESPONSE:
|
||||
|
||||
# Manage execution phase
|
||||
if obj.phase != RUN_PHASE__DISABLED:
|
||||
obj.phase = RUN_PHASE__DISABLED
|
||||
obj.log_throttle_count = 0
|
||||
|
||||
if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
|
||||
collectd.info("%s PTP Service Disabled" % PLUGIN)
|
||||
obj.log_throttle_count += 1
|
||||
|
||||
for o in ALARM_OBJ_LIST:
|
||||
if o.raised is True:
|
||||
if clear_alarm(o.eid) is True:
|
||||
o.raised = False
|
||||
else:
|
||||
collectd.error("%s %s:%s clear alarm failed "
|
||||
"; will retry" %
|
||||
(PLUGIN, PLUGIN_ALARMID, o.eid))
|
||||
return 0
|
||||
|
||||
data = subprocess.check_output([SYSTEMCTL,
|
||||
SYSTEMCTL_IS_ACTIVE_OPTION,
|
||||
PLUGIN_SERVICE])
|
||||
|
||||
if data.rstrip() == SYSTEMCTL_IS_INACTIVE_RESPONSE:
|
||||
|
||||
# Manage execution phase
|
||||
if obj.phase != RUN_PHASE__NOT_RUNNING:
|
||||
obj.phase = RUN_PHASE__NOT_RUNNING
|
||||
obj.log_throttle_count = 0
|
||||
|
||||
if ctrl.process_alarm_object.alarm == ALARM_CAUSE__PROCESS:
|
||||
if ctrl.process_alarm_object.raised is False:
|
||||
collectd.error("%s PTP service enabled but not running" %
|
||||
PLUGIN)
|
||||
if raise_alarm(ALARM_CAUSE__PROCESS) is True:
|
||||
ctrl.process_alarm_object.raised = True
|
||||
|
||||
# clear all other alarms if the 'process' alarm is raised
|
||||
elif ctrl.process_alarm_object.raised is True:
|
||||
if clear_alarm(ctrl.process_alarm_object.eid) is True:
|
||||
msg = 'cleared'
|
||||
ctrl.process_alarm_object.raised = False
|
||||
else:
|
||||
msg = 'failed to clear'
|
||||
collectd.info("%s %s %s:%s" %
|
||||
(PLUGIN, msg, PLUGIN_ALARMID,
|
||||
ctrl.process_alarm_object.eid))
|
||||
return 0
|
||||
|
||||
# Handle clearing the 'process' alarm if it is asserted and
|
||||
# the process is now running
|
||||
if ctrl.process_alarm_object.raised is True:
|
||||
if clear_alarm(ctrl.process_alarm_object.eid) is True:
|
||||
ctrl.process_alarm_object.raised = False
|
||||
collectd.info("%s PTP service enabled and running" % PLUGIN)
|
||||
|
||||
# Auto refresh the timestamping mode in case collectd runs
|
||||
# before the ptp manifest or the mode changes on the fly by
|
||||
# an in-service manifest.
|
||||
# Every 4 audits.
|
||||
obj.audits += 1
|
||||
if not obj.audits % 4:
|
||||
read_timestamp_mode()
|
||||
|
||||
# Manage execution phase
|
||||
if obj.phase != RUN_PHASE__SAMPLING:
|
||||
obj.phase = RUN_PHASE__SAMPLING
|
||||
obj.log_throttle_count = 0
|
||||
|
||||
# Let's read the port status information
|
||||
#
|
||||
# sudo /usr/sbin/pmc -u -b 0 'GET PORT_DATA_SET'
|
||||
#
|
||||
data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC,
|
||||
'-u', '-b', '0', 'GET PORT_DATA_SET'])
|
||||
|
||||
port_locked = False
|
||||
obj.resp = data.split('\n')
|
||||
for line in obj.resp:
|
||||
if 'portState' in line:
|
||||
collectd.debug("%s portState : %s" % (PLUGIN, line.split()[1]))
|
||||
port_state = line.split()[1]
|
||||
if port_state == 'SLAVE':
|
||||
port_locked = True
|
||||
|
||||
# Let's read the clock info, Grand Master sig and skew
|
||||
#
|
||||
# sudo /usr/sbin/pmc -u -b 0 'GET TIME_STATUS_NP'
|
||||
#
|
||||
data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC,
|
||||
'-u', '-b', '0', 'GET TIME_STATUS_NP'])
|
||||
|
||||
got_master_offset = False
|
||||
master_offset = 0
|
||||
my_identity = ''
|
||||
gm_identity = ''
|
||||
gm_present = False
|
||||
obj.resp = data.split('\n')
|
||||
for line in obj.resp:
|
||||
if 'RESPONSE MANAGEMENT TIME_STATUS_NP' in line:
|
||||
collectd.debug("%s key : %s" %
|
||||
(PLUGIN, line.split()[0].split('-')[0]))
|
||||
my_identity = line.split()[0].split('-')[0]
|
||||
if 'master_offset' in line:
|
||||
collectd.debug("%s Offset : %s" % (PLUGIN, line.split()[1]))
|
||||
master_offset = float(line.split()[1])
|
||||
got_master_offset = True
|
||||
if 'gmPresent' in line:
|
||||
collectd.debug("%s gmPresent : %s" % (PLUGIN, line.split()[1]))
|
||||
gm_present = line.split()[1]
|
||||
if 'gmIdentity' in line:
|
||||
collectd.debug("%s gmIdentity: %s" % (PLUGIN, line.split()[1]))
|
||||
gm_identity = line.split()[1]
|
||||
|
||||
# Handle case where this host is the Grand Master
|
||||
# ... or assumes it is.
|
||||
if my_identity == gm_identity or port_locked is False:
|
||||
|
||||
if obj.controller is False:
|
||||
|
||||
# Compute and storage nodes should not be the Grand Master
|
||||
if ctrl.nolock_alarm_object.raised is False:
|
||||
if raise_alarm(ALARM_CAUSE__NO_LOCK, None, 0) is True:
|
||||
ctrl.nolock_alarm_object.raised = True
|
||||
|
||||
# produce a throttled log while this host is not locked to the GM
|
||||
if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
|
||||
collectd.info("%s %s not locked to remote Grand Master "
|
||||
"(%s)" % (PLUGIN, obj.hostname, gm_identity))
|
||||
obj.log_throttle_count += 1
|
||||
|
||||
# No samples for storage and compute nodes that are not
|
||||
# locked to a Grand Master
|
||||
return 0
|
||||
|
||||
else:
|
||||
# Controllers can be a Grand Master ; throttle the log
|
||||
if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
|
||||
collectd.info("%s %s is Grand Master:%s" %
|
||||
(PLUGIN, obj.hostname, gm_identity))
|
||||
obj.log_throttle_count += 1
|
||||
|
||||
# The Grand Master's offset will always be 0 so there is no point
|
||||
# creating a sample for it.
|
||||
return 0
|
||||
|
||||
# Handle clearing nolock alarm for computes and storage nodes
|
||||
elif obj.controller is False:
|
||||
if ctrl.nolock_alarm_object.raised is True:
|
||||
if clear_alarm(ctrl.nolock_alarm_object.eid) is True:
|
||||
ctrl.nolock_alarm_object.raised = False
|
||||
|
||||
# Keep this FIT test code but make it commented out for security
|
||||
# if os.path.exists('/var/run/fit/ptp_data'):
|
||||
# master_offset = 0
|
||||
# with open('/var/run/fit/ptp_data', 'r') as infile:
|
||||
# for line in infile:
|
||||
# master_offset = int(line)
|
||||
# got_master_offset = True
|
||||
# collectd.info("%s using ptp FIT data skew:%d" %
|
||||
# (PLUGIN, master_offset))
|
||||
# break
|
||||
|
||||
# Send sample and Manage the Out-Of-Tolerance alarm
|
||||
if got_master_offset is True:
|
||||
|
||||
if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
|
||||
collectd.info("%s %s is collecting samples [%5d] "
|
||||
"with Grand Master %s" %
|
||||
(PLUGIN, obj.hostname,
|
||||
float(master_offset), gm_identity))
|
||||
|
||||
obj.log_throttle_count += 1
|
||||
|
||||
# setup the sample structure and dispatch
|
||||
val = collectd.Values(host=obj.hostname)
|
||||
val.type = PLUGIN_TYPE
|
||||
val.type_instance = PLUGIN_TYPE_INSTANCE
|
||||
val.plugin = 'ptp'
|
||||
val.dispatch(values=[float(master_offset)])
|
||||
|
||||
# Manage the sample OOT alarm severity
|
||||
severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
|
||||
if abs(master_offset) > OOT_MAJOR_THRESHOLD:
|
||||
severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
|
||||
elif abs(master_offset) > OOT_MINOR_THRESHOLD:
|
||||
severity = fm_constants.FM_ALARM_SEVERITY_MINOR
|
||||
|
||||
# Handle clearing of Out-Of-Tolerance alarm
|
||||
if severity == fm_constants.FM_ALARM_SEVERITY_CLEAR:
|
||||
if ctrl.oot_alarm_object.raised is True:
|
||||
if clear_alarm(ctrl.oot_alarm_object.eid) is True:
|
||||
ctrl.oot_alarm_object.severity = \
|
||||
fm_constants.FM_ALARM_SEVERITY_CLEAR
|
||||
ctrl.oot_alarm_object.raised = False
|
||||
|
||||
else:
|
||||
# Special Case:
|
||||
# -------------
|
||||
# Don't raise minor alarm when in software timestamping mode.
|
||||
# Too much skew in software or legacy mode ; alarm would bounce.
|
||||
# TODO: Consider making ptp a real time process
|
||||
if severity == fm_constants.FM_ALARM_SEVERITY_MINOR \
|
||||
and obj.mode != 'hardware':
|
||||
return 0
|
||||
|
||||
# Handle debounce of the OOT alarm.
|
||||
# Debounce by 1 for the same severity level.
|
||||
if ctrl.oot_alarm_object.severity != severity:
|
||||
ctrl.oot_alarm_object.severity = severity
|
||||
|
||||
# This will keep refreshing the alarm text with the current
|
||||
# skew value while still debouncing on state transitions.
|
||||
#
|
||||
# Precision ... (PTP) clocking is out of tolerance by 1004 nsec
|
||||
#
|
||||
elif severity == fm_constants.FM_ALARM_SEVERITY_MINOR:
|
||||
# Handle raising the Minor OOT Alarm.
|
||||
rc = raise_alarm(ALARM_CAUSE__OOT, None, master_offset)
|
||||
if rc is True:
|
||||
ctrl.oot_alarm_object.raised = True
|
||||
|
||||
elif severity == fm_constants.FM_ALARM_SEVERITY_MAJOR:
|
||||
# Handle raising the Major OOT Alarm.
|
||||
rc = raise_alarm(ALARM_CAUSE__OOT, None, master_offset)
|
||||
if rc is True:
|
||||
ctrl.oot_alarm_object.raised = True
|
||||
|
||||
# Record the value that is alarmable
|
||||
if severity != fm_constants.FM_ALARM_SEVERITY_CLEAR:
|
||||
collectd.info("%s Grand Master ID: %s ; "
|
||||
"HOST ID: %s ; "
|
||||
"GM Present:%s ; "
|
||||
"Skew:%5d" % (PLUGIN,
|
||||
gm_identity,
|
||||
my_identity,
|
||||
gm_present,
|
||||
master_offset))
|
||||
else:
|
||||
collectd.info("%s No Clock Sync" % PLUGIN)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
collectd.register_init(init_func)
|
||||
collectd.register_read(read_func, interval=PLUGIN_AUDIT_INTERVAL)
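
# Sketch: how read_func above maps the pmc 'master_offset' reading
# (nanoseconds) onto an out-of-tolerance severity using OOT_MINOR_THRESHOLD
# and OOT_MAJOR_THRESHOLD. The helper name is illustrative only.
def _oot_severity(master_offset):
    """Return the alarm severity for a given master offset in nanoseconds."""
    if abs(master_offset) > OOT_MAJOR_THRESHOLD:
        return fm_constants.FM_ALARM_SEVERITY_MAJOR
    if abs(master_offset) > OOT_MINOR_THRESHOLD:
        return fm_constants.FM_ALARM_SEVERITY_MINOR
    return fm_constants.FM_ALARM_SEVERITY_CLEAR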
|
@ -1,21 +0,0 @@
|
||||
LoadPlugin python
|
||||
<Plugin python>
|
||||
ModulePath "/opt/collectd/extensions/python"
|
||||
Import "cpu"
|
||||
<Module "cpu">
|
||||
Path "/proc/cpuinfo"
|
||||
</Module>
|
||||
Import "memory"
|
||||
<Module "memory">
|
||||
Path "/proc/meminfo"
|
||||
</Module>
|
||||
Import "ntpq"
|
||||
Import "ptp"
|
||||
Import "interface"
|
||||
<Module "interface">
|
||||
Port 2122
|
||||
</Module>
|
||||
Import "remotels"
|
||||
LogTraces = true
|
||||
Encoding "utf-8"
|
||||
</Plugin>
|
@ -1,13 +0,0 @@
|
||||
<Plugin "threshold">
|
||||
<Plugin "remotels">
|
||||
<Type "absolute">
|
||||
Instance "reachable"
|
||||
Persist true
|
||||
PersistOK true
|
||||
WarningMin 1
|
||||
FailureMin 0
|
||||
Hits 2
|
||||
Invert false
|
||||
</Type>
|
||||
</Plugin>
|
||||
</Plugin>
|
@ -1,350 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2019 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
############################################################################
|
||||
#
|
||||
# This is the Remote Logging Server plugin for collectd.
|
||||
#
|
||||
# The Remote Logging Server is enabled if /etc/syslog-ng/syslog-ng.conf
|
||||
# contains '@include remotelogging.conf'
|
||||
#
|
||||
# There is no asynchronous notification of remote logging server
|
||||
# configuration enable/disable state changes. Therefore, each audit
|
||||
# interval needs to check whether its enabled or not.
|
||||
#
|
||||
# every audit interval ...
|
||||
#
|
||||
# read_func:
|
||||
# check enabled:
|
||||
# if disabled and alarmed:
|
||||
# clear alarm
|
||||
# if enabled:
|
||||
# get ip and port
|
||||
# query status
|
||||
# if connected and alarmed:
|
||||
# clear alarm
|
||||
# if not connected and not alarmed:
|
||||
# raise alarm
|
||||
#
|
||||
# system remotelogging-modify --ip_address <ip address>
|
||||
# --transport tcp
|
||||
# --enabled True
|
||||
#
|
||||
############################################################################
|
||||
|
||||
import os
|
||||
import collectd
|
||||
import tsconfig.tsconfig as tsc
|
||||
import plugin_common as pc
|
||||
from fm_api import constants as fm_constants
|
||||
from oslo_concurrency import processutils
|
||||
from fm_api import fm_api
|
||||
|
||||
# Fault manager API Object
|
||||
api = fm_api.FaultAPIsV2()
|
||||
|
||||
# name of the plugin
|
||||
PLUGIN_NAME = 'remotels'
|
||||
|
||||
# all logs produced by this plugin are prefixed with this
|
||||
PLUGIN = 'remote logging server'
|
||||
|
||||
# Interface Monitoring Interval in seconds
|
||||
PLUGIN_AUDIT_INTERVAL = 60
|
||||
|
||||
# Sample Data 'type' and 'instance' database field values.
|
||||
PLUGIN_TYPE = 'absolute'
|
||||
PLUGIN_TYPE_INSTANCE = 'reachable'
|
||||
|
||||
# Remote Logging Connectivity Alarm ID
|
||||
PLUGIN_ALARMID = '100.118'
|
||||
|
||||
# The file where this plugin learns if remote logging is enabled
|
||||
SYSLOG_CONF_FILE = '/etc/syslog-ng/syslog-ng.conf'
|
||||
|
||||
# Plugin Control Object
|
||||
obj = pc.PluginObject(PLUGIN, "")
|
||||
|
||||
|
||||
# Raise Remote Logging Server Alarm
|
||||
def raise_alarm():
|
||||
"""Raise Remote Logging Server Alarm"""
|
||||
|
||||
repair = 'Ensure Remote Log Server IP is reachable from '
|
||||
repair += 'Controller through OAM interface; otherwise '
|
||||
repair += 'contact next level of support.'
|
||||
|
||||
reason = 'Controller cannot establish connection with '
|
||||
reason += 'remote logging server.'
|
||||
|
||||
try:
|
||||
fault = fm_api.Fault(
|
||||
alarm_id=PLUGIN_ALARMID,
|
||||
alarm_state=fm_constants.FM_ALARM_STATE_SET,
|
||||
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
|
||||
entity_instance_id=obj.base_eid,
|
||||
severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
|
||||
reason_text=reason,
|
||||
alarm_type=fm_constants.FM_ALARM_TYPE_1,
|
||||
probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_6,
|
||||
proposed_repair_action=repair,
|
||||
service_affecting=False,
|
||||
suppression=False)
|
||||
|
||||
alarm_uuid = api.set_fault(fault)
|
||||
if pc.is_uuid_like(alarm_uuid) is False:
|
||||
collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
|
||||
(PLUGIN, PLUGIN_ALARMID,
|
||||
obj.base_eid, alarm_uuid))
|
||||
else:
|
||||
collectd.info("%s %s:%s alarm raised" %
|
||||
(PLUGIN, PLUGIN_ALARMID, obj.base_eid))
|
||||
obj.alarmed = True
|
||||
|
||||
except Exception as ex:
|
||||
collectd.error("%s 'set_fault' exception ; %s:%s ; %s " %
|
||||
(PLUGIN, PLUGIN_ALARMID, obj.base_eid, ex))
|
||||
|
||||
|
||||
# Clear remote logging server alarm
|
||||
def clear_alarm():
|
||||
"""Clear remote logging server alarm"""
|
||||
|
||||
try:
|
||||
if api.clear_fault(PLUGIN_ALARMID, obj.base_eid) is True:
|
||||
collectd.info("%s %s:%s alarm cleared" %
|
||||
(PLUGIN, PLUGIN_ALARMID, obj.base_eid))
|
||||
else:
|
||||
collectd.info("%s %s:%s alarm clear" %
|
||||
(PLUGIN, PLUGIN_ALARMID, obj.base_eid))
|
||||
|
||||
obj.alarmed = False
|
||||
return True
|
||||
|
||||
except Exception as ex:
|
||||
collectd.error("%s 'clear_fault' exception ; %s:%s ; %s" %
|
||||
(PLUGIN, PLUGIN_ALARMID, obj.base_eid, ex))
|
||||
return False
|
||||
|
||||
|
||||
# The config function - called once on collectd process startup
|
||||
def config_func(config):
|
||||
"""Configure the plugin"""
|
||||
|
||||
# all configuration is learned during normal monitoring
|
||||
obj.config_done = True
|
||||
return 0
|
||||
|
||||
|
||||
# The init function - called once on collectd process startup
|
||||
def init_func():
|
||||
"""Init the plugin"""
|
||||
|
||||
# remote logging server monitoring is for controllers only
|
||||
if tsc.nodetype != 'controller':
|
||||
return 0
|
||||
|
||||
if obj.init_done is False:
|
||||
if obj.init_ready() is False:
|
||||
return False
|
||||
|
||||
obj.hostname = obj.gethostname()
|
||||
obj.base_eid = 'host=' + obj.hostname
|
||||
obj.init_done = True
|
||||
collectd.info("%s initialization complete" % PLUGIN)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
# The sample read function - called on every audit interval
|
||||
def read_func():
|
||||
"""Remote logging server connectivity plugin read function"""
|
||||
|
||||
# remote logging server monitoring is for controllers only
|
||||
if tsc.nodetype != 'controller':
|
||||
return 0
|
||||
|
||||
if obj.init_done is False:
|
||||
init_func()
|
||||
return 0
|
||||
|
||||
# get current state
|
||||
current_enabled_state = obj.enabled
|
||||
|
||||
# check to see if remote logging is enabled
|
||||
obj.enabled = False # assume disabled
|
||||
if os.path.exists(SYSLOG_CONF_FILE) is True:
|
||||
with open(SYSLOG_CONF_FILE, 'r') as infile:
|
||||
for line in infile:
|
||||
if line.startswith('@include '):
|
||||
service = line.rstrip().split(' ')[1]
|
||||
if service == '"remotelogging.conf"':
|
||||
obj.enabled = True
|
||||
break
|
||||
|
||||
if current_enabled_state == obj.enabled:
|
||||
logit = False
|
||||
else:
|
||||
if obj.enabled is False:
|
||||
collectd.info("%s is disabled" % PLUGIN)
|
||||
else:
|
||||
collectd.info("%s is enabled" % PLUGIN)
|
||||
logit = True
|
||||
|
||||
# Handle startup case by clearing the existing alarm if it's raised.
|
||||
# It's cheaper and simpler at runtime to issue a blind clear than to query.
|
||||
if obj.audits == 0:
|
||||
if clear_alarm() is False:
|
||||
# if clear fails then retry next time
|
||||
return 0
|
||||
if obj.enabled is False:
|
||||
collectd.info("%s is disabled" % PLUGIN)
|
||||
obj.audits = 1
|
||||
|
||||
if obj.enabled is False:
|
||||
if obj.alarmed is True:
|
||||
clear_alarm()
|
||||
return 0
|
||||
|
||||
# If we get here then the server is enabled ...
|
||||
# Need to query it
|
||||
|
||||
# Get the ip and port from line that looks like this
|
||||
#
|
||||
# tag proto address port
|
||||
# ----------------------------- --- -------------- ---
|
||||
# destination remote_log_server {tcp("128.224.186.65" port(514));};
|
||||
#
|
||||
address = protocol = port = ''
|
||||
with open(SYSLOG_CONF_FILE, 'r') as infile:
|
||||
for line in infile:
|
||||
if line.startswith('destination remote_log_server'):
|
||||
try:
|
||||
if len(line.split('{')) > 1:
|
||||
protocol = line.split('{')[1][0:3]
|
||||
address = line.split('{')[1].split('"')[1]
|
||||
port = line.split('{')[1].split('(')[2].split(')')[0]
|
||||
if not protocol or not address or not port:
|
||||
collectd.error("%s remote log server credentials "
|
||||
"parse error ; (%s:%s:%s)" %
|
||||
(PLUGIN, protocol, address, port))
|
||||
return 1
|
||||
else:
|
||||
# line parsed ; move on ...
|
||||
break
|
||||
else:
|
||||
collectd.error("%s remote log server line parse error"
|
||||
" ; %s" % (PLUGIN, line))
|
||||
except Exception as ex:
|
||||
collectd.error("%s remote log server credentials "
|
||||
"parse exception ; (%s)" % (PLUGIN, line))
|
||||
|
||||
if ':' in address:
|
||||
ipv = 6
|
||||
protocol += '6'
|
||||
|
||||
# Monitoring of IPv6 is not currently supported
|
||||
return 0
|
||||
|
||||
else:
|
||||
ipv = 4
|
||||
|
||||
# This plugin detects server connectivity through its socket status.
|
||||
# To get that, construct the remote logging server IP string.
|
||||
# The files being looked at (/proc/net/tcp or /proc/net/udp) use hex values,
|
||||
# so convert the address to an upper-case hex string with reverse ordering of
|
||||
# the "ipv4" values
|
||||
index = 3
|
||||
addr = [0, 0, 0, 0]
|
||||
|
||||
# swap order
|
||||
for tup in address.split('.'):
|
||||
addr[index] = int(tup)
|
||||
index -= 1
|
||||
|
||||
# build the CAPs HEX address
|
||||
UPPER_HEX_IP = ''
|
||||
for tup in addr:
|
||||
val = hex(int(tup)).split('x')[-1].upper()
|
||||
if len(val) == 1:
|
||||
UPPER_HEX_IP += '0'
|
||||
UPPER_HEX_IP += val
|
||||
UPPER_HEX_IP += ':'
|
||||
tmp = hex(int(port)).split('x')[-1].upper()
|
||||
for i in range(4 - len(tmp)):
|
||||
UPPER_HEX_IP += '0'
|
||||
UPPER_HEX_IP += tmp
|
||||
|
||||
# log example tcp:ipv4:128.224.186.65:514 : IP:41BAE080:0202
|
||||
collectd.debug("%s %s:ipv%d:%s:%s : IP:%s" %
|
||||
(PLUGIN, protocol, ipv, address, port, UPPER_HEX_IP))
|
||||
|
||||
cmd = "cat /proc/net/" + protocol
|
||||
cmd += " | awk '{print $3 \" \" $4}' | grep " + UPPER_HEX_IP
|
||||
cmd += " | awk '{print $2}'"
|
||||
res, err = processutils.execute(cmd, shell=True)
|
||||
if err:
|
||||
collectd.error("%s processutils error:%s" % (PLUGIN, err))
|
||||
|
||||
# cmd example:
|
||||
# cat /proc/net/tcp | awk '{print $3 " " $4}'
|
||||
# | grep 41BAE080:0202
|
||||
# | awk '{print $2}'
|
||||
collectd.debug("%s Cmd:%s" % (PLUGIN, cmd))
|
||||
return 0
|
||||
|
||||
if res and res.rstrip() == '01':
|
||||
# connected state reads 01
|
||||
# Example log: Res:[01]
|
||||
|
||||
# clear alarm if
|
||||
# - currently alarmed and
|
||||
# - debounced by 1 ; need 2 connected readings in a row
|
||||
if obj.alarmed is True:
|
||||
clear_alarm()
|
||||
|
||||
# Only log on state change
|
||||
if obj.usage != 1:
|
||||
logit = True
|
||||
|
||||
obj.usage = 1
|
||||
conn = ''
|
||||
|
||||
else:
|
||||
# res typically reads 02 when not connected
|
||||
# Example log: Res:[02]
|
||||
collectd.debug("%s Res:[%s] " % (PLUGIN, res.rstrip()))
|
||||
|
||||
# raise alarm if
|
||||
# - not already alarmed
|
||||
# - debounced by 1 ; need 2 failures in a row
|
||||
if obj.alarmed is False and obj.usage == 0:
|
||||
raise_alarm()
|
||||
|
||||
# only log on state change
|
||||
if obj.usage == 1 or obj.audits == 1:
|
||||
logit = True
|
||||
|
||||
obj.usage = 0
|
||||
conn = 'not '
|
||||
|
||||
if logit is True:
|
||||
collectd.info("%s is %sconnected [%s ipv%d %s:%s]" %
|
||||
(PLUGIN, conn, protocol, ipv, address, port))
|
||||
obj.audits += 1
|
||||
|
||||
# Dispatch usage value to collectd
|
||||
val = collectd.Values(host=obj.hostname)
|
||||
val.plugin = PLUGIN_NAME
|
||||
val.type = PLUGIN_TYPE
|
||||
val.type_instance = PLUGIN_TYPE_INSTANCE
|
||||
val.dispatch(values=[obj.usage])
|
||||
return 0
|
||||
|
||||
|
||||
# register the config, init and read functions
|
||||
collectd.register_config(config_func)
|
||||
collectd.register_init(init_func)
|
||||
collectd.register_read(read_func, interval=PLUGIN_AUDIT_INTERVAL)
|
@ -1,10 +0,0 @@
|
||||
Metadata-Version: 1.1
|
||||
Name: influxdb-extensions
|
||||
Version: 1.0
|
||||
Summary: influxdb-extensions
|
||||
Home-page:
|
||||
Author: Windriver
|
||||
Author-email: info@windriver.com
|
||||
License: ASL 2.0
|
||||
Description: Titanium Cloud influxdb extensions.
|
||||
Platform: UNKNOWN
|
@ -1,7 +0,0 @@
|
||||
SRC_DIR="$PKG_BASE"
|
||||
|
||||
COPY_LIST="$PKG_BASE/src/LICENSE \
|
||||
$PKG_BASE/src/influxdb.conf.pmon \
|
||||
$PKG_BASE/src/influxdb.service"
|
||||
|
||||
TIS_PATCH_VER=2
|
@ -1,46 +0,0 @@
|
||||
Summary: Titanium Server influxdb Extensions Package
|
||||
Name: influxdb-extensions
|
||||
Version: 1.0
|
||||
Release: 0%{?_tis_dist}.%{tis_patch_ver}
|
||||
License: ASL 2.0
|
||||
Group: base
|
||||
Packager: Wind River <info@windriver.com>
|
||||
URL: unknown
|
||||
|
||||
# create the files tarball
|
||||
Source0: %{name}-%{version}.tar.gz
|
||||
|
||||
Source1: influxdb.service
|
||||
Source2: influxdb.conf.pmon
|
||||
|
||||
Requires: systemd
|
||||
Requires: influxdb
|
||||
Requires: /bin/systemctl
|
||||
|
||||
%description
|
||||
Titanium Cloud influxdb extensions
|
||||
|
||||
%define debug_package %{nil}
|
||||
%define local_unit_dir %{_sysconfdir}/systemd/system
|
||||
|
||||
%prep
|
||||
%setup
|
||||
|
||||
%build
|
||||
|
||||
%install
|
||||
install -m 755 -d %{buildroot}%{_sysconfdir}
|
||||
install -m 755 -d %{buildroot}%{_sysconfdir}/influxdb
|
||||
install -m 755 -d %{buildroot}%{local_unit_dir}
|
||||
|
||||
install -m 644 %{SOURCE1} %{buildroot}%{local_unit_dir}
|
||||
install -m 600 %{SOURCE2} %{buildroot}%{_sysconfdir}/influxdb
|
||||
|
||||
|
||||
%clean
|
||||
rm -rf $RPM_BUILD_ROOT
|
||||
|
||||
%files
|
||||
%defattr(-,root,root,-)
|
||||
%config(noreplace) %{local_unit_dir}/influxdb.service
|
||||
%{_sysconfdir}/influxdb/*
|
@ -1,202 +0,0 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
@ -1,322 +0,0 @@
|
||||
### Welcome to the InfluxDB configuration file.
|
||||
|
||||
# Once every 24 hours InfluxDB will report anonymous data to m.influxdb.com
|
||||
# The data includes raft id (random 8 bytes), os, arch, version, and metadata.
|
||||
# We don't track ip addresses of servers reporting. This is only used
|
||||
# to track the number of instances running and the versions, which
|
||||
# is very helpful for us.
|
||||
# Change this option to true to disable reporting.
|
||||
reporting-disabled = false
|
||||
|
||||
###
|
||||
### Enterprise registration control
|
||||
###
|
||||
|
||||
[registration]
|
||||
# enabled = true
|
||||
# url = "https://enterprise.influxdata.com" # The Enterprise server URL
|
||||
# token = "" # Registration token for Enterprise server
|
||||
|
||||
###
|
||||
### [meta]
|
||||
###
|
||||
### Controls the parameters for the Raft consensus group that stores metadata
|
||||
### about the InfluxDB cluster.
|
||||
###
|
||||
|
||||
[meta]
|
||||
dir = "/var/lib/influxdb/meta"
|
||||
hostname = "localhost"
|
||||
bind-address = ":8088"
|
||||
retention-autocreate = true
|
||||
election-timeout = "1s"
|
||||
heartbeat-timeout = "1s"
|
||||
leader-lease-timeout = "500ms"
|
||||
commit-timeout = "50ms"
|
||||
cluster-tracing = false
|
||||
|
||||
# If enabled, when a Raft cluster loses a peer due to a `DROP SERVER` command,
|
||||
# the leader will automatically ask a non-raft peer node to promote to a raft
|
||||
# peer. This only happens if there is a non-raft peer node available to promote.
|
||||
# This setting only affects the local node, so to ensure it operates correctly, be sure to set
|
||||
# it in the config of every node.
|
||||
raft-promotion-enabled = true
|
||||
|
||||
###
|
||||
### [data]
|
||||
###
|
||||
### Controls where the actual shard data for InfluxDB lives and how it is
|
||||
### flushed from the WAL. "dir" may need to be changed to a suitable place
|
||||
### for your system, but the WAL settings are an advanced configuration. The
|
||||
### defaults should work for most systems.
|
||||
###
|
||||
|
||||
[data]
|
||||
dir = "/var/lib/influxdb/data"
|
||||
|
||||
# Controls the engine type for new shards. Options are b1, bz1, or tsm1.
|
||||
# b1 is the 0.9.2 storage engine, bz1 is the 0.9.3 and 0.9.4 engine.
|
||||
# tsm1 is the 0.9.5 engine and is currently EXPERIMENTAL. Until 0.9.5 is
|
||||
# actually released, data written into a tsm1 engine may need to be wiped
|
||||
# between upgrades.
|
||||
# engine ="bz1"
|
||||
|
||||
# The following WAL settings are for the b1 storage engine used in 0.9.2. They won't
|
||||
# apply to any new shards created after upgrading to a version > 0.9.3.
|
||||
max-wal-size = 104857600 # Maximum size the WAL can reach before a flush. Defaults to 100MB.
|
||||
wal-flush-interval = "10m" # Maximum time data can sit in WAL before a flush.
|
||||
wal-partition-flush-delay = "2s" # The delay time between each WAL partition being flushed.
|
||||
|
||||
# These are the WAL settings for the storage engine >= 0.9.3
|
||||
wal-dir = "/var/lib/influxdb/wal"
|
||||
wal-enable-logging = true
|
||||
|
||||
# When a series in the WAL in-memory cache reaches this size in bytes it is marked as ready to
|
||||
# flush to the index
|
||||
# wal-ready-series-size = 25600
|
||||
|
||||
# Flush and compact a partition once this ratio of series are over the ready size
|
||||
# wal-compaction-threshold = 0.6
|
||||
|
||||
# Force a flush and compaction if any series in a partition gets above this size in bytes
|
||||
# wal-max-series-size = 2097152
|
||||
|
||||
# Force a flush of all series and full compaction if there have been no writes in this
|
||||
# amount of time. This is useful for ensuring that shards that are cold for writes don't
|
||||
# keep a bunch of data cached in memory and in the WAL.
|
||||
# wal-flush-cold-interval = "10m"
|
||||
|
||||
# Force a partition to flush its largest series if it reaches this approximate size in
|
||||
# bytes. Remember there are 5 partitions so you'll need at least 5x this amount of memory.
|
||||
# The more memory you have, the bigger this can be.
|
||||
# wal-partition-size-threshold = 20971520
|
||||
|
||||
# Whether queries should be logged before execution. Very useful for troubleshooting, but will
|
||||
# log any sensitive data contained within a query.
|
||||
# query-log-enabled = true
|
||||
|
||||
###
|
||||
### [hinted-handoff]
|
||||
###
|
||||
### Controls the hinted handoff feature, which allows nodes to temporarily
|
||||
### store queued data when one node of a cluster is down for a short period
|
||||
### of time.
|
||||
###
|
||||
|
||||
[hinted-handoff]
|
||||
enabled = true
|
||||
dir = "/var/lib/influxdb/hh"
|
||||
max-size = 1073741824
|
||||
max-age = "168h"
|
||||
retry-rate-limit = 0
|
||||
|
||||
# Hinted handoff will start retrying writes to down nodes at a rate of once per second.
|
||||
# If any error occurs, it will backoff in an exponential manner, until the interval
|
||||
# reaches retry-max-interval. Once writes to all nodes are successfully completed the
|
||||
# interval will reset to retry-interval.
|
||||
retry-interval = "1s"
|
||||
retry-max-interval = "1m"
|
||||
|
||||
# Interval between running checks for data that should be purged. Data is purged from
|
||||
# hinted-handoff queues for two reasons. 1) The data is older than the max age, or
|
||||
# 2) the target node has been dropped from the cluster. Data is never dropped until
|
||||
# it has reached max-age however, for a dropped node or not.
|
||||
purge-interval = "1h"
|
||||
|
||||
###
|
||||
### [cluster]
|
||||
###
|
||||
### Controls non-Raft cluster behavior, which generally includes how data is
|
||||
### shared across shards.
|
||||
###
|
||||
|
||||
[cluster]
|
||||
shard-writer-timeout = "10s" # The time within which a shard must respond to write.
|
||||
write-timeout = "5s" # The time within which a write operation must complete on the cluster.
|
||||
|
||||
###
|
||||
### [retention]
|
||||
###
|
||||
### Controls the enforcement of retention policies for evicting old data.
|
||||
###
|
||||
|
||||
[retention]
|
||||
enabled = true
|
||||
check-interval = "30m"
|
||||
|
||||
###
|
||||
### [shard-precreation]
|
||||
###
|
||||
### Controls the precreation of shards, so they are created before data arrives.
|
||||
### Only shards that will exist in the future, at time of creation, are precreated.
|
||||
|
||||
[shard-precreation]
|
||||
enabled = true
|
||||
check-interval = "10m"
|
||||
advance-period = "30m"
|
||||
|
||||
###
|
||||
### Controls the system self-monitoring, statistics and diagnostics.
|
||||
###
|
||||
### The internal database for monitoring data is created automatically
|
||||
### if it does not already exist. The target retention within this database
|
||||
### is called 'monitor' and is also created with a retention period of 7 days
|
||||
### and a replication factor of 1, if it does not exist. In all cases
|
||||
### this retention policy is configured as the default for the database.
|
||||
|
||||
[monitor]
|
||||
store-enabled = true # Whether to record statistics internally.
|
||||
store-database = "_internal" # The destination database for recorded statistics
|
||||
store-interval = "10s" # The interval at which to record statistics
|
||||
|
||||
###
|
||||
### [admin]
|
||||
###
|
||||
### Controls the availability of the built-in, web-based admin interface. If HTTPS is
|
||||
### enabled for the admin interface, HTTPS must also be enabled on the [http] service.
|
||||
###
|
||||
|
||||
[admin]
|
||||
enabled = true
|
||||
bind-address = ":8083"
|
||||
https-enabled = false
|
||||
https-certificate = "/etc/ssl/influxdb.pem"
|
||||
|
||||
###
|
||||
### [http]
|
||||
###
|
||||
### Controls how the HTTP endpoints are configured. These are the primary
|
||||
### mechanism for getting data into and out of InfluxDB.
|
||||
###
|
||||
|
||||
[http]
|
||||
enabled = true
|
||||
bind-address = ":8086"
|
||||
auth-enabled = false
|
||||
log-enabled = true
|
||||
write-tracing = false
|
||||
pprof-enabled = false
|
||||
https-enabled = false
|
||||
https-certificate = "/etc/ssl/influxdb.pem"
|
||||
|
||||
###
|
||||
### [[graphite]]
|
||||
###
|
||||
### Controls one or many listeners for Graphite data.
|
||||
###
|
||||
|
||||
[[graphite]]
|
||||
enabled = false
|
||||
# database = "graphite"
|
||||
# bind-address = ":2003"
|
||||
# protocol = "tcp"
|
||||
# consistency-level = "one"
|
||||
# name-separator = "."
|
||||
|
||||
# These next lines control how batching works. You should have this enabled
|
||||
# otherwise you could get dropped metrics or poor performance. Batching
|
||||
# will buffer points in memory if you have many coming in.
|
||||
|
||||
# batch-size = 1000 # will flush if this many points get buffered
|
||||
# batch-pending = 5 # number of batches that may be pending in memory
|
||||
# batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit
|
||||
# udp-read-buffer = 0 # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max.
|
||||
|
||||
## "name-schema" configures tag names for parsing the metric name from graphite protocol;
|
||||
## separated by `name-separator`.
|
||||
## The "measurement" tag is special and the corresponding field will become
|
||||
## the name of the metric.
|
||||
## e.g. "type.host.measurement.device" will parse "server.localhost.cpu.cpu0" as
|
||||
## {
|
||||
## measurement: "cpu",
|
||||
## tags: {
|
||||
## "type": "server",
|
||||
## "host": "localhost,
|
||||
## "device": "cpu0"
|
||||
## }
|
||||
## }
|
||||
# name-schema = "type.host.measurement.device"
|
||||
|
||||
## If set to true, when the input metric name has more fields than `name-schema` specified,
|
||||
## the extra fields will be ignored.
|
||||
## Otherwise an error will be logged and the metric rejected.
|
||||
# ignore-unnamed = true
|
||||
|
||||
###
|
||||
### [collectd]
|
||||
###
|
||||
### Controls the listener for collectd data.
|
||||
###
|
||||
|
||||
[collectd]
|
||||
enabled = true
|
||||
bind-address = "127.0.0.1:25826"
|
||||
database = "collectd"
|
||||
typesdb = "/usr/share/collectd/types.db"
|
||||
|
||||
# These next lines control how batching works. You should have this enabled
|
||||
# otherwise you could get dropped metrics or poor performance. Batching
|
||||
# will buffer points in memory if you have many coming in.
|
||||
|
||||
# batch-size = 1000 # will flush if this many points get buffered
|
||||
# batch-pending = 5 # number of batches that may be pending in memory
|
||||
# batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit
|
||||
# read-buffer = 0 # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max.
|
||||
|
||||
###
|
||||
### [opentsdb]
|
||||
###
|
||||
### Controls the listener for OpenTSDB data.
|
||||
###
|
||||
|
||||
[opentsdb]
|
||||
enabled = false
|
||||
# bind-address = ":4242"
|
||||
# database = "opentsdb"
|
||||
# retention-policy = ""
|
||||
# consistency-level = "one"
|
||||
# tls-enabled = false
|
||||
# certificate= ""
|
||||
|
||||
# These next lines control how batching works. You should have this enabled
|
||||
# otherwise you could get dropped metrics or poor performance. Only points
|
||||
# metrics received over the telnet protocol undergo batching.
|
||||
|
||||
# batch-size = 1000 # will flush if this many points get buffered
|
||||
# batch-pending = 5 # number of batches that may be pending in memory
|
||||
# batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit
|
||||
|
||||
###
|
||||
### [[udp]]
|
||||
###
|
||||
### Controls the listeners for InfluxDB line protocol data via UDP.
|
||||
###
|
||||
|
||||
[[udp]]
|
||||
enabled = false
|
||||
# bind-address = ""
|
||||
# database = "udp"
|
||||
# retention-policy = ""
|
||||
|
||||
# These next lines control how batching works. You should have this enabled
|
||||
# otherwise you could get dropped metrics or poor performance. Batching
|
||||
# will buffer points in memory if you have many coming in.
|
||||
|
||||
# batch-size = 1000 # will flush if this many points get buffered
|
||||
# batch-pending = 5 # number of batches that may be pending in memory
|
||||
# batch-timeout = "1s" # will flush at least this often even if we haven't hit buffer limit
|
||||
# read-buffer = 0 # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max.
|
||||
|
||||
###
|
||||
### [continuous_queries]
|
||||
###
|
||||
### Controls how continuous queries are run within InfluxDB.
|
||||
###
|
||||
|
||||
[continuous_queries]
|
||||
log-enabled = true
|
||||
enabled = true
|
||||
recompute-previous-n = 2
|
||||
recompute-no-older-than = "10m"
|
||||
compute-runs-per-interval = 10
|
||||
compute-no-more-than = "2m"
|
@ -1,17 +0,0 @@
|
||||
[process]
|
||||
process = influxdb
|
||||
service = influxdb
|
||||
style = lsb
|
||||
pidfile = /var/run/influxdb/influxdb.pid
|
||||
severity = major ; minor, major, critical
|
||||
restarts = 3 ; restart retries before error assertion
|
||||
interval = 5 ; number of seconds to wait between restarts
|
||||
debounce = 10 ; number of seconds that a process needs to remain
|
||||
; running before degrade is removed and retry count
|
||||
; is cleared.
|
||||
startuptime = 3 ; Seconds to wait after process start before starting the debounce monitor
|
||||
mode = passive ; Monitoring mode: passive (default) or active
|
||||
; passive: process death monitoring (default: always)
|
||||
; active : heartbeat monitoring, i.e. request / response messaging
|
||||
; ignore : do not monitor or stop monitoring
|
||||
quorum = 0 ; process is in the host watchdog quorum
|
@ -1,16 +0,0 @@
|
||||
#daily
|
||||
nodateext
|
||||
|
||||
/var/log/influxdb/influxdb.log
|
||||
{
|
||||
size 20M
|
||||
start 1
|
||||
missingok
|
||||
rotate 20
|
||||
compress
|
||||
sharedscripts
|
||||
postrotate
|
||||
systemctl reload syslog-ng > /dev/null 2>&1 || true
|
||||
endscript
|
||||
}
|
||||
|
@ -1,25 +0,0 @@
|
||||
[Unit]
|
||||
Description=InfluxDB open-source, distributed, time series database
|
||||
Documentation=https://influxdb.com/docs/
|
||||
Before=collectd.service
|
||||
Before=pmon.service
|
||||
After=local-fs.target network-online.target
|
||||
Requires=local-fs.target network-online.target
|
||||
|
||||
[Service]
|
||||
User=influxdb
|
||||
Group=influxdb
|
||||
LimitNOFILE=65536
|
||||
Environment='STDOUT=/dev/null'
|
||||
Environment='STDERR=/var/log/influxdb/influxd.log'
|
||||
EnvironmentFile=-/etc/default/influxdb
|
||||
PermissionsStartOnly=true
|
||||
ExecStartPre=-/usr/bin/mkdir -p /var/run/influxdb
|
||||
ExecStartPre=-/usr/bin/chown influxdb:influxdb /var/run/influxdb
|
||||
ExecStart=/bin/sh -c "/usr/bin/influxd -config /etc/influxdb/influxdb.conf -pidfile /var/run/influxdb/influxdb.pid ${INFLUXD_OPTS} >> ${STDOUT} 2>> ${STDERR}"
|
||||
ExecStopPost=/bin/bash -c 'rm /var/run/influxdb/influxdb.pid'
|
||||
KillMode=control-group
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
Alias=influxd.service
|
@ -1,202 +0,0 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
@ -1,2 +0,0 @@
|
||||
SRC_DIR=scripts
|
||||
TIS_PATCH_VER=0
|
@ -1,42 +0,0 @@
|
||||
Summary: Monitor tools package
|
||||
Name: monitor-tools
|
||||
Version: 1.0
|
||||
Release: %{tis_patch_ver}%{?_tis_dist}
|
||||
License: Apache-2.0
|
||||
Group: base
|
||||
Packager: Wind River <info@windriver.com>
|
||||
URL: unknown
|
||||
BuildArch: noarch
|
||||
Source: %name-%version.tar.gz
|
||||
|
||||
Requires: initscripts-config
|
||||
|
||||
%description
|
||||
This package contains data collection tools to monitor host performance.
|
||||
Tools are general purpose engineering and debugging aids. They include
|
||||
overall memory, cpu occupancy, per-task cpu, per-task scheduling, per-task
|
||||
io.
|
||||
|
||||
%prep
|
||||
%autosetup
|
||||
|
||||
%install
|
||||
rm -rf $RPM_BUILD_ROOT
|
||||
%global _buildsubdir %{_builddir}/%{name}-%{version}
|
||||
install -d %{buildroot}/usr/bin
|
||||
install %{_buildsubdir}/memtop %{buildroot}/usr/bin
|
||||
install %{_buildsubdir}/schedtop %{buildroot}/usr/bin
|
||||
install %{_buildsubdir}/occtop %{buildroot}/usr/bin
|
||||
|
||||
%files
|
||||
%license LICENSE
|
||||
%defattr(-,root,root,-)
|
||||
/usr/bin/*
|
||||
|
||||
%post
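# Enable kernel scheduler statistics (kernel.sched_schedstats=1) if not
# already configured, presumably so the per-task scheduling data read by
# schedtop is populated.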
|
||||
grep schedstats /etc/sysctl.conf
|
||||
if [ $? -ne 0 ]; then
|
||||
echo -e "\nkernel.sched_schedstats=1" >> /etc/sysctl.conf
|
||||
sysctl -p &>/dev/null
|
||||
fi
|
||||
exit 0
|
@ -1,202 +0,0 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
@ -1,344 +0,0 @@
|
||||
#!/usr/bin/perl
|
||||
########################################################################
|
||||
#
|
||||
# Copyright (c) 2015 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
#
|
||||
########################################################################
|
||||
#
|
||||
# Description:
|
||||
# This displays overall memory information per sample period.
|
||||
# Output includes total, used, avail, per-numa node breakdown of avail
|
||||
# and free hugepages memory.
|
||||
#
|
||||
# Usage: memtop OPTIONS
|
||||
# memtop [--delay=<seconds>] [--repeat=<num>] [--period=<seconds>] [--help]
|
||||
#
|
||||
|
||||
# Summarize high-level memory usage.
|
||||
use 5.10.0;
|
||||
use warnings;
|
||||
use strict;
|
||||
use Benchmark ':hireswallclock';
|
||||
use POSIX qw(strftime);
|
||||
use Data::Dumper;
|
||||
use File::Basename;
|
||||
use File::Spec ();
|
||||
use Time::HiRes qw(time usleep);
|
||||
use Carp qw(croak carp);
|
||||
|
||||
# IEC and SI constants
|
||||
use constant SI_k => 1.0E3;
|
||||
use constant SI_M => 1.0E6;
|
||||
use constant SI_G => 1.0E9;
|
||||
use constant Ki => 1024.0;
|
||||
use constant Mi => 1024.0*1024.0;
|
||||
use constant Gi => 1024.0*1024.0*1024.0;
|
||||
|
||||
# Name of this program
|
||||
our $TOOLNAME = basename($0);
|
||||
our $VERSION = "0.1";
|
||||
|
||||
# Argument list parameters
|
||||
our ($arg_debug,
|
||||
$arg_delay,
|
||||
$arg_repeat,
|
||||
$arg_period) = ();
|
||||
|
||||
# Globals
|
||||
our $t_0 = ();
|
||||
our $t_1 = ();
|
||||
our $t_elapsed = ();
|
||||
our $t_final = ();
|
||||
our $is_strict = ();
|
||||
our $num_nodes = ();
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# MAIN Program
|
||||
#-------------------------------------------------------------------------------
|
||||
# benchmark variables
|
||||
my ($bd, $b0, $b1);
|
||||
|
||||
# Autoflush output
|
||||
select(STDERR);
|
||||
$| = 1;
|
||||
select(STDOUT); # default
|
||||
$| = 1;
|
||||
|
||||
# Parse input arguments and print tool usage if necessary
|
||||
&parse_memtop_args(
|
||||
\$::arg_debug,
|
||||
\$::arg_delay,
|
||||
\$::arg_repeat,
|
||||
\$::arg_period,
|
||||
);
|
||||
|
||||
# Print out some debugging information
|
||||
if (defined $::arg_debug) {
|
||||
$Data::Dumper::Indent = 1;
|
||||
}
|
||||
|
||||
# Strict vs non-strict memory accounting
|
||||
$::is_strict = &is_strict();
|
||||
|
||||
# Number of numa nodes
|
||||
$::num_nodes = &num_numa_nodes();
|
||||
|
||||
# Print tool header and selected options
|
||||
printf "%s %s -- ".
|
||||
"selected options: delay = %.3fs, repeat = %d, period = %.3fs, %s, unit = %s\n",
|
||||
$::TOOLNAME, $::VERSION,
|
||||
$::arg_delay, $::arg_repeat, $::arg_period,
|
||||
$::is_strict ? 'strict' : 'non-strict',
|
||||
'MiB';
|
||||
|
||||
# Capture timestamp
|
||||
$b0 = new Benchmark;
|
||||
|
||||
# Get current hires epoc timestamp
|
||||
$::t_1 = time();
|
||||
$::t_final = $::t_1 + $::arg_period;
|
||||
|
||||
# Set initial delay
|
||||
$::t_elapsed = $::arg_delay;
|
||||
|
||||
# Main loop
|
||||
my $delay = SI_M*$::arg_delay - 600.0;
|
||||
REPEAT_LOOP: for (my $rep=1; $rep <= $::arg_repeat; $rep++) {
|
||||
# Copy all state variables
|
||||
$::t_0 = $::t_1;
|
||||
|
||||
# Sleep for desired interarrival time
|
||||
usleep( $delay );
|
||||
|
||||
# Current hires epoc timestamp
|
||||
$::t_1 = time();
|
||||
|
||||
# Delta calculation
|
||||
$::t_elapsed = $::t_1 - $::t_0;
|
||||
|
||||
# Print summary
|
||||
&print_memory(\$::t_1);
|
||||
|
||||
# Exit if we have reached period
|
||||
last if ((defined $::t_final) && ($::t_1 > $::t_final));
|
||||
}
|
||||
|
||||
# Print that tool has finished
|
||||
print "done\n";
|
||||
|
||||
# Capture timestamp and report delta
|
||||
if (defined $::arg_debug) {
|
||||
$b1 = new Benchmark; $bd = Benchmark::timediff($b1, $b0);
|
||||
printf "processing time: %s\n", timestr($bd);
|
||||
}
|
||||
exit 0;
|
||||
|
||||
|
||||
################################################################################
|
||||
|
||||
# Parse input option arguments
|
||||
sub parse_memtop_args {
|
||||
(local *::arg_debug,
|
||||
local *::arg_delay,
|
||||
local *::arg_repeat,
|
||||
local *::arg_period,
|
||||
) = @_;
|
||||
|
||||
# Local variables
|
||||
my ($fail, $arg_help);
|
||||
|
||||
# Use the Argument processing module
|
||||
use Getopt::Long;
|
||||
|
||||
# Process input arguments
|
||||
$fail = 0;
|
||||
GetOptions(
|
||||
"debug:i", \$::arg_debug,
|
||||
"delay=f", \$::arg_delay,
|
||||
"repeat=i", \$::arg_repeat,
|
||||
"period=i", \$::arg_period,
|
||||
"help|h", \$arg_help
|
||||
) || GetOptionsMessage();
|
||||
|
||||
# Print help documentation if user has selected --help
|
||||
&ListHelp() if (defined $arg_help);
|
||||
|
||||
# Validate options
|
||||
if ((defined $::arg_repeat) && (defined $::arg_period)) {
|
||||
$fail = 1;
|
||||
warn "$::TOOLNAME: Input error: cannot specify both --repeat and --period options.\n";
|
||||
}
|
||||
if ((defined $::arg_delay) && ($::arg_delay < 0.01)) {
|
||||
$fail = 1;
|
||||
warn "$::TOOLNAME: Input error: --delay %f is less than 0.01.\n",
|
||||
$::arg_delay;
|
||||
}
|
||||
if (@::ARGV) {
|
||||
$fail = 1;
|
||||
warn "$::TOOLNAME: Input error: not expecting these options: '@::ARGV'.\n";
|
||||
}
|
||||
|
||||
# Set reasonable defaults
|
||||
$::arg_delay ||= 1.0;
|
||||
$::arg_repeat ||= 1;
|
||||
if ($::arg_period) {
|
||||
$::arg_repeat = $::arg_period / $::arg_delay;
|
||||
} else {
|
||||
$::arg_period = $::arg_delay * $::arg_repeat;
|
||||
}
|
||||
|
||||
# Upon missing or invalid options, print usage
|
||||
if ($fail == 1) {
|
||||
&Usage();
|
||||
exit 1;
|
||||
}
|
||||
}
|
||||
|
||||
# Print out a warning message and usage
|
||||
sub GetOptionsMessage {
|
||||
warn "$::TOOLNAME: Error processing input arguments.\n";
|
||||
&Usage();
|
||||
exit 1;
|
||||
}
|
||||
|
||||
# Print out program usage
|
||||
sub Usage {
|
||||
printf "Usage: $::TOOLNAME OPTIONS\n";
|
||||
printf " [--delay=<seconds>] [--repeat=<num>] [--period=<seconds>]\n";
|
||||
printf " [--help]\n";
|
||||
printf "\n";
|
||||
}
|
||||
|
||||
# Print tool help
|
||||
sub ListHelp {
|
||||
printf "$::TOOLNAME -- displays high memory usage at high level\n";
|
||||
&Usage();
|
||||
printf " --delay=<seconds> : output interval (seconds): default: 1.0\n";
|
||||
printf " --repeat=<num> : number of repeat samples: default: 1\n";
|
||||
printf " --period=<seconds> : overall tool duration (seconds): default: --\n";
|
||||
printf " --help : this help\n";
|
||||
printf "\n";
|
||||
exit 0;
|
||||
}
|
||||
|
||||
# Print memory summary
|
||||
sub print_memory {
|
||||
(local *::t_1) = @_;
|
||||
|
||||
# counter
|
||||
our $count;
|
||||
$::count++; $::count %= 15;
|
||||
|
||||
my ($file, $n);
|
||||
my %mem = ();
|
||||
my %node = ();
|
||||
|
||||
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst);
|
||||
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($::t_1);
|
||||
my $msec = 1000.0*($::t_1 - int($::t_1));
|
||||
|
||||
# Process all entries of MEMINFO
|
||||
$file = '/proc/meminfo';
|
||||
open(FILE, $file) || die "Cannot open file: $file ($!)";
|
||||
while($_ = <FILE>) {
|
||||
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
|
||||
if (/^(\S+):\s+(\d+)\b/) {
|
||||
$mem{$1} = $2;
|
||||
}
|
||||
}
|
||||
close(FILE);
|
||||
|
||||
# Process all entries of per-Node MEMINFO
|
||||
for ($n=0; $n < $::num_nodes; $n++) {
|
||||
$file = sprintf('/sys/devices/system/node/node%d/meminfo', $n);
|
||||
open(FILE, $file) || die "Cannot open file: $file ($!)";
|
||||
while($_ = <FILE>) {
|
||||
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
|
||||
if (/^Node\s+(\d+)\s+(\S+):\s+(\d+)\b/) {
|
||||
$node{$1}{$2} = $3;
|
||||
}
|
||||
}
|
||||
close(FILE);
|
||||
}
|
||||
|
||||
# Calculate available memory
|
||||
if ($::is_strict) {
|
||||
$mem{'Avail'} = $mem{'CommitLimit'} - $mem{'Committed_AS'};
|
||||
} else {
|
||||
$mem{'Avail'} = $mem{'MemFree'} +
|
||||
$mem{'Cached'} +
|
||||
$mem{'Buffers'} +
|
||||
$mem{'SReclaimable'};
|
||||
}
|
||||
$mem{'Used'} = $mem{'MemTotal'} - $mem{'Avail'};
|
||||
$mem{'Anon'} = $mem{'AnonPages'};
|
||||
for ($n=0; $n < $::num_nodes; $n++) {
|
||||
$node{$n}{'Avail'} = $node{$n}{'MemFree'} +
|
||||
$node{$n}{'FilePages'} +
|
||||
$node{$n}{'SReclaimable'};
|
||||
$node{$n}{'HFree'} = $node{$n}{'HugePages_Free'} * $mem{'Hugepagesize'};
|
||||
}
|
||||
|
||||
# Print heading every so often
|
||||
if ($::count == 1) {
|
||||
printf "%s ".
|
||||
"%8s %8s %8s %7s %6s %6s %8s %8s %7s %7s %8s %8s",
|
||||
'yyyy-mm-dd hh:mm:ss.fff',
|
||||
'Tot', 'Used', 'Free', 'Ca', 'Buf', 'Slab', 'CAS', 'CLim', 'Dirty', 'WBack', 'Anon', 'Avail';
|
||||
for ($n=0; $n < $::num_nodes; $n++) {
|
||||
printf " %8s %8s", sprintf('%d:Avail', $n), sprintf('%d:HFree', $n);
|
||||
}
|
||||
printf "\n";
|
||||
}
|
||||
|
||||
# Print one line memory summary
|
||||
printf "%4d-%02d-%02d %02d:%02d:%02d.%03d ".
|
||||
"%8.1f %8.1f %8.1f %7.1f %6.1f %6.1f %8.1f %8.1f %7.1f %7.1f %8.1f %8.1f",
|
||||
1900+$year, 1+$mon, $mday, $hour, $min, $sec, $msec,
|
||||
$mem{'MemTotal'}/Ki,
|
||||
$mem{'Used'}/Ki,
|
||||
$mem{'MemFree'}/Ki,
|
||||
$mem{'Cached'}/Ki,
|
||||
$mem{'Buffers'}/Ki,
|
||||
$mem{'Slab'}/Ki,
|
||||
$mem{'Committed_AS'}/Ki,
|
||||
$mem{'CommitLimit'}/Ki,
|
||||
$mem{'Dirty'}/Ki,
|
||||
$mem{'Writeback'}/Ki,
|
||||
$mem{'Anon'}/Ki,
|
||||
$mem{'Avail'}/Ki;
|
||||
for ($n=0; $n < $::num_nodes; $n++) {
|
||||
printf " %8.1f %8.1f", $node{$n}{'Avail'}/Ki, $node{$n}{'HFree'}/Ki;
|
||||
}
|
||||
printf "\n";
|
||||
|
||||
}
|
||||
|
||||
sub num_numa_nodes {
|
||||
my $file = '/proc/cpuinfo';
|
||||
my %nodes = ();
|
||||
open(FILE, $file) || die "Cannot open file: $file ($!)";
|
||||
while($_ = <FILE>) {
|
||||
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
|
||||
if (/^physical\s+id\s+:\s+(\d+)\b/) {
|
||||
$nodes{$1} = 1;
|
||||
}
|
||||
}
|
||||
close(FILE);
|
||||
return scalar keys %nodes;
|
||||
}
|
||||
|
||||
sub is_strict {
|
||||
my $value = 0;
|
||||
my $file = '/proc/sys/vm/overcommit_memory';
|
||||
open(FILE, $file) || die "Cannot open file: $file ($!)";
|
||||
$_ = <FILE>;
|
||||
$value = /(\d+)/;
|
||||
close(FILE);
|
||||
return ($value == 2) ? 1 : 0;
|
||||
}
|
||||
|
||||
1;
|
@ -1,592 +0,0 @@
|
||||
#!/usr/bin/perl
|
||||
########################################################################
|
||||
#
|
||||
# Copyright (c) 2015-2016 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
#
|
||||
########################################################################
|
||||
#
|
||||
# Description:
|
||||
# This displays per-core occupancy information per sample period.
|
||||
# Output includes total occupancy, and per-core occupancy based on
|
||||
# hi-resolution timings.
|
||||
#
|
||||
# Usage: occtop OPTIONS
|
||||
# [--delay=<seconds>] [--repeat=<num>] [--period=<seconds>]
|
||||
# [--header=<num>]
|
||||
# [--help]
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use Data::Dumper;
|
||||
use POSIX qw(uname strftime);
|
||||
use Time::HiRes qw(clock_gettime usleep CLOCK_MONOTONIC CLOCK_REALTIME);
|
||||
|
||||
use Benchmark ':hireswallclock';
|
||||
use Carp qw(croak carp);
|
||||
|
||||
# Define toolname
|
||||
our $TOOLNAME = "occtop";
|
||||
our $VERSION = "0.1";
|
||||
|
||||
# Constants
|
||||
use constant SI_k => 1.0E3;
|
||||
use constant SI_M => 1.0E6;
|
||||
use constant SI_G => 1.0E9;
|
||||
use constant Ki => 1024.0;
|
||||
use constant Mi => 1024.0*1024.0;
|
||||
use constant Gi => 1024.0*1024.0*1024.0;
|
||||
|
||||
# Globals
|
||||
our %percpu_0 = ();
|
||||
our %percpu_1 = ();
|
||||
our %D_percpu = ();
|
||||
our %loadavg = ();
|
||||
our $D_total = 0.0;
|
||||
our $tm_0 = 0.0;
|
||||
our $tm_1 = 0.0;
|
||||
our $tr_0 = 0.0;
|
||||
our $tr_1 = 0.0;
|
||||
our $tm_elapsed = 0.0;
|
||||
our $tm_final = 0.0;
|
||||
our $uptime = 0.0;
|
||||
our $num_cpus = 1;
|
||||
our $num_tasks = 0;
|
||||
our $num_blk = 0;
|
||||
our $print_host = 1;
|
||||
our $is_schedstat = 1;
|
||||
our $USER_HZ = 100; # no easy way to get this
|
||||
our $CLOCK_NS = SI_G / $USER_HZ;
|
||||
|
||||
# Argument list parameters
|
||||
our ($arg_debug,
|
||||
$arg_delay,
|
||||
$arg_repeat,
|
||||
$arg_period,
|
||||
$arg_header,
|
||||
) = ();
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# MAIN Program
|
||||
#-------------------------------------------------------------------------------
|
||||
my $MIN_DELAY = 0.001;
|
||||
my $MAX_DELAY = 0.001;
|
||||
|
||||
# benchmark variables
|
||||
my ($bd, $b0, $b1);
|
||||
|
||||
# Autoflush output
|
||||
select(STDERR);
|
||||
$| = 1;
|
||||
select(STDOUT); # default
|
||||
$| = 1;
|
||||
|
||||
# Parse input arguments and print tool usage if necessary
|
||||
&parse_occtop_args(
|
||||
\$::arg_debug,
|
||||
\$::arg_delay,
|
||||
\$::arg_repeat,
|
||||
\$::arg_period,
|
||||
\$::arg_header,
|
||||
);
|
||||
|
||||
# Print out some debugging information
|
||||
if (defined $::arg_debug) {
|
||||
$Data::Dumper::Indent = 1;
|
||||
}
|
||||
|
||||
# Check for schedstat support; fallback to stats
|
||||
$is_schedstat = -e '/proc/schedstat' ? 1 : 0;
|
||||
|
||||
# Print out selected options
|
||||
printf "selected options: delay = %.3fs, repeat = %d, header = %d, source = %s\n",
|
||||
$::arg_delay, $::arg_repeat, $::arg_header, $is_schedstat ? 'schedstat' : 'jiffie';
|
||||
|
||||
# Capture timestamp
|
||||
$b0 = new Benchmark;
|
||||
|
||||
# Get number of logical cpus
|
||||
&get_num_logical_cpus(\$::num_cpus);
|
||||
|
||||
|
||||
# Get current hires epoc timestamp
|
||||
$::tm_1 = clock_gettime(CLOCK_MONOTONIC);
|
||||
$::tr_1 = clock_gettime(CLOCK_REALTIME);
|
||||
$::tm_final = $::tm_1 + $::arg_delay*$::arg_repeat;
|
||||
|
||||
# Set initial delay
|
||||
$::tm_elapsed = $::arg_delay;
|
||||
$MAX_DELAY = $::arg_delay + $MIN_DELAY;
|
||||
|
||||
# Get overall per-cpu stats
|
||||
if ($is_schedstat) {
|
||||
&read_schedstat(\%::percpu_1);
|
||||
} else {
|
||||
&read_stat(\%::percpu_1);
|
||||
}
|
||||
|
||||
# Main loop
|
||||
REPEAT_LOOP: for (my $repeat=1; $repeat <= $::arg_repeat; $repeat++) {
|
||||
|
||||
# copy all state variables
|
||||
%::tm_0 = (); %::tr_0 = (); %::percpu_0 = ();
|
||||
$::tm_0 = $::tm_1; $::tr_0 = $::tr_1;
|
||||
foreach my $cpu (keys %::percpu_1) { $::percpu_0{$cpu} = $::percpu_1{$cpu}; }
|
||||
|
||||
# estimate sleep delay to achieve desired interarrival by subtracting out
|
||||
# the measured cpu runtime of the tool.
|
||||
my $delay = $::arg_delay;
|
||||
$delay = $MIN_DELAY if ($delay < $MIN_DELAY);
|
||||
$delay = $MAX_DELAY if ($delay > $MAX_DELAY);
|
||||
usleep( SI_M*$delay );
|
||||
|
||||
# Collect current state
|
||||
$::tm_1 = (); $::tr_1 = (); %::percpu_1 = ();
|
||||
# Get current hires epoc timestamp
|
||||
$::tm_1 = clock_gettime(CLOCK_MONOTONIC);
|
||||
$::tr_1 = clock_gettime(CLOCK_REALTIME);
|
||||
# Get overall per-cpu stats
|
||||
if ($is_schedstat) {
|
||||
&read_schedstat(\%::percpu_1);
|
||||
} else {
|
||||
&read_stat(\%::percpu_1);
|
||||
}
|
||||
|
||||
# Get current uptime
|
||||
&get_uptime(\$::uptime);
|
||||
# Get current loadavg
|
||||
&get_loadavg(\%::loadavg, \$::runq, \$::num_tasks);
|
||||
# Get current processes blocked
|
||||
&get_blocked(\$::num_blk);
|
||||
|
||||
# Delta calculation
|
||||
%::D_percpu = ();
|
||||
$::tm_elapsed = $tm_1 - $tm_0;
|
||||
foreach my $cpu (keys %::percpu_1) {
|
||||
$::D_percpu{$cpu}{'runtime'} = ($::percpu_1{$cpu} - $::percpu_0{$cpu})/1.0E6;
|
||||
if ($::tm_elapsed > 0.0) {
|
||||
$::D_percpu{$cpu}{'occ'} = 100.0*$D_percpu{$cpu}{'runtime'}/1.0E3/$::tm_elapsed;
|
||||
} else {
|
||||
$::D_percpu{$cpu}{'occ'} = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
# Print tool header
|
||||
if ($repeat == 1) {
|
||||
&occtop_header(
|
||||
\$::tr_1,
|
||||
\$::uptime,
|
||||
\%::loadavg,
|
||||
\$::runq,
|
||||
\$::num_blk,
|
||||
\$::num_tasks,
|
||||
\$::print_host,
|
||||
);
|
||||
}
|
||||
|
||||
# Print one-liner summary
|
||||
&print_occtop(
|
||||
\$::tr_1,
|
||||
\$::num_cpus,
|
||||
\%::D_percpu,
|
||||
\$::arg_header,
|
||||
);
|
||||
|
||||
# exit repeat loop if we have exceeded overall time
|
||||
last if ($::tm_1 > $::tm_final);
|
||||
|
||||
} # REPEAT LOOP
|
||||
|
||||
# Print that tool has finished
|
||||
print "done\n";
|
||||
|
||||
# Capture timestamp and report delta
|
||||
$b1 = new Benchmark; $bd = Benchmark::timediff($b1, $b0);
|
||||
printf "processing time: %s\n", timestr($bd);
|
||||
exit 0;
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
# Parse per-cpu hi-resolution scheduling stats
|
||||
sub read_schedstat
|
||||
{
|
||||
(local *::percpu) = @_;
|
||||
my ($version, $timestamp);
|
||||
my ($cpu, $cputime);
|
||||
my ($fh, $file);
|
||||
|
||||
%::percpu = ();
|
||||
|
||||
# parse /proc/schedstat
|
||||
$file = '/proc/schedstat';
|
||||
open($fh, $file) || croak "Cannot open file: $file ($!)";
|
||||
$_ = <$fh>; ($version) = /^version\s+(\d+)/;
|
||||
$_ = <$fh>; ($timestamp) = /^timestamp\s+(\d+)/;
|
||||
|
||||
if ($version == 15) {
|
||||
LOOP_SCHEDSTAT: while (<$fh>) {
|
||||
# version 15: cputime is 7th field
|
||||
if (/^cpu(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)\s+/) {
|
||||
$cpu = $1; $cputime = $2;
|
||||
$::percpu{$cpu} = $cputime;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
croak "schedstat version: $version method not implemented.";
|
||||
}
|
||||
close($fh);
|
||||
}
|
||||
|
||||
# Parse per-cpu jiffie stats; cputime excludes iowait.
|
||||
sub read_stat
|
||||
{
|
||||
(local *::percpu) = @_;
|
||||
my ($cpu, $cputime);
|
||||
my ($user, $sys, $nice, $idle, $iowt, $hirq, $sirq);
|
||||
my ($fh, $file);
|
||||
|
||||
%::percpu = ();
|
||||
|
||||
# parse /proc/stat
|
||||
$file = '/proc/stat';
|
||||
open($fh, $file) || croak "Cannot open file: $file ($!)";
|
||||
LOOP_STAT: while (<$fh>) {
|
||||
if (/^cpu(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+/) {
|
||||
$cpu =$1; $user = $2; $sys = $3; $nice = $4; $idle = $5; $iowt = $6; $hirq = $7; $sirq = $8;
|
||||
$cputime = $CLOCK_NS * ($user + $sys + $nice + $iowt + $hirq + $sirq);
|
||||
$::percpu{$cpu} = $cputime;
|
||||
}
|
||||
}
|
||||
close($fh);
|
||||
}
|
||||
|
||||
# Parse load-average from /proc/loadavg
|
||||
sub get_loadavg
|
||||
{
|
||||
(local *::loadavg, local *::runq, *::num_tasks) = @_;
|
||||
|
||||
$::loadavg{'1'} = 0.0;
|
||||
$::loadavg{'5'} = 0.0;
|
||||
$::loadavg{'15'} = 0.0;
|
||||
$::runq = 0;
|
||||
$::num_tasks = 0;
|
||||
|
||||
my $file = '/proc/loadavg';
|
||||
open(my $fh, $file) || croak "Cannot open file: $file ($!)";
|
||||
$_ = <$fh>;
|
||||
if (/^(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\/(\d+)\s+\d+/) {
|
||||
$::loadavg{'1'} = $1;
|
||||
$::loadavg{'5'} = $2;
|
||||
$::loadavg{'15'} = $3;
|
||||
$::runq = $4;
|
||||
$::num_tasks = $5;
|
||||
}
|
||||
close($fh);
|
||||
}
|
||||
|
||||
# Parse blocked from /proc/stat
|
||||
sub get_blocked
|
||||
{
|
||||
(local *::num_blk) = @_;
|
||||
|
||||
$::num_blk = 0;
|
||||
|
||||
my $file = '/proc/stat';
|
||||
open(my $fh, $file) || croak "Cannot open file: $file ($!)";
|
||||
while ($_ = <$fh>) {
|
||||
if (/^procs_blocked\s+(\d+)/) {
|
||||
$::num_blk = $1;
|
||||
}
|
||||
}
|
||||
close($fh);
|
||||
}
|
||||
|
||||
# Parse uptime from /proc/uptime
|
||||
sub get_uptime
|
||||
{
|
||||
(local *::uptime) = @_;
|
||||
$::uptime = 0.0;
|
||||
|
||||
my $file = '/proc/uptime';
|
||||
open(my $fh, $file) || croak "Cannot open file: $file ($!)";
|
||||
$_ = <$fh>;
|
||||
if (/^(\S+)\s+\S+/) {
|
||||
$::uptime = $1;
|
||||
}
|
||||
close($fh);
|
||||
}
|
||||
|
||||
# Get number of online logical cpus
|
||||
sub get_num_logical_cpus {
|
||||
(local *::num_cpus) = @_;
|
||||
$::num_cpus = 0;
|
||||
|
||||
my $file = "/proc/cpuinfo";
|
||||
open(my $fh, $file) || croak "Cannot open file: $file ($!)";
|
||||
LOOP_CPUINFO: while (<$fh>) {
|
||||
if (/^[Pp]rocessor\s+:\s\d+/) {
|
||||
$::num_cpus++;
|
||||
}
|
||||
}
|
||||
close($fh);
|
||||
}
|
||||
|
||||
# Print occupancy summary
|
||||
sub print_occtop {
|
||||
(local *::tr_1,
|
||||
local *::num_cpus,
|
||||
local *::D_percpu,
|
||||
local *::arg_header,
|
||||
) = @_;
|
||||
|
||||
# counter
|
||||
our $count;
|
||||
$::count++; $::count %= $::arg_header;
|
||||
$::count = 1 if ($::arg_header == 1);
|
||||
|
||||
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst);
|
||||
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($::tr_1);
|
||||
my $msec = 1000.0*($::tr_1 - int($::tr_1));
|
||||
|
||||
# Print heading every so often
|
||||
if ($::count == 1) {
|
||||
printf "%s ".
|
||||
"%7s ",
|
||||
'yyyy-mm-dd hh:mm:ss.fff',
|
||||
'total';
|
||||
for (my $cpu=0; $cpu < $::num_cpus; $cpu++) {
|
||||
printf "%5s ", $cpu;
|
||||
}
|
||||
print "\n";
|
||||
}
|
||||
|
||||
# Print one summary
|
||||
my $occ_total = 0.0;
|
||||
for (my $cpu=0; $cpu < $::num_cpus; $cpu++) {
|
||||
$occ_total += $::D_percpu{$cpu}{'occ'};
|
||||
}
|
||||
printf "%4d-%02d-%02d %02d:%02d:%02d.%03d ".
|
||||
"%7.1f ",
|
||||
1900+$year, 1+$mon, $mday, $hour, $min, $sec, $msec,
|
||||
$occ_total;
|
||||
for (my $cpu=0; $cpu < $::num_cpus; $cpu++) {
|
||||
printf "%5.1f ", $::D_percpu{$cpu}{'occ'};
|
||||
}
|
||||
print "\n";
|
||||
}
|
||||
|
||||
# Print header
|
||||
sub occtop_header {
|
||||
(local *::tr_1,
|
||||
local *::uptime,
|
||||
local *::loadavg,
|
||||
local *::runq,
|
||||
local *::num_blk,
|
||||
local *::num_tasks,
|
||||
local *::print_host,
|
||||
) = @_;
|
||||
|
||||
# process epoch to get current timestamp
|
||||
my $mm_in_s = 60;
|
||||
my $hh_in_s = 60*60;
|
||||
my $dd_in_s = 24*60*60;
|
||||
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst);
|
||||
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($::tr_1);
|
||||
my $msec = 1000.0*($::tr_1 - int($::tr_1));
|
||||
|
||||
# convert uptime to elapsed <d>:<hh>:<mm>:<ss>
|
||||
my ($up, $up_dd, $up_hh, $up_mm, $up_ss);
|
||||
$up = int($::uptime);
|
||||
$up_dd = int($up/$dd_in_s);
|
||||
$up -= $dd_in_s*$up_dd;
|
||||
$up_hh = int($up/$hh_in_s);
|
||||
$up -= $hh_in_s*$up_hh;
|
||||
$up_mm = int($up/$mm_in_s);
|
||||
$up -= $mm_in_s*$up_mm;
|
||||
$up_ss = $up;
|
||||
|
||||
#occtop -- 2014/03/03 02:00:21.357 ldavg:0.07, 0.09, 0.08 runq:1 nproc:440 up:6:13:00:56
|
||||
printf "%s %s -- ".
|
||||
"%4d-%02d-%02d %02d:%02d:%02d.%03d ".
|
||||
"ldavg:%.2f, %.2f, %.2f runq:%d blk:%d nproc:%d ".
|
||||
"up:%d:%02d:%02d:%02d\n",
|
||||
$::TOOLNAME, $::VERSION,
|
||||
1900+$year, 1+$mon, $mday, $hour, $min, $sec, $msec,
|
||||
$::loadavg{'1'}, $::loadavg{'5'}, $::loadavg{'15'},
|
||||
$::runq, $::num_blk, $::num_tasks,
|
||||
$up_dd, $up_hh, $up_mm, $up_ss;
|
||||
|
||||
return if (!($::print_host));
|
||||
|
||||
# After first print, disable print host information
|
||||
$::print_host = 0;
|
||||
|
||||
# Get host specific information
|
||||
my ($OSTYPE, $NODENAME, $OSRELEASE, $version, $MACHINE);
|
||||
($OSTYPE, $NODENAME, $OSRELEASE, $version, $MACHINE) = POSIX::uname();
|
||||
my ($NODETYPE, $SUBFUNCTION, $BUILDINFO) = ('-', '-', '-');
|
||||
my ($SW_VERSION, $BUILD_ID) = ('-', '-');
|
||||
|
||||
# Get platform nodetype and subfunction
|
||||
PLATFORM: {
|
||||
my $file = "/etc/platform/platform.conf";
|
||||
open(FILE, $file) || next;
|
||||
while($_ = <FILE>) {
|
||||
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
|
||||
if (/^nodetype=(\S+)/) {
|
||||
$NODETYPE = $1;
|
||||
}
|
||||
if (/^subfunction=(\S+)/) {
|
||||
$SUBFUNCTION = $1;
|
||||
}
|
||||
}
|
||||
close(FILE);
|
||||
}
|
||||
|
||||
# Get loadbuild info
|
||||
BUILD: {
|
||||
my $file = "/etc/build.info";
|
||||
open(FILE, $file) || next;
|
||||
while($_ = <FILE>) {
|
||||
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
|
||||
if (/^SW_VERSION=\"([^"]+)\"/) {
|
||||
$SW_VERSION = $1;
|
||||
}
|
||||
if (/^BUILD_ID=\"([^"]+)\"/) {
|
||||
$BUILD_ID = $1;
|
||||
}
|
||||
}
|
||||
close(FILE);
|
||||
}
|
||||
$BUILDINFO = join(' ', $SW_VERSION, $BUILD_ID);
|
||||
|
||||
# Parse /proc/cpuinfo to get specific processor info
|
||||
my ($n_cpu, $model_name, $cpu_MHz) = (0, '-', 0);
|
||||
CPUINFO: {
|
||||
my $file = "/proc/cpuinfo";
|
||||
open(FILE, $file) || croak "Cannot open file: $file ($!)";
|
||||
while($_ = <FILE>) {
|
||||
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
|
||||
if (/^[Pp]rocessor\s+:\s+\d+/) {
|
||||
$n_cpu++;
|
||||
} elsif (/^model name\s+:\s+(.*)$/) {
|
||||
$_ = $1; s/\s+/ /g;
|
||||
$model_name = $_;
|
||||
} elsif (/^cpu MHz\s+:\s+(\S+)/) {
|
||||
$cpu_MHz = $1;
|
||||
} elsif (/^bogomips\s+:\s+(\S+)/) {
|
||||
$cpu_MHz = $1 if ($cpu_MHz == 0);
|
||||
}
|
||||
}
|
||||
close(FILE);
|
||||
}
|
||||
|
||||
printf " host:%s nodetype:%s subfunction:%s\n",
|
||||
$NODENAME, $NODETYPE, $SUBFUNCTION;
|
||||
printf " arch:%s processor:%s speed:%.0f #CPUs:%d\n",
|
||||
$MACHINE, $model_name, $cpu_MHz, $n_cpu;
|
||||
printf " %s %s build:%s\n", $OSTYPE, $OSRELEASE, $BUILDINFO;
|
||||
|
||||
}
|
||||
|
||||
# Parse and validate command line arguments
|
||||
sub parse_occtop_args {
|
||||
(local *::arg_debug,
|
||||
local *::arg_delay,
|
||||
local *::arg_repeat,
|
||||
local *::arg_period,
|
||||
local *::arg_header,
|
||||
) = @_;
|
||||
|
||||
# Local variables
|
||||
my ($fail, $arg_help);
|
||||
|
||||
# Use the Argument processing module
|
||||
use Getopt::Long;
|
||||
|
||||
# Print usage if no arguments
|
||||
if (!@::ARGV) {
|
||||
&Usage();
|
||||
exit 0;
|
||||
}
|
||||
|
||||
# Process input arguments
|
||||
$fail = 0;
|
||||
GetOptions(
|
||||
"debug:i", \$::arg_debug,
|
||||
"delay=f", \$::arg_delay,
|
||||
"period=i", \$::arg_period,
|
||||
"repeat=i", \$::arg_repeat,
|
||||
"header:i", \$::arg_header,
|
||||
"help|h", \$arg_help
|
||||
) || GetOptionsMessage();
|
||||
|
||||
# Print help documentation if user has selected --help
|
||||
&ListHelp() if (defined $arg_help);
|
||||
|
||||
# Validate options
|
||||
if ((defined $::arg_repeat) && (defined $::arg_period)) {
|
||||
$fail = 1;
|
||||
warn "$::TOOLNAME: Input error: cannot specify both --repeat and --period options.\n";
|
||||
}
|
||||
if ((defined $::arg_delay) && ($::arg_delay < 0.01)) {
|
||||
$fail = 1;
|
||||
warn "$::TOOLNAME: Input error: --delay %f is less than 0.01.\n",
|
||||
$::arg_delay;
|
||||
}
|
||||
if (@::ARGV) {
|
||||
$fail = 1;
|
||||
warn "$::TOOLNAME: Input error: not expecting these options: '@::ARGV'.\n";
|
||||
}
|
||||
|
||||
# Set reasonable defaults
|
||||
$::arg_header ||= 15;
|
||||
$::arg_delay ||= 1.0;
|
||||
$::arg_repeat ||= 1;
|
||||
if ($::arg_period) {
|
||||
$::arg_repeat = $::arg_period / $::arg_delay;
|
||||
} else {
|
||||
$::arg_period = $::arg_delay * $::arg_repeat;
|
||||
}
|
||||
|
||||
# Upon missing or invalid options, print usage
|
||||
if ($fail == 1) {
|
||||
&Usage();
|
||||
exit 1;
|
||||
}
|
||||
}
|
||||
|
||||
# Print out a warning message and usage
|
||||
sub GetOptionsMessage {
|
||||
warn "$::TOOLNAME: Error processing input arguments.\n";
|
||||
&Usage();
|
||||
exit 1;
|
||||
}
|
||||
|
||||
# Print out program usage
|
||||
sub Usage {
|
||||
printf "Usage: $::TOOLNAME OPTIONS\n";
|
||||
printf " [--delay=<seconds>] [--repeat=<num>] [--period=<seconds>]\n";
|
||||
printf " [--header=<num>]\n";
|
||||
printf " [--help]\n";
|
||||
|
||||
printf "\n";
|
||||
}
|
||||
|
||||
# Print tool help
|
||||
sub ListHelp {
|
||||
printf "$::TOOLNAME -- display hi-resolution per-cpu occupancy\n";
|
||||
&Usage();
|
||||
printf "Options: miscellaneous\n";
|
||||
printf " --delay=<seconds> : output interval (seconds): default: 1.0\n";
|
||||
printf " --repeat=<num> : number of repeat samples: default: 1\n";
|
||||
printf " --period=<seconds> : overall tool duration (seconds): default: --\n";
|
||||
printf " --header=<num> : print header every num samples: default: 15\n";
|
||||
printf " --help : this help\n";
|
||||
exit 0;
|
||||
}
|
||||
|
||||
1;
|
File diff suppressed because it is too large
Load Diff
@ -1,4 +0,0 @@
|
||||
PACKAGE_NAME=vm-topology
|
||||
VERSION=1.0
|
||||
SRC_DIR=$PKG_BASE/$PACKAGE_NAME
|
||||
TIS_PATCH_VER=1
|
@ -1,61 +0,0 @@
|
||||
%global pypi_name vm-topology
|
||||
|
||||
Summary: vm_topology
|
||||
Name: vm-topology
|
||||
Version: 1.0
|
||||
Release: %{tis_patch_ver}%{?_tis_dist}
|
||||
License: Apache-2.0
|
||||
Group: base
|
||||
Packager: Wind River <info@windriver.com>
|
||||
|
||||
URL: unknown
|
||||
Source0: %{pypi_name}-%{version}.tar.gz
|
||||
|
||||
BuildArch: noarch
|
||||
|
||||
BuildRequires: python
|
||||
BuildRequires: python-setuptools
|
||||
BuildRequires: python2-pip
|
||||
BuildRequires: python2-wheel
|
||||
BuildRequires: python-keyring
|
||||
BuildRequires: libvirt
|
||||
|
||||
Requires: python
|
||||
Requires: python-keyring
|
||||
Requires: /usr/bin/env
|
||||
Requires: libvirt
|
||||
|
||||
%description
|
||||
Show compute resources and VM topology
|
||||
|
||||
%prep
|
||||
%autosetup -p 1 -n %{pypi_name}-%{version}
|
||||
# Remove bundled egg-info
|
||||
rm -rf %{pypi_name}.egg-info
|
||||
# Let RPM handle the dependencies
|
||||
rm -f requirements.txt
|
||||
|
||||
%build
|
||||
%{__python2} setup.py build
|
||||
%py2_build_wheel
|
||||
|
||||
%install
|
||||
%{__python2} setup.py install --skip-build --root %{buildroot}
|
||||
mkdir -p $RPM_BUILD_ROOT/wheels
|
||||
install -m 644 dist/*.whl $RPM_BUILD_ROOT/wheels/
|
||||
|
||||
%files
|
||||
%defattr(-,root,root,-)
|
||||
%license LICENSE
|
||||
%{_bindir}/vm-topology
|
||||
%{python2_sitelib}/vm_topology
|
||||
%{python2_sitelib}/*.egg-info
|
||||
|
||||
%package wheels
|
||||
Summary: %{name} wheels
|
||||
|
||||
%description wheels
|
||||
Contains python wheels for %{name}
|
||||
|
||||
%files wheels
|
||||
/wheels/*
|
@ -1,202 +0,0 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
@ -1,19 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2013-2014 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
import setuptools
|
||||
|
||||
setuptools.setup(
|
||||
name='vm_topology',
|
||||
description='Show compute resources and VM topology',
|
||||
version='1.0.0',
|
||||
license='Apache-2.0',
|
||||
packages=['vm_topology', 'vm_topology.exec'],
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'vm-topology = vm_topology.exec.vm_topology:main',
|
||||
]}
|
||||
)
|
@ -1,5 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2014 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
@ -1,5 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2014 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
File diff suppressed because it is too large
Load Diff
5
tox.ini
5
tox.ini
@ -102,10 +102,9 @@ deps = -r{toxinidir}/test-requirements.txt
|
||||
python-daemon==2.1.2
|
||||
pylint
|
||||
|
||||
# There are currenrly 2 python modules with a setup.py file
|
||||
# There are currenrly 1 python module with a setup.py file
|
||||
commands = pylint --rcfile=./pylint.rc \
|
||||
tools/storage-topology/storage-topology/storage_topology \
|
||||
tools/vm-topology/vm-topology/vm_topology
|
||||
tools/storage-topology/storage-topology/storage_topology
|
||||
|
||||
[testenv:venv]
|
||||
basepython = python3
|
||||
|
Loading…
Reference in New Issue
Block a user