Relocated some packages to repo 'monitoring'
List of relocated subdirectories:

    monitoring/collectd-extensions
    monitoring/influxdb-extensions
    tools/monitor-tools
    tools/vm-topology

Story: 2006166
Task: 35687
Depends-On: I6c62895f8dda5b8dc4ff56680c73c49f3f3d7935
Depends-On: I665dc7fabbfffc798ad57843eb74dca16e7647a3
Change-Id: Iffacd50340005320540cd9ba1495cde0b2231cd0
Signed-off-by: Scott Little <scott.little@windriver.com>
Depends-On: I14e631137ff5658a54d62ad3d7aa2cd0ffaba6e0
parent 062ec89dbb
commit 3637d66ae4
@@ -1,10 +0,0 @@
Metadata-Version: 1.1
Name: collectd-extensions
Version: 1.0
Summary: collectd-extensions
Home-page:
Author: Windriver
Author-email: info@windriver.com
License: ASL 2.0
Description: Titanium Cloud collectd extensions
Platform: UNKNOWN

@@ -1,25 +0,0 @@
SRC_DIR="$PKG_BASE"

COPY_LIST="$PKG_BASE/src/LICENSE \
           $PKG_BASE/src/collectd.conf.pmon \
           $PKG_BASE/src/collectd.service \
           $PKG_BASE/src/fm_notifier.py \
           $PKG_BASE/src/mtce_notifier.py \
           $PKG_BASE/src/plugin_common.py \
           $PKG_BASE/src/python_plugins.conf \
           $PKG_BASE/src/cpu.py \
           $PKG_BASE/src/cpu.conf \
           $PKG_BASE/src/memory.py \
           $PKG_BASE/src/memory.conf \
           $PKG_BASE/src/df.conf \
           $PKG_BASE/src/ntpq.py \
           $PKG_BASE/src/ntpq.conf \
           $PKG_BASE/src/interface.py \
           $PKG_BASE/src/interface.conf \
           $PKG_BASE/src/remotels.py \
           $PKG_BASE/src/remotels.conf \
           $PKG_BASE/src/ptp.py \
           $PKG_BASE/src/ptp.conf \
           $PKG_BASE/src/example.py \
           $PKG_BASE/src/example.conf"
TIS_PATCH_VER=13

@@ -1,110 +0,0 @@
Summary: Titanium Server collectd Package
Name: collectd-extensions
Version: 1.0
Release: 0%{?_tis_dist}.%{tis_patch_ver}
License: ASL 2.0
Group: base
Packager: Wind River <info@windriver.com>
URL: unknown

# create the files tarball
Source0: %{name}-%{version}.tar.gz
Source1: collectd.service
Source2: collectd.conf.pmon

# collectd python plugin files - notifiers
Source3: fm_notifier.py
Source4: mtce_notifier.py
Source5: plugin_common.py

# collectd python plugin files - resource plugins
Source11: cpu.py
Source12: memory.py
Source14: example.py
Source15: ntpq.py
Source16: interface.py
Source17: remotels.py
Source18: ptp.py

# collectd plugin conf files into /etc/collectd.d
Source100: python_plugins.conf
Source101: cpu.conf
Source102: memory.conf
Source103: df.conf
Source104: example.conf
Source105: ntpq.conf
Source106: interface.conf
Source107: remotels.conf
Source108: ptp.conf

BuildRequires: systemd-devel

Requires: systemd
Requires: collectd
Requires: fm-api
Requires: python-httplib2
Requires: python-influxdb
Requires: python-oslo-concurrency
Requires: tsconfig
Requires: /bin/systemctl

%description
Titanium Cloud collectd extensions

%define debug_package %{nil}
%define local_unit_dir %{_sysconfdir}/systemd/system
%define local_plugin_dir %{_sysconfdir}/collectd.d
%define local_python_extensions_dir /opt/collectd/extensions/python
%define local_config_extensions_dir /opt/collectd/extensions/config

%prep
%setup

%build

%install
install -m 755 -d %{buildroot}%{_sysconfdir}
install -m 755 -d %{buildroot}%{local_unit_dir}
install -m 755 -d %{buildroot}%{local_plugin_dir}
install -m 755 -d %{buildroot}%{local_config_extensions_dir}
install -m 755 -d %{buildroot}%{local_python_extensions_dir}

# support files ; service and pmon conf
install -m 644 %{SOURCE1} %{buildroot}%{local_unit_dir}
install -m 600 %{SOURCE2} %{buildroot}%{local_config_extensions_dir}

# collectd python plugin files - notifiers
install -m 700 %{SOURCE3} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE4} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE5} %{buildroot}%{local_python_extensions_dir}

# collectd python plugin files - resource plugins
install -m 700 %{SOURCE11} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE12} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE14} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE15} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE16} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE17} %{buildroot}%{local_python_extensions_dir}
install -m 700 %{SOURCE18} %{buildroot}%{local_python_extensions_dir}


# collectd plugin conf files into /etc/collectd.d
install -m 600 %{SOURCE100} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE101} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE102} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE103} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE104} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE105} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE106} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE107} %{buildroot}%{local_plugin_dir}
install -m 600 %{SOURCE108} %{buildroot}%{local_plugin_dir}

%clean
rm -rf $RPM_BUILD_ROOT

%files
%defattr(-,root,root,-)
%config(noreplace) %{local_unit_dir}/collectd.service
%{local_plugin_dir}/*
%{local_config_extensions_dir}/*
%{local_python_extensions_dir}/*

@@ -1,202 +0,0 @@

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:

(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.

You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

@@ -1,18 +0,0 @@
[process]
process  = collectd
service  = collectd
style    = lsb
pidfile  = /var/run/collectd.pid
severity = major       ; minor, major, critical
restarts = 3           ; restart retries before error assertion
interval = 5           ; number of seconds to wait between restarts
debounce = 10          ; number of seconds that a process needs to remain
                       ; running before degrade is removed and retry count
                       ; is cleared.
startuptime = 3        ; Seconds to wait after process start before starting the debounce monitor
mode = passive         ; Monitoring mode: passive (default) or active
                       ; passive: process death monitoring (default: always)
                       ; active : heartbeat monitoring, i.e. request / response messaging
                       ; ignore : do not monitor or stop monitoring
quorum = 0             ; process is in the host watchdog quorum

@@ -1,15 +0,0 @@
[Unit]
Description=Collectd statistics daemon and extension services
Documentation=man:collectd(1) man:collectd.conf(5)
Before=pmon.service
After=local-fs.target network-online.target
Requires=local-fs.target network-online.target

[Service]
Type=notify
ExecStart=/usr/sbin/collectd
ExecStartPost=/bin/bash -c 'echo $MAINPID > /var/run/collectd.pid'
ExecStopPost=/bin/rm -f /var/run/collectd.pid

[Install]
WantedBy=multi-user.target

@@ -1,22 +0,0 @@
# For stock plugin only
# Uncomment to compare stock to tiS plugin readings
# ---------------------
# <Plugin cpu>
#   ReportByCpu false
#   ReportByState false
#   ValuesPercentage true
# </Plugin>

<Plugin "threshold">
  <Plugin "cpu">
    <Type "percent">
      Instance "used"
      Persist true
      PersistOK true
      WarningMax 90.00
      FailureMax 95.00
      Hits 2
      Invert false
    </Type>
  </Plugin>
</Plugin>

@@ -1,262 +0,0 @@
#
# Copyright (c) 2018-2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
############################################################################
#
# This file is the collectd 'Platform CPU Usage' Monitor.
#
# The Platform CPU Usage is calculated as an averaged percentage of
# platform core usage since the previous sample.
#
# Init Function:
#   - if 'worker_reserved.conf' exists then query/store PLATFORM_CPU_LIST
#
############################################################################
import os
import time
import collectd

debug = False

PASS = 0
FAIL = 1

PATH = '/proc/cpuinfo'
WORKER_RESERVED_CONF = '/etc/platform/worker_reserved.conf'

PLUGIN = 'platform cpu usage plugin'


# CPU Control class
class CPU:
    hostname = ""            # hostname for sample notification message
    usage = float(0.0)       # float value of cpu usage

    processors = int(0)      # number of processors for all cpus case
    cpu_list = []            # list of CPUs to calculate combined usage for
    cpu_time = []            # schedstat time for each CPU
    cpu_time_last = []       # last schedstat time for each CPU
    time_last = float(0.0)   # float of the time the last sample was taken

    def log_error(self, err_str):
        """Print an error log with plugin name prefixing the log"""

        collectd.error("%s %s" % (PLUGIN, err_str))


# Instantiate the class
c = CPU()


# The collectd configuration interface
# collectd needs this defined ; but not used/needed.
def config_func(config):
    collectd.info('%s config function' % PLUGIN)


# Get the platform cpu list and number of cpus reported by /proc/cpuinfo
def init_func():
    # get current hostname
    c.hostname = os.uname()[1]

    collectd.info('%s init function for %s' % (PLUGIN, c.hostname))

    raw_list = ""
    if os.path.exists(WORKER_RESERVED_CONF):
        with open(WORKER_RESERVED_CONF, 'r') as infile:
            for line in infile:
                if 'PLATFORM_CPU_LIST' in line:
                    val = line.split("=")
                    raw_list = val[1].strip('\n')[1:-1].strip('"')
                    break
    if raw_list:

        # Convert the cpu list fetched from the compute
        # reserved file into an integer list.
        # Handle mix of number list #,# and number range #-#
        split_list = raw_list.split(',')
        if debug:
            collectd.info('%s split list: %s' % (PLUGIN, split_list))
        for cpu in split_list:
            if cpu.find('-') == -1:
                # add individual cpu # with assumed ',' delimiter
                c.cpu_list.append(int(cpu))
            else:
                # add all in range #-#
                cpu_range = cpu.split('-')
                if len(cpu_range) == 2:
                    first = int(cpu_range[0])
                    last = int(cpu_range[1]) + 1
                    # add each
                    for i in list(range(first, last)):
                        c.cpu_list.append(i)

    # with the full CPU list in hand we can now just read their samples
    if debug:
        collectd.info('%s full cpu list: %s' %
                      (PLUGIN, c.cpu_list))

    try:
        f = open('/proc/cpuinfo')
    except EnvironmentError as e:
        collectd.error(str(e), UserWarning)
    else:

        if len(c.cpu_list) == 0:
            _want_all_cpus = True
        else:
            _want_all_cpus = False

        c.processors = 0
        for line in f:
            name_value = [s.strip() for s in line.split(':', 1)]
            if len(name_value) != 2:
                continue

            name, value = name_value
            if 'rocessor' in name:
                if _want_all_cpus is True:
                    c.cpu_list.append(int(c.processors))
                c.processors += 1

        collectd.info('%s has found %d cpus total' %
                      (PLUGIN, c.processors))
        collectd.info('%s monitoring %d cpus %s' %
                      (PLUGIN, len(c.cpu_list), c.cpu_list))
        f.close()


# Calculate the CPU usage sample
def read_func():
    try:
        f = open('/proc/schedstat')
    except EnvironmentError as e:
        c.log_error('file open failed ; ' + str(e))
        return FAIL
    else:
        # schedstat time for each CPU
        c.cpu_time = []

        # Loop over each line ...
        # get the output version ; only 15 is supported
        # get the cpu time from each line starting with 'cpux ....'
        for line in f:

            # break each line into name/value pairs
            line_split = [s.strip() for s in line.split(' ', 1)]
            name, value = line_split

            # get the output version.
            if 'ersion' in name:
                try:
                    c.version = int(value)
                except ValueError as e:
                    c.log_error('got invalid schedstat version ; ' + str(e))

                    # TODO: Consider exiting here and raising alarm.
                    # Calling this type of exit will stop the plugin.
                    # sys._exit()
                    return FAIL

            # only version 15 is supported
            if c.version == 15:
                if 'cpu' in name:
                    # get the cpu number for each line
                    if int(name.replace('cpu', '')) in c.cpu_list:
                        _in_list = True
                    else:
                        _in_list = False

                    # get cpu time for each cpu that is valid
                    if len(c.cpu_list) == 0 or _in_list is True:
                        _schedstat = value
                        value_split = value.split(' ')
                        c.cpu_time.append(float(value_split[6]))
                        if debug:
                            collectd.info('%s %s schedstat is %s [%s]' %
                                          (PLUGIN, name, value_split[6],
                                           _schedstat))
            else:
                collectd.error('%s unsupported schedstat version [%d]' %
                               (PLUGIN, c.version))
                return 0

        f.close()

    # Now that we have the cpu time recorded for each cpu
    _time_delta = float(0)
    _cpu_count = int(0)
    if len(c.cpu_time_last) == 0:
        c.time_last = time.time()
        if c.cpu_list:
            # This is a compute node.
            # Do not include vswitch or pinned cpus in calculation.
            for cpu in c.cpu_list:
                c.cpu_time_last.append(float(c.cpu_time[_cpu_count]))
                _cpu_count += 1
        if debug:
            collectd.info('%s cpu time ; first pass ; %s' %
                          (PLUGIN, c.cpu_time))
        return PASS
    else:
        _time_this = time.time()
        _time_delta = _time_this - c.time_last
        c.total_avg_cpu = 0
        cpu_occupancy = []
        if debug:
            collectd.info('%s cpu time ; this pass ; %s -> %s' %
                          (PLUGIN, c.cpu_time_last, c.cpu_time))

        if c.cpu_list:
            # This is a compute node.
            # Do not include vswitch or pinned cpus in calculation.
            for cpu in c.cpu_list:
                if cpu >= c.processors:
                    c.log_error(' got out of range cpu number')
                else:
                    _delta = (c.cpu_time[_cpu_count] - c.cpu_time_last[_cpu_count])
                    _delta = _delta / 1000000 / _time_delta
                    cpu_occupancy.append(float((100 * (_delta)) / 1000))
                    c.total_avg_cpu += cpu_occupancy[_cpu_count]
                    if debug:
                        collectd.info('%s cpu %d - count:%d [%s]' %
                                      (PLUGIN, cpu, _cpu_count, cpu_occupancy))
                    _cpu_count += 1

        else:
            collectd.info('%s no cpus to monitor' % PLUGIN)
            return 0

        c.usage = c.total_avg_cpu / _cpu_count
        if debug:
            collectd.info('%s reports %.2f %% usage (averaged)' %
                          (PLUGIN, c.usage))

        # Prepare for next audit ; move now to last
        # c.cpu_time_last = []
        c.cpu_time_last = c.cpu_time
        c.time_last = _time_this

        # if os.path.exists('/var/run/fit/cpu_data'):
        #     with open('/var/run/fit/cpu_data', 'r') as infile:
        #         for line in infile:
        #             c.usage = float(line)
        #             collectd.info("%s using FIT data:%.2f" %
        #                           (PLUGIN, c.usage))
        #             break

    # Dispatch usage value to collectd
    val = collectd.Values(host=c.hostname)
    val.plugin = 'cpu'
    val.type = 'percent'
    val.type_instance = 'used'
    val.dispatch(values=[c.usage])

    return 0


collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func)

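For reference, the occupancy arithmetic in read_func above reduces to a ratio of busy time to elapsed wall time. The following is a minimal illustrative sketch (not part of the relocated sources), assuming schedstat version 15 where the seventh value on each 'cpuN' line is the cumulative task run time in nanoseconds; the function name occupancy_percent is hypothetical.

# Illustrative sketch only: the per-core percentage the plugin derives from
# two consecutive /proc/schedstat samples.
def occupancy_percent(prev_ns, curr_ns, elapsed_seconds):
    """Percent of one core used between two schedstat samples."""
    delta_ms = (curr_ns - prev_ns) / 1000000.0   # nanoseconds -> milliseconds
    busy_seconds = delta_ms / 1000.0             # milliseconds -> seconds
    return 100.0 * busy_seconds / elapsed_seconds

# Example: 12.5e9 ns of run time over a 30 second audit window is ~41.7 %.
print(occupancy_percent(0, 12.5e9, 30.0))

The plugin then averages this per-core value over every cpu in PLATFORM_CPU_LIST to produce the single 'percent/used' sample it dispatches.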
@@ -1,41 +0,0 @@
<Plugin df>
  ValuesPercentage true
  IgnoreSelected false
  ReportByDevice false
  ReportInodes false
  ValuesAbsolute false
  MountPoint "/"
  MountPoint "/tmp"
  MountPoint "/dev"
  MountPoint "/dev/shm"
  MountPoint "/var/run"
  MountPoint "/var/log"
  MountPoint "/var/lock"
  MountPoint "/boot"
  MountPoint "/scratch"
  MountPoint "/opt/etcd"
  MountPoint "/opt/platform"
  MountPoint "/opt/extension"
  MountPoint "/var/lib/rabbitmq"
  MountPoint "/var/lib/postgresql"
  MountPoint "/var/lib/ceph/mon"
  MountPoint "/var/lib/docker"
  MountPoint "/var/lib/docker-distribution"
  MountPoint "/var/lib/kubelet"
  MountPoint "/var/lib/nova/instances"
  MountPoint "/opt/backups"
</Plugin>

<Plugin "threshold">
  <Plugin "df">
    <Type "percent_bytes">
      Instance "used"
      WarningMax 80.00
      FailureMax 90.00
      Persist true
      PersistOK true
      Hits 2
      Invert false
    </Type>
  </Plugin>
</Plugin>

@@ -1,13 +0,0 @@
<Plugin "threshold">
  <Plugin "example">
    <Type "percent">
      Instance "used"
      Persist true
      PersistOK true
      WarningMax 49.00
      FailureMax 74.00
      Hits 1
      Invert false
    </Type>
  </Plugin>
</Plugin>

@@ -1,73 +0,0 @@
#
# Copyright (c) 2018 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

import os
import random
import collectd

PLUGIN = 'random number plugin'

# static variables


# define a class here that will persist over read calls
class ExampleObject:
    hostname = ""
    plugin_data = ['1', '100']


obj = ExampleObject()


# The config function - called once on collectd process startup
def config_func(config):
    """Configure the plugin"""

    for node in config.children:
        key = node.key.lower()
        val = node.values[0]

        if key == 'data':
            obj.plugin_data = str(val).split(' ')
            collectd.info("%s configured data '%d:%d'" %
                          (PLUGIN,
                           int(obj.plugin_data[0]),
                           int(obj.plugin_data[1])))
            return 0

    collectd.info('%s config function' % PLUGIN)
    return 0


# The init function - called once on collectd process startup
def init_func():

    # get current hostname
    obj.hostname = os.uname()[1]
    return 0


# The sample read function - called on every audit interval
def read_func():

    # do the work to create the sample
    low = int(obj.plugin_data[0])
    high = int(obj.plugin_data[1])
    sample = random.randint(low, high)

    # Dispatch usage value to collectd
    val = collectd.Values(host=obj.hostname)
    val.plugin = 'example'
    val.type = 'percent'
    val.type_instance = 'used'
    val.dispatch(values=[sample])
    return 0


# register the config, init and read functions
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func)

File diff suppressed because it is too large
@@ -1,13 +0,0 @@
<Plugin "threshold">
  <Plugin "interface">
    <Type "percent">
      Instance "used"
      Persist true
      PersistOK true
      WarningMin 51
      FailureMin 1
      # Hits 2
      Invert false
    </Type>
  </Plugin>
</Plugin>

@ -1,981 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2019 Wind River Systems, Inc.
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
############################################################################
|
|
||||||
#
|
|
||||||
# This is the Host Interface Monitor plugin for collectd.
|
|
||||||
#
|
|
||||||
# Only mgmt, cluster-host and oam interfaces are supported with the following
|
|
||||||
# mapping specified in /etc/platform/platform.conf
|
|
||||||
#
|
|
||||||
# oam - oam_interface | controller | mandatory
|
|
||||||
# mgmnt - management_interface | all hosts | mandatory
|
|
||||||
# clstr - cluster_host_interface | any host | optional
|
|
||||||
#
|
|
||||||
# This plugin queries the maintenance Link Monitor daemon 'lmon'
|
|
||||||
# for a link status summary of that hosts configured networks.
|
|
||||||
#
|
|
||||||
# This plugin's read_func issues an http GET request to the Link Monitor
|
|
||||||
# which responds with a json string that represents a complete summary
|
|
||||||
# of the monitored links, state and the time of the last event or when
|
|
||||||
# initial status was learned. An example of the Link Monitor response is
|
|
||||||
#
|
|
||||||
# {
|
|
||||||
# "status" : "pass"
|
|
||||||
# "link_info": [
|
|
||||||
# { "network":"mgmt",
|
|
||||||
# "type":"vlan",
|
|
||||||
# "links": [
|
|
||||||
# { "name":"enp0s8.1", "state":"Up", "time":"5674323454567" },
|
|
||||||
# { "name":"enp0s8.2", "state":"Up", "time":"5674323454567" }]
|
|
||||||
# },
|
|
||||||
# { "network":"clstr",
|
|
||||||
# "type":"bond",
|
|
||||||
# "bond":"bond0",
|
|
||||||
# "links": [
|
|
||||||
# { "name":"enp0s9f1", "state":"Down", "time":"5674323454567" },
|
|
||||||
# { "name":"enp0s9f0", "state":"Up" , "time":"5674323454567" }]
|
|
||||||
# },
|
|
||||||
# { "network":"oam",
|
|
||||||
# "type":"single",
|
|
||||||
# "links": [
|
|
||||||
# { "name":"enp0s3", "state":"Up", "time":"5674323454567" }]
|
|
||||||
# }]
|
|
||||||
# }
|
|
||||||
#
|
|
||||||
# On failure
|
|
||||||
#
|
|
||||||
# {
|
|
||||||
# "status" : "fail ; bad request <or other text based reason>"
|
|
||||||
# }
|
|
||||||
#
|
|
||||||
# This plugin then uses this information to manage interface alarm
|
|
||||||
# assertion and clear with appropriate severity.
|
|
||||||
#
|
|
||||||
# Severity: Interface and Port levels
|
|
||||||
#
|
|
||||||
# Alarm Level Minor Major Critical
|
|
||||||
# ----------- ----- --------------------- ----------------------------
|
|
||||||
# Interface N/A One of lag pair is Up All Interface ports are Down
|
|
||||||
# Port N/A Physical Link is Down N/A
|
|
||||||
#
|
|
||||||
# Sample Data: represented as % of total links Up for that network interface
|
|
||||||
#
|
|
||||||
# 100 or 100% percent used - all links of interface are up.
|
|
||||||
# 50 or 50% percent used - one of lag pair is Up and the other is Down
|
|
||||||
# 0 or 0% percent used - all ports for that network are Down
|
|
||||||
#
|
|
||||||
############################################################################
|
|
||||||
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
import datetime
|
|
||||||
import collectd
|
|
||||||
import plugin_common as pc
|
|
||||||
from fm_api import constants as fm_constants
|
|
||||||
from fm_api import fm_api
|
|
||||||
|
|
||||||
# Fault manager API Object
|
|
||||||
api = fm_api.FaultAPIsV2()
|
|
||||||
|
|
||||||
# name of the plugin - all logs produced by this plugin are prefixed with this
|
|
||||||
PLUGIN = 'interface plugin'
|
|
||||||
|
|
||||||
# Interface Monitoring Interval in seconds
|
|
||||||
PLUGIN_AUDIT_INTERVAL = 10
|
|
||||||
|
|
||||||
# Sample Data 'type' and 'instance' database field values.
|
|
||||||
PLUGIN_TYPE = 'percent'
|
|
||||||
PLUGIN_TYPE_INSTANCE = 'usage'
|
|
||||||
|
|
||||||
# The Link Status Query URL
|
|
||||||
PLUGIN_HTTP_URL_PREFIX = 'http://localhost:'
|
|
||||||
|
|
||||||
# This plugin's timeout
|
|
||||||
PLUGIN_HTTP_TIMEOUT = 5
|
|
||||||
|
|
||||||
# Specify the link monitor as the maintenance destination service
|
|
||||||
# full path should look like ; http://localhost:2122/mtce/lmon
|
|
||||||
PLUGIN_HTTP_URL_PATH = '/mtce/lmon'
|
|
||||||
|
|
||||||
# Port and Interface Alarm Identifiers
|
|
||||||
PLUGIN_OAM_PORT_ALARMID = '100.106' # OAM Network Port
|
|
||||||
PLUGIN_OAM_IFACE_ALARMID = '100.107' # OAM Network Interface
|
|
||||||
|
|
||||||
PLUGIN_MGMT_PORT_ALARMID = '100.108' # Management Network Port
|
|
||||||
PLUGIN_MGMT_IFACE_ALARMID = '100.109' # Management Network Interface
|
|
||||||
|
|
||||||
PLUGIN_CLSTR_PORT_ALARMID = '100.110' # Cluster-host Network Port
|
|
||||||
PLUGIN_CLSTR_IFACE_ALARMID = '100.111' # Cluster-host Nwk Interface
|
|
||||||
|
|
||||||
# List of all alarm identifiers.
|
|
||||||
ALARM_ID_LIST = [PLUGIN_OAM_PORT_ALARMID,
|
|
||||||
PLUGIN_OAM_IFACE_ALARMID,
|
|
||||||
PLUGIN_MGMT_PORT_ALARMID,
|
|
||||||
PLUGIN_MGMT_IFACE_ALARMID,
|
|
||||||
PLUGIN_CLSTR_PORT_ALARMID,
|
|
||||||
PLUGIN_CLSTR_IFACE_ALARMID]
|
|
||||||
|
|
||||||
# Monitored Network Name Strings
|
|
||||||
NETWORK_MGMT = 'mgmt'
|
|
||||||
NETWORK_CLSTR = 'cluster-host'
|
|
||||||
NETWORK_OAM = 'oam'
|
|
||||||
|
|
||||||
# Port / Interface State strings
|
|
||||||
LINK_UP = 'Up'
|
|
||||||
LINK_DOWN = 'Down'
|
|
||||||
|
|
||||||
# Alarm control actions
|
|
||||||
ALARM_ACTION_RAISE = 'raise'
|
|
||||||
ALARM_ACTION_CLEAR = 'clear'
|
|
||||||
|
|
||||||
# Alarm level.
|
|
||||||
# Ports are the lowest level and represent a physical link
|
|
||||||
# Interfaces are port groupings in terms of LAG
|
|
||||||
LEVEL_PORT = 'port'
|
|
||||||
LEVEL_IFACE = 'interface'
|
|
||||||
|
|
||||||
# Run phases
|
|
||||||
RUN_PHASE__INIT = 0
|
|
||||||
RUN_PHASE__ALARMS_CLEARED = 1
|
|
||||||
RUN_PHASE__HTTP_REQUEST_PASS = 2
|
|
||||||
|
|
||||||
|
|
||||||
# Link Object (aka Port or Physical interface) Structure
|
|
||||||
# and member functions.
|
|
||||||
class LinkObject:
|
|
||||||
|
|
||||||
def __init__(self, alarm_id):
|
|
||||||
|
|
||||||
self.name = None
|
|
||||||
self.state = LINK_UP
|
|
||||||
self.timestamp = float(0)
|
|
||||||
self.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
|
|
||||||
self.alarm_id = alarm_id
|
|
||||||
self.state_change = True
|
|
||||||
|
|
||||||
collectd.debug("%s LinkObject constructor: %s" %
|
|
||||||
(PLUGIN, alarm_id))
|
|
||||||
|
|
||||||
##################################################################
|
|
||||||
#
|
|
||||||
# Name : raise_port_alarm
|
|
||||||
#
|
|
||||||
# Purpose : This link object member function is used to
|
|
||||||
# raise link/port alarms.
|
|
||||||
#
|
|
||||||
# Parameters : Network the link is part of.
|
|
||||||
#
|
|
||||||
# Returns : False on failure
|
|
||||||
# True on success
|
|
||||||
#
|
|
||||||
##################################################################
|
|
||||||
def raise_port_alarm(self, network):
|
|
||||||
"""Raise a port alarm"""
|
|
||||||
|
|
||||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_MAJOR:
|
|
||||||
|
|
||||||
if manage_alarm(self.name,
|
|
||||||
network,
|
|
||||||
LEVEL_PORT,
|
|
||||||
ALARM_ACTION_RAISE,
|
|
||||||
fm_constants.FM_ALARM_SEVERITY_MAJOR,
|
|
||||||
self.alarm_id,
|
|
||||||
self.timestamp) is True:
|
|
||||||
|
|
||||||
self.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
|
|
||||||
collectd.info("%s %s %s port alarm raised" %
|
|
||||||
(PLUGIN, self.name, self.alarm_id))
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return True
|
|
||||||
|
|
||||||
##################################################################
|
|
||||||
#
|
|
||||||
# Name : clear_port_alarm
|
|
||||||
#
|
|
||||||
# Purpose : This link object member function is used to
|
|
||||||
# clear link/port alarms.
|
|
||||||
#
|
|
||||||
# Parameters : Network the link is part of.
|
|
||||||
#
|
|
||||||
# Returns : False on failure
|
|
||||||
# True on success.
|
|
||||||
#
|
|
||||||
##################################################################
|
|
||||||
def clear_port_alarm(self, network):
|
|
||||||
"""Clear a port alarm"""
|
|
||||||
|
|
||||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_CLEAR:
|
|
||||||
if manage_alarm(self.name,
|
|
||||||
network,
|
|
||||||
LEVEL_PORT,
|
|
||||||
ALARM_ACTION_CLEAR,
|
|
||||||
fm_constants.FM_ALARM_SEVERITY_CLEAR,
|
|
||||||
self.alarm_id,
|
|
||||||
self.timestamp) is True:
|
|
||||||
|
|
||||||
collectd.info("%s %s %s port alarm cleared" %
|
|
||||||
(PLUGIN, self.name, self.alarm_id))
|
|
||||||
self.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
# Interface (aka Network) Level Object Structure and member functions
|
|
||||||
class NetworkObject:
|
|
||||||
|
|
||||||
def __init__(self, name):
|
|
||||||
|
|
||||||
self.name = name
|
|
||||||
self.sample = 0
|
|
||||||
self.sample_last = 0
|
|
||||||
self.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
|
|
||||||
self.degraded = False
|
|
||||||
self.timestamp = float(0)
|
|
||||||
|
|
||||||
# add the respective alarm IDs to each object
|
|
||||||
alarm_id = None
|
|
||||||
if name == NETWORK_OAM:
|
|
||||||
alarm_id = PLUGIN_OAM_PORT_ALARMID
|
|
||||||
self.alarm_id = PLUGIN_OAM_IFACE_ALARMID
|
|
||||||
elif name == NETWORK_MGMT:
|
|
||||||
alarm_id = PLUGIN_MGMT_PORT_ALARMID
|
|
||||||
self.alarm_id = PLUGIN_MGMT_IFACE_ALARMID
|
|
||||||
elif name == NETWORK_CLSTR:
|
|
||||||
alarm_id = PLUGIN_CLSTR_PORT_ALARMID
|
|
||||||
self.alarm_id = PLUGIN_CLSTR_IFACE_ALARMID
|
|
||||||
else:
|
|
||||||
self.alarm_id = ""
|
|
||||||
collectd.error("%s unexpected network (%s)" % (PLUGIN, name))
|
|
||||||
|
|
||||||
collectd.debug("%s %s NetworkObject constructor: %s" %
|
|
||||||
(PLUGIN, name, self.alarm_id))
|
|
||||||
|
|
||||||
if alarm_id:
|
|
||||||
self.link_one = LinkObject(alarm_id)
|
|
||||||
self.link_two = LinkObject(alarm_id)
|
|
||||||
|
|
||||||
##################################################################
|
|
||||||
#
|
|
||||||
# Name : raise_iface_alarm
|
|
||||||
#
|
|
||||||
# Purpose : This network object member function used to
|
|
||||||
# raise interface alarms.
|
|
||||||
#
|
|
||||||
# Parameters : None
|
|
||||||
#
|
|
||||||
# Returns : False on failure
|
|
||||||
# True on success
|
|
||||||
#
|
|
||||||
##################################################################
|
|
||||||
def raise_iface_alarm(self, severity):
|
|
||||||
"""Raise an interface alarm"""
|
|
||||||
|
|
||||||
if severity == fm_constants.FM_ALARM_SEVERITY_CLEAR:
|
|
||||||
collectd.error("%s %s raise alarm called with clear severity" %
|
|
||||||
(PLUGIN, self.name))
|
|
||||||
return True
|
|
||||||
|
|
||||||
if self.severity != severity:
|
|
||||||
if manage_alarm(self.name,
|
|
||||||
self.name,
|
|
||||||
LEVEL_IFACE,
|
|
||||||
ALARM_ACTION_RAISE,
|
|
||||||
severity,
|
|
||||||
self.alarm_id,
|
|
||||||
self.timestamp) is True:
|
|
||||||
|
|
||||||
self.severity = severity
|
|
||||||
collectd.info("%s %s %s %s interface alarm raised" %
|
|
||||||
(PLUGIN,
|
|
||||||
self.name,
|
|
||||||
self.alarm_id,
|
|
||||||
pc.get_severity_str(severity)))
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return True
|
|
||||||
|
|
||||||
##################################################################
|
|
||||||
#
|
|
||||||
# Name : clear_iface_alarm
|
|
||||||
#
|
|
||||||
# Purpose : This network object member function used to
|
|
||||||
# clear interface alarms.
|
|
||||||
#
|
|
||||||
# Parameters : None
|
|
||||||
#
|
|
||||||
# Returns : False on failure
|
|
||||||
# True on success.
|
|
||||||
#
|
|
||||||
##################################################################
|
|
||||||
def clear_iface_alarm(self):
|
|
||||||
"""Clear an interface alarm"""
|
|
||||||
|
|
||||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_CLEAR:
|
|
||||||
if manage_alarm(self.name,
|
|
||||||
self.name,
|
|
||||||
LEVEL_IFACE,
|
|
||||||
ALARM_ACTION_CLEAR,
|
|
||||||
fm_constants.FM_ALARM_SEVERITY_CLEAR,
|
|
||||||
self.alarm_id,
|
|
||||||
self.timestamp) is True:
|
|
||||||
|
|
||||||
collectd.info("%s %s %s %s interface alarm cleared" %
|
|
||||||
(PLUGIN,
|
|
||||||
self.name,
|
|
||||||
self.alarm_id,
|
|
||||||
pc.get_severity_str(self.severity)))
|
|
||||||
self.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return True
|
|
||||||
|
|
||||||
######################################################################
|
|
||||||
#
|
|
||||||
# Name : manage_iface_alarm
|
|
||||||
#
|
|
||||||
# Purpose : clear or raise appropriate severity level interface alarm
|
|
||||||
#
|
|
||||||
# Returns : None
|
|
||||||
#
|
|
||||||
######################################################################
|
|
||||||
def manage_iface_alarm(self):
|
|
||||||
# Single Link Config
|
|
||||||
if self.link_two.name is None:
|
|
||||||
if self.link_one.state == LINK_DOWN:
|
|
||||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_CRITICAL:
|
|
||||||
self.timestamp = self.link_one.timestamp
|
|
||||||
self.raise_iface_alarm(
|
|
||||||
fm_constants.FM_ALARM_SEVERITY_CRITICAL)
|
|
||||||
elif self.link_one.state == LINK_UP:
|
|
||||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_CLEAR:
|
|
||||||
self.clear_iface_alarm()
|
|
||||||
|
|
||||||
# Lagged Link Config
|
|
||||||
#
|
|
||||||
# The interface level timestamp is updated based on the failed
|
|
||||||
# link timestamps
|
|
||||||
elif self.link_one.state == LINK_UP and \
|
|
||||||
self.link_two.state == LINK_DOWN:
|
|
||||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_MAJOR:
|
|
||||||
self.timestamp = self.link_two.timestamp
|
|
||||||
self.raise_iface_alarm(fm_constants.FM_ALARM_SEVERITY_MAJOR)
|
|
||||||
|
|
||||||
elif self.link_one.state == LINK_DOWN and \
|
|
||||||
self.link_two.state == LINK_UP:
|
|
||||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_MAJOR:
|
|
||||||
self.timestamp = self.link_one.timestamp
|
|
||||||
self.raise_iface_alarm(fm_constants.FM_ALARM_SEVERITY_MAJOR)
|
|
||||||
|
|
||||||
elif self.link_one.state == LINK_UP and self.link_two.state == LINK_UP:
|
|
||||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_CLEAR:
|
|
||||||
self.clear_iface_alarm()
|
|
||||||
|
|
||||||
elif self.link_one.state == LINK_DOWN and \
|
|
||||||
self.link_two.state == LINK_DOWN:
|
|
||||||
if self.severity != fm_constants.FM_ALARM_SEVERITY_CRITICAL:
|
|
||||||
if self.link_one.timestamp > self.link_two.timestamp:
|
|
||||||
self.timestamp = self.link_one.timestamp
|
|
||||||
else:
|
|
||||||
self.timestamp = self.link_two.timestamp
|
|
||||||
self.raise_iface_alarm(fm_constants.FM_ALARM_SEVERITY_CRITICAL)
|
|
||||||
|
|
||||||
|
|
||||||
# Plugin Control Object
|
|
||||||
obj = pc.PluginObject(PLUGIN, PLUGIN_HTTP_URL_PREFIX)
|
|
||||||
|
|
||||||
|
|
||||||
# Network Object List - Primary Network/Link Control Object
|
|
||||||
NETWORKS = [NetworkObject(NETWORK_MGMT),
|
|
||||||
NetworkObject(NETWORK_OAM),
|
|
||||||
NetworkObject(NETWORK_CLSTR)]
|
|
||||||
|
|
||||||
|
|
||||||
##########################################################################
|
|
||||||
#
|
|
||||||
# Name : get_timestamp
|
|
||||||
#
|
|
||||||
# Purpose : Convert the long long int microsecond time as string
|
|
||||||
# that accompany link info from the Link Monitor (lmond)
|
|
||||||
# and catch exceptions in doing so.
|
|
||||||
#
|
|
||||||
# Parameters: lmon_time - long long int as string
|
|
||||||
#
|
|
||||||
# Returns : float time that can be consumed by datetime.fromtimestamp
|
|
||||||
#
|
|
||||||
# Returns same unit of now time if provided lmon_time is
|
|
||||||
# invalid.
|
|
||||||
#
|
|
||||||
##########################################################################
|
|
||||||
def get_timestamp(lmon_time):
|
|
||||||
"""Convert lmon time to fm timestamp time"""
|
|
||||||
|
|
||||||
if lmon_time:
|
|
||||||
try:
|
|
||||||
return(float(float(lmon_time) / 1000000))
|
|
||||||
except:
|
|
||||||
collectd.error("%s failed to parse timestamp ;"
|
|
||||||
" using current time" % PLUGIN)
|
|
||||||
else:
|
|
||||||
collectd.error("%s no timestamp ;"
|
|
||||||
" using current time" % PLUGIN)
|
|
||||||
|
|
||||||
return(float(time.time()))
|
|
||||||
|
|
||||||
|
|
||||||
def dump_network_info(network):
|
|
||||||
"""Log the specified network info"""
|
|
||||||
|
|
||||||
link_one_event_time = datetime.datetime.fromtimestamp(
|
|
||||||
float(network.link_one.timestamp)).strftime('%Y-%m-%d %H:%M:%S')
|
|
||||||
|
|
||||||
link_two_info = ''
|
|
||||||
if network.link_two.name is not None:
|
|
||||||
link_two_event_time = datetime.datetime.fromtimestamp(
|
|
||||||
float(network.link_two.timestamp)).strftime('%Y-%m-%d %H:%M:%S')
|
|
||||||
|
|
||||||
link_two_info += "; link two '"
|
|
||||||
link_two_info += network.link_two.name
|
|
||||||
link_two_info += "' went " + network.link_two.state
|
|
||||||
link_two_info += " at " + link_two_event_time
|
|
||||||
|
|
||||||
pcnt = '%'
|
|
||||||
|
|
||||||
collectd.info("%s %5s %3d%c ; "
|
|
||||||
"link one '%s' went %s at %s %s" %
|
|
||||||
(PLUGIN,
|
|
||||||
network.name,
|
|
||||||
network.sample,
|
|
||||||
pcnt,
|
|
||||||
network.link_one.name,
|
|
||||||
network.link_one.state,
|
|
||||||
link_one_event_time,
|
|
||||||
link_two_info))
|
|
||||||
|
|
||||||
|
|
||||||
#########################################################################
|
|
||||||
#
|
|
||||||
# Name : this_hosts_alarm
|
|
||||||
#
|
|
||||||
# Purpose : Determine if the supplied eid is for this host.
|
|
||||||
#
|
|
||||||
# Description: The eid formats for the alarms managed by this plugin are
|
|
||||||
#
|
|
||||||
# host=<hostname>.port=<port_name>
|
|
||||||
# host=<hostname>.interface=<network_name>
|
|
||||||
#
|
|
||||||
# Assumptions: There is no restriction preventing the system
|
|
||||||
# administrator from creating hostnames with period's ('.')
|
|
||||||
# in them. Because so the eid cannot simply be split
|
|
||||||
# around '='s and '.'s. Instead its split around this
|
|
||||||
# plugins level type '.port' or '.interface'.
|
|
||||||
#
|
|
||||||
# Returns : True if hostname is a match
|
|
||||||
# False otherwise
|
|
||||||
#
|
|
||||||
##########################################################################
|
|
||||||
def this_hosts_alarm(hostname, eid):
|
|
||||||
"""Check if the specified eid is for this host"""
|
|
||||||
|
|
||||||
if hostname:
|
|
||||||
if eid:
|
|
||||||
# 'host=controller-0.interface=mgmt'
|
|
||||||
try:
|
|
||||||
eid_host = None
|
|
||||||
eid_disected = eid.split('=')
|
|
||||||
if len(eid_disected) == 3:
|
|
||||||
# ['host', 'controller-0.interface', 'mgmt']
|
|
||||||
if len(eid_disected[1].split('.port')) == 2:
|
|
||||||
eid_host = eid_disected[1].split('.port')[0]
|
|
||||||
if eid_host and eid_host == hostname:
|
|
||||||
return True
|
|
||||||
elif len(eid_disected[1].split('.interface')) == 2:
|
|
||||||
eid_host = eid_disected[1].split('.interface')[0]
|
|
||||||
if eid_host and eid_host == hostname:
|
|
||||||
return True
|
|
||||||
except Exception as ex:
|
|
||||||
collectd.error("%s failed to parse alarm eid (%s)"
|
|
||||||
" [eid:%s]" % (PLUGIN, str(ex), eid))
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
##########################################################################
|
|
||||||
#
|
|
||||||
# Name : clear_alarms
|
|
||||||
#
|
|
||||||
# Purpose : Clear all interface alarms on process startup.
|
|
||||||
#
|
|
||||||
# Description: Called after first successful Link Status query.
|
|
||||||
#
|
|
||||||
# Loops over the provided alarm id list querying all alarms
|
|
||||||
# for each. Any that are raised are precisely cleared.
|
|
||||||
#
|
|
||||||
# Prevents stuck alarms over port and interface reconfig.
|
|
||||||
#
|
|
||||||
# If the original alarm case still exists the alarm will
|
|
||||||
# be re-raised with the original link event timestamp that
|
|
||||||
# is part of the Link Status query response.
|
|
||||||
#
|
|
||||||
# Parameters : A list of this plugin's alarm ids
|
|
||||||
#
|
|
||||||
# Returns : True on Success
|
|
||||||
# False on Failure
|
|
||||||
#
|
|
||||||
##########################################################################
|
|
||||||
def clear_alarms(alarm_id_list):
|
|
||||||
"""Clear alarm state of all plugin alarms"""
|
|
||||||
found = False
|
|
||||||
for alarm_id in alarm_id_list:
|
|
||||||
|
|
||||||
try:
|
|
||||||
alarms = api.get_faults_by_id(alarm_id)
|
|
||||||
except Exception as ex:
|
|
||||||
collectd.error("%s 'get_faults_by_id' exception ;"
|
|
||||||
" %s ; %s" %
|
|
||||||
(PLUGIN, alarm_id, ex))
|
|
||||||
return False
|
|
||||||
|
|
||||||
if alarms:
|
|
||||||
for alarm in alarms:
|
|
||||||
eid = alarm.entity_instance_id
|
|
||||||
if this_hosts_alarm(obj.hostname, eid) is False:
|
|
||||||
# ignore other host alarms
|
|
||||||
continue
|
|
||||||
|
|
||||||
if alarm_id == PLUGIN_OAM_PORT_ALARMID or \
|
|
||||||
alarm_id == PLUGIN_OAM_IFACE_ALARMID or \
|
|
||||||
alarm_id == PLUGIN_MGMT_PORT_ALARMID or \
|
|
||||||
alarm_id == PLUGIN_MGMT_IFACE_ALARMID or \
|
|
||||||
alarm_id == PLUGIN_CLSTR_PORT_ALARMID or \
|
|
||||||
alarm_id == PLUGIN_CLSTR_IFACE_ALARMID:
|
|
||||||
|
|
||||||
try:
|
|
||||||
if api.clear_fault(alarm_id, eid) is False:
|
|
||||||
collectd.info("%s %s:%s:%s alarm already cleared" %
|
|
||||||
(PLUGIN,
|
|
||||||
alarm.severity,
|
|
||||||
alarm_id,
|
|
||||||
eid))
|
|
||||||
else:
|
|
||||||
found = True
|
|
||||||
collectd.info("%s %s:%s:%s alarm cleared" %
|
|
||||||
(PLUGIN,
|
|
||||||
alarm.severity,
|
|
||||||
alarm_id,
|
|
||||||
eid))
|
|
||||||
except Exception as ex:
|
|
||||||
collectd.error("%s 'clear_fault' exception ; "
|
|
||||||
"%s:%s ; %s" %
|
|
||||||
(PLUGIN, alarm_id, eid, ex))
|
|
||||||
return False
|
|
||||||
if found is False:
|
|
||||||
collectd.info("%s found no startup alarms" % PLUGIN)
|
|
||||||
|
|
||||||
return True
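# Typical startup usage, e.g. as called from the read function further below:
#   clear_alarms(ALARM_ID_LIST)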
|
|
||||||
|
|
||||||
|
|
||||||
##########################################################################
|
|
||||||
#
|
|
||||||
# Name : manage_alarm
|
|
||||||
#
|
|
||||||
# Purpose : Raises or clears port and interface alarms based on
|
|
||||||
# calling parameters.
|
|
||||||
#
|
|
||||||
# Returns : True on success
|
|
||||||
# False on failure
|
|
||||||
#
|
|
||||||
##########################################################################
|
|
||||||
def manage_alarm(name, network, level, action, severity, alarm_id, timestamp):
|
|
||||||
"""Manage raise and clear of port and interface alarms"""
|
|
||||||
|
|
||||||
ts = datetime.datetime.fromtimestamp(
|
|
||||||
float(timestamp)).strftime('%Y-%m-%d %H:%M:%S')
|
|
||||||
collectd.debug("%s %s %s %s alarm for %s:%s [%s] %s" % (PLUGIN,
|
|
||||||
severity, level, alarm_id, network, name, action, ts))
|
|
||||||
|
|
||||||
if action == ALARM_ACTION_CLEAR:
|
|
||||||
alarm_state = fm_constants.FM_ALARM_STATE_CLEAR
|
|
||||||
reason = ''
|
|
||||||
repair = ''
|
|
||||||
else:
|
|
||||||
# reason and repair strings are only needed on alarm assertion
|
|
||||||
alarm_state = fm_constants.FM_ALARM_STATE_SET
|
|
||||||
reason = "'" + network.upper() + "' " + level
|
|
||||||
repair = 'Check cabling and far-end port configuration ' \
|
|
||||||
'and status on adjacent equipment.'
|
|
||||||
|
|
||||||
# build the alarm eid and name string
|
|
||||||
if level == LEVEL_PORT:
|
|
||||||
eid = 'host=' + obj.hostname + "." + level + '=' + name
|
|
||||||
reason += " failed"
|
|
||||||
else:
|
|
||||||
eid = 'host=' + obj.hostname + "." + level + '=' + network
|
|
||||||
if severity == fm_constants.FM_ALARM_SEVERITY_MAJOR:
|
|
||||||
reason += " degraded"
|
|
||||||
else:
|
|
||||||
reason += " failed"
|
|
||||||
|
|
||||||
if alarm_state == fm_constants.FM_ALARM_STATE_CLEAR:
|
|
||||||
try:
|
|
||||||
if api.clear_fault(alarm_id, eid) is False:
|
|
||||||
collectd.info("%s %s:%s alarm already cleared" %
|
|
||||||
(PLUGIN, alarm_id, eid))
|
|
||||||
else:
|
|
||||||
collectd.info("%s %s:%s alarm cleared" %
|
|
||||||
(PLUGIN, alarm_id, eid))
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception as ex:
|
|
||||||
collectd.error("%s 'clear_fault' failed ; %s:%s ; %s" %
|
|
||||||
(PLUGIN, alarm_id, eid, ex))
|
|
||||||
return False
|
|
||||||
|
|
||||||
else:
|
|
||||||
fault = fm_api.Fault(
|
|
||||||
uuid="",
|
|
||||||
alarm_id=alarm_id,
|
|
||||||
alarm_state=alarm_state,
|
|
||||||
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
|
|
||||||
entity_instance_id=eid,
|
|
||||||
severity=severity,
|
|
||||||
reason_text=reason,
|
|
||||||
alarm_type=fm_constants.FM_ALARM_TYPE_7,
|
|
||||||
probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN,
|
|
||||||
proposed_repair_action=repair,
|
|
||||||
service_affecting=True,
|
|
||||||
timestamp=ts,
|
|
||||||
suppression=True)
|
|
||||||
|
|
||||||
try:
|
|
||||||
alarm_uuid = api.set_fault(fault)
|
|
||||||
except Exception as ex:
|
|
||||||
collectd.error("%s 'set_fault' exception ; %s:%s ; %s" %
|
|
||||||
(PLUGIN, alarm_id, eid, ex))
|
|
||||||
return False
|
|
||||||
|
|
||||||
if pc.is_uuid_like(alarm_uuid) is False:
|
|
||||||
collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
|
|
||||||
(PLUGIN, alarm_id, eid, alarm_uuid))
|
|
||||||
return False
|
|
||||||
else:
|
|
||||||
return True
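# Illustrative clear of a management port alarm ; argument values are examples
# only and, on a clear, the severity is only used in the debug log above:
#   manage_alarm('mgmt0', 'mgmt', LEVEL_PORT, ALARM_ACTION_CLEAR,
#                fm_constants.FM_ALARM_SEVERITY_MAJOR,
#                PLUGIN_MGMT_PORT_ALARMID, '1546300800')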
|
|
||||||
|
|
||||||
|
|
||||||
# The config function - called once on collectd process startup
|
|
||||||
def config_func(config):
|
|
||||||
"""Configure the plugin"""
|
|
||||||
|
|
||||||
# Need to update the Link Status Query URL with the port number.
|
|
||||||
url_updated = False
|
|
||||||
|
|
||||||
# The Link Monitor port number is first searched for in
|
|
||||||
# the /etc/mtc/lmond.conf file.
|
|
||||||
# If it's not there then it's taken from the plugin config.
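# e.g. a lmond.conf line of the form parsed below (port value illustrative):
#   lmon_query_port = 2122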
|
|
||||||
|
|
||||||
# /etc/mtc/lmond.conf
|
|
||||||
fn = '/etc/mtc/lmond.conf'
|
|
||||||
if (os.path.exists(fn)):
|
|
||||||
try:
|
|
||||||
with open(fn, 'r') as infile:
|
|
||||||
for line in infile:
|
|
||||||
if 'lmon_query_port' in line:
|
|
||||||
if isinstance(int(line.split()[2]), int):
|
|
||||||
|
|
||||||
# add the port
|
|
||||||
obj.url += line.split()[2]
|
|
||||||
|
|
||||||
# add the path /mtce/lmon
|
|
||||||
obj.url += PLUGIN_HTTP_URL_PATH
|
|
||||||
|
|
||||||
url_updated = "config file"
|
|
||||||
break
|
|
||||||
except EnvironmentError as e:
|
|
||||||
collectd.error("%s %s" % (PLUGIN, str(e)))
|
|
||||||
|
|
||||||
if url_updated is False:
|
|
||||||
# Try the config as this might be updated by manifest
|
|
||||||
for node in config.children:
|
|
||||||
key = node.key.lower()
|
|
||||||
val = int(node.values[0])
|
|
||||||
if key == 'port':
|
|
||||||
if isinstance(int(val), int):
|
|
||||||
|
|
||||||
# add the port
|
|
||||||
obj.url += str(val)
|
|
||||||
|
|
||||||
# add the path /mtce/lmon
|
|
||||||
obj.url += PLUGIN_HTTP_URL_PATH
|
|
||||||
|
|
||||||
url_updated = "manifest"
|
|
||||||
break
|
|
||||||
|
|
||||||
if url_updated:
|
|
||||||
collectd.info("%s configured by %s [%s]" %
|
|
||||||
(PLUGIN, url_updated, obj.url))
|
|
||||||
obj.config_done = True
|
|
||||||
else:
|
|
||||||
collectd.error("%s config failure ; cannot monitor" %
|
|
||||||
(PLUGIN))
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
# The init function - called once on collectd process startup
|
|
||||||
def init_func():
|
|
||||||
"""Init the plugin"""
|
|
||||||
|
|
||||||
if obj.config_done is False:
|
|
||||||
collectd.info("%s configuration failed" % PLUGIN)
|
|
||||||
time.sleep(300)
|
|
||||||
return False
|
|
||||||
|
|
||||||
if obj.init_done is False:
|
|
||||||
if obj.init_ready() is False:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
obj.hostname = obj.gethostname()
|
|
||||||
obj.init_done = True
|
|
||||||
collectd.info("%s initialization complete" % PLUGIN)
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
# The sample read function - called on every audit interval
|
|
||||||
def read_func():
|
|
||||||
"""collectd interface monitor plugin read function"""
|
|
||||||
|
|
||||||
if obj.init_done is False:
|
|
||||||
init_func()
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if obj.phase < RUN_PHASE__ALARMS_CLEARED:
|
|
||||||
|
|
||||||
# clear all alarms on first audit
|
|
||||||
#
|
|
||||||
# block on fm availability
|
|
||||||
#
|
|
||||||
# If the existing raised alarms are still valid then
|
|
||||||
# they will be re-raised with the same timestamp the
|
|
||||||
# original event occurred at once auditing resumes.
|
|
||||||
if clear_alarms(ALARM_ID_LIST) is False:
|
|
||||||
collectd.error("%s failed to clear existing alarms ; "
|
|
||||||
"retry next audit" % PLUGIN)
|
|
||||||
|
|
||||||
# Don't proceed till we can communicate with FM and
|
|
||||||
# clear all existing interface and port alarms.
|
|
||||||
return 0
|
|
||||||
else:
|
|
||||||
obj.phase = RUN_PHASE__ALARMS_CLEARED
|
|
||||||
|
|
||||||
# Throttle HTTP request error retries
|
|
||||||
if obj.http_retry_count != 0:
|
|
||||||
obj.http_retry_count += 1
|
|
||||||
if obj.http_retry_count > obj.HTTP_RETRY_THROTTLE:
|
|
||||||
obj.http_retry_count = 0
|
|
||||||
return 0
|
|
||||||
|
|
||||||
# Issue query and construct the monitoring object
|
|
||||||
success = obj.make_http_request(to=PLUGIN_HTTP_TIMEOUT)
|
|
||||||
|
|
||||||
if success is False:
|
|
||||||
obj.http_retry_count += 1
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if len(obj.jresp) == 0:
|
|
||||||
collectd.error("%s no json response from http request" % PLUGIN)
|
|
||||||
obj.http_retry_count += 1
|
|
||||||
return 0
|
|
||||||
|
|
||||||
# Check query status
|
|
||||||
try:
|
|
||||||
if obj.jresp['status'] != 'pass':
|
|
||||||
collectd.error("%s link monitor query %s" %
|
|
||||||
(PLUGIN, obj.jresp['status']))
|
|
||||||
obj.http_retry_count += 1
|
|
||||||
return 0
|
|
||||||
|
|
||||||
except Exception as ex:
|
|
||||||
collectd.error("%s http request get reason failed ; %s" %
|
|
||||||
(PLUGIN, str(ex)))
|
|
||||||
collectd.info("%s resp:%d:%s" %
|
|
||||||
(PLUGIN, len(obj.jresp), obj.jresp))
|
|
||||||
obj.http_retry_count += 1
|
|
||||||
return 0
|
|
||||||
|
|
||||||
# log the first query response
|
|
||||||
if obj.audits == 0:
|
|
||||||
collectd.info("%s Link Status Query Response:%d:\n%s" %
|
|
||||||
(PLUGIN, len(obj.jresp), obj.jresp))
|
|
||||||
|
|
||||||
# uncomment below for debug purposes
|
|
||||||
#
|
|
||||||
# for network in NETWORKS:
|
|
||||||
# dump_network_info(network)
|
|
||||||
|
|
||||||
try:
|
|
||||||
link_info = obj.jresp['link_info']
|
|
||||||
for network_link_info in link_info:
|
|
||||||
collectd.debug("%s parse link info:%s" %
|
|
||||||
(PLUGIN, network_link_info))
|
|
||||||
for network in NETWORKS:
|
|
||||||
if network.name == network_link_info['network']:
|
|
||||||
links = network_link_info['links']
|
|
||||||
nname = network.name
|
|
||||||
if len(links) > 0:
|
|
||||||
link_one = links[0]
|
|
||||||
|
|
||||||
# get initial link one name
|
|
||||||
if network.link_one.name is None:
|
|
||||||
network.link_one.name = link_one['name']
|
|
||||||
|
|
||||||
network.link_one.timestamp =\
|
|
||||||
float(get_timestamp(link_one['time']))
|
|
||||||
|
|
||||||
# load link one state
|
|
||||||
if link_one['state'] == LINK_UP:
|
|
||||||
collectd.debug("%s %s IS Up [%s]" %
|
|
||||||
(PLUGIN, network.link_one.name,
|
|
||||||
network.link_one.state))
|
|
||||||
if network.link_one.state != LINK_UP:
|
|
||||||
network.link_one.state_change = True
|
|
||||||
network.link_one.clear_port_alarm(nname)
|
|
||||||
network.link_one.state = LINK_UP
|
|
||||||
else:
|
|
||||||
collectd.debug("%s %s IS Down [%s]" %
|
|
||||||
(PLUGIN, network.link_one.name,
|
|
||||||
network.link_one.state))
|
|
||||||
if network.link_one.state == LINK_UP:
|
|
||||||
network.link_one.state_change = True
|
|
||||||
network.link_one.raise_port_alarm(nname)
|
|
||||||
network.link_one.state = LINK_DOWN
|
|
||||||
|
|
||||||
if len(links) > 1:
|
|
||||||
link_two = links[1]
|
|
||||||
|
|
||||||
# get initial link two name
|
|
||||||
if network.link_two.name is None:
|
|
||||||
network.link_two.name = link_two['name']
|
|
||||||
|
|
||||||
network.link_two.timestamp =\
|
|
||||||
float(get_timestamp(link_two['time']))
|
|
||||||
|
|
||||||
# load link two state
|
|
||||||
if link_two['state'] == LINK_UP:
|
|
||||||
collectd.debug("%s %s IS Up [%s]" %
|
|
||||||
(PLUGIN, network.link_two.name,
|
|
||||||
network.link_two.state))
|
|
||||||
if network.link_two.state != LINK_UP:
|
|
||||||
network.link_two.state_change = True
|
|
||||||
network.link_two.clear_port_alarm(nname)
|
|
||||||
network.link_two.state = LINK_UP
|
|
||||||
else:
|
|
||||||
collectd.debug("%s %s IS Down [%s]" %
|
|
||||||
(PLUGIN, network.link_two.name,
|
|
||||||
network.link_two.state))
|
|
||||||
if network.link_two.state == LINK_UP:
|
|
||||||
network.link_two.state_change = True
|
|
||||||
network.link_two.raise_port_alarm(nname)
|
|
||||||
network.link_two.state = LINK_DOWN
|
|
||||||
|
|
||||||
# manage interface alarms
|
|
||||||
network.manage_iface_alarm()
|
|
||||||
|
|
||||||
except Exception as ex:
|
|
||||||
collectd.error("%s link monitor query parse exception ; %s " %
|
|
||||||
(PLUGIN, obj.resp))
|
|
||||||
|
|
||||||
# handle state changes
|
|
||||||
for network in NETWORKS:
|
|
||||||
if network.link_two.name is not None and \
|
|
||||||
network.link_one.state_change is True:
|
|
||||||
|
|
||||||
if network.link_one.state == LINK_UP:
|
|
||||||
collectd.info("%s %s link one '%s' is Up" %
|
|
||||||
(PLUGIN,
|
|
||||||
network.name,
|
|
||||||
network.link_one.name))
|
|
||||||
else:
|
|
||||||
collectd.info("%s %s link one '%s' is Down" %
|
|
||||||
(PLUGIN,
|
|
||||||
network.name,
|
|
||||||
network.link_one.name))
|
|
||||||
|
|
||||||
if network.link_two.name is not None and \
|
|
||||||
network.link_two.state_change is True:
|
|
||||||
|
|
||||||
if network.link_two.state == LINK_UP:
|
|
||||||
collectd.info("%s %s link two '%s' is Up" %
|
|
||||||
(PLUGIN,
|
|
||||||
network.name,
|
|
||||||
network.link_two.name))
|
|
||||||
else:
|
|
||||||
collectd.info("%s %s link two %s 'is' Down" %
|
|
||||||
(PLUGIN,
|
|
||||||
network.name,
|
|
||||||
network.link_two.name))
|
|
||||||
|
|
||||||
# Dispatch usage value to collectd
|
|
||||||
val = collectd.Values(host=obj.hostname)
|
|
||||||
val.plugin = 'interface'
|
|
||||||
val.type = 'percent'
|
|
||||||
val.type_instance = 'used'
|
|
||||||
|
|
||||||
# For each interface [ mgmt, oam, infra ]
|
|
||||||
# calculate the percentage used sample
|
|
||||||
# sample = 100 % when all its links are up
|
|
||||||
# sample = 0 % when all its links are down
|
|
||||||
# sample = 50 % when one of a lagged group is down
|
|
||||||
for network in NETWORKS:
|
|
||||||
|
|
||||||
if network.link_one.name is not None:
|
|
||||||
|
|
||||||
val.plugin_instance = network.name
|
|
||||||
|
|
||||||
network.sample = 0
|
|
||||||
|
|
||||||
if network.link_two.name is not None:
|
|
||||||
# lagged
|
|
||||||
|
|
||||||
if network.link_one.state == LINK_UP:
|
|
||||||
network.sample = 50
|
|
||||||
if network.link_two.state == LINK_UP:
|
|
||||||
network.sample += 50
|
|
||||||
else:
|
|
||||||
if network.link_one.state == LINK_UP:
|
|
||||||
network.sample = 100
|
|
||||||
val.dispatch(values=[network.sample])
|
|
||||||
|
|
||||||
if network.link_one.state_change is True or \
|
|
||||||
network.link_two.state_change is True:
|
|
||||||
|
|
||||||
dump_network_info(network)
|
|
||||||
|
|
||||||
network.link_one.state_change = False
|
|
||||||
network.link_two.state_change = False
|
|
||||||
|
|
||||||
network.sample_last = network.sample
|
|
||||||
|
|
||||||
else:
|
|
||||||
collectd.debug("%s %s network not provisioned" %
|
|
||||||
(PLUGIN, network.name))
|
|
||||||
obj.audits += 1
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
# register the config, init and read functions
|
|
||||||
collectd.register_config(config_func)
|
|
||||||
collectd.register_init(init_func)
|
|
||||||
collectd.register_read(read_func, interval=PLUGIN_AUDIT_INTERVAL)
|
|
@ -1,21 +0,0 @@
|
|||||||
# For stock plugin only
|
|
||||||
# Uncomment to compare stock to tiS plugin readings
|
|
||||||
# ---------------------
|
|
||||||
# <Plugin memory>
|
|
||||||
# ValuesAbsolute false
|
|
||||||
# ValuesPercentage true
|
|
||||||
# </Plugin>
|
|
||||||
|
|
||||||
<Plugin "threshold">
|
|
||||||
<Plugin "memory">
|
|
||||||
<Type "percent">
|
|
||||||
Instance "used"
|
|
||||||
Persist true
|
|
||||||
PersistOK true
|
|
||||||
WarningMax 80.00
|
|
||||||
FailureMax 90.00
|
|
||||||
Hits 2
|
|
||||||
Invert false
|
|
||||||
</Type>
|
|
||||||
</Plugin>
|
|
||||||
</Plugin>
|
|
@ -1,279 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2018-2019 Wind River Systems, Inc.
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
############################################################################
|
|
||||||
#
|
|
||||||
# This file is the collectd 'Platform Memory Usage' Monitor.
#
# The Platform Memory Usage is calculated as the percentage of anonymous
# memory in use against total available platform memory, or of Committed_AS
# against CommitLimit when the kernel is set to strict overcommit.
#
# Init Function:
#  - read /proc/sys/vm/overcommit_memory to learn the kernel 'strict' setting
#
|
|
||||||
############################################################################
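# Worked example of the usage calculation below (numbers illustrative only):
#
#   avail = MemFree + Buffers + Cached + SReclaimable
#   total = avail + AnonPages
#   usage = 100 * AnonPages / total            (heuristic overcommit)
#   usage = 100 * Committed_AS / CommitLimit   (strict overcommit)
#
#   e.g. AnonPages = 2 GiB with avail = 6 GiB gives usage = 100 * 2/8 = 25 %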
|
|
||||||
import os
|
|
||||||
import collectd
|
|
||||||
|
|
||||||
debug = False
|
|
||||||
|
|
||||||
PLUGIN = 'platform memory usage'
|
|
||||||
PLUGIN_NUMA = 'numa memory usage'
|
|
||||||
PLUGIN_HUGE = 'hugepage memory usage'
|
|
||||||
|
|
||||||
|
|
||||||
# Memory Control class
|
|
||||||
class MEM:
|
|
||||||
hostname = "" # hostname for sample notification message
|
|
||||||
cmd = '/proc/meminfo' # the query command
|
|
||||||
value = float(0.0) # float value of memory usage
|
|
||||||
|
|
||||||
# meminfo values we care about
|
|
||||||
memTotal_kB = 0
|
|
||||||
memFree_kB = 0
|
|
||||||
buffers = 0
|
|
||||||
cached = 0
|
|
||||||
SReclaimable = 0
|
|
||||||
CommitLimit = 0
|
|
||||||
Committed_AS = 0
|
|
||||||
HugePages_Total = 0
|
|
||||||
HugePages_Free = 0
|
|
||||||
Hugepagesize = 0
|
|
||||||
AnonPages = 0
|
|
||||||
FilePages = 0
|
|
||||||
|
|
||||||
# derived values
|
|
||||||
avail = 0
|
|
||||||
total = 0
|
|
||||||
strict = 0
|
|
||||||
|
|
||||||
|
|
||||||
# Instantiate the class
|
|
||||||
obj = MEM()
|
|
||||||
|
|
||||||
|
|
||||||
def log_meminfo(plugin, name, meminfo):
|
|
||||||
"""Log the supplied meminfo"""
|
|
||||||
|
|
||||||
if debug is False:
|
|
||||||
return
|
|
||||||
|
|
||||||
collectd.info("%s %s" % (plugin, name))
|
|
||||||
collectd.info("%s ---------------------------" % plugin)
|
|
||||||
collectd.info("%s memTotal_kB : %f" % (plugin, meminfo.memTotal_kB))
|
|
||||||
collectd.info("%s memFree_kB : %f" % (plugin, meminfo.memFree_kB))
|
|
||||||
collectd.info("%s Buffers : %f" % (plugin, meminfo.buffers))
|
|
||||||
collectd.info("%s Cached : %f" % (plugin, meminfo.cached))
|
|
||||||
collectd.info("%s SReclaimable : %f" % (plugin, meminfo.SReclaimable))
|
|
||||||
collectd.info("%s CommitLimit : %f" % (plugin, meminfo.CommitLimit))
|
|
||||||
collectd.info("%s Committed_AS : %f" % (plugin, meminfo.Committed_AS))
|
|
||||||
collectd.info("%s HugePages_Total: %f" % (plugin, meminfo.HugePages_Total))
|
|
||||||
collectd.info("%s HugePages_Free : %f" % (plugin, meminfo.HugePages_Free))
|
|
||||||
collectd.info("%s Hugepagesize : %f" % (plugin, meminfo.Hugepagesize))
|
|
||||||
collectd.info("%s AnonPages : %f" % (plugin, meminfo.AnonPages))
|
|
||||||
|
|
||||||
|
|
||||||
def config_func(config):
|
|
||||||
"""Configure the memory usage plugin"""
|
|
||||||
|
|
||||||
for node in config.children:
|
|
||||||
key = node.key.lower()
|
|
||||||
val = node.values[0]
|
|
||||||
|
|
||||||
if key == 'path':
|
|
||||||
obj.cmd = str(val)
|
|
||||||
collectd.info("%s configured query command: '%s'" %
|
|
||||||
(PLUGIN, obj.cmd))
|
|
||||||
return 0
|
|
||||||
|
|
||||||
collectd.info("%s no config command provided ; "
|
|
||||||
"defaulting to '%s'" %
|
|
||||||
(PLUGIN, obj.cmd))
|
|
||||||
|
|
||||||
|
|
||||||
# Load the hostname and kernel memory 'overcommit' setting.
|
|
||||||
def init_func():
|
|
||||||
# get current hostname
|
|
||||||
obj.hostname = os.uname()[1]
|
|
||||||
|
|
||||||
# get strict setting
|
|
||||||
#
|
|
||||||
# a value of 0 means "heuristic overcommit"
|
|
||||||
# a value of 1 means "always overcommit"
|
|
||||||
# a value of 2 means "don't overcommit".
|
|
||||||
#
|
|
||||||
# set strict true strict=1 if value is = 2
|
|
||||||
# otherwise strict is false strict=0 (default)
|
|
||||||
|
|
||||||
fn = '/proc/sys/vm/overcommit_memory'
|
|
||||||
if os.path.exists(fn):
|
|
||||||
with open(fn, 'r') as infile:
|
|
||||||
for line in infile:
|
|
||||||
if int(line) == 2:
|
|
||||||
obj.strict = 1
|
|
||||||
break
|
|
||||||
|
|
||||||
collectd.info("%s strict:%d" % (PLUGIN, obj.strict))
|
|
||||||
|
|
||||||
|
|
||||||
# Calculate the memory usage samples
|
|
||||||
def read_func():
|
|
||||||
meminfo = {}
|
|
||||||
try:
|
|
||||||
with open(obj.cmd) as fd:
|
|
||||||
for line in fd:
|
|
||||||
meminfo[line.split(':')[0]] = line.split(':')[1].strip()
|
|
||||||
|
|
||||||
except EnvironmentError as e:
|
|
||||||
collectd.error("%s unable to read from %s ; str(e)" %
|
|
||||||
(PLUGIN, str(e)))
|
|
||||||
return 0
|
|
||||||
|
|
||||||
# setup the sample structure
|
|
||||||
val = collectd.Values(host=obj.hostname)
|
|
||||||
val.type = 'percent'
|
|
||||||
val.type_instance = 'used'
|
|
||||||
|
|
||||||
# fit_value = 0
|
|
||||||
# if os.path.exists('/var/run/fit/mem_data'):
|
|
||||||
# with open('/var/run/fit/mem_data', 'r') as infile:
|
|
||||||
# for line in infile:
|
|
||||||
# fit_value = float(line)
|
|
||||||
# collectd.info("%s using FIT data:%.2f" %
|
|
||||||
# (PLUGIN, fit_value))
|
|
||||||
# break
|
|
||||||
|
|
||||||
# remove the 'unit' (kB) suffix that might be on some of the lines
|
|
||||||
for line in meminfo:
|
|
||||||
# remove the units from the value read
|
|
||||||
value_unit = [u.strip() for u in meminfo[line].split(' ', 1)]
|
|
||||||
if len(value_unit) == 2:
|
|
||||||
value, unit = value_unit
|
|
||||||
meminfo[line] = float(value)
|
|
||||||
else:
|
|
||||||
meminfo[line] = float(meminfo[line])
|
|
||||||
|
|
||||||
obj.memTotal_kB = float(meminfo['MemTotal'])
|
|
||||||
obj.memFree_kB = float(meminfo['MemFree'])
|
|
||||||
obj.buffers = float(meminfo['Buffers'])
|
|
||||||
obj.cached = float(meminfo['Cached'])
|
|
||||||
obj.SReclaimable = float(meminfo['SReclaimable'])
|
|
||||||
obj.CommitLimit = float(meminfo['CommitLimit'])
|
|
||||||
obj.Committed_AS = float(meminfo['Committed_AS'])
|
|
||||||
obj.HugePages_Total = float(meminfo['HugePages_Total'])
|
|
||||||
obj.HugePages_Free = float(meminfo['HugePages_Free'])
|
|
||||||
obj.Hugepagesize = float(meminfo['Hugepagesize'])
|
|
||||||
obj.AnonPages = float(meminfo['AnonPages'])
|
|
||||||
|
|
||||||
log_meminfo(PLUGIN, "/proc/meminfo", obj)
|
|
||||||
|
|
||||||
obj.avail = float(float(obj.memFree_kB) +
|
|
||||||
float(obj.buffers) +
|
|
||||||
float(obj.cached) +
|
|
||||||
float(obj.SReclaimable))
|
|
||||||
obj.total = float(float(obj.avail) +
|
|
||||||
float(obj.AnonPages))
|
|
||||||
|
|
||||||
if obj.strict == 1:
|
|
||||||
obj.value = float(float(obj.Committed_AS) / float(obj.CommitLimit))
|
|
||||||
else:
|
|
||||||
obj.value = float(float(obj.AnonPages) / float(obj.total))
|
|
||||||
obj.value = float(float(obj.value) * 100)
|
|
||||||
|
|
||||||
# if fit_value != 0:
|
|
||||||
# obj.value = fit_value
|
|
||||||
|
|
||||||
if debug is True:
|
|
||||||
collectd.info("%s ---------------------------" % PLUGIN)
|
|
||||||
collectd.info("%s memAvail: %d" % (PLUGIN, obj.avail))
|
|
||||||
collectd.info("%s memTotal: %d" % (PLUGIN, obj.total))
|
|
||||||
collectd.info('%s reports %.2f %% usage' % (PLUGIN, obj.value))
|
|
||||||
|
|
||||||
# Dispatch usage value to collectd
|
|
||||||
val.plugin = 'memory'
|
|
||||||
val.plugin_instance = 'platform'
|
|
||||||
val.dispatch(values=[obj.value])
|
|
||||||
|
|
||||||
#####################################################################
|
|
||||||
# Now get the Numa Node Memory Usage
|
|
||||||
#####################################################################
|
|
||||||
numa_node_files = []
|
|
||||||
fn = "/sys/devices/system/node/"
|
|
||||||
files = os.listdir(fn)
|
|
||||||
for file in files:
|
|
||||||
if 'node' in file:
|
|
||||||
numa_node_files.append(fn + file + '/meminfo')
|
|
||||||
|
|
||||||
for numa_node in numa_node_files:
|
|
||||||
meminfo = {}
|
|
||||||
try:
|
|
||||||
with open(numa_node) as fd:
|
|
||||||
for line in fd:
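# numa meminfo lines look like (values illustrative):
#   'Node 0 MemFree:  123456 kB'
# so split()[2] minus its trailing ':' is the key and split()[3] is the value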
|
|
||||||
meminfo[line.split()[2][0:-1]] = line.split()[3].strip()
|
|
||||||
|
|
||||||
obj.memFree_kB = float(meminfo['MemFree'])
|
|
||||||
obj.FilePages = float(meminfo['FilePages'])
|
|
||||||
obj.SReclaimable = float(meminfo['SReclaimable'])
|
|
||||||
obj.AnonPages = float(meminfo['AnonPages'])
|
|
||||||
obj.HugePages_Total = float(meminfo['HugePages_Total'])
|
|
||||||
obj.HugePages_Free = float(meminfo['HugePages_Free'])
|
|
||||||
|
|
||||||
log_meminfo(PLUGIN, numa_node, obj)
|
|
||||||
|
|
||||||
avail = float(float(obj.memFree_kB) +
|
|
||||||
float(obj.FilePages) +
|
|
||||||
float(obj.SReclaimable))
|
|
||||||
total = float(float(avail) +
|
|
||||||
float(obj.AnonPages))
|
|
||||||
obj.value = float(float(obj.AnonPages)) / float(total)
|
|
||||||
obj.value = float(float(obj.value) * 100)
|
|
||||||
|
|
||||||
# if fit_value != 0:
|
|
||||||
# obj.value = fit_value
|
|
||||||
|
|
||||||
# Dispatch usage value to collectd for this numa node
|
|
||||||
val.plugin_instance = numa_node.split('/')[5]
|
|
||||||
val.dispatch(values=[obj.value])
|
|
||||||
|
|
||||||
collectd.debug('%s reports %s at %.2f %% usage (%s)' %
|
|
||||||
(PLUGIN_NUMA,
|
|
||||||
val.plugin,
|
|
||||||
obj.value,
|
|
||||||
val.plugin_instance))
|
|
||||||
|
|
||||||
# Numa Node Huge Page Memory Monitoring
|
|
||||||
#
|
|
||||||
# Only monitor if there is Huge Page Memory
|
|
||||||
if obj.HugePages_Total > 0:
|
|
||||||
obj.value = \
|
|
||||||
float(float(obj.HugePages_Total -
|
|
||||||
obj.HugePages_Free)) / \
|
|
||||||
float(obj.HugePages_Total)
|
|
||||||
obj.value = float(float(obj.value) * 100)
|
|
||||||
|
|
||||||
# if fit_value != 0:
|
|
||||||
# obj.value = fit_value
|
|
||||||
|
|
||||||
# Dispatch huge page memory usage value
|
|
||||||
# to collectd for this numa node.
|
|
||||||
val.plugin_instance = numa_node.split('/')[5] + '_hugepages'
|
|
||||||
val.dispatch(values=[obj.value])
|
|
||||||
|
|
||||||
collectd.debug('%s reports %s at %.2f %% usage (%s)' %
|
|
||||||
(PLUGIN_HUGE,
|
|
||||||
val.plugin,
|
|
||||||
obj.value,
|
|
||||||
val.plugin_instance))
|
|
||||||
|
|
||||||
except EnvironmentError as e:
|
|
||||||
collectd.error("%s unable to read from %s ; str(e)" %
|
|
||||||
(PLUGIN_NUMA, str(e)))
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
collectd.register_config(config_func)
|
|
||||||
collectd.register_init(init_func)
|
|
||||||
collectd.register_read(read_func)
|
|
@ -1,380 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2018-2019 Wind River Systems, Inc.
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
#############################################################################
|
|
||||||
#
|
|
||||||
# This file is the collectd 'Maintenance' Notifier.
|
|
||||||
#
|
|
||||||
# Collectd provides information about each event as an object passed to the
|
|
||||||
# notification handler ; the notification object.
|
|
||||||
#
|
|
||||||
# object.host - the hostname
|
|
||||||
#
|
|
||||||
# object.plugin - the name of the plugin aka resource
|
|
||||||
# object.plugin_instance - plugin instance string i.e. say mountpoint
|
|
||||||
# for df plugin
|
|
||||||
# object.type, - the unit i.e. percent or absolute
|
|
||||||
# object.type_instance - the attribute i.e. free, used, etc
|
|
||||||
#
|
|
||||||
# object.severity - an integer value ; 1=failure, 2=warning, 4=okay
|
|
||||||
# object.message - a log-able message containing the above along
|
|
||||||
# with the value
|
|
||||||
#
|
|
||||||
# This notifier manages requesting mtce to assert or clear its collectd
|
|
||||||
# host-degrade-cause flag based on notification messages sent from collectd.
|
|
||||||
#
|
|
||||||
# Messages to maintenance are throttled to one every ONE_EVERY notifications
# while the degrade state remains the same as the last reported state.
|
|
||||||
#
|
|
||||||
# Message is sent on every state change
|
|
||||||
# from clear to assert or
|
|
||||||
# from assert to clear
|
|
||||||
#
|
|
||||||
# See code comments for details.
|
|
||||||
#
|
|
||||||
############################################################################
|
|
||||||
#
|
|
||||||
# Import list
|
|
||||||
|
|
||||||
import os
|
|
||||||
import socket
|
|
||||||
import collectd
|
|
||||||
import tsconfig.tsconfig as tsc
|
|
||||||
|
|
||||||
# This plugin name
|
|
||||||
PLUGIN = 'degrade notifier'
|
|
||||||
|
|
||||||
# collectd severity definitions ;
|
|
||||||
# Note: can't seem to pull them in symbolically with a header
|
|
||||||
NOTIF_FAILURE = 1
|
|
||||||
NOTIF_WARNING = 2
|
|
||||||
NOTIF_OKAY = 4
|
|
||||||
|
|
||||||
# default mtce port.
|
|
||||||
# ... with configuration override
|
|
||||||
MTCE_CMD_RX_PORT = 2101
|
|
||||||
|
|
||||||
# same state message throttle count.
|
|
||||||
# ... only send the degrade message every 'this' number
|
|
||||||
# while the state of assert or clear remains the same.
|
|
||||||
ONE_EVERY = 10
|
|
||||||
|
|
||||||
PLUGIN__DF = 'df'
|
|
||||||
PLUGIN__MEM = 'memory'
|
|
||||||
PLUGIN__CPU = 'cpu'
|
|
||||||
|
|
||||||
PLUGIN__VSWITCH_MEM = 'vswitch_mem'
|
|
||||||
PLUGIN__VSWITCH_CPU = 'vswitch_cpu'
|
|
||||||
PLUGIN__VSWITCH_PORT = "vswitch_port"
|
|
||||||
PLUGIN__VSWITCH_IFACE = "vswitch_iface"
|
|
||||||
|
|
||||||
|
|
||||||
PLUGIN_INTERFACE = 'interface'
|
|
||||||
PLUGIN__EXAMPLE = 'example'
|
|
||||||
|
|
||||||
|
|
||||||
# The collectd Maintenance Notifier Object
|
|
||||||
class collectdMtceNotifierObject:
|
|
||||||
|
|
||||||
def __init__(self, port):
|
|
||||||
"""collectdMtceNotifierObject Class constructor"""
|
|
||||||
# default maintenance port
|
|
||||||
self.port = port
|
|
||||||
self.addr = None
|
|
||||||
|
|
||||||
# specifies the protocol family to use when messaging maintenance.
|
|
||||||
# if system is IPV6, then that is learned and this 'protocol' is
|
|
||||||
# updated with AF_INET6
|
|
||||||
self.protocol = socket.AF_INET
|
|
||||||
|
|
||||||
# List of plugin names that require degrade for specified severity.
|
|
||||||
self.degrade_list__failure = [PLUGIN__DF,
|
|
||||||
PLUGIN__MEM,
|
|
||||||
PLUGIN__CPU,
|
|
||||||
PLUGIN__VSWITCH_MEM,
|
|
||||||
PLUGIN__VSWITCH_CPU,
|
|
||||||
PLUGIN__VSWITCH_PORT,
|
|
||||||
PLUGIN__VSWITCH_IFACE,
|
|
||||||
PLUGIN_INTERFACE,
|
|
||||||
PLUGIN__EXAMPLE]
|
|
||||||
self.degrade_list__warning = [PLUGIN_INTERFACE]
|
|
||||||
|
|
||||||
# the running list of resources that require degrade.
|
|
||||||
# a degrade clear message is sent whenever this list is empty.
|
|
||||||
# a degrade assert message is sent whenever this list is not empty.
|
|
||||||
self.degrade_list = []
|
|
||||||
|
|
||||||
# throttle down sending of duplicate degrade assert/clear messages
|
|
||||||
self.last_state = "undef"
|
|
||||||
self.msg_throttle = 0
|
|
||||||
|
|
||||||
|
|
||||||
# Instantiate the mtce_notifier object
|
|
||||||
# This object persists from notification to notification
|
|
||||||
obj = collectdMtceNotifierObject(MTCE_CMD_RX_PORT)
|
|
||||||
|
|
||||||
|
|
||||||
def _get_active_controller_ip():
|
|
||||||
"""Get the active controller host IP"""
|
|
||||||
|
|
||||||
try:
|
|
||||||
obj.addr = socket.getaddrinfo('controller', None)[0][4][0]
|
|
||||||
collectd.info("%s controller ip: %s" % (PLUGIN, obj.addr))
|
|
||||||
except Exception as ex:
|
|
||||||
obj.addr = None
|
|
||||||
collectd.error("%s failed to get controller ip ; %s" %
|
|
||||||
(PLUGIN, str(ex)))
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
def _df_instance_to_path(df_inst):
|
|
||||||
"""Convert a df instance name to a mountpoint"""
|
|
||||||
|
|
||||||
# df_root is not a dynamic file system. Ignore that one.
|
|
||||||
if df_inst == 'df_root':
|
|
||||||
return '/'
|
|
||||||
else:
|
|
||||||
# For all others replace all '-' with '/'
|
|
||||||
return('/' + df_inst[3:].replace('-', '/'))
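# Example mappings (instance names illustrative):
#   _df_instance_to_path('df_root')    -> '/'
#   _df_instance_to_path('df_var-log') -> '/var/log'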
|
|
||||||
|
|
||||||
|
|
||||||
# This function removes degraded file systems that are no longer present.
|
|
||||||
def _clear_degrade_for_missing_filesystems():
|
|
||||||
"""Remove degraded file systems that are no longer mounted or present"""
|
|
||||||
|
|
||||||
for df_inst in obj.degrade_list:
|
|
||||||
|
|
||||||
# Only file system plugins are looked at.
|
|
||||||
# File system plugin instance names are prefixed with 'df_'
|
|
||||||
# as the first 3 chars in the instance name.
|
|
||||||
if df_inst[0:3] == 'df_':
|
|
||||||
path = _df_instance_to_path(df_inst)
|
|
||||||
|
|
||||||
# check the mount point.
|
|
||||||
# if the mount point no longer exists then remove
|
|
||||||
# this instance from the degrade list.
|
|
||||||
if os.path.ismount(path) is False:
|
|
||||||
collectd.info("%s clearing degrade for missing %s ; %s" %
|
|
||||||
(PLUGIN, path, obj.degrade_list))
|
|
||||||
obj.degrade_list.remove(df_inst)
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
# The collectd configuration interface
|
|
||||||
#
|
|
||||||
# Used to configure the maintenance port.
|
|
||||||
# key = 'port'
|
|
||||||
# val = port number
|
|
||||||
#
|
|
||||||
def config_func(config):
|
|
||||||
"""Configure the maintenance degrade notifier plugin"""
|
|
||||||
|
|
||||||
collectd.debug('%s config function' % PLUGIN)
|
|
||||||
for node in config.children:
|
|
||||||
key = node.key.lower()
|
|
||||||
val = node.values[0]
|
|
||||||
|
|
||||||
if key == 'port':
|
|
||||||
obj.port = int(val)
|
|
||||||
collectd.info("%s configured mtce port: %d" %
|
|
||||||
(PLUGIN, obj.port))
|
|
||||||
return 0
|
|
||||||
|
|
||||||
obj.port = MTCE_CMD_RX_PORT
|
|
||||||
collectd.error("%s no mtce port provided ; defaulting to %d" %
|
|
||||||
(PLUGIN, obj.port))
|
|
||||||
|
|
||||||
|
|
||||||
# Collectd calls this function on startup.
|
|
||||||
def init_func():
|
|
||||||
"""Collectd Mtce Notifier Initialization Function"""
|
|
||||||
|
|
||||||
obj.host = os.uname()[1]
|
|
||||||
collectd.info("%s %s:%s sending to mtce port %d" %
|
|
||||||
(PLUGIN, tsc.nodetype, obj.host, obj.port))
|
|
||||||
|
|
||||||
collectd.debug("%s init function" % PLUGIN)
|
|
||||||
|
|
||||||
|
|
||||||
# This is the Notifier function that is called by collectd.
|
|
||||||
#
|
|
||||||
# Handling steps are
|
|
||||||
#
|
|
||||||
# 1. build resource name from notification object.
|
|
||||||
# 2. check resource against severity lists.
|
|
||||||
# 3. manage this instance's degrade state.
|
|
||||||
# 4. send mtcAgent the degrade state message.
|
|
||||||
#
|
|
||||||
def notifier_func(nObject):
|
|
||||||
"""Collectd Mtce Notifier Handler Function"""
|
|
||||||
|
|
||||||
# Create the resource name from the notifier object.
|
|
||||||
# format: <plugin name>_<plugin_instance_name>
|
|
||||||
resource = nObject.plugin
|
|
||||||
if nObject.plugin_instance:
|
|
||||||
resource += "_" + nObject.plugin_instance
|
|
||||||
|
|
||||||
# This block looks at the current notification severity
|
|
||||||
# and manages the degrade_list.
|
|
||||||
# If the specified plugin name exists in each of the warnings
|
|
||||||
# or failure lists and there is a current severity match then
|
|
||||||
# add that resource instance to the degrade list.
|
|
||||||
# Conversely if this notification is OKAY then make sure this
|
|
||||||
# resource instance is not in the degrade list (remove it if it is)
|
|
||||||
if nObject.severity is NOTIF_OKAY:
|
|
||||||
if obj.degrade_list and resource in obj.degrade_list:
|
|
||||||
obj.degrade_list.remove(resource)
|
|
||||||
|
|
||||||
elif nObject.severity is NOTIF_FAILURE:
|
|
||||||
if obj.degrade_list__failure:
|
|
||||||
if nObject.plugin in obj.degrade_list__failure:
|
|
||||||
if resource not in obj.degrade_list:
|
|
||||||
# handle dynamic filesystems going missing over a swact
|
|
||||||
# or unmount and being reported as a transient error by
|
|
||||||
# the df plugin. Don't add it to the failed list if the
|
|
||||||
# mountpoint is gone.
|
|
||||||
add = True
|
|
||||||
if nObject.plugin == PLUGIN__DF:
|
|
||||||
path = _df_instance_to_path(resource)
|
|
||||||
add = os.path.ismount(path)
|
|
||||||
if add is True:
|
|
||||||
collectd.info("%s %s added to degrade list" %
|
|
||||||
(PLUGIN, resource))
|
|
||||||
obj.degrade_list.append(resource)
|
|
||||||
else:
|
|
||||||
# If severity is failure and no failures cause degrade
|
|
||||||
# then make sure this plugin is not in the degrade list,
|
|
||||||
# Should never occur.
|
|
||||||
if resource in obj.degrade_list:
|
|
||||||
obj.degrade_list.remove(resource)
|
|
||||||
|
|
||||||
elif nObject.severity is NOTIF_WARNING:
|
|
||||||
if obj.degrade_list__warning:
|
|
||||||
if nObject.plugin in obj.degrade_list__warning:
|
|
||||||
if resource not in obj.degrade_list:
|
|
||||||
# handle dynamic filesystems going missing over a swact
|
|
||||||
# or unmount and being reported as a transient error by
|
|
||||||
# the df plugin. Don't add it to the failed list if the
|
|
||||||
# mountpoint is gone.
|
|
||||||
add = True
|
|
||||||
if nObject.plugin == PLUGIN__DF:
|
|
||||||
path = _df_instance_to_path(resource)
|
|
||||||
add = os.path.ismount(path)
|
|
||||||
if add is True:
|
|
||||||
collectd.info("%s %s added to degrade list" %
|
|
||||||
(PLUGIN, resource))
|
|
||||||
obj.degrade_list.append(resource)
|
|
||||||
else:
|
|
||||||
# If severity is warning and no warnings cause degrade
|
|
||||||
# then make sure this plugin is not in the degrade list.
|
|
||||||
# Should never occur..
|
|
||||||
if resource in obj.degrade_list:
|
|
||||||
obj.degrade_list.remove(resource)
|
|
||||||
else:
|
|
||||||
collectd.info("%s unsupported severity %d" %
|
|
||||||
(PLUGIN, nObject.severity))
|
|
||||||
return 0
|
|
||||||
|
|
||||||
# running counter of notifications.
|
|
||||||
obj.msg_throttle += 1
|
|
||||||
|
|
||||||
# Support for Dynamic File Systems
|
|
||||||
# --------------------------------
|
|
||||||
# Some active controller mounted filesystems can become
|
|
||||||
# unmounted under the watch of collectd. This can occur
|
|
||||||
# as a result of a Swact. If a 'degrade' is raised at the
|
|
||||||
# time an fs disappears then that state can become stuck
|
|
||||||
# active until the next Swact. This call handles this case.
|
|
||||||
#
|
|
||||||
# Audit file system presence every time we get the
|
|
||||||
# notification for the root file system.
|
|
||||||
# This depends on the root filesystem always being present.
|
|
||||||
if nObject.plugin == 'df' \
|
|
||||||
and nObject.plugin_instance == 'root' \
|
|
||||||
and len(obj.degrade_list):
|
|
||||||
_clear_degrade_for_missing_filesystems()
|
|
||||||
|
|
||||||
# If degrade list is empty then a clear state is sent to maintenance.
|
|
||||||
# If degrade list is NOT empty then an assert state is sent to maintenance
|
|
||||||
# For logging and to ease debug the code below will create a list of
|
|
||||||
# degraded resource instances to be included in the message to maintenance
|
|
||||||
# for mtcAgent to optionally log it.
|
|
||||||
resources = ""
|
|
||||||
if obj.degrade_list:
|
|
||||||
# loop over the list,
|
|
||||||
# limit the degraded resource list being sent to mtce to 5
|
|
||||||
for r in obj.degrade_list[0:5]:
|
|
||||||
resources += r + ','
|
|
||||||
resources = resources[:-1]
|
|
||||||
state = "assert"
|
|
||||||
else:
|
|
||||||
state = "clear"
|
|
||||||
|
|
||||||
# Message throttling ....
|
|
||||||
|
|
||||||
# Avoid sending the same last state message for up to ONE_EVERY count.
|
|
||||||
# Just reduce load on mtcAgent
|
|
||||||
if obj.last_state == state and obj.msg_throttle < ONE_EVERY:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
# if the degrade state has changed then log it and proceed
|
|
||||||
if obj.last_state != state:
|
|
||||||
if obj.last_state != "undef":
|
|
||||||
collectd.info("%s degrade %s %s" %
|
|
||||||
(PLUGIN,
|
|
||||||
state,
|
|
||||||
obj.degrade_list))
|
|
||||||
|
|
||||||
# Save state for next time
|
|
||||||
obj.last_state = state
|
|
||||||
|
|
||||||
# Clear the message throttle counter
|
|
||||||
obj.msg_throttle = 0
|
|
||||||
|
|
||||||
# Send the degrade state ; assert or clear message to mtcAgent.
|
|
||||||
# If we get a send failure then log it and set the addr to None
|
|
||||||
# so it forces us to refresh the controller address on the next
|
|
||||||
# notification
|
|
||||||
try:
|
|
||||||
mtce_socket = socket.socket(obj.protocol, socket.SOCK_DGRAM)
|
|
||||||
if mtce_socket:
|
|
||||||
if obj.addr is None:
|
|
||||||
_get_active_controller_ip()
|
|
||||||
if obj.addr is None:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
# Create the Maintenance message.
|
|
||||||
message = "{\"service\":\"collectd_notifier\","
|
|
||||||
message += "\"hostname\":\"" + nObject.host + "\","
|
|
||||||
message += "\"degrade\":\"" + state + "\","
|
|
||||||
message += "\"resource\":\"" + resources + "\"}"
|
|
||||||
collectd.debug("%s: %s" % (PLUGIN, message))
|
|
||||||
|
|
||||||
mtce_socket.settimeout(1.0)
|
|
||||||
mtce_socket.sendto(message, (obj.addr, obj.port))
|
|
||||||
mtce_socket.close()
|
|
||||||
else:
|
|
||||||
collectd.error("%s %s failed to open socket (%s)" %
|
|
||||||
(PLUGIN, resource, obj.addr))
|
|
||||||
except socket.error as e:
|
|
||||||
if e.args[0] == socket.EAI_ADDRFAMILY:
|
|
||||||
# Handle IPV4 to IPV6 switchover:
|
|
||||||
obj.protocol = socket.AF_INET6
|
|
||||||
collectd.info("%s %s ipv6 addressing (%s)" %
|
|
||||||
(PLUGIN, resource, obj.addr))
|
|
||||||
else:
|
|
||||||
collectd.error("%s %s socket error (%s) ; %s" %
|
|
||||||
(PLUGIN, resource, obj.addr, str(e)))
|
|
||||||
# try self correction
|
|
||||||
obj.addr = None
|
|
||||||
obj.protocol = socket.AF_INET
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
collectd.register_config(config_func)
|
|
||||||
collectd.register_init(init_func)
|
|
||||||
collectd.register_notification(notifier_func)
|
|
@ -1,13 +0,0 @@
|
|||||||
<Plugin "threshold">
|
|
||||||
<Plugin "ntpq">
|
|
||||||
<Type "absolute">
|
|
||||||
Instance "reachable"
|
|
||||||
Persist true
|
|
||||||
PersistOK true
|
|
||||||
WarningMin 1
|
|
||||||
FailureMin 0
|
|
||||||
Hits 2
|
|
||||||
Invert false
|
|
||||||
</Type>
|
|
||||||
</Plugin>
|
|
||||||
</Plugin>
|
|
@ -1,857 +0,0 @@
|
|||||||
############################################################################
|
|
||||||
# Copyright (c) 2018-2019 Wind River Systems, Inc.
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
#############################################################################
|
|
||||||
#
|
|
||||||
# This is the NTP connectivity monitor plugin for collectd.
|
|
||||||
#
|
|
||||||
# This plugin uses the industry standard ntpq exec to query NTP attributes.
|
|
||||||
#
|
|
||||||
# This plugin executes 'ntpq -np' to determine which provisioned servers
|
|
||||||
# are reachable. The ntpq output includes a Tally Code ; the Tally Code is
# represented by the first character in each server's line item.
|
|
||||||
#
|
|
||||||
# The only ntpq output looked at by this plugin are the Tally Codes and
|
|
||||||
# associated IPs.
|
|
||||||
#
|
|
||||||
# Tally Code Summary:
|
|
||||||
#
|
|
||||||
# A server is considered reachable only when the Tally Code is a * or a +.
|
|
||||||
# A server is considered unreachable if the Tally Code is a ' ' (space)
|
|
||||||
# A server with a '*' Tally Code is the 'selected' server.
|
|
||||||
#
|
|
||||||
# Here is an example of the ntpq command output
|
|
||||||
#
|
|
||||||
# remote refid st t when poll reach delay offset jitter
|
|
||||||
# =============================================================================
|
|
||||||
# +192.168.204.104 206.108.0.133 2 u 203 1024 377 0.226 -3.443 1.137
|
|
||||||
# +97.107.129.217 200.98.196.212 2 u 904 1024 377 21.677 5.577 0.624
|
|
||||||
# 192.95.27.155 24.150.203.150 2 u 226 1024 377 15.867 0.381 1.124
|
|
||||||
# -97.107.129.217 200.98.196.212 2 u 904 1024 377 21.677 5.577 0.624
|
|
||||||
# *182.95.27.155 24.150.203.150 2 u 226 1024 377 15.867 0.381 1.124
|
|
||||||
#
|
|
||||||
# The local controller node is not to be considered a reachable server and is
|
|
||||||
# never alarmed if it is not reachable.
|
|
||||||
#
|
|
||||||
# Normal running modes with no alarms include
|
|
||||||
#
|
|
||||||
# 0 - All NTP servers are reachable and one is selected
|
|
||||||
# 1 - No NTP servers are provisioned
|
|
||||||
#
|
|
||||||
# Failure modes that warrant alarms include
|
|
||||||
#
|
|
||||||
# 2 - None of the NTP servers are reachable - major alarm
|
|
||||||
# 3 - Some NTP servers reachable and one is selected - server IP minor alarm
|
|
||||||
# 4 - Some NTP servers reachable but none is selected - major alarm
|
|
||||||
#
|
|
||||||
# None of these failures result in a host being degraded.
|
|
||||||
#
|
|
||||||
# This script will only be run on the controller nodes.
|
|
||||||
#
|
|
||||||
# This script logs to daemon.log with the 'collectd' process label
|
|
||||||
#
|
|
||||||
###############################################################################
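# Illustrative tally code / IP extraction from one ntpq output line, based on
# the sample output above ; this sketch is for explanation only and is not
# necessarily how the parser below is written:
#
#   line = '*192.168.204.104 206.108.0.133  2 u  203 1024 377  0.226 ...'
#   tally = line[0]            # '*' selected, '+' reachable, ' ' unreachable
#   ip = line[1:].split()[0]   # '192.168.204.104'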
|
|
||||||
|
|
||||||
import os
|
|
||||||
import subprocess
|
|
||||||
import uuid
|
|
||||||
import collectd
|
|
||||||
from fm_api import constants as fm_constants
|
|
||||||
from fm_api import fm_api
|
|
||||||
import tsconfig.tsconfig as tsc
|
|
||||||
import socket
|
|
||||||
|
|
||||||
api = fm_api.FaultAPIsV2()
|
|
||||||
|
|
||||||
PLUGIN = 'NTP query plugin'
|
|
||||||
PLUGIN_INTERVAL = 600 # audit interval in secs
|
|
||||||
PLUGIN_CONF = '/etc/ntp.conf'
|
|
||||||
PLUGIN_EXEC = '/usr/sbin/ntpq'
|
|
||||||
PLUGIN_EXEC_OPTIONS = '-pn'
|
|
||||||
PLUGIN_ALARMID = "100.114"
|
|
||||||
|
|
||||||
|
|
||||||
# define a class here that will persist over read calls
|
|
||||||
class NtpqObject:
|
|
||||||
|
|
||||||
# static variables set in init
|
|
||||||
hostname = '' # the name of this host
|
|
||||||
base_eid = '' # the eid for the major alarm
|
|
||||||
init_complete = False # set to true once config is complete
|
|
||||||
alarm_raised = False # True when the major alarm is asserted
|
|
||||||
|
|
||||||
server_list_conf = [] # list of servers in the /etc/ntp.conf file
|
|
||||||
server_list_ntpq = [] # list of servers in the ntpq -np output
|
|
||||||
unreachable_servers = [] # list of unreachable servers
|
|
||||||
reachable_servers = [] # list of reachable servers
|
|
||||||
selected_server = 'None' # the ip address of the selected server
|
|
||||||
selected_server_save = 'None' # the last selected server ; note change
|
|
||||||
peer_selected = False # true when peer is selected
|
|
||||||
|
|
||||||
# variables used to raise alarms to FM
|
|
||||||
suppression = True
|
|
||||||
service_affecting = False
|
|
||||||
name = "NTP"
|
|
||||||
alarm_type = fm_constants.FM_ALARM_TYPE_1
|
|
||||||
cause = fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN
|
|
||||||
repair = "Monitor and if condition persists, "
|
|
||||||
repair += "contact next level of support."
|
|
||||||
|
|
||||||
|
|
||||||
# This plugin's class object - persists over read calls
|
|
||||||
obj = NtpqObject()
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
#
|
|
||||||
# Name : _add_unreachable_server
|
|
||||||
#
|
|
||||||
# Description: This private interface is used to add an ip to the
|
|
||||||
# unreachable servers list.
|
|
||||||
#
|
|
||||||
# Parameters : IP address
|
|
||||||
#
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
def _add_unreachable_server(ip=None):
|
|
||||||
"""Add ip to unreachable_servers list"""
|
|
||||||
|
|
||||||
if ip:
|
|
||||||
if ip not in obj.unreachable_servers:
|
|
||||||
collectd.debug("%s adding '%s' to unreachable servers list: %s" %
|
|
||||||
(PLUGIN, ip, obj.unreachable_servers))
|
|
||||||
|
|
||||||
obj.unreachable_servers.append(ip)
|
|
||||||
|
|
||||||
collectd.info("%s added '%s' to unreachable servers list: %s" %
|
|
||||||
(PLUGIN, ip, obj.unreachable_servers))
|
|
||||||
else:
|
|
||||||
collectd.debug("%s ip '%s' already in unreachable_servers list" %
|
|
||||||
(PLUGIN, ip))
|
|
||||||
else:
|
|
||||||
collectd.error("%s _add_unreachable_server called with no IP" % PLUGIN)
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
#
|
|
||||||
# Name : _raise_alarm
|
|
||||||
#
|
|
||||||
# Description: This private interface is used to raise NTP alarms.
|
|
||||||
#
|
|
||||||
# Parameters : Optional IP address
|
|
||||||
#
|
|
||||||
# If called with no or empty IP then a generic major alarm is raised.
|
|
||||||
# If called with an IP then an IP specific minor alarm is raised.
|
|
||||||
#
|
|
||||||
# Returns : Error indication.
|
|
||||||
#
|
|
||||||
# True : is error. FM call failed to set the
|
|
||||||
# alarm and needs to be retried.
|
|
||||||
#
|
|
||||||
# False: no error. FM call succeeds
|
|
||||||
#
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
def _raise_alarm(ip=None):
|
|
||||||
"""Assert an NTP alarm"""
|
|
||||||
|
|
||||||
if not ip:
|
|
||||||
# Don't re-raise the alarm if it's already raised
|
|
||||||
if obj.alarm_raised is True:
|
|
||||||
return False
|
|
||||||
|
|
||||||
if obj.peer_selected:
|
|
||||||
reason = "NTP cannot reach external time source; " \
|
|
||||||
"syncing with peer controller only"
|
|
||||||
fm_severity = fm_constants.FM_ALARM_SEVERITY_MINOR
|
|
||||||
else:
|
|
||||||
reason = "NTP configuration does not contain any valid "
|
|
||||||
reason += "or reachable NTP servers."
|
|
||||||
fm_severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
|
|
||||||
|
|
||||||
eid = obj.base_eid
|
|
||||||
|
|
||||||
else:
|
|
||||||
reason = "NTP address "
|
|
||||||
reason += ip
|
|
||||||
reason += " is not a valid or a reachable NTP server."
|
|
||||||
eid = obj.base_eid + '=' + ip
|
|
||||||
fm_severity = fm_constants.FM_ALARM_SEVERITY_MINOR
|
|
||||||
|
|
||||||
try:
|
|
||||||
fault = fm_api.Fault(
|
|
||||||
alarm_id=PLUGIN_ALARMID,
|
|
||||||
alarm_state=fm_constants.FM_ALARM_STATE_SET,
|
|
||||||
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
|
|
||||||
entity_instance_id=eid,
|
|
||||||
severity=fm_severity,
|
|
||||||
reason_text=reason,
|
|
||||||
alarm_type=obj.alarm_type,
|
|
||||||
probable_cause=obj.cause,
|
|
||||||
proposed_repair_action=obj.repair,
|
|
||||||
service_affecting=obj.service_affecting,
|
|
||||||
suppression=obj.suppression)
|
|
||||||
|
|
||||||
alarm_uuid = api.set_fault(fault)
|
|
||||||
if _is_uuid_like(alarm_uuid) is False:
|
|
||||||
|
|
||||||
# Don't _add_unreachable_server list if the fm call failed.
|
|
||||||
# That way it will be retried at a later time.
|
|
||||||
collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
|
|
||||||
(PLUGIN, PLUGIN_ALARMID, eid, alarm_uuid))
|
|
||||||
return 0
|
|
||||||
else:
|
|
||||||
collectd.info("%s raised alarm %s:%s" %
|
|
||||||
(PLUGIN,
|
|
||||||
PLUGIN_ALARMID,
|
|
||||||
eid))
|
|
||||||
if ip:
|
|
||||||
_add_unreachable_server(ip)
|
|
||||||
else:
|
|
||||||
obj.alarm_raised = True
|
|
||||||
|
|
||||||
except Exception as ex:
|
|
||||||
collectd.error("%s 'set_fault' exception ; %s:%s:%s ; %s" %
|
|
||||||
(PLUGIN,
|
|
||||||
PLUGIN_ALARMID,
|
|
||||||
eid,
|
|
||||||
fm_severity,
|
|
||||||
ex))
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
#
|
|
||||||
# Name : _clear_base_alarm
|
|
||||||
#
|
|
||||||
# Description: This private interface is used to clear the NTP base alarm.
|
|
||||||
#
|
|
||||||
# Parameters : None
|
|
||||||
#
|
|
||||||
# Returns : Error indication.
|
|
||||||
#
|
|
||||||
# False: is error. FM call failed to clear the
|
|
||||||
# alarm and needs to be retried.
|
|
||||||
#
|
|
||||||
# True : no error. FM call succeeds
|
|
||||||
#
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
def _clear_base_alarm():
|
|
||||||
"""Clear the NTP base alarm"""
|
|
||||||
|
|
||||||
try:
|
|
||||||
if api.clear_fault(PLUGIN_ALARMID, obj.base_eid) is False:
|
|
||||||
collectd.info("%s %s:%s alarm already cleared" %
|
|
||||||
(PLUGIN, PLUGIN_ALARMID, obj.base_eid))
|
|
||||||
else:
|
|
||||||
collectd.info("%s %s:%s alarm cleared" %
|
|
||||||
(PLUGIN, PLUGIN_ALARMID, obj.base_eid))
|
|
||||||
obj.alarm_raised = False
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception as ex:
|
|
||||||
collectd.error("%s 'clear_fault' exception ; %s:%s ; %s" %
|
|
||||||
(PLUGIN,
|
|
||||||
PLUGIN_ALARMID,
|
|
||||||
obj.base_eid,
|
|
||||||
ex))
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
#
|
|
||||||
# Name : _remove_ip_from_unreachable_list
|
|
||||||
#
|
|
||||||
# Description: This private interface is used to remove the specified IP
|
|
||||||
# from the unreachable servers list and clear its alarm if raised.
|
|
||||||
#
|
|
||||||
# Parameters : IP address
|
|
||||||
#
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
def _remove_ip_from_unreachable_list(ip):
|
|
||||||
"""Remove an IP address from the unreachable list and clear its NTP alarms"""
|
|
||||||
|
|
||||||
# remove from unreachable list if it's there
|
|
||||||
if ip and ip in obj.unreachable_servers:
|
|
||||||
|
|
||||||
eid = obj.base_eid + '=' + ip
|
|
||||||
collectd.debug("%s trying to clear alarm %s" % (PLUGIN, eid))
|
|
||||||
|
|
||||||
try:
|
|
||||||
# clear the alarm if it's asserted
|
|
||||||
if api.clear_fault(PLUGIN_ALARMID, eid) is True:
|
|
||||||
collectd.info("%s %s:%s alarm cleared " %
|
|
||||||
(PLUGIN, PLUGIN_ALARMID, eid))
|
|
||||||
else:
|
|
||||||
# alarm does not exist
|
|
||||||
collectd.info("%s %s:%s alarm clear" %
|
|
||||||
(PLUGIN, PLUGIN_ALARMID, eid))
|
|
||||||
|
|
||||||
obj.unreachable_servers.remove(ip)
|
|
||||||
|
|
||||||
except Exception as ex:
|
|
||||||
collectd.error("%s 'clear_fault' exception ; %s:%s ; %s" %
|
|
||||||
(PLUGIN,
|
|
||||||
PLUGIN_ALARMID,
|
|
||||||
eid,
|
|
||||||
ex))
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
#
|
|
||||||
# Name : _add_ip_to_ntpq_server_list
|
|
||||||
#
|
|
||||||
# Description: This private interface is used to create a list of servers
|
|
||||||
# found in the ntpq output.
|
|
||||||
#
|
|
||||||
# This list is used to detect and handle servers that might come
|
|
||||||
# and go between readings that might otherwise result in stuck
|
|
||||||
# alarms.
|
|
||||||
#
|
|
||||||
# Parameters : IP address
|
|
||||||
#
|
|
||||||
# Returns : nothing
|
|
||||||
#
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
def _add_ip_to_ntpq_server_list(ip):
|
|
||||||
"""Add this IP to the list of servers that ntpq reports against"""
|
|
||||||
|
|
||||||
if ip not in obj.server_list_ntpq:
|
|
||||||
obj.server_list_ntpq.append(ip)
|
|
||||||
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
#
|
|
||||||
# Name : _cleanup_stale_servers
|
|
||||||
#
|
|
||||||
# Description: This private interface walks through each server tracking list
|
|
||||||
# removing any that it finds that are not in the ntpq server list.
|
|
||||||
#
|
|
||||||
# Alarms are cleared as needed to avoid stale alarms
|
|
||||||
#
|
|
||||||
# Parameters : None
|
|
||||||
#
|
|
||||||
# Returns : nothing
|
|
||||||
#
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
def _cleanup_stale_servers():
|
|
||||||
"""Cleanup the server IP tracking lists"""
|
|
||||||
|
|
||||||
collectd.debug("%s CLEANUP REACHABLE: %s %s" %
|
|
||||||
(PLUGIN, obj.server_list_ntpq, obj.reachable_servers))
|
|
||||||
for ip in obj.reachable_servers:
|
|
||||||
if ip not in obj.server_list_ntpq:
|
|
||||||
collectd.info("%s removing missing '%s' server from reachable "
|
|
||||||
"server list" % (PLUGIN, ip))
|
|
||||||
obj.reachable_servers.remove(ip)
|
|
||||||
|
|
||||||
collectd.debug("%s CLEANUP UNREACHABLE: %s %s" %
|
|
||||||
(PLUGIN, obj.server_list_ntpq, obj.unreachable_servers))
|
|
||||||
for ip in obj.unreachable_servers:
|
|
||||||
if ip not in obj.server_list_ntpq:
|
|
||||||
collectd.info("%s removing missing '%s' server from unreachable "
|
|
||||||
"server list" % (PLUGIN, ip))
|
|
||||||
_remove_ip_from_unreachable_list(ip)
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
#
|
|
||||||
# Name : _get_ntp_servers
|
|
||||||
#
|
|
||||||
# Description: This private interface reads the list of ntp servers from the
|
|
||||||
# ntp.conf file
|
|
||||||
#
|
|
||||||
# Parameters : None
|
|
||||||
#
|
|
||||||
# Returns : nothing
|
|
||||||
#
|
|
||||||
# Updates : server_list_conf
|
|
||||||
#
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||

def _get_ntp_servers():
    """Read the provisioned servers from the ntp conf file"""

    with open(PLUGIN_CONF, 'r') as infile:
        for line in infile:
            if line.startswith('server '):
                ip = line.rstrip().split(' ')[1]
                if ip not in obj.server_list_conf:
                    obj.server_list_conf.append(ip)
        if len(obj.server_list_conf):
            collectd.info("%s server list: %s" %
                          (PLUGIN, obj.server_list_conf))
        else:
            ##################################################################
            #
            # Handle NTP_NOT_PROVISIONED (1) case
            #
            # There is no alarming for this case.
            # Clear any that may have been raised.
            #
            ##################################################################
            collectd.info("%s NTP Service Disabled ; no provisioned servers" %
                          PLUGIN)

            # clear all alarms
            if obj.alarm_raised:
                _clear_base_alarm()

            if obj.unreachable_servers:
                for ip in obj.unreachable_servers:
                    _remove_ip_from_unreachable_list(ip)
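

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original plugin): the 'server' line
# format that _get_ntp_servers() extracts from the ntp conf file. The
# configuration fragment below is hypothetical and the helper is never
# called by the plugin ; it only documents the parsing behaviour.
# ---------------------------------------------------------------------------
def _example_ntp_conf_parse():
    example_conf_lines = [
        'server 10.10.10.1 iburst\n',   # matched ; yields '10.10.10.1'
        'server 2001:db8::1\n',         # matched ; yields '2001:db8::1'
        '# server 192.0.2.5\n',         # skipped ; commented out line
    ]
    return [line.rstrip().split(' ')[1]
            for line in example_conf_lines
            if line.startswith('server ')]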


###############################################################################
#
# Name       : _is_controller
#
# Description: This private interface returns True if the specified ip is
#              associated with a local controller.
#
# Parameters : IP address
#
# Returns    : True or False
#
###############################################################################

def _is_controller(ip):
    """Returns True if this IP corresponds to one of the controllers"""

    collectd.debug("%s check if '%s' is a controller ip" % (PLUGIN, ip))
    with open('/etc/hosts', 'r') as infile:
        for line in infile:
            # skip over file comment lines prefixed with '#'
            if line[0] == '#':
                continue
            # line format is 'ip' 'name' ....
            split_line = line.split()
            if len(split_line) >= 2:
                # look for exact match ip that contains controller in its name
                if split_line[0] == ip and 'controller' in line:
                    collectd.debug("%s %s is a controller" % (PLUGIN, ip))
                    return True
    return False


###############################################################################
#
# Name       : _is_ip_address
#
# Description: This private interface returns:
#                  AF_INET if val is ipv4
#                  AF_INET6 if val is ipv6
#                  False if val is not a valid ip address
#
# Parameters : val is an IP address string
#
# Returns    : socket.AF_INET for ipv4, socket.AF_INET6 for ipv6
#              or False for invalid
#
###############################################################################

def _is_ip_address(val):
    """Return the address family of a valid IP address, otherwise False"""

    try:
        socket.inet_pton(socket.AF_INET, val)
        return socket.AF_INET
    except socket.error:
        pass

    try:
        socket.inet_pton(socket.AF_INET6, val)
        return socket.AF_INET6
    except socket.error:
        pass

    return False
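

# Illustrative usage sketch (not part of the original plugin). The sample
# addresses are hypothetical ; the helper below is never called by the
# plugin and only documents the return values of _is_ip_address().
def _example_is_ip_address_usage():
    assert _is_ip_address('10.10.10.1') == socket.AF_INET
    assert _is_ip_address('2001:db8::1') == socket.AF_INET6
    assert _is_ip_address('not-an-ip') is False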


###############################################################################
#
# Name       : _is_uuid_like
#
# Description: This private interface returns True if the specified value is
#              a valid uuid.
#
# Parameters : val is a uuid string
#
# Returns    : True or False
#
###############################################################################

def _is_uuid_like(val):
    """Returns validation of a value as a UUID"""
    try:
        return str(uuid.UUID(val)) == val
    except (TypeError, ValueError, AttributeError):
        return False


###############################################################################
#
# Name       : config_func
#
# Description: The configuration interface this plugin publishes to collectd.
#
#              collectd calls this interface one time on its process startup
#              when it loads this plugin.
#
#              There are currently no specific configuration options to parse
#              for this plugin.
#
# Parameters : collectd config object
#
# Returns    : zero
#
###############################################################################

def config_func(config):
    """Configure the plugin"""

    collectd.debug('%s config function' % PLUGIN)
    return 0


###############################################################################
#
# Name       : init_func
#
# Description: The initialization interface this plugin publishes to collectd.
#
#              collectd calls this interface one time on its process startup
#              when it loads this plugin.
#
#              1. get hostname
#              2. build base entity id for the NTP alarm
#              3. query FM for existing NTP alarms
#                 - base alarm is maintained and state loaded if it exists
#                 - ntp ip minor alarms are cleared on init. This is done to
#                   auto correct ntp server IP address changes over process
#                   restart ; avoid stuck alarms.
#
# Parameters : None
#
# Returns    : zero
#
###############################################################################

def init_func():

    # ntp query is for controllers only
    if tsc.nodetype != 'controller':
        return 0

    # do nothing till config is complete.
    # init_func will be called again by read_func once config is complete.
    if os.path.exists(tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE) is False:
        return 0

    # get current hostname
    obj.hostname = os.uname()[1]
    if not obj.hostname:
        collectd.error("%s failed to get hostname" % PLUGIN)
        return 1

    obj.base_eid = 'host=' + obj.hostname + '.ntp'
    collectd.debug("%s on %s with entity id '%s'" %
                   (PLUGIN, obj.hostname, obj.base_eid))

    # get a list of provisioned ntp servers
    _get_ntp_servers()

    # manage existing alarms.
    try:
        alarms = api.get_faults_by_id(PLUGIN_ALARMID)

    except Exception as ex:
        collectd.error("%s 'get_faults_by_id' exception ; %s ; %s" %
                       (PLUGIN, PLUGIN_ALARMID, ex))
        return 0

    if alarms:
        for alarm in alarms:
            eid = alarm.entity_instance_id
            # ignore alarms not for this host
            if obj.hostname not in eid:
                continue

            # maintain only the base alarm.
            if alarm.entity_instance_id != obj.base_eid:
                # clear any ntp server specific alarms over process restart
                # this is done to avoid the potential for stuck ntp ip alarms
                collectd.info("%s clearing found startup alarm '%s'" %
                              (PLUGIN, alarm.entity_instance_id))
                try:
                    api.clear_fault(PLUGIN_ALARMID, alarm.entity_instance_id)
                except Exception as ex:
                    collectd.error("%s 'clear_fault' exception ; %s:%s ; %s" %
                                   (PLUGIN,
                                    PLUGIN_ALARMID,
                                    alarm.entity_instance_id,
                                    ex))
                    return 0

            else:
                obj.alarm_raised = True
                collectd.info("%s found alarm %s:%s" %
                              (PLUGIN,
                               PLUGIN_ALARMID,
                               alarm.entity_instance_id))

                # ensure the base alarm is cleared if there are no
                # provisioned servers.
                if not obj.server_list_conf:
                    _clear_base_alarm()

    else:
        collectd.info("%s no major startup alarms found" % PLUGIN)

    obj.init_complete = True

    return 0


###############################################################################
#
# Name       : read_func
#
# Description: The sample read interface this plugin publishes to collectd.
#
#              collectd calls this interface every audit interval.
#
#              Runs ntpq -np to query NTP status and manages alarms based on
#              the result.
#
#              See file header (above) for more specific behavioral detail.
#
#              Should only run on a controller ; both
#
# Parameters : None
#
# Returns    : zero or non-zero on significant error
#
###############################################################################

def read_func():

    # ntp query is for controllers only
    if tsc.nodetype != 'controller':
        return 0

    if obj.init_complete is False:
        if os.path.exists(tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE) is True:
            collectd.info("%s re-running init" % PLUGIN)
            init_func()
        return 0

    # get a list of provisioned ntp servers
    _get_ntp_servers()

    # nothing to do while there are no provisioned NTP servers
    if len(obj.server_list_conf) == 0:
        return 0

    # Do NTP Query
    data = subprocess.check_output([PLUGIN_EXEC, PLUGIN_EXEC_OPTIONS])

    # Keep this FIT test code but make it commented out for security
    #
    # if os.path.exists('/var/run/fit/ntpq_data'):
    #     data = ''
    #     collectd.info("%s using ntpq FIT data" % PLUGIN)
    #     with open('/var/run/fit/ntpq_data', 'r') as infile:
    #         for line in infile:
    #             data += line

    if not data:
        collectd.error("%s no data from query" % PLUGIN)
        return 0

    # Get the ntp query output into a list of lines
    obj.ntpq = data.split('\n')

    # keep track of changes ; only log on changes
    reachable_list_changed = False
    unreachable_list_changed = False

    # Manage the selected server name
    #
    # save the old value so we can print a log if the selected server changes
    if obj.selected_server:
        obj.selected_server_save = obj.selected_server
    # always assume no selected server ; till it's learned
    obj.selected_server = ''

    # start with a fresh empty list for this new run to populate
    obj.server_list_ntpq = []

    # Loop through the ntpq output.
    # Ignore the first 2 lines ; just header data.
    for i in range(2, len(obj.ntpq)):

        # ignore empty lines or lines that are not long enough
        if len(obj.ntpq[i]) < 10:
            continue

        # log the ntpq output ; minus the 2 lines of header
        collectd.info("NTPQ: %s" % obj.ntpq[i])

        # Unreachable servers are ones whose lines start with a space
        ip = ''
        if obj.ntpq[i][0] == ' ':
            # get the ip address
            # example format of line:['', '132.163.4.102', '', '', '.INIT.',
            # get ip from index [1] of the list
            unreachable = obj.ntpq[i].split(' ')[1]
            if unreachable:
                # check to see if it's a controller ip
                # we skip over controller ips
                if _is_controller(unreachable) is False:
                    _add_ip_to_ntpq_server_list(unreachable)
                    if unreachable not in obj.unreachable_servers:
                        if _raise_alarm(unreachable) is False:
                            unreachable_list_changed = True
                            # if the FM call to raise the alarm worked then
                            # add this ip to the unreachable list if it's not
                            # already in it
                            _add_unreachable_server(unreachable)

        # Reachable servers are ones whose lines start with a '+'
        elif obj.ntpq[i][0] == '+':
            # remove the '+' and get the ip
            ip = obj.ntpq[i].split(' ')[0][1:]

        elif obj.ntpq[i][0] == '*':
            # remove the '*' and get the ip
            cols = obj.ntpq[i].split(' ')
            ip = cols[0][1:]
            if ip:
                ip_family = _is_ip_address(ip)
                obj.peer_selected = _is_controller(ip)
                if ip != obj.selected_server and obj.alarm_raised is True:
                    # a new ntp server is selected, old alarm may not be
                    # valid
                    _clear_base_alarm()
                    obj.alarm_raised = False
                if obj.peer_selected is False:
                    if obj.selected_server:
                        # don't update the selected server if more selections
                        # are found. go with the first one found.
                        collectd.info("%s additional selected server found"
                                      " '%s'; current selection is '%s'" %
                                      (PLUGIN, ip, obj.selected_server))
                    else:
                        # update the selected server list
                        obj.selected_server = ip
                        collectd.debug("%s selected server is '%s'" %
                                       (PLUGIN, obj.selected_server))
                else:
                    # refer to peer
                    refid = ''
                    for i in range(1, len(cols)):
                        if cols[i] != '':
                            refid = cols[i]
                            break

                    if refid not in ('', '127.0.0.1') and \
                            not _is_controller(refid) and \
                            socket.AF_INET == ip_family:
                        # ipv4 ; the peer controller refers to a time source
                        # that is not itself or a controller (this node)
                        obj.selected_server = ip
                        collectd.debug("peer controller has a reliable "
                                       "source")

        # anything else is unreachable
        else:
            unreachable = obj.ntpq[i][1:].split(' ')[0]
            if _is_controller(unreachable) is False:
                _add_ip_to_ntpq_server_list(unreachable)
                if unreachable not in obj.unreachable_servers:
                    if _raise_alarm(unreachable) is False:
                        unreachable_list_changed = True
                        # if the FM call to raise the alarm worked then
                        # add this ip to the unreachable list if it's not
                        # already in it
                        _add_unreachable_server(unreachable)

        if ip:
            # if the ip is valid then manage it
            if _is_controller(ip) is False:
                _add_ip_to_ntpq_server_list(ip)
                # add the ip to the reachable servers list
                # if it's not already there
                if ip not in obj.reachable_servers:
                    obj.reachable_servers.append(ip)
                    reachable_list_changed = True
                # make sure this IP is no longer in the unreachable
                # list and that alarms for it are cleared
                _remove_ip_from_unreachable_list(ip)

    _cleanup_stale_servers()

    if obj.selected_server:
        if obj.selected_server != obj.selected_server_save:
            collectd.info("%s selected server changed from '%s' to '%s'" %
                          (PLUGIN,
                           obj.selected_server_save,
                           obj.selected_server))
            obj.selected_server_save = obj.selected_server
            if obj.alarm_raised is True:
                _clear_base_alarm()

    elif obj.alarm_raised is False:
        if obj.peer_selected:
            collectd.info("%s peer is selected" % PLUGIN)
        else:
            collectd.error("%s no selected server" % PLUGIN)
        if _raise_alarm() is False:
            obj.selected_server_save = 'None'

    # only log and act on changes
    if reachable_list_changed is True:
        if obj.reachable_servers:
            collectd.info("%s reachable servers: %s" %
                          (PLUGIN, obj.reachable_servers))
            if obj.alarm_raised is True:
                if obj.selected_server and obj.reachable_servers:
                    _clear_base_alarm()
        else:
            collectd.error("%s no reachable servers" % PLUGIN)
            _raise_alarm()

    # only log changes
    if unreachable_list_changed is True:
        if obj.unreachable_servers:
            collectd.info("%s unreachable servers: %s" %
                          (PLUGIN, obj.unreachable_servers))
        else:
            collectd.info("%s all servers are reachable" % PLUGIN)

    # The sample published to the database is simply the number
    # of reachable servers if one is selected
    if not obj.selected_server:
        sample = 0
    else:
        sample = len(obj.reachable_servers)

    # Dispatch usage value to collectd
    val = collectd.Values(host=obj.hostname)
    val.plugin = 'ntpq'
    val.type = 'absolute'
    val.type_instance = 'reachable'
    val.dispatch(values=[sample])

    return 0


# register the config, init and read functions
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func, interval=PLUGIN_INTERVAL)
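

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original plugin): how the first
# character of each 'ntpq -np' peer line is interpreted by read_func above.
# The peer lines are hypothetical examples of the typical output format and
# the helper is never called by the plugin.
# ---------------------------------------------------------------------------
def _example_ntpq_line_classification():
    example_lines = [
        '*10.10.10.1     .GPS.      1 u   33   64  377   0.123   0.010  0.001',
        '+10.10.10.2     10.10.10.1 2 u   35   64  377   0.456   0.020  0.002',
        ' 132.163.4.102  .INIT.    16 u    -   64    0   0.000   0.000  0.000',
    ]
    classified = []
    for line in example_lines:
        if line[0] == '*':
            classified.append(('selected', line.split(' ')[0][1:]))
        elif line[0] == '+':
            classified.append(('reachable', line.split(' ')[0][1:]))
        elif line[0] == ' ':
            classified.append(('unreachable', line.split(' ')[1]))
    return classified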
@@ -1,311 +0,0 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
############################################################################
#
# This file contains common collectd plugin constructs and utilities
#
############################################################################

import collectd
import json
import uuid
import httplib2
import socket
import os
from oslo_concurrency import processutils
from fm_api import constants as fm_constants
import tsconfig.tsconfig as tsc

# http request constants
PLUGIN_TIMEOUT = 10
PLUGIN_HTTP_HEADERS = {'Accept': 'application/json', 'Connection': 'close'}

MIN_AUDITS_B4_FIRST_QUERY = 2


class PluginObject(object):

    def __init__(self, plugin, url):

        # static variables set in init_func
        self.plugin = plugin             # the name of this plugin
        self.hostname = ''               # the name of this host
        self.port = 0                    # the port number for this plugin
        self.base_eid = ''               # the base entity id host=<hostname>
        self.controller = False          # set true if node is controller

        # dynamic gate variables
        self.virtual = False             # set to True if host is virtual
        self.config_complete = False     # set to True once config is complete
        self.config_done = False         # set true if config_func completed ok
        self.init_done = False           # set true if init_func completed ok
        self.fm_connectivity = False     # set true when fm connectivity ok

        self.alarm_type = fm_constants.FM_ALARM_TYPE_7     # OPERATIONAL
        self.cause = fm_constants.ALARM_PROBABLE_CAUSE_50  # THRESHOLD CROSS
        self.suppression = True
        self.service_affecting = False

        # dynamic variables set in read_func
        self.usage = float(0)            # last usage value recorded as float
        self.value = float(0)            # last read value
        self.audits = 0                  # number of audits since init
        self.enabled = False             # tracks a plugin's enabled state
        self.alarmed = False             # tracks the current alarmed state
        self.mode = ''                   # mode specific to plugin

        # http and json specific variables
        self.url = url                   # target url
        self.jresp = None                # used to store the json response
        self.resp = ''

        self.objects = []                # list of plugin specific objects
        self.cmd = ''                    # plugin specific command string

        # Log controls
        self.config_logged = False       # used to log once the plugin config
        self.error_logged = False        # used to prevent log flooding
        self.log_throttle_count = 0      # used to count throttle logs
        self.INIT_LOG_THROTTLE = 10      # the init log throttle threshold
        self.http_retry_count = 0        # track http error cases
        self.HTTP_RETRY_THROTTLE = 6     # http retry threshold
        self.phase = 0                   # tracks current phase; init, sampling

        collectd.debug("%s Common PluginObject constructor [%s]" %
                       (plugin, url))

    ###########################################################################
    #
    # Name       : init_ready
    #
    # Description: Test for init ready condition
    #
    # Parameters : plugin name
    #
    # Returns    : False if initial config complete is not done
    #              True if initial config complete is done
    #
    ###########################################################################

    def init_ready(self):
        """Test for system init ready state"""

        if os.path.exists(tsc.INITIAL_CONFIG_COMPLETE_FLAG) is False:
            self.log_throttle_count += 1
            if self.log_throttle_count > self.INIT_LOG_THROTTLE:
                collectd.info("%s initialization needs retry" % self.plugin)
                self.log_throttle_count = 0
            return False
        else:
            self.log_throttle_count = 0

        return True

    ###########################################################################
    #
    # Name       : gethostname
    #
    # Description: load the hostname
    #
    # Parameters : plugin name
    #
    # Returns    : Success - hostname
    #              Failure - None
    #
    # Updates    : obj.hostname
    #
    ###########################################################################
    def gethostname(self):
        """Fetch the hostname"""

        # get current hostname
        try:
            hostname = socket.gethostname()
            if hostname:
                return hostname
        except:
            collectd.error("%s failed to get hostname" % self.plugin)

        return None

    ###########################################################################
    #
    # Name       : is_virtual
    #
    # Description: Execute facter command with output filter on 'is_virtual'
    #
    # Parameters : None
    #
    # Returns    : True if current host is virtual.
    #              False if current host is NOT virtual
    #
    ###########################################################################
    def is_virtual(self):
        """Check for virtual host"""

        try:
            cmd = '/usr/bin/facter is_virtual'
            res, err = processutils.execute(cmd, shell=True)
            if err:
                return False
            elif res:
                # remove the trailing '\n' with strip()
                if res.strip() == 'true':
                    collectd.info("%s %s is virtual" %
                                  (self.plugin, self.hostname))
                    return True

        except Exception as ex:
            collectd.info("%s failed to execute '/usr/bin/facter' ; %s" %
                          (self.plugin, ex))

        return False

    ###########################################################################
    #
    # Name       : check_for_fit
    #
    # Description: load FIT data if it is present
    #
    # Fit Format : unit data -> 0 89
    #              - instance 0 value 89
    #
    # Parameters : plugin name
    #              object to update with fit
    #              name in fit file
    #              unit
    #
    # Returns    : Did a failure occur ?
    #              False = no
    #              True = yes
    #
    # Updates    : self.usage with FIT value if FIT conditions are present
    #              and apply
    #
    ###########################################################################
    def check_for_fit(self, name, unit):
        """Load FIT data into usage if it exists"""

        fit_file = '/var/run/fit/' + name + '_data'

        if os.path.exists(fit_file):
            valid = False
            with open(fit_file, 'r') as infile:
                for line in infile:
                    try:
                        inst, val = line.split(' ')
                        if int(unit) == int(inst):
                            self.usage = float(val)
                            valid = True

                    except:
                        try:
                            val = float(line)
                            self.usage = float(val)
                            valid = True

                        except:
                            collectd.error("%s bad FIT data; ignoring" %
                                           self.plugin)

            if valid is True:
                collectd.info("%s %.2f usage (unit %d) (FIT)" %
                              (self.plugin, self.usage, unit))
                return False

        return True
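
    # -----------------------------------------------------------------------
    # Illustrative sketch (not part of the original module): the two FIT
    # line formats that check_for_fit() above accepts. The sample lines are
    # hypothetical and this helper is never called by the plugins.
    # -----------------------------------------------------------------------
    def _example_fit_line_formats(self):
        return {
            '0 89\n': 'instance 0, value 89.0 ; loaded when unit == 0',
            '89.5\n': 'no instance ; value 89.5 loaded for any unit',
        }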

    ###########################################################################
    #
    # Name       : make_http_request
    #
    # Description: Issue an http request to the specified URL.
    #              Load and return the response
    #              Handling execution errors
    #
    # Parameters : self as current context.
    #
    #              Optional:
    #
    #              url  - override the default self url with http address to
    #                     issue the get request to.
    #              to   - timeout override
    #              hdrs - override use of the default header list
    #
    # Updates    : self.jresp with the json string response from the request.
    #
    # Returns    : Error indication (True/False)
    #              True on success
    #              False on error
    #
    ###########################################################################
    def make_http_request(self, url=None, to=None, hdrs=None):
        """Make a blocking HTTP Request and return result"""

        try:

            # handle timeout override
            if to is None:
                to = PLUGIN_TIMEOUT

            # handle url override
            if url is None:
                url = self.url

            # handle header override
            if hdrs is None:
                hdrs = PLUGIN_HTTP_HEADERS

            http = httplib2.Http(timeout=to)
            resp = http.request(url, headers=hdrs)

        except Exception as ex:
            collectd.info("%s http request exception ; %s" %
                          (self.plugin, str(ex)))
            return False

        try:
            collectd.debug("%s Resp: %s" %
                           (self.plugin, resp[1]))

            self.resp = resp[1]
            self.jresp = json.loads(resp[1])

        except Exception as ex:
            collectd.error("%s http response parse exception ; %s" %
                           (self.plugin, str(ex)))
            if len(self.resp):
                collectd.error("%s response: %s" %
                               (self.plugin, self.resp))
            return False
        return True
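

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): how a plugin
# might drive make_http_request() and consume the parsed response. The
# plugin name and URL below are hypothetical ; this helper is never called.
# ---------------------------------------------------------------------------
def _example_make_http_request_usage():
    example = PluginObject('example plugin', 'http://localhost:8086/ping')
    if example.make_http_request(to=5):
        # on success the raw body is in .resp and the parsed json in .jresp
        return example.jresp
    return None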


def is_uuid_like(val):
    """Returns validation of a value as a UUID

    For our purposes, a UUID is a canonical form string:
    aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa
    """
    try:
        return str(uuid.UUID(val)) == val
    except (TypeError, ValueError, AttributeError):
        return False


def get_severity_str(severity):
    """get string that represents the specified severity"""

    if severity == fm_constants.FM_ALARM_SEVERITY_CLEAR:
        return "clear"
    elif severity == fm_constants.FM_ALARM_SEVERITY_CRITICAL:
        return "critical"
    elif severity == fm_constants.FM_ALARM_SEVERITY_MAJOR:
        return "major"
    elif severity == fm_constants.FM_ALARM_SEVERITY_MINOR:
        return "minor"
    else:
        return "unknown"
@@ -1,15 +0,0 @@
<Plugin "threshold">
|
|
||||||
<Plugin "ptp">
|
|
||||||
<Type "time_offset">
|
|
||||||
Instance "nsec"
|
|
||||||
Persist true
|
|
||||||
PersistOK true
|
|
||||||
WarningMax 1000
|
|
||||||
FailureMax 1000000
|
|
||||||
WarningMin -1000
|
|
||||||
FailureMin -1000000
|
|
||||||
Hits 2
|
|
||||||
Invert false
|
|
||||||
</Type>
|
|
||||||
</Plugin>
|
|
||||||
</Plugin>
|
|
@@ -1,988 +0,0 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
############################################################################
#
# This file is the collectd 'Precision Time Protocol' Service Monitor.
#
# Algorithm:
#
#   while not config ; check again
#   while not init ; retry
#   if startup
#       clear all ptp alarms
#   if ptp enabled
#       if ptp not running
#           raise 'process' alarm
#       else
#           read grand master and current skew
#           if not controller and is grand master
#               raise 'no lock' alarm
#           if skew is out-of-tolerance
#               raise out-of-tolerance alarm
#
#
#   manage alarm state throughout
#       retry on alarm state change failures
#       only make raise/clear alarm calls on severity state changes
#
############################################################################
import os
import collectd
import subprocess
import tsconfig.tsconfig as tsc
import plugin_common as pc
from fm_api import constants as fm_constants
from fm_api import fm_api

debug = False

# Fault manager API Object
api = fm_api.FaultAPIsV2()

PLUGIN_ALARMID = "100.119"

# name of the plugin - all logs produced by this plugin are prefixed with this
PLUGIN = 'ptp plugin'

# Service name
PTP = 'Precision Time Protocol (PTP)'

# Interface Monitoring Interval in seconds
PLUGIN_AUDIT_INTERVAL = 300

# Sample Data 'type' and 'instance' database field values.
PLUGIN_TYPE = 'time_offset'
PLUGIN_TYPE_INSTANCE = 'nsec'

# Primary PTP service name
PLUGIN_SERVICE = 'ptp4l.service'

# Plugin configuration file
#
# This plugin looks for the timestamping mode in the ptp4l config file.
#   time_stamping hardware
#
PLUGIN_CONF_FILE = '/etc/ptp4l.conf'
PLUGIN_CONF_TIMESTAMPING = 'time_stamping'

# Tools used by plugin
SYSTEMCTL = '/usr/bin/systemctl'
ETHTOOL = '/usr/sbin/ethtool'
PLUGIN_STATUS_QUERY_EXEC = '/usr/sbin/pmc'

# Query PTP service administrative (enabled/disabled) state
#
# > systemctl is-enabled ptp4l
# enabled
# > systemctl disable ptp4l
# > systemctl is-enabled ptp4l
# disabled

SYSTEMCTL_IS_ENABLED_OPTION = 'is-enabled'
SYSTEMCTL_IS_ENABLED_RESPONSE = 'enabled'
SYSTEMCTL_IS_DISABLED_RESPONSE = 'disabled'

# Query PTP service activity (active=running / inactive) state
#
# > systemctl is-active ptp4l
# active
# > systemctl stop ptp4l
# > systemctl is-active ptp4l
# inactive

SYSTEMCTL_IS_ACTIVE_OPTION = 'is-active'
SYSTEMCTL_IS_ACTIVE_RESPONSE = 'active'
SYSTEMCTL_IS_INACTIVE_RESPONSE = 'inactive'
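
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original plugin): how the service
# state constants above are typically combined with subprocess to query
# systemd, mirroring the checks performed later in read_func(). This helper
# is never called by the plugin.
# ---------------------------------------------------------------------------
def _example_ptp4l_service_state():
    admin = subprocess.check_output(
        [SYSTEMCTL, SYSTEMCTL_IS_ENABLED_OPTION, PLUGIN_SERVICE]).rstrip()
    active = subprocess.check_output(
        [SYSTEMCTL, SYSTEMCTL_IS_ACTIVE_OPTION, PLUGIN_SERVICE]).rstrip()
    return (admin == SYSTEMCTL_IS_ENABLED_RESPONSE,
            active == SYSTEMCTL_IS_ACTIVE_RESPONSE)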

# Alarm Cause codes ; used to specify what alarm EID to assert or clear.
ALARM_CAUSE__NONE = 0
ALARM_CAUSE__PROCESS = 1
ALARM_CAUSE__OOT = 2
ALARM_CAUSE__NO_LOCK = 3
ALARM_CAUSE__UNSUPPORTED_HW = 4
ALARM_CAUSE__UNSUPPORTED_SW = 5
ALARM_CAUSE__UNSUPPORTED_LEGACY = 6

# Run Phase
RUN_PHASE__INIT = 0
RUN_PHASE__DISABLED = 1
RUN_PHASE__NOT_RUNNING = 2
RUN_PHASE__SAMPLING = 3

# Clock Sync Out-Of-Tolerance thresholds
OOT_MINOR_THRESHOLD = int(1000)
OOT_MAJOR_THRESHOLD = int(1000000)

# Instantiate the common plugin control object
obj = pc.PluginObject(PLUGIN, "")
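

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original plugin): one plausible way a
# time_offset sample in nanoseconds relates to the out-of-tolerance
# thresholds above (minor at 1 microsecond, major at 1 millisecond). This
# helper is an assumption for illustration only and is never called.
# ---------------------------------------------------------------------------
def _example_oot_severity(offset_nsec):
    if abs(offset_nsec) >= OOT_MAJOR_THRESHOLD:
        return fm_constants.FM_ALARM_SEVERITY_MAJOR
    elif abs(offset_nsec) >= OOT_MINOR_THRESHOLD:
        return fm_constants.FM_ALARM_SEVERITY_MINOR
    return fm_constants.FM_ALARM_SEVERITY_CLEAR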
# Create an alarm management class
|
|
||||||
class PTP_alarm_object:
|
|
||||||
|
|
||||||
def __init__(self, interface=None):
|
|
||||||
|
|
||||||
self.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
|
|
||||||
self.cause = fm_constants.ALARM_PROBABLE_CAUSE_50
|
|
||||||
self.alarm = ALARM_CAUSE__NONE
|
|
||||||
self.interface = interface
|
|
||||||
self.raised = False
|
|
||||||
self.reason = ''
|
|
||||||
self.repair = ''
|
|
||||||
self.eid = ''
|
|
||||||
|
|
||||||
|
|
||||||
# Plugin specific control class and object.
|
|
||||||
class PTP_ctrl_object:
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
|
|
||||||
self.gm_log_throttle = 0
|
|
||||||
self.nolock_alarm_object = None
|
|
||||||
self.process_alarm_object = None
|
|
||||||
self.oot_alarm_object = None
|
|
||||||
|
|
||||||
|
|
||||||
ctrl = PTP_ctrl_object()
|
|
||||||
|
|
||||||
|
|
||||||
# Alarm object list, one entry for each interface and alarm cause case
|
|
||||||
ALARM_OBJ_LIST = []
|
|
||||||
|
|
||||||
|
|
||||||
# UT verification utilities
|
|
||||||
def assert_all_alarms():
|
|
||||||
for o in ALARM_OBJ_LIST:
|
|
||||||
raise_alarm(o.alarm, o.interface, 0)
|
|
||||||
|
|
||||||
|
|
||||||
def clear_all_alarms():
|
|
||||||
for o in ALARM_OBJ_LIST:
|
|
||||||
if clear_alarm(o.eid) is True:
|
|
||||||
msg = 'cleared'
|
|
||||||
else:
|
|
||||||
msg = 'clear failed'
|
|
||||||
collectd.info("%s %s:%s alarm %s" %
|
|
||||||
(PLUGIN, PLUGIN_ALARMID, o.eid, msg))
|
|
||||||
|
|
||||||
|
|
||||||
def print_alarm_object(o):
|
|
||||||
collectd.info("%s Interface:%s Cause: %d Severity:%s Raised:%d" %
|
|
||||||
(PLUGIN,
|
|
||||||
o.interface,
|
|
||||||
o.alarm,
|
|
||||||
o.severity,
|
|
||||||
o.raised))
|
|
||||||
collectd.info("%s Entity:[%s]" % (PLUGIN, o.eid))
|
|
||||||
collectd.info("%s Reason:[%s]" % (PLUGIN, o.reason))
|
|
||||||
collectd.info("%s Repair:[%s]" % (PLUGIN, o.repair))
|
|
||||||
|
|
||||||
|
|
||||||
def print_alarm_objects():
|
|
||||||
for o in ALARM_OBJ_LIST:
|
|
||||||
print_alarm_object(o)
|
|
||||||
|
|
||||||
|
|
||||||
# Interface:Supported Modes dictionary. key:value
|
|
||||||
#
|
|
||||||
# interface:modes
|
|
||||||
#
|
|
||||||
interfaces = {}
|
|
||||||
|
|
||||||
|
|
||||||
#####################################################################
|
|
||||||
#
|
|
||||||
# Name : _get_supported_modes
|
|
||||||
#
|
|
||||||
# Description: Invoke ethtool -T <interface> and load its
|
|
||||||
# time stamping capabilities.
|
|
||||||
#
|
|
||||||
# hardware, software or legacy.
|
|
||||||
#
|
|
||||||
# Parameters : The name of the physical interface to query the
|
|
||||||
# supported modes for.
|
|
||||||
#
|
|
||||||
# Interface Capabilities Output Examples:
|
|
||||||
#
|
|
||||||
# vbox prints this as it only supports software timestamping
|
|
||||||
# software-transmit (SOF_TIMESTAMPING_TX_SOFTWARE)
|
|
||||||
# software-receive (SOF_TIMESTAMPING_RX_SOFTWARE)
|
|
||||||
#
|
|
||||||
# full support output looks like this
|
|
||||||
# hardware-transmit (SOF_TIMESTAMPING_TX_HARDWARE)
|
|
||||||
# software-transmit (SOF_TIMESTAMPING_TX_SOFTWARE)
|
|
||||||
# hardware-receive (SOF_TIMESTAMPING_RX_HARDWARE)
|
|
||||||
# software-receive (SOF_TIMESTAMPING_RX_SOFTWARE)
|
|
||||||
# hardware-raw-clock (SOF_TIMESTAMPING_RAW_HARDWARE)
|
|
||||||
#
|
|
||||||
# Only legacy support output looks like this
|
|
||||||
# hardware-raw-clock (SOF_TIMESTAMPING_RAW_HARDWARE)
|
|
||||||
#
|
|
||||||
# Provisionable PTP Modes are
|
|
||||||
# hardware -> hardware-transmit/receive
|
|
||||||
# software -> software-transmit/receive
|
|
||||||
# legacy -> hardware-raw-clock
|
|
||||||
|
|
||||||
TIMESTAMP_MODE__HW = 'hardware'
|
|
||||||
TIMESTAMP_MODE__SW = 'software'
|
|
||||||
TIMESTAMP_MODE__LEGACY = 'legacy'
|
|
||||||
|
|
||||||
|
|
||||||
#
|
|
||||||
# Returns : a list of supported modes
|
|
||||||
#
|
|
||||||
#####################################################################
|
|
||||||
def _get_supported_modes(interface):
|
|
||||||
"""Get the supported modes for the specified interface"""
|
|
||||||
|
|
||||||
hw_tx = hw_rx = sw_tx = sw_rx = False
|
|
||||||
modes = []
|
|
||||||
data = subprocess.check_output([ETHTOOL, '-T', interface]).split('\n')
|
|
||||||
if data:
|
|
||||||
collectd.debug("%s 'ethtool -T %s' output:%s\n" %
|
|
||||||
(PLUGIN, interface, data))
|
|
||||||
check_for_modes = False
|
|
||||||
for i in range(0, len(data)):
|
|
||||||
collectd.debug("%s data[%d]:%s\n" % (PLUGIN, i, data[i]))
|
|
||||||
if 'Capabilities' in data[i]:
|
|
||||||
|
|
||||||
# start of capabilities list
|
|
||||||
check_for_modes = True
|
|
||||||
|
|
||||||
elif check_for_modes is True:
|
|
||||||
|
|
||||||
if 'PTP Hardware Clock' in data[i]:
|
|
||||||
# no more modes after this label
|
|
||||||
break
|
|
||||||
elif 'hardware-transmit' in data[i]:
|
|
||||||
hw_tx = True
|
|
||||||
elif 'hardware-receive' in data[i]:
|
|
||||||
hw_rx = True
|
|
||||||
elif 'software-transmit' in data[i]:
|
|
||||||
sw_tx = True
|
|
||||||
elif 'software-receive' in data[i]:
|
|
||||||
sw_rx = True
|
|
||||||
elif 'hardware-raw-clock' in data[i]:
|
|
||||||
modes.append(TIMESTAMP_MODE__LEGACY)
|
|
||||||
|
|
||||||
if sw_tx is True and sw_rx is True:
|
|
||||||
modes.append(TIMESTAMP_MODE__SW)
|
|
||||||
|
|
||||||
if hw_tx is True and hw_rx is True:
|
|
||||||
modes.append(TIMESTAMP_MODE__HW)
|
|
||||||
|
|
||||||
if modes:
|
|
||||||
collectd.debug("%s %s interface PTP capabilities: %s" %
|
|
||||||
(PLUGIN, interface, modes))
|
|
||||||
else:
|
|
||||||
collectd.info("%s no capabilities advertised for %s" %
|
|
||||||
(PLUGIN, interface))
|
|
||||||
|
|
||||||
else:
|
|
||||||
collectd.info("%s no ethtool output for %s" % (PLUGIN, interface))
|
|
||||||
return None
|
|
||||||
|
|
||||||
return modes
|
|
||||||
|
|
||||||
|
|
||||||
#####################################################################
|
|
||||||
#
|
|
||||||
# Name : get_alarm_object
|
|
||||||
#
|
|
||||||
# Description: Search the alarm list based on the alarm cause
|
|
||||||
# code and interface.
|
|
||||||
#
|
|
||||||
# Returns : Alarm object if found ; otherwise None
|
|
||||||
#
|
|
||||||
#####################################################################
|
|
||||||
def get_alarm_object(alarm, interface=None):
|
|
||||||
"""Alarm object lookup"""
|
|
||||||
|
|
||||||
for o in ALARM_OBJ_LIST:
|
|
||||||
# print_alarm_object(o)
|
|
||||||
if interface is None:
|
|
||||||
if o.alarm == alarm:
|
|
||||||
return o
|
|
||||||
else:
|
|
||||||
if o.interface == interface:
|
|
||||||
if o.alarm == alarm:
|
|
||||||
return o
|
|
||||||
|
|
||||||
collectd.info("%s alarm object lookup failed ; %d:%s" %
|
|
||||||
(PLUGIN, alarm, interface))
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
#####################################################################
|
|
||||||
#
|
|
||||||
# Name : clear_alarm
|
|
||||||
#
|
|
||||||
# Description: Clear the ptp alarm with the specified entity ID.
|
|
||||||
#
|
|
||||||
# Returns : True if operation succeeded
|
|
||||||
# False if there was an error exception.
|
|
||||||
#
|
|
||||||
# Assumptions: Caller can decide to retry based on return status.
|
|
||||||
#
|
|
||||||
#####################################################################
|
|
||||||
def clear_alarm(eid):
|
|
||||||
"""Clear the ptp alarm with the specified entity ID"""
|
|
||||||
|
|
||||||
try:
|
|
||||||
if api.clear_fault(PLUGIN_ALARMID, eid) is True:
|
|
||||||
collectd.info("%s %s:%s alarm cleared" %
|
|
||||||
(PLUGIN, PLUGIN_ALARMID, eid))
|
|
||||||
else:
|
|
||||||
collectd.info("%s %s:%s alarm already cleared" %
|
|
||||||
(PLUGIN, PLUGIN_ALARMID, eid))
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception as ex:
|
|
||||||
collectd.error("%s 'clear_fault' exception ; %s:%s ; %s" %
|
|
||||||
(PLUGIN, PLUGIN_ALARMID, eid, ex))
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
#####################################################################
|
|
||||||
#
|
|
||||||
# Name : raise_alarm
|
|
||||||
#
|
|
||||||
# Description: Assert a specific PTP alarm based on the alarm cause
|
|
||||||
# code and interface.
|
|
||||||
#
|
|
||||||
# Handle special case cause codes
|
|
||||||
# Handle failure to raise fault
|
|
||||||
#
|
|
||||||
# Assumptions: Short circuited Success return if the alarm is
|
|
||||||
# already known to be asserted.
|
|
||||||
#
|
|
||||||
# Returns : False on Failure
|
|
||||||
# True on Success
|
|
||||||
#
|
|
||||||
#####################################################################
|
|
||||||
def raise_alarm(alarm_cause, interface=None, data=0):
|
|
||||||
"""Assert a cause based PTP alarm"""
|
|
||||||
|
|
||||||
collectd.debug("%s Raising Alarm %d" % (PLUGIN, alarm_cause))
|
|
||||||
|
|
||||||
alarm = get_alarm_object(alarm_cause, interface)
|
|
||||||
if alarm is None:
|
|
||||||
# log created for None case in the get_alarm_object util
|
|
||||||
return True
|
|
||||||
|
|
||||||
# copy the reason as it might be updated for the OOT,
|
|
||||||
# most typical, case.
|
|
||||||
reason = alarm.reason
|
|
||||||
|
|
||||||
# Handle some special cases
|
|
||||||
#
|
|
||||||
|
|
||||||
if alarm_cause == ALARM_CAUSE__OOT:
|
|
||||||
# If this is an out of tolerance alarm then add the
|
|
||||||
# out of tolerance reading to the reason string before
|
|
||||||
# asserting the alarm.
|
|
||||||
#
|
|
||||||
# Keep the alarm updated with the latest sample reading
|
|
||||||
# and severity even if its already asserted.
|
|
||||||
if abs(float(data)) > 100000000000:
|
|
||||||
reason += 'more than 100 seconds'
|
|
||||||
elif abs(float(data)) > 10000000000:
|
|
||||||
reason += 'more than 10 seconds'
|
|
||||||
elif abs(float(data)) > 1000000000:
|
|
||||||
reason += 'more than 1 second'
|
|
||||||
elif abs(float(data)) > 1000000:
|
|
||||||
reason += str(abs(int(data)) / 1000000)
|
|
||||||
reason += ' millisecs'
|
|
||||||
elif abs(float(data)) > 1000:
|
|
||||||
reason += str(abs(int(data)) / 1000)
|
|
||||||
reason += ' microsecs'
|
|
||||||
else:
|
|
||||||
reason += str(float(data))
|
|
||||||
reason += ' ' + PLUGIN_TYPE_INSTANCE
|
|
||||||
|
|
||||||
elif alarm.raised is True:
|
|
||||||
# If alarm already raised then exit.
|
|
||||||
#
|
|
||||||
# All other alarms are a Major so there is no need to
|
|
||||||
# track a change in severity and update accordingly.
|
|
||||||
return True
|
|
||||||
|
|
||||||
elif alarm_cause == ALARM_CAUSE__PROCESS:
|
|
||||||
reason = 'Provisioned ' + PTP + ' \'' + obj.mode
|
|
||||||
reason += '\' time stamping mode seems to be unsupported by this host'
|
|
||||||
|
|
||||||
try:
|
|
||||||
fault = fm_api.Fault(
|
|
||||||
alarm_id=PLUGIN_ALARMID,
|
|
||||||
alarm_state=fm_constants.FM_ALARM_STATE_SET,
|
|
||||||
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
|
|
||||||
entity_instance_id=alarm.eid,
|
|
||||||
severity=alarm.severity,
|
|
||||||
reason_text=reason,
|
|
||||||
alarm_type=obj.alarm_type,
|
|
||||||
probable_cause=alarm.cause,
|
|
||||||
proposed_repair_action=alarm.repair,
|
|
||||||
service_affecting=False, # obj.service_affecting,
|
|
||||||
suppression=True) # obj.suppression)
|
|
||||||
|
|
||||||
alarm_uuid = api.set_fault(fault)
|
|
||||||
if pc.is_uuid_like(alarm_uuid) is False:
|
|
||||||
|
|
||||||
# Don't _add_unreachable_server list if the fm call failed.
|
|
||||||
# That way it will be retried at a later time.
|
|
||||||
collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
|
|
||||||
(PLUGIN, PLUGIN_ALARMID, alarm.eid, alarm_uuid))
|
|
||||||
return False
|
|
||||||
|
|
||||||
else:
|
|
||||||
collectd.info("%s %s:%s:%s alarm raised" %
|
|
||||||
(PLUGIN, PLUGIN_ALARMID, alarm.eid, alarm.severity))
|
|
||||||
alarm.raised = True
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception as ex:
|
|
||||||
collectd.error("%s 'set_fault' exception ; %s:%s:%s ; %s" %
|
|
||||||
(PLUGIN,
|
|
||||||
PLUGIN_ALARMID,
|
|
||||||
alarm.eid,
|
|
||||||
alarm.severity,
|
|
||||||
ex))
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
#####################################################################
|
|
||||||
#
|
|
||||||
# Name : create_interface_alarm_objects
|
|
||||||
#
|
|
||||||
# Description: Create alarm objects for specified interface
|
|
||||||
#
|
|
||||||
#####################################################################
|
|
||||||
def create_interface_alarm_objects(interface=None):
|
|
||||||
"""Create alarm objects"""
|
|
||||||
|
|
||||||
collectd.debug("%s Alarm Object Create: Interface:%s " %
|
|
||||||
(PLUGIN, interface))
|
|
||||||
|
|
||||||
if interface is None:
|
|
||||||
o = PTP_alarm_object()
|
|
||||||
o.alarm = ALARM_CAUSE__PROCESS
|
|
||||||
o.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
|
|
||||||
o.reason = obj.hostname + ' does not support the provisioned '
|
|
||||||
o.reason += PTP + ' mode '
|
|
||||||
o.repair = 'Check host hardware reference manual '
|
|
||||||
o.repair += 'to verify that the selected PTP mode is supported'
|
|
||||||
o.eid = obj.base_eid + '.ptp'
|
|
||||||
o.cause = fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN # 'unknown'
|
|
||||||
ALARM_OBJ_LIST.append(o)
|
|
||||||
ctrl.process_alarm_object = o
|
|
||||||
|
|
||||||
o = PTP_alarm_object()
|
|
||||||
o.alarm = ALARM_CAUSE__OOT
|
|
||||||
o.severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
|
|
||||||
o.reason = obj.hostname + ' '
|
|
||||||
o.reason += PTP + " clocking is out of tolerance by "
|
|
||||||
o.repair = "Check quality of the clocking network"
|
|
||||||
o.eid = obj.base_eid + '.ptp=out-of-tolerance'
|
|
||||||
o.cause = fm_constants.ALARM_PROBABLE_CAUSE_50 # THRESHOLD CROSS
|
|
||||||
ALARM_OBJ_LIST.append(o)
|
|
||||||
ctrl.oot_alarm_object = o
|
|
||||||
|
|
||||||
o = PTP_alarm_object()
|
|
||||||
# Only applies to storage and worker nodes
|
|
||||||
o.alarm = ALARM_CAUSE__NO_LOCK
|
|
||||||
o.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
|
|
||||||
o.reason = obj.hostname
|
|
||||||
o.reason += ' is not locked to remote PTP Grand Master'
|
|
||||||
o.repair = 'Check network'
|
|
||||||
o.eid = obj.base_eid + '.ptp=no-lock'
|
|
||||||
o.cause = fm_constants.ALARM_PROBABLE_CAUSE_51 # timing-problem
|
|
||||||
ALARM_OBJ_LIST.append(o)
|
|
||||||
ctrl.nolock_alarm_object = o
|
|
||||||
|
|
||||||
else:
|
|
||||||
o = PTP_alarm_object(interface)
|
|
||||||
o.alarm = ALARM_CAUSE__UNSUPPORTED_HW
|
|
||||||
o.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
|
|
||||||
o.reason = obj.hostname + " '" + interface + "' does not support "
|
|
||||||
o.reason += PTP + ' Hardware timestamping'
|
|
||||||
o.repair = 'Check host hardware reference manual to verify PTP '
|
|
||||||
o.repair += 'Hardware timestamping is supported by this interface'
|
|
||||||
o.eid = obj.base_eid + '.ptp=' + interface
|
|
||||||
o.eid += '.unsupported=hardware-timestamping'
|
|
||||||
o.cause = fm_constants.ALARM_PROBABLE_CAUSE_7 # 'config error'
|
|
||||||
ALARM_OBJ_LIST.append(o)
|
|
||||||
|
|
||||||
o = PTP_alarm_object(interface)
|
|
||||||
o.alarm = ALARM_CAUSE__UNSUPPORTED_SW
|
|
||||||
o.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
|
|
||||||
o.reason = obj.hostname + " '" + interface + "' does not support "
|
|
||||||
o.reason += PTP + ' Software timestamping'
|
|
||||||
o.repair = 'Check host hardware reference manual to verify PTP '
|
|
||||||
o.repair += 'Software timestamping is supported by this interface'
|
|
||||||
o.eid = obj.base_eid + '.ptp=' + interface
|
|
||||||
o.eid += '.unsupported=software-timestamping'
|
|
||||||
o.cause = fm_constants.ALARM_PROBABLE_CAUSE_7 # 'config error'
|
|
||||||
ALARM_OBJ_LIST.append(o)
|
|
||||||
|
|
||||||
o = PTP_alarm_object(interface)
|
|
||||||
o.alarm = ALARM_CAUSE__UNSUPPORTED_LEGACY
|
|
||||||
o.severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
|
|
||||||
o.reason = obj.hostname + " '" + interface + "' does not support "
|
|
||||||
o.reason += PTP + " Legacy timestamping"
|
|
||||||
o.repair = 'Check host hardware reference manual to verify PTP '
|
|
||||||
o.repair += 'Legacy or Raw Clock is supported by this host'
|
|
||||||
o.eid = obj.base_eid + '.ptp=' + interface
|
|
||||||
o.eid += '.unsupported=legacy-timestamping'
|
|
||||||
o.cause = fm_constants.ALARM_PROBABLE_CAUSE_7 # 'config error'
|
|
||||||
ALARM_OBJ_LIST.append(o)
|
|
||||||
|
|
||||||
|
|
||||||
#####################################################################
|
|
||||||
#
|
|
||||||
# Name : read_timestamp_mode
|
|
||||||
#
|
|
||||||
# Description: Refresh the timestamping mode if it changes
|
|
||||||
#
|
|
||||||
#####################################################################
|
|
||||||
def read_timestamp_mode():
|
|
||||||
"""Load timestamping mode"""
|
|
||||||
|
|
||||||
if os.path.exists(PLUGIN_CONF_FILE):
|
|
||||||
current_mode = obj.mode
|
|
||||||
with open(PLUGIN_CONF_FILE, 'r') as infile:
|
|
||||||
for line in infile:
|
|
||||||
if PLUGIN_CONF_TIMESTAMPING in line:
|
|
||||||
obj.mode = line.split()[1].strip('\n')
|
|
||||||
break
|
|
||||||
|
|
||||||
if obj.mode:
|
|
||||||
if obj.mode != current_mode:
|
|
||||||
collectd.info("%s Timestamping Mode: %s" %
|
|
||||||
(PLUGIN, obj.mode))
|
|
||||||
else:
|
|
||||||
collectd.error("%s failed to get Timestamping Mode" % PLUGIN)
|
|
||||||
else:
|
|
||||||
collectd.error("%s failed to load ptp4l configuration" % PLUGIN)
|
|
||||||
obj.mode = None
|
|
||||||
|
|
||||||
|
|
||||||
#####################################################################
|
|
||||||
#
|
|
||||||
# Name : init_func
|
|
||||||
#
|
|
||||||
# Description: The collectd initialization entrypoint for
|
|
||||||
# this plugin
|
|
||||||
#
|
|
||||||
# Assumptions: called only once
|
|
||||||
#
|
|
||||||
# Algorithm : check for no
|
|
||||||
#
|
|
||||||
#
|
|
||||||
#####################################################################
|
|
||||||
def init_func():
|
|
||||||
|
|
||||||
if obj.init_ready() is False:
|
|
||||||
return False
|
|
||||||
|
|
||||||
obj.hostname = obj.gethostname()
|
|
||||||
obj.base_eid = 'host=' + obj.hostname
|
|
||||||
|
|
||||||
# Create the interface independent alarm objects.
|
|
||||||
create_interface_alarm_objects()
|
|
||||||
|
|
||||||
# load monitored interfaces and supported modes
|
|
||||||
if os.path.exists(PLUGIN_CONF_FILE):
|
|
||||||
with open(PLUGIN_CONF_FILE, 'r') as infile:
|
|
||||||
for line in infile:
|
|
||||||
# The PTP interfaces used are specified in the ptp4l.conf
|
|
||||||
# file as [interface]. There may be more than one.
|
|
||||||
# Presently there is no need to track the function of the
|
|
||||||
# interface ; namely mgmnt or oam.
|
|
||||||
if line[0] == '[':
|
|
||||||
interface = line.split(']')[0].split('[')[1]
|
|
||||||
if interface and interface != 'global':
|
|
||||||
interfaces[interface] = _get_supported_modes(interface)
|
|
||||||
create_interface_alarm_objects(interface)
|
|
||||||
|
|
||||||
if PLUGIN_CONF_TIMESTAMPING in line:
|
|
||||||
obj.mode = line.split()[1].strip('\n')
|
|
||||||
|
|
||||||
if obj.mode:
|
|
||||||
collectd.info("%s Timestamping Mode: %s" %
|
|
||||||
(PLUGIN, obj.mode))
|
|
||||||
else:
|
|
||||||
collectd.error("%s failed to get Timestamping Mode" % PLUGIN)
|
|
||||||
else:
|
|
||||||
collectd.error("%s failed to load ptp4l configuration" % PLUGIN)
|
|
||||||
obj.mode = None
|
|
||||||
|
|
||||||
for key, value in interfaces.items():
|
|
||||||
collectd.info("%s interface %s supports timestamping modes: %s" %
|
|
||||||
(PLUGIN, key, value))
|
|
||||||
|
|
||||||
# remove '# to dump alarm object data
|
|
||||||
# print_alarm_objects()
|
|
||||||
|
|
||||||
if tsc.nodetype == 'controller':
|
|
||||||
obj.controller = True
|
|
||||||
|
|
||||||
obj.virtual = obj.is_virtual()
|
|
||||||
obj.init_done = True
|
|
||||||
obj.log_throttle_count = 0
|
|
||||||
collectd.info("%s initialization complete" % PLUGIN)
|
|
||||||
|
|
||||||
|
|
||||||
#####################################################################
|
|
||||||
#
|
|
||||||
# Name : read_func
|
|
||||||
#
|
|
||||||
# Description: The collectd audit entrypoint for PTP Monitoring
#
# Assumptions: collectd calls init_func one time.
#
#     retry init if needed
#     retry fm connect if needed
#     check service enabled state
#     check service running state
#         error -> alarm host=<hostname>.ptp
#     check
#
#####################################################################
def read_func():

    if obj.virtual is True:
        return 0

    # check and run init until it reports init_done True
    if obj.init_done is False:
        if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
            collectd.info("%s re-running init" % PLUGIN)
        obj.log_throttle_count += 1
        init_func()
        return 0

    if obj.fm_connectivity is False:

        try:
            # query FM for existing alarms.
            alarms = api.get_faults_by_id(PLUGIN_ALARMID)
        except Exception as ex:
            collectd.error("%s 'get_faults_by_id' exception ;"
                           " %s ; %s" %
                           (PLUGIN, PLUGIN_ALARMID, ex))
            return 0

        if alarms:
            for alarm in alarms:
                collectd.debug("%s found startup alarm '%s'" %
                               (PLUGIN, alarm.entity_instance_id))

                eid = alarm.entity_instance_id
                if eid is None:
                    collectd.error("%s startup alarm query error ; no eid" %
                                   PLUGIN)
                    continue

                # get the hostname host=<hostname>.stuff ;
                # split off the base eid and then
                # compare that to this plugin's base eid ;
                # ignore alarms that are not for this host
                if eid.split('.')[0] != obj.base_eid:
                    continue
                else:
                    # load the state of the specific alarm
                    instance = eid.split('.')[1].split('=')
                    if instance[0] == 'ptp':
                        # clear all ptp alarms on process startup
                        # just in case interface names have changed
                        # since the alarm was raised.
                        if clear_alarm(eid) is False:
                            # if we can't clear the alarm now then error out.
                            collectd.error("%s failed to clear startup "
                                           "alarm %s:%s" %
                                           (PLUGIN, PLUGIN_ALARMID, eid))
                            # try again next time around
                            return 0
                        else:
                            collectd.info("%s cleared startup alarm '%s'" %
                                          (PLUGIN, alarm.entity_instance_id))
                    else:

                        if clear_alarm(eid) is False:
                            collectd.error("%s failed to clear invalid PTP "
                                           "alarm %s:%s" %
                                           (PLUGIN, PLUGIN_ALARMID,
                                            alarm.entity_instance_id))
                            return 0
                        else:
                            collectd.info("%s cleared found invalid startup"
                                          " alarm %s:%s" %
                                          (PLUGIN,
                                           PLUGIN_ALARMID,
                                           alarm.entity_instance_id))
        else:
            collectd.info("%s no startup alarms found" % PLUGIN)

        obj.config_complete = True
        obj.fm_connectivity = True
        # assert_all_alarms()

    # This plugin supports PTP in-service state change by checking
    # service state on every audit ; every 5 minutes.
    data = subprocess.check_output([SYSTEMCTL,
                                    SYSTEMCTL_IS_ENABLED_OPTION,
                                    PLUGIN_SERVICE])
    collectd.debug("%s PTP admin state:%s" % (PLUGIN, data.rstrip()))

    if data.rstrip() == SYSTEMCTL_IS_DISABLED_RESPONSE:

        # Manage execution phase
        if obj.phase != RUN_PHASE__DISABLED:
            obj.phase = RUN_PHASE__DISABLED
            obj.log_throttle_count = 0

        if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
            collectd.info("%s PTP Service Disabled" % PLUGIN)
        obj.log_throttle_count += 1

        for o in ALARM_OBJ_LIST:
            if o.raised is True:
                if clear_alarm(o.eid) is True:
                    o.raised = False
                else:
                    collectd.error("%s %s:%s clear alarm failed "
                                   "; will retry" %
                                   (PLUGIN, PLUGIN_ALARMID, o.eid))
        return 0

    data = subprocess.check_output([SYSTEMCTL,
                                    SYSTEMCTL_IS_ACTIVE_OPTION,
                                    PLUGIN_SERVICE])

    if data.rstrip() == SYSTEMCTL_IS_INACTIVE_RESPONSE:

        # Manage execution phase
        if obj.phase != RUN_PHASE__NOT_RUNNING:
            obj.phase = RUN_PHASE__NOT_RUNNING
            obj.log_throttle_count = 0

        if ctrl.process_alarm_object.alarm == ALARM_CAUSE__PROCESS:
            if ctrl.process_alarm_object.raised is False:
                collectd.error("%s PTP service enabled but not running" %
                               PLUGIN)
                if raise_alarm(ALARM_CAUSE__PROCESS) is True:
                    ctrl.process_alarm_object.raised = True

        # clear all other alarms if the 'process' alarm is raised
        elif ctrl.process_alarm_object.raised is True:
            if clear_alarm(ctrl.process_alarm_object.eid) is True:
                msg = 'cleared'
                ctrl.process_alarm_object.raised = False
            else:
                msg = 'failed to clear'
            collectd.info("%s %s %s:%s" %
                          (PLUGIN, msg, PLUGIN_ALARMID,
                           ctrl.process_alarm_object.eid))
        return 0

    # Handle clearing the 'process' alarm if it is asserted and
    # the process is now running
    if ctrl.process_alarm_object.raised is True:
        if clear_alarm(ctrl.process_alarm_object.eid) is True:
            ctrl.process_alarm_object.raised = False
            collectd.info("%s PTP service enabled and running" % PLUGIN)

    # Auto refresh the timestamping mode in case collectd runs
    # before the ptp manifest or the mode changes on the fly by
    # an in-service manifest.
    # Every 4 audits.
    obj.audits += 1
    if not obj.audits % 4:
        read_timestamp_mode()

    # Manage execution phase
    if obj.phase != RUN_PHASE__SAMPLING:
        obj.phase = RUN_PHASE__SAMPLING
        obj.log_throttle_count = 0

    # Let's read the port status information
    #
    # sudo /usr/sbin/pmc -u -b 0 'GET PORT_DATA_SET'
    #
    data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC,
                                    '-u', '-b', '0', 'GET PORT_DATA_SET'])

    port_locked = False
    obj.resp = data.split('\n')
    for line in obj.resp:
        if 'portState' in line:
            collectd.debug("%s portState : %s" % (PLUGIN, line.split()[1]))
            port_state = line.split()[1]
            if port_state == 'SLAVE':
                port_locked = True

    # Let's read the clock info, Grand Master sig and skew
    #
    # sudo /usr/sbin/pmc -u -b 0 'GET TIME_STATUS_NP'
    #
    data = subprocess.check_output([PLUGIN_STATUS_QUERY_EXEC,
                                    '-u', '-b', '0', 'GET TIME_STATUS_NP'])

    got_master_offset = False
    master_offset = 0
    my_identity = ''
    gm_identity = ''
    gm_present = False
    obj.resp = data.split('\n')
    for line in obj.resp:
        if 'RESPONSE MANAGEMENT TIME_STATUS_NP' in line:
            collectd.debug("%s key : %s" %
                           (PLUGIN, line.split()[0].split('-')[0]))
            my_identity = line.split()[0].split('-')[0]
        if 'master_offset' in line:
            collectd.debug("%s Offset : %s" % (PLUGIN, line.split()[1]))
            master_offset = float(line.split()[1])
            got_master_offset = True
        if 'gmPresent' in line:
            collectd.debug("%s gmPresent : %s" % (PLUGIN, line.split()[1]))
            gm_present = line.split()[1]
        if 'gmIdentity' in line:
            collectd.debug("%s gmIdentity: %s" % (PLUGIN, line.split()[1]))
            gm_identity = line.split()[1]

    # Handle the case where this host is the Grand Master
    #   ... or assumes it is.
    if my_identity == gm_identity or port_locked is False:

        if obj.controller is False:

            # Compute and storage nodes should not be the Grand Master
            if ctrl.nolock_alarm_object.raised is False:
                if raise_alarm(ALARM_CAUSE__NO_LOCK, None, 0) is True:
                    ctrl.nolock_alarm_object.raised = True

            # produce a throttled log while this host is not locked to the GM
            if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
                collectd.info("%s %s not locked to remote Grand Master "
                              "(%s)" % (PLUGIN, obj.hostname, gm_identity))
            obj.log_throttle_count += 1

            # No samples for storage and compute nodes that are not
            # locked to a Grand Master
            return 0

        else:
            # Controllers can be a Grand Master ; throttle the log
            if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
                collectd.info("%s %s is Grand Master:%s" %
                              (PLUGIN, obj.hostname, gm_identity))
            obj.log_throttle_count += 1

            # The Grand Master offset will always be 0 so there is no point
            # creating a sample for it.
            return 0

    # Handle clearing the nolock alarm for compute and storage nodes
    elif obj.controller is False:
        if ctrl.nolock_alarm_object.raised is True:
            if clear_alarm(ctrl.nolock_alarm_object.eid) is True:
                ctrl.nolock_alarm_object.raised = False

    # Keep this FIT test code but leave it commented out for security
    # if os.path.exists('/var/run/fit/ptp_data'):
    #     master_offset = 0
    #     with open('/var/run/fit/ptp_data', 'r') as infile:
    #         for line in infile:
    #             master_offset = int(line)
    #             got_master_offset = True
    #             collectd.info("%s using ptp FIT data skew:%d" %
    #                           (PLUGIN, master_offset))
    #             break

    # Send the sample and manage the Out-Of-Tolerance alarm
    if got_master_offset is True:

        if not (obj.log_throttle_count % obj.INIT_LOG_THROTTLE):
            collectd.info("%s %s is collecting samples [%5d] "
                          "with Grand Master %s" %
                          (PLUGIN, obj.hostname,
                           float(master_offset), gm_identity))

        obj.log_throttle_count += 1

        # setup the sample structure and dispatch
        val = collectd.Values(host=obj.hostname)
        val.type = PLUGIN_TYPE
        val.type_instance = PLUGIN_TYPE_INSTANCE
        val.plugin = 'ptp'
        val.dispatch(values=[float(master_offset)])

        # Manage the sample OOT alarm severity
        severity = fm_constants.FM_ALARM_SEVERITY_CLEAR
        if abs(master_offset) > OOT_MAJOR_THRESHOLD:
            severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
        elif abs(master_offset) > OOT_MINOR_THRESHOLD:
            severity = fm_constants.FM_ALARM_SEVERITY_MINOR

        # Handle clearing of the Out-Of-Tolerance alarm
        if severity == fm_constants.FM_ALARM_SEVERITY_CLEAR:
            if ctrl.oot_alarm_object.raised is True:
                if clear_alarm(ctrl.oot_alarm_object.eid) is True:
                    ctrl.oot_alarm_object.severity = \
                        fm_constants.FM_ALARM_SEVERITY_CLEAR
                    ctrl.oot_alarm_object.raised = False

        else:
            # Special Case:
            # -------------
            # Don't raise a minor alarm when in software timestamping mode.
            # Too much skew in software or legacy mode ; the alarm would bounce.
            # TODO: Consider making ptp a real time process
            if severity == fm_constants.FM_ALARM_SEVERITY_MINOR \
                    and obj.mode != 'hardware':
                return 0

            # Handle debounce of the OOT alarm.
            # Debounce by 1 for the same severity level.
            if ctrl.oot_alarm_object.severity != severity:
                ctrl.oot_alarm_object.severity = severity

            # This will keep refreshing the alarm text with the current
            # skew value while still debouncing on state transitions.
            #
            # Precision ... (PTP) clocking is out of tolerance by 1004 nsec
            #
            elif severity == fm_constants.FM_ALARM_SEVERITY_MINOR:
                # Handle raising the Minor OOT Alarm.
                rc = raise_alarm(ALARM_CAUSE__OOT, None, master_offset)
                if rc is True:
                    ctrl.oot_alarm_object.raised = True

            elif severity == fm_constants.FM_ALARM_SEVERITY_MAJOR:
                # Handle raising the Major OOT Alarm.
                rc = raise_alarm(ALARM_CAUSE__OOT, None, master_offset)
                if rc is True:
                    ctrl.oot_alarm_object.raised = True

            # Record the value that is alarmable
            if severity != fm_constants.FM_ALARM_SEVERITY_CLEAR:
                collectd.info("%s Grand Master ID: %s ; "
                              "HOST ID: %s ; "
                              "GM Present:%s ; "
                              "Skew:%5d" % (PLUGIN,
                                            gm_identity,
                                            my_identity,
                                            gm_present,
                                            master_offset))
    else:
        collectd.info("%s No Clock Sync" % PLUGIN)

    return 0


collectd.register_init(init_func)
collectd.register_read(read_func, interval=PLUGIN_AUDIT_INTERVAL)
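For reference, a minimal standalone sketch of the TIME_STATUS_NP parsing step above, run against captured pmc output rather than a live query; the sample text and the parse_time_status helper name are illustrative only and not part of the plugin.

    # Sketch: parse 'pmc ... GET TIME_STATUS_NP' style output the same way
    # read_func() does. The SAMPLE text below is made up for illustration.
    SAMPLE = """\
    001234.fffe.56789a-0 seq 0 RESPONSE MANAGEMENT TIME_STATUS_NP
            master_offset              -1004
            gmPresent                  true
            gmIdentity                 001234.fffe.aaaaaa
    """

    def parse_time_status(text):
        """Return (my_identity, master_offset, gm_present, gm_identity)."""
        my_identity = gm_identity = ''
        gm_present = False
        master_offset = None
        for line in text.split('\n'):
            if 'RESPONSE MANAGEMENT TIME_STATUS_NP' in line:
                my_identity = line.split()[0].split('-')[0]
            elif 'master_offset' in line:
                master_offset = float(line.split()[1])
            elif 'gmPresent' in line:
                gm_present = line.split()[1]
            elif 'gmIdentity' in line:
                gm_identity = line.split()[1]
        return my_identity, master_offset, gm_present, gm_identity

    if __name__ == '__main__':
        print(parse_time_status(SAMPLE))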
@ -1,21 +0,0 @@
LoadPlugin python
<Plugin python>
    ModulePath "/opt/collectd/extensions/python"
    Import "cpu"
    <Module "cpu">
        Path "/proc/cpuinfo"
    </Module>
    Import "memory"
    <Module "memory">
        Path "/proc/meminfo"
    </Module>
    Import "ntpq"
    Import "ptp"
    Import "interface"
    <Module "interface">
        Port 2122
    </Module>
    Import "remotels"
    LogTraces = true
    Encoding "utf-8"
</Plugin>
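As a companion to the conf block above, a minimal sketch of what one of the imported modules looks like on the Python side. The module logic here is hypothetical; only the collectd config/read callback pattern mirrors the real plugins in this package.

    # Sketch of a collectd Python module that consumes a "<Module ...> Path ..."
    # block like the cpu/memory entries above.
    import collectd

    PATH = '/proc/meminfo'   # default, overridden by the <Module> Path option

    def config_func(config):
        """Receive the <Module> block; config.children carries Path etc."""
        global PATH
        for child in config.children:
            if child.key == 'Path':
                PATH = child.values[0]
        collectd.info('sketch plugin using path %s' % PATH)

    def read_func():
        """Dispatch one gauge sample per audit interval."""
        with open(PATH, 'r') as f:
            lines = f.readlines()
        val = collectd.Values(plugin='sketch', type='gauge')
        val.dispatch(values=[len(lines)])

    collectd.register_config(config_func)
    collectd.register_read(read_func)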
@ -1,13 +0,0 @@
<Plugin "threshold">
    <Plugin "remotels">
        <Type "absolute">
            Instance "reachable"
            Persist true
            PersistOK true
            WarningMin 1
            FailureMin 0
            Hits 2
            Invert false
        </Type>
    </Plugin>
</Plugin>
@ -1,350 +0,0 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
############################################################################
#
# This is the Remote Logging Server plugin for collectd.
#
# The Remote Logging Server is enabled if /etc/syslog-ng/syslog-ng.conf
# contains '@include remotelogging.conf'
#
# There is no asynchronous notification of remote logging server
# configuration enable/disable state changes. Therefore, each audit
# interval needs to check whether it is enabled or not.
#
# every audit interval ...
#
# read_func:
#     check enabled:
#         if disabled and alarmed:
#             clear alarm
#         if enabled:
#             get ip and port
#             query status
#             if connected and alarmed:
#                 clear alarm
#             if not connected and not alarmed:
#                 raise alarm
#
# system remotelogging-modify --ip_address <ip address>
#                             --transport tcp
#                             --enabled True
#
############################################################################

import os
import collectd
import tsconfig.tsconfig as tsc
import plugin_common as pc
from fm_api import constants as fm_constants
from oslo_concurrency import processutils
from fm_api import fm_api

# Fault manager API Object
api = fm_api.FaultAPIsV2()

# name of the plugin
PLUGIN_NAME = 'remotels'

# all logs produced by this plugin are prefixed with this
PLUGIN = 'remote logging server'

# Plugin audit interval in seconds
PLUGIN_AUDIT_INTERVAL = 60

# Sample Data 'type' and 'instance' database field values.
PLUGIN_TYPE = 'absolute'
PLUGIN_TYPE_INSTANCE = 'reachable'

# Remote Logging Connectivity Alarm ID
PLUGIN_ALARMID = '100.118'

# The file where this plugin learns if remote logging is enabled
SYSLOG_CONF_FILE = '/etc/syslog-ng/syslog-ng.conf'

# Plugin Control Object
obj = pc.PluginObject(PLUGIN, "")


# Raise Remote Logging Server Alarm
def raise_alarm():
    """Raise Remote Logging Server Alarm"""

    repair = 'Ensure Remote Log Server IP is reachable from '
    repair += 'Controller through OAM interface; otherwise '
    repair += 'contact next level of support.'

    reason = 'Controller cannot establish connection with '
    reason += 'remote logging server.'

    try:
        fault = fm_api.Fault(
            alarm_id=PLUGIN_ALARMID,
            alarm_state=fm_constants.FM_ALARM_STATE_SET,
            entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
            entity_instance_id=obj.base_eid,
            severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
            reason_text=reason,
            alarm_type=fm_constants.FM_ALARM_TYPE_1,
            probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_6,
            proposed_repair_action=repair,
            service_affecting=False,
            suppression=False)

        alarm_uuid = api.set_fault(fault)
        if pc.is_uuid_like(alarm_uuid) is False:
            collectd.error("%s 'set_fault' failed ; %s:%s ; %s" %
                           (PLUGIN, PLUGIN_ALARMID,
                            obj.base_eid, alarm_uuid))
        else:
            collectd.info("%s %s:%s alarm raised" %
                          (PLUGIN, PLUGIN_ALARMID, obj.base_eid))
            obj.alarmed = True

    except Exception as ex:
        collectd.error("%s 'set_fault' exception ; %s:%s ; %s " %
                       (PLUGIN, PLUGIN_ALARMID, obj.base_eid, ex))


# Clear remote logging server alarm
def clear_alarm():
    """Clear remote logging server alarm"""

    try:
        if api.clear_fault(PLUGIN_ALARMID, obj.base_eid) is True:
            collectd.info("%s %s:%s alarm cleared" %
                          (PLUGIN, PLUGIN_ALARMID, obj.base_eid))
        else:
            collectd.info("%s %s:%s alarm already clear" %
                          (PLUGIN, PLUGIN_ALARMID, obj.base_eid))

        obj.alarmed = False
        return True

    except Exception as ex:
        collectd.error("%s 'clear_fault' exception ; %s:%s ; %s" %
                       (PLUGIN, PLUGIN_ALARMID, obj.base_eid, ex))
        return False


# The config function - called once on collectd process startup
def config_func(config):
    """Configure the plugin"""

    # all configuration is learned during normal monitoring
    obj.config_done = True
    return 0


# The init function - called once on collectd process startup
def init_func():
    """Init the plugin"""

    # remote logging server monitoring is for controllers only
    if tsc.nodetype != 'controller':
        return 0

    if obj.init_done is False:
        if obj.init_ready() is False:
            return False

        obj.hostname = obj.gethostname()
        obj.base_eid = 'host=' + obj.hostname
        obj.init_done = True
        collectd.info("%s initialization complete" % PLUGIN)

    return True


# The sample read function - called on every audit interval
def read_func():
    """Remote logging server connectivity plugin read function"""

    # remote logging server monitoring is for controllers only
    if tsc.nodetype != 'controller':
        return 0

    if obj.init_done is False:
        init_func()
        return 0

    # get current state
    current_enabled_state = obj.enabled

    # check to see if remote logging is enabled
    obj.enabled = False  # assume disabled
    if os.path.exists(SYSLOG_CONF_FILE) is True:
        with open(SYSLOG_CONF_FILE, 'r') as infile:
            for line in infile:
                if line.startswith('@include '):
                    service = line.rstrip().split(' ')[1]
                    if service == '"remotelogging.conf"':
                        obj.enabled = True
                        break

    if current_enabled_state == obj.enabled:
        logit = False
    else:
        if obj.enabled is False:
            collectd.info("%s is disabled" % PLUGIN)
        else:
            collectd.info("%s is enabled" % PLUGIN)
        logit = True

    # Handle the startup case by clearing the existing alarm if it is raised.
    # It is cheaper and simpler at runtime to issue a blind clear than to query.
    if obj.audits == 0:
        if clear_alarm() is False:
            # if the clear fails then retry next time
            return 0
        if obj.enabled is False:
            collectd.info("%s is disabled" % PLUGIN)
        obj.audits = 1

    if obj.enabled is False:
        if obj.alarmed is True:
            clear_alarm()
        return 0

    # If we get here then the server is enabled ...
    # Need to query it

    # Get the ip and port from a line that looks like this
    #
    #   tag                           proto address        port
    # ----------------------------    ---   -------------- ---
    # destination remote_log_server {tcp("128.224.186.65" port(514));};
    #
    address = protocol = port = ''
    with open(SYSLOG_CONF_FILE, 'r') as infile:
        for line in infile:
            if line.startswith('destination remote_log_server'):
                try:
                    if len(line.split('{')) > 1:
                        protocol = line.split('{')[1][0:3]
                        address = line.split('{')[1].split('"')[1]
                        port = line.split('{')[1].split('(')[2].split(')')[0]
                        if not protocol or not address or not port:
                            collectd.error("%s remote log server credentials "
                                           "parse error ; (%s:%s:%s)" %
                                           (PLUGIN, protocol, address, port))
                            return 1
                        else:
                            # line parsed ; move on ...
                            break
                    else:
                        collectd.error("%s remote log server line parse error"
                                       " ; %s" % (PLUGIN, line))
                except Exception as ex:
                    collectd.error("%s remote log server credentials "
                                   "parse exception ; (%s) ; %s" %
                                   (PLUGIN, line, ex))

    if ':' in address:
        ipv = 6
        protocol += '6'

        # Monitoring of IPv6 is not currently supported
        return 0

    else:
        ipv = 4

    # This plugin detects server connectivity through its socket status.
    # To do that, construct the remote logging server IP string.
    # The files being looked at (/proc/net/tcp and /proc/net/udp) store the
    # address as upper-case hex with the IPv4 octets in reverse order,
    # so convert the dotted-quad address accordingly.
    index = 3
    addr = [0, 0, 0, 0]

    # swap order
    for tup in address.split('.'):
        addr[index] = int(tup)
        index -= 1

    # build the upper-case hex address
    UPPER_HEX_IP = ''
    for tup in addr:
        val = hex(int(tup)).split('x')[-1].upper()
        if len(val) == 1:
            UPPER_HEX_IP += '0'
        UPPER_HEX_IP += val
    UPPER_HEX_IP += ':'
    tmp = hex(int(port)).split('x')[-1].upper()
    for i in range(4 - len(tmp)):
        UPPER_HEX_IP += '0'
    UPPER_HEX_IP += tmp

    # log example  tcp:ipv4:128.224.186.65:514 : IP:41BAE080:0202
    collectd.debug("%s %s:ipv%d:%s:%s : IP:%s" %
                   (PLUGIN, protocol, ipv, address, port, UPPER_HEX_IP))

    cmd = "cat /proc/net/" + protocol
    cmd += " | awk '{print $3 \" \" $4}' | grep " + UPPER_HEX_IP
    cmd += " | awk '{print $2}'"
    res, err = processutils.execute(cmd, shell=True)
    if err:
        collectd.error("%s processutils error:%s" % (PLUGIN, err))

        # cmd example:
        # cat /proc/net/tcp | awk '{print $3 " " $4}'
        #     | grep 41BAE080:0202
        #     | awk '{print $2}'
        collectd.debug("%s Cmd:%s" % (PLUGIN, cmd))
        return 0

    if res and res.rstrip() == '01':
        # connected state reads 01
        # Example log: Res:[01]

        # clear alarm if
        #  - currently alarmed and
        #  - debounced by 1 ; need 2 connected readings in a row
        if obj.alarmed is True:
            clear_alarm()

        # Only log on state change
        if obj.usage != 1:
            logit = True

        obj.usage = 1
        conn = ''

    else:
        # res typically reads 02 when not connected
        # Example log: Res:[02]
        collectd.debug("%s Res:[%s] " % (PLUGIN, res.rstrip()))

        # raise alarm if
        #  - not already alarmed
        #  - debounced by 1 ; need 2 failures in a row
        if obj.alarmed is False and obj.usage == 0:
            raise_alarm()

        # only log on state change
        if obj.usage == 1 or obj.audits == 1:
            logit = True

        obj.usage = 0
        conn = 'not '

    if logit is True:
        collectd.info("%s is %sconnected [%s ipv%d %s:%s]" %
                      (PLUGIN, conn, protocol, ipv, address, port))
    obj.audits += 1

    # Dispatch usage value to collectd
    val = collectd.Values(host=obj.hostname)
    val.plugin = PLUGIN_NAME
    val.type = PLUGIN_TYPE
    val.type_instance = PLUGIN_TYPE_INSTANCE
    val.dispatch(values=[obj.usage])
    return 0


# register the config, init and read functions
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func, interval=PLUGIN_AUDIT_INTERVAL)
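The /proc/net address handling above is the least obvious part of this plugin, so here is the same conversion as a small standalone sketch with a usage example; the helper name is made up for illustration and is not part of the plugin.

    # Sketch of the /proc/net/tcp|udp address encoding used above: IPv4 octets
    # are reversed and rendered as upper-case hex, followed by the port as
    # four hex digits.
    def to_proc_net_key(address, port):
        """Return the 'AABBCCDD:PPPP' key used in /proc/net/tcp and /proc/net/udp."""
        octets = [int(o) for o in address.split('.')]
        hex_ip = ''.join('%02X' % o for o in reversed(octets))
        return '%s:%04X' % (hex_ip, int(port))

    if __name__ == '__main__':
        # 128.224.186.65:514 -> 41BAE080:0202, matching the log example above
        print(to_proc_net_key('128.224.186.65', 514))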
@ -1,10 +0,0 @@
Metadata-Version: 1.1
Name: influxdb-extensions
Version: 1.0
Summary: influxdb-extensions
Home-page:
Author: Windriver
Author-email: info@windriver.com
License: ASL 2.0
Description: Titanium Cloud influxdb extensions.
Platform: UNKNOWN
@ -1,7 +0,0 @@
SRC_DIR="$PKG_BASE"

COPY_LIST="$PKG_BASE/src/LICENSE \
           $PKG_BASE/src/influxdb.conf.pmon \
           $PKG_BASE/src/influxdb.service"

TIS_PATCH_VER=2
@ -1,46 +0,0 @@
Summary: Titanium Server influxdb Extensions Package
Name: influxdb-extensions
Version: 1.0
Release: 0%{?_tis_dist}.%{tis_patch_ver}
License: ASL 2.0
Group: base
Packager: Wind River <info@windriver.com>
URL: unknown

# create the files tarball
Source0: %{name}-%{version}.tar.gz

Source1: influxdb.service
Source2: influxdb.conf.pmon

Requires: systemd
Requires: influxdb
Requires: /bin/systemctl

%description
Titanium Cloud influxdb extensions

%define debug_package %{nil}
%define local_unit_dir %{_sysconfdir}/systemd/system

%prep
%setup

%build

%install
install -m 755 -d %{buildroot}%{_sysconfdir}
install -m 755 -d %{buildroot}%{_sysconfdir}/influxdb
install -m 755 -d %{buildroot}%{local_unit_dir}

install -m 644 %{SOURCE1} %{buildroot}%{local_unit_dir}
install -m 600 %{SOURCE2} %{buildroot}%{_sysconfdir}/influxdb


%clean
rm -rf $RPM_BUILD_ROOT

%files
%defattr(-,root,root,-)
%config(noreplace) %{local_unit_dir}/influxdb.service
%{_sysconfdir}/influxdb/*
@ -1,202 +0,0 @@

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
@ -1,322 +0,0 @@
### Welcome to the InfluxDB configuration file.

# Once every 24 hours InfluxDB will report anonymous data to m.influxdb.com
# The data includes raft id (random 8 bytes), os, arch, version, and metadata.
# We don't track ip addresses of servers reporting. This is only used
# to track the number of instances running and the versions, which
# is very helpful for us.
# Change this option to true to disable reporting.
reporting-disabled = false

###
### Enterprise registration control
###

[registration]
# enabled = true
# url = "https://enterprise.influxdata.com"  # The Enterprise server URL
# token = ""                                 # Registration token for Enterprise server

###
### [meta]
###
### Controls the parameters for the Raft consensus group that stores metadata
### about the InfluxDB cluster.
###

[meta]
  dir = "/var/lib/influxdb/meta"
  hostname = "localhost"
  bind-address = ":8088"
  retention-autocreate = true
  election-timeout = "1s"
  heartbeat-timeout = "1s"
  leader-lease-timeout = "500ms"
  commit-timeout = "50ms"
  cluster-tracing = false

  # If enabled, when a Raft cluster loses a peer due to a `DROP SERVER` command,
  # the leader will automatically ask a non-raft peer node to promote to a raft
  # peer. This only happens if there is a non-raft peer node available to promote.
  # This setting only affects the local node, so to ensure it operates correctly, be sure to set
  # it in the config of every node.
  raft-promotion-enabled = true

###
### [data]
###
### Controls where the actual shard data for InfluxDB lives and how it is
### flushed from the WAL. "dir" may need to be changed to a suitable place
### for your system, but the WAL settings are an advanced configuration. The
### defaults should work for most systems.
###

[data]
  dir = "/var/lib/influxdb/data"

  # Controls the engine type for new shards. Options are b1, bz1, or tsm1.
  # b1 is the 0.9.2 storage engine, bz1 is the 0.9.3 and 0.9.4 engine.
  # tsm1 is the 0.9.5 engine and is currently EXPERIMENTAL. Until 0.9.5 is
  # actually released, data written into a tsm1 engine may need to be wiped
  # between upgrades.
  # engine = "bz1"

  # The following WAL settings are for the b1 storage engine used in 0.9.2. They won't
  # apply to any new shards created after upgrading to a version > 0.9.3.
  max-wal-size = 104857600            # Maximum size the WAL can reach before a flush. Defaults to 100MB.
  wal-flush-interval = "10m"          # Maximum time data can sit in WAL before a flush.
  wal-partition-flush-delay = "2s"    # The delay time between each WAL partition being flushed.

  # These are the WAL settings for the storage engine >= 0.9.3
  wal-dir = "/var/lib/influxdb/wal"
  wal-enable-logging = true

  # When a series in the WAL in-memory cache reaches this size in bytes it is marked as ready to
  # flush to the index
  # wal-ready-series-size = 25600

  # Flush and compact a partition once this ratio of series are over the ready size
  # wal-compaction-threshold = 0.6

  # Force a flush and compaction if any series in a partition gets above this size in bytes
  # wal-max-series-size = 2097152

  # Force a flush of all series and full compaction if there have been no writes in this
  # amount of time. This is useful for ensuring that shards that are cold for writes don't
  # keep a bunch of data cached in memory and in the WAL.
  # wal-flush-cold-interval = "10m"

  # Force a partition to flush its largest series if it reaches this approximate size in
  # bytes. Remember there are 5 partitions so you'll need at least 5x this amount of memory.
  # The more memory you have, the bigger this can be.
  # wal-partition-size-threshold = 20971520

  # Whether queries should be logged before execution. Very useful for troubleshooting, but will
  # log any sensitive data contained within a query.
  # query-log-enabled = true

###
### [hinted-handoff]
###
### Controls the hinted handoff feature, which allows nodes to temporarily
### store queued data when one node of a cluster is down for a short period
### of time.
###

[hinted-handoff]
  enabled = true
  dir = "/var/lib/influxdb/hh"
  max-size = 1073741824
  max-age = "168h"
  retry-rate-limit = 0

  # Hinted handoff will start retrying writes to down nodes at a rate of once per second.
  # If any error occurs, it will backoff in an exponential manner, until the interval
  # reaches retry-max-interval. Once writes to all nodes are successfully completed the
  # interval will reset to retry-interval.
  retry-interval = "1s"
  retry-max-interval = "1m"

  # Interval between running checks for data that should be purged. Data is purged from
  # hinted-handoff queues for two reasons. 1) The data is older than the max age, or
  # 2) the target node has been dropped from the cluster. Data is never dropped until
  # it has reached max-age, however, whether for a dropped node or not.
  purge-interval = "1h"

###
### [cluster]
###
### Controls non-Raft cluster behavior, which generally includes how data is
### shared across shards.
###

[cluster]
  shard-writer-timeout = "10s"  # The time within which a shard must respond to write.
  write-timeout = "5s"          # The time within which a write operation must complete on the cluster.

###
### [retention]
###
### Controls the enforcement of retention policies for evicting old data.
###

[retention]
  enabled = true
  check-interval = "30m"

###
### [shard-precreation]
###
### Controls the precreation of shards, so they are created before data arrives.
### Only shards that will exist in the future, at time of creation, are precreated.

[shard-precreation]
  enabled = true
  check-interval = "10m"
  advance-period = "30m"

###
### Controls the system self-monitoring, statistics and diagnostics.
###
### The internal database for monitoring data is created automatically
### if it does not already exist. The target retention within this database
### is called 'monitor' and is also created with a retention period of 7 days
### and a replication factor of 1, if it does not exist. In all cases
### this retention policy is configured as the default for the database.

[monitor]
  store-enabled = true          # Whether to record statistics internally.
  store-database = "_internal"  # The destination database for recorded statistics
  store-interval = "10s"        # The interval at which to record statistics

###
### [admin]
###
### Controls the availability of the built-in, web-based admin interface. If HTTPS is
### enabled for the admin interface, HTTPS must also be enabled on the [http] service.
###

[admin]
  enabled = true
  bind-address = ":8083"
  https-enabled = false
  https-certificate = "/etc/ssl/influxdb.pem"

###
### [http]
###
### Controls how the HTTP endpoints are configured. These are the primary
### mechanism for getting data into and out of InfluxDB.
###

[http]
  enabled = true
  bind-address = ":8086"
  auth-enabled = false
  log-enabled = true
  write-tracing = false
  pprof-enabled = false
  https-enabled = false
  https-certificate = "/etc/ssl/influxdb.pem"

###
### [[graphite]]
###
### Controls one or many listeners for Graphite data.
###

[[graphite]]
  enabled = false
  # database = "graphite"
  # bind-address = ":2003"
  # protocol = "tcp"
  # consistency-level = "one"
  # name-separator = "."

  # These next lines control how batching works. You should have this enabled
  # otherwise you could get dropped metrics or poor performance. Batching
  # will buffer points in memory if you have many coming in.

  # batch-size = 1000     # will flush if this many points get buffered
  # batch-pending = 5     # number of batches that may be pending in memory
  # batch-timeout = "1s"  # will flush at least this often even if we haven't hit buffer limit
  # udp-read-buffer = 0   # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max.

  ## "name-schema" configures tag names for parsing the metric name from graphite protocol;
  ## separated by `name-separator`.
  ## The "measurement" tag is special and the corresponding field will become
  ## the name of the metric.
  ## e.g. "type.host.measurement.device" will parse "server.localhost.cpu.cpu0" as
  ## {
  ##     measurement: "cpu",
  ##     tags: {
  ##         "type": "server",
  ##         "host": "localhost",
  ##         "device": "cpu0"
  ##     }
  ## }
  # name-schema = "type.host.measurement.device"

  ## If set to true, when the input metric name has more fields than `name-schema` specified,
  ## the extra fields will be ignored.
  ## Otherwise an error will be logged and the metric rejected.
  # ignore-unnamed = true

###
### [collectd]
###
### Controls the listener for collectd data.
###

[collectd]
  enabled = true
  bind-address = "127.0.0.1:25826"
  database = "collectd"
  typesdb = "/usr/share/collectd/types.db"

  # These next lines control how batching works. You should have this enabled
  # otherwise you could get dropped metrics or poor performance. Batching
  # will buffer points in memory if you have many coming in.

  # batch-size = 1000     # will flush if this many points get buffered
  # batch-pending = 5     # number of batches that may be pending in memory
  # batch-timeout = "1s"  # will flush at least this often even if we haven't hit buffer limit
  # read-buffer = 0       # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max.

###
### [opentsdb]
###
### Controls the listener for OpenTSDB data.
###

[opentsdb]
  enabled = false
  # bind-address = ":4242"
  # database = "opentsdb"
  # retention-policy = ""
  # consistency-level = "one"
  # tls-enabled = false
  # certificate = ""

  # These next lines control how batching works. You should have this enabled
  # otherwise you could get dropped metrics or poor performance. Only points
  # received over the telnet protocol undergo batching.

  # batch-size = 1000     # will flush if this many points get buffered
  # batch-pending = 5     # number of batches that may be pending in memory
  # batch-timeout = "1s"  # will flush at least this often even if we haven't hit buffer limit

###
### [[udp]]
###
### Controls the listeners for InfluxDB line protocol data via UDP.
###

[[udp]]
  enabled = false
  # bind-address = ""
  # database = "udp"
  # retention-policy = ""

  # These next lines control how batching works. You should have this enabled
  # otherwise you could get dropped metrics or poor performance. Batching
  # will buffer points in memory if you have many coming in.

  # batch-size = 1000     # will flush if this many points get buffered
  # batch-pending = 5     # number of batches that may be pending in memory
  # batch-timeout = "1s"  # will flush at least this often even if we haven't hit buffer limit
  # read-buffer = 0       # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max.

###
### [continuous_queries]
###
### Controls how continuous queries are run within InfluxDB.
###

[continuous_queries]
  log-enabled = true
  enabled = true
  recompute-previous-n = 2
  recompute-no-older-than = "10m"
  compute-runs-per-interval = 10
  compute-no-more-than = "2m"
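Since the collectd-extensions package pulls in python-influxdb, here is a quick hedged sketch of checking that samples from the [collectd] listener above actually land in the "collectd" database. The host, port and database values simply repeat the defaults from this file; the query is only an example, not part of the packaged code.

    # Sketch: list a few measurements from the local InfluxDB instance that
    # the [collectd] listener above writes into. Assumes python-influxdb is
    # installed and the HTTP endpoint is the default :8086.
    from influxdb import InfluxDBClient

    client = InfluxDBClient(host='127.0.0.1', port=8086, database='collectd')
    result = client.query('SHOW MEASUREMENTS LIMIT 5')
    for point in result.get_points():
        print(point)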
@ -1,17 +0,0 @@
[process]
process      = influxdb
service      = influxdb
style        = lsb
pidfile      = /var/run/influxdb/influxdb.pid
severity     = major     ; minor, major, critical
restarts     = 3         ; restart retries before error assertion
interval     = 5         ; number of seconds to wait between restarts
debounce     = 10        ; number of seconds that a process needs to remain
                         ; running before degrade is removed and retry count
                         ; is cleared.
startuptime  = 3         ; Seconds to wait after process start before starting the debounce monitor
mode         = passive   ; Monitoring mode: passive (default) or active
                         ; passive: process death monitoring (default: always)
                         ; active : heartbeat monitoring, i.e. request / response messaging
                         ; ignore : do not monitor or stop monitoring
quorum       = 0         ; process is in the host watchdog quorum
@ -1,16 +0,0 @@
|
|||||||
#daily
|
|
||||||
nodateext
|
|
||||||
|
|
||||||
/var/log/influxdb/influxdb.log
|
|
||||||
{
|
|
||||||
size 20M
|
|
||||||
start 1
|
|
||||||
missingok
|
|
||||||
rotate 20
|
|
||||||
compress
|
|
||||||
sharedscripts
|
|
||||||
postrotate
|
|
||||||
systemctl reload syslog-ng > /dev/null 2>&1 || true
|
|
||||||
endscript
|
|
||||||
}
|
|
||||||
|
|
@ -1,25 +0,0 @@
[Unit]
Description=InfluxDB open-source, distributed, time series database
Documentation=https://influxdb.com/docs/
Before=collectd.service
Before=pmon.service
After=local-fs.target network-online.target
Requires=local-fs.target network-online.target

[Service]
User=influxdb
Group=influxdb
LimitNOFILE=65536
Environment='STDOUT=/dev/null'
Environment='STDERR=/var/log/influxdb/influxd.log'
EnvironmentFile=-/etc/default/influxdb
PermissionsStartOnly=true
ExecStartPre=-/usr/bin/mkdir -p /var/run/influxdb
ExecStartPre=-/usr/bin/chown influxdb:influxdb /var/run/influxdb
ExecStart=/bin/sh -c "/usr/bin/influxd -config /etc/influxdb/influxdb.conf -pidfile /var/run/influxdb/influxdb.pid ${INFLUXD_OPTS} >> ${STDOUT} 2>> ${STDERR}"
ExecStopPost=/bin/bash -c 'rm /var/run/influxdb/influxdb.pid'
KillMode=control-group

[Install]
WantedBy=multi-user.target
Alias=influxd.service
@ -1,202 +0,0 @@

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
|
|
||||||
granted to You under this License for that Work shall terminate
|
|
||||||
as of the date such litigation is filed.
|
|
||||||
|
|
||||||
4. Redistribution. You may reproduce and distribute copies of the
|
|
||||||
Work or Derivative Works thereof in any medium, with or without
|
|
||||||
modifications, and in Source or Object form, provided that You
|
|
||||||
meet the following conditions:
|
|
||||||
|
|
||||||
(a) You must give any other recipients of the Work or
|
|
||||||
Derivative Works a copy of this License; and
|
|
||||||
|
|
||||||
(b) You must cause any modified files to carry prominent notices
|
|
||||||
stating that You changed the files; and
|
|
||||||
|
|
||||||
(c) You must retain, in the Source form of any Derivative Works
|
|
||||||
that You distribute, all copyright, patent, trademark, and
|
|
||||||
attribution notices from the Source form of the Work,
|
|
||||||
excluding those notices that do not pertain to any part of
|
|
||||||
the Derivative Works; and
|
|
||||||
|
|
||||||
(d) If the Work includes a "NOTICE" text file as part of its
|
|
||||||
distribution, then any Derivative Works that You distribute must
|
|
||||||
include a readable copy of the attribution notices contained
|
|
||||||
within such NOTICE file, excluding those notices that do not
|
|
||||||
pertain to any part of the Derivative Works, in at least one
|
|
||||||
of the following places: within a NOTICE text file distributed
|
|
||||||
as part of the Derivative Works; within the Source form or
|
|
||||||
documentation, if provided along with the Derivative Works; or,
|
|
||||||
within a display generated by the Derivative Works, if and
|
|
||||||
wherever such third-party notices normally appear. The contents
|
|
||||||
of the NOTICE file are for informational purposes only and
|
|
||||||
do not modify the License. You may add Your own attribution
|
|
||||||
notices within Derivative Works that You distribute, alongside
|
|
||||||
or as an addendum to the NOTICE text from the Work, provided
|
|
||||||
that such additional attribution notices cannot be construed
|
|
||||||
as modifying the License.
|
|
||||||
|
|
||||||
You may add Your own copyright statement to Your modifications and
|
|
||||||
may provide additional or different license terms and conditions
|
|
||||||
for use, reproduction, or distribution of Your modifications, or
|
|
||||||
for any such Derivative Works as a whole, provided Your use,
|
|
||||||
reproduction, and distribution of the Work otherwise complies with
|
|
||||||
the conditions stated in this License.
|
|
||||||
|
|
||||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
|
||||||
any Contribution intentionally submitted for inclusion in the Work
|
|
||||||
by You to the Licensor shall be under the terms and conditions of
|
|
||||||
this License, without any additional terms or conditions.
|
|
||||||
Notwithstanding the above, nothing herein shall supersede or modify
|
|
||||||
the terms of any separate license agreement you may have executed
|
|
||||||
with Licensor regarding such Contributions.
|
|
||||||
|
|
||||||
6. Trademarks. This License does not grant permission to use the trade
|
|
||||||
names, trademarks, service marks, or product names of the Licensor,
|
|
||||||
except as required for reasonable and customary use in describing the
|
|
||||||
origin of the Work and reproducing the content of the NOTICE file.
|
|
||||||
|
|
||||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
|
||||||
agreed to in writing, Licensor provides the Work (and each
|
|
||||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
||||||
implied, including, without limitation, any warranties or conditions
|
|
||||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
|
||||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
|
||||||
appropriateness of using or redistributing the Work and assume any
|
|
||||||
risks associated with Your exercise of permissions under this License.
|
|
||||||
|
|
||||||
8. Limitation of Liability. In no event and under no legal theory,
|
|
||||||
whether in tort (including negligence), contract, or otherwise,
|
|
||||||
unless required by applicable law (such as deliberate and grossly
|
|
||||||
negligent acts) or agreed to in writing, shall any Contributor be
|
|
||||||
liable to You for damages, including any direct, indirect, special,
|
|
||||||
incidental, or consequential damages of any character arising as a
|
|
||||||
result of this License or out of the use or inability to use the
|
|
||||||
Work (including but not limited to damages for loss of goodwill,
|
|
||||||
work stoppage, computer failure or malfunction, or any and all
|
|
||||||
other commercial damages or losses), even if such Contributor
|
|
||||||
has been advised of the possibility of such damages.
|
|
||||||
|
|
||||||
9. Accepting Warranty or Additional Liability. While redistributing
|
|
||||||
the Work or Derivative Works thereof, You may choose to offer,
|
|
||||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
|
||||||
or other liability obligations and/or rights consistent with this
|
|
||||||
License. However, in accepting such obligations, You may act only
|
|
||||||
on Your own behalf and on Your sole responsibility, not on behalf
|
|
||||||
of any other Contributor, and only if You agree to indemnify,
|
|
||||||
defend, and hold each Contributor harmless for any liability
|
|
||||||
incurred by, or claims asserted against, such Contributor by reason
|
|
||||||
of your accepting any such warranty or additional liability.
|
|
||||||
|
|
||||||
END OF TERMS AND CONDITIONS
|
|
||||||
|
|
||||||
APPENDIX: How to apply the Apache License to your work.
|
|
||||||
|
|
||||||
To apply the Apache License to your work, attach the following
|
|
||||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
|
||||||
replaced with your own identifying information. (Don't include
|
|
||||||
the brackets!) The text should be enclosed in the appropriate
|
|
||||||
comment syntax for the file format. We also recommend that a
|
|
||||||
file or class name and description of purpose be included on the
|
|
||||||
same "printed page" as the copyright notice for easier
|
|
||||||
identification within third-party archives.
|
|
||||||
|
|
||||||
Copyright [yyyy] [name of copyright owner]
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
@@ -1,2 +0,0 @@
SRC_DIR=scripts
TIS_PATCH_VER=0
@@ -1,42 +0,0 @@
Summary: Monitor tools package
Name: monitor-tools
Version: 1.0
Release: %{tis_patch_ver}%{?_tis_dist}
License: Apache-2.0
Group: base
Packager: Wind River <info@windriver.com>
URL: unknown
BuildArch: noarch
Source: %name-%version.tar.gz

Requires: initscripts-config

%description
This package contains data collection tools to monitor host performance.
Tools are general purpose engineering and debugging related. Includes
overall memory, cpu occupancy, per-task cpu, per-task scheduling, per-task
io.

%prep
%autosetup

%install
rm -rf $RPM_BUILD_ROOT
%global _buildsubdir %{_builddir}/%{name}-%{version}
install -d %{buildroot}/usr/bin
install %{_buildsubdir}/memtop %{buildroot}/usr/bin
install %{_buildsubdir}/schedtop %{buildroot}/usr/bin
install %{_buildsubdir}/occtop %{buildroot}/usr/bin

%files
%license LICENSE
%defattr(-,root,root,-)
/usr/bin/*

%post
grep schedstats /etc/sysctl.conf
if [ $? -ne 0 ]; then
echo -e "\nkernel.sched_schedstats=1" >> /etc/sysctl.conf
sysctl -p &>/dev/null
fi
exit 0
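The %post scriptlet above persists kernel.sched_schedstats=1; a quick, illustrative check on a target host that the setting actually took effect:

    sysctl kernel.sched_schedstats          # runtime value applied by 'sysctl -p'
    grep sched_schedstats /etc/sysctl.conf  # entry appended by the scriptlet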
@@ -1,202 +0,0 @@
|
|||||||
|
|
||||||
Apache License
|
|
||||||
Version 2.0, January 2004
|
|
||||||
http://www.apache.org/licenses/
|
|
||||||
|
|
||||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
||||||
|
|
||||||
1. Definitions.
|
|
||||||
|
|
||||||
"License" shall mean the terms and conditions for use, reproduction,
|
|
||||||
and distribution as defined by Sections 1 through 9 of this document.
|
|
||||||
|
|
||||||
"Licensor" shall mean the copyright owner or entity authorized by
|
|
||||||
the copyright owner that is granting the License.
|
|
||||||
|
|
||||||
"Legal Entity" shall mean the union of the acting entity and all
|
|
||||||
other entities that control, are controlled by, or are under common
|
|
||||||
control with that entity. For the purposes of this definition,
|
|
||||||
"control" means (i) the power, direct or indirect, to cause the
|
|
||||||
direction or management of such entity, whether by contract or
|
|
||||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
|
||||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
|
||||||
|
|
||||||
"You" (or "Your") shall mean an individual or Legal Entity
|
|
||||||
exercising permissions granted by this License.
|
|
||||||
|
|
||||||
"Source" form shall mean the preferred form for making modifications,
|
|
||||||
including but not limited to software source code, documentation
|
|
||||||
source, and configuration files.
|
|
||||||
|
|
||||||
"Object" form shall mean any form resulting from mechanical
|
|
||||||
transformation or translation of a Source form, including but
|
|
||||||
not limited to compiled object code, generated documentation,
|
|
||||||
and conversions to other media types.
|
|
||||||
|
|
||||||
"Work" shall mean the work of authorship, whether in Source or
|
|
||||||
Object form, made available under the License, as indicated by a
|
|
||||||
copyright notice that is included in or attached to the work
|
|
||||||
(an example is provided in the Appendix below).
|
|
||||||
|
|
||||||
"Derivative Works" shall mean any work, whether in Source or Object
|
|
||||||
form, that is based on (or derived from) the Work and for which the
|
|
||||||
editorial revisions, annotations, elaborations, or other modifications
|
|
||||||
represent, as a whole, an original work of authorship. For the purposes
|
|
||||||
of this License, Derivative Works shall not include works that remain
|
|
||||||
separable from, or merely link (or bind by name) to the interfaces of,
|
|
||||||
the Work and Derivative Works thereof.
|
|
||||||
|
|
||||||
"Contribution" shall mean any work of authorship, including
|
|
||||||
the original version of the Work and any modifications or additions
|
|
||||||
to that Work or Derivative Works thereof, that is intentionally
|
|
||||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
|
||||||
or by an individual or Legal Entity authorized to submit on behalf of
|
|
||||||
the copyright owner. For the purposes of this definition, "submitted"
|
|
||||||
means any form of electronic, verbal, or written communication sent
|
|
||||||
to the Licensor or its representatives, including but not limited to
|
|
||||||
communication on electronic mailing lists, source code control systems,
|
|
||||||
and issue tracking systems that are managed by, or on behalf of, the
|
|
||||||
Licensor for the purpose of discussing and improving the Work, but
|
|
||||||
excluding communication that is conspicuously marked or otherwise
|
|
||||||
designated in writing by the copyright owner as "Not a Contribution."
|
|
||||||
|
|
||||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
|
||||||
on behalf of whom a Contribution has been received by Licensor and
|
|
||||||
subsequently incorporated within the Work.
|
|
||||||
|
|
||||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
|
||||||
this License, each Contributor hereby grants to You a perpetual,
|
|
||||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
||||||
copyright license to reproduce, prepare Derivative Works of,
|
|
||||||
publicly display, publicly perform, sublicense, and distribute the
|
|
||||||
Work and such Derivative Works in Source or Object form.
|
|
||||||
|
|
||||||
3. Grant of Patent License. Subject to the terms and conditions of
|
|
||||||
this License, each Contributor hereby grants to You a perpetual,
|
|
||||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
||||||
(except as stated in this section) patent license to make, have made,
|
|
||||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
|
||||||
where such license applies only to those patent claims licensable
|
|
||||||
by such Contributor that are necessarily infringed by their
|
|
||||||
Contribution(s) alone or by combination of their Contribution(s)
|
|
||||||
with the Work to which such Contribution(s) was submitted. If You
|
|
||||||
institute patent litigation against any entity (including a
|
|
||||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
|
||||||
or a Contribution incorporated within the Work constitutes direct
|
|
||||||
or contributory patent infringement, then any patent licenses
|
|
||||||
granted to You under this License for that Work shall terminate
|
|
||||||
as of the date such litigation is filed.
|
|
||||||
|
|
||||||
4. Redistribution. You may reproduce and distribute copies of the
|
|
||||||
Work or Derivative Works thereof in any medium, with or without
|
|
||||||
modifications, and in Source or Object form, provided that You
|
|
||||||
meet the following conditions:
|
|
||||||
|
|
||||||
(a) You must give any other recipients of the Work or
|
|
||||||
Derivative Works a copy of this License; and
|
|
||||||
|
|
||||||
(b) You must cause any modified files to carry prominent notices
|
|
||||||
stating that You changed the files; and
|
|
||||||
|
|
||||||
(c) You must retain, in the Source form of any Derivative Works
|
|
||||||
that You distribute, all copyright, patent, trademark, and
|
|
||||||
attribution notices from the Source form of the Work,
|
|
||||||
excluding those notices that do not pertain to any part of
|
|
||||||
the Derivative Works; and
|
|
||||||
|
|
||||||
(d) If the Work includes a "NOTICE" text file as part of its
|
|
||||||
distribution, then any Derivative Works that You distribute must
|
|
||||||
include a readable copy of the attribution notices contained
|
|
||||||
within such NOTICE file, excluding those notices that do not
|
|
||||||
pertain to any part of the Derivative Works, in at least one
|
|
||||||
of the following places: within a NOTICE text file distributed
|
|
||||||
as part of the Derivative Works; within the Source form or
|
|
||||||
documentation, if provided along with the Derivative Works; or,
|
|
||||||
within a display generated by the Derivative Works, if and
|
|
||||||
wherever such third-party notices normally appear. The contents
|
|
||||||
of the NOTICE file are for informational purposes only and
|
|
||||||
do not modify the License. You may add Your own attribution
|
|
||||||
notices within Derivative Works that You distribute, alongside
|
|
||||||
or as an addendum to the NOTICE text from the Work, provided
|
|
||||||
that such additional attribution notices cannot be construed
|
|
||||||
as modifying the License.
|
|
||||||
|
|
||||||
You may add Your own copyright statement to Your modifications and
|
|
||||||
may provide additional or different license terms and conditions
|
|
||||||
for use, reproduction, or distribution of Your modifications, or
|
|
||||||
for any such Derivative Works as a whole, provided Your use,
|
|
||||||
reproduction, and distribution of the Work otherwise complies with
|
|
||||||
the conditions stated in this License.
|
|
||||||
|
|
||||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
|
||||||
any Contribution intentionally submitted for inclusion in the Work
|
|
||||||
by You to the Licensor shall be under the terms and conditions of
|
|
||||||
this License, without any additional terms or conditions.
|
|
||||||
Notwithstanding the above, nothing herein shall supersede or modify
|
|
||||||
the terms of any separate license agreement you may have executed
|
|
||||||
with Licensor regarding such Contributions.
|
|
||||||
|
|
||||||
6. Trademarks. This License does not grant permission to use the trade
|
|
||||||
names, trademarks, service marks, or product names of the Licensor,
|
|
||||||
except as required for reasonable and customary use in describing the
|
|
||||||
origin of the Work and reproducing the content of the NOTICE file.
|
|
||||||
|
|
||||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
|
||||||
agreed to in writing, Licensor provides the Work (and each
|
|
||||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
||||||
implied, including, without limitation, any warranties or conditions
|
|
||||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
|
||||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
|
||||||
appropriateness of using or redistributing the Work and assume any
|
|
||||||
risks associated with Your exercise of permissions under this License.
|
|
||||||
|
|
||||||
8. Limitation of Liability. In no event and under no legal theory,
|
|
||||||
whether in tort (including negligence), contract, or otherwise,
|
|
||||||
unless required by applicable law (such as deliberate and grossly
|
|
||||||
negligent acts) or agreed to in writing, shall any Contributor be
|
|
||||||
liable to You for damages, including any direct, indirect, special,
|
|
||||||
incidental, or consequential damages of any character arising as a
|
|
||||||
result of this License or out of the use or inability to use the
|
|
||||||
Work (including but not limited to damages for loss of goodwill,
|
|
||||||
work stoppage, computer failure or malfunction, or any and all
|
|
||||||
other commercial damages or losses), even if such Contributor
|
|
||||||
has been advised of the possibility of such damages.
|
|
||||||
|
|
||||||
9. Accepting Warranty or Additional Liability. While redistributing
|
|
||||||
the Work or Derivative Works thereof, You may choose to offer,
|
|
||||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
|
||||||
or other liability obligations and/or rights consistent with this
|
|
||||||
License. However, in accepting such obligations, You may act only
|
|
||||||
on Your own behalf and on Your sole responsibility, not on behalf
|
|
||||||
of any other Contributor, and only if You agree to indemnify,
|
|
||||||
defend, and hold each Contributor harmless for any liability
|
|
||||||
incurred by, or claims asserted against, such Contributor by reason
|
|
||||||
of your accepting any such warranty or additional liability.
|
|
||||||
|
|
||||||
END OF TERMS AND CONDITIONS
|
|
||||||
|
|
||||||
APPENDIX: How to apply the Apache License to your work.
|
|
||||||
|
|
||||||
To apply the Apache License to your work, attach the following
|
|
||||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
|
||||||
replaced with your own identifying information. (Don't include
|
|
||||||
the brackets!) The text should be enclosed in the appropriate
|
|
||||||
comment syntax for the file format. We also recommend that a
|
|
||||||
file or class name and description of purpose be included on the
|
|
||||||
same "printed page" as the copyright notice for easier
|
|
||||||
identification within third-party archives.
|
|
||||||
|
|
||||||
Copyright [yyyy] [name of copyright owner]
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
@@ -1,344 +0,0 @@
|
|||||||
#!/usr/bin/perl
|
|
||||||
########################################################################
|
|
||||||
#
|
|
||||||
# Copyright (c) 2015 Wind River Systems, Inc.
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
#
|
|
||||||
########################################################################
|
|
||||||
#
|
|
||||||
# Description:
|
|
||||||
# This displays overall memory information per sample period.
|
|
||||||
# Output includes total, used, avail, per-numa node breakdown of avail
|
|
||||||
# and free hugepages memory.
|
|
||||||
#
|
|
||||||
# Usage: memtop OPTIONS
|
|
||||||
# memtop [--delay=<seconds>] [--repeat=<num>] [--period=<seconds>] [--help]
|
|
||||||
#
|
|
||||||
|
|
||||||
# Summarize high-level memory usage.
|
|
||||||
use 5.10.0;
|
|
||||||
use warnings;
|
|
||||||
use strict;
|
|
||||||
use Benchmark ':hireswallclock';
|
|
||||||
use POSIX qw(strftime);
|
|
||||||
use Data::Dumper;
|
|
||||||
use File::Basename;
|
|
||||||
use File::Spec ();
|
|
||||||
use Time::HiRes qw(time usleep);
|
|
||||||
use Carp qw(croak carp);
|
|
||||||
|
|
||||||
# IEC and SI constants
|
|
||||||
use constant SI_k => 1.0E3;
|
|
||||||
use constant SI_M => 1.0E6;
|
|
||||||
use constant SI_G => 1.0E9;
|
|
||||||
use constant Ki => 1024.0;
|
|
||||||
use constant Mi => 1024.0*1024.0;
|
|
||||||
use constant Gi => 1024.0*1024.0*1024.0;
|
|
||||||
|
|
||||||
# Name of this program
|
|
||||||
our $TOOLNAME = basename($0);
|
|
||||||
our $VERSION = "0.1";
|
|
||||||
|
|
||||||
# Argument list parameters
|
|
||||||
our ($arg_debug,
|
|
||||||
$arg_delay,
|
|
||||||
$arg_repeat,
|
|
||||||
$arg_period) = ();
|
|
||||||
|
|
||||||
# Globals
|
|
||||||
our $t_0 = ();
|
|
||||||
our $t_1 = ();
|
|
||||||
our $t_elapsed = ();
|
|
||||||
our $t_final = ();
|
|
||||||
our $is_strict = ();
|
|
||||||
our $num_nodes = ();
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
# MAIN Program
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
# benchmark variables
|
|
||||||
my ($bd, $b0, $b1);
|
|
||||||
|
|
||||||
# Autoflush output
|
|
||||||
select(STDERR);
|
|
||||||
$| = 1;
|
|
||||||
select(STDOUT); # default
|
|
||||||
$| = 1;
|
|
||||||
|
|
||||||
# Parse input arguments and print tool usage if necessary
|
|
||||||
&parse_memtop_args(
|
|
||||||
\$::arg_debug,
|
|
||||||
\$::arg_delay,
|
|
||||||
\$::arg_repeat,
|
|
||||||
\$::arg_period,
|
|
||||||
);
|
|
||||||
|
|
||||||
# Print out some debugging information
|
|
||||||
if (defined $::arg_debug) {
|
|
||||||
$Data::Dumper::Indent = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Strict vs non-strict memory accounting
|
|
||||||
$::is_strict = &is_strict();
|
|
||||||
|
|
||||||
# Number of numa nodes
|
|
||||||
$::num_nodes = &num_numa_nodes();
|
|
||||||
|
|
||||||
# Print tool header and selected options
|
|
||||||
printf "%s %s -- ".
|
|
||||||
"selected options: delay = %.3fs, repeat = %d, period = %.3fs, %s, unit = %s\n",
|
|
||||||
$::TOOLNAME, $::VERSION,
|
|
||||||
$::arg_delay, $::arg_repeat, $::arg_period,
|
|
||||||
$::is_strict ? 'strict' : 'non-strict',
|
|
||||||
'MiB';
|
|
||||||
|
|
||||||
# Capture timestamp
|
|
||||||
$b0 = new Benchmark;
|
|
||||||
|
|
||||||
# Get current hires epoc timestamp
|
|
||||||
$::t_1 = time();
|
|
||||||
$::t_final = $::t_1 + $::arg_period;
|
|
||||||
|
|
||||||
# Set initial delay
|
|
||||||
$::t_elapsed = $::arg_delay;
|
|
||||||
|
|
||||||
# Main loop
|
|
||||||
my $delay = SI_M*$::arg_delay - 600.0;
|
|
||||||
REPEAT_LOOP: for (my $rep=1; $rep <= $::arg_repeat; $rep++) {
|
|
||||||
# Copy all state variables
|
|
||||||
$::t_0 = $::t_1;
|
|
||||||
|
|
||||||
# Sleep for desired interarrival time
|
|
||||||
usleep( $delay );
|
|
||||||
|
|
||||||
# Current hires epoc timestamp
|
|
||||||
$::t_1 = time();
|
|
||||||
|
|
||||||
# Delta calculation
|
|
||||||
$::t_elapsed = $::t_1 - $::t_0;
|
|
||||||
|
|
||||||
# Print summary
|
|
||||||
&print_memory(\$::t_1);
|
|
||||||
|
|
||||||
# Exit if we have reached period
|
|
||||||
last if ((defined $::t_final) && ($::t_1 > $::t_final));
|
|
||||||
}
|
|
||||||
|
|
||||||
# Print that tool has finished
|
|
||||||
print "done\n";
|
|
||||||
|
|
||||||
# Capture timestamp and report delta
|
|
||||||
if (defined $::arg_debug) {
|
|
||||||
$b1 = new Benchmark; $bd = Benchmark::timediff($b1, $b0);
|
|
||||||
printf "processing time: %s\n", timestr($bd);
|
|
||||||
}
|
|
||||||
exit 0;
|
|
||||||
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Parse input option arguments
|
|
||||||
sub parse_memtop_args {
|
|
||||||
(local *::arg_debug,
|
|
||||||
local *::arg_delay,
|
|
||||||
local *::arg_repeat,
|
|
||||||
local *::arg_period,
|
|
||||||
) = @_;
|
|
||||||
|
|
||||||
# Local variables
|
|
||||||
my ($fail, $arg_help);
|
|
||||||
|
|
||||||
# Use the Argument processing module
|
|
||||||
use Getopt::Long;
|
|
||||||
|
|
||||||
# Process input arguments
|
|
||||||
$fail = 0;
|
|
||||||
GetOptions(
|
|
||||||
"debug:i", \$::arg_debug,
|
|
||||||
"delay=f", \$::arg_delay,
|
|
||||||
"repeat=i", \$::arg_repeat,
|
|
||||||
"period=i", \$::arg_period,
|
|
||||||
"help|h", \$arg_help
|
|
||||||
) || GetOptionsMessage();
|
|
||||||
|
|
||||||
# Print help documentation if user has selected --help
|
|
||||||
&ListHelp() if (defined $arg_help);
|
|
||||||
|
|
||||||
# Validate options
|
|
||||||
if ((defined $::arg_repeat) && (defined $::arg_period)) {
|
|
||||||
$fail = 1;
|
|
||||||
warn "$::TOOLNAME: Input error: cannot specify both --repeat and --period options.\n";
|
|
||||||
}
|
|
||||||
if ((defined $::arg_delay) && ($::arg_delay < 0.01)) {
|
|
||||||
$fail = 1;
|
|
||||||
warn "$::TOOLNAME: Input error: --delay %f is less than 0.01.\n",
|
|
||||||
$::arg_delay;
|
|
||||||
}
|
|
||||||
if (@::ARGV) {
|
|
||||||
$fail = 1;
|
|
||||||
warn "$::TOOLNAME: Input error: not expecting these options: '@::ARGV'.\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
# Set reasonable defaults
|
|
||||||
$::arg_delay ||= 1.0;
|
|
||||||
$::arg_repeat ||= 1;
|
|
||||||
if ($::arg_period) {
|
|
||||||
$::arg_repeat = $::arg_period / $::arg_delay;
|
|
||||||
} else {
|
|
||||||
$::arg_period = $::arg_delay * $::arg_repeat;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Upon missing or invalid options, print usage
|
|
||||||
if ($fail == 1) {
|
|
||||||
&Usage();
|
|
||||||
exit 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Print out a warning message and usage
|
|
||||||
sub GetOptionsMessage {
|
|
||||||
warn "$::TOOLNAME: Error processing input arguments.\n";
|
|
||||||
&Usage();
|
|
||||||
exit 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Print out program usage
|
|
||||||
sub Usage {
|
|
||||||
printf "Usage: $::TOOLNAME OPTIONS\n";
|
|
||||||
printf " [--delay=<seconds>] [--repeat=<num>] [--period=<seconds>]\n";
|
|
||||||
printf " [--help]\n";
|
|
||||||
printf "\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
# Print tool help
|
|
||||||
sub ListHelp {
|
|
||||||
printf "$::TOOLNAME -- displays high memory usage at high level\n";
|
|
||||||
&Usage();
|
|
||||||
printf " --delay=<seconds> : output interval (seconds): default: 1.0\n";
|
|
||||||
printf " --repeat=<num> : number of repeat samples: default: 1\n";
|
|
||||||
printf " --period=<seconds> : overall tool duration (seconds): default: --\n";
|
|
||||||
printf " --help : this help\n";
|
|
||||||
printf "\n";
|
|
||||||
exit 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Print memory summary
|
|
||||||
sub print_memory {
|
|
||||||
(local *::t_1) = @_;
|
|
||||||
|
|
||||||
# counter
|
|
||||||
our $count;
|
|
||||||
$::count++; $::count %= 15;
|
|
||||||
|
|
||||||
my ($file, $n);
|
|
||||||
my %mem = ();
|
|
||||||
my %node = ();
|
|
||||||
|
|
||||||
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst);
|
|
||||||
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($::t_1);
|
|
||||||
my $msec = 1000.0*($::t_1 - int($::t_1));
|
|
||||||
|
|
||||||
# Process all entries of MEMINFO
|
|
||||||
$file = '/proc/meminfo';
|
|
||||||
open(FILE, $file) || die "Cannot open file: $file ($!)";
|
|
||||||
while($_ = <FILE>) {
|
|
||||||
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
|
|
||||||
if (/^(\S+):\s+(\d+)\b/) {
|
|
||||||
$mem{$1} = $2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
close(FILE);
|
|
||||||
|
|
||||||
# Process all entries of per-Node MEMINFO
|
|
||||||
for ($n=0; $n < $::num_nodes; $n++) {
|
|
||||||
$file = sprintf('/sys/devices/system/node/node%d/meminfo', $n);
|
|
||||||
open(FILE, $file) || die "Cannot open file: $file ($!)";
|
|
||||||
while($_ = <FILE>) {
|
|
||||||
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
|
|
||||||
if (/^Node\s+(\d+)\s+(\S+):\s+(\d+)\b/) {
|
|
||||||
$node{$1}{$2} = $3;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
close(FILE);
|
|
||||||
}
|
|
||||||
|
|
||||||
# Calculate available memory
|
|
||||||
if ($::is_strict) {
|
|
||||||
$mem{'Avail'} = $mem{'CommitLimit'} - $mem{'Committed_AS'};
|
|
||||||
} else {
|
|
||||||
$mem{'Avail'} = $mem{'MemFree'} +
|
|
||||||
$mem{'Cached'} +
|
|
||||||
$mem{'Buffers'} +
|
|
||||||
$mem{'SReclaimable'};
|
|
||||||
}
|
|
||||||
$mem{'Used'} = $mem{'MemTotal'} - $mem{'Avail'};
|
|
||||||
$mem{'Anon'} = $mem{'AnonPages'};
|
|
||||||
for ($n=0; $n < $::num_nodes; $n++) {
|
|
||||||
$node{$n}{'Avail'} = $node{$n}{'MemFree'} +
|
|
||||||
$node{$n}{'FilePages'} +
|
|
||||||
$node{$n}{'SReclaimable'};
|
|
||||||
$node{$n}{'HFree'} = $node{$n}{'HugePages_Free'} * $mem{'Hugepagesize'};
|
|
||||||
}
|
|
||||||
|
|
||||||
# Print heading every so often
|
|
||||||
if ($::count == 1) {
|
|
||||||
printf "%s ".
|
|
||||||
"%8s %8s %8s %7s %6s %6s %8s %8s %7s %7s %8s %8s",
|
|
||||||
'yyyy-mm-dd hh:mm:ss.fff',
|
|
||||||
'Tot', 'Used', 'Free', 'Ca', 'Buf', 'Slab', 'CAS', 'CLim', 'Dirty', 'WBack', 'Anon', 'Avail';
|
|
||||||
for ($n=0; $n < $::num_nodes; $n++) {
|
|
||||||
printf " %8s %8s", sprintf('%d:Avail', $n), sprintf('%d:HFree', $n);
|
|
||||||
}
|
|
||||||
printf "\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
# Print one line memory summary
|
|
||||||
printf "%4d-%02d-%02d %02d:%02d:%02d.%03d ".
|
|
||||||
"%8.1f %8.1f %8.1f %7.1f %6.1f %6.1f %8.1f %8.1f %7.1f %7.1f %8.1f %8.1f",
|
|
||||||
1900+$year, 1+$mon, $mday, $hour, $min, $sec, $msec,
|
|
||||||
$mem{'MemTotal'}/Ki,
|
|
||||||
$mem{'Used'}/Ki,
|
|
||||||
$mem{'MemFree'}/Ki,
|
|
||||||
$mem{'Cached'}/Ki,
|
|
||||||
$mem{'Buffers'}/Ki,
|
|
||||||
$mem{'Slab'}/Ki,
|
|
||||||
$mem{'Committed_AS'}/Ki,
|
|
||||||
$mem{'CommitLimit'}/Ki,
|
|
||||||
$mem{'Dirty'}/Ki,
|
|
||||||
$mem{'Writeback'}/Ki,
|
|
||||||
$mem{'Anon'}/Ki,
|
|
||||||
$mem{'Avail'}/Ki;
|
|
||||||
for ($n=0; $n < $::num_nodes; $n++) {
|
|
||||||
printf " %8.1f %8.1f", $node{$n}{'Avail'}/Ki, $node{$n}{'HFree'}/Ki;
|
|
||||||
}
|
|
||||||
printf "\n";
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
sub num_numa_nodes {
|
|
||||||
my $file = '/proc/cpuinfo';
|
|
||||||
my %nodes = ();
|
|
||||||
open(FILE, $file) || die "Cannot open file: $file ($!)";
|
|
||||||
while($_ = <FILE>) {
|
|
||||||
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
|
|
||||||
if (/^physical\s+id\s+:\s+(\d+)\b/) {
|
|
||||||
$nodes{$1} = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
close(FILE);
|
|
||||||
return scalar keys %nodes;
|
|
||||||
}
|
|
||||||
|
|
||||||
sub is_strict {
|
|
||||||
my $value = 0;
|
|
||||||
my $file = '/proc/sys/vm/overcommit_memory';
|
|
||||||
open(FILE, $file) || die "Cannot open file: $file ($!)";
|
|
||||||
$_ = <FILE>;
|
|
||||||
$value = /(\d+)/;
|
|
||||||
close(FILE);
|
|
||||||
return ($value == 2) ? 1 : 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
1;
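Given the option handling above, typical memtop invocations look like the following (values are illustrative; --repeat and --period are mutually exclusive, and --delay must be at least 0.01 s):

    memtop --delay=2 --repeat=30   # sample overall and per-node memory every 2 s, 30 samples
    memtop --period=60             # run for a 60 s window at the default 1 s interval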
@@ -1,592 +0,0 @@
|
|||||||
#!/usr/bin/perl
|
|
||||||
########################################################################
|
|
||||||
#
|
|
||||||
# Copyright (c) 2015-2016 Wind River Systems, Inc.
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
#
|
|
||||||
########################################################################
|
|
||||||
#
|
|
||||||
# Description:
|
|
||||||
# This displays per-core occupancy information per sample period.
|
|
||||||
# Output includes total occupancy, and per-core occupancy based on
|
|
||||||
# hi-resolution timings.
|
|
||||||
#
|
|
||||||
# Usage: occtop OPTIONS
|
|
||||||
# [--delay=<seconds>] [--repeat=<num>] [--period=<seconds>]
|
|
||||||
# [--header=<num>]
|
|
||||||
# [--help]
|
|
||||||
|
|
||||||
use strict;
|
|
||||||
use warnings;
|
|
||||||
use Data::Dumper;
|
|
||||||
use POSIX qw(uname strftime);
|
|
||||||
use Time::HiRes qw(clock_gettime usleep CLOCK_MONOTONIC CLOCK_REALTIME);
|
|
||||||
|
|
||||||
use Benchmark ':hireswallclock';
|
|
||||||
use Carp qw(croak carp);
|
|
||||||
|
|
||||||
# Define toolname
|
|
||||||
our $TOOLNAME = "occtop";
|
|
||||||
our $VERSION = "0.1";
|
|
||||||
|
|
||||||
# Constants
|
|
||||||
use constant SI_k => 1.0E3;
|
|
||||||
use constant SI_M => 1.0E6;
|
|
||||||
use constant SI_G => 1.0E9;
|
|
||||||
use constant Ki => 1024.0;
|
|
||||||
use constant Mi => 1024.0*1024.0;
|
|
||||||
use constant Gi => 1024.0*1024.0*1024.0;
|
|
||||||
|
|
||||||
# Globals
|
|
||||||
our %percpu_0 = ();
|
|
||||||
our %percpu_1 = ();
|
|
||||||
our %D_percpu = ();
|
|
||||||
our %loadavg = ();
|
|
||||||
our $D_total = 0.0;
|
|
||||||
our $tm_0 = 0.0;
|
|
||||||
our $tm_1 = 0.0;
|
|
||||||
our $tr_0 = 0.0;
|
|
||||||
our $tr_1 = 0.0;
|
|
||||||
our $tm_elapsed = 0.0;
|
|
||||||
our $tm_final = 0.0;
|
|
||||||
our $uptime = 0.0;
|
|
||||||
our $num_cpus = 1;
|
|
||||||
our $num_tasks = 0;
|
|
||||||
our $num_blk = 0;
|
|
||||||
our $print_host = 1;
|
|
||||||
our $is_schedstat = 1;
|
|
||||||
our $USER_HZ = 100; # no easy way to get this
|
|
||||||
our $CLOCK_NS = SI_G / $USER_HZ;
|
|
||||||
|
|
||||||
# Argument list parameters
|
|
||||||
our ($arg_debug,
|
|
||||||
$arg_delay,
|
|
||||||
$arg_repeat,
|
|
||||||
$arg_period,
|
|
||||||
$arg_header,
|
|
||||||
) = ();
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
# MAIN Program
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
my $MIN_DELAY = 0.001;
|
|
||||||
my $MAX_DELAY = 0.001;
|
|
||||||
|
|
||||||
# benchmark variables
|
|
||||||
my ($bd, $b0, $b1);
|
|
||||||
|
|
||||||
# Autoflush output
|
|
||||||
select(STDERR);
|
|
||||||
$| = 1;
|
|
||||||
select(STDOUT); # default
|
|
||||||
$| = 1;
|
|
||||||
|
|
||||||
# Parse input arguments and print tool usage if necessary
|
|
||||||
&parse_occtop_args(
|
|
||||||
\$::arg_debug,
|
|
||||||
\$::arg_delay,
|
|
||||||
\$::arg_repeat,
|
|
||||||
\$::arg_period,
|
|
||||||
\$::arg_header,
|
|
||||||
);
|
|
||||||
|
|
||||||
# Print out some debugging information
|
|
||||||
if (defined $::arg_debug) {
|
|
||||||
$Data::Dumper::Indent = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Check for schedstat support; fallback to stats
|
|
||||||
$is_schedstat = -e '/proc/schedstat' ? 1 : 0;
|
|
||||||
|
|
||||||
# Print out selected options
|
|
||||||
printf "selected options: delay = %.3fs, repeat = %d, header = %d, source = %s\n",
|
|
||||||
$::arg_delay, $::arg_repeat, $::arg_header, $is_schedstat ? 'schedstat' : 'jiffie';
|
|
||||||
|
|
||||||
# Capture timestamp
|
|
||||||
$b0 = new Benchmark;
|
|
||||||
|
|
||||||
# Get number of logical cpus
|
|
||||||
&get_num_logical_cpus(\$::num_cpus);
|
|
||||||
|
|
||||||
|
|
||||||
# Get current hires epoc timestamp
|
|
||||||
$::tm_1 = clock_gettime(CLOCK_MONOTONIC);
|
|
||||||
$::tr_1 = clock_gettime(CLOCK_REALTIME);
|
|
||||||
$::tm_final = $::tm_1 + $::arg_delay*$::arg_repeat;
|
|
||||||
|
|
||||||
# Set initial delay
|
|
||||||
$::tm_elapsed = $::arg_delay;
|
|
||||||
$MAX_DELAY = $::arg_delay + $MIN_DELAY;
|
|
||||||
|
|
||||||
# Get overall per-cpu stats
|
|
||||||
if ($is_schedstat) {
|
|
||||||
&read_schedstat(\%::percpu_1);
|
|
||||||
} else {
|
|
||||||
&read_stat(\%::percpu_1);
|
|
||||||
}
|
|
||||||
|
|
||||||
# Main loop
|
|
||||||
REPEAT_LOOP: for (my $repeat=1; $repeat <= $::arg_repeat; $repeat++) {
|
|
||||||
|
|
||||||
# copy all state variables
|
|
||||||
%::tm_0 = (); %::tr_0 = (); %::percpu_0 = ();
|
|
||||||
$::tm_0 = $::tm_1; $::tr_0 = $::tr_1;
|
|
||||||
foreach my $cpu (keys %::percpu_1) { $::percpu_0{$cpu} = $::percpu_1{$cpu}; }
|
|
||||||
|
|
||||||
# estimate sleep delay to achieve desired interarrival by subtracting out
|
|
||||||
# the measured cpu runtime of the tool.
|
|
||||||
my $delay = $::arg_delay;
|
|
||||||
$delay = $MIN_DELAY if ($delay < $MIN_DELAY);
|
|
||||||
$delay = $MAX_DELAY if ($delay > $MAX_DELAY);
|
|
||||||
usleep( SI_M*$delay );
|
|
||||||
|
|
||||||
# Collect current state
|
|
||||||
$::tm_1 = (); $::tr_1 = (); %::percpu_1 = ();
|
|
||||||
# Get current hires epoc timestamp
|
|
||||||
$::tm_1 = clock_gettime(CLOCK_MONOTONIC);
|
|
||||||
$::tr_1 = clock_gettime(CLOCK_REALTIME);
|
|
||||||
# Get overall per-cpu stats
|
|
||||||
if ($is_schedstat) {
|
|
||||||
&read_schedstat(\%::percpu_1);
|
|
||||||
} else {
|
|
||||||
&read_stat(\%::percpu_1);
|
|
||||||
}
|
|
||||||
|
|
||||||
# Get current uptime
|
|
||||||
&get_uptime(\$::uptime);
|
|
||||||
# Get current loadavg
|
|
||||||
&get_loadavg(\%::loadavg, \$::runq, \$::num_tasks);
|
|
||||||
# Get current processes blocked
|
|
||||||
&get_blocked(\$::num_blk);
|
|
||||||
|
|
||||||
# Delta calculation
|
|
||||||
%::D_percpu = ();
|
|
||||||
$::tm_elapsed = $tm_1 - $tm_0;
|
|
||||||
foreach my $cpu (keys %::percpu_1) {
|
|
||||||
$::D_percpu{$cpu}{'runtime'} = ($::percpu_1{$cpu} - $::percpu_0{$cpu})/1.0E6;
|
|
||||||
if ($::tm_elapsed > 0.0) {
|
|
||||||
$::D_percpu{$cpu}{'occ'} = 100.0*$D_percpu{$cpu}{'runtime'}/1.0E3/$::tm_elapsed;
|
|
||||||
} else {
|
|
||||||
$::D_percpu{$cpu}{'occ'} = 0.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Print tool header
|
|
||||||
if ($repeat == 1) {
|
|
||||||
&occtop_header(
|
|
||||||
\$::tr_1,
|
|
||||||
\$::uptime,
|
|
||||||
\%::loadavg,
|
|
||||||
\$::runq,
|
|
||||||
\$::num_blk,
|
|
||||||
\$::num_tasks,
|
|
||||||
\$::print_host,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
# Print one-liner summary
|
|
||||||
&print_occtop(
|
|
||||||
\$::tr_1,
|
|
||||||
\$::num_cpus,
|
|
||||||
\%::D_percpu,
|
|
||||||
\$::arg_header,
|
|
||||||
);
|
|
||||||
|
|
||||||
# exit repeat loop if we have exceeded overall time
|
|
||||||
last if ($::tm_1 > $::tm_final);
|
|
||||||
|
|
||||||
} # REPEAT LOOP
|
|
||||||
|
|
||||||
# Print that tool has finished
|
|
||||||
print "done\n";
|
|
||||||
|
|
||||||
# Capture timestamp and report delta
|
|
||||||
$b1 = new Benchmark; $bd = Benchmark::timediff($b1, $b0);
|
|
||||||
printf "processing time: %s\n", timestr($bd);
|
|
||||||
exit 0;
|
|
||||||
|
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# Parse per-cpu hi-resolution scheduling stats
|
|
||||||
sub read_schedstat
|
|
||||||
{
|
|
||||||
(local *::percpu) = @_;
|
|
||||||
my ($version, $timestamp);
|
|
||||||
my ($cpu, $cputime);
|
|
||||||
my ($fh, $file);
|
|
||||||
|
|
||||||
%::percpu = ();
|
|
||||||
|
|
||||||
# parse /proc/schedstat
|
|
||||||
$file = '/proc/schedstat';
|
|
||||||
open($fh, $file) || croak "Cannot open file: $file ($!)";
|
|
||||||
$_ = <$fh>; ($version) = /^version\s+(\d+)/;
|
|
||||||
$_ = <$fh>; ($timestamp) = /^timestamp\s+(\d+)/;
|
|
||||||
|
|
||||||
if ($version == 15) {
|
|
||||||
LOOP_SCHEDSTAT: while (<$fh>) {
|
|
||||||
# version 15: cputime is 7th field
|
|
||||||
if (/^cpu(\d+)\s+\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)\s+/) {
|
|
||||||
$cpu = $1; $cputime = $2;
|
|
||||||
$::percpu{$cpu} = $cputime;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
croak "schedstat version: $version method not implemented.";
|
|
||||||
}
|
|
||||||
close($fh);
|
|
||||||
}
|
|
||||||
|
|
||||||
# Parse per-cpu jiffie stats; cputime excludes iowait.
|
|
||||||
sub read_stat
|
|
||||||
{
|
|
||||||
(local *::percpu) = @_;
|
|
||||||
my ($cpu, $cputime);
|
|
||||||
my ($user, $sys, $nice, $idle, $iowt, $hirq, $sirq);
|
|
||||||
my ($fh, $file);
|
|
||||||
|
|
||||||
%::percpu = ();
|
|
||||||
|
|
||||||
# parse /proc/stat
|
|
||||||
$file = '/proc/stat';
|
|
||||||
open($fh, $file) || croak "Cannot open file: $file ($!)";
|
|
||||||
LOOP_STAT: while (<$fh>) {
|
|
||||||
if (/^cpu(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+/) {
|
|
||||||
$cpu =$1; $user = $2; $sys = $3; $nice = $4; $idle = $5; $iowt = $6; $hirq = $7; $sirq = $8;
|
|
||||||
$cputime = $CLOCK_NS * ($user + $sys + $nice + $iowt + $hirq + $sirq);
|
|
||||||
$::percpu{$cpu} = $cputime;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
close($fh);
|
|
||||||
}
|
|
||||||
|
|
||||||
# Parse load-average from /proc/loadavg
|
|
||||||
sub get_loadavg
|
|
||||||
{
|
|
||||||
(local *::loadavg, local *::runq, *::num_tasks) = @_;
|
|
||||||
|
|
||||||
$::loadavg{'1'} = 0.0;
|
|
||||||
$::loadavg{'5'} = 0.0;
|
|
||||||
$::loadavg{'15'} = 0.0;
|
|
||||||
$::runq = 0;
|
|
||||||
$::num_tasks = 0;
|
|
||||||
|
|
||||||
my $file = '/proc/loadavg';
|
|
||||||
open(my $fh, $file) || croak "Cannot open file: $file ($!)";
|
|
||||||
$_ = <$fh>;
|
|
||||||
if (/^(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\/(\d+)\s+\d+/) {
|
|
||||||
$::loadavg{'1'} = $1;
|
|
||||||
$::loadavg{'5'} = $2;
|
|
||||||
$::loadavg{'15'} = $3;
|
|
||||||
$::runq = $4;
|
|
||||||
$::num_tasks = $5;
|
|
||||||
}
|
|
||||||
close($fh);
|
|
||||||
}
|
|
||||||
|
|
||||||
# Parse blocked from /proc/stat
|
|
||||||
sub get_blocked
|
|
||||||
{
|
|
||||||
(local *::num_blk) = @_;
|
|
||||||
|
|
||||||
$::num_blk = 0;
|
|
||||||
|
|
||||||
my $file = '/proc/stat';
|
|
||||||
open(my $fh, $file) || croak "Cannot open file: $file ($!)";
|
|
||||||
while ($_ = <$fh>) {
|
|
||||||
if (/^procs_blocked\s+(\d+)/) {
|
|
||||||
$::num_blk = $1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
close($fh);
|
|
||||||
}
|
|
||||||
|
|
||||||
# Parse uptime from /proc/uptime
|
|
||||||
sub get_uptime
|
|
||||||
{
|
|
||||||
(local *::uptime) = @_;
|
|
||||||
$::uptime = 0.0;
|
|
||||||
|
|
||||||
my $file = '/proc/uptime';
|
|
||||||
open(my $fh, $file) || croak "Cannot open file: $file ($!)";
|
|
||||||
$_ = <$fh>;
|
|
||||||
if (/^(\S+)\s+\S+/) {
|
|
||||||
$::uptime = $1;
|
|
||||||
}
|
|
||||||
close($fh);
|
|
||||||
}
|
|
||||||
|
|
||||||
# Get number of online logical cpus
|
|
||||||
sub get_num_logical_cpus {
|
|
||||||
(local *::num_cpus) = @_;
|
|
||||||
$::num_cpus = 0;
|
|
||||||
|
|
||||||
my $file = "/proc/cpuinfo";
|
|
||||||
open(my $fh, $file) || croak "Cannot open file: $file ($!)";
|
|
||||||
LOOP_CPUINFO: while (<$fh>) {
|
|
||||||
if (/^[Pp]rocessor\s+:\s\d+/) {
|
|
||||||
$::num_cpus++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
close($fh);
|
|
||||||
}
|
|
||||||
|
|
||||||
# Print occupancy summary
|
|
||||||
sub print_occtop {
|
|
||||||
(local *::tr_1,
|
|
||||||
local *::num_cpus,
|
|
||||||
local *::D_percpu,
|
|
||||||
local *::arg_header,
|
|
||||||
) = @_;
|
|
||||||
|
|
||||||
# counter
|
|
||||||
our $count;
|
|
||||||
$::count++; $::count %= $::arg_header;
|
|
||||||
$::count = 1 if ($::arg_header == 1);
|
|
||||||
|
|
||||||
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst);
|
|
||||||
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($::tr_1);
|
|
||||||
my $msec = 1000.0*($::tr_1 - int($::tr_1));
|
|
||||||
|
|
||||||
# Print heading every so often
|
|
||||||
if ($::count == 1) {
|
|
||||||
printf "%s ".
|
|
||||||
"%7s ",
|
|
||||||
'yyyy-mm-dd hh:mm:ss.fff',
|
|
||||||
'total';
|
|
||||||
for (my $cpu=0; $cpu < $::num_cpus; $cpu++) {
|
|
||||||
printf "%5s ", $cpu;
|
|
||||||
}
|
|
||||||
print "\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
# Print one summary
|
|
||||||
my $occ_total = 0.0;
|
|
||||||
for (my $cpu=0; $cpu < $::num_cpus; $cpu++) {
|
|
||||||
$occ_total += $::D_percpu{$cpu}{'occ'};
|
|
||||||
}
|
|
||||||
printf "%4d-%02d-%02d %02d:%02d:%02d.%03d ".
|
|
||||||
"%7.1f ",
|
|
||||||
1900+$year, 1+$mon, $mday, $hour, $min, $sec, $msec,
|
|
||||||
$occ_total;
|
|
||||||
for (my $cpu=0; $cpu < $::num_cpus; $cpu++) {
|
|
||||||
printf "%5.1f ", $::D_percpu{$cpu}{'occ'};
|
|
||||||
}
|
|
||||||
print "\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
# Print header
|
|
||||||
sub occtop_header {
|
|
||||||
(local *::tr_1,
|
|
||||||
local *::uptime,
|
|
||||||
local *::loadavg,
|
|
||||||
local *::runq,
|
|
||||||
local *::num_blk,
|
|
||||||
local *::num_tasks,
|
|
||||||
local *::print_host,
|
|
||||||
) = @_;
|
|
||||||
|
|
||||||
# process epoch to get current timestamp
|
|
||||||
my $mm_in_s = 60;
|
|
||||||
my $hh_in_s = 60*60;
|
|
||||||
my $dd_in_s = 24*60*60;
|
|
||||||
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst);
|
|
||||||
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($::tr_1);
|
|
||||||
my $msec = 1000.0*($::tr_1 - int($::tr_1));
|
|
||||||
|
|
||||||
# convert uptime to elapsed <d>:<hh>:<mm>:<ss>
|
|
||||||
my ($up, $up_dd, $up_hh, $up_mm, $up_ss);
|
|
||||||
$up = int($::uptime);
|
|
||||||
$up_dd = int($up/$dd_in_s);
|
|
||||||
$up -= $dd_in_s*$up_dd;
|
|
||||||
$up_hh = int($up/$hh_in_s);
|
|
||||||
$up -= $hh_in_s*$up_hh;
|
|
||||||
$up_mm = int($up/$mm_in_s);
|
|
||||||
$up -= $mm_in_s*$up_mm;
|
|
||||||
$up_ss = $up;
|
|
||||||
|
|
||||||
#occtop -- 2014/03/03 02:00:21.357 ldavg:0.07, 0.09, 0.08 runq:1 nproc:440 up:6:13:00:56
|
|
||||||
printf "%s %s -- ".
|
|
||||||
"%4d-%02d-%02d %02d:%02d:%02d.%03d ".
|
|
||||||
"ldavg:%.2f, %.2f, %.2f runq:%d blk:%d nproc:%d ".
|
|
||||||
"up:%d:%02d:%02d:%02d\n",
|
|
||||||
$::TOOLNAME, $::VERSION,
|
|
||||||
1900+$year, 1+$mon, $mday, $hour, $min, $sec, $msec,
|
|
||||||
$::loadavg{'1'}, $::loadavg{'5'}, $::loadavg{'15'},
|
|
||||||
$::runq, $::num_blk, $::num_tasks,
|
|
||||||
$up_dd, $up_hh, $up_mm, $up_ss;
|
|
||||||
|
|
||||||
return if (!($::print_host));
|
|
||||||
|
|
||||||
# After first print, disable print host information
|
|
||||||
$::print_host = 0;
|
|
||||||
|
|
||||||
# Get host specific information
|
|
||||||
my ($OSTYPE, $NODENAME, $OSRELEASE, $version, $MACHINE);
|
|
||||||
($OSTYPE, $NODENAME, $OSRELEASE, $version, $MACHINE) = POSIX::uname();
|
|
||||||
my ($NODETYPE, $SUBFUNCTION, $BUILDINFO) = ('-', '-', '-');
|
|
||||||
my ($SW_VERSION, $BUILD_ID) = ('-', '-');
|
|
||||||
|
|
||||||
# Get platform nodetype and subfunction
|
|
||||||
PLATFORM: {
|
|
||||||
my $file = "/etc/platform/platform.conf";
|
|
||||||
open(FILE, $file) || next;
|
|
||||||
while($_ = <FILE>) {
|
|
||||||
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
|
|
||||||
if (/^nodetype=(\S+)/) {
|
|
||||||
$NODETYPE = $1;
|
|
||||||
}
|
|
||||||
if (/^subfunction=(\S+)/) {
|
|
||||||
$SUBFUNCTION = $1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
close(FILE);
|
|
||||||
}
|
|
||||||
|
|
||||||
# Get loadbuild info
|
|
||||||
BUILD: {
|
|
||||||
my $file = "/etc/build.info";
|
|
||||||
open(FILE, $file) || next;
|
|
||||||
while($_ = <FILE>) {
|
|
||||||
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
|
|
||||||
if (/^SW_VERSION=\"([^"]+)\"/) {
|
|
||||||
$SW_VERSION = $1;
|
|
||||||
}
|
|
||||||
if (/^BUILD_ID=\"([^"]+)\"/) {
|
|
||||||
$BUILD_ID = $1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
close(FILE);
|
|
||||||
}
|
|
||||||
$BUILDINFO = join(' ', $SW_VERSION, $BUILD_ID);
|
|
||||||
|
|
||||||
# Parse /proc/cpuinfo to get specific processor info
|
|
||||||
my ($n_cpu, $model_name, $cpu_MHz) = (0, '-', 0);
|
|
||||||
CPUINFO: {
|
|
||||||
my $file = "/proc/cpuinfo";
|
|
||||||
open(FILE, $file) || croak "Cannot open file: $file ($!)";
|
|
||||||
while($_ = <FILE>) {
|
|
||||||
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
|
|
||||||
if (/^[Pp]rocessor\s+:\s+\d+/) {
|
|
||||||
$n_cpu++;
|
|
||||||
} elsif (/^model name\s+:\s+(.*)$/) {
|
|
||||||
$_ = $1; s/\s+/ /g;
|
|
||||||
$model_name = $_;
|
|
||||||
} elsif (/^cpu MHz\s+:\s+(\S+)/) {
|
|
||||||
$cpu_MHz = $1;
|
|
||||||
} elsif (/^bogomips\s+:\s+(\S+)/) {
|
|
||||||
$cpu_MHz = $1 if ($cpu_MHz == 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
close(FILE);
|
|
||||||
}
|
|
||||||
|
|
||||||
printf " host:%s nodetype:%s subfunction:%s\n",
|
|
||||||
$NODENAME, $NODETYPE, $SUBFUNCTION;
|
|
||||||
printf " arch:%s processor:%s speed:%.0f #CPUs:%d\n",
|
|
||||||
$MACHINE, $model_name, $cpu_MHz, $n_cpu;
|
|
||||||
printf " %s %s build:%s\n", $OSTYPE, $OSRELEASE, $BUILDINFO;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
# Parse and validate command line arguments
|
|
||||||
sub parse_occtop_args {
|
|
||||||
(local *::arg_debug,
|
|
||||||
local *::arg_delay,
|
|
||||||
local *::arg_repeat,
|
|
||||||
local *::arg_period,
|
|
||||||
local *::arg_header,
|
|
||||||
) = @_;
|
|
||||||
|
|
||||||
# Local variables
|
|
||||||
my ($fail, $arg_help);
|
|
||||||
|
|
||||||
# Use the Argument processing module
|
|
||||||
use Getopt::Long;
|
|
||||||
|
|
||||||
# Print usage if no arguments
|
|
||||||
if (!@::ARGV) {
|
|
||||||
&Usage();
|
|
||||||
exit 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Process input arguments
|
|
||||||
$fail = 0;
|
|
||||||
GetOptions(
|
|
||||||
"debug:i", \$::arg_debug,
|
|
||||||
"delay=f", \$::arg_delay,
|
|
||||||
"period=i", \$::arg_period,
|
|
||||||
"repeat=i", \$::arg_repeat,
|
|
||||||
"header:i", \$::arg_header,
|
|
||||||
"help|h", \$arg_help
|
|
||||||
) || GetOptionsMessage();
|
|
||||||
|
|
||||||
# Print help documentation if user has selected --help
|
|
||||||
&ListHelp() if (defined $arg_help);
|
|
||||||
|
|
||||||
# Validate options
|
|
||||||
if ((defined $::arg_repeat) && (defined $::arg_period)) {
|
|
||||||
$fail = 1;
|
|
||||||
warn "$::TOOLNAME: Input error: cannot specify both --repeat and --period options.\n";
|
|
||||||
}
|
|
||||||
if ((defined $::arg_delay) && ($::arg_delay < 0.01)) {
|
|
||||||
$fail = 1;
|
|
||||||
warn "$::TOOLNAME: Input error: --delay %f is less than 0.01.\n",
|
|
||||||
$::arg_delay;
|
|
||||||
}
|
|
||||||
if (@::ARGV) {
|
|
||||||
$fail = 1;
|
|
||||||
warn "$::TOOLNAME: Input error: not expecting these options: '@::ARGV'.\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
# Set reasonable defaults
|
|
||||||
$::arg_header ||= 15;
|
|
||||||
$::arg_delay ||= 1.0;
|
|
||||||
$::arg_repeat ||= 1;
|
|
||||||
if ($::arg_period) {
|
|
||||||
$::arg_repeat = $::arg_period / $::arg_delay;
|
|
||||||
} else {
|
|
||||||
$::arg_period = $::arg_delay * $::arg_repeat;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Upon missing or invalid options, print usage
|
|
||||||
if ($fail == 1) {
|
|
||||||
&Usage();
|
|
||||||
exit 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Print out a warning message and usage
|
|
||||||
sub GetOptionsMessage {
|
|
||||||
warn "$::TOOLNAME: Error processing input arguments.\n";
|
|
||||||
&Usage();
|
|
||||||
exit 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Print out program usage
|
|
||||||
sub Usage {
|
|
||||||
printf "Usage: $::TOOLNAME OPTIONS\n";
|
|
||||||
printf " [--delay=<seconds>] [--repeat=<num>] [--period=<seconds>]\n";
|
|
||||||
printf " [--header=<num>]\n";
|
|
||||||
printf " [--help]\n";
|
|
||||||
|
|
||||||
printf "\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
# Print tool help
|
|
||||||
sub ListHelp {
|
|
||||||
printf "$::TOOLNAME -- display hi-resolution per-cpu occupancy\n";
|
|
||||||
&Usage();
|
|
||||||
printf "Options: miscellaneous\n";
|
|
||||||
printf " --delay=<seconds> : output interval (seconds): default: 1.0\n";
|
|
||||||
printf " --repeat=<num> : number of repeat samples: default: 1\n";
|
|
||||||
printf " --period=<seconds> : overall tool duration (seconds): default: --\n";
|
|
||||||
printf " --header=<num> : print header every num samples: default: 15\n";
|
|
||||||
printf " --help : this help\n";
|
|
||||||
exit 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
1;
File diff suppressed because it is too large
@@ -1,4 +0,0 @@
PACKAGE_NAME=vm-topology
VERSION=1.0
SRC_DIR=$PKG_BASE/$PACKAGE_NAME
TIS_PATCH_VER=1
@ -1,61 +0,0 @@
%global pypi_name vm-topology

Summary: vm_topology
Name: vm-topology
Version: 1.0
Release: %{tis_patch_ver}%{?_tis_dist}
License: Apache-2.0
Group: base
Packager: Wind River <info@windriver.com>

URL: unknown
Source0: %{pypi_name}-%{version}.tar.gz

BuildArch: noarch

BuildRequires: python
BuildRequires: python-setuptools
BuildRequires: python2-pip
BuildRequires: python2-wheel
BuildRequires: python-keyring
BuildRequires: libvirt

Requires: python
Requires: python-keyring
Requires: /usr/bin/env
Requires: libvirt

%description
Show compute resources and VM topology

%prep
%autosetup -p 1 -n %{pypi_name}-%{version}
# Remove bundled egg-info
rm -rf %{pypi_name}.egg-info
# Let RPM handle the dependencies
rm -f requirements.txt

%build
%{__python2} setup.py build
%py2_build_wheel

%install
%{__python2} setup.py install --skip-build --root %{buildroot}
mkdir -p $RPM_BUILD_ROOT/wheels
install -m 644 dist/*.whl $RPM_BUILD_ROOT/wheels/

%files
%defattr(-,root,root,-)
%license LICENSE
%{_bindir}/vm-topology
%{python2_sitelib}/vm_topology
%{python2_sitelib}/*.egg-info

%package wheels
Summary: %{name} wheels

%description wheels
Contains python wheels for %{name}

%files wheels
/wheels/*
@ -1,202 +0,0 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
@ -1,19 +0,0 @@
#
# Copyright (c) 2013-2014 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

import setuptools

setuptools.setup(
    name='vm_topology',
    description='Show compute resources and VM topology',
    version='1.0.0',
    license='Apache-2.0',
    packages=['vm_topology', 'vm_topology.exec'],
    entry_points={
        'console_scripts': [
            'vm-topology = vm_topology.exec.vm_topology:main',
        ]}
)
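The console_scripts entry point in the setup.py above directs setuptools to generate a vm-topology executable that imports vm_topology.exec.vm_topology and calls its main(). The body of that module is not included in what is shown here, so the skeleton below is only a hedged illustration of how such an entry point resolves; the print text and return value are assumptions, not the tool's actual behavior.

# Hypothetical skeleton of vm_topology/exec/vm_topology.py, illustrating only
# the shape a console_scripts target needs; the real implementation differs.
import sys

def main():
    # Placeholder body: the real tool inspects libvirt and reports
    # compute resources and VM topology.
    print("vm-topology: show compute resources and VM topology")
    return 0

if __name__ == "__main__":
    sys.exit(main())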
@ -1,5 +0,0 @@
#
# Copyright (c) 2014 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -1,5 +0,0 @@
#
# Copyright (c) 2014 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
File diff suppressed because it is too large
tox.ini (5 changes)
@ -102,10 +102,9 @@ deps = -r{toxinidir}/test-requirements.txt
 python-daemon==2.1.2
 pylint

-# There are currenrly 2 python modules with a setup.py file
+# There are currenrly 1 python module with a setup.py file
 commands = pylint --rcfile=./pylint.rc \
-           tools/storage-topology/storage-topology/storage_topology \
-           tools/vm-topology/vm-topology/vm_topology
+           tools/storage-topology/storage-topology/storage_topology

 [testenv:venv]
 basepython = python3