0232b8b9dc
The collectd cpu plugin and monitor-tools are updated to support diagnosing high CPU usage on a shorter time scale. This includes tools that help System Engineering determine where CPU time is coming from. The collectd cpu plugin is updated to support Kubernetes services under system.slice or k8splatform.slice. This changes the frequency of read function sampling to 1 second. We now see logs with instantaneous CPU spikes at the cgroup level. The dispatch of results still occurs at the original plugin interval of 30 seconds. The logging of the 1 second sampling is configurable via /etc/collectd.d/starlingx/python_plugins.conf field 'hires = <true|false>'. The high-resolution samples are always collected and used for a histogram, but it is not always desirable to log them due to the volume of output. This adds new logs for occupancy wait. This is similar to CPU occupancy, but instead of the real time used, it measures the aggregate percent of time a given cgroup is waiting to be scheduled. This is a measure of CPU contention. This adds new logs for occupancy histograms for all cgroups and aggregated groupings based on the 1 second occupancy samples. The histograms are displayed in high-runner order. This displays the histogram, the mean, the 95th percentile, and the max value. The histograms are logged at 5 minute intervals. This reduces the collectd cgroup to 256 CPUShares (from 1024). This smooths out the behaviour of poorly behaved audits. The 'schedtop' tool is updated to display the 'cgroup' field. This is the systemd cgroup name, or an abbreviated pod name. This also handles kernel sched output format changes for 6.6. A new tool 'portscanner' is added to monitor-tools to diagnose local host processes that are using specific ports. This has been instrumental in discovering gunicorn/keystone API users. A new tool 'k8smetrics' is added to monitor-tools to display the delay histogram and percentiles for kube-apiserver and the etcd server.
This gives a way to quantify performance as a result of system load. Partial-Bug: 2084714 TEST PLAN: AIO-SX, AIO-DX, Standard, Storage, DC: PASS: Fresh install ISO PASS: Verify /var/log/collectd.log has 1 second cpu/wait logs and contains: etcd, kubelet, and containerd services. PASS: Verify we are dispatching at 30 second granularity. PASS: Verify we are displaying histograms every 5 minutes. PASS: Verify we can enable/disable the display of high-resolution logs with /etc/collectd.d/starlingx/python_plugins.conf field 'hires = <true|false>'. PASS: Verify schedtop contains 'cgroup' output. PASS: Verify output from 'k8smetrics'. Cross check against Prometheus GUI for apiserver percentile. PASS: Verify output from portscanner with port 5000. Verify 1-to-1 mapping against /var/log/keystone/keystone-all.log. Change-Id: I82d4f414afdf1cecbcc99680b360cbad702ba140 Signed-off-by: Jim Gauld <James.Gauld@windriver.com>
88 lines
2.7 KiB
INI
88 lines
2.7 KiB
INI
[tox]
# Environments run by a bare `tox` invocation.
envlist = linters,pylint
# [testenv] uses allowlist_externals, which tox only recognises from 3.18.0
# onward (earlier releases know the setting as whitelist_externals).
minversion = 3.18.0
skipsdist = True
sitepackages = False
|
|
|
|
[testenv]
# Base settings for the test environments defined in this file.
install_command = pip install -U {opts} {packages}
# Non-virtualenv executables that commands are allowed to invoke.
allowlist_externals =
    bash
deps =
    -r{toxinidir}/requirements.txt
    -r{toxinidir}/test-requirements.txt
# Environment variables exported to every command.
setenv =
    VIRTUAL_ENV={envdir}
    OS_STDOUT_CAPTURE=1
    OS_STDERR_CAPTURE=1
    OS_DEBUG=1
    OS_LOG_CAPTURE=1
|
|
|
|
[testenv:bashate]
# Runs bashate on each *.sh file found under {toxinidir}; directories whose
# name starts with a dot are pruned from the search.
# The -e 'E*' option treats all E* codes as errors rather than warnings.
commands =
    bash -c "find {toxinidir} \
        -not \( -type d -name .?\* -prune \) \
        -type f \
        -not -name \*~ \
        -not -name \*.md \
        -name \*.sh \
        -print0 | xargs -r -n 1 -0 bashate -v \
        -e 'E*'"
|
|
|
|
[flake8]
# Note: hacking pulls in flake8 2.5.5 which can not parse an ignore list spanning multiple lines,
# so the ignore value below is kept on ONE line. Do not wrap it.
# F errors are high priority to fix. W are warnings. E series are pep8, H series are hacking
# F401 'FOO' imported but unused
# F632 use ==/!= to compare constant literals (str, bytes, int, float, tuple)
# F821 undefined name 'dpdk' (this is a flake8 bug)
# F841 local variable 'foo' is assigned to but never used
# W291 trailing whitespace
# W503 line break before binary operator
# W504 line break after binary operator
# W605 invalid escape sequence '\d'
# E117 over-indented
# E265 block comment should start with '# '
# E266 too many leading '#' for block comment
# E305 expected 2 blank lines after class or function definition, found 1
# E402 module level import not at top of file
# E722 do not use bare 'except'
# E741 ambiguous variable name 'I'
# H101 is TODO
# H104 File contains nothing but comments
# H201 no 'except:' at least use 'except Exception:'
# H238 old style class declaration, use new style (inherit from `object`)
# H306 imports not in alphabetical order (sys, re)
ignore = F401,F632,F821,F841,W291,W503,W504,W605,E117,E265,E266,E305,E402,E722,E741,H101,H104,H201,H238,H306
max-line-length = 110
|
|
|
|
[testenv:flake8]
# Runs flake8 with no command-line arguments.
commands = flake8
|
|
|
|
[testenv:linters]
# Aggregate lint run: reuses the command lists of the bashate and flake8
# environments through tox section substitution.
commands =
    {[testenv:bashate]commands}
    {[testenv:flake8]commands}
|
|
|
|
[testenv:pylint]
# Placeholder environment: it defines no commands of its own.
description = Dummy environment to allow pylint to be run in subdir tox
basepython = python3
# Dependencies are left disabled for this placeholder:
# deps = -r{toxinidir}/test-requirements.txt
|
|
|
|
[bandit]
# Bandit tests that are skipped:
#   B602: Test for use of popen with shell equals true
#
# Note: the 'skips' entry cannot be split across multiple lines
exclude = tests
skips = B602
|
|
|
|
[testenv:bandit]
# Scans {toxinidir} recursively with bandit; --ini tox.ini points bandit at
# this file for its options.
description = Bandit code scan for *.py files under config folder
basepython = python3
deps =
    -r{toxinidir}/test-requirements.txt
commands =
    bandit --ini tox.ini -n 5 -r {toxinidir}/ -x '**/.tox/**,**/.eggs/**' -lll
|