Browse Source

Infrastructure and Cluster Monitoring

Introduce a new Armada manifest (stx-monitor) that defines the
necessary software components and services to deploy a
monitoring solution for the infrastructure and Kubernetes cluster
as well as the services it is running.

The Elastic 7.x Apache 2.0 Licensed ("oss") images are included.

The following Elastic components are introduced in this commit:
elasticsearch, filebeat, metricbeat, logstash, kibana.
This enables the build of the stx-monitor application manifest and,
once the application is applied, the capture of logs and metrics
into Elasticsearch. The logs and metrics may then be viewed with a
tool such as Kibana.

The stx-monitor application is built:
   build-pkgs monitor-helm
   build-pkgs stx-monitor-helm
   build-helm-charts.sh --app stx-monitor

The settings for the elasticsearch helm chart are chosen to allow
maintenance (mtce) actions, such as host-lock, on a controller.

The following host labels are required on controllers prior
to the application-apply:
    system host-label-list controller-0
    (and, in duplex, for controller-1)
    | controller-0 | elastic-controller      | enabled     |
    | controller-0 | elastic-data            | enabled     |
    | controller-0 | elastic-client          | enabled     |

Tests:
    build-helm-charts.sh --app stx-monitor
    system application-upload
    system application-apply
    host management: host-lock, host-swact, host-unlock
    Determine the port for kibana:
        kubectl get service -n monitor
        Access kibana via <oamip>:<service port>
    verify that logs and container logs are captured, including
        subdirectories under /var/log such as /var/log/openvswitch
    system application-remove
    system application-delete

Change-Id: I0e2f94d302b3ec3a22ee5853d529efb6b423bdaa
Depends-On: Ic9a5b909dfbf60e39f98f3f14fe9bbac2de1d42b
Story: 2005733
Task: 31081
Co-Authored-By: Kevin Smith <kevin.smith@windriver.com>
Signed-off-by: John Kung <john.kung@windriver.com>
changes/17/660417/10
John Kung 2 years ago
parent
commit
d60eb9a172
  1. 1
      centos_helm.inc
  2. 1
      centos_pkg_dirs
  3. 3
      centos_pkg_dirs_containers
  4. 6
      kubernetes/applications/stx-monitor/stx-monitor-helm/centos/build_srpm.data
  5. 30
      kubernetes/applications/stx-monitor/stx-monitor-helm/centos/stx-monitor-helm.spec
  6. 43
      kubernetes/applications/stx-monitor/stx-monitor-helm/stx-monitor-helm/Makefile
  7. 5
      kubernetes/applications/stx-monitor/stx-monitor-helm/stx-monitor-helm/README
  8. 3
      kubernetes/applications/stx-monitor/stx-monitor-helm/stx-monitor-helm/files/index.yaml
  9. 12
      kubernetes/applications/stx-monitor/stx-monitor-helm/stx-monitor-helm/files/repositories.yaml
  10. 573
      kubernetes/applications/stx-monitor/stx-monitor-helm/stx-monitor-helm/manifests/monitor_manifest.yaml
  11. 11
      sysinv/sysinv/sysinv/setup.cfg
  12. 73
      sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kube_app.py
  13. 1
      sysinv/sysinv/sysinv/sysinv/common/constants.py
  14. 104
      sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py
  15. 2
      sysinv/sysinv/sysinv/sysinv/helm/base.py
  16. 13
      sysinv/sysinv/sysinv/sysinv/helm/common.py
  17. 25
      sysinv/sysinv/sysinv/sysinv/helm/elastic.py
  18. 112
      sysinv/sysinv/sysinv/sysinv/helm/elasticsearch.py
  19. 57
      sysinv/sysinv/sysinv/sysinv/helm/filebeat.py
  20. 3
      sysinv/sysinv/sysinv/sysinv/helm/helm.py
  21. 39
      sysinv/sysinv/sysinv/sysinv/helm/kibana.py
  22. 31
      sysinv/sysinv/sysinv/sysinv/helm/kube_state_metrics.py
  23. 37
      sysinv/sysinv/sysinv/sysinv/helm/logstash.py
  24. 132
      sysinv/sysinv/sysinv/sysinv/helm/metricbeat.py
  25. 20
      sysinv/sysinv/sysinv/sysinv/helm/monitor_version_check.py
  26. 30
      sysinv/sysinv/sysinv/sysinv/helm/nginx_ingress.py

1
centos_helm.inc

@ -1,2 +1,3 @@
# Packages used by build-helm-charts.sh
stx-openstack-helm
stx-monitor-helm

1
centos_pkg_dirs

@ -1,4 +1,5 @@
kubernetes/applications/stx-openstack/stx-openstack-helm
kubernetes/applications/stx-monitor/stx-monitor-helm
kubernetes/platform/stx-platform/stx-platform-helm
worker-utils
workerconfig

3
centos_pkg_dirs_containers

@ -1 +1,2 @@
kubernetes/applications/stx-openstack/stx-openstack-helm
kubernetes/applications/stx-openstack/stx-openstack-helm
kubernetes/applications/stx-monitor/stx-monitor-helm

6
kubernetes/applications/stx-monitor/stx-monitor-helm/centos/build_srpm.data

@ -0,0 +1,6 @@
SRC_DIR="stx-monitor-helm"
# This version is used as a component of the stx-monitor application
# version. Any change to this version must also be reflected in the
# SUPPORTED_VERSIONS list in sysinv/helm/monitor_version_check.py
#
TIS_PATCH_VER=1

30
kubernetes/applications/stx-monitor/stx-monitor-helm/centos/stx-monitor-helm.spec

@ -0,0 +1,30 @@
%global armada_folder /usr/lib/armada
Summary: StarlingX Monitor Application Armada Helm Charts
Name: stx-monitor-helm
Version: 1.0
Release: %{tis_patch_ver}%{?_tis_dist}
License: Apache-2.0
Group: base
Packager: Wind River <info@windriver.com>
URL: unknown
Source0: %{name}-%{version}.tar.gz
BuildArch: noarch
BuildRequires: monitor-helm
Requires: monitor-helm
%description
StarlingX Monitor Application Armada Helm Charts
%prep
%setup
%install
install -d -m 755 ${RPM_BUILD_ROOT}%{armada_folder}
install -p -D -m 755 manifests/*.yaml ${RPM_BUILD_ROOT}%{armada_folder}
%files
%defattr(-,root,root,-)
%{armada_folder}/*

43
kubernetes/applications/stx-monitor/stx-monitor-helm/stx-monitor-helm/Makefile

@ -0,0 +1,43 @@
#
# Copyright 2017 The Openstack-Helm Authors.
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# It's necessary to set this because some environments don't link sh -> bash.
SHELL := /bin/bash
TASK := build
EXCLUDES := helm-toolkit doc tests tools logs tmp
CHARTS := helm-toolkit $(filter-out $(EXCLUDES), $(patsubst %/.,%,$(wildcard */.)))
.PHONY: $(EXCLUDES) $(CHARTS)
all: $(CHARTS)
$(CHARTS):
@if [ -d $@ ]; then \
echo; \
echo "===== Processing [$@] chart ====="; \
make $(TASK)-$@; \
fi
init-%:
if [ -f $*/Makefile ]; then make -C $*; fi
if [ -f $*/requirements.yaml ]; then helm dep up $*; fi
lint-%: init-%
if [ -d $* ]; then helm lint $*; fi
build-%: lint-%
if [ -d $* ]; then helm package $*; fi
clean:
@echo "Clean all build artifacts"
rm -f */templates/_partials.tpl */templates/_globals.tpl
rm -f *tgz */charts/*tgz */requirements.lock
rm -rf */charts */tmpcharts
%:
@:

5
kubernetes/applications/stx-monitor/stx-monitor-helm/stx-monitor-helm/README

@ -0,0 +1,5 @@
This directory contains all StarlingX charts that need to be built for this
application. Some charts are common across applications. These common charts
reside in the stx-config/kubernetes/helm-charts directory. To include these in
this application, update the build_srpm.data file and use the COPY_LIST_TO_TAR
mechanism to populate these common charts.

3
kubernetes/applications/stx-monitor/stx-monitor-helm/stx-monitor-helm/files/index.yaml

@ -0,0 +1,3 @@
apiVersion: v1
entries: {}
generated: 2019-01-07T12:33:46.098166523-06:00

12
kubernetes/applications/stx-monitor/stx-monitor-helm/stx-monitor-helm/files/repositories.yaml

@ -0,0 +1,12 @@
apiVersion: v1
generated: 2019-01-02T15:19:36.215111369-06:00
repositories:
- caFile: ""
cache: /builddir/.helm/repository/cache/local-index.yaml
certFile: ""
keyFile: ""
name: local
password: ""
url: http://127.0.0.1:8879/charts
username: ""

573
kubernetes/applications/stx-monitor/stx-monitor-helm/stx-monitor-helm/manifests/monitor_manifest.yaml

@ -0,0 +1,573 @@
---
schema: armada/Chart/v1
metadata:
schema: metadata/Document/v1
name: nginx-ingress
data:
chart_name: nginx-ingress
release: nginx-ingress
namespace: monitor
wait:
timeout: 600
labels:
release: stx-nginx-ingress
install:
no_hooks: false
upgrade:
no_hooks: false
pre:
delete:
- type: job
labels:
release: stx-nginx-ingress
values:
controller:
kind: DaemonSet
daemonset:
useHostPort: false
nodeSelector:
elastic-controller: "enabled"
config:
# https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/configmap/
nginx-status-ipv4-whitelist: 0.0.0.0/0
# See https://bugs.launchpad.net/starlingx/+bug/1823803
# Note quotes are necessary.
worker-processes: '1'
scope:
enabled: true
namespace: "monitor"
service:
type: "NodePort"
nodePorts:
http: 31001
defaultBackend:
nodeSelector:
elastic-controller: "enabled"
service:
nodePorts:
http: 31001
source:
type: tar
location: http://172.17.0.1:8080/helm_charts/starlingx/nginx-ingress-1.4.0.tgz
subpath: nginx-ingress
reference: master
dependencies: []
---
schema: armada/Chart/v1
metadata:
schema: metadata/Document/v1
name: elasticsearch
data:
chart_name: elasticsearch
release: elasticsearch
namespace: monitor
wait:
timeout: 600
labels:
release: stx-elasticsearch
test:
enabled: false
install:
no_hooks: false
upgrade:
no_hooks: false
pre:
delete:
- type: job
labels:
release: stx-elasticsearch
- type: pod
labels:
release: stx-elasticsearch
component: test
values:
image:
repository: "docker.elastic.co/elasticsearch/elasticsearch-oss"
tag: 7.2.0
cluster:
config:
cluster.initial_master_nodes:
- stx-elasticsearch-master-0
- stx-elasticsearch-master-1
env:
MINIMUM_MASTER_NODES: "1"
EXPECTED_MASTER_NODES: "1"
RECOVER_AFTER_MASTER_NODES: "1"
client:
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: "nginx"
nginx.ingress.kubernetes.io/rewrite-target: /$2
nginx.ingress.kubernetes.io/ssl-redirect: "false"
nginx.ingress.kubernetes.io/force-ssl-redirect: "false"
# Note in order to differentiate from kibana, prefix with
# stx-elasticsearch-client, but strip before presenting
# to elasticsearch with above rewrite-target
path: /stx-elasticsearch-client(/|$)(.*)
hosts:
- ""
source:
type: tar
location: http://172.17.0.1:8080/helm_charts/starlingx/elasticsearch-1.24.0.tgz
subpath: elasticsearch
reference: master
dependencies: []
---
schema: armada/Chart/v1
metadata:
schema: metadata/Document/v1
name: filebeat
data:
chart_name: filebeat
release: filebeat
namespace: monitor
wait:
timeout: 600
labels:
release: stx-filebeat
test:
enabled: false
install:
no_hooks: false
upgrade:
no_hooks: false
pre:
delete:
- type: job
labels:
release: stx-filebeat
- type: pod
labels:
release: stx-filebeat
component: test
values:
image:
repository: "docker.elastic.co/beats/filebeat-oss"
tag: 7.2.0
config:
output.file:
enabled: false
output.logstash:
enabled: true
hosts: ["stx-logstash:5044"]
output.elasticsearch:
enabled: false
hosts: ["http://stx-elasticsearch-client:9200"]
ilm.pattern: "000001"
setup.kibana:
# Note port 5601 is default
host: "stx-kibana"
setup.dashboards:
enabled: true
filebeat.autodiscover:
providers:
- type: kubernetes
host: ${NODE_NAME}
hints.enabled: true
# Mount the setup_script to main container for visibility
# even though not used there.
extraVolumeMounts:
- mountPath: /usr/share/filebeat/setup-script.sh
name: setupscript
subPath: setup-script.sh
extraVolumes:
- configMap:
defaultMode: 493
name: stx-filebeat
name: setupscript
tolerations:
- key: "services"
operator: "Equal"
value: "disabled"
effect: "NoExecute"
source:
type: tar
location: http://172.17.0.1:8080/helm_charts/starlingx/filebeat-1.5.1.tgz
subpath: filebeat
reference: master
dependencies: []
---
schema: armada/Chart/v1
metadata:
schema: metadata/Document/v1
name: metricbeat
data:
chart_name: metricbeat
release: metricbeat
namespace: monitor
wait:
timeout: 600
labels:
release: stx-metricbeat
test:
enabled: false
install:
no_hooks: false
upgrade:
no_hooks: false
pre:
delete:
- type: job
labels:
release: stx-metricbeat
- type: pod
labels:
release: stx-metricbeat
component: test
values:
image:
repository: "docker.elastic.co/beats/metricbeat-oss"
tag: 7.2.0
daemonset:
config:
output.file:
enabled: false
output.logstash:
enabled: true
hosts: ["stx-logstash:5044"]
output.elasticsearch:
# this must be opposite of above output.logstash.enabled
enabled: false
hosts: ["http://stx-elasticsearch-client:9200"]
ilm.pattern: "000001"
processors:
- add_kubernetes_metadata:
in_cluster: true
setup.kibana:
# for on box kibana, note port 5601 is default
host: "stx-kibana"
setup.dashboards:
enabled: true
metricbeat.autodiscover:
providers:
- type: kubernetes
host: ${NODE_NAME}
hints.enabled: true
tolerations:
- key: "services"
operator: "Equal"
value: "disabled"
effect: "NoExecute"
deployment:
nodeSelector:
elastic-controller: "enabled"
config:
output.file:
enabled: false
output.logstash:
enabled: true
hosts: ["stx-logstash:5044"]
output.elasticsearch:
enabled: false
hosts: ["http://stx-elasticsearch-client:9200"]
ilm.pattern: "000001"
setup.kibana:
# for onbox kibana, note port 5601 is default
host: "stx-kibana"
# for offbox kibana, will fail if we can't connect
# host: "128.224.140.223"
setup.dashboards:
enabled: true
metricbeat.autodiscover:
providers:
- type: kubernetes
host: ${NODE_NAME}
hints.enabled: true
source:
type: tar
location: http://172.17.0.1:8080/helm_charts/starlingx/metricbeat-1.6.0.tgz
subpath: metricbeat
reference: master
dependencies: []
---
schema: armada/Chart/v1
metadata:
schema: metadata/Document/v1
name: kube-state-metrics
data:
chart_name: kube-state-metrics
release: kube-state-metrics
namespace: monitor
wait:
timeout: 600
labels:
release: stx-kube-state-metrics
test:
enabled: false
install:
no_hooks: false
upgrade:
no_hooks: false
pre:
delete:
- type: job
labels:
release: stx-kube-state-metrics
- type: pod
labels:
release: stx-kube-state-metrics
component: test
values:
image:
tag: v1.5.0
nodeSelector:
elastic-controller: "enabled"
source:
type: tar
location: http://172.17.0.1:8080/helm_charts/starlingx/kube-state-metrics-0.16.0.tgz
subpath: kube-state-metrics
reference: master
dependencies: []
---
schema: armada/Chart/v1
metadata:
schema: metadata/Document/v1
name: kibana
data:
chart_name: kibana
release: kibana
namespace: monitor
wait:
timeout: 600
labels:
release: stx-kibana
test:
enabled: false
install:
no_hooks: false
upgrade:
no_hooks: false
pre:
delete:
- type: job
labels:
release: stx-kibana
- type: pod
labels:
release: stx-kibana
component: test
values:
image:
repository: "docker.elastic.co/kibana/kibana-oss"
tag: 7.2.0
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: "nginx"
nginx.ingress.kubernetes.io/rewrite-target: /
nginx.ingress.kubernetes.io/ssl-redirect: "false"
nginx.ingress.kubernetes.io/force-ssl-redirect: "false"
hosts:
- ""
files:
kibana.yml:
elasticsearch.hosts: http://stx-elasticsearch-client:9200
nodeSelector:
elastic-controller: "enabled"
source:
type: tar
location: http://172.17.0.1:8080/helm_charts/starlingx/kibana-2.2.0.tgz
subpath: kibana
reference: master
dependencies: []
---
schema: armada/Chart/v1
metadata:
schema: metadata/Document/v1
name: logstash
data:
chart_name: logstash
release: logstash
namespace: monitor
wait:
timeout: 600
labels:
release: stx-logstash
test:
enabled: false
install:
no_hooks: false
upgrade:
no_hooks: false
pre:
delete:
- type: job
labels:
release: stx-logstash
- type: pod
labels:
release: stx-logstash
component: test
values:
image:
repository: "docker.elastic.co/logstash/logstash-oss"
tag: 7.2.0
nodeSelector:
elastic-controller: "enabled"
elasticsearch:
host: stx-elasticsearch-client
outputs:
main: |-
output {
# Note uncomment below and can see raw input in logs
#stdout { codec => rubydebug }
if [type] == "collectd" {
elasticsearch {
hosts => ["${ELASTICSEARCH_HOST}:${ELASTICSEARCH_PORT}"]
manage_template => false
index => "collectd"
}
}
if [type] == "beats" {
elasticsearch {
hosts => ["${ELASTICSEARCH_HOST}:${ELASTICSEARCH_PORT}"]
manage_template => false
index => "%{[@metadata][beat]}-%{[@metadata][version]}"
}
}
}
inputs:
main: |-
input {
udp {
#Note default port 25826 in use by influxdb.
#port => 25826
port => 31005
buffer_size => 1452
codec => collectd { }
type => 'collectd'
}
beats {
port => 5044
type => 'beats'
}
}
ports:
# influxdb port conflict, can't use 25826
# - containerPort: 25826
# name: collectd-udp
# protocol: UDP
- containerPort: 31005
name: collectd-udp
protocol: UDP
- containerPort: 5044
name: beats
protocol: TCP
service:
ports:
# collectd-udp:
# port: 25826
# targetPort: collectd-udp
# protocol: UDP
collectd-udp:
port: 31005
targetPort: collectd-udp
protocol: UDP
monitor:
port: 9600
targetPort: monitor
protocol: TCP
ingress:
enabled: true
annotations:
kubernetes.io/ingress.class: "nginx"
nginx.ingress.kubernetes.io/rewrite-target: /$2
nginx.ingress.kubernetes.io/ssl-redirect: "false"
nginx.ingress.kubernetes.io/force-ssl-redirect: "false"
path: /stx-logstash(/|$)(.*)
servicePort: 9600
hosts:
- ""
source:
type: tar
location: http://172.17.0.1:8080/helm_charts/starlingx/logstash-1.7.0.tgz
subpath: logstash
reference: master
dependencies: []
---
schema: armada/ChartGroup/v1
metadata:
schema: metadata/Document/v1
name: logstash
data:
description: "Deploy logstash"
sequenced: true
chart_group:
- logstash
---
schema: armada/ChartGroup/v1
metadata:
schema: metadata/Document/v1
name: elasticsearch
data:
description: "Deploy elasticsearch"
sequenced: true
chart_group:
- elasticsearch
---
schema: armada/ChartGroup/v1
metadata:
schema: metadata/Document/v1
name: filebeat
data:
description: "Deploy filebeat"
sequenced: true
chart_group:
- filebeat
---
schema: armada/ChartGroup/v1
metadata:
schema: metadata/Document/v1
name: metricbeat
data:
description: "Deploy metricbeat"
sequenced: true
chart_group:
- metricbeat
---
schema: armada/ChartGroup/v1
metadata:
schema: metadata/Document/v1
name: kube-state-metrics
data:
description: "Deploy kube-state-metrics"
sequenced: true
chart_group:
- kube-state-metrics
---
schema: armada/ChartGroup/v1
metadata:
schema: metadata/Document/v1
name: nginx-ingress
data:
description: "Deploy ingress"
sequenced: true
chart_group:
- nginx-ingress
---
schema: armada/ChartGroup/v1
metadata:
schema: metadata/Document/v1
name: kibana
data:
description: "Deploy kibana"
sequenced: true
chart_group:
- kibana
---
schema: armada/Manifest/v1
metadata:
schema: metadata/Document/v1
name: monitor-armada-manifest
data:
release_prefix: stx
chart_groups:
- nginx-ingress
- kibana
- elasticsearch
- logstash
- filebeat
- metricbeat
- kube-state-metrics

11
sysinv/sysinv/sysinv/setup.cfg

@ -66,12 +66,23 @@ systemconfig.puppet_plugins =
systemconfig.helm_applications =
stx-openstack = systemconfig.helm_plugins.stx_openstack
platform-integ-apps = systemconfig.helm_plugins.platform_integ_apps
stx-monitor = systemconfig.helm_plugins.stx_monitor
systemconfig.helm_plugins.platform_integ_apps =
001_helm-toolkit = sysinv.helm.helm_toolkit:HelmToolkitHelm
002_rbd-provisioner = sysinv.helm.rbd_provisioner:RbdProvisionerHelm
003_ceph-pools-audit = sysinv.helm.ceph_pools_audit:CephPoolsAuditHelm
systemconfig.helm_plugins.stx_monitor =
001_elasticsearch = sysinv.helm.elasticsearch:ElasticsearchHelm
002_kibana = sysinv.helm.kibana:KibanaHelm
003_filebeat = sysinv.helm.filebeat:FilebeatHelm
004_metricbeat = sysinv.helm.metricbeat:MetricbeatHelm
005_kube-state-metrics = sysinv.helm.kube_state_metrics:KubeStateMetricsHelm
006_nginx-ingress = sysinv.helm.nginx_ingress:NginxIngressHelm
007_logstash = sysinv.helm.logstash:LogstashHelm
008_monitor_version_check = sysinv.helm.monitor_version_check:StxMonitorVersionCheckHelm
systemconfig.helm_plugins.stx_openstack =
001_ingress = sysinv.helm.ingress:IngressHelm
002_mariadb = sysinv.helm.mariadb:MariadbHelm

73
sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kube_app.py

@ -14,6 +14,7 @@ from wsme import types as wtypes
import wsmeext.pecan as wsme_pecan
from contextlib import contextmanager
from oslo_log import log
from sysinv import objects
from sysinv.api.controllers.v1 import base
from sysinv.api.controllers.v1 import collection
@ -22,7 +23,7 @@ from sysinv.api.controllers.v1 import types
from sysinv.common import constants
from sysinv.common import exception
from sysinv.common import utils as cutils
from sysinv.openstack.common import log
from sysinv.helm import common as helm_common
from sysinv.openstack.common.gettextutils import _
import cgcs_patch.constants as patch_constants
@ -192,6 +193,74 @@ class KubeAppController(rest.RestController):
"""Retrieve a single application."""
return self._get_one(app_name)
@staticmethod
def _check_controller_labels(chosts):
    """Reject the request unless every controller carries the monitor labels.

    :param chosts: iterable of controller host objects; ``uuid`` and
                   ``hostname`` are read from each one.
    :raises wsme.exc.ClientSideError: if any host lacks one of the required
        labels, or has it with a value other than the required one.  The
        message contains one ``system host-label-assign`` hint per
        offending host.
    """

    def _check_monitor_controller_labels(host_uuid, hostname):
        """Return the label-assign hint for one host, or '' if none needed."""
        required_labels = {
            helm_common.LABEL_MONITOR_CONTROLLER:
                helm_common.LABEL_VALUE_ENABLED,
            helm_common.LABEL_MONITOR_DATA:
                helm_common.LABEL_VALUE_ENABLED,
            helm_common.LABEL_MONITOR_CLIENT:
                helm_common.LABEL_VALUE_ENABLED,
        }

        # Keys of the required labels already present with the right value.
        assigned_keys = set(
            label.label_key
            for label in pecan.request.dbapi.label_get_by_host(host_uuid)
            if label.label_key in required_labels and
            label.label_value == required_labels[label.label_key])

        # Sorted so the hint reads the same on every call; iterating the
        # raw set difference yields an arbitrary order.
        missing_keys = sorted(set(required_labels) - assigned_keys)
        if not missing_keys:
            return ""

        assignments = "".join(
            "%s=%s " % (key, required_labels[key]) for key in missing_keys)
        return " 'system host-label-assign {} {}'".format(
            hostname, assignments)

    client_msg = "".join(
        _check_monitor_controller_labels(chost.uuid, chost.hostname)
        for chost in chosts)

    if client_msg:
        # Translate the constant template first, then substitute: a string
        # that already contains host names can never match a catalog entry.
        raise wsme.exc.ClientSideError(
            _("Operation rejected: application stx-monitor "
              "requires labels on controllers. {}").format(client_msg))
def _semantic_check(self, db_app):
"""Semantic check for application deployment

Only the application named constants.HELM_APP_MONITOR is checked here;
the controller list it inspects is fetched only for that application.

:param db_app: application record; only its ``name`` is read here.
:raises wsme.exc.ClientSideError: when a system that is not AIO-simplex
    reports a non-empty controller list with fewer than two entries, or
    when a controller is not unlocked-enabled.  The call to
    _check_controller_labels may raise the same error type.
"""
if db_app.name == constants.HELM_APP_MONITOR:
chosts = pecan.request.dbapi.ihost_get_by_personality(
constants.CONTROLLER)
if not cutils.is_aio_simplex_system(pecan.request.dbapi):
# NOTE(review): an empty controller list is falsy and therefore skips
# the "requires 2 controllers" rejection -- confirm this is intended.
if chosts and len(chosts) < 2:
raise wsme.exc.ClientSideError(_(
"Operation rejected: application {} requires 2 "
"controllers".format(db_app.name)))
# NOTE(review): indentation is not visible in this view; presumably the
# label check and the loop below run for every stx-monitor request, not
# only on non-simplex systems -- confirm against the real file.
self._check_controller_labels(chosts)
# Every controller must be both administratively unlocked and
# operationally enabled.
for chost in chosts:
if (chost.administrative != constants.ADMIN_UNLOCKED or
chost.operational != constants.OPERATIONAL_ENABLED):
raise wsme.exc.ClientSideError(_(
"Operation rejected: application {} requires {} to be "
"unlocked-enabled".format(
db_app.name, chost.hostname)))
@cutils.synchronized(LOCK_NAME)
@wsme_pecan.wsexpose(KubeApp, body=types.apidict)
def post(self, body):
@ -263,6 +332,8 @@ class KubeAppController(rest.RestController):
else:
mode = values['mode']
self._semantic_check(db_app)
if db_app.status == constants.APP_APPLY_IN_PROGRESS:
raise wsme.exc.ClientSideError(_(
"Application-apply rejected: install/update is already "

1
sysinv/sysinv/sysinv/sysinv/common/constants.py

@ -1337,6 +1337,7 @@ SYSTEM_SECURITY_FEATURE_SPECTRE_MELTDOWN_DEFAULT_OPTS = SYSTEM_SECURITY_FEATURE_
# Helm: Supported application (aka chart bundles)
HELM_APP_OPENSTACK = 'stx-openstack'
HELM_APP_PLATFORM = 'platform-integ-apps'
HELM_APP_MONITOR = 'stx-monitor'
# Apply mode for openstack app
OPENSTACK_RESTORE_DB = 'restore_db'

104
sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py

@ -506,16 +506,30 @@ class AppOperator(object):
TODO(awang): Support custom apps to pull images from local registry
"""
image_tags = []
ids = []
for r, f in cutils.get_files_matching(path, 'values.yaml'):
with open(os.path.join(r, f), 'r') as value_f:
try:
def _parse_charts():
ids = []
image_tags = []
for r, f in cutils.get_files_matching(path, 'values.yaml'):
with open(os.path.join(r, f), 'r') as value_f:
try_image_tag_repo_format = False
y = yaml.safe_load(value_f)
ids = y["images"]["tags"].values()
except (TypeError, KeyError):
pass
image_tags.extend(ids)
try:
ids = y["images"]["tags"].values()
except (AttributeError, TypeError, KeyError):
try_image_tag_repo_format = True
if try_image_tag_repo_format:
try:
y_image = y["image"]
y_image_tag = y_image['repository'] + ":" + y_image['tag']
ids = [y_image_tag]
except (AttributeError, TypeError, KeyError):
pass
image_tags.extend(ids)
return image_tags
image_tags = _parse_charts()
return list(set(image_tags))
def _get_image_tags_by_charts(self, app_images_file, app_manifest_file, overrides_dir):
@ -569,13 +583,24 @@ class AppOperator(object):
pass
# Get the image tags from the armada manifest file
try_image_tag_repo_format = False
try:
images_manifest = chart_data['values']['images']['tags']
except (TypeError, KeyError):
except (TypeError, KeyError, AttributeError):
try_image_tag_repo_format = True
LOG.info("Armada manifest file has no img tags for "
"chart %s" % chart_name)
pass
if try_image_tag_repo_format:
try:
y_image = chart_data['values']['image']
y_image_tag = \
y_image['repository'] + ":" + y_image['tag']
images_manifest = {chart_name: y_image_tag}
except (AttributeError, TypeError, KeyError):
pass
# For the image tags from the chart path which do not exist
# in the overrides and manifest file, add to manifest file.
# Convert the image tags in the overrides and manifest file
@ -687,13 +712,26 @@ class AppOperator(object):
chart_name = os.path.join(app.charts_dir, chart.name)
chart_path = os.path.join(chart_name, 'values.yaml')
try_image_tag_repo_format = False
if os.path.exists(chart_path):
with open(chart_path, 'r') as f:
y = yaml.safe_load(f)
try:
y = yaml.safe_load(f)
images = y["images"]["tags"]
except (TypeError, KeyError):
LOG.warn("Chart %s has no image tags" % chart_name)
except (TypeError, KeyError, AttributeError):
LOG.info("Chart %s has no images tags" % chart_name)
try_image_tag_repo_format = True
if try_image_tag_repo_format:
try:
y_image = y["image"]
y_image_tag = \
y_image['repository'] + ":" + y_image['tag']
images = {chart.name: y_image_tag}
except (AttributeError, TypeError, KeyError):
LOG.info("Chart %s has no image tags" % chart_name)
pass
if images:
images_by_charts.update({chart.name: images})
@ -1375,6 +1413,16 @@ class AppOperator(object):
def _check_progress(monitor_flag, app, pattern, logfile):
""" Progress monitoring task, to be run in a separate thread """
LOG.info("Starting progress monitoring thread for app %s" % app.name)
def _progress_adjust(app):
# helm-toolkit doesn't count; it is not in stx-monitor
non_helm_toolkit_apps = [constants.HELM_APP_MONITOR]
if app.name in non_helm_toolkit_apps:
adjust = 0
else:
adjust = 1
return adjust
try:
with Timeout(INSTALLATION_TIMEOUT,
exception.KubeAppProgressMonitorTimeout()):
@ -1388,9 +1436,10 @@ class AppOperator(object):
last, num = _get_armada_log_stats(pattern, logfile)
if last:
if app.system_app:
# helm-toolkit doesn't count
adjust = _progress_adjust(app)
percent = \
round(float(num) / (len(app.charts) - 1) * 100)
round(float(num) /
(len(app.charts) - adjust) * 100)
else:
percent = round(float(num) / len(app.charts) * 100)
progress_str = 'processing chart: ' + last +\
@ -1467,20 +1516,24 @@ class AppOperator(object):
:param app_name: Name of the application.
"""
if app_name == constants.HELM_APP_OPENSTACK:
self._delete_persistent_volume_claim(common.HELM_NS_OPENSTACK)
def _delete_ceph_persistent_volume_claim(namespace):
self._delete_persistent_volume_claim(namespace)
try:
# Remove the configmap with the ceph monitor information
# required by the application into the application namespace
self._kube.kube_delete_config_map(
self.APP_OPENSTACK_RESOURCE_CONFIG_MAP,
common.HELM_NS_OPENSTACK)
namespace)
except Exception as e:
LOG.error(e)
raise
self._delete_namespace(namespace)
self._delete_namespace(common.HELM_NS_OPENSTACK)
if app_name == constants.HELM_APP_OPENSTACK:
_delete_ceph_persistent_volume_claim(common.HELM_NS_OPENSTACK)
elif app_name == constants.HELM_APP_MONITOR:
_delete_ceph_persistent_volume_claim(common.HELM_NS_MONITOR)
def _perform_app_recover(self, old_app, new_app, armada_process_required=True):
"""Perform application recover
@ -1655,6 +1708,11 @@ class AppOperator(object):
LOG.error("Application rollback aborted!")
return False
def _is_system_app(self, name):
if name in self._helm.get_helm_applications():
return True
return False
def perform_app_upload(self, rpc_app, tarfile):
"""Process application upload request
@ -1668,8 +1726,7 @@ class AppOperator(object):
"""
app = AppOperator.Application(rpc_app,
rpc_app.get('name') in self._helm.get_helm_applications())
self._is_system_app(rpc_app.get('name')))
LOG.info("Application %s (%s) upload started." % (app.name, app.version))
try:
@ -1767,7 +1824,7 @@ class AppOperator(object):
"""
app = AppOperator.Application(rpc_app,
rpc_app.get('name') in self._helm.get_helm_applications())
self._is_system_app(rpc_app.get('name')))
# If apply is called from update method, the app's abort status has
# already been registered.
@ -2265,7 +2322,8 @@ class AppOperator(object):
self._kube_app.name = new_name
self._kube_app.app_version = new_version
self.system_app = \
(self.name == constants.HELM_APP_OPENSTACK)
(self.name == constants.HELM_APP_OPENSTACK or
self.name == constants.HELM_APP_MONITOR)
new_armada_dir = cutils.generate_armada_manifest_dir(
self.name, self.version)

2
sysinv/sysinv/sysinv/sysinv/helm/base.py

@ -126,7 +126,7 @@ class BaseHelm(object):
def _count_hosts_by_label(self, label):
return int(self.dbapi.count_hosts_by_label(label))
def _num_controllers(self):
def _num_controllers(self, label=None):
return self._count_hosts_by_label(common.LABEL_CONTROLLER)
def _num_computes(self):

13
sysinv/sysinv/sysinv/sysinv/helm/common.py

@ -57,6 +57,14 @@ HELM_CHART_KEYSTONE_API_PROXY = 'keystone-api-proxy'
HELM_CHART_SWIFT = 'ceph-rgw'
HELM_CHART_NGINX_PORTS_CONTROL = "nginx-ports-control"
HELM_CHART_ELASTICSEARCH = 'elasticsearch'
HELM_CHART_KIBANA = 'kibana'
HELM_CHART_FILEBEAT = 'filebeat'
HELM_CHART_METRICBEAT = 'metricbeat'
HELM_CHART_KUBESTATEMETRICS = 'kube-state-metrics'
HELM_CHART_NGINX_INGRESS = 'nginx-ingress'
HELM_CHART_LOGSTASH = 'logstash'
# Namespaces
HELM_NS_CEPH = 'ceph'
HELM_NS_DEFAULT = 'default'
@ -64,6 +72,7 @@ HELM_NS_KUBE_SYSTEM = 'kube-system'
HELM_NS_NFS = 'nfs'
HELM_NS_OPENSTACK = 'openstack'
HELM_NS_HELM_TOOLKIT = 'helm-toolkit'
HELM_NS_MONITOR = 'monitor'
# Namespaces: for system functions
HELM_NS_STORAGE_PROVISIONER = HELM_NS_KUBE_SYSTEM
@ -87,6 +96,10 @@ LABEL_COMPUTE_LABEL = 'openstack-compute-node'
LABEL_OPENVSWITCH = 'openvswitch'
LABEL_REMOTE_STORAGE = 'remote-storage'
LABEL_MONITOR_CONTROLLER = 'elastic-controller'
LABEL_MONITOR_DATA = 'elastic-data'
LABEL_MONITOR_CLIENT = 'elastic-client'
# Label values
LABEL_VALUE_ENABLED = 'enabled'
LABEL_VALUE_DISABLED = 'disabled'

25
sysinv/sysinv/sysinv/sysinv/helm/elastic.py

@ -0,0 +1,25 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from sysinv.helm import base
from sysinv.helm import common
from sysinv.common import constants
class ElasticBaseHelm(base.BaseHelm):
    """Shared base class for the Elastic helm plugins.

    Adds the monitor namespace to the namespaces inherited from BaseHelm.
    """

    # BaseHelm's namespaces plus the monitor namespace.
    SUPPORTED_NAMESPACES = (
        base.BaseHelm.SUPPORTED_NAMESPACES + [common.HELM_NS_MONITOR]
    )

    # Namespace list per application name; built as its own list object
    # rather than sharing SUPPORTED_NAMESPACES.
    SUPPORTED_APP_NAMESPACES = {
        constants.HELM_APP_MONITOR: (
            base.BaseHelm.SUPPORTED_NAMESPACES + [common.HELM_NS_MONITOR]
        ),
    }

    def get_namespaces(self):
        """Return SUPPORTED_NAMESPACES as seen from this instance."""
        return self.SUPPORTED_NAMESPACES

112
sysinv/sysinv/sysinv/sysinv/helm/elasticsearch.py

@ -0,0 +1,112 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from oslo_log import log as logging
from sysinv.common import exception
from sysinv.common import utils
from sysinv.helm import common
from sysinv.helm import elastic
LOG = logging.getLogger(__name__)
class ElasticsearchHelm(elastic.ElasticBaseHelm):
    """Helm plugin that generates the overrides for the elasticsearch chart."""

    CHART = common.HELM_CHART_ELASTICSEARCH

    def get_overrides(self, namespace=None):
        """Build the chart overrides.

        :param namespace: namespace whose overrides are wanted; a falsy
                          value selects the full mapping.
        :returns: the overrides dict of ``namespace`` when it is listed in
                  SUPPORTED_NAMESPACES; the whole namespace -> overrides
                  mapping when ``namespace`` is falsy.
        :raises exception.InvalidHelmNamespace: for any other truthy
                  ``namespace``.
        """
        # Overrides are always computed before the namespace is validated.
        monitor_overrides = {
            'cluster': self._get_cluster_overrides(),
            'master': self._get_master_overrides(),
            'data': self._get_data_overrides(),
            'client': self._get_client_overrides(),
        }
        all_overrides = {common.HELM_NS_MONITOR: monitor_overrides}

        if namespace in self.SUPPORTED_NAMESPACES:
            return all_overrides[namespace]
        if not namespace:
            return all_overrides
        raise exception.InvalidHelmNamespace(chart=self.CHART,
                                             namespace=namespace)

    def _get_cluster_overrides(self):
        """Return the 'cluster' section: env variables and initial masters.

        Lists one initial master node when utils.is_aio_simplex_system()
        is true for self.dbapi, and two otherwise.
        """
        initial_masters = ['stx-elasticsearch-master-0']
        if not utils.is_aio_simplex_system(self.dbapi):
            initial_masters.append('stx-elasticsearch-master-1')

        return {
            'env': {
                'MINIMUM_MASTER_NODES': "1",
                'EXPECTED_MASTER_NODES': "1",
                'RECOVER_AFTER_MASTER_NODES': "1",
            },
            'config': {
                'cluster.initial_master_nodes': initial_masters,
            },
        }

    def _get_master_overrides(self):
        """Return the 'master' section.

        Replicas equal the number of hosts carrying the
        LABEL_MONITOR_CONTROLLER label; the heap size is smaller when
        utils.is_aio_system() is true.
        """
        heap = "256m" if utils.is_aio_system(self.dbapi) else "512m"
        replica_count = self._count_hosts_by_label(
            common.LABEL_MONITOR_CONTROLLER)

        return {
            'replicas': replica_count,
            'heapSize': heap,
            'nodeSelector': {common.LABEL_MONITOR_CONTROLLER: "enabled"},
        }

    def _get_data_overrides(self):
        """Return the 'data' section: replicas, heap, resources, storage.

        Replicas equal the number of hosts carrying the LABEL_MONITOR_DATA
        label.
        """
        # Note memory values are to be system engineered.
        heap, memory_request = (
            ("512m", "512Mi") if utils.is_aio_system(self.dbapi)
            else ("1536m", "1536Mi"))
        replica_count = self._count_hosts_by_label(common.LABEL_MONITOR_DATA)

        resources = {
            'limits': {'cpu': "1"},
            'requests': {'cpu': "25m", 'memory': memory_request},
        }
        persistence = {'storageClass': 'general', 'size': "100Gi"}

        return {
            'replicas': replica_count,
            'heapSize': heap,
            'resources': resources,
            'persistence': persistence,
            'nodeSelector': {common.LABEL_MONITOR_DATA: "enabled"},
        }

    def _get_client_overrides(self):
        """Return the 'client' section.

        Replicas equal the number of hosts carrying the
        LABEL_MONITOR_CLIENT label; the heap size is smaller when
        utils.is_aio_system() is true.
        """
        heap = "256m" if utils.is_aio_system(self.dbapi) else "512m"
        replica_count = self._count_hosts_by_label(
            common.LABEL_MONITOR_CLIENT)

        return {
            'replicas': replica_count,
            'heapSize': heap,
            'nodeSelector': {common.LABEL_MONITOR_CLIENT: "enabled"},
        }

57
sysinv/sysinv/sysinv/sysinv/helm/filebeat.py

@ -0,0 +1,57 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from oslo_log import log as logging
from sysinv.common import exception
from sysinv.helm import common
from sysinv.helm import elastic
LOG = logging.getLogger(__name__)
class FilebeatHelm(elastic.ElasticBaseHelm):
    """Helm plugin producing the overrides of the filebeat chart."""

    CHART = common.HELM_CHART_FILEBEAT

    def get_overrides(self, namespace=None):
        """Return the filebeat chart overrides.

        :param namespace: optional namespace to restrict the result to
        :returns: the overrides of ``namespace`` when it is one of
            SUPPORTED_NAMESPACES; the complete mapping keyed by namespace
            when no namespace is given
        :raises InvalidHelmNamespace: when a namespace is given that is
            not supported
        """
        overrides = {
            common.HELM_NS_MONITOR: {
                'config': self._get_config_overrides(),
            },
        }

        if namespace in self.SUPPORTED_NAMESPACES:
            return overrides[namespace]
        if namespace:
            raise exception.InvalidHelmNamespace(chart=self.CHART,
                                                 namespace=namespace)
        return overrides

    def _get_config_overrides(self):
        """Build the filebeat 'config' section.

        Defines one enabled input of type "log" over the /var/log paths
        below, adds a "hostname" field to it, and enables the
        add_kubernetes_metadata processor.
        """
        log_paths = [
            "/var/log/*.log",
            "/var/log/messages",
            "/var/log/syslog",
            "/var/log/**/*.log",
        ]
        log_input = {
            'enabled': True,
            'fields': {"hostname": "${NODE_NAME}"},
            'paths': log_paths,
            'type': "log",
        }
        metadata_processor = {'add_kubernetes_metadata': {'in_cluster': True}}

        return {
            'processors': [metadata_processor],
            'filebeat.inputs': [log_input],
        }

3
sysinv/sysinv/sysinv/sysinv/helm/helm.py

@ -104,7 +104,8 @@ class HelmOperator(object):
supported_helm_applications = {}
for name, namespace in helm_application_dict.items():
supported_helm_applications[name] = []
helm_plugins = extension.ExtensionManager(namespace=namespace, invoke_on_load=True, invoke_args=(self,))
helm_plugins = extension.ExtensionManager(
namespace=namespace, invoke_on_load=True, invoke_args=(self,))
sorted_helm_plugins = sorted(helm_plugins.extensions, key=lambda x: x.name)
for plugin in sorted_helm_plugins:
plugin_name = plugin.name[HELM_PLUGIN_PREFIX_LENGTH:]

39
sysinv/sysinv/sysinv/sysinv/helm/kibana.py

@ -0,0 +1,39 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from oslo_log import log as logging
from sysinv.common import exception
from sysinv.helm import common
from sysinv.helm import elastic
LOG = logging.getLogger(__name__)
class KibanaHelm(elastic.ElasticBaseHelm):
    """Helm plugin producing the overrides of the kibana chart."""

    CHART = common.HELM_CHART_KIBANA

    # Port name and port number written into the 'service' overrides.
    SERVICE_NAME = "kibana"
    SERVICE_PORT = 5601

    def get_overrides(self, namespace=None):
        """Return the kibana chart overrides.

        The service section uses SERVICE_PORT for both the external and
        the internal port and SERVICE_NAME as the port name.

        :param namespace: optional namespace to restrict the result to
        :returns: the overrides of ``namespace`` when it is one of
            SUPPORTED_NAMESPACES; the complete mapping keyed by namespace
            when no namespace is given
        :raises InvalidHelmNamespace: when a namespace is given that is
            not supported
        """
        service = {
            "externalPort": self.SERVICE_PORT,
            "internalPort": self.SERVICE_PORT,
            "portName": self.SERVICE_NAME,
        }
        overrides = {common.HELM_NS_MONITOR: {"service": service}}

        if namespace in self.SUPPORTED_NAMESPACES:
            return overrides[namespace]
        if namespace:
            raise exception.InvalidHelmNamespace(chart=self.CHART,
                                                 namespace=namespace)
        return overrides

31
sysinv/sysinv/sysinv/sysinv/helm/kube_state_metrics.py

@ -0,0 +1,31 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from oslo_log import log as logging
from sysinv.common import exception
from sysinv.helm import common
from sysinv.helm import elastic
LOG = logging.getLogger(__name__)
class KubeStateMetricsHelm(elastic.ElasticBaseHelm):
    """Helm plugin producing the overrides of the kube-state-metrics chart."""

    CHART = common.HELM_CHART_KUBESTATEMETRICS

    def get_overrides(self, namespace=None):
        """Return the kube-state-metrics chart overrides.

        No values are overridden; the monitor namespace maps to an empty
        dictionary.

        :param namespace: optional namespace to restrict the result to
        :returns: the overrides of ``namespace`` when it is one of
            SUPPORTED_NAMESPACES; the complete mapping keyed by namespace
            when no namespace is given
        :raises InvalidHelmNamespace: when a namespace is given that is
            not supported
        """
        overrides = {common.HELM_NS_MONITOR: {}}

        if namespace in self.SUPPORTED_NAMESPACES:
            return overrides[namespace]
        if namespace:
            raise exception.InvalidHelmNamespace(chart=self.CHART,
                                                 namespace=namespace)
        return overrides

37
sysinv/sysinv/sysinv/sysinv/helm/logstash.py

@ -0,0 +1,37 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from oslo_log import log as logging
from sysinv.common import exception
from sysinv.helm import common
from sysinv.helm import elastic
LOG = logging.getLogger(__name__)
class LogstashHelm(elastic.ElasticBaseHelm):
    """Helm plugin producing the overrides of the logstash chart."""

    CHART = common.HELM_CHART_LOGSTASH

    def get_overrides(self, namespace=None):
        """Return the logstash chart overrides.

        The replica count comes from ``_count_hosts_by_label`` for the
        controller label; persistence uses the 'general' storage class
        with a 20Gi volume.

        :param namespace: optional namespace to restrict the result to
        :returns: the overrides of ``namespace`` when it is one of
            SUPPORTED_NAMESPACES; the complete mapping keyed by namespace
            when no namespace is given
        :raises InvalidHelmNamespace: when a namespace is given that is
            not supported
        """
        replica_count = self._count_hosts_by_label(
            common.LABEL_MONITOR_CONTROLLER)
        monitor_overrides = {
            'replicaCount': replica_count,
            'persistence': {'storageClass': 'general', 'size': "20Gi"},
        }
        overrides = {common.HELM_NS_MONITOR: monitor_overrides}

        if namespace in self.SUPPORTED_NAMESPACES:
            return overrides[namespace]
        if namespace:
            raise exception.InvalidHelmNamespace(chart=self.CHART,
                                                 namespace=namespace)
        return overrides

132
sysinv/sysinv/sysinv/sysinv/helm/metricbeat.py

@ -0,0 +1,132 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from oslo_log import log as logging
from sysinv.common import exception
from sysinv.helm import common
from sysinv.helm import elastic
LOG = logging.getLogger(__name__)
class MetricbeatHelm(elastic.ElasticBaseHelm):
    """Class to encapsulate helm operations for metricbeat"""

    CHART = common.HELM_CHART_METRICBEAT

    def get_overrides(self, namespace=None):
        """Return the metricbeat chart overrides.

        The daemonset gets the 'system' and 'kubernetes' modules; the
        deployment gets a 'kubernetes' module that reads the state_* and
        event metricsets.

        :param namespace: optional namespace to restrict the result to
        :returns: the overrides of ``namespace`` when it is one of
            SUPPORTED_NAMESPACES; the complete mapping keyed by namespace
            when no namespace is given
        :raises InvalidHelmNamespace: when a namespace is given that is
            not supported
        """
        overrides = {
            common.HELM_NS_MONITOR: {
                'daemonset': {
                    'modules': {
                        'system': self._get_metric_system(),
                        'kubernetes': self._get_metric_kubernetes(),
                    },
                },
                'deployment': {
                    'modules': {
                        'kubernetes':
                            self._get_metric_deployment_kubernetes()
                    }
                }
            }
        }

        if namespace in self.SUPPORTED_NAMESPACES:
            return overrides[namespace]
        elif namespace:
            raise exception.InvalidHelmNamespace(chart=self.CHART,
                                                 namespace=namespace)
        else:
            return overrides

    def _get_metric_system(self):
        """Return the enabled 'system' module entry for the daemonset."""
        conf = {
            "enabled": True,
            "config": self._get_metric_module_config()
        }
        return conf

    def _get_metric_module_config(self):
        """Return the configuration list of the metricbeat system module.

        :returns: a one-element list holding the system module settings
        """
        metricsets = [
            "cpu",
            "load",
            "memory",
            "network",
            "process",
            "process_summary",
            "core",
            "diskio"]
        period = "60s"
        conf = [
            {"module": "system",
             "period": period,
             "metricsets": metricsets,
             "processes": [
                 ".*"
             ],
             # by_cpu / by_memory are sub-options of
             # process.include_top_n. As sibling keys of a null
             # process.include_top_n they are not part of that setting,
             # so they must be nested for the top-5 limits to apply.
             "process.include_top_n": {
                 "by_cpu": 5,
                 "by_memory": 5
             }
             }
        ]
        return conf

    def _get_metric_kubernetes(self):
        """Return the daemonset 'kubernetes' module entry.

        The module targets port 10250 on ${HOSTNAME} over https, using
        the service account token and CA certificate paths below.
        """
        metricsets = [
            "node", "system", "pod", "container", "volume"]
        period = "60s"
        conf = {
            "enabled": True,
            "config": [
                {
                    "module": "kubernetes",
                    "in_cluster": True,
                    "add_metadata": True,
                    "metricsets": metricsets,
                    "period": period,
                    "host": "${NODE_NAME}",
                    "hosts": [
                        "https://${HOSTNAME}:10250"
                    ],
                    "bearer_token_file":
                        "/var/run/secrets/kubernetes.io/serviceaccount/token",
                    "ssl.verification_mode": "none",
                    "ssl.certificate_authorities": [
                        "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
                    ]
                }
            ]
        }
        return conf

    def _get_metric_deployment_kubernetes(self):
        """Return the deployment 'kubernetes' module entry.

        The module reads the state_* and event metricsets from the
        stx-kube-state-metrics service in the monitor namespace.
        """
        metricsets_k8s = [
            "state_node",
            "state_deployment",
            "state_replicaset",
            "state_pod",
            "state_container",
            "event"
        ]
        period = "60s"
        conf = {
            "enabled": True,
            "config": [
                {
                    "module": "kubernetes",
                    "in_cluster": True,
                    "add_metadata": True,
                    "metricsets": metricsets_k8s,
                    "period": period,
                    "host": "${NODE_NAME}",
                    "hosts": [
                        "stx-kube-state-metrics.monitor.svc.cluster.local:8080"
                    ]
                }
            ]
        }
        return conf

20
sysinv/sysinv/sysinv/sysinv/helm/monitor_version_check.py

@ -0,0 +1,20 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from sysinv.helm import base
# Application versions accepted by StxMonitorVersionCheckHelm.
MONITOR_SUPPORTED_VERSIONS = ['1.0-1']


class StxMonitorVersionCheckHelm(base.BaseHelm):
    """Helm plugin that checks the version of the monitor application."""

    def _get_supported_versions(self):
        """Return the list of supported application versions."""
        return MONITOR_SUPPORTED_VERSIONS

    def version_check(self, app_version):
        """Tell whether ``app_version`` is a supported version.

        :param app_version: version string of the application
        :returns: True when the version is in the supported list,
            False otherwise
        """
        supported = self._get_supported_versions()
        return app_version in supported

30
sysinv/sysinv/sysinv/sysinv/helm/nginx_ingress.py

@ -0,0 +1,30 @@
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from oslo_log import log as logging
from sysinv.common import exception
from sysinv.helm import common
from sysinv.helm import elastic
LOG