
Leverage heat-container-agent for monitoring

Run the heat-container-agent from a system container, which means
that the docker daemon can be started later.

Pass the following software-configurations to the heat-agent as a
software deployment:
* prometheus-monitoring
** pin prometheus to v1.8.2, since its configuration is not
   compatible with 2.0.0
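
For illustration, a minimal sketch of a deployment entry in the
heat-config format that the agent polls for and hands to its hooks;
the id and config payload here are hypothetical:

    cat > /var/run/heat-config/heat-config <<'EOF'
    [{"id": "0123-abcd",
      "name": "prometheus-monitoring",
      "group": "script",
      "inputs": [{"name": "deploy_action", "value": "CREATE"}],
      "config": "#!/bin/bash\necho monitoring enabled"}]
    EOF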

Add heat-container-agent container image.

Implements: blueprint heat-agent
Related-Bug: #1680900
Change-Id: I084b7fe51eddb7b36c74f9fe76cda37e8b48f646
(cherry picked from commit 273405cb5a)
Authored by Spyros Trigazis, committed by yatin (commit 9aacda8f83)
  1. magnum/drivers/common/image/heat-container-agent/Dockerfile (49)
  2. magnum/drivers/common/image/heat-container-agent/config.json.template (373)
  3. magnum/drivers/common/image/heat-container-agent/launch (5)
  4. magnum/drivers/common/image/heat-container-agent/manifest.json (4)
  5. magnum/drivers/common/image/heat-container-agent/scripts/50-heat-config-docker-compose (116)
  6. magnum/drivers/common/image/heat-container-agent/scripts/55-heat-config (194)
  7. magnum/drivers/common/image/heat-container-agent/scripts/configure_container_agent.sh (36)
  8. magnum/drivers/common/image/heat-container-agent/scripts/heat-config-notify (163)
  9. magnum/drivers/common/image/heat-container-agent/scripts/hooks/atomic (115)
  10. magnum/drivers/common/image/heat-container-agent/scripts/hooks/docker-compose (127)
  11. magnum/drivers/common/image/heat-container-agent/scripts/hooks/script (96)
  12. magnum/drivers/common/image/heat-container-agent/scripts/write-os-apply-config-templates.sh (71)
  13. magnum/drivers/common/image/heat-container-agent/service.template (11)
  14. magnum/drivers/common/image/heat-container-agent/tmpfiles.template (10)
  15. magnum/drivers/common/templates/kubernetes/fragments/enable-monitoring.sh (139)
  16. magnum/drivers/common/templates/kubernetes/fragments/enable-prometheus-monitoring (434)
  17. magnum/drivers/common/templates/kubernetes/fragments/start-container-agent.sh (16)
  18. magnum/drivers/common/templates/kubernetes/fragments/write-grafana-service.yaml (67)
  19. magnum/drivers/common/templates/kubernetes/fragments/write-prometheus-configmap.yaml (163)
  20. magnum/drivers/common/templates/kubernetes/fragments/write-prometheus-service.yaml (60)
  21. magnum/drivers/k8s_fedora_atomic_v1/templates/kubemaster.yaml (65)
  22. magnum/drivers/k8s_fedora_ironic_v1/templates/kubemaster.yaml (65)
  23. magnum/tests/contrib/copy_instance_logs.sh (4)

magnum/drivers/common/image/heat-container-agent/Dockerfile (49)

@@ -0,0 +1,49 @@
FROM registry.fedoraproject.org/fedora:rawhide
# Fill out the labels
LABEL name="heat-container-agent" \
maintainer="Spyros Trigazis <strigazi@gmail.com>" \
license="UNKNOWN" \
summary="Heat Container Agent system image" \
version="1.0" \
help="No help" \
architecture="x86_64" \
atomic.type="system" \
distribution-scope="public"
RUN dnf -y --setopt=tsflags=nodocs install \
findutils os-collect-config os-apply-config \
os-refresh-config dib-utils python-pip python-docker-py \
python-yaml python-zaqarclient python2-oslo-log \
python-psutil && dnf clean all
# pip installing dpath as python-dpath is an older version of dpath
# install docker-compose
RUN pip install --no-cache dpath docker-compose
ADD ./scripts/55-heat-config \
/opt/heat-container-agent/scripts/
ADD ./scripts/50-heat-config-docker-compose \
/opt/heat-container-agent/scripts/
ADD ./scripts/hooks/* \
/opt/heat-container-agent/hooks/
ADD ./scripts/heat-config-notify \
/usr/bin/heat-config-notify
RUN chmod 755 /usr/bin/heat-config-notify
ADD ./scripts/configure_container_agent.sh /opt/heat-container-agent/
RUN chmod 700 /opt/heat-container-agent/configure_container_agent.sh
ADD ./scripts/write-os-apply-config-templates.sh /tmp
RUN chmod 700 /tmp/write-os-apply-config-templates.sh
RUN /tmp/write-os-apply-config-templates.sh
COPY manifest.json service.template config.json.template tmpfiles.template /exports/
COPY launch /usr/bin/start-heat-container-agent
# Execution
CMD ["/usr/bin/start-heat-container-agent"]

magnum/drivers/common/image/heat-container-agent/config.json.template (373)

@@ -0,0 +1,373 @@
{
"hooks": {},
"hostname": "acme",
"linux": {
"namespaces": [
{
"type": "mount"
},
{
"type": "ipc"
},
{
"type": "uts"
}
],
"resources": {
"devices": [
{
"access": "rwm",
"allow": false
}
]
}
},
"mounts": [
{
"type": "bind",
"source": "/srv/magnum",
"destination": "/srv/magnum",
"options": [
"rbind",
"rw",
"rprivate"
]
},
{
"type": "bind",
"source": "/opt/stack/os-config-refresh",
"destination": "/opt/stack/os-config-refresh",
"options": [
"rbind",
"rw",
"rprivate"
]
},
{
"type": "bind",
"source": "/run/systemd",
"destination": "/run/systemd",
"options": [
"rbind",
"ro",
"rprivate"
]
},
{
"type": "bind",
"source": "/etc/",
"destination": "/etc/",
"options": [
"rbind",
"rw",
"rprivate"
]
},
{
"type": "bind",
"source": "/var/lib",
"destination": "/var/lib",
"options": [
"rbind",
"rw",
"rprivate"
]
},
{
"type": "bind",
"source": "/var/run",
"destination": "/var/run",
"options": [
"rbind",
"rw",
"rprivate"
]
},
{
"type": "bind",
"source": "/var/log",
"destination": "/var/log",
"options": [
"rbind",
"rw",
"rprivate"
]
},
{
"type": "bind",
"source": "/tmp",
"destination": "/tmp",
"options": [
"rbind",
"rw",
"rprivate"
]
},
{
"destination": "/proc",
"source": "proc",
"type": "proc"
},
{
"destination": "/dev",
"options": [
"nosuid",
"strictatime",
"mode=755",
"size=65536k"
],
"source": "tmpfs",
"type": "tmpfs"
},
{
"destination": "/dev/pts",
"options": [
"nosuid",
"noexec",
"newinstance",
"ptmxmode=0666",
"mode=0620",
"gid=5"
],
"source": "devpts",
"type": "devpts"
},
{
"destination": "/dev/shm",
"options": [
"nosuid",
"noexec",
"nodev",
"mode=1777",
"size=65536k"
],
"source": "shm",
"type": "tmpfs"
},
{
"destination": "/dev/mqueue",
"options": [
"nosuid",
"noexec",
"nodev"
],
"source": "mqueue",
"type": "mqueue"
},
{
"destination": "/sys",
"options": [
"nosuid",
"noexec",
"nodev",
"ro"
],
"source": "sysfs",
"type": "sysfs"
},
{
"destination": "/sys/fs/cgroup",
"options": [
"nosuid",
"noexec",
"nodev",
"relatime",
"ro"
],
"source": "cgroup",
"type": "cgroup"
}
],
"ociVersion": "0.6.0-dev",
"platform": {
"arch": "amd64",
"os": "linux"
},
"process": {
"args": [
"/usr/bin/start-heat-container-agent"
],
"capabilities": {
"bounding": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_DAC_READ_SEARCH",
"CAP_FOWNER",
"CAP_FSETID",
"CAP_KILL",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETPCAP",
"CAP_LINUX_IMMUTABLE",
"CAP_NET_BIND_SERVICE",
"CAP_NET_BROADCAST",
"CAP_NET_ADMIN",
"CAP_NET_RAW",
"CAP_IPC_LOCK",
"CAP_IPC_OWNER",
"CAP_SYS_MODULE",
"CAP_SYS_RAWIO",
"CAP_SYS_CHROOT",
"CAP_SYS_PTRACE",
"CAP_SYS_PACCT",
"CAP_SYS_ADMIN",
"CAP_SYS_BOOT",
"CAP_SYS_NICE",
"CAP_SYS_RESOURCE",
"CAP_SYS_TIME",
"CAP_SYS_TTY_CONFIG",
"CAP_MKNOD",
"CAP_LEASE",
"CAP_AUDIT_WRITE",
"CAP_AUDIT_CONTROL",
"CAP_SETFCAP",
"CAP_MAC_OVERRIDE",
"CAP_MAC_ADMIN",
"CAP_SYSLOG",
"CAP_WAKE_ALARM",
"CAP_BLOCK_SUSPEND",
"CAP_AUDIT_READ"
],
"permitted": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_DAC_READ_SEARCH",
"CAP_FOWNER",
"CAP_FSETID",
"CAP_KILL",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETPCAP",
"CAP_LINUX_IMMUTABLE",
"CAP_NET_BIND_SERVICE",
"CAP_NET_BROADCAST",
"CAP_NET_ADMIN",
"CAP_NET_RAW",
"CAP_IPC_LOCK",
"CAP_IPC_OWNER",
"CAP_SYS_MODULE",
"CAP_SYS_RAWIO",
"CAP_SYS_CHROOT",
"CAP_SYS_PTRACE",
"CAP_SYS_PACCT",
"CAP_SYS_ADMIN",
"CAP_SYS_BOOT",
"CAP_SYS_NICE",
"CAP_SYS_RESOURCE",
"CAP_SYS_TIME",
"CAP_SYS_TTY_CONFIG",
"CAP_MKNOD",
"CAP_LEASE",
"CAP_AUDIT_WRITE",
"CAP_AUDIT_CONTROL",
"CAP_SETFCAP",
"CAP_MAC_OVERRIDE",
"CAP_MAC_ADMIN",
"CAP_SYSLOG",
"CAP_WAKE_ALARM",
"CAP_BLOCK_SUSPEND",
"CAP_AUDIT_READ"
],
"inheritable": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_DAC_READ_SEARCH",
"CAP_FOWNER",
"CAP_FSETID",
"CAP_KILL",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETPCAP",
"CAP_LINUX_IMMUTABLE",
"CAP_NET_BIND_SERVICE",
"CAP_NET_BROADCAST",
"CAP_NET_ADMIN",
"CAP_NET_RAW",
"CAP_IPC_LOCK",
"CAP_IPC_OWNER",
"CAP_SYS_MODULE",
"CAP_SYS_RAWIO",
"CAP_SYS_CHROOT",
"CAP_SYS_PTRACE",
"CAP_SYS_PACCT",
"CAP_SYS_ADMIN",
"CAP_SYS_BOOT",
"CAP_SYS_NICE",
"CAP_SYS_RESOURCE",
"CAP_SYS_TIME",
"CAP_SYS_TTY_CONFIG",
"CAP_MKNOD",
"CAP_LEASE",
"CAP_AUDIT_WRITE",
"CAP_AUDIT_CONTROL",
"CAP_SETFCAP",
"CAP_MAC_OVERRIDE",
"CAP_MAC_ADMIN",
"CAP_SYSLOG",
"CAP_WAKE_ALARM",
"CAP_BLOCK_SUSPEND",
"CAP_AUDIT_READ"
],
"effective": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_DAC_READ_SEARCH",
"CAP_FOWNER",
"CAP_FSETID",
"CAP_KILL",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETPCAP",
"CAP_LINUX_IMMUTABLE",
"CAP_NET_BIND_SERVICE",
"CAP_NET_BROADCAST",
"CAP_NET_ADMIN",
"CAP_NET_RAW",
"CAP_IPC_LOCK",
"CAP_IPC_OWNER",
"CAP_SYS_MODULE",
"CAP_SYS_RAWIO",
"CAP_SYS_CHROOT",
"CAP_SYS_PTRACE",
"CAP_SYS_PACCT",
"CAP_SYS_ADMIN",
"CAP_SYS_BOOT",
"CAP_SYS_NICE",
"CAP_SYS_RESOURCE",
"CAP_SYS_TIME",
"CAP_SYS_TTY_CONFIG",
"CAP_MKNOD",
"CAP_LEASE",
"CAP_AUDIT_WRITE",
"CAP_AUDIT_CONTROL",
"CAP_SETFCAP",
"CAP_MAC_OVERRIDE",
"CAP_MAC_ADMIN",
"CAP_SYSLOG",
"CAP_WAKE_ALARM",
"CAP_BLOCK_SUSPEND",
"CAP_AUDIT_READ"
]
},
"cwd": "/",
"env": [
"REQUESTS_CA_BUNDLE=$REQUESTS_CA_BUNDLE",
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"SYSTEMD_IGNORE_CHROOT=1",
"TERM=xterm"
],
"rlimits": [
{
"hard": 1024,
"soft": 1024,
"type": "RLIMIT_NOFILE"
}
],
"terminal": false,
"user": {}
},
"root": {
"path": "rootfs",
"readonly": true
}
}

magnum/drivers/common/image/heat-container-agent/launch (5)

@@ -0,0 +1,5 @@
#!/bin/bash
/opt/heat-container-agent/configure_container_agent.sh
exec /usr/bin/os-collect-config --debug

magnum/drivers/common/image/heat-container-agent/manifest.json (4)

@@ -0,0 +1,4 @@
{
"defaultValues": {},
"version": "1.0"
}

magnum/drivers/common/image/heat-container-agent/scripts/50-heat-config-docker-compose (116)

@@ -0,0 +1,116 @@
#!/usr/bin/env python
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import json
import logging
import os
import subprocess
import sys
import yaml
CONF_FILE = os.environ.get('HEAT_SHELL_CONFIG',
'/var/run/heat-config/heat-config')
DOCKER_COMPOSE_DIR = os.environ.get(
'HEAT_DOCKER_COMPOSE_WORKING',
'/var/lib/heat-config/heat-config-docker-compose')
DOCKER_COMPOSE_CMD = os.environ.get('HEAT_DOCKER_COMPOSE_CMD',
'docker-compose')
def main(argv=sys.argv):
log = logging.getLogger('heat-config')
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(
logging.Formatter(
'[%(asctime)s] (%(name)s) [%(levelname)s] %(message)s'))
log.addHandler(handler)
log.setLevel('DEBUG')
if not os.path.exists(CONF_FILE):
log.error('No config file %s' % CONF_FILE)
return 1
if not os.path.isdir(DOCKER_COMPOSE_DIR):
os.makedirs(DOCKER_COMPOSE_DIR, 0o700)
# default to no deployments if the config cannot be parsed, so the
# cleanup/write loop below does not hit an undefined name
configs = []
try:
configs = json.load(open(CONF_FILE))
except ValueError:
pass
try:
cleanup_stale_projects(configs)
for c in configs:
write_compose_config(c)
except Exception as e:
log.exception(e)
def cleanup_stale_projects(configs):
def deployments(configs):
for c in configs:
yield c['name']
def compose_projects(compose_dir):
for proj in os.listdir(compose_dir):
if os.path.isfile(
os.path.join(DOCKER_COMPOSE_DIR,
'%s/docker-compose.yml' % proj)):
yield proj
def cleanup_containers(project):
cmd = [
DOCKER_COMPOSE_CMD,
'kill'
]
subproc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = subproc.communicate()
for proj in compose_projects(DOCKER_COMPOSE_DIR):
if proj not in deployments(configs):
proj_dir = os.path.join(DOCKER_COMPOSE_DIR, proj)
os.chdir(proj_dir)
cleanup_containers(proj)
os.remove('%s/docker-compose.yml' % proj_dir)
def write_compose_config(c):
group = c.get('group')
if group != 'docker-compose':
return
def prepare_dir(path):
if not os.path.isdir(path):
os.makedirs(path, 0o700)
compose_conf = c.get('config', '')
if isinstance(compose_conf, dict):
yaml_config = yaml.safe_dump(compose_conf, default_flow_style=False)
else:
yaml_config = compose_conf
proj_dir = os.path.join(DOCKER_COMPOSE_DIR, c['name'])
prepare_dir(proj_dir)
fn = os.path.join(proj_dir, 'docker-compose.yml')
with os.fdopen(os.open(fn, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600),
'w') as f:
f.write(yaml_config.encode('utf-8'))
if __name__ == '__main__':
sys.exit(main(sys.argv))
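
As a rough standalone test of this script, its environment variables
can be pointed at scratch paths; the payload here is illustrative:

    export HEAT_SHELL_CONFIG=/tmp/heat-config
    export HEAT_DOCKER_COMPOSE_WORKING=/tmp/heat-compose
    cat > "$HEAT_SHELL_CONFIG" <<'EOF'
    [{"name": "web", "group": "docker-compose",
      "config": {"web": {"image": "nginx", "ports": ["80:80"]}}}]
    EOF
    python 50-heat-config-docker-compose
    # expected result: /tmp/heat-compose/web/docker-compose.yml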

magnum/drivers/common/image/heat-container-agent/scripts/55-heat-config (194)

@@ -0,0 +1,194 @@
#!/usr/bin/env python
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import json
import logging
import os
import shutil
import stat
import subprocess
import sys
import requests
HOOKS_DIR_PATHS = (
os.environ.get('HEAT_CONFIG_HOOKS'),
'/usr/libexec/heat-config/hooks',
'/var/lib/heat-config/hooks',
)
CONF_FILE = os.environ.get('HEAT_SHELL_CONFIG',
'/var/run/heat-config/heat-config')
DEPLOYED_DIR = os.environ.get('HEAT_CONFIG_DEPLOYED',
'/var/lib/heat-config/deployed')
OLD_DEPLOYED_DIR = os.environ.get('HEAT_CONFIG_DEPLOYED_OLD',
'/var/run/heat-config/deployed')
HEAT_CONFIG_NOTIFY = os.environ.get('HEAT_CONFIG_NOTIFY',
'heat-config-notify')
def main(argv=sys.argv):
log = logging.getLogger('heat-config')
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(
logging.Formatter(
'[%(asctime)s] (%(name)s) [%(levelname)s] %(message)s'))
log.addHandler(handler)
log.setLevel('DEBUG')
if not os.path.exists(CONF_FILE):
log.error('No config file %s' % CONF_FILE)
return 1
conf_mode = stat.S_IMODE(os.lstat(CONF_FILE).st_mode)
if conf_mode != 0o600:
os.chmod(CONF_FILE, 0o600)
if not os.path.isdir(DEPLOYED_DIR):
if DEPLOYED_DIR != OLD_DEPLOYED_DIR and os.path.isdir(OLD_DEPLOYED_DIR):
log.debug('Migrating deployed state from %s to %s' %
(OLD_DEPLOYED_DIR, DEPLOYED_DIR))
shutil.move(OLD_DEPLOYED_DIR, DEPLOYED_DIR)
else:
os.makedirs(DEPLOYED_DIR, 0o700)
try:
configs = json.load(open(CONF_FILE))
except ValueError:
pass
else:
for c in configs:
try:
invoke_hook(c, log)
except Exception as e:
log.exception(e)
def find_hook_path(group):
# sanitise the group to get an alphanumeric hook file name
hook = "".join(
x for x in group if x == '-' or x == '_' or x.isalnum())
for h in HOOKS_DIR_PATHS:
if not h or not os.path.exists(h):
continue
hook_path = os.path.join(h, hook)
if os.path.exists(hook_path):
return hook_path
def invoke_hook(c, log):
# Sanitize input values (bug 1333992). Convert all String
# inputs to strings if they're not already
hot_inputs = c.get('inputs', [])
for hot_input in hot_inputs:
if hot_input.get('type', None) == 'String' and \
not isinstance(hot_input['value'], basestring):
hot_input['value'] = str(hot_input['value'])
iv = dict((i['name'], i['value']) for i in c['inputs'])
# The group property indicates whether it is softwarecomponent or
# plain softwareconfig
# If it is softwarecomponent, pick up a property config to invoke
# according to deploy_action
group = c.get('group')
if group == 'component':
found = False
action = iv.get('deploy_action')
config = c.get('config')
configs = config.get('configs')
if configs:
for cfg in configs:
if action in cfg['actions']:
c['config'] = cfg['config']
c['group'] = cfg['tool']
found = True
break
if not found:
log.warn('Skipping group %s, no valid script is defined'
' for deploy action %s' % (group, action))
return
# check to see if this config is already deployed
deployed_path = os.path.join(DEPLOYED_DIR, '%s.json' % c['id'])
if os.path.exists(deployed_path):
log.warn('Skipping config %s, already deployed' % c['id'])
log.warn('To force-deploy, rm %s' % deployed_path)
return
signal_data = {}
hook_path = find_hook_path(c['group'])
if not hook_path:
log.warn('Skipping group %s with no hook script %s' % (
c['group'], hook_path))
return
# write out config, which indicates it is deployed regardless of
# subsequent hook success
with os.fdopen(os.open(
deployed_path, os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
json.dump(c, f, indent=2)
log.debug('Running %s < %s' % (hook_path, deployed_path))
subproc = subprocess.Popen([hook_path],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = subproc.communicate(input=json.dumps(c))
log.info(stdout)
log.debug(stderr)
if subproc.returncode:
log.error("Error running %s. [%s]\n" % (
hook_path, subproc.returncode))
else:
log.info('Completed %s' % hook_path)
try:
if stdout:
signal_data = json.loads(stdout)
except ValueError:
signal_data = {
'deploy_stdout': stdout,
'deploy_stderr': stderr,
'deploy_status_code': subproc.returncode,
}
signal_data_path = os.path.join(DEPLOYED_DIR, '%s.notify.json' % c['id'])
# write out notify data for debugging
with os.fdopen(os.open(
signal_data_path, os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
json.dump(signal_data, f, indent=2)
log.debug('Running %s %s < %s' % (
HEAT_CONFIG_NOTIFY, deployed_path, signal_data_path))
subproc = subprocess.Popen([HEAT_CONFIG_NOTIFY, deployed_path],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = subproc.communicate(input=json.dumps(signal_data))
log.info(stdout)
if subproc.returncode:
log.error(
"Error running heat-config-notify. [%s]\n" % subproc.returncode)
log.error(stderr)
else:
log.debug(stderr)
if __name__ == '__main__':
sys.exit(main(sys.argv))
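
To illustrate the contract invoke_hook relies on, a minimal
hypothetical hook; dropped into one of the HOOKS_DIR_PATHS as, say,
/var/lib/heat-config/hooks/mygroup, it would serve deployments whose
group is "mygroup":

    #!/bin/sh
    # the deployment JSON arrives on stdin; signal data is returned on stdout
    cat > /dev/null
    echo '{"deploy_stdout": "ok", "deploy_stderr": "", "deploy_status_code": 0}'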

magnum/drivers/common/image/heat-container-agent/scripts/configure_container_agent.sh (36)

@@ -0,0 +1,36 @@
#!/bin/bash
set -eux
# initial /etc/os-collect-config.conf
cat <<EOF >/etc/os-collect-config.conf
[DEFAULT]
command = os-refresh-config
EOF
# os-refresh-config scripts directory
# This moves to /usr/libexec/os-refresh-config in later releases
# Be sure to have this dir mounted and created by config.json and tmpfiles
orc_scripts=/opt/stack/os-config-refresh
for d in pre-configure.d configure.d migration.d post-configure.d; do
install -m 0755 -o root -g root -d $orc_scripts/$d
done
# os-refresh-config script for running os-apply-config
cat <<EOF >$orc_scripts/configure.d/20-os-apply-config
#!/bin/bash
set -ue
exec os-apply-config
EOF
chmod 700 $orc_scripts/configure.d/20-os-apply-config
cp /opt/heat-container-agent/scripts/55-heat-config $orc_scripts/configure.d/55-heat-config
chmod 700 $orc_scripts/configure.d/55-heat-config
cp /opt/heat-container-agent/scripts/50-heat-config-docker-compose $orc_scripts/configure.d/50-heat-config-docker-compose
chmod 700 $orc_scripts/configure.d/50-heat-config-docker-compose
mkdir -p /var/lib/heat-config/hooks
cp /opt/heat-container-agent/hooks/* /var/lib/heat-config/hooks/
chmod 755 /var/lib/heat-config/hooks/atomic
chmod 755 /var/lib/heat-config/hooks/docker-compose
chmod 755 /var/lib/heat-config/hooks/script
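
A quick way to verify the refresh-pipeline layout this script produces
(expected output shown as comments):

    find /opt/stack/os-config-refresh -type f
    # /opt/stack/os-config-refresh/configure.d/20-os-apply-config
    # /opt/stack/os-config-refresh/configure.d/50-heat-config-docker-compose
    # /opt/stack/os-config-refresh/configure.d/55-heat-config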

magnum/drivers/common/image/heat-container-agent/scripts/heat-config-notify (163)

@@ -0,0 +1,163 @@
#!/usr/bin/env python
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import json
import logging
import os
import sys
import requests
try:
from heatclient import client as heatclient
except ImportError:
heatclient = None
try:
from keystoneclient.v3 import client as ksclient
except ImportError:
ksclient = None
try:
from zaqarclient.queues.v1 import client as zaqarclient
except ImportError:
zaqarclient = None
MAX_RESPONSE_SIZE = 950000
def init_logging():
log = logging.getLogger('heat-config-notify')
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(
logging.Formatter(
'[%(asctime)s] (%(name)s) [%(levelname)s] %(message)s'))
log.addHandler(handler)
log.setLevel('DEBUG')
return log
def trim_response(response, trimmed_values=None):
"""Trim selected values from response.
Makes given response smaller or the same size as MAX_RESPONSE_SIZE by
trimming given trimmed_values from response dict from the left side
(beginning). Returns trimmed and serialized JSON response itself.
"""
trimmed_values = trimmed_values or ('deploy_stdout', 'deploy_stderr')
str_response = json.dumps(response, ensure_ascii=True, encoding='utf-8')
len_total = len(str_response)
offset = MAX_RESPONSE_SIZE - len_total
if offset >= 0:
return str_response
offset = abs(offset)
for key in trimmed_values:
len_value = len(response[key])
cut = int(round(float(len_value) / len_total * offset))
response[key] = response[key][cut:]
str_response = json.dumps(response, ensure_ascii=True, encoding='utf-8')
return str_response
def main(argv=sys.argv, stdin=sys.stdin):
log = init_logging()
usage = ('Usage:\n heat-config-notify /path/to/config.json '
'< /path/to/signal_data.json')
if len(argv) < 2:
log.error(usage)
return 1
try:
signal_data = json.load(stdin)
except ValueError:
log.warn('No valid json found on stdin')
signal_data = {}
conf_file = argv[1]
if not os.path.exists(conf_file):
log.error('No config file %s' % conf_file)
log.error(usage)
return 1
c = json.load(open(conf_file))
iv = dict((i['name'], i['value']) for i in c['inputs'])
if 'deploy_signal_id' in iv:
sigurl = iv.get('deploy_signal_id')
sigverb = iv.get('deploy_signal_verb', 'POST')
log.debug('Signaling to %s via %s' % (sigurl, sigverb))
# we need to trim log content because Heat response size is limited
# by max_json_body_size = 1048576
str_signal_data = trim_response(signal_data)
if sigverb == 'PUT':
r = requests.put(sigurl, data=str_signal_data,
headers={'content-type': 'application/json'})
else:
r = requests.post(sigurl, data=str_signal_data,
headers={'content-type': 'application/json'})
log.debug('Response %s ' % r)
if 'deploy_queue_id' in iv:
queue_id = iv.get('deploy_queue_id')
log.debug('Signaling to queue %s' % (queue_id,))
ks = ksclient.Client(
auth_url=iv['deploy_auth_url'],
user_id=iv['deploy_user_id'],
password=iv['deploy_password'],
project_id=iv['deploy_project_id'])
endpoint = ks.service_catalog.url_for(
service_type='messaging', endpoint_type='publicURL')
conf = {
'auth_opts': {
'backend': 'keystone',
'options': {
'os_auth_token': ks.auth_token,
'os_project_id': iv['deploy_project_id'],
}
}
}
cli = zaqarclient.Client(endpoint, conf=conf, version=1.1)
queue = cli.queue(queue_id)
r = queue.post({'body': signal_data, 'ttl': 600})
log.debug('Response %s ' % r)
elif 'deploy_auth_url' in iv:
ks = ksclient.Client(
auth_url=iv['deploy_auth_url'],
user_id=iv['deploy_user_id'],
password=iv['deploy_password'],
project_id=iv['deploy_project_id'])
endpoint = ks.service_catalog.url_for(
service_type='orchestration', endpoint_type='publicURL')
log.debug('Signalling to %s' % endpoint)
heat = heatclient.Client(
'1', endpoint, token=ks.auth_token)
r = heat.resources.signal(
iv.get('deploy_stack_id'),
iv.get('deploy_resource_name'),
data=signal_data)
log.debug('Response %s ' % r)
return 0
if __name__ == '__main__':
sys.exit(main(sys.argv, sys.stdin))
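
A sketch of the notification call that 55-heat-config performs once a
hook finishes; the deployed-config path and signal payload are
illustrative:

    echo '{"deploy_stdout": "done", "deploy_stderr": "", "deploy_status_code": 0}' | \
        heat-config-notify /var/lib/heat-config/deployed/0123-abcd.json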

magnum/drivers/common/image/heat-container-agent/scripts/hooks/atomic (115)

@@ -0,0 +1,115 @@
#!/usr/bin/env python
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import json
import logging
import os
import subprocess
import sys
WORKING_DIR = os.environ.get('HEAT_ATOMIC_WORKING',
'/var/lib/heat-config/heat-config-atomic')
ATOMIC_CMD = os.environ.get('HEAT_ATOMIC_CMD', 'atomic')
def prepare_dir(path):
if not os.path.isdir(path):
os.makedirs(path, 0o700)
def build_response(deploy_stdout, deploy_stderr, deploy_status_code):
return {
'deploy_stdout': deploy_stdout,
'deploy_stderr': deploy_stderr,
'deploy_status_code': deploy_status_code,
}
def main(argv=sys.argv):
log = logging.getLogger('heat-config')
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(
logging.Formatter(
'[%(asctime)s] (%(name)s) [%(levelname)s] %(message)s'))
log.addHandler(handler)
log.setLevel('DEBUG')
c = json.load(sys.stdin)
prepare_dir(WORKING_DIR)
os.chdir(WORKING_DIR)
env = os.environ.copy()
input_values = dict((i['name'], i['value']) for i in c['inputs'])
stdout, stderr = {}, {}
config = c.get('config', '')
if not config:
log.debug("No 'config' input found, nothing to do.")
json.dump(build_response(stdout, stderr, 0), sys.stdout)
return
atomic_subcmd = config.get('command', 'install')
image = config.get('image')
if input_values.get('deploy_action') == 'DELETE':
# the executable must come first in the argv list
cmd = [
ATOMIC_CMD,
'uninstall',
image
]
subproc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, env=env)
stdout, stderr = subproc.communicate()
json.dump(build_response(stdout, stderr, subproc.returncode), sys.stdout)
return
install_cmd = config.get('installcmd', '')
name = config.get('name', c.get('id'))
cmd = [
ATOMIC_CMD,
atomic_subcmd,
image,
'-n %s' % name
]
if atomic_subcmd == 'install':
cmd.extend([install_cmd])
privileged = config.get('privileged', False)
if atomic_subcmd == 'run' and privileged:
cmd.extend(['--spc'])
log.debug('Running %s' % cmd)
subproc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = subproc.communicate()
log.debug(stdout)
log.debug(stderr)
if subproc.returncode:
log.error("Error running %s. [%s]\n" % (cmd, subproc.returncode))
else:
log.debug('Completed %s' % cmd)
json.dump(build_response(stdout, stderr, subproc.returncode), sys.stdout)
if __name__ == '__main__':
sys.exit(main(sys.argv))
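
An illustrative input for this hook, using the fields it reads above
(command, image, installcmd); the image reference is hypothetical:

    cat <<'EOF' | ./atomic
    {"id": "c1", "group": "atomic",
     "inputs": [{"name": "deploy_action", "value": "CREATE"}],
     "config": {"command": "install",
                "image": "docker.io/example/agent",
                "installcmd": "--system"}}
    EOF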

magnum/drivers/common/image/heat-container-agent/scripts/hooks/docker-compose (127)

@@ -0,0 +1,127 @@
#!/usr/bin/env python
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import ast
import dpath
import json
import logging
import os
import subprocess
import sys
import yaml
WORKING_DIR = os.environ.get('HEAT_DOCKER_COMPOSE_WORKING',
'/var/lib/heat-config/heat-config-docker-compose')
DOCKER_COMPOSE_CMD = os.environ.get('HEAT_DOCKER_COMPOSE_CMD',
'docker-compose')
def prepare_dir(path):
if not os.path.isdir(path):
os.makedirs(path, 0o700)
def write_input_file(file_path, content):
prepare_dir(os.path.dirname(file_path))
with os.fdopen(os.open(
file_path, os.O_CREAT | os.O_WRONLY, 0o600), 'w') as f:
f.write(content.encode('utf-8'))
def build_response(deploy_stdout, deploy_stderr, deploy_status_code):
return {
'deploy_stdout': deploy_stdout,
'deploy_stderr': deploy_stderr,
'deploy_status_code': deploy_status_code,
}
def main(argv=sys.argv):
log = logging.getLogger('heat-config')
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(
logging.Formatter(
'[%(asctime)s] (%(name)s) [%(levelname)s] %(message)s'))
log.addHandler(handler)
log.setLevel('DEBUG')
c = json.load(sys.stdin)
input_values = dict((i['name'], i['value']) for i in c['inputs'])
proj = os.path.join(WORKING_DIR, c.get('name'))
prepare_dir(proj)
stdout, stderr = {}, {}
if input_values.get('deploy_action') == 'DELETE':
json.dump(build_response(stdout, stderr, 0), sys.stdout)
return
config = c.get('config', '')
if not config:
log.debug("No 'config' input found, nothing to do.")
json.dump(build_response(stdout, stderr, 0), sys.stdout)
return
# convert config to dict
if not isinstance(config, dict):
config = ast.literal_eval(json.dumps(yaml.safe_load(config)))
os.chdir(proj)
compose_env_files = []
for value in dpath.util.values(config, '*/env_file'):
if isinstance(value, list):
compose_env_files.extend(value)
elif isinstance(value, basestring):
compose_env_files.extend([value])
input_env_files = {}
if input_values.get('env_files'):
input_env_files = dict(
(i['file_name'], i['content'])
for i in ast.literal_eval(input_values.get('env_files')))
for file in compose_env_files:
if file in input_env_files.keys():
write_input_file(file, input_env_files.get(file))
cmd = [
DOCKER_COMPOSE_CMD,
'up',
'-d',
'--no-build',
]
log.debug('Running %s' % cmd)
subproc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = subproc.communicate()
log.debug(stdout)
log.debug(stderr)
if subproc.returncode:
log.error("Error running %s. [%s]\n" % (cmd, subproc.returncode))
else:
log.debug('Completed %s' % cmd)
json.dump(build_response(stdout, stderr, subproc.returncode), sys.stdout)
if __name__ == '__main__':
sys.exit(main(sys.argv))
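
An illustrative input exercising the env_file handling above; note that
the env_files input arrives as a Python-literal string, matching the
ast.literal_eval call:

    cat <<'EOF' | ./docker-compose
    {"name": "web", "group": "docker-compose",
     "inputs": [{"name": "env_files",
                 "value": "[{\"file_name\": \"./web.env\", \"content\": \"FOO=bar\"}]"}],
     "config": {"web": {"image": "nginx", "env_file": "./web.env"}}}
    EOF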

magnum/drivers/common/image/heat-container-agent/scripts/hooks/script (96)

@@ -0,0 +1,96 @@
#!/usr/bin/env python
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import json
import logging
import os
import subprocess
import sys
WORKING_DIR = os.environ.get('HEAT_SCRIPT_WORKING',
'/var/lib/heat-config/heat-config-script')
OUTPUTS_DIR = os.environ.get('HEAT_SCRIPT_OUTPUTS',
'/var/run/heat-config/heat-config-script')
def prepare_dir(path):
if not os.path.isdir(path):
os.makedirs(path, 0o700)
def main(argv=sys.argv):
log = logging.getLogger('heat-config')
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(
logging.Formatter(
'[%(asctime)s] (%(name)s) [%(levelname)s] %(message)s'))
log.addHandler(handler)
log.setLevel('DEBUG')
prepare_dir(OUTPUTS_DIR)
prepare_dir(WORKING_DIR)
os.chdir(WORKING_DIR)
c = json.load(sys.stdin)
env = os.environ.copy()
for input in c['inputs']:
input_name = input['name']
value = input.get('value', '')
if isinstance(value, dict) or isinstance(value, list):
env[input_name] = json.dumps(value)
else:
env[input_name] = value
log.info('%s=%s' % (input_name, env[input_name]))
fn = os.path.join(WORKING_DIR, c['id'])
heat_outputs_path = os.path.join(OUTPUTS_DIR, c['id'])
env['heat_outputs_path'] = heat_outputs_path
with os.fdopen(os.open(fn, os.O_CREAT | os.O_WRONLY, 0o700), 'w') as f:
f.write(c.get('config', '').encode('utf-8'))
log.debug('Running %s' % fn)
subproc = subprocess.Popen([fn], stdout=subprocess.PIPE,
stderr=subprocess.PIPE, env=env)
stdout, stderr = subproc.communicate()
log.info(stdout)
log.debug(stderr)
if subproc.returncode:
log.error("Error running %s. [%s]\n" % (fn, subproc.returncode))
else:
log.info('Completed %s' % fn)
response = {}
for output in c.get('outputs') or []:
output_name = output['name']
try:
with open('%s.%s' % (heat_outputs_path, output_name)) as out:
response[output_name] = out.read()
except IOError:
pass
response.update({
'deploy_stdout': stdout,
'deploy_stderr': stderr,
'deploy_status_code': subproc.returncode,
})
json.dump(response, sys.stdout)
if __name__ == '__main__':
sys.exit(main(sys.argv))
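
And an illustrative input for the script hook, showing the
heat_outputs_path convention used above for reporting outputs:

    cat <<'EOF' | ./script
    {"id": "s1", "group": "script",
     "inputs": [{"name": "greeting", "value": "hello"}],
     "outputs": [{"name": "result"}],
     "config": "#!/bin/sh\necho $greeting > ${heat_outputs_path}.result"}
    EOF
    # the response JSON carries deploy_stdout/stderr/status_code plus a
    # "result" key read back from ${heat_outputs_path}.result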

magnum/drivers/common/image/heat-container-agent/scripts/write-os-apply-config-templates.sh (71)

@@ -0,0 +1,71 @@
#!/bin/bash
set -eux
# os-apply-config templates directory
oac_templates=/usr/libexec/os-apply-config/templates
mkdir -p $oac_templates/etc
# template for building os-collect-config.conf for polling heat
cat <<EOF >$oac_templates/etc/os-collect-config.conf
[DEFAULT]
{{^os-collect-config.command}}
command = os-refresh-config
{{/os-collect-config.command}}
{{#os-collect-config}}
{{#command}}
command = {{command}}
{{/command}}
{{#polling_interval}}
polling_interval = {{polling_interval}}
{{/polling_interval}}
{{#cachedir}}
cachedir = {{cachedir}}
{{/cachedir}}
{{#collectors}}
collectors = {{.}}
{{/collectors}}
{{#cfn}}
[cfn]
{{#metadata_url}}
metadata_url = {{metadata_url}}
{{/metadata_url}}
stack_name = {{stack_name}}
secret_access_key = {{secret_access_key}}
access_key_id = {{access_key_id}}
path = {{path}}
{{/cfn}}
{{#heat}}
[heat]
auth_url = {{auth_url}}
user_id = {{user_id}}
password = {{password}}
project_id = {{project_id}}
stack_id = {{stack_id}}
resource_name = {{resource_name}}
{{/heat}}
{{#zaqar}}
[zaqar]
auth_url = {{auth_url}}
user_id = {{user_id}}
password = {{password}}
project_id = {{project_id}}
queue_id = {{queue_id}}
{{/zaqar}}
{{#request}}
[request]
{{#metadata_url}}
metadata_url = {{metadata_url}}
{{/metadata_url}}
{{/request}}
{{/os-collect-config}}
EOF
mkdir -p $oac_templates/var/run/heat-config
# template for writing heat deployments data to a file
echo "{{deployments}}" > $oac_templates/var/run/heat-config/heat-config

magnum/drivers/common/image/heat-container-agent/service.template (11)

@@ -0,0 +1,11 @@
[Unit]
Description=Heat Container Agent system image
[Service]
ExecStart=$EXEC_START
ExecStop=$EXEC_STOP
Restart=on-failure
WorkingDirectory=$DESTDIR
[Install]
WantedBy=multi-user.target

magnum/drivers/common/image/heat-container-agent/tmpfiles.template (10)

@@ -0,0 +1,10 @@
d /var/lib/heat-container-agent - - - - -
Z /var/lib/heat-container-agent - - - - -
d /var/run/heat-config - - - - -
Z /var/run/heat-config - - - - -
d /var/run/os-collect-config - - - - -
Z /var/run/os-collect-config - - - - -
d /opt/stack/os-config-refresh - - - - -
Z /opt/stack/os-config-refresh - - - - -
d /srv/magnum - - - - -
Z /srv/magnum - - - - -

magnum/drivers/common/templates/kubernetes/fragments/enable-monitoring.sh (139)

@@ -1,139 +0,0 @@
#!/bin/bash
. /etc/sysconfig/heat-params
if [ "$(echo $PROMETHEUS_MONITORING | tr '[:upper:]' '[:lower:]')" = "false" ]; then
exit 0
fi
function writeFile {
# $1 is filename
# $2 is file content
[ -f ${1} ] || {
echo "Writing File: $1"
mkdir -p $(dirname ${1})
cat << EOF > ${1}
$2
EOF
}
}
KUBE_MON_BIN=/usr/local/bin/kube-enable-monitoring
KUBE_MON_SERVICE=/etc/systemd/system/kube-enable-monitoring.service
GRAFANA_DEF_DASHBOARDS="/var/lib/grafana/dashboards"
GRAFANA_DEF_DASHBOARD_FILE=$GRAFANA_DEF_DASHBOARDS"/default.json"
# Write the binary for enable-monitoring
KUBE_MON_BIN_CONTENT='''#!/bin/sh
until curl -sf "http://127.0.0.1:8080/healthz"
do
echo "Waiting for Kubernetes API..."
sleep 5
done
# Check if all resources exist already before creating them
# Check if configmap Prometheus exists
kubectl get configmap prometheus -n kube-system
if [ "$?" != "0" ] && \
[ -f "/srv/kubernetes/monitoring/prometheusConfigMap.yaml" ]; then
kubectl create -f /srv/kubernetes/monitoring/prometheusConfigMap.yaml
fi
# Check if deployment and service Prometheus exist
kubectl get service prometheus -n kube-system | kubectl get deployment prometheus -n kube-system
if [ "${PIPESTATUS[0]}" != "0" ] && [ "${PIPESTATUS[1]}" != "0" ] && \
[ -f "/srv/kubernetes/monitoring/prometheusService.yaml" ]; then
kubectl create -f /srv/kubernetes/monitoring/prometheusService.yaml
fi
# Check if configmap graf-dash exists
kubectl get configmap graf-dash -n kube-system
if [ "$?" != "0" ] && \
[ -f '''$GRAFANA_DEF_DASHBOARD_FILE''' ]; then
kubectl create configmap graf-dash --from-file='''$GRAFANA_DEF_DASHBOARD_FILE''' -n kube-system
fi
# Check if deployment and service Grafana exist
kubectl get service grafana -n kube-system | kubectl get deployment grafana -n kube-system
if [ "${PIPESTATUS[0]}" != "0" ] && [ "${PIPESTATUS[1]}" != "0" ] && \
[ -f "/srv/kubernetes/monitoring/grafanaService.yaml" ]; then
kubectl create -f /srv/kubernetes/monitoring/grafanaService.yaml
fi
# Wait for Grafana pod and then inject data source
while true
do
echo "Waiting for Grafana pod to be up and Running"
if [ "$(kubectl get po -n kube-system -l name=grafana -o jsonpath={..phase})" = "Running" ]; then
break
fi
sleep 2
done
# Which node is running Grafana
NODE_IP=`kubectl get po -n kube-system -o jsonpath={.items[0].status.hostIP} -l name=grafana`
PROM_SERVICE_IP=`kubectl get svc prometheus --namespace kube-system -o jsonpath={..clusterIP}`
# The Grafana pod might be running but the app might still be initiating
echo "Check if Grafana is ready..."
curl --user admin:$ADMIN_PASSWD -X GET http://$NODE_IP:3000/api/datasources/1
until [ $? -eq 0 ]
do
sleep 2
curl --user admin:$ADMIN_PASSWD -X GET http://$NODE_IP:3000/api/datasources/1
done
# Inject Prometheus datasource into Grafana
while true
do
INJECT=`curl --user admin:$ADMIN_PASSWD -X POST \
-H "Content-Type: application/json;charset=UTF-8" \
--data-binary '''"'"'''{"name":"k8sPrometheus","isDefault":true,
"type":"prometheus","url":"http://'''"'"'''$PROM_SERVICE_IP'''"'"''':9090","access":"proxy"}'''"'"'''\
"http://$NODE_IP:3000/api/datasources/"`
if [[ "$INJECT" = *"Datasource added"* ]]; then
echo "Prometheus datasource injected into Grafana"
break
fi
echo "Trying to inject Prometheus datasource into Grafana - "$INJECT
done
'''
writeFile $KUBE_MON_BIN "$KUBE_MON_BIN_CONTENT"
# Write the monitoring service
KUBE_MON_SERVICE_CONTENT='''[Unit]
Requires=kubelet.service
[Service]
Type=oneshot
Environment=HOME=/root
EnvironmentFile=-/etc/kubernetes/config
ExecStart='''${KUBE_MON_BIN}'''
[Install]
WantedBy=multi-user.target
'''
writeFile $KUBE_MON_SERVICE "$KUBE_MON_SERVICE_CONTENT"
chown root:root ${KUBE_MON_BIN}
chmod 0755 ${KUBE_MON_BIN}
chown root:root ${KUBE_MON_SERVICE}
chmod 0644 ${KUBE_MON_SERVICE}
# Download the default JSON Grafana dashboard
# Not a crucial step, so allow it to fail
# TODO: this JSON should be passed into the minions as gzip in cloud-init
GRAFANA_DASHB_URL="https://grafana.net/api/dashboards/1621/revisions/1/download"
mkdir -p $GRAFANA_DEF_DASHBOARDS
curl $GRAFANA_DASHB_URL -o $GRAFANA_DEF_DASHBOARD_FILE || echo "Failed to fetch default Grafana dashboard"
if [ -f $GRAFANA_DEF_DASHBOARD_FILE ]; then
sed -i -- 's|${DS_PROMETHEUS}|k8sPrometheus|g' $GRAFANA_DEF_DASHBOARD_FILE
fi
# Launch the monitoring service
systemctl enable kube-enable-monitoring
systemctl start --no-block kube-enable-monitoring

magnum/drivers/common/templates/kubernetes/fragments/enable-prometheus-monitoring (434)

@@ -0,0 +1,434 @@
#!/bin/bash
. /etc/sysconfig/heat-params
function writeFile {
# $1 is filename
# $2 is file content
[ -f ${1} ] || {
echo "Writing File: $1"
mkdir -p $(dirname ${1})
cat << EOF > ${1}
$2
EOF
}
}
prometheusConfigMap_file=/srv/magnum/kubernetes/monitoring/prometheusConfigMap.yaml
[ -f ${prometheusConfigMap_file} ] || {
echo "Writing File: $prometheusConfigMap_file"
mkdir -p $(dirname ${prometheusConfigMap_file})
# NOTE: EOF needs to be in quotes in order to not escape the $ characters
cat << 'EOF' > ${prometheusConfigMap_file}
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus
namespace: kube-system
data:
prometheus.yml: |
global:
scrape_interval: 10s
scrape_timeout: 10s
evaluation_interval: 10s
scrape_configs:
- job_name: 'kubernetes-apiservers'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: default;kubernetes;https
- job_name: 'kubernetes-nodes'
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- job_name: 'kubernetes-cadvisor'
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: kubernetes_name
- job_name: 'kubernetes-services'
metrics_path: /probe
params:
module: [http_2xx]
kubernetes_sd_configs:
- role: service
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
action: keep
regex: true
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: blackbox
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
target_label: kubernetes_name
- job_name: 'kubernetes-pods'
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: kubernetes_pod_name
- job_name: 'kubernetes-node-exporter'
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- source_labels: [__meta_kubernetes_role]
action: replace
target_label: kubernetes_role
- source_labels: [__address__]
regex: '(.*):10250'
replacement: '${1}:9100'
target_label: __address__
EOF
}
prometheusService_file=/srv/magnum/kubernetes/monitoring/prometheusService.yaml
prometheusService_content=$(cat <<EOF
apiVersion: v1
kind: Service
metadata:
annotations:
prometheus.io/scrape: 'true'
labels:
name: prometheus
name: prometheus
namespace: kube-system
spec:
selector:
app: prometheus
type: NodePort
ports:
- name: prometheus
protocol: TCP
port: 9090
nodePort: 30900
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: prometheus
namespace: kube-system
spec:
replicas: 1
selector:
matchLabels:
app: prometheus
template:
metadata:
name: prometheus
labels:
app: prometheus
spec:
containers:
- name: prometheus
image: ${CONTAINER_INFRA_PREFIX:-docker.io/prom/}prometheus:v1.8.2
args:
- '-storage.local.retention=6h'
- '-storage.local.memory-chunks=500000'
- '-config.file=/etc/prometheus/prometheus.yml'
ports:
- name: web
containerPort: 9090
hostPort: 9090
volumeMounts:
- name: config-volume
mountPath: /etc/prometheus
volumes:
- name: config-volume
configMap:
name: prometheus
EOF
)
writeFile $prometheusService_file "$prometheusService_content"
grafanaService_file=/srv/magnum/kubernetes/monitoring/grafanaService.yaml
grafanaService_content=$(cat <<EOF
apiVersion: v1
kind: Service
metadata:
labels:
name: node
role: service
name: grafana
namespace: kube-system
spec:
type: "NodePort"
ports:
- port: 3000
targetPort: 3000
nodePort: 30603
selector:
grafana: "true"
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: grafana
namespace: kube-system
spec:
replicas: 1
template:
metadata:
labels:
name: grafana
grafana: "true"
role: db
spec:
containers:
- image: ${CONTAINER_INFRA_PREFIX:-docker.io/grafana/}grafana
imagePullPolicy: Always
name: grafana
env:
- name: GF_SECURITY_ADMIN_PASSWORD
value: $ADMIN_PASSWD
- name: GF_DASHBOARDS_JSON_ENABLED
value: "true"
- name: GF_DASHBOARDS_JSON_PATH
value: /var/lib/grafana/dashboards
resources:
# keep request = limit to keep this container in guaranteed class
limits:
cpu: 100m
memory: 200Mi
requests:
cpu: 100m
memory: 200Mi
volumeMounts:
- name: default-dashboard
mountPath: /var/lib/grafana/dashboards
ports:
- containerPort: 3000
hostPort: 3000
volumes:
- name: default-dashboard
configMap:
name: graf-dash
EOF
)
writeFile $grafanaService_file "$grafanaService_content"
. /etc/sysconfig/heat-params
if [ "$(echo $PROMETHEUS_MONITORING | tr '[:upper:]' '[:lower:]')" = "false" ]; then
exit 0
fi
PROMETHEUS_MON_BASE_DIR="/srv/magnum/kubernetes/monitoring"
KUBE_MON_BIN=${PROMETHEUS_MON_BASE_DIR}"/bin/kube-enable-monitoring"
KUBE_MON_SERVICE="/etc/systemd/system/kube-enable-monitoring.service"
GRAFANA_DEF_DASHBOARDS=${PROMETHEUS_MON_BASE_DIR}"/dashboards"
GRAFANA_DEF_DASHBOARD_FILE=$GRAFANA_DEF_DASHBOARDS"/default.json"
# Write the binary for enable-monitoring
KUBE_MON_BIN_CONTENT='''#!/bin/sh
until curl -sf "http://127.0.0.1:8080/healthz"
do
echo "Waiting for Kubernetes API..."
sleep 5
done
# Check if all resources exist already before creating them
# Check if configmap Prometheus exists
kubectl get configmap prometheus -n kube-system
if [ "$?" != "0" ] && \
[ -f "'''${PROMETHEUS_MON_BASE_DIR}'''/prometheusConfigMap.yaml" ]; then
kubectl create -f '''${PROMETHEUS_MON_BASE_DIR}'''/prometheusConfigMap.yaml
fi
# Check if deployment and service Prometheus exist
kubectl get service prometheus -n kube-system | kubectl get deployment prometheus -n kube-system
if [ "${PIPESTATUS[0]}" != "0" ] && [ "${PIPESTATUS[1]}" != "0" ] && \
[ -f "'''${PROMETHEUS_MON_BASE_DIR}'''/prometheusService.yaml" ]; then
kubectl create -f '''${PROMETHEUS_MON_BASE_DIR}'''/prometheusService.yaml
fi
# Check if configmap graf-dash exists
kubectl get configmap graf-dash -n kube-system
if [ "$?" != "0" ] && \
[ -f '''$GRAFANA_DEF_DASHBOARD_FILE''' ]; then
kubectl create configmap graf-dash --from-file='''$GRAFANA_DEF_DASHBOARD_FILE''' -n kube-system
fi
# Check if deployment and service Grafana exist
kubectl get service grafana -n kube-system | kubectl get deployment grafana -n kube-system
if [ "${PIPESTATUS[0]}" != "0" ] && [ "${PIPESTATUS[1]}" != "0" ] && \
[ -f "'''${PROMETHEUS_MON_BASE_DIR}'''/grafanaService.yaml" ]; then
kubectl create -f '''${PROMETHEUS_MON_BASE_DIR}'''/grafanaService.yaml
fi
# Wait for Grafana pod and then inject data source
while true
do
echo "Waiting for Grafana pod to be up and Running"
if [ "$(kubectl get po -n kube-system -l name=grafana -o jsonpath={..phase})" = "Running" ]; then
break
fi
sleep 2
done
# Which node is running Grafana
NODE_IP=`kubectl get po -n kube-system -o jsonpath={.items[0].status.hostIP} -l name=grafana`
PROM_SERVICE_IP=`kubectl get svc prometheus --namespace kube-system -o jsonpath={..clusterIP}`