Containerized Openstack Monitoring Solution
Change-Id: I66ea0711dd0319c1153a13b159dc5be6f7a7016c
@@ -7,7 +7,8 @@ Methodologies
 =======================

 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 4

    tools
    hyper-scale
+   monitoring/index
doc/source/methodologies/monitoring/configs/ccp/ccp.yaml (new file)
@@ -0,0 +1,15 @@
builder:
  push: true
  no_cache: false
registry:
  address: "172.20.8.35:5000/env-1"
repositories:
  skip_empty: True
kubernetes:
  server: http://172.20.9.234:8080
---
!include
- versions.yaml
- topology.yaml
- configs.yaml
- repos.yaml
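Note: deploy-ccp.sh below copies this file to /root/.ccp.yaml on the Kubernetes master, where the fuel-ccp CLI picks it up as its default config. A minimal manual run on that node (the same subcommands the script automates) would be:

    ccp fetch    # clone the component repos listed in repos.yaml
    ccp build    # build images and push them to 172.20.8.35:5000/env-1
    ccp deploy   # render and apply the Kubernetes objects
    ccp status   # poll until the deployment reports "ok"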
doc/source/methodologies/monitoring/configs/ccp/configs.yaml (new file)
@@ -0,0 +1,38 @@
configs:
  private_interface: p1p1.602
  public_interface: p1p1.602
  ingress:
    enabled: true
  glance:
    bootstrap:
      enable: true
  # nova:
  #   allocation_ratio:
  #     cpu: 16.0
  neutron:
    physnets:
      - name: "physnet1"
        bridge_name: "br-ex"
        interface: "p1p1.649"
        flat: true
        vlan_range: false
    bootstrap:
      internal:
        enable: true
      external:
        enable: true
        net_name: ext-net
        subnet_name: ext-subnet
        physnet: physnet1
        network: 10.144.0.0/12
        gateway: 10.144.0.1
        nameserver: 10.144.0.1
        pool:
          start: 10.144.1.0
          end: 10.159.255.250
  keystone:
    debug: true
  heat:
    debug: true
  memcached:
    ram: 30720
doc/source/methodologies/monitoring/configs/ccp/deploy-ccp.sh (new executable file)
@@ -0,0 +1,78 @@
#!/bin/bash
set -ex
if [ -z "$1" ]; then
    echo "Please set number of env as argument"
    exit 1
fi

DEPLOY_TIMEOUT=1200
export SSH_USER="root"
export SSH_PASS="r00tme"
cd $(dirname $(realpath $0))

NODE1="172.20.8.6${1}"

SSH_OPTS="-q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
SSH_CMD="sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${NODE1}"
SCP_CMD="sshpass -p ${SSH_PASS} scp ${SSH_OPTS}"

if [ ! -d ./env-${1} ]; then
    echo "Yaml files for env-${1} were not found"
    echo "Please create and commit deployment/ccp/rackspace/env-${1}/configs with correct yaml files"
    echo "Main file should be deployment/ccp/rackspace/env-${1}/configs/ccp.yaml"
    exit 1
fi

$SCP_CMD ./env-${1}/configs/ccp.yaml ${SSH_USER}@${NODE1}:/root/.ccp.yaml
for i in $(ls -1 ./env-${1}/configs/ | grep -v ccp.yaml ); do
    $SCP_CMD ./env-${1}/configs/${i} ${SSH_USER}@${NODE1}:/root/
done

$SSH_CMD "rm -rf /root/fuel-ccp; cd /root; git clone https://git.openstack.org/openstack/fuel-ccp"
$SSH_CMD "apt-get -y install python-pip"
$SSH_CMD "/usr/bin/pip install --upgrade pip"
$SSH_CMD "/usr/bin/pip install /root/fuel-ccp/"

CCP_STATUS=$($SSH_CMD "/usr/local/bin/ccp status")
if [ -n "$CCP_STATUS" ]; then
    echo "Active deployment was found"
    echo "$CCP_STATUS"
    echo "Please execute 'ccp cleanup' and 'rm -rf /var/lib/mysql/*' on ${NODE1} manually"
    exit 1
fi

$SSH_CMD "echo '172.20.8.6${1} cloudformation.ccp.external console.ccp.external identity.ccp.external object-store.ccp.external compute.ccp.external orchestration.ccp.external network.ccp.external image.ccp.external volume.ccp.external horizon.ccp.external' >> /etc/hosts"
# $SSH_CMD kubectl delete configmaps traefik-conf -n kube-system
# $SSH_CMD kubectl delete service traefik -n kube-system
# $SSH_CMD kubectl delete secret traefik-cert -n kube-system
# $SSH_CMD kubectl delete deployment traefik -n kube-system
$SSH_CMD "/root/fuel-ccp/tools/ingress/deploy-ingress-controller.sh -i 172.20.8.6${1}" || echo "Already configured"
$SSH_CMD "echo 172.20.8.6${1} \$(ccp domains list -f value) >> /etc/hosts"
$SSH_CMD "openssl s_client -status -connect identity.ccp.external:8443 < /dev/null 2>&1 | awk 'BEGIN {pr=0;} /-----BEGIN CERTIFICATE-----/ {pr=1;} {if (pr) print;} /-----END CERTIFICATE-----/ {exit;}' >> /usr/local/lib/python2.7/dist-packages/requests/cacert.pem"
$SSH_CMD "openssl s_client -status -connect identity.ccp.external:8443 < /dev/null 2>&1 | awk 'BEGIN {pr=0;} /-----BEGIN CERTIFICATE-----/ {pr=1;} {if (pr) print;} /-----END CERTIFICATE-----/ {exit;}' > /usr/share/ca-certificates/ingress.crt"
$SSH_CMD "cp /usr/share/ca-certificates/ingress.crt /usr/local/share/ca-certificates/"
$SSH_CMD "update-ca-certificates"
if [ "$($SSH_CMD "curl -s 'https://identity.ccp.external:8443/' > /dev/null; echo \$?")" != "0" ]
then
    echo "keystone is unreachable; check https://identity.ccp.external:8443"
    exit 1
fi

#$SSH_CMD "/root/fuel-ccp/tools/registry/deploy-registry.sh" &&
$SSH_CMD "/usr/local/bin/ccp fetch"
$SSH_CMD "/usr/local/bin/ccp build"
$SSH_CMD "/usr/local/bin/ccp deploy"

DEPLOY_TIME=0
while [ "$($SSH_CMD '/usr/local/bin/ccp status -s -f value' 2>/dev/null)" != "ok" ]
do
    sleep 5
    DEPLOY_TIME=$((${DEPLOY_TIME} + 5))
    if [ $DEPLOY_TIME -ge $DEPLOY_TIMEOUT ]; then
        echo "Deployment timeout"
        exit 1
    fi
done

$SSH_CMD "/usr/local/bin/ccp status"
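Usage sketch: the script takes the environment number as its only argument and derives the master address from it, e.g.:

    ./deploy-ccp.sh 1    # deploys CCP onto the env-1 master at 172.20.8.61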
@@ -0,0 +1,7 @@
export OS_PROJECT_DOMAIN_NAME=default
export OS_USER_DOMAIN_NAME=default
export OS_PROJECT_NAME=admin
export OS_USERNAME=admin
export OS_PASSWORD=password
export OS_IDENTITY_API_VERSION=3
export OS_AUTH_URL=https://identity.ccp.external:8443/v3
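These credentials can be smoke-tested against the ingress-exposed Keystone (assumes python-openstackclient is installed and the ingress CA is trusted, as arranged by deploy-ccp.sh):

    source ./openrc
    openstack token issue    # succeeds only if identity.ccp.external:8443 is reachable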
doc/source/methodologies/monitoring/configs/ccp/repos.yaml (new file)
@@ -0,0 +1,44 @@
repositories:
  repos:
    - git_url: https://git.openstack.org/openstack/fuel-ccp-ceph
      name: fuel-ccp-ceph
    - git_url: https://git.openstack.org/openstack/fuel-ccp-cinder
      name: fuel-ccp-cinder
    - git_url: https://git.openstack.org/openstack/fuel-ccp-debian-base
      name: fuel-ccp-debian-base
    - git_url: https://git.openstack.org/openstack/fuel-ccp-entrypoint
      name: fuel-ccp-entrypoint
    - git_url: https://git.openstack.org/openstack/fuel-ccp-etcd
      name: fuel-ccp-etcd
    - git_url: https://git.openstack.org/openstack/fuel-ccp-glance
      name: fuel-ccp-glance
    - git_url: https://git.openstack.org/openstack/fuel-ccp-heat
      name: fuel-ccp-heat
    - git_url: https://git.openstack.org/openstack/fuel-ccp-horizon
      name: fuel-ccp-horizon
    # - git_url: https://git.openstack.org/openstack/fuel-ccp-ironic
    #   name: fuel-ccp-ironic
    - git_url: https://git.openstack.org/openstack/fuel-ccp-keystone
      name: fuel-ccp-keystone
    # - git_url: https://git.openstack.org/openstack/fuel-ccp-mariadb
    #   name: fuel-ccp-mariadb
    - git_url: https://git.openstack.org/openstack/fuel-ccp-galera
      name: fuel-ccp-galera
    - git_url: https://git.openstack.org/openstack/fuel-ccp-memcached
      name: fuel-ccp-memcached
    # - git_url: https://git.openstack.org/openstack/fuel-ccp-murano
    #   name: fuel-ccp-murano
    - git_url: https://git.openstack.org/openstack/fuel-ccp-neutron
      name: fuel-ccp-neutron
    - git_url: https://git.openstack.org/openstack/fuel-ccp-nova
      name: fuel-ccp-nova
    - git_url: https://git.openstack.org/openstack/fuel-ccp-openstack-base
      name: fuel-ccp-openstack-base
    - git_url: https://git.openstack.org/openstack/fuel-ccp-rabbitmq
      name: fuel-ccp-rabbitmq
    # - git_url: https://git.openstack.org/openstack/fuel-ccp-sahara
    #   name: fuel-ccp-sahara
    # - git_url: https://git.openstack.org/openstack/fuel-ccp-searchlight
    #   name: fuel-ccp-searchlight
    # - git_url: https://git.openstack.org/openstack/fuel-ccp-stacklight
    #   name: fuel-ccp-stacklight
@@ -0,0 +1,77 @@
nodes:
  # node[1-3]: Kubernetes
  node([4-6])$: # 4-6
    roles:
      - controller
      - openvswitch
  node[7-9]$: # 7-9
    roles:
      - rabbitmq
  node10$: # 10
    roles:
      - galera
  node11$: # 11
    roles:
      - heat
  node(1[2-9])$: # 12-19
    roles:
      - compute
      - openvswitch
  node[2-9][0-9]$: # 20-99
    roles:
      - compute
      - openvswitch
  node(1[0-9][0-9])$: # 100-199
    roles:
      - compute
      - openvswitch
  node200$:
    roles:
      - backup
replicas:
  glance-api: 1
  glance-registry: 1
  keystone: 3
  nova-api: 3
  nova-scheduler: 3
  nova-conductor: 3
  neutron-server: 3
  neutron-metadata-agent: 3
  horizon: 3
  heat-api: 1
  heat-api-cfn: 1
  heat-engine: 1
roles:
  galera:
    - galera
  rabbitmq:
    - rabbitmq
  controller:
    - etcd
    - glance-api
    - glance-registry
    - horizon
    - keystone
    - memcached
    - neutron-dhcp-agent
    - neutron-l3-agent
    - neutron-metadata-agent
    - neutron-server
    - nova-api
    - nova-conductor
    - nova-consoleauth
    - nova-novncproxy
    - nova-scheduler
  compute:
    - nova-compute
    - nova-libvirt
  openvswitch:
    - neutron-openvswitch-agent
    - openvswitch-db
    - openvswitch-vswitchd
  backup:
    - backup
  heat:
    - heat-api
    - heat-api-cfn
    - heat-engine
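The node keys are anchored regular expressions matched against hostnames. A quick illustrative check of which role a given hostname would receive (a sketch, not part of the deployment):

    for n in node4 node8 node11 node42 node200; do
      if   echo "$n" | grep -qE 'node([4-6])$'; then echo "$n: controller+openvswitch"
      elif echo "$n" | grep -qE 'node[7-9]$'; then echo "$n: rabbitmq"
      elif echo "$n" | grep -qE 'node11$'; then echo "$n: heat"
      elif echo "$n" | grep -qE 'node(1[2-9]|[2-9][0-9]|1[0-9][0-9])$'; then echo "$n: compute+openvswitch"
      elif echo "$n" | grep -qE 'node200$'; then echo "$n: backup"
      fi
    done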
@@ -0,0 +1,71 @@
images:
  tag: newton
# image_specs:
#   keystone:
#     tag: newton
#   horizon:
#     tag: newton
#   nova-upgrade:
#     tag: newton
#   nova-api:
#     tag: newton
#   nova-conductor:
#     tag: newton
#   nova-consoleauth:
#     tag: newton
#   nova-novncproxy:
#     tag: newton
#   nova-scheduler:
#     tag: newton
#   nova-compute:
#     tag: newton
#   nova-libvirt:
#     tag: newton
#   neutron-dhcp-agent:
#     tag: newton
#   neutron-l3-agent:
#     tag: newton
#   neutron-metadata-agent:
#     tag: newton
#   neutron-server:
#     tag: newton
#   neutron-openvswitch-agent:
#     tag: newton
#   glance-api:
#     tag: newton
#   glance-registry:
#     tag: newton
#   glance-upgrade:
#     tag: newton
sources:
  openstack/cinder:
    git_ref: stable/newton
    git_url: https://github.com/openstack/cinder.git
  openstack/glance:
    git_ref: stable/newton
    git_url: https://github.com/openstack/glance.git
  openstack/heat:
    git_ref: stable/newton
    git_url: https://github.com/openstack/heat.git
  openstack/horizon:
    git_ref: stable/newton
    git_url: https://github.com/openstack/horizon.git
  openstack/keystone:
    git_ref: stable/newton
    git_url: https://github.com/openstack/keystone.git
  openstack/neutron:
    git_ref: stable/newton
    git_url: https://github.com/openstack/neutron.git
  openstack/nova:
    git_ref: stable/newton
    git_url: https://github.com/openstack/nova.git
  openstack/requirements:
    git_ref: stable/newton
    git_url: https://git.openstack.org/openstack/requirements.git
  openstack/sahara-dashboard:
    git_ref: stable/newton
    git_url: https://git.openstack.org/openstack/sahara-dashboard.git
doc/source/methodologies/monitoring/configs/dashboards/ETCD.json (new file, 2086 lines; diff too large to display)
@@ -0,0 +1,103 @@
[
  {
    "_id": "Response-Time-Dashboard",
    "_type": "dashboard",
    "_source": {
      "title": "Response Time Dashboard",
      "hits": 0,
      "description": "",
      "panelsJSON": "[{\"id\":\"Env-1-Response-Time\",\"type\":\"visualization\",\"panelIndex\":1,\"size_x\":3,\"size_y\":2,\"col\":1,\"row\":1},{\"id\":\"Env-2-Response-Time\",\"type\":\"visualization\",\"panelIndex\":2,\"size_x\":3,\"size_y\":2,\"col\":4,\"row\":1},{\"id\":\"Env-3-Response-Time\",\"type\":\"visualization\",\"panelIndex\":3,\"size_x\":3,\"size_y\":2,\"col\":7,\"row\":1},{\"id\":\"Env-4-Response-Time\",\"type\":\"visualization\",\"panelIndex\":4,\"size_x\":3,\"size_y\":2,\"col\":1,\"row\":3},{\"id\":\"Env-5-Response-Time\",\"type\":\"visualization\",\"panelIndex\":5,\"size_x\":3,\"size_y\":2,\"col\":4,\"row\":3},{\"id\":\"Env-6-Response-Time\",\"type\":\"visualization\",\"panelIndex\":6,\"size_x\":3,\"size_y\":2,\"col\":7,\"row\":3}]",
      "optionsJSON": "{\"darkTheme\":true}",
      "uiStateJSON": "{}",
      "version": 1,
      "timeRestore": false,
      "kibanaSavedObjectMeta": {
        "searchSourceJSON": "{\"filter\":[{\"query\":{\"query_string\":{\"query\":\"*\",\"analyze_wildcard\":true}}}]}"
      }
    }
  },
  {
    "_id": "Env-1-Response-Time",
    "_type": "visualization",
    "_source": {
      "title": "Env-1 Response Time",
      "visState": "{\"title\":\"Env-1 Response Time\",\"type\":\"line\",\"params\":{\"shareYAxis\":true,\"addTooltip\":true,\"addLegend\":true,\"showCircles\":true,\"smoothLines\":false,\"interpolate\":\"linear\",\"scale\":\"linear\",\"drawLinesBetweenPoints\":true,\"radiusRatio\":9,\"times\":[],\"addTimeMarker\":false,\"defaultYExtents\":false,\"setYExtents\":false,\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"ResponseTime\",\"customLabel\":\"Avg Response Time ms\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"Timestamp\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}",
      "uiStateJSON": "{}",
      "description": "",
      "version": 1,
      "kibanaSavedObjectMeta": {
        "searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-1\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}"
      }
    }
  },
  {
    "_id": "Env-4-Response-Time",
    "_type": "visualization",
    "_source": {
      "title": "Env-4 Response Time",
      "visState": "{\"title\":\"Env-4 Response Time\",\"type\":\"line\",\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"drawLinesBetweenPoints\":true,\"interpolate\":\"linear\",\"radiusRatio\":9,\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"showCircles\":true,\"smoothLines\":false,\"times\":[],\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"ResponseTime\",\"customLabel\":\"Avg Response Time ms\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"Timestamp\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}",
      "uiStateJSON": "{}",
      "description": "",
      "version": 1,
      "kibanaSavedObjectMeta": {
        "searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-4\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}"
      }
    }
  },
  {
    "_id": "Env-5-Response-Time",
    "_type": "visualization",
    "_source": {
      "title": "Env-5 Response Time",
      "visState": "{\"title\":\"Env-5 Response Time\",\"type\":\"line\",\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"drawLinesBetweenPoints\":true,\"interpolate\":\"linear\",\"radiusRatio\":9,\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"showCircles\":true,\"smoothLines\":false,\"times\":[],\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"ResponseTime\",\"customLabel\":\"Avg Response Time ms\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"Timestamp\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}",
      "uiStateJSON": "{}",
      "description": "",
      "version": 1,
      "kibanaSavedObjectMeta": {
        "searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-5\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}"
      }
    }
  },
  {
    "_id": "Env-6-Response-Time",
    "_type": "visualization",
    "_source": {
      "title": "Env-6 Response Time",
      "visState": "{\"title\":\"Env-6 Response Time\",\"type\":\"line\",\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"drawLinesBetweenPoints\":true,\"interpolate\":\"linear\",\"radiusRatio\":9,\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"showCircles\":true,\"smoothLines\":false,\"times\":[],\"yAxis\":{}},\"aggs\":[{\"id\":\"1\",\"type\":\"avg\",\"schema\":\"metric\",\"params\":{\"field\":\"ResponseTime\",\"customLabel\":\"Avg Response Time ms\"}},{\"id\":\"2\",\"type\":\"date_histogram\",\"schema\":\"segment\",\"params\":{\"field\":\"Timestamp\",\"interval\":\"auto\",\"customInterval\":\"2h\",\"min_doc_count\":1,\"extended_bounds\":{}}}],\"listeners\":{}}",
      "uiStateJSON": "{}",
      "description": "",
      "version": 1,
      "kibanaSavedObjectMeta": {
        "searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-6\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}"
      }
    }
  },
  {
    "_id": "Env-3-Response-Time",
    "_type": "visualization",
    "_source": {
      "title": "Env-3 Response Time",
      "visState": "{\"aggs\":[{\"id\":\"1\",\"params\":{\"customLabel\":\"Avg Response Time ms\",\"field\":\"ResponseTime\"},\"schema\":\"metric\",\"type\":\"avg\"},{\"id\":\"2\",\"params\":{\"customInterval\":\"2h\",\"extended_bounds\":{},\"field\":\"Timestamp\",\"interval\":\"auto\",\"min_doc_count\":1},\"schema\":\"segment\",\"type\":\"date_histogram\"}],\"listeners\":{},\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"drawLinesBetweenPoints\":true,\"interpolate\":\"linear\",\"radiusRatio\":9,\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"showCircles\":true,\"smoothLines\":false,\"times\":[],\"yAxis\":{}},\"title\":\"Env-3 Response Time\",\"type\":\"line\"}",
      "uiStateJSON": "{}",
      "description": "",
      "version": 1,
      "kibanaSavedObjectMeta": {
        "searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-3\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}"
      }
    }
  },
  {
    "_id": "Env-2-Response-Time",
    "_type": "visualization",
    "_source": {
      "title": "Env-2 Response Time",
      "visState": "{\"aggs\":[{\"id\":\"1\",\"params\":{\"customLabel\":\"Avg Response Time ms\",\"field\":\"ResponseTime\"},\"schema\":\"metric\",\"type\":\"avg\"},{\"id\":\"2\",\"params\":{\"customInterval\":\"2h\",\"extended_bounds\":{},\"field\":\"Timestamp\",\"interval\":\"auto\",\"min_doc_count\":1},\"schema\":\"segment\",\"type\":\"date_histogram\"}],\"listeners\":{},\"params\":{\"addLegend\":true,\"addTimeMarker\":false,\"addTooltip\":true,\"defaultYExtents\":false,\"drawLinesBetweenPoints\":true,\"interpolate\":\"linear\",\"radiusRatio\":9,\"scale\":\"linear\",\"setYExtents\":false,\"shareYAxis\":true,\"showCircles\":true,\"smoothLines\":false,\"times\":[],\"yAxis\":{}},\"title\":\"Env-2 Response Time\",\"type\":\"line\"}",
      "uiStateJSON": "{}",
      "description": "",
      "version": 1,
      "kibanaSavedObjectMeta": {
        "searchSourceJSON": "{\"index\":\"env-*-heka*\",\"query\":{\"query_string\":{\"query\":\"Environment: \\\"env-2\\\"\",\"analyze_wildcard\":true}},\"filter\":[]}"
      }
    }
  }
]
(Three additional file diffs suppressed because they are too large to display.)
@@ -0,0 +1,77 @@
#!/usr/bin/env bash

: ${DB_CONNECTION_STRING:?"You need to specify DB_CONNECTION_STRING parameter"}
: ${ENV_NAME:?"You need to specify ENV_NAME parameter"}

: ${MANAGEMENT_INTERFACE:="p1p1.602"}
: ${COBBLER_ADDRESS:="172.20.8.34"}
: ${CUSTOM_YAML}
: ${KARGO_REPO}
: ${KARGO_COMMIT}
: ${FUEL_CCP_COMMIT}
: ${ADMIN_USER}
: ${ADMIN_PASSWORD}
: ${ADMIN_NODE_CLEANUP}
DEPLOY_METHOD="kargo"
WORKSPACE="${HOME}/kargo_workspace_${ENV_NAME}"  # a quoted literal "~" would not expand here
SSH_OPTIONS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"

get_env_nodes ()
{
    ENV_NODES_NAMES=$(echo $(psql ${DB_CONNECTION_STRING} -c "select name from servers where environment_id in (select id from environments where name='${ENV_NAME}')" -P format=unaligned -t))
    if [ -z "${ENV_NODES_NAMES}" ]
    then
        echo "No nodes in environment with name ${ENV_NAME}"
        exit 1
    fi
}

get_env_nodes_ips ()
{
    ENV_NODES_IPS=$(echo $(ssh ${SSH_OPTIONS} root@${COBBLER_ADDRESS} bash -ex << EOF
for COBBLER_SYSTEM_NAME in ${ENV_NODES_NAMES}
do
    NODE_IP=\$(cobbler system dumpvars --name=\${COBBLER_SYSTEM_NAME} | grep ^ip_address_${MANAGEMENT_INTERFACE} | awk '{print \$3}')
    NODE_IPS+=\${NODE_IP}" "
done
echo \${NODE_IPS}
EOF
))
}

main ()
{
    get_env_nodes
    get_env_nodes_ips
    export ADMIN_IP=$(echo ${ENV_NODES_IPS} | awk '{print $1}')
    export SLAVE_IPS=$(echo ${ENV_NODES_IPS})

    # for SLAVE_IP in ${SLAVE_IPS}
    # do
    #     ssh ${SSH_OPTIONS} root@${SLAVE_IP} bash -ex << EOF
    #echo "deb https://apt.dockerproject.org/repo ubuntu-\$(grep DISTRIB_CODENAME /etc/lsb-release | awk -F"=" '{print \$2}') main" >> /etc/apt/sources.list
    #apt-get update && apt-get install -y --allow-unauthenticated -o Dpkg::Options::="--force-confdef" docker-engine
    #EOF
    # done

    if [ -d "$WORKSPACE" ] ; then
        rm -rf $WORKSPACE
    fi
    mkdir -p $WORKSPACE
    cd $WORKSPACE

    if [ -d './fuel-ccp-installer' ] ; then
        rm -rf ./fuel-ccp-installer
    fi
    git clone https://review.openstack.org/openstack/fuel-ccp-installer
    cd ./fuel-ccp-installer

    if [ "$FUEL_CCP_COMMIT" ]; then
        git fetch git://git.openstack.org/openstack/fuel-ccp-installer $FUEL_CCP_COMMIT && git checkout FETCH_HEAD
    fi

    echo "Running on $NODE_NAME: $ENV_NAME"

    bash -xe "./utils/jenkins/run_k8s_deploy_test.sh"
}
main
@@ -0,0 +1,46 @@
---
- hosts: main-kuber
  remote_user: root
  tasks:
    - name: Fetch heka package
      get_url:
        url: "{{ heka_package_url }}"
        dest: /tmp/heka_amd64.deb
        mode: 0664
        force: yes
    - name: Download heka package locally
      fetch:
        src: /tmp/heka_amd64.deb
        dest: ./heka_amd64.deb
        fail_on_missing: yes
        flat: yes

- hosts: cluster-nodes
  remote_user: root
  tasks:
    - name: Propagate heka package across cluster nodes
      copy:
        src: ./heka_amd64.deb
        dest: /tmp/heka_amd64.deb

- hosts: all-cluster-nodes
  remote_user: root
  tasks:
    - name: Install heka package
      apt: deb=/tmp/heka_amd64.deb
    - name: Add heka user to docker group
      user: name='heka' groups=docker append=yes
    - name: Copy heka conf
      template: src=heka/00-hekad.toml.j2 dest=/etc/heka/conf.d/00-hekad.toml
      notify: restart heka
    - name: Copy heka lua scripts
      template: src=heka/kubeapi_to_int.lua.j2 dest=/usr/share/heka/lua_filters/kubeapi_to_int.lua
      register: heka_lua
      notify: restart heka
    - name: Ensure heka is running
      systemd: state=started name=heka enabled=yes

  handlers:
    - name: restart heka
      systemd: state=restarted name=heka
@@ -0,0 +1,71 @@
#!/bin/bash -xe

HOSTNAME=`hostname`
ELASTICSEARCH_NODE=${ELASTICSEARCH_NODE:-172.20.9.3}

# install java
sudo add-apt-repository -y ppa:webupd8team/java
sudo apt-get update
sudo apt-get -y install oracle-java8-installer

# install elasticsearch by adding the elastic.co repository
wget -qO - https://packages.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add -
echo "deb http://packages.elastic.co/elasticsearch/2.x/debian stable main" | sudo tee -a /etc/apt/sources.list.d/elasticsearch-2.x.list
sudo apt-get update
sudo apt-get -y install elasticsearch

# edit configuration:
sed -i -E -e 's/^.*cluster.name: .*$/ cluster.name: elasticsearch_k8s/g' /etc/elasticsearch/elasticsearch.yml
sed -i -E -e "s/^.*node.name: .*$/ node.name: ${HOSTNAME}/g" /etc/elasticsearch/elasticsearch.yml
sed -i -E -e "s/^.*network.host: .*$/ network.host: ${ELASTICSEARCH_NODE}/g" /etc/elasticsearch/elasticsearch.yml

# increase memory limits:
sed -i -E -e "s/^.*ES_HEAP_SIZE=.*$/ES_HEAP_SIZE=10g/g" /etc/default/elasticsearch

# start service:
sudo systemctl daemon-reload
sudo systemctl enable elasticsearch
sudo systemctl restart elasticsearch

# install kibana from the elastic.co repository:
echo "deb http://packages.elastic.co/kibana/4.5/debian stable main" | sudo tee -a /etc/apt/sources.list
sudo apt-get update
sudo apt-get -y install kibana
sed -i -E -e "s|^.*elasticsearch.url:.*$| elasticsearch.url: \"http://${ELASTICSEARCH_NODE}:9200\"|g" /opt/kibana/config/kibana.yml

# enable kibana service:
sudo systemctl daemon-reload
sudo systemctl enable kibana
sudo systemctl start kibana

# install nginx:
sudo apt-get -y install nginx

# set kibana admin:password (admin:admin)
echo "admin:`openssl passwd admin`" | sudo tee -a /etc/nginx/htpasswd.users

# prepare nginx config:
cat << EOF >> /etc/nginx/sites-available/default
server {
    listen 80;

    server_name ${HOSTNAME};

    auth_basic "Restricted Access";
    auth_basic_user_file /etc/nginx/htpasswd.users;

    location / {
        proxy_pass http://localhost:5601;
        proxy_http_version 1.1;
        proxy_set_header Upgrade \$http_upgrade;
        proxy_set_header Connection 'upgrade';
        proxy_set_header Host \$host;
        proxy_cache_bypass \$http_upgrade;
    }
}
EOF

# check and start nginx service:
sudo nginx -t
sudo systemctl restart nginx
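A minimal post-install smoke test (assumes the script ran on the Elasticsearch node itself):

    curl -s "http://${ELASTICSEARCH_NODE:-172.20.9.3}:9200/_cluster/health?pretty"    # expect status green or yellow
    curl -s -o /dev/null -w "%{http_code}\n" -u admin:admin http://localhost/         # Kibana behind nginx basic auth, expect 200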
@@ -0,0 +1,60 @@
#!/bin/bash
set -e
export ANSIBLE_HOST_KEY_CHECKING=False
export SSH_USER="root"
export SSH_PASS="r00tme"
cd $(dirname $(realpath $0))

ENV=${1}
if [ -z "${ENV}" ]; then
    echo "Please provide env number: $(basename $0) [1|2|3|4|5|6]"
    exit 1
fi
# elastic for k8s at rackspace as default
ELASTICSEARCH_NODE=${ELASTICSEARCH_NODE:-172.20.9.3}
# heka 0.10.0 as default
HEKA_PACKAGE_URL=${HEKA_PACKAGE_URL:-https://github.com/mozilla-services/heka/releases/download/v0.10.0/heka_0.10.0_amd64.deb}
KUBE_MAIN_NODE="172.20.8.6${ENV}"
SSH_OPTS="-q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"

echo "Get cluster nodes ..."
NODES_TMP=$(sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${KUBE_MAIN_NODE} 'kubectl get nodes -o jsonpath='"'"'{.items[*].status.addresses[?(@.type=="InternalIP")].address}'"'"'')
ALL_IP_ON_KUBER_NODE=$(sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${KUBE_MAIN_NODE} ip addr | grep 172.20 | awk '{print $2}' | awk -F'/' '{print $1}')
GREP_STRING_TMP=""
for i in $ALL_IP_ON_KUBER_NODE; do
    GREP_STRING_TMP="${GREP_STRING_TMP}${i}|"
done
GREP_STRING=${GREP_STRING_TMP:0:-1}
SSH_AUTH="ansible_ssh_user=${SSH_USER} ansible_ssh_pass=${SSH_PASS}"
echo "[main-kuber]" > cluster-hosts
echo "${KUBE_MAIN_NODE} ${SSH_AUTH}" >> cluster-hosts
echo "[cluster-nodes]" >> cluster-hosts
set +e
# Remove IPs of the kuber node itself
for i in ${NODES_TMP} ; do
    TMP_VAR=$(echo $i | grep -vE "(${GREP_STRING})")
    NODES="${NODES} ${TMP_VAR}"
done
set -e
for i in ${NODES} ; do
    if [ "$i" != "${KUBE_MAIN_NODE}" ]; then
        echo "${i} ${SSH_AUTH}" >> cluster-hosts
    fi
done
echo "[all-cluster-nodes:children]" >> cluster-hosts
echo "main-kuber" >> cluster-hosts
echo "cluster-nodes" >> cluster-hosts

# Calculate parallel ansible execution
NODES_IPS=( $NODES )
if [[ "${#NODES_IPS[@]}" -lt 50 ]] && [[ "${#NODES_IPS[@]}" -gt 5 ]]; then
    ANSIBLE_FORKS="${#NODES_IPS[@]}"
elif [[ "${#NODES_IPS[@]}" -ge 50 ]]; then
    ANSIBLE_FORKS=50
else
    ANSIBLE_FORKS=10
fi

echo "Starting ansible ..."
ansible-playbook -v --ssh-extra-args "-o\ StrictHostKeyChecking=no" -f ${ANSIBLE_FORKS} -i ./cluster-hosts -e env_num=${ENV} -e elasticsearch_node="${ELASTICSEARCH_NODE}" -e heka_package_url=${HEKA_PACKAGE_URL} ./deploy-heka.yaml --diff
@@ -0,0 +1,69 @@
# vim: set syntax=yaml

[hekad]
maxprocs = 2

[DockerLogInput]
endpoint = "unix:///var/run/docker.sock"
#decoder = "KubeAPI_decoder"
decoder = "MultiDecoder"

[MultiDecoder]
type = "MultiDecoder"
subs = ["KubeAPI_decoder", "EnvironmentScribbler"]
cascade_strategy = "all"
#log_sub_errors = true
{% raw %}
[KubeAPI_decoder]
type = "PayloadRegexDecoder"
match_regex = '\S+ \S+ .+ (?P<Code>\S+)\] (?P<Method>[A-Z]+) (?P<Url>\S+)\: \((?P<ResponseTime>\S+)ms\) (?P<StatusCode>\d+) \[\[(?P<Agent>.+)\] (?P<RemoteIP>\S+)\:(?P<RemotePort>\d+)\]'
[KubeAPI_decoder.message_fields]
Type = "KubeAPIlog"
Logger = "Docker"
Code = "%Code%"
Method = "%Method%"
Url|uri = "%Url%"
ResponseTime = "%ResponseTime%"
StatusCode = "%StatusCode%"
Agent = "%Agent%"
RemoteIP|ipv4 = "%RemoteIP%"
RemotePort = "%RemotePort%"
{% endraw %}
[EnvironmentScribbler]
type = "ScribbleDecoder"
[EnvironmentScribbler.message_fields]
Environment = "env-{{ env_num }}"

[KubeAPI_to_int]
type = "SandboxFilter"
filename = "lua_filters/kubeapi_to_int.lua"
message_matcher = "Type == 'KubeAPIlog'"

[ESJsonEncoder]
index = "env-{{ env_num }}-{{ '%{Type}-%{%Y.%m.%d}' }}"
#es_index_from_timestamp = true
type_name = "%{Type}"

[ElasticSearchOutput]
message_matcher = "Type == 'heka.sandbox.KubeAPIlog' || Type == 'DockerLog'"
server = "http://{{ elasticsearch_node }}:9200"
flush_interval = 5000
flush_count = 10
encoder = "ESJsonEncoder"

[PayloadEncoder]
append_newlines = false

[LogOutput]
message_matcher = "Type == 'heka.sandbox.KubeAPIlog' || Type == 'DockerLog'"
#message_matcher = "TRUE"
encoder = "ESJsonEncoder"
#encoder = "PayloadEncoder"
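For reference, the match_regex above targets kube-apiserver request log lines of roughly this shape (an illustrative line, not taken from the source):

    I1206 10:15:02.345678       1 wrap.go:42] GET /api/v1/nodes: (12.345ms) 200 [[kubectl/v1.4.0 (linux/amd64)] 172.20.8.61:43210]

from which the decoder extracts Code, Method, Url, ResponseTime, StatusCode, Agent, RemoteIP and RemotePort, after which EnvironmentScribbler stamps the env-N label onto each message.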
@@ -0,0 +1,30 @@
{% raw %}
-- Convert ResponseTime and a few more fields to integer type

local fields = {["ResponseTime"] = 0, ["RemotePort"] = 0, ["StatusCode"] = 0}
local msg = {
    Type = "KubeAPIlog",
    Severity = 6,
    Fields = fields
}

function process_message ()
    fields["ResponseTime"] = tonumber(read_message("Fields[ResponseTime]"))
    fields["RemotePort"] = tonumber(read_message("Fields[RemotePort]"))
    fields["StatusCode"] = tonumber(read_message("Fields[StatusCode]"))
    msg.Payload = read_message("Payload")
    fields["Code"] = read_message("Fields[Code]")
    fields["ContainerID"] = read_message("Fields[ContainerID]")
    fields["ContainerName"] = read_message("Fields[ContainerName]")
    fields["Environment"] = read_message("Fields[Environment]")
    fields["Method"] = read_message("Fields[Method]")
    fields["RemoteIP"] = read_message("Fields[RemoteIP]")
    fields["Url"] = read_message("Fields[Url]")
    local ok, err = pcall(inject_message, msg)
    if not ok then
        inject_payload("txt", "error", err)
    end
    return 0
end
{% endraw %}
doc/source/methodologies/monitoring/configs/node1.tar.gz (new binary file; not shown)
@@ -0,0 +1,124 @@
---
- hosts: common
  remote_user: root
  tasks:
    - name: Install common packages
      apt: name={{ item }} state=installed
      with_items:
        - python-pip
      tags: [ 'always' ]
    - name: Install docker for Ubuntu 14.04
      apt: name=docker.io state=installed
      when: ansible_distribution == 'Ubuntu' and ansible_distribution_version == '14.04'
      tags: [ 'always' ]
    - name: Install docker for Ubuntu 16.04
      apt: name=docker.io state=installed
      when: ansible_distribution == 'Ubuntu' and ansible_distribution_version == '16.04'
      tags: [ 'always' ]
    - name: Install python deps
      pip: name={{ item }}
      with_items:
        - docker-py
        - docker-compose
      tags: [ 'always' ]

- hosts: grafana
  remote_user: root
  vars:
    postgresql_root_user: root
    postgresql_root_password: aijoom1Shiex
    grafana_postgresql_user: grafana
    grafana_postgresql_password: sHskdhos6se
    grafana_postgresql_db: grafana
    grafana_user: admin
    grafana_password: admin
  tasks:
    - name: Install packages for grafana
      apt: name={{ item }} state=installed
      with_items:
        - postgresql-client-9.3
        - python-psycopg2
    - name: Create postgres data dir
      file: path=/var/lib/postgres/data/db state=directory
      tags: [ 'grafana' ]
    - name: Run postgres in docker
      docker_container:
        name: postgres
        image: 'postgres:latest'
        ports: 5432:5432
        volumes: '/var/lib/postgres/data:/var/lib/postgres/data'
        env:
          POSTGRES_USER: "{{ postgresql_root_user }}"
          POSTGRES_PASSWORD: "{{ postgresql_root_password }}"
          PGDATA: /var/lib/postgres/data/db
      tags: [ 'grafana' ]
    - name: Create DB for grafana
      postgresql_db:
        name: "{{ grafana_postgresql_db }}"
        login_user: "{{ postgresql_root_user }}"
        login_password: "{{ postgresql_root_password }}"
        login_host: localhost
        encoding: 'UTF-8'
      tags: [ 'grafana' ]
    - name: Create user for grafana in postgres
      postgresql_user:
        name: "{{ grafana_postgresql_user }}"
        login_user: "{{ postgresql_root_user }}"
        login_password: "{{ postgresql_root_password }}"
        login_host: localhost
        password: "{{ grafana_postgresql_password }}"
        db: grafana
        priv: ALL
      tags: [ 'grafana' ]
    - name: Create data dir for Grafana
      file: path=/var/lib/grafana state=directory
      tags: [ 'grafana' ]
    - name: Start Grafana container
      docker_container:
        name: grafana
        image: 'grafana/grafana:4.0.1'
        volumes: '/var/lib/grafana:/var/lib/grafana'
        ports: 3000:3000
        env:
          GF_SECURITY_ADMIN_USER: "{{ grafana_user }}"
          GF_SECURITY_ADMIN_PASSWORD: "{{ grafana_password }}"
          GF_DATABASE_TYPE: postgres
          GF_DATABASE_HOST: "{{ ansible_default_ipv4.address }}"
          GF_DATABASE_NAME: "{{ grafana_postgresql_db }}"
          GF_DATABASE_USER: "{{ grafana_postgresql_user }}"
          GF_DATABASE_PASSWORD: "{{ grafana_postgresql_password }}"
          GF_INSTALL_PLUGINS: grafana-piechart-panel
      tags: [ 'grafana' ]

- hosts: prometheuses
  remote_user: root
  tasks:
    - name: Data dir for prometheus
      file: path=/var/lib/prometheus state=directory
      tags: [ 'prometheus' ]
    - include: docker_prometheus.yaml

- hosts: prometheus-kuber
  remote_user: root
  tasks:
    - name: Copy prometheus config
      template: src=prometheus/prometheus-kuber.yml.j2 dest=/var/lib/prometheus/prometheus.yml
      register: prometheus_yml
      tags: [ 'prometheus', 'prometheus-conf' ]
    - include: docker_prometheus.yaml
    - name: Send kill -1 to prometheus if prometheus.yml changed
      command: pkill -1 prometheus
      when: prometheus_yml.changed
      tags: [ 'prometheus', 'prometheus-conf' ]

- hosts: prometheus-system
  remote_user: root
  tasks:
    - name: Copy prometheus config
      template: src=prometheus/prometheus-system.yml.j2 dest=/var/lib/prometheus/prometheus.yml
      register: prometheus_yml
      tags: [ 'prometheus', 'prometheus-conf' ]
    - include: docker_prometheus.yaml
    - name: Send kill -1 to prometheus if prometheus.yml changed
      command: pkill -1 prometheus
      when: prometheus_yml.changed
      tags: [ 'prometheus', 'prometheus-conf' ]
@@ -0,0 +1,118 @@
---
- hosts: all-cluster-nodes
  remote_user: root
  tasks:
    - name: Create user telegraf
      user: name=telegraf home=/opt/telegraf
    - name: Create /opt/telegraf
      file: path=/opt/telegraf state=directory owner=telegraf
    - name: Create bin dir for telegraf
      file: path=/opt/telegraf/bin state=directory owner=telegraf
    - name: Create etc dir for telegraf
      file: path=/opt/telegraf/etc state=directory owner=telegraf
    - name: Copy telegraf to server
      copy: src=../../telegraf/opt/bin/telegraf dest=/opt/telegraf/bin/telegraf mode=0755
      register: telegraf_bin
    - name: Copy telegraf.service
      copy: src=telegraf/telegraf.service dest=/etc/systemd/system/telegraf.service
      register: telegraf_service
    - name: Start and enable telegraf
      systemd: state=started enabled=yes daemon_reload=yes name=telegraf
    - name: Delete allmetrics.tmp.lock
      file: path=/opt/telegraf/bin/data/allmetrics.tmp.lock state=absent
      when: telegraf_service.changed or telegraf_bin.changed
    - name: Restart telegraf if telegraf binary has been changed
      systemd: state=restarted name=telegraf
      when: telegraf_bin.changed
    - name: Install software
      apt: name={{ item }} state=installed
      with_items:
        - sysstat
        - numactl
    - name: Copy system metric scripts
      copy: src=../../telegraf/opt/system_stats/{{ item }} dest=/opt/telegraf/bin/{{ item }} mode=0755
      with_items:
        - entropy.sh
        - iostat_per_device.sh
        - memory_bandwidth.sh
        - numa_stat_per_pid.sh
        - per_process_cpu_usage.sh
        - list_openstack_processes.sh
        - network_tcp_queue.sh
    - name: Copy pcm-memory-one-line.x
      copy: src=../../telegraf/opt/system_stats/intel_pcm_mem/pcm-memory-one-line.x dest=/opt/telegraf/bin/pcm-memory-one-line.x mode=0755
    - name: Add sysctl for pcm
      sysctl: name=kernel.nmi_watchdog value=0 state=present reload=yes
    - name: Load kernel module msr
      modprobe: name=msr state=present
    - name: Add module autoload
      lineinfile: dest=/etc/modules line='msr'
    - name: Add user telegraf to sudoers
      lineinfile:
        dest: /etc/sudoers
        state: present
        line: "telegraf ALL=(ALL) NOPASSWD: ALL"

- hosts: cluster-nodes
  remote_user: root
  tasks:
    - name: Copy telegraf config
      copy: src=./telegraf/telegraf-sys.conf dest=/opt/telegraf/etc/telegraf.conf
      register: telegraf_conf
    - name: Restart telegraf if config has been changed
      systemd: state=restarted name=telegraf
      when: telegraf_conf.changed

- hosts: main-kuber
  remote_user: root
  tasks:
    - name: Copy openstack scripts
      copy: src=../../telegraf/opt/osapi/{{ item }} dest=/opt/telegraf/bin/{{ item }} mode=0755
      with_items:
        - glog.sh
        - osapitime.sh
        - vmtime.sh
      tags: [ 'openstack' ]
    - name: Copy etcd scripts
      copy: src=../../telegraf/opt/k8s_etcd/{{ item }} dest=/opt/telegraf/bin/{{ item }} mode=0755
      with_items:
        - etcd_get_metrics.sh
        - k8s_get_metrics.sh
    - name: Install software for scripts
      apt: name={{ item }} state=installed
      with_items:
        - mysql-client
        - bc
        - jq
      tags: [ 'openstack' ]
    - name: Create dirs for scripts
      file: path=/opt/telegraf/bin/{{ item }} state=directory owner=telegraf
      with_items:
        - log
        - data
    - name: Copy telegraf config
      template: src=telegraf/telegraf-openstack.conf.j2 dest=/opt/telegraf/etc/telegraf.conf
      register: telegraf_conf
      tags: [ 'openstack' ]
    - name: Delete allmetrics.tmp.lock
      file: path=/opt/telegraf/bin/data/allmetrics.tmp.lock state=absent
      when: telegraf_conf.changed
    - name: Restart telegraf if config has been changed
      systemd: state=restarted name=telegraf
      when: telegraf_conf.changed
      tags: [ 'openstack' ]

- hosts: all-cluster-nodes
  remote_user: root
  tasks:
    - name: Reload telegraf if service file has been changed
      systemd: daemon_reload=yes state=reloaded name=telegraf
      when: telegraf_service.changed

- hosts: main
  remote_user: root
  tasks:
    - name: Update prometheus config
      template: src=./prometheus/targets.yml.j2 dest=/var/lib/prometheus/targets-{{ cluster_tag }}.yml
      tags: [ 'prometheus' ]
@@ -0,0 +1,46 @@
#!/bin/bash
TMP_YAML=$(mktemp -u)

export ANSIBLE_HOST_KEY_CHECKING=False
export SSH_USER="root"
export SSH_PASS="r00tme"
cd $(dirname $(realpath $0))

ENV=${1}
if [ -z "${ENV}" ]; then
    echo "Please provide env number: $(basename $0) [1|2|3|4|5|6]"
    exit 1
fi
PROMETHEUS_HOST="172.20.9.115"
KUBE_MAIN_NODE="172.20.8.6${ENV}"
CLUSTER_TAG="env-${ENV}"

ETCD=""

SSH_OPTS="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"

TARGETS=$(sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${KUBE_MAIN_NODE} curl -ks https://127.0.0.1:2379/v2/members | python -m json.tool | grep 2379)

if [ -z "$TARGETS" ]; then
    echo "No etcd found"
    exit 1
fi

for i in ${TARGETS}; do
    TEMP_TARGET=${i#\"https://}
    ETCD="$ETCD ${TEMP_TARGET%\"}"
done

echo "- targets:" > ${TMP_YAML}
for i in ${ETCD}; do
    echo "  - $i" >> ${TMP_YAML}
done
echo "  labels:" >> ${TMP_YAML}
echo "    env: ${CLUSTER_TAG}" >> ${TMP_YAML}

echo "Targets file is ready"
cat ${TMP_YAML}
sshpass -p ${SSH_PASS} scp ${SSH_OPTS} ${TMP_YAML} root@${PROMETHEUS_HOST}:/var/lib/prometheus/etcd-env-${1}.yml
rm ${TMP_YAML}
@@ -0,0 +1,2 @@
#!/bin/bash
ansible-playbook -i ./hosts ./deploy-graf-prom.yaml --tags "grafana"
@@ -0,0 +1,2 @@
#!/bin/bash
ansible-playbook -i ./hosts ./deploy-graf-prom.yaml --tags "prometheus"
@@ -0,0 +1,65 @@
#!/bin/bash
set -e
export ANSIBLE_HOST_KEY_CHECKING=False
export SSH_USER="root"
export SSH_PASS="r00tme"
cd $(dirname $(realpath $0))

ENV=${1}
if [ -z "${ENV}" ]; then
    echo "Please provide env number: $(basename $0) [1|2|3|4|5|6]"
    exit 1
fi
PROMETHEUS_NODE="172.20.124.25"
KUBE_MAIN_NODE="172.20.8.6${ENV}"
CLUSTER_TAG="env-${ENV}"

# Secret option
ANSIBLE_TAG=$2

SSH_OPTS="-q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"

echo "Get cluster nodes"

NODES_TMP=$(sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${KUBE_MAIN_NODE} 'kubectl get nodes -o jsonpath='"'"'{.items[*].status.addresses[?(@.type=="InternalIP")].address}'"'"'')
ALL_IP_ON_KUBER_NODE=$(sshpass -p ${SSH_PASS} ssh ${SSH_OPTS} ${SSH_USER}@${KUBE_MAIN_NODE} ip addr | grep 172.20 | awk '{print $2}' | awk -F'/' '{print $1}')
GREP_STRING_TMP=""
for i in $ALL_IP_ON_KUBER_NODE; do
    GREP_STRING_TMP="${GREP_STRING_TMP}${i}|"
done
GREP_STRING=${GREP_STRING_TMP:0:-1}
SSH_AUTH="ansible_ssh_user=${SSH_USER} ansible_ssh_pass=${SSH_PASS}"
echo "[main]" > cluster-hosts
echo "${PROMETHEUS_NODE} ${SSH_AUTH}" >> cluster-hosts
echo "[main-kuber]" >> cluster-hosts
echo "${KUBE_MAIN_NODE} ${SSH_AUTH}" >> cluster-hosts
echo "[cluster-nodes]" >> cluster-hosts
set +e
# Remove IPs of the kuber node itself
for i in ${NODES_TMP} ; do
    TMP_VAR=$(echo $i | grep -vE "(${GREP_STRING})")
    NODES="${NODES} ${TMP_VAR}"
done
set -e
for i in ${NODES} ; do
    if [ "$i" != "${KUBE_MAIN_NODE}" ]; then
        echo "${i} ${SSH_AUTH}" >> cluster-hosts
    fi
done
echo "[all-cluster-nodes:children]" >> cluster-hosts
echo "main-kuber" >> cluster-hosts
echo "cluster-nodes" >> cluster-hosts
LINES=$(wc -l cluster-hosts | awk '{print $1}')
NUM_NODES=$(($LINES - 7))
if [ ${NUM_NODES} -le 0 ]; then
    echo "Something is wrong: $NUM_NODES nodes found"
    exit 1
else
    echo "${NUM_NODES} nodes found"
fi

if [ -z "${ANSIBLE_TAG}" ]; then
    ansible-playbook -f 40 -i ./cluster-hosts -e cluster_tag=${CLUSTER_TAG} ./deploy-telegraf.yaml
else
    ansible-playbook -f 40 -i ./cluster-hosts -e cluster_tag=${CLUSTER_TAG} -t ${ANSIBLE_TAG} ./deploy-telegraf.yaml
fi
@@ -0,0 +1,10 @@
---
- name: Deploy prometheus in docker
  docker_container:
    name: prometheus
    image: 'prom/prometheus:v1.4.0'
    ports: 9090:9090
    state: started
    volumes: ['/var/lib/prometheus:/prometheus']
    command: '-config.file=/prometheus/prometheus.yml -storage.local.retention 168h0m0s -storage.local.max-chunks-to-persist 3024288 -storage.local.memory-chunks=50502740 -storage.local.num-fingerprint-mutexes=300960'
  tags: [ 'prometheus' ]
@@ -0,0 +1,58 @@
global:
  scrape_interval: 15s     # By default, scrape targets every 15 seconds.
  evaluation_interval: 15s # By default, evaluate rules every 15 seconds.
  # Attach these labels to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'codelab-monitor'

rule_files:
  # - "first.rules"
  # - "second.rules"

scrape_configs:

  - job_name: 'prometheus'
    scrape_interval: 5s
    scrape_timeout: 5s
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ['172.20.9.115:9090']

{% for env_num in range(1,7) %}
  - job_name: 'k8-env-{{env_num}}'
    scrape_interval: 30s
    scrape_timeout: 30s
    scheme: https
    tls_config:
      insecure_skip_verify: true
    kubernetes_sd_configs:
      - api_server: 'https://172.20.8.6{{env_num}}:443'
        role: node
        tls_config:
          insecure_skip_verify: true
        basic_auth:
          username: kube
          password: changeme
    relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - source_labels: [__address__]
        target_label: env
        regex: .*
        replacement: env-{{env_num}}

  - job_name: 'etcd-env-{{env_num}}'
    scrape_interval: 5s
    scrape_timeout: 5s
    scheme: https
    tls_config:
      insecure_skip_verify: true
    file_sd_configs:
      - files:
          - etcd-env-{{env_num}}.yml
{% endfor %}
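With the container from docker_prometheus.yaml running, target health can be checked from the shell through the Prometheus v1 HTTP API, e.g.:

    curl -s 'http://172.20.9.115:9090/api/v1/query?query=up' | python -m json.tool    # value 1 means the target was scraped successfully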
@@ -0,0 +1,33 @@
global:
  scrape_interval: 15s     # By default, scrape targets every 15 seconds.
  evaluation_interval: 15s # By default, evaluate rules every 15 seconds.
  # Attach these labels to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'codelab-monitor'

rule_files:
  # - "first.rules"
  # - "second.rules"

scrape_configs:

  - job_name: 'prometheus'
    scrape_interval: 5s
    scrape_timeout: 5s
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ['172.20.124.25:9090']

{% for env_num in range(1,7) %}
  - job_name: 'telegraf-systems-env-{{env_num}}'
    scrape_interval: 30s
    scrape_timeout: 30s
    file_sd_configs:
      - files:
          - targets-env-{{env_num}}.yml
{% endfor %}
@@ -0,0 +1,6 @@
- targets:
{% for host in groups['all-cluster-nodes'] %}
  - {{ hostvars[host]['inventory_hostname'] }}:9126
{% endfor %}
  labels:
    env: {{ cluster_tag }}
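Rendered against a small env-1 inventory, the template yields a file_sd targets file of this shape (addresses are illustrative):

    - targets:
      - 172.20.8.61:9126
      - 172.20.9.101:9126
      labels:
        env: env-1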
@@ -0,0 +1,5 @@
#!/bin/bash
export LANG=C
set -o nounset # Treat unset variables as an error
echo "system entropy=$(cat /proc/sys/kernel/random/entropy_avail)"
@@ -0,0 +1,33 @@
#!/bin/bash -e

ETCD=/usr/local/bin/etcdctl

type jq >/dev/null 2>&1 || ( echo "jq is not installed" ; exit 1 )
type curl >/dev/null 2>&1 || ( echo "curl is not installed" ; exit 1 )

# get etcd members credentials
MEMBERS="${ETCD} --endpoints https://127.0.0.1:2379 member list"
LEADER_ID=$(eval "$MEMBERS" | awk -F ':' '/isLeader=true/ {print $1}')
LEADER_ENDPOINT=$(eval "$MEMBERS" | awk '/isLeader=true/ {print $4}' | cut -d"=" -f2)
SLAVE_ID=$(eval "$MEMBERS" | grep 'isLeader=false' | head -n 1 | awk -F ":" '{print $1}')
SLAVE_ENDPOINT=$(eval "$MEMBERS" | grep 'isLeader=false' | head -n 1 | awk '{print $4}' | cut -d"=" -f2)

# member count:
metric_members_count=`curl -s -k https://172.20.9.15:2379/v2/members | jq -c '.members | length'`
metric_total_keys_count=`${ETCD} --endpoints https://127.0.0.1:2379 ls -r --sort | wc -l`
metric_total_size_dataset=`pidof etcd | xargs ps -o rss | awk '{rss+=$1} END {print rss}'`
metric_store_stats=`curl -s -k ${LEADER_ENDPOINT}/v2/stats/store | tr -d \"\{\} | sed -e 's/:/=/g'`
metric_latency_from_leader_avg=`curl -s -k ${LEADER_ENDPOINT}/v2/stats/leader | \
    jq -c ".followers.\"${SLAVE_ID}\".latency.average"`
metric_leader_stats=`curl -s -k ${LEADER_ENDPOINT}/v2/stats/self | \
    jq -c "{ sendBandwidthRate: .sendBandwidthRate, sendAppendRequestCnt: \
    .sendAppendRequestCnt, sendPkgRate: .sendPkgRate }" | tr -d \"\{\} | sed -e 's/:/=/g'`
metric_slave_stats=`curl -s -k ${SLAVE_ENDPOINT}/v2/stats/self | \
    jq -c "{ recvBandwidthRate: .recvBandwidthRate, recvAppendRequestCnt: \
    .recvAppendRequestCnt, recvPkgRate: .recvPkgRate }" | tr -d \"\{\} | sed -e 's/:/=/g'`
cat << EOF
etcd_general_stats,group=etcd_cluster_metrics members_count=${metric_members_count},dataset_size=${metric_total_size_dataset},total_keys_count=${metric_total_keys_count}
etcd_leader_stats,group=etcd_cluster_metrics $metric_leader_stats
etcd_follower_stats,group=etcd_cluster_metrics ${metric_slave_stats},latency_from_leader_avg=${metric_latency_from_leader_avg}
etcd_store_stats,group=etcd_cluster_metrics $metric_store_stats
EOF
@@ -0,0 +1,105 @@
#!/bin/bash
# Logs extractor / parser
# checking that we are good
if [[ -z "${TMP_DIR}" || -z "${POD}" || -z "${CONTAINER}" || -z "${K8S_NS}" || -z "${OS_LOG_FIELDS}" || -z ${CONTID} ]]; then
    echo "Required variables are not set, exiting!"
    exit 1
fi
# Variables declaration
SSH_USER="${SSH_USER:-root}"
SSH_PASS="${SSH_PASS:-r00tme}"
LOG_ENTRIES_NUMBER=${LOG_ENTRIES_NUMBER:-1000}
LAST_TIME_STAMP_FILE="${TMP_DIR}/timestamp.tmp"
# get | set last timestamp for log entries
function last_ts_data()
{
    local action
    action=${1}
    shift
    if [ "${action}" == "get" ]; then
        if [ -e ${LAST_TIME_STAMP_FILE} ]; then
            cat ${LAST_TIME_STAMP_FILE}
        fi
    else
        echo "$*" > ${LAST_TIME_STAMP_FILE}
    fi
}
function print_out()
{
    if [ -z "${TMP_METRICS}" ]; then
        echo "$@"
    else
        echo "$@" >> ${TMP_METRICS}
    fi
}
function micro_to_seconds()
{
    local input
    local output
    input="${1}"
    output=$(echo "scale=4;${input}/1000000" | bc)
    if echo ${output} | grep -q '^\..'; then
        output="0${output}"
    fi
    echo "${output}"
}
# extract container logs from k8s
function get_logs()
{
    local sdate
    local stime
    local scalltime
    local lasttimestamp
    local is_foundlast
    local tmpdata
    tmpdata="${TMP_DIR}/tmpdata.log"
    if [ -e "${tmpdata}" ]; then rm -f ${tmpdata}; fi
    if [ "${CONTAINER}" == "keystone" ]; then
        sshpass -p ${SSH_PASS} ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ${SSH_USER}@${HOST} "tail -n${LOG_ENTRIES_NUMBER} /var/log/ccp/keystone/keystone-access.log | cut -d' ' -f${OS_LOG_FIELDS} | sed -e 's#\[##g' -e 's#\]##g'" 2>/dev/null > ${tmpdata}
    else
        sshpass -p ${SSH_PASS} ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ${SSH_USER}@${HOST} "docker logs --tail ${LOG_ENTRIES_NUMBER} ${CONTID} 2>&1 | grep 'INFO' | grep 'GET /' | cut -d' ' -f${OS_LOG_FIELDS}" 2>/dev/null > ${tmpdata}
    fi
    is_foundlast=false
    lasttimestamp=$(last_ts_data "get")
    if [ -z "${lasttimestamp}" ]; then
        while read log
        do
            sdate=$(echo ${log} | cut -d' ' -f1)
            stime=$(echo ${log} | cut -d' ' -f2)
            scalltime=$(echo ${log} | cut -d' ' -f3)
            if [ "${CONTAINER}" == "keystone" ]; then scalltime=$(micro_to_seconds ${scalltime}); fi
            if [ ! -z "${scalltime}" ]; then
                print_out "os_api_response_time,container=${CONTAINER},pod=${POD},instance=${HOST},requestdate=${sdate},requesttime=${stime} processingtime=${scalltime}"
            fi
        done < <(cat ${tmpdata})
        sdate=$(tail -n 1 ${tmpdata} | cut -d' ' -f1)
        stime=$(tail -n 1 ${tmpdata} | cut -d' ' -f2)
        last_ts_data "set" "${sdate}${stime}"
    else
        while read log
        do
            sdate=$(echo ${log} | cut -d' ' -f1)
            stime=$(echo ${log} | cut -d' ' -f2)
            scalltime=$(echo ${log} | cut -d' ' -f3)
            if [ "${CONTAINER}" == "keystone" ]; then scalltime=$(micro_to_seconds ${scalltime}); fi
            if [[ "${is_foundlast}" = "false" && "${lasttimestamp}" = "${sdate}${stime}" ]]; then
                #echo "FOUND: ${sdate}${stime} ${scalltime}"
                is_foundlast=true
                continue
            fi
            if [ "${is_foundlast}" == "true" ]; then
                if [ ! -z "${scalltime}" ]; then
                    print_out "os_api_response_time,container=${CONTAINER},pod=${POD},instance=${HOST},requestdate=${sdate},requesttime=${stime} processingtime=${scalltime}"
                fi
            fi
        done < <(cat ${tmpdata})
        if [ "${is_foundlast}" == "true" ]; then
            sdate=$(tail -n 1 ${tmpdata} | cut -d' ' -f1)
            stime=$(tail -n 1 ${tmpdata} | cut -d' ' -f2)
            last_ts_data "set" "${sdate}${stime}"
        fi
    fi
    rm -f ${tmpdata}
}
# Main logic
get_logs
@@ -0,0 +1,6 @@
#!/bin/bash
# output from iostat -Ndx is:
# Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await r_await w_await svctm %util
export LANG=C
iostat -Ndx | tail -n +4 | head -n -1 | awk '{print "system_per_device_iostat,device="$1" read_merge="$2",write_merge="$3",await="$10",read_await="$11",write_await="$12",util="$14",average_queue="$9}'
@@ -0,0 +1,75 @@
#!/bin/bash -e

K8S_MASTER=127.0.0.1

if [[ $1 ]] ; then
    K8S_MASTER=$1
fi

type jq >/dev/null 2>&1 || ( echo "Jq is not installed" ; exit 1 )
type curl >/dev/null 2>&1 || ( echo "Curl is not installed" ; exit 1 )

curl_get() {
    url="https://${K8S_MASTER}$@"
    curl -k -s -u kube:changeme $url || ( echo "Curl failed at: $url" 1>&2; exit 1 )
}
# gather the output of frequent API calls into separate files (in order to avoid long timeouts):
node_file=`mktemp /tmp/XXXXX`
pods_file=`mktemp /tmp/XXXXX`
endpoints_file=`mktemp /tmp/XXXXX`
curl_get "/api/v1/nodes" > $node_file
curl_get "/api/v1/pods" > $pods_file
curl_get "/api/v1/endpoints" > $endpoints_file
# metrics retrieval:
number_of_namespaces_total=`curl_get "/api/v1/namespaces" | jq '[ .items[] .metadata.name ] | length'`
number_of_services_total=`curl_get "/api/v1/services" | jq -c '[ .items[] .metadata.name ] | length'`
number_of_nodes_total=`jq -c '[ .items[] .metadata.name ] | length' $node_file`
number_of_unsched=`jq -c '[ .items[] | select(.spec.unschedulable != null) .metadata.name ] | length' $node_file`
number_in_each_status=`jq -c '[ .items[] | .status.conditions[] | select(.type == "Ready") .status \
| gsub("(?<a>.+)"; "number_of_status_\(.a)" ) ] | group_by(.) | map({(.[0]): length}) | add ' $node_file \
| tr -d \"\{\} | sed -e 's/:/=/g'`
number_of_pods_total=`jq -c '[ .items[] .metadata.name ] | length' $pods_file`
number_of_pods_state_Pending=`jq -c '[ .items[] .status.phase | select(. == "Pending")] | length' $pods_file`
number_of_pods_state_Running=`jq -c '[ .items[] .status.phase | select(. == "Running")] | length' $pods_file`
number_of_pods_state_Succeeded=`jq -c '[ .items[] .status.phase | select(. == "Succeeded")] | length' $pods_file`
number_of_pods_state_Failed=`jq -c '[ .items[] .status.phase | select(. == "Failed")] | length' $pods_file`
number_of_pods_state_Unknown=`jq -c '[ .items[] .status.phase | select(. == "Unknown")] | length' $pods_file`
number_of_pods_per_node=`jq -c '[ .items[] | .spec.nodeName ] | group_by(.) | \
map("k8s_pods_per_node,group=k8s_cluster_metrics,pod_node=\(.[0]) value=\(length)")' $pods_file \
| sed -e 's/\["//g' -e 's/"\]//g' -e 's/","/\n/g'`
number_of_pods_per_ns=`jq -c '[ .items[] | .metadata.namespace ] | group_by(.) | \
map("k8s_pods_per_namespace,group=k8s_cluster_metrics,ns=\(.[0]) value=\(length)")' $pods_file \
| sed -e 's/\["//g' -e 's/"\]//g' -e 's/","/\n/g'`
number_of_endpoints_each_service=`jq -c '[ .items[] | { service: .metadata.name, endpoints: .subsets[] } | \
. as { service: $svc, endpoints: $endp } | $endp.addresses | length | . as $addr | $endp.ports | length | \
. as $prts | "k8s_services,group=k8s_cluster_metrics,service=\($svc) endpoints_number=\($addr * $prts)" ] ' $endpoints_file \
| sed -e 's/\["//g' -e 's/"\]//g' -e 's/","/\n/g'`
number_of_endpoints_total=`jq -c '[ .items[] | .subsets[] | { addrs: .addresses, ports: .ports } \
| map (length ) | .[0] * .[1] ] | add' $endpoints_file`
number_of_API_instances=`curl_get "/api/" | jq -c '.serverAddressByClientCIDRs | length'`
number_of_controllers=`curl_get "/api/v1/replicationcontrollers" | jq '.items | length'`
number_of_scheduler_instances=`curl_get /api/v1/namespaces/kube-system/pods?labelSelector='k8s-app=kube-scheduler' \
| jq -c '.items | length' `
cluster_resources_CPU=`jq -c '[ .items[] .status.capacity.cpu | tonumber ] | add' $node_file`
cluster_resources_RAM=`jq -c '[ .items[] .status.capacity.memory| gsub("[a-z]+$"; "" ; "i") | tonumber] | add' $node_file`

# output:
cat << EOF
k8s_nodes,group=k8s_cluster_metrics number_of_nodes_total=${number_of_nodes_total},number_of_unsched=${number_of_unsched}
k8s_nodes_states,group=k8s_cluster_metrics ${number_in_each_status}
k8s_namespaces,group=k8s_cluster_metrics number_of_namespaces_total=${number_of_namespaces_total}
k8s_pods,group=k8s_cluster_metrics number_of_pods_total=${number_of_pods_total}
k8s_pods_states,group=k8s_cluster_metrics number_of_pods_state_Pending=${number_of_pods_state_Pending},number_of_pods_state_Running=${number_of_pods_state_Running},number_of_pods_state_Succeeded=${number_of_pods_state_Succeeded},number_of_pods_state_Failed=${number_of_pods_state_Failed},number_of_pods_state_Unknown=${number_of_pods_state_Unknown}
${number_of_pods_per_node}
${number_of_pods_per_ns}
${number_of_endpoints_each_service}
k8s_services,group=k8s_cluster_metrics number_of_services_total=${number_of_services_total},number_of_endpoints_total=${number_of_endpoints_total}
k8s_number_of_API_instances,group=k8s_cluster_metrics value=${number_of_API_instances}
k8s_number_of_controllers,group=k8s_cluster_metrics value=${number_of_controllers}
k8s_number_of_scheduler_instances,group=k8s_cluster_metrics value=${number_of_scheduler_instances}
k8s_cluster_resources,group=k8s_cluster_metrics cpu_total=${cluster_resources_CPU},ram_total=${cluster_resources_RAM}
EOF

# cleanup
rm -f $node_file $pods_file $endpoints_file
@@ -0,0 +1,15 @@
#!/bin/bash
export LANG=C
PS_ALL=$(ps --no-headers -A -o command | grep -vE '(sh|bash)')
M_NAME=system_openstack_list

MARIADB=$(echo "${PS_ALL}" | grep 'mariadb' | wc -l)
RABBITMQ=$(echo "${PS_ALL}" | grep 'rabbitmq' | wc -l)
KEYSTONE=$(echo "${PS_ALL}" | grep 'keystone' | wc -l)
GLANCE=$(echo "${PS_ALL}" | grep -E '(glance-api|glance-registry)' | wc -l)
CINDER=$(echo "${PS_ALL}" | grep 'cinder' | wc -l)
NOVA=$(echo "${PS_ALL}" | grep -E '(nova-api|nova-conductor|nova-consoleauth|nova-scheduler)' | wc -l)
NEUTRON=$(echo "${PS_ALL}" | grep -E '(neutron-server|neutron-metadata-agent|neutron-dhcp-agent|neutron-l3-agent|neutron-openvswitch-agent)' | wc -l)
OPENVSWITCH=$(echo "${PS_ALL}" | grep -E '(ovsdb-server|ovs-vswitchd|ovsdb-client)' | wc -l)

echo "${M_NAME} mariadb=${MARIADB},rabbitmq=${RABBITMQ},keystone=${KEYSTONE},glance=${GLANCE},cinder=${CINDER},nova=${NOVA},neutron=${NEUTRON},openvswitch=${OPENVSWITCH}"
@@ -0,0 +1,7 @@
#!/bin/bash
# Output in MB/s
# echo 0 > /proc/sys/kernel/nmi_watchdog
# modprobe msr
export LANG=C
MEM_BW=$(sudo /opt/telegraf/bin/pcm-memory-one-line.x /csv 1 2>/dev/null | tail -n 1 | awk '{print $28}')
echo "system_memory bandwidth=${MEM_BW}"
@@ -0,0 +1,13 @@
#!/bin/bash
export LANG=C
IFS='
'
SUM_RESV_Q=0
SUM_SEND_Q=0
# skip the two netstat header lines, then sum Recv-Q/Send-Q per connection
for i in $(netstat -4 -n | tail -n +3); do
    RESV_Q=$(echo $i | awk '{print $2}')
    SEND_Q=$(echo $i | awk '{print $3}')
    SUM_RESV_Q=$((${SUM_RESV_Q} + ${RESV_Q}))
    SUM_SEND_Q=$((${SUM_SEND_Q} + ${SEND_Q}))
done
echo "system_tcp_queue sum_recv=${SUM_RESV_Q},sum_send=${SUM_SEND_Q}"
@@ -0,0 +1,22 @@
#!/bin/bash
set -o nounset  # Treat unset variables as an error
#set -x
export LANG=C
if [ ! -d '/sys/devices/system/node' ]; then
    # This host does not have NUMA
    exit 44
fi
ALL_PROCESS="$(ps --no-headers -A -o pid,ucomm)"
for i in $(echo "${ALL_PROCESS}" | awk '{print $1}'); do
    if [ -f "/proc/$i/numa_maps" ]; then
        NUM_STAT=$(numastat -p $i)
        PROC_NAME=$(echo "${ALL_PROCESS}" | grep -E "( $i |^$i )" | awk '{print $2}')
        echo "${NUM_STAT}" | grep Huge | awk -v p=$i -v n=$PROC_NAME \
            '{printf "system_numa_memory_per_pid,pid="p",name="n" memory_huge="$NF","}'
        echo "${NUM_STAT}" | grep Heap | awk '{printf "memory_heap="$NF","}'
        echo "${NUM_STAT}" | grep Stack | awk '{printf "memory_stack="$NF","}'
        echo "${NUM_STAT}" | grep Private | awk '{print "memory_private="$NF}'
    fi
done
@@ -0,0 +1,215 @@
#!/bin/bash
# Variables declaration
WORKDIR="$(cd "$(dirname ${0})" && pwd)"
OS_LOG_PARSER="${WORKDIR}/glog.sh"
TMPDATADIR="${WORKDIR}/data"
TMP_METRICS="${TMPDATADIR}/allmetrics.tmp"
MODE="${MODE:-bg}"
SCRIPT_LOG_DIR="${WORKDIR}/logs"
SCRIPT_LOG_FILE="${SCRIPT_LOG_DIR}/run_results_$(date +%Y-%m-%d).log"
SCRIPT_LOG_LVL=2
K8S_NS="${K8S_NS:-ccp}"
declare -a OSCONTROLLER=(
    'cinder-api:1,2,21'
    'glance-api:1,2,22'
    'heat-api:1,2,22'
    'neutron-metadata-agent:1,2,17'
    'neutron-server:1,2,22'
    'nova-api:1,2,21'
    'keystone:4,5,11'
)
declare -a OSCOMPUTE=(
    'nova-compute:'
)
# create subfolder under the working directory
function mk_dir()
{
    local newdir="${TMPDATADIR}/${1}"
    if [ ! -d "${newdir}" ]; then
        mkdir -p ${newdir}
    fi
}
# log function
function log()
{
    local input
    local dtstamp
    input="$*"
    dtstamp="$(date +%Y-%m-%d_%H%M%S)"
    if [ ! -d "${SCRIPT_LOG_DIR}" ]; then
        mkdir -p "${SCRIPT_LOG_DIR}"
    fi
    case "${SCRIPT_LOG_LVL}" in
        3)
            if [ ! -z "${input}" ]; then
                echo "${dtstamp}: ${input}" | tee -a "${SCRIPT_LOG_FILE}"
            fi
            ;;
        2)
            if [ ! -z "${input}" ]; then
                echo "${dtstamp}: ${input}" >> "${SCRIPT_LOG_FILE}"
            fi
            ;;
        1)
            if [ ! -z "${input}" ]; then
                echo "${dtstamp}: ${input}"
            fi
            ;;
        *)
            ;;
    esac
}
# get roles according to those predefined in OSCONTROLLER & OSCOMPUTE
function get_role()
{
    local role
    local input
    local arr_name
    local arr_name_fields
    role=${1}
    shift
    input=$*
    case ${role} in
        "controller")
            for i in $(seq 0 $(( ${#OSCONTROLLER[@]} - 1)))
            do
                arr_name=$(echo ${OSCONTROLLER[${i}]} | cut -d":" -f1)
                arr_name_fields=$(echo ${OSCONTROLLER[${i}]} | cut -d":" -f2)
                if [[ "${arr_name}" == "${input}" ]]; then
                    echo "${arr_name_fields}"
                    return 0
                fi
            done
            ;;
        "compute")
            for i in $(seq 0 $(( ${#OSCOMPUTE[@]} - 1)))
            do
                arr_name=$(echo ${OSCOMPUTE[${i}]} | cut -d":" -f1)
                arr_name_fields=$(echo ${OSCOMPUTE[${i}]} | cut -d":" -f2)
                if [ "${arr_name}" == "${input}" ]; then
                    echo "${arr_name_fields}"
                    return 0
                fi
            done
            ;;
    esac
    return 1
}
# diff in seconds
function tdiff()
{
    local now
    local datetime
    local result
    datetime="$(date -d "${1}" +%s)"
    now="$(date +%s)"
    result=$(( ${now} - ${datetime} ))
    echo ${result}
}
# lock file function
function glock()
{
    local action
    local lockfile
    local accessdate
    local old_in_sec=120
    action="${1}"
    # lockfile="${TMP_METRICS}.lock"
    lockfile="${TMPDATADIR}/allmetrics.tmp.lock"
    if [[ "${action}" == "lock" && ! -e "${lockfile}" ]]; then
        touch "${lockfile}"
    elif [[ "${action}" == "lock" && -e "${lockfile}" ]]; then
        accessdate="$(stat ${lockfile} | grep Modify | cut -d' ' -f2,3)"
        if [ "$(tdiff "${accessdate}")" -ge "${old_in_sec}" ]; then
            rm "${lockfile}"
            touch "${lockfile}"
        else
            log "Lock file ${lockfile} exists!"
            return 1
        fi
    else
        rm "${lockfile}"
    fi
    return 0
}
# wait for parsers launched in background mode
function gatherchildren()
{
    local childrencount
    while true
    do
        childrencount=$(ps axf | grep ${OS_LOG_PARSER} | grep -v grep | wc -l)
        if [ "${childrencount}" -eq 0 ]; then
            return
        fi
        log "Children running ${childrencount}."
        sleep 1
    done
}
# list of running containers
function get_k8s_containers()
{
    local cont_host
    local cont_pod
    local cont_name
    local cont_id
    local os_log_fields
    local cont_tmp_dir
    local _raw_data
    glock "lock"
    if [ "$?" -ne 0 ]; then exit 1; fi
    #echo '[' > ${TMP_METRICS}
    _raw_data="${TMPDATADIR}/._raw_data"
    rm -rf ${_raw_data}
    kubectl get pods -n "${K8S_NS}" -o 'go-template={{range .items}}{{if or (ne .status.phase "Succeeded") (eq .status.phase "Running")}}{{.spec.nodeName}},{{.metadata.name}},{{range .status.containerStatuses}}{{.name}},{{.containerID}}{{end}}{{"\n"}}{{end}}{{end}}' > ${_raw_data}
    for data in $(cat ${_raw_data})
    do
        cont_host=$(echo ${data} | cut -d',' -f1)
        cont_pod=$(echo ${data} | cut -d',' -f2)
        cont_name=$(echo ${data} | cut -d',' -f3)
        cont_id=$(echo ${data} | cut -d',' -f4 | sed 's|^docker://||')
        cont_tmp_dir="${cont_host}_${cont_pod}_${cont_name}"
        os_log_fields=$(get_role "controller" "${cont_name}")
        if [ "$?" -eq 0 ]; then
            mk_dir "${cont_tmp_dir}"
            export K8S_NS=${K8S_NS}
            export TMP_DIR=${TMPDATADIR}/${cont_tmp_dir}
            # export TMP_METRICS=${TMP_METRICS}
            export TMP_METRICS="${TMPDATADIR}/results/${cont_pod}.tmp"
            export CONTID=${cont_id}
            export CONTAINER=${cont_name}
            export HOST=${cont_host}
            export POD=${cont_pod}
            export OS_LOG_FIELDS=${os_log_fields}
            log "MODE=${MODE} CONTID=${cont_id} TMP_METRICS=${TMP_METRICS} ROLE=controller HOST=${cont_host} POD=${cont_pod} CONTAINER=${cont_name} OS_LOG_FIELDS=${os_log_fields} TMP_DIR=${TMPDATADIR}/${cont_tmp_dir} K8S_NS=${K8S_NS} ${OS_LOG_PARSER}"
            if [[ "${MODE}" == "bg" ]]; then
                log "${cont_pod} ${cont_name} ${cont_id}"
                ${OS_LOG_PARSER} &
            else
                ${OS_LOG_PARSER}
            fi
            unset TMP_METRICS
            unset CONTID
            unset CONTAINER
            unset POD
            unset OS_LOG_FIELDS
            unset HOST
        fi
        # os_log_fields=$(get_role "compute" "${cont_name}")
        # if [ "$?" -eq 0 ]; then
        #     mk_dir "${cont_tmp_dir}"
        #     log "ROLE=compute HOST=${cont_host} POD=${cont_pod} CONTAINER=${cont_name} OS_LOG_FIELDS=${os_log_fields} TMP_DIR=${TMPDATADIR}/${cont_tmp_dir} K8S_NS=${K8S_NS} ${OS_LOG_PARSER}"
        # fi
    done
    gatherchildren
    if [ "$(ls ${TMPDATADIR}/results/ | wc -l)" -gt 0 ]; then
        cat ${TMPDATADIR}/results/*.tmp
        log "Resulting lines $(cat ${TMPDATADIR}/results/*.tmp | wc -l)"
        rm -rf ${TMPDATADIR}/results/*
    fi
    glock "unlock"
}
# Main logic
mk_dir
mk_dir "results"
get_k8s_containers
@@ -0,0 +1,6 @@
#!/bin/bash
export LANG=C
for i in $(ps --no-headers -A -o pid); do
    pidstat -p $i | tail -n 1 | grep -v PID | awk '{print "system_per_process_cpu_usage,process="$9" user="$4",system="$5}'
done
@@ -0,0 +1,12 @@
#!/bin/bash
#
WORKDIR="$(cd "$(dirname ${0})" && pwd)"
SCRIPT="${WORKDIR}/$(basename ${0})"
MYSQLUSER="nova"
MYSQPASSWD="password"
MYSQLHOST="mariadb.ccp"
avgdata=$(mysql -u${MYSQLUSER} -p${MYSQPASSWD} -h ${MYSQLHOST} -D nova --skip-column-names --batch -e "select diff from (select avg(unix_timestamp(launched_at) - unix_timestamp(created_at)) as diff from instances where vm_state != 'error' and launched_at >= subtime(now(),'30')) t1 where diff IS NOT NULL;" 2>/dev/null | sed 's/\t/,/g';)
if [ ! -z "${avgdata}" ]; then
    echo "vm_spawn_avg_time timediffinsec=${avgdata}"
fi
@@ -0,0 +1,116 @@
[global_tags]
  metrics_source="system_openstack"
[agent]
  interval = "10s"
  round_interval = true
  metric_batch_size = 1000
  metric_buffer_limit = 10000
  collection_jitter = "0s"
  flush_interval = "15s"
  flush_jitter = "5s"
  precision = ""
  debug = false
  quiet = false
  hostname = ""
  omit_hostname = false
[[outputs.prometheus_client]]
  listen = ":9126"
[[inputs.cpu]]
  percpu = true
  totalcpu = true
  fielddrop = ["time_*"]
[[inputs.disk]]
  ignore_fs = ["tmpfs", "devtmpfs"]
[[inputs.diskio]]
[[inputs.kernel]]
[[inputs.mem]]
[[inputs.processes]]
[[inputs.swap]]
[[inputs.system]]
[[inputs.kernel_vmstat]]
[[inputs.net]]
[[inputs.netstat]]
[[inputs.exec]]
  interval = "15s"
  commands = [
    "/opt/telegraf/bin/vmtime.sh",
  ]
  timeout = "30s"
  data_format = "influx"
[[inputs.exec]]
  interval = "30s"
  commands = [
    "/opt/telegraf/bin/osapitime.sh",
  ]
  timeout = "60s"
  data_format = "influx"
[[inputs.exec]]
  interval = "15s"
  commands = [
    "/opt/telegraf/bin/etcd_get_metrics.sh"
  ]
  timeout = "30s"
  data_format = "influx"
[[inputs.exec]]
  interval = "15s"
  commands = [
    "/opt/telegraf/bin/k8s_get_metrics.sh"
  ]
  timeout = "30s"
  data_format = "influx"
[[inputs.openstack]]
  interval = '40s'
  identity_endpoint = "http://keystone.ccp.svc.cluster.local:5000/v3"
  domain = "default"
  project = "admin"
  username = "admin"
  password = "password"
[[inputs.exec]]
  interval = "15s"
  commands = [
    "/opt/telegraf/bin/iostat_per_device.sh"
  ]
  timeout = "30s"
  data_format = "influx"
[[inputs.exec]]
  interval = "15s"
  commands = [
    "/opt/telegraf/bin/per_process_cpu_usage.sh"
  ]
  timeout = "30s"
  data_format = "influx"
[[inputs.exec]]
  interval = "15s"
  commands = [
    "/opt/telegraf/bin/entropy.sh"
  ]
  timeout = "30s"
  data_format = "influx"
[[inputs.exec]]
  interval = "60s"
  commands = [
    "/opt/telegraf/bin/numa_stat_per_pid.sh"
  ]
  timeout = "60s"
  data_format = "influx"
[[inputs.exec]]
  interval = "15s"
  commands = [
    "/opt/telegraf/bin/memory_bandwidth.sh"
  ]
  timeout = "30s"
  data_format = "influx"
[[inputs.exec]]
  interval = "15s"
  commands = [
    "/opt/telegraf/bin/list_openstack_processes.sh"
  ]
  timeout = "30s"
  data_format = "influx"
[[inputs.exec]]
  interval = "15s"
  commands = [
    "/opt/telegraf/bin/network_tcp_queue.sh"
  ]
  timeout = "30s"
  data_format = "influx"
@@ -0,0 +1,81 @@
[global_tags]
  metrics_source="system"
[agent]
  interval = "10s"
  round_interval = true
  metric_batch_size = 1000
  metric_buffer_limit = 10000
  collection_jitter = "0s"
  flush_interval = "15s"
  flush_jitter = "5s"
  precision = ""
  debug = false
  quiet = false
  hostname = ""
  omit_hostname = false
[[outputs.prometheus_client]]
  listen = ":9126"
[[inputs.cpu]]
  percpu = true
  totalcpu = true
  fielddrop = ["time_*"]
[[inputs.disk]]
  ignore_fs = ["tmpfs", "devtmpfs"]
[[inputs.diskio]]
[[inputs.kernel]]
[[inputs.mem]]
[[inputs.processes]]
[[inputs.swap]]
[[inputs.system]]
[[inputs.kernel_vmstat]]
[[inputs.net]]
[[inputs.netstat]]
[[inputs.exec]]
  interval = "15s"
  commands = [
    "/opt/telegraf/bin/iostat_per_device.sh"
  ]
  timeout = "30s"
  data_format = "influx"
[[inputs.exec]]
  interval = "15s"
  commands = [
    "/opt/telegraf/bin/per_process_cpu_usage.sh"
  ]
  timeout = "30s"
  data_format = "influx"
[[inputs.exec]]
  interval = "15s"
  commands = [
    "/opt/telegraf/bin/entropy.sh"
  ]
  timeout = "30s"
  data_format = "influx"
[[inputs.exec]]
  interval = "60s"
  commands = [
    "/opt/telegraf/bin/numa_stat_per_pid.sh"
  ]
  timeout = "60s"
  data_format = "influx"
[[inputs.exec]]
  interval = "15s"
  commands = [
    "/opt/telegraf/bin/memory_bandwidth.sh"
  ]
  timeout = "30s"
  data_format = "influx"
[[inputs.exec]]
  interval = "15s"
  commands = [
    "/opt/telegraf/bin/list_openstack_processes.sh"
  ]
  timeout = "30s"
  data_format = "influx"
[[inputs.exec]]
  interval = "15s"
  commands = [
    "/opt/telegraf/bin/network_tcp_queue.sh"
  ]
  timeout = "30s"
  data_format = "influx"
948
doc/source/methodologies/monitoring/index.rst
Normal file
@@ -0,0 +1,948 @@

.. _Methodology_for_Containerized_Openstack_Monitoring:

**************************************************
Methodology for Containerized Openstack Monitoring
**************************************************

:Abstract:

  This document describes one possible Containerized Openstack monitoring
  solution that provides a scalable and comprehensive architecture and obtains
  all crucial performance metrics on each layer of the stack.


Containerized Openstack Monitoring Architecture
===============================================

This part of the documentation describes the performance metrics required at
each distinct Containerized Openstack layer.

Containerized Openstack comprises three layers where the monitoring system
should be able to query all necessary counters:

- OS layer
- Kubernetes layer
- Openstack layer

Monitoring instruments are logically divided into two groups:

- Monitoring Server Side
- Node Client Side

Operating System Layer
----------------------

We used Ubuntu Xenial on top of bare-metal servers for both the server and the node side.

Baremetal hardware description
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

We deployed everything on a 200-server environment with the following hardware characteristics:

.. table::

   +-------+----------------+------------------------+
   |server |vendor,model    |HP,DL380 Gen9           |
   +-------+----------------+------------------------+
   |CPU    |vendor,model    |Intel,E5-2680 v3        |
   |       +----------------+------------------------+
   |       |processor_count |2                       |
   |       +----------------+------------------------+
   |       |core_count      |12                      |
   |       +----------------+------------------------+
   |       |frequency_MHz   |2500                    |
   +-------+----------------+------------------------+
   |RAM    |vendor,model    |HP,752369-081           |
   |       +----------------+------------------------+
   |       |amount_MB       |262144                  |
   +-------+----------------+------------------------+
   |NETWORK|interface_name  |p1p1                    |
   |       +----------------+------------------------+
   |       |vendor,model    |Intel,X710 Dual Port    |
   |       +----------------+------------------------+
   |       |bandwidth       |10G                     |
   +-------+----------------+------------------------+
   |STORAGE|dev_name        |/dev/sda                |
   |       +----------------+------------------------+
   |       |vendor,model    | | raid10 - HP P840     |
   |       |                | | 12 disks EH0600JEDHE |
   |       +----------------+------------------------+
   |       |SSD/HDD         |HDD                     |
   |       +----------------+------------------------+
   |       |size            |3,6TB                   |
   +-------+----------------+------------------------+

Operating system configuration
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Baremetal nodes were provisioned with Cobbler using our in-house preseed scripts.
The OS versions we used:

.. table:: Operating system versions

   +--------------------+-----------------------------------------+
   |Software            |Version                                  |
   +--------------------+-----------------------------------------+
   |Ubuntu              |Ubuntu 16.04.1 LTS                       |
   +--------------------+-----------------------------------------+
   |Kernel              |4.4.0-47-generic                         |
   +--------------------+-----------------------------------------+

You can find the /etc folder contents from one of the typical systems we used:

:download:`etc_tarball <configs/node1.tar.gz>`

Required system metrics
^^^^^^^^^^^^^^^^^^^^^^^

At this layer we must monitor the following list of processes:

.. table::

   +------------------------+-----------------------------------------+
   |List of processes       |Mariadb                                  |
   |                        +-----------------------------------------+
   |                        |Rabbitmq                                 |
   |                        +-----------------------------------------+
   |                        |Keystone                                 |
   |                        +-----------------------------------------+
   |                        |Glance                                   |
   |                        +-----------------------------------------+
   |                        |Cinder                                   |
   |                        +-----------------------------------------+
   |                        |Nova                                     |
   |                        +-----------------------------------------+
   |                        |Neutron                                  |
   |                        +-----------------------------------------+
   |                        |Openvswitch                              |
   |                        +-----------------------------------------+
   |                        |Kubernetes                               |
   +------------------------+-----------------------------------------+

And the following list of metrics:

.. table::

   +------------------------+-----------------------------------------+
   |Node load average       |1min                                     |
   |                        +-----------------------------------------+
   |                        |5min                                     |
   |                        +-----------------------------------------+
   |                        |15min                                    |
   +------------------------+-----------------------------------------+
   |Global process stats    |Running                                  |
   |                        +-----------------------------------------+
   |                        |Stopped                                  |
   |                        +-----------------------------------------+
   |                        |Waiting                                  |
   +------------------------+-----------------------------------------+
   |Global CPU Usage        |Steal                                    |
   |                        +-----------------------------------------+
   |                        |Wait                                     |
   |                        +-----------------------------------------+
   |                        |User                                     |
   |                        +-----------------------------------------+
   |                        |System                                   |
   |                        +-----------------------------------------+
   |                        |Interrupt                                |
   |                        +-----------------------------------------+
   |                        |Nice                                     |
   |                        +-----------------------------------------+
   |                        |Idle                                     |
   +------------------------+-----------------------------------------+
   |Per CPU Usage           |User                                     |
   |                        +-----------------------------------------+
   |                        |System                                   |
   +------------------------+-----------------------------------------+
   |Global memory usage     |bandwidth                                |
   |                        +-----------------------------------------+
   |                        |Cached                                   |
   |                        +-----------------------------------------+
   |                        |Buffered                                 |
   |                        +-----------------------------------------+
   |                        |Free                                     |
   |                        +-----------------------------------------+
   |                        |Used                                     |
   |                        +-----------------------------------------+
   |                        |Total                                    |
   +------------------------+-----------------------------------------+
   |Numa monitoring         |Numa_hit                                 |
   |For each node           +-----------------------------------------+
   |                        |Numa_miss                                |
   |                        +-----------------------------------------+
   |                        |Numa_foreign                             |
   |                        +-----------------------------------------+
   |                        |Local_node                               |
   |                        +-----------------------------------------+
   |                        |Other_node                               |
   +------------------------+-----------------------------------------+
   |Numa monitoring         |Huge                                     |
   |For each pid            +-----------------------------------------+
   |                        |Heap                                     |
   |                        +-----------------------------------------+
   |                        |Stack                                    |
   |                        +-----------------------------------------+
   |                        |Private                                  |
   +------------------------+-----------------------------------------+
   |Global IOSTAT \+        |Merge reads /s                           |
   |Per device IOSTAT       +-----------------------------------------+
   |                        |Merge write /s                           |
   |                        +-----------------------------------------+
   |                        |read/s                                   |
   |                        +-----------------------------------------+
   |                        |write/s                                  |
   |                        +-----------------------------------------+
   |                        |Read transfer                            |
   |                        +-----------------------------------------+
   |                        |Write transfer                           |
   |                        +-----------------------------------------+
   |                        |Read latency                             |
   |                        +-----------------------------------------+
   |                        |Write latency                            |
   |                        +-----------------------------------------+
   |                        |Queue size                               |
   |                        +-----------------------------------------+
   |                        |Await                                    |
   +------------------------+-----------------------------------------+
   |Network per interface   |Octets /s (in, out)                      |
   |                        +-----------------------------------------+
   |                        |Packet /s (in, out)                      |
   |                        +-----------------------------------------+
   |                        |Dropped /s                               |
   +------------------------+-----------------------------------------+
   |Other system metrics    |Entropy                                  |
   |                        +-----------------------------------------+
   |                        |DF per device                            |
   +------------------------+-----------------------------------------+

Kubernetes Layer
----------------

`Kargo`_ from `Fuel-CCP-installer`_ was our main tool to deploy K8S
on top of the provisioned systems (monitored nodes).

Kargo sets up Kubernetes in the following way:

- masters: Calico, Kubernetes API services
- nodes: Calico, Kubernetes minion services
- etcd: etcd service

Kargo deployment parameters
^^^^^^^^^^^^^^^^^^^^^^^^^^^

You can find the Kargo deployment script in the `Kargo deployment script`_ section.

.. code:: bash

    docker_options: "--insecure-registry 172.20.8.35:5000 -D"
    upstream_dns_servers: [172.20.8.34, 8.8.4.4]
    nameservers: [172.20.8.34, 8.8.4.4]
    kube_service_addresses: 10.224.0.0/12
    kube_pods_subnet: 10.240.0.0/12
    kube_network_node_prefix: 22
    kube_apiserver_insecure_bind_address: "0.0.0.0"
    dns_replicas: 3
    dns_cpu_limit: "100m"
    dns_memory_limit: "512Mi"
    dns_cpu_requests: "70m"
    dns_memory_requests: "70Mi"
    deploy_netchecker: false

.. table::

   +----------------------+-----------------------------------------+
   |Software              |Version                                  |
   +----------------------+-----------------------------------------+
   |`Fuel-CCP-Installer`_ |6fd81252cb2d2c804f388337aa67d4403700f094 |
   +----------------------+-----------------------------------------+
   |`Kargo`_              |2c23027794d7851ee31363c5b6594180741ee923 |
   +----------------------+-----------------------------------------+

Required K8S metrics
^^^^^^^^^^^^^^^^^^^^

Here we should get K8S health metrics and ETCD performance metrics:

.. table::

   +------------------------+-----------------------------------------+
   |ETCD performance metrics|members count / states                   |
   |                        +-----------------------------------------+
   |                        |numbers of keys in a cluster             |
   |                        +-----------------------------------------+
   |                        |Size of data set                         |
   |                        +-----------------------------------------+
   |                        |Avg. latency from leader to followers    |
   |                        +-----------------------------------------+
   |                        |Bandwidth rate, send/receive             |
   |                        +-----------------------------------------+
   |                        |Create store success/fail                |
   |                        +-----------------------------------------+
   |                        |Get success/fail                         |
   |                        +-----------------------------------------+
   |                        |Set success/fail                         |
   |                        +-----------------------------------------+
   |                        |Package rate, send/receive               |
   |                        +-----------------------------------------+
   |                        |Expire count                             |
   |                        +-----------------------------------------+
   |                        |Update success/fail                      |
   |                        +-----------------------------------------+
   |                        |Compare-and-swap success/fail            |
   |                        +-----------------------------------------+
   |                        |Watchers                                 |
   |                        +-----------------------------------------+
   |                        |Delete success/fail                      |
   |                        +-----------------------------------------+
   |                        |Compare-and-delete success/fail          |
   |                        +-----------------------------------------+
   |                        |Append req, send/receive                 |
   +------------------------+-----------------------------------------+
   |K8S health metrics      |Number of nodes in each state            |
   |                        +-----------------------------------------+
   |                        |Total number of namespaces               |
   |                        +-----------------------------------------+
   |                        |Total number of PODs per cluster,node,ns |
   |                        +-----------------------------------------+
   |                        |Total number of services                 |
   |                        +-----------------------------------------+
   |                        |Endpoints in each service                |
   |                        +-----------------------------------------+
   |                        |Number of API service instances          |
   |                        +-----------------------------------------+
   |                        |Number of controller instances           |
   |                        +-----------------------------------------+
   |                        |Number of scheduler instances            |
   |                        +-----------------------------------------+
   |                        |Cluster resources, scheduler view        |
   +------------------------+-----------------------------------------+
   |K8S API log analysis    |Number of responses (per each HTTP code) |
   |                        +-----------------------------------------+
   |                        |Response Time                            |
   +------------------------+-----------------------------------------+

For the last two metrics we should utilize a log collector to store and parse all
log records within K8S environments.
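
Many of the ETCD counters above come straight from the etcd v2 stats API and can
be checked by hand; the endpoints below are the same ones polled by the
``etcd_get_metrics.sh`` script shown in the
`Telegraf deployment and configuration files`_ section:

.. code:: bash

    # spot-check of the etcd v2 stats API on a monitored etcd node
    curl -s -k https://127.0.0.1:2379/v2/stats/leader | jq .
    curl -s -k https://127.0.0.1:2379/v2/members | jq -c '.members | length'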

Openstack Layer
---------------

CCP stands for "Containerized Control Plane". CCP aims to build, run and manage
production-ready OpenStack containers on top of a Kubernetes cluster.

.. table:: Versions of CCP-related software

   +--------------------+-----------------------------------------+
   |Software            |Version                                  |
   +--------------------+-----------------------------------------+
   |`Fuel-CCP`_         |8570d0e0e512bd16f8449f0a10b1e3900fd09b2d |
   +--------------------+-----------------------------------------+

CCP configuration
^^^^^^^^^^^^^^^^^

CCP was deployed on top of a 200-node K8S cluster in the following configuration:

.. code-block:: yaml

    node[1-3]: Kubernetes
    node([4-6])$:       # 4-6
      roles:
        - controller
        - openvswitch
    node[7-9]$:         # 7-9
      roles:
        - rabbitmq
    node10$:            # 10
      roles:
        - galera
    node11$:            # 11
      roles:
        - heat
    node(1[2-9])$:      # 12-19
      roles:
        - compute
        - openvswitch
    node[2-9][0-9]$:    # 20-99
      roles:
        - compute
        - openvswitch
    node(1[0-9][0-9])$: # 100-199
      roles:
        - compute
        - openvswitch
    node200$:
      roles:
        - backup

CCP Openstack services list ( `versions.yaml`_ ):

.. code-block:: yaml

    openstack/cinder:
      git_ref: stable/newton
      git_url: https://github.com/openstack/cinder.git
    openstack/glance:
      git_ref: stable/newton
      git_url: https://github.com/openstack/glance.git
    openstack/heat:
      git_ref: stable/newton
      git_url: https://github.com/openstack/heat.git
    openstack/horizon:
      git_ref: stable/newton
      git_url: https://github.com/openstack/horizon.git
    openstack/keystone:
      git_ref: stable/newton
      git_url: https://github.com/openstack/keystone.git
    openstack/neutron:
      git_ref: stable/newton
      git_url: https://github.com/openstack/neutron.git
    openstack/nova:
      git_ref: stable/newton
      git_url: https://github.com/openstack/nova.git
    openstack/requirements:
      git_ref: stable/newton
      git_url: https://git.openstack.org/openstack/requirements.git
    openstack/sahara-dashboard:
      git_ref: stable/newton
      git_url: https://git.openstack.org/openstack/sahara-dashboard.git

`K8S Ingress Resources`_ rules were enabled during CCP deployment to expose Openstack service
endpoints to the external routable network.
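
A quick way to verify the exposed endpoints after deployment (the ``ccp``
namespace is the one used throughout this environment):

.. code:: bash

    # list the Ingress rules created for the Openstack service endpoints
    kubectl get ingress -n ccp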

See the CCP deployment script and configuration files in the
`CCP deployment and configuration files`_ section.

Required Openstack-related metrics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

At this layer we should get Openstack environment metrics as well as
API and resource utilization metrics.

.. table::

   +------------------------+-----------------------------------------+
   |Openstack metrics       |Total number of controller nodes         |
   |                        +-----------------------------------------+
   |                        |Total number of services                 |
   |                        +-----------------------------------------+
   |                        |Total number of compute nodes            |
   |                        +-----------------------------------------+
   |                        |Total number of nodes                    |
   |                        +-----------------------------------------+
   |                        |Total number of VMs                      |
   |                        +-----------------------------------------+
   |                        |Number of VMs per tenant, per node       |
   |                        +-----------------------------------------+
   |                        |Resource utilization per project,service |
   |                        +-----------------------------------------+
   |                        |Total number of tenants                  |
   |                        +-----------------------------------------+
   |                        |API request time                         |
   |                        +-----------------------------------------+
   |                        |Mean time to spawn VM                    |
   +------------------------+-----------------------------------------+

Implementation
==============

This part of the documentation describes the Monitoring System implementation.
Here is the software we chose to fulfil all the required tasks:

.. table::

   +-----------------------------------------+-----------------------------------------+
   |Monitoring Node Server Side              |Monitored Node Client Side               |
   +--------------------+--------------------+--------------------+--------------------+
   |Metrics server      |Log storage         |Metrics agent       |Log collector       |
   +--------------------+--------------------+--------------------+--------------------+
   | `Prometheus`_ \+   | `ElasticSearch`_   |`Telegraf`_         | `Heka`_            |
   | `Grafana`_         | \+ `Kibana`_       |                    |                    |
   +--------------------+--------------------+--------------------+--------------------+

Server Side Software
--------------------

Prometheus
^^^^^^^^^^

.. table::

   +--------------------+-----------------------------------------+
   |Software            |Version                                  |
   +--------------------+-----------------------------------------+
   |`Prometheus GitHub`_|7e369b9318a4d5d97a004586a99f10fa51a46b26 |
   +--------------------+-----------------------------------------+

Due to the high load rate we hit Prometheus performance issues at metric counts
of up to 15 million, so we split the Prometheus setup into two standalone nodes.
The first node polls API metrics from K8S-related services that are natively
available at the `/metrics` URI and exposed by the K8S API and ETCD API by default.
The second node stores all other metrics that are collected and calculated locally on environment
servers via Telegraf.
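
Both scrape sources can be spot-checked by hand. The addresses below come from
the Prometheus configuration files shown in the
`Prometheus deployment and configuration files`_ section; the Telegraf node
hostname is a placeholder:

.. code:: bash

    # K8S API metrics, polled by the first Prometheus node
    curl -sk -u kube:changeme https://172.20.8.61:443/metrics | head
    # Telegraf prometheus_client output on a monitored node (hostname is
    # illustrative), polled by the second Prometheus node
    curl -s http://node1:9126/metrics | head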

Prometheus node deployment scripts and configuration files can be found in the
`Prometheus deployment and configuration files`_ section.

Grafana
^^^^^^^

.. table::

   +--------------------+-----------------------------------------+
   |Software            |Version                                  |
   +--------------------+-----------------------------------------+
   |`Grafana`_          |v4.0.1                                   |
   +--------------------+-----------------------------------------+

Grafana was used as the metrics visualizer, with a separate individual dashboard
built for each group of metrics:

- System nodes metrics
- Kubernetes metrics
- ETCD metrics
- Openstack metrics

You can find their settings in `Grafana dashboards configuration`_.

Grafana server deployment script:

.. code-block:: bash

    #!/bin/bash
    ansible-playbook -i ./hosts ./deploy-graf-prom.yaml --tags "grafana"

It uses the same yaml configuration file `deploy-graf-prom.yaml`_ from the `Prometheus deployment and configuration files`_ section.
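
By analogy, the Prometheus nodes can be rolled out from the same playbook; the
tag name below is an assumption mirroring the "grafana" one:

.. code:: bash

    ansible-playbook -i ./hosts ./deploy-graf-prom.yaml --tags "prometheus"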

ElasticSearch
^^^^^^^^^^^^^

.. table::

   +--------------------+-----------------------------------------+
   |Software            |Version                                  |
   +--------------------+-----------------------------------------+
   |`ElasticSearch`_    |2.4.2                                    |
   +--------------------+-----------------------------------------+

ElasticSearch is a well-known, proven log storage, and we used it as a standalone
node for collecting Kubernetes API logs and all other logs from containers across the environment.
For adequate performance on the 200-node lab we increased `ES_HEAP_SIZE` from the default 1G to 10G
in the /etc/default/elasticsearch configuration file.
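
A minimal sketch of that change, assuming the Debian packaging layout mentioned
above:

.. code:: bash

    # raise the ElasticSearch heap from the default 1G to 10G and restart
    sed -i 's/^#\?ES_HEAP_SIZE=.*/ES_HEAP_SIZE=10g/' /etc/default/elasticsearch
    service elasticsearch restart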

ElasticSearch and the Kibana dashboard were installed with the
`deploy_elasticsearch_kibana.sh`_ deployment script.

Kibana
^^^^^^

.. table::

   +--------------------+-----------------------------------------+
   |Software            |Version                                  |
   +--------------------+-----------------------------------------+
   |`Kibana`_           |4.5.4                                    |
   +--------------------+-----------------------------------------+

We used Kibana as the main visualization tool for ElasticSearch, building charts
based on K8S API log analysis. Kibana was installed on a single separate node
with a single dashboard representing the K8S API response time graph.

Dashboard settings:

:download:`Kibana_dashboard.json <configs/dashboards/Kibana_dashboard.json>`

Client side Software
--------------------

Telegraf
^^^^^^^^

.. table::

   +--------------------+-----------------------------------------+
   |Software            |Version                                  |
   +--------------------+-----------------------------------------+
   |`Telegraf`_         |v1.0.0-beta2-235-gbc14ac5                |
   |                    |git: openstack_stats                     |
   |                    |bc14ac5b9475a59504b463ad8f82ed810feed3ec |
   +--------------------+-----------------------------------------+

Telegraf was chosen as the client-side metrics agent. Thanks to its plugin-driven
design, it can poll and compute data from a variety of different sources and
expose the calculated metrics in Prometheus format. We used a forked version of
Telegraf with custom patches to be able to utilize a custom Openstack input plugin:

- `GitHub Telegraf Fork`_
- `Go SDK for OpenStack`_

The following automation scripts and configuration files were used to start the Telegraf agent
across environment nodes.
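
For reference, the launch itself boils down to pointing the agent at its
configuration file; the binary and config paths below are assumptions that
mirror the ``/opt/telegraf`` layout used by the helper scripts:

.. code:: bash

    # illustrative launch command; the actual rollout is done by the
    # deployment scripts referenced below
    /opt/telegraf/bin/telegraf -config /opt/telegraf/etc/telegraf.conf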
|
||||||
|
|
||||||
|
`Telegraf deployment and configuration files`_
|
||||||
|
|
||||||
|
Below you can see which plugins were used to obtain metrics.
|
||||||
|
|
||||||
|
Standart Plugins
|
||||||
|
""""""""""""""""
|
||||||
|
|
||||||
|
.. code:: bash
|
||||||
|
|
||||||
|
inputs.cpu CPU
|
||||||
|
inputs.disk
|
||||||
|
inputs.diskio
|
||||||
|
inputs.kernel
|
||||||
|
inputs.mem
|
||||||
|
inputs.processes
|
||||||
|
inputs.swap
|
||||||
|
inputs.system
|
||||||
|
inputs.kernel_vmstat
|
||||||
|
inputs.net
|
||||||
|
inputs.netstat
|
||||||
|
inputs.exec
|
||||||
|
|
||||||
|
Openstack input plugin
|
||||||
|
""""""""""""""""""""""
|
||||||
|
`inputs.openstack` custom plugin was used to gather the most of required Openstack-related metrics.
|
||||||
|
|
||||||
|
settings:
|
||||||
|
|
||||||
|
.. code:: bash
|
||||||
|
|
||||||
|
interval = '40s'
|
||||||
|
identity_endpoint = "http://keystone.ccp.svc.cluster.local:5000/v3"
|
||||||
|
domain = "default"
|
||||||
|
project = "admin"
|
||||||
|
username = "admin"
|
||||||
|
password = "password"

`System.exec` plugin
""""""""""""""""""""

The `system.exec` plugin was used to trigger the scripts that poll
and calculate all non-standard metrics.

Common settings:

.. code:: bash

    interval = "15s"
    timeout = "30s"
    data_format = "influx"

Commands:

.. code:: bash

    "/opt/telegraf/bin/list_openstack_processes.sh"
    "/opt/telegraf/bin/per_process_cpu_usage.sh"
    "/opt/telegraf/bin/numa_stat_per_pid.sh"
    "/opt/telegraf/bin/iostat_per_device.sh"
    "/opt/telegraf/bin/memory_bandwidth.sh"
    "/opt/telegraf/bin/network_tcp_queue.sh"
    "/opt/telegraf/bin/etcd_get_metrics.sh"
    "/opt/telegraf/bin/k8s_get_metrics.sh"
    "/opt/telegraf/bin/vmtime.sh"
    "/opt/telegraf/bin/osapitime.sh"

You can see the full Telegraf configuration file and its custom input scripts in the
`Telegraf deployment and configuration files`_ section.

Heka
^^^^

.. table::

   +--------------------+-----------------------------------------+
   |Software            |Version                                  |
   +--------------------+-----------------------------------------+
   |`Heka`_             |0.10.0                                   |
   +--------------------+-----------------------------------------+

We chose Heka as the log collecting agent for its wide variety of inputs
(including the ability to feed data from the Docker socket), its filters (custom sandbox filters
written in Lua) and its ability to encode data for ElasticSearch.

With the Heka agent started across the environment servers we were able to ship container logs to
the ElasticSearch server. With a custom Lua filter we extracted the K8S API data and converted it
into a format suitable for visualizing API timing counters (average response time).
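
A minimal sketch of that pipeline in Heka's TOML configuration, assuming the standard Heka 0.10
plugin names (the ElasticSearch address and filter filename are illustrative; the real
configuration is in the `Heka deployment and configuration`_ section):

.. code:: bash

    # hekad.toml (fragment): read container logs from the Docker socket,
    # run the custom Lua filter, ship everything to ElasticSearch
    [DockerLogInput]
    endpoint = "unix:///var/run/docker.sock"

    [kubeapi_filter]
    type = "SandboxFilter"
    filename = "lua_filters/kubeapi_to_int.lua"
    message_matcher = "Type == 'DockerLog'"

    [ESJsonEncoder]
    index = "%{Type}-%{%Y.%m.%d}"

    [ElasticSearchOutput]
    server = "http://172.20.9.1:9200"
    message_matcher = "Type == 'DockerLog'"
    encoder = "ESJsonEncoder"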

Heka deployment scripts and the configuration file with the custom Lua filter are in the
`Heka deployment and configuration`_ section.

Applications
============

Kargo deployment script
-----------------------

deploy_k8s_using_kargo.sh
^^^^^^^^^^^^^^^^^^^^^^^^^

.. literalinclude:: configs/deploy_k8s_using_kargo.sh
   :language: bash

CCP deployment and configuration files
--------------------------------------

deploy-ccp.sh
^^^^^^^^^^^^^

.. literalinclude:: configs/ccp/deploy-ccp.sh
   :language: bash

ccp.yaml
^^^^^^^^

.. literalinclude:: configs/ccp/ccp.yaml
   :language: yaml

configs.yaml
^^^^^^^^^^^^

.. literalinclude:: configs/ccp/configs.yaml
   :language: yaml

topology.yaml
^^^^^^^^^^^^^

.. literalinclude:: configs/ccp/topology.yaml
   :language: yaml

repos.yaml
^^^^^^^^^^

.. literalinclude:: configs/ccp/repos.yaml
   :language: yaml

versions.yaml
^^^^^^^^^^^^^

.. literalinclude:: configs/ccp/versions.yaml
   :language: yaml

Prometheus deployment and configuration files
---------------------------------------------

Deployment scripts
^^^^^^^^^^^^^^^^^^

deploy_prometheus.sh
""""""""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/deploy_prometheus.sh
   :language: bash

deploy-graf-prom.yaml
"""""""""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/deploy-graf-prom.yaml
   :language: yaml

docker_prometheus.yaml
""""""""""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/docker_prometheus.yaml
   :language: yaml

deploy_etcd_collect.sh
""""""""""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/deploy_etcd_collect.sh
   :language: bash

Configuration files
^^^^^^^^^^^^^^^^^^^

prometheus-kuber.yml.j2
"""""""""""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/prometheus/prometheus-kuber.yml.j2
   :language: bash

prometheus-system.yml.j2
""""""""""""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/prometheus/prometheus-system.yml.j2
   :language: bash

targets.yml.j2
""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/prometheus/targets.yml.j2
   :language: bash

Grafana dashboards configuration
--------------------------------

:download:`Systems_nodes_statistics.json <configs/dashboards/Systems_nodes_statistics.json>`

:download:`Kubernetes_statistics.json <configs/dashboards/Kubernetes_statistics.json>`

:download:`ETCD.json <configs/dashboards/ETCD.json>`

:download:`OpenStack.json <configs/dashboards/OpenStack.json>`

ElasticSearch deployment script
-------------------------------

deploy_elasticsearch_kibana.sh
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. literalinclude:: configs/elasticsearch-heka/deploy_elasticsearch_kibana.sh
   :language: bash

Telegraf deployment and configuration files
-------------------------------------------

deploy_telegraf.sh
^^^^^^^^^^^^^^^^^^

.. literalinclude:: configs/prometheus-grafana-telegraf/deploy_telegraf.sh
   :language: bash

deploy-telegraf.yaml
^^^^^^^^^^^^^^^^^^^^

.. literalinclude:: configs/prometheus-grafana-telegraf/deploy-telegraf.yaml
   :language: yaml

Telegraf system
^^^^^^^^^^^^^^^

telegraf-sys.conf
"""""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/telegraf-sys.conf
   :language: bash

Telegraf openstack
^^^^^^^^^^^^^^^^^^

telegraf-openstack.conf.j2
""""""""""""""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/telegraf-openstack.conf.j2
   :language: bash

Telegraf inputs scripts
^^^^^^^^^^^^^^^^^^^^^^^

list_openstack_processes.sh
"""""""""""""""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/list_openstack_processes.sh
   :language: bash

per_process_cpu_usage.sh
""""""""""""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/per_process_cpu_usage.sh
   :language: bash

numa_stat_per_pid.sh
""""""""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/numa_stat_per_pid.sh
   :language: bash

iostat_per_device.sh
""""""""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/iostat_per_device.sh
   :language: bash

memory_bandwidth.sh
"""""""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/memory_bandwidth.sh
   :language: bash

network_tcp_queue.sh
""""""""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/network_tcp_queue.sh
   :language: bash

etcd_get_metrics.sh
"""""""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/etcd_get_metrics.sh
   :language: bash

k8s_get_metrics.sh
""""""""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/k8s_get_metrics.sh
   :language: bash

vmtime.sh
"""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/vmtime.sh
   :language: bash

osapitime.sh
""""""""""""

.. literalinclude:: configs/prometheus-grafana-telegraf/telegraf/scripts/osapitime.sh
   :language: bash

Heka deployment and configuration
---------------------------------

Deployment
^^^^^^^^^^

deploy_heka.sh
""""""""""""""

.. literalinclude:: configs/elasticsearch-heka/deploy_heka.sh
   :language: bash

deploy-heka.yaml
""""""""""""""""

.. literalinclude:: configs/elasticsearch-heka/deploy-heka.yaml
   :language: yaml

Configuration
^^^^^^^^^^^^^

00-hekad.toml.j2
""""""""""""""""

.. literalinclude:: configs/elasticsearch-heka/heka/00-hekad.toml.j2
   :language: bash

kubeapi_to_int.lua.j2
"""""""""""""""""""""

.. literalinclude:: configs/elasticsearch-heka/heka/kubeapi_to_int.lua.j2
   :language: bash

.. references:

.. _Fuel-CCP-Installer: https://github.com/openstack/fuel-ccp-installer
.. _Kargo: https://github.com/kubernetes-incubator/kargo.git
.. _Fuel-CCP: https://github.com/openstack/fuel-ccp
.. _Prometheus: https://prometheus.io/
.. _Prometheus GitHub: https://github.com/prometheus/prometheus
.. _Grafana: http://grafana.org/
.. _ElasticSearch: https://www.elastic.co/products/elasticsearch
.. _Kibana: https://www.elastic.co/products/kibana
.. _Telegraf: https://www.influxdata.com/time-series-platform/telegraf/
.. _GitHub Telegraf Fork: https://github.com/spjmurray/telegraf/tree/openstack_stats/plugins/inputs/openstack
.. _Go SDK for OpenStack: https://github.com/rackspace/gophercloud/
.. _Heka: https://hekad.readthedocs.io/en/v0.10.0/
.. _K8S Ingress Resources: http://kubernetes.io/docs/user-guide/ingress/