implement minimal metric collection
This change implements a metric collection system using influxdata (influxdb and telegraf) with visualization using grafana. No dashboard automation is provided at this time; however, a template dashboard can be used by importing the JSON files from the grafana-dashboards directory. Change-Id: I5445b01170054393a31afc2a20ffb3ea4eda1209 Signed-off-by: Kevin Carter <kevin.carter@rackspace.com>
This commit is contained in:
parent
3d3a8c0d5d
commit
19255fd1a8
22
cluster_metrics/ansible.cfg
Normal file
22
cluster_metrics/ansible.cfg
Normal file
@ -0,0 +1,22 @@
|
||||
[defaults]
|
||||
# Set the role path
|
||||
roles_path = /etc/ansible/roles:roles
|
||||
|
||||
inventory = /opt/openstack-ansible/playbooks/inventory/dynamic_inventory.py
|
||||
|
||||
# Fact caching
|
||||
gathering = smart
|
||||
fact_caching = jsonfile
|
||||
fact_caching_connection = /etc/openstack_deploy/ansible_facts
|
||||
fact_caching_timeout = 86400
|
||||
|
||||
# Additional plugins
|
||||
action_plugins = /etc/ansible/roles/plugins/action
|
||||
callback_plugins = /etc/ansible/roles/plugins/callback
|
||||
filter_plugins = /etc/ansible/roles/plugins/filter
|
||||
lookup_plugins = /etc/ansible/roles/plugins/lookup
|
||||
library = /etc/ansible/roles/plugins/library
|
||||
|
||||
# Set color options
|
||||
nocolor = 0
|
||||
host_key_checking = False
|
12
cluster_metrics/etc/env.d/cluster_metrics.yml
Normal file
12
cluster_metrics/etc/env.d/cluster_metrics.yml
Normal file
@ -0,0 +1,12 @@
|
||||
---
|
||||
component_skel:
|
||||
cluster-metrics:
|
||||
belongs_to:
|
||||
- cluster-metrics_all
|
||||
|
||||
container_skel:
|
||||
cluster-metrics_container:
|
||||
belongs_to:
|
||||
- log_containers
|
||||
contains:
|
||||
- cluster-metrics
|
0
cluster_metrics/etc/user_metrics.yml
Normal file
0
cluster_metrics/etc/user_metrics.yml
Normal file
23
cluster_metrics/files/kvm_virsh.py
Normal file
23
cluster_metrics/files/kvm_virsh.py
Normal file
@ -0,0 +1,23 @@
|
||||
#!/usr/bin/env python
"""Telegraf exec plugin: report KVM guest and vCPU counts for this host.

Opens a read-only libvirt connection, collects a handful of counters and
prints them to stdout as a single JSON object with the keys ``kvm_vms``,
``kvm_total_vcpus``, ``kvm_scheduled_vcpus`` and ``kvm_host_id``.

On any failure (including failure to connect to libvirt) the script exits
through ``SystemExit`` with a message starting with ``Plugin failure`` and
prints nothing to stdout.
"""
import json
import libvirt
import socket

return_data = dict()
# Bound before the ``try`` so the ``finally`` clause can tell whether a
# connection was actually established.
conn = None
try:
    # Connecting inside the ``try`` routes connection errors through the
    # same controlled exit path as every other failure.
    conn = libvirt.openReadOnly()
    domains = conn.listDomainsID()
    return_data['kvm_vms'] = len(domains)
    # First element of getCPUMap() -- presumably the host CPU count;
    # verify against the libvirt Python binding documentation.
    return_data['kvm_total_vcpus'] = conn.getCPUMap()[0]
    # Sum of maxVcpus() over every domain id returned above.
    return_data['kvm_scheduled_vcpus'] = sum(
        conn.lookupByID(domain).maxVcpus() for domain in domains
    )
    # NOTE(review): hash() of a str is only stable between runs when hash
    # randomization is disabled (the Python 2 default). Under Python 3 this
    # id would differ per invocation -- confirm which interpreter runs this.
    return_data['kvm_host_id'] = abs(hash(socket.getfqdn()))
except Exception as exc:
    # Keep the original message prefix, but surface the cause so the
    # failure can be diagnosed from the collector's logs.
    raise SystemExit('Plugin failure: %s' % exc)
else:
    print(json.dumps(return_data))
finally:
    # Only close a connection that was successfully opened.
    if conn is not None:
        conn.close()
|
446
cluster_metrics/grafana-dashboards/openstack-aggregates.json
Normal file
446
cluster_metrics/grafana-dashboards/openstack-aggregates.json
Normal file
@ -0,0 +1,446 @@
|
||||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_OSIC_INFLUXDB",
|
||||
"label": "OSIC InfluxDB",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "influxdb",
|
||||
"pluginName": "InfluxDB"
|
||||
}
|
||||
],
|
||||
"__requires": [
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "singlestat",
|
||||
"name": "Singlestat",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "graph",
|
||||
"name": "Graph",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "3.1.1"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "influxdb",
|
||||
"name": "InfluxDB",
|
||||
"version": "1.0.0"
|
||||
}
|
||||
],
|
||||
"id": null,
|
||||
"title": "OpenStack Compute Aggregates",
|
||||
"tags": [],
|
||||
"style": "dark",
|
||||
"timezone": "browser",
|
||||
"editable": true,
|
||||
"hideControls": false,
|
||||
"sharedCrosshair": false,
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"editable": true,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": true,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"rgba(204, 85, 16, 0.97)",
|
||||
"rgba(4, 133, 3, 0.89)",
|
||||
"rgba(245, 54, 54, 0.9)"
|
||||
],
|
||||
"datasource": "${DS_OSIC_INFLUXDB}",
|
||||
"editable": true,
|
||||
"error": false,
|
||||
"format": "bytes",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"height": "10px",
|
||||
"id": 3,
|
||||
"interval": null,
|
||||
"isNew": true,
|
||||
"links": [],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"minSpan": 6,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "RAM:",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 12,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(189, 188, 31, 0.18)",
|
||||
"full": true,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"dsType": "influxdb",
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"null"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"hide": false,
|
||||
"policy": "default",
|
||||
"query": "SELECT sum(total) as total FROM \"mem\" WHERE host =~ /comp/ AND $timeFilter GROUP BY time($interval)",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"value"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "mean"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Compute node total Memory",
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "70%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "N/A",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"datasource": "${DS_OSIC_INFLUXDB}",
|
||||
"editable": true,
|
||||
"error": false,
|
||||
"fill": 1,
|
||||
"grid": {
|
||||
"threshold1": null,
|
||||
"threshold1Color": "rgba(27, 42, 216, 0.27)",
|
||||
"threshold2": null,
|
||||
"threshold2Color": "rgba(167, 0, 0, 0.22)",
|
||||
"thresholdLine": false
|
||||
},
|
||||
"height": "250px",
|
||||
"id": 2,
|
||||
"isNew": true,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": true,
|
||||
"min": true,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": 15,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"minSpan": 6,
|
||||
"nullPointMode": "connected",
|
||||
"percentage": false,
|
||||
"pointradius": 1,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"dsType": "influxdb",
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"null"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"hide": false,
|
||||
"policy": "default",
|
||||
"query": "SELECT sum(used) as used FROM \"mem\" WHERE host =~ /$compute_node$/ AND $timeFilter GROUP BY time($interval)",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"value"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "mean"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Compute Node Used Memory",
|
||||
"tooltip": {
|
||||
"msResolution": true,
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "cumulative"
|
||||
},
|
||||
"transparent": true,
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"show": true
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"datasource": "${DS_OSIC_INFLUXDB}",
|
||||
"editable": true,
|
||||
"error": false,
|
||||
"fill": 1,
|
||||
"grid": {
|
||||
"threshold1": null,
|
||||
"threshold1Color": "rgba(27, 42, 216, 0.27)",
|
||||
"threshold2": null,
|
||||
"threshold2Color": "rgba(167, 0, 0, 0.22)",
|
||||
"thresholdLine": false
|
||||
},
|
||||
"height": "250px",
|
||||
"id": 1,
|
||||
"isNew": true,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": true,
|
||||
"min": true,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": 15,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"minSpan": 6,
|
||||
"nullPointMode": "connected",
|
||||
"percentage": false,
|
||||
"pointradius": 1,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"dsType": "influxdb",
|
||||
"groupBy": [
|
||||
{
|
||||
"params": [
|
||||
"$interval"
|
||||
],
|
||||
"type": "time"
|
||||
},
|
||||
{
|
||||
"params": [
|
||||
"null"
|
||||
],
|
||||
"type": "fill"
|
||||
}
|
||||
],
|
||||
"hide": false,
|
||||
"policy": "default",
|
||||
"query": "SELECT sum(available) as available FROM \"mem\" WHERE host =~ /$compute_node$/ AND $timeFilter GROUP BY time($interval)",
|
||||
"rawQuery": true,
|
||||
"refId": "A",
|
||||
"resultFormat": "time_series",
|
||||
"select": [
|
||||
[
|
||||
{
|
||||
"params": [
|
||||
"value"
|
||||
],
|
||||
"type": "field"
|
||||
},
|
||||
{
|
||||
"params": [],
|
||||
"type": "mean"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": []
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Compute Node Available Memory",
|
||||
"tooltip": {
|
||||
"msResolution": true,
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "cumulative"
|
||||
},
|
||||
"transparent": true,
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"show": true
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"showTitle": true,
|
||||
"title": "Memory"
|
||||
}
|
||||
],
|
||||
"time": {
|
||||
"from": "now/d",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"15s",
|
||||
"1m",
|
||||
"15m",
|
||||
"1h"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {},
|
||||
"datasource": "${DS_OSIC_INFLUXDB}",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "compute node",
|
||||
"multi": false,
|
||||
"name": "compute_node",
|
||||
"options": [],
|
||||
"query": "SHOW TAG VALUES FROM system WITH KEY=host",
|
||||
"refresh": 1,
|
||||
"regex": "/comp/",
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"schemaVersion": 12,
|
||||
"version": 43,
|
||||
"links": [],
|
||||
"gnetId": null
|
||||
}
|
2734
cluster_metrics/grafana-dashboards/openstack-metrics.json
Normal file
2734
cluster_metrics/grafana-dashboards/openstack-metrics.json
Normal file
File diff suppressed because it is too large
Load Diff
14
cluster_metrics/handlers/main.yml
Normal file
14
cluster_metrics/handlers/main.yml
Normal file
@ -0,0 +1,14 @@
|
||||
---
|
||||
# Copyright 2016, Rackspace US, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
70
cluster_metrics/playbook-grafana.yml
Normal file
70
cluster_metrics/playbook-grafana.yml
Normal file
@ -0,0 +1,70 @@
|
||||
---
|
||||
# Copyright 2016, Rackspace US, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
- name: Deploy grafana
|
||||
hosts: "cluster-metrics"
|
||||
gather_facts: true
|
||||
user: root
|
||||
pre_tasks:
|
||||
- name: Create DB for service
|
||||
mysql_db:
|
||||
login_user: "{{ galera_root_user }}"
|
||||
login_password: "{{ galera_root_password }}"
|
||||
login_host: "127.0.0.1"
|
||||
name: "{{ grafana_db_name }}"
|
||||
state: "present"
|
||||
delegate_to: "{{ groups['galera_all'][0] }}"
|
||||
- name: Grant access to the DB for the service
|
||||
mysql_user:
|
||||
login_user: "{{ galera_root_user }}"
|
||||
login_password: "{{ galera_root_password }}"
|
||||
login_host: "127.0.0.1"
|
||||
name: "{{ grafana_db_user }}"
|
||||
password: "{{ grafana_db_password }}"
|
||||
host: "{{ item }}"
|
||||
state: "present"
|
||||
priv: "{{ grafana_db_name }}.*:ALL"
|
||||
delegate_to: "{{ groups['galera_all'][0] }}"
|
||||
with_items:
|
||||
- "localhost"
|
||||
- "%"
|
||||
tasks:
|
||||
- name: Ensure https repos function
|
||||
apt:
|
||||
pkg: "apt-transport-https"
|
||||
state: "latest"
|
||||
- name: Add grafana apt-keys
|
||||
apt_key:
|
||||
url: "https://packagecloud.io/gpg.key"
|
||||
state: "present"
|
||||
- name: Add grafana repo
|
||||
apt_repository:
|
||||
repo: "deb https://packagecloud.io/grafana/stable/debian/ wheezy main"
|
||||
state: "present"
|
||||
- name: Install grafana
|
||||
apt:
|
||||
pkg: "grafana"
|
||||
state: "latest"
|
||||
- name: Drop grafana config file
|
||||
template:
|
||||
src: templates/grafana.ini.j2
|
||||
dest: /etc/grafana/grafana.ini
|
||||
- name: Enable and start grafana
|
||||
service:
|
||||
name: "grafana-server"
|
||||
enabled: true
|
||||
state: restarted
|
||||
vars_files:
|
||||
- vars.yml
|
67
cluster_metrics/playbook-influx-db.yml
Normal file
67
cluster_metrics/playbook-influx-db.yml
Normal file
@ -0,0 +1,67 @@
|
||||
---
|
||||
# Copyright 2016, Rackspace US, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
- name: Deploy influxdb
|
||||
hosts: "cluster-metrics"
|
||||
gather_facts: true
|
||||
user: root
|
||||
tasks:
|
||||
- name: InfluxDB datapath bind mount
|
||||
lxc_container:
|
||||
name: "{{ inventory_hostname }}"
|
||||
container_command: |
|
||||
[[ ! -d "/var/lib/influxdb" ]] && mkdir -p "/var/lib/influxdb"
|
||||
container_config:
|
||||
- "lxc.mount.entry=/openstack/{{ inventory_hostname }} var/lib/influxdb none bind 0 0"
|
||||
delegate_to: "{{ physical_host }}"
|
||||
- name: Add influxdata apt-keys
|
||||
apt_key:
|
||||
url: "https://repos.influxdata.com/influxdb.key"
|
||||
state: "present"
|
||||
- name: Add influxdata repo
|
||||
apt_repository:
|
||||
repo: "deb https://repos.influxdata.com/{{ ansible_distribution | lower }} {{ ansible_distribution_release }} stable"
|
||||
state: "present"
|
||||
- name: Install influxdb
|
||||
apt:
|
||||
pkg: "influxdb"
|
||||
state: "latest"
|
||||
- name: Drop influxdb config file
|
||||
template:
|
||||
src: templates/influxdb.conf.j2
|
||||
dest: /etc/influxdb/influxdb.conf
|
||||
- name: Enable and restart influxdb
|
||||
service:
|
||||
name: "influxdb"
|
||||
enabled: true
|
||||
state: restarted
|
||||
- name: Wait for influxdb to be ready
|
||||
wait_for:
|
||||
host: "{{ hostvars[groups['cluster-metrics'][0]]['ansible_ssh_host'] }}"
|
||||
port: "{{ influxdb_port }}"
|
||||
delay: 1
|
||||
- name: Create metrics DB
|
||||
shell: >
|
||||
influx -username {{ influxdb_db_root_name }}
|
||||
-password {{ influxdb_db_root_password }}
|
||||
-execute "{{ item }}"
|
||||
with_items:
|
||||
- "CREATE DATABASE {{ influxdb_db_name }}"
|
||||
- "CREATE RETENTION POLICY {{ influxdb_db_retention_policy }} ON {{ influxdb_db_name }} DURATION {{ influxdb_db_retention }} REPLICATION {{ influxdb_db_replication }}"
|
||||
- "CREATE USER {{ influxdb_db_metric_user }} WITH PASSWORD '{{ influxdb_db_metric_password }}'"
|
||||
- "GRANT ALL ON {{ influxdb_db_name }} TO {{ influxdb_db_metric_user }}"
|
||||
vars_files:
|
||||
- vars.yml
|
||||
|
64
cluster_metrics/playbook-influx-telegraf.yml
Normal file
64
cluster_metrics/playbook-influx-telegraf.yml
Normal file
@ -0,0 +1,64 @@
|
||||
---
|
||||
# Copyright 2016, Rackspace US, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
- name: Deploy telegraf
|
||||
hosts: "all"
|
||||
gather_facts: true
|
||||
user: root
|
||||
tasks:
|
||||
- name: Add influxdata apt-keys
|
||||
apt_key:
|
||||
url: "https://repos.influxdata.com/influxdb.key"
|
||||
state: "present"
|
||||
- name: Add influxdata repo
|
||||
apt_repository:
|
||||
repo: "deb https://repos.influxdata.com/{{ ansible_distribution | lower }} {{ ansible_distribution_release }} stable"
|
||||
state: "present"
|
||||
- name: Install telegraf
|
||||
apt:
|
||||
pkg: "telegraf"
|
||||
state: "latest"
|
||||
- name: Create telegraf plugin dir
|
||||
file:
|
||||
path: "/opt/telegraf"
|
||||
state: directory
|
||||
mode: "0755"
|
||||
- name: Drop telegraf plugin file(s)
|
||||
copy:
|
||||
src: "files/{{ item }}"
|
||||
dest: "/opt/telegraf/{{ item }}"
|
||||
mode: '0755'
|
||||
with_items:
|
||||
- kvm_virsh.py
|
||||
- name: Drop telegraf config file
|
||||
template:
|
||||
src: templates/telegraf.conf.j2
|
||||
dest: /etc/telegraf/telegraf.conf
|
||||
register: telegraf_config
|
||||
- name: Enable and restart telegraf
|
||||
service:
|
||||
name: "telegraf"
|
||||
enabled: true
|
||||
state: restarted
|
||||
when: telegraf_config | changed
|
||||
- name: Enable and start telegraf
|
||||
service:
|
||||
name: "telegraf"
|
||||
enabled: true
|
||||
state: started
|
||||
when: not telegraf_config | changed
|
||||
vars_files:
|
||||
- vars.yml
|
||||
|
55
cluster_metrics/playbook-metrics-lb.yml
Normal file
55
cluster_metrics/playbook-metrics-lb.yml
Normal file
@ -0,0 +1,55 @@
|
||||
---
|
||||
# Copyright 2016, Rackspace US, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
- name: Add haproxy config
|
||||
hosts: haproxy
|
||||
gather_facts: true
|
||||
user: root
|
||||
roles:
|
||||
- role: "haproxy_server"
|
||||
haproxy_service_configs:
|
||||
- service:
|
||||
haproxy_service_name: influxdb_admin
|
||||
haproxy_backend_nodes: "{{ groups['cluster-metrics'] | default([]) }}"
|
||||
haproxy_ssl: "{{ haproxy_ssl }}"
|
||||
haproxy_port: 8083
|
||||
haproxy_balance_type: tcp
|
||||
haproxy_backend_options:
|
||||
- tcp-check
|
||||
haproxy_whitelist_networks:
|
||||
- 192.168.0.0/16
|
||||
- 172.16.0.0/12
|
||||
- 10.0.0.0/8
|
||||
- service:
|
||||
haproxy_service_name: influxdb
|
||||
haproxy_backend_nodes: "{{ groups['cluster-metrics'] | default([]) }}"
|
||||
haproxy_ssl: "{{ haproxy_ssl }}"
|
||||
haproxy_port: 8086
|
||||
haproxy_balance_type: tcp
|
||||
haproxy_backend_options:
|
||||
- tcp-check
|
||||
haproxy_whitelist_networks:
|
||||
- 192.168.0.0/16
|
||||
- 172.16.0.0/12
|
||||
- 10.0.0.0/8
|
||||
- service:
|
||||
haproxy_service_name: grafana
|
||||
haproxy_backend_nodes: "{{ groups['cluster-metrics'] | default([]) }}"
|
||||
haproxy_ssl: "{{ haproxy_ssl }}"
|
||||
haproxy_port: 8089
|
||||
haproxy_balance_type: tcp
|
||||
haproxy_backend_options:
|
||||
- tcp-check
|
||||
|
56
cluster_metrics/readme.rst
Normal file
56
cluster_metrics/readme.rst
Normal file
@ -0,0 +1,56 @@
|
||||
Gather and visualize cluster wide metrics
|
||||
#########################################
|
||||
:date: 2016-09-01
|
||||
:tags: openstack, ansible
|
||||
:category: \*openstack, \*nix
|
||||
|
||||
|
||||
About this repository
|
||||
---------------------
|
||||
|
||||
This set of playbooks will deploy InfluxDB, Telegraf, and Grafana for the purpose of collecting metrics on an OpenStack cluster.
|
||||
|
||||
Process
|
||||
-------
|
||||
|
||||
Clone the OPS repo
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
cd /opt
|
||||
git clone https://github.com/openstack/openstack-ansible-ops
|
||||
|
||||
Copy the env.d files into place
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
cd openstack-ansible-ops/cluster_metrics
|
||||
cp etc/env.d/cluster_metrics.yml /etc/openstack_deploy/env.d/
|
||||
|
||||
Create the containers
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
openstack-ansible /opt/openstack-ansible/playbooks/lxc-containers-create.yml -e container_group=cluster-metrics
|
||||
|
||||
Install InfluxDB
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
openstack-ansible playbook-influx-db.yml
|
||||
|
||||
Install Influx Telegraf
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
openstack-ansible playbook-influx-telegraf.yml --forks 100
|
||||
|
||||
Install grafana
|
||||
|
||||
If you are proxying Grafana, you will need to provide the full ``root_url`` when you run the playbook by adding the following option: ``-e grafana_root_url='https://cloud.something:8443/grafana/'``
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
openstack-ansible playbook-grafana.yml -e galera_root_user=root -e galera_address='127.0.0.1'
|
||||
|
||||
Once that last playbook is completed you will have a functioning InfluxDB, Telegraf, and Grafana metric collection system active and collecting metrics. Grafana will need some setup; however, functional dashboards have been provided in the ``grafana-dashboards`` directory.
|
66
cluster_metrics/templates/grafana.ini.j2
Normal file
66
cluster_metrics/templates/grafana.ini.j2
Normal file
@ -0,0 +1,66 @@
|
||||
# {{ ansible_managed }}
|
||||
[paths]
|
||||
|
||||
[server]
|
||||
http_port = {{ grafana_port }}
|
||||
{% if grafana_root_url is defined %}
|
||||
root_url = {{ grafana_root_url }}
|
||||
{% endif %}
|
||||
|
||||
[database]
|
||||
type = mysql
|
||||
host = {{ galera_address }}:3306
|
||||
name = {{ grafana_db_name }}
|
||||
user = {{ grafana_db_user }}
|
||||
password = {{ grafana_db_password }}
|
||||
|
||||
[session]
|
||||
|
||||
[analytics]
|
||||
check_for_updates = true
|
||||
|
||||
[security]
|
||||
admin_user = admin
|
||||
admin_password = {{ grafana_admin_password }}
|
||||
|
||||
[snapshots]
|
||||
|
||||
[users]
|
||||
allow_sign_up = false
|
||||
allow_org_create = false
|
||||
|
||||
[auth.anonymous]
|
||||
enabled = true
|
||||
org_name = OpenStack
|
||||
org_role = Viewer
|
||||
|
||||
[auth.github]
|
||||
|
||||
[auth.google]
|
||||
|
||||
[auth.proxy]
|
||||
|
||||
[auth.basic]
|
||||
|
||||
[auth.ldap]
|
||||
|
||||
[smtp]
|
||||
|
||||
[emails]
|
||||
|
||||
[log]
|
||||
|
||||
[log.console]
|
||||
|
||||
[log.file]
|
||||
|
||||
[log.syslog]
|
||||
|
||||
[event_publisher]
|
||||
|
||||
[dashboards.json]
|
||||
|
||||
[metrics]
|
||||
|
||||
[grafana_net]
|
||||
url = https://grafana.net
|
81
cluster_metrics/templates/influxdb.conf.j2
Normal file
81
cluster_metrics/templates/influxdb.conf.j2
Normal file
@ -0,0 +1,81 @@
|
||||
# {{ ansible_managed }}
|
||||
reporting-disabled = false
|
||||
|
||||
[logging]
|
||||
level = "info"
|
||||
|
||||
[meta]
|
||||
dir = "/var/lib/influxdb/meta"
|
||||
retention-autocreate = true
|
||||
logging-enabled = true
|
||||
pprof-enabled = false
|
||||
lease-duration = "1m0s"
|
||||
|
||||
[data]
|
||||
enabled = true
|
||||
dir = "/var/lib/influxdb/data"
|
||||
wal-dir = "/var/lib/influxdb/wal"
|
||||
wal-logging-enabled = true
|
||||
query-log-enabled = false
|
||||
cache-max-memory-size = 679477248
|
||||
cache-snapshot-memory-size = 28311552
|
||||
cache-snapshot-write-cold-duration = "1h0m0s"
|
||||
compact-full-write-cold-duration = "24h0m0s"
|
||||
max-points-per-block = 0
|
||||
data-logging-enabled = false
|
||||
|
||||
[cluster]
|
||||
shard-writer-timeout = "8s" # The time within which a remote shard must respond to a write request.
|
||||
write-timeout = "16s" # The time within which a write request must complete on the cluster.
|
||||
max-concurrent-queries = 0 # The maximum number of concurrent queries that can run. 0 to disable.
|
||||
query-timeout = "0s" # The time within which a query must complete before being killed automatically. 0s to disable.
|
||||
max-select-point = 0 # The maximum number of points to scan in a query. 0 to disable.
|
||||
max-select-series = 0 # The maximum number of series to select in a query. 0 to disable.
|
||||
max-select-buckets = 0 # The maximum number of buckets to select in an aggregate query. 0 to disable.
|
||||
|
||||
[retention]
|
||||
enabled = true
|
||||
check-interval = "32m"
|
||||
|
||||
[shard-precreation]
|
||||
enabled = true
|
||||
check-interval = "16m"
|
||||
advance-period = "32m"
|
||||
|
||||
[monitor]
|
||||
store-enabled = true # Whether to record statistics internally.
|
||||
store-database = "_internal" # The destination database for recorded statistics
|
||||
store-interval = "16s" # The interval at which to record statistics
|
||||
|
||||
[admin]
|
||||
enabled = true
|
||||
bind-address = ":{{ influxdb_admin_port }}"
|
||||
https-enabled = false
|
||||
https-certificate = "/etc/ssl/influxdb.pem"
|
||||
|
||||
[http]
|
||||
enabled = true
|
||||
bind-address = ":{{ influxdb_port }}"
|
||||
auth-enabled = false
|
||||
log-enabled = false
|
||||
write-tracing = false
|
||||
pprof-enabled = false
|
||||
https-enabled = false
|
||||
https-certificate = "/etc/ssl/influxdb.pem"
|
||||
max-row-limit = 10240
|
||||
|
||||
[[graphite]]
|
||||
enabled = false
|
||||
|
||||
[[collectd]]
|
||||
enabled = false
|
||||
|
||||
[[opentsdb]]
|
||||
enabled = false
|
||||
|
||||
[[udp]]
|
||||
enabled = false
|
||||
|
||||
[continuous_queries]
|
||||
log-enabled = false
|
||||
enabled = true
|
67
cluster_metrics/templates/telegraf.conf.j2
Normal file
67
cluster_metrics/templates/telegraf.conf.j2
Normal file
@ -0,0 +1,67 @@
|
||||
[global_tags]
|
||||
{% if inventory_hostname in groups['all_containers'] %}
|
||||
node_type = "container"
|
||||
{% elif inventory_hostname in groups['hosts'] %}
|
||||
node_type = "physical_host"
|
||||
{% endif %}
|
||||
|
||||
[agent]
|
||||
interval = "24s"
|
||||
round_interval = false
|
||||
metric_batch_size = 1024
|
||||
metric_buffer_limit = 10240
|
||||
collection_jitter = "8s"
|
||||
flush_interval = "48s"
|
||||
flush_jitter = "8s"
|
||||
debug = false
|
||||
quiet = true
|
||||
{% if inventory_hostname in groups['all_containers'] %}
|
||||
hostname = "{{ ansible_hostname }}"
|
||||
{% else %}
|
||||
hostname = "{{ inventory_hostname }}"
|
||||
{% endif %}
|
||||
omit_hostname = false
|
||||
|
||||
[[outputs.influxdb]]
|
||||
urls = ["http://{{ hostvars[groups['cluster-metrics'][0]]['ansible_ssh_host'] }}:{{ influxdb_port }}"]
|
||||
database = "{{ influxdb_db_name }}"
|
||||
precision = "s"
|
||||
write_consistency = "any"
|
||||
timeout = "5s"
|
||||
|
||||
[[inputs.processes]]
|
||||
|
||||
[[inputs.system]]
|
||||
|
||||
{% if inventory_hostname in groups['all_containers'] %}
|
||||
[[inputs.net]]
|
||||
|
||||
{% elif inventory_hostname in groups['hosts'] %}
|
||||
[[inputs.cpu]]
|
||||
percpu = true
|
||||
totalcpu = true
|
||||
fielddrop = ["time_*"]
|
||||
|
||||
[[inputs.net]]
|
||||
|
||||
[[inputs.netstat]]
|
||||
|
||||
[[inputs.disk]]
|
||||
ignore_fs = ["tmpfs", "devtmpfs"]
|
||||
|
||||
[[inputs.diskio]]
|
||||
|
||||
[[inputs.kernel]]
|
||||
|
||||
[[inputs.mem]]
|
||||
|
||||
[[inputs.swap]]
|
||||
|
||||
{% if inventory_hostname in groups['nova_compute'] %}
|
||||
[[inputs.exec]]
|
||||
commands = ["/opt/telegraf/kvm_virsh.py"]
|
||||
timeout = "15s"
|
||||
data_format = "json"
|
||||
name_prefix = "custom_"
|
||||
{% endif %}
|
||||
{% endif %}
|
34
cluster_metrics/vars.yml
Normal file
34
cluster_metrics/vars.yml
Normal file
@ -0,0 +1,34 @@
|
||||
---
|
||||
# Copyright 2016, Rackspace US, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Grafana vars
|
||||
grafana_port: 8089
|
||||
grafana_db_name: grafana
|
||||
grafana_db_user: grafana
|
||||
grafana_db_password: secrete
|
||||
grafana_admin_password: SuperSecrete
|
||||
|
||||
# InfluxDB vars
|
||||
influxdb_admin_port: 8083
|
||||
influxdb_port: 8086
|
||||
influxdb_db_name: telegraf
|
||||
influxdb_db_retention: 90d
|
||||
influxdb_db_retention_policy: openstack
|
||||
influxdb_db_replication: 1
|
||||
influxdb_db_root_name: root
|
||||
influxdb_db_root_password: SuperSecrete
|
||||
influxdb_db_metric_user: openstack
|
||||
influxdb_db_metric_password: SuperDuperSecrete
|
Loading…
Reference in New Issue
Block a user