Added nova quota plugin

This change adds a second plugin to the telegraf setup. The telegraf config file is changed to allow more than one external plugin to be executed and to allow full plugin execution between telegraf reporting intervals. Each plugin may account for up to 8 seconds of runtime, so the telegraf agent now uses a dynamic reporting interval based on the number of plugins a given agent needs to execute. Change-Id: I652e8e2f13bd4fb9135280b76f2344177a14eaf7 Signed-off-by: Kevin Carter <kevin.carter@rackspace.com>
2016-12-07 15:27:14 -06:00 · 2016-12-07 15:27:14 -06:00 · 5b93b9a2c2
commit 5b93b9a2c2
parent 76ad4f52da
3 changed files with 237 additions and 16 deletions
--- a/cluster_metrics/playbook-influx-telegraf.yml
+++ b/cluster_metrics/playbook-influx-telegraf.yml
@ -44,18 +44,8 @@
      with_dict: "{{ command_plugins }}"
      when:
        - item.value.when_group | bool
-        - item.value.group == inventory_hostname or 
+        - item.value.group == inventory_hostname or
          inventory_hostname in item.value.group | default([])
-    - name: Add to command plugins
-      set_fact:
-        commands: "{{ commands | union(item.value.command) }}"
-      with_dict: "{{ command_plugins }}"
-      when:
-        - item.value.when_group | bool
-        - item.value.group == inventory_hostname or 
-          inventory_hostname in item.value.group | default([])
-      tags:
-        - always
    - name: Store my_cnf
      slurp:
        src: "/root/.my.cnf"
@ -96,5 +86,11 @@
          - "python /opt/telegraf/ironic_nodes.py"
        group: "{{ groups['utility_all'][0] }}"
        when_group: "{{ (groups['ironic_api'] | length) > 0 }}"
+      # Nova quota plugin: assigned to the first utility host and only
+      # enabled when the nova_compute group has at least one member.
+      vm_quota:
+        plugin_name: "vm_quota.py"
+        command:
+          - "python /opt/telegraf/vm_quota.py"
+        group: "{{ groups['utility_all'][0] }}"
+        when_group: "{{ (groups['nova_compute'] | length) > 0 }}"
    influx_telegraf_targets:
      - "{{ influxdb_host|default(internal_lb_vip_address) }}:{{ influxdb_port }}"
--- a/cluster_metrics/templates/telegraf-plugins/vm_consumers.py
+++ b/cluster_metrics/templates/telegraf-plugins/vm_consumers.py
@ -0,0 +1,205 @@
+#!/bin/python
+#
+# Copyright 2016, Rackspace US, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+
+from openstack import connection as os_conn
+
+
+# Keystone credentials passed to the OpenStack connection.  The values are
+# Jinja2 placeholders that are filled in when this template is rendered.
+OS_AUTH_ARGS = {
+    'auth_url': '{{ keystone_service_internalurl }}',
+    'project_name': '{{ keystone_admin_tenant_name }}',
+    'user_domain_name': '{{ openrc_os_domain_name }}',
+    'project_domain_name': '{{ openrc_os_domain_name }}',
+    'username': '{{ keystone_admin_user_name }}',
+    'password': '{{ keystone_auth_admin_password }}',
+}
+
+# Holds the shared connection object; populated on the first _connect() call.
+OS_CONNECTION = {'conn': None}
+
+
+def line_return(collection, metric_name):
+    """Format a mapping as a single ``<metric> <key>=<value>,...`` line.
+
+    Spaces in keys are replaced with underscores.  Trailing commas are
+    stripped from the finished line.
+
+    :param collection: mapping of field name to field value
+    :param metric_name: name placed at the start of the line
+    :returns: the formatted line as a ``str``
+    """
+    fields = ','.join(
+        '%s=%s' % (name.replace(' ', '_'), measurement)
+        for name, measurement in collection.items()
+    )
+    return ('%s ' % metric_name + fields).rstrip(',')
+
+
+def _connect():
+    """Return the shared OpenStack connection, creating it on first use.
+
+    The connection is stored in ``OS_CONNECTION['conn']`` so later calls
+    reuse it instead of building a new one from ``OS_AUTH_ARGS``.
+    """
+    cached = OS_CONNECTION['conn']
+    if not cached:
+        cached = OS_CONNECTION['conn'] = os_conn.Connection(**OS_AUTH_ARGS)
+    return cached
+
+
+def get_consumers():
+    """Return the projects whose quota and usage should be reported.
+
+    Every project from the identity service is returned except those whose
+    description is 'heat stack user project' (compared case-insensitively).
+
+    :returns: ``list`` of project objects
+    """
+    conn = _connect()
+    _consumers = list()
+    for project in conn.identity.projects():
+        # A project may have no description at all; treat that as an empty
+        # string rather than failing on ``None.lower()``.
+        description = project['description'] or ''
+        if description.lower() != 'heat stack user project':
+            _consumers.append(project)
+    return _consumers
+
+
+def get_consumer_limits(consumer_id):
+    """Fetch the compute quota set for one project.
+
+    The quota is read from ``/os-quota-sets/<consumer_id>`` on the internal
+    compute endpoint.
+
+    :param consumer_id: ID of the project to look up
+    :returns: the ``quota_set`` entry of the decoded JSON response
+    """
+    compute = _connect().compute
+    endpoint = compute.session.get_endpoint(
+        interface='internal',
+        service_type='compute'
+    )
+    response = compute.session.get(endpoint + '/os-quota-sets/' + consumer_id)
+    return response.json()['quota_set']
+
+
+def get_consumer_usage():
+    """Return the detailed server listing across all tenants.
+
+    The request is made with ``all_tenants=True`` and ``limit=5000``.
+    """
+    return _connect().compute.servers(
+        details=True,
+        all_tenants=True,
+        limit=5000
+    )
+
+
+def get_flavors():
+    """Build a lookup of flavor ID to the resources that flavor provides.
+
+    :returns: ``dict`` keyed by flavor ID; each value is a ``dict`` with the
+              keys ``ram``, ``cores`` (the flavor's vcpus) and ``disk``
+    """
+    flavors_by_id = dict()
+    for flavor in _connect().compute.flavors():
+        flavor_id = flavor['id']
+        flavors_by_id[flavor_id] = {
+            'ram': flavor['ram'],
+            'cores': flavor['vcpus'],
+            'disk': flavor['disk'],
+        }
+    return flavors_by_id
+
+
+def main():
+    """Print nova quota and usage metrics for every consumer (project).
+
+    One line is printed per metric, formatted by ``line_return``.  Quota
+    limits come from ``get_consumer_limits``; usage is derived from the
+    flavor of every server returned by ``get_consumer_usage``; the
+    ``consumer_quota_totals`` metric holds the sums across all consumers.
+    """
+    consumer_quota_instance = dict()
+    consumer_quota_cores = dict()
+    consumer_quota_ram = dict()
+    consumer_used_instances = collections.Counter()
+    consumer_used_cores = collections.Counter()
+    consumer_used_ram = collections.Counter()
+    consumer_used_disk = collections.Counter()
+
+    flavor_cache = get_flavors()
+    consumer_id_cache = dict()
+    for consumer in get_consumers():
+        consumer_name = consumer['name']
+        consumer_id = consumer['id']
+        _quota = get_consumer_limits(consumer_id)
+        consumer_id_cache[consumer_id] = consumer_name
+        consumer_quota_instance[consumer_name] = int(_quota['instances'])
+        consumer_quota_cores[consumer_name] = int(_quota['cores'])
+        consumer_quota_ram[consumer_name] = int(_quota['ram'])
+
+    for used_instance in get_consumer_usage():
+        consumer_name = consumer_id_cache.get(used_instance['tenant_id'])
+        if consumer_name is None:
+            # The owning project was not returned by get_consumers() (for
+            # example it was filtered out there); skip the server instead of
+            # aborting the whole run with a KeyError.
+            continue
+        consumer_used_instances[consumer_name] += 1
+
+        flavor = flavor_cache.get(used_instance['flavor']['id'])
+        if flavor is None:
+            # The flavor is missing from the get_flavors() listing; the
+            # server is still counted but its resources cannot be attributed.
+            continue
+        consumer_used_cores[consumer_name] += int(flavor['cores'])
+        consumer_used_ram[consumer_name] += int(flavor['ram'])
+        consumer_used_disk[consumer_name] += int(flavor['disk'])
+
+    consumer_quota_totals = {
+        'total_quota_instance': sum(consumer_quota_instance.values()),
+        'total_quota_cores': sum(consumer_quota_cores.values()),
+        'total_quota_ram': sum(consumer_quota_ram.values()),
+        'total_used_instances': sum(consumer_used_instances.values()),
+        'total_used_cores': sum(consumer_used_cores.values()),
+        'total_used_ram': sum(consumer_used_ram.values()),
+        'total_used_disk': sum(consumer_used_disk.values()),
+    }
+
+    # Metrics are listed in the order they have always been emitted.
+    metrics = (
+        ('consumer_quota_instance', consumer_quota_instance),
+        ('consumer_quota_cores', consumer_quota_cores),
+        ('consumer_quota_ram', consumer_quota_ram),
+        ('consumer_used_instances', consumer_used_instances),
+        ('consumer_used_cores', consumer_used_cores),
+        ('consumer_used_ram', consumer_used_ram),
+        ('consumer_used_disk', consumer_used_disk),
+        ('consumer_quota_totals', consumer_quota_totals),
+    )
+    for metric_name, collection in metrics:
+        # A metric with no fields (e.g. usage when no servers exist) would be
+        # printed as a bare name, which is not a valid influx line; skip it.
+        if collection:
+            print(line_return(collection=collection, metric_name=metric_name))
+
+# Collect and print the metrics only when executed as a script.
+if __name__ == '__main__':
+    main()
--- a/cluster_metrics/templates/telegraf.conf.j2
+++ b/cluster_metrics/templates/telegraf.conf.j2
@ -5,13 +5,26 @@
  node_type = "physical_host"
 {% endif %}

+{# Gather the commands of every plugin that is enabled for this host. #}
+{%   set run_commands = [] %}
+{%   for key, value in command_plugins.items() %}
+{%     if value.when_group | bool and (value.group == inventory_hostname or inventory_hostname in value.group | default([])) %}
+{%       set _ = run_commands.extend(value.command) %}
+{%     endif %}
+{%   endfor %}
+
+{# The run_int adds padding to the interval so that plugins being added to the system have #}
+{#  enough time to execute. Every added plugin will add 8 seconds to the interval with a #}
+{#  default of 24. This value is later used as the flush interval which needs to be 2x the agent. #}
+{# No ternary is needed: with zero commands run_int * 8 is already 0. A filter binds tighter #}
+{#  than "<", so "run_int < 1 | ternary(...)" compared run_int to the filter result instead. #}
+{% set run_int = run_commands | length %}
+{% set interval = (run_int * 8) + 24 %}
+
 [agent]
-  interval = "24s"
+  interval = "{{ interval }}s"
  round_interval = false
  metric_batch_size = 1024
  metric_buffer_limit = 10240
  collection_jitter = "8s"
-  flush_interval = "48s"
+  flush_interval = "{{ interval * 2 }}s"
  flush_jitter = "8s"
  debug = false
  quiet = true
@ -33,10 +46,17 @@

 [[inputs.system]]

-{%   if commands %}
+{# Gather the commands of every plugin that is enabled for this host. #}
+{%   set run_commands = [] %}
+{%   for key, value in command_plugins.items() %}
+{%     if value.when_group | bool and (value.group == inventory_hostname or inventory_hostname in value.group | default([])) %}
+{%       set _ = run_commands.extend(value.command) %}
+{%     endif %}
+{%   endfor %}
+
+{%   if run_commands %}
 [[inputs.exec]]
-  commands = [{{ commands | map('quote') | join(',') }}]
-  timeout = "15s"
+  commands = [{{ run_commands | map('quote') | join(',') }}]
+  timeout = "{{ (run_commands | length) * 8 }}s"
  data_format = "influx"
 {%   endif %}