diff --git a/prometheus-openstack-exporter/Dockerfile.ubuntu_xenial b/prometheus-openstack-exporter/Dockerfile.ubuntu_xenial
new file mode 100644
index 00000000..be6deaa8
--- /dev/null
+++ b/prometheus-openstack-exporter/Dockerfile.ubuntu_xenial
@@ -0,0 +1,15 @@
ARG FROM=docker.io/ubuntu:xenial
FROM ${FROM}

# Install the exporter's runtime dependencies (Python 2 packages on xenial),
# then clean apt caches in the same layer to keep the image small.
# FIX: added the missing space before the line continuation after
# python-prometheus-client for readability and to avoid token-joining
# surprises if the following line's leading whitespace is ever removed.
RUN apt-get -y update \
 && apt-get -y install curl python-dateutil python-requests python-simplejson python-yaml python-prometheus-client \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*

# NOTE(review): COPY creates the destination directory itself, so the mkdir
# is redundant; kept to preserve the original layer layout.
RUN mkdir /usr/local/bin/exporter
COPY exporter /usr/local/bin/exporter
RUN chmod +x /usr/local/bin/exporter/main.py

# Default Prometheus OpenStack exporter port.
EXPOSE 9103

CMD ["/usr/local/bin/exporter/main.py"]
diff --git a/prometheus-openstack-exporter/build.sh b/prometheus-openstack-exporter/build.sh
new file mode 100644
index 00000000..dbf95a3a
--- /dev/null
+++ b/prometheus-openstack-exporter/build.sh
@@ -0,0 +1,14 @@
#!/bin/bash
# Build the prometheus-openstack-exporter image.
# Honors VERSION, DISTRO, REGISTRY_URI, EXTRA_TAG_INFO and extra_build_args
# from the environment.
# FIX: abort on errors, replace legacy backticks with $(), and quote
# expansions so paths containing spaces cannot break the build.
set -e

SCRIPT=$(realpath "$0")
SCRIPT_DIR=$(dirname "${SCRIPT}")
## Only build from main folder
cd "${SCRIPT_DIR}/.."

IMAGE="prometheus-openstack-exporter"
VERSION=${VERSION:-latest}
DISTRO=${DISTRO:-ubuntu_xenial}
REGISTRY_URI=${REGISTRY_URI:-"openstackhelm/"}
EXTRA_TAG_INFO=${EXTRA_TAG_INFO:-""}
# ${extra_build_args} is intentionally unquoted: it may carry several
# whitespace-separated docker-build flags.
docker build -f "${IMAGE}/Dockerfile.${DISTRO}" --network=host \
    -t "${REGISTRY_URI}${IMAGE}:${VERSION}-${DISTRO}${EXTRA_TAG_INFO}" \
    ${extra_build_args} "${IMAGE}"

cd -
diff --git a/prometheus-openstack-exporter/exporter/__init__.py b/prometheus-openstack-exporter/exporter/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/prometheus-openstack-exporter/exporter/base.py b/prometheus-openstack-exporter/exporter/base.py
new file mode 100644
index 00000000..c85e06bc
--- /dev/null
+++ b/prometheus-openstack-exporter/exporter/base.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+# Copyright 2017 The Openstack-Helm Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+
+
class OSBase(object):
    """Base class for OpenStack exporter collectors.

    Subclasses build cacheable stats (build_cache_data) stored under
    get_cache_key(), and render them for Prometheus via get_stats().
    """

    # Health states used by the API check collector.
    FAIL = 0
    OK = 1
    UNKNOWN = 2
    # All exported gauge names are namespaced under "openstack_".
    GAUGE_NAME_FORMAT = "openstack_{}"

    def __init__(self, oscache, osclient):
        self.oscache = oscache
        self.osclient = osclient
        # Register with the cache thread so our data gets refreshed.
        self.oscache.cache_me(self)

    def get_cache_data(self):
        """Return the most recently cached data for this collector."""
        return self.oscache.get_cache_data(self.get_cache_key())

    def build_cache_data(self):
        """Build a list of stat dicts to store in the cache."""
        # BUG FIX: was "raise NotImplemented(...)". NotImplemented is a
        # sentinel value, not an exception class, and is not callable --
        # raising it produces a confusing TypeError instead of the
        # intended NotImplementedError.
        raise NotImplementedError("Must be implemented by the subclass!")

    def get_cache_key(self):
        """Return the unique cache key for this collector."""
        raise NotImplementedError("Must be implemented by the subclass!")

    def get_stats(self):
        """Render cached data in Prometheus exposition format."""
        raise NotImplementedError("Must be implemented by the subclass!")

    def gauge_name_sanitize(self, input):
        """Sanitize a metric name to [a-zA-Z0-9:_] and prefix it.

        NOTE(review): the parameter name shadows the builtin "input";
        kept unchanged for interface compatibility.
        """
        sanitized = re.sub(r'[^a-zA-Z0-9:_]', '_', input)
        if input.startswith("openstack_"):
            return sanitized
        return self.GAUGE_NAME_FORMAT.format(sanitized)
diff --git a/prometheus-openstack-exporter/exporter/check_os_api.py b/prometheus-openstack-exporter/exporter/check_os_api.py
new file mode 100644
index 00000000..9778432e
--- /dev/null
+++ b/prometheus-openstack-exporter/exporter/check_os_api.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+# Copyright 2017 The Openstack-Helm Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from base import OSBase
+
+from urlparse import urlparse
+from prometheus_client import CollectorRegistry, generate_latest, Gauge
+import logging
+logging.basicConfig(
+ level=logging.DEBUG,
+ format="%(asctime)s:%(levelname)s:%(message)s")
+logger = logging.getLogger(__name__)
+
+
class CheckOSApi(OSBase):
    """Class to check the status of OpenStack API services."""

    # Per-service probe definition: request path, acceptable HTTP status
    # codes, optional 'auth' flag (send a token), and the check name.
    CHECK_MAP = {
        'keystone': {'path': '/', 'expect': [300], 'name': 'keystone-public-api'},
        'heat': {'path': '/', 'expect': [300], 'name': 'heat-api'},
        'heat-cfn': {'path': '/', 'expect': [300], 'name': 'heat-cfn-api'},
        'glance': {'path': '/', 'expect': [300], 'name': 'glance-api'},
        'cinder': {'path': '/', 'expect': [200, 300], 'name': 'cinder-api'},
        'cinderv2': {
            'path': '/', 'expect': [200, 300], 'name': 'cinder-v2-api'},
        'neutron': {'path': '/', 'expect': [200], 'name': 'neutron-api'},
        'nova': {'path': '/', 'expect': [200], 'name': 'nova-api'},
        'ceilometer': {
            'path': 'v2/capabilities', 'expect': [200], 'auth': True,
            'name': 'ceilometer-api'},
        'swift': {'path': '/', 'expect': [200], 'name': 'ceph'},
        'swift_s3': {
            'path': 'healthcheck', 'expect': [200], 'name': 'swift-s3-api'},
        'murano': {'path': '/', 'expect': [200, 300], 'name': 'murano-api'},
        'trove': {'path': '/', 'expect': [200, 300], 'name': 'trove-api'},
        'mistral': {'path': '/', 'expect': [200, 300], 'name': 'mistral-api'},
        'designate': {'path': '/', 'expect': [200, 300], 'name': 'designate-api'},
        'contrail_analytics': {'path': '/', 'expect': [200], 'name': 'contrail-analytics-api'},
        'contrail_config': {'path': '/', 'expect': [200], 'name': 'contrail-config-api'},
        'congress': {'path': '/', 'expect': [200], 'name': 'congress-api'},
        'placement': {'path': '/', 'expect': [401], 'name': 'placement-api'},
        'shipyard': {'path': 'v1.0/health', 'expect': [204, 503], 'name': 'shipyard'},
        'armada': {'path': 'v1.0/health', 'expect': [204, 503], 'name': 'armada'},
        'deckhand': {'path': 'v1.0/health', 'expect': [204, 503], 'name': 'deckhand'},
        'drydock': {'path': 'v1.0/health', 'expect': [204, 503], 'name': 'drydock'},
        'promenade': {'path': 'v1.0/health', 'expect': [204, 503], 'name': 'promenade'},
    }

    def _service_url(self, endpoint, path):
        """Return 'scheme://netloc[/path]' built from a catalog endpoint."""
        url = urlparse(endpoint)
        u = '%s://%s' % (url.scheme, url.netloc)
        if path != '/':
            u = '%s/%s' % (u, path)
        return u

    def build_cache_data(self):
        """ Check the status of all the API services.

        Returns a list of dict items with 'service', 'status' (either OK,
        FAIL or UNKNOWN), 'url', 'status_code' and 'region' keys.
        """
        check_array = []
        catalog = self.osclient.service_catalog

        for service in catalog:
            name = service['name']
            url = None
            status_code = 500
            if name not in self.CHECK_MAP:
                logger.info(
                    "No check found for service '%s', creating one" % name)
                # NOTE: this mutates the class-level CHECK_MAP, so the
                # generated default check is reused on later passes.
                self.CHECK_MAP[name] = {
                    'path': '/',
                    'expect': [200, 300, 302, 401, 404],
                    'name': name,
                }
            check = self.CHECK_MAP[name]
            url = self._service_url(service['url'], check['path'])
            r = self.osclient.raw_get(
                url, token_required=check.get(
                    'auth', False))

            if r is not None:
                status_code = r.status_code

            if r is None or status_code not in check['expect']:
                logger.info(
                    "Service %s check failed "
                    "(returned '%s' but expected '%s')" % (
                        name, status_code, check['expect'])
                )
                status = self.FAIL
            else:
                status = self.OK

            check_array.append({
                'service': name,
                'status': status,
                'url': url,
                'status_code': status_code,
                'region': self.osclient.region,
            })
        return check_array

    def get_cache_key(self):
        """Cache key under which check results are stored."""
        return "check_os_api"

    def get_stats(self):
        """Render one gauge per checked API service."""
        registry = CollectorRegistry()
        labels = ['region', 'url', 'service']
        check_api_data_cache = self.get_cache_data()
        for check_api_data in check_api_data_cache:
            label_values = [
                check_api_data['region'],
                check_api_data['url'],
                check_api_data['service']]
            # FIX: local variable was misspelled "gague_name".
            gauge_name = self.gauge_name_sanitize(
                "check_{}_api".format(check_api_data['service']))
            check_gauge = Gauge(
                gauge_name,
                'Openstack API check. fail = 0, ok = 1 and unknown = 2',
                labels,
                registry=registry)
            check_gauge.labels(*label_values).set(check_api_data['status'])
        return generate_latest(registry)
diff --git a/prometheus-openstack-exporter/exporter/cinder_services.py b/prometheus-openstack-exporter/exporter/cinder_services.py
new file mode 100644
index 00000000..ff94f45d
--- /dev/null
+++ b/prometheus-openstack-exporter/exporter/cinder_services.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+# Copyright 2017 The Openstack-Helm Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from base import OSBase
+from collections import Counter
+from collections import defaultdict
+from prometheus_client import CollectorRegistry, generate_latest, Gauge
+import logging
+logging.basicConfig(
+ level=logging.DEBUG,
+ format="%(asctime)s:%(levelname)s:%(message)s")
+logger = logging.getLogger(__name__)
+
+
class CinderServiceStats(OSBase):
    """ Class to report the statistics on Cinder services.

    state of workers broken down by state
    """

    def build_cache_data(self):
        """Aggregate per-service worker states into percent/total stats."""
        aggregated_workers = defaultdict(Counter)

        stats = self.osclient.get_workers('cinder')
        for worker in stats:
            service = worker['service']
            state = worker['state']
            aggregated_workers[service][state] += 1

        for service in aggregated_workers:
            totalw = sum(aggregated_workers[service].values())
            for state in self.osclient.states:
                # FIX: guard against division by zero, consistent with
                # the NovaServiceStats collector.
                prct = 0
                if totalw > 0:
                    prct = (100.0 * aggregated_workers[service][state]) \
                        / totalw
                stats.append({
                    'stat_name': "services_{}_{}_percent".format(service, state),
                    'stat_value': prct,
                    'state': state,
                    'service': service
                })
                stats.append({
                    'stat_name': "services_{}_{}_total".format(service, state),
                    'stat_value': aggregated_workers[service][state],
                    'state': state,
                    'service': service
                })

        return stats

    def get_cache_key(self):
        """Cache key under which cinder service stats are stored."""
        return "cinder_services_stats"

    def get_stats(self):
        """Render cached cinder service stats as Prometheus gauges."""
        registry = CollectorRegistry()
        labels = ['region', 'host', 'service', 'state']
        cinder_services_stats_cache = self.get_cache_data()
        for cinder_services_stat in cinder_services_stats_cache:
            stat_gauge = Gauge(
                self.gauge_name_sanitize(
                    cinder_services_stat['stat_name']),
                'Openstack Cinder Service statistic',
                labels,
                registry=registry)
            label_values = [self.osclient.region,
                            cinder_services_stat.get('host', ''),
                            cinder_services_stat.get('service', ''),
                            cinder_services_stat.get('state', '')]
            stat_gauge.labels(*label_values).set(
                cinder_services_stat['stat_value'])
        return generate_latest(registry)
diff --git a/prometheus-openstack-exporter/exporter/hypervisor_stats.py b/prometheus-openstack-exporter/exporter/hypervisor_stats.py
new file mode 100644
index 00000000..df5e92c9
--- /dev/null
+++ b/prometheus-openstack-exporter/exporter/hypervisor_stats.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python
+# Copyright 2017 The Openstack-Helm Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from base import OSBase
+
+from prometheus_client import CollectorRegistry, generate_latest, Gauge
+import logging
+logging.basicConfig(
+ level=logging.DEBUG,
+ format="%(asctime)s:%(levelname)s:%(message)s")
+logger = logging.getLogger(__name__)
+
+
class HypervisorStats(OSBase):
    """ Class to report the statistics on Nova hypervisors."""

    # Mapping of nova hypervisor API fields to exported stat names.
    VALUE_MAP = {
        'current_workload': 'running_tasks',
        'running_vms': 'running_instances',
        'local_gb_used': 'used_disk_GB',
        'free_disk_gb': 'free_disk_GB',
        'memory_mb_used': 'used_ram_MB',
        'free_ram_mb': 'free_ram_MB',
        'vcpus_used': 'used_vcpus',
    }

    def __init__(
            self,
            oscache,
            osclient,
            cpu_overcommit_ratio,
            ram_overcommit_ratio):
        super(HypervisorStats, self).__init__(oscache, osclient)
        self.cpu_overcommit_ratio = cpu_overcommit_ratio
        # NOTE(review): ram_overcommit_ratio is stored but not yet used
        # by any computation below.
        self.ram_overcommit_ratio = ram_overcommit_ratio

    def build_cache_data(self):
        """Collect per-host, per-aggregate and global hypervisor stats."""
        cache_stats = []
        nova_aggregates = {}
        r = self.osclient.get('nova', 'os-aggregates')
        if not r:
            logger.warning("Could not get nova aggregates")
        else:
            aggregates_list = r.json().get('aggregates', [])
            for agg in aggregates_list:
                nova_aggregates[agg['name']] = {
                    'id': agg['id'],
                    # Aggregate membership is matched on short hostname.
                    'hosts': [h.split('.')[0] for h in agg['hosts']],
                    'metrics': {'free_vcpus': 0},
                }
                nova_aggregates[agg['name']]['metrics'].update(
                    {v: 0 for v in self.VALUE_MAP.values()}
                )

        r = self.osclient.get('nova', 'os-hypervisors/detail')
        if not r:
            logger.warning("Could not get hypervisor statistics")
            # FIX: was a bare "return" -- caching None made get_stats()
            # blow up when iterating the cached value. Return the (empty)
            # list instead.
            return cache_stats

        total_stats = {v: 0 for v in self.VALUE_MAP.values()}
        total_stats['free_vcpus'] = 0
        hypervisor_stats = r.json().get('hypervisors', [])
        for stats in hypervisor_stats:
            host = stats['hypervisor_hostname']
            for k, v in self.VALUE_MAP.iteritems():
                m_val = stats.get(k, 0)
                cache_stats.append({
                    'stat_name': v,
                    'stat_value': m_val,
                    'host': host,
                })
                total_stats[v] += m_val
                for agg in nova_aggregates.keys():
                    agg_hosts = nova_aggregates[agg]['hosts']
                    if host in agg_hosts:
                        nova_aggregates[agg]['metrics'][v] += m_val
            m_vcpus = stats.get('vcpus', 0)
            m_vcpus_used = stats.get('vcpus_used', 0)
            # Free vCPUs honor the configured CPU overcommit ratio.
            free = (int(self.cpu_overcommit_ratio * m_vcpus)) - m_vcpus_used
            cache_stats.append({
                'stat_name': 'free_vcpus',
                'stat_value': free,
                'host': host,
            })
            total_stats['free_vcpus'] += free
            for agg in nova_aggregates.keys():
                agg_hosts = nova_aggregates[agg]['hosts']
                if host in agg_hosts:
                    # FIX: was "self.extra_config['cpu_ratio']", an
                    # attribute that does not exist on this class (it
                    # would raise AttributeError on the first aggregate
                    # host). The value is identical to the free-vCPU
                    # count already computed above, so reuse it.
                    nova_aggregates[agg]['metrics']['free_vcpus'] += free

        # Dispatch the aggregate metrics
        for agg in nova_aggregates.keys():
            agg_id = nova_aggregates[agg]['id']
            agg_total_free_ram = (
                nova_aggregates[agg]['metrics']['free_ram_MB'] +
                nova_aggregates[agg]['metrics']['used_ram_MB']
            )
            # Only report a percentage when the aggregate has any RAM.
            if agg_total_free_ram > 0:
                nova_aggregates[agg]['metrics']['free_ram_percent'] = round(
                    (100.0 * nova_aggregates[agg]['metrics']['free_ram_MB']) /
                    agg_total_free_ram,
                    2)
            for k, v in nova_aggregates[agg]['metrics'].iteritems():
                cache_stats.append({
                    'stat_name': 'aggregate_{}'.format(k),
                    'stat_value': v,
                    'aggregate': agg,
                    'aggregate_id': agg_id,
                })
        # Dispatch the global metrics
        for k, v in total_stats.iteritems():
            cache_stats.append({
                'stat_name': 'total_{}'.format(k),
                'stat_value': v,
            })

        return cache_stats

    def get_cache_key(self):
        """Cache key under which hypervisor stats are stored."""
        return "hypervisor_stats"

    def get_stats(self):
        """Render cached hypervisor stats as Prometheus gauges."""
        registry = CollectorRegistry()
        labels = ['region', 'host', 'aggregate', 'aggregate_id']
        hypervisor_stats_cache = self.get_cache_data()
        for hypervisor_stat in hypervisor_stats_cache:
            stat_gauge = Gauge(
                self.gauge_name_sanitize(
                    hypervisor_stat['stat_name']),
                'Openstack Hypervisor statistic',
                labels,
                registry=registry)
            label_values = [self.osclient.region,
                            hypervisor_stat.get('host', ''),
                            hypervisor_stat.get('aggregate', ''),
                            hypervisor_stat.get('aggregate_id', '')]
            stat_gauge.labels(*label_values).set(hypervisor_stat['stat_value'])
        return generate_latest(registry)
diff --git a/prometheus-openstack-exporter/exporter/main.py b/prometheus-openstack-exporter/exporter/main.py
new file mode 100644
index 00000000..51990850
--- /dev/null
+++ b/prometheus-openstack-exporter/exporter/main.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+# Copyright 2017 The Openstack-Helm Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import yaml
+import os
+import urlparse
+from BaseHTTPServer import BaseHTTPRequestHandler
+from BaseHTTPServer import HTTPServer
+from SocketServer import ForkingMixIn
+from prometheus_client import CONTENT_TYPE_LATEST
+
+from osclient import OSClient
+from oscache import OSCache
+from check_os_api import CheckOSApi
+from neutron_agents import NeutronAgentStats
+from nova_services import NovaServiceStats
+from cinder_services import CinderServiceStats
+from hypervisor_stats import HypervisorStats
+
+import logging
+logging.basicConfig(
+ level=logging.DEBUG,
+ format="%(asctime)s:%(levelname)s:%(message)s")
+logger = logging.getLogger(__name__)
+
+collectors = []
+
+
class ForkingHTTPServer(ForkingMixIn, HTTPServer):
    # HTTP server that forks a child process per request, so one slow
    # scrape cannot block other clients.
    pass
+
+
class OpenstackExporterHandler(BaseHTTPRequestHandler):
    """HTTP handler serving aggregated collector output on /metrics."""

    def __init__(self, *args, **kwargs):
        BaseHTTPRequestHandler.__init__(self, *args, **kwargs)

    def do_GET(self):
        url = urlparse.urlparse(self.path)
        if url.path == '/metrics':
            # Concatenate the exposition output of every registered
            # collector; a failing collector is logged and skipped.
            output = ''
            for collector in collectors:
                try:
                    stats = collector.get_stats()
                    if stats is not None:
                        output = output + stats
                # FIX: was "except BaseException", which also swallows
                # KeyboardInterrupt/SystemExit; catch Exception instead
                # and log the traceback for debuggability.
                except Exception:
                    logger.exception(
                        "Could not get stats for collector {}".format(
                            collector.get_cache_key()))
            self.send_response(200)
            self.send_header('Content-Type', CONTENT_TYPE_LATEST)
            self.end_headers()
            self.wfile.write(output)
        elif url.path == '/':
            self.send_response(200)
            self.end_headers()
            # NOTE(review): the landing-page markup was garbled in the
            # source under review; reconstructed as the conventional
            # exporter index page -- confirm against the original file.
            self.wfile.write("""<html>
<head><title>OpenStack Exporter</title></head>
<body>
<h1>OpenStack Exporter</h1>
<p>Visit <a href="/metrics">/metrics</a> to use.</p>
</body>
</html>""")
        else:
            self.send_response(404)
            self.end_headers()
+
+
def handler(*args, **kwargs):
    # Factory passed to HTTPServer: instantiate a request handler per
    # connection. BaseHTTPRequestHandler handles the request from within
    # its constructor, so the instance is deliberately not returned.
    OpenstackExporterHandler(*args, **kwargs)
+
+
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        usage=__doc__,
        description='Prometheus OpenStack exporter',
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--config-file', nargs='?',
                        help='Configuration file path',
                        type=argparse.FileType('r'),
                        required=False)
    args = parser.parse_args()
    config = {}
    if args.config_file:
        config = yaml.safe_load(args.config_file.read())

    # Each setting is taken from the YAML config file when present,
    # otherwise from the same-named environment variable. Note that the
    # os.getenv() fallback (and its int/float conversion) is evaluated
    # even when the config file supplies the value.
    os_keystone_url = config.get('OS_AUTH_URL', os.getenv('OS_AUTH_URL'))
    os_password = config.get('OS_PASSWORD', os.getenv('OS_PASSWORD'))
    os_tenant_name = config.get(
        'OS_PROJECT_NAME',
        os.getenv('OS_PROJECT_NAME'))
    os_username = config.get('OS_USERNAME', os.getenv('OS_USERNAME'))
    os_user_domain = config.get(
        'OS_USER_DOMAIN_NAME',
        os.getenv('OS_USER_DOMAIN_NAME'))
    os_region = config.get('OS_REGION_NAME', os.getenv('OS_REGION_NAME'))
    os_timeout = config.get(
        'TIMEOUT_SECONDS', int(
            os.getenv(
                'TIMEOUT_SECONDS', 10)))
    # How often (seconds) the cache thread refreshes collector data.
    os_polling_interval = config.get(
        'OS_POLLING_INTERVAL', int(
            os.getenv(
                'OS_POLLING_INTERVAL', 900)))
    os_retries = config.get('OS_RETRIES', int(os.getenv('OS_RETRIES', 1)))
    # Overcommit ratios used by the hypervisor stats collector.
    os_cpu_overcomit_ratio = config.get(
        'OS_CPU_OC_RATIO', float(
            os.getenv(
                'OS_CPU_OC_RATIO', 1)))
    os_ram_overcomit_ratio = config.get(
        'OS_RAM_OC_RATIO', float(
            os.getenv(
                'OS_RAM_OC_RATIO', 1)))

    osclient = OSClient(
        os_keystone_url,
        os_password,
        os_tenant_name,
        os_username,
        os_user_domain,
        os_region,
        os_timeout,
        os_retries)
    # The cache is itself a collector (exports its refresh duration);
    # each OSBase subclass registers with it on construction.
    oscache = OSCache(os_polling_interval, os_region)
    collectors.append(oscache)

    check_os_api = CheckOSApi(oscache, osclient)
    collectors.append(check_os_api)
    neutron_agent_stats = NeutronAgentStats(oscache, osclient)
    collectors.append(neutron_agent_stats)
    cinder_service_stats = CinderServiceStats(oscache, osclient)
    collectors.append(cinder_service_stats)
    nova_service_stats = NovaServiceStats(oscache, osclient)
    collectors.append(nova_service_stats)
    hypervisor_stats = HypervisorStats(
        oscache,
        osclient,
        os_cpu_overcomit_ratio,
        os_ram_overcomit_ratio)
    collectors.append(hypervisor_stats)

    # Start the background refresh thread (daemon), then serve forever.
    oscache.start()

    listen_port = config.get(
        'LISTEN_PORT', int(
            os.getenv(
                'LISTEN_PORT', 9103)))
    server = ForkingHTTPServer(('', listen_port), handler)
    server.serve_forever()
diff --git a/prometheus-openstack-exporter/exporter/neutron_agents.py b/prometheus-openstack-exporter/exporter/neutron_agents.py
new file mode 100644
index 00000000..934b141d
--- /dev/null
+++ b/prometheus-openstack-exporter/exporter/neutron_agents.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+# Copyright 2017 The Openstack-Helm Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from base import OSBase
+from collections import Counter
+from collections import defaultdict
+from prometheus_client import CollectorRegistry, generate_latest, Gauge
+import logging
+logging.basicConfig(
+ level=logging.DEBUG,
+ format="%(asctime)s:%(levelname)s:%(message)s")
+logger = logging.getLogger(__name__)
+
+
class NeutronAgentStats(OSBase):
    """ Class to report the statistics on Neutron agents.

    state of agents
    """

    def build_cache_data(self):
        """Aggregate per-agent states into percent/total stats."""
        # Get information of the state per agent
        # State can be up or down
        aggregated_agents = defaultdict(Counter)
        stats = self.osclient.get_workers('neutron')

        for agent in stats:
            service = agent['service']
            state = agent['state']
            aggregated_agents[service][state] += 1

        for service in aggregated_agents:
            totala = sum(aggregated_agents[service].values())
            for state in self.osclient.states:
                # FIX: guard against division by zero, consistent with
                # the NovaServiceStats collector.
                prct = 0
                if totala > 0:
                    prct = (100.0 * aggregated_agents[service][state]) \
                        / totala
                stats.append({
                    'stat_name': "services_{}_{}_percent".format(service, state),
                    'stat_value': prct,
                    'service': service,
                    'state': state
                })
                stats.append({
                    'stat_name': "services_{}_{}_total".format(service, state),
                    'stat_value': aggregated_agents[service][state],
                    'service': service,
                    'state': state,
                })
        return stats

    def get_cache_key(self):
        """Cache key under which neutron agent stats are stored."""
        return "neutron_agent_stats"

    def get_stats(self):
        """Render cached neutron agent stats as Prometheus gauges."""
        registry = CollectorRegistry()
        labels = ['region', 'host', 'service', 'state']
        neutron_agent_stats_cache = self.get_cache_data()
        for neutron_agent_stat in neutron_agent_stats_cache:
            stat_gauge = Gauge(
                self.gauge_name_sanitize(
                    neutron_agent_stat['stat_name']),
                'Openstack Neutron agent statistic',
                labels,
                registry=registry)
            label_values = [self.osclient.region,
                            neutron_agent_stat.get('host', ''),
                            neutron_agent_stat.get('service', ''),
                            neutron_agent_stat.get('state', '')]
            stat_gauge.labels(*label_values).set(
                neutron_agent_stat['stat_value'])
        return generate_latest(registry)
diff --git a/prometheus-openstack-exporter/exporter/nova_services.py b/prometheus-openstack-exporter/exporter/nova_services.py
new file mode 100644
index 00000000..dc8672c9
--- /dev/null
+++ b/prometheus-openstack-exporter/exporter/nova_services.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+# Copyright 2017 The Openstack-Helm Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from base import OSBase
+from collections import Counter
+from collections import defaultdict
+from prometheus_client import CollectorRegistry, generate_latest, Gauge
+import logging
+logging.basicConfig(
+ level=logging.DEBUG,
+ format="%(asctime)s:%(levelname)s:%(message)s")
+logger = logging.getLogger(__name__)
+
+
class NovaServiceStats(OSBase):
    """ Class to report the statistics on Nova services.

    status per service broken down by state
    """

    def build_cache_data(self):
        """Aggregate per-service worker states into percent/total stats."""
        # The API reports each worker as 'up', 'down' or 'disabled'.
        stats = self.osclient.get_workers('nova')
        per_service = defaultdict(Counter)
        for entry in stats:
            per_service[entry['service']][entry['state']] += 1

        for svc in per_service:
            state_counts = per_service[svc]
            worker_count = sum(state_counts.values())
            for state in self.osclient.states:
                share = 0
                if worker_count > 0:
                    share = (100.0 * state_counts[state]) / worker_count

                stats.append({
                    'stat_name': "services_{}_{}_percent".format(svc, state),
                    'stat_value': share,
                    'state': state,
                    'service': svc
                })
                stats.append({
                    'stat_name': "services_{}_{}_total".format(svc, state),
                    'stat_value': state_counts[state],
                    'state': state,
                    'service': svc
                })
        return stats

    def get_cache_key(self):
        """Cache key under which nova service stats are stored."""
        return "nova_services_stats"

    def get_stats(self):
        """Render cached nova service stats as Prometheus gauges."""
        registry = CollectorRegistry()
        labels = ['region', 'host', 'service', 'state']
        for entry in self.get_cache_data():
            gauge = Gauge(
                self.gauge_name_sanitize(entry['stat_name']),
                'Openstack Nova Service statistic',
                labels,
                registry=registry)
            values = [self.osclient.region,
                      entry.get('host', ''),
                      entry.get('service', ''),
                      entry.get('state', '')]
            gauge.labels(*values).set(entry['stat_value'])
        return generate_latest(registry)
diff --git a/prometheus-openstack-exporter/exporter/oscache.py b/prometheus-openstack-exporter/exporter/oscache.py
new file mode 100644
index 00000000..50f6ffde
--- /dev/null
+++ b/prometheus-openstack-exporter/exporter/oscache.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+# Copyright 2017 The Openstack-Helm Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from threading import Thread
+from threading import Lock
+from prometheus_client import CollectorRegistry, generate_latest, Gauge
+from time import sleep, time
+import logging
+logging.basicConfig(
+ level=logging.DEBUG,
+ format="%(asctime)s:%(levelname)s:%(message)s")
+logger = logging.getLogger(__name__)
+
+
class ThreadSafeDict(dict):
    """A dict that doubles as a context manager guarding access with a lock.

    Use ``with d: ...`` to hold the lock while reading or mutating.
    """

    def __init__(self, *args, **kwargs):
        dict.__init__(self, *args, **kwargs)
        self._lock = Lock()

    def __enter__(self):
        self._lock.acquire()
        return self

    def __exit__(self, exc_type, exc_value, tb):
        self._lock.release()
+
+
class OSCache(Thread):
    """Background daemon thread that periodically refreshes collector data.

    Collectors register via cache_me(); every refresh_interval seconds
    their build_cache_data() result is stored under their cache key.
    """

    def __init__(self, refresh_interval, region):
        Thread.__init__(self)
        # Daemon thread: do not block interpreter exit.
        self.daemon = True
        # Duration of the last refresh pass; exported by get_stats().
        self.duration = 0
        self.refresh_interval = refresh_interval
        self.cache = ThreadSafeDict()
        self.region = region
        self.osclients = []

    def cache_me(self, osclient):
        # Register a collector whose data should be kept fresh.
        self.osclients.append(osclient)
        logger.debug("new osclient added to cache")

    def run(self):
        # Refresh loop: rebuild every registered collector's cache entry,
        # record how long the pass took, then sleep until the next round.
        while True:
            start_time = time()
            for osclient in self.osclients:
                try:
                    self.cache[osclient.get_cache_key(
                    )] = osclient.build_cache_data()
                except Exception as e:
                    # A failing collector must not kill the refresh thread.
                    logger.error(str(e))
                    logger.error(
                        "failed to get data for cache key {}".format(
                            osclient.get_cache_key()))
            self.duration = time() - start_time
            sleep(self.refresh_interval)

    def get_cache_data(self, key):
        # Unknown keys yield an empty list so callers can iterate safely.
        if key in self.cache:
            return self.cache[key]
        else:
            return []

    def get_stats(self):
        """Export the last cache-refresh duration as a Prometheus gauge."""
        registry = CollectorRegistry()
        labels = ['region']
        label_values = [self.region]
        duration = Gauge('openstack_exporter_cache_refresh_duration_seconds',
                         'Cache refresh duration in seconds.',
                         labels, registry=registry)
        duration.labels(*label_values).set(self.duration)
        return generate_latest(registry)
diff --git a/prometheus-openstack-exporter/exporter/osclient.py b/prometheus-openstack-exporter/exporter/osclient.py
new file mode 100644
index 00000000..c99ff355
--- /dev/null
+++ b/prometheus-openstack-exporter/exporter/osclient.py
@@ -0,0 +1,285 @@
+#!/usr/bin/env python
+# Copyright 2017 The Openstack-Helm Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import dateutil.parser
+import dateutil.tz
+import requests
+import simplejson as json
+import logging
+logging.basicConfig(
+    level=logging.DEBUG,
+    format="%(asctime)s:%(levelname)s:%(message)s")  # root logger; DEBUG for all modules
+logger = logging.getLogger(__name__)
+
+
+class KeystoneException(Exception):
+    pass  # auth-related failures; not raised anywhere in this module yet — TODO confirm intended use
+
+
+class OSClient(object):
+    """ Base class for querying the OpenStack API endpoints.
+
+    It uses the Keystone service catalog to discover the API endpoints.
+    """
+    EXPIRATION_TOKEN_DELTA = datetime.timedelta(0, 30)  # renew the token 30s before real expiry
+    states = {'up': 1, 'down': 0, 'disabled': 2}  # worker state name -> exported metric value
+
+    def __init__(
+            self,
+            keystone_url,
+            password,
+            tenant_name,
+            username,
+            user_domain,
+            region,
+            timeout,
+            retries):
+        self.keystone_url = keystone_url  # Keystone v3 base URL used for auth
+        self.password = password
+        self.tenant_name = tenant_name  # project to scope the token to
+        self.username = username
+        self.user_domain = user_domain  # domain id for both user and project scope
+        self.region = region  # None means "accept endpoints from any region"
+        self.timeout = timeout  # per-request timeout, seconds
+        self.retries = retries
+        self.token = None  # token id; set by get_token()
+        self.valid_until = None  # token expiry minus EXPIRATION_TOKEN_DELTA
+        self.session = requests.Session()  # reused connection pool with retries
+        self.session.mount(
+            'http://', requests.adapters.HTTPAdapter(max_retries=retries))
+        self.session.mount(
+            'https://', requests.adapters.HTTPAdapter(max_retries=retries))
+        self._service_catalog = []  # filled by get_token(); see service_catalog property
+
+    def is_valid_token(self):
+        now = datetime.datetime.now(tz=dateutil.tz.tzutc())  # tz-aware, comparable to expires_at
+        return self.token is not None and self.valid_until is not None and self.valid_until > now
+
+    def clear_token(self):
+        self.token = None  # drop token so the next request re-authenticates
+        self.valid_until = None
+
+ def get_token(self):
+ self.clear_token()
+ data = json.dumps({
+ "auth": {
+ "identity": {
+ "methods": ["password"],
+ "password": {
+ "user": {
+ "name": self.username,
+ "domain": {"id": self.user_domain},
+ "password": self.password
+ }
+ }
+ },
+ "scope": {
+ "project": {
+ "name": self.tenant_name,
+ "domain": {"id": self.user_domain}
+ }
+ }
+ }
+ })
+ logger.info("Trying to get token from '%s'" % self.keystone_url)
+ r = self.make_request('post',
+ '%s/auth/tokens' % self.keystone_url, data=data,
+ token_required=False)
+ if not r:
+ logger.error(
+ "Cannot get a valid token from {}".format(
+ self.keystone_url))
+
+ if r.status_code < 200 or r.status_code > 299:
+ logger.error(
+ "{} responded with code {}".format(
+ self.keystone_url,
+ r.status_code))
+
+ data = r.json()
+ self.token = r.headers.get("X-Subject-Token")
+ self.tenant_id = data['token']['project']['id']
+ self.valid_until = dateutil.parser.parse(
+ data['token']['expires_at']) - self.EXPIRATION_TOKEN_DELTA
+ self._service_catalog = []
+ for item in data['token']['catalog']:
+ internalURL = None
+ publicURL = None
+ adminURL = None
+ for endpoint in item['endpoints']:
+ if endpoint['region'] == self.region or self.region is None:
+ if endpoint['interface'] == 'internal':
+ internalURL = endpoint['url']
+ elif endpoint['interface'] == 'public':
+ publicURL = endpoint['url']
+ elif endpoint['interface'] == 'admin':
+ adminURL = endpoint['url']
+
+ if internalURL is None and publicURL is None:
+ logger.warning(
+ "Service '{}' skipped because no URL can be found".format(
+ item['name']))
+ continue
+ self._service_catalog.append({
+ 'name': item['name'],
+ 'region': self.region,
+ 'service_type': item['type'],
+ 'url': internalURL if internalURL is not None else publicURL,
+ 'admin_url': adminURL,
+ })
+
+ logger.debug("Got token '%s'" % self.token)
+ return self.token
+
+    @property
+    def service_catalog(self):
+        if not self._service_catalog:  # lazily authenticate on first access
+            self.get_token()
+        return self._service_catalog
+
+    @service_catalog.setter
+    def service_catalog(self, service_catalog):
+        self._service_catalog = service_catalog  # allow callers/tests to inject a catalog
+
+ def get_service(self, service_name):
+ return next((x for x in self._service_catalog
+ if x['name'] == service_name), None)
+
+    def raw_get(self, url, token_required=False):
+        return self.make_request('get', url,  # GET an absolute URL, bypassing the catalog
+                                 token_required=token_required)
+
+    def make_request(self, verb, url, data=None, token_required=True,
+                     params=None):
+        kwargs = {
+            'url': url,
+            'timeout': self.timeout,
+            'headers': {'Content-type': 'application/json'}
+        }
+        if token_required and not self.is_valid_token():
+            self.get_token()  # transparently (re)authenticate on expiry
+            if not self.is_valid_token():
+                logger.error("Aborting request, no valid token")
+                return
+        if token_required:
+            kwargs['headers']['X-Auth-Token'] = self.token
+
+        if data is not None:
+            kwargs['data'] = data
+
+        if params is not None:
+            kwargs['params'] = params
+
+        func = getattr(self.session, verb.lower())  # e.g. session.get / session.post
+
+        try:
+            r = func(**kwargs)
+        except Exception as e:  # network-level failure: log and return None
+            logger.error("Got exception for '%s': '%s'" %
+                         (kwargs['url'], e))
+            return
+
+        logger.info("%s responded with status code %d" %
+                    (kwargs['url'], r.status_code))
+
+        return r
+
+    def get(self, service, resource, params=None):
+        url = self._build_url(service, resource)
+        if not url:  # service missing from catalog; _build_url already logged it
+            return
+        logger.info('GET({}) {}'.format(url, params))
+        return self.make_request('get', url, params=params)
+
+ def _build_url(self, service, resource):
+ s = (self.get_service(service) or {})
+ url = s.get('url')
+ # v3 API must be used in order to obtain tenants in multi-domain envs
+ if service == 'keystone' and (resource in ['projects',
+ 'users', 'roles']):
+ url = url.replace('v2.0', 'v3')
+
+ if url:
+ if url[-1] != '/':
+ url += '/'
+ url = "%s%s" % (url, resource)
+ else:
+ logger.error("Service '%s' not found in catalog" % service)
+ return url
+
+    def get_workers(self, service):
+        """ Return the list of workers and their state
+
+        Here is an example of returned dictionary:
+        {
+            'host': 'node.example.com',
+            'service': 'nova-compute',
+            'state': 'up'
+        }
+
+        where 'state' can be 'up', 'down' or 'disabled'
+        """
+        worker_metrics = []
+        if service == 'neutron':
+            endpoint = 'v2.0/agents'  # neutron exposes agents, not os-services
+            entry = 'agents'  # response key holding the worker list
+        else:
+            endpoint = 'os-services'
+            entry = 'services'
+
+        ost_services_r = self.get(service, endpoint)
+
+        msg = "Cannot get state of {} workers".format(service)
+        if ost_services_r is None:  # request failed entirely (no token / network)
+            logger.warning(msg)
+        elif ost_services_r.status_code != 200:
+            msg = "{}: Got {} ({})".format(
+                msg, ost_services_r.status_code, ost_services_r.content)
+            logger.warning(msg)
+        else:
+            try:
+                r_json = ost_services_r.json()
+            except ValueError:  # non-JSON body: treat as empty response
+                r_json = {}
+
+            if entry not in r_json:
+                msg = "{}: couldn't find '{}' key".format(msg, entry)
+                logger.warning(msg)
+            else:
+                for val in r_json[entry]:
+                    data = {'host': val['host'], 'service': val['binary']}
+
+                    if service == 'neutron':
+                        if not val['admin_state_up']:
+                            data['state'] = 'disabled'
+                        else:
+                            data['state'] = 'up' if val['alive'] else 'down'
+                    else:
+                        if val['status'] == 'disabled':
+                            data['state'] = 'disabled'
+                        elif val['state'] == 'up' or val['state'] == 'down':
+                            data['state'] = val['state']
+                        else:
+                            data['state'] = 'unknown'
+                            msg = "Unknown state for {} workers:{}".format(
+                                service, val['state'])
+                            logger.warning(msg)
+                            continue  # 'unknown' has no entry in self.states; skip the metric
+                    data['stat_value'] = self.states[data['state']]
+                    data['stat_name'] = "services_{}_{}".format(
+                        service, val['binary'])
+                    worker_metrics.append(data)
+        return worker_metrics
diff --git a/zuul.d/prometheus-openstack-exporter.yaml b/zuul.d/prometheus-openstack-exporter.yaml
new file mode 100644
index 00000000..8f533f43
--- /dev/null
+++ b/zuul.d/prometheus-openstack-exporter.yaml
@@ -0,0 +1,57 @@
+---
+# Copyright 2019 The Openstack-Helm Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+- project:
+    check:  # build-only on every proposed change
+      jobs:
+        - openstack-helm-images-build-prometheus-openstack-exporter
+    gate:  # build and upload once approved
+      jobs:
+        - openstack-helm-images-upload-prometheus-openstack-exporter
+    promote:  # retag the gated image after merge
+      jobs:
+        - openstack-helm-images-promote-prometheus-openstack-exporter
+    periodic:  # scheduled rebuild to pick up base-image updates
+      jobs:
+        - openstack-helm-images-build-prometheus-openstack-exporter
+
+- job:
+    name: openstack-helm-images-build-prometheus-openstack-exporter
+    parent: openstack-helm-images-build
+    description: Build Prometheus OpenStack exporter image
+    vars: &prometheus-openstack-exporter_vars  # anchor shared with upload/promote jobs
+      docker_images:
+        - context: prometheus-openstack-exporter
+          repository: openstackhelm/prometheus-openstack-exporter
+          dockerfile: Dockerfile.ubuntu_xenial
+          tags:
+            - latest-ubuntu_xenial
+    files: &prometheus-openstack-exporter_files  # only trigger when these paths change
+      - prometheus-openstack-exporter/.*
+      - zuul.d/prometheus-openstack-exporter.yaml
+
+- job:
+    name: openstack-helm-images-upload-prometheus-openstack-exporter
+    parent: openstack-helm-images-upload
+    description: Build and upload Prometheus OpenStack exporter image
+    vars: *prometheus-openstack-exporter_vars  # same image definition as the build job
+    files: *prometheus-openstack-exporter_files
+
+- job:
+    name: openstack-helm-images-promote-prometheus-openstack-exporter
+    parent: openstack-helm-images-promote
+    description: Promote a previously published Prometheus OpenStack exporter image.
+    vars: *prometheus-openstack-exporter_vars  # same image definition as the build job
+    files: *prometheus-openstack-exporter_files