Merge "Add Prometheus openstack exporter image"

Authored by Zuul on 2019-06-25 16:31:15 +00:00, committed by Gerrit Code Review
commit a31f5dce35
13 changed files with 1208 additions and 0 deletions

View File

@@ -0,0 +1,15 @@
ARG FROM=docker.io/ubuntu:xenial
FROM ${FROM}
RUN apt-get -y update \
&& apt-get -y install curl python-dateutil python-requests python-simplejson python-yaml python-prometheus-client \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
RUN mkdir /usr/local/bin/exporter
COPY exporter /usr/local/bin/exporter
RUN chmod +x /usr/local/bin/exporter/main.py
EXPOSE 9103
CMD ["/usr/local/bin/exporter/main.py"]

View File

@@ -0,0 +1,14 @@
#!/bin/bash
SCRIPT=`realpath $0`
SCRIPT_DIR=`dirname ${SCRIPT}`
## Only build from main folder
cd ${SCRIPT_DIR}/..
IMAGE="prometheus-openstack-exporter"
VERSION=${VERSION:-latest}
DISTRO=${DISTRO:-ubuntu_xenial}
REGISTRY_URI=${REGISTRY_URI:-"openstackhelm/"}
EXTRA_TAG_INFO=${EXTRA_TAG_INFO:-""}
docker build -f ${IMAGE}/Dockerfile.${DISTRO} --network=host -t ${REGISTRY_URI}${IMAGE}:${VERSION}-${DISTRO}${EXTRA_TAG_INFO} ${extra_build_args} ${IMAGE}
cd -

View File

@@ -0,0 +1,50 @@
#!/usr/bin/env python
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
class OSBase(object):
FAIL = 0
OK = 1
UNKNOWN = 2
GAUGE_NAME_FORMAT = "openstack_{}"
def __init__(self, oscache, osclient):
self.oscache = oscache
self.osclient = osclient
self.oscache.cache_me(self)
def get_cache_data(self):
return self.oscache.get_cache_data(self.get_cache_key())
def build_cache_data(self):
""" build a hash to store in cache """
raise NotImplementedError("Must be implemented by the subclass!")
def get_cache_key(self):
""" cache key """
raise NotImplementedError("Must be implemented by the subclass!")
def get_stats(self):
""" build stats for prometheus exporter """
raise NotImplementedError("Must be implemented by the subclass!")
def gauge_name_sanitize(self, input):
if input.startswith("openstack_"):
return re.sub(r'[^a-zA-Z0-9:_]', '_', input)
else:
return self.GAUGE_NAME_FORMAT.format(
re.sub(r'[^a-zA-Z0-9:_]', '_', input))
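
For reference, gauge_name_sanitize() replaces any character outside [a-zA-Z0-9:_] with an underscore and adds the openstack_ prefix when it is missing. A minimal sketch of the sanitizer, using hypothetical stub cache/client objects (OSBase registers itself with the cache on construction, so it needs something that offers cache_me):

# Hypothetical stubs standing in for OSCache and OSClient; real collectors
# receive the instances wired up in main.py.
from base import OSBase


class _StubCache(object):
    def cache_me(self, collector):
        pass  # the real OSCache keeps the collector for its refresh loop


class _StubClient(object):
    pass


base = OSBase(_StubCache(), _StubClient())
print(base.gauge_name_sanitize("check_nova-api_api"))
# -> openstack_check_nova_api_api
print(base.gauge_name_sanitize("openstack_total_free_vcpus"))
# -> openstack_total_free_vcpus (already prefixed, left untouched)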

View File

@@ -0,0 +1,137 @@
#!/usr/bin/env python
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from base import OSBase
from urlparse import urlparse
from prometheus_client import CollectorRegistry, generate_latest, Gauge
import logging
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s:%(levelname)s:%(message)s")
logger = logging.getLogger(__name__)
class CheckOSApi(OSBase):
"""Class to check the status of OpenStack API services."""
CHECK_MAP = {
'keystone': {'path': '/', 'expect': [300], 'name': 'keystone-public-api'},
'heat': {'path': '/', 'expect': [300], 'name': 'heat-api'},
'heat-cfn': {'path': '/', 'expect': [300], 'name': 'heat-cfn-api'},
'glance': {'path': '/', 'expect': [300], 'name': 'glance-api'},
'cinder': {'path': '/', 'expect': [200, 300], 'name': 'cinder-api'},
'cinderv2': {
'path': '/', 'expect': [200, 300], 'name': 'cinder-v2-api'},
'neutron': {'path': '/', 'expect': [200], 'name': 'neutron-api'},
'nova': {'path': '/', 'expect': [200], 'name': 'nova-api'},
'ceilometer': {
'path': 'v2/capabilities', 'expect': [200], 'auth': True,
'name': 'ceilometer-api'},
'swift': {'path': '/', 'expect': [200], 'name': 'ceph'},
'swift_s3': {
'path': 'healthcheck', 'expect': [200], 'name': 'swift-s3-api'},
'murano': {'path': '/', 'expect': [200, 300], 'name': 'murano-api'},
'trove': {'path': '/', 'expect': [200, 300], 'name': 'trove-api'},
'mistral': {'path': '/', 'expect': [200, 300], 'name': 'mistral-api'},
'designate': {'path': '/', 'expect': [200, 300], 'name': 'designate-api'},
'contrail_analytics': {'path': '/', 'expect': [200], 'name': 'contrail-analytics-api'},
'contrail_config': {'path': '/', 'expect': [200], 'name': 'contrail-config-api'},
'congress': {'path': '/', 'expect': [200], 'name': 'congress-api'},
'placement': {'path': '/', 'expect': [401], 'name': 'placement-api'},
'shipyard': {'path': 'v1.0/health', 'expect': [204, 503], 'name': 'shipyard'},
'armada': {'path': 'v1.0/health', 'expect': [204, 503], 'name': 'armada'},
'deckhand': {'path': 'v1.0/health', 'expect': [204, 503], 'name': 'deckhand'},
'drydock': {'path': 'v1.0/health', 'expect': [204, 503], 'name': 'drydock'},
'promenade': {'path': 'v1.0/health', 'expect': [204, 503], 'name': 'promenade'},
}
def _service_url(self, endpoint, path):
url = urlparse(endpoint)
u = '%s://%s' % (url.scheme, url.netloc)
if path != '/':
u = '%s/%s' % (u, path)
return u
def build_cache_data(self):
""" Check the status of all the API services.
Returns a list of dict items with 'service', 'status' (either OK,
FAIL or UNKNOWN) and 'region' keys.
"""
check_array = []
catalog = self.osclient.service_catalog
for service in catalog:
name = service['name']
url = None
status_code = 500
if name not in self.CHECK_MAP:
logger.info(
"No check found for service '%s', creating one" % name)
self.CHECK_MAP[name] = {
'path': '/',
'expect': [200, 300, 302, 401, 404],
'name': name,
}
check = self.CHECK_MAP[name]
url = self._service_url(service['url'], check['path'])
r = self.osclient.raw_get(
url, token_required=check.get(
'auth', False))
if r is not None:
status_code = r.status_code
if r is None or status_code not in check['expect']:
logger.info(
"Service %s check failed "
"(returned '%s' but expected '%s')" % (
name, status_code, check['expect'])
)
status = self.FAIL
else:
status = self.OK
check_array.append({
'service': name,
'status': status,
'url': url,
'status_code': status_code,
'region': self.osclient.region,
})
return check_array
def get_cache_key(self):
return "check_os_api"
def get_stats(self):
registry = CollectorRegistry()
labels = ['region', 'url', 'service']
check_api_data_cache = self.get_cache_data()
for check_api_data in check_api_data_cache:
label_values = [
check_api_data['region'],
check_api_data['url'],
check_api_data['service']]
gauge_name = self.gauge_name_sanitize(
"check_{}_api".format(check_api_data['service']))
check_gauge = Gauge(
gauge_name,
'Openstack API check. fail = 0, ok = 1 and unknown = 2',
labels,
registry=registry)
check_gauge.labels(*label_values).set(check_api_data['status'])
return generate_latest(registry)
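
Each catalog entry ends up as its own gauge named openstack_check_<service>_api, with region/url/service labels and a value of 0 (fail), 1 (ok) or 2 (unknown). A hedged sketch of what get_stats() renders for a single hypothetical cache entry:

# Illustrative only: the same kind of gauge CheckOSApi.get_stats() builds,
# filled with one hypothetical keystone entry.
from prometheus_client import CollectorRegistry, Gauge, generate_latest

registry = CollectorRegistry()
gauge = Gauge(
    'openstack_check_keystone_api',
    'Openstack API check. fail = 0, ok = 1 and unknown = 2',
    ['region', 'url', 'service'],
    registry=registry)
gauge.labels('RegionOne', 'http://keystone.example:5000', 'keystone').set(1)
print(generate_latest(registry))
# Besides the HELP/TYPE lines, the output looks roughly like:
# openstack_check_keystone_api{region="RegionOne",url="http://keystone.example:5000",service="keystone"} 1.0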

View File

@@ -0,0 +1,84 @@
#!/usr/bin/env python
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from base import OSBase
from collections import Counter
from collections import defaultdict
from prometheus_client import CollectorRegistry, generate_latest, Gauge
import logging
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s:%(levelname)s:%(message)s")
logger = logging.getLogger(__name__)
class CinderServiceStats(OSBase):
""" Class to report the statistics on Cinder services.
count and percentage of workers per service, broken down by state
"""
def build_cache_data(self):
aggregated_workers = defaultdict(Counter)
stats = self.osclient.get_workers('cinder')
for worker in stats:
service = worker['service']
state = worker['state']
aggregated_workers[service][state] += 1
for service in aggregated_workers:
totalw = sum(aggregated_workers[service].values())
for state in self.osclient.states:
prct = (100.0 * aggregated_workers[service][state]) / totalw
stats.append({
'stat_name': "services_{}_{}_percent".format(service, state),
'stat_value': prct,
'state': state,
'service': service
})
stats.append({
'stat_name': "services_{}_{}_total".format(service, state),
'stat_value': aggregated_workers[service][state],
'state': state,
'service': service
})
return stats
def get_cache_key(self):
return "cinder_services_stats"
def get_stats(self):
registry = CollectorRegistry()
labels = ['region', 'host', 'service', 'state']
cinder_services_stats_cache = self.get_cache_data()
for cinder_services_stat in cinder_services_stats_cache:
stat_gauge = Gauge(
self.gauge_name_sanitize(
cinder_services_stat['stat_name']),
'Openstack Cinder Service statistic',
labels,
registry=registry)
label_values = [self.osclient.region,
cinder_services_stat.get('host', ''),
cinder_services_stat.get('service', ''),
cinder_services_stat.get('state', '')]
stat_gauge.labels(
*
label_values).set(
cinder_services_stat['stat_value'])
return generate_latest(registry)
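
build_cache_data() extends the raw worker list returned by osclient.get_workers('cinder') with per-service, per-state totals and percentages. A sketch with a hypothetical stubbed client shows the shape of the cached entries:

# Hypothetical stand-ins so build_cache_data() can run outside the exporter.
from cinder_services import CinderServiceStats


class _StubCache(object):
    def cache_me(self, collector):
        pass


class _StubClient(object):
    region = 'RegionOne'
    states = {'up': 1, 'down': 0, 'disabled': 2}

    def get_workers(self, service):
        return [{'host': 'storage-0', 'service': 'cinder-volume',
                 'state': 'up', 'stat_value': 1,
                 'stat_name': 'services_cinder_cinder-volume'}]


stats = CinderServiceStats(_StubCache(), _StubClient()).build_cache_data()
# stats now holds the raw worker entry plus, for each state, pairs such as
# {'stat_name': 'services_cinder-volume_up_percent', 'stat_value': 100.0, ...}
# {'stat_name': 'services_cinder-volume_up_total', 'stat_value': 1, ...}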

View File

@@ -0,0 +1,152 @@
#!/usr/bin/env python
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from base import OSBase
from prometheus_client import CollectorRegistry, generate_latest, Gauge
import logging
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s:%(levelname)s:%(message)s")
logger = logging.getLogger(__name__)
class HypervisorStats(OSBase):
""" Class to report the statistics on Nova hypervisors."""
VALUE_MAP = {
'current_workload': 'running_tasks',
'running_vms': 'running_instances',
'local_gb_used': 'used_disk_GB',
'free_disk_gb': 'free_disk_GB',
'memory_mb_used': 'used_ram_MB',
'free_ram_mb': 'free_ram_MB',
'vcpus_used': 'used_vcpus',
}
def __init__(
self,
oscache,
osclient,
cpu_overcommit_ratio,
ram_overcommit_ratio):
super(HypervisorStats, self).__init__(oscache, osclient)
self.cpu_overcommit_ratio = cpu_overcommit_ratio
self.ram_overcommit_ratio = ram_overcommit_ratio
def build_cache_data(self):
cache_stats = []
nova_aggregates = {}
r = self.osclient.get('nova', 'os-aggregates')
if not r:
logger.warning("Could not get nova aggregates")
else:
aggregates_list = r.json().get('aggregates', [])
for agg in aggregates_list:
nova_aggregates[agg['name']] = {
'id': agg['id'],
'hosts': [h.split('.')[0] for h in agg['hosts']],
'metrics': {'free_vcpus': 0},
}
nova_aggregates[agg['name']]['metrics'].update(
{v: 0 for v in self.VALUE_MAP.values()}
)
r = self.osclient.get('nova', 'os-hypervisors/detail')
if not r:
logger.warning("Could not get hypervisor statistics")
return cache_stats
total_stats = {v: 0 for v in self.VALUE_MAP.values()}
total_stats['free_vcpus'] = 0
hypervisor_stats = r.json().get('hypervisors', [])
for stats in hypervisor_stats:
host = stats['hypervisor_hostname']
for k, v in self.VALUE_MAP.iteritems():
m_val = stats.get(k, 0)
cache_stats.append({
'stat_name': v,
'stat_value': m_val,
'host': host,
})
total_stats[v] += m_val
for agg in nova_aggregates.keys():
agg_hosts = nova_aggregates[agg]['hosts']
if host in agg_hosts:
nova_aggregates[agg]['metrics'][v] += m_val
m_vcpus = stats.get('vcpus', 0)
m_vcpus_used = stats.get('vcpus_used', 0)
free = (int(self.cpu_overcommit_ratio * m_vcpus)) - m_vcpus_used
cache_stats.append({
'stat_name': 'free_vcpus',
'stat_value': free,
'host': host,
})
total_stats['free_vcpus'] += free
for agg in nova_aggregates.keys():
agg_hosts = nova_aggregates[agg]['hosts']
if host in agg_hosts:
free = (int(self.cpu_overcommit_ratio * m_vcpus) -
m_vcpus_used)
nova_aggregates[agg]['metrics']['free_vcpus'] += free
# Dispatch the aggregate metrics
for agg in nova_aggregates.keys():
agg_id = nova_aggregates[agg]['id']
agg_total_ram = (
nova_aggregates[agg]['metrics']['free_ram_MB'] +
nova_aggregates[agg]['metrics']['used_ram_MB']
)
if agg_total_ram > 0:
nova_aggregates[agg]['metrics']['free_ram_percent'] = round(
(100.0 * nova_aggregates[agg]['metrics']['free_ram_MB']) /
agg_total_ram,
2)
for k, v in nova_aggregates[agg]['metrics'].iteritems():
cache_stats.append({
'stat_name': 'aggregate_{}'.format(k),
'stat_value': v,
'aggregate': agg,
'aggregate_id': agg_id,
})
# Dispatch the global metrics
for k, v in total_stats.iteritems():
cache_stats.append({
'stat_name': 'total_{}'.format(k),
'stat_value': v,
})
return cache_stats
def get_cache_key(self):
return "hypervisor_stats"
def get_stats(self):
registry = CollectorRegistry()
labels = ['region', 'host', 'aggregate', 'aggregate_id']
hypervisor_stats_cache = self.get_cache_data()
for hypervisor_stat in hypervisor_stats_cache:
stat_gauge = Gauge(
self.gauge_name_sanitize(
hypervisor_stat['stat_name']),
'Openstack Hypervisor statistic',
labels,
registry=registry)
label_values = [self.osclient.region,
hypervisor_stat.get('host', ''),
hypervisor_stat.get('aggregate', ''),
hypervisor_stat.get('aggregate_id', '')]
stat_gauge.labels(*label_values).set(hypervisor_stat['stat_value'])
return generate_latest(registry)
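
The free_vcpus figure is derived from the configured CPU overcommit ratio rather than the raw hypervisor counts, which is why it can exceed (or, when oversubscribed, drop below) the physical numbers. A worked example with hypothetical values:

# Hypothetical values; OS_CPU_OC_RATIO and the hypervisor counts come from the
# exporter config and the os-hypervisors/detail API respectively.
cpu_overcommit_ratio = 2.0       # hypothetical OS_CPU_OC_RATIO
vcpus, vcpus_used = 32, 40       # hypothetical hypervisor report
free_vcpus = int(cpu_overcommit_ratio * vcpus) - vcpus_used
print(free_vcpus)                # 24 = 64 schedulable vCPUs minus 40 in use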

View File

@@ -0,0 +1,162 @@
#!/usr/bin/env python
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import yaml
import os
import urlparse
from BaseHTTPServer import BaseHTTPRequestHandler
from BaseHTTPServer import HTTPServer
from SocketServer import ForkingMixIn
from prometheus_client import CONTENT_TYPE_LATEST
from osclient import OSClient
from oscache import OSCache
from check_os_api import CheckOSApi
from neutron_agents import NeutronAgentStats
from nova_services import NovaServiceStats
from cinder_services import CinderServiceStats
from hypervisor_stats import HypervisorStats
import logging
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s:%(levelname)s:%(message)s")
logger = logging.getLogger(__name__)
collectors = []
class ForkingHTTPServer(ForkingMixIn, HTTPServer):
pass
class OpenstackExporterHandler(BaseHTTPRequestHandler):
def __init__(self, *args, **kwargs):
BaseHTTPRequestHandler.__init__(self, *args, **kwargs)
def do_GET(self):
url = urlparse.urlparse(self.path)
if url.path == '/metrics':
output = ''
for collector in collectors:
try:
stats = collector.get_stats()
if stats is not None:
output = output + stats
except Exception:
logger.warning(
"Could not get stats for collector {}".format(
collector.get_cache_key()))
self.send_response(200)
self.send_header('Content-Type', CONTENT_TYPE_LATEST)
self.end_headers()
self.wfile.write(output)
elif url.path == '/':
self.send_response(200)
self.end_headers()
self.wfile.write("""<html>
<head><title>OpenStack Exporter</title></head>
<body>
<h1>OpenStack Exporter</h1>
<p>Visit <code>/metrics</code> to use.</p>
</body>
</html>""")
else:
self.send_response(404)
self.end_headers()
def handler(*args, **kwargs):
OpenstackExporterHandler(*args, **kwargs)
if __name__ == '__main__':
parser = argparse.ArgumentParser(
usage=__doc__,
description='Prometheus OpenStack exporter',
formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('--config-file', nargs='?',
help='Configuration file path',
type=argparse.FileType('r'),
required=False)
args = parser.parse_args()
config = {}
if args.config_file:
config = yaml.safe_load(args.config_file.read())
os_keystone_url = config.get('OS_AUTH_URL', os.getenv('OS_AUTH_URL'))
os_password = config.get('OS_PASSWORD', os.getenv('OS_PASSWORD'))
os_tenant_name = config.get(
'OS_PROJECT_NAME',
os.getenv('OS_PROJECT_NAME'))
os_username = config.get('OS_USERNAME', os.getenv('OS_USERNAME'))
os_user_domain = config.get(
'OS_USER_DOMAIN_NAME',
os.getenv('OS_USER_DOMAIN_NAME'))
os_region = config.get('OS_REGION_NAME', os.getenv('OS_REGION_NAME'))
os_timeout = config.get(
'TIMEOUT_SECONDS', int(
os.getenv(
'TIMEOUT_SECONDS', 10)))
os_polling_interval = config.get(
'OS_POLLING_INTERVAL', int(
os.getenv(
'OS_POLLING_INTERVAL', 900)))
os_retries = config.get('OS_RETRIES', int(os.getenv('OS_RETRIES', 1)))
os_cpu_overcommit_ratio = config.get(
'OS_CPU_OC_RATIO', float(
os.getenv(
'OS_CPU_OC_RATIO', 1)))
os_ram_overcommit_ratio = config.get(
'OS_RAM_OC_RATIO', float(
os.getenv(
'OS_RAM_OC_RATIO', 1)))
osclient = OSClient(
os_keystone_url,
os_password,
os_tenant_name,
os_username,
os_user_domain,
os_region,
os_timeout,
os_retries)
oscache = OSCache(os_polling_interval, os_region)
collectors.append(oscache)
check_os_api = CheckOSApi(oscache, osclient)
collectors.append(check_os_api)
neutron_agent_stats = NeutronAgentStats(oscache, osclient)
collectors.append(neutron_agent_stats)
cinder_service_stats = CinderServiceStats(oscache, osclient)
collectors.append(cinder_service_stats)
nova_service_stats = NovaServiceStats(oscache, osclient)
collectors.append(nova_service_stats)
hypervisor_stats = HypervisorStats(
oscache,
osclient,
os_cpu_overcommit_ratio,
os_ram_overcommit_ratio)
collectors.append(hypervisor_stats)
oscache.start()
listen_port = config.get(
'LISTEN_PORT', int(
os.getenv(
'LISTEN_PORT', 9103)))
server = ForkingHTTPServer(('', listen_port), handler)
server.serve_forever()
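
main.py reads its OS_* credentials and exporter settings (timeout, retries, polling interval, overcommit ratios, listen port) from an optional YAML file passed via --config-file, falling back to environment variables of the same names, and then serves every registered collector on LISTEN_PORT (9103 by default). A hypothetical smoke test against a locally running exporter:

# Hypothetical host/port; adjust to wherever the exporter container is running.
import requests

resp = requests.get('http://127.0.0.1:9103/metrics', timeout=5)
resp.raise_for_status()
for line in resp.text.splitlines():
    if line.startswith('openstack_'):
        print(line)   # e.g. openstack_check_keystone_api{...} 1.0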

View File

@@ -0,0 +1,85 @@
#!/usr/bin/env python
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from base import OSBase
from collections import Counter
from collections import defaultdict
from prometheus_client import CollectorRegistry, generate_latest, Gauge
import logging
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s:%(levelname)s:%(message)s")
logger = logging.getLogger(__name__)
class NeutronAgentStats(OSBase):
""" Class to report the statistics on Neutron agents.
state of agents
"""
def build_cache_data(self):
# Get information of the state per agent
# State can be up or down
aggregated_agents = defaultdict(Counter)
stats = self.osclient.get_workers('neutron')
for agent in stats:
service = agent['service']
state = agent['state']
aggregated_agents[service][state] += 1
for service in aggregated_agents:
totala = sum(aggregated_agents[service].values())
for state in self.osclient.states:
prct = (100.0 * aggregated_agents[service][state]) / totala
stats.append({
'stat_name': "services_{}_{}_percent".format(service, state),
'stat_value': prct,
'service': service,
'state': state
})
stats.append({
'stat_name': "services_{}_{}_total".format(service, state),
'stat_value': aggregated_agents[service][state],
'service': service,
'state': state,
})
return stats
def get_cache_key(self):
return "neutron_agent_stats"
def get_stats(self):
registry = CollectorRegistry()
labels = ['region', 'host', 'service', 'state']
neutron_agent_stats_cache = self.get_cache_data()
for neutron_agent_stat in neutron_agent_stats_cache:
stat_gauge = Gauge(
self.gauge_name_sanitize(
neutron_agent_stat['stat_name']),
'Openstack Neutron agent statistic',
labels,
registry=registry)
label_values = [self.osclient.region,
neutron_agent_stat.get('host', ''),
neutron_agent_stat.get('service', ''),
neutron_agent_stat.get('state', '')]
stat_gauge.labels(
*
label_values).set(
neutron_agent_stat['stat_value'])
return generate_latest(registry)

View File

@@ -0,0 +1,83 @@
#!/usr/bin/env python
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from base import OSBase
from collections import Counter
from collections import defaultdict
from prometheus_client import CollectorRegistry, generate_latest, Gauge
import logging
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s:%(levelname)s:%(message)s")
logger = logging.getLogger(__name__)
class NovaServiceStats(OSBase):
""" Class to report the statistics on Nova services.
status per service broken down by state
"""
def build_cache_data(self):
# Get information of the state per service
# State can be: 'up', 'down' or 'disabled'
aggregated_workers = defaultdict(Counter)
stats = self.osclient.get_workers('nova')
for worker in stats:
service = worker['service']
state = worker['state']
aggregated_workers[service][state] += 1
for service in aggregated_workers:
total = sum(aggregated_workers[service].values())
for state in self.osclient.states:
prct = 0
if total > 0:
prct = (100.0 * aggregated_workers[service][state]) / total
stats.append({
'stat_name': "services_{}_{}_percent".format(service, state),
'stat_value': prct,
'state': state,
'service': service
})
stats.append({
'stat_name': "services_{}_{}_total".format(service, state),
'stat_value': aggregated_workers[service][state],
'state': state,
'service': service
})
return stats
def get_cache_key(self):
return "nova_services_stats"
def get_stats(self):
registry = CollectorRegistry()
labels = ['region', 'host', 'service', 'state']
services_stats_cache = self.get_cache_data()
for services_stat in services_stats_cache:
stat_gauge = Gauge(
self.gauge_name_sanitize(
services_stat['stat_name']),
'Openstack Nova Service statistic',
labels,
registry=registry)
label_values = [self.osclient.region,
services_stat.get('host', ''),
services_stat.get('service', ''),
services_stat.get('state', '')]
stat_gauge.labels(*label_values).set(services_stat['stat_value'])
return generate_latest(registry)

View File

@@ -0,0 +1,84 @@
#!/usr/bin/env python
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from threading import Thread
from threading import Lock
from prometheus_client import CollectorRegistry, generate_latest, Gauge
from time import sleep, time
import logging
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s:%(levelname)s:%(message)s")
logger = logging.getLogger(__name__)
class ThreadSafeDict(dict):
def __init__(self, * p_arg, ** n_arg):
dict.__init__(self, * p_arg, ** n_arg)
self._lock = Lock()
def __enter__(self):
self._lock.acquire()
return self
def __exit__(self, type, value, traceback):
self._lock.release()
class OSCache(Thread):
def __init__(self, refresh_interval, region):
Thread.__init__(self)
self.daemon = True
self.duration = 0
self.refresh_interval = refresh_interval
self.cache = ThreadSafeDict()
self.region = region
self.osclients = []
def cache_me(self, osclient):
self.osclients.append(osclient)
logger.debug("new osclient added to cache")
def run(self):
while True:
start_time = time()
for osclient in self.osclients:
try:
self.cache[osclient.get_cache_key(
)] = osclient.build_cache_data()
except Exception as e:
logger.error(str(e))
logger.error(
"failed to get data for cache key {}".format(
osclient.get_cache_key()))
self.duration = time() - start_time
sleep(self.refresh_interval)
def get_cache_data(self, key):
if key in self.cache:
return self.cache[key]
else:
return []
def get_stats(self):
registry = CollectorRegistry()
labels = ['region']
label_values = [self.region]
duration = Gauge('openstack_exporter_cache_refresh_duration_seconds',
'Cache refresh duration in seconds.',
labels, registry=registry)
duration.labels(*label_values).set(self.duration)
return generate_latest(registry)
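
The OSCache thread does the actual API polling: every refresh_interval seconds it calls build_cache_data() on each registered collector and stores the result under that collector's cache key, while its own get_stats() reports how long the last refresh took. A small sketch that inspects that metric without starting the refresh thread (values are hypothetical):

# Hypothetical standalone use; in the exporter the cache is created in main.py
# and started with oscache.start().
from oscache import OSCache

cache = OSCache(refresh_interval=900, region='RegionOne')
print(cache.get_cache_data('nova_services_stats'))   # [] until a refresh ran
print(cache.get_stats())
# ...openstack_exporter_cache_refresh_duration_seconds{region="RegionOne"} 0.0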

View File

@@ -0,0 +1,285 @@
#!/usr/bin/env python
# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import dateutil.parser
import dateutil.tz
import requests
import simplejson as json
import logging
logging.basicConfig(
level=logging.DEBUG,
format="%(asctime)s:%(levelname)s:%(message)s")
logger = logging.getLogger(__name__)
class KeystoneException(Exception):
pass
class OSClient(object):
""" Base class for querying the OpenStack API endpoints.
It uses the Keystone service catalog to discover the API endpoints.
"""
EXPIRATION_TOKEN_DELTA = datetime.timedelta(0, 30)
states = {'up': 1, 'down': 0, 'disabled': 2}
def __init__(
self,
keystone_url,
password,
tenant_name,
username,
user_domain,
region,
timeout,
retries):
self.keystone_url = keystone_url
self.password = password
self.tenant_name = tenant_name
self.username = username
self.user_domain = user_domain
self.region = region
self.timeout = timeout
self.retries = retries
self.token = None
self.valid_until = None
self.session = requests.Session()
self.session.mount(
'http://', requests.adapters.HTTPAdapter(max_retries=retries))
self.session.mount(
'https://', requests.adapters.HTTPAdapter(max_retries=retries))
self._service_catalog = []
def is_valid_token(self):
now = datetime.datetime.now(tz=dateutil.tz.tzutc())
return self.token is not None and self.valid_until is not None and self.valid_until > now
def clear_token(self):
self.token = None
self.valid_until = None
def get_token(self):
self.clear_token()
data = json.dumps({
"auth": {
"identity": {
"methods": ["password"],
"password": {
"user": {
"name": self.username,
"domain": {"id": self.user_domain},
"password": self.password
}
}
},
"scope": {
"project": {
"name": self.tenant_name,
"domain": {"id": self.user_domain}
}
}
}
})
logger.info("Trying to get token from '%s'" % self.keystone_url)
r = self.make_request('post',
'%s/auth/tokens' % self.keystone_url, data=data,
token_required=False)
if r is None:
logger.error(
"Cannot get a valid token from {}".format(
self.keystone_url))
raise KeystoneException(
"Cannot get a valid token from {}".format(
self.keystone_url))
if r.status_code < 200 or r.status_code > 299:
logger.error(
"{} responded with code {}".format(
self.keystone_url,
r.status_code))
raise KeystoneException(
"{} responded with code {}".format(
self.keystone_url, r.status_code))
data = r.json()
self.token = r.headers.get("X-Subject-Token")
self.tenant_id = data['token']['project']['id']
self.valid_until = dateutil.parser.parse(
data['token']['expires_at']) - self.EXPIRATION_TOKEN_DELTA
self._service_catalog = []
for item in data['token']['catalog']:
internalURL = None
publicURL = None
adminURL = None
for endpoint in item['endpoints']:
if endpoint['region'] == self.region or self.region is None:
if endpoint['interface'] == 'internal':
internalURL = endpoint['url']
elif endpoint['interface'] == 'public':
publicURL = endpoint['url']
elif endpoint['interface'] == 'admin':
adminURL = endpoint['url']
if internalURL is None and publicURL is None:
logger.warning(
"Service '{}' skipped because no URL can be found".format(
item['name']))
continue
self._service_catalog.append({
'name': item['name'],
'region': self.region,
'service_type': item['type'],
'url': internalURL if internalURL is not None else publicURL,
'admin_url': adminURL,
})
logger.debug("Got token '%s'" % self.token)
return self.token
@property
def service_catalog(self):
if not self._service_catalog:
self.get_token()
return self._service_catalog
@service_catalog.setter
def service_catalog(self, service_catalog):
self._service_catalog = service_catalog
def get_service(self, service_name):
return next((x for x in self._service_catalog
if x['name'] == service_name), None)
def raw_get(self, url, token_required=False):
return self.make_request('get', url,
token_required=token_required)
def make_request(self, verb, url, data=None, token_required=True,
params=None):
kwargs = {
'url': url,
'timeout': self.timeout,
'headers': {'Content-type': 'application/json'}
}
if token_required and not self.is_valid_token():
self.get_token()
if not self.is_valid_token():
logger.error("Aborting request, no valid token")
return
if token_required:
kwargs['headers']['X-Auth-Token'] = self.token
if data is not None:
kwargs['data'] = data
if params is not None:
kwargs['params'] = params
func = getattr(self.session, verb.lower())
try:
r = func(**kwargs)
except Exception as e:
logger.error("Got exception for '%s': '%s'" %
(kwargs['url'], e))
return
logger.info("%s responded with status code %d" %
(kwargs['url'], r.status_code))
return r
def get(self, service, resource, params=None):
url = self._build_url(service, resource)
if not url:
return
logger.info('GET({}) {}'.format(url, params))
return self.make_request('get', url, params=params)
def _build_url(self, service, resource):
s = (self.get_service(service) or {})
url = s.get('url')
# v3 API must be used in order to obtain tenants in multi-domain envs
if service == 'keystone' and (resource in ['projects',
'users', 'roles']):
url = url.replace('v2.0', 'v3')
if url:
if url[-1] != '/':
url += '/'
url = "%s%s" % (url, resource)
else:
logger.error("Service '%s' not found in catalog" % service)
return url
def get_workers(self, service):
""" Return the list of workers and their state
Here is an example of a returned dictionary:
{
'host': 'node.example.com',
'service': 'nova-compute',
'state': 'up'
}
where 'state' can be 'up', 'down' or 'disabled'
"""
worker_metrics = []
if service == 'neutron':
endpoint = 'v2.0/agents'
entry = 'agents'
else:
endpoint = 'os-services'
entry = 'services'
ost_services_r = self.get(service, endpoint)
msg = "Cannot get state of {} workers".format(service)
if ost_services_r is None:
logger.warning(msg)
elif ost_services_r.status_code != 200:
msg = "{}: Got {} ({})".format(
msg, ost_services_r.status_code, ost_services_r.content)
logger.warning(msg)
else:
try:
r_json = ost_services_r.json()
except ValueError:
r_json = {}
if entry not in r_json:
msg = "{}: couldn't find '{}' key".format(msg, entry)
logger.warning(msg)
else:
for val in r_json[entry]:
data = {'host': val['host'], 'service': val['binary']}
if service == 'neutron':
if not val['admin_state_up']:
data['state'] = 'disabled'
else:
data['state'] = 'up' if val['alive'] else 'down'
else:
if val['status'] == 'disabled':
data['state'] = 'disabled'
elif val['state'] == 'up' or val['state'] == 'down':
data['state'] = val['state']
else:
data['state'] = 'unknown'
msg = "Unknown state for {} workers:{}".format(
service, val['state'])
logger.warning(msg)
continue
data['stat_value'] = self.states[data['state']]
data['stat_name'] = "services_{}_{}".format(
service, val['binary'])
worker_metrics.append(data)
return worker_metrics
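
OSClient wraps a requests.Session, fetches a Keystone v3 token on demand, builds per-service URLs from the catalog, and offers get_workers() as the common entry point for the service/agent collectors. A hedged sketch of standalone use with hypothetical credentials:

# Hypothetical credentials; the exporter itself builds this object in main.py
# from the OS_* configuration values.
from osclient import OSClient

client = OSClient(
    keystone_url='http://keystone.example:5000/v3',
    password='secret',
    tenant_name='admin',
    username='admin',
    user_domain='default',
    region='RegionOne',
    timeout=10,
    retries=1)
for worker in client.get_workers('nova'):
    # e.g. {'host': 'compute-0', 'service': 'nova-compute', 'state': 'up', ...}
    print(worker['service'], worker['state'])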

View File

@@ -0,0 +1,57 @@
---
# Copyright 2019 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
- project:
check:
jobs:
- openstack-helm-images-build-prometheus-openstack-exporter
gate:
jobs:
- openstack-helm-images-upload-prometheus-openstack-exporter
promote:
jobs:
- openstack-helm-images-promote-prometheus-openstack-exporter
periodic:
jobs:
- openstack-helm-images-build-prometheus-openstack-exporter
- job:
name: openstack-helm-images-build-prometheus-openstack-exporter
parent: openstack-helm-images-build
description: Build Prometheus OpenStack exporter image
vars: &prometheus-openstack-exporter_vars
docker_images:
- context: prometheus-openstack-exporter
repository: openstackhelm/prometheus-openstack-exporter
dockerfile: Dockerfile.ubuntu_xenial
tags:
- latest-ubuntu_xenial
files: &prometheus-openstack-exporter_files
- prometheus-openstack-exporter/.*
- zuul.d/prometheus-openstack-exporter.yaml
- job:
name: openstack-helm-images-upload-prometheus-openstack-exporter
parent: openstack-helm-images-upload
description: Build and upload Prometheus OpenStack exporter image
vars: *prometheus-openstack-exporter_vars
files: *prometheus-openstack-exporter_files
- job:
name: openstack-helm-images-promote-prometheus-openstack-exporter
parent: openstack-helm-images-promote
description: Promote a previously published Prometheus OpenStack exporter image.
vars: *prometheus-openstack-exporter_vars
files: *prometheus-openstack-exporter_files