[fix] Openstack Exporter - Handle Duplicate Values
The CollectorRegistry object does not allow an identical metric to be added, which occasionally happens when the nova, neutron and hypervisor collectors update their caches. Also: - The cinder endpoint has been updated to v3 to resolve a 404 which was occurring during metric collection. - Extra logging settings were removed from files; log level and format are now set in main.py. - Misc pep8 fixes to imports and newlines. Change-Id: Ia0bebdc1a39b25bdeae47d01625cfb7b89d132eb
This commit is contained in:
parent
7cf31e02d8
commit
20f9ad4756
@ -14,7 +14,6 @@
|
||||
|
||||
import re
|
||||
|
||||
|
||||
class OSBase(object):
|
||||
FAIL = 0
|
||||
OK = 1
|
||||
|
@ -12,17 +12,15 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import logging
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from prometheus_client import CollectorRegistry, generate_latest, Gauge
|
||||
|
||||
from base import OSBase
|
||||
|
||||
from urllib.parse import urlparse
|
||||
from prometheus_client import CollectorRegistry, generate_latest, Gauge
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
format="%(asctime)s:%(levelname)s:%(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CheckOSApi(OSBase):
|
||||
"""Class to check the status of OpenStack API services."""
|
||||
|
||||
|
@ -12,14 +12,13 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from base import OSBase
|
||||
from collections import Counter
|
||||
from collections import defaultdict
|
||||
from prometheus_client import CollectorRegistry, generate_latest, Gauge
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
format="%(asctime)s:%(levelname)s:%(message)s")
|
||||
from collections import Counter, defaultdict
|
||||
|
||||
from prometheus_client import CollectorRegistry, generate_latest, Gauge
|
||||
|
||||
from base import OSBase
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@ -33,7 +32,7 @@ class CinderServiceStats(OSBase):
|
||||
|
||||
aggregated_workers = defaultdict(Counter)
|
||||
|
||||
stats = self.osclient.get_workers('cinder')
|
||||
stats = self.osclient.get_workers('cinderv3')
|
||||
for worker in stats:
|
||||
service = worker['service']
|
||||
state = worker['state']
|
||||
|
@ -12,13 +12,12 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from base import OSBase
|
||||
import logging
|
||||
|
||||
from prometheus_client import CollectorRegistry, generate_latest, Gauge
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
format="%(asctime)s:%(levelname)s:%(message)s")
|
||||
|
||||
from base import OSBase
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@ -59,7 +58,7 @@ class HypervisorStats(OSBase):
|
||||
'metrics': {'free_vcpus': 0},
|
||||
}
|
||||
nova_aggregates[agg['name']]['metrics'].update(
|
||||
{v: 0 for v in list(self.VALUE_MAP.values())}
|
||||
{v: 0 for v in list(self.VALUE_MAP.values())}
|
||||
)
|
||||
|
||||
r = self.osclient.get('nova', 'os-hypervisors/detail')
|
||||
@ -137,15 +136,28 @@ class HypervisorStats(OSBase):
|
||||
labels = ['region', 'host', 'aggregate', 'aggregate_id']
|
||||
hypervisor_stats_cache = self.get_cache_data()
|
||||
for hypervisor_stat in hypervisor_stats_cache:
|
||||
stat_gauge = Gauge(
|
||||
self.gauge_name_sanitize(
|
||||
hypervisor_stat['stat_name']),
|
||||
'Openstack Hypervisor statistic',
|
||||
labels,
|
||||
registry=registry)
|
||||
label_values = [self.osclient.region,
|
||||
hypervisor_stat.get('host', ''),
|
||||
hypervisor_stat.get('aggregate', ''),
|
||||
hypervisor_stat.get('aggregate_id', '')]
|
||||
stat_gauge.labels(*label_values).set(hypervisor_stat['stat_value'])
|
||||
try:
|
||||
stat_gauge = Gauge(
|
||||
self.gauge_name_sanitize(
|
||||
hypervisor_stat['stat_name']),
|
||||
'Openstack Hypervisor statistic',
|
||||
labels,
|
||||
registry=registry)
|
||||
label_values = [self.osclient.region,
|
||||
hypervisor_stat.get('host', ''),
|
||||
hypervisor_stat.get('aggregate', ''),
|
||||
hypervisor_stat.get('aggregate_id', '')]
|
||||
stat_gauge.labels(*label_values).set(hypervisor_stat['stat_value'])
|
||||
except ValueError:
|
||||
if 'host' in hypervisor_stat:
|
||||
location = hypervisor_stat['host']
|
||||
elif 'aggregate' in hypervisor_stat:
|
||||
location = hypervisor_stat['aggregate']
|
||||
else:
|
||||
location = 'N/A'
|
||||
|
||||
logger.debug('Unchanged value for stat {} already present in '
|
||||
'hypervisor registry for host {}; ignoring.'
|
||||
.format(hypervisor_stat['stat_name'], location))
|
||||
|
||||
return generate_latest(registry)
|
||||
|
@ -17,10 +17,10 @@ import argparse
|
||||
import yaml
|
||||
import os
|
||||
import urllib.parse
|
||||
|
||||
from http.server import BaseHTTPRequestHandler
|
||||
from http.server import HTTPServer
|
||||
import logging
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
from socketserver import ForkingMixIn
|
||||
|
||||
from prometheus_client import CONTENT_TYPE_LATEST
|
||||
|
||||
from osclient import OSClient
|
||||
@ -31,10 +31,10 @@ from nova_services import NovaServiceStats
|
||||
from cinder_services import CinderServiceStats
|
||||
from hypervisor_stats import HypervisorStats
|
||||
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
format="%(asctime)s:%(levelname)s:%(message)s")
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s:%(levelname)s: %(message)s")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
collectors = []
|
||||
@ -57,10 +57,15 @@ class OpenstackExporterHandler(BaseHTTPRequestHandler):
|
||||
stats = collector.get_stats()
|
||||
if stats is not None:
|
||||
output = output + stats
|
||||
except BaseException:
|
||||
except BaseException as inst:
|
||||
logger.warning(
|
||||
"Could not get stats for collector {}".format(
|
||||
collector.get_cache_key()))
|
||||
'Could not get stats for collector {}.'
|
||||
'"{}" Exception "{}" occured.'
|
||||
.format(
|
||||
collector.get_cache_key(),
|
||||
type(inst),
|
||||
inst
|
||||
))
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', CONTENT_TYPE_LATEST)
|
||||
self.end_headers()
|
||||
|
@ -12,14 +12,13 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from base import OSBase
|
||||
from collections import Counter
|
||||
from collections import defaultdict
|
||||
from prometheus_client import CollectorRegistry, generate_latest, Gauge
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
format="%(asctime)s:%(levelname)s:%(message)s")
|
||||
from collections import Counter, defaultdict
|
||||
|
||||
from prometheus_client import CollectorRegistry, generate_latest, Gauge
|
||||
|
||||
from base import OSBase
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@ -67,18 +66,25 @@ class NeutronAgentStats(OSBase):
|
||||
labels = ['region', 'host', 'service', 'state']
|
||||
neutron_agent_stats_cache = self.get_cache_data()
|
||||
for neutron_agent_stat in neutron_agent_stats_cache:
|
||||
stat_gauge = Gauge(
|
||||
self.gauge_name_sanitize(
|
||||
neutron_agent_stat['stat_name']),
|
||||
'Openstack Neutron agent statistic',
|
||||
labels,
|
||||
registry=registry)
|
||||
label_values = [self.osclient.region,
|
||||
neutron_agent_stat.get('host', ''),
|
||||
neutron_agent_stat.get('service', ''),
|
||||
neutron_agent_stat.get('state', '')]
|
||||
stat_gauge.labels(
|
||||
*
|
||||
label_values).set(
|
||||
neutron_agent_stat['stat_value'])
|
||||
try:
|
||||
stat_gauge = Gauge(
|
||||
self.gauge_name_sanitize(
|
||||
neutron_agent_stat['stat_name']),
|
||||
'Openstack Neutron agent statistic',
|
||||
labels,
|
||||
registry=registry)
|
||||
label_values = [self.osclient.region,
|
||||
neutron_agent_stat.get('host', ''),
|
||||
neutron_agent_stat.get('service', ''),
|
||||
neutron_agent_stat.get('state', '')]
|
||||
stat_gauge.labels(
|
||||
*
|
||||
label_values).set(
|
||||
neutron_agent_stat['stat_value'])
|
||||
except ValueError:
|
||||
logger.debug('Unchanged value for stat {} already present in '
|
||||
'neutron agent registry for host {}; ignoring.'
|
||||
.format(neutron_agent_stat['stat_name'],
|
||||
neutron_agent_stat['host']))
|
||||
|
||||
return generate_latest(registry)
|
||||
|
@ -12,16 +12,14 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from base import OSBase
|
||||
from collections import Counter
|
||||
from collections import defaultdict
|
||||
from prometheus_client import CollectorRegistry, generate_latest, Gauge
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
format="%(asctime)s:%(levelname)s:%(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
from collections import Counter, defaultdict
|
||||
|
||||
from prometheus_client import CollectorRegistry, generate_latest, Gauge
|
||||
|
||||
from base import OSBase
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class NovaServiceStats(OSBase):
|
||||
""" Class to report the statistics on Nova services.
|
||||
@ -68,15 +66,22 @@ class NovaServiceStats(OSBase):
|
||||
labels = ['region', 'host', 'service', 'state']
|
||||
services_stats_cache = self.get_cache_data()
|
||||
for services_stat in services_stats_cache:
|
||||
stat_gauge = Gauge(
|
||||
self.gauge_name_sanitize(
|
||||
services_stat['stat_name']),
|
||||
'Openstack Nova Service statistic',
|
||||
labels,
|
||||
registry=registry)
|
||||
label_values = [self.osclient.region,
|
||||
services_stat.get('host', ''),
|
||||
services_stat.get('service', ''),
|
||||
services_stat.get('state', '')]
|
||||
stat_gauge.labels(*label_values).set(services_stat['stat_value'])
|
||||
try:
|
||||
stat_gauge = Gauge(
|
||||
self.gauge_name_sanitize(
|
||||
services_stat['stat_name']),
|
||||
'Openstack Nova Service statistic',
|
||||
labels,
|
||||
registry=registry)
|
||||
label_values = [self.osclient.region,
|
||||
services_stat.get('host', ''),
|
||||
services_stat.get('service', ''),
|
||||
services_stat.get('state', '')]
|
||||
stat_gauge.labels(*label_values).set(services_stat['stat_value'])
|
||||
except ValueError:
|
||||
logger.debug('Unchanged value for stat {} already present in '
|
||||
'nova services registry for host {}; ignoring.'
|
||||
.format(services_stat['stat_name'],
|
||||
services_stat['host']))
|
||||
|
||||
return generate_latest(registry)
|
||||
|
@ -14,14 +14,12 @@
|
||||
|
||||
from threading import Thread
|
||||
from threading import Lock
|
||||
from prometheus_client import CollectorRegistry, generate_latest, Gauge
|
||||
from time import sleep, time
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
format="%(asctime)s:%(levelname)s:%(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from prometheus_client import CollectorRegistry, generate_latest, Gauge
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ThreadSafeDict(dict):
|
||||
def __init__(self, * p_arg, ** n_arg):
|
||||
@ -35,7 +33,6 @@ class ThreadSafeDict(dict):
|
||||
def __exit__(self, type, value, traceback):
|
||||
self._lock.release()
|
||||
|
||||
|
||||
class OSCache(Thread):
|
||||
|
||||
def __init__(self, refresh_interval, region):
|
||||
|
@ -18,16 +18,12 @@ import dateutil.tz
|
||||
import requests
|
||||
import simplejson as json
|
||||
import logging
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
format="%(asctime)s:%(levelname)s:%(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class KeystoneException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class OSClient(object):
|
||||
""" Base class for querying the OpenStack API endpoints.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user