Expose critical lbs metrics
Includes a new Gauge metric that records the number of members of the load balancers considered critical. The metric is labeled with the load balancer name and the pool name, and its value is the number of members in that pool. Also includes an Enum metric with the current state of each critical load balancer. Change-Id: Id89bb48d86588f4d2a28ab91963e0b84843cbd6f
This commit is contained in:
parent
291821cbf6
commit
c4278f9da2
@ -22,6 +22,7 @@ from oslo_log import log as logging
|
||||
|
||||
from kuryr_kubernetes import clients
|
||||
from kuryr_kubernetes import config
|
||||
from kuryr_kubernetes import utils
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
CONF = cfg.CONF
|
||||
@ -44,6 +45,7 @@ class ControllerPrometheusExporter(object):
|
||||
'/metrics', methods=['GET'], view_func=self.metrics)
|
||||
self.headers = {'Connection': 'close'}
|
||||
self._os_net = clients.get_network_client()
|
||||
self._os_lb = clients.get_loadbalancer_client()
|
||||
self._project_id = config.CONF.neutron_defaults.project
|
||||
self._create_metrics()
|
||||
|
||||
@ -51,6 +53,7 @@ class ControllerPrometheusExporter(object):
|
||||
"""Provides the registered metrics"""
|
||||
self._record_quota_free_count_metric()
|
||||
self._record_ports_quota_per_subnet_metric()
|
||||
self._record_lbs_metrics()
|
||||
|
||||
collected_metric = generate_latest(self.registry)
|
||||
return flask.Response(collected_metric, mimetype='text/plain')
|
||||
@ -117,8 +120,30 @@ class ControllerPrometheusExporter(object):
|
||||
ports_availability = total_num_addresses-ports_count
|
||||
self.port_quota_per_subnet.labels(**labels).set(ports_availability)
|
||||
|
||||
def _record_lbs_metrics(self):
    """Record the state and per-pool member count of the critical LBs.

    For every hard-coded (name, namespace) pair the KuryrLoadBalancer is
    fetched and the load balancer id is read from its
    ``status.loadbalancer.id`` field. Entries without an id are skipped.
    The id is then looked up through the load balancer client:

    * when nothing is found, the state Enum is set to ``DELETED``;
    * otherwise the state Enum is set to the load balancer's
      ``provisioning_status`` and the members Gauge is set, for each of
      its pools, to the length of the pool's ``members``.
    """
    critical_lbs = (
        ('dns-default', 'openshift-dns'),
        ('kubernetes', 'default'),
    )
    for name, namespace in critical_lbs:
        klb = utils.get_kuryrloadbalancer(name, namespace)
        lb_id = klb.get('status', {}).get('loadbalancer', {}).get('id')
        if not lb_id:
            # Nothing recorded on the CRD yet (or no CRD at all).
            continue

        os_lb = self._os_lb.find_load_balancer(lb_id)
        # The state metric is keyed by the "<namespace>/<name>" of the CRD.
        state_metric = self.lbs_state.labels(
            lb_name='/'.join((namespace, name)))
        if not os_lb:
            state_metric.state('DELETED')
            continue
        state_metric.state(os_lb.provisioning_status)

        # The members metric is keyed by the names reported by the
        # load balancer client for the LB and each of its pools.
        for pool in self._os_lb.pools(loadbalancer_id=os_lb.id):
            members_metric = self.lbs_members_count.labels(
                lb_name=os_lb.name, lb_pool_name=pool.name)
            members_metric.set(len(pool.members))
|
||||
|
||||
def _create_metrics(self):
|
||||
"""Creates a registry and records a new Gauge metric"""
|
||||
"""Creates a registry and records metrics"""
|
||||
self.registry = prometheus_client.CollectorRegistry()
|
||||
self.quota_free_count = prometheus_client.Gauge(
|
||||
'kuryr_quota_free_count', 'Amount of quota available'
|
||||
@ -130,6 +155,19 @@ class ControllerPrometheusExporter(object):
|
||||
' on Subnet', labelnames={'subnet_id', 'subnet_name'},
|
||||
registry=self.registry)
|
||||
|
||||
self.lbs_members_count = prometheus_client.Gauge(
|
||||
'kuryr_critical_lb_members_count', 'Amount of members per '
|
||||
'critical Load Balancer pool',
|
||||
labelnames={'lb_name', 'lb_pool_name'},
|
||||
registry=self.registry)
|
||||
|
||||
self.lbs_state = prometheus_client.Enum(
|
||||
'kuryr_critical_lb_state', 'Critical Load Balancer State',
|
||||
labelnames={'lb_name'},
|
||||
states=['ERROR', 'ACTIVE', 'DELETED', 'PENDING_CREATE',
|
||||
'PENDING_UPDATE', 'PENDING_DELETE'],
|
||||
registry=self.registry)
|
||||
|
||||
buckets = (10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, _INF)
|
||||
self.pod_creation_latency = prometheus_client.Histogram(
|
||||
'kuryr_pod_creation_latency', 'Time taken for a pod to have'
|
||||
|
@ -60,8 +60,13 @@ class TestControllerPrometheusExporter(base.TestCase):
|
||||
spec=prometheus_client.Gauge)
|
||||
self.srv.port_quota_per_subnet = mock.MagicMock(
|
||||
spec=prometheus_client.Gauge)
|
||||
self.srv.lbs_members_count = mock.MagicMock(
|
||||
spec=prometheus_client.Gauge)
|
||||
self.srv.lbs_state = mock.MagicMock(
|
||||
spec=prometheus_client.Enum)
|
||||
self.srv._project_id = mock.sentinel.project_id
|
||||
self.srv._os_net = self.useFixture(k_fix.MockNetworkClient()).client
|
||||
self.srv._os_lb = self.useFixture(k_fix.MockLBaaSClient()).client
|
||||
|
||||
def test__record_quota_free_count_metric(self):
|
||||
quota = get_quota_obj()
|
||||
@ -103,3 +108,62 @@ class TestControllerPrometheusExporter(base.TestCase):
|
||||
self.srv.port_quota_per_subnet.labels.assert_called_with(
|
||||
**{'subnet_id': subnet_id, 'subnet_name': subnet_name})
|
||||
self.srv.port_quota_per_subnet.labels().set.assert_called_with(509)
|
||||
|
||||
@mock.patch('kuryr_kubernetes.utils.get_kuryrloadbalancer')
def test__record_lbs_metrics(self, m_get_klb):
    """An existing LB gets its state and its pool member count recorded."""
    expected_name = 'default/kubernetes'
    expected_state = 'ACTIVE'
    lb_id = mock.sentinel.id
    pool_id = mock.sentinel.id
    pool_name = mock.sentinel.name

    # KuryrLoadBalancer CRD carrying the id of the load balancer.
    m_get_klb.return_value = {"status": {"loadbalancer": {"id": lb_id}}}

    # Objects handed back by the mocked load balancer client.
    os_lb = munch.Munch({
        'id': lb_id,
        'name': expected_name,
        'provisioning_status': expected_state,
        'pools': [{'id': pool_id}],
    })
    os_pool = munch.Munch({
        'id': pool_id,
        'name': pool_name,
        'loadbalancers': [{'id': lb_id}],
        'members': [{'id': mock.sentinel.id}],
    })
    self.srv._os_lb.find_load_balancer.return_value = os_lb
    self.srv._os_lb.pools.return_value = [os_pool]

    self.cls._record_lbs_metrics(self.srv)

    state_metric = self.srv.lbs_state
    state_metric.labels.assert_called_with(lb_name=expected_name)
    state_metric.labels().state.assert_called_with(expected_state)

    members_metric = self.srv.lbs_members_count
    members_metric.labels.assert_called_with(
        lb_name=expected_name, lb_pool_name=pool_name)
    # The single pool above has exactly one member.
    members_metric.labels().set.assert_called_with(1)
|
||||
|
||||
@mock.patch('kuryr_kubernetes.utils.get_kuryrloadbalancer')
def test__record_no_lb_present_metric(self, m_get_klb):
    """An LB id that the client cannot find is reported as DELETED."""
    expected_name = 'default/kubernetes'
    # The CRD still references an id, but the client finds nothing for it.
    m_get_klb.return_value = {
        "status": {"loadbalancer": {"id": mock.sentinel.id}},
    }
    self.srv._os_lb.find_load_balancer.return_value = None

    self.cls._record_lbs_metrics(self.srv)

    state_metric = self.srv.lbs_state
    state_metric.labels.assert_called_with(lb_name=expected_name)
    state_metric.labels().state.assert_called_with('DELETED')
|
||||
|
||||
@mock.patch('kuryr_kubernetes.utils.get_kuryrloadbalancer')
def test__no_record_lbs_metrics(self, m_get_klb):
    """Nothing is recorded when the CRD lookup yields an empty dict."""
    m_get_klb.return_value = {}

    self.cls._record_lbs_metrics(self.srv)

    state_metric = self.srv.lbs_state
    members_metric = self.srv.lbs_members_count

    # Check ``labels`` itself first: the chained ``labels()`` below is a
    # call on the mock and would otherwise invalidate that assertion.
    state_metric.labels.assert_not_called()
    state_metric.labels().state.assert_not_called()
    members_metric.labels.assert_not_called()
    members_metric.labels().set.assert_not_called()
|
||||
|
@ -636,3 +636,13 @@ def get_subnet_by_ip(nodes_subnets, target_ip):
|
||||
return nodes_subnet
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def get_kuryrloadbalancer(name, namespace):
    """Fetch the KuryrLoadBalancer called *name* from *namespace*.

    :param name: name of the KuryrLoadBalancer resource.
    :param namespace: namespace the resource lives in.
    :returns: whatever the Kubernetes client returns for the resource
              path, or an empty dict when the client raises
              K8sResourceNotFound.
    """
    kubernetes = clients.get_kubernetes_client()
    klb_path = (f'{constants.K8S_API_CRD_NAMESPACES}/'
                f'{namespace}/kuryrloadbalancers/'
                f'{name}')
    try:
        klb = kubernetes.get(klb_path)
    except exceptions.K8sResourceNotFound:
        # A missing resource is not an error for callers; hand back an
        # empty mapping so chained ``.get()`` lookups keep working.
        return {}
    return klb
|
||||
|
Loading…
Reference in New Issue
Block a user