Expose critical lbs metrics

Includes a new Gauge metric that records the
number of members of each load balancer pool
considered critical. The metric is labeled with
the load balancer name and the pool name, and
its value is the number of members. Also includes
an Enum metric with the current state of the LB.

Change-Id: Id89bb48d86588f4d2a28ab91963e0b84843cbd6f
This commit is contained in:
Maysa Macedo 2021-06-30 12:34:09 +02:00 committed by Robin Cernin
parent 291821cbf6
commit c4278f9da2
3 changed files with 113 additions and 1 deletions

View File

@ -22,6 +22,7 @@ from oslo_log import log as logging
from kuryr_kubernetes import clients from kuryr_kubernetes import clients
from kuryr_kubernetes import config from kuryr_kubernetes import config
from kuryr_kubernetes import utils
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
CONF = cfg.CONF CONF = cfg.CONF
@ -44,6 +45,7 @@ class ControllerPrometheusExporter(object):
'/metrics', methods=['GET'], view_func=self.metrics) '/metrics', methods=['GET'], view_func=self.metrics)
self.headers = {'Connection': 'close'} self.headers = {'Connection': 'close'}
self._os_net = clients.get_network_client() self._os_net = clients.get_network_client()
self._os_lb = clients.get_loadbalancer_client()
self._project_id = config.CONF.neutron_defaults.project self._project_id = config.CONF.neutron_defaults.project
self._create_metrics() self._create_metrics()
@ -51,6 +53,7 @@ class ControllerPrometheusExporter(object):
"""Provides the registered metrics""" """Provides the registered metrics"""
self._record_quota_free_count_metric() self._record_quota_free_count_metric()
self._record_ports_quota_per_subnet_metric() self._record_ports_quota_per_subnet_metric()
self._record_lbs_metrics()
collected_metric = generate_latest(self.registry) collected_metric = generate_latest(self.registry)
return flask.Response(collected_metric, mimetype='text/plain') return flask.Response(collected_metric, mimetype='text/plain')
@ -117,8 +120,30 @@ class ControllerPrometheusExporter(object):
ports_availability = total_num_addresses-ports_count ports_availability = total_num_addresses-ports_count
self.port_quota_per_subnet.labels(**labels).set(ports_availability) self.port_quota_per_subnet.labels(**labels).set(ports_availability)
def _record_lbs_metrics(self):
    """Refresh the state and member-count metrics of the critical LBs.

    For every (name, namespace) pair listed below, the matching
    KuryrLoadBalancer is fetched through ``utils.get_kuryrloadbalancer``
    and the load balancer id is read from ``status.loadbalancer.id``.
    Pairs without an id are skipped. Otherwise the load balancer is
    looked up with ``self._os_lb.find_load_balancer``: when nothing is
    found the ``lbs_state`` Enum is set to 'DELETED', else it is set to
    the load balancer's ``provisioning_status`` and the
    ``lbs_members_count`` Gauge is set, per pool, to the number of
    entries in ``pool.members``.
    """
    critical_services = (
        ('dns-default', 'openshift-dns'),
        ('kubernetes', 'default'),
    )
    for svc_name, svc_namespace in critical_services:
        crd = utils.get_kuryrloadbalancer(svc_name, svc_namespace)
        crd_lb = crd.get('status', {}).get('loadbalancer', {})
        octavia_id = crd_lb.get('id')
        if not octavia_id:
            # No load balancer id recorded on the CRD; nothing to report.
            continue

        octavia_lb = self._os_lb.find_load_balancer(octavia_id)
        state_metric = self.lbs_state.labels(
            lb_name=svc_namespace + '/' + svc_name)
        if not octavia_lb:
            # The CRD references an id the lookup did not return.
            state_metric.state('DELETED')
            continue
        state_metric.state(octavia_lb.provisioning_status)

        for lb_pool in self._os_lb.pools(loadbalancer_id=octavia_lb.id):
            members_metric = self.lbs_members_count.labels(
                lb_name=octavia_lb.name, lb_pool_name=lb_pool.name)
            members_metric.set(len(lb_pool.members))
def _create_metrics(self): def _create_metrics(self):
"""Creates a registry and records a new Gauge metric""" """Creates a registry and records metrics"""
self.registry = prometheus_client.CollectorRegistry() self.registry = prometheus_client.CollectorRegistry()
self.quota_free_count = prometheus_client.Gauge( self.quota_free_count = prometheus_client.Gauge(
'kuryr_quota_free_count', 'Amount of quota available' 'kuryr_quota_free_count', 'Amount of quota available'
@ -130,6 +155,19 @@ class ControllerPrometheusExporter(object):
' on Subnet', labelnames={'subnet_id', 'subnet_name'}, ' on Subnet', labelnames={'subnet_id', 'subnet_name'},
registry=self.registry) registry=self.registry)
self.lbs_members_count = prometheus_client.Gauge(
'kuryr_critical_lb_members_count', 'Amount of members per '
'critical Load Balancer pool',
labelnames={'lb_name', 'lb_pool_name'},
registry=self.registry)
self.lbs_state = prometheus_client.Enum(
'kuryr_critical_lb_state', 'Critical Load Balancer State',
labelnames={'lb_name'},
states=['ERROR', 'ACTIVE', 'DELETED', 'PENDING_CREATE',
'PENDING_UPDATE', 'PENDING_DELETE'],
registry=self.registry)
buckets = (10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, _INF) buckets = (10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, _INF)
self.pod_creation_latency = prometheus_client.Histogram( self.pod_creation_latency = prometheus_client.Histogram(
'kuryr_pod_creation_latency', 'Time taken for a pod to have' 'kuryr_pod_creation_latency', 'Time taken for a pod to have'

View File

@ -60,8 +60,13 @@ class TestControllerPrometheusExporter(base.TestCase):
spec=prometheus_client.Gauge) spec=prometheus_client.Gauge)
self.srv.port_quota_per_subnet = mock.MagicMock( self.srv.port_quota_per_subnet = mock.MagicMock(
spec=prometheus_client.Gauge) spec=prometheus_client.Gauge)
self.srv.lbs_members_count = mock.MagicMock(
spec=prometheus_client.Gauge)
self.srv.lbs_state = mock.MagicMock(
spec=prometheus_client.Enum)
self.srv._project_id = mock.sentinel.project_id self.srv._project_id = mock.sentinel.project_id
self.srv._os_net = self.useFixture(k_fix.MockNetworkClient()).client self.srv._os_net = self.useFixture(k_fix.MockNetworkClient()).client
self.srv._os_lb = self.useFixture(k_fix.MockLBaaSClient()).client
def test__record_quota_free_count_metric(self): def test__record_quota_free_count_metric(self):
quota = get_quota_obj() quota = get_quota_obj()
@ -103,3 +108,62 @@ class TestControllerPrometheusExporter(base.TestCase):
self.srv.port_quota_per_subnet.labels.assert_called_with( self.srv.port_quota_per_subnet.labels.assert_called_with(
**{'subnet_id': subnet_id, 'subnet_name': subnet_name}) **{'subnet_id': subnet_id, 'subnet_name': subnet_name})
self.srv.port_quota_per_subnet.labels().set.assert_called_with(509) self.srv.port_quota_per_subnet.labels().set.assert_called_with(509)
@mock.patch('kuryr_kubernetes.utils.get_kuryrloadbalancer')
def test__record_lbs_metrics(self, m_get_klb):
    """An existing LB gets its state and per-pool member count recorded."""
    expected_lb_name = 'default/kubernetes'
    expected_state = 'ACTIVE'
    expected_pool_name = mock.sentinel.name
    # The LB, pool and member ids all use the same sentinel; none of the
    # assertions below depend on their values.
    shared_id = mock.sentinel.id

    m_get_klb.return_value = {
        "status": {"loadbalancer": {"id": shared_id}},
    }
    fake_lb = munch.Munch({
        'id': shared_id,
        'name': expected_lb_name,
        'provisioning_status': expected_state,
        'pools': [{'id': shared_id}],
    })
    fake_pool = munch.Munch({
        'id': shared_id,
        'name': expected_pool_name,
        'loadbalancers': [{'id': shared_id}],
        'members': [{'id': shared_id}],
    })
    self.srv._os_lb.find_load_balancer.return_value = fake_lb
    self.srv._os_lb.pools.return_value = [fake_pool]

    self.cls._record_lbs_metrics(self.srv)

    state_labels = self.srv.lbs_state.labels
    state_labels.assert_called_with(lb_name=expected_lb_name)
    state_labels.return_value.state.assert_called_with(expected_state)

    members_labels = self.srv.lbs_members_count.labels
    members_labels.assert_called_with(
        lb_name=expected_lb_name, lb_pool_name=expected_pool_name)
    # The fake pool holds exactly one member.
    members_labels.return_value.set.assert_called_with(1)
@mock.patch('kuryr_kubernetes.utils.get_kuryrloadbalancer')
def test__record_no_lb_present_metric(self, m_get_klb):
    """A CRD whose LB lookup returns None is reported as 'DELETED'."""
    expected_lb_name = 'default/kubernetes'
    crd_with_lb_id = {
        "status": {"loadbalancer": {"id": mock.sentinel.id}},
    }
    m_get_klb.return_value = crd_with_lb_id
    # The CRD carries an id, but the load balancer lookup finds nothing.
    self.srv._os_lb.find_load_balancer.return_value = None

    self.cls._record_lbs_metrics(self.srv)

    state_labels = self.srv.lbs_state.labels
    state_labels.assert_called_with(lb_name=expected_lb_name)
    state_labels.return_value.state.assert_called_with('DELETED')
@mock.patch('kuryr_kubernetes.utils.get_kuryrloadbalancer')
def test__no_record_lbs_metrics(self, m_get_klb):
    """Nothing is recorded when the CRD lookup returns an empty dict."""
    m_get_klb.return_value = {}

    self.cls._record_lbs_metrics(self.srv)

    # ``return_value`` reaches the labelled child without calling
    # ``labels`` itself, so the checks do not touch the call records.
    for metric, setter in ((self.srv.lbs_state, 'state'),
                           (self.srv.lbs_members_count, 'set')):
        metric.labels.assert_not_called()
        getattr(metric.labels.return_value, setter).assert_not_called()

View File

@ -636,3 +636,13 @@ def get_subnet_by_ip(nodes_subnets, target_ip):
return nodes_subnet return nodes_subnet
return None return None
def get_kuryrloadbalancer(name, namespace):
    """Fetch a ``kuryrloadbalancers`` resource through the K8s client.

    :param name: name of the resource to fetch.
    :param namespace: namespace the resource is looked up in.
    :returns: whatever the Kubernetes client's ``get`` returns for the
              resource path, or an empty dict when the client raises
              ``K8sResourceNotFound``.
    """
    client = clients.get_kubernetes_client()
    resource_path = (f'{constants.K8S_API_CRD_NAMESPACES}/'
                     f'{namespace}/kuryrloadbalancers/'
                     f'{name}')
    try:
        resource = client.get(resource_path)
    except exceptions.K8sResourceNotFound:
        # A missing resource is reported as an empty dict, not an error.
        return {}
    return resource