Prometheus rule file syntax test

This improves the functional test of the
Prometheus plugin. With this change, the created
Prometheus rule files are checked by promtool.
If their syntax is invalid, the test fails.

This check is disabled by default and enabled when
CONF.prometheus_plugin.test_rule_with_promtool
is true.

Implements: blueprint support-auto-lcm
Change-Id: Ica331befe225156c607d5c8267462b7281669c91
This commit is contained in:
Koji Shimizu
2022-10-23 10:49:05 +09:00
parent 7bb1bf8f5e
commit 5b02232aaa
7 changed files with 87 additions and 18 deletions

View File

@@ -586,9 +586,10 @@
v2_vnfm: v2_vnfm:
kubernetes_vim_rsc_wait_timeout: 800 kubernetes_vim_rsc_wait_timeout: 800
prometheus_plugin: prometheus_plugin:
fault_management: True fault_management: true
performance_management: True performance_management: true
auto_scaling: True auto_scaling: true
test_rule_with_promtool: true
tox_envlist: dsvm-functional-sol-kubernetes-v2 tox_envlist: dsvm-functional-sol-kubernetes-v2
vars: vars:
prometheus_setup: true prometheus_setup: true

View File

@@ -52,6 +52,9 @@ performance_management, fault_management or auto_scaling below.
* - ``CONF.prometheus_plugin.auto_scaling`` * - ``CONF.prometheus_plugin.auto_scaling``
- false - false
- Enable prometheus plugin autoscaling. - Enable prometheus plugin autoscaling.
* - ``CONF.prometheus_plugin.test_rule_with_promtool``
- false
- Enable rule file validation using promtool.
System System
~~~~~~ ~~~~~~
@@ -241,6 +244,14 @@ needs to activate sshd.
- The directory indicated by "rule_files" setting of prometheus - The directory indicated by "rule_files" setting of prometheus
server config should be accessible by SSH. server config should be accessible by SSH.
Supported versions
------------------
Tacker Zed release
- Prometheus: 2.37
- Alertmanager: 0.24
Alert rule registration Alert rule registration
~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~

View File

@@ -161,6 +161,9 @@ PROMETHEUS_PLUGIN_OPTS = [
'This configuration is changed in case of replacing ' 'This configuration is changed in case of replacing '
'the original function with a vendor specific ' 'the original function with a vendor specific '
'function.')), 'function.')),
cfg.BoolOpt('test_rule_with_promtool',
default=False,
help=_('Enable rule file validation using promtool.')),
] ]
CONF.register_opts(PROMETHEUS_PLUGIN_OPTS, 'prometheus_plugin') CONF.register_opts(PROMETHEUS_PLUGIN_OPTS, 'prometheus_plugin')

View File

@@ -21,6 +21,7 @@ import paramiko
import re import re
import tempfile import tempfile
from keystoneauth1 import exceptions as ks_exc
from oslo_log import log as logging from oslo_log import log as logging
from oslo_utils import uuidutils from oslo_utils import uuidutils
from tacker.sol_refactored.api import prometheus_plugin_validator as validator from tacker.sol_refactored.api import prometheus_plugin_validator as validator
@@ -37,6 +38,7 @@ from tacker.sol_refactored import objects
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
logging.getLogger("paramiko").setLevel(logging.WARNING)
CONF = cfg.CONF CONF = cfg.CONF
@@ -510,12 +512,23 @@ class PrometheusPluginPm(PrometheusPlugin, mon_base.MonitoringPlugin):
def delete_rules(self, context, pm_job): def delete_rules(self, context, pm_job):
target_list, reload_list = self.get_access_info(pm_job) target_list, reload_list = self.get_access_info(pm_job)
for info in target_list: for target in target_list:
self._delete_rule( try:
info['host'], info['port'], info['user'], self._delete_rule(
info['password'], info['path'], pm_job.id) target['host'], target['port'], target['user'],
target['password'], target['path'], pm_job.id)
except (sol_ex.PrometheusPluginError, ks_exc.ClientException,
paramiko.SSHException):
# This exception is ignored. DELETE /pm_jobs/{id}
# will be success even if _delete_rule() is failed.
# Because the rule file was already deleted.
pass
for uri in reload_list: for uri in reload_list:
self.reload_prom_server(context, uri) try:
self.reload_prom_server(context, uri)
except (sol_ex.PrometheusPluginError, ks_exc.ClientException,
paramiko.SSHException):
pass
def decompose_metrics(self, pm_job): def decompose_metrics(self, pm_job):
if pm_job.objectType in {'Vnf', 'Vnfc'}: if pm_job.objectType in {'Vnf', 'Vnfc'}:
@@ -528,9 +541,10 @@ class PrometheusPluginPm(PrometheusPlugin, mon_base.MonitoringPlugin):
def reload_prom_server(self, context, reload_uri): def reload_prom_server(self, context, reload_uri):
resp, _ = self.client.do_request( resp, _ = self.client.do_request(
reload_uri, "PUT", context=context) reload_uri, "PUT", context=context)
if resp.status_code != 202: if resp.status_code >= 400 and resp.status_code < 600:
LOG.error("reloading request to prometheus is failed: %d.", raise sol_ex.PrometheusPluginError(
resp.status_code) f"Reloading request to prometheus is failed: "
f"{resp.status_code}.")
def _upload_rule(self, rule_group, host, port, user, password, path, def _upload_rule(self, rule_group, host, port, user, password, path,
pm_job_id): pm_job_id):
@@ -544,6 +558,25 @@ class PrometheusPluginPm(PrometheusPlugin, mon_base.MonitoringPlugin):
client.connect(username=user, password=password) client.connect(username=user, password=password)
sftp = paramiko.SFTPClient.from_transport(client) sftp = paramiko.SFTPClient.from_transport(client)
sftp.put(filename, f'{path}/{pm_job_id}.json') sftp.put(filename, f'{path}/{pm_job_id}.json')
self.verify_rule(host, port, user, password, path, pm_job_id)
def verify_rule(self, host, port, user, password, path, pm_job_id):
    """Check an uploaded rule file with promtool on the remote host.

    Does nothing unless CONF.prometheus_plugin.test_rule_with_promtool
    is true. Otherwise connects to the host over SSH and runs
    ``promtool check rules`` against ``{path}/{pm_job_id}.json``.

    :param host: host name or address to connect to over SSH.
    :param port: SSH port.
    :param user: SSH user name.
    :param password: SSH password.
    :param path: remote directory that holds the rule file.
    :param pm_job_id: PM job id; the rule file is named after it.
    :raises sol_ex.PrometheusPluginError: if promtool exits with a
        non-zero status.
    """
    import shlex

    if not CONF.prometheus_plugin.test_rule_with_promtool:
        return
    with paramiko.SSHClient() as client:
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        client.connect(host, port=port, username=user, password=password)
        # exec_command() passes the string to the remote shell, so the
        # file name is quoted: a path with spaces or shell
        # metacharacters must reach promtool as one literal argument.
        rule_file = shlex.quote(f"{path}/{pm_job_id}.json")
        command = f"promtool check rules {rule_file}"
        LOG.info("Rule file validation command: %s", command)
        _, stdout, stderr = client.exec_command(command)
        # recv_exit_status() blocks until the remote command finishes.
        if stdout.channel.recv_exit_status() != 0:
            error_byte = stderr.read()
            error_str = error_byte.decode('utf-8')
            LOG.error(
                "Rule file validation with promtool failed: %s",
                error_str)
            raise sol_ex.PrometheusPluginError(
                "Rule file validation with promtool failed.")
def get_access_info(self, pm_job): def get_access_info(self, pm_job):
target_list = [] target_list = []
@@ -579,12 +612,32 @@ class PrometheusPluginPm(PrometheusPlugin, mon_base.MonitoringPlugin):
def upload_rules( def upload_rules(
self, context, target_list, reload_list, rule_group, pm_job): self, context, target_list, reload_list, rule_group, pm_job):
for info in target_list: def _cleanup_error(target_list):
self._upload_rule( for target in target_list:
rule_group, info['host'], info['port'], info['user'], try:
info['password'], info['path'], pm_job.id) self._delete_rule(target['host'], target['port'],
for uri in reload_list: target['user'], target['password'], target['path'],
self.reload_prom_server(context, uri) pm_job.id)
except (sol_ex.PrometheusPluginError, ks_exc.ClientException,
paramiko.SSHException):
pass
try:
for target in target_list:
self._upload_rule(
rule_group, target['host'], target['port'],
target['user'], target['password'], target['path'],
pm_job.id)
for uri in reload_list:
self.reload_prom_server(context, uri)
except (sol_ex.PrometheusPluginError, ks_exc.ClientException,
paramiko.SSHException) as e:
LOG.error("failed to upload rule files: %s", e.args[0])
_cleanup_error(target_list)
raise e
except Exception as e:
_cleanup_error(target_list)
raise e
def get_vnf_instances(self, context, pm_job): def get_vnf_instances(self, context, pm_job):
object_instance_ids = list(set(pm_job.objectInstanceIds)) object_instance_ids = list(set(pm_job.objectInstanceIds))

View File

@@ -60,4 +60,4 @@ class PrometheusPluginDriver():
url = f'{ep}/vnflcm/v2/vnf_instances/{vnf_instance_id}/scale' url = f'{ep}/vnflcm/v2/vnf_instances/{vnf_instance_id}/scale'
resp, _ = self.client.do_request( resp, _ = self.client.do_request(
url, "POST", context=context, body=scale_req, version="2.0.0") url, "POST", context=context, body=scale_req, version="2.0.0")
LOG.info("AutoHealing request is processed: %d.", resp.status_code) LOG.info("AutoScaling request is processed: %d.", resp.status_code)

View File

@@ -199,6 +199,7 @@ class VnfPmControllerV2(sol_wsgi.SolAPIController):
try: try:
self.plugin.create_job(context=context, pm_job=pm_job) self.plugin.create_job(context=context, pm_job=pm_job)
except sol_ex.PrometheusPluginError as e: except sol_ex.PrometheusPluginError as e:
LOG.error("Failed to create PM job: %s", e.args[0])
raise sol_ex.PrometheusSettingFailed from e raise sol_ex.PrometheusSettingFailed from e
pm_job.create(context) pm_job.create(context)