Prometheus rule file syntax test

This patch improves the functional tests of the
prometheus plugin. With this fix, the created
prometheus rule files are checked by promtool.
If their syntax is invalid, the test fails.

This check is disabled by default and enabled when
CONF.prometheus_plugin.test_rule_with_promtool
is true.
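
The check itself runs promtool's rule syntax check against the
uploaded rule file over SSH, i.e. a command of the form (the rule
directory below is only an illustrative placeholder):

  promtool check rules <rule_files_dir>/<pm_job_id>.json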

Implements: blueprint support-auto-lcm
Change-Id: Ica331befe225156c607d5c8267462b7281669c91
Koji Shimizu 2022-10-23 10:49:05 +09:00
parent 7bb1bf8f5e
commit 5b02232aaa
7 changed files with 87 additions and 18 deletions


@@ -586,9 +586,10 @@
 v2_vnfm:
   kubernetes_vim_rsc_wait_timeout: 800
 prometheus_plugin:
-  fault_management: True
-  performance_management: True
-  auto_scaling: True
+  fault_management: true
+  performance_management: true
+  auto_scaling: true
+  test_rule_with_promtool: true
 tox_envlist: dsvm-functional-sol-kubernetes-v2
 vars:
   prometheus_setup: true


@@ -52,6 +52,9 @@ performance_management, fault_management or auto_scaling below.
    * - ``CONF.prometheus_plugin.auto_scaling``
      - false
      - Enable prometheus plugin autoscaling.
+   * - ``CONF.prometheus_plugin.test_rule_with_promtool``
+     - false
+     - Enable rule file validation using promtool.

 System
 ~~~~~~
@@ -241,6 +244,14 @@ needs to activate sshd.
 - The directory indicated by "rule_files" setting of prometheus
   server config should be accessible by SSH.

+Supported versions
+------------------
+
+Tacker Zed release
+
+- Prometheus: 2.37
+- Alertmanager: 0.24
+
 Alert rule registration
 ~~~~~~~~~~~~~~~~~~~~~~~
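
The bullet above about the "rule_files" setting means the prometheus
server must load rules from a directory that Tacker can write to over
SSH, so that the uploaded <pm_job_id>.json files are picked up. A
minimal prometheus.yml fragment could look like the following sketch;
the directory path is only an assumption for illustration:

  rule_files:
    - /etc/prometheus/rules/*.json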


@@ -161,6 +161,9 @@ PROMETHEUS_PLUGIN_OPTS = [
                        'This configuration is changed in case of replacing '
                        'the original function with a vendor specific '
                        'function.')),
+    cfg.BoolOpt('test_rule_with_promtool',
+                default=False,
+                help=_('Enable rule file validation using promtool.')),
 ]

 CONF.register_opts(PROMETHEUS_PLUGIN_OPTS, 'prometheus_plugin')
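
Since the option is registered in the prometheus_plugin group, a test
environment that wants the promtool check would enable it in its
tacker.conf; a minimal fragment (file location depends on the
deployment) might look like:

  [prometheus_plugin]
  test_rule_with_promtool = true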


@@ -21,6 +21,7 @@ import paramiko
 import re
 import tempfile

+from keystoneauth1 import exceptions as ks_exc
 from oslo_log import log as logging
 from oslo_utils import uuidutils
 from tacker.sol_refactored.api import prometheus_plugin_validator as validator
@@ -37,6 +38,7 @@ from tacker.sol_refactored import objects

 LOG = logging.getLogger(__name__)
+logging.getLogger("paramiko").setLevel(logging.WARNING)
 CONF = cfg.CONF
@@ -510,12 +512,23 @@ class PrometheusPluginPm(PrometheusPlugin, mon_base.MonitoringPlugin):
     def delete_rules(self, context, pm_job):
         target_list, reload_list = self.get_access_info(pm_job)
-        for info in target_list:
-            self._delete_rule(
-                info['host'], info['port'], info['user'],
-                info['password'], info['path'], pm_job.id)
+        for target in target_list:
+            try:
+                self._delete_rule(
+                    target['host'], target['port'], target['user'],
+                    target['password'], target['path'], pm_job.id)
+            except (sol_ex.PrometheusPluginError, ks_exc.ClientException,
+                    paramiko.SSHException):
+                # This exception is ignored: DELETE /pm_jobs/{id} should
+                # succeed even if _delete_rule() fails, e.g. because the
+                # rule file has already been deleted.
+                pass
         for uri in reload_list:
-            self.reload_prom_server(context, uri)
+            try:
+                self.reload_prom_server(context, uri)
+            except (sol_ex.PrometheusPluginError, ks_exc.ClientException,
+                    paramiko.SSHException):
+                pass

     def decompose_metrics(self, pm_job):
         if pm_job.objectType in {'Vnf', 'Vnfc'}:
@@ -528,9 +541,10 @@ class PrometheusPluginPm(PrometheusPlugin, mon_base.MonitoringPlugin):
     def reload_prom_server(self, context, reload_uri):
         resp, _ = self.client.do_request(
             reload_uri, "PUT", context=context)
-        if resp.status_code != 202:
-            LOG.error("reloading request to prometheus is failed: %d.",
-                      resp.status_code)
+        if resp.status_code >= 400 and resp.status_code < 600:
+            raise sol_ex.PrometheusPluginError(
+                f"Reloading request to prometheus failed: "
+                f"{resp.status_code}.")

     def _upload_rule(self, rule_group, host, port, user, password, path,
                      pm_job_id):
@@ -544,6 +558,25 @@ class PrometheusPluginPm(PrometheusPlugin, mon_base.MonitoringPlugin):
                 client.connect(username=user, password=password)
                 sftp = paramiko.SFTPClient.from_transport(client)
                 sftp.put(filename, f'{path}/{pm_job_id}.json')
+        self.verify_rule(host, port, user, password, path, pm_job_id)
+
+    def verify_rule(self, host, port, user, password, path, pm_job_id):
+        if not CONF.prometheus_plugin.test_rule_with_promtool:
+            return
+        with paramiko.SSHClient() as client:
+            client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+            client.connect(host, port=port, username=user, password=password)
+            command = f"promtool check rules {path}/{pm_job_id}.json"
+            LOG.info("Rule file validation command: %s", command)
+            _, stdout, stderr = client.exec_command(command)
+            if stdout.channel.recv_exit_status() != 0:
+                error_byte = stderr.read()
+                error_str = error_byte.decode('utf-8')
+                LOG.error(
+                    "Rule file validation with promtool failed: %s",
+                    error_str)
+                raise sol_ex.PrometheusPluginError(
+                    "Rule file validation with promtool failed.")

     def get_access_info(self, pm_job):
         target_list = []
@@ -579,12 +612,32 @@ class PrometheusPluginPm(PrometheusPlugin, mon_base.MonitoringPlugin):
     def upload_rules(
             self, context, target_list, reload_list, rule_group, pm_job):
-        for info in target_list:
-            self._upload_rule(
-                rule_group, info['host'], info['port'], info['user'],
-                info['password'], info['path'], pm_job.id)
-        for uri in reload_list:
-            self.reload_prom_server(context, uri)
+        def _cleanup_error(target_list):
+            for target in target_list:
+                try:
+                    self._delete_rule(target['host'], target['port'],
+                        target['user'], target['password'], target['path'],
+                        pm_job.id)
+                except (sol_ex.PrometheusPluginError, ks_exc.ClientException,
+                        paramiko.SSHException):
+                    pass
+
+        try:
+            for target in target_list:
+                self._upload_rule(
+                    rule_group, target['host'], target['port'],
+                    target['user'], target['password'], target['path'],
+                    pm_job.id)
+            for uri in reload_list:
+                self.reload_prom_server(context, uri)
+        except (sol_ex.PrometheusPluginError, ks_exc.ClientException,
+                paramiko.SSHException) as e:
+            LOG.error("failed to upload rule files: %s", e.args[0])
+            _cleanup_error(target_list)
+            raise e
+        except Exception as e:
+            _cleanup_error(target_list)
+            raise e

     def get_vnf_instances(self, context, pm_job):
         object_instance_ids = list(set(pm_job.objectInstanceIds))
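
For quick local experimentation with the same check that verify_rule()
runs over SSH, promtool can also be invoked directly via subprocess.
This is only an illustrative sketch, not part of the change; the
function name and file path are made up:

  # Sketch: run the promtool syntax check locally (promtool must be on
  # PATH); 'promtool check rules' exits non-zero for an invalid file.
  import subprocess

  def check_rule_file(rule_file):
      result = subprocess.run(
          ['promtool', 'check', 'rules', rule_file],
          capture_output=True, text=True)
      if result.returncode != 0:
          raise RuntimeError(
              f"rule file validation failed:\n"
              f"{result.stderr or result.stdout}")

  # Hypothetical path; any Prometheus rule file works here.
  check_rule_file('/etc/prometheus/rules/pm-job-example.json')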


@@ -60,4 +60,4 @@ class PrometheusPluginDriver():
         url = f'{ep}/vnflcm/v2/vnf_instances/{vnf_instance_id}/scale'
         resp, _ = self.client.do_request(
             url, "POST", context=context, body=scale_req, version="2.0.0")
-        LOG.info("AutoHealing request is processed: %d.", resp.status_code)
+        LOG.info("AutoScaling request is processed: %d.", resp.status_code)


@@ -199,6 +199,7 @@ class VnfPmControllerV2(sol_wsgi.SolAPIController):
         try:
             self.plugin.create_job(context=context, pm_job=pm_job)
         except sol_ex.PrometheusPluginError as e:
+            LOG.error("Failed to create PM job: %s", e.args[0])
             raise sol_ex.PrometheusSettingFailed from e

         pm_job.create(context)