Merge "Prometheus rule file syntax test"
@@ -586,9 +586,10 @@
           v2_vnfm:
             kubernetes_vim_rsc_wait_timeout: 800
           prometheus_plugin:
-            fault_management: True
-            performance_management: True
-            auto_scaling: True
+            fault_management: true
+            performance_management: true
+            auto_scaling: true
+            test_rule_with_promtool: true
       tox_envlist: dsvm-functional-sol-kubernetes-v2
     vars:
       prometheus_setup: true
@@ -52,6 +52,9 @@ performance_management, fault_management or auto_scaling below.
    * - ``CONF.prometheus_plugin.auto_scaling``
      - false
      - Enable prometheus plugin autoscaling.
+   * - ``CONF.prometheus_plugin.test_rule_with_promtool``
+     - false
+     - Enable rule file validation using promtool.

 System
 ~~~~~~
@@ -241,6 +244,14 @@ needs to activate sshd.
 - The directory indicated by "rule_files" setting of prometheus
   server config should be accessible by SSH.

+Supported versions
+------------------
+
+Tacker Zed release
+
+- Prometheus: 2.37
+- Alertmanager: 0.24
+
 Alert rule registration
 ~~~~~~~~~~~~~~~~~~~~~~~

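The SSH prerequisite above exists because the plugin copies generated rule files into the "rule_files" directory over SFTP (see the sftp.put() context in the code hunks below). A minimal standalone sketch of that transfer with paramiko, using hypothetical host, credential, and path values:

    import paramiko

    # Hypothetical values; the plugin takes these from the PM job's metadata.
    host, port = 'prometheus.example.com', 22
    transport = paramiko.Transport((host, port))
    try:
        transport.connect(username='user', password='secret')
        sftp = paramiko.SFTPClient.from_transport(transport)
        # Place the rule file in the directory named by "rule_files"
        # in the Prometheus server config.
        sftp.put('/tmp/pm_job.json', '/etc/prometheus/rules/pm_job.json')
    finally:
        transport.close()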
@@ -161,6 +161,9 @@ PROMETHEUS_PLUGIN_OPTS = [
                    'This configuration is changed in case of replacing '
                    'the original function with a vendor specific '
                    'function.')),
+    cfg.BoolOpt('test_rule_with_promtool',
+                default=False,
+                help=_('Enable rule file validation using promtool.')),
 ]

 CONF.register_opts(PROMETHEUS_PLUGIN_OPTS, 'prometheus_plugin')
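A minimal standalone sketch of the flag added above, using the same option name and group; in Tacker itself the configuration file is loaded at service startup, so the explicit CONF([]) call here is for the sketch only:

    from oslo_config import cfg

    CONF = cfg.CONF
    CONF.register_opts(
        [cfg.BoolOpt('test_rule_with_promtool',
                     default=False,
                     help='Enable rule file validation using promtool.')],
        'prometheus_plugin')
    CONF([], default_config_files=[])  # standalone initialization only

    # False unless [prometheus_plugin] test_rule_with_promtool = true
    # is set in the loaded configuration file.
    print(CONF.prometheus_plugin.test_rule_with_promtool)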
@@ -21,6 +21,7 @@ import paramiko
 import re
 import tempfile

+from keystoneauth1 import exceptions as ks_exc
 from oslo_log import log as logging
 from oslo_utils import uuidutils
 from tacker.sol_refactored.api import prometheus_plugin_validator as validator
@@ -37,6 +38,7 @@ from tacker.sol_refactored import objects


 LOG = logging.getLogger(__name__)
+logging.getLogger("paramiko").setLevel(logging.WARNING)

 CONF = cfg.CONF

@@ -510,12 +512,23 @@ class PrometheusPluginPm(PrometheusPlugin, mon_base.MonitoringPlugin):

     def delete_rules(self, context, pm_job):
         target_list, reload_list = self.get_access_info(pm_job)
-        for info in target_list:
-            self._delete_rule(
-                info['host'], info['port'], info['user'],
-                info['password'], info['path'], pm_job.id)
+        for target in target_list:
+            try:
+                self._delete_rule(
+                    target['host'], target['port'], target['user'],
+                    target['password'], target['path'], pm_job.id)
+            except (sol_ex.PrometheusPluginError, ks_exc.ClientException,
+                    paramiko.SSHException):
+                # Ignored: DELETE /pm_jobs/{id} should succeed even if
+                # _delete_rule() fails, e.g. because the rule file has
+                # already been deleted.
+                pass
         for uri in reload_list:
-            self.reload_prom_server(context, uri)
+            try:
+                self.reload_prom_server(context, uri)
+            except (sol_ex.PrometheusPluginError, ks_exc.ClientException,
+                    paramiko.SSHException):
+                pass

     def decompose_metrics(self, pm_job):
         if pm_job.objectType in {'Vnf', 'Vnfc'}:
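The deletion path is deliberately best-effort: DELETE /pm_jobs/{id} should succeed even when a Prometheus host is unreachable or the rule file is already gone. The same shape, distilled into a standalone sketch (names here are illustrative, not Tacker APIs):

    import os

    def best_effort(action, items, expected):
        """Apply action to every item, ignoring expected failures."""
        for item in items:
            try:
                action(item)
            except expected:
                # The resource may already be gone; keep going.
                pass

    # Example: remove files that may or may not still exist.
    best_effort(os.remove, ['/tmp/a.json', '/tmp/b.json'], (OSError,))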
@@ -528,9 +541,10 @@ class PrometheusPluginPm(PrometheusPlugin, mon_base.MonitoringPlugin):
     def reload_prom_server(self, context, reload_uri):
         resp, _ = self.client.do_request(
             reload_uri, "PUT", context=context)
-        if resp.status_code != 202:
-            LOG.error("reloading request to prometheus is failed: %d.",
-                      resp.status_code)
+        if resp.status_code >= 400 and resp.status_code < 600:
+            raise sol_ex.PrometheusPluginError(
+                f"Reloading request to prometheus failed: "
+                f"{resp.status_code}.")

     def _upload_rule(self, rule_group, host, port, user, password, path,
                      pm_job_id):
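The rewritten check raises on any 4xx/5xx response instead of merely logging non-202 responses, so a failed reload can now be ignored on the delete path and rolled back on the upload path below. For context, a Prometheus server's reload endpoint can be exercised directly; a sketch with requests (illustrative URL; /-/reload is only served when Prometheus runs with --web.enable-lifecycle):

    import requests

    resp = requests.put('http://prometheus.example.com:9090/-/reload')
    if 400 <= resp.status_code < 600:
        # Mirrors the new behavior: raise instead of only logging.
        raise RuntimeError(
            f'Reloading request to prometheus failed: {resp.status_code}.')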
@@ -544,6 +558,25 @@ class PrometheusPluginPm(PrometheusPlugin, mon_base.MonitoringPlugin):
             client.connect(username=user, password=password)
             sftp = paramiko.SFTPClient.from_transport(client)
             sftp.put(filename, f'{path}/{pm_job_id}.json')
+        self.verify_rule(host, port, user, password, path, pm_job_id)
+
+    def verify_rule(self, host, port, user, password, path, pm_job_id):
+        if not CONF.prometheus_plugin.test_rule_with_promtool:
+            return
+        with paramiko.SSHClient() as client:
+            client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+            client.connect(host, port=port, username=user, password=password)
+            command = f"promtool check rules {path}/{pm_job_id}.json"
+            LOG.info("Rule file validation command: %s", command)
+            _, stdout, stderr = client.exec_command(command)
+            if stdout.channel.recv_exit_status() != 0:
+                error_byte = stderr.read()
+                error_str = error_byte.decode('utf-8')
+                LOG.error(
+                    "Rule file validation with promtool failed: %s",
+                    error_str)
+                raise sol_ex.PrometheusPluginError(
+                    "Rule file validation with promtool failed.")

     def get_access_info(self, pm_job):
         target_list = []
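verify_rule() shells out to promtool on the Prometheus host over SSH and treats a non-zero exit status as a validation failure. The equivalent local check, sketched with subprocess (hypothetical rule-file path; promtool ships with Prometheus and its "check rules" subcommand exits non-zero on an invalid rule file):

    import subprocess

    result = subprocess.run(
        ['promtool', 'check', 'rules', '/etc/prometheus/rules/pm_job.json'],
        capture_output=True, text=True)
    if result.returncode != 0:
        # Same condition the plugin tests via recv_exit_status().
        raise RuntimeError(
            f'Rule file validation with promtool failed: {result.stderr}')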
@@ -579,12 +612,32 @@ class PrometheusPluginPm(PrometheusPlugin, mon_base.MonitoringPlugin):

     def upload_rules(
             self, context, target_list, reload_list, rule_group, pm_job):
-        for info in target_list:
-            self._upload_rule(
-                rule_group, info['host'], info['port'], info['user'],
-                info['password'], info['path'], pm_job.id)
-        for uri in reload_list:
-            self.reload_prom_server(context, uri)
+        def _cleanup_error(target_list):
+            for target in target_list:
+                try:
+                    self._delete_rule(target['host'], target['port'],
+                                      target['user'], target['password'],
+                                      target['path'], pm_job.id)
+                except (sol_ex.PrometheusPluginError, ks_exc.ClientException,
+                        paramiko.SSHException):
+                    pass
+
+        try:
+            for target in target_list:
+                self._upload_rule(
+                    rule_group, target['host'], target['port'],
+                    target['user'], target['password'], target['path'],
+                    pm_job.id)
+            for uri in reload_list:
+                self.reload_prom_server(context, uri)
+        except (sol_ex.PrometheusPluginError, ks_exc.ClientException,
+                paramiko.SSHException) as e:
+            LOG.error("Failed to upload rule files: %s", e.args[0])
+            _cleanup_error(target_list)
+            raise e
+        except Exception as e:
+            _cleanup_error(target_list)
+            raise e

     def get_vnf_instances(self, context, pm_job):
         object_instance_ids = list(set(pm_job.objectInstanceIds))
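upload_rules() now wraps the whole upload-and-reload sequence and, on any failure, deletes the rule file from every target best-effort before re-raising. The underlying apply-then-rollback shape, as a standalone sketch (the commit's _cleanup_error() simply attempts deletion on all targets, since a missing file is treated as an ignorable error):

    def apply_with_rollback(apply_fn, undo_fn, items):
        """Apply apply_fn to all items; on failure, undo best-effort."""
        try:
            for item in items:
                apply_fn(item)
        except Exception:
            for item in items:
                try:
                    undo_fn(item)
                except Exception:
                    pass  # best-effort cleanup, like _cleanup_error()
            raise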
@@ -60,4 +60,4 @@ class PrometheusPluginDriver():
         url = f'{ep}/vnflcm/v2/vnf_instances/{vnf_instance_id}/scale'
         resp, _ = self.client.do_request(
             url, "POST", context=context, body=scale_req, version="2.0.0")
-        LOG.info("AutoHealing request is processed: %d.", resp.status_code)
+        LOG.info("AutoScaling request is processed: %d.", resp.status_code)
@@ -199,6 +199,7 @@ class VnfPmControllerV2(sol_wsgi.SolAPIController):
         try:
             self.plugin.create_job(context=context, pm_job=pm_job)
         except sol_ex.PrometheusPluginError as e:
+            LOG.error("Failed to create PM job: %s", e.args[0])
             raise sol_ex.PrometheusSettingFailed from e

         pm_job.create(context)