Prometheus interaction (#7)

* Remove old observability client

* Add initial functionality for prometheus querying

* Fix a copy-paste error in get_client()

* Add additional functionality.

This commit adds:
    - commands:
        delete
        clear-tombstones
        snapshot
    - Better rbac injection, as well as the possibility
      to disable rbac.
    - Configuration of prometheus_client through
      env variables and /etc/openstack/prometheus.yaml

* Make README up to date

* Implement Martin's PR comments

* Implement better support for label values in rbac

* PEP8
Jaromír Wysoglad 2023-08-03 15:30:19 +02:00 committed by GitHub
parent 3f8acf0574
commit a580772449
15 changed files with 815 additions and 628 deletions

View File

@@ -1,8 +1,7 @@
# python-observabilityclient
observabilityclient is an OpenStackClient (OSC) plugin implementation that
-implements commands for management of OpenStack observability components such
-as Prometheus, collectd and Ceilometer.
+implements commands for management of Prometheus.
## Development
@@ -17,58 +16,37 @@ su - stack
git clone https://github.com/infrawatch/python-observabilityclient
cd python-observabilityclient
sudo python setup.py install --prefix=/usr
-# clone and install observability playbooks and roles
-git clone https://github.com/infrawatch/osp-observability-ansible
-sudo mkdir /usr/share/osp-observability
-sudo ln -s `pwd`/osp-observability-ansible/playbooks /usr/share/osp-observability/playbooks
-sudo ln -s `pwd`/osp-observability-ansible/roles/spawn_container /usr/share/ansible/roles/spawn_container
-sudo ln -s `pwd`/osp-observability-ansible/roles/osp_observability /usr/share/ansible/roles/osp_observability
```
-### Enable collectd write_prometheus
-Create a THT environment file to enable the write_prometheus plugin for the collectd service. Then redeploy your overcloud and include this new file:
+## Usage
+Use `openstack metric query somequery` to query for metrics in prometheus.
+To use the python api do the following:
```
-mkdir -p ~/templates/observability
-cat <EOF >> templates/observability/collectd-write-prometheus.yaml
-resource_registry:
-  OS::TripleO::Services::Collectd: /usr/share/openstack-tripleo-heat-templates/deployment/metrics/collectd-container-puppet.yaml
-# TEST
-# parameter_merge_strategies:
-#   CollectdExtraPlugins: merge
-parameter_defaults:
-  CollectdExtraPlugins:
-  - write_prometheus
-EOF
+from observabilityclient import client
+c = client.Client(
+    '1', keystone_client.get_session(conf),
+    adapter_options={
+        'interface': conf.service_credentials.interface,
+        'region_name': conf.service_credentials.region_name})
+c.query.query("somequery")
```
-### Discover endpoints
-After deployment of your cloud you can discover endpoints available for scraping:
+## List of commands
```
-source stackrc
-openstack observability discover --stack-name=standalone
```
+openstack metric list - lists all metrics
+openstack metric show - shows current values of a metric
+openstack metric query - queries prometheus and outputs the result
+openstack metric delete - deletes some metrics
+openstack metric snapshot - takes a snapshot of the current data
+openstack metric clean-tombstones - cleans the tsdb tombstones
-### Deploy prometheus:
-Create a config file and run the setup command
```
-$ cat test_params.yaml
-prometheus_remote_write:
-  stf:
-    url: https://default-prometheus-proxy-service-telemetry.apps.FAKE.ocp.cluster/api/v1/write
-    basic_user: internal
-    basic_pass: Pl4iNt3xTp4a55
-    ca_cert: |
-      -----BEGIN CERTIFICATE-----
-      ABCDEFGHIJKLMNOPQRSTUVWXYZ
-      -----END CERTIFICATE-----
-  not-stf:
-    url: http://prometheus-rw.example.com/api/v1/write
-$ openstack observability setup prometheus_agent --config ./test_params.yaml
```
+## List of functions provided by the python library
+c.query.list - lists all metrics
+c.query.show - shows current values of a metric
+c.query.query - queries prometheus and outputs the result
+c.query.delete - deletes some metrics
+c.query.snapshot - takes a snapshot of the current data
+c.query.clean_tombstones - cleans the tsdb tombstones
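As a rough end-to-end sketch of the library usage listed above (the Prometheus endpoint configuration, the unauthenticated session and the metric names are assumptions, not part of this change):

```
from keystoneauth1 import session as ks_session
from observabilityclient import client

# Assumes PROMETHEUS_HOST/PROMETHEUS_PORT (or a prometheus.yaml) are set;
# normally the session would carry a real auth plugin.
sess = ks_session.Session()
c = client.Client('1', sess)

names = c.query.list()             # all metric names visible to the project
current = c.query.show("up")       # current values of the assumed metric "up"
result = c.query.query("sum(up)")  # arbitrary PromQL; rbac labels are injected
for metric in result:
    print(metric.labels, metric.value)
```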

View File

@@ -0,0 +1,22 @@
# Copyright 2023 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sys
def Client(version, *args, **kwargs):
module = 'observabilityclient.v%s.client' % version
__import__(module)
client_class = getattr(sys.modules[module], 'Client')
return client_class(*args, **kwargs)

View File

@@ -1,3 +1,16 @@
# Copyright 2023 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""OpenStackClient Plugin interface"""
@@ -8,7 +21,7 @@ DEFAULT_API_VERSION = '1'
API_NAME = 'observabilityclient'
API_VERSION_OPTION = 'os_observabilityclient_api_version'
API_VERSIONS = {
-    '1': 'observabilityclient.plugin',
+    '1': 'observabilityclient.v1.client.Client',
}
@@ -20,12 +33,16 @@ def make_client(instance):
    :param ClientManager instance: The ClientManager that owns the new client
    """
-    plugin_client = utils.get_client_class(
+    observability_client = utils.get_client_class(
        API_NAME,
        instance._api_version[API_NAME],
        API_VERSIONS)
-    client = plugin_client()
+    client = observability_client(session=instance.session,
+                                  adapter_options={
+                                      'interface': instance.interface,
+                                      'region_name': instance.region_name
+                                  })
return client

View File

@@ -0,0 +1,200 @@
# Copyright 2023 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import logging
import requests
LOG = logging.getLogger(__name__)
class PrometheusAPIClientError(Exception):
def __init__(self, response):
self.resp = response
def __str__(self) -> str:
if self.resp.status_code != requests.codes.ok:
if self.resp.status_code != 204:
decoded = self.resp.json()
if 'error' in decoded:
return f'[{self.resp.status_code}] {decoded["error"]}'
return f'[{self.resp.status_code}] {self.resp.reason}'
else:
decoded = self.resp.json()
return f'[{decoded["status"]}]'
def __repr__(self) -> str:
if self.resp.status_code != requests.codes.ok:
if self.resp.status_code != 204:
decoded = self.resp.json()
if 'error' in decoded:
return f'[{self.resp.status_code}] {decoded["error"]}'
return f'[{self.resp.status_code}] {self.resp.reason}'
else:
decoded = self.resp.json()
return f'[{decoded["status"]}]'
class PrometheusMetric:
def __init__(self, input):
self.timestamp = input['value'][0]
self.labels = input['metric']
self.value = input['value'][1]
class PrometheusAPIClient:
def __init__(self, host):
self._host = host
self._session = requests.Session()
self._session.verify = False
def set_ca_cert(self, ca_cert):
self._session.verify = ca_cert
def set_client_cert(self, client_cert, client_key):
# requests expects the client certificate and key as one (cert, key) tuple
self._session.cert = (client_cert, client_key)
def set_basic_auth(self, auth_user, auth_password):
self._session.auth = (auth_user, auth_password)
def _get(self, endpoint, params=None):
url = (f"{'https' if self._session.verify else 'http'}://"
f"{self._host}/api/v1/{endpoint}")
resp = self._session.get(url, params=params,
headers={'Accept': 'application/json'})
if resp.status_code != requests.codes.ok:
raise PrometheusAPIClientError(resp)
decoded = resp.json()
if decoded['status'] != 'success':
raise PrometheusAPIClientError(resp)
return decoded
def _post(self, endpoint, params=None):
url = (f"{'https' if self._session.verify else 'http'}://"
f"{self._host}/api/v1/{endpoint}")
resp = self._session.post(url, params=params,
headers={'Accept': 'application/json'})
if resp.status_code != requests.codes.ok:
raise PrometheusAPIClientError(resp)
decoded = resp.json()
if 'status' in decoded and decoded['status'] != 'success':
raise PrometheusAPIClientError(resp)
return decoded
def query(self, query):
"""Sends custom queries to Prometheus
:param query: the query to send
:type query: str
"""
LOG.debug(f"Querying prometheus with query: {query}")
decoded = self._get("query", dict(query=query))
if decoded['data']['resultType'] == 'vector':
result = [PrometheusMetric(i) for i in decoded['data']['result']]
else:
result = [PrometheusMetric(decoded)]
return result
def series(self, matches):
"""Queries the /series/ endpoint of prometheus
:param matches: List of matches to send as parameters
:type matches: [str]
"""
LOG.debug(f"Querying prometheus for series with matches: {matches}")
decoded = self._get("series", {"match[]": matches})
return decoded['data']
def labels(self):
"""Queries the /labels/ endpoint of prometheus, returns list of labels
There isn't a way to tell prometheus to restrict
which labels to return. It's not possible to enforce
rbac with this for example.
"""
LOG.debug("Querying prometheus for labels")
decoded = self._get("labels")
return decoded['data']
def label_values(self, label):
"""Queries prometheus for values of a specified label.
:param label: Name of label for which to return values
:type label: str
"""
LOG.debug(f"Querying prometheus for the values of label: {label}")
decoded = self._get(f"label/{label}/values")
return decoded['data']
# ---------
# admin api
# ---------
def delete(self, matches, start=None, end=None):
"""Deletes some metrics from prometheus
:param matches: List of matches, that specify which metrics to delete
:type matches: [str]
:param start: Timestamp from which to start deleting.
None for as early as possible.
:type start: timestamp
:param end: Timestamp until which to delete.
None for as late as possible.
:type end: timestamp
"""
# NOTE Prometheus doesn't seem to return anything except
#      a 204 status code. There doesn't seem to be a
#      way to know whether anything actually got deleted.
#      It does, however, return a 500 status code and an
#      error message if the admin APIs are disabled.
LOG.debug(f"Deleting metrics from prometheus matching: {matches}")
try:
self._post("admin/tsdb/delete_series", {"match[]": matches,
"start": start,
"end": end})
except PrometheusAPIClientError as exc:
# The 204 is allowed here. 204 is "No Content",
# which is expected on a successful call
if exc.resp.status_code != 204:
raise exc
def clean_tombstones(self):
"""Asks prometheus to clean tombstones"""
LOG.debug("Cleaning tombstones from prometheus")
try:
self._post("admin/tsdb/clean_tombstones")
except PrometheusAPIClientError as exc:
# The 204 is allowed here. 204 is "No Content",
# which is expected on a successful call
if exc.resp.status_code != 204:
raise exc
def snapshot(self):
"""Creates a snapshot and returns the file name containing the data"""
LOG.debug("Taking prometheus data snapshot")
ret = self._post("admin/tsdb/snapshot")
return ret["data"]["name"]
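For reference, a minimal usage sketch of the client above; the host, port, certificate path and credentials are placeholders rather than values taken from this change:

```
from observabilityclient.prometheus_client import PrometheusAPIClient

prom = PrometheusAPIClient("localhost:9090")   # assumed host:port
prom.set_ca_cert("/etc/pki/ca-trust/ca.crt")   # optional; also switches URLs to https
prom.set_basic_auth("scrape_user", "secret")   # optional basic auth

for metric in prom.query("up"):                # vector result -> PrometheusMetric objects
    print(metric.labels, metric.value, metric.timestamp)
print(prom.labels())                           # every label name known to Prometheus
```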

View File

@@ -0,0 +1,110 @@
# Copyright 2023 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import logging
import os
import yaml
from observabilityclient.prometheus_client import PrometheusAPIClient
DEFAULT_CONFIG_LOCATIONS = [os.environ["HOME"] + "/.config/openstack/",
"/etc/openstack/"]
CONFIG_FILE_NAME = "prometheus.yaml"
LOG = logging.getLogger(__name__)
class ConfigurationError(Exception):
pass
def get_config_file():
if os.path.exists(CONFIG_FILE_NAME):
LOG.debug(f"Using {CONFIG_FILE_NAME} as prometheus configuration")
return open(CONFIG_FILE_NAME, "r")
for path in DEFAULT_CONFIG_LOCATIONS:
full_filename = path + CONFIG_FILE_NAME
if os.path.exists(full_filename):
LOG.debug(f"Using {full_filename} as prometheus configuration")
return open(full_filename, "r")
return None
def get_prometheus_client():
host = None
port = None
conf_file = get_config_file()
if conf_file is not None:
conf = yaml.safe_load(conf_file)
if 'host' in conf:
host = conf['host']
if 'port' in conf:
port = conf['port']
conf_file.close()
# NOTE(jwysogla): We allow the prometheus.yaml values to be
# overridden by environment variables
if 'PROMETHEUS_HOST' in os.environ:
host = os.environ['PROMETHEUS_HOST']
if 'PROMETHEUS_PORT' in os.environ:
port = os.environ['PROMETHEUS_PORT']
if host is None or port is None:
raise ConfigurationError("Can't find prometheus host and "
"port configuration.")
return PrometheusAPIClient(f"{host}:{port}")
def get_client(obj):
return obj.app.client_manager.observabilityclient
def list2cols(cols, objs):
return cols, [tuple([o[k] for k in cols])
for o in objs]
def format_labels(d: dict) -> str:
def replace_doubled_quotes(string):
if "''" in string:
string = string.replace("''", "'")
if '""' in string:
string = string.replace('""', '"')
return string
ret = ""
for key, value in d.items():
ret += "{}='{}', ".format(key, value)
ret = ret[0:-2]
old = ""
while ret != old:
old = ret
ret = replace_doubled_quotes(ret)
return ret
def metrics2cols(m):
cols = []
fields = []
first = True
for metric in m:
row = []
for key, value in metric.labels.items():
if first:
cols.append(key)
row.append(value)
if first:
cols.append("value")
row.append(metric.value)
fields.append(row)
first = False
return cols, fields
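As an illustration of the helpers above, a sketch of configuring the endpoint through the environment; the host and port are assumptions, and a prometheus.yaml with top-level host and port keys in ./, ~/.config/openstack/ or /etc/openstack/ would work the same way, with the environment variables taking precedence:

```
import os

# Hypothetical endpoint; overrides any prometheus.yaml that may be present.
os.environ["PROMETHEUS_HOST"] = "127.0.0.1"
os.environ["PROMETHEUS_PORT"] = "9090"

from observabilityclient.utils.metric_utils import format_labels, get_prometheus_client

prom = get_prometheus_client()   # PrometheusAPIClient for 127.0.0.1:9090

# format_labels() builds the label selector string used for rbac matching:
print(format_labels({"project": "abc123"}))   # -> project='abc123'
```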

View File

@@ -1,201 +0,0 @@
# Copyright 2022 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
import ansible_runner
import configparser
import os
import shutil
from ansible.inventory.manager import InventoryManager
from ansible.parsing.dataloader import DataLoader
from ansible.vars.manager import VariableManager
from observabilityclient.utils import shell
class AnsibleRunnerException(Exception):
"""Base exception class for runner exceptions"""
class AnsibleRunnerFailed(AnsibleRunnerException):
"""Raised when ansible run failed"""
def __init__(self, status, rc, stderr):
super(AnsibleRunnerFailed).__init__()
self.status = status
self.rc = rc
self.stderr = stderr
def __str__(self):
return ('Ansible run failed with status {}'
' (return code {}):\n{}').format(self.status, self.rc,
self.stderr)
def parse_inventory_hosts(inventory):
"""Returns list of dictionaries. Each dictionary contains info about
single node from inventory.
"""
dl = DataLoader()
if isinstance(inventory, str):
inventory = [inventory]
im = InventoryManager(loader=dl, sources=inventory)
vm = VariableManager(loader=dl, inventory=im)
out = []
for host in im.get_hosts():
data = vm.get_vars(host=host)
out.append(
dict(host=data.get('inventory_hostname', str(host)),
ip=data.get('ctlplane_ip', data.get('ansible_host')),
hostname=data.get('canonical_hostname'))
)
return out
class AnsibleRunner:
"""Simple wrapper for ansible-playbook."""
def __init__(self, workdir: str, moduledir: str = None,
ssh_user: str = 'root', ssh_key: str = None,
ansible_cfg: str = None):
"""
:param workdir: Location of the working directory.
:type workdir: String
:param ssh_user: User for the ssh connection.
:type ssh_user: String
:param ssh_key: Private key to use for the ssh connection.
:type ssh_key: String
:param moduledir: Location of the ansible module and library.
:type moduledir: String
:param ansible_cfg: Path to an ansible configuration file.
:type ansible_cfg: String
"""
self.workdir = shell.file_check(workdir, ftype='directory')
if moduledir is None:
moduledir = ''
ansible_cfg = ansible_cfg or os.path.join(workdir, 'ansible.cfg')
if not os.path.exists(ansible_cfg):
conf = dict(
ssh_connection=dict(
ssh_args=(
'-o UserKnownHostsFile={} '
'-o StrictHostKeyChecking=no '
'-o ControlMaster=auto '
'-o ControlPersist=30m '
'-o ServerAliveInterval=64 '
'-o ServerAliveCountMax=1024 '
'-o Compression=no '
'-o TCPKeepAlive=yes '
'-o VerifyHostKeyDNS=no '
'-o ForwardX11=no '
'-o ForwardAgent=yes '
'-o PreferredAuthentications=publickey '
'-T'
).format(os.devnull),
retries=3,
timeout=30,
scp_if_ssh=True,
pipelining=True
),
defaults=dict(
deprecation_warnings=False,
remote_user=ssh_user,
private_key_file=ssh_key,
library=os.path.expanduser(
'~/.ansible/plugins/modules:{workdir}/modules:'
'{userdir}:{ansible}/plugins/modules:'
'{ansible}-modules'.format(
userdir=moduledir, workdir=workdir,
ansible='/usr/share/ansible'
)
),
lookup_plugins=os.path.expanduser(
'~/.ansible/plugins/lookup:{workdir}/lookup:'
'{ansible}/plugins/lookup:'.format(
workdir=workdir, ansible='/usr/share/ansible'
)
),
gathering='smart',
log_path=shell.file_check(
os.path.join(workdir, 'ansible.log'),
clear=True
)
),
)
parser = configparser.ConfigParser()
parser.read_dict(conf)
with open(ansible_cfg, 'w') as conffile:
parser.write(conffile)
os.environ['ANSIBLE_CONFIG'] = ansible_cfg
def run(self, playbook, tags: str = None, skip_tags: str = None,
timeout: int = 30, quiet: bool = False, debug: bool = False):
"""Run given Ansible playbook.
:param playbook: Playbook filename.
:type playbook: String
:param tags: Run specific tags.
:type tags: String
:param skip_tags: Skip specific tags.
:type skip_tags: String
:param timeout: Timeout to finish playbook execution (minutes).
:type timeout: int
:param quiet: Disable all output (Defaults to False)
:type quiet: Boolean
:param debug: Enable debug output (Defaults to False)
:type quiet: Boolean
"""
kwargs = {
'private_data_dir': self.workdir,
'verbosity': 3 if debug else 0,
}
locs = locals()
for arg in ['playbook', 'tags', 'skip_tags', 'quiet']:
if locs[arg] is not None:
kwargs[arg] = locs[arg]
run_conf = ansible_runner.runner_config.RunnerConfig(**kwargs)
run_conf.prepare()
run = ansible_runner.Runner(config=run_conf)
try:
status, rc = run.run()
finally:
if status in ['failed', 'timeout', 'canceled'] or rc != 0:
err = getattr(run, 'stderr', getattr(run, 'stdout', None))
if err:
error = err.read()
else:
error = "Ansible failed with status %s" % status
raise AnsibleRunnerFailed(status, rc, error)
def destroy(self, clear: bool = False):
"""Cleans environment after Ansible run.
:param clear: Clear also workdir
:type clear: Boolean
"""
del os.environ['ANSIBLE_CONFIG']
if clear:
shutil.rmtree(self.workdir, ignore_errors=True)

View File

@@ -1,75 +0,0 @@
# Copyright 2022 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
import os
import pipes
import shutil
import subprocess
import tempfile
from contextlib import contextmanager
from observabilityclient.utils import strings
@contextmanager
def tempdir(base: str, prefix: str = None, clear: bool = True) -> str:
path = tempfile.mkdtemp(prefix=prefix, dir=base)
try:
yield path
finally:
if clear:
shutil.rmtree(path, ignore_errors=True)
def file_check(path: str, ftype: str = 'file', clear: bool = False) -> str:
"""Check if given path exists and create it in case required."""
if not os.path.exists(path) or clear:
if ftype == 'directory':
if clear:
shutil.rmtree(path, ignore_errors=True)
os.makedirs(path, mode=0o700, exist_ok=True)
elif ftype == 'file':
with open(path, 'w') as f:
f.close()
return path
def execute(cmd, workdir: str = None, can_fail: bool = True,
mask_list: list = None, use_shell: bool = False):
"""
Runs given shell command. Returns return code and content of stdout.
:param workdir: Location of the working directory.
:type workdir: String
:param can_fail: If is set to True RuntimeError is raised in case
of command returned non-zero return code.
:type can_fail: Boolean
"""
mask_list = mask_list or []
if not isinstance(cmd, str):
masked = ' '.join((pipes.quote(i) for i in cmd))
else:
masked = cmd
masked = strings.mask_string(masked, mask_list)
proc = subprocess.Popen(cmd, cwd=workdir, shell=use_shell, close_fds=True,
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = proc.communicate()
if proc.returncode and can_fail:
raise RuntimeError('Failed to execute command: %s' % masked)
return proc.returncode, out, err

View File

@@ -1,41 +0,0 @@
# Copyright 2022 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
STR_MASK = '*' * 8
COLORS = {'nocolor': "\033[0m",
'red': "\033[0;31m",
'green': "\033[32m",
'blue': "\033[34m",
'yellow': "\033[33m"}
def color_text(text, color):
"""Returns given text string with appropriate color tag. Allowed value
for color parameter is 'red', 'blue', 'green' and 'yellow'.
"""
return '%s%s%s' % (COLORS[color], text, COLORS['nocolor'])
def mask_string(unmasked, mask_list=None):
"""Replaces words from mask_list with MASK in unmasked string."""
mask_list = mask_list or []
masked = unmasked
for word in mask_list:
if not word:
continue
masked = masked.replace(word, STR_MASK)
return masked

View File

@@ -13,24 +13,12 @@
# under the License.
#
-import os
-import shutil
from osc_lib.command import command
from osc_lib.i18n import _
-from observabilityclient.utils import runner
-from observabilityclient.utils import shell
-OBSLIBDIR = shell.file_check('/usr/share/osp-observability', 'directory')
-OBSWRKDIR = shell.file_check(
-    os.path.expanduser('~/.osp-observability'), 'directory'
-)
class ObservabilityBaseCommand(command.Command):
-    """Base class for observability commands."""
+    """Base class for metric commands."""
def get_parser(self, prog_name):
parser = super().get_parser(prog_name)
@@ -44,68 +32,50 @@ class ObservabilityBaseCommand(command.Command):
            action='store_true',
            help=_("Disable cleanup of temporary files.")
        )
        # TODO(jwysogla): Should this be restricted somehow?
-        parser.add_argument(
-            '--workdir',
-            default=OBSWRKDIR,
-            help=_("Working directory for observability commands.")
-        )
-        parser.add_argument(
-            '--moduledir',
-            default=None,
-            help=_("Directory with additional Ansible modules.")
-        )
-        parser.add_argument(
-            '--ssh-user',
-            default='heat-admin',
-            help=_("Username to be used for SSH connection.")
-        )
-        parser.add_argument(
-            '--ssh-key',
-            default='/home/stack/.ssh/id_rsa',
-            help=_("SSH private key to be used for SSH connection.")
-        )
-        parser.add_argument(
-            '--ansible-cfg',
-            default=os.path.join(OBSWRKDIR, 'ansible.cfg'),
-            help=_("Path to Ansible configuration.")
-        )
        parser.add_argument(
-            '--config',
-            default=None,
-            help=_("Path to playbook configuration file.")
+            '--disable-rbac',
+            action='store_true',
+            help=_("Disable rbac injection")
        )
return parser
def _run_playbook(self, playbook, inventory, parsed_args):
"""Run Ansible raw playbook"""
playbook = os.path.join(OBSLIBDIR, 'playbooks', playbook)
with shell.tempdir(parsed_args.workdir,
prefix=os.path.splitext(playbook)[0],
clear=not parsed_args.messy) as tmpdir:
# copy extravars file for the playbook run
if parsed_args.config:
envdir = shell.file_check(os.path.join(tmpdir, 'env'),
'directory')
shutil.copy(parsed_args.config,
os.path.join(envdir, 'extravars'))
# copy inventory file for the playbook run
shutil.copy(inventory, os.path.join(tmpdir, 'inventory'))
# run playbook
rnr = runner.AnsibleRunner(tmpdir,
moduledir=parsed_args.moduledir,
ssh_user=parsed_args.ssh_user,
ssh_key=parsed_args.ssh_key,
ansible_cfg=parsed_args.ansible_cfg)
if parsed_args.messy:
print("Running playbook %s" % playbook)
rnr.run(playbook, debug=parsed_args.dev)
rnr.destroy(clear=not parsed_args.messy)
def _execute(self, command, parsed_args):
"""Execute local command"""
with shell.tempdir(parsed_args.workdir, prefix='exec',
clear=not parsed_args.messy) as tmpdir:
rc, out, err = shell.execute(command, workdir=tmpdir,
can_fail=parsed_args.dev,
use_shell=True)
return rc, out, err
class Manager(object):
"""Base class for the python api."""
DEFAULT_HEADERS = {
"Accept": "application/json",
}
def __init__(self, client):
self.client = client
self.prom = client.prometheus_client
def _set_default_headers(self, kwargs):
headers = kwargs.get('headers', {})
for k, v in self.DEFAULT_HEADERS.items():
if k not in headers:
headers[k] = v
kwargs['headers'] = headers
return kwargs
def _get(self, *args, **kwargs):
self._set_default_headers(kwargs)
return self.client.api.get(*args, **kwargs)
def _post(self, *args, **kwargs):
self._set_default_headers(kwargs)
return self.client.api.post(*args, **kwargs)
def _put(self, *args, **kwargs):
self._set_default_headers(kwargs)
return self.client.api.put(*args, **kwargs)
def _patch(self, *args, **kwargs):
self._set_default_headers(kwargs)
return self.client.api.patch(*args, **kwargs)
def _delete(self, *args, **kwargs):
self._set_default_headers(kwargs)
return self.client.api.delete(*args, **kwargs)

View File

@@ -0,0 +1,109 @@
# Copyright 2023 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from observabilityclient.utils import metric_utils
from observabilityclient.v1 import base
from osc_lib.i18n import _
from cliff import lister
class List(base.ObservabilityBaseCommand, lister.Lister):
"""Query prometheus for list of all metrics"""
def take_action(self, parsed_args):
client = metric_utils.get_client(self)
metrics = client.query.list(disable_rbac=parsed_args.disable_rbac)
return ["metric_name"], [[m] for m in metrics]
class Show(base.ObservabilityBaseCommand, lister.Lister):
"""Query prometheus for the current value of metric"""
def get_parser(self, prog_name):
parser = super().get_parser(prog_name)
parser.add_argument(
'name',
help=_("Name of the metric to show"))
return parser
def take_action(self, parsed_args):
client = metric_utils.get_client(self)
metric = client.query.show(parsed_args.name,
disable_rbac=parsed_args.disable_rbac)
return metric_utils.metrics2cols(metric)
class Query(base.ObservabilityBaseCommand, lister.Lister):
"""Query prometheus with a custom query string"""
def get_parser(self, prog_name):
parser = super().get_parser(prog_name)
parser.add_argument(
'query',
help=_("Custom PromQL query"))
return parser
def take_action(self, parsed_args):
client = metric_utils.get_client(self)
metric = client.query.query(parsed_args.query,
disable_rbac=parsed_args.disable_rbac)
ret = metric_utils.metrics2cols(metric)
return ret
class Delete(base.ObservabilityBaseCommand):
"""Delete data for a selected series and time range"""
def get_parser(self, prog_name):
parser = super().get_parser(prog_name)
parser.add_argument(
'matches',
action="append",
nargs='+',
help=_("Series selector, that selects the series to delete. "
"Specify multiple selectors delimited by space to "
"delete multiple series."))
parser.add_argument(
'--start',
help=_("Start timestamp in rfc3339 or unix timestamp. "
"Defaults to minimum possible timestamp."))
parser.add_argument(
'--end',
help=_("End timestamp in rfc3339 or unix timestamp. "
"Defaults to maximum possible timestamp."))
return parser
def take_action(self, parsed_args):
client = metric_utils.get_client(self)
return client.query.delete(parsed_args.matches,
parsed_args.start,
parsed_args.end)
class CleanTombstones(base.ObservabilityBaseCommand):
"""Remove deleted data from disk and clean up the existing tombstones"""
def get_parser(self, prog_name):
parser = super().get_parser(prog_name)
return parser
def take_action(self, parsed_args):
client = metric_utils.get_client(self)
return client.query.clean_tombstones()
class Snapshot(base.ObservabilityBaseCommand, lister.Lister):
def take_action(self, parsed_args):
client = metric_utils.get_client(self)
ret = client.query.snapshot()
return ["Snapshot file name"], [[ret]]

View File

@@ -0,0 +1,43 @@
# Copyright 2023 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import keystoneauth1.session
from observabilityclient.utils.metric_utils import get_prometheus_client
from observabilityclient.v1 import python_api
from observabilityclient.v1 import rbac
class Client(object):
"""Client for the observabilityclient api"""
def __init__(self, session=None, adapter_options=None,
session_options=None, disable_rbac=False):
"""Initialize a new client for the Observabilityclient v1 API."""
session_options = session_options or {}
adapter_options = adapter_options or {}
adapter_options.setdefault('service_type', "metric")
if session is None:
session = keystoneauth1.session.Session(**session_options)
else:
if session_options:
raise ValueError("session and session_options are exclusive")
self.session = session
self.prometheus_client = get_prometheus_client()
self.query = python_api.QueryManager(self)
self.rbac = rbac.Rbac(self, self.session, disable_rbac)
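A small construction sketch for the client above; the adapter options are assumptions, and it presumes a Prometheus endpoint is already configured (prometheus.yaml or PROMETHEUS_HOST/PROMETHEUS_PORT), since get_prometheus_client() raises ConfigurationError otherwise:

```
from keystoneauth1 import session as ks_session
from observabilityclient.v1.client import Client

sess = ks_session.Session()   # normally built from an auth plugin
c = Client(session=sess,
           adapter_options={'interface': 'public',
                            'region_name': 'RegionOne'})

# session and session_options are mutually exclusive:
# Client(session=sess, session_options={'verify': False})  # would raise ValueError
```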

View File

@@ -1,180 +0,0 @@
# Copyright 2022 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
import os
import requests
import shutil
import sys
import yaml
from osc_lib.i18n import _
from observabilityclient.v1 import base
from observabilityclient.utils import runner
class InventoryError(Exception):
def __init__(self, err, out):
self.err = err
self.out = out
def __str__(self):
return ('Failed to generate or locate Ansible '
'inventory file:\n%s\n%s' % (self.err or '', self.out))
INVENTORY = os.path.join(base.OBSWRKDIR, 'openstack-inventory.yaml')
INV_FALLBACKS = [
'~/tripleo-deploy/{stack}/openstack-inventory.yaml',
'~/tripleo-deploy/{stack}/tripleo-ansible-inventory.yaml',
'./overcloud-deploy/{stack}/openstack-inventory.yaml',
'./overcloud-deploy/{stack}/tripleo-ansible-inventory.yaml',
]
ENDPOINTS = os.path.join(base.OBSWRKDIR, 'scrape-endpoints.yaml')
STACKRC = os.path.join(base.OBSWRKDIR, 'stackrc')
def _curl(host: dict, port: int, timeout: int = 1) -> str:
"""Returns scraping endpoint URL if it is reachable
otherwise returns None."""
url = f'http://{host["ip"]}:{port}/metrics'
try:
r = requests.get(url, timeout=1)
if r.status_code != 200:
url = None
r.close()
except requests.exceptions.ConnectionError:
url = None
return url
class Discover(base.ObservabilityBaseCommand):
"""Generate Ansible inventory file and scrapable enpoints list file."""
def get_parser(self, prog_name):
parser = super().get_parser(prog_name)
parser.add_argument(
'--scrape',
action='append',
default=['collectd/9103'],
help=_("Service/Port of scrape endpoint to check on nodes")
)
parser.add_argument(
'--stack-name',
default='overcloud',
help=_("Overcloud stack name for which inventory file should "
"be generated")
)
parser.add_argument(
'--inventory',
help=_("Use this argument in case you have inventory file "
"generated or moved to non-standard place. Value has to be "
"path to inventory file including the file name.")
)
return parser
def take_action(self, parsed_args):
# discover undercloud and overcloud nodes
try:
rc, out, err = self._execute(
'tripleo-ansible-inventory '
'--static-yaml-inventory {} '
'--stack {}'.format(INVENTORY, parsed_args.stack_name),
parsed_args
)
if rc:
raise InventoryError(err, out)
# OSP versions with deprecated tripleo-ansible-inventory fallbacks
# to static inventory file generated at one of the fallback path
if not os.path.exists(INVENTORY):
if parsed_args.inventory:
INV_FALLBACKS.insert(0, parsed_args.inventory)
for i in INV_FALLBACKS:
absi = i.format(stack=parsed_args.stack_name)
absi = os.path.abspath(os.path.expanduser(absi))
if os.path.exists(absi):
shutil.copyfile(absi, INVENTORY)
break
else:
raise InventoryError('None of the fallback inventory files'
' exists: %s' % INV_FALLBACKS, '')
except InventoryError as ex:
print(str(ex))
sys.exit(1)
# discover scrape endpoints
endpoints = dict()
hosts = runner.parse_inventory_hosts(INVENTORY)
for scrape in parsed_args.scrape:
service, port = scrape.split('/')
for host in hosts:
if parsed_args.dev:
name = host["hostname"] if host["hostname"] else host["ip"]
print(f'Trying to fetch {service} metrics on host '
f'{name} at port {port}', end='')
node = _curl(host, port, timeout=1)
if node:
endpoints.setdefault(service.strip(), []).append(node)
if parsed_args.dev:
print(' [success]' if node else ' [failure]')
data = yaml.safe_dump(endpoints, default_flow_style=False)
with open(ENDPOINTS, 'w') as f:
f.write(data)
print("Discovered following scraping endpoints:\n%s" % data)
class Setup(base.ObservabilityBaseCommand):
"""Install and configure given Observability component(s)"""
auth_required = False
def get_parser(self, prog_name):
parser = super().get_parser(prog_name)
parser.add_argument(
'components',
nargs='+',
choices=[
'prometheus_agent',
# TODO: in future will contain option for all stack components
]
)
parser.add_argument(
'--inventory',
help=_("Use this argument in case you don't want to use for "
"whatever reason the inventory file generated by discovery "
"command")
)
return parser
def take_action(self, parsed_args):
inventory = INVENTORY
if parsed_args.inventory:
inventory = parsed_args.inventory
for compnt in parsed_args.components:
playbook = '%s.yml' % compnt
try:
self._run_playbook(playbook, inventory,
parsed_args=parsed_args)
except OSError as ex:
print('Failed to load playbook file: %s' % ex)
sys.exit(1)
except yaml.YAMLError as ex:
print('Failed to parse playbook configuration: %s' % ex)
sys.exit(1)
except runner.AnsibleRunnerFailed as ex:
print('Ansible run %s (rc %d)' % (ex.status, ex.rc))
if parsed_args.dev:
print(ex.stderr)

View File

@@ -0,0 +1,96 @@
# Copyright 2023 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from observabilityclient.utils.metric_utils import format_labels
from observabilityclient.v1 import base
class QueryManager(base.Manager):
def list(self, disable_rbac=False):
"""Lists metric names
:param disable_rbac: Disables rbac injection if set to True
:type disable_rbac: boolean
"""
if disable_rbac or self.client.rbac.disable_rbac:
metric_names = self.prom.label_values("__name__")
return metric_names
else:
match = f"{{{format_labels(self.client.rbac.default_labels)}}}"
metrics = self.prom.series(match)
if metrics == []:
return []
unique_metric_names = list(set([m['__name__'] for m in metrics]))
return sorted(unique_metric_names)
def show(self, name, disable_rbac=False):
"""Shows current values for metrics of a specified name
:param disable_rbac: Disables rbac injection if set to True
:type disable_rbac: boolean
"""
enriched = self.client.rbac.append_rbac(name,
disable_rbac=disable_rbac)
last_metric_query = f"last_over_time({enriched}[5m])"
return self.prom.query(last_metric_query)
def query(self, query, disable_rbac=False):
"""Sends a query to prometheus
The query can be any PromQL query. Labels for enforcing
rbac will be added to all of the metric names inside the query.
Having labels as part of a query is allowed.
A call like this:
query("sum(name1) - sum(name2{label1='value'})")
will result in a query string like this:
"sum(name1{rbac='rbac_value'}) -
sum(name2{label1='value', rbac='rbac_value'})"
:param query: Custom query string
:type query: str
:param disable_rbac: Disables rbac injection if set to True
:type disable_rbac: boolean
"""
query = self.client.rbac.enrich_query(query, disable_rbac)
return self.prom.query(query)
def delete(self, matches, start=None, end=None):
"""Deletes metrics from Prometheus
The metrics aren't deleted immediately. Call clean_tombstones()
to speed up the deletion. If start and end aren't specified, the
minimum and maximum possible timestamps are used.
:param matches: List of matches to match which metrics to delete
:type matches: [str]
:param start: timestamp from which to start deleting
:type start: rfc3339 or unix_timestamp
:param end: timestamp until which to delete
:type end: rfc3339 or unix_timestamp
"""
# TODO(jwysogla) Do we want to restrict access to the admin api
# endpoints? We could either try to inject
# the project label like in query. We could also
# do some check right here, before
# it gets to prometheus.
return self.prom.delete(matches, start, end)
def clean_tombstones(self):
"""Instructs prometheus to clean tombstones"""
return self.prom.clean_tombstones()
def snapshot(self):
"Creates a snapshot of the current data"
return self.prom.snapshot()
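To make the enrichment described in query() concrete, a sketch with assumed metric names and project id (the injected label itself comes from Rbac.default_labels):

```
from observabilityclient import client

c = client.Client('1', session)   # session: an authenticated keystone session (assumed)

# With the session's project id being, say, "abc123":
c.query.query("sum(name1) - sum(name2{label1='value'})")
# is sent to Prometheus roughly as:
#   sum(name1{project='abc123'}) - sum(name2{label1='value', project='abc123'})

# Skipping the injection sends the query unchanged:
c.query.query("sum(name1)", disable_rbac=True)
```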

View File

@@ -0,0 +1,139 @@
# Copyright 2023 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from keystoneauth1.exceptions.auth_plugins import MissingAuthPlugin
from observabilityclient.utils.metric_utils import format_labels
import re
class ObservabilityRbacError(Exception):
pass
class Rbac():
def __init__(self, client, session, disable_rbac=False):
self.client = client
self.session = session
self.disable_rbac = disable_rbac
try:
self.project_id = self.session.get_project_id()
self.default_labels = {
"project": self.project_id
}
self.rbac_init_successful = True
except MissingAuthPlugin:
self.project_id = None
self.default_labels = {
"project": "no-project"
}
self.rbac_init_successful = False
def _find_label_value_end(self, query, start, quote_char):
end = start
while (end == start or
query[end - 1] == '\\'):
# Looking for first unescaped quotes
end = query.find(quote_char, end + 1)
# returns the quote position or -1
return end
def _find_label_pair_end(self, query, start):
eq_sign_pos = query.find('=', start)
quote_char = "'"
quote_start_pos = query.find(quote_char, eq_sign_pos)
if quote_start_pos == -1:
quote_char = '"'
quote_start_pos = query.find(quote_char, eq_sign_pos)
end = self._find_label_value_end(query, quote_start_pos, quote_char)
# returns the pair end or -1
return end
def _find_label_section_end(self, query, start):
nearest_curly_brace_pos = None
while nearest_curly_brace_pos != -1:
pair_end = self._find_label_pair_end(query, start)
nearest_curly_brace_pos = query.find("}", pair_end)
nearest_eq_sign_pos = query.find("=", pair_end)
if (nearest_curly_brace_pos < nearest_eq_sign_pos or
nearest_eq_sign_pos == -1):
# If we have "}" before the nearest "=",
# then we must be at the end of the label section
# and the "=" is a part of the next section.
return nearest_curly_brace_pos
start = pair_end
return -1
def enrich_query(self, query, disable_rbac=False):
"""Used to add rbac labels to queries
:param query: The query to enrich
:type query: str
:param disable_rbac: Disables rbac injection if set to True
:type disable_rbac: boolean
"""
# TODO(jwysogla): This should be properly tested
if disable_rbac:
return query
labels = self.default_labels
# We need to get all metric names, no matter the rbac
metric_names = self.client.query.list(disable_rbac=True)