node: add dcos node metrics command (#909)
This commit is contained in:
@@ -13,6 +13,8 @@ Usage:
|
||||
dcos node list-components [--leader --mesos-id=<mesos-id> --json]
|
||||
dcos node log [--follow --lines=N --leader --master --mesos-id=<mesos-id> --slave=<agent-id>]
|
||||
[--component=<component-name> --filter=<filter>...]
|
||||
dcos node metrics details <mesos-id> [--json]
|
||||
dcos node metrics summary <mesos-id> [--json]
|
||||
dcos node ssh (--leader | --master | --mesos-id=<mesos-id> | --private-ip=<private-ip> | --slave=<agent-id>)
|
||||
[--config-file=<path>]
|
||||
[--user=<user>]
|
||||
@@ -34,6 +36,11 @@ Commands:
|
||||
Print a list of available DC/OS components on specified node.
|
||||
log
|
||||
Print the Mesos logs for the leading master node, agent nodes, or both.
|
||||
metrics details
|
||||
Print a table of all metrics for the agent node specified by <mesos-id>.
|
||||
metrics summary
|
||||
Print CPU, memory and disk metrics for the agent node specified by
|
||||
<mesos-id>.
|
||||
ssh
|
||||
Establish an SSH connection to the master or agent nodes of your DC/OS
|
||||
cluster.
|
||||
|
||||
173
cli/dcoscli/metrics.py
Normal file
173
cli/dcoscli/metrics.py
Normal file
@@ -0,0 +1,173 @@
|
||||
import contextlib
|
||||
import json
|
||||
|
||||
from dcos import emitting, http, util
|
||||
from dcos.errors import DCOSException, DCOSHTTPException
|
||||
from dcoscli import tables
|
||||
|
||||
logger = util.get_logger(__name__)
|
||||
emitter = emitting.FlatEmitter()
|
||||
|
||||
|
||||
def _gib(n):
|
||||
return n * pow(2, -30)
|
||||
|
||||
|
||||
def _fetch_node_metrics(url):
    """Fetch the datapoints served by the `node` endpoint of `dcos-metrics`.

    :param url: `dcos-metrics` `node` endpoint
    :type url: str
    :returns: the `datapoints` entry of the JSON response body, or an empty
        list when the body has no such entry
    :rtype: [dict]
    :raises DCOSException: when the endpoint answers with status 204
    :raises DCOSHTTPException: when the endpoint answers with any other
        status than 200
    """
    with contextlib.closing(http.get(url)) as response:
        status = response.status_code

        # 204 gets a dedicated message instead of a generic HTTP error.
        if status == 204:
            raise DCOSException('No metrics found')
        if status != 200:
            raise DCOSHTTPException(response)

        body = response.json()
        return body.get('datapoints', [])
|
||||
|
||||
|
||||
def _get_datapoint(datapoints, name, tags=None):
|
||||
"""Find a specific datapoint by name and tags
|
||||
|
||||
:param datapoints: a list of datapoints
|
||||
:type datapoints: [dict]
|
||||
:param name: the name of the required datapoint
|
||||
:type name: str
|
||||
:param tags: required tags by key and value
|
||||
:type tags: dict
|
||||
:return: a matching datapoint
|
||||
:rtype: dict
|
||||
"""
|
||||
for datapoint in datapoints:
|
||||
if datapoint['name'] == name:
|
||||
if tags is None:
|
||||
return datapoint
|
||||
|
||||
dtags = datapoint.get('tags', {})
|
||||
tag_match = True
|
||||
for k, v in tags.items():
|
||||
tag_match = tag_match and dtags.get(k) == v
|
||||
if tag_match:
|
||||
return datapoint
|
||||
|
||||
|
||||
def _node_summary_json(datapoints):
    """Reduce datapoints to the CPU, memory and root disk summary entries.

    The selected datapoints are `cpu.total`, `memory.total` and the
    `filesystem.capacity.used` entry tagged with path `/`, in that order.

    :param datapoints: a list of datapoints
    :type datapoints: [dict]
    :return: the selected datapoints serialised as a JSON list
    :rtype: str
    """
    wanted = (
        ('cpu.total', None),
        ('memory.total', None),
        ('filesystem.capacity.used', {'path': '/'}),
    )
    selected = [
        _get_datapoint(datapoints, name, tags) for name, tags in wanted
    ]
    return json.dumps(selected)
|
||||
|
||||
|
||||
def _node_summary_data(datapoints):
    """Extract CPU, memory and root disk usage from node datapoints.

    The returned values are pre-formatted strings:

    * `cpu`: the `load.1min` value followed by `cpu.total` as a percentage
    * `mem`: used memory (`memory.total` - `memory.free`) in GiB followed by
      its share of `memory.total`
    * `disk`: used space on `/` (`filesystem.capacity.used`) in GiB followed
      by its share of `filesystem.capacity.total`

    :param datapoints: a list of raw datapoints
    :type datapoints: [dict]
    :return: a dictionary of summary fields keyed by `cpu`, `mem`, `disk`
    :rtype: dict
    """

    def _value(name, tags=None):
        # Every summary field is read from the 'value' key of a datapoint.
        return _get_datapoint(datapoints, name, tags)['value']

    def _percentage(dividend, divisor):
        # A non-positive total yields 0 rather than a division error.
        if divisor > 0:
            return dividend / divisor * 100
        return 0

    root_fs = {'path': '/'}

    cpu_used = _value('load.1min')
    cpu_used_pc = _value('cpu.total')

    mem_total = _value('memory.total')
    mem_free = _value('memory.free')
    mem_used = mem_total - mem_free
    mem_used_pc = _percentage(mem_used, mem_total)

    disk_total = _value('filesystem.capacity.total', root_fs)
    # 'filesystem.capacity.used' already is the used amount. It used to be
    # treated as free space and subtracted from the total, which made the
    # DISK column report free space instead of used space.
    disk_used = _value('filesystem.capacity.used', root_fs)
    disk_used_pc = _percentage(disk_used, disk_total)

    return {
        'cpu': '{:0.2f} ({:0.2f}%)'.format(cpu_used, cpu_used_pc),
        'mem': '{:0.2f}GiB ({:0.2f}%)'.format(_gib(mem_used), mem_used_pc),
        'disk': '{:0.2f}GiB ({:0.2f}%)'.format(_gib(disk_used), disk_used_pc)
    }
|
||||
|
||||
|
||||
def _format_datapoints(datapoints):
|
||||
"""Format raw datapoints for output by making values human-readable
|
||||
according to their unit and formatting tags.
|
||||
|
||||
:param datapoints: a list of datapoints
|
||||
:type datapoints: [dict]
|
||||
:return: a list of formatted datapoints
|
||||
:rtype: [dict]
|
||||
"""
|
||||
|
||||
def _format_tags(tags):
|
||||
if tags is None:
|
||||
return ''
|
||||
pairs = []
|
||||
for k, v in tags.items():
|
||||
pairs.append('{}: {}'.format(k, v))
|
||||
return ', '.join(pairs)
|
||||
|
||||
def _format_value(v, u):
|
||||
if u == 'bytes':
|
||||
return '{:0.2f}GiB'.format(_gib(v))
|
||||
if u == 'percent':
|
||||
return '{:0.2f}%'.format(v)
|
||||
return v
|
||||
|
||||
formatted_datapoints = []
|
||||
for d in datapoints:
|
||||
formatted_datapoints.append({
|
||||
'name': d['name'],
|
||||
'value': _format_value(d['value'], d['unit']),
|
||||
'tags': _format_tags(d.get('tags'))
|
||||
})
|
||||
|
||||
return formatted_datapoints
|
||||
|
||||
|
||||
def print_node_metrics(url, summary, json_):
    """Fetch the metrics of a node from `dcos-metrics` and publish them.

    Four outputs are possible, depending on the flags: the summary as JSON,
    the summary as a table, every datapoint as JSON, or every datapoint as
    a table.

    :param url: `dcos-metrics` `node` endpoint
    :type url: str
    :param summary: print summary if true, or all fields if false
    :type summary: bool
    :param json_: print json list if true
    :type json_: bool
    :returns: whatever `emitter.publish` returns; callers hand it back as
        the process status
    :rtype: int
    """
    datapoints = _fetch_node_metrics(url)

    # JSON output bypasses table rendering entirely.
    if json_:
        if summary:
            return emitter.publish(_node_summary_json(datapoints))
        return emitter.publish(datapoints)

    if summary:
        rendered = tables.metrics_summary_table(
            _node_summary_data(datapoints))
    else:
        rendered = tables.metrics_details_table(
            _format_datapoints(datapoints))

    return emitter.publish(rendered)
|
||||
@@ -1,5 +1,5 @@
|
||||
import functools
|
||||
import os
|
||||
from functools import partial, wraps
|
||||
|
||||
import docopt
|
||||
import six
|
||||
@@ -10,7 +10,7 @@ from dcos import (cmds, config, emitting, errors,
|
||||
http, mesos, packagemanager, subprocess, util)
|
||||
from dcos.cosmos import get_cosmos_url
|
||||
from dcos.errors import DCOSException, DefaultError
|
||||
from dcoscli import log, tables
|
||||
from dcoscli import log, metrics, tables
|
||||
from dcoscli.package.main import confirm
|
||||
from dcoscli.subcommand import default_command_info, default_doc
|
||||
from dcoscli.util import decorate_docopt_usage
|
||||
@@ -71,6 +71,16 @@ def _cmds():
|
||||
'--component', '--filter'],
|
||||
function=_log),
|
||||
|
||||
cmds.Command(
|
||||
hierarchy=['node', 'metrics', 'details'],
|
||||
arg_keys=['<mesos-id>', '--json'],
|
||||
function=partial(_metrics, False)),
|
||||
|
||||
cmds.Command(
|
||||
hierarchy=['node', 'metrics', 'summary'],
|
||||
arg_keys=['<mesos-id>', '--json'],
|
||||
function=partial(_metrics, True)),
|
||||
|
||||
cmds.Command(
|
||||
hierarchy=['node', 'list-components'],
|
||||
arg_keys=['--leader', '--mesos-id', '--json'],
|
||||
@@ -111,7 +121,7 @@ def _cmds():
|
||||
|
||||
|
||||
def diagnostics_error(fn):
|
||||
@functools.wraps(fn)
|
||||
@wraps(fn)
|
||||
def check_for_diagnostics_error(*args, **kwargs):
|
||||
response = fn(*args, **kwargs)
|
||||
if response.status_code != 200:
|
||||
@@ -520,6 +530,29 @@ def _log(follow, lines, leader, slave, component, filters):
|
||||
return 0
|
||||
|
||||
|
||||
def _metrics(summary, mesos_id, json_):
    """ Get metrics from the specified agent.

    Builds the agent's `dcos-metrics` node endpoint from the configured
    `core.dcos_url` and delegates fetching and printing to
    `metrics.print_node_metrics`.

    :param summary: summarise output if true, output all if false
    :type summary: bool
    :param mesos_id: mesos node id
    :type mesos_id: str
    :param json_: print raw JSON
    :type json_: bool
    :returns: Process status
    :rtype: int
    :raises: the exception built by `config.missing_config_exception` when
        `core.dcos_url` is unset or empty
    """

    # Normalise defensively: calling `.rstrip` directly on the config value
    # would raise AttributeError if it comes back as None, and the
    # missing-config check below would never be reached.
    # NOTE(review): assumes an unset key yields None -- confirm against
    # `config.get_config_val`.
    dcos_url = (config.get_config_val('core.dcos_url') or '').rstrip('/')
    if not dcos_url:
        raise config.missing_config_exception(['core.dcos_url'])

    endpoint = '/system/v1/agent/{}/metrics/v0/node'.format(mesos_id)
    url = dcos_url + endpoint
    return metrics.print_node_metrics(url, summary, json_)
|
||||
|
||||
|
||||
def _get_slave_ip(slave):
|
||||
""" Get an agent IP address based on mesos id.
|
||||
If slave parameter is empty, the function will return
|
||||
|
||||
@@ -886,6 +886,47 @@ def ls_long_table(files):
|
||||
return tb
|
||||
|
||||
|
||||
def metrics_summary_table(data):
    """Build a one-row table of CPU, memory and disk summary values.

    :param data: A dictionary of formatted summary values, keyed by `cpu`,
        `mem` and `disk`.
    :type data: dict
    :returns: the table built by `table`, with every column left-aligned
    """
    columns = OrderedDict()
    columns['CPU'] = lambda row: row['cpu']
    columns['MEM'] = lambda row: row['mem']
    columns['DISK'] = lambda row: row['disk']

    # The summary is a single record, hence a single-row table.
    summary_table = table(columns, [data])
    for header in ('CPU', 'MEM', 'DISK'):
        summary_table.align[header] = 'l'

    return summary_table
|
||||
|
||||
|
||||
def metrics_details_table(datapoints):
    """Build a table with one row per datapoint.

    :param datapoints: A list of datapoints, each providing `name`, `value`
        and `tags`
    :type datapoints: [dict]
    :returns: the table built by `table`, with every column left-aligned
    """
    columns = OrderedDict()
    columns['NAME'] = lambda row: row['name']
    columns['VALUE'] = lambda row: row['value']
    columns['TAGS'] = lambda row: row['tags']

    details_table = table(columns, datapoints)
    for header in ('NAME', 'VALUE', 'TAGS'):
        details_table.align[header] = 'l'

    return details_table
|
||||
|
||||
|
||||
def truncate_table(fields, objs, limits, **kwargs):
|
||||
"""Returns a PrettyTable. `fields` represents the header schema of
|
||||
the table. `objs` represents the objects to be rendered into
|
||||
|
||||
48
cli/tests/fixtures/metrics.py
vendored
Normal file
48
cli/tests/fixtures/metrics.py
vendored
Normal file
@@ -0,0 +1,48 @@
|
||||
def agent_metrics_node_details_fixture():
    """Agent metrics /node fixture: datapoints already formatted for display.

    :returns: one dict per metric with `name`, `value` and `tags`
    :rtype: [dict]
    """
    # (name, value, tags) triples, grouped by metric family.
    rows = (
        ('uptime', 1245, ''),

        ('cpu.cores', 4, ''),
        ('cpu.total', '74.94%', ''),
        ('cpu.user', '15.67%', ''),
        ('cpu.system', '59.27%', ''),
        ('cpu.idle', '24.38%', ''),
        ('cpu.wait', '0.03%', ''),

        ('load.1min', 2.85, ''),
        ('load.5min', 2.92, ''),
        ('load.15min', 2.74, ''),

        ('filesystem.capacity.total', '5.44GiB', 'path: /'),
        ('filesystem.capacity.used', '1.65GiB', 'path: /'),
        ('filesystem.capacity.free', '3.53GiB', 'path: /'),

        ('memory.total', '14.69GiB', ''),
        ('memory.free', '12.20GiB', ''),
        ('memory.buffers', '0.09GiB', ''),
        ('memory.cached', '1.72GiB', ''),

        ('swap.total', '0.00GiB', ''),
        ('swap.free', '0.00GiB', ''),
        ('swap.used', '0.00GiB', ''),
    )

    return [
        {'name': name, 'value': value, 'tags': tags}
        for name, value, tags in rows
    ]
|
||||
|
||||
|
||||
def agent_metrics_node_summary_fixture():
    """Fixture holding the formatted summary values of a node.

    :returns: formatted `cpu`, `mem` and `disk` summary strings
    :rtype: dict
    """
    summary = {}
    summary['cpu'] = '2.85 (74.94%)'
    summary['mem'] = '2.49GiB (16.98%)'
    summary['disk'] = '1.65GiB (30.30%)'
    return summary
|
||||
@@ -465,15 +465,15 @@ def delete_zk_node(znode):
|
||||
http.delete(znode_url)
|
||||
|
||||
|
||||
def assert_lines(cmd, num_lines, great_then=False):
|
||||
def assert_lines(cmd, num_lines, greater_than=False):
|
||||
""" Assert stdout contains the expected number of lines
|
||||
|
||||
:param cmd: program and arguments
|
||||
:type cmd: [str]
|
||||
:param num_lines: expected number of lines for stdout
|
||||
:type num_lines: int
|
||||
:param great_then: if True assume there may be at least num_lines or more
|
||||
:type great_then: bool
|
||||
:param greater_than: if True assert that there are at least num_lines
|
||||
:type greater_than: bool
|
||||
:rtype: None
|
||||
"""
|
||||
|
||||
@@ -482,12 +482,30 @@ def assert_lines(cmd, num_lines, great_then=False):
|
||||
assert returncode == 0
|
||||
assert stderr == b''
|
||||
lines = len(stdout.decode('utf-8').split('\n')) - 1
|
||||
if great_then:
|
||||
if greater_than:
|
||||
assert lines >= num_lines
|
||||
return
|
||||
assert lines == num_lines
|
||||
|
||||
|
||||
def fetch_valid_json(cmd):
    """Run a command and return its stdout parsed as JSON.

    Asserts that the command exits with status 0 and writes nothing to
    stderr.

    :param cmd: program and arguments
    :type cmd: [str]
    :returns: parsed JSON AST
    :raises Exception: if stdout is not valid JSON
    """
    returncode, stdout, stderr = exec_command(cmd)

    assert returncode == 0
    assert stderr == b''

    output = stdout.decode('utf-8')
    try:
        parsed = json.loads(output)
    except json.JSONDecodeError:
        # Name the offending command so the failure is easy to trace.
        command_line = ' '.join(cmd)
        raise Exception(
            'Command "{}" returned invalid JSON'.format(command_line))
    return parsed
|
||||
|
||||
|
||||
def file_json_ast(path):
|
||||
"""Returns the JSON AST parsed from file
|
||||
:param path: path to file
|
||||
|
||||
@@ -9,7 +9,8 @@ import dcos.util as util
|
||||
from dcos import mesos
|
||||
from dcos.util import create_schema
|
||||
|
||||
from .common import assert_command, assert_lines, exec_command, ssh_output
|
||||
from .common import assert_command, assert_lines, exec_command, \
|
||||
fetch_valid_json, ssh_output
|
||||
from ..fixtures.node import slave_fixture
|
||||
|
||||
|
||||
@@ -50,7 +51,7 @@ def test_node_log_empty():
|
||||
|
||||
|
||||
def test_node_log_leader():
|
||||
assert_lines(['dcos', 'node', 'log', '--leader'], 10, great_then=True)
|
||||
assert_lines(['dcos', 'node', 'log', '--leader'], 10, greater_than=True)
|
||||
|
||||
|
||||
def test_node_log_slave():
|
||||
@@ -58,7 +59,7 @@ def test_node_log_slave():
|
||||
assert_lines(
|
||||
['dcos', 'node', 'log', '--mesos-id={}'.format(slave_id)],
|
||||
10,
|
||||
great_then=True)
|
||||
greater_than=True)
|
||||
|
||||
|
||||
def test_node_log_missing_slave():
|
||||
@@ -77,7 +78,7 @@ def test_node_log_lines():
|
||||
assert_lines(
|
||||
['dcos', 'node', 'log', '--leader', '--lines=4'],
|
||||
4,
|
||||
great_then=True)
|
||||
greater_than=True)
|
||||
|
||||
|
||||
def test_node_log_invalid_lines():
|
||||
@@ -87,6 +88,46 @@ def test_node_log_invalid_lines():
|
||||
returncode=1)
|
||||
|
||||
|
||||
def test_node_metrics_agent_summary():
    """`dcos node metrics summary` prints exactly two lines for a node."""
    node_id = _node()[0]['id']
    cmd = ['dcos', 'node', 'metrics', 'summary', node_id]
    assert_lines(cmd, 2)
|
||||
|
||||
|
||||
def test_node_metrics_agent_summary_json():
    """`--json` summary lists the CPU, memory and disk datapoints in order."""
    node_id = _node()[0]['id']
    cmd = ['dcos', 'node', 'metrics', 'summary', node_id, '--json']
    expected = ['cpu.total', 'memory.total', 'filesystem.capacity.used']

    reported = [datapoint['name'] for datapoint in fetch_valid_json(cmd)]
    assert reported == expected
|
||||
|
||||
|
||||
def test_node_metrics_agent_details():
    """`dcos node metrics details` prints at least 100 lines for a node."""
    node_id = _node()[0]['id']
    cmd = ['dcos', 'node', 'metrics', 'details', node_id]
    assert_lines(cmd, 100, greater_than=True)
|
||||
|
||||
|
||||
def test_node_metrics_agent_details_json():
    """`--json` details include the `uptime` and `cpu.cores` datapoints."""
    node_id = _node()[0]['id']
    cmd = ['dcos', 'node', 'metrics', 'details', node_id, '--json']

    reported = [datapoint['name'] for datapoint in fetch_valid_json(cmd)]
    for expected in ('uptime', 'cpu.cores'):
        assert expected in reported
|
||||
|
||||
@pytest.mark.skipif(sys.platform == 'win32',
|
||||
reason='No pseudo terminal on windows')
|
||||
def test_node_ssh_leader():
|
||||
@@ -198,8 +239,8 @@ def _node_ssh_output(args):
|
||||
|
||||
cmd = ('ssh-agent /bin/bash -c "ssh-add {} 2> /dev/null && ' +
|
||||
'dcos node ssh --option StrictHostKeyChecking=no {}"').format(
|
||||
cli_test_ssh_key_path,
|
||||
' '.join(args))
|
||||
cli_test_ssh_key_path,
|
||||
' '.join(args))
|
||||
|
||||
return ssh_output(cmd)
|
||||
|
||||
|
||||
21
cli/tests/unit/data/metrics_details.txt
Normal file
21
cli/tests/unit/data/metrics_details.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
NAME VALUE TAGS
|
||||
uptime 1245
|
||||
cpu.cores 4
|
||||
cpu.total 74.94%
|
||||
cpu.user 15.67%
|
||||
cpu.system 59.27%
|
||||
cpu.idle 24.38%
|
||||
cpu.wait 0.03%
|
||||
load.1min 2.85
|
||||
load.5min 2.92
|
||||
load.15min 2.74
|
||||
filesystem.capacity.total 5.44GiB path: /
|
||||
filesystem.capacity.used 1.65GiB path: /
|
||||
filesystem.capacity.free 3.53GiB path: /
|
||||
memory.total 14.69GiB
|
||||
memory.free 12.20GiB
|
||||
memory.buffers 0.09GiB
|
||||
memory.cached 1.72GiB
|
||||
swap.total 0.00GiB
|
||||
swap.free 0.00GiB
|
||||
swap.used 0.00GiB
|
||||
2
cli/tests/unit/data/metrics_summary.txt
Normal file
2
cli/tests/unit/data/metrics_summary.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
CPU MEM DISK
|
||||
2.85 (74.94%) 2.49GiB (16.98%) 1.65GiB (30.30%)
|
||||
@@ -14,6 +14,8 @@ from ..fixtures.marathon import (app_fixture, app_task_fixture,
|
||||
group_fixture, pod_list_fixture,
|
||||
pod_list_without_instances_fixture,
|
||||
pod_list_without_spec_version_fixture)
|
||||
from ..fixtures.metrics import (agent_metrics_node_details_fixture,
|
||||
agent_metrics_node_summary_fixture)
|
||||
from ..fixtures.node import slave_fixture
|
||||
from ..fixtures.package import package_fixture, search_result_fixture
|
||||
from ..fixtures.service import framework_fixture
|
||||
@@ -125,6 +127,18 @@ def test_ls_long_table():
|
||||
'tests/unit/data/ls_long.txt')
|
||||
|
||||
|
||||
def test_metrics_summary_table():
    """The summary table renders as recorded in `metrics_summary.txt`."""
    summary = agent_metrics_node_summary_fixture()
    expected_output = 'tests/unit/data/metrics_summary.txt'
    _test_table(tables.metrics_summary_table, summary, expected_output)
|
||||
|
||||
|
||||
def test_metrics_details_table():
    """The details table renders as recorded in `metrics_details.txt`."""
    details = agent_metrics_node_details_fixture()
    expected_output = 'tests/unit/data/metrics_details.txt'
    _test_table(tables.metrics_details_table, details, expected_output)
|
||||
|
||||
|
||||
def _test_table(table_fn, fixture_fn, path):
|
||||
table = table_fn(fixture_fn)
|
||||
with open(path) as f:
|
||||
|
||||
Reference in New Issue
Block a user