From 532710742cb5a8b9dd4c8bcd949166117dfa430b Mon Sep 17 00:00:00 2001 From: Philip Norman Date: Wed, 1 Mar 2017 16:21:26 -0800 Subject: [PATCH] node: add `dcos node metrics` command (#909) --- cli/dcoscli/data/help/node.txt | 7 + cli/dcoscli/metrics.py | 173 ++++++++++++++++++++++++ cli/dcoscli/node/main.py | 39 +++++- cli/dcoscli/tables.py | 41 ++++++ cli/tests/fixtures/metrics.py | 48 +++++++ cli/tests/integrations/common.py | 26 +++- cli/tests/integrations/test_node.py | 53 +++++++- cli/tests/unit/data/metrics_details.txt | 21 +++ cli/tests/unit/data/metrics_summary.txt | 2 + cli/tests/unit/test_tables.py | 14 ++ 10 files changed, 411 insertions(+), 13 deletions(-) create mode 100644 cli/dcoscli/metrics.py create mode 100644 cli/tests/fixtures/metrics.py create mode 100644 cli/tests/unit/data/metrics_details.txt create mode 100644 cli/tests/unit/data/metrics_summary.txt diff --git a/cli/dcoscli/data/help/node.txt b/cli/dcoscli/data/help/node.txt index aaca47b..bb0e7fe 100644 --- a/cli/dcoscli/data/help/node.txt +++ b/cli/dcoscli/data/help/node.txt @@ -13,6 +13,8 @@ Usage: dcos node list-components [--leader --mesos-id= --json] dcos node log [--follow --lines=N --leader --master --mesos-id= --slave=] [--component= --filter=...] + dcos node metrics details <mesos-id> [--json] + dcos node metrics summary <mesos-id> [--json] dcos node ssh (--leader | --master | --mesos-id= | --private-ip= | --slave=) [--config-file=] [--user=] @@ -34,6 +36,11 @@ Commands: Print a list of available DC/OS components on specified node. log Print the Mesos logs for the leading master node, agent nodes, or both. + metrics details <mesos-id> + Print a table of all metrics for the agent node specified by <mesos-id>. + metrics summary <mesos-id> + Print CPU, memory and disk metrics for the agent node specified by + <mesos-id>. ssh Establish an SSH connection to the master or agent nodes of your DC/OS cluster. 
diff --git a/cli/dcoscli/metrics.py b/cli/dcoscli/metrics.py
new file mode 100644
index 0000000..948d54f
--- /dev/null
+++ b/cli/dcoscli/metrics.py
@@ -0,0 +1,177 @@
+import contextlib
+import json
+
+from dcos import emitting, http, util
+from dcos.errors import DCOSException, DCOSHTTPException
+from dcoscli import tables
+
+logger = util.get_logger(__name__)
+emitter = emitting.FlatEmitter()
+
+
+def _gib(n):
+    """Convert a number of bytes to gibibytes (GiB).
+
+    :param n: number of bytes
+    :type n: int | float
+    :returns: `n` expressed in GiB
+    :rtype: float
+    """
+    return n * pow(2, -30)
+
+
+def _fetch_node_metrics(url):
+    """Retrieve the metrics data from `dcos-metrics`' `node` endpoint.
+
+    :param url: `dcos-metrics` `node` endpoint
+    :type url: str
+    :returns: List of metrics datapoints
+    :rtype: [dict]
+    :raises DCOSException: if the endpoint has no metrics (HTTP 204)
+    :raises DCOSHTTPException: on any other non-200 response
+    """
+    with contextlib.closing(http.get(url)) as r:
+        if r.status_code == 204:
+            raise DCOSException('No metrics found')
+
+        if r.status_code != 200:
+            raise DCOSHTTPException(r)
+
+        return r.json().get('datapoints', [])
+
+
+def _get_datapoint(datapoints, name, tags=None):
+    """Find a specific datapoint by name and tags
+
+    :param datapoints: a list of datapoints
+    :type datapoints: [dict]
+    :param name: the name of the required datapoint
+    :type name: str
+    :param tags: required tags by key and value
+    :type tags: dict
+    :return: the first matching datapoint, or None if nothing matches
+    :rtype: dict | None
+    """
+    for datapoint in datapoints:
+        if datapoint['name'] != name:
+            continue
+        # a datapoint may omit `tags` or carry an explicit null
+        dtags = datapoint.get('tags') or {}
+        if all(dtags.get(k) == v for k, v in (tags or {}).items()):
+            return datapoint
+    return None
+
+
+def _node_summary_json(datapoints):
+    """Filters datapoints down to CPU, memory and root disk space fields.
+
+    :param datapoints: a list of datapoints
+    :type datapoints: [dict]
+    :return: JSON data
+    :rtype: str
+    """
+    summary_datapoints = [
+        _get_datapoint(datapoints, 'cpu.total'),
+        _get_datapoint(datapoints, 'memory.total'),
+        _get_datapoint(datapoints, 'filesystem.capacity.used', {'path': '/'})
+    ]
+    return json.dumps(summary_datapoints)
+
+
+def _node_summary_data(datapoints):
+    """Extracts CPU, memory and root disk space fields from node datapoints.
+
+    :param datapoints: a list of raw datapoints
+    :type datapoints: [dict]
+    :return: a dictionary of summary fields
+    :rtype: dict
+    :raises DCOSException: if a required datapoint is missing
+    """
+
+    def _percentage(dividend, divisor):
+        if divisor > 0:
+            return dividend / divisor * 100
+        return 0
+
+    def _value(name, tags=None):
+        # fail with a readable error instead of a TypeError on None
+        datapoint = _get_datapoint(datapoints, name, tags)
+        if datapoint is None:
+            raise DCOSException('Metric "{}" not found'.format(name))
+        return datapoint['value']
+
+    cpu_used = _value('load.1min')
+    cpu_used_pc = _value('cpu.total')
+
+    mem_total = _value('memory.total')
+    mem_used = mem_total - _value('memory.free')
+    mem_used_pc = _percentage(mem_used, mem_total)
+
+    # `filesystem.capacity.used` already is the used space; subtracting it
+    # from the total would report the remaining space as "used"
+    disk_total = _value('filesystem.capacity.total', {'path': '/'})
+    disk_used = _value('filesystem.capacity.used', {'path': '/'})
+    disk_used_pc = _percentage(disk_used, disk_total)
+
+    return {
+        'cpu': '{:0.2f} ({:0.2f}%)'.format(cpu_used, cpu_used_pc),
+        'mem': '{:0.2f}GiB ({:0.2f}%)'.format(_gib(mem_used), mem_used_pc),
+        'disk': '{:0.2f}GiB ({:0.2f}%)'.format(_gib(disk_used), disk_used_pc)
+    }
+
+
+def _format_datapoints(datapoints):
+    """Format raw datapoints for output by making values human-readable
+    according to their unit and formatting tags.
+
+    :param datapoints: a list of datapoints
+    :type datapoints: [dict]
+    :return: a list of formatted datapoints
+    :rtype: [dict]
+    """
+
+    def _format_tags(tags):
+        if tags is None:
+            return ''
+        return ', '.join('{}: {}'.format(k, v) for k, v in tags.items())
+
+    def _format_value(v, u):
+        if u == 'bytes':
+            return '{:0.2f}GiB'.format(_gib(v))
+        if u == 'percent':
+            return '{:0.2f}%'.format(v)
+        return v
+
+    return [{
+        'name': d['name'],
+        'value': _format_value(d['value'], d.get('unit')),
+        'tags': _format_tags(d.get('tags'))
+    } for d in datapoints]
+
+
+def print_node_metrics(url, summary, json_):
+    """Retrieve and pretty-print key fields from the `dcos-metrics`' `node`
+    endpoint.
+
+    :param url: `dcos-metrics` `node` endpoint
+    :type url: str
+    :param summary: print summary if true, or all fields if false
+    :type summary: bool
+    :param json_: print json list if true
+    :type json_: bool
+    :returns: Process status
+    :rtype: int
+    """
+
+    datapoints = _fetch_node_metrics(url)
+
+    if summary:
+        if json_:
+            return emitter.publish(_node_summary_json(datapoints))
+        table = tables.metrics_summary_table(_node_summary_data(datapoints))
+    else:
+        if json_:
+            return emitter.publish(datapoints)
+        table = tables.metrics_details_table(_format_datapoints(datapoints))
+
+    return emitter.publish(table)
diff --git a/cli/dcoscli/node/main.py b/cli/dcoscli/node/main.py
index 6b92f34..dfdd752 100644
--- a/cli/dcoscli/node/main.py
+++ b/cli/dcoscli/node/main.py
@@ -1,5 +1,5 @@
-import functools
 import os
+from functools import partial, wraps
 
 import docopt
 import six
@@ -10,7 +10,7 @@ from dcos import (cmds, config, emitting, errors, http, mesos, packagemanager,
                   subprocess, util)
 from dcos.cosmos import get_cosmos_url
 from dcos.errors import DCOSException, DefaultError
-from dcoscli import log, tables
+from dcoscli import log, metrics, tables
 from
dcoscli.package.main import confirm
 from dcoscli.subcommand import default_command_info, default_doc
 from dcoscli.util import decorate_docopt_usage
@@ -71,6 +71,16 @@ def _cmds():
                       '--component', '--filter'],
             function=_log),
 
+        cmds.Command(
+            hierarchy=['node', 'metrics', 'details'],
+            arg_keys=['<mesos-id>', '--json'],
+            function=partial(_metrics, False)),
+
+        cmds.Command(
+            hierarchy=['node', 'metrics', 'summary'],
+            arg_keys=['<mesos-id>', '--json'],
+            function=partial(_metrics, True)),
+
         cmds.Command(
             hierarchy=['node', 'list-components'],
             arg_keys=['--leader', '--mesos-id', '--json'],
@@ -111,7 +121,7 @@ def _cmds():
 
 
 def diagnostics_error(fn):
-    @functools.wraps(fn)
+    @wraps(fn)
     def check_for_diagnostics_error(*args, **kwargs):
         response = fn(*args, **kwargs)
         if response.status_code != 200:
@@ -520,6 +530,30 @@ def _log(follow, lines, leader, slave, component, filters):
     return 0
 
 
+def _metrics(summary, mesos_id, json_):
+    """ Get metrics from the specified agent.
+
+    :param summary: summarise output if true, output all if false
+    :type summary: bool
+    :param mesos_id: mesos node id
+    :type mesos_id: str
+    :param json_: print raw JSON
+    :type json_: bool
+    :returns: Process status
+    :rtype: int
+    """
+
+    endpoint = '/system/v1/agent/{}/metrics/v0/node'.format(mesos_id)
+
+    # default to '' so an unset URL reaches the check below, not `.rstrip`
+    dcos_url = (config.get_config_val('core.dcos_url') or '').rstrip('/')
+    if not dcos_url:
+        raise config.missing_config_exception(['core.dcos_url'])
+
+    url = dcos_url + endpoint
+    return metrics.print_node_metrics(url, summary, json_)
+
+
 def _get_slave_ip(slave):
     """ Get an agent IP address based on mesos id.
     If slave parameter is empty, the function will return
diff --git a/cli/dcoscli/tables.py b/cli/dcoscli/tables.py
index 6af9df1..5b4918e 100644
--- a/cli/dcoscli/tables.py
+++ b/cli/dcoscli/tables.py
@@ -886,6 +886,50 @@ def ls_long_table(files):
     return tb
 
 
+def metrics_summary_table(data):
+    """Returns a PrettyTable of CPU, Memory and Disk for the given data.
+
+    :param data: A dictionary of formatted summary values.
+    :type data: dict
+    :rtype: PrettyTable
+    """
+    fields = OrderedDict([
+        ('CPU', lambda d: d['cpu']),
+        ('MEM', lambda d: d['mem']),
+        ('DISK', lambda d: d['disk'])
+    ])
+
+    # table has a single row
+    metrics_table = table(fields, [data])
+    metrics_table.align['CPU'] = 'l'
+    metrics_table.align['MEM'] = 'l'
+    metrics_table.align['DISK'] = 'l'
+
+    return metrics_table
+
+
+def metrics_details_table(datapoints):
+    """Returns a PrettyTable with the name, value and tags of each of the
+    passed datapoints.
+
+    :param datapoints: A list of datapoints formatted for display
+    :type datapoints: [dict]
+    :rtype: PrettyTable
+    """
+
+    fields = OrderedDict([
+        ('NAME', lambda d: d['name']),
+        ('VALUE', lambda d: d['value']),
+        ('TAGS', lambda d: d['tags'])
+    ])
+
+    metrics_table = table(fields, datapoints)
+    metrics_table.align['NAME'] = 'l'
+    metrics_table.align['VALUE'] = 'l'
+    metrics_table.align['TAGS'] = 'l'
+    return metrics_table
+
+
 def truncate_table(fields, objs, limits, **kwargs):
     """Returns a PrettyTable. `fields` represents the header schema of
     the table. `objs` represents the objects to be rendered into
diff --git a/cli/tests/fixtures/metrics.py b/cli/tests/fixtures/metrics.py
new file mode 100644
index 0000000..601951e
--- /dev/null
+++ b/cli/tests/fixtures/metrics.py
@@ -0,0 +1,48 @@
+def agent_metrics_node_details_fixture():
+    """Agent metrics /node fixture
+
+    :rtype: [dict]
+    """
+
+    return [
+        {"name": "uptime", "value": 1245, "tags": ""},
+
+        {"name": "cpu.cores", "value": 4, "tags": ""},
+        {"name": "cpu.total", "value": "74.94%", "tags": ""},
+        {"name": "cpu.user", "value": "15.67%", "tags": ""},
+        {"name": "cpu.system", "value": "59.27%", "tags": ""},
+        {"name": "cpu.idle", "value": "24.38%", "tags": ""},
+        {"name": "cpu.wait", "value": "0.03%", "tags": ""},
+
+        {"name": "load.1min", "value": 2.85, "tags": ""},
+        {"name": "load.5min", "value": 2.92, "tags": ""},
+        {"name": "load.15min", "value": 2.74, "tags": ""},
+
+        {"name": "filesystem.capacity.total", "value": "5.44GiB",
+         "tags": "path: /"},
+        {"name": "filesystem.capacity.used", "value": "1.65GiB",
+         "tags": "path: /"},
+        {"name": "filesystem.capacity.free", "value": "3.53GiB",
+         "tags": "path: /"},
+
+        {"name": "memory.total", "value": "14.69GiB", "tags": ""},
+        {"name": "memory.free", "value": "12.20GiB", "tags": ""},
+        {"name": "memory.buffers", "value": "0.09GiB", "tags": ""},
+        {"name": "memory.cached", "value": "1.72GiB", "tags": ""},
+
+        {"name": "swap.total", "value": "0.00GiB", "tags": ""},
+        {"name": "swap.free", "value": "0.00GiB", "tags": ""},
+        {"name": "swap.used", "value": "0.00GiB", "tags": ""}
+    ]
+
+
+def agent_metrics_node_summary_fixture():
+    """Fixture for summary information for node
+
+    :rtype: dict
+    """
+    return {
+        'cpu': '2.85 (74.94%)',
+        'mem': '2.49GiB (16.98%)',
+        'disk': '1.65GiB (30.30%)'
+    }
diff --git a/cli/tests/integrations/common.py b/cli/tests/integrations/common.py
index 3b56e31..4c48951 100644
--- a/cli/tests/integrations/common.py
+++ b/cli/tests/integrations/common.py
@@ -465,15 +465,15 @@ def delete_zk_node(znode):
     http.delete(znode_url)
 
 
-def assert_lines(cmd, num_lines, great_then=False):
+def assert_lines(cmd, num_lines, greater_than=False):
     """ Assert stdout contains the expected number of lines
 
     :param cmd: program and arguments
     :type cmd: [str]
     :param num_lines: expected number of lines for stdout
     :type num_lines: int
-    :param great_then: if True assume there may be at least num_lines or more
-    :type great_then: bool
+    :param greater_than: if True assert that there are at least num_lines
+    :type greater_than: bool
     :rtype: None
     """
 
@@ -482,12 +482,33 @@ def assert_lines(cmd, num_lines, great_then=False):
     assert returncode == 0
     assert stderr == b''
     lines = len(stdout.decode('utf-8').split('\n')) - 1
-    if great_then:
+    if greater_than:
         assert lines >= num_lines
         return
     assert lines == num_lines
 
 
+def fetch_valid_json(cmd):
+    """Assert stdout contains valid JSON
+
+    :param cmd: program and arguments
+    :type cmd: [str]
+    :returns: parsed JSON AST
+    :raises Exception: if stdout cannot be parsed as JSON
+    """
+    returncode, stdout, stderr = exec_command(cmd)
+
+    assert returncode == 0
+    assert stderr == b''
+    try:
+        return json.loads(stdout.decode('utf-8'))
+    except ValueError:
+        # json.JSONDecodeError only exists on Python 3.5+ and subclasses
+        # ValueError, so this handler works on every interpreter
+        error_text = 'Command "{}" returned invalid JSON'.format(' '.join(cmd))
+        raise Exception(error_text)
+
+
 def file_json_ast(path):
     """Returns the JSON AST parsed from file
     :param path: path to file
diff --git a/cli/tests/integrations/test_node.py b/cli/tests/integrations/test_node.py
index 513fb20..0f0bac7 100644
--- a/cli/tests/integrations/test_node.py
+++ b/cli/tests/integrations/test_node.py
@@ -9,7 +9,8 @@ import dcos.util as util
 from dcos import mesos
 from dcos.util import create_schema
 
-from .common import assert_command, assert_lines, exec_command, ssh_output
+from .common import assert_command, assert_lines, exec_command, \
+    fetch_valid_json, ssh_output
 from ..fixtures.node import slave_fixture
 
 
@@ -50,7 +51,7 @@ def test_node_log_empty():
 
 
 def test_node_log_leader():
-    assert_lines(['dcos', 'node', 'log', '--leader'], 10, great_then=True)
+    assert_lines(['dcos', 'node', 'log', '--leader'], 10, greater_than=True)
 
 
 def test_node_log_slave():
@@ -58,7 +59,7 @@ def test_node_log_slave():
     assert_lines(
         ['dcos', 'node', 'log', '--mesos-id={}'.format(slave_id)],
         10,
-        great_then=True)
+        greater_than=True)
 
 
 def test_node_log_missing_slave():
@@ -77,7 +78,7 @@ def test_node_log_lines():
     assert_lines(
         ['dcos', 'node', 'log', '--leader', '--lines=4'],
         4,
-        great_then=True)
+        greater_than=True)
 
 
 def test_node_log_invalid_lines():
@@ -87,6 +88,46 @@ def test_node_log_invalid_lines():
                    returncode=1)
 
 
+def test_node_metrics_agent_summary():
+    first_node_id = _node()[0]['id']
+    assert_lines(
+        ['dcos', 'node', 'metrics', 'summary', first_node_id],
+        2
+    )
+
+
+def test_node_metrics_agent_summary_json():
+    first_node_id = _node()[0]['id']
+
+    node_json = fetch_valid_json(
+        ['dcos', 'node', 'metrics', 'summary', first_node_id, '--json']
+    )
+
+    names = [d['name'] for d in node_json]
+    assert names == ['cpu.total', 'memory.total', 'filesystem.capacity.used']
+
+
+def
test_node_metrics_agent_details(): + first_node_id = _node()[0]['id'] + assert_lines( + ['dcos', 'node', 'metrics', 'details', first_node_id], + 100, + greater_than=True + ) + + +def test_node_metrics_agent_details_json(): + first_node_id = _node()[0]['id'] + + node_json = fetch_valid_json( + ['dcos', 'node', 'metrics', 'details', first_node_id, '--json'] + ) + + names = [d['name'] for d in node_json] + assert 'uptime' in names + assert 'cpu.cores' in names + + @pytest.mark.skipif(sys.platform == 'win32', reason='No pseudo terminal on windows') def test_node_ssh_leader(): @@ -198,8 +239,8 @@ def _node_ssh_output(args): cmd = ('ssh-agent /bin/bash -c "ssh-add {} 2> /dev/null && ' + 'dcos node ssh --option StrictHostKeyChecking=no {}"').format( - cli_test_ssh_key_path, - ' '.join(args)) + cli_test_ssh_key_path, + ' '.join(args)) return ssh_output(cmd) diff --git a/cli/tests/unit/data/metrics_details.txt b/cli/tests/unit/data/metrics_details.txt new file mode 100644 index 0000000..bb7c828 --- /dev/null +++ b/cli/tests/unit/data/metrics_details.txt @@ -0,0 +1,21 @@ +NAME VALUE TAGS +uptime 1245 +cpu.cores 4 +cpu.total 74.94% +cpu.user 15.67% +cpu.system 59.27% +cpu.idle 24.38% +cpu.wait 0.03% +load.1min 2.85 +load.5min 2.92 +load.15min 2.74 +filesystem.capacity.total 5.44GiB path: / +filesystem.capacity.used 1.65GiB path: / +filesystem.capacity.free 3.53GiB path: / +memory.total 14.69GiB +memory.free 12.20GiB +memory.buffers 0.09GiB +memory.cached 1.72GiB +swap.total 0.00GiB +swap.free 0.00GiB +swap.used 0.00GiB \ No newline at end of file diff --git a/cli/tests/unit/data/metrics_summary.txt b/cli/tests/unit/data/metrics_summary.txt new file mode 100644 index 0000000..5aac933 --- /dev/null +++ b/cli/tests/unit/data/metrics_summary.txt @@ -0,0 +1,2 @@ +CPU MEM DISK +2.85 (74.94%) 2.49GiB (16.98%) 1.65GiB (30.30%) \ No newline at end of file diff --git a/cli/tests/unit/test_tables.py b/cli/tests/unit/test_tables.py index 0e6d498..384bed4 100644 --- 
a/cli/tests/unit/test_tables.py +++ b/cli/tests/unit/test_tables.py @@ -14,6 +14,8 @@ from ..fixtures.marathon import (app_fixture, app_task_fixture, group_fixture, pod_list_fixture, pod_list_without_instances_fixture, pod_list_without_spec_version_fixture) +from ..fixtures.metrics import (agent_metrics_node_details_fixture, + agent_metrics_node_summary_fixture) from ..fixtures.node import slave_fixture from ..fixtures.package import package_fixture, search_result_fixture from ..fixtures.service import framework_fixture @@ -125,6 +127,18 @@ def test_ls_long_table(): 'tests/unit/data/ls_long.txt') +def test_metrics_summary_table(): + _test_table(tables.metrics_summary_table, + agent_metrics_node_summary_fixture(), + 'tests/unit/data/metrics_summary.txt') + + +def test_metrics_details_table(): + _test_table(tables.metrics_details_table, + agent_metrics_node_details_fixture(), + 'tests/unit/data/metrics_details.txt') + + def _test_table(table_fn, fixture_fn, path): table = table_fn(fixture_fn) with open(path) as f: