dcos service log

Conflicts:
	dcos/mesos.py
This commit is contained in:
Michael Gummelt
2015-07-06 12:01:09 -07:00
parent 655e522fb4
commit 43ca73882d
12 changed files with 512 additions and 109 deletions

View File

@@ -18,7 +18,7 @@ Options:
--master Access the leading master
--slave=<slave-id> Access the slave with the provided ID
--option SSHOPT=VAL SSH option (see `man ssh_config`)
--config-file=<path> Path to ssh config file
--config-file=<path> Path to SSH config file
--user=<user> SSH user [default: core]
--version Show version
"""
@@ -186,12 +186,7 @@ def _ssh(master, slave, option, config_file, user):
"""
ssh_options = ' '.join('-o {}'.format(opt) for opt in option)
if config_file:
ssh_config = '-F {}'.format(config_file)
else:
ssh_config = ''
ssh_options = util.get_ssh_options(config_file, option)
if master:
host = mesos.MesosDNSClient().hosts('leader.mesos.')[0]['ip']
@@ -205,9 +200,8 @@ def _ssh(master, slave, option, config_file, user):
else:
raise DCOSException('No slave found with ID [{}]'.format(slave))
cmd = "ssh -t {0} {1} {2}@{3}".format(
cmd = "ssh -t {0} {1}@{2}".format(
ssh_options,
ssh_config,
user,
host)

View File

@@ -3,30 +3,46 @@
Usage:
dcos service --info
dcos service [--inactive --json]
dcos service log [--follow --lines=N --ssh-config-file=<path>]
<service> [<file>]
dcos service shutdown <service-id>
Options:
-h, --help Show this screen
-h, --help Show this screen
--info Show a short description of this subcommand
--info Show a short description of this subcommand
--json Print json-formatted services
--ssh-config-file=<path> Path to SSH config file. Used to access
marathon logs.
--inactive Show inactive services in addition to active ones.
Inactive services are those that have been disconnected from
master, but haven't yet reached their failover timeout.
--follow Output data as the file grows
--version Show version
--inactive Show inactive services in addition to active
ones. Inactive services are those that have
been disconnected from master, but haven't yet
reached their failover timeout.
--json Print json-formatted services
--lines=N Output the last N lines [default: 10]
--version Show version
Positional Arguments:
<service-id> The ID for the DCOS Service
<file> Output this file. [default: stdout]
<service> The DCOS Service name.
<service-id> The DCOS Service ID
"""
import subprocess
import dcoscli
import docopt
from dcos import cmds, emitting, mesos, util
from dcos import cmds, emitting, marathon, mesos, package, util
from dcos.errors import DCOSException
from dcoscli import tables
from dcoscli import log, tables
logger = util.get_logger(__name__)
emitter = emitting.FlatEmitter()
@@ -57,6 +73,13 @@ def _cmds():
"""
return [
cmds.Command(
hierarchy=['service', 'log'],
arg_keys=['--follow', '--lines', '--ssh-config-file', '<service>',
'<file>'],
function=_log),
cmds.Command(
hierarchy=['service', 'shutdown'],
arg_keys=['<service-id>'],
@@ -123,3 +146,161 @@ def _shutdown(service_id):
mesos.DCOSClient().shutdown_framework(service_id)
return 0
def _log(follow, lines, ssh_config_file, service, file_):
"""Prints the contents of the logs for a given service. The service
task is located by first identifying the marathon app running a
framework named `service`.
:param follow: same as unix tail's -f
:type follow: bool
:param lines: number of lines to print
:type lines: int
:param ssh_config_file: SSH config file. Used for marathon.
:type ssh_config_file: str | None
:param service: service name
:type service: str
:param file_: file path to read
:type file_: str
:returns: process return code
:rtype: int
"""
lines = util.parse_int(lines)
if service == 'marathon':
if file_:
raise DCOSException('The <file> argument is invalid for marathon.'
' The systemd journal is always used for the'
' marathon log.')
return _log_marathon(follow, lines, ssh_config_file)
else:
if ssh_config_file:
raise DCOSException(
'The `--ssh-config-file` argument is invalid for non-marathon '
'services. SSH is not used.')
return _log_service(follow, lines, service, file_)
def _log_service(follow, lines, service, file_):
"""Prints the contents of the logs for a given service. Used for
non-marathon services.
:param follow: same as unix tail's -f
:type follow: bool
:param lines: number of lines to print
:type lines: int
:param service: service name
:type service: str
:param file_: file path to read
:type file_: str
:returns: process return code
:rtype: int
"""
if file_ is None:
file_ = 'stdout'
task = _get_service_task(service)
return _log_task(task['id'], follow, lines, file_)
def _log_task(task_id, follow, lines, file_):
"""Prints the contents of the logs for a given task ID.
:param task_id: task ID
:type task_id: str
:param follow: same as unix tail's -f
:type follow: bool
:param lines: number of lines to print
:type lines: int
:param file_: file path to read
:type file_: str
:returns: process return code
:rtype: int
"""
dcos_client = mesos.DCOSClient()
task = mesos.get_master(dcos_client).task(task_id)
mesos_file = mesos.MesosFile(file_, task=task, dcos_client=dcos_client)
return log.log_files([mesos_file], follow, lines)
def _get_service_task(service_name):
"""Gets the task running the given service. If there is more than one
such task, throws an exception.
:param service_name: service name
:type service_name: str
:returns: The marathon task dict
:rtype: dict
"""
marathon_client = marathon.create_client()
app = _get_service_app(marathon_client, service_name)
tasks = marathon_client.get_app(app['id'])['tasks']
if len(tasks) != 1:
raise DCOSException(
('We expected marathon app [{}] to be running 1 task, but we ' +
'instead found {} tasks').format(app['id'], len(tasks)))
return tasks[0]
def _get_service_app(marathon_client, service_name):
"""Gets the marathon app running the given service. If there is not
exactly one such app, throws an exception.
:param marathon_client: marathon client
:type marathon_client: marathon.Client
:param service_name: service name
:type service_name: str
:returns: marathon app
:rtype: dict
"""
apps = package.get_apps_for_framework(service_name, marathon_client)
if len(apps) > 1:
raise DCOSException(
'Multiple marathon apps found for service name [{}]: {}'.format(
service_name,
', '.join('[{}]'.format(app['id']) for app in apps)))
elif len(apps) == 0:
raise DCOSException(
'No marathon apps found for service [{}]'.format(service_name))
else:
return apps[0]
def _log_marathon(follow, lines, ssh_config_file):
"""Prints the contents of the marathon logs.
:param follow: same as unix tail's -f
:type follow: bool
:param lines: number of lines to print
:type lines: int
:param ssh_config_file: SSH config file.
:type ssh_config_file: str | None
;:returns: process return code
:rtype: int
"""
ssh_options = util.get_ssh_options(ssh_config_file, [])
journalctl_args = ''
if follow:
journalctl_args += '-f '
if lines:
journalctl_args += '-n {} '.format(lines)
leader_ip = marathon.create_client().get_leader().split(':')[0]
cmd = ("ssh {0} core@{1} " +
"journalctl {2} -u marathon").format(
ssh_options,
leader_ip,
journalctl_args)
return subprocess.call(cmd, shell=True)

View File

@@ -116,7 +116,7 @@ def _task(fltr, completed, json_):
return 0
def _log(follow, completed, lines, task, path):
def _log(follow, completed, lines, task, file_):
""" Tail a file in the task's sandbox.
:param follow: same as unix tail's -f
@@ -127,8 +127,8 @@ def _log(follow, completed, lines, task, path):
:type lines: int
:param task: task pattern to match
:type task: str
:param path: file path to read
:type path: str
:param file_: file path to read
:type file_: str
:returns: process return code
:rtype: int
"""
@@ -138,12 +138,12 @@ def _log(follow, completed, lines, task, path):
else:
fltr = task
if path is None:
path = 'stdout'
if file_ is None:
file_ = 'stdout'
lines = util.parse_int(lines)
mesos_files = _mesos_files(completed, fltr, path)
mesos_files = _mesos_files(completed, fltr, file_)
if not mesos_files:
raise DCOSException('No matching tasks. Exiting.')
log.log_files(mesos_files, follow, lines)
@@ -151,7 +151,7 @@ def _log(follow, completed, lines, task, path):
return 0
def _mesos_files(completed, fltr, path):
def _mesos_files(completed, fltr, file_):
"""Return MesosFile objects for the specified files. Only include
files that satisfy all of the following:
@@ -162,8 +162,8 @@ def _mesos_files(completed, fltr, path):
:type completed: bool
:param fltr: task pattern to match
:type fltr: str
:param path: file path to read
:type path: str
:param file_: file path to read
:type file_: str
:returns: MesosFile objects
:rtype: [MesosFile]
@@ -184,7 +184,7 @@ def _mesos_files(completed, fltr, path):
if task.slave() in slaves and task.executor()]
# create files.
return [mesos.MesosFile(path, task=task, mesos_client=client)
return [mesos.MesosFile(file_, task=task, dcos_client=client)
for task in available_tasks]

View File

@@ -0,0 +1,7 @@
{
"marathon": {
"zk": "zk://master.mesos:2181/universe2",
"mem": 512,
"cpus": 1
}
}

View File

@@ -0,0 +1,3 @@
Host *
StrictHostKeyChecking no
IdentityFile /host-home/.vagrant.d/insecure_private_key

View File

@@ -2,8 +2,10 @@ import collections
import contextlib
import json
import os
import pty
import subprocess
import sys
import time
import requests
import six
@@ -219,6 +221,41 @@ def remove_app(app_id):
assert_command(['dcos', 'marathon', 'app', 'remove', app_id])
def package_install(package, deploy=False, args=[]):
""" Calls `dcos package install`
:param package: name of the package to install
:type package: str
:param deploy: whether or not to wait for the deploy
:type deploy: bool
:param args: extra CLI args
:type args: [str]
:rtype: None
"""
returncode, stdout, stderr = exec_command(
['dcos', 'package', 'install', '--yes', package] + args)
assert returncode == 0
assert stderr == b''
if deploy:
watch_all_deployments()
def package_uninstall(package, args=[]):
""" Calls `dcos package uninstall`
:param package: name of the package to uninstall
:type package: str
:param args: extra CLI args
:type args: [str]
:rtype: None
"""
assert_command(['dcos', 'package', 'uninstall', package] + args)
def get_services(expected_count=None, args=[]):
"""Get services
@@ -379,6 +416,25 @@ def app(path, app_id, deploy=False):
remove_app(app_id)
@contextlib.contextmanager
def package(package_name, deploy=False, args=[]):
"""Context manager that deploys an app on entrance, and removes it on
exit.
:param package_name: package name
:type package_name: str
:param deploy: If True, block on the deploy
:type deploy: bool
:rtype: None
"""
package_install(package_name, deploy, args)
try:
yield
finally:
package_uninstall(package_name)
@contextlib.contextmanager
def mock_args(args):
""" Context manager that mocks sys.args and captures stdout/stderr
@@ -394,3 +450,37 @@ def mock_args(args):
yield sys.stdout, sys.stderr
finally:
sys.stdout, sys.stderr = stdout, stderr
def ssh_output(cmd):
""" Runs an SSH command and returns the stdout/stderr.
:param cmd: command to run
:type cmd: str
:rtype: (str, str)
"""
# ssh must run with stdin attached to a tty
master, slave = pty.openpty()
proc = subprocess.Popen(cmd,
stdin=slave,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
preexec_fn=os.setsid,
close_fds=True,
shell=True)
os.close(slave)
# wait for the ssh connection
time.sleep(8)
# kill the whole process group
os.killpg(os.getpgid(proc.pid), 15)
os.close(master)
stdout, stderr = proc.communicate()
print('STDOUT: {}'.format(stdout.decode('utf-8')))
print('STDERR: {}'.format(stderr.decode('utf-8')))
return stdout, stderr

View File

@@ -1,16 +1,12 @@
import json
import os
import pty
import re
import subprocess
import time
import dcos.util as util
from dcos import mesos
from dcos.util import create_schema
from ..fixtures.node import slave_fixture
from .common import assert_command, assert_lines, exec_command
from .common import assert_command, assert_lines, exec_command, ssh_output
def test_help():
@@ -34,7 +30,7 @@ Options:
--master Access the leading master
--slave=<slave-id> Access the slave with the provided ID
--option SSHOPT=VAL SSH option (see `man ssh_config`)
--config-file=<path> Path to ssh config file
--config-file=<path> Path to SSH config file
--user=<user> SSH user [default: core]
--version Show version
"""
@@ -142,34 +138,16 @@ def test_node_ssh_user():
stdout, stderr = _node_ssh_output(
['--master', '--user=bogus', '--option', 'PasswordAuthentication=no'])
assert stdout == b''
assert stderr.startswith(b'Permission denied')
assert b'Permission denied' in stderr
def _node_ssh_output(args):
# ssh must run with stdin attached to a tty
master, slave = pty.openpty()
cmd = ('dcos node ssh --option ' +
'IdentityFile=/host-home/.vagrant.d/insecure_private_key ' +
'--option StrictHostKeyChecking=no ' +
'{}').format(' '.join(args))
proc = subprocess.Popen(cmd,
stdin=slave,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
preexec_fn=os.setsid,
close_fds=True,
shell=True)
os.close(slave)
# wait for the ssh connection
time.sleep(8)
# kill the whole process group
os.killpg(os.getpgid(proc.pid), 15)
os.close(master)
return proc.communicate()
return ssh_output(cmd)
def _node_ssh(args):

View File

@@ -1,3 +1,5 @@
import os
import subprocess
import time
import dcos.util as util
@@ -7,7 +9,9 @@ import pytest
from ..fixtures.service import framework_fixture
from .common import (assert_command, assert_lines, delete_zk_nodes,
get_services, service_shutdown, watch_all_deployments)
exec_command, get_services, package_install,
package_uninstall, service_shutdown, ssh_output,
watch_all_deployments)
@pytest.fixture(scope="module")
@@ -16,29 +20,52 @@ def zk_znode(request):
return request
def setup_module(module):
package_install('chronos', True)
def teardown_module(module):
package_uninstall('chronos')
delete_zk_nodes()
def test_help():
stdout = b"""Manage DCOS services
Usage:
dcos service --info
dcos service [--inactive --json]
dcos service log [--follow --lines=N --ssh-config-file=<path>]
<service> [<file>]
dcos service shutdown <service-id>
Options:
-h, --help Show this screen
-h, --help Show this screen
--info Show a short description of this subcommand
--info Show a short description of this subcommand
--json Print json-formatted services
--ssh-config-file=<path> Path to SSH config file. Used to access
marathon logs.
--inactive Show inactive services in addition to active ones.
Inactive services are those that have been disconnected from
master, but haven't yet reached their failover timeout.
--follow Output data as the file grows
--version Show version
--inactive Show inactive services in addition to active
ones. Inactive services are those that have
been disconnected from master, but haven't yet
reached their failover timeout.
--json Print json-formatted services
--lines=N Output the last N lines [default: 10]
--version Show version
Positional Arguments:
<service-id> The ID for the DCOS Service
<file> Output this file. [default: stdout]
<service> The DCOS Service name.
<service-id> The DCOS Service ID
"""
assert_command(['dcos', 'service', '--help'], stdout=stdout)
@@ -49,7 +76,7 @@ def test_info():
def test_service():
services = get_services(1)
services = get_services(2)
schema = _get_schema(framework_fixture())
for srv in services:
@@ -57,57 +84,133 @@ def test_service():
def test_service_table():
assert_lines(['dcos', 'service'], 2)
assert_lines(['dcos', 'service'], 3)
def test_service_inactive(zk_znode):
# install cassandra
stdout = b"""The Apache Cassandra DCOS Service implementation is alpha \
and there may be bugs, incomplete features, incorrect documentation or other \
discrepancies.
The default configuration requires 3 nodes each with 0.3 CPU shares, 1184MB \
of memory and 272MB of disk.
Installing package [cassandra] version [0.1.0-1]
Thank you for installing the Apache Cassandra DCOS Service.
\tDocumentation: http://mesosphere.github.io/cassandra-mesos/
\tIssues: https://github.com/mesosphere/cassandra-mesos/issues
"""
assert_command(['dcos', 'package', 'install', 'cassandra', '--yes'],
stdout=stdout)
# wait for it to deploy
watch_all_deployments(300)
package_install('cassandra', True)
# wait long enough for it to register
time.sleep(5)
# assert marathon and cassandra are listed
get_services(2)
# assert marathon, chronos, and cassandra are listed
get_services(3)
# uninstall cassandra using marathon. For now, need to explicitly remove
# the group that is left by cassandra. See MARATHON-144
assert_command(['dcos', 'marathon', 'group', 'remove', '/cassandra'])
watch_all_deployments(300)
watch_all_deployments()
# I'm not quite sure why we have to sleep, but it seems cassandra
# only transitions to "inactive" after a few seconds.
time.sleep(5)
# assert only marathon is active
get_services(1)
# assert marathon and cassandra are listed with --inactive
services = get_services(None, ['--inactive'])
assert len(services) >= 2
# assert only marathon and chronos are active
get_services(2)
# assert marathon, chronos, and cassandra are listed with --inactive
services = get_services(args=['--inactive'])
assert len(services) >= 3
# shutdown the cassandra framework
for framework in get_services(args=['--inactive']):
for framework in services:
if framework['name'] == 'cassandra.dcos':
service_shutdown(framework['id'])
# assert marathon is only listed with --inactive
get_services(1, ['--inactive'])
# assert marathon, chronos are only listed with --inactive
get_services(2, ['--inactive'])
def test_log():
returncode, stdout, stderr = exec_command(
['dcos', 'service', 'log', 'chronos'])
assert returncode == 0
assert len(stdout.decode('utf-8').split('\n')) > 1
assert stderr == b''
def test_log_file():
returncode, stdout, stderr = exec_command(
['dcos', 'service', 'log', 'chronos', 'stderr'])
assert returncode == 0
assert len(stdout.decode('utf-8').split('\n')) > 1
assert stderr == b''
def test_log_marathon_file():
assert_command(['dcos', 'service', 'log', 'marathon', 'stderr'],
stderr=(b'The <file> argument is invalid for marathon. ' +
b'The systemd journal is always used for the ' +
b'marathon log.\n'),
returncode=1)
def test_log_marathon_config():
stdout, stderr = ssh_output(
'dcos service log marathon ' +
'--ssh-config-file=tests/data/node/ssh_config')
assert stdout == b''
assert stderr.startswith(b'ignoring bad proto spec')
def test_log_marathon():
stdout, stderr = ssh_output(
'dcos service log marathon ' +
'--ssh-config-file=tests/data/service/ssh_config')
assert len(stdout.decode('utf-8').split('\n')) > 10
assert ((stderr == b'') or
(len(stderr.split('\n')) == 2 and
stderr.startswith('Warning: Permanently added')))
def test_log_config():
assert_command(
['dcos', 'service', 'log', 'chronos', '--ssh-config-file=/path'],
stderr=(b'The `--ssh-config-file` argument is invalid for '
b'non-marathon services. SSH is not used.\n'),
returncode=1)
def test_log_follow():
proc = subprocess.Popen(['dcos', 'service', 'log', 'chronos', '--follow'],
preexec_fn=os.setsid,
stdout=subprocess.PIPE)
time.sleep(3)
proc.poll()
assert proc.returncode is None
os.killpg(os.getpgid(proc.pid), 15)
assert len(proc.stdout.read().decode('utf-8').split('\n')) > 3
def test_log_lines():
assert_lines(['dcos', 'service', 'log', 'chronos', '--lines=4'], 4)
def test_log_multiple_apps(zk_znode):
package_install('marathon', True)
package_install('marathon', True,
['--options=tests/data/service/marathon-user2.json',
'--app-id=marathon-user2'])
try:
stderr = (b'Multiple marathon apps found for service name ' +
b'[marathon-user]: [/marathon-user], [/marathon-user2]\n')
assert_command(['dcos', 'service', 'log', 'marathon-user'],
returncode=1,
stderr=stderr)
finally:
package_uninstall('marathon', ['--all'])
def test_log_no_apps():
assert_command(['dcos', 'service', 'log', 'bogus'],
stderr=b'No marathon apps found for service [bogus]\n',
returncode=1)
def _get_schema(service):

View File

@@ -645,6 +645,18 @@ class Client(object):
return response.json()
def get_leader(self):
""" Get the leading marathon instance.
:returns: string of the form <ip>:<port>
:rtype: str
"""
url = self._create_url('v2/leader')
response = http.get(url)
return response.json()['leader']
def _default_marathon_error(message=""):
"""

View File

@@ -12,18 +12,19 @@ logger = util.get_logger(__name__)
MESOS_TIMEOUT = 5
def get_master():
def get_master(dcos_client=None):
"""Create a Master object using the url stored in the
'core.mesos_master_url' property if it exists. Otherwise, we use
the `core.dcos_url` property
:param config: user config
:type config: Toml
:param dcos_client: DCOSClient
:type dcos_client: DCOSClient | None
:returns: master state object
:rtype: Master
"""
return Master(DCOSClient().get_master_state())
dcos_client = dcos_client or DCOSClient()
return Master(dcos_client.get_master_state())
class DCOSClient(object):
@@ -481,7 +482,7 @@ class Slave(object):
:rtype: [dict]
"""
return _merge(self._state, ['frameworks', 'completed_frameworks'])
return _merge(self.state(), ['frameworks', 'completed_frameworks'])
def executor_dicts(self):
"""Returns the executor dictionaries from the state.json
@@ -664,12 +665,12 @@ class MesosFile(object):
:type task: Task | None
:param slave: slave where the file lives
:type slave: Slave | None
:param mesos_client: client to use for network requests
:type mesos_client: DCOSClient | None
:param dcos_client: client to use for network requests
:type dcos_client: DCOSClient | None
"""
def __init__(self, path, task=None, slave=None, mesos_client=None):
def __init__(self, path, task=None, slave=None, dcos_client=None):
if task and slave:
raise ValueError(
"You cannot provide both `task` and `slave` " +
@@ -684,7 +685,7 @@ class MesosFile(object):
self._task = task
self._path = path
self._mesos_client = mesos_client or DCOSClient()
self._dcos_client = dcos_client or DCOSClient()
self._cursor = 0
def size(self):
@@ -817,10 +818,10 @@ class MesosFile(object):
"""
if self._slave:
return self._mesos_client.slave_file_read(self._slave['id'],
**params)
return self._dcos_client.slave_file_read(self._slave['id'],
**params)
else:
return self._mesos_client.master_file_read(**params)
return self._dcos_client.master_file_read(**params)
def __str__(self):
"""String representation of the file: <task_id:file_path>

View File

@@ -1437,3 +1437,18 @@ class IndexEntries():
"""
return {'source': self.source.url, 'packages': self.packages}
def get_apps_for_framework(framework_name, client):
""" Return all apps running the given framework.
:param framework_name: framework name
:type framework_name: str
:param client: marathon client
:type client: marathon.Client
:rtype: [dict]
"""
return [app for app in client.get_apps()
if app.get('labels', {}).get(
PACKAGE_FRAMEWORK_NAME_KEY) == framework_name]

View File

@@ -583,9 +583,6 @@ def stream(fn, objs):
yield job, jobs[job]
logger = get_logger(__name__)
def get_proxy_dict_from_env():
""" Returns dict with proxy parameters
@@ -599,3 +596,25 @@ def get_proxy_dict_from_env():
if value and (name == 'http_proxy' or name == 'https_proxy'):
proxies[name] = value
return proxies
def get_ssh_options(config_file, options):
"""Returns the SSH arguments for the given parameters. Used by
commands that wrap SSH.
:param config_file: SSH config file.
:type config_file: str | None
:param options: SSH options
:type options: [str]
:rtype: str
"""
ssh_options = ' '.join('-o {}'.format(opt) for opt in options)
if config_file:
ssh_options += ' -F {}'.format(config_file)
return ssh_options
logger = get_logger(__name__)