add api for log collecting

- add new support_get_logs() api
- change dump api to require a directory to dump to,
matching the new get_logs api
- remove no longer needed pexpect
- update utests
- change log collector tool to use the new api

Jira-Issue: OSTACKDEV-18
This commit is contained in:
Steve Noyes 2016-04-05 15:18:14 -04:00
parent c60b33af43
commit 9ef934e240
10 changed files with 285 additions and 197 deletions

View File

@ -38,7 +38,6 @@ BuildRequires: python-pbr >= 1.3.0
Requires: openstack-kolla-ansible >= 0.2.0
Requires: openstack-kolla-ansible < 0.3.0
Requires: babel >= 2.0
Requires: pexpect >= 2.3
Requires: python-babel >= 2.0
Requires: python-cliff >= 1.13.0
Requires: python-cliff-tablib >= 1.1
@ -180,6 +179,9 @@ esac
%changelog
* Tue Apr 05 2016 - Steve Noyes <steve.noyes@oracle.com>
- remove obsolete pexpect requirement
* Tue Feb 23 2016 - Borne Mace <borne.mace@oracle.com>
- added clean up of old egg-info directories during update

View File

@ -11,20 +11,63 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import kollacli.i18n as u
import os
from kollacli.api.exceptions import InvalidArgument
from kollacli.common.support import dump
from kollacli.common.support import get_logs
from kollacli.common.utils import check_arg
from kollacli.common.utils import safe_decode
class SupportApi(object):
def support_dump(self):
def support_dump(self, dirpath):
"""Dumps configuration data for debugging.
Dumps most files in /etc/kolla and /usr/share/kolla into a
tar file to be given to support / development to help with
debugging problems.
:param dirpath: path to directory where dump will be placed
:type dirpath: string
:return: path to dump file
:rtype: string
"""
dumpfile_path = dump()
check_arg(dirpath, u._('Directory path'), str)
dirpath = safe_decode(dirpath)
if not os.path.exists(dirpath):
raise InvalidArgument(u._('Directory path: {path} does not exist')
.format(path=dirpath))
dumpfile_path = dump(dirpath)
return dumpfile_path
def support_get_logs(self, servicenames, hostname, dirpath):
    """Get container logs.

    Fetch the container log files of services from the specified host.
    The log files will be placed in the named directory. All the
    containers for the host will be placed in a directory named
    hostname. The file names for each log will be servicename_id.log.

    :param servicenames: names of services (ie nova, glance, etc)
    :type servicenames: list of strings
    :param hostname: name of host to look for logs on
    :type hostname: string
    :param dirpath: path of directory where log files will be written
    :type dirpath: string
    :raises InvalidArgument: if dirpath does not exist or is not a
        directory
    """
    check_arg(dirpath, u._('Directory path'), str)
    dirpath = safe_decode(dirpath)
    if not os.path.exists(dirpath):
        raise InvalidArgument(u._('Directory path: {path} does not exist')
                              .format(path=dirpath))
    # Reject a regular file up front; otherwise the failure would only
    # surface when the first log file is written, after the remote
    # calls have already been made.
    if not os.path.isdir(dirpath):
        raise InvalidArgument(
            u._('Directory path: {path} is not a directory')
            .format(path=dirpath))

    check_arg(servicenames, u._('Service names'), list)
    servicenames = safe_decode(servicenames)

    # a single host is expected here, so label it in the singular
    check_arg(hostname, u._('Host name'), str)
    hostname = safe_decode(hostname)

    get_logs(servicenames, hostname, dirpath)

View File

@ -15,6 +15,7 @@ from cliff.command import Command
from kollacli.api.client import ClientApi
import kollacli.i18n as u
import logging
import tempfile
import traceback
LOG = logging.getLogger(__name__)
@ -30,7 +31,7 @@ class Dump(Command):
"""
def take_action(self, parsed_args):
try:
dump_path = CLIENT.support_dump()
dump_path = CLIENT.support_dump(tempfile.gettempdir())
LOG.info(u._('Dump successful to {path}').format(path=dump_path))
except Exception:
msg = (u._('Dump failed: {reason}')

View File

@ -542,24 +542,8 @@ class Inventory(object):
return summary
def ssh_check_host(self, hostname):
err_msg = None
command_string = '/usr/bin/sudo -u %s %s -vvv ' % \
(get_admin_user(), get_ansible_command())
gen_file_path = self.create_json_gen_file()
err_msg, output = self.run_ansible_command('-m ping', hostname)
is_ok = True
try:
inventory_string = '-i ' + gen_file_path
ping_string = ' %s %s' % (hostname, '-m ping')
cmd = (command_string + inventory_string + ping_string)
err_msg, output = run_cmd(cmd, False)
except Exception as e:
is_ok = False
msg = (
u._('Host: ({host}) setup exception. : {error}')
.format(host=hostname, error=str(e)))
finally:
self.remove_json_gen_file(gen_file_path)
if err_msg:
is_ok = False
msg = (
@ -570,6 +554,21 @@ class Inventory(object):
.format(host=hostname))
return is_ok, msg
def run_ansible_command(self, ansible_command, hostname):
    """Run an ad-hoc ansible command against a host.

    The command is run through sudo as the admin user, using a
    temporary generated inventory file that is removed afterwards.

    :param ansible_command: ansible arguments appended to the command
        line (eg '-m ping' or '-a "docker ps -a"')
    :type ansible_command: string
    :param hostname: host (pattern) the command is run against
    :type hostname: string
    :return: tuple of (err_msg, output); output is None when the
        command could not be run at all
    :rtype: tuple
    """
    err_msg = None
    # Pre-bind output so the return below is safe when run_cmd raises.
    output = None
    command_string = '/usr/bin/sudo -u %s %s -vvv' % \
        (get_admin_user(), get_ansible_command())
    gen_file_path = self.create_json_gen_file()
    cmd = '%s %s -i %s %s' % (command_string, hostname, gen_file_path,
                              ansible_command)
    try:
        err_msg, output = run_cmd(cmd, False)
    except Exception as e:
        err_msg = str(e)
    finally:
        # always clean up the temporary inventory file
        self.remove_json_gen_file(gen_file_path)
    return err_msg, output
def add_group(self, groupname):
# Group names cannot overlap with service names:

View File

@ -11,14 +11,14 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import logging
import os
import tarfile
import tempfile
from kollacli.api.exceptions import FailedOperation
from kollacli.common.inventory import Inventory
from kollacli.common.properties import AnsibleProperties
from kollacli.common.utils import get_kolla_etc
from kollacli.common.utils import get_kolla_home
from kollacli.common.utils import get_kolla_log_dir
@ -28,7 +28,132 @@ from kollacli.common.utils import run_cmd
LOG = logging.getLogger(__name__)
def dump():
class HostLogs(object):
    """Collect docker container logs from one host through ansible.

    Typical use (see get_logs): call load_container_info(), then
    filter_services(), then write_logs().
    """

    def __init__(self, hostname, inventory, servicenames):
        """Remember the target host and the services of interest.

        :param hostname: name of the host to collect logs from
        :param inventory: object providing run_ansible_command()
        :param servicenames: list of service names to collect logs for
        """
        self.hostname = hostname
        self.inventory = inventory
        self.servicenames = servicenames
        self.container_info = {}  # container_id: container_img_name
        # container_id: container_img_name, limited to servicenames.
        # Initialised here so write_logs() is a no-op (instead of an
        # AttributeError) when filter_services() has not run yet.
        self.filtered_services = {}
        # Original (unused) attribute name, retained only so existing
        # readers of it do not break.
        self.filtered_servicenames = {}

    def _run_ansible(self, ansible_command):
        """Run an ansible command on the host and validate the result.

        :param ansible_command: ansible arguments, eg '-a "docker ps -a"'
        :return: the complete, unmodified ansible output
        :raises FailedOperation: if ansible reports an error, returns no
            output, or the output lacks the '>>' separator
        """
        hostname = self.hostname
        err_msg, output = \
            self.inventory.run_ansible_command(ansible_command, hostname)
        if err_msg:
            msg = 'Error accessing host %s : %s ' % (hostname, err_msg)
            raise FailedOperation(msg)
        if not output:
            msg = ('Host %s is not accessible.' % hostname)
            raise FailedOperation(msg)
        if '>>' not in output:
            msg = ('Host: %s. Invalid ansible return data: [%s].'
                   % (hostname, output))
            raise FailedOperation(msg)
        return output

    def load_container_info(self):
        """Get the list of kolla containers on the host.

        Runs 'docker ps -a' on the host and stores a dict of
        container_id: service name in self.container_info. The service
        name is the image name with the kolla prefix and the ':tag'
        suffix removed.

        :raises FailedOperation: if the host cannot be queried or the
            returned data is not docker ps output
        """
        hostname = self.hostname
        output = self._run_ansible('-a "docker ps -a"')
        if 'NAMES' not in output:
            msg = ('Host: %s. Invalid docker ps return data: [%s].'
                   % (hostname, output))
            raise FailedOperation(msg)

        ansible_properties = AnsibleProperties()
        base_distro = \
            ansible_properties.get_property('kolla_base_distro')
        install_type = \
            ansible_properties.get_property('kolla_install_type')
        # typically this prefix will be "ol-openstack-"
        container_prefix = base_distro + '-' + install_type + '-'

        # the ps output is after the '>>'
        output = output.split('>>', 1)[1]
        LOG.info('docker ps -a on host: %s:\n%s', hostname, output)

        containers = {}
        for line in output.split('\n'):
            tokens = line.split()
            if len(tokens) < 2:
                continue
            cid = tokens[0]
            image = tokens[1]
            if container_prefix not in image:
                # skip non-kolla containers
                continue
            name = image.split(container_prefix)[1]
            name = name.split(':')[0]
            containers[cid] = name
        self.container_info = containers

    def get_log(self, container_id):
        """Read the log of one container.

        :param container_id: docker id of the container
        :return: the text following the '>>' separator in the ansible
            output of 'docker logs <container_id>'
        :raises FailedOperation: if the host cannot be queried
        """
        output = self._run_ansible('-a "docker logs %s"' % container_id)
        # the log info is after the '>>'
        return output.split('>>', 1)[1]

    def write_logs(self, dirname):
        """Write out the log files for all filtered containers.

        :param dirname: directory under which the per-host log
            directory is created
        """
        for container_id, container_name in self.filtered_services.items():
            logdata = self.get_log(container_id)
            if logdata:
                logname = '%s_%s.log' % (container_name, container_id)
                self.write_logfile(dirname, logname, logdata)
            else:
                # Logger.warn is a deprecated alias of Logger.warning
                LOG.warning('No log data found for service %s on host %s'
                            % (container_name, self.hostname))

    def write_logfile(self, dirpath, logname, logdata):
        """Write out one log file to <dirpath>/<hostname>/<logname>."""
        hostdir = os.path.join(dirpath, self.hostname)
        if not os.path.exists(hostdir):
            os.mkdir(hostdir)
        fpath = os.path.join(hostdir, logname)
        with open(fpath, 'w') as logfile:
            logfile.write(logdata)

    def _is_requested(self, container_name):
        """Return True if the container belongs to a requested service.

        A container matches when its name equals a service name or
        starts with '<servicename>-' (eg 'nova-api' for 'nova').
        """
        return any(container_name == servicename or
                   container_name.startswith(servicename + '-')
                   for servicename in self.servicenames)

    def filter_services(self):
        """Filter the host's containers to only those of interest.

        Stores the matching subset of self.container_info in
        self.filtered_services.
        """
        services_subset = {}
        for host_svcid, host_svcname in self.container_info.items():
            if self._is_requested(host_svcname):
                services_subset[host_svcid] = host_svcname
        self.filtered_services = services_subset
def get_logs(servicenames, hostname, dirname):
    """Fetch container logs for the given services from one host.

    :param servicenames: list of service names whose container logs
        are wanted
    :param hostname: name of the host to collect the logs from
    :param dirname: directory the log files are written under
    """
    inv = Inventory.load()

    # check both the host and the services against the inventory
    # before any container data is requested from the host
    inv.validate_hostnames([hostname])
    inv.validate_servicenames(servicenames)

    collector = HostLogs(hostname, inv, servicenames)
    collector.load_container_info()
    collector.filter_services()
    collector.write_logs(dirname)
def dump(dirpath):
"""Dumps configuration data for debugging
Dumps most files in /etc/kolla and /usr/share/kolla into a
@ -44,7 +169,7 @@ def dump():
kollacli_etc = get_kollacli_etc().rstrip('/')
ketc = 'kolla/etc/'
kshare = 'kolla/share/'
fd, dump_path = tempfile.mkstemp(prefix='kollacli_dump_',
fd, dump_path = tempfile.mkstemp(dir=dirpath, prefix='kollacli_dump_',
suffix='.tgz')
os.close(fd) # avoid fd leak
with tarfile.open(dump_path, 'w:gz') as tar:

View File

@ -14,9 +14,9 @@
import grp
import logging
import os
import pexpect
import pwd
import six
import subprocess # nosec
import sys
import time
@ -146,42 +146,25 @@ def run_cmd(cmd, print_output=True):
- err_msg: empty string=command succeeded
not None=command failed
- output: string: all the output of the run command
If the command is an ansible playbook command, record the
output in an ansible log file.
"""
pwd_prompt = '[sudo] password'
err_msg = ''
output = ''
child = None
err = None
output = None
try:
child = pexpect.spawn(cmd)
sniff = child.read(len(pwd_prompt))
sniff = safe_decode(sniff)
if sniff == pwd_prompt:
output = sniff + '\n'
raise Exception(
u._('Insufficient permissions to run command "{command}".')
.format(command=cmd))
child.maxsize = 1
child.timeout = 86400
for line in child:
line = safe_decode(line)
outline = sniff + line.rstrip()
sniff = ''
output = ''.join([output, outline, '\n'])
if print_output:
LOG.info(outline)
process = subprocess.Popen(cmd, shell=True, # nosec
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
output, err = process.communicate()
except Exception as e:
err_msg = '%s' % e
finally:
if child:
child.close()
if child.exitstatus != 0:
err_msg = (u._('Command failed. : {error}')
.format(error=err_msg))
return err_msg, output
err = str(e)
err = safe_decode(err)
output = safe_decode(output)
if process.returncode != 0:
err = (u._('Command failed. : {error}')
.format(error=err))
if print_output:
LOG.info(output)
return err, output
def change_property(file_path, property_key, property_value, clear=False):

View File

@ -7,6 +7,5 @@ jsonpickle>=0.9
oslo.i18n>=1.3.0 # Apache-2.0
paramiko>=1.15
pbr>=0.10
pexpect>=2.3,!=3.3
PyYAML>=3.10
six>=1.9.0

View File

@ -164,6 +164,7 @@ class TestFunctional(KollaCliTest):
err_msg = job.get_error_message()
self.log.info('job is complete. status: %s, err: %s'
% (status, err_msg))
self.log.info('ansible output:\n%s' % job.get_console_output())
if is_physical_host:
self.assertEqual(0, status, 'Job %s failed: %s' % (descr, err_msg))
else:

View File

@ -17,26 +17,46 @@ import tarfile
import unittest
from common import KollaCliTest
from common import TestConfig
from kollacli.api.client import ClientApi
from kollacli.common.utils import get_kollacli_home
LOGS_PREFIX = '/tmp/kolla_support_logs_'
CLIENT = ClientApi()
class TestFunctional(KollaCliTest):
def test_log_collector(self):
test_config = TestConfig()
test_config.load()
is_physical_hosts = True
hostnames = test_config.get_hostnames()
if not hostnames:
is_physical_hosts = False
hostnames = ['test_host1']
CLIENT.host_add(hostnames)
zip_path = ''
host1 = 'host_test1'
try:
path = os.path.join(get_kollacli_home(),
'tools', 'log_collector.py')
retval, msg = self.run_command('%s %s' % (path, host1))
self.assertEqual(0, retval,
'log_collector command failed: %s' % msg)
self.assertIn(LOGS_PREFIX, msg)
zip_path = '/tmp' + msg.split('/tmp')[1].strip()
self.assertTrue(os.path.exists(zip_path),
'Zip file %s does not exist' % zip_path)
# run the log_collector tool
retval, msg = self.run_command('%s %s' % (path, 'all'))
if is_physical_hosts:
self.assertEqual(0, retval,
'log_collector command failed: %s' % msg)
self.assertNotIn('ERROR', msg)
self.assertIn(LOGS_PREFIX, msg)
zip_path = '/tmp' + msg.split('/tmp')[1].strip()
self.assertTrue(os.path.exists(zip_path),
'Zip file %s does not exist' % zip_path)
else:
# no host, this should fail
self.assertIn('error', msg.lower())
except Exception as e:
raise e
finally:

View File

@ -14,138 +14,47 @@
# under the License.
import os
import subprocess
import shutil
import sys
import tarfile
import tempfile
import traceback
from kollacli.api.client import ClientApi
from kollacli.common.inventory import Inventory
from kollacli.common.inventory import remove_temp_inventory
from kollacli.common import properties
from kollacli.common.utils import get_admin_user
from kollacli.common.utils import get_ansible_command
from kollacli.common.utils import safe_decode
tar_file_descr = None
CLIENT = ClientApi()
LOGDIR = '/tmp/container_logs'
def run_ansible_cmd(cmd, host):
# sudo -u kolla ansible ol7-c4 -i inv_path -a "cmd"
inv_path = None
out = None
def get_logs_from_host(host):
try:
user = get_admin_user()
inv = Inventory.load()
inv_path = inv.create_json_gen_file()
maj_services = []
services = CLIENT.service_get_all()
for service in services:
if not service.get_parent():
# top level service
maj_services.append(service.name)
ansible_verb = get_ansible_command()
ansible_cmd = ('/usr/bin/sudo -u %s %s %s -i %s -a "%s"'
% (user, ansible_verb, host, inv_path, cmd))
try:
(out, err) = subprocess.Popen(ansible_cmd, shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE).communicate()
except Exception as e:
print('%s\nCannot communicate with host: %s, skipping' % (e, host))
if not out:
print('Host %s is not accessible: %s, skipping' % (host, err))
else:
out = safe_decode(out)
if '>>' not in out:
print('Ansible command: %s' % ansible_cmd)
print('Host: %s. \nInvalid ansible return data: [%s]. skipping'
% (host, out))
out = None
finally:
remove_temp_inventory(inv_path)
return out
print('Adding container logs from host: %s' % host)
CLIENT.support_get_logs(maj_services, host, LOGDIR)
except Exception as e:
print('Error getting logs on host: %s: %s' % (host, str(e)))
def add_logdata_to_tar(logdata, host, cname, cid):
print('Adding container log %s:%s(%s)' % (host, cname, cid))
archive_name = '/%s/%s_%s.log' % (host, cname, cid)
tmp_path = None
def dump_kolla_info():
print('Getting kolla client logs')
dump_path = None
try:
fd, tmp_path = tempfile.mkstemp()
os.close(fd) # avoid fd leak
with open(tmp_path, 'w') as tmpfile:
tmpfile.write(logdata)
tar_file_descr.add(tmp_path, arcname=archive_name)
dump_path = CLIENT.support_dump('/tmp')
tar_file_descr.add(dump_path)
except Exception:
print('ERROR adding %s\n%s' % (archive_name, traceback.format_exc()))
print('ERROR: running dump command %s' % traceback.format_exc())
finally:
if tmp_path and os.path.exists(tmp_path):
os.remove(tmp_path)
def get_containers(host):
"""return dict {id:name}"""
cmd = 'docker ps -a'
out = run_ansible_cmd(cmd, host)
if not out:
return None
out = safe_decode(out)
if 'NAMES' not in out:
print('Host: %s. \nInvalid docker ps return data: [%s]. skipping'
% (host, out))
return None
ansible_properties = properties.AnsibleProperties()
base_distro = \
ansible_properties.get_property('kolla_base_distro')
install_type = \
ansible_properties.get_property('kolla_install_type')
# typically this prefix will be "ol-openstack-"
container_prefix = base_distro + '-' + install_type + '-'
# add ps output to tar
add_logdata_to_tar(out, host, 'docker', 'ps')
# process ps output
containers = {}
valid_found = False
lines = out.split('\n')
for line in lines:
if container_prefix not in line:
# skip non-kolla containers
continue
valid_found = True
tokens = line.split()
cid = tokens[0]
image = tokens[1]
name = image.split(container_prefix)[1]
name = name.split(':')[0]
containers[cid] = name
if not valid_found:
print('no containers with %s in image name found on %s'
% (container_prefix, host))
return containers
def add_container_log(cid, cname, host):
cmd = 'docker logs %s' % cid
out = run_ansible_cmd(cmd, host)
if out:
out = safe_decode(out)
out = out.split('>>', 1)[1]
header = ('Host: %s, Container: %s, id: %s\n'
% (host, cname, cid))
out = header + out
add_logdata_to_tar(out, host, cname, cid)
def add_logs_from_host(host):
containers = get_containers(host)
if containers:
for (cid, cname) in containers.items():
add_container_log(cid, cname, host)
if dump_path and os.path.exists(dump_path):
os.remove(dump_path)
def main():
@ -163,8 +72,10 @@ def main():
sys.exit(0)
elif 'all' == sys.argv[1]:
# get logs from all hosts
inventory = Inventory.load()
hosts = inventory.get_hostnames()
hosts = []
host_objs = CLIENT.host_get_all()
for host_obj in host_objs:
hosts.append(host_obj.name)
else:
# get logs from specified hosts
hostnames = sys.argv[1].split(',')
@ -181,22 +92,26 @@ def main():
os.close(fd) # avoid fd leak
with tarfile.open(tar_path, 'w:gz') as tar_file_descr:
# gather dump output from kollacli
print('Getting kollacli logs')
dump_path = None
try:
dump_path = CLIENT.support_dump()
tar_file_descr.add(dump_path)
except Exception:
print('ERROR: running dump command %s' % traceback.format_exc())
finally:
if dump_path and os.path.exists(dump_path):
os.remove(dump_path)
# clear out old logs
if os.path.exists(LOGDIR):
shutil.rmtree(LOGDIR)
os.mkdir(LOGDIR)
# gather logs from selected hosts
for host in hosts:
print('Getting docker logs from host: %s' % host)
add_logs_from_host(host)
try:
for host in hosts:
get_logs_from_host(host)
# tar up all the container logs
tar_file_descr.add(LOGDIR, arcname='container_logs')
finally:
# remove uncompressed logs
if os.path.exists(LOGDIR):
shutil.rmtree(LOGDIR)
# gather dump output from kollacli
dump_kolla_info()
print('Log collection complete. Logs are at %s' % tar_path)
if __name__ == '__main__':