From 9ef934e2407304553022aa21ed65156c7bcd50f7 Mon Sep 17 00:00:00 2001 From: Steve Noyes Date: Tue, 5 Apr 2016 15:18:14 -0400 Subject: [PATCH] add api for log collecting - add new support_get_logs() api - change dump api to require directory for dump to match new get_logs api - remove no longer needed pexpect - update utests - change log collector tool to use the new api Jira-Issue: OSTACKDEV-18 --- buildrpm/openstack-kollacli.spec | 4 +- kollacli/api/support.py | 47 ++++++++- kollacli/commands/support.py | 3 +- kollacli/common/inventory.py | 33 +++--- kollacli/common/support.py | 133 +++++++++++++++++++++++- kollacli/common/utils.py | 51 +++------ requirements.txt | 1 - tests/destroy.py | 1 + tests/support.py | 36 +++++-- tools/log_collector.py | 173 ++++++++----------------------- 10 files changed, 285 insertions(+), 197 deletions(-) diff --git a/buildrpm/openstack-kollacli.spec b/buildrpm/openstack-kollacli.spec index 6d75886..e48e217 100644 --- a/buildrpm/openstack-kollacli.spec +++ b/buildrpm/openstack-kollacli.spec @@ -38,7 +38,6 @@ BuildRequires: python-pbr >= 1.3.0 Requires: openstack-kolla-ansible >= 0.2.0 Requires: openstack-kolla-ansible < 0.3.0 Requires: babel >= 2.0 -Requires: pexpect >= 2.3 Requires: python-babel >= 2.0 Requires: python-cliff >= 1.13.0 Requires: python-cliff-tablib >= 1.1 @@ -180,6 +179,9 @@ esac %changelog +* Tue Apr 05 2016 - Steve Noyes +- remove obsolete pexpect requirement + * Tue Feb 23 2016 - Borne Mace - added clean up of old egg-info directories during update diff --git a/kollacli/api/support.py b/kollacli/api/support.py index c64d2d7..7faa6ec 100644 --- a/kollacli/api/support.py +++ b/kollacli/api/support.py @@ -11,20 +11,63 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. 
+import kollacli.i18n as u + +import os + +from kollacli.api.exceptions import InvalidArgument from kollacli.common.support import dump +from kollacli.common.support import get_logs +from kollacli.common.utils import check_arg +from kollacli.common.utils import safe_decode class SupportApi(object): - def support_dump(self): + def support_dump(self, dirpath): """Dumps configuration data for debugging. Dumps most files in /etc/kolla and /usr/share/kolla into a tar file so be given to support / development to help with debugging problems. + :param dirpath: path to directory where dump will be placed + :type dirpath: string :return: path to dump file :rtype: string """ - dumpfile_path = dump() + check_arg(dirpath, u._('Directory path'), str) + dirpath = safe_decode(dirpath) + if not os.path.exists(dirpath): + raise InvalidArgument(u._('Directory path: {path} does not exist') + .format(path=dirpath)) + dumpfile_path = dump(dirpath) return dumpfile_path + + def support_get_logs(self, servicenames, hostname, dirpath): + """get container logs + + Fetch the container log files of services from the specified host. + The log files will be placed in the named directory. All the container + logs for the host will be placed in a directory named hostname. The + file names for each log will be servicename_id.log. 
+ + :param servicenames: names of services (ie nova, glance, etc) + :type servicenames: list of strings + :param hostname: name of host to look for logs on + :type hostname: string + :param dirpath: path of directory where log files will be written + :type dirpath: string + """ + check_arg(dirpath, u._('Directory path'), str) + dirpath = safe_decode(dirpath) + if not os.path.exists(dirpath): + raise InvalidArgument(u._('Directory path: {path} does not exist') + .format(path=dirpath)) + + check_arg(servicenames, u._('Service names'), list) + servicenames = safe_decode(servicenames) + check_arg(hostname, u._('Host names'), str) + hostname = safe_decode(hostname) + + get_logs(servicenames, hostname, dirpath) diff --git a/kollacli/commands/support.py b/kollacli/commands/support.py index 2dd9229..f7fca4c 100644 --- a/kollacli/commands/support.py +++ b/kollacli/commands/support.py @@ -15,6 +15,7 @@ from cliff.command import Command from kollacli.api.client import ClientApi import kollacli.i18n as u import logging +import tempfile import traceback LOG = logging.getLogger(__name__) @@ -30,7 +31,7 @@ class Dump(Command): """ def take_action(self, parsed_args): try: - dump_path = CLIENT.support_dump() + dump_path = CLIENT.support_dump(tempfile.gettempdir()) LOG.info(u._('Dump successful to {path}').format(path=dump_path)) except Exception: msg = (u._('Dump failed: {reason}') diff --git a/kollacli/common/inventory.py b/kollacli/common/inventory.py index d26f99d..476b291 100644 --- a/kollacli/common/inventory.py +++ b/kollacli/common/inventory.py @@ -542,24 +542,8 @@ class Inventory(object): return summary def ssh_check_host(self, hostname): - err_msg = None - command_string = '/usr/bin/sudo -u %s %s -vvv ' % \ - (get_admin_user(), get_ansible_command()) - gen_file_path = self.create_json_gen_file() + err_msg, output = self.run_ansible_command('-m ping', hostname) is_ok = True - try: - inventory_string = '-i ' + gen_file_path - ping_string = ' %s %s' % (hostname, '-m ping') - 
cmd = (command_string + inventory_string + ping_string) - err_msg, output = run_cmd(cmd, False) - except Exception as e: - is_ok = False - msg = ( - u._('Host: ({host}) setup exception. : {error}') - .format(host=hostname, error=str(e))) - finally: - self.remove_json_gen_file(gen_file_path) - if err_msg: is_ok = False msg = ( @@ -570,6 +554,21 @@ class Inventory(object): .format(host=hostname)) return is_ok, msg + def run_ansible_command(self, ansible_command, hostname): + err_msg = None + command_string = '/usr/bin/sudo -u %s %s -vvv' % \ + (get_admin_user(), get_ansible_command()) + gen_file_path = self.create_json_gen_file() + cmd = '%s %s -i %s %s' % (command_string, hostname, gen_file_path, + ansible_command) + try: + err_msg, output = run_cmd(cmd, False) + except Exception as e: + err_msg = str(e) + finally: + self.remove_json_gen_file(gen_file_path) + return err_msg, output + def add_group(self, groupname): # Group names cannot overlap with service names: diff --git a/kollacli/common/support.py b/kollacli/common/support.py index 54dedf7..110ffaa 100644 --- a/kollacli/common/support.py +++ b/kollacli/common/support.py @@ -11,14 +11,14 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. 
- - import logging import os import tarfile import tempfile +from kollacli.api.exceptions import FailedOperation from kollacli.common.inventory import Inventory +from kollacli.common.properties import AnsibleProperties from kollacli.common.utils import get_kolla_etc from kollacli.common.utils import get_kolla_home from kollacli.common.utils import get_kolla_log_dir @@ -28,7 +28,132 @@ from kollacli.common.utils import run_cmd LOG = logging.getLogger(__name__) -def dump(): +class HostLogs(object): + + def __init__(self, hostname, inventory, servicenames): + self.hostname = hostname + self.inventory = inventory + self.servicenames = servicenames + self.container_info = {} # container_id: container_img_name + self.filtered_servicenames = {} + + def load_container_info(self): + """get the list of containers on the host""" + hostname = self.hostname + err_msg, output = \ + self.inventory.run_ansible_command('-a "docker ps -a"', hostname) + if err_msg: + msg = 'Error accessing host %s : %s ' % (hostname, err_msg) + raise FailedOperation(msg) + + if not output: + msg = ('Host %s is not accessible.' % hostname) + raise FailedOperation(msg) + else: + if '>>' not in output: + msg = ('Host: %s. Invalid ansible return data: [%s].' + % (hostname, output)) + raise FailedOperation(msg) + + if 'NAMES' not in output: + msg = ('Host: %s. Invalid docker ps return data: [%s].' 
+ % (hostname, output)) + raise FailedOperation(msg) + + ansible_properties = AnsibleProperties() + base_distro = \ + ansible_properties.get_property('kolla_base_distro') + install_type = \ + ansible_properties.get_property('kolla_install_type') + # typically this prefix will be "ol-openstack-" + container_prefix = base_distro + '-' + install_type + '-' + + # process ps output + containers = {} + + # the ps output is after the '>>' + output = output.split('>>', 1)[1] + LOG.info('docker ps -a on host: %s:\n%s' % (hostname, output)) + + lines = output.split('\n') + for line in lines: + tokens = line.split() + if len(tokens) < 2: + continue + cid = tokens[0] + image = tokens[1] + if container_prefix not in image: + # skip non-kolla containers + continue + name = image.split(container_prefix)[1] + name = name.split(':')[0] + containers[cid] = name + self.container_info = containers + + def get_log(self, container_id): + """read the container log""" + hostname = self.hostname + cmd = '-a "docker logs %s"' % container_id + err_msg, output = self.inventory.run_ansible_command(cmd, hostname) + if err_msg: + msg = 'Error accessing host %s : %s ' % (hostname, err_msg) + raise FailedOperation(msg) + + if not output: + msg = ('Host %s is not accessible.' % hostname) + raise FailedOperation(msg) + if '>>' not in output: + msg = ('Host: %s. Invalid ansible return data: [%s].' 
+ % (hostname, output)) + raise FailedOperation(msg) + + # the log info is after the '>>' + output = output.split('>>', 1)[1] + return output + + def write_logs(self, dirname): + """write out the log files for all containers""" + for container_id, container_name in self.filtered_services.items(): + logdata = self.get_log(container_id) + if logdata: + logname = '%s_%s.log' % (container_name, container_id) + self.write_logfile(dirname, logname, logdata) + else: + LOG.warn('No log data found for service %s on host %s' + % (container_name, self.hostname)) + + def write_logfile(self, dirpath, logname, logdata): + """write out one log file""" + hostdir = os.path.join(dirpath, self.hostname) + if not os.path.exists(hostdir): + os.mkdir(hostdir) + fpath = os.path.join(hostdir, logname) + with open(fpath, 'w') as logfile: + logfile.write(logdata) + + def filter_services(self): + """filter services to only those of interest""" + services_subset = {} + for host_svcid, host_svcname in self.container_info.items(): + for servicename in self.servicenames: + if (host_svcname == servicename or + host_svcname.startswith(servicename + '-')): + services_subset[host_svcid] = host_svcname + self.filtered_services = services_subset + + +def get_logs(servicenames, hostname, dirname): + inventory = Inventory.load() + inventory.validate_hostnames([hostname]) + inventory.validate_servicenames(servicenames) + + logs = HostLogs(hostname, inventory, servicenames) + logs.load_container_info() + logs.filter_services() + logs.write_logs(dirname) + + +def dump(dirpath): """Dumps configuration data for debugging Dumps most files in /etc/kolla and /usr/share/kolla into a @@ -44,7 +169,7 @@ def dump(): kollacli_etc = get_kollacli_etc().rstrip('/') ketc = 'kolla/etc/' kshare = 'kolla/share/' - fd, dump_path = tempfile.mkstemp(prefix='kollacli_dump_', + fd, dump_path = tempfile.mkstemp(dir=dirpath, prefix='kollacli_dump_', suffix='.tgz') os.close(fd) # avoid fd leak with tarfile.open(dump_path, 'w:gz') 
as tar: diff --git a/kollacli/common/utils.py b/kollacli/common/utils.py index 8def3db..3fa55c2 100644 --- a/kollacli/common/utils.py +++ b/kollacli/common/utils.py @@ -14,9 +14,9 @@ import grp import logging import os -import pexpect import pwd import six +import subprocess # nosec import sys import time @@ -146,42 +146,25 @@ def run_cmd(cmd, print_output=True): - err_msg: empty string=command succeeded not None=command failed - output: string: all the output of the run command - - If the command is an ansible playbook command, record the - output in an ansible log file. """ - pwd_prompt = '[sudo] password' - err_msg = '' - output = '' - child = None + err = None + output = None try: - child = pexpect.spawn(cmd) - sniff = child.read(len(pwd_prompt)) - sniff = safe_decode(sniff) - if sniff == pwd_prompt: - output = sniff + '\n' - raise Exception( - u._('Insufficient permissions to run command "{command}".') - .format(command=cmd)) - child.maxsize = 1 - child.timeout = 86400 - for line in child: - line = safe_decode(line) - outline = sniff + line.rstrip() - sniff = '' - output = ''.join([output, outline, '\n']) - if print_output: - LOG.info(outline) - + process = subprocess.Popen(cmd, shell=True, # nosec + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + output, err = process.communicate() except Exception as e: - err_msg = '%s' % e - finally: - if child: - child.close() - if child.exitstatus != 0: - err_msg = (u._('Command failed. : {error}') - .format(error=err_msg)) - return err_msg, output + err = str(e) + + err = safe_decode(err) + output = safe_decode(output) + if process.returncode != 0: + err = (u._('Command failed. 
: {error}') + .format(error=err)) + if print_output: + LOG.info(output) + return err, output def change_property(file_path, property_key, property_value, clear=False): diff --git a/requirements.txt b/requirements.txt index a6288cb..9a183cb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,5 @@ jsonpickle>=0.9 oslo.i18n>=1.3.0 # Apache-2.0 paramiko>=1.15 pbr>=0.10 -pexpect>=2.3,!=3.3 PyYAML>=3.10 six>=1.9.0 diff --git a/tests/destroy.py b/tests/destroy.py index 181e413..ce76daf 100644 --- a/tests/destroy.py +++ b/tests/destroy.py @@ -164,6 +164,7 @@ class TestFunctional(KollaCliTest): err_msg = job.get_error_message() self.log.info('job is complete. status: %s, err: %s' % (status, err_msg)) + self.log.info('ansible output:\n%s' % job.get_console_output()) if is_physical_host: self.assertEqual(0, status, 'Job %s failed: %s' % (descr, err_msg)) else: diff --git a/tests/support.py b/tests/support.py index cb27982..f265829 100644 --- a/tests/support.py +++ b/tests/support.py @@ -17,26 +17,46 @@ import tarfile import unittest from common import KollaCliTest +from common import TestConfig +from kollacli.api.client import ClientApi from kollacli.common.utils import get_kollacli_home LOGS_PREFIX = '/tmp/kolla_support_logs_' +CLIENT = ClientApi() class TestFunctional(KollaCliTest): def test_log_collector(self): + test_config = TestConfig() + test_config.load() + + is_physical_hosts = True + hostnames = test_config.get_hostnames() + if not hostnames: + is_physical_hosts = False + hostnames = ['test_host1'] + CLIENT.host_add(hostnames) + zip_path = '' - host1 = 'host_test1' try: path = os.path.join(get_kollacli_home(), 'tools', 'log_collector.py') - retval, msg = self.run_command('%s %s' % (path, host1)) - self.assertEqual(0, retval, - 'log_collector command failed: %s' % msg) - self.assertIn(LOGS_PREFIX, msg) - zip_path = '/tmp' + msg.split('/tmp')[1].strip() - self.assertTrue(os.path.exists(zip_path), - 'Zip file %s does not exist' % zip_path) + + # run the 
log_collector tool + retval, msg = self.run_command('%s %s' % (path, 'all')) + + if is_physical_hosts: + self.assertEqual(0, retval, + 'log_collector command failed: %s' % msg) + self.assertNotIn('ERROR', msg) + self.assertIn(LOGS_PREFIX, msg) + zip_path = '/tmp' + msg.split('/tmp')[1].strip() + self.assertTrue(os.path.exists(zip_path), + 'Zip file %s does not exist' % zip_path) + else: + # no host, this should fail + self.assertIn('error', msg.lower()) except Exception as e: raise e finally: diff --git a/tools/log_collector.py b/tools/log_collector.py index 291ca9b..34d9cc0 100755 --- a/tools/log_collector.py +++ b/tools/log_collector.py @@ -14,138 +14,47 @@ # under the License. import os -import subprocess +import shutil import sys import tarfile import tempfile import traceback from kollacli.api.client import ClientApi -from kollacli.common.inventory import Inventory -from kollacli.common.inventory import remove_temp_inventory -from kollacli.common import properties -from kollacli.common.utils import get_admin_user -from kollacli.common.utils import get_ansible_command -from kollacli.common.utils import safe_decode tar_file_descr = None CLIENT = ClientApi() +LOGDIR = '/tmp/container_logs' -def run_ansible_cmd(cmd, host): - # sudo -u kolla ansible ol7-c4 -i inv_path -a "cmd" - inv_path = None - out = None + +def get_logs_from_host(host): try: - user = get_admin_user() - inv = Inventory.load() - inv_path = inv.create_json_gen_file() + maj_services = [] + services = CLIENT.service_get_all() + for service in services: + if not service.get_parent(): + # top level service + maj_services.append(service.name) - ansible_verb = get_ansible_command() - ansible_cmd = ('/usr/bin/sudo -u %s %s %s -i %s -a "%s"' - % (user, ansible_verb, host, inv_path, cmd)) - - try: - (out, err) = subprocess.Popen(ansible_cmd, shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE).communicate() - except Exception as e: - print('%s\nCannot communicate with host: %s, skipping' % (e, 
host)) - - if not out: - print('Host %s is not accessible: %s, skipping' % (host, err)) - else: - out = safe_decode(out) - if '>>' not in out: - print('Ansible command: %s' % ansible_cmd) - print('Host: %s. \nInvalid ansible return data: [%s]. skipping' - % (host, out)) - out = None - finally: - remove_temp_inventory(inv_path) - - return out + print('Adding container logs from host: %s' % host) + CLIENT.support_get_logs(maj_services, host, LOGDIR) + except Exception as e: + print('Error getting logs on host: %s: %s' % (host, str(e))) -def add_logdata_to_tar(logdata, host, cname, cid): - print('Adding container log %s:%s(%s)' % (host, cname, cid)) - archive_name = '/%s/%s_%s.log' % (host, cname, cid) - tmp_path = None +def dump_kolla_info(): + print('Getting kolla client logs') + dump_path = None try: - fd, tmp_path = tempfile.mkstemp() - os.close(fd) # avoid fd leak - with open(tmp_path, 'w') as tmpfile: - tmpfile.write(logdata) - tar_file_descr.add(tmp_path, arcname=archive_name) + dump_path = CLIENT.support_dump('/tmp') + tar_file_descr.add(dump_path) except Exception: - print('ERROR adding %s\n%s' % (archive_name, traceback.format_exc())) + print('ERROR: running dump command %s' % traceback.format_exc()) finally: - if tmp_path and os.path.exists(tmp_path): - os.remove(tmp_path) - - -def get_containers(host): - """return dict {id:name}""" - cmd = 'docker ps -a' - out = run_ansible_cmd(cmd, host) - if not out: - return None - out = safe_decode(out) - if 'NAMES' not in out: - print('Host: %s. \nInvalid docker ps return data: [%s]. 
skipping' - % (host, out)) - return None - - ansible_properties = properties.AnsibleProperties() - base_distro = \ - ansible_properties.get_property('kolla_base_distro') - install_type = \ - ansible_properties.get_property('kolla_install_type') - # typically this prefix will be "ol-openstack-" - container_prefix = base_distro + '-' + install_type + '-' - - # add ps output to tar - add_logdata_to_tar(out, host, 'docker', 'ps') - - # process ps output - containers = {} - valid_found = False - lines = out.split('\n') - for line in lines: - if container_prefix not in line: - # skip non-kolla containers - continue - valid_found = True - tokens = line.split() - cid = tokens[0] - image = tokens[1] - name = image.split(container_prefix)[1] - name = name.split(':')[0] - containers[cid] = name - if not valid_found: - print('no containers with %s in image name found on %s' - % (container_prefix, host)) - return containers - - -def add_container_log(cid, cname, host): - cmd = 'docker logs %s' % cid - out = run_ansible_cmd(cmd, host) - if out: - out = safe_decode(out) - out = out.split('>>', 1)[1] - header = ('Host: %s, Container: %s, id: %s\n' - % (host, cname, cid)) - out = header + out - add_logdata_to_tar(out, host, cname, cid) - - -def add_logs_from_host(host): - containers = get_containers(host) - if containers: - for (cid, cname) in containers.items(): - add_container_log(cid, cname, host) + if dump_path and os.path.exists(dump_path): + os.remove(dump_path) def main(): @@ -163,8 +72,10 @@ def main(): sys.exit(0) elif 'all' == sys.argv[1]: # get logs from all hosts - inventory = Inventory.load() - hosts = inventory.get_hostnames() + hosts = [] + host_objs = CLIENT.host_get_all() + for host_obj in host_objs: + hosts.append(host_obj.name) else: # get logs from specified hosts hostnames = sys.argv[1].split(',') @@ -181,22 +92,26 @@ def main(): os.close(fd) # avoid fd leak with tarfile.open(tar_path, 'w:gz') as tar_file_descr: - # gather dump output from kollacli - 
print('Getting kollacli logs') - dump_path = None - try: - dump_path = CLIENT.support_dump() - tar_file_descr.add(dump_path) - except Exception: - print('ERROR: running dump command %s' % traceback.format_exc()) - finally: - if dump_path and os.path.exists(dump_path): - os.remove(dump_path) + # clear out old logs + if os.path.exists(LOGDIR): + shutil.rmtree(LOGDIR) + os.mkdir(LOGDIR) # gather logs from selected hosts - for host in hosts: - print('Getting docker logs from host: %s' % host) - add_logs_from_host(host) + try: + for host in hosts: + get_logs_from_host(host) + + # tar up all the container logs + tar_file_descr.add(LOGDIR, arcname='container_logs') + finally: + # remove uncompressed logs + if os.path.exists(LOGDIR): + shutil.rmtree(LOGDIR) + + # gather dump output from kollacli + dump_kolla_info() + print('Log collection complete. Logs are at %s' % tar_path) if __name__ == '__main__':