# Copyright 2013 Mirantis, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. from __future__ import division import hashlib import json import os import re from time import sleep from devops.error import TimeoutError from devops.helpers.helpers import _wait from devops.helpers.helpers import wait from netaddr import IPAddress from netaddr import IPNetwork from proboscis.asserts import assert_equal from proboscis.asserts import assert_false from proboscis.asserts import assert_true # pylint: disable=import-error from six.moves.urllib.error import HTTPError from six.moves.urllib.error import URLError # pylint: enable=import-error import yaml from fuelweb_test import logger from fuelweb_test import logwrap from fuelweb_test.helpers.ssh_manager import SSHManager from fuelweb_test.helpers.utils import get_mongo_partitions from fuelweb_test.settings import DNS_SUFFIX from fuelweb_test.settings import EXTERNAL_DNS from fuelweb_test.settings import EXTERNAL_NTP from fuelweb_test.settings import FUEL_MASTER_HOSTNAME from fuelweb_test.settings import OPENSTACK_RELEASE from fuelweb_test.settings import OPENSTACK_RELEASE_UBUNTU from fuelweb_test.settings import POOLS from fuelweb_test.settings import PUBLIC_TEST_IP ssh_manager = SSHManager() @logwrap def check_cinder_status(ip): """Parse output and return False if any enabled service is down. 'cinder service-list' stdout example: | cinder-scheduler | node-1.test.domain.local | nova | enabled | up | | cinder-scheduler | node-2.test.domain.local | nova | enabled | down | """ cmd = '. openrc; cinder service-list' result = ssh_manager.execute( ip=ip, cmd=cmd ) cinder_services = ''.join(result['stdout']) logger.debug('>$ cinder service-list\n{}'.format(cinder_services)) if result['exit_code'] == 0: return all(' up ' in x.split('enabled')[1] for x in cinder_services.split('\n') if 'cinder' in x and 'enabled' in x and len(x.split('enabled'))) return False @logwrap def check_image(image, md5, path): local_path = "{0}/{1}".format(path, image) logger.debug('Check md5 {0} of image {1}/{2}'.format(md5, path, image)) if not os.path.isfile(local_path): logger.error('Image {0} not found in {1} directory'.format( image, path)) return False with open(local_path, mode='rb') as fimage: digits = hashlib.md5() while True: buf = fimage.read(4096) if not buf: break digits.update(buf) md5_local = digits.hexdigest() if md5_local != md5: logger.error('MD5 of {0}/{1} is not correct, aborting'.format( path, image)) return False return True @logwrap def verify_service(ip, service_name, count=1, ignore_count_of_proccesses=False): ps_output = ssh_manager.execute_on_remote( ip=ip, cmd='ps ax' )['stdout'] api = [ps for ps in ps_output if service_name in ps] logger.debug("{} \\n: {}".format(service_name, str(api))) if not ignore_count_of_proccesses: assert_equal(len(api), count, "{0} count not equal to {1}".format(service_name, count)) else: assert_true(len(api), "Service '{0}' not found!".format(service_name)) @logwrap def verify_service_list_api(os_conn, service_count): def _verify(): ret = os_conn.get_nova_service_list() logger.debug('Service list {0}'.format(ret)) assert_equal(service_count, len(ret), 'Expected service count is {0},' ' but get {1} count, actual list {2}'.format( service_count, len(ret), ret)) for service in ret: logger.debug('service is {0}'.format(service)) assert_equal( service.state, 'up', 'Service {0} on host {1} has next ' 'state {2}'.format(service.binary, service.host, service.state)) try: _verify() except AssertionError: logger.debug( "Services still not read. Sleeping for 60 seconds and retrying") sleep(60) _verify() @logwrap def verify_glance_image_api(os_conn): ret = os_conn.get_image_list() assert_equal(1, len([i for i in ret if i.name == 'TestVM']), "TestVM not found in glance image-list") @logwrap def verify_network_list_api(os_conn, net_count=None): ret = os_conn.get_nova_network_list() assert_equal(net_count, len(ret), 'Unexpected count of networks detected, ' 'expected: {0}, current {1} count,' ' full list {2}'.format(net_count, len(ret), ret)) @logwrap def check_ceph_image_size(ip, expected_size, device='vdc'): ret = ssh_manager.check_call( ip=ip, cmd="df -m /dev/{device}* | grep ceph | awk" " {size}".format(device=device, size=re.escape('{print $2}')) )['stdout'] if not ret: logger.error("Partition not present! {}: ".format( ssh_manager.check_call(ip=ip, cmd="df -m"))) raise Exception() logger.debug("Partitions: {part}".format(part=ret)) assert_true(abs(float(ret[0].rstrip()) / expected_size - 1) < 0.1, "size {0} is not equal" " to {1}".format(ret[0].rstrip(), expected_size)) @logwrap def check_cinder_image_size(ip, expected_size, device='vdc3'): ret = get_mongo_partitions(ip, device)[0].rstrip().rstrip('G') cinder_size = float(ret) * 1024 assert_true(abs(cinder_size / expected_size - 1) < 0.1, "size {0} is not equal" " to {1}".format(ret[0].rstrip(), expected_size)) @logwrap def check_unallocated_space(disks, contr_img_ceph=False): for disk in disks: # In case we have Ceph for images all space on controller # should be given to Base System space: if contr_img_ceph: logger.info("Check that all space on /dev/{d} is allocated for " "Base System Space".format(d=disk['name'])) if not bool(disk["volumes"][0]["size"] == disk["size"]): return False else: logger.info("Get overall size of volumes") sizes = [v['size'] for v in disk["volumes"]] logger.info("Space on disk: {s}".format(s=disk['size'])) logger.info("Summary space of disks on /dev/{d}: {s}".format( d=disk["name"], s=sum(sizes))) if not bool(sum(sizes) == disk["size"]): return False return True @logwrap def check_archive_type(tar_path): if os.path.splitext(tar_path)[1] not in [".tar", ".lrz", ".fp", ".rpm"]: raise Exception("Wrong archive type!") @logwrap def check_file_exists(ip, path): assert_true(ssh_manager.exists_on_remote(ip, path), 'Can not find {0}'.format(path)) logger.info('File {0} exists on {1}'.format(path, ip)) @logwrap def wait_phrase_in_log(ip, timeout, interval, phrase, log_path): cmd = "grep '{0}' '{1}'".format(phrase, log_path) wait( lambda: not SSHManager().execute(ip=ip, cmd=cmd)['exit_code'], interval=interval, timeout=timeout, timeout_msg="The phrase {0} not found in {1} file on " "remote node".format(phrase, log_path)) @logwrap def enable_feature_group(env, group): fuel_settings = env.admin_actions.get_fuel_settings() if group not in fuel_settings["FEATURE_GROUPS"]: fuel_settings["FEATURE_GROUPS"].append(group) env.admin_actions.save_fuel_settings(fuel_settings) # NOTE(akostrikov) We use FUEL_SETTINGS_YAML as primary source or truth and # update nailgun configs via puppet from that value ssh_manager.check_call( ip=ssh_manager.admin_ip, cmd='puppet apply /etc/puppet/modules/fuel/examples/nailgun.pp' ) def check_api_group_enabled(): try: return (group in env.fuel_web.client.get_api_version()["feature_groups"]) except (HTTPError, URLError): return False wait(check_api_group_enabled, interval=10, timeout=60 * 20) def find_backup(ip): backups = ssh_manager.execute(ip, "ls -1u /var/backup/fuel/*/*.lrz")["stdout"] if backups: arch_path = backups[0] logger.info('Backup archive found: {0}'.format(arch_path)) return arch_path else: raise ValueError("No backup file found in the '/var/backup/fuel/'") @logwrap def backup_check(ip): logger.info("Backup check archive status") path = find_backup(ip) assert_true(path, "Can not find backup. Path value '{0}'".format(path)) test_result = ssh_manager.execute(ip, "test -e {0}".format(path.rstrip())) assert_true(test_result['exit_code'] == 0, "Archive '{0}' does not exist".format(path.rstrip())) @logwrap def restore_check_sum(ip): logger.debug('Check if removed file /etc/fuel/data was restored') res = ssh_manager.execute( ip=ip, cmd="if [ -e /etc/fuel/data ]; then echo Restored!!; fi" ) assert_true("Restored!!" in ''.join(res['stdout']).strip(), 'Test file /etc/fuel/data ' 'was not restored!!! {0}'.format(res['stderr'])) logger.info("Restore check md5sum") md5sum_backup = ssh_manager.execute(ip, "cat /etc/fuel/sum") assert_true(''.join(md5sum_backup['stdout']).strip(), 'Command cat /etc/fuel/sum ' 'failed with {0}'.format(md5sum_backup['stderr'])) md5sum_restore = ssh_manager.execute( ip=ip, cmd="md5sum /etc/fuel/data | sed -n 1p | awk '{print $1}'" ) assert_equal(md5sum_backup, md5sum_restore, "md5sums not equal: backup{0}, restore{1}". format(md5sum_backup, md5sum_restore)) @logwrap def iptables_check(ip): logger.info("Iptables check") ssh_manager.execute(ip, "iptables-save > /etc/fuel/iptables-restore") iptables_backup = ssh_manager.execute( ip=ip, cmd="sed -e '/^:/d; /^#/d' /etc/fuel/iptables-backup" ) iptables_restore = ssh_manager.execute( ip=ip, cmd="sed -e '/^:/d; /^#/d' /etc/fuel/iptables-restore" ) assert_equal(iptables_backup, iptables_restore, "list of iptables rules are not equal") @logwrap def check_mysql(ip, node_name): check_cmd = 'pkill -0 -x mysqld' check_crm_cmd = ('crm resource status clone_p_mysqld |' ' grep -q "is running on: $HOSTNAME"') check_galera_cmd = ("mysql --connect_timeout=5 -sse \"SELECT" " VARIABLE_VALUE FROM" " information_schema.GLOBAL_STATUS" " WHERE VARIABLE_NAME" " = 'wsrep_local_state_comment';\"") try: wait(lambda: ssh_manager.execute(ip, check_cmd)['exit_code'] == 0, timeout=10 * 60) logger.info('MySQL daemon is started on {0}'.format(node_name)) except TimeoutError: logger.error('MySQL daemon is down on {0}'.format(node_name)) raise _wait( lambda: assert_equal( ssh_manager.execute( ip, check_crm_cmd)['exit_code'], 0, 'MySQL resource is NOT running on {0}'.format(node_name)), timeout=120) try: wait(lambda: ''.join(ssh_manager.execute( ip, check_galera_cmd)['stdout']).rstrip() == 'Synced', timeout=600) except TimeoutError: logger.error('galera status is {0}'.format(''.join(ssh_manager.execute( ip, check_galera_cmd)['stdout']).rstrip())) raise @logwrap def check_action_logs(scenario, postgres_actions): def _check(_action, _group=False): assert_true(postgres_actions.action_logs_contain(_action, _group), "Action logs are missed for '{0}'!".format( _action)) actions = [ { 'desc': [''], 'name': ['master_node_settings'], 'group': [], 'regex': False, }, { 'desc': [r'create\s+.*(cluster|environment|cloud)'], 'name': ['cluster_collection'], 'group': ['cluster_attributes', 'network_configuration'], 'regex': True, }, { 'desc': ['deploy'], 'name': ['deploy_changes', 'provision', 'deployment', 'cluster_collection', 'check_before_deployment'], 'group': ['cluster_changes', 'cluster_checking'], 'regex': True, }, { 'desc': [r'verif.*\s+.*network|network.*\s+.*verif'], 'name': ['check_networks', 'verify_networks'], 'group': ['network_verification'], 'regex': True, }, { 'desc': [r'(stop|abort).*(deployment|provision)'], 'name': ['stop_deployment'], 'group': ['cluster_changes'], 'regex': True, }, { 'desc': [r'reset.*(deployment|provision)'], 'name': ['reset'], 'group': ['cluster_changes'], 'regex': True, }, { 'desc': [r'rename.*(cluster|environment|cloud)'], 'name': ['cluster_instance'], 'group': ['cluster_changes'], 'regex': True, }, { 'desc': [r'upgrade'], 'name': ['releases_collection'], 'group': ['release_changes'], 'regex': True, }, { 'desc': [r'update.*(cluster|environment|cloud)'], 'name': ['update'], 'group': ['cluster_changes'], 'regex': True, }, { 'desc': [r'upload.*deployment'], 'name': ['deployment_info'], 'group': ['orchestrator'], 'regex': True, }, { 'desc': [r'upload.*provisioning'], 'name': ['provisioning_info'], 'group': ['orchestrator'], 'regex': True, }, # Logging of OSTF isn't implemented yet, so actions list is # empty { 'desc': ['OSTF', 'Health'], 'name': [], 'group': [], 'regex': False, }, ] # Check logs in Nailgun database for action in actions: if action['regex']: if not any(re.search(regex, scenario, re.IGNORECASE) for regex in action['desc']): continue elif not any(action in scenario for action in action['desc']): logger.info(action['desc']) continue for action_name in action['name']: _check(action_name, _group=False) for action_group in action['group']: _check(action_group, _group=True) def execute_query_on_collector(collector_remote, master_uuid, query, collector_db='collector', collector_db_user='collector', collector_db_pass='collector'): if master_uuid is not None: query = "{0} where master_node_uid = '{1}';".format(query, master_uuid) cmd = 'PGPASSWORD={0} psql -qt -h -U {1} -d {2} -c "{3}"'.\ format(collector_db_pass, collector_db_user, collector_db, query) logger.debug('query collector is {0}'.format(cmd)) return ''.join(collector_remote.execute(cmd)['stdout']).strip() def count_stats_on_collector(collector_remote, master_uuid): return execute_query_on_collector(collector_remote, master_uuid=None, query="select (select count(*) from " "action_logs where master_node_uid" " = \'{0}\') + (select count(*) " "from installation_structures " "where master_node_uid = \'{0}\')". format(master_uuid)) @logwrap def check_stats_on_collector(collector_remote, postgres_actions, master_uuid): sent_logs_count = postgres_actions.count_sent_action_logs() logger.info("Number of logs that were sent to collector: {}".format( sent_logs_count )) logs = collector_remote.get_action_logs_count(master_uuid) logger.info("Number of logs that were saved on collector: {}".format(logs)) assert_true(sent_logs_count <= int(logs), ("Count of action logs in Nailgun DB ({0}) is bigger than on " "Collector ({1}), but should be less or equal").format( sent_logs_count, logs)) sum_stats_count = len( [collector_remote.get_installation_info(master_uuid)['id']]) assert_equal(int(sum_stats_count), 1, "Installation structure wasn't saved on Collector side proper" "ly: found: {0}, expected: 1 record.".format(sum_stats_count)) summ_stats = collector_remote.get_installation_info_data(master_uuid) general_stats = { 'clusters_num': int, 'allocated_nodes_num': int, 'unallocated_nodes_num': int, 'fuel_release': dict, 'clusters': list, 'user_information': dict, } # Check that important data (clusters number, nodes number, nodes roles, # user's email, used operation system, OpenStack stats) is saved correctly for stat_type in general_stats.keys(): assert_true( isinstance(summ_stats[stat_type], general_stats[stat_type]), "Installation structure in Collector's DB doesn't contain" "the following stats: {0}".format(stat_type)) real_clusters_number = int(postgres_actions.run_query( db='nailgun', query='select count(*) from clusters;')) assert_equal(real_clusters_number, summ_stats['clusters_num'], 'Real clusters number is {0}, but usage statistics says ' 'that clusters number is {1}'.format( real_clusters_number, summ_stats['clusters_num'])) real_allocated_nodes_num = int(postgres_actions.run_query( db='nailgun', query='select count(id) from nodes where cluster_id is not Null;')) assert_equal(real_allocated_nodes_num, summ_stats['allocated_nodes_num'], 'Real allocated nodes number is {0}, but usage statistics ' 'says that allocated nodes number is {1}'.format( real_allocated_nodes_num, summ_stats['allocated_nodes_num'])) real_user_email = json.loads(postgres_actions.run_query( db='nailgun', query='select settings from master_node_settings;') )['statistics']['email']['value'] assert_equal(real_user_email, summ_stats['user_information']['email'], "Usage statistics contains incorrect user's email address: " "'{0}', but should be {1}".format( summ_stats['user_information']['email'], real_user_email)) for cluster in summ_stats['clusters']: for node in cluster['nodes']: assert_true(len(node['roles']) > 0, "Usage statistics contains nodes without roles: node-" "{0} roles: {1}".format(node['id'], node['roles'])) assert_equal(len(cluster['nodes']), cluster['nodes_num'], "Usage statistics contains incorrect number of nodes" "assigned to cluster!") real_cluster_os = postgres_actions.run_query( db="nailgun", query="select operating_system from releases where " "id = (select release_id from clusters where " "id = {0});".format(cluster['id'])) assert_equal(real_cluster_os, cluster['release']['os'], "Usage statistics contains incorrect operation system " "that is used for environment with ID '{0}'. Expected: " "'{1}', reported: '{2}'.".format( cluster['id'], real_cluster_os, cluster['release']['os'])) logger.info("Usage stats were properly saved to collector's database.") @logwrap def check_stats_private_info(collector_remote, postgres_actions, master_uuid, _settings): def _contain_secret_data(data): _has_private_data = False # Check that stats doesn't contain private data (e.g. # specific passwords, settings, emails) for _private in private_data.keys(): _regex = r'(?P"\S+"): (?P[^:]*"{0}"[^:]*)'.format( private_data[_private]) for _match in re.finditer(_regex, data): logger.warning('Found private info in usage statistics using ' 'pattern: {0}'. format(_regex)) logger.debug('Usage statistics with private data:\n {0}'. format(data)) logger.error("Usage statistics contains private info: '{type}:" " {value}'. Part of the stats: {match}".format( type=_private, value=private_data[_private], match=_match.group('key', 'value'))) _has_private_data = True # Check that stats doesn't contain private types of data (e.g. any kind # of passwords) for _data_type in secret_data_types.keys(): _regex = (r'(?P"[^"]*{0}[^"]*": (\{{[^\}}]+\}}|\[[^\]+]\]|' r'"[^"]+"))').format(secret_data_types[_data_type]) for _match in re.finditer(_regex, data, re.IGNORECASE): logger.warning('Found private info in usage statistics using ' 'pattern: {0}'. format(_regex)) logger.debug('Usage statistics with private data:\n {0}'. format(data)) logger.error("Usage statistics contains private info: '{type}:" " {value}'. Part of the stats: {match}".format( type=_data_type, value=secret_data_types[_data_type], match=_match.group('secret'))) _has_private_data = True return _has_private_data def _contain_public_ip(data, _used_networks): _has_public_ip = False _ip_regex = (r'\b((\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.){3}' r'(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\b') _not_public_regex = [ r'\b10(\.\d{1,3}){3}', r'\b127(\.\d{1,3}){3}', r'\b169\.254(\.\d{1,3}){2}', r'172\.(1[6-9]|2[0-9]|3[0-1])(\.\d{1,3}){2}', r'192\.168(\.\d{1,3}){2}', r'2(2[4-9]|[3-5][0-9])(\.\d{1,3}){3}' ] for _match in re.finditer(_ip_regex, data): # If IP address isn't public and doesn't belong to defined for # deployment pools (e.g. admin, public, storage), then skip it if any(re.search(_r, _match.group()) for _r in _not_public_regex) \ and not any(IPAddress(str(_match.group())) in IPNetwork(str(net)) for net in _used_networks): continue logger.debug('Usage statistics with public IP(s):\n {0}'. format(data)) logger.error('Found public IP in usage statistics: "{0}"'.format( _match.group())) _has_public_ip = True return _has_public_ip private_data = { 'hostname': _settings['HOSTNAME'], 'dns_domain': _settings['DNS_DOMAIN'], 'dns_search': _settings['DNS_SEARCH'], 'dns_upstream': _settings['DNS_UPSTREAM'], 'fuel_password': ( _settings['FUEL_ACCESS']['password'] if _settings['FUEL_ACCESS']['password'] != 'admin' else 'DefaultPasswordIsNotAcceptableForSearch'), 'nailgun_password': _settings['postgres']['nailgun_password'], 'keystone_password': _settings['postgres']['keystone_password'], 'ostf_password': _settings['postgres']['ostf_password'], 'cobbler_password': _settings['cobbler']['password'], 'astute_password': _settings['astute']['password'], 'mcollective_password': _settings['mcollective']['password'], 'keystone_admin_token': _settings['keystone']['admin_token'], 'keystone_nailgun_password': _settings['keystone']['nailgun_password'], 'kesytone_ostf_password': _settings['keystone']['ostf_password'], } secret_data_types = { 'some_password': 'password', 'some_login': 'login', 'some_tenant': 'tenant', 'some_token': 'token', 'some_ip': '\bip\b', 'some_netmask': 'netmask', 'some_network': 'network\b', } action_logs = [l.strip() for l in postgres_actions.run_query( 'nailgun', 'select id from action_logs;').split('\n')] sent_stats = str(collector_remote.get_installation_info_data(master_uuid)) logger.debug('installation structure is {0}'.format(sent_stats)) used_networks = [POOLS[net_name][0] for net_name in POOLS.keys()] has_no_private_data = True logger.debug("Looking for private data in the installation structure, " "that was sent to collector") if _contain_secret_data(sent_stats) or _contain_public_ip(sent_stats, used_networks): has_no_private_data = False for log_id in action_logs: log_data = postgres_actions.run_query( 'nailgun', "select additional_info from action_logs where id = '{0}';".format( log_id )) logger.debug("Looking for private data in action log with ID={0}". format(log_id)) if _contain_secret_data(log_data) or _contain_public_ip(log_data, used_networks): has_no_private_data = False assert_true(has_no_private_data, 'Found private data in stats, check test ' 'output and logs for details.') logger.info('Found no private data in logs') def check_kernel(kernel, expected_kernel): assert_equal(kernel, expected_kernel, "kernel version is wrong, it is {0}".format(kernel)) @logwrap def external_dns_check(ip): logger.info("External dns check") provided_dns = EXTERNAL_DNS logger.debug("provided to test dns is {}".format(provided_dns)) cluster_dns = [] for dns in provided_dns: ext_dns_ip = ''.join( ssh_manager.execute( ip=ip, cmd="grep {0} /etc/resolv.dnsmasq.conf | " "awk {{'print $2'}}".format(dns) )["stdout"]).rstrip() cluster_dns.append(ext_dns_ip) logger.debug("external dns in conf is {}".format(cluster_dns)) assert_equal(set(provided_dns), set(cluster_dns), "/etc/resolv.dnsmasq.conf does not contain external dns ip") command_hostname = ''.join( ssh_manager.execute(ip, "host {0} | awk {{'print $5'}}" .format(PUBLIC_TEST_IP)) ["stdout"]).rstrip() hostname = 'google-public-dns-a.google.com.' assert_equal(command_hostname, hostname, "Can't resolve hostname") def verify_bootstrap_on_node(ip, os_type, uuid=None): os_type = os_type.lower() if 'ubuntu' not in os_type: raise Exception("Only Ubuntu are supported, " "you have chosen {0}".format(os_type)) logger.info("Verify bootstrap on slave {0}".format(ip)) cmd = 'cat /etc/*release' output = ssh_manager.execute_on_remote(ip, cmd)['stdout_str'].lower() assert_true(os_type in output, "Slave {0} doesn't use {1} image for bootstrap " "after {1} images were enabled, /etc/release " "content: {2}".format(ip, os_type, output)) if not uuid: return cmd = "cat /etc/nailgun-agent/config.yaml" output = yaml.load(ssh_manager.execute_on_remote(ip, cmd)['stdout_str']) actual_uuid = output.get("runtime_uuid") assert_equal(actual_uuid, uuid, "Actual uuid {0} is not the same as expected {1}" .format(actual_uuid, uuid)) @logwrap def external_ntp_check(ip, vrouter_vip): logger.info("External ntp check") provided_ntp = EXTERNAL_NTP logger.debug("provided to test ntp is {}".format(provided_ntp)) cluster_ntp = [] for ntp in provided_ntp: ext_ntp_ip = ''.join( ssh_manager.execute( ip=ip, cmd="awk '/^server +{0}/{{print $2}}' " "/etc/ntp.conf".format(ntp))["stdout"]).rstrip() cluster_ntp.append(ext_ntp_ip) logger.debug("external ntp in conf is {}".format(cluster_ntp)) assert_equal(set(provided_ntp), set(cluster_ntp), "/etc/ntp.conf does not contain external ntp ip") try: wait( lambda: is_ntpd_active(ip, vrouter_vip), timeout=120) except Exception as e: logger.error(e) status = is_ntpd_active(ip, vrouter_vip) assert_equal( status, 1, "Failed updated ntp. " "Exit code is {0}".format(status)) def check_swift_ring(ip): for ring in ['object', 'account', 'container']: res = ''.join(ssh_manager.execute( ip, "swift-ring-builder /etc/swift/{0}.builder".format( ring))['stdout']) logger.debug("swift ring builder information is {0}".format(res)) balance = re.search('(\d+.\d+) balance', res).group(1) assert_true(float(balance) < 10, "swift ring builder {1} is not ok," " balance is {0}".format(balance, ring)) def check_oswl_stat(postgres_actions, nailgun_actions, remote_collector, master_uid, operation='current', resources=None): if resources is None: resources = [ 'vm', 'flavor', 'volume', 'image', 'tenant', 'keystone_user' ] logger.info("Checking that all resources were collected...") expected_resource_count = { 'current': {'vm': 0, 'flavor': 6, 'volume': 0, 'image': 0, 'tenant': 2, 'keystone_user': 8 }, 'modified': {'vm': 0, 'flavor': 0, 'volume': 0, 'image': 0, 'tenant': 0, 'keystone_user': 0 }, 'removed': {'vm': 0, 'flavor': 0, 'volume': 0, 'image': 0, 'tenant': 0, 'keystone_user': 0 } } for resource in resources: q = "select resource_data from oswl_stats where" \ " resource_type = '\"'\"'{0}'\"'\"';".format(resource) # pylint: disable=undefined-loop-variable def get_resource(): result = postgres_actions.run_query('nailgun', q) logger.debug("resource state is {}".format(result)) if not result: return False return ( len(json.loads(result)[operation]) > expected_resource_count[operation][resource]) # pylint: enable=undefined-loop-variable wait(get_resource, timeout=10, timeout_msg="resource {} wasn't updated in db".format(resource)) q_result = postgres_actions.run_query('nailgun', q) assert_true(q_result.strip() is not None, "Resource {0} is absent in 'oswl_stats' table, " "please check /var/log/nailgun/oswl_{0}" "_collectord.log on Fuel admin node for details." .format(resource)) resource_data = json.loads(q_result) logger.debug('db return {0}'.format(resource_data)) assert_true(len(resource_data['added']) > expected_resource_count[operation][resource], "resource {0} wasn't added," " added is {1}".format(resource, resource_data['added'])) assert_true(len(resource_data[operation]) > expected_resource_count[operation][resource], "number of resources in current {0}," " expected is {1}".format(len(resource_data[operation]), expected_resource_count[ operation][resource])) # check stat on collector side def are_logs_sent(): sent_logs = postgres_actions.count_sent_action_logs( table='oswl_stats') result = sent_logs == 6 if not result: nailgun_actions.force_fuel_stats_sending() return result wait(are_logs_sent, timeout=20, timeout_msg='Logs status was not changed to sent in db') sent_logs_count = postgres_actions.count_sent_action_logs( table='oswl_stats') logger.info("Number of logs that were sent to collector: {}".format( sent_logs_count )) logger.debug('oswls are {}'.format(remote_collector.get_oswls(master_uid))) logs = remote_collector.get_oswls(master_uid)['paging_params']['total'] logger.info("Number of logs that were saved" " on collector: {}".format(logs)) assert_true(sent_logs_count <= int(logs), ("Count of action logs in Nailgun DB ({0}) is bigger than on " "Collector ({1}), but should be less or equal").format( sent_logs_count, logs)) for resource in resources: resource_data = remote_collector.get_oswls_by_resource_data( master_uid, resource) logger.debug('resource data on' ' collector is {0}'.format(resource_data)) assert_true(len(resource_data['added']) > expected_resource_count[operation][resource], "resource {0} wasn't added," " added is {1}".format(resource, resource_data['added'])) assert_true(len(resource_data[operation]) > expected_resource_count[operation][resource], "number of resources in current {0}," " expected is {1}".format(len(resource_data[operation]), expected_resource_count[ operation][resource])) logger.info("OSWL stats were properly saved to collector's database.") @logwrap def check_ping(ip, host, deadline=10, size=56, timeout=1, interval=1): """Check network connectivity from remote to host using ICMP (ping) :param ip: remote ip :param host: string IP address or host/domain name :param deadline: time in seconds before ping exits :param size: size of data to be sent :param timeout: time to wait for a response, in seconds :param interval: wait interval seconds between sending each packet :return: bool: True if ping command """ ssh_manager = SSHManager() cmd = ("ping -W {timeout} -i {interval} -s {size} -c 1 -w {deadline} " "{host}".format(host=host, size=size, timeout=timeout, interval=interval, deadline=deadline)) res = ssh_manager.execute(ip, cmd) return int(res['exit_code']) == 0 @logwrap def check_neutron_dhcp_lease(ip, instance_ip, instance_mac, dhcp_server_ip, dhcp_port_tag): """Check if the DHCP server offers a lease for a client with the specified MAC address :param SSHClient remote: fuel-devops.helpers.helpers object :param str instance_ip: IP address of instance :param str instance_mac: MAC address that will be checked :param str dhcp_server_ip: IP address of DHCP server for request a lease :param str dhcp_port_tag: OVS port tag used for access the DHCP server :return bool: True if DHCP lease for the 'instance_mac' was obtained """ logger.debug("Checking DHCP server {0} for lease {1} with MAC address {2}" .format(dhcp_server_ip, instance_ip, instance_mac)) ovs_port_name = 'tapdhcptest1' ovs_cmd = '/usr/bin/ovs-vsctl --timeout=10 --oneline --format=json -- ' ovs_add_port_cmd = ("--if-exists del-port {0} -- " "add-port br-int {0} -- " "set Interface {0} type=internal -- " "set Port {0} tag={1}" .format(ovs_port_name, dhcp_port_tag)) ovs_del_port_cmd = ("--if-exists del-port {0}".format(ovs_port_name)) # Add an OVS interface with a tag for accessing the DHCP server ssh_manager.execute_on_remote(ip, ovs_cmd + ovs_add_port_cmd) # Set to the created interface the same MAC address # that was used for the instance. ssh_manager.execute_on_remote( ip, "ifconfig {0} hw ether {1}".format(ovs_port_name, instance_mac)) ssh_manager.execute_on_remote(ip, "ifconfig {0} up".format(ovs_port_name)) # Perform a 'dhcpcheck' request to check if the lease can be obtained lease = ssh_manager.execute_on_remote( ip=ip, cmd="dhcpcheck request {0} {1} --range_start {2} " "--range_end | fgrep \" {1} \"" .format(ovs_port_name, dhcp_server_ip, instance_ip))['stdout'] # Remove the OVS interface ssh_manager.execute_on_remote(ip, ovs_cmd + ovs_del_port_cmd) logger.debug("DHCP server answer: {}".format(lease)) return ' ack ' in lease def check_available_mode(ip): command = ('umm status | grep runlevel &>/dev/null && echo "True" ' '|| echo "False"') if ssh_manager.execute(ip, command)['exit_code'] == 0: return ''.join(ssh_manager.execute(ip, command)['stdout']).strip() else: return ''.join(ssh_manager.execute(ip, command)['stderr']).strip() def check_auto_mode(ip): command = ('umm status | grep umm &>/dev/null && echo "True" ' '|| echo "False"') if ssh_manager.execute(ip, command)['exit_code'] == 0: return ''.join(ssh_manager.execute(ip, command)['stdout']).strip() else: return ''.join(ssh_manager.execute(ip, command)['stderr']).strip() def is_ntpd_active(ip, ntpd_ip): cmd = 'ntpdate -d -p 4 -t 0.2 -u {0}'.format(ntpd_ip) return not ssh_manager.execute(ip, cmd)['exit_code'] def check_repo_managment(ip): """Check repo management run 'yum -y clean all && yum check-update' or 'apt-get clean all && apt-get update' exit code should be 0 :type ip: node ip :rtype Dict """ if OPENSTACK_RELEASE == OPENSTACK_RELEASE_UBUNTU: cmd = "apt-get clean all && apt-get update > /dev/null" else: cmd = "yum -y clean all && yum check-update > /dev/null" ssh_manager.execute_on_remote( ip=ip, cmd=cmd ) def check_public_ping(ip): """ Check if ping public vip :type ip: node ip """ cmd = ('ruby /etc/puppet/modules/osnailyfacter/' 'modular/virtual_ips/public_vip_ping_post.rb') ssh_manager.execute_on_remote( ip=ip, cmd=cmd, err_msg='Public ping check failed' ) def check_cobbler_node_exists(ip, node_id): """Check node with following node_id is present in the cobbler node list :param ip: node ip :param node_id: fuel node id :return: bool: True if exit code of command (node) == 0 """ logger.debug("Check that cluster contains node with ID:{0} ". format(node_id)) node = ssh_manager.execute( ip=ip, cmd='bash -c "cobbler system list" | grep ' '-w "node-{0}"'.format(node_id) ) return int(node['exit_code']) == 0 def check_cluster_presence(cluster_id, postgres_actions): logger.debug("Check cluster presence") query_result = postgres_actions.run_query( db='nailgun', query="select id from clusters where id={0}".format(cluster_id)) return str(cluster_id) in query_result def check_haproxy_backend(ip, services=None, nodes=None, ignore_services=None, ignore_nodes=None): """Check DOWN state of HAProxy backends. Define names of service or nodes if need check some specific service or node. Use ignore_services for ignore service status on all nodes. Use ignore_nodes for ignore all services on all nodes. Ignoring has a bigger priority. :type ip: node ip :type services: List :type nodes: List :type ignore_services: List :type ignore_nodes: List :rtype: Dict """ cmd = 'haproxy-status | egrep -v "BACKEND|FRONTEND" | grep "DOWN"' positive_filter = (services, nodes) negative_filter = (ignore_services, ignore_nodes) grep = ['|egrep "{}"'.format('|'.join(n)) for n in positive_filter if n] grep.extend( ['|egrep -v "{}"'.format('|'.join(n)) for n in negative_filter if n]) result = ssh_manager.execute( ip=ip, cmd="{}{}".format(cmd, ''.join(grep)) ) return result def check_log_lines_order(ip, log_file_path, line_matcher): """Read log file and check that lines order are same as strings in list :param ip: ip of node in str format :param log_file_path: path to log file :param line_matcher: list of strings to search """ check_file_exists(ip, path=log_file_path) previous_line_pos = 1 previous_line = None for current_line in line_matcher: cmd = 'tail -n +{0} {1} | grep -n "{2}"'\ .format(previous_line_pos, log_file_path, current_line) result = ssh_manager.execute_on_remote( ip=ip, cmd=cmd, err_msg="Line '{0}' not found after line '{1}' in the file " "'{2}'.".format(current_line, previous_line, log_file_path) ) # few lines found case assert_equal(1, len(result['stdout']), "Found {0} lines like {1} but should be only 1 in {2}" " Command '{3}' executed with exit_code='{4}'\n" "stdout:\n* {5} *\n" "stderr:\n'* {6} *\n" .format(len(result['stdout']), current_line, log_file_path, cmd, result['exit_code'], '\n'.join(result['stdout']), '\n'.join(result['stderr']))) current_line_pos = int(result['stdout'][0].split(':')[0]) previous_line_pos += current_line_pos previous_line = current_line def check_hiera_hosts(nodes, cmd): hiera_hosts = [] for node in nodes: result = ssh_manager.execute_on_remote( ip=node['ip'], cmd=cmd )['stdout'] hosts = ''.join(result).strip().split(',') logger.debug("hosts on {0} are {1}".format(node['hostname'], hosts)) if not hiera_hosts: hiera_hosts = hosts continue else: assert_true(set(hosts) == set(hiera_hosts), 'Hosts on node {0} differ from' ' others'.format(node['hostname'])) def check_client_smoke(ip): fuel_output = ssh_manager.execute( ip=ip, cmd='fuel env list' )['stdout'][2].split('|')[2].strip() fuel_2_output = ssh_manager.execute( ip=ip, cmd='fuel2 env list' )['stdout'][3].split('|')[3].strip() assert_equal(fuel_output, fuel_2_output, "The fuel: {0} and fuel2: {1} outputs are not equal") def check_offload(ip, interface, offload_type): command = "ethtool --show-offload {0} |" \ " awk '/{1}/ {{print $2}}'".format(interface, offload_type) result = ssh_manager.execute_on_remote( ip=ip, cmd=command, err_msg="Failed to get Offload {0} " "on node {1}".format(offload_type, ip) ) return ''.join(result['stdout']).rstrip() def check_get_network_data_over_cli(ip, cluster_id, path): logger.info("Download network data over cli") cmd = 'fuel --debug --env {0} network --dir {1} --json -d'.format( cluster_id, path) ssh_manager.execute_on_remote( ip=ip, cmd=cmd, err_msg='Failed to upload network data' ) def check_update_network_data_over_cli(ip, cluster_id, path): logger.info("Upload network data over cli") cmd = 'fuel --debug --env {0} network --dir {1} --json -u'.format( cluster_id, path) ssh_manager.execute_on_remote( ip=ip, cmd=cmd, err_msg='Failed to upload network data' ) def check_plugin_path_env(var_name, plugin_path): assert_true( plugin_path, '{var_name:s} variable is not set or set incorrectly: ' '{plugin_path!r}'.format( var_name=var_name, plugin_path=plugin_path) ) assert_true( os.path.exists(plugin_path), 'File {plugin_path:s} (variable: {var_name:s}) does not exists!' ''.format(plugin_path=plugin_path, var_name=var_name) ) def check_snapshot_logs(ip, snapshot_name, controller_fqdns, compute_fqdns): snapshot_path_master = "/var/dump/{}".format(snapshot_name) master_hostname = ''.join((FUEL_MASTER_HOSTNAME, DNS_SUFFIX)) snapshot_logs_path = os.path.join( '{0}/fuelweb_test/config_templates/'.format(os.environ.get( "WORKSPACE", "./")), 'snapshot_logs.yaml') if os.path.exists(snapshot_logs_path): with open(snapshot_logs_path, 'r') as f: snapshot_logs = yaml.load(f) logger.debug("snapshot logs are {}".format(snapshot_logs)) assert_true(snapshot_logs, "Failed to get expected snapshot" " logs from {}".format(snapshot_logs_path)) absent_logs = [] logger.debug("checking master logs...") for log in snapshot_logs['master']['master_node_logs'].split(): logger.debug("checking {} log file".format(log)) log_path = "{dump_path}/{hostname}/{log}".format( dump_path=snapshot_path_master, hostname=master_hostname, log=log) cmd = "ls {}".format(log_path) result = ssh_manager.execute_on_remote( ip=ip, cmd=cmd, err_msg="Couldn't find {} log on master node".format(log), raise_on_assert=False) if not result['exit_code'] == 0: absent_logs.append(log_path) for controller_fqdn in controller_fqdns: logger.debug("checking controller logs from remote directory...") for log in snapshot_logs['master']['remote']['controller'].split(): logger.debug("checking {} log file".format(log)) log_path = "{dump_path}/{hostname}/var/log/remote" \ "/{fqdn}/{log}".format(dump_path=snapshot_path_master, hostname=master_hostname, fqdn=controller_fqdn, log=log) cmd = "ls {}".format(log_path) result = ssh_manager.execute_on_remote( ip=ip, cmd=cmd, err_msg="Couldn't find {0} log in controller remote directory" " for node {1}".format(log, controller_fqdn), raise_on_assert=False) if not result['exit_code'] == 0: absent_logs.append(log_path) logger.debug("checking controller logs...") for log in snapshot_logs['controller'].split(): logger.debug("checking {} log file".format(log)) log_path = "{dump_path}/{fqdn}/{log}".format( dump_path=snapshot_path_master, fqdn=controller_fqdn.replace(DNS_SUFFIX, ""), log=log) cmd = "ls {}".format(log_path) result = ssh_manager.execute_on_remote( ip=ip, cmd=cmd, err_msg="Couldn't find {0} log for" " node {1}".format(log, controller_fqdn), raise_on_assert=False) if not result['exit_code'] == 0: absent_logs.append(log_path) for compute_fqdn in compute_fqdns: logger.debug("checking compute logs from remote directory...") for log in snapshot_logs['master']['remote']['compute'].split(): logger.debug("checking {} log file".format(log)) log_path = "{dump_path}/{hostname}/var/log/remote" \ "/{fqdn}/{log}".format(dump_path=snapshot_path_master, hostname=master_hostname, fqdn=compute_fqdn, log=log) cmd = "ls {}".format(log_path) result = ssh_manager.execute_on_remote( ip=ip, cmd=cmd, err_msg="Couldn't find {0} log in compute remote directory" " for node {1}".format(log, compute_fqdn), raise_on_assert=False) if not result['exit_code'] == 0: absent_logs.append(log_path) logger.debug("checking compute logs...") for log in snapshot_logs['compute'].split(): logger.debug("checking {} log file".format(log)) log_path = "{dump_path}/{fqdn}/{log}".format( dump_path=snapshot_path_master, fqdn=compute_fqdn.replace(DNS_SUFFIX, ""), log=log) cmd = "ls {}".format(log_path) result = ssh_manager.execute_on_remote( ip=ip, cmd=cmd, err_msg="Couldn't find {0} log" " for node {1}".format(log, compute_fqdn), raise_on_assert=False) if not result['exit_code'] == 0: absent_logs.append(log_path) logger.debug("missed logs are {}".format(absent_logs)) assert_false(absent_logs, "Next logs aren't present" " in snapshot logs {}".format(absent_logs)) def incomplete_tasks(tasks, cluster_id=None): def get_last_tasks(): last_tasks = {} for tsk in tasks: if cluster_id is not None and cluster_id != tsk['cluster']: continue if (tsk['cluster'], tsk['name']) not in last_tasks: last_tasks[(tsk['cluster'], tsk['name'])] = tsk return last_tasks deploy_tasks = {} not_ready_tasks = {} allowed_statuses = {'ready', 'skipped'} for (task_cluster, task_name), task in get_last_tasks().items(): if task_name == 'deployment': deploy_tasks[task['cluster']] = task['id'] if task['status'] not in allowed_statuses: if task_cluster not in not_ready_tasks: not_ready_tasks[task_cluster] = [] not_ready_tasks[task_cluster].append(task) return not_ready_tasks, deploy_tasks def incomplete_deploy(deployment_tasks): allowed_statuses = {'ready', 'skipped'} not_ready_deploy = {} for cluster_id, tasks in deployment_tasks.items(): not_ready_jobs = {} for task in filter( lambda tsk: tsk['status'] not in allowed_statuses, tasks): if task['node_id'] not in not_ready_jobs: not_ready_jobs[task['node_id']] = [] not_ready_jobs[task['node_id']].append(task) if not_ready_jobs: not_ready_deploy[cluster_id] = not_ready_jobs return not_ready_deploy def fail_deploy(not_ready_transactions): if len(not_ready_transactions) > 0: cluster_info_template = "\n\tCluster ID: {cluster}{info}\n" task_details_template = ( "\n" "\t\t\tTask name: {deployment_graph_task_name}\n" "\t\t\t\tStatus: {status}\n" "\t\t\t\tStart: {time_start}\n" "\t\t\t\tEnd: {time_end}\n" ) failure_text = 'Not all deployments tasks completed: {}'.format( ''.join( cluster_info_template.format( cluster=cluster, info="".join( "\n\t\tNode: {node_id}{details}\n".format( node_id=node_id, details="".join( task_details_template.format(**task) for task in sorted( tasks, key=lambda item: item['status']) )) for node_id, tasks in sorted(records.items()) )) for cluster, records in sorted(not_ready_transactions.items()) )) logger.error(failure_text) assert_true(len(not_ready_transactions) == 0, failure_text)