From e21c9a6099b8a0f56fd0314816c5ad7921b8c724 Mon Sep 17 00:00:00 2001 From: Maksym Strukov Date: Mon, 13 Jun 2016 21:49:07 +0300 Subject: [PATCH] Disable UMM before nodes repetitive restart The problem is that after the third reboot, nodes go into maintenance mode and become unavailable for further testing. We need to disable the UMM feature to prevent this behaviour. Change-Id: I1cce936201872f47d13e3c482e23e1ba4cfc24b2 Closes-Bug: #1588877 --- doc/helpers.rst | 5 ++ fuelweb_test/helpers/checkers.py | 23 ++------ fuelweb_test/helpers/cic_maintenance_mode.py | 53 +++++++++++++++++++ .../test_cic_maintenance_mode.py | 48 +++++++---------- .../tests_strength/test_repetitive_restart.py | 29 ++++++---- 5 files changed, 98 insertions(+), 60 deletions(-) create mode 100644 fuelweb_test/helpers/cic_maintenance_mode.py diff --git a/doc/helpers.rst b/doc/helpers.rst index 407db2282..1d8538bf6 100644 --- a/doc/helpers.rst +++ b/doc/helpers.rst @@ -21,6 +21,11 @@ Cloud-image .. automodule:: fuelweb_test.helpers.cloud_image :members: +CIC Maintenance Mode +-------------------- +.. automodule:: fuelweb_test.helpers.cic_maintenance_mode + :members: + Common ------ .. 
automodule:: fuelweb_test.helpers.common diff --git a/fuelweb_test/helpers/checkers.py b/fuelweb_test/helpers/checkers.py index d69b68acc..83e56145d 100644 --- a/fuelweb_test/helpers/checkers.py +++ b/fuelweb_test/helpers/checkers.py @@ -16,10 +16,10 @@ from __future__ import division import hashlib import json -import os -import re from time import sleep +import os +import re from devops.error import TimeoutError from devops.helpers.helpers import _wait from devops.helpers.helpers import wait @@ -28,6 +28,7 @@ from netaddr import IPNetwork from proboscis.asserts import assert_equal from proboscis.asserts import assert_false from proboscis.asserts import assert_true + # pylint: disable=import-error from six.moves.urllib.error import HTTPError from six.moves.urllib.error import URLError @@ -968,24 +969,6 @@ def check_neutron_dhcp_lease(ip, instance_ip, instance_mac, return ' ack ' in lease -def check_available_mode(ip): - command = ('umm status | grep runlevel &>/dev/null && echo "True" ' - '|| echo "False"') - if ssh_manager.execute(ip, command)['exit_code'] == 0: - return ''.join(ssh_manager.execute(ip, command)['stdout']).strip() - else: - return ''.join(ssh_manager.execute(ip, command)['stderr']).strip() - - -def check_auto_mode(ip): - command = ('umm status | grep umm &>/dev/null && echo "True" ' - '|| echo "False"') - if ssh_manager.execute(ip, command)['exit_code'] == 0: - return ''.join(ssh_manager.execute(ip, command)['stdout']).strip() - else: - return ''.join(ssh_manager.execute(ip, command)['stderr']).strip() - - def is_ntpd_active(ip, ntpd_ip): cmd = 'ntpdate -d -p 4 -t 0.2 -u {0}'.format(ntpd_ip) return not ssh_manager.execute(ip, cmd)['exit_code'] diff --git a/fuelweb_test/helpers/cic_maintenance_mode.py b/fuelweb_test/helpers/cic_maintenance_mode.py new file mode 100644 index 000000000..c630dbacd --- /dev/null +++ b/fuelweb_test/helpers/cic_maintenance_mode.py @@ -0,0 +1,53 @@ +# Copyright 2016 Mirantis, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from fuelweb_test import logwrap +from fuelweb_test.helpers.ssh_manager import SSHManager + + +ssh_manager = SSHManager() + + +@logwrap +def change_config(ip, umm=True, reboot_count=2, counter_reset_time=10): + umm_string = 'yes' if umm else 'no' + cmd = ("echo -e 'UMM={0}\n" + "REBOOT_COUNT={1}\n" + "COUNTER_RESET_TIME={2}' > /etc/umm.conf".format(umm_string, + reboot_count, + counter_reset_time) + ) + result = ssh_manager.execute( + ip=ip, + cmd=cmd + ) + return result + + +def check_available_mode(ip): + command = ('umm status | grep runlevel &>/dev/null && echo "True" ' + '|| echo "False"') + if ssh_manager.execute(ip, command)['exit_code'] == 0: + return ''.join(ssh_manager.execute(ip, command)['stdout']).strip() + else: + return ''.join(ssh_manager.execute(ip, command)['stderr']).strip() + + +def check_auto_mode(ip): + command = ('umm status | grep umm &>/dev/null && echo "True" ' + '|| echo "False"') + if ssh_manager.execute(ip, command)['exit_code'] == 0: + return ''.join(ssh_manager.execute(ip, command)['stdout']).strip() + else: + return ''.join(ssh_manager.execute(ip, command)['stderr']).strip() diff --git a/fuelweb_test/tests/tests_strength/test_cic_maintenance_mode.py b/fuelweb_test/tests/tests_strength/test_cic_maintenance_mode.py index e33990ab6..b7d650173 100644 --- a/fuelweb_test/tests/tests_strength/test_cic_maintenance_mode.py +++ b/fuelweb_test/tests/tests_strength/test_cic_maintenance_mode.py @@ 
-20,6 +20,9 @@ from proboscis import test from fuelweb_test.helpers import checkers from fuelweb_test.helpers.decorators import log_snapshot_after_test +from fuelweb_test.helpers.cic_maintenance_mode import change_config +from fuelweb_test.helpers.cic_maintenance_mode import check_auto_mode +from fuelweb_test.helpers.cic_maintenance_mode import check_available_mode from fuelweb_test import logger from fuelweb_test import ostf_test_mapping from fuelweb_test import settings @@ -105,7 +108,7 @@ class CICMaintenanceMode(TestBasic): _ip = regular_ctrl['ip'] _id = regular_ctrl['id'] logger.info('Maintenance mode for node-{0}'.format(_id)) - asserts.assert_true('True' in checkers.check_available_mode(_ip), + asserts.assert_true('True' in check_available_mode(_ip), "Maintenance mode is not available") self.ssh_manager.execute_on_remote( ip=_ip, @@ -130,7 +133,7 @@ class CICMaintenanceMode(TestBasic): "Host {0} is not reachable by ping during 600 sec" .format(_ip)) - asserts.assert_true('True' in checkers.check_auto_mode(_ip), + asserts.assert_true('True' in check_auto_mode(_ip), "Maintenance mode is not switched on") self.ssh_manager.execute_on_remote( @@ -208,15 +211,10 @@ class CICMaintenanceMode(TestBasic): _ip = regular_ctrl['ip'] _id = regular_ctrl['id'] - asserts.assert_true('True' in checkers.check_available_mode(_ip), + asserts.assert_true('True' in check_available_mode(_ip), "Maintenance mode is not available") - command1 = ("echo -e 'UMM=yes\nREBOOT_COUNT=0\n" - "COUNTER_RESET_TIME=10' > /etc/umm.conf") - - self.ssh_manager.execute_on_remote( - ip=_ip, - cmd=command1) + change_config(_ip, reboot_count=0) logger.info('Change UMM.CONF on node-{0}' .format(_id)) @@ -224,11 +222,11 @@ class CICMaintenanceMode(TestBasic): logger.info('Unexpected reboot on node-{0}' .format(_id)) - command2 = 'reboot --force >/dev/null & ' + command = 'reboot --force >/dev/null & ' self.ssh_manager.execute_on_remote( ip=_ip, - cmd=command2) + cmd=command) wait(lambda: not 
checkers.check_ping(self.env.get_admin_node_ip(), @@ -253,7 +251,7 @@ class CICMaintenanceMode(TestBasic): "Host {0} is not reachable by ping during 600 sec" .format(_ip)) - asserts.assert_true('True' in checkers.check_auto_mode(_ip), + asserts.assert_true('True' in check_auto_mode(_ip), "Maintenance mode is not switched on") logger.info('turn off Maintenance mode') @@ -261,12 +259,8 @@ class CICMaintenanceMode(TestBasic): ip=_ip, cmd="umm off") time.sleep(30) - command3 = ("echo -e 'UMM=yes\nREBOOT_COUNT=2\n" - "COUNTER_RESET_TIME=10' > /etc/umm.conf") - self.ssh_manager.execute_on_remote( - ip=_ip, - cmd=command3) + change_config(_ip) logger.info('Wait a node-{0} online status' .format(_id)) @@ -341,13 +335,13 @@ class CICMaintenanceMode(TestBasic): _ip = regular_ctrl['ip'] _id = regular_ctrl['id'] - asserts.assert_true('True' in checkers.check_available_mode(_ip), + asserts.assert_true('True' in check_available_mode(_ip), "Maintenance mode is not available") self.ssh_manager.execute_on_remote( ip=_ip, cmd="umm disable") - asserts.assert_false('True' in checkers.check_available_mode(_ip), + asserts.assert_false('True' in check_available_mode(_ip), "Maintenance mode should not be available") logger.info('Try to execute maintenance mode ' @@ -406,27 +400,23 @@ class CICMaintenanceMode(TestBasic): _ip = regular_ctrl['ip'] _id = regular_ctrl['id'] - asserts.assert_true('True' in checkers.check_available_mode(_ip), + asserts.assert_true('True' in check_available_mode(_ip), "Maintenance mode is not available") logger.info('Disable UMM on node-{0}'.format(_id)) - command1 = ("echo -e 'UMM=no\nREBOOT_COUNT=0\n" - "COUNTER_RESET_TIME=10' > /etc/umm.conf") - self.ssh_manager.execute_on_remote( - ip=_ip, - cmd=command1) + change_config(_ip, umm=False, reboot_count=0) - asserts.assert_false('True' in checkers.check_available_mode(_ip), + asserts.assert_false('True' in check_available_mode(_ip), "Maintenance mode should not be available") - command2 = 'reboot --force 
>/dev/null & ' + command = 'reboot --force >/dev/null & ' logger.info('Unexpected reboot on node-{0}' .format(_id)) self.ssh_manager.execute_on_remote( ip=_ip, - cmd=command2) + cmd=command) wait(lambda: not checkers.check_ping(self.env.get_admin_node_ip(), @@ -451,7 +441,7 @@ class CICMaintenanceMode(TestBasic): logger.info('Check that node-{0} not in maintenance mode after' ' unexpected reboot'.format(_id)) - asserts.assert_false('True' in checkers.check_auto_mode(_ip), + asserts.assert_false('True' in check_auto_mode(_ip), "Maintenance mode should not switched") # Wait until MySQL Galera is UP on some controller diff --git a/fuelweb_test/tests/tests_strength/test_repetitive_restart.py b/fuelweb_test/tests/tests_strength/test_repetitive_restart.py index 9c5c412cd..5125c77b8 100644 --- a/fuelweb_test/tests/tests_strength/test_repetitive_restart.py +++ b/fuelweb_test/tests/tests_strength/test_repetitive_restart.py @@ -23,6 +23,7 @@ from six.moves import xrange from fuelweb_test import logger from fuelweb_test import ostf_test_mapping from fuelweb_test import settings +from fuelweb_test.helpers.cic_maintenance_mode import change_config from fuelweb_test.helpers.decorators import log_snapshot_after_test from fuelweb_test.helpers.decorators import setup_teardown from fuelweb_test.helpers.rally import RallyBenchmarkTest @@ -55,12 +56,13 @@ class RepetitiveRestart(TestLoadBase): 4. Run ostf 5. Fill ceph partitions on all nodes up to 30% 6. Check Ceph status - 7. Run RALLY - 8. 100 times repetitive reboot: - 9. Cold restart of all nodes - 10. Wait for HA services ready - 11. Wait until MySQL Galera is UP on some controller - 12. Run ostf + 7. Disable UMM + 8. Run RALLY + 9. 100 times repetitive reboot: + 10. Cold restart of all nodes + 11. Wait for HA services ready + 12. Wait until MySQL Galera is UP on some controller + 13. 
Run ostf Duration 1700m Snapshot ceph_partitions_repetitive_cold_restart @@ -94,6 +96,11 @@ class RepetitiveRestart(TestLoadBase): self.fuel_web.check_ceph_status(cluster_id) self.show_step(7) + + for node in self.fuel_web.client.list_cluster_nodes(cluster_id): + change_config(node['ip'], umm=False) + + self.show_step(8) assert_true(settings.PATCHING_RUN_RALLY, 'PATCHING_RUN_RALLY was not set in true') rally_benchmarks = {} @@ -108,9 +115,9 @@ class RepetitiveRestart(TestLoadBase): benchmark_results[tag] = rally_benchmarks[tag].run() logger.debug(benchmark_results[tag].show()) - self.show_step(8) + self.show_step(9) for i in xrange(settings.RESTART_COUNT): - self.show_step(9, 'number {}'.format(i + 1), initialize=True) + self.show_step(10, 'number {}'.format(i + 1), initialize=True) self.fuel_web.cold_restart_nodes( self.env.d_env.get_nodes(name__in=[ 'slave-01', @@ -119,12 +126,12 @@ class RepetitiveRestart(TestLoadBase): 'slave-04', 'slave-05'])) - self.show_step(10) + self.show_step(11) self.fuel_web.assert_ha_services_ready(cluster_id) self.fuel_web.assert_os_services_ready(cluster_id) - self.show_step(11) + self.show_step(12) self.fuel_web.wait_mysql_galera_is_up([primary_controller.name]) try: @@ -141,7 +148,7 @@ class RepetitiveRestart(TestLoadBase): cluster_id, test_sets=['smoke'], test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get( 'Create volume and attach it to instance')) - self.show_step(12) + self.show_step(13) # LB 1519018 self.fuel_web.run_ostf(cluster_id=cluster_id) self.env.make_snapshot("ceph_partitions_repetitive_cold_restart")