Disable UMM before nodes repetitive restart

The problem is that after the third reboot, nodes go
into maintenance mode and become unavailable for further
testing. We need to disable the UMM feature to prevent such behaviour.

Change-Id: I1cce936201872f47d13e3c482e23e1ba4cfc24b2
Closes-Bug: #1588877
This commit is contained in:
Maksym Strukov 2016-06-13 21:49:07 +03:00
parent 8b3f7db3b5
commit e21c9a6099
5 changed files with 98 additions and 60 deletions

View File

@ -21,6 +21,11 @@ Cloud-image
.. automodule:: fuelweb_test.helpers.cloud_image .. automodule:: fuelweb_test.helpers.cloud_image
:members: :members:
CIC Maintenance Mode
--------------------
.. automodule:: fuelweb_test.helpers.cic_maintenance_mode
:members:
Common Common
------ ------
.. automodule:: fuelweb_test.helpers.common .. automodule:: fuelweb_test.helpers.common

View File

@ -16,10 +16,10 @@ from __future__ import division
import hashlib import hashlib
import json import json
import os
import re
from time import sleep from time import sleep
import os
import re
from devops.error import TimeoutError from devops.error import TimeoutError
from devops.helpers.helpers import _wait from devops.helpers.helpers import _wait
from devops.helpers.helpers import wait from devops.helpers.helpers import wait
@ -28,6 +28,7 @@ from netaddr import IPNetwork
from proboscis.asserts import assert_equal from proboscis.asserts import assert_equal
from proboscis.asserts import assert_false from proboscis.asserts import assert_false
from proboscis.asserts import assert_true from proboscis.asserts import assert_true
# pylint: disable=import-error # pylint: disable=import-error
from six.moves.urllib.error import HTTPError from six.moves.urllib.error import HTTPError
from six.moves.urllib.error import URLError from six.moves.urllib.error import URLError
@ -968,24 +969,6 @@ def check_neutron_dhcp_lease(ip, instance_ip, instance_mac,
return ' ack ' in lease return ' ack ' in lease
def check_available_mode(ip):
    """Check whether UMM maintenance mode is available on the node.

    Runs ``umm status`` over SSH; the remote shell prints 'True' when
    the status output contains a runlevel line, 'False' otherwise.

    :param ip: IP address of the node to check
    :return: 'True'/'False' string from stdout, or stderr text if the
        remote command itself failed
    """
    command = ('umm status | grep runlevel &>/dev/null && echo "True" '
               '|| echo "False"')
    # Execute once and reuse the result instead of running the remote
    # command a second time just to read its output.
    result = ssh_manager.execute(ip, command)
    if result['exit_code'] == 0:
        return ''.join(result['stdout']).strip()
    return ''.join(result['stderr']).strip()
def check_auto_mode(ip):
    """Check whether the node is currently in UMM maintenance mode.

    Runs ``umm status`` over SSH; the remote shell prints 'True' when
    the status output contains an umm line, 'False' otherwise.

    :param ip: IP address of the node to check
    :return: 'True'/'False' string from stdout, or stderr text if the
        remote command itself failed
    """
    command = ('umm status | grep umm &>/dev/null && echo "True" '
               '|| echo "False"')
    # Execute once and reuse the result instead of running the remote
    # command a second time just to read its output.
    result = ssh_manager.execute(ip, command)
    if result['exit_code'] == 0:
        return ''.join(result['stdout']).strip()
    return ''.join(result['stderr']).strip()
def is_ntpd_active(ip, ntpd_ip): def is_ntpd_active(ip, ntpd_ip):
cmd = 'ntpdate -d -p 4 -t 0.2 -u {0}'.format(ntpd_ip) cmd = 'ntpdate -d -p 4 -t 0.2 -u {0}'.format(ntpd_ip)
return not ssh_manager.execute(ip, cmd)['exit_code'] return not ssh_manager.execute(ip, cmd)['exit_code']

View File

@ -0,0 +1,53 @@
# Copyright 2016 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from fuelweb_test import logwrap
from fuelweb_test.helpers.ssh_manager import SSHManager
ssh_manager = SSHManager()
@logwrap
def change_config(ip, umm=True, reboot_count=2, counter_reset_time=10):
    """Rewrite /etc/umm.conf on the node with the given UMM settings.

    :param ip: IP address of the target node
    :param umm: enable (True -> 'yes') or disable (False -> 'no') UMM
    :param reboot_count: reboots allowed before maintenance mode starts
    :param counter_reset_time: time before the reboot counter resets
    :return: result of the remote command execution
    """
    umm_value = 'yes' if umm else 'no'
    cmd = ("echo -e 'UMM={0}\n"
           "REBOOT_COUNT={1}\n"
           "COUNTER_RESET_TIME={2}' > /etc/umm.conf").format(
        umm_value, reboot_count, counter_reset_time)
    return ssh_manager.execute(ip=ip, cmd=cmd)
def check_available_mode(ip):
    """Check whether UMM maintenance mode is available on the node.

    Runs ``umm status`` over SSH; the remote shell prints 'True' when
    the status output contains a runlevel line, 'False' otherwise.

    :param ip: IP address of the node to check
    :return: 'True'/'False' string from stdout, or stderr text if the
        remote command itself failed
    """
    command = ('umm status | grep runlevel &>/dev/null && echo "True" '
               '|| echo "False"')
    # Execute once and reuse the result instead of running the remote
    # command a second time just to read its output.
    result = ssh_manager.execute(ip, command)
    if result['exit_code'] == 0:
        return ''.join(result['stdout']).strip()
    return ''.join(result['stderr']).strip()
def check_auto_mode(ip):
    """Check whether the node is currently in UMM maintenance mode.

    Runs ``umm status`` over SSH; the remote shell prints 'True' when
    the status output contains an umm line, 'False' otherwise.

    :param ip: IP address of the node to check
    :return: 'True'/'False' string from stdout, or stderr text if the
        remote command itself failed
    """
    command = ('umm status | grep umm &>/dev/null && echo "True" '
               '|| echo "False"')
    # Execute once and reuse the result instead of running the remote
    # command a second time just to read its output.
    result = ssh_manager.execute(ip, command)
    if result['exit_code'] == 0:
        return ''.join(result['stdout']).strip()
    return ''.join(result['stderr']).strip()

View File

@ -20,6 +20,9 @@ from proboscis import test
from fuelweb_test.helpers import checkers from fuelweb_test.helpers import checkers
from fuelweb_test.helpers.decorators import log_snapshot_after_test from fuelweb_test.helpers.decorators import log_snapshot_after_test
from fuelweb_test.helpers.cic_maintenance_mode import change_config
from fuelweb_test.helpers.cic_maintenance_mode import check_auto_mode
from fuelweb_test.helpers.cic_maintenance_mode import check_available_mode
from fuelweb_test import logger from fuelweb_test import logger
from fuelweb_test import ostf_test_mapping from fuelweb_test import ostf_test_mapping
from fuelweb_test import settings from fuelweb_test import settings
@ -105,7 +108,7 @@ class CICMaintenanceMode(TestBasic):
_ip = regular_ctrl['ip'] _ip = regular_ctrl['ip']
_id = regular_ctrl['id'] _id = regular_ctrl['id']
logger.info('Maintenance mode for node-{0}'.format(_id)) logger.info('Maintenance mode for node-{0}'.format(_id))
asserts.assert_true('True' in checkers.check_available_mode(_ip), asserts.assert_true('True' in check_available_mode(_ip),
"Maintenance mode is not available") "Maintenance mode is not available")
self.ssh_manager.execute_on_remote( self.ssh_manager.execute_on_remote(
ip=_ip, ip=_ip,
@ -130,7 +133,7 @@ class CICMaintenanceMode(TestBasic):
"Host {0} is not reachable by ping during 600 sec" "Host {0} is not reachable by ping during 600 sec"
.format(_ip)) .format(_ip))
asserts.assert_true('True' in checkers.check_auto_mode(_ip), asserts.assert_true('True' in check_auto_mode(_ip),
"Maintenance mode is not switched on") "Maintenance mode is not switched on")
self.ssh_manager.execute_on_remote( self.ssh_manager.execute_on_remote(
@ -208,15 +211,10 @@ class CICMaintenanceMode(TestBasic):
_ip = regular_ctrl['ip'] _ip = regular_ctrl['ip']
_id = regular_ctrl['id'] _id = regular_ctrl['id']
asserts.assert_true('True' in checkers.check_available_mode(_ip), asserts.assert_true('True' in check_available_mode(_ip),
"Maintenance mode is not available") "Maintenance mode is not available")
command1 = ("echo -e 'UMM=yes\nREBOOT_COUNT=0\n" change_config(_ip, reboot_count=0)
"COUNTER_RESET_TIME=10' > /etc/umm.conf")
self.ssh_manager.execute_on_remote(
ip=_ip,
cmd=command1)
logger.info('Change UMM.CONF on node-{0}' logger.info('Change UMM.CONF on node-{0}'
.format(_id)) .format(_id))
@ -224,11 +222,11 @@ class CICMaintenanceMode(TestBasic):
logger.info('Unexpected reboot on node-{0}' logger.info('Unexpected reboot on node-{0}'
.format(_id)) .format(_id))
command2 = 'reboot --force >/dev/null & ' command = 'reboot --force >/dev/null & '
self.ssh_manager.execute_on_remote( self.ssh_manager.execute_on_remote(
ip=_ip, ip=_ip,
cmd=command2) cmd=command)
wait(lambda: wait(lambda:
not checkers.check_ping(self.env.get_admin_node_ip(), not checkers.check_ping(self.env.get_admin_node_ip(),
@ -253,7 +251,7 @@ class CICMaintenanceMode(TestBasic):
"Host {0} is not reachable by ping during 600 sec" "Host {0} is not reachable by ping during 600 sec"
.format(_ip)) .format(_ip))
asserts.assert_true('True' in checkers.check_auto_mode(_ip), asserts.assert_true('True' in check_auto_mode(_ip),
"Maintenance mode is not switched on") "Maintenance mode is not switched on")
logger.info('turn off Maintenance mode') logger.info('turn off Maintenance mode')
@ -261,12 +259,8 @@ class CICMaintenanceMode(TestBasic):
ip=_ip, ip=_ip,
cmd="umm off") cmd="umm off")
time.sleep(30) time.sleep(30)
command3 = ("echo -e 'UMM=yes\nREBOOT_COUNT=2\n"
"COUNTER_RESET_TIME=10' > /etc/umm.conf")
self.ssh_manager.execute_on_remote( change_config(_ip)
ip=_ip,
cmd=command3)
logger.info('Wait a node-{0} online status' logger.info('Wait a node-{0} online status'
.format(_id)) .format(_id))
@ -341,13 +335,13 @@ class CICMaintenanceMode(TestBasic):
_ip = regular_ctrl['ip'] _ip = regular_ctrl['ip']
_id = regular_ctrl['id'] _id = regular_ctrl['id']
asserts.assert_true('True' in checkers.check_available_mode(_ip), asserts.assert_true('True' in check_available_mode(_ip),
"Maintenance mode is not available") "Maintenance mode is not available")
self.ssh_manager.execute_on_remote( self.ssh_manager.execute_on_remote(
ip=_ip, ip=_ip,
cmd="umm disable") cmd="umm disable")
asserts.assert_false('True' in checkers.check_available_mode(_ip), asserts.assert_false('True' in check_available_mode(_ip),
"Maintenance mode should not be available") "Maintenance mode should not be available")
logger.info('Try to execute maintenance mode ' logger.info('Try to execute maintenance mode '
@ -406,27 +400,23 @@ class CICMaintenanceMode(TestBasic):
_ip = regular_ctrl['ip'] _ip = regular_ctrl['ip']
_id = regular_ctrl['id'] _id = regular_ctrl['id']
asserts.assert_true('True' in checkers.check_available_mode(_ip), asserts.assert_true('True' in check_available_mode(_ip),
"Maintenance mode is not available") "Maintenance mode is not available")
logger.info('Disable UMM on node-{0}'.format(_id)) logger.info('Disable UMM on node-{0}'.format(_id))
command1 = ("echo -e 'UMM=no\nREBOOT_COUNT=0\n" change_config(_ip, umm=False, reboot_count=0)
"COUNTER_RESET_TIME=10' > /etc/umm.conf")
self.ssh_manager.execute_on_remote(
ip=_ip,
cmd=command1)
asserts.assert_false('True' in checkers.check_available_mode(_ip), asserts.assert_false('True' in check_available_mode(_ip),
"Maintenance mode should not be available") "Maintenance mode should not be available")
command2 = 'reboot --force >/dev/null & ' command = 'reboot --force >/dev/null & '
logger.info('Unexpected reboot on node-{0}' logger.info('Unexpected reboot on node-{0}'
.format(_id)) .format(_id))
self.ssh_manager.execute_on_remote( self.ssh_manager.execute_on_remote(
ip=_ip, ip=_ip,
cmd=command2) cmd=command)
wait(lambda: wait(lambda:
not checkers.check_ping(self.env.get_admin_node_ip(), not checkers.check_ping(self.env.get_admin_node_ip(),
@ -451,7 +441,7 @@ class CICMaintenanceMode(TestBasic):
logger.info('Check that node-{0} not in maintenance mode after' logger.info('Check that node-{0} not in maintenance mode after'
' unexpected reboot'.format(_id)) ' unexpected reboot'.format(_id))
asserts.assert_false('True' in checkers.check_auto_mode(_ip), asserts.assert_false('True' in check_auto_mode(_ip),
"Maintenance mode should not switched") "Maintenance mode should not switched")
# Wait until MySQL Galera is UP on some controller # Wait until MySQL Galera is UP on some controller

View File

@ -23,6 +23,7 @@ from six.moves import xrange
from fuelweb_test import logger from fuelweb_test import logger
from fuelweb_test import ostf_test_mapping from fuelweb_test import ostf_test_mapping
from fuelweb_test import settings from fuelweb_test import settings
from fuelweb_test.helpers.cic_maintenance_mode import change_config
from fuelweb_test.helpers.decorators import log_snapshot_after_test from fuelweb_test.helpers.decorators import log_snapshot_after_test
from fuelweb_test.helpers.decorators import setup_teardown from fuelweb_test.helpers.decorators import setup_teardown
from fuelweb_test.helpers.rally import RallyBenchmarkTest from fuelweb_test.helpers.rally import RallyBenchmarkTest
@ -55,12 +56,13 @@ class RepetitiveRestart(TestLoadBase):
4. Run ostf 4. Run ostf
5. Fill ceph partitions on all nodes up to 30% 5. Fill ceph partitions on all nodes up to 30%
6. Check Ceph status 6. Check Ceph status
7. Run RALLY 7. Disable UMM
8. 100 times repetitive reboot: 8. Run RALLY
9. Cold restart of all nodes 9. 100 times repetitive reboot:
10. Wait for HA services ready 10. Cold restart of all nodes
11. Wait until MySQL Galera is UP on some controller 11. Wait for HA services ready
12. Run ostf 12. Wait until MySQL Galera is UP on some controller
13. Run ostf
Duration 1700m Duration 1700m
Snapshot ceph_partitions_repetitive_cold_restart Snapshot ceph_partitions_repetitive_cold_restart
@ -94,6 +96,11 @@ class RepetitiveRestart(TestLoadBase):
self.fuel_web.check_ceph_status(cluster_id) self.fuel_web.check_ceph_status(cluster_id)
self.show_step(7) self.show_step(7)
for node in self.fuel_web.client.list_cluster_nodes(cluster_id):
change_config(node['ip'], umm=False)
self.show_step(8)
assert_true(settings.PATCHING_RUN_RALLY, assert_true(settings.PATCHING_RUN_RALLY,
'PATCHING_RUN_RALLY was not set in true') 'PATCHING_RUN_RALLY was not set in true')
rally_benchmarks = {} rally_benchmarks = {}
@ -108,9 +115,9 @@ class RepetitiveRestart(TestLoadBase):
benchmark_results[tag] = rally_benchmarks[tag].run() benchmark_results[tag] = rally_benchmarks[tag].run()
logger.debug(benchmark_results[tag].show()) logger.debug(benchmark_results[tag].show())
self.show_step(8) self.show_step(9)
for i in xrange(settings.RESTART_COUNT): for i in xrange(settings.RESTART_COUNT):
self.show_step(9, 'number {}'.format(i + 1), initialize=True) self.show_step(10, 'number {}'.format(i + 1), initialize=True)
self.fuel_web.cold_restart_nodes( self.fuel_web.cold_restart_nodes(
self.env.d_env.get_nodes(name__in=[ self.env.d_env.get_nodes(name__in=[
'slave-01', 'slave-01',
@ -119,12 +126,12 @@ class RepetitiveRestart(TestLoadBase):
'slave-04', 'slave-04',
'slave-05'])) 'slave-05']))
self.show_step(10) self.show_step(11)
self.fuel_web.assert_ha_services_ready(cluster_id) self.fuel_web.assert_ha_services_ready(cluster_id)
self.fuel_web.assert_os_services_ready(cluster_id) self.fuel_web.assert_os_services_ready(cluster_id)
self.show_step(11) self.show_step(12)
self.fuel_web.wait_mysql_galera_is_up([primary_controller.name]) self.fuel_web.wait_mysql_galera_is_up([primary_controller.name])
try: try:
@ -141,7 +148,7 @@ class RepetitiveRestart(TestLoadBase):
cluster_id, test_sets=['smoke'], cluster_id, test_sets=['smoke'],
test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get( test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
'Create volume and attach it to instance')) 'Create volume and attach it to instance'))
self.show_step(12) self.show_step(13)
# LB 1519018 # LB 1519018
self.fuel_web.run_ostf(cluster_id=cluster_id) self.fuel_web.run_ostf(cluster_id=cluster_id)
self.env.make_snapshot("ceph_partitions_repetitive_cold_restart") self.env.make_snapshot("ceph_partitions_repetitive_cold_restart")