Disable UMM before nodes repetitive restart

The problem is that after the third reboot, nodes go
into maintenance mode and become unavailable for further
testing. We need to disable the UMM feature to prevent such behaviour.

Change-Id: I1cce936201872f47d13e3c482e23e1ba4cfc24b2
Closes-Bug: #1588877
This commit is contained in:
Maksym Strukov 2016-06-13 21:49:07 +03:00
parent 8b3f7db3b5
commit e21c9a6099
5 changed files with 98 additions and 60 deletions

View File

@ -21,6 +21,11 @@ Cloud-image
.. automodule:: fuelweb_test.helpers.cloud_image
:members:
CIC Maintenance Mode
--------------------
.. automodule:: fuelweb_test.helpers.cic_maintenance_mode
:members:
Common
------
.. automodule:: fuelweb_test.helpers.common

View File

@ -16,10 +16,10 @@ from __future__ import division
import hashlib
import json
import os
import re
from time import sleep
import os
import re
from devops.error import TimeoutError
from devops.helpers.helpers import _wait
from devops.helpers.helpers import wait
@ -28,6 +28,7 @@ from netaddr import IPNetwork
from proboscis.asserts import assert_equal
from proboscis.asserts import assert_false
from proboscis.asserts import assert_true
# pylint: disable=import-error
from six.moves.urllib.error import HTTPError
from six.moves.urllib.error import URLError
@ -968,24 +969,6 @@ def check_neutron_dhcp_lease(ip, instance_ip, instance_mac,
return ' ack ' in lease
def check_available_mode(ip):
command = ('umm status | grep runlevel &>/dev/null && echo "True" '
'|| echo "False"')
if ssh_manager.execute(ip, command)['exit_code'] == 0:
return ''.join(ssh_manager.execute(ip, command)['stdout']).strip()
else:
return ''.join(ssh_manager.execute(ip, command)['stderr']).strip()
def check_auto_mode(ip):
command = ('umm status | grep umm &>/dev/null && echo "True" '
'|| echo "False"')
if ssh_manager.execute(ip, command)['exit_code'] == 0:
return ''.join(ssh_manager.execute(ip, command)['stdout']).strip()
else:
return ''.join(ssh_manager.execute(ip, command)['stderr']).strip()
def is_ntpd_active(ip, ntpd_ip):
cmd = 'ntpdate -d -p 4 -t 0.2 -u {0}'.format(ntpd_ip)
return not ssh_manager.execute(ip, cmd)['exit_code']

View File

@ -0,0 +1,53 @@
# Copyright 2016 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from fuelweb_test import logwrap
from fuelweb_test.helpers.ssh_manager import SSHManager
ssh_manager = SSHManager()
@logwrap
def change_config(ip, umm=True, reboot_count=2, counter_reset_time=10):
    """Rewrite /etc/umm.conf on a remote node.

    :param ip: IP address of the node to configure
    :param umm: enable (True) or disable (False) the UMM feature
    :param reboot_count: reboots allowed before maintenance mode triggers
        (presumably; semantics of umm.conf keys — TODO confirm against
        the UMM documentation)
    :param counter_reset_time: value written as COUNTER_RESET_TIME
    :return: result dict of the remote command execution
    """
    config_lines = [
        'UMM={0}'.format('yes' if umm else 'no'),
        'REBOOT_COUNT={0}'.format(reboot_count),
        'COUNTER_RESET_TIME={0}'.format(counter_reset_time),
    ]
    # The joined string carries real newline characters, matching the
    # original quoted heredoc-style echo command byte-for-byte.
    cmd = "echo -e '{0}' > /etc/umm.conf".format('\n'.join(config_lines))
    return ssh_manager.execute(ip=ip, cmd=cmd)
def check_available_mode(ip):
    """Check whether UMM maintenance mode is available on a node.

    The shell snippet prints "True" when ``umm status`` output contains
    "runlevel", "False" otherwise.

    :param ip: IP address of the node to check
    :return: "True"/"False" taken from stdout when the remote command
        succeeds, otherwise the stderr text
    """
    command = ('umm status | grep runlevel &>/dev/null && echo "True" '
               '|| echo "False"')
    # Execute once and reuse the result: the original ran the remote
    # command a second time just to read its output, which doubles the
    # SSH round-trips and can race with node state changes.
    result = ssh_manager.execute(ip, command)
    stream = 'stdout' if result['exit_code'] == 0 else 'stderr'
    return ''.join(result[stream]).strip()
def check_auto_mode(ip):
    """Check whether a node is currently in UMM maintenance mode.

    The shell snippet prints "True" when ``umm status`` output contains
    "umm", "False" otherwise.

    :param ip: IP address of the node to check
    :return: "True"/"False" taken from stdout when the remote command
        succeeds, otherwise the stderr text
    """
    command = ('umm status | grep umm &>/dev/null && echo "True" '
               '|| echo "False"')
    # Execute once and reuse the result: the original ran the remote
    # command a second time just to read its output, which doubles the
    # SSH round-trips and can race with node state changes.
    result = ssh_manager.execute(ip, command)
    stream = 'stdout' if result['exit_code'] == 0 else 'stderr'
    return ''.join(result[stream]).strip()

View File

@ -20,6 +20,9 @@ from proboscis import test
from fuelweb_test.helpers import checkers
from fuelweb_test.helpers.decorators import log_snapshot_after_test
from fuelweb_test.helpers.cic_maintenance_mode import change_config
from fuelweb_test.helpers.cic_maintenance_mode import check_auto_mode
from fuelweb_test.helpers.cic_maintenance_mode import check_available_mode
from fuelweb_test import logger
from fuelweb_test import ostf_test_mapping
from fuelweb_test import settings
@ -105,7 +108,7 @@ class CICMaintenanceMode(TestBasic):
_ip = regular_ctrl['ip']
_id = regular_ctrl['id']
logger.info('Maintenance mode for node-{0}'.format(_id))
asserts.assert_true('True' in checkers.check_available_mode(_ip),
asserts.assert_true('True' in check_available_mode(_ip),
"Maintenance mode is not available")
self.ssh_manager.execute_on_remote(
ip=_ip,
@ -130,7 +133,7 @@ class CICMaintenanceMode(TestBasic):
"Host {0} is not reachable by ping during 600 sec"
.format(_ip))
asserts.assert_true('True' in checkers.check_auto_mode(_ip),
asserts.assert_true('True' in check_auto_mode(_ip),
"Maintenance mode is not switched on")
self.ssh_manager.execute_on_remote(
@ -208,15 +211,10 @@ class CICMaintenanceMode(TestBasic):
_ip = regular_ctrl['ip']
_id = regular_ctrl['id']
asserts.assert_true('True' in checkers.check_available_mode(_ip),
asserts.assert_true('True' in check_available_mode(_ip),
"Maintenance mode is not available")
command1 = ("echo -e 'UMM=yes\nREBOOT_COUNT=0\n"
"COUNTER_RESET_TIME=10' > /etc/umm.conf")
self.ssh_manager.execute_on_remote(
ip=_ip,
cmd=command1)
change_config(_ip, reboot_count=0)
logger.info('Change UMM.CONF on node-{0}'
.format(_id))
@ -224,11 +222,11 @@ class CICMaintenanceMode(TestBasic):
logger.info('Unexpected reboot on node-{0}'
.format(_id))
command2 = 'reboot --force >/dev/null & '
command = 'reboot --force >/dev/null & '
self.ssh_manager.execute_on_remote(
ip=_ip,
cmd=command2)
cmd=command)
wait(lambda:
not checkers.check_ping(self.env.get_admin_node_ip(),
@ -253,7 +251,7 @@ class CICMaintenanceMode(TestBasic):
"Host {0} is not reachable by ping during 600 sec"
.format(_ip))
asserts.assert_true('True' in checkers.check_auto_mode(_ip),
asserts.assert_true('True' in check_auto_mode(_ip),
"Maintenance mode is not switched on")
logger.info('turn off Maintenance mode')
@ -261,12 +259,8 @@ class CICMaintenanceMode(TestBasic):
ip=_ip,
cmd="umm off")
time.sleep(30)
command3 = ("echo -e 'UMM=yes\nREBOOT_COUNT=2\n"
"COUNTER_RESET_TIME=10' > /etc/umm.conf")
self.ssh_manager.execute_on_remote(
ip=_ip,
cmd=command3)
change_config(_ip)
logger.info('Wait a node-{0} online status'
.format(_id))
@ -341,13 +335,13 @@ class CICMaintenanceMode(TestBasic):
_ip = regular_ctrl['ip']
_id = regular_ctrl['id']
asserts.assert_true('True' in checkers.check_available_mode(_ip),
asserts.assert_true('True' in check_available_mode(_ip),
"Maintenance mode is not available")
self.ssh_manager.execute_on_remote(
ip=_ip,
cmd="umm disable")
asserts.assert_false('True' in checkers.check_available_mode(_ip),
asserts.assert_false('True' in check_available_mode(_ip),
"Maintenance mode should not be available")
logger.info('Try to execute maintenance mode '
@ -406,27 +400,23 @@ class CICMaintenanceMode(TestBasic):
_ip = regular_ctrl['ip']
_id = regular_ctrl['id']
asserts.assert_true('True' in checkers.check_available_mode(_ip),
asserts.assert_true('True' in check_available_mode(_ip),
"Maintenance mode is not available")
logger.info('Disable UMM on node-{0}'.format(_id))
command1 = ("echo -e 'UMM=no\nREBOOT_COUNT=0\n"
"COUNTER_RESET_TIME=10' > /etc/umm.conf")
self.ssh_manager.execute_on_remote(
ip=_ip,
cmd=command1)
change_config(_ip, umm=False, reboot_count=0)
asserts.assert_false('True' in checkers.check_available_mode(_ip),
asserts.assert_false('True' in check_available_mode(_ip),
"Maintenance mode should not be available")
command2 = 'reboot --force >/dev/null & '
command = 'reboot --force >/dev/null & '
logger.info('Unexpected reboot on node-{0}'
.format(_id))
self.ssh_manager.execute_on_remote(
ip=_ip,
cmd=command2)
cmd=command)
wait(lambda:
not checkers.check_ping(self.env.get_admin_node_ip(),
@ -451,7 +441,7 @@ class CICMaintenanceMode(TestBasic):
logger.info('Check that node-{0} not in maintenance mode after'
' unexpected reboot'.format(_id))
asserts.assert_false('True' in checkers.check_auto_mode(_ip),
asserts.assert_false('True' in check_auto_mode(_ip),
"Maintenance mode should not switched")
# Wait until MySQL Galera is UP on some controller

View File

@ -23,6 +23,7 @@ from six.moves import xrange
from fuelweb_test import logger
from fuelweb_test import ostf_test_mapping
from fuelweb_test import settings
from fuelweb_test.helpers.cic_maintenance_mode import change_config
from fuelweb_test.helpers.decorators import log_snapshot_after_test
from fuelweb_test.helpers.decorators import setup_teardown
from fuelweb_test.helpers.rally import RallyBenchmarkTest
@ -55,12 +56,13 @@ class RepetitiveRestart(TestLoadBase):
4. Run ostf
5. Fill ceph partitions on all nodes up to 30%
6. Check Ceph status
7. Run RALLY
8. 100 times repetitive reboot:
9. Cold restart of all nodes
10. Wait for HA services ready
11. Wait until MySQL Galera is UP on some controller
12. Run ostf
7. Disable UMM
8. Run RALLY
9. 100 times repetitive reboot:
10. Cold restart of all nodes
11. Wait for HA services ready
12. Wait until MySQL Galera is UP on some controller
13. Run ostf
Duration 1700m
Snapshot ceph_partitions_repetitive_cold_restart
@ -94,6 +96,11 @@ class RepetitiveRestart(TestLoadBase):
self.fuel_web.check_ceph_status(cluster_id)
self.show_step(7)
for node in self.fuel_web.client.list_cluster_nodes(cluster_id):
change_config(node['ip'], umm=False)
self.show_step(8)
assert_true(settings.PATCHING_RUN_RALLY,
'PATCHING_RUN_RALLY was not set in true')
rally_benchmarks = {}
@ -108,9 +115,9 @@ class RepetitiveRestart(TestLoadBase):
benchmark_results[tag] = rally_benchmarks[tag].run()
logger.debug(benchmark_results[tag].show())
self.show_step(8)
self.show_step(9)
for i in xrange(settings.RESTART_COUNT):
self.show_step(9, 'number {}'.format(i + 1), initialize=True)
self.show_step(10, 'number {}'.format(i + 1), initialize=True)
self.fuel_web.cold_restart_nodes(
self.env.d_env.get_nodes(name__in=[
'slave-01',
@ -119,12 +126,12 @@ class RepetitiveRestart(TestLoadBase):
'slave-04',
'slave-05']))
self.show_step(10)
self.show_step(11)
self.fuel_web.assert_ha_services_ready(cluster_id)
self.fuel_web.assert_os_services_ready(cluster_id)
self.show_step(11)
self.show_step(12)
self.fuel_web.wait_mysql_galera_is_up([primary_controller.name])
try:
@ -141,7 +148,7 @@ class RepetitiveRestart(TestLoadBase):
cluster_id, test_sets=['smoke'],
test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
'Create volume and attach it to instance'))
self.show_step(12)
self.show_step(13)
# LB 1519018
self.fuel_web.run_ostf(cluster_id=cluster_id)
self.env.make_snapshot("ceph_partitions_repetitive_cold_restart")