# fuel-qa/fuelweb_test/tests/tests_strength/test_failover_group_1.py

# 407 lines
# 15 KiB
# Python

# Copyright 2015 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from devops.helpers.helpers import wait
from proboscis import test
from proboscis.asserts import assert_true, assert_equal
from fuelweb_test import logger
from fuelweb_test import settings
from fuelweb_test.helpers import os_actions
from fuelweb_test.helpers.decorators import log_snapshot_after_test
from fuelweb_test.tests.base_test_case import TestBasic
@test(groups=['failover_group_1'])
class FailoverGroup1(TestBasic):
    """Failover scenarios for an HA environment with Cinder and Neutron VLAN.

    'deploy_ha_cinder' deploys the environment (3 controllers and
    2 compute+cinder nodes) and stores it as the 'deploy_ha_cinder'
    snapshot. Every other scenario in this class reverts to that snapshot,
    injects a failure and then verifies networks and runs OSTF.
    """

    def _get_three_controllers(self, cluster_id):
        """Return the controller nodes of the cluster, requiring exactly 3.

        :param cluster_id: id of the cluster to inspect
        :return: nailgun nodes that have the 'controller' role
        :raises AssertionError: if the number of controllers is not 3
        """
        controllers = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            cluster_id, roles=('controller',))
        assert_equal(len(controllers), 3,
                     'Environment does not have 3 controller nodes, '
                     'found {} nodes!'.format(len(controllers)))
        return controllers

    def _get_primary_controller(self, controllers):
        """Look up the primary controller starting from the first controller.

        :param controllers: nailgun controller nodes of the cluster
        :return: result of fuel_web.get_nailgun_primary_node() called with
                 the devops node of controllers[0]
        """
        first_devops_node = self.fuel_web.get_devops_node_by_nailgun_node(
            controllers[0])
        return self.fuel_web.get_nailgun_primary_node(first_devops_node)

    def _get_process_pids(self, ip, pattern):
        """Run 'pgrep -f <pattern>' on the node and return its stdout.

        :param ip: address of the node to run the command on
        :param pattern: pattern passed to 'pgrep -f'
        :return: the 'stdout' field of the command result
        """
        return self.ssh_manager.execute(
            ip, cmd='pgrep -f {}'.format(pattern))['stdout']

    def _kill_agent_and_check_recovery(self, agent):
        """Kill one neutron agent on a controller and wait for its restart.

        :param agent: dict with the process 'name' and the pacemaker
                      'resource' of the agent
        :raises AssertionError: if the agent is not running before the
                                kill, cannot be killed, or any of the old
                                PIDs is still present after recovery
        """
        agent_name = agent['name']
        target_controllers = self.fuel_web.get_pacemaker_resource_location(
            'slave-01', agent['resource'])
        assert_true(len(target_controllers) >= 1,
                    "Didn't find controllers with "
                    "running {0} on it".format(agent_name))
        target_controller = self.fuel_web.get_nailgun_node_by_devops_node(
            target_controllers[0])
        node_ip = target_controller['ip']
        node_name = target_controller['name']

        old_pids = self._get_process_pids(node_ip, agent_name)
        assert_true(len(old_pids) > 0,
                    'PIDs of {0} not found on {1}'.format(
                        agent_name, node_name))
        logger.debug('Old PIDs of {0} on {1}: {2}'.format(
            agent_name, node_name, old_pids))

        result = self.ssh_manager.execute(
            node_ip,
            cmd='pkill -9 -f {}'.format(agent_name))
        assert_equal(result['exit_code'], 0,
                     'Processes of {0} were not killed on {1}: {2}'.format(
                         agent_name, node_name, result))

        # The scenario expects pacemaker to bring the agent back, so poll
        # until a matching process is present again.
        wait(lambda: len(self._get_process_pids(node_ip, agent_name)) > 0,
             timeout=60,
             timeout_msg='Neutron agent {0} was not recovered on node {1} '
                         'within 60 seconds!'.format(agent_name, node_name))

        # None of the PIDs seen before the kill may still be present.
        new_pids = self._get_process_pids(node_ip, agent_name)
        bad_pids = set(old_pids) & set(new_pids)
        assert_equal(len(bad_pids), 0,
                     '{0} processes with PIDs {1} were not '
                     'killed on {2}!'.format(agent_name,
                                             bad_pids,
                                             node_name))

    @test(depends_on_groups=['prepare_slaves_5'],
          groups=['deploy_ha_cinder'])
    @log_snapshot_after_test
    def deploy_ha_cinder(self):
        """Deploy environment with 3 controllers, Cinder and NeutronVLAN

        Scenario:
            1. Create environment with Cinder for storage and Neutron VLAN
            2. Add 3 controller, 2 compute+cinder nodes
            3. Verify networks
            4. Deploy environment
            5. Verify networks
            6. Run OSTF tests

        Duration 120m
        Snapshot deploy_ha_cinder
        """
        self.check_run('deploy_ha_cinder')
        self.env.revert_snapshot('ready_with_5_slaves')

        self.show_step(1, initialize=True)
        # These credentials are reused by power_outage_cinder_cluster when
        # it opens its OpenStack connection.
        data = {
            'tenant': 'failover',
            'user': 'failover',
            'password': 'failover',
            "net_provider": 'neutron',
            "net_segment_type": settings.NEUTRON_SEGMENT['vlan'],
        }
        cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            settings=data
        )

        self.show_step(2)
        self.fuel_web.update_nodes(
            cluster_id,
            {
                'slave-01': ['controller'],
                'slave-02': ['controller'],
                'slave-03': ['controller'],
                'slave-04': ['compute', 'cinder'],
                'slave-05': ['compute', 'cinder'],
            }
        )

        self.show_step(3)
        self.fuel_web.verify_network(cluster_id)

        self.show_step(4)
        self.fuel_web.deploy_cluster_wait(cluster_id)

        self.show_step(5)
        self.fuel_web.verify_network(cluster_id)

        self.show_step(6)
        self.fuel_web.run_ostf(cluster_id)

        self.env.make_snapshot('deploy_ha_cinder', is_make=True)

    @test(depends_on_groups=['deploy_ha_cinder'],
          groups=['lock_db_access_from_primary_controller'])
    @log_snapshot_after_test
    def lock_db_access_from_primary_controller(self):
        """Lock DB access from primary controller

        Scenario:
            1. Pre-condition - do steps from 'deploy_ha_cinder' test
            2. Lock DB access from primary controller
               (emulate non-responsiveness of MySQL from the controller
               where management VIP located)
            3. Verify networks
            4. Run HA OSTF tests, check MySQL tests fail
            5. Run Smoke and Sanity OSTF tests

        Duration 20m
        Snapshot lock_db_access_from_primary_controller
        """
        self.show_step(1, initialize=True)
        self.env.revert_snapshot('deploy_ha_cinder')

        self.show_step(2)
        cluster_id = self.fuel_web.get_last_created_cluster()
        self._get_three_controllers(cluster_id)

        target_controllers = self.fuel_web.get_pacemaker_resource_location(
            'slave-01', 'vip__management')
        assert_equal(len(target_controllers), 1,
                     'Expected 1 controller with "vip__management" resource '
                     'running, found {0}: {1}!'.format(
                         len(target_controllers), target_controllers))
        target_controller = self.fuel_web.get_nailgun_node_by_devops_node(
            target_controllers[0])

        # Drop TCP traffic to destination port 4567 in both directions
        # (presumably the MySQL/Galera replication port - TODO confirm).
        result = self.ssh_manager.execute(
            ip=target_controller['ip'],
            cmd='iptables -I OUTPUT -p tcp --dport 4567 -j DROP && '
                'iptables -I INPUT -p tcp --dport 4567 -j DROP')
        assert_equal(result['exit_code'], 0,
                     "Lock DB access failed: {0}!".format(result))

        self.show_step(3)
        self.fuel_web.verify_network(cluster_id)

        self.show_step(4)
        self.fuel_web.run_ostf(cluster_id, test_sets=['ha'], should_fail=5)

        self.show_step(5)
        self.fuel_web.run_ostf(cluster_id)

        self.env.make_snapshot('lock_db_access_from_primary_controller')

    @test(depends_on_groups=['deploy_ha_cinder'],
          groups=['recovery_neutron_agents_after_restart'])
    @log_snapshot_after_test
    def recovery_neutron_agents_after_restart(self):
        """Recovery of neutron agents after restart

        Scenario:
            1. Pre-condition - do steps from 'deploy_ha_cinder' test
            2. Kill neutron agents at all on one of the controllers.
               Pacemaker should restart it
            2.1 verify output crm status | grep -A1 "clone_p_neutron-l3-agent"
                have failed status for controller
            2.2 verify neutron-l3-process restarted
                by ps -aux | grep neutron-l3-agent
            2.3 verify output crm status | grep -A1 "clone_p_neutron-l3-agent"
                have started status for controller
            3. Verify networks
            4. Run OSTF tests

        Duration 20m
        Snapshot recovery_neutron_agents_after_restart
        """
        self.show_step(1, initialize=True)
        self.env.revert_snapshot('deploy_ha_cinder')

        self.show_step(2)
        neutron_agents = [
            {'name': 'neutron-openvswitch-agent',
             'resource': 'neutron-openvswitch-agent'},
            {'name': 'neutron-l3-agent',
             'resource': 'neutron-l3-agent'},
            {'name': 'neutron-dhcp-agent',
             'resource': 'neutron-dhcp-agent'},
            {'name': 'neutron-metadata-agent',
             'resource': 'neutron-metadata-agent'}
        ]
        cluster_id = self.fuel_web.get_last_created_cluster()
        self._get_three_controllers(cluster_id)

        for agent in neutron_agents:
            self._kill_agent_and_check_recovery(agent)

        self.show_step(3)
        self.fuel_web.verify_network(cluster_id)

        self.show_step(4)
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])

        self.env.make_snapshot('recovery_neutron_agents_after_restart')

    @test(depends_on_groups=['deploy_ha_cinder'],
          groups=['safe_reboot_primary_controller'])
    @log_snapshot_after_test
    def safe_reboot_primary_controller(self):
        """Safe reboot of primary controller

        Scenario:
            1. Pre-condition - do steps from 'deploy_ha_cinder' test
            2. Safe reboot of primary controller
            3. Wait up to 10 minutes for HA readiness
            4. Verify networks
            5. Run OSTF tests

        Duration: 30 min
        Snapshot: safe_reboot_primary_controller
        """
        self.show_step(1, initialize=True)
        self.env.revert_snapshot('deploy_ha_cinder')
        cluster_id = self.fuel_web.get_last_created_cluster()
        controllers = self._get_three_controllers(cluster_id)

        self.show_step(2)
        target_controller = self._get_primary_controller(controllers)
        self.fuel_web.warm_restart_nodes([target_controller])

        self.show_step(3)
        self.fuel_web.assert_ha_services_ready(cluster_id, timeout=60 * 10)

        self.show_step(4)
        self.fuel_web.verify_network(cluster_id)

        self.show_step(5)
        self.fuel_web.run_ostf(cluster_id)

        self.env.make_snapshot('safe_reboot_primary_controller')

    @test(depends_on_groups=['deploy_ha_cinder'],
          groups=['hard_reset_primary_controller'])
    @log_snapshot_after_test
    def hard_reset_primary_controller(self):
        """Hard reset of primary controller

        Scenario:
            1. Pre-condition - do steps from 'deploy_ha_cinder' test
            2. Hard reset of primary controller
            3. Wait up to 10 minutes for HA readiness
            4. Verify networks
            5. Run OSTF tests

        Duration: 30 min
        Snapshot: hard_reset_primary_controller
        """
        self.show_step(1, initialize=True)
        self.env.revert_snapshot('deploy_ha_cinder')
        cluster_id = self.fuel_web.get_last_created_cluster()
        controllers = self._get_three_controllers(cluster_id)

        self.show_step(2)
        target_controller = self._get_primary_controller(controllers)
        self.fuel_web.cold_restart_nodes([target_controller])

        self.show_step(3)
        self.fuel_web.assert_ha_services_ready(cluster_id, timeout=60 * 10)

        self.show_step(4)
        self.fuel_web.verify_network(cluster_id)

        self.show_step(5)
        self.fuel_web.run_ostf(cluster_id)

        self.env.make_snapshot('hard_reset_primary_controller')

    @test(depends_on_groups=['deploy_ha_cinder'],
          groups=['power_outage_cinder_cluster'])
    @log_snapshot_after_test
    def power_outage_cinder_cluster(self):
        """Power outage of Neutron vlan, cinder/swift cluster

        Scenario:
            1. Pre-condition - do steps from 'deploy_ha_cinder' test
            2. Create 2 instances
            3. Create 2 volumes
            4. Attach volumes to instances
            5. Fill cinder storage up to 30%
            6. Cold shutdown of all nodes
            7. Wait 5 min
            8. Start of all nodes
            9. Wait for HA services ready
            10. Verify networks
            11. Run OSTF tests

        Duration: 30 min
        """
        self.show_step(1, initialize=True)
        self.env.revert_snapshot('deploy_ha_cinder')
        cluster_id = self.fuel_web.get_last_created_cluster()

        # Same user/password/tenant values that deploy_ha_cinder put into
        # the cluster settings.
        os_conn = os_actions.OpenStackActions(
            self.fuel_web.get_public_vip(cluster_id), 'failover', 'failover',
            'failover')
        net_name = self.fuel_web.get_cluster_predefined_networks_name(
            cluster_id)['private_net']

        # Steps 2-4 are announced together because the code below handles
        # one instance/volume pair at a time (create, create, attach).
        self.show_step(2)
        self.show_step(3)
        self.show_step(4)
        first_instance = os_conn.create_instance(
            neutron_network=True, label=net_name)
        first_volume = os_conn.create_volume()
        os_conn.attach_volume(first_volume, first_instance)

        second_instance = os_conn.create_instance(
            flavor_name='test_flavor1',
            server_name='test_instance1',
            neutron_network=True, label=net_name)
        second_volume = os_conn.create_volume()
        os_conn.attach_volume(second_volume, second_instance)

        self.show_step(5)
        # Storage is filled by creating a 20G logical volume named 'test'
        # in the 'cinder' volume group on slave-04.
        with self.fuel_web.get_ssh_for_node('slave-04') as remote:
            result = remote.execute('lvcreate -n test -L20G cinder')
            assert_equal(result['exit_code'], 0,
                         "Logical volume 'test' was not created in the "
                         "'cinder' volume group: {0}".format(result))

        # Steps 6-8 are all performed by the single cold_restart_nodes()
        # call below; wait_after_destroy=300 is the 5 minute pause.
        self.show_step(6)
        self.show_step(7)
        self.show_step(8)
        self.fuel_web.cold_restart_nodes(
            self.env.d_env.get_nodes(name__in=[
                'slave-01',
                'slave-02',
                'slave-03',
                'slave-04',
                'slave-05']), wait_after_destroy=300)

        self.show_step(9)
        self.fuel_web.assert_ha_services_ready(cluster_id)

        self.show_step(10)
        self.fuel_web.verify_network(cluster_id)

        self.show_step(11)
        self.fuel_web.run_ostf(cluster_id=cluster_id)