fuel-qa/fuelweb_test/tests/test_multiqueue.py

354 lines
14 KiB
Python

# Copyright 2016 Mirantis, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from __future__ import division
from __future__ import unicode_literals
import random
from devops.helpers import helpers as devops_helpers
from devops.helpers.ssh_client import SSHAuth
from proboscis import test
from proboscis.asserts import assert_equal
from proboscis.asserts import assert_true
from fuelweb_test import logger
from fuelweb_test import settings
from fuelweb_test.helpers import os_actions
from fuelweb_test.helpers.checkers import enable_feature_group
from fuelweb_test.helpers.decorators import log_snapshot_after_test
from fuelweb_test.tests.base_test_case import SetupEnvironment
from fuelweb_test.tests.base_test_case import TestBasic
@test
class TestMultiqueue(TestBasic):
    """Multiqueue checks for instances booted from the TestVM image.

    The tests set ``hw_vif_multiqueue_enabled`` on the TestVM image, boot
    instances from it, switch on the NIC queues inside the guest with
    ``ethtool`` and verify that the number of rx/tx queues of ``eth0``
    matches the number of vCPUs given to the instance.
    """

    def __init__(self):
        """Check the lab prerequisites for multiqueue at construction time.

        Asserts that ``settings.KVM_USE`` is set and that slave nodes are
        configured with more than one CPU.
        """
        super(TestMultiqueue, self).__init__()
        assert_true(settings.KVM_USE, "Multiqueue feature requires "
                                      "KVM_USE=true env variable!")
        assert_true(settings.HARDWARE["slave_node_cpu"] > 1,
                    "Multiqueue feature requires more than 1 cpu for "
                    "enabling queues!")

    @test(depends_on=[SetupEnvironment.prepare_slaves_3],
          groups=["multiqueue_base_check"])
    @log_snapshot_after_test
    def multiqueue_base_check(self):
        """Deploy non-HA cluster for base multiqueue check

        Scenario:
            1. Create cluster
            2. Add 1 node with controller role and
               1 node with compute+cinder role
            3. Deploy the cluster
            4. Run network verification
            5. Run OSTF
            6. Edit TestVM metadata - add hw_vif_multiqueue_enabled=true
            7. Create flavor with all available VCPUs
            8. Boot instance from TestVM image and new flavor
            9. Assign floating IP and check instance availability
            10. Enable queues in instance
            11. Check that queues was created

        """
        # Seconds to wait for the instance to answer on its SSH port; the
        # same value is reported in the timeout message.
        ssh_timeout = 300

        self.env.revert_snapshot("ready_with_3_slaves")

        self.show_step(1)
        cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
        )

        self.show_step(self.next_step)
        self.fuel_web.update_nodes(
            cluster_id,
            {
                'slave-01': ['controller'],
                'slave-02': ['compute', 'cinder'],
            }
        )

        self.show_step(self.next_step)
        self.fuel_web.deploy_cluster_wait(cluster_id)

        self.show_step(self.next_step)
        self.fuel_web.verify_network(cluster_id)

        self.show_step(self.next_step)
        self.fuel_web.run_ostf(cluster_id=cluster_id)

        # update image's metadata
        self.show_step(self.next_step)
        os_conn = os_actions.OpenStackActions(
            self.fuel_web.get_public_vip(cluster_id))
        test_vm_image = os_conn.glance.images.find(name="TestVM")
        test_vm_image.update(properties={'hw_vif_multiqueue_enabled': True})

        # The flavor gets as many vCPUs as the hypervisor reports; the same
        # number is later used as the expected queue count.
        nova_compute = os_conn.nova.hypervisors.list().pop()
        vcpus = nova_compute.vcpus

        # create flavor
        self.show_step(self.next_step)
        flavor_id = random.randint(10, 10000)
        name = 'system_test-{}'.format(random.randint(10, 10000))
        os_conn.create_flavor(name=name, ram=64,
                              vcpus=vcpus, disk=1,
                              flavorid=flavor_id)

        self.show_step(self.next_step)
        net_name = self.fuel_web.get_cluster_predefined_networks_name(
            cluster_id)['private_net']
        server = os_conn.create_server_for_migration(neutron=True,
                                                     label=net_name,
                                                     flavor_id=flavor_id)
        os_conn.verify_instance_status(server, 'ACTIVE')

        self.show_step(self.next_step)
        floating_ip = os_conn.assign_floating_ip(server)
        logger.info("Floating address {0} associated with instance {1}"
                    .format(floating_ip.ip, server.id))

        logger.info("Wait for ping from instance {} "
                    "by floating ip".format(server.id))
        devops_helpers.wait(
            lambda: devops_helpers.tcp_ping(floating_ip.ip, 22),
            timeout=ssh_timeout,
            timeout_msg=("Instance {0} is unreachable for {1} seconds".
                         format(server.id, ssh_timeout)))

        # Commands are run inside the guest by hopping through slave-01.
        cirros_auth = SSHAuth(**settings.SSH_IMAGE_CREDENTIALS)
        slave_01_ssh = self.fuel_web.get_ssh_for_node("slave-01")

        self.show_step(self.next_step)
        result = slave_01_ssh.execute_through_host(
            hostname=floating_ip.ip,
            cmd="sudo /sbin/ethtool -L eth0 combined {}".format(vcpus),
            auth=cirros_auth)
        assert_equal(
            result.exit_code, 0,
            "Enabling queues using ethtool failed!\n{}".format(result))

        self.show_step(self.next_step)
        result = slave_01_ssh.execute_through_host(
            hostname=floating_ip.ip,
            cmd="ls /sys/class/net/eth0/queues",
            auth=cirros_auth
        )
        assert_equal(result.stdout_str.count("rx"), vcpus,
                     "RX queues count is not equal to vcpus count")
        assert_equal(result.stdout_str.count("tx"), vcpus,
                     "TX queues count is not equal to vcpus count")

    @staticmethod
    def check_cpu_pinning_requirements():
        """Assert that the lab settings allow the DPDK/CPU pinning scenario.

        Requires KVM_USE and ACPI_ENABLE to be set, at least 6 CPUs per
        slave node and at least 2 NUMA nodes.
        """
        assert_true(settings.KVM_USE,
                    "CPU pinning requires KVM_USE to be enabled!")
        assert_true(settings.ACPI_ENABLE,
                    "CPU pinning requires ACPI_ENABLE to be enabled!")
        assert_true(settings.HARDWARE['slave_node_cpu'] >= 6,
                    "CPU pinning requires at least 6 cpus per slave node!")
        assert_true(settings.HARDWARE['numa_nodes'] >= 2,
                    "CPU pinning requires at least 2 numa nodes!")

    def multiqueue_with_dpdk_and_numa(self, neutron_seg_type):
        """Run the shared DPDK + CPU pinning multiqueue scenario.

        Common body of the VLAN and TUN tests of this class; the step
        announcements made here follow the eleven-step scenario listed in
        the docstrings of those tests.

        :param neutron_seg_type: value passed to the cluster settings as
            "net_segment_type"
        """
        # Seconds to wait for each instance to answer on its SSH port; the
        # same value is reported in the timeout message.
        ssh_timeout = 700

        self.check_cpu_pinning_requirements()

        self.env.revert_snapshot("ready_with_5_slaves")
        # The scenario uses six nodes, so slave-06 is bootstrapped on top
        # of the five-slave snapshot.
        self.env.bootstrap_nodes([self.env.d_env.get_node(name='slave-06')])

        self.show_step(1)
        enable_feature_group(self.env, 'experimental')
        cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            settings={
                "net_provider": settings.NEUTRON,
                "net_segment_type": neutron_seg_type,
                'volumes_lvm': False,
                'volumes_ceph': True,
                'images_ceph': True,
                'objects_ceph': True}
        )

        self.show_step(self.next_step)
        self.fuel_web.update_nodes(
            cluster_id,
            {
                'slave-01': ['controller'],
                'slave-02': ['controller'],
                'slave-03': ['controller'],
                'slave-04': ['compute', 'ceph-osd'],
                'slave-05': ['compute', 'ceph-osd'],
                'slave-06': ['compute', 'ceph-osd']
            }
        )

        self.show_step(self.next_step)
        computes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            cluster_id, ['compute'], role_status='pending_roles')
        for compute in computes:
            self.fuel_web.enable_dpdk(compute['id'])
            self.fuel_web.setup_hugepages(
                compute['id'], hp_2mb=512, hp_dpdk_mb=1024)
            # All CPUs of the node but one are requested for pinning.
            self.fuel_web.enable_cpu_pinning(
                compute['id'],
                cpu_count=settings.HARDWARE["slave_node_cpu"] - 1)

        self.show_step(self.next_step)
        self.fuel_web.deploy_cluster_wait(cluster_id)

        self.show_step(self.next_step)
        self.fuel_web.run_ostf(cluster_id)

        # update image's metadata
        self.show_step(self.next_step)
        # NOTE(review): cluster_id is already known from step 1; this
        # re-read looks redundant - confirm before removing.
        cluster_id = self.fuel_web.get_last_created_cluster()
        os_conn = os_actions.OpenStackActions(
            self.fuel_web.get_public_vip(cluster_id))
        test_vm_image = os_conn.glance.images.find(name="TestVM")
        test_vm_image.update(properties={'hw_vif_multiqueue_enabled': True})

        # create aggregate containing the first two computes
        self.show_step(self.next_step)
        compute1 = self.fuel_web.get_nailgun_node_by_name('slave-04')
        compute2 = self.fuel_web.get_nailgun_node_by_name('slave-05')
        aggregate_name = 'system_tests' + str(random.randint(0, 1000))
        os_conn.create_aggregate(aggregate_name,
                                 metadata={'pinned': 'true'},
                                 hosts=[compute1['fqdn'], compute2['fqdn']])

        # create flavor
        self.show_step(self.next_step)
        name = 'system_test-{}'.format(random.randint(10, 10000))
        # The 'pinned' extra spec matches the metadata of the aggregate
        # created above.
        extra_specs_flavor = {'aggregate_instance_extra_specs:pinned': 'true',
                              'hw:cpu_policy': 'dedicated',
                              'hw:mem_page_size': 2048}
        # vCPUs per instance (and expected queue count): CPUs of a slave
        # node divided by the number of NUMA nodes, truncated to an int.
        pinned_cpus = int(settings.HARDWARE["slave_node_cpu"] /
                          settings.HARDWARE["numa_nodes"])
        flavor_id = random.randint(10, 10000)
        os_conn.create_flavor(name=name, ram=64, vcpus=pinned_cpus, disk=1,
                              flavorid=flavor_id,
                              extra_specs=extra_specs_flavor)

        self.show_step(self.next_step)
        net_name = self.fuel_web.get_cluster_predefined_networks_name(
            cluster_id)['private_net']
        server1 = os_conn.create_server_for_migration(neutron=True,
                                                      label=net_name,
                                                      flavor_id=flavor_id)
        server2 = os_conn.create_server_for_migration(neutron=True,
                                                      label=net_name,
                                                      flavor_id=flavor_id)
        # Maps each server to the floating IP assigned to it below.
        vms = {server1: None, server2: None}

        for server in vms:
            os_conn.verify_instance_status(server, 'ACTIVE')

        for server in vms:
            floating_ip = os_conn.assign_floating_ip(server)
            vms[server] = floating_ip
            logger.info("Floating address {0} associated with instance {1}"
                        .format(floating_ip.ip, server.id))

        for server, floating_ip in vms.items():
            logger.info("Wait for ping from instance {} "
                        "by floating ip".format(server.id))
            devops_helpers.wait(
                # The address is bound as a default argument so the
                # predicate does not depend on the loop variable.
                lambda ip=floating_ip.ip: devops_helpers.tcp_ping(ip, 22),
                timeout=ssh_timeout,
                timeout_msg=("Instance {0} is unreachable for {1} seconds".
                             format(server.id, ssh_timeout)))

        # Commands are run inside the guests by hopping through slave-01.
        cirros_auth = SSHAuth(**settings.SSH_IMAGE_CREDENTIALS)
        slave_01_ssh = self.fuel_web.get_ssh_for_node("slave-01")

        # One scenario step covers both enabling and checking the queues,
        # so the step is announced once and both actions are done for
        # every instance.
        self.show_step(self.next_step)
        for floating_ip in vms.values():
            result = slave_01_ssh.execute_through_host(
                hostname=floating_ip.ip,
                cmd="sudo /sbin/ethtool -L eth0 combined {}".format(
                    pinned_cpus),
                auth=cirros_auth)
            assert_equal(
                result.exit_code, 0,
                "Enabling queues using ethtool failed!\n{}".format(result))

            result = slave_01_ssh.execute_through_host(
                hostname=floating_ip.ip,
                cmd="ls /sys/class/net/eth0/queues",
                auth=cirros_auth
            )
            assert_equal(result.stdout_str.count("rx"), pinned_cpus,
                         "RX queues count is not equal to vcpus count")
            assert_equal(result.stdout_str.count("tx"), pinned_cpus,
                         "TX queues count is not equal to vcpus count")

        self.show_step(self.next_step)
        # Ping the second instance's floating IP from inside the first one.
        result = slave_01_ssh.execute_through_host(
            hostname=vms[server1].ip,
            cmd="ping -c 1 {}".format(vms[server2].ip),
            auth=cirros_auth)
        assert_equal(
            result.exit_code, 0,
            "Instances does not have connectivity between each other!")

    @test(groups=["test_multiqueue_with_dpdk_and_numa_on_vlan",
                  "test_multiqueue_acceptance"],
          depends_on_groups=["prepare_slaves_5"])
    @log_snapshot_after_test
    def test_multiqueue_with_dpdk_and_numa_on_vlan(self):
        """Deploy cluster with DPDK+Cpu Pin for multiqueue check on VLAN

        Scenario:
            1. Create cluster - Neutron VLAN, Ceph for all
            2. Add 3 controllers and 3 compute+ceph-osd nodes
            3. Configure dpdk, cpu pinning and hugepages on all computes
            4. Deploy cluster
            5. Run OSTF
            6. Edit TestVM metadata - add hw_vif_multiqueue_enabled=true
            7. Create aggregate with "pinned=true" metadata for 2 computes
            8. Create flavor
            9. Spawn 2 instances, allocate floating IP to both
            10. Enable queues in instances and check it
            11. Check connectivity between instances

        """
        self.multiqueue_with_dpdk_and_numa(settings.NEUTRON_SEGMENT['vlan'])

    @test(groups=["test_multiqueue_with_dpdk_and_numa_on_tun",
                  "test_multiqueue_acceptance"],
          depends_on_groups=["prepare_slaves_5"])
    @log_snapshot_after_test
    def test_multiqueue_with_dpdk_and_numa_on_tun(self):
        """Deploy cluster with DPDK+Cpu Pin for multiqueue check on TUN

        Scenario:
            1. Create cluster - Neutron TUN(VxLAN), Ceph for all
            2. Add 3 controllers and 3 compute+ceph-osd nodes
            3. Configure dpdk, cpu pinning and hugepages on all computes
            4. Deploy cluster
            5. Run OSTF
            6. Edit TestVM metadata - add hw_vif_multiqueue_enabled=true
            7. Create aggregate with "pinned=true" metadata for 2 computes
            8. Create flavor
            9. Spawn 2 instances, allocate floating IP to both
            10. Enable queues in instances and check it
            11. Check connectivity between instances

        """
        self.multiqueue_with_dpdk_and_numa(settings.NEUTRON_SEGMENT['tun'])