From 60556a3f857b474428d04db84e7045013cf87545 Mon Sep 17 00:00:00 2001 From: Artom Lifshitz Date: Thu, 18 Jun 2020 15:49:05 -0400 Subject: [PATCH] Smarter Nova service management This patch introduces a new NovaServiceManager class. This class uses Nova's os-services API to make sure the service is actually up or down, instead of dumbly sleep()ing. Because the restart() method now calls stop and start in sequence, the [whitebox-nova-compute]/restart_command config option is no longer necessary, and has been removed. In its place, a new [whitebox-nova-compute]/start_command option is needed. Change-Id: I7b0c3635298b68981d9c460769a4d4ac9805357a --- devstack/plugin.sh | 3 +- devstack/settings | 3 +- .../api/compute/test_cpu_pinning.py | 15 +++++--- whitebox_tempest_plugin/common/__init__.py | 0 whitebox_tempest_plugin/common/waiters.py | 35 +++++++++++++++++ whitebox_tempest_plugin/config.py | 8 +++- whitebox_tempest_plugin/services/clients.py | 38 +++++++++++++++++-- whitebox_tempest_plugin/tests/test_clients.py | 4 +- 8 files changed, 92 insertions(+), 14 deletions(-) create mode 100644 whitebox_tempest_plugin/common/__init__.py create mode 100644 whitebox_tempest_plugin/common/waiters.py diff --git a/devstack/plugin.sh b/devstack/plugin.sh index e2368d0b..2458b40f 100644 --- a/devstack/plugin.sh +++ b/devstack/plugin.sh @@ -11,7 +11,8 @@ function configure { iniset $TEMPEST_CONFIG whitebox available_cinder_storage $WHITEBOX_AVAILABLE_CINDER_STORAGE iniset $TEMPEST_CONFIG whitebox-nova-compute config_path "$WHITEBOX_NOVA_COMPUTE_CONFIG_PATH" - iniset $TEMPEST_CONFIG whitebox-nova-compute restart_command "$WHITEBOX_NOVA_COMPUTE_RESTART_COMMAND" + iniset $TEMPEST_CONFIG whitebox-nova-compute stop_command "$WHITEBOX_NOVA_COMPUTE_STOP_COMMAND" + iniset $TEMPEST_CONFIG whitebox-nova-compute start_command "$WHITEBOX_NOVA_COMPUTE_START_COMMAND" iniset $TEMPEST_CONFIG whitebox-libvirt restart_command "$WHITEBOX_LIBVIRT_RESTART_COMMAND" iniset $TEMPEST_CONFIG whitebox-libvirt stop_command "$WHITEBOX_LIBVIRT_STOP_COMMAND" diff --git a/devstack/settings b/devstack/settings index 9670dde2..5c5c00f5 100644 --- a/devstack/settings +++ b/devstack/settings @@ -3,7 +3,8 @@ NOVA_FILTERS="$NOVA_FILTERS,NUMATopologyFilter" WHITEBOX_AVAILABLE_CINDER_STORAGE=${WHITEBOX_AVAILABLE_CINDER_STORAGE:-24} WHITEBOX_NOVA_COMPUTE_CONFIG_PATH=${WHITEBOX_NOVA_COMPUTE_CONFIG_PATH:-/etc/nova/nova-cpu.conf} -WHITEBOX_NOVA_COMPUTE_RESTART_COMMAND=${WHITEBOX_NOVA_COMPUTE_RESTART_COMMAND:-'systemctl restart devstack@n-cpu'} +WHITEBOX_NOVA_COMPUTE_STOP_COMMAND=${WHITEBOX_NOVA_COMPUTE_STOP_COMMAND:-'systemctl stop devstack@n-cpu'} +WHITEBOX_NOVA_COMPUTE_START_COMMAND=${WHITEBOX_NOVA_COMPUTE_START_COMMAND:-'systemctl start devstack@n-cpu'} WHITEBOX_LIBVIRT_RESTART_COMMAND=${WHITEBOX_LIBVIRT_RESTART_COMMAND:-'systemctl restart libvirtd'} WHITEBOX_LIBVIRT_STOP_COMMAND=${WHITEBOX_LIBVIRT_STOP_COMMAND:-'systemctl stop libvirtd'} diff --git a/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py b/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py index 5139de71..38ac01f6 100644 --- a/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py +++ b/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py @@ -507,8 +507,10 @@ class NUMALiveMigrationTest(BasePinningTest): # Set both hosts's vcpu_pin_set to the CPUs in the first NUMA node to # force instances to land there - host1_sm = clients.ServiceManager(host1, 'nova-compute') - host2_sm = clients.ServiceManager(host2, 'nova-compute') + host1_sm = clients.NovaServiceManager(host1, 'nova-compute', + self.os_admin.services_client) + host2_sm = clients.NovaServiceManager(host2, 'nova-compute', + self.os_admin.services_client) with whitebox_utils.multicontext( host1_sm.config_options(('DEFAULT', 'vcpu_pin_set', self._get_cpu_spec(topo_1[0]))), @@ -553,7 +555,8 @@ class NUMALiveMigrationTest(BasePinningTest): # NUMA node's CPUs to vcpu_pin_set host_a = self.get_host_other_than(server_b['id']) host_a_addr = self.get_ctlplane_address(host_a) - host_a_sm = clients.ServiceManager(host_a_addr, 'nova-compute') + host_a_sm = clients.NovaServiceManager( + host_a_addr, 'nova-compute', self.os_admin.services_client) numaclient_a = clients.NUMAClient(host_a_addr) topo_a = numaclient_a.get_host_topology() with host_a_sm.config_options( @@ -603,8 +606,10 @@ class NUMALiveMigrationTest(BasePinningTest): host, num_cpus) - host1_sm = clients.ServiceManager(host1, 'nova-compute') - host2_sm = clients.ServiceManager(host2, 'nova-compute') + host1_sm = clients.NovaServiceManager(host1, 'nova-compute', + self.os_admin.services_client) + host2_sm = clients.NovaServiceManager(host2, 'nova-compute', + self.os_admin.services_client) with whitebox_utils.multicontext( host1_sm.config_options(('DEFAULT', 'vcpu_pin_set', '0,1'), ('compute', 'cpu_shared_set', '2')), diff --git a/whitebox_tempest_plugin/common/__init__.py b/whitebox_tempest_plugin/common/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/whitebox_tempest_plugin/common/waiters.py b/whitebox_tempest_plugin/common/waiters.py new file mode 100644 index 00000000..2e70f99e --- /dev/null +++ b/whitebox_tempest_plugin/common/waiters.py @@ -0,0 +1,35 @@ +# Copyright 2020 +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import time + +from tempest.lib import exceptions as lib_exc + + +def wait_for_nova_service_state(client, host, binary, state): + timeout = client.build_timeout + start_time = int(time.time()) + # NOTE(artom) Assumes that the (host, binary) combination will yield a + # unique service. There is no service in Nova that can run multiple copies + # on the same host. + service = client.list_services(host=host, binary=binary)['services'][0] + while service['state'] != state: + time.sleep(client.build_interval) + timed_out = int(time.time()) - start_time >= timeout + if timed_out: + raise lib_exc.TimeoutException( + 'Service %s on host %s failed to reach state %s within ' + 'the required time (%s s)', binary, host, timeout) + service = client.list_services(host=host, binary=binary)['services'][0] diff --git a/whitebox_tempest_plugin/config.py b/whitebox_tempest_plugin/config.py index 4a30e855..f9786f62 100644 --- a/whitebox_tempest_plugin/config.py +++ b/whitebox_tempest_plugin/config.py @@ -80,8 +80,12 @@ nova_compute_opts = [ 'config_path', help='Path to the configration file for the nova-compute service.'), cfg.StrOpt( - 'restart_command', - help='Command to restart the nova-compute service, without any ' + 'start_command', + help='Command to start the nova-compute service, without any ' + 'privilege management (ie, no sudo).'), + cfg.StrOpt( + 'stop_command', + help='Command to stop the nova-compute service, without any ' 'privilege management (ie, no sudo).'), ] diff --git a/whitebox_tempest_plugin/services/clients.py b/whitebox_tempest_plugin/services/clients.py index e4c66f39..9f789575 100644 --- a/whitebox_tempest_plugin/services/clients.py +++ b/whitebox_tempest_plugin/services/clients.py @@ -25,6 +25,7 @@ from tempest import config from tempest.lib.common import ssh from tempest.lib import exceptions as tempest_libexc +from whitebox_tempest_plugin.common import waiters from whitebox_tempest_plugin import exceptions CONF = config.CONF @@ -37,10 +38,10 @@ class SSHClient(object): def __init__(self, hostname): self.ssh_key = CONF.whitebox.ctlplane_ssh_private_key_path self.ssh_user = CONF.whitebox.ctlplane_ssh_username - self.host = hostname + self.hostname = hostname def execute(self, command, container_name=None, sudo=False): - ssh_client = ssh.Client(self.host, self.ssh_user, + ssh_client = ssh.Client(self.hostname, self.ssh_user, key_filename=self.ssh_key) if (CONF.whitebox.containers and container_name): executable = CONF.whitebox.container_runtime @@ -77,15 +78,18 @@ class ServiceManager(SSHClient): """Init the client. :param service: The service this manager is managing. Must exist as a - whitebox- config section. + whitebox- config section. For Nova services, + this must match the binary in the Nova os-services API. """ super(ServiceManager, self).__init__(hostname) conf = getattr(CONF, 'whitebox-%s' % service, None) if conf is None: raise exceptions.MissingServiceSectionException(service=service) + self.service = service self.config_path = getattr(conf, 'config_path', None) self.restart_command = getattr(conf, 'restart_command', None) self.stop_command = getattr(conf, 'stop_command', None) + self.start_command = getattr(conf, 'start_command', None) @contextlib.contextmanager def config_options(self, *opts): @@ -164,6 +168,34 @@ class ServiceManager(SSHClient): return result +class NovaServiceManager(ServiceManager): + """A services manager for Nova services that uses Nova's service API to be + smarter about stopping and restarting services. + """ + + def __init__(self, host, service, services_client): + super(NovaServiceManager, self).__init__(host, service) + self.services_client = services_client + + def start(self): + result = self.execute(self.start_command, sudo=True) + waiters.wait_for_nova_service_state(self.services_client, + self.hostname, self.service, + 'up') + return result + + def stop(self): + result = self.execute(self.stop_command, sudo=True) + waiters.wait_for_nova_service_state(self.services_client, + self.hostname, self.service, + 'down') + return result + + def restart(self): + self.stop() + self.start() + + class NUMAClient(SSHClient): """A client to get host NUMA information. `numactl` needs to be installed in the environment or container(s). diff --git a/whitebox_tempest_plugin/tests/test_clients.py b/whitebox_tempest_plugin/tests/test_clients.py index 4d27cbac..ea838087 100644 --- a/whitebox_tempest_plugin/tests/test_clients.py +++ b/whitebox_tempest_plugin/tests/test_clients.py @@ -144,8 +144,8 @@ class ServiceManagerTestCase(base.WhiteboxPluginTestCase): def test_restart(self): self.flags(restart_command='fake restart command', - group='whitebox-nova-compute') - service = clients.ServiceManager('fake-host', 'nova-compute') + group='whitebox-libvirt') + service = clients.ServiceManager('fake-host', 'libvirt') with mock.patch.object(service, 'execute') as mock_exec: service.restart() mock_exec.assert_called_with('fake restart command', sudo=True)