Smarter Nova service management

This patch introduces a new NovaServiceManager class. This class uses
Nova's os-services API to make sure the service is actually up or
down, instead of dumbly sleep()ing.

Because the restart() method now calls stop and start in sequence, the
[whitebox-nova-compute]/restart_command config option is no longer
necessary, and has been removed. In its place, a new
[whitebox-nova-compute]/start_command option is needed.

Change-Id: I7b0c3635298b68981d9c460769a4d4ac9805357a
This commit is contained in:
Artom Lifshitz 2020-06-18 15:49:05 -04:00
parent 345b440b2b
commit 60556a3f85
8 changed files with 92 additions and 14 deletions

View File

@ -11,7 +11,8 @@ function configure {
iniset $TEMPEST_CONFIG whitebox available_cinder_storage $WHITEBOX_AVAILABLE_CINDER_STORAGE iniset $TEMPEST_CONFIG whitebox available_cinder_storage $WHITEBOX_AVAILABLE_CINDER_STORAGE
iniset $TEMPEST_CONFIG whitebox-nova-compute config_path "$WHITEBOX_NOVA_COMPUTE_CONFIG_PATH" iniset $TEMPEST_CONFIG whitebox-nova-compute config_path "$WHITEBOX_NOVA_COMPUTE_CONFIG_PATH"
iniset $TEMPEST_CONFIG whitebox-nova-compute restart_command "$WHITEBOX_NOVA_COMPUTE_RESTART_COMMAND" iniset $TEMPEST_CONFIG whitebox-nova-compute stop_command "$WHITEBOX_NOVA_COMPUTE_STOP_COMMAND"
iniset $TEMPEST_CONFIG whitebox-nova-compute start_command "$WHITEBOX_NOVA_COMPUTE_START_COMMAND"
iniset $TEMPEST_CONFIG whitebox-libvirt restart_command "$WHITEBOX_LIBVIRT_RESTART_COMMAND" iniset $TEMPEST_CONFIG whitebox-libvirt restart_command "$WHITEBOX_LIBVIRT_RESTART_COMMAND"
iniset $TEMPEST_CONFIG whitebox-libvirt stop_command "$WHITEBOX_LIBVIRT_STOP_COMMAND" iniset $TEMPEST_CONFIG whitebox-libvirt stop_command "$WHITEBOX_LIBVIRT_STOP_COMMAND"

View File

@ -3,7 +3,8 @@ NOVA_FILTERS="$NOVA_FILTERS,NUMATopologyFilter"
WHITEBOX_AVAILABLE_CINDER_STORAGE=${WHITEBOX_AVAILABLE_CINDER_STORAGE:-24} WHITEBOX_AVAILABLE_CINDER_STORAGE=${WHITEBOX_AVAILABLE_CINDER_STORAGE:-24}
WHITEBOX_NOVA_COMPUTE_CONFIG_PATH=${WHITEBOX_NOVA_COMPUTE_CONFIG_PATH:-/etc/nova/nova-cpu.conf} WHITEBOX_NOVA_COMPUTE_CONFIG_PATH=${WHITEBOX_NOVA_COMPUTE_CONFIG_PATH:-/etc/nova/nova-cpu.conf}
WHITEBOX_NOVA_COMPUTE_RESTART_COMMAND=${WHITEBOX_NOVA_COMPUTE_RESTART_COMMAND:-'systemctl restart devstack@n-cpu'} WHITEBOX_NOVA_COMPUTE_STOP_COMMAND=${WHITEBOX_NOVA_COMPUTE_STOP_COMMAND:-'systemctl stop devstack@n-cpu'}
WHITEBOX_NOVA_COMPUTE_START_COMMAND=${WHITEBOX_NOVA_COMPUTE_START_COMMAND:-'systemctl start devstack@n-cpu'}
WHITEBOX_LIBVIRT_RESTART_COMMAND=${WHITEBOX_LIBVIRT_RESTART_COMMAND:-'systemctl restart libvirtd'} WHITEBOX_LIBVIRT_RESTART_COMMAND=${WHITEBOX_LIBVIRT_RESTART_COMMAND:-'systemctl restart libvirtd'}
WHITEBOX_LIBVIRT_STOP_COMMAND=${WHITEBOX_LIBVIRT_STOP_COMMAND:-'systemctl stop libvirtd'} WHITEBOX_LIBVIRT_STOP_COMMAND=${WHITEBOX_LIBVIRT_STOP_COMMAND:-'systemctl stop libvirtd'}

View File

@ -507,8 +507,10 @@ class NUMALiveMigrationTest(BasePinningTest):
# Set both hosts's vcpu_pin_set to the CPUs in the first NUMA node to # Set both hosts's vcpu_pin_set to the CPUs in the first NUMA node to
# force instances to land there # force instances to land there
host1_sm = clients.ServiceManager(host1, 'nova-compute') host1_sm = clients.NovaServiceManager(host1, 'nova-compute',
host2_sm = clients.ServiceManager(host2, 'nova-compute') self.os_admin.services_client)
host2_sm = clients.NovaServiceManager(host2, 'nova-compute',
self.os_admin.services_client)
with whitebox_utils.multicontext( with whitebox_utils.multicontext(
host1_sm.config_options(('DEFAULT', 'vcpu_pin_set', host1_sm.config_options(('DEFAULT', 'vcpu_pin_set',
self._get_cpu_spec(topo_1[0]))), self._get_cpu_spec(topo_1[0]))),
@ -553,7 +555,8 @@ class NUMALiveMigrationTest(BasePinningTest):
# NUMA node's CPUs to vcpu_pin_set # NUMA node's CPUs to vcpu_pin_set
host_a = self.get_host_other_than(server_b['id']) host_a = self.get_host_other_than(server_b['id'])
host_a_addr = self.get_ctlplane_address(host_a) host_a_addr = self.get_ctlplane_address(host_a)
host_a_sm = clients.ServiceManager(host_a_addr, 'nova-compute') host_a_sm = clients.NovaServiceManager(
host_a_addr, 'nova-compute', self.os_admin.services_client)
numaclient_a = clients.NUMAClient(host_a_addr) numaclient_a = clients.NUMAClient(host_a_addr)
topo_a = numaclient_a.get_host_topology() topo_a = numaclient_a.get_host_topology()
with host_a_sm.config_options( with host_a_sm.config_options(
@ -603,8 +606,10 @@ class NUMALiveMigrationTest(BasePinningTest):
host, host,
num_cpus) num_cpus)
host1_sm = clients.ServiceManager(host1, 'nova-compute') host1_sm = clients.NovaServiceManager(host1, 'nova-compute',
host2_sm = clients.ServiceManager(host2, 'nova-compute') self.os_admin.services_client)
host2_sm = clients.NovaServiceManager(host2, 'nova-compute',
self.os_admin.services_client)
with whitebox_utils.multicontext( with whitebox_utils.multicontext(
host1_sm.config_options(('DEFAULT', 'vcpu_pin_set', '0,1'), host1_sm.config_options(('DEFAULT', 'vcpu_pin_set', '0,1'),
('compute', 'cpu_shared_set', '2')), ('compute', 'cpu_shared_set', '2')),

View File

@ -0,0 +1,35 @@
# Copyright 2020
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import time
from tempest.lib import exceptions as lib_exc
def wait_for_nova_service_state(client, host, binary, state):
timeout = client.build_timeout
start_time = int(time.time())
# NOTE(artom) Assumes that the (host, binary) combination will yield a
# unique service. There is no service in Nova that can run multiple copies
# on the same host.
service = client.list_services(host=host, binary=binary)['services'][0]
while service['state'] != state:
time.sleep(client.build_interval)
timed_out = int(time.time()) - start_time >= timeout
if timed_out:
raise lib_exc.TimeoutException(
'Service %s on host %s failed to reach state %s within '
'the required time (%s s)', binary, host, timeout)
service = client.list_services(host=host, binary=binary)['services'][0]

View File

@ -80,8 +80,12 @@ nova_compute_opts = [
'config_path', 'config_path',
help='Path to the configration file for the nova-compute service.'), help='Path to the configration file for the nova-compute service.'),
cfg.StrOpt( cfg.StrOpt(
'restart_command', 'start_command',
help='Command to restart the nova-compute service, without any ' help='Command to start the nova-compute service, without any '
'privilege management (ie, no sudo).'),
cfg.StrOpt(
'stop_command',
help='Command to stop the nova-compute service, without any '
'privilege management (ie, no sudo).'), 'privilege management (ie, no sudo).'),
] ]

View File

@ -25,6 +25,7 @@ from tempest import config
from tempest.lib.common import ssh from tempest.lib.common import ssh
from tempest.lib import exceptions as tempest_libexc from tempest.lib import exceptions as tempest_libexc
from whitebox_tempest_plugin.common import waiters
from whitebox_tempest_plugin import exceptions from whitebox_tempest_plugin import exceptions
CONF = config.CONF CONF = config.CONF
@ -37,10 +38,10 @@ class SSHClient(object):
def __init__(self, hostname): def __init__(self, hostname):
self.ssh_key = CONF.whitebox.ctlplane_ssh_private_key_path self.ssh_key = CONF.whitebox.ctlplane_ssh_private_key_path
self.ssh_user = CONF.whitebox.ctlplane_ssh_username self.ssh_user = CONF.whitebox.ctlplane_ssh_username
self.host = hostname self.hostname = hostname
def execute(self, command, container_name=None, sudo=False): def execute(self, command, container_name=None, sudo=False):
ssh_client = ssh.Client(self.host, self.ssh_user, ssh_client = ssh.Client(self.hostname, self.ssh_user,
key_filename=self.ssh_key) key_filename=self.ssh_key)
if (CONF.whitebox.containers and container_name): if (CONF.whitebox.containers and container_name):
executable = CONF.whitebox.container_runtime executable = CONF.whitebox.container_runtime
@ -77,15 +78,18 @@ class ServiceManager(SSHClient):
"""Init the client. """Init the client.
:param service: The service this manager is managing. Must exist as a :param service: The service this manager is managing. Must exist as a
whitebox-<service> config section. whitebox-<service> config section. For Nova services,
this must match the binary in the Nova os-services API.
""" """
super(ServiceManager, self).__init__(hostname) super(ServiceManager, self).__init__(hostname)
conf = getattr(CONF, 'whitebox-%s' % service, None) conf = getattr(CONF, 'whitebox-%s' % service, None)
if conf is None: if conf is None:
raise exceptions.MissingServiceSectionException(service=service) raise exceptions.MissingServiceSectionException(service=service)
self.service = service
self.config_path = getattr(conf, 'config_path', None) self.config_path = getattr(conf, 'config_path', None)
self.restart_command = getattr(conf, 'restart_command', None) self.restart_command = getattr(conf, 'restart_command', None)
self.stop_command = getattr(conf, 'stop_command', None) self.stop_command = getattr(conf, 'stop_command', None)
self.start_command = getattr(conf, 'start_command', None)
@contextlib.contextmanager @contextlib.contextmanager
def config_options(self, *opts): def config_options(self, *opts):
@ -164,6 +168,34 @@ class ServiceManager(SSHClient):
return result return result
class NovaServiceManager(ServiceManager):
"""A services manager for Nova services that uses Nova's service API to be
smarter about stopping and restarting services.
"""
def __init__(self, host, service, services_client):
super(NovaServiceManager, self).__init__(host, service)
self.services_client = services_client
def start(self):
result = self.execute(self.start_command, sudo=True)
waiters.wait_for_nova_service_state(self.services_client,
self.hostname, self.service,
'up')
return result
def stop(self):
result = self.execute(self.stop_command, sudo=True)
waiters.wait_for_nova_service_state(self.services_client,
self.hostname, self.service,
'down')
return result
def restart(self):
self.stop()
self.start()
class NUMAClient(SSHClient): class NUMAClient(SSHClient):
"""A client to get host NUMA information. `numactl` needs to be installed """A client to get host NUMA information. `numactl` needs to be installed
in the environment or container(s). in the environment or container(s).

View File

@ -144,8 +144,8 @@ class ServiceManagerTestCase(base.WhiteboxPluginTestCase):
def test_restart(self): def test_restart(self):
self.flags(restart_command='fake restart command', self.flags(restart_command='fake restart command',
group='whitebox-nova-compute') group='whitebox-libvirt')
service = clients.ServiceManager('fake-host', 'nova-compute') service = clients.ServiceManager('fake-host', 'libvirt')
with mock.patch.object(service, 'execute') as mock_exec: with mock.patch.object(service, 'execute') as mock_exec:
service.restart() service.restart()
mock_exec.assert_called_with('fake restart command', sudo=True) mock_exec.assert_called_with('fake restart command', sudo=True)