Add support for 'crash' reboot method

Change-Id: Iffae9115d858d8038287eaf764fe84f9bcf10ef5
This commit is contained in:
Julia Marciano 2021-01-27 23:23:41 +02:00 committed by Federico Ressi
parent d7e8a65948
commit 7875210146
5 changed files with 82 additions and 70 deletions

View File

@ -43,6 +43,7 @@ activate_server = _client.activate_server
ensure_server_status = _client.ensure_server_status ensure_server_status = _client.ensure_server_status
migrate_server = _client.migrate_server migrate_server = _client.migrate_server
confirm_resize = _client.confirm_resize confirm_resize = _client.confirm_resize
reboot_server = _client.reboot_server
NovaServer = _client.NovaServer NovaServer = _client.NovaServer
WaitForCloudInitTimeoutError = _cloud_init.WaitForCloudInitTimeoutError WaitForCloudInitTimeoutError = _cloud_init.WaitForCloudInitTimeoutError

View File

@ -238,7 +238,7 @@ class WaitForServerStatusTimeout(WaitForServerStatusError):
NOVA_SERVER_TRANSIENT_STATUS: typing.Dict[str, typing.List[str]] = { NOVA_SERVER_TRANSIENT_STATUS: typing.Dict[str, typing.List[str]] = {
'ACTIVE': ['BUILD', 'SHUTOFF'], 'ACTIVE': ['BUILD', 'SHUTOFF', 'REBOOT'],
'SHUTOFF': ['ACTIVE'], 'SHUTOFF': ['ACTIVE'],
'VERIFY_RESIZE': ['RESIZE'], 'VERIFY_RESIZE': ['RESIZE'],
} }
@ -324,7 +324,7 @@ def activate_server(server: ServerType,
LOG.info(f"Confirm resize of server '{server.id}' " LOG.info(f"Confirm resize of server '{server.id}' "
f"(status='{server.status}').") f"(status='{server.status}').")
client.servers.confirm_resize(server) client.servers.confirm_resize(server)
else: elif server.status != 'REBOOT':
LOG.warning(f"Try activating server '{server.id}' by rebooting " LOG.warning(f"Try activating server '{server.id}' by rebooting "
f"it (status='{server.status}').") f"it (status='{server.status}').")
client.servers.reboot(server.id, reboot_type='HARD') client.servers.reboot(server.id, reboot_type='HARD')
@ -334,6 +334,27 @@ def activate_server(server: ServerType,
sleep_time=sleep_time) sleep_time=sleep_time)
def reboot_server(server: ServerType,
                  client: NovaClientType = None,
                  timeout: tobiko.Seconds = None,
                  sleep_time: tobiko.Seconds = None) -> NovaServer:
    """Reboot (or start) a Nova server and wait for it to become ACTIVE.

    The server is looked up first, then handled according to its status:

    - ``REBOOT``: no request is sent and the server is returned as it
      was fetched, without waiting.
    - ``SHUTOFF``: the server is started.
    - any other status: a reboot is requested with the client's default
      reboot type.

    :param server: server (or reference to it) accepted by ``get_server``
    :param client: Nova client (or reference to it) accepted by
        ``nova_client``
    :param timeout: forwarded to ``wait_for_server_status``
    :param sleep_time: forwarded to ``wait_for_server_status``
    :returns: the server as returned by ``wait_for_server_status``, or
        the fetched server when it was already rebooting
    """
    client = nova_client(client)
    server = get_server(server=server, client=client)

    if server.status == 'REBOOT':
        # A reboot is already in progress: nothing to request.
        return server

    if server.status != 'SHUTOFF':
        LOG.info(f"Reboot server '{server.id}' (status='{server.status}').")
        client.servers.reboot(server.id)
    else:
        # A powered-off server has to be started rather than rebooted.
        LOG.info(f"Start server '{server.id}' (status='{server.status}').")
        client.servers.start(server.id)

    return wait_for_server_status(server=server.id,
                                  status='ACTIVE',
                                  client=client,
                                  timeout=timeout,
                                  sleep_time=sleep_time)
def ensure_server_status(server: ServerType, def ensure_server_status(server: ServerType,
status: str, status: str,
client: NovaClientType = None, client: NovaClientType = None,

View File

@ -72,8 +72,10 @@ reboot_host = _reboot.reboot_host
RebootHostError = _reboot.RebootHostError RebootHostError = _reboot.RebootHostError
RebootHostOperation = _reboot.RebootHostOperation RebootHostOperation = _reboot.RebootHostOperation
RebootHostTimeoutError = _reboot.RebootHostTimeoutError RebootHostTimeoutError = _reboot.RebootHostTimeoutError
hard_reset_method = _reboot.hard_reset_method RebootHostMethod = _reboot.RebootHostMethod
soft_reset_method = _reboot.soft_reset_method crash_method = RebootHostMethod.CRASH
hard_reset_method = RebootHostMethod.HARD
soft_reset_method = RebootHostMethod.SOFT
ssh_process = _ssh.ssh_process ssh_process = _ssh.ssh_process
ssh_execute = _ssh.ssh_execute ssh_execute = _ssh.ssh_execute

View File

@ -13,19 +13,28 @@
# under the License. # under the License.
from __future__ import absolute_import from __future__ import absolute_import
import enum
import typing # noqa import typing # noqa
from oslo_log import log from oslo_log import log
import tobiko import tobiko
from tobiko.shell.sh import _command
from tobiko.shell.sh import _uptime from tobiko.shell.sh import _uptime
from tobiko.shell import ssh from tobiko.shell import ssh
LOG = log.getLogger(__name__) LOG = log.getLogger(__name__)
hard_reset_method = 'echo b > /proc/sysrq-trigger'
class RebootHostMethod(enum.Enum):
    """Shell commands that can be used to reboot a remote host.

    Every member is declared as a one-element tuple. ``enum`` unpacks the
    tuple into ``__init__``, so ``member.command`` is the plain command
    string while ``member.value`` remains the tuple itself.
    """

    # Regular reboot through the reboot binary.
    SOFT = ('/sbin/reboot',)
    # Enable SysRq, then write 'b' to the SysRq trigger.
    HARD = (
        'echo 1 > /proc/sys/kernel/sysrq && echo b > /proc/sysrq-trigger',
    )
    # Enable SysRq, then write 'c' to the SysRq trigger.
    CRASH = (
        'echo 1 > /proc/sys/kernel/sysrq && echo c > /proc/sysrq-trigger',
    )

    def __init__(self, command: str):
        # Receives the single element of the member's tuple value.
        self.command = command
class RebootHostError(tobiko.TobikoException): class RebootHostError(tobiko.TobikoException):
@ -39,53 +48,37 @@ class RebootHostTimeoutError(RebootHostError):
def reboot_host(ssh_client: ssh.SSHClientFixture, def reboot_host(ssh_client: ssh.SSHClientFixture,
wait: bool = True, wait: bool = True,
timeout: tobiko.Seconds = None, timeout: tobiko.Seconds = None,
method: str = None, method: RebootHostMethod = RebootHostMethod.SOFT):
hard: bool = False):
if method not in (None, hard_reset_method, soft_reset_method):
raise ValueError(f"Unsupported method: '{method}'")
command = method or (hard and hard_reset_method) or None
reboot = RebootHostOperation(ssh_client=ssh_client, reboot = RebootHostOperation(ssh_client=ssh_client,
wait=wait,
timeout=timeout, timeout=timeout,
command=command) method=method)
return tobiko.setup_fixture(reboot) tobiko.setup_fixture(reboot)
if wait:
reboot.wait_for_operation()
return reboot
class RebootHostOperation(tobiko.Operation): class RebootHostOperation(tobiko.Operation):
hostname = None
is_rebooted: typing.Optional[bool] = None
start_time: tobiko.Seconds = None
default_wait_timeout = 300. default_wait_timeout = 300.
default_wait_interval = 5. default_wait_interval = 5.
default_wait_count = 60 default_wait_count = 60
command = soft_reset_method
@property
def ssh_client(self) -> ssh.SSHClientFixture:
if self._ssh_client is None:
raise ValueError(f"SSH client for object '{self}' is None")
return self._ssh_client
def __init__(self, def __init__(self,
ssh_client: typing.Optional[ssh.SSHClientFixture] = None, ssh_client: ssh.SSHClientFixture,
wait: bool = True,
timeout: tobiko.Seconds = None, timeout: tobiko.Seconds = None,
command: typing.Optional[str] = None): method: RebootHostMethod = RebootHostMethod.SOFT):
super(RebootHostOperation, self).__init__() super(RebootHostOperation, self).__init__()
self._ssh_client = ssh_client tobiko.check_valid_type(ssh_client, ssh.SSHClientFixture)
tobiko.check_valid_type(self.ssh_client, ssh.SSHClientFixture) tobiko.check_valid_type(method, RebootHostMethod)
self.wait = bool(wait) self.is_rebooted = False
self.method = method
self.ssh_client = ssh_client
self.start_time: tobiko.Seconds = None
self.timeout = tobiko.to_seconds(timeout) self.timeout = tobiko.to_seconds(timeout)
if command is not None:
self.command = command
def run_operation(self): def run_operation(self):
ssh_client = self.ssh_client self.is_rebooted = False
self.is_rebooted = None
self.start_time = None self.start_time = None
for attempt in tobiko.retry( for attempt in tobiko.retry(
timeout=self.timeout, timeout=self.timeout,
@ -93,14 +86,13 @@ class RebootHostOperation(tobiko.Operation):
default_count=self.default_wait_count, default_count=self.default_wait_count,
default_interval=self.default_wait_interval): default_interval=self.default_wait_interval):
try: try:
channel = ssh_client.connect( channel = self.ssh_client.connect(
connection_timeout=attempt.time_left, connection_timeout=attempt.time_left,
retry_count=1) retry_count=1)
self.hostname = self.hostname or ssh_client.hostname
LOG.info("Executing reboot command on host " LOG.info("Executing reboot command on host "
f"'{self.hostname}' (command='{self.command}')... ") f"'{self.hostname}' (command='{self.command}')... ")
self.start_time = tobiko.time() self.start_time = tobiko.time()
channel.exec_command(f"sudo /bin/sh -c '{self.command}'") channel.exec_command(str(self.command))
except Exception as ex: except Exception as ex:
if attempt.time_left > 0.: if attempt.time_left > 0.:
LOG.debug(f"Unable to reboot remote host " LOG.debug(f"Unable to reboot remote host "
@ -108,25 +100,29 @@ class RebootHostOperation(tobiko.Operation):
else: else:
LOG.exception(f"Unable to reboot remote host: {ex}") LOG.exception(f"Unable to reboot remote host: {ex}")
raise RebootHostTimeoutError( raise RebootHostTimeoutError(
hostname=self.hostname or ssh_client.host, hostname=self.hostname or self.ssh_client.host,
timeout=attempt.timeout) from ex timeout=attempt.timeout) from ex
else: else:
self.is_rebooted = False
LOG.info(f"Host '{self.hostname}' is rebooting " LOG.info(f"Host '{self.hostname}' is rebooting "
f"(command='{self.command}').") f"(command='{self.command}').")
break break
finally: finally:
# Ensure we close connection after rebooting command # Ensure we close connection after rebooting command
ssh_client.close() self.ssh_client.close()
if self.wait:
self.wait_for_operation()
def cleanup_fixture(self): def cleanup_fixture(self):
self.is_rebooted = None self.is_rebooted = False
self.hostname = None
self.start_time = None self.start_time = None
@property
def command(self) -> _command.ShellCommand:
    """Shell command that runs the selected reboot method.

    The command string of ``self.method`` is passed to ``/bin/sh -c``
    under ``sudo`` and wrapped by ``_command.shell_command``.
    """
    arguments = ['sudo', '/bin/sh', '-c', self.method.command]
    return _command.shell_command(arguments)
@property
def hostname(self) -> str:
    """Host name reported by the SSH client used by this operation."""
    client = self.ssh_client
    return client.hostname
@property @property
def elapsed_time(self) -> tobiko.Seconds: def elapsed_time(self) -> tobiko.Seconds:
if self.start_time is None: if self.start_time is None:
@ -166,7 +162,6 @@ class RebootHostOperation(tobiko.Operation):
f"'{self.hostname}'", exc_info=1) f"'{self.hostname}'", exc_info=1)
attempt.check_limits() attempt.check_limits()
else: else:
# verify that reboot actually happened by comparing elapsed # verify that reboot actually happened by comparing elapsed
# time with up_time # time with up_time
elapsed_time = self.elapsed_time elapsed_time = self.elapsed_time
@ -184,8 +179,4 @@ class RebootHostOperation(tobiko.Operation):
attempt.check_limits() attempt.check_limits()
finally: finally:
if not self.is_rebooted: if not self.is_rebooted:
try: self.ssh_client.close()
tobiko.cleanup_fixture(self.ssh_client)
except Exception:
LOG.exception("Error closing SSH connection to "
f"'{self.hostname}'")

View File

@ -21,6 +21,7 @@ from oslo_log import log
import testtools import testtools
import tobiko import tobiko
from tobiko.shell import ping
from tobiko.shell import sh from tobiko.shell import sh
from tobiko.openstack import nova from tobiko.openstack import nova
from tobiko.openstack import stacks from tobiko.openstack import stacks
@ -37,7 +38,7 @@ class RebootHostTest(testtools.TestCase):
stack = tobiko.required_setup_fixture(RebootHostStack) stack = tobiko.required_setup_fixture(RebootHostStack)
def test_reboot_host(self, **params): def test_reboot_host(self, nova_reboot=False, **params):
server = self.stack.ensure_server_status('ACTIVE') server = self.stack.ensure_server_status('ACTIVE')
self.assertEqual('ACTIVE', server.status) self.assertEqual('ACTIVE', server.status)
@ -53,19 +54,16 @@ class RebootHostTest(testtools.TestCase):
timeout=90.) timeout=90.)
reboot = sh.reboot_host(ssh_client=ssh_client, **params) reboot = sh.reboot_host(ssh_client=ssh_client, **params)
self.assertIs(ssh_client, reboot.ssh_client) self.assertIs(ssh_client, reboot.ssh_client)
self.assertEqual(ssh_client.hostname, reboot.hostname) self.assertEqual(ssh_client.hostname, reboot.hostname)
self.assertIs(params.get('wait', True), reboot.wait) method = params.get('method') or sh.soft_reset_method
hard = params.get('hard', False) self.assertIs(method, reboot.method)
command = (params.get('method') or
(hard and sh.hard_reset_method) or
sh.soft_reset_method)
self.assertEqual(command, reboot.command)
if not reboot.wait: if not reboot.is_rebooted:
self.assertFalse(reboot.is_rebooted)
self.assert_is_not_connected(ssh_client) self.assert_is_not_connected(ssh_client)
if nova_reboot:
ping.ping_until_unreceived(self.stack.ip_address)
nova.reboot_server(server)
reboot.wait_for_operation() reboot.wait_for_operation()
self.assertTrue(reboot.is_rebooted) self.assertTrue(reboot.is_rebooted)
@ -80,8 +78,10 @@ class RebootHostTest(testtools.TestCase):
"uptime=%r", uptime_1) "uptime=%r", uptime_1)
self.assertGreater(boottime_1, boottime_0) self.assertGreater(boottime_1, boottime_0)
def test_reboot_host_with_hard(self): def test_reboot_host_with_chash_method(self):
self.test_reboot_host(hard=True) self.test_reboot_host(method=sh.crash_method,
wait=False,
nova_reboot=True)
def test_reboot_host_with_hard_method(self): def test_reboot_host_with_hard_method(self):
self.test_reboot_host(method=sh.hard_reset_method) self.test_reboot_host(method=sh.hard_reset_method)
@ -90,14 +90,11 @@ class RebootHostTest(testtools.TestCase):
self.test_reboot_host(method=sh.soft_reset_method) self.test_reboot_host(method=sh.soft_reset_method)
def test_reboot_host_with_invalid_method(self): def test_reboot_host_with_invalid_method(self):
self.assertRaises(ValueError, self.assertRaises(TypeError,
sh.reboot_host, sh.reboot_host,
ssh_client=self.stack.ssh_client, ssh_client=self.stack.ssh_client,
method='<invalid-method>') method='<invalid-method>')
def test_reboot_host_with_no_hard(self):
self.test_reboot_host(hard=False)
def test_reboot_host_with_wait(self): def test_reboot_host_with_wait(self):
self.test_reboot_host(wait=True) self.test_reboot_host(wait=True)