Refactor reboot host operation

Change-Id: I9e6e2f129173f3bcc651169c70be3890eebaf4c9
This commit is contained in:
Federico Ressi 2020-08-24 14:36:07 +02:00
parent a78e8bd811
commit 5a8efae107
4 changed files with 124 additions and 121 deletions

View File

@ -262,6 +262,16 @@ class ServerStackFixture(heat.HeatStackFixture):
def user_data(self):
return nova.user_data(self.cloud_config)
def ensure_server_status(self, status):
    """Make sure the stack's Nova server is in the requested status.

    The fixture is set up first, then the server is looked up through
    ``nova.get_server`` and its status compared with ``status``.

    :param status: expected server status (for example ``"ACTIVE"``).

    When the status already matches nothing else is done. Otherwise,
    if ``"ACTIVE"`` was requested the fixture is reset; for any other
    requested status ``tobiko.skip`` is called because no transition
    to that status is implemented here.
    """
    tobiko.setup_fixture(self)
    if nova.get_server(self.server_id).status == status:
        # Already in the requested status: nothing to do.
        return
    if status == "ACTIVE":
        tobiko.reset_fixture(self)
        return
    tobiko.skip(f"{type(self).__name__}.ensure_server_status "
                "method not implemented")
class PeerServerStackFixture(ServerStackFixture):
"""Server witch networking access requires passing by a peer Nova server

View File

@ -62,7 +62,9 @@ list_processes = _ps.list_processes
wait_for_processes = _ps.wait_for_processes
# Host reboot API re-exported from the private ``_reboot`` module: the
# ``reboot_host`` helper, the operation class and its error types.
reboot_host = _reboot.reboot_host
RebootHostError = _reboot.RebootHostError
RebootHostOperation = _reboot.RebootHostOperation
RebootHostTimeoutError = _reboot.RebootHostTimeoutError
ssh_process = _ssh.ssh_process
ssh_execute = _ssh.ssh_execute

View File

@ -13,11 +13,12 @@
# under the License.
from __future__ import absolute_import
import time
import typing # noqa
from oslo_log import log
import tobiko
from tobiko.shell.sh import _exception
from tobiko.shell.sh import _execute
from tobiko.shell.sh import _uptime
from tobiko.shell import ssh
@ -26,126 +27,127 @@ from tobiko.shell import ssh
LOG = log.getLogger(__name__)
class RebootHostTimeoutError(tobiko.TobikoException):
# Base class of the host reboot errors (RebootHostTimeoutError derives from
# it). The message template interpolates the ``hostname`` and ``cause``
# fields.
class RebootHostError(tobiko.TobikoException):
message = "host {hostname!r} not rebooted: {cause}"
# Reboot error specialization whose message reports the ``hostname`` and the
# ``timeout`` (in seconds) after which the host was still not rebooted.
class RebootHostTimeoutError(RebootHostError):
message = "host {hostname!r} not rebooted after {timeout!s} seconds"
def reboot_host(ssh_client, wait=True, timeout=None, sleep_interval=None):
reboot = RebootHostOperation(ssh_client=ssh_client,
wait=wait,
timeout=timeout,
sleep_interval=sleep_interval)
def reboot_host(ssh_client, wait: bool = True, timeout: tobiko.Seconds = None):
    """Reboot the host reachable through ``ssh_client``.

    Builds a ``RebootHostOperation`` from the given arguments and hands it
    to ``tobiko.setup_fixture``.

    :param ssh_client: SSH client used to reach the host to reboot.
    :param wait: forwarded to ``RebootHostOperation`` as ``wait``.
    :param timeout: forwarded to ``RebootHostOperation`` as ``timeout``.
    :return: whatever ``tobiko.setup_fixture`` returns for the operation.
    """
    operation = RebootHostOperation(
        ssh_client=ssh_client,
        wait=wait,
        timeout=timeout,
    )
    return tobiko.setup_fixture(operation)
class RebootHostOperation(tobiko.Operation):
wait = True
start_time = None
hostname = None
timeout = 600.
ssh_client = None
sleep_interval = 1.
is_rebooted = False
is_rebooted: typing.Optional[bool] = None
start_time: tobiko.Seconds = None
def __init__(self, ssh_client=None, timeout=None, wait=None,
sleep_interval=None):
default_wait_timeout = 300.
default_wait_interval = 5.
default_wait_count = 60
# Read-only accessor for the SSH client stored in ``self._ssh_client``.
# NOTE(review): no class-level default for ``_ssh_client`` is visible in this
# diff; if the attribute was never assigned, reading this property would raise
# AttributeError -- confirm against the full class.
@property
def ssh_client(self) -> ssh.SSHClientFixture:
return self._ssh_client
def __init__(self,
ssh_client: typing.Optional[ssh.SSHClientFixture] = None,
wait=True,
timeout: tobiko.Seconds = None):
super(RebootHostOperation, self).__init__()
if ssh_client:
self.ssh_client = ssh_client
if ssh_client is not None:
self._ssh_client = ssh_client
tobiko.check_valid_type(self.ssh_client, ssh.SSHClientFixture)
if timeout is not None:
self.timeout = float(timeout)
assert self.timeout > 0.
if wait is not None:
self.wait = bool(wait)
if sleep_interval is not None:
self.sleep_interval = float(sleep_interval)
assert self.sleep_interval >= 0.
self.wait = bool(wait)
self.timeout = tobiko.to_seconds(timeout)
def run_operation(self):
self.start_time = time.time()
ssh_client = self.ssh_client
ssh_client.connect(connection_timeout=self.timeout)
with ssh_client:
self.hostname = ssh_client.hostname
LOG.debug(f"Rebooting host '{self.hostname}'... ")
self.is_rebooted = False
self.hostname = hostname = ssh_client.hostname
LOG.debug('Rebooting host %r...', hostname)
_execute.execute('sudo /sbin/reboot', timeout=self.timeout,
stdout=False, ssh_client=ssh_client)
self.start_time = tobiko.time()
try:
_execute.execute('sudo /sbin/reboot',
stdout=False,
ssh_client=ssh_client,
timeout=30.)
except _exception.ShellTimeoutExpired as ex:
LOG.debug(f"Reboot command timeout expired: {ex}")
if self.wait:
self.wait_for_operation()
def cleanup_fixture(self):
if self.hostname is not None:
del self.hostname
if self.start_time is not None:
del self.start_time
self.is_rebooted = False
self.is_rebooted = None
self.hostname = None
self.start_time = None
def wait_for_operation(self):
sleep_interval = self.sleep_interval
while not self.check_is_rebooted():
if sleep_interval > 0.:
time.sleep(sleep_interval)
@property
def elapsed_time(self) -> tobiko.Seconds:
    """Seconds passed since ``start_time`` according to ``tobiko.time()``.

    :return: ``None`` while ``start_time`` is unset, otherwise the
        difference between the current ``tobiko.time()`` and ``start_time``.
    """
    started_at = self.start_time
    if started_at is None:
        return None
    return tobiko.time() - started_at
def check_is_rebooted(self):
@property
def time_left(self) -> tobiko.Seconds:
    """Remaining part of ``timeout`` once ``elapsed_time`` is subtracted.

    :return: ``None`` when either ``timeout`` or ``elapsed_time`` is
        ``None``, otherwise ``timeout - elapsed_time`` (no clamping is
        applied, so the value can be negative once the timeout is exceeded).
    """
    if self.timeout is not None and self.elapsed_time is not None:
        return self.timeout - self.elapsed_time
    return None
def wait_for_operation(self, timeout: tobiko.Seconds = None):
if self.is_rebooted:
return True
# ensure SSH connection is closed before retrying connecting
ssh_client = self.ssh_client
tobiko.cleanup_fixture(ssh_client)
assert ssh_client.client is None
elapsed_time = self.check_elapsed_time()
LOG.debug("Reconnecting to host %r %s seconds after reboot...",
self.hostname, elapsed_time)
if elapsed_time is None:
raise RuntimeError("Reboot operation didn't started")
return
try:
uptime = _uptime.get_uptime(ssh_client=ssh_client,
timeout=(self.timeout-elapsed_time))
except Exception:
# if disconnected while getting uptime we assume the VM is just
# rebooting. These are good news!
tobiko.cleanup_fixture(ssh_client)
assert ssh_client.client is None
LOG.debug("Unable to get uptime from host %r", self.hostname,
exc_info=1)
return False
for attempt in tobiko.retry(
timeout=tobiko.min_seconds(timeout, self.time_left),
default_timeout=self.default_wait_timeout,
default_count=self.default_wait_count,
default_interval=self.default_wait_interval):
# ensure SSH connection is closed before retrying connecting
tobiko.cleanup_fixture(self.ssh_client)
assert self.ssh_client.client is None
LOG.debug(f"Getting uptime after reboot '{self.hostname}' "
"after reboot... ")
try:
up_time = _uptime.get_uptime(ssh_client=self.ssh_client,
timeout=30.)
# verify that reboot actually happened by comparing elapsed time with
# uptime
elapsed_time = self.get_elapsed_time()
if uptime >= elapsed_time:
tobiko.cleanup_fixture(ssh_client)
assert ssh_client.client is None
LOG.warning("Host %r still not restarted %s seconds after "
"reboot operation (uptime=%r)", self.hostname,
elapsed_time, uptime)
return False
except Exception:
# if disconnected while getting up time we assume the VM is
# just rebooting. These are good news!
LOG.debug("Unable to get uptime from host "
f"'{self.hostname}'", exc_info=1)
attempt.check_limits()
else:
self.is_rebooted = True
LOG.debug("Host %r resterted %s seconds after reboot operation"
"(uptime=%r)", self.hostname, elapsed_time - uptime, uptime)
assert ssh_client.client is not None
return True
def check_elapsed_time(self):
elapsed_time = self.get_elapsed_time()
if elapsed_time is None:
return None
if elapsed_time >= self.timeout:
raise RebootHostTimeoutError(hostname=self.hostname,
timeout=self.timeout)
return elapsed_time
def get_elapsed_time(self):
start_time = self.start_time
if start_time is None:
return None
return time.time() - start_time
# verify that reboot actually happened by comparing elapsed
# time with up_time
elapsed_time = self.elapsed_time
if up_time < elapsed_time:
assert self.ssh_client.client is not None
self.is_rebooted = True
LOG.debug(f"Host '{self.hostname}' restarted "
f"{elapsed_time} seconds after "
f"reboot operation (up_time={up_time})")
break
else:
LOG.debug(f"Host '{self.hostname}' still not "
f"restarted {elapsed_time} seconds after "
f"reboot operation (up_time={up_time!r})")
attempt.check_limits()
finally:
if not self.is_rebooted:
try:
tobiko.cleanup_fixture(self.ssh_client)
except Exception:
LOG.exception("Error closing SSH connection to "
f"'{self.hostname}'")

View File

@ -18,6 +18,7 @@ from __future__ import absolute_import
import time
from oslo_log import log
import paramiko
import testtools
import tobiko
@ -29,20 +30,16 @@ from tobiko.openstack import stacks
LOG = log.getLogger(__name__)
class RebootableServer(stacks.CirrosServerStackFixture):
class RebootHostStack(stacks.CirrosServerStackFixture):
"Server to be rebooted"
@tobiko.skip_if(
"This test is often failing because server endup in SHUTOFF "
"state", True)
class RebootHostTest(testtools.TestCase):
stack = tobiko.required_setup_fixture(RebootableServer)
stack = tobiko.required_setup_fixture(RebootHostStack)
def test_reboot_host(self, **params):
server = nova.activate_server(self.stack.server_id)
self.assertEqual('ACTIVE', server.status)
self.stack.ensure_server_status('ACTIVE')
ssh_client = self.stack.ssh_client
uptime_0 = sh.get_uptime(ssh_client=ssh_client)
@ -59,12 +56,7 @@ class RebootHostTest(testtools.TestCase):
self.assertIs(ssh_client, reboot.ssh_client)
self.assertEqual(ssh_client.hostname, reboot.hostname)
self.assertGreater(reboot.start_time, 0.)
self.assertEqual(params.get('timeout', sh.RebootHostOperation.timeout),
reboot.timeout)
self.assertIs(params.get('wait', True), reboot.wait)
self.assertEqual(params.get('sleep_interval', 1.),
reboot.sleep_interval)
if not reboot.wait:
self.assertFalse(reboot.is_rebooted)
@ -74,7 +66,7 @@ class RebootHostTest(testtools.TestCase):
self.assertTrue(reboot.is_rebooted)
self.assert_is_connected(ssh_client)
server = nova.wait_for_server_status(server, 'ACTIVE')
server = nova.wait_for_server_status(self.stack.server_id, 'ACTIVE')
self.assertEqual('ACTIVE', server.status)
uptime_1 = sh.get_uptime(ssh_client=ssh_client)
@ -89,18 +81,15 @@ class RebootHostTest(testtools.TestCase):
def test_reboot_host_with_no_wait(self):
self.test_reboot_host(wait=False)
def test_reboot_server_after_shutoff(self):
server = nova.activate_server(self.stack.server_id)
self.assertEqual('ACTIVE', server.status)
def test_reboot_server_when_shutoff(self):
self.stack.ensure_server_status('SHUTOFF')
ssh_client = self.stack.ssh_client
ssh_client.connect()
self.assert_is_connected(ssh_client)
server = nova.shutoff_server(self.stack.server_id)
self.assertEqual('SHUTOFF', server.status)
self.assertRaises(sh.ShellTimeoutExpired, sh.reboot_host,
ssh_client=ssh_client, timeout=5.0)
self.assert_is_not_connected(ssh_client)
errors = (paramiko.ssh_exception.NoValidConnectionsError,
paramiko.SSHException)
self.assertRaises(errors, sh.reboot_host, ssh_client=ssh_client,
timeout=5.0)
self.assert_is_not_connected(ssh_client)
server = nova.wait_for_server_status(self.stack.server_id, 'SHUTOFF')
self.assertEqual('SHUTOFF', server.status)