Use retry tool for waiting for healthy Nova services

Change-Id: Idd8976a67f175363267c4a89ae0a289d0fe76154
This commit is contained in:
Federico Ressi 2020-07-29 15:04:51 +02:00 committed by pinikomarov
parent 7dfc3a71fd
commit 6dcb631a08
8 changed files with 138 additions and 33 deletions

View File

@ -84,6 +84,7 @@ get_operation_name = _operation.get_operation_name
operation_config = _operation.operation_config
retry = _retry.retry
Retry = _retry.Retry
retry_attempt = _retry.retry_attempt
retry_on_exception = _retry.retry_on_exception
RetryAttempt = _retry.RetryAttempt

View File

@ -117,7 +117,6 @@ class Retry(object):
count: typing.Optional[int] = None,
timeout: _time.Seconds = None,
interval: _time.Seconds = None):
if count:
self.count = count
self.timeout = _time.to_seconds(timeout)
self.interval = _time.to_seconds(interval)
@ -151,12 +150,17 @@ class Retry(object):
elapsed_time = _time.time() - start_time
def retry(other: typing.Optional[Retry] = None,
          count: typing.Optional[int] = None,
          timeout: _time.Seconds = None,
          interval: _time.Seconds = None) -> Retry:
    """Create a Retry object, optionally inheriting limits from another one.

    :param other: optional Retry instance used as source of default values
        for the parameters that are not explicitly given. It is validated
        with ``_exception.check_valid_type``.
    :param count: value forwarded to ``Retry`` as ``count``.
    :param timeout: value forwarded to ``Retry`` as ``timeout``.
    :param interval: value forwarded to ``Retry`` as ``interval``.
    :return: a new ``Retry`` instance.
    """
    if other is not None:
        _exception.check_valid_type(other, Retry)
        # Compare against None instead of relying on truthiness, so that an
        # explicit falsy override (e.g. timeout=0) is honoured instead of
        # being silently replaced by the value taken from `other`.
        if count is None:
            count = other.count
        if timeout is None:
            timeout = other.timeout
        if interval is None:
            interval = other.interval
    return Retry(count=count, timeout=timeout, interval=interval)
def retry_on_exception(exception: Exception,

View File

@ -17,6 +17,7 @@ from tobiko.openstack.nova import _client
from tobiko.openstack.nova import _cloud_init
from tobiko.openstack.nova import _hypervisor
from tobiko.openstack.nova import _server
from tobiko.openstack.nova import _service
CLIENT_CLASSES = _client.CLIENT_CLASSES
@ -25,9 +26,11 @@ get_nova_client = _client.get_nova_client
get_server = _client.get_server
find_hypervisor = _client.find_hypervisor
find_server = _client.find_server
find_service = _client.find_service
HasNovaClientMixin = _client.HasNovaClientMixin
list_hypervisors = _client.list_hypervisors
list_servers = _client.list_servers
list_services = _client.list_services
nova_client = _client.nova_client
NovaClientFixture = _client.NovaClientFixture
wait_for_server_status = _client.wait_for_server_status
@ -49,3 +52,5 @@ get_different_host_hypervisors = _hypervisor.get_different_host_hypervisors
find_server_ip_address = _server.find_server_ip_address
HasServerMixin = _server.HasServerMixin
list_server_ip_addresses = _server.list_server_ip_addresses
wait_for_services_up = _service.wait_for_services_up

View File

@ -94,6 +94,20 @@ def find_server(client=None, unique=False, **params):
return servers.first
def list_services(client=None, **params) -> tobiko.Selection:
    """List Nova services, keeping only those matching given attributes.

    :param client: passed to ``nova_client()`` to obtain the client used
        for listing services.
    :param params: attribute filters forwarded to
        ``Selection.with_attributes``.
    :return: selection of the services matching ``params``.
    """
    all_services = nova_client(client).services.list()
    selection = tobiko.select(all_services)
    return selection.with_attributes(**params)
def find_service(client=None, unique=False, **params):
    """Find one Nova service between those matching given attributes.

    :param client: forwarded to ``list_services``.
    :param unique: when true the ``unique`` item of the selection is
        returned, otherwise the ``first`` one.
    :param params: attribute filters forwarded to ``list_services``.
    """
    matching = list_services(client=client, **params)
    return matching.unique if unique else matching.first
def get_server(server, client=None, **params):
    """Get a server by calling ``servers.get`` on the Nova client.

    :param server: first argument passed to ``servers.get``.
    :param client: passed to ``nova_client()`` to obtain the client.
    :param params: extra keyword arguments forwarded to ``servers.get``.
    """
    servers_manager = nova_client(client).servers
    return servers_manager.get(server, **params)

View File

@ -0,0 +1,74 @@
# Copyright 2019 Red Hat
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
import json
import typing
from oslo_log import log
import tobiko
from tobiko.openstack.nova import _client
LOG = log.getLogger(__name__)
class NovaServiceException(tobiko.TobikoException):
    """Base class of the exceptions raised while checking Nova services."""
class NovaServicesNotfound(NovaServiceException):
    """Raised when listing Nova services produces no service at all."""

    message = "Nova services not found ({attributes})"
class NovaServicesFailed(NovaServiceException):
    """Raised when some of the listed Nova services are not healthy."""

    message = "Nova services are failed:\n{details}"
def services_details(services: typing.List):
    """Dump the ``_info`` of every given service as an indented JSON text.

    :param services: objects exposing an ``_info`` attribute.
    :return: JSON array (4 spaces indentation, sorted keys) with one entry
        per service.
    """
    # pylint: disable=protected-access
    infos = [service._info for service in services]
    return json.dumps(infos, sort_keys=True, indent=4)
def wait_for_services_up(retry: typing.Optional[tobiko.Retry] = None,
                         **kwargs):
    """Wait until all the selected Nova services are in 'up' state.

    On every attempt services are listed with
    ``_client.list_services(**kwargs)``. The attempt fails when no service
    is found or when any of the listed services is not selected by
    ``with_attributes(state='up')``.

    :param retry: retry policy to iterate over; when not given (or falsy)
        ``tobiko.retry(timeout=30., interval=5.)`` is used.
    :param kwargs: forwarded to ``_client.list_services``.
    :raises NovaServiceException: presumably re-raised by
        ``attempt.check_limits()`` once the retry limits are reached --
        TODO confirm against the Retry implementation.
    """
    retry = retry or tobiko.retry(timeout=30., interval=5.)
    for attempt in retry:
        services = _client.list_services(**kwargs)
        LOG.debug("Found %d Nova services", len(services))
        try:
            if not services:
                # kwargs may hold values that are not JSON serializable
                # (e.g. a client object): fall back to str() so building
                # the error never fails with an unrelated TypeError.
                raise NovaServicesNotfound(
                    attributes=json.dumps(kwargs, default=str))

            healthy_services = services.with_attributes(state='up')
            LOG.debug("Found %d healthy Nova services",
                      len(healthy_services))
            failed_services = [service
                               for service in services
                               if service not in healthy_services]
            LOG.debug("Found %d failed Nova services", len(failed_services))
            if failed_services:
                details = services_details(failed_services)
                LOG.info("Failed Nova services:\n%s", details)
                raise NovaServicesFailed(details=details)
        except NovaServiceException:
            # Expected to re-raise this exception in case this is the last
            # retry attempt; otherwise the loop goes on with next attempt
            attempt.check_limits()
        else:
            LOG.info('All nova services are up!')
            break  # all Nova services are healthy

View File

@ -150,3 +150,9 @@ class HypervisorTest(testtools.TestCase):
def test_skip_if_missing_hypervisors_with_big_count(self):
self.test_skip_if_missing_hypervisors(count=1000000,
should_skip=True)
class ServiceTest(testtools.TestCase):
    """Functional test cases for Nova services helpers."""

    def test_wait_for_services_up(self):
        # Passes as long as wait_for_services_up() returns without raising
        nova.wait_for_services_up()

View File

@ -0,0 +1,25 @@
# Copyright (c) 2020 Red Hat
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
import testtools
from tobiko.openstack import nova
class ServiceTest(testtools.TestCase):
    """Scenario test cases checking the state of Nova services."""

    def test_all_services_are_up(self):
        # Passes as long as wait_for_services_up() returns without raising
        nova.wait_for_services_up()

View File

@ -14,33 +14,9 @@ from tobiko.openstack import nova
LOG = log.getLogger(__name__)
def check_nova_services_health(timeout=600, interval=2):
failures = []
start = time.time()
while time.time() - start < timeout:
failures = []
nova_client = nova.get_nova_client()
services = nova_client.services.list()
for service in services:
if not service.state == 'up':
failures.append(
'failed service: {}\n\n'.format(vars(service)))
if failures:
LOG.info('Failed nova services:\n {}'.format(failures))
LOG.info('Not all nova services are up ..')
LOG.info('Retrying , timeout at: {}'
.format(timeout-(time.time() - start)))
time.sleep(interval)
else:
LOG.info([vars(service) for service in services])
LOG.info('All nova services are up!')
return
# exhausted all retries
if failures:
tobiko.fail(
'nova agents are unhealthy:\n{!s}', '\n'.join(failures))
def check_nova_services_health(timeout=600., interval=2.):
    """Wait for Nova services to be up using given retry limits.

    :param timeout: ``timeout`` value given to ``tobiko.retry``.
    :param interval: ``interval`` value given to ``tobiko.retry``.
    """
    nova.wait_for_services_up(
        retry=tobiko.retry(timeout=timeout, interval=interval))
def start_all_instances():