Skip test cases when nova scheduler hints are not respected
THE CHANGE This also introduces some new features: - allows overriding a stack verification step to be executed at the end of stack creation to check the stack is in a healthy condition and eventually retry creating it - restructures the fail-safe and concurrent creation loop to make it easier to understand and maintain - this should fix some different-host or same-host test case failures typically seen in downstream jobs by checking that Nova scheduler hints are respected and eventually skipping Neutron test cases that pretend to check connectivity between VMs running on different (or the same) hypervisor hosts. THE PROBLEM When scheduler hints are given to Nova to choose where to allocate new VMs, there is no guarantee that it actually follows them. In particular, it has been seen many times that it fails to do so, especially when many VMs are being created at the same time, for example when running Tobiko from a machine with many CPU cores. In such a case the test runner decides to spawn many parallel workers (typically 2 for every core when executing on hyper-threading CPUs). It has been seen that the more test runner workers are executing tests, the higher the probability that these hints are unmet by Nova. THE WORKAROUND To work around this problem, with this change we verify that scheduler hints have been met. If not, it retries creating the VMs up to two times before raising a SkipException to skip the current test case. Change-Id: I8c0cff5ca69680aba3842bd7738f27651a677633
This commit is contained in:
parent
88cbd3847f
commit
3c2155cb4d
|
@ -31,3 +31,12 @@ HeatTemplateFileFixture = _template.HeatTemplateFileFixture
|
|||
|
||||
HeatStackFixture = _stack.HeatStackFixture
|
||||
heat_stack_parameters = _stack.heat_stack_parameters
|
||||
INIT_IN_PROGRESS = _stack.INIT_IN_PROGRESS
|
||||
INIT_COMPLETE = _stack.INIT_COMPLETE
|
||||
INIT_IN_PROGRESS = _stack.INIT_IN_PROGRESS
|
||||
CREATE_IN_PROGRESS = _stack.CREATE_IN_PROGRESS
|
||||
CREATE_COMPLETE = _stack.CREATE_COMPLETE
|
||||
CREATE_FAILED = _stack.CREATE_FAILED
|
||||
DELETE_IN_PROGRESS = _stack.DELETE_IN_PROGRESS
|
||||
DELETE_COMPLETE = _stack.DELETE_COMPLETE
|
||||
DELETE_FAILED = _stack.DELETE_FAILED
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
import collections
|
||||
import random
|
||||
import time
|
||||
import typing # noqa
|
||||
|
||||
|
@ -109,62 +110,87 @@ class HeatStackFixture(tobiko.SharedFixture):
|
|||
def get_stack_parameters(self):
    """Reset the stack parameters fixture and return its current values."""
    parameters_fixture = tobiko.reset_fixture(self.parameters)
    return parameters_fixture.values
|
||||
|
||||
retry_create_min_sleep = 0.1
|
||||
retry_create_max_sleep = 3.
|
||||
|
||||
def create_stack(self, retry=None):
|
||||
"""Creates stack based on passed parameters."""
|
||||
created_stack_ids = set()
|
||||
retry = self._get_retry_value(retry)
|
||||
while True:
|
||||
attempts_count = self._get_retry_value(retry)
|
||||
if attempts_count:
|
||||
for attempt_number in range(1, attempts_count):
|
||||
try:
|
||||
LOG.debug('Creating stack %r: attempt %d of %d',
|
||||
self.stack_name, attempt_number, attempts_count)
|
||||
self.try_create_stack()
|
||||
return self.validate_created_stack()
|
||||
except tobiko.TobikoException:
|
||||
# I use random time sleep to make conflicting concurrent
|
||||
# creations less probable to occur
|
||||
sleep_time = random_sleep_time(
|
||||
min_time=self.retry_create_min_sleep,
|
||||
max_time=self.retry_create_max_sleep)
|
||||
LOG.debug('Failed creating stack %r (attempt %d of %d). '
|
||||
'Will retry in %s seconds',
|
||||
self.stack_name, attempt_number, attempts_count,
|
||||
sleep_time, exc_info=1)
|
||||
time.sleep(sleep_time)
|
||||
|
||||
LOG.debug('Creating stack %r: attempt %d of %d',
|
||||
self.stack_name, attempts_count, attempts_count)
|
||||
self.try_create_stack()
|
||||
|
||||
return self.validate_created_stack()
|
||||
|
||||
#: valid status expected to be the stack after exiting from create_stack
|
||||
# method
|
||||
expected_creted_status = {CREATE_IN_PROGRESS, CREATE_COMPLETE}
|
||||
|
||||
def validate_created_stack(self):
    """Wait until the stack reaches an expected post-creation status.

    NOTE(review): the attribute name ``expected_creted_status`` is
    misspelled ("creted"), but it is part of the class interface and is
    overridden by subclasses, so it is kept unchanged here.
    """
    expected = self.expected_creted_status
    return self.wait_for_stack_status(expected_status=expected, check=True)
|
||||
|
||||
def try_create_stack(self):
|
||||
stack = self.wait_for_stack_status(
|
||||
expected_status={CREATE_COMPLETE, CREATE_FAILED,
|
||||
CREATE_IN_PROGRESS, DELETE_COMPLETE,
|
||||
DELETE_FAILED})
|
||||
|
||||
stack_status = getattr(stack, 'stack_status', DELETE_COMPLETE)
|
||||
if stack_status in {CREATE_IN_PROGRESS, CREATE_COMPLETE}:
|
||||
LOG.debug('Stack created: %r (id=%r)', self.stack_name, stack.id)
|
||||
return stack
|
||||
|
||||
if stack_status.endswith('_FAILED'):
|
||||
LOG.debug('Delete existing failed stack: %r (id=%r)',
|
||||
self.stack_name, stack.id)
|
||||
self.delete_stack(stack_id=stack.id)
|
||||
stack = self.wait_for_stack_status(
|
||||
expected_status={CREATE_COMPLETE, CREATE_FAILED,
|
||||
CREATE_IN_PROGRESS, DELETE_COMPLETE,
|
||||
DELETE_FAILED})
|
||||
stack_status = getattr(stack, 'stack_status', DELETE_COMPLETE)
|
||||
expected_status = {CREATE_COMPLETE, CREATE_IN_PROGRESS}
|
||||
if stack_status in expected_status:
|
||||
LOG.debug('Stack created: %r (id=%r)', self.stack_name,
|
||||
stack.id)
|
||||
for stack_id in created_stack_ids:
|
||||
if self.stack.id != stack_id:
|
||||
LOG.warning("Concurrent stack creation: delete "
|
||||
"duplicated stack is %r (id=%r).",
|
||||
self.stack_name, stack_id)
|
||||
self.delete_stack(stack_id)
|
||||
expected_status={DELETE_COMPLETE})
|
||||
|
||||
return stack
|
||||
# Cleanup cached objects
|
||||
self.stack = self._outputs = self._resources = None
|
||||
|
||||
if not retry:
|
||||
status_reason = getattr(stack, 'stack_status_reason', None)
|
||||
raise HeatStackCreationFailed(name=self.stack_name,
|
||||
observed=stack_status,
|
||||
expected=expected_status,
|
||||
status_reason=status_reason)
|
||||
# Compile template parameters
|
||||
parameters = self.get_stack_parameters()
|
||||
LOG.debug('Begin creating stack %r...', self.stack_name)
|
||||
try:
|
||||
created_stack_id = self.client.stacks.create(
|
||||
stack_name=self.stack_name,
|
||||
template=self.template.template_yaml,
|
||||
parameters=parameters)['stack']['id']
|
||||
except exc.HTTPConflict:
|
||||
LOG.debug('Stack %r already exists.', self.stack_name)
|
||||
created_stack_id = None
|
||||
|
||||
retry -= 1
|
||||
if stack_status.endswith('_FAILED'):
|
||||
LOG.debug('Delete existing failed stack: %r (id=%r)',
|
||||
self.stack_name, stack.id)
|
||||
self.delete_stack()
|
||||
stack = self.wait_for_stack_status(
|
||||
expected_status={DELETE_COMPLETE})
|
||||
stack = self.wait_for_stack_status(
|
||||
expected_status={CREATE_IN_PROGRESS, CREATE_COMPLETE})
|
||||
if created_stack_id and stack.id != created_stack_id:
|
||||
LOG.debug('Concurrent stack creation: delete duplicate stack %r '
|
||||
'(id=%r)', self.stack_name, created_stack_id)
|
||||
self.delete_stack(stack_id=created_stack_id)
|
||||
return stack
|
||||
|
||||
# Cleanup cached objects
|
||||
self.stack = self._outputs = self._resources = None
|
||||
|
||||
# Compile template parameters
|
||||
parameters = self.get_stack_parameters()
|
||||
try:
|
||||
LOG.debug('Creating stack %r (re-tries left %d)...',
|
||||
self.stack_name, retry)
|
||||
stack_id = self.client.stacks.create(
|
||||
stack_name=self.stack_name,
|
||||
template=self.template.template_yaml,
|
||||
parameters=parameters)['stack']['id']
|
||||
except exc.HTTPConflict:
|
||||
LOG.debug('Stack %r already exists.', self.stack_name)
|
||||
else:
|
||||
created_stack_ids.add(stack_id)
|
||||
LOG.debug('Creating stack %r (id=%r)...', self.stack_name,
|
||||
stack_id)
|
||||
def validate_stack(self):
    """Return the cached stack record without performing any check."""
    return self.stack
|
||||
|
||||
_resources = None
|
||||
|
||||
|
@ -184,9 +210,9 @@ class HeatStackFixture(tobiko.SharedFixture):
|
|||
|
||||
def delete_stack(self, stack_id=None):
|
||||
"""Deletes stack."""
|
||||
self.stack = self._outputs = self._resources = None
|
||||
if not stack_id:
|
||||
stack_id = self.stack_id
|
||||
self.stack = self._outputs = self._resources = None
|
||||
try:
|
||||
self.client.stacks.delete(stack_id)
|
||||
except exc.NotFound:
|
||||
|
@ -212,7 +238,7 @@ class HeatStackFixture(tobiko.SharedFixture):
|
|||
except exc.HTTPNotFound:
|
||||
self.stack = stack = None
|
||||
finally:
|
||||
self._outputs = None
|
||||
self._outputs = self._resources = None
|
||||
return stack
|
||||
|
||||
def wait_for_create_complete(self, check=True):
|
||||
|
@ -442,3 +468,8 @@ class HeatStackResourceFixture(HeatStackNamespaceFixture):
|
|||
@property
def fixture_name(self):
    """Unique fixture name derived from the owning stack's name."""
    return '{}.resources'.format(self.stack_name)
|
||||
|
||||
|
||||
def random_sleep_time(min_time, max_time):
    """Return a random sleep duration uniformly drawn from [min_time, max_time].

    Used to spread out retries so that conflicting concurrent stack
    creations become less likely to collide again.

    :param min_time: lower bound of the sleep interval (seconds)
    :param max_time: upper bound of the sleep interval (seconds)
    :returns: a float in the closed interval [min_time, max_time]
    :raises AssertionError: when min_time > max_time
    """
    # BUG FIX: the original asserted `min_time <= min_time`, which is
    # always true and never validated the interval bounds.
    assert min_time <= max_time
    return (max_time - min_time) * random.random() + min_time
|
||||
|
|
|
@ -31,6 +31,8 @@ nova_client = _client.nova_client
|
|||
NovaClientFixture = _client.NovaClientFixture
|
||||
|
||||
skip_if_missing_hypervisors = _hypervisor.skip_if_missing_hypervisors
|
||||
get_same_host_hypervisors = _hypervisor.get_same_host_hypervisors
|
||||
get_different_host_hypervisors = _hypervisor.get_different_host_hypervisors
|
||||
|
||||
find_server_ip_address = _server.find_server_ip_address
|
||||
HasServerMixin = _server.HasServerMixin
|
||||
|
|
|
@ -13,6 +13,8 @@
|
|||
# under the License.
|
||||
from __future__ import absolute_import
|
||||
|
||||
import collections
|
||||
|
||||
import tobiko
|
||||
from tobiko.openstack.nova import _client
|
||||
|
||||
|
@ -43,10 +45,6 @@ def missing_hypervisors(count=1, **params):
|
|||
return max(0, count - len(agents))
|
||||
|
||||
|
||||
def has_networking_agents(count=1, **params):
    """Tell whether at least `count` matching hypervisors are available.

    NOTE(review): the name mentions networking agents but the body checks
    hypervisors — presumably a copy/paste slip; confirm against callers.
    """
    missing = missing_hypervisors(count=count, **params)
    return not missing
|
||||
|
||||
|
||||
def skip_if_missing_hypervisors(count=1, **params):
|
||||
message = "missing {return_value!r} hypervisor(s)"
|
||||
if params:
|
||||
|
@ -54,3 +52,28 @@ def skip_if_missing_hypervisors(count=1, **params):
|
|||
', '.join("{!s}={!r}".format(k, v) for k, v in params.items()))
|
||||
return tobiko.skip_if(message, missing_hypervisors, count=count,
|
||||
**params)
|
||||
|
||||
|
||||
def get_same_host_hypervisors(server_ids, hypervisor):
    """Return servers from `server_ids` hosted on `hypervisor`.

    :returns: {hypervisor: [server_id, ...]} when at least one of the
        given servers runs on that host, otherwise an empty dict.
    """
    servers_by_host = get_servers_hypervisors(server_ids)
    colocated_server_ids = servers_by_host.pop(hypervisor, None)
    return ({hypervisor: colocated_server_ids}
            if colocated_server_ids
            else {})
|
||||
|
||||
|
||||
def get_different_host_hypervisors(server_ids, hypervisor):
    """Return servers from `server_ids` NOT hosted on `hypervisor`.

    :returns: mapping of hypervisor host name -> list of server ids,
        with the entry for `hypervisor` (if any) removed.
    """
    servers_by_host = get_servers_hypervisors(server_ids)
    if hypervisor in servers_by_host:
        del servers_by_host[hypervisor]
    return servers_by_host
|
||||
|
||||
|
||||
def get_servers_hypervisors(server_ids):
    """Group Nova server ids by the hypervisor host they run on.

    :param server_ids: iterable of Nova server ids (may be None or empty)
    :returns: defaultdict mapping hypervisor host name -> list of server
        ids running on that host
    """
    hypervisors = collections.defaultdict(list)
    # `or ()` makes None behave like an empty sequence; the original code
    # additionally wrapped the loop in a redundant `if server_ids:` check.
    for server_id in server_ids or ():
        server = _client.get_server(server_id)
        # Nova extended server attribute carrying the hypervisor host name
        hypervisor = getattr(server, 'OS-EXT-SRV-ATTR:host')
        hypervisors[hypervisor].append(server_id)
    return hypervisors
|
||||
|
|
|
@ -165,11 +165,50 @@ class ServerStackFixture(heat.HeatStackFixture):
|
|||
def scheduler_hints(self):
|
||||
scheduler_hints = {}
|
||||
if self.different_host:
|
||||
scheduler_hints.update(different_host=self.different_host)
|
||||
scheduler_hints.update(different_host=list(self.different_host))
|
||||
if self.same_host:
|
||||
scheduler_hints.update(same_host=self.same_host)
|
||||
scheduler_hints.update(same_host=list(self.same_host))
|
||||
return scheduler_hints
|
||||
|
||||
#: allow to retry creating server in case scheduler hits are not respected
|
||||
retry_create = 3
|
||||
expected_creted_status = {heat.CREATE_COMPLETE}
|
||||
|
||||
def validate_created_stack(self):
    """Run the base stack validation, then verify scheduler hints were honored."""
    created_stack = super(ServerStackFixture, self).validate_created_stack()
    self.validate_scheduler_hints()
    return created_stack
|
||||
|
||||
@property
def hypervisor_host(self):
    """Name of the hypervisor host where this server is running."""
    server = self.server_details
    # 'OS-EXT-SRV-ATTR:host' is a Nova extended attribute, so plain
    # attribute syntax cannot be used to read it.
    return getattr(server, 'OS-EXT-SRV-ATTR:host')
|
||||
|
||||
def validate_scheduler_hints(self):
    """Verify same-host/different-host hints whenever any hint was given."""
    if not self.scheduler_hints:
        return
    host = self.hypervisor_host
    self.validate_same_host_scheduler_hints(hypervisor=host)
    self.validate_different_host_scheduler_hints(hypervisor=host)
|
||||
|
||||
def validate_same_host_scheduler_hints(self, hypervisor):
    """Skip the test case when same-host servers landed on other hypervisors.

    :param hypervisor: host name where this stack's server actually runs
    """
    if not self.same_host:
        return
    mismatched_hypervisors = nova.get_different_host_hypervisors(
        self.same_host, hypervisor)
    if mismatched_hypervisors:
        tobiko.skip("server {!r} of stack {!r} created on "
                    "different hypervisor host from servers:\n{!r}",
                    self.server_id, self.stack_name,
                    mismatched_hypervisors)
|
||||
|
||||
def validate_different_host_scheduler_hints(self, hypervisor):
    """Skip the test case when different-host servers share this hypervisor.

    :param hypervisor: host name where this stack's server actually runs
    """
    if not self.different_host:
        return
    colocated_hypervisors = nova.get_same_host_hypervisors(
        self.different_host, hypervisor)
    if colocated_hypervisors:
        tobiko.skip("server {!r} of stack {!r} created on the same "
                    "hypervisor host as servers:\n{!r}",
                    self.server_id, self.stack_name,
                    colocated_hypervisors)
|
||||
|
||||
@property
def server_details(self):
    """Fetch a fresh server record for this stack's server from Nova."""
    return nova.get_server(self.server_id)
|
||||
|
|
Loading…
Reference in New Issue