Merge "Max percentage failure support" into stable/train

This commit is contained in:
Zuul 2020-07-07 05:17:26 +00:00 committed by Gerrit Code Review
commit 2d5ee5f8b7
3 changed files with 57 additions and 5 deletions

View File

@ -47,6 +47,7 @@ class TripleoBase(StrategyBase):
self._play_context = None self._play_context = None
self._strat_results = [] self._strat_results = []
self.noop_task = None self.noop_task = None
self._fail_cache = {}
# these were defined in 2.9 # these were defined in 2.9
self._has_hosts_cache = False self._has_hosts_cache = False
self._has_hosts_cache_all = False self._has_hosts_cache_all = False
@ -84,6 +85,50 @@ class TripleoBase(StrategyBase):
task.name = name task.name = name
self._callback_sent = True self._callback_sent = True
def _get_fail_percent(self, host):
    """Look up the failure threshold and role name for a host.

    The answer is memoised in ``self._fail_cache`` keyed by host, so the
    variable manager is queried at most once per (truthy) host.

    :param host: host whose variables are resolved; a falsy value skips
        the cache lookup, although the result is still stored.
    :returns: tuple ``(percent, role)`` read from the host variables
        ``max_fail_percentage`` (default ``0``) and
        ``tripleo_role_name`` (default ``'default'``).
    """
    cache = self._fail_cache
    if host and host in cache:
        return cache[host]

    host_vars = self._variable_manager.get_vars(
        play=self._iterator._play, host=host, task=None)
    result = (
        host_vars.get('max_fail_percentage', 0),
        host_vars.get('tripleo_role_name', 'default'),
    )
    cache[host] = result
    return result
def _check_fail_percent(self, host, current_failures):
    """Check if the max failure percentage was exceeded.

    When a failure occurs for a host, compare the share of failed hosts
    in the host's role group against the maximum failure percentage
    configured for that host.

    :param host: host that failed; used to look up its ``(percent, role)``
        pair via ``self._get_fail_percent``.
    :param current_failures: dict mapping role name to the number of
        failed hosts; a role missing from the dict counts as one failure.
    :returns: ``True`` when the failed share is strictly greater than the
        allowed percentage, or when the role has no hosts in the
        inventory groups; ``False`` otherwise.
    """
    percent, role = self._get_fail_percent(host)
    current_failed = current_failures.get(role, 1)

    groups = self._inventory.get_groups_dict()
    group_count = len(groups.get(role, []))
    if group_count == 0:
        # No inventory group matches the role: report the limit as reached.
        return True

    # Multiply before dividing. Dividing first lets floating-point rounding
    # push an exact-boundary ratio over the limit (7 of 25 hosts would give
    # 28.000000000000004 instead of 28.0), and the float literal keeps the
    # division from truncating under Python 2 integer semantics.
    failed_percent = (100.0 * current_failed) / group_count
    return failed_percent > percent
def _get_current_failures(self):
    """Count the currently failed hosts, grouped by role.

    Each key of the iterator's failed-host mapping is resolved to an
    inventory host, whose role comes from ``self._get_fail_percent``.

    :returns: dict mapping role name to its number of failed hosts.
    """
    per_role = {}
    for hostname in self._iterator.get_failed_hosts():
        inventory_host = self._inventory.get_host(hostname)
        _, role = self._get_fail_percent(inventory_host)
        per_role[role] = per_role.get(role, 0) + 1
    return per_role
def process_includes(self, host_results, noop=False): def process_includes(self, host_results, noop=False):
"""Handle includes """Handle includes

View File

@ -123,10 +123,12 @@ class StrategyModule(BASE.TripleoBase):
function returns True if there were failures and False if function returns True if there were failures and False if
there are no failures. there are no failures.
""" """
fail_lookup = self._get_current_failures()
if self._any_errors_fatal: if self._any_errors_fatal:
for res in results: for res in results:
if ((res.is_failed() or res._task.action == 'meta') if ((res.is_failed() or res._task.action == 'meta')
and self._iterator.is_failed(res._host)): and self._iterator.is_failed(res._host)
and self._check_fail_percent(res._host, fail_lookup)):
return True return True
return False return False

View File

@ -334,15 +334,20 @@ class StrategyModule(BASE.TripleoBase):
failed_hosts = [] failed_hosts = []
unreachable_hosts = [] unreachable_hosts = []
fail_lookup = self._get_current_failures()
for res in self._strat_results: for res in self._strat_results:
if ((res.is_failed() or res._task.action == 'meta') if ((res.is_failed() or res._task.action == 'meta')
and self._iterator.is_failed(res._host)): and self._iterator.is_failed(res._host)):
failed_hosts.append(res._host.name) failed_hosts.append(res._host)
elif res.is_unreachable(): elif res.is_unreachable():
unreachable_hosts.append(res._host.name) unreachable_hosts.append(res._host)
# TODO(mwhahaha): handle max_fail_percentage by tripleo role errored = False
if (self._any_errors_fatal for host in set(failed_hosts + unreachable_hosts):
errored = self._check_fail_percent(host, fail_lookup)
if errored:
break
if (errored and self._any_errors_fatal
and (len(failed_hosts) > 0 and (len(failed_hosts) > 0
or len(unreachable_hosts) > 0)): or len(unreachable_hosts) > 0)):
result = self._process_failures() result = self._process_failures()