Clear dcagent cache upon failure

Currently, if a call to update the dcagent cache fails, the previous
successful response remains cached, so when dcmanager/dcorch attempt
an audit, an out-of-date response is used. This commit fixes the
problem by clearing the cached result for the function when the
call raises an exception.

Test plan:
  - PASS: Manually force an error in the software API. Verify that the
          dcagent cache is cleared and the next audit fails.
  - PASS: Manage a healthy subcloud and verify the audit is working
          as expected.

Closes-bug: 2085832

Change-Id: Ib97f58e8d20e6805b08e930b6e4200aae5451719
Signed-off-by: Victor Romano <victor.gluzromano@windriver.com>
This commit is contained in:
Victor Romano 2024-10-29 09:44:15 -03:00
parent 2b1e8fcc45
commit a4aa05ba8c
4 changed files with 28 additions and 13 deletions

View File

@ -56,6 +56,7 @@ class PeriodicAudit(utils.BaseAuditManager):
return
except Exception:
LOG.exception("Error in periodic audit loop")
eventlet.greenthread.sleep(2)
def _run_with_retry_if_unauthorized(self, func, arg_getter, **kwargs):
# If any exception is raised, we just ignore them and wait
@ -155,6 +156,9 @@ class RequestedAudit(utils.BaseAuditManager):
)
else:
raise exceptions.UnsupportedAudit(audit=audit_type)
if resp is None:
# resp is None when the audit fails to get the data internally
raise exceptions.AuditStatusFailure(audit=audit_type)
return audit_type, resp
def get_sync_status(self, payload):

View File

@ -44,3 +44,7 @@ class UnsupportedAudit(DcagentException):
# Exception whose message reports that an audit request carried no RegionOne
# data; the message template is formatted with the audit name under `audit`.
class MissingRegionOneData(DcagentException):
message = _("Audit request does not have RegionOne data for %(audit)s.")
# Exception raised by the requested-audit path when an audit returns None,
# i.e. the audit failed to get its data internally; the message template is
# formatted with the audit type under `audit`.
class AuditStatusFailure(DcagentException):
message = _("Failure getting %(audit)s sync status.")

View File

@ -48,15 +48,25 @@ def cache_wrapper(cls):
)
return response
result = method(self, *args, **kwargs)
# Cache the results in the '_result' class variable
LOG.debug(
f"Saving new response for {method.__name__} "
f"in {self.__class__.__name__}. Response: {result}"
)
with self.__class__._lock:
self.__class__._results[method.__name__] = result
return result
try:
result = method(self, *args, **kwargs)
# Cache the results in the '_result' class variable
LOG.debug(
f"Saving new response for {method.__name__} "
f"in {self.__class__.__name__}. Response: {result}"
)
with self.__class__._lock:
self.__class__._results[method.__name__] = result
return result
except Exception as e:
LOG.exception(
f"Error in {method.__name__} from {self.__class__.__name__}: {e}"
)
# Clear the cached result if an exception occurs
with self.__class__._lock:
if method.__name__ in self.__class__._results:
del self.__class__._results[method.__name__]
raise
return wrapper

View File

@ -527,10 +527,7 @@ class SubcloudAuditWorkerManager(manager.Manager):
except Exception:
LOG.exception(failmsg % (subcloud.name, "dcagent"))
failures.append("dcagent")
LOG.debug(
f"Audits results for subcloud {subcloud_name}: "
f"{subcloud_name}: {audit_results}"
)
LOG.debug(f"Audits results for subcloud {subcloud_name}: {audit_results}")
for audit_type, audit_value in audit_results.items():
if audit_type == dccommon_consts.BASE_AUDIT:
avail_to_set = audit_value.get("availability")