Retry puppet runtime apply in case of missing info

There are cases where the sysinv-agent receives runtime puppet
apply requests before it gets the host_uuid from the
sysinv-conductor.

This causes the received request to be dropped because the
agent thinks the command is not for itself.

The fix for this is to expand the retry logic of the
config_apply_runtime_manifest method to retry if any
of the needed inventory info is missing.

Tests done:
- install of an SX setup and 10 lock-unlocks
- install of a 2+2 setup and 2 lock-unlocks on each node

Change-Id: I130e73881bd572398072aff7c46b3b7f165ca6b9
Closes-bug: 1841620
Signed-off-by: Stefan Dinescu <stefan.dinescu@windriver.com>
This commit is contained in:
Stefan Dinescu
2019-09-03 16:31:36 +03:00
parent 27e3ba045e
commit 86abe22b80
2 changed files with 22 additions and 11 deletions

View File

@ -1544,12 +1544,12 @@ class AgentManager(service.PeriodicService):
else:
LOG.error("report_inventory unknown request=%s" % inventory_update)
def _retry_on_missing_mgmt_ip(ex):
def _retry_on_missing_inventory_info(ex):
LOG.info('Caught exception. Retrying... Exception: {}'.format(ex))
return isinstance(ex, exception.LocalManagementIpNotFound)
return isinstance(ex, exception.AgentInventoryInfoNotFound)
@retrying.retry(wait_fixed=15 * 1000, stop_max_delay=300 * 1000,
retry_on_exception=_retry_on_missing_mgmt_ip)
retry_on_exception=_retry_on_missing_inventory_info)
@utils.synchronized(LOCK_AGENT_ACTION, external=False)
def config_apply_runtime_manifest(self, context, config_uuid, config_dict):
"""Asynchronously, have the agent apply the runtime manifest with the
@ -1577,6 +1577,21 @@ class AgentManager(service.PeriodicService):
not os.path.isfile(tsc.INITIAL_CONFIG_COMPLETE_FLAG)):
return
# The conductor may send requests to this function, before the
# agent finished its first inventory.
# We raise the exception in case any of the needed information is
# missing and the retry decorator will run this function again.
# NOTE: usually all these parameters are set at the same time
# during the first inventory, but just to be safe we are
# checking that all of them are set.
if (not self._mgmt_ip or
not self._ihost_uuid or
not self._ihost_personality):
raise exception.AgentInventoryInfoNotFound(
config_uuid=config_uuid, config_dict=config_dict,
host_personality=self._ihost_personality,
host_uuid=self._ihost_uuid, mgmt_ip=self._mgmt_ip)
personalities = config_dict.get('personalities')
host_uuids = config_dict.get('host_uuids')
@ -1592,11 +1607,6 @@ class AgentManager(service.PeriodicService):
else:
return
if not self._mgmt_ip:
raise exception.LocalManagementIpNotFound(
config_uuid=config_uuid, config_dict=config_dict,
host_personality=self._ihost_personality)
LOG.info("config_apply_runtime_manifest: %s %s %s" % (
config_uuid, config_dict, self._ihost_personality))
try:

View File

@ -1314,10 +1314,11 @@ class LocalManagementPersonalityNotFound(NotFound):
"host_personality=%(host_personality)s")
class LocalManagementIpNotFound(NotFound):
message = _("Local management IP not found: "
class AgentInventoryInfoNotFound(NotFound):
message = _("Agent inventory information not found: "
"config_uuid=%(config_uuid)s, config_dict=%(config_dict)s, "
"host_personality=%(host_personality)s")
"host_personality=%(host_personality)s, host_uuid=%(host_uuid)s, "
"mgmt_ip=%(mgmt_ip)s")
class LocalHostUUIDNotFound(NotFound):