From 3853336652cb672717f9d40f767cde0a3e274d17 Mon Sep 17 00:00:00 2001 From: Dan Voiculeasa Date: Sat, 17 Dec 2022 20:49:29 +0200 Subject: [PATCH] Sync part of the functions that call playbooks It is observed that concurrent calls to the docker image download section may create problems due to a race condition (for example, one task removes the tags while the other expects them to be there). This becomes obvious when running upgrades with a race between upgrade-static-images.yml and upgrade-fluxcd-controllers.yml. Sync with a lock the functions that call upgrade playbooks at conductor start, and the one handling upgrade-activate. This may not be a complete solution; other playbook calls have to be examined. Tests on AIO-SX: DESC: Emulated race condition by adding a sync for _k8s_application_audit function, adding some logging and restarting sysinv conductor. PASS: Observed _k8s_application_audit called first, and the functions that handled the upgrade playbook at conductor start being serialized. PASS: AIO-SX bootstrap Closes-Bug: 1999971 Signed-off-by: Dan Voiculeasa Change-Id: Id96fa62b652b438fb71fc1132e3858f1bd3dca50 --- sysinv/sysinv/sysinv/sysinv/conductor/manager.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index b69d60fd80..e00afd2e26 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -196,6 +196,7 @@ CONFIG_UPDATE_FILE = 'config_update_file' LOCK_NAME_UPDATE_CONFIG = 'update_config_' LOCK_APP_AUTO_MANAGE = 'AppAutoManageLock' LOCK_RUNTIME_CONFIG_CHECK = 'runtime_config_check' +LOCK_IMAGE_PULL = 'image_pull_' # Keystone users whose passwords change are monitored by keystone listener, and # the puppet classes to update the service after the passwords change. 
@@ -7076,6 +7077,7 @@ class ConductorManager(service.PeriodicService): @retry(retry_on_result=lambda x: x is False, wait_fixed=(CONF.conductor.kube_upgrade_downgrade_retry_interval * 1000)) + @cutils.synchronized(LOCK_IMAGE_PULL) def _upgrade_downgrade_kube_networking(self): try: # Get the kubernetes version from the upgrade table @@ -7112,6 +7114,7 @@ class ConductorManager(service.PeriodicService): @retry(retry_on_result=lambda x: x is False, wait_fixed=(CONF.conductor.kube_upgrade_downgrade_retry_interval * 1000)) + @cutils.synchronized(LOCK_IMAGE_PULL) def _upgrade_downgrade_static_images(self): try: # Get the kubernetes version from the upgrade table @@ -12162,6 +12165,7 @@ class ConductorManager(service.PeriodicService): self.dbapi.software_upgrade_update( upgrade.uuid, {'state': constants.UPGRADE_STARTED}) + @cutils.synchronized(LOCK_IMAGE_PULL) def activate_upgrade(self, context, upgrade): """Activate the upgrade. Generate and apply new manifests.