From a846b257c928f929753472e38a6f8b7831c19c2d Mon Sep 17 00:00:00 2001 From: Simon Westphahl Date: Mon, 12 May 2025 12:48:07 +0200 Subject: [PATCH] Ignore lock errors when resetting image uploads When attempting to reset lost uploads, multiple launchers could race trying to update the same upload. This could also happen when the cache is not fully up-to-date. Instead of causing an error in the main thread, we can catch and ignore the exception in case we did not get the lock. ERROR zuul.Launcher: Error in main thread: Traceback (most recent call last): File "/opt/zuul/lib/python3.11/site-packages/zuul/launcher/server.py", line 955, in run self._run() File "/opt/zuul/lib/python3.11/site-packages/zuul/launcher/server.py", line 972, in _run self.checkMissingUploads() File "/opt/zuul/lib/python3.11/site-packages/zuul/launcher/server.py", line 1967, in checkMissingUploads with (upload.locked(ctx, blocking=False), File "/usr/local/lib/python3.11/contextlib.py", line 137, in __enter__ return next(self.gen) ^^^^^^^^^^^^^^ File "/opt/zuul/lib/python3.11/site-packages/zuul/zk/zkobject.py", line 575, in locked raise LockException(f"Failed to acquire lock on {self}") zuul.zk.exceptions.LockException: Failed to acquire lock on ... Change-Id: Ibdd75db1aa52be0da2beed681b597b73c9a205bb --- zuul/launcher/server.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/zuul/launcher/server.py b/zuul/launcher/server.py index 7d61cd8d0a..ad1cba3d00 100644 --- a/zuul/launcher/server.py +++ b/zuul/launcher/server.py @@ -1964,11 +1964,15 @@ class Launcher: # it has probably crashed. Reset it. if not upload.is_locked: with self.createZKContext(None, self.log) as ctx: - with (upload.locked(ctx, blocking=False), - upload.activeContext(ctx)): - # Double check the state after lock. 
- if upload.state == upload.State.UPLOADING: - upload.state = upload.State.PENDING + try: + with (upload.locked(ctx, blocking=False), + upload.activeContext(ctx)): + # Double check the state after lock. + if upload.state == upload.State.UPLOADING: + upload.state = upload.State.PENDING + except LockException: + # Upload locked again (lock / cache update race) + pass if upload.state != upload.State.PENDING: continue upload_list = uploads_by_artifact_id[upload.artifact_uuid]