From 3fb4bb577ec2b03c0c110b11b362ffa3d8a95527 Mon Sep 17 00:00:00 2001 From: Clark Boylan Date: Mon, 22 Aug 2016 13:28:29 -0700 Subject: [PATCH] Don't delete building DIB images If DIB image builds take a long time then we may end up attempting to delete them before they have finished building :(. Correct this by treating BUILDING images as an undeletable state. It's possible this could lead to image leaks if we somehow don't detect a failed build, but that seems preferable to having no new images due to this problem. Change-Id: I519d01a5e33e9f1bdf49649cc21f2b58af9ae563 --- nodepool/nodepool.py | 1 + nodepool/tests/test_nodepool.py | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/nodepool/nodepool.py b/nodepool/nodepool.py index 036e2a1bc..3ce7828f4 100644 --- a/nodepool/nodepool.py +++ b/nodepool/nodepool.py @@ -2200,6 +2200,7 @@ class NodePool(threading.Thread): if len(images) > 1: previous = images[1] if (image != current and image != previous and + image.state != nodedb.BUILDING and (now - image.state_time) > IMAGE_CLEANUP): self.log.info("Deleting image id: %s which is " "%s hours old" % diff --git a/nodepool/tests/test_nodepool.py b/nodepool/tests/test_nodepool.py index 097b35434..5176cb5f8 100644 --- a/nodepool/tests/test_nodepool.py +++ b/nodepool/tests/test_nodepool.py @@ -692,6 +692,33 @@ class TestNodepool(tests.DBTestCase): node = session.getNode(2) self.assertEqual(node, None) + def test_dont_delete_building_images(self): + """Test we don't delete building dib images""" + # Get a valid image + configfile = self.setup_config('node_dib.yaml') + pool = self.useNodepool(configfile, watermark_sleep=1) + self._useBuilder(configfile) + pool.start() + self.waitForImage(pool, 'fake-dib-provider', 'fake-dib-image') + self.waitForNodes(pool) + timeout = nodepool.nodepool.IMAGE_CLEANUP + + # Modify the image to be BUILDING and have a state time older + # than the cleanup time. 
+ with pool.getDB().getSession() as session: + dib_image = session.getDibImage(1) + dib_image.state = nodedb.BUILDING + dib_image.state_time = time.time() - timeout - 1 + session.commit() + + # Run cleanup which should not delete the building image + pool.cleanupOneDibImage(session, dib_image) + + # Check that the image is still present in a new session + with pool.getDB().getSession() as session: + dib_image = session.getDibImage(1) + self.assertEqual(dib_image.state, nodedb.BUILDING) + self.assertTrue(time.time() - dib_image.state_time > timeout) class TestGearClient(tests.DBTestCase): def test_wait_for_completion(self):