From 08a62543482c6af0db932fabaf0b62ec15ab13e3 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Fri, 21 Feb 2014 13:08:26 -0800 Subject: [PATCH] Keep current and previous snapshot images The previous logic around how to keep images was not accomplishing anything particularly useful. Instead: * Delete images that are not configured or have no corresponding base image. * Keep the current and previous READY images. * Otherwise, delete any images that have been in their current state for more than 8 hours. Also, correct the image-update command which no longer needs to join a thread. Also, fix up some poorly exercised parts of the fake provider. Change-Id: Iba921f26d971e56692b9104f9d7c531d955d17b4 --- nodepool/cmd/nodepoolcmd.py | 3 +-- nodepool/fakeprovider.py | 12 ++++++++---- nodepool/nodedb.py | 6 +++++- nodepool/nodepool.py | 23 +++++++++++++++-------- tools/fake-servers.py | 3 ++- 5 files changed, 31 insertions(+), 16 deletions(-) diff --git a/nodepool/cmd/nodepoolcmd.py b/nodepool/cmd/nodepoolcmd.py index f9e535ba2..ba6f5e0af 100644 --- a/nodepool/cmd/nodepoolcmd.py +++ b/nodepool/cmd/nodepoolcmd.py @@ -129,8 +129,7 @@ class NodePoolCmd(object): image = provider.images[self.args.image] with self.pool.getDB().getSession() as session: - t = self.pool.updateImage(session, provider, image) - t.join() + self.pool.updateImage(session, provider, image) def alien_list(self): self.pool.reconfigureManagers(self.pool.config) diff --git a/nodepool/fakeprovider.py b/nodepool/fakeprovider.py index 328e73cfc..f638b913f 100644 --- a/nodepool/fakeprovider.py +++ b/nodepool/fakeprovider.py @@ -54,11 +54,15 @@ class FakeList(object): time.sleep(delay) obj.status = status - def delete(self, obj): - if hasattr(obj, 'id'): - self._list.remove(obj) + def delete(self, *args, **kw): + if 'image' in kw: + self._list.remove(self.get(kw['image'])) else: - self._list.remove(self.get(obj)) + obj = args[0] + if hasattr(obj, 'id'): + self._list.remove(obj) + else: + self._list.remove(self.get(obj)) def create(self, **kw): s = Dummy(id=uuid.uuid4().hex, diff --git a/nodepool/nodedb.py b/nodepool/nodedb.py index cdb7029d0..021ce3d3d 100644 --- a/nodepool/nodedb.py +++ b/nodepool/nodedb.py @@ -250,12 +250,16 @@ class NodeDatabaseSession(object): return None return images[0] - def getCurrentSnapshotImage(self, provider_name, image_name): + def getOrderedReadySnapshotImages(self, provider_name, image_name): images = self.session().query(SnapshotImage).filter( snapshot_image_table.c.provider_name == provider_name, snapshot_image_table.c.image_name == image_name, snapshot_image_table.c.state == READY).order_by( snapshot_image_table.c.version.desc()).all() + return images + + def getCurrentSnapshotImage(self, provider_name, image_name): + images = self.getOrderedReadySnapshotImages(provider_name, image_name) if not images: return None return images[0] diff --git a/nodepool/nodepool.py b/nodepool/nodepool.py index e6aeef8bb..a4e08fce7 100644 --- a/nodepool/nodepool.py +++ b/nodepool/nodepool.py @@ -44,7 +44,8 @@ CONNECT_TIMEOUT = 10 * MINS # How long to try to connect after a server NODE_CLEANUP = 8 * HOURS # When to start deleting a node that is not # READY or HOLD TEST_CLEANUP = 5 * MINS # When to start deleting a node that is in TEST -KEEP_OLD_IMAGE = 24 * HOURS # How long to keep an old (good) image +IMAGE_CLEANUP = 8 * HOURS # When to start deleting an image that is not + # READY or is not the current or previous image DELETE_DELAY = 1 * MINS # Delay before deleting a node that has completed # its job. @@ -1283,7 +1284,8 @@ class NodePool(threading.Thread): def cleanupOneImage(self, session, image): # Normally, reap images that have sat in their current state - # for 24 hours, unless the image is the current snapshot + # for 8 hours, unless the image is the current or previous + # snapshot. delete = False now = time.time() if image.provider_name not in self.config.providers: @@ -1296,12 +1298,17 @@ class NodePool(threading.Thread): self.log.info("Deleting image id: %s which has no current " "base image" % image.id) else: - current = session.getCurrentSnapshotImage(image.provider_name, - image.image_name) - if (current and image != current and - (now - image.state_time) > KEEP_OLD_IMAGE): - self.log.info("Deleting non-current image id: %s because " - "the image is %s hours old" % + images = session.getOrderedReadySnapshotImages( + image.provider_name, image.image_name) + current = previous = None + if len(images) > 0: + current = images[0] + if len(images) > 1: + previous = images[1] + if (image != current and image != previous and + (now - image.state_time) > IMAGE_CLEANUP): + self.log.info("Deleting image id: %s which is " + "%s hours old" % (image.id, (now - image.state_time) / (60 * 60))) delete = True diff --git a/tools/fake-servers.py b/tools/fake-servers.py index d96e312f8..2b93e85b8 100644 --- a/tools/fake-servers.py +++ b/tools/fake-servers.py @@ -68,7 +68,8 @@ def main(): zsocket = context.socket(zmq.PUB) zsocket.bind("tcp://*:8881") - geard = MyGearmanServer() + geard = MyGearmanServer(statsd_host='localhost', statsd_port=8125, + statsd_prefix='zuul.geard') geard._count = 0 statsd = FakeStatsd()