Make sure we clean up diskimages with invalid external_name

It is possible for external_name on our uploaded images to be empty.
Either we have written the data to ZooKeeper incorrectly, or there is
some other unknown cause.

As a result, check that external_name is set before asking the provider
to delete the image. This will allow our cleanup worker to properly
delete unneeded diskimages from the HDD.

Change-Id: I2bad5f2c2585c54879497de3c8997dd6234be5a7
Signed-off-by: Paul Belanger <pabelanger@redhat.com>
This commit is contained in:
Paul Belanger 2016-12-02 09:43:25 -05:00
parent 343f9d0b6e
commit 28eb62de86
4 changed files with 60 additions and 1 deletions

View File

@ -284,7 +284,14 @@ class CleanupWorker(BaseWorker):
if upload.state == zk.DELETING or deleted:
manager = self._config.provider_managers[provider.name]
try:
manager.deleteImage(upload.external_name)
# It is possible we got this far, but don't actually have an
# external_name. This could mean that zookeeper and cloud
# provider are somehow out of sync.
if upload.external_name:
base = "-".join([image, upload.build_id])
self.log.info("Deleting image build %s from %s" %
(base, provider.name))
manager.deleteImage(upload.external_name)
except Exception:
self.log.exception(
"Unable to delete image %s from %s: %s",

View File

@ -183,6 +183,8 @@ class FakeOpenStackCloud(object):
return self._image_list
def delete_image(self, name_or_id):
    """Delete the image identified by ``name_or_id`` from the fake cloud.

    A falsy identifier (``None``, empty string) is rejected up front so
    that callers passing a missing name fail loudly.

    Raises:
        Exception: if ``name_or_id`` is empty.
    """
    if name_or_id:
        # Delegate the actual removal to the shared helper, operating on
        # this cloud's image list.
        self._delete(name_or_id, self._image_list)
    else:
        raise Exception('name_or_id is Empty')
def create_image_snapshot(self, name, server, **metadata):

View File

@ -459,6 +459,7 @@ class DBTestCase(BaseTestCase):
break
time.sleep(1)
self.wait_for_threads()
return image
def waitForImageDeletion(self, provider_name, image_name, match=None):
while True:

View File

@ -186,3 +186,52 @@ class TestNodePoolBuilder(tests.DBTestCase):
self.waitForBuild('fake-image', '0000000003')
builds = self.zk.getBuilds('fake-image', zk.READY)
self.assertEqual(len(builds), 2)
def test_image_rotation_invalid_external_name(self):
# Verify that a build whose upload record has lost its external_name and
# external_id is still rotated out: build 0000000001 and its fake-provider
# upload record must disappear, leaving exactly 2 READY builds.
#
# NOTE(pabelanger): We are forcing fake-image to leak in fake-provider.
# We do this to test our CleanupWorker will properly delete diskimage
# builds from the HDD. For this test, we don't care about the leaked
# image.
#
# Ensure we have a total of 3 diskimages on disk, so we can confirm
# nodepool-builder will properly purge the 1 diskimage build leaving a
# total of 2 diskimages on disk at all times.
# Expire rebuild-age (2days), to avoid problems when expiring 2 images.
self._test_image_rebuild_age(expire=172800)
build = self.waitForBuild('fake-image', '0000000002')
# Make sure 2nd diskimage build was uploaded.
image = self.waitForImage('fake-provider', 'fake-image')
self.assertEqual(image.build_id, '0000000002')
# Delete external name / id so we can test exception handlers.
# The record being corrupted belongs to the first build (0000000001),
# i.e. the one that is expected to be cleaned up later in this test.
upload = self.zk.getUploads(
'fake-image', '0000000001', 'fake-provider', zk.READY)[0]
upload.external_name = None
upload.external_id = None
# Persist the modified upload record while holding the upload lock.
with self.zk.imageUploadLock(upload.image_name, upload.build_id,
upload.provider_name, blocking=True,
timeout=1):
self.zk.storeImageUpload(upload.image_name, upload.build_id,
upload.provider_name, upload, upload.id)
# Expire rebuild-age (default: 1day) to force a new build.
build.state_time -= 86400
# Store the aged build record back while holding the image build lock.
with self.zk.imageBuildLock('fake-image', blocking=True, timeout=1):
self.zk.storeBuild('fake-image', build, '0000000002')
# Block until the first build has been removed.
self.waitForBuildDeletion('fake-image', '0000000001')
# Make sure fake-image for fake-provider is removed from zookeeper.
upload = self.zk.getUploads(
'fake-image', '0000000001', 'fake-provider')
self.assertEqual(len(upload), 0)
self.waitForBuild('fake-image', '0000000003')
# Ensure we only have 2 builds on disk.
builds = self.zk.getBuilds('fake-image', zk.READY)
self.assertEqual(len(builds), 2)
# Make sure 3rd diskimage build was uploaded.
# NOTE(review): the third argument presumably tells waitForImage to skip
# the previously returned image so the new upload is picked up - confirm
# against DBTestCase.waitForImage.
image = self.waitForImage('fake-provider', 'fake-image', [image])
self.assertEqual(image.build_id, '0000000003')