From 19334f0f8d0d2e8bcc086ef27e44c0a6a80bd9d6 Mon Sep 17 00:00:00 2001 From: Brian Elliott Date: Sun, 22 Jul 2012 14:39:08 -0500 Subject: [PATCH] Do not cache images that fail checksum verfication On an image GET, recalculate the image checksum as the image data is streamed to the client. Verify that the checksum matches the original checksum calculated when the image was added to Glance. If checksum validation fails, purge the image from the cache. This type of situation could occur if the backend image store is malfunctioning. bug 1028496 Change-Id: I9f38bac8360016bb12b5edaad87c50939a538cc0 --- glance/api/middleware/cache.py | 12 +++++- glance/image_cache/__init__.py | 16 +++++++- glance/tests/unit/test_cache_middleware.py | 36 ++++++++++++++++++ glance/tests/unit/test_image_cache.py | 43 ++++++++++++++++++++-- 4 files changed, 101 insertions(+), 6 deletions(-) diff --git a/glance/api/middleware/cache.py b/glance/api/middleware/cache.py index 0ad6e9ce50..8e24ef035b 100644 --- a/glance/api/middleware/cache.py +++ b/glance/api/middleware/cache.py @@ -164,7 +164,17 @@ class CacheFilter(wsgi.Middleware): return resp def _process_GET_response(self, resp, image_id): - resp.app_iter = self.cache.get_caching_iter(image_id, resp.app_iter) + image_checksum = resp.headers.get('Content-MD5', None) + + if not image_checksum: + # API V1 stores the checksum in a different header: + image_checksum = resp.headers.get('x-image-meta-checksum', None) + + if not image_checksum: + LOG.error(_("Checksum header is missing.")) + + resp.app_iter = self.cache.get_caching_iter(image_id, image_checksum, + resp.app_iter) return resp def get_status_code(self, response): diff --git a/glance/image_cache/__init__.py b/glance/image_cache/__init__.py index d45e520e0d..270343062c 100644 --- a/glance/image_cache/__init__.py +++ b/glance/image_cache/__init__.py @@ -19,6 +19,8 @@ LRU Cache for Image Data """ +import hashlib + from glance.common import exception from glance.common import utils from glance.openstack.common import cfg @@ -206,13 +208,15 @@ class ImageCache(object): """ return self.driver.queue_image(image_id) - def get_caching_iter(self, image_id, image_iter): + def get_caching_iter(self, image_id, image_checksum, image_iter): """ Returns an iterator that caches the contents of an image while the image contents are read through the supplied iterator. :param image_id: Image ID + :param image_checksum: checksum expected to be generated while + iterating over image data :param image_iter: Iterator that will read image contents """ if not self.driver.is_cacheable(image_id): @@ -222,13 +226,23 @@ class ImageCache(object): def tee_iter(image_id): try: + current_checksum = hashlib.md5() + with self.driver.open_for_write(image_id) as cache_file: for chunk in image_iter: try: cache_file.write(chunk) finally: + current_checksum.update(chunk) yield chunk cache_file.flush() + + if image_checksum and \ + image_checksum != current_checksum.hexdigest(): + msg = _("Checksum verification failed. Aborted caching " + "of image %s." % image_id) + raise exception.GlanceException(msg) + except Exception: LOG.exception(_("Exception encountered while tee'ing " "image '%s' into cache. Continuing " diff --git a/glance/tests/unit/test_cache_middleware.py b/glance/tests/unit/test_cache_middleware.py index 6c2c312762..0f97b866b0 100644 --- a/glance/tests/unit/test_cache_middleware.py +++ b/glance/tests/unit/test_cache_middleware.py @@ -19,6 +19,16 @@ import glance.api.middleware.cache from glance.tests.unit import base +class ChecksumTestCacheFilter(glance.api.middleware.cache.CacheFilter): + def __init__(self): + class DummyCache(object): + def get_caching_iter(self, image_id, image_checksum, + app_iter): + self.image_checksum = image_checksum + + self.cache = DummyCache() + + class TestCacheMiddleware(base.IsolatedUnitTest): def test_no_match_detail(self): req = webob.Request.blank('/v1/images/detail') @@ -34,3 +44,29 @@ class TestCacheMiddleware(base.IsolatedUnitTest): req = webob.Request.blank('/v1/images/asdf?ping=pong') out = glance.api.middleware.cache.CacheFilter._match_request(req) self.assertEqual(out, ('v1', 'GET', 'asdf')) + + def test_checksum_v1_header(self): + cache_filter = ChecksumTestCacheFilter() + headers = {"x-image-meta-checksum": "1234567890"} + resp = webob.Response(headers=headers) + cache_filter._process_GET_response(resp, None) + + self.assertEqual("1234567890", cache_filter.cache.image_checksum) + + def test_checksum_v2_header(self): + cache_filter = ChecksumTestCacheFilter() + headers = { + "x-image-meta-checksum": "1234567890", + "Content-MD5": "abcdefghi" + } + resp = webob.Response(headers=headers) + cache_filter._process_GET_response(resp, None) + + self.assertEqual("abcdefghi", cache_filter.cache.image_checksum) + + def test_checksum_missing_header(self): + cache_filter = ChecksumTestCacheFilter() + resp = webob.Response() + cache_filter._process_GET_response(resp, None) + + self.assertEqual(None, cache_filter.cache.image_checksum) diff --git a/glance/tests/unit/test_image_cache.py b/glance/tests/unit/test_image_cache.py index ce1758b3c9..47bd24dd30 100644 --- a/glance/tests/unit/test_image_cache.py +++ b/glance/tests/unit/test_image_cache.py @@ -16,6 +16,7 @@ # under the License. from contextlib import contextmanager +import hashlib import os import random import shutil @@ -23,6 +24,7 @@ import StringIO import stubout +from glance.common import exception from glance.common import utils from glance import image_cache from glance.tests import utils as test_utils @@ -274,7 +276,9 @@ class ImageCacheTestCase(object): # and a consuming iterator completes def consume(image_id): data = ['a', 'b', 'c', 'd', 'e', 'f'] - caching_iter = self.cache.get_caching_iter(image_id, iter(data)) + checksum = None + caching_iter = self.cache.get_caching_iter(image_id, checksum, + iter(data)) self.assertEqual(list(caching_iter), data) image_id = '1' @@ -300,7 +304,9 @@ class ImageCacheTestCase(object): # test a case where a consuming iterator just stops. def falloffend(image_id): data = ['a', 'b', 'c', 'd', 'e', 'f'] - caching_iter = self.cache.get_caching_iter(image_id, iter(data)) + checksum = None + caching_iter = self.cache.get_caching_iter(image_id, checksum, + iter(data)) self.assertEqual(caching_iter.next(), 'a') image_id = '1' @@ -406,6 +412,35 @@ class TestImageCacheSqlite(test_utils.BaseTestCase, if os.path.exists(self.cache_dir): shutil.rmtree(self.cache_dir) + def test_gate_caching_iter_good_checksum(self): + image = "12345678990abcdefghijklmnop" + image_id = 123 + + md5 = hashlib.md5() + md5.update(image) + checksum = md5.hexdigest() + + cache = image_cache.ImageCache() + img_iter = cache.get_caching_iter(image_id, checksum, image) + for chunk in img_iter: + pass + # checksum is valid, fake image should be cached: + self.assertTrue(cache.is_cached(image_id)) + + def test_gate_caching_iter_bad_checksum(self): + image = "12345678990abcdefghijklmnop" + image_id = 123 + checksum = "foobar" # bad. + + cache = image_cache.ImageCache() + img_iter = cache.get_caching_iter(image_id, checksum, image) + + def reader(): + for chunk in img_iter: + pass + # checksum is invalid, caching will fail: + self.assertFalse(cache.is_cached(image_id)) + class TestImageCacheNoDep(test_utils.BaseTestCase): @@ -445,7 +480,7 @@ class TestImageCacheNoDep(test_utils.BaseTestCase): cache = image_cache.ImageCache() data = ['a', 'b', 'c', 'Fail', 'd', 'e', 'f'] - caching_iter = cache.get_caching_iter('dummy_id', iter(data)) + caching_iter = cache.get_caching_iter('dummy_id', None, iter(data)) self.assertEqual(list(caching_iter), data) def test_get_caching_iter_when_open_fails(self): @@ -463,5 +498,5 @@ class TestImageCacheNoDep(test_utils.BaseTestCase): cache = image_cache.ImageCache() data = ['a', 'b', 'c', 'd', 'e', 'f'] - caching_iter = cache.get_caching_iter('dummy_id', iter(data)) + caching_iter = cache.get_caching_iter('dummy_id', None, iter(data)) self.assertEqual(list(caching_iter), data)