Limit CaptureRegion sizes in format_inspector for VMDK and VHDX

VMDK:
When parsing a VMDK file to calculate its size, the format_inspector
determines the location of the Descriptor section by reading two
uint64 from the headers of the file and uses them to create the
descriptor CaptureRegion.

It would be possible to craft a VMDK file that commands the
format_inspector to create a very big CaptureRegion, thus exhausting
resources on the glance-api process.

This patch binds the beginning of the descriptor to 0x200 and limits
the size of the CaptureRegion to 1MB, similar to how the VMDK
descriptor is parsed by qemu.

VHDX:
It is a bit more involved, but similar: when looking for the
VIRTUAL_DISK_SIZE metadata, the format_inspector was creating an
unbounded CaptureRegion.

In the same way as it seems to be done in Qemu, we now limit the upper
bound of this CaptureRegion.

Closes-Bug: #2006490
Change-Id: I3ec5a33df20e1cfb6673f4ff1c7c91aacd065532
(cherry picked from commit d4d33ee30f)
This commit is contained in:
Guillaume Espanel 2023-01-25 11:53:09 +01:00 committed by Abhishek Kekane
parent b1e5292248
commit 06a18202ab
2 changed files with 139 additions and 3 deletions

View File

@ -345,6 +345,7 @@ class VHDXInspector(FileInspector):
""" """
METAREGION = '8B7CA206-4790-4B9A-B8FE-575F050F886E' METAREGION = '8B7CA206-4790-4B9A-B8FE-575F050F886E'
VIRTUAL_DISK_SIZE = '2FA54224-CD1B-4876-B211-5DBED83BF4B8' VIRTUAL_DISK_SIZE = '2FA54224-CD1B-4876-B211-5DBED83BF4B8'
VHDX_METADATA_TABLE_MAX_SIZE = 32 * 2048 # From qemu
def __init__(self, *a, **k): def __init__(self, *a, **k):
super(VHDXInspector, self).__init__(*a, **k) super(VHDXInspector, self).__init__(*a, **k)
@ -459,6 +460,8 @@ class VHDXInspector(FileInspector):
item_offset, item_length, _reserved = struct.unpack( item_offset, item_length, _reserved = struct.unpack(
'<III', '<III',
meta_buffer[entry_offset + 16:entry_offset + 28]) meta_buffer[entry_offset + 16:entry_offset + 28])
item_length = min(item_length,
self.VHDX_METADATA_TABLE_MAX_SIZE)
self.region('metadata').length = len(meta_buffer) self.region('metadata').length = len(meta_buffer)
self._log.debug('Found entry at offset %x', item_offset) self._log.debug('Found entry at offset %x', item_offset)
# Metadata item offset is from the beginning of the metadata # Metadata item offset is from the beginning of the metadata
@ -516,6 +519,12 @@ class VMDKInspector(FileInspector):
variable number of 512 byte sectors, but is just text defining the variable number of 512 byte sectors, but is just text defining the
layout of the disk. layout of the disk.
""" """
# The beginning and max size of the descriptor is also hardcoded in Qemu
# at 0x200 and 1MB - 1
DESC_OFFSET = 0x200
DESC_MAX_SIZE = (1 << 20) - 1
def __init__(self, *a, **k): def __init__(self, *a, **k):
super(VMDKInspector, self).__init__(*a, **k) super(VMDKInspector, self).__init__(*a, **k)
self.new_region('header', CaptureRegion(0, 512)) self.new_region('header', CaptureRegion(0, 512))
@ -532,15 +541,22 @@ class VMDKInspector(FileInspector):
if sig != b'KDMV': if sig != b'KDMV':
raise ImageFormatError('Signature KDMV not found: %r' % sig) raise ImageFormatError('Signature KDMV not found: %r' % sig)
return
if ver not in (1, 2, 3): if ver not in (1, 2, 3):
raise ImageFormatError('Unsupported format version %i' % ver) raise ImageFormatError('Unsupported format version %i' % ver)
return
# Since we parse both desc_sec and desc_num (the location of the
# VMDK's descriptor, expressed in 512 bytes sectors) we enforce a
# check on the bounds to create a reasonable CaptureRegion. This
# is similar to how it's done in qemu.
desc_offset = desc_sec * 512
desc_size = min(desc_num * 512, self.DESC_MAX_SIZE)
if desc_offset != self.DESC_OFFSET:
raise ImageFormatError("Wrong descriptor location")
if not self.has_region('descriptor'): if not self.has_region('descriptor'):
self.new_region('descriptor', CaptureRegion( self.new_region('descriptor', CaptureRegion(
desc_sec * 512, desc_num * 512)) desc_offset, desc_size))
@property @property
def format_match(self): def format_match(self):

View File

@ -16,6 +16,7 @@
import io import io
import os import os
import re import re
import struct
import subprocess import subprocess
import tempfile import tempfile
from unittest import mock from unittest import mock
@ -63,6 +64,28 @@ class TestFormatInspectors(test_utils.BaseTestCase):
shell=True) shell=True)
return fn return fn
def _create_allocated_vmdk(self, size_mb):
# We need a "big" VMDK file to exercise some parts of the code of the
# format_inspector. A way to create one is to first create an empty
# file, and then to convert it with the -S 0 option.
fn = tempfile.mktemp(prefix='glance-unittest-formatinspector-',
suffix='.vmdk')
self._created_files.append(fn)
zeroes = tempfile.mktemp(prefix='glance-unittest-formatinspector-',
suffix='.zero')
self._created_files.append(zeroes)
# Create an empty file
subprocess.check_output(
'dd if=/dev/zero of=%s bs=1M count=%i' % (zeroes, size_mb),
shell=True)
# Convert it to VMDK
subprocess.check_output(
'qemu-img convert -f raw -O vmdk -S 0 %s %s' % (zeroes, fn),
shell=True)
return fn
def _test_format_at_block_size(self, format_name, img, block_size): def _test_format_at_block_size(self, format_name, img, block_size):
fmt = format_inspector.get_inspector(format_name)() fmt = format_inspector.get_inspector(format_name)()
self.assertIsNotNone(fmt, self.assertIsNotNone(fmt,
@ -119,6 +142,64 @@ class TestFormatInspectors(test_utils.BaseTestCase):
def test_vmdk(self): def test_vmdk(self):
self._test_format('vmdk') self._test_format('vmdk')
def test_vmdk_bad_descriptor_offset(self):
format_name = 'vmdk'
image_size = 10 * units.Mi
descriptorOffsetAddr = 0x1c
BAD_ADDRESS = 0x400
img = self._create_img(format_name, image_size)
# Corrupt the header
fd = open(img, 'r+b')
fd.seek(descriptorOffsetAddr)
fd.write(struct.pack('<Q', BAD_ADDRESS // 512))
fd.close()
# Read the format in various sizes, some of which will read whole
# sections in a single read, others will be completely unaligned, etc.
for block_size in (64 * units.Ki, 512, 17, 1 * units.Mi):
fmt = self._test_format_at_block_size(format_name, img, block_size)
self.assertTrue(fmt.format_match,
'Failed to match %s at size %i block %i' % (
format_name, image_size, block_size))
self.assertEqual(0, fmt.virtual_size,
('Calculated a virtual size for a corrupt %s at '
'size %i block %i') % (format_name, image_size,
block_size))
def test_vmdk_bad_descriptor_mem_limit(self):
format_name = 'vmdk'
image_size = 5 * units.Mi
virtual_size = 5 * units.Mi
descriptorOffsetAddr = 0x1c
descriptorSizeAddr = descriptorOffsetAddr + 8
twoMBInSectors = (2 << 20) // 512
# We need a big VMDK because otherwise we will not have enough data to
# fill-up the CaptureRegion.
img = self._create_allocated_vmdk(image_size // units.Mi)
# Corrupt the end of descriptor address so it "ends" at 2MB
fd = open(img, 'r+b')
fd.seek(descriptorSizeAddr)
fd.write(struct.pack('<Q', twoMBInSectors))
fd.close()
# Read the format in various sizes, some of which will read whole
# sections in a single read, others will be completely unaligned, etc.
for block_size in (64 * units.Ki, 512, 17, 1 * units.Mi):
fmt = self._test_format_at_block_size(format_name, img, block_size)
self.assertTrue(fmt.format_match,
'Failed to match %s at size %i block %i' % (
format_name, image_size, block_size))
self.assertEqual(virtual_size, fmt.virtual_size,
('Failed to calculate size for %s at size %i '
'block %i') % (format_name, image_size,
block_size))
memory = sum(fmt.context_info.values())
self.assertLess(memory, 1.5 * units.Mi,
'Format used more than 1.5MiB of memory: %s' % (
fmt.context_info))
def test_vdi(self): def test_vdi(self):
self._test_format('vdi') self._test_format('vdi')
@ -275,3 +356,42 @@ class TestFormatInspectorInfra(test_utils.BaseTestCase):
self.assertEqual(format_inspector.QcowInspector, self.assertEqual(format_inspector.QcowInspector,
format_inspector.get_inspector('qcow2')) format_inspector.get_inspector('qcow2'))
self.assertIsNone(format_inspector.get_inspector('foo')) self.assertIsNone(format_inspector.get_inspector('foo'))
class TestFormatInspectorsTargeted(test_utils.BaseTestCase):
def _make_vhd_meta(self, guid_raw, item_length):
# Meta region header, padded to 32 bytes
data = struct.pack('<8sHH', b'metadata', 0, 1)
data += b'0' * 20
# Metadata table entry, 16-byte GUID, 12-byte information,
# padded to 32-bytes
data += guid_raw
data += struct.pack('<III', 256, item_length, 0)
data += b'0' * 6
return data
def test_vhd_table_over_limit(self):
ins = format_inspector.VHDXInspector()
meta = format_inspector.CaptureRegion(0, 0)
desired = b'012345678ABCDEF0'
# This is a poorly-crafted image that specifies a larger table size
# than is allowed
meta.data = self._make_vhd_meta(desired, 33 * 2048)
ins.new_region('metadata', meta)
new_region = ins._find_meta_entry(ins._guid(desired))
# Make sure we clamp to our limit of 32 * 2048
self.assertEqual(
format_inspector.VHDXInspector.VHDX_METADATA_TABLE_MAX_SIZE,
new_region.length)
def test_vhd_table_under_limit(self):
ins = format_inspector.VHDXInspector()
meta = format_inspector.CaptureRegion(0, 0)
desired = b'012345678ABCDEF0'
meta.data = self._make_vhd_meta(desired, 16 * 2048)
ins.new_region('metadata', meta)
new_region = ins._find_meta_entry(ins._guid(desired))
# Table size was under the limit, make sure we get it back
self.assertEqual(16 * 2048, new_region.length)