Cinder store: Wait for device resize

When we have an image with size > 1 GB, we follow the following steps
to accommodate the image:
1) Detach the volume
2) extend the volume
3) Attach the volume
4) Open the volume device as a file and resume writing the image

Sometimes, for several reasons (mostly network related), the
size of the device file can mismatch the actual volume size
(or the backend LUN size). This can happen if the extend was performed
(i.e. the control path) but it takes time for that to be reflected in
the mapped device (i.e. the data path). This mismatch can cause the
issue "IOError: [Errno 28] No space left on device".
To avoid this scenario, we check whether the device size is less than the
volume size; if it is, we wait for the extended LUN to show up in the
mapped device and then continue the image writing operation.

Closes-Bug: #1959913

Change-Id: I206580f6be615ebc5e15b546b9c23728d4116a5d
(cherry picked from commit f3433ed1a5)
(cherry picked from commit ea5139be9a)
This commit is contained in:
whoami-rajat 2022-01-25 19:24:30 +05:30 committed by Rajat Dhasmana
parent 2bc17c0a9e
commit 5f1cee6fa9
4 changed files with 138 additions and 4 deletions

View File

@ -558,6 +558,37 @@ class Store(glance_store.driver.Store):
raise exceptions.BadStoreConfiguration(store_name="cinder",
reason=reason)
@staticmethod
def _get_device_size(device_file):
    """Return the size of ``device_file`` in GiB, rounded up.

    :param device_file: an open, seekable file-like object for the
        attached volume device
    :returns: the end-of-file offset converted to GiB and rounded up to
        the next whole number
    """
    # The size is read by seeking to the end rather than via os.stat or
    # os.path.getsize, because those need a file name attribute which
    # the RBD file wrapper (RBDVolumeIOWrapper) does not have.
    # The position is deliberately left at the end: after the wait the
    # caller seeks to the number of bytes already written, so there is
    # no need to restore the original offset here.
    device_file.seek(0, os.SEEK_END)
    size_in_bytes = device_file.tell()
    return int(math.ceil(float(size_in_bytes) / units.Gi))
@staticmethod
def _wait_resize_device(volume, device_file):
    """Wait until ``device_file`` is at least as large as ``volume``.

    The device size (in GiB) is polled with an exponential backoff
    until it is no longer smaller than ``volume.size``.

    :param volume: volume object; its ``size`` and ``id`` attributes
        are read
    :param device_file: open, seekable file-like object for the
        attached volume device
    :raises exceptions.BackendException: if the device is still smaller
        than the volume once the timeout budget has been used up
    """
    timeout = 20
    max_recheck_wait = 10
    tries = 0
    elapsed = 0
    while Store._get_device_size(device_file) < volume.size:
        # The budget is checked *before* sleeping, so the size is always
        # re-read after the final sleep; a resize that lands during the
        # last wait is then treated as success instead of a timeout.
        if elapsed >= timeout:
            msg = (_('Timeout while waiting while volume %(volume_id)s '
                     'to resize the device in %(tries)s tries.')
                   % {'volume_id': volume.id, 'tries': tries})
            LOG.error(msg)
            raise exceptions.BackendException(msg)
        # Exponential backoff, capped per step and by the remaining
        # budget so the total time slept never exceeds ``timeout``.
        wait = min(0.5 * 2 ** tries, max_recheck_wait, timeout - elapsed)
        time.sleep(wait)
        tries += 1
        elapsed += wait
def _wait_volume_status(self, volume, status_transition, status_expected):
max_recheck_wait = 15
timeout = self.store_conf.cinder_state_transition_timeout
@ -865,6 +896,10 @@ class Store(glance_store.driver.Store):
try:
while need_extend:
with self._open_cinder_volume(client, volume, 'wb') as f:
# Sometimes the extended LUN on storage side takes time
# to reflect in the device so we wait until the device
# size is equal to the extended volume size.
Store._wait_resize_device(volume, f)
f.seek(bytes_written)
if buf:
f.write(buf)

View File

@ -16,6 +16,8 @@
import contextlib
import errno
import hashlib
import io
import math
import os
from unittest import mock
@ -344,7 +346,7 @@ class TestCinderStore(base.StoreBaseTest,
self.assertEqual(expected_image_size, image_size)
def _test_cinder_add(self, fake_volume, volume_file, size_kb=5,
verifier=None):
verifier=None, fail_resize=False):
expected_image_id = str(uuid.uuid4())
expected_size = size_kb * units.Ki
expected_file_contents = b"*" * expected_size
@ -365,7 +367,11 @@ class TestCinderStore(base.StoreBaseTest,
with mock.patch.object(cinder.Store, 'get_cinderclient') as mock_cc, \
mock.patch.object(self.store, '_open_cinder_volume',
side_effect=fake_open):
side_effect=fake_open), \
mock.patch.object(
cinder.Store, '_wait_resize_device') as mock_wait_resize:
if fail_resize:
mock_wait_resize.side_effect = exceptions.BackendException()
mock_cc.return_value = FakeObject(client=fake_client,
volumes=fake_volumes)
loc, size, checksum, multihash, _ = self.store.add(
@ -448,3 +454,43 @@ class TestCinderStore(base.StoreBaseTest,
self.store.configure_add()
mock_log.warning.assert_called_with(
"Invalid `cinder_volume_type some_random_type`")
def test__get_device_size(self):
    """_get_device_size reports the file length rounded up to GiB."""
    payload = b"fake binary data"
    expected_gib = int(math.ceil(float(len(payload)) / units.Gi))
    actual_gib = cinder.Store._get_device_size(io.BytesIO(payload))
    self.assertEqual(expected_gib, actual_gib)
@mock.patch.object(time, 'sleep')
def test__wait_resize_device_resized(self, mock_sleep):
    """The wait returns once the device reports the volume size.

    The device size is mocked to be too small on the first poll and
    equal to the volume size on the second, so exactly one backoff
    sleep is expected.
    """
    fake_vol = mock.MagicMock()
    fake_vol.size = 2
    fake_file = io.BytesIO(b"fake binary data")
    with mock.patch.object(
            cinder.Store, '_get_device_size') as mock_get_dev_size:
        mock_get_dev_size.side_effect = [1, 2]
        cinder.Store._wait_resize_device(fake_vol, fake_file)
    # Without these checks the test would pass even if the wait loop
    # never polled the device or never slept between polls.
    self.assertEqual(2, mock_get_dev_size.call_count)
    mock_sleep.assert_called_once_with(0.5)
@mock.patch.object(time, 'sleep')
def test__wait_resize_device_fails(self, mock_sleep):
    """The wait raises BackendException if the device never grows."""
    volume = mock.MagicMock()
    volume.size = 2
    device = io.BytesIO(b"fake binary data")
    # The device size is pinned below the volume size so the wait can
    # only end by timing out.
    size_patch = mock.patch.object(
        cinder.Store, '_get_device_size', return_value=1)
    with size_patch:
        with self.assertRaises(exceptions.BackendException):
            cinder.Store._wait_resize_device(volume, device)
def test_cinder_add_fail_resize(self):
    """A failed device resize wait aborts add() and deletes the volume."""
    device = io.BytesIO()
    volume = mock.MagicMock(id=str(uuid.uuid4()),
                            status='available',
                            size=1)
    with self.assertRaises(exceptions.BackendException):
        self._test_cinder_add(volume, device, fail_resize=True)
    volume.delete.assert_called_once()

View File

@ -15,6 +15,8 @@
import contextlib
import errno
import io
import math
import os
from unittest import mock
@ -440,7 +442,7 @@ class TestMultiCinderStore(base.MultiStoreBaseTest,
self.assertEqual(expected_image_size, image_size)
def _test_cinder_add(self, fake_volume, volume_file, size_kb=5,
verifier=None, backend="cinder1"):
verifier=None, backend="cinder1", fail_resize=False):
expected_image_id = str(uuid.uuid4())
expected_size = size_kb * units.Ki
expected_file_contents = b"*" * expected_size
@ -460,7 +462,11 @@ class TestMultiCinderStore(base.MultiStoreBaseTest,
with mock.patch.object(cinder.Store, 'get_cinderclient') as mock_cc, \
mock.patch.object(self.store, '_open_cinder_volume',
side_effect=fake_open):
side_effect=fake_open), \
mock.patch.object(
cinder.Store, '_wait_resize_device') as mock_wait_resize:
if fail_resize:
mock_wait_resize.side_effect = exceptions.BackendException()
mock_cc.return_value = FakeObject(client=fake_client,
volumes=fake_volumes)
loc, size, checksum, metadata = self.store.add(expected_image_id,
@ -535,3 +541,43 @@ class TestMultiCinderStore(base.MultiStoreBaseTest,
size=1)
volume_file = six.BytesIO()
self._test_cinder_add(fake_volume, volume_file, backend="cinder2")
def test__get_device_size(self):
    """_get_device_size reports the file length rounded up to GiB."""
    payload = b"fake binary data"
    expected_gib = int(math.ceil(float(len(payload)) / units.Gi))
    actual_gib = cinder.Store._get_device_size(io.BytesIO(payload))
    self.assertEqual(expected_gib, actual_gib)
@mock.patch.object(time, 'sleep')
def test__wait_resize_device_resized(self, mock_sleep):
    """The wait returns once the device reports the volume size.

    The device size is mocked to be too small on the first poll and
    equal to the volume size on the second, so exactly one backoff
    sleep is expected.
    """
    fake_vol = mock.MagicMock()
    fake_vol.size = 2
    fake_file = io.BytesIO(b"fake binary data")
    with mock.patch.object(
            cinder.Store, '_get_device_size') as mock_get_dev_size:
        mock_get_dev_size.side_effect = [1, 2]
        cinder.Store._wait_resize_device(fake_vol, fake_file)
    # Without these checks the test would pass even if the wait loop
    # never polled the device or never slept between polls.
    self.assertEqual(2, mock_get_dev_size.call_count)
    mock_sleep.assert_called_once_with(0.5)
@mock.patch.object(time, 'sleep')
def test__wait_resize_device_fails(self, mock_sleep):
    """The wait raises BackendException if the device never grows."""
    volume = mock.MagicMock()
    volume.size = 2
    device = io.BytesIO(b"fake binary data")
    # The device size is pinned below the volume size so the wait can
    # only end by timing out.
    size_patch = mock.patch.object(
        cinder.Store, '_get_device_size', return_value=1)
    with size_patch:
        with self.assertRaises(exceptions.BackendException):
            cinder.Store._wait_resize_device(volume, device)
def test_cinder_add_fail_resize(self):
    """A failed device resize wait aborts add() and deletes the volume."""
    device = io.BytesIO()
    volume = mock.MagicMock(id=str(uuid.uuid4()),
                            status='available',
                            size=1)
    with self.assertRaises(exceptions.BackendException):
        self._test_cinder_add(volume, device, fail_resize=True)
    volume.delete.assert_called_once()

View File

@ -0,0 +1,7 @@
---
fixes:
- |
`Bug #1959913 <https://bugs.launchpad.net/glance-store/+bug/1959913>`_:
Added a wait between the volume being extended and
the new size being detected while opening the
volume device.