Support online volume resize
Trove now supports resizing a volume without downtime. To use this feature, the version of Nova and Cinder needs to be at least Pike, and the config option ``cinder_service_type`` needs to be set to ``volumev3``. The cloud admin can disable this feature by setting ``online_volume_resize=False``; it is enabled by default. Change-Id: I000a4e90800454972dd39f2f82d286571bc0b96c
This commit is contained in:
parent
39b0df0a6b
commit
ba046b2a14
7
releasenotes/notes/victoria-support-online-resize.yaml
Normal file
7
releasenotes/notes/victoria-support-online-resize.yaml
Normal file
@ -0,0 +1,7 @@
|
||||
---
|
||||
features:
|
||||
- Trove now supports resizing a volume without downtime. To use this feature,
|
||||
the version of Nova and Cinder needs to be at least Pike, and the config option
|
||||
``cinder_service_type`` needs to be set to ``volumev3``. The cloud admin
|
||||
can disable this feature by setting ``online_volume_resize=False``; it is
|
||||
enabled by default.
|
@ -93,7 +93,7 @@ common_opts = [
|
||||
cfg.BoolOpt('neutron_api_insecure', default=False,
|
||||
help="Allow to perform insecure SSL requests to neutron."),
|
||||
cfg.URIOpt('cinder_url', help='URL without the tenant segment.'),
|
||||
cfg.StrOpt('cinder_service_type', default='volumev2',
|
||||
cfg.StrOpt('cinder_service_type', default='volumev3',
|
||||
help='Service type to use when searching catalog.'),
|
||||
cfg.StrOpt('cinder_endpoint_type', default='publicURL',
|
||||
help='Service endpoint type to use when searching catalog.'),
|
||||
@ -475,7 +475,10 @@ common_opts = [
|
||||
help='The docker image used for backup and restore.'),
|
||||
cfg.ListOpt('reserved_network_cidrs', default=[],
|
||||
help='Network CIDRs reserved for Trove guest instance '
|
||||
'management.')
|
||||
'management.'),
|
||||
cfg.BoolOpt(
|
||||
'online_volume_resize', default=True,
|
||||
help='If online volume resize is supported.')
|
||||
]
|
||||
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from cinderclient.v2 import client as CinderClient
|
||||
from cinderclient import client as CinderClient
|
||||
import glanceclient
|
||||
from keystoneauth1 import loading
|
||||
from keystoneauth1 import session
|
||||
@ -95,13 +95,17 @@ def cinder_client_trove_admin(context, region_name=None):
|
||||
LOG.debug('Re-use admin cinder client')
|
||||
return ADMIN_CINDER_CLIENT
|
||||
|
||||
version = CONF.cinder_service_type.split('v')[-1] or '3'
|
||||
|
||||
ks_session = get_keystone_session()
|
||||
ADMIN_CINDER_CLIENT = CinderClient.Client(
|
||||
version,
|
||||
session=ks_session,
|
||||
service_type=CONF.cinder_service_type,
|
||||
region_name=region_name or CONF.service_credentials.region_name,
|
||||
insecure=CONF.cinder_api_insecure,
|
||||
endpoint_type=CONF.cinder_endpoint_type)
|
||||
endpoint_type=CONF.cinder_endpoint_type,
|
||||
additional_headers={'OpenStack-API-Version': 'volumev3 latest'})
|
||||
|
||||
if CONF.cinder_url and CONF.service_credentials.project_id:
|
||||
ADMIN_CINDER_CLIENT.client.management_url = "%s/%s/" % (
|
||||
|
@ -205,23 +205,6 @@ class TroveInstanceCreate(TroveCommonTraits):
|
||||
super(TroveInstanceCreate, self).notify('create')
|
||||
|
||||
|
||||
class TroveInstanceModifyVolume(TroveCommonTraits):
|
||||
|
||||
'''
|
||||
Additional traits for trove.instance.create notifications that describe
|
||||
instance action events
|
||||
|
||||
This class should correspond to trove_instance_modify_volume in
|
||||
ceilometer/event_definitions.yaml
|
||||
'''
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super(TroveInstanceModifyVolume, self).__init__(**kwargs)
|
||||
|
||||
def notify(self):
|
||||
super(TroveInstanceModifyVolume, self).notify('modify_volume')
|
||||
|
||||
|
||||
class TroveInstanceModifyFlavor(TroveCommonTraits):
|
||||
|
||||
'''
|
||||
|
@ -473,7 +473,7 @@ class API(object):
|
||||
self.agent_low_timeout, version=version,
|
||||
device_path=device_path, mount_point=mount_point)
|
||||
|
||||
def resize_fs(self, device_path=None, mount_point=None):
|
||||
def resize_fs(self, device_path=None, mount_point=None, online=False):
|
||||
"""Resize the filesystem."""
|
||||
LOG.debug("Resize device %(device)s on instance %(id)s.", {
|
||||
'device': device_path, 'id': self.id})
|
||||
@ -481,7 +481,8 @@ class API(object):
|
||||
|
||||
self._call("resize_fs",
|
||||
self.agent_high_timeout, version=version,
|
||||
device_path=device_path, mount_point=mount_point)
|
||||
device_path=device_path, mount_point=mount_point,
|
||||
online=online)
|
||||
|
||||
def update_overrides(self, overrides, remove=False):
|
||||
"""Update the overrides."""
|
||||
|
@ -364,10 +364,11 @@ class Manager(periodic_task.PeriodicTasks):
|
||||
device = volume.VolumeDevice(device_path)
|
||||
device.unmount(mount_point)
|
||||
|
||||
def resize_fs(self, context, device_path=None, mount_point=None):
|
||||
LOG.debug("Resizing the filesystem at %s.", mount_point)
|
||||
def resize_fs(self, context, device_path=None, mount_point=None,
|
||||
online=False):
|
||||
LOG.info(f"Resizing the filesystem at {mount_point}, online: {online}")
|
||||
device = volume.VolumeDevice(device_path)
|
||||
device.resize_fs(mount_point)
|
||||
device.resize_fs(mount_point, online=online)
|
||||
|
||||
###############
|
||||
# Configuration
|
||||
|
@ -71,7 +71,7 @@ class FSBase(object):
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def resize(self, device_path):
|
||||
def resize(self, device_path, online=False):
|
||||
"""
|
||||
Resize the filesystem on device
|
||||
"""
|
||||
@ -113,7 +113,8 @@ class FSExt(FSBase):
|
||||
exc_fmt = _("Volume '%s' was not formatted.")
|
||||
log_and_raise(log_fmt, exc_fmt, device_path)
|
||||
|
||||
def resize(self, device_path):
|
||||
def resize(self, device_path, online=False):
|
||||
if not online:
|
||||
utils.execute("e2fsck", "-f", "-p", device_path,
|
||||
run_as_root=True, root_helper="sudo")
|
||||
utils.execute("resize2fs", device_path,
|
||||
@ -158,7 +159,7 @@ class FSXFS(FSBase):
|
||||
device_path)
|
||||
raise exception.GuestError(original_message=msg)
|
||||
|
||||
def resize(self, device_path):
|
||||
def resize(self, device_path, online=False):
|
||||
utils.execute("xfs_repair", device_path,
|
||||
run_as_root=True, root_helper="sudo")
|
||||
utils.execute("mount", device_path,
|
||||
@ -263,18 +264,18 @@ class VolumeDevice(object):
|
||||
|
||||
return True
|
||||
|
||||
def resize_fs(self, mount_point):
|
||||
def resize_fs(self, mount_point, online=False):
|
||||
"""Resize the filesystem on the specified device."""
|
||||
self._check_device_exists()
|
||||
# Some OS's will mount a file system after it's attached if
|
||||
# an entry is put in the fstab file (like Trove does).
|
||||
# Thus it may be necessary to wait for the mount and then unmount
|
||||
# the fs again (since the volume was just attached).
|
||||
if self._wait_for_mount(mount_point, timeout=2):
|
||||
if not online and self._wait_for_mount(mount_point, timeout=2):
|
||||
LOG.debug("Unmounting '%s' before resizing.", mount_point)
|
||||
self.unmount(mount_point)
|
||||
try:
|
||||
self.volume_fs.resize(self.device_path)
|
||||
self.volume_fs.resize(self.device_path, online=online)
|
||||
except exception.ProcessExecutionError:
|
||||
log_fmt = "Error resizing the filesystem with device '%s'."
|
||||
exc_fmt = _("Error resizing the filesystem with device '%s'.")
|
||||
|
@ -17,7 +17,6 @@ import os.path
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from cinderclient import exceptions as cinder_exceptions
|
||||
from eventlet import greenthread
|
||||
from eventlet.timeout import Timeout
|
||||
from oslo_log import log as logging
|
||||
@ -55,7 +54,6 @@ from trove.common.notification import EndNotification
|
||||
from trove.common.notification import StartNotification
|
||||
from trove.common.notification import TroveInstanceCreate
|
||||
from trove.common.notification import TroveInstanceModifyFlavor
|
||||
from trove.common.notification import TroveInstanceModifyVolume
|
||||
from trove.common.strategies.cluster import strategy
|
||||
from trove.common.utils import try_recover
|
||||
from trove.extensions.mysql import models as mysql_models
|
||||
@ -1512,7 +1510,7 @@ class ResizeVolumeAction(object):
|
||||
return self.instance.device_path
|
||||
|
||||
def _fail(self, orig_func):
|
||||
LOG.exception("%(func)s encountered an error when "
|
||||
LOG.error("%(func)s encountered an error when "
|
||||
"attempting to resize the volume for "
|
||||
"instance %(id)s. Setting service "
|
||||
"status to failed.", {'func': orig_func.__name__,
|
||||
@ -1539,7 +1537,7 @@ class ResizeVolumeAction(object):
|
||||
self.instance.restart()
|
||||
|
||||
def _recover_full(self, orig_func):
|
||||
LOG.exception("%(func)s encountered an error when attempting to "
|
||||
LOG.error("%(func)s encountered an error when attempting to "
|
||||
"resize the volume for instance %(id)s. Trying to "
|
||||
"recover by attaching and"
|
||||
" mounting the volume and then restarting the "
|
||||
@ -1609,16 +1607,16 @@ class ResizeVolumeAction(object):
|
||||
'id': self.instance.id})
|
||||
|
||||
@try_recover
|
||||
def _resize_fs(self):
|
||||
LOG.debug("Resizing the filesystem for instance %(id)s", {
|
||||
'id': self.instance.id})
|
||||
def _resize_fs(self, online=False):
|
||||
LOG.info(f"Resizing the filesystem for instance {self.instance.id}, "
|
||||
f"online: {online}")
|
||||
mount_point = self.get_mount_point()
|
||||
device_path = self.get_device_path()
|
||||
self.instance.guest.resize_fs(device_path=device_path,
|
||||
mount_point=mount_point)
|
||||
LOG.debug("Successfully resized volume %(vol_id)s filesystem for "
|
||||
"instance %(id)s", {'vol_id': self.instance.volume_id,
|
||||
'id': self.instance.id})
|
||||
mount_point=mount_point,
|
||||
online=online)
|
||||
LOG.debug(f"Successfully resized volume {self.instance.volume_id} "
|
||||
f"filesystem for instance {self.instance.id}")
|
||||
|
||||
@try_recover
|
||||
def _mount_volume(self):
|
||||
@ -1634,10 +1632,8 @@ class ResizeVolumeAction(object):
|
||||
|
||||
@try_recover
|
||||
def _extend(self):
|
||||
LOG.debug("Extending volume %(vol_id)s for instance %(id)s to "
|
||||
"size %(size)s", {'vol_id': self.instance.volume_id,
|
||||
'id': self.instance.id,
|
||||
'size': self.new_size})
|
||||
LOG.info(f"Calling Cinder to extend volume {self.instance.volume_id} "
|
||||
f"for instance {self.instance.id} to size {self.new_size}")
|
||||
self.instance.volume_client.volumes.extend(self.instance.volume_id,
|
||||
self.new_size)
|
||||
LOG.debug("Successfully extended the volume %(vol_id)s for instance "
|
||||
@ -1649,9 +1645,8 @@ class ResizeVolumeAction(object):
|
||||
volume = self.instance.volume_client.volumes.get(
|
||||
self.instance.volume_id)
|
||||
if not volume:
|
||||
msg = (_('Failed to get volume %(vol_id)s') % {
|
||||
'vol_id': self.instance.volume_id})
|
||||
raise cinder_exceptions.ClientException(msg)
|
||||
msg = f'Failed to get volume {self.instance.volume_id}'
|
||||
raise exception.TroveError(msg)
|
||||
|
||||
def volume_is_new_size():
|
||||
volume = self.instance.volume_client.volumes.get(
|
||||
@ -1659,12 +1654,12 @@ class ResizeVolumeAction(object):
|
||||
return volume.size == self.new_size
|
||||
|
||||
utils.poll_until(volume_is_new_size,
|
||||
sleep_time=2,
|
||||
sleep_time=5,
|
||||
time_out=CONF.volume_time_out)
|
||||
|
||||
self.instance.update_db(volume_size=self.new_size)
|
||||
except PollTimeOut:
|
||||
LOG.exception("Timeout trying to extend the volume %(vol_id)s "
|
||||
LOG.error("Timeout trying to extend the volume %(vol_id)s "
|
||||
"for instance %(id)s",
|
||||
{'vol_id': self.instance.volume_id,
|
||||
'id': self.instance.id})
|
||||
@ -1674,19 +1669,31 @@ class ResizeVolumeAction(object):
|
||||
self._fail(self._verify_extend)
|
||||
elif volume.size != self.new_size:
|
||||
self.instance.update_db(volume_size=volume.size)
|
||||
if not CONF.online_volume_resize:
|
||||
self._recover_full(self._verify_extend)
|
||||
raise
|
||||
except Exception:
|
||||
LOG.exception("Error encountered trying to verify extend for "
|
||||
"the volume %(vol_id)s for instance %(id)s",
|
||||
except Exception as e:
|
||||
LOG.error("Error encountered trying to verify extend for "
|
||||
"the volume %(vol_id)s for instance %(id)s, "
|
||||
"error: %(error)s",
|
||||
{'vol_id': self.instance.volume_id,
|
||||
'id': self.instance.id})
|
||||
'id': self.instance.id,
|
||||
'error': str(e)})
|
||||
if not CONF.online_volume_resize:
|
||||
self._recover_full(self._verify_extend)
|
||||
raise
|
||||
|
||||
def _resize_active_volume(self):
|
||||
LOG.debug("Begin _resize_active_volume for id: %(id)s", {
|
||||
'id': self.instance.id})
|
||||
if CONF.online_volume_resize:
|
||||
try:
|
||||
self._extend()
|
||||
except Exception as e:
|
||||
LOG.error(f'Failed to extend volume, error: {str(e)}')
|
||||
|
||||
self._verify_extend()
|
||||
self._resize_fs(recover_func=self._fail, online=True)
|
||||
return
|
||||
|
||||
self._stop_db()
|
||||
self._unmount_volume(recover_func=self._recover_restart)
|
||||
self._detach_volume(recover_func=self._recover_mount_restart)
|
||||
@ -1694,11 +1701,9 @@ class ResizeVolumeAction(object):
|
||||
self._verify_extend()
|
||||
# if anything fails after this point, recovery is futile
|
||||
self._attach_volume(recover_func=self._fail)
|
||||
self._resize_fs(recover_func=self._fail)
|
||||
self._resize_fs(recover_func=self._fail, online=False)
|
||||
self._mount_volume(recover_func=self._fail)
|
||||
self.instance.restart()
|
||||
LOG.debug("End _resize_active_volume for id: %(id)s", {
|
||||
'id': self.instance.id})
|
||||
|
||||
def execute(self):
|
||||
LOG.debug("%(gt)s: Resizing instance %(id)s volume for server "
|
||||
@ -1711,19 +1716,11 @@ class ResizeVolumeAction(object):
|
||||
|
||||
if self.instance.server.status in [InstanceStatus.ACTIVE,
|
||||
InstanceStatus.HEALTHY]:
|
||||
try:
|
||||
self._resize_active_volume()
|
||||
finally:
|
||||
self.instance.reset_task_status()
|
||||
# send usage event for size reported by cinder
|
||||
volume = self.instance.volume_client.volumes.get(
|
||||
self.instance.volume_id)
|
||||
launched_time = timeutils.isotime(self.instance.updated)
|
||||
modified_time = timeutils.isotime(self.instance.updated)
|
||||
TroveInstanceModifyVolume(instance=self.instance,
|
||||
old_volume_size=self.old_size,
|
||||
launched_at=launched_time,
|
||||
modify_at=modified_time,
|
||||
volume_size=volume.size,
|
||||
).notify()
|
||||
|
||||
else:
|
||||
self.instance.reset_task_status()
|
||||
msg = (
|
||||
|
@ -546,7 +546,6 @@ class ResizeInstanceVolumeTest(ActionTestBase):
|
||||
self.new_volume_size)
|
||||
|
||||
@test(depends_on=[test_volume_resize])
|
||||
@time_out(300)
|
||||
def test_volume_resize_success(self):
|
||||
"""test_volume_resize_success"""
|
||||
|
||||
@ -559,7 +558,8 @@ class ResizeInstanceVolumeTest(ActionTestBase):
|
||||
else:
|
||||
asserts.fail("Status should not be %s" % instance.status)
|
||||
|
||||
poll_until(check_resize_status, sleep_time=2, time_out=300)
|
||||
poll_until(check_resize_status, sleep_time=5, time_out=300,
|
||||
initial_delay=5)
|
||||
instance = instance_info.dbaas.instances.get(instance_info.id)
|
||||
asserts.assert_equal(instance.volume['size'], self.new_volume_size)
|
||||
|
||||
|
@ -330,7 +330,7 @@ class FakeGuest(object):
|
||||
def unmount_volume(self, device_path=None, mount_point=None):
|
||||
pass
|
||||
|
||||
def resize_fs(self, device_path=None, mount_point=None):
|
||||
def resize_fs(self, device_path=None, mount_point=None, online=False):
|
||||
pass
|
||||
|
||||
def update_overrides(self, overrides, remove=False):
|
||||
|
@ -13,17 +13,19 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
from unittest.mock import Mock, patch
|
||||
from unittest.mock import Mock
|
||||
from unittest.mock import patch
|
||||
|
||||
from oslo_utils import timeutils
|
||||
|
||||
from trove import rpc
|
||||
from trove.common import cfg
|
||||
from trove.common.context import TroveContext
|
||||
from trove.common import exception
|
||||
from trove.common import notification
|
||||
from trove.common.notification import EndNotification, StartNotification
|
||||
from trove.common.context import TroveContext
|
||||
from trove.common.notification import EndNotification
|
||||
from trove.common.notification import StartNotification
|
||||
from trove.conductor import api as conductor_api
|
||||
from trove import rpc
|
||||
from trove.tests.unittests import trove_testtools
|
||||
|
||||
|
||||
@ -227,30 +229,6 @@ class TestTroveInstanceDelete(trove_testtools.TestCase):
|
||||
self.assertTrue(notifier().info.called)
|
||||
|
||||
|
||||
class TestTroveInstanceModifyVolume(trove_testtools.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
super(TestTroveInstanceModifyVolume, self).setUp()
|
||||
self.instance = Mock(db_info=Mock(created=timeutils.utcnow()))
|
||||
|
||||
@patch.object(cfg.CONF, 'get', Mock())
|
||||
@patch.object(rpc, 'get_notifier')
|
||||
def test_notification(self, notifier):
|
||||
notification.TroveInstanceModifyVolume(instance=self.instance).notify()
|
||||
self.assertTrue(notifier().info.called)
|
||||
|
||||
@patch.object(cfg.CONF, 'get', Mock())
|
||||
@patch.object(rpc, 'get_notifier')
|
||||
def test_notification_after_serialization(self, notifier):
|
||||
orig_notify = notification.TroveInstanceModifyVolume(
|
||||
instance=self.instance)
|
||||
serialized = orig_notify.serialize(None)
|
||||
new_notify = notification.TroveInstanceModifyVolume().deserialize(
|
||||
None, serialized)
|
||||
new_notify.notify()
|
||||
self.assertTrue(notifier().info.called)
|
||||
|
||||
|
||||
class TestTroveInstanceModifyFlavor(trove_testtools.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
|
@ -39,10 +39,10 @@ import trove.backup.models
|
||||
from trove.common import timeutils
|
||||
from trove.common import utils
|
||||
import trove.common.context
|
||||
from trove.common import exception
|
||||
from trove.common.exception import GuestError
|
||||
from trove.common.exception import PollTimeOut
|
||||
from trove.common.exception import TroveError
|
||||
from trove.common.notification import TroveInstanceModifyVolume
|
||||
import trove.common.template as template
|
||||
from trove.datastore import models as datastore_models
|
||||
import trove.db.models
|
||||
@ -627,11 +627,10 @@ class ResizeVolumeTest(trove_testtools.TestCase):
|
||||
self.instance.volume_client.volumes.extend.side_effect = None
|
||||
self.instance.reset_mock()
|
||||
|
||||
@patch('trove.taskmanager.models.LOG')
|
||||
def test_resize_volume_verify_extend_no_volume(self, mock_logging):
|
||||
def test_resize_volume_verify_extend_no_volume(self):
|
||||
self.instance.volume_client.volumes.get = Mock(
|
||||
return_value=None)
|
||||
self.assertRaises(cinder_exceptions.ClientException,
|
||||
self.assertRaises(exception.TroveError,
|
||||
self.action._verify_extend)
|
||||
self.instance.reset_mock()
|
||||
|
||||
@ -643,29 +642,20 @@ class ResizeVolumeTest(trove_testtools.TestCase):
|
||||
utils.poll_until.side_effect = None
|
||||
self.instance.reset_mock()
|
||||
|
||||
@patch.object(TroveInstanceModifyVolume, 'notify')
|
||||
def test_resize_volume_active_server_succeeds(self, *args):
|
||||
server = Mock(status=InstanceStatus.ACTIVE)
|
||||
self.instance.attach_mock(server, 'server')
|
||||
|
||||
self.action.execute()
|
||||
self.assertEqual(1, self.instance.guest.stop_db.call_count)
|
||||
self.assertEqual(1, self.instance.guest.unmount_volume.call_count)
|
||||
detach_count = (
|
||||
self.instance.nova_client.volumes.delete_server_volume.call_count)
|
||||
self.assertEqual(1, detach_count)
|
||||
|
||||
extend_count = self.instance.volume_client.volumes.extend.call_count
|
||||
self.assertEqual(1, extend_count)
|
||||
attach_count = (
|
||||
self.instance.nova_client.volumes.create_server_volume.call_count)
|
||||
self.assertEqual(1, attach_count)
|
||||
self.assertEqual(1, self.instance.guest.resize_fs.call_count)
|
||||
self.assertEqual(1, self.instance.guest.mount_volume.call_count)
|
||||
self.assertEqual(1, self.instance.restart.call_count)
|
||||
self.instance.reset_mock()
|
||||
|
||||
def test_resize_volume_server_error_fails(self):
|
||||
server = Mock(status=InstanceStatus.ERROR)
|
||||
self.instance.attach_mock(server, 'server')
|
||||
|
||||
self.assertRaises(TroveError, self.action.execute)
|
||||
self.instance.reset_mock()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user