Scalable backup service - Liberty compatibility
To support rolling upgrades we need to make sure that Mitaka's services are running fine with Liberty's. It gets complicated with backups as we've strongly reworked them. Main difference is that Mitaka c-bak can handle backup/restore of any volume and Liberty was restricted to operate only on volumes placed on the same node. Now when running in version heterogeneous environment we need to use old way of backup jobs scheduling and switch to new one (round robin) only when everything is running Mitaka. This commit implements that by adding a dummy backup RPC API version (1.3) that marks the beginning of scalable backups era. Jobs are scheduled the new way only if every c-bak reports that (or higher) version. There are also small changes to volume.rpcapi - to fail fast if some c-vol services aren't supporting new calls required by scalable backups feature. This allows us to error out backups with proper message when upgrade was done in an improper way (in Mitaka we require c-vols to be upgraded before c-baks). This commit also includes small changes to CinderObjectSerializer to block tries to "forwardport" an object when sending it over RPC. If a service receives an older object it should handle it explicitly. Related-Blueprint: scalable-backup-service Co-Authored-By: Michal Dulko <michal.dulko@intel.com> Change-Id: I45324336ba00726d53cfa012e8bd498868919a8c
This commit is contained in:
parent
3631fd2576
commit
05a516da01
@ -25,6 +25,7 @@ from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
from oslo_utils import excutils
|
||||
from oslo_utils import strutils
|
||||
from oslo_utils import versionutils
|
||||
from pytz import timezone
|
||||
import random
|
||||
|
||||
@ -39,6 +40,7 @@ import cinder.policy
|
||||
from cinder import quota
|
||||
from cinder import utils
|
||||
import cinder.volume
|
||||
from cinder.volume import utils as volume_utils
|
||||
|
||||
backup_api_opts = [
|
||||
cfg.BoolOpt('backup_use_same_backend',
|
||||
@ -135,6 +137,24 @@ class API(base.Base):
|
||||
|
||||
return backups
|
||||
|
||||
def _is_scalable_only(self):
|
||||
"""True if we're running in deployment where all c-bak are scalable.
|
||||
|
||||
We need this method to decide if we can assume that all of our c-bak
|
||||
services are decoupled from c-vol.
|
||||
|
||||
FIXME(dulek): This shouldn't be needed in Newton.
|
||||
"""
|
||||
cap = self.backup_rpcapi.client.version_cap
|
||||
if cap:
|
||||
cap = versionutils.convert_version_to_tuple(cap)
|
||||
return cap >= (1, 3) # Mitaka is marked by c-bak 1.3+.
|
||||
else:
|
||||
# NOTE(dulek): No version cap means we're running in an environment
|
||||
# without c-bak services. Letting it pass as Mitaka, request will
|
||||
# just fail anyway so it doesn't really matter.
|
||||
return True
|
||||
|
||||
def _az_matched(self, service, availability_zone):
|
||||
return ((not availability_zone) or
|
||||
service.availability_zone == availability_zone)
|
||||
@ -170,14 +190,29 @@ class API(base.Base):
|
||||
idx = idx + 1
|
||||
return None
|
||||
|
||||
def _get_available_backup_service_host(self, host, availability_zone):
|
||||
def _get_available_backup_service_host(self, host, az, volume_host=None):
|
||||
"""Return an appropriate backup service host."""
|
||||
|
||||
# FIXME(dulek): We need to keep compatibility with Liberty, where c-bak
|
||||
# were coupled with c-vol. If we're running in mixed Liberty-Mitaka
|
||||
# environment we will be scheduling backup jobs the old way.
|
||||
#
|
||||
# This snippet should go away in Newton. Note that volume_host
|
||||
# parameter will also be unnecessary then.
|
||||
if not self._is_scalable_only():
|
||||
if volume_host and self._is_backup_service_enabled(az,
|
||||
volume_host):
|
||||
return volume_host
|
||||
elif host and self._is_backup_service_enabled(az, host):
|
||||
return host
|
||||
else:
|
||||
raise exception.ServiceNotFound(service_id='cinder-backup')
|
||||
|
||||
backup_host = None
|
||||
if host and self._is_backup_service_enabled(availability_zone, host):
|
||||
if host and self._is_backup_service_enabled(az, host):
|
||||
backup_host = host
|
||||
if not backup_host and (not host or CONF.backup_use_same_backend):
|
||||
backup_host = self._get_any_available_backup_service(
|
||||
availability_zone)
|
||||
backup_host = self._get_any_available_backup_service(az)
|
||||
if not backup_host:
|
||||
raise exception.ServiceNotFound(service_id='cinder-backup')
|
||||
return backup_host
|
||||
@ -225,7 +260,8 @@ class API(base.Base):
|
||||
|
||||
previous_status = volume['status']
|
||||
host = self._get_available_backup_service_host(
|
||||
None, volume.availability_zone)
|
||||
None, volume.availability_zone,
|
||||
volume_utils.extract_host(volume.host, 'host'))
|
||||
|
||||
# Reserve a quota before setting volume status and backup status
|
||||
try:
|
||||
|
@ -81,7 +81,7 @@ QUOTAS = quota.QUOTAS
|
||||
class BackupManager(manager.SchedulerDependentManager):
|
||||
"""Manages backup of block storage devices."""
|
||||
|
||||
RPC_API_VERSION = '1.2'
|
||||
RPC_API_VERSION = '1.3'
|
||||
|
||||
target = messaging.Target(version=RPC_API_VERSION)
|
||||
|
||||
|
@ -35,9 +35,12 @@ class BackupAPI(rpc.RPCAPI):
|
||||
|
||||
1.0 - Initial version.
|
||||
1.1 - Changed methods to accept backup objects instead of IDs.
|
||||
1.2 - A version that got in by mistake (without breaking anything).
|
||||
1.3 - Dummy version bump to mark start of having cinder-backup service
|
||||
decoupled from cinder-volume.
|
||||
"""
|
||||
|
||||
RPC_API_VERSION = '1.1'
|
||||
RPC_API_VERSION = '1.3'
|
||||
TOPIC = CONF.backup_topic
|
||||
BINARY = 'cinder-backup'
|
||||
|
||||
|
@ -351,6 +351,10 @@ class ServiceNotFound(NotFound):
|
||||
message = _("Service %(service_id)s could not be found.")
|
||||
|
||||
|
||||
class ServiceTooOld(Invalid):
|
||||
message = _("Service is too old to fulfil this request.")
|
||||
|
||||
|
||||
class HostNotFound(NotFound):
|
||||
message = _("Host %(host)s could not be found.")
|
||||
|
||||
|
@ -18,6 +18,7 @@ import contextlib
|
||||
import datetime
|
||||
|
||||
from oslo_log import log as logging
|
||||
from oslo_utils import versionutils
|
||||
from oslo_versionedobjects import base
|
||||
from oslo_versionedobjects import fields
|
||||
|
||||
@ -394,8 +395,20 @@ class CinderObjectSerializer(base.VersionedObjectSerializer):
|
||||
|
||||
def _get_capped_obj_version(self, obj):
|
||||
objname = obj.obj_name()
|
||||
objver = OBJ_VERSIONS.get(self.version_cap, {})
|
||||
return objver.get(objname, None)
|
||||
version_dict = OBJ_VERSIONS.get(self.version_cap, {})
|
||||
version_cap = version_dict.get(objname, None)
|
||||
|
||||
if version_cap:
|
||||
cap_tuple = versionutils.convert_version_to_tuple(version_cap)
|
||||
obj_tuple = versionutils.convert_version_to_tuple(obj.VERSION)
|
||||
if cap_tuple > obj_tuple:
|
||||
# NOTE(dulek): Do not set version cap to be higher than actual
|
||||
# object version as we don't support "forwardporting" of
|
||||
# objects. If service will receive an object that's too old it
|
||||
# should handle it explicitly.
|
||||
version_cap = None
|
||||
|
||||
return version_cap
|
||||
|
||||
def serialize_entity(self, context, entity):
|
||||
if isinstance(entity, (tuple, list, set, dict)):
|
||||
|
@ -508,6 +508,26 @@ class BackupTestCase(BaseBackupTest):
|
||||
self.assertEqual(fields.BackupStatus.ERROR, backup['status'])
|
||||
self.assertTrue(mock_run_backup.called)
|
||||
|
||||
@mock.patch('cinder.utils.brick_get_connector_properties')
|
||||
@mock.patch('cinder.utils.temporary_chown')
|
||||
@mock.patch('six.moves.builtins.open')
|
||||
def test_create_backup_old_volume_service(self, mock_open,
|
||||
mock_temporary_chown,
|
||||
mock_get_backup_device):
|
||||
"""Test error handling when there's too old volume service in env."""
|
||||
vol_id = self._create_volume_db_entry(size=1)
|
||||
backup = self._create_backup_db_entry(volume_id=vol_id)
|
||||
|
||||
with mock.patch.object(self.backup_mgr.volume_rpcapi.client,
|
||||
'version_cap', '1.37'):
|
||||
self.assertRaises(exception.ServiceTooOld,
|
||||
self.backup_mgr.create_backup, self.ctxt, backup)
|
||||
vol = db.volume_get(self.ctxt, vol_id)
|
||||
self.assertEqual('available', vol['status'])
|
||||
self.assertEqual('error_backing-up', vol['previous_status'])
|
||||
backup = db.backup_get(self.ctxt, backup.id)
|
||||
self.assertEqual(fields.BackupStatus.ERROR, backup['status'])
|
||||
|
||||
@mock.patch('cinder.utils.brick_get_connector_properties')
|
||||
@mock.patch('cinder.volume.rpcapi.VolumeAPI.get_backup_device')
|
||||
@mock.patch('cinder.utils.temporary_chown')
|
||||
@ -619,6 +639,31 @@ class BackupTestCase(BaseBackupTest):
|
||||
self.assertEqual(fields.BackupStatus.AVAILABLE, backup['status'])
|
||||
self.assertTrue(mock_run_restore.called)
|
||||
|
||||
@mock.patch('cinder.utils.brick_get_connector_properties')
|
||||
def test_restore_backup_with_old_volume_service(self, mock_get_conn):
|
||||
"""Test error handling when an error occurs during backup restore."""
|
||||
vol_id = self._create_volume_db_entry(status='restoring-backup',
|
||||
size=1)
|
||||
backup = self._create_backup_db_entry(
|
||||
status=fields.BackupStatus.RESTORING, volume_id=vol_id)
|
||||
|
||||
# Unmock secure_file_operations_enabled
|
||||
self.volume_patches['secure_file_operations_enabled'].stop()
|
||||
|
||||
with mock.patch.object(self.backup_mgr.volume_rpcapi.client,
|
||||
'version_cap', '1.37'):
|
||||
self.assertRaises(exception.ServiceTooOld,
|
||||
self.backup_mgr.restore_backup,
|
||||
self.ctxt,
|
||||
backup,
|
||||
vol_id)
|
||||
vol = db.volume_get(self.ctxt, vol_id)
|
||||
self.assertEqual('error_restoring', vol['status'])
|
||||
backup = db.backup_get(self.ctxt, backup.id)
|
||||
self.assertEqual(fields.BackupStatus.AVAILABLE, backup['status'])
|
||||
|
||||
self.volume_patches['secure_file_operations_enabled'].start()
|
||||
|
||||
def test_restore_backup_with_bad_service(self):
|
||||
"""Test error handling.
|
||||
|
||||
|
@ -24,6 +24,7 @@ from oslo_serialization import jsonutils
|
||||
|
||||
from cinder import context
|
||||
from cinder import db
|
||||
from cinder import exception
|
||||
from cinder import objects
|
||||
from cinder import test
|
||||
from cinder.tests.unit import fake_backup
|
||||
@ -599,15 +600,28 @@ class VolumeRpcAPITestCase(test.TestCase):
|
||||
volume=self.fake_volume,
|
||||
version='1.30')
|
||||
|
||||
def test_get_backup_device(self):
|
||||
@mock.patch('oslo_messaging.RPCClient.can_send_version', return_value=True)
|
||||
def test_get_backup_device(self, mock_can_send_version):
|
||||
self._test_volume_api('get_backup_device',
|
||||
rpc_method='call',
|
||||
backup=self.fake_backup_obj,
|
||||
volume=self.fake_volume_obj,
|
||||
version='1.38')
|
||||
|
||||
def test_secure_file_operations_enabled(self):
|
||||
mock_can_send_version.return_value = False
|
||||
self.assertRaises(exception.ServiceTooOld, self._test_volume_api,
|
||||
'get_backup_device', rpc_method='call',
|
||||
backup=self.fake_backup_obj,
|
||||
volume=self.fake_volume_obj, version='1.38')
|
||||
|
||||
@mock.patch('oslo_messaging.RPCClient.can_send_version', return_value=True)
|
||||
def test_secure_file_operations_enabled(self, mock_can_send_version):
|
||||
self._test_volume_api('secure_file_operations_enabled',
|
||||
rpc_method='call',
|
||||
volume=self.fake_volume_obj,
|
||||
version='1.38')
|
||||
|
||||
mock_can_send_version.return_value = False
|
||||
self.assertRaises(exception.ServiceTooOld, self._test_volume_api,
|
||||
'secure_file_operations_enabled', rpc_method='call',
|
||||
volume=self.fake_volume_obj, version='1.38')
|
||||
|
@ -19,6 +19,8 @@ Client side of the volume RPC API.
|
||||
from oslo_config import cfg
|
||||
from oslo_serialization import jsonutils
|
||||
|
||||
from cinder import exception
|
||||
from cinder.i18n import _
|
||||
from cinder import quota
|
||||
from cinder import rpc
|
||||
from cinder.volume import utils
|
||||
@ -334,12 +336,22 @@ class VolumeAPI(rpc.RPCAPI):
|
||||
return cctxt.call(ctxt, 'get_capabilities', discover=discover)
|
||||
|
||||
def get_backup_device(self, ctxt, backup, volume):
|
||||
if not self.client.can_send_version('1.38'):
|
||||
msg = _('One of cinder-volume services is too old to accept such '
|
||||
'request. Are you running mixed Liberty-Mitaka '
|
||||
'cinder-volumes?')
|
||||
raise exception.ServiceTooOld(msg)
|
||||
new_host = utils.extract_host(volume.host)
|
||||
cctxt = self.client.prepare(server=new_host, version='1.38')
|
||||
return cctxt.call(ctxt, 'get_backup_device',
|
||||
backup=backup)
|
||||
|
||||
def secure_file_operations_enabled(self, ctxt, volume):
|
||||
if not self.client.can_send_version('1.38'):
|
||||
msg = _('One of cinder-volume services is too old to accept such '
|
||||
'request. Are you running mixed Liberty-Mitaka '
|
||||
'cinder-volumes?')
|
||||
raise exception.ServiceTooOld(msg)
|
||||
new_host = utils.extract_host(volume.host)
|
||||
cctxt = self.client.prepare(server=new_host, version='1.38')
|
||||
return cctxt.call(ctxt, 'secure_file_operations_enabled',
|
||||
|
@ -0,0 +1,8 @@
|
||||
---
|
||||
features:
|
||||
- cinder-backup service is now decoupled from
|
||||
cinder-volume, which allows more flexible scaling.
|
||||
upgrade:
|
||||
- As cinder-backup was strongly reworked in this
|
||||
release, the recommended upgrade order when executing
|
||||
live (rolling) upgrade is c-api->c-sch->c-vol->c-bak.
|
Loading…
Reference in New Issue
Block a user