Scalable backup service - Liberty compatibility

To support rolling upgrades we need to make sure that Mitaka services
run correctly alongside Liberty ones. This is complicated for backups
because they have been heavily reworked. The main difference is that a
Mitaka c-bak can handle backup/restore of any volume, whereas a Liberty
c-bak was restricted to volumes placed on the same node.

Now, when running in a version-heterogeneous environment, we need to use
the old way of scheduling backup jobs and switch to the new one (round
robin) only when everything is running Mitaka.

This commit implements that by adding a dummy backup RPC API version
(1.3) that marks the beginning of the scalable backups era. Jobs are
scheduled the new way only if every c-bak reports that version (or a
higher one).

There are also small changes to volume.rpcapi to fail fast if some
c-vol services do not support the new calls required by the scalable
backups feature. This allows us to error out backups with a proper
message when the upgrade was done in the wrong order (in Mitaka we
require c-vols to be upgraded before c-baks).

This commit also includes small changes to CinderObjectSerializer to
block attempts to "forwardport" an object when sending it over RPC. If a
service receives an older object, it should handle it explicitly.

Related-Blueprint: scalable-backup-service
Co-Authored-By: Michal Dulko <michal.dulko@intel.com>
Change-Id: I45324336ba00726d53cfa012e8bd498868919a8c
This commit is contained in:
LisaLi 2016-02-19 09:28:36 +01:00 committed by Michał Dulko
parent 3631fd2576
commit 05a516da01
9 changed files with 146 additions and 11 deletions

View File

@ -25,6 +25,7 @@ from oslo_config import cfg
from oslo_log import log as logging
from oslo_utils import excutils
from oslo_utils import strutils
from oslo_utils import versionutils
from pytz import timezone
import random
@ -39,6 +40,7 @@ import cinder.policy
from cinder import quota
from cinder import utils
import cinder.volume
from cinder.volume import utils as volume_utils
backup_api_opts = [
cfg.BoolOpt('backup_use_same_backend',
@ -135,6 +137,24 @@ class API(base.Base):
return backups
def _is_scalable_only(self):
"""True if we're running in deployment where all c-bak are scalable.
We need this method to decide if we can assume that all of our c-bak
services are decoupled from c-vol.
FIXME(dulek): This shouldn't be needed in Newton.
"""
cap = self.backup_rpcapi.client.version_cap
if cap:
cap = versionutils.convert_version_to_tuple(cap)
return cap >= (1, 3) # Mitaka is marked by c-bak 1.3+.
else:
# NOTE(dulek): No version cap means we're running in an environment
# without c-bak services. Letting it pass as Mitaka, request will
# just fail anyway so it doesn't really matter.
return True
def _az_matched(self, service, availability_zone):
return ((not availability_zone) or
service.availability_zone == availability_zone)
@ -170,14 +190,29 @@ class API(base.Base):
idx = idx + 1
return None
# Choose the cinder-backup host that should handle a job, raising
# ServiceNotFound when no suitable backup service is enabled.
# NOTE(review): this hunk appears to show pre-change lines (those using
# `availability_zone`) next to their replacements (those using `az` and the
# new `volume_host` argument) - confirm against the applied file.
def _get_available_backup_service_host(self, host, availability_zone):
def _get_available_backup_service_host(self, host, az, volume_host=None):
"""Return an appropriate backup service host."""
# FIXME(dulek): We need to keep compatibility with Liberty, where c-bak
# was coupled with c-vol. If we're running in a mixed Liberty-Mitaka
# environment we will be scheduling backup jobs the old way.
#
# This snippet should go away in Newton. Note that the volume_host
# parameter will also be unnecessary then.
if not self._is_scalable_only():
# Old-style scheduling: prefer the volume's own host, then the requested
# host, and fail if neither has an enabled backup service.
if volume_host and self._is_backup_service_enabled(az,
volume_host):
return volume_host
elif host and self._is_backup_service_enabled(az, host):
return host
else:
raise exception.ServiceNotFound(service_id='cinder-backup')
# New-style scheduling: use the requested host when its backup service is
# enabled.
backup_host = None
if host and self._is_backup_service_enabled(availability_zone, host):
if host and self._is_backup_service_enabled(az, host):
backup_host = host
# Fall back to any available backup service when no host was requested or
# when CONF.backup_use_same_backend is set.
if not backup_host and (not host or CONF.backup_use_same_backend):
backup_host = self._get_any_available_backup_service(
availability_zone)
backup_host = self._get_any_available_backup_service(az)
if not backup_host:
raise exception.ServiceNotFound(service_id='cinder-backup')
return backup_host
@ -225,7 +260,8 @@ class API(base.Base):
previous_status = volume['status']
host = self._get_available_backup_service_host(
None, volume.availability_zone)
None, volume.availability_zone,
volume_utils.extract_host(volume.host, 'host'))
# Reserve a quota before setting volume status and backup status
try:

View File

@ -81,7 +81,7 @@ QUOTAS = quota.QUOTAS
class BackupManager(manager.SchedulerDependentManager):
"""Manages backup of block storage devices."""
RPC_API_VERSION = '1.2'
RPC_API_VERSION = '1.3'
target = messaging.Target(version=RPC_API_VERSION)

View File

@ -35,9 +35,12 @@ class BackupAPI(rpc.RPCAPI):
1.0 - Initial version.
1.1 - Changed methods to accept backup objects instead of IDs.
1.2 - A version that got in by mistake (without breaking anything).
1.3 - Dummy version bump to mark start of having cinder-backup service
decoupled from cinder-volume.
"""
RPC_API_VERSION = '1.1'
RPC_API_VERSION = '1.3'
TOPIC = CONF.backup_topic
BINARY = 'cinder-backup'

View File

@ -351,6 +351,10 @@ class ServiceNotFound(NotFound):
message = _("Service %(service_id)s could not be found.")
# Error for a request that cannot be served because a service is too old.
# The default message takes no format parameters.
class ServiceTooOld(Invalid):
message = _("Service is too old to fulfil this request.")
# Error for a host that could not be found. The message expects a `host`
# format parameter.
class HostNotFound(NotFound):
message = _("Host %(host)s could not be found.")

View File

@ -18,6 +18,7 @@ import contextlib
import datetime
from oslo_log import log as logging
from oslo_utils import versionutils
from oslo_versionedobjects import base
from oslo_versionedobjects import fields
@ -394,8 +395,20 @@ class CinderObjectSerializer(base.VersionedObjectSerializer):
# Look up the version cap for obj's type in OBJ_VERSIONS[self.version_cap].
# Yields None when there is no entry for the object, or when the cap is
# higher than obj.VERSION.
# NOTE(review): the two lines using `objver` look like the pre-change body
# shown next to its replacement - confirm against the applied file.
def _get_capped_obj_version(self, obj):
objname = obj.obj_name()
objver = OBJ_VERSIONS.get(self.version_cap, {})
return objver.get(objname, None)
version_dict = OBJ_VERSIONS.get(self.version_cap, {})
version_cap = version_dict.get(objname, None)
if version_cap:
# Compare as tuples so that versions are ordered numerically rather than
# as strings.
cap_tuple = versionutils.convert_version_to_tuple(version_cap)
obj_tuple = versionutils.convert_version_to_tuple(obj.VERSION)
if cap_tuple > obj_tuple:
# NOTE(dulek): Do not set version cap to be higher than actual
# object version as we don't support "forwardporting" of
# objects. If service will receive an object that's too old it
# should handle it explicitly.
version_cap = None
return version_cap
def serialize_entity(self, context, entity):
if isinstance(entity, (tuple, list, set, dict)):

View File

@ -508,6 +508,26 @@ class BackupTestCase(BaseBackupTest):
self.assertEqual(fields.BackupStatus.ERROR, backup['status'])
self.assertTrue(mock_run_backup.called)
@mock.patch('cinder.utils.brick_get_connector_properties')
@mock.patch('cinder.utils.temporary_chown')
@mock.patch('six.moves.builtins.open')
def test_create_backup_old_volume_service(self, mock_open,
                                          mock_temporary_chown,
                                          mock_get_conn):
    """Test error handling when there's too old volume service in env."""
    # mock.patch decorators inject their mocks bottom-up, so the last
    # argument is the brick_get_connector_properties mock.
    vol_id = self._create_volume_db_entry(size=1)
    backup = self._create_backup_db_entry(volume_id=vol_id)

    # Cap the volume RPC client at 1.37 for the duration of the call and
    # expect create_backup to raise ServiceTooOld.
    with mock.patch.object(self.backup_mgr.volume_rpcapi.client,
                           'version_cap', '1.37'):
        self.assertRaises(exception.ServiceTooOld,
                          self.backup_mgr.create_backup, self.ctxt, backup)

    # The volume and the backup records must reflect the failed backup.
    vol = db.volume_get(self.ctxt, vol_id)
    self.assertEqual('available', vol['status'])
    self.assertEqual('error_backing-up', vol['previous_status'])
    backup = db.backup_get(self.ctxt, backup.id)
    self.assertEqual(fields.BackupStatus.ERROR, backup['status'])
@mock.patch('cinder.utils.brick_get_connector_properties')
@mock.patch('cinder.volume.rpcapi.VolumeAPI.get_backup_device')
@mock.patch('cinder.utils.temporary_chown')
@ -619,6 +639,31 @@ class BackupTestCase(BaseBackupTest):
self.assertEqual(fields.BackupStatus.AVAILABLE, backup['status'])
self.assertTrue(mock_run_restore.called)
@mock.patch('cinder.utils.brick_get_connector_properties')
def test_restore_backup_with_old_volume_service(self, mock_get_conn):
    """Test restore error handling when there's too old volume service."""
    vol_id = self._create_volume_db_entry(status='restoring-backup',
                                          size=1)
    backup = self._create_backup_db_entry(
        status=fields.BackupStatus.RESTORING, volume_id=vol_id)

    # Unmock secure_file_operations_enabled
    patcher = self.volume_patches['secure_file_operations_enabled']
    patcher.stop()
    try:
        # Cap the volume RPC client at 1.37 for the duration of the call
        # and expect restore_backup to raise ServiceTooOld.
        with mock.patch.object(self.backup_mgr.volume_rpcapi.client,
                               'version_cap', '1.37'):
            self.assertRaises(exception.ServiceTooOld,
                              self.backup_mgr.restore_backup,
                              self.ctxt,
                              backup,
                              vol_id)

        vol = db.volume_get(self.ctxt, vol_id)
        self.assertEqual('error_restoring', vol['status'])
        backup = db.backup_get(self.ctxt, backup.id)
        self.assertEqual(fields.BackupStatus.AVAILABLE, backup['status'])
    finally:
        # Restart the patcher even when an assertion above fails, so the
        # patch is never left stopped for whatever runs afterwards.
        patcher.start()
def test_restore_backup_with_bad_service(self):
"""Test error handling.

View File

@ -24,6 +24,7 @@ from oslo_serialization import jsonutils
from cinder import context
from cinder import db
from cinder import exception
from cinder import objects
from cinder import test
from cinder.tests.unit import fake_backup
@ -599,15 +600,28 @@ class VolumeRpcAPITestCase(test.TestCase):
volume=self.fake_volume,
version='1.30')
# Tests for the two volume RPC API calls that are sent at version 1.38.
# NOTE(review): the un-decorated `def` lines appear to be the pre-change
# signatures shown next to their replacements - confirm against the applied
# file.
def test_get_backup_device(self):
@mock.patch('oslo_messaging.RPCClient.can_send_version', return_value=True)
def test_get_backup_device(self, mock_can_send_version):
# With can_send_version() returning True, exercise the call at '1.38'.
self._test_volume_api('get_backup_device',
rpc_method='call',
backup=self.fake_backup_obj,
volume=self.fake_volume_obj,
version='1.38')
def test_secure_file_operations_enabled(self):
# With can_send_version() returning False, ServiceTooOld is expected.
mock_can_send_version.return_value = False
self.assertRaises(exception.ServiceTooOld, self._test_volume_api,
'get_backup_device', rpc_method='call',
backup=self.fake_backup_obj,
volume=self.fake_volume_obj, version='1.38')
@mock.patch('oslo_messaging.RPCClient.can_send_version', return_value=True)
def test_secure_file_operations_enabled(self, mock_can_send_version):
# With can_send_version() returning True, exercise the call at '1.38'.
self._test_volume_api('secure_file_operations_enabled',
rpc_method='call',
volume=self.fake_volume_obj,
version='1.38')
# With can_send_version() returning False, ServiceTooOld is expected.
mock_can_send_version.return_value = False
self.assertRaises(exception.ServiceTooOld, self._test_volume_api,
'secure_file_operations_enabled', rpc_method='call',
volume=self.fake_volume_obj, version='1.38')

View File

@ -19,6 +19,8 @@ Client side of the volume RPC API.
from oslo_config import cfg
from oslo_serialization import jsonutils
from cinder import exception
from cinder.i18n import _
from cinder import quota
from cinder import rpc
from cinder.volume import utils
@ -334,12 +336,22 @@ class VolumeAPI(rpc.RPCAPI):
return cctxt.call(ctxt, 'get_capabilities', discover=discover)
def get_backup_device(self, ctxt, backup, volume):
    """Send the 'get_backup_device' call to the volume's host.

    The call is pinned to RPC version 1.38 and addressed to the host
    extracted from volume.host; its result is returned to the caller.

    :raises ServiceTooOld: if the client cannot send version 1.38.
    """
    required_version = '1.38'
    if self.client.can_send_version(required_version):
        target_host = utils.extract_host(volume.host)
        call_ctxt = self.client.prepare(server=target_host,
                                        version=required_version)
        return call_ctxt.call(ctxt, 'get_backup_device', backup=backup)

    # Fail fast rather than send a call at a version that cannot be sent.
    raise exception.ServiceTooOld(
        _('One of cinder-volume services is too old to accept such '
          'request. Are you running mixed Liberty-Mitaka '
          'cinder-volumes?'))
def secure_file_operations_enabled(self, ctxt, volume):
if not self.client.can_send_version('1.38'):
msg = _('One of cinder-volume services is too old to accept such '
'request. Are you running mixed Liberty-Mitaka '
'cinder-volumes?')
raise exception.ServiceTooOld(msg)
new_host = utils.extract_host(volume.host)
cctxt = self.client.prepare(server=new_host, version='1.38')
return cctxt.call(ctxt, 'secure_file_operations_enabled',

View File

@ -0,0 +1,8 @@
---
features:
- cinder-backup service is now decoupled from
cinder-volume, which allows more flexible scaling.
upgrade:
- As cinder-backup was strongly reworked in this
release, the recommended upgrade order when executing
a live (rolling) upgrade is c-api->c-sch->c-vol->c-bak.