Fix AZ not matching backend after migration

When we migrate, or retype with migration, a volume between availability
zones, we set the AZ of the migrated volume to the same AZ as the source
volume, so we end up with a volume whose AZ does not match the backend's
AZ.

This patch fixes this issue for the generic migration code as well as
the optimized driver migration.

Change-Id: Ia1bf3ae0b7cd6d209354024848aef6029179d8e4
Closes-Bug: #1747949
Gorka Eguileor 2018-02-07 17:07:47 +01:00
parent 28bb3e9b23
commit fb8085894b
4 changed files with 95 additions and 37 deletions
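The core of the change, in both the driver-assisted and the generic migration
paths, is to stop copying the source volume's AZ and instead take the AZ from
the Service record of the destination backend. Below is a minimal standalone
sketch of that idea; every name in it is illustrative and not the actual
Cinder API.

from dataclasses import dataclass


@dataclass
class Service:
    # Simplified stand-in for a Cinder service record.
    host: str
    availability_zone: str


@dataclass
class Volume:
    # Simplified stand-in for a volume object.
    host: str
    availability_zone: str


def complete_migration(volume: Volume, dest_service: Service) -> Volume:
    """Point the volume at the destination backend and adopt its AZ."""
    volume.host = dest_service.host
    # The bug being fixed: the volume previously kept its old AZ here,
    # so it no longer matched the AZ of the backend it ended up on.
    volume.availability_zone = dest_service.availability_zone
    return volume


if __name__ == '__main__':
    src = Volume(host='hostA@lvm#pool', availability_zone='AZ1')
    dst = Service(host='hostB@lvm', availability_zone='AZ2')
    migrated = complete_migration(src, dst)
    assert migrated.availability_zone == 'AZ2'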

View File

@@ -483,6 +483,7 @@ def default_service_values():
'topic': 'fake_topic',
'report_count': 3,
'disabled': False,
'availability_zone': 'nova',
}

View File

@@ -24,6 +24,7 @@ from oslo_concurrency import processutils
from oslo_config import cfg
from oslo_utils import imageutils
from cinder.common import constants
from cinder import context
from cinder import db
from cinder import exception
@@ -75,6 +76,9 @@ class VolumeMigrationTestCase(base.BaseVolumeTestCase):
autospec=True)
self._clear_patch.start()
self.expected_status = 'available'
self._service = tests_utils.create_service(
self.context,
values={'host': 'newhost', 'binary': constants.VOLUME_BINARY})
def tearDown(self):
super(VolumeMigrationTestCase, self).tearDown()
@@ -99,6 +103,28 @@ class VolumeMigrationTestCase(base.BaseVolumeTestCase):
self.assertEqual('newhost', volume.host)
self.assertEqual('success', volume.migration_status)
def test_migrate_volume_driver_cross_az(self):
"""Test volume migration done by driver."""
# Mock driver and rpc functions
self.mock_object(self.volume.driver, 'migrate_volume',
lambda x, y, z, new_type_id=None: (
True, {'user_id': fake.USER_ID}))
dst_az = 'AZ2'
db.service_update(self.context, self._service.id,
{'availability_zone': dst_az})
volume = tests_utils.create_volume(self.context, size=0,
host=CONF.host,
migration_status='migrating')
host_obj = {'host': 'newhost', 'capabilities': {}}
self.volume.migrate_volume(self.context, volume, host_obj, False)
# check volume properties
volume.refresh()
self.assertEqual('newhost', volume.host)
self.assertEqual('success', volume.migration_status)
self.assertEqual(dst_az, volume.availability_zone)
def _fake_create_volume(self, ctxt, volume, req_spec, filters,
allow_reschedule=True):
return db.volume_update(ctxt, volume['id'],
@@ -154,6 +180,41 @@ class VolumeMigrationTestCase(base.BaseVolumeTestCase):
self.context, volume, new_volume_obj, error=False)
self.assertFalse(update_server_volume.called)
@mock.patch('cinder.compute.API')
@mock.patch('cinder.volume.manager.VolumeManager.'
'migrate_volume_completion')
def test_migrate_volume_generic_cross_az(self, migrate_volume_completion,
nova_api):
"""Test that we set the right AZ in cross AZ migrations."""
original_create = objects.Volume.create
dst_az = 'AZ2'
db.service_update(self.context, self._service.id,
{'availability_zone': dst_az})
def my_create(self, *args, **kwargs):
self.status = 'available'
original_create(self, *args, **kwargs)
volume = tests_utils.create_volume(self.context, size=1,
host=CONF.host)
host_obj = {'host': 'newhost', 'capabilities': {}}
create_vol = self.patch('cinder.objects.Volume.create',
side_effect=my_create, autospec=True)
with mock.patch.object(self.volume, '_copy_volume_data') as copy_mock:
self.volume._migrate_volume_generic(self.context, volume, host_obj,
None)
copy_mock.assert_called_with(self.context, volume, mock.ANY,
remote='dest')
migrate_volume_completion.assert_called_with(
self.context, volume, mock.ANY, error=False)
nova_api.return_value.update_server_volume.assert_not_called()
self.assertEqual(dst_az,
create_vol.call_args[0][0]['availability_zone'])
@mock.patch('cinder.compute.API')
@mock.patch('cinder.volume.manager.VolumeManager.'
'migrate_volume_completion')

View File

@@ -198,6 +198,12 @@ class VolumeManager(manager.CleanableManager,
'attach_status', 'migration_status', 'volume_type',
'consistencygroup', 'volume_attachment', 'group'}
def _get_service(self, host=None, binary=constants.VOLUME_BINARY):
host = host or self.host
ctxt = context.get_admin_context()
svc_host = vol_utils.extract_host(host, 'backend')
return objects.Service.get_by_args(ctxt, svc_host, binary)
def __init__(self, volume_driver=None, service_name=None,
*args, **kwargs):
"""Load the driver from the one specified in args, or from flags."""
@@ -227,12 +233,8 @@ class VolumeManager(manager.CleanableManager,
# We pass the current setting for service.active_backend_id to
# the driver on init, in case there was a restart or something
curr_active_backend_id = None
svc_host = vol_utils.extract_host(self.host, 'backend')
try:
service = objects.Service.get_by_args(
context.get_admin_context(),
svc_host,
constants.VOLUME_BINARY)
service = self._get_service()
except exception.ServiceNotFound:
# NOTE(jdg): This is to solve problems with unit tests
LOG.info("Service not found for updating "
@@ -530,12 +532,8 @@ class VolumeManager(manager.CleanableManager,
return
stats = self.driver.get_volume_stats(refresh=True)
svc_host = vol_utils.extract_host(self.host, 'backend')
try:
service = objects.Service.get_by_args(
context.get_admin_context(),
svc_host,
constants.VOLUME_BINARY)
service = self._get_service()
except exception.ServiceNotFound:
with excutils.save_and_reraise_exception():
LOG.error("Service not found for updating replication_status.")
@@ -2068,8 +2066,10 @@ class VolumeManager(manager.CleanableManager,
# Create new volume on remote host
tmp_skip = {'snapshot_id', 'source_volid'}
skip = self._VOLUME_CLONE_SKIP_PROPERTIES | tmp_skip | {'host',
'cluster_name'}
skip = {'host', 'cluster_name', 'availability_zone'}
skip.update(tmp_skip)
skip.update(self._VOLUME_CLONE_SKIP_PROPERTIES)
new_vol_values = {k: volume[k] for k in set(volume.keys()) - skip}
if new_type_id:
new_vol_values['volume_type_id'] = new_type_id
@@ -2079,9 +2079,11 @@ class VolumeManager(manager.CleanableManager,
ctxt, self.key_manager, new_type_id)
new_vol_values['encryption_key_id'] = encryption_key_id
dst_service = self._get_service(backend['host'])
new_volume = objects.Volume(
context=ctxt,
host=backend['host'],
availability_zone=dst_service.availability_zone,
cluster_name=backend.get('cluster_name'),
status='creating',
attach_status=fields.VolumeAttachStatus.DETACHED,
@@ -2368,10 +2370,14 @@ class VolumeManager(manager.CleanableManager,
volume,
host)
if moved:
updates = {'host': host['host'],
'cluster_name': host.get('cluster_name'),
'migration_status': 'success',
'previous_status': volume.status}
dst_service = self._get_service(host['host'])
updates = {
'host': host['host'],
'cluster_name': host.get('cluster_name'),
'migration_status': 'success',
'availability_zone': dst_service.availability_zone,
'previous_status': volume.status,
}
if status_update:
updates.update(status_update)
if model_update:
@@ -2404,13 +2410,9 @@ class VolumeManager(manager.CleanableManager,
# value isn't set (we didn't restart services), so we'll go ahead
# and make this a part of the service periodic
if not self.service_uuid:
svc_host = vol_utils.extract_host(self.host, 'backend')
# We hack this with a try/except for unit tests temporarily
try:
service = objects.Service.get_by_args(
context,
svc_host,
constants.VOLUME_BINARY)
service = self._get_service()
self.service_uuid = service.uuid
except exception.ServiceNotFound:
LOG.warning("Attempt to update service_uuid "
@@ -3994,9 +3996,7 @@ class VolumeManager(manager.CleanableManager,
updates = {}
repl_status = fields.ReplicationStatus
svc_host = vol_utils.extract_host(self.host, 'backend')
service = objects.Service.get_by_args(context, svc_host,
constants.VOLUME_BINARY)
service = self._get_service()
# TODO(geguileo): We should optimize these updates by doing them
# directly on the DB with just 3 queries, one to change the volumes
@@ -4168,9 +4168,7 @@ class VolumeManager(manager.CleanableManager,
doing the failover of the volumes after finished processing the
volumes.
"""
svc_host = vol_utils.extract_host(self.host, 'backend')
service = objects.Service.get_by_args(context, svc_host,
constants.VOLUME_BINARY)
service = self._get_service()
service.update(updates)
try:
self.driver.failover_completed(context, service.active_backend_id)
@@ -4206,12 +4204,8 @@ class VolumeManager(manager.CleanableManager,
LOG.warning('Error encountered on Cinder backend during '
'freeze operation, service is frozen, however '
'notification to driver has failed.')
svc_host = vol_utils.extract_host(self.host, 'backend')
service = objects.Service.get_by_args(
context,
svc_host,
constants.VOLUME_BINARY)
service = self._get_service()
service.disabled = True
service.disabled_reason = "frozen"
service.save()
@@ -4239,12 +4233,8 @@ class VolumeManager(manager.CleanableManager,
LOG.error('Error encountered on Cinder backend during '
'thaw operation, service will remain frozen.')
return False
svc_host = vol_utils.extract_host(self.host, 'backend')
service = objects.Service.get_by_args(
context,
svc_host,
constants.VOLUME_BINARY)
service = self._get_service()
service.disabled = False
service.disabled_reason = ""
service.save()

View File

@@ -0,0 +1,6 @@
---
fixes:
- |
Resolve issue with cross-AZ migrations and retypes where the destination
volume kept the source volume's AZ, leaving a volume whose AZ did not match
the backend's. (bug 1747949)