Fix AZ not matching backend after migration

When we migrate, or retype with migration, a volume between availability
zones, we set the AZ of the migrated volume to the same AZ as the source
volume, so we end up with a volume whose AZ does not match the backend's
AZ.

This patch fixes this issue for the generic migration code as well as
the optimized driver migration.

Change-Id: Ia1bf3ae0b7cd6d209354024848aef6029179d8e4
Closes-Bug: #1747949
Gorka Eguileor 2018-02-07 17:07:47 +01:00
parent 28bb3e9b23
commit fb8085894b
4 changed files with 95 additions and 37 deletions
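
Both code paths in the diffs below boil down to the same idea: instead of
copying the source volume's availability_zone, look up the Service record
that runs the destination backend and take its AZ. The sketch below is only
an illustration of that pattern under assumed names; destination_az() and
its standalone layout are not the actual manager code (the real change adds
a VolumeManager._get_service() helper, shown in the first manager hunk, and
reads availability_zone from the service it returns).

# Illustrative sketch only: shows how the destination backend's AZ can be
# resolved from its Service record. "destination_az" is a hypothetical
# helper name, not part of the commit.
from cinder import context
from cinder import objects
from cinder.common import constants
from cinder.volume import utils as vol_utils


def destination_az(dest_host):
    """Return the availability zone of the destination backend's service."""
    ctxt = context.get_admin_context()
    # Strip pool information so we match the backend-level service host.
    svc_host = vol_utils.extract_host(dest_host, 'backend')
    service = objects.Service.get_by_args(ctxt, svc_host,
                                          constants.VOLUME_BINARY)
    return service.availability_zone


# On a successful migration the volume would then pick up that AZ, e.g.:
# updates = {'host': dest_host,
#            'availability_zone': destination_az(dest_host),
#            'migration_status': 'success'}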

@@ -483,6 +483,7 @@ def default_service_values():
         'topic': 'fake_topic',
         'report_count': 3,
         'disabled': False,
+        'availability_zone': 'nova',
     }

@@ -24,6 +24,7 @@ from oslo_concurrency import processutils
 from oslo_config import cfg
 from oslo_utils import imageutils
 
+from cinder.common import constants
 from cinder import context
 from cinder import db
 from cinder import exception
@@ -75,6 +76,9 @@ class VolumeMigrationTestCase(base.BaseVolumeTestCase):
                                        autospec=True)
         self._clear_patch.start()
         self.expected_status = 'available'
+        self._service = tests_utils.create_service(
+            self.context,
+            values={'host': 'newhost', 'binary': constants.VOLUME_BINARY})
 
     def tearDown(self):
         super(VolumeMigrationTestCase, self).tearDown()
@@ -99,6 +103,28 @@ class VolumeMigrationTestCase(base.BaseVolumeTestCase):
         self.assertEqual('newhost', volume.host)
         self.assertEqual('success', volume.migration_status)
 
+    def test_migrate_volume_driver_cross_az(self):
+        """Test volume migration done by driver."""
+        # Mock driver and rpc functions
+        self.mock_object(self.volume.driver, 'migrate_volume',
+                         lambda x, y, z, new_type_id=None: (
+                             True, {'user_id': fake.USER_ID}))
+        dst_az = 'AZ2'
+        db.service_update(self.context, self._service.id,
+                          {'availability_zone': dst_az})
+
+        volume = tests_utils.create_volume(self.context, size=0,
+                                           host=CONF.host,
+                                           migration_status='migrating')
+        host_obj = {'host': 'newhost', 'capabilities': {}}
+        self.volume.migrate_volume(self.context, volume, host_obj, False)
+
+        # check volume properties
+        volume.refresh()
+        self.assertEqual('newhost', volume.host)
+        self.assertEqual('success', volume.migration_status)
+        self.assertEqual(dst_az, volume.availability_zone)
+
     def _fake_create_volume(self, ctxt, volume, req_spec, filters,
                             allow_reschedule=True):
         return db.volume_update(ctxt, volume['id'],
@@ -154,6 +180,41 @@ class VolumeMigrationTestCase(base.BaseVolumeTestCase):
             self.context, volume, new_volume_obj, error=False)
         self.assertFalse(update_server_volume.called)
 
+    @mock.patch('cinder.compute.API')
+    @mock.patch('cinder.volume.manager.VolumeManager.'
+                'migrate_volume_completion')
+    def test_migrate_volume_generic_cross_az(self, migrate_volume_completion,
+                                             nova_api):
+        """Test that we set the right AZ in cross AZ migrations."""
+        original_create = objects.Volume.create
+        dst_az = 'AZ2'
+        db.service_update(self.context, self._service.id,
+                          {'availability_zone': dst_az})
+
+        def my_create(self, *args, **kwargs):
+            self.status = 'available'
+            original_create(self, *args, **kwargs)
+
+        volume = tests_utils.create_volume(self.context, size=1,
+                                           host=CONF.host)
+        host_obj = {'host': 'newhost', 'capabilities': {}}
+
+        create_vol = self.patch('cinder.objects.Volume.create',
+                                side_effect=my_create, autospec=True)
+
+        with mock.patch.object(self.volume, '_copy_volume_data') as copy_mock:
+            self.volume._migrate_volume_generic(self.context, volume, host_obj,
                                                 None)
+        copy_mock.assert_called_with(self.context, volume, mock.ANY,
+                                     remote='dest')
+        migrate_volume_completion.assert_called_with(
+            self.context, volume, mock.ANY, error=False)
+
+        nova_api.return_value.update_server_volume.assert_not_called()
+
+        self.assertEqual(dst_az,
+                         create_vol.call_args[0][0]['availability_zone'])
+
     @mock.patch('cinder.compute.API')
     @mock.patch('cinder.volume.manager.VolumeManager.'
                 'migrate_volume_completion')

@@ -198,6 +198,12 @@ class VolumeManager(manager.CleanableManager,
         'attach_status', 'migration_status', 'volume_type',
         'consistencygroup', 'volume_attachment', 'group'}
 
+    def _get_service(self, host=None, binary=constants.VOLUME_BINARY):
+        host = host or self.host
+        ctxt = context.get_admin_context()
+        svc_host = vol_utils.extract_host(host, 'backend')
+        return objects.Service.get_by_args(ctxt, svc_host, binary)
+
     def __init__(self, volume_driver=None, service_name=None,
                  *args, **kwargs):
         """Load the driver from the one specified in args, or from flags."""
@@ -227,12 +233,8 @@ class VolumeManager(manager.CleanableManager,
         # We pass the current setting for service.active_backend_id to
         # the driver on init, in case there was a restart or something
         curr_active_backend_id = None
-        svc_host = vol_utils.extract_host(self.host, 'backend')
         try:
-            service = objects.Service.get_by_args(
-                context.get_admin_context(),
-                svc_host,
-                constants.VOLUME_BINARY)
+            service = self._get_service()
         except exception.ServiceNotFound:
             # NOTE(jdg): This is to solve problems with unit tests
             LOG.info("Service not found for updating "
@@ -530,12 +532,8 @@ class VolumeManager(manager.CleanableManager,
             return
 
         stats = self.driver.get_volume_stats(refresh=True)
-        svc_host = vol_utils.extract_host(self.host, 'backend')
         try:
-            service = objects.Service.get_by_args(
-                context.get_admin_context(),
-                svc_host,
-                constants.VOLUME_BINARY)
+            service = self._get_service()
         except exception.ServiceNotFound:
             with excutils.save_and_reraise_exception():
                 LOG.error("Service not found for updating replication_status.")
@@ -2068,8 +2066,10 @@ class VolumeManager(manager.CleanableManager,
         # Create new volume on remote host
         tmp_skip = {'snapshot_id', 'source_volid'}
-        skip = self._VOLUME_CLONE_SKIP_PROPERTIES | tmp_skip | {'host',
-                                                                'cluster_name'}
+        skip = {'host', 'cluster_name', 'availability_zone'}
+        skip.update(tmp_skip)
+        skip.update(self._VOLUME_CLONE_SKIP_PROPERTIES)
         new_vol_values = {k: volume[k] for k in set(volume.keys()) - skip}
         if new_type_id:
             new_vol_values['volume_type_id'] = new_type_id
@@ -2079,9 +2079,11 @@ class VolumeManager(manager.CleanableManager,
                 ctxt, self.key_manager, new_type_id)
             new_vol_values['encryption_key_id'] = encryption_key_id
 
+        dst_service = self._get_service(backend['host'])
         new_volume = objects.Volume(
             context=ctxt,
             host=backend['host'],
+            availability_zone=dst_service.availability_zone,
             cluster_name=backend.get('cluster_name'),
             status='creating',
             attach_status=fields.VolumeAttachStatus.DETACHED,
@@ -2368,10 +2370,14 @@ class VolumeManager(manager.CleanableManager,
                                                                  volume,
                                                                  host)
                 if moved:
-                    updates = {'host': host['host'],
-                               'cluster_name': host.get('cluster_name'),
-                               'migration_status': 'success',
-                               'previous_status': volume.status}
+                    dst_service = self._get_service(host['host'])
+                    updates = {
+                        'host': host['host'],
+                        'cluster_name': host.get('cluster_name'),
+                        'migration_status': 'success',
+                        'availability_zone': dst_service.availability_zone,
+                        'previous_status': volume.status,
+                    }
                     if status_update:
                         updates.update(status_update)
                     if model_update:
@@ -2404,13 +2410,9 @@ class VolumeManager(manager.CleanableManager,
         # value isn't set (we didn't restart services), so we'll go ahead
        # and make this a part of the service periodic
         if not self.service_uuid:
-            svc_host = vol_utils.extract_host(self.host, 'backend')
             # We hack this with a try/except for unit tests temporarily
             try:
-                service = objects.Service.get_by_args(
-                    context,
-                    svc_host,
-                    constants.VOLUME_BINARY)
+                service = self._get_service()
                 self.service_uuid = service.uuid
             except exception.ServiceNotFound:
                 LOG.warning("Attempt to update service_uuid "
@@ -3994,9 +3996,7 @@ class VolumeManager(manager.CleanableManager,
         updates = {}
         repl_status = fields.ReplicationStatus
 
-        svc_host = vol_utils.extract_host(self.host, 'backend')
-        service = objects.Service.get_by_args(context, svc_host,
-                                              constants.VOLUME_BINARY)
+        service = self._get_service()
 
         # TODO(geguileo): We should optimize these updates by doing them
         # directly on the DB with just 3 queries, one to change the volumes
@@ -4168,9 +4168,7 @@ class VolumeManager(manager.CleanableManager,
         doing the failover of the volumes after finished processing the
         volumes.
         """
-        svc_host = vol_utils.extract_host(self.host, 'backend')
-        service = objects.Service.get_by_args(context, svc_host,
-                                              constants.VOLUME_BINARY)
+        service = self._get_service()
         service.update(updates)
         try:
             self.driver.failover_completed(context, service.active_backend_id)
@@ -4206,12 +4204,8 @@ class VolumeManager(manager.CleanableManager,
             LOG.warning('Error encountered on Cinder backend during '
                         'freeze operation, service is frozen, however '
                         'notification to driver has failed.')
-        svc_host = vol_utils.extract_host(self.host, 'backend')
 
-        service = objects.Service.get_by_args(
-            context,
-            svc_host,
-            constants.VOLUME_BINARY)
+        service = self._get_service()
         service.disabled = True
         service.disabled_reason = "frozen"
         service.save()
@@ -4239,12 +4233,8 @@ class VolumeManager(manager.CleanableManager,
             LOG.error('Error encountered on Cinder backend during '
                       'thaw operation, service will remain frozen.')
             return False
-        svc_host = vol_utils.extract_host(self.host, 'backend')
 
-        service = objects.Service.get_by_args(
-            context,
-            svc_host,
-            constants.VOLUME_BINARY)
+        service = self._get_service()
         service.disabled = False
         service.disabled_reason = ""
         service.save()

@@ -0,0 +1,6 @@
+---
+fixes:
+  - |
+    Resolve issue with cross AZ migrations and retypes where the destination
+    volume kept the source volume's AZ, so we ended up with a volume where the
+    AZ does not match the backend. (bug 1747949)