Make failover DB changes consistent
There are some inconsistencies among the different drivers when it comes to setting the replication_status field in the DB on a failover. To ensure consistent behavior, the manager will now oversee all DB changes during failover and failback, making additional changes when necessary to maintain this consistency.

Drivers will no longer need to process non replicated volumes on failover/failback and can simplify their logic, as they will only receive replicated volumes, selected based on the replication_status field.

On failover:

- Non replicated volumes will have their status changed to error, have their current status saved to the previous_status field, and their replication_status changed to not-capable.
- All snapshots of non replicated volumes will have their status changed to error.
- All replicated volumes that failed on the failover will get their status changed to error, their current status saved in previous_status, and their replication_status set to failover-error.
- All snapshots from volumes with failover errors will have their status set to error.
- All volumes successfully failed over will get their replication_status changed to failed-over.

On failback:

- All non replicated volumes and snapshots will have no model update.
- All replicated volumes that failed on the failback will get their status changed to error, their current status saved in previous_status, and their replication_status set to failover-error.
- All snapshots from volumes with failback errors will have their status set to error.
- All volumes successfully failed back will get their replication_status changed to enabled.

A more detailed explanation is provided in the updated replication devref [3].

Since there were some drivers that were not setting the replication_status on creation [1] and retype [2], we have added a migration script to fix this inconsistency.

[1]: Fixed in change: I10a7931ed4a73dfd2b69f0db979bc32a71aedb11
[2]: Fixed in change: Id4df2b7ad55e9b5def91329f5437da9caa185c30
[3]: Change: I180b89c1ceaeea6d4da8e995e46181990d52825f

Closes-Bug: #1643886
Change-Id: Iefc409f2b19d8575a4ca1ec98a15276f5604eb8d
This commit is contained in:
parent cb67f4f177
commit 3963595bed
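For quick reference, the failover and failback rules above condense to the following table. This is an illustrative Python sketch, not code from this commit; the string values match those of cinder.objects.fields.ReplicationStatus.

# Illustrative summary of the manager-enforced model updates; not part of
# this commit. Keys: the volume's replication_status before the operation.
# Values: (status, replication_status) written afterwards; None = unchanged.
ON_FAILOVER = {
    'disabled':    ('error', 'not-capable'),   # non replicated volume
    'not-capable': ('error', 'not-capable'),   # non replicated volume
    'enabled':     (None, 'failed-over'),      # replicated, driver succeeded
    # replicated, driver reported an error: ('error', 'failover-error'),
    # with the old status preserved in previous_status
}
ON_FAILBACK = {
    'disabled':    (None, None),               # no model update
    'not-capable': (None, None),               # no model update
    'failed-over': (None, 'enabled'),          # replicated, driver succeeded
    # replicated, driver reported an error: ('error', 'failover-error')
}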
@@ -0,0 +1,45 @@
+/* Copyright (c) 2016 Red Hat, Inc.
+   All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License"); you may
+   not use this file except in compliance with the License. You may obtain
+   a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+   WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+   License for the specific language governing permissions and limitations
+   under the License.
+*/
+
+/* Fix replication_status field in volumes table.
+
+   There are some drivers that did not update the replication_status field
+   on the volumes on creation and, since the scheduler was not updating them
+   on creation, there is an inconsistency between the database and the
+   storage device backend.
+
+   Some of the drivers that have been detected to be missing this are:
+   - kaminario
+   - pure
+   - solidfire
+
+   This migration fixes this by updating the replication_status field of the
+   volumes based on the volume type's replication status.
+*/
+
+UPDATE volumes
+SET replication_status='enabled'
+WHERE (not volumes.deleted or volumes.deleted IS NULL)
+    AND volumes.replication_status='disabled'
+    AND EXISTS(
+        SELECT *
+        FROM volume_type_extra_specs
+        WHERE volumes.volume_type_id=volume_type_extra_specs.volume_type_id
+            AND (volume_type_extra_specs.deleted IS NULL
+                 OR not volume_type_extra_specs.deleted)
+            AND volume_type_extra_specs.key='replication_enabled'
+            AND volume_type_extra_specs.value='<is> True'
+    );
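Before running the migration, the affected rows can be previewed with the same predicate expressed as a SELECT. The sketch below is illustrative and not part of the commit; it assumes a local copy of the Cinder database and SQLAlchemy 1.4+, and the database URL and the name to_fix are made up.

# Hedged preview of the rows the migration above would touch.
from sqlalchemy import MetaData, Table, and_, create_engine, not_, or_, select

engine = create_engine('sqlite:////tmp/cinder.db')  # assumed DB URL
meta = MetaData()
volumes = Table('volumes', meta, autoload_with=engine)
specs = Table('volume_type_extra_specs', meta, autoload_with=engine)

# Same predicate as the UPDATE, expressed as a correlated EXISTS.
replicated_type = select(specs.c.id).where(and_(
    volumes.c.volume_type_id == specs.c.volume_type_id,
    or_(specs.c.deleted.is_(None), not_(specs.c.deleted)),
    specs.c['key'] == 'replication_enabled',
    specs.c['value'] == '<is> True'))

to_fix = select(volumes.c.id).where(and_(
    or_(not_(volumes.c.deleted), volumes.c.deleted.is_(None)),
    volumes.c.replication_status == 'disabled',
    replicated_type.exists()))

with engine.connect() as conn:
    print([row.id for row in conn.execute(to_fix)])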
@@ -1176,6 +1176,68 @@ class MigrationsMixin(test_migrations.WalkVersionsMixin):
 
         self.assertSetEqual(set(), non_indexed_foreign_keys)
 
+    def _pre_upgrade_101(self, engine):
+        """Add data to test the SQL migration."""
+        types_table = db_utils.get_table(engine, 'volume_types')
+        for i in range(1, 5):
+            types_table.insert().execute({'id': str(i)})
+
+        specs_table = db_utils.get_table(engine, 'volume_type_extra_specs')
+        specs = [
+            {'volume_type_id': '1', 'key': 'key', 'value': '<is> False'},
+            {'volume_type_id': '2', 'key': 'replication_enabled',
+             'value': '<is> False'},
+            {'volume_type_id': '3', 'key': 'replication_enabled',
+             'value': '<is> True', 'deleted': True},
+            {'volume_type_id': '3', 'key': 'key', 'value': '<is> True'},
+            {'volume_type_id': '4', 'key': 'replication_enabled',
+             'value': '<is> True'},
+            {'volume_type_id': '4', 'key': 'key', 'value': '<is> True'},
+        ]
+        for spec in specs:
+            specs_table.insert().execute(spec)
+
+        volumes_table = db_utils.get_table(engine, 'volumes')
+        volumes = [
+            {'id': '1', 'replication_status': 'disabled',
+             'volume_type_id': None},
+            {'id': '2', 'replication_status': 'disabled',
+             'volume_type_id': ''},
+            {'id': '3', 'replication_status': 'disabled',
+             'volume_type_id': '1'},
+            {'id': '4', 'replication_status': 'disabled',
+             'volume_type_id': '2'},
+            {'id': '5', 'replication_status': 'disabled',
+             'volume_type_id': '2'},
+            {'id': '6', 'replication_status': 'disabled',
+             'volume_type_id': '3'},
+            {'id': '7', 'replication_status': 'error', 'volume_type_id': '4'},
+            {'id': '8', 'deleted': True, 'replication_status': 'disabled',
+             'volume_type_id': '4'},
+            {'id': '9', 'replication_status': 'disabled', 'deleted': None,
+             'volume_type_id': '4'},
+            {'id': '10', 'replication_status': 'disabled', 'deleted': False,
+             'volume_type_id': '4'},
+        ]
+        for volume in volumes:
+            volumes_table.insert().execute(volume)
+
+        # Only the last two volumes (not deleted, with a replicated type)
+        # should be changed to enabled
+        expected = {v['id']: v['replication_status'] for v in volumes}
+        expected['9'] = 'enabled'
+        expected['10'] = 'enabled'
+        return expected
+
+    def _check_101(self, engine, data):
+        # Get existing volumes after the migration
+        volumes_table = db_utils.get_table(engine, 'volumes')
+        volumes = volumes_table.select().execute()
+        # Check that the replication_status is the one we expect according to
+        # _pre_upgrade_101
+        for volume in volumes:
+            self.assertEqual(data[volume.id], volume.replication_status,
+                             'id %s' % volume.id)
+
     def test_walk_versions(self):
         self.walk_versions(False, False)
         self.assert_each_foreign_key_is_part_of_an_index()
@@ -18,6 +18,7 @@ import socket
 import sys
 import uuid
 
+import mock
 from oslo_config import cfg
 from oslo_service import loopingcall
 from oslo_utils import timeutils
@@ -38,6 +39,19 @@ def get_test_admin_context():
     return context.get_admin_context()
 
 
+def obj_attr_is_set(obj_class):
+    """Method to allow setting the ID on an OVO on creation."""
+    original_method = obj_class.obj_attr_is_set
+
+    def wrapped(self, attr):
+        # Report 'id' as unset on the first check so that the OVO's
+        # create() doesn't reject an object instantiated with a preset ID.
+        if attr == 'id' and not hasattr(self, 'id_first_call'):
+            self.id_first_call = False
+            return False
+        else:
+            return original_method(self, attr)
+    return wrapped
+
+
 def create_volume(ctxt,
                   host='test_host',
                   display_name='test_volume',
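The helper above works around the usual OVO creation guard: Volume.create() refuses to run when 'id' is already set on the object. A minimal sketch of the failure mode, assuming that standard guard (the UUID below is made up):

# Sketch only; assumes the standard guard in cinder.objects.Volume.create().
from cinder import context
from cinder import objects

ctxt = context.get_admin_context()
vol = objects.Volume(ctxt, id='11111111-2222-3333-4444-555555555555', size=1)
vol.create()  # without the patched obj_attr_is_set this raises
              # ObjectActionError: 'already created'

With the patch in place, obj_attr_is_set('id') reports False exactly once, so create() proceeds and the row is inserted with the caller-supplied ID.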
@@ -54,6 +68,7 @@ def create_volume(ctxt,
                   group_id=None,
                   previous_status=None,
                   testcase_instance=None,
+                  id=None,
                   **kwargs):
     """Create a volume object in the DB."""
     vol = {}
@@ -85,8 +100,14 @@ def create_volume(ctxt,
     if previous_status:
         vol['previous_status'] = previous_status
 
-    volume = objects.Volume(ctxt, **vol)
-    volume.create()
+    if id:
+        with mock.patch('cinder.objects.Volume.obj_attr_is_set',
+                        obj_attr_is_set(objects.Volume)):
+            volume = objects.Volume(ctxt, id=id, **vol)
+            volume.create()
+    else:
+        volume = objects.Volume(ctxt, **vol)
+        volume.create()
 
     # If we get a TestCase instance we add cleanup
     if testcase_instance:
@@ -118,6 +139,7 @@ def create_snapshot(ctxt,
                     cgsnapshot_id = None,
                     status=fields.SnapshotStatus.CREATING,
                     testcase_instance=None,
+                    id=None,
                     **kwargs):
     vol = db.volume_get(ctxt, volume_id)
     snap = objects.Snapshot(ctxt)
@@ -129,7 +151,15 @@ def create_snapshot(ctxt,
     snap.display_name = display_name
     snap.display_description = display_description
     snap.cgsnapshot_id = cgsnapshot_id
-    snap.create()
+
+    if id:
+        with mock.patch('cinder.objects.Snapshot.obj_attr_is_set',
+                        obj_attr_is_set(objects.Snapshot)):
+            snap.id = id
+            snap.create()
+    else:
+        snap.create()
 
     # We do the update after creating the snapshot in case we want to set
     # deleted field
     snap.update(kwargs)
@@ -13,6 +13,8 @@
 #    License for the specific language governing permissions and limitations
 #    under the License.
 
+import uuid
+
 import ddt
 import mock
@@ -30,6 +32,7 @@ from cinder.tests.unit import volume as base
 import cinder.volume
 from cinder.volume import manager
 from cinder.volume import rpcapi as volume_rpcapi
+from cinder.volume import utils as vol_utils
 
 
 CONF = cfg.CONF
@@ -70,7 +73,7 @@ class ReplicationTestCase(base.BaseVolumeTestCase):
         mock_getall.assert_called_once_with(self.context,
                                             filters={'host': self.host})
         mock_failover.assert_called_once_with(self.context,
+                                              mock_getall.return_value,
                                               [],
                                               secondary_id=new_backend)
 
         db_svc = objects.Service.get_by_id(self.context, svc.id)
@@ -439,3 +442,174 @@ class ReplicationTestCase(base.BaseVolumeTestCase):
         self.assertEqual(rep_field.ENABLED, cluster.replication_status)
 
         failover_mock.assert_not_called()
+
+    def _check_failover_db(self, get_method, expected_results):
+        db_data = get_method.get_all(self.context, None)
+        db_data = {e.id: e for e in db_data}
+        for expected in expected_results:
+            id_ = expected['id']
+            for key, value in expected.items():
+                self.assertEqual(value, getattr(db_data[id_], key),
+                                 '(%s) ref=%s != act=%s' % (
+                                     key, expected, dict(db_data[id_])))
+
+    def _test_failover_model_updates(self, in_volumes, in_snapshots,
+                                     driver_volumes, driver_result,
+                                     out_volumes, out_snapshots,
+                                     secondary_id=None):
+        host = vol_utils.extract_host(self.manager.host)
+        utils.create_service(self.context, {'host': host,
+                                            'binary': 'cinder-volume'})
+        for volume in in_volumes:
+            utils.create_volume(self.context, self.manager.host, **volume)
+
+        for snapshot in in_snapshots:
+            utils.create_snapshot(self.context, **snapshot)
+
+        with mock.patch.object(
+                self.manager.driver, 'failover_host',
+                return_value=(secondary_id, driver_result)) as driver_mock:
+            self.manager.failover_host(self.context, secondary_id)
+
+        self.assertSetEqual(driver_volumes,
+                            {v.id for v in driver_mock.call_args[0][1]})
+
+        self._check_failover_db(objects.VolumeList, out_volumes)
+        self._check_failover_db(objects.SnapshotList, out_snapshots)
+
+    def test_failover_host_model_updates(self):
+        status = fields.ReplicationStatus
+        # IDs will be overwritten with UUIDs, but they help follow the code
+        in_volumes = [
+            {'id': 0, 'status': 'available',
+             'replication_status': status.DISABLED},
+            {'id': 1, 'status': 'in-use',
+             'replication_status': status.NOT_CAPABLE},
+            {'id': 2, 'status': 'available',
+             'replication_status': status.FAILOVER_ERROR},
+            {'id': 3, 'status': 'in-use',
+             'replication_status': status.ENABLED},
+            {'id': 4, 'status': 'available',
+             'replication_status': status.FAILOVER_ERROR},
+            {'id': 5, 'status': 'in-use',
+             'replication_status': status.ENABLED},
+        ]
+        # Generate real volume IDs
+        for volume in in_volumes:
+            volume['id'] = str(uuid.uuid4())
+        in_snapshots = [
+            {'id': v['id'], 'volume_id': v['id'], 'status': 'available'}
+            for v in in_volumes
+        ]
+        driver_volumes = {
+            v['id'] for v in in_volumes
+            if v['replication_status'] not in (status.DISABLED,
+                                               status.NOT_CAPABLE)}
+        driver_result = [
+            {'volume_id': in_volumes[3]['id'],
+             'updates': {'status': 'error'}},
+            {'volume_id': in_volumes[4]['id'],
+             'updates': {'replication_status': status.FAILOVER_ERROR}},
+            {'volume_id': in_volumes[5]['id'],
+             'updates': {'replication_status': status.FAILED_OVER}},
+        ]
+        out_volumes = [
+            {'id': in_volumes[0]['id'], 'status': 'error',
+             'replication_status': status.NOT_CAPABLE,
+             'previous_status': in_volumes[0]['status']},
+            {'id': in_volumes[1]['id'], 'status': 'error',
+             'replication_status': status.NOT_CAPABLE,
+             'previous_status': in_volumes[1]['status']},
+            {'id': in_volumes[2]['id'], 'status': in_volumes[2]['status'],
+             'replication_status': status.FAILED_OVER},
+            {'id': in_volumes[3]['id'], 'status': 'error',
+             'previous_status': in_volumes[3]['status'],
+             'replication_status': status.FAILOVER_ERROR},
+            {'id': in_volumes[4]['id'], 'status': 'error',
+             'previous_status': in_volumes[4]['status'],
+             'replication_status': status.FAILOVER_ERROR},
+            {'id': in_volumes[5]['id'], 'status': in_volumes[5]['status'],
+             'replication_status': status.FAILED_OVER},
+        ]
+        out_snapshots = [
+            {'id': ov['id'],
+             'status': 'error' if ov['status'] == 'error' else 'available'}
+            for ov in out_volumes
+        ]
+        self._test_failover_model_updates(in_volumes, in_snapshots,
+                                          driver_volumes, driver_result,
+                                          out_volumes, out_snapshots)
+
+    def test_failback_host_model_updates(self):
+        status = fields.ReplicationStatus
+        # IDs will be overwritten with UUIDs, but they help follow the code
+        in_volumes = [
+            {'id': 0, 'status': 'available',
+             'replication_status': status.DISABLED},
+            {'id': 1, 'status': 'in-use',
+             'replication_status': status.NOT_CAPABLE},
+            {'id': 2, 'status': 'available',
+             'replication_status': status.FAILOVER_ERROR},
+            {'id': 3, 'status': 'in-use',
+             'replication_status': status.ENABLED},
+            {'id': 4, 'status': 'available',
+             'replication_status': status.FAILOVER_ERROR},
+            {'id': 5, 'status': 'in-use',
+             'replication_status': status.FAILED_OVER},
+        ]
+        # Generate real volume IDs
+        for volume in in_volumes:
+            volume['id'] = str(uuid.uuid4())
+        in_snapshots = [
+            {'id': in_volumes[0]['id'], 'volume_id': in_volumes[0]['id'],
+             'status': fields.SnapshotStatus.ERROR_DELETING},
+            {'id': in_volumes[1]['id'], 'volume_id': in_volumes[1]['id'],
+             'status': fields.SnapshotStatus.AVAILABLE},
+            {'id': in_volumes[2]['id'], 'volume_id': in_volumes[2]['id'],
+             'status': fields.SnapshotStatus.CREATING},
+            {'id': in_volumes[3]['id'], 'volume_id': in_volumes[3]['id'],
+             'status': fields.SnapshotStatus.DELETING},
+            {'id': in_volumes[4]['id'], 'volume_id': in_volumes[4]['id'],
+             'status': fields.SnapshotStatus.CREATING},
+            {'id': in_volumes[5]['id'], 'volume_id': in_volumes[5]['id'],
+             'status': fields.SnapshotStatus.CREATING},
+        ]
+        driver_volumes = {
+            v['id'] for v in in_volumes
+            if v['replication_status'] not in (status.DISABLED,
+                                               status.NOT_CAPABLE)}
+        driver_result = [
+            {'volume_id': in_volumes[3]['id'],
+             'updates': {'status': 'error'}},
+            {'volume_id': in_volumes[4]['id'],
+             'updates': {'replication_status': status.FAILOVER_ERROR}},
+            {'volume_id': in_volumes[5]['id'],
+             'updates': {'replication_status': status.FAILED_OVER}},
+        ]
+        out_volumes = [
+            {'id': in_volumes[0]['id'], 'status': in_volumes[0]['status'],
+             'replication_status': in_volumes[0]['replication_status'],
+             'previous_status': None},
+            {'id': in_volumes[1]['id'], 'status': in_volumes[1]['status'],
+             'replication_status': in_volumes[1]['replication_status'],
+             'previous_status': None},
+            {'id': in_volumes[2]['id'], 'status': in_volumes[2]['status'],
+             'replication_status': status.ENABLED},
+            {'id': in_volumes[3]['id'], 'status': 'error',
+             'previous_status': in_volumes[3]['status'],
+             'replication_status': status.FAILOVER_ERROR},
+            {'id': in_volumes[4]['id'], 'status': 'error',
+             'previous_status': in_volumes[4]['status'],
+             'replication_status': status.FAILOVER_ERROR},
+            {'id': in_volumes[5]['id'], 'status': in_volumes[5]['status'],
+             'replication_status': status.ENABLED},
+        ]
+        # Snapshot status is preserved, except for snapshots of the volumes
+        # that errored on the failback
+        out_snapshots = in_snapshots[:]
+        out_snapshots[3]['status'] = fields.SnapshotStatus.ERROR
+        out_snapshots[4]['status'] = fields.SnapshotStatus.ERROR
+
+        self._test_failover_model_updates(in_volumes, in_snapshots,
+                                          driver_volumes, driver_result,
+                                          out_volumes, out_snapshots,
+                                          self.manager.FAILBACK_SENTINEL)
@@ -184,6 +184,8 @@ class VolumeManager(manager.CleanableManager,
 
     RPC_API_VERSION = volume_rpcapi.VolumeAPI.RPC_API_VERSION
 
+    FAILBACK_SENTINEL = 'default'
+
     target = messaging.Target(version=RPC_API_VERSION)
 
     # On cloning a volume, we shouldn't copy volume_type, consistencygroup
@@ -3660,7 +3662,7 @@
 
         Instructs a replication capable/configured backend to failover
         to one of it's secondary replication targets. host=None is
-        an acceetable input, and leaves it to the driver to failover
+        an acceptable input, and leaves it to the driver to failover
         to the only configured target, or to choose a target on it's
         own. All of the hosts volumes will be passed on to the driver
         in order for it to determine the replicated volumes on the host,
|
|||
svc_host = vol_utils.extract_host(self.host, 'backend')
|
||||
service = objects.Service.get_by_args(context, svc_host,
|
||||
constants.VOLUME_BINARY)
|
||||
|
||||
# TODO(geguileo): We should optimize these updates by doing them
|
||||
# directly on the DB with just 3 queries, one to change the volumes
|
||||
# another to change all the snapshots, and another to get replicated
|
||||
# volumes.
|
||||
|
||||
# Change non replicated volumes and their snapshots to error if we are
|
||||
# failing over, leave them as they are for failback
|
||||
volumes = self._get_my_volumes(context)
|
||||
|
||||
exception_encountered = True
|
||||
replicated_vols = []
|
||||
for volume in volumes:
|
||||
if volume.replication_status not in (repl_status.DISABLED,
|
||||
repl_status.NOT_CAPABLE):
|
||||
replicated_vols.append(volume)
|
||||
elif secondary_backend_id != self.FAILBACK_SENTINEL:
|
||||
volume.previous_status = volume.status
|
||||
volume.status = 'error'
|
||||
volume.replication_status = repl_status.NOT_CAPABLE
|
||||
volume.save()
|
||||
|
||||
for snapshot in volume.snapshots:
|
||||
snapshot.status = 'error'
|
||||
snapshot.save()
|
||||
|
||||
volume_update_list = None
|
||||
try:
|
||||
# For non clustered we can call v2.1 failover_host, but for
|
||||
# clustered we call a/a failover method. We know a/a method
|
||||
|
@@ -3688,40 +3713,46 @@
             # expected form of volume_update_list:
             # [{volume_id: <cinder-volid>, updates: {'provider_id': xxxx....}},
             #  {volume_id: <cinder-volid>, updates: {'provider_id': xxxx....}}]
 
             active_backend_id, volume_update_list = failover(
                 context,
-                volumes,
+                replicated_vols,
                 secondary_id=secondary_backend_id)
-            exception_encountered = False
-        except exception.UnableToFailOver:
-            LOG.exception("Failed to perform replication failover")
-            updates['replication_status'] = repl_status.FAILOVER_ERROR
-        except exception.InvalidReplicationTarget:
-            LOG.exception("Invalid replication target specified "
-                          "for failover")
-            # Preserve the replication_status: Status should be failed over if
-            # we were failing back or if we were failing over from one
-            # secondary to another secondary. In both cases active_backend_id
-            # will be set.
-            if service.active_backend_id:
-                updates['replication_status'] = repl_status.FAILED_OVER
-            else:
-                updates['replication_status'] = repl_status.ENABLED
-        except exception.VolumeDriverException:
+            try:
+                update_data = {u['volume_id']: u['updates']
+                               for u in volume_update_list}
+            except KeyError:
+                msg = "Update list, doesn't include volume_id"
+                raise exception.ProgrammingError(reason=msg)
+        except Exception as exc:
             # NOTE(jdg): Drivers need to be aware if they fail during
             # a failover sequence, we're expecting them to cleanup
             # and make sure the driver state is such that the original
             # backend is still set as primary as per driver memory
-            LOG.error("Driver reported error during "
-                      "replication failover.")
-            updates.update(disabled=True,
-                           replication_status=repl_status.FAILOVER_ERROR)
-        if exception_encountered:
-            LOG.error(
-                "Error encountered during failover on host: "
-                "%(host)s invalid target ID %(backend_id)s",
-                {'host': self.host, 'backend_id':
-                 secondary_backend_id})
+
+            # We don't want to log the exception trace invalid replication
+            # target
+            if isinstance(exc, exception.InvalidReplicationTarget):
+                log_method = LOG.error
+                # Preserve the replication_status: Status should be failed
+                # over if we were failing back or if we were failing over from
+                # one secondary to another secondary. In both cases
+                # active_backend_id will be set.
+                if service.active_backend_id:
+                    updates['replication_status'] = repl_status.FAILED_OVER
+                else:
+                    updates['replication_status'] = repl_status.ENABLED
+            else:
+                log_method = LOG.exception
+                updates.update(disabled=True,
+                               replication_status=repl_status.FAILOVER_ERROR)
+
+            log_method("Error encountered during failover on host: %(host)s "
+                       "to %(backend_id)s: %(error)s",
+                       {'host': self.host, 'backend_id': secondary_backend_id,
+                        'error': exc})
+            # We dump the update list for manual recovery
+            LOG.error('Failed update_list is: %s', volume_update_list)
             self.finish_failover(context, service, updates)
             return
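To make the driver contract in the comments above concrete, the following is an illustrative (active_backend_id, volume_update_list) return value for failover_host(); the volume IDs and provider_id are made up. Only replicated volumes are passed to the driver, and each entry may update status, replication_status, and provider fields:

# Illustrative only: a driver's return value under the new contract.
active_backend_id = 'backend2'
volume_update_list = [
    # This volume failed to fail over:
    {'volume_id': 'c41f5e26-0000-0000-0000-000000000001',
     'updates': {'status': 'error'}},
    # This one failed over fine; the driver refreshed its provider info:
    {'volume_id': '8a2f7b13-0000-0000-0000-000000000002',
     'updates': {'replication_status': 'failed-over',
                 'provider_id': 'backend-specific-id'}},
]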
@@ -3738,19 +3769,24 @@
 
         self.finish_failover(context, service, updates)
 
-        for update in volume_update_list:
-            # Response must include an id key: {volume_id: <cinder-uuid>}
-            if not update.get('volume_id'):
-                raise exception.UnableToFailOver(
-                    reason=_("Update list, doesn't include volume_id"))
-            # Key things to consider (attaching failed-over volumes):
-            # provider_location
-            # provider_auth
-            # provider_id
-            # replication_status
-            vobj = objects.Volume.get_by_id(context, update['volume_id'])
-            vobj.update(update.get('updates', {}))
-            vobj.save()
+        for volume in replicated_vols:
+            update = update_data.get(volume.id, {})
+            if update.get('status', '') == 'error':
+                update['replication_status'] = repl_status.FAILOVER_ERROR
+            elif update.get('replication_status') in (None,
+                                                      repl_status.FAILED_OVER):
+                update['replication_status'] = updates['replication_status']
+
+            if update['replication_status'] == repl_status.FAILOVER_ERROR:
+                update.setdefault('status', 'error')
+                # Set all volume snapshots to error
+                for snapshot in volume.snapshots:
+                    snapshot.status = 'error'
+                    snapshot.save()
+            if 'status' in update:
+                update['previous_status'] = volume.status
+            volume.update(update)
+            volume.save()
 
         LOG.info("Failed over to replication target successfully.")
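Restated as a pure function, the per-volume normalization in the loop above looks roughly like the sketch below. This is an illustrative condensation, not code from the commit; normalize_update is a hypothetical name and snapshot handling is only noted in a comment.

def normalize_update(volume_status, driver_update, host_replication_status):
    """Derive the final model update for one replicated volume.

    host_replication_status is updates['replication_status'], i.e.
    'failed-over' on failover and 'enabled' on failback.
    """
    update = dict(driver_update)
    # A driver-reported error always wins.
    if update.get('status', '') == 'error':
        update['replication_status'] = 'failover-error'
    elif update.get('replication_status') in (None, 'failed-over'):
        update['replication_status'] = host_replication_status
    if update['replication_status'] == 'failover-error':
        # The manager also sets every snapshot of the volume to error here.
        update.setdefault('status', 'error')
    if 'status' in update:
        update['previous_status'] = volume_status
    return update

# e.g. normalize_update('in-use', {'status': 'error'}, 'failed-over')
# -> {'status': 'error', 'replication_status': 'failover-error',
#     'previous_status': 'in-use'}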