Merge "cinder-manage online_data_migrations fixes"

This commit is contained in:
Zuul 2018-10-31 01:30:23 +00:00 committed by Gerrit Code Review
commit 92a0d9185d
6 changed files with 150 additions and 40 deletions

View File

@ -88,6 +88,8 @@ from cinder.volume import utils as vutils
CONF = cfg.CONF CONF = cfg.CONF
LOG = logging.getLogger(__name__)
RPC_VERSIONS = { RPC_VERSIONS = {
'cinder-scheduler': scheduler_rpcapi.SchedulerAPI.RPC_API_VERSION, 'cinder-scheduler': scheduler_rpcapi.SchedulerAPI.RPC_API_VERSION,
'cinder-volume': volume_rpcapi.VolumeAPI.RPC_API_VERSION, 'cinder-volume': volume_rpcapi.VolumeAPI.RPC_API_VERSION,
@ -342,34 +344,32 @@ class DbCommands(object):
def _run_migration(self, ctxt, max_count): def _run_migration(self, ctxt, max_count):
ran = 0 ran = 0
exceptions = False
migrations = {} migrations = {}
for migration_meth in self.online_migrations: for migration_meth in self.online_migrations:
count = max_count - ran count = max_count - ran
try: try:
found, done = migration_meth(ctxt, count) found, done = migration_meth(ctxt, count)
except Exception: except Exception:
print(_("Error attempting to run %(method)s") % msg = (_("Error attempting to run %(method)s") %
{'method': migration_meth.__name__}) {'method': migration_meth.__name__})
print(msg)
LOG.exception(msg)
exceptions = True
found = done = 0 found = done = 0
name = migration_meth.__name__ name = migration_meth.__name__
remaining = found - done
if found: if found:
print(_('%(found)i rows matched query %(meth)s, %(done)i ' print(_('%(found)i rows matched query %(meth)s, %(done)i '
'migrated, %(remaining)i remaining') % {'found': found, 'migrated') % {'found': found,
'meth': name, 'meth': name,
'done': done, 'done': done})
'remaining': migrations[name] = found, done
remaining})
migrations.setdefault(name, (0, 0, 0))
migrations[name] = (migrations[name][0] + found,
migrations[name][1] + done,
migrations[name][2] + remaining)
if max_count is not None: if max_count is not None:
ran += done ran += done
if ran >= max_count: if ran >= max_count:
break break
return migrations return migrations, exceptions
@args('--max_count', metavar='<number>', dest='max_count', type=int, @args('--max_count', metavar='<number>', dest='max_count', type=int,
help='Maximum number of objects to consider.') help='Maximum number of objects to consider.')
@ -386,20 +386,19 @@ class DbCommands(object):
max_count = 50 max_count = 50
print(_('Running batches of %i until complete.') % max_count) print(_('Running batches of %i until complete.') % max_count)
# FIXME(jdg): So this is annoying and confusing,
# we iterate through in batches until there are no
# more updates, that's AWESOME!! BUT we only print
# out a table reporting found/done AFTER the loop
# here, so that means the response the user sees is
# always a table of "needed 0" and "completed 0".
# So it's an indication of "all done" but it seems like
# some feedback as we go would be nice to have here.
ran = None ran = None
exceptions = False
migration_info = {} migration_info = {}
while ran is None or ran != 0: while ran is None or ran != 0:
migrations = self._run_migration(ctxt, max_count) migrations, exceptions = self._run_migration(ctxt, max_count)
migration_info.update(migrations) ran = 0
ran = sum([done for found, done, remaining in migrations.values()]) for name in migrations:
migration_info.setdefault(name, (0, 0))
migration_info[name] = (
max(migration_info[name][0], migrations[name][0]),
migration_info[name][1] + migrations[name][1],
)
ran += migrations[name][1]
if not unlimited: if not unlimited:
break break
headers = ["{}".format(_('Migration')), headers = ["{}".format(_('Migration')),
@ -411,6 +410,18 @@ class DbCommands(object):
t.add_row([name, info[0], info[1]]) t.add_row([name, info[0], info[1]])
print(t) print(t)
# NOTE(imacdonn): In the "unlimited" case, the loop above will only
# terminate when all possible migrations have been effected. If we're
# still getting exceptions, there's a problem that requires
# intervention. In the max-count case, exceptions are only considered
# fatal if no work was done by any other migrations ("not ran"),
# because otherwise work may still remain to be done, and that work
# may resolve dependencies for the failing migrations.
if exceptions and (unlimited or not ran):
print(_("Some migrations failed unexpectedly. Check log for "
"details."))
sys.exit(2)
sys.exit(1 if ran else 0) sys.exit(1 if ran else 0)
@args('--enable-replication', action='store_true', default=False, @args('--enable-replication', action='store_true', default=False,

View File

@ -450,8 +450,8 @@ class TestCinderManageCmd(test.TestCase):
command.online_data_migrations, 10) command.online_data_migrations, 10)
self.assertEqual(1, exit.code) self.assertEqual(1, exit.code)
expected = """\ expected = """\
5 rows matched query mock_mig_1, 4 migrated, 1 remaining 5 rows matched query mock_mig_1, 4 migrated
6 rows matched query mock_mig_2, 6 migrated, 0 remaining 6 rows matched query mock_mig_2, 6 migrated
+------------+--------------+-----------+ +------------+--------------+-----------+
| Migration | Total Needed | Completed | | Migration | Total Needed | Completed |
+------------+--------------+-----------+ +------------+--------------+-----------+
@ -466,6 +466,78 @@ class TestCinderManageCmd(test.TestCase):
self.assertEqual(expected, sys.stdout.getvalue()) self.assertEqual(expected, sys.stdout.getvalue())
@mock.patch('cinder.context.get_admin_context')
def test_online_migrations_no_max_count(self, mock_get_context):
self.useFixture(fixtures.MonkeyPatch('sys.stdout', StringIO()))
fake_remaining = [120]
def fake_migration(context, count):
self.assertEqual(mock_get_context.return_value, context)
found = 120
done = min(fake_remaining[0], count)
fake_remaining[0] -= done
return found, done
command_cls = self._fake_db_command((fake_migration,))
command = command_cls()
exit = self.assertRaises(SystemExit,
command.online_data_migrations, None)
self.assertEqual(0, exit.code)
expected = """\
Running batches of 50 until complete.
120 rows matched query fake_migration, 50 migrated
120 rows matched query fake_migration, 50 migrated
120 rows matched query fake_migration, 20 migrated
120 rows matched query fake_migration, 0 migrated
+----------------+--------------+-----------+
| Migration | Total Needed | Completed |
+----------------+--------------+-----------+
| fake_migration | 120 | 120 |
+----------------+--------------+-----------+
"""
self.assertEqual(expected, sys.stdout.getvalue())
@mock.patch('cinder.context.get_admin_context')
def test_online_migrations_error(self, mock_get_context):
self.useFixture(fixtures.MonkeyPatch('sys.stdout', StringIO()))
good_remaining = [50]
def good_migration(context, count):
self.assertEqual(mock_get_context.return_value, context)
found = 50
done = min(good_remaining[0], count)
good_remaining[0] -= done
return found, done
bad_migration = mock.MagicMock()
bad_migration.side_effect = test.TestingException
bad_migration.__name__ = 'bad_migration'
command_cls = self._fake_db_command((bad_migration, good_migration))
command = command_cls()
# bad_migration raises an exception, but it could be because
# good_migration had not completed yet. We should get 1 in this case,
# because some work was done, and the command should be reiterated.
exit = self.assertRaises(SystemExit,
command.online_data_migrations, max_count=50)
self.assertEqual(1, exit.code)
# When running this for the second time, there's no work left for
# good_migration to do, but bad_migration still fails - should
# get 2 this time.
exit = self.assertRaises(SystemExit,
command.online_data_migrations, max_count=50)
self.assertEqual(2, exit.code)
# When --max-count is not used, we should get 2 if all possible
# migrations completed but some raise exceptions
good_remaining = [50]
exit = self.assertRaises(SystemExit,
command.online_data_migrations, None)
self.assertEqual(2, exit.code)
@mock.patch('cinder.cmd.manage.DbCommands.online_migrations', @mock.patch('cinder.cmd.manage.DbCommands.online_migrations',
(mock.Mock(side_effect=((2, 2), (0, 0)), __name__='foo'),)) (mock.Mock(side_effect=((2, 2), (0, 0)), __name__='foo'),))
def test_db_commands_online_data_migrations_ignore_state_and_max(self): def test_db_commands_online_data_migrations_ignore_state_and_max(self):

View File

@ -67,16 +67,25 @@ version Database version
Purge database entries that are marked as deleted, that are older than the number of days specified. Purge database entries that are marked as deleted, that are older than the number of days specified.
``cinder-manage db online_data_migrations`` ``cinder-manage db online_data_migrations [--max-count <n>]``
Perform online data migrations for database upgrade between releases in batches. Perform online data migrations for database upgrade between releases in batches.
This command interprets the following options when it is invoked: This command interprets the following options when it is invoked:
--max_count Maximum number of objects to consider. --max-count Maximum number of objects to migrate. If not specified, all possible migrations will be completed, in batches of 50 at a time.
--ignore_state Force records to migrate even if another operation is
performed on them. This may be dangerous, please refer to Returns exit status 0 if no (further) updates are possible, 1 if the ``--max-count``
release notes for more information. option was used and some updates were completed successfully (even if others generated
errors), 2 if some updates generated errors and no other migrations were able to take
effect in the last batch attempted, or 127 if invalid input is provided (e.g.
non-numeric max-count).
This command should be run after upgrading the database schema. If it exits with partial
updates (exit status 1) it should be called again, even if some updates initially generated
errors, because some updates may depend on others having completed. If it exits with
status 2, intervention is required to resolve the issue causing remaining updates to fail.
It should be considered successfully completed only when the exit status is 0.
Cinder Logs Cinder Logs
~~~~~~~~~~~ ~~~~~~~~~~~

View File

@ -310,10 +310,8 @@ and cgsnapshots will be removed from the database::
cinder-manage db online_data_migrations cinder-manage db online_data_migrations
--max_count <max> --max_count <max>
--ignore_state
max_count is optional. Default is 50. max_count is optional. Default is 50.
ignore_state is optional. Default is False.
After running the above migration command to migrate CGs to generic After running the above migration command to migrate CGs to generic
volume groups, CG and group APIs work as follows: volume groups, CG and group APIs work as follows:

View File

@ -238,10 +238,17 @@ After maintenance window
upgrade is performed. upgrade is performed.
* Since Ocata, you also need to run ``cinder-manage db online_data_migrations`` * Since Ocata, you also need to run ``cinder-manage db online_data_migrations``
command to make sure data migrations are applied. The tool let's you limit command to make sure data migrations are applied. The tool lets you limit
the impact of the data migrations by using ``--max_number`` option to limit the impact of the data migrations by using ``--max-count`` option to limit
number of migrations executed in one run. You need to complete all of the number of migrations executed in one run. If this option is used, the
migrations before starting upgrade to the next version (e.g. you need to exit status will be 1 if any migrations were successful (even if others
complete Ocata's data migrations before proceeding with upgrade to Pike; you generated errors, which could be due to dependencies between migrations).
won't be able to execute Pike's DB schema migrations before completing The command should be rerun while the exit status is 1. If no further
Ocata's data migrations). migrations are possible, the exit status will be 2 if some migrations are
still generating errors, which requires intervention to resolve. The
command should be considered completed successfully only when the exit
status is 0. You need to complete all of the migrations before starting
upgrade to the next version (e.g. you need to complete Ocata's data
migrations before proceeding with upgrade to Pike; you won't be able to
execute Pike's DB schema migrations before completing Ocata's data
migrations).

View File

@ -0,0 +1,13 @@
---
upgrade:
- |
The ``cinder-manage db online_data_migrations`` command now returns exit
status 2 in the case where some migrations failed (raised exceptions) and
no others were completed successfully from the last batch attempted. This
should be considered a fatal condition that requires intervention. Exit
status 1 will be returned in the case where the ``--max-count`` option was
used and some migrations failed but others succeeded (updated at least one
row), because more work may remain for the non-failing migrations, and
their completion may be a dependency for the failing ones. The command
should be reiterated while it returns exit status 1, and considered
completed successfully only when it returns exit status 0.