consumer gen: more tests for delete allocation cases
A user confirming a migration, as well as a successful live migration, also triggers the delete allocation code path. This patch adds test coverage for these code paths.

If the deletion of the source allocation of a confirmed migration fails, nova puts the instance into ERROR state. The instance still has two allocations in this state, and deleting the instance only deletes the one held by the instance_uuid. This patch logs an ERROR describing that in this case the allocation held by the migration_uuid is leaked. The same is true for a live migration that fails to delete the allocation on the source host.

As this makes every caller of _delete_allocation_after_move log the same error for the AllocationDeleteFailed exception, this patch moves that logging into _delete_allocation_after_move.

Blueprint: use-nested-allocation-candidates
Change-Id: I99427a52676826990d2a2ffc82cf30ad945b939c
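The refactor the message describes, consolidating the duplicated error logging into the callee, can be illustrated with a minimal sketch. The names below are simplified stand-ins, not nova's actual classes or signatures:

import logging

LOG = logging.getLogger(__name__)


class AllocationDeleteFailed(Exception):
    """Stand-in for nova.exception.AllocationDeleteFailed."""


def _delete_allocation_after_move(reportclient, instance_uuid,
                                  migration_uuid):
    # Hypothetical simplified signature; the real method also takes the
    # request context, the flavor and the node name. With the logging
    # living here, every caller (confirm resize, revert resize, live
    # migration) reports the leaked migration allocation the same way.
    try:
        reportclient.delete_allocation_for_instance(migration_uuid)
    except AllocationDeleteFailed:
        LOG.error('Deleting allocation in placement for migration '
                  '%(migration_uuid)s failed. The instance '
                  '%(instance_uuid)s will be put to ERROR state but the '
                  'allocation held by the migration is leaked.',
                  {'instance_uuid': instance_uuid,
                   'migration_uuid': migration_uuid})
        raise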
@@ -3893,7 +3893,8 @@ class ComputeManager(manager.Manager):
         rt = self._get_resource_tracker()
         rt.drop_move_claim(context, instance, migration.source_node,
                            old_instance_type, prefix='old_')
-        self._delete_allocation_after_move(context, instance, migration,
+        self._delete_allocation_after_move(context, instance,
+                                           migration,
                                            old_instance_type,
                                            migration.source_node)
         instance.drop_migration_context()
@@ -3933,18 +3934,28 @@ class ComputeManager(manager.Manager):

         if migration.source_node == nodename:
             if migration.status in ('confirmed', 'completed'):
-                # NOTE(danms): We're finishing on the source node, so try to
-                # delete the allocation based on the migration uuid
-                deleted = self.reportclient.delete_allocation_for_instance(
-                    context, migration.uuid)
-                if deleted:
-                    LOG.info(_('Source node %(node)s confirmed migration '
-                               '%(mig)s; deleted migration-based '
-                               'allocation'),
-                             {'node': nodename, 'mig': migration.uuid})
-                    # NOTE(danms): We succeeded, which means we do not
-                    # need to do the complex double allocation dance
-                    return
+                try:
+                    # NOTE(danms): We're finishing on the source node, so try
+                    # to delete the allocation based on the migration uuid
+                    deleted = self.reportclient.delete_allocation_for_instance(
+                        context, migration.uuid)
+                    if deleted:
+                        LOG.info(_('Source node %(node)s confirmed migration '
+                                   '%(mig)s; deleted migration-based '
+                                   'allocation'),
+                                 {'node': nodename, 'mig': migration.uuid})
+                        # NOTE(danms): We succeeded, which means we do not
+                        # need to do the complex double allocation dance
+                        return
+                except exception.AllocationDeleteFailed:
+                    LOG.error('Deleting allocation in placement for migration '
+                              '%(migration_uuid)s failed. The instance '
+                              '%(instance_uuid)s will be put to ERROR state '
+                              'but the allocation held by the migration is '
+                              'leaked.',
+                              {'instance_uuid': instance.uuid,
+                               'migration_uuid': migration.uuid})
+                    raise
         else:
             # We're reverting (or failed) on the source, so we
             # need to check if our migration holds a claim and if
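The except clause above relies on the report client turning a placement consumer generation conflict into AllocationDeleteFailed. A rough sketch of that mechanism, under the assumption of a keystoneauth-style session and simplified stand-in code rather than nova's actual report client:

import json


class AllocationDeleteFailed(Exception):
    pass


def delete_allocation_for_instance(session, consumer_uuid):
    # With consumer generations, allocations are removed by PUTting an
    # empty allocation set for the consumer. If the consumer was changed
    # concurrently, placement answers 409 placement.concurrent_update,
    # which is surfaced to callers as AllocationDeleteFailed.
    resp = session.put('/allocations/%s' % consumer_uuid,
                       json={'allocations': {}})
    if resp.status_code == 204:
        return True
    errors = json.loads(resp.text).get('errors', [])
    if any(err.get('code') == 'placement.concurrent_update'
           for err in errors):
        raise AllocationDeleteFailed(consumer_uuid)
    return False

This is the shape of failure the functional tests below fabricate: they patch keystoneauth's Adapter.put to return a 409 with the placement.concurrent_update error code.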
@@ -17,6 +17,7 @@ import datetime
 import time
 import zlib

+from keystoneauth1 import adapter
 import mock
 from oslo_log import log as logging
 from oslo_serialization import base64
@@ -4498,6 +4499,59 @@ class ConsumerGenerationConflictTest(

         self._delete_and_check_allocations(server)

+    def test_confirm_migrate_delete_alloc_on_source_fails(self):
+        source_hostname = self.compute1.host
+        dest_hostname = self.compute2.host
+        source_rp_uuid = self._get_provider_uuid_by_host(source_hostname)
+        dest_rp_uuid = self._get_provider_uuid_by_host(dest_hostname)
+
+        server = self._boot_and_check_allocations(self.flavor, source_hostname)
+        self._migrate_and_check_allocations(
+            server, self.flavor, source_rp_uuid, dest_rp_uuid)
+
+        rsp = fake_requests.FakeResponse(
+            409,
+            jsonutils.dumps(
+                {'errors': [
+                    {'code': 'placement.concurrent_update',
+                     'detail': 'consumer generation conflict'}]}))
+
+        with mock.patch('keystoneauth1.adapter.Adapter.put',
+                        autospec=True) as mock_put:
+            mock_put.return_value = rsp
+
+            post = {'confirmResize': None}
+            self.api.post_server_action(
+                server['id'], post, check_response_status=[204])
+            server = self._wait_for_state_change(self.api, server, 'ERROR')
+            self.assertIn('Failed to delete allocations',
+                          server['fault']['message'])
+
+        self.assertEqual(1, mock_put.call_count)
+
+        migrations = self.api.get_migrations()
+        self.assertEqual(1, len(migrations))
+        self.assertEqual('migration', migrations[0]['migration_type'])
+        self.assertEqual(server['id'], migrations[0]['instance_uuid'])
+        self.assertEqual(source_hostname, migrations[0]['source_compute'])
+        # NOTE(gibi): it might be better to mark the migration as error
+        self.assertEqual('confirmed', migrations[0]['status'])
+
+        # NOTE(gibi): Nova leaks the allocation held by the migration_uuid
+        # even after the instance is deleted. At least nova logs a fat ERROR.
+        self.assertIn('Deleting allocation in placement for migration %s '
+                      'failed. The instance %s will be put to ERROR state '
+                      'but the allocation held by the migration is leaked.' %
+                      (migrations[0]['uuid'], server['id']),
+                      self.stdlog.logger.output)
+
+        self.api.delete_server(server['id'])
+        self._wait_until_deleted(server)
+        fake_notifier.wait_for_versioned_notifications('instance.delete.end')
+
+        allocations = self._get_allocations_by_server_uuid(
+            migrations[0]['uuid'])
+        self.assertEqual(1, len(allocations))
+
     def test_revert_migrate_delete_dest_allocation_fails_due_to_conflict(self):
         source_hostname = self.compute1.host
         dest_hostname = self.compute2.host
@@ -4599,6 +4653,100 @@ class ConsumerGenerationConflictTest(
             migrations[0]['uuid'])
         self.assertEqual(1, len(allocations))

+    def test_live_migrate_drop_allocation_on_source_fails(self):
+        source_hostname = self.compute1.host
+        dest_hostname = self.compute2.host
+        source_rp_uuid = self._get_provider_uuid_by_host(source_hostname)
+        dest_rp_uuid = self._get_provider_uuid_by_host(dest_hostname)
+
+        server = self._boot_and_check_allocations(
+            self.flavor, source_hostname)
+
+        fake_notifier.stub_notifier(self)
+        self.addCleanup(fake_notifier.reset)
+
+        orig_put = adapter.Adapter.put
+
+        rsp = fake_requests.FakeResponse(
+            409,
+            jsonutils.dumps(
+                {'errors': [
+                    {'code': 'placement.concurrent_update',
+                     'detail': 'consumer generation conflict'}]}))
+
+        def fake_put(_self, url, *args, **kwargs):
+            migration_uuid = self.get_migration_uuid_for_instance(server['id'])
+            if url == '/allocations/%s' % migration_uuid:
+                return rsp
+            else:
+                return orig_put(_self, url, *args, **kwargs)
+
+        with mock.patch('keystoneauth1.adapter.Adapter.put',
+                        autospec=True) as mock_put:
+            mock_put.side_effect = fake_put
+
+            post = {
+                'os-migrateLive': {
+                    'host': dest_hostname,
+                    'block_migration': True,
+                    'force': True,
+                }
+            }
+
+            self.api.post_server_action(server['id'], post)
+
+            # nova does the source host cleanup _after_ setting the migration
+            # to completed and sending end notifications so we have to wait
+            # here a bit.
+            time.sleep(1)
+
+            # Nova failed to clean up on the source host. This right now puts
+            # the instance to ERROR state and fails the migration.
+            server = self._wait_for_server_parameter(self.api, server,
+                {'OS-EXT-SRV-ATTR:host': dest_hostname,
+                 'status': 'ERROR'})
+            self._wait_for_migration_status(server, ['error'])
+            fake_notifier.wait_for_versioned_notifications(
+                'instance.live_migration_post.end')
+
+        # 1 claim on destination, 1 normal delete on dest that fails,
+        self.assertEqual(2, mock_put.call_count)
+
+        source_usages = self._get_provider_usages(source_rp_uuid)
+        # As the cleanup on the source host failed Nova leaks the allocation
+        # held by the migration.
+        self.assertFlavorMatchesAllocation(self.flavor, source_usages)
+        migration_uuid = self.get_migration_uuid_for_instance(server['id'])
+        allocations = self._get_allocations_by_server_uuid(migration_uuid)
+        self.assertEqual(1, len(allocations))
+        self.assertIn(source_rp_uuid, allocations)
+        source_allocation = allocations[source_rp_uuid]['resources']
+        self.assertFlavorMatchesAllocation(self.flavor, source_allocation)
+
+        dest_usages = self._get_provider_usages(dest_rp_uuid)
+        self.assertFlavorMatchesAllocation(self.flavor, dest_usages)
+
+        allocations = self._get_allocations_by_server_uuid(server['id'])
+        self.assertEqual(1, len(allocations))
+        self.assertNotIn(source_rp_uuid, allocations)
+        dest_allocation = allocations[dest_rp_uuid]['resources']
+        self.assertFlavorMatchesAllocation(self.flavor, dest_allocation)
+
+        # NOTE(gibi): Nova leaks the allocation held by the migration_uuid
+        # even after the instance is deleted. At least nova logs a fat ERROR.
+        self.assertIn('Deleting allocation in placement for migration %s '
+                      'failed. The instance %s will be put to ERROR state '
+                      'but the allocation held by the migration is leaked.' %
+                      (migration_uuid, server['id']),
+                      self.stdlog.logger.output)
+
+        self.api.delete_server(server['id'])
+        self._wait_until_deleted(server)
+        fake_notifier.wait_for_versioned_notifications('instance.delete.end')
+
+        allocations = self._get_allocations_by_server_uuid(migration_uuid)
+        self.assertEqual(1, len(allocations))
+
     def test_server_delete_fails_due_to_conflict(self):
         source_hostname = self.compute1.host
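The fake_put helper in the test above is worth calling out as a pattern: the transport is patched once, but only the PUT against the migration's allocation URL is made to fail, while every other call is delegated to the saved original so that the earlier claim on the destination host still succeeds. A generic sketch of the same idea, with hypothetical names:

def make_selective_stub(real_method, failing_url, failing_response):
    # Build a side_effect that fails exactly one URL and passes all
    # other calls through to the unpatched method (autospec=True makes
    # mock hand us `self` as the first positional argument).
    def _stub(_self, url, *args, **kwargs):
        if url == failing_url:
            return failing_response
        return real_method(_self, url, *args, **kwargs)
    return _stub

Used with mock.patch('keystoneauth1.adapter.Adapter.put', autospec=True) and side_effect=make_selective_stub(orig_put, ...), this behaves like the test's fake_put, except that the test also looks up the migration uuid lazily on each call.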
@@ -2,9 +2,11 @@
 issues:
   - |
     Nova leaks resource allocations in placement during
-    ``POST /servers/{server_id}/action (revertResize Action)`` if the
+    ``POST /servers/{server_id}/action (revertResize Action)`` and
+    ``POST /servers/{server_id}/action (confirmResize Action)`` and
+    ``POST /servers/{server_id}/action (os-migrateLive Action)`` and if the
     allocation held by the migration_uuid is modified in parallel with the
-    revert operation. Nova will log an ERROR and will put the server into ERROR
-    state but will not delete the migration allocation. We assume that this can
-    only happen if somebody outside of nova is actively changing the migration
-    allocation in placement. Therefore it is not considered as a bug.
+    lifecycle operation. Nova will log an ERROR and will put the server into
+    ERROR state but will not delete the migration allocation. We assume that
+    this can only happen if somebody outside of nova is actively changing the
+    migration allocation in placement. Therefore it is not considered as a bug.