consumer gen: move_allocations
This patch renames the set_and_clear_allocations function in the scheduler report client to move_allocations and adds handling of consumer generation conflicts to it. The call now moves everything from one consumer to another and raises AllocationMoveFailed to the caller if the move fails due to a consumer generation conflict.

When a migration or resize fails to move the source host allocation to the migration_uuid, the API returns HTTP 409 and the migration is aborted. If reverting a migration, a resize, or a resize to the same host fails to move the source host allocation back to the instance_uuid due to a consumer generation conflict, the instance is put into ERROR state. The instance still has two allocations in this state, and deleting the instance only deletes the one held by the instance_uuid. This patch logs an ERROR describing that in this case the allocation held by the migration_uuid is leaked.

Blueprint: use-nested-allocation-candidates
Change-Id: Ie991d4b53e9bb5e7ec26da99219178ab7695abf6
parent 1ced590571
commit 53ca096750
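The sketch below (not part of the commit) illustrates the caller-side contract described above; the reportclient, context, migration and instance names are stand-ins for the objects used by the compute manager and conductor code in the diff:

# Illustrative sketch only, assuming `reportclient` is a scheduler report
# client instance; based on the behaviour described in the commit message.
from nova import exception

def revert_to_instance(reportclient, context, migration, instance):
    # move_allocations() moves whatever the source consumer (the migration)
    # currently holds in Placement to the target consumer (the instance)
    # and returns True on success, False on a non-conflict failure.
    try:
        return reportclient.move_allocations(
            context, migration.uuid, instance.uuid)
    except exception.AllocationMoveFailed:
        # A consumer generation conflict was detected: the callers in this
        # patch log an ERROR, put the instance into ERROR state and accept
        # that the allocation held by migration.uuid is leaked.
        raise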
@@ -57,7 +57,8 @@ class MigrateServerController(wsgi.Controller):
except (exception.TooManyInstances, exception.QuotaError) as e:
raise exc.HTTPForbidden(explanation=e.format_message())
except (exception.InstanceIsLocked,
exception.CannotMigrateWithTargetHost) as e:
exception.CannotMigrateWithTargetHost,
exception.AllocationMoveFailed) as e:
raise exc.HTTPConflict(explanation=e.format_message())
except exception.InstanceInvalidState as state_error:
common.raise_http_conflict_for_instance_invalid_state(state_error,
@@ -830,7 +830,8 @@ class ServersController(wsgi.Controller):
except exception.QuotaError as error:
raise exc.HTTPForbidden(
explanation=error.format_message())
except exception.InstanceIsLocked as e:
except (exception.InstanceIsLocked,
exception.AllocationMoveFailed) as e:
raise exc.HTTPConflict(explanation=e.format_message())
except exception.InstanceInvalidState as state_error:
common.raise_http_conflict_for_instance_invalid_state(state_error,
@@ -3949,6 +3949,8 @@ class ComputeManager(manager.Manager):
# We're reverting (or failed) on the source, so we
# need to check if our migration holds a claim and if
# so, avoid doing the legacy behavior below.
# NOTE(gibi): We are only hitting this in case of reverting
# a resize-on-same-host operation
mig_allocs = (
self.reportclient.get_allocations_for_consumer_by_provider(
context, cn_uuid, migration.uuid))
@@ -4097,7 +4099,16 @@
instance.node = migration.source_node
instance.save()

self._revert_allocation(context, instance, migration)
try:
self._revert_allocation(context, instance, migration)
except exception.AllocationMoveFailed:
LOG.error('Reverting allocation in placement for migration '
'%(migration_uuid)s failed. The instance '
'%(instance_uuid)s will be put into ERROR state but '
'the allocation held by the migration is leaked.',
{'instance_uuid': instance.uuid,
'migration_uuid': migration.uuid})
raise

self.network_api.setup_networks_on_host(context, instance,
migration.source_compute)
@@ -4190,9 +4201,6 @@
# We only have a claim against one provider, it is the source node
cn_uuid = list(orig_alloc.keys())[0]

# Get just the resources part of the one allocation we need below
orig_alloc = orig_alloc[cn_uuid].get('resources', {})

# FIXME(danms): This method is flawed in that it asssumes allocations
# against only one provider. So, this may overwite allocations against
# a shared provider, if we had one.
@@ -4201,9 +4209,8 @@
{'node': cn_uuid, 'mig': migration.uuid},
instance=instance)
# TODO(cdent): Should we be doing anything with return values here?
self.reportclient.set_and_clear_allocations(
context, cn_uuid, instance.uuid, orig_alloc, instance.project_id,
instance.user_id, consumer_to_clear=migration.uuid)
self.reportclient.move_allocations(context, migration.uuid,
instance.uuid)
return True

def _prep_resize(self, context, image, instance, instance_type,
@@ -140,8 +140,7 @@ class LiveMigrationTask(base.TaskBase):
migrate.revert_allocation_for_migration(self.context,
self._source_cn,
self.instance,
self.migration,
self._held_allocations)
self.migration)

def _check_instance_is_active(self):
if self.instance.power_state not in (power_state.RUNNING,
@@ -57,9 +57,8 @@ def replace_allocation_with_migration(context, instance, migration):
# FIXME(danms): This method is flawed in that it asssumes allocations
# against only one provider. So, this may overwite allocations against
# a shared provider, if we had one.
success = reportclient.set_and_clear_allocations(
context, source_cn.uuid, migration.uuid, orig_alloc,
instance.project_id, instance.user_id, consumer_to_clear=instance.uuid)
success = reportclient.move_allocations(context, instance.uuid,
migration.uuid)
if not success:
LOG.error('Unable to replace resource claim on source '
'host %(host)s node %(node)s for instance',
@@ -78,8 +77,7 @@ def replace_allocation_with_migration(context, instance, migration):
return source_cn, orig_alloc


def revert_allocation_for_migration(context, source_cn, instance, migration,
orig_alloc):
def revert_allocation_for_migration(context, source_cn, instance, migration):
"""Revert an allocation made for a migration back to the instance."""

schedclient = scheduler_client.SchedulerClient()
@@ -88,10 +86,8 @@ def revert_allocation_for_migration(context, source_cn, instance, migration,
# FIXME(danms): This method is flawed in that it asssumes allocations
# against only one provider. So, this may overwite allocations against
# a shared provider, if we had one.
success = reportclient.set_and_clear_allocations(
context, source_cn.uuid, instance.uuid, orig_alloc,
instance.project_id, instance.user_id,
consumer_to_clear=migration.uuid)
success = reportclient.move_allocations(context, migration.uuid,
instance.uuid)
if not success:
LOG.error('Unable to replace resource claim on source '
'host %(host)s node %(node)s for instance',
@@ -320,5 +316,4 @@ class MigrationTask(base.TaskBase):
# now.

revert_allocation_for_migration(self.context, self._source_cn,
self.instance, self._migration,
self._held_allocations)
self.instance, self._migration)
@@ -2322,6 +2322,12 @@ class AllocationUpdateFailed(NovaException):
'Error: %(error)s')


class AllocationMoveFailed(NovaException):
msg_fmt = _('Failed to move allocations from consumer %(source_consumer)s '
'to consumer %(target_consumer)s. '
'Error: %(error)s')


class AllocationDeleteFailed(NovaException):
msg_fmt = _('Failed to delete allocations for consumer %(consumer_uuid)s. '
'Error: %(error)s')
@@ -1922,68 +1922,81 @@ class SchedulerReportClient(object):

@safe_connect
@retries
def set_and_clear_allocations(self, context, rp_uuid, consumer_uuid,
alloc_data, project_id, user_id,
consumer_to_clear=None):
"""Create allocation records for the supplied consumer UUID while
simultaneously clearing any allocations identified by the uuid
in consumer_to_clear, for example a migration uuid when moving an
instance to another host. This is for atomically managing so-called
"doubled" migration records.
def move_allocations(self, context, source_consumer_uuid,
target_consumer_uuid):
"""Move allocations from one consumer to the other

:note Currently we only allocate against a single resource provider.
Once shared storage and things like NUMA allocations are a
reality, this will change to allocate against multiple providers.
Note that this call moves the current allocation from the source
consumer to the target consumer. If parallel update happens on either
or both consumers during this call then Placement will detect that and
this code will re-read the new state of the consumers and retry the
operation. If you want to move a known piece of allocation from source
to target then this function might not be what you want as it always
moves what source has in Placement.

:param context: The security context
:param rp_uuid: The UUID of the resource provider to allocate against.
:param consumer_uuid: The consumer UUID for which allocations are
being set.
:param alloc_data: Dict, keyed by resource class, of amounts to
consume.
:param project_id: The project_id associated with the allocations.
:param user_id: The user_id associated with the allocations.
:param consumer_to_clear: A UUID identifying allocations for a
consumer that should be cleared.
:returns: True if the allocations were created, False otherwise.
:raises: Retry if the operation should be retried due to a concurrent
update.
:param source_consumer_uuid: the UUID of the consumer from which
allocations are moving
:param target_consumer_uuid: the UUID of the target consumer for the
allocations
:returns: True if the move was successful False otherwise.
:raises AllocationMoveFailed: If the source or the target consumer has
been modified while this call tries to
move allocations.
"""
# FIXME(cdent): Fair amount of duplicate with put in here, but now
# just working things through.
payload = {
consumer_uuid: {
'allocations': {
rp_uuid: {
'resources': alloc_data
}
},
'project_id': project_id,
'user_id': user_id,
source_alloc = self.get_allocs_for_consumer(
context, source_consumer_uuid)
target_alloc = self.get_allocs_for_consumer(
context, target_consumer_uuid)

if target_alloc and target_alloc['allocations']:
LOG.warning('Overwriting current allocation %(allocation)s on '
'consumer %(consumer)s',
{'allocation': target_alloc,
'consumer': target_consumer_uuid})

new_allocs = {
source_consumer_uuid: {
# 'allocations': {} means we are removing the allocation from
# the source consumer
'allocations': {},
'project_id': source_alloc['project_id'],
'user_id': source_alloc['user_id'],
'consumer_generation': source_alloc['consumer_generation']},
target_consumer_uuid: {
'allocations': source_alloc['allocations'],
# NOTE(gibi): Is there any case when we need to keep the
# project_id and user_id of the target allocation that we are
# about to overwrite?
'project_id': source_alloc['project_id'],
'user_id': source_alloc['user_id'],
'consumer_generation': target_alloc.get('consumer_generation')
}
}
if consumer_to_clear:
payload[consumer_to_clear] = {
'allocations': {},
'project_id': project_id,
'user_id': user_id,
}
r = self.post('/allocations', payload,
version=POST_ALLOCATIONS_API_VERSION,
r = self.post('/allocations', new_allocs,
version=CONSUMER_GENERATION_VERSION,
global_request_id=context.global_id)
if r.status_code != 204:
# NOTE(jaypipes): Yes, it sucks doing string comparison like this
# but we have no error codes, only error messages.
if 'concurrently updated' in r.text:
err = r.json()['errors'][0]
if err['code'] == 'placement.concurrent_update':
# NOTE(jaypipes): Yes, it sucks doing string comparison like
# this but we have no error codes, only error messages.
# TODO(gibi): Use more granular error codes when available
if 'consumer generation conflict' in err['detail']:
raise exception.AllocationMoveFailed(
source_consumer=source_consumer_uuid,
target_consumer=target_consumer_uuid,
error=r.text)

reason = ('another process changed the resource providers '
'involved in our attempt to post allocations for '
'consumer %s' % consumer_uuid)
raise Retry('set_and_clear_allocations', reason)
'consumer %s' % target_consumer_uuid)
raise Retry('move_allocations', reason)
else:
LOG.warning(
'Unable to post allocations for instance '
'Unable to post allocations for consumer '
'%(uuid)s (%(code)i %(text)s)',
{'uuid': consumer_uuid,
{'uuid': target_consumer_uuid,
'code': r.status_code,
'text': r.text})
return r.status_code == 204
@@ -28,6 +28,7 @@ import nova.conf
from nova import context
from nova.db import api as db
import nova.image.glance
from nova import objects
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.functional.api import client as api_client
@@ -643,3 +644,103 @@ class ProviderUsageBaseTestCase(test.TestCase, InstanceHelperMixin):
compute.manager.host)
compute.manager.update_available_resource(ctx)
LOG.info('Finished with periodics')

def _move_and_check_allocations(self, server, request, old_flavor,
new_flavor, source_rp_uuid, dest_rp_uuid):
self.api.post_server_action(server['id'], request)
self._wait_for_state_change(self.api, server, 'VERIFY_RESIZE')

def _check_allocation():
source_usages = self._get_provider_usages(source_rp_uuid)
self.assertFlavorMatchesAllocation(old_flavor, source_usages)
dest_usages = self._get_provider_usages(dest_rp_uuid)
self.assertFlavorMatchesAllocation(new_flavor, dest_usages)

# The instance should own the new_flavor allocation against the
# destination host created by the scheduler
allocations = self._get_allocations_by_server_uuid(server['id'])
self.assertEqual(1, len(allocations))
dest_alloc = allocations[dest_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(new_flavor, dest_alloc)

# The migration should own the old_flavor allocation against the
# source host created by conductor
migration_uuid = self.get_migration_uuid_for_instance(server['id'])
allocations = self._get_allocations_by_server_uuid(migration_uuid)
source_alloc = allocations[source_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(old_flavor, source_alloc)

# OK, so the move operation has run, but we have not yet confirmed or
# reverted the move operation. Before we run periodics, make sure
# that we have allocations/usages on BOTH the source and the
# destination hosts.
_check_allocation()
self._run_periodics()
_check_allocation()

# Make sure the RequestSpec.flavor matches the new_flavor.
ctxt = context.get_admin_context()
reqspec = objects.RequestSpec.get_by_instance_uuid(ctxt, server['id'])
self.assertEqual(new_flavor['id'], reqspec.flavor.flavorid)

def _migrate_and_check_allocations(self, server, flavor, source_rp_uuid,
dest_rp_uuid):
request = {
'migrate': None
}
self._move_and_check_allocations(
server, request=request, old_flavor=flavor, new_flavor=flavor,
source_rp_uuid=source_rp_uuid, dest_rp_uuid=dest_rp_uuid)

def _resize_to_same_host_and_check_allocations(self, server, old_flavor,
new_flavor, rp_uuid):
# Resize the server to the same host and check usages in VERIFY_RESIZE
# state
self.flags(allow_resize_to_same_host=True)
resize_req = {
'resize': {
'flavorRef': new_flavor['id']
}
}
self.api.post_server_action(server['id'], resize_req)
self._wait_for_state_change(self.api, server, 'VERIFY_RESIZE')

usages = self._get_provider_usages(rp_uuid)
self.assertFlavorsMatchAllocation(old_flavor, new_flavor, usages)

# The instance should hold a new_flavor allocation
allocations = self._get_allocations_by_server_uuid(server['id'])
self.assertEqual(1, len(allocations))
allocation = allocations[rp_uuid]['resources']
self.assertFlavorMatchesAllocation(new_flavor, allocation)

# The migration should hold an old_flavor allocation
migration_uuid = self.get_migration_uuid_for_instance(server['id'])
allocations = self._get_allocations_by_server_uuid(migration_uuid)
self.assertEqual(1, len(allocations))
allocation = allocations[rp_uuid]['resources']
self.assertFlavorMatchesAllocation(old_flavor, allocation)

# We've resized to the same host and have doubled allocations for both
# the old and new flavor on the same host. Run the periodic on the
# compute to see if it tramples on what the scheduler did.
self._run_periodics()

usages = self._get_provider_usages(rp_uuid)

# In terms of usage, it's still double on the host because the instance
# and the migration each hold an allocation for the new and old
# flavors respectively.
self.assertFlavorsMatchAllocation(old_flavor, new_flavor, usages)

# The instance should hold a new_flavor allocation
allocations = self._get_allocations_by_server_uuid(server['id'])
self.assertEqual(1, len(allocations))
allocation = allocations[rp_uuid]['resources']
self.assertFlavorMatchesAllocation(new_flavor, allocation)

# The migration should hold an old_flavor allocation
allocations = self._get_allocations_by_server_uuid(migration_uuid)
self.assertEqual(1, len(allocations))
allocation = allocations[rp_uuid]['resources']
self.assertFlavorMatchesAllocation(old_flavor, allocation)
@@ -2023,53 +2023,6 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase):
self.compute2.manager.update_available_resource(ctx)
LOG.info('Finished with periodics')

def _migrate_and_check_allocations(self, server, flavor, source_rp_uuid,
dest_rp_uuid):
request = {
'migrate': None
}
self._move_and_check_allocations(
server, request=request, old_flavor=flavor, new_flavor=flavor,
source_rp_uuid=source_rp_uuid, dest_rp_uuid=dest_rp_uuid)

def _move_and_check_allocations(self, server, request, old_flavor,
new_flavor, source_rp_uuid, dest_rp_uuid):
self.api.post_server_action(server['id'], request)
self._wait_for_state_change(self.api, server, 'VERIFY_RESIZE')

def _check_allocation():
source_usages = self._get_provider_usages(source_rp_uuid)
self.assertFlavorMatchesAllocation(old_flavor, source_usages)
dest_usages = self._get_provider_usages(dest_rp_uuid)
self.assertFlavorMatchesAllocation(new_flavor, dest_usages)

# The instance should own the new_flavor allocation against the
# destination host created by the scheduler
allocations = self._get_allocations_by_server_uuid(server['id'])
self.assertEqual(1, len(allocations))
dest_alloc = allocations[dest_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(new_flavor, dest_alloc)

# The migration should own the old_flavor allocation against the
# source host created by conductor
migration_uuid = self.get_migration_uuid_for_instance(server['id'])
allocations = self._get_allocations_by_server_uuid(migration_uuid)
source_alloc = allocations[source_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(old_flavor, source_alloc)

# OK, so the move operation has run, but we have not yet confirmed or
# reverted the move operation. Before we run periodics, make sure
# that we have allocations/usages on BOTH the source and the
# destination hosts.
_check_allocation()
self._run_periodics()
_check_allocation()

# Make sure the RequestSpec.flavor matches the new_flavor.
ctxt = context.get_admin_context()
reqspec = objects.RequestSpec.get_by_instance_uuid(ctxt, server['id'])
self.assertEqual(new_flavor['id'], reqspec.flavor.flavorid)

def test_resize_revert(self):
self._test_resize_revert(dest_hostname='host1')
@@ -2204,59 +2157,6 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase):

self._delete_and_check_allocations(server)

def _resize_to_same_host_and_check_allocations(self, server, old_flavor,
new_flavor, rp_uuid):
# Resize the server to the same host and check usages in VERIFY_RESIZE
# state
self.flags(allow_resize_to_same_host=True)
resize_req = {
'resize': {
'flavorRef': new_flavor['id']
}
}
self.api.post_server_action(server['id'], resize_req)
self._wait_for_state_change(self.api, server, 'VERIFY_RESIZE')

usages = self._get_provider_usages(rp_uuid)
self.assertFlavorsMatchAllocation(old_flavor, new_flavor, usages)

# The instance should hold a new_flavor allocation
allocations = self._get_allocations_by_server_uuid(server['id'])
self.assertEqual(1, len(allocations))
allocation = allocations[rp_uuid]['resources']
self.assertFlavorMatchesAllocation(new_flavor, allocation)

# The migration should hold an old_flavor allocation
migration_uuid = self.get_migration_uuid_for_instance(server['id'])
allocations = self._get_allocations_by_server_uuid(migration_uuid)
self.assertEqual(1, len(allocations))
allocation = allocations[rp_uuid]['resources']
self.assertFlavorMatchesAllocation(old_flavor, allocation)

# We've resized to the same host and have doubled allocations for both
# the old and new flavor on the same host. Run the periodic on the
# compute to see if it tramples on what the scheduler did.
self._run_periodics()

usages = self._get_provider_usages(rp_uuid)

# In terms of usage, it's still double on the host because the instance
# and the migration each hold an allocation for the new and old
# flavors respectively.
self.assertFlavorsMatchAllocation(old_flavor, new_flavor, usages)

# The instance should hold a new_flavor allocation
allocations = self._get_allocations_by_server_uuid(server['id'])
self.assertEqual(1, len(allocations))
allocation = allocations[rp_uuid]['resources']
self.assertFlavorMatchesAllocation(new_flavor, allocation)

# The migration should hold an old_flavor allocation
allocations = self._get_allocations_by_server_uuid(migration_uuid)
self.assertEqual(1, len(allocations))
allocation = allocations[rp_uuid]['resources']
self.assertFlavorMatchesAllocation(old_flavor, allocation)

def test_resize_revert_same_host(self):
# make sure that the test only uses a single host
compute2_service_id = self.admin_api.get_services(
@@ -4548,6 +4448,157 @@ class ConsumerGenerationConflictTest(
self.compute1 = self._start_compute('compute1')
self.compute2 = self._start_compute('compute2')

def test_migrate_move_allocation_fails_due_to_conflict(self):
source_hostname = self.compute1.host
source_rp_uuid = self._get_provider_uuid_by_host(source_hostname)

server = self._boot_and_check_allocations(self.flavor, source_hostname)

rsp = fake_requests.FakeResponse(
409,
jsonutils.dumps(
{'errors': [
{'code': 'placement.concurrent_update',
'detail': 'consumer generation conflict'}]}))

with mock.patch('keystoneauth1.adapter.Adapter.post',
autospec=True) as mock_post:
mock_post.return_value = rsp

request = {'migrate': None}
exception = self.assertRaises(client.OpenStackApiException,
self.api.post_server_action,
server['id'], request)

self.assertEqual(1, mock_post.call_count)

self.assertEqual(409, exception.response.status_code)
self.assertIn('Failed to move allocations', exception.response.text)

migrations = self.api.get_migrations()
self.assertEqual(1, len(migrations))
self.assertEqual('migration', migrations[0]['migration_type'])
self.assertEqual(server['id'], migrations[0]['instance_uuid'])
self.assertEqual(source_hostname, migrations[0]['source_compute'])
self.assertEqual('error', migrations[0]['status'])

# The migration is aborted so the instance is ACTIVE on the source
# host instead of being in VERIFY_RESIZE state.
server = self.api.get_server(server['id'])
self.assertEqual('ACTIVE', server['status'])
self.assertEqual(source_hostname, server['OS-EXT-SRV-ATTR:host'])

source_usages = self._get_provider_usages(source_rp_uuid)
self.assertFlavorMatchesAllocation(self.flavor, source_usages)

allocations = self._get_allocations_by_server_uuid(server['id'])
self.assertEqual(1, len(allocations))
alloc = allocations[source_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(self.flavor, alloc)

self._delete_and_check_allocations(server)

def test_revert_migrate_delete_dest_allocation_fails_due_to_conflict(self):
source_hostname = self.compute1.host
dest_hostname = self.compute2.host
source_rp_uuid = self._get_provider_uuid_by_host(source_hostname)
dest_rp_uuid = self._get_provider_uuid_by_host(dest_hostname)

server = self._boot_and_check_allocations(self.flavor, source_hostname)
self._migrate_and_check_allocations(
server, self.flavor, source_rp_uuid, dest_rp_uuid)

rsp = fake_requests.FakeResponse(
409,
jsonutils.dumps(
{'errors': [
{'code': 'placement.concurrent_update',
'detail': 'consumer generation conflict'}]}))

with mock.patch('keystoneauth1.adapter.Adapter.post',
autospec=True) as mock_post:
mock_post.return_value = rsp

post = {'revertResize': None}
self.api.post_server_action(server['id'], post)
server = self._wait_for_state_change(self.api, server, 'ERROR',)

self.assertEqual(1, mock_post.call_count)

migrations = self.api.get_migrations()
self.assertEqual(1, len(migrations))
self.assertEqual('migration', migrations[0]['migration_type'])
self.assertEqual(server['id'], migrations[0]['instance_uuid'])
self.assertEqual(source_hostname, migrations[0]['source_compute'])
self.assertEqual('error', migrations[0]['status'])

# NOTE(gibi): Nova leaks the allocation held by the migration_uuid even
# after the instance is deleted. At least nova logs a fat ERROR.
self.assertIn('Reverting allocation in placement for migration %s '
'failed. The instance %s will be put into ERROR state '
'but the allocation held by the migration is leaked.' %
(migrations[0]['uuid'], server['id']),
self.stdlog.logger.output)
self.api.delete_server(server['id'])
self._wait_until_deleted(server)
fake_notifier.wait_for_versioned_notifications('instance.delete.end')

allocations = self._get_allocations_by_server_uuid(
migrations[0]['uuid'])
self.assertEqual(1, len(allocations))

def test_revert_resize_same_host_delete_dest_fails_due_to_conflict(self):
# make sure that the test only uses a single host
compute2_service_id = self.admin_api.get_services(
host=self.compute2.host, binary='nova-compute')[0]['id']
self.admin_api.put_service(compute2_service_id, {'status': 'disabled'})

hostname = self.compute1.manager.host
rp_uuid = self._get_provider_uuid_by_host(hostname)

server = self._boot_and_check_allocations(self.flavor, hostname)

self._resize_to_same_host_and_check_allocations(
server, self.flavor, self.other_flavor, rp_uuid)

rsp = fake_requests.FakeResponse(
409,
jsonutils.dumps(
{'errors': [
{'code': 'placement.concurrent_update',
'detail': 'consumer generation conflict'}]}))
with mock.patch('keystoneauth1.adapter.Adapter.post',
autospec=True) as mock_post:
mock_post.return_value = rsp

post = {'revertResize': None}
self.api.post_server_action(server['id'], post)
server = self._wait_for_state_change(self.api, server, 'ERROR',)

self.assertEqual(1, mock_post.call_count)

migrations = self.api.get_migrations()
self.assertEqual(1, len(migrations))
self.assertEqual('resize', migrations[0]['migration_type'])
self.assertEqual(server['id'], migrations[0]['instance_uuid'])
self.assertEqual(hostname, migrations[0]['source_compute'])
self.assertEqual('error', migrations[0]['status'])

# NOTE(gibi): Nova leaks the allocation held by the migration_uuid even
# after the instance is deleted. At least nova logs a fat ERROR.
self.assertIn('Reverting allocation in placement for migration %s '
'failed. The instance %s will be put into ERROR state '
'but the allocation held by the migration is leaked.' %
(migrations[0]['uuid'], server['id']),
self.stdlog.logger.output)
self.api.delete_server(server['id'])
self._wait_until_deleted(server)
fake_notifier.wait_for_versioned_notifications('instance.delete.end')

allocations = self._get_allocations_by_server_uuid(
migrations[0]['uuid'])
self.assertEqual(1, len(allocations))

def test_server_delete_fails_due_to_conflict(self):
source_hostname = self.compute1.host
@@ -6210,11 +6210,8 @@ class ComputeTestCase(BaseTestCase,
instance=instance, migration=migration,
migrate_data=migrate_data)
mock_setup.assert_called_once_with(c, instance, self.compute.host)
mock_client.set_and_clear_allocations.assert_called_once_with(
c, mock.sentinel.source, instance.uuid,
mock.sentinel.allocs,
instance.project_id, instance.user_id,
consumer_to_clear=migration.uuid)
mock_client.move_allocations.assert_called_once_with(
c, migration.uuid, instance.uuid)

do_it()
@@ -7151,10 +7151,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
self.instance, self.migration)

self.assertTrue(r)
mock_report.set_and_clear_allocations.assert_called_once_with(
mock.sentinel.ctx, cu, self.instance.uuid, {'DISK_GB': 1},
self.instance.project_id, self.instance.user_id,
consumer_to_clear=self.migration.uuid)
mock_report.move_allocations.assert_called_once_with(
mock.sentinel.ctx, self.migration.uuid, self.instance.uuid)

doit()
@@ -7170,7 +7168,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
self.instance, self.migration)

self.assertFalse(r)
self.assertFalse(mock_report.set_and_clear_allocations.called)
self.assertFalse(mock_report.move_allocations.called)

doit()
@@ -7195,7 +7193,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
self.instance, self.migration)

self.assertTrue(r)
self.assertTrue(mock_report.set_and_clear_allocations.called)
self.assertTrue(mock_report.move_allocations.called)

doit()
@@ -227,8 +227,7 @@ class MigrationTaskTestCase(test.NoDBTestCase):
task._migration.save.assert_called_once_with()
self.assertEqual('error', task._migration.status)
mock_ra.assert_called_once_with(task.context, task._source_cn,
task.instance, task._migration,
task._held_allocations)
task.instance, task._migration)


class MigrationTaskAllocationUtils(test.NoDBTestCase):
@@ -1112,10 +1112,10 @@ class TestPutAllocations(SchedulerReportClientTestCase):
self.assertFalse(res)


class TestSetAndClearAllocations(SchedulerReportClientTestCase):
class TestMoveAllocations(SchedulerReportClientTestCase):

def setUp(self):
super(TestSetAndClearAllocations, self).setUp()
super(TestMoveAllocations, self).setUp()
# We want to reuse the mock throughout the class, but with
# different return values.
patcher = mock.patch(
@@ -1126,86 +1126,153 @@ class TestSetAndClearAllocations(SchedulerReportClientTestCase):
self.rp_uuid = mock.sentinel.rp
self.consumer_uuid = mock.sentinel.consumer
self.data = {"MEMORY_MB": 1024}
patcher = mock.patch(
'nova.scheduler.client.report.SchedulerReportClient.get')
self.mock_get = patcher.start()
self.addCleanup(patcher.stop)

self.project_id = mock.sentinel.project_id
self.user_id = mock.sentinel.user_id

self.mock_post.return_value.status_code = 204
self.rp_uuid = mock.sentinel.rp
self.source_consumer_uuid = mock.sentinel.source_consumer
self.target_consumer_uuid = mock.sentinel.target_consumer
self.source_consumer_data = {
"allocations": {
self.rp_uuid: {
"generation": 1,
"resources": {
"MEMORY_MB": 1024
}
}
},
"consumer_generation": 2,
"project_id": self.project_id,
"user_id": self.user_id
}
self.source_rsp = mock.Mock()
self.source_rsp.json.return_value = self.source_consumer_data
self.target_consumer_data = {
"allocations": {
self.rp_uuid: {
"generation": 1,
"resources": {
"MEMORY_MB": 2048
}
}
},
"consumer_generation": 1,
"project_id": self.project_id,
"user_id": self.user_id
}
self.target_rsp = mock.Mock()
self.target_rsp.json.return_value = self.target_consumer_data
self.mock_get.side_effect = [self.source_rsp, self.target_rsp]
self.expected_url = '/allocations'
self.expected_microversion = '1.28'

def test_url_microversion(self):
expected_microversion = '1.13'

resp = self.client.set_and_clear_allocations(
self.context, self.rp_uuid, self.consumer_uuid, self.data,
self.project_id, self.user_id)
resp = self.client.move_allocations(
self.context, self.source_consumer_uuid, self.target_consumer_uuid)

self.assertTrue(resp)
self.mock_post.assert_called_once_with(
self.expected_url, mock.ANY,
version=expected_microversion,
version=self.expected_microversion,
global_request_id=self.context.global_id)

def test_payload_no_clear(self):
def test_move_to_empty_target(self):
self.target_consumer_data = {"allocations": {}}
target_rsp = mock.Mock()
target_rsp.json.return_value = self.target_consumer_data
self.mock_get.side_effect = [self.source_rsp, target_rsp]

expected_payload = {
self.consumer_uuid: {
'user_id': self.user_id,
'project_id': self.project_id,
'allocations': {
self.target_consumer_uuid: {
"allocations": {
self.rp_uuid: {
'resources': {
'MEMORY_MB': 1024
}
"resources": {
"MEMORY_MB": 1024
},
"generation": 1
}
}
}
}

resp = self.client.set_and_clear_allocations(
self.context, self.rp_uuid, self.consumer_uuid, self.data,
self.project_id, self.user_id)

self.assertTrue(resp)
args, kwargs = self.mock_post.call_args
payload = args[1]
self.assertEqual(expected_payload, payload)

def test_payload_with_clear(self):
expected_payload = {
self.consumer_uuid: {
'user_id': self.user_id,
'project_id': self.project_id,
'allocations': {
self.rp_uuid: {
'resources': {
'MEMORY_MB': 1024
}
}
}
},
"consumer_generation": None,
"project_id": self.project_id,
"user_id": self.user_id,
},
mock.sentinel.migration_uuid: {
'user_id': self.user_id,
'project_id': self.project_id,
'allocations': {}
self.source_consumer_uuid: {
"allocations": {},
"consumer_generation": 2,
"project_id": self.project_id,
"user_id": self.user_id,
}
}

resp = self.client.set_and_clear_allocations(
self.context, self.rp_uuid, self.consumer_uuid, self.data,
self.project_id, self.user_id,
consumer_to_clear=mock.sentinel.migration_uuid)
resp = self.client.move_allocations(
self.context, self.source_consumer_uuid, self.target_consumer_uuid)

self.assertTrue(resp)
args, kwargs = self.mock_post.call_args
payload = args[1]
self.assertEqual(expected_payload, payload)
self.mock_post.assert_called_once_with(
self.expected_url, expected_payload,
version=self.expected_microversion,
global_request_id=self.context.global_id)

def test_move_to_non_empty_target(self):
self.mock_get.side_effect = [self.source_rsp, self.target_rsp]

expected_payload = {
self.target_consumer_uuid: {
"allocations": {
self.rp_uuid: {
"resources": {
"MEMORY_MB": 1024
},
"generation": 1
}
},
"consumer_generation": 1,
"project_id": self.project_id,
"user_id": self.user_id,
},
self.source_consumer_uuid: {
"allocations": {},
"consumer_generation": 2,
"project_id": self.project_id,
"user_id": self.user_id,
}
}

resp = self.client.move_allocations(
self.context, self.source_consumer_uuid, self.target_consumer_uuid)

self.assertTrue(resp)
self.mock_post.assert_called_once_with(
self.expected_url, expected_payload,
version=self.expected_microversion,
global_request_id=self.context.global_id)
self.assertIn('Overwriting current allocation',
self.stdlog.logger.output)

@mock.patch('time.sleep')
def test_409_concurrent_update(self, mock_sleep):
self.mock_post.return_value.status_code = 409
self.mock_post.return_value.text = 'concurrently updated'
def test_409_concurrent_provider_update(self, mock_sleep):
# there will be 1 normal call and 3 retries
self.mock_get.side_effect = [self.source_rsp, self.target_rsp,
self.source_rsp, self.target_rsp,
self.source_rsp, self.target_rsp,
self.source_rsp, self.target_rsp]
rsp = fake_requests.FakeResponse(
409,
jsonutils.dumps(
{'errors': [
{'code': 'placement.concurrent_update',
'detail': ''}]}))

resp = self.client.set_and_clear_allocations(
self.context, self.rp_uuid, self.consumer_uuid, self.data,
self.project_id, self.user_id,
consumer_to_clear=mock.sentinel.migration_uuid)
self.mock_post.return_value = rsp

resp = self.client.move_allocations(
self.context, self.source_consumer_uuid, self.target_consumer_uuid)

self.assertFalse(resp)
# Post was attempted four times.
@@ -1217,10 +1284,8 @@ class TestSetAndClearAllocations(SchedulerReportClientTestCase):
self.mock_post.return_value.status_code = 503
self.mock_post.return_value.text = error_message

resp = self.client.set_and_clear_allocations(
self.context, self.rp_uuid, self.consumer_uuid, self.data,
self.project_id, self.user_id,
consumer_to_clear=mock.sentinel.migration_uuid)
resp = self.client.move_allocations(
self.context, self.source_consumer_uuid, self.target_consumer_uuid)

self.assertFalse(resp)
args, kwargs = mock_log.call_args
@@ -1229,6 +1294,17 @@ class TestSetAndClearAllocations(SchedulerReportClientTestCase):
self.assertIn('Unable to post allocations', log_message)
self.assertEqual(error_message, log_args['text'])

def test_409_concurrent_consumer_update(self):
self.mock_post.return_value = fake_requests.FakeResponse(
status_code=409,
content=jsonutils.dumps(
{'errors': [{'code': 'placement.concurrent_update',
'detail': 'consumer generation conflict'}]}))

self.assertRaises(exception.AllocationMoveFailed,
self.client.move_allocations, self.context,
self.source_consumer_uuid, self.target_consumer_uuid)


class TestProviderOperations(SchedulerReportClientTestCase):
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
@@ -0,0 +1,10 @@
---
issues:
- |
Nova leaks resource allocations in placement during
``POST /servers/{server_id}/action (revertResize Action)`` if the
allocation held by the migration_uuid is modified in parallel with the
revert operation. Nova will log an ERROR and will put the server into ERROR
state but will not delete the migration allocation. We assume that this can
only happen if somebody outside of nova is actively changing the migration
allocation in placement. Therefore it is not considered as a bug.