Heal allocations with incomplete consumer information

Allocations created before microversion 1.8 didn't have project_id
/ user_id consumer information. In Rocky those will be migrated
to have consumer records, but using configurable sentinel values.

As part of heal_allocations, we can detect this and heal the
allocations using the instance.project_id/user_id information.

This is something we'd need if we ever use Placement allocation
information for counting quotas.

Note that we should be using Placement API version 1.28 with
consumer_generation when updating the allocations, but since
people might backport this change, the use of consumer
generations is left for a follow-up patch.
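
For reference, a minimal sketch of the update step follows. It is written
against a bare requests session rather than nova's SchedulerReportClient;
the endpoint URL, session handling and return convention are illustrative
assumptions, not part of this change:

    # Sketch only: auth, retries and error handling are omitted;
    # PLACEMENT_URL is a placeholder, and "session" is any
    # requests-compatible session that already carries a valid token.
    import requests

    PLACEMENT_URL = 'http://placement.example.com'
    HEADERS = {'OpenStack-API-Version': 'placement 1.12'}

    def heal_consumer(session, instance_uuid, project_id, user_id):
        """Re-PUT the instance allocations with the real project/user."""
        url = '%s/allocations/%s' % (PLACEMENT_URL, instance_uuid)
        body = session.get(url, headers=HEADERS).json()
        if not body.get('allocations'):
            return False  # nothing to update; heal_allocations creates instead
        if (body.get('project_id') == project_id and
                body.get('user_id') == user_id):
            return False  # consumer information already matches the instance
        body['project_id'] = project_id
        body['user_id'] = user_id
        # Microversion 1.28 would also require consumer_generation here;
        # that is deliberately left for the follow-up patch noted above.
        resp = session.put(url, json=body, headers=HEADERS)
        return resp.status_code == 204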

Related to blueprint add-consumer-generation

Change-Id: Idba40838b7b1d5389ab308f2ea40e28911aecffa
Matt Riedemann 2018-06-11 19:46:16 -04:00
parent 4f9a7da581
commit 6b6d81cf2b
7 changed files with 248 additions and 21 deletions


@@ -296,6 +296,11 @@ Placement
the compute node resource provider for that instance based on the flavor
associated with the instance.
There is also a special case handled for instances that *do* have
allocations created before Placement API microversion 1.8 where project_id
and user_id values were required. For those types of allocations, the
project_id and user_id are updated using the values from the instance.
Specify ``--max-count`` to control the maximum number of instances to
process. If not specified, all instances in each cell will be mapped in
batches of 50. If you have a large number of instances, consider
@@ -311,7 +316,7 @@ Placement
* 0: Command completed successfully and allocations were created.
* 1: --max-count was reached and there are more instances to process.
* 2: Unable to find a compute node record for a given instance.
* 3: Unable to create allocations for an instance against its
* 3: Unable to create (or update) allocations for an instance against its
compute node resource provider.
* 4: Command completed successfully but no allocations were created.
* 127: Invalid input.
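
As a usage illustration (not part of this change), the documented return
codes lend themselves to a simple batching loop. The wrapper below is a
hypothetical sketch; it assumes nova-manage is on PATH and that repeated
invocations with --max-count are acceptable in your deployment:

    # Hypothetical batch driver: re-runs heal_allocations while return
    # code 1 indicates --max-count was reached and more instances remain.
    import subprocess
    import sys

    def heal_in_batches(batch_size=50):
        while True:
            proc = subprocess.run(
                ['nova-manage', 'placement', 'heal_allocations',
                 '--max-count', str(batch_size)])
            if proc.returncode == 1:
                continue                 # batch processed, more instances left
            if proc.returncode in (0, 4):
                return 0                 # done (4: nothing needed healing)
            return proc.returncode       # 2, 3 or 127: stop and report

    if __name__ == '__main__':
        sys.exit(heal_in_batches())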


@@ -1778,6 +1778,8 @@ class PlacementCommands(object):
given instance cannot be found
:raises: AllocationCreateFailed if unable to create allocations for
a given instance against a given compute node resource provider
:raises: AllocationUpdateFailed if unable to update allocations for
a given instance with consumer project/user information
"""
# Keep a cache of instance.node to compute node resource provider UUID.
# This will save some queries for non-ironic instances to the
@@ -1817,15 +1819,45 @@ class PlacementCommands(object):
continue
allocations = placement.get_allocations_for_consumer(
ctxt, instance.uuid)
if allocations:
output(_('Instance %s already has allocations.') %
instance.uuid)
# TODO(mriedem): Check to see if the allocation project_id
ctxt, instance.uuid, include_project_user=True)
# get_allocations_for_consumer uses safe_connect which will
# return None if we can't communicate with Placement, and the
# response can have an empty {'allocations': {}} response if
# there are no allocations for the instance so handle both
if allocations and allocations.get('allocations'):
# Check to see if the allocation project_id
# and user_id matches the instance project and user and
# fix the allocation project/user if they don't match; see
# blueprint add-consumer-generation for details.
continue
# fix the allocation project/user if they don't match.
# Allocations created before Placement API version 1.8
# did not have a project_id/user_id, and migrated records
# could have sentinel values from config.
if (allocations.get('project_id') ==
instance.project_id and
allocations.get('user_id') == instance.user_id):
output(_('Instance %s already has allocations with '
'matching consumer project/user.') %
instance.uuid)
continue
# We have an instance with allocations but not the correct
# project_id/user_id, so we want to update the allocations
# and re-put them. We don't use put_allocations here
# because we don't want to mess up shared or nested
# provider allocations.
allocations['project_id'] = instance.project_id
allocations['user_id'] = instance.user_id
# We use 1.12 for PUT /allocations/{consumer_id} to mirror
# the body structure from get_allocations_for_consumer.
# TODO(mriedem): Pass a consumer generation using 1.28.
resp = placement.put('/allocations/%s' % instance.uuid,
allocations, version='1.12')
if resp:
num_processed += 1
output(_('Successfully updated allocations for '
'instance %s.') % instance.uuid)
continue
else:
raise exception.AllocationUpdateFailed(
instance=instance.uuid, error=resp.text)
# This instance doesn't have allocations so we need to find
# its compute node resource provider.
@@ -1866,12 +1898,14 @@ class PlacementCommands(object):
@action_description(
_("Iterates over non-cell0 cells looking for instances which do "
"not have allocations in the Placement service and which are not "
"undergoing a task state transition. For each instance found, "
"allocations are created against the compute node resource provider "
"for that instance based on the flavor associated with the "
"instance. This command requires that the [api_database]/connection "
"and [placement] configuration options are set."))
"not have allocations in the Placement service, or have incomplete "
"consumer project_id/user_id values in existing allocations, and "
"which are not undergoing a task state transition. For each "
"instance found, allocations are created (or updated) against the "
"compute node resource provider for that instance based on the "
"flavor associated with the instance. This command requires that "
"the [api_database]/connection and [placement] configuration "
"options are set."))
@args('--max-count', metavar='<max_count>', dest='max_count',
help='Maximum number of instances to process. If not specified, all '
'instances in each cell will be mapped in batches of 50. '
@@ -1888,8 +1922,8 @@ class PlacementCommands(object):
* 0: Command completed successfully and allocations were created.
* 1: --max-count was reached and there are more instances to process.
* 2: Unable to find a compute node record for a given instance.
* 3: Unable to create allocations for an instance against its
compute node resource provider.
* 3: Unable to create (or update) allocations for an instance against
its compute node resource provider.
* 4: Command completed successfully but no allocations were created.
* 127: Invalid input.
"""
@@ -1961,7 +1995,8 @@ class PlacementCommands(object):
except exception.ComputeHostNotFound as e:
print(e.format_message())
return 2
except exception.AllocationCreateFailed as e:
except (exception.AllocationCreateFailed,
exception.AllocationUpdateFailed) as e:
print(e.format_message())
return 3


@@ -2286,6 +2286,11 @@ class AllocationCreateFailed(NovaException):
'against resource provider %(provider)s.')
class AllocationUpdateFailed(NovaException):
msg_fmt = _('Failed to update allocations for instance %(instance)s. '
'Error: %(error)s')
class CertificateValidationFailed(NovaException):
msg_fmt = _("Image signature certificate validation failed for "
"certificate: %(cert_uuid)s. %(reason)s")


@@ -49,6 +49,7 @@ GRANULAR_AC_VERSION = '1.25'
POST_RPS_RETURNS_PAYLOAD_API_VERSION = '1.20'
NESTED_PROVIDER_API_VERSION = '1.14'
POST_ALLOCATIONS_API_VERSION = '1.13'
ALLOCATION_PROJECT_USER = '1.12'
def warn_limit(self, msg):
@@ -1460,12 +1461,27 @@ class SchedulerReportClient(object):
raise exception.ResourceProviderSyncFailed()
@safe_connect
def get_allocations_for_consumer(self, context, consumer):
def get_allocations_for_consumer(self, context, consumer,
include_project_user=False):
"""Makes a GET /allocations/{consumer} call to Placement.
:param context: The nova.context.RequestContext auth context
:param consumer: UUID of the consumer resource
:param include_project_user: True if the response should be the
full allocations response including project_id and user_id (new
in microversion 1.12), False if only the "allocations" dict from
the response body should be returned.
:returns: dict, see ``include_project_user`` for details on format;
returns None if unable to connect to Placement (see safe_connect)
"""
url = '/allocations/%s' % consumer
resp = self.get(url, global_request_id=context.global_id)
resp = self.get(url, version=ALLOCATION_PROJECT_USER,
global_request_id=context.global_id)
if not resp:
return {}
else:
if include_project_user:
return resp.json()
return resp.json()['allocations']
def get_allocations_for_consumer_by_provider(self, context, rp_uuid,
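
To make the new include_project_user flag concrete, the shapes involved
look roughly like the sketch below, based on the docstring above; the
resource provider UUID, amounts and consumer IDs are made up, not real
placement output:

    # include_project_user=True returns the full 1.12 body; False returns
    # only the nested "allocations" dict. Values are illustrative only.
    full_body = {
        'allocations': {
            'b107fa06-2871-4f21-9aa5-f3f3e2ab0b9b': {   # compute node RP uuid
                'generation': 2,
                'resources': {'VCPU': 2, 'MEMORY_MB': 512, 'DISK_GB': 50},
            },
        },
        'project_id': '00000000-aaaa-bbbb-cccc-000000000000',  # e.g. a
        'user_id': '00000000-dddd-eeee-ffff-000000000000',     # sentinel value
    }
    allocations_only = full_body['allocations']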


@@ -14,11 +14,14 @@ import fixtures
from six.moves import StringIO
from nova.cmd import manage
from nova import config
from nova import context
from nova import objects
from nova import test
from nova.tests.functional import integrated_helpers
CONF = config.CONF
class NovaManageDBIronicTest(test.TestCase):
def setUp(self):
@@ -585,3 +588,57 @@ class TestNovaManagePlacementHealAllocations(
result = self.cli.heal_allocations(verbose=True)
self.assertEqual(0, result, self.output.getvalue())
self.assertIn('Processed 1 instances.', self.output.getvalue())
def test_heal_allocations_update_sentinel_consumer(self):
"""Tests the scenario that allocations were created before microversion
1.8 when consumer (project_id and user_id) were not required so the
consumer information is using sentinel values from config.
Since the CachingScheduler used in this test class won't actually
create allocations during scheduling, we have to create the allocations
out-of-band and then run our heal routine to see they get updated with
the instance project and user information.
"""
server, rp_uuid = self._boot_and_assert_no_allocations(
self.flavor, 'cell1')
# Now we'll create allocations using microversion < 1.8 so that
# placement creates the consumer record with the config-based project
# and user values.
alloc_body = {
"allocations": [
{
"resource_provider": {
"uuid": rp_uuid
},
"resources": {
"MEMORY_MB": self.flavor['ram'],
"VCPU": self.flavor['vcpus'],
"DISK_GB": self.flavor['disk']
}
}
]
}
self.placement_api.put('/allocations/%s' % server['id'], alloc_body)
# Make sure we did that correctly. Use version 1.12 so we can assert
# the project_id and user_id are based on the sentinel values.
allocations = self.placement_api.get(
'/allocations/%s' % server['id'], version='1.12').body
self.assertEqual(CONF.placement.incomplete_consumer_project_id,
allocations['project_id'])
self.assertEqual(CONF.placement.incomplete_consumer_user_id,
allocations['user_id'])
allocations = allocations['allocations']
self.assertIn(rp_uuid, allocations)
self.assertFlavorMatchesAllocation(
self.flavor, allocations[rp_uuid]['resources'])
# Now run heal_allocations which should update the consumer info.
result = self.cli.heal_allocations(verbose=True)
self.assertEqual(0, result, self.output.getvalue())
output = self.output.getvalue()
self.assertIn('Successfully updated allocations for instance', output)
self.assertIn('Processed 1 instances.', output)
# Now assert that the consumer was actually updated.
allocations = self.placement_api.get(
'/allocations/%s' % server['id'], version='1.12').body
self.assertEqual(server['tenant_id'], allocations['project_id'])
self.assertEqual(server['user_id'], allocations['user_id'])


@@ -3202,7 +3202,7 @@ class TestAllocations(SchedulerReportClientTestCase):
self.client.update_instance_allocation(self.context, cn, inst, 1)
self.assertFalse(mock_put.called)
mock_get.assert_called_once_with(
'/allocations/%s' % inst.uuid,
'/allocations/%s' % inst.uuid, version='1.12',
global_request_id=self.context.global_id)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'


@@ -35,6 +35,7 @@ from nova import objects
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.unit.db import fakes as db_fakes
from nova.tests.unit import fake_requests
from nova.tests.unit.objects import test_network
from nova.tests import uuidsentinel
@@ -2488,6 +2489,114 @@ class TestNovaManagePlacement(test.NoDBTestCase):
uuidsentinel.instance, mock.sentinel.resources, 'fake-project',
'fake-user')
@mock.patch('nova.objects.CellMappingList.get_all',
return_value=objects.CellMappingList(objects=[
objects.CellMapping(name='cell1',
uuid=uuidsentinel.cell1)]))
@mock.patch('nova.objects.InstanceList.get_by_filters',
# Called twice, first returns 1 instance, second returns []
side_effect=(
objects.InstanceList(objects=[
objects.Instance(
uuid=uuidsentinel.instance, host='fake',
node='fake', task_state=None,
project_id='fake-project', user_id='fake-user')]),
objects.InstanceList()))
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get_allocations_for_consumer')
@mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename',
new_callable=mock.NonCallableMock) # assert not called
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.put',
return_value=fake_requests.FakeResponse(204))
def test_heal_allocations_sentinel_consumer(
self, mock_put, mock_get_compute_node, mock_get_allocs,
mock_get_instances, mock_get_all_cells):
"""Tests the scenario that there are allocations created using
placement API microversion < 1.8 where project/user weren't provided.
The allocations will be re-put with the instance project_id/user_id
values. Note that GET /allocations/{consumer_id} since commit f44965010
will create the missing consumer record using the config option
sentinels for project and user, so we won't get null back for the
consumer project/user.
"""
mock_get_allocs.return_value = {
"allocations": {
"92637880-2d79-43c6-afab-d860886c6391": {
"generation": 2,
"resources": {
"DISK_GB": 50,
"MEMORY_MB": 512,
"VCPU": 2
}
}
},
"project_id": CONF.placement.incomplete_consumer_project_id,
"user_id": CONF.placement.incomplete_consumer_user_id
}
self.assertEqual(0, self.cli.heal_allocations(verbose=True))
self.assertIn('Processed 1 instances.', self.output.getvalue())
mock_get_allocs.assert_called_once_with(
test.MatchType(context.RequestContext), uuidsentinel.instance,
include_project_user=True)
expected_put_data = mock_get_allocs.return_value
expected_put_data['project_id'] = 'fake-project'
expected_put_data['user_id'] = 'fake-user'
mock_put.assert_called_once_with(
'/allocations/%s' % uuidsentinel.instance, expected_put_data,
version='1.12')
@mock.patch('nova.objects.CellMappingList.get_all',
return_value=objects.CellMappingList(objects=[
objects.CellMapping(name='cell1',
uuid=uuidsentinel.cell1)]))
@mock.patch('nova.objects.InstanceList.get_by_filters',
return_value=objects.InstanceList(objects=[
objects.Instance(
uuid=uuidsentinel.instance, host='fake', node='fake',
task_state=None, project_id='fake-project',
user_id='fake-user')]))
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get_allocations_for_consumer')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.put',
return_value=fake_requests.FakeResponse(
409, content='Inventory and/or allocations changed while '
'attempting to allocate'))
def test_heal_allocations_sentinel_consumer_put_fails(
self, mock_put, mock_get_allocs, mock_get_instances,
mock_get_all_cells):
"""Tests the scenario that there are allocations created using
placement API microversion < 1.8 where project/user weren't provided
and there was no consumer. The allocations will be re-put with the
instance project_id/user_id values but that fails with a 409 so a
return code of 3 is expected from the command.
"""
mock_get_allocs.return_value = {
"allocations": {
"92637880-2d79-43c6-afab-d860886c6391": {
"generation": 2,
"resources": {
"DISK_GB": 50,
"MEMORY_MB": 512,
"VCPU": 2
}
}
},
"project_id": CONF.placement.incomplete_consumer_project_id,
"user_id": CONF.placement.incomplete_consumer_user_id
}
self.assertEqual(3, self.cli.heal_allocations(verbose=True))
self.assertIn(
'Inventory and/or allocations changed', self.output.getvalue())
mock_get_allocs.assert_called_once_with(
test.MatchType(context.RequestContext), uuidsentinel.instance,
include_project_user=True)
expected_put_data = mock_get_allocs.return_value
expected_put_data['project_id'] = 'fake-project'
expected_put_data['user_id'] = 'fake-user'
mock_put.assert_called_once_with(
'/allocations/%s' % uuidsentinel.instance, expected_put_data,
version='1.12')
class TestNovaManageMain(test.NoDBTestCase):
"""Tests the nova-manage:main() setup code."""