Merge "Add a placement audit command"

Zuul 2020-03-30 12:01:33 +00:00 committed by Gerrit Code Review
commit fe66c28dac
5 changed files with 706 additions and 0 deletions


@@ -686,6 +686,42 @@ Placement
- An unexpected error occurred.
``nova-manage placement audit [--verbose] [--delete] [--resource_provider <uuid>]``
Iterates over all the Resource Providers (or just one if you provide its
UUID) and verifies whether the compute allocations are related to either
an existing instance or an in-progress migration.
If not, it reports which allocations are orphaned.
You can also ask to delete all the orphaned allocations by specifying
``--delete``.
Specify ``--verbose`` to get detailed progress output during execution.
This command requires that the
:oslo.config:option:`api_database.connection` and
:oslo.config:group:`placement` configuration options are set. Placement API
>= 1.14 is required.
**Return Codes**
.. list-table::
:widths: 20 80
:header-rows: 1
* - Return code
- Description
* - 0
- No orphaned allocations were found
* - 1
- An unexpected error occurred
* - 3
- Orphaned allocations were found
* - 4
- All found orphaned allocations were deleted
* - 127
- Invalid input
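The following operator-side wrapper is only an illustrative sketch and not
part of this change; it assumes a ``nova-manage`` binary on the ``$PATH``
and simply branches on the return codes documented above:

import subprocess

# Run the audit in report-only mode (no --delete) and inspect the exit code.
result = subprocess.run(
    ['nova-manage', 'placement', 'audit', '--verbose'],
    capture_output=True, text=True)

if result.returncode == 0:
    print('No orphaned allocations were found.')
elif result.returncode == 3:
    # Orphaned allocations exist; re-running with --delete would remove them.
    print('Orphaned allocations were found:')
    print(result.stdout)
elif result.returncode == 127:
    print('Invalid input, e.g. an unknown resource provider UUID.')
else:
    print('Audit failed with return code %d' % result.returncode)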
See Also
========


@@ -32,6 +32,7 @@ import traceback
from dateutil import parser as dateutil_parser
from keystoneauth1 import exceptions as ks_exc
from neutronclient.common import exceptions as neutron_client_exc
import os_resource_classes as orc
from oslo_config import cfg
from oslo_db import exception as db_exc
from oslo_log import log as logging
@@ -2416,6 +2417,300 @@ class PlacementCommands(object):
return return_code
def _get_instances_and_current_migrations(self, ctxt, cn_uuid):
if self.cn_uuid_mapping.get(cn_uuid):
cell_uuid, cn_host, cn_node = self.cn_uuid_mapping[cn_uuid]
else:
# We need to find the compute node record from all cells.
results = context.scatter_gather_skip_cell0(
ctxt, objects.ComputeNode.get_by_uuid, cn_uuid)
for result_cell_uuid, result in results.items():
if not context.is_cell_failure_sentinel(result):
cn = result
cell_uuid = result_cell_uuid
break
else:
return False
cn_host, cn_node = (cn.host, cn.hypervisor_hostname)
self.cn_uuid_mapping[cn_uuid] = (cell_uuid, cn_host, cn_node)
cell_mapping = objects.CellMapping.get_by_uuid(ctxt, cell_uuid)
# Get all the active instances from this compute node
if self.instances_mapping.get(cn_uuid):
inst_uuids = self.instances_mapping[cn_uuid]
else:
# Get the instance list record from the cell.
with context.target_cell(ctxt, cell_mapping) as cctxt:
instances = objects.InstanceList.get_by_host_and_node(
cctxt, cn_host, cn_node, expected_attrs=[])
inst_uuids = [instance.uuid for instance in instances]
self.instances_mapping[cn_uuid] = inst_uuids
# Get all *active* migrations for this compute node
# NOTE(sbauza): Since migrations are transient, it's better to not
# cache the results as they could be stale
with context.target_cell(ctxt, cell_mapping) as cctxt:
migs = objects.MigrationList.get_in_progress_by_host_and_node(
cctxt, cn_host, cn_node)
mig_uuids = [migration.uuid for migration in migs]
return (inst_uuids, mig_uuids)
def _delete_allocations_from_consumer(self, ctxt, placement, provider,
consumer_uuid, consumer_type):
"""Deletes allocations from a resource provider with consumer UUID.
:param ctxt: nova.context.RequestContext
:param placement: nova.scheduler.client.report.SchedulerReportClient
to communicate with the Placement service API.
:param provider: Resource Provider to look at.
:param consumer_uuid: the consumer UUID having allocations.
:param consumer_type: the type of consumer,
either 'instance' or 'migration'
:returns: bool whether the allocations were deleted.
"""
# We need to be careful and only remove the allocations
# against this specific RP or we would delete the
# whole instance usage and then it would require some
# healing.
# TODO(sbauza): Remove this extra check once placement
# supports querying allocation delete on both
# consumer and resource provider parameters.
allocations = placement.get_allocs_for_consumer(
ctxt, consumer_uuid)
if len(allocations['allocations']) > 1:
# This consumer has resources spread among multiple RPs
# (think nested or shared providers, for example).
# We then just need to update the usage to remove
# the orphaned resources on the specific RP.
del allocations['allocations'][provider['uuid']]
try:
placement.put_allocations(
ctxt, consumer_uuid, allocations)
except exception.AllocationUpdateFailed:
return False
else:
try:
placement.delete_allocation_for_instance(
ctxt, consumer_uuid, consumer_type)
except exception.AllocationDeleteFailed:
return False
return True
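For illustration only (not part of the patch), here is a minimal sketch of the
allocations payload manipulated above, assuming the usual Placement
``GET /allocations/{consumer_uuid}`` response shape; the provider keys and
resource values below are invented:

# Hypothetical response from get_allocs_for_consumer() for a consumer whose
# resources are spread across two providers (e.g. a nested VGPU provider).
allocations = {
    'allocations': {
        'root-rp-uuid': {'resources': {'VCPU': 2, 'MEMORY_MB': 2048}},
        'child-rp-uuid': {'resources': {'VGPU': 1}},
    },
    'consumer_generation': 1,
    'project_id': 'fake-project',
    'user_id': 'fake-user',
}
# Deleting only the orphaned provider's key and PUT-ing the payload back
# trims that provider's usage while keeping the consumer's other allocations.
del allocations['allocations']['child-rp-uuid']
assert list(allocations['allocations']) == ['root-rp-uuid']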
def _check_orphaned_allocations_for_provider(self, ctxt, placement,
output, provider,
delete):
"""Finds orphaned allocations for a specific resource provider.
:param ctxt: nova.context.RequestContext
:param placement: nova.scheduler.client.report.SchedulerReportClient
to communicate with the Placement service API.
:param output: function that takes a single message for verbose output
:param provider: Resource Provider to look at.
:param delete: whether to delete any orphaned allocations that are found.
:return: a tuple (<number of orphaned allocs>, <number of faults>)
"""
num_processed = 0
faults = 0
# TODO(sbauza): Are we sure we have all Nova RCs?
# FIXME(sbauza): Possibly use consumer types once Placement API
# supports them.
# NOTE(sbauza): We check allocations having *any* of the RCs below,
# not *all* of them.
NOVA_RCS = [orc.VCPU, orc.MEMORY_MB, orc.DISK_GB, orc.VGPU,
orc.NET_BW_EGR_KILOBIT_PER_SEC,
orc.NET_BW_IGR_KILOBIT_PER_SEC,
orc.PCPU, orc.MEM_ENCRYPTION_CONTEXT]
# Since the RP can be a child RP, we need to get the root RP as it's
# the compute node UUID
# NOTE(sbauza): In case Placement doesn't support 1.14 microversion,
# that means we don't have nested RPs.
# Since we ask for microversion 1.14, all RPs have a root RP UUID.
cn_uuid = provider.get("root_provider_uuid")
# Now get all the existing instances and active migrations for this
# compute node
result = self._get_instances_and_current_migrations(ctxt, cn_uuid)
if result is False:
# We don't want to hard stop here because the compute service could
# have disappeared while orphaned allocations could still exist.
output(_('The compute node for UUID %s can not be '
'found') % cn_uuid)
inst_uuids, mig_uuids = result or ([], [])
try:
pallocs = placement.get_allocations_for_resource_provider(
ctxt, provider['uuid'])
except exception.ResourceProviderAllocationRetrievalFailed:
print(_('Not able to find allocations for resource '
'provider %s.') % provider['uuid'])
raise
# Verify all allocations for each consumer UUID
for consumer_uuid, consumer_resources in six.iteritems(
pallocs.allocations):
consumer_allocs = consumer_resources['resources']
if any(rc in NOVA_RCS
for rc in consumer_allocs):
# We reset the consumer type for each allocation
consumer_type = None
# This is an allocation for Nova resources
# We need to guess whether the instance was deleted
# or if the instance is currently migrating
if not (consumer_uuid in inst_uuids or
consumer_uuid in mig_uuids):
# By default we suspect the orphaned allocation was for a
# migration...
consumer_type = 'migration'
if consumer_uuid not in inst_uuids:
# ... but if we can't find it as an instance either,
# assume the allocation was for an instance.
consumer_type = 'instance'
if consumer_type is not None:
output(_('Allocations were set against consumer UUID '
'%(consumer_uuid)s but no existing instances or '
'active migrations are related. ')
% {'consumer_uuid': consumer_uuid})
if delete:
deleted = self._delete_allocations_from_consumer(
ctxt, placement, provider, consumer_uuid,
consumer_type)
if not deleted:
print(_('Not able to delete allocations '
'for consumer UUID %s')
% consumer_uuid)
faults += 1
continue
output(_('Deleted allocations for consumer UUID '
'%(consumer_uuid)s on Resource Provider '
'%(rp)s: %(allocations)s')
% {'consumer_uuid': consumer_uuid,
'rp': provider['uuid'],
'allocations': consumer_allocs})
else:
output(_('Allocations for consumer UUID '
'%(consumer_uuid)s on Resource Provider '
'%(rp)s can be deleted: '
'%(allocations)s')
% {'consumer_uuid': consumer_uuid,
'rp': provider['uuid'],
'allocations': consumer_allocs})
num_processed += 1
return (num_processed, faults)
# TODO(sbauza): Move this to the scheduler report client?
def _get_resource_provider(self, context, placement, uuid):
"""Returns a single Resource Provider by its UUID.
:param context: The nova.context.RequestContext auth context
:param placement: nova.scheduler.client.report.SchedulerReportClient
to communicate with the Placement service API.
:param uuid: A specific Resource Provider UUID
:return: the existing resource provider.
:raises: keystoneauth1.exceptions.base.ClientException on failure to
communicate with the placement API
"""
resource_providers = self._get_resource_providers(context, placement,
uuid=uuid)
if not resource_providers:
# The endpoint never returns a 404; it returns an empty list instead
raise exception.ResourceProviderNotFound(name_or_uuid=uuid)
return resource_providers[0]
def _get_resource_providers(self, context, placement, **kwargs):
"""Returns all resource providers regardless of their relationships.
:param context: The nova.context.RequestContext auth context
:param placement: nova.scheduler.client.report.SchedulerReportClient
to communicate with the Placement service API.
:param kwargs: extra attributes for the query string
:return: list of resource providers.
:raises: keystoneauth1.exceptions.base.ClientException on failure to
communicate with the placement API
"""
url = '/resource_providers'
if 'uuid' in kwargs:
url += '?uuid=%s' % kwargs['uuid']
resp = placement.get(url, global_request_id=context.global_id,
version='1.14')
if resp is None:
raise exception.PlacementAPIConnectFailure()
data = resp.json()
resource_providers = data.get('resource_providers')
return resource_providers
@action_description(
_("Audits orphaned allocations that are no longer corresponding to "
"existing instance resources. This command requires that "
"the [api_database]/connection and [placement] configuration "
"options are set."))
@args('--verbose', action='store_true', dest='verbose', default=False,
help='Provide verbose output during execution.')
@args('--resource_provider', metavar='<provider_uuid>',
dest='provider_uuid',
help='UUID of a specific resource provider to verify.')
@args('--delete', action='store_true', dest='delete', default=False,
help='Deletes orphaned allocations that were found.')
def audit(self, verbose=False, provider_uuid=None, delete=False):
"""Provides information about orphaned allocations that can be removed
Return codes:
* 0: Command completed successfully and no orphaned allocations exist.
* 1: An unexpected error happened during run.
* 3: Orphaned allocations were detected.
* 4: Orphaned allocations were detected and deleted.
* 127: Invalid input.
"""
ctxt = context.get_admin_context()
output = lambda msg: None
if verbose:
output = lambda msg: print(msg)
placement = report.SchedulerReportClient()
# Reset the two in-memory dicts that track instances per compute node
self.cn_uuid_mapping = collections.defaultdict(tuple)
self.instances_mapping = collections.defaultdict(list)
num_processed = 0
faults = 0
if provider_uuid:
try:
resource_provider = self._get_resource_provider(
ctxt, placement, provider_uuid)
except exception.ResourceProviderNotFound:
print(_('Resource provider with UUID %s does not exist.') %
provider_uuid)
return 127
resource_providers = [resource_provider]
else:
resource_providers = self._get_resource_providers(ctxt, placement)
for provider in resource_providers:
(nb_p, faults) = self._check_orphaned_allocations_for_provider(
ctxt, placement, output, provider, delete)
num_processed += nb_p
if faults > 0:
print(_('The Resource Provider %s had problems when '
'deleting allocations. Stopping now. Please fix the '
'problem by hand and run again.') %
provider['uuid'])
return 1
if num_processed > 0:
suffix = 's.' if num_processed > 1 else '.'
output(_('Processed %(num)s allocation%(suffix)s')
% {'num': num_processed,
'suffix': suffix})
return 4 if delete else 3
return 0
CATEGORIES = {
'api_db': ApiDbCommands,


@@ -1393,6 +1393,232 @@ class TestNovaManagePlacementSyncAggregates(
'%s should be in two provider aggregates' % host)
class TestNovaManagePlacementAudit(
integrated_helpers.ProviderUsageBaseTestCase):
"""Functional tests for nova-manage placement audit"""
# Let's just use a simple fake driver
compute_driver = 'fake.SmallFakeDriver'
def setUp(self):
super(TestNovaManagePlacementAudit, self).setUp()
self.cli = manage.PlacementCommands()
# Make sure we have two computes for migrations
self.compute1 = self._start_compute('host1')
self.compute2 = self._start_compute('host2')
# Make sure we have two hypervisors reported in the API.
hypervisors = self.admin_api.api_get(
'/os-hypervisors').body['hypervisors']
self.assertEqual(2, len(hypervisors))
self.output = StringIO()
self.useFixture(fixtures.MonkeyPatch('sys.stdout', self.output))
self.flavor = self.api.get_flavors()[0]
def _delete_instance_but_keep_its_allocations(self, server):
"""Mocks out the call to Placement for deleting the allocations but
still performs the instance deletion.
"""
with mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'delete_allocation_for_instance'):
self.api.delete_server(server['id'])
self._wait_until_deleted(server)
def test_audit_orphaned_allocation_from_instance_delete(self):
"""Creates a server and deletes it by retaining its allocations so the
audit command can find it.
"""
target_hostname = self.compute1.host
rp_uuid = self._get_provider_uuid_by_host(target_hostname)
server = self._boot_and_check_allocations(self.flavor, target_hostname)
# Delete the server but keep its allocations around in Placement
self._delete_instance_but_keep_its_allocations(server)
# make sure the allocation is still around
self.assertFlavorMatchesUsage(rp_uuid, self.flavor)
# Don't ask to delete the orphaned allocations, just audit them
ret = self.cli.audit(verbose=True)
# The allocation should still exist
self.assertFlavorMatchesUsage(rp_uuid, self.flavor)
output = self.output.getvalue()
self.assertIn(
'Allocations for consumer UUID %(consumer_uuid)s on '
'Resource Provider %(rp_uuid)s can be deleted' %
{'consumer_uuid': server['id'],
'rp_uuid': rp_uuid},
output)
self.assertIn('Processed 1 allocation.', output)
self.assertEqual(3, ret)
# Now ask the audit command to delete the rogue allocations.
ret = self.cli.audit(delete=True, verbose=True)
# The allocations are now deleted
self.assertRequestMatchesUsage({'VCPU': 0,
'MEMORY_MB': 0,
'DISK_GB': 0}, rp_uuid)
output = self.output.getvalue()
self.assertIn(
'Deleted allocations for consumer UUID %s' % server['id'], output)
self.assertIn('Processed 1 allocation.', output)
self.assertEqual(4, ret)
def test_audit_orphaned_allocations_from_confirmed_resize(self):
"""Resize a server but when confirming it, leave the migration
allocation there so the audit command can find it.
"""
source_hostname = self.compute1.host
dest_hostname = self.compute2.host
source_rp_uuid = self._get_provider_uuid_by_host(source_hostname)
dest_rp_uuid = self._get_provider_uuid_by_host(dest_hostname)
old_flavor = self.flavor
new_flavor = self.api.get_flavors()[1]
# we want to make sure we resize to compute2
self.flags(allow_resize_to_same_host=False)
server = self._boot_and_check_allocations(self.flavor, source_hostname)
# Do a resize
post = {
'resize': {
'flavorRef': new_flavor['id']
}
}
self._move_and_check_allocations(
server, request=post, old_flavor=old_flavor,
new_flavor=new_flavor, source_rp_uuid=source_rp_uuid,
dest_rp_uuid=dest_rp_uuid)
# Retain the migration UUID record for later usage
migration_uuid = self.get_migration_uuid_for_instance(server['id'])
# Confirm the resize, which should in theory delete the source
# allocations, but mock out the allocation delete for the source
post = {'confirmResize': None}
with mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'delete_allocation_for_instance'):
self.api.post_server_action(
server['id'], post, check_response_status=[204])
self._wait_for_state_change(server, 'ACTIVE')
# The target host usage should be according to the new flavor...
self.assertFlavorMatchesUsage(dest_rp_uuid, new_flavor)
# ...but we should still see allocations for the source compute
self.assertFlavorMatchesUsage(source_rp_uuid, old_flavor)
# Now, run the audit command that will find this orphaned allocation
ret = self.cli.audit(verbose=True)
output = self.output.getvalue()
self.assertIn(
'Allocations for consumer UUID %(consumer_uuid)s on '
'Resource Provider %(rp_uuid)s can be deleted' %
{'consumer_uuid': migration_uuid,
'rp_uuid': source_rp_uuid},
output)
self.assertIn('Processed 1 allocation.', output)
self.assertEqual(3, ret)
# Now we want to delete the leftover orphaned allocation
ret = self.cli.audit(delete=True, verbose=True)
# There should be no longer usage for the source host since the
# allocation disappeared
self.assertRequestMatchesUsage({'VCPU': 0,
'MEMORY_MB': 0,
'DISK_GB': 0}, source_rp_uuid)
output = self.output.getvalue()
self.assertIn(
'Deleted allocations for consumer UUID %(consumer_uuid)s on '
'Resource Provider %(rp_uuid)s' %
{'consumer_uuid': migration_uuid,
'rp_uuid': source_rp_uuid},
output)
self.assertIn('Processed 1 allocation.', output)
self.assertEqual(4, ret)
# TODO(sbauza): Remove this test once bug #1829479 is fixed
def test_audit_orphaned_allocations_from_deleted_compute_evacuate(self):
"""Evacuate a server and the delete the source node so that it will
leave a source allocation that the audit command will find.
"""
source_hostname = self.compute1.host
dest_hostname = self.compute2.host
source_rp_uuid = self._get_provider_uuid_by_host(source_hostname)
dest_rp_uuid = self._get_provider_uuid_by_host(dest_hostname)
server = self._boot_and_check_allocations(self.flavor, source_hostname)
# Stop the service and fake it down
self.compute1.stop()
source_service_id = self.admin_api.get_services(
host=source_hostname, binary='nova-compute')[0]['id']
self.admin_api.put_service(source_service_id, {'forced_down': 'true'})
# evacuate the instance to the target
post = {'evacuate': {"host": dest_hostname}}
self.admin_api.post_server_action(server['id'], post)
self._wait_for_server_parameter(server,
{'OS-EXT-SRV-ATTR:host': dest_hostname,
'status': 'ACTIVE'})
# Now the instance is gone from the source host, we can delete the
# compute service
self.admin_api.api_delete('/os-services/%s' % source_service_id)
# Since the compute is deleted, we should in theory have a single
# allocation against the destination resource provider, but evacuated
# instances do not have their source allocations deleted. See bug #1829479.
# We therefore have two allocations for the same consumer, one on the
# source and one on the destination.
self._check_allocation_during_evacuate(
self.flavor, server['id'], source_rp_uuid, dest_rp_uuid)
# Now, run the audit command that will find this orphaned allocation
ret = self.cli.audit(verbose=True)
output = self.output.getvalue()
self.assertIn(
'Allocations for consumer UUID %(consumer_uuid)s on '
'Resource Provider %(rp_uuid)s can be deleted' %
{'consumer_uuid': server['id'],
'rp_uuid': source_rp_uuid},
output)
self.assertIn('Processed 1 allocation.', output)
self.assertEqual(3, ret)
# Now we want to delete the orphaned allocation that is a duplicate
ret = self.cli.audit(delete=True, verbose=True)
# We finally should only have the target allocations
self.assertFlavorMatchesUsage(dest_rp_uuid, self.flavor)
self.assertRequestMatchesUsage({'VCPU': 0,
'MEMORY_MB': 0,
'DISK_GB': 0}, source_rp_uuid)
output = self.output.getvalue()
self.assertIn(
'Deleted allocations for consumer UUID %(consumer_uuid)s on '
'Resource Provider %(rp_uuid)s' %
{'consumer_uuid': server['id'],
'rp_uuid': source_rp_uuid},
output)
self.assertIn('Processed 1 allocation.', output)
self.assertEqual(4, ret)
class TestDBArchiveDeletedRows(integrated_helpers._IntegratedTestBase):
"""Functional tests for the "nova-manage db archive_deleted_rows" CLI."""
api_major_version = 'v2.1'


@@ -34,6 +34,7 @@ from nova.db import migration
from nova.db.sqlalchemy import migration as sqla_migration
from nova import exception
from nova import objects
from nova.scheduler.client import report
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.unit import fake_requests
@@ -2860,6 +2861,142 @@ class TestNovaManagePlacement(test.NoDBTestCase):
neutron.update_port.assert_called_once_with(
uuidsentinel.port_id, body=expected_update_body)
def test_audit_with_wrong_provider_uuid(self):
with mock.patch.object(
self.cli, '_get_resource_provider',
side_effect=exception.ResourceProviderNotFound(
name_or_uuid=uuidsentinel.fake_uuid)):
ret = self.cli.audit(
provider_uuid=uuidsentinel.fake_uuid)
self.assertEqual(127, ret)
output = self.output.getvalue()
self.assertIn(
'Resource provider with UUID %s' % uuidsentinel.fake_uuid,
output)
@mock.patch.object(manage.PlacementCommands,
'_check_orphaned_allocations_for_provider')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.get')
def _test_audit(self, get_resource_providers, check_orphaned_allocs,
verbose=False, delete=False, errors=False, found=False):
rps = [
{"generation": 1,
"uuid": uuidsentinel.rp1,
"links": None,
"name": "rp1",
"parent_provider_uuid": None,
"root_provider_uuid": uuidsentinel.rp1},
{"generation": 1,
"uuid": uuidsentinel.rp2,
"links": None,
"name": "rp2",
"parent_provider_uuid": None,
"root_provider_uuid": uuidsentinel.rp2},
]
get_resource_providers.return_value = fake_requests.FakeResponse(
200, content=jsonutils.dumps({"resource_providers": rps}))
if errors:
# We found one orphaned allocation per RP but RP1 got a fault
check_orphaned_allocs.side_effect = ((1, 1), (1, 0))
elif found:
# we found one orphaned allocation per RP and we had no faults
check_orphaned_allocs.side_effect = ((1, 0), (1, 0))
else:
# No orphaned allocations were found for any of the RPs
check_orphaned_allocs.side_effect = ((0, 0), (0, 0))
ret = self.cli.audit(verbose=verbose, delete=delete)
if errors:
# Any fault stops the audit and gives a return code equal to 1
expected_ret = 1
elif found and delete:
# We found orphaned allocations and deleted them
expected_ret = 4
elif found and not delete:
# We found orphaned allocations but we left them
expected_ret = 3
else:
# Nothing was found
expected_ret = 0
self.assertEqual(expected_ret, ret)
call1 = mock.call(mock.ANY, mock.ANY, mock.ANY, rps[0], delete)
call2 = mock.call(mock.ANY, mock.ANY, mock.ANY, rps[1], delete)
if errors:
# We stop checking other RPs once we get a fault
check_orphaned_allocs.assert_has_calls([call1])
else:
# All the RPs are checked
check_orphaned_allocs.assert_has_calls([call1, call2])
if verbose and found:
output = self.output.getvalue()
self.assertIn('Processed 2 allocations', output)
if errors:
output = self.output.getvalue()
self.assertIn(
'The Resource Provider %s had problems' % rps[0]["uuid"],
output)
def test_audit_not_found_orphaned_allocs(self):
self._test_audit(found=False)
def test_audit_found_orphaned_allocs_not_verbose(self):
self._test_audit(found=True)
def test_audit_found_orphaned_allocs_verbose(self):
self._test_audit(found=True, verbose=True)
def test_audit_found_orphaned_allocs_and_deleted_them(self):
self._test_audit(found=True, delete=True)
def test_audit_found_orphaned_allocs_but_got_errors(self):
self._test_audit(errors=True)
@mock.patch.object(manage.PlacementCommands,
'_delete_allocations_from_consumer')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'get_allocations_for_resource_provider')
@mock.patch.object(manage.PlacementCommands,
'_get_instances_and_current_migrations')
def test_check_orphaned_allocations_for_provider(self,
get_insts_and_migs,
get_allocs_for_rp,
delete_allocs):
provider = {"generation": 1,
"uuid": uuidsentinel.rp1,
"links": None,
"name": "rp1",
"parent_provider_uuid": None,
"root_provider_uuid": uuidsentinel.rp1}
compute_resources = {'VCPU': 1, 'MEMORY_MB': 2048, 'DISK_GB': 20}
allocations = {
# Some orphaned compute allocation
uuidsentinel.orphaned_alloc1: {'resources': compute_resources},
# Some existing instance allocation
uuidsentinel.inst1: {'resources': compute_resources},
# Some existing migration allocation
uuidsentinel.mig1: {'resources': compute_resources},
# Some other allocation not related to Nova
uuidsentinel.other_alloc1: {'resources': {'CUSTOM_GOO': 1}},
}
get_insts_and_migs.return_value = (
[uuidsentinel.inst1],
[uuidsentinel.mig1])
get_allocs_for_rp.return_value = report.ProviderAllocInfo(allocations)
ctxt = context.RequestContext()
placement = report.SchedulerReportClient()
ret = self.cli._check_orphaned_allocations_for_provider(
ctxt, placement, lambda x: x, provider, True)
get_allocs_for_rp.assert_called_once_with(ctxt, uuidsentinel.rp1)
delete_allocs.assert_called_once_with(ctxt, placement, provider,
uuidsentinel.orphaned_alloc1,
'instance')
self.assertEqual((1, 0), ret)
class TestNovaManageMain(test.NoDBTestCase):
"""Tests the nova-manage:main() setup code."""


@@ -0,0 +1,12 @@
---
other:
- |
A new ``nova-manage placement audit`` CLI has been added to help identify
orphaned compute allocations in the Placement API that are no longer
related to either instances or migrations.
Some race conditions in Nova can leave allocations behind once an instance
or a migration is done, which then creates capacity issues. With this
command, you can identify those orphaned allocations and optionally ask to
delete them.
For more details on CLI usage, see the man page entry:
https://docs.openstack.org/nova/latest/cli/nova-manage.html#placement