diff --git a/doc/source/cli/nova-manage.rst b/doc/source/cli/nova-manage.rst
index 5ea10d3b05bf..f0640296b138 100644
--- a/doc/source/cli/nova-manage.rst
+++ b/doc/source/cli/nova-manage.rst
@@ -276,6 +276,38 @@ Nova Cells v2
     found, 3 if a host with that name is not in a cell with that uuid, 4 if a
     host with that name has instances (host not empty).
 
+
+Placement
+~~~~~~~~~
+
+``nova-manage placement heal_allocations [--max-count <max_count>] [--verbose]``
+    Iterates over non-cell0 cells looking for instances which do not have
+    allocations in the Placement service and which are not undergoing a task
+    state transition. For each instance found, allocations are created against
+    the compute node resource provider for that instance based on the flavor
+    associated with the instance.
+
+    Specify ``--max-count`` to control the maximum number of instances to
+    process. If not specified, all instances in each cell will be processed in
+    batches of 50. If you have a large number of instances, consider
+    specifying a custom value and run the command until it exits with 0 or 4.
+
+    Specify ``--verbose`` to get detailed progress output during execution.
+
+    This command requires that the ``[api_database]/connection`` and
+    ``[placement]`` configuration options are set.
+
+    Return codes:
+
+    * 0: Command completed successfully and allocations were created.
+    * 1: --max-count was reached and there are more instances to process.
+    * 2: Unable to find a compute node record for a given instance.
+    * 3: Unable to create allocations for an instance against its
+      compute node resource provider.
+    * 4: Command completed successfully but no allocations were created.
+    * 127: Invalid input.
+
+
 See Also
 ========
 
diff --git a/nova/cmd/manage.py b/nova/cmd/manage.py
index 29c1bf292202..812fddedd5d3 100644
--- a/nova/cmd/manage.py
+++ b/nova/cmd/manage.py
@@ -63,6 +63,8 @@ from nova.objects import quotas as quotas_obj
 from nova.objects import request_spec
 from nova import quota
 from nova import rpc
+from nova.scheduler.client import report
+from nova.scheduler import utils as scheduler_utils
 from nova import utils
 from nova import version
 from nova.virt import ironic
@@ -1707,6 +1709,246 @@ class CellV2Commands(object):
         return 0
 
 
+class PlacementCommands(object):
+    """Commands for managing placement resources."""
+
+    @staticmethod
+    def _get_compute_node_uuid(ctxt, instance, node_cache):
+        """Find the ComputeNode.uuid for the given Instance
+
+        :param ctxt: cell-targeted nova.context.RequestContext
+        :param instance: the instance to lookup a compute node
+        :param node_cache: dict of Instance.node keys to ComputeNode.uuid
+            values; this cache is updated if a new node is processed.
+        :returns: ComputeNode.uuid for the given instance
+        :raises: nova.exception.ComputeHostNotFound
+        """
+        if instance.node in node_cache:
+            return node_cache[instance.node]
+
+        compute_node = objects.ComputeNode.get_by_host_and_nodename(
+            ctxt, instance.host, instance.node)
+        node_uuid = compute_node.uuid
+        node_cache[instance.node] = node_uuid
+        return node_uuid
+
+    def _heal_instances_in_cell(self, ctxt, max_count, unlimited, output,
+                                placement):
+        """Checks for instances to heal in a given cell.
+
+        :param ctxt: cell-targeted nova.context.RequestContext
+        :param max_count: batch size (limit per instance query)
+        :param unlimited: True if all instances in the cell should be
+            processed, else False to just process $max_count instances
+        :param output: function that takes a single message for verbose output
+        :param placement: nova.scheduler.client.report.SchedulerReportClient
+            to communicate with the Placement service API.
+        :return: Number of instances that had allocations created.
+        :raises: nova.exception.ComputeHostNotFound if a compute node for a
+            given instance cannot be found
+        :raises: AllocationCreateFailed if unable to create allocations for
+            a given instance against a given compute node resource provider
+        """
+        # Keep a cache of instance.node to compute node resource provider UUID.
+        # This will save some queries for non-ironic instances to the
+        # compute_nodes table.
+        node_cache = {}
+        # Track the total number of instances that have allocations created
+        # for them in this cell. We return when num_processed equals max_count
+        # and unlimited=True or we exhaust the number of instances to process
+        # in this cell.
+        num_processed = 0
+        # Get all instances from this cell which have a host and are not
+        # undergoing a task state transition. Go from oldest to newest.
+        # NOTE(mriedem): Unfortunately we don't have a marker to use
+        # between runs where the user is specifying --max-count.
+        # TODO(mriedem): Store a marker in system_metadata so we can
+        # automatically pick up where we left off without the user having
+        # to pass it in (if unlimited is False).
+        instances = objects.InstanceList.get_by_filters(
+            ctxt, filters={}, sort_key='created_at', sort_dir='asc',
+            limit=max_count, expected_attrs=['flavor'])
+        while instances:
+            output(_('Found %s candidate instances.') % len(instances))
+            # For each instance in this list, we need to see if it has
+            # allocations in placement and if so, assume it's correct and
+            # continue.
+            for instance in instances:
+                if instance.task_state is not None:
+                    output(_('Instance %(instance)s is undergoing a task '
+                             'state transition: %(task_state)s') %
+                           {'instance': instance.uuid,
+                            'task_state': instance.task_state})
+                    continue
+
+                if instance.node is None:
+                    output(_('Instance %s is not on a host.') % instance.uuid)
+                    continue
+
+                allocations = placement.get_allocations_for_consumer(
+                    ctxt, instance.uuid)
+                if allocations:
+                    output(_('Instance %s already has allocations.') %
+                           instance.uuid)
+                    # TODO(mriedem): Check to see if the allocation project_id
+                    # and user_id matches the instance project and user and
+                    # fix the allocation project/user if they don't match; see
+                    # blueprint add-consumer-generation for details.
+                    continue
+
+                # This instance doesn't have allocations so we need to find
+                # its compute node resource provider.
+                node_uuid = self._get_compute_node_uuid(
+                    ctxt, instance, node_cache)
+
+                # Now get the resource allocations for the instance based
+                # on its embedded flavor.
+                resources = scheduler_utils.resources_from_flavor(
+                    instance, instance.flavor)
+                if placement.put_allocations(
+                        ctxt, node_uuid, instance.uuid, resources,
+                        instance.project_id, instance.user_id):
+                    num_processed += 1
+                    output(_('Successfully created allocations for '
+                             'instance %(instance)s against resource '
+                             'provider %(provider)s.') %
+                           {'instance': instance.uuid, 'provider': node_uuid})
+                else:
+                    raise exception.AllocationCreateFailed(
+                        instance=instance.uuid, provider=node_uuid)
+
+                # Make sure we don't go over the max count. Note that we
+                # don't include instances that already have allocations in the
+                # max_count number, only the number of instances that have
+                # successfully created allocations.
+                if not unlimited and num_processed == max_count:
+                    return num_processed
+
+            # Use a marker to get the next page of instances in this cell.
+            # Note that InstanceList doesn't support slice notation.
+            marker = instances[len(instances) - 1].uuid
+            instances = objects.InstanceList.get_by_filters(
+                ctxt, filters={}, sort_key='created_at', sort_dir='asc',
+                limit=max_count, marker=marker, expected_attrs=['flavor'])
+
+        return num_processed
+
+    @action_description(
+        _("Iterates over non-cell0 cells looking for instances which do "
+          "not have allocations in the Placement service and which are not "
+          "undergoing a task state transition. For each instance found, "
+          "allocations are created against the compute node resource provider "
+          "for that instance based on the flavor associated with the "
+          "instance. This command requires that the [api_database]/connection "
+          "and [placement] configuration options are set."))
+    @args('--max-count', metavar='<max_count>', dest='max_count',
+          help='Maximum number of instances to process. If not specified, all '
+               'instances in each cell will be processed in batches of 50. '
+               'If you have a large number of instances, consider specifying '
+               'a custom value and run the command until it exits with '
+               '0 or 4.')
+    @args('--verbose', action='store_true', dest='verbose', default=False,
+          help='Provide verbose output during execution.')
+    def heal_allocations(self, max_count=None, verbose=False):
+        """Heals instance allocations in the Placement service
+
+        Return codes:
+
+        * 0: Command completed successfully and allocations were created.
+        * 1: --max-count was reached and there are more instances to process.
+        * 2: Unable to find a compute node record for a given instance.
+        * 3: Unable to create allocations for an instance against its
+             compute node resource provider.
+        * 4: Command completed successfully but no allocations were created.
+        * 127: Invalid input.
+        """
+        # NOTE(mriedem): Thoughts on ways to expand this:
+        # - add a --dry-run option to just print which instances would have
+        #   allocations created for them
+        # - allow passing a specific cell to heal
+        # - allow filtering on enabled/disabled cells
+        # - allow passing a specific instance to heal
+        # - add a force option to force allocations for instances which have
+        #   task_state is not None (would get complicated during a migration);
+        #   for example, this could cleanup ironic instances that have
+        #   allocations on VCPU/MEMORY_MB/DISK_GB but are now using a custom
+        #   resource class
+        # - add an option to overwrite allocations for instances which already
+        #   have allocations (but the operator thinks might be wrong?); this
+        #   would probably only be safe with a specific instance.
+        # - deal with nested resource providers?
+
+        output = lambda msg: None
+        if verbose:
+            output = lambda msg: print(msg)
+
+        # TODO(mriedem): Rather than --max-count being both a total and batch
+        # count, should we have separate options to be specific, i.e. --total
+        # and --batch-size? Then --batch-size defaults to 50 and --total
+        # defaults to None to mean unlimited.
+        if max_count is not None:
+            try:
+                max_count = int(max_count)
+            except ValueError:
+                max_count = -1
+            unlimited = False
+            if max_count < 1:
+                print(_('Must supply a positive integer for --max-count.'))
+                return 127
+        else:
+            max_count = 50
+            unlimited = True
+            output(_('Running batches of %i until complete') % max_count)
+
+        ctxt = context.get_admin_context()
+        cells = objects.CellMappingList.get_all(ctxt)
+        if not cells:
+            output(_('No cells to process.'))
+            return 4
+
+        placement = report.SchedulerReportClient()
+        num_processed = 0
+        # TODO(mriedem): Use context.scatter_gather_skip_cell0.
+        for cell in cells:
+            # Skip cell0 since that is where instances go that do not get
+            # scheduled and hence would not have allocations against a host.
+            if cell.uuid == objects.CellMapping.CELL0_UUID:
+                continue
+            output(_('Looking for instances in cell: %s') % cell.identity)
+
+            limit_per_cell = max_count
+            if not unlimited:
+                # Adjust the limit for the next cell. For example, if the user
+                # only wants to process a total of 100 instances and we did
+                # 75 in cell1, then we only need 25 more from cell2 and so on.
+                limit_per_cell = max_count - num_processed
+
+            with context.target_cell(ctxt, cell) as cctxt:
+                try:
+                    num_processed += self._heal_instances_in_cell(
+                        cctxt, limit_per_cell, unlimited, output, placement)
+                except exception.ComputeHostNotFound as e:
+                    print(e.format_message())
+                    return 2
+                except exception.AllocationCreateFailed as e:
+                    print(e.format_message())
+                    return 3
+
+            # Make sure we don't go over the max count. Note that we
+            # don't include instances that already have allocations in the
+            # max_count number, only the number of instances that have
+            # successfully created allocations.
+            if num_processed == max_count:
+                output(_('Max count reached. Processed %s instances.')
+                       % num_processed)
+                return 1
+
+        output(_('Processed %s instances.') % num_processed)
+        if not num_processed:
+            return 4
+        return 0
+
+
 CATEGORIES = {
     'api_db': ApiDbCommands,
     'cell': CellCommands,
@@ -1714,6 +1956,7 @@ CATEGORIES = {
     'db': DbCommands,
     'floating': FloatingIpCommands,
     'network': NetworkCommands,
+    'placement': PlacementCommands
 }
 
 
diff --git a/nova/exception.py b/nova/exception.py
index b6365b36bdbb..d9964c2331d7 100644
--- a/nova/exception.py
+++ b/nova/exception.py
@@ -2274,3 +2274,8 @@ class DeviceDeletionException(NovaException):
 class OptRequiredIfOtherOptValue(NovaException):
     msg_fmt = _("The %(then_opt)s option is required if %(if_opt)s is "
                 "specified as '%(if_value)s'.")
+
+
+class AllocationCreateFailed(NovaException):
+    msg_fmt = _('Failed to create allocations for instance %(instance)s '
+                'against resource provider %(provider)s.')
diff --git a/nova/test.py b/nova/test.py
index a22025f4889f..7eb9ea0b1989 100644
--- a/nova/test.py
+++ b/nova/test.py
@@ -413,7 +413,8 @@ class TestCase(testtools.TestCase):
             # otherwise we'll fail to update the scheduler while running
             # the compute node startup routines below.
ctxt = context.get_context() - cell = self.cell_mappings[kwargs.pop('cell', CELL1_NAME)] + cell_name = kwargs.pop('cell', CELL1_NAME) or CELL1_NAME + cell = self.cell_mappings[cell_name] hm = objects.HostMapping(context=ctxt, host=host or name, cell_mapping=cell) diff --git a/nova/tests/functional/test_nova_manage.py b/nova/tests/functional/test_nova_manage.py index 8544d1db6aa2..304c78735e4e 100644 --- a/nova/tests/functional/test_nova_manage.py +++ b/nova/tests/functional/test_nova_manage.py @@ -10,10 +10,14 @@ # License for the specific language governing permissions and limitations # under the License. +import fixtures +from six.moves import StringIO + from nova.cmd import manage from nova import context from nova import objects from nova import test +from nova.tests.functional import test_servers class NovaManageDBIronicTest(test.TestCase): @@ -348,3 +352,222 @@ class NovaManageCellV2Test(test.TestCase): cns = objects.ComputeNodeList.get_all(self.context) self.assertEqual(1, len(cns)) self.assertEqual(0, cns[0].mapped) + + +class TestNovaManagePlacementHealAllocations( + test_servers.ProviderUsageBaseTestCase): + """Functional tests for nova-manage placement heal_allocations""" + + # This is required by the parent class. + compute_driver = 'fake.SmallFakeDriver' + # We want to test iterating across multiple cells. + NUMBER_OF_CELLS = 2 + + def setUp(self): + # Since the CachingScheduler does not use Placement, we want to use + # the CachingScheduler to create instances and then we can heal their + # allocations via the CLI. + self.flags(driver='caching_scheduler', group='scheduler') + super(TestNovaManagePlacementHealAllocations, self).setUp() + self.cli = manage.PlacementCommands() + # We need to start a compute in each non-cell0 cell. + for cell_name, cell_mapping in self.cell_mappings.items(): + if cell_mapping.uuid == objects.CellMapping.CELL0_UUID: + continue + self._start_compute(cell_name, cell_name=cell_name) + # Make sure we have two hypervisors reported in the API. + hypervisors = self.admin_api.api_get( + '/os-hypervisors').body['hypervisors'] + self.assertEqual(2, len(hypervisors)) + self.flavor = self.api.get_flavors()[0] + self.output = StringIO() + self.useFixture(fixtures.MonkeyPatch('sys.stdout', self.output)) + + def _boot_and_assert_no_allocations(self, flavor, hostname): + """Creates a server on the given host and asserts neither have usage + + :param flavor: the flavor used to create the server + :param hostname: the host on which to create the server + :returns: two-item tuple of the server and the compute node resource + provider uuid + """ + server_req = self._build_minimal_create_server_request( + self.api, 'some-server', flavor_id=flavor['id'], + image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', + networks=[]) + server_req['availability_zone'] = 'nova:%s' % hostname + created_server = self.api.post_server({'server': server_req}) + server = self._wait_for_state_change( + self.admin_api, created_server, 'ACTIVE') + + # Verify that our source host is what the server ended up on + self.assertEqual(hostname, server['OS-EXT-SRV-ATTR:host']) + + # Check that the compute node resource provider has no allocations. + rp_uuid = self._get_provider_uuid_by_host(hostname) + provider_usages = self._get_provider_usages(rp_uuid) + for resource_class, usage in provider_usages.items(): + self.assertEqual( + 0, usage, + 'Compute node resource provider %s should not have %s ' + 'usage when using the CachingScheduler.' 
% + (hostname, resource_class)) + + # Check that the server has no allocations. + allocations = self._get_allocations_by_server_uuid(server['id']) + self.assertEqual({}, allocations, + 'Server should not have allocations when using ' + 'the CachingScheduler.') + return server, rp_uuid + + def _assert_healed(self, server, rp_uuid): + allocations = self._get_allocations_by_server_uuid(server['id']) + self.assertIn(rp_uuid, allocations, + 'Allocations not found for server %s and compute node ' + 'resource provider. %s\nOutput:%s' % + (server['id'], rp_uuid, self.output.getvalue())) + self.assertFlavorMatchesAllocation( + self.flavor, allocations[rp_uuid]['resources']) + + def test_heal_allocations_paging(self): + """This test runs the following scenario: + + * Schedule server1 to cell1 and assert it doesn't have allocations. + * Schedule server2 to cell2 and assert it doesn't have allocations. + * Run "nova-manage placement heal_allocations --max-count 1" to make + sure we stop with just one instance and the return code is 1. + * Run "nova-manage placement heal_allocations" and assert both + both instances now have allocations against their respective compute + node resource providers. + """ + server1, rp_uuid1 = self._boot_and_assert_no_allocations( + self.flavor, 'cell1') + server2, rp_uuid2 = self._boot_and_assert_no_allocations( + self.flavor, 'cell2') + + # heal server1 and server2 in separate calls + for x in range(2): + result = self.cli.heal_allocations(max_count=1, verbose=True) + self.assertEqual(1, result, self.output.getvalue()) + output = self.output.getvalue() + self.assertIn('Max count reached. Processed 1 instances.', output) + # If this is the 2nd call, we'll have skipped the first instance. + if x == 0: + self.assertNotIn('already has allocations', output) + else: + self.assertIn('already has allocations', output) + + self._assert_healed(server1, rp_uuid1) + self._assert_healed(server2, rp_uuid2) + + # run it again to make sure nothing was processed + result = self.cli.heal_allocations(verbose=True) + self.assertEqual(4, result, self.output.getvalue()) + self.assertIn('already has allocations', self.output.getvalue()) + + def test_heal_allocations_paging_max_count_more_than_num_instances(self): + """Sets up 2 instances in cell1 and 1 instance in cell2. Then specify + --max-count=10, processes 3 instances, rc is 0 + """ + servers = [] # This is really a list of 2-item tuples. + for x in range(2): + servers.append( + self._boot_and_assert_no_allocations(self.flavor, 'cell1')) + servers.append( + self._boot_and_assert_no_allocations(self.flavor, 'cell2')) + result = self.cli.heal_allocations(max_count=10, verbose=True) + self.assertEqual(0, result, self.output.getvalue()) + self.assertIn('Processed 3 instances.', self.output.getvalue()) + for server, rp_uuid in servers: + self._assert_healed(server, rp_uuid) + + def test_heal_allocations_paging_more_instances_remain(self): + """Tests that there is one instance in cell1 and two instances in + cell2, with a --max-count=2. This tests that we stop in cell2 once + max_count is reached. + """ + servers = [] # This is really a list of 2-item tuples. + servers.append( + self._boot_and_assert_no_allocations(self.flavor, 'cell1')) + for x in range(2): + servers.append( + self._boot_and_assert_no_allocations(self.flavor, 'cell2')) + result = self.cli.heal_allocations(max_count=2, verbose=True) + self.assertEqual(1, result, self.output.getvalue()) + self.assertIn('Max count reached. 
Processed 2 instances.', + self.output.getvalue()) + # Assert that allocations were healed on the instances we expect. Order + # works here because cell mappings are retrieved by id in ascending + # order so oldest to newest, and instances are also retrieved from each + # cell by created_at in ascending order, which matches the order we put + # created servers in our list. + for x in range(2): + self._assert_healed(*servers[x]) + # And assert the remaining instance does not have allocations. + allocations = self._get_allocations_by_server_uuid( + servers[2][0]['id']) + self.assertEqual({}, allocations) + + def test_heal_allocations_unlimited(self): + """Sets up 2 instances in cell1 and 1 instance in cell2. Then + don't specify --max-count, processes 3 instances, rc is 0. + """ + servers = [] # This is really a list of 2-item tuples. + for x in range(2): + servers.append( + self._boot_and_assert_no_allocations(self.flavor, 'cell1')) + servers.append( + self._boot_and_assert_no_allocations(self.flavor, 'cell2')) + result = self.cli.heal_allocations(verbose=True) + self.assertEqual(0, result, self.output.getvalue()) + self.assertIn('Processed 3 instances.', self.output.getvalue()) + for server, rp_uuid in servers: + self._assert_healed(server, rp_uuid) + + def test_heal_allocations_shelved(self): + """Tests the scenario that an instance with no allocations is shelved + so heal_allocations skips it (since the instance is not on a host). + """ + server, rp_uuid = self._boot_and_assert_no_allocations( + self.flavor, 'cell1') + self.api.post_server_action(server['id'], {'shelve': None}) + # The server status goes to SHELVED_OFFLOADED before the host/node + # is nulled out in the compute service, so we also have to wait for + # that so we don't race when we run heal_allocations. + server = self._wait_for_server_parameter( + self.admin_api, server, + {'OS-EXT-SRV-ATTR:host': None, 'status': 'SHELVED_OFFLOADED'}) + result = self.cli.heal_allocations(verbose=True) + self.assertEqual(4, result, self.output.getvalue()) + self.assertIn('Instance %s is not on a host.' % server['id'], + self.output.getvalue()) + # Check that the server has no allocations. + allocations = self._get_allocations_by_server_uuid(server['id']) + self.assertEqual({}, allocations, + 'Shelved-offloaded server should not have ' + 'allocations.') + + def test_heal_allocations_task_in_progress(self): + """Tests the case that heal_allocations skips over an instance which + is undergoing a task state transition (in this case pausing). + """ + server, rp_uuid = self._boot_and_assert_no_allocations( + self.flavor, 'cell1') + + def fake_pause_instance(_self, ctxt, instance, *a, **kw): + self.assertEqual('pausing', instance.task_state) + # We have to stub out pause_instance so that the instance is stuck with + # task_state != None. + self.stub_out('nova.compute.manager.ComputeManager.pause_instance', + fake_pause_instance) + self.api.post_server_action(server['id'], {'pause': None}) + result = self.cli.heal_allocations(verbose=True) + self.assertEqual(4, result, self.output.getvalue()) + # Check that the server has no allocations. + allocations = self._get_allocations_by_server_uuid(server['id']) + self.assertEqual({}, allocations, + 'Server undergoing task state transition should ' + 'not have allocations.') + # Assert something was logged for this instance when it was skipped. 
+ self.assertIn('Instance %s is undergoing a task state transition: ' + 'pausing' % server['id'], self.output.getvalue()) diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index a9a7887fce93..2aaf6e5bb279 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -1416,16 +1416,18 @@ class ProviderUsageBaseTestCase(test.TestCase, self.computes = {} - def _start_compute(self, host): + def _start_compute(self, host, cell_name=None): """Start a nova compute service on the given host :param host: the name of the host that will be associated to the compute service. + :param cell_name: optional name of the cell in which to start the + compute service (defaults to cell1) :return: the nova compute service object """ fake.set_nodes([host]) self.addCleanup(fake.restore_nodes) - compute = self.start_service('compute', host=host) + compute = self.start_service('compute', host=host, cell=cell_name) self.computes[host] = compute return compute diff --git a/nova/tests/unit/test_nova_manage.py b/nova/tests/unit/test_nova_manage.py index a4ce36983f10..91be05dc8133 100644 --- a/nova/tests/unit/test_nova_manage.py +++ b/nova/tests/unit/test_nova_manage.py @@ -2395,6 +2395,97 @@ class CellV2CommandsTestCase(test.NoDBTestCase): node.save.assert_called_once_with() +@ddt.ddt +class TestNovaManagePlacement(test.NoDBTestCase): + """Unit tests for the nova-manage placement commands. + + Tests in this class should be simple and can rely on mock, so they + are usually restricted to negative or side-effect type tests. + + For more involved functional scenarios, use + nova.tests.functional.test_nova_manage. + """ + def setUp(self): + super(TestNovaManagePlacement, self).setUp() + self.output = StringIO() + self.useFixture(fixtures.MonkeyPatch('sys.stdout', self.output)) + self.cli = manage.PlacementCommands() + + @ddt.data(-1, 0, "one") + def test_heal_allocations_invalid_max_count(self, max_count): + self.assertEqual(127, self.cli.heal_allocations(max_count=max_count)) + + @mock.patch('nova.objects.CellMappingList.get_all', + return_value=objects.CellMappingList()) + def test_heal_allocations_no_cells(self, mock_get_all_cells): + self.assertEqual(4, self.cli.heal_allocations(verbose=True)) + self.assertIn('No cells to process', self.output.getvalue()) + + @mock.patch('nova.objects.CellMappingList.get_all', + return_value=objects.CellMappingList(objects=[ + objects.CellMapping(name='cell1', + uuid=uuidsentinel.cell1)])) + @mock.patch('nova.objects.InstanceList.get_by_filters', + return_value=objects.InstanceList()) + def test_heal_allocations_no_instances( + self, mock_get_instances, mock_get_all_cells): + self.assertEqual(4, self.cli.heal_allocations(verbose=True)) + self.assertIn('Processed 0 instances.', self.output.getvalue()) + + @mock.patch('nova.objects.CellMappingList.get_all', + return_value=objects.CellMappingList(objects=[ + objects.CellMapping(name='cell1', + uuid=uuidsentinel.cell1)])) + @mock.patch('nova.objects.InstanceList.get_by_filters', + return_value=objects.InstanceList(objects=[ + objects.Instance( + uuid=uuidsentinel.instance, host='fake', node='fake', + task_state=None)])) + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' 
+ 'get_allocations_for_consumer', return_value={}) + @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename', + side_effect=exception.ComputeHostNotFound(host='fake')) + def test_heal_allocations_compute_host_not_found( + self, mock_get_compute_node, mock_get_allocs, mock_get_instances, + mock_get_all_cells): + self.assertEqual(2, self.cli.heal_allocations()) + self.assertIn('Compute host fake could not be found.', + self.output.getvalue()) + + @mock.patch('nova.objects.CellMappingList.get_all', + return_value=objects.CellMappingList(objects=[ + objects.CellMapping(name='cell1', + uuid=uuidsentinel.cell1)])) + @mock.patch('nova.objects.InstanceList.get_by_filters', + return_value=objects.InstanceList(objects=[ + objects.Instance( + uuid=uuidsentinel.instance, host='fake', node='fake', + task_state=None, flavor=objects.Flavor(), + project_id='fake-project', user_id='fake-user')])) + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'get_allocations_for_consumer', return_value={}) + @mock.patch('nova.objects.ComputeNode.get_by_host_and_nodename', + return_value=objects.ComputeNode(uuid=uuidsentinel.node)) + @mock.patch('nova.scheduler.utils.resources_from_flavor', + return_value=mock.sentinel.resources) + @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' + 'put_allocations', return_value=False) + def test_heal_allocations_put_allocations_fails( + self, mock_put_allocations, mock_res_from_flavor, + mock_get_compute_node, mock_get_allocs, mock_get_instances, + mock_get_all_cells): + self.assertEqual(3, self.cli.heal_allocations()) + self.assertIn('Failed to create allocations for instance', + self.output.getvalue()) + instance = mock_get_instances.return_value[0] + mock_res_from_flavor.assert_called_once_with( + instance, instance.flavor) + mock_put_allocations.assert_called_once_with( + test.MatchType(context.RequestContext), uuidsentinel.node, + uuidsentinel.instance, mock.sentinel.resources, 'fake-project', + 'fake-user') + + class TestNovaManageMain(test.NoDBTestCase): """Tests the nova-manage:main() setup code.""" diff --git a/releasenotes/notes/nova-manage-placement-heal-allocations-13a9a0a3df910e0b.yaml b/releasenotes/notes/nova-manage-placement-heal-allocations-13a9a0a3df910e0b.yaml new file mode 100644 index 000000000000..88144592e658 --- /dev/null +++ b/releasenotes/notes/nova-manage-placement-heal-allocations-13a9a0a3df910e0b.yaml @@ -0,0 +1,18 @@ +--- +other: + - | + A new ``nova-manage placement heal_allocations`` CLI has been added to + help migrate users from the deprecated CachingScheduler. Starting in + 16.0.0 (Pike), the nova-compute service no longer reports instance + allocations to the Placement service because the FilterScheduler does + that as part of scheduling. However, the CachingScheduler does not create + the allocations in the Placement service, so any instances created using + the CachingScheduler after Ocata will not have allocations in Placement. + The new CLI allows operators using the CachingScheduler to find all + instances in all cells which do not have allocations in Placement and + create those allocations. The CLI will skip any instances that are + undergoing a task state transition, so ideally this would be run when + the API is down but it can be run, if necessary, while the API is up. + For more details on CLI usage, see the man page entry: + + https://docs.openstack.org/nova/latest/cli/nova-manage.html#placement
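
The documentation and release note above advise running the command with ``--max-count`` repeatedly until it exits with 0 or 4. A minimal operator-side sketch of that loop, based only on the return codes documented in this change; it is illustrative and not part of the patch, and it assumes ``nova-manage`` is on the PATH with ``[api_database]/connection`` and ``[placement]`` already configured::

    import subprocess
    import sys

    BATCH = '50'  # illustrative batch size; tune for the deployment

    while True:
        # Run one batch of healing; subprocess.call returns the exit code.
        rc = subprocess.call(
            ['nova-manage', 'placement', 'heal_allocations',
             '--max-count', BATCH, '--verbose'])
        if rc == 1:
            continue      # batch limit reached; more instances remain
        if rc in (0, 4):
            sys.exit(0)   # allocations were created, or nothing needed healing
        sys.exit(rc)      # 2, 3 or 127: investigate before re-running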