diff --git a/nova/compute/instance_list.py b/nova/compute/instance_list.py index b0f0b93b7cc7..1363ffa39555 100644 --- a/nova/compute/instance_list.py +++ b/nova/compute/instance_list.py @@ -98,6 +98,37 @@ def get_instances_sorted(ctx, filters, limit, marker, columns_to_join, ctx, filters, limit, marker, columns_to_join=columns_to_join) +def get_instance_list_cells_batch_size(limit, cells): + """Calculate the proper batch size for a list request. + + This will consider config, request limit, and cells being queried and + return an appropriate batch size to use for querying said cells. + + :param limit: The overall limit specified in the request + :param cells: The list of CellMapping objects being queried + :returns: An integer batch size + """ + strategy = CONF.api.instance_list_cells_batch_strategy + limit = limit or CONF.api.max_limit + + if len(cells) <= 1: + # If we're limited to one (or no) cell for whatever reason, do + # not do any batching and just pull the desired limit from the + # single cell in one shot. + return limit + + if strategy == 'fixed': + # Fixed strategy, always a static batch size + batch_size = CONF.api.instance_list_cells_batch_fixed_size + elif strategy == 'distributed': + # Distributed strategy, 10% more than even partitioning + batch_size = int((limit / len(cells)) * 1.10) + + # We never query a larger batch than the total requested, and never + # smaller than the lower limit of 100. 
+ return max(min(batch_size, limit), 100) + + def get_instance_objects_sorted(ctx, filters, limit, marker, expected_attrs, sort_keys, sort_dirs): """Same as above, but return an InstanceList.""" @@ -115,11 +146,14 @@ def get_instance_objects_sorted(ctx, filters, limit, marker, expected_attrs, context.load_cells() cell_mappings = context.CELLS + batch_size = get_instance_list_cells_batch_size(limit, cell_mappings) + columns_to_join = instance_obj._expected_cols(expected_attrs) instance_generator = get_instances_sorted(ctx, filters, limit, marker, columns_to_join, sort_keys, sort_dirs, - cell_mappings=cell_mappings) + cell_mappings=cell_mappings, + batch_size=batch_size) if 'fault' in expected_attrs: # We join fault above, so we need to make sure we don't ask diff --git a/nova/conf/api.py b/nova/conf/api.py index 045d3e9f967b..e542f2b25d43 100644 --- a/nova/conf/api.py +++ b/nova/conf/api.py @@ -265,6 +265,61 @@ to provide the result. If you have a small number of cells, or tenants are likely to have instances in all cells, then this should be False. If you have many cells, especially if you confine tenants to a small subset of those cells, this should be True. +"""), + cfg.StrOpt("instance_list_cells_batch_strategy", + choices=("fixed", "distributed"), + default="distributed", + help=""" +This controls the method by which the API queries cell databases in +smaller batches during large instance list operations. If batching is +performed, a large instance list operation will request some fraction +of the overall API limit from each cell database initially, and will +re-request that same batch size as records are consumed (returned) +from each cell as necessary. Larger batches mean less chattiness +between the API and the database, but potentially more wasted effort +processing the results from the database which will not be returned to +the user. 
Any strategy will yield a batch size of at least 100 records, +to avoid a user causing many tiny database queries in their request. + +``distributed`` (the default) will attempt to divide the limit +requested by the user by the number of cells in the system. This +requires counting the cells in the system initially, which will not be +refreshed until service restart or SIGHUP. The actual batch size will +be increased by 10% over the result of ($limit / $num_cells). + +``fixed`` will simply request fixed-size batches from each cell, as +defined by ``instance_list_cells_batch_fixed_size``. If the limit is +smaller than the batch size, the limit will be used instead. If you do +not wish batching to be used at all, setting the fixed size equal to +the ``max_limit`` value will cause only one request per cell database +to be issued. + +Possible values: + +* distributed (default) +* fixed + +Related options: + +* instance_list_cells_batch_fixed_size +* max_limit +"""), + cfg.IntOpt("instance_list_cells_batch_fixed_size", + min=100, + default=100, + help=""" +This controls the batch size of instances requested from each cell +database if ``instance_list_cells_batch_strategy`` is set to ``fixed``. +This integer value will define the limit issued to each cell every time +a batch of instances is requested, regardless of the number of cells in +the system or any other factors. Per the general logic called out in +the documentation for ``instance_list_cells_batch_strategy``, the +minimum value for this is 100 records per batch. 
+ +Related options: + +* instance_list_cells_batch_strategy +* max_limit """), ] diff --git a/nova/tests/unit/compute/test_instance_list.py b/nova/tests/unit/compute/test_instance_list.py index 2c2e86ca2dd6..e5422c274305 100644 --- a/nova/tests/unit/compute/test_instance_list.py +++ b/nova/tests/unit/compute/test_instance_list.py @@ -46,6 +46,8 @@ class TestInstanceList(test.NoDBTestCase): self.context = nova_context.RequestContext() self.useFixture(fixtures.SpawnIsSynchronousFixture()) + self.flags(instance_list_cells_batch_strategy='fixed', group='api') + def test_compare_simple_instance_quirks(self): # Ensure uuid,asc is added ctx = instance_list.InstanceSortContext(['key0'], ['asc']) @@ -94,7 +96,8 @@ class TestInstanceList(test.NoDBTestCase): user_context, {}, None, None, [], None, None) mock_gi.assert_called_once_with(user_context, {}, None, None, [], None, None, - cell_mappings=mock_cm.return_value) + cell_mappings=mock_cm.return_value, + batch_size=1000) @mock.patch('nova.context.CELLS', new=FAKE_CELLS) @mock.patch('nova.context.load_cells') @@ -110,7 +113,8 @@ class TestInstanceList(test.NoDBTestCase): admin_context, {}, None, None, [], None, None) mock_gi.assert_called_once_with(admin_context, {}, None, None, [], None, None, - cell_mappings=FAKE_CELLS) + cell_mappings=FAKE_CELLS, + batch_size=100) mock_cm.assert_not_called() mock_lc.assert_called_once_with() @@ -128,7 +132,8 @@ class TestInstanceList(test.NoDBTestCase): user_context, {}, None, None, [], None, None) mock_gi.assert_called_once_with(user_context, {}, None, None, [], None, None, - cell_mappings=FAKE_CELLS) + cell_mappings=FAKE_CELLS, + batch_size=100) mock_lc.assert_called_once_with() @mock.patch('nova.context.CELLS', new=FAKE_CELLS) @@ -147,7 +152,8 @@ class TestInstanceList(test.NoDBTestCase): admin_context, {}, None, None, [], None, None) mock_gi.assert_called_once_with(admin_context, {}, None, None, [], None, None, - cell_mappings=FAKE_CELLS) + cell_mappings=FAKE_CELLS, + batch_size=100) 
mock_cm.assert_not_called() mock_lc.assert_called_once_with() @@ -177,3 +183,121 @@ class TestInstanceList(test.NoDBTestCase): # return the results from the up cell, ignoring the down cell. self.assertEqual(uuid_initial, uuid_final) + + def test_batch_size_fixed(self): + fixed_size = 200 + self.flags(instance_list_cells_batch_strategy='fixed', group='api') + self.flags(instance_list_cells_batch_fixed_size=fixed_size, + group='api') + + # We call the batch size calculator with various arguments, including + # lists of cells which are just counted, so the cardinality is all that + # matters. + + # One cell, so batch at $limit + ret = instance_list.get_instance_list_cells_batch_size( + 1000, [mock.sentinel.cell1]) + self.assertEqual(1000, ret) + + # Two cells, so batch at $fixed_size + ret = instance_list.get_instance_list_cells_batch_size( + 1000, [mock.sentinel.cell1, mock.sentinel.cell2]) + + self.assertEqual(fixed_size, ret) + + # Four cells, so batch at $fixed_size + ret = instance_list.get_instance_list_cells_batch_size( + 1000, [mock.sentinel.cell1, mock.sentinel.cell2, + mock.sentinel.cell3, mock.sentinel.cell4]) + self.assertEqual(fixed_size, ret) + + # Three cells, tiny limit, so batch at lower threshold + ret = instance_list.get_instance_list_cells_batch_size( + 10, [mock.sentinel.cell1, + mock.sentinel.cell2, + mock.sentinel.cell3]) + self.assertEqual(100, ret) + + # Three cells, limit above floor, so batch at limit + ret = instance_list.get_instance_list_cells_batch_size( + 110, [mock.sentinel.cell1, + mock.sentinel.cell2, + mock.sentinel.cell3]) + self.assertEqual(110, ret) + + def test_batch_size_distributed(self): + self.flags(instance_list_cells_batch_strategy='distributed', + group='api') + + # One cell, so batch at $limit + ret = instance_list.get_instance_list_cells_batch_size(1000, [1]) + self.assertEqual(1000, ret) + + # Two cells so batch at ($limit/2)+10% + ret = instance_list.get_instance_list_cells_batch_size(1000, [1, 2]) + 
self.assertEqual(550, ret) + + # Four cells so batch at ($limit/4)+10% + ret = instance_list.get_instance_list_cells_batch_size(1000, [1, 2, + 3, 4]) + self.assertEqual(275, ret) + + # Three cells, tiny limit, so batch at lower threshold + ret = instance_list.get_instance_list_cells_batch_size(10, [1, 2, 3]) + self.assertEqual(100, ret) + + # Three cells, small limit, so batch at lower threshold + ret = instance_list.get_instance_list_cells_batch_size(110, [1, 2, 3]) + self.assertEqual(100, ret) + + # No cells, so batch at $limit + ret = instance_list.get_instance_list_cells_batch_size(1000, []) + self.assertEqual(1000, ret) + + +class TestInstanceListBig(test.NoDBTestCase): + def setUp(self): + super(TestInstanceListBig, self).setUp() + + cells = [objects.CellMapping(uuid=getattr(uuids, 'cell%i' % i), + name='cell%i' % i, + transport_url='fake:///', + database_connection='fake://') + for i in range(0, 3)] + + insts = list([ + dict( + uuid=getattr(uuids, 'inst%i' % i), + hostname='inst%i' % i) + for i in range(0, 100)]) + + self.cells = cells + self.insts = insts + self.context = nova_context.RequestContext() + self.useFixture(fixtures.SpawnIsSynchronousFixture()) + + @mock.patch('nova.db.api.instance_get_all_by_filters_sort') + @mock.patch('nova.objects.CellMappingList.get_all') + def test_get_instances_batched(self, mock_cells, mock_inst): + mock_cells.return_value = self.cells + + def fake_get_insts(ctx, filters, limit, *a, **k): + for i in range(0, limit): + yield self.insts.pop() + + mock_inst.side_effect = fake_get_insts + insts = instance_list.get_instances_sorted(self.context, {}, + 50, None, + [], ['hostname'], ['desc'], + batch_size=10) + + # Make sure we returned exactly how many were requested + insts = list(insts) + self.assertEqual(50, len(insts)) + + # Since the instances are all uniform, we should have a + # predictable number of queries to the database. 
5 queries + # would get us 50 results, plus one more gets triggered by the + # sort to fill the buffer for the first cell feeder that runs + # dry. + self.assertEqual(6, mock_inst.call_count) diff --git a/releasenotes/notes/instance-list-batching-45f90a8b13eef512.yaml b/releasenotes/notes/instance-list-batching-45f90a8b13eef512.yaml new file mode 100644 index 000000000000..6d392b61d0e1 --- /dev/null +++ b/releasenotes/notes/instance-list-batching-45f90a8b13eef512.yaml @@ -0,0 +1,11 @@ +--- +features: + - | + Instance list operations across cells are now made more efficient by batching queries + as a fraction of the total limit for a request. Before this, an instance list with a + limit of 1000 records (the default) would generate queries to each cell with that + limit, and potentially process/sort/merge $num_cells*$limit records, despite only + returning $limit to the user. The strategy can now be controlled via + ``[api]/instance_list_cells_batch_strategy`` and related options to either use + fixed batch sizes, or a fractional value that scales with the number of configured + cells. \ No newline at end of file