Allow scheduling only to enabled cells (Filter Scheduler)

By default this schedules to all cells, since all cells
are enabled at creation time unless specified otherwise.

Since the list of enabled cells is stored as a global cache on the
host_manager, a reset() handler for the SIGHUP signal has also been
added to the scheduler. Hence, after every create-cell/enable-cell/
disable-cell operation the scheduler has to be signaled so that the
cache is refreshed.

Co-Authored-By: Dan Smith <dms@danplanet.com>

Implements blueprint cell-disable

Change-Id: I6a9007d172b55238d02da8046311f8dc954703c5
This commit is contained in:
Surya Seetharaman 2018-03-07 17:32:42 +01:00 committed by Matt Riedemann
parent 378bef70db
commit ba083b0c98
6 changed files with 102 additions and 29 deletions

View File

@ -340,7 +340,7 @@ class HostManager(object):
return HostState(host, node, cell) return HostState(host, node, cell)
def __init__(self): def __init__(self):
self.cells = None self.refresh_cells_caches()
self.filter_handler = filters.HostFilterHandler() self.filter_handler = filters.HostFilterHandler()
filter_classes = self.filter_handler.get_matching_classes( filter_classes = self.filter_handler.get_matching_classes(
CONF.filter_scheduler.available_filters) CONF.filter_scheduler.available_filters)
@ -417,7 +417,6 @@ class HostManager(object):
def _async_init_instance_info(computes_by_cell): def _async_init_instance_info(computes_by_cell):
context = context_module.RequestContext() context = context_module.RequestContext()
self._load_cells(context)
LOG.debug("START:_async_init_instance_info") LOG.debug("START:_async_init_instance_info")
self._instance_info = {} self._instance_info = {}
@ -633,25 +632,41 @@ class HostManager(object):
include_disabled=True)}) include_disabled=True)})
return compute_nodes, services return compute_nodes, services
def _load_cells(self, context): def refresh_cells_caches(self):
if not self.cells: # NOTE(tssurya): This function is called from the scheduler manager's
temp_cells = objects.CellMappingList.get_all(context) # reset signal handler and also upon startup of the scheduler.
# NOTE(tssurya): filtering cell0 from the list since it need context = context_module.RequestContext()
# not be considered for scheduling. temp_cells = objects.CellMappingList.get_all(context)
for c in temp_cells: # NOTE(tssurya): filtering cell0 from the list since it need
if c.is_cell0(): # not be considered for scheduling.
temp_cells.objects.remove(c) for c in temp_cells:
# once its done break for optimization if c.is_cell0():
break temp_cells.objects.remove(c)
# NOTE(danms): global list of cells cached forever right now # once its done break for optimization
self.cells = temp_cells break
LOG.debug('Found %(count)i cells: %(cells)s', # NOTE(danms, tssurya): global list of cells cached which
{'count': len(self.cells), # will be refreshed every time a SIGHUP is sent to the scheduler.
'cells': ', '.join([c.uuid for c in self.cells])}) self.cells = temp_cells
LOG.debug('Found %(count)i cells: %(cells)s',
{'count': len(self.cells),
'cells': ', '.join([c.uuid for c in self.cells])})
# NOTE(tssurya): Global cache of only the enabled cells. This way
# scheduling is limited only to the enabled cells. However this
# cache will be refreshed every time a cell is disabled or enabled
# or when a new cell is created as long as a SIGHUP signal is sent
# to the scheduler.
self.enabled_cells = [c for c in self.cells if not c.disabled]
# Filtering the disabled cells only for logging purposes.
disabled_cells = [c for c in self.cells if c.disabled]
LOG.debug('Found %(count)i disabled cells: %(cells)s',
{'count': len(disabled_cells),
'cells': ', '.join(
[c.identity for c in disabled_cells])})
def get_host_states_by_uuids(self, context, compute_uuids, spec_obj): def get_host_states_by_uuids(self, context, compute_uuids, spec_obj):
self._load_cells(context) if not self.cells:
LOG.warning("No cells were found")
if (spec_obj and 'requested_destination' in spec_obj and if (spec_obj and 'requested_destination' in spec_obj and
spec_obj.requested_destination and spec_obj.requested_destination and
'cell' in spec_obj.requested_destination): 'cell' in spec_obj.requested_destination):
@ -662,7 +677,7 @@ class HostManager(object):
if only_cell: if only_cell:
cells = [only_cell] cells = [only_cell]
else: else:
cells = self.cells cells = self.enabled_cells
compute_nodes, services = self._get_computes_for_cells( compute_nodes, services = self._get_computes_for_cells(
context, cells, compute_uuids=compute_uuids) context, cells, compute_uuids=compute_uuids)
@ -673,7 +688,6 @@ class HostManager(object):
the HostManager knows about. Also, each of the consumable resources the HostManager knows about. Also, each of the consumable resources
in HostState are pre-populated and adjusted based on data in the db. in HostState are pre-populated and adjusted based on data in the db.
""" """
self._load_cells(context)
compute_nodes, services = self._get_computes_for_cells(context, compute_nodes, services = self._get_computes_for_cells(context,
self.cells) self.cells)
return self._get_host_states(context, compute_nodes, services) return self._get_host_states(context, compute_nodes, services)

View File

@ -117,7 +117,6 @@ class IronicHostManager(host_manager.HostManager):
def _init_instance_info(self, computes_by_cell=None): def _init_instance_info(self, computes_by_cell=None):
"""Ironic hosts should not pass instance info.""" """Ironic hosts should not pass instance info."""
context = context_module.RequestContext() context = context_module.RequestContext()
self._load_cells(context)
if not computes_by_cell: if not computes_by_cell:
computes_by_cell = {} computes_by_cell = {}
for cell in self.cells: for cell in self.cells:

View File

@ -81,6 +81,13 @@ class SchedulerManager(manager.Manager):
def _run_periodic_tasks(self, context): def _run_periodic_tasks(self, context):
self.driver.run_periodic_tasks(context) self.driver.run_periodic_tasks(context)
def reset(self):
# NOTE(tssurya): This is a SIGHUP handler which will reset the cells
# and enabled cells caches in the host manager. So every time an
# existing cell is disabled or enabled or a new cell is created, a
# SIGHUP signal has to be sent to the scheduler for proper scheduling.
self.driver.host_manager.refresh_cells_caches()
@messaging.expected_exceptions(exception.NoValidHost) @messaging.expected_exceptions(exception.NoValidHost)
def select_destinations(self, ctxt, request_spec=None, def select_destinations(self, ctxt, request_spec=None,
filter_properties=None, spec_obj=_sentinel, instance_uuids=None, filter_properties=None, spec_obj=_sentinel, instance_uuids=None,

View File

@ -78,13 +78,44 @@ class HostManagerTestCase(test.NoDBTestCase):
filters = self.host_manager._load_filters() filters = self.host_manager._load_filters()
self.assertEqual(filters, ['FakeFilterClass1']) self.assertEqual(filters, ['FakeFilterClass1'])
def test_load_cells_except_cell0(self): def test_refresh_cells_caches(self):
ctxt = nova_context.RequestContext('fake-user', 'fake_project') ctxt = nova_context.RequestContext('fake', 'fake')
self.assertIsNone(self.host_manager.cells)
self.host_manager._load_cells(ctxt)
# Loading the non-cell0 mapping from the base test class. # Loading the non-cell0 mapping from the base test class.
self.assertEqual(1, len(self.host_manager.enabled_cells))
self.assertEqual(1, len(self.host_manager.cells)) self.assertEqual(1, len(self.host_manager.cells))
self.host_manager.cells = None # Creating cell mappings for mocking the list of cell_mappings obtained
# so that the refreshing mechanism can be properly tested. This will in
# turn ignore the loaded cell mapping from the base test case setup.
cell_uuid1 = uuids.cell1
cell_mapping1 = objects.CellMapping(context=ctxt,
uuid=cell_uuid1,
database_connection='fake:///db1',
transport_url='fake:///mq1',
disabled=False)
cell_uuid2 = uuids.cell2
cell_mapping2 = objects.CellMapping(context=ctxt,
uuid=cell_uuid2,
database_connection='fake:///db2',
transport_url='fake:///mq2',
disabled=True)
cell_uuid3 = uuids.cell3
cell_mapping3 = objects.CellMapping(context=ctxt,
uuid=cell_uuid3,
database_connection='fake:///db3',
transport_url='fake:///mq3',
disabled=False)
cells = [cell_mapping1, cell_mapping2, cell_mapping3]
with mock.patch('nova.objects.CellMappingList.get_all',
return_value=cells) as mock_cm:
self.host_manager.refresh_cells_caches()
mock_cm.assert_called_once()
self.assertEqual(2, len(self.host_manager.enabled_cells))
self.assertEqual(cell_uuid3, self.host_manager.enabled_cells[1].uuid)
self.assertEqual(3, len(self.host_manager.cells))
self.assertEqual(cell_uuid2, self.host_manager.cells[1].uuid)
def test_refresh_cells_caches_except_cell0(self):
ctxt = nova_context.RequestContext('fake-user', 'fake_project')
cell_uuid0 = objects.CellMapping.CELL0_UUID cell_uuid0 = objects.CellMapping.CELL0_UUID
cell_mapping0 = objects.CellMapping(context=ctxt, cell_mapping0 = objects.CellMapping(context=ctxt,
uuid=cell_uuid0, uuid=cell_uuid0,
@ -92,11 +123,11 @@ class HostManagerTestCase(test.NoDBTestCase):
transport_url='fake:///mq1') transport_url='fake:///mq1')
cells = objects.CellMappingList(cell_mapping0) cells = objects.CellMappingList(cell_mapping0)
# Mocking the return value of get_all cell_mappings to return only # Mocking the return value of get_all cell_mappings to return only
# the cell0 mapping to check if load_cells() filters it or not. # the cell0 mapping to check if its filtered or not.
with mock.patch('nova.objects.CellMappingList.get_all', with mock.patch('nova.objects.CellMappingList.get_all',
return_value=cells) as mock_cm: return_value=cells) as mock_cm:
self.host_manager._load_cells(ctxt) self.host_manager.refresh_cells_caches()
mock_cm.assert_called_once_with(ctxt) mock_cm.assert_called_once()
self.assertEqual(0, len(self.host_manager.cells)) self.assertEqual(0, len(self.host_manager.cells))
@mock.patch.object(nova.objects.InstanceList, 'get_by_filters') @mock.patch.object(nova.objects.InstanceList, 'get_by_filters')

View File

@ -321,6 +321,12 @@ class SchedulerManagerTestCase(test.NoDBTestCase):
mock.sentinel.host_name, mock.sentinel.host_name,
mock.sentinel.instance_uuids) mock.sentinel.instance_uuids)
def test_reset(self):
with mock.patch.object(self.manager.driver.host_manager,
'refresh_cells_caches') as mock_refresh:
self.manager.reset()
mock_refresh.assert_called_once_with()
@mock.patch('nova.objects.host_mapping.discover_hosts') @mock.patch('nova.objects.host_mapping.discover_hosts')
def test_discover_hosts(self, mock_discover): def test_discover_hosts(self, mock_discover):
cm1 = objects.CellMapping(name='cell1') cm1 = objects.CellMapping(name='cell1')

View File

@ -0,0 +1,16 @@
---
features:
- |
    Support for filtering out disabled cells during scheduling for server
    create requests has been added. First, the concept of disabled cells
    has been introduced, meaning such disabled cells will not be
    candidates for the scheduler. Second, changes have been made to the
    filter scheduler to ensure that it chooses only the enabled cells for
    scheduling and filters out the disabled ones. Note that operations on
    existing instances already inside a disabled cell, such as move
    operations, will not be blocked.
upgrade:
- |
If the scheduler service is started before the cell mappings are created or
setup, nova-scheduler needs to be restarted or SIGHUP-ed for the newly
added cells to get registered in the scheduler cache.