Browse Source

Merge "Cache host to cell mapping in HostManager" into stable/stein

changes/19/679519/1
Zuul 3 weeks ago
parent
commit
307acab3ab

+ 4
- 0
doc/source/cli/nova-manage.rst View File

@@ -327,6 +327,10 @@ Nova Cells v2
327 327
     found, 3 if a host with that name is not in a cell with that uuid, 4 if
328 328
     a host with that name has instances (host not empty).
329 329
 
330
+    .. note::
331
+
332
+      The scheduler caches host-to-cell mapping information so when deleting
333
+      a host the scheduler may need to be restarted or sent the SIGHUP signal.
330 334
 
331 335
 Placement
332 336
 ~~~~~~~~~

+ 4
- 0
nova/cmd/manage.py View File

@@ -1765,6 +1765,10 @@ class CellV2Commands(object):
1765 1765
         * The host has instances.
1766 1766
 
1767 1767
         Returns 0 if the host is deleted successfully.
1768
+
1769
+        NOTE: The scheduler caches host-to-cell mapping information so when
1770
+        deleting a host the scheduler may need to be restarted or sent the
1771
+        SIGHUP signal.
1768 1772
         """
1769 1773
         ctxt = context.get_admin_context()
1770 1774
         # Find the CellMapping given the uuid.

+ 37
- 2
nova/scheduler/host_manager.py View File

@@ -675,6 +675,10 @@ class HostManager(object):
675 675
                        'cells': ', '.join(
676 676
                        [c.identity for c in disabled_cells])})
677 677
 
678
+        # Dict mapping host name to cell UUID, used to look up the cell a
679
+        # particular host is in (used with self.cells).
680
+        self.host_to_cell_uuid = {}
681
+
678 682
     def get_host_states_by_uuids(self, context, compute_uuids, spec_obj):
679 683
 
680 684
         if not self.cells:
@@ -738,9 +742,40 @@ class HostManager(object):
738 742
         return [self.aggs_by_id[agg_id] for agg_id in
739 743
                 self.host_aggregates_map[host]]
740 744
 
745
+    def _get_cell_mapping_for_host(self, context, host_name):
746
+        """Finds the CellMapping for a particular host name
747
+
748
+        Relies on a cache to quickly fetch the CellMapping if we have looked
749
+        up this host before, otherwise gets the CellMapping via the
750
+        HostMapping record for the given host name.
751
+
752
+        :param context: nova auth request context
753
+        :param host_name: compute service host name
754
+        :returns: CellMapping object
755
+        :raises: HostMappingNotFound if the host is not mapped to a cell
756
+        """
757
+        # Check to see if we have the host in our cache.
758
+        if host_name in self.host_to_cell_uuid:
759
+            cell_uuid = self.host_to_cell_uuid[host_name]
760
+            if cell_uuid in self.cells:
761
+                return self.cells[cell_uuid]
762
+            # Something is wrong so log a warning and just fall through to
763
+            # look up the HostMapping.
764
+            LOG.warning('Host %s is expected to be in cell %s but that cell '
765
+                        'uuid was not found in our cache. The service may '
766
+                        'need to be restarted to refresh the cache.',
767
+                        host_name, cell_uuid)
768
+
769
+        # We have not cached this host yet so get the HostMapping, cache the
770
+        # result and return the CellMapping.
771
+        hm = objects.HostMapping.get_by_host(context, host_name)
772
+        cell_mapping = hm.cell_mapping
773
+        self.host_to_cell_uuid[host_name] = cell_mapping.uuid
774
+        return cell_mapping
775
+
741 776
     def _get_instances_by_host(self, context, host_name):
742 777
         try:
743
-            hm = objects.HostMapping.get_by_host(context, host_name)
778
+            cm = self._get_cell_mapping_for_host(context, host_name)
744 779
         except exception.HostMappingNotFound:
745 780
             # It's possible to hit this when the compute service first starts
746 781
             # up and casts to update_instance_info with an empty list but
@@ -748,7 +783,7 @@ class HostManager(object):
748 783
             LOG.info('Host mapping not found for host %s. Not tracking '
749 784
                      'instance info for this host.', host_name)
750 785
             return {}
751
-        with context_module.target_cell(context, hm.cell_mapping) as cctxt:
786
+        with context_module.target_cell(context, cm) as cctxt:
752 787
             uuids = objects.InstanceList.get_uuids_by_host(cctxt, host_name)
753 788
             # Putting the context in the otherwise fake Instance object at
754 789
             # least allows out of tree filters to lazy-load fields.

+ 3
- 0
nova/scheduler/manager.py View File

@@ -85,6 +85,9 @@ class SchedulerManager(manager.Manager):
85 85
         # and enabled cells caches in the host manager. So every time an
86 86
         # existing cell is disabled or enabled or a new cell is created, a
87 87
         # SIGHUP signal has to be sent to the scheduler for proper scheduling.
88
+        # NOTE(mriedem): Similarly there is a host-to-cell cache which should
89
+        # be reset if a host is deleted from a cell and "discovered" in another
90
+        # cell.
88 91
         self.driver.host_manager.refresh_cells_caches()
89 92
 
90 93
     @messaging.expected_exceptions(exception.NoValidHost)

+ 44
- 0
nova/tests/unit/scheduler/test_host_manager.py View File

@@ -104,6 +104,8 @@ class HostManagerTestCase(test.NoDBTestCase):
104 104
                                             transport_url='fake:///mq3',
105 105
                                             disabled=False)
106 106
         cells = [cell_mapping1, cell_mapping2, cell_mapping3]
107
+        # Seed the cache with a host-to-cell entry to make sure it gets reset.
108
+        self.host_manager.host_to_cell_uuid['fake-host'] = cell_uuid1
107 109
         with mock.patch('nova.objects.CellMappingList.get_all',
108 110
                         return_value=cells) as mock_cm:
109 111
             self.host_manager.refresh_cells_caches()
@@ -114,6 +116,8 @@ class HostManagerTestCase(test.NoDBTestCase):
114 116
         # But it is still in the full list.
115 117
         self.assertEqual(3, len(self.host_manager.cells))
116 118
         self.assertIn(cell_uuid2, self.host_manager.cells)
119
+        # The host_to_cell_uuid cache should have been reset.
120
+        self.assertEqual({}, self.host_manager.host_to_cell_uuid)
117 121
 
118 122
     def test_refresh_cells_caches_except_cell0(self):
119 123
         ctxt = nova_context.RequestContext('fake-user', 'fake_project')
@@ -131,6 +135,46 @@ class HostManagerTestCase(test.NoDBTestCase):
131 135
             mock_cm.assert_called_once()
132 136
         self.assertEqual(0, len(self.host_manager.cells))
133 137
 
138
+    @mock.patch('nova.objects.HostMapping.get_by_host',
139
+                return_value=objects.HostMapping(
140
+                    cell_mapping=objects.CellMapping(uuid=uuids.cell1)))
141
+    def test_get_cell_mapping_for_host(self, mock_get_by_host):
142
+        # Starting with an empty cache, assert that the HostMapping is looked
143
+        # up and the result is cached.
144
+        ctxt = nova_context.get_admin_context()
145
+        host = 'fake-host'
146
+        self.assertEqual({}, self.host_manager.host_to_cell_uuid)
147
+        cm = self.host_manager._get_cell_mapping_for_host(ctxt, host)
148
+        self.assertIs(cm, mock_get_by_host.return_value.cell_mapping)
149
+        self.assertIn(host, self.host_manager.host_to_cell_uuid)
150
+        self.assertEqual(
151
+            uuids.cell1, self.host_manager.host_to_cell_uuid[host])
152
+        mock_get_by_host.assert_called_once_with(ctxt, host)
153
+
154
+        # Reset the mock and do it again, assert we do not query the DB.
155
+        mock_get_by_host.reset_mock()
156
+        self.host_manager._get_cell_mapping_for_host(ctxt, host)
157
+        mock_get_by_host.assert_not_called()
158
+
159
+        # Mix up the cache such that the host is mapped to a cell that
160
+        # is not in the cache which will make us query the DB. Also make the
161
+        # HostMapping query raise HostMappingNotFound to make sure that comes
162
+        # up to the caller.
163
+        mock_get_by_host.reset_mock()
164
+        self.host_manager.host_to_cell_uuid[host] = uuids.random_cell
165
+        mock_get_by_host.side_effect = exception.HostMappingNotFound(name=host)
166
+        with mock.patch('nova.scheduler.host_manager.LOG.warning') as warning:
167
+            self.assertRaises(exception.HostMappingNotFound,
168
+                              self.host_manager._get_cell_mapping_for_host,
169
+                              ctxt, host)
170
+            # We should have logged a warning because the host is cached to
171
+            # a cell uuid but that cell uuid is not in the cells cache.
172
+            warning.assert_called_once()
173
+            self.assertIn('Host %s is expected to be in cell %s',
174
+                          warning.call_args[0][0])
175
+            # We should also have tried to look up the HostMapping in the DB.
176
+            mock_get_by_host.assert_called_once_with(ctxt, host)
177
+
134 178
     @mock.patch.object(nova.objects.InstanceList, 'get_by_filters')
135 179
     @mock.patch.object(nova.objects.ComputeNodeList, 'get_all')
136 180
     def test_init_instance_info_batches(self, mock_get_all,

Loading…
Cancel
Save