Browse Source

Merge "Introduce live_migration_claim()"

changes/93/671793/24
Zuul 1 week ago
parent
commit
6592716cca

+ 65
- 5
nova/compute/claims.py View File

@@ -58,8 +58,8 @@ class Claim(NopClaim):
58 58
     """
59 59
 
60 60
     def __init__(self, context, instance, nodename, tracker, resources,
61
-                 pci_requests, limits=None):
62
-        super(Claim, self).__init__()
61
+                 pci_requests, migration=None, limits=None):
62
+        super(Claim, self).__init__(migration=migration)
63 63
         # Stash a copy of the instance at the current point of time
64 64
         self.instance = instance.obj_clone()
65 65
         self.nodename = nodename
@@ -160,7 +160,7 @@ class MoveClaim(Claim):
160 160
     Move can be either a migrate/resize, live-migrate or an evacuate operation.
161 161
     """
162 162
     def __init__(self, context, instance, nodename, instance_type, image_meta,
163
-                 tracker, resources, pci_requests, limits=None):
163
+                 tracker, resources, pci_requests, migration, limits=None):
164 164
         self.context = context
165 165
         self.instance_type = instance_type
166 166
         if isinstance(image_meta, dict):
@@ -168,8 +168,7 @@ class MoveClaim(Claim):
168 168
         self.image_meta = image_meta
169 169
         super(MoveClaim, self).__init__(context, instance, nodename, tracker,
170 170
                                         resources, pci_requests,
171
-                                        limits=limits)
172
-        self.migration = None
171
+                                        migration=migration, limits=limits)
173 172
 
174 173
     @property
175 174
     def numa_topology(self):
@@ -186,3 +185,64 @@ class MoveClaim(Claim):
186 185
             self.instance, self.nodename,
187 186
             instance_type=self.instance_type)
188 187
         self.instance.drop_migration_context()
188
+
189
+    def _test_pci(self):
190
+        """Test whether this host can accept this claim's PCI requests. For
191
+        live migration, only Neutron SRIOV PCI requests are supported. Any
192
+        other type of PCI device would need to be removed and re-added for live
193
+        migration to work, and there is currently no support for that. For cold
194
+        migration, all types of PCI requests are supported, so we just call up
195
+        to normal Claim's _test_pci().
196
+        """
197
+        if self.migration.migration_type != 'live-migration':
198
+            return super(MoveClaim, self)._test_pci()
199
+        elif self._pci_requests.requests:
200
+            for pci_request in self._pci_requests.requests:
201
+                if (pci_request.source !=
202
+                        objects.InstancePCIRequest.NEUTRON_PORT):
203
+                    return (_('Non-VIF related PCI requests are not '
204
+                              'supported for live migration.'))
205
+            # TODO(artom) At this point, once we've made sure we only have
206
+            # NEUTRON_PORT (aka SRIOV) PCI requests, we should check whether
207
+            # the host can support them, like Claim._test_pci() does. However,
208
+            # SRIOV live migration is currently being handled separately - see
209
+            # for example _claim_pci_for_instance_vifs() in the compute
210
+            # manager. So we do nothing here to avoid stepping on that code's
211
+            # toes, but ideally MoveClaim would be used for all live migration
212
+            # resource claims.
213
+
214
+    def _test_live_migration_page_size(self):
215
+        """Tests that the current page size and the requested page size are the
216
+        same.
217
+
218
+        Must be called after _test_numa_topology() to make sure
219
+        self.claimed_numa_topology is set.
220
+
221
+        This only applies for live migrations when the hw:mem_page_size
222
+        extra spec has been set to a non-numeric value (like 'large'). That
223
+        would in theory allow an instance to live migrate from a host with a 1M
224
+        page size to a host with a 2M page size, for example. This is not
225
+        something we want to support, so fail the claim if the page sizes are
226
+        different.
227
+        """
228
+        if (self.migration.migration_type == 'live-migration' and
229
+                self.instance.numa_topology and
230
+                # NOTE(artom) We only support a single page size across all
231
+                # cells, so checking cell 0 is sufficient.
232
+                self.claimed_numa_topology.cells[0].pagesize !=
233
+                self.instance.numa_topology.cells[0].pagesize):
234
+            return (_('Requested page size is different from current '
235
+                      'page size.'))
236
+
237
+    def _test_numa_topology(self, resources, limit):
238
+        """Test whether this host can accept the instance's NUMA topology. The
239
+        _test methods return None on success, and a string-like Message _()
240
+        object explaining the reason on failure. So we call up to the normal
241
+        Claim's _test_numa_topology(), and if we get nothing back we test the
242
+        page size.
243
+        """
244
+        numa_test_failure = super(MoveClaim,
245
+                                  self)._test_numa_topology(resources, limit)
246
+        if numa_test_failure:
247
+            return numa_test_failure
248
+        return self._test_live_migration_page_size()

+ 35
- 5
nova/compute/resource_tracker.py View File

@@ -243,6 +243,27 @@ class ResourceTracker(object):
243 243
                                 migration, image_meta=image_meta,
244 244
                                 limits=limits)
245 245
 
246
+    @utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
247
+    def live_migration_claim(self, context, instance, nodename, migration,
248
+                             limits):
249
+        """Builds a MoveClaim for a live migration.
250
+
251
+        :param context: The request context.
252
+        :param instance: The instance being live migrated.
253
+        :param nodename: The nodename of the destination host.
254
+        :param migration: The Migration object associated with this live
255
+                          migration.
256
+        :param limits: A SchedulerLimits object from when the scheduler
257
+                       selected the destination host.
258
+        :returns: A MoveClaim for this live migration.
259
+        """
260
+        # Flavor and image cannot change during a live migration.
261
+        instance_type = instance.flavor
262
+        image_meta = instance.image_meta
263
+        return self._move_claim(context, instance, instance_type, nodename,
264
+                                migration, move_type='live-migration',
265
+                                image_meta=image_meta, limits=limits)
266
+
246 267
     def _move_claim(self, context, instance, new_instance_type, nodename,
247 268
                     migration, move_type=None, image_meta=None, limits=None):
248 269
         """Indicate that resources are needed for a move to this host.
@@ -295,12 +316,17 @@ class ResourceTracker(object):
295 316
                     new_pci_requests.requests.append(request)
296 317
         claim = claims.MoveClaim(context, instance, nodename,
297 318
                                  new_instance_type, image_meta, self, cn,
298
-                                 new_pci_requests,
299
-                                 limits=limits)
319
+                                 new_pci_requests, migration, limits=limits)
300 320
 
301
-        claim.migration = migration
302 321
         claimed_pci_devices_objs = []
303
-        if self.pci_tracker:
322
+        # TODO(artom) The second part of this condition should not be
323
+        # necessary, but since SRIOV live migration is currently handled
324
+        # elsewhere - see for example _claim_pci_for_instance_vifs() in the
325
+        # compute manager - we don't do any PCI claims if this is a live
326
+        # migration to avoid stepping on that code's toes. Ideally,
327
+        # MoveClaim/this method would be used for all live migration resource
328
+        # claims.
329
+        if self.pci_tracker and migration.migration_type != 'live-migration':
304 330
             # NOTE(jaypipes): ComputeNode.pci_device_pools is set below
305 331
             # in _update_usage_from_instance().
306 332
             claimed_pci_devices_objs = self.pci_tracker.claim_instance(
@@ -367,7 +393,11 @@ class ResourceTracker(object):
367 393
         migration.dest_compute = self.host
368 394
         migration.dest_node = nodename
369 395
         migration.dest_host = self.driver.get_host_ip_addr()
370
-        migration.status = 'pre-migrating'
396
+        # NOTE(artom) Migration objects for live migrations are created with
397
+        # status 'accepted' by the conductor in live_migrate_instance() and do
398
+        # not have a 'pre-migrating' status.
399
+        if migration.migration_type != 'live-migration':
400
+            migration.status = 'pre-migrating'
371 401
         migration.save()
372 402
 
373 403
     def _set_instance_host_and_node(self, instance, nodename):

+ 70
- 4
nova/tests/unit/compute/test_claims.py View File

@@ -338,10 +338,10 @@ class MoveClaimTestCase(ClaimTestCase):
338 338
         @mock.patch('nova.db.api.instance_extra_get_by_instance_uuid',
339 339
                     return_value=self.db_numa_topology)
340 340
         def get_claim(mock_extra_get, mock_numa_get):
341
-            return claims.MoveClaim(self.context, self.instance, _NODENAME,
342
-                                    instance_type, image_meta, self.tracker,
343
-                                    self.resources, requests,
344
-                                    limits=limits)
341
+            return claims.MoveClaim(
342
+                self.context, self.instance, _NODENAME, instance_type,
343
+                image_meta, self.tracker, self.resources, requests,
344
+                objects.Migration(migration_type='migration'), limits=limits)
345 345
         return get_claim()
346 346
 
347 347
     @mock.patch('nova.objects.Instance.drop_migration_context')
@@ -358,3 +358,69 @@ class MoveClaimTestCase(ClaimTestCase):
358 358
         image_meta = objects.ImageMeta()
359 359
         claim = self._claim(image_meta=image_meta)
360 360
         self.assertIsInstance(claim.image_meta, objects.ImageMeta)
361
+
362
+
363
+class LiveMigrationClaimTestCase(ClaimTestCase):
364
+
365
+    def test_live_migration_claim_bad_pci_request(self):
366
+        instance_type = self._fake_instance_type()
367
+        instance = self._fake_instance()
368
+        instance.numa_topology = None
369
+        self.assertRaisesRegex(
370
+            exception.ComputeResourcesUnavailable,
371
+            'PCI requests are not supported',
372
+            claims.MoveClaim, self.context, instance, _NODENAME, instance_type,
373
+            {}, self.tracker, self.resources,
374
+            objects.InstancePCIRequests(requests=[
375
+                objects.InstancePCIRequest(alias_name='fake-alias')]),
376
+            objects.Migration(migration_type='live-migration'), None)
377
+
378
+    def test_live_migration_page_size(self):
379
+        instance_type = self._fake_instance_type()
380
+        instance = self._fake_instance()
381
+        instance.numa_topology = objects.InstanceNUMATopology(
382
+            cells=[objects.InstanceNUMACell(id=1, cpuset=set([1, 2]),
383
+                                            memory=512, pagesize=2)])
384
+        claimed_numa_topology = objects.InstanceNUMATopology(
385
+            cells=[objects.InstanceNUMACell(id=1, cpuset=set([1, 2]),
386
+                                            memory=512, pagesize=1)])
387
+        with mock.patch('nova.virt.hardware.numa_fit_instance_to_host',
388
+                        return_value=claimed_numa_topology):
389
+            self.assertRaisesRegex(
390
+                exception.ComputeResourcesUnavailable,
391
+                'Requested page size is different',
392
+                claims.MoveClaim, self.context, instance, _NODENAME,
393
+                instance_type, {}, self.tracker, self.resources,
394
+                self.empty_requests,
395
+                objects.Migration(migration_type='live-migration'), None)
396
+
397
+    def test_claim_fails_page_size_not_called(self):
398
+        instance_type = self._fake_instance_type()
399
+        instance = self._fake_instance()
400
+        # This topology cannot fit in self.resources (see _fake_resources())
401
+        numa_topology = objects.InstanceNUMATopology(
402
+            cells=[objects.InstanceNUMACell(id=1, cpuset=set([1, 2, 3]),
403
+                                            memory=1024)])
404
+        with test.nested(
405
+            mock.patch('nova.virt.hardware.numa_get_constraints',
406
+                        return_value=numa_topology),
407
+            mock.patch(
408
+                'nova.compute.claims.MoveClaim._test_live_migration_page_size'
409
+        )) as (mock_test_numa, mock_test_page_size):
410
+            self.assertRaisesRegex(
411
+                exception.ComputeResourcesUnavailable,
412
+                'Requested instance NUMA topology',
413
+                claims.MoveClaim, self.context, instance, _NODENAME,
414
+                instance_type, {}, self.tracker, self.resources,
415
+                self.empty_requests,
416
+                objects.Migration(migration_type='live-migration'), None)
417
+            mock_test_page_size.assert_not_called()
418
+
419
+    def test_live_migration_no_instance_numa_topology(self):
420
+        instance_type = self._fake_instance_type()
421
+        instance = self._fake_instance()
422
+        instance.numa_topology = None
423
+        claims.MoveClaim(
424
+            self.context, instance, _NODENAME, instance_type, {}, self.tracker,
425
+            self.resources, self.empty_requests,
426
+            objects.Migration(migration_type='live-migration'), None)

+ 37
- 0
nova/tests/unit/compute/test_resource_tracker.py View File

@@ -38,6 +38,7 @@ from nova.objects import pci_device
38 38
 from nova.pci import manager as pci_manager
39 39
 from nova.scheduler.client import report
40 40
 from nova import test
41
+from nova.tests.unit import fake_instance
41 42
 from nova.tests.unit import fake_notifier
42 43
 from nova.tests.unit.objects import test_pci_device as fake_pci_device
43 44
 from nova.virt import driver
@@ -2824,6 +2825,42 @@ class TestRebuild(BaseTestCase):
2824 2825
         inst_save_mock.assert_called_once_with()
2825 2826
 
2826 2827
 
2828
+class TestLiveMigration(BaseTestCase):
2829
+
2830
+    def test_live_migration_claim(self):
2831
+        self._setup_rt()
2832
+        self.rt.compute_nodes[_NODENAME] = _COMPUTE_NODE_FIXTURES[0]
2833
+        ctxt = context.get_admin_context()
2834
+        instance = fake_instance.fake_instance_obj(ctxt)
2835
+        instance.pci_requests = None
2836
+        instance.pci_devices = None
2837
+        instance.numa_topology = None
2838
+        migration = objects.Migration(id=42, migration_type='live-migration',
2839
+                                      status='accepted')
2840
+        image_meta = objects.ImageMeta(properties=objects.ImageMetaProps())
2841
+        self.rt.pci_tracker = pci_manager.PciDevTracker(mock.sentinel.ctx)
2842
+        with test.nested(
2843
+            mock.patch.object(objects.ImageMeta, 'from_instance',
2844
+                              return_value=image_meta),
2845
+            mock.patch.object(objects.Migration, 'save'),
2846
+            mock.patch.object(objects.Instance, 'save'),
2847
+            mock.patch.object(self.rt, '_update'),
2848
+            mock.patch.object(self.rt.pci_tracker, 'claim_instance'),
2849
+        ) as (mock_from_instance, mock_migration_save, mock_instance_save,
2850
+              mock_update, mock_pci_claim_instance):
2851
+            claim = self.rt.live_migration_claim(ctxt, instance, _NODENAME,
2852
+                                                 migration, limits=None)
2853
+            self.assertEqual(42, claim.migration.id)
2854
+            # Check that we didn't set the status to 'pre-migrating' as we do
2855
+            # for cold migrations; live migrations have no such status, so it
2856
+            # must still be 'accepted'.
2857
+            self.assertEqual('accepted', claim.migration.status)
2858
+            self.assertIn('migration_context', instance)
2859
+            mock_update.assert_called_with(
2860
+                mock.ANY, _COMPUTE_NODE_FIXTURES[0])
2861
+            mock_pci_claim_instance.assert_not_called()
2862
+
2863
+
2827 2864
 class TestUpdateUsageFromMigration(test.NoDBTestCase):
2828 2865
     @mock.patch('nova.compute.resource_tracker.ResourceTracker.'
2829 2866
                 '_get_instance_type')

Loading…
Cancel
Save