Merge "Introduce live_migration_claim()"

This commit is contained in:
Zuul 2019-09-11 20:43:10 +00:00 committed by Gerrit Code Review
commit 6592716cca
4 changed files with 207 additions and 14 deletions

View File

@ -58,8 +58,8 @@ class Claim(NopClaim):
"""
def __init__(self, context, instance, nodename, tracker, resources,
pci_requests, limits=None):
super(Claim, self).__init__()
pci_requests, migration=None, limits=None):
super(Claim, self).__init__(migration=migration)
# Stash a copy of the instance at the current point of time
self.instance = instance.obj_clone()
self.nodename = nodename
@ -160,7 +160,7 @@ class MoveClaim(Claim):
Move can be either a migrate/resize, live-migrate or an evacuate operation.
"""
def __init__(self, context, instance, nodename, instance_type, image_meta,
tracker, resources, pci_requests, limits=None):
tracker, resources, pci_requests, migration, limits=None):
self.context = context
self.instance_type = instance_type
if isinstance(image_meta, dict):
@ -168,8 +168,7 @@ class MoveClaim(Claim):
self.image_meta = image_meta
super(MoveClaim, self).__init__(context, instance, nodename, tracker,
resources, pci_requests,
limits=limits)
self.migration = None
migration=migration, limits=limits)
@property
def numa_topology(self):
@ -186,3 +185,64 @@ class MoveClaim(Claim):
self.instance, self.nodename,
instance_type=self.instance_type)
self.instance.drop_migration_context()
def _test_pci(self):
    """Check this claim's PCI requests against what the move type allows.

    Any move that is not a live migration supports every kind of PCI
    request, so the check is delegated unchanged to Claim._test_pci().

    For a live migration only Neutron SRIOV (NEUTRON_PORT) requests are
    supported: any other PCI device would have to be removed and re-added
    for the migration to work, and there is currently no support for that.

    :returns: None when the requests are acceptable, otherwise a
        translated message explaining why the claim fails.
    """
    if self.migration.migration_type != 'live-migration':
        return super(MoveClaim, self)._test_pci()

    requested = self._pci_requests.requests or ()
    if any(request.source != objects.InstancePCIRequest.NEUTRON_PORT
           for request in requested):
        return (_('Non-VIF related PCI requests are not '
                  'supported for live migration.'))

    # TODO(artom) Once we know that only NEUTRON_PORT (aka SRIOV) PCI
    # requests remain, we should verify that the host can actually satisfy
    # them, the way Claim._test_pci() does. SRIOV live migration is
    # currently handled separately, though - see for example
    # _claim_pci_for_instance_vifs() in the compute manager - so nothing
    # is done here to avoid stepping on that code's toes. Ideally MoveClaim
    # would be used for all live migration resource claims.
    return None
def _test_live_migration_page_size(self):
    """Fail the claim if a live migration would change the page size.

    Call this only after _test_numa_topology() has run, because it relies
    on self.claimed_numa_topology having been set.

    The check matters when the hw:mem_page_size extra spec is a
    non-numeric value such as 'large'. In theory that would let an
    instance live migrate between hosts whose page sizes differ. That is
    not something we want to support, so differing page sizes fail the
    claim.

    :returns: None when the check passes or does not apply, otherwise a
        translated message describing the mismatch.
    """
    if self.migration.migration_type != 'live-migration':
        return None

    current_topology = self.instance.numa_topology
    if not current_topology:
        return None

    # NOTE(artom) Only a single page size is supported across all cells,
    # so comparing cell 0 of each topology is sufficient.
    claimed_pagesize = self.claimed_numa_topology.cells[0].pagesize
    current_pagesize = current_topology.cells[0].pagesize
    if claimed_pagesize != current_pagesize:
        return (_('Requested page size is different from current '
                  'page size.'))
    return None
def _test_numa_topology(self, resources, limit):
    """Run the base NUMA topology test, then the live migration page test.

    The _test methods return None on success and a string-like Message
    _() object describing the reason on failure. The result of Claim's
    _test_numa_topology() is returned as-is when it reports a failure;
    only when it reports nothing is the page size check evaluated and its
    result returned instead.
    """
    base_numa_test = super(MoveClaim, self)._test_numa_topology
    return (base_numa_test(resources, limit) or
            self._test_live_migration_page_size())

View File

@ -243,6 +243,27 @@ class ResourceTracker(object):
migration, image_meta=image_meta,
limits=limits)
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE)
def live_migration_claim(self, context, instance, nodename, migration,
                         limits):
    """Create the resource claim for an incoming live migration.

    Neither the flavor nor the image can change during a live migration,
    so both are taken straight from the instance and handed to
    _move_claim() with a move type of 'live-migration'.

    :param context: The request context.
    :param instance: The instance being live migrated.
    :param nodename: The nodename of the destination host.
    :param migration: The Migration object associated with this live
                      migration.
    :param limits: A SchedulerLimits object from when the scheduler
                   selected the destination host.
    :returns: A MoveClaim for this live migration.
    """
    return self._move_claim(
        context, instance, instance.flavor, nodename, migration,
        move_type='live-migration', image_meta=instance.image_meta,
        limits=limits)
def _move_claim(self, context, instance, new_instance_type, nodename,
migration, move_type=None, image_meta=None, limits=None):
"""Indicate that resources are needed for a move to this host.
@ -295,12 +316,17 @@ class ResourceTracker(object):
new_pci_requests.requests.append(request)
claim = claims.MoveClaim(context, instance, nodename,
new_instance_type, image_meta, self, cn,
new_pci_requests,
limits=limits)
new_pci_requests, migration, limits=limits)
claim.migration = migration
claimed_pci_devices_objs = []
if self.pci_tracker:
# TODO(artom) The second part of this condition should not be
# necessary, but since SRIOV live migration is currently handled
# elsewhere - see for example _claim_pci_for_instance_vifs() in the
# compute manager - we don't do any PCI claims if this is a live
# migration to avoid stepping on that code's toes. Ideally,
# MoveClaim/this method would be used for all live migration resource
# claims.
if self.pci_tracker and migration.migration_type != 'live-migration':
# NOTE(jaypipes): ComputeNode.pci_device_pools is set below
# in _update_usage_from_instance().
claimed_pci_devices_objs = self.pci_tracker.claim_instance(
@ -367,7 +393,11 @@ class ResourceTracker(object):
migration.dest_compute = self.host
migration.dest_node = nodename
migration.dest_host = self.driver.get_host_ip_addr()
migration.status = 'pre-migrating'
# NOTE(artom) Migration objects for live migrations are created with
# status 'accepted' by the conductor in live_migrate_instance() and do
# not have a 'pre-migrating' status.
if migration.migration_type != 'live-migration':
migration.status = 'pre-migrating'
migration.save()
def _set_instance_host_and_node(self, instance, nodename):

View File

@ -338,10 +338,10 @@ class MoveClaimTestCase(ClaimTestCase):
@mock.patch('nova.db.api.instance_extra_get_by_instance_uuid',
return_value=self.db_numa_topology)
def get_claim(mock_extra_get, mock_numa_get):
return claims.MoveClaim(self.context, self.instance, _NODENAME,
instance_type, image_meta, self.tracker,
self.resources, requests,
limits=limits)
return claims.MoveClaim(
self.context, self.instance, _NODENAME, instance_type,
image_meta, self.tracker, self.resources, requests,
objects.Migration(migration_type='migration'), limits=limits)
return get_claim()
@mock.patch('nova.objects.Instance.drop_migration_context')
@ -358,3 +358,69 @@ class MoveClaimTestCase(ClaimTestCase):
image_meta = objects.ImageMeta()
claim = self._claim(image_meta=image_meta)
self.assertIsInstance(claim.image_meta, objects.ImageMeta)
class LiveMigrationClaimTestCase(ClaimTestCase):
    """Tests for MoveClaim built with a 'live-migration' Migration."""

    def _live_migration_move_claim(self, instance, instance_type,
                                   pci_requests):
        """Build a MoveClaim for a live migration with no limits.

        The image metadata is an empty dict and the Migration object only
        carries migration_type='live-migration'.
        """
        live_migration = objects.Migration(migration_type='live-migration')
        return claims.MoveClaim(
            self.context, instance, _NODENAME, instance_type, {},
            self.tracker, self.resources, pci_requests, live_migration,
            None)

    def test_live_migration_claim_bad_pci_request(self):
        """A PCI request built with only an alias name fails the claim."""
        flavor = self._fake_instance_type()
        inst = self._fake_instance()
        inst.numa_topology = None
        alias_requests = objects.InstancePCIRequests(requests=[
            objects.InstancePCIRequest(alias_name='fake-alias')])
        with self.assertRaisesRegex(
                exception.ComputeResourcesUnavailable,
                'PCI requests are not supported'):
            self._live_migration_move_claim(inst, flavor, alias_requests)

    def test_live_migration_page_size(self):
        """Different current and claimed page sizes fail the claim."""

        def single_cell_topology(pagesize):
            return objects.InstanceNUMATopology(
                cells=[objects.InstanceNUMACell(id=1, cpuset={1, 2},
                                                memory=512,
                                                pagesize=pagesize)])

        flavor = self._fake_instance_type()
        inst = self._fake_instance()
        inst.numa_topology = single_cell_topology(2)
        fitted_topology = single_cell_topology(1)
        with mock.patch('nova.virt.hardware.numa_fit_instance_to_host',
                        return_value=fitted_topology):
            with self.assertRaisesRegex(
                    exception.ComputeResourcesUnavailable,
                    'Requested page size is different'):
                self._live_migration_move_claim(inst, flavor,
                                                self.empty_requests)

    def test_claim_fails_page_size_not_called(self):
        """A failed NUMA fit must skip the page size check."""
        flavor = self._fake_instance_type()
        inst = self._fake_instance()
        # This topology cannot fit in self.resources (see _fake_resources())
        oversized_topology = objects.InstanceNUMATopology(
            cells=[objects.InstanceNUMACell(id=1, cpuset={1, 2, 3},
                                            memory=1024)])
        page_size_test = (
            'nova.compute.claims.MoveClaim._test_live_migration_page_size')
        with test.nested(
            mock.patch('nova.virt.hardware.numa_get_constraints',
                       return_value=oversized_topology),
            mock.patch(page_size_test),
        ) as (_mock_get_constraints, mock_test_page_size):
            with self.assertRaisesRegex(
                    exception.ComputeResourcesUnavailable,
                    'Requested instance NUMA topology'):
                self._live_migration_move_claim(inst, flavor,
                                                self.empty_requests)
            mock_test_page_size.assert_not_called()

    def test_live_migration_no_instance_numa_topology(self):
        """An instance without a NUMA topology can still be claimed."""
        flavor = self._fake_instance_type()
        inst = self._fake_instance()
        inst.numa_topology = None
        # Passing means that building the claim raised nothing.
        self._live_migration_move_claim(inst, flavor, self.empty_requests)

View File

@ -38,6 +38,7 @@ from nova.objects import pci_device
from nova.pci import manager as pci_manager
from nova.scheduler.client import report
from nova import test
from nova.tests.unit import fake_instance
from nova.tests.unit import fake_notifier
from nova.tests.unit.objects import test_pci_device as fake_pci_device
from nova.virt import driver
@ -2824,6 +2825,42 @@ class TestRebuild(BaseTestCase):
inst_save_mock.assert_called_once_with()
class TestLiveMigration(BaseTestCase):
    """Resource tracker tests for live migration claims."""

    def test_live_migration_claim(self):
        """live_migration_claim() keeps the status and claims no PCI."""
        self._setup_rt()
        compute_node = _COMPUTE_NODE_FIXTURES[0]
        self.rt.compute_nodes[_NODENAME] = compute_node
        admin_ctxt = context.get_admin_context()
        inst = fake_instance.fake_instance_obj(admin_ctxt)
        for field in ('pci_requests', 'pci_devices', 'numa_topology'):
            setattr(inst, field, None)
        image_meta = objects.ImageMeta(properties=objects.ImageMetaProps())
        live_migration = objects.Migration(
            id=42, migration_type='live-migration', status='accepted')
        # A real PCI tracker is installed so the test can verify that its
        # claim_instance() is never reached for a live migration.
        self.rt.pci_tracker = pci_manager.PciDevTracker(mock.sentinel.ctx)
        with test.nested(
            mock.patch.object(objects.ImageMeta, 'from_instance',
                              return_value=image_meta),
            mock.patch.object(objects.Migration, 'save'),
            mock.patch.object(objects.Instance, 'save'),
            mock.patch.object(self.rt, '_update'),
            mock.patch.object(self.rt.pci_tracker, 'claim_instance'),
        ) as (_mock_from_instance, _mock_migration_save,
              _mock_instance_save, mock_update, mock_pci_claim_instance):
            claim = self.rt.live_migration_claim(
                admin_ctxt, inst, _NODENAME, live_migration, limits=None)
            self.assertEqual(42, claim.migration.id)
            # Cold migrations get their status set to 'pre-migrating', but
            # that status does not exist for live migrations, so the
            # original status must be left alone.
            self.assertEqual('accepted', claim.migration.status)
            self.assertIn('migration_context', inst)
            mock_update.assert_called_with(mock.ANY, compute_node)
            mock_pci_claim_instance.assert_not_called()
class TestUpdateUsageFromMigration(test.NoDBTestCase):
@mock.patch('nova.compute.resource_tracker.ResourceTracker.'
'_get_instance_type')