Merge "Cleanup allocations on invalid dest node during live migration" into stable/pike
This commit is contained in:
commit
46554875d2
|
@ -267,8 +267,9 @@ class LiveMigrationTask(base.TaskBase):
|
|||
self._check_not_over_max_retries(attempted_hosts)
|
||||
request_spec.ignore_hosts = attempted_hosts
|
||||
try:
|
||||
host = self.scheduler_client.select_destinations(self.context,
|
||||
request_spec, [self.instance.uuid])[0]['host']
|
||||
hoststate = self.scheduler_client.select_destinations(
|
||||
self.context, request_spec, [self.instance.uuid])[0]
|
||||
host = hoststate['host']
|
||||
except messaging.RemoteError as ex:
|
||||
# TODO(ShaoHe Feng) There maybe multi-scheduler, and the
|
||||
# scheduling algorithm is R-R, we can let other scheduler try.
|
||||
|
@ -284,9 +285,47 @@ class LiveMigrationTask(base.TaskBase):
|
|||
LOG.debug("Skipping host: %(host)s because: %(e)s",
|
||||
{"host": host, "e": e})
|
||||
attempted_hosts.append(host)
|
||||
# The scheduler would have created allocations against the
|
||||
# selected destination host in Placement, so we need to remove
|
||||
# those before moving on.
|
||||
self._remove_host_allocations(host, hoststate['nodename'])
|
||||
host = None
|
||||
return host
|
||||
|
||||
def _remove_host_allocations(self, host, node):
|
||||
"""Removes instance allocations against the given host from Placement
|
||||
|
||||
:param host: The name of the host.
|
||||
:param node: The name of the node.
|
||||
"""
|
||||
# Get the compute node object since we need the UUID.
|
||||
# TODO(mriedem): If the result of select_destinations eventually
|
||||
# returns the compute node uuid, we wouldn't need to look it
|
||||
# up via host/node and we can save some time.
|
||||
try:
|
||||
compute_node = objects.ComputeNode.get_by_host_and_nodename(
|
||||
self.context, host, node)
|
||||
except exception.ComputeHostNotFound:
|
||||
# This shouldn't happen, but we're being careful.
|
||||
LOG.info('Unable to remove instance allocations from host %s '
|
||||
'and node %s since it was not found.', host, node,
|
||||
instance=self.instance)
|
||||
return
|
||||
|
||||
# Calculate the resource class amounts to subtract from the allocations
|
||||
# on the node based on the instance flavor.
|
||||
resources = scheduler_utils.resources_from_flavor(
|
||||
self.instance, self.instance.flavor)
|
||||
|
||||
# Now remove the allocations for our instance against that node.
|
||||
# Note that this does not remove allocations against any other node
|
||||
# or shared resource provider, it's just undoing what the scheduler
|
||||
# allocated for the given (destination) node.
|
||||
self.scheduler_client.reportclient.\
|
||||
remove_provider_from_instance_allocation(
|
||||
self.instance.uuid, compute_node.uuid, self.instance.user_id,
|
||||
self.instance.project_id, resources)
|
||||
|
||||
def _check_not_over_max_retries(self, attempted_hosts):
|
||||
if CONF.migrate_max_retries == -1:
|
||||
return
|
||||
|
|
|
@ -1014,13 +1014,7 @@ class SchedulerReportClient(object):
|
|||
then PUTs the resulting allocation back to the placement API for the
|
||||
consumer.
|
||||
|
||||
We call this method on three occasions:
|
||||
|
||||
1. On the source host during a confirm_resize() operation.
|
||||
2. On the destination host during a revert_resize() operation.
|
||||
3. On the destination host when prep_resize fails.
|
||||
|
||||
This is important to reconcile the "doubled-up" allocation that the
|
||||
This is used to reconcile the "doubled-up" allocation that the
|
||||
scheduler constructs when claiming resources against the destination
|
||||
host during a move operation.
|
||||
|
||||
|
|
|
@ -2185,23 +2185,17 @@ class ServerMovingTests(test.TestCase, integrated_helpers.InstanceHelperMixin):
|
|||
self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
|
||||
|
||||
dest_usages = self._get_provider_usages(dest_rp_uuid)
|
||||
# FIXME(mriedem): This is bug 1712411 where the allocations, created
|
||||
# by the scheduler, via conductor, are not cleaned up on the migration
|
||||
# pre-check error. Uncomment the following code when this is fixed.
|
||||
self.assertFlavorMatchesAllocation(self.flavor1, dest_usages)
|
||||
# # Assert the allocations, created by the scheduler, are cleaned up
|
||||
# # after the migration pre-check error happens.
|
||||
# self.assertFlavorMatchesAllocation(
|
||||
# {'vcpus': 0, 'ram': 0, 'disk': 0}, dest_usages)
|
||||
# Assert the allocations, created by the scheduler, are cleaned up
|
||||
# after the migration pre-check error happens.
|
||||
self.assertFlavorMatchesAllocation(
|
||||
{'vcpus': 0, 'ram': 0, 'disk': 0}, dest_usages)
|
||||
|
||||
allocations = self._get_allocations_by_server_uuid(server['id'])
|
||||
# FIXME(mriedem): After bug 1712411 is fixed there should only be 1
|
||||
# allocation for the instance, on the source node.
|
||||
self.assertEqual(2, len(allocations))
|
||||
# There should only be 1 allocation for the instance on the source node
|
||||
self.assertEqual(1, len(allocations))
|
||||
self.assertIn(source_rp_uuid, allocations)
|
||||
self.assertIn(dest_rp_uuid, allocations)
|
||||
dest_allocation = allocations[dest_rp_uuid]['resources']
|
||||
self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)
|
||||
self.assertFlavorMatchesAllocation(
|
||||
self.flavor1, allocations[source_rp_uuid]['resources'])
|
||||
|
||||
self._delete_and_check_allocations(
|
||||
server, source_rp_uuid, dest_rp_uuid)
|
||||
|
|
|
@ -375,7 +375,7 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
|
|||
scheduler_utils.setup_instance_group(self.context, self.fake_spec)
|
||||
self.task.scheduler_client.select_destinations(self.context,
|
||||
self.fake_spec, [self.instance.uuid]).AndReturn(
|
||||
[{'host': 'host1'}])
|
||||
[{'host': 'host1', 'nodename': 'node1'}])
|
||||
self.task._check_compatible_with_source_hypervisor("host1")\
|
||||
.AndRaise(error)
|
||||
|
||||
|
@ -386,7 +386,11 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
|
|||
self.task._call_livem_checks_on_host("host2")
|
||||
|
||||
self.mox.ReplayAll()
|
||||
self.assertEqual("host2", self.task._find_destination())
|
||||
with mock.patch.object(self.task,
|
||||
'_remove_host_allocations') as remove_allocs:
|
||||
self.assertEqual("host2", self.task._find_destination())
|
||||
# Should have removed allocations for the first host.
|
||||
remove_allocs.assert_called_once_with('host1', 'node1')
|
||||
|
||||
def test_find_destination_retry_with_old_hypervisor(self):
|
||||
self._test_find_destination_retry_hypervisor_raises(
|
||||
|
@ -411,7 +415,7 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
|
|||
scheduler_utils.setup_instance_group(self.context, self.fake_spec)
|
||||
self.task.scheduler_client.select_destinations(self.context,
|
||||
self.fake_spec, [self.instance.uuid]).AndReturn(
|
||||
[{'host': 'host1'}])
|
||||
[{'host': 'host1', 'nodename': 'node1'}])
|
||||
self.task._check_compatible_with_source_hypervisor("host1")
|
||||
self.task._call_livem_checks_on_host("host1")\
|
||||
.AndRaise(exception.Invalid)
|
||||
|
@ -423,7 +427,11 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
|
|||
self.task._call_livem_checks_on_host("host2")
|
||||
|
||||
self.mox.ReplayAll()
|
||||
self.assertEqual("host2", self.task._find_destination())
|
||||
with mock.patch.object(self.task,
|
||||
'_remove_host_allocations') as remove_allocs:
|
||||
self.assertEqual("host2", self.task._find_destination())
|
||||
# Should have removed allocations for the first host.
|
||||
remove_allocs.assert_called_once_with('host1', 'node1')
|
||||
|
||||
def test_find_destination_retry_with_failed_migration_pre_checks(self):
|
||||
self.flags(migrate_max_retries=1)
|
||||
|
@ -440,7 +448,7 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
|
|||
scheduler_utils.setup_instance_group(self.context, self.fake_spec)
|
||||
self.task.scheduler_client.select_destinations(self.context,
|
||||
self.fake_spec, [self.instance.uuid]).AndReturn(
|
||||
[{'host': 'host1'}])
|
||||
[{'host': 'host1', 'nodename': 'node1'}])
|
||||
self.task._check_compatible_with_source_hypervisor("host1")
|
||||
self.task._call_livem_checks_on_host("host1")\
|
||||
.AndRaise(exception.MigrationPreCheckError("reason"))
|
||||
|
@ -452,7 +460,11 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
|
|||
self.task._call_livem_checks_on_host("host2")
|
||||
|
||||
self.mox.ReplayAll()
|
||||
self.assertEqual("host2", self.task._find_destination())
|
||||
with mock.patch.object(self.task,
|
||||
'_remove_host_allocations') as remove_allocs:
|
||||
self.assertEqual("host2", self.task._find_destination())
|
||||
# Should have removed allocations for the first host.
|
||||
remove_allocs.assert_called_once_with('host1', 'node1')
|
||||
|
||||
def test_find_destination_retry_exceeds_max(self):
|
||||
self.flags(migrate_max_retries=0)
|
||||
|
@ -468,16 +480,23 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
|
|||
scheduler_utils.setup_instance_group(self.context, self.fake_spec)
|
||||
self.task.scheduler_client.select_destinations(self.context,
|
||||
self.fake_spec, [self.instance.uuid]).AndReturn(
|
||||
[{'host': 'host1'}])
|
||||
[{'host': 'host1', 'nodename': 'node1'}])
|
||||
self.task._check_compatible_with_source_hypervisor("host1")\
|
||||
.AndRaise(exception.DestinationHypervisorTooOld)
|
||||
|
||||
self.mox.ReplayAll()
|
||||
with mock.patch.object(self.task.migration, 'save') as save_mock:
|
||||
with test.nested(
|
||||
mock.patch.object(self.task.migration, 'save'),
|
||||
mock.patch.object(self.task, '_remove_host_allocations')
|
||||
) as (
|
||||
save_mock, remove_allocs
|
||||
):
|
||||
self.assertRaises(exception.MaxRetriesExceeded,
|
||||
self.task._find_destination)
|
||||
self.assertEqual('failed', self.task.migration.status)
|
||||
save_mock.assert_called_once_with()
|
||||
# Should have removed allocations for the first host.
|
||||
remove_allocs.assert_called_once_with('host1', 'node1')
|
||||
|
||||
def test_find_destination_when_runs_out_of_hosts(self):
|
||||
self.mox.StubOutWithMock(utils, 'get_image_from_system_metadata')
|
||||
|
@ -542,3 +561,15 @@ class LiveMigrationTaskTestCase(test.NoDBTestCase):
|
|||
self.task._get_destination_cell_mapping)
|
||||
mock_get.assert_called_once_with(
|
||||
self.task.context, self.task.destination)
|
||||
|
||||
@mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename',
|
||||
side_effect=exception.ComputeHostNotFound(host='host'))
|
||||
def test_remove_host_allocations_compute_host_not_found(self, get_cn):
|
||||
"""Tests that failing to find a ComputeNode will not blow up
|
||||
the _remove_host_allocations method.
|
||||
"""
|
||||
with mock.patch.object(
|
||||
self.task.scheduler_client.reportclient,
|
||||
'remove_provider_from_instance_allocation') as remove_provider:
|
||||
self.task._remove_host_allocations('host', 'node')
|
||||
remove_provider.assert_not_called()
|
||||
|
|
Loading…
Reference in New Issue