Create allocations against forced dest host during evacuate

If a host is specified during an evacuate with the force=True
flag, conductor bypasses the scheduler. Since the scheduler
is what creates the "doubled up" allocation on the destination
node, and the scheduler is bypassed in the force case, we have
to create the allocations against the destination node in
conductor directly.

The unit tests cover the failure scenarios. The functional
test covers the happy path.

This is a short-term backportable fix. Long-term we'll likely
want to call the scheduler even in the 'force' scenario but pass
a flag to the scheduler to tell it to skip the filters but still
create the allocation on the destination node so we don't have
to duplicate that in conductor.

Change-Id: I6590f0eda4ec4996543ad40d8c2640b83fc3dd9d
Partial-Bug: #1713786
This commit is contained in:
Matt Riedemann 2017-08-30 15:05:44 -04:00
parent 3f6447120b
commit d564266a01
4 changed files with 189 additions and 28 deletions

View File

@ -736,6 +736,67 @@ class ComputeTaskManager(base.Base):
instance.save()
return
def _allocate_for_evacuate_dest_host(self, context, instance, host,
request_spec=None):
# The user is forcing the destination host and bypassing the
# scheduler. We need to copy the source compute node
# allocations in Placement to the destination compute node.
# Normally select_destinations() in the scheduler would do this
# for us, but when forcing the target host we don't call the
# scheduler.
source_node = None # This is used for error handling below.
try:
source_node = objects.ComputeNode.get_by_host_and_nodename(
context, instance.host, instance.node)
dest_node = (
objects.ComputeNode.get_first_node_by_host_for_old_compat(
context, host, use_slave=True))
except exception.ComputeHostNotFound as ex:
with excutils.save_and_reraise_exception():
# TODO(mriedem): This ugly RequestSpec handling should be
# tucked away in _set_vm_state_and_notify.
if request_spec:
request_spec = \
request_spec.to_legacy_request_spec_dict()
else:
request_spec = {}
self._set_vm_state_and_notify(
context, instance.uuid, 'rebuild_server',
{'vm_state': instance.vm_state,
'task_state': None}, ex, request_spec)
if source_node:
LOG.warning('Specified host %s for evacuate was not '
'found.', host, instance=instance)
else:
LOG.warning('Source host %s and node %s for evacuate was '
'not found.', instance.host, instance.node,
instance=instance)
# TODO(mriedem): In Queens, call select_destinations() with a
# skip_filters=True flag so the scheduler does the work of
# claiming resources on the destination in Placement but still
# bypass the scheduler filters, which honors the 'force' flag
# in the API.
try:
scheduler_utils.claim_resources_on_destination(
self.scheduler_client.reportclient, instance,
source_node, dest_node)
except exception.NoValidHost as ex:
with excutils.save_and_reraise_exception():
# TODO(mriedem): This ugly RequestSpec handling should be
# tucked away in _set_vm_state_and_notify.
if request_spec:
request_spec = \
request_spec.to_legacy_request_spec_dict()
else:
request_spec = {}
self._set_vm_state_and_notify(
context, instance.uuid, 'rebuild_server',
{'vm_state': instance.vm_state,
'task_state': None}, ex, request_spec)
LOG.warning('Specified host %s for evacuate is '
'invalid.', host, instance=instance)
@targets_cell
def rebuild_instance(self, context, instance, orig_image_ref, image_ref,
injected_files, new_pass, orig_sys_metadata,
@ -746,7 +807,28 @@ class ComputeTaskManager(base.Base):
with compute_utils.EventReporter(context, 'rebuild_server',
instance.uuid):
node = limits = None
if not host:
# The host variable is passed in two cases:
# 1. rebuild - the instance.host is passed to rebuild on the
# same host and bypass the scheduler.
# 2. evacuate with specified host and force=True - the specified
# host is passed and is meant to bypass the scheduler.
# NOTE(mriedem): This could be a lot more straight-forward if we
# had separate methods for rebuild and evacuate...
if host:
# We only create a new allocation on the specified host if
# we're doing an evacuate since that is a move operation.
if host != instance.host:
# If a destination host is forced for evacuate, create
# allocations against it in Placement.
self._allocate_for_evacuate_dest_host(
context, instance, host, request_spec)
else:
# At this point, either the user is doing a rebuild on the
# same host (not evacuate), or they are evacuating and
# specified a host but are not forcing it. The API passes
# host=None in this case but sets up the
# RequestSpec.requested_destination field for the specified
# host.
if not request_spec:
# NOTE(sbauza): We were unable to find an original
# RequestSpec object - probably because the instance is old

View File

@ -1712,20 +1712,14 @@ class ServerMovingTests(ProviderUsageBaseTestCase):
self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
dest_usages = self._get_provider_usages(dest_rp_uuid)
# FIXME(mriedem): Due to bug 1713786 the dest node won't have
# allocations. Uncomment when fixed.
# self.assertFlavorMatchesAllocation(self.flavor1, dest_usages)
self.assertFlavorMatchesAllocation(
{'vcpus': 0, 'ram': 0, 'disk': 0}, dest_usages)
self.assertFlavorMatchesAllocation(self.flavor1, dest_usages)
allocations = self._get_allocations_by_server_uuid(server['id'])
# FIXME(mriedem): Uncomment when bug 1713786 is fixed
# self.assertEqual(2, len(allocations))
self.assertEqual(1, len(allocations))
self.assertEqual(2, len(allocations))
source_allocation = allocations[source_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(self.flavor1, source_allocation)
# dest_allocation = allocations[dest_rp_uuid]['resources']
# self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)
dest_allocation = allocations[dest_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)
# start up the source compute
self.compute1.start()
@ -1738,30 +1732,18 @@ class ServerMovingTests(ProviderUsageBaseTestCase):
# When the source node starts up, the instance has moved so the
# ResourceTracker should cleanup allocations for the source node.
source_usages = self._get_provider_usages(source_rp_uuid)
# FIXME(mriedem): Due to bug 1713786, the source node fails to
# remove the allocation because the dest allocation doesn't exist
# yet and Placement won't allow an empty allocation body. Uncomment
# when fixed.
# self.assertEqual(
# {'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}, source_usages)
self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
self.assertEqual(
{'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}, source_usages)
# The usages/allocations should still exist on the destination node
# after the source node starts back up.
dest_usages = self._get_provider_usages(dest_rp_uuid)
# FIXME(mriedem): Uncomment when bug 1713786 is fixed.
# self.assertFlavorMatchesAllocation(self.flavor1, dest_usages)
self.assertFlavorMatchesAllocation(
{'vcpus': 0, 'ram': 0, 'disk': 0}, dest_usages)
self.assertFlavorMatchesAllocation(self.flavor1, dest_usages)
allocations = self._get_allocations_by_server_uuid(server['id'])
self.assertEqual(1, len(allocations))
# FIXME(mriedem): Due to bug 1713786 it's just the source for now.
# Uncomment when fixed.
# dest_allocation = allocations[dest_rp_uuid]['resources']
# self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)
source_allocation = allocations[source_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(self.flavor1, source_allocation)
dest_allocation = allocations[dest_rp_uuid]['resources']
self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)
self._delete_and_check_allocations(
server, source_rp_uuid, dest_rp_uuid)

View File

@ -2517,6 +2517,91 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
fake_inst.system_metadata)
mock_save.assert_called_once_with()
@mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename',
side_effect=exc.ComputeHostNotFound('source-host'))
def test_allocate_for_evacuate_dest_host_source_node_not_found_no_reqspec(
self, get_compute_node):
"""Tests that the source node for the instance isn't found. In this
case there is no request spec provided.
"""
instance = self.params['build_requests'][0].instance
instance.host = 'source-host'
with mock.patch.object(self.conductor,
'_set_vm_state_and_notify') as notify:
ex = self.assertRaises(
exc.ComputeHostNotFound,
self.conductor._allocate_for_evacuate_dest_host,
self.ctxt, instance, 'dest-host')
get_compute_node.assert_called_once_with(
self.ctxt, instance.host, instance.node)
notify.assert_called_once_with(
self.ctxt, instance.uuid, 'rebuild_server',
{'vm_state': instance.vm_state, 'task_state': None}, ex, {})
@mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename',
return_value=objects.ComputeNode(host='source-host'))
@mock.patch.object(objects.ComputeNode,
'get_first_node_by_host_for_old_compat',
side_effect=exc.ComputeHostNotFound(host='dest-host'))
def test_allocate_for_evacuate_dest_host_dest_node_not_found_reqspec(
self, get_dest_node, get_source_node):
"""Tests that the destination node for the request isn't found. In this
case there is a request spec provided.
"""
instance = self.params['build_requests'][0].instance
instance.host = 'source-host'
reqspec = self.params['request_specs'][0]
with mock.patch.object(self.conductor,
'_set_vm_state_and_notify') as notify:
ex = self.assertRaises(
exc.ComputeHostNotFound,
self.conductor._allocate_for_evacuate_dest_host,
self.ctxt, instance, 'dest-host', reqspec)
get_source_node.assert_called_once_with(
self.ctxt, instance.host, instance.node)
get_dest_node.assert_called_once_with(
self.ctxt, 'dest-host', use_slave=True)
notify.assert_called_once_with(
self.ctxt, instance.uuid, 'rebuild_server',
{'vm_state': instance.vm_state, 'task_state': None}, ex,
reqspec.to_legacy_request_spec_dict())
@mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename',
return_value=objects.ComputeNode(host='source-host'))
@mock.patch.object(objects.ComputeNode,
'get_first_node_by_host_for_old_compat',
return_value=objects.ComputeNode(host='dest-host'))
def test_allocate_for_evacuate_dest_host_claim_fails(
self, get_dest_node, get_source_node):
"""Tests that the allocation claim fails."""
instance = self.params['build_requests'][0].instance
instance.host = 'source-host'
reqspec = self.params['request_specs'][0]
with test.nested(
mock.patch.object(self.conductor,
'_set_vm_state_and_notify'),
mock.patch.object(scheduler_utils,
'claim_resources_on_destination',
side_effect=exc.NoValidHost(reason='I am full'))
) as (
notify, claim
):
ex = self.assertRaises(
exc.NoValidHost,
self.conductor._allocate_for_evacuate_dest_host,
self.ctxt, instance, 'dest-host', reqspec)
get_source_node.assert_called_once_with(
self.ctxt, instance.host, instance.node)
get_dest_node.assert_called_once_with(
self.ctxt, 'dest-host', use_slave=True)
claim.assert_called_once_with(
self.conductor.scheduler_client.reportclient, instance,
get_source_node.return_value, get_dest_node.return_value)
notify.assert_called_once_with(
self.ctxt, instance.uuid, 'rebuild_server',
{'vm_state': instance.vm_state, 'task_state': None}, ex,
reqspec.to_legacy_request_spec_dict())
class ConductorTaskRPCAPITestCase(_BaseTaskTestCase,
test_compute.BaseTestCase):

View File

@ -0,0 +1,12 @@
---
fixes:
- |
When forcing a specified destination host during evacuate, the
scheduler is bypassed but resource allocations will still be made in the
Placement service against the forced destination host. If the resource
allocation against the destination host fails, the evacuate operation
will fail, regardless of the ``force`` flag being specified in the API.
The guest will be unchanged on the source host. For more details, see
`bug 1713786`_.
.. _bug 1713786: https://bugs.launchpad.net/nova/+bug/1713786