Create allocations against forced dest host during evacuate
If a host is specified during an evacuate with the force=True flag,
conductor bypasses the scheduler. Since the scheduler is what creates
the "doubled up" allocation on the destination node, and the scheduler
is bypassed in the force case, we have to create the allocations
against the destination node in conductor directly.

The unit tests cover the failure scenarios. The functional test covers
the happy path.

This is a short-term backportable fix. Long-term we'll likely want to
call the scheduler even in the 'force' scenario but pass a flag to the
scheduler to tell it to skip the filters but still create the
allocation on the destination node, so we don't have to duplicate that
logic in conductor.

Change-Id: I6590f0eda4ec4996543ad40d8c2640b83fc3dd9d
Partial-Bug: #1713786
This commit is contained in:
parent 3f6447120b
commit d564266a01
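A note on the mechanism for readers outside Nova: the "doubled up" allocation the message refers to is a single Placement allocation, owned by the instance as the consumer, that holds flavor-sized resources against both the source and the destination resource providers while the move is in flight. A minimal sketch of that shape, using plain dicts and a hypothetical helper rather than the real Nova/Placement client API:

# Sketch only: the "doubled up" allocation shape used during a move.
# The dict layout loosely mirrors the Placement allocations API; the
# helper is hypothetical, not Nova code.

def double_up_allocations(resources, source_rp_uuid, dest_rp_uuid):
    """Hold the same resources against source and dest for one consumer."""
    return {
        'allocations': [
            {'resource_provider': {'uuid': source_rp_uuid},
             'resources': dict(resources)},
            {'resource_provider': {'uuid': dest_rp_uuid},
             'resources': dict(resources)},
        ]
    }

if __name__ == '__main__':
    flavor_resources = {'VCPU': 1, 'MEMORY_MB': 512, 'DISK_GB': 1}
    doubled = double_up_allocations(
        flavor_resources, 'source-rp-uuid', 'dest-rp-uuid')
    assert len(doubled['allocations']) == 2

Once the evacuation finishes and the source compute service restarts, the source side of the allocation is dropped and only the destination provider keeps the consumer's resources, which is what the functional test below asserts.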
@@ -736,6 +736,67 @@ class ComputeTaskManager(base.Base):
             instance.save()
             return
 
+    def _allocate_for_evacuate_dest_host(self, context, instance, host,
+                                         request_spec=None):
+        # The user is forcing the destination host and bypassing the
+        # scheduler. We need to copy the source compute node
+        # allocations in Placement to the destination compute node.
+        # Normally select_destinations() in the scheduler would do this
+        # for us, but when forcing the target host we don't call the
+        # scheduler.
+        source_node = None  # This is used for error handling below.
+        try:
+            source_node = objects.ComputeNode.get_by_host_and_nodename(
+                context, instance.host, instance.node)
+            dest_node = (
+                objects.ComputeNode.get_first_node_by_host_for_old_compat(
+                    context, host, use_slave=True))
+        except exception.ComputeHostNotFound as ex:
+            with excutils.save_and_reraise_exception():
+                # TODO(mriedem): This ugly RequestSpec handling should be
+                # tucked away in _set_vm_state_and_notify.
+                if request_spec:
+                    request_spec = \
+                        request_spec.to_legacy_request_spec_dict()
+                else:
+                    request_spec = {}
+                self._set_vm_state_and_notify(
+                    context, instance.uuid, 'rebuild_server',
+                    {'vm_state': instance.vm_state,
+                     'task_state': None}, ex, request_spec)
+                if source_node:
+                    LOG.warning('Specified host %s for evacuate was not '
+                                'found.', host, instance=instance)
+                else:
+                    LOG.warning('Source host %s and node %s for evacuate '
+                                'were not found.', instance.host,
+                                instance.node, instance=instance)
+
+        # TODO(mriedem): In Queens, call select_destinations() with a
+        # skip_filters=True flag so the scheduler does the work of
+        # claiming resources on the destination in Placement but still
+        # bypass the scheduler filters, which honors the 'force' flag
+        # in the API.
+        try:
+            scheduler_utils.claim_resources_on_destination(
+                self.scheduler_client.reportclient, instance,
+                source_node, dest_node)
+        except exception.NoValidHost as ex:
+            with excutils.save_and_reraise_exception():
+                # TODO(mriedem): This ugly RequestSpec handling should be
+                # tucked away in _set_vm_state_and_notify.
+                if request_spec:
+                    request_spec = \
+                        request_spec.to_legacy_request_spec_dict()
+                else:
+                    request_spec = {}
+                self._set_vm_state_and_notify(
+                    context, instance.uuid, 'rebuild_server',
+                    {'vm_state': instance.vm_state,
+                     'task_state': None}, ex, request_spec)
+                LOG.warning('Specified host %s for evacuate is '
+                            'invalid.', host, instance=instance)
+
     @targets_cell
     def rebuild_instance(self, context, instance, orig_image_ref, image_ref,
                          injected_files, new_pass, orig_sys_metadata,
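Both failure paths above use the same notify-then-reraise pattern from oslo.utils: excutils.save_and_reraise_exception() re-raises the original exception when the with block exits, so recording the fault and notifying never swallows the error from the caller. A small self-contained demonstration of the pattern (the _notify helper below is a toy stand-in for _set_vm_state_and_notify, not real Nova code):

# Demonstrates the notify-then-reraise pattern used in
# _allocate_for_evacuate_dest_host above.
from oslo_utils import excutils


def _notify(error):
    # Stand-in for _set_vm_state_and_notify().
    print('recorded failure: %s' % error)


def claim_or_fail(claim):
    try:
        claim()
    except ValueError as ex:
        # save_and_reraise_exception re-raises ex when this block exits,
        # so the notification side effect cannot swallow the error.
        with excutils.save_and_reraise_exception():
            _notify(ex)


def failing_claim():
    raise ValueError('destination host is full')


if __name__ == '__main__':
    try:
        claim_or_fail(failing_claim)
    except ValueError:
        print('caller still sees the original error')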
@@ -746,7 +807,28 @@ class ComputeTaskManager(base.Base):
         with compute_utils.EventReporter(context, 'rebuild_server',
                                          instance.uuid):
             node = limits = None
-            if not host:
+
+            # The host variable is passed in two cases:
+            # 1. rebuild - the instance.host is passed to rebuild on the
+            #    same host and bypass the scheduler.
+            # 2. evacuate with specified host and force=True - the specified
+            #    host is passed and is meant to bypass the scheduler.
+            # NOTE(mriedem): This could be a lot more straight-forward if we
+            # had separate methods for rebuild and evacuate...
+            if host:
+                # We only create a new allocation on the specified host if
+                # we're doing an evacuate since that is a move operation.
+                if host != instance.host:
+                    # If a destination host is forced for evacuate, create
+                    # allocations against it in Placement.
+                    self._allocate_for_evacuate_dest_host(
+                        context, instance, host, request_spec)
+            else:
+                # At this point, either the user is doing a rebuild on the
+                # same host (not evacuate), or they are evacuating and
+                # specified a host but are not forcing it. The API passes
+                # host=None in this case but sets up the
+                # RequestSpec.requested_destination field for the specified
+                # host.
                 if not request_spec:
                     # NOTE(sbauza): We were unable to find an original
                     # RequestSpec object - probably because the instance is old
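For orientation, the comments above describe a contract implemented on the API side; a hedged sketch of what conductor receives for each rebuild/evacuate variant (illustrative function and names, not the actual API code):

# Illustrative sketch of the API-side contract described in the comments
# above; the function and its names are made up for the example.

def conductor_args(instance_host, evacuate_host=None, force=False):
    """Return (host, requested_destination) as conductor would see them."""
    if evacuate_host is None:
        # Plain rebuild: the instance's own host is passed and the
        # scheduler is bypassed.
        return instance_host, None
    if force:
        # Forced evacuate: conductor gets the host directly and must
        # claim resources on it itself (the new code path above).
        return evacuate_host, None
    # Unforced evacuate to a named host: the scheduler validates it via
    # RequestSpec.requested_destination, so host is None here.
    return None, evacuate_host


assert conductor_args('src') == ('src', None)
assert conductor_args('src', 'dst', force=True) == ('dst', None)
assert conductor_args('src', 'dst') == (None, 'dst')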
@@ -1712,20 +1712,14 @@ class ServerMovingTests(ProviderUsageBaseTestCase):
         self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
 
         dest_usages = self._get_provider_usages(dest_rp_uuid)
-        # FIXME(mriedem): Due to bug 1713786 the dest node won't have
-        # allocations. Uncomment when fixed.
-        # self.assertFlavorMatchesAllocation(self.flavor1, dest_usages)
-        self.assertFlavorMatchesAllocation(
-            {'vcpus': 0, 'ram': 0, 'disk': 0}, dest_usages)
+        self.assertFlavorMatchesAllocation(self.flavor1, dest_usages)
 
         allocations = self._get_allocations_by_server_uuid(server['id'])
-        # FIXME(mriedem): Uncomment when bug 1713786 is fixed
-        # self.assertEqual(2, len(allocations))
-        self.assertEqual(1, len(allocations))
+        self.assertEqual(2, len(allocations))
         source_allocation = allocations[source_rp_uuid]['resources']
         self.assertFlavorMatchesAllocation(self.flavor1, source_allocation)
-        # dest_allocation = allocations[dest_rp_uuid]['resources']
-        # self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)
+        dest_allocation = allocations[dest_rp_uuid]['resources']
+        self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)
 
         # start up the source compute
         self.compute1.start()
@@ -1738,30 +1732,18 @@ class ServerMovingTests(ProviderUsageBaseTestCase):
         # When the source node starts up, the instance has moved so the
         # ResourceTracker should cleanup allocations for the source node.
         source_usages = self._get_provider_usages(source_rp_uuid)
-        # FIXME(mriedem): Due to bug 1713786, the source node fails to
-        # remove the allocation because the dest allocation doesn't exist
-        # yet and Placement won't allow an empty allocation body. Uncomment
-        # when fixed.
-        # self.assertEqual(
-        #     {'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}, source_usages)
-        self.assertFlavorMatchesAllocation(self.flavor1, source_usages)
+        self.assertEqual(
+            {'VCPU': 0, 'MEMORY_MB': 0, 'DISK_GB': 0}, source_usages)
 
         # The usages/allocations should still exist on the destination node
         # after the source node starts back up.
         dest_usages = self._get_provider_usages(dest_rp_uuid)
-        # FIXME(mriedem): Uncomment when bug 1713786 is fixed.
-        # self.assertFlavorMatchesAllocation(self.flavor1, dest_usages)
-        self.assertFlavorMatchesAllocation(
-            {'vcpus': 0, 'ram': 0, 'disk': 0}, dest_usages)
+        self.assertFlavorMatchesAllocation(self.flavor1, dest_usages)
 
         allocations = self._get_allocations_by_server_uuid(server['id'])
         self.assertEqual(1, len(allocations))
-        # FIXME(mriedem): Due to bug 1713786 it's just the source for now.
-        # Uncomment when fixed.
-        # dest_allocation = allocations[dest_rp_uuid]['resources']
-        # self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)
-        source_allocation = allocations[source_rp_uuid]['resources']
-        self.assertFlavorMatchesAllocation(self.flavor1, source_allocation)
+        dest_allocation = allocations[dest_rp_uuid]['resources']
+        self.assertFlavorMatchesAllocation(self.flavor1, dest_allocation)
 
         self._delete_and_check_allocations(
             server, source_rp_uuid, dest_rp_uuid)
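For reference, the helpers used in these assertions return data shaped roughly as follows; the UUIDs and values here are made up for illustration:

# Illustrative shapes behind _get_provider_usages() and
# _get_allocations_by_server_uuid(); UUIDs and values are invented.
source_rp_uuid = '11111111-1111-1111-1111-111111111111'
dest_rp_uuid = '22222222-2222-2222-2222-222222222222'

# _get_provider_usages(rp_uuid) -> resource class to amount used:
dest_usages = {'VCPU': 1, 'MEMORY_MB': 512, 'DISK_GB': 1}

# _get_allocations_by_server_uuid(server_id) -> provider UUID to that
# provider's share of the server's allocation:
allocations = {
    source_rp_uuid: {'resources': {'VCPU': 1, 'MEMORY_MB': 512,
                                   'DISK_GB': 1}},
    dest_rp_uuid: {'resources': {'VCPU': 1, 'MEMORY_MB': 512,
                                 'DISK_GB': 1}},
}

# Before the fix only the source entry existed; with the doubled-up
# allocation both providers appear until the source side is cleaned up.
assert len(allocations) == 2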
@@ -2517,6 +2517,91 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
                          fake_inst.system_metadata)
         mock_save.assert_called_once_with()
 
+    @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename',
+                       side_effect=exc.ComputeHostNotFound(
+                           host='source-host'))
+    def test_allocate_for_evacuate_dest_host_source_node_not_found_no_reqspec(
+            self, get_compute_node):
+        """Tests that the source node for the instance isn't found. In this
+        case there is no request spec provided.
+        """
+        instance = self.params['build_requests'][0].instance
+        instance.host = 'source-host'
+        with mock.patch.object(self.conductor,
+                               '_set_vm_state_and_notify') as notify:
+            ex = self.assertRaises(
+                exc.ComputeHostNotFound,
+                self.conductor._allocate_for_evacuate_dest_host,
+                self.ctxt, instance, 'dest-host')
+            get_compute_node.assert_called_once_with(
+                self.ctxt, instance.host, instance.node)
+            notify.assert_called_once_with(
+                self.ctxt, instance.uuid, 'rebuild_server',
+                {'vm_state': instance.vm_state, 'task_state': None}, ex, {})
+
+    @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename',
+                       return_value=objects.ComputeNode(host='source-host'))
+    @mock.patch.object(objects.ComputeNode,
+                       'get_first_node_by_host_for_old_compat',
+                       side_effect=exc.ComputeHostNotFound(host='dest-host'))
+    def test_allocate_for_evacuate_dest_host_dest_node_not_found_reqspec(
+            self, get_dest_node, get_source_node):
+        """Tests that the destination node for the request isn't found. In
+        this case there is a request spec provided.
+        """
+        instance = self.params['build_requests'][0].instance
+        instance.host = 'source-host'
+        reqspec = self.params['request_specs'][0]
+        with mock.patch.object(self.conductor,
+                               '_set_vm_state_and_notify') as notify:
+            ex = self.assertRaises(
+                exc.ComputeHostNotFound,
+                self.conductor._allocate_for_evacuate_dest_host,
+                self.ctxt, instance, 'dest-host', reqspec)
+            get_source_node.assert_called_once_with(
+                self.ctxt, instance.host, instance.node)
+            get_dest_node.assert_called_once_with(
+                self.ctxt, 'dest-host', use_slave=True)
+            notify.assert_called_once_with(
+                self.ctxt, instance.uuid, 'rebuild_server',
+                {'vm_state': instance.vm_state, 'task_state': None}, ex,
+                reqspec.to_legacy_request_spec_dict())
+
+    @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename',
+                       return_value=objects.ComputeNode(host='source-host'))
+    @mock.patch.object(objects.ComputeNode,
+                       'get_first_node_by_host_for_old_compat',
+                       return_value=objects.ComputeNode(host='dest-host'))
+    def test_allocate_for_evacuate_dest_host_claim_fails(
+            self, get_dest_node, get_source_node):
+        """Tests that the allocation claim fails."""
+        instance = self.params['build_requests'][0].instance
+        instance.host = 'source-host'
+        reqspec = self.params['request_specs'][0]
+        with test.nested(
+            mock.patch.object(self.conductor,
+                              '_set_vm_state_and_notify'),
+            mock.patch.object(scheduler_utils,
+                              'claim_resources_on_destination',
+                              side_effect=exc.NoValidHost(reason='I am full'))
+        ) as (
+            notify, claim
+        ):
+            ex = self.assertRaises(
+                exc.NoValidHost,
+                self.conductor._allocate_for_evacuate_dest_host,
+                self.ctxt, instance, 'dest-host', reqspec)
+            get_source_node.assert_called_once_with(
+                self.ctxt, instance.host, instance.node)
+            get_dest_node.assert_called_once_with(
+                self.ctxt, 'dest-host', use_slave=True)
+            claim.assert_called_once_with(
+                self.conductor.scheduler_client.reportclient, instance,
+                get_source_node.return_value, get_dest_node.return_value)
+            notify.assert_called_once_with(
+                self.ctxt, instance.uuid, 'rebuild_server',
+                {'vm_state': instance.vm_state, 'task_state': None}, ex,
+                reqspec.to_legacy_request_spec_dict())
+
 
 class ConductorTaskRPCAPITestCase(_BaseTaskTestCase,
                                   test_compute.BaseTestCase):
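One idiom worth noting in these tests: Nova's test base classes build on testtools, whose assertRaises returns the raised exception, which lets a test pass the exact exception object on to assertions about the notifier. A standalone illustration (assumes testtools is installed; plain unittest's assertRaises in this call form returns None instead):

# Illustrates the ex = self.assertRaises(...) idiom used above;
# plain testtools, no Nova fixtures.
import unittest

import testtools


def boom():
    raise ValueError('no valid host')


class AssertRaisesIdiom(testtools.TestCase):
    def test_returns_exception_instance(self):
        ex = self.assertRaises(ValueError, boom)
        # The returned exception can be compared against what a mocked
        # collaborator received, as the conductor tests do with notify.
        self.assertEqual('no valid host', str(ex))


if __name__ == '__main__':
    unittest.main()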
releasenotes/notes/bug-1713786-0ee9e543683dafa4.yaml (new file):

@@ -0,0 +1,12 @@
+---
+fixes:
+  - |
+    When forcing a specified destination host during evacuate, the
+    scheduler is bypassed but resource allocations will still be made in the
+    Placement service against the forced destination host. If the resource
+    allocation against the destination host fails, the evacuate operation
+    will fail, regardless of the ``force`` flag being specified in the API.
+    The guest will be unchanged on the source host. For more details, see
+    `bug 1713786`_.
+
+    .. _bug 1713786: https://bugs.launchpad.net/nova/+bug/1713786