Merge "Resource tracker compatibility with Ocata and Pike"
This commit is contained in:
commit
4cd6a3a1b4
@@ -13,7 +13,7 @@
     "disabled_reason": null,
     "report_count": 1,
     "forced_down": false,
-    "version": 21,
+    "version": 22,
     "availability_zone": null,
     "uuid": "fa69c544-906b-4a6a-a9c6-c1f7a8078c73"
 }
@@ -52,7 +52,7 @@ def main():
 
     cmd_common.block_db_access('nova-compute')
     objects_base.NovaObject.indirection_api = conductor_rpcapi.ConductorAPI()
-
+    objects.Service.enable_min_version_cache()
     server = service.Service.create(binary='nova-compute',
                                     topic=compute_rpcapi.RPC_TOPIC)
     service.serve(server)
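Both of the cache-related calls touched by this change (objects.Service.enable_min_version_cache() here and clear_min_version_cache() in the test base class further down) exist because the resource tracker now asks for the deployment-wide minimum nova-compute version on every periodic pass. A rough sketch of how the pair is meant to be used, assuming the cache simply memoizes get_minimum_version() results per binary:

    from nova import objects

    # At nova-compute startup (as in the hunk above): turn on memoization so
    # the periodic resource-tracker task does not issue a conductor/DB query
    # for the minimum service version on every single run.
    objects.Service.enable_min_version_cache()

    # In test setUp (see the nova/test.py hunk below): drop any cached value
    # so one test's minimum version cannot leak into the next test.
    objects.Service.clear_min_version_cache()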
@@ -1010,7 +1010,7 @@ class ResourceTracker(object):
             continue
 
     def _update_usage_from_instance(self, context, instance, nodename,
-                                    is_removed=False):
+                                    is_removed=False, has_ocata_computes=False):
         """Update usage for a single instance."""
 
         uuid = instance['uuid']
@@ -1038,7 +1038,40 @@
                 self.pci_tracker.update_pci_for_instance(context,
                                                          instance,
                                                          sign=sign)
-            self.reportclient.update_instance_allocation(cn, instance, sign)
+            if has_ocata_computes:
+                LOG.debug("We're on a Pike compute host in a deployment "
+                          "with Ocata compute hosts. Auto-correcting "
+                          "allocations to handle Ocata-style assumptions.")
+                self.reportclient.update_instance_allocation(cn, instance,
+                                                             sign)
+            else:
+                # NOTE(jaypipes): We're on a Pike compute host or later in
+                # a deployment with all compute hosts upgraded to Pike or
+                # later
+                #
+                # If that is the case, then we know that the scheduler will
+                # have properly created an allocation and that the compute
+                # hosts have not attempted to overwrite allocations
+                # **during the periodic update_available_resource() call**.
+                # However, Pike compute hosts may still rework an
+                # allocation for an instance in a move operation during
+                # confirm_resize() on the source host which will remove the
+                # source resource provider from any allocation for an
+                # instance.
+                #
+                # In Queens and beyond, the scheduler will understand when
+                # a move operation has been requested and instead of
+                # creating a doubled-up allocation that contains both the
+                # source and destination host, the scheduler will take the
+                # original allocation (against the source host) and change
+                # the consumer ID of that allocation to be the migration
+                # UUID and not the instance UUID. The scheduler will
+                # allocate the resources for the destination host to the
+                # instance UUID.
+                LOG.debug("We're on a Pike compute host in a deployment "
+                          "with all Pike compute hosts. Skipping "
+                          "auto-correction of allocations.")
+
             # new instance, update compute node resource usage:
             self._update_usage(self._get_usage_dict(instance), nodename,
                                sign=sign)
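Read in isolation, the new branch above boils down to: only overwrite ("heal") an instance's placement allocation when the deployment still contains Ocata computes; otherwise trust whatever the Pike scheduler wrote. A distilled sketch of that decision, with simplified names rather than the full ResourceTracker method:

    import logging

    LOG = logging.getLogger(__name__)


    def sync_instance_allocation(reportclient, cn, instance, sign,
                                 has_ocata_computes):
        """Mirror the Pike-era auto-healing decision from the hunk above."""
        if has_ocata_computes:
            # An Ocata source host may have trampled this allocation during
            # its own periodic task, so rewrite it from this host's view.
            LOG.debug("Auto-correcting allocation for instance %s",
                      instance.uuid)
            reportclient.update_instance_allocation(cn, instance, sign)
        else:
            # All computes are Pike or newer: the scheduler-created
            # allocation is authoritative, so leave it alone.
            LOG.debug("Skipping allocation auto-correction for instance %s",
                      instance.uuid)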
@@ -1068,9 +1101,44 @@ class ResourceTracker(object):
         cn.current_workload = 0
         cn.running_vms = 0
 
+        # NOTE(jaypipes): In Pike, we need to be tolerant of Ocata compute
+        # nodes that overwrite placement allocations to look like what the
+        # resource tracker *thinks* is correct. When an instance is
+        # migrated from an Ocata compute node to a Pike compute node, the
+        # Pike scheduler will have created a "doubled-up" allocation that
+        # contains allocated resources against both the source and
+        # destination hosts. The Ocata source compute host, during its
+        # update_available_resource() periodic call will find the instance
+        # in its list of known instances and will call
+        # update_instance_allocation() in the report client. That call will
+        # pull the allocations for the instance UUID which will contain
+        # both the source and destination host providers in the allocation
+        # set. Seeing that this is different from what the Ocata source
+        # host thinks it should be and will overwrite the allocation to
+        # only be an allocation against itself.
+        #
+        # And therefore, here we need to have Pike compute hosts
+        # "correct" the improper healing that the Ocata source host did
+        # during its periodic interval. When the instance is fully migrated
+        # to the Pike compute host, the Ocata compute host will find an
+        # allocation that refers to itself for an instance it no longer
+        # controls and will *delete* all allocations that refer to that
+        # instance UUID, assuming that the instance has been deleted. We
+        # need the destination Pike compute host to recreate that
+        # allocation to refer to its own resource provider UUID.
+        #
+        # For Pike compute nodes that migrate to either a Pike compute host
+        # or a Queens compute host, we do NOT want the Pike compute host to
+        # be "healing" allocation information. Instead, we rely on the Pike
+        # scheduler to properly create allocations during scheduling.
+        compute_version = objects.Service.get_minimum_version(
+            context, 'nova-compute')
+        has_ocata_computes = compute_version < 22
+
         for instance in instances:
             if instance.vm_state not in vm_states.ALLOW_RESOURCE_REMOVAL:
-                self._update_usage_from_instance(context, instance, nodename)
+                self._update_usage_from_instance(context, instance, nodename,
+                    has_ocata_computes=has_ocata_computes)
 
         self._remove_deleted_instances_allocations(context, cn)
 
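The has_ocata_computes flag fed into the call above is derived once per periodic run from the deployment-wide minimum nova-compute service version: any live compute still reporting a version below 22 keeps the Ocata-compatible healing path switched on. A minimal standalone sketch of that check, assuming it runs inside a nova service where the object registry is already set up:

    from nova import context as nova_context
    from nova import objects


    def deployment_has_ocata_computes():
        """Return True while any nova-compute is below service version 22."""
        ctxt = nova_context.get_admin_context()
        # get_minimum_version() reports the lowest version recorded by any
        # live nova-compute service, so a single not-yet-upgraded host is
        # enough to keep allocation auto-healing enabled everywhere.
        min_version = objects.Service.get_minimum_version(ctxt, 'nova-compute')
        return min_version < 22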
@@ -1141,7 +1209,6 @@ class ResourceTracker(object):
                         "There are allocations remaining against the source "
                         "host that might need to be removed: %s.",
                         instance_uuid, instance.host, instance.node, alloc)
-                continue
 
     def _find_orphaned_instances(self):
        """Given the set of instances and migrations already account for
@@ -32,7 +32,7 @@ LOG = logging.getLogger(__name__)
 
 
 # NOTE(danms): This is the global service version counter
-SERVICE_VERSION = 21
+SERVICE_VERSION = 22
 
 
 # NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@@ -110,6 +110,9 @@ SERVICE_VERSION_HISTORY = (
     {'compute_rpc': '4.16'},
     # Version 21: Compute RPC version 4.17
     {'compute_rpc': '4.17'},
+    # Version 22: A marker for the behaviour change of auto-healing code on the
+    # compute host regarding allocations against an instance
+    {'compute_rpc': '4.17'},
 )
 
 
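Version 22 deliberately pins the same compute RPC API (4.17) as version 21: the bump carries no RPC change and acts purely as a marker that other hosts can detect through the minimum-version check above. Since SERVICE_VERSION_HISTORY is a tuple indexed by service version number, the marker reads roughly like this:

    from nova.objects.service import SERVICE_VERSION_HISTORY

    # Entries 21 and 22 advertise the same RPC pin, so 22 only flags the
    # allocation auto-healing behaviour change introduced by this commit.
    assert SERVICE_VERSION_HISTORY[21] == {'compute_rpc': '4.17'}
    assert SERVICE_VERSION_HISTORY[22] == {'compute_rpc': '4.17'}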
@@ -260,6 +260,7 @@ class TestCase(testtools.TestCase):
         self._base_test_obj_backup = copy.copy(
             objects_base.NovaObjectRegistry._registry._obj_classes)
         self.addCleanup(self._restore_obj_registry)
+        objects.Service.clear_min_version_cache()
 
         # NOTE(danms): Reset the cached list of cells
         from nova.compute import api
@@ -290,12 +290,17 @@ class IronicResourceTrackerTest(test.TestCase):
         self.assertEqual(3, len(inv))
 
         # Now "spawn" an instance to the first compute node by calling the
-        # RT's instance_claim(), which should, in the case of an Ironic
-        # instance, grab the full compute node for the instance and write
-        # allocation records for VCPU, MEMORY_MB, and DISK_GB
+        # RT's instance_claim().
         cn1_obj = self.COMPUTE_NODE_FIXTURES[uuids.cn1]
         cn1_nodename = cn1_obj.hypervisor_hostname
         inst = self.INSTANCE_FIXTURES[uuids.instance1]
+        # Since we're pike, the scheduler would have created our
+        # allocation for us. So, we can use our old update routine
+        # here to mimic that before we go do the compute RT claim,
+        # and then the checks below.
+        self.rt.reportclient.update_instance_allocation(cn1_obj,
+                                                         inst,
+                                                         1)
         with self.rt.instance_claim(self.ctx, inst, cn1_nodename):
             pass
 
@@ -41,7 +41,8 @@ class TestLocalDeleteAllocations(test.TestCase,
         self.start_service('conductor')
         self.start_service('consoleauth')
 
-        self.flags(group='scheduler', driver='chance_scheduler')
+        self.flags(enabled_filters=['RetryFilter', 'ComputeFilter'],
+                   group='filter_scheduler')
         self.start_service('scheduler')
 
         self.compute = self.start_service('compute')
@@ -1081,11 +1081,7 @@ class ServerMovingTests(test.TestCase, integrated_helpers.InstanceHelperMixin):
 
         self.useFixture(policy_fixture.RealPolicyFixture())
         self.useFixture(nova_fixtures.NeutronFixture(self))
-
-        # NOTE(gibi): After fix 1707071 we need to set the service version to
-        # pike to test pike only interactions. We need separate tests for
-        # ocata - pike interactions
-        # self.useFixture(nova_fixtures.AllServicesCurrent())
+        self.useFixture(nova_fixtures.AllServicesCurrent())
 
         placement = self.useFixture(nova_fixtures.PlacementFixture())
         self.placement_api = placement.api
@@ -1308,7 +1304,6 @@ class ServerMovingTests(test.TestCase, integrated_helpers.InstanceHelperMixin):
         source_allocation = allocations[source_rp_uuid]['resources']
         self.assertFlavorMatchesAllocation(old_flavor, source_allocation)
 
-        self.assertEqual(2, len(allocations))
         dest_allocation = allocations[dest_rp_uuid]['resources']
         self.assertFlavorMatchesAllocation(new_flavor, dest_allocation)
 
@@ -1394,6 +1389,7 @@ class ServerMovingTests(test.TestCase, integrated_helpers.InstanceHelperMixin):
         # destination host
         allocations = self._get_allocations_by_server_uuid(server['id'])
 
+        # and the server allocates only from the target host
         self.assertEqual(1, len(allocations))
 
         source_usages = self._get_provider_usages(source_rp_uuid)
@@ -1407,6 +1403,12 @@ class ServerMovingTests(test.TestCase, integrated_helpers.InstanceHelperMixin):
                          'The source host %s still has usages after the '
                          'resize has been confirmed' % source_hostname)
 
+        # and the target host allocation should be according to the new flavor
+        self.assertFlavorMatchesAllocation(self.flavor2, dest_usages)
+
+        dest_allocation = allocations[dest_rp_uuid]['resources']
+        self.assertFlavorMatchesAllocation(self.flavor2, dest_allocation)
+
         self._run_periodics()
 
         # Check we're still accurate after running the periodics
@@ -1462,29 +1464,13 @@ class ServerMovingTests(test.TestCase, integrated_helpers.InstanceHelperMixin):
 
         usages = self._get_provider_usages(rp_uuid)
 
-        # NOTE(gibi): This is bug 1707071 where the compute "healing" periodic
-        # tramples on the doubled allocations created in the scheduler.
-        self.assertFlavorMatchesAllocation(new_flavor, usages)
-
-        # NOTE(gibi): After fixing bug 1707252 the following is expected
-        # self.assertEqual(old_flavor['vcpus'] + new_flavor['vcpus'],
-        #                  usages['VCPU'])
-        # self.assertEqual(old_flavor['ram'] + new_flavor['ram'],
-        #                  usages['MEMORY_MB'])
-        # self.assertEqual(old_flavor['disk'] + new_flavor['disk'],
-        #                  usages['DISK_GB'])
+        self.assertFlavorsMatchAllocation(old_flavor, new_flavor, usages)
 
         allocations = self._get_allocations_by_server_uuid(server['id'])
         self.assertEqual(1, len(allocations))
         allocation = allocations[rp_uuid]['resources']
 
-        # NOTE(gibi): After fixing bug 1707252 the following is expected
-        # self.assertEqual(old_flavor['vcpus'] + new_flavor['vcpus'],
-        #                  allocation['VCPU'])
-        # self.assertEqual(old_flavor['ram'] + new_flavor['ram'],
-        #                  allocation['MEMORY_MB'])
-        # self.assertEqual(old_flavor['disk'] + new_flavor['disk'],
-        #                  allocation['DISK_GB'])
+        self.assertFlavorsMatchAllocation(old_flavor, new_flavor, allocation)
 
     def test_resize_revert_same_host(self):
         # make sure that the test only uses a single host
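The removed NOTE(gibi) blocks documented the pre-fix behaviour: on a same-host resize the scheduler doubles up the allocation (old flavor plus new flavor against the single provider), and the healing periodic used to trample that back down to just the new flavor. With the healing now skipped, the commented-out expectations become real assertions via the new assertFlavorsMatchAllocation helper. Its implementation is not part of this hunk, but based on the removed comments it presumably checks the summed resources of both flavors, along these lines (hypothetical sketch):

    def assert_flavors_match_allocation(test, old_flavor, new_flavor, usages):
        # Until the same-host resize is confirmed or reverted, the provider
        # is expected to hold the resources of *both* flavors at once.
        test.assertEqual(old_flavor['vcpus'] + new_flavor['vcpus'],
                         usages['VCPU'])
        test.assertEqual(old_flavor['ram'] + new_flavor['ram'],
                         usages['MEMORY_MB'])
        test.assertEqual(old_flavor['disk'] + new_flavor['disk'],
                         usages['DISK_GB'])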
@@ -5643,6 +5643,8 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
                                      self.instance, self.instance.node)
         self.assertIsNotNone(self.instance.migration_context)
 
+        @mock.patch('nova.objects.Service.get_minimum_version',
+                    return_value=22)
         @mock.patch.object(self.compute, "_notify_about_instance_usage")
         @mock.patch.object(self.compute, "_set_instance_info")
         @mock.patch.object(self.instance, 'save')
|
||||
mock_mig_save,
|
||||
mock_inst_save,
|
||||
mock_set,
|
||||
mock_notify):
|
||||
mock_notify,
|
||||
mock_version):
|
||||
self.compute.finish_revert_resize(context=self.context,
|
||||
instance=self.instance,
|
||||
reservations=None,
|
||||
|
@@ -31,6 +31,7 @@ from nova.objects import base as obj_base
 from nova.objects import fields as obj_fields
 from nova.objects import pci_device
 from nova.pci import manager as pci_manager
+from nova.scheduler import utils as sched_utils
 from nova import test
 from nova.tests.unit.objects import test_pci_device as fake_pci_device
 from nova.tests import uuidsentinel as uuids
@@ -467,7 +468,9 @@ class BaseTestCase(test.NoDBTestCase):
 
 class TestUpdateAvailableResources(BaseTestCase):
 
-    def _update_available_resources(self):
+    @mock.patch('nova.objects.Service.get_minimum_version',
+                return_value=22)
+    def _update_available_resources(self, version_mock):
         # We test RT._update separately, since the complexity
         # of the update_available_resource() function is high enough as
         # it is, we just want to focus here on testing the resources
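The same @mock.patch('nova.objects.Service.get_minimum_version', return_value=22) decorator is added to a number of tests below so that the resource tracker sees an all-Pike deployment. Because stacked mock.patch decorators are applied bottom-up, the mock for a newly inserted decorator is passed to the test just before the mocks of the decorators written above it, which is why each signature gains its version_mock parameter in that particular position. A toy illustration of the ordering rule, unrelated to nova:

    import os
    from unittest import mock


    @mock.patch('os.path.exists', return_value=True)   # outermost decorator
    @mock.patch('os.getcwd', return_value='/tmp')      # innermost decorator
    def check(getcwd_mock, exists_mock):
        # Decorators apply bottom-up, so the innermost patch supplies the
        # first mock argument; adding a new patch above an existing one
        # shifts its mock towards the end of the argument list.
        assert os.getcwd() == '/tmp' and getcwd_mock.called
        assert os.path.exists('/anything') and exists_mock.called


    check()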
@@ -1688,6 +1691,8 @@ class TestInstanceClaim(BaseTestCase):
 class TestResize(BaseTestCase):
     @mock.patch('nova.compute.utils.is_volume_backed_instance',
                 return_value=False)
+    @mock.patch('nova.objects.Service.get_minimum_version',
+                return_value=22)
     @mock.patch('nova.objects.InstancePCIRequests.get_by_instance',
                 return_value=objects.InstancePCIRequests(requests=[]))
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
@@ -1698,7 +1703,7 @@
     @mock.patch('nova.objects.ComputeNode.save')
     def test_resize_claim_same_host(self, save_mock, get_mock, migr_mock,
                                     get_cn_mock, pci_mock, instance_pci_mock,
-                                    is_bfv_mock):
+                                    version_mock, is_bfv_mock):
         # Resize an existing instance from its current flavor (instance type
         # 1) to a new flavor (instance type 2) and verify that the compute
         # node's resources are appropriately updated to account for the new
@@ -1788,6 +1793,8 @@
 
     @mock.patch('nova.compute.utils.is_volume_backed_instance',
                 return_value=False)
+    @mock.patch('nova.objects.Service.get_minimum_version',
+                return_value=22)
     @mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid',
                 return_value=objects.InstancePCIRequests(requests=[]))
     @mock.patch('nova.objects.InstancePCIRequests.get_by_instance',
@@ -1809,8 +1816,10 @@
                                     pci_get_by_compute_node_mock,
                                     pci_get_by_instance_mock,
                                     pci_get_by_instance_uuid_mock,
+                                    version_mock,
                                     is_bfv_mock,
                                     revert=False):
+
         self.flags(reserved_host_disk_mb=0,
                    reserved_host_memory_mb=0)
         virt_resources = copy.deepcopy(_VIRT_DRIVER_AVAIL_RESOURCES)
@@ -1893,12 +1902,23 @@
                               ignore=['stats']
                               ))
 
-    def test_instance_build_resize_confirm(self):
-        self._test_instance_build_resize()
+        cn = self.rt.compute_nodes[_NODENAME]
+        cn_uuid = cn.uuid
+        rc = self.sched_client_mock.reportclient
+        remove_method = rc.remove_provider_from_instance_allocation
+        expected_resources = sched_utils.resources_from_flavor(instance,
+                                                               flavor)
+        remove_method.assert_called_once_with(instance.uuid, cn_uuid,
+            instance.user_id, instance.project_id, expected_resources)
 
     def test_instance_build_resize_revert(self):
         self._test_instance_build_resize(revert=True)
 
+    def test_instance_build_resize_confirm(self):
+        self._test_instance_build_resize()
+
+    @mock.patch('nova.objects.Service.get_minimum_version',
+                return_value=22)
     @mock.patch('nova.pci.stats.PciDeviceStats.support_requests',
                 return_value=True)
     @mock.patch('nova.objects.PciDevice.save')
@@ -1911,7 +1931,7 @@
     @mock.patch('nova.objects.ComputeNode.save')
     def test_resize_claim_dest_host_with_pci(self, save_mock, get_mock,
             migr_mock, get_cn_mock, pci_mock, pci_req_mock, pci_claim_mock,
-            pci_dev_save_mock, pci_supports_mock):
+            pci_dev_save_mock, pci_supports_mock, version_mock):
         # Starting from an empty destination compute node, perform a resize
         # operation for an instance containing SR-IOV PCI devices on the
         # original host.
@@ -2038,7 +2058,12 @@
         self.rt.drop_move_claim(ctx, instance, _NODENAME)
         mock_pci_free_device.assert_called_once_with(
             pci_dev, mock.ANY)
+        # Check that we grabbed resourced for the right flavor...
+        mock_resources.assert_called_once_with(instance,
+                                               instance.flavor)
 
+    @mock.patch('nova.objects.Service.get_minimum_version',
+                return_value=22)
     @mock.patch('nova.objects.InstancePCIRequests.get_by_instance',
                 return_value=objects.InstancePCIRequests(requests=[]))
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
@@ -2048,7 +2073,7 @@
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     @mock.patch('nova.objects.ComputeNode.save')
     def test_resize_claim_two_instances(self, save_mock, get_mock, migr_mock,
-            get_cn_mock, pci_mock, instance_pci_mock):
+            get_cn_mock, pci_mock, instance_pci_mock, version_mock):
         # Issue two resize claims against a destination host with no prior
         # instances on it and validate that the accounting for resources is
         # correct.
@@ -2163,6 +2188,8 @@
 
 
 class TestRebuild(BaseTestCase):
+    @mock.patch('nova.objects.Service.get_minimum_version',
+                return_value=22)
     @mock.patch('nova.objects.InstancePCIRequests.get_by_instance',
                 return_value=objects.InstancePCIRequests(requests=[]))
     @mock.patch('nova.objects.PciDeviceList.get_by_compute_node',
@@ -2172,7 +2199,7 @@
     @mock.patch('nova.objects.InstanceList.get_by_host_and_node')
     @mock.patch('nova.objects.ComputeNode.save')
     def test_rebuild_claim(self, save_mock, get_mock, migr_mock, get_cn_mock,
-                           pci_mock, instance_pci_mock):
+                           pci_mock, instance_pci_mock, version_mock):
         # Rebuild an instance, emulating an evacuate command issued against the
         # original instance. The rebuild operation uses the resource tracker's
         # _move_claim() method, but unlike with resize_claim(), rebuild_claim()