Make get_allocations_for_resource_provider raise
In preparation for reshaper work, and because it's The Right Thing To Do, this patch makes the report client's get_allocations_for_resource_provider method stop acting like something didn't go wrong when its placement API request fails. We remove the @safe_connect decorator so it raises (a subclass of) ksa's ClientException when something goes wrong with the API communication. And the method now raises a new ResourceProviderAllocationRetrievalFailed exception on non-200. In the spirit of the aggregate and trait retrieval methods, it now returns a namedtuple containing the allocation information. Unit tests, which were entirely absent, are added for the method. The resource tracker's _remove_deleted_instances_allocations, which is get_allocations_for_resource_provider's only consumer (for now, until reshaper work starts using it), is refactored to behave the same way it used to, which is to no-op if the placement API request fails. However, a) we take an earlier short-circuit out of the method, which saves a little work copying context stuff; and b) we now emit an error log message if the no-op is due to the newly-raised exceptions. Change-Id: I020e7dc47efc79f8907b7bfb753ec779a8da69a1
This commit is contained in:
@@ -22,6 +22,7 @@ import collections
|
||||
import copy
|
||||
import retrying
|
||||
|
||||
from keystoneauth1 import exceptions as ks_exc
|
||||
from oslo_log import log as logging
|
||||
from oslo_serialization import jsonutils
|
||||
|
||||
@@ -1271,8 +1272,20 @@ class ResourceTracker(object):
|
||||
# happen according to the normal flow of events where the scheduler
|
||||
# always creates allocations for an instance
|
||||
known_instances = set(self.tracked_instances.keys())
|
||||
allocations = self.reportclient.get_allocations_for_resource_provider(
|
||||
context, cn.uuid) or {}
|
||||
try:
|
||||
# pai: report.ProviderAllocInfo namedtuple
|
||||
pai = self.reportclient.get_allocations_for_resource_provider(
|
||||
context, cn.uuid)
|
||||
except (exception.ResourceProviderAllocationRetrievalFailed,
|
||||
ks_exc.ClientException) as e:
|
||||
LOG.error("Skipping removal of allocations for deleted instances: "
|
||||
"%s", e)
|
||||
return
|
||||
allocations = pai.allocations
|
||||
if not allocations:
|
||||
# The main loop below would short-circuit anyway, but this saves us
|
||||
# the (potentially expensive) context.elevated construction below.
|
||||
return
|
||||
read_deleted_context = context.elevated(read_deleted='yes')
|
||||
for consumer_uuid, alloc in allocations.items():
|
||||
if consumer_uuid in known_instances:
|
||||
|
||||
@@ -2368,3 +2368,8 @@ class ZVMConnectorError(ZVMDriverException):
|
||||
|
||||
class NoResourceClass(NovaException):
    """Raised when no resource class could be found for an Ironic node."""
    msg_fmt = _("Resource class not found for Ironic node %(node)s.")
|
||||
|
||||
|
||||
class ResourceProviderAllocationRetrievalFailed(NovaException):
    """Raised when retrieving a resource provider's allocations from the
    placement API returns a failed (non-success) response.
    """
    msg_fmt = _("Failed to retrieve allocations for resource provider "
                "%(rp_uuid)s: %(error)s")
|
||||
|
||||
@@ -57,6 +57,8 @@ POST_ALLOCATIONS_API_VERSION = '1.13'
|
||||
|
||||
# Structured results returned by the report client's provider-retrieval
# methods, so callers receive the retrieved data (and, where applicable,
# the provider generation) as a single named value.
AggInfo = collections.namedtuple('AggInfo', ['aggregates', 'generation'])
TraitInfo = collections.namedtuple('TraitInfo', ['traits', 'generation'])
# Returned by get_allocations_for_resource_provider; 'allocations' is the
# mapping of consumer UUID -> allocation records for the provider.
ProviderAllocInfo = collections.namedtuple(
    'ProviderAllocInfo', ['allocations'])
|
||||
|
||||
|
||||
def warn_limit(self, msg):
|
||||
@@ -1933,14 +1935,25 @@ class SchedulerReportClient(object):
|
||||
else:
|
||||
self.delete_allocation_for_instance(context, instance.uuid)
|
||||
|
||||
def get_allocations_for_resource_provider(self, context, rp_uuid):
    """Retrieves the allocations for a specific provider.

    :param context: The nova.context.RequestContext auth context
    :param rp_uuid: The UUID of the provider.
    :return: ProviderAllocInfo namedtuple.
    :raises: keystoneauth1.exceptions.base.ClientException on failure to
             communicate with the placement API
    :raises: ResourceProviderAllocationRetrievalFailed if the placement API
             call fails.
    """
    url = '/resource_providers/%s/allocations' % rp_uuid
    resp = self.get(url, global_request_id=context.global_id)
    if not resp:
        # Surface the failure to the caller rather than silently returning
        # an empty result; the response text carries placement's error.
        raise exception.ResourceProviderAllocationRetrievalFailed(
            rp_uuid=rp_uuid, error=resp.text)

    data = resp.json()
    return ProviderAllocInfo(allocations=data['allocations'])
|
||||
|
||||
def delete_resource_provider(self, context, compute_node, cascade=False):
|
||||
"""Deletes the ResourceProvider record for the compute_node.
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
import copy
|
||||
import datetime
|
||||
|
||||
from keystoneauth1 import exceptions as ks_exc
|
||||
import mock
|
||||
from oslo_config import cfg
|
||||
from oslo_utils import timeutils
|
||||
@@ -33,6 +34,7 @@ from nova.objects import fields as obj_fields
|
||||
from nova.objects import pci_device
|
||||
from nova.pci import manager as pci_manager
|
||||
from nova import rc_fields
|
||||
from nova.scheduler.client import report
|
||||
from nova import test
|
||||
from nova.tests.unit import fake_notifier
|
||||
from nova.tests.unit.objects import test_pci_device as fake_pci_device
|
||||
@@ -2799,7 +2801,8 @@ class TestUpdateUsageFromInstance(BaseTestCase):
|
||||
mock_inst_get):
|
||||
rc = self.rt.reportclient
|
||||
self.rt.tracked_instances = {}
|
||||
allocs = {uuids.deleted: "fake_deleted_instance"}
|
||||
allocs = report.ProviderAllocInfo(
|
||||
allocations={uuids.deleted: "fake_deleted_instance"})
|
||||
rc.get_allocations_for_resource_provider = mock.MagicMock(
|
||||
return_value=allocs)
|
||||
rc.delete_allocation_for_instance = mock.MagicMock()
|
||||
@@ -2824,7 +2827,8 @@ class TestUpdateUsageFromInstance(BaseTestCase):
|
||||
mock_inst_get):
|
||||
rc = self.rt.reportclient
|
||||
self.rt.tracked_instances = {}
|
||||
allocs = {uuids.deleted: "fake_deleted_instance"}
|
||||
allocs = report.ProviderAllocInfo(
|
||||
allocations={uuids.deleted: "fake_deleted_instance"})
|
||||
rc.get_allocations_for_resource_provider = mock.MagicMock(
|
||||
return_value=allocs)
|
||||
rc.delete_allocation_for_instance = mock.MagicMock()
|
||||
@@ -2843,8 +2847,9 @@ class TestUpdateUsageFromInstance(BaseTestCase):
|
||||
mock_inst_get):
|
||||
rc = self.rt.reportclient
|
||||
self.rt.tracked_instances = {}
|
||||
allocs = {uuids.deleted: "fake_deleted_instance",
|
||||
uuids.migration: "fake_migration"}
|
||||
allocs = report.ProviderAllocInfo(
|
||||
allocations={uuids.deleted: "fake_deleted_instance",
|
||||
uuids.migration: "fake_migration"})
|
||||
mig = objects.Migration(uuid=uuids.migration)
|
||||
rc.get_allocations_for_resource_provider = mock.MagicMock(
|
||||
return_value=allocs)
|
||||
@@ -2867,7 +2872,8 @@ class TestUpdateUsageFromInstance(BaseTestCase):
|
||||
mock_inst_get):
|
||||
rc = self.rt.reportclient
|
||||
self.rt.tracked_instances = {}
|
||||
allocs = {uuids.scheduled: "fake_scheduled_instance"}
|
||||
allocs = report.ProviderAllocInfo(
|
||||
allocations={uuids.scheduled: "fake_scheduled_instance"})
|
||||
rc.get_allocations_for_resource_provider = mock.MagicMock(
|
||||
return_value=allocs)
|
||||
rc.delete_allocation_for_instance = mock.MagicMock()
|
||||
@@ -2916,15 +2922,17 @@ class TestUpdateUsageFromInstance(BaseTestCase):
|
||||
rc = self.rt.reportclient
|
||||
self.rt.tracked_instances = {
|
||||
uuids.known: objects.Instance(uuid=uuids.known)}
|
||||
allocs = {
|
||||
uuids.known: {
|
||||
'resources': {
|
||||
'VCPU': 1,
|
||||
'MEMORY_MB': 2048,
|
||||
'DISK_GB': 20
|
||||
allocs = report.ProviderAllocInfo(
|
||||
allocations={
|
||||
uuids.known: {
|
||||
'resources': {
|
||||
'VCPU': 1,
|
||||
'MEMORY_MB': 2048,
|
||||
'DISK_GB': 20
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
rc.get_allocations_for_resource_provider = mock.MagicMock(
|
||||
return_value=allocs)
|
||||
rc.delete_allocation_for_instance = mock.MagicMock()
|
||||
@@ -2951,15 +2959,17 @@ class TestUpdateUsageFromInstance(BaseTestCase):
|
||||
# No tracked instances on this node.
|
||||
self.rt.tracked_instances = {}
|
||||
# But there is an allocation for an instance on this node.
|
||||
allocs = {
|
||||
instance.uuid: {
|
||||
'resources': {
|
||||
'VCPU': 1,
|
||||
'MEMORY_MB': 2048,
|
||||
'DISK_GB': 20
|
||||
allocs = report.ProviderAllocInfo(
|
||||
allocations={
|
||||
instance.uuid: {
|
||||
'resources': {
|
||||
'VCPU': 1,
|
||||
'MEMORY_MB': 2048,
|
||||
'DISK_GB': 20
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
rc.get_allocations_for_resource_provider = mock.MagicMock(
|
||||
return_value=allocs)
|
||||
rc.delete_allocation_for_instance = mock.MagicMock()
|
||||
@@ -2974,6 +2984,33 @@ class TestUpdateUsageFromInstance(BaseTestCase):
|
||||
# and figure things out, this should actually be an error.
|
||||
rc.delete_allocation_for_instance.assert_not_called()
|
||||
|
||||
def test_remove_deleted_instances_allocations_retrieval_fail(self):
    """When the report client errs or otherwise retrieves no allocations,
    _remove_deleted_instances_allocations gracefully no-ops.
    """
    cn = self.rt.compute_nodes[_NODENAME]
    rc = self.rt.reportclient
    # We'll test three different ways get_allocations_for_resource_provider
    # can cause us to no-op.
    side_effects = (
        # Actual placement error
        exc.ResourceProviderAllocationRetrievalFailed(
            rp_uuid='rp_uuid', error='error'),
        # API communication failure
        ks_exc.ClientException,
        # Legitimately no allocations
        report.ProviderAllocInfo(allocations={}),
    )
    rc.get_allocations_for_resource_provider = mock.Mock(
        side_effect=side_effects)
    # One pass per side effect; each pass consumes the next side effect and
    # must leave the resource tracker untouched.
    for _ in side_effects:
        # If we didn't no op, this would blow up at 'ctx'.elevated()
        self.rt._remove_deleted_instances_allocations(
            'ctx', cn, [], self.rt.tracked_instances)
        rc.get_allocations_for_resource_provider.assert_called_once_with(
            'ctx', cn.uuid)
        rc.get_allocations_for_resource_provider.reset_mock()
|
||||
|
||||
def test_delete_allocation_for_shelve_offloaded_instance(self):
|
||||
instance = _INSTANCE_FIXTURES[0].obj_clone()
|
||||
instance.uuid = uuids.inst0
|
||||
|
||||
@@ -3406,6 +3406,28 @@ class TestAllocations(SchedulerReportClientTestCase):
|
||||
self.assertEqual(0, mock_log.info.call_count)
|
||||
self.assertEqual(1, mock_log.error.call_count)
|
||||
|
||||
@mock.patch("nova.scheduler.client.report.SchedulerReportClient.get")
def test_get_allocations_for_resource_provider(self, mock_get):
    # Happy path: a 200 response produces a ProviderAllocInfo namedtuple
    # carrying the 'allocations' payload from the placement response.
    mock_get.return_value = fake_requests.FakeResponse(
        200, content=jsonutils.dumps(
            {'allocations': 'fake', 'resource_provider_generation': 42}))
    ret = self.client.get_allocations_for_resource_provider(
        self.context, 'rpuuid')
    self.assertEqual('fake', ret.allocations)
    mock_get.assert_called_once_with(
        '/resource_providers/rpuuid/allocations',
        global_request_id=self.context.global_id)
|
||||
|
||||
@mock.patch("nova.scheduler.client.report.SchedulerReportClient.get")
def test_get_allocations_for_resource_provider_fail(self, mock_get):
    # A failed (400) placement response must raise
    # ResourceProviderAllocationRetrievalFailed rather than return a value.
    mock_get.return_value = fake_requests.FakeResponse(400, content="ouch")
    self.assertRaises(exception.ResourceProviderAllocationRetrievalFailed,
                      self.client.get_allocations_for_resource_provider,
                      self.context, 'rpuuid')
    mock_get.assert_called_once_with(
        '/resource_providers/rpuuid/allocations',
        global_request_id=self.context.global_id)
|
||||
|
||||
|
||||
class TestResourceClass(SchedulerReportClientTestCase):
|
||||
def setUp(self):
|
||||
|
||||
Reference in New Issue
Block a user