rt: ensure resource provider records exist from RT

This patch adds functionality to the scheduler "report client" to ensure
that the client calls the placement API to create a resource provider
record for the local compute host managed by the Nova resource tracker.

The report client keeps a cache of resource provider objects, keyed by
resource provider UUID and constructed from the results of placement
REST API calls to get information about a resource provider. If a
resource provider matching a UUID was not found in the placement REST
API, the report client automatically creates the resource provider
record via the placement REST API. These resource provider objects will
be used in followup patches that add creation of inventory and
allocation records to the scheduler report client.

Included in this patch is a new [placement] nova.conf configuration
section with a single os_region_name configuration option that allows
Nova to grab the placement API endpoint URL for the particular OpenStack
region that it is in. We do not support endpoint URL overrides for the
placement API service. We only use the Keystone service catalog for
finding the endpoint for the placement service. We intentionally modeled
the determination of the placement endpoint URL after similar code that
determines the volume endpoint URL in /nova/volume/cinder.py.

This redoes the placement API client using a keystone session, and stubs
out places where we can do more reasonable handling of errors. This works
if we fill out the right credentials in the [placement] section of the
config file.

Co-Authored-By: Sean Dague <sean@dague.net>

Change-Id: I9d28b51da25c523d22c373039e6d8b36fd96eba6
blueprint: generic-resource-pools
This commit is contained in:
Jay Pipes 2016-08-22 13:52:44 -04:00 committed by Sean Dague
parent 1abb6f7b4e
commit 5fb6f8f511
6 changed files with 496 additions and 13 deletions

View File

@ -67,6 +67,7 @@ from nova.conf import novnc
from nova.conf import osapi_v21
from nova.conf import paths
from nova.conf import pci
from nova.conf import placement
from nova.conf import quota
from nova.conf import rdp
from nova.conf import remote_debug
@ -141,6 +142,7 @@ novnc.register_opts(CONF)
osapi_v21.register_opts(CONF)
paths.register_opts(CONF)
pci.register_opts(CONF)
placement.register_opts(CONF)
quota.register_opts(CONF)
rdp.register_opts(CONF)
rpc.register_opts(CONF)

44
nova/conf/placement.py Normal file
View File

@ -0,0 +1,44 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from keystoneauth1 import loading as ks_loading
from oslo_config import cfg
# Option group for everything related to talking to the placement API
# service. The endpoint itself is always discovered via the Keystone
# service catalog; no URL override option is provided.
placement_group = cfg.OptGroup(
    'placement',
    title='Placement Service Options',
    help="Configuration options for connecting to the placement API service")

# Options registered under [placement]. Only the region name is
# configurable here; keystoneauth adds its own auth options to the
# same group via register_opts() below.
placement_opts = [
    cfg.StrOpt('os_region_name',
               help="""
Region name of this node. This is used when picking the URL in the service
catalog.
Possible values:
* Any string representing region name
"""),
]
def register_opts(conf):
    """Register the [placement] option group on the given config object.

    Registers the group itself, the options declared in this module, and
    the standard keystoneauth authentication options for the same group.

    :param conf: an oslo.config ConfigOpts instance
    """
    conf.register_group(placement_group)
    conf.register_opts(placement_opts, group=placement_group)
    ks_loading.register_auth_conf_options(conf, placement_group.name)
def list_opts():
    """Return this module's options for the oslo.config sample generator.

    :returns: dict mapping the [placement] group name to its option list
    """
    return {placement_group.name: placement_opts}

View File

@ -102,7 +102,8 @@ class RequestContext(context.RequestContext):
if service_catalog:
# Only include required parts of service_catalog
self.service_catalog = [s for s in service_catalog
if s.get('type') in ('volume', 'volumev2', 'key-manager')]
if s.get('type') in ('volume', 'volumev2', 'key-manager',
'placement')]
else:
# if list is empty or none
self.service_catalog = []

View File

@ -13,13 +13,189 @@
# License for the specific language governing permissions and limitations
# under the License.
import functools
from keystoneauth1 import exceptions as ks_exc
from keystoneauth1 import loading as keystone
from keystoneauth1 import session
from oslo_log import log as logging
import nova.conf
from nova.i18n import _LE, _LI, _LW
from nova import objects
# Module-level handles: the global nova configuration object and the
# logger used by safe_connect() and SchedulerReportClient below.
CONF = nova.conf.CONF
LOG = logging.getLogger(__name__)
def safe_connect(f):
    """Decorator trapping keystone connectivity failures on client methods.

    On EndpointNotFound or MissingAuthPlugin the wrapped method logs a
    warning, sets ``self._disabled`` and returns None; once disabled, all
    further wrapped calls return None immediately. On ConnectFailure only
    a warning is logged (``_disabled`` stays unset) so the next call
    retries. All failure paths return None.
    """
    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        try:
            if self._disabled:
                # A previous failure was unrecoverable; give up entirely.
                return
            return f(self, *args, **kwargs)
        except ks_exc.EndpointNotFound:
            LOG.warning(_LW(
                "The placement API endpoint not found. Optional use of "
                "placement API for reporting is now disabled."))
            self._disabled = True
        except ks_exc.MissingAuthPlugin:
            LOG.warning(_LW(
                "No authentication information found for placement API. "
                "Optional use of placement API for reporting is now "
                "disabled."))
            self._disabled = True
        except ks_exc.ConnectFailure:
            # Transient: leave _disabled alone so the next call retries.
            LOG.warning(_LW('Placement API service is not responding.'))
    return wrapper
class SchedulerReportClient(object):
    """Client class for updating the scheduler.

    Talks to the placement REST API over a keystoneauth session and keeps
    a local cache of ResourceProvider objects keyed by UUID.
    """

    # Endpoint filter handed to every session request: pick the
    # 'placement' service for our region out of the Keystone catalog.
    # NOTE(review): this dict is built at class-definition (import) time,
    # so it reads CONF.placement.os_region_name before config files may
    # have been parsed -- confirm this is evaluated late enough, or
    # consider building the filter lazily.
    ks_filter = {'service_type': 'placement',
                 'region_name': CONF.placement.os_region_name}

    def __init__(self):
        # A dict, keyed by the resource provider UUID, of ResourceProvider
        # objects that will have their inventories and allocations tracked by
        # the placement API for the compute host
        self._resource_providers = {}
        # Build the keystoneauth session from the [placement] auth options.
        auth_plugin = keystone.load_auth_from_conf_options(
            CONF, 'placement')
        self._client = session.Session(auth=auth_plugin)
        # TODO(sdague): use this to disable fully when we don't find
        # the endpoint.
        self._disabled = False

    def get(self, url):
        """GET ``url`` from the placement service.

        raise_exc=False means HTTP error statuses come back as response
        objects (callers branch on resp.status_code) instead of raising.
        """
        return self._client.get(
            url,
            endpoint_filter=self.ks_filter, raise_exc=False)

    def post(self, url, data):
        """POST ``data`` as a JSON body to ``url`` on the placement service."""
        # NOTE(sdague): using json= instead of data= sets the
        # media type to application/json for us. Placement API is
        # more sensitive to this than other APIs in the OpenStack
        # ecosystem.
        return self._client.post(
            url, json=data,
            endpoint_filter=self.ks_filter, raise_exc=False)

    @safe_connect
    def _get_resource_provider(self, uuid):
        """Queries the placement API for a resource provider record with the
        supplied UUID.

        Returns an `objects.ResourceProvider` object if found or None if no
        such resource provider could be found.

        :param uuid: UUID identifier for the resource provider to look up
        """
        resp = self.get("/resource_providers/%s" % uuid)
        if resp.status_code == 200:
            data = resp.json()
            return objects.ResourceProvider(
                uuid=uuid,
                name=data['name'],
                generation=data['generation'],
            )
        elif resp.status_code == 404:
            return None
        else:
            # Any other status (e.g. 503): log it and fall through to the
            # implicit None return, which callers treat like "not found".
            msg = _LE("Failed to retrieve resource provider record from "
                      "placement API for UUID %(uuid)s. "
                      "Got %(status_code)d: %(err_text)s.")
            args = {
                'uuid': uuid,
                'status_code': resp.status_code,
                'err_text': resp.text,
            }
            LOG.error(msg, args)

    @safe_connect
    def _create_resource_provider(self, uuid, name):
        """Calls the placement API to create a new resource provider record.

        Returns an `objects.ResourceProvider` object representing the
        newly-created resource provider object.

        :param uuid: UUID of the new resource provider
        :param name: Name of the resource provider
        """
        url = "/resource_providers"
        payload = {
            'uuid': uuid,
            'name': name,
        }
        resp = self.post(url, payload)
        if resp.status_code == 201:
            msg = _LI("Created resource provider record via placement API "
                      "for resource provider with UUID {0} and name {1}.")
            msg = msg.format(uuid, name)
            LOG.info(msg)
            # NOTE(review): assumes a freshly-created provider starts at
            # generation 1 rather than re-fetching the record -- confirm
            # against the placement API's initial generation value.
            return objects.ResourceProvider(
                uuid=uuid,
                name=name,
                generation=1,
            )
        elif resp.status_code == 409:
            # Another thread concurrently created a resource provider with the
            # same UUID. Log a warning and then just return the resource
            # provider object from _get_resource_provider()
            msg = _LI("Another thread already created a resource provider "
                      "with the UUID {0}. Grabbing that record from "
                      "the placement API.")
            msg = msg.format(uuid)
            LOG.info(msg)
            return self._get_resource_provider(uuid)
        else:
            # Any other status: log it; the implicit None return signals
            # failure to the caller.
            msg = _LE("Failed to create resource provider record in "
                      "placement API for UUID %(uuid)s. "
                      "Got %(status_code)d: %(err_text)s.")
            args = {
                'uuid': uuid,
                'status_code': resp.status_code,
                'err_text': resp.text,
            }
            LOG.error(msg, args)

    def _ensure_resource_provider(self, uuid, name=None):
        """Ensures that the placement API has a record of a resource provider
        with the supplied UUID. If not, creates the resource provider record in
        the placement API for the supplied UUID, optionally passing in a name
        for the resource provider.

        The found or created resource provider object is returned from this
        method. If the resource provider object for the supplied uuid was not
        found and the resource provider record could not be created in the
        placement API, we return None.

        :param uuid: UUID identifier for the resource provider to ensure exists
        :param name: Optional name for the resource provider if the record
                     does not exist. If empty, the name is set to the UUID
                     value
        """
        if uuid in self._resource_providers:
            # Cache hit: skip the round trip to the placement API.
            return self._resource_providers[uuid]
        rp = self._get_resource_provider(uuid)
        if rp is None:
            name = name or uuid
            rp = self._create_resource_provider(uuid, name)
            if rp is None:
                # Creation failed (or placement is unreachable): do not
                # cache a None value.
                return
        self._resource_providers[uuid] = rp
        return rp

    def update_resource_stats(self, compute_node):
        """Creates or updates stats for the supplied compute node.

        :param compute_node: updated nova.objects.ComputeNode to report
        """
        compute_node.save()
        self._ensure_resource_provider(compute_node.uuid,
                                       compute_node.hypervisor_hostname)

View File

@ -12,11 +12,15 @@
import mock
import nova.conf
from nova import context
from nova import objects
from nova.objects import pci_device_pool
from nova.objects import base as obj_base
from nova.scheduler.client import report
from nova import test
from nova.tests import uuidsentinel as uuids
CONF = nova.conf.CONF
class SchedulerReportClientTestCase(test.NoDBTestCase):
@ -24,20 +28,265 @@ class SchedulerReportClientTestCase(test.NoDBTestCase):
def setUp(self):
    """Build a report client whose keystone session is replaced by a mock."""
    super(SchedulerReportClientTestCase, self).setUp()
    self.context = context.get_admin_context()
    # Stand-in for the keystoneauth session; individual tests set return
    # values on self.ks_sess_mock.get / .post.
    self.ks_sess_mock = mock.Mock()
    self.flags(use_local=True, group='conductor')
    # Patch session construction and auth loading so instantiating the
    # client performs no real keystone work.
    # NOTE(review): the `as (_auth_mock, _sess_mock)` names look swapped
    # relative to the patch order (Session is patched first) -- harmless
    # since both are unused, but worth confirming.
    with test.nested(
            mock.patch('keystoneauth1.session.Session',
                       return_value=self.ks_sess_mock),
            mock.patch('keystoneauth1.loading.load_auth_from_conf_options')
    ) as (_auth_mock, _sess_mock):
        self.client = report.SchedulerReportClient()
    # NOTE(review): this second construction happens OUTSIDE the patches
    # above and overwrites self.client with a client built from real
    # keystone auth loading -- it looks like leftover residue of the
    # pre-patch version of this method; confirm against the upstream
    # change and delete if so.
    self.client = report.SchedulerReportClient()
@mock.patch('keystoneauth1.session.Session')
@mock.patch('keystoneauth1.loading.load_auth_from_conf_options')
def test_constructor(self, mock_load_auth, mock_session):
    """The constructor loads [placement] auth and wraps it in a session."""
    report.SchedulerReportClient()

    mock_load_auth.assert_called_once_with(CONF, 'placement')
    mock_session.assert_called_once_with(auth=mock_load_auth.return_value)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
            '_create_resource_provider')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
            '_get_resource_provider')
def test_ensure_resource_provider_exists_in_cache(self, mock_get,
                                                  mock_create):
    """A cache hit must not touch the placement API at all."""
    self.client._resource_providers = {
        uuids.compute_node: mock.sentinel.rp,
    }

    self.client._ensure_resource_provider(uuids.compute_node)

    mock_get.assert_not_called()
    mock_create.assert_not_called()
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
            '_create_resource_provider')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
            '_get_resource_provider')
def test_ensure_resource_provider_get(self, mock_get, mock_create):
    """When the API lookup succeeds, cache the result and skip creation."""
    mock_get.return_value = mock.sentinel.rp

    self.client._ensure_resource_provider(uuids.compute_node)

    mock_get.assert_called_once_with(uuids.compute_node)
    mock_create.assert_not_called()
    self.assertEqual({uuids.compute_node: mock.sentinel.rp},
                     self.client._resource_providers)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
            '_create_resource_provider')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
            '_get_resource_provider')
def test_ensure_resource_provider_create_none(self, mock_get, mock_create):
    """A failed create must not leave a None entry in the cache."""
    mock_get.return_value = None
    mock_create.return_value = None

    self.client._ensure_resource_provider(uuids.compute_node)

    mock_get.assert_called_once_with(uuids.compute_node)
    mock_create.assert_called_once_with(uuids.compute_node,
                                        uuids.compute_node)
    self.assertEqual({}, self.client._resource_providers)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
            '_create_resource_provider')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
            '_get_resource_provider')
def test_ensure_resource_provider_create(self, mock_get, mock_create):
    """When the lookup misses, create the provider and cache the result."""
    mock_get.return_value = None
    mock_create.return_value = mock.sentinel.rp

    self.client._ensure_resource_provider(uuids.compute_node)

    mock_get.assert_called_once_with(uuids.compute_node)
    # With no explicit name, the UUID doubles as the provider name.
    mock_create.assert_called_once_with(uuids.compute_node,
                                        uuids.compute_node)
    self.assertEqual({uuids.compute_node: mock.sentinel.rp},
                     self.client._resource_providers)

    # An explicit name is passed through to the create call unchanged.
    mock_create.reset_mock()
    self.client._resource_providers = {}
    self.client._ensure_resource_provider(uuids.compute_node,
                                          mock.sentinel.name)
    mock_create.assert_called_once_with(uuids.compute_node,
                                        mock.sentinel.name)
def test_get_resource_provider_found(self):
    """A 200 response is converted into a ResourceProvider object."""
    rp_uuid = uuids.compute_node
    mock_response = mock.Mock(status_code=200)
    mock_response.json.return_value = {
        'uuid': rp_uuid,
        'name': rp_uuid,
        'generation': 42,
    }
    self.ks_sess_mock.get.return_value = mock_response

    found = self.client._get_resource_provider(rp_uuid)

    self.ks_sess_mock.get.assert_called_once_with(
        '/resource_providers/' + rp_uuid,
        endpoint_filter=mock.ANY,
        raise_exc=False)
    expected = objects.ResourceProvider(
        uuid=rp_uuid,
        name=rp_uuid,
        generation=42,
    )
    self.assertTrue(obj_base.obj_equal_prims(expected, found))
def test_get_resource_provider_not_found(self):
    """A 404 from the placement API maps to a plain None return."""
    self.ks_sess_mock.get.return_value = mock.Mock(status_code=404)

    rp_uuid = uuids.compute_node
    self.assertIsNone(self.client._get_resource_provider(rp_uuid))

    self.ks_sess_mock.get.assert_called_once_with(
        '/resource_providers/' + rp_uuid,
        endpoint_filter=mock.ANY,
        raise_exc=False)
@mock.patch.object(report.LOG, 'error')
def test_get_resource_provider_error(self, mock_log_error):
    """An unhandled status (503) is logged and maps to a None return."""
    self.ks_sess_mock.get.return_value = mock.Mock(status_code=503)

    rp_uuid = uuids.compute_node
    result = self.client._get_resource_provider(rp_uuid)

    self.ks_sess_mock.get.assert_called_once_with(
        '/resource_providers/' + rp_uuid,
        endpoint_filter=mock.ANY,
        raise_exc=False)
    # A 503 Service Unavailable should trigger an error logged and
    # return None from _get_resource_provider()
    self.assertTrue(mock_log_error.called)
    self.assertIsNone(result)
def test_create_resource_provider(self):
    """A 201 from the placement API yields a ResourceProvider object."""
    rp_uuid = uuids.compute_node
    rp_name = 'computehost'
    self.ks_sess_mock.post.return_value = mock.Mock(status_code=201)

    created = self.client._create_resource_provider(rp_uuid, rp_name)

    self.ks_sess_mock.post.assert_called_once_with(
        '/resource_providers',
        endpoint_filter=mock.ANY,
        json={'uuid': rp_uuid, 'name': rp_name},
        raise_exc=False)
    expected = objects.ResourceProvider(
        uuid=rp_uuid,
        name=rp_name,
        generation=1,
    )
    self.assertTrue(obj_base.obj_equal_prims(expected, created))
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
            '_get_resource_provider')
def test_create_resource_provider_concurrent_create(self, mock_get):
    """A 409 conflict falls back to re-fetching the existing record.

    Another thread beat us to the create; _create_resource_provider()
    must return whatever _get_resource_provider() finds.
    """
    rp_uuid = uuids.compute_node
    rp_name = 'computehost'
    self.ks_sess_mock.post.return_value = mock.Mock(status_code=409)
    mock_get.return_value = mock.sentinel.get_rp

    result = self.client._create_resource_provider(rp_uuid, rp_name)

    self.ks_sess_mock.post.assert_called_once_with(
        '/resource_providers',
        endpoint_filter=mock.ANY,
        json={'uuid': rp_uuid, 'name': rp_name},
        raise_exc=False)
    self.assertEqual(mock.sentinel.get_rp, result)
@mock.patch.object(report.LOG, 'error')
def test_create_resource_provider_error(self, mock_log_error):
    """An unhandled status (503) on create is logged and yields None."""
    rp_uuid = uuids.compute_node
    rp_name = 'computehost'
    self.ks_sess_mock.post.return_value = mock.Mock(status_code=503)

    result = self.client._create_resource_provider(rp_uuid, rp_name)

    self.ks_sess_mock.post.assert_called_once_with(
        '/resource_providers',
        endpoint_filter=mock.ANY,
        json={'uuid': rp_uuid, 'name': rp_name},
        raise_exc=False)
    # A 503 Service Unavailable should log an error and
    # _create_resource_provider() should return None
    self.assertTrue(mock_log_error.called)
    self.assertIsNone(result)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
            '_ensure_resource_provider')
@mock.patch.object(objects.ComputeNode, 'save')
# NOTE(review): the two definitions below appear to be fused old/new
# versions from a mangled diff. The first `def` takes only mock_save
# despite the two patch decorators above, and is immediately shadowed by
# the second `def`; its pci_device_pools body also never calls the code
# under test. The second definition (which asserts both save() and
# _ensure_resource_provider() are invoked) looks like the intended one;
# confirm against the upstream commit and delete the first.
def test_update_resource_stats_saves(self, mock_save):
    cn = objects.ComputeNode(context=self.context)
    cn.host = 'fakehost'
    cn.hypervisor_hostname = 'fakenode'
    cn.pci_device_pools = pci_device_pool.from_pci_stats(
        [{"vendor_id": "foo",
          "product_id": "foo",
          "count": 1,
          "a": "b"}])
def test_update_resource_stats_saves(self, mock_save, mock_ensure):
    # update_resource_stats() must persist the compute node and ensure a
    # matching resource provider record exists in the placement API.
    cn = objects.ComputeNode(context=self.context,
                             uuid=uuids.compute_node,
                             hypervisor_hostname='host1')
    self.client.update_resource_stats(cn)
    mock_save.assert_called_once_with()
    mock_ensure.assert_called_once_with(uuids.compute_node, 'host1')

View File

@ -0,0 +1,11 @@
---
features:
- The nova-compute worker now communicates with the new placement API
service. Nova determines the placement API service by querying the
OpenStack service catalog for the service with a service type of
'placement'.
- A new [placement] section is added to the nova.conf configuration file for
configuration options affecting how Nova interacts with the new placement
API service. The only configuration option currently available is
`os_region_name` which provides support for Nova to query the appropriate
OpenStack region's service catalog for the placement service.