Use aggregate_add_host in nova-manage

When nova-manage placement sync_aggregates was added [1], it duplicated
some report client logic (aggregate_add_host) to do provider aggregate
retrieval and update, so as to avoid a second call to retrieve the
host's resource provider record. It also left a TODO to handle
generation conflicts.

Here we change the signature of aggregate_add_host to accept *either*
the host name or RP UUID, and refactor the nova-manage placement
sync_aggregates code to use it.

The behavior in terms of exit codes and messaging should be largely
unchanged, though there may be some subtle differences in corner cases.
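
For illustration, a minimal sketch of the two call patterns the updated
signature supports (the client, context, and aggregate names here are
placeholders, not code from this change):

    # Existing path: look up the resource provider by compute host name.
    placement.aggregate_add_host(ctxt, agg.uuid, host_name='compute1')

    # New path, used by nova-manage placement sync_aggregates: skip the
    # name lookup when the resource provider UUID is already known.
    placement.aggregate_add_host(ctxt, agg.uuid, rp_uuid=rp_uuid)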

[1] Iac67b6bf7e46fbac02b9d3cb59efc3c59b9e56c8

Change-Id: Iaa4ddf786ce7d31d2cee660d5196e5e530ec4bd3
Author: Eric Fried
Date: 2019-02-28 09:59:24 -06:00
Parent: 358776a303
Commit: c43f7e664d
9 changed files with 109 additions and 142 deletions


@@ -2232,65 +2232,34 @@ class PlacementCommands(object):
print(e.format_message())
return 1
# We've got our compute node record, so now we can look to
# see if the matching resource provider, found via compute
# node uuid, is in the same aggregate in placement, found via
# aggregate uuid.
# NOTE(mriedem): We could re-use placement.aggregate_add_host
# here although that has to do the provider lookup by host as
# well, but it does handle generation conflicts.
resp = placement.get( # use 1.19 to get the generation
'/resource_providers/%s/aggregates' % rp_uuid,
version='1.19')
if resp:
body = resp.json()
provider_aggregate_uuids = body['aggregates']
# The moment of truth: is the provider in the same host
# aggregate relationship?
aggregate_uuid = aggregate.uuid
if aggregate_uuid not in provider_aggregate_uuids:
# Add the resource provider to this aggregate.
provider_aggregate_uuids.append(aggregate_uuid)
# Now update the provider aggregates using the
# generation to ensure we're conflict-free.
aggregate_update_body = {
'aggregates': provider_aggregate_uuids,
'resource_provider_generation':
body['resource_provider_generation']
}
put_resp = placement.put(
'/resource_providers/%s/aggregates' % rp_uuid,
aggregate_update_body, version='1.19')
if put_resp:
output(_('Successfully added host (%(host)s) and '
'provider (%(provider)s) to aggregate '
'(%(aggregate)s).') %
{'host': host, 'provider': rp_uuid,
'aggregate': aggregate_uuid})
elif put_resp.status_code == 404:
# We must have raced with a delete on the resource
# provider.
providers_not_found[host] = rp_uuid
else:
# TODO(mriedem): Handle 409 conflicts by retrying
# the operation.
print(_('Failed updating provider aggregates for '
'host (%(host)s), provider (%(provider)s) '
'and aggregate (%(aggregate)s). Error: '
'%(error)s') %
{'host': host, 'provider': rp_uuid,
'aggregate': aggregate_uuid,
'error': put_resp.text})
return 3
elif resp.status_code == 404:
# We've got our compute node record, so now we can ensure that
# the matching resource provider, found via compute node uuid,
# is in the same aggregate in placement, found via aggregate
# uuid.
try:
placement.aggregate_add_host(ctxt, aggregate.uuid,
rp_uuid=rp_uuid)
output(_('Successfully added host (%(host)s) and '
'provider (%(provider)s) to aggregate '
'(%(aggregate)s).') %
{'host': host, 'provider': rp_uuid,
'aggregate': aggregate.uuid})
except exception.ResourceProviderNotFound:
# The resource provider wasn't found. Store this for later.
providers_not_found[host] = rp_uuid
else:
print(_('An error occurred getting resource provider '
'aggregates from placement for provider '
'%(provider)s. Error: %(error)s') %
{'provider': rp_uuid, 'error': resp.text})
except exception.ResourceProviderAggregateRetrievalFailed as e:
print(e.message)
return 2
except exception.NovaException as e:
# The exception message is too generic in this case
print(_('Failed updating provider aggregates for '
'host (%(host)s), provider (%(provider)s) '
'and aggregate (%(aggregate)s). Error: '
'%(error)s') %
{'host': host, 'provider': rp_uuid,
'aggregate': aggregate.uuid,
'error': e.message})
return 3
# Now do our error handling. Note that there is no real priority on
# the error code we return. We want to dump all of the issues we hit


@@ -5536,7 +5536,7 @@ class AggregateAPI(base.Base):
self.query_client.update_aggregates(context, [aggregate])
try:
self.placement_client.aggregate_add_host(
context, aggregate.uuid, host_name)
context, aggregate.uuid, host_name=host_name)
except exception.PlacementAPIConnectFailure:
# NOTE(jaypipes): Rocky should be able to tolerate the nova-api
# service not communicating with the Placement API, so just log a


@@ -2219,7 +2219,8 @@ class SchedulerReportClient(object):
@retrying.retry(stop_max_attempt_number=4,
retry_on_exception=lambda e: isinstance(
e, exception.ResourceProviderUpdateConflict))
def aggregate_add_host(self, context, agg_uuid, host_name):
def aggregate_add_host(self, context, agg_uuid, host_name=None,
rp_uuid=None):
"""Looks up a resource provider by the supplied host name, and adds the
aggregate with supplied UUID to that resource provider.
@@ -2230,7 +2231,10 @@
:param context: The security context
:param agg_uuid: UUID of the aggregate being modified
:param host_name: Name of the nova-compute service worker to look up a
resource provider for
resource provider for. Either host_name or rp_uuid is
required.
:param rp_uuid: UUID of the resource provider to add to the aggregate.
Either host_name or rp_uuid is required.
:raises: `exceptions.ResourceProviderNotFound` if no resource provider
matching the host name could be found from the placement API
:raises: `exception.ResourceProviderAggregateRetrievalFailed` when
@@ -2240,14 +2244,17 @@
:raises: `exception.ResourceProviderUpdateConflict` if a concurrent
update to the provider was detected.
"""
rp = self._get_provider_by_name(context, host_name)
# NOTE(jaypipes): Unfortunately, due to @safe_connect,
# _get_provider_by_name() can return None. If that happens, raise an
# error so we can trap for it in the Nova API code and ignore in Rocky,
# blow up in Stein.
if rp is None:
raise exception.PlacementAPIConnectFailure()
rp_uuid = rp['uuid']
if host_name is None and rp_uuid is None:
raise ValueError(_("Either host_name or rp_uuid is required"))
if rp_uuid is None:
rp = self._get_provider_by_name(context, host_name)
# NOTE(jaypipes): Unfortunately, due to @safe_connect,
# _get_provider_by_name() can return None. If that happens, raise
# an error so we can trap for it in the Nova API code and ignore in
# Rocky, blow up in Stein.
if rp is None:
raise exception.PlacementAPIConnectFailure()
rp_uuid = rp['uuid']
# Now attempt to add the aggregate to the resource provider. We don't
# want to overwrite any other aggregates the provider may be associated
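
For reference, a hedged sketch of how callers exercise the updated
method and the errors they should expect, per the docstring and
validation above (the client and UUID names are illustrative):

    # Neither keyword supplied: ValueError is raised before any API call.
    # client.aggregate_add_host(ctxt, agg_uuid)

    # host_name only: the provider is looked up by name first; a None
    # result from @safe_connect surfaces as PlacementAPIConnectFailure.
    client.aggregate_add_host(ctxt, agg_uuid, host_name='cn1')

    # rp_uuid only: the name lookup is skipped and the aggregate is added
    # directly to that provider, retrying on generation conflicts.
    client.aggregate_add_host(ctxt, agg_uuid, rp_uuid=rp_uuid)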


@@ -150,7 +150,8 @@ class AggregateRequestFiltersTest(test.TestCase,
# FIXME(efried): This should be a thing we can do without internals
self.report_client._ensure_resource_provider(
self.context, host_uuid, name=host)
self.report_client.aggregate_add_host(self.context, agg['uuid'], host)
self.report_client.aggregate_add_host(self.context, agg['uuid'],
host_name=host)
def _wait_for_state_change(self, server, from_status):
for i in range(0, 50):


@@ -979,7 +979,7 @@ class SchedulerReportClientTests(SchedulerReportClientTestBase):
# Now associate the compute **host name** with an aggregate and
# ensure the aggregate association is saved properly
self.client.aggregate_add_host(
self.context, agg_uuid, self.compute_name)
self.context, agg_uuid, host_name=self.compute_name)
# Check that the ProviderTree cache hasn't been modified (since
# the aggregate_add_host() method is only called from nova-api and


@@ -12228,7 +12228,7 @@ class ComputeAPIAggrTestCase(BaseTestCase):
mock.call(context=self.context, aggregate=aggr,
action='add_host', phase='end')])
mock_add_host.assert_called_once_with(
self.context, aggr.uuid, fake_host)
self.context, aggr.uuid, host_name=fake_host)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'aggregate_add_host')
@@ -12568,7 +12568,7 @@ class ComputeAPIAggrCallsSchedulerTestCase(test.NoDBTestCase):
host_param='fakehost',
host='fakehost')
mock_add_host.assert_called_once_with(
self.context, agg.uuid, 'fakehost')
self.context, agg.uuid, host_name='fakehost')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'aggregate_remove_host')


@@ -478,7 +478,7 @@ class ComputeHostAPITestCase(test.TestCase):
aggregate.id,
'fake-compute-host')
mock_add_host.assert_called_once_with(
mock.ANY, aggregate.uuid, 'fake-compute-host')
mock.ANY, aggregate.uuid, host_name='fake-compute-host')
self.controller.delete(self.req, compute.id)
result = self.aggregate_api.get_aggregate(self.ctxt,
aggregate.id).hosts


@@ -3851,11 +3851,26 @@ class TestAggregateAddRemoveHost(SchedulerReportClientTestCase):
mock_get_aggs.return_value = report.AggInfo(aggregates=set([]),
generation=42)
name = 'cn1'
self.client.aggregate_add_host(self.context, agg_uuid, name)
self.client.aggregate_add_host(self.context, agg_uuid, host_name=name)
mock_set_aggs.assert_called_once_with(
self.context, uuids.cn1, set([agg_uuid]), use_cache=False,
generation=42)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'set_aggregates_for_provider')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_get_provider_aggregates')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_get_provider_by_name', new=mock.NonCallableMock())
def test_aggregate_add_host_rp_uuid(self, mock_get_aggs, mock_set_aggs):
mock_get_aggs.return_value = report.AggInfo(
aggregates=set([]), generation=42)
self.client.aggregate_add_host(
self.context, uuids.agg1, rp_uuid=uuids.cn1)
mock_set_aggs.assert_called_once_with(
self.context, uuids.cn1, set([uuids.agg1]), use_cache=False,
generation=42)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'set_aggregates_for_provider')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
@@ -3874,13 +3889,13 @@ class TestAggregateAddRemoveHost(SchedulerReportClientTestCase):
mock_get_aggs.return_value = report.AggInfo(
aggregates=set([agg1_uuid]), generation=42)
name = 'cn1'
self.client.aggregate_add_host(self.context, agg1_uuid, name)
self.client.aggregate_add_host(self.context, agg1_uuid, host_name=name)
mock_set_aggs.assert_not_called()
mock_get_aggs.reset_mock()
mock_set_aggs.reset_mock()
mock_get_aggs.return_value = report.AggInfo(
aggregates=set([agg1_uuid, agg3_uuid]), generation=43)
self.client.aggregate_add_host(self.context, agg2_uuid, name)
self.client.aggregate_add_host(self.context, agg2_uuid, host_name=name)
mock_set_aggs.assert_called_once_with(
self.context, uuids.cn1, set([agg1_uuid, agg2_uuid, agg3_uuid]),
use_cache=False, generation=43)
@@ -3898,7 +3913,8 @@ class TestAggregateAddRemoveHost(SchedulerReportClientTestCase):
agg_uuid = uuids.agg1
self.assertRaises(
exception.PlacementAPIConnectFailure,
self.client.aggregate_add_host, self.context, agg_uuid, name)
self.client.aggregate_add_host, self.context, agg_uuid,
host_name=name)
self.mock_get.assert_not_called()
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
@@ -3923,7 +3939,8 @@ class TestAggregateAddRemoveHost(SchedulerReportClientTestCase):
uuid='uuid', generation=43, error='error'),
None,
)
self.client.aggregate_add_host(self.context, uuids.agg1, 'cn1')
self.client.aggregate_add_host(self.context, uuids.agg1,
host_name='cn1')
mock_set_aggs.assert_has_calls([mock.call(
self.context, uuids.cn1, set([uuids.agg1]), use_cache=False,
generation=gen) for gen in gens])
@@ -3948,11 +3965,17 @@ class TestAggregateAddRemoveHost(SchedulerReportClientTestCase):
uuid='uuid', generation=gen, error='error') for gen in gens)
self.assertRaises(
exception.ResourceProviderUpdateConflict,
self.client.aggregate_add_host, self.context, uuids.agg1, 'cn1')
self.client.aggregate_add_host, self.context, uuids.agg1,
host_name='cn1')
mock_set_aggs.assert_has_calls([mock.call(
self.context, uuids.cn1, set([uuids.agg1]), use_cache=False,
generation=gen) for gen in gens])
def test_aggregate_add_host_no_host_name_or_rp_uuid(self):
self.assertRaises(
ValueError,
self.client.aggregate_add_host, self.context, uuids.agg1)
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_get_provider_by_name')
def test_aggregate_remove_host_no_placement(self, mock_get_by_name):


@@ -21,7 +21,6 @@ import ddt
import fixtures
import mock
from oslo_db import exception as db_exc
from oslo_serialization import jsonutils
from oslo_utils.fixture import uuidsentinel
from oslo_utils import uuidutils
from six.moves import StringIO
@@ -2866,64 +2865,42 @@ class TestNovaManagePlacement(test.NoDBTestCase):
mock.sentinel.cell_context, 'host1')
@mock.patch('nova.compute.api.AggregateAPI.get_aggregate_list',
return_value=objects.AggregateList(objects=[
objects.Aggregate(name='foo', hosts=['host1'])]))
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.get',
return_value=fake_requests.FakeResponse(404))
def test_sync_aggregates_get_provider_aggs_provider_not_found(
self, mock_placement_get, mock_get_aggs):
"""Tests the scenario that a resource provider is not found in the
placement service for a compute node found in a nova host aggregate.
"""
with mock.patch.object(self.cli, '_get_rp_uuid_for_host',
return_value=uuidsentinel.rp_uuid):
result = self.cli.sync_aggregates(verbose=True)
self.assertEqual(6, result)
self.assertIn('Unable to find matching resource provider record in '
'placement with uuid for the following hosts: '
'(host1=%s)' % uuidsentinel.rp_uuid,
self.output.getvalue())
mock_placement_get.assert_called_once_with(
'/resource_providers/%s/aggregates' % uuidsentinel.rp_uuid,
version='1.19')
@mock.patch('nova.compute.api.AggregateAPI.get_aggregate_list',
return_value=objects.AggregateList(objects=[
objects.Aggregate(name='foo', hosts=['host1'])]))
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.get',
return_value=fake_requests.FakeResponse(500, content='yikes!'))
new=mock.Mock(return_value=objects.AggregateList(objects=[
objects.Aggregate(name='foo', hosts=['host1'],
uuid=uuidsentinel.aggregate)])))
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'aggregate_add_host')
def test_sync_aggregates_get_provider_aggs_placement_server_error(
self, mock_placement_get, mock_get_aggs):
self, mock_agg_add):
"""Tests the scenario that placement returns an unexpected server
error when getting aggregates for a given resource provider.
"""
mock_agg_add.side_effect = (
exception.ResourceProviderAggregateRetrievalFailed(
uuid=uuidsentinel.rp_uuid))
with mock.patch.object(self.cli, '_get_rp_uuid_for_host',
return_value=uuidsentinel.rp_uuid):
result = self.cli.sync_aggregates(verbose=True)
self.assertEqual(2, result)
self.assertIn('An error occurred getting resource provider '
'aggregates from placement for provider %s. '
'Error: yikes!' % uuidsentinel.rp_uuid,
self.assertIn('Failed to get aggregates for resource provider with '
'UUID %s' % uuidsentinel.rp_uuid,
self.output.getvalue())
@mock.patch('nova.compute.api.AggregateAPI.get_aggregate_list',
return_value=objects.AggregateList(objects=[
new=mock.Mock(return_value=objects.AggregateList(objects=[
objects.Aggregate(name='foo', hosts=['host1'],
uuid=uuidsentinel.aggregate)]))
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.get')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.put',
return_value=fake_requests.FakeResponse(404))
uuid=uuidsentinel.aggregate)])))
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'aggregate_add_host')
def test_sync_aggregates_put_aggregates_fails_provider_not_found(
self, mock_placement_put, mock_placement_get, mock_get_aggs):
self, mock_agg_add):
"""Tests the scenario that we are trying to add a provider to an
aggregate in placement but the
PUT /resource_providers/{rp_uuid}/aggregates call fails with a 404
because the provider is not found.
"""
mock_placement_get.return_value = (
fake_requests.FakeResponse(200, content=jsonutils.dumps({
'aggregates': [],
'resource_provider_generation': 1})))
mock_agg_add.side_effect = exception.ResourceProviderNotFound(
name_or_uuid=uuidsentinel.rp_uuid)
with mock.patch.object(self.cli, '_get_rp_uuid_for_host',
return_value=uuidsentinel.rp_uuid):
result = self.cli.sync_aggregates(verbose=True)
@@ -2932,43 +2909,33 @@ class TestNovaManagePlacement(test.NoDBTestCase):
'placement with uuid for the following hosts: '
'(host1=%s)' % uuidsentinel.rp_uuid,
self.output.getvalue())
expected_body = {
'aggregates': [uuidsentinel.aggregate],
'resource_provider_generation': 1
}
self.assertEqual(1, mock_placement_put.call_count)
self.assertDictEqual(expected_body, mock_placement_put.call_args[0][1])
mock_agg_add.assert_called_once_with(
mock.ANY, uuidsentinel.aggregate, rp_uuid=uuidsentinel.rp_uuid)
@mock.patch('nova.compute.api.AggregateAPI.get_aggregate_list',
return_value=objects.AggregateList(objects=[
new=mock.Mock(return_value=objects.AggregateList(objects=[
objects.Aggregate(name='foo', hosts=['host1'],
uuid=uuidsentinel.aggregate)]))
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.get')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.put',
return_value=fake_requests.FakeResponse(
409,
content="Resource provider's generation already changed"))
uuid=uuidsentinel.aggregate)])))
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'aggregate_add_host')
def test_sync_aggregates_put_aggregates_fails_generation_conflict(
self, mock_placement_put, mock_placement_get, mock_get_aggs):
self, mock_agg_add):
"""Tests the scenario that we are trying to add a provider to an
aggregate in placement but the
PUT /resource_providers/{rp_uuid}/aggregates call fails with a 404
because the provider is not found.
PUT /resource_providers/{rp_uuid}/aggregates call fails with a 409
generation conflict (even after retries).
"""
mock_placement_get.return_value = (
fake_requests.FakeResponse(200, content=jsonutils.dumps({
'aggregates': [],
'resource_provider_generation': 1})))
mock_agg_add.side_effect = exception.ResourceProviderUpdateConflict(
uuid=uuidsentinel.rp_uuid, generation=1, error="Conflict!")
with mock.patch.object(self.cli, '_get_rp_uuid_for_host',
return_value=uuidsentinel.rp_uuid):
result = self.cli.sync_aggregates(verbose=True)
self.assertEqual(3, result)
self.assertIn("Failed updating provider aggregates for "
"host (host1), provider (%s) and aggregate "
"(%s). Error: Resource provider's generation already "
"changed" %
(uuidsentinel.rp_uuid, uuidsentinel.aggregate),
"(%s)." % (uuidsentinel.rp_uuid, uuidsentinel.aggregate),
self.output.getvalue())
self.assertIn("Conflict!", self.output.getvalue())
class TestNovaManageMain(test.NoDBTestCase):