Fail placement sync if _get_rp_by_name() fails

The Placement sync process involves some input from Placement first.
That is the UUID of the compute host RP. This is a remote call just like
the Placement updates we send later and it also may fail in all the
usual ways of remote calls. We need to fail the sync procedure if this
remote call fails.

Previously I had the mistaken belief that if I set the parent_uuid to
None that will be an invalid call rejected by Placement. But no, that's
a valid call and creates a resource provider without a parent. That is
the neutron managed resource providers will be in their own resource
provider tree instead of the compute host's resource provider tree.

In this change we make sure to handle the failure of getting the compute
host RP properly. We must not continue with the updates. And we must set
the agent's resources_synced to False.

Change-Id: Ie6ad33e2170c53a16c39a31a8d7f6496170a90ce
Closes-Bug: #1818683
This commit is contained in:
Bence Romsics 2019-03-05 15:43:19 +01:00
parent 0755c97a2d
commit 732dbdaf5e
2 changed files with 78 additions and 23 deletions

View File

@ -67,14 +67,6 @@ class PlacementReportPlugin(service_base.ServicePluginBase):
rps = self._placement_client.list_resource_providers( rps = self._placement_client.list_resource_providers(
name=name)['resource_providers'] name=name)['resource_providers']
# RP names are unique, therefore we can get 0 or 1. But not many. # RP names are unique, therefore we can get 0 or 1. But not many.
if len(rps) != 1:
# NOTE(bence romsics): While we could raise() here and by detect
# an error a bit earlier, we want the error to surface in the
# sync batch below so it is going to be properly caught and is
# going to influence the agent's resources_synced attribute.
LOG.warning(
'placement client: no such resource provider: %s', name)
return {'uuid': None}
return rps[0] return rps[0]
def _sync_placement_state(self, agent, agent_db): def _sync_placement_state(self, agent, agent_db):
@ -85,12 +77,26 @@ class PlacementReportPlugin(service_base.ServicePluginBase):
supported_vnic_types = mech_driver.supported_vnic_types supported_vnic_types = mech_driver.supported_vnic_types
device_mappings = mech_driver.get_standard_device_mappings(agent) device_mappings = mech_driver.get_standard_device_mappings(agent)
log_msg = (
'Synchronization of resources '
'of agent type %(type)s '
'at host %(host)s '
'to placement %(result)s.')
try: try:
agent_host_rp_uuid = self._get_rp_by_name( agent_host_rp_uuid = self._get_rp_by_name(
name=agent['host'])['uuid'] name=agent['host'])['uuid']
except ks_exc.HttpError: except (IndexError, ks_exc.HttpError, ks_exc.ClientException):
# Delay the error for the same reason as in _get_rp_by_name(). agent_db.resources_synced = False
agent_host_rp_uuid = None agent_db.update()
LOG.warning(
log_msg,
{'type': agent['agent_type'],
'host': agent['host'],
'result': 'failed'})
return
state = placement_report.PlacementState( state = placement_report.PlacementState(
rp_bandwidths=configurations[ rp_bandwidths=configurations[
@ -139,14 +145,18 @@ class PlacementReportPlugin(service_base.ServicePluginBase):
agent_db.resources_synced = resources_synced agent_db.resources_synced = resources_synced
agent_db.update() agent_db.update()
LOG.debug( if resources_synced:
'Synchronization of resources' LOG.debug(
' of agent type %(type)s' log_msg,
' at host %(host)s' {'type': agent['agent_type'],
' to placement %(result)s.', 'host': agent['host'],
{'type': agent['agent_type'], 'result': 'succeeded'})
'host': agent['host'], else:
'result': 'succeeded' if resources_synced else 'failed'}) LOG.warning(
log_msg,
{'type': agent['agent_type'],
'host': agent['host'],
'result': 'failed'})
self._batch_notifier.queue_event(batch) self._batch_notifier.queue_event(batch)

View File

@ -14,6 +14,7 @@
import mock import mock
from keystoneauth1 import exceptions as ks_exc
from neutron_lib.agent import constants as agent_const from neutron_lib.agent import constants as agent_const
from oslo_log import log as logging from oslo_log import log as logging
@ -39,14 +40,58 @@ class PlacementReportPluginTestCases(test_plugin.Ml2PluginV2TestCase):
self.assertEqual('fake_rp', rp) self.assertEqual('fake_rp', rp)
def test__get_rp_by_name_not_found(self): def test__get_rp_by_name_not_found(self):
with mock.patch.object(
self.service_plugin._placement_client,
'list_resource_providers',
return_value={'resource_providers': []}):
self.assertRaises(
IndexError, self.service_plugin._get_rp_by_name, 'no_such_rp')
def test_no_sync_for_rp_name_not_found(self):
# looking all good
agent = {
'agent_type': 'test_mechanism_driver_agent',
'configurations': {'resource_provider_bandwidths': {}},
'host': 'fake host',
}
agent_db = mock.Mock()
with mock.patch.object( with mock.patch.object(
self.service_plugin._placement_client, self.service_plugin._placement_client,
'list_resource_providers', 'list_resource_providers',
return_value={'resource_providers': []}), \ return_value={'resource_providers': []}), \
mock.patch.object(plugin.LOG, 'warning') as log_mock: mock.patch.object(
rp = self.service_plugin._get_rp_by_name('whatever') self.service_plugin._batch_notifier,
self.assertEqual(1, log_mock.call_count) 'queue_event') as mock_queue_event:
self.assertEqual({'uuid': None}, rp)
self.service_plugin._sync_placement_state(agent, agent_db)
self.assertFalse(agent_db.resources_synced)
agent_db.update.assert_called_with()
mock_queue_event.assert_not_called()
def test_no_sync_for_placement_gone(self):
# looking all good
agent = {
'agent_type': 'test_mechanism_driver_agent',
'configurations': {'resource_provider_bandwidths': {}},
'host': 'fake host',
}
agent_db = mock.Mock()
with mock.patch.object(
self.service_plugin._placement_client,
'list_resource_providers',
side_effect=ks_exc.HttpError), \
mock.patch.object(
self.service_plugin._batch_notifier,
'queue_event') as mock_queue_event:
self.service_plugin._sync_placement_state(agent, agent_db)
self.assertFalse(agent_db.resources_synced)
agent_db.update.assert_called_with()
mock_queue_event.assert_not_called()
def test_no_sync_for_unsupported_agent_type(self): def test_no_sync_for_unsupported_agent_type(self):
payload = mock.Mock( payload = mock.Mock(