Browse Source

Fail placement sync if _get_rp_by_name() fails

The Placement sync process first needs some input from Placement: the
UUID of the compute host RP. Fetching it is a remote call, just like the
Placement updates we send later, and it may fail in all the usual ways
remote calls fail. We need to fail the sync procedure if this remote
call fails.

Previously I had the mistaken belief that setting parent_uuid to None
would make the call invalid and Placement would reject it. But no, that
is a valid call and it creates a resource provider without a parent.
That is, the Neutron-managed resource providers end up in their own
resource provider tree instead of the compute host's resource provider
tree.

In this change we make sure to handle the failure of getting the compute
host RP properly. We must not continue with the updates. And we must set
the agent's resources_synced to False.

Change-Id: Ie6ad33e2170c53a16c39a31a8d7f6496170a90ce
Closes-Bug: #1818683
Bence Romsics 1 month ago
parent
commit
732dbdaf5e

+ 29
- 19
neutron/services/placement_report/plugin.py View File

@@ -67,14 +67,6 @@ class PlacementReportPlugin(service_base.ServicePluginBase):
67 67
         rps = self._placement_client.list_resource_providers(
68 68
             name=name)['resource_providers']
69 69
         # RP names are unique, therefore we can get 0 or 1. But not many.
70
-        if len(rps) != 1:
71
-            # NOTE(bence romsics): While we could raise() here and by detect
72
-            # an error a bit earlier, we want the error to surface in the
73
-            # sync batch below so it is going to be properly caught and is
74
-            # going to influence the agent's resources_synced attribute.
75
-            LOG.warning(
76
-                'placement client: no such resource provider: %s', name)
77
-            return {'uuid': None}
78 70
         return rps[0]
79 71
 
80 72
     def _sync_placement_state(self, agent, agent_db):
@@ -85,12 +77,26 @@ class PlacementReportPlugin(service_base.ServicePluginBase):
85 77
         supported_vnic_types = mech_driver.supported_vnic_types
86 78
         device_mappings = mech_driver.get_standard_device_mappings(agent)
87 79
 
80
+        log_msg = (
81
+            'Synchronization of resources '
82
+            'of agent type %(type)s '
83
+            'at host %(host)s '
84
+            'to placement %(result)s.')
85
+
88 86
         try:
89 87
             agent_host_rp_uuid = self._get_rp_by_name(
90 88
                 name=agent['host'])['uuid']
91
-        except ks_exc.HttpError:
92
-            # Delay the error for the same reason as in _get_rp_by_name().
93
-            agent_host_rp_uuid = None
89
+        except (IndexError, ks_exc.HttpError, ks_exc.ClientException):
90
+            agent_db.resources_synced = False
91
+            agent_db.update()
92
+
93
+            LOG.warning(
94
+                log_msg,
95
+                {'type': agent['agent_type'],
96
+                 'host': agent['host'],
97
+                 'result': 'failed'})
98
+
99
+            return
94 100
 
95 101
         state = placement_report.PlacementState(
96 102
             rp_bandwidths=configurations[
@@ -139,14 +145,18 @@ class PlacementReportPlugin(service_base.ServicePluginBase):
139 145
             agent_db.resources_synced = resources_synced
140 146
             agent_db.update()
141 147
 
142
-            LOG.debug(
143
-                'Synchronization of resources'
144
-                ' of agent type %(type)s'
145
-                ' at host %(host)s'
146
-                ' to placement %(result)s.',
147
-                {'type': agent['agent_type'],
148
-                 'host': agent['host'],
149
-                 'result': 'succeeded' if resources_synced else 'failed'})
148
+            if resources_synced:
149
+                LOG.debug(
150
+                    log_msg,
151
+                    {'type': agent['agent_type'],
152
+                     'host': agent['host'],
153
+                     'result': 'succeeded'})
154
+            else:
155
+                LOG.warning(
156
+                    log_msg,
157
+                    {'type': agent['agent_type'],
158
+                     'host': agent['host'],
159
+                     'result': 'failed'})
150 160
 
151 161
         self._batch_notifier.queue_event(batch)
152 162
 

+ 49
- 4
neutron/tests/unit/services/placement_report/test_plugin.py View File

@@ -14,6 +14,7 @@
14 14
 
15 15
 import mock
16 16
 
17
+from keystoneauth1 import exceptions as ks_exc
17 18
 from neutron_lib.agent import constants as agent_const
18 19
 from oslo_log import log as logging
19 20
 
@@ -39,14 +40,58 @@ class PlacementReportPluginTestCases(test_plugin.Ml2PluginV2TestCase):
39 40
         self.assertEqual('fake_rp', rp)
40 41
 
41 42
     def test__get_rp_by_name_not_found(self):
43
+        with mock.patch.object(
44
+                self.service_plugin._placement_client,
45
+                'list_resource_providers',
46
+                return_value={'resource_providers': []}):
47
+            self.assertRaises(
48
+                IndexError, self.service_plugin._get_rp_by_name, 'no_such_rp')
49
+
50
+    def test_no_sync_for_rp_name_not_found(self):
51
+        # looking all good
52
+        agent = {
53
+            'agent_type': 'test_mechanism_driver_agent',
54
+            'configurations': {'resource_provider_bandwidths': {}},
55
+            'host': 'fake host',
56
+        }
57
+        agent_db = mock.Mock()
58
+
42 59
         with mock.patch.object(
43 60
                 self.service_plugin._placement_client,
44 61
                 'list_resource_providers',
45 62
                 return_value={'resource_providers': []}), \
46
-            mock.patch.object(plugin.LOG, 'warning') as log_mock:
47
-            rp = self.service_plugin._get_rp_by_name('whatever')
48
-            self.assertEqual(1, log_mock.call_count)
49
-        self.assertEqual({'uuid': None}, rp)
63
+            mock.patch.object(
64
+                self.service_plugin._batch_notifier,
65
+                'queue_event') as mock_queue_event:
66
+
67
+            self.service_plugin._sync_placement_state(agent, agent_db)
68
+
69
+            self.assertFalse(agent_db.resources_synced)
70
+            agent_db.update.assert_called_with()
71
+            mock_queue_event.assert_not_called()
72
+
73
+    def test_no_sync_for_placement_gone(self):
74
+        # looking all good
75
+        agent = {
76
+            'agent_type': 'test_mechanism_driver_agent',
77
+            'configurations': {'resource_provider_bandwidths': {}},
78
+            'host': 'fake host',
79
+        }
80
+        agent_db = mock.Mock()
81
+
82
+        with mock.patch.object(
83
+                self.service_plugin._placement_client,
84
+                'list_resource_providers',
85
+                side_effect=ks_exc.HttpError), \
86
+            mock.patch.object(
87
+                self.service_plugin._batch_notifier,
88
+                'queue_event') as mock_queue_event:
89
+
90
+            self.service_plugin._sync_placement_state(agent, agent_db)
91
+
92
+            self.assertFalse(agent_db.resources_synced)
93
+            agent_db.update.assert_called_with()
94
+            mock_queue_event.assert_not_called()
50 95
 
51 96
     def test_no_sync_for_unsupported_agent_type(self):
52 97
         payload = mock.Mock(

Loading…
Cancel
Save