From 52dcc642d372e23fd59be44e0f9f5627fac5cec4 Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Thu, 15 Feb 2018 18:49:12 +0100 Subject: [PATCH] Fix rare HTTP 400 from port list API This may happen when a node is deleted in parallel with calling the port list API. Ports are fetched, then we try do fetch their nodes and port groups. If either of them are removed in the meantime, the API fails with HTTP 400. This change works around it. Change-Id: Ie2d4c46c031ee86976abb6107433cdde87a4345a Closes-Bug: #1748893 --- ironic/api/controllers/v1/port.py | 25 ++++++++++- .../unit/api/controllers/v1/test_port.py | 43 +++++++++++++++++++ ...ort-list-bad-request-078512862c22118e.yaml | 6 +++ 3 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/port-list-bad-request-078512862c22118e.yaml diff --git a/ironic/api/controllers/v1/port.py b/ironic/api/controllers/v1/port.py index 2d43cb883c..0fbb14bd7d 100644 --- a/ironic/api/controllers/v1/port.py +++ b/ironic/api/controllers/v1/port.py @@ -16,6 +16,7 @@ import datetime from ironic_lib import metrics_utils +from oslo_log import log from oslo_utils import uuidutils import pecan from pecan import rest @@ -37,6 +38,7 @@ from ironic.common import utils as common_utils from ironic import objects METRICS = metrics_utils.get_metrics_logger(__name__) +LOG = log.getLogger(__name__) _DEFAULT_RETURN_FIELDS = ('uuid', 'address') @@ -263,8 +265,27 @@ class PortCollection(collection.Collection): @staticmethod def convert_with_links(rpc_ports, limit, url=None, fields=None, **kwargs): collection = PortCollection() - collection.ports = [Port.convert_with_links(p, fields=fields) - for p in rpc_ports] + collection.ports = [] + for rpc_port in rpc_ports: + try: + port = Port.convert_with_links(rpc_port, fields=fields) + except exception.NodeNotFound: + # NOTE(dtantsur): node was deleted after we fetched the port + # list, meaning that the port was also deleted. Skip it. + LOG.debug('Skipping port %s as its node was deleted', + rpc_port.uuid) + continue + except exception.PortgroupNotFound: + # NOTE(dtantsur): port group was deleted after we fetched the + # port list, it may mean that the port was deleted too, but + # we don't know it. Pretend that the port group was removed. + LOG.debug('Removing port group UUID from port %s as the port ' + 'group was deleted', rpc_port.uuid) + rpc_port.portgroup_id = None + port = Port.convert_with_links(rpc_port, fields=fields) + + collection.ports.append(port) + collection.next = collection.get_next(limit, url=url, **kwargs) return collection diff --git a/ironic/tests/unit/api/controllers/v1/test_port.py b/ironic/tests/unit/api/controllers/v1/test_port.py index 183450aca5..0effedd640 100644 --- a/ironic/tests/unit/api/controllers/v1/test_port.py +++ b/ironic/tests/unit/api/controllers/v1/test_port.py @@ -203,6 +203,49 @@ class TestListPorts(test_api_base.BaseApiTest): # never expose the node_id self.assertNotIn('node_id', data['ports'][0]) + # NOTE(dtantsur): apparently autospec does not work on class methods.. + @mock.patch.object(objects.Node, 'get') + def test_list_with_deleted_node(self, mock_get_node): + # check that we don't end up with HTTP 400 when node deletion races + # with listing ports - see https://launchpad.net/bugs/1748893 + obj_utils.create_test_port(self.context, node_id=self.node.id) + mock_get_node.side_effect = exception.NodeNotFound('boom') + data = self.get_json('/ports') + self.assertEqual([], data['ports']) + + # NOTE(dtantsur): apparently autospec does not work on class methods.. + @mock.patch.object(objects.Node, 'get') + def test_list_detailed_with_deleted_node(self, mock_get_node): + # check that we don't end up with HTTP 400 when node deletion races + # with listing ports - see https://launchpad.net/bugs/1748893 + port = obj_utils.create_test_port(self.context, node_id=self.node.id) + port2 = obj_utils.create_test_port(self.context, node_id=self.node.id, + uuid=uuidutils.generate_uuid(), + address='66:44:55:33:11:22') + mock_get_node.side_effect = [exception.NodeNotFound('boom'), self.node] + data = self.get_json('/ports/detail') + # The "correct" port is still returned + self.assertEqual(1, len(data['ports'])) + self.assertIn(data['ports'][0]['uuid'], {port.uuid, port2.uuid}) + self.assertEqual(self.node.uuid, data['ports'][0]['node_uuid']) + + # NOTE(dtantsur): apparently autospec does not work on class methods.. + @mock.patch.object(objects.Portgroup, 'get') + def test_list_with_deleted_port_group(self, mock_get_pg): + # check that we don't end up with HTTP 400 when port group deletion + # races with listing ports - see https://launchpad.net/bugs/1748893 + portgroup = obj_utils.create_test_portgroup(self.context, + node_id=self.node.id) + port = obj_utils.create_test_port(self.context, node_id=self.node.id, + portgroup_id=portgroup.id) + mock_get_pg.side_effect = exception.PortgroupNotFound('boom') + data = self.get_json( + '/ports/detail', + headers={api_base.Version.string: str(api_v1.max_version())} + ) + self.assertEqual(port.uuid, data['ports'][0]["uuid"]) + self.assertIsNone(data['ports'][0]["portgroup_uuid"]) + def test_get_one(self): port = obj_utils.create_test_port(self.context, node_id=self.node.id) data = self.get_json('/ports/%s' % port.uuid) diff --git a/releasenotes/notes/port-list-bad-request-078512862c22118e.yaml b/releasenotes/notes/port-list-bad-request-078512862c22118e.yaml new file mode 100644 index 0000000000..3f0116159a --- /dev/null +++ b/releasenotes/notes/port-list-bad-request-078512862c22118e.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Fixes rare race condition which resulted in the port list API returning + HTTP 400 (bad request) if some nodes were being removed in parallel. + See `bug 1748893 `_ for details.