Fix rare HTTP 400 from port list API

This may happen when a node is deleted in parallel with calling
the port list API. Ports are fetched, then we try do fetch their
nodes and port groups. If either of them are removed in the meantime,
the API fails with HTTP 400. This change works around it.

Change-Id: Ie2d4c46c031ee86976abb6107433cdde87a4345a
Closes-Bug: #1748893
This commit is contained in:
Dmitry Tantsur 2018-02-15 18:49:12 +01:00
parent 5b75b6e4f2
commit 52dcc642d3
3 changed files with 72 additions and 2 deletions

View File

@ -16,6 +16,7 @@
import datetime import datetime
from ironic_lib import metrics_utils from ironic_lib import metrics_utils
from oslo_log import log
from oslo_utils import uuidutils from oslo_utils import uuidutils
import pecan import pecan
from pecan import rest from pecan import rest
@ -37,6 +38,7 @@ from ironic.common import utils as common_utils
from ironic import objects from ironic import objects
METRICS = metrics_utils.get_metrics_logger(__name__) METRICS = metrics_utils.get_metrics_logger(__name__)
LOG = log.getLogger(__name__)
_DEFAULT_RETURN_FIELDS = ('uuid', 'address') _DEFAULT_RETURN_FIELDS = ('uuid', 'address')
@ -263,8 +265,27 @@ class PortCollection(collection.Collection):
@staticmethod @staticmethod
def convert_with_links(rpc_ports, limit, url=None, fields=None, **kwargs): def convert_with_links(rpc_ports, limit, url=None, fields=None, **kwargs):
collection = PortCollection() collection = PortCollection()
collection.ports = [Port.convert_with_links(p, fields=fields) collection.ports = []
for p in rpc_ports] for rpc_port in rpc_ports:
try:
port = Port.convert_with_links(rpc_port, fields=fields)
except exception.NodeNotFound:
# NOTE(dtantsur): node was deleted after we fetched the port
# list, meaning that the port was also deleted. Skip it.
LOG.debug('Skipping port %s as its node was deleted',
rpc_port.uuid)
continue
except exception.PortgroupNotFound:
# NOTE(dtantsur): port group was deleted after we fetched the
# port list, it may mean that the port was deleted too, but
# we don't know it. Pretend that the port group was removed.
LOG.debug('Removing port group UUID from port %s as the port '
'group was deleted', rpc_port.uuid)
rpc_port.portgroup_id = None
port = Port.convert_with_links(rpc_port, fields=fields)
collection.ports.append(port)
collection.next = collection.get_next(limit, url=url, **kwargs) collection.next = collection.get_next(limit, url=url, **kwargs)
return collection return collection

View File

@ -203,6 +203,49 @@ class TestListPorts(test_api_base.BaseApiTest):
# never expose the node_id # never expose the node_id
self.assertNotIn('node_id', data['ports'][0]) self.assertNotIn('node_id', data['ports'][0])
# NOTE(dtantsur): apparently autospec does not work on class methods..
@mock.patch.object(objects.Node, 'get')
def test_list_with_deleted_node(self, mock_get_node):
# check that we don't end up with HTTP 400 when node deletion races
# with listing ports - see https://launchpad.net/bugs/1748893
obj_utils.create_test_port(self.context, node_id=self.node.id)
mock_get_node.side_effect = exception.NodeNotFound('boom')
data = self.get_json('/ports')
self.assertEqual([], data['ports'])
# NOTE(dtantsur): apparently autospec does not work on class methods..
@mock.patch.object(objects.Node, 'get')
def test_list_detailed_with_deleted_node(self, mock_get_node):
# check that we don't end up with HTTP 400 when node deletion races
# with listing ports - see https://launchpad.net/bugs/1748893
port = obj_utils.create_test_port(self.context, node_id=self.node.id)
port2 = obj_utils.create_test_port(self.context, node_id=self.node.id,
uuid=uuidutils.generate_uuid(),
address='66:44:55:33:11:22')
mock_get_node.side_effect = [exception.NodeNotFound('boom'), self.node]
data = self.get_json('/ports/detail')
# The "correct" port is still returned
self.assertEqual(1, len(data['ports']))
self.assertIn(data['ports'][0]['uuid'], {port.uuid, port2.uuid})
self.assertEqual(self.node.uuid, data['ports'][0]['node_uuid'])
# NOTE(dtantsur): apparently autospec does not work on class methods..
@mock.patch.object(objects.Portgroup, 'get')
def test_list_with_deleted_port_group(self, mock_get_pg):
# check that we don't end up with HTTP 400 when port group deletion
# races with listing ports - see https://launchpad.net/bugs/1748893
portgroup = obj_utils.create_test_portgroup(self.context,
node_id=self.node.id)
port = obj_utils.create_test_port(self.context, node_id=self.node.id,
portgroup_id=portgroup.id)
mock_get_pg.side_effect = exception.PortgroupNotFound('boom')
data = self.get_json(
'/ports/detail',
headers={api_base.Version.string: str(api_v1.max_version())}
)
self.assertEqual(port.uuid, data['ports'][0]["uuid"])
self.assertIsNone(data['ports'][0]["portgroup_uuid"])
def test_get_one(self): def test_get_one(self):
port = obj_utils.create_test_port(self.context, node_id=self.node.id) port = obj_utils.create_test_port(self.context, node_id=self.node.id)
data = self.get_json('/ports/%s' % port.uuid) data = self.get_json('/ports/%s' % port.uuid)

View File

@ -0,0 +1,6 @@
---
fixes:
- |
Fixes rare race condition which resulted in the port list API returning
HTTP 400 (bad request) if some nodes were being removed in parallel.
See `bug 1748893 <https://bugs.launchpad.net/bugs/1748893>`_ for details.