Fix rare HTTP 400 from port list API
This may happen when a node is deleted in parallel with calling the port list API. Ports are fetched, then we try do fetch their nodes and port groups. If either of them are removed in the meantime, the API fails with HTTP 400. This change works around it. Change-Id: Ie2d4c46c031ee86976abb6107433cdde87a4345a Closes-Bug: #1748893
This commit is contained in:
parent
5b75b6e4f2
commit
52dcc642d3
@ -16,6 +16,7 @@
|
|||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
from ironic_lib import metrics_utils
|
from ironic_lib import metrics_utils
|
||||||
|
from oslo_log import log
|
||||||
from oslo_utils import uuidutils
|
from oslo_utils import uuidutils
|
||||||
import pecan
|
import pecan
|
||||||
from pecan import rest
|
from pecan import rest
|
||||||
@ -37,6 +38,7 @@ from ironic.common import utils as common_utils
|
|||||||
from ironic import objects
|
from ironic import objects
|
||||||
|
|
||||||
METRICS = metrics_utils.get_metrics_logger(__name__)
|
METRICS = metrics_utils.get_metrics_logger(__name__)
|
||||||
|
LOG = log.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
_DEFAULT_RETURN_FIELDS = ('uuid', 'address')
|
_DEFAULT_RETURN_FIELDS = ('uuid', 'address')
|
||||||
@ -263,8 +265,27 @@ class PortCollection(collection.Collection):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def convert_with_links(rpc_ports, limit, url=None, fields=None, **kwargs):
|
def convert_with_links(rpc_ports, limit, url=None, fields=None, **kwargs):
|
||||||
collection = PortCollection()
|
collection = PortCollection()
|
||||||
collection.ports = [Port.convert_with_links(p, fields=fields)
|
collection.ports = []
|
||||||
for p in rpc_ports]
|
for rpc_port in rpc_ports:
|
||||||
|
try:
|
||||||
|
port = Port.convert_with_links(rpc_port, fields=fields)
|
||||||
|
except exception.NodeNotFound:
|
||||||
|
# NOTE(dtantsur): node was deleted after we fetched the port
|
||||||
|
# list, meaning that the port was also deleted. Skip it.
|
||||||
|
LOG.debug('Skipping port %s as its node was deleted',
|
||||||
|
rpc_port.uuid)
|
||||||
|
continue
|
||||||
|
except exception.PortgroupNotFound:
|
||||||
|
# NOTE(dtantsur): port group was deleted after we fetched the
|
||||||
|
# port list, it may mean that the port was deleted too, but
|
||||||
|
# we don't know it. Pretend that the port group was removed.
|
||||||
|
LOG.debug('Removing port group UUID from port %s as the port '
|
||||||
|
'group was deleted', rpc_port.uuid)
|
||||||
|
rpc_port.portgroup_id = None
|
||||||
|
port = Port.convert_with_links(rpc_port, fields=fields)
|
||||||
|
|
||||||
|
collection.ports.append(port)
|
||||||
|
|
||||||
collection.next = collection.get_next(limit, url=url, **kwargs)
|
collection.next = collection.get_next(limit, url=url, **kwargs)
|
||||||
return collection
|
return collection
|
||||||
|
|
||||||
|
@ -203,6 +203,49 @@ class TestListPorts(test_api_base.BaseApiTest):
|
|||||||
# never expose the node_id
|
# never expose the node_id
|
||||||
self.assertNotIn('node_id', data['ports'][0])
|
self.assertNotIn('node_id', data['ports'][0])
|
||||||
|
|
||||||
|
# NOTE(dtantsur): apparently autospec does not work on class methods..
|
||||||
|
@mock.patch.object(objects.Node, 'get')
|
||||||
|
def test_list_with_deleted_node(self, mock_get_node):
|
||||||
|
# check that we don't end up with HTTP 400 when node deletion races
|
||||||
|
# with listing ports - see https://launchpad.net/bugs/1748893
|
||||||
|
obj_utils.create_test_port(self.context, node_id=self.node.id)
|
||||||
|
mock_get_node.side_effect = exception.NodeNotFound('boom')
|
||||||
|
data = self.get_json('/ports')
|
||||||
|
self.assertEqual([], data['ports'])
|
||||||
|
|
||||||
|
# NOTE(dtantsur): apparently autospec does not work on class methods..
|
||||||
|
@mock.patch.object(objects.Node, 'get')
|
||||||
|
def test_list_detailed_with_deleted_node(self, mock_get_node):
|
||||||
|
# check that we don't end up with HTTP 400 when node deletion races
|
||||||
|
# with listing ports - see https://launchpad.net/bugs/1748893
|
||||||
|
port = obj_utils.create_test_port(self.context, node_id=self.node.id)
|
||||||
|
port2 = obj_utils.create_test_port(self.context, node_id=self.node.id,
|
||||||
|
uuid=uuidutils.generate_uuid(),
|
||||||
|
address='66:44:55:33:11:22')
|
||||||
|
mock_get_node.side_effect = [exception.NodeNotFound('boom'), self.node]
|
||||||
|
data = self.get_json('/ports/detail')
|
||||||
|
# The "correct" port is still returned
|
||||||
|
self.assertEqual(1, len(data['ports']))
|
||||||
|
self.assertIn(data['ports'][0]['uuid'], {port.uuid, port2.uuid})
|
||||||
|
self.assertEqual(self.node.uuid, data['ports'][0]['node_uuid'])
|
||||||
|
|
||||||
|
# NOTE(dtantsur): apparently autospec does not work on class methods..
|
||||||
|
@mock.patch.object(objects.Portgroup, 'get')
|
||||||
|
def test_list_with_deleted_port_group(self, mock_get_pg):
|
||||||
|
# check that we don't end up with HTTP 400 when port group deletion
|
||||||
|
# races with listing ports - see https://launchpad.net/bugs/1748893
|
||||||
|
portgroup = obj_utils.create_test_portgroup(self.context,
|
||||||
|
node_id=self.node.id)
|
||||||
|
port = obj_utils.create_test_port(self.context, node_id=self.node.id,
|
||||||
|
portgroup_id=portgroup.id)
|
||||||
|
mock_get_pg.side_effect = exception.PortgroupNotFound('boom')
|
||||||
|
data = self.get_json(
|
||||||
|
'/ports/detail',
|
||||||
|
headers={api_base.Version.string: str(api_v1.max_version())}
|
||||||
|
)
|
||||||
|
self.assertEqual(port.uuid, data['ports'][0]["uuid"])
|
||||||
|
self.assertIsNone(data['ports'][0]["portgroup_uuid"])
|
||||||
|
|
||||||
def test_get_one(self):
|
def test_get_one(self):
|
||||||
port = obj_utils.create_test_port(self.context, node_id=self.node.id)
|
port = obj_utils.create_test_port(self.context, node_id=self.node.id)
|
||||||
data = self.get_json('/ports/%s' % port.uuid)
|
data = self.get_json('/ports/%s' % port.uuid)
|
||||||
|
@ -0,0 +1,6 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Fixes rare race condition which resulted in the port list API returning
|
||||||
|
HTTP 400 (bad request) if some nodes were being removed in parallel.
|
||||||
|
See `bug 1748893 <https://bugs.launchpad.net/bugs/1748893>`_ for details.
|
Loading…
Reference in New Issue
Block a user