Account for nodes with the same BMC hostname in inspection lookup

Currently, the code expects a given hostname to be used by one one node.
This is not necessarily the case for Redfish where several Systems can
co-exist under the same BMC. Use MAC addresses to distinguish them.

Add more inline comments to explain the process.

Change-Id: Ifc5a18bffc7cbcdd8bbbd660aba61fa11403e7e8
This commit is contained in:
Dmitry Tantsur 2024-01-22 18:20:14 +01:00
parent a42f23f475
commit bf673c2761
No known key found for this signature in database
GPG Key ID: 315B2AF9FD216C60
3 changed files with 73 additions and 12 deletions

View File

@ -269,7 +269,17 @@ def lookup_node(context, mac_addresses, bmc_addresses, node_uuid=None):
'state': node.provision_state}) 'state': node.provision_state})
raise exception.NotFound() raise exception.NotFound()
# NOTE(dtantsur): in theory, if the node is found at this point, we could
# short-circuit the lookup process and return it without considering BMC
# addresses. However, I've seen cases where users ended up enrolling nodes
# with BMC addresses from different nodes. Continuing to process BMC
# addresses allows us to catch these situations that otherwise can lead
# to updating wrong nodes.
if bmc_addresses: if bmc_addresses:
# NOTE(dtantsur): the same BMC hostname can be used by several nodes,
# e.g. in case of Redfish. Find all suitable nodes first.
nodes_by_bmc = set()
for candidate in objects.Node.list( for candidate in objects.Node.list(
context, context,
filters={'provision_state': states.INSPECTWAIT}, filters={'provision_state': states.INSPECTWAIT},
@ -278,26 +288,41 @@ def lookup_node(context, mac_addresses, bmc_addresses, node_uuid=None):
for addr in candidate.driver_internal_info.get( for addr in candidate.driver_internal_info.get(
LOOKUP_CACHE_FIELD) or (): LOOKUP_CACHE_FIELD) or ():
if addr in bmc_addresses: if addr in bmc_addresses:
break nodes_by_bmc.add(candidate.uuid)
else:
continue
if node and candidate.uuid != node.uuid: # NOTE(dtantsur): if none of the nodes found by the BMC match the one
# found by the MACs, something is definitely wrong.
if node and nodes_by_bmc and node.uuid not in nodes_by_bmc:
LOG.error('Conflict on inspection lookup: nodes %(node1)s ' LOG.error('Conflict on inspection lookup: nodes %(node1)s '
'and %(node2)s both satisfy MAC addresses ' 'and %(node2)s both satisfy MAC addresses '
'(%(macs)s) and BMC address(s) (%(bmc)s). The cause ' '(%(macs)s) and BMC address(s) (%(bmc)s). The cause '
'may be ports attached to a wrong node.', 'may be ports attached to a wrong node.',
{'node1': node.uuid, {'node1': ', '.join(nodes_by_bmc),
'node2': candidate.uuid, 'node2': node.uuid,
'macs': ', '.join(mac_addresses), 'macs': ', '.join(mac_addresses),
'bmc': ', '.join(bmc_addresses)}) 'bmc': ', '.join(bmc_addresses)})
raise exception.NotFound() raise exception.NotFound()
# NOTE(dtantsur): at this point, if the node was found by the MAC
# addresses, it also matches the BMC address. We only need to handle
# the case when the node was not found by the MAC addresses.
if not node and nodes_by_bmc:
if len(nodes_by_bmc) > 1:
LOG.error('Several nodes %(nodes)s satisfy BMC address(s) '
'(%(bmc)s), but none of them satisfy MAC addresses '
'(%(macs)s). Ports must be created for a successful '
'inspection in this case.',
{'nodes': ', '.join(nodes_by_bmc),
'macs': ', '.join(mac_addresses),
'bmc': ', '.join(bmc_addresses)})
raise exception.NotFound()
node_uuid = nodes_by_bmc.pop()
try: try:
# Fetch the complete object now. # Fetch the complete object now.
node = objects.Node.get_by_uuid(context, candidate.uuid) node = objects.Node.get_by_uuid(context, node_uuid)
except exception.NotFound: except exception.NotFound:
pass # Deleted in-between? raise # Deleted in-between?
if not node: if not node:
LOG.error('No nodes satisfy MAC addresses (%(macs)s) and BMC ' LOG.error('No nodes satisfy MAC addresses (%(macs)s) and BMC '

View File

@ -381,6 +381,36 @@ class LookupNodeTestCase(db_base.DbTestCase):
self.assertRaises(exception.NotFound, utils.lookup_node, self.assertRaises(exception.NotFound, utils.lookup_node,
self.context, self.macs, [self.bmc2], None) self.context, self.macs, [self.bmc2], None)
def test_duplicate_bmc(self):
# This can happen with Redfish. There is no way to resolve the conflict
# other than by using MACs.
obj_utils.create_test_node(
self.context,
uuid=uuidutils.generate_uuid(),
driver_internal_info={utils.LOOKUP_CACHE_FIELD: [self.bmc]},
provision_state=states.INSPECTWAIT)
self.assertRaises(exception.NotFound, utils.lookup_node,
self.context, [], [self.bmc], None)
def test_duplicate_bmc_and_unknown_mac(self):
obj_utils.create_test_node(
self.context,
uuid=uuidutils.generate_uuid(),
driver_internal_info={utils.LOOKUP_CACHE_FIELD: [self.bmc]},
provision_state=states.INSPECTWAIT)
self.assertRaises(exception.NotFound, utils.lookup_node,
self.context, [self.unknown_mac], [self.bmc], None)
def test_duplicate_bmc_resolved_by_macs(self):
obj_utils.create_test_node(
self.context,
uuid=uuidutils.generate_uuid(),
driver_internal_info={utils.LOOKUP_CACHE_FIELD: [self.bmc]},
provision_state=states.INSPECTWAIT)
result = utils.lookup_node(
self.context, [self.macs[0]], [self.bmc], None)
self.assertEqual(self.node.uuid, result.uuid)
def test_by_uuid(self): def test_by_uuid(self):
result = utils.lookup_node(self.context, [], [], self.node.uuid) result = utils.lookup_node(self.context, [], [], self.node.uuid)
self.assertEqual(self.node.uuid, result.uuid) self.assertEqual(self.node.uuid, result.uuid)

View File

@ -0,0 +1,6 @@
---
fixes:
- |
Fixes the inspection lookup to consider all nodes with the same BMC
hostname, as can happen with Redfish. In this case, the nodes are
distinguished by MAC addresses.