diff --git a/releasenotes/notes/ipmi-discovery-72f93156bcaf461d.yaml b/releasenotes/notes/ipmi-discovery-72f93156bcaf461d.yaml new file mode 100644 index 000000000..425846eed --- /dev/null +++ b/releasenotes/notes/ipmi-discovery-72f93156bcaf461d.yaml @@ -0,0 +1,12 @@ +--- +features: + - | + Add two new workflows for discovering IPMI BMC: ``discover_nodes`` and + ``discover_and_enroll_nodes``. + + The former scans given IP addresses and ports, and tries to log into BMC + using given credentials. It returns node information in a format accepted + by the TripleO enrollment workflow. + + The latter calls the former, enrolls the resulting nodes and optionally + moves them to manageable state. diff --git a/requirements.txt b/requirements.txt index 265a09052..0cfc7e537 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,3 +23,4 @@ python-novaclient>=9.0.0 # Apache-2.0 passlib>=1.7.0 # BSD netifaces>=0.10.4 # MIT paramiko>=2.0 # LGPLv2.1+ +netaddr>=0.7.13,!=0.7.16 # BSD diff --git a/setup.cfg b/setup.cfg index 33be86f21..e75414877 100644 --- a/setup.cfg +++ b/setup.cfg @@ -71,6 +71,8 @@ mistral.actions = tripleo.baremetal.update_node_capability = tripleo_common.actions.baremetal:UpdateNodeCapability tripleo.baremetal.cell_v2_discover_hosts = tripleo_common.actions.baremetal:CellV2DiscoverHostsAction tripleo.baremetal.validate_nodes = tripleo_common.actions.baremetal:ValidateNodes + tripleo.baremetal.get_candidate_nodes = tripleo_common.actions.baremetal:GetCandidateNodes + tripleo.baremetal.probe_node = tripleo_common.actions.baremetal:ProbeNode tripleo.deployment.config = tripleo_common.actions.deployment:OrchestrationDeployAction tripleo.deployment.deploy = tripleo_common.actions.deployment:DeployStackAction tripleo.deployment.overcloudrc = tripleo_common.actions.deployment:OvercloudRcAction diff --git a/tripleo_common/actions/baremetal.py b/tripleo_common/actions/baremetal.py index 35e67c679..4c0a93268 100644 --- a/tripleo_common/actions/baremetal.py +++ 
class GetCandidateNodes(base.TripleOAction):
    """Given IPs, ports and credentials, return potential new nodes.

    :param ip_addresses: either a string, which is parsed as a network with
        ``netaddr.IPNetwork`` and expanded into its host addresses, or an
        already expanded list of IP addresses, which is used as is.
    :param ports: iterable of BMC ports to try; every item is converted
        with ``int()``.
    :param credentials: iterable of ``(username, password)`` pairs to try.
    :param existing_nodes: iterable of node dictionaries with the ``uuid``,
        ``driver`` and ``driver_info`` keys. BMC endpoints found in them
        are excluded from the result.
    """

    def __init__(self, ip_addresses, ports, credentials, existing_nodes):
        # Initialize the base action too, the same way ProbeNode does.
        super(GetCandidateNodes, self).__init__()
        self.ip_addresses = ip_addresses
        self.ports = ports
        self.credentials = credentials
        self.existing_nodes = existing_nodes

    def _existing_ips(self):
        """Collect the BMC endpoints of the existing nodes.

        This is a best-effort scan: a node is logged and skipped when its
        driver is unknown, its driver has no address field, it has no
        address, the address cannot be resolved or its port is not a number.

        :returns: a set of ``(ip, port)`` tuples. ``port`` is ``None`` when
            neither the node nor its driver handler provides one.
        """
        result = set()

        for node in self.existing_nodes:
            try:
                handler = nodes.find_driver_handler(node['driver'])
            except exception.InvalidNode:
                LOG.warning('No known handler for driver %(driver)s of '
                            'node %(node)s, ignoring it',
                            {'driver': node['driver'], 'node': node['uuid']})
                continue

            address_field = handler.convert_key('pm_addr')
            if address_field is None:
                LOG.info('No address field for driver %(driver)s of '
                         'node %(node)s, ignoring it',
                         {'driver': node['driver'], 'node': node['uuid']})
                continue

            address = node['driver_info'].get(address_field)
            if address is None:
                LOG.warning('No address for node %(node)s, ignoring it',
                            {'node': node['uuid']})
                continue

            try:
                ip = socket.gethostbyname(address)
            except socket.gaierror as exc:
                LOG.warning('Cannot resolve %(field)s "%(value)s" '
                            'for node %(node)s: %(error)s',
                            {'field': address_field, 'value': address,
                             'node': node['uuid'], 'error': exc})
                continue

            port_field = handler.convert_key('pm_port')
            port = node['driver_info'].get(port_field, handler.default_port)
            if port is not None:
                try:
                    port = int(port)
                except (TypeError, ValueError):
                    # One node with a malformed port must not abort the
                    # whole discovery; treat it like the other bad nodes.
                    LOG.warning('Invalid %(field)s "%(value)s" for node '
                                '%(node)s, ignoring it',
                                {'field': port_field, 'value': port,
                                 'node': node['uuid']})
                    continue

            LOG.debug('Detected existing BMC at %s with port %s', ip, port)
            result.add((ip, port))

        return result

    def _ip_address_list(self):
        """Return the IP addresses to scan.

        A string is treated as a network and expanded into the string
        form of its host addresses; any other value is returned unchanged.

        :raises: netaddr.AddrFormatError if the string cannot be parsed
            (handled by ``run``).
        """
        if isinstance(self.ip_addresses, six.string_types):
            return [str(ip) for ip in
                    netaddr.IPNetwork(self.ip_addresses).iter_hosts()]
        else:
            return self.ip_addresses

    def run(self, context):
        """Build the list of candidates to probe.

        :param context: action context, not used here.
        :returns: a list of dictionaries with the ``ip``, ``username``,
            ``password`` and ``port`` keys, one per combination that does
            not match an existing node, or an error ``actions.Result`` if
            the network address cannot be parsed.
        """
        existing = self._existing_ips()
        try:
            ip_addresses = self._ip_address_list()
        except netaddr.AddrFormatError as exc:
            LOG.error("Cannot parse network address: %s", exc)
            return actions.Result(
                error="%s: %s" % (type(exc).__name__, str(exc))
            )

        result = []
        # NOTE(dtantsur): we iterate over IP addresses last to avoid
        # spamming the same BMC with too many requests in a row.
        for username, password in self.credentials:
            for port in self.ports:
                port = int(port)
                for ip in ip_addresses:
                    # An existing endpoint recorded without a port
                    # (ip, None) excludes this IP on every port.
                    if (ip, port) in existing or (ip, None) in existing:
                        LOG.info('Skipping existing node %s:%s', ip, port)
                        continue

                    result.append({'ip': ip, 'username': username,
                                   'password': password, 'port': port})

        return result


class ProbeNode(base.TripleOAction):
    """Try to find BMCs on the given IP.

    :param ip: address of the potential BMC.
    :param port: port to connect to; converted with ``int()``.
    :param username: user name to log in with.
    :param password: password to log in with.
    :param attempts: passed as ``attempts`` to ``processutils.execute``.
    :param ipmi_driver: value reported as ``pm_type`` for a found node.
    """

    def __init__(self, ip, port, username, password,
                 attempts=2, ipmi_driver='ipmi'):
        super(ProbeNode, self).__init__()
        self.ip = ip
        self.port = int(port)
        self.username = username
        self.password = password
        self.attempts = attempts
        self.ipmi_driver = ipmi_driver

    def run(self, context):
        """Run ``ipmitool power status`` against the address.

        :param context: action context, not used here.
        :returns: a dictionary with the ``pm_type``, ``pm_addr``,
            ``pm_user``, ``pm_password`` and ``pm_port`` keys if the
            command succeeds, ``None`` if it fails.
        """
        # TODO(dtantsur): redfish support
        LOG.debug('Probing for IPMI BMC: %s@%s:%s',
                  self.username, self.ip, self.port)

        # The password is handed to ipmitool through a temporary file (-f)
        # which is removed when the "with" block is left.
        with tempfile.NamedTemporaryFile(mode='wt') as fp:
            # An empty or missing password is written as a NUL character.
            fp.write(self.password or '\0')
            # Flush so that ipmitool sees the content when it opens the file.
            fp.flush()

            try:
                # TODO(dtantsur): try also IPMI v1.5
                processutils.execute('ipmitool', '-I', 'lanplus',
                                     '-H', self.ip, '-L', 'ADMINISTRATOR',
                                     '-p', str(self.port), '-U', self.username,
                                     '-f', fp.name, 'power', 'status',
                                     attempts=self.attempts)
            except processutils.ProcessExecutionError as exc:
                LOG.debug('Probing %(ip)s failed: %(exc)s',
                          {'ip': self.ip, 'exc': exc})
                return None

        LOG.info('Found a BMC on %(ip)s with user %(user)s',
                 {'ip': self.ip, 'user': self.username})
        return {
            'pm_type': self.ipmi_driver,
            'pm_addr': self.ip,
            'pm_user': self.username,
            'pm_password': self.password,
            'pm_port': self.port,
        }
@mock.patch.object(processutils, 'execute', autospec=True)
class TestProbeNode(base.TestCase):
    """Tests for ProbeNode with the ipmitool invocation mocked out."""

    action = baremetal.ProbeNode('10.0.0.42', 623, 'admin', 'password')

    def _check_ipmitool_call(self, mock_execute):
        # Both outcomes must result in exactly one ipmitool invocation
        # with the same command line.
        expected_cmd = ('ipmitool', '-I', 'lanplus',
                        '-H', '10.0.0.42',
                        '-L', 'ADMINISTRATOR',
                        '-p', '623', '-U', 'admin',
                        '-f', mock.ANY, 'power', 'status')
        mock_execute.assert_called_once_with(*expected_cmd, attempts=2)

    def test_success(self, mock_execute):
        expected_node = {
            'pm_type': 'ipmi',
            'pm_addr': '10.0.0.42',
            'pm_user': 'admin',
            'pm_password': 'password',
            'pm_port': 623,
        }

        self.assertEqual(expected_node, self.action.run(mock.Mock()))
        self._check_ipmitool_call(mock_execute)

    def test_failure(self, mock_execute):
        mock_execute.side_effect = processutils.ProcessExecutionError()

        result = self.action.run(mock.Mock())

        self.assertIsNone(result)
        self._check_ipmitool_call(mock_execute)
convert_key(self, key): if key in self._mapping: @@ -87,7 +92,8 @@ class DriverInfo(object): class PrefixedDriverInfo(DriverInfo): def __init__(self, prefix, deprecated_mapping=None, - has_port=False, address_field='address'): + has_port=False, address_field='address', + default_port=None): mapping = { 'pm_addr': '%s_%s' % (prefix, address_field), 'pm_user': '%s_username' % prefix, @@ -103,6 +109,7 @@ class PrefixedDriverInfo(DriverInfo): prefix, mapping, deprecated_mapping=deprecated_mapping, mandatory_fields=mandatory_fields, + default_port=default_port, ) def unique_id_from_fields(self, fields): @@ -223,7 +230,8 @@ class iBootDriverInfo(PrefixedDriverInfo): DRIVER_INFO = { # production drivers - '(ipmi|.*_ipmitool)': PrefixedDriverInfo('ipmi', has_port=True), + '(ipmi|.*_ipmitool)': PrefixedDriverInfo('ipmi', has_port=True, + default_port=623), '.*_drac': PrefixedDriverInfo('drac', has_port=True), '.*_ilo': PrefixedDriverInfo('ilo'), '.*_ucs': PrefixedDriverInfo( @@ -254,7 +262,7 @@ DRIVER_INFO = { } -def _find_driver_handler(driver): +def find_driver_handler(driver): for driver_tpl, handler in DRIVER_INFO.items(): if re.match(driver_tpl, driver) is not None: return handler @@ -270,7 +278,7 @@ def _find_node_handler(fields): except KeyError: raise exception.InvalidNode('pm_type (ironic driver to use) is ' 'required', node=fields) - return _find_driver_handler(driver) + return find_driver_handler(driver) def register_ironic_node(node, client): @@ -329,7 +337,7 @@ def _populate_node_mapping(client): for port in client.node.list_ports(node.uuid): node_map['mac'][port.address] = node.uuid - handler = _find_driver_handler(node.driver) + handler = find_driver_handler(node.driver) unique_id = handler.unique_id_from_node(node) if unique_id: node_map['pm_addr'][unique_id] = node.uuid diff --git a/workbooks/baremetal.yaml b/workbooks/baremetal.yaml index cbd4ed61c..41d1d0c43 100644 --- a/workbooks/baremetal.yaml +++ b/workbooks/baremetal.yaml @@ -948,3 +948,153 @@ 
# Probe the given addresses and ports with the given credentials and report
# the nodes that answered in ``nodes_json``.
discover_nodes:
  description: Run nodes discovery over the given IP range

  input:
    # ip_addresses and credentials have no default and must be supplied.
    - ip_addresses
    - credentials
    - ports: [623]
    - queue_name: tripleo

  tasks:

    # List the nodes known to ironic, keeping only the fields that the
    # candidate calculation needs.
    get_all_nodes:
      action: ironic.node_list
      input:
        fields: ["uuid", "driver", "driver_info"]
        limit: 0
      on-success: get_candidate_nodes
      on-error: get_all_nodes_failed
      publish:
        existing_nodes: <% task(get_all_nodes).result %>

    # Record the failure and go straight to the notification.
    get_all_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(get_all_nodes).result %>

    # Build the list of ip/port/credentials combinations to probe; the
    # existing nodes are passed in so that the action can exclude them.
    get_candidate_nodes:
      action: tripleo.baremetal.get_candidate_nodes
      input:
        ip_addresses: <% $.ip_addresses %>
        credentials: <% $.credentials %>
        ports: <% $.ports %>
        existing_nodes: <% $.existing_nodes %>
      on-success: probe_nodes
      on-error: get_candidate_nodes_failed
      publish:
        candidates: <% task(get_candidate_nodes).result %>

    # Record the failure and go straight to the notification.
    get_candidate_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(get_candidate_nodes).result %>

    # Run the probe action once per candidate. Null results are filtered
    # out, so nodes_json only holds the candidates that were found.
    probe_nodes:
      action: tripleo.baremetal.probe_node
      on-success: send_message
      on-error: probe_nodes_failed
      input:
        ip: <% $.node.ip %>
        port: <% $.node.port %>
        username: <% $.node.username %>
        password: <% $.node.password %>
      with-items:
        - node in <% $.candidates %>
      publish:
        nodes_json: <% task(probe_nodes).result.where($ != null) %>

    # Record the failure and go straight to the notification.
    probe_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(probe_nodes).result %>

    # Post the outcome to the Zaqar queue. status defaults to SUCCESS and
    # nodes_json to an empty list when no earlier task published them.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.discover_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              nodes_json: <% $.get('nodes_json', []) %>
      on-success:
        # Fail the workflow after reporting if an earlier task failed.
        - fail: <% $.get('status') = "FAILED" %>
# Run the discover_nodes workflow, then hand the nodes it found to the
# register_or_update workflow.
discover_and_enroll_nodes:
  description: Run nodes discovery over the given IP range and enroll nodes

  input:
    # ip_addresses and credentials have no default and must be supplied.
    - ip_addresses
    - credentials
    - ports: [623]
    # The following four inputs are passed unchanged to register_or_update.
    - kernel_name: null
    - ramdisk_name: null
    - instance_boot_option: local
    - initial_state: manageable
    - queue_name: tripleo

  tasks:

    # Discovery is delegated to the discover_nodes workflow, which receives
    # the same queue_name as this workflow.
    discover_nodes:
      workflow: tripleo.baremetal.v1.discover_nodes
      input:
        ip_addresses: <% $.ip_addresses %>
        ports: <% $.ports %>
        credentials: <% $.credentials %>
        queue_name: <% $.queue_name %>
      on-success: enroll_nodes
      on-error: discover_nodes_failed
      publish:
        nodes_json: <% task(discover_nodes).result.nodes_json %>

    # Record the failure and go straight to the notification.
    discover_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(discover_nodes).result %>

    # Enroll the discovered nodes.
    enroll_nodes:
      workflow: tripleo.baremetal.v1.register_or_update
      input:
        nodes_json: <% $.nodes_json %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
        initial_state: <% $.initial_state %>
      on-success: send_message
      on-error: enroll_nodes_failed
      publish:
        registered_nodes: <% task(enroll_nodes).result.registered_nodes %>

    # Record the failure and go straight to the notification.
    enroll_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(enroll_nodes).result %>

    # Post the outcome to the Zaqar queue. status defaults to SUCCESS and
    # registered_nodes to an empty list when no earlier task published them.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.discover_and_enroll_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              registered_nodes: <% $.get('registered_nodes', []) %>
      on-success:
        # Fail the workflow after reporting if an earlier task failed.
        - fail: <% $.get('status') = "FAILED" %>