Add workflow for IPMI nodes discovery

The new workflow discover_nodes works by trying to log into the given IP
addresses with the given set of credentials.

The new workflow discover_and_enroll_nodes uses the results of the former
workflow to create new nodes and optionally move them to the manageable state.

Implements blueprint node-discovery-by-range

Change-Id: I158f0b8f5251d9d94e7e57b3fe24362316d26599
This commit is contained in:
Dmitry Tantsur 2017-05-04 16:32:39 +02:00
parent 1d79e161d4
commit be69f375b4
8 changed files with 423 additions and 9 deletions

View File

@ -0,0 +1,12 @@
---
features:
- |
Add two new workflows for discovering IPMI BMCs: ``discover_nodes`` and
``discover_and_enroll_nodes``.
The former scans the given IP addresses and ports, and tries to log into
each BMC using the given credentials. It returns node information in a
format accepted by the TripleO enrollment workflow.
The latter calls the former, enrolls the resulting nodes and optionally
moves them to the manageable state.

View File

@ -23,3 +23,4 @@ python-novaclient>=9.0.0 # Apache-2.0
passlib>=1.7.0 # BSD
netifaces>=0.10.4 # MIT
paramiko>=2.0 # LGPLv2.1+
netaddr>=0.7.13,!=0.7.16 # BSD

View File

@ -71,6 +71,8 @@ mistral.actions =
tripleo.baremetal.update_node_capability = tripleo_common.actions.baremetal:UpdateNodeCapability
tripleo.baremetal.cell_v2_discover_hosts = tripleo_common.actions.baremetal:CellV2DiscoverHostsAction
tripleo.baremetal.validate_nodes = tripleo_common.actions.baremetal:ValidateNodes
tripleo.baremetal.get_candidate_nodes = tripleo_common.actions.baremetal:GetCandidateNodes
tripleo.baremetal.probe_node = tripleo_common.actions.baremetal:ProbeNode
tripleo.deployment.config = tripleo_common.actions.deployment:OrchestrationDeployAction
tripleo.deployment.deploy = tripleo_common.actions.deployment:DeployStackAction
tripleo.deployment.overcloudrc = tripleo_common.actions.deployment:OvercloudRcAction

View File

@ -13,9 +13,13 @@
# License for the specific language governing permissions and limitations
# under the License.
import logging
import socket
import tempfile
import ironic_inspector_client
from mistral_lib import actions
import netaddr
from oslo_concurrency import processutils
from oslo_utils import units
import six
@ -349,3 +353,135 @@ class GetProfileAction(base.TripleOAction):
result['profile'] = nodes.get_node_profile(self.node)
result['uuid'] = self.node.get('uuid')
return result
class GetCandidateNodes(base.TripleOAction):
    """Given IPs, ports and credentials, return potential new nodes.

    Builds every (IP, port, username, password) combination worth probing,
    leaving out BMC endpoints that already belong to one of
    ``existing_nodes``.

    :param ip_addresses: either a string accepted by ``netaddr.IPNetwork``
        (its host addresses are enumerated) or a ready list of IP
        addresses, which is used as is.
    :param ports: iterable of ports to try; each one goes through ``int()``.
    :param credentials: iterable of (username, password) pairs.
    :param existing_nodes: iterable of dicts with ``uuid``, ``driver`` and
        ``driver_info`` keys describing already enrolled nodes.
    """

    def __init__(self, ip_addresses, ports, credentials, existing_nodes):
        # Let the base action initialise itself before storing the inputs.
        super(GetCandidateNodes, self).__init__()
        self.ip_addresses = ip_addresses
        self.ports = ports
        self.credentials = credentials
        self.existing_nodes = existing_nodes

    def _existing_ips(self):
        """Collect the BMC endpoints used by the existing nodes.

        Nodes with an unknown driver, without an address field, without an
        address or with an unresolvable address are logged and skipped.

        :returns: a set of ``(ip, port)`` tuples; ``port`` is ``None`` when
            neither ``driver_info`` nor the driver handler provides a usable
            one.
        """
        result = set()

        for node in self.existing_nodes:
            try:
                handler = nodes.find_driver_handler(node['driver'])
            except exception.InvalidNode:
                LOG.warning('No known handler for driver %(driver)s of '
                            'node %(node)s, ignoring it',
                            {'driver': node['driver'], 'node': node['uuid']})
                continue

            address_field = handler.convert_key('pm_addr')
            if address_field is None:
                LOG.info('No address field for driver %(driver)s of '
                         'node %(node)s, ignoring it',
                         {'driver': node['driver'], 'node': node['uuid']})
                continue

            address = node['driver_info'].get(address_field)
            if address is None:
                LOG.warning('No address for node %(node)s, ignoring it',
                            {'node': node['uuid']})
                continue

            try:
                ip = socket.gethostbyname(address)
            except socket.gaierror as exc:
                LOG.warning('Cannot resolve %(field)s "%(value)s" '
                            'for node %(node)s: %(error)s',
                            {'field': address_field, 'value': address,
                             'node': node['uuid'], 'error': exc})
                continue

            port_field = handler.convert_key('pm_port')
            port = node['driver_info'].get(port_field, handler.default_port)
            if port is not None:
                try:
                    port = int(port)
                except (TypeError, ValueError):
                    # A malformed port must not abort the whole discovery.
                    # Recording the port as unknown makes run() skip this
                    # address on every port, so a known BMC is not probed.
                    LOG.warning('Invalid port %(port)r for node %(node)s, '
                                'assuming it may use any port',
                                {'port': port, 'node': node['uuid']})
                    port = None

            LOG.debug('Detected existing BMC at %s with port %s', ip, port)
            result.add((ip, port))

        return result

    def _ip_address_list(self):
        """Return the IP addresses to scan as a list.

        :raises: netaddr.AddrFormatError if ``ip_addresses`` is a string
            that cannot be parsed as a network.
        """
        if isinstance(self.ip_addresses, six.string_types):
            return [str(ip) for ip in
                    netaddr.IPNetwork(self.ip_addresses).iter_hosts()]
        else:
            return self.ip_addresses

    def run(self, context):
        """Build the list of candidates to probe.

        :returns: a list of dicts with ``ip``, ``username``, ``password``
            and ``port`` keys, or an ``actions.Result`` carrying an error
            when the network address cannot be parsed.
        """
        existing = self._existing_ips()
        try:
            ip_addresses = self._ip_address_list()
        except netaddr.AddrFormatError as exc:
            LOG.error("Cannot parse network address: %s", exc)
            return actions.Result(
                error="%s: %s" % (type(exc).__name__, str(exc))
            )

        result = []
        # NOTE(dtantsur): we iterate over IP addresses last to avoid
        # spamming the same BMC with too many requests in a row.
        for username, password in self.credentials:
            for port in self.ports:
                port = int(port)
                for ip in ip_addresses:
                    # A known endpoint with port None matches any port.
                    if (ip, port) in existing or (ip, None) in existing:
                        LOG.info('Skipping existing node %s:%s', ip, port)
                        continue

                    result.append({'ip': ip, 'username': username,
                                   'password': password, 'port': port})

        return result
class ProbeNode(base.TripleOAction):
    """Check whether an IPMI BMC answers on an address with credentials.

    :param ip: address of the potential BMC.
    :param port: port of the potential BMC, converted with ``int()``.
    :param username: user name to log in with.
    :param password: password to log in with; may be empty.
    :param attempts: passed to ``processutils.execute`` as ``attempts``.
    :param ipmi_driver: value reported as ``pm_type`` for a found BMC.
    """

    def __init__(self, ip, port, username, password,
                 attempts=2, ipmi_driver='ipmi'):
        super(ProbeNode, self).__init__()
        self.ip = ip
        self.port = int(port)
        self.username = username
        self.password = password
        self.attempts = attempts
        self.ipmi_driver = ipmi_driver

    def run(self, context):
        """Run ``ipmitool power status`` against the address.

        :returns: a dict with ``pm_type``, ``pm_addr``, ``pm_user``,
            ``pm_password`` and ``pm_port`` keys when the command succeeds,
            ``None`` when it fails.
        """
        # TODO(dtantsur): redfish support
        LOG.debug('Probing for IPMI BMC: %s@%s:%s',
                  self.username, self.ip, self.port)

        # The password reaches ipmitool through a file (-f), not argv.
        with tempfile.NamedTemporaryFile(mode='wt') as password_file:
            password_file.write(self.password or '\0')
            password_file.flush()

            # TODO(dtantsur): try also IPMI v1.5
            command = (
                'ipmitool', '-I', 'lanplus',
                '-H', self.ip, '-L', 'ADMINISTRATOR',
                '-p', str(self.port), '-U', self.username,
                '-f', password_file.name, 'power', 'status',
            )
            try:
                processutils.execute(*command, attempts=self.attempts)
            except processutils.ProcessExecutionError as exc:
                LOG.debug('Probing %(ip)s failed: %(exc)s',
                          {'ip': self.ip, 'exc': exc})
                return None

        LOG.info('Found a BMC on %(ip)s with user %(user)s',
                 {'ip': self.ip, 'user': self.username})
        return dict(
            pm_type=self.ipmi_driver,
            pm_addr=self.ip,
            pm_user=self.username,
            pm_password=self.password,
            pm_port=self.port,
        )

View File

@ -379,3 +379,108 @@ class TestGetProfileAction(base.TestCase):
'profile': 'compute'
}
self.assertEqual(result, expected_result)
@mock.patch.object(baremetal.socket, 'gethostbyname', lambda x: x)
class TestGetCandidateNodes(base.TestCase):
    """Tests for GetCandidateNodes with name resolution stubbed out."""

    PORTS = [623, 6230, 6235]
    CREDENTIALS = [['admin', 'password'], ['admin', 'admin']]

    @staticmethod
    def _node(uuid, driver, **driver_info):
        # Shape of a node as the action receives it in existing_nodes.
        return {'uuid': uuid, 'driver': driver, 'driver_info': driver_info}

    @staticmethod
    def _candidates(endpoints):
        # Credentials form the outermost loop of the action, so the same
        # endpoints are expected once per password, in CREDENTIALS order.
        return [
            {'ip': ip, 'port': port,
             'username': 'admin', 'password': password}
            for password in ('password', 'admin')
            for ip, port in endpoints
        ]

    def _run_action(self, ip_addresses):
        action = baremetal.GetCandidateNodes(
            ip_addresses, self.PORTS, self.CREDENTIALS, self.existing_nodes)
        return action.run(mock.Mock())

    def setUp(self):
        super(TestGetCandidateNodes, self).setUp()
        self.existing_nodes = [
            self._node('1', 'ipmi', ipmi_address='10.0.0.1'),
            self._node('2', 'pxe_ipmitool',
                       ipmi_address='10.0.0.1', ipmi_port=6235),
            self._node('3', 'foobar'),
            self._node('4', 'fake', fake_address=42),
            self._node('5', 'ipmi'),
            self._node('6', 'pxe_drac', drac_address='10.0.0.2'),
            self._node('7', 'pxe_drac',
                       drac_address='10.0.0.3', drac_port=6230),
        ]

    def test_existing_ips(self):
        action = baremetal.GetCandidateNodes([], [], [], self.existing_nodes)
        found = action._existing_ips()

        expected = {
            ('10.0.0.1', 623),
            ('10.0.0.1', 6235),
            ('10.0.0.2', None),
            ('10.0.0.3', 6230),
        }
        self.assertEqual(expected, set(found))

    def test_with_list(self):
        result = self._run_action(['10.0.0.1', '10.0.0.2', '10.0.0.3'])

        expected = self._candidates([
            ('10.0.0.3', 623),
            ('10.0.0.1', 6230),
            ('10.0.0.3', 6235),
        ])
        self.assertEqual(expected, result)

    def test_with_subnet(self):
        result = self._run_action('10.0.0.0/30')

        expected = self._candidates([('10.0.0.1', 6230)])
        self.assertEqual(expected, result)

    def test_invalid_subnet(self):
        result = self._run_action('meow')
        self.assertTrue(result.is_error())
@mock.patch.object(processutils, 'execute', autospec=True)
class TestProbeNode(base.TestCase):
    """Tests for ProbeNode with the ipmitool invocation mocked out."""

    action = baremetal.ProbeNode('10.0.0.42', 623, 'admin', 'password')

    def _assert_ipmitool_called(self, mock_execute):
        # The password file name is random, hence mock.ANY after '-f'.
        expected_command = [
            'ipmitool', '-I', 'lanplus',
            '-H', '10.0.0.42',
            '-L', 'ADMINISTRATOR',
            '-p', '623', '-U', 'admin',
            '-f', mock.ANY, 'power', 'status',
        ]
        mock_execute.assert_called_once_with(*expected_command, attempts=2)

    def test_success(self, mock_execute):
        found = self.action.run(mock.Mock())

        expected = {
            'pm_type': 'ipmi',
            'pm_addr': '10.0.0.42',
            'pm_user': 'admin',
            'pm_password': 'password',
            'pm_port': 623,
        }
        self.assertEqual(expected, found)
        self._assert_ipmitool_called(mock_execute)

    def test_failure(self, mock_execute):
        mock_execute.side_effect = processutils.ProcessExecutionError()

        self.assertIsNone(self.action.run(mock.Mock()))
        self._assert_ipmitool_called(mock_execute)

View File

@ -667,13 +667,13 @@ class NodesTest(base.TestCase):
def test__get_node_id_fake_pxe(self):
node = self._get_node()
node['pm_type'] = 'fake_pxe'
handler = nodes._find_driver_handler('fake_pxe')
handler = nodes.find_driver_handler('fake_pxe')
node_map = {'mac': {'aaa': 'abcdef'}, 'pm_addr': {}}
self.assertEqual('abcdef', nodes._get_node_id(node, handler, node_map))
def test__get_node_id_conflict(self):
node = self._get_node()
handler = nodes._find_driver_handler('pxe_ipmitool')
handler = nodes.find_driver_handler('pxe_ipmitool')
node_map = {'mac': {'aaa': 'abcdef'},
'pm_addr': {'foo.bar': 'defabc'}}
self.assertRaises(exception.InvalidNode,
@ -682,7 +682,7 @@ class NodesTest(base.TestCase):
def test_get_node_id_valid_duplicate(self):
node = self._get_node()
handler = nodes._find_driver_handler('pxe_ipmitool')
handler = nodes.find_driver_handler('pxe_ipmitool')
node_map = {'mac': {'aaa': 'id'},
'pm_addr': {'foo.bar': 'id'}}
self.assertEqual('id', nodes._get_node_id(node, handler, node_map))

View File

@ -31,11 +31,16 @@ class DriverInfo(object):
DEFAULTS = {}
def __init__(self, prefix, mapping, deprecated_mapping=None,
mandatory_fields=()):
mandatory_fields=(), default_port=None):
self._prefix = prefix
self._mapping = mapping
self._deprecated_mapping = deprecated_mapping or {}
self._mandatory_fields = mandatory_fields
self._default_port = default_port
@property
def default_port(self):
    """Return the default port given to the constructor, if any."""
    return self._default_port
def convert_key(self, key):
if key in self._mapping:
@ -87,7 +92,8 @@ class DriverInfo(object):
class PrefixedDriverInfo(DriverInfo):
def __init__(self, prefix, deprecated_mapping=None,
has_port=False, address_field='address'):
has_port=False, address_field='address',
default_port=None):
mapping = {
'pm_addr': '%s_%s' % (prefix, address_field),
'pm_user': '%s_username' % prefix,
@ -103,6 +109,7 @@ class PrefixedDriverInfo(DriverInfo):
prefix, mapping,
deprecated_mapping=deprecated_mapping,
mandatory_fields=mandatory_fields,
default_port=default_port,
)
def unique_id_from_fields(self, fields):
@ -223,7 +230,8 @@ class iBootDriverInfo(PrefixedDriverInfo):
DRIVER_INFO = {
# production drivers
'(ipmi|.*_ipmitool)': PrefixedDriverInfo('ipmi', has_port=True),
'(ipmi|.*_ipmitool)': PrefixedDriverInfo('ipmi', has_port=True,
default_port=623),
'.*_drac': PrefixedDriverInfo('drac', has_port=True),
'.*_ilo': PrefixedDriverInfo('ilo'),
'.*_ucs': PrefixedDriverInfo(
@ -254,7 +262,7 @@ DRIVER_INFO = {
}
def _find_driver_handler(driver):
def find_driver_handler(driver):
for driver_tpl, handler in DRIVER_INFO.items():
if re.match(driver_tpl, driver) is not None:
return handler
@ -270,7 +278,7 @@ def _find_node_handler(fields):
except KeyError:
raise exception.InvalidNode('pm_type (ironic driver to use) is '
'required', node=fields)
return _find_driver_handler(driver)
return find_driver_handler(driver)
def register_ironic_node(node, client):
@ -329,7 +337,7 @@ def _populate_node_mapping(client):
for port in client.node.list_ports(node.uuid):
node_map['mac'][port.address] = node.uuid
handler = _find_driver_handler(node.driver)
handler = find_driver_handler(node.driver)
unique_id = handler.unique_id_from_node(node)
if unique_id:
node_map['pm_addr'][unique_id] = node.uuid

View File

@ -948,3 +948,153 @@ workflows:
execution: <% execution() %>
on-success:
- fail: <% $.get('status') = "FAILED" %>
# Probe the given addresses for BMCs that accept the given credentials and
# post the discovered nodes (nodes_json) to a Zaqar queue.
discover_nodes:
  description: Run nodes discovery over the given IP range
  input:
    # ip_addresses, credentials and ports are handed over unchanged to the
    # tripleo.baremetal.get_candidate_nodes action.
    - ip_addresses
    - credentials
    - ports: [623]
    - queue_name: tripleo
  tasks:
    # Fetch the nodes ironic already knows about, so that their BMCs can be
    # left out of the candidates.
    get_all_nodes:
      action: ironic.node_list
      input:
        fields: ["uuid", "driver", "driver_info"]
        # NOTE(review): limit 0 presumably means "no limit" - confirm
        # against the ironic client.
        limit: 0
      on-success: get_candidate_nodes
      on-error: get_all_nodes_failed
      publish:
        existing_nodes: <% task(get_all_nodes).result %>
    get_all_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(get_all_nodes).result %>
    # Expand addresses, ports and credentials into the list of combinations
    # to probe.
    get_candidate_nodes:
      action: tripleo.baremetal.get_candidate_nodes
      input:
        ip_addresses: <% $.ip_addresses %>
        credentials: <% $.credentials %>
        ports: <% $.ports %>
        existing_nodes: <% $.existing_nodes %>
      on-success: probe_nodes
      on-error: get_candidate_nodes_failed
      publish:
        candidates: <% task(get_candidate_nodes).result %>
    get_candidate_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(get_candidate_nodes).result %>
    # Run the probe action once per candidate.
    probe_nodes:
      action: tripleo.baremetal.probe_node
      on-success: send_message
      on-error: probe_nodes_failed
      input:
        ip: <% $.node.ip %>
        port: <% $.node.port %>
        username: <% $.node.username %>
        password: <% $.node.password %>
      with-items:
        - node in <% $.candidates %>
      publish:
        # Candidates for which the probe returned null are dropped.
        nodes_json: <% task(probe_nodes).result.where($ != null) %>
    probe_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(probe_nodes).result %>
    # Every path ends here: report the outcome on the queue, then fail the
    # workflow if a previous task published status FAILED.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.discover_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              nodes_json: <% $.get('nodes_json', []) %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
# Run discover_nodes, then hand the discovered nodes to register_or_update
# and post the registered nodes to a Zaqar queue.
discover_and_enroll_nodes:
  description: Run nodes discovery over the given IP range and enroll nodes
  input:
    # ip_addresses, credentials and ports go to the discover_nodes workflow;
    # the remaining options (except queue_name) go to register_or_update.
    - ip_addresses
    - credentials
    - ports: [623]
    - kernel_name: null
    - ramdisk_name: null
    - instance_boot_option: local
    - initial_state: manageable
    - queue_name: tripleo
  tasks:
    # Sub-workflow; it is given the same queue_name as this workflow.
    discover_nodes:
      workflow: tripleo.baremetal.v1.discover_nodes
      input:
        ip_addresses: <% $.ip_addresses %>
        ports: <% $.ports %>
        credentials: <% $.credentials %>
        queue_name: <% $.queue_name %>
      on-success: enroll_nodes
      on-error: discover_nodes_failed
      publish:
        nodes_json: <% task(discover_nodes).result.nodes_json %>
    discover_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(discover_nodes).result %>
    # Register the discovered nodes.
    # NOTE(review): queue_name is not passed on here, so register_or_update
    # presumably reports to its own default queue - confirm this is intended.
    enroll_nodes:
      workflow: tripleo.baremetal.v1.register_or_update
      input:
        nodes_json: <% $.nodes_json %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
        initial_state: <% $.initial_state %>
      on-success: send_message
      on-error: enroll_nodes_failed
      publish:
        registered_nodes: <% task(enroll_nodes).result.registered_nodes %>
    enroll_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(enroll_nodes).result %>
    # Every path ends here: report the outcome on the queue, then fail the
    # workflow if a previous task published status FAILED.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.discover_and_enroll_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              registered_nodes: <% $.get('registered_nodes', []) %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>