Merge "Add workflow for IPMI nodes discovery"
This commit is contained in:
commit
408f8a1306
12
releasenotes/notes/ipmi-discovery-72f93156bcaf461d.yaml
Normal file
12
releasenotes/notes/ipmi-discovery-72f93156bcaf461d.yaml
Normal file
@ -0,0 +1,12 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Add two new workflows for discovering IPMI BMCs: ``discover_nodes`` and
|
||||
``discover_and_enroll_nodes``.
|
||||
|
||||
The former scans the given IP addresses and ports, and tries to log into each BMC
|
||||
using the given credentials. It returns node information in a format accepted
|
||||
by the TripleO enrollment workflow.
|
||||
|
||||
The latter calls the former, enrolls the resulting nodes and optionally
|
||||
moves them to the manageable state.
|
@ -23,3 +23,4 @@ python-novaclient>=9.0.0 # Apache-2.0
|
||||
passlib>=1.7.0 # BSD
|
||||
netifaces>=0.10.4 # MIT
|
||||
paramiko>=2.0 # LGPLv2.1+
|
||||
netaddr>=0.7.13,!=0.7.16 # BSD
|
||||
|
@ -72,6 +72,8 @@ mistral.actions =
|
||||
tripleo.baremetal.update_node_capability = tripleo_common.actions.baremetal:UpdateNodeCapability
|
||||
tripleo.baremetal.cell_v2_discover_hosts = tripleo_common.actions.baremetal:CellV2DiscoverHostsAction
|
||||
tripleo.baremetal.validate_nodes = tripleo_common.actions.baremetal:ValidateNodes
|
||||
tripleo.baremetal.get_candidate_nodes = tripleo_common.actions.baremetal:GetCandidateNodes
|
||||
tripleo.baremetal.probe_node = tripleo_common.actions.baremetal:ProbeNode
|
||||
tripleo.deployment.config = tripleo_common.actions.deployment:OrchestrationDeployAction
|
||||
tripleo.deployment.deploy = tripleo_common.actions.deployment:DeployStackAction
|
||||
tripleo.deployment.overcloudrc = tripleo_common.actions.deployment:OvercloudRcAction
|
||||
|
@ -13,9 +13,13 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
import logging
|
||||
import socket
|
||||
import tempfile
|
||||
|
||||
import ironic_inspector_client
|
||||
from mistral_lib import actions
|
||||
import netaddr
|
||||
from oslo_concurrency import processutils
|
||||
from oslo_utils import units
|
||||
import six
|
||||
|
||||
@ -349,3 +353,135 @@ class GetProfileAction(base.TripleOAction):
|
||||
result['profile'] = nodes.get_node_profile(self.node)
|
||||
result['uuid'] = self.node.get('uuid')
|
||||
return result
|
||||
|
||||
|
||||
class GetCandidateNodes(base.TripleOAction):
    """Given IPs, ports and credentials, return potential new nodes.

    Builds every (IP, port, username, password) combination that is worth
    probing, leaving out the IP/port pairs that already belong to one of
    the ``existing_nodes``.

    :param ip_addresses: either a string accepted by ``netaddr.IPNetwork``
        (expanded to its host addresses) or a list of IP addresses that is
        used as is.
    :param ports: list of ports to try; each one is converted with
        ``int()``.
    :param credentials: list of ``(username, password)`` pairs.
    :param existing_nodes: list of node dicts providing the ``uuid``,
        ``driver`` and ``driver_info`` keys.
    """

    def __init__(self, ip_addresses, ports, credentials, existing_nodes):
        # Initialize the base action first, as ProbeNode does.
        super(GetCandidateNodes, self).__init__()
        self.ip_addresses = ip_addresses
        self.ports = ports
        self.credentials = credentials
        self.existing_nodes = existing_nodes

    def _existing_ips(self):
        """Collect the (IP, port) pairs used by the existing nodes.

        A node is logged and skipped when its driver has no known handler,
        the handler has no address field, the node has no address, the
        address cannot be resolved, or its port is not a valid integer.

        :returns: a set of ``(ip, port)`` tuples; ``port`` is ``None`` when
            neither the node nor its driver handler provides one.
        """
        result = set()

        for node in self.existing_nodes:
            try:
                handler = nodes.find_driver_handler(node['driver'])
            except exception.InvalidNode:
                LOG.warning('No known handler for driver %(driver)s of '
                            'node %(node)s, ignoring it',
                            {'driver': node['driver'], 'node': node['uuid']})
                continue

            address_field = handler.convert_key('pm_addr')
            if address_field is None:
                LOG.info('No address field for driver %(driver)s of '
                         'node %(node)s, ignoring it',
                         {'driver': node['driver'], 'node': node['uuid']})
                continue

            address = node['driver_info'].get(address_field)
            if address is None:
                LOG.warning('No address for node %(node)s, ignoring it',
                            {'node': node['uuid']})
                continue

            # The address may be a host name; candidates are compared by IP.
            try:
                ip = socket.gethostbyname(address)
            except socket.gaierror as exc:
                LOG.warning('Cannot resolve %(field)s "%(value)s" '
                            'for node %(node)s: %(error)s',
                            {'field': address_field, 'value': address,
                             'node': node['uuid'], 'error': exc})
                continue

            port_field = handler.convert_key('pm_port')
            port = node['driver_info'].get(port_field, handler.default_port)
            if port is not None:
                # A malformed port on one node must not abort the whole
                # scan; treat it like the other unusable-node cases above.
                try:
                    port = int(port)
                except (TypeError, ValueError):
                    LOG.warning('Invalid port "%(port)s" for node '
                                '%(node)s, ignoring it',
                                {'port': port, 'node': node['uuid']})
                    continue

            LOG.debug('Detected existing BMC at %s with port %s', ip, port)
            result.add((ip, port))

        return result

    def _ip_address_list(self):
        """Return the IP addresses to scan as a list.

        :returns: the host addresses of the network when ``ip_addresses``
            is a string, otherwise ``ip_addresses`` unchanged.
        :raises netaddr.AddrFormatError: if the string cannot be parsed.
        """
        if isinstance(self.ip_addresses, six.string_types):
            return [str(ip) for ip in
                    netaddr.IPNetwork(self.ip_addresses).iter_hosts()]
        else:
            return self.ip_addresses

    def run(self, context):
        """Build the list of candidates to probe.

        :param context: action execution context (not used here).
        :returns: a list of dicts with the ``ip``, ``username``,
            ``password`` and ``port`` keys, or an ``actions.Result``
            carrying an error when the network address cannot be parsed.
        """
        existing = self._existing_ips()
        try:
            ip_addresses = self._ip_address_list()
        except netaddr.AddrFormatError as exc:
            LOG.error("Cannot parse network address: %s", exc)
            return actions.Result(
                error="%s: %s" % (type(exc).__name__, str(exc))
            )

        result = []
        # NOTE(dtantsur): we iterate over IP addresses last to avoid
        # spamming the same BMC with too many requests in a row.
        for username, password in self.credentials:
            for port in self.ports:
                port = int(port)
                for ip in ip_addresses:
                    # An existing node recorded with port None matches
                    # every port of its IP.
                    if (ip, port) in existing or (ip, None) in existing:
                        LOG.info('Skipping existing node %s:%s', ip, port)
                        continue

                    result.append({'ip': ip, 'username': username,
                                   'password': password, 'port': port})

        return result
|
||||
|
||||
|
||||
class ProbeNode(base.TripleOAction):
    """Try to find BMCs on the given IP.

    Runs ``ipmitool ... power status`` against the address with the given
    credentials and reports the node when the command succeeds.
    """

    def __init__(self, ip, port, username, password,
                 attempts=2, ipmi_driver='ipmi'):
        super(ProbeNode, self).__init__()
        self.ip = ip
        self.port = int(port)
        self.username = username
        self.password = password
        self.attempts = attempts
        self.ipmi_driver = ipmi_driver

    def run(self, context):
        """Probe the BMC once.

        :param context: action execution context (not used here).
        :returns: a dict with the ``pm_*`` fields of the discovered node,
            or ``None`` when the ipmitool call fails.
        """
        # TODO(dtantsur): redfish support
        LOG.debug('Probing for IPMI BMC: %s@%s:%s',
                  self.username, self.ip, self.port)

        # The password is handed to ipmitool through a temporary file,
        # which only exists for the duration of the call.
        with tempfile.NamedTemporaryFile(mode='wt') as password_file:
            password_file.write(self.password or '\0')
            password_file.flush()

            # TODO(dtantsur): try also IPMI v1.5
            command = (
                'ipmitool', '-I', 'lanplus',
                '-H', self.ip, '-L', 'ADMINISTRATOR',
                '-p', str(self.port), '-U', self.username,
                '-f', password_file.name, 'power', 'status',
            )
            try:
                processutils.execute(*command, attempts=self.attempts)
            except processutils.ProcessExecutionError as exc:
                LOG.debug('Probing %(ip)s failed: %(exc)s',
                          {'ip': self.ip, 'exc': exc})
                return None

        LOG.info('Found a BMC on %(ip)s with user %(user)s',
                 {'ip': self.ip, 'user': self.username})
        found = {}
        found['pm_type'] = self.ipmi_driver
        found['pm_addr'] = self.ip
        found['pm_user'] = self.username
        found['pm_password'] = self.password
        found['pm_port'] = self.port
        return found
|
||||
|
@ -379,3 +379,108 @@ class TestGetProfileAction(base.TestCase):
|
||||
'profile': 'compute'
|
||||
}
|
||||
self.assertEqual(result, expected_result)
|
||||
|
||||
|
||||
@mock.patch.object(baremetal.socket, 'gethostbyname', lambda x: x)
class TestGetCandidateNodes(base.TestCase):
    """Tests for GetCandidateNodes with host name resolution stubbed out."""

    def setUp(self):
        super(TestGetCandidateNodes, self).setUp()
        ipmi_default_port = {
            'uuid': '1', 'driver': 'ipmi',
            'driver_info': {'ipmi_address': '10.0.0.1'}}
        ipmi_custom_port = {
            'uuid': '2', 'driver': 'pxe_ipmitool',
            'driver_info': {'ipmi_address': '10.0.0.1', 'ipmi_port': 6235}}
        unknown_driver = {'uuid': '3', 'driver': 'foobar', 'driver_info': {}}
        fake_driver = {
            'uuid': '4', 'driver': 'fake',
            'driver_info': {'fake_address': 42}}
        ipmi_without_address = {
            'uuid': '5', 'driver': 'ipmi', 'driver_info': {}}
        drac_without_port = {
            'uuid': '6', 'driver': 'pxe_drac',
            'driver_info': {'drac_address': '10.0.0.2'}}
        drac_with_port = {
            'uuid': '7', 'driver': 'pxe_drac',
            'driver_info': {'drac_address': '10.0.0.3', 'drac_port': 6230}}
        self.existing_nodes = [
            ipmi_default_port,
            ipmi_custom_port,
            unknown_driver,
            fake_driver,
            ipmi_without_address,
            drac_without_port,
            drac_with_port,
        ]

    def _run_action(self, ip_addresses):
        """Run the action with the ports and credentials the tests share."""
        action = baremetal.GetCandidateNodes(
            ip_addresses,
            [623, 6230, 6235],
            [['admin', 'password'], ['admin', 'admin']],
            self.existing_nodes)
        return action.run(mock.Mock())

    @staticmethod
    def _expected(endpoints):
        """Expand (ip, port) pairs into candidates for both passwords."""
        return [
            {'ip': ip, 'port': port,
             'username': 'admin', 'password': password}
            for password in ('password', 'admin')
            for ip, port in endpoints
        ]

    def test_existing_ips(self):
        action = baremetal.GetCandidateNodes([], [], [], self.existing_nodes)
        found = action._existing_ips()

        expected = {
            ('10.0.0.1', 623),
            ('10.0.0.1', 6235),
            ('10.0.0.2', None),
            ('10.0.0.3', 6230),
        }
        self.assertEqual(expected, set(found))

    def test_with_list(self):
        result = self._run_action(['10.0.0.1', '10.0.0.2', '10.0.0.3'])

        self.assertEqual(
            self._expected([('10.0.0.3', 623),
                            ('10.0.0.1', 6230),
                            ('10.0.0.3', 6235)]),
            result)

    def test_with_subnet(self):
        result = self._run_action('10.0.0.0/30')

        self.assertEqual(self._expected([('10.0.0.1', 6230)]), result)

    def test_invalid_subnet(self):
        result = self._run_action('meow')
        self.assertTrue(result.is_error())
|
||||
|
||||
|
||||
@mock.patch.object(processutils, 'execute', autospec=True)
class TestProbeNode(base.TestCase):
    """Tests for ProbeNode with the ipmitool execution mocked out."""

    action = baremetal.ProbeNode('10.0.0.42', 623, 'admin', 'password')

    def _assert_ipmitool_called_once(self, mock_execute):
        """Check the single ipmitool invocation both tests expect."""
        expected_command = (
            'ipmitool', '-I', 'lanplus',
            '-H', '10.0.0.42',
            '-L', 'ADMINISTRATOR',
            '-p', '623', '-U', 'admin',
            '-f', mock.ANY, 'power', 'status',
        )
        mock_execute.assert_called_once_with(*expected_command, attempts=2)

    def test_success(self, mock_execute):
        result = self.action.run(mock.Mock())

        expected_node = {
            'pm_type': 'ipmi',
            'pm_addr': '10.0.0.42',
            'pm_user': 'admin',
            'pm_password': 'password',
            'pm_port': 623,
        }
        self.assertEqual(expected_node, result)
        self._assert_ipmitool_called_once(mock_execute)

    def test_failure(self, mock_execute):
        mock_execute.side_effect = processutils.ProcessExecutionError()

        self.assertIsNone(self.action.run(mock.Mock()))
        self._assert_ipmitool_called_once(mock_execute)
|
||||
|
@ -667,13 +667,13 @@ class NodesTest(base.TestCase):
|
||||
def test__get_node_id_fake_pxe(self):
|
||||
node = self._get_node()
|
||||
node['pm_type'] = 'fake_pxe'
|
||||
handler = nodes._find_driver_handler('fake_pxe')
|
||||
handler = nodes.find_driver_handler('fake_pxe')
|
||||
node_map = {'mac': {'aaa': 'abcdef'}, 'pm_addr': {}}
|
||||
self.assertEqual('abcdef', nodes._get_node_id(node, handler, node_map))
|
||||
|
||||
def test__get_node_id_conflict(self):
|
||||
node = self._get_node()
|
||||
handler = nodes._find_driver_handler('pxe_ipmitool')
|
||||
handler = nodes.find_driver_handler('pxe_ipmitool')
|
||||
node_map = {'mac': {'aaa': 'abcdef'},
|
||||
'pm_addr': {'foo.bar': 'defabc'}}
|
||||
self.assertRaises(exception.InvalidNode,
|
||||
@ -682,7 +682,7 @@ class NodesTest(base.TestCase):
|
||||
|
||||
def test_get_node_id_valid_duplicate(self):
|
||||
node = self._get_node()
|
||||
handler = nodes._find_driver_handler('pxe_ipmitool')
|
||||
handler = nodes.find_driver_handler('pxe_ipmitool')
|
||||
node_map = {'mac': {'aaa': 'id'},
|
||||
'pm_addr': {'foo.bar': 'id'}}
|
||||
self.assertEqual('id', nodes._get_node_id(node, handler, node_map))
|
||||
|
@ -31,11 +31,16 @@ class DriverInfo(object):
|
||||
DEFAULTS = {}
|
||||
|
||||
def __init__(self, prefix, mapping, deprecated_mapping=None,
|
||||
mandatory_fields=()):
|
||||
mandatory_fields=(), default_port=None):
|
||||
self._prefix = prefix
|
||||
self._mapping = mapping
|
||||
self._deprecated_mapping = deprecated_mapping or {}
|
||||
self._mandatory_fields = mandatory_fields
|
||||
self._default_port = default_port
|
||||
|
||||
@property
def default_port(self):
    """Return the default port given to this handler, or None if unset."""
    return self._default_port
|
||||
|
||||
def convert_key(self, key):
|
||||
if key in self._mapping:
|
||||
@ -87,7 +92,8 @@ class DriverInfo(object):
|
||||
|
||||
class PrefixedDriverInfo(DriverInfo):
|
||||
def __init__(self, prefix, deprecated_mapping=None,
|
||||
has_port=False, address_field='address'):
|
||||
has_port=False, address_field='address',
|
||||
default_port=None):
|
||||
mapping = {
|
||||
'pm_addr': '%s_%s' % (prefix, address_field),
|
||||
'pm_user': '%s_username' % prefix,
|
||||
@ -103,6 +109,7 @@ class PrefixedDriverInfo(DriverInfo):
|
||||
prefix, mapping,
|
||||
deprecated_mapping=deprecated_mapping,
|
||||
mandatory_fields=mandatory_fields,
|
||||
default_port=default_port,
|
||||
)
|
||||
|
||||
def unique_id_from_fields(self, fields):
|
||||
@ -223,7 +230,8 @@ class iBootDriverInfo(PrefixedDriverInfo):
|
||||
|
||||
DRIVER_INFO = {
|
||||
# production drivers
|
||||
'(ipmi|.*_ipmitool)': PrefixedDriverInfo('ipmi', has_port=True),
|
||||
'(ipmi|.*_ipmitool)': PrefixedDriverInfo('ipmi', has_port=True,
|
||||
default_port=623),
|
||||
'.*_drac': PrefixedDriverInfo('drac', has_port=True),
|
||||
'.*_ilo': PrefixedDriverInfo('ilo'),
|
||||
'.*_ucs': PrefixedDriverInfo(
|
||||
@ -254,7 +262,7 @@ DRIVER_INFO = {
|
||||
}
|
||||
|
||||
|
||||
def _find_driver_handler(driver):
|
||||
def find_driver_handler(driver):
|
||||
for driver_tpl, handler in DRIVER_INFO.items():
|
||||
if re.match(driver_tpl, driver) is not None:
|
||||
return handler
|
||||
@ -270,7 +278,7 @@ def _find_node_handler(fields):
|
||||
except KeyError:
|
||||
raise exception.InvalidNode('pm_type (ironic driver to use) is '
|
||||
'required', node=fields)
|
||||
return _find_driver_handler(driver)
|
||||
return find_driver_handler(driver)
|
||||
|
||||
|
||||
def register_ironic_node(node, client):
|
||||
@ -329,7 +337,7 @@ def _populate_node_mapping(client):
|
||||
for port in client.node.list_ports(node.uuid):
|
||||
node_map['mac'][port.address] = node.uuid
|
||||
|
||||
handler = _find_driver_handler(node.driver)
|
||||
handler = find_driver_handler(node.driver)
|
||||
unique_id = handler.unique_id_from_node(node)
|
||||
if unique_id:
|
||||
node_map['pm_addr'][unique_id] = node.uuid
|
||||
|
@ -948,3 +948,153 @@ workflows:
|
||||
execution: <% execution() %>
|
||||
on-success:
|
||||
- fail: <% $.get('status') = "FAILED" %>
|
||||
|
||||
|
||||
# Scan the given IP addresses/ports for BMCs that accept one of the given
# credentials and report the nodes found on the Zaqar queue.
discover_nodes:
  description: Run nodes discovery over the given IP range

  input:
    - ip_addresses
    - credentials
    - ports: [623]
    - queue_name: tripleo

  tasks:

    # Fetch the nodes already known to ironic so they can be excluded.
    get_all_nodes:
      action: ironic.node_list
      input:
        fields: ["uuid", "driver", "driver_info"]
        # NOTE(review): presumably 0 means "no limit" - confirm against the
        # ironic client.
        limit: 0
      on-success: get_candidate_nodes
      on-error: get_all_nodes_failed
      publish:
        existing_nodes: <% task(get_all_nodes).result %>

    get_all_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(get_all_nodes).result %>

    # Build the IP/port/credentials combinations that are worth probing.
    get_candidate_nodes:
      action: tripleo.baremetal.get_candidate_nodes
      input:
        ip_addresses: <% $.ip_addresses %>
        credentials: <% $.credentials %>
        ports: <% $.ports %>
        existing_nodes: <% $.existing_nodes %>
      on-success: probe_nodes
      on-error: get_candidate_nodes_failed
      publish:
        candidates: <% task(get_candidate_nodes).result %>

    get_candidate_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(get_candidate_nodes).result %>

    # Probe every candidate; null results are dropped from nodes_json.
    probe_nodes:
      action: tripleo.baremetal.probe_node
      on-success: send_message
      on-error: probe_nodes_failed
      input:
        ip: <% $.node.ip %>
        port: <% $.node.port %>
        username: <% $.node.username %>
        password: <% $.node.password %>
      with-items:
        - node in <% $.candidates %>
      publish:
        nodes_json: <% task(probe_nodes).result.where($ != null) %>

    probe_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(probe_nodes).result %>

    # Report the outcome; status defaults to SUCCESS unless a *_failed task
    # published FAILED, in which case the workflow itself is failed too.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.discover_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              nodes_json: <% $.get('nodes_json', []) %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
||||
|
||||
# Run the discover_nodes workflow, then pass the nodes it found to the
# register_or_update workflow, and report the outcome on the Zaqar queue.
discover_and_enroll_nodes:
  description: Run nodes discovery over the given IP range and enroll nodes

  input:
    - ip_addresses
    - credentials
    - ports: [623]
    - kernel_name: null
    - ramdisk_name: null
    - instance_boot_option: local
    - initial_state: manageable
    - queue_name: tripleo

  tasks:

    # Sub-workflow call; its nodes_json result feeds the enrollment below.
    discover_nodes:
      workflow: tripleo.baremetal.v1.discover_nodes
      input:
        ip_addresses: <% $.ip_addresses %>
        ports: <% $.ports %>
        credentials: <% $.credentials %>
        queue_name: <% $.queue_name %>
      on-success: enroll_nodes
      on-error: discover_nodes_failed
      publish:
        nodes_json: <% task(discover_nodes).result.nodes_json %>

    discover_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(discover_nodes).result %>

    # Enroll the discovered nodes through the register_or_update workflow.
    enroll_nodes:
      workflow: tripleo.baremetal.v1.register_or_update
      input:
        nodes_json: <% $.nodes_json %>
        kernel_name: <% $.kernel_name %>
        ramdisk_name: <% $.ramdisk_name %>
        instance_boot_option: <% $.instance_boot_option %>
        initial_state: <% $.initial_state %>
      on-success: send_message
      on-error: enroll_nodes_failed
      publish:
        registered_nodes: <% task(enroll_nodes).result.registered_nodes %>

    enroll_nodes_failed:
      on-success: send_message
      publish:
        status: FAILED
        message: <% task(enroll_nodes).result %>

    # Report the outcome; status defaults to SUCCESS unless a *_failed task
    # published FAILED, in which case the workflow itself is failed too.
    send_message:
      action: zaqar.queue_post
      retry: count=5 delay=1
      input:
        queue_name: <% $.queue_name %>
        messages:
          body:
            type: tripleo.baremetal.v1.discover_and_enroll_nodes
            payload:
              status: <% $.get('status', 'SUCCESS') %>
              message: <% $.get('message', '') %>
              execution: <% execution() %>
              registered_nodes: <% $.get('registered_nodes', []) %>
      on-success:
        - fail: <% $.get('status') = "FAILED" %>
|
||||
|
Loading…
Reference in New Issue
Block a user