Fix race in undercloud cell_v2 host discovery
Ensure that the ironic nodes have been picked up by the nova resource tracker before running nova-manage cell_v2 host discovery. Also adds logging of the verbose command output to mistral engine log. Change-Id: I4cc67935df8f37cdb2d8b0bfd96cf90eb7a6ce25 Closes-Bug: #1660160
This commit is contained in:
parent
38feb02f5d
commit
f6c286dbe8
@ -64,6 +64,7 @@ mistral.actions =
|
||||
tripleo.baremetal.configure_root_device = tripleo_common.actions.baremetal:ConfigureRootDeviceAction
|
||||
tripleo.baremetal.register_or_update_nodes = tripleo_common.actions.baremetal:RegisterOrUpdateNodes
|
||||
tripleo.baremetal.update_node_capability = tripleo_common.actions.baremetal:UpdateNodeCapability
|
||||
tripleo.baremetal.cell_v2_discover_hosts = tripleo_common.actions.baremetal:CellV2DiscoverHostsAction
|
||||
tripleo.deployment.config = tripleo_common.actions.deployment:OrchestrationDeployAction
|
||||
tripleo.deployment.deploy = tripleo_common.actions.deployment:DeployStackAction
|
||||
tripleo.deployment.overcloudrc = tripleo_common.actions.deployment:OvercloudRcAction
|
||||
|
2
sudoers
2
sudoers
@ -4,5 +4,5 @@ Defaults:mistral !requiretty
|
||||
mistral ALL = (validations) NOPASSWD:SETENV: /usr/bin/run-validation
|
||||
mistral ALL = NOPASSWD: /usr/bin/chown validations\: /tmp/validations_identity_*
|
||||
mistral ALL = NOPASSWD: /usr/bin/rm -f /tmp/validations_identity_*
|
||||
mistral ALL = NOPASSWD: /bin/nova-manage cell_v2 discover_hosts
|
||||
mistral ALL = NOPASSWD: /bin/nova-manage cell_v2 discover_hosts *
|
||||
validations ALL = NOPASSWD: ALL
|
||||
|
@ -288,3 +288,25 @@ class UpdateNodeCapability(base.TripleOAction):
|
||||
return mistral_workflow_utils.Result(
|
||||
error="%s: %s" % (type(err).__name__, str(err))
|
||||
)
|
||||
|
||||
|
||||
class CellV2DiscoverHostsAction(base.TripleOAction):
|
||||
"""Run cell_v2 host discovery
|
||||
|
||||
Runs cell_v2 host discovery to map any newly available ironic nodes.
|
||||
|
||||
"""
|
||||
|
||||
def run(self):
|
||||
try:
|
||||
result = nodes.run_nova_cell_v2_discovery()
|
||||
LOG.info(
|
||||
'Successfully ran cell_v2 discover_hosts\n'
|
||||
'stdout: %(stdout)r\n',
|
||||
{"stdout": result[0]}
|
||||
)
|
||||
except Exception as err:
|
||||
LOG.exception("Error running cell_v2 discover_hosts")
|
||||
return mistral_workflow_utils.Result(
|
||||
error="%s: %s" % (type(err).__name__, str(err))
|
||||
)
|
||||
|
@ -15,6 +15,7 @@ import mock
|
||||
|
||||
from glanceclient import exc as glance_exceptions
|
||||
import ironic_inspector_client
|
||||
from oslo_concurrency import processutils
|
||||
from oslo_utils import units
|
||||
|
||||
from tripleo_common.actions import baremetal
|
||||
@ -330,3 +331,25 @@ class TestConfigureRootDeviceAction(base.TestCase):
|
||||
"Cannot find a disk",
|
||||
action.run)
|
||||
self.assertEqual(self.ironic.node.update.call_count, 0)
|
||||
|
||||
|
||||
class TestCellV2DiscoverHostsAction(base.TestCase):
|
||||
|
||||
@mock.patch('tripleo_common.utils.nodes.run_nova_cell_v2_discovery')
|
||||
def test_run(self, mock_command):
|
||||
action = baremetal.CellV2DiscoverHostsAction()
|
||||
action.run()
|
||||
mock_command.assert_called_once()
|
||||
|
||||
@mock.patch('tripleo_common.utils.nodes.run_nova_cell_v2_discovery')
|
||||
def test_failure(self, mock_command):
|
||||
mock_command.side_effect = processutils.ProcessExecutionError(
|
||||
exit_code=1,
|
||||
stdout='captured stdout',
|
||||
stderr='captured stderr',
|
||||
cmd='command'
|
||||
)
|
||||
action = baremetal.CellV2DiscoverHostsAction()
|
||||
result = action.run()
|
||||
self.assertTrue(result.is_error())
|
||||
mock_command.assert_called_once()
|
||||
|
@ -190,8 +190,7 @@ class NodesTest(base.TestCase):
|
||||
'pm_password': 'random', 'pm_type': 'pxe_ssh', 'name': 'node1',
|
||||
'capabilities': 'num_nics:6'}
|
||||
|
||||
@mock.patch('tripleo_common.utils.nodes.run_nova_cell_v2_discovery')
|
||||
def test_register_all_nodes_ironic_no_hw_stats(self, mock_discovery):
|
||||
def test_register_all_nodes_ironic_no_hw_stats(self):
|
||||
node_list = [self._get_node()]
|
||||
|
||||
# Remove the hardware stats from the node dictionary
|
||||
@ -219,10 +218,8 @@ class NodesTest(base.TestCase):
|
||||
address='aaa')
|
||||
ironic.node.create.assert_has_calls([pxe_node, mock.ANY])
|
||||
ironic.port.create.assert_has_calls([port_call])
|
||||
mock_discovery.assert_called_once()
|
||||
|
||||
@mock.patch('tripleo_common.utils.nodes.run_nova_cell_v2_discovery')
|
||||
def test_register_all_nodes(self, mock_discovery):
|
||||
def test_register_all_nodes(self):
|
||||
node_list = [self._get_node()]
|
||||
node_properties = {"cpus": "1",
|
||||
"memory_mb": "2048",
|
||||
@ -243,10 +240,8 @@ class NodesTest(base.TestCase):
|
||||
address='aaa')
|
||||
ironic.node.create.assert_has_calls([pxe_node, mock.ANY])
|
||||
ironic.port.create.assert_has_calls([port_call])
|
||||
mock_discovery.assert_called_once()
|
||||
|
||||
@mock.patch('tripleo_common.utils.nodes.run_nova_cell_v2_discovery')
|
||||
def test_register_all_nodes_kernel_ramdisk(self, mock_discovery):
|
||||
def test_register_all_nodes_kernel_ramdisk(self):
|
||||
node_list = [self._get_node()]
|
||||
node_properties = {"cpus": "1",
|
||||
"memory_mb": "2048",
|
||||
@ -275,10 +270,8 @@ class NodesTest(base.TestCase):
|
||||
address='aaa')
|
||||
ironic.node.create.assert_has_calls([pxe_node, mock.ANY])
|
||||
ironic.port.create.assert_has_calls([port_call])
|
||||
mock_discovery.assert_called_once()
|
||||
|
||||
@mock.patch('tripleo_common.utils.nodes.run_nova_cell_v2_discovery')
|
||||
def test_register_all_nodes_uuid(self, mock_discovery):
|
||||
def test_register_all_nodes_uuid(self):
|
||||
node_list = [self._get_node()]
|
||||
node_list[0]['uuid'] = 'abcdef'
|
||||
node_properties = {"cpus": "1",
|
||||
@ -301,10 +294,8 @@ class NodesTest(base.TestCase):
|
||||
address='aaa')
|
||||
ironic.node.create.assert_has_calls([pxe_node, mock.ANY])
|
||||
ironic.port.create.assert_has_calls([port_call])
|
||||
mock_discovery.assert_called_once()
|
||||
|
||||
@mock.patch('tripleo_common.utils.nodes.run_nova_cell_v2_discovery')
|
||||
def test_register_all_nodes_caps_dict(self, mock_discovery):
|
||||
def test_register_all_nodes_caps_dict(self):
|
||||
node_list = [self._get_node()]
|
||||
node_list[0]['capabilities'] = {
|
||||
'num_nics': 7
|
||||
@ -328,7 +319,6 @@ class NodesTest(base.TestCase):
|
||||
address='aaa')
|
||||
ironic.node.create.assert_has_calls([pxe_node, mock.ANY])
|
||||
ironic.port.create.assert_has_calls([port_call])
|
||||
mock_discovery.assert_called_once()
|
||||
|
||||
def test_register_update(self):
|
||||
node = self._get_node()
|
||||
|
@ -382,7 +382,6 @@ def register_all_nodes(nodes_list, client, remove=False, glance_client=None,
|
||||
seen.append(node)
|
||||
|
||||
_clean_up_extra_nodes(seen, client, remove=remove)
|
||||
run_nova_cell_v2_discovery()
|
||||
|
||||
return seen
|
||||
|
||||
@ -452,5 +451,6 @@ def run_nova_cell_v2_discovery():
|
||||
'/usr/bin/sudo',
|
||||
'/bin/nova-manage',
|
||||
'cell_v2',
|
||||
'discover_hosts'
|
||||
'discover_hosts',
|
||||
'--verbose'
|
||||
)
|
||||
|
@ -193,7 +193,7 @@ workflows:
|
||||
tasks:
|
||||
|
||||
set_nodes_available:
|
||||
on-success: try_power_off
|
||||
on-success: wait_for_nova_resources
|
||||
on-error: set_status_failed_nodes_available
|
||||
with-items: uuid in <% $.node_uuids %>
|
||||
workflow: tripleo.baremetal.v1.set_node_state
|
||||
@ -209,6 +209,36 @@ workflows:
|
||||
status: FAILED
|
||||
message: <% task(set_nodes_available).result %>
|
||||
|
||||
wait_for_nova_resources:
|
||||
on-success: cell_v2_discover_hosts
|
||||
on-error: wait_for_nova_resources_failed
|
||||
with-items: node_uuid in <% $.node_uuids %>
|
||||
action: nova.hypervisors_find hypervisor_hostname=<% $.node_uuid %>
|
||||
timeout: 900 #15 minutes
|
||||
retry:
|
||||
delay: 30
|
||||
count: 30
|
||||
|
||||
wait_for_nova_resources_failed:
|
||||
on-success: send_message
|
||||
publish:
|
||||
status: FAILED
|
||||
message: <% task(wait_for_nova_resources).result %>
|
||||
|
||||
cell_v2_discover_hosts:
|
||||
on-success: try_power_off
|
||||
on-error: cell_v2_discover_hosts_failed
|
||||
workflow: tripleo.baremetal.v1.cellv2_discovery
|
||||
input:
|
||||
node_uuids: <% $.node_uuids %>
|
||||
queue_name: <% $.queue_name %>
|
||||
|
||||
cell_v2_discover_hosts_failed:
|
||||
on-success: send_message
|
||||
publish:
|
||||
status: FAILED
|
||||
message: <% task(cell_v2_discover_hosts).result %>
|
||||
|
||||
try_power_off:
|
||||
on-success: send_message
|
||||
on-error: power_off_failed
|
||||
@ -758,3 +788,42 @@ workflows:
|
||||
|
||||
fail_workflow:
|
||||
action: std.fail
|
||||
|
||||
|
||||
cellv2_discovery:
|
||||
description: Run cell_v2 host discovery
|
||||
|
||||
input:
|
||||
- node_uuids
|
||||
- queue_name: tripleo
|
||||
|
||||
tasks:
|
||||
|
||||
cell_v2_discover_hosts:
|
||||
on-success: send_message
|
||||
on-error: cell_v2_discover_hosts_failed
|
||||
action: tripleo.baremetal.cell_v2_discover_hosts
|
||||
|
||||
cell_v2_discover_hosts_failed:
|
||||
on-success: send_message
|
||||
publish:
|
||||
status: FAILED
|
||||
message: <% task(cell_v2_discover_hosts).result %>
|
||||
|
||||
send_message:
|
||||
action: zaqar.queue_post
|
||||
retry: count=5 delay=1
|
||||
input:
|
||||
queue_name: <% $.queue_name %>
|
||||
messages:
|
||||
body:
|
||||
type: tripleo.baremetal.v1.cellv2_discovery
|
||||
payload:
|
||||
status: <% $.get('status', 'SUCCESS') %>
|
||||
message: <% $.get('message', '') %>
|
||||
execution: <% execution() %>
|
||||
on-success:
|
||||
- fail_workflow: <% $.get('status') = "FAILED" %>
|
||||
|
||||
fail_workflow:
|
||||
action: std.fail
|
||||
|
Loading…
Reference in New Issue
Block a user