Fix race in undercloud cell_v2 host discovery
Ensure that the ironic nodes have been picked up by the nova resource tracker before running nova-manage cell_v2 host discovery. Also add logging of the verbose command output to the Mistral engine log.

Change-Id: I4cc67935df8f37cdb2d8b0bfd96cf90eb7a6ce25
Closes-Bug: #1660160
This commit is contained in:
parent
38feb02f5d
commit
f6c286dbe8
@ -64,6 +64,7 @@ mistral.actions =
|
|||||||
tripleo.baremetal.configure_root_device = tripleo_common.actions.baremetal:ConfigureRootDeviceAction
|
tripleo.baremetal.configure_root_device = tripleo_common.actions.baremetal:ConfigureRootDeviceAction
|
||||||
tripleo.baremetal.register_or_update_nodes = tripleo_common.actions.baremetal:RegisterOrUpdateNodes
|
tripleo.baremetal.register_or_update_nodes = tripleo_common.actions.baremetal:RegisterOrUpdateNodes
|
||||||
tripleo.baremetal.update_node_capability = tripleo_common.actions.baremetal:UpdateNodeCapability
|
tripleo.baremetal.update_node_capability = tripleo_common.actions.baremetal:UpdateNodeCapability
|
||||||
|
tripleo.baremetal.cell_v2_discover_hosts = tripleo_common.actions.baremetal:CellV2DiscoverHostsAction
|
||||||
tripleo.deployment.config = tripleo_common.actions.deployment:OrchestrationDeployAction
|
tripleo.deployment.config = tripleo_common.actions.deployment:OrchestrationDeployAction
|
||||||
tripleo.deployment.deploy = tripleo_common.actions.deployment:DeployStackAction
|
tripleo.deployment.deploy = tripleo_common.actions.deployment:DeployStackAction
|
||||||
tripleo.deployment.overcloudrc = tripleo_common.actions.deployment:OvercloudRcAction
|
tripleo.deployment.overcloudrc = tripleo_common.actions.deployment:OvercloudRcAction
|
||||||
|
2
sudoers
2
sudoers
@ -4,5 +4,5 @@ Defaults:mistral !requiretty
|
|||||||
mistral ALL = (validations) NOPASSWD:SETENV: /usr/bin/run-validation
|
mistral ALL = (validations) NOPASSWD:SETENV: /usr/bin/run-validation
|
||||||
mistral ALL = NOPASSWD: /usr/bin/chown validations\: /tmp/validations_identity_*
|
mistral ALL = NOPASSWD: /usr/bin/chown validations\: /tmp/validations_identity_*
|
||||||
mistral ALL = NOPASSWD: /usr/bin/rm -f /tmp/validations_identity_*
|
mistral ALL = NOPASSWD: /usr/bin/rm -f /tmp/validations_identity_*
|
||||||
mistral ALL = NOPASSWD: /bin/nova-manage cell_v2 discover_hosts
|
mistral ALL = NOPASSWD: /bin/nova-manage cell_v2 discover_hosts *
|
||||||
validations ALL = NOPASSWD: ALL
|
validations ALL = NOPASSWD: ALL
|
||||||
|
@ -288,3 +288,25 @@ class UpdateNodeCapability(base.TripleOAction):
|
|||||||
return mistral_workflow_utils.Result(
|
return mistral_workflow_utils.Result(
|
||||||
error="%s: %s" % (type(err).__name__, str(err))
|
error="%s: %s" % (type(err).__name__, str(err))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CellV2DiscoverHostsAction(base.TripleOAction):
    """Mistral action that runs nova cell_v2 host discovery.

    Host discovery maps any newly available ironic nodes. The work is
    delegated to ``nodes.run_nova_cell_v2_discovery()``; the first element
    of its result is written to the log as the command's stdout.
    """

    def run(self):
        """Run host discovery and log the captured output.

        :returns: ``None`` on success. If anything in the discovery or
            logging step raises, a ``mistral_workflow_utils.Result`` whose
            ``error`` is ``"<ExceptionType>: <message>"``.
        """
        try:
            discovery_output = nodes.run_nova_cell_v2_discovery()
            captured_stdout = discovery_output[0]
            LOG.info(
                'Successfully ran cell_v2 discover_hosts\n'
                'stdout: %(stdout)r\n',
                {"stdout": captured_stdout}
            )
        except Exception as err:
            # Action boundary: record the traceback, then report the
            # failure to the workflow as an error Result instead of
            # letting the exception escape.
            LOG.exception("Error running cell_v2 discover_hosts")
            failure = "%s: %s" % (type(err).__name__, str(err))
            return mistral_workflow_utils.Result(error=failure)
|
||||||
|
@ -15,6 +15,7 @@ import mock
|
|||||||
|
|
||||||
from glanceclient import exc as glance_exceptions
|
from glanceclient import exc as glance_exceptions
|
||||||
import ironic_inspector_client
|
import ironic_inspector_client
|
||||||
|
from oslo_concurrency import processutils
|
||||||
from oslo_utils import units
|
from oslo_utils import units
|
||||||
|
|
||||||
from tripleo_common.actions import baremetal
|
from tripleo_common.actions import baremetal
|
||||||
@ -330,3 +331,25 @@ class TestConfigureRootDeviceAction(base.TestCase):
|
|||||||
"Cannot find a disk",
|
"Cannot find a disk",
|
||||||
action.run)
|
action.run)
|
||||||
self.assertEqual(self.ironic.node.update.call_count, 0)
|
self.assertEqual(self.ironic.node.update.call_count, 0)
|
||||||
|
|
||||||
|
|
||||||
|
class TestCellV2DiscoverHostsAction(base.TestCase):
    """Unit tests for ``baremetal.CellV2DiscoverHostsAction``."""

    @mock.patch('tripleo_common.utils.nodes.run_nova_cell_v2_discovery')
    def test_run(self, mock_command):
        """A successful run calls the discovery helper once, returns None."""
        action = baremetal.CellV2DiscoverHostsAction()
        result = action.run()

        # The action only returns a Result on the failure path.
        self.assertIsNone(result)
        # assert_called_once_with() rather than assert_called_once(): on
        # mock releases that predate assert_called_once, that name is just
        # an auto-created attribute and the "assertion" silently passes.
        mock_command.assert_called_once_with()

    @mock.patch('tripleo_common.utils.nodes.run_nova_cell_v2_discovery')
    def test_failure(self, mock_command):
        """A failing discovery command is reported as an error Result."""
        mock_command.side_effect = processutils.ProcessExecutionError(
            exit_code=1,
            stdout='captured stdout',
            stderr='captured stderr',
            cmd='command'
        )
        action = baremetal.CellV2DiscoverHostsAction()
        result = action.run()

        self.assertTrue(result.is_error())
        # Same reasoning as in test_run: use the strict, always-available
        # form of the call-count assertion.
        mock_command.assert_called_once_with()
|
||||||
|
@ -190,8 +190,7 @@ class NodesTest(base.TestCase):
|
|||||||
'pm_password': 'random', 'pm_type': 'pxe_ssh', 'name': 'node1',
|
'pm_password': 'random', 'pm_type': 'pxe_ssh', 'name': 'node1',
|
||||||
'capabilities': 'num_nics:6'}
|
'capabilities': 'num_nics:6'}
|
||||||
|
|
||||||
@mock.patch('tripleo_common.utils.nodes.run_nova_cell_v2_discovery')
|
def test_register_all_nodes_ironic_no_hw_stats(self):
|
||||||
def test_register_all_nodes_ironic_no_hw_stats(self, mock_discovery):
|
|
||||||
node_list = [self._get_node()]
|
node_list = [self._get_node()]
|
||||||
|
|
||||||
# Remove the hardware stats from the node dictionary
|
# Remove the hardware stats from the node dictionary
|
||||||
@ -219,10 +218,8 @@ class NodesTest(base.TestCase):
|
|||||||
address='aaa')
|
address='aaa')
|
||||||
ironic.node.create.assert_has_calls([pxe_node, mock.ANY])
|
ironic.node.create.assert_has_calls([pxe_node, mock.ANY])
|
||||||
ironic.port.create.assert_has_calls([port_call])
|
ironic.port.create.assert_has_calls([port_call])
|
||||||
mock_discovery.assert_called_once()
|
|
||||||
|
|
||||||
@mock.patch('tripleo_common.utils.nodes.run_nova_cell_v2_discovery')
|
def test_register_all_nodes(self):
|
||||||
def test_register_all_nodes(self, mock_discovery):
|
|
||||||
node_list = [self._get_node()]
|
node_list = [self._get_node()]
|
||||||
node_properties = {"cpus": "1",
|
node_properties = {"cpus": "1",
|
||||||
"memory_mb": "2048",
|
"memory_mb": "2048",
|
||||||
@ -243,10 +240,8 @@ class NodesTest(base.TestCase):
|
|||||||
address='aaa')
|
address='aaa')
|
||||||
ironic.node.create.assert_has_calls([pxe_node, mock.ANY])
|
ironic.node.create.assert_has_calls([pxe_node, mock.ANY])
|
||||||
ironic.port.create.assert_has_calls([port_call])
|
ironic.port.create.assert_has_calls([port_call])
|
||||||
mock_discovery.assert_called_once()
|
|
||||||
|
|
||||||
@mock.patch('tripleo_common.utils.nodes.run_nova_cell_v2_discovery')
|
def test_register_all_nodes_kernel_ramdisk(self):
|
||||||
def test_register_all_nodes_kernel_ramdisk(self, mock_discovery):
|
|
||||||
node_list = [self._get_node()]
|
node_list = [self._get_node()]
|
||||||
node_properties = {"cpus": "1",
|
node_properties = {"cpus": "1",
|
||||||
"memory_mb": "2048",
|
"memory_mb": "2048",
|
||||||
@ -275,10 +270,8 @@ class NodesTest(base.TestCase):
|
|||||||
address='aaa')
|
address='aaa')
|
||||||
ironic.node.create.assert_has_calls([pxe_node, mock.ANY])
|
ironic.node.create.assert_has_calls([pxe_node, mock.ANY])
|
||||||
ironic.port.create.assert_has_calls([port_call])
|
ironic.port.create.assert_has_calls([port_call])
|
||||||
mock_discovery.assert_called_once()
|
|
||||||
|
|
||||||
@mock.patch('tripleo_common.utils.nodes.run_nova_cell_v2_discovery')
|
def test_register_all_nodes_uuid(self):
|
||||||
def test_register_all_nodes_uuid(self, mock_discovery):
|
|
||||||
node_list = [self._get_node()]
|
node_list = [self._get_node()]
|
||||||
node_list[0]['uuid'] = 'abcdef'
|
node_list[0]['uuid'] = 'abcdef'
|
||||||
node_properties = {"cpus": "1",
|
node_properties = {"cpus": "1",
|
||||||
@ -301,10 +294,8 @@ class NodesTest(base.TestCase):
|
|||||||
address='aaa')
|
address='aaa')
|
||||||
ironic.node.create.assert_has_calls([pxe_node, mock.ANY])
|
ironic.node.create.assert_has_calls([pxe_node, mock.ANY])
|
||||||
ironic.port.create.assert_has_calls([port_call])
|
ironic.port.create.assert_has_calls([port_call])
|
||||||
mock_discovery.assert_called_once()
|
|
||||||
|
|
||||||
@mock.patch('tripleo_common.utils.nodes.run_nova_cell_v2_discovery')
|
def test_register_all_nodes_caps_dict(self):
|
||||||
def test_register_all_nodes_caps_dict(self, mock_discovery):
|
|
||||||
node_list = [self._get_node()]
|
node_list = [self._get_node()]
|
||||||
node_list[0]['capabilities'] = {
|
node_list[0]['capabilities'] = {
|
||||||
'num_nics': 7
|
'num_nics': 7
|
||||||
@ -328,7 +319,6 @@ class NodesTest(base.TestCase):
|
|||||||
address='aaa')
|
address='aaa')
|
||||||
ironic.node.create.assert_has_calls([pxe_node, mock.ANY])
|
ironic.node.create.assert_has_calls([pxe_node, mock.ANY])
|
||||||
ironic.port.create.assert_has_calls([port_call])
|
ironic.port.create.assert_has_calls([port_call])
|
||||||
mock_discovery.assert_called_once()
|
|
||||||
|
|
||||||
def test_register_update(self):
|
def test_register_update(self):
|
||||||
node = self._get_node()
|
node = self._get_node()
|
||||||
|
@ -382,7 +382,6 @@ def register_all_nodes(nodes_list, client, remove=False, glance_client=None,
|
|||||||
seen.append(node)
|
seen.append(node)
|
||||||
|
|
||||||
_clean_up_extra_nodes(seen, client, remove=remove)
|
_clean_up_extra_nodes(seen, client, remove=remove)
|
||||||
run_nova_cell_v2_discovery()
|
|
||||||
|
|
||||||
return seen
|
return seen
|
||||||
|
|
||||||
@ -452,5 +451,6 @@ def run_nova_cell_v2_discovery():
|
|||||||
'/usr/bin/sudo',
|
'/usr/bin/sudo',
|
||||||
'/bin/nova-manage',
|
'/bin/nova-manage',
|
||||||
'cell_v2',
|
'cell_v2',
|
||||||
'discover_hosts'
|
'discover_hosts',
|
||||||
|
'--verbose'
|
||||||
)
|
)
|
||||||
|
@ -193,7 +193,7 @@ workflows:
|
|||||||
tasks:
|
tasks:
|
||||||
|
|
||||||
set_nodes_available:
|
set_nodes_available:
|
||||||
on-success: try_power_off
|
on-success: wait_for_nova_resources
|
||||||
on-error: set_status_failed_nodes_available
|
on-error: set_status_failed_nodes_available
|
||||||
with-items: uuid in <% $.node_uuids %>
|
with-items: uuid in <% $.node_uuids %>
|
||||||
workflow: tripleo.baremetal.v1.set_node_state
|
workflow: tripleo.baremetal.v1.set_node_state
|
||||||
@ -209,6 +209,36 @@ workflows:
|
|||||||
status: FAILED
|
status: FAILED
|
||||||
message: <% task(set_nodes_available).result %>
|
message: <% task(set_nodes_available).result %>
|
||||||
|
|
||||||
|
wait_for_nova_resources:
|
||||||
|
on-success: cell_v2_discover_hosts
|
||||||
|
on-error: wait_for_nova_resources_failed
|
||||||
|
with-items: node_uuid in <% $.node_uuids %>
|
||||||
|
action: nova.hypervisors_find hypervisor_hostname=<% $.node_uuid %>
|
||||||
|
timeout: 900 #15 minutes
|
||||||
|
retry:
|
||||||
|
delay: 30
|
||||||
|
count: 30
|
||||||
|
|
||||||
|
wait_for_nova_resources_failed:
|
||||||
|
on-success: send_message
|
||||||
|
publish:
|
||||||
|
status: FAILED
|
||||||
|
message: <% task(wait_for_nova_resources).result %>
|
||||||
|
|
||||||
|
cell_v2_discover_hosts:
|
||||||
|
on-success: try_power_off
|
||||||
|
on-error: cell_v2_discover_hosts_failed
|
||||||
|
workflow: tripleo.baremetal.v1.cellv2_discovery
|
||||||
|
input:
|
||||||
|
node_uuids: <% $.node_uuids %>
|
||||||
|
queue_name: <% $.queue_name %>
|
||||||
|
|
||||||
|
cell_v2_discover_hosts_failed:
|
||||||
|
on-success: send_message
|
||||||
|
publish:
|
||||||
|
status: FAILED
|
||||||
|
message: <% task(cell_v2_discover_hosts).result %>
|
||||||
|
|
||||||
try_power_off:
|
try_power_off:
|
||||||
on-success: send_message
|
on-success: send_message
|
||||||
on-error: power_off_failed
|
on-error: power_off_failed
|
||||||
@ -758,3 +788,42 @@ workflows:
|
|||||||
|
|
||||||
fail_workflow:
|
fail_workflow:
|
||||||
action: std.fail
|
action: std.fail
|
||||||
|
|
||||||
|
|
||||||
|
cellv2_discovery:
|
||||||
|
description: Run cell_v2 host discovery
|
||||||
|
|
||||||
|
input:
|
||||||
|
- node_uuids
|
||||||
|
- queue_name: tripleo
|
||||||
|
|
||||||
|
tasks:
|
||||||
|
|
||||||
|
cell_v2_discover_hosts:
|
||||||
|
on-success: send_message
|
||||||
|
on-error: cell_v2_discover_hosts_failed
|
||||||
|
action: tripleo.baremetal.cell_v2_discover_hosts
|
||||||
|
|
||||||
|
cell_v2_discover_hosts_failed:
|
||||||
|
on-success: send_message
|
||||||
|
publish:
|
||||||
|
status: FAILED
|
||||||
|
message: <% task(cell_v2_discover_hosts).result %>
|
||||||
|
|
||||||
|
send_message:
|
||||||
|
action: zaqar.queue_post
|
||||||
|
retry: count=5 delay=1
|
||||||
|
input:
|
||||||
|
queue_name: <% $.queue_name %>
|
||||||
|
messages:
|
||||||
|
body:
|
||||||
|
type: tripleo.baremetal.v1.cellv2_discovery
|
||||||
|
payload:
|
||||||
|
status: <% $.get('status', 'SUCCESS') %>
|
||||||
|
message: <% $.get('message', '') %>
|
||||||
|
execution: <% execution() %>
|
||||||
|
on-success:
|
||||||
|
- fail_workflow: <% $.get('status') = "FAILED" %>
|
||||||
|
|
||||||
|
fail_workflow:
|
||||||
|
action: std.fail
|
||||||
|
Loading…
Reference in New Issue
Block a user