From ee9ace2087e979326a890eccad8f9010fa1e6440 Mon Sep 17 00:00:00 2001 From: Alan Bishop Date: Mon, 2 Apr 2018 16:57:58 -0400 Subject: [PATCH] Use scheduler hints in derived_parameters workflow Update the tripleo.derive_params.v1.derived_parameters workflow to use scheduler hints associated with a given role. The scheduler hints are used to identify overcloud nodes associated with the role, and take precedence over nodes identified by their profile/flavor. If a role does not specify any scheduler hints then the workflow falls back to matching nodes by profile/flavor. Add a tripleo.baremetal.v1.nodes_with_hint workflow that searches for ironic nodes whose 'capabilities:node' scheduler hint matches a given regex. This workflow is analogous to the existing nodes_with_profile workflow that searches for nodes that match a given profile. Add a tripleo.baremetal.get_node_hint Mistral action that reports the 'capabilities:node' scheduler hint (e.g. "controller-0") for a given node, or None if the node has no such hint. This is analogous to the existing get_profile action that returns the profile assigned to the node. Fix the 'message' published when a derived parameter sub-workflow fails so it just returns the sub-workflow 'message' and not the entire sub-workflow result. The 'message' portion is the only part that is meaningful to the user. 
Closes-Bug: #1760659 Change-Id: I7eff355620aecaca49e77112ba491a5f3ce2eed6 (cherry picked from commit e25e8564a42d1074034a76da5412bea4fb77b414) --- ...sing-scheduler-hints-5bb65bc78c1f6f91.yaml | 7 +++ setup.cfg | 1 + tripleo_common/actions/baremetal.py | 14 +++++ .../tests/actions/test_baremetal.py | 21 ++++++- tripleo_common/utils/nodes.py | 12 ++++ workbooks/baremetal.yaml | 52 ++++++++++++++++ workbooks/derive_params.yaml | 61 ++++++++++++++----- 7 files changed, 153 insertions(+), 15 deletions(-) create mode 100644 releasenotes/notes/derive-parameters-using-scheduler-hints-5bb65bc78c1f6f91.yaml diff --git a/releasenotes/notes/derive-parameters-using-scheduler-hints-5bb65bc78c1f6f91.yaml b/releasenotes/notes/derive-parameters-using-scheduler-hints-5bb65bc78c1f6f91.yaml new file mode 100644 index 000000000..7dd256d90 --- /dev/null +++ b/releasenotes/notes/derive-parameters-using-scheduler-hints-5bb65bc78c1f6f91.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - Fix `bug 1760659 <https://bugs.launchpad.net/tripleo/+bug/1760659>`__ by + updating the derived parameters workflow to use scheduler hints associated + with a given role. The scheduler hints are used to identify overcloud + nodes associated with the role, and take precedence over nodes identified + by their profile/flavor. 
diff --git a/setup.cfg b/setup.cfg index 12cf2af61..6fc8f443d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -71,6 +71,7 @@ output_file = tripleo_common/locale/tripleo-common.pot mistral.actions = tripleo.baremetal.configure_boot = tripleo_common.actions.baremetal:ConfigureBootAction tripleo.baremetal.configure_root_device = tripleo_common.actions.baremetal:ConfigureRootDeviceAction + tripleo.baremetal.get_node_hint = tripleo_common.actions.baremetal:GetNodeHintAction tripleo.baremetal.get_profile = tripleo_common.actions.baremetal:GetProfileAction tripleo.baremetal.register_or_update_nodes = tripleo_common.actions.baremetal:RegisterOrUpdateNodes tripleo.baremetal.update_node_capability = tripleo_common.actions.baremetal:UpdateNodeCapability diff --git a/tripleo_common/actions/baremetal.py b/tripleo_common/actions/baremetal.py index dc73355ec..270ee2d46 100644 --- a/tripleo_common/actions/baremetal.py +++ b/tripleo_common/actions/baremetal.py @@ -355,6 +355,20 @@ class GetProfileAction(base.TripleOAction): return result +class GetNodeHintAction(base.TripleOAction): + """Return the scheduler hint associated with the given node """ + + def __init__(self, node): + super(GetNodeHintAction, self).__init__() + self.node = node + + def run(self, context): + result = {} + result['hint'] = nodes.get_node_hint(self.node) + result['uuid'] = self.node.get('uuid') + return result + + class GetCandidateNodes(base.TripleOAction): """Given IPs, ports and credentials, return potential new nodes.""" diff --git a/tripleo_common/tests/actions/test_baremetal.py b/tripleo_common/tests/actions/test_baremetal.py index 5f313397d..3732b872d 100644 --- a/tripleo_common/tests/actions/test_baremetal.py +++ b/tripleo_common/tests/actions/test_baremetal.py @@ -408,7 +408,26 @@ class TestGetProfileAction(base.TestCase): 'uuid': 'abcd1', 'profile': 'compute' } - self.assertEqual(result, expected_result) + self.assertEqual(expected_result, result) + + +class TestGetNodeHintAction(base.TestCase): + + def 
test_run(self): + mock_ctx = mock.MagicMock() + node = { + 'uuid': 'abcd1', + 'properties': { + 'capabilities': 'profile:compute,node:compute-0' + } + } + action = baremetal.GetNodeHintAction(node=node) + result = action.run(mock_ctx) + expected_result = { + 'uuid': 'abcd1', + 'hint': 'compute-0' + } + self.assertEqual(expected_result, result) @mock.patch.object(baremetal.socket, 'gethostbyname', lambda x: x) diff --git a/tripleo_common/utils/nodes.py b/tripleo_common/utils/nodes.py index 76d4d056c..2b90f8c9a 100644 --- a/tripleo_common/utils/nodes.py +++ b/tripleo_common/utils/nodes.py @@ -657,3 +657,15 @@ def get_node_profile(node): return capabilities_dict['profile'] return None + + +def get_node_hint(node): + """Return the 'capabilities:node' hint associated with the node """ + + capabilities = node.get('properties').get('capabilities') + capabilities_dict = capabilities_to_dict(capabilities) + + if 'node' in capabilities_dict: + return capabilities_dict['node'] + + return None diff --git a/workbooks/baremetal.yaml b/workbooks/baremetal.yaml index 18bcbba6a..5eeb65e3c 100644 --- a/workbooks/baremetal.yaml +++ b/workbooks/baremetal.yaml @@ -983,6 +983,58 @@ workflows: on-success: - fail: <% $.get('status') = "FAILED" %> + nodes_with_hint: + description: Find nodes matching a hint regex + input: + - hint_regex + - queue_name: tripleo + + tags: + - tripleo-common-managed + + tasks: + get_nodes: + with-items: provision_state in <% ['available', 'active'] %> + action: ironic.node_list maintenance=false provision_state=<% $.provision_state %> detail=true + on-success: get_matching_nodes + on-error: set_status_failed_get_nodes + + get_matching_nodes: + with-items: node in <% task(get_nodes).result.flatten() %> + action: tripleo.baremetal.get_node_hint node=<% $.node %> + on-success: send_message + on-error: set_status_failed_get_matching_nodes + publish: + matching_nodes: <% let(hint_regex => $.hint_regex) -> task().result.where($.hint and 
$.hint.matches($hint_regex)).uuid %> + + set_status_failed_get_nodes: + on-success: send_message + publish: + status: FAILED + message: <% task(get_nodes).result %> + + set_status_failed_get_matching_nodes: + on-success: send_message + publish: + status: FAILED + message: <% task(get_matching_nodes).result %> + + send_message: + action: zaqar.queue_post + retry: count=5 delay=1 + input: + queue_name: <% $.queue_name %> + messages: + body: + type: tripleo.baremetal.v1.nodes_with_hint + payload: + status: <% $.get('status', 'SUCCESS') %> + message: <% $.get('message', '') %> + execution: <% execution() %> + matching_nodes: <% $.matching_nodes or [] %> + on-success: + - fail: <% $.get('status') = "FAILED" %> + nodes_with_profile: description: Find nodes with a specific profile input: diff --git a/workbooks/derive_params.yaml b/workbooks/derive_params.yaml index fef03b22d..83a43b396 100644 --- a/workbooks/derive_params.yaml +++ b/workbooks/derive_params.yaml @@ -178,14 +178,40 @@ workflows: role_services: <% task().result.get('role_services', []) %> on-success: # Continue only if there are features associated with this role. Otherwise, we're done. - - get_flavor_name: <% $.role_features %> + - get_scheduler_hints: <% $.role_features %> on-error: set_status_failed_get_role_info - # Getting introspection data workflow, which will take care of - # 1) profile and flavor based mapping - # 2) Nova placement api based mapping - # Currently we have implemented profile and flavor based mapping - # TODO-Nova placement api based mapping is pending, we will enchance it later. + # Find a node associated with this role. Look for nodes matching any scheduler hints + # associated with the role, and if there are no scheduler hints then locate nodes + # with a profile matching the role's flavor. 
+ get_scheduler_hints: + publish: + scheduler_hints: <% let(param_name => concat($.role_name, 'SchedulerHints')) -> $.heat_resource_tree.parameters.get($param_name, {}).get('default', {}) %> + on-success: + - get_hint_regex: <% $.scheduler_hints %> + # If there are no scheduler hints then move on to use the flavor + - get_flavor_name: <% not $.scheduler_hints %> + + get_hint_regex: + publish: + hint_regex: <% $.scheduler_hints.get('capabilities:node', '').replace('%index%', '(\d+)') %> + on-success: + - get_node_with_hint: <% $.hint_regex %> + # If there is no 'capabilities:node' hint then move on to use the flavor + - get_flavor_name: <% not $.hint_regex %> + + get_node_with_hint: + workflow: tripleo.baremetal.v1.nodes_with_hint + input: + hint_regex: <% concat('^', $.hint_regex, '$') %> + publish: + role_node_uuid: <% task().result.matching_nodes.first('') %> + on-success: + - get_introspection_data: <% $.role_node_uuid %> + # If no nodes match the scheduler hint then move on to use the flavor + - get_flavor_name: <% not $.role_node_uuid %> + on-error: set_status_failed_on_error_get_node_with_hint + get_flavor_name: publish: flavor_name: <% let(param_name => concat('Overcloud', $.role_name, 'Flavor').replace('OvercloudControllerFlavor', 'OvercloudControlFlavor')) -> $.heat_resource_tree.parameters.get($param_name, {}).get('default', '') %> @@ -205,14 +231,14 @@ workflows: input: profile: <% $.profile_name %> publish: - profile_node_uuid: <% task().result.matching_nodes.first('') %> + role_node_uuid: <% task().result.matching_nodes.first('') %> on-success: - - get_introspection_data: <% $.profile_node_uuid %> - - set_status_failed_no_matching_node_get_profile_node: <% not $.profile_node_uuid %> + - get_introspection_data: <% $.role_node_uuid %> + - set_status_failed_no_matching_node_get_profile_node: <% not $.role_node_uuid %> on-error: set_status_failed_on_error_get_profile_node get_introspection_data: - action: baremetal_introspection.get_data uuid=<% 
$.profile_node_uuid %> + action: baremetal_introspection.get_data uuid=<% $.role_node_uuid %> publish: hw_data: <% task().result %> # Establish an empty dictionary of derived_parameters prior to @@ -324,6 +350,13 @@ workflows: message: <% task(get_profile_node).result %> on-success: fail + set_status_failed_on_error_get_node_with_hint: + publish: + role_name: <% $.role_name %> + status: FAILED + message: <% task(get_node_with_hint).result %> + on-success: fail + set_status_failed_get_introspection_data: publish: role_name: <% $.role_name %> @@ -335,28 +368,28 @@ workflows: publish: role_name: <% $.role_name %> status: FAILED - message: <% task(get_dpdk_derive_params).result %> + message: <% task(get_dpdk_derive_params).result.message %> on-success: fail set_status_failed_get_sriov_derive_params: publish: role_name: <% $.role_name %> status: FAILED - message: <% task(get_sriov_derive_params).result %> + message: <% task(get_sriov_derive_params).result.message %> on-success: fail set_status_failed_get_host_derive_params: publish: role_name: <% $.role_name %> status: FAILED - message: <% task(get_host_derive_params).result %> + message: <% task(get_host_derive_params).result.message %> on-success: fail set_status_failed_get_hci_derive_params: publish: role_name: <% $.role_name %> status: FAILED - message: <% task(get_hci_derive_params).result %> + message: <% task(get_hci_derive_params).result.message %> on-success: fail