From b595bacddbdb526b5db31c1c48168cc10f39723e Mon Sep 17 00:00:00 2001
From: Kevin Carter
Date: Tue, 25 Feb 2020 10:34:58 -0600
Subject: [PATCH] Implement concurrency in baremetal cleaning

This change implements concurrency in the os_baremetal_clean_node
module. The module will now process all provided nodes concurrently,
limiting the thread count to the lesser of the number of nodes and the
`concurrency` option, which defaults to 20. If the number of nodes is
less than the value of `concurrency`, the thread pool is limited to the
number of nodes.

Change-Id: I07f3c731c79cfe9bac8652f082e291832dc9b351
Signed-off-by: Kevin Carter
---
 .../modules/os_baremetal_clean_node.py        | 137 ++++++++++++------
 .../playbooks/cli-baremetal-clean.yaml        |  11 +-
 2 files changed, 98 insertions(+), 50 deletions(-)

diff --git a/tripleo_ansible/ansible_plugins/modules/os_baremetal_clean_node.py b/tripleo_ansible/ansible_plugins/modules/os_baremetal_clean_node.py
index e42f9a299..683970b06 100644
--- a/tripleo_ansible/ansible_plugins/modules/os_baremetal_clean_node.py
+++ b/tripleo_ansible/ansible_plugins/modules/os_baremetal_clean_node.py
@@ -69,6 +69,18 @@ options:
       - Don't provide cleaned nodes info in output of the module
     type: bool
     default: False
+  max_retries:
+    description:
+      - Number of attempts before failing.
+    type: int
+    required: False
+    default: 0
+  concurrency:
+    description:
+      - Max level of concurrency.
+    type: int
+    required: False
+    default: 20
 requirements: ["openstacksdk"]
 '''
 
@@ -367,9 +379,11 @@ EXAMPLES = '''
       checksum: "a94e683ea16d9ae44768f0a65942234d"
       component: "ilo"
 '''
-import time
+
 import yaml
 
+from concurrent import futures
+
 from ansible.module_utils.basic import AnsibleModule
 from ansible.module_utils.openstack import (openstack_full_argument_spec,
                                             openstack_module_kwargs,
@@ -382,55 +396,85 @@ def parallel_nodes_cleaning(conn, module):
     nodes = module.params['node_uuid'] + module.params['node_name']
     clean_steps = module.params['clean_steps']
     result = {}
-    nodes_wait = nodes[:]
-    for node in nodes:
-        try:
-            client.set_node_provision_state(
+    workers = min(len(nodes), module.params['concurrency'])
+    with futures.ThreadPoolExecutor(max_workers=workers) as executor:
+        future_to_build = {
+            executor.submit(
+                client.set_node_provision_state,
                 node,
                 "clean",
                 clean_steps=clean_steps,
-                wait=False)
-        except Exception as e:
-            nodes_wait.remove(node)
+                wait=True
+            ): node for node in nodes
+        }
+
+        done, not_done = futures.wait(
+            future_to_build,
+            timeout=node_timeout,
+            return_when=futures.ALL_COMPLETED
+        )
+
+    nodes_wait = list()
+    for job in done:
+        if job._exception:
+            result.update(
+                {
+                    future_to_build[job]: {
+                        'msg': 'Cleaning failed for node {}: {}'.format(
+                            future_to_build[job],
+                            str(job._exception)
+                        ),
+                        'failed': True,
+                        'info': {}
+                    }
+                }
+            )
+        else:
+            nodes_wait.append(future_to_build[job])
+    else:
+        if not_done:
+            for job in not_done:
+                result.update(
+                    {
+                        future_to_build[job]: {
+                            'msg': 'Cleaning incomplete for node {}'.format(
+                                future_to_build[job],
+                            ),
+                            'failed': True,
+                            'info': {}
+                        }
+                    }
+                )
+
+    nodes_to_delete = []
+    for node in nodes_wait:
+        node_info = client.get_node(
+            node,
+            fields=['provision_state', 'last_error']
+        ).to_dict()
+        state = node_info['provision_state']
+        if state == 'manageable':
+            nodes_to_delete.append(node)
             result.update({node: {
-                'msg': 'Can not start cleaning for node %s: %s' % (
-                    node, str(e)),
-                'failed': True,
-                'info': {}
+                'msg': 'Successful cleaning for node %s' % node,
+                'failed': False,
+                'error': '',
+                'info': node_info,
             }})
-    start = time.time()
-    while nodes_wait and time.time() - start < node_timeout:
-        nodes_to_delete = []
-        for node in nodes_wait:
-            node_info = client.get_node(
-                node,
-                fields=['provision_state', 'last_error']
-            ).to_dict()
-            state = node_info['provision_state']
-            if state == 'manageable':
-                nodes_to_delete.append(node)
-                result.update({node: {
-                    'msg': 'Successful cleaning for node %s' % node,
-                    'failed': False,
-                    'error': '',
-                    'info': node_info,
-                }})
-            elif state not in [
-                    'manageable', 'cleaning', 'clean wait', 'available']:
-                nodes_to_delete.append(node)
-                result.update({node: {
-                    'msg': 'Failed cleaning for node %s: %s' % (
-                        node,
-                        node_info['last_error'] or 'state %s' % state),
-                    'failed': True,
-                    'info': node_info,
-                }})
-        # timeout between get_node() calls
-        time.sleep(1)
-        for node in nodes_to_delete:
-            nodes_wait.remove(node)
-        # timeout between cycles
-        time.sleep(5)
+        elif state not in [
+                'manageable', 'cleaning', 'clean wait', 'available']:
+            nodes_to_delete.append(node)
+            result.update({node: {
+                'msg': 'Failed cleaning for node %s: %s' % (
+                    node,
+                    node_info['last_error'] or 'state %s' % state),
+                'failed': True,
+                'info': node_info,
+            }})
+
+    for node in nodes_to_delete:
+        nodes_wait.remove(node)
+
     if nodes_wait:
         for node in nodes_wait:
             node_info = client.get_node(
@@ -444,8 +488,7 @@ def parallel_nodes_cleaning(conn, module):
                 'failed': True,
                 'info': node_info,
             }})
-        # timeout between get_node() calls
-        time.sleep(1)
+
     return result
 
 
diff --git a/tripleo_ansible/playbooks/cli-baremetal-clean.yaml b/tripleo_ansible/playbooks/cli-baremetal-clean.yaml
index 3131ee655..9d403a6ce 100644
--- a/tripleo_ansible/playbooks/cli-baremetal-clean.yaml
+++ b/tripleo_ansible/playbooks/cli-baremetal-clean.yaml
@@ -46,6 +46,8 @@
         node_uuids_clean: "{{ node_uuids }}"
 
     - name: exit if nothing to do
+      when:
+        - (node_uuids_clean | length) < 1
       block:
         - name: Notice
           debug:
@@ -53,15 +55,18 @@
 
         - name: end play
          meta: end_play
-      when:
-        - (node_uuids_clean | length) < 1
+
+    - name: Notice
+      debug:
+        msg: >-
+          Running cleaning on the following nodes: {{ node_uuids_clean }}.
 
     # Clean nodes
     - name: Start baremetal cleaning
      os_baremetal_clean_node:
         cloud: undercloud
         node_uuid: "{{ node_uuids_clean }}"
-        # concurrency: "{{ concurrency }}" NotImplemented
+        concurrency: "{{ concurrency }}"
         # max_retries: "{{ max_retries }}" NotImplemented
         timeout: "{{ node_timeout }}"
         clean_steps:
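
Reviewer note (not part of the patch): the core of this change is the bounded
thread-pool pattern from concurrent.futures. Below is a minimal standalone
sketch of that pattern under the same assumptions the module makes; the
clean_one() helper, the node names, and the 60-second timeout are hypothetical
stand-ins for the Ironic client call and node_timeout, not part of the module.

# Minimal sketch of the bounded-concurrency pattern used in
# parallel_nodes_cleaning(); clean_one() is a hypothetical stand-in.
from concurrent import futures


def clean_one(node):
    # Stand-in for client.set_node_provision_state(node, "clean", wait=True),
    # which blocks until the node finishes cleaning or raises on failure.
    return node


nodes = ['node-%02d' % i for i in range(50)]
concurrency = 20

# Never spawn more threads than there are nodes to clean.
workers = min(len(nodes), concurrency)
with futures.ThreadPoolExecutor(max_workers=workers) as executor:
    # Map each future back to its node so results can be reported per node.
    future_to_node = {executor.submit(clean_one, node): node for node in nodes}
    done, not_done = futures.wait(
        future_to_node,
        timeout=60,  # bounds the batch as a whole, like node_timeout
        return_when=futures.ALL_COMPLETED
    )

for job in done:
    error = job.exception()  # public accessor; the patch reads job._exception
    if error:
        print('%s failed: %s' % (future_to_node[job], error))
    else:
        print('%s cleaned' % future_to_node[job])
for job in not_done:
    print('%s did not complete before the timeout' % future_to_node[job])

Because set_node_provision_state() is called with wait=True, each worker
blocks until its node finishes cleaning, so the timeout passed to
futures.wait() applies to the batch as a whole; anything still running when it
expires lands in not_done and is reported as incomplete.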