From d116a5afada7c99ca6d076fab61560e68b5e5b16 Mon Sep 17 00:00:00 2001 From: rabi Date: Wed, 1 Jun 2022 10:52:16 +0530 Subject: [PATCH] Ignore unreachable errors for scale playbook We need to continue with scale tasks if some nodes are unreachable. It looks like Ansible returns "Return Code: 4" [1] for unreachable nodes, so let's ignore that return code. [1] https://github.com/ansible/ansible/blob/devel/lib/ansible/executor/task_queue_manager.py#L103 Change-Id: Iea2022bf8dd27eb8762158c04cd3e0186da4fe0c --- .../v1/overcloud_node/test_overcloud_node.py | 1 + tripleoclient/utils.py | 40 ++++++++++--------- tripleoclient/v1/overcloud_node.py | 1 + 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/tripleoclient/tests/v1/overcloud_node/test_overcloud_node.py b/tripleoclient/tests/v1/overcloud_node/test_overcloud_node.py index 7ef0353d8..8dbab2364 100644 --- a/tripleoclient/tests/v1/overcloud_node/test_overcloud_node.py +++ b/tripleoclient/tests/v1/overcloud_node/test_overcloud_node.py @@ -246,6 +246,7 @@ class TestDeleteNode(fakes.TestDeleteNode): ssh_user='tripleo-admin', limit_hosts='overcast-controller-1:overcast-compute-0', reproduce_command=True, + ignore_unreachable=True, extra_env_variables={ "ANSIBLE_BECOME": True, "ANSIBLE_PRIVATE_KEY_FILE": diff --git a/tripleoclient/utils.py b/tripleoclient/utils.py index ce8751fe8..4755a0c90 100644 --- a/tripleoclient/utils.py +++ b/tripleoclient/utils.py @@ -254,7 +254,8 @@ def run_ansible_playbook(playbook, inventory, workdir, playbook_dir=None, callback_whitelist=constants.ANSIBLE_CWL, ansible_cfg=None, ansible_timeout=30, reproduce_command=True, - timeout=None, forks=None): + timeout=None, forks=None, + ignore_unreachable=False): """Simple wrapper for ansible-playbook. :param playbook: Playbook filename. @@ -706,27 +707,30 @@ def run_ansible_playbook(playbook, inventory, workdir, playbook_dir=None, f.write(str(val)) if rc != 0: - err_msg = ( - 'Ansible execution failed. 
playbook: {},' - ' Run Status: {},' - ' Return Code: {}'.format( - playbook, - status, - rc - ) - ) - if command_path: - err_msg += ( - ', To rerun the failed command manually execute the' - ' following script: {}'.format( - command_path + if rc == 4 and ignore_unreachable: + LOG.info('Ignoring unreachable nodes') + else: + err_msg = ( + 'Ansible execution failed. playbook: {},' + ' Run Status: {},' + ' Return Code: {}'.format( + playbook, + status, + rc ) ) + if command_path: + err_msg += ( + ', To rerun the failed command manually execute the' + ' following script: {}'.format( + command_path + ) + ) - if not quiet: - LOG.error(err_msg) + if not quiet: + LOG.error(err_msg) - raise RuntimeError(err_msg) + raise RuntimeError(err_msg) LOG.info( 'Ansible execution success. playbook: {}'.format( diff --git a/tripleoclient/v1/overcloud_node.py b/tripleoclient/v1/overcloud_node.py index b59bc8c46..60a7430d4 100644 --- a/tripleoclient/v1/overcloud_node.py +++ b/tripleoclient/v1/overcloud_node.py @@ -180,6 +180,7 @@ class DeleteNode(command.Command): ssh_user='tripleo-admin', limit_hosts=':'.join('%s' % node for node in nodes), reproduce_command=True, + ignore_unreachable=True, extra_env_variables={ "ANSIBLE_BECOME": True, "ANSIBLE_PRIVATE_KEY_FILE": key_file