From 87dbe9f6d4e79127681d183fba45b682b6e31778 Mon Sep 17 00:00:00 2001
From: Brendan Shephard
Date: Sat, 9 Oct 2021 05:12:46 +0000
Subject: [PATCH] Fix node scaling for Ephemeral Heat

With Ephemeral Heat, we can no longer call the Heat API while removing
nodes, nor is it necessary. This change removes all calls to Heat and
instead just executes the required playbooks to perform the scale down
actions.

Change-Id: Iba56d41d132275bd55e77290a6fca87b917de9e9
---
 .../v1/overcloud_node/test_overcloud_node.py | 67 +++++++---------
 tripleoclient/v1/overcloud_node.py           | 43 ++++++-----
 tripleoclient/workflows/scale.py             | 76 -------------------
 3 files changed, 51 insertions(+), 135 deletions(-)

diff --git a/tripleoclient/tests/v1/overcloud_node/test_overcloud_node.py b/tripleoclient/tests/v1/overcloud_node/test_overcloud_node.py
index c98b80865..4e67e4509 100644
--- a/tripleoclient/tests/v1/overcloud_node/test_overcloud_node.py
+++ b/tripleoclient/tests/v1/overcloud_node/test_overcloud_node.py
@@ -124,8 +124,8 @@ class TestDeleteNode(fakes.TestDeleteNode):
         parsed_args = self.check_parser(self.cmd, arglist, verifylist)
         self.cmd.take_action(parsed_args)
 
-    @mock.patch('tripleoclient.workflows.scale.remove_node_from_stack',
-                autospec=True)
+    @mock.patch('tripleoclient.utils.get_key')
+    @mock.patch('tripleoclient.utils.get_default_working_dir')
     @mock.patch('heatclient.common.event_utils.get_events',
                 autospec=True)
     @mock.patch('tripleoclient.utils.run_ansible_playbook',
@@ -135,7 +135,8 @@ class TestDeleteNode(fakes.TestDeleteNode):
                                                mock_tempfile,
                                                mock_playbook,
                                                mock_get_events,
-                                               mock_remove_from_stack):
+                                               mock_dir,
+                                               mock_key):
 
         bm_yaml = [{
             'name': 'Compute',
@@ -164,6 +165,21 @@ class TestDeleteNode(fakes.TestDeleteNode):
             tempfile.mkdtemp()
         ]
 
+        mock_dir.return_value = "/home/stack/overcloud-deploy"
+        ansible_dir = "{}/config-download/overcast".format(
+            mock_dir.return_value
+        )
+
+        inventory = "{}/tripleo-ansible-inventory.yaml".format(
+            ansible_dir
+        )
+
+        ansible_cfg = "{}/ansible.cfg".format(
+            ansible_dir
+        )
+
+        mock_key.return_value = '/home/stack/.ssh/id_rsa_tripleo'
+
         unprovision_confirm = os.path.join(tmp, 'unprovision_confirm.json')
         with open(unprovision_confirm, 'w') as confirm:
             confirm.write(json.dumps([
@@ -225,44 +241,19 @@ class TestDeleteNode(fakes.TestDeleteNode):
                 },
             ),
             mock.call(
-                playbook='cli-grant-local-access.yaml',
-                inventory='localhost,',
-                workdir=mock.ANY,
-                playbook_dir='/usr/share/ansible/tripleo-playbooks',
-                verbosity=mock.ANY,
-                reproduce_command=True,
-                extra_vars={
-                    'access_path': os.path.join(os.environ.get('HOME'),
-                                                'config-download'),
-                    'execution_user': mock.ANY},
-            ),
-            mock.call(
-                playbook='cli-config-download.yaml',
-                inventory='localhost,',
-                workdir=mock.ANY,
-                playbook_dir='/usr/share/ansible/tripleo-playbooks',
-                verbosity=mock.ANY,
-                extra_vars=mock.ANY,
-                reproduce_command=True,
-            ),
-            mock.call(
-                playbook=mock.ANY,
-                inventory=mock.ANY,
-                workdir=mock.ANY,
-                playbook_dir=mock.ANY,
-                skip_tags='opendev-validation',
-                ansible_cfg=None,
-                verbosity=mock.ANY,
+                playbook='scale_playbook.yaml',
+                inventory=inventory,
+                workdir=ansible_dir,
+                playbook_dir=ansible_dir,
+                ansible_cfg=ansible_cfg,
                 ssh_user='tripleo-admin',
-                key=mock.ANY,
                 limit_hosts='overcast-controller-1:overcast-compute-0',
-                ansible_timeout=42,
                 reproduce_command=True,
-                extra_env_variables={'ANSIBLE_BECOME': True},
-                extra_vars=None,
-                tags=None,
-                timeout=90,
-                forks=None
+                extra_env_variables={
+                    "ANSIBLE_BECOME": True,
+                    "ANSIBLE_PRIVATE_KEY_FILE":
+                    "/home/stack/.ssh/id_rsa_tripleo"
+                }
             ),
             mock.call(
                 inventory='localhost,',
diff --git a/tripleoclient/v1/overcloud_node.py b/tripleoclient/v1/overcloud_node.py
index d6f43d746..f4ae0b22c 100644
--- a/tripleoclient/v1/overcloud_node.py
+++ b/tripleoclient/v1/overcloud_node.py
@@ -31,10 +31,8 @@ import yaml
 
 from tripleoclient import command
 from tripleoclient import constants
-from tripleoclient.exceptions import InvalidConfiguration
 from tripleoclient import utils as oooutils
 from tripleoclient.workflows import baremetal
-from tripleoclient.workflows import scale
 
 
 class DeleteNode(command.Command):
@@ -133,7 +131,6 @@ class DeleteNode(command.Command):
 
     def take_action(self, parsed_args):
         self.log.debug("take_action(%s)" % parsed_args)
-        clients = self.app.client_manager
 
         if parsed_args.baremetal_deployment:
             with open(parsed_args.baremetal_deployment, 'r') as fp:
@@ -155,27 +152,31 @@ class DeleteNode(command.Command):
         if not confirm:
             raise oscexc.CommandError("Action not confirmed, exiting.")
 
-        orchestration_client = clients.orchestration
+        ansible_dir = os.path.join(oooutils.get_default_working_dir(
+            parsed_args.stack
+            ),
+            'config-download',
+            parsed_args.stack)
 
-        stack = oooutils.get_stack(orchestration_client, parsed_args.stack)
+        inventory = os.path.join(ansible_dir,
+                                 'tripleo-ansible-inventory.yaml')
 
-        if not stack:
-            raise InvalidConfiguration("stack {} not found".format(
-                parsed_args.stack))
+        ansible_cfg = os.path.join(ansible_dir, 'ansible.cfg')
+        key_file = oooutils.get_key(parsed_args.stack)
 
-        print("Deleting the following nodes from stack {stack}:\n{nodes}"
-              .format(stack=stack.stack_name, nodes=nodes_text))
-
-        self._check_skiplist_exists(stack.environment())
-
-        scale.scale_down(
-            log=self.log,
-            clients=clients,
-            stack=stack,
-            nodes=nodes,
-            connection_timeout=parsed_args.overcloud_ssh_port_timeout,
-            timeout=parsed_args.timeout,
-            verbosity=oooutils.playbook_verbosity(self=self)
+        oooutils.run_ansible_playbook(
+            playbook='scale_playbook.yaml',
+            inventory=inventory,
+            workdir=ansible_dir,
+            playbook_dir=ansible_dir,
+            ansible_cfg=ansible_cfg,
+            ssh_user='tripleo-admin',
+            limit_hosts=':'.join('%s' % node for node in nodes),
+            reproduce_command=True,
+            extra_env_variables={
+                "ANSIBLE_BECOME": True,
+                "ANSIBLE_PRIVATE_KEY_FILE": key_file
+            }
         )
 
         if parsed_args.baremetal_deployment:
diff --git a/tripleoclient/workflows/scale.py b/tripleoclient/workflows/scale.py
index 08bc90188..e68c507af 100644
--- a/tripleoclient/workflows/scale.py
+++ b/tripleoclient/workflows/scale.py
@@ -17,12 +17,8 @@ import collections
 import shutil
 import tempfile
 
-from heatclient.common import event_utils
-from keystoneauth1.exceptions.catalog import EndpointNotFound
-
 from tripleoclient import constants
 from tripleoclient import utils
-from tripleoclient.workflows import deployment
 
 
 def get_group_resources_after_delete(groupname, res_to_delete, resources):
@@ -157,75 +153,3 @@ def remove_node_from_stack(clients, stack, nodes, timeout):
             heat.stacks.update(stack.id, **stack_args)
     finally:
         shutil.rmtree(tht_tmp)
-
-
-def scale_down(log, clients, stack, nodes, timeout=None, verbosity=0,
-               connection_timeout=None):
-    """Unprovision and deletes overcloud nodes from a heat stack.
-
-    :param log: Logging object
-    :type log: Object
-
-    :param clients: Application client object.
-    :type clients: Object
-
-    :param stack: Heat Stack object
-    :type stack: Object
-
-    :param nodes: List of nodes to delete. If the node UUID is used the
-                  UUID will be used to lookup the node name before being
-                  passed through to the cleanup playbook.
-    :type nodes: List
-
-    :param timeout: Timeout to use when deleting nodes. If timeout is None
-                    it will be set to 240 minutes.
-    :type timeout: Integer
-
-    :param verbosity: Verbosity level
-    :type verbosity: Integer
-
-    :param connection_timeout: Ansible connection timeout in seconds.
-    :type connection_timeout: Integer
-    """
-
-    if not timeout:
-        timeout = 240
-
-    limit_list = list()
-    for node in nodes:
-        try:
-            _node = clients.compute.servers.get(node)
-            limit_list.append(_node.name)
-        except (KeyError, ValueError, AttributeError, EndpointNotFound):
-            limit_list.append(node)
-
-    if limit_list:
-        limit_list = ':'.join(limit_list)
-    else:
-        limit_list = None
-
-    deployment.config_download(
-        log=log,
-        clients=clients,
-        stack=stack,
-        timeout=connection_timeout,
-        ansible_playbook_name='scale_playbook.yaml',
-        limit_hosts=limit_list,
-        verbosity=verbosity,
-        deployment_timeout=timeout
-    )
-
-    events = event_utils.get_events(
-        clients.orchestration, stack_id=stack.stack_name,
-        event_args={'sort_dir': 'desc', 'limit': 1})
-    marker = events[0].id if events else None
-
-    print('Running scale down')
-
-    remove_node_from_stack(clients, stack, nodes, timeout)
-
-    utils.wait_for_stack_ready(
-        orchestration_client=clients.orchestration,
-        stack_name=stack.stack_name,
-        action='UPDATE',
-        marker=marker)
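
The change boils down to replacing the Heat-driven scale_down workflow with a
single direct run of the scale playbook from the stack's config-download
directory. A minimal sketch of the equivalent call is below; it assumes the
default working-directory layout, and the stack name and node list are
illustrative values, not taken from the patch:

    import os

    from tripleoclient import utils as oooutils

    stack = 'overcloud'              # illustrative stack name
    nodes = ['overcloud-compute-0']  # illustrative nodes to remove

    # The inventory and ansible.cfg already live in the per-stack
    # config-download directory, so no Heat lookup is needed to find them.
    ansible_dir = os.path.join(
        oooutils.get_default_working_dir(stack), 'config-download', stack)

    oooutils.run_ansible_playbook(
        playbook='scale_playbook.yaml',
        inventory=os.path.join(ansible_dir, 'tripleo-ansible-inventory.yaml'),
        workdir=ansible_dir,
        playbook_dir=ansible_dir,
        ansible_cfg=os.path.join(ansible_dir, 'ansible.cfg'),
        ssh_user='tripleo-admin',
        limit_hosts=':'.join(nodes),
        reproduce_command=True,
        extra_env_variables={
            'ANSIBLE_BECOME': True,
            'ANSIBLE_PRIVATE_KEY_FILE': oooutils.get_key(stack),
        },
    )

Because the playbook run is limited to the nodes being removed and escalates
over SSH with the stack's generated key, no Heat stack update is required.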