Fix node scaling for Ephemeral Heat

With Ephemeral Heat, we can no longer call
the Heat API while removing nodes, nor is it
necessary. This change removes all calls to
Heat and instead just executes the required
Ansible playbooks to perform the scale-down
actions.
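
For reference, the core of the new flow can be sketched as follows
(helper names and paths are taken from the diff below; the wrapper
function scale_down_without_heat is only illustrative):

    import os

    from tripleoclient import utils as oooutils


    def scale_down_without_heat(stack_name, nodes):
        # config-download artifacts written during deployment live
        # under the per-stack working directory
        # (e.g. /home/stack/overcloud-deploy).
        ansible_dir = os.path.join(
            oooutils.get_default_working_dir(stack_name),
            'config-download', stack_name)
        inventory = os.path.join(ansible_dir,
                                 'tripleo-ansible-inventory.yaml')
        ansible_cfg = os.path.join(ansible_dir, 'ansible.cfg')
        key_file = oooutils.get_key(stack_name)

        # Run the existing scale_playbook.yaml directly against the
        # config-download inventory; no Heat stack update is involved.
        oooutils.run_ansible_playbook(
            playbook='scale_playbook.yaml',
            inventory=inventory,
            workdir=ansible_dir,
            playbook_dir=ansible_dir,
            ansible_cfg=ansible_cfg,
            ssh_user='tripleo-admin',
            limit_hosts=':'.join(nodes),
            reproduce_command=True,
            extra_env_variables={
                'ANSIBLE_BECOME': True,
                'ANSIBLE_PRIVATE_KEY_FILE': key_file,
            })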

Change-Id: Iba56d41d132275bd55e77290a6fca87b917de9e9
Brendan Shephard 2021-10-09 05:12:46 +00:00
parent 664c642d5e
commit 87dbe9f6d4
3 changed files with 51 additions and 135 deletions

@@ -124,8 +124,8 @@ class TestDeleteNode(fakes.TestDeleteNode):
         parsed_args = self.check_parser(self.cmd, arglist, verifylist)
         self.cmd.take_action(parsed_args)
 
-    @mock.patch('tripleoclient.workflows.scale.remove_node_from_stack',
-                autospec=True)
+    @mock.patch('tripleoclient.utils.get_key')
+    @mock.patch('tripleoclient.utils.get_default_working_dir')
     @mock.patch('heatclient.common.event_utils.get_events',
                 autospec=True)
     @mock.patch('tripleoclient.utils.run_ansible_playbook',
@@ -135,7 +135,8 @@ class TestDeleteNode(fakes.TestDeleteNode):
                                           mock_tempfile,
                                           mock_playbook,
                                           mock_get_events,
-                                          mock_remove_from_stack):
+                                          mock_dir,
+                                          mock_key):
 
         bm_yaml = [{
             'name': 'Compute',
@@ -164,6 +165,21 @@ class TestDeleteNode(fakes.TestDeleteNode):
             tempfile.mkdtemp()
         ]
 
+        mock_dir.return_value = "/home/stack/overcloud-deploy"
+        ansible_dir = "{}/config-download/overcast".format(
+            mock_dir.return_value
+        )
+
+        inventory = "{}/tripleo-ansible-inventory.yaml".format(
+            ansible_dir
+        )
+
+        ansible_cfg = "{}/ansible.cfg".format(
+            ansible_dir
+        )
+
+        mock_key.return_value = '/home/stack/.ssh/id_rsa_tripleo'
+
         unprovision_confirm = os.path.join(tmp, 'unprovision_confirm.json')
         with open(unprovision_confirm, 'w') as confirm:
             confirm.write(json.dumps([
@@ -225,44 +241,19 @@ class TestDeleteNode(fakes.TestDeleteNode):
                 },
             ),
             mock.call(
-                playbook='cli-grant-local-access.yaml',
-                inventory='localhost,',
-                workdir=mock.ANY,
-                playbook_dir='/usr/share/ansible/tripleo-playbooks',
-                verbosity=mock.ANY,
-                reproduce_command=True,
-                extra_vars={
-                    'access_path': os.path.join(os.environ.get('HOME'),
-                                                'config-download'),
-                    'execution_user': mock.ANY},
-            ),
-            mock.call(
-                playbook='cli-config-download.yaml',
-                inventory='localhost,',
-                workdir=mock.ANY,
-                playbook_dir='/usr/share/ansible/tripleo-playbooks',
-                verbosity=mock.ANY,
-                extra_vars=mock.ANY,
-                reproduce_command=True,
-            ),
-            mock.call(
-                playbook=mock.ANY,
-                inventory=mock.ANY,
-                workdir=mock.ANY,
-                playbook_dir=mock.ANY,
-                skip_tags='opendev-validation',
-                ansible_cfg=None,
-                verbosity=mock.ANY,
+                playbook='scale_playbook.yaml',
+                inventory=inventory,
+                workdir=ansible_dir,
+                playbook_dir=ansible_dir,
+                ansible_cfg=ansible_cfg,
                 ssh_user='tripleo-admin',
-                key=mock.ANY,
                 limit_hosts='overcast-controller-1:overcast-compute-0',
-                ansible_timeout=42,
                 reproduce_command=True,
-                extra_env_variables={'ANSIBLE_BECOME': True},
-                extra_vars=None,
-                tags=None,
-                timeout=90,
-                forks=None
+                extra_env_variables={
+                    "ANSIBLE_BECOME": True,
+                    "ANSIBLE_PRIVATE_KEY_FILE":
+                        "/home/stack/.ssh/id_rsa_tripleo"
+                }
             ),
             mock.call(
                 inventory='localhost,',

@@ -31,10 +31,8 @@ import yaml
 
 from tripleoclient import command
 from tripleoclient import constants
-from tripleoclient.exceptions import InvalidConfiguration
 from tripleoclient import utils as oooutils
 from tripleoclient.workflows import baremetal
-from tripleoclient.workflows import scale
 
 
 class DeleteNode(command.Command):
@@ -133,7 +131,6 @@ class DeleteNode(command.Command):
 
     def take_action(self, parsed_args):
         self.log.debug("take_action(%s)" % parsed_args)
-        clients = self.app.client_manager
 
         if parsed_args.baremetal_deployment:
             with open(parsed_args.baremetal_deployment, 'r') as fp:
@@ -155,27 +152,31 @@ class DeleteNode(command.Command):
         if not confirm:
             raise oscexc.CommandError("Action not confirmed, exiting.")
 
-        orchestration_client = clients.orchestration
-
-        stack = oooutils.get_stack(orchestration_client, parsed_args.stack)
-
-        if not stack:
-            raise InvalidConfiguration("stack {} not found".format(
-                parsed_args.stack))
-
-        print("Deleting the following nodes from stack {stack}:\n{nodes}"
-              .format(stack=stack.stack_name, nodes=nodes_text))
-
-        self._check_skiplist_exists(stack.environment())
-
-        scale.scale_down(
-            log=self.log,
-            clients=clients,
-            stack=stack,
-            nodes=nodes,
-            connection_timeout=parsed_args.overcloud_ssh_port_timeout,
-            timeout=parsed_args.timeout,
-            verbosity=oooutils.playbook_verbosity(self=self)
+        ansible_dir = os.path.join(oooutils.get_default_working_dir(
+                                   parsed_args.stack
+                                   ),
+                                   'config-download',
+                                   parsed_args.stack)
+
+        inventory = os.path.join(ansible_dir,
+                                 'tripleo-ansible-inventory.yaml')
+
+        ansible_cfg = os.path.join(ansible_dir, 'ansible.cfg')
+        key_file = oooutils.get_key(parsed_args.stack)
+
+        oooutils.run_ansible_playbook(
+            playbook='scale_playbook.yaml',
+            inventory=inventory,
+            workdir=ansible_dir,
+            playbook_dir=ansible_dir,
+            ansible_cfg=ansible_cfg,
+            ssh_user='tripleo-admin',
+            limit_hosts=':'.join('%s' % node for node in nodes),
+            reproduce_command=True,
+            extra_env_variables={
+                "ANSIBLE_BECOME": True,
+                "ANSIBLE_PRIVATE_KEY_FILE": key_file
+            }
         )
 
         if parsed_args.baremetal_deployment:

@@ -17,12 +17,8 @@ import collections
 import shutil
 import tempfile
 
-from heatclient.common import event_utils
-from keystoneauth1.exceptions.catalog import EndpointNotFound
-
 from tripleoclient import constants
 from tripleoclient import utils
-from tripleoclient.workflows import deployment
 
 
 def get_group_resources_after_delete(groupname, res_to_delete, resources):
@@ -157,75 +153,3 @@ def remove_node_from_stack(clients, stack, nodes, timeout):
             heat.stacks.update(stack.id, **stack_args)
     finally:
         shutil.rmtree(tht_tmp)
-
-
-def scale_down(log, clients, stack, nodes, timeout=None, verbosity=0,
-               connection_timeout=None):
-    """Unprovision and deletes overcloud nodes from a heat stack.
-
-    :param log: Logging object
-    :type log: Object
-
-    :param clients: Application client object.
-    :type clients: Object
-
-    :param stack: Heat Stack object
-    :type stack: Object
-
-    :param nodes: List of nodes to delete. If the node UUID is used the
-                  UUID will be used to lookup the node name before being
-                  passed through to the cleanup playbook.
-    :type nodes: List
-
-    :param timeout: Timeout to use when deleting nodes. If timeout is None
-                    it will be set to 240 minutes.
-    :type timeout: Integer
-
-    :param verbosity: Verbosity level
-    :type verbosity: Integer
-
-    :param connection_timeout: Ansible connection timeout in seconds.
-    :type connection_timeout: Integer
-    """
-
-    if not timeout:
-        timeout = 240
-
-    limit_list = list()
-
-    for node in nodes:
-        try:
-            _node = clients.compute.servers.get(node)
-            limit_list.append(_node.name)
-        except (KeyError, ValueError, AttributeError, EndpointNotFound):
-            limit_list.append(node)
-
-    if limit_list:
-        limit_list = ':'.join(limit_list)
-    else:
-        limit_list = None
-
-    deployment.config_download(
-        log=log,
-        clients=clients,
-        stack=stack,
-        timeout=connection_timeout,
-        ansible_playbook_name='scale_playbook.yaml',
-        limit_hosts=limit_list,
-        verbosity=verbosity,
-        deployment_timeout=timeout
-    )
-
-    events = event_utils.get_events(
-        clients.orchestration, stack_id=stack.stack_name,
-        event_args={'sort_dir': 'desc', 'limit': 1})
-    marker = events[0].id if events else None
-
-    print('Running scale down')
-    remove_node_from_stack(clients, stack, nodes, timeout)
-
-    utils.wait_for_stack_ready(
-        orchestration_client=clients.orchestration,
-        stack_name=stack.stack_name,
-        action='UPDATE',
-        marker=marker)