Fix node scaling for Ephemeral Heat

With Ephemeral Heat, we can no longer call
the Heat API while removing nodes, nor is it
necessary. This change removes all calls to
Heat and instead just executes the required
playbooks to perform the scale-down actions.

Change-Id: Iba56d41d132275bd55e77290a6fca87b917de9e9
Brendan Shephard 2021-10-09 05:12:46 +00:00
parent 664c642d5e
commit 87dbe9f6d4
3 changed files with 51 additions and 135 deletions
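
In practice, the whole scale-down now reduces to a single direct playbook run
from the deployment working directory. A minimal sketch of the resulting call,
assembled from the diffs below ('overcloud' and the limit_hosts value are
illustrative; the real values come from the parsed CLI arguments):

    import os
    from tripleoclient import utils as oooutils

    stack = 'overcloud'  # illustrative; normally parsed_args.stack

    # All paths derive from the client's default working directory,
    # e.g. ~/overcloud-deploy/config-download/<stack>.
    ansible_dir = os.path.join(
        oooutils.get_default_working_dir(stack), 'config-download', stack)

    oooutils.run_ansible_playbook(
        playbook='scale_playbook.yaml',
        inventory=os.path.join(ansible_dir, 'tripleo-ansible-inventory.yaml'),
        workdir=ansible_dir,
        playbook_dir=ansible_dir,
        ansible_cfg=os.path.join(ansible_dir, 'ansible.cfg'),
        ssh_user='tripleo-admin',
        limit_hosts='overcloud-compute-0',  # ':'-joined node names
        reproduce_command=True,
        extra_env_variables={
            'ANSIBLE_BECOME': True,
            'ANSIBLE_PRIVATE_KEY_FILE': oooutils.get_key(stack),
        },
    )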


@@ -124,8 +124,8 @@ class TestDeleteNode(fakes.TestDeleteNode):
        parsed_args = self.check_parser(self.cmd, arglist, verifylist)
        self.cmd.take_action(parsed_args)

    @mock.patch('tripleoclient.workflows.scale.remove_node_from_stack',
                autospec=True)
    @mock.patch('tripleoclient.utils.get_key')
    @mock.patch('tripleoclient.utils.get_default_working_dir')
    @mock.patch('heatclient.common.event_utils.get_events',
                autospec=True)
    @mock.patch('tripleoclient.utils.run_ansible_playbook',
@@ -135,7 +135,8 @@ class TestDeleteNode(fakes.TestDeleteNode):
                           mock_tempfile,
                           mock_playbook,
                           mock_get_events,
                           mock_remove_from_stack):
                           mock_dir,
                           mock_key):

        bm_yaml = [{
            'name': 'Compute',
@@ -164,6 +165,21 @@ class TestDeleteNode(fakes.TestDeleteNode):
            tempfile.mkdtemp()
        ]

        mock_dir.return_value = "/home/stack/overcloud-deploy"
        ansible_dir = "{}/config-download/overcast".format(
            mock_dir.return_value
        )
        inventory = "{}/tripleo-ansible-inventory.yaml".format(
            ansible_dir
        )
        ansible_cfg = "{}/ansible.cfg".format(
            ansible_dir
        )
        mock_key.return_value = '/home/stack/.ssh/id_rsa_tripleo'

        unprovision_confirm = os.path.join(tmp, 'unprovision_confirm.json')
        with open(unprovision_confirm, 'w') as confirm:
            confirm.write(json.dumps([
@@ -225,44 +241,19 @@ class TestDeleteNode(fakes.TestDeleteNode):
                },
            ),
            mock.call(
                playbook='cli-grant-local-access.yaml',
                inventory='localhost,',
                workdir=mock.ANY,
                playbook_dir='/usr/share/ansible/tripleo-playbooks',
                verbosity=mock.ANY,
                reproduce_command=True,
                extra_vars={
                    'access_path': os.path.join(os.environ.get('HOME'),
                                                'config-download'),
                    'execution_user': mock.ANY},
            ),
            mock.call(
                playbook='cli-config-download.yaml',
                inventory='localhost,',
                workdir=mock.ANY,
                playbook_dir='/usr/share/ansible/tripleo-playbooks',
                verbosity=mock.ANY,
                extra_vars=mock.ANY,
                reproduce_command=True,
            ),
            mock.call(
                playbook=mock.ANY,
                inventory=mock.ANY,
                workdir=mock.ANY,
                playbook_dir=mock.ANY,
                skip_tags='opendev-validation',
                ansible_cfg=None,
                verbosity=mock.ANY,
                playbook='scale_playbook.yaml',
                inventory=inventory,
                workdir=ansible_dir,
                playbook_dir=ansible_dir,
                ansible_cfg=ansible_cfg,
                ssh_user='tripleo-admin',
                key=mock.ANY,
                limit_hosts='overcast-controller-1:overcast-compute-0',
                ansible_timeout=42,
                reproduce_command=True,
                extra_env_variables={'ANSIBLE_BECOME': True},
                extra_vars=None,
                tags=None,
                timeout=90,
                forks=None
                extra_env_variables={
                    "ANSIBLE_BECOME": True,
                    "ANSIBLE_PRIVATE_KEY_FILE":
                        "/home/stack/.ssh/id_rsa_tripleo"
                }
            ),
            mock.call(
                inventory='localhost,',

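The mocked paths above follow the working-directory layout the client derives
for a stack. A standalone sketch of that derivation, using the values from this
test ('overcast' is the test's stack name; this is not test code from the
commit itself):

    import os

    working_dir = "/home/stack/overcloud-deploy"  # mock_dir.return_value above
    stack = "overcast"

    ansible_dir = os.path.join(working_dir, "config-download", stack)
    inventory = os.path.join(ansible_dir, "tripleo-ansible-inventory.yaml")
    ansible_cfg = os.path.join(ansible_dir, "ansible.cfg")

    # e.g. /home/stack/overcloud-deploy/config-download/overcast/ansible.cfg
    print(ansible_cfg)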

@@ -31,10 +31,8 @@ import yaml
from tripleoclient import command
from tripleoclient import constants
from tripleoclient.exceptions import InvalidConfiguration
from tripleoclient import utils as oooutils
from tripleoclient.workflows import baremetal
from tripleoclient.workflows import scale


class DeleteNode(command.Command):
@@ -133,7 +131,6 @@ class DeleteNode(command.Command):
    def take_action(self, parsed_args):
        self.log.debug("take_action(%s)" % parsed_args)
        clients = self.app.client_manager

        if parsed_args.baremetal_deployment:
            with open(parsed_args.baremetal_deployment, 'r') as fp:
@@ -155,27 +152,31 @@ class DeleteNode(command.Command):
            if not confirm:
                raise oscexc.CommandError("Action not confirmed, exiting.")

        orchestration_client = clients.orchestration

        ansible_dir = os.path.join(oooutils.get_default_working_dir(
                                   parsed_args.stack
                                   ),
                                   'config-download',
                                   parsed_args.stack)

        stack = oooutils.get_stack(orchestration_client, parsed_args.stack)
        inventory = os.path.join(ansible_dir,
                                 'tripleo-ansible-inventory.yaml')

        if not stack:
            raise InvalidConfiguration("stack {} not found".format(
                parsed_args.stack))

        ansible_cfg = os.path.join(ansible_dir, 'ansible.cfg')
        key_file = oooutils.get_key(parsed_args.stack)

        print("Deleting the following nodes from stack {stack}:\n{nodes}"
              .format(stack=stack.stack_name, nodes=nodes_text))

        self._check_skiplist_exists(stack.environment())

        scale.scale_down(
            log=self.log,
            clients=clients,
            stack=stack,
            nodes=nodes,
            connection_timeout=parsed_args.overcloud_ssh_port_timeout,
            timeout=parsed_args.timeout,
            verbosity=oooutils.playbook_verbosity(self=self)
        oooutils.run_ansible_playbook(
            playbook='scale_playbook.yaml',
            inventory=inventory,
            workdir=ansible_dir,
            playbook_dir=ansible_dir,
            ansible_cfg=ansible_cfg,
            ssh_user='tripleo-admin',
            limit_hosts=':'.join('%s' % node for node in nodes),
            reproduce_command=True,
            extra_env_variables={
                "ANSIBLE_BECOME": True,
                "ANSIBLE_PRIVATE_KEY_FILE": key_file
            }
        )

        if parsed_args.baremetal_deployment:

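The limit_hosts argument above is simply the node names joined with ':', which
Ansible's --limit interprets as a union of host patterns. A quick worked
example with the names used in the test:

    nodes = ['overcast-controller-1', 'overcast-compute-0']
    limit_hosts = ':'.join('%s' % node for node in nodes)
    print(limit_hosts)  # overcast-controller-1:overcast-compute-0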

@@ -17,12 +17,8 @@ import collections
import shutil
import tempfile

from heatclient.common import event_utils
from keystoneauth1.exceptions.catalog import EndpointNotFound

from tripleoclient import constants
from tripleoclient import utils
from tripleoclient.workflows import deployment


def get_group_resources_after_delete(groupname, res_to_delete, resources):
@@ -157,75 +153,3 @@ def remove_node_from_stack(clients, stack, nodes, timeout):
            heat.stacks.update(stack.id, **stack_args)
        finally:
            shutil.rmtree(tht_tmp)


def scale_down(log, clients, stack, nodes, timeout=None, verbosity=0,
               connection_timeout=None):
    """Unprovision and delete overcloud nodes from a heat stack.

    :param log: Logging object
    :type log: Object

    :param clients: Application client object.
    :type clients: Object

    :param stack: Heat Stack object
    :type stack: Object

    :param nodes: List of nodes to delete. If a node UUID is used, the
                  UUID will be used to look up the node name before being
                  passed through to the cleanup playbook.
    :type nodes: List

    :param timeout: Timeout to use when deleting nodes. If timeout is None
                    it will be set to 240 minutes.
    :type timeout: Integer

    :param verbosity: Verbosity level
    :type verbosity: Integer

    :param connection_timeout: Ansible connection timeout in seconds.
    :type connection_timeout: Integer
    """
    if not timeout:
        timeout = 240

    limit_list = list()
    for node in nodes:
        try:
            _node = clients.compute.servers.get(node)
            limit_list.append(_node.name)
        except (KeyError, ValueError, AttributeError, EndpointNotFound):
            limit_list.append(node)

    if limit_list:
        limit_list = ':'.join(limit_list)
    else:
        limit_list = None

    deployment.config_download(
        log=log,
        clients=clients,
        stack=stack,
        timeout=connection_timeout,
        ansible_playbook_name='scale_playbook.yaml',
        limit_hosts=limit_list,
        verbosity=verbosity,
        deployment_timeout=timeout
    )

    events = event_utils.get_events(
        clients.orchestration, stack_id=stack.stack_name,
        event_args={'sort_dir': 'desc', 'limit': 1})
    marker = events[0].id if events else None

    print('Running scale down')

    remove_node_from_stack(clients, stack, nodes, timeout)

    utils.wait_for_stack_ready(
        orchestration_client=clients.orchestration,
        stack_name=stack.stack_name,
        action='UPDATE',
        marker=marker)