From 4b0b290ec17325b1ea6dcf2914bb8d9edd0c1f49 Mon Sep 17 00:00:00 2001 From: ramishra Date: Tue, 16 Feb 2021 08:27:27 +0530 Subject: [PATCH] Fix node delete to not use overcloud plan This also moves some helper functions from tripleo-common mistral action to utils.py. A subsequent patch would cleanup the mistral actions in tripleo-common. Closes-Bug: #1915780 Change-Id: I8cf4c983c8810a42bd703b097dc1cb8034798314 --- .../overcloud_deploy/test_overcloud_deploy.py | 28 ++- .../v1/overcloud_node/test_overcloud_node.py | 24 +-- tripleoclient/utils.py | 46 +++++ tripleoclient/v1/overcloud_deploy.py | 50 +----- tripleoclient/workflows/scale.py | 160 ++++++++++++++++-- 5 files changed, 223 insertions(+), 85 deletions(-) diff --git a/tripleoclient/tests/v1/overcloud_deploy/test_overcloud_deploy.py b/tripleoclient/tests/v1/overcloud_deploy/test_overcloud_deploy.py index 07ed19130..1bdecf423 100644 --- a/tripleoclient/tests/v1/overcloud_deploy/test_overcloud_deploy.py +++ b/tripleoclient/tests/v1/overcloud_deploy/test_overcloud_deploy.py @@ -144,8 +144,7 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud): @mock.patch("heatclient.common.event_utils.get_events") @mock.patch('tripleo_common.update.add_breakpoints_cleanup_into_env', autospec=True) - @mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.' - '_create_parameters_env', autospec=True) + @mock.patch('tripleoclient.utils.create_parameters_env', autospec=True) @mock.patch('tripleoclient.utils.create_tempest_deployer_input', autospec=True) @mock.patch('heatclient.common.template_utils.get_template_contents', @@ -214,8 +213,8 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud): 'CtlplaneNetworkAttributes': {}, } - def _custom_create_params_env(_self, parameters, tht_root, - container_name): + def _custom_create_params_env(parameters, tht_root, + stack): for key, value in six.iteritems(parameters): self.assertEqual(value, expected_parameters[key]) parameter_defaults = {"parameter_defaults": parameters} @@ -368,8 +367,7 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud): autospec=True) @mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.' '_validate_args') - @mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.' - '_create_parameters_env', autospec=True) + @mock.patch('tripleoclient.utils.create_parameters_env', autospec=True) @mock.patch('heatclient.common.template_utils.get_template_contents', autospec=True) @mock.patch('shutil.rmtree', autospec=True) @@ -846,8 +844,7 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud): return_value='192.168.0.1 uc.ctlplane.localhost uc.ctlplane') @mock.patch('tripleoclient.utils.check_stack_network_matches_env_files') @mock.patch('tripleoclient.utils.check_ceph_fsid_matches_env_files') - @mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.' - '_create_parameters_env', autospec=True) + @mock.patch('tripleoclient.utils.create_parameters_env', autospec=True) @mock.patch('heatclient.common.template_utils.' 'process_environment_and_files', autospec=True) @mock.patch('heatclient.common.template_utils.get_template_contents', @@ -875,8 +872,8 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud): ('templates', '/usr/share/openstack-tripleo-heat-templates/'), ] - def _custom_create_params_env(_self, parameters, tht_root, - container_name): + def _custom_create_params_env(parameters, tht_root, + stack): parameters.update({"ControllerCount": 3}) parameter_defaults = {"parameter_defaults": parameters} return parameter_defaults @@ -919,8 +916,7 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud): @mock.patch('tripleo_common.update.add_breakpoints_cleanup_into_env') @mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.' '_validate_args') - @mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.' - '_create_parameters_env', autospec=True) + @mock.patch('tripleoclient.utils.create_parameters_env', autospec=True) @mock.patch('tripleoclient.utils.create_tempest_deployer_input', autospec=True) @mock.patch('heatclient.common.template_utils.' @@ -1006,8 +1002,8 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud): 'CtlplaneNetworkAttributes': {}, } - def _custom_create_params_env(_self, parameters, tht_root, - container_name): + def _custom_create_params_env(parameters, tht_root, + stack): for key, value in six.iteritems(parameters): self.assertEqual(value, expected_parameters[key]) parameter_defaults = {"parameter_defaults": parameters} @@ -1345,8 +1341,7 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud): verbosity=3, workdir=mock.ANY, forks=None)], utils_fixture2.mock_run_ansible_playbook.mock_calls) - @mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.' - '_write_user_environment', autospec=True) + @mock.patch('tripleoclient.utils.write_user_environment', autospec=True) def test_provision_baremetal(self, mock_write): mock_write.return_value = ( '/tmp/tht/user-environments/baremetal-deployed.yaml', @@ -1427,7 +1422,6 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud): ) ]) mock_write.assert_called_once_with( - self.cmd, {'parameter_defaults': {'foo': 'bar'}}, 'baremetal-deployed.yaml', tht_root, diff --git a/tripleoclient/tests/v1/overcloud_node/test_overcloud_node.py b/tripleoclient/tests/v1/overcloud_node/test_overcloud_node.py index e9a3f8b67..e4afe2e18 100644 --- a/tripleoclient/tests/v1/overcloud_node/test_overcloud_node.py +++ b/tripleoclient/tests/v1/overcloud_node/test_overcloud_node.py @@ -49,14 +49,6 @@ class TestDeleteNode(fakes.TestDeleteNode): ) stack.output_show.return_value = {'output': {'output_value': []}} - delete_node = mock.patch( - 'tripleo_common.actions.scale.ScaleDownAction.run', - autospec=True - ) - delete_node.start() - delete_node.return_value = None - self.addCleanup(delete_node.stop) - wait_stack = mock.patch( 'tripleoclient.utils.wait_for_stack_ready', autospec=True @@ -66,11 +58,15 @@ class TestDeleteNode(fakes.TestDeleteNode): self.addCleanup(wait_stack.stop) self.app.client_manager.compute.servers.get.return_value = None + @mock.patch('tripleoclient.workflows.scale.remove_node_from_stack', + autospec=True) @mock.patch('heatclient.common.event_utils.get_events', autospec=True) @mock.patch('tripleoclient.utils.run_ansible_playbook', autospec=True) - def test_node_delete(self, mock_playbook, mock_get_events): + def test_node_delete(self, mock_playbook, + mock_get_events, + mock_remove_stack): argslist = ['instance1', 'instance2', '--stack', 'overcast', '--timeout', '90', '--yes'] verifylist = [ @@ -112,12 +108,15 @@ class TestDeleteNode(fakes.TestDeleteNode): self.cmd.take_action, parsed_args) + @mock.patch('tripleoclient.workflows.scale.remove_node_from_stack', + autospec=True) @mock.patch('heatclient.common.event_utils.get_events', autospec=True) @mock.patch('tripleoclient.utils.run_ansible_playbook', autospec=True) def test_node_delete_without_stack(self, mock_playbook, - mock_get_events): + mock_get_events, + mock_remove_stack): arglist = ['instance1', '--yes'] verifylist = [ @@ -127,6 +126,8 @@ class TestDeleteNode(fakes.TestDeleteNode): parsed_args = self.check_parser(self.cmd, arglist, verifylist) self.cmd.take_action(parsed_args) + @mock.patch('tripleoclient.workflows.scale.remove_node_from_stack', + autospec=True) @mock.patch('heatclient.common.event_utils.get_events', autospec=True) @mock.patch('tripleoclient.utils.run_ansible_playbook', @@ -135,7 +136,8 @@ class TestDeleteNode(fakes.TestDeleteNode): def test_node_delete_baremetal_deployment(self, mock_tempfile, mock_playbook, - mock_get_events): + mock_get_events, + mock_remove_from_stack): bm_yaml = [{ 'name': 'Compute', diff --git a/tripleoclient/utils.py b/tripleoclient/utils.py index b63042e51..467c4255b 100644 --- a/tripleoclient/utils.py +++ b/tripleoclient/utils.py @@ -61,6 +61,7 @@ from six.moves.urllib import error as url_error from six.moves.urllib import request from tripleo_common.utils import stack as stack_utils +from tripleo_common import update from tripleoclient import constants from tripleoclient import exceptions @@ -2483,3 +2484,48 @@ def update_deployment_status(stack_name, status): default_flow_style=False) safe_write(get_status_yaml(stack_name), contents) + + +def create_breakpoint_cleanup_env(tht_root, stack): + bp_env = {} + update.add_breakpoints_cleanup_into_env(bp_env) + env_path = write_user_environment( + bp_env, + 'tripleoclient-breakpoint-cleanup.yaml', + tht_root, + stack) + return [env_path] + + +def create_parameters_env(parameters, tht_root, stack, + env_file='tripleoclient-parameters.yaml'): + parameter_defaults = {"parameter_defaults": parameters} + env_path = write_user_environment( + parameter_defaults, + env_file, + tht_root, + stack) + return [env_path] + + +def build_user_env_path(abs_env_path, tht_root): + env_dirname = os.path.dirname(abs_env_path) + user_env_dir = os.path.join( + tht_root, 'user-environments', env_dirname[1:]) + user_env_path = os.path.join( + user_env_dir, os.path.basename(abs_env_path)) + makedirs(user_env_dir) + return user_env_path + + +def write_user_environment(env_map, abs_env_path, tht_root, + stack): + # We write the env_map to the local /tmp tht_root and also + # to the swift plan container. + contents = yaml.safe_dump(env_map, default_flow_style=False) + user_env_path = build_user_env_path(abs_env_path, tht_root) + LOG.debug("user_env_path=%s" % user_env_path) + with open(user_env_path, 'w') as f: + LOG.debug("Writing user environment %s" % user_env_path) + f.write(contents) + return user_env_path diff --git a/tripleoclient/v1/overcloud_deploy.py b/tripleoclient/v1/overcloud_deploy.py index e248402f0..16ef84bfb 100644 --- a/tripleoclient/v1/overcloud_deploy.py +++ b/tripleoclient/v1/overcloud_deploy.py @@ -204,52 +204,12 @@ class DeployOvercloud(command.Command): % ctlplane_hostname) return self._cleanup_host_entry(out) - def _create_breakpoint_cleanup_env(self, tht_root, container_name): - bp_env = {} - update.add_breakpoints_cleanup_into_env(bp_env) - env_path = self._write_user_environment( - bp_env, - 'tripleoclient-breakpoint-cleanup.yaml', - tht_root, - container_name) - return [env_path] - - def _create_parameters_env(self, parameters, tht_root, container_name): - parameter_defaults = {"parameter_defaults": parameters} - env_path = self._write_user_environment( - parameter_defaults, - 'tripleoclient-parameters.yaml', - tht_root, - container_name) - return [env_path] - def _check_limit_skiplist_warning(self, env): if env.get('parameter_defaults').get('DeploymentServerBlacklist'): msg = _('[WARNING] DeploymentServerBlacklist is defined and will ' 'be ignored because --limit has been specified.') self.log.warning(msg) - def _user_env_path(self, abs_env_path, tht_root): - env_dirname = os.path.dirname(abs_env_path) - user_env_dir = os.path.join( - tht_root, 'user-environments', env_dirname[1:]) - user_env_path = os.path.join( - user_env_dir, os.path.basename(abs_env_path)) - utils.makedirs(user_env_dir) - return user_env_path - - def _write_user_environment(self, env_map, abs_env_path, tht_root, - container_name): - # We write the env_map to the local /tmp tht_root and also - # to the swift plan container. - contents = yaml.safe_dump(env_map, default_flow_style=False) - user_env_path = self._user_env_path(abs_env_path, tht_root) - self.log.debug("user_env_path=%s" % user_env_path) - with open(user_env_path, 'w') as f: - self.log.debug("Writing user environment %s" % user_env_path) - f.write(contents) - return user_env_path - def _heat_deploy(self, stack, stack_name, template_path, parameters, env_files, timeout, tht_root, env, run_validations, @@ -330,7 +290,7 @@ class DeployOvercloud(command.Command): parameters = {} parameters.update(self._update_parameters( parsed_args, stack, tht_root, user_tht_root)) - param_env = self._create_parameters_env( + param_env = utils.create_parameters_env( parameters, tht_root, parsed_args.stack) created_env_files.extend(param_env) @@ -347,7 +307,7 @@ class DeployOvercloud(command.Command): parsed_args.deployment_python_interpreter if stack: - env_path = self._create_breakpoint_cleanup_env( + env_path = utils.create_breakpoint_cleanup_env( tht_root, parsed_args.stack) created_env_files.extend(env_path) @@ -360,7 +320,7 @@ class DeployOvercloud(command.Command): # Invokes the workflows specified in plan environment file if parsed_args.plan_environment_file: - output_path = self._user_env_path( + output_path = utils.build_user_env_path( 'derived_parameters.yaml', tht_root) workflow_params.build_derived_params_environment( self.clients, parsed_args.stack, tht_root, env_files, @@ -566,7 +526,7 @@ class DeployOvercloud(command.Command): with open('{}.pub'.format(key), 'rt') as fp: ssh_key = fp.read() - output_path = self._user_env_path( + output_path = utils.build_user_env_path( 'baremetal-deployed.yaml', tht_root ) @@ -591,7 +551,7 @@ class DeployOvercloud(command.Command): with open(output_path, 'r') as fp: parameter_defaults = yaml.safe_load(fp) - self._write_user_environment( + utils.write_user_environment( parameter_defaults, 'baremetal-deployed.yaml', tht_root, diff --git a/tripleoclient/workflows/scale.py b/tripleoclient/workflows/scale.py index a920a1862..76a3d3d96 100644 --- a/tripleoclient/workflows/scale.py +++ b/tripleoclient/workflows/scale.py @@ -13,13 +13,151 @@ # License for the specific language governing permissions and limitations # under the License. -from heatclient.common import event_utils -from tripleo_common.actions import scale +import collections +import shutil +import tempfile +from heatclient.common import event_utils + +from tripleoclient import constants from tripleoclient import utils from tripleoclient.workflows import deployment +def get_group_resources_after_delete(groupname, res_to_delete, resources): + group = next(res for res in resources if + res.resource_name == groupname and + res.resource_type == 'OS::Heat::ResourceGroup') + members = [] + for res in resources: + stack_name, stack_id = next( + x['href'] for x in res.links if + x['rel'] == 'stack').rsplit('/', 2)[1:] + # desired new count of nodes after delete operation should be + # count of all existing nodes in ResourceGroup which are not + # in set of nodes being deleted. Also nodes in any delete state + # from a previous failed update operation are not included in + # overall count (if such nodes exist) + if (stack_id == group.physical_resource_id and + res not in res_to_delete and + not res.resource_status.startswith('DELETE')): + + members.append(res) + + return members + + +def _get_removal_params_from_heat(resources_by_role, resources): + stack_params = {} + for role, role_resources in resources_by_role.items(): + param_name = "{0}Count".format(role) + + # get real count of nodes for each role. *Count stack parameters + # can not be used because stack parameters return parameters + # passed by user no matter if previous update operation succeeded + # or not + group_members = get_group_resources_after_delete( + role, role_resources, resources) + stack_params[param_name] = str(len(group_members)) + + # add instance resource names into removal_policies + # so heat knows which instances should be removed + removal_param = "{0}RemovalPolicies".format(role) + stack_params[removal_param] = [{ + 'resource_list': [r.resource_name for r in role_resources] + }] + + # force reset the removal_policies_mode to 'append' + # as 'update' can lead to deletion of unintended nodes. + removal_mode = "{0}RemovalPoliciesMode".format(role) + stack_params[removal_mode] = 'append' + + return stack_params + + +def _match_hostname(heatclient, instance_list, res, stack_name): + type_patterns = ['DeployedServer', 'Server'] + if any(res.resource_type.endswith(x) for x in type_patterns): + res_details = heatclient.resources.get( + stack_name, res.resource_name) + if 'name' in res_details.attributes: + try: + instance_list.remove(res_details.attributes['name']) + return True + except ValueError: + return False + return False + + +def remove_node_from_stack(clients, stack, nodes, timeout): + heat = clients.orchestration + resources = heat.resources.list(stack.stack_name, + nested_depth=5) + resources_by_role = collections.defaultdict(list) + instance_list = list(nodes) + + for res in resources: + stack_name, stack_id = next( + x['href'] for x in res.links if + x['rel'] == 'stack').rsplit('/', 2)[1:] + + try: + instance_list.remove(res.physical_resource_id) + except ValueError: + if not _match_hostname(heat, instance_list, + res, stack_name): + continue + + # get resource to remove from resource group (it's parent resource + # of nova server) + role_resource = next(x for x in resources if + x.physical_resource_id == stack_id) + # get the role name which is parent resource name in Heat + role = role_resource.parent_resource + resources_by_role[role].append(role_resource) + + resources_by_role = dict(resources_by_role) + + if instance_list: + raise ValueError( + "Couldn't find following instances in stack %s: %s" % + (stack, ','.join(instance_list))) + + # decrease count for each role (or resource group) and set removal + # policy for each resource group + stack_params = _get_removal_params_from_heat( + resources_by_role, resources) + try: + tht_tmp = tempfile.mkdtemp(prefix='tripleoclient-') + tht_root = "%s/tripleo-heat-templates" % tht_tmp + + created_env_files = [] + env_path = utils.create_breakpoint_cleanup_env( + tht_root, stack.stack_name) + created_env_files.extend(env_path) + param_env_path = utils.create_parameters_env( + stack_params, tht_root, stack.stack_name, + 'scale-down-parameters.yaml') + created_env_files.extend(param_env_path) + env_files_tracker = [] + env_files, _ = utils.process_multiple_environments( + created_env_files, tht_root, + constants.TRIPLEO_HEAT_TEMPLATES, + env_files_tracker=env_files_tracker) + + stack_args = { + 'stack_name': stack.stack_name, + 'environment_files': env_files_tracker, + 'files': env_files, + 'timeout_mins': timeout, + 'existing': True, + 'clear_parameters': list(stack_params.keys())} + + heat.stacks.update(stack.id, **stack_args) + finally: + shutil.rmtree(tht_tmp) + + def scale_down(log, clients, stack, nodes, timeout=None, verbosity=0, connection_timeout=None): """Unprovision and deletes overcloud nodes from a heat stack. @@ -75,20 +213,18 @@ def scale_down(log, clients, stack, nodes, timeout=None, verbosity=0, verbosity=verbosity, deployment_timeout=timeout ) - events = event_utils.get_events(clients.orchestration, - stack_id=stack.stack_name, - event_args={'sort_dir': 'desc', - 'limit': 1}) + + events = event_utils.get_events( + clients.orchestration, stack_id=stack.stack_name, + event_args={'sort_dir': 'desc', 'limit': 1}) marker = events[0].id if events else None print('Running scale down') - context = clients.tripleoclient.create_mistral_context() - scale_down_action = scale.ScaleDownAction(nodes=nodes, timeout=timeout, - container=stack.stack_name) - scale_down_action.run(context=context) + + remove_node_from_stack(clients, stack, nodes, timeout) + utils.wait_for_stack_ready( orchestration_client=clients.orchestration, stack_name=stack.stack_name, action='UPDATE', - marker=marker - ) + marker=marker)