From 6f8134b260058fd7793abc62c9301ef0f26e355f Mon Sep 17 00:00:00 2001 From: Emilien Macchi Date: Tue, 21 Jul 2020 10:10:20 -0400 Subject: [PATCH] [train] manual backport & squash for overcloud_deploy raise changes This manual backport and squash allows backporting a feature without the regression that was introduced. It has 2 commits: 1) overcloud_deploy: raise at the end if Ansible Runner had exception In the context of MaxFailPercentage, if a node (e.g. one of 100 computes) has any fatal error during an Ansible play and we tolerate some percentage of failure, we want to raise the error but at the very end of the deployment. So this patch puts the following actions in a "finally" block so they always execute: * Create overcloudrc * Set the right permissions on the clouds.yaml * Execute _deploy_postconfig * Print info such as the Keystone endpoint, Horizon URL, RC file location and deploy message (saying if there is an error or not) And at the very end we raise the actual AnsibleRunner trace if the deployment failed. So even if a node failed and we tolerate it, we'll still finish the deployment until the end, but for UX purposes we want to raise it at the very end. Note that when it fails and it's tolerated, Ansible prints the node as "ignored": PLAY RECAP ****************************************************************** compute-0 : ok=555 (...) failed=0 skipped=484 rescued=0 ignored=0 compute-1 : ok=60 (...) failed=1 skipped=40 rescued=0 ignored=1 controller-0 : ok=960 (...) failed=0 skipped=709 rescued=0 ignored=0 controller-1 : ok=920 (...) failed=0 skipped=693 rescued=0 ignored=0 controller-2 : ok=919 (...) failed=0 skipped=693 rescued=0 ignored=0 undercloud : ok=86 (...) failed=0 skipped=57 rescued=0 ignored=0 To improve UX, we'll investigate an Ansible callback to properly tell which nodes need to be re-deployed. Note: also mock copy_clouds_yaml since it wasn't tested before but it's failing to reach the files on the filesystem. 
2) Don't run config-download for --stack-only We now seem to run config-download irrespective of the --stack-only flag. This moves the config-download related code within the if block. Change-Id: I9c52a790b55175310b8cecaeb6d7a23950cff271 Closes-Bug: #1887811 --- .../overcloud_deploy/test_overcloud_deploy.py | 4 +- tripleoclient/v1/overcloud_deploy.py | 74 ++++++++++--------- 2 files changed, 44 insertions(+), 34 deletions(-) diff --git a/tripleoclient/tests/v1/overcloud_deploy/test_overcloud_deploy.py b/tripleoclient/tests/v1/overcloud_deploy/test_overcloud_deploy.py index 2b2868dbe..082d5ada8 100644 --- a/tripleoclient/tests/v1/overcloud_deploy/test_overcloud_deploy.py +++ b/tripleoclient/tests/v1/overcloud_deploy/test_overcloud_deploy.py @@ -1389,6 +1389,7 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud): fixture.mock_set_deployment_status.call_args[0][1]) mock_copy.assert_called_once() + @mock.patch('tripleoclient.utils.copy_clouds_yaml') @mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.' 
'_get_undercloud_host_entry', autospec=True, return_value='192.168.0.1 uc.ctlplane.localhost uc.ctlplane') @@ -1405,7 +1406,8 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud): mock_overcloudrc, mock_write_overcloudrc, mock_overcloud_endpoint, mock_create_tempest_deployer_input, - mock_get_undercloud_host_entry): + mock_get_undercloud_host_entry, + mock_copy): fixture = deployment.DeploymentWorkflowFixture() self.useFixture(fixture) clients = self.app.client_manager diff --git a/tripleoclient/v1/overcloud_deploy.py b/tripleoclient/v1/overcloud_deploy.py index fe64329f6..f836dc579 100644 --- a/tripleoclient/v1/overcloud_deploy.py +++ b/tripleoclient/v1/overcloud_deploy.py @@ -931,6 +931,9 @@ class DeployOvercloud(command.Command): def take_action(self, parsed_args): self.log.debug("take_action(%s)" % parsed_args) + deploy_status = 'success' + deploy_message = 'without error' + self._setup_clients(parsed_args) # Swiftclient logs things like 404s at error level, which is a problem @@ -982,13 +985,13 @@ class DeployOvercloud(command.Command): # wont do anything. 
return - if parsed_args.config_download: - print("Deploying overcloud configuration") - deployment.set_deployment_status( - self.clients, 'deploying', - plan=stack.stack_name) + try: + if parsed_args.config_download: + print("Deploying overcloud configuration") + deployment.set_deployment_status( + self.clients, 'deploying', + plan=stack.stack_name) - try: if not parsed_args.config_download_only: deployment.get_hosts_and_enable_ssh_admin( self.log, self.clients, stack, @@ -1028,40 +1031,45 @@ class DeployOvercloud(command.Command): limit_nodes=parsed_args.limit ) ) - except Exception: - deployment.set_deployment_status( - self.clients, 'failed', - plan=stack.stack_name) - raise + except Exception as deploy_e: + deploy_status = 'failed' + deploy_message = 'with error' + deploy_trace = deploy_e + deployment.set_deployment_status( + self.clients, deploy_status, + plan=stack.stack_name) + finally: + # Force fetching of attributes + stack.get() - # Force fetching of attributes - stack.get() + overcloudrcs = deployment.create_overcloudrc( + self.clients, container=stack.stack_name, + no_proxy=parsed_args.no_proxy) - overcloudrcs = deployment.create_overcloudrc( - self.clients, container=stack.stack_name, - no_proxy=parsed_args.no_proxy) + # Copy clouds.yaml to the cloud user directory + user = getpwuid(os.stat(constants.CLOUD_HOME_DIR).st_uid).pw_name + utils.copy_clouds_yaml(user) + rcpath = utils.write_overcloudrc(stack.stack_name, overcloudrcs) + utils.create_tempest_deployer_input() - # Copy clouds.yaml to the cloud user directory - user = getpwuid(os.stat(constants.CLOUD_HOME_DIR).st_uid).pw_name - utils.copy_clouds_yaml(user) - rcpath = utils.write_overcloudrc(stack.stack_name, overcloudrcs) - utils.create_tempest_deployer_input() + # Run postconfig on create or force. 
Use force to makes sure + # endpoints are created with deploy reruns and upgrades + if (stack_create or parsed_args.force_postconfig + and not parsed_args.skip_postconfig): + self._deploy_postconfig(stack, parsed_args) - # Run postconfig on create or force. Use force to makes sure endpoints - # are created with deploy reruns and upgrades - if (stack_create or parsed_args.force_postconfig - and not parsed_args.skip_postconfig): - self._deploy_postconfig(stack, parsed_args) + overcloud_endpoint = utils.get_overcloud_endpoint(stack) - overcloud_endpoint = utils.get_overcloud_endpoint(stack) + horizon_url = deployment.get_horizon_url( + self.clients, stack=stack.stack_name) - horizon_url = deployment.get_horizon_url( - self.clients, stack=stack.stack_name) + print("Overcloud Endpoint: {0}".format(overcloud_endpoint)) + print("Overcloud Horizon Dashboard URL: {0}".format(horizon_url)) + print("Overcloud rc file: {0}".format(rcpath)) + print("Overcloud Deployed {0}".format(deploy_message)) - print("Overcloud Endpoint: {0}".format(overcloud_endpoint)) - print("Overcloud Horizon Dashboard URL: {0}".format(horizon_url)) - print("Overcloud rc file: {0}".format(rcpath)) - print("Overcloud Deployed") + if deploy_status == 'failed': + raise(deploy_trace) class GetDeploymentStatus(command.Command):