overcloud_deploy: raise at the end if Ansible Runner had exception

In the context of MaxFailPercentage, if a node (e.g. one of 100
computes) has any fatal error during an Ansible play and we tolerate
some percentage of failure, we want to raise the error but at the very
end of the deployment.

So this patch puts the following actions in a "finally" block so they
always execute:

* Create overcloudrc
* Set the right permissions on the clouds.yaml
* Execute _deploy_postconfig
* Print info like the Keystone endpoint, Horizon URL, RC file location
  and deploy message (saying whether there is an error or not)

And at the very end we raise the actual AnsibleRunner trace if the
deployment failed.

So even if a node failed and we tolerate it, we'll still finish the
deployment until the end, but for UX purposes we want to raise it at the
very end.
Note that when it fails and it's tolerated, Ansible prints the node as
"ignored":

PLAY RECAP ******************************************************************
compute-0    : ok=555  (...)  failed=0    skipped=484  rescued=0    ignored=0
compute-1    : ok=60   (...)  failed=1    skipped=40   rescued=0    ignored=1
controller-0 : ok=960  (...)  failed=0    skipped=709  rescued=0    ignored=0
controller-1 : ok=920  (...)  failed=0    skipped=693  rescued=0    ignored=0
controller-2 : ok=919  (...)  failed=0    skipped=693  rescued=0    ignored=0
undercloud   : ok=86   (...)  failed=0    skipped=57   rescued=0    ignored=0

To improve UX, we'll investigate an Ansible callback to properly tell
which nodes need to be re-deployed.

Note: also mock copy_clouds_yaml since it wasn't tested before but it's
failing to reach the files on the filesystem.
Change-Id: I7d733499e74abe2cdf91526df608dc7c273bf19e
This commit is contained in:
Emilien Macchi 2020-06-16 10:34:44 -04:00
parent 11f4cf1998
commit 5b63958105
2 changed files with 93 additions and 78 deletions

View File

@ -1404,6 +1404,7 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud):
fixture.mock_set_deployment_status.call_args[-1]['status'])
mock_copy.assert_called_once()
@mock.patch('tripleoclient.utils.copy_clouds_yaml')
@mock.patch('tripleoclient.v1.overcloud_deploy.DeployOvercloud.'
'_get_undercloud_host_entry', autospec=True,
return_value='192.168.0.1 uc.ctlplane.localhost uc.ctlplane')
@ -1419,7 +1420,8 @@ class TestDeployOvercloud(fakes.TestDeployOvercloud):
mock_overcloudrc,
mock_overcloud_endpoint,
mock_create_tempest_deployer_input,
mock_get_undercloud_host_entry):
mock_get_undercloud_host_entry,
mock_copy):
fixture = deployment.DeploymentWorkflowFixture()
self.useFixture(fixture)
clients = self.app.client_manager

View File

@ -968,6 +968,9 @@ class DeployOvercloud(command.Command):
def take_action(self, parsed_args):
self.log.debug("take_action(%s)" % parsed_args)
deploy_status = 'DEPLOY_SUCCESS'
deploy_message = 'without error'
self._setup_clients(parsed_args)
# Swiftclient logs things like 404s at error level, which is a problem
@ -1014,94 +1017,104 @@ class DeployOvercloud(command.Command):
# wont do anything.
return
if parsed_args.config_download:
print("Deploying overcloud configuration")
try:
if parsed_args.config_download:
print("Deploying overcloud configuration")
deployment.set_deployment_status(
clients=self.clients,
plan=stack.stack_name,
status='DEPLOYING'
)
if not parsed_args.config_download_only:
deployment.get_hosts_and_enable_ssh_admin(
stack,
parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user,
self.get_key_pair(parsed_args),
parsed_args.overcloud_ssh_port_timeout,
verbosity=utils.playbook_verbosity(self=self)
)
if parsed_args.config_download_timeout:
timeout = parsed_args.config_download_timeout
else:
used = int((time.time() - start) // 60)
timeout = parsed_args.timeout - used
if timeout <= 0:
raise exceptions.DeploymentError(
'Deployment timed out after %sm' % used)
deployment_options = {}
if parsed_args.deployment_python_interpreter:
deployment_options['ansible_python_interpreter'] = \
parsed_args.deployment_python_interpreter
deployment.config_download(
self.log,
self.clients,
stack,
parsed_args.overcloud_ssh_network,
parsed_args.output_dir,
parsed_args.override_ansible_cfg,
timeout=parsed_args.overcloud_ssh_port_timeout,
verbosity=utils.playbook_verbosity(self=self),
deployment_options=deployment_options,
in_flight_validations=parsed_args.inflight,
deployment_timeout=timeout,
tags=parsed_args.tags,
skip_tags=parsed_args.skip_tags,
limit_hosts=utils.playbook_limit_parse(
limit_nodes=parsed_args.limit
)
)
deployment.set_deployment_status(
clients=self.clients,
plan=stack.stack_name,
status='DEPLOYING'
status=deploy_status)
except Exception as deploy_e:
deploy_status = 'DEPLOY_FAILED'
deploy_message = 'with error'
deploy_trace = deploy_e
deployment.set_deployment_status(
clients=self.clients,
plan=stack.stack_name,
status=deploy_status
)
finally:
# Force fetching of attributes
stack.get()
try:
if not parsed_args.config_download_only:
deployment.get_hosts_and_enable_ssh_admin(
stack,
parsed_args.overcloud_ssh_network,
parsed_args.overcloud_ssh_user,
self.get_key_pair(parsed_args),
parsed_args.overcloud_ssh_port_timeout,
verbosity=utils.playbook_verbosity(self=self)
)
rcpath = deployment.create_overcloudrc(
container=stack.stack_name,
no_proxy=parsed_args.no_proxy)
if parsed_args.config_download_timeout:
timeout = parsed_args.config_download_timeout
else:
used = int((time.time() - start) // 60)
timeout = parsed_args.timeout - used
if timeout <= 0:
raise exceptions.DeploymentError(
'Deployment timed out after %sm' % used)
# Copy clouds.yaml to the cloud user directory
user = \
getpwuid(os.stat(constants.CLOUD_HOME_DIR).st_uid).pw_name
utils.copy_clouds_yaml(user)
utils.create_tempest_deployer_input()
deployment_options = {}
if parsed_args.deployment_python_interpreter:
deployment_options['ansible_python_interpreter'] = \
parsed_args.deployment_python_interpreter
# Run postconfig on create or force. Use force to makes sure
# endpoints are created with deploy reruns and upgrades
if (stack_create or parsed_args.force_postconfig
and not parsed_args.skip_postconfig):
self._deploy_postconfig(stack, parsed_args)
deployment.config_download(
self.log,
self.clients,
stack,
parsed_args.overcloud_ssh_network,
parsed_args.output_dir,
parsed_args.override_ansible_cfg,
timeout=parsed_args.overcloud_ssh_port_timeout,
verbosity=utils.playbook_verbosity(self=self),
deployment_options=deployment_options,
in_flight_validations=parsed_args.inflight,
deployment_timeout=timeout,
tags=parsed_args.tags,
skip_tags=parsed_args.skip_tags,
limit_hosts=utils.playbook_limit_parse(
limit_nodes=parsed_args.limit
)
)
deployment.set_deployment_status(
clients=self.clients,
plan=stack.stack_name,
status='DEPLOY_SUCCESS')
except Exception:
deployment.set_deployment_status(
clients=self.clients,
plan=stack.stack_name,
status='DEPLOY_FAILED'
)
raise
overcloud_endpoint = utils.get_overcloud_endpoint(stack)
# Force fetching of attributes
stack.get()
horizon_url = deployment.get_horizon_url(
stack=stack.stack_name)
rcpath = deployment.create_overcloudrc(container=stack.stack_name,
no_proxy=parsed_args.no_proxy)
print("Overcloud Endpoint: {0}".format(overcloud_endpoint))
print("Overcloud Horizon Dashboard URL: {0}".format(
horizon_url))
print("Overcloud rc file: {0}".format(rcpath))
print("Overcloud Deployed {0}".format(deploy_message))
# Copy clouds.yaml to the cloud user directory
user = getpwuid(os.stat(constants.CLOUD_HOME_DIR).st_uid).pw_name
utils.copy_clouds_yaml(user)
utils.create_tempest_deployer_input()
# Run postconfig on create or force. Use force to makes sure endpoints
# are created with deploy reruns and upgrades
if (stack_create or parsed_args.force_postconfig
and not parsed_args.skip_postconfig):
self._deploy_postconfig(stack, parsed_args)
overcloud_endpoint = utils.get_overcloud_endpoint(stack)
horizon_url = deployment.get_horizon_url(stack=stack.stack_name)
print("Overcloud Endpoint: {0}".format(overcloud_endpoint))
print("Overcloud Horizon Dashboard URL: {0}".format(horizon_url))
print("Overcloud rc file: {0}".format(rcpath))
print("Overcloud Deployed")
if deploy_status == 'DEPLOY_FAILED':
raise(deploy_trace)
class GetDeploymentStatus(command.Command):