# Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. from __future__ import print_function import copy import os import pprint import shutil import socket import subprocess import tempfile import time import yaml from heatclient.common import event_utils from openstackclient import shell from tripleoclient.constants import ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT from tripleoclient.constants import ENABLE_SSH_ADMIN_STATUS_INTERVAL from tripleoclient.constants import ENABLE_SSH_ADMIN_TIMEOUT from tripleoclient import exceptions from tripleoclient import utils from tripleoclient.workflows import base _WORKFLOW_TIMEOUT = 360 # 6 * 60 seconds def deploy(log, clients, **workflow_input): workflow_client = clients.workflow_engine tripleoclients = clients.tripleoclient wf_name = 'tripleo.deployment.v1.deploy_plan' with tripleoclients.messaging_websocket() as ws: execution = base.start_workflow( workflow_client, wf_name, workflow_input=workflow_input ) # The deploy workflow ends once the Heat create/update starts. This # means that is shouldn't take very long. Wait for 10 minutes for # messages from the workflow. for payload in base.wait_for_messages(workflow_client, ws, execution, 600): status = payload.get('status', 'RUNNING') message = payload.get('message') if message and status == "RUNNING": print(message) if payload['status'] != "SUCCESS": log.info(pprint.pformat(payload)) print(payload['message']) raise ValueError("Unexpected status %s for %s" % (payload['status'], wf_name)) def deploy_and_wait(log, clients, stack, plan_name, verbose_level, timeout=None, run_validations=False, skip_deploy_identifier=False, deployment_options={}): """Start the deploy and wait for it to finish""" workflow_input = { "container": plan_name, "run_validations": run_validations, "skip_deploy_identifier": skip_deploy_identifier, "deployment_options": deployment_options, } if timeout is not None: workflow_input['timeout'] = timeout deploy(log, clients, **workflow_input) orchestration_client = clients.orchestration if stack is None: log.info("Performing Heat stack create") action = 'CREATE' marker = None else: log.info("Performing Heat stack update") # Make sure existing parameters for stack are reused # Find the last top-level event to use for the first marker events = event_utils.get_events(orchestration_client, stack_id=plan_name, event_args={'sort_dir': 'desc', 'limit': 1}) marker = events[0].id if events else None action = 'UPDATE' time.sleep(10) verbose_events = verbose_level >= 1 create_result = utils.wait_for_stack_ready( orchestration_client, plan_name, marker, action, verbose_events) if not create_result: shell.OpenStackShell().run(["stack", "failures", "list", plan_name]) set_deployment_status(clients, 'failed', plan=plan_name) if stack is None: raise exceptions.DeploymentError("Heat Stack create failed.") else: raise exceptions.DeploymentError("Heat Stack update failed.") def create_overcloudrc(clients, **workflow_input): workflow_client = clients.workflow_engine tripleoclients = clients.tripleoclient with tripleoclients.messaging_websocket() as ws: execution = base.start_workflow( workflow_client, 'tripleo.deployment.v1.create_overcloudrc', workflow_input=workflow_input ) for payload in base.wait_for_messages(workflow_client, ws, execution): # the workflow will return the overcloudrc data, an error message # or blank. if payload.get('status') == 'SUCCESS': return payload.get('message') else: raise exceptions.WorkflowServiceError( 'Exception creating overcloudrc: {}'.format( payload.get('message'))) def create_cloudsyaml(clients, **workflow_input): workflow_client = clients.workflow_engine tripleoclients = clients.tripleoclient with tripleoclients.messaging_websocket() as ws: execution = base.start_workflow( workflow_client, 'tripleo.deployment.v1.createcloudsyaml', workflow_input=workflow_input ) for payload in base.wait_for_messages(workflow_client, ws, execution): # the workflow will return the overcloud cloud yaml data, an error # message or blank. if payload.get('status') == 'SUCCESS': return payload.get('message') else: raise exceptions.WorkflowServiceError( 'Exception creating overcloud clouds.yaml file: {}'.format( payload.get('message'))) def get_overcloud_hosts(stack, ssh_network): ips = [] role_net_ip_map = utils.get_role_net_ip_map(stack) blacklisted_ips = utils.get_blacklisted_ip_addresses(stack) for net_ip_map in role_net_ip_map.values(): # get a copy of the lists of ssh_network and ctlplane ips # as blacklisted_ips will only be the ctlplane ips, we need # both lists to determine which to actually blacklist net_ips = copy.copy(net_ip_map.get(ssh_network, [])) ctlplane_ips = copy.copy(net_ip_map.get('ctlplane', [])) blacklisted_ctlplane_ips = \ [ip for ip in ctlplane_ips if ip in blacklisted_ips] # for each blacklisted ctlplane ip, remove the corresponding # ssh_network ip at that same index in the net_ips list for bcip in blacklisted_ctlplane_ips: index = ctlplane_ips.index(bcip) ctlplane_ips.pop(index) net_ips.pop(index) ips.extend(net_ips) return ips def wait_for_ssh_port(host, timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT): start = int(time.time()) while True: now = int(time.time()) if (now - start) > timeout: raise exceptions.DeploymentError( "Timed out waiting for port 22 from %s" % host) # first check ipv4 then check ipv6 try: sock = socket.socket() sock.connect((host, 22)) sock.close() return except socket.error: try: sock = socket.socket(socket.AF_INET6) sock.connect((host, 22)) sock.close() return except socket.error: pass time.sleep(1) def get_hosts_and_enable_ssh_admin( log, clients, stack, overcloud_ssh_network, overcloud_ssh_user, overcloud_ssh_key, enable_ssh_timeout=ENABLE_SSH_ADMIN_TIMEOUT, enable_ssh_port_timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT): hosts = get_overcloud_hosts(stack, overcloud_ssh_network) if [host for host in hosts if host]: try: enable_ssh_admin(log, clients, stack.stack_name, hosts, overcloud_ssh_user, overcloud_ssh_key, enable_ssh_timeout, enable_ssh_port_timeout) except subprocess.CalledProcessError as e: if e.returncode == 255: log.error("Couldn't not import keys to one of {}. " "Check if the user/ip are corrects.\n".format(hosts)) else: log.error("Unknown error. " "Original message is:\n{}".format(hosts, e.message)) else: raise exceptions.DeploymentError("Cannot find any hosts on '{}'" " in network '{}'" .format(stack.stack_name, overcloud_ssh_network)) def enable_ssh_admin(log, clients, plan_name, hosts, ssh_user, ssh_key, enable_ssh_timeout=ENABLE_SSH_ADMIN_TIMEOUT, enable_ssh_port_timeout=ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT ): print("Enabling ssh admin (tripleo-admin) for hosts:") print(" ".join(hosts)) print("Using ssh user %s for initial connection." % ssh_user) print("Using ssh key at %s for initial connection." % ssh_key) ssh_options = ("-o ConnectionAttempts=6 " "-o ConnectTimeout=30 " "-o StrictHostKeyChecking=no " "-o PasswordAuthentication=no " "-o UserKnownHostsFile=/dev/null") tmp_key_dir = tempfile.mkdtemp() tmp_key_private = os.path.join(tmp_key_dir, 'id_rsa') tmp_key_public = os.path.join(tmp_key_dir, 'id_rsa.pub') tmp_key_comment = "TripleO split stack short term key" try: tmp_key_command = ["ssh-keygen", "-N", "", "-t", "rsa", "-b", "4096", "-f", tmp_key_private, "-C", tmp_key_comment] DEVNULL = open(os.devnull, 'w') try: subprocess.check_call(tmp_key_command, stdout=DEVNULL, stderr=subprocess.STDOUT) except subprocess.CalledProcessError as exc: log.error("ssh-keygen has failed with return code {0}". format(exc.returncode)) else: log.info("ssh-keygen has been run successfully") DEVNULL.close() with open(tmp_key_public) as pubkey: tmp_key_public_contents = pubkey.read() with open(tmp_key_private) as privkey: tmp_key_private_contents = privkey.read() for host in hosts: wait_for_ssh_port(host, enable_ssh_port_timeout) copy_tmp_key_command = ["ssh"] + ssh_options.split() copy_tmp_key_command += \ ["-o", "StrictHostKeyChecking=no", "-i", ssh_key, "-l", ssh_user, host, "echo -e '\n%s' >> $HOME/.ssh/authorized_keys" % tmp_key_public_contents] print("Inserting TripleO short term key for %s" % host) subprocess.check_call(copy_tmp_key_command, stderr=subprocess.STDOUT) print("Starting ssh admin enablement workflow") workflow_client = clients.workflow_engine workflow_input = { "ssh_user": ssh_user, "ssh_servers": hosts, "ssh_private_key": tmp_key_private_contents, "plan_name": plan_name } execution = base.start_workflow( workflow_client, 'tripleo.access.v1.enable_ssh_admin', workflow_input=workflow_input ) start = int(time.time()) while True: now = int(time.time()) if (now - start) > enable_ssh_timeout: raise exceptions.DeploymentError( "ssh admin enablement workflow - TIMED OUT.") time.sleep(1) execution = workflow_client.executions.get(execution.id) state = execution.state if state == 'RUNNING': if (now - start) % ENABLE_SSH_ADMIN_STATUS_INTERVAL\ == 0: print("ssh admin enablement workflow - RUNNING.") continue elif state == 'SUCCESS': print("ssh admin enablement workflow - COMPLETE.") break elif state in ('FAILED', 'ERROR'): error = "ssh admin enablement workflow - FAILED.\n" error += execution.to_dict()['state_info'] raise exceptions.DeploymentError(error) for host in hosts: rm_tmp_key_command = ["ssh"] + ssh_options.split() rm_tmp_key_command += \ ["-i", ssh_key, "-l", ssh_user, host, "sed -i -e '/%s/d' $HOME/.ssh/authorized_keys" % tmp_key_comment] print("Removing TripleO short term key from %s" % host) subprocess.check_call(rm_tmp_key_command, stderr=subprocess.STDOUT) finally: print("Removing short term keys locally") shutil.rmtree(tmp_key_dir) print("Enabling ssh admin - COMPLETE.") def config_download(log, clients, stack, templates, ssh_user, ssh_key, ssh_network, output_dir, override_ansible_cfg, timeout, verbosity=0, deployment_options={}, in_flight_validations=False): workflow_client = clients.workflow_engine tripleoclients = clients.tripleoclient if in_flight_validations: skip_tags = '' else: skip_tags = 'opendev-validation' workflow_input = { 'verbosity': verbosity, 'plan_name': stack.stack_name, 'ssh_network': ssh_network, 'config_download_timeout': timeout, 'deployment_options': deployment_options, 'skip_tags': skip_tags } if output_dir: workflow_input.update(dict(work_dir=output_dir)) if override_ansible_cfg: with open(override_ansible_cfg) as cfg: override_ansible_cfg_contents = cfg.read() workflow_input.update( dict(override_ansible_cfg=override_ansible_cfg_contents)) workflow_name = 'tripleo.deployment.v1.config_download_deploy' # Check to see if any existing executions for the same stack are already in # progress. log.info("Checking for existing executions of config_download for " "%s" % stack.stack_name) for execution in workflow_client.executions.find( workflow_name=workflow_name, state='RUNNING'): try: exec_input = yaml.safe_load(execution.input) except yaml.YAMLError as ye: log.error("YAML error loading input for execution %s: %s" % (execution.id, str(ye))) raise if exec_input.get('plan_name', 'overcloud') == stack.stack_name: raise exceptions.ConfigDownloadInProgress(execution.id, stack.stack_name) with tripleoclients.messaging_websocket() as ws: execution = base.start_workflow( workflow_client, workflow_name, workflow_input=workflow_input ) for payload in base.wait_for_messages(workflow_client, ws, execution): print(payload['message']) if payload['status'] == 'SUCCESS': print("Overcloud configuration completed.") else: raise exceptions.DeploymentError("Overcloud configuration failed.") def config_download_export(clients, **workflow_input): workflow_client = clients.workflow_engine tripleoclients = clients.tripleoclient with tripleoclients.messaging_websocket() as ws: execution = base.start_workflow( workflow_client, 'tripleo.deployment.v1.config_download_export', workflow_input=workflow_input ) for payload in base.wait_for_messages(workflow_client, ws, execution, _WORKFLOW_TIMEOUT): message = payload.get('message') if message: print(message) if payload['status'] == 'SUCCESS': return payload['tempurl'] else: raise exceptions.WorkflowServiceError( 'Exception exporting config-download: {}'.format( payload['message'])) def get_horizon_url(clients, **workflow_input): workflow_client = clients.workflow_engine tripleoclients = clients.tripleoclient with tripleoclients.messaging_websocket() as ws: execution = base.start_workflow( workflow_client, 'tripleo.deployment.v1.get_horizon_url', workflow_input=workflow_input ) for payload in base.wait_for_messages(workflow_client, ws, execution, 360): assert payload['status'] == "SUCCESS" return payload['horizon_url'] def get_deployment_status(clients, **workflow_input): workflow_client = clients.workflow_engine tripleoclients = clients.tripleoclient with tripleoclients.messaging_websocket() as ws: execution = base.start_workflow( workflow_client, 'tripleo.deployment.v1.get_deployment_status', workflow_input=workflow_input ) for payload in base.wait_for_messages(workflow_client, ws, execution, _WORKFLOW_TIMEOUT): message = payload.get('message') if message: print(message) if payload['status'] == 'SUCCESS': return payload['deployment_status'] else: raise exceptions.WorkflowServiceError( 'Exception getting deployment status: {}'.format( payload.get('message', ''))) def set_deployment_status(clients, status='success', **workflow_input): workflow_client = clients.workflow_engine tripleoclients = clients.tripleoclient if status == 'success': workflow = 'tripleo.deployment.v1.set_deployment_status_success' elif status == 'failed': workflow = 'tripleo.deployment.v1.set_deployment_status_failed' elif status == 'deploying': workflow = 'tripleo.deployment.v1.set_deployment_status_deploying' else: raise Exception("Can't set unknown deployment status: %s" % status) with tripleoclients.messaging_websocket() as ws: execution = base.start_workflow( workflow_client, workflow, workflow_input=workflow_input ) for payload in base.wait_for_messages(workflow_client, ws, execution, _WORKFLOW_TIMEOUT): # Just continue until workflow is done continue if payload['status'] != 'SUCCESS': raise exceptions.WorkflowServiceError( 'Exception setting deployment status: {}'.format( payload.get('message', ''))) def get_deployment_failures(clients, **workflow_input): workflow_client = clients.workflow_engine result = base.call_action( workflow_client, 'tripleo.deployment.get_deployment_failures', **workflow_input ) message = result.get('message') if message: print(message) return result['failures']