Python utility to manage a TripleO-based cloud
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

341 lines
12KB

  1. # Licensed under the Apache License, Version 2.0 (the "License"); you may
  2. # not use this file except in compliance with the License. You may obtain
  3. # a copy of the License at
  4. #
  5. # http://www.apache.org/licenses/LICENSE-2.0
  6. #
  7. # Unless required by applicable law or agreed to in writing, software
  8. # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  9. # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  10. # License for the specific language governing permissions and limitations
  11. # under the License.
  12. from __future__ import print_function
  13. import os
  14. import pprint
  15. import re
  16. import shutil
  17. import socket
  18. import subprocess
  19. import tempfile
  20. import time
  21. from heatclient.common import event_utils
  22. from openstackclient import shell
  23. from tripleoclient import constants
  24. from tripleoclient import exceptions
  25. from tripleoclient import utils
  26. from tripleoclient.workflows import base
  27. _WORKFLOW_TIMEOUT = 360 # 6 * 60 seconds
  28. def deploy(clients, **workflow_input):
  29. workflow_client = clients.workflow_engine
  30. tripleoclients = clients.tripleoclient
  31. wf_name = 'tripleo.deployment.v1.deploy_plan'
  32. with tripleoclients.messaging_websocket() as ws:
  33. execution = base.start_workflow(
  34. workflow_client,
  35. wf_name,
  36. workflow_input=workflow_input
  37. )
  38. # The deploy workflow ends once the Heat create/update starts. This
  39. # means that is shouldn't take very long. Wait for 10 minutes for
  40. # messages from the workflow.
  41. for payload in base.wait_for_messages(workflow_client, ws, execution,
  42. 360):
  43. status = payload.get('status', 'RUNNING')
  44. if 'message' in payload and status == "RUNNING":
  45. print(payload['message'])
  46. if payload['status'] != "SUCCESS":
  47. pprint.pformat(payload)
  48. raise ValueError("Unexpected status %s for %s"
  49. % (payload['status'], wf_name))
  50. def deploy_and_wait(log, clients, stack, plan_name, verbose_level,
  51. timeout=None, run_validations=False,
  52. skip_deploy_identifier=False):
  53. """Start the deploy and wait for it to finish"""
  54. workflow_input = {
  55. "container": plan_name,
  56. "run_validations": run_validations,
  57. "skip_deploy_identifier": skip_deploy_identifier
  58. }
  59. if timeout is not None:
  60. workflow_input['timeout'] = timeout
  61. deploy(clients, **workflow_input)
  62. orchestration_client = clients.orchestration
  63. if stack is None:
  64. log.info("Performing Heat stack create")
  65. action = 'CREATE'
  66. marker = None
  67. else:
  68. log.info("Performing Heat stack update")
  69. # Make sure existing parameters for stack are reused
  70. # Find the last top-level event to use for the first marker
  71. events = event_utils.get_events(orchestration_client,
  72. stack_id=plan_name,
  73. event_args={'sort_dir': 'desc',
  74. 'limit': 1})
  75. marker = events[0].id if events else None
  76. action = 'UPDATE'
  77. time.sleep(10)
  78. verbose_events = verbose_level > 0
  79. create_result = utils.wait_for_stack_ready(
  80. orchestration_client, plan_name, marker, action, verbose_events)
  81. if not create_result:
  82. shell.OpenStackShell().run(["stack", "failures", "list", plan_name])
  83. if stack is None:
  84. raise exceptions.DeploymentError("Heat Stack create failed.")
  85. else:
  86. raise exceptions.DeploymentError("Heat Stack update failed.")
  87. def overcloudrc(workflow_client, **input_):
  88. return base.call_action(workflow_client, 'tripleo.deployment.overcloudrc',
  89. **input_)
  90. def get_overcloud_hosts(clients, stack):
  91. role_net_hostname_map = utils.get_role_net_hostname_map(stack)
  92. hostnames = []
  93. for role in role_net_hostname_map:
  94. hostnames.extend(role_net_hostname_map[role].get('ctlplane', []))
  95. hosts = []
  96. hosts_entry = utils.get_hosts_entry(stack)
  97. for hostname in hostnames:
  98. for line in hosts_entry.split('\n'):
  99. match = re.search('\s*%s\s*' % hostname, line)
  100. if match:
  101. hosts.append(line.split(' ')[0])
  102. return hosts
  103. def wait_for_ssh_port(host):
  104. start = int(time.time())
  105. while True:
  106. now = int(time.time())
  107. if (now - start) > constants.ENABLE_SSH_ADMIN_SSH_PORT_TIMEOUT:
  108. raise exceptions.DeploymentError(
  109. "Timed out waiting for port 22 from %s" % host)
  110. try:
  111. socket.socket().connect((host, 22))
  112. return
  113. except socket.error:
  114. pass
  115. time.sleep(1)
  116. def enable_ssh_admin(log, clients, hosts, ssh_user, ssh_key):
  117. print("Enabling ssh admin (tripleo-admin) for hosts:")
  118. print(" ".join(hosts))
  119. print("Using ssh user %s for initial connection." % ssh_user)
  120. print("Using ssh key at %s for initial connection." % ssh_key)
  121. ssh_options = ("-o ConnectionAttempts=6 "
  122. "-o ConnectTimeout=30 "
  123. "-o StrictHostKeyChecking=no "
  124. "-o PasswordAuthentication=no "
  125. "-o UserKnownHostsFile=/dev/null")
  126. tmp_key_dir = tempfile.mkdtemp()
  127. tmp_key_private = os.path.join(tmp_key_dir, 'id_rsa')
  128. tmp_key_public = os.path.join(tmp_key_dir, 'id_rsa.pub')
  129. tmp_key_comment = "TripleO split stack short term key"
  130. try:
  131. tmp_key_command = ["ssh-keygen", "-N", "", "-t", "rsa", "-b", "4096",
  132. "-f", tmp_key_private, "-C", tmp_key_comment]
  133. DEVNULL = open(os.devnull, 'w')
  134. try:
  135. subprocess.check_call(tmp_key_command, stdout=DEVNULL,
  136. stderr=subprocess.STDOUT)
  137. except subprocess.CalledProcessError as exc:
  138. log.error("ssh-keygen has failed with return code {0}".
  139. format(exc.returncode))
  140. else:
  141. log.info("ssh-keygen has been run successfully")
  142. DEVNULL.close()
  143. with open(tmp_key_public) as pubkey:
  144. tmp_key_public_contents = pubkey.read()
  145. for host in hosts:
  146. wait_for_ssh_port(host)
  147. copy_tmp_key_command = ["ssh"] + ssh_options.split()
  148. copy_tmp_key_command += \
  149. ["-o", "StrictHostKeyChecking=no",
  150. "-i", ssh_key, "-l", ssh_user, host,
  151. "echo -e '\n%s' >> $HOME/.ssh/authorized_keys" %
  152. tmp_key_public_contents]
  153. print("Inserting TripleO short term key for %s" % host)
  154. subprocess.check_call(copy_tmp_key_command,
  155. stderr=subprocess.STDOUT)
  156. print("Starting ssh admin enablement workflow")
  157. workflow_client = clients.workflow_engine
  158. workflow_input = {
  159. "ssh_user": ssh_user,
  160. "ssh_servers": hosts,
  161. "ssh_private_key": open(tmp_key_private).read(),
  162. }
  163. execution = base.start_workflow(
  164. workflow_client,
  165. 'tripleo.access.v1.enable_ssh_admin',
  166. workflow_input=workflow_input
  167. )
  168. start = int(time.time())
  169. while True:
  170. now = int(time.time())
  171. if (now - start) > constants.ENABLE_SSH_ADMIN_TIMEOUT:
  172. raise exceptions.DeploymentError(
  173. "ssh admin enablement workflow - TIMED OUT.")
  174. time.sleep(1)
  175. execution = workflow_client.executions.get(execution.id)
  176. state = execution.state
  177. if state == 'RUNNING':
  178. if (now - start) % constants.ENABLE_SSH_ADMIN_STATUS_INTERVAL\
  179. == 0:
  180. print("ssh admin enablement workflow - RUNNING.")
  181. continue
  182. elif state == 'SUCCESS':
  183. print("ssh admin enablement workflow - COMPLETE.")
  184. break
  185. elif state in ('FAILED', 'ERROR'):
  186. error = "ssh admin enablement workflow - FAILED.\n"
  187. error += execution.to_dict()['state_info']
  188. raise exceptions.DeploymentError(error)
  189. for host in hosts:
  190. rm_tmp_key_command = ["ssh"] + ssh_options.split()
  191. rm_tmp_key_command += \
  192. ["-i", ssh_key, "-l", ssh_user, host,
  193. "sed -i -e '/%s/d' $HOME/.ssh/authorized_keys" %
  194. tmp_key_comment]
  195. print("Removing TripleO short term key from %s" % host)
  196. subprocess.check_call(rm_tmp_key_command, stderr=subprocess.STDOUT)
  197. finally:
  198. print("Removing short term keys locally")
  199. shutil.rmtree(tmp_key_dir)
  200. print("Enabling ssh admin - COMPLETE.")
  201. def config_download(log, clients, stack, templates,
  202. ssh_user, ssh_key, output_dir, verbosity=0):
  203. workflow_client = clients.workflow_engine
  204. tripleoclients = clients.tripleoclient
  205. workflow_input = {
  206. 'verbosity': verbosity,
  207. 'plan_name': stack.stack_name
  208. }
  209. if output_dir:
  210. workflow_input.update(dict(work_dir=output_dir))
  211. with tripleoclients.messaging_websocket() as ws:
  212. execution = base.start_workflow(
  213. workflow_client,
  214. 'tripleo.deployment.v1.config_download_deploy',
  215. workflow_input=workflow_input
  216. )
  217. for payload in base.wait_for_messages(workflow_client, ws, execution,
  218. 3600):
  219. print(payload['message'])
  220. if payload['status'] == 'SUCCESS':
  221. print("Overcloud configuration completed.")
  222. else:
  223. raise exceptions.DeploymentError("Overcloud configuration failed.")
  224. def get_horizon_url(clients, **workflow_input):
  225. workflow_client = clients.workflow_engine
  226. tripleoclients = clients.tripleoclient
  227. with tripleoclients.messaging_websocket() as ws:
  228. execution = base.start_workflow(
  229. workflow_client,
  230. 'tripleo.deployment.v1.get_horizon_url',
  231. workflow_input=workflow_input
  232. )
  233. for payload in base.wait_for_messages(workflow_client, ws, execution,
  234. 360):
  235. assert payload['status'] == "SUCCESS"
  236. return payload['horizon_url']
  237. def get_deployment_status(clients, **workflow_input):
  238. workflow_client = clients.workflow_engine
  239. tripleoclients = clients.tripleoclient
  240. execution = base.start_workflow(
  241. workflow_client,
  242. 'tripleo.deployment.v1.get_deployment_status',
  243. workflow_input=workflow_input
  244. )
  245. with tripleoclients.messaging_websocket() as ws:
  246. for payload in base.wait_for_messages(workflow_client, ws, execution,
  247. _WORKFLOW_TIMEOUT):
  248. if 'message' in payload:
  249. print(payload['message'])
  250. if payload['status'] == 'SUCCESS':
  251. return payload['deployment_status']
  252. else:
  253. raise exceptions.WorkflowServiceError(
  254. 'Exception getting deployment status: {}'.format(
  255. payload.get('message', '')))
  256. def get_deployment_failures(clients, **workflow_input):
  257. workflow_client = clients.workflow_engine
  258. tripleoclients = clients.tripleoclient
  259. execution = base.start_workflow(
  260. workflow_client,
  261. 'tripleo.deployment.v1.get_deployment_failures',
  262. workflow_input=workflow_input
  263. )
  264. with tripleoclients.messaging_websocket() as ws:
  265. for payload in base.wait_for_messages(workflow_client, ws, execution,
  266. _WORKFLOW_TIMEOUT):
  267. if 'message' in payload:
  268. print(payload['message'])
  269. if payload['status'] == 'SUCCESS':
  270. return payload['deployment_failures']['failures']
  271. else:
  272. raise exceptions.WorkflowServiceError(
  273. 'Exception getting deployment failures: {}'.format(
  274. payload.get('message', '')))