CI for the TripleO project
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

testenv-client 8.8KB


  1. #!/usr/bin/python
  2. #
  3. # Runs a tripleo-ci test-client
  4. #
  5. # Copyright 2013 Red Hat, Inc.
  6. # All Rights Reserved.
  7. #
  8. # Licensed under the Apache License, Version 2.0 (the "License"); you may
  9. # not use this file except in compliance with the License. You may obtain
  10. # a copy of the License at
  11. #
  12. # http://www.apache.org/licenses/LICENSE-2.0
  13. #
  14. # Unless required by applicable law or agreed to in writing, software
  15. # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  16. # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  17. # License for the specific language governing permissions and limitations
  18. # under the License.
  19. #
  20. import argparse
  21. import json
  22. import logging
  23. import sys
  24. import subprocess
  25. import os
  26. import tempfile
  27. import textwrap
  28. import threading
  29. import time
  30. import uuid
  31. import gear
  32. logging.basicConfig(
  33. format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  34. logger = logging.getLogger('testenv-client')
  35. logger.setLevel(logging.INFO)
  36. class TestCallback(object):
  37. def __init__(self, servers, name, command):
  38. self.servers = servers
  39. self.name = name
  40. self.command = command
  41. self.created = time.time()
  42. # Default the return value to None, this may end up being
  43. # used if the gearman worker goes down before the job finishes
  44. self.rv = None
  45. def __call__(self):
  46. self.worker = gear.Worker('testenv-client-%s' % self.name)
  47. add_servers(self.worker, self.servers)
  48. self.worker.waitForServer()
  49. self.worker.registerFunction(self.name)
  50. try:
  51. job = self.worker.getJob()
  52. except gear.InterruptedError:
  53. return
  54. logger.info('Received job : %s', job.arguments.strip())
  55. time_waiting = time.time() - self.created
  56. if time_waiting > 90:
  57. logger.warn('%.1f seconds waiting for a worker.' % (time_waiting))
  58. if "Couldn't retrieve env" in job.arguments or "Failed creating OVB stack" in job.arguments:
  59. logger.error(job.arguments)
  60. self.rv = 2
  61. job.sendWorkComplete("")
  62. return
  63. logger.info('Running command "%s"', ' '.join(self.command))
  64. with tempfile.NamedTemporaryFile('w') as fp:
  65. fp.write(job.arguments)
  66. fp.flush()
  67. os.environ["TE_DATAFILE"] = fp.name
  68. try:
  69. self.rv = subprocess.call(self.command)
  70. except:
  71. logger.exception("Error calling command")
  72. self.rv = 2
  73. job.sendWorkComplete("")
  74. class TestEnvClient(gear.Client):
  75. def __init__(self):
  76. super(TestEnvClient, self).__init__()
  77. self.event = threading.Event()
  78. def handleWorkComplete(self, packet):
  79. super(TestEnvClient, self).handleWorkComplete(packet)
  80. self.event.set()
  81. def handleWorkException(self, packet):
  82. super(TestEnvClient, self).handleWorkException(packet)
  83. self.event.set()
  84. def handleWorkFail(self, packet):
  85. super(TestEnvClient, self).handleWorkFail(packet)
  86. self.event.set()
  87. def wait(self, timeout=None):
  88. """Wait for notification of completion, error or failure.
  89. :param timeout: a timeout for the operation in seconds
  90. :type timeout: float
  91. :returns: True if a notification was received, False on timeout
  92. """
  93. self.event.wait(timeout)
  94. return self.event.is_set()
  95. def add_servers(client, servers):
  96. for server in servers.split(','):
  97. server = server.rsplit(':', 1)
  98. if len(server) == 1:
  99. server.append('4730')
  100. client.addServer(server[0], int(server[1]))
  101. def main(args=sys.argv[1:]):
  102. parser = argparse.ArgumentParser(
  103. description=(textwrap.dedent("""
  104. Starts up a gearman worker and then calls the job "lockenv" over
  105. gearman, then waits for the worker to be called, once the worker
  106. is called it will place the provided data in a datafile (indicated
  107. by the TE_DATAFILE environment variable) and run the "command"
  108. provided, the exit code will be the exit code of the command that
  109. was run. Essentially this allows a command to be run while the
  110. worker is holding a test environment in a locked state e.g. to
  111. simply output the data provided one could run the command:
  112. $ echo 'cat $TE_DATAFILE' | %s -- bash
  113. """ % sys.argv[0])),
  114. formatter_class=argparse.RawTextHelpFormatter
  115. )
  116. parser.add_argument('command', nargs="+",
  117. help='A command to run once the test env is locked')
  118. parser.add_argument('--geard', '-b', default='127.0.0.1:4730',
  119. help='A comma separated list of gearman brokers to '
  120. 'connect to.')
  121. parser.add_argument('--jobnum', '-n', default=uuid.uuid4().hex,
  122. help='A unique identifier identifing this job.')
  123. parser.add_argument('--timeout', '-t', default='10800',
  124. help='Set a timeout, after which the command will '
  125. 'be killed.')
  126. parser.add_argument('--envsize', default="2",
  127. help='Number of baremetal nodes to request')
  128. parser.add_argument('--compute-envsize', default='0',
  129. help='Number of compute baremetal nodes to request. '
  130. 'When this is set to a value > 0, the primary '
  131. 'nodes will be tagged with the controller '
  132. 'profile and the extra nodes with compute. The '
  133. 'compute nodes will be a smaller flavor in order '
  134. 'to use less resources.')
  135. parser.add_argument('--ucinstance',
  136. help='uuid for the undercloud instance (where an '
  137. 'interface on the provisioning net is attached')
  138. parser.add_argument('--create-undercloud', action='store_true',
  139. help='deploy the undercloud node.')
  140. parser.add_argument('--ssh-key', default='',
  141. help='ssh key for the ovb nodes to be deployed.')
  142. parser.add_argument('--net-iso',
  143. default="multi-nic",
  144. choices=['none', 'multi-nic', 'public-bond'],
  145. help='"none" requests an environment without network '
  146. 'isolation, "multi-nic" requests one with a '
  147. 'basic multiple nic configuration, and '
  148. '"public-bond" requests one like "multi-nic" '
  149. 'but with two public nics for use with bonded '
  150. 'nic-configs.')
  151. parser.add_argument('--extra-nodes', default='0',
  152. help='Number of extra undercloud-like nodes to '
  153. 'request')
  154. parser.add_argument('--debug', '-d', action='store_true',
  155. help='Set to debug mode.')
  156. opts = parser.parse_args(args)
  157. if opts.debug:
  158. logger.setLevel(logging.DEBUG)
  159. callback_name = "callback_" + opts.jobnum
  160. cb = TestCallback(opts.geard, callback_name, opts.command)
  161. threading.Thread(target=cb).start()
  162. client = TestEnvClient()
  163. add_servers(client, opts.geard)
  164. client.waitForServer()
  165. job_identifier = '%s: %s' % (os.environ.get('ZUUL_CHANGE', 'No change'),
  166. os.environ['TOCI_JOBTYPE'])
  167. job_params = {
  168. "callback_name": callback_name,
  169. "timeout": opts.timeout,
  170. "envsize":opts.envsize,
  171. "compute_envsize":opts.compute_envsize,
  172. "ucinstance":opts.ucinstance,
  173. "create_undercloud": "true" if opts.create_undercloud else "",
  174. "ssh_key":opts.ssh_key,
  175. "net_iso":opts.net_iso,
  176. "extra_nodes":opts.extra_nodes,
  177. "job_identifier":job_identifier,
  178. }
  179. job = gear.Job('lockenv', json.dumps(job_params))
  180. client.submitJob(job)
  181. # No timeout here as there will be a timeout on the jenkins jobs, which is
  182. # also passed to the testenv-worker, lets not second guess them.
  183. client.wait()
  184. if job.failure:
  185. # This signals an error with the gearman connection to the worker
  186. # we log it, but still return cb.rv the command may have succeeded
  187. logger.error("The gearman Job has failed")
  188. cb.worker.stopWaitingForJobs()
  189. # If the testenv worker releases the environment before our command
  190. # completes we kill this process and all its children, to immediately
  191. # stop the running job
  192. if cb.rv is None:
  193. logger.error("The command hasn't completed but the testenv worker has "
  194. "released the environment. Killing all processes.")
  195. subprocess.call(["sudo", "kill", "-9", "-%d" % os.getpgrp()])
  196. logger.debug("Exiting with status : %d", cb.rv)
  197. return cb.rv
  198. if __name__ == '__main__':
  199. exit(main())