Manage a pool of nodes for a distributed test infrastructure
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

provider.py 8.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
# Copyright 2018 Red Hat
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
  14. import logging
  15. import urllib3
  16. import time
  17. from kubernetes.config import config_exception as kce
  18. from kubernetes import client as k8s_client
  19. from openshift import client as os_client
  20. from openshift import config
  21. from nodepool import exceptions
  22. from nodepool.driver import Provider
  23. from nodepool.driver.openshift import handler
  24. urllib3.disable_warnings()
  25. class OpenshiftProvider(Provider):
  26. log = logging.getLogger("nodepool.driver.openshift.OpenshiftProvider")
  27. def __init__(self, provider, *args):
  28. self.provider = provider
  29. self.ready = False
  30. try:
  31. self.os_client, self.k8s_client = self._get_client(
  32. provider.context)
  33. except kce.ConfigException:
  34. self.log.exception(
  35. "Couldn't load context %s from config", provider.context)
  36. self.os_client = None
  37. self.k8s_client = None
  38. self.project_names = set()
  39. for pool in provider.pools.values():
  40. self.project_names.add(pool.name)
  41. def _get_client(self, context):
  42. conf = config.new_client_from_config(context=context)
  43. return (
  44. os_client.OapiApi(conf),
  45. k8s_client.CoreV1Api(conf))
  46. def start(self, zk_conn):
  47. self.log.debug("Starting")
  48. if self.ready or not self.os_client or not self.k8s_client:
  49. return
  50. self.ready = True
  51. def stop(self):
  52. self.log.debug("Stopping")
  53. def listNodes(self):
  54. servers = []
  55. class FakeServer:
  56. def __init__(self, project, provider, valid_names):
  57. self.id = project.metadata.name
  58. self.name = project.metadata.name
  59. self.metadata = {}
  60. if [True for valid_name in valid_names
  61. if project.metadata.name.startswith("%s-" % valid_name)]:
  62. node_id = project.metadata.name.split('-')[-1]
  63. try:
  64. # Make sure last component of name is an id
  65. int(node_id)
  66. self.metadata['nodepool_provider_name'] = provider
  67. self.metadata['nodepool_node_id'] = node_id
  68. except Exception:
  69. # Probably not a managed project, let's skip metadata
  70. pass
  71. def get(self, name, default=None):
  72. return getattr(self, name, default)
  73. if self.ready:
  74. for project in self.os_client.list_project().items:
  75. servers.append(FakeServer(
  76. project, self.provider.name, self.project_names))
  77. return servers
  78. def labelReady(self, name):
  79. # Labels are always ready
  80. return True
  81. def join(self):
  82. pass
  83. def cleanupLeakedResources(self):
  84. pass
  85. def cleanupNode(self, server_id):
  86. if not self.ready:
  87. return
  88. self.log.debug("%s: removing project" % server_id)
  89. try:
  90. self.os_client.delete_project(server_id)
  91. self.log.info("%s: project removed" % server_id)
  92. except Exception:
  93. # TODO: implement better exception handling
  94. self.log.exception("Couldn't remove project %s" % server_id)
  95. def waitForNodeCleanup(self, server_id):
  96. for retry in range(300):
  97. try:
  98. self.os_client.read_project(server_id)
  99. except Exception:
  100. break
  101. time.sleep(1)
  102. def createProject(self, project):
  103. self.log.debug("%s: creating project" % project)
  104. # Create the project
  105. proj_body = {
  106. 'apiVersion': 'v1',
  107. 'kind': 'ProjectRequest',
  108. 'metadata': {
  109. 'name': project,
  110. }
  111. }
  112. self.os_client.create_project_request(proj_body)
  113. return project
  114. def prepareProject(self, project):
  115. user = "zuul-worker"
  116. # Create the service account
  117. sa_body = {
  118. 'apiVersion': 'v1',
  119. 'kind': 'ServiceAccount',
  120. 'metadata': {'name': user}
  121. }
  122. self.k8s_client.create_namespaced_service_account(project, sa_body)
  123. # Wait for the token to be created
  124. for retry in range(30):
  125. sa = self.k8s_client.read_namespaced_service_account(
  126. user, project)
  127. token = None
  128. if sa.secrets:
  129. for secret_obj in sa.secrets:
  130. secret = self.k8s_client.read_namespaced_secret(
  131. secret_obj.name, project)
  132. token = secret.metadata.annotations.get(
  133. 'openshift.io/token-secret.value')
  134. if token:
  135. break
  136. if token:
  137. break
  138. time.sleep(1)
  139. if not token:
  140. raise exceptions.LaunchNodepoolException(
  141. "%s: couldn't find token for service account %s" %
  142. (project, sa))
  143. # Give service account admin access
  144. role_body = {
  145. 'apiVersion': 'v1',
  146. 'kind': 'RoleBinding',
  147. 'metadata': {'name': 'admin-0'},
  148. 'roleRef': {'name': 'admin'},
  149. 'subjects': [{
  150. 'kind': 'ServiceAccount',
  151. 'name': user,
  152. 'namespace': project,
  153. }],
  154. 'userNames': ['system:serviceaccount:%s:zuul-worker' % project]
  155. }
  156. try:
  157. self.os_client.create_namespaced_role_binding(project, role_body)
  158. except ValueError:
  159. # https://github.com/ansible/ansible/issues/36939
  160. pass
  161. resource = {
  162. 'namespace': project,
  163. 'host': self.os_client.api_client.configuration.host,
  164. 'skiptls': not self.os_client.api_client.configuration.verify_ssl,
  165. 'token': token,
  166. 'user': user,
  167. }
  168. self.log.info("%s: project created" % project)
  169. return resource
  170. def createPod(self, project, label):
  171. spec_body = {
  172. 'name': label.name,
  173. 'image': label.image,
  174. 'imagePullPolicy': label.image_pull,
  175. 'command': ["/bin/bash", "-c", "--"],
  176. 'args': ["while true; do sleep 30; done;"],
  177. 'workingDir': '/tmp',
  178. }
  179. if label.cpu or label.memory:
  180. spec_body['resources'] = {}
  181. for rtype in ('requests', 'limits'):
  182. rbody = {}
  183. if label.cpu:
  184. rbody['cpu'] = int(label.cpu)
  185. if label.memory:
  186. rbody['memory'] = '%dMi' % int(label.memory)
  187. spec_body['resources'][rtype] = rbody
  188. pod_body = {
  189. 'apiVersion': 'v1',
  190. 'kind': 'Pod',
  191. 'metadata': {'name': label.name},
  192. 'spec': {
  193. 'containers': [spec_body],
  194. },
  195. 'restartPolicy': 'Never',
  196. }
  197. self.k8s_client.create_namespaced_pod(project, pod_body)
  198. for retry in range(300):
  199. pod = self.k8s_client.read_namespaced_pod(label.name, project)
  200. if pod.status.phase == "Running":
  201. break
  202. self.log.debug("%s: pod status is %s", project, pod.status.phase)
  203. time.sleep(1)
  204. if retry == 299:
  205. raise exceptions.LaunchNodepoolException(
  206. "%s: pod failed to initialize (%s)" % (
  207. project, pod.status.phase))
  208. def getRequestHandler(self, poolworker, request):
  209. return handler.OpenshiftNodeRequestHandler(poolworker, request)