# Copyright 2019 Red Hat, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import concurrent.futures
import datetime
import time

import requests
import urllib.parse

# urllib3 wants to warn us that we're making insecure requests - when we tell
# it we want to make insecure requests. We know, but in this case the requests
# are not insecure.
import urllib3
urllib3.disable_warnings()

from ansible.module_utils.basic import AnsibleModule

SB_REPO = 'https://storyboard.openstack.org/#!/project/{org}/{repo}'
SB_FORMAT = 'https://storyboard.openstack.org/#!/story/{{index}}'
LP_REPO = 'https://bugs.launchpad.net/{repo}'
LP_FORMAT = 'https://bugs.launchpad.net/{repo}/+bug/{{index}}'
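
# Usage sketch following the Ansible EXAMPLES convention. The task below is
# illustrative only: it assumes this file is deployed as a module named
# ``gitea_create_repos`` (the real name depends on the installed filename),
# and the host, password, and project values are placeholders. The parameters
# mirror the argument_spec declared in ansible_main() below, and the project
# keys are the ones this module actually reads.
EXAMPLES = '''
- name: Create and update Gitea repositories
  gitea_create_repos:
    url: https://gitea01.example.org:3000
    password: "{{ gitea_root_password }}"
    always_update: false
    projects:
      - project: example-org/example-repo
        description: An example repository
        use-storyboard: true
'''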

class Gitea(object):
    def __init__(self, url, password, always_update, projects):
        self.url = url
        self.password = password
        self.always_update = always_update
        self.projects = projects
        self.orgs = {
            f['project'].split('/')[0] for f in self.projects
        }
        self.org_projects = {}
        for org in self.orgs:
            p = [
                f for f in self.projects
                if (f['project'].split('/')[0] == org)
            ]
            self.org_projects[org] = p
        self._log = []
        self.session = requests.Session()
        self.failed = False

    def log(self, *args):
        now = datetime.datetime.utcnow().isoformat()
        self._log.append(" ".join((now,) + args))

    def get_log(self):
        return "\n".join(self._log)

    def request(self, method, endpoint, *args, **kwargs):
        resp = self.session.request(
            method,
            urllib.parse.urljoin(self.url, endpoint),
            auth=('root', self.password),
            verify=False, *args, **kwargs)
        resp.raise_for_status()
        return resp

    def get(self, endpoint, *args, **kwargs):
        return self.request('GET', endpoint, *args, **kwargs)

    def get_paginated(self, endpoint, *args, **kwargs):
        params = {
            'limit': 50,
            'page': 1
        }
        results = []
        gitea_data = self.get(endpoint, *args, params=params, **kwargs).json()
        while gitea_data:
            results.extend(gitea_data)
            # Gitea paginates and returns an empty list at the end of the
            # listing. 50 items is the max limit.
            params['page'] += 1
            gitea_data = self.get(
                endpoint, *args, params=params, **kwargs).json()
        return results

    def patch(self, endpoint, *args, **kwargs):
        return self.request('PATCH', endpoint, *args, **kwargs)

    def post(self, endpoint, *args, **kwargs):
        return self.request('POST', endpoint, *args, **kwargs)

    def put(self, endpoint, *args, **kwargs):
        return self.request('PUT', endpoint, *args, **kwargs)

    def get_gitea_orgs(self):
        orgs = self.get_paginated("/api/v1/user/orgs")
        return [f['username'] for f in orgs]

    def make_gitea_org(self, org):
        self.post(
            '/api/v1/admin/users/root/orgs',
            json=dict(username=org))
        self.log("Created org:", org)

    def ensure_gitea_teams(self, org):
        team_list = self.get_paginated(
            '/api/v1/orgs/{org}/teams'.format(org=org))
        owner_id = [f['id'] for f in team_list if f['name'] == 'Owners'][0]
        org_owners = self.get_paginated(
            '/api/v1/teams/{owner_id}/members'.format(owner_id=owner_id))
        if 'gerrit' not in [f['username'] for f in org_owners]:
            self.put('/api/v1/teams/{owner_id}/members/gerrit'.format(
                owner_id=owner_id))
            self.log("Added gerrit to team:", org)

    def get_org_repo_list(self, org):
        return [x['full_name'] for x in self.get_paginated(
            '/api/v1/orgs/{org}/repos'.format(org=org))]

    def make_gitea_project(self, project):
        org, repo = project['project'].split('/', 1)
        repo_properties = {
            'auto_init': True,
            'name': repo,
            'description': project.get('description', '')[:255],
            # Do not use this functionality until jeepyb can do similar
            # for the gerrit side. Once Gerrit and Gitea can be configured
            # this could be used on new repos.
            # Note we default to master to avoid relying on tool defaults
            # as we currently rely on Gitea, Gerrit, and Git to all be in
            # sync which may not be the case going forward.
            'default_branch': project.get('default-branch', 'master'),
            'private': False,
            'readme': 'Default',
        }
        resp = self.post(
            '/api/v1/orgs/{org}/repos'.format(org=org),
            json=repo_properties)
        self.log("Created repo:", project['project'])

    def update_gitea_project_settings(self, project):
        org, repo = project['project'].split('/', 1)
        settings = {}
        settings['default_branch'] = project.get('default-branch', 'master')
        description = project.get('description', '')[:255]
        if description:
            settings['description'] = description
        settings['has_pull_requests'] = False
        settings['has_projects'] = False
        settings['has_wiki'] = False
        settings['external_wiki'] = {'external_wiki_url': ''}
        if project.get('use-storyboard'):
            external_tracker_url = SB_REPO.format(org=org, repo=repo)
            tracker_url_format = SB_FORMAT
        elif project.get('groups'):
            external_tracker_url = LP_REPO.format(repo=project['groups'][0])
            tracker_url_format = LP_FORMAT.format(repo=project['groups'][0])
        else:
            external_tracker_url = LP_REPO.format(repo=repo)
            tracker_url_format = LP_FORMAT.format(repo=repo)
        # We enable issues so that the external tracker works
        settings['has_issues'] = True
        settings['external_tracker'] = {
            'external_tracker_url': external_tracker_url,
            'external_tracker_format': tracker_url_format,
            'external_tracker_style': 'numeric',
        }
        for count in range(0, 5):
            try:
                self.patch(
                    '/api/v1/repos/{org}/{repo}'.format(org=org, repo=repo),
                    json=settings)
                self.log("Updated settings:", project['project'])
                return
            except requests.exceptions.HTTPError as e:
                time.sleep(3)
        raise Exception("Could not update settings")

    def make_projects(self, projects, gitea_repos,
                      settings_thread_pool, branches_thread_pool, futures):
        for project in projects:
            create = False
            if project['project'] not in gitea_repos:
                try:
                    self.get('/' + project['project'])
                except requests.HTTPError:
                    # If the project isn't in the listing we do an explicit
                    # check for its existence. This is because gitea repo
                    # listings require pagination and they don't use stable
                    # sorting and that causes problems reliably producing a
                    # complete repo list. If we cannot find the project
                    # then create it.
                    create = True
            if create:
                # TODO: use threadpool when we're running with
                # https://github.com/go-gitea/gitea/pull/7493
                self.make_gitea_project(project)
            if create or self.always_update:
                futures.append(settings_thread_pool.submit(
                    self.update_gitea_project_settings, project))

    def run(self):
        futures = []
        gitea_orgs = self.get_gitea_orgs()
        gitea_repos = []
        for org in self.orgs:
            if org not in gitea_orgs:
                self.make_gitea_org(org)
            self.ensure_gitea_teams(org)
            gitea_repos.extend(self.get_org_repo_list(org))

        # We can create repos in parallel, as long as all the repos
        # for the same org are in series (due to database contention,
        # until https://github.com/go-gitea/gitea/pull/7493 is
        # merged). It doesn't help to have more than 2 since
        # openstack is the largest and everything else combined is
        # less than that.
        org_thread_pool = concurrent.futures.ThreadPoolExecutor(
            max_workers=2)
        settings_thread_pool = concurrent.futures.ThreadPoolExecutor()
        branches_thread_pool = concurrent.futures.ThreadPoolExecutor()

        # The very first update to the repo_unit table needs to happen
        # without any other actions in parallel, otherwise a lock will
        # be held for a significant amount of time causing requests to
        # back up (and some to fail). Work through the project list
        # in series until we find the first that updates the project
        # settings (this will be the first with any significant work).
        org_task_lists = []
        for org, projects in self.org_projects.items():
            org_task_lists.append(projects)
        first_settings = False
        for task_list in org_task_lists:
            while task_list:
                project = task_list.pop(0)
                self.make_projects([project], gitea_repos,
                                   settings_thread_pool,
                                   branches_thread_pool, futures)
                if len(futures) > 1:
                    first_settings = True
                    self.wait_for_futures(futures)
                    futures = []
            if first_settings:
                break

        # Once that is done, we can parallelize the rest. Sort the
        # org task lists by length so that we pack them into our two
        # threads efficiently.
        sorted_task_lists = sorted(
            org_task_lists, key=lambda x: len(x), reverse=True)
        for projects in sorted_task_lists:
            futures.append(org_thread_pool.submit(
                self.make_projects, projects, gitea_repos,
                settings_thread_pool, branches_thread_pool, futures))
        self.wait_for_futures(futures)

    def wait_for_futures(self, futures):
        for f in futures:
            try:
                r = f.result()
            except Exception as e:
                self.log(str(e))
                self.failed = True
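
# Illustrative only: the Gitea class above can also be exercised outside of
# Ansible, for example when debugging against a test instance. The URL,
# password, and project below are placeholders, not values used anywhere in
# production.
#
#   gitea = Gitea(
#       url='https://localhost:3000',
#       password='secret',
#       always_update=True,
#       projects=[{'project': 'example-org/example-repo',
#                  'description': 'An example repository'}],
#   )
#   gitea.run()
#   print(gitea.get_log())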

def ansible_main():
    module = AnsibleModule(
        argument_spec=dict(
            url=dict(required=True),
            password=dict(required=True, no_log=True),
            projects=dict(required=True, type='list'),
            always_update=dict(type='bool', default=True),
        )
    )

    p = module.params
    gitea = Gitea(
        url=p.get('url'),
        password=p.get('password'),
        always_update=p.get('always_update'),
        projects=p.get('projects'),
    )
    try:
        gitea.run()
    except Exception as e:
        module.fail_json(msg=str(e), changed=True)
    log = gitea.get_log()
    if gitea.failed:
        module.fail_json(msg="Failure during repo creation, see log",
                         changed=bool(log), log=log)
    module.exit_json(changed=bool(log), log=log)


if __name__ == '__main__':
    ansible_main()