#!/usr/bin/env python

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Collect Zanata language teams and their members into a YAML file.

The language list and the per-language member pages are fetched from
``base_url`` and scraped with BeautifulSoup; the result is written as
YAML, one top-level key per language.
"""

import argparse
from collections import OrderedDict
import random

import bs4
import requests
import yaml

base_url = "https://translate.openstack.org/%s"

# Header written verbatim at the top of the generated YAML file.
yaml_comment = """\
# Language codes: sorted in the alphabetical order (case-sensitive)
# Zanata IDs are sorted in the order of Zanata language team info
# : https://translate.openstack.org/language/list
# : Do not use the alphabetical order to make the maitenance easier.
"""


class ZanataUtility(object):
    """Utilities to collect Zanata language contributors"""

    # One of these is picked at random for every request.
    user_agents = [
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64) Gecko/20100101 Firefox/32.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_6) AppleWebKit/537.78.2',
        'Mozilla/5.0 (Windows NT 6.3; WOW64) Gecko/20100101 Firefox/32.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X) Chrome/37.0.2062.120',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
    ]

    # Seconds to wait for the server; without a timeout requests.get()
    # can block forever on an unresponsive host.
    request_timeout = 60

    def read_uri(self, uri):
        """Fetch ``uri`` and return the response body as text.

        :param uri: absolute URL to GET.
        :returns: decoded response body.
        :raises requests.exceptions.RequestException: on connection
            failure, timeout or a 4xx/5xx HTTP status.
        """
        headers = {
            'User-Agent': random.choice(ZanataUtility.user_agents)
        }
        response = requests.get(url=uri, headers=headers,
                                timeout=self.request_timeout)
        # Fail here with a clear HTTP error rather than later while
        # parsing an error page that lacks the expected markup.
        response.raise_for_status()
        return response.text

    def iter_language_members(self, uri):
        """Yield ``(role_name, user_id)`` pairs scraped from a member page.

        :param uri: path of the member page, relative to ``base_url``.

        A user with several roles (the roles text is split on ``', '``)
        is yielded once per role.
        """
        data = self.read_uri(base_url % uri)
        soup = bs4.BeautifulSoup(data, 'html.parser')
        member_list = soup.find('ul', {'class': 'list--stats'})
        users = member_list.find_all('li', {'class': 'l--pad-all-quarter'})
        for user in users:
            user_id = user.find('a').text.strip()
            role_list = user.find('ul', {'class': 'list--horizontal'})
            roles = role_list.find('li').text.strip().split(', ')
            for role_name in roles:
                yield role_name, user_id

    def get_languages(self):
        """Scrape the language list page.

        :returns: dict keyed by the first space-separated token of each
            entry's ``list__item__meta`` span (presumably the language
            code). Each value holds the language name, the member page
            URL and empty ``coordinators`` / ``reviewers`` /
            ``translators`` lists. Entries whose ``txt--understated``
            span reads ``0`` are skipped.
        """
        data = self.read_uri(base_url % 'language/list')
        soup = bs4.BeautifulSoup(data, 'html.parser')
        languages = {}
        items = soup.find('ul', {'class': 'list--stats'}).find_all('li')
        for li in items:
            title = li.find('h3', {'class': 'list__title'}).text
            # The name is taken from the second line of the title text.
            language = title.split('\n')[1].lstrip()
            meta_txt = li.find('span', {'class': 'list__item__meta'}).text
            language_meta = meta_txt.split(' ')[0]
            member_url = li.find('a')['href']
            count_txt = li.find('span', {'class': 'txt--understated'}).text
            total_user = count_txt.strip()
            if total_user == '0':
                continue
            languages[language_meta] = {
                'language': language,
                'member_url': member_url,
                'coordinators': [],
                'reviewers': [],
                'translators': [],
            }
        return languages


def save_to_yaml(data, output_file):
    """Write ``data`` to ``output_file`` as YAML, after ``yaml_comment``.

    Entries are dumped one key at a time, so they appear in the file in
    the iteration order of ``data``.

    :param data: mapping to serialize.
    :param output_file: path of the file to (over)write.
    """
    with open(output_file, 'w') as out:
        out.write(yaml_comment)
        for (k, v) in data.items():
            yaml.safe_dump({k: v}, out, allow_unicode=True, indent=4,
                           encoding='utf-8', default_flow_style=False)


def convert_role_name(role):
    """Map a role label from the member page to its result key.

    :returns: ``'translators'``, ``'reviewers'`` or ``'coordinators'``,
        or ``None`` when ``role`` is not a known label.
    """
    roles = {
        'Translator': 'translators',
        'Reviewer': 'reviewers',
        'Coordinator': 'coordinators'
    }
    return roles.get(role)


def _add_member(members, user_id):
    """Append ``user_id`` to ``members`` unless it is already listed."""
    if user_id not in members:
        members.append(user_id)


def collect_zanata_language_and_members():
    """Collect all languages and their members by role.

    :returns: ``OrderedDict`` sorted by key; each value holds the
        language name and the ``coordinators``, ``reviewers`` and
        ``translators`` user-id lists. Coordinators are also added to
        the translators and reviewers lists.
    """
    zanata = ZanataUtility()

    print("Retreiving language list")
    languages = zanata.get_languages()

    for language, info in languages.items():
        print("Getting member list from language %s" % language)
        # member_url is only needed for scraping; drop it from the result.
        member_url = info.pop('member_url')
        for role_name, user_id in zanata.iter_language_members(member_url):
            role = convert_role_name(role_name)
            if not role:
                # Report the unrecognized label itself, not the failed
                # (None) lookup result.
                print('[Warn] Unknown role : %s' % role_name)
                continue
            _add_member(info[role], user_id)
            if role == 'coordinators':
                # A coordinator may also be listed explicitly as a
                # translator/reviewer; _add_member avoids duplicates.
                _add_member(info['translators'], user_id)
                _add_member(info['reviewers'], user_id)

    result = OrderedDict((k, languages[k]) for k in sorted(languages))
    return result


def main():
    """Parse command-line options, collect the data and write the file."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", "--output-file",
                        default="translation_team.yaml",
                        help=("Specify the output file. "
                              "Default: translation_team.yaml"))
    options = parser.parse_args()

    output_file = options.output_file
    data = collect_zanata_language_and_members()
    save_to_yaml(data, output_file)
    print("output is saved to filename: %s" % output_file)


if __name__ == '__main__':
    main()