i18n/tools/zanata/zanata_users.py
SeongSoo Cho b03c50bb14 Zanata member list update tool
Unfortunately current Zanata 3.7.3 does not support
member list API. This tool is developed to retrieve member
list in Zanata and save the list into yaml based format.
The output file name is translation_team.yaml by default,
and the name can be changed using -o option.

Note that this tool uses BeautifulSoap4 and requests
libraries.

Change-Id: Iaf3d845514e85f8c160efdf6c10afeaaf8077857
Co-Authored-By: Ian Y. Choi <ianyrchoi@gmail.com>
2017-01-16 12:19:49 +00:00

147 lines
5.0 KiB
Python
Executable File

#!/usr/bin/python
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from collections import OrderedDict
import random
import bs4
import requests
import yaml
base_url = "https://translate.openstack.org/%s"
class ZanataUtility(object):
"""Utilities to collect Zanata language contributors"""
user_agents = [
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64) Gecko/20100101 Firefox/32.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_6) AppleWebKit/537.78.2',
'Mozilla/5.0 (Windows NT 6.3; WOW64) Gecko/20100101 Firefox/32.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X) Chrome/37.0.2062.120',
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
]
def read_uri(self, uri):
headers = {
'User-Agent': random.choice(ZanataUtility.user_agents)
}
req = requests.get(url=uri, headers=headers)
return req.text
def iter_language_members(self, uri):
data = self.read_uri(base_url % uri)
soup = bs4.BeautifulSoup(data, 'html.parser')
users = soup.find('ul', {'class': 'list--stats'}) \
.findAll('li', {'class': 'l--pad-all-quarter'})
for user in users:
span_txt = user.find('span', {'class': 'list__item__meta'}).text
user_id = span_txt.lstrip().rstrip()
roles = user.find('ul', {'class': 'list--horizontal'}) \
.findAll('li')
for role in roles:
role_name = role.text.lstrip().rstrip()
role_status = role.find('i')['class'][1]
if role_status != 'i--checkmark':
continue
yield role_name, user_id
def get_languages(self):
data = self.read_uri(base_url % 'language/list')
soup = bs4.BeautifulSoup(data, 'html.parser')
languages = {}
ul = soup.find('ul', {'class': 'list--stats'}).findAll('li')
for li in ul:
lang_tag = li.find('h3', {'class': 'list__title'}).text
language = lang_tag.split('\n')[1].lstrip()
span_txt = li.find('span', {'class': 'list__item__meta'}).text
language_meta = span_txt.split(' ')[0]
member_url = li.find('a')['href']
span_txt = li.find('span', {'class': 'txt--understated'}).text
total_user = span_txt.lstrip().rstrip()
if total_user == '0':
continue
languages[language_meta] = {
'language': language,
'member_url': member_url
}
return languages
def save_to_yaml(data, output_file):
with open(output_file, 'w') as out:
out.write("# Use the order of Zanata language team info.\n")
out.write("# Do not use the alphabetical order to make the "
"maitenance easier.\n")
for (k, v) in data.items():
yaml.safe_dump({k: v}, out, allow_unicode=True, indent=4,
encoding='utf-8', default_flow_style=False)
def convert_role_name(role):
roles = {
'Translator': 'translators',
'Reviewer': 'reviewers',
'Coordinator': 'coordinators'
}
return roles[role] if role in roles else None
def collect_zanata_language_and_members():
zanata = ZanataUtility()
print("Retreiving language list")
languages = zanata.get_languages()
for language in languages.keys():
print("Getting member list from language %s" % language)
member_url = languages[language].pop('member_url')
for role, user_id in zanata.iter_language_members(member_url):
role = convert_role_name(role)
if role is None:
print('[Warn] Unknown role : %s' % role)
continue
if role not in languages[language]:
languages[language][role] = list()
languages[language][role].append(user_id)
sorted_key = sorted(languages, reverse=True,
key=lambda k: len(languages[k]['translators']))
result = OrderedDict((k, languages[k]) for k in sorted_key)
return result
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("-o", "--output-file",
default="translation_team.yaml",
help=("Specify the output file. "
"Default: translation_team.yaml"))
options = parser.parse_args()
output_file = options.output_file
data = collect_zanata_language_and_members()
save_to_yaml(data, output_file)
print("output is saved to filename: %s" % output_file)