Add scripts to get translators' statistic data from Zanata
Usage: zanata_stats.py -s 2015-08-01 -e 2016-01-31 translation_team.yaml zanata_stats.py will query translator's contributions in words, including the translated translations, approved translations and rejected translations, in a certain period of all projects and all versions, and then save the data in a csv file. translation_team.yaml stores all translation team and translators information. Co-Authored-By: Akihiro Motoki <amotoki@gmail.com> Change-Id: If7b92cb2e7ccc3e2e59d977976d7c5d726e606dd
This commit is contained in:
parent
639f653d71
commit
73a36041db
|
@ -0,0 +1,241 @@
|
|||
zh-CN:
|
||||
coordinators:
|
||||
- Ying Chun Guo (daisyycguo)
|
||||
language:
|
||||
- Chinese (China)
|
||||
translators:
|
||||
- actionchen
|
||||
- aeng
|
||||
- chenli
|
||||
- ccheng
|
||||
- coffee
|
||||
- daisyycguo
|
||||
- zhugaoxiao
|
||||
- guoshan
|
||||
- fionahuang
|
||||
- irina
|
||||
- jimmyli
|
||||
- liyucai
|
||||
- linwwu
|
||||
- liuchaoxin
|
||||
- liujunpeng
|
||||
- lapalm
|
||||
- lvfyongfeng
|
||||
- macjack
|
||||
- meteor
|
||||
- rustinpeace
|
||||
- tianzhong
|
||||
- fifieldt
|
||||
- tfu
|
||||
- xuyang
|
||||
- yanheven
|
||||
- yanghy
|
||||
- yuzg
|
||||
- huyupeng
|
||||
- zhangjingwen
|
||||
- zhaochao
|
||||
- johnwoo_lee
|
||||
reviewers:
|
||||
- daisyycguo
|
||||
ja:
|
||||
coordinators:
|
||||
- to222
|
||||
- amotoki
|
||||
language:
|
||||
- Japanese
|
||||
translators:
|
||||
- amotoki
|
||||
- yosshy
|
||||
- nanodayo
|
||||
- hidekazuna
|
||||
- to222
|
||||
- sasukeh
|
||||
- macjack
|
||||
- masmiyam
|
||||
- mmasaki
|
||||
- myamamot
|
||||
- yuanying
|
||||
- ststake
|
||||
- shinya_kwbt
|
||||
- shu
|
||||
- tjcocozz
|
||||
- fifieldt
|
||||
- ykatabam
|
||||
- yuta_hono
|
||||
reviewers:
|
||||
- amotoki
|
||||
- to222
|
||||
- ykatabam
|
||||
- myamamot
|
||||
- yosshy
|
||||
ko-KR:
|
||||
coordinators:
|
||||
- Sungjin Kang (ujuc)
|
||||
language:
|
||||
- Korean (South Korea)
|
||||
translators:
|
||||
- gemma
|
||||
- dbgong
|
||||
- chuinggun
|
||||
- gyahoo617
|
||||
- openstack
|
||||
- ianychoi
|
||||
- jaygtv
|
||||
- whitekjs
|
||||
- jennie
|
||||
- ryan
|
||||
- powhapki
|
||||
- kiseok7
|
||||
- kepark07
|
||||
- leegy1
|
||||
- lapalm
|
||||
- macjack
|
||||
- choi
|
||||
- ujuc
|
||||
- frontjang
|
||||
- wonki
|
||||
reviewers:
|
||||
- ujuc
|
||||
fr:
|
||||
coordinators:
|
||||
- jftalta
|
||||
language:
|
||||
- French
|
||||
translators:
|
||||
- adri2000
|
||||
- nokostya
|
||||
- liced
|
||||
- croe
|
||||
- corin
|
||||
- seveillard
|
||||
- fdotfr
|
||||
- gael_rehault
|
||||
- pat
|
||||
- jftalta
|
||||
- jfenal
|
||||
- kodokuu
|
||||
- zigmax
|
||||
- obuisson
|
||||
- paulgonin
|
||||
- romainsc
|
||||
- gonarys
|
||||
- henris
|
||||
- eouzans
|
||||
- tjcocozz
|
||||
zh-TW:
|
||||
coordinators:
|
||||
- zero00072
|
||||
language:
|
||||
- Chinese (Taiwan)
|
||||
translators:
|
||||
- bokaitseng
|
||||
- ccheng
|
||||
- danny
|
||||
- elliswu
|
||||
- fionahuang
|
||||
- jameslee
|
||||
- kyle
|
||||
- t09sunny
|
||||
- lapalm
|
||||
- macjack
|
||||
- mikeli
|
||||
- rico
|
||||
- fifieldt
|
||||
- webberguo
|
||||
- willychen
|
||||
- xuyang
|
||||
- max821214
|
||||
- zero00072
|
||||
pt-BR:
|
||||
coordinators:
|
||||
- marcelodieder
|
||||
language:
|
||||
- Portuguese (Brazil)
|
||||
translators:
|
||||
- nokostya
|
||||
- andrecampos
|
||||
- fcpimenta
|
||||
- gabrielcw
|
||||
- josemello
|
||||
- lucasagomes
|
||||
- lapalm
|
||||
- marcelodieder
|
||||
- mariaandrada
|
||||
- raildomascena
|
||||
- rgmorales
|
||||
- rbraga
|
||||
- sanporci
|
||||
es:
|
||||
coordinators:
|
||||
- mariantb
|
||||
language:
|
||||
- Spanish
|
||||
translators:
|
||||
- hybridpollo
|
||||
- albertomolina
|
||||
- camunoz
|
||||
- egongu90
|
||||
- toxickore
|
||||
- gguerrer
|
||||
- mariantb
|
||||
- iranzo
|
||||
- pnavarro
|
||||
- tjcocozz
|
||||
- travisn
|
||||
- vresy
|
||||
ru:
|
||||
coordinators:
|
||||
- adiantum
|
||||
language:
|
||||
- Russian
|
||||
translators:
|
||||
- antoniok
|
||||
- v12aml
|
||||
- doug_fish
|
||||
- ftarasenko
|
||||
- sfilatov
|
||||
- adiantum
|
||||
- shakhat
|
||||
- kzaitsev
|
||||
- lapalm
|
||||
- nburtsev
|
||||
de:
|
||||
coordinators:
|
||||
- rsimai
|
||||
language:
|
||||
- German
|
||||
translators:
|
||||
- jaegerandi
|
||||
- cduch
|
||||
- atalanttore
|
||||
- eblock
|
||||
- eumel8
|
||||
- lstemmle
|
||||
- spielkind
|
||||
- rsimai
|
||||
- tjcocozz
|
||||
it:
|
||||
coordinators:
|
||||
- daisyycguo
|
||||
language:
|
||||
- Italian
|
||||
translators:
|
||||
- alessandra
|
||||
- daisyycguo
|
||||
- fpezzell
|
||||
- faber66
|
||||
- miumiento
|
||||
- matgand
|
||||
- tjcocozz
|
||||
tr-TR:
|
||||
coordinators:
|
||||
- mucahit
|
||||
language:
|
||||
- Turkish (Turkey)
|
||||
translators:
|
||||
- doug_fish
|
||||
- duyarli
|
||||
- halit
|
||||
- isbaran
|
||||
- mucahit
|
||||
- oguzy
|
|
@ -0,0 +1,270 @@
|
|||
#!/usr/bin/python
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import datetime
|
||||
import io
|
||||
import json
|
||||
import operator
|
||||
import random
|
||||
import re
|
||||
import sys
|
||||
|
||||
from oslo_log import log as logging
|
||||
import six
|
||||
import yaml
|
||||
|
||||
ZANATA_URI = 'https://translate.openstack.org/rest/%s'
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
ZANATA_VERSION_PATTERN = re.compile(r'^(master|stable-[a-z]+)$')
|
||||
|
||||
|
||||
class ZanataUtility(object):
|
||||
"""Utilities to invoke Zanata REST API."""
|
||||
user_agents = [
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64) Gecko/20100101 Firefox/32.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_6) AppleWebKit/537.78.2',
|
||||
'Mozilla/5.0 (Windows NT 6.3; WOW64) Gecko/20100101 Firefox/32.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X) Chrome/37.0.2062.120',
|
||||
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
|
||||
]
|
||||
|
||||
def read_uri(self, uri, headers):
|
||||
try:
|
||||
headers['User-Agent'] = random.choice(ZanataUtility.user_agents)
|
||||
req = six.moves.urllib.request.Request(url=uri, headers=headers)
|
||||
fd = six.moves.urllib.request.urlopen(req)
|
||||
raw = fd.read()
|
||||
fd.close()
|
||||
return raw
|
||||
except Exception as e:
|
||||
print('exception happen', e)
|
||||
LOG.warn('Error "%(error)s" while reading uri %(uri)s',
|
||||
{'error': e, 'uri': uri})
|
||||
|
||||
def read_json_from_uri(self, uri):
|
||||
try:
|
||||
data = self.read_uri(uri, {'Accept': 'application/json'})
|
||||
return json.loads(data)
|
||||
except Exception as e:
|
||||
LOG.warn('Error "%(error)s" parsing json from uri %(uri)s',
|
||||
{'error': e, 'uri': uri})
|
||||
|
||||
def zanata_get_projects(self):
|
||||
uri = ZANATA_URI % ('projects')
|
||||
LOG.debug("Reading projects from %s" % uri)
|
||||
projects_data = self.read_json_from_uri(uri)
|
||||
for project in projects_data:
|
||||
yield project['id']
|
||||
|
||||
def _is_valid_version(self, version):
|
||||
return bool(ZANATA_VERSION_PATTERN.match(version))
|
||||
|
||||
def zanata_get_project_versions(self, project_id):
|
||||
uri = ZANATA_URI % ('projects/p/%s' % project_id)
|
||||
LOG.debug("Reading iterations for project %s" % project_id)
|
||||
project_data = self.read_json_from_uri(uri)
|
||||
if ('iterations' in project_data):
|
||||
for interation_data in project_data['iterations']:
|
||||
if self._is_valid_version(interation_data['id']):
|
||||
yield interation_data['id']
|
||||
else:
|
||||
yield None
|
||||
|
||||
def zanata_get_user_stats(self, project_id, iteration_id, zanata_user_id,
|
||||
start_date, end_date):
|
||||
uri = ZANATA_URI % ('stats/project/%s/version/%s/contributor/%s/%s..%s'
|
||||
% (project_id, iteration_id, zanata_user_id,
|
||||
start_date, end_date))
|
||||
return self.read_json_from_uri(uri)
|
||||
|
||||
|
||||
def _make_language_team(name, team_info):
|
||||
return {
|
||||
'tag': 'language_team',
|
||||
'language_code': name,
|
||||
'language': team_info['language'],
|
||||
'translators': team_info['translators'],
|
||||
'reviewers': team_info.get('reviewers', []),
|
||||
'coordinators': team_info.get('coordinators', []),
|
||||
}
|
||||
|
||||
|
||||
def _make_user(user_id, language_code):
|
||||
return {
|
||||
'user_id': user_id,
|
||||
'lang': language_code,
|
||||
'translated': 0,
|
||||
'approved': 0,
|
||||
'rejected': 0
|
||||
}
|
||||
|
||||
|
||||
def read_language_team_yaml(translation_team_uri, lang_list):
|
||||
LOG.debug('Process list of language team from uri: %s',
|
||||
translation_team_uri)
|
||||
|
||||
content = yaml.safe_load(io.open(translation_team_uri, 'r'))
|
||||
language_teams = {}
|
||||
|
||||
if lang_list:
|
||||
lang_notfound = [lang_code for lang_code in lang_list
|
||||
if lang_code not in content]
|
||||
if lang_notfound:
|
||||
print('Language %s not tound in %s.' %
|
||||
(', '.join(lang_notfound),
|
||||
translation_team_uri))
|
||||
sys.exit(1)
|
||||
|
||||
for lang_code, team_info in content.items():
|
||||
if lang_list and lang_code not in lang_list:
|
||||
continue
|
||||
language_teams[lang_code] = _make_language_team(lang_code, team_info)
|
||||
|
||||
return language_teams
|
||||
|
||||
|
||||
def get_zanata_stats(start_date, end_date, language_teams, project_list):
|
||||
print('Getting Zanata contributors statistics (from %s to %s) ...' %
|
||||
(start_date, end_date))
|
||||
zanataUtil = ZanataUtility()
|
||||
users = {}
|
||||
for language_code in language_teams:
|
||||
language_team = language_teams[language_code]
|
||||
for user in language_team['translators']:
|
||||
users[user] = _make_user(user, language_code)
|
||||
|
||||
if not project_list:
|
||||
project_list = zanataUtil.zanata_get_projects()
|
||||
for project_id in project_list:
|
||||
for version in zanataUtil.zanata_get_project_versions(project_id):
|
||||
for user_id in users:
|
||||
user = users.get(user_id)
|
||||
print('Getting %(project_id)s %(version)s '
|
||||
'for user %(user_id)s %(user_lang)s'
|
||||
% {'project_id': project_id,
|
||||
'version': version,
|
||||
'user_id': user_id,
|
||||
'user_lang': user['lang']})
|
||||
statisticdata = zanataUtil.zanata_get_user_stats(
|
||||
project_id, version, user_id, start_date, end_date)
|
||||
if statisticdata:
|
||||
user_contributes = statisticdata[user_id]
|
||||
if (user['lang'] in user_contributes):
|
||||
user_stat = user_contributes[user['lang']]
|
||||
user['translated'] += int(user_stat['translated'])
|
||||
user['approved'] += int(user_stat['approved'])
|
||||
user['rejected'] += int(user_stat['rejected'])
|
||||
|
||||
return users
|
||||
|
||||
|
||||
def write_stats_to_file(users, output_file, file_format,
|
||||
include_no_activities):
|
||||
stats = [user for user in
|
||||
sorted(users.values(), key=operator.itemgetter('lang', 'user_id'))
|
||||
if _needs_output(include_no_activities, user)]
|
||||
if file_format == 'csv':
|
||||
_write_stats_to_csvfile(stats, output_file)
|
||||
else:
|
||||
_write_stats_to_jsonfile(stats, output_file)
|
||||
print('Stats has been written to %s' % output_file)
|
||||
|
||||
|
||||
def _needs_output(include_no_activities, user):
|
||||
if include_no_activities:
|
||||
return True
|
||||
elif user['translated'] or user['approved'] or user['rejected']:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
|
||||
def _write_stats_to_csvfile(stats, output_file):
|
||||
with open(output_file, 'wb') as csvfile:
|
||||
writer = csv.writer(csvfile)
|
||||
writer.writerow(['user_id', 'lang',
|
||||
'translated', 'approved', 'rejected'])
|
||||
for stat in stats:
|
||||
writer.writerow([stat['user_id'], stat['lang'],
|
||||
stat['translated'], stat['approved'],
|
||||
stat['rejected']])
|
||||
|
||||
|
||||
def _write_stats_to_jsonfile(stats, output_file):
|
||||
with open(output_file, 'w') as f:
|
||||
f.write(json.dumps(stats, indent=4))
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
default_end_date = datetime.datetime.now()
|
||||
default_start_date = default_end_date - datetime.timedelta(days=180)
|
||||
default_start_date = default_start_date.strftime('%Y-%m-%d')
|
||||
default_end_date = default_end_date.strftime('%Y-%m-%d')
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("-s", "--start-date",
|
||||
default=default_start_date,
|
||||
help=("Specify the start date. "
|
||||
"Default:%s" % default_start_date))
|
||||
parser.add_argument("-e", "--end-date",
|
||||
default=default_end_date,
|
||||
help=("Specify the end date. "
|
||||
"Default:%s" % default_end_date))
|
||||
parser.add_argument("-o", "--output-file",
|
||||
help=("Specify the output file. "
|
||||
"Default: zanata_stats_output.{csv,json}."))
|
||||
parser.add_argument("-p", "--project",
|
||||
default='',
|
||||
help=("Specify project(s). Comma-separated list. "
|
||||
"Otherwise all Zanata projects are processed."))
|
||||
parser.add_argument("-l", "--lang",
|
||||
default='',
|
||||
help=("Specify language(s). Comma-separated list. "
|
||||
"Language code like zh-CN, ja needs to be used. "
|
||||
"Otherwise all languages are processed."))
|
||||
parser.add_argument("--include-no-activities",
|
||||
action='store_true',
|
||||
help=("If specified, stats for users with no "
|
||||
"activities are output as well."
|
||||
"By default, stats only for users with "
|
||||
"any activities are output."))
|
||||
parser.add_argument("-f", "--format",
|
||||
default='csv', choices=['csv', 'json'],
|
||||
help="Output file format.")
|
||||
parser.add_argument("user_yaml",
|
||||
help="YAML file of the user list")
|
||||
options = parser.parse_args()
|
||||
|
||||
project_list = options.project.split(',')
|
||||
lang_list = options.lang.split(',')
|
||||
|
||||
language_teams = read_language_team_yaml(options.user_yaml, lang_list)
|
||||
|
||||
users = get_zanata_stats(options.start_date, options.end_date,
|
||||
language_teams, project_list)
|
||||
|
||||
if not options.output_file:
|
||||
output_file = 'zanata_stats_output.%s' % options.format
|
||||
|
||||
write_stats_to_file(users, output_file, options.format,
|
||||
options.include_no_activities)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
Loading…
Reference in New Issue