Add scripts to get translators' statistic data from Zanata

Usage: zanata_stats.py -s 2015-08-01 -e 2016-01-31 translation_team.yaml

zanata_stats.py will query translator's contributions in words,
including the translated translations, approved translations
and rejected translations, in a certain period of all projects and all
versions, and then save the data in a csv file.

translation_team.yaml stores all translation team and translators information.

Co-Authored-By: Akihiro Motoki <amotoki@gmail.com>
Change-Id: If7b92cb2e7ccc3e2e59d977976d7c5d726e606dd
This commit is contained in:
daisy-ycguo 2016-02-02 19:44:37 +08:00 committed by Akihiro Motoki
parent 639f653d71
commit 73a36041db
2 changed files with 511 additions and 0 deletions

View File

@ -0,0 +1,241 @@
zh-CN:
coordinators:
- Ying Chun Guo (daisyycguo)
language:
- Chinese (China)
translators:
- actionchen
- aeng
- chenli
- ccheng
- coffee
- daisyycguo
- zhugaoxiao
- guoshan
- fionahuang
- irina
- jimmyli
- liyucai
- linwwu
- liuchaoxin
- liujunpeng
- lapalm
- lvfyongfeng
- macjack
- meteor
- rustinpeace
- tianzhong
- fifieldt
- tfu
- xuyang
- yanheven
- yanghy
- yuzg
- huyupeng
- zhangjingwen
- zhaochao
- johnwoo_lee
reviewers:
- daisyycguo
ja:
coordinators:
- to222
- amotoki
language:
- Japanese
translators:
- amotoki
- yosshy
- nanodayo
- hidekazuna
- to222
- sasukeh
- macjack
- masmiyam
- mmasaki
- myamamot
- yuanying
- ststake
- shinya_kwbt
- shu
- tjcocozz
- fifieldt
- ykatabam
- yuta_hono
reviewers:
- amotoki
- to222
- ykatabam
- myamamot
- yosshy
ko-KR:
coordinators:
- Sungjin Kang (ujuc)
language:
- Korean (South Korea)
translators:
- gemma
- dbgong
- chuinggun
- gyahoo617
- openstack
- ianychoi
- jaygtv
- whitekjs
- jennie
- ryan
- powhapki
- kiseok7
- kepark07
- leegy1
- lapalm
- macjack
- choi
- ujuc
- frontjang
- wonki
reviewers:
- ujuc
fr:
coordinators:
- jftalta
language:
- French
translators:
- adri2000
- nokostya
- liced
- croe
- corin
- seveillard
- fdotfr
- gael_rehault
- pat
- jftalta
- jfenal
- kodokuu
- zigmax
- obuisson
- paulgonin
- romainsc
- gonarys
- henris
- eouzans
- tjcocozz
zh-TW:
coordinators:
- zero00072
language:
- Chinese (Taiwan)
translators:
- bokaitseng
- ccheng
- danny
- elliswu
- fionahuang
- jameslee
- kyle
- t09sunny
- lapalm
- macjack
- mikeli
- rico
- fifieldt
- webberguo
- willychen
- xuyang
- max821214
- zero00072
pt-BR:
coordinators:
- marcelodieder
language:
- Portuguese (Brazil)
translators:
- nokostya
- andrecampos
- fcpimenta
- gabrielcw
- josemello
- lucasagomes
- lapalm
- marcelodieder
- mariaandrada
- raildomascena
- rgmorales
- rbraga
- sanporci
es:
coordinators:
- mariantb
language:
- Spanish
translators:
- hybridpollo
- albertomolina
- camunoz
- egongu90
- toxickore
- gguerrer
- mariantb
- iranzo
- pnavarro
- tjcocozz
- travisn
- vresy
ru:
coordinators:
- adiantum
language:
- Russian
translators:
- antoniok
- v12aml
- doug_fish
- ftarasenko
- sfilatov
- adiantum
- shakhat
- kzaitsev
- lapalm
- nburtsev
de:
coordinators:
- rsimai
language:
- German
translators:
- jaegerandi
- cduch
- atalanttore
- eblock
- eumel8
- lstemmle
- spielkind
- rsimai
- tjcocozz
it:
coordinators:
- daisyycguo
language:
- Italian
translators:
- alessandra
- daisyycguo
- fpezzell
- faber66
- miumiento
- matgand
- tjcocozz
tr-TR:
coordinators:
- mucahit
language:
- Turkish (Turkey)
translators:
- doug_fish
- duyarli
- halit
- isbaran
- mucahit
- oguzy

270
tools/zanata/zanata_stats.py Executable file
View File

@ -0,0 +1,270 @@
#!/usr/bin/python
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import csv
import datetime
import io
import json
import operator
import random
import re
import sys
from oslo_log import log as logging
import six
import yaml
ZANATA_URI = 'https://translate.openstack.org/rest/%s'
LOG = logging.getLogger(__name__)
ZANATA_VERSION_PATTERN = re.compile(r'^(master|stable-[a-z]+)$')
class ZanataUtility(object):
"""Utilities to invoke Zanata REST API."""
user_agents = [
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64) Gecko/20100101 Firefox/32.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_6) AppleWebKit/537.78.2',
'Mozilla/5.0 (Windows NT 6.3; WOW64) Gecko/20100101 Firefox/32.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X) Chrome/37.0.2062.120',
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
]
def read_uri(self, uri, headers):
try:
headers['User-Agent'] = random.choice(ZanataUtility.user_agents)
req = six.moves.urllib.request.Request(url=uri, headers=headers)
fd = six.moves.urllib.request.urlopen(req)
raw = fd.read()
fd.close()
return raw
except Exception as e:
print('exception happen', e)
LOG.warn('Error "%(error)s" while reading uri %(uri)s',
{'error': e, 'uri': uri})
def read_json_from_uri(self, uri):
try:
data = self.read_uri(uri, {'Accept': 'application/json'})
return json.loads(data)
except Exception as e:
LOG.warn('Error "%(error)s" parsing json from uri %(uri)s',
{'error': e, 'uri': uri})
def zanata_get_projects(self):
uri = ZANATA_URI % ('projects')
LOG.debug("Reading projects from %s" % uri)
projects_data = self.read_json_from_uri(uri)
for project in projects_data:
yield project['id']
def _is_valid_version(self, version):
return bool(ZANATA_VERSION_PATTERN.match(version))
def zanata_get_project_versions(self, project_id):
uri = ZANATA_URI % ('projects/p/%s' % project_id)
LOG.debug("Reading iterations for project %s" % project_id)
project_data = self.read_json_from_uri(uri)
if ('iterations' in project_data):
for interation_data in project_data['iterations']:
if self._is_valid_version(interation_data['id']):
yield interation_data['id']
else:
yield None
def zanata_get_user_stats(self, project_id, iteration_id, zanata_user_id,
start_date, end_date):
uri = ZANATA_URI % ('stats/project/%s/version/%s/contributor/%s/%s..%s'
% (project_id, iteration_id, zanata_user_id,
start_date, end_date))
return self.read_json_from_uri(uri)
def _make_language_team(name, team_info):
return {
'tag': 'language_team',
'language_code': name,
'language': team_info['language'],
'translators': team_info['translators'],
'reviewers': team_info.get('reviewers', []),
'coordinators': team_info.get('coordinators', []),
}
def _make_user(user_id, language_code):
return {
'user_id': user_id,
'lang': language_code,
'translated': 0,
'approved': 0,
'rejected': 0
}
def read_language_team_yaml(translation_team_uri, lang_list):
LOG.debug('Process list of language team from uri: %s',
translation_team_uri)
content = yaml.safe_load(io.open(translation_team_uri, 'r'))
language_teams = {}
if lang_list:
lang_notfound = [lang_code for lang_code in lang_list
if lang_code not in content]
if lang_notfound:
print('Language %s not tound in %s.' %
(', '.join(lang_notfound),
translation_team_uri))
sys.exit(1)
for lang_code, team_info in content.items():
if lang_list and lang_code not in lang_list:
continue
language_teams[lang_code] = _make_language_team(lang_code, team_info)
return language_teams
def get_zanata_stats(start_date, end_date, language_teams, project_list):
print('Getting Zanata contributors statistics (from %s to %s) ...' %
(start_date, end_date))
zanataUtil = ZanataUtility()
users = {}
for language_code in language_teams:
language_team = language_teams[language_code]
for user in language_team['translators']:
users[user] = _make_user(user, language_code)
if not project_list:
project_list = zanataUtil.zanata_get_projects()
for project_id in project_list:
for version in zanataUtil.zanata_get_project_versions(project_id):
for user_id in users:
user = users.get(user_id)
print('Getting %(project_id)s %(version)s '
'for user %(user_id)s %(user_lang)s'
% {'project_id': project_id,
'version': version,
'user_id': user_id,
'user_lang': user['lang']})
statisticdata = zanataUtil.zanata_get_user_stats(
project_id, version, user_id, start_date, end_date)
if statisticdata:
user_contributes = statisticdata[user_id]
if (user['lang'] in user_contributes):
user_stat = user_contributes[user['lang']]
user['translated'] += int(user_stat['translated'])
user['approved'] += int(user_stat['approved'])
user['rejected'] += int(user_stat['rejected'])
return users
def write_stats_to_file(users, output_file, file_format,
include_no_activities):
stats = [user for user in
sorted(users.values(), key=operator.itemgetter('lang', 'user_id'))
if _needs_output(include_no_activities, user)]
if file_format == 'csv':
_write_stats_to_csvfile(stats, output_file)
else:
_write_stats_to_jsonfile(stats, output_file)
print('Stats has been written to %s' % output_file)
def _needs_output(include_no_activities, user):
if include_no_activities:
return True
elif user['translated'] or user['approved'] or user['rejected']:
return True
else:
return False
def _write_stats_to_csvfile(stats, output_file):
with open(output_file, 'wb') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(['user_id', 'lang',
'translated', 'approved', 'rejected'])
for stat in stats:
writer.writerow([stat['user_id'], stat['lang'],
stat['translated'], stat['approved'],
stat['rejected']])
def _write_stats_to_jsonfile(stats, output_file):
with open(output_file, 'w') as f:
f.write(json.dumps(stats, indent=4))
def main():
default_end_date = datetime.datetime.now()
default_start_date = default_end_date - datetime.timedelta(days=180)
default_start_date = default_start_date.strftime('%Y-%m-%d')
default_end_date = default_end_date.strftime('%Y-%m-%d')
parser = argparse.ArgumentParser()
parser.add_argument("-s", "--start-date",
default=default_start_date,
help=("Specify the start date. "
"Default:%s" % default_start_date))
parser.add_argument("-e", "--end-date",
default=default_end_date,
help=("Specify the end date. "
"Default:%s" % default_end_date))
parser.add_argument("-o", "--output-file",
help=("Specify the output file. "
"Default: zanata_stats_output.{csv,json}."))
parser.add_argument("-p", "--project",
default='',
help=("Specify project(s). Comma-separated list. "
"Otherwise all Zanata projects are processed."))
parser.add_argument("-l", "--lang",
default='',
help=("Specify language(s). Comma-separated list. "
"Language code like zh-CN, ja needs to be used. "
"Otherwise all languages are processed."))
parser.add_argument("--include-no-activities",
action='store_true',
help=("If specified, stats for users with no "
"activities are output as well."
"By default, stats only for users with "
"any activities are output."))
parser.add_argument("-f", "--format",
default='csv', choices=['csv', 'json'],
help="Output file format.")
parser.add_argument("user_yaml",
help="YAML file of the user list")
options = parser.parse_args()
project_list = options.project.split(',')
lang_list = options.lang.split(',')
language_teams = read_language_team_yaml(options.user_yaml, lang_list)
users = get_zanata_stats(options.start_date, options.end_date,
language_teams, project_list)
if not options.output_file:
output_file = 'zanata_stats_output.%s' % options.format
write_stats_to_file(users, output_file, options.format,
options.include_no_activities)
if __name__ == '__main__':
main()