From 73a36041dbdc45212051c60cbeef3f7783200fd2 Mon Sep 17 00:00:00 2001 From: daisy-ycguo Date: Tue, 2 Feb 2016 19:44:37 +0800 Subject: [PATCH] Add scripts to get translators' statistic data from Zanata Usage: zanata_stats.py -s 2015-08-01 -e 2016-01-31 translation_team.yaml zanata_stats.py will query translator's contributions in words, including the translated translations, approved translations and rejected translations, in a certain period of all projects and all versions, and then save the data in a csv file. translation_team.yaml stores all translation team and translators information. Co-Authored-By: Akihiro Motoki Change-Id: If7b92cb2e7ccc3e2e59d977976d7c5d726e606dd --- tools/zanata/translation_team.yaml | 241 +++++++++++++++++++++++++ tools/zanata/zanata_stats.py | 270 +++++++++++++++++++++++++++++ 2 files changed, 511 insertions(+) create mode 100644 tools/zanata/translation_team.yaml create mode 100755 tools/zanata/zanata_stats.py diff --git a/tools/zanata/translation_team.yaml b/tools/zanata/translation_team.yaml new file mode 100644 index 0000000..2d8efb8 --- /dev/null +++ b/tools/zanata/translation_team.yaml @@ -0,0 +1,241 @@ +zh-CN: + coordinators: + - Ying Chun Guo (daisyycguo) + language: + - Chinese (China) + translators: + - actionchen + - aeng + - chenli + - ccheng + - coffee + - daisyycguo + - zhugaoxiao + - guoshan + - fionahuang + - irina + - jimmyli + - liyucai + - linwwu + - liuchaoxin + - liujunpeng + - lapalm + - lvfyongfeng + - macjack + - meteor + - rustinpeace + - tianzhong + - fifieldt + - tfu + - xuyang + - yanheven + - yanghy + - yuzg + - huyupeng + - zhangjingwen + - zhaochao + - johnwoo_lee + reviewers: + - daisyycguo +ja: + coordinators: + - to222 + - amotoki + language: + - Japanese + translators: + - amotoki + - yosshy + - nanodayo + - hidekazuna + - to222 + - sasukeh + - macjack + - masmiyam + - mmasaki + - myamamot + - yuanying + - ststake + - shinya_kwbt + - shu + - tjcocozz + - fifieldt + - ykatabam + - yuta_hono + 
reviewers: + - amotoki + - to222 + - ykatabam + - myamamot + - yosshy +ko-KR: + coordinators: + - Sungjin Kang (ujuc) + language: + - Korean (South Korea) + translators: + - gemma + - dbgong + - chuinggun + - gyahoo617 + - openstack + - ianychoi + - jaygtv + - whitekjs + - jennie + - ryan + - powhapki + - kiseok7 + - kepark07 + - leegy1 + - lapalm + - macjack + - choi + - ujuc + - frontjang + - wonki + reviewers: + - ujuc +fr: + coordinators: + - jftalta + language: + - French + translators: + - adri2000 + - nokostya + - liced + - croe + - corin + - seveillard + - fdotfr + - gael_rehault + - pat + - jftalta + - jfenal + - kodokuu + - zigmax + - obuisson + - paulgonin + - romainsc + - gonarys + - henris + - eouzans + - tjcocozz +zh-TW: + coordinators: + - zero00072 + language: + - Chinese (Taiwan) + translators: + - bokaitseng + - ccheng + - danny + - elliswu + - fionahuang + - jameslee + - kyle + - t09sunny + - lapalm + - macjack + - mikeli + - rico + - fifieldt + - webberguo + - willychen + - xuyang + - max821214 + - zero00072 +pt-BR: + coordinators: + - marcelodieder + language: + - Portuguese (Brazil) + translators: + - nokostya + - andrecampos + - fcpimenta + - gabrielcw + - josemello + - lucasagomes + - lapalm + - marcelodieder + - mariaandrada + - raildomascena + - rgmorales + - rbraga + - sanporci +es: + coordinators: + - mariantb + language: + - Spanish + translators: + - hybridpollo + - albertomolina + - camunoz + - egongu90 + - toxickore + - gguerrer + - mariantb + - iranzo + - pnavarro + - tjcocozz + - travisn + - vresy +ru: + coordinators: + - adiantum + language: + - Russian + translators: + - antoniok + - v12aml + - doug_fish + - ftarasenko + - sfilatov + - adiantum + - shakhat + - kzaitsev + - lapalm + - nburtsev +de: + coordinators: + - rsimai + language: + - German + translators: + - jaegerandi + - cduch + - atalanttore + - eblock + - eumel8 + - lstemmle + - spielkind + - rsimai + - tjcocozz +it: + coordinators: + - daisyycguo + language: + - 
Italian + translators: + - alessandra + - daisyycguo + - fpezzell + - faber66 + - miumiento + - matgand + - tjcocozz +tr-TR: + coordinators: + - mucahit + language: + - Turkish (Turkey) + translators: + - doug_fish + - duyarli + - halit + - isbaran + - mucahit + - oguzy diff --git a/tools/zanata/zanata_stats.py b/tools/zanata/zanata_stats.py new file mode 100755 index 0000000..13985da --- /dev/null +++ b/tools/zanata/zanata_stats.py @@ -0,0 +1,270 @@ +#!/usr/bin/python + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import argparse
import csv
import datetime
import io
import json
import operator
import random
import re
import sys

from oslo_log import log as logging
import six
import yaml

ZANATA_URI = 'https://translate.openstack.org/rest/%s'
LOG = logging.getLogger(__name__)

# Only "master" and "stable-<name>" iterations are considered.
ZANATA_VERSION_PATTERN = re.compile(r'^(master|stable-[a-z]+)$')


class ZanataUtility(object):
    """Utilities to invoke Zanata REST API."""

    # Pool of User-Agent strings; one is picked at random for each request.
    user_agents = [
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64) Gecko/20100101 Firefox/32.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_6) AppleWebKit/537.78.2',
        'Mozilla/5.0 (Windows NT 6.3; WOW64) Gecko/20100101 Firefox/32.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X) Chrome/37.0.2062.120',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
    ]

    def read_uri(self, uri, headers):
        """Fetch ``uri`` and return the raw response body.

        :param uri: URI to request.
        :param headers: dict of extra HTTP headers. It is not modified.
        :returns: the response body as returned by ``read()``, or None
                  if the request failed for any reason (the error is
                  reported, not raised).
        """
        # Work on a copy so that the caller's dict is left untouched.
        request_headers = dict(headers)
        request_headers['User-Agent'] = random.choice(
            ZanataUtility.user_agents)
        try:
            req = six.moves.urllib.request.Request(url=uri,
                                                   headers=request_headers)
            fd = six.moves.urllib.request.urlopen(req)
            try:
                return fd.read()
            finally:
                # Close the response even when read() fails.
                fd.close()
        except Exception as e:
            # Best effort: one unreachable URI must not abort a run that
            # issues a request per project/version/user. This script never
            # configures logging, so the error is echoed on stdout too.
            print('Error "%s" while reading uri %s' % (e, uri))
            LOG.warning('Error "%(error)s" while reading uri %(uri)s',
                        {'error': e, 'uri': uri})
            return None

    def read_json_from_uri(self, uri):
        """Fetch ``uri`` and decode the response as JSON.

        :returns: the decoded JSON document, or None if the request or
                  the decoding failed.
        """
        data = self.read_uri(uri, {'Accept': 'application/json'})
        if data is None:
            # read_uri() has already reported the failure.
            return None
        try:
            if isinstance(data, bytes):
                # json.loads() rejects bytes on older Python 3 releases.
                data = data.decode('utf-8')
            return json.loads(data)
        except (TypeError, ValueError) as e:
            LOG.warning('Error "%(error)s" parsing json from uri %(uri)s',
                        {'error': e, 'uri': uri})
            return None

    def zanata_get_projects(self):
        """Yield the id of every project returned by the server.

        Yields nothing if the project list cannot be retrieved.
        """
        uri = ZANATA_URI % ('projects')
        LOG.debug("Reading projects from %s", uri)
        projects_data = self.read_json_from_uri(uri)
        for project in projects_data or []:
            yield project['id']

    def _is_valid_version(self, version):
        """Return True if ``version`` matches ZANATA_VERSION_PATTERN."""
        return bool(ZANATA_VERSION_PATTERN.match(version))

    def zanata_get_project_versions(self, project_id):
        """Yield the ids of the valid iterations of ``project_id``.

        Only iterations accepted by _is_valid_version() are yielded.
        Nothing is yielded when the project cannot be read or has no
        iterations.
        """
        uri = ZANATA_URI % ('projects/p/%s' % project_id)
        LOG.debug("Reading iterations for project %s", project_id)
        project_data = self.read_json_from_uri(uri)
        if not project_data:
            return
        for iteration in project_data.get('iterations', []):
            if self._is_valid_version(iteration['id']):
                yield iteration['id']

    def zanata_get_user_stats(self, project_id, iteration_id, zanata_user_id,
                              start_date, end_date):
        """Return contributor statistics for one user in one version.

        :param start_date: first day of the period, inserted verbatim
                           into the request URI.
        :param end_date: last day of the period, inserted verbatim into
                         the request URI.
        :returns: the decoded JSON response, or None on failure.
        """
        uri = ZANATA_URI % ('stats/project/%s/version/%s/contributor/%s/%s..%s'
                            % (project_id, iteration_id, zanata_user_id,
                               start_date, end_date))
        return self.read_json_from_uri(uri)


def _make_language_team(name, team_info):
    """Build a language team record from one entry of the team YAML.

    'language' and 'translators' are mandatory keys of ``team_info``;
    'reviewers' and 'coordinators' default to an empty list.
    """
    return {
        'tag': 'language_team',
        'language_code': name,
        'language': team_info['language'],
        'translators': team_info['translators'],
        'reviewers': team_info.get('reviewers', []),
        'coordinators': team_info.get('coordinators', []),
    }


def _make_user(user_id, language_code):
    """Build a per-user statistics record with all counters at zero."""
    return {
        'user_id': user_id,
        'lang': language_code,
        'translated': 0,
        'approved': 0,
        'rejected': 0
    }


def read_language_team_yaml(translation_team_uri, lang_list):
    """Load language teams from a YAML file.

    :param translation_team_uri: path of the YAML file to read.
    :param lang_list: language codes to keep. An empty list keeps all
                      languages found in the file.
    :returns: dict mapping language code to a language team record.

    Exits the process with status 1 if a requested language code is not
    present in the file.
    """
    LOG.debug('Process list of language team from uri: %s',
              translation_team_uri)

    with io.open(translation_team_uri, 'r') as team_file:
        # safe_load() returns None for an empty document.
        content = yaml.safe_load(team_file) or {}

    if lang_list:
        lang_notfound = [lang_code for lang_code in lang_list
                         if lang_code not in content]
        if lang_notfound:
            print('Language %s not found in %s.' %
                  (', '.join(lang_notfound),
                   translation_team_uri))
            sys.exit(1)

    language_teams = {}
    for lang_code, team_info in content.items():
        if lang_list and lang_code not in lang_list:
            continue
        language_teams[lang_code] = _make_language_team(lang_code, team_info)

    return language_teams


def get_zanata_stats(start_date, end_date, language_teams, project_list):
    """Collect translation statistics for every translator.

    :param start_date: first day of the period.
    :param end_date: last day of the period.
    :param language_teams: dict as returned by read_language_team_yaml().
    :param project_list: project ids to query. If empty, all projects
                         reported by the server are queried.
    :returns: dict mapping user id to a record created by _make_user()
              with the counters accumulated over all queried
              projects and versions.
    """
    print('Getting Zanata contributors statistics (from %s to %s) ...' %
          (start_date, end_date))
    zanata = ZanataUtility()

    # NOTE(review): records are keyed by user id only, so a translator
    # listed in several language teams keeps only the last team seen.
    # Fixing this would change the keys of the returned dict.
    users = {}
    for language_code, language_team in language_teams.items():
        for user_id in language_team['translators']:
            users[user_id] = _make_user(user_id, language_code)

    if not project_list:
        project_list = zanata.zanata_get_projects()

    for project_id in project_list:
        for version in zanata.zanata_get_project_versions(project_id):
            for user_id, user in users.items():
                print('Getting %(project_id)s %(version)s '
                      'for user %(user_id)s %(user_lang)s'
                      % {'project_id': project_id,
                         'version': version,
                         'user_id': user_id,
                         'user_lang': user['lang']})
                stats = zanata.zanata_get_user_stats(
                    project_id, version, user_id, start_date, end_date)
                if not stats:
                    continue
                # The response is indexed by user id, then by language.
                user_stat = (stats.get(user_id) or {}).get(user['lang'])
                if not user_stat:
                    continue
                for counter in ('translated', 'approved', 'rejected'):
                    user[counter] += int(user_stat.get(counter, 0))

    return users


def write_stats_to_file(users, output_file, file_format,
                        include_no_activities):
    """Write user statistics, sorted by language then user id.

    :param users: dict as returned by get_zanata_stats().
    :param output_file: path of the file to write.
    :param file_format: 'csv' writes CSV; any other value writes JSON.
    :param include_no_activities: if true, users whose counters are all
                                  zero are written as well.
    """
    ordered = sorted(users.values(),
                     key=operator.itemgetter('lang', 'user_id'))
    stats = [user for user in ordered
             if _needs_output(include_no_activities, user)]
    if file_format == 'csv':
        _write_stats_to_csvfile(stats, output_file)
    else:
        _write_stats_to_jsonfile(stats, output_file)
    print('Stats has been written to %s' % output_file)


def _needs_output(include_no_activities, user):
    """Return True if ``user`` should appear in the output."""
    return bool(include_no_activities or
                user['translated'] or
                user['approved'] or
                user['rejected'])


def _write_stats_to_csvfile(stats, output_file):
    """Write ``stats`` to ``output_file`` as CSV with a header row."""
    # The csv module wants a binary file on Python 2, but a text file
    # opened with newline='' on Python 3.
    if six.PY2:
        csvfile = open(output_file, 'wb')
    else:
        csvfile = open(output_file, 'w', newline='')
    with csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['user_id', 'lang',
                         'translated', 'approved', 'rejected'])
        for stat in stats:
            writer.writerow([stat['user_id'], stat['lang'],
                             stat['translated'], stat['approved'],
                             stat['rejected']])


def _write_stats_to_jsonfile(stats, output_file):
    """Write ``stats`` to ``output_file`` as indented JSON."""
    with open(output_file, 'w') as f:
        f.write(json.dumps(stats, indent=4))


def _split_comma_list(value):
    """Split a comma-separated option value, dropping empty items."""
    return [item.strip() for item in value.split(',') if item.strip()]


def main():
    """Parse the command line, collect the statistics and write them."""
    # Default period: the last 180 days, ending today.
    default_end_date = datetime.datetime.now()
    default_start_date = default_end_date - datetime.timedelta(days=180)
    default_start_date = default_start_date.strftime('%Y-%m-%d')
    default_end_date = default_end_date.strftime('%Y-%m-%d')

    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--start-date",
                        default=default_start_date,
                        help=("Specify the start date. "
                              "Default:%s" % default_start_date))
    parser.add_argument("-e", "--end-date",
                        default=default_end_date,
                        help=("Specify the end date. "
                              "Default:%s" % default_end_date))
    parser.add_argument("-o", "--output-file",
                        help=("Specify the output file. "
                              "Default: zanata_stats_output.{csv,json}."))
    parser.add_argument("-p", "--project",
                        default='',
                        help=("Specify project(s). Comma-separated list. "
                              "Otherwise all Zanata projects are processed."))
    parser.add_argument("-l", "--lang",
                        default='',
                        help=("Specify language(s). Comma-separated list. "
                              "Language code like zh-CN, ja needs to be used. "
                              "Otherwise all languages are processed."))
    parser.add_argument("--include-no-activities",
                        action='store_true',
                        help=("If specified, stats for users with no "
                              "activities are output as well. "
                              "By default, stats only for users with "
                              "any activities are output."))
    parser.add_argument("-f", "--format",
                        default='csv', choices=['csv', 'json'],
                        help="Output file format.")
    parser.add_argument("user_yaml",
                        help="YAML file of the user list")
    options = parser.parse_args()

    # ''.split(',') yields [''], which is truthy; drop empty items so
    # that an omitted option really means "all projects/languages".
    project_list = _split_comma_list(options.project)
    lang_list = _split_comma_list(options.lang)

    language_teams = read_language_team_yaml(options.user_yaml, lang_list)

    users = get_zanata_stats(options.start_date, options.end_date,
                             language_teams, project_list)

    output_file = (options.output_file or
                   'zanata_stats_output.%s' % options.format)

    write_stats_to_file(users, output_file, options.format,
                        options.include_no_activities)


if __name__ == '__main__':
    main()