#!/usr/bin/env python3 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. # See the License for the specific language governing permissions and # limitations under the License. import argparse import collections import csv import datetime from datetime import timedelta import io import json import logging import os import random import re import requests import sys from typing import Optional from weblate_records import WeblateComponentInfo from weblate_records import WeblateObjectStats from weblate_records import WeblateProjectStats from weblate_records import WeblateUserInfo from weblate_records import WeblateUserStats from WeblateUtils import IniConfig from wlc import Weblate import yaml WEBLATE_HOST = "https://openstack.weblate.cloud" EXAMPLE_HOST = "http://weblate.example.com" WEBLATE_URI = WEBLATE_HOST + "/%s" LOG = logging.getLogger("weblate_stats") WEBLATE_VER_EXPR = r"^(master[-,a-z]*|stable-[a-z]+|openstack-user-survey)$" WEBLATE_VER_PATTERN = re.compile(WEBLATE_VER_EXPR) DEFAULT_STATS = { "translated": 0, "approved": 0, "needReview": 0, "fuzzy": 0, "failingCheck": 0, "pending": 0, } class WeblateUtility(object): """Utilities to invoke Weblate REST API. https://docs.weblate.org/en/weblate-4.18.2/api.html#projects https://docs.weblate.org/en/weblate-4.18.2/api.html#get--api-users-(str-username)-statistics- """ user_agents = [ "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) Gecko/20100101 Firefox/32.0", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_6) AppleWebKit/537.78.2", "Mozilla/5.0 (Windows NT 6.3; WOW64) Gecko/20100101 Firefox/32.0", "Mozilla/5.0 (Macintosh; Intel Mac OS X) Chrome/37.0.2062.120", "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko", ] def __init__( self, wconfig, verify: bool = True, accept: str = "application/json, text/javascript", content_type: str = "application/json", ): self.url, self.key = wconfig.url, wconfig.key self.weblate_obj = Weblate() self.headers = { "Accept": accept, "Content-Type": content_type, "Authorization": "Token " + self.key, } self.verify = verify def _unify(self, locale: str) -> str: return locale.replace("-", "").replace("_", "").lower() # API REQUEST def read_uri(self, uri, headers): try: headers["User-Agent"] = random.choice(WeblateUtility.user_agents) req = requests.get(uri, headers=headers) return req.text except Exception as e: LOG.error('Error "%s" while reading uri %s', (e, uri)) raise def read_json_from_uri(self, uri): data = self.read_uri(uri, self.headers) try: return json.loads(data) except Exception as e: LOG.error( 'Error "%(error)s" parsing json from uri %(uri)s', {"error": e, "uri": uri}, ) raise def get_projects(self, **kargs) -> list: uri = WEBLATE_URI % ("api/projects/") if "url" in kargs: uri = kargs.get("url") uri.replace(EXAMPLE_HOST, WEBLATE_HOST, 1) LOG.debug("Reading projects from %s" % uri) projects_data = self.read_json_from_uri(uri) return projects_data["results"] def get_project_statistics(self, project_slug: str, **kargs): uri = WEBLATE_URI % ("api/projects/%s/statistics/" % (project_slug)) if "url" in kargs: uri = kargs.get("url") uri.replace(EXAMPLE_HOST, WEBLATE_HOST, 1) LOG.debug("Reading project statistics from %s" % uri) project_statistics_data = self.read_json_from_uri(uri) return WeblateProjectStats.from_dict(project_statistics_data) def get_object_statistics(self, obj: str, **kargs): uri = WEBLATE_URI % ("api/%s/statistics/" % (obj)) if "url" in kargs: uri = kargs.get("url") uri.replace(EXAMPLE_HOST, WEBLATE_HOST, 1) LOG.debug("Reading object statistics from %s" % uri) object_data = self.read_json_from_uri(uri) return WeblateObjectStats.from_dict(object_data) def get_users(self, **kargs) -> list: uri = WEBLATE_URI % ("api/users/") if "url" in kargs: uri = kargs.get("url") uri.replace(EXAMPLE_HOST, WEBLATE_HOST, 1) LOG.debug("Reading users from %s" % uri) users_data = self.read_json_from_uri(uri) return users_data["results"] def get_user(self, username: str, **kargs): uri = WEBLATE_URI % ("api/users/%s/" % (username)) if "url" in kargs: uri = kargs.get("url") uri.replace(EXAMPLE_HOST, WEBLATE_HOST, 1) LOG.debug("Reading user from %s" % uri) user_data = self.read_json_from_uri(uri) return user_data def get_user_statistics(self, username: str, **kargs): uri = WEBLATE_URI % ("api/users/%s/statistics/" % (username)) if "url" in kargs: uri = kargs.get("url") uri.replace(EXAMPLE_HOST, WEBLATE_HOST, 1) LOG.debug("Reading user statistics from %s" % uri) user_data = self.read_json_from_uri(uri) return WeblateUserStats.from_dict(user_data) def get_group(self, group_id: int, **kargs): uri = WEBLATE_URI % ("api/groups/%s/" % (group_id)) if "url" in kargs: uri = kargs.get("url") uri.replace(EXAMPLE_HOST, WEBLATE_HOST, 1) LOG.debug("Reading group from %s" % uri) group_data = self.read_json_from_uri(uri) return WeblateUserInfo.from_dict(group_data) def get_component( self, project: str, component: str, **kargs ) -> WeblateComponentInfo: uri = WEBLATE_URI % ("api/components/%s/%s/" % (project, component)) if "url" in kargs: uri = kargs.get("url") uri.replace(EXAMPLE_HOST, WEBLATE_HOST, 1) LOG.debug("Reading component from %s" % uri) component_data = self.read_json_from_uri(uri) return WeblateComponentInfo.from_dict(component_data) def get_translation_changes( self, project: str, component: str, lang: str, **kargs ) -> list: uri: str = WEBLATE_URI % ( "api/translations/%s/%s/%s/changes/" % (project, component, lang) ) if "url" in kargs: uri = kargs.get("url") uri = WEBLATE_HOST + uri.split(EXAMPLE_HOST)[1] uri.replace(EXAMPLE_HOST, WEBLATE_HOST, 1) if "checksum" in kargs: uri += "?checksum=%s" % (kargs.get("checksum")) LOG.debug("Reading translation changes from %s" % uri) translation_data = self.read_json_from_uri(uri) return translation_data def get_translations( self, project: str, component: str, language: str, **kargs ) -> Optional[dict]: uri: str = WEBLATE_URI % ( "api/translations/%s/%s/%s/" % (project, component, language) ) checksum: str = "" if "?checksum" in kargs.get("checksum"): checksum = kargs.get("checksum").split("?checksum=")[1] if "url" in kargs: uri = kargs.get("url") uri = WEBLATE_HOST + uri.split(EXAMPLE_HOST)[1] uri.replace(EXAMPLE_HOST, WEBLATE_HOST, 1) if "checksum" in kargs: uri += "?checksum=%s" % (checksum) LOG.debug("Reading translation from %s" % uri) translation_data = self.read_json_from_uri(uri) langs = set() langs.add(self._unify(translation_data["language"]["code"])) langs.add(self._unify(translation_data["language"]["name"])) langs.add(self._unify(translation_data["language_code"])) for lang in translation_data["language"]["aliases"]: langs.add(self._unify(lang)) user_lang = self._unify(kargs.get("user_lang")) if "user_lang" in kargs and user_lang not in langs: return None return translation_data def get_change(self, id: int, **kargs) -> dict: uri = WEBLATE_URI % ("api/changes/%d/" % (id)) if "url" in kargs: uri = kargs.get("url") uri.replace(EXAMPLE_HOST, WEBLATE_HOST, 1) LOG.debug("Reading change from %s" % uri) change_data = self.read_json_from_uri(uri) return change_data def get_units(self, start_datetime: str, end_datetime: str, **kargs): change_date_query = "?q=changed:>=%s AND changed:<=%s " % ( start_datetime, end_datetime, ) uri = WEBLATE_URI % ("api/units/%s" % (change_date_query)) if "url" in kargs: uri = kargs.get("url") uri.replace(EXAMPLE_HOST, WEBLATE_HOST, 1) LOG.debug("Reading units from %s" % uri) units_data = self.read_json_from_uri(uri) return units_data["results"] class LanguageTeam(object): def __init__(self, language_code, team_info): self.language_code = language_code self.language = team_info["language"] # Weblate ID which only consists of numbers is a valid ID in Weblate # Such entry is interpreted as integer unless it is quoted # in the YAML file. Exnsure to stringify them. self.translators = [str(i) for i in team_info["translators"]] self.reviewers = [str(i) for i in team_info.get("reviewers", [])] self.coordinators = [str(i) for i in team_info.get("coordinators", [])] @classmethod def load_from_language_team_yaml(cls, trans_team_uri, lang_list): LOG.debug("Process list of language team from uri: %s", trans_team_uri) content = yaml.safe_load(io.open(trans_team_uri, "r")) if lang_list: lang_notfound = [ lang_code for lang_code in lang_list if lang_code not in content ] if lang_notfound: LOG.error( "Language %s not tound in %s.", ", ".join(lang_notfound), trans_team_uri, ) sys.exit(1) return [ cls(lang_code, team_info) for lang_code, team_info in content.items() if not lang_list or lang_code in lang_list ] class User(object): trans_fields = [ "translated", "approved", "needReview", "fuzzy", "failingCheck", "pending", ] review_fields = ["total", "approved"] # Todo def __init__(self, user_id, language_code): self.user_id = user_id self.lang = language_code self.stats = collections.defaultdict(dict) def __str__(self): return "<%s: user_id=%s, lang=%s, stats=%s" % ( self.__class__.__name__, self.user_id, self.lang, self.stats, ) # def __repr__(self): # return repr(self.convert_to_serializable_data()) def __lt__(self, other): if self.lang != other.lang: return self.lang < other.lang else: return self.user_id < other.user_id def needs_output(self, include_no_activities): if include_no_activities: return True return bool(self.stats) and all(self.stats.values()) @staticmethod def get_flattened_data_title(): return [ "user_id", "main_lang", "translated", "needReview", "approved", "fuzzy", "failingCheck", "pending", ] def convert_to_flattened_data(self, detail=False): data = [] for stat, count in self.stats.items(): if detail: data.append( [self.user_id, self.lang] + [count for k in self.trans_fields] ) stat_sum: int = sum([self.stats[k] for k in self.trans_fields]) if stat_sum > 0: data.append( [self.user_id, self.lang] + [self.stats[k] for k in self.trans_fields] ) return data def write_stats_to_file(users, output_file, include_no_activities, detail): before_sort = [] for user in users: if not user.stats.keys(): user.stats = DEFAULT_STATS if user.needs_output(include_no_activities): before_sort.append(user) users = sorted(before_sort) _write_stats_to_csvfile(users, output_file, detail) LOG.info("Stats has been written to %s", output_file) def _write_stats_to_csvfile(users, output_file, detail): with open(output_file, "w") as csvfile: writer = csv.writer(csvfile) writer.writerow(User.get_flattened_data_title()) for user in users: writer.writerows(user.convert_to_flattened_data(detail)) def _comma_separated_list(s): return s.split(",") def main(): # Loads weblate.ini configuration file try: wc = IniConfig(os.path.expanduser("~/.config/weblate.ini")) except ValueError as e: sys.exit(e) default_end_date = datetime.now() default_start_date = default_end_date - timedelta(days=180) default_start_date = default_start_date.strftime("%Y-%m-%d") default_end_date = default_end_date.strftime("%Y-%m-%d") parser = argparse.ArgumentParser() parser.add_argument( "-s", "--start-date", default=default_start_date, help=("Specify the start date. " "Default:%s" % default_start_date), ) parser.add_argument( "-e", "--end-date", default=default_end_date, help=("Specify the end date. " "Default:%s" % default_end_date), ) parser.add_argument( "-o", "--output-file", help=( "Specify the output file. " "Default: weblate_stats_output.csv." ), ) parser.add_argument( "-p", "--project", type=_comma_separated_list, help=( "Specify project(s). Comma-separated list. " "Otherwise all Weblate projects are processed." ), ) parser.add_argument( "-l", "--lang", type=_comma_separated_list, help=( "Specify language(s). Comma-separated list. " "Language code like zh-CN, ja needs to be used. " "Otherwise all languages are processed." ), ) parser.add_argument( "-t", "--target-version", type=_comma_separated_list, help=( "Specify version(s). Comma-separated list. " "Otherwise all available versions are " "processed." ), ) parser.add_argument( "-u", "--user", type=_comma_separated_list, help=( "Specify user(s). Comma-separated list. " "Otherwise all users are processed." ), ) parser.add_argument( "--detail", action="store_true", help=( "If specified, statistics per project " "and version are output in addition to " "total statistics." ), ) parser.add_argument( "--include-no-activities", action="store_true", help=( "If specified, stats for users with no " "activities are output as well." "By default, stats only for users with " "any activities are output." ), ) parser.add_argument( "--no-verify", action="store_false", dest="verify", help="Do not perform HTTPS certificate verification", ) parser.add_argument( "--debug", action="store_true", help="Enable debug message." ) parser.add_argument("user_yaml", help="YAML file of the user list") options = parser.parse_args() logging_level = logging.DEBUG if options.debug else logging.INFO formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s") handler = logging.StreamHandler() handler.setLevel(logging_level) handler.setFormatter(formatter) LOG.setLevel(logging_level) LOG.addHandler(handler) language_teams = LanguageTeam.load_from_language_team_yaml( options.user_yaml, options.lang ) versions = [v.replace("/", "-") for v in options.target_version or []] users = get_weblate_stats( wc, options.verify, options.start_date, options.end_date, language_teams, options.project, versions, options.user, ) output_file = "weblate_stats_output.%s" % options.format output_file = output_file or options.output_file write_stats_to_file( users, output_file, options.include_no_activities, options.detail, ) def get_weblate_stats( wc, verify, start_date, end_date, language_teams, project_list, version_list, user_list, ): LOG.info( "Getting Weblate contributors statistics (from %s to %s) ...", start_date, end_date, ) weblateUtil = WeblateUtility(wc, verify) if not project_list: project_list = weblateUtil.get_projects() users = [] for team in language_teams: users += [User(user_id, team.language_code) for user_id in team.translators] data = dict() for user in users: if user_list and user.user_id not in user_list: continue user_data = weblateUtil.get_user(user.user_id) if "detail" in user_data and user_data["detail"] == "Not found.": continue LOG.info( "Getting for user %(user_id)s %(user_lang)s", {"user_id": user.user_id, "user_lang": user.lang}, ) unit_data = weblateUtil.get_units(start_date, end_date) for unit in unit_data: translation_data = weblateUtil.get_translations( None, None, None, url=unit["translation"], checksum=unit["web_url"], user_lang=user.lang, ) if translation_data is None: continue user_full_name = translation_data["last_author"] if user_full_name not in data: data[user_full_name] = dict() data[user_full_name]["translated"] = 0 data[user_full_name]["approved"] = 0 data[user_full_name]["needReview"] = 0 data[user_full_name]["fuzzy"] = 0 data[user_full_name]["failingCheck"] = 0 data[user_full_name]["pending"] = 0 if unit["translated"]: data[user_full_name]["translated"] += 1 if unit["approved"]: data[user_full_name]["approved"] += 1 if unit["has_suggestion"] or unit["has_comment"]: data[user_full_name]["needReview"] += 1 if unit["fuzzy"]: data[user_full_name]["fuzzy"] += 1 if unit["has_failing_check"]: data[user_full_name]["failingCheck"] += 1 if unit["pending"]: data[user_full_name]["pending"] += 1 LOG.debug("Got: %s", data) user.stats = data.get(user_full_name) return users if __name__ == "__main__": main()