migrate zanata_stats to weblate_stats

- weblate_stats.py
- weblate_records.py
- __init__.py

story: 2010915
task: 48768
Change-Id: Ie196e43f418e730d6fee7f0a107d198e7daacbc4
This commit is contained in:
Kuemjong 2023-09-23 18:08:53 +09:00
parent 66888cbd31
commit da63d01b52
3 changed files with 971 additions and 0 deletions

View File

View File

@ -0,0 +1,322 @@
#!/usr/bin/env python3
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABC
from dataclasses import dataclass, asdict
from typing import Optional
class AbstractWeblateRecord(ABC):
    """Shared base class for the Weblate API response records.

    Subclasses are expected to be dataclasses, because ``to_dict`` relies
    on ``dataclasses.asdict``.
    """

    def to_dict(self):
        """Return the record's fields as a plain dictionary."""
        as_mapping = asdict(self)
        return as_mapping
@dataclass
class WeblateObjectStats(AbstractWeblateRecord):
    """Record for an object statistics API response.

    weblate-4.18.2
    GET /api/(str: object)/statistics/
    """

    # Totals.
    total: int
    total_words: int
    total_chars: int
    last_change: str
    # Translated counters and their percentages.
    translated: int
    translated_percent: float
    translated_words: int
    translated_words_percent: float
    translated_chars: int
    translated_chars_percent: float
    # Per-state counters and their percentages.
    fuzzy: int
    fuzzy_percent: float
    failing: int
    failing_percent: float
    approved: int
    approved_percent: float
    readonly: int
    readonly_percent: float
    suggestions: int
    comments: int
    # Identification of the object.
    name: str
    url: str
    url_translate: str
    code: str

    @classmethod
    def from_dict(cls, data_dict):
        """Build a record from a mapping whose keys are the field names.

        The mapping is expanded into keyword arguments, so its keys must
        match the declared fields.
        """
        record = cls(**data_dict)
        return record
@dataclass
class WeblateProject(AbstractWeblateRecord):
    """Record for a project API response.

    weblate-4.18.2
    GET /api/projects/
    GET /api/projects/(str: project)/
    GET /api/(str: project)/statistics/
    """

    name: str
    slug: str
    web: str
    # URLs of related API resources.
    components_list_url: str
    repository_url: str
    changes_list_url: str
    # Project settings.
    translation_review: bool
    source_review: bool
    set_language_team: bool
    enable_hooks: bool
    instructions: str
    language_aliases: str

    @classmethod
    def from_dict(cls, data_dict):
        """Build a record from a mapping whose keys are the field names.

        The mapping is expanded into keyword arguments, so its keys must
        match the declared fields.
        """
        record = cls(**data_dict)
        return record
@dataclass
class WeblateProjectStats(AbstractWeblateRecord):
    """Record for a project statistics API response.

    weblate-4.18.2
    GET /api/(str: project)/statistics/
    """

    # Totals.
    total: int
    total_words: int
    total_chars: int
    last_change: str
    # Translated counters and their percentages.
    translated: int
    translated_percent: float
    translated_words: int
    translated_words_percent: float
    translated_chars: int
    translated_chars_percent: float
    # Per-state counters and their percentages.
    fuzzy: int
    fuzzy_percent: float
    failing: int
    failing_percent: float
    approved: int
    approved_percent: float
    readonly: int
    readonly_percent: float
    recent_changes: int
    suggestions: int
    comments: int
    # Identification of the project; the last two fields are optional.
    name: str
    url: str
    url_translate: Optional[str] = None
    code: Optional[str] = None

    @classmethod
    def from_dict(cls, data_dict):
        """Build a record from a mapping whose keys are the field names.

        The mapping is expanded into keyword arguments, so its keys must
        match the declared fields; ``url_translate`` and ``code`` may be
        omitted.
        """
        record = cls(**data_dict)
        return record
@dataclass
class WeblateUserStats(AbstractWeblateRecord):
    """Record for a user statistics API response.

    weblate-4.18.2
    GET /api/users/(str: username)/statistics/
    """

    translated: int
    suggested: int
    uploaded: int
    commented: int
    languages: int

    @classmethod
    def from_dict(cls, data_dict):
        """Build a record from a mapping whose keys are the field names.

        The mapping is expanded into keyword arguments, so its keys must
        match the declared fields.
        """
        record = cls(**data_dict)
        return record
@dataclass
class WeblateUserInfo(AbstractWeblateRecord):
    """Record for a user information API response.

    weblate-4.18.2
    GET /api/users/(str: username)/
    """

    username: str
    full_name: str
    email: str
    is_superuser: bool
    is_active: bool
    is_bot: bool
    date_joined: str
    groups: list
    # Optional fields; default to None when absent.
    url: Optional[str] = None
    statistics_url: Optional[str] = None

    @classmethod
    def from_dict(cls, data_dict):
        """Build a record from a mapping whose keys are the field names.

        The mapping is expanded into keyword arguments, so its keys must
        match the declared fields; ``url`` and ``statistics_url`` may be
        omitted.
        """
        record = cls(**data_dict)
        return record
@dataclass
class WeblateGroupInfo(AbstractWeblateRecord):
    """Record for a group information API response.

    weblate-4.18.2
    GET /api/groups/(int: id)/
    """

    name: str
    project_selection: int
    language_selection: int
    roles: list
    projects: list
    components: list
    componentlists: list
    # Required field (no default), although its value may be None.
    defining_project: Optional[str]
    url: Optional[str] = None

    @classmethod
    def from_dict(cls, data_dict):
        """Build a record from a mapping whose keys are the field names.

        The mapping is expanded into keyword arguments, so its keys must
        match the declared fields; only ``url`` may be omitted.
        """
        record = cls(**data_dict)
        return record
@dataclass
class WeblateComponentInfo(AbstractWeblateRecord):
    """Record for a component API response.

    weblate-4.18.2
    GET /api/components/(string: project)/(string: component)/

    NOTE(review): most fields are annotated ``str``; dataclasses do not
    enforce annotations, so the actual value types are whatever the API
    returns -- confirm against the API before relying on them.
    """

    project: str
    name: str
    slug: str
    # Version control settings.
    vcs: str
    repo: str
    git_export: str
    branch: str
    push_branch: str
    # Translation file layout.
    filemask: str
    template: str
    edit_template: str
    intermediate: str
    new_base: str
    file_format: str
    license: str
    agreement: str
    new_lang: str
    language_code_style: str
    push: str
    check_flags: str
    priority: str
    enforced_checks: str
    restricted: str
    repoweb: str
    report_source_bugs: str
    merge_style: str
    # Message templates.
    commit_message: str
    add_message: str
    delete_message: str
    merge_message: str
    addon_message: str
    pull_message: str
    # Workflow settings.
    allow_translation_propagation: str
    enable_suggestions: str
    suggestion_voting: str
    suggestion_autoaccept: str
    push_on_commit: str
    commit_pending_age: str
    auto_lock_error: str
    language_regex: str
    variant_regex: str
    is_glossary: bool
    glossary_color: str
    # URLs of related API resources.
    repository_url: str
    translations_url: str
    lock_url: str
    changes_list_url: str
    task_url: str
    source_language: dict

    @classmethod
    def from_dict(cls, data_dict):
        """Build a record from a mapping whose keys are the field names.

        The mapping is expanded into keyword arguments, so its keys must
        match the declared fields.
        """
        record = cls(**data_dict)
        return record
@dataclass
class WeblateTranslationInfo(AbstractWeblateRecord):
    """Record for a translation API response.

    weblate-4.18.2
    GET /api/translations/(string: project)/(string: component)/(string: lang)/
    """

    component: dict
    # Failing-check counters.
    failing_checks: int
    failing_checks_percent: float
    failing_checks_words: int
    filename: str
    # Fuzzy counters.
    fuzzy: int
    fuzzy_percent: float
    fuzzy_words: int
    have_comment: int
    have_suggestion: int
    is_template: bool
    # Language of the translation.
    language: dict
    language_code: str
    # Latest activity.
    last_author: str
    last_change: str
    revision: str
    share_url: str
    # Totals and translated counters.
    total: int
    total_words: int
    translate_url: str
    translated: int
    translated_percent: float
    translated_words: int
    # URLs of related API resources.
    repository_url: str
    file_url: str
    changes_list_url: str
    units_list_url: str

    @classmethod
    def from_dict(cls, data_dict):
        """Build a record from a mapping whose keys are the field names.

        The mapping is expanded into keyword arguments, so its keys must
        match the declared fields.
        """
        record = cls(**data_dict)
        return record
@dataclass
class WeblateChangeInfo(AbstractWeblateRecord):
    """Record for a change API response.

    weblate-4.18.2
    GET /api/changes/(int: id)/
    """

    unit: str
    translation: str
    component: str
    user: str
    author: str
    timestamp: str
    action: int
    action_name: str
    target: str
    id: int

    @classmethod
    def from_dict(cls, data_dict):
        """Build a record from a mapping whose keys are the field names.

        The mapping is expanded into keyword arguments, so its keys must
        match the declared fields.
        """
        record = cls(**data_dict)
        return record

649
tools/weblate/weblate_stats.py Executable file
View File

@ -0,0 +1,649 @@
#!/usr/bin/env python3
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import collections
import csv
import datetime
from datetime import timedelta
import io
import json
import logging
import os
import random
import re
import requests
import sys
from typing import Optional
from weblate_records import WeblateComponentInfo
from weblate_records import WeblateObjectStats
from weblate_records import WeblateProjectStats
from weblate_records import WeblateUserInfo
from weblate_records import WeblateUserStats
from WeblateUtils import IniConfig
from wlc import Weblate
import yaml
# Host of the Weblate instance that is queried.
WEBLATE_HOST = "https://openstack.weblate.cloud"
# Host that WeblateUtility swaps for WEBLATE_HOST in caller-supplied URLs.
EXAMPLE_HOST = "http://weblate.example.com"
# Template for an absolute URL; "%s" receives the path below the host.
WEBLATE_URI = "/".join((WEBLATE_HOST, "%s"))

LOG = logging.getLogger("weblate_stats")

# Names accepted as a version: "master" plus an optional suffix,
# "stable-<name>" or "openstack-user-survey".
WEBLATE_VER_EXPR = r"^(master[-,a-z]*|stable-[a-z]+|openstack-user-survey)$"
WEBLATE_VER_PATTERN = re.compile(WEBLATE_VER_EXPR)

# Zeroed counters used for a user without any recorded statistics.
DEFAULT_STATS = dict.fromkeys(
    (
        "translated",
        "approved",
        "needReview",
        "fuzzy",
        "failingCheck",
        "pending",
    ),
    0,
)
class WeblateUtility(object):
    """Utilities to invoke Weblate REST API.

    https://docs.weblate.org/en/weblate-4.18.2/api.html#projects
    https://docs.weblate.org/en/weblate-4.18.2/api.html#get--api-users-(str-username)-statistics-

    Every ``get_*`` method accepts an optional ``url`` keyword argument
    that replaces the default endpoint. The first occurrence of
    ``EXAMPLE_HOST`` in the chosen URL is rewritten to ``WEBLATE_HOST``
    before the request is sent.

    NOTE(review): list-style methods return only the ``results`` entry of
    the single response they read; if the server paginates, further pages
    are not followed.
    """

    user_agents = [
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64) Gecko/20100101 Firefox/32.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_6) AppleWebKit/537.78.2",
        "Mozilla/5.0 (Windows NT 6.3; WOW64) Gecko/20100101 Firefox/32.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X) Chrome/37.0.2062.120",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
    ]

    def __init__(
        self,
        wconfig,
        verify: bool = True,
        accept: str = "application/json, text/javascript",
        content_type: str = "application/json",
    ):
        """Store connection settings and prepare the request headers.

        :param wconfig: configuration object exposing ``url`` and ``key``
        :param verify: whether HTTPS certificates are verified
        :param accept: value of the ``Accept`` request header
        :param content_type: value of the ``Content-Type`` request header
        """
        self.url, self.key = wconfig.url, wconfig.key
        self.weblate_obj = Weblate()
        self.headers = {
            "Accept": accept,
            "Content-Type": content_type,
            "Authorization": "Token " + self.key,
        }
        self.verify = verify

    def _unify(self, locale: str) -> str:
        """Normalize a locale name: drop "-" and "_" and lowercase it."""
        return locale.replace("-", "").replace("_", "").lower()

    def _resolve_uri(self, default_uri: str, kargs: dict) -> str:
        """Pick the request URI and point it at the real Weblate host.

        A non-empty ``url`` entry in ``kargs`` wins over ``default_uri``.
        ``str.replace`` returns a new string, so its result must be kept;
        otherwise the placeholder host would stay in the URI.
        """
        uri = kargs.get("url") or default_uri
        return uri.replace(EXAMPLE_HOST, WEBLATE_HOST, 1)

    @staticmethod
    def _extract_checksum(web_url) -> str:
        """Return the text after "?checksum=" in ``web_url``, or ""."""
        if not web_url or "?checksum=" not in web_url:
            return ""
        return web_url.split("?checksum=", 1)[1]

    # API REQUEST
    def read_uri(self, uri, headers):
        """GET ``uri`` and return the response body as text.

        A random User-Agent is added to a copy of ``headers`` so the
        caller's dictionary is left untouched. Errors are logged and
        re-raised.
        """
        try:
            request_headers = dict(headers)
            request_headers["User-Agent"] = random.choice(
                WeblateUtility.user_agents
            )
            # Honour the certificate verification flag given at creation.
            req = requests.get(
                uri, headers=request_headers, verify=self.verify
            )
            return req.text
        except Exception as e:
            LOG.error('Error "%s" while reading uri %s', e, uri)
            raise

    def read_json_from_uri(self, uri):
        """GET ``uri`` and return the decoded JSON document.

        Parse errors are logged and re-raised.
        """
        data = self.read_uri(uri, self.headers)
        try:
            return json.loads(data)
        except Exception as e:
            LOG.error(
                'Error "%(error)s" parsing json from uri %(uri)s',
                {"error": e, "uri": uri},
            )
            raise

    def get_projects(self, **kargs) -> list:
        """Return the ``results`` list of the projects endpoint."""
        uri = self._resolve_uri(WEBLATE_URI % ("api/projects/"), kargs)
        LOG.debug("Reading projects from %s" % uri)
        projects_data = self.read_json_from_uri(uri)
        return projects_data["results"]

    def get_project_statistics(self, project_slug: str, **kargs):
        """Return the statistics of a project as WeblateProjectStats."""
        uri = self._resolve_uri(
            WEBLATE_URI % ("api/projects/%s/statistics/" % (project_slug)),
            kargs,
        )
        LOG.debug("Reading project statistics from %s" % uri)
        project_statistics_data = self.read_json_from_uri(uri)
        return WeblateProjectStats.from_dict(project_statistics_data)

    def get_object_statistics(self, obj: str, **kargs):
        """Return the statistics of an object as WeblateObjectStats."""
        uri = self._resolve_uri(
            WEBLATE_URI % ("api/%s/statistics/" % (obj)), kargs
        )
        LOG.debug("Reading object statistics from %s" % uri)
        object_data = self.read_json_from_uri(uri)
        return WeblateObjectStats.from_dict(object_data)

    def get_users(self, **kargs) -> list:
        """Return the ``results`` list of the users endpoint."""
        uri = self._resolve_uri(WEBLATE_URI % ("api/users/"), kargs)
        LOG.debug("Reading users from %s" % uri)
        users_data = self.read_json_from_uri(uri)
        return users_data["results"]

    def get_user(self, username: str, **kargs):
        """Return the decoded JSON document of one user."""
        uri = self._resolve_uri(
            WEBLATE_URI % ("api/users/%s/" % (username)), kargs
        )
        LOG.debug("Reading user from %s" % uri)
        user_data = self.read_json_from_uri(uri)
        return user_data

    def get_user_statistics(self, username: str, **kargs):
        """Return the statistics of a user as WeblateUserStats."""
        uri = self._resolve_uri(
            WEBLATE_URI % ("api/users/%s/statistics/" % (username)), kargs
        )
        LOG.debug("Reading user statistics from %s" % uri)
        user_data = self.read_json_from_uri(uri)
        return WeblateUserStats.from_dict(user_data)

    def get_group(self, group_id: int, **kargs):
        """Return one group as WeblateGroupInfo."""
        # Imported here because the module header only imports the other
        # record types; a group response does not fit WeblateUserInfo.
        from weblate_records import WeblateGroupInfo

        uri = self._resolve_uri(
            WEBLATE_URI % ("api/groups/%s/" % (group_id)), kargs
        )
        LOG.debug("Reading group from %s" % uri)
        group_data = self.read_json_from_uri(uri)
        return WeblateGroupInfo.from_dict(group_data)

    def get_component(
        self, project: str, component: str, **kargs
    ) -> "WeblateComponentInfo":
        """Return one component as WeblateComponentInfo."""
        uri = self._resolve_uri(
            WEBLATE_URI % ("api/components/%s/%s/" % (project, component)),
            kargs,
        )
        LOG.debug("Reading component from %s" % uri)
        component_data = self.read_json_from_uri(uri)
        return WeblateComponentInfo.from_dict(component_data)

    def get_translation_changes(
        self, project: str, component: str, lang: str, **kargs
    ) -> list:
        """Return the decoded changes document of one translation.

        An optional ``checksum`` keyword is appended to the URI as the
        ``checksum`` query parameter.
        """
        uri: str = self._resolve_uri(
            WEBLATE_URI
            % (
                "api/translations/%s/%s/%s/changes/"
                % (project, component, lang)
            ),
            kargs,
        )
        if "checksum" in kargs:
            uri += "?checksum=%s" % (kargs.get("checksum"))
        LOG.debug("Reading translation changes from %s" % uri)
        translation_data = self.read_json_from_uri(uri)
        return translation_data

    def get_translations(
        self, project: str, component: str, language: str, **kargs
    ) -> Optional[dict]:
        """Return the decoded document of one translation.

        Optional keywords:

        * ``checksum``: a URL carrying ``?checksum=<value>``; the value
          is forwarded as the ``checksum`` query parameter when present.
        * ``user_lang``: when given, ``None`` is returned unless it
          matches the translation's language code, name or an alias
          (compared after ``_unify``).
        """
        uri: str = self._resolve_uri(
            WEBLATE_URI
            % ("api/translations/%s/%s/%s/" % (project, component, language)),
            kargs,
        )
        checksum: str = self._extract_checksum(kargs.get("checksum"))
        if checksum:
            uri += "?checksum=%s" % (checksum)
        LOG.debug("Reading translation from %s" % uri)
        translation_data = self.read_json_from_uri(uri)

        user_lang = kargs.get("user_lang")
        if user_lang is None:
            # No language filter requested.
            return translation_data

        language_info = translation_data["language"]
        langs = {
            self._unify(language_info["code"]),
            self._unify(language_info["name"]),
            self._unify(translation_data["language_code"]),
        }
        for lang in language_info.get("aliases") or []:
            langs.add(self._unify(lang))
        if self._unify(user_lang) not in langs:
            return None
        return translation_data

    def get_change(self, id: int, **kargs) -> dict:
        """Return the decoded JSON document of one change."""
        uri = self._resolve_uri(
            WEBLATE_URI % ("api/changes/%d/" % (id)), kargs
        )
        LOG.debug("Reading change from %s" % uri)
        change_data = self.read_json_from_uri(uri)
        return change_data

    def get_units(self, start_datetime: str, end_datetime: str, **kargs):
        """Return the ``results`` list of units changed in a date range.

        The range is sent as a ``q`` search query unless ``url`` is given,
        in which case that URL is used as-is.
        """
        change_date_query = "?q=changed:>=%s AND changed:<=%s " % (
            start_datetime,
            end_datetime,
        )
        uri = self._resolve_uri(
            WEBLATE_URI % ("api/units/%s" % (change_date_query)), kargs
        )
        LOG.debug("Reading units from %s" % uri)
        units_data = self.read_json_from_uri(uri)
        return units_data["results"]
class LanguageTeam(object):
    """Members of one language team read from the team YAML file."""

    def __init__(self, language_code, team_info):
        """Store the team's language and member IDs.

        :param language_code: key of the team in the YAML file
        :param team_info: mapping with ``language`` and ``translators``
            and optionally ``reviewers`` and ``coordinators``
        """
        self.language_code = language_code
        self.language = team_info["language"]
        # A Weblate ID which only consists of numbers is a valid ID in
        # Weblate. Such an entry is interpreted as an integer unless it
        # is quoted in the YAML file. Ensure to stringify them.
        self.translators = [str(i) for i in team_info["translators"]]
        self.reviewers = [str(i) for i in team_info.get("reviewers", [])]
        self.coordinators = [
            str(i) for i in team_info.get("coordinators", [])
        ]

    @classmethod
    def load_from_language_team_yaml(cls, trans_team_uri, lang_list):
        """Load the language teams from a YAML file.

        :param trans_team_uri: path of the YAML file
        :param lang_list: language codes to keep; a falsy value keeps
            every team in the file
        :returns: list of LanguageTeam objects

        Exits the process with status 1 when a requested language code
        is missing from the file.
        """
        LOG.debug("Process list of language team from uri: %s", trans_team_uri)
        # Close the file deterministically; an empty document loads as
        # None, which is treated as "no teams".
        with io.open(trans_team_uri, "r") as team_file:
            content = yaml.safe_load(team_file) or {}

        if lang_list:
            lang_notfound = [
                lang_code for lang_code in lang_list
                if lang_code not in content
            ]
            if lang_notfound:
                LOG.error(
                    "Language %s not found in %s.",
                    ", ".join(lang_notfound),
                    trans_team_uri,
                )
                sys.exit(1)

        return [
            cls(lang_code, team_info)
            for lang_code, team_info in content.items()
            if not lang_list or lang_code in lang_list
        ]
class User(object):
    """A translator together with the translation counters."""

    # Counter names, in the column order used for the output rows.
    trans_fields = [
        "translated",
        "approved",
        "needReview",
        "fuzzy",
        "failingCheck",
        "pending",
    ]
    review_fields = ["total", "approved"]  # Todo

    def __init__(self, user_id, language_code):
        """Create a user with no recorded counters.

        :param user_id: Weblate user ID
        :param language_code: language code of the user's team
        """
        self.user_id = user_id
        self.lang = language_code
        # Maps a counter name to its count; a missing counter reads as 0.
        self.stats = collections.defaultdict(int)

    def __str__(self):
        return "<%s: user_id=%s, lang=%s, stats=%s>" % (
            self.__class__.__name__,
            self.user_id,
            self.lang,
            self.stats,
        )

    def __lt__(self, other):
        """Order users by language first, then by user ID."""
        if self.lang != other.lang:
            return self.lang < other.lang
        return self.user_id < other.user_id

    def _counts(self):
        """Return the counters in ``trans_fields`` order, 0 if missing."""
        stats = self.stats or {}
        return [stats.get(field, 0) for field in self.trans_fields]

    def needs_output(self, include_no_activities):
        """Tell whether this user belongs in the output.

        A user is reported when ``include_no_activities`` is true or when
        at least one counter is non-zero.
        """
        if include_no_activities:
            return True
        return any(self._counts())

    @staticmethod
    def get_flattened_data_title():
        """Return the header row matching convert_to_flattened_data."""
        # Derived from trans_fields so header and data cannot disagree.
        return ["user_id", "main_lang"] + list(User.trans_fields)

    def convert_to_flattened_data(self, detail=False):
        """Return the output rows of this user.

        One row is produced: user ID, language and the counters in
        ``trans_fields`` order. Which users are written at all is decided
        by ``needs_output``, so an all-zero row is still returned here.

        :param detail: accepted for compatibility; the counters carry no
            per-project or per-version breakdown, so no extra rows are
            produced
        """
        return [[self.user_id, self.lang] + self._counts()]
def write_stats_to_file(users, output_file, include_no_activities, detail):
    """Write the statistics of the selected users to ``output_file``.

    :param users: iterable of User objects
    :param output_file: path of the CSV file to write
    :param include_no_activities: also write users without activity
    :param detail: forwarded to the row conversion of each user
    """
    selected = []
    for user in users:
        # Covers both an empty mapping and None. Each user gets its own
        # copy so the module-level defaults are never shared or mutated.
        if not user.stats:
            user.stats = dict(DEFAULT_STATS)
        if user.needs_output(include_no_activities):
            selected.append(user)
    _write_stats_to_csvfile(sorted(selected), output_file, detail)
    LOG.info("Stats has been written to %s", output_file)
def _write_stats_to_csvfile(users, output_file, detail):
    """Write the header row and every user's rows as CSV.

    :param users: iterable of User objects, already in output order
    :param output_file: path of the CSV file to (over)write
    :param detail: forwarded to User.convert_to_flattened_data
    """
    # newline="" lets the csv writer control line endings itself, as the
    # csv module documentation requires for files passed to csv.writer.
    with open(output_file, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(User.get_flattened_data_title())
        for user in users:
            writer.writerows(user.convert_to_flattened_data(detail))
def _comma_separated_list(s):
return s.split(",")
def _build_arg_parser(default_start_date, default_end_date):
    """Create the command-line parser of the statistics script."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--start-date",
        default=default_start_date,
        help=("Specify the start date. " "Default:%s" % default_start_date),
    )
    parser.add_argument(
        "-e",
        "--end-date",
        default=default_end_date,
        help=("Specify the end date. " "Default:%s" % default_end_date),
    )
    parser.add_argument(
        "-o",
        "--output-file",
        help=(
            "Specify the output file. "
            "Default: weblate_stats_output.csv."
        ),
    )
    parser.add_argument(
        "-p",
        "--project",
        type=_comma_separated_list,
        help=(
            "Specify project(s). Comma-separated list. "
            "Otherwise all Weblate projects are processed."
        ),
    )
    parser.add_argument(
        "-l",
        "--lang",
        type=_comma_separated_list,
        help=(
            "Specify language(s). Comma-separated list. "
            "Language code like zh-CN, ja needs to be used. "
            "Otherwise all languages are processed."
        ),
    )
    parser.add_argument(
        "-t",
        "--target-version",
        type=_comma_separated_list,
        help=(
            "Specify version(s). Comma-separated list. "
            "Otherwise all available versions are "
            "processed."
        ),
    )
    parser.add_argument(
        "-u",
        "--user",
        type=_comma_separated_list,
        help=(
            "Specify user(s). Comma-separated list. "
            "Otherwise all users are processed."
        ),
    )
    parser.add_argument(
        "--detail",
        action="store_true",
        help=(
            "If specified, statistics per project "
            "and version are output in addition to "
            "total statistics."
        ),
    )
    parser.add_argument(
        "--include-no-activities",
        action="store_true",
        help=(
            "If specified, stats for users with no "
            "activities are output as well. "
            "By default, stats only for users with "
            "any activities are output."
        ),
    )
    parser.add_argument(
        "--no-verify",
        action="store_false",
        dest="verify",
        help="Do not perform HTTPS certificate verification",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Enable debug message."
    )
    parser.add_argument("user_yaml", help="YAML file of the user list")
    return parser


def main():
    """Collect Weblate contributor statistics and write them as CSV.

    Reads ``~/.config/weblate.ini``, parses the command line, loads the
    language teams from the given YAML file, gathers the statistics and
    writes them to the output file. The default period is the last 180
    days. Exits early when the configuration cannot be loaded.
    """
    # Loads weblate.ini configuration file
    try:
        wc = IniConfig(os.path.expanduser("~/.config/weblate.ini"))
    except ValueError as e:
        sys.exit(e)

    # "datetime" is imported as a module here, so the class has to be
    # reached through it.
    end_date = datetime.datetime.now()
    start_date = end_date - timedelta(days=180)
    default_start_date = start_date.strftime("%Y-%m-%d")
    default_end_date = end_date.strftime("%Y-%m-%d")

    parser = _build_arg_parser(default_start_date, default_end_date)
    options = parser.parse_args()

    logging_level = logging.DEBUG if options.debug else logging.INFO
    formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
    handler = logging.StreamHandler()
    handler.setLevel(logging_level)
    handler.setFormatter(formatter)
    LOG.setLevel(logging_level)
    LOG.addHandler(handler)

    language_teams = LanguageTeam.load_from_language_team_yaml(
        options.user_yaml, options.lang
    )
    versions = [v.replace("/", "-") for v in options.target_version or []]
    users = get_weblate_stats(
        wc,
        options.verify,
        options.start_date,
        options.end_date,
        language_teams,
        options.project,
        versions,
        options.user,
    )

    # An explicit --output-file wins; otherwise use the documented
    # default name.
    output_file = options.output_file or "weblate_stats_output.csv"
    write_stats_to_file(
        users,
        output_file,
        options.include_no_activities,
        options.detail,
    )
def _add_unit_to_stats(stats, unit):
    """Increase the counters in ``stats`` for every flag set on ``unit``."""
    if unit["translated"]:
        stats["translated"] += 1
    if unit["approved"]:
        stats["approved"] += 1
    if unit["has_suggestion"] or unit["has_comment"]:
        stats["needReview"] += 1
    if unit["fuzzy"]:
        stats["fuzzy"] += 1
    if unit["has_failing_check"]:
        stats["failingCheck"] += 1
    if unit["pending"]:
        stats["pending"] += 1


def get_weblate_stats(
    wc,
    verify,
    start_date,
    end_date,
    language_teams,
    project_list,
    version_list,
    user_list,
):
    """Collect per-user translation counters from Weblate.

    For every translator of the given teams (optionally restricted to
    ``user_list``) the units changed between ``start_date`` and
    ``end_date`` are inspected. A unit counts for a user when its
    translation is in the user's language and the translation's
    ``last_author`` names that user.

    :param wc: Weblate configuration passed to WeblateUtility
    :param verify: whether HTTPS certificates are verified
    :param start_date: start of the period
    :param end_date: end of the period
    :param language_teams: iterable of LanguageTeam objects
    :param project_list: requested projects (currently not applied)
    :param version_list: requested versions (currently not applied)
    :param user_list: user IDs to process; falsy means all users
    :returns: list of User objects for all translators; users that were
        skipped keep their initial empty ``stats``
    """
    LOG.info(
        "Getting Weblate contributors statistics (from %s to %s) ...",
        start_date,
        end_date,
    )
    weblateUtil = WeblateUtility(wc, verify)
    if project_list or version_list:
        # Nothing below filters by project or version, so say so instead
        # of silently ignoring the options.
        LOG.warning(
            "Project and version filters are not applied to the "
            "statistics; all changed units are considered."
        )

    users = []
    for team in language_teams:
        users += [
            User(user_id, team.language_code) for user_id in team.translators
        ]

    # The unit query does not depend on the user, so it is fetched once
    # and only when at least one user is actually processed.
    unit_data = None
    for user in users:
        if user_list and user.user_id not in user_list:
            continue
        user_data = weblateUtil.get_user(user.user_id)
        if user_data.get("detail") == "Not found.":
            continue
        LOG.info(
            "Getting for user %(user_id)s %(user_lang)s",
            {"user_id": user.user_id, "user_lang": user.lang},
        )
        if unit_data is None:
            unit_data = weblateUtil.get_units(start_date, end_date)

        # NOTE(review): assumes "last_author" carries the user's full
        # name or username -- confirm against the Weblate API.
        author_names = {
            user_data.get("full_name"),
            user_data.get("username"),
            user.user_id,
        }
        author_names.discard(None)

        # Fresh counters per user, so nothing leaks between users and
        # nothing is counted twice.
        stats = dict(DEFAULT_STATS)
        for unit in unit_data:
            translation_data = weblateUtil.get_translations(
                None,
                None,
                None,
                url=unit["translation"],
                checksum=unit["web_url"],
                user_lang=user.lang,
            )
            if translation_data is None:
                # Translation is in another language than the user's.
                continue
            if translation_data["last_author"] not in author_names:
                continue
            _add_unit_to_stats(stats, unit)
        LOG.debug("Got: %s", stats)
        user.stats = stats
    return users
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()