Zanata stats tool: improve speed with user API

This patch speeds up the Zanata stats tool by using the
user-based Zanata API instead of the project-based APIs.
With the project-based APIs, a run took more than a day as
more Zanata users and translation data accumulated.

Note that the API scheme follows the one used by Stackalytics:
https://opendev.org/x/stackalytics/src/branch/master/stackalytics/processor/zanata.py#L38

Change-Id: Ia2462447f7a0cc2534e877976838de09e8683e89
This commit is contained in:
Ian Y. Choi 2021-02-28 03:43:32 +09:00 committed by Akihiro Motoki
parent 27a98e3ec4
commit 82b56bc56f

View File

@ -74,22 +74,9 @@ class ZanataUtility(object):
def _is_valid_version(version): def _is_valid_version(version):
return bool(ZANATA_VERSION_PATTERN.match(version)) return bool(ZANATA_VERSION_PATTERN.match(version))
def get_project_versions(self, project_id): def get_user_stats(self, zanata_user_id, start_date, end_date):
uri = ZANATA_URI % ('projects/p/%s' % project_id) uri = ZANATA_URI % ('stats/user/%s/%s..%s'
LOG.debug("Reading iterations for project %s" % project_id) % (zanata_user_id, start_date, end_date))
project_data = self.read_json_from_uri(uri)
if 'iterations' in project_data:
return [interation_data['id']
for interation_data in project_data['iterations']
if self._is_valid_version(interation_data['id'])]
else:
return []
def get_user_stats(self, project_id, iteration_id, zanata_user_id,
start_date, end_date):
uri = ZANATA_URI % ('stats/project/%s/version/%s/contributor/%s/%s..%s'
% (project_id, iteration_id, zanata_user_id,
start_date, end_date))
return self.read_json_from_uri(uri) return self.read_json_from_uri(uri)
@ -128,9 +115,9 @@ class LanguageTeam(object):
class User(object): class User(object):
trans_fields = ['total', 'translated', 'needReview', trans_fields = ['total', 'Translated', 'NeedReview',
'approved', 'rejected'] 'Approved', 'Rejected']
review_fields = ['total', 'approved', 'rejected'] review_fields = ['total', 'Approved', 'Rejected']
def __init__(self, user_id, language_code): def __init__(self, user_id, language_code):
self.user_id = user_id self.user_id = user_id
@ -151,40 +138,55 @@ class User(object):
else: else:
return self.user_id < other.user_id return self.user_id < other.user_id
def read_from_zanata_stats(self, zanata_stats, project_id, version): def read_from_zanata_stats(self, zanata_stats, project_list, version_list):
# data format (Zanata 3.9.6) # data format (Zanata 4.3.3)
# [
# { # {
# "username": "amotoki", # "savedDate": "2020-09-06",
# "contributions": [ # "projectSlug": "i18n",
# { # "projectName": "i18n",
# "locale": "ja", # "versionSlug": "master",
# "translation-stats": { # "localeId": "ko-KR",
# "translated": 7360, # "localeDisplayName": "Korean (South Korea)",
# "needReview": 0, # "savedState": "Translated",
# "approved": 152, # "wordCount": 119
# "rejected": 0
# },
# "review-stats": {
# "approved": 220,
# "rejected": 0
# }
# } # }
# ] # ]
# } for zanata_stat in zanata_stats:
stats = [d for d in zanata_stats['contributions']
if d['locale'] == self.lang]
if not stats:
return
stats = stats[0] project_id = zanata_stat['projectSlug']
trans_stats = stats.get('translation-stats', {}) version = zanata_stat['versionSlug']
if trans_stats: lang = zanata_stat['localeId']
trans_stats['total'] = sum(trans_stats.values()) stat_state = zanata_stat['savedState']
review_stats = stats.get('review-stats', {}) word_count = zanata_stat['wordCount']
if review_stats:
review_stats['total'] = sum(review_stats.values()) if project_list and project_id not in project_list:
self.stats[project_id][version] = {'translation-stats': trans_stats, continue
'review-stats': review_stats}
if version_list and version not in version_list:
continue
if self.lang != lang:
continue
my_project = self.stats[project_id]
if version not in my_project:
my_project[version] = {
'translation-stats': collections.defaultdict(int),
'review-stats': collections.defaultdict(int),
}
my_version = my_project[version]
if stat_state in self.trans_fields:
my_trans_stats = my_version['translation-stats']
my_trans_stats[stat_state] += word_count
my_trans_stats['total'] += word_count
if stat_state in self.review_fields:
my_review_stats = my_version['review-stats']
my_review_stats[stat_state] += word_count
my_review_stats['total'] += word_count
def populate_total_stats(self): def populate_total_stats(self):
@ -269,23 +271,15 @@ def get_zanata_stats(start_date, end_date, language_teams, project_list,
if not project_list: if not project_list:
project_list = zanataUtil.get_projects() project_list = zanataUtil.get_projects()
for project_id in project_list:
for version in zanataUtil.get_project_versions(project_id):
if version_list and version not in version_list:
continue
for user in users: for user in users:
if user_list and user.user_id not in user_list: if user_list and user.user_id not in user_list:
continue continue
LOG.info('Getting %(project_id)s %(version)s ' LOG.info('Getting for user %(user_id)s %(user_lang)s',
'for user %(user_id)s %(user_lang)s', {'user_id': user.user_id, 'user_lang': user.lang})
{'project_id': project_id,
'version': version,
'user_id': user.user_id,
'user_lang': user.lang})
data = zanataUtil.get_user_stats( data = zanataUtil.get_user_stats(
project_id, version, user.user_id, start_date, end_date) user.user_id, start_date, end_date)
LOG.debug('Got: %s', data) LOG.debug('Got: %s', data)
user.read_from_zanata_stats(data, project_id, version) user.read_from_zanata_stats(data, project_list, version_list)
LOG.debug('=> %s', user) LOG.debug('=> %s', user)
return users return users