From 8d478d277fbcbc569ea726696505a0190fba4bc2 Mon Sep 17 00:00:00 2001 From: Akihiro Motoki Date: Tue, 7 Mar 2017 19:58:41 +0000 Subject: [PATCH] zanata_stat: calculate stats summary across projects and versions Change-Id: Id9ea9e3b66bb14720ccc53893a9590d59c34bde6 Closes-Bug: #1670640 --- tools/zanata/zanata_stats.py | 123 ++++++++++++++++++++++------------- 1 file changed, 79 insertions(+), 44 deletions(-) diff --git a/tools/zanata/zanata_stats.py b/tools/zanata/zanata_stats.py index 4d8f8a7..a95d8a0 100755 --- a/tools/zanata/zanata_stats.py +++ b/tools/zanata/zanata_stats.py @@ -16,6 +16,7 @@ from __future__ import print_function import argparse +import collections import csv import datetime import io @@ -129,19 +130,19 @@ class LanguageTeam(object): class User(object): + trans_fields = ['total', 'translated', 'needReview', + 'approved', 'rejected'] + review_fields = ['total', 'approved', 'rejected'] + def __init__(self, user_id, language_code): self.user_id = user_id self.lang = language_code - self.translation_stats = {} - self.review_stats = {} + self.stats = collections.defaultdict(dict) def __str__(self): - return ("<%s: user_id=%s, lang=%s, " - "translation_stats=%s, review_stats=%s" % + return ("<%s: user_id=%s, lang=%s, stats=%s" % (self.__class__.__name__, - self.user_id, self.lang, - self.translation_stats, - self.review_stats)) + self.user_id, self.lang, self.stats,)) def __repr__(self): return repr(self.convert_to_serializable_data()) @@ -152,7 +153,7 @@ class User(object): else: return self.user_id < other.user_id - def read_from_zanata_stats(self, zanata_stats): + def read_from_zanata_stats(self, zanata_stats, project_id, version): # data format (Zanata 3.9.6) # { # "username": "amotoki", @@ -178,28 +179,43 @@ class User(object): return stats = stats[0] - trans_stats = stats.get('translation-stats') + trans_stats = stats.get('translation-stats', {}) if trans_stats: trans_stats['total'] = sum(trans_stats.values()) - self.translation_stats = 
trans_stats - review_stats = stats.get('review-stats') + review_stats = stats.get('review-stats', {}) if review_stats: review_stats['total'] = sum(review_stats.values()) - self.review_stats = review_stats + self.stats[project_id][version] = {'translation-stats': trans_stats, + 'review-stats': review_stats} + + def populate_total_stats(self): + + total_trans = dict([(k, 0) for k in self.trans_fields]) + total_review = dict([(k, 0) for k in self.review_fields]) + + for project_id, versions in self.stats.items(): + for version, stats in versions.items(): + trans_stats = stats.get('translation-stats', {}) + for k in self.trans_fields: + total_trans[k] += trans_stats.get(k, 0) + review_stats = stats.get('review-stats', {}) + for k in self.review_fields: + total_review[k] += review_stats.get(k, 0) + self.stats['__total__']['translation-stats'] = total_trans + self.stats['__total__']['review-stats'] = total_review def needs_output(self, include_no_activities): if include_no_activities: return True - elif self.translation_stats or self.review_stats: - return True - else: - return False + return bool(self.stats) and all(self.stats.values()) @staticmethod def get_flattened_data_title(): return [ 'user_id', 'lang', + 'project', + 'version', 'translation-total', 'translated', 'needReview', @@ -210,25 +226,37 @@ class User(object): 'review-rejected' ] - def convert_to_flattened_data(self): - return [ - self.user_id, - self.lang, - self.translation_stats.get('total', 0), - self.translation_stats.get('translated', 0), - self.translation_stats.get('needReview', 0), - self.translation_stats.get('approved', 0), - self.translation_stats.get('rejected', 0), - self.review_stats.get('total', 0), - self.review_stats.get('approved', 0), - self.review_stats.get('rejected', 0), - ] + def convert_to_flattened_data(self, detail=False): + self.populate_total_stats() - def convert_to_serializable_data(self): + data = [] + + for project_id, versions in self.stats.items(): + if project_id == 
'__total__': + continue + for version, stats in versions.items(): + trans_stats = stats.get('translation-stats', {}) + review_stats = stats.get('review-stats', {}) + if detail: + data.append( + [self.user_id, self.lang, project_id, version] + + [trans_stats.get(k, 0) for k in self.trans_fields] + + [review_stats.get(k, 0) for k in self.review_fields]) + + data.append([self.user_id, self.lang, '-', '-'] + + [self.stats['__total__']['translation-stats'][k] + for k in self.trans_fields] + + [self.stats['__total__']['review-stats'][k] + for k in self.review_fields]) + + return data + + def convert_to_serializable_data(self, detail): + self.populate_total_stats() return {'user_id': self.user_id, 'lang': self.lang, - 'translation-stats': self.translation_stats, - 'review-stats': self.review_stats} + 'stats': (self.stats if detail + else self.stats['__total__'])} def get_zanata_stats(start_date, end_date, language_teams, project_list, @@ -259,36 +287,37 @@ def get_zanata_stats(start_date, end_date, language_teams, project_list, data = zanataUtil.get_user_stats( project_id, version, user.user_id, start_date, end_date) LOG.debug('Got: %s', data) - user.read_from_zanata_stats(data) + user.read_from_zanata_stats(data, project_id, version) LOG.debug('=> %s', user) return users def write_stats_to_file(users, output_file, file_format, - include_no_activities): - stats = sorted([user for user in users + include_no_activities, detail): + users = sorted([user for user in users if user.needs_output(include_no_activities)]) if file_format == 'csv': - _write_stats_to_csvfile(stats, output_file) + _write_stats_to_csvfile(users, output_file, detail) else: - _write_stats_to_jsonfile(stats, output_file) + _write_stats_to_jsonfile(users, output_file, detail) LOG.info('Stats has been written to %s', output_file) -def _write_stats_to_csvfile(stats, output_file): +def _write_stats_to_csvfile(users, output_file, detail): mode = 'w' if six.PY3 else 'wb' with open(output_file, mode) as csvfile: 
writer = csv.writer(csvfile) writer.writerow(User.get_flattened_data_title()) - for stat in stats: - writer.writerow(stat.convert_to_flattened_data()) + for user in users: + writer.writerows(user.convert_to_flattened_data(detail)) -def _write_stats_to_jsonfile(stats, output_file): - stats = [stat.convert_to_serializable_data() for stat in stats] +def _write_stats_to_jsonfile(users, output_file, detail): + users = [user.convert_to_serializable_data(detail) + for user in users] with open(output_file, 'w') as f: - f.write(json.dumps(stats, indent=4, sort_keys=True)) + f.write(json.dumps(users, indent=4, sort_keys=True)) def _comma_separated_list(s): @@ -332,6 +361,11 @@ def main(): type=_comma_separated_list, help=("Specify user(s). Comma-separated list. " "Otherwise all users are processed.")) + parser.add_argument('--detail', + action='store_true', + help=("If specified, statistics per project " + "and version are output in addition to " + "total statistics.")) parser.add_argument("--include-no-activities", action='store_true', help=("If specified, stats for users with no " @@ -368,7 +402,8 @@ def main(): 'zanata_stats_output.%s' % options.format) write_stats_to_file(users, output_file, options.format, - options.include_no_activities) + options.include_no_activities, + options.detail) if __name__ == '__main__':