From 8d478d277fbcbc569ea726696505a0190fba4bc2 Mon Sep 17 00:00:00 2001
From: Akihiro Motoki <motoki@da.jp.nec.com>
Date: Tue, 7 Mar 2017 19:58:41 +0000
Subject: [PATCH] zanata_stat: calculate stats summary across proejcts and
 versions

Change-Id: Id9ea9e3b66bb14720ccc53893a9590d59c34bde6
Closes-Bug: #1670640
---
 tools/zanata/zanata_stats.py | 123 ++++++++++++++++++++++-------------
 1 file changed, 79 insertions(+), 44 deletions(-)

diff --git a/tools/zanata/zanata_stats.py b/tools/zanata/zanata_stats.py
index 4d8f8a7..a95d8a0 100755
--- a/tools/zanata/zanata_stats.py
+++ b/tools/zanata/zanata_stats.py
@@ -16,6 +16,7 @@
 from __future__ import print_function
 
 import argparse
+import collections
 import csv
 import datetime
 import io
@@ -129,19 +130,19 @@ class LanguageTeam(object):
 
 class User(object):
 
+    trans_fields = ['total', 'translated', 'needReview',
+                    'approved', 'rejected']
+    review_fields = ['total', 'approved', 'rejected']
+
     def __init__(self, user_id, language_code):
         self.user_id = user_id
         self.lang = language_code
-        self.translation_stats = {}
-        self.review_stats = {}
+        self.stats = collections.defaultdict(dict)
 
     def __str__(self):
-        return ("<%s: user_id=%s, lang=%s, "
-                "translation_stats=%s, review_stats=%s" %
+        return ("<%s: user_id=%s, lang=%s, stats=%s" %
                 (self.__class__.__name__,
-                 self.user_id, self.lang,
-                 self.translation_stats,
-                 self.review_stats))
+                 self.user_id, self.lang, self.stats,))
 
     def __repr__(self):
         return repr(self.convert_to_serializable_data())
@@ -152,7 +153,7 @@ class User(object):
         else:
             return self.user_id < other.user_id
 
-    def read_from_zanata_stats(self, zanata_stats):
+    def read_from_zanata_stats(self, zanata_stats, project_id, version):
         # data format (Zanata 3.9.6)
         # {
         #     "username": "amotoki",
@@ -178,28 +179,43 @@ class User(object):
             return
 
         stats = stats[0]
-        trans_stats = stats.get('translation-stats')
+        trans_stats = stats.get('translation-stats', {})
         if trans_stats:
             trans_stats['total'] = sum(trans_stats.values())
-            self.translation_stats = trans_stats
-        review_stats = stats.get('review-stats')
+        review_stats = stats.get('review-stats', {})
         if review_stats:
             review_stats['total'] = sum(review_stats.values())
-            self.review_stats = review_stats
+        self.stats[project_id][version] = {'translation-stats': trans_stats,
+                                           'review-stats': review_stats}
+
+    def populate_total_stats(self):
+
+        total_trans = dict([(k, 0) for k in self.trans_fields])
+        total_review = dict([(k, 0) for k in self.review_fields])
+
+        for project_id, versions in self.stats.items():
+            for version, stats in versions.items():
+                trans_stats = stats.get('translation-stats', {})
+                for k in self.trans_fields:
+                    total_trans[k] += trans_stats.get(k, 0)
+                review_stats = stats.get('review-stats', {})
+                for k in self.review_fields:
+                    total_review[k] += review_stats.get(k, 0)
+        self.stats['__total__']['translation-stats'] = total_trans
+        self.stats['__total__']['review-stats'] = total_review
 
     def needs_output(self, include_no_activities):
         if include_no_activities:
             return True
-        elif self.translation_stats or self.review_stats:
-            return True
-        else:
-            return False
+        return bool(self.stats) and all(self.stats.values())
 
     @staticmethod
     def get_flattened_data_title():
         return [
             'user_id',
             'lang',
+            'project',
+            'version',
             'translation-total',
             'translated',
             'needReview',
@@ -210,25 +226,37 @@ class User(object):
             'review-rejected'
         ]
 
-    def convert_to_flattened_data(self):
-        return [
-            self.user_id,
-            self.lang,
-            self.translation_stats.get('total', 0),
-            self.translation_stats.get('translated', 0),
-            self.translation_stats.get('needReview', 0),
-            self.translation_stats.get('approved', 0),
-            self.translation_stats.get('rejected', 0),
-            self.review_stats.get('total', 0),
-            self.review_stats.get('approved', 0),
-            self.review_stats.get('rejected', 0),
-        ]
+    def convert_to_flattened_data(self, detail=False):
+        self.populate_total_stats()
 
-    def convert_to_serializable_data(self):
+        data = []
+
+        for project_id, versions in self.stats.items():
+            if project_id == '__total__':
+                continue
+            for version, stats in versions.items():
+                trans_stats = stats.get('translation-stats', {})
+                review_stats = stats.get('review-stats', {})
+                if detail:
+                    data.append(
+                        [self.user_id, self.lang, project_id, version] +
+                        [trans_stats.get(k, 0) for k in self.trans_fields] +
+                        [review_stats.get(k, 0) for k in self.review_fields])
+
+        data.append([self.user_id, self.lang, '-', '-'] +
+                    [self.stats['__total__']['translation-stats'][k]
+                     for k in self.trans_fields] +
+                    [self.stats['__total__']['review-stats'][k]
+                     for k in self.review_fields])
+
+        return data
+
+    def convert_to_serializable_data(self, detail):
+        self.populate_total_stats()
         return {'user_id': self.user_id,
                 'lang': self.lang,
-                'translation-stats': self.translation_stats,
-                'review-stats': self.review_stats}
+                'stats': (self.stats if detail
+                          else self.stats['__total__'])}
 
 
 def get_zanata_stats(start_date, end_date, language_teams, project_list,
@@ -259,36 +287,37 @@ def get_zanata_stats(start_date, end_date, language_teams, project_list,
                 data = zanataUtil.get_user_stats(
                     project_id, version, user.user_id, start_date, end_date)
                 LOG.debug('Got: %s', data)
-                user.read_from_zanata_stats(data)
+                user.read_from_zanata_stats(data, project_id, version)
                 LOG.debug('=> %s', user)
 
     return users
 
 
 def write_stats_to_file(users, output_file, file_format,
-                        include_no_activities):
-    stats = sorted([user for user in users
+                        include_no_activities, detail):
+    users = sorted([user for user in users
                     if user.needs_output(include_no_activities)])
     if file_format == 'csv':
-        _write_stats_to_csvfile(stats, output_file)
+        _write_stats_to_csvfile(users, output_file, detail)
     else:
-        _write_stats_to_jsonfile(stats, output_file)
+        _write_stats_to_jsonfile(users, output_file, detail)
     LOG.info('Stats has been written to %s', output_file)
 
 
-def _write_stats_to_csvfile(stats, output_file):
+def _write_stats_to_csvfile(users, output_file, detail):
     mode = 'w' if six.PY3 else 'wb'
     with open(output_file, mode) as csvfile:
         writer = csv.writer(csvfile)
         writer.writerow(User.get_flattened_data_title())
-        for stat in stats:
-            writer.writerow(stat.convert_to_flattened_data())
+        for user in users:
+            writer.writerows(user.convert_to_flattened_data(detail))
 
 
-def _write_stats_to_jsonfile(stats, output_file):
-    stats = [stat.convert_to_serializable_data() for stat in stats]
+def _write_stats_to_jsonfile(users, output_file, detail):
+    users = [user.convert_to_serializable_data(detail)
+             for user in users]
     with open(output_file, 'w') as f:
-        f.write(json.dumps(stats, indent=4, sort_keys=True))
+        f.write(json.dumps(users, indent=4, sort_keys=True))
 
 
 def _comma_separated_list(s):
@@ -332,6 +361,11 @@ def main():
                         type=_comma_separated_list,
                         help=("Specify user(s). Comma-separated list. "
                               "Otherwise all users are processed."))
+    parser.add_argument('--detail',
+                        action='store_true',
+                        help=("If specified, statistics per project "
+                              "and version are output in addition to "
+                              "total statistics."))
     parser.add_argument("--include-no-activities",
                         action='store_true',
                         help=("If specified, stats for users with no "
@@ -368,7 +402,8 @@ def main():
                    'zanata_stats_output.%s' % options.format)
 
     write_stats_to_file(users, output_file, options.format,
-                        options.include_no_activities)
+                        options.include_no_activities,
+                        options.detail)
 
 
 if __name__ == '__main__':