From bc434a6d14ca93f27144623cef9fd8173bc09601 Mon Sep 17 00:00:00 2001
From: Ilya Shakhat <ishakhat@mirantis.com>
Date: Mon, 15 Feb 2016 17:13:42 +0300
Subject: [PATCH] [WIP] Added translation metric

Translation stats are retrieved from the Zanata service. The list of
projects and versions is taken from the Zanata API. The list of users is
configured in the openstack/i18n repo and referenced by URI.

The processor reads stats for every project-version-user-week
combination and yields records of the new type 'tr'. The UI is extended
to show a breakdown by language.

Zanata module is based on https://review.openstack.org/#/c/275145/

Co-Authored-By: daisy-ycguo <guoyingc@cn.ibm.com>
Co-Authored-By: Akihiro Motoki <amotoki@gmail.com>

Change-Id: Ic1b44f09f7eb592d75d435f66a71d3110a2f49a5
---
 doc/source/tools/stackalytics-processor.txt   |   3 +
 etc/default_data.schema.json                  |   3 +
 etc/stackalytics.conf                         |   3 +
 stackalytics/dashboard/decorators.py          |   1 +
 stackalytics/dashboard/helpers.py             |   4 +
 stackalytics/dashboard/parameters.py          |   2 +
 .../templates/_macros/activity_log.html       |   2 +
 .../_macros/contribution_summary.html         |   1 +
 .../dashboard/templates/overview.html         |  25 ++++
 stackalytics/dashboard/web.py                 |  42 +++++-
 stackalytics/processor/config.py              |   4 +
 stackalytics/processor/main.py                |  13 ++
 stackalytics/processor/record_processor.py    |  55 ++++++--
 stackalytics/processor/user_processor.py      |  12 +-
 stackalytics/processor/utils.py               |  12 +-
 stackalytics/processor/zanata.py              | 128 ++++++++++++++++++
 16 files changed, 293 insertions(+), 17 deletions(-)
 create mode 100644 stackalytics/processor/zanata.py

diff --git a/doc/source/tools/stackalytics-processor.txt b/doc/source/tools/stackalytics-processor.txt
index f531fb407..4bb62a4bc 100644
--- a/doc/source/tools/stackalytics-processor.txt
+++ b/doc/source/tools/stackalytics-processor.txt
@@ -19,6 +19,7 @@ usage: stackalytics-processor [-h] [--config-dir DIR] [--config-file PATH]
                               [--ssh-key-filename SSH_KEY_FILENAME]
                               [--ssh-username SSH_USERNAME]
                               [--syslog-log-facility SYSLOG_LOG_FACILITY]
+                              [--translation-team-uri TRANSLATION_TEAM_URI]
                               [--use-syslog] [--use-syslog-rfc-format]
                               [--verbose] [--version] [--watch-log-file]
 
@@ -96,6 +97,8 @@ optional arguments:
   --syslog-log-facility SYSLOG_LOG_FACILITY
                         Syslog facility to receive log lines. This option is
                         ignored if log_config_append is set.
+  --translation-team-uri TRANSLATION_TEAM_URI
+                        URI for translation team data
   --use-syslog          Use syslog for logging. Existing syslog format is
                         DEPRECATED and will be changed later to honor RFC5424.
                         This option is ignored if log_config_append is set.
diff --git a/etc/default_data.schema.json b/etc/default_data.schema.json
index abbb096fb..7e855171f 100644
--- a/etc/default_data.schema.json
+++ b/etc/default_data.schema.json
@@ -21,6 +21,9 @@
                     "ldap_id": {
                         "type": "string"
                     },
+                    "zanata_id": {
+                        "type": "string"
+                    },
                     "user_name": {
                         "type": "string"
                     },
diff --git a/etc/stackalytics.conf b/etc/stackalytics.conf
index 395352a87..7073d338a 100644
--- a/etc/stackalytics.conf
+++ b/etc/stackalytics.conf
@@ -144,6 +144,9 @@
 # URI for default data (string value)
 #driverlog_data_uri = https://git.openstack.org/cgit/openstack/driverlog/plain/etc/default_data.json
 
+# URI for translation team data (string value)
+#translation_team_uri = https://git.openstack.org/cgit/openstack/i18n/plain/tools/zanata/translation_team.yaml
+
 # How many member profiles to look ahead after the last (integer value)
 #members_look_ahead = 250
 
diff --git a/stackalytics/dashboard/decorators.py b/stackalytics/dashboard/decorators.py
index 6fc578981..b382bd565 100644
--- a/stackalytics/dashboard/decorators.py
+++ b/stackalytics/dashboard/decorators.py
@@ -365,6 +365,7 @@ def aggregate_filter():
                 'person-day': (person_day_filter, None),
                 'ci': (ci_filter, ci_finalize),
                 'patches': (None, None),
+                'translations': (loc_filter, None),
             }
             if metric not in metric_to_filters_map:
                 metric = parameters.get_default('metric')
diff --git a/stackalytics/dashboard/helpers.py b/stackalytics/dashboard/helpers.py
index e7eb66d6e..c960ef578 100644
--- a/stackalytics/dashboard/helpers.py
+++ b/stackalytics/dashboard/helpers.py
@@ -200,6 +200,7 @@ def get_contribution_summary(records):
     patch_set_count = 0
     change_request_count = 0
     abandoned_change_requests_count = 0
+    translations = 0
 
     for record in records:
         record_type = record.record_type
@@ -236,6 +237,8 @@ def get_contribution_summary(records):
             change_request_count += 1
             if record.status == 'ABANDONED':
                 abandoned_change_requests_count += 1
+        elif record_type == 'tr':
+            translations += record.loc
 
     result = {
         'drafted_blueprint_count': drafted_blueprint_count,
@@ -249,6 +252,7 @@ def get_contribution_summary(records):
         'patch_set_count': patch_set_count,
         'change_request_count': change_request_count,
         'abandoned_change_requests_count': abandoned_change_requests_count,
+        'translations': translations,
     }
     return result
 
diff --git a/stackalytics/dashboard/parameters.py b/stackalytics/dashboard/parameters.py
index bbec0f38e..43a01d1cf 100644
--- a/stackalytics/dashboard/parameters.py
+++ b/stackalytics/dashboard/parameters.py
@@ -41,6 +41,7 @@ METRIC_LABELS = {
     'person-day': "Person-day effort",
     'ci': 'CI votes',
     'patches': 'Patch Sets',
+    'translations': 'Translations',
 }
 
 METRIC_TO_RECORD_TYPE = {
@@ -56,6 +57,7 @@ METRIC_TO_RECORD_TYPE = {
     'person-day': ['mark', 'patch', 'email', 'bpd', 'bugf'],
     'ci': ['ci'],
     'patches': ['patch'],
+    'translations': ['tr'],
 }
 
 FILTER_PARAMETERS = ['release', 'project_type', 'module', 'company', 'user_id',
diff --git a/stackalytics/dashboard/templates/_macros/activity_log.html b/stackalytics/dashboard/templates/_macros/activity_log.html
index e52704d1f..5a7924424 100644
--- a/stackalytics/dashboard/templates/_macros/activity_log.html
+++ b/stackalytics/dashboard/templates/_macros/activity_log.html
@@ -169,6 +169,8 @@ show_record_type=True, show_user_gravatar=True, gravatar_size=32, show_all=True)
             <div>Change Id: <a href="https://review.openstack.org/#/c/${review_number}" target="_blank">${review_id}</a></div>
         {%elif record_type == "member" %}
             <div class="header"><a href="${member_uri}" target="_blank">Registered</a> in OpenStack Foundation</div>
+        {%elif record_type == "tr" %}
+            <div class="header">Translated ${loc} terms into ${language}</div>
         {%/if%}
     </div>
     </div>
diff --git a/stackalytics/dashboard/templates/_macros/contribution_summary.html b/stackalytics/dashboard/templates/_macros/contribution_summary.html
index 3643155d9..5872dba42 100644
--- a/stackalytics/dashboard/templates/_macros/contribution_summary.html
+++ b/stackalytics/dashboard/templates/_macros/contribution_summary.html
@@ -48,6 +48,7 @@
 <div>Filed Bugs: <b>${filed_bug_count}</b></div>
 <div>Resolved Bugs: <b>${resolved_bug_count}</b></div>
 <div>Emails: <b>${email_count}</b></div>
+<div>Translations: <b>${translations}</b></div>
 {% endraw %}
 </script>
 
diff --git a/stackalytics/dashboard/templates/overview.html b/stackalytics/dashboard/templates/overview.html
index 2c7c2ada2..f9135ca81 100644
--- a/stackalytics/dashboard/templates/overview.html
+++ b/stackalytics/dashboard/templates/overview.html
@@ -8,6 +8,7 @@
 {% set show_engineer_breakdown = (not user_id) %}
 {% set show_bp_breakdown = (metric in ['bpd', 'bpc']) %}
 {% set show_module_breakdown = (not module) %}
+{% set show_languages_breakdown = (metric in ['translations']) %}
 {% set show_user_activity = (user_id) %}
 {% set show_module_activity = (module) and (not user_id) %}
 {% set show_activity = (show_user_activity) or (show_module_activity) %}
@@ -61,6 +62,9 @@
     {% if show_module_breakdown %}
     renderTableAndChart("/api/1.0/stats/modules", "module_container", "module_table", "module_chart", "module");
     {% endif %}
+    {% if show_languages_breakdown %}
+    renderTableAndChart("/api/1.0/stats/languages", "language_container", "language_table", "language_chart", "language");
+    {% endif %}
 
 </script>
 
@@ -184,6 +188,27 @@
     </div>
     {% endif %}
 
+    {% if show_languages_breakdown %}
+    <div id="language_container">
+    <h2>Languages</h2>
+
+    <div id="language_chart" style="width: 100%; height: 350px; margin-bottom: 1em;"></div>
+
+    <table id="language_table" class="display">
+        <thead>
+            <tr>
+                <th>#</th>
+                <th>Language</th>
+                <th>Translations</th>
+            </tr>
+        </thead>
+        <tbody>
+        </tbody>
+    </table>
+    <div class="spacer"></div>
+    </div>
+    {% endif %}
+
     {% if show_contribution_on_right %}
         {{ contribution_summary.show_contribution_summary(show_all=False) }}
         {{ show_report_links(module, company, user_id) }}
diff --git a/stackalytics/dashboard/web.py b/stackalytics/dashboard/web.py
index d3bbe671c..f4d61c8fb 100644
--- a/stackalytics/dashboard/web.py
+++ b/stackalytics/dashboard/web.py
@@ -402,6 +402,32 @@ def get_bpd(records, **kwargs):
     return result
 
 
+@app.route('/api/1.0/stats/languages')
+@decorators.exception_handler()
+@decorators.response()
+@decorators.cached()
+@decorators.jsonify('stats')
+@decorators.record_filter()
+def get_languages(records, **kwargs):
+    """Return translation totals per language, largest first.
+
+    Only records of type 'tr' are counted; the language is read from
+    record.value and the translated amount from record.loc.
+    """
+    totals = collections.defaultdict(int)
+    for rec in records:
+        if rec.record_type == 'tr':
+            totals[rec.value] += rec.loc
+
+    stats = sorted(
+        ({'id': name, 'name': name, 'metric': amount}
+         for name, amount in six.iteritems(totals)),
+        key=lambda item: item['metric'], reverse=True)
+    utils.add_index(stats)
+
+    return stats
+
+
 @app.route('/api/1.0/users')
 @decorators.exception_handler()
 @decorators.response()
@@ -587,12 +613,16 @@ def timeline(records, **kwargs):
     week_stat_commits = dict((c, 0) for c in weeks)
     week_stat_commits_hl = dict((c, 0) for c in weeks)
 
+    commits_handler = lambda record: 1
+    if 'translations' in metric:
+        commits_handler = lambda record: record.loc
+
     if ('commits' in metric) or ('loc' in metric):
-        handler = lambda record: record.loc
+        loc_handler = lambda record: record.loc
     elif 'ci' in metric:
-        handler = lambda record: 0 if record.value else 1
+        loc_handler = lambda record: 0 if record.value else 1
     else:
-        handler = lambda record: 0
+        loc_handler = lambda record: 0
 
     # fill stats with the data
     if 'person-day' in metric:
@@ -616,14 +646,14 @@ def timeline(records, **kwargs):
         for record in records:
             week = record.week
             if start_week <= week < end_week:
-                week_stat_loc[week] += handler(record)
-                week_stat_commits[week] += 1
+                week_stat_loc[week] += loc_handler(record)
+                week_stat_commits[week] += commits_handler(record)
                 if 'members' in metric:
                     if record.date >= start_date:
                         week_stat_commits_hl[week] += 1
                 else:
                     if record.release == release_name:
-                        week_stat_commits_hl[week] += 1
+                        week_stat_commits_hl[week] += commits_handler(record)
 
     if 'all' == release_name and 'members' not in metric:
         week_stat_commits_hl = week_stat_commits
diff --git a/stackalytics/processor/config.py b/stackalytics/processor/config.py
index 03ce48fa7..358322e59 100644
--- a/stackalytics/processor/config.py
+++ b/stackalytics/processor/config.py
@@ -48,6 +48,10 @@ PROCESSOR_OPTS = [
                default='https://git.openstack.org/cgit/'
                        'openstack/driverlog/plain/etc/default_data.json',
                help='URI for default data'),
+    cfg.StrOpt('translation-team-uri',
+               default='https://git.openstack.org/cgit/openstack/i18n/'
+                       'plain/tools/zanata/translation_team.yaml',
+               help='URI of translation team data'),
     cfg.IntOpt('members-look-ahead', default=250,
                help='How many member profiles to look ahead after the last'),
     cfg.IntOpt('read-timeout', default=120,
diff --git a/stackalytics/processor/main.py b/stackalytics/processor/main.py
index 63b774178..214ff684b 100644
--- a/stackalytics/processor/main.py
+++ b/stackalytics/processor/main.py
@@ -34,6 +34,7 @@ from stackalytics.processor import record_processor
 from stackalytics.processor import runtime_storage
 from stackalytics.processor import utils
 from stackalytics.processor import vcs
+from stackalytics.processor import zanata
 
 LOG = logging.getLogger(__name__)
 
@@ -180,6 +181,15 @@ def _process_mail_list(uri, runtime_storage_inst, record_processor_inst):
     runtime_storage_inst.set_records(processed_mail_iterator)
 
 
+def _process_translation_stats(runtime_storage_inst, record_processor_inst):
+    """Read Zanata stats, process them as 'i18n' records and store them."""
+    uri = cfg.CONF.translation_team_uri
+    raw_records = zanata.log(runtime_storage_inst, uri)
+    typed_records = _record_typer(raw_records, 'i18n')
+    runtime_storage_inst.set_records(
+        record_processor_inst.process(typed_records))
+
+
 def _process_member_list(uri, runtime_storage_inst, record_processor_inst):
     member_iterator = mps.log(uri, runtime_storage_inst,
                               cfg.CONF.days_to_update_members,
@@ -227,6 +237,9 @@ def process(runtime_storage_inst, record_processor_inst):
         _process_mail_list(mail_list, runtime_storage_inst,
                            record_processor_inst)
 
+    LOG.info('Processing translations stats')
+    _process_translation_stats(runtime_storage_inst, record_processor_inst)
+
     _post_process_records(record_processor_inst, repos)
 
 
diff --git a/stackalytics/processor/record_processor.py b/stackalytics/processor/record_processor.py
index 072a6485a..241b886e9 100644
--- a/stackalytics/processor/record_processor.py
+++ b/stackalytics/processor/record_processor.py
@@ -96,7 +96,8 @@ class RecordProcessor(object):
                     return self.domains_index[m]
         return None
 
-    def _create_user(self, launchpad_id, email, gerrit_id, user_name):
+    def _create_user(self, launchpad_id, email, gerrit_id, zanata_id,
+                     user_name):
         company = (self._get_company_by_email(email) or
                    self._get_independent())
         emails = []
@@ -104,7 +105,8 @@ class RecordProcessor(object):
             emails = [email]
         user = {
             'user_id': user_processor.make_user_id(
-                emails=emails, launchpad_id=launchpad_id, gerrit_id=gerrit_id),
+                emails=emails, launchpad_id=launchpad_id, gerrit_id=gerrit_id,
+                zanata_id=zanata_id),
             'launchpad_id': launchpad_id,
             'user_name': user_name or '',
             'companies': [{
@@ -115,6 +117,8 @@ class RecordProcessor(object):
         }
         if gerrit_id:
             user['gerrit_id'] = gerrit_id
+        if zanata_id:
+            user['zanata_id'] = zanata_id
         return user
 
     def _get_lp_info(self, email):
@@ -182,7 +186,8 @@ class RecordProcessor(object):
 
         # collect ordinary fields
         for key in ['seq', 'user_name', 'user_id', 'gerrit_id', 'github_id',
-                    'launchpad_id', 'companies', 'static', 'ldap_id']:
+                    'launchpad_id', 'companies', 'static', 'ldap_id',
+                    'zanata_id']:
             value = next((v.get(key) for v in user_profiles if v.get(key)),
                          None)
             if value:
@@ -255,19 +260,34 @@ class RecordProcessor(object):
         else:
             user_g = {}
 
+        zanata_id = record.get('zanata_id')
+        if zanata_id:
+            user_z = user_processor.load_user(
+                self.runtime_storage_inst, zanata_id=zanata_id) or {}
+            if ((not user_z) and (not launchpad_id) and
+                    (not user_e.get('launchpad_id'))):
+                # query LP
+                guessed_lp_id = zanata_id
+                user_name = self._get_lp_user_name(guessed_lp_id)
+                if user_name != guessed_lp_id:
+                    launchpad_id = guessed_lp_id
+        else:
+            user_z = {}
+
         user_l = user_processor.load_user(
             self.runtime_storage_inst, launchpad_id=launchpad_id) or {}
 
-        if ((user_e.get('seq') == user_l.get('seq') == user_g.get('seq')) and
-                user_e.get('seq')):
+        if ((user_e.get('seq') == user_l.get('seq') == user_g.get('seq') ==
+             user_z.get('seq')) and user_e.get('seq')):
             # sequence numbers are set and the same, merge is not needed
             user = user_e
         else:
-            user = self._create_user(launchpad_id, email, gerrit_id, user_name)
+            user = self._create_user(launchpad_id, email, gerrit_id, zanata_id,
+                                     user_name)
 
-            if user_e or user_l or user_g:
+            if user_e or user_l or user_g or user_z:
                 user = self._merge_user_profiles(
-                    [user_e, user_l, user_g, user])
+                    [user_e, user_l, user_g, user_z, user])
             else:
                 # create new
                 if not user_name:
@@ -586,6 +606,24 @@ class RecordProcessor(object):
 
         yield ci_vote
 
+    def _process_translation(self, record):
+        """Convert a Zanata stats record into a 'tr' record and yield it."""
+        # todo split translation and approval
+        translation = record.copy()
+        user_id = user_processor.make_user_id(zanata_id=record['zanata_id'])
+
+        translation['record_type'] = 'tr'
+        # branch and language keep per-version/per-language stats distinct
+        translation['primary_key'] = '%s:%s:%s:%s:%s' % (
+            user_id, record['module'], record['date'], record['branch'],
+            record['language_code'])
+        translation['author_name'] = user_id
+        # following fields are put into standard fields stored in dashboard mem
+        translation['loc'] = record['translated']
+        translation['value'] = record['language']
+        self._update_record_and_user(translation)
+        yield translation
+
     def _renew_record_date(self, record):
         record['week'] = utils.timestamp_to_week(record['date'])
         if ('release' not in record) or (not record['release']):
@@ -600,6 +638,7 @@ class RecordProcessor(object):
             'bug': self._process_bug,
             'member': self._process_member,
             'ci': self._process_ci,
+            'i18n': self._process_translation,
         }
 
         for record in record_iterator:
diff --git a/stackalytics/processor/user_processor.py b/stackalytics/processor/user_processor.py
index 8bce20a88..bb4616101 100644
--- a/stackalytics/processor/user_processor.py
+++ b/stackalytics/processor/user_processor.py
@@ -22,7 +22,8 @@ LOG = logging.getLogger(__name__)
 
 
 def make_user_id(emails=None, launchpad_id=None, gerrit_id=None,
-                 member_id=None, github_id=None, ldap_id=None, ci_id=None):
+                 member_id=None, github_id=None, ldap_id=None, ci_id=None,
+                 zanata_id=None):
     if launchpad_id or emails:
         return launchpad_id or emails[0]
     if gerrit_id:
@@ -33,6 +34,8 @@ def make_user_id(emails=None, launchpad_id=None, gerrit_id=None,
         return 'github:%s' % github_id
     if ldap_id:
         return 'ldap:%s' % ldap_id
+    if zanata_id:
+        return 'zanata:%s' % zanata_id
     if ci_id:
         return 'ci:%s' % re.sub(r'[^\w]', '_', ci_id.lower())
 
@@ -68,13 +71,16 @@ def store_user(runtime_storage_inst, user):
     if user.get('ldap_id'):
         runtime_storage_inst.set_by_key('user:ldap:%s' % user['ldap_id'],
                                         user)
+    if user.get('zanata_id'):
+        runtime_storage_inst.set_by_key('user:zanata:%s' % user['zanata_id'],
+                                        user)
     for email in user.get('emails') or []:
         runtime_storage_inst.set_by_key('user:%s' % email, user)
 
 
 def load_user(runtime_storage_inst, seq=None, user_id=None, email=None,
               launchpad_id=None, gerrit_id=None, member_id=None,
-              github_id=None, ldap_id=None):
+              github_id=None, ldap_id=None, zanata_id=None):
     if gerrit_id:
         key = 'gerrit:%s' % gerrit_id
     elif member_id:
@@ -83,6 +89,8 @@ def load_user(runtime_storage_inst, seq=None, user_id=None, email=None,
         key = 'github:%s' % github_id
     elif ldap_id:
         key = 'ldap:%s' % ldap_id
+    elif zanata_id:
+        key = 'zanata:%s' % zanata_id
     else:
         key = seq or user_id or launchpad_id or email
     if key:
diff --git a/stackalytics/processor/utils.py b/stackalytics/processor/utils.py
index 589255210..9af1460a0 100644
--- a/stackalytics/processor/utils.py
+++ b/stackalytics/processor/utils.py
@@ -27,6 +27,7 @@ from oslo_log import log as logging
 import requests
 import requests_file
 import six
+import yaml
 
 
 LOG = logging.getLogger(__name__)
@@ -121,7 +122,8 @@ def _session_request(session, uri, method):
     session.mount('file://', requests_file.FileAdapter())
     user_agent = random.choice(user_agents)
 
-    return session.request(method, uri, headers={'User-Agent': user_agent},
+    headers = {'User-Agent': user_agent, 'Accept': 'application/json'}
+    return session.request(method, uri, headers=headers,
                            timeout=cfg.CONF.read_timeout)
 
 
@@ -149,6 +151,14 @@ def read_json_from_uri(uri, session=None):
                     {'error': e, 'uri': uri})
 
 
+def read_yaml_from_uri(uri):  # parse the YAML document read from uri
+    try:
+        return yaml.safe_load(read_uri(uri))
+    except Exception as e:  # best effort: warn and fall through (None)
+        LOG.warning('Error "%(error)s" parsing yaml from uri %(uri)s',
+                    {'error': e, 'uri': uri})
+
+
 def _gzip_decompress(content):
     if six.PY3:
         return gzip.decompress(content).decode('utf8')
diff --git a/stackalytics/processor/zanata.py b/stackalytics/processor/zanata.py
new file mode 100644
index 000000000..fedf313cb
--- /dev/null
+++ b/stackalytics/processor/zanata.py
@@ -0,0 +1,128 @@
+# Copyright (c) 2016 OpenStack Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import re
+import time
+
+import itertools
+from oslo_log import log as logging
+import requests
+
+from stackalytics.processor import utils
+
+
+LOG = logging.getLogger(__name__)
+
+DAY = 24 * 60 * 60  # seconds
+WEEK = 7 * DAY  # seconds
+
+ZANATA_URI = 'https://translate.openstack.org/rest/%s'  # %s: resource path
+ZANATA_FIRST_RECORD = '2015-08-31'  # must be Monday
+
+# We limit the projects and versions to reduce the number of Zanata requests
+ZANATA_VERSION_PATTERN = re.compile(r'^(master)$')
+ZANATA_PROJECT_PATTERN = re.compile(r'(horizon$|.*guide|.*manual)')
+
+zanata_session = requests.Session()  # shared by all Zanata API requests
+
+
+def _zanata_get_projects():
+    """Return ids of Zanata projects matching ZANATA_PROJECT_PATTERN."""
+    uri = ZANATA_URI % 'projects'
+    LOG.debug("Reading projects from %s", uri)
+    # read_json_from_uri may return None on failure; fall back to no projects
+    projects = utils.read_json_from_uri(uri, session=zanata_session) or []
+    return (p['id'] for p in projects if ZANATA_PROJECT_PATTERN.match(p['id']))
+
+
+def _zanata_get_project_versions(project_id):
+    """Return ids of matching project versions; empty if data is missing."""
+    LOG.debug("Reading iterations for project %s", project_id)
+    uri = ZANATA_URI % ('projects/p/%s' % project_id)
+    project = utils.read_json_from_uri(uri, session=zanata_session) or {}
+    return (it['id'] for it in project.get('iterations') or []
+            if ZANATA_VERSION_PATTERN.match(it['id']))
+
+
+def _zanata_get_user_stats(project_id, iteration_id, zanata_user_id,
+                           start_date, end_date):
+    """Fetch contributor stats for one project version and date range."""
+    path = 'stats/project/%s/version/%s/contributor/%s/%s..%s' % (
+        project_id, iteration_id, zanata_user_id, start_date, end_date)
+    return utils.read_json_from_uri(ZANATA_URI % path, session=zanata_session)
+
+
+def _timestamp_to_date(timestamp):  # -> 'YYYY-MM-DD' string
+    return datetime.date.fromtimestamp(timestamp).isoformat()
+
+
+def _date_to_timestamp(d):
+    parsed = datetime.datetime.strptime(d, '%Y-%m-%d')  # naive datetime
+    return int(time.mktime(parsed.timetuple()))
+
+
+def log(runtime_storage_inst, translation_team_uri):
+    """Yield weekly translation stats records read from Zanata.
+
+    The last update time is stored only after all records are consumed.
+    """
+    last_update_key = 'zanata:last_update'
+    last_update = int(runtime_storage_inst.get_by_key(last_update_key) or
+                      _date_to_timestamp(ZANATA_FIRST_RECORD))
+    LOG.info('Last update: %d', last_update)
+    now = int(time.time())
+
+    LOG.info('Reading translation team from uri: %s', translation_team_uri)
+    translation_team = utils.read_yaml_from_uri(translation_team_uri)
+    if not translation_team:
+        LOG.warning('Translation team data is not available')
+        return
+
+    languages = dict((k, v['language'][0])
+                     for k, v in translation_team.items())
+
+    user_ids = set(u['zanata_id'] for u in runtime_storage_inst.get_all_users()
+                   if 'zanata_id' in u)
+    user_ids |= set(itertools.chain.from_iterable(
+        team.get('translators', []) for team in translation_team.values()))
+
+    for project_id in _zanata_get_projects():
+        for version in _zanata_get_project_versions(project_id):
+            for user_id in user_ids:
+                for day in range(last_update, now, WEEK):
+                    day_str = _timestamp_to_date(day)
+                    end_str = _timestamp_to_date(day + WEEK - DAY)
+                    stats = _zanata_get_user_stats(
+                        project_id, version, user_id, day_str, end_str)
+                    # tolerate a failed request or a missing user entry
+                    user_stats = (stats or {}).get(user_id)
+                    if user_stats:
+                        for lang, data in user_stats.items():
+                            record = dict(
+                                zanata_id=user_id,
+                                date=day,
+                                language_code=lang,
+                                language=languages.get(lang) or lang,
+                                translated=data['translated'],
+                                approved=data['approved'],
+                                module=project_id,
+                                branch=version,  # todo adapt version to branch
+                            )
+                            yield record
+
+    last_update += (now - last_update) // WEEK * WEEK
+    LOG.info('New last update: %d', last_update)
+    runtime_storage_inst.set_by_key(last_update_key, last_update)