diff --git a/doc/source/tools/stackalytics-processor.txt b/doc/source/tools/stackalytics-processor.txt
index f531fb407..4bb62a4bc 100644
--- a/doc/source/tools/stackalytics-processor.txt
+++ b/doc/source/tools/stackalytics-processor.txt
@@ -19,6 +19,7 @@ usage: stackalytics-processor [-h] [--config-dir DIR] [--config-file PATH]
[--ssh-key-filename SSH_KEY_FILENAME]
[--ssh-username SSH_USERNAME]
[--syslog-log-facility SYSLOG_LOG_FACILITY]
+ [--translation-team-uri TRANSLATION_TEAM_URI]
[--use-syslog] [--use-syslog-rfc-format]
[--verbose] [--version] [--watch-log-file]
@@ -96,6 +97,8 @@ optional arguments:
--syslog-log-facility SYSLOG_LOG_FACILITY
Syslog facility to receive log lines. This option is
ignored if log_config_append is set.
+ --translation-team-uri TRANSLATION_TEAM_URI
+ URI for translation team data
--use-syslog Use syslog for logging. Existing syslog format is
DEPRECATED and will be changed later to honor RFC5424.
This option is ignored if log_config_append is set.
diff --git a/etc/default_data.schema.json b/etc/default_data.schema.json
index abbb096fb..7e855171f 100644
--- a/etc/default_data.schema.json
+++ b/etc/default_data.schema.json
@@ -21,6 +21,9 @@
"ldap_id": {
"type": "string"
},
+ "zanata_id": {
+ "type": "string"
+ },
"user_name": {
"type": "string"
},
diff --git a/etc/stackalytics.conf b/etc/stackalytics.conf
index 395352a87..7073d338a 100644
--- a/etc/stackalytics.conf
+++ b/etc/stackalytics.conf
@@ -144,6 +144,9 @@
# URI for default data (string value)
#driverlog_data_uri = https://git.openstack.org/cgit/openstack/driverlog/plain/etc/default_data.json
+# URI for translation team data (string value)
+#translation_team_uri = https://git.openstack.org/cgit/openstack/i18n/plain/tools/zanata/translation_team.yaml
+
# How many member profiles to look ahead after the last (integer value)
#members_look_ahead = 250
diff --git a/stackalytics/dashboard/decorators.py b/stackalytics/dashboard/decorators.py
index 6fc578981..b382bd565 100644
--- a/stackalytics/dashboard/decorators.py
+++ b/stackalytics/dashboard/decorators.py
@@ -365,6 +365,7 @@ def aggregate_filter():
'person-day': (person_day_filter, None),
'ci': (ci_filter, ci_finalize),
'patches': (None, None),
+ 'translations': (loc_filter, None),
}
if metric not in metric_to_filters_map:
metric = parameters.get_default('metric')
diff --git a/stackalytics/dashboard/helpers.py b/stackalytics/dashboard/helpers.py
index e7eb66d6e..c960ef578 100644
--- a/stackalytics/dashboard/helpers.py
+++ b/stackalytics/dashboard/helpers.py
@@ -200,6 +200,7 @@ def get_contribution_summary(records):
patch_set_count = 0
change_request_count = 0
abandoned_change_requests_count = 0
+ translations = 0
for record in records:
record_type = record.record_type
@@ -236,6 +237,8 @@ def get_contribution_summary(records):
change_request_count += 1
if record.status == 'ABANDONED':
abandoned_change_requests_count += 1
+ elif record_type == 'tr':
+ translations += record.loc
result = {
'drafted_blueprint_count': drafted_blueprint_count,
@@ -249,6 +252,7 @@ def get_contribution_summary(records):
'patch_set_count': patch_set_count,
'change_request_count': change_request_count,
'abandoned_change_requests_count': abandoned_change_requests_count,
+ 'translations': translations,
}
return result
diff --git a/stackalytics/dashboard/parameters.py b/stackalytics/dashboard/parameters.py
index bbec0f38e..43a01d1cf 100644
--- a/stackalytics/dashboard/parameters.py
+++ b/stackalytics/dashboard/parameters.py
@@ -41,6 +41,7 @@ METRIC_LABELS = {
'person-day': "Person-day effort",
'ci': 'CI votes',
'patches': 'Patch Sets',
+ 'translations': 'Translations',
}
METRIC_TO_RECORD_TYPE = {
@@ -56,6 +57,7 @@ METRIC_TO_RECORD_TYPE = {
'person-day': ['mark', 'patch', 'email', 'bpd', 'bugf'],
'ci': ['ci'],
'patches': ['patch'],
+ 'translations': ['tr'],
}
FILTER_PARAMETERS = ['release', 'project_type', 'module', 'company', 'user_id',
diff --git a/stackalytics/dashboard/templates/_macros/activity_log.html b/stackalytics/dashboard/templates/_macros/activity_log.html
index e52704d1f..5a7924424 100644
--- a/stackalytics/dashboard/templates/_macros/activity_log.html
+++ b/stackalytics/dashboard/templates/_macros/activity_log.html
@@ -169,6 +169,8 @@ show_record_type=True, show_user_gravatar=True, gravatar_size=32, show_all=True)
{%elif record_type == "member" %}
+ {%elif record_type == "tr" %}
+
{%/if%}
diff --git a/stackalytics/dashboard/templates/_macros/contribution_summary.html b/stackalytics/dashboard/templates/_macros/contribution_summary.html
index 3643155d9..5872dba42 100644
--- a/stackalytics/dashboard/templates/_macros/contribution_summary.html
+++ b/stackalytics/dashboard/templates/_macros/contribution_summary.html
@@ -48,6 +48,7 @@
Filed Bugs: ${filed_bug_count}
Resolved Bugs: ${resolved_bug_count}
Emails: ${email_count}
+Translations: ${translations}
{% endraw %}
diff --git a/stackalytics/dashboard/templates/overview.html b/stackalytics/dashboard/templates/overview.html
index 2c7c2ada2..f9135ca81 100644
--- a/stackalytics/dashboard/templates/overview.html
+++ b/stackalytics/dashboard/templates/overview.html
@@ -8,6 +8,7 @@
{% set show_engineer_breakdown = (not user_id) %}
{% set show_bp_breakdown = (metric in ['bpd', 'bpc']) %}
{% set show_module_breakdown = (not module) %}
+{% set show_languages_breakdown = (metric in ['translations']) %}
{% set show_user_activity = (user_id) %}
{% set show_module_activity = (module) and (not user_id) %}
{% set show_activity = (show_user_activity) or (show_module_activity) %}
@@ -61,6 +62,9 @@
{% if show_module_breakdown %}
renderTableAndChart("/api/1.0/stats/modules", "module_container", "module_table", "module_chart", "module");
{% endif %}
+ {% if show_languages_breakdown %}
+ renderTableAndChart("/api/1.0/stats/languages", "language_container", "language_table", "language_chart", "language");
+ {% endif %}
@@ -184,6 +188,27 @@
{% endif %}
+ {% if show_languages_breakdown %}
+
+
Languages
+
+
+
+
+
+
+ # |
+ Language |
+ Translations |
+
+
+
+
+
+
+
+ {% endif %}
+
{% if show_contribution_on_right %}
{{ contribution_summary.show_contribution_summary(show_all=False) }}
{{ show_report_links(module, company, user_id) }}
diff --git a/stackalytics/dashboard/web.py b/stackalytics/dashboard/web.py
index d3bbe671c..f4d61c8fb 100644
--- a/stackalytics/dashboard/web.py
+++ b/stackalytics/dashboard/web.py
@@ -402,6 +402,32 @@ def get_bpd(records, **kwargs):
return result
+@app.route('/api/1.0/stats/languages')
+@decorators.exception_handler()
+@decorators.response()
+@decorators.cached()
+@decorators.jsonify('stats')
+@decorators.record_filter()
+def get_languages(records, **kwargs):
+ result = []
+ languages = collections.defaultdict(int)
+ for record in records:
+ if record.record_type in ['tr']:
+ languages[record.value] += record.loc
+
+ for lang, val in six.iteritems(languages):
+ result.append({
+ 'id': lang,
+ 'name': lang,
+ 'metric': val,
+ })
+
+ result.sort(key=lambda x: x['metric'], reverse=True)
+ utils.add_index(result)
+
+ return result
+
+
@app.route('/api/1.0/users')
@decorators.exception_handler()
@decorators.response()
@@ -587,12 +613,16 @@ def timeline(records, **kwargs):
week_stat_commits = dict((c, 0) for c in weeks)
week_stat_commits_hl = dict((c, 0) for c in weeks)
+ commits_handler = lambda record: 1
+ if 'translations' in metric:
+ commits_handler = lambda record: record.loc
+
if ('commits' in metric) or ('loc' in metric):
- handler = lambda record: record.loc
+ loc_handler = lambda record: record.loc
elif 'ci' in metric:
- handler = lambda record: 0 if record.value else 1
+ loc_handler = lambda record: 0 if record.value else 1
else:
- handler = lambda record: 0
+ loc_handler = lambda record: 0
# fill stats with the data
if 'person-day' in metric:
@@ -616,14 +646,14 @@ def timeline(records, **kwargs):
for record in records:
week = record.week
if start_week <= week < end_week:
- week_stat_loc[week] += handler(record)
- week_stat_commits[week] += 1
+ week_stat_loc[week] += loc_handler(record)
+ week_stat_commits[week] += commits_handler(record)
if 'members' in metric:
if record.date >= start_date:
week_stat_commits_hl[week] += 1
else:
if record.release == release_name:
- week_stat_commits_hl[week] += 1
+ week_stat_commits_hl[week] += commits_handler(record)
if 'all' == release_name and 'members' not in metric:
week_stat_commits_hl = week_stat_commits
diff --git a/stackalytics/processor/config.py b/stackalytics/processor/config.py
index 03ce48fa7..358322e59 100644
--- a/stackalytics/processor/config.py
+++ b/stackalytics/processor/config.py
@@ -48,6 +48,10 @@ PROCESSOR_OPTS = [
default='https://git.openstack.org/cgit/'
'openstack/driverlog/plain/etc/default_data.json',
help='URI for default data'),
+ cfg.StrOpt('translation-team-uri',
+ default='https://git.openstack.org/cgit/openstack/i18n/'
+ 'plain/tools/zanata/translation_team.yaml',
+ help='URI for translation team data'),
cfg.IntOpt('members-look-ahead', default=250,
help='How many member profiles to look ahead after the last'),
cfg.IntOpt('read-timeout', default=120,
diff --git a/stackalytics/processor/main.py b/stackalytics/processor/main.py
index 63b774178..214ff684b 100644
--- a/stackalytics/processor/main.py
+++ b/stackalytics/processor/main.py
@@ -34,6 +34,7 @@ from stackalytics.processor import record_processor
from stackalytics.processor import runtime_storage
from stackalytics.processor import utils
from stackalytics.processor import vcs
+from stackalytics.processor import zanata
LOG = logging.getLogger(__name__)
@@ -180,6 +181,15 @@ def _process_mail_list(uri, runtime_storage_inst, record_processor_inst):
runtime_storage_inst.set_records(processed_mail_iterator)
+def _process_translation_stats(runtime_storage_inst, record_processor_inst):
+ translation_iterator = zanata.log(runtime_storage_inst,
+ cfg.CONF.translation_team_uri)
+ translation_iterator_typed = _record_typer(translation_iterator, 'i18n')
+ processed_translation_iterator = record_processor_inst.process(
+ translation_iterator_typed)
+ runtime_storage_inst.set_records(processed_translation_iterator)
+
+
def _process_member_list(uri, runtime_storage_inst, record_processor_inst):
member_iterator = mps.log(uri, runtime_storage_inst,
cfg.CONF.days_to_update_members,
@@ -227,6 +237,9 @@ def process(runtime_storage_inst, record_processor_inst):
_process_mail_list(mail_list, runtime_storage_inst,
record_processor_inst)
+ LOG.info('Processing translations stats')
+ _process_translation_stats(runtime_storage_inst, record_processor_inst)
+
_post_process_records(record_processor_inst, repos)
diff --git a/stackalytics/processor/record_processor.py b/stackalytics/processor/record_processor.py
index 072a6485a..241b886e9 100644
--- a/stackalytics/processor/record_processor.py
+++ b/stackalytics/processor/record_processor.py
@@ -96,7 +96,8 @@ class RecordProcessor(object):
return self.domains_index[m]
return None
- def _create_user(self, launchpad_id, email, gerrit_id, user_name):
+ def _create_user(self, launchpad_id, email, gerrit_id, zanata_id,
+ user_name):
company = (self._get_company_by_email(email) or
self._get_independent())
emails = []
@@ -104,7 +105,8 @@ class RecordProcessor(object):
emails = [email]
user = {
'user_id': user_processor.make_user_id(
- emails=emails, launchpad_id=launchpad_id, gerrit_id=gerrit_id),
+ emails=emails, launchpad_id=launchpad_id, gerrit_id=gerrit_id,
+ zanata_id=zanata_id),
'launchpad_id': launchpad_id,
'user_name': user_name or '',
'companies': [{
@@ -115,6 +117,8 @@ class RecordProcessor(object):
}
if gerrit_id:
user['gerrit_id'] = gerrit_id
+ if zanata_id:
+ user['zanata_id'] = zanata_id
return user
def _get_lp_info(self, email):
@@ -182,7 +186,8 @@ class RecordProcessor(object):
# collect ordinary fields
for key in ['seq', 'user_name', 'user_id', 'gerrit_id', 'github_id',
- 'launchpad_id', 'companies', 'static', 'ldap_id']:
+ 'launchpad_id', 'companies', 'static', 'ldap_id',
+ 'zanata_id']:
value = next((v.get(key) for v in user_profiles if v.get(key)),
None)
if value:
@@ -255,19 +260,34 @@ class RecordProcessor(object):
else:
user_g = {}
+ zanata_id = record.get('zanata_id')
+ if zanata_id:
+ user_z = user_processor.load_user(
+ self.runtime_storage_inst, zanata_id=zanata_id) or {}
+ if ((not user_z) and (not launchpad_id) and
+ (not user_e.get('launchpad_id'))):
+ # query LP
+ guessed_lp_id = zanata_id
+ user_name = self._get_lp_user_name(guessed_lp_id)
+ if user_name != guessed_lp_id:
+ launchpad_id = guessed_lp_id
+ else:
+ user_z = {}
+
user_l = user_processor.load_user(
self.runtime_storage_inst, launchpad_id=launchpad_id) or {}
- if ((user_e.get('seq') == user_l.get('seq') == user_g.get('seq')) and
- user_e.get('seq')):
+ if ((user_e.get('seq') == user_l.get('seq') == user_g.get('seq') ==
+ user_z.get('seq')) and user_e.get('seq')):
# sequence numbers are set and the same, merge is not needed
user = user_e
else:
- user = self._create_user(launchpad_id, email, gerrit_id, user_name)
+ user = self._create_user(launchpad_id, email, gerrit_id, zanata_id,
+ user_name)
- if user_e or user_l or user_g:
+ if user_e or user_l or user_g or user_z:
user = self._merge_user_profiles(
- [user_e, user_l, user_g, user])
+ [user_e, user_l, user_g, user_z, user])
else:
# create new
if not user_name:
@@ -586,6 +606,24 @@ class RecordProcessor(object):
yield ci_vote
+ def _process_translation(self, record):
+ # TODO: split translation and approval into separate records
+ translation = record.copy()
+ user_id = user_processor.make_user_id(zanata_id=record['zanata_id'])
+
+ translation['record_type'] = 'tr'
+ translation['primary_key'] = '%s:%s:%s' % (
+ user_id, record['module'], record['date'])
+ translation['author_name'] = user_id
+
+ # the following fields map onto standard fields stored in dashboard memory
+ translation['loc'] = record['translated']
+ translation['value'] = record['language']
+
+ self._update_record_and_user(translation)
+
+ yield translation
+
def _renew_record_date(self, record):
record['week'] = utils.timestamp_to_week(record['date'])
if ('release' not in record) or (not record['release']):
@@ -600,6 +638,7 @@ class RecordProcessor(object):
'bug': self._process_bug,
'member': self._process_member,
'ci': self._process_ci,
+ 'i18n': self._process_translation,
}
for record in record_iterator:
diff --git a/stackalytics/processor/user_processor.py b/stackalytics/processor/user_processor.py
index 8bce20a88..bb4616101 100644
--- a/stackalytics/processor/user_processor.py
+++ b/stackalytics/processor/user_processor.py
@@ -22,7 +22,8 @@ LOG = logging.getLogger(__name__)
def make_user_id(emails=None, launchpad_id=None, gerrit_id=None,
- member_id=None, github_id=None, ldap_id=None, ci_id=None):
+ member_id=None, github_id=None, ldap_id=None, ci_id=None,
+ zanata_id=None):
if launchpad_id or emails:
return launchpad_id or emails[0]
if gerrit_id:
@@ -33,6 +34,8 @@ def make_user_id(emails=None, launchpad_id=None, gerrit_id=None,
return 'github:%s' % github_id
if ldap_id:
return 'ldap:%s' % ldap_id
+ if zanata_id:
+ return 'zanata:%s' % zanata_id
if ci_id:
return 'ci:%s' % re.sub(r'[^\w]', '_', ci_id.lower())
@@ -68,13 +71,16 @@ def store_user(runtime_storage_inst, user):
if user.get('ldap_id'):
runtime_storage_inst.set_by_key('user:ldap:%s' % user['ldap_id'],
user)
+ if user.get('zanata_id'):
+ runtime_storage_inst.set_by_key('user:zanata:%s' % user['zanata_id'],
+ user)
for email in user.get('emails') or []:
runtime_storage_inst.set_by_key('user:%s' % email, user)
def load_user(runtime_storage_inst, seq=None, user_id=None, email=None,
launchpad_id=None, gerrit_id=None, member_id=None,
- github_id=None, ldap_id=None):
+ github_id=None, ldap_id=None, zanata_id=None):
if gerrit_id:
key = 'gerrit:%s' % gerrit_id
elif member_id:
@@ -83,6 +89,8 @@ def load_user(runtime_storage_inst, seq=None, user_id=None, email=None,
key = 'github:%s' % github_id
elif ldap_id:
key = 'ldap:%s' % ldap_id
+ elif zanata_id:
+ key = 'zanata:%s' % zanata_id
else:
key = seq or user_id or launchpad_id or email
if key:
diff --git a/stackalytics/processor/utils.py b/stackalytics/processor/utils.py
index 589255210..9af1460a0 100644
--- a/stackalytics/processor/utils.py
+++ b/stackalytics/processor/utils.py
@@ -27,6 +27,7 @@ from oslo_log import log as logging
import requests
import requests_file
import six
+import yaml
LOG = logging.getLogger(__name__)
@@ -121,7 +122,8 @@ def _session_request(session, uri, method):
session.mount('file://', requests_file.FileAdapter())
user_agent = random.choice(user_agents)
- return session.request(method, uri, headers={'User-Agent': user_agent},
+ headers = {'User-Agent': user_agent, 'Accept': 'application/json'}
+ return session.request(method, uri, headers=headers,
timeout=cfg.CONF.read_timeout)
@@ -149,6 +151,14 @@ def read_json_from_uri(uri, session=None):
{'error': e, 'uri': uri})
+def read_yaml_from_uri(uri):
+ try:
+ return yaml.safe_load(read_uri(uri))
+ except Exception as e:
+ LOG.warning('Error "%(error)s" parsing yaml from uri %(uri)s',
+ {'error': e, 'uri': uri})
+
+
def _gzip_decompress(content):
if six.PY3:
return gzip.decompress(content).decode('utf8')
diff --git a/stackalytics/processor/zanata.py b/stackalytics/processor/zanata.py
new file mode 100644
index 000000000..fedf313cb
--- /dev/null
+++ b/stackalytics/processor/zanata.py
@@ -0,0 +1,128 @@
+# Copyright (c) 2016 OpenStack Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import re
+import time
+
+import itertools
+from oslo_log import log as logging
+import requests
+
+from stackalytics.processor import utils
+
+
+LOG = logging.getLogger(__name__)
+
+DAY = 24 * 60 * 60
+WEEK = 7 * DAY
+
+ZANATA_URI = 'https://translate.openstack.org/rest/%s'
+ZANATA_FIRST_RECORD = '2015-08-31' # must be a Monday
+
+# We limit the projects and versions to reduce the number of requests to the Zanata API
+ZANATA_VERSION_PATTERN = re.compile(r'^(master)$')
+ZANATA_PROJECT_PATTERN = re.compile(r'(horizon$|.*guide|.*manual)')
+
+zanata_session = requests.Session()
+
+
+def _zanata_get_projects():
+ uri = ZANATA_URI % 'projects'
+ LOG.debug("Reading projects from %s" % uri)
+ projects_data = utils.read_json_from_uri(uri, session=zanata_session)
+
+ return (p['id'] for p in projects_data
+ if ZANATA_PROJECT_PATTERN.match(p['id']))
+
+
+def _zanata_get_project_versions(project_id):
+ LOG.debug("Reading iterations for project %s" % project_id)
+ uri = ZANATA_URI % ('projects/p/%s' % project_id)
+ project_data = utils.read_json_from_uri(uri, session=zanata_session)
+
+ return (it['id'] for it in project_data.get('iterations', [])
+ if ZANATA_VERSION_PATTERN.match(it['id']))
+
+
+def _zanata_get_user_stats(project_id, iteration_id, zanata_user_id,
+ start_date, end_date):
+ uri = ZANATA_URI % ('stats/project/%s/version/%s/contributor/%s/%s..%s'
+ % (project_id, iteration_id, zanata_user_id,
+ start_date, end_date))
+ return utils.read_json_from_uri(uri, session=zanata_session)
+
+
+def _timestamp_to_date(timestamp):
+ return datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d')
+
+
+def _date_to_timestamp(d):
+ return int(time.mktime(
+ datetime.datetime.strptime(d, '%Y-%m-%d').timetuple()))
+
+
+def log(runtime_storage_inst, translation_team_uri):
+
+ last_update_key = 'zanata:last_update'
+ last_update = int(runtime_storage_inst.get_by_key(last_update_key) or
+ _date_to_timestamp(ZANATA_FIRST_RECORD))
+ LOG.info('Last update: %d', last_update)
+ now = int(time.time())
+
+ LOG.info('Reading translation team from uri: %s', translation_team_uri)
+ translation_team = utils.read_yaml_from_uri(translation_team_uri)
+
+ if not translation_team:
+ LOG.warning('Translation team data is not available')
+ return
+
+ languages = dict((k, v['language'][0])
+ for k, v in translation_team.items())
+
+ user_ids = set(u['zanata_id'] for u in runtime_storage_inst.get_all_users()
+ if 'zanata_id' in u)
+ user_ids |= set(itertools.chain.from_iterable(
+ team.get('translators', []) for team in translation_team.values()))
+
+ for project_id in _zanata_get_projects():
+ for version in _zanata_get_project_versions(project_id):
+ for user_id in user_ids:
+
+ for day in range(last_update, now, WEEK):
+ day_str = _timestamp_to_date(day)
+ end_str = _timestamp_to_date(day + WEEK - DAY)
+
+ stats = _zanata_get_user_stats(
+ project_id, version, user_id, day_str, end_str)
+ user_stats = stats[user_id]
+
+ if user_stats:
+ for lang, data in user_stats.items():
+ record = dict(
+ zanata_id=user_id,
+ date=day,
+ language_code=lang,
+ language=languages.get(lang) or lang,
+ translated=data['translated'],
+ approved=data['approved'],
+ module=project_id,
+ branch=version, # TODO: adapt Zanata version to a git branch name
+ )
+ yield record
+
+ last_update += (now - last_update) // WEEK * WEEK
+ LOG.info('New last update: %d', last_update)
+ runtime_storage_inst.set_by_key(last_update_key, last_update)