[WIP] Added translation metric

Translation stats are retrieved from the Zanata service. The list of projects
and versions is taken from the Zanata API. The list of users is configured
in the openstack/i18n repo and referenced by URI.

The processor reads stats for every project-version-user-week and yields
records of the new type 'tr'. The UI is extended to show a breakdown by language.

Zanata module is based on https://review.openstack.org/#/c/275145/

Co-Authored-By: daisy-ycguo <guoyingc@cn.ibm.com>
Co-Authored-By: Akihiro Motoki <amotoki@gmail.com>

Change-Id: Ic1b44f09f7eb592d75d435f66a71d3110a2f49a5
This commit is contained in:
Ilya Shakhat 2016-02-15 17:13:42 +03:00
parent 00912c1941
commit bc434a6d14
16 changed files with 293 additions and 17 deletions

View File

@ -19,6 +19,7 @@ usage: stackalytics-processor [-h] [--config-dir DIR] [--config-file PATH]
[--ssh-key-filename SSH_KEY_FILENAME]
[--ssh-username SSH_USERNAME]
[--syslog-log-facility SYSLOG_LOG_FACILITY]
[--translation-team-uri TRANSLATION_TEAM_URI]
[--use-syslog] [--use-syslog-rfc-format]
[--verbose] [--version] [--watch-log-file]
@ -96,6 +97,8 @@ optional arguments:
--syslog-log-facility SYSLOG_LOG_FACILITY
Syslog facility to receive log lines. This option is
ignored if log_config_append is set.
--translation-team-uri TRANSLATION_TEAM_URI
URI for translation team data
--use-syslog Use syslog for logging. Existing syslog format is
DEPRECATED and will be changed later to honor RFC5424.
This option is ignored if log_config_append is set.

View File

@ -21,6 +21,9 @@
"ldap_id": {
"type": "string"
},
"zanata_id": {
"type": "string"
},
"user_name": {
"type": "string"
},

View File

@ -144,6 +144,9 @@
# URI for default data (string value)
#driverlog_data_uri = https://git.openstack.org/cgit/openstack/driverlog/plain/etc/default_data.json
# URI for translation team data (string value)
#translation_team_uri = https://git.openstack.org/cgit/openstack/i18n/plain/tools/zanata/translation_team.yaml
# How many member profiles to look ahead after the last (integer value)
#members_look_ahead = 250

View File

@ -365,6 +365,7 @@ def aggregate_filter():
'person-day': (person_day_filter, None),
'ci': (ci_filter, ci_finalize),
'patches': (None, None),
'translations': (loc_filter, None),
}
if metric not in metric_to_filters_map:
metric = parameters.get_default('metric')

View File

@ -200,6 +200,7 @@ def get_contribution_summary(records):
patch_set_count = 0
change_request_count = 0
abandoned_change_requests_count = 0
translations = 0
for record in records:
record_type = record.record_type
@ -236,6 +237,8 @@ def get_contribution_summary(records):
change_request_count += 1
if record.status == 'ABANDONED':
abandoned_change_requests_count += 1
elif record_type == 'tr':
translations += record.loc
result = {
'drafted_blueprint_count': drafted_blueprint_count,
@ -249,6 +252,7 @@ def get_contribution_summary(records):
'patch_set_count': patch_set_count,
'change_request_count': change_request_count,
'abandoned_change_requests_count': abandoned_change_requests_count,
'translations': translations,
}
return result

View File

@ -41,6 +41,7 @@ METRIC_LABELS = {
'person-day': "Person-day effort",
'ci': 'CI votes',
'patches': 'Patch Sets',
'translations': 'Translations',
}
METRIC_TO_RECORD_TYPE = {
@ -56,6 +57,7 @@ METRIC_TO_RECORD_TYPE = {
'person-day': ['mark', 'patch', 'email', 'bpd', 'bugf'],
'ci': ['ci'],
'patches': ['patch'],
'translations': ['tr'],
}
FILTER_PARAMETERS = ['release', 'project_type', 'module', 'company', 'user_id',

View File

@ -169,6 +169,8 @@ show_record_type=True, show_user_gravatar=True, gravatar_size=32, show_all=True)
<div>Change Id: <a href="https://review.openstack.org/#/c/${review_number}" target="_blank">${review_id}</a></div>
{%elif record_type == "member" %}
<div class="header"><a href="${member_uri}" target="_blank">Registered</a> in OpenStack Foundation</div>
{%elif record_type == "tr" %}
<div class="header">Translated ${loc} terms into ${language}</div>
{%/if%}
</div>
</div>

View File

@ -48,6 +48,7 @@
<div>Filed Bugs: <b>${filed_bug_count}</b></div>
<div>Resolved Bugs: <b>${resolved_bug_count}</b></div>
<div>Emails: <b>${email_count}</b></div>
<div>Translations: <b>${translations}</b></div>
{% endraw %}
</script>

View File

@ -8,6 +8,7 @@
{% set show_engineer_breakdown = (not user_id) %}
{% set show_bp_breakdown = (metric in ['bpd', 'bpc']) %}
{% set show_module_breakdown = (not module) %}
{% set show_languages_breakdown = (metric in ['translations']) %}
{% set show_user_activity = (user_id) %}
{% set show_module_activity = (module) and (not user_id) %}
{% set show_activity = (show_user_activity) or (show_module_activity) %}
@ -61,6 +62,9 @@
{% if show_module_breakdown %}
renderTableAndChart("/api/1.0/stats/modules", "module_container", "module_table", "module_chart", "module");
{% endif %}
{% if show_languages_breakdown %}
renderTableAndChart("/api/1.0/stats/languages", "language_container", "language_table", "language_chart", "language");
{% endif %}
</script>
@ -184,6 +188,27 @@
</div>
{% endif %}
{% if show_languages_breakdown %}
<div id="language_container">
<h2>Languages</h2>
<div id="language_chart" style="width: 100%; height: 350px; margin-bottom: 1em;"></div>
<table id="language_table" class="display">
<thead>
<tr>
<th>#</th>
<th>Language</th>
<th>Translations</th>
</tr>
</thead>
<tbody>
</tbody>
</table>
<div class="spacer"></div>
</div>
{% endif %}
{% if show_contribution_on_right %}
{{ contribution_summary.show_contribution_summary(show_all=False) }}
{{ show_report_links(module, company, user_id) }}

View File

@ -402,6 +402,32 @@ def get_bpd(records, **kwargs):
return result
@app.route('/api/1.0/stats/languages')
@decorators.exception_handler()
@decorators.response()
@decorators.cached()
@decorators.jsonify('stats')
@decorators.record_filter()
def get_languages(records, **kwargs):
    """Return translation totals grouped by the records' ``value`` field.

    Only records of type 'tr' are counted; their ``loc`` is summed per
    distinct ``value``. The result is a list of dicts with the keys 'id',
    'name' and 'metric', sorted by descending 'metric' and then passed
    through ``utils.add_index``.
    """
    totals = collections.defaultdict(int)
    for rec in records:
        if rec.record_type == 'tr':
            totals[rec.value] += rec.loc

    stats = [{'id': language, 'name': language, 'metric': amount}
             for language, amount in six.iteritems(totals)]
    stats.sort(key=lambda item: item['metric'], reverse=True)
    utils.add_index(stats)
    return stats
@app.route('/api/1.0/users')
@decorators.exception_handler()
@decorators.response()
@ -587,12 +613,16 @@ def timeline(records, **kwargs):
week_stat_commits = dict((c, 0) for c in weeks)
week_stat_commits_hl = dict((c, 0) for c in weeks)
commits_handler = lambda record: 1
if 'translations' in metric:
commits_handler = lambda record: record.loc
if ('commits' in metric) or ('loc' in metric):
handler = lambda record: record.loc
loc_handler = lambda record: record.loc
elif 'ci' in metric:
handler = lambda record: 0 if record.value else 1
loc_handler = lambda record: 0 if record.value else 1
else:
handler = lambda record: 0
loc_handler = lambda record: 0
# fill stats with the data
if 'person-day' in metric:
@ -616,14 +646,14 @@ def timeline(records, **kwargs):
for record in records:
week = record.week
if start_week <= week < end_week:
week_stat_loc[week] += handler(record)
week_stat_commits[week] += 1
week_stat_loc[week] += loc_handler(record)
week_stat_commits[week] += commits_handler(record)
if 'members' in metric:
if record.date >= start_date:
week_stat_commits_hl[week] += 1
else:
if record.release == release_name:
week_stat_commits_hl[week] += 1
week_stat_commits_hl[week] += commits_handler(record)
if 'all' == release_name and 'members' not in metric:
week_stat_commits_hl = week_stat_commits

View File

@ -48,6 +48,10 @@ PROCESSOR_OPTS = [
default='https://git.openstack.org/cgit/'
'openstack/driverlog/plain/etc/default_data.json',
help='URI for default data'),
cfg.StrOpt('translation-team-uri',
default='https://git.openstack.org/cgit/openstack/i18n/'
'plain/tools/zanata/translation_team.yaml',
help='URI of translation team data'),
cfg.IntOpt('members-look-ahead', default=250,
help='How many member profiles to look ahead after the last'),
cfg.IntOpt('read-timeout', default=120,

View File

@ -34,6 +34,7 @@ from stackalytics.processor import record_processor
from stackalytics.processor import runtime_storage
from stackalytics.processor import utils
from stackalytics.processor import vcs
from stackalytics.processor import zanata
LOG = logging.getLogger(__name__)
@ -180,6 +181,15 @@ def _process_mail_list(uri, runtime_storage_inst, record_processor_inst):
runtime_storage_inst.set_records(processed_mail_iterator)
def _process_translation_stats(runtime_storage_inst, record_processor_inst):
    """Read translation stats, process them and store the result.

    Records produced by ``zanata.log`` (configured with
    ``cfg.CONF.translation_team_uri``) are passed through ``_record_typer``
    with type 'i18n', then through the record processor, and the resulting
    iterator is handed to ``runtime_storage_inst.set_records``.
    """
    raw_records = zanata.log(runtime_storage_inst,
                             cfg.CONF.translation_team_uri)
    typed_records = _record_typer(raw_records, 'i18n')
    runtime_storage_inst.set_records(
        record_processor_inst.process(typed_records))
def _process_member_list(uri, runtime_storage_inst, record_processor_inst):
member_iterator = mps.log(uri, runtime_storage_inst,
cfg.CONF.days_to_update_members,
@ -227,6 +237,9 @@ def process(runtime_storage_inst, record_processor_inst):
_process_mail_list(mail_list, runtime_storage_inst,
record_processor_inst)
LOG.info('Processing translations stats')
_process_translation_stats(runtime_storage_inst, record_processor_inst)
_post_process_records(record_processor_inst, repos)

View File

@ -96,7 +96,8 @@ class RecordProcessor(object):
return self.domains_index[m]
return None
def _create_user(self, launchpad_id, email, gerrit_id, user_name):
def _create_user(self, launchpad_id, email, gerrit_id, zanata_id,
user_name):
company = (self._get_company_by_email(email) or
self._get_independent())
emails = []
@ -104,7 +105,8 @@ class RecordProcessor(object):
emails = [email]
user = {
'user_id': user_processor.make_user_id(
emails=emails, launchpad_id=launchpad_id, gerrit_id=gerrit_id),
emails=emails, launchpad_id=launchpad_id, gerrit_id=gerrit_id,
zanata_id=zanata_id),
'launchpad_id': launchpad_id,
'user_name': user_name or '',
'companies': [{
@ -115,6 +117,8 @@ class RecordProcessor(object):
}
if gerrit_id:
user['gerrit_id'] = gerrit_id
if zanata_id:
user['zanata_id'] = zanata_id
return user
def _get_lp_info(self, email):
@ -182,7 +186,8 @@ class RecordProcessor(object):
# collect ordinary fields
for key in ['seq', 'user_name', 'user_id', 'gerrit_id', 'github_id',
'launchpad_id', 'companies', 'static', 'ldap_id']:
'launchpad_id', 'companies', 'static', 'ldap_id',
'zanata_id']:
value = next((v.get(key) for v in user_profiles if v.get(key)),
None)
if value:
@ -255,19 +260,34 @@ class RecordProcessor(object):
else:
user_g = {}
zanata_id = record.get('zanata_id')
if zanata_id:
user_z = user_processor.load_user(
self.runtime_storage_inst, zanata_id=zanata_id) or {}
if ((not user_z) and (not launchpad_id) and
(not user_e.get('launchpad_id'))):
# query LP
guessed_lp_id = zanata_id
user_name = self._get_lp_user_name(guessed_lp_id)
if user_name != guessed_lp_id:
launchpad_id = guessed_lp_id
else:
user_z = {}
user_l = user_processor.load_user(
self.runtime_storage_inst, launchpad_id=launchpad_id) or {}
if ((user_e.get('seq') == user_l.get('seq') == user_g.get('seq')) and
user_e.get('seq')):
if ((user_e.get('seq') == user_l.get('seq') == user_g.get('seq') ==
user_z.get('seq')) and user_e.get('seq')):
# sequence numbers are set and the same, merge is not needed
user = user_e
else:
user = self._create_user(launchpad_id, email, gerrit_id, user_name)
user = self._create_user(launchpad_id, email, gerrit_id, zanata_id,
user_name)
if user_e or user_l or user_g:
if user_e or user_l or user_g or user_z:
user = self._merge_user_profiles(
[user_e, user_l, user_g, user])
[user_e, user_l, user_g, user_z, user])
else:
# create new
if not user_name:
@ -586,6 +606,24 @@ class RecordProcessor(object):
yield ci_vote
def _process_translation(self, record):
    """Turn a raw translation stats record into a record of type 'tr'.

    The input must carry 'zanata_id', 'module', 'date', 'translated' and
    'language'. 'branch' and 'language_code' are used for the primary key
    when present. A shallow copy of the input is filled in, passed to
    ``self._update_record_and_user`` and yielded.
    """
    # todo split translation and approval
    translation = record.copy()
    user_id = user_processor.make_user_id(zanata_id=record['zanata_id'])

    translation['record_type'] = 'tr'
    # The same user can have several stats records for one module and
    # week: one per language and one per version. Both are part of the
    # key so that such records do not share a primary key.
    language_key = record.get('language_code') or record['language']
    translation['primary_key'] = '%s:%s:%s:%s:%s' % (
        user_id, record['module'], record['date'],
        record.get('branch') or '', language_key)
    translation['author_name'] = user_id

    # following fields are put into standard fields stored in dashboard mem
    translation['loc'] = record['translated']
    translation['value'] = record['language']

    self._update_record_and_user(translation)

    yield translation
def _renew_record_date(self, record):
record['week'] = utils.timestamp_to_week(record['date'])
if ('release' not in record) or (not record['release']):
@ -600,6 +638,7 @@ class RecordProcessor(object):
'bug': self._process_bug,
'member': self._process_member,
'ci': self._process_ci,
'i18n': self._process_translation,
}
for record in record_iterator:

View File

@ -22,7 +22,8 @@ LOG = logging.getLogger(__name__)
def make_user_id(emails=None, launchpad_id=None, gerrit_id=None,
member_id=None, github_id=None, ldap_id=None, ci_id=None):
member_id=None, github_id=None, ldap_id=None, ci_id=None,
zanata_id=None):
if launchpad_id or emails:
return launchpad_id or emails[0]
if gerrit_id:
@ -33,6 +34,8 @@ def make_user_id(emails=None, launchpad_id=None, gerrit_id=None,
return 'github:%s' % github_id
if ldap_id:
return 'ldap:%s' % ldap_id
if zanata_id:
return 'zanata:%s' % zanata_id
if ci_id:
return 'ci:%s' % re.sub(r'[^\w]', '_', ci_id.lower())
@ -68,13 +71,16 @@ def store_user(runtime_storage_inst, user):
if user.get('ldap_id'):
runtime_storage_inst.set_by_key('user:ldap:%s' % user['ldap_id'],
user)
if user.get('zanata_id'):
runtime_storage_inst.set_by_key('user:zanata:%s' % user['zanata_id'],
user)
for email in user.get('emails') or []:
runtime_storage_inst.set_by_key('user:%s' % email, user)
def load_user(runtime_storage_inst, seq=None, user_id=None, email=None,
launchpad_id=None, gerrit_id=None, member_id=None,
github_id=None, ldap_id=None):
github_id=None, ldap_id=None, zanata_id=None):
if gerrit_id:
key = 'gerrit:%s' % gerrit_id
elif member_id:
@ -83,6 +89,8 @@ def load_user(runtime_storage_inst, seq=None, user_id=None, email=None,
key = 'github:%s' % github_id
elif ldap_id:
key = 'ldap:%s' % ldap_id
elif zanata_id:
key = 'zanata:%s' % zanata_id
else:
key = seq or user_id or launchpad_id or email
if key:

View File

@ -27,6 +27,7 @@ from oslo_log import log as logging
import requests
import requests_file
import six
import yaml
LOG = logging.getLogger(__name__)
@ -121,7 +122,8 @@ def _session_request(session, uri, method):
session.mount('file://', requests_file.FileAdapter())
user_agent = random.choice(user_agents)
return session.request(method, uri, headers={'User-Agent': user_agent},
headers = {'User-Agent': user_agent, 'Accept': 'application/json'}
return session.request(method, uri, headers=headers,
timeout=cfg.CONF.read_timeout)
@ -149,6 +151,14 @@ def read_json_from_uri(uri, session=None):
{'error': e, 'uri': uri})
def read_yaml_from_uri(uri):
    """Read the content at *uri* and parse it as YAML.

    Returns the parsed data. If reading or parsing raises, a warning is
    logged and None is returned.
    """
    try:
        raw = read_uri(uri)
        parsed = yaml.safe_load(raw)
    except Exception as err:
        LOG.warning('Error "%(error)s" parsing yaml from uri %(uri)s',
                    {'error': err, 'uri': uri})
        return None
    return parsed
def _gzip_decompress(content):
if six.PY3:
return gzip.decompress(content).decode('utf8')

View File

@ -0,0 +1,128 @@
# Copyright (c) 2016 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import re
import time
import itertools
from oslo_log import log as logging
import requests
from stackalytics.processor import utils
LOG = logging.getLogger(__name__)
DAY = 24 * 60 * 60
WEEK = 7 * DAY
ZANATA_URI = 'https://translate.openstack.org/rest/%s'
ZANATA_FIRST_RECORD = '2015-08-31' # must be Monday
# We limit the projects and versions to reduce number of requests to Zanata API
ZANATA_VERSION_PATTERN = re.compile(r'^(master)$')
ZANATA_PROJECT_PATTERN = re.compile(r'(horizon$|.*guide|.*manual)')
zanata_session = requests.Session()
def _zanata_get_projects():
    """Return a generator of Zanata project ids to collect stats for.

    The project list is requested from the 'projects' endpoint using the
    shared ``zanata_session``. Only ids matching
    ``ZANATA_PROJECT_PATTERN`` are produced.
    """
    uri = ZANATA_URI % 'projects'
    # Pass the argument to the logger instead of pre-formatting the message
    LOG.debug("Reading projects from %s", uri)
    projects_data = utils.read_json_from_uri(uri, session=zanata_session)
    # NOTE(review): read_json_from_uri appears to return None when the
    # request or parsing fails - confirm. An empty list keeps the caller's
    # loop from failing with a TypeError in that case.
    return (p['id'] for p in (projects_data or [])
            if ZANATA_PROJECT_PATTERN.match(p['id']))
def _zanata_get_project_versions(project_id):
    """Return a generator of version (iteration) ids of a Zanata project.

    The project description is requested using the shared
    ``zanata_session``. Only iteration ids matching
    ``ZANATA_VERSION_PATTERN`` are produced.
    """
    # Pass the argument to the logger instead of pre-formatting the message
    LOG.debug("Reading iterations for project %s", project_id)
    uri = ZANATA_URI % ('projects/p/%s' % project_id)
    project_data = utils.read_json_from_uri(uri, session=zanata_session)
    # NOTE(review): read_json_from_uri appears to return None when the
    # request or parsing fails - confirm. An empty dict avoids an
    # AttributeError on .get() in that case.
    return (it['id'] for it in (project_data or {}).get('iterations', [])
            if ZANATA_VERSION_PATTERN.match(it['id']))
def _zanata_get_user_stats(project_id, iteration_id, zanata_user_id,
                           start_date, end_date):
    """Request contributor stats for one project version and date range.

    Builds the 'stats/project/.../contributor/...' URI from the arguments
    and returns whatever ``utils.read_json_from_uri`` yields for it, using
    the shared ``zanata_session``.
    """
    path = 'stats/project/%s/version/%s/contributor/%s/%s..%s' % (
        project_id, iteration_id, zanata_user_id, start_date, end_date)
    return utils.read_json_from_uri(ZANATA_URI % path,
                                    session=zanata_session)
def _timestamp_to_date(timestamp):
    """Format a Unix timestamp as 'YYYY-MM-DD' in local time."""
    moment = datetime.datetime.fromtimestamp(timestamp)
    return moment.strftime('%Y-%m-%d')
def _date_to_timestamp(d):
    """Convert a 'YYYY-MM-DD' string to an integer Unix timestamp.

    The date is interpreted as midnight in local time.
    """
    parsed = datetime.datetime.strptime(d, '%Y-%m-%d')
    seconds = time.mktime(parsed.timetuple())
    return int(seconds)
def log(runtime_storage_inst, translation_team_uri):
    """Yield weekly translation stats records read from Zanata.

    Stats are requested for every combination of project, version, known
    Zanata user and week between the stored 'zanata:last_update' mark
    (or ZANATA_FIRST_RECORD on the first run) and the current time.
    One dict is yielded per language found in a response, with the keys
    zanata_id, date, language_code, language, translated, approved,
    module and branch.

    Nothing is yielded when the translation team data cannot be read.
    The new 'zanata:last_update' mark is stored only after all loops
    finish, i.e. once the generator has been consumed completely.

    :param runtime_storage_inst: storage providing get_by_key, set_by_key
                                 and get_all_users
    :param translation_team_uri: URI of the YAML file describing
                                 translation teams
    """
    last_update_key = 'zanata:last_update'
    last_update = int(runtime_storage_inst.get_by_key(last_update_key) or
                      _date_to_timestamp(ZANATA_FIRST_RECORD))
    LOG.info('Last update: %d', last_update)
    now = int(time.time())

    LOG.info('Reading translation team from uri: %s', translation_team_uri)
    translation_team = utils.read_yaml_from_uri(translation_team_uri)

    if not translation_team:
        LOG.warning('Translation team data is not available')
        return

    # language code -> first name listed under 'language' for that team
    languages = {code: team['language'][0]
                 for code, team in translation_team.items()}

    # users already known to have a Zanata id plus all listed translators
    user_ids = {u['zanata_id'] for u in runtime_storage_inst.get_all_users()
                if 'zanata_id' in u}
    user_ids.update(itertools.chain.from_iterable(
        team.get('translators', []) for team in translation_team.values()))

    # The week boundaries do not depend on project, version or user, so
    # they are computed once instead of inside the innermost loop.
    weeks = [(day, _timestamp_to_date(day),
              _timestamp_to_date(day + WEEK - DAY))
             for day in range(last_update, now, WEEK)]

    for project_id in _zanata_get_projects():
        for version in _zanata_get_project_versions(project_id):
            for user_id in user_ids:
                for day, day_str, end_str in weeks:
                    stats = _zanata_get_user_stats(
                        project_id, version, user_id, day_str, end_str)
                    # A failed request or a response without this user
                    # must not abort the whole run; skip this week.
                    user_stats = (stats or {}).get(user_id)
                    if not user_stats:
                        continue

                    for lang, data in user_stats.items():
                        yield dict(
                            zanata_id=user_id,
                            date=day,
                            language_code=lang,
                            language=languages.get(lang) or lang,
                            translated=data['translated'],
                            approved=data['approved'],
                            module=project_id,
                            branch=version,  # todo adapt version to branch
                        )

    # Advance by whole weeks only, so the mark stays on a week boundary
    # and the current, still incomplete week is read again next time.
    last_update += (now - last_update) // WEEK * WEEK
    LOG.info('New last update: %d', last_update)
    runtime_storage_inst.set_by_key(last_update_key, last_update)