diff --git a/doc/source/tools/stackalytics-processor.txt b/doc/source/tools/stackalytics-processor.txt
index f531fb407..4bb62a4bc 100644
--- a/doc/source/tools/stackalytics-processor.txt
+++ b/doc/source/tools/stackalytics-processor.txt
@@ -19,6 +19,7 @@ usage: stackalytics-processor [-h] [--config-dir DIR] [--config-file PATH]
                               [--ssh-key-filename SSH_KEY_FILENAME]
                               [--ssh-username SSH_USERNAME]
                               [--syslog-log-facility SYSLOG_LOG_FACILITY]
+                              [--translation-team-uri TRANSLATION_TEAM_URI]
                               [--use-syslog] [--use-syslog-rfc-format]
                               [--verbose] [--version] [--watch-log-file]
@@ -96,6 +97,8 @@ optional arguments:
   --syslog-log-facility SYSLOG_LOG_FACILITY
                         Syslog facility to receive log lines. This option is
                         ignored if log_config_append is set.
+  --translation-team-uri TRANSLATION_TEAM_URI
+                        URI for translation team data
   --use-syslog          Use syslog for logging. Existing syslog format is
                         DEPRECATED and will be changed later to honor RFC5424.
                         This option is ignored if log_config_append is set.
diff --git a/etc/default_data.schema.json b/etc/default_data.schema.json
index abbb096fb..7e855171f 100644
--- a/etc/default_data.schema.json
+++ b/etc/default_data.schema.json
@@ -21,6 +21,9 @@
                 "ldap_id": {
                     "type": "string"
                 },
+                "zanata_id": {
+                    "type": "string"
+                },
                 "user_name": {
                     "type": "string"
                 },
diff --git a/etc/stackalytics.conf b/etc/stackalytics.conf
index 395352a87..7073d338a 100644
--- a/etc/stackalytics.conf
+++ b/etc/stackalytics.conf
@@ -144,6 +144,9 @@
 # URI for default data (string value)
 #driverlog_data_uri = https://git.openstack.org/cgit/openstack/driverlog/plain/etc/default_data.json
 
+# URI for translation team data (string value)
+#translation_team_uri = https://git.openstack.org/cgit/openstack/i18n/plain/tools/zanata/translation_team.yaml
+
 # How many member profiles to look ahead after the last (integer value)
 #members_look_ahead = 250
diff --git a/stackalytics/dashboard/decorators.py b/stackalytics/dashboard/decorators.py
index 6fc578981..b382bd565 100644
--- a/stackalytics/dashboard/decorators.py
+++ b/stackalytics/dashboard/decorators.py
@@ -365,6 +365,7 @@ def aggregate_filter():
         'person-day': (person_day_filter, None),
         'ci': (ci_filter, ci_finalize),
         'patches': (None, None),
+        'translations': (loc_filter, None),
     }
     if metric not in metric_to_filters_map:
         metric = parameters.get_default('metric')
diff --git a/stackalytics/dashboard/helpers.py b/stackalytics/dashboard/helpers.py
index e7eb66d6e..c960ef578 100644
--- a/stackalytics/dashboard/helpers.py
+++ b/stackalytics/dashboard/helpers.py
@@ -200,6 +200,7 @@ def get_contribution_summary(records):
     patch_set_count = 0
     change_request_count = 0
     abandoned_change_requests_count = 0
+    translations = 0
 
     for record in records:
         record_type = record.record_type
@@ -236,6 +237,8 @@
             change_request_count += 1
             if record.status == 'ABANDONED':
                 abandoned_change_requests_count += 1
+        elif record_type == 'tr':
+            translations += record.loc
 
     result = {
         'drafted_blueprint_count': drafted_blueprint_count,
@@ -249,6 +252,7 @@
         'patch_set_count': patch_set_count,
         'change_request_count': change_request_count,
        'abandoned_change_requests_count': abandoned_change_requests_count,
+        'translations': translations,
     }
     return result
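The summary above counts a translation record by its 'loc' field rather than as a single event, so the "Translations" figure is a number of translated terms, not a number of records. A standalone sketch of that roll-up (plain dicts stand in for the dashboard's record objects, which use attribute access):

    def summarize(records):
        # mirrors the new branch in get_contribution_summary(): a 'tr'
        # record contributes its 'loc' (translated terms), not a count of 1
        summary = {'email_count': 0, 'translations': 0}
        for record in records:
            if record['record_type'] == 'email':
                summary['email_count'] += 1
            elif record['record_type'] == 'tr':
                summary['translations'] += record['loc']
        return summary

    print(summarize([
        {'record_type': 'tr', 'loc': 120},
        {'record_type': 'tr', 'loc': 35},
        {'record_type': 'email', 'loc': 0},
    ]))
    # {'email_count': 1, 'translations': 155}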
effort", 'ci': 'CI votes', 'patches': 'Patch Sets', + 'translations': 'Translations', } METRIC_TO_RECORD_TYPE = { @@ -56,6 +57,7 @@ METRIC_TO_RECORD_TYPE = { 'person-day': ['mark', 'patch', 'email', 'bpd', 'bugf'], 'ci': ['ci'], 'patches': ['patch'], + 'translations': ['tr'], } FILTER_PARAMETERS = ['release', 'project_type', 'module', 'company', 'user_id', diff --git a/stackalytics/dashboard/templates/_macros/activity_log.html b/stackalytics/dashboard/templates/_macros/activity_log.html index e52704d1f..5a7924424 100644 --- a/stackalytics/dashboard/templates/_macros/activity_log.html +++ b/stackalytics/dashboard/templates/_macros/activity_log.html @@ -169,6 +169,8 @@ show_record_type=True, show_user_gravatar=True, gravatar_size=32, show_all=True)
Change Id: ${review_id}
{%elif record_type == "member" %}
Registered in OpenStack Foundation
+    {%elif record_type == "tr" %}
+        Translated ${loc} terms into ${language}
     {%/if%}
diff --git a/stackalytics/dashboard/templates/_macros/contribution_summary.html b/stackalytics/dashboard/templates/_macros/contribution_summary.html
index 3643155d9..5872dba42 100644
--- a/stackalytics/dashboard/templates/_macros/contribution_summary.html
+++ b/stackalytics/dashboard/templates/_macros/contribution_summary.html
@@ -48,6 +48,7 @@
Filed Bugs: ${filed_bug_count}
Resolved Bugs: ${resolved_bug_count}
Emails: ${email_count}
+    Translations: ${translations}
     {% endraw %}
diff --git a/stackalytics/dashboard/templates/overview.html b/stackalytics/dashboard/templates/overview.html
index 2c7c2ada2..f9135ca81 100644
--- a/stackalytics/dashboard/templates/overview.html
+++ b/stackalytics/dashboard/templates/overview.html
@@ -8,6 +8,7 @@
 {% set show_engineer_breakdown = (not user_id) %}
 {% set show_bp_breakdown = (metric in ['bpd', 'bpc']) %}
 {% set show_module_breakdown = (not module) %}
+{% set show_languages_breakdown = (metric in ['translations']) %}
 {% set show_user_activity = (user_id) %}
 {% set show_module_activity = (module) and (not user_id) %}
 {% set show_activity = (show_user_activity) or (show_module_activity) %}
@@ -61,6 +62,9 @@
         {% if show_module_breakdown %}
             renderTableAndChart("/api/1.0/stats/modules", "module_container", "module_table", "module_chart", "module");
         {% endif %}
+        {% if show_languages_breakdown %}
+            renderTableAndChart("/api/1.0/stats/languages", "language_container", "language_table", "language_chart", "language");
+        {% endif %}
@@ -184,6 +188,27 @@
     {% endif %}
 
+    {% if show_languages_breakdown %}
+
+        Languages
+
+        #   Language   Translations
+
+    {% endif %}
+
     {% if show_contribution_on_right %}
         {{ contribution_summary.show_contribution_summary(show_all=False) }}
         {{ show_report_links(module, company, user_id) }}
diff --git a/stackalytics/dashboard/web.py b/stackalytics/dashboard/web.py
index d3bbe671c..f4d61c8fb 100644
--- a/stackalytics/dashboard/web.py
+++ b/stackalytics/dashboard/web.py
@@ -402,6 +402,32 @@ def get_bpd(records, **kwargs):
     return result
 
 
+@app.route('/api/1.0/stats/languages')
+@decorators.exception_handler()
+@decorators.response()
+@decorators.cached()
+@decorators.jsonify('stats')
+@decorators.record_filter()
+def get_languages(records, **kwargs):
+    result = []
+    languages = collections.defaultdict(int)
+    for record in records:
+        if record.record_type in ['tr']:
+            languages[record.value] += record.loc
+
+    for lang, val in six.iteritems(languages):
+        result.append({
+            'id': lang,
+            'name': lang,
+            'metric': val,
+        })
+
+    result.sort(key=lambda x: x['metric'], reverse=True)
+    utils.add_index(result)
+
+    return result
+
+
 @app.route('/api/1.0/users')
 @decorators.exception_handler()
 @decorators.response()
@@ -587,12 +613,16 @@
     week_stat_commits = dict((c, 0) for c in weeks)
     week_stat_commits_hl = dict((c, 0) for c in weeks)
 
+    commits_handler = lambda record: 1
+    if 'translations' in metric:
+        commits_handler = lambda record: record.loc
+
     if ('commits' in metric) or ('loc' in metric):
-        handler = lambda record: record.loc
+        loc_handler = lambda record: record.loc
     elif 'ci' in metric:
-        handler = lambda record: 0 if record.value else 1
+        loc_handler = lambda record: 0 if record.value else 1
     else:
-        handler = lambda record: 0
+        loc_handler = lambda record: 0
 
     # fill stats with the data
     if 'person-day' in metric:
@@ -616,14 +646,14 @@
     for record in records:
         week = record.week
         if start_week <= week < end_week:
-            week_stat_loc[week] += handler(record)
-            week_stat_commits[week] += 1
+            week_stat_loc[week] += loc_handler(record)
+            week_stat_commits[week] += commits_handler(record)
             if 'members' in metric:
                 if record.date >= start_date:
                     week_stat_commits_hl[week] += 1
             else:
                 if record.release == release_name:
-                    week_stat_commits_hl[week] += 1
+                    week_stat_commits_hl[week] += commits_handler(record)
 
     if 'all' == release_name and 'members' not in metric:
         week_stat_commits_hl = week_stat_commits
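A standalone sketch of what the new /api/1.0/stats/languages handler computes once the decorators have filtered the records: translation records are grouped by the language name stored in 'value' and their 'loc' values (translated terms) are summed. Plain dicts stand in for dashboard records, and the 'index' field is an assumed stand-in for what utils.add_index() attaches:

    import collections

    records = [
        {'record_type': 'tr', 'value': 'Japanese', 'loc': 300},
        {'record_type': 'tr', 'value': 'German', 'loc': 120},
        {'record_type': 'tr', 'value': 'Japanese', 'loc': 50},
    ]

    languages = collections.defaultdict(int)
    for record in records:
        if record['record_type'] == 'tr':
            languages[record['value']] += record['loc']   # terms per language

    stats = sorted(({'id': lang, 'name': lang, 'metric': total}
                    for lang, total in languages.items()),
                   key=lambda x: x['metric'], reverse=True)
    for i, row in enumerate(stats, start=1):
        row['index'] = i   # assumed equivalent of utils.add_index(result)

    print(stats)
    # [{'id': 'Japanese', 'name': 'Japanese', 'metric': 350, 'index': 1},
    #  {'id': 'German', 'name': 'German', 'metric': 120, 'index': 2}]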
diff --git a/stackalytics/processor/config.py b/stackalytics/processor/config.py
index 03ce48fa7..358322e59 100644
--- a/stackalytics/processor/config.py
+++ b/stackalytics/processor/config.py
@@ -48,6 +48,10 @@ PROCESSOR_OPTS = [
                default='https://git.openstack.org/cgit/'
                        'openstack/driverlog/plain/etc/default_data.json',
                help='URI for default data'),
+    cfg.StrOpt('translation-team-uri',
+               default='https://git.openstack.org/cgit/openstack/i18n/'
+                       'plain/tools/zanata/translation_team.yaml',
+               help='URI of translation team data'),
     cfg.IntOpt('members-look-ahead', default=250,
                help='How many member profiles to look ahead after the last'),
     cfg.IntOpt('read-timeout', default=120,
diff --git a/stackalytics/processor/main.py b/stackalytics/processor/main.py
index 63b774178..214ff684b 100644
--- a/stackalytics/processor/main.py
+++ b/stackalytics/processor/main.py
@@ -34,6 +34,7 @@ from stackalytics.processor import record_processor
 from stackalytics.processor import runtime_storage
 from stackalytics.processor import utils
 from stackalytics.processor import vcs
+from stackalytics.processor import zanata
 
 LOG = logging.getLogger(__name__)
 
@@ -180,6 +181,15 @@ def _process_mail_list(uri, runtime_storage_inst, record_processor_inst):
     runtime_storage_inst.set_records(processed_mail_iterator)
 
 
+def _process_translation_stats(runtime_storage_inst, record_processor_inst):
+    translation_iterator = zanata.log(runtime_storage_inst,
+                                      cfg.CONF.translation_team_uri)
+    translation_iterator_typed = _record_typer(translation_iterator, 'i18n')
+    processed_translation_iterator = record_processor_inst.process(
+        translation_iterator_typed)
+    runtime_storage_inst.set_records(processed_translation_iterator)
+
+
 def _process_member_list(uri, runtime_storage_inst, record_processor_inst):
     member_iterator = mps.log(uri, runtime_storage_inst,
                               cfg.CONF.days_to_update_members,
@@ -227,6 +237,9 @@ def process(runtime_storage_inst, record_processor_inst):
         _process_mail_list(mail_list, runtime_storage_inst,
                            record_processor_inst)
 
+    LOG.info('Processing translations stats')
+    _process_translation_stats(runtime_storage_inst, record_processor_inst)
+
     _post_process_records(record_processor_inst, repos)
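The new processing step is a plain generator pipeline: zanata.log() yields raw dicts, a typer stamps them with the source type 'i18n' that selects the translation handler in the record processor, and the results are stored. A simplified sketch; record_typer below is illustrative and only approximates main._record_typer, whose exact field name is not shown in this patch:

    def fake_zanata_log():
        # stands in for zanata.log(runtime_storage_inst, translation_team_uri)
        yield {'zanata_id': 'alice', 'language': 'Japanese', 'translated': 42,
               'module': 'horizon', 'date': 1454284800}

    def record_typer(record_iterator, record_type):
        # illustrative typer: tag each record with the type that selects a
        # handler ('i18n' maps to the translation handler)
        for record in record_iterator:
            record['record_type'] = record_type
            yield record

    for record in record_typer(fake_zanata_log(), 'i18n'):
        print(record['record_type'], record['zanata_id'], record['translated'])
    # i18n alice 42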
diff --git a/stackalytics/processor/record_processor.py b/stackalytics/processor/record_processor.py
index 072a6485a..241b886e9 100644
--- a/stackalytics/processor/record_processor.py
+++ b/stackalytics/processor/record_processor.py
@@ -96,7 +96,8 @@ class RecordProcessor(object):
                 return self.domains_index[m]
         return None
 
-    def _create_user(self, launchpad_id, email, gerrit_id, user_name):
+    def _create_user(self, launchpad_id, email, gerrit_id, zanata_id,
+                     user_name):
         company = (self._get_company_by_email(email)
                    or self._get_independent())
         emails = []
@@ -104,7 +105,8 @@
             emails = [email]
         user = {
             'user_id': user_processor.make_user_id(
-                emails=emails, launchpad_id=launchpad_id, gerrit_id=gerrit_id),
+                emails=emails, launchpad_id=launchpad_id, gerrit_id=gerrit_id,
+                zanata_id=zanata_id),
             'launchpad_id': launchpad_id,
             'user_name': user_name or '',
             'companies': [{
@@ -115,6 +117,8 @@
         }
         if gerrit_id:
             user['gerrit_id'] = gerrit_id
+        if zanata_id:
+            user['zanata_id'] = zanata_id
         return user
 
     def _get_lp_info(self, email):
@@ -182,7 +186,8 @@
         # collect ordinary fields
         for key in ['seq', 'user_name', 'user_id', 'gerrit_id', 'github_id',
-                    'launchpad_id', 'companies', 'static', 'ldap_id']:
+                    'launchpad_id', 'companies', 'static', 'ldap_id',
+                    'zanata_id']:
             value = next((v.get(key) for v in user_profiles if v.get(key)),
                          None)
             if value:
@@ -255,19 +260,34 @@
         else:
             user_g = {}
 
+        zanata_id = record.get('zanata_id')
+        if zanata_id:
+            user_z = user_processor.load_user(
+                self.runtime_storage_inst, zanata_id=zanata_id) or {}
+            if ((not user_z) and (not launchpad_id) and
+                    (not user_e.get('launchpad_id'))):
+                # query LP
+                guessed_lp_id = zanata_id
+                user_name = self._get_lp_user_name(guessed_lp_id)
+                if user_name != guessed_lp_id:
+                    launchpad_id = guessed_lp_id
+        else:
+            user_z = {}
+
         user_l = user_processor.load_user(
             self.runtime_storage_inst, launchpad_id=launchpad_id) or {}
 
-        if ((user_e.get('seq') == user_l.get('seq') == user_g.get('seq')) and
-                user_e.get('seq')):
+        if ((user_e.get('seq') == user_l.get('seq') == user_g.get('seq') ==
+                user_z.get('seq')) and user_e.get('seq')):
             # sequence numbers are set and the same, merge is not needed
             user = user_e
         else:
-            user = self._create_user(launchpad_id, email, gerrit_id, user_name)
+            user = self._create_user(launchpad_id, email, gerrit_id, zanata_id,
+                                     user_name)
 
-            if user_e or user_l or user_g:
+            if user_e or user_l or user_g or user_z:
                 user = self._merge_user_profiles(
-                    [user_e, user_l, user_g, user])
+                    [user_e, user_l, user_g, user_z, user])
             else:
                 # create new
                 if not user_name:
@@ -586,6 +606,24 @@
 
         yield ci_vote
 
+    def _process_translation(self, record):
+        # todo split translation and approval
+        translation = record.copy()
+        user_id = user_processor.make_user_id(zanata_id=record['zanata_id'])
+
+        translation['record_type'] = 'tr'
+        translation['primary_key'] = '%s:%s:%s' % (
+            user_id, record['module'], record['date'])
+        translation['author_name'] = user_id
+
+        # following fields are put into standard fields stored in dashboard mem
+        translation['loc'] = record['translated']
+        translation['value'] = record['language']
+
+        self._update_record_and_user(translation)
+
+        yield translation
+
     def _renew_record_date(self, record):
         record['week'] = utils.timestamp_to_week(record['date'])
         if ('release' not in record) or (not record['release']):
@@ -600,6 +638,7 @@
             'bug': self._process_bug,
             'member': self._process_member,
             'ci': self._process_ci,
+            'i18n': self._process_translation,
         }
 
         for record in record_iterator:
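A standalone sketch of the field mapping done by _process_translation: the number of translated terms lands in 'loc' and the language name in 'value', which is exactly what the dashboard code earlier in this patch reads back. The 'zanata:' prefix is the make_user_id() fallback used when no other identity is known; sample values are invented:

    raw = {'zanata_id': 'alice', 'module': 'horizon', 'date': 1454284800,
           'language': 'French', 'translated': 77, 'approved': 12}

    translation = dict(raw)
    user_id = 'zanata:%s' % raw['zanata_id']
    translation['record_type'] = 'tr'
    translation['primary_key'] = '%s:%s:%s' % (user_id, raw['module'],
                                               raw['date'])
    translation['author_name'] = user_id
    translation['loc'] = raw['translated']     # translated terms
    translation['value'] = raw['language']     # language name

    print(translation['primary_key'])
    # zanata:alice:horizon:1454284800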
diff --git a/stackalytics/processor/user_processor.py b/stackalytics/processor/user_processor.py
index 8bce20a88..bb4616101 100644
--- a/stackalytics/processor/user_processor.py
+++ b/stackalytics/processor/user_processor.py
@@ -22,7 +22,8 @@ LOG = logging.getLogger(__name__)
 
 
 def make_user_id(emails=None, launchpad_id=None, gerrit_id=None,
-                 member_id=None, github_id=None, ldap_id=None, ci_id=None):
+                 member_id=None, github_id=None, ldap_id=None, ci_id=None,
+                 zanata_id=None):
     if launchpad_id or emails:
         return launchpad_id or emails[0]
     if gerrit_id:
@@ -33,6 +34,8 @@ def make_user_id(emails=None, launchpad_id=None, gerrit_id=None,
         return 'github:%s' % github_id
     if ldap_id:
         return 'ldap:%s' % ldap_id
+    if zanata_id:
+        return 'zanata:%s' % zanata_id
     if ci_id:
         return 'ci:%s' % re.sub(r'[^\w]', '_', ci_id.lower())
 
@@ -68,13 +71,16 @@ def store_user(runtime_storage_inst, user):
     if user.get('ldap_id'):
         runtime_storage_inst.set_by_key('user:ldap:%s' % user['ldap_id'],
                                         user)
+    if user.get('zanata_id'):
+        runtime_storage_inst.set_by_key('user:zanata:%s' % user['zanata_id'],
+                                        user)
     for email in user.get('emails') or []:
         runtime_storage_inst.set_by_key('user:%s' % email, user)
 
 
 def load_user(runtime_storage_inst, seq=None, user_id=None, email=None,
               launchpad_id=None, gerrit_id=None, member_id=None,
-              github_id=None, ldap_id=None):
+              github_id=None, ldap_id=None, zanata_id=None):
     if gerrit_id:
         key = 'gerrit:%s' % gerrit_id
     elif member_id:
@@ -83,6 +89,8 @@ def load_user(runtime_storage_inst, seq=None, user_id=None, email=None,
         key = 'github:%s' % github_id
     elif ldap_id:
         key = 'ldap:%s' % ldap_id
+    elif zanata_id:
+        key = 'zanata:%s' % zanata_id
     else:
         key = seq or user_id or launchpad_id or email
     if key:
diff --git a/stackalytics/processor/utils.py b/stackalytics/processor/utils.py
index 589255210..9af1460a0 100644
--- a/stackalytics/processor/utils.py
+++ b/stackalytics/processor/utils.py
@@ -27,6 +27,7 @@ from oslo_log import log as logging
 import requests
 import requests_file
 import six
+import yaml
 
 LOG = logging.getLogger(__name__)
 
@@ -121,7 +122,8 @@ def _session_request(session, uri, method):
     session.mount('file://', requests_file.FileAdapter())
     user_agent = random.choice(user_agents)
 
-    return session.request(method, uri, headers={'User-Agent': user_agent},
+    headers = {'User-Agent': user_agent, 'Accept': 'application/json'}
+    return session.request(method, uri, headers=headers,
                            timeout=cfg.CONF.read_timeout)
 
@@ -149,6 +151,14 @@ def read_json_from_uri(uri, session=None):
                     {'error': e, 'uri': uri})
 
 
+def read_yaml_from_uri(uri):
+    try:
+        return yaml.safe_load(read_uri(uri))
+    except Exception as e:
+        LOG.warning('Error "%(error)s" parsing yaml from uri %(uri)s',
+                    {'error': e, 'uri': uri})
+
+
 def _gzip_decompress(content):
     if six.PY3:
         return gzip.decompress(content).decode('utf8')
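read_yaml_from_uri() simply safe-loads whatever the URI returns. The zanata module below then expects the translation team file to map a language code to an entry with a one-element 'language' list and a 'translators' list; a sketch with invented entries (flow-style YAML for brevity):

    import yaml

    SAMPLE = ("ja: {language: ['Japanese'], translators: ['alice', 'bob']}\n"
              "de: {language: ['German'], translators: ['carol']}\n")

    translation_team = yaml.safe_load(SAMPLE)

    # same expressions as in zanata.log(): code -> display name, plus the
    # union of all translator ids that should be queried against Zanata
    languages = dict((k, v['language'][0])
                     for k, v in translation_team.items())
    user_ids = set()
    for team in translation_team.values():
        user_ids |= set(team.get('translators', []))

    print(languages)          # {'ja': 'Japanese', 'de': 'German'}
    print(sorted(user_ids))   # ['alice', 'bob', 'carol']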
diff --git a/stackalytics/processor/zanata.py b/stackalytics/processor/zanata.py
new file mode 100644
index 000000000..fedf313cb
--- /dev/null
+++ b/stackalytics/processor/zanata.py
@@ -0,0 +1,128 @@
+# Copyright (c) 2016 OpenStack Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import re
+import time
+
+import itertools
+from oslo_log import log as logging
+import requests
+
+from stackalytics.processor import utils
+
+
+LOG = logging.getLogger(__name__)
+
+DAY = 24 * 60 * 60
+WEEK = 7 * DAY
+
+ZANATA_URI = 'https://translate.openstack.org/rest/%s'
+ZANATA_FIRST_RECORD = '2015-08-31'  # must be Monday
+
+# We limit the projects and versions to reduce number of requests to Zanata API
+ZANATA_VERSION_PATTERN = re.compile(r'^(master)$')
+ZANATA_PROJECT_PATTERN = re.compile(r'(horizon$|.*guide|.*manual)')
+
+zanata_session = requests.Session()
+
+
+def _zanata_get_projects():
+    uri = ZANATA_URI % 'projects'
+    LOG.debug("Reading projects from %s" % uri)
+    projects_data = utils.read_json_from_uri(uri, session=zanata_session)
+
+    return (p['id'] for p in projects_data
+            if ZANATA_PROJECT_PATTERN.match(p['id']))
+
+
+def _zanata_get_project_versions(project_id):
+    LOG.debug("Reading iterations for project %s" % project_id)
+    uri = ZANATA_URI % ('projects/p/%s' % project_id)
+    project_data = utils.read_json_from_uri(uri, session=zanata_session)
+
+    return (it['id'] for it in project_data.get('iterations', [])
+            if ZANATA_VERSION_PATTERN.match(it['id']))
+
+
+def _zanata_get_user_stats(project_id, iteration_id, zanata_user_id,
+                           start_date, end_date):
+    uri = ZANATA_URI % ('stats/project/%s/version/%s/contributor/%s/%s..%s'
+                        % (project_id, iteration_id, zanata_user_id,
+                           start_date, end_date))
+    return utils.read_json_from_uri(uri, session=zanata_session)
+
+
+def _timestamp_to_date(timestamp):
+    return datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d')
+
+
+def _date_to_timestamp(d):
+    return int(time.mktime(
+        datetime.datetime.strptime(d, '%Y-%m-%d').timetuple()))
+
+
+def log(runtime_storage_inst, translation_team_uri):
+
+    last_update_key = 'zanata:last_update'
+    last_update = int(runtime_storage_inst.get_by_key(last_update_key) or
+                      _date_to_timestamp(ZANATA_FIRST_RECORD))
+    LOG.info('Last update: %d', last_update)
+    now = int(time.time())
+
+    LOG.info('Reading translation team from uri: %s', translation_team_uri)
+    translation_team = utils.read_yaml_from_uri(translation_team_uri)
+
+    if not translation_team:
+        LOG.warning('Translation team data is not available')
+        return
+
+    languages = dict((k, v['language'][0])
+                     for k, v in translation_team.items())
+
+    user_ids = set(u['zanata_id'] for u in runtime_storage_inst.get_all_users()
+                   if 'zanata_id' in u)
+    user_ids |= set(itertools.chain.from_iterable(
+        team.get('translators', []) for team in translation_team.values()))
+
+    for project_id in _zanata_get_projects():
+        for version in _zanata_get_project_versions(project_id):
+            for user_id in user_ids:
+
+                for day in range(last_update, now, WEEK):
+                    day_str = _timestamp_to_date(day)
+                    end_str = _timestamp_to_date(day + WEEK - DAY)
+
+                    stats = _zanata_get_user_stats(
+                        project_id, version, user_id, day_str, end_str)
+                    user_stats = stats[user_id]
+
+                    if user_stats:
+                        for lang, data in user_stats.items():
+                            record = dict(
+                                zanata_id=user_id,
+                                date=day,
+                                language_code=lang,
+                                language=languages.get(lang) or lang,
+                                translated=data['translated'],
+                                approved=data['approved'],
+                                module=project_id,
+                                branch=version,  # todo adapt version to branch
+                            )
+                            yield record
+
+    last_update += (now - last_update) // WEEK * WEEK
+    LOG.info('New last update: %d', last_update)
+    runtime_storage_inst.set_by_key(last_update_key, last_update)
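The polling loop in log() walks the span from the last processed Monday up to now in one-week steps and asks Zanata for an inclusive Monday-to-Sunday window each time; only whole weeks are then marked as processed, so a trailing partial week is re-read on the next run. A standalone sketch of just that windowing arithmetic (the helper name is mine):

    import datetime
    import time

    DAY = 24 * 60 * 60
    WEEK = 7 * DAY

    def weekly_windows(last_update, now):
        # same stepping as the loop in log(): one window per week, each
        # covering an inclusive Monday..Sunday date range
        for day in range(last_update, now, WEEK):
            start = datetime.datetime.fromtimestamp(day)
            end = datetime.datetime.fromtimestamp(day + WEEK - DAY)
            yield start.strftime('%Y-%m-%d'), end.strftime('%Y-%m-%d')

    last_update = int(time.mktime(
        datetime.datetime.strptime('2015-08-31', '%Y-%m-%d').timetuple()))
    now = last_update + 3 * WEEK + 2 * DAY   # pretend "now" is weeks later

    for start, end in weekly_windows(last_update, now):
        print(start, '..', end)
    # 2015-08-31 .. 2015-09-06, 2015-09-07 .. 2015-09-13, and so on

    # only whole weeks are recorded as processed, so the trailing partial
    # week is queried again on the next run
    last_update += (now - last_update) // WEEK * WEEK
    print(datetime.datetime.fromtimestamp(last_update).strftime('%Y-%m-%d'))
    # 2015-09-21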