diff --git a/stackalytics/processor/zanata.py b/stackalytics/processor/zanata.py index be9ebd8a2..46da0a409 100644 --- a/stackalytics/processor/zanata.py +++ b/stackalytics/processor/zanata.py @@ -14,7 +14,6 @@ # limitations under the License. import datetime -import re import time import itertools @@ -32,36 +31,12 @@ WEEK = 7 * DAY ZANATA_URI = 'https://translate.openstack.org/rest/%s' ZANATA_FIRST_RECORD = '2015-08-31' # must be Monday -# We limit the projects and versions to reduce number of requests to Zanata API -ZANATA_VERSION_PATTERN = re.compile(r'^(master)$') -ZANATA_PROJECT_PATTERN = re.compile(r'(horizon$|.*guide|.*manual|.*doc)') - zanata_session = requests.Session() -def _zanata_get_projects(): - uri = ZANATA_URI % 'projects' - LOG.debug("Reading projects from %s" % uri) - projects_data = utils.read_json_from_uri(uri, session=zanata_session) - - return (p['id'] for p in projects_data - if ZANATA_PROJECT_PATTERN.match(p['id'])) - - -def _zanata_get_project_versions(project_id): - LOG.debug("Reading iterations for project %s" % project_id) - uri = ZANATA_URI % ('projects/p/%s' % project_id) - project_data = utils.read_json_from_uri(uri, session=zanata_session) - - return (it['id'] for it in project_data.get('iterations', []) - if ZANATA_VERSION_PATTERN.match(it['id'])) - - -def _zanata_get_user_stats(project_id, iteration_id, zanata_user_id, - start_date, end_date): - uri = ZANATA_URI % ('stats/project/%s/version/%s/contributor/%s/%s..%s' - % (project_id, iteration_id, zanata_user_id, - start_date, end_date)) +def _zanata_get_user_stats(zanata_user_id, start_date, end_date): + uri = ZANATA_URI % ('stats/user/%s/%s..%s' % (zanata_user_id, + start_date, end_date)) return utils.read_json_from_uri(uri, session=zanata_session) @@ -89,40 +64,32 @@ def log(runtime_storage_inst, translation_team_uri): LOG.warning('Translation team data is not available') return - languages = dict((k, v['language'][0]) - for k, v in translation_team.items()) - user_ids = set(u['zanata_id'] for u in runtime_storage_inst.get_all_users() if 'zanata_id' in u) user_ids |= set(itertools.chain.from_iterable( team.get('translators', []) for team in translation_team.values())) - for project_id in _zanata_get_projects(): - for version in _zanata_get_project_versions(project_id): - for user_id in user_ids: - - for day in range(last_update, now, WEEK): - day_str = _timestamp_to_date(day) - end_str = _timestamp_to_date(day + WEEK - DAY) - - stats = _zanata_get_user_stats( - project_id, version, user_id, day_str, end_str) - user_stats = stats[user_id] - - if user_stats: - for lang, data in user_stats.items(): - record = dict( - zanata_id=user_id, - date=day, - language_code=lang, - language=languages.get(lang) or lang, - translated=data['translated'], - approved=data['approved'], - module=project_id, - branch=version, # todo adapt version to branch - ) - yield record - + for user_id in user_ids: + for day in range(last_update, now, WEEK): + day_str = _timestamp_to_date(day) + end_str = _timestamp_to_date(day + WEEK - DAY) + user_stats = _zanata_get_user_stats(user_id, day_str, end_str) + if user_stats: + for user_stats_item in user_stats: + # Currently we only count translated words + if user_stats_item['savedState'] == 'Translated': + record = dict( + zanata_id=user_id, + date=day, + language_code=user_stats_item['localeId'], + language=user_stats_item['localeDisplayName'], + # Todo: not always consistent to the official name + module=user_stats_item['projectSlug'], + # Todo: adapt version to branch + branch=user_stats_item['versionSlug'], + translated=user_stats_item['wordCount'], + ) + yield record last_update += (now - last_update) // WEEK * WEEK LOG.info('New last update: %d', last_update) runtime_storage_inst.set_by_key(last_update_key, last_update)