From 43ffa83fe47b5a5fa058959c8e6c62c4db4c691e Mon Sep 17 00:00:00 2001 From: Ilya Shakhat Date: Thu, 7 Sep 2017 14:59:51 +0000 Subject: [PATCH] Revert "Remove Foundation members report" Foundation members report was useful to get data on new registrations. Unfortunately, the user registration date can only be retrieved from HTML and is not available in the OpenStackID-Resources API. This reverts commit fd2ba439947d754f21c5ec4c555f29519810258e. Change-Id: I8d86cec906f516be5696c679176ba4919f18edc7 --- doc/source/tools/stackalytics-processor.txt | 10 +- etc/default_data.json | 1 + etc/stackalytics.conf | 6 + etc/test_default_data.json | 1 + stackalytics/dashboard/decorators.py | 1 + stackalytics/dashboard/parameters.py | 1 + stackalytics/dashboard/reports.py | 17 + stackalytics/dashboard/static/css/style.css | 2 +- .../templates/_macros/activity_log.html | 2 + stackalytics/dashboard/templates/layout.html | 3 +- .../templates/reports/driverlog.html | 3 +- .../dashboard/templates/reports/members.html | 405 ++++++++++++++++++ .../dashboard/templates/reports/record.html | 6 + stackalytics/dashboard/web.py | 33 +- stackalytics/processor/config.py | 4 + stackalytics/processor/dump.py | 5 +- stackalytics/processor/main.py | 21 + stackalytics/processor/mps.py | 111 +++++ stackalytics/processor/record_processor.py | 68 +++ stackalytics/processor/schema.py | 6 + stackalytics/processor/user_processor.py | 12 +- stackalytics/processor/utils.py | 7 + stackalytics/tests/unit/test_mps.py | 58 +++ .../tests/unit/test_record_processor.py | 54 +++ 24 files changed, 823 insertions(+), 14 deletions(-) create mode 100644 stackalytics/dashboard/templates/reports/members.html create mode 100644 stackalytics/processor/mps.py create mode 100644 stackalytics/tests/unit/test_mps.py diff --git a/doc/source/tools/stackalytics-processor.txt b/doc/source/tools/stackalytics-processor.txt index 91b29b9cd..ece5f313e 100644 --- a/doc/source/tools/stackalytics-processor.txt +++ 
b/doc/source/tools/stackalytics-processor.txt @@ -1,12 +1,14 @@ usage: stackalytics-processor [-h] [--config-dir DIR] [--config-file PATH] - [--corrections-uri CORRECTIONS_URI] [--debug] - [--default-data-uri DEFAULT_DATA_URI] + [--corrections-uri CORRECTIONS_URI] + [--days_to_update_members DAYS_TO_UPDATE_MEMBERS] + [--debug] [--default-data-uri DEFAULT_DATA_URI] [--fetching-user-source FETCHING_USER_SOURCE] [--gerrit-retry GERRIT_RETRY] [--git-base-uri GIT_BASE_URI] [--log-config-append PATH] [--log-date-format DATE_FORMAT] [--log-dir LOG_DIR] [--log-file PATH] + [--members-look-ahead MEMBERS_LOOK_AHEAD] [--nodebug] [--nouse-journal] [--nouse-syslog] [--nowatch-log-file] [--read-timeout READ_TIMEOUT] @@ -33,6 +35,8 @@ optional arguments: precedence. Defaults to None. --corrections-uri CORRECTIONS_URI The address of file with corrections data + --days_to_update_members DAYS_TO_UPDATE_MEMBERS + Number of days to update members --debug, -d If set to true, the logging level will be set to DEBUG instead of the default INFO level. --default-data-uri DEFAULT_DATA_URI @@ -67,6 +71,8 @@ optional arguments: If no default is set, logging will go to stderr as defined by use_stderr. This option is ignored if log_config_append is set. 
+ --members-look-ahead MEMBERS_LOOK_AHEAD + How many member profiles to look ahead after the last --nodebug The inverse of --debug --nouse-journal The inverse of --use-journal --nouse-syslog The inverse of --use-syslog diff --git a/etc/default_data.json b/etc/default_data.json index ebbd1e983..5c3d9eabf 100644 --- a/etc/default_data.json +++ b/etc/default_data.json @@ -30958,6 +30958,7 @@ "https://lists.opnfv.org/pipermail/opnfv-users/", "https://lists.opnfv.org/pipermail/test-wg/" ], + "member_lists": ["https://www.openstack.org/community/members/profile/"], "project_types": [ { "id": "all", diff --git a/etc/stackalytics.conf b/etc/stackalytics.conf index f67ac3519..7fde7798e 100644 --- a/etc/stackalytics.conf +++ b/etc/stackalytics.conf @@ -152,6 +152,9 @@ # The folder that holds all project sources to analyze (string value) #sources_root = /var/local/stackalytics +# Number of days to update members (integer value) +#days_to_update_members = 30 + # The address of file with corrections data (string value) #corrections_uri = https://git.openstack.org/cgit/openstack/stackalytics/plain/etc/corrections.json @@ -174,6 +177,9 @@ # Allowed values: launchpad, #fetching_user_source = launchpad +# How many member profiles to look ahead after the last (integer value) +#members_look_ahead = 250 + # Number of seconds to wait for remote response (integer value) #read_timeout = 120 diff --git a/etc/test_default_data.json b/etc/test_default_data.json index 315b65cb5..653ae4774 100644 --- a/etc/test_default_data.json +++ b/etc/test_default_data.json @@ -293,6 +293,7 @@ ], "mail_lists": ["http://lists.openstack.org/pipermail/openstack-dev/"], + "member_lists": ["http://www.openstack.org/community/members/profile/"], "project_types": [ { diff --git a/stackalytics/dashboard/decorators.py b/stackalytics/dashboard/decorators.py index b9ff331bc..27b027a30 100644 --- a/stackalytics/dashboard/decorators.py +++ b/stackalytics/dashboard/decorators.py @@ -365,6 +365,7 @@ def 
aggregate_filter(): 'bpc': (incremental_filter, None), 'filed-bugs': (incremental_filter, None), 'resolved-bugs': (incremental_filter, None), + 'members': (incremental_filter, None), 'person-day': (person_day_filter, None), 'patches': (None, None), 'translations': (loc_filter, None), diff --git a/stackalytics/dashboard/parameters.py b/stackalytics/dashboard/parameters.py index 8002c433b..cae6cd7f6 100644 --- a/stackalytics/dashboard/parameters.py +++ b/stackalytics/dashboard/parameters.py @@ -50,6 +50,7 @@ METRIC_TO_RECORD_TYPE = { 'bpc': ['bpc'], 'filed-bugs': ['bugf'], 'resolved-bugs': ['bugr'], + 'members': ['member'], 'person-day': ['mark', 'patch', 'email', 'bpd', 'bugf'], 'patches': ['patch'], 'translations': ['tr'], diff --git a/stackalytics/dashboard/reports.py b/stackalytics/dashboard/reports.py index e9367b94c..df7480c6d 100644 --- a/stackalytics/dashboard/reports.py +++ b/stackalytics/dashboard/reports.py @@ -28,6 +28,9 @@ from stackalytics.dashboard import vault from stackalytics.processor import utils +DEFAULT_DAYS_COUNT = 7 +FIRST_MEMBER_DATE = "2012-Jul-18" + blueprint = flask.Blueprint('reports', __name__, url_prefix='/report') @@ -145,6 +148,20 @@ def contribution(module, days): } +@blueprint.route('/members') +@decorators.exception_handler() +@decorators.templated() +def members(): + days = int(flask.request.args.get('days') or DEFAULT_DAYS_COUNT) + all_days = int((time.time() - utils.date_to_timestamp_ext( + FIRST_MEMBER_DATE)) / (24 * 60 * 60)) + 1 + + return { + 'days': days, + 'all_days': all_days + } + + @blueprint.route('/affiliation_changes') @decorators.exception_handler() @decorators.templated() diff --git a/stackalytics/dashboard/static/css/style.css b/stackalytics/dashboard/static/css/style.css index 7720ae484..ff61af88b 100644 --- a/stackalytics/dashboard/static/css/style.css +++ b/stackalytics/dashboard/static/css/style.css @@ -424,7 +424,7 @@ ul#menu-stackamenu li { div.stackamenu { text-align: left; padding-bottom: 10px; - 
margin-left: 315px; + margin-left: 240px; } div.stackamenu a { diff --git a/stackalytics/dashboard/templates/_macros/activity_log.html b/stackalytics/dashboard/templates/_macros/activity_log.html index 321b6617f..44e11eb0f 100644 --- a/stackalytics/dashboard/templates/_macros/activity_log.html +++ b/stackalytics/dashboard/templates/_macros/activity_log.html @@ -192,6 +192,8 @@ show_twitter=False) -%}
Bug “${title}” (${number})
Status: ${status}
Importance: ${importance}
+ {%elif record_type == "member" %} +
Registered in OpenStack Foundation
{%elif record_type == "tr" %}
Translated ${loc} words into ${language}
{%/if%} diff --git a/stackalytics/dashboard/templates/layout.html b/stackalytics/dashboard/templates/layout.html index 4ef167d9d..11b1c68d5 100644 --- a/stackalytics/dashboard/templates/layout.html +++ b/stackalytics/dashboard/templates/layout.html @@ -36,10 +36,11 @@
- diff --git a/stackalytics/dashboard/templates/reports/driverlog.html b/stackalytics/dashboard/templates/reports/driverlog.html index e9e2f24c4..b81ed51ea 100644 --- a/stackalytics/dashboard/templates/reports/driverlog.html +++ b/stackalytics/dashboard/templates/reports/driverlog.html @@ -28,10 +28,11 @@
- diff --git a/stackalytics/dashboard/templates/reports/members.html b/stackalytics/dashboard/templates/reports/members.html new file mode 100644 index 000000000..05d2b7558 --- /dev/null +++ b/stackalytics/dashboard/templates/reports/members.html @@ -0,0 +1,405 @@ +{% extends "base.html" %} + +{% set active_tab = 'members' %} +{% set page_title = 'OpenStack Foundation members' %} + +{% block head %} + + + + + + +{% endblock %} + +{% block body %} + +
+
+
+ About +
+ + + + +
+
+ + +
+ +
+ + +
+
+ + + + + + + +
+

OpenStack foundation member companies

+ +
+ + + + + + + + + + +
#CompanyMembers Count
+
+
+

Members by company

+
+
+ +
+
+
+
+ + + + + + +
+

Individual Members

+ +
+ + + + + + + + + + + +
#EngineerDate JoinedCompany
+
+
+
+

New Companies

+
+ +
+ + + + + + + + + + + +
#CompanyFirst Member Joined
+
+
+ +
+
+ +{% endblock %} diff --git a/stackalytics/dashboard/templates/reports/record.html b/stackalytics/dashboard/templates/reports/record.html index 2c8ab7d6e..4eee4b070 100644 --- a/stackalytics/dashboard/templates/reports/record.html +++ b/stackalytics/dashboard/templates/reports/record.html @@ -197,6 +197,12 @@ href="https://review.openstack.org/#/c/{{ record.review_number }}" target="_blank">{{ record.review_id }} + {% elif record_type == "member" %} +
Registered in OpenStack + Foundation +
+ {% elif record_type == "tr" %}
Translated {{ record.loc }} words into {{ record.language }}
diff --git a/stackalytics/dashboard/web.py b/stackalytics/dashboard/web.py index 319c9c749..e72308c03 100644 --- a/stackalytics/dashboard/web.py +++ b/stackalytics/dashboard/web.py @@ -350,6 +350,27 @@ def get_module(module_id, **kwargs): return module +@app.route('/api/1.0/members') +@decorators.exception_handler() +@decorators.response() +@decorators.cached(ignore=['release', 'project_type', 'module']) +@decorators.jsonify('members') +@decorators.record_filter(ignore=['release', 'project_type', 'module']) +def get_members(records, **kwargs): + response = [] + for record in records: + record = vault.extend_record(record) + nr = dict([(k, record[k]) for k in + ['author_name', 'date', 'company_name', 'member_uri']]) + nr['date_str'] = helpers.format_date(nr['date']) + response.append(nr) + + response.sort(key=lambda x: x['date'], reverse=True) + utils.add_index(response) + + return response + + @app.route('/api/1.0/stats/bp') @decorators.exception_handler() @decorators.response() @@ -564,6 +585,8 @@ def _get_week(kwargs, param_name): def timeline(records, **kwargs): # find start and end dates metric = parameters.get_parameter(kwargs, 'metric') + start_date = int(parameters.get_single_parameter(kwargs, 'start_date') + or 0) release_name = parameters.get_single_parameter(kwargs, 'release') or 'all' releases = vault.get_vault()['releases'] @@ -627,10 +650,14 @@ def timeline(records, **kwargs): if start_week <= week < end_week: week_stat_loc[week] += loc_handler(record) week_stat_commits[week] += commits_handler(record) - if record.release == release_name: - week_stat_commits_hl[week] += commits_handler(record) + if 'members' in metric: + if record.date >= start_date: + week_stat_commits_hl[week] += 1 + else: + if record.release == release_name: + week_stat_commits_hl[week] += commits_handler(record) - if 'all' == release_name: + if 'all' == release_name and 'members' not in metric: week_stat_commits_hl = week_stat_commits # form arrays in format acceptable to timeline 
plugin diff --git a/stackalytics/processor/config.py b/stackalytics/processor/config.py index d46f58026..8f238c03d 100644 --- a/stackalytics/processor/config.py +++ b/stackalytics/processor/config.py @@ -32,6 +32,8 @@ PROCESSOR_OPTS = [ 'default_data_uri = file:///path/to/default_data.json'), cfg.StrOpt('sources-root', default='/var/local/stackalytics', help='The folder that holds all project sources to analyze'), + cfg.IntOpt('days_to_update_members', default=30, + help='Number of days to update members'), cfg.StrOpt('corrections-uri', default=('https://git.openstack.org/cgit/' 'openstack/stackalytics/plain/etc/corrections.json'), @@ -51,6 +53,8 @@ PROCESSOR_OPTS = [ cfg.StrOpt("fetching-user-source", default='launchpad', choices=['launchpad', ''], help="Source for fetching user profiles"), + cfg.IntOpt('members-look-ahead', default=250, + help='How many member profiles to look ahead after the last'), cfg.IntOpt('read-timeout', default=120, help='Number of seconds to wait for remote response'), cfg.IntOpt('gerrit-retry', default=10, diff --git a/stackalytics/processor/dump.py b/stackalytics/processor/dump.py index ea7c613c1..3b1a38918 100644 --- a/stackalytics/processor/dump.py +++ b/stackalytics/processor/dump.py @@ -43,7 +43,8 @@ OPTS = [ SINGLE_KEYS = ['module_groups', 'project_types', 'repos', 'releases', - 'companies', 'runtime_storage_update_time'] + 'companies', 'last_update_members_date', 'last_member_index', + 'runtime_storage_update_time'] ARRAY_KEYS = ['record', 'user'] BULK_READ_SIZE = 64 MEMCACHED_URI_PREFIX = r'^memcached:\/\/' @@ -139,6 +140,8 @@ def export_data(memcached_inst, fd): pickle.dump(('user:%s' % user['launchpad_id'], user), fd) if user.get('gerrit_id'): pickle.dump(('user:gerrit:%s' % user['gerrit_id'], user), fd) + if user.get('member_id'): + pickle.dump(('user:member:%s' % user['member_id'], user), fd) for email in user.get('emails') or []: pickle.dump((('user:%s' % email).encode('utf8'), user), fd) diff --git 
a/stackalytics/processor/main.py b/stackalytics/processor/main.py index 0656d4bcd..077e422a0 100644 --- a/stackalytics/processor/main.py +++ b/stackalytics/processor/main.py @@ -27,6 +27,7 @@ from stackalytics.processor import default_data_processor from stackalytics.processor import governance from stackalytics.processor import lp from stackalytics.processor import mls +from stackalytics.processor import mps from stackalytics.processor import rcs from stackalytics.processor import record_processor from stackalytics.processor import runtime_storage @@ -192,6 +193,23 @@ def _process_translation_stats(runtime_storage_inst, record_processor_inst): runtime_storage_inst.set_records(processed_translation_iterator) +def _process_member_list(uri, runtime_storage_inst, record_processor_inst): + member_iterator = mps.log(uri, runtime_storage_inst, + CONF.days_to_update_members, + CONF.members_look_ahead) + member_iterator_typed = _record_typer(member_iterator, 'member') + processed_member_iterator = record_processor_inst.process( + member_iterator_typed) + runtime_storage_inst.set_records(processed_member_iterator) + + +def update_members(runtime_storage_inst, record_processor_inst): + member_lists = runtime_storage_inst.get_by_key('member_lists') or [] + for member_list in member_lists: + _process_member_list(member_list, runtime_storage_inst, + record_processor_inst) + + def _post_process_records(record_processor_inst, repos): LOG.debug('Build release index') release_index = {} @@ -308,6 +326,9 @@ def main(): apply_corrections(CONF.corrections_uri, runtime_storage_inst) + # long operation should be the last + update_members(runtime_storage_inst, record_processor_inst) + runtime_storage_inst.set_by_key('runtime_storage_update_time', utils.date_to_timestamp('now')) LOG.info('stackalytics-processor succeeded.') diff --git a/stackalytics/processor/mps.py b/stackalytics/processor/mps.py new file mode 100644 index 000000000..933d321e4 --- /dev/null +++ 
b/stackalytics/processor/mps.py @@ -0,0 +1,111 @@ +# Copyright (c) 2013 Mirantis Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random +import re +import time + +from oslo_log import log as logging +import requests +import six + +from stackalytics.processor import utils + + +LOG = logging.getLogger(__name__) + +NAME_AND_DATE_PATTERN = (r'

(?P[^<]*)[\s\S]*?' + r'
(?P[^<]*)') +COMPANY_PATTERN = (r'Date\sJoined[\s\S]*?(?P[^<]*)' + r'[\s\S]*?From\s(?P[\s\S]*?)\(Current\)') +GARBAGE_PATTERN = r'[/\\~%^\*_]+' + + +def strip_garbage(s): + return re.sub(r'\s+', ' ', re.sub(GARBAGE_PATTERN, '', s)) + + +def _retrieve_member(requests_session, uri, member_id, html_parser): + + content = utils.read_uri(uri, session=requests_session) + + if not content: + return {} + + member = {} + + for rec in re.finditer(NAME_AND_DATE_PATTERN, content): + result = rec.groupdict() + + member['member_id'] = member_id + member['member_name'] = strip_garbage(result['member_name']) + member['date_joined'] = result['date_joined'] + member['member_uri'] = uri + break + + member['company_draft'] = '*independent' + for rec in re.finditer(COMPANY_PATTERN, content): + result = rec.groupdict() + + member['company_draft'] = strip_garbage( + html_parser.unescape(result['company_draft'])) + + return member + + +def log(uri, runtime_storage_inst, days_to_update_members, members_look_ahead): + LOG.debug('Retrieving new openstack.org members') + + last_update_members_date = runtime_storage_inst.get_by_key( + 'last_update_members_date') or 0 + last_member_index = runtime_storage_inst.get_by_key( + 'last_member_index') or 0 + + end_update_date = int(time.time()) - days_to_update_members * 24 * 60 * 60 + + if last_update_members_date <= end_update_date: + last_member_index = 0 + last_update_members_date = int(time.time()) + + runtime_storage_inst.set_by_key('last_update_members_date', + last_update_members_date) + + cnt_empty = 0 + cur_index = last_member_index + 1 + html_parser = six.moves.html_parser.HTMLParser() + requests_session = requests.Session() + + while cnt_empty < members_look_ahead: + + profile_uri = uri + str(cur_index) + member = _retrieve_member(requests_session, profile_uri, + str(cur_index), html_parser) + + if 'member_name' not in member: + cnt_empty += 1 + cur_index += 1 + continue + + cnt_empty = 0 + last_member_index = cur_index + cur_index += 1 + 
LOG.debug('New member: %s', member['member_id']) + yield member + + time.sleep(random.random() * 5) + + requests_session.close() + LOG.debug('Last_member_index: %s', last_member_index) + runtime_storage_inst.set_by_key('last_member_index', last_member_index) diff --git a/stackalytics/processor/record_processor.py b/stackalytics/processor/record_processor.py index 4c91911fc..e93d5064a 100644 --- a/stackalytics/processor/record_processor.py +++ b/stackalytics/processor/record_processor.py @@ -425,6 +425,39 @@ class RecordProcessor(object): yield bug_fixed + def _process_member(self, record): + user_id = user_processor.make_user_id(member_id=record['member_id']) + record['primary_key'] = user_id + record['date'] = utils.member_date_to_timestamp(record['date_joined']) + record['author_name'] = record['member_name'] + record['module'] = 'unknown' + company_draft = record['company_draft'] + + company_name = self.domains_index.get(utils.normalize_company_name( + company_draft)) or (utils.normalize_company_draft(company_draft)) + + # author_email is a key to create new user + record['author_email'] = user_id + record['company_name'] = company_name + # _update_record_and_user function will create new user if needed + self._update_record_and_user(record) + record['company_name'] = company_name + user = user_processor.load_user(self.runtime_storage_inst, + user_id=user_id) + + user['user_name'] = record['author_name'] + user['companies'] = [{ + 'company_name': company_name, + 'end_date': 0, + }] + user['company_name'] = company_name + + user_processor.store_user(self.runtime_storage_inst, user) + + record['company_name'] = company_name + + yield record + def _process_translation(self, record): # todo split translation and approval translation = record.copy() @@ -455,6 +488,7 @@ class RecordProcessor(object): 'email': self._process_email, 'bp': self._process_blueprint, 'bug': self._process_bug, + 'member': self._process_member, 'i18n': self._process_translation, } @@ -681,6 
+715,39 @@ class RecordProcessor(object): self.runtime_storage_inst.set_records( self._close_patch(cores, marks_patch['marks'])) + def _update_members_company_name(self): + LOG.info('Update members with company names') + + def record_handler(record): + if record['record_type'] != 'member': + return + + company_draft = record['company_draft'] + company_name = self.domains_index.get( + utils.normalize_company_name(company_draft)) or ( + utils.normalize_company_draft(company_draft)) + + if company_name == record['company_name']: + return + + LOG.debug('Update record %s, company name changed to %s', + record, company_name) + record['company_name'] = company_name + + yield record + + user = user_processor.load_user(self.runtime_storage_inst, + user_id=record['user_id']) + LOG.debug('Update user %s, company name changed to %s', + user, company_name) + user['companies'] = [{ + 'company_name': company_name, + 'end_date': 0, + }] + user_processor.store_user(self.runtime_storage_inst, user) + + yield record_handler + def _update_commits_with_module_alias(self): LOG.info('Update record with aliases') @@ -706,6 +773,7 @@ class RecordProcessor(object): self._update_commits_with_module_alias, self._update_blueprints_with_mention_info, self._determine_core_contributors, + self._update_members_company_name, self._update_marks_with_disagreement, ] diff --git a/stackalytics/processor/schema.py b/stackalytics/processor/schema.py index f44789a99..51f1f0f7c 100644 --- a/stackalytics/processor/schema.py +++ b/stackalytics/processor/schema.py @@ -225,6 +225,12 @@ default_data = { "type": "string" } }, + "member_lists": { + "type": "array", + "items": { + "type": "string" + } + }, "project_types": { "type": "array", "items": { diff --git a/stackalytics/processor/user_processor.py b/stackalytics/processor/user_processor.py index 1318a290e..01f004a5a 100644 --- a/stackalytics/processor/user_processor.py +++ b/stackalytics/processor/user_processor.py @@ -22,11 +22,13 @@ ROBOTS = '*robots' 
def make_user_id(emails=None, launchpad_id=None, gerrit_id=None, - github_id=None, zanata_id=None): + member_id=None, github_id=None, zanata_id=None): if launchpad_id or emails: return launchpad_id or emails[0] if gerrit_id: return 'gerrit:%s' % gerrit_id + if member_id: + return 'member:%s' % member_id if github_id: return 'github:%s' % github_id if zanata_id: @@ -58,11 +60,11 @@ def store_user(runtime_storage_inst, user): def load_user(runtime_storage_inst, seq=None, user_id=None, email=None, - launchpad_id=None, gerrit_id=None, github_id=None, - zanata_id=None): + launchpad_id=None, gerrit_id=None, member_id=None, + github_id=None, zanata_id=None): - key = make_user_id(gerrit_id=gerrit_id, github_id=github_id, - zanata_id=zanata_id) + key = make_user_id(gerrit_id=gerrit_id, member_id=member_id, + github_id=github_id, zanata_id=zanata_id) if not key: key = seq or user_id or launchpad_id or email if key: diff --git a/stackalytics/processor/utils.py b/stackalytics/processor/utils.py index 89794f60e..cf367ac42 100644 --- a/stackalytics/processor/utils.py +++ b/stackalytics/processor/utils.py @@ -63,6 +63,13 @@ def date_to_timestamp_ext(d): return int(d) +def member_date_to_timestamp(d): + if not d: + return 0 + return int(time.mktime( + datetime.datetime.strptime(d, '%B %d, %Y ').timetuple())) + + def iso8601_to_timestamp(s): return calendar.timegm(iso8601.parse_date(s).utctimetuple()) diff --git a/stackalytics/tests/unit/test_mps.py b/stackalytics/tests/unit/test_mps.py new file mode 100644 index 000000000..54012a1fd --- /dev/null +++ b/stackalytics/tests/unit/test_mps.py @@ -0,0 +1,58 @@ +# Copyright (c) 2013 Mirantis Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re + +import testtools + +from stackalytics.processor import mps + + +class TestMps(testtools.TestCase): + + def test_member_parse_regex(self): + + content = '''

Individual Member Profile

+
+
+

 

+
+ +
+
+

Jim Battenberg

+
+
Date Joined
+
June 25, 2013

+
Affiliations
+
+
+ Rackspace From (Current) +
+
+
Statement of Interest
+
+

contribute logic and evangelize openstack

+
+

 

''' + + match = re.search(mps.NAME_AND_DATE_PATTERN, content) + self.assertTrue(match) + self.assertEqual('Jim Battenberg', match.group('member_name')) + self.assertEqual('June 25, 2013 ', match.group('date_joined')) + + match = re.search(mps.COMPANY_PATTERN, content) + self.assertTrue(match) + self.assertEqual('Rackspace', match.group('company_draft')) diff --git a/stackalytics/tests/unit/test_record_processor.py b/stackalytics/tests/unit/test_record_processor.py index 77f1252f0..5e468c3ca 100644 --- a/stackalytics/tests/unit/test_record_processor.py +++ b/stackalytics/tests/unit/test_record_processor.py @@ -799,6 +799,60 @@ class TestRecordProcessor(testtools.TestCase): record_processor_inst.runtime_storage_inst, email='john_doe@gmail.com')) + def test_create_member(self): + member_record = {'member_id': '123456789', + 'member_name': 'John Doe', + 'member_uri': 'http://www.openstack.org/community' + '/members/profile/123456789', + 'date_joined': 'August 01, 2012 ', + 'company_draft': 'Mirantis'} + + record_processor_inst = self.make_record_processor() + result_member = next(record_processor_inst._process_member( + member_record)) + + self.assertEqual(result_member['primary_key'], 'member:123456789') + self.assertEqual(result_member['date'], utils.member_date_to_timestamp( + 'August 01, 2012 ')) + self.assertEqual(result_member['author_name'], 'John Doe') + self.assertEqual(result_member['company_name'], 'Mirantis') + + result_user = user_processor.load_user( + record_processor_inst.runtime_storage_inst, + member_id='123456789') + + self.assertEqual(result_user['user_name'], 'John Doe') + self.assertEqual(result_user['company_name'], 'Mirantis') + self.assertEqual(result_user['companies'], + [{'company_name': 'Mirantis', 'end_date': 0}]) + + def test_update_member(self): + member_record = {'member_id': '123456789', + 'member_name': 'John Doe', + 'member_uri': 'http://www.openstack.org/community' + '/members/profile/123456789', + 'date_joined': 'August 01, 2012 ', 
+ 'company_draft': 'Mirantis'} + + record_processor_inst = self.make_record_processor() + + updated_member_record = member_record + updated_member_record['member_name'] = 'Bill Smith' + updated_member_record['company_draft'] = 'Rackspace' + + result_member = next(record_processor_inst._process_member( + updated_member_record)) + self.assertEqual(result_member['author_name'], 'Bill Smith') + self.assertEqual(result_member['company_name'], 'Rackspace') + + result_user = user_processor.load_user( + record_processor_inst.runtime_storage_inst, + member_id='123456789') + + self.assertEqual(result_user['user_name'], 'Bill Smith') + self.assertEqual(result_user['companies'], + [{'company_name': 'Rackspace', 'end_date': 0}]) + def test_process_email_then_review(self): # it is expected that the user profile will contain email and # gerrit id, while LP id will be None