Remove Foundation members report

The Foundation members report shows when new users are registered as
OpenStack Foundation members. However, the current approach gets
data directly from HTML pages (slow!), does not update the data, and
does not take job changes into account. Thus the produced report does
not reflect reality and just shows wrong data.

This reverts commits
    307b96efc1893d42901dde7c5a27e842b10e51c2
    2d4d2fc6107f553bf6d1fac31f5d0f3f0e6a2459
    5decf7a17079d3c653f4a60372f73fa41cb89d84
    ea37576fbfa01866222d1c84560cc5830a0ef4af
    bfb56d28c24433e85af0cf0f65846863c753b897
    1865fc804f6a4ff33908bd7b7191809ade1aa728
    e40cb6857c5b47ba41111d6f2a395c7cd4a3f76c
    1c4003c6fba50fc0b6eea78c279b4064ec71d78f
    97a64afd68984840b1379732099dbc79cb7e1843
    a18739e4158b6ba69046e9d0cf68c47c2b90faa6
    ed515b4be9e31982ef9774a0f3688cf1e0c5ef42

Change-Id: I5e4886e7ff7f1da1527d82a1e55152af58f36afe
This commit is contained in:
Ilya Shakhat 2017-08-24 11:11:56 +02:00
parent d7140a1fc1
commit fd2ba43994
24 changed files with 14 additions and 823 deletions

View File

@ -1,14 +1,12 @@
usage: stackalytics-processor [-h] [--config-dir DIR] [--config-file PATH]
[--corrections-uri CORRECTIONS_URI]
[--days_to_update_members DAYS_TO_UPDATE_MEMBERS]
[--debug] [--default-data-uri DEFAULT_DATA_URI]
[--corrections-uri CORRECTIONS_URI] [--debug]
[--default-data-uri DEFAULT_DATA_URI]
[--fetching-user-source FETCHING_USER_SOURCE]
[--gerrit-retry GERRIT_RETRY]
[--git-base-uri GIT_BASE_URI]
[--log-config-append PATH]
[--log-date-format DATE_FORMAT]
[--log-dir LOG_DIR] [--log-file PATH]
[--members-look-ahead MEMBERS_LOOK_AHEAD]
[--nodebug] [--nouse-journal] [--nouse-syslog]
[--nowatch-log-file]
[--read-timeout READ_TIMEOUT]
@ -35,8 +33,6 @@ optional arguments:
precedence. Defaults to None.
--corrections-uri CORRECTIONS_URI
The address of file with corrections data
--days_to_update_members DAYS_TO_UPDATE_MEMBERS
Number of days to update members
--debug, -d If set to true, the logging level will be set to DEBUG
instead of the default INFO level.
--default-data-uri DEFAULT_DATA_URI
@ -71,8 +67,6 @@ optional arguments:
If no default is set, logging will go to stderr as
defined by use_stderr. This option is ignored if
log_config_append is set.
--members-look-ahead MEMBERS_LOOK_AHEAD
How many member profiles to look ahead after the last
--nodebug The inverse of --debug
--nouse-journal The inverse of --use-journal
--nouse-syslog The inverse of --use-syslog

View File

@ -30958,7 +30958,6 @@
"https://lists.opnfv.org/pipermail/opnfv-users/",
"https://lists.opnfv.org/pipermail/test-wg/"
],
"member_lists": ["https://www.openstack.org/community/members/profile/"],
"project_types": [
{
"id": "all",

View File

@ -152,9 +152,6 @@
# The folder that holds all project sources to analyze (string value)
#sources_root = /var/local/stackalytics
# Number of days to update members (integer value)
#days_to_update_members = 30
# The address of file with corrections data (string value)
#corrections_uri = https://git.openstack.org/cgit/openstack/stackalytics/plain/etc/corrections.json
@ -177,9 +174,6 @@
# Allowed values: launchpad, <None>
#fetching_user_source = launchpad
# How many member profiles to look ahead after the last (integer value)
#members_look_ahead = 250
# Number of seconds to wait for remote response (integer value)
#read_timeout = 120

View File

@ -293,7 +293,6 @@
],
"mail_lists": ["http://lists.openstack.org/pipermail/openstack-dev/"],
"member_lists": ["http://www.openstack.org/community/members/profile/"],
"project_types": [
{

View File

@ -365,7 +365,6 @@ def aggregate_filter():
'bpc': (incremental_filter, None),
'filed-bugs': (incremental_filter, None),
'resolved-bugs': (incremental_filter, None),
'members': (incremental_filter, None),
'person-day': (person_day_filter, None),
'patches': (None, None),
'translations': (loc_filter, None),

View File

@ -50,7 +50,6 @@ METRIC_TO_RECORD_TYPE = {
'bpc': ['bpc'],
'filed-bugs': ['bugf'],
'resolved-bugs': ['bugr'],
'members': ['member'],
'person-day': ['mark', 'patch', 'email', 'bpd', 'bugf'],
'patches': ['patch'],
'translations': ['tr'],

View File

@ -28,9 +28,6 @@ from stackalytics.dashboard import vault
from stackalytics.processor import utils
DEFAULT_DAYS_COUNT = 7
FIRST_MEMBER_DATE = "2012-Jul-18"
blueprint = flask.Blueprint('reports', __name__, url_prefix='/report')
@ -148,20 +145,6 @@ def contribution(module, days):
}
@blueprint.route('/members')
@decorators.exception_handler()
@decorators.templated()
def members():
days = int(flask.request.args.get('days') or DEFAULT_DAYS_COUNT)
all_days = int((time.time() - utils.date_to_timestamp_ext(
FIRST_MEMBER_DATE)) / (24 * 60 * 60)) + 1
return {
'days': days,
'all_days': all_days
}
@blueprint.route('/affiliation_changes')
@decorators.exception_handler()
@decorators.templated()

View File

@ -424,7 +424,7 @@ ul#menu-stackamenu li {
div.stackamenu {
text-align: left;
padding-bottom: 10px;
margin-left: 240px;
margin-left: 315px;
}
div.stackamenu a {

View File

@ -192,8 +192,6 @@ show_twitter=False) -%}
<div class="header">Bug &ldquo;${title}&rdquo; (<a href="${web_link}" class="ext_link">${number}</a>)</div>
<div>Status: <span class="status${status_class}">${status}</span></div>
<div>Importance: <span class="importance${importance}">${importance}</span></div>
{%elif record_type == "member" %}
<div class="header"><a href="${member_uri}" target="_blank">Registered</a> in OpenStack Foundation</div>
{%elif record_type == "tr" %}
<div class="header">Translated ${loc} words into ${language}</div>
{%/if%}

View File

@ -36,11 +36,10 @@
<div style="float: left;">
<span id="logo"><a href="{{ url_for('overview') }}"><img src="{{ url_for('static', filename='images/stackalytics_logo.png') }}" alt="Stackalytics" style="width: 100%; max-width: 190px;"></a></span>
</div>
<div class="stackamenu" style="margin-left: 240px">
<div class="stackamenu">
<ul id="menu-stackamenu">
<li class="menu-item current-menu-item"><a href="/"><span class="icon-pie"></span>Code Contribution</a></li>
<li class="menu-item"><a href="/report/driverlog"><span class="icon-cogs"></span>Vendor Drivers <span style="vertical-align: top; font-size: 60%;">&beta;</span></a></li>
<li class="menu-item"><a href="/report/members"><span class="icon-users"></span>Member Directory</a></li>
</ul>
</div>
</div>

View File

@ -28,11 +28,10 @@
<div style="float: left;">
<span id="logo"><a href="{{ url_for('overview') }}"><img src="{{ url_for('static', filename='images/stackalytics_logo.png') }}" alt="Stackalytics" style="width: 100%; max-width: 190px;"></a></span>
</div>
<div class="stackamenu" style="margin-left: 240px">
<div class="stackamenu">
<ul id="menu-stackamenu">
<li class="menu-item"><a href="/"><span class="icon-pie"></span>Code Contribution</a></li>
<li class="menu-item current-menu-item"><a href="/report/driverlog"><span class="icon-cogs"></span>Vendor Drivers <span style="vertical-align: top; font-size: 60%;">&beta;</span></a></li>
<li class="menu-item"><a href="/report/members"><span class="icon-users"></span>Member Directory</a></li>
</ul>
</div>
</div>

View File

@ -1,405 +0,0 @@
{% extends "base.html" %}
{% set active_tab = 'members' %}
{% set page_title = 'OpenStack Foundation members' %}
{% block head %}
<script type="text/javascript">
function get_start_date() {
var days = {{ days }};
return Math.round(new Date().getTime() / 1000) - days * 24 * 60 * 60;
}
function show_engineers_table(options) {
var table_column_names = ["index", "link", "date", "company"];
var table_id = "members_table";
var company = $('#company_selector').val();
$.ajax({
url: makeURI("/api/1.0/members", options),
dataType: "json",
success: function (data) {
var tableData = data["members"];
var tableColumns = [];
var sort_by_column = 2;
for (var i = 0; i < table_column_names.length; i++) {
tableColumns.push({"mData": table_column_names[i]});
}
for (i = 0; i < tableData.length; i++) {
var user_link = tableData[i].member_uri;
tableData[i].link = "<a href=\"" + user_link + "\">" + tableData[i].author_name + "</a>";
tableData[i].date = tableData[i].date_str;
tableData[i].company = tableData[i].company_name;
}
if (table_id) {
$("#" + table_id).dataTable({
"aaSorting": [
[ sort_by_column, "desc" ]
],
"bFilter": true,
"bInfo": true,
"bAutoWidth": false,
"aaData": tableData,
"aoColumns": tableColumns,
"bDestroy": true,
'bPaginate': true,
"sPaginationType": "full_numbers",
"aLengthMenu": [[10, 25, 50, -1], [10, 25, 50, "All"]],
"iDisplayLength": 10
});
}
}
});
}
function show_new_companies_table(options) {
var table_column_names = ["index", "link", "date"];
var table_id = "new_companies_table";
$.ajax({
url: makeURI("/api/1.0/new_companies", options),
dataType: "json",
success: function (data) {
var tableData = data["stats"];
var tableColumns = [];
var sort_by_column = 2;
for (var i = 0; i < table_column_names.length; i++) {
tableColumns.push({"mData": table_column_names[i]});
}
for (i = 0; i < tableData.length; i++) {
var company_link = makeURI('/report/members', {company:tableData[i].name});
tableData[i].link = "<a href=\"" + company_link + "\">" + tableData[i].name + "</a>";
tableData[i].date = tableData[i].date_str;
}
if (table_id) {
$("#" + table_id).dataTable({
"aaSorting": [
[ sort_by_column, "desc" ]
],
"bFilter": true,
"bInfo": true,
"bAutoWidth": false,
"aaData": tableData,
"aoColumns": tableColumns,
"bDestroy": true,
'bPaginate': true,
"sPaginationType": "full_numbers",
"aLengthMenu": [[10, 25, 50, -1], [10, 25, 50, "All"]],
"iDisplayLength": 10
});
}
}
});
}
function show_companies_table(options) {
var table_column_names = ["index", "link", "count"];
var table_id = "companies_table";
$.ajax({
url: makeURI("/api/1.0/stats/companies", options),
dataType: "json",
success: function (data) {
var tableData = data["stats"];
var tableColumns = [];
var sort_by_column = 2;
for (var i = 0; i < table_column_names.length; i++) {
tableColumns.push({"mData": table_column_names[i]});
}
for (i = 0; i < tableData.length; i++) {
var company_link = makeURI('/report/members', {company:tableData[i].name});
tableData[i].link = "<a href=\"" + company_link + "\">" + tableData[i].name + "</a>";
tableData[i].count = tableData[i].metric;
}
if (table_id) {
$("#" + table_id).dataTable({
"aaSorting": [
[ sort_by_column, "desc" ]
],
"bFilter": true,
"bInfo": true,
"bAutoWidth": false,
"aaData": tableData,
"aoColumns": tableColumns,
"bDestroy": true,
'bPaginate': true,
"sPaginationType": "full_numbers",
"aLengthMenu": [[10, 25, 50, -1], [10, 25, 50, "All"]],
"iDisplayLength": 10
});
}
}
});
}
function renderChart(url, chart_id, options) {
$(document).ready(function () {
$.ajax({
url: makeURI(url, options),
dataType: "jsonp",
success: function (data) {
var chartData = [];
const limit = 10;
var aggregate = 0;
var i;
data = data["stats"];
for (i = 0; i < data.length; i++) {
if (i < limit - 1) {
chartData.push([data[i].name, data[i].metric]);
} else {
aggregate += data[i].metric;
}
}
if (i == limit) {
chartData.push([data[i - 1].name, data[i - 1].metric]);
} else if (i > limit) {
chartData.push(["others", aggregate]);
}
if (chart_id) {
var plot = $.jqplot(chart_id, [chartData], {
seriesDefaults: {
renderer: jQuery.jqplot.PieRenderer,
rendererOptions: {
showDataLabels: true
}
},
legend: { show: true, location: 'e' }
});
}
}
});
});
}
function make_options() {
var options = {};
options['release'] = 'all';
options['metric'] = 'members';
options['project_type'] = '{{ project_type }}';
options['company'] = $('#company_selector').val();
options['days'] = $('#days_selector').val();
return options;
}
function reload() {
window.location.search = $.map(make_options(),function (val, index) {
return index + "=" + val;
}).join("&")
}
function show_page() {
var start_date = get_start_date();
var base_options = { metric: 'members', project_type: '{{ project_type }}', release: 'all', start_date: start_date };
renderTimeline(base_options);
show_engineers_table(base_options);
{% if not company %}
show_companies_table(base_options);
show_new_companies_table(base_options);
renderChart("/api/1.0/stats/companies", "members_chart", base_options);
{% else %}
$('#companies_block').hide();
$('#new_companies_table_header').hide();
$('#new_companies_table').hide();
{% endif %}
}
$(document).ready(function () {
var start_date = get_start_date();
var base_options = { metric: 'members', project_type: '{{ project_type }}', release: 'all', start_date: start_date };
initSingleSelector("company", makeURI("/api/1.0/companies", base_options), {allowClear: true});
$("#days_selector").val({{ days }}).select2().on('change', function (evt) {
reload();
});
show_page();
});
</script>
<style type="text/css">
table.dataTable tr.even {
background-color: #EEF1F4;
}
table.dataTable tr.even:hover, table.dataTable tr.odd:hover {
background-color: #F8FFEC;
}
table.dataTable tr.even td.sorting_1 {
background-color: #E0E8E8;
}
</style>
<script type='text/javascript'>
$(document).ready(function () {
$('#days_selector').val({{ days }});
$("#days_selector").select2();
show_page();
});
$(document).on('change', '#days_selector', function (evt) {
reload();
});
$(document).on('change', '#company_selector', function (evt) {
reload();
});
</script>
{% endblock %}
{% block body %}
<div class="page">
<div class="aheader">
<div style="float: right; margin-top: 10px; margin-right: 20px;">
<a href="https://wiki.openstack.org/wiki/Stackalytics" target="_blank">About</a>
</div>
<div id="analytics_header">
<div style="float: left;">
<span id="logo"><a href="{{ url_for('overview') }}"><img src="{{ url_for('static', filename='images/stackalytics_logo.png') }}" alt="Stackalytics" style="width: 100%; max-width: 190px;"></a></span>
</div>
<div class="stackamenu" style="margin-left: 240px;">
<ul id="menu-stackamenu">
<li class="menu-item"><a href="{{ url_for('overview') }}"><span class="icon-pie"></span>Code Contribution</a></li>
<li class="menu-item"><a href="/report/driverlog"><span class="icon-cogs"></span>Vendor Drivers <span style="vertical-align: top; font-size: 60%;">&beta;</span></a></li>
<li class="menu-item current-menu-item"><a href="/report/members"><span class="icon-users"></span>Member Directory</a></li>
</ul>
</div>
</div>
<div class="navigation">
<div id="timeline"
style="width: 100%; height: 120px; margin-top: 15px;"></div>
</div>
<div class="drops">
<div class="drop" style="margin-top: 1em;">
<label for="days_selector">Joined during period</label>
<select id="days_selector" name="days_selector"
style="min-width: 140px;"
data-placeholder="Select period">
<option value="7">week</option>
<option value="14">two weeks</option>
<option value="31">month</option>
<option value="93">quarter</option>
<option value="183">half year</option>
<option value="365">year</option>
<option value="{{ all_days }}">all</option>
</select>
</div>
<div class="drop" style="margin-top: 1em;">
<label for="company_selector">Company</label>
<input id="company_selector" style="width: 140px"
data-placeholder="Any company"/>
</div>
</div>
<table style="width: 100%" cellspacing="0" id="companies_block">
<tr>
<td style="width: 50%; vertical-align: top; padding-right: 3em;">
<h2>OpenStack foundation member companies</h2>
<div class="body" style="margin-right: 1em;">
<table id="companies_table">
<thead>
<tr>
<th>#</th>
<th>Company</th>
<th>Members Count</th>
</tr>
</thead>
<tbody>
</tbody>
</table>
</div>
</td>
<td style="width: 50%; vertical-align: top; padding-left: 3em">
<h2>Members by company</h2>
<div class="body" style="margin-left: 1em;">
<div id="members_container">
<div id="members_chart"
style="width: 100%; height: 350px; margin-bottom: 1em;"></div>
</div>
</div>
</td>
</tr>
</table>
<table style="width: 100%" cellspacing="0">
<tr>
<td style="width: 50%; vertical-align: top; padding-right: 3em;">
<h2>Individual Members</h2>
<div class="body" style="margin-right: 1em;">
<table id="members_table">
<thead>
<tr>
<th>#</th>
<th>Engineer</th>
<th>Date Joined</th>
<th>Company</th>
</tr>
</thead>
<tbody>
</tbody>
</table>
</div>
</td>
<td style="width: 50%; vertical-align: top; padding-right: 3em;">
<div id="new_companies_table_header">
<h2>New Companies</h2>
</div>
<div class="body" style="margin-right: 1em;">
<table id="new_companies_table">
<thead>
<tr>
<th>#</th>
<th>Company</th>
<th>First Member Joined</th>
</tr>
</thead>
<tbody>
</tbody>
</table>
</div>
</td>
</tr>
</table>
</div>
</div>
{% endblock %}

View File

@ -197,12 +197,6 @@
href="https://review.openstack.org/#/c/{{ record.review_number }}"
target="_blank">{{ record.review_id }}</a></div>
{% elif record_type == "member" %}
<div class="header"><a href="{{ record.member_uri }}"
target="_blank">Registered</a> in OpenStack
Foundation
</div>
{% elif record_type == "tr" %}
<div class="header">Translated {{ record.loc }} words
into {{ record.language }}</div>

View File

@ -350,27 +350,6 @@ def get_module(module_id, **kwargs):
return module
@app.route('/api/1.0/members')
@decorators.exception_handler()
@decorators.response()
@decorators.cached(ignore=['release', 'project_type', 'module'])
@decorators.jsonify('members')
@decorators.record_filter(ignore=['release', 'project_type', 'module'])
def get_members(records, **kwargs):
response = []
for record in records:
record = vault.extend_record(record)
nr = dict([(k, record[k]) for k in
['author_name', 'date', 'company_name', 'member_uri']])
nr['date_str'] = helpers.format_date(nr['date'])
response.append(nr)
response.sort(key=lambda x: x['date'], reverse=True)
utils.add_index(response)
return response
@app.route('/api/1.0/stats/bp')
@decorators.exception_handler()
@decorators.response()
@ -585,8 +564,6 @@ def _get_week(kwargs, param_name):
def timeline(records, **kwargs):
# find start and end dates
metric = parameters.get_parameter(kwargs, 'metric')
start_date = int(parameters.get_single_parameter(kwargs, 'start_date')
or 0)
release_name = parameters.get_single_parameter(kwargs, 'release') or 'all'
releases = vault.get_vault()['releases']
@ -650,14 +627,10 @@ def timeline(records, **kwargs):
if start_week <= week < end_week:
week_stat_loc[week] += loc_handler(record)
week_stat_commits[week] += commits_handler(record)
if 'members' in metric:
if record.date >= start_date:
week_stat_commits_hl[week] += 1
else:
if record.release == release_name:
week_stat_commits_hl[week] += commits_handler(record)
if record.release == release_name:
week_stat_commits_hl[week] += commits_handler(record)
if 'all' == release_name and 'members' not in metric:
if 'all' == release_name:
week_stat_commits_hl = week_stat_commits
# form arrays in format acceptable to timeline plugin

View File

@ -32,8 +32,6 @@ PROCESSOR_OPTS = [
'default_data_uri = file:///path/to/default_data.json'),
cfg.StrOpt('sources-root', default='/var/local/stackalytics',
help='The folder that holds all project sources to analyze'),
cfg.IntOpt('days_to_update_members', default=30,
help='Number of days to update members'),
cfg.StrOpt('corrections-uri',
default=('https://git.openstack.org/cgit/'
'openstack/stackalytics/plain/etc/corrections.json'),
@ -53,8 +51,6 @@ PROCESSOR_OPTS = [
cfg.StrOpt("fetching-user-source", default='launchpad',
choices=['launchpad', '<None>'],
help="Source for fetching user profiles"),
cfg.IntOpt('members-look-ahead', default=250,
help='How many member profiles to look ahead after the last'),
cfg.IntOpt('read-timeout', default=120,
help='Number of seconds to wait for remote response'),
cfg.IntOpt('gerrit-retry', default=10,

View File

@ -43,8 +43,7 @@ OPTS = [
SINGLE_KEYS = ['module_groups', 'project_types', 'repos', 'releases',
'companies', 'last_update_members_date', 'last_member_index',
'runtime_storage_update_time']
'companies', 'runtime_storage_update_time']
ARRAY_KEYS = ['record', 'user']
BULK_READ_SIZE = 64
MEMCACHED_URI_PREFIX = r'^memcached:\/\/'
@ -140,8 +139,6 @@ def export_data(memcached_inst, fd):
pickle.dump(('user:%s' % user['launchpad_id'], user), fd)
if user.get('gerrit_id'):
pickle.dump(('user:gerrit:%s' % user['gerrit_id'], user), fd)
if user.get('member_id'):
pickle.dump(('user:member:%s' % user['member_id'], user), fd)
for email in user.get('emails') or []:
pickle.dump((('user:%s' % email).encode('utf8'), user), fd)

View File

@ -27,7 +27,6 @@ from stackalytics.processor import default_data_processor
from stackalytics.processor import governance
from stackalytics.processor import lp
from stackalytics.processor import mls
from stackalytics.processor import mps
from stackalytics.processor import rcs
from stackalytics.processor import record_processor
from stackalytics.processor import runtime_storage
@ -193,23 +192,6 @@ def _process_translation_stats(runtime_storage_inst, record_processor_inst):
runtime_storage_inst.set_records(processed_translation_iterator)
def _process_member_list(uri, runtime_storage_inst, record_processor_inst):
member_iterator = mps.log(uri, runtime_storage_inst,
CONF.days_to_update_members,
CONF.members_look_ahead)
member_iterator_typed = _record_typer(member_iterator, 'member')
processed_member_iterator = record_processor_inst.process(
member_iterator_typed)
runtime_storage_inst.set_records(processed_member_iterator)
def update_members(runtime_storage_inst, record_processor_inst):
member_lists = runtime_storage_inst.get_by_key('member_lists') or []
for member_list in member_lists:
_process_member_list(member_list, runtime_storage_inst,
record_processor_inst)
def _post_process_records(record_processor_inst, repos):
LOG.debug('Build release index')
release_index = {}
@ -326,9 +308,6 @@ def main():
apply_corrections(CONF.corrections_uri, runtime_storage_inst)
# long operation should be the last
update_members(runtime_storage_inst, record_processor_inst)
runtime_storage_inst.set_by_key('runtime_storage_update_time',
utils.date_to_timestamp('now'))
LOG.info('stackalytics-processor succeeded.')

View File

@ -1,111 +0,0 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
import re
import time
from oslo_log import log as logging
import requests
import six
from stackalytics.processor import utils
LOG = logging.getLogger(__name__)
NAME_AND_DATE_PATTERN = (r'<h3>(?P<member_name>[^<]*)[\s\S]*?'
r'<div class="span-\d last">(?P<date_joined>[^<]*)')
COMPANY_PATTERN = (r'<strong>Date\sJoined[\s\S]*?<b>(?P<company_draft>[^<]*)'
r'[\s\S]*?From\s(?P<date_from>[\s\S]*?)\(Current\)')
GARBAGE_PATTERN = r'[/\\~%^\*_]+'
def strip_garbage(s):
return re.sub(r'\s+', ' ', re.sub(GARBAGE_PATTERN, '', s))
def _retrieve_member(requests_session, uri, member_id, html_parser):
content = utils.read_uri(uri, session=requests_session)
if not content:
return {}
member = {}
for rec in re.finditer(NAME_AND_DATE_PATTERN, content):
result = rec.groupdict()
member['member_id'] = member_id
member['member_name'] = strip_garbage(result['member_name'])
member['date_joined'] = result['date_joined']
member['member_uri'] = uri
break
member['company_draft'] = '*independent'
for rec in re.finditer(COMPANY_PATTERN, content):
result = rec.groupdict()
member['company_draft'] = strip_garbage(
html_parser.unescape(result['company_draft']))
return member
def log(uri, runtime_storage_inst, days_to_update_members, members_look_ahead):
LOG.debug('Retrieving new openstack.org members')
last_update_members_date = runtime_storage_inst.get_by_key(
'last_update_members_date') or 0
last_member_index = runtime_storage_inst.get_by_key(
'last_member_index') or 0
end_update_date = int(time.time()) - days_to_update_members * 24 * 60 * 60
if last_update_members_date <= end_update_date:
last_member_index = 0
last_update_members_date = int(time.time())
runtime_storage_inst.set_by_key('last_update_members_date',
last_update_members_date)
cnt_empty = 0
cur_index = last_member_index + 1
html_parser = six.moves.html_parser.HTMLParser()
requests_session = requests.Session()
while cnt_empty < members_look_ahead:
profile_uri = uri + str(cur_index)
member = _retrieve_member(requests_session, profile_uri,
str(cur_index), html_parser)
if 'member_name' not in member:
cnt_empty += 1
cur_index += 1
continue
cnt_empty = 0
last_member_index = cur_index
cur_index += 1
LOG.debug('New member: %s', member['member_id'])
yield member
time.sleep(random.random() * 5)
requests_session.close()
LOG.debug('Last_member_index: %s', last_member_index)
runtime_storage_inst.set_by_key('last_member_index', last_member_index)

View File

@ -425,39 +425,6 @@ class RecordProcessor(object):
yield bug_fixed
def _process_member(self, record):
user_id = user_processor.make_user_id(member_id=record['member_id'])
record['primary_key'] = user_id
record['date'] = utils.member_date_to_timestamp(record['date_joined'])
record['author_name'] = record['member_name']
record['module'] = 'unknown'
company_draft = record['company_draft']
company_name = self.domains_index.get(utils.normalize_company_name(
company_draft)) or (utils.normalize_company_draft(company_draft))
# author_email is a key to create new user
record['author_email'] = user_id
record['company_name'] = company_name
# _update_record_and_user function will create new user if needed
self._update_record_and_user(record)
record['company_name'] = company_name
user = user_processor.load_user(self.runtime_storage_inst,
user_id=user_id)
user['user_name'] = record['author_name']
user['companies'] = [{
'company_name': company_name,
'end_date': 0,
}]
user['company_name'] = company_name
user_processor.store_user(self.runtime_storage_inst, user)
record['company_name'] = company_name
yield record
def _process_translation(self, record):
# todo split translation and approval
translation = record.copy()
@ -488,7 +455,6 @@ class RecordProcessor(object):
'email': self._process_email,
'bp': self._process_blueprint,
'bug': self._process_bug,
'member': self._process_member,
'i18n': self._process_translation,
}
@ -715,39 +681,6 @@ class RecordProcessor(object):
self.runtime_storage_inst.set_records(
self._close_patch(cores, marks_patch['marks']))
def _update_members_company_name(self):
LOG.info('Update members with company names')
def record_handler(record):
if record['record_type'] != 'member':
return
company_draft = record['company_draft']
company_name = self.domains_index.get(
utils.normalize_company_name(company_draft)) or (
utils.normalize_company_draft(company_draft))
if company_name == record['company_name']:
return
LOG.debug('Update record %s, company name changed to %s',
record, company_name)
record['company_name'] = company_name
yield record
user = user_processor.load_user(self.runtime_storage_inst,
user_id=record['user_id'])
LOG.debug('Update user %s, company name changed to %s',
user, company_name)
user['companies'] = [{
'company_name': company_name,
'end_date': 0,
}]
user_processor.store_user(self.runtime_storage_inst, user)
yield record_handler
def _update_commits_with_module_alias(self):
LOG.info('Update record with aliases')
@ -773,7 +706,6 @@ class RecordProcessor(object):
self._update_commits_with_module_alias,
self._update_blueprints_with_mention_info,
self._determine_core_contributors,
self._update_members_company_name,
self._update_marks_with_disagreement,
]

View File

@ -225,12 +225,6 @@ default_data = {
"type": "string"
}
},
"member_lists": {
"type": "array",
"items": {
"type": "string"
}
},
"project_types": {
"type": "array",
"items": {

View File

@ -22,13 +22,11 @@ ROBOTS = '*robots'
def make_user_id(emails=None, launchpad_id=None, gerrit_id=None,
member_id=None, github_id=None, zanata_id=None):
github_id=None, zanata_id=None):
if launchpad_id or emails:
return launchpad_id or emails[0]
if gerrit_id:
return 'gerrit:%s' % gerrit_id
if member_id:
return 'member:%s' % member_id
if github_id:
return 'github:%s' % github_id
if zanata_id:
@ -60,11 +58,11 @@ def store_user(runtime_storage_inst, user):
def load_user(runtime_storage_inst, seq=None, user_id=None, email=None,
launchpad_id=None, gerrit_id=None, member_id=None,
github_id=None, zanata_id=None):
launchpad_id=None, gerrit_id=None, github_id=None,
zanata_id=None):
key = make_user_id(gerrit_id=gerrit_id, member_id=member_id,
github_id=github_id, zanata_id=zanata_id)
key = make_user_id(gerrit_id=gerrit_id, github_id=github_id,
zanata_id=zanata_id)
if not key:
key = seq or user_id or launchpad_id or email
if key:

View File

@ -63,13 +63,6 @@ def date_to_timestamp_ext(d):
return int(d)
def member_date_to_timestamp(d):
if not d:
return 0
return int(time.mktime(
datetime.datetime.strptime(d, '%B %d, %Y ').timetuple()))
def iso8601_to_timestamp(s):
return calendar.timegm(iso8601.parse_date(s).utctimetuple())

View File

@ -1,58 +0,0 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import testtools
from stackalytics.processor import mps
class TestMps(testtools.TestCase):
def test_member_parse_regex(self):
content = '''<h1>Individual Member Profile</h1>
<div class="candidate span-14">
<div class="span-4">
<img src="/themes/openstack/images/generic-profile-photo.png"><p>&nbsp;</p>
</div>
<a name="profile-10501"></a>
<div class="details span-10 last">
<div class="last name-and-title">
<h3>Jim Battenberg</h3>
</div>
<hr><div class="span-4"><strong>Date Joined</strong></div>
<div class="span-6 last">June 25, 2013 <br><br></div>
<div class="span-4"><strong>Affiliations</strong></div>
<div class="span-6 last">
<div>
<b>Rackspace</b> From (Current)
</div>
</div>
<div class="span-4"><strong>Statement of Interest </strong></div>
<div class="span-6 last">
<p>contribute logic and evangelize openstack</p>
</div>
<p>&nbsp;</p>'''
match = re.search(mps.NAME_AND_DATE_PATTERN, content)
self.assertTrue(match)
self.assertEqual('Jim Battenberg', match.group('member_name'))
self.assertEqual('June 25, 2013 ', match.group('date_joined'))
match = re.search(mps.COMPANY_PATTERN, content)
self.assertTrue(match)
self.assertEqual('Rackspace', match.group('company_draft'))

View File

@ -799,60 +799,6 @@ class TestRecordProcessor(testtools.TestCase):
record_processor_inst.runtime_storage_inst,
email='john_doe@gmail.com'))
def test_create_member(self):
member_record = {'member_id': '123456789',
'member_name': 'John Doe',
'member_uri': 'http://www.openstack.org/community'
'/members/profile/123456789',
'date_joined': 'August 01, 2012 ',
'company_draft': 'Mirantis'}
record_processor_inst = self.make_record_processor()
result_member = next(record_processor_inst._process_member(
member_record))
self.assertEqual(result_member['primary_key'], 'member:123456789')
self.assertEqual(result_member['date'], utils.member_date_to_timestamp(
'August 01, 2012 '))
self.assertEqual(result_member['author_name'], 'John Doe')
self.assertEqual(result_member['company_name'], 'Mirantis')
result_user = user_processor.load_user(
record_processor_inst.runtime_storage_inst,
member_id='123456789')
self.assertEqual(result_user['user_name'], 'John Doe')
self.assertEqual(result_user['company_name'], 'Mirantis')
self.assertEqual(result_user['companies'],
[{'company_name': 'Mirantis', 'end_date': 0}])
def test_update_member(self):
member_record = {'member_id': '123456789',
'member_name': 'John Doe',
'member_uri': 'http://www.openstack.org/community'
'/members/profile/123456789',
'date_joined': 'August 01, 2012 ',
'company_draft': 'Mirantis'}
record_processor_inst = self.make_record_processor()
updated_member_record = member_record
updated_member_record['member_name'] = 'Bill Smith'
updated_member_record['company_draft'] = 'Rackspace'
result_member = next(record_processor_inst._process_member(
updated_member_record))
self.assertEqual(result_member['author_name'], 'Bill Smith')
self.assertEqual(result_member['company_name'], 'Rackspace')
result_user = user_processor.load_user(
record_processor_inst.runtime_storage_inst,
member_id='123456789')
self.assertEqual(result_user['user_name'], 'Bill Smith')
self.assertEqual(result_user['companies'],
[{'company_name': 'Rackspace', 'end_date': 0}])
def test_process_email_then_review(self):
# it is expected that the user profile will contain email and
# gerrit id, while LP id will be None