Implemented review processing and stats calculation

* Added review control system module that retrieves reviews from Gerrit
* Refactored runtime storage so that it no longer depends on storage contents
* Refactored commit processor and new review processor is added
* Review record processor spawns review and mark records
* Memory storage is made able to store reviews along with commits
* Migrated to hacking-0.6
* Marks are processed in contribution details and shown on engineer details screen

Implements blueprint review-stats

Change-Id: I80b70de372752cd3d1790da1904c0d423d61e81a
This commit is contained in:
Ilya Shakhat 2013-07-18 15:08:12 +04:00
parent 95833ec196
commit 5c554a35f9
20 changed files with 642 additions and 343 deletions

View File

@ -13,8 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from stackalytics.processor import user_utils
MEMORY_STORAGE_CACHED = 0
@ -27,38 +25,42 @@ class CachedMemoryStorage(MemoryStorage):
def __init__(self, records):
super(CachedMemoryStorage, self).__init__(records)
# common indexes
self.records = {}
self.company_index = {}
self.date_index = {}
self.record_types_index = {}
self.module_index = {}
self.launchpad_id_index = {}
self.company_index = {}
self.release_index = {}
self.dates = []
self.indexes = {
'record_type': self.record_types_index,
'company_name': self.company_index,
'module': self.module_index,
'launchpad_id': self.launchpad_id_index,
'release': self.release_index,
}
for record in records:
self._save_record(record)
self.dates = sorted(self.date_index)
self.company_name_mapping = dict((c.lower(), c)
for c in self.company_index.keys())
def _save_record(self, record):
self.records[record['record_id']] = record
self._add_to_index(self.company_index, record, 'company_name')
self._add_to_index(self.module_index, record, 'module')
self._add_to_index(self.launchpad_id_index, record, 'launchpad_id')
self._add_to_index(self.release_index, record, 'release')
self._add_to_index(self.date_index, record, 'date')
for key, index in self.indexes.iteritems():
self._add_to_index(index, record, key)
record['week'] = user_utils.timestamp_to_week(record['date'])
record['loc'] = record['lines_added'] + record['lines_deleted']
def update(self, records):
for record in records:
if record['record_id'] in self.records:
self._remove_record_from_index(record)
self._save_record(record)
def _remove_record_from_index(self, record):
self.company_index[record['company_name']].remove(record['record_id'])
self.module_index[record['module']].remove(record['record_id'])
self.launchpad_id_index[record['launchpad_id']].remove(
record['record_id'])
self.release_index[record['release']].remove(record['record_id'])
self.date_index[record['date']].remove(record['record_id'])
for key, index in self.indexes.iteritems():
index[record[key]].remove(record['record_id'])
def _add_to_index(self, record_index, record, key):
record_key = record[key]
@ -70,9 +72,8 @@ class CachedMemoryStorage(MemoryStorage):
def _get_record_ids_from_index(self, items, index):
record_ids = set()
for item in items:
if item not in index:
raise Exception('Parameter %s not valid' % item)
record_ids |= index[item]
if item in index:
record_ids |= index[item]
return record_ids
def get_record_ids_by_modules(self, modules):
@ -91,7 +92,16 @@ class CachedMemoryStorage(MemoryStorage):
return self._get_record_ids_from_index(releases, self.release_index)
def get_record_ids(self):
return set(self.records.keys())
return self.records.keys()
def get_commit_ids(self):
return self.record_types_index['commit']
def get_review_ids(self):
return self.record_types_index['review']
def get_mark_ids(self):
return self.record_types_index['mark']
def get_records(self, record_ids):
for i in record_ids:
@ -112,12 +122,6 @@ class CachedMemoryStorage(MemoryStorage):
def get_launchpad_ids(self):
return self.launchpad_id_index.keys()
def update(self, records):
for record in records:
if record['record_id'] in self.records:
self._remove_record_from_index(record)
self._save_record(record)
def get_memory_storage(memory_storage_type, records):
if memory_storage_type == MEMORY_STORAGE_CACHED:

View File

@ -84,4 +84,9 @@
{% endif %}
<h4>Marks</h4>
{% for mark in [-2, -1, 0, 1, 2] %}
<div>{{ mark }}: {{ marks[mark] }}</div>
{% endfor %}
{% endblock %}

View File

@ -284,6 +284,8 @@
<label for="metric">Metric&nbsp;</label><select id="metric" name="metric">
<option value="commits">Commits</option>
<option value="loc">Lines of code</option>
<option value="reviews">Reviews</option>
<option value="marks">Marks</option>
</select>
</span>
<span class="drop_release" style="float: right;">

View File

@ -45,6 +45,8 @@ DEFAULTS = {
METRIC_LABELS = {
'loc': 'Lines of code',
'commits': 'Commits',
'reviews': 'Reviews',
'marks': 'Marks',
}
DEFAULT_RECORDS_LIMIT = 10
@ -173,12 +175,13 @@ def get_default(param_name):
return None
def get_parameter(kwargs, singular_name, plural_name, use_default=True):
def get_parameter(kwargs, singular_name, plural_name=None, use_default=True):
if singular_name in kwargs:
p = kwargs[singular_name]
else:
p = (flask.request.args.get(singular_name) or
flask.request.args.get(plural_name))
p = flask.request.args.get(singular_name)
if (not p) and plural_name:
flask.request.args.get(plural_name)
if p:
return p.split(',')
elif use_default:
@ -200,7 +203,7 @@ def record_filter(ignore=None, use_default=True):
vault = get_vault()
memory_storage = vault['memory_storage']
record_ids = memory_storage.get_record_ids()
record_ids = set(memory_storage.get_record_ids()) # make a copy
if 'module' not in ignore:
param = get_parameter(kwargs, 'module', 'modules', use_default)
@ -242,6 +245,15 @@ def record_filter(ignore=None, use_default=True):
memory_storage.get_record_ids_by_releases(
c.lower() for c in param))
if 'metric' not in ignore:
param = get_parameter(kwargs, 'metric')
if 'reviews' in param:
record_ids &= memory_storage.get_review_ids()
elif 'marks' in param:
record_ids &= memory_storage.get_mark_ids()
elif ('loc' in param) or ('commits' in param):
record_ids &= memory_storage.get_commit_ids()
kwargs['records'] = memory_storage.get_records(record_ids)
return f(*args, **kwargs)
@ -258,7 +270,7 @@ def aggregate_filter():
metric_param = (flask.request.args.get('metric') or
get_default('metric'))
metric = metric_param.lower()
if metric == 'commits':
if metric in ['commits', 'reviews', 'marks']:
metric_filter = lambda r: 1
elif metric == 'loc':
metric_filter = lambda r: r['loc']
@ -288,7 +300,7 @@ def exception_handler():
return decorator
def templated(template=None):
def templated(template=None, return_code=200):
def decorator(f):
@functools.wraps(f)
def templated_decorated_function(*args, **kwargs):
@ -326,7 +338,7 @@ def templated(template=None):
ctx['project_type_options'] = get_project_type_options()
return flask.render_template(template_name, **ctx)
return flask.render_template(template_name, **ctx), return_code
return templated_decorated_function
@ -342,8 +354,9 @@ def overview():
@app.errorhandler(404)
@templated('404.html', 404)
def page_not_found(e):
return flask.render_template('404.html'), 404
pass
def contribution_details(records, limit=DEFAULT_RECORDS_LIMIT):
@ -351,32 +364,37 @@ def contribution_details(records, limit=DEFAULT_RECORDS_LIMIT):
bugs_map = {}
companies_map = {}
commits = []
marks = dict((m, 0) for m in [-2, -1, 0, 1, 2])
loc = 0
for record in records:
loc += record['loc']
commits.append(record)
blueprint = record['blueprint_id']
if blueprint:
if blueprint in blueprints_map:
blueprints_map[blueprint].append(record)
else:
blueprints_map[blueprint] = [record]
if record['record_type'] == 'commit':
loc += record['loc']
commits.append(record)
blueprint = record['blueprint_id']
if blueprint:
if blueprint in blueprints_map:
blueprints_map[blueprint].append(record)
else:
blueprints_map[blueprint] = [record]
bug = record['bug_id']
if bug:
if bug in bugs_map:
bugs_map[bug].append(record)
else:
bugs_map[bug] = [record]
bug = record['bug_id']
if bug:
if bug in bugs_map:
bugs_map[bug].append(record)
else:
bugs_map[bug] = [record]
company = record['company_name']
if company:
if company in companies_map:
companies_map[company]['loc'] += record['loc']
companies_map[company]['commits'] += 1
else:
companies_map[company] = {'loc': record['loc'], 'commits': 1}
company = record['company_name']
if company:
if company in companies_map:
companies_map[company]['loc'] += record['loc']
companies_map[company]['commits'] += 1
else:
companies_map[company] = {'loc': record['loc'],
'commits': 1}
elif record['record_type'] == 'mark':
marks[int(record['value'])] += 1
blueprints = sorted([{'id': key,
'module': value[0]['module'],
@ -395,6 +413,7 @@ def contribution_details(records, limit=DEFAULT_RECORDS_LIMIT):
'commit_count': len(commits),
'companies': companies_map,
'loc': loc,
'marks': marks,
}
return result
@ -423,7 +442,7 @@ def module_details(module, records):
@app.route('/engineers/<launchpad_id>')
@exception_handler()
@templated()
@record_filter()
@record_filter(ignore='metric')
def engineer_details(launchpad_id, records):
persistent_storage = get_vault()['persistent_storage']
user = list(persistent_storage.get_users(launchpad_id=launchpad_id))[0]

View File

@ -1,10 +1,9 @@
{
"corrections": [
{
"commit_id": "ee3fe4e836ca1c81e50a8324a9b5f982de4fa97f",
"primary_key": "ee3fe4e836ca1c81e50a8324a9b5f982de4fa97f",
"correction_comment": "Reset LOC to 0",
"lines_added": 0,
"lines_deleted": 0
"loc": 0
}
]
}

View File

@ -14378,11 +14378,6 @@
],
"releases": [
{
"release_name": "ALL",
"start_date": "2010-May-01",
"end_date": "now"
},
{
"release_name": "Essex",
"start_date": "2011-Oct-01",

View File

@ -3,7 +3,7 @@
# debug = False
# Default data
# default-data = /etc/stackalytics/default_data.json
# default_data = /etc/stackalytics/default_data.json
# The folder that holds all project sources to analyze
# sources_root = /var/local/stackalytics
@ -24,4 +24,13 @@
# listen_port = 8080
# The address of file with corrections data
# corrections-uri = https://raw.github.com/stackforge/stackalytics/master/etc/corrections.json
# corrections_uri = https://raw.github.com/stackforge/stackalytics/master/etc/corrections.json
# URI of review system
# review_uri = gerrit://review.openstack.org
# SSH key for gerrit review system access
# ssh_key_filename = /home/user/.ssh/id_rsa
# SSH username for gerrit review system access
# ssh_username = user

View File

@ -39,51 +39,13 @@
"repos": [
{
"branches": ["master"],
"module": "python-quantumclient",
"type": "core",
"uri": "git://github.com/openstack/python-quantumclient.git",
"module": "stackalytics",
"project_type": "stackforge",
"uri": "git://github.com/stackforge/stackalytics.git",
"releases": [
{
"release_name": "Folsom",
"tag_from": "folsom-1",
"tag_to": "2.1"
},
{
"release_name": "Grizzly",
"tag_from": "2.1",
"tag_to": "2.2.1"
},
{
"release_name": "Havana",
"tag_from": "2.2.1",
"tag_to": "HEAD"
}
]
},
{
"branches": ["master"],
"module": "keystone",
"type": "core",
"uri": "git://github.com/openstack/keystone.git",
"releases": [
{
"release_name": "Essex",
"tag_from": "2011.3",
"tag_to": "2012.1"
},
{
"release_name": "Folsom",
"tag_from": "2012.1",
"tag_to": "2012.2"
},
{
"release_name": "Grizzly",
"tag_from": "2012.2",
"tag_to": "2013.1"
},
{
"release_name": "Havana",
"tag_from": "2013.1",
"tag_from": "5a1376ca",
"tag_to": "HEAD"
}
]
@ -91,11 +53,6 @@
],
"releases": [
{
"release_name": "ALL",
"start_date": "2010-May-01",
"end_date": "now"
},
{
"release_name": "Essex",
"start_date": "2011-Oct-01",

View File

@ -3,7 +3,8 @@ Flask>=0.9
Flask-Gravatar
iso8601
launchpadlib
http://tarballs.openstack.org/oslo.config/oslo.config-1.2.0a2.tar.gz#egg=oslo.config-1.2.0a2
oslo.config
paramiko>=1.8.0
pbr>=0.5.16,<0.6
psutil
python-memcached

View File

@ -1,182 +0,0 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import re
from launchpadlib import launchpad
from oslo.config import cfg
LOG = logging.getLogger(__name__)
COMMIT_PROCESSOR_DUMMY = 0
COMMIT_PROCESSOR_CACHED = 1
class CommitProcessor(object):
    """Base interface for commit processors."""

    def __init__(self, persistent_storage):
        # Kept for subclasses that need user/company lookups.
        self.persistent_storage = persistent_storage

    def process(self, commit_iterator):
        """Process commits from the iterator; subclasses override this."""
        pass
class DummyProcessor(CommitProcessor):
    """Pass-through processor that leaves commits untouched."""

    def __init__(self, persistent_storage):
        super(DummyProcessor, self).__init__(persistent_storage)

    def process(self, commit_iterator):
        # No transformation: hand the iterator straight back to the caller.
        return commit_iterator
class CachedProcessor(CommitProcessor):
    """Commit processor that annotates commits with user/company data.

    Builds in-memory indexes from persistent storage at construction time:
    email domain -> company name, and email -> user record. Unknown emails
    are looked up at Launchpad and persisted.
    """

    def __init__(self, persistent_storage):
        super(CachedProcessor, self).__init__(persistent_storage)

        # index: email domain -> company name
        companies = persistent_storage.get_companies()
        self.domains_index = {}
        for company in companies:
            for domain in company['domains']:
                self.domains_index[domain] = company['company_name']

        # index: email -> user record
        users = persistent_storage.get_users()
        self.users_index = {}
        for user in users:
            for email in user['emails']:
                self.users_index[email] = user
        LOG.debug('Cached commit processor is instantiated')

    def _find_company(self, companies, date):
        # Return the company the user worked for at `date`.
        # NOTE(review): assumes `companies` is ordered by end_date and the
        # last entry is the current affiliation -- TODO confirm ordering.
        for r in companies:
            if date < r['end_date']:
                return r['company_name']
        return companies[-1]['company_name']

    def _get_company_by_email(self, email):
        """Map an email's domain to a company, or None if no match.

        Tries progressively shorter domain suffixes, longest first
        (e.g. 'a.b.com', then 'b.com').
        """
        name, at, domain = email.partition('@')
        if domain:
            parts = domain.split('.')
            for i in range(len(parts), 1, -1):
                m = '.'.join(parts[len(parts) - i:])
                if m in self.domains_index:
                    return self.domains_index[m]
        return None

    def _unknown_user_email(self, email):
        """Resolve an email not present in the local index.

        Queries Launchpad for the email; merges into an existing persistent
        user, creates a new one, or falls back to a 'nobody' stub. Updates
        the local email index and returns the user record.
        """
        lp_profile = None
        if not re.match(r'[\w\d_\.-]+@([\w\d_\.-]+\.)+[\w]+', email):
            LOG.debug('User email is not valid %s' % email)
        else:
            LOG.debug('Lookup user email %s at Launchpad' % email)
            lp = launchpad.Launchpad.login_anonymously('stackalytics')
            try:
                lp_profile = lp.people.getByEmail(email=email)
            except Exception as error:
                # Best-effort: a failed lookup degrades to the 'nobody' stub.
                LOG.warn('Lookup of email %s failed %s' %
                         (email, error.message))
        if not lp_profile:
            # user is not found in Launchpad, create dummy record for commit
            # update
            LOG.debug('Email is not found at Launchpad, mapping to nobody')
            user = {
                'launchpad_id': None,
                'companies': [{
                    # '' domain acts as the catch-all company entry
                    'company_name': self.domains_index[''],
                    'end_date': 0
                }]
            }
        else:
            # get user's launchpad id from his profile
            launchpad_id = lp_profile.name
            LOG.debug('Found user %s' % launchpad_id)

            # check if user with launchpad_id exists in persistent storage
            persistent_user_iterator = self.persistent_storage.get_users(
                launchpad_id=launchpad_id)
            # take the first user from the iterator, if any
            for persistent_user in persistent_user_iterator:
                break
            else:
                persistent_user = None
            if persistent_user:
                # user already exist, merge
                LOG.debug('User exists in persistent storage, add new email')
                persistent_user_email = persistent_user['emails'][0]
                if persistent_user_email not in self.users_index:
                    raise Exception('User index is not valid')
                user = self.users_index[persistent_user_email]
                user['emails'].append(email)
                self.persistent_storage.update_user(user)
            else:
                # add new user
                LOG.debug('Add new user into persistent storage')
                company = (self._get_company_by_email(email) or
                           self.domains_index[''])
                user = {
                    'launchpad_id': launchpad_id,
                    'user_name': lp_profile.display_name,
                    'emails': [email],
                    'companies': [{
                        'company_name': company,
                        'end_date': 0,
                    }],
                }
                self.persistent_storage.insert_user(user)
        # update local index
        self.users_index[email] = user
        return user

    def _update_commit_with_user_data(self, commit):
        # Annotate the commit in place with launchpad_id, company_name
        # and (when known) author_name.
        email = commit['author_email'].lower()
        if email in self.users_index:
            user = self.users_index[email]
        else:
            user = self._unknown_user_email(email)
        commit['launchpad_id'] = user['launchpad_id']
        # Prefer the email-domain mapping; fall back to the user's
        # affiliation history at the commit date.
        company = self._get_company_by_email(email)
        if not company:
            company = self._find_company(user['companies'], commit['date'])
        commit['company_name'] = company
        if 'user_name' in user:
            commit['author_name'] = user['user_name']

    def process(self, commit_iterator):
        """Yield annotated commits, optionally filtering robot accounts."""
        for commit in commit_iterator:
            self._update_commit_with_user_data(commit)
            if cfg.CONF.filter_robots and commit['company_name'] == '*robots':
                continue
            yield commit
class CommitProcessorFactory(object):
    """Factory that maps processor type constants to implementations."""

    @staticmethod
    def get_processor(commit_processor_type, persistent_storage):
        """Build a commit processor of the requested type.

        Raises Exception for an unknown processor type.
        """
        LOG.debug('Factory is asked for commit processor type %s' %
                  commit_processor_type)
        known_types = {
            COMMIT_PROCESSOR_DUMMY: DummyProcessor,
            COMMIT_PROCESSOR_CACHED: CachedProcessor,
        }
        if commit_processor_type not in known_types:
            raise Exception('Unknown commit processor type %s' %
                            commit_processor_type)
        return known_types[commit_processor_type](persistent_storage)

View File

@ -40,4 +40,10 @@ OPTS = [
default=('https://raw.github.com/stackforge/stackalytics/'
'master/etc/corrections.json'),
help='The address of file with corrections data'),
cfg.StrOpt('review-uri', default='gerrit://review.openstack.org',
help='URI of review system'),
cfg.StrOpt('ssh-key-filename', default='/home/ishakhat/.ssh/4launchpad_id',
help='SSH key for gerrit review system access'),
cfg.StrOpt('ssh-username', default='ishakhat',
help='SSH username for gerrit review system access'),
]

View File

@ -12,17 +12,20 @@
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import urllib
import urllib2
from oslo.config import cfg
import psutil
from psutil import _error
import urllib2
from stackalytics.openstack.common import log as logging
from stackalytics.processor import commit_processor
from stackalytics.processor import config
from stackalytics.processor import persistent_storage
from stackalytics.processor import rcs
from stackalytics.processor import record_processor
from stackalytics.processor import runtime_storage
from stackalytics.processor import vcs
@ -57,41 +60,74 @@ def update_pids(runtime_storage):
runtime_storage.active_pids(pids)
def process_repo(repo, runtime_storage, processor):
def _merge_commits(original, new):
if new['branches'] < original['branches']:
return False
else:
original['branches'] |= new['branches']
return True
def process_repo(repo, runtime_storage, commit_processor, review_processor):
uri = repo['uri']
LOG.debug('Processing repo uri %s' % uri)
vcs_inst = vcs.get_vcs(repo, cfg.CONF.sources_root)
vcs_inst.fetch()
rcs_inst = rcs.get_rcs(repo, cfg.CONF.review_uri)
rcs_inst.setup(key_filename=cfg.CONF.ssh_key_filename,
username=cfg.CONF.ssh_username)
for branch in repo['branches']:
LOG.debug('Processing repo %s, branch %s' % (uri, branch))
LOG.debug('Processing repo %s, branch %s', uri, branch)
head_commit_id = runtime_storage.get_head_commit_id(uri, branch)
vcs_key = 'vcs:' + str(urllib.quote_plus(uri) + ':' + branch)
last_id = runtime_storage.get_last_id(vcs_key)
commit_iterator = vcs_inst.log(branch, head_commit_id)
processed_commit_iterator = processor.process(commit_iterator)
runtime_storage.set_records(processed_commit_iterator)
commit_iterator = vcs_inst.log(branch, last_id)
processed_commit_iterator = commit_processor.process(commit_iterator)
runtime_storage.set_records(processed_commit_iterator, _merge_commits)
head_commit_id = vcs_inst.get_head_commit_id(branch)
runtime_storage.set_head_commit_id(uri, branch, head_commit_id)
last_id = vcs_inst.get_last_id(branch)
runtime_storage.set_last_id(vcs_key, last_id)
LOG.debug('Processing reviews for repo %s, branch %s', uri, branch)
rcs_key = 'rcs:' + str(urllib.quote_plus(uri) + ':' + branch)
last_id = runtime_storage.get_last_id(rcs_key)
reviews_iterator = rcs_inst.log(branch, last_id)
processed_review_iterator = review_processor.process(reviews_iterator)
runtime_storage.set_records(processed_review_iterator)
last_id = rcs_inst.get_last_id(branch)
runtime_storage.set_last_id(rcs_key, last_id)
def update_repos(runtime_storage, persistent_storage):
repos = persistent_storage.get_repos()
processor = commit_processor.CommitProcessorFactory.get_processor(
commit_processor.COMMIT_PROCESSOR_CACHED,
persistent_storage)
commit_processor = record_processor.get_record_processor(
record_processor.COMMIT_PROCESSOR, persistent_storage)
review_processor = record_processor.get_record_processor(
record_processor.REVIEW_PROCESSOR, persistent_storage)
for repo in repos:
process_repo(repo, runtime_storage, processor)
process_repo(repo, runtime_storage, commit_processor, review_processor)
def apply_corrections(uri, runtime_storage_inst):
corrections_fd = urllib2.urlopen(uri)
raw = corrections_fd.read()
corrections_fd.close()
runtime_storage_inst.apply_corrections(json.loads(raw)['corrections'])
corrections = json.loads(raw)['corrections']
valid_corrections = []
for c in corrections:
if 'primary_key' in c:
valid_corrections.append(c)
else:
LOG.warn('Correction misses primary key: %s', c)
runtime_storage_inst.apply_corrections(corrections)
def main():

View File

@ -0,0 +1,124 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import re
import paramiko
from stackalytics.openstack.common import log as logging
LOG = logging.getLogger(__name__)
DEFAULT_PORT = 29418
GERRIT_URI_PREFIX = r'^gerrit:\/\/'
class Rcs(object):
    """Base interface for review control system drivers."""

    def __init__(self, repo, uri):
        # NOTE(review): `uri` is parsed by concrete drivers (e.g. Gerrit);
        # the base class only keeps the repo description.
        self.repo = repo

    def setup(self, **kwargs):
        """Configure driver-specific options (e.g. SSH credentials)."""
        pass

    def log(self, branch, last_id):
        """Yield review records for the branch newer than last_id."""
        pass

    def get_last_id(self, branch):
        """Return the most recent review id for the branch."""
        pass
class Gerrit(Rcs):
    """Review control system driver for a Gerrit server accessed over SSH.

    The uri has the form ``gerrit://hostname[:port]``; reviews are read via
    the ``gerrit query`` command in JSON format.
    """

    def __init__(self, repo, uri):
        super(Gerrit, self).__init__(repo, uri)

        stripped = re.sub(GERRIT_URI_PREFIX, '', uri)
        if stripped:
            self.hostname, _, self.port = stripped.partition(':')
            if self.port:
                # Bug fix: partition() yields the port as a string, while
                # paramiko's connect() expects an integer port number.
                self.port = int(self.port)
            else:
                self.port = DEFAULT_PORT
        else:
            raise Exception('Invalid rcs uri %s' % uri)

        self.client = paramiko.SSHClient()
        self.client.load_system_host_keys()
        self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    def setup(self, **kwargs):
        """Store SSH credentials used by subsequent connections."""
        self.key_filename = kwargs.get('key_filename')
        self.username = kwargs.get('username')

    def _connect(self):
        # (Re)connect before each command; gerrit sessions are short-lived.
        self.client.connect(self.hostname, port=self.port,
                            key_filename=self.key_filename,
                            username=self.username)
        LOG.debug('Successfully connected to Gerrit')

    def log(self, branch, last_id):
        """Yield review dicts for the module's branch, newest first.

        :param last_id: sortKey of the last processed review (int) or None;
            when set, resume the query just after that review.
        """
        module = self.repo['module']
        LOG.debug('Retrieve reviews from gerrit for project %s', module)

        self._connect()

        cmd = ('gerrit query --all-approvals --patch-sets --format JSON '
               '%(module)s '
               'branch:%(branch)s' %
               {'module': module, 'branch': branch})
        if last_id:
            # resume_sortkey expects a 16-digit hex value
            cmd += ' NOT resume_sortkey:%016x' % (last_id + 1)
        stdin, stdout, stderr = self.client.exec_command(cmd)
        for line in stdout:
            review = json.loads(line)
            # the trailing stats line has no sortKey and is skipped
            if 'sortKey' in review:
                review['module'] = module
                yield review

    def get_last_id(self, branch):
        """Return the sortKey of the newest review as an int.

        :raises Exception: when the query returns no reviews.
        """
        module = self.repo['module']
        LOG.debug('Get last id for module %s', module)

        self._connect()

        cmd = ('gerrit query --all-approvals --patch-sets --format JSON '
               '%(module)s branch:%(branch)s limit:1' %
               {'module': module, 'branch': branch})
        stdin, stdout, stderr = self.client.exec_command(cmd)
        for line in stdout:
            review = json.loads(line)
            if 'sortKey' in review:
                return int(review['sortKey'], 16)
        raise Exception('Last id is not found for module %s' % module)
def get_rcs(repo, uri):
    """Instantiate a review control system driver for the given uri.

    Only gerrit:// uris are supported; anything else raises Exception.
    """
    LOG.debug('Review control system is requested for uri %s' % uri)
    if re.search(GERRIT_URI_PREFIX, uri):
        return Gerrit(repo, uri)
    raise Exception('Unknown review control system for uri %s' % uri)

View File

@ -0,0 +1,278 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import bisect
import logging
import re
from launchpadlib import launchpad
from oslo.config import cfg
from stackalytics.processor import user_utils
LOG = logging.getLogger(__name__)
COMMIT_PROCESSOR = 1
REVIEW_PROCESSOR = 2
class RecordProcessor(object):
    """Base interface for record processors."""

    def __init__(self, persistent_storage):
        # Kept for subclasses that need user/company/release lookups.
        self.persistent_storage = persistent_storage

    def process(self, record_iterator):
        """Transform raw records into dashboard records; subclass hook."""
        pass
class CachedProcessor(RecordProcessor):
    """Record processor with in-memory user/company indexes.

    Builds two indexes from persistent storage at construction time:
    email domain -> company name, and email -> user record. Unknown emails
    are looked up at Launchpad and persisted.
    """

    def __init__(self, persistent_storage):
        super(CachedProcessor, self).__init__(persistent_storage)

        # index: email domain -> company name
        companies = persistent_storage.get_companies()
        self.domains_index = {}
        for company in companies:
            for domain in company['domains']:
                self.domains_index[domain] = company['company_name']

        # index: email -> user record
        users = persistent_storage.get_users()
        self.users_index = {}
        for user in users:
            for email in user['emails']:
                self.users_index[email] = user

    def _find_company(self, companies, date):
        # Return the company the user worked for at `date`.
        # NOTE(review): assumes `companies` is ordered by end_date and the
        # last entry is the current affiliation -- TODO confirm ordering.
        for r in companies:
            if date < r['end_date']:
                return r['company_name']
        return companies[-1]['company_name']

    def _get_company_by_email(self, email):
        """Map an email's domain to a company, or None if no match.

        Tries progressively shorter domain suffixes, longest first
        (e.g. 'a.b.com', then 'b.com').
        """
        name, at, domain = email.partition('@')
        if domain:
            parts = domain.split('.')
            for i in range(len(parts), 1, -1):
                m = '.'.join(parts[len(parts) - i:])
                if m in self.domains_index:
                    return self.domains_index[m]
        return None

    def _persist_user(self, launchpad_id, email, user_name):
        """Merge the email into an existing persistent user or create one.

        Returns the (possibly new) user record.
        """
        # check if user with launchpad_id exists in persistent storage
        persistent_user_iterator = self.persistent_storage.get_users(
            launchpad_id=launchpad_id)
        # take the first user from the iterator, if any
        for persistent_user in persistent_user_iterator:
            break
        else:
            persistent_user = None
        if persistent_user:
            # user already exist, merge
            LOG.debug('User exists in persistent storage, add new email %s',
                      email)
            persistent_user_email = persistent_user['emails'][0]
            if persistent_user_email not in self.users_index:
                raise Exception('User index is not valid')
            user = self.users_index[persistent_user_email]
            user['emails'].append(email)
            self.persistent_storage.update_user(user)
        else:
            # add new user
            LOG.debug('Add new user into persistent storage')
            company = (self._get_company_by_email(email) or
                       self.domains_index[''])
            user = {
                'launchpad_id': launchpad_id,
                'user_name': user_name,
                'emails': [email],
                'companies': [{
                    'company_name': company,
                    'end_date': 0,
                }],
            }
            self.persistent_storage.insert_user(user)
        return user

    def _unknown_user_email(self, email):
        """Resolve an email not present in the local index via Launchpad.

        Falls back to a 'nobody' stub (launchpad_id None) when the email is
        invalid or not known to Launchpad. Updates the local email index and
        returns the user record.
        """
        lp_profile = None
        if not re.match(r'[\w\d_\.-]+@([\w\d_\.-]+\.)+[\w]+', email):
            LOG.debug('User email is not valid %s' % email)
        else:
            LOG.debug('Lookup user email %s at Launchpad' % email)
            lp = launchpad.Launchpad.login_anonymously('stackalytics')
            try:
                lp_profile = lp.people.getByEmail(email=email)
            except Exception as error:
                # Best-effort: a failed lookup degrades to the 'nobody' stub.
                LOG.warn('Lookup of email %s failed %s' %
                         (email, error.message))
        if not lp_profile:
            # user is not found in Launchpad, create dummy record for commit
            # update
            LOG.debug('Email is not found at Launchpad, mapping to nobody')
            user = {
                'launchpad_id': None,
                'companies': [{
                    # '' domain acts as the catch-all company entry
                    'company_name': self.domains_index[''],
                    'end_date': 0
                }]
            }
        else:
            # get user's launchpad id from his profile
            launchpad_id = lp_profile.name
            user_name = lp_profile.display_name
            LOG.debug('Found user %s' % launchpad_id)
            user = self._persist_user(launchpad_id, email, user_name)
        # update local index
        self.users_index[email] = user
        return user
class CommitProcessor(CachedProcessor):
    """Turns raw VCS commits into dashboard 'commit' records."""

    def __init__(self, persistent_storage):
        super(CommitProcessor, self).__init__(persistent_storage)
        LOG.debug('Commit processor is instantiated')

    def _update_commit_with_user_data(self, commit):
        # Annotate the commit in place with launchpad_id, company_name
        # and (when known) author_name.
        email = commit['author_email'].lower()
        if email in self.users_index:
            user = self.users_index[email]
        else:
            user = self._unknown_user_email(email)
        commit['launchpad_id'] = user['launchpad_id']
        # Prefer the email-domain mapping; fall back to the user's
        # affiliation history at the commit date.
        company = self._get_company_by_email(email)
        if not company:
            company = self._find_company(user['companies'], commit['date'])
        commit['company_name'] = company
        if 'user_name' in user:
            commit['author_name'] = user['user_name']

    def process(self, record_iterator):
        """Yield annotated commit records, optionally skipping robots."""
        for record in record_iterator:
            self._update_commit_with_user_data(record)

            if cfg.CONF.filter_robots and record['company_name'] == '*robots':
                continue

            record['record_type'] = 'commit'
            record['primary_key'] = record['commit_id']
            record['week'] = user_utils.timestamp_to_week(record['date'])
            record['loc'] = record['lines_added'] + record['lines_deleted']

            yield record
class ReviewProcessor(CachedProcessor):
    """Turns raw Gerrit reviews into 'review' and 'mark' records."""

    def __init__(self, persistent_storage):
        super(ReviewProcessor, self).__init__(persistent_storage)

        # index: launchpad_id -> user record
        # NOTE(review): despite the name, this maps to full user records,
        # not company names.
        self.launchpad_to_company_index = {}
        users = persistent_storage.get_users()
        for user in users:
            self.launchpad_to_company_index[user['launchpad_id']] = user

        # releases sorted by end date; used to map timestamps to releases
        self.releases = []
        for release in persistent_storage.get_releases():
            r = release.copy()
            r['end_date_ts'] = user_utils.date_to_timestamp(r['end_date'])
            r['release_name'] = r['release_name'].lower()
            self.releases.append(r)
        self.releases.sort(key=lambda x: x['end_date_ts'])
        self.releases_dates = [r['end_date_ts'] for r in self.releases]

        LOG.debug('Review processor is instantiated')

    def _get_release(self, timestamp):
        # Find the first release whose end date is after the timestamp.
        # NOTE(review): a timestamp beyond the last release's end date makes
        # bisect return len(releases) and this raises IndexError -- presumably
        # the last configured release always ends in the future; verify.
        release_index = bisect.bisect(self.releases_dates, timestamp)
        return self.releases[release_index]['release_name']

    def _process_user(self, email, launchpad_id, user_name, date):
        """Ensure the user is indexed/persisted; return the company name."""
        if email in self.users_index:
            user = self.users_index[email]
        else:
            user = self._persist_user(launchpad_id, email, user_name)
            self.users_index[email] = user

        company = self._get_company_by_email(email)
        if not company:
            company = self._find_company(user['companies'], date)
        return company

    def _spawn_review(self, record):
        # copy everything except patch sets and flatten user data
        review = dict([(k, v) for k, v in record.iteritems()
                       if k not in ['patchSets', 'owner']])
        owner = record['owner']
        company = self._process_user(owner['email'].lower(),
                                     owner['username'],
                                     owner['name'],
                                     record['createdOn'])
        review['record_type'] = 'review'
        review['primary_key'] = record['id']
        review['company_name'] = company
        review['launchpad_id'] = owner['username']
        review['release'] = self._get_release(review['createdOn'])
        yield review

    def _spawn_marks(self, record):
        """Yield one 'mark' record per approval on every patch set."""
        review_id = record['id']
        for patch in record['patchSets']:
            if 'approvals' not in patch:
                continue  # not reviewed by anyone
            for approval in patch['approvals']:
                # copy everything and flatten user data
                mark = dict([(k, v) for k, v in approval.iteritems()
                             if k != 'by'])
                reviewer = approval['by']

                mark['record_type'] = 'mark'
                # primary key combines review id, grant time and mark type
                mark['primary_key'] = (record['id'] +
                                       str(mark['grantedOn']) +
                                       mark['type'])
                mark['launchpad_id'] = reviewer['username']
                mark['module'] = record['module']

                # reviewers without an email cannot be mapped to a company
                if 'email' not in reviewer:
                    continue

                company = self._process_user(reviewer['email'],
                                             reviewer['username'],
                                             reviewer['name'],
                                             mark['grantedOn'])
                mark['company_name'] = company
                mark['review_id'] = review_id
                mark['release'] = self._get_release(mark['grantedOn'])

                yield mark

    def process(self, record_iterator):
        """
        Process a review. Review spawns into records of two types:
        * review - records that a user created review request
        * mark - records that a user set approval mark to given review
        """
        for record in record_iterator:
            for gen in [self._spawn_review, self._spawn_marks]:
                for r in gen(record):
                    yield r
def get_record_processor(processor_type, persistent_storage):
    """Return a record processor instance for the given type constant.

    :param processor_type: COMMIT_PROCESSOR or REVIEW_PROCESSOR
    :param persistent_storage: storage used for user/company lookups
    :raises Exception: when processor_type is unknown
    """
    LOG.debug('Record processor is requested of type %s' % processor_type)
    if processor_type == COMMIT_PROCESSOR:
        return CommitProcessor(persistent_storage)
    elif processor_type == REVIEW_PROCESSOR:
        return ReviewProcessor(persistent_storage)
    else:
        # Bug fix: the message said 'commit processor' even though this
        # factory dispatches all record processor types.
        raise Exception('Unknown record processor type %s' % processor_type)

View File

@ -16,7 +16,6 @@
import logging
import re
import urllib
import memcache
@ -25,6 +24,7 @@ LOG = logging.getLogger(__name__)
BULK_READ_SIZE = 64
RECORD_ID_PREFIX = 'record:'
UPDATE_ID_PREFIX = 'update:'
MEMCACHED_URI_PREFIX = r'^memcached:\/\/'
class RuntimeStorage(object):
@ -37,10 +37,10 @@ class RuntimeStorage(object):
def apply_corrections(self, corrections_iterator):
pass
def get_head_commit_id(self, uri, branch):
def get_last_id(self, key):
pass
def set_head_commit_id(self, uri, branch, head_commit_id):
def set_last_id(self, key, head_commit_id):
pass
def get_update(self, pid):
@ -55,7 +55,7 @@ class MemcachedStorage(RuntimeStorage):
def __init__(self, uri):
super(MemcachedStorage, self).__init__(uri)
stripped = re.sub(r'memcached:\/\/', '', uri)
stripped = re.sub(MEMCACHED_URI_PREFIX, '', uri)
if stripped:
storage_uri = stripped.split(',')
self.memcached = memcache.Client(storage_uri)
@ -63,15 +63,22 @@ class MemcachedStorage(RuntimeStorage):
else:
raise Exception('Invalid storage uri %s' % uri)
def set_records(self, records_iterator):
def _build_index(self):
self.record_index = {}
for record in self._get_all_records():
self.record_index[record['primary_key']] = record['record_id']
def set_records(self, records_iterator, merge_handler=None):
for record in records_iterator:
if record['commit_id'] in self.commit_id_index:
if record['primary_key'] in self.record_index:
# update
record_id = self.commit_id_index[record['commit_id']]
record_id = self.record_index[record['primary_key']]
original = self.memcached.get(self._get_record_name(record_id))
original['branches'] |= record['branches']
LOG.debug('Update record %s' % record)
self.memcached.set(self._get_record_name(record_id), original)
if merge_handler:
if merge_handler(original, record):
LOG.debug('Update record %s' % record)
self.memcached.set(self._get_record_name(record_id),
original)
else:
# insert record
record_id = self._get_record_count()
@ -84,10 +91,10 @@ class MemcachedStorage(RuntimeStorage):
def apply_corrections(self, corrections_iterator):
for correction in corrections_iterator:
if correction['commit_id'] not in self.commit_id_index:
if correction['primary_key'] not in self.record_index:
continue
record_id = self.commit_id_index[correction['commit_id']]
record_id = self.record_index[correction['primary_key']]
original = self.memcached.get(self._get_record_name(record_id))
need_update = False
@ -100,12 +107,10 @@ class MemcachedStorage(RuntimeStorage):
self.memcached.set(self._get_record_name(record_id), original)
self._commit_update(record_id)
def get_head_commit_id(self, uri, branch):
key = str(urllib.quote_plus(uri) + ':' + branch)
def get_last_id(self, key):
return self.memcached.get(key)
def set_head_commit_id(self, uri, branch, head_commit_id):
key = str(urllib.quote_plus(uri) + ':' + branch)
def set_last_id(self, key, head_commit_id):
self.memcached.set(key, head_commit_id)
def get_update(self, pid):
@ -186,15 +191,10 @@ class MemcachedStorage(RuntimeStorage):
self.memcached.set(UPDATE_ID_PREFIX + str(count), record_id)
self.memcached.set('update:count', count + 1)
def _build_index(self):
self.commit_id_index = {}
for record in self._get_all_records():
self.commit_id_index[record['commit_id']] = record['record_id']
def get_runtime_storage(uri):
LOG.debug('Runtime storage is requested for uri %s' % uri)
match = re.search(r'^memcached:\/\/', uri)
match = re.search(MEMCACHED_URI_PREFIX, uri)
if match:
return MemcachedStorage(uri)
else:

View File

@ -13,13 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import re
import sh
from stackalytics.openstack.common import log as logging
LOG = logging.getLogger(__name__)
@ -41,7 +42,7 @@ class Vcs(object):
def log(self, branch, head_commit_id):
pass
def get_head_commit_id(self, branch):
def get_last_id(self, branch):
pass
@ -162,7 +163,7 @@ class Git(Vcs):
yield commit
def get_head_commit_id(self, branch):
def get_last_id(self, branch):
LOG.debug('Get head commit for repo uri %s' % self.repo['uri'])
self._chdir()

View File

@ -2,7 +2,7 @@
pep8==1.4.5
pyflakes==0.7.2
flake8==2.0
hacking>=0.5.3,<0.6
hacking>=0.5.3,<0.7
coverage
discover

View File

@ -1,10 +1,25 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from launchpadlib import launchpad
import mock
from oslo.config import cfg
import testtools
from stackalytics.processor import commit_processor
from stackalytics.processor import persistent_storage
from stackalytics.processor import record_processor
class TestCommitProcessor(testtools.TestCase):
@ -41,7 +56,7 @@ class TestCommitProcessor(testtools.TestCase):
])
self.persistent_storage = p_storage
self.commit_processor = commit_processor.CachedProcessor(p_storage)
self.commit_processor = record_processor.CommitProcessor(p_storage)
self.launchpad_patch = mock.patch('launchpadlib.launchpad.Launchpad')
self.launchpad_patch.start()
cfg.CONF = mock.MagicMock()

View File

@ -1,3 +1,18 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import mock
import os
import testtools

View File

@ -1,3 +1,18 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import testtools
from dashboard import web