Merge "Post-process all records every time the processor runs"

This commit is contained in:
Jenkins
2014-06-17 06:35:52 +00:00
committed by Gerrit Code Review
6 changed files with 90 additions and 103 deletions

View File

@@ -29,7 +29,7 @@ def _get_bug_id(web_link):
return web_link[web_link.rfind('/') + 1:] return web_link[web_link.rfind('/') + 1:]
def log(repo, last_bug_date): def log(repo, modified_since):
module = repo['module'] module = repo['module']
LOG.debug('Retrieving list of bugs for module: %s', module) LOG.debug('Retrieving list of bugs for module: %s', module)
@@ -38,7 +38,7 @@ def log(repo, last_bug_date):
return return
for record_draft in launchpad_utils.lp_bug_generator(module, for record_draft in launchpad_utils.lp_bug_generator(module,
last_bug_date): modified_since):
record = {} record = {}

View File

@@ -22,9 +22,7 @@ import six
from stackalytics.openstack.common import log as logging from stackalytics.openstack.common import log as logging
from stackalytics.processor import normalizer from stackalytics.processor import normalizer
from stackalytics.processor import record_processor
from stackalytics.processor import utils from stackalytics.processor import utils
from stackalytics.processor import vcs
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
@@ -162,67 +160,10 @@ def _store_default_data(runtime_storage_inst, default_data):
runtime_storage_inst.set_by_key(key, value) runtime_storage_inst.set_by_key(key, value)
def _update_records(runtime_storage_inst, sources_root): def process(runtime_storage_inst, default_data):
LOG.debug('Update existing records')
release_index = {}
for repo in utils.load_repos(runtime_storage_inst):
vcs_inst = vcs.get_vcs(repo, sources_root)
release_index.update(vcs_inst.fetch())
record_processor_inst = record_processor.RecordProcessor(
runtime_storage_inst)
record_processor_inst.update(release_index)
def _get_changed_member_records(runtime_storage_inst, record_processor_inst):
for record in runtime_storage_inst.get_all_records():
if record['record_type'] == 'member' and 'company_name' in record:
company_draft = record['company_draft']
company_name = record_processor_inst.domains_index.get(
utils.normalize_company_name(company_draft)) or (
utils.normalize_company_draft(company_draft))
if company_name != record['company_name']:
record['company_name'] = company_name
yield record
def _update_members_company_name(runtime_storage_inst):
LOG.debug('Update company names for members')
record_processor_inst = record_processor.RecordProcessor(
runtime_storage_inst)
member_iterator = _get_changed_member_records(runtime_storage_inst,
record_processor_inst)
for record in member_iterator:
company_name = record['company_name']
user = utils.load_user(runtime_storage_inst, record['user_id'])
user['companies'] = [{
'company_name': company_name,
'end_date': 0,
}]
user['company_name'] = company_name
utils.store_user(runtime_storage_inst, user)
LOG.debug('Company name changed for user %s', user)
record_id = record['record_id']
runtime_storage_inst.memcached.set(
runtime_storage_inst._get_record_name(record_id), record)
runtime_storage_inst._commit_update(record_id)
def process(runtime_storage_inst, default_data, sources_root, force_update):
LOG.debug('Process default data') LOG.debug('Process default data')
dd_changed = _check_default_data_change(runtime_storage_inst, default_data)
if 'project_sources' in default_data: if 'project_sources' in default_data:
_update_project_list(default_data) _update_project_list(default_data)
if dd_changed or force_update: _store_default_data(runtime_storage_inst, default_data)
_store_default_data(runtime_storage_inst, default_data)
_update_records(runtime_storage_inst, sources_root)
_update_members_company_name(runtime_storage_inst)

View File

@@ -76,12 +76,12 @@ def lp_blueprint_generator(module):
uri = chunk.get('next_collection_link') uri = chunk.get('next_collection_link')
def lp_bug_generator(module, last_bug_date): def lp_bug_generator(module, modified_since):
uri = LP_URI_DEVEL % (module + '?ws.op=searchTasks') uri = LP_URI_DEVEL % (module + '?ws.op=searchTasks')
for status in BUG_STATUSES: for status in BUG_STATUSES:
uri += '&status=' + six.moves.urllib.parse.quote_plus(status) uri += '&status=' + six.moves.urllib.parse.quote_plus(status)
if last_bug_date: if modified_since:
uri += '&modified_since=' + last_bug_date uri += '&modified_since=' + utils.timestamp_to_utc_date(modified_since)
while uri: while uri:
LOG.debug('Reading chunk from uri %s', uri) LOG.debug('Reading chunk from uri %s', uri)

View File

@@ -19,7 +19,6 @@ from oslo.config import cfg
import psutil import psutil
import six import six
from six.moves.urllib import parse from six.moves.urllib import parse
import time
import yaml import yaml
from stackalytics.openstack.common import log as logging from stackalytics.openstack.common import log as logging
@@ -78,11 +77,12 @@ def _record_typer(record_iterator, record_type):
yield record yield record
def process_repo(repo, runtime_storage_inst, record_processor_inst, def _process_repo(repo, runtime_storage_inst, record_processor_inst,
last_bug_date): bug_modified_since):
uri = repo['uri'] uri = repo['uri']
LOG.debug('Processing repo uri %s' % uri) LOG.info('Processing repo uri: %s', uri)
LOG.debug('Processing blueprints for repo uri: %s', uri)
bp_iterator = lp.log(repo) bp_iterator = lp.log(repo)
bp_iterator_typed = _record_typer(bp_iterator, 'bp') bp_iterator_typed = _record_typer(bp_iterator, 'bp')
processed_bp_iterator = record_processor_inst.process( processed_bp_iterator = record_processor_inst.process(
@@ -90,7 +90,8 @@ def process_repo(repo, runtime_storage_inst, record_processor_inst,
runtime_storage_inst.set_records(processed_bp_iterator, runtime_storage_inst.set_records(processed_bp_iterator,
utils.merge_records) utils.merge_records)
bug_iterator = bps.log(repo, last_bug_date) LOG.debug('Processing bugs for repo uri: %s', uri)
bug_iterator = bps.log(repo, bug_modified_since)
bug_iterator_typed = _record_typer(bug_iterator, 'bug') bug_iterator_typed = _record_typer(bug_iterator, 'bug')
processed_bug_iterator = record_processor_inst.process( processed_bug_iterator = record_processor_inst.process(
bug_iterator_typed) bug_iterator_typed)
@@ -110,7 +111,7 @@ def process_repo(repo, runtime_storage_inst, record_processor_inst,
branches.add(release['branch']) branches.add(release['branch'])
for branch in branches: for branch in branches:
LOG.debug('Processing repo %s, branch %s', uri, branch) LOG.debug('Processing commits in repo: %s, branch: %s', uri, branch)
vcs_key = 'vcs:' + str(parse.quote_plus(uri) + ':' + branch) vcs_key = 'vcs:' + str(parse.quote_plus(uri) + ':' + branch)
last_id = runtime_storage_inst.get_by_key(vcs_key) last_id = runtime_storage_inst.get_by_key(vcs_key)
@@ -125,7 +126,7 @@ def process_repo(repo, runtime_storage_inst, record_processor_inst,
last_id = vcs_inst.get_last_id(branch) last_id = vcs_inst.get_last_id(branch)
runtime_storage_inst.set_by_key(vcs_key, last_id) runtime_storage_inst.set_by_key(vcs_key, last_id)
LOG.debug('Processing reviews for repo %s, branch %s', uri, branch) LOG.debug('Processing reviews for repo: %s, branch: %s', uri, branch)
rcs_key = 'rcs:' + str(parse.quote_plus(uri) + ':' + branch) rcs_key = 'rcs:' + str(parse.quote_plus(uri) + ':' + branch)
last_id = runtime_storage_inst.get_by_key(rcs_key) last_id = runtime_storage_inst.get_by_key(rcs_key)
@@ -141,7 +142,7 @@ def process_repo(repo, runtime_storage_inst, record_processor_inst,
runtime_storage_inst.set_by_key(rcs_key, last_id) runtime_storage_inst.set_by_key(rcs_key, last_id)
def process_mail_list(uri, runtime_storage_inst, record_processor_inst): def _process_mail_list(uri, runtime_storage_inst, record_processor_inst):
mail_iterator = mls.log(uri, runtime_storage_inst) mail_iterator = mls.log(uri, runtime_storage_inst)
mail_iterator_typed = _record_typer(mail_iterator, 'email') mail_iterator_typed = _record_typer(mail_iterator, 'email')
processed_mail_iterator = record_processor_inst.process( processed_mail_iterator = record_processor_inst.process(
@@ -149,7 +150,7 @@ def process_mail_list(uri, runtime_storage_inst, record_processor_inst):
runtime_storage_inst.set_records(processed_mail_iterator) runtime_storage_inst.set_records(processed_mail_iterator)
def process_member_list(uri, runtime_storage_inst, record_processor_inst): def _process_member_list(uri, runtime_storage_inst, record_processor_inst):
member_iterator = mps.log(uri, runtime_storage_inst, member_iterator = mps.log(uri, runtime_storage_inst,
cfg.CONF.days_to_update_members) cfg.CONF.days_to_update_members)
member_iterator_typed = _record_typer(member_iterator, 'member') member_iterator_typed = _record_typer(member_iterator, 'member')
@@ -161,26 +162,38 @@ def process_member_list(uri, runtime_storage_inst, record_processor_inst):
def update_members(runtime_storage_inst, record_processor_inst): def update_members(runtime_storage_inst, record_processor_inst):
member_lists = runtime_storage_inst.get_by_key('member_lists') or [] member_lists = runtime_storage_inst.get_by_key('member_lists') or []
for member_list in member_lists: for member_list in member_lists:
process_member_list(member_list, runtime_storage_inst, _process_member_list(member_list, runtime_storage_inst,
record_processor_inst) record_processor_inst)
def update_records(runtime_storage_inst, record_processor_inst): def _post_process_records(record_processor_inst, repos):
LOG.debug('Build release index')
release_index = {}
for repo in repos:
vcs_inst = vcs.get_vcs(repo, cfg.CONF.sources_root)
release_index.update(vcs_inst.fetch())
LOG.debug('Post-process all records')
record_processor_inst.post_processing(release_index)
def process(runtime_storage_inst, record_processor_inst):
repos = utils.load_repos(runtime_storage_inst) repos = utils.load_repos(runtime_storage_inst)
current_date = utils.timestamp_to_utc_date(int(time.time())) current_date = utils.date_to_timestamp('now')
last_bug_date = runtime_storage_inst.get_by_key('last_bug_date') bug_modified_since = runtime_storage_inst.get_by_key('bug_modified_since')
for repo in repos: for repo in repos:
process_repo(repo, runtime_storage_inst, record_processor_inst, _process_repo(repo, runtime_storage_inst, record_processor_inst,
last_bug_date) bug_modified_since)
runtime_storage_inst.set_by_key('last_bug_date', current_date) runtime_storage_inst.set_by_key('bug_modified_since', current_date)
LOG.info('Processing mail lists')
mail_lists = runtime_storage_inst.get_by_key('mail_lists') or [] mail_lists = runtime_storage_inst.get_by_key('mail_lists') or []
for mail_list in mail_lists: for mail_list in mail_lists:
process_mail_list(mail_list, runtime_storage_inst, _process_mail_list(mail_list, runtime_storage_inst,
record_processor_inst) record_processor_inst)
record_processor_inst.update() _post_process_records(record_processor_inst, repos)
def apply_corrections(uri, runtime_storage_inst): def apply_corrections(uri, runtime_storage_inst):
@@ -290,9 +303,7 @@ def main():
LOG.critical('Unable to load default data') LOG.critical('Unable to load default data')
return not 0 return not 0
default_data_processor.process(runtime_storage_inst, default_data_processor.process(runtime_storage_inst,
default_data, default_data)
cfg.CONF.sources_root,
cfg.CONF.force_update)
process_program_list(runtime_storage_inst, cfg.CONF.program_list_uri) process_program_list(runtime_storage_inst, cfg.CONF.program_list_uri)
@@ -301,7 +312,7 @@ def main():
record_processor_inst = record_processor.RecordProcessor( record_processor_inst = record_processor.RecordProcessor(
runtime_storage_inst) runtime_storage_inst)
update_records(runtime_storage_inst, record_processor_inst) process(runtime_storage_inst, record_processor_inst)
apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst) apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst)

View File

@@ -542,6 +542,8 @@ class RecordProcessor(object):
yield record yield record
def _update_commits_with_merge_date(self): def _update_commits_with_merge_date(self):
LOG.debug('Update commits with merge date')
change_id_to_date = {} change_id_to_date = {}
for record in self.runtime_storage_inst.get_all_records(): for record in self.runtime_storage_inst.get_all_records():
if (record['record_type'] == 'review' and if (record['record_type'] == 'review' and
@@ -728,13 +730,46 @@ class RecordProcessor(object):
for processed in self._close_patch(cores, marks_patch['marks']): for processed in self._close_patch(cores, marks_patch['marks']):
yield processed yield processed
def update(self, release_index=None): def _update_members_company_name(self):
LOG.debug('Update members with company names')
for record in self.runtime_storage_inst.get_all_records():
if record['record_type'] != 'member':
continue
company_draft = record['company_draft']
company_name = self.domains_index.get(
utils.normalize_company_name(company_draft)) or (
utils.normalize_company_draft(company_draft))
if company_name == record['company_name']:
continue
LOG.debug('Update record %s, company name changed to %s',
record, company_name)
record['company_name'] = company_name
yield record
user = utils.load_user(self.runtime_storage_inst,
record['user_id'])
LOG.debug('Update user %s, company name changed to %s',
user, company_name)
user['companies'] = [{
'company_name': company_name,
'end_date': 0,
}]
utils.store_user(self.runtime_storage_inst, user)
def post_processing(self, release_index):
self.runtime_storage_inst.set_records( self.runtime_storage_inst.set_records(
self._update_records_with_user_info()) self._update_records_with_user_info())
if release_index: self.runtime_storage_inst.set_records(
self.runtime_storage_inst.set_records( self._update_commits_with_merge_date())
self._update_records_with_releases(release_index))
self.runtime_storage_inst.set_records(
self._update_records_with_releases(release_index))
self.runtime_storage_inst.set_records( self.runtime_storage_inst.set_records(
self._update_reviews_with_sequence_number()) self._update_reviews_with_sequence_number())
@@ -742,11 +777,11 @@ class RecordProcessor(object):
self.runtime_storage_inst.set_records( self.runtime_storage_inst.set_records(
self._update_blueprints_with_mention_info()) self._update_blueprints_with_mention_info())
self.runtime_storage_inst.set_records(
self._update_commits_with_merge_date())
self._determine_core_contributors() self._determine_core_contributors()
# disagreement calculation must go after determining core contributors # disagreement calculation must go after determining core contributors
self.runtime_storage_inst.set_records( self.runtime_storage_inst.set_records(
self._update_marks_with_disagreement()) self._update_marks_with_disagreement())
self.runtime_storage_inst.set_records(
self._update_members_company_name())

View File

@@ -782,7 +782,7 @@ class TestRecordProcessor(testtools.TestCase):
'module': 'nova', 'branch': 'master'} 'module': 'nova', 'branch': 'master'}
])) ]))
record_processor_inst.update() record_processor_inst.post_processing({})
user = {'seq': 2, user = {'seq': 2,
'core': [], 'core': [],
@@ -853,7 +853,7 @@ class TestRecordProcessor(testtools.TestCase):
}]} }]}
])) ]))
record_processor_inst.update() record_processor_inst.post_processing({})
user_1 = {'seq': 1, 'user_id': 'john_doe', user_1 = {'seq': 1, 'user_id': 'john_doe',
'launchpad_id': 'john_doe', 'user_name': 'John Doe', 'launchpad_id': 'john_doe', 'user_name': 'John Doe',
@@ -942,7 +942,7 @@ class TestRecordProcessor(testtools.TestCase):
'date': 1234567895, 'date': 1234567895,
'blueprint_id': ['mod:blueprint', 'mod:invalid']}, 'blueprint_id': ['mod:blueprint', 'mod:invalid']},
])) ]))
record_processor_inst.update() record_processor_inst.post_processing({})
bp1 = runtime_storage_inst.get_by_primary_key('bpd:mod:blueprint') bp1 = runtime_storage_inst.get_by_primary_key('bpd:mod:blueprint')
self.assertEqual(2, bp1['mention_count']) self.assertEqual(2, bp1['mention_count'])
@@ -978,7 +978,7 @@ class TestRecordProcessor(testtools.TestCase):
'createdOn': 5, 'createdOn': 5,
'module': 'glance', 'branch': 'master'}, 'module': 'glance', 'branch': 'master'},
])) ]))
record_processor_inst.update() record_processor_inst.post_processing({})
review1 = runtime_storage_inst.get_by_primary_key('I111') review1 = runtime_storage_inst.get_by_primary_key('I111')
self.assertEqual(2, review1['review_number']) self.assertEqual(2, review1['review_number'])
@@ -1065,7 +1065,7 @@ class TestRecordProcessor(testtools.TestCase):
} }
]} ]}
])) ]))
record_processor_inst.update() record_processor_inst.post_processing({})
marks = list([r for r in runtime_storage_inst.get_all_records() marks = list([r for r in runtime_storage_inst.get_all_records()
if r['record_type'] == 'mark']) if r['record_type'] == 'mark'])
@@ -1110,7 +1110,7 @@ class TestRecordProcessor(testtools.TestCase):
'status': 'MERGED', 'status': 'MERGED',
'module': 'nova', 'branch': 'master'}, 'module': 'nova', 'branch': 'master'},
])) ]))
record_processor_inst.update() record_processor_inst.post_processing({})
commit = runtime_storage_inst.get_by_primary_key('de7e8f2') commit = runtime_storage_inst.get_by_primary_key('de7e8f2')
self.assertEqual(1385490000, commit['date']) self.assertEqual(1385490000, commit['date'])