Merge "Post-process all records every time the processor runs"
This commit is contained in:
@@ -29,7 +29,7 @@ def _get_bug_id(web_link):
|
|||||||
return web_link[web_link.rfind('/') + 1:]
|
return web_link[web_link.rfind('/') + 1:]
|
||||||
|
|
||||||
|
|
||||||
def log(repo, last_bug_date):
|
def log(repo, modified_since):
|
||||||
module = repo['module']
|
module = repo['module']
|
||||||
LOG.debug('Retrieving list of bugs for module: %s', module)
|
LOG.debug('Retrieving list of bugs for module: %s', module)
|
||||||
|
|
||||||
@@ -38,7 +38,7 @@ def log(repo, last_bug_date):
|
|||||||
return
|
return
|
||||||
|
|
||||||
for record_draft in launchpad_utils.lp_bug_generator(module,
|
for record_draft in launchpad_utils.lp_bug_generator(module,
|
||||||
last_bug_date):
|
modified_since):
|
||||||
|
|
||||||
record = {}
|
record = {}
|
||||||
|
|
||||||
|
@@ -22,9 +22,7 @@ import six
|
|||||||
|
|
||||||
from stackalytics.openstack.common import log as logging
|
from stackalytics.openstack.common import log as logging
|
||||||
from stackalytics.processor import normalizer
|
from stackalytics.processor import normalizer
|
||||||
from stackalytics.processor import record_processor
|
|
||||||
from stackalytics.processor import utils
|
from stackalytics.processor import utils
|
||||||
from stackalytics.processor import vcs
|
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -162,67 +160,10 @@ def _store_default_data(runtime_storage_inst, default_data):
|
|||||||
runtime_storage_inst.set_by_key(key, value)
|
runtime_storage_inst.set_by_key(key, value)
|
||||||
|
|
||||||
|
|
||||||
def _update_records(runtime_storage_inst, sources_root):
|
def process(runtime_storage_inst, default_data):
|
||||||
LOG.debug('Update existing records')
|
|
||||||
release_index = {}
|
|
||||||
for repo in utils.load_repos(runtime_storage_inst):
|
|
||||||
vcs_inst = vcs.get_vcs(repo, sources_root)
|
|
||||||
release_index.update(vcs_inst.fetch())
|
|
||||||
|
|
||||||
record_processor_inst = record_processor.RecordProcessor(
|
|
||||||
runtime_storage_inst)
|
|
||||||
record_processor_inst.update(release_index)
|
|
||||||
|
|
||||||
|
|
||||||
def _get_changed_member_records(runtime_storage_inst, record_processor_inst):
|
|
||||||
for record in runtime_storage_inst.get_all_records():
|
|
||||||
if record['record_type'] == 'member' and 'company_name' in record:
|
|
||||||
company_draft = record['company_draft']
|
|
||||||
company_name = record_processor_inst.domains_index.get(
|
|
||||||
utils.normalize_company_name(company_draft)) or (
|
|
||||||
utils.normalize_company_draft(company_draft))
|
|
||||||
|
|
||||||
if company_name != record['company_name']:
|
|
||||||
record['company_name'] = company_name
|
|
||||||
yield record
|
|
||||||
|
|
||||||
|
|
||||||
def _update_members_company_name(runtime_storage_inst):
|
|
||||||
LOG.debug('Update company names for members')
|
|
||||||
record_processor_inst = record_processor.RecordProcessor(
|
|
||||||
runtime_storage_inst)
|
|
||||||
member_iterator = _get_changed_member_records(runtime_storage_inst,
|
|
||||||
record_processor_inst)
|
|
||||||
|
|
||||||
for record in member_iterator:
|
|
||||||
company_name = record['company_name']
|
|
||||||
user = utils.load_user(runtime_storage_inst, record['user_id'])
|
|
||||||
|
|
||||||
user['companies'] = [{
|
|
||||||
'company_name': company_name,
|
|
||||||
'end_date': 0,
|
|
||||||
}]
|
|
||||||
user['company_name'] = company_name
|
|
||||||
|
|
||||||
utils.store_user(runtime_storage_inst, user)
|
|
||||||
|
|
||||||
LOG.debug('Company name changed for user %s', user)
|
|
||||||
|
|
||||||
record_id = record['record_id']
|
|
||||||
runtime_storage_inst.memcached.set(
|
|
||||||
runtime_storage_inst._get_record_name(record_id), record)
|
|
||||||
runtime_storage_inst._commit_update(record_id)
|
|
||||||
|
|
||||||
|
|
||||||
def process(runtime_storage_inst, default_data, sources_root, force_update):
|
|
||||||
LOG.debug('Process default data')
|
LOG.debug('Process default data')
|
||||||
|
|
||||||
dd_changed = _check_default_data_change(runtime_storage_inst, default_data)
|
|
||||||
|
|
||||||
if 'project_sources' in default_data:
|
if 'project_sources' in default_data:
|
||||||
_update_project_list(default_data)
|
_update_project_list(default_data)
|
||||||
|
|
||||||
if dd_changed or force_update:
|
_store_default_data(runtime_storage_inst, default_data)
|
||||||
_store_default_data(runtime_storage_inst, default_data)
|
|
||||||
_update_records(runtime_storage_inst, sources_root)
|
|
||||||
_update_members_company_name(runtime_storage_inst)
|
|
||||||
|
@@ -76,12 +76,12 @@ def lp_blueprint_generator(module):
|
|||||||
uri = chunk.get('next_collection_link')
|
uri = chunk.get('next_collection_link')
|
||||||
|
|
||||||
|
|
||||||
def lp_bug_generator(module, last_bug_date):
|
def lp_bug_generator(module, modified_since):
|
||||||
uri = LP_URI_DEVEL % (module + '?ws.op=searchTasks')
|
uri = LP_URI_DEVEL % (module + '?ws.op=searchTasks')
|
||||||
for status in BUG_STATUSES:
|
for status in BUG_STATUSES:
|
||||||
uri += '&status=' + six.moves.urllib.parse.quote_plus(status)
|
uri += '&status=' + six.moves.urllib.parse.quote_plus(status)
|
||||||
if last_bug_date:
|
if modified_since:
|
||||||
uri += '&modified_since=' + last_bug_date
|
uri += '&modified_since=' + utils.timestamp_to_utc_date(modified_since)
|
||||||
|
|
||||||
while uri:
|
while uri:
|
||||||
LOG.debug('Reading chunk from uri %s', uri)
|
LOG.debug('Reading chunk from uri %s', uri)
|
||||||
|
@@ -19,7 +19,6 @@ from oslo.config import cfg
|
|||||||
import psutil
|
import psutil
|
||||||
import six
|
import six
|
||||||
from six.moves.urllib import parse
|
from six.moves.urllib import parse
|
||||||
import time
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from stackalytics.openstack.common import log as logging
|
from stackalytics.openstack.common import log as logging
|
||||||
@@ -78,11 +77,12 @@ def _record_typer(record_iterator, record_type):
|
|||||||
yield record
|
yield record
|
||||||
|
|
||||||
|
|
||||||
def process_repo(repo, runtime_storage_inst, record_processor_inst,
|
def _process_repo(repo, runtime_storage_inst, record_processor_inst,
|
||||||
last_bug_date):
|
bug_modified_since):
|
||||||
uri = repo['uri']
|
uri = repo['uri']
|
||||||
LOG.debug('Processing repo uri %s' % uri)
|
LOG.info('Processing repo uri: %s', uri)
|
||||||
|
|
||||||
|
LOG.debug('Processing blueprints for repo uri: %s', uri)
|
||||||
bp_iterator = lp.log(repo)
|
bp_iterator = lp.log(repo)
|
||||||
bp_iterator_typed = _record_typer(bp_iterator, 'bp')
|
bp_iterator_typed = _record_typer(bp_iterator, 'bp')
|
||||||
processed_bp_iterator = record_processor_inst.process(
|
processed_bp_iterator = record_processor_inst.process(
|
||||||
@@ -90,7 +90,8 @@ def process_repo(repo, runtime_storage_inst, record_processor_inst,
|
|||||||
runtime_storage_inst.set_records(processed_bp_iterator,
|
runtime_storage_inst.set_records(processed_bp_iterator,
|
||||||
utils.merge_records)
|
utils.merge_records)
|
||||||
|
|
||||||
bug_iterator = bps.log(repo, last_bug_date)
|
LOG.debug('Processing bugs for repo uri: %s', uri)
|
||||||
|
bug_iterator = bps.log(repo, bug_modified_since)
|
||||||
bug_iterator_typed = _record_typer(bug_iterator, 'bug')
|
bug_iterator_typed = _record_typer(bug_iterator, 'bug')
|
||||||
processed_bug_iterator = record_processor_inst.process(
|
processed_bug_iterator = record_processor_inst.process(
|
||||||
bug_iterator_typed)
|
bug_iterator_typed)
|
||||||
@@ -110,7 +111,7 @@ def process_repo(repo, runtime_storage_inst, record_processor_inst,
|
|||||||
branches.add(release['branch'])
|
branches.add(release['branch'])
|
||||||
|
|
||||||
for branch in branches:
|
for branch in branches:
|
||||||
LOG.debug('Processing repo %s, branch %s', uri, branch)
|
LOG.debug('Processing commits in repo: %s, branch: %s', uri, branch)
|
||||||
|
|
||||||
vcs_key = 'vcs:' + str(parse.quote_plus(uri) + ':' + branch)
|
vcs_key = 'vcs:' + str(parse.quote_plus(uri) + ':' + branch)
|
||||||
last_id = runtime_storage_inst.get_by_key(vcs_key)
|
last_id = runtime_storage_inst.get_by_key(vcs_key)
|
||||||
@@ -125,7 +126,7 @@ def process_repo(repo, runtime_storage_inst, record_processor_inst,
|
|||||||
last_id = vcs_inst.get_last_id(branch)
|
last_id = vcs_inst.get_last_id(branch)
|
||||||
runtime_storage_inst.set_by_key(vcs_key, last_id)
|
runtime_storage_inst.set_by_key(vcs_key, last_id)
|
||||||
|
|
||||||
LOG.debug('Processing reviews for repo %s, branch %s', uri, branch)
|
LOG.debug('Processing reviews for repo: %s, branch: %s', uri, branch)
|
||||||
|
|
||||||
rcs_key = 'rcs:' + str(parse.quote_plus(uri) + ':' + branch)
|
rcs_key = 'rcs:' + str(parse.quote_plus(uri) + ':' + branch)
|
||||||
last_id = runtime_storage_inst.get_by_key(rcs_key)
|
last_id = runtime_storage_inst.get_by_key(rcs_key)
|
||||||
@@ -141,7 +142,7 @@ def process_repo(repo, runtime_storage_inst, record_processor_inst,
|
|||||||
runtime_storage_inst.set_by_key(rcs_key, last_id)
|
runtime_storage_inst.set_by_key(rcs_key, last_id)
|
||||||
|
|
||||||
|
|
||||||
def process_mail_list(uri, runtime_storage_inst, record_processor_inst):
|
def _process_mail_list(uri, runtime_storage_inst, record_processor_inst):
|
||||||
mail_iterator = mls.log(uri, runtime_storage_inst)
|
mail_iterator = mls.log(uri, runtime_storage_inst)
|
||||||
mail_iterator_typed = _record_typer(mail_iterator, 'email')
|
mail_iterator_typed = _record_typer(mail_iterator, 'email')
|
||||||
processed_mail_iterator = record_processor_inst.process(
|
processed_mail_iterator = record_processor_inst.process(
|
||||||
@@ -149,7 +150,7 @@ def process_mail_list(uri, runtime_storage_inst, record_processor_inst):
|
|||||||
runtime_storage_inst.set_records(processed_mail_iterator)
|
runtime_storage_inst.set_records(processed_mail_iterator)
|
||||||
|
|
||||||
|
|
||||||
def process_member_list(uri, runtime_storage_inst, record_processor_inst):
|
def _process_member_list(uri, runtime_storage_inst, record_processor_inst):
|
||||||
member_iterator = mps.log(uri, runtime_storage_inst,
|
member_iterator = mps.log(uri, runtime_storage_inst,
|
||||||
cfg.CONF.days_to_update_members)
|
cfg.CONF.days_to_update_members)
|
||||||
member_iterator_typed = _record_typer(member_iterator, 'member')
|
member_iterator_typed = _record_typer(member_iterator, 'member')
|
||||||
@@ -161,26 +162,38 @@ def process_member_list(uri, runtime_storage_inst, record_processor_inst):
|
|||||||
def update_members(runtime_storage_inst, record_processor_inst):
|
def update_members(runtime_storage_inst, record_processor_inst):
|
||||||
member_lists = runtime_storage_inst.get_by_key('member_lists') or []
|
member_lists = runtime_storage_inst.get_by_key('member_lists') or []
|
||||||
for member_list in member_lists:
|
for member_list in member_lists:
|
||||||
process_member_list(member_list, runtime_storage_inst,
|
_process_member_list(member_list, runtime_storage_inst,
|
||||||
record_processor_inst)
|
record_processor_inst)
|
||||||
|
|
||||||
|
|
||||||
def update_records(runtime_storage_inst, record_processor_inst):
|
def _post_process_records(record_processor_inst, repos):
|
||||||
|
LOG.debug('Build release index')
|
||||||
|
release_index = {}
|
||||||
|
for repo in repos:
|
||||||
|
vcs_inst = vcs.get_vcs(repo, cfg.CONF.sources_root)
|
||||||
|
release_index.update(vcs_inst.fetch())
|
||||||
|
|
||||||
|
LOG.debug('Post-process all records')
|
||||||
|
record_processor_inst.post_processing(release_index)
|
||||||
|
|
||||||
|
|
||||||
|
def process(runtime_storage_inst, record_processor_inst):
|
||||||
repos = utils.load_repos(runtime_storage_inst)
|
repos = utils.load_repos(runtime_storage_inst)
|
||||||
|
|
||||||
current_date = utils.timestamp_to_utc_date(int(time.time()))
|
current_date = utils.date_to_timestamp('now')
|
||||||
last_bug_date = runtime_storage_inst.get_by_key('last_bug_date')
|
bug_modified_since = runtime_storage_inst.get_by_key('bug_modified_since')
|
||||||
for repo in repos:
|
for repo in repos:
|
||||||
process_repo(repo, runtime_storage_inst, record_processor_inst,
|
_process_repo(repo, runtime_storage_inst, record_processor_inst,
|
||||||
last_bug_date)
|
bug_modified_since)
|
||||||
runtime_storage_inst.set_by_key('last_bug_date', current_date)
|
runtime_storage_inst.set_by_key('bug_modified_since', current_date)
|
||||||
|
|
||||||
|
LOG.info('Processing mail lists')
|
||||||
mail_lists = runtime_storage_inst.get_by_key('mail_lists') or []
|
mail_lists = runtime_storage_inst.get_by_key('mail_lists') or []
|
||||||
for mail_list in mail_lists:
|
for mail_list in mail_lists:
|
||||||
process_mail_list(mail_list, runtime_storage_inst,
|
_process_mail_list(mail_list, runtime_storage_inst,
|
||||||
record_processor_inst)
|
record_processor_inst)
|
||||||
|
|
||||||
record_processor_inst.update()
|
_post_process_records(record_processor_inst, repos)
|
||||||
|
|
||||||
|
|
||||||
def apply_corrections(uri, runtime_storage_inst):
|
def apply_corrections(uri, runtime_storage_inst):
|
||||||
@@ -290,9 +303,7 @@ def main():
|
|||||||
LOG.critical('Unable to load default data')
|
LOG.critical('Unable to load default data')
|
||||||
return not 0
|
return not 0
|
||||||
default_data_processor.process(runtime_storage_inst,
|
default_data_processor.process(runtime_storage_inst,
|
||||||
default_data,
|
default_data)
|
||||||
cfg.CONF.sources_root,
|
|
||||||
cfg.CONF.force_update)
|
|
||||||
|
|
||||||
process_program_list(runtime_storage_inst, cfg.CONF.program_list_uri)
|
process_program_list(runtime_storage_inst, cfg.CONF.program_list_uri)
|
||||||
|
|
||||||
@@ -301,7 +312,7 @@ def main():
|
|||||||
record_processor_inst = record_processor.RecordProcessor(
|
record_processor_inst = record_processor.RecordProcessor(
|
||||||
runtime_storage_inst)
|
runtime_storage_inst)
|
||||||
|
|
||||||
update_records(runtime_storage_inst, record_processor_inst)
|
process(runtime_storage_inst, record_processor_inst)
|
||||||
|
|
||||||
apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst)
|
apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst)
|
||||||
|
|
||||||
|
@@ -542,6 +542,8 @@ class RecordProcessor(object):
|
|||||||
yield record
|
yield record
|
||||||
|
|
||||||
def _update_commits_with_merge_date(self):
|
def _update_commits_with_merge_date(self):
|
||||||
|
LOG.debug('Update commits with merge date')
|
||||||
|
|
||||||
change_id_to_date = {}
|
change_id_to_date = {}
|
||||||
for record in self.runtime_storage_inst.get_all_records():
|
for record in self.runtime_storage_inst.get_all_records():
|
||||||
if (record['record_type'] == 'review' and
|
if (record['record_type'] == 'review' and
|
||||||
@@ -728,13 +730,46 @@ class RecordProcessor(object):
|
|||||||
for processed in self._close_patch(cores, marks_patch['marks']):
|
for processed in self._close_patch(cores, marks_patch['marks']):
|
||||||
yield processed
|
yield processed
|
||||||
|
|
||||||
def update(self, release_index=None):
|
def _update_members_company_name(self):
|
||||||
|
LOG.debug('Update members with company names')
|
||||||
|
|
||||||
|
for record in self.runtime_storage_inst.get_all_records():
|
||||||
|
if record['record_type'] != 'member':
|
||||||
|
continue
|
||||||
|
|
||||||
|
company_draft = record['company_draft']
|
||||||
|
company_name = self.domains_index.get(
|
||||||
|
utils.normalize_company_name(company_draft)) or (
|
||||||
|
utils.normalize_company_draft(company_draft))
|
||||||
|
|
||||||
|
if company_name == record['company_name']:
|
||||||
|
continue
|
||||||
|
|
||||||
|
LOG.debug('Update record %s, company name changed to %s',
|
||||||
|
record, company_name)
|
||||||
|
record['company_name'] = company_name
|
||||||
|
|
||||||
|
yield record
|
||||||
|
|
||||||
|
user = utils.load_user(self.runtime_storage_inst,
|
||||||
|
record['user_id'])
|
||||||
|
LOG.debug('Update user %s, company name changed to %s',
|
||||||
|
user, company_name)
|
||||||
|
user['companies'] = [{
|
||||||
|
'company_name': company_name,
|
||||||
|
'end_date': 0,
|
||||||
|
}]
|
||||||
|
utils.store_user(self.runtime_storage_inst, user)
|
||||||
|
|
||||||
|
def post_processing(self, release_index):
|
||||||
self.runtime_storage_inst.set_records(
|
self.runtime_storage_inst.set_records(
|
||||||
self._update_records_with_user_info())
|
self._update_records_with_user_info())
|
||||||
|
|
||||||
if release_index:
|
self.runtime_storage_inst.set_records(
|
||||||
self.runtime_storage_inst.set_records(
|
self._update_commits_with_merge_date())
|
||||||
self._update_records_with_releases(release_index))
|
|
||||||
|
self.runtime_storage_inst.set_records(
|
||||||
|
self._update_records_with_releases(release_index))
|
||||||
|
|
||||||
self.runtime_storage_inst.set_records(
|
self.runtime_storage_inst.set_records(
|
||||||
self._update_reviews_with_sequence_number())
|
self._update_reviews_with_sequence_number())
|
||||||
@@ -742,11 +777,11 @@ class RecordProcessor(object):
|
|||||||
self.runtime_storage_inst.set_records(
|
self.runtime_storage_inst.set_records(
|
||||||
self._update_blueprints_with_mention_info())
|
self._update_blueprints_with_mention_info())
|
||||||
|
|
||||||
self.runtime_storage_inst.set_records(
|
|
||||||
self._update_commits_with_merge_date())
|
|
||||||
|
|
||||||
self._determine_core_contributors()
|
self._determine_core_contributors()
|
||||||
|
|
||||||
# disagreement calculation must go after determining core contributors
|
# disagreement calculation must go after determining core contributors
|
||||||
self.runtime_storage_inst.set_records(
|
self.runtime_storage_inst.set_records(
|
||||||
self._update_marks_with_disagreement())
|
self._update_marks_with_disagreement())
|
||||||
|
|
||||||
|
self.runtime_storage_inst.set_records(
|
||||||
|
self._update_members_company_name())
|
||||||
|
@@ -782,7 +782,7 @@ class TestRecordProcessor(testtools.TestCase):
|
|||||||
'module': 'nova', 'branch': 'master'}
|
'module': 'nova', 'branch': 'master'}
|
||||||
]))
|
]))
|
||||||
|
|
||||||
record_processor_inst.update()
|
record_processor_inst.post_processing({})
|
||||||
|
|
||||||
user = {'seq': 2,
|
user = {'seq': 2,
|
||||||
'core': [],
|
'core': [],
|
||||||
@@ -853,7 +853,7 @@ class TestRecordProcessor(testtools.TestCase):
|
|||||||
}]}
|
}]}
|
||||||
]))
|
]))
|
||||||
|
|
||||||
record_processor_inst.update()
|
record_processor_inst.post_processing({})
|
||||||
|
|
||||||
user_1 = {'seq': 1, 'user_id': 'john_doe',
|
user_1 = {'seq': 1, 'user_id': 'john_doe',
|
||||||
'launchpad_id': 'john_doe', 'user_name': 'John Doe',
|
'launchpad_id': 'john_doe', 'user_name': 'John Doe',
|
||||||
@@ -942,7 +942,7 @@ class TestRecordProcessor(testtools.TestCase):
|
|||||||
'date': 1234567895,
|
'date': 1234567895,
|
||||||
'blueprint_id': ['mod:blueprint', 'mod:invalid']},
|
'blueprint_id': ['mod:blueprint', 'mod:invalid']},
|
||||||
]))
|
]))
|
||||||
record_processor_inst.update()
|
record_processor_inst.post_processing({})
|
||||||
|
|
||||||
bp1 = runtime_storage_inst.get_by_primary_key('bpd:mod:blueprint')
|
bp1 = runtime_storage_inst.get_by_primary_key('bpd:mod:blueprint')
|
||||||
self.assertEqual(2, bp1['mention_count'])
|
self.assertEqual(2, bp1['mention_count'])
|
||||||
@@ -978,7 +978,7 @@ class TestRecordProcessor(testtools.TestCase):
|
|||||||
'createdOn': 5,
|
'createdOn': 5,
|
||||||
'module': 'glance', 'branch': 'master'},
|
'module': 'glance', 'branch': 'master'},
|
||||||
]))
|
]))
|
||||||
record_processor_inst.update()
|
record_processor_inst.post_processing({})
|
||||||
|
|
||||||
review1 = runtime_storage_inst.get_by_primary_key('I111')
|
review1 = runtime_storage_inst.get_by_primary_key('I111')
|
||||||
self.assertEqual(2, review1['review_number'])
|
self.assertEqual(2, review1['review_number'])
|
||||||
@@ -1065,7 +1065,7 @@ class TestRecordProcessor(testtools.TestCase):
|
|||||||
}
|
}
|
||||||
]}
|
]}
|
||||||
]))
|
]))
|
||||||
record_processor_inst.update()
|
record_processor_inst.post_processing({})
|
||||||
|
|
||||||
marks = list([r for r in runtime_storage_inst.get_all_records()
|
marks = list([r for r in runtime_storage_inst.get_all_records()
|
||||||
if r['record_type'] == 'mark'])
|
if r['record_type'] == 'mark'])
|
||||||
@@ -1110,7 +1110,7 @@ class TestRecordProcessor(testtools.TestCase):
|
|||||||
'status': 'MERGED',
|
'status': 'MERGED',
|
||||||
'module': 'nova', 'branch': 'master'},
|
'module': 'nova', 'branch': 'master'},
|
||||||
]))
|
]))
|
||||||
record_processor_inst.update()
|
record_processor_inst.post_processing({})
|
||||||
|
|
||||||
commit = runtime_storage_inst.get_by_primary_key('de7e8f2')
|
commit = runtime_storage_inst.get_by_primary_key('de7e8f2')
|
||||||
self.assertEqual(1385490000, commit['date'])
|
self.assertEqual(1385490000, commit['date'])
|
||||||
|
Reference in New Issue
Block a user