def merge_records(original, new):
    """Merge the fields of ``new`` into ``original`` in place.

    Every key of ``new`` that is absent from ``original``, or whose value
    differs from the one stored in ``original``, is copied over. Keys that
    exist only in ``original`` are left untouched.

    :param original: dict that is updated in place
    :param new: dict supplying the values to merge in
    :returns: True if ``original`` was modified, False otherwise
    """
    # Unique sentinel so that a key missing from ``original`` is told apart
    # from a key that is stored with the value None.
    missing = object()
    need_update = False
    # items() instead of iteritems(): available on both Python 2 and 3.
    for key, value in new.items():
        if original.get(key, missing) != value:
            need_update = True
            original[key] = value
    return need_update