From 5481e7796b7b098455ccf1a74a59d049197274bb Mon Sep 17 00:00:00 2001 From: Ilya Shakhat Date: Mon, 28 Apr 2014 18:52:39 +0400 Subject: [PATCH] Optimize memory consumption in dashboard * Store records with fields required for aggregation * Load records from runtime storage when details are needed * Refactoring of activity summary report Part of blueprint memory-optimizations Change-Id: I87c14ef0ad9583632cb2fcba4af6d43a0d3ec0b1 --- dashboard/helpers.py | 5 +- dashboard/reports.py | 47 ++++++++++++------- dashboard/templates/reports/contribution.html | 8 ++-- dashboard/vault.py | 24 +++++++++- dashboard/web.py | 8 ++-- stackalytics/processor/record_processor.py | 1 - tests/api/test_api.py | 2 +- 7 files changed, 63 insertions(+), 32 deletions(-) diff --git a/dashboard/helpers.py b/dashboard/helpers.py index 959eba6bb..a9d4012ba 100644 --- a/dashboard/helpers.py +++ b/dashboard/helpers.py @@ -53,14 +53,14 @@ def _extend_record_common_fields(record): def _extend_by_parent_info(record, parent): - parent = parent.copy() + parent = vault.extend_record(parent) _extend_record_common_fields(parent) for k, v in six.iteritems(parent): record['parent_' + k] = v def extend_record(record): - record = record.copy() + record = vault.extend_record(record) _extend_record_common_fields(record) if record['record_type'] == 'commit': @@ -120,6 +120,7 @@ def extend_user(user): def get_activity(records, start_record, page_size, query_message=None): if query_message: + records = [vault.extend_record(r) for r in records] records = [r for r in records if (r.get('message') and r.get('message').find(query_message) > 0)] diff --git a/dashboard/reports.py b/dashboard/reports.py index c88ac45ff..c301c417e 100644 --- a/dashboard/reports.py +++ b/dashboard/reports.py @@ -105,7 +105,7 @@ def open_reviews(module): if review['status'] == 'NEW': total_open += 1 if review['value'] in [1, 2]: - waiting_on_reviewer.append(review) + waiting_on_reviewer.append(vault.extend_record(review)) return { 'module': module, @@ -167,6 +167,28 @@ def _get_punch_card_data(records): return json.dumps(punch_card_data) +def _get_activity_summary(record_ids): + memory_storage_inst = vault.get_memory_storage() + + types = ['mark', 'patch', 'email', 'bpd', 'bpc', 'commit'] + record_ids_by_type = set() + for t in types: + record_ids_by_type |= memory_storage_inst.get_record_ids_by_type(t) + + record_ids &= record_ids_by_type + contribution_summary = helpers.get_contribution_summary( + memory_storage_inst.get_records(record_ids)) + + record_ids -= memory_storage_inst.get_record_ids_by_type('commit') + punch_card_data = _get_punch_card_data( + memory_storage_inst.get_records(record_ids)) + + return { + 'contribution': contribution_summary, + 'punch_card_data': punch_card_data, + } + + @blueprint.route('/users/') @decorators.templated() @decorators.exception_handler() @@ -177,16 +199,11 @@ def user_activity(user_id): user = helpers.extend_user(user) memory_storage_inst = vault.get_memory_storage() - records = memory_storage_inst.get_records( + result = _get_activity_summary( memory_storage_inst.get_record_ids_by_user_ids([user_id])) - records = sorted(records, key=operator.itemgetter('date'), reverse=True) + result['user'] = user - return { - 'user': user, - 'total_records': len(records), - 'contribution': helpers.get_contribution_summary(records), - 'punch_card_data': _get_punch_card_data(records), - } + return result @blueprint.route('/companies/') @@ -196,17 +213,11 @@ def company_activity(company): memory_storage_inst = vault.get_memory_storage() original_name = memory_storage_inst.get_original_company_name(company) - memory_storage_inst = vault.get_memory_storage() - records = memory_storage_inst.get_records( + result = _get_activity_summary( memory_storage_inst.get_record_ids_by_companies([original_name])) - records = sorted(records, key=operator.itemgetter('date'), reverse=True) + result['company_name'] = original_name - return { - 'company_name': original_name, - 'total_records': len(records), - 'contribution': helpers.get_contribution_summary(records), - 'punch_card_data': _get_punch_card_data(records), - } + return result @blueprint.route('/activity') diff --git a/dashboard/templates/reports/contribution.html b/dashboard/templates/reports/contribution.html index b0c0b821c..f374382ec 100644 --- a/dashboard/templates/reports/contribution.html +++ b/dashboard/templates/reports/contribution.html @@ -33,7 +33,7 @@ Contribution into {{ module }} for the last {{ days }} days 'core_reviewers': 0, 'commits': 0, 'reviews': 0, - 'patch_count': 0, + 'patch': 0, 'emails': 0 }; @@ -57,11 +57,11 @@ Contribution into {{ module }} for the last {{ days }} days if (tableData[i].mark > 0) { summary.reviewers ++; } - tableData[i].review_ratio = tableData[i].review + " / " + tableData[i].patch_count; + tableData[i].review_ratio = tableData[i].review + " / " + tableData[i].patch; summary.marks += tableData[i].mark; summary.commits += tableData[i].commit; summary.reviews += tableData[i].review; - summary.patch_count += tableData[i].patch_count; + summary.patch += tableData[i].patch; summary.emails += tableData[i].email; } @@ -111,7 +111,7 @@ Contribution into {{ module }} for the last {{ days }} days
Core team size: ${core_reviewers} (${(core_marks / core_reviewers / {{ days }}).toFixed(1) } per core per day)

Contribution Summary

On review: ${reviews} (${(reviews / {{ days }}).toFixed(1) } per day)
-
Patch sets: ${patch_count} (${(patch_count / {{ days }}).toFixed(1) } per day)
+
Patch sets: ${patch} (${(patch / {{ days }}).toFixed(1) } per day)
Commits: ${commits} (${(commits / {{ days }}).toFixed(1) } per day)
Emails: ${emails} (${(emails / {{ days }}).toFixed(1) } per day)
diff --git a/dashboard/vault.py b/dashboard/vault.py index 65d20a3b1..c01daa7fb 100644 --- a/dashboard/vault.py +++ b/dashboard/vault.py @@ -28,6 +28,28 @@ from stackalytics.processor import utils LOG = logging.getLogger(__name__) +RECORD_FIELDS_FOR_AGGREGATE = ['record_id', 'primary_key', 'record_type', + 'company_name', 'module', 'user_id', 'release', + 'date', 'week', 'author_name', 'loc', 'type', + 'x', 'value', 'status', 'blueprint_id'] + + +def compact_records(records): + for record in records: + compact = dict([(k, record[k]) for k in RECORD_FIELDS_FOR_AGGREGATE + if k in record]) + yield compact + + if 'blueprint_id' in compact: + del compact['blueprint_id'] + + +def extend_record(record): + runtime_storage_inst = get_vault()['runtime_storage'] + return runtime_storage_inst.get_by_key( + runtime_storage_inst._get_record_name(record['record_id'])) + + def get_vault(): vault = getattr(flask.current_app, 'stackalytics_vault', None) if not vault: @@ -51,7 +73,7 @@ def get_vault(): flask.request.stackalytics_updated = True memory_storage_inst = vault['memory_storage'] have_updates = memory_storage_inst.update( - vault['runtime_storage'].get_update(os.getpid())) + compact_records(vault['runtime_storage'].get_update(os.getpid()))) if have_updates: _init_releases(vault) diff --git a/dashboard/web.py b/dashboard/web.py index b7c29f0fe..42e85cf52 100644 --- a/dashboard/web.py +++ b/dashboard/web.py @@ -162,7 +162,7 @@ def get_engineers_extended(records): record = decorators.mark_finalize(record) if not (record['mark'] or record['review'] or record['commit'] or - record['email'] or record['patch_count']): + record['email'] or record['patch']): return user = vault.get_user_from_runtime_storage(record['id']) @@ -176,16 +176,13 @@ def get_engineers_extended(records): result_row[record_type] = result_row.get(record_type, 0) + 1 if record_type == 'mark': decorators.mark_filter(result, record, param_id) - if record_type == 'review': - result_row['patch_count'] = (result_row.get('patch_count', 0) + - record['patch_count']) result = {} for record in records: user_id = record['user_id'] if user_id not in result: result[user_id] = {'id': user_id, 'mark': 0, 'review': 0, - 'commit': 0, 'email': 0, 'patch_count': 0, + 'commit': 0, 'email': 0, 'patch': 0, 'metric': 0} record_processing(result, record, 'user_id') result[user_id]['name'] = record['author_name'] @@ -342,6 +339,7 @@ def get_bpd(records): result = [] for record in records: if record['record_type'] in ['bpd', 'bpc']: + record = vault.extend_record(record) mention_date = record.get('mention_date') if mention_date: date = helpers.format_date(mention_date) diff --git a/stackalytics/processor/record_processor.py b/stackalytics/processor/record_processor.py index c9f2872ee..b1ced9a68 100644 --- a/stackalytics/processor/record_processor.py +++ b/stackalytics/processor/record_processor.py @@ -270,7 +270,6 @@ class RecordProcessor(object): patch_sets = record.get('patchSets', []) review['updated_on'] = review['date'] - review['patch_count'] = len(patch_sets) if patch_sets: patch = patch_sets[-1] if 'approvals' in patch: diff --git a/tests/api/test_api.py b/tests/api/test_api.py index eb57cdae1..7c598f614 100644 --- a/tests/api/test_api.py +++ b/tests/api/test_api.py @@ -144,7 +144,7 @@ def _generate_review(): 'branch': 'master', 'launchpad_id': 'john_doe', 'lastUpdated': 1387865203, 'author_name': 'John Doe', 'date': 1386547707, - 'url': 'https://review.openstack.org/60721', 'patch_count': 2, + 'url': 'https://review.openstack.org/60721', 'sortKey': '0029f92e0000ed31', 'project': 'openstack/glance', 'week': 2292, 'release': 'icehouse', 'updated_on': 1387865147 }