From 3c9bed12b3b3f32e5f53c736623c7343d397a85a Mon Sep 17 00:00:00 2001 From: Sergey Lukjanov Date: Fri, 14 Mar 2014 15:20:47 +0400 Subject: [PATCH] Add support for module aliases, step #1 It adds support for module aliases in the mls processing. Additionally, it adds alias 'savanna' for 'sahara' module. Change-Id: Ifb02910956a7c1a8937063af9eb9e48031f06ee4 --- etc/default_data.json | 3 ++ etc/default_data.schema.json | 8 ++++- etc/test_default_data.json | 2 +- stackalytics/processor/record_processor.py | 36 ++++++++++++++-------- tests/unit/test_record_processor.py | 18 +++++++++-- 5 files changed, 50 insertions(+), 17 deletions(-) diff --git a/etc/default_data.json b/etc/default_data.json index 00d82e0dd..afc7e3824 100644 --- a/etc/default_data.json +++ b/etc/default_data.json @@ -6328,6 +6328,9 @@ }, { "module": "sahara", + "aliases": [ + "savanna" + ], "uri": "git://github.com/openstack/sahara.git", "organization": "openstack", "releases": [ diff --git a/etc/default_data.schema.json b/etc/default_data.schema.json index 1ad6fd8d4..03692f2ca 100644 --- a/etc/default_data.schema.json +++ b/etc/default_data.schema.json @@ -100,6 +100,12 @@ }, "required": ["tag_from", "tag_to", "release_name"] } + }, + "aliases": { + "type": "array", + "items": { + "type": "string" + } } }, "required": ["uri", "module", "organization"], @@ -203,4 +209,4 @@ } } } -} \ No newline at end of file +} diff --git a/etc/test_default_data.json b/etc/test_default_data.json index 0be094a2b..597766a77 100644 --- a/etc/test_default_data.json +++ b/etc/test_default_data.json @@ -183,4 +183,4 @@ } ] -} \ No newline at end of file +} diff --git a/stackalytics/processor/record_processor.py b/stackalytics/processor/record_processor.py index 8e6a75615..2b0469fba 100644 --- a/stackalytics/processor/record_processor.py +++ b/stackalytics/processor/record_processor.py @@ -37,6 +37,7 @@ class RecordProcessor(object): self.releases_dates = [r['end_date'] for r in self.releases] self.modules = None + self.alias_module_map = None def _get_release(self, timestamp): release_index = bisect.bisect(self.releases_dates, timestamp) @@ -45,20 +46,28 @@ class RecordProcessor(object): def _get_modules(self): if self.modules is None: self.modules = set() + self.alias_module_map = dict() + for repo in utils.load_repos(self.runtime_storage_inst): module = repo['module'].lower() - add = True - for m in self.modules: - if module.find(m) >= 0: - add = False - break - if m.find(module) >= 0: - self.modules.remove(m) - break - if add: - self.modules.add(module) + module_aliases = filter(str.lower, repo.get('aliases') or []) - return self.modules + add = True + for module_name in ([module] + module_aliases): + for m in self.modules: + if module_name.find(m) >= 0: + add = False + break + if m.find(module_name) >= 0: + self.modules.remove(m) + break + if add: + self.modules.add(module_name) + + for alias in module_aliases: + self.alias_module_map[alias] = module + + return self.modules, self.alias_module_map def _find_company(self, companies, date): for r in companies: @@ -328,7 +337,8 @@ class RecordProcessor(object): pos = len(subject) best_guess_module = None - for module in self._get_modules(): + modules, alias_module_map = self._get_modules() + for module in modules: find = subject.find(module) if (find >= 0) and (find < pos): pos = find @@ -341,6 +351,8 @@ class RecordProcessor(object): if not record.get('module'): record['module'] = 'unknown' + elif record['module'] in alias_module_map: + record['module'] = alias_module_map[record['module']] def _process_email(self, record): record['primary_key'] = record['message_id'] diff --git a/tests/unit/test_record_processor.py b/tests/unit/test_record_processor.py index 9440bf3c5..e771128b1 100644 --- a/tests/unit/test_record_processor.py +++ b/tests/unit/test_record_processor.py @@ -1138,9 +1138,21 @@ class TestRecordProcessor(testtools.TestCase): with mock.patch('stackalytics.processor.utils.load_repos') as patch: patch.return_value = [{'module': 'nova'}, {'module': 'python-novaclient'}, - {'module': 'neutron'}] - modules = record_processor_inst._get_modules() - self.assertEqual(set(['nova', 'neutron']), set(modules)) + {'module': 'neutron'}, + {'module': 'sahara', 'aliases': ['savanna']}] + modules, module_alias_map = record_processor_inst._get_modules() + self.assertEqual(set(['nova', 'neutron', 'sahara', 'savanna']), + set(modules)) + self.assertEqual({'savanna': 'sahara'}, module_alias_map) + + def test_guess_module(self): + record_processor_inst = self.make_record_processor() + with mock.patch('stackalytics.processor.utils.load_repos') as patch: + patch.return_value = [{'module': 'sahara', 'aliases': ['savanna']}] + record = {'subject': '[savanna] T'} + record_processor_inst._guess_module(record) + self.assertEqual({'subject': '[savanna] T', 'module': 'sahara'}, + record) def assertRecordsMatch(self, expected, actual): for key, value in six.iteritems(expected):