# stackalytics/processor/openstackid_utils.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import logging

import requests

from stackalytics.processor import utils


LOG = logging.getLogger(__name__)

OSID_URI = ('https://openstackid-resources.openstack.org/'
            'api/public/v1/members?'
            'filter=email==%s&relations=all_affiliations')
INTERVAL_GAP_THRESHOLD = 60 * 60 * 24  # ignore gaps shorter than this

# One shared session is handed to every lookup made by this module.
_openstackid_session = requests.Session()


def _openstack_profile_by_email(email):
    """Fetch the OpenStackID member record registered for an email.

    :param email: email address substituted into OSID_URI
    :returns: the last item of the response's 'data' list, or None when
              nothing could be read or the list is missing or empty
    """
    LOG.debug('Lookup user email %s at OpenStackID', email)
    # NOTE(review): the email is interpolated verbatim; characters that are
    # special in a query string (e.g. '+' or '&') are not escaped here.
    # Confirm whether the API expects them to be quoted.
    uri = OSID_URI % email
    data = utils.read_json_from_uri(uri, session=_openstackid_session)

    if not data or not data.get('data'):
        return None  # nothing read, or user not found

    return data['data'][-1]  # return the last (most recent) record


Interval = collections.namedtuple('Interval', ['start', 'end', 'value'])


def _iterate_intervals(intervals, threshold=INTERVAL_GAP_THRESHOLD):
    """Iterate intervals ordered by start and fill the gaps around them.

    A gap is reported as an Interval whose value is None. An ``end`` of 0
    marks an interval that is still open.

    :param intervals: list of Interval objects; it is not modified
    :param threshold: do not yield gaps shorter than or equal to this
    :returns: generator of Interval objects. For empty input a single
              Interval(0, 0, None) is yielded.
    """
    if not intervals:
        yield Interval(0, 0, None)
        return

    # A missing start is ordered as 0: comparing None with an int raises
    # TypeError on Python 3. sorted() leaves the caller's list untouched.
    ordered = sorted(intervals, key=lambda item: item.start or 0)

    prev_end = 0

    for interval in ordered:
        # Intervals without a start never get a leading gap.
        if interval.start and interval.start - prev_end > threshold:
            yield Interval(prev_end, interval.start, None)  # prior gap

        yield interval

        prev_end = interval.end or 0

    # A closed last interval is followed by an open-ended gap.
    if prev_end:
        yield Interval(prev_end, 0, None)


def user_profile_by_email(email):
    """Build a user profile from the OpenStackID record for an email.

    :param email: email address to look up
    :returns: dict with keys 'openstack_id', 'user_name', 'emails' and
              'companies', or None when no record is found. Each item of
              'companies' has 'company_name' ('*independent' for gaps and
              for affiliations without an organization name) and
              'end_date' (0 when the affiliation has no end date).
    """
    data = _openstack_profile_by_email(email)

    if not data:  # user is not found
        return None

    # "or" fallbacks cover keys that are present but set to None.
    intervals = [Interval(a.get('start_date'), a.get('end_date') or 0,
                          (a.get('organization') or {}).get('name'))
                 for a in data.get('affiliations') or []]
    companies = [dict(company_name=interval.value or '*independent',
                      end_date=interval.end)
                 for interval in _iterate_intervals(intervals)]

    # Skip absent name parts: str.join() raises TypeError on None.
    name_parts = (data.get('first_name'), data.get('last_name'))
    user = {
        'openstack_id': data['id'],
        'user_name': ' '.join(part for part in name_parts if part),
        'emails': [email],
        'companies': companies,
    }
    return user
# stackalytics/tests/unit/test_openstackid_utils.py
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import mock
import testtools

from stackalytics.processor import openstackid_utils as ou

# Canned OpenStackID responses returned by the patched JSON reader.
USER_PROFILE = {
    "total": 1,
    "data": [
        {
            "id": 5555,
            "first_name": "John",
            "last_name": "Smith",
            "pic": "https://www.openstack.org/profile_images/members/5555",
            "affiliations": [
                {
                    "start_date": 1193875200,
                    "end_date": 1496188800,
                    "organization": {"name": "Mirantis"},
                },
                {
                    "start_date": 1496275200,
                    "end_date": None,
                    "organization": {"name": "Huawei"},
                },
            ],
        },
    ],
}
USER_PROFILE_NO_AFFILIATIONS = {
    "total": 1,
    "data": [
        {
            "id": 5555,
            "first_name": "John",
            "last_name": "Smith",
            "affiliations": [],
        },
    ],
}
USER_PROFILE_NO_DATES = {
    "total": 1,
    "data": [
        {
            "id": 2222,
            "first_name": "John",
            "last_name": "Smith",
            "affiliations": [
                {
                    "start_date": None,
                    "end_date": None,
                    "is_current": True,
                    "organization": {"name": "Hewlett Packard Enterprise"},
                },
            ],
        },
    ],
}
# NOTE(review): "total" is 1 although two records are listed below.
USER_PROFILE_MULTIPLE_RECORDS = {
    "total": 1,
    "data": [
        {
            "id": 2222,
            "first_name": "John",
            "last_name": "Smith",
            "affiliations": [
                {
                    "start_date": None,
                    "end_date": None,
                    "is_current": True,
                    "organization": {"name": "Hewlett Packard Enterprise"},
                },
            ],
        },
        {
            "id": 5555,
            "first_name": "John",
            "last_name": "Smith",
            "affiliations": [
                {
                    "start_date": 1456790400,
                    "end_date": None,
                    "organization": {"name": "Red Hat"},
                },
            ],
        },
    ],
}

_EMAIL = 'dummy@dummy.org'
_READER = 'stackalytics.processor.utils.read_json_from_uri'


def _expected_user(openstack_id, companies):
    """Build the profile dict expected for the 'John Smith' fixtures.

    :param openstack_id: value expected under 'openstack_id'
    :param companies: list of (company_name, end_date) pairs
    """
    return {
        'openstack_id': openstack_id,
        'user_name': 'John Smith',
        'emails': [_EMAIL],
        'companies': [{'company_name': name, 'end_date': end_date}
                      for name, end_date in companies],
    }


class TestOpenStackIDUtils(testtools.TestCase):
    """Unit tests for interval filling and the email profile lookup."""

    def _check_intervals(self, origin, expected):
        # Every interval test uses a gap threshold of 10.
        observed = list(ou._iterate_intervals(origin, threshold=10))
        self.assertEqual(expected, observed)

    def _check_lookup(self, response, expected):
        # Patch the JSON reader, run the lookup, then verify both the
        # single reader call and the resulting profile.
        with mock.patch(_READER) as reader_mock:
            reader_mock.return_value = response
            observed = ou.user_profile_by_email(_EMAIL)

        reader_mock.assert_called_once_with(
            ou.OSID_URI % _EMAIL, session=ou._openstackid_session)
        self.assertEqual(expected, observed)

    def test_iterate_intervals(self):
        self._check_intervals(
            origin=[ou.Interval(100, 200, 'a'),
                    ou.Interval(200, 0, 'b')],
            expected=[ou.Interval(0, 100, None),
                      ou.Interval(100, 200, 'a'),
                      ou.Interval(200, 0, 'b')])

    def test_iterate_intervals_2(self):
        self._check_intervals(
            origin=[ou.Interval(100, 200, 'a'),
                    ou.Interval(300, 400, 'b')],
            expected=[ou.Interval(0, 100, None),
                      ou.Interval(100, 200, 'a'),
                      ou.Interval(200, 300, None),
                      ou.Interval(300, 400, 'b'),
                      ou.Interval(400, 0, None)])

    def test_user_profile_by_email(self):
        self._check_lookup(
            USER_PROFILE,
            _expected_user(5555, [('*independent', 1193875200),
                                  ('Mirantis', 1496188800),
                                  ('Huawei', 0)]))

    def test_user_profile_by_email_not_affiliated(self):
        self._check_lookup(
            USER_PROFILE_NO_AFFILIATIONS,
            _expected_user(5555, [('*independent', 0)]))

    def test_user_profile_by_email_not_found(self):
        self._check_lookup({"total": 0, "data": []}, None)

    def test_user_profile_by_email_not_read(self):
        self._check_lookup(None, None)

    def test_user_profile_by_email_no_dates(self):
        self._check_lookup(
            USER_PROFILE_NO_DATES,
            _expected_user(2222, [('Hewlett Packard Enterprise', 0)]))

    def test_user_profile_by_email_multiple_records(self):
        self._check_lookup(
            USER_PROFILE_MULTIPLE_RECORDS,
            _expected_user(5555, [('*independent', 1456790400),
                                  ('Red Hat', 0)]))