# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import copy

from oslo_log import log as logging

from stackalytics.processor import utils


LOG = logging.getLogger(__name__)

INDEPENDENT = '*independent'
ROBOTS = '*robots'


def make_user_id(emails=None, launchpad_id=None, gerrit_tuple=None,
                 member_id=None, github_id=None, zanata_id=None):
    """Build the primary id of a user from whatever identity is known

    The first non-empty source wins, in this order: launchpad id, first
    email, gerrit tuple, member id, github id, zanata id.

    :param emails: list of emails, only the first one is used
    :param launchpad_id: launchpad id
    :param gerrit_tuple: 2-item tuple, formatted as 'gerrit:<a>:<b>'
    :param member_id: formatted as 'member:<id>'
    :param github_id: formatted as 'github:<id>'
    :param zanata_id: formatted as 'zanata:<id>'
    :return: user id or None if no identity is given
    """
    if launchpad_id or emails:
        return launchpad_id or emails[0]
    if gerrit_tuple:
        return 'gerrit:%s:%s' % gerrit_tuple
    if member_id:
        return 'member:%s' % member_id
    if github_id:
        return 'github:%s' % github_id
    if zanata_id:
        return 'zanata:%s' % zanata_id
    return None


def store_user(runtime_storage_inst, user):
    """Store user profile under every key it can be looked up by

    A profile without `seq` gets a new one from the storage counter. The
    profile is then written under 'user:<seq>' and under a 'user:...' key
    for each of user_id, launchpad_id, gerrit ids, github_id, zanata_id
    and emails that is present.

    :param runtime_storage_inst: storage providing `inc_user_count` and
        `set_by_key`
    :param user: user profile, `seq` is set in place if it was missing
    """
    if not user.get('seq'):
        user['seq'] = runtime_storage_inst.inc_user_count()
        LOG.debug('New user: %s', user)

    runtime_storage_inst.set_by_key('user:%d' % user['seq'], user)
    if user.get('user_id'):
        runtime_storage_inst.set_by_key('user:%s' % user['user_id'], user)
    if user.get('launchpad_id'):
        runtime_storage_inst.set_by_key('user:%s' % user['launchpad_id'],
                                        user)
    for hostname, ids in user.get('gerrit_ids', {}).items():
        for gerrit_id in ids:
            runtime_storage_inst.set_by_key(
                'user:gerrit:%s:%s' % (hostname, gerrit_id), user)
    if user.get('github_id'):
        runtime_storage_inst.set_by_key('user:github:%s' % user['github_id'],
                                        user)
    if user.get('zanata_id'):
        runtime_storage_inst.set_by_key('user:zanata:%s' % user['zanata_id'],
                                        user)
    for email in user.get('emails') or []:
        runtime_storage_inst.set_by_key('user:%s' % email, user)


def load_user(runtime_storage_inst, seq=None, user_id=None, email=None,
              launchpad_id=None, gerrit_tuple=None, member_id=None,
              github_id=None, zanata_id=None):
    """Load user profile by one of its identities

    Prefixed identities (gerrit, member, github, zanata) are tried first;
    otherwise the first non-empty of seq, user_id, launchpad_id and email
    is used as the key.

    :return: whatever the storage returns for 'user:<key>', or None if no
        identity is given
    """
    key = make_user_id(gerrit_tuple=gerrit_tuple, member_id=member_id,
                       github_id=github_id, zanata_id=zanata_id)
    if not key:
        key = seq or user_id or launchpad_id or email
    if key:
        return runtime_storage_inst.get_by_key('user:%s' % key)
    return None


def delete_users(runtime_storage_inst, users):
    """Delete the 'user:<seq>' record of every given user profile"""
    for user in users:
        LOG.debug('Delete user: %s', user)
        runtime_storage_inst.delete_by_key('user:%s' % user['seq'])


def update_user_profile(stored_user, user):
    """Update stored profile with new data and return the result

    Neither argument is modified, the result is a deep copy. Fields of
    `user` override those of `stored_user`, except emails and gerrit ids
    which are united. The result is always marked as `static`.

    :param stored_user: profile from storage, may be empty or None
    :param user: profile with new data
    :return: updated user profile
    """
    if stored_user:
        updated_user = copy.deepcopy(stored_user)
        updated_user.update(user)
        # `or []` also covers profiles that carry an explicit None
        updated_user['emails'] = sorted(
            set(stored_user.get('emails') or []) |
            set(user.get('emails') or []))
        gerrit_ids = _merge_gerrit_ids([stored_user, user])
        if gerrit_ids:
            updated_user['gerrit_ids'] = gerrit_ids
    else:
        updated_user = copy.deepcopy(user)
    updated_user['static'] = True
    return updated_user


def get_company_for_date(companies, date):
    """Get company the user is affiliated with at the given date

    :param companies: non-empty list of dicts with `company_name` and
        `end_date`; the first record with `end_date` after `date` wins
    :param date: date to compare against `end_date`
    :return: tuple (company name, 'strict') for a matched record or
        (name of the last company, 'open') if nothing matches
    """
    for r in companies:
        if date < r['end_date']:
            return r['company_name'], 'strict'
    return companies[-1]['company_name'], 'open'  # may be overridden


def get_company_by_email(domains_index, email):
    """Get company based on email domain

    Automatically maps email domain into company name. Prefers
    subdomains to root domains.

    :param domains_index: dict {domain -> company name}
    :param email: valid email. may be empty
    :return: company name or None if nothing matches
    """
    if not email:
        return None

    domain = email.partition('@')[2]
    if domain:
        parts = domain.split('.')
        # try the longest suffix first; the bare top-level part alone
        # is never looked up
        for start in range(len(parts) - 1):
            m = '.'.join(parts[start:])
            if m in domains_index:
                return domains_index[m]
    return None


def create_user(domains_index, launchpad_id, email, gerrit_tuple, zanata_id,
                user_name):
    """Create a new user profile

    The company is derived from the email domain and defaults to
    INDEPENDENT.

    :param domains_index: dict {domain -> company name}
    :param launchpad_id: launchpad id, may be empty
    :param email: email, may be empty
    :param gerrit_tuple: 2-item tuple; item 0 becomes the key and item 1
        the only value of `gerrit_ids`. may be empty
    :param zanata_id: zanata id, may be empty
    :param user_name: user name, stored as '' when empty
    :return: user profile
    """
    company = get_company_by_email(domains_index, email) or INDEPENDENT
    emails = [email] if email else []

    user = {
        'user_id': make_user_id(
            emails=emails, launchpad_id=launchpad_id,
            gerrit_tuple=gerrit_tuple, zanata_id=zanata_id),
        'launchpad_id': launchpad_id,
        'user_name': user_name or '',
        'companies': [{
            'company_name': company,
            'end_date': 0,
        }],
        'emails': emails,
    }

    if gerrit_tuple:
        user['gerrit_ids'] = {
            gerrit_tuple[0]: [gerrit_tuple[1]]
        }
    if zanata_id:
        user['zanata_id'] = zanata_id

    return user


def update_user_affiliation(domains_index, user):
    """Update user affiliation

    Affiliation is updated only if user is currently independent
    but makes contribution from company domain. The profile is
    modified in place.

    :param domains_index: dict {domain -> company name}
    :param user: user profile
    """
    # a profile without emails has nothing to derive a company from
    for email in user.get('emails') or []:
        company_name = get_company_by_email(domains_index, email)
        uc = user['companies']
        if (company_name and (len(uc) == 1) and
                (uc[0]['company_name'] == INDEPENDENT)):
            LOG.debug('Updating affiliation of user %s to %s',
                      user['user_id'], company_name)
            uc[0]['company_name'] = company_name
            break


def _merge_gerrit_ids(users):
    """Unite gerrit ids of all users into dict {hostname -> sorted ids}"""
    merged = {}
    for user in users:
        for hostname, ids in (user.get('gerrit_ids') or {}).items():
            merged.setdefault(hostname, set()).update(ids)
    # hostnames that end up with no ids are dropped
    return dict((hostname, sorted(ids))
                for hostname, ids in merged.items() if ids)


def merge_user_profiles(domains_index, user_profiles):
    """Merge user profiles into one

    The function merges list of user profiles into one and figures out
    which profiles can be deleted.

    :param domains_index: dict {domain -> company name}
    :param user_profiles: user profiles to merge
    :return: tuple (merged user profile, [user profiles to delete])
    """
    LOG.debug('Merge profiles: %s', user_profiles)

    # check if there is more than one launchpad_id
    lp_ids = set(u.get('launchpad_id') for u in user_profiles
                 if u.get('launchpad_id'))
    if len(lp_ids) > 1:
        LOG.debug('Ambiguous launchpad ids: %s on profiles: %s',
                  lp_ids, user_profiles)

    merged_user = {}  # merged user profile

    # collect ordinary fields, the first non-empty value wins
    for key in ['seq', 'user_name', 'user_id', 'github_id', 'launchpad_id',
                'companies', 'static', 'zanata_id', 'gravatar_email']:
        value = next((v.get(key) for v in user_profiles if v.get(key)), None)
        if value:
            merged_user[key] = value

    # update user_id, prefer it to be equal to launchpad_id
    merged_user['user_id'] = (merged_user.get('launchpad_id') or
                              merged_user.get('user_id'))

    # always preserve `user_name` since it is a required field
    if 'user_name' not in merged_user:
        merged_user['user_name'] = merged_user['user_id']

    # merge emails
    emails = set()
    core_in = set()
    for u in user_profiles:
        emails |= set(u.get('emails') or [])
        core_in |= set(u.get('core') or [])
    merged_user['emails'] = sorted(emails)
    if core_in:
        merged_user['core'] = sorted(core_in)

    gerrit_ids = _merge_gerrit_ids(user_profiles)
    if gerrit_ids:
        merged_user['gerrit_ids'] = gerrit_ids

    # merge companies: prefer the first profile whose affiliation is
    # more than a single INDEPENDENT record
    merged_companies = merged_user.get('companies')
    for u in user_profiles:
        companies = u.get('companies')
        if companies:
            if (companies[0]['company_name'] != INDEPENDENT or
                    len(companies) > 1):
                merged_companies = companies
                break
    if not merged_companies:
        # no profile has companies; fall back to the same default
        # record that create_user produces
        merged_companies = [{'company_name': INDEPENDENT, 'end_date': 0}]
    merged_user['companies'] = merged_companies

    # NOTE: the companies list may be shared with one of the input
    # profiles, and the call below may change its first record in place
    update_user_affiliation(domains_index, merged_user)

    users_to_delete = []
    seqs = set(u.get('seq') for u in user_profiles if u.get('seq'))

    if len(seqs) > 1:
        # profiles are merged, keep only one, remove others
        seqs.remove(merged_user['seq'])

        for u in user_profiles:
            if u.get('seq') in seqs:
                users_to_delete.append(u)

    return merged_user, users_to_delete
def are_users_same(users):
    """Tell whether all profiles share one and the same `seq`

    :param users: iterable of user profiles (dicts)
    :return: True only if there is exactly one distinct `seq` among the
        profiles and it is not None; False for an empty iterable
    """
    distinct_seqs = {profile.get('seq') for profile in users}
    if None in distinct_seqs:
        return False
    return len(distinct_seqs) == 1


def resolve_companies_aliases(domains_index, companies):
    """Replace company names with their resolved form

    Every name is looked up in `domains_index` by its
    `utils.normalize_company_name` form; when that gives nothing,
    `utils.normalize_company_draft` of the name is used instead.
    Neighbouring records that resolve to the same name are collapsed
    into one, keeping the later record.

    :param domains_index: dict used to look up normalized company names
    :param companies: list of dicts having `company_name`
    :return: new list of deep-copied records in the original order
    """
    resolved = []
    last_name = None

    # walk backwards so that of equal neighbours the later one is kept
    for record in reversed(companies):
        raw_name = record['company_name']
        alias = domains_index.get(utils.normalize_company_name(raw_name))
        if alias:
            resolved_name = alias
        else:
            resolved_name = utils.normalize_company_draft(raw_name)

        if resolved_name == last_name:
            continue

        normalized = copy.deepcopy(record)
        normalized['company_name'] = resolved_name
        resolved.append(normalized)
        last_name = resolved_name

    resolved.reverse()
    return resolved