298 lines
9.5 KiB
Python
298 lines
9.5 KiB
Python
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
# implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import copy
|
|
|
|
from oslo_log import log as logging
|
|
|
|
from stackalytics.processor import utils
|
|
|
|
# Module-level logger named after this module.
LOG = logging.getLogger(__name__)

# Company name given to users whose email domain maps to no known company.
INDEPENDENT = '*independent'
# NOTE(review): not referenced in this part of the file; presumably the
# company name used for automated (bot) accounts -- confirm against callers.
ROBOTS = '*robots'
|
|
|
|
|
|
def make_user_id(emails=None, launchpad_id=None, gerrit_tuple=None,
                 member_id=None, github_id=None, zanata_id=None):
    """Build the canonical user id from whichever identifier is available.

    Identifiers are considered in a fixed priority order: Launchpad id,
    first email, Gerrit (hostname, id) pair, member id, GitHub id and
    finally Zanata id. Only the Launchpad id and the email are used
    verbatim; every other kind is prefixed with its source name.

    :param emails: list of emails; only the first one is used
    :param launchpad_id: Launchpad id
    :param gerrit_tuple: pair formatted as 'gerrit:<first>:<second>'
    :param member_id: member id, formatted as 'member:<id>'
    :param github_id: GitHub id, formatted as 'github:<id>'
    :param zanata_id: Zanata id, formatted as 'zanata:<id>'
    :return: user id string or None if no identifier was supplied
    """
    if launchpad_id:
        return launchpad_id
    if emails:
        return emails[0]

    # (format, value) pairs in priority order; the first truthy value wins
    prefixed_sources = (
        ('gerrit:%s:%s', gerrit_tuple),
        ('member:%s', member_id),
        ('github:%s', github_id),
        ('zanata:%s', zanata_id),
    )
    for template, value in prefixed_sources:
        if value:
            return template % value
    return None
|
|
|
|
|
|
def _iter_user_storage_keys(user):
    """Lazily yield every storage key under which a user profile is saved."""
    yield 'user:%d' % user['seq']
    if user.get('user_id'):
        yield 'user:%s' % user['user_id']
    if user.get('launchpad_id'):
        yield 'user:%s' % user['launchpad_id']
    for hostname, ids in user.get('gerrit_ids', {}).items():
        for gerrit_id in ids:
            yield 'user:gerrit:%s:%s' % (hostname, gerrit_id)
    if user.get('github_id'):
        yield 'user:github:%s' % user['github_id']
    if user.get('zanata_id'):
        yield 'user:zanata:%s' % user['zanata_id']
    for email in user.get('emails') or []:
        yield 'user:%s' % email


def store_user(runtime_storage_inst, user):
    """Save a user profile under all of its identifier keys.

    A profile without a truthy 'seq' is treated as new: it receives the
    next value from the storage's user counter (the passed dict is updated
    in place). The profile is then written once per known identifier:
    sequence number, user id, Launchpad id, each Gerrit id, GitHub id,
    Zanata id and every email.

    :param runtime_storage_inst: storage object providing inc_user_count()
        and set_by_key(key, value)
    :param user: user profile dict
    """
    if not user.get('seq'):
        user['seq'] = runtime_storage_inst.inc_user_count()
        LOG.debug('New user: %s', user)

    # keys are produced lazily, so each write happens as soon as its key
    # has been computed
    for storage_key in _iter_user_storage_keys(user):
        runtime_storage_inst.set_by_key(storage_key, user)
|
|
|
|
|
|
def load_user(runtime_storage_inst, seq=None, user_id=None, email=None,
              launchpad_id=None, gerrit_tuple=None, member_id=None,
              github_id=None, zanata_id=None):
    """Fetch a user profile from storage by any single identifier.

    Prefixed identifiers (Gerrit, member, GitHub, Zanata) take precedence;
    otherwise the first truthy value among seq, user_id, launchpad_id and
    email is used as the lookup key.

    :param runtime_storage_inst: storage object providing get_by_key(key)
    :return: whatever the storage returns for 'user:<key>', or None when no
        identifier was supplied
    """
    lookup = (make_user_id(gerrit_tuple=gerrit_tuple, member_id=member_id,
                           github_id=github_id, zanata_id=zanata_id) or
              seq or user_id or launchpad_id or email)
    if not lookup:
        return None
    return runtime_storage_inst.get_by_key('user:%s' % lookup)
|
|
|
|
|
|
def delete_users(runtime_storage_inst, users):
    """Remove the sequence-number record of each given user from storage.

    Only the 'user:<seq>' key is deleted here.

    :param runtime_storage_inst: storage object providing delete_by_key(key)
    :param users: iterable of user profile dicts, each having a 'seq' key
    """
    for profile in users:
        LOG.debug('Delete user: %s', profile)
        seq_key = 'user:%s' % profile['seq']
        runtime_storage_inst.delete_by_key(seq_key)
|
|
|
|
|
|
def update_user_profile(stored_user, user):
    """Overlay ``user`` onto ``stored_user`` and return the result.

    Neither argument is modified; the result starts as a deep copy. When a
    stored profile exists, fields from ``user`` override the stored ones,
    except that emails and Gerrit ids are merged (union, sorted) rather
    than replaced. The returned profile is always flagged as 'static'.

    :param stored_user: previously stored profile dict, or a falsy value if
        there is none
    :param user: profile dict with the new data
    :return: new profile dict
    """
    if stored_user:
        updated_user = copy.deepcopy(stored_user)
        updated_user.update(user)
        # `or []` also covers an 'emails' key that is present but None,
        # for which dict.get() would not fall back to its default
        stored_emails = stored_user.get('emails') or []
        new_emails = user.get('emails') or []
        updated_user['emails'] = sorted(set(stored_emails) | set(new_emails))
        gerrit_ids = _merge_gerrit_ids([stored_user, user])
        if gerrit_ids:
            updated_user['gerrit_ids'] = gerrit_ids
    else:
        updated_user = copy.deepcopy(user)

    updated_user['static'] = True
    return updated_user
|
|
|
|
|
|
def get_company_for_date(companies, date):
    """Pick the company a user was affiliated with at the given date.

    The first record whose 'end_date' lies after ``date`` is a 'strict'
    match. If no record qualifies, the last record is returned as an
    'open' match.

    :param companies: non-empty list of dicts with 'company_name' and
        'end_date' keys
    :param date: value comparable with the records' 'end_date'
    :return: tuple (company name, 'strict' or 'open')
    """
    matching = next(
        (record for record in companies if date < record['end_date']), None)
    if matching is not None:
        return matching['company_name'], 'strict'
    # fall back to the last record; may be overridden
    return companies[-1]['company_name'], 'open'
|
|
|
|
|
|
def get_company_by_email(domains_index, email):
    """Resolve a company name from the domain part of an email

    Candidate domains are tried from the most specific (the full domain)
    down to its last two labels, so a subdomain entry wins over the entry
    of its parent domain. A single-label domain is never looked up.

    :param domains_index: dict {domain -> company name}
    :param email: valid email. may be empty
    :return: company name or None if nothing matches
    """
    if not email:
        return None

    domain = email.partition('@')[2]
    if not domain:
        return None

    labels = domain.split('.')
    # start == 0 is the full domain; the last candidate keeps two labels
    for start in range(len(labels) - 1):
        candidate = '.'.join(labels[start:])
        if candidate in domains_index:
            return domains_index[candidate]
    return None
|
|
|
|
|
|
def create_user(domains_index, launchpad_id, email, gerrit_tuple, zanata_id,
                user_name):
    """Build a fresh user profile from the identifiers known so far.

    The user gets a single company record with 'end_date' 0: the company
    resolved from the email domain, or INDEPENDENT when none matches.

    :param domains_index: dict {domain -> company name}
    :param launchpad_id: Launchpad id, may be empty
    :param email: email, may be empty
    :param gerrit_tuple: pair (hostname, gerrit id), may be empty
    :param zanata_id: Zanata id, may be empty
    :param user_name: display name; stored as '' when empty
    :return: new user profile dict
    """
    emails = [email] if email else []
    company = get_company_by_email(domains_index, email) or INDEPENDENT

    profile = {}
    profile['user_id'] = make_user_id(emails=emails,
                                      launchpad_id=launchpad_id,
                                      gerrit_tuple=gerrit_tuple,
                                      zanata_id=zanata_id)
    profile['launchpad_id'] = launchpad_id
    profile['user_name'] = user_name or ''
    profile['companies'] = [{'company_name': company, 'end_date': 0}]
    profile['emails'] = emails

    # optional identifiers are added only when present
    if gerrit_tuple:
        hostname, gerrit_id = gerrit_tuple[0], gerrit_tuple[1]
        profile['gerrit_ids'] = {hostname: [gerrit_id]}
    if zanata_id:
        profile['zanata_id'] = zanata_id

    return profile
|
|
|
|
|
|
def update_user_affiliation(domains_index, user):
    """Update user affiliation

    Affiliation is updated only if user is currently independent
    but makes contribution from company domain. The profile is modified
    in place: the single INDEPENDENT company record is renamed to the
    company of the first email whose domain is known.

    A profile with a missing or None 'emails' field is left untouched.

    :param domains_index: dict {domain -> company name}
    :param user: user profile
    """
    # `or []` guards against a missing 'emails' key (dict.get returns None)
    for email in user.get('emails') or []:
        company_name = get_company_by_email(domains_index, email)
        if not company_name:
            continue

        companies = user['companies']
        if len(companies) == 1 and (
                companies[0]['company_name'] == INDEPENDENT):
            LOG.debug('Updating affiliation of user %s to %s',
                      user['user_id'], company_name)
            companies[0]['company_name'] = company_name
            break
|
|
|
|
|
|
def _merge_gerrit_ids(users):
|
|
gerrit_ids = {}
|
|
hostnames = set()
|
|
for user in users:
|
|
hostnames.update(set(user.get('gerrit_ids', {}).keys()))
|
|
for hostname in hostnames:
|
|
ids = set()
|
|
for user in users:
|
|
ids |= set(user.get('gerrit_ids', {}).get(hostname, []))
|
|
if ids:
|
|
gerrit_ids[hostname] = sorted(list(ids))
|
|
return gerrit_ids
|
|
|
|
|
|
def merge_user_profiles(domains_index, user_profiles):
    """Merge user profiles into one

    The function merges a list of user profiles into one and figures out
    which profiles can be deleted.

    The company history is taken from an input profile without copying and
    update_user_affiliation() may then rename its first record in place,
    so an input profile can be modified as a side effect.

    :param domains_index: dict {domain -> company name}
    :param user_profiles: user profiles to merge
    :return: tuple (merged user profile, [user profiles to delete])
    """
    LOG.debug('Merge profiles: %s', user_profiles)

    # check whether there is more than one launchpad_id
    launchpad_ids = {profile.get('launchpad_id') for profile in user_profiles
                     if profile.get('launchpad_id')}
    if len(launchpad_ids) > 1:
        LOG.debug('Ambiguous launchpad ids: %s on profiles: %s',
                  launchpad_ids, user_profiles)

    combined = {}  # merged user profile

    # ordinary fields: the first profile holding a truthy value wins
    plain_fields = ('seq', 'user_name', 'user_id', 'github_id',
                    'launchpad_id', 'companies', 'static', 'zanata_id',
                    'gravatar_email')
    for field in plain_fields:
        for profile in user_profiles:
            candidate = profile.get(field)
            if candidate:
                combined[field] = candidate
                break

    # user_id should be equal to launchpad_id whenever the latter is known
    combined['user_id'] = (combined.get('launchpad_id') or
                           combined.get('user_id'))

    # `user_name` is a required field, so fall back to the user_id
    combined.setdefault('user_name', combined['user_id'])

    # emails and core entries are unions over all profiles
    all_emails = set()
    all_core = set()
    for profile in user_profiles:
        all_emails.update(profile.get('emails', []))
        all_core.update(profile.get('core', []))
    combined['emails'] = sorted(all_emails)
    if all_core:
        combined['core'] = sorted(all_core)

    merged_gerrit_ids = _merge_gerrit_ids(user_profiles)
    if merged_gerrit_ids:
        combined['gerrit_ids'] = merged_gerrit_ids

    # companies: prefer the first history that is more than a lone
    # INDEPENDENT record, otherwise keep the one collected above
    chosen_companies = combined['companies']
    for profile in user_profiles:
        history = profile.get('companies')
        if not history:
            continue
        if (history[0]['company_name'] != INDEPENDENT or
                len(history) > 1):
            chosen_companies = history
            break
    combined['companies'] = chosen_companies

    update_user_affiliation(domains_index, combined)

    obsolete_profiles = []
    known_seqs = {profile.get('seq') for profile in user_profiles
                  if profile.get('seq')}
    if len(known_seqs) > 1:
        # profiles are merged, keep only one, remove others
        known_seqs.remove(combined['seq'])
        obsolete_profiles = [profile for profile in user_profiles
                             if profile.get('seq') in known_seqs]

    return combined, obsolete_profiles
|
|
|
|
|
|
def are_users_same(users):
    """True if all users share one 'seq' value and none lacks it

    :param users: iterable of user profile dicts
    :return: bool; False for an empty iterable
    """
    distinct_seqs = {profile.get('seq') for profile in users}
    if None in distinct_seqs:
        return False
    return len(distinct_seqs) == 1
|
|
|
|
|
|
def resolve_companies_aliases(domains_index, companies):
    """Replace company names by canonical ones and drop adjacent repeats.

    Each name is looked up in ``domains_index`` by its normalized form;
    when that yields nothing, utils.normalize_company_draft() of the name
    is used instead. The list is walked from the end, so for a run of
    neighbouring records resolving to the same name only the last record
    of the run is kept. Input records are not modified; the result holds
    deep copies.

    :param domains_index: dict queried with normalized company names
    :param companies: list of dicts having a 'company_name' key
    :return: new list of company records in the original order
    """
    collapsed = []
    last_seen_name = None

    for record in reversed(companies):
        raw_name = record['company_name']
        canonical = domains_index.get(utils.normalize_company_name(raw_name))
        if not canonical:
            canonical = utils.normalize_company_draft(raw_name)

        if canonical != last_seen_name:
            clone = copy.deepcopy(record)
            clone['company_name'] = canonical
            collapsed.append(clone)

        last_seen_name = canonical

    # records were gathered back-to-front; restore the original order
    collapsed.reverse()
    return collapsed
|