Files
stackalytics/stackalytics/processor/default_data_processor.py
Andrii Ostapenko 3e3eccc4c3 Remove gravatar_email field and use first email in profile instead
Also use larger image for user profile and include link to
related documentation.

Change-Id: Ib81f6ecf03f1d00cff307b97aae53538c57d4dc6
Signed-off-by: Andrii Ostapenko <aostapenko@stackalytics.io>
2021-03-27 21:28:02 -05:00

256 lines
9.0 KiB
Python

# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import hashlib
import json
import re
from github import MainClass
from oslo_config import cfg
from oslo_log import log as logging
import six
from stackalytics.processor import normalizer
from stackalytics.processor import rcs
from stackalytics.processor import user_processor
from stackalytics.processor import utils
# Global oslo.config registry; the options read below (review_uri,
# git_base_uri, SSH and GitHub credentials) are not registered in this module.
CONF = cfg.CONF
# Module-level logger named after this module.
LOG = logging.getLogger(__name__)
# Regex searched in a project source URI to detect a GitHub source.
GITHUB_URI_PREFIX = r'^github:\/\/'
def _check_default_data_change(runtime_storage_inst, default_data):
    """Report whether default data differs from the last recorded digest.

    The SHA-1 digest of the JSON-serialized ``default_data`` is compared
    with the value stored under the ``default_data_digest`` key. When the
    two differ, the new digest is written back to the storage.

    :param runtime_storage_inst: storage object providing ``get_by_key``
        and ``set_by_key``
    :param default_data: JSON-serializable default data
    :returns: ``True`` if the digest changed (and was stored), ``False``
        otherwise
    """
    serialized = json.dumps(default_data)
    # hashlib only accepts bytes; json.dumps returns text on Python 3, so
    # encode it explicitly (a no-op branch on Python 2 byte strings).
    if not isinstance(serialized, bytes):
        serialized = serialized.encode('utf-8')

    h = hashlib.new('sha1')
    h.update(serialized)
    digest = h.hexdigest()

    p_digest = runtime_storage_inst.get_by_key('default_data_digest')
    if digest == p_digest:
        LOG.debug('No changes in default data, sha1: %s', digest)
        return False

    LOG.debug('Default data has changes, sha1: %s', digest)
    runtime_storage_inst.set_by_key('default_data_digest', digest)
    return True
def _retrieve_project_list_from_sources(project_sources):
    """Yield repo records from every project source, minus exclusions.

    The source URI (falling back to ``CONF.review_uri``) selects the
    backend: Gerrit or GitHub. A source whose URI matches neither prefix
    contributes no repos. Repos whose ``module`` is listed in the source's
    ``exclude`` list are skipped.

    :param project_sources: iterable of project source dicts
    :returns: generator of repo dicts
    """
    for source in project_sources:
        source_uri = source.get('uri') or CONF.review_uri

        if re.search(rcs.GERRIT_URI_PREFIX, source_uri):
            found_repos = _retrieve_project_list_from_gerrit(source)
        elif re.search(GITHUB_URI_PREFIX, source_uri):
            found_repos = _retrieve_project_list_from_github(source)
        else:
            found_repos = []

        excluded_modules = set(source.get('exclude', []))
        for repo in found_repos:
            if repo['module'] in excluded_modules:
                continue
            yield repo
def _retrieve_project_list_from_gerrit(project_source):
    """Yield repo records for the Gerrit projects of a project source.

    Projects are listed through ``rcs.Gerrit.get_project_list`` using the
    source's ``pattern`` (default: ``^<organization>/.*``). Connection
    settings come from the source and fall back to the matching ``CONF``
    options. This is a generator, so nothing runs until it is iterated.

    :param project_source: project source dict; ``organization`` is
        required
    :returns: generator of repo dicts
    :raises rcs.RcsException: re-raised after logging when the project
        list cannot be retrieved
    """
    organization = project_source['organization']
    LOG.info('Retrieving project list from Gerrit for %s', organization)

    pattern = project_source.get('pattern')
    if pattern is None:
        pattern = "^%s/.*" % organization

    uri = project_source.get('uri') or CONF.review_uri
    key_filename = (project_source.get('ssh_key_filename') or
                    CONF.ssh_key_filename)
    username = project_source.get('ssh_username') or CONF.ssh_username

    try:
        gerrit_inst = rcs.Gerrit(uri)
        gerrit_inst.setup(key_filename=key_filename, username=username)
        try:
            git_repos = gerrit_inst.get_project_list(pattern)
        finally:
            # release the connection even when listing fails
            gerrit_inst.close()
    except rcs.RcsException:
        # exc_info keeps the failure reason in the log
        LOG.error('Failed to retrieve list of projects', exc_info=True)
        raise

    git_base_uri = project_source.get('git_base_uri') or CONF.git_base_uri
    use_launchpad_metrics = project_source.get('launchpad_metrics', True)

    for repo in git_repos:
        # module name is the last path component of the Gerrit project
        name = repo.split('/')[-1]
        launchpad_name = name if use_launchpad_metrics else None
        repo_uri = '%(git_base_uri)s/%(repo)s.git' % dict(
            git_base_uri=git_base_uri, repo=repo)
        yield {
            'branches': ['master'],
            'module': name,
            'organization': organization,
            'uri': repo_uri,
            'releases': [],
            'repo_name': repo,
            'gerrit_uri': uri,
            'ssh_username': username,
            'key_filename': key_filename,
            'launchpad_name': launchpad_name,
        }
def _retrieve_project_list_from_github(project_source):
    """Yield repo records for every repo of a GitHub organization.

    Authenticates with ``CONF.github_token`` when it is set, otherwise with
    ``CONF.github_login`` / ``CONF.github_password``. This is a generator,
    so nothing runs until it is iterated.

    :param project_source: project source dict; ``organization`` is
        required
    :returns: generator of repo dicts
    :raises Exception: any error raised while requesting the repo list is
        logged and re-raised
    """
    LOG.info('Retrieving project list from GitHub')

    if CONF.github_token:
        auth_kwargs = {'login_or_token': CONF.github_token}
    else:
        auth_kwargs = {'login_or_token': CONF.github_login,
                       'password': CONF.github_password}
    client = MainClass.Github(timeout=60, **auth_kwargs)

    organization = project_source['organization']
    LOG.debug('Get list of projects for organization %s', organization)
    try:
        org_repos = client.get_organization(organization).get_repos()
    except Exception as err:
        LOG.error('Failed to retrieve list of projects from GitHub: %s',
                  err, exc_info=True)
        raise

    with_launchpad = project_source.get('launchpad_metrics', True)
    for gh_repo in org_repos:
        # module names are the lower-cased GitHub repo names
        module = gh_repo.name.lower()
        yield {
            'branches': [project_source.get('default_branch', 'master')],
            'module': module,
            'organization': organization,
            'uri': gh_repo.git_url,
            'releases': [],
            'launchpad_name': module if with_launchpad else None,
        }
def _create_module_groups_for_project_sources(project_sources, repos):
    """Build one 'organization' module group per organization in repos.

    The group id and name come from the project source of the same
    organization (``module_group_id`` / ``module_group_name``) and fall
    back to the organization itself.

    :param project_sources: iterable of project source dicts
    :param repos: iterable of repo dicts with ``organization`` and
        ``module`` keys
    :returns: list of module groups built by ``utils.make_module_group``
    """
    modules_by_org = collections.defaultdict(list)
    for repo in repos:
        modules_by_org[repo['organization']].append(repo['module'])

    # organization -> (module_group_id, module_group_name)
    group_info_by_org = {}
    for source in project_sources:
        org = source.get('organization')
        group_info_by_org[org] = (
            source.get('module_group_id') or org,
            source.get('module_group_name') or org,
        )

    groups = []
    for org, modules in six.iteritems(modules_by_org):
        # organizations without a project source use their own name
        group_id, group_name = group_info_by_org.get(org, (org, org))
        groups.append(utils.make_module_group(
            group_id, name=group_name, modules=modules,
            tag='organization'))
    return groups
def _update_project_list(default_data):
    """Merge repos discovered from project sources into ``default_data``.

    Pre-configured repos gain any attributes they lack from the discovered
    repo with the same URI. Discovered repos that are not pre-configured
    are appended. Module groups for the organizations are then appended to
    ``default_data['module_groups']``. ``default_data`` is modified in
    place.

    :param default_data: dict with ``repos``, ``project_sources`` and
        ``module_groups`` keys
    """
    configured_repos = {r['uri'] for r in default_data['repos']}

    # The source listing is a generator, so a truthiness check on it would
    # always pass; it is simply consumed here.
    # Index discovered repos by URI and exclude all projects whose module
    # name starts with 'deb-'.
    repos_dict = dict(
        (r['uri'], r)
        for r in _retrieve_project_list_from_sources(
            default_data['project_sources'])
        if not r['module'].startswith('deb-'))

    # update pre-configured repos with attributes they do not define
    for r in default_data['repos']:
        discovered = repos_dict.get(r['uri'])
        if discovered is not None:
            for k, v in discovered.items():
                r.setdefault(k, v)

    # update default data with repos that were not configured explicitly
    default_data['repos'] += [r for r in repos_dict.values()
                              if r['uri'] not in configured_repos]

    default_data['module_groups'] += (
        _create_module_groups_for_project_sources(
            default_data['project_sources'], default_data['repos']))
def _store_users(runtime_storage_inst, users):
    """Store user profiles, merging each with its stored counterpart.

    Each user's ``gravatar_email`` is set to the first entry of ``emails``
    before the profile is merged and stored.

    :param runtime_storage_inst: runtime storage passed to
        ``user_processor``
    :param users: iterable of user dicts with ``user_id`` and ``emails``
    """
    for profile in users:
        primary_email = profile['emails'][0]
        profile['gravatar_email'] = primary_email

        existing = user_processor.load_user(runtime_storage_inst,
                                            user_id=profile['user_id'])
        merged = user_processor.update_user_profile(existing, profile)
        user_processor.store_user(runtime_storage_inst, merged)
def _store_companies(runtime_storage_inst, companies):
    """Store a lookup index from domains and names to company names.

    The index maps every domain, every normalized alias and the normalized
    company name to the company's ``company_name``. It is written under
    the ``companies`` key.

    :param runtime_storage_inst: storage object providing ``set_by_key``
    :param companies: iterable of company dicts with ``domains`` and
        ``company_name`` (``aliases`` is optional)
    """
    index = {}
    for company in companies:
        domains = company['domains']
        company_name = company['company_name']

        index.update((domain, company_name) for domain in domains)
        for alias in company.get('aliases', ()):
            index[utils.normalize_company_name(alias)] = company_name
        index[utils.normalize_company_name(company_name)] = company_name

    runtime_storage_inst.set_by_key('companies', index)
def _store_module_groups(runtime_storage_inst, module_groups):
    """Add or replace module groups in the stored ``module_groups`` map.

    Groups are keyed by their ``id``, falling back to
    ``module_group_name``; groups already in storage under other ids are
    kept.

    :param runtime_storage_inst: storage object providing ``get_by_key``
        and ``set_by_key``
    :param module_groups: iterable of module group dicts
    """
    groups_by_id = runtime_storage_inst.get_by_key('module_groups') or {}

    for group in module_groups:
        group_name = group['module_group_name']
        group_id = group.get('id') or group_name
        record = utils.make_module_group(
            group_id, name=group_name, modules=group['modules'],
            tag=group.get('tag', 'group'))
        groups_by_id[group_id] = record

    runtime_storage_inst.set_by_key('module_groups', groups_by_id)
# Maps a default-data key to the function that stores its value; keys not
# listed here are written to runtime storage as-is by _store_default_data.
STORE_FUNCS = {
    'users': _store_users,
    'companies': _store_companies,
    'module_groups': _store_module_groups,
}
def _store_default_data(runtime_storage_inst, default_data):
    """Normalize default data and write every section to runtime storage.

    Sections with an entry in ``STORE_FUNCS`` are handled by that
    function; all other sections are stored under their own key.

    :param runtime_storage_inst: storage object providing ``set_by_key``
    :param default_data: dict of default data sections
    """
    normalizer.normalize_default_data(default_data)

    LOG.debug('Update runtime storage with default data')
    for section, payload in six.iteritems(default_data):
        store_func = STORE_FUNCS.get(section)
        if store_func is None:
            runtime_storage_inst.set_by_key(section, payload)
            continue
        store_func(runtime_storage_inst, payload)
def process(runtime_storage_inst, default_data):
    """Process default data and store it in the runtime storage.

    When ``default_data`` has a ``project_sources`` key, the project list
    is first updated from those sources.

    :param runtime_storage_inst: runtime storage to write to
    :param default_data: dict of default data; modified in place
    """
    LOG.info('Process default data')

    has_project_sources = 'project_sources' in default_data
    if has_project_sources:
        _update_project_list(default_data)

    _store_default_data(runtime_storage_inst, default_data)