381 lines
10 KiB
Python
381 lines
10 KiB
Python
# Copyright (c) 2013 Mirantis Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
# implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import calendar
|
|
import datetime
|
|
import gzip
|
|
import html
|
|
import random
|
|
import re
|
|
import time
|
|
|
|
import iso8601
|
|
from oslo_config import cfg
|
|
from oslo_log import log as logging
|
|
import requests
|
|
import requests_file
|
|
import six
|
|
import yaml
|
|
|
|
|
|
# Shared oslo.config configuration object; options are registered on it by
# init_config_and_logging() and read elsewhere (e.g. CONF.read_timeout).
CONF = cfg.CONF
# Module-level logger (oslo.log) named after this module.
LOG = logging.getLogger(__name__)
|
|
|
|
|
|
def init_config_and_logging(opts):
    """Register ``opts`` on the global CONF and set up logging.

    :param opts: options passed to both ``CONF.register_cli_opts`` and
        ``CONF.register_opts``.

    The order of the calls matters: options and logging options are
    registered before ``CONF(...)`` is invoked, and logging is set up only
    after that.
    """
    # The same option list is registered both as CLI options and as
    # regular options.
    CONF.register_cli_opts(opts)
    CONF.register_opts(opts)
    logging.register_options(CONF)
    logging.set_defaults()

    # Invoke the config object for the 'stackalytics' project
    # (presumably this is where CLI args / config files get parsed --
    # confirm against oslo.config documentation).
    CONF(project='stackalytics')

    logging.setup(CONF, 'stackalytics')
    LOG.info('Logging enabled')
    # Dump the option values to the log at DEBUG level.
    CONF.log_opt_values(LOG, logging.DEBUG)
|
|
|
|
|
|
def date_to_timestamp(d):
    """Convert a ``'%Y-%b-%d'`` date string (e.g. ``2013-Jan-01``) to seconds.

    :param d: date string, the literal ``'now'``, or a falsy value.
    :returns: ``0`` for a falsy value, the current time for ``'now'``,
        otherwise the ``time.mktime`` timestamp of the parsed date
        (interpreted as local time), truncated to ``int``.
    :raises ValueError: if ``d`` does not match the expected format.
    """
    if not d:
        return 0

    if d == 'now':
        moment = time.time()
    else:
        parsed = datetime.datetime.strptime(d, '%Y-%b-%d')
        moment = time.mktime(parsed.timetuple())
    return int(moment)
|
|
|
|
|
|
def date_to_timestamp_ext(d):
    """Like ``date_to_timestamp`` but also accept a plain numeric value.

    :param d: anything ``date_to_timestamp`` accepts, or a value that
        ``int()`` can convert.
    :returns: integer timestamp.
    """
    try:
        timestamp = date_to_timestamp(d)
    except (ValueError, TypeError):
        # Not a recognised date string: treat the value as a raw timestamp.
        timestamp = int(d)
    return timestamp
|
|
|
|
|
|
def member_date_to_timestamp(d):
    """Convert a ``'%B %d, %Y '`` date string to an integer timestamp.

    :param d: date string such as ``'January 05, 2014 '`` or a falsy value.
        Note the format string ends with a space.
    :returns: ``0`` for a falsy value, otherwise the ``time.mktime``
        timestamp of the parsed date (local time) truncated to ``int``.
    """
    if not d:
        return 0

    parsed = datetime.datetime.strptime(d, '%B %d, %Y ')
    seconds = time.mktime(parsed.timetuple())
    return int(seconds)
|
|
|
|
|
|
def iso8601_to_timestamp(s):
    """Parse ``s`` with ``iso8601.parse_date`` and return epoch seconds.

    The parsed value is converted through its UTC time tuple and
    ``calendar.timegm``.
    """
    parsed = iso8601.parse_date(s)
    utc_tuple = parsed.utctimetuple()
    return calendar.timegm(utc_tuple)
|
|
|
|
|
|
def timestamp_to_date(timestamp):
    """Format ``timestamp`` as ``'%Y-%b-%d'`` (e.g. ``2013-Jan-01``).

    ``datetime.fromtimestamp`` is called without a timezone, so the date is
    rendered in the host's local time.
    """
    moment = datetime.datetime.fromtimestamp(timestamp)
    return moment.strftime('%Y-%b-%d')
|
|
|
|
|
|
def timestamp_to_week(timestamp):
    """Return the index of the week that contains ``timestamp``.

    Weeks are counted from Jan 4th 1970, the first Sunday in the Epoch, so
    timestamps before that day map to week ``-1``.
    """
    seconds_per_day = 24 * 3600
    # Shift the origin from Thursday Jan 1st to Sunday Jan 4th 1970.
    since_first_sunday = timestamp - 3 * seconds_per_day
    return since_first_sunday // (7 * seconds_per_day)
|
|
|
|
|
|
def week_to_date(week):
    """Return the start of week number ``week`` as ``'%Y-%m-%d %H:%M:%S'``.

    Weeks are counted from Jan 4th 1970 (three days after the Epoch); the
    result is formatted in the host's local time because
    ``datetime.fromtimestamp`` is called without a timezone.
    """
    epoch_offset = 3 * 24 * 3600
    week_start = week * 7 * 24 * 3600 + epoch_offset
    moment = datetime.datetime.fromtimestamp(week_start)
    return moment.strftime('%Y-%m-%d %H:%M:%S')
|
|
|
|
|
|
def timestamp_to_day(timestamp):
    """Return the number of whole days between the Epoch and ``timestamp``."""
    seconds_per_day = 24 * 3600
    return timestamp // seconds_per_day
|
|
|
|
|
|
def timestamp_to_utc_date(timestamp):
    """Format ``timestamp`` as a ``'%Y-%m-%d'`` date in UTC.

    :param timestamp: seconds since the Epoch.
    :returns: the UTC calendar date as a string.

    The previous implementation used ``datetime.fromtimestamp`` without a
    timezone, which renders the date in the host's local time and therefore
    returned the wrong day around midnight on hosts not running in UTC.
    """
    # time.gmtime always breaks the timestamp down in UTC, regardless of
    # the host's timezone settings.
    return time.strftime('%Y-%m-%d', time.gmtime(timestamp))
|
|
|
|
|
|
def round_timestamp_to_day(timestamp):
    """Round ``timestamp`` down to the start of its day.

    The value is converted with ``int()`` first, so floats and numeric
    strings are accepted.
    """
    seconds_per_day = 24 * 3600
    whole_days = int(timestamp) // seconds_per_day
    return whole_days * seconds_per_day
|
|
|
|
|
|
def check_email_validity(email):
    """Check whether ``email`` starts with something shaped like an address.

    :param email: candidate address; may be ``None`` or empty.
    :returns: a truthy ``re`` match object when the beginning of ``email``
        looks like ``local@domain.tld``, ``None`` when it does not, and
        ``False`` for an empty/``None`` value.

    The local part may contain word characters, ``.``, ``+`` and ``-``.
    The previous pattern had ``\\.-\\\\`` inside the character class, which
    the regex engine reads as the range from ``.`` to ``\\``: that range
    let through characters such as ``<``, ``>``, ``@`` and ``/`` and, at
    the same time, rejected a plain ``-`` in the local part.

    As before, the pattern is only anchored at the start (``re.match``),
    so trailing text after the address is tolerated.
    """
    if email:
        # '-' is placed last in each class so it is a literal hyphen, not
        # a range operator.
        return re.match(r'[\w.+-]+@([\w.-]+\.)+\w+', email)
    return False
|
|
|
|
|
|
# Pool of browser-like User-Agent strings; _session_request() picks one at
# random for every outgoing request.
user_agents = [
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64) Gecko/20100101 Firefox/41.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/600.8.9',
    'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X) Chrome/45.0.2062.120',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
]
|
|
|
|
|
|
def _session_request(session, uri, method):
    """Issue ``method`` on ``uri`` through ``session`` and return the response.

    A ``requests_file.FileAdapter`` is mounted for ``file://`` URIs on every
    call, a random entry of ``user_agents`` is sent as the User-Agent, and
    ``CONF.read_timeout`` is used as the request timeout.
    """
    session.mount('file://', requests_file.FileAdapter())

    request_headers = {
        'User-Agent': random.choice(user_agents),
        'Accept': 'application/json',
    }
    return session.request(method, uri,
                           headers=request_headers,
                           timeout=CONF.read_timeout)
|
|
|
|
|
|
def do_request(uri, method='get', session=None):
    """Perform an HTTP(-like) request and return the response object.

    :param uri: target URI.
    :param method: request method name, ``'get'`` by default.
    :param session: optional session to reuse; when omitted (or falsy) a
        temporary ``requests.Session`` is created and closed after the call.
    """
    if not session:
        with requests.Session() as temporary_session:
            return _session_request(temporary_session, uri, method)

    return _session_request(session, uri, method)
|
|
|
|
|
|
def read_uri(uri, session=None):
    """Fetch ``uri`` and return the response body as text.

    Any exception is logged as a warning and ``None`` is returned instead.
    """
    try:
        response = do_request(uri, session=session)
        return response.text
    except Exception as e:
        LOG.warning('Error "%(error)s" retrieving uri %(uri)s',
                    {'error': e, 'uri': uri})
    return None
|
|
|
|
|
|
def read_json_from_uri(uri, session=None):
    """Fetch ``uri`` and return the response decoded via ``.json()``.

    Any exception (request or decoding) is logged as a warning and ``None``
    is returned instead.
    """
    try:
        response = do_request(uri, session=session)
        return response.json()
    except Exception as e:
        LOG.warning('Error "%(error)s" parsing json from uri %(uri)s',
                    {'error': e, 'uri': uri})
    return None
|
|
|
|
|
|
def read_yaml_from_uri(uri):
    """Fetch ``uri`` with ``read_uri`` and parse it with ``yaml.safe_load``.

    Any exception is logged as a warning and ``None`` is returned instead.
    """
    try:
        document = read_uri(uri)
        return yaml.safe_load(document)
    except Exception as e:
        LOG.warning('Error "%(error)s" parsing yaml from uri %(uri)s',
                    {'error': e, 'uri': uri})
    return None
|
|
|
|
|
|
def _gzip_decompress(content):
    """Decompress gzip ``content``.

    On Python 3 the decompressed bytes are decoded as UTF-8; otherwise the
    data is read through a ``GzipFile`` wrapped around a ``StringIO``.
    """
    if not six.PY3:
        stream = six.moves.StringIO(content)
        gzip_fd = gzip.GzipFile(fileobj=stream)
        return gzip_fd.read()

    raw = gzip.decompress(content)
    return raw.decode('utf8')
|
|
|
|
|
|
def read_txt_from_uri(uri):
    """Fetch ``uri`` and return its raw content decoded as UTF-8.

    Any exception is logged as a warning and ``None`` is returned instead.
    """
    try:
        payload = do_request(uri).content
        return payload.decode('utf8')
    except Exception as e:
        LOG.warning('Error "%(error)s" retrieving uri %(uri)s',
                    {'error': e, 'uri': uri})
    return None
|
|
|
|
|
|
def read_gzip_from_uri(uri):
    """Fetch ``uri`` and return its content after gzip decompression.

    Any exception is logged as a warning and ``None`` is returned instead.
    """
    try:
        compressed = do_request(uri).content
        return _gzip_decompress(compressed)
    except Exception as e:
        LOG.warning('Error "%(error)s" retrieving uri %(uri)s',
                    {'error': e, 'uri': uri})
    return None
|
|
|
|
|
|
def get_uri_last_modified(uri):
    """Return the ``last-modified`` header of a HEAD request to ``uri``.

    Any exception (including a missing header) is logged as a warning and
    ``None`` is returned instead.
    """
    try:
        response = do_request(uri, method='head')
        return response.headers['last-modified']
    except Exception as e:
        LOG.warning('Error "%(error)s" retrieving uri %(uri)s',
                    {'error': e, 'uri': uri})
    return None
|
|
|
|
|
|
def cmp_to_key(mycmp):  # ported from python 3
    """Convert a cmp= function into a key= function.

    :param mycmp: two-argument comparison returning a negative number, zero
        or a positive number.
    :returns: a class ``K`` wrapping a value in ``.obj``; instances compare
        with each other by delegating to ``mycmp`` and are unhashable.
    """
    def compare(left, right):
        # Both operands are K instances; compare the wrapped values.
        return mycmp(left.obj, right.obj)

    class K(object):
        __slots__ = ['obj']
        __hash__ = None

        def __init__(self, obj):
            self.obj = obj

        def __eq__(self, other):
            return compare(self, other) == 0

        def __ne__(self, other):
            return compare(self, other) != 0

        def __lt__(self, other):
            return compare(self, other) < 0

        def __le__(self, other):
            return compare(self, other) <= 0

        def __gt__(self, other):
            return compare(self, other) > 0

        def __ge__(self, other):
            return compare(self, other) >= 0

    return K
|
|
|
|
|
|
def make_range(start, stop, step):
    """Yield consecutive ranges of ``step`` items covering ``start..stop``.

    Full-size chunks come first; if ``stop - start`` is not a multiple of
    ``step`` a final shorter range up to ``stop`` is yielded.
    """
    make = six.moves.range
    remainder = (stop - start) % step
    # End of the last chunk that is exactly ``step`` long.
    boundary = stop - remainder

    for chunk_start in make(start, boundary, step):
        yield make(chunk_start, chunk_start + step)

    if boundary < stop:
        yield make(boundary, stop)
|
|
|
|
|
|
def load_repos(runtime_storage_inst):
    """Return the value stored under ``'repos'`` or an empty list.

    :param runtime_storage_inst: object exposing ``get_by_key(key)``.
    """
    repos = runtime_storage_inst.get_by_key('repos')
    return repos or []
|
|
|
|
|
|
def unwrap_text(text):
    """Join hard-wrapped lines of ``text`` back into paragraphs.

    Blank lines are dropped. A line keeps its line break when it does not
    start with a letter or when it ends with one of ``. ! ? > : ;``;
    otherwise it is joined to the following line with a single space.
    Trailing whitespace of the final result is stripped.
    """
    terminators = ('.', '!', '?', '>', ':', ';')
    pieces = []

    for raw_line in text.splitlines():
        stripped = raw_line.rstrip()
        if not stripped:
            continue

        keeps_break = (not stripped[0].isalpha()) or stripped[-1] in terminators
        # The original (un-stripped) line is what gets emitted.
        pieces.append(raw_line)
        pieces.append('\n' if keeps_break else ' ')

    return ''.join(pieces).rstrip()
|
|
|
|
|
|
def format_text(s):
    """Prepare free-form text for HTML output.

    Runs of two or more newlines are collapsed into one, the result is
    passed through ``html.escape`` with ``False`` as the second argument
    (the ``quote`` flag, so quote characters are left as-is), and every run
    of dots/slashes inside a ``word.word`` / ``word/word`` sequence gets
    the replacement suffix from ``replace_dots`` appended.
    """
    # TODO(snikitin) Maybe we need to remove 'False' from escape()
    # to escape ' and " symbols?
    s = html.escape(re.sub(re.compile('\n{2,}', flags=re.MULTILINE), '\n', s),
                    False)

    def replace_dots(match_obj):
        # NOTE(review): the replacement literal contains a character after
        # ``\1`` that is invisible in this copy of the file -- presumably a
        # zero-width space (U+200B) giving browsers a line-break
        # opportunity. Confirm before editing this line.
        return re.sub(r'([\./]+)', r'\1​', match_obj.group(0))

    # Match sequences such as ``a.b.c`` or ``path/to/file`` as a whole and
    # rewrite the separators inside each of them.
    s = re.sub(r'((?:\w+[\./]+)+\w+)', replace_dots, s)
    return s
|
|
|
|
|
|
def make_age_string(seconds):
    """Render a duration as ``'<D> days and <H> hours'``.

    :param seconds: duration in seconds.
    :returns: string with the whole days and the whole remaining hours.

    The computation uses floor division. With the true division operator
    (``/``) on Python 3 the day count was fractional, so the "hours" part
    was computed as (almost) zero for every input.
    """
    days, remainder = divmod(seconds, 24 * 3600)
    hours = remainder // 3600
    return '%d days and %d hours' % (days, hours)
|
|
|
|
|
|
def merge_records(original, new):
    """Copy differing entries of ``new`` into ``original`` (in place).

    :returns: ``True`` if at least one key of ``original`` was added or
        changed, ``False`` otherwise.
    """
    changed = False
    for field, incoming in new.items():
        if original.get(field) != incoming:
            original[field] = incoming
            changed = True
    return changed
|
|
|
|
|
|
def get_blueprint_id(module, name):
    """Build a blueprint id of the form ``<module>:<name>``."""
    separator = ':'
    return module + separator + name
|
|
|
|
|
|
def make_bug_id(bug_id, module, release=None):
    """Build a bug id: ``module/release/bug_id`` or ``module/bug_id``.

    The release component is included only when ``release`` is truthy.
    """
    parts = [module, release, bug_id] if release else [module, bug_id]
    return '/'.join(parts)
|
|
|
|
|
|
def get_patch_id(review_id, patch_number):
    """Build a patch id of the form ``<review_id>:<patch_number>``."""
    components = (review_id, patch_number)
    return '%s:%s' % components
|
|
|
|
|
|
def add_index(sequence, start=1, item_filter=lambda x: True):
    """Number the items of ``sequence`` in place via their ``'index'`` key.

    :param sequence: iterable of dict-like items; each gets ``'index'`` set.
    :param start: number given to the first accepted item.
    :param item_filter: predicate; rejected items get ``''`` as index and do
        not consume a number.
    :returns: the same ``sequence`` object.
    """
    counter = start
    for entry in sequence:
        if not item_filter(entry):
            entry['index'] = ''
            continue
        entry['index'] = counter
        counter += 1
    return sequence
|
|
|
|
|
|
def safe_encode(s):
    """Percent-encode ``s`` after encoding it to UTF-8 bytes."""
    utf8_bytes = s.encode('utf-8')
    return six.moves.urllib.parse.quote(utf8_bytes)
|
|
|
|
|
|
def keep_safe_chars(s):
    """Drop every character outside ``\\x21-\\x7e`` and ``\\x80-\\xff``.

    This removes whitespace, control characters, ``\\x7f`` and anything
    above ``\\xff``.
    """
    unsafe_run = re.compile(r'[^\x21-\x7e\x80-\xff]+')
    return unsafe_run.sub('', s)
|
|
|
|
|
|
def make_module_group(module_group_id, name=None, modules=None, tag='module'):
    """Build a module-group dictionary.

    :param module_group_id: stored under ``'id'``.
    :param name: group name; falls back to ``module_group_id`` when falsy.
    :param modules: members; falls back to a set holding only
        ``module_group_id`` when falsy.
    :param tag: stored under ``'tag'``.
    """
    group_name = name or module_group_id
    members = modules or {module_group_id}
    return {
        'id': module_group_id,
        'module_group_name': group_name,
        'modules': members,
        'tag': tag,
    }
|
|
|
|
|
|
# Legal-form words that are removed (as whole words, case-insensitively)
# when normalizing a company name.
BAD_NAME_SUFFIXES = ['Ltd', 'Pvt', 'Inc', 'GmbH', 'AG', 'Corporation', 'Corp',
                     'Company', 'Co', 'Group', 'Srl', 'Limited', 'LLC', 'IT']

# Dotted legal-form abbreviations; they are removed only at the start of
# the name or after whitespace.
BAD_NAME_SUFFIXES_WITH_STOPS = ['S.p.A.', 's.r.o.', 'L.P.', 'B.V.', 'K.K.',
                                'd.o.o.']


def normalize_company_name(name):
    """Reduce a company name to a lowercase alphanumeric key.

    Legal-form suffixes from ``BAD_NAME_SUFFIXES`` and
    ``BAD_NAME_SUFFIXES_WITH_STOPS`` are stripped (case-insensitively) and
    every remaining non-alphanumeric character is dropped.

    :param name: company name as written by a user.
    :returns: normalized key, e.g. ``'Mirantis Inc.'`` -> ``'mirantis'``.

    Each suffix is passed through ``re.escape`` before being joined into
    the pattern. Without escaping, the dots in entries such as ``s.r.o.``
    matched any character, so ordinary words following a space (for
    example " Sirkov") were deleted from the name.
    """
    plain = '|'.join(re.escape(suffix) for suffix in BAD_NAME_SUFFIXES)
    dotted = '|'.join(re.escape(suffix)
                      for suffix in BAD_NAME_SUFFIXES_WITH_STOPS)
    # The pattern is built on every call so that changes to the module
    # level lists keep taking effect; ``re`` caches the compiled form.
    regex = '(\\b(' + plain + ')\\b)' + '|' + '((^|\\s)(' + dotted + '))'
    name = re.sub(regex, '', name, flags=re.IGNORECASE)
    return ''.join(c.lower() for c in name if c.isalnum())
|
|
|
|
|
|
def normalize_company_draft(name):
    """Replace commas with spaces and collapse whitespace runs to one space."""
    without_commas = re.sub(',', ' ', name)
    return re.sub(r'\s+', ' ', without_commas)
|
|
|
|
|
|
def validate_lp_display_name(lp_profile):
    """Replace a hidden display name in ``lp_profile`` with its ``name``.

    When ``lp_profile`` is truthy and its ``'display_name'`` equals
    ``"<email address hidden>"``, the value of ``'name'`` is copied over it
    in place. Nothing is returned.
    """
    if not lp_profile:
        return

    if lp_profile['display_name'] == "<email address hidden>":
        lp_profile['display_name'] = lp_profile['name']
|
|
|
|
|
|
def make_pipeline_processor(processors):
    """Combine ``processors`` into a single multi-pass record processor.

    :param processors: callables that each return an iterator of record
        handlers; the N-th handler of every processor belongs to pass N.
    :returns: ``pipeline_processor(record_generator)``, a generator function
        that, for each pass, re-reads all records from
        ``record_generator()`` and yields whatever the handlers of that
        pass produce.
    """

    def get_passes(_processors):
        # every processor yields one or more record handlers; collect the
        # next handler of each processor into one list per pass
        handler_sources = [factory() for factory in _processors]

        while True:
            handlers_in_pass = []
            for source in handler_sources:
                try:
                    handlers_in_pass.append(next(source))
                except StopIteration:
                    # this processor has no handler for the current pass
                    continue

            if not handlers_in_pass:
                # every processor is exhausted
                return
            yield handlers_in_pass

    def pipeline_processor(record_generator):
        for one_pass in get_passes(processors):
            # the producer is restarted for every pass
            for record in record_generator():
                for record_handler in one_pass:
                    produced = record_handler(record)
                    if not produced:
                        # handler returned nothing for this record
                        continue
                    for processed in produced:
                        yield processed

    return pipeline_processor
|