310 lines
11 KiB
Python
310 lines
11 KiB
Python
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
|
|
|
import os
|
|
import re
|
|
import shutil
|
|
|
|
from oslo_log import log as logging
|
|
import sh
|
|
import six
|
|
|
|
from stackalytics.processor import utils
|
|
|
|
|
|
LOG = logging.getLogger(__name__)
|
|
|
|
|
|
class Vcs(object):
    """Base object for Version Control System.

    Besides being the parent of concrete backends, this class is what
    ``get_vcs`` returns for unsupported repositories, so every method is a
    harmless no-op whose return value has the same shape as the one
    produced by the ``Git`` backend.
    """

    def __init__(self, repo, sources_root):
        """Remember the repo and make sure the sources root is usable.

        :param repo: repository description (stored as-is in ``self.repo``)
        :param sources_root: directory under which sources are kept; it is
            created when it does not exist yet
        :raises Exception: if the directory exists but is not writable
        """
        self.repo = repo
        self.sources_root = sources_root
        if not os.path.exists(sources_root):
            # makedirs rather than mkdir: a nested root whose parent
            # directories are missing must not fail with OSError.
            os.makedirs(sources_root)
        elif not os.access(sources_root, os.W_OK):
            raise Exception('Sources root folder %s is not writable' %
                            sources_root)

    def fetch(self):
        """Update the local copy; the dummy has no releases to report.

        :returns: empty release index (commit id -> release name)
        """
        return {}

    def log(self, branch, head_commit_id):
        """Iterate over commits of ``branch`` newer than ``head_commit_id``.

        :returns: an empty iterator, so callers can loop over the result
            the same way they do with a real backend
        """
        return iter(())

    def get_last_id(self, branch):
        """Return the id of the newest commit on ``branch``.

        :returns: None, the dummy backend knows no commits
        """
        return None
|
|
|
|
|
|
# (record field, git pretty-format placeholder) pairs. The order matters:
# it defines both the layout of the ``git log`` output and the positional
# group numbers of GIT_LOG_PATTERN.
GIT_LOG_PARAMS = [
    ('commit_id', '%H'),
    ('date', '%at'),
    ('author_name', '%an'),
    ('author_email', '%ae'),
    ('subject', '%s'),
    ('message', '%b'),
]
# One "<field>:<value>" line per parameter, followed by a "diff_stat:"
# marker after which git prints the --shortstat summary.
GIT_LOG_FORMAT = ''.join([(r[0] + ':' + r[1] + '%n')
                          for r in GIT_LOG_PARAMS]) + 'diff_stat:'
# Groups: 1 - files changed, 2 - the whole optional ", N word [M]" tail,
# 3 - first line counter, 4 - the word following it, 5 - optional second
# line counter.
# Raw strings keep regex escapes such as \d and \s from being treated as
# (invalid) Python string escapes; the resulting pattern text is unchanged.
DIFF_STAT_PATTERN = (r'[^\d]+(\d+)\s+[^\s]*\s+changed'
                     r'(,\s+(\d+)\s+([^\d\s]*)\s+(\d+)?)?')
# Positional groups 1..len(GIT_LOG_PARAMS) hold the field values in
# GIT_LOG_PARAMS order; the named group ``diff_stat`` holds everything up to
# the next "commit" text or the end of the input.
# NOTE: '\n' below is intentionally a real newline (non-raw string).
GIT_LOG_PATTERN = re.compile(''.join([(r[0] + ':(.*?)\n')
                                      for r in GIT_LOG_PARAMS]) +
                             r'diff_stat:(?P<diff_stat>.+?)(?=commit|\Z)',
                             re.DOTALL)

# "Name <email>" with the angle brackets optional.
CO_AUTHOR_PATTERN_RAW = (r'(?P<author_name>.*?)\s*'
                         r'<?(?P<author_email>[\w\.-]+@[\w\.-]+)>?')
CO_AUTHOR_PATTERN = re.compile(CO_AUTHOR_PATTERN_RAW, re.IGNORECASE)

# Markers searched for in commit messages; every pattern exposes the
# extracted value as the named group ``id``.
MESSAGE_PATTERNS = {
    'bug_id': re.compile(r'bug[\s#:]*(?P<id>\d+)', re.IGNORECASE),
    'blueprint_id': re.compile(r'\b(?:blueprint|bp)\b[ \t]*[#:]?[ \t]*'
                               r'(?P<id>[a-z0-9-]+)', re.IGNORECASE),
    'change_id': re.compile(r'Change-Id: (?P<id>I[0-9a-f]{40})',
                            re.IGNORECASE),
    'coauthor': re.compile(r'(?:Co-Authored-By|Also-By|Co-Author):'
                           r'\s*(?P<id>%s)\s' % CO_AUTHOR_PATTERN_RAW,
                           re.IGNORECASE)
}
|
|
|
|
|
|
class Git(Vcs):
    """Git backend keeping a local clone of the repo under the sources root.

    All git commands are run in the current working directory, so the
    methods of this class change the working directory of the whole process
    (``os.chdir``) to the clone folder before invoking git.
    """

    def __init__(self, repo, sources_root):
        """Work out the clone folder from the repo uri.

        :param repo: repository description; ``repo['uri']`` must end with
            ``<name>.git``
        :param sources_root: directory that holds the clones
        :raises Exception: if the uri does not look like a git uri or the
            sources root is not writable
        """
        super(Git, self).__init__(repo, sources_root)
        uri = self.repo['uri']
        match = re.search(r'([^/]+)\.git$', uri)
        if not match:
            raise Exception('Unexpected uri %s for git' % uri)

        # The folder is made absolute because the methods below chdir
        # around; a relative path would be resolved against whatever
        # directory a previous call happened to leave the process in.
        self.folder = os.path.abspath(
            os.path.join(self.sources_root, match.group(1)))
        self.release_index = {}

    def _checkout(self, branch):
        """Discard local changes and check out ``origin/<branch>``.

        Runs in the current working directory.

        :returns: True on success, False if any git command failed (the
            failure is logged)
        """
        try:
            sh.git('clean', '-d', '--force')
            sh.git('reset', '--hard')
            sh.git('checkout', 'origin/' + branch)
            return True
        except sh.ErrorReturnCode:
            LOG.error('Unable to checkout branch %(branch)s from repo '
                      '%(uri)s. Ignore it',
                      {'branch': branch, 'uri': self.repo['uri']},
                      exc_info=True)
            return False

    def fetch(self):
        """Clone the repo, or fetch updates into the existing clone.

        An existing clone whose ``remote.origin.url`` differs from the
        configured uri is removed and cloned again. Git failures are logged
        and ignored.

        :returns: release index (commit id -> release name); empty when the
            clone is unavailable
        """
        LOG.debug('Fetching repo uri %s', self.repo['uri'])

        if os.path.exists(self.folder):
            os.chdir(self.folder)
            try:
                uri = str(
                    sh.git('config', '--get', 'remote.origin.url')).strip()
            except sh.ErrorReturnCode:
                LOG.error('Unable to get config for git repo %s. Ignore it',
                          self.repo['uri'], exc_info=True)
                return {}

            if uri != self.repo['uri']:
                LOG.warning('Repo uri %(uri)s differs from cloned %(old)s',
                            {'uri': self.repo['uri'], 'old': uri})
                # step out of the folder before deleting it
                os.chdir('..')
                shutil.rmtree(self.folder)

        if not os.path.exists(self.folder):
            # git clones into a sub-folder of the current directory, so
            # move to the parent of the expected clone folder first
            os.chdir(os.path.dirname(self.folder))
            try:
                sh.git('clone', self.repo['uri'])
                os.chdir(self.folder)
            except sh.ErrorReturnCode:
                LOG.error('Unable to clone git repo %s. Ignore it',
                          self.repo['uri'], exc_info=True)
        else:
            os.chdir(self.folder)
            try:
                sh.git('fetch')
            except sh.ErrorReturnCode:
                LOG.error('Unable to fetch git repo %s. Ignore it',
                          self.repo['uri'], exc_info=True)

        return self._get_release_index()

    def _get_release_index(self):
        """Map every commit id to the (lower-cased) release containing it.

        The index is built from ``repo['releases']`` on the first call that
        finds it empty and is kept in ``self.release_index`` afterwards.

        :returns: dict commit id -> release name; empty when there is no
            clone folder
        """
        if not os.path.exists(self.folder):
            return {}

        LOG.debug('Get release index for repo uri: %s', self.repo['uri'])
        os.chdir(self.folder)
        if not self.release_index:
            for release in self.repo.get('releases', []):
                release_name = release['release_name'].lower()

                branch = release.get('branch', 'master')
                if not self._checkout(branch):
                    continue

                if 'tag_from' in release:
                    tag_range = release['tag_from'] + '..' + release['tag_to']
                else:
                    tag_range = release['tag_to']

                try:
                    git_log_iterator = sh.git('log', '--pretty=%H', tag_range,
                                              _tty_out=False)
                    for commit_id in git_log_iterator:
                        self.release_index[commit_id.strip()] = release_name
                except sh.ErrorReturnCode:
                    LOG.error('Unable to get log of git repo %s. Ignore it',
                              self.repo['uri'], exc_info=True)
        return self.release_index

    @staticmethod
    def _parse_diff_stat(diff_stat_str):
        """Extract change counters from the ``--shortstat`` text of a commit.

        :returns: tuple (files_changed, lines_added, lines_deleted); all
            zeros when the text carries no statistics
        """
        diff_rec = re.search(DIFF_STAT_PATTERN, diff_stat_str)
        if not diff_rec:
            return 0, 0, 0

        files_changed = int(diff_rec.group(1))
        has_line_counters = diff_rec.group(2)
        lines_added = diff_rec.group(3)
        counter_kind = diff_rec.group(4)
        lines_deleted = diff_rec.group(5)

        # With a single counter the pattern always puts it into group 3;
        # the word after it tells whether it counts deletions.
        # startswith() is used instead of [0] so that an empty word cannot
        # raise IndexError.
        if (has_line_counters and not lines_deleted and
                counter_kind.startswith('d')):
            lines_deleted = lines_added
            lines_added = 0

        return files_changed, int(lines_added or 0), int(lines_deleted or 0)

    def log(self, branch, head_commit_id):
        """Yield commit records of ``branch`` made after ``head_commit_id``.

        Merge commits, commits with the subject 'Update git submodules',
        commits without author email and commits that belong to the
        'ignored' release are skipped. Nothing is yielded when the clone
        folder is missing, the checkout fails or ``git log`` fails.

        :param branch: branch name, checked out as ``origin/<branch>``
        :param head_commit_id: id of the last already processed commit;
            falsy to walk the whole history
        :returns: generator of dicts with the GIT_LOG_PARAMS fields plus
            files_changed, lines_added, lines_deleted, module, branches,
            release and the markers found by MESSAGE_PATTERNS
        """
        LOG.debug('Parsing git log for repo uri %s', self.repo['uri'])

        # A failed clone is deliberately ignored by fetch(); do not blow
        # up with OSError on chdir in that case.
        if not os.path.exists(self.folder):
            return

        os.chdir(self.folder)

        if not self._checkout(branch):
            return

        commit_range = 'HEAD'
        if head_commit_id:
            commit_range = head_commit_id + '..HEAD'

        try:
            output = sh.git('log', '--pretty=' + GIT_LOG_FORMAT, '--shortstat',
                            '-M', '--no-merges', commit_range, _tty_out=False,
                            _decode_errors='ignore', _encoding='utf8')
        except sh.ErrorReturnCode:
            LOG.error('Unable to get log of git repo %s. Ignore it',
                      self.repo['uri'], exc_info=True)
            return

        for rec in re.finditer(GIT_LOG_PATTERN, six.text_type(output)):
            # positional groups follow the order of GIT_LOG_PARAMS
            commit = {}
            for group_no, param in enumerate(GIT_LOG_PARAMS, 1):
                commit[param[0]] = rec.group(group_no)

            # ignore machine/script produced submodule auto updates
            if commit['subject'] == u'Update git submodules':
                continue

            if not commit['author_email']:
                # ignore commits with empty email (there are some < Essex)
                continue

            commit['author_email'] = utils.keep_safe_chars(
                commit['author_email'])

            (commit['files_changed'],
             commit['lines_added'],
             commit['lines_deleted']) = self._parse_diff_stat(
                rec.group('diff_stat'))

            for pattern_name, pattern in six.iteritems(MESSAGE_PATTERNS):
                collection = set()
                for item in re.finditer(pattern, commit['message']):
                    collection.add(item.group('id'))
                if collection:
                    commit[pattern_name] = list(collection)

            commit['date'] = int(commit['date'])
            commit['module'] = self.repo['module']
            commit['branches'] = set([branch])
            commit['release'] = self.release_index.get(commit['commit_id'])

            if commit['release'] == 'ignored':
                # drop commits that are marked by 'ignored' release
                continue

            if 'blueprint_id' in commit:
                # blueprint names are qualified with the module name
                commit['blueprint_id'] = [(commit['module'] + ':' + bp_name)
                                          for bp_name
                                          in commit['blueprint_id']]

            if 'coauthor' in commit:
                verified_coauthors = []
                for coauthor in commit['coauthor']:
                    m = re.match(CO_AUTHOR_PATTERN, coauthor)
                    if m and utils.check_email_validity(
                            m.group("author_email")):
                        verified_coauthors.append(m.groupdict())

                if verified_coauthors:
                    commit['coauthor'] = verified_coauthors
                else:
                    del commit['coauthor']  # no valid authors

            yield commit

    def get_last_id(self, branch):
        """Return the id of the newest commit on ``origin/<branch>``.

        :returns: commit id string, or None when the clone folder is
            missing, the checkout fails or git cannot resolve HEAD
        """
        LOG.debug('Get head commit for repo uri: %s', self.repo['uri'])

        if not os.path.exists(self.folder):
            return None

        os.chdir(self.folder)
        if not self._checkout(branch):
            return None

        try:
            return str(sh.git('rev-parse', 'HEAD')).strip()
        except sh.ErrorReturnCode:
            LOG.error('Unable to get HEAD for git repo %s. Ignore it',
                      self.repo['uri'], exc_info=True)

        return None
|
|
|
|
|
|
def get_vcs(repo, sources_root):
    """Create the VCS backend that matches the repo uri.

    :param repo: repository description; ``repo['uri']`` selects the backend
    :param sources_root: directory under which repo sources are kept
    :returns: ``Git`` for uris ending with ``.git``, otherwise the no-op
        ``Vcs`` base object
    """
    uri = repo['uri']
    LOG.debug('Factory is asked for VCS uri: %s', uri)
    if re.search(r'\.git$', uri):
        return Git(repo, sources_root)

    LOG.warning('Unsupported VCS, fallback to dummy')
    # The second argument of Vcs is the sources root. Passing the uri
    # here would make Vcs.__init__ create or check a directory named after
    # the uri.
    return Vcs(repo, sources_root)
|