
The logic that decides where to fetch refs from is getting complicated and depends on the full repo URL and ref. This information can be different for each revision of a change, but is only needed in the rare case that a local repo was damaged or removed. Rather than storing all of this information in the database for what should be a rare event, simply sync any changes that have missing refs and use the normal code path for deciding where to fetch refs. Change-Id: I4308db51b4847163b255686a62d5bad4e6226b4d
487 lines
17 KiB
Python
487 lines
17 KiB
Python
# Copyright 2014 OpenStack Foundation
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import datetime
|
|
import logging
|
|
import difflib
|
|
import itertools
|
|
import os
|
|
import re
|
|
|
|
import git
|
|
import gitdb
|
|
|
|
# Index constants used when addressing diff data structures.
#
# OLD / NEW pick the old or new side of a pair, e.g. DiffChunk.range[OLD]
# or one element of an (old, new) entry in DiffChunk.lines.
OLD, NEW = 0, 1
# START / END pick the first or last line number of a DiffChunk.range side.
START, END = 0, 1
# LINENO is the position of the line number inside a per-line tuple.
# LINE is not referenced by the code visible in this file.
LINENO, LINE = 0, 1
|
|
|
|
class GitTimeZone(datetime.tzinfo):
    """Because we can't have nice things.

    A fixed-offset tzinfo built from a number of seconds.  It reports no
    DST adjustment and has no zone name.
    """

    def __init__(self, offset_seconds):
        # Kept as raw seconds; converted to a timedelta on demand.
        self._offset = offset_seconds

    def utcoffset(self, dt):
        """Return the stored offset as a timedelta (dt is ignored)."""
        delta = datetime.timedelta(seconds=self._offset)
        return delta

    def dst(self, dt):
        """Return a zero timedelta: no daylight-saving adjustment."""
        return datetime.timedelta()

    def tzname(self, dt):
        """Return None: this zone has no name."""
        return None
|
|
|
|
|
|
class CommitBlob(object):
    """Minimal blob stand-in whose only attribute is a fixed path."""

    def __init__(self):
        # Every instance carries the same pseudo-path.
        self.path = '/COMMIT_MSG'
|
|
|
|
|
|
class CommitContext(object):
    """A git.diff.Diff for commit messages."""

    def decorateGitTime(self, seconds, tz):
        """Format a git timestamp as 'YYYY-MM-DD HH:MM:SS <zone><offset>'.

        :param seconds: Seconds since the epoch.
        :param tz: Timezone offset in seconds; it is negated before being
            handed to GitTimeZone.
        :returns: The formatted date string.
        """
        dt = datetime.datetime.fromtimestamp(seconds, GitTimeZone(-tz))
        return dt.strftime('%Y-%m-%d %H:%M:%S %Z%z')

    @staticmethod
    def _asText(value):
        """Return value as text, decoding UTF-8 byte strings."""
        # isinstance() rather than an exact type() comparison, and no use
        # of the Python-2-only ``unicode`` builtin, so this works on both
        # Python 2 (where bytes is str) and Python 3.
        if isinstance(value, bytes):
            return value.decode('utf8')
        return value

    def decorateMessage(self, commit):
        """Put the Gerrit commit metadata at the front of the message.

        e.g.:
        Parent: cc8a51ca (Initial commit)                1
        Author: Robert Collins <rbtcollins@hp.com>       2
        AuthorDate: 2014-05-27 14:05:47 +1200            3
        Commit: Robert Collins <rbtcollins@hp.com>       4
        CommitDate: 2014-05-27 14:07:57 +1200            5
                                                         6

        :param commit: A git.objects.commit object or None.
        :returns: A list of text lines (each keeping its line ending): the
            metadata header followed by the commit message lines.  An
            empty list when commit is falsy.
        """
        # NB: If folk report that commits have comments at the wrong place
        # Then this function, which reproduces gerrit behaviour, will need
        # to be fixed (e.g. by making the behaviour match more closely.
        if not commit:
            return []
        if commit.parents:
            # Only the first parent is shown, abbreviated to 8 characters.
            parentsha = commit.parents[0].hexsha[:8]
        else:
            parentsha = None
        author = commit.author
        committer = commit.committer
        author_date = self.decorateGitTime(
            commit.authored_date, commit.author_tz_offset)
        commit_date = self.decorateGitTime(
            commit.committed_date, commit.committer_tz_offset)
        author_email = self._asText(author.email)
        committer_email = self._asText(committer.email)
        return [u"Parent: %s\n" % parentsha,
                u"Author: %s <%s>\n" % (author.name, author_email),
                u"AuthorDate: %s\n" % author_date,
                u"Commit: %s <%s>\n" % (committer.name, committer_email),
                u"CommitDate: %s\n" % commit_date,
                u"\n"] + commit.message.splitlines(True)

    def __init__(self, old, new):
        """Create a CommitContext.

        :param old: A git.objects.commit object or None.
        :param new: A git.objects.commit object.
        """
        self.rename_from = self.rename_to = None
        # With no old commit the message is presented as a new file.
        if old is None:
            self.new_file = True
        else:
            self.new_file = False
        self.deleted_file = False
        self.a_blob = CommitBlob()
        self.b_blob = CommitBlob()
        self.a_path = self.a_blob.path
        self.b_path = self.b_blob.path
        # Unified diff text of the decorated old and new messages.
        self.diff = ''.join(difflib.unified_diff(
            self.decorateMessage(old), self.decorateMessage(new),
            fromfile="/a/COMMIT_MSG", tofile="/b/COMMIT_MSG"))
|
|
|
|
|
|
class DiffChunk(object):
    """A run of paired old/new diff lines plus the line range it spans."""

    def __init__(self):
        self.oldlines = []
        self.newlines = []
        self.first = False
        self.last = False
        self.lines = []
        self.calcRange()

    def __repr__(self):
        old_span = self.range[OLD]
        new_span = self.range[NEW]
        return '<%s old lines %s-%s / new lines %s-%s>' % (
            self.__class__.__name__,
            old_span[START], old_span[END],
            new_span[START], new_span[END])

    def _updateBound(self, row, edge):
        """Fill in still-unset (zero) bounds for one edge from a line pair.

        Returns True once both the old and the new bound for that edge
        are non-zero.
        """
        for side in (OLD, NEW):
            if (self.range[side][edge] == 0 and
                    row[side][LINENO] is not None):
                self.range[side][edge] = row[side][LINENO]
        return self.range[OLD][edge] != 0 and self.range[NEW][edge] != 0

    def calcRange(self):
        """Recompute self.range from self.lines.

        self.range becomes [[old_start, old_end], [new_start, new_end]];
        an entry stays 0 when no line number was found for it.
        """
        self.range = [[0, 0],
                      [0, 0]]
        # Scan forwards for the first line numbers on each side...
        for row in self.lines:
            if self._updateBound(row, START):
                break
        # ...and backwards for the last ones.
        for row in self.lines[::-1]:
            if self._updateBound(row, END):
                break

    def indexOfLine(self, oldnew, lineno):
        """Return the index in self.lines of the given line number.

        oldnew selects which side of each pair to compare against.
        Returns None when no line matches.
        """
        return next(
            (position for position, row in enumerate(self.lines)
             if row[oldnew][LINENO] == lineno),
            None)
|
|
|
|
class DiffContextChunk(DiffChunk):
    """DiffChunk flavour flagged as context (``context`` is True)."""

    context = True
|
|
|
|
class DiffChangedChunk(DiffChunk):
    """DiffChunk flavour flagged as changed (``context`` is False)."""

    context = False
|
|
|
|
class DiffFile(object):
    """Accumulates the lines of one file's diff into a list of chunks.

    Lines are added through addDiffLines / addContextLine; consecutive
    lines of the same kind share a chunk.  finalize() closes the chunk
    in progress and appends it to ``chunks``.
    """

    def __init__(self):
        self.newname = 'Unknown File'
        self.oldname = 'Unknown File'
        self.chunks = []
        self.current_chunk = None
        # Line numbers assigned to the next old / new line that is added.
        self.old_lineno = 0
        self.new_lineno = 0
        # Running (new - old) line count used to pad the shorter side.
        self.offset = 0

    def finalize(self):
        """Close the chunk in progress, if any, and add it to chunks."""
        if not self.current_chunk:
            return
        # Materialise the pairs: chunk.lines is sliced and iterated more
        # than once by DiffChunk, which a lazy zip object (Python 3)
        # does not support.
        self.current_chunk.lines = list(zip(self.current_chunk.oldlines,
                                            self.current_chunk.newlines))
        if not self.chunks:
            self.current_chunk.first = True
        else:
            # The previously appended chunk is no longer the last one.
            self.chunks[-1].last = False
        self.current_chunk.last = True
        self.current_chunk.calcRange()
        self.chunks.append(self.current_chunk)
        self.current_chunk = None

    def addDiffLines(self, old, new):
        """Add removed (old) and added (new) lines to a changed chunk.

        Each old line is stored as (lineno, '-', line) and each new line
        as (lineno, '+', line).  The shorter side is then padded with
        (None, '', '') entries so both sides have the same length.
        """
        if (self.current_chunk and
                not isinstance(self.current_chunk, DiffChangedChunk)):
            self.finalize()
        if not self.current_chunk:
            self.current_chunk = DiffChangedChunk()
        for l in old:
            self.current_chunk.oldlines.append((self.old_lineno, '-', l))
            self.old_lineno += 1
            self.offset -= 1
        for l in new:
            self.current_chunk.newlines.append((self.new_lineno, '+', l))
            self.new_lineno += 1
            self.offset += 1
        # More new than old lines: pad the old side.
        while self.offset > 0:
            self.current_chunk.oldlines.append((None, '', ''))
            self.offset -= 1
        # More old than new lines: pad the new side.
        while self.offset < 0:
            self.current_chunk.newlines.append((None, '', ''))
            self.offset += 1

    def addNewLine(self, line):
        """Ensure the chunk in progress is a changed chunk.

        NOTE(review): ``line`` itself is not recorded anywhere by this
        method; confirm whether that is intended.
        """
        if (self.current_chunk and
                not isinstance(self.current_chunk, DiffChangedChunk)):
            self.finalize()
        if not self.current_chunk:
            self.current_chunk = DiffChangedChunk()

    def addContextLine(self, line):
        """Add an unchanged line to both sides of a context chunk."""
        if (self.current_chunk and
                not isinstance(self.current_chunk, DiffContextChunk)):
            self.finalize()
        if not self.current_chunk:
            self.current_chunk = DiffContextChunk()
        self.current_chunk.oldlines.append((self.old_lineno, ' ', line))
        self.current_chunk.newlines.append((self.new_lineno, ' ', line))
        self.old_lineno += 1
        self.new_lineno += 1
|
|
|
|
class GitCheckoutError(Exception):
    """Error carrying a message, also exposed as the ``msg`` attribute."""

    def __init__(self, msg):
        Exception.__init__(self, msg)
        # Keep the text reachable without going through args.
        self.msg = msg
|
|
|
|
class GitCloneError(Exception):
    """Error carrying a message, also exposed as the ``msg`` attribute."""

    def __init__(self, msg):
        Exception.__init__(self, msg)
        # Keep the text reachable without going through args.
        self.msg = msg
|
|
|
|
class Repo(object):
    """Wrapper around a local git repository stored at ``path``."""

    def __init__(self, url, path):
        """Record the repo location, cloning it if the path is missing.

        :param url: URL to clone from, or None.
        :param path: Filesystem path of the local repository.
        :raises GitCloneError: if path does not exist and url is None.
        """
        self.log = logging.getLogger('gertty.gitrepo')
        self.url = url
        self.path = path
        self.differ = difflib.Differ()
        if not os.path.exists(path):
            if url is None:
                raise GitCloneError("No URL available for git clone")
            git.Repo.clone_from(self.url, self.path)

    def hasCommit(self, sha):
        """Return True if the repository can resolve sha to a commit."""
        repo = git.Repo(self.path)
        try:
            repo.commit(sha)
        except gitdb.exc.BadObject:
            return False
        return True

    def fetch(self, url, refspec):
        """Run ``git fetch url refspec`` in the repository."""
        repo = git.Repo(self.path)
        try:
            repo.git.fetch(url, refspec)
        except AssertionError:
            # Retry exactly once.  NOTE(review): presumably works around
            # an intermittent assertion raised inside GitPython -- confirm.
            repo.git.fetch(url, refspec)

    def checkout(self, ref):
        """Check out ref.

        :raises GitCheckoutError: if git fails; the message is git's
            stderr with tab characters replaced by spaces.
        """
        repo = git.Repo(self.path)
        try:
            repo.git.checkout(ref)
        except git.exc.GitCommandError as e:
            raise GitCheckoutError(e.stderr.replace('\t', ' '))

    def cherryPick(self, ref):
        """Cherry-pick ref.

        :raises GitCheckoutError: if git fails; the message is git's
            stderr with tab characters replaced by spaces.
        """
        repo = git.Repo(self.path)
        try:
            repo.git.cherry_pick(ref)
        except git.exc.GitCommandError as e:
            raise GitCheckoutError(e.stderr.replace('\t', ' '))

    def diffstat(self, old, new):
        """Return ``git diff --numstat`` output between old and new.

        :returns: One list per output line, split on tabs (added,
            removed, filename).  Empty output yields a single ``['']``
            entry, so callers must tolerate short rows.
        """
        repo = git.Repo(self.path)
        diff = repo.git.diff('-M', '--color=never', '--numstat', old, new)
        ret = []
        for x in diff.split('\n'):
            # Added, removed, filename
            ret.append(x.split('\t'))
        return ret

    def intralineDiff(self, old, new):
        """Annotate removed and added lines with intraline changes.

        :param old: A list of old lines.
        :param new: A list of new lines.
        :returns: A tuple (output_old, output_new).  Each entry is either
            a (style, text) tuple for a whole line, or, when difflib
            supplied a '?' hint line, a list of (style, text) segments.
            Styles are '<kind>-line' or '<kind>-word', where kind is
            'added', 'removed' or 'context'.
        """
        # A line is only emitted once the following difflib line has been
        # seen, because a '?' hint applies to the line before it.
        prevline = None
        prevstyle = None
        output_old = []
        output_new = []
        for line in self.differ.compare(old, new):
            key = line[0]
            rest = line[2:]
            if key == '?':
                result = []
                accumulator = ''
                emphasis = False
                rest = rest[:-1]  # It has a newline.
                for i, c in enumerate(prevline):
                    # The hint line may be shorter than the text line.
                    if i >= len(rest):
                        indicator = ' '
                    else:
                        indicator = rest[i]
                    if indicator != ' ' and not emphasis:
                        # changing from not emph to emph
                        if accumulator:
                            result.append((prevstyle+'-line', accumulator))
                        accumulator = ''
                        emphasis = True
                    elif indicator == ' ' and emphasis:
                        # changing from emph to not emph
                        if accumulator:
                            result.append((prevstyle+'-word', accumulator))
                        accumulator = ''
                        emphasis = False
                    accumulator += c
                if accumulator:
                    if emphasis:
                        result.append((prevstyle+'-word', accumulator))
                    else:
                        result.append((prevstyle+'-line', accumulator))
                if prevstyle == 'added':
                    output_new.append(result)
                elif prevstyle == 'removed':
                    output_old.append(result)
                # The pending line has been emitted in segmented form.
                prevline = None
                continue
            if prevline is not None:
                if prevstyle == 'added' or prevstyle == 'context':
                    output_new.append((prevstyle+'-line', prevline))
                if prevstyle == 'removed' or prevstyle == 'context':
                    output_old.append((prevstyle+'-line', prevline))
            if key == '+':
                prevstyle = 'added'
            elif key == '-':
                prevstyle = 'removed'
            elif key == ' ':
                prevstyle = 'context'
            prevline = rest
        # Flush the final pending line with the same rules as inside the
        # loop, so a trailing context line lands on both sides and the
        # two outputs keep one entry per input line.
        if prevline is not None:
            if prevstyle == 'added' or prevstyle == 'context':
                output_new.append((prevstyle+'-line', prevline))
            if prevstyle == 'removed' or prevstyle == 'context':
                output_old.append((prevstyle+'-line', prevline))
        return output_old, output_new

    # Matches a unified diff hunk header; groups 1 and 3 are the old and
    # new starting line numbers.
    header_re = re.compile(r'@@ -(\d+)(,\d+)? \+(\d+)(,\d+)? @@')

    def diff(self, old, new, context=10000, show_old_commit=False):
        """Create a diff from old to new.

        Note that the commit message is also diffed, and listed as /COMMIT_MSG.

        :param old: Revision to diff from.
        :param new: Revision to diff to.
        :param context: Number of context lines requested from git.
        :param show_old_commit: When true, diff the old commit message
            against the new one; otherwise the new message is presented
            as a new file.
        :returns: A list of DiffFile objects, the commit message first.
        :raises Exception: on a diff line that is not understood.
        """
        repo = git.Repo(self.path)
        oldc = repo.commit(old)
        newc = repo.commit(new)
        files = []
        extra_contexts = []
        if show_old_commit:
            extra_contexts.append(CommitContext(oldc, newc))
        else:
            extra_contexts.append(CommitContext(None, newc))
        contexts = itertools.chain(
            extra_contexts, oldc.diff(
                newc, color='never', create_patch=True, U=context))
        for diff_context in contexts:
            # Each iteration of this is a file
            f = DiffFile()
            if diff_context.a_blob:
                f.oldname = diff_context.a_blob.path
            if diff_context.b_blob:
                f.newname = diff_context.b_blob.path
            # TODO(jeblair): if/when https://github.com/gitpython-developers/GitPython/pull/266 merges,
            # remove above 4 lines and replace with these two:
            # f.oldname = diff_context.a_path
            # f.newname = diff_context.b_path
            if diff_context.new_file:
                f.oldname = 'Empty file'
            if diff_context.deleted_file:
                f.newname = 'Empty file'
            files.append(f)
            if diff_context.rename_from:
                f.oldname = diff_context.rename_from
            if diff_context.rename_to:
                f.newname = diff_context.rename_to
            # Removed / added lines collected until the current run of
            # changes ends.
            oldchunk = []
            newchunk = []
            prev_key = ''
            diff_lines = diff_context.diff.split('\n')
            for i, line in enumerate(diff_lines):
                last_line = (i == len(diff_lines)-1)
                if line.startswith('---'):
                    continue
                if line.startswith('+++'):
                    continue
                if line.startswith('@@'):
                    # Hunk header: reset the running line numbers.
                    m = self.header_re.match(line)
                    f.old_lineno = int(m.group(1))
                    f.new_lineno = int(m.group(3))
                    continue
                if not line:
                    if prev_key != '\\':
                        # Strangely, we get an extra newline in the
                        # diff in the case that the last line is "\ No
                        # newline at end of file". This is a
                        # workaround for that.
                        prev_key = ''
                        line = 'X '
                    else:
                        line = ' '
                key = line[0]
                rest = line[1:]
                if key == '\\':
                    # This is for "\ No newline at end of file" which
                    # follows either a -, + or ' ' line to indicate
                    # which file it's talking about (or both). For
                    # now, treat it like normal text and let the user
                    # infer from context that it's not actually in the
                    # file. Potential TODO: highlight it to make that
                    # more clear.
                    if prev_key:
                        key = prev_key
                    else:
                        key = ' '
                    prev_key = '\\'
                if key == '-':
                    prev_key = '-'
                    oldchunk.append(rest)
                    # On the last line fall through to flush the chunk.
                    if not last_line:
                        continue
                if key == '+':
                    prev_key = '+'
                    newchunk.append(rest)
                    # On the last line fall through to flush the chunk.
                    if not last_line:
                        continue
                prev_key = ''
                # end of chunk
                if oldchunk or newchunk:
                    oldchunk, newchunk = self.intralineDiff(oldchunk, newchunk)
                    f.addDiffLines(oldchunk, newchunk)
                oldchunk = []
                newchunk = []
                if key == ' ':
                    f.addContextLine(rest)
                    continue
                # Git metadata lines that carry no file content.
                if line.startswith("similarity index"):
                    continue
                if line.startswith("rename"):
                    continue
                if line.startswith("index"):
                    continue
                if line.startswith("Binary files"):
                    continue
                if not last_line:
                    raise Exception("Unhandled line: %s" % line)
            f.finalize()
        return files

    def getFile(self, old, new, path):
        """Return the whole file at path in commit new as one DiffFile.

        Every line is added as a context line, numbered from 1 on both
        sides.  The ``old`` argument is accepted but not used.
        """
        f = DiffFile()
        f.oldname = path
        f.newname = path
        f.old_lineno = 1
        f.new_lineno = 1
        repo = git.Repo(self.path)
        newc = repo.commit(new)
        blob = newc.tree[path]
        for line in blob.data_stream.read().splitlines():
            f.addContextLine(line)
        f.finalize()
        return f
|