boartty/gertty/gitrepo.py
James E. Blair 6265622074 Sync change when missing refs
The logic to decide from where to fetch refs is getting complicated
and depends on the full repo url and ref.  This information can be
different for each revision of a change, but is only needed in the
rare case that a local repo was damaged or removed.  Rather than
storing all of this information in the database for what should be
a rare event, simply sync any changes that have missing refs and
use the normal code path for deciding where to fetch refs.

Change-Id: I4308db51b4847163b255686a62d5bad4e6226b4d
2015-04-06 07:47:23 -07:00

487 lines
17 KiB
Python

# Copyright 2014 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import datetime
import logging
import difflib
import itertools
import os
import re
import git
import gitdb
# Indexes selecting the old or new side of a paired structure (a chunk's
# ``range`` or one entry of a chunk's ``lines``).
OLD, NEW = 0, 1
# Indexes selecting the first or last line number inside one side of a
# chunk's ``range``.
START, END = 0, 1
# LINENO indexes the line number stored at the front of a diff-line tuple.
# NOTE(review): LINE is not referenced in the visible code -- confirm which
# tuple field it is meant to select before relying on it.
LINENO, LINE = 0, 1
class GitTimeZone(datetime.tzinfo):
    """Fixed-offset tzinfo built from an offset expressed in seconds.

    Because we can't have nice things.
    """

    def __init__(self, offset_seconds):
        # Offset, in seconds, that utcoffset() reports for every datetime.
        self._offset = offset_seconds

    def utcoffset(self, dt):
        """Return the stored offset as a timedelta; ``dt`` is ignored."""
        delta = datetime.timedelta(seconds=self._offset)
        return delta

    def dst(self, dt):
        """Return a zero timedelta: no DST adjustment is ever reported."""
        return datetime.timedelta()

    def tzname(self, dt):
        """Return None: this zone has no name."""
        return None
class CommitBlob(object):
    """Minimal blob-like object exposing only a ``path`` attribute."""

    def __init__(self):
        # Fixed pseudo-path under which the commit message is listed.
        self.path = '/COMMIT_MSG'
class CommitContext(object):
    """A git.diff.Diff for commit messages."""

    def __init__(self, old, new):
        """Create a CommitContext.

        The resulting object carries the attributes that the diff code
        reads from a diff entry (blobs, paths, rename/new/deleted flags
        and the ``diff`` text), with ``diff`` holding a unified diff of the
        two decorated commit messages.

        :param old: A git.objects.commit object or None.
        :param new: A git.objects.commit object.
        """
        self.rename_from = self.rename_to = None
        # With no old commit the message is presented as a new file.
        self.new_file = old is None
        self.deleted_file = False
        self.a_blob = CommitBlob()
        self.b_blob = CommitBlob()
        self.a_path = self.a_blob.path
        self.b_path = self.b_blob.path
        self.diff = ''.join(difflib.unified_diff(
            self.decorateMessage(old), self.decorateMessage(new),
            fromfile="/a/COMMIT_MSG", tofile="/b/COMMIT_MSG"))

    @staticmethod
    def _toText(value):
        """Return ``value`` as text, decoding byte strings as UTF-8.

        Values that are not byte strings are returned unchanged.  Checking
        for ``bytes`` (an alias of ``str`` on Python 2) avoids depending on
        the Python-2-only ``unicode`` builtin.
        """
        if isinstance(value, bytes):
            return value.decode('utf8')
        return value

    def decorateGitTime(self, seconds, tz):
        """Format a timestamp as 'YYYY-MM-DD HH:MM:SS <name><offset>'.

        :param seconds: Timestamp passed to datetime.fromtimestamp().
        :param tz: Offset in seconds; it is negated before being used as
            the UTC offset of the resulting time zone.
        """
        dt = datetime.datetime.fromtimestamp(seconds, GitTimeZone(-tz))
        return dt.strftime('%Y-%m-%d %H:%M:%S %Z%z')

    def decorateMessage(self, commit):
        """Put the Gerrit commit metadata at the front of the message.

        e.g.:

        Parent: cc8a51ca (Initial commit)              1
        Author: Robert Collins <rbtcollins@hp.com>     2
        AuthorDate: 2014-05-27 14:05:47 +1200          3
        Commit: Robert Collins <rbtcollins@hp.com>     4
        CommitDate: 2014-05-27 14:07:57 +1200          5
                                                       6

        :param commit: A commit object, or a false value.
        :returns: A list of lines (each keeping its line ending): the
            header lines followed by the commit message lines.  An empty
            list when ``commit`` is false.
        """
        # NB: If folk report that commits have comments at the wrong place
        # then this function, which reproduces gerrit behaviour, will need
        # to be fixed (e.g. by making the behaviour match more closely).
        if not commit:
            return []
        if commit.parents:
            # Only the first parent is shown, abbreviated to 8 characters.
            parentsha = commit.parents[0].hexsha[:8]
        else:
            parentsha = None
        author = commit.author
        committer = commit.committer
        author_date = self.decorateGitTime(
            commit.authored_date, commit.author_tz_offset)
        commit_date = self.decorateGitTime(
            commit.committed_date, commit.committer_tz_offset)
        author_email = self._toText(author.email)
        committer_email = self._toText(committer.email)
        return [u"Parent: %s\n" % parentsha,
                u"Author: %s <%s>\n" % (author.name, author_email),
                u"AuthorDate: %s\n" % author_date,
                u"Commit: %s <%s>\n" % (committer.name, committer_email),
                u"CommitDate: %s\n" % commit_date,
                u"\n"] + commit.message.splitlines(True)
class DiffChunk(object):
    """A run of paired old/new diff lines and the line range they span.

    ``lines`` holds (old_entry, new_entry) pairs; the first field of each
    entry is its line number, or None for a padding entry.  ``range`` is
    ``[[old_start, old_end], [new_start, new_end]]`` with 0 meaning
    "not found".
    """

    def __init__(self):
        self.first = False
        self.last = False
        self.oldlines = []
        self.newlines = []
        self.lines = []
        self.calcRange()

    def __repr__(self):
        old_span = self.range[OLD]
        new_span = self.range[NEW]
        return '<%s old lines %s-%s / new lines %s-%s>' % (
            self.__class__.__name__,
            old_span[START], old_span[END],
            new_span[START], new_span[END])

    def calcRange(self):
        """Recompute ``range`` from the numbered entries in ``lines``."""
        bounds = [[0, 0], [0, 0]]
        self.range = bounds
        # Forward scan: first numbered entry on each side is its start.
        for pair in self.lines:
            for side in (OLD, NEW):
                number = pair[side][LINENO]
                if bounds[side][START] == 0 and number is not None:
                    bounds[side][START] = number
            if bounds[OLD][START] != 0 and bounds[NEW][START] != 0:
                break
        # Backward scan: last numbered entry on each side is its end.
        for pair in self.lines[::-1]:
            for side in (OLD, NEW):
                number = pair[side][LINENO]
                if bounds[side][END] == 0 and number is not None:
                    bounds[side][END] = number
            if bounds[OLD][END] != 0 and bounds[NEW][END] != 0:
                break

    def indexOfLine(self, oldnew, lineno):
        """Return the index in ``lines`` of the entry numbered ``lineno``.

        ``oldnew`` selects which side of each pair is inspected.  Returns
        None when no entry matches.
        """
        hits = (position for position, pair in enumerate(self.lines)
                if pair[oldnew][LINENO] == lineno)
        return next(hits, None)
class DiffContextChunk(DiffChunk):
    """DiffChunk variant flagged as context (``context`` is True)."""

    context = True
class DiffChangedChunk(DiffChunk):
    """DiffChunk variant flagged as changed (``context`` is False)."""

    context = False
class DiffFile(object):
    """Accumulates the diff of one file as a list of chunks.

    Lines are fed in through the add* methods; consecutive lines of the
    same kind are collected into ``current_chunk``, which is moved onto
    ``chunks`` by finalize().
    """

    def __init__(self):
        self.newname = 'Unknown File'
        self.oldname = 'Unknown File'
        self.chunks = []
        self.current_chunk = None
        # Line numbers that will be assigned to the next old/new line.
        self.old_lineno = 0
        self.new_lineno = 0
        # Running (new - old) count of changed lines not yet balanced by
        # padding entries; see addDiffLines().
        self.offset = 0

    def _startChunk(self, chunk_class):
        """Make ``current_chunk`` an instance of ``chunk_class``.

        A pending chunk of a different class is finalized first.
        """
        if (self.current_chunk and
                not isinstance(self.current_chunk, chunk_class)):
            self.finalize()
        if not self.current_chunk:
            self.current_chunk = chunk_class()

    def finalize(self):
        """Close the pending chunk, if any, and append it to ``chunks``."""
        chunk = self.current_chunk
        if not chunk:
            return
        # Materialize the pairs: a bare zip() is a one-shot iterator on
        # Python 3 and cannot be sliced or re-iterated by calcRange().
        chunk.lines = list(zip(chunk.oldlines, chunk.newlines))
        if not self.chunks:
            chunk.first = True
        else:
            # The previously appended chunk is no longer the last one.
            self.chunks[-1].last = False
        chunk.last = True
        chunk.calcRange()
        self.chunks.append(chunk)
        self.current_chunk = None

    def addDiffLines(self, old, new):
        """Append removed (``old``) and added (``new``) lines.

        Both sides are padded with ``(None, '', '')`` entries so that the
        old and new line lists of the chunk stay the same length.
        """
        self._startChunk(DiffChangedChunk)
        for l in old:
            self.current_chunk.oldlines.append((self.old_lineno, '-', l))
            self.old_lineno += 1
            self.offset -= 1
        for l in new:
            self.current_chunk.newlines.append((self.new_lineno, '+', l))
            self.new_lineno += 1
            self.offset += 1
        # More new than old lines: pad the old side.
        while self.offset > 0:
            self.current_chunk.oldlines.append((None, '', ''))
            self.offset -= 1
        # More old than new lines: pad the new side.
        while self.offset < 0:
            self.current_chunk.newlines.append((None, '', ''))
            self.offset += 1

    def addNewLine(self, line):
        """Ensure a changed chunk is pending.

        NOTE(review): ``line`` itself is not stored anywhere by this
        method -- confirm whether that is intended.
        """
        self._startChunk(DiffChangedChunk)

    def addContextLine(self, line):
        """Append an unchanged line to both sides of a context chunk."""
        self._startChunk(DiffContextChunk)
        self.current_chunk.oldlines.append((self.old_lineno, ' ', line))
        self.current_chunk.newlines.append((self.new_lineno, ' ', line))
        self.old_lineno += 1
        self.new_lineno += 1
class GitCheckoutError(Exception):
    """Error carrying a message, also exposed as the ``msg`` attribute."""

    def __init__(self, msg):
        # Keep the text reachable as ``.msg`` in addition to ``.args``.
        self.msg = msg
        super(GitCheckoutError, self).__init__(msg)
class GitCloneError(Exception):
    """Error carrying a message, also exposed as the ``msg`` attribute."""

    def __init__(self, msg):
        # Keep the text reachable as ``.msg`` in addition to ``.args``.
        self.msg = msg
        super(GitCloneError, self).__init__(msg)
class Repo(object):
    """Operations on the local git repository stored at ``path``."""

    # Matches a hunk header such as "@@ -12,5 +14,6 @@"; groups 1 and 3
    # are the starting line numbers of the old and new side.
    header_re = re.compile(r'@@ -(\d+)(,\d+)? \+(\d+)(,\d+)? @@')

    def __init__(self, url, path):
        """Open the repository at ``path``, cloning it if it is missing.

        :param url: Clone source, used only when ``path`` does not exist.
        :param path: Filesystem location of the repository.
        :raises GitCloneError: if ``path`` does not exist and ``url`` is
            None.
        """
        self.log = logging.getLogger('gertty.gitrepo')
        self.url = url
        self.path = path
        self.differ = difflib.Differ()
        if not os.path.exists(path):
            if url is None:
                raise GitCloneError("No URL available for git clone")
            git.Repo.clone_from(self.url, self.path)

    def hasCommit(self, sha):
        """Return True if ``sha`` resolves to a commit in the repository.

        Returns False when the lookup raises gitdb.exc.BadObject.
        """
        repo = git.Repo(self.path)
        try:
            repo.commit(sha)
        except gitdb.exc.BadObject:
            return False
        return True

    def fetch(self, url, refspec):
        """Run ``git fetch <url> <refspec>`` in the repository."""
        repo = git.Repo(self.path)
        try:
            repo.git.fetch(url, refspec)
        except AssertionError:
            # NOTE(review): the identical call is retried once after an
            # AssertionError -- presumably a workaround for a transient
            # failure in the git wrapper; confirm.
            repo.git.fetch(url, refspec)

    def checkout(self, ref):
        """Check out ``ref``.

        :raises GitCheckoutError: if the git command fails; the message
            is git's stderr with tabs replaced by spaces.
        """
        repo = git.Repo(self.path)
        try:
            repo.git.checkout(ref)
        except git.exc.GitCommandError as e:
            raise GitCheckoutError(e.stderr.replace('\t', ' '))

    def cherryPick(self, ref):
        """Cherry-pick ``ref`` onto the current checkout.

        :raises GitCheckoutError: if the git command fails; the message
            is git's stderr with tabs replaced by spaces.
        """
        repo = git.Repo(self.path)
        try:
            repo.git.cherry_pick(ref)
        except git.exc.GitCommandError as e:
            raise GitCheckoutError(e.stderr.replace('\t', ' '))

    def diffstat(self, old, new):
        """Return ``git diff -M --numstat`` output between two commits.

        :returns: One list per output line, made by splitting the line on
            tabs (added, removed, filename for a regular numstat line).
        """
        repo = git.Repo(self.path)
        diff = repo.git.diff('-M', '--color=never', '--numstat', old, new)
        # Added, removed, filename
        return [entry.split('\t') for entry in diff.split('\n')]

    @staticmethod
    def _appendWholeLine(style, text, output_old, output_new):
        """Append an un-emphasized line to the side(s) ``style`` implies.

        Added lines go to the new side, removed lines to the old side and
        context lines to both.
        """
        if style == 'added' or style == 'context':
            output_new.append((style + '-line', text))
        if style == 'removed' or style == 'context':
            output_old.append((style + '-line', text))

    def intralineDiff(self, old, new):
        """Compute word-level emphasis for a block of changed lines.

        :param old: List of removed lines.
        :param new: List of added lines.
        :returns: ``(output_old, output_new)``.  Each entry is either a
            ``(style, text)`` tuple for a line without emphasis, or a list
            of such tuples when difflib supplied a '?' guide line; styles
            are '<added|removed|context>-line' and '<...>-word'.
        """
        prevline = None
        prevstyle = None
        output_old = []
        output_new = []
        for line in self.differ.compare(old, new):
            key = line[0]
            rest = line[2:]
            if key == '?':
                # A guide line: its non-space columns mark the characters
                # of the previous line that differ.
                result = []
                accumulator = ''
                emphasis = False
                rest = rest[:-1]  # It has a newline.
                for i, c in enumerate(prevline):
                    if i >= len(rest):
                        indicator = ' '
                    else:
                        indicator = rest[i]
                    if indicator != ' ' and not emphasis:
                        # changing from not emph to emph
                        if accumulator:
                            result.append((prevstyle+'-line', accumulator))
                        accumulator = ''
                        emphasis = True
                    elif indicator == ' ' and emphasis:
                        # changing from emph to not emph
                        if accumulator:
                            result.append((prevstyle+'-word', accumulator))
                        accumulator = ''
                        emphasis = False
                    accumulator += c
                if accumulator:
                    if emphasis:
                        result.append((prevstyle+'-word', accumulator))
                    else:
                        result.append((prevstyle+'-line', accumulator))
                if prevstyle == 'added':
                    output_new.append(result)
                elif prevstyle == 'removed':
                    output_old.append(result)
                # The previous line has been emitted with emphasis.
                prevline = None
                continue
            if prevline is not None:
                # No guide line followed the previous line: emit it whole.
                self._appendWholeLine(prevstyle, prevline,
                                      output_old, output_new)
            if key == '+':
                prevstyle = 'added'
            elif key == '-':
                prevstyle = 'removed'
            elif key == ' ':
                prevstyle = 'context'
            prevline = rest
        if prevline is not None:
            # Flush the final line with the same rules as inside the loop,
            # so that a trailing context line is not dropped.
            self._appendWholeLine(prevstyle, prevline,
                                  output_old, output_new)
        return output_old, output_new

    def diff(self, old, new, context=10000, show_old_commit=False):
        """Create a diff from old to new.

        Note that the commit message is also diffed, and listed as
        /COMMIT_MSG.

        :param old: Revision of the old commit.
        :param new: Revision of the new commit.
        :param context: Number of context lines requested from git.
        :param show_old_commit: When true the commit message is diffed
            against the old commit's message; otherwise it is presented
            as a new file.
        :returns: A list of DiffFile objects, the commit message first.
        :raises Exception: on a diff line that is not recognized.
        """
        repo = git.Repo(self.path)
        oldc = repo.commit(old)
        newc = repo.commit(new)
        files = []
        extra_contexts = []
        if show_old_commit:
            extra_contexts.append(CommitContext(oldc, newc))
        else:
            extra_contexts.append(CommitContext(None, newc))
        contexts = itertools.chain(
            extra_contexts, oldc.diff(
                newc, color='never', create_patch=True, U=context))
        for diff_context in contexts:
            # Each iteration of this is a file
            f = DiffFile()
            if diff_context.a_blob:
                f.oldname = diff_context.a_blob.path
            if diff_context.b_blob:
                f.newname = diff_context.b_blob.path
            # TODO(jeblair): if/when https://github.com/gitpython-developers/GitPython/pull/266 merges,
            # remove above 4 lines and replace with these two:
            # f.oldname = diff_context.a_path
            # f.newname = diff_context.b_path
            if diff_context.new_file:
                f.oldname = 'Empty file'
            if diff_context.deleted_file:
                f.newname = 'Empty file'
            files.append(f)
            if diff_context.rename_from:
                f.oldname = diff_context.rename_from
            if diff_context.rename_to:
                f.newname = diff_context.rename_to
            # Removed/added lines collected since the last flush.
            oldchunk = []
            newchunk = []
            prev_key = ''
            diff_lines = diff_context.diff.split('\n')
            for i, line in enumerate(diff_lines):
                last_line = (i == len(diff_lines)-1)
                # NOTE(review): these prefix tests also match removed or
                # added lines whose own text starts with "--" / "++" --
                # confirm whether such lines can reach this point.
                if line.startswith('---'):
                    continue
                if line.startswith('+++'):
                    continue
                if line.startswith('@@'):
                    # Hunk header: reset the running line numbers.
                    m = self.header_re.match(line)
                    f.old_lineno = int(m.group(1))
                    f.new_lineno = int(m.group(3))
                    continue
                if not line:
                    if prev_key != '\\':
                        # Strangely, we get an extra newline in the
                        # diff in the case that the last line is "\ No
                        # newline at end of file".  This is a
                        # workaround for that.
                        prev_key = ''
                        line = 'X '
                    else:
                        line = ' '
                key = line[0]
                rest = line[1:]
                if key == '\\':
                    # This is for "\ No newline at end of file" which
                    # follows either a -, + or ' ' line to indicate
                    # which file it's talking about (or both).  For
                    # now, treat it like normal text and let the user
                    # infer from context that it's not actually in the
                    # file.  Potential TODO: highlight it to make that
                    # more clear.
                    if prev_key:
                        key = prev_key
                    else:
                        key = ' '
                    prev_key = '\\'
                if key == '-':
                    prev_key = '-'
                    oldchunk.append(rest)
                    # On the last line fall through so the pending
                    # chunk is still flushed below.
                    if not last_line:
                        continue
                if key == '+':
                    prev_key = '+'
                    newchunk.append(rest)
                    if not last_line:
                        continue
                prev_key = ''
                # end of chunk
                if oldchunk or newchunk:
                    oldchunk, newchunk = self.intralineDiff(oldchunk, newchunk)
                    f.addDiffLines(oldchunk, newchunk)
                oldchunk = []
                newchunk = []
                if key == ' ':
                    f.addContextLine(rest)
                    continue
                if line.startswith("similarity index"):
                    continue
                if line.startswith("rename"):
                    continue
                if line.startswith("index"):
                    continue
                if line.startswith("Binary files"):
                    continue
                if not last_line:
                    raise Exception("Unhandled line: %s" % line)
            f.finalize()
        return files

    def getFile(self, old, new, path):
        """Return the content of ``path`` at ``new`` as a DiffFile.

        Every line of the file is added as a context line, numbered from
        1 on both sides.  ``old`` is accepted but not used.
        """
        f = DiffFile()
        f.oldname = path
        f.newname = path
        f.old_lineno = 1
        f.new_lineno = 1
        repo = git.Repo(self.path)
        newc = repo.commit(new)
        blob = newc.tree[path]
        for line in blob.data_stream.read().splitlines():
            f.addContextLine(line)
        f.finalize()
        return f