# -*- coding: utf-8 -*-
# Copyright 2014 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

# Test changes:
# https://review.opendev.org/275862
# https://review.opendev.org/119302
# https://review.opendev.org/133550

import datetime
import logging
import difflib
import itertools
import os
import re

import git
import gitdb
import six

# Indexes into the (old, new) pairs used by DiffChunk.range and
# DiffChunk.lines.
OLD = 0
NEW = 1
# Indexes into a [start, end] range pair.
START = 0
END = 1
# Indexes into a single line tuple.
LINENO = 0
LINE = 1


class GitTimeZone(datetime.tzinfo):
    """Because we can't have nice things.

    A fixed-offset tzinfo built from an offset expressed in seconds.
    """

    def __init__(self, offset_seconds):
        self._offset = offset_seconds

    def utcoffset(self, dt):
        return datetime.timedelta(seconds=self._offset)

    def dst(self, dt):
        return datetime.timedelta(0)

    def tzname(self, dt):
        return None


class CommitBlob(object):
    """Stand-in blob whose only attribute is the pseudo-path /COMMIT_MSG."""

    def __init__(self):
        self.path = '/COMMIT_MSG'


class CommitContext(object):
    """A git.diff.Diff for commit messages."""

    def decorateGitTime(self, seconds, tz):
        """Format a timestamp as 'YYYY-MM-DD HH:MM:SS <tzname><offset>'.

        :param seconds: Seconds since the epoch.
        :param tz: Timezone offset in seconds; it is negated before use.
        """
        # NOTE(review): the negation presumably converts a "seconds west
        # of UTC" value into the east-of-UTC offset tzinfo expects --
        # confirm against the GitPython documentation.
        dt = datetime.datetime.fromtimestamp(seconds, GitTimeZone(-tz))
        return dt.strftime('%Y-%m-%d %H:%M:%S %Z%z')

    def decorateMessage(self, commit):
        """Put the Gerrit commit metadata at the front of the message.

        e.g.:
            Parent: cc8a51ca (Initial commit)          1
            Author: Robert Collins <email>             2
            AuthorDate: 2014-05-27 14:05:47 +1200      3
            Commit: Robert Collins <email>             4
            CommitDate: 2014-05-27 14:07:57 +1200      5
                                                       6

        Only the abbreviated (8 character) sha of the first parent is
        emitted on the Parent line, or "None" if there is no parent.

        :param commit: A commit object, or a false value.
        :returns: A list of newline-terminated unicode lines: the six
            header lines followed by the commit message lines.  An empty
            list if ``commit`` is false.
        """
        # NB: If folk report that commits have comments at the wrong place
        # Then this function, which reproduces gerrit behaviour, will need
        # to be fixed (e.g. by making the behaviour match more closely).
        if not commit:
            return []
        if commit.parents:
            parentsha = commit.parents[0].hexsha[:8]
        else:
            parentsha = None
        author = commit.author
        committer = commit.committer
        author_date = self.decorateGitTime(
            commit.authored_date, commit.author_tz_offset)
        commit_date = self.decorateGitTime(
            commit.committed_date, commit.committer_tz_offset)
        # The email may be bytes or text; normalise to text.
        if isinstance(author.email, six.text_type):
            author_email = author.email
        else:
            author_email = author.email.decode('utf8')
        if isinstance(committer.email, six.text_type):
            committer_email = committer.email
        else:
            committer_email = committer.email.decode('utf8')
        return [u"Parent: %s\n" % parentsha,
                u"Author: %s <%s>\n" % (author.name, author_email),
                u"AuthorDate: %s\n" % author_date,
                u"Commit: %s <%s>\n" % (committer.name, committer_email),
                u"CommitDate: %s\n" % commit_date,
                u"\n"] + commit.message.splitlines(True)

    def __init__(self, old, new):
        """Create a CommitContext.

        :param old: A git.objects.commit object or None.
        :param new: A git.objects.commit object.
        """
        self.rename_from = self.rename_to = None
        # With no old commit the message is presented as a new file.
        self.new_file = old is None
        self.deleted_file = False
        self.a_blob = CommitBlob()
        self.b_blob = CommitBlob()
        self.a_path = self.a_blob.path
        self.b_path = self.b_blob.path
        self.diff = ''.join(difflib.unified_diff(
            self.decorateMessage(old), self.decorateMessage(new),
            fromfile="/a/COMMIT_MSG", tofile="/b/COMMIT_MSG"))


class DiffChunk(object):
    """A run of consecutive diff lines of one kind.

    ``oldlines`` and ``newlines`` are filled while parsing;
    DiffFile.finalize() pairs them up into ``lines`` and then calls
    calcRange().
    """

    def __init__(self):
        self.oldlines = []
        self.newlines = []
        self.first = False
        self.last = False
        self.lines = []
        self.calcRange()

    def __repr__(self):
        return '<%s old lines %s-%s / new lines %s-%s>' % (
            self.__class__.__name__,
            self.range[OLD][START], self.range[OLD][END],
            self.range[NEW][START], self.range[NEW][END])

    def calcRange(self):
        """Recompute ``self.range`` from ``self.lines``.

        ``range`` is [[old_start, old_end], [new_start, new_end]].  A
        value of 0 means "no line number found"; entries whose line
        number is None are skipped.
        """
        self.range = [[0, 0], [0, 0]]
        # Scan forward for the first numbered line on each side.
        for l in self.lines:
            if self.range[OLD][START] == 0 and l[OLD][LINENO] is not None:
                self.range[OLD][START] = l[OLD][LINENO]
            if self.range[NEW][START] == 0 and l[NEW][LINENO] is not None:
                self.range[NEW][START] = l[NEW][LINENO]
            if (self.range[OLD][START] != 0 and
                    self.range[NEW][START] != 0):
                break
        # Scan backward for the last numbered line on each side.
        for l in self.lines[::-1]:
            if self.range[OLD][END] == 0 and l[OLD][LINENO] is not None:
                self.range[OLD][END] = l[OLD][LINENO]
            if self.range[NEW][END] == 0 and l[NEW][LINENO] is not None:
                self.range[NEW][END] = l[NEW][LINENO]
            if (self.range[OLD][END] != 0 and
                    self.range[NEW][END] != 0):
                break

    def indexOfLine(self, oldnew, lineno):
        """Return the index in ``lines`` of the given line number.

        :param oldnew: OLD or NEW, selecting which side to search.
        :param lineno: The line number to look for.
        :returns: The index, or None (implicitly) if it is not present.
        """
        for i, l in enumerate(self.lines):
            if l[oldnew][LINENO] == lineno:
                return i


class DiffContextChunk(DiffChunk):
    context = True


class DiffChangedChunk(DiffChunk):
    context = False


class DiffFile(object):
    """The chunks of a single file's diff, built up line by line."""

    log = logging.getLogger('gertty.gitrepo')

    def __init__(self):
        self.newname = 'Unknown File'
        self.oldname = 'Unknown File'
        self.old_empty = False
        self.new_empty = False
        self.chunks = []
        self.current_chunk = None
        self.old_lineno = 0
        self.new_lineno = 0
        # Running (new - old) line count imbalance within changed chunks;
        # addDiffLines() pads the shorter side until it is back to zero.
        self.offset = 0

    def finalize(self):
        """Close the current chunk, if any, and append it to ``chunks``."""
        if not self.current_chunk:
            return
        oldlines = [(n, d, self.expand_tabs(l))
                    for (n, d, l) in self.current_chunk.oldlines]
        newlines = [(n, d, self.expand_tabs(l))
                    for (n, d, l) in self.current_chunk.newlines]
        self.current_chunk.lines = list(
            six.moves.zip(oldlines, newlines))
        if not self.chunks:
            self.current_chunk.first = True
        else:
            # The previously appended chunk is no longer the last one.
            self.chunks[-1].last = False
        self.current_chunk.last = True
        self.current_chunk.calcRange()
        self.chunks.append(self.current_chunk)
        self.current_chunk = None

    def expand_tabs(self, l, tabstop=8):
        """Replace each tab with a visible marker padded to the tab stop.

        :param l: A string, a list of such values (expanded element by
            element), or a 2-tuple whose second item is the string to
            expand.
        :param tabstop: Width of a tab stop in columns.
        :returns: A value of the same shape as ``l`` with tabs expanded.
            If anything goes wrong the error is logged and ``l`` is
            returned unchanged.
        """
        # Mutable cell so the nested function can update it (no
        # "nonlocal" on Python 2).  'start' is the display column
        # accumulated so far; 'prevstart' the index of the previous tab.
        offset = {'start': 0, 'prevstart': 0}

        def replace(match):
            offset['start'] += match.start(0) - offset['prevstart']
            offset['prevstart'] = match.start(0)
            # One column is taken by the marker itself, hence the -1.
            cnt = tabstop - offset['start'] % tabstop - 1
            offset['start'] += cnt
            # The marker must be exactly one character wide (RIGHT-POINTING
            # DOUBLE ANGLE QUOTATION MARK); a multi-character marker would
            # shift every following column.
            return u"\u00bb" + " " * cnt

        try:
            if isinstance(l, six.string_types):
                return re.sub(r'\t', replace, l)
            elif isinstance(l, list):
                # Forward tabstop so a non-default width also applies to
                # the elements.
                return [self.expand_tabs(e, tabstop) for e in l]
            else:
                (a, b) = l
                return (a, re.sub(r'\t', replace, b))
        except Exception:
            # Deliberately best-effort: show the raw line rather than
            # fail the whole diff.
            self.log.exception("Error expanding tabs")
            return l

    def addDiffLines(self, old, new):
        """Append removed and added lines to the current changed chunk.

        A context chunk in progress is finalized first.  After the lines
        are added, the shorter side is padded with (None, '', '') entries
        so both sides have the same length.

        :param old: Removed lines.
        :param new: Added lines.
        """
        if (self.current_chunk and
                not isinstance(self.current_chunk, DiffChangedChunk)):
            self.finalize()
        if not self.current_chunk:
            self.current_chunk = DiffChangedChunk()
        for l in old:
            self.current_chunk.oldlines.append((self.old_lineno, '-', l))
            self.old_lineno += 1
            self.offset -= 1
        for l in new:
            self.current_chunk.newlines.append((self.new_lineno, '+', l))
            self.new_lineno += 1
            self.offset += 1
        while self.offset > 0:
            self.current_chunk.oldlines.append((None, '', ''))
            self.offset -= 1
        while self.offset < 0:
            self.current_chunk.newlines.append((None, '', ''))
            self.offset += 1

    def addNewLine(self, line):
        """Ensure the current chunk is a changed chunk.

        NOTE(review): ``line`` is not stored anywhere; this only switches
        the chunk type.
        """
        if (self.current_chunk and
                not isinstance(self.current_chunk, DiffChangedChunk)):
            self.finalize()
        if not self.current_chunk:
            self.current_chunk = DiffChangedChunk()

    def addContextLine(self, line):
        """Append an unchanged line to both sides of a context chunk.

        A changed chunk in progress is finalized first.
        """
        if (self.current_chunk and
                not isinstance(self.current_chunk, DiffContextChunk)):
            self.finalize()
        if not self.current_chunk:
            self.current_chunk = DiffContextChunk()
        self.current_chunk.oldlines.append((self.old_lineno, ' ', line))
        self.current_chunk.newlines.append((self.new_lineno, ' ', line))
        self.old_lineno += 1
        self.new_lineno += 1


class GitCheckoutError(Exception):
    def __init__(self, msg):
        super(GitCheckoutError, self).__init__(msg)
        self.msg = msg


class GitCloneError(Exception):
    def __init__(self, msg):
        super(GitCloneError, self).__init__(msg)
        self.msg = msg


class Repo(object):
    """Wrapper around a local git repository at ``path``."""

    def __init__(self, url, path):
        """Open the repository, cloning it first if ``path`` is missing.

        :param url: Clone URL, or None.
        :param path: Local path of the repository.
        :raises GitCloneError: If ``path`` does not exist and ``url`` is
            None.
        """
        self.log = logging.getLogger('gertty.gitrepo')
        self.url = url
        self.path = path
        self.differ = difflib.Differ()
        if not os.path.exists(path):
            if url is None:
                raise GitCloneError("No URL available for git clone")
            git.Repo.clone_from(self.url, self.path)

    def checkCommits(self, shas):
        """Return the set of shas that cannot be resolved to a commit."""
        invalid = set()
        repo = git.Repo(self.path)
        for sha in shas:
            try:
                repo.commit(sha)
            except (gitdb.exc.BadObject, ValueError):
                invalid.add(sha)
        return invalid

    def fetch(self, url, refspec):
        """Fetch ``refspec`` from ``url``, retrying once on AssertionError."""
        repo = git.Repo(self.path)
        try:
            repo.git.fetch(url, refspec)
        except AssertionError:
            # NOTE(review): looks like a workaround for a spurious
            # assertion in the git library -- confirm before removing.
            repo.git.fetch(url, refspec)

    def deleteRef(self, ref):
        repo = git.Repo(self.path)
        git.Reference.delete(repo, ref)

    def checkout(self, ref):
        """Check out ``ref``.

        :raises GitCheckoutError: If git fails; the message is git's
            stderr with tabs replaced.
        """
        repo = git.Repo(self.path)
        try:
            repo.git.checkout(ref)
        except git.exc.GitCommandError as e:
            raise GitCheckoutError(e.stderr.replace('\t', ' '))

    def cherryPick(self, ref):
        """Cherry-pick ``ref``.

        :raises GitCheckoutError: If git fails; the message is git's
            stderr with tabs replaced.
        """
        repo = git.Repo(self.path)
        try:
            repo.git.cherry_pick(ref)
        except git.exc.GitCommandError as e:
            raise GitCheckoutError(e.stderr.replace('\t', ' '))

    def diffstat(self, old, new):
        """Return ``git diff --numstat`` output split into fields.

        :returns: A list with one entry per output line, each the line
            split on tabs (added, removed, filename).
        """
        repo = git.Repo(self.path)
        diff = repo.git.diff('-M', '--color=never', '--numstat', old, new)
        # Added, removed, filename
        return [x.split('\t') for x in diff.split('\n')]

    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    trailing_ws_re = re.compile(r'\s+$')

    def _emph_trail_ws(self, style, line):
        """Split trailing whitespace off ``line`` so it can be highlighted.

        :returns: ``(style, line)`` if there is no trailing whitespace;
            ``('trailing-ws', line)`` if the match starts at the beginning
            of the line; otherwise a two-element list
            ``[(style, text), ('trailing-ws', whitespace)]``.
        """
        re_result = self.trailing_ws_re.search(line)
        if not re_result:
            return (style, line)
        start, end = re_result.span()
        if start == 0:
            return ('trailing-ws', line)
        return [(style, line[:start]), ('trailing-ws', line[start:end])]

    def intralineDiff(self, old, new):
        """Compute word-level emphasis for a block of changed lines.

        Takes a list of old lines and a list of new lines and runs them
        through difflib.Differ.  Each output entry is either a
        ``(style, text)`` tuple (or whatever _emph_trail_ws() returns for
        it) or, for lines that have a '?' guide line, a list of such
        segments.

        :returns: ``(output_old, output_new)``.
        """
        prevline = None
        prevstyle = None
        output_old = []
        output_new = []
        for line in self.differ.compare(old, new):
            key = line[0]
            rest = line[2:]
            if key == '?':
                # A guide line describing the previous line: any
                # non-space column marks a changed character.
                result = []
                accumulator = ''
                emphasis = False
                rest = rest[:-1]  # It has a newline.
                for i, c in enumerate(prevline):
                    if i >= len(rest):
                        indicator = ' '
                    else:
                        indicator = rest[i]
                    if indicator != ' ' and not emphasis:
                        # changing from not emph to emph
                        if accumulator:
                            result.append((prevstyle+'-line', accumulator))
                        accumulator = ''
                        emphasis = True
                    elif indicator == ' ' and emphasis:
                        # changing from emph to not emph
                        if accumulator:
                            result.append((prevstyle+'-word', accumulator))
                        accumulator = ''
                        emphasis = False
                    accumulator += c
                if accumulator:
                    if emphasis:
                        result.append(self._emph_trail_ws(
                            prevstyle+'-word', accumulator))
                    else:
                        result.append(self._emph_trail_ws(
                            prevstyle+'-line', accumulator))
                if prevstyle == 'added':
                    output_new.append(result)
                elif prevstyle == 'removed':
                    output_old.append(result)
                # The previous line has been emitted with emphasis.
                prevline = None
                continue
            # No guide line followed the previous line: emit it whole.
            if prevline is not None:
                if prevstyle == 'added' or prevstyle == 'context':
                    output_new.append(self._emph_trail_ws(
                        prevstyle+'-line', prevline))
                if prevstyle == 'removed' or prevstyle == 'context':
                    output_old.append((prevstyle+'-line', prevline))
            if key == '+':
                prevstyle = 'added'
            elif key == '-':
                prevstyle = 'removed'
            elif key == ' ':
                prevstyle = 'context'
            prevline = rest
        # Flush the final pending line.
        if prevline is not None:
            if prevstyle == 'added':
                output_new.append(self._emph_trail_ws(
                    prevstyle+'-line', prevline))
            elif prevstyle == 'removed':
                output_old.append((prevstyle+'-line', prevline))
        return output_old, output_new

    # Raw string: '\d' and '\+' in a plain literal are invalid escapes.
    header_re = re.compile(r'@@ -(\d+)(,\d+)? \+(\d+)(,\d+)? @@')

    def diff(self, old, new, context=10000, show_old_commit=False):
        """Create a diff from old to new.

        Note that the commit message is also diffed, and listed as
        /COMMIT_MSG.

        :param old: Revision of the old commit.
        :param new: Revision of the new commit.
        :param context: Number of context lines requested from git.
        :param show_old_commit: If true, diff the old commit message
            against the new one; otherwise show the new message as a new
            file.
        :returns: A list of DiffFile objects, the commit message first.
        :raises Exception: On a diff line that is not recognised.
        """
        repo = git.Repo(self.path)
        oldc = repo.commit(old)
        newc = repo.commit(new)
        files = []
        extra_contexts = []
        if show_old_commit:
            extra_contexts.append(CommitContext(oldc, newc))
        else:
            extra_contexts.append(CommitContext(None, newc))
        contexts = itertools.chain(
            extra_contexts, oldc.diff(
                newc, color='never',
                create_patch=True, unified=context))
        for diff_context in contexts:
            # Each iteration of this is a file
            f = DiffFile()
            f.oldname = diff_context.a_path
            f.newname = diff_context.b_path
            if diff_context.new_file:
                f.oldname = 'Empty file'
                f.old_empty = True
            if diff_context.deleted_file:
                f.newname = 'Empty file'
                f.new_empty = True
            files.append(f)
            if diff_context.rename_from:
                f.oldname = diff_context.rename_from
            if diff_context.rename_to:
                f.newname = diff_context.rename_to
            oldchunk = []
            newchunk = []
            prev_key = ''
            if isinstance(diff_context.diff, six.string_types):
                diff_text = diff_context.diff
            else:
                diff_text = diff_context.diff.decode('utf-8')
            diff_lines = diff_text.split('\n')
            for i, line in enumerate(diff_lines):
                last_line = (i == len(diff_lines)-1)
                if line.startswith('---'):
                    continue
                if line.startswith('+++'):
                    continue
                if line.startswith('@@'):
                    m = self.header_re.match(line)
                    f.old_lineno = int(m.group(1))
                    f.new_lineno = int(m.group(3))
                    continue
                if not line:
                    # NOTE(review): prev_key is reassigned to '-', '+' or
                    # '' before every path back to the top of this loop,
                    # so the else branch below appears unreachable.
                    if prev_key != '\\':
                        # Strangely, we get an extra newline in the
                        # diff in the case that the last line is "\ No
                        # newline at end of file".  This is a
                        # workaround for that.
                        prev_key = ''
                        line = 'X '
                    else:
                        line = ' '
                key = line[0]
                rest = line[1:]
                if key == '\\':
                    # This is for "\ No newline at end of file" which
                    # follows either a -, + or ' ' line to indicate
                    # which file it's talking about (or both).  For
                    # now, treat it like normal text and let the user
                    # infer from context that it's not actually in the
                    # file.  Potential TODO: highlight it to make that
                    # more clear.
                    if prev_key:
                        key = prev_key
                    else:
                        key = ' '
                    prev_key = '\\'
                if key == '-':
                    prev_key = '-'
                    oldchunk.append(rest)
                    # On the last line fall through to flush the chunk.
                    if not last_line:
                        continue
                if key == '+':
                    prev_key = '+'
                    newchunk.append(rest)
                    if not last_line:
                        continue
                prev_key = ''
                # end of chunk
                if oldchunk or newchunk:
                    oldchunk, newchunk = self.intralineDiff(
                        oldchunk, newchunk)
                    f.addDiffLines(oldchunk, newchunk)
                oldchunk = []
                newchunk = []
                if key == ' ':
                    f.addContextLine(rest)
                    continue
                if line.startswith("similarity index"):
                    continue
                if line.startswith("rename"):
                    continue
                if line.startswith("index"):
                    continue
                if line.startswith("Binary files"):
                    continue
                if not last_line:
                    raise Exception("Unhandled line: %s" % line)
            if not diff_context.diff:
                # There is no diff, possibly because this is simply a
                # rename.  Include context lines so that comments may
                # appear.
                if not f.new_empty:
                    blob = newc.tree[f.newname]
                else:
                    blob = oldc.tree[f.oldname]
                f.old_lineno = 1
                f.new_lineno = 1
                for line in blob.data_stream.read().splitlines():
                    if isinstance(line, six.string_types):
                        f.addContextLine(line)
                    else:
                        f.addContextLine(line.decode('utf8'))
            f.finalize()
        return files

    def getFile(self, old, new, path):
        """Return the whole of ``path`` at ``new`` as context lines.

        :param old: Unused.
        :param new: Revision to read the file from.
        :param path: Path of the file within the tree.
        :returns: A DiffFile, or None if ``path`` is not in the tree.
        """
        f = DiffFile()
        f.oldname = path
        f.newname = path
        f.old_lineno = 1
        f.new_lineno = 1
        repo = git.Repo(self.path)
        newc = repo.commit(new)
        try:
            blob = newc.tree[path]
        except KeyError:
            return None
        for line in blob.data_stream.read().splitlines():
            if isinstance(line, six.string_types):
                f.addContextLine(line)
            else:
                f.addContextLine(line.decode('utf8'))
        f.finalize()
        return f


def get_repo(project_name, config):
    """Return the Repo for ``project_name`` beneath ``config.git_root``.

    :param project_name: Project name; appended to both
        ``config.git_root`` and ``config.git_url``.
    :param config: Object providing ``git_root`` and ``git_url``.
    :raises AssertionError: If the joined path does not start with the
        absolute form of ``config.git_root``.
    """
    local_path = os.path.join(config.git_root, project_name)
    local_root = os.path.abspath(config.git_root)
    # NOTE(review): this containment check is an assert (stripped under
    # "python -O") and os.path.commonprefix compares character by
    # character rather than by path component.
    assert os.path.commonprefix((local_root, local_path)) == local_root
    return Repo(config.git_url + project_name, local_path)