Tweak the intraline difference heuristics
We had a few cases where the intraline difference was showing us something confusing, like replacing characters in a common word with other characters from a different word. A bit of shifting the offsets after collapsing neighboring edits resolves this nicely. Change-Id: Ic947912277d96bf78afbc9831fb16e4b64e31fb1 Signed-off-by: Shawn O. Pearce <sop@google.com>
This commit is contained in:
@@ -20,7 +20,11 @@ class CharText implements Sequence {
|
||||
private final String content;
|
||||
|
||||
CharText(Text text, int s, int e) {
|
||||
content = text.getLines(s, e);
|
||||
content = text.getLines(s, e, false /* keep LF */);
|
||||
}
|
||||
|
||||
char charAt(int idx) {
|
||||
return content.charAt(idx);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@@ -229,23 +229,137 @@ public class PatchListCacheImpl implements PatchListCache {
|
||||
CharText b = new CharText(bContent, e.getBeginB(), e.getEndB());
|
||||
|
||||
List<Edit> wordEdits = new MyersDiff(a, b).getEdits();
|
||||
|
||||
// Combine edits that are really close together. If they are
|
||||
// just a few characters apart we tend to get better results
|
||||
// by joining them together and taking the whole span.
|
||||
//
|
||||
for (int j = 0; j < wordEdits.size() - 1;) {
|
||||
Edit c = wordEdits.get(j);
|
||||
Edit n = wordEdits.get(j + 1);
|
||||
|
||||
if (n.getBeginA() - c.getEndA() <= 2
|
||||
|| n.getBeginB() - c.getEndB() <= 2) {
|
||||
// This edit is incredibly close to the start of the next.
|
||||
// Combine them together.
|
||||
//
|
||||
wordEdits.set(j, new Edit(c.getBeginA(), n.getEndA(),
|
||||
c.getBeginB(), n.getEndB()));
|
||||
if (n.getBeginA() - c.getEndA() <= 5
|
||||
|| n.getBeginB() - c.getEndB() <= 5) {
|
||||
int ab = c.getBeginA();
|
||||
int ae = n.getEndA();
|
||||
|
||||
int bb = c.getBeginB();
|
||||
int be = n.getEndB();
|
||||
|
||||
wordEdits.set(j, new Edit(ab, ae, bb, be));
|
||||
wordEdits.remove(j + 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
j++;
|
||||
}
|
||||
|
||||
// Apply some simple rules to fix up some of the edits. Our
|
||||
// logic above, along with our per-character difference tends
|
||||
// to produce some crazy stuff.
|
||||
//
|
||||
for (int j = 0; j < wordEdits.size(); j++) {
|
||||
Edit c = wordEdits.get(j);
|
||||
int ab = c.getBeginA();
|
||||
int ae = c.getEndA();
|
||||
|
||||
int bb = c.getBeginB();
|
||||
int be = c.getEndB();
|
||||
|
||||
// We sometimes collapsed an edit together in a strange way,
|
||||
// such that the edges of each text is identical. Fix by
|
||||
// by dropping out that incorrectly replaced region.
|
||||
//
|
||||
while (ab < ae && bb < be && a.equals(ab, b, bb)) {
|
||||
ab++;
|
||||
bb++;
|
||||
}
|
||||
while (ab < ae && bb < be && a.equals(ae - 1, b, be - 1)) {
|
||||
ae--;
|
||||
be--;
|
||||
}
|
||||
|
||||
// The leading part of an edit and its trailing part in the same
|
||||
// text might be identical. Slide down that edit and use the tail
|
||||
// rather than the leading bit. If however the edit is only on a
|
||||
// whitespace block try to shift it to the left margin, assuming
|
||||
// that it is an indentation change.
|
||||
//
|
||||
boolean aShiftRight = true;
|
||||
if (ab < ae && isOnlyWhitespace(a, ab, ae)) {
|
||||
int lf = findLF(wordEdits, j, a, ab);
|
||||
if (a.charAt(lf) == '\n') {
|
||||
int nb = lf + 1;
|
||||
int p = 0;
|
||||
while (p < ae - ab) {
|
||||
if (a.equals(ab + p, a, ab + p))
|
||||
p++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
if (p == ae - ab) {
|
||||
ab = nb;
|
||||
ae = nb + p;
|
||||
aShiftRight = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (aShiftRight) {
|
||||
while (ab < ae && ae < a.size() && a.equals(ab, a, ae)) {
|
||||
ab++;
|
||||
ae++;
|
||||
if (a.charAt(ae - 1) == '\n') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
boolean bShiftRight = true;
|
||||
if (bb < be && isOnlyWhitespace(b, bb, be)) {
|
||||
int lf = findLF(wordEdits, j, b, bb);
|
||||
if (b.charAt(lf) == '\n') {
|
||||
int nb = lf + 1;
|
||||
int p = 0;
|
||||
while (p < be - bb) {
|
||||
if (b.equals(bb + p, b, bb + p))
|
||||
p++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
if (p == be - bb) {
|
||||
bb = nb;
|
||||
be = nb + p;
|
||||
bShiftRight = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (bShiftRight) {
|
||||
while (bb < be && be < b.size() && b.equals(bb, b, be)) {
|
||||
bb++;
|
||||
be++;
|
||||
if (b.charAt(be - 1) == '\n') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If most of a line was modified except the LF was common, make
|
||||
// the LF part of the modification region. This is easier to read.
|
||||
//
|
||||
if (ab < ae //
|
||||
&& (ab == 0 || a.charAt(ab - 1) == '\n') //
|
||||
&& ae < a.size() && a.charAt(ae) == '\n') {
|
||||
ae++;
|
||||
}
|
||||
if (bb < be //
|
||||
&& (bb == 0 || b.charAt(bb - 1) == '\n') //
|
||||
&& be < b.size() && b.charAt(be) == '\n') {
|
||||
be++;
|
||||
}
|
||||
|
||||
wordEdits.set(j, new Edit(ab, ae, bb, be));
|
||||
}
|
||||
|
||||
edits.set(i, new ReplaceEdit(e, wordEdits));
|
||||
}
|
||||
}
|
||||
@@ -253,6 +367,24 @@ public class PatchListCacheImpl implements PatchListCache {
|
||||
return new PatchListEntry(fileHeader, edits);
|
||||
}
|
||||
|
||||
private static int findLF(List<Edit> edits, int j, CharText t, int b) {
|
||||
int lf = b;
|
||||
int limit = 0 < j ? edits.get(j - 1).getEndB() : 0;
|
||||
while (limit < lf && t.charAt(lf) != '\n') {
|
||||
lf--;
|
||||
}
|
||||
return lf;
|
||||
}
|
||||
|
||||
private static boolean isOnlyWhitespace(CharText t, final int b, final int e) {
|
||||
for (int c = b; c < e; c++) {
|
||||
if (!Character.isWhitespace(t.charAt(c))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return b < e;
|
||||
}
|
||||
|
||||
private static Text read(Repository repo, String path, RevTree tree)
|
||||
throws IOException {
|
||||
TreeWalk tw = TreeWalk.forPath(repo, path, tree);
|
||||
|
@@ -52,17 +52,17 @@ public class Text extends RawText {
|
||||
}
|
||||
|
||||
public String getLine(final int i) {
|
||||
return getLines(i, i + 1);
|
||||
return getLines(i, i + 1, true);
|
||||
}
|
||||
|
||||
public String getLines(final int begin, final int end) {
|
||||
public String getLines(final int begin, final int end, boolean dropLF) {
|
||||
if (begin == end) {
|
||||
return "";
|
||||
}
|
||||
|
||||
final int s = getLineStart(begin);
|
||||
int e = getLineEnd(end - 1);
|
||||
if (content[e - 1] == '\n') {
|
||||
if (dropLF && content[e - 1] == '\n') {
|
||||
e--;
|
||||
}
|
||||
return decode(s, e);
|
||||
|
Reference in New Issue
Block a user