Tweak the intraline difference heuristics

We had a few cases where the intraline difference was showing us
something confusing, like replacing characters in a common word
with other characters from a different word.

A bit of shifting the offsets after collapsing neighboring edits
resolves this nicely.

Change-Id: Ic947912277d96bf78afbc9831fb16e4b64e31fb1
Signed-off-by: Shawn O. Pearce <sop@google.com>
This commit is contained in:
Shawn O. Pearce
2010-02-25 19:52:40 -08:00
parent c1fe3e4563
commit e76ca310aa
3 changed files with 147 additions and 11 deletions

View File

@@ -20,7 +20,11 @@ class CharText implements Sequence {
private final String content;
CharText(Text text, int s, int e) {
content = text.getLines(s, e);
content = text.getLines(s, e, false /* keep LF */);
}
char charAt(int idx) {
return content.charAt(idx);
}
@Override

View File

@@ -229,23 +229,137 @@ public class PatchListCacheImpl implements PatchListCache {
CharText b = new CharText(bContent, e.getBeginB(), e.getEndB());
List<Edit> wordEdits = new MyersDiff(a, b).getEdits();
// Combine edits that are really close together. If they are
// just a few characters apart we tend to get better results
// by joining them together and taking the whole span.
//
for (int j = 0; j < wordEdits.size() - 1;) {
Edit c = wordEdits.get(j);
Edit n = wordEdits.get(j + 1);
if (n.getBeginA() - c.getEndA() <= 2
|| n.getBeginB() - c.getEndB() <= 2) {
// This edit is incredibly close to the start of the next.
// Combine them together.
//
wordEdits.set(j, new Edit(c.getBeginA(), n.getEndA(),
c.getBeginB(), n.getEndB()));
if (n.getBeginA() - c.getEndA() <= 5
|| n.getBeginB() - c.getEndB() <= 5) {
int ab = c.getBeginA();
int ae = n.getEndA();
int bb = c.getBeginB();
int be = n.getEndB();
wordEdits.set(j, new Edit(ab, ae, bb, be));
wordEdits.remove(j + 1);
continue;
}
j++;
}
// Apply some simple rules to fix up some of the edits. Our
// logic above, along with our per-character difference tends
// to produce some crazy stuff.
//
for (int j = 0; j < wordEdits.size(); j++) {
Edit c = wordEdits.get(j);
int ab = c.getBeginA();
int ae = c.getEndA();
int bb = c.getBeginB();
int be = c.getEndB();
// We sometimes collapsed an edit together in a strange way,
// such that the edges of each text is identical. Fix by
// by dropping out that incorrectly replaced region.
//
while (ab < ae && bb < be && a.equals(ab, b, bb)) {
ab++;
bb++;
}
while (ab < ae && bb < be && a.equals(ae - 1, b, be - 1)) {
ae--;
be--;
}
// The leading part of an edit and its trailing part in the same
// text might be identical. Slide down that edit and use the tail
// rather than the leading bit. If however the edit is only on a
// whitespace block try to shift it to the left margin, assuming
// that it is an indentation change.
//
boolean aShiftRight = true;
if (ab < ae && isOnlyWhitespace(a, ab, ae)) {
int lf = findLF(wordEdits, j, a, ab);
if (a.charAt(lf) == '\n') {
int nb = lf + 1;
int p = 0;
while (p < ae - ab) {
if (a.equals(ab + p, a, ab + p))
p++;
else
break;
}
if (p == ae - ab) {
ab = nb;
ae = nb + p;
aShiftRight = false;
}
}
}
if (aShiftRight) {
while (ab < ae && ae < a.size() && a.equals(ab, a, ae)) {
ab++;
ae++;
if (a.charAt(ae - 1) == '\n') {
break;
}
}
}
boolean bShiftRight = true;
if (bb < be && isOnlyWhitespace(b, bb, be)) {
int lf = findLF(wordEdits, j, b, bb);
if (b.charAt(lf) == '\n') {
int nb = lf + 1;
int p = 0;
while (p < be - bb) {
if (b.equals(bb + p, b, bb + p))
p++;
else
break;
}
if (p == be - bb) {
bb = nb;
be = nb + p;
bShiftRight = false;
}
}
}
if (bShiftRight) {
while (bb < be && be < b.size() && b.equals(bb, b, be)) {
bb++;
be++;
if (b.charAt(be - 1) == '\n') {
break;
}
}
}
// If most of a line was modified except the LF was common, make
// the LF part of the modification region. This is easier to read.
//
if (ab < ae //
&& (ab == 0 || a.charAt(ab - 1) == '\n') //
&& ae < a.size() && a.charAt(ae) == '\n') {
ae++;
}
if (bb < be //
&& (bb == 0 || b.charAt(bb - 1) == '\n') //
&& be < b.size() && b.charAt(be) == '\n') {
be++;
}
wordEdits.set(j, new Edit(ab, ae, bb, be));
}
edits.set(i, new ReplaceEdit(e, wordEdits));
}
}
@@ -253,6 +367,24 @@ public class PatchListCacheImpl implements PatchListCache {
return new PatchListEntry(fileHeader, edits);
}
private static int findLF(List<Edit> edits, int j, CharText t, int b) {
int lf = b;
int limit = 0 < j ? edits.get(j - 1).getEndB() : 0;
while (limit < lf && t.charAt(lf) != '\n') {
lf--;
}
return lf;
}
private static boolean isOnlyWhitespace(CharText t, final int b, final int e) {
for (int c = b; c < e; c++) {
if (!Character.isWhitespace(t.charAt(c))) {
return false;
}
}
return b < e;
}
private static Text read(Repository repo, String path, RevTree tree)
throws IOException {
TreeWalk tw = TreeWalk.forPath(repo, path, tree);

View File

@@ -52,17 +52,17 @@ public class Text extends RawText {
}
public String getLine(final int i) {
return getLines(i, i + 1);
return getLines(i, i + 1, true);
}
public String getLines(final int begin, final int end) {
public String getLines(final int begin, final int end, boolean dropLF) {
if (begin == end) {
return "";
}
final int s = getLineStart(begin);
int e = getLineEnd(end - 1);
if (content[e - 1] == '\n') {
if (dropLF && content[e - 1] == '\n') {
e--;
}
return decode(s, e);