Tweak the intraline difference heuristics
We had a few cases where the intraline difference was showing us something confusing, like replacing characters in a common word with other characters from a different word. A bit of shifting the offsets after collapsing neighboring edits resolves this nicely. Change-Id: Ic947912277d96bf78afbc9831fb16e4b64e31fb1 Signed-off-by: Shawn O. Pearce <sop@google.com>
This commit is contained in:
@@ -20,7 +20,11 @@ class CharText implements Sequence {
|
|||||||
private final String content;
|
private final String content;
|
||||||
|
|
||||||
CharText(Text text, int s, int e) {
|
CharText(Text text, int s, int e) {
|
||||||
content = text.getLines(s, e);
|
content = text.getLines(s, e, false /* keep LF */);
|
||||||
|
}
|
||||||
|
|
||||||
|
char charAt(int idx) {
|
||||||
|
return content.charAt(idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
@@ -229,23 +229,137 @@ public class PatchListCacheImpl implements PatchListCache {
|
|||||||
CharText b = new CharText(bContent, e.getBeginB(), e.getEndB());
|
CharText b = new CharText(bContent, e.getBeginB(), e.getEndB());
|
||||||
|
|
||||||
List<Edit> wordEdits = new MyersDiff(a, b).getEdits();
|
List<Edit> wordEdits = new MyersDiff(a, b).getEdits();
|
||||||
|
|
||||||
|
// Combine edits that are really close together. If they are
|
||||||
|
// just a few characters apart we tend to get better results
|
||||||
|
// by joining them together and taking the whole span.
|
||||||
|
//
|
||||||
for (int j = 0; j < wordEdits.size() - 1;) {
|
for (int j = 0; j < wordEdits.size() - 1;) {
|
||||||
Edit c = wordEdits.get(j);
|
Edit c = wordEdits.get(j);
|
||||||
Edit n = wordEdits.get(j + 1);
|
Edit n = wordEdits.get(j + 1);
|
||||||
|
|
||||||
if (n.getBeginA() - c.getEndA() <= 2
|
if (n.getBeginA() - c.getEndA() <= 5
|
||||||
|| n.getBeginB() - c.getEndB() <= 2) {
|
|| n.getBeginB() - c.getEndB() <= 5) {
|
||||||
// This edit is incredibly close to the start of the next.
|
int ab = c.getBeginA();
|
||||||
// Combine them together.
|
int ae = n.getEndA();
|
||||||
//
|
|
||||||
wordEdits.set(j, new Edit(c.getBeginA(), n.getEndA(),
|
int bb = c.getBeginB();
|
||||||
c.getBeginB(), n.getEndB()));
|
int be = n.getEndB();
|
||||||
|
|
||||||
|
wordEdits.set(j, new Edit(ab, ae, bb, be));
|
||||||
wordEdits.remove(j + 1);
|
wordEdits.remove(j + 1);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
j++;
|
j++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Apply some simple rules to fix up some of the edits. Our
|
||||||
|
// logic above, along with our per-character difference tends
|
||||||
|
// to produce some crazy stuff.
|
||||||
|
//
|
||||||
|
for (int j = 0; j < wordEdits.size(); j++) {
|
||||||
|
Edit c = wordEdits.get(j);
|
||||||
|
int ab = c.getBeginA();
|
||||||
|
int ae = c.getEndA();
|
||||||
|
|
||||||
|
int bb = c.getBeginB();
|
||||||
|
int be = c.getEndB();
|
||||||
|
|
||||||
|
// We sometimes collapsed an edit together in a strange way,
|
||||||
|
// such that the edges of each text is identical. Fix by
|
||||||
|
// by dropping out that incorrectly replaced region.
|
||||||
|
//
|
||||||
|
while (ab < ae && bb < be && a.equals(ab, b, bb)) {
|
||||||
|
ab++;
|
||||||
|
bb++;
|
||||||
|
}
|
||||||
|
while (ab < ae && bb < be && a.equals(ae - 1, b, be - 1)) {
|
||||||
|
ae--;
|
||||||
|
be--;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The leading part of an edit and its trailing part in the same
|
||||||
|
// text might be identical. Slide down that edit and use the tail
|
||||||
|
// rather than the leading bit. If however the edit is only on a
|
||||||
|
// whitespace block try to shift it to the left margin, assuming
|
||||||
|
// that it is an indentation change.
|
||||||
|
//
|
||||||
|
boolean aShiftRight = true;
|
||||||
|
if (ab < ae && isOnlyWhitespace(a, ab, ae)) {
|
||||||
|
int lf = findLF(wordEdits, j, a, ab);
|
||||||
|
if (a.charAt(lf) == '\n') {
|
||||||
|
int nb = lf + 1;
|
||||||
|
int p = 0;
|
||||||
|
while (p < ae - ab) {
|
||||||
|
if (a.equals(ab + p, a, ab + p))
|
||||||
|
p++;
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (p == ae - ab) {
|
||||||
|
ab = nb;
|
||||||
|
ae = nb + p;
|
||||||
|
aShiftRight = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (aShiftRight) {
|
||||||
|
while (ab < ae && ae < a.size() && a.equals(ab, a, ae)) {
|
||||||
|
ab++;
|
||||||
|
ae++;
|
||||||
|
if (a.charAt(ae - 1) == '\n') {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean bShiftRight = true;
|
||||||
|
if (bb < be && isOnlyWhitespace(b, bb, be)) {
|
||||||
|
int lf = findLF(wordEdits, j, b, bb);
|
||||||
|
if (b.charAt(lf) == '\n') {
|
||||||
|
int nb = lf + 1;
|
||||||
|
int p = 0;
|
||||||
|
while (p < be - bb) {
|
||||||
|
if (b.equals(bb + p, b, bb + p))
|
||||||
|
p++;
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (p == be - bb) {
|
||||||
|
bb = nb;
|
||||||
|
be = nb + p;
|
||||||
|
bShiftRight = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (bShiftRight) {
|
||||||
|
while (bb < be && be < b.size() && b.equals(bb, b, be)) {
|
||||||
|
bb++;
|
||||||
|
be++;
|
||||||
|
if (b.charAt(be - 1) == '\n') {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If most of a line was modified except the LF was common, make
|
||||||
|
// the LF part of the modification region. This is easier to read.
|
||||||
|
//
|
||||||
|
if (ab < ae //
|
||||||
|
&& (ab == 0 || a.charAt(ab - 1) == '\n') //
|
||||||
|
&& ae < a.size() && a.charAt(ae) == '\n') {
|
||||||
|
ae++;
|
||||||
|
}
|
||||||
|
if (bb < be //
|
||||||
|
&& (bb == 0 || b.charAt(bb - 1) == '\n') //
|
||||||
|
&& be < b.size() && b.charAt(be) == '\n') {
|
||||||
|
be++;
|
||||||
|
}
|
||||||
|
|
||||||
|
wordEdits.set(j, new Edit(ab, ae, bb, be));
|
||||||
|
}
|
||||||
|
|
||||||
edits.set(i, new ReplaceEdit(e, wordEdits));
|
edits.set(i, new ReplaceEdit(e, wordEdits));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -253,6 +367,24 @@ public class PatchListCacheImpl implements PatchListCache {
|
|||||||
return new PatchListEntry(fileHeader, edits);
|
return new PatchListEntry(fileHeader, edits);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static int findLF(List<Edit> edits, int j, CharText t, int b) {
|
||||||
|
int lf = b;
|
||||||
|
int limit = 0 < j ? edits.get(j - 1).getEndB() : 0;
|
||||||
|
while (limit < lf && t.charAt(lf) != '\n') {
|
||||||
|
lf--;
|
||||||
|
}
|
||||||
|
return lf;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isOnlyWhitespace(CharText t, final int b, final int e) {
|
||||||
|
for (int c = b; c < e; c++) {
|
||||||
|
if (!Character.isWhitespace(t.charAt(c))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return b < e;
|
||||||
|
}
|
||||||
|
|
||||||
private static Text read(Repository repo, String path, RevTree tree)
|
private static Text read(Repository repo, String path, RevTree tree)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
TreeWalk tw = TreeWalk.forPath(repo, path, tree);
|
TreeWalk tw = TreeWalk.forPath(repo, path, tree);
|
||||||
|
|||||||
@@ -52,17 +52,17 @@ public class Text extends RawText {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public String getLine(final int i) {
|
public String getLine(final int i) {
|
||||||
return getLines(i, i + 1);
|
return getLines(i, i + 1, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getLines(final int begin, final int end) {
|
public String getLines(final int begin, final int end, boolean dropLF) {
|
||||||
if (begin == end) {
|
if (begin == end) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
final int s = getLineStart(begin);
|
final int s = getLineStart(begin);
|
||||||
int e = getLineEnd(end - 1);
|
int e = getLineEnd(end - 1);
|
||||||
if (content[e - 1] == '\n') {
|
if (dropLF && content[e - 1] == '\n') {
|
||||||
e--;
|
e--;
|
||||||
}
|
}
|
||||||
return decode(s, e);
|
return decode(s, e);
|
||||||
|
|||||||
Reference in New Issue
Block a user