GrDiffBuilder: performance optimizations for building diff rows

This shows a nearly 4x improvement (in Chrome) for building
the DOM structure for a large diff. (this doesn't include
style calculation and layout, which is still expensive).

Bug: Issue 6514
Change-Id: Id06db0c9a281b846078aef0e6d584e1fa3c03d0f
This commit is contained in:
Nick Carter
2017-11-20 11:57:02 -08:00
committed by Wyatt Allen
parent fc8032393c
commit 8b4b718d54
2 changed files with 210 additions and 247 deletions

View File

@@ -14,21 +14,30 @@
(function(window, GrDiffGroup, GrDiffLine) {
'use strict';
const HTML_ENTITY_PATTERN = /[&<>"'`\/]/g;
const HTML_ENTITY_MAP = {
'&': '&amp;',
'<': '&lt;',
'>': '&gt;',
'"': '&quot;',
'\'': '&#39;',
'/': '&#x2F;',
'`': '&#96;',
};
// Prevent redefinition.
if (window.GrDiffBuilder) { return; }
const REGEX_ASTRAL_SYMBOL = /[\uD800-\uDBFF][\uDC00-\uDFFF]/;
/**
* In JS, unicode code points above 0xFFFF occupy two elements of a string.
* For example '𐀏'.length is 2. An occurence of such a code point is called a
* surrogate pair.
*
* This regex segments a string along tabs ('\t') and surrogate pairs, since
* these are two cases where '1 char' does not automatically imply '1 column'.
*
* TODO: For human languages whose orthographies use combining marks, this
* approach won't correctly identify the grapheme boundaries. In those cases,
* a grapheme consists of multiple code points that should count as only one
* character against the column limit. Getting that correct (if it's desired)
* is probably beyond the limits of a regex, but there are nonstandard APIs to
* do this, and proposed (but, as of Nov 2017, unimplemented) standard APIs.
*
* Further reading:
* On Unicode in JS: https://mathiasbynens.be/notes/javascript-unicode
* Graphemes: http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
* A proposed JS API: https://github.com/tc39/proposal-intl-segmenter
*/
const REGEX_TAB_OR_SURROGATE_PAIR = /\t|[\uD800-\uDBFF][\uDC00-\uDFFF]/;
function GrDiffBuilder(diff, comments, prefs, projectName, outputEl, layers) {
this._diff = diff;
@@ -41,6 +50,15 @@
this.layers = layers || [];
if (isNaN(prefs.tab_size) || prefs.tab_size <= 0) {
throw Error('Invalid tab size from preferences.');
}
if (isNaN(prefs.line_length) || prefs.line_length <= 0) {
throw Error('Invalid line length from preferences.');
}
for (const layer of this.layers) {
if (layer.addListener) {
layer.addListener(this._handleLayerUpdate.bind(this));
@@ -48,14 +66,6 @@
}
}
GrDiffBuilder.LESS_THAN_CODE = '<'.charCodeAt(0);
GrDiffBuilder.GREATER_THAN_CODE = '>'.charCodeAt(0);
GrDiffBuilder.AMPERSAND_CODE = '&'.charCodeAt(0);
GrDiffBuilder.SEMICOLON_CODE = ';'.charCodeAt(0);
GrDiffBuilder.LINE_FEED_HTML =
'<span class="style-scope gr-diff br"></span>';
GrDiffBuilder.GroupType = {
ADDED: 'b',
BOTH: 'ab',
@@ -214,10 +224,6 @@
group => { return group.element; });
};
GrDiffBuilder.prototype._commentIsAtLineNum = function(side, lineNum) {
return this._commentLocations[side][lineNum] === true;
};
// TODO(wyatta): Move this completely into the processor.
GrDiffBuilder.prototype._insertContextGroups = function(groups, lines,
hiddenRange) {
@@ -350,8 +356,8 @@
return threadGroupEl;
};
GrDiffBuilder.prototype._commentThreadGroupForLine = function(line,
opt_side) {
GrDiffBuilder.prototype._commentThreadGroupForLine = function(
line, opt_side) {
const comments =
this._getCommentsForLine(this._comments, line, opt_side);
if (!comments || comments.length === 0) {
@@ -361,7 +367,7 @@
let patchNum = this._comments.meta.patchRange.patchNum;
let isOnParent = comments[0].side === 'PARENT' || false;
if (line.type === GrDiffLine.Type.REMOVE ||
opt_side === GrDiffBuilder.Side.LEFT) {
opt_side === GrDiffBuilder.Side.LEFT) {
if (this._comments.meta.patchRange.basePatchNum === 'PARENT') {
isOnParent = true;
} else {
@@ -369,9 +375,7 @@
}
}
const threadGroupEl = this.createCommentThreadGroup(
this._comments.meta.changeNum,
patchNum,
this._comments.meta.path,
this._comments.meta.changeNum, patchNum, this._comments.meta.path,
isOnParent);
threadGroupEl.comments = comments;
if (opt_side) {
@@ -380,8 +384,8 @@
return threadGroupEl;
};
GrDiffBuilder.prototype._createLineEl = function(line, number, type,
opt_class) {
GrDiffBuilder.prototype._createLineEl = function(
line, number, type, opt_class) {
const td = this._createElement('td');
if (opt_class) {
td.classList.add(opt_class);
@@ -407,32 +411,20 @@
GrDiffBuilder.prototype._createTextEl = function(line, opt_side) {
const td = this._createElement('td');
const text = line.text;
if (line.type !== GrDiffLine.Type.BLANK) {
td.classList.add('content');
}
td.classList.add(line.type);
let html = this._escapeHTML(text);
html = this._addTabWrappers(html, this._prefs.tab_size);
if (!this._prefs.line_wrapping &&
this._textLength(text, this._prefs.tab_size) >
this._prefs.line_length) {
html = this._addNewlines(text, html);
}
const contentText = this._createElement('div', 'contentText');
const lineLimit =
!this._prefs.line_wrapping ? this._prefs.line_length : Infinity;
const contentText =
this._formatText(line.text, this._prefs.tab_size, lineLimit);
if (opt_side) {
contentText.setAttribute('data-side', opt_side);
}
// If the html is equivalent to the text then it didn't get highlighted
// or escaped. Use textContent which is faster than innerHTML.
if (html === text) {
contentText.textContent = text;
} else {
contentText.innerHTML = html;
}
for (const layer of this.layers) {
layer.annotate(contentText, line);
}
@@ -443,139 +435,85 @@
};
/**
* Returns the text length after normalizing unicode and tabs.
* @return {number} The normalized length of the text.
* Returns a 'div' element containing the supplied |text| as its innerText,
* with '\t' characters expanded to a width determined by |tabSize|, and the
* text wrapped at column |lineLimit|, which may be Infinity if no wrapping is
* desired.
*
* @param {string} text The text to be formatted.
* @param {number} tabSize The width of each tab stop.
* @param {number} lineLimit The column after which to wrap lines.
* @return {HTMLElement}
*/
GrDiffBuilder.prototype._textLength = function(text, tabSize) {
text = text.replace(REGEX_ASTRAL_SYMBOL, '_');
let numChars = 0;
for (let i = 0; i < text.length; i++) {
if (text[i] === '\t') {
numChars += tabSize - (numChars % tabSize);
} else {
numChars++;
}
}
return numChars;
};
GrDiffBuilder.prototype._formatText = function(text, tabSize, lineLimit) {
const contentText = this._createElement('div', 'contentText');
// Advance `index` by the appropriate number of characters that would
// represent one source code character and return that index. For
// example, for source code '<span>' the escaped html string is
// '&lt;span&gt;'. Advancing from index 0 on the prior html string would
// return 4, since &lt; maps to one source code character ('<').
GrDiffBuilder.prototype._advanceChar = function(html, index) {
// TODO(andybons): Unicode is all kinds of messed up in JS. Account for it.
// https://mathiasbynens.be/notes/javascript-unicode
// Tags don't count as characters
while (index < html.length &&
html.charCodeAt(index) === GrDiffBuilder.LESS_THAN_CODE) {
while (index < html.length &&
html.charCodeAt(index) !== GrDiffBuilder.GREATER_THAN_CODE) {
index++;
}
index++; // skip the ">" itself
}
// An HTML entity (e.g., &lt;) counts as one character.
if (index < html.length &&
html.charCodeAt(index) === GrDiffBuilder.AMPERSAND_CODE) {
while (index < html.length &&
html.charCodeAt(index) !== GrDiffBuilder.SEMICOLON_CODE) {
index++;
}
}
return index + 1;
};
GrDiffBuilder.prototype._advancePastTagClose = function(html, index) {
while (index < html.length &&
html.charCodeAt(index) !== GrDiffBuilder.GREATER_THAN_CODE) {
index++;
}
return index + 1;
};
GrDiffBuilder.prototype._addNewlines = function(text, html) {
let htmlIndex = 0;
const indices = [];
let numChars = 0;
let prevHtmlIndex = 0;
for (let i = 0; i < text.length; i++) {
if (numChars > 0 && numChars % this._prefs.line_length === 0) {
indices.push(htmlIndex);
}
htmlIndex = this._advanceChar(html, htmlIndex);
if (text[i] === '\t') {
// Advance past tab closing tag.
htmlIndex = this._advancePastTagClose(html, htmlIndex);
// ~~ is a faster Math.floor
if (~~(numChars / this._prefs.line_length) !==
~~((numChars + this._prefs.tab_size) / this._prefs.line_length)) {
// Tab crosses line limit - push it to the next line.
indices.push(prevHtmlIndex);
let columnPos = 0;
let textOffset = 0;
for (const segment of text.split(REGEX_TAB_OR_SURROGATE_PAIR)) {
if (segment) {
// |segment| contains only normal characters. If |segment| doesn't fit
// entirely on the current line, append chunks of |segment| followed by
// line breaks.
let rowStart = 0;
let rowEnd = lineLimit - columnPos;
while (rowEnd < segment.length) {
contentText.appendChild(
document.createTextNode(segment.substring(rowStart, rowEnd)));
contentText.appendChild(this._createElement('span', 'br'));
columnPos = 0;
rowStart = rowEnd;
rowEnd += lineLimit;
}
// Append the last part of |segment|, which fits on the current line.
contentText.appendChild(
document.createTextNode(segment.substring(rowStart)));
columnPos += (segment.length - rowStart);
textOffset += segment.length;
}
if (textOffset < text.length) {
// Handle the special character at |textOffset|.
if (text.startsWith('\t', textOffset)) {
// Append a single '\t' character.
let effectiveTabSize = tabSize - (columnPos % tabSize);
if (columnPos + effectiveTabSize > lineLimit) {
contentText.appendChild(this._createElement('span', 'br'));
columnPos = 0;
effectiveTabSize = tabSize;
}
contentText.appendChild(this._getTabWrapper(effectiveTabSize));
columnPos += effectiveTabSize;
textOffset++;
} else {
// Append a single surrogate pair.
if (columnPos >= lineLimit) {
contentText.appendChild(this._createElement('span', 'br'));
columnPos = 0;
}
contentText.appendChild(document.createTextNode(
text.substring(textOffset, textOffset + 2)));
textOffset += 2;
columnPos += 1;
}
numChars += this._prefs.tab_size;
} else {
numChars++;
}
prevHtmlIndex = htmlIndex;
}
let result = html;
// Since the result string is being altered in place, start from the end
// of the string so that the insertion indices are not affected as the
// result string changes.
for (let i = indices.length - 1; i >= 0; i--) {
result = result.slice(0, indices[i]) + GrDiffBuilder.LINE_FEED_HTML +
result.slice(indices[i]);
}
return result;
return contentText;
};
/**
* Takes a string of text (not HTML) and returns a string of HTML with tab
* elements in place of tab characters. In each case tab elements are given
* the width needed to reach the next tab-stop.
* Returns a <span> element holding a '\t' character, that will visually
* occupy |tabSize| many columns.
*
* @param {string} A line of text potentially containing tab characters.
* @param {number} The width for tabs.
* @return {string} An HTML string potentially containing tab elements.
* @param {number} tabSize The effective size of this tab stop.
* @return {HTMLElement}
*/
GrDiffBuilder.prototype._addTabWrappers = function(line, tabSize) {
if (!line.length) { return ''; }
let result = '';
let offset = 0;
const split = line.split('\t');
let width;
for (let i = 0; i < split.length - 1; i++) {
offset += split[i].length;
width = tabSize - (offset % tabSize);
result += split[i] + this._getTabWrapper(width);
offset += width;
}
if (split.length) {
result += split[split.length - 1];
}
return result;
};
GrDiffBuilder.prototype._getTabWrapper = function(tabSize) {
// Force this to be a number to prevent arbitrary injection.
tabSize = +tabSize;
if (isNaN(tabSize)) {
throw Error('Invalid tab size from preferences.');
}
let str = '<span class="style-scope gr-diff tab ';
str += '" style="';
// TODO(andybons): CSS tab-size is not supported in IE.
str += 'tab-size:' + tabSize + ';';
str += '-moz-tab-size:' + tabSize + ';';
str += '">\t</span>';
return str;
const result = this._createElement('span', 'tab');
result.style['tab-size'] = tabSize;
result.style['-moz-tab-size'] = tabSize;
result.innerText = '\t';
return result;
};
GrDiffBuilder.prototype._createElement = function(tagName, className) {
@@ -620,14 +558,8 @@
!(!group.adds.length && !group.removes.length);
};
GrDiffBuilder.prototype._escapeHTML = function(str) {
return str.replace(HTML_ENTITY_PATTERN, s => {
return HTML_ENTITY_MAP[s];
});
};
/**
* Set the blame information for the diff. For any already-rednered line,
* Set the blame information for the diff. For any already-rendered line,
* re-render its blame cell content.
* @param {Object} blame
*/

View File

@@ -59,6 +59,7 @@ limitations under the License.
let element;
let builder;
let sandbox;
const LINE_FEED_HTML = '<span class="style-scope gr-diff br"></span>';
setup(() => {
sandbox = sinon.sandbox.create();
@@ -109,104 +110,139 @@ limitations under the License.
test('newlines 1', () => {
let text = 'abcdef';
assert.equal(builder._addNewlines(text, text), text);
assert.equal(builder._formatText(text, 4, 10).innerHTML, text);
text = 'a'.repeat(20);
assert.equal(builder._addNewlines(text, text),
assert.equal(builder._formatText(text, 4, 10).innerHTML,
'a'.repeat(10) +
GrDiffBuilder.LINE_FEED_HTML +
LINE_FEED_HTML +
'a'.repeat(10));
});
test('newlines 2', () => {
const text = '<span class="thumbsup">👍</span>';
const html =
'&lt;span class=&quot;thumbsup&quot;&gt;👍&lt;&#x2F;span&gt;';
assert.equal(builder._addNewlines(text, html),
assert.equal(builder._formatText(text, 4, 10).innerHTML,
'&lt;span clas' +
GrDiffBuilder.LINE_FEED_HTML +
's=&quot;thumbsu' +
GrDiffBuilder.LINE_FEED_HTML +
'p&quot;&gt;👍&lt;&#x2F;spa' +
GrDiffBuilder.LINE_FEED_HTML +
'n&gt;');
LINE_FEED_HTML +
's="thumbsu' +
LINE_FEED_HTML +
'p"&gt;👍&lt;/span' +
LINE_FEED_HTML +
'&gt;');
});
test('newlines 3', () => {
const text = '01234\t56789';
const html = '01234<span>\t</span>56789';
assert.equal(builder._addNewlines(text, html),
'01234<span>\t</span>5' +
GrDiffBuilder.LINE_FEED_HTML +
'6789');
assert.equal(builder._formatText(text, 4, 10).innerHTML,
'01234' + builder._getTabWrapper(3).outerHTML + '56' +
LINE_FEED_HTML +
'789');
});
test('_addNewlines not called if line_wrapping is true', done => {
test('newlines 4', () => {
const text = '👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍';
assert.equal(builder._formatText(text, 4, 20).innerHTML,
'👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍' +
LINE_FEED_HTML +
'👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍' +
LINE_FEED_HTML +
'👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍👍');
});
test('line_length ignored if line_wrapping is true', () => {
builder._prefs = {line_wrapping: true, tab_size: 4, line_length: 50};
const text = (new Array(52)).join('a');
const text = 'a'.repeat(51);
const line = {text, highlights: []};
const newLineStub = sandbox.stub(builder, '_addNewlines');
builder._createTextEl(line);
flush(() => {
assert.isFalse(newLineStub.called);
done();
});
const result = builder._createTextEl(line).firstChild.innerHTML;
assert.equal(result, text);
});
test('_addNewlines called if line_wrapping is true and meets other ' +
'conditions', done => {
test('line_length applied if line_wrapping is false', () => {
builder._prefs = {line_wrapping: false, tab_size: 4, line_length: 50};
const text = (new Array(52)).join('a');
const text = 'a'.repeat(51);
const line = {text, highlights: []};
const newLineStub = sandbox.stub(builder, '_addNewlines');
builder._createTextEl(line);
flush(() => {
assert.isTrue(newLineStub.called);
done();
});
const expected = 'a'.repeat(50) + LINE_FEED_HTML + 'a';
const result = builder._createTextEl(line).firstChild.innerHTML;
assert.equal(result, expected);
});
test('_createTextEl linewrap with tabs', () => {
const text = _.times(7, _.constant('\t')).join('') + '!';
const text = '\t'.repeat(7) + '!';
const line = {text, highlights: []};
const el = builder._createTextEl(line);
const tabEl = el.querySelector('.contentText > .br');
assert.isOk(tabEl);
assert.equal(el.innerText, text);
// With line length 10 and tab size 2, there should be a line break
// after every two tabs.
const newlineEl = el.querySelector('.contentText > .br');
assert.isOk(newlineEl);
assert.equal(
el.querySelector('.contentText .tab:nth-child(2)').nextSibling,
tabEl);
newlineEl);
});
test('text length with tabs and unicode', () => {
assert.equal(builder._textLength('12345', 4), 5);
assert.equal(builder._textLength('\t\t12', 4), 10);
assert.equal(builder._textLength('abc💢123', 4), 7);
function expectTextLength(text, tabSize, expected) {
// Formatting to |expected| columns should not introduce line breaks.
const result = builder._formatText(text, tabSize, expected);
assert.isNotOk(result.querySelector('.contentText > .br'),
` Expected the result of: \n` +
` _formatText(${text}', ${tabSize}, ${expected})\n` +
` to not contain a br. But the actual result HTML was:\n` +
` '${result.innerHTML}'\nwhereupon`);
assert.equal(builder._textLength('abc\t', 8), 8);
assert.equal(builder._textLength('abc\t\t', 10), 20);
assert.equal(builder._textLength('', 10), 0);
assert.equal(builder._textLength('', 10), 0);
assert.equal(builder._textLength('abc\tde', 10), 12);
assert.equal(builder._textLength('abc\tde\t', 10), 20);
assert.equal(builder._textLength('\t\t\t\t\t', 20), 100);
// Increasing the line limit should produce the same markup.
assert.equal(builder._formatText(text, tabSize, Infinity).innerHTML,
result.innerHTML);
assert.equal(builder._formatText(text, tabSize, expected + 1).innerHTML,
result.innerHTML);
// Decreasing the line limit should introduce line breaks.
if (expected > 0) {
const tooSmall = builder._formatText(text, tabSize, expected - 1);
assert.isOk(tooSmall.querySelector('.contentText > .br'),
` Expected the result of: \n` +
` _formatText(${text}', ${tabSize}, ${expected - 1})\n` +
` to contain a br. But the actual result HTML was:\n` +
` '${tooSmall.innerHTML}'\nwhereupon`);
}
}
expectTextLength('12345', 4, 5);
expectTextLength('\t\t12', 4, 10);
expectTextLength('abc💢123', 4, 7);
expectTextLength('abc\t', 8, 8);
expectTextLength('abc\t\t', 10, 20);
expectTextLength('', 10, 0);
expectTextLength('', 10, 0);
// 17 Thai combining chars.
expectTextLength('ก้้้้้้้้้้้้้้้้', 4, 17);
expectTextLength('abc\tde', 10, 12);
expectTextLength('abc\tde\t', 10, 20);
expectTextLength('\t\t\t\t\t', 20, 100);
});
test('tab wrapper insertion', () => {
const html = 'abc\tdef';
const wrapper = builder._getTabWrapper(
builder._prefs.tab_size - 3,
builder._prefs.show_tabs);
const tabSize = builder._prefs.tab_size;
const wrapper = builder._getTabWrapper(tabSize - 3);
assert.ok(wrapper);
assert.isAbove(wrapper.length, 0);
assert.equal(builder._addTabWrappers(html, builder._prefs.tab_size),
'abc' + wrapper + 'def');
assert.throws(builder._getTabWrapper.bind(
builder,
// using \x3c instead of < in string so gjslint can parse
'">\x3cimg src="/" onerror="alert(1);">\x3cspan class="',
true));
assert.equal(wrapper.innerText, '\t');
assert.equal(
builder._formatText(html, tabSize, Infinity).innerHTML,
'abc' + wrapper.outerHTML + 'def');
});
test('tab wrapper style', () => {
const pattern = new RegExp('^<span class="style-scope gr-diff tab" '
+ 'style="(?:-moz-)?tab-size: (\\d+);">\\t<\\/span>$');
for (const size of [1, 3, 8, 55]) {
const html = builder._getTabWrapper(size).outerHTML;
expect(html).to.match(pattern);
assert.equal(html.match(pattern)[1], size);
}
});
test('comments', () => {
@@ -986,20 +1022,15 @@ limitations under the License.
});
});
test('_escapeHTML', () => {
test('escaping HTML', () => {
let input = '<script>alert("XSS");<' + '/script>';
let expected = '&lt;script&gt;alert(&quot;XSS&quot;);' +
'&lt;&#x2F;script&gt;';
let result = GrDiffBuilder.prototype._escapeHTML(input);
let expected = '&lt;script&gt;alert("XSS");&lt;/script&gt;';
let result = builder._formatText(input, 1, Infinity).innerHTML;
assert.equal(result, expected);
input = '& < > " \' / `';
// \u0026 is an ampersand. This is being used here instead of &
// because of the gjslinter.
expected = '\u0026amp; \u0026lt; \u0026gt; \u0026quot;' +
' \u0026#39; \u0026#x2F; \u0026#96;';
result = GrDiffBuilder.prototype._escapeHTML(input);
expected = '&amp; &lt; &gt; " \' / `';
result = builder._formatText(input, 1, Infinity).innerHTML;
assert.equal(result, expected);
});
});