Index: node_modules/vulcanize/node_modules/whacko/node_modules/parse5/lib/tokenization/preprocessor.js |
diff --git a/node_modules/vulcanize/node_modules/whacko/node_modules/parse5/lib/tokenization/preprocessor.js b/node_modules/vulcanize/node_modules/whacko/node_modules/parse5/lib/tokenization/preprocessor.js |
new file mode 100644 |
index 0000000000000000000000000000000000000000..2e5632b98505e799ffca1f664ad28e75492b01b8 |
--- /dev/null |
+++ b/node_modules/vulcanize/node_modules/whacko/node_modules/parse5/lib/tokenization/preprocessor.js |
@@ -0,0 +1,100 @@ |
+'use strict'; |
+ |
+var UNICODE = require('../common/unicode'); |
+ |
+//Aliases: $ is shorthand for UNICODE.CODE_POINTS (this module reads $.BOM, $.EOF and $.REPLACEMENT_CHARACTER) |
+var $ = UNICODE.CODE_POINTS; |
+ |
+//Const: matches a CR optionally followed by an LF (a CR LF pair or a lone CR); the g flag covers every occurrence |
+var CARRIAGE_RETURN_NEW_LINE_REGEX = /\r\n?/g; |
+ |
+//Utils |
+ |
+//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline |
+//these functions if they are located in another module, due to the context switch. |
+//Always perform an inlining check before modifying these functions ('node --trace-inlining'). |
+//NOTE: returns true when cp falls in the surrogate range 0xD800-0xDFFF or lies above 0x10FFFF. |
+//advanceAndPeekCodePoint() substitutes $.REPLACEMENT_CHARACTER for such values. |
+function isReservedCodePoint(cp) { |
+ return cp >= 0xD800 && cp <= 0xDFFF || cp > 0x10FFFF; |
+} |
+ |
+//NOTE: returns true when cp1 is a high (lead) surrogate, 0xD800-0xDBFF, and cp2 is a low (trail) |
+//surrogate, 0xDC00-0xDFFF, i.e. the two UTF-16 code units together encode one code point. |
+function isSurrogatePair(cp1, cp2) { |
+ return cp1 >= 0xD800 && cp1 <= 0xDBFF && cp2 >= 0xDC00 && cp2 <= 0xDFFF; |
+} |
+ |
+//NOTE: combines a surrogate pair (cp1 - high, cp2 - low) into a single code point. Same value as |
+//0x10000 + (cp1 - 0xD800) * 0x400 + (cp2 - 0xDC00); the constant 0x2400 is 0x10000 - 0xDC00 folded together. |
+//The arguments are not validated here - the caller checks them with isSurrogatePair() first. |
+function getSurrogatePairCodePoint(cp1, cp2) { |
+ return (cp1 - 0xD800) * 0x400 + 0x2400 + cp2; |
+} |
+ |
+//Preprocessor |
+//NOTE: HTML input preprocessing |
+//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream) |
+var Preprocessor = module.exports = function (html) { |
+ this.write(html); |
+ |
+ //NOTE: one leading U+FEFF BYTE ORDER MARK character must be ignored if any are present in the input stream. |
+ this.pos = this.html.charCodeAt(0) === $.BOM ? 0 : -1; |
+ |
+ this.gapStack = []; |
+ this.lastGapPos = -1; |
+}; |
+ |
+Preprocessor.prototype.write = function (html) { |
+ //NOTE: All U+000D CARRIAGE RETURN (CR) characters must be converted to U+000A LINE FEED (LF) characters. |
+ //Any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character |
+ //must be ignored. |
+ html = html.replace(CARRIAGE_RETURN_NEW_LINE_REGEX, '\n'); |
+ |
+ if (this.html) { |
+ this.html = this.html.substring(0, this.pos + 1) + |
+ html + |
+ this.html.substring(this.pos + 1, this.html.length); |
+ |
+ } |
+ else |
+ this.html = html; |
+ |
+ |
+ this.lastCharPos = this.html.length - 1; |
+}; |
+ |
+Preprocessor.prototype.advanceAndPeekCodePoint = function () { |
+ this.pos++; |
+ |
+ if (this.pos > this.lastCharPos) |
+ return $.EOF; |
+ |
+ var cp = this.html.charCodeAt(this.pos); |
+ |
+ //OPTIMIZATION: first perform check if the code point in the allowed range that covers most common |
+ //HTML input (e.g. ASCII codes) to avoid performance-cost operations for high-range code points. |
+ if (cp >= 0xD800) { |
+ //NOTE: try to peek a surrogate pair |
+ if (this.pos !== this.lastCharPos) { |
+ var nextCp = this.html.charCodeAt(this.pos + 1); |
+ |
+ if (isSurrogatePair(cp, nextCp)) { |
+ //NOTE: we have a surrogate pair. Peek pair character and recalculate code point. |
+ this.pos++; |
+ cp = getSurrogatePairCodePoint(cp, nextCp); |
+ |
+ //NOTE: add gap that should be avoided during retreat |
+ this.gapStack.push(this.lastGapPos); |
+ this.lastGapPos = this.pos; |
+ } |
+ } |
+ |
+ if (isReservedCodePoint(cp)) |
+ cp = $.REPLACEMENT_CHARACTER; |
+ } |
+ |
+ return cp; |
+}; |
+ |
+Preprocessor.prototype.retreat = function () { |
+ if (this.pos === this.lastGapPos) { |
+ this.lastGapPos = this.gapStack.pop(); |
+ this.pos--; |
+ } |
+ |
+ this.pos--; |
+}; |