Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(52)

Unified Diff: tools/vulcanize/node_modules/vulcanize/node_modules/cheerio/node_modules/htmlparser2/lib/Tokenizer.js

Issue 125733002: Add vulcanize to tools. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 6 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: tools/vulcanize/node_modules/vulcanize/node_modules/cheerio/node_modules/htmlparser2/lib/Tokenizer.js
diff --git a/tools/vulcanize/node_modules/vulcanize/node_modules/cheerio/node_modules/htmlparser2/lib/Tokenizer.js b/tools/vulcanize/node_modules/vulcanize/node_modules/cheerio/node_modules/htmlparser2/lib/Tokenizer.js
new file mode 100644
index 0000000000000000000000000000000000000000..5ef6d14f404933eb59dcf44b3eae74890302c53e
--- /dev/null
+++ b/tools/vulcanize/node_modules/vulcanize/node_modules/cheerio/node_modules/htmlparser2/lib/Tokenizer.js
@@ -0,0 +1,556 @@
+module.exports = Tokenizer;
+
+var i = 0,
+
+ TEXT = i++,
+ BEFORE_TAG_NAME = i++, //after <
+ IN_TAG_NAME = i++,
+ BEFORE_CLOSING_TAG_NAME = i++,
+ IN_CLOSING_TAG_NAME = i++,
+ AFTER_CLOSING_TAG_NAME = i++,
+
+ //attributes
+ BEFORE_ATTRIBUTE_NAME = i++,
+ IN_ATTRIBUTE_NAME = i++,
+ AFTER_ATTRIBUTE_NAME = i++,
+ BEFORE_ATTRIBUTE_VALUE = i++,
+ IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES = i++, // "
+ IN_ATTRIBUTE_VALUE_SINGLE_QUOTES = i++, // '
+ IN_ATTRIBUTE_VALUE_NO_QUOTES = i++,
+
+ //declarations
+ BEFORE_DECLARATION = i++, // !
+ IN_DECLARATION = i++,
+
+ //processing instructions
+ IN_PROCESSING_INSTRUCTION = i++, // ?
+
+ //comments
+ BEFORE_COMMENT = i++,
+ IN_COMMENT = i++,
+ AFTER_COMMENT_1 = i++,
+ AFTER_COMMENT_2 = i++,
+
+ //cdata
+ BEFORE_CDATA_1 = i++, // [
+ BEFORE_CDATA_2 = i++, // C
+ BEFORE_CDATA_3 = i++, // D
+ BEFORE_CDATA_4 = i++, // A
+ BEFORE_CDATA_5 = i++, // T
+ BEFORE_CDATA_6 = i++, // A
+ IN_CDATA = i++,// [
+ AFTER_CDATA_1 = i++, // ]
+ AFTER_CDATA_2 = i++, // ]
+
+ //special tags
+ BEFORE_SPECIAL = i++, //S
+ BEFORE_SPECIAL_END = i++, //S
+
+ BEFORE_SCRIPT_1 = i++, //C
+ BEFORE_SCRIPT_2 = i++, //R
+ BEFORE_SCRIPT_3 = i++, //I
+ BEFORE_SCRIPT_4 = i++, //P
+ BEFORE_SCRIPT_5 = i++, //T
+ AFTER_SCRIPT_1 = i++, //C
+ AFTER_SCRIPT_2 = i++, //R
+ AFTER_SCRIPT_3 = i++, //I
+ AFTER_SCRIPT_4 = i++, //P
+ AFTER_SCRIPT_5 = i++, //T
+
+ BEFORE_STYLE_1 = i++, //T
+ BEFORE_STYLE_2 = i++, //Y
+ BEFORE_STYLE_3 = i++, //L
+ BEFORE_STYLE_4 = i++, //E
+ AFTER_STYLE_1 = i++, //T
+ AFTER_STYLE_2 = i++, //Y
+ AFTER_STYLE_3 = i++, //L
+ AFTER_STYLE_4 = i++; //E
+
+
+function whitespace(c){
+ return c === " " || c === "\t" || c === "\r" || c === "\n";
+}
+
+function Tokenizer(options, cbs){
+ this._state = TEXT;
+ this._buffer = "";
+ this._sectionStart = 0;
+ this._index = 0;
+ this._options = options;
+ this._special = 0; // 1 for script, 2 for style
+ this._cbs = cbs;
+ this._running = true;
+}
+
+//TODO make events conditional
+Tokenizer.prototype.write = function(chunk){
+ this._buffer += chunk;
+
+ while(this._index < this._buffer.length && this._running){
+ var c = this._buffer.charAt(this._index);
+ if(this._state === TEXT){
+ if(c === "<"){
+ this._emitIfToken("ontext");
+ this._state = BEFORE_TAG_NAME;
+ this._sectionStart = this._index;
+ }
+ } else if(this._state === BEFORE_TAG_NAME){
+ if(c === "/"){
+ this._state = BEFORE_CLOSING_TAG_NAME;
+ } else if(c === ">" || this._special > 0) {
+ this._state = TEXT;
+ } else {
+ if(whitespace(c));
+ else if(c === "!"){
+ this._state = BEFORE_DECLARATION;
+ this._sectionStart = this._index + 1;
+ } else if(c === "?"){
+ this._state = IN_PROCESSING_INSTRUCTION;
+ this._sectionStart = this._index + 1;
+ } else if(
+ !(this._options && this._options.xmlMode) &&
+ (c === "s" || c === "S")
+ ){
+ this._state = BEFORE_SPECIAL;
+ this._sectionStart = this._index;
+ } else {
+ this._state = IN_TAG_NAME;
+ this._sectionStart = this._index;
+ }
+ }
+ } else if(this._state === IN_TAG_NAME){
+ if(c === "/"){
+ this._emitToken("onopentagname");
+ this._cbs.onselfclosingtag();
+ this._state = AFTER_CLOSING_TAG_NAME;
+ } else if(c === ">"){
+ this._emitToken("onopentagname");
+ this._cbs.onopentagend();
+ this._state = TEXT;
+ this._sectionStart = this._index + 1;
+ } else if(whitespace(c)){
+ this._emitToken("onopentagname");
+ this._state = BEFORE_ATTRIBUTE_NAME;
+ }
+ } else if(this._state === BEFORE_CLOSING_TAG_NAME){
+ if(whitespace(c));
+ else if(c === ">"){
+ this._state = TEXT;
+ } else if(this._special > 0){
+ if(c === "s" || c === "S"){
+ this._state = BEFORE_SPECIAL_END;
+ } else {
+ this._state = TEXT;
+ continue;
+ }
+ } else {
+ this._state = IN_CLOSING_TAG_NAME;
+ this._sectionStart = this._index;
+ }
+ } else if(this._state === IN_CLOSING_TAG_NAME){
+ if(c === ">"){
+ this._emitToken("onclosetag");
+ this._state = TEXT;
+ this._sectionStart = this._index + 1;
+ this._special = 0;
+ } else if(whitespace(c)){
+ this._emitToken("onclosetag");
+ this._state = AFTER_CLOSING_TAG_NAME;
+ this._special = 0;
+ }
+ } else if(this._state === AFTER_CLOSING_TAG_NAME){
+ //skip everything until ">"
+ if(c === ">"){
+ this._state = TEXT;
+ this._sectionStart = this._index + 1;
+ }
+ }
+
+ /*
+ * attributes
+ */
+ else if(this._state === BEFORE_ATTRIBUTE_NAME){
+ if(c === ">"){
+ this._state = TEXT;
+ this._cbs.onopentagend();
+ this._sectionStart = this._index + 1;
+ } else if(c === "/"){
+ this._cbs.onselfclosingtag();
+ this._state = AFTER_CLOSING_TAG_NAME;
+ } else if(!whitespace(c)){
+ this._state = IN_ATTRIBUTE_NAME;
+ this._sectionStart = this._index;
+ }
+ } else if(this._state === IN_ATTRIBUTE_NAME){
+ if(c === "="){
+ this._emitIfToken("onattribname");
+ this._state = BEFORE_ATTRIBUTE_VALUE;
+ } else if(whitespace(c)){
+ this._emitIfToken("onattribname");
+ this._state = AFTER_ATTRIBUTE_NAME;
+ } else if(c === "/" || c === ">"){
+ this._emitIfToken("onattribname");
+ this._state = BEFORE_ATTRIBUTE_NAME;
+ continue;
+ }
+ } else if(this._state === AFTER_ATTRIBUTE_NAME){
+ if(c === "="){
+ this._state = BEFORE_ATTRIBUTE_VALUE;
+ } else if(c === "/" || c === ">"){
+ this._state = BEFORE_ATTRIBUTE_NAME;
+ continue;
+ } else if(!whitespace(c)){
+ this._state = IN_ATTRIBUTE_NAME;
+ this._sectionStart = this._index;
+ }
+ } else if(this._state === BEFORE_ATTRIBUTE_VALUE){
+ if(c === "\""){
+ this._state = IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES;
+ this._sectionStart = this._index + 1;
+ } else if(c === "'"){
+ this._state = IN_ATTRIBUTE_VALUE_SINGLE_QUOTES;
+ this._sectionStart = this._index + 1;
+ } else if(!whitespace(c)){
+ this._state = IN_ATTRIBUTE_VALUE_NO_QUOTES;
+ this._sectionStart = this._index;
+ }
+ } else if(this._state === IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES){
+ if(c === "\""){
+ this._emitToken("onattribvalue");
+ this._state = BEFORE_ATTRIBUTE_NAME;
+ }
+ } else if(this._state === IN_ATTRIBUTE_VALUE_SINGLE_QUOTES){
+ if(c === "'"){
+ this._state = BEFORE_ATTRIBUTE_NAME;
+ this._emitToken("onattribvalue");
+ }
+ } else if(this._state === IN_ATTRIBUTE_VALUE_NO_QUOTES){
+ if(c === ">"){
+ this._emitToken("onattribvalue");
+ this._state = TEXT;
+ this._cbs.onopentagend();
+ this._sectionStart = this._index + 1;
+ } else if(whitespace(c)){
+ this._emitToken("onattribvalue");
+ this._state = BEFORE_ATTRIBUTE_NAME;
+ }
+ }
+
+ /*
+ * declarations
+ */
+ else if(this._state === BEFORE_DECLARATION){
+ if(c === "[") this._state = BEFORE_CDATA_1;
+ else if(c === "-") this._state = BEFORE_COMMENT;
+ else this._state = IN_DECLARATION;
+ } else if(this._state === IN_DECLARATION){
+ if(c === ">"){
+ this._emitToken("ondeclaration");
+ this._state = TEXT;
+ this._sectionStart = this._index + 1;
+ }
+ }
+
+ /*
+ * processing instructions
+ */
+ else if(this._state === IN_PROCESSING_INSTRUCTION){
+ if(c === ">"){
+ this._emitToken("onprocessinginstruction");
+ this._state = TEXT;
+ this._sectionStart = this._index + 1;
+ }
+ }
+
+ /*
+ * comments
+ */
+ else if(this._state === BEFORE_COMMENT){
+ if(c === "-"){
+ this._state = IN_COMMENT;
+ this._sectionStart = this._index + 1;
+ } else {
+ this._state = IN_DECLARATION;
+ }
+ } else if(this._state === IN_COMMENT){
+ if(c === "-") this._state = AFTER_COMMENT_1;
+ } else if(this._state === AFTER_COMMENT_1){
+ if(c === "-") this._state = AFTER_COMMENT_2;
+ else this._state = IN_COMMENT;
+ } else if(this._state === AFTER_COMMENT_2){
+ if(c === ">"){
+ //remove 2 trailing chars
+ this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2));
+ this._state = TEXT;
+ this._sectionStart = this._index + 1;
+ } else if (c !== "-") {
+ this._state = IN_COMMENT;
+ }
+ // else: stay in AFTER_COMMENT_2 (`--->`)
+ }
+
+ /*
+ * cdata
+ */
+ else if(this._state === BEFORE_CDATA_1){
+ if(c === "C") this._state = BEFORE_CDATA_2;
+ else this._state = IN_DECLARATION;
+ } else if(this._state === BEFORE_CDATA_2){
+ if(c === "D") this._state = BEFORE_CDATA_3;
+ else this._state = IN_DECLARATION;
+ } else if(this._state === BEFORE_CDATA_3){
+ if(c === "A") this._state = BEFORE_CDATA_4;
+ else this._state = IN_DECLARATION;
+ } else if(this._state === BEFORE_CDATA_4){
+ if(c === "T") this._state = BEFORE_CDATA_5;
+ else this._state = IN_DECLARATION;
+ } else if(this._state === BEFORE_CDATA_5){
+ if(c === "A") this._state = BEFORE_CDATA_6;
+ else this._state = IN_DECLARATION;
+ } else if(this._state === BEFORE_CDATA_6){
+ if(c === "["){
+ this._state = IN_CDATA;
+ this._sectionStart = this._index + 1;
+ } else {
+ this._state = IN_DECLARATION;
+ }
+ } else if(this._state === IN_CDATA){
+ if(c === "]") this._state = AFTER_CDATA_1;
+ } else if(this._state === AFTER_CDATA_1){
+ if(c === "]") this._state = AFTER_CDATA_2;
+ else this._state = IN_CDATA;
+ } else if(this._state === AFTER_CDATA_2){
+ if(c === ">"){
+ //remove 2 trailing chars
+ this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2));
+ this._state = TEXT;
+ this._sectionStart = this._index + 1;
+ } else if (c !== "]") {
+ this._state = IN_CDATA;
+ }
+ //else: stay in AFTER_CDATA_2 (`]]]>`)
+ }
+
+ /*
+ * special tags
+ */
+ else if(this._state === BEFORE_SPECIAL){
+ if(c === "c" || c === "C"){
+ this._state = BEFORE_SCRIPT_1;
+ } else if(c === "t" || c === "T"){
+ this._state = BEFORE_STYLE_1;
+ } else {
+ this._state = IN_TAG_NAME;
+ continue; //consume the token again
+ }
+ } else if(this._state === BEFORE_SPECIAL_END){
+ if(this._special === 1 && (c === "c" || c === "C")){
+ this._state = AFTER_SCRIPT_1;
+ } else if(this._special === 2 && (c === "t" || c === "T")){
+ this._state = AFTER_STYLE_1;
+ }
+ else this._state = TEXT;
+ }
+
+ /*
+ * script
+ */
+ else if(this._state === BEFORE_SCRIPT_1){
+ if(c === "r" || c === "R"){
+ this._state = BEFORE_SCRIPT_2;
+ } else {
+ this._state = IN_TAG_NAME;
+ continue; //consume the token again
+ }
+ } else if(this._state === BEFORE_SCRIPT_2){
+ if(c === "i" || c === "I"){
+ this._state = BEFORE_SCRIPT_3;
+ } else {
+ this._state = IN_TAG_NAME;
+ continue; //consume the token again
+ }
+ } else if(this._state === BEFORE_SCRIPT_3){
+ if(c === "p" || c === "P"){
+ this._state = BEFORE_SCRIPT_4;
+ } else {
+ this._state = IN_TAG_NAME;
+ continue; //consume the token again
+ }
+ } else if(this._state === BEFORE_SCRIPT_4){
+ if(c === "t" || c === "T"){
+ this._state = BEFORE_SCRIPT_5;
+ } else {
+ this._state = IN_TAG_NAME;
+ continue; //consume the token again
+ }
+ } else if(this._state === BEFORE_SCRIPT_5){
+ if(c === "/" || c === ">" || whitespace(c)){
+ this._special = 1;
+ }
+ this._state = IN_TAG_NAME;
+ continue; //consume the token again
+ }
+
+ else if(this._state === AFTER_SCRIPT_1){
+ if(c === "r" || c === "R"){
+ this._state = AFTER_SCRIPT_2;
+ }
+ else this._state = TEXT;
+ } else if(this._state === AFTER_SCRIPT_2){
+ if(c === "i" || c === "I"){
+ this._state = AFTER_SCRIPT_3;
+ }
+ else this._state = TEXT;
+ } else if(this._state === AFTER_SCRIPT_3){
+ if(c === "p" || c === "P"){
+ this._state = AFTER_SCRIPT_4;
+ }
+ else this._state = TEXT;
+ } else if(this._state === AFTER_SCRIPT_4){
+ if(c === "t" || c === "T"){
+ this._state = AFTER_SCRIPT_5;
+ }
+ else this._state = TEXT;
+ } else if(this._state === AFTER_SCRIPT_5){
+ if(c === ">" || whitespace(c)){
+ this._state = IN_CLOSING_TAG_NAME;
+ this._sectionStart = this._index - 6;
+ continue; //reconsume the token
+ }
+ else this._state = TEXT;
+ }
+
+ /*
+ * style
+ */
+ else if(this._state === BEFORE_STYLE_1){
+ if(c === "y" || c === "Y"){
+ this._state = BEFORE_STYLE_2;
+ } else {
+ this._state = IN_TAG_NAME;
+ continue; //consume the token again
+ }
+ } else if(this._state === BEFORE_STYLE_2){
+ if(c === "l" || c === "L"){
+ this._state = BEFORE_STYLE_3;
+ } else {
+ this._state = IN_TAG_NAME;
+ continue; //consume the token again
+ }
+ } else if(this._state === BEFORE_STYLE_3){
+ if(c === "e" || c === "E"){
+ this._state = BEFORE_STYLE_4;
+ } else {
+ this._state = IN_TAG_NAME;
+ continue; //consume the token again
+ }
+ } else if(this._state === BEFORE_STYLE_4){
+ if(c === "/" || c === ">" || whitespace(c)){
+ this._special = 2;
+ }
+ this._state = IN_TAG_NAME;
+ continue; //consume the token again
+ }
+
+ else if(this._state === AFTER_STYLE_1){
+ if(c === "y" || c === "Y"){
+ this._state = AFTER_STYLE_2;
+ }
+ else this._state = TEXT;
+ } else if(this._state === AFTER_STYLE_2){
+ if(c === "l" || c === "L"){
+ this._state = AFTER_STYLE_3;
+ }
+ else this._state = TEXT;
+ } else if(this._state === AFTER_STYLE_3){
+ if(c === "e" || c === "E"){
+ this._state = AFTER_STYLE_4;
+ }
+ else this._state = TEXT;
+ } else if(this._state === AFTER_STYLE_4){
+ if(c === ">" || whitespace(c)){
+ this._state = IN_CLOSING_TAG_NAME;
+ this._sectionStart = this._index - 5;
+ continue; //reconsume the token
+ }
+ else this._state = TEXT;
+ }
+
+
+ else {
+ this._cbs.onerror(Error("unknown state"), this._state);
+ }
+
+ this._index++;
+ }
+
+ //cleanup
+ if(this._sectionStart === -1){
+ this._buffer = "";
+ this._index = 0;
+ } else {
+ if(this._state === TEXT){
+ if(this._sectionStart !== this._index){
+ this._cbs.ontext(this._buffer.substr(this._sectionStart));
+ }
+ this._buffer = "";
+ this._index = 0;
+ } else if(this._sectionStart === this._index){
+ //the section just started
+ this._buffer = "";
+ this._index = 0;
+ } else if(this._sectionStart > 0){
+ //remove everything unnecessary
+ this._buffer = this._buffer.substr(this._sectionStart);
+ this._index -= this._sectionStart;
+ }
+
+ this._sectionStart = 0;
+ }
+};
+
+Tokenizer.prototype.pause = function(){
+ this._running = false;
+};
+Tokenizer.prototype.resume = function(){
+ this._running = true;
+};
+
+Tokenizer.prototype.end = function(chunk){
+ if(chunk) this.write(chunk);
+
+ //if there is remaining data, emit it in a reasonable way
+ if(this._sectionStart > this._index){
+ var data = this._buffer.substr(this._sectionStart);
+
+ if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){
+ this._cbs.oncdata(data);
+ } else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){
+ this._cbs.oncomment(data);
+ } else if(this._state === IN_TAG_NAME){
+ this._cbs.onopentagname(data);
+ } else if(this._state === IN_CLOSING_TAG_NAME){
+ this._cbs.onclosetag(data);
+ } else {
+ this._cbs.ontext(data);
+ }
+ }
+
+ this._cbs.onend();
+};
+
+Tokenizer.prototype.reset = function(){
+ Tokenizer.call(this, this._options, this._cbs);
+};
+
+Tokenizer.prototype._emitToken = function(name){
+ this._cbs[name](this._buffer.substring(this._sectionStart, this._index));
+ this._sectionStart = -1;
+};
+
+Tokenizer.prototype._emitIfToken = function(name){
+ if(this._index > this._sectionStart){
+ this._cbs[name](this._buffer.substring(this._sectionStart, this._index));
+ }
+ this._sectionStart = -1;
+};

Powered by Google App Engine
This is Rietveld 408576698