| Index: tools/vulcanize/node_modules/vulcanize/node_modules/cheerio/node_modules/htmlparser2/lib/Tokenizer.js
|
| diff --git a/tools/vulcanize/node_modules/vulcanize/node_modules/cheerio/node_modules/htmlparser2/lib/Tokenizer.js b/tools/vulcanize/node_modules/vulcanize/node_modules/cheerio/node_modules/htmlparser2/lib/Tokenizer.js
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..5ef6d14f404933eb59dcf44b3eae74890302c53e
|
| --- /dev/null
|
| +++ b/tools/vulcanize/node_modules/vulcanize/node_modules/cheerio/node_modules/htmlparser2/lib/Tokenizer.js
|
| @@ -0,0 +1,556 @@
|
| +module.exports = Tokenizer;
|
| +
|
| +var i = 0, // running counter: every state below gets the next integer as its id
|
| +
|
| + TEXT = i++, // plain text between tags (also the initial state)
|
| + BEFORE_TAG_NAME = i++, //after <
|
| + IN_TAG_NAME = i++,
|
| + BEFORE_CLOSING_TAG_NAME = i++, // after "<" followed by "/"
|
| + IN_CLOSING_TAG_NAME = i++,
|
| + AFTER_CLOSING_TAG_NAME = i++, // everything is skipped until ">"
|
| +
|
| + //attributes
|
| + BEFORE_ATTRIBUTE_NAME = i++,
|
| + IN_ATTRIBUTE_NAME = i++,
|
| + AFTER_ATTRIBUTE_NAME = i++, // whitespace seen after an attribute name
|
| + BEFORE_ATTRIBUTE_VALUE = i++, // after =
|
| + IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES = i++, // "
|
| + IN_ATTRIBUTE_VALUE_SINGLE_QUOTES = i++, // '
|
| + IN_ATTRIBUTE_VALUE_NO_QUOTES = i++,
|
| +
|
| + //declarations
|
| + BEFORE_DECLARATION = i++, // !
|
| + IN_DECLARATION = i++,
|
| +
|
| + //processing instructions
|
| + IN_PROCESSING_INSTRUCTION = i++, // ?
|
| +
|
| + //comments
|
| + BEFORE_COMMENT = i++, // first - after !
|
| + IN_COMMENT = i++,
|
| + AFTER_COMMENT_1 = i++, // first - of a possible terminator
|
| + AFTER_COMMENT_2 = i++, // second - of a possible terminator
|
| +
|
| + //cdata
|
| + BEFORE_CDATA_1 = i++, // [
|
| + BEFORE_CDATA_2 = i++, // C
|
| + BEFORE_CDATA_3 = i++, // D
|
| + BEFORE_CDATA_4 = i++, // A
|
| + BEFORE_CDATA_5 = i++, // T
|
| + BEFORE_CDATA_6 = i++, // A
|
| + IN_CDATA = i++,// [ (second bracket seen, now inside the CDATA section)
|
| + AFTER_CDATA_1 = i++, // ]
|
| + AFTER_CDATA_2 = i++, // ]
|
| +
|
| + //special tags
|
| + BEFORE_SPECIAL = i++, //S (opening tag starting with s: may turn out to be script or style)
|
| + BEFORE_SPECIAL_END = i++, //S (closing tag starting with s while inside script/style)
|
| +
|
| + BEFORE_SCRIPT_1 = i++, //C
|
| + BEFORE_SCRIPT_2 = i++, //R
|
| + BEFORE_SCRIPT_3 = i++, //I
|
| + BEFORE_SCRIPT_4 = i++, //P
|
| + BEFORE_SCRIPT_5 = i++, //T
|
| + AFTER_SCRIPT_1 = i++, //C
|
| + AFTER_SCRIPT_2 = i++, //R
|
| + AFTER_SCRIPT_3 = i++, //I
|
| + AFTER_SCRIPT_4 = i++, //P
|
| + AFTER_SCRIPT_5 = i++, //T
|
| +
|
| + BEFORE_STYLE_1 = i++, //T
|
| + BEFORE_STYLE_2 = i++, //Y
|
| + BEFORE_STYLE_3 = i++, //L
|
| + BEFORE_STYLE_4 = i++, //E
|
| + AFTER_STYLE_1 = i++, //T
|
| + AFTER_STYLE_2 = i++, //Y
|
| + AFTER_STYLE_3 = i++, //L
|
| + AFTER_STYLE_4 = i++; //E
|
| +
|
| +
|
| +function whitespace(c){ // true when c is exactly a space, tab, carriage return or line feed
|
| + return c === " " || c === "\t" || c === "\r" || c === "\n";
|
| +}
|
| +
|
| +function Tokenizer(options, cbs){ // options: may be undefined, only xmlMode is read here; cbs: object holding the on* callbacks
|
| + this._state = TEXT;
|
| + this._buffer = ""; // input that still has to be scanned or belongs to an unfinished token
|
| + this._sectionStart = 0; // buffer index where the current token starts; -1 right after a token was emitted
|
| + this._index = 0; // buffer index of the next character to examine
|
| + this._options = options;
|
| + this._special = 0; // 0 for none, 1 for script, 2 for style
|
| + this._cbs = cbs;
|
| + this._running = true; // cleared by pause(), set again by resume()
|
| +}
|
| +
|
| +//TODO make events conditional
|
| +Tokenizer.prototype.write = function(chunk){ // append chunk to the buffer and run the state machine over every unread character
|
| + this._buffer += chunk;
|
| +
|
| + while(this._index < this._buffer.length && this._running){ // also stops as soon as pause() clears _running
|
| + var c = this._buffer.charAt(this._index);
|
| + if(this._state === TEXT){
|
| + if(c === "<"){
|
| + this._emitIfToken("ontext"); // flush the text collected so far, if any
|
| + this._state = BEFORE_TAG_NAME;
|
| + this._sectionStart = this._index;
|
| + }
|
| + } else if(this._state === BEFORE_TAG_NAME){
|
| + if(c === "/"){
|
| + this._state = BEFORE_CLOSING_TAG_NAME;
|
| + } else if(c === ">" || this._special > 0) { // an empty tag, or any non-closing tag inside script/style, is treated as text
|
| + this._state = TEXT;
|
| + } else {
|
| + if(whitespace(c)); // ignore whitespace directly after the opening bracket
|
| + else if(c === "!"){
|
| + this._state = BEFORE_DECLARATION;
|
| + this._sectionStart = this._index + 1;
|
| + } else if(c === "?"){
|
| + this._state = IN_PROCESSING_INSTRUCTION;
|
| + this._sectionStart = this._index + 1;
|
| + } else if(
|
| + !(this._options && this._options.xmlMode) && // script/style detection is skipped in xmlMode
|
| + (c === "s" || c === "S")
|
| + ){
|
| + this._state = BEFORE_SPECIAL;
|
| + this._sectionStart = this._index;
|
| + } else {
|
| + this._state = IN_TAG_NAME;
|
| + this._sectionStart = this._index;
|
| + }
|
| + }
|
| + } else if(this._state === IN_TAG_NAME){
|
| + if(c === "/"){
|
| + this._emitToken("onopentagname");
|
| + this._cbs.onselfclosingtag();
|
| + this._state = AFTER_CLOSING_TAG_NAME; // the rest of the tag is skipped up to the closing bracket
|
| + } else if(c === ">"){
|
| + this._emitToken("onopentagname");
|
| + this._cbs.onopentagend();
|
| + this._state = TEXT;
|
| + this._sectionStart = this._index + 1;
|
| + } else if(whitespace(c)){
|
| + this._emitToken("onopentagname");
|
| + this._state = BEFORE_ATTRIBUTE_NAME;
|
| + }
|
| + } else if(this._state === BEFORE_CLOSING_TAG_NAME){
|
| + if(whitespace(c)); // ignore whitespace before the closing tag name
|
| + else if(c === ">"){
|
| + this._state = TEXT;
|
| + } else if(this._special > 0){ // inside script/style only the matching closing tag is recognised
|
| + if(c === "s" || c === "S"){
|
| + this._state = BEFORE_SPECIAL_END;
|
| + } else {
|
| + this._state = TEXT;
|
| + continue; // re-examine c in the TEXT state without advancing _index
|
| + }
|
| + } else {
|
| + this._state = IN_CLOSING_TAG_NAME;
|
| + this._sectionStart = this._index;
|
| + }
|
| + } else if(this._state === IN_CLOSING_TAG_NAME){
|
| + if(c === ">"){
|
| + this._emitToken("onclosetag");
|
| + this._state = TEXT;
|
| + this._sectionStart = this._index + 1;
|
| + this._special = 0; // a closing tag was emitted, so script/style mode (if any) ends
|
| + } else if(whitespace(c)){
|
| + this._emitToken("onclosetag");
|
| + this._state = AFTER_CLOSING_TAG_NAME;
|
| + this._special = 0; // a closing tag was emitted, so script/style mode (if any) ends
|
| + }
|
| + } else if(this._state === AFTER_CLOSING_TAG_NAME){
|
| + //skip everything until ">"
|
| + if(c === ">"){
|
| + this._state = TEXT;
|
| + this._sectionStart = this._index + 1;
|
| + }
|
| + }
|
| +
|
| + /*
|
| + * attributes
|
| + */
|
| + else if(this._state === BEFORE_ATTRIBUTE_NAME){
|
| + if(c === ">"){
|
| + this._state = TEXT;
|
| + this._cbs.onopentagend();
|
| + this._sectionStart = this._index + 1;
|
| + } else if(c === "/"){
|
| + this._cbs.onselfclosingtag();
|
| + this._state = AFTER_CLOSING_TAG_NAME;
|
| + } else if(!whitespace(c)){
|
| + this._state = IN_ATTRIBUTE_NAME;
|
| + this._sectionStart = this._index;
|
| + }
|
| + } else if(this._state === IN_ATTRIBUTE_NAME){
|
| + if(c === "="){
|
| + this._emitIfToken("onattribname");
|
| + this._state = BEFORE_ATTRIBUTE_VALUE;
|
| + } else if(whitespace(c)){
|
| + this._emitIfToken("onattribname");
|
| + this._state = AFTER_ATTRIBUTE_NAME;
|
| + } else if(c === "/" || c === ">"){
|
| + this._emitIfToken("onattribname");
|
| + this._state = BEFORE_ATTRIBUTE_NAME;
|
| + continue; // let BEFORE_ATTRIBUTE_NAME handle this same character
|
| + }
|
| + } else if(this._state === AFTER_ATTRIBUTE_NAME){
|
| + if(c === "="){
|
| + this._state = BEFORE_ATTRIBUTE_VALUE;
|
| + } else if(c === "/" || c === ">"){
|
| + this._state = BEFORE_ATTRIBUTE_NAME;
|
| + continue; // let BEFORE_ATTRIBUTE_NAME handle this same character
|
| + } else if(!whitespace(c)){
|
| + this._state = IN_ATTRIBUTE_NAME; // the previous attribute had no value; a new name starts here
|
| + this._sectionStart = this._index;
|
| + }
|
| + } else if(this._state === BEFORE_ATTRIBUTE_VALUE){
|
| + if(c === "\""){
|
| + this._state = IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES;
|
| + this._sectionStart = this._index + 1; // the value starts after the quote
|
| + } else if(c === "'"){
|
| + this._state = IN_ATTRIBUTE_VALUE_SINGLE_QUOTES;
|
| + this._sectionStart = this._index + 1; // the value starts after the quote
|
| + } else if(!whitespace(c)){
|
| + this._state = IN_ATTRIBUTE_VALUE_NO_QUOTES;
|
| + this._sectionStart = this._index;
|
| + }
|
| + } else if(this._state === IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES){
|
| + if(c === "\""){
|
| + this._emitToken("onattribvalue");
|
| + this._state = BEFORE_ATTRIBUTE_NAME;
|
| + }
|
| + } else if(this._state === IN_ATTRIBUTE_VALUE_SINGLE_QUOTES){
|
| + if(c === "'"){
|
| + this._state = BEFORE_ATTRIBUTE_NAME;
|
| + this._emitToken("onattribvalue");
|
| + }
|
| + } else if(this._state === IN_ATTRIBUTE_VALUE_NO_QUOTES){
|
| + if(c === ">"){
|
| + this._emitToken("onattribvalue");
|
| + this._state = TEXT;
|
| + this._cbs.onopentagend();
|
| + this._sectionStart = this._index + 1;
|
| + } else if(whitespace(c)){
|
| + this._emitToken("onattribvalue");
|
| + this._state = BEFORE_ATTRIBUTE_NAME;
|
| + }
|
| + }
|
| +
|
| + /*
|
| + * declarations
|
| + */
|
| + else if(this._state === BEFORE_DECLARATION){
|
| + if(c === "[") this._state = BEFORE_CDATA_1;
|
| + else if(c === "-") this._state = BEFORE_COMMENT;
|
| + else this._state = IN_DECLARATION;
|
| + } else if(this._state === IN_DECLARATION){
|
| + if(c === ">"){
|
| + this._emitToken("ondeclaration");
|
| + this._state = TEXT;
|
| + this._sectionStart = this._index + 1;
|
| + }
|
| + }
|
| +
|
| + /*
|
| + * processing instructions
|
| + */
|
| + else if(this._state === IN_PROCESSING_INSTRUCTION){
|
| + if(c === ">"){
|
| + this._emitToken("onprocessinginstruction");
|
| + this._state = TEXT;
|
| + this._sectionStart = this._index + 1;
|
| + }
|
| + }
|
| +
|
| + /*
|
| + * comments
|
| + */
|
| + else if(this._state === BEFORE_COMMENT){
|
| + if(c === "-"){
|
| + this._state = IN_COMMENT;
|
| + this._sectionStart = this._index + 1; // the comment text starts after the second dash
|
| + } else {
|
| + this._state = IN_DECLARATION;
|
| + }
|
| + } else if(this._state === IN_COMMENT){
|
| + if(c === "-") this._state = AFTER_COMMENT_1;
|
| + } else if(this._state === AFTER_COMMENT_1){
|
| + if(c === "-") this._state = AFTER_COMMENT_2;
|
| + else this._state = IN_COMMENT;
|
| + } else if(this._state === AFTER_COMMENT_2){
|
| + if(c === ">"){
|
| + //remove 2 trailing chars (the two dashes of the terminator)
|
| + this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2));
|
| + this._state = TEXT;
|
| + this._sectionStart = this._index + 1;
|
| + } else if (c !== "-") {
|
| + this._state = IN_COMMENT;
|
| + }
|
| + // else: stay in AFTER_COMMENT_2 (`--->`)
|
| + }
|
| +
|
| + /*
|
| + * cdata
|
| + */
|
| + else if(this._state === BEFORE_CDATA_1){ // each BEFORE_CDATA_n state matches one character of the literal; a mismatch falls back to a declaration
|
| + if(c === "C") this._state = BEFORE_CDATA_2;
|
| + else this._state = IN_DECLARATION;
|
| + } else if(this._state === BEFORE_CDATA_2){
|
| + if(c === "D") this._state = BEFORE_CDATA_3;
|
| + else this._state = IN_DECLARATION;
|
| + } else if(this._state === BEFORE_CDATA_3){
|
| + if(c === "A") this._state = BEFORE_CDATA_4;
|
| + else this._state = IN_DECLARATION;
|
| + } else if(this._state === BEFORE_CDATA_4){
|
| + if(c === "T") this._state = BEFORE_CDATA_5;
|
| + else this._state = IN_DECLARATION;
|
| + } else if(this._state === BEFORE_CDATA_5){
|
| + if(c === "A") this._state = BEFORE_CDATA_6;
|
| + else this._state = IN_DECLARATION;
|
| + } else if(this._state === BEFORE_CDATA_6){
|
| + if(c === "["){
|
| + this._state = IN_CDATA;
|
| + this._sectionStart = this._index + 1;
|
| + } else {
|
| + this._state = IN_DECLARATION;
|
| + }
|
| + } else if(this._state === IN_CDATA){
|
| + if(c === "]") this._state = AFTER_CDATA_1;
|
| + } else if(this._state === AFTER_CDATA_1){
|
| + if(c === "]") this._state = AFTER_CDATA_2;
|
| + else this._state = IN_CDATA;
|
| + } else if(this._state === AFTER_CDATA_2){
|
| + if(c === ">"){
|
| + //remove 2 trailing chars (the two closing brackets)
|
| + this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2));
|
| + this._state = TEXT;
|
| + this._sectionStart = this._index + 1;
|
| + } else if (c !== "]") {
|
| + this._state = IN_CDATA;
|
| + }
|
| + //else: stay in AFTER_CDATA_2 (`]]]>`)
|
| + }
|
| +
|
| + /*
|
| + * special tags
|
| + */
|
| + else if(this._state === BEFORE_SPECIAL){
|
| + if(c === "c" || c === "C"){
|
| + this._state = BEFORE_SCRIPT_1;
|
| + } else if(c === "t" || c === "T"){
|
| + this._state = BEFORE_STYLE_1;
|
| + } else {
|
| + this._state = IN_TAG_NAME;
|
| + continue; //consume the token again (same character, now as part of an ordinary tag name)
|
| + }
|
| + } else if(this._state === BEFORE_SPECIAL_END){
|
| + if(this._special === 1 && (c === "c" || c === "C")){
|
| + this._state = AFTER_SCRIPT_1;
|
| + } else if(this._special === 2 && (c === "t" || c === "T")){
|
| + this._state = AFTER_STYLE_1;
|
| + }
|
| + else this._state = TEXT; // not the matching closing tag: keep treating the content as text
|
| + }
|
| +
|
| + /*
|
| + * script
|
| + */
|
| + else if(this._state === BEFORE_SCRIPT_1){
|
| + if(c === "r" || c === "R"){
|
| + this._state = BEFORE_SCRIPT_2;
|
| + } else {
|
| + this._state = IN_TAG_NAME;
|
| + continue; //consume the token again
|
| + }
|
| + } else if(this._state === BEFORE_SCRIPT_2){
|
| + if(c === "i" || c === "I"){
|
| + this._state = BEFORE_SCRIPT_3;
|
| + } else {
|
| + this._state = IN_TAG_NAME;
|
| + continue; //consume the token again
|
| + }
|
| + } else if(this._state === BEFORE_SCRIPT_3){
|
| + if(c === "p" || c === "P"){
|
| + this._state = BEFORE_SCRIPT_4;
|
| + } else {
|
| + this._state = IN_TAG_NAME;
|
| + continue; //consume the token again
|
| + }
|
| + } else if(this._state === BEFORE_SCRIPT_4){
|
| + if(c === "t" || c === "T"){
|
| + this._state = BEFORE_SCRIPT_5;
|
| + } else {
|
| + this._state = IN_TAG_NAME;
|
| + continue; //consume the token again
|
| + }
|
| + } else if(this._state === BEFORE_SCRIPT_5){
|
| + if(c === "/" || c === ">" || whitespace(c)){ // the tag name ends here, so it is exactly "script"
|
| + this._special = 1;
|
| + }
|
| + this._state = IN_TAG_NAME;
|
| + continue; //consume the token again
|
| + }
|
| +
|
| + else if(this._state === AFTER_SCRIPT_1){
|
| + if(c === "r" || c === "R"){
|
| + this._state = AFTER_SCRIPT_2;
|
| + }
|
| + else this._state = TEXT;
|
| + } else if(this._state === AFTER_SCRIPT_2){
|
| + if(c === "i" || c === "I"){
|
| + this._state = AFTER_SCRIPT_3;
|
| + }
|
| + else this._state = TEXT;
|
| + } else if(this._state === AFTER_SCRIPT_3){
|
| + if(c === "p" || c === "P"){
|
| + this._state = AFTER_SCRIPT_4;
|
| + }
|
| + else this._state = TEXT;
|
| + } else if(this._state === AFTER_SCRIPT_4){
|
| + if(c === "t" || c === "T"){
|
| + this._state = AFTER_SCRIPT_5;
|
| + }
|
| + else this._state = TEXT;
|
| + } else if(this._state === AFTER_SCRIPT_5){
|
| + if(c === ">" || whitespace(c)){
|
| + this._state = IN_CLOSING_TAG_NAME;
|
| + this._sectionStart = this._index - 6; // back up over the six letters of "script" so the name is emitted
|
| + continue; //reconsume the token
|
| + }
|
| + else this._state = TEXT;
|
| + }
|
| +
|
| + /*
|
| + * style
|
| + */
|
| + else if(this._state === BEFORE_STYLE_1){
|
| + if(c === "y" || c === "Y"){
|
| + this._state = BEFORE_STYLE_2;
|
| + } else {
|
| + this._state = IN_TAG_NAME;
|
| + continue; //consume the token again
|
| + }
|
| + } else if(this._state === BEFORE_STYLE_2){
|
| + if(c === "l" || c === "L"){
|
| + this._state = BEFORE_STYLE_3;
|
| + } else {
|
| + this._state = IN_TAG_NAME;
|
| + continue; //consume the token again
|
| + }
|
| + } else if(this._state === BEFORE_STYLE_3){
|
| + if(c === "e" || c === "E"){
|
| + this._state = BEFORE_STYLE_4;
|
| + } else {
|
| + this._state = IN_TAG_NAME;
|
| + continue; //consume the token again
|
| + }
|
| + } else if(this._state === BEFORE_STYLE_4){
|
| + if(c === "/" || c === ">" || whitespace(c)){ // the tag name ends here, so it is exactly "style"
|
| + this._special = 2;
|
| + }
|
| + this._state = IN_TAG_NAME;
|
| + continue; //consume the token again
|
| + }
|
| +
|
| + else if(this._state === AFTER_STYLE_1){
|
| + if(c === "y" || c === "Y"){
|
| + this._state = AFTER_STYLE_2;
|
| + }
|
| + else this._state = TEXT;
|
| + } else if(this._state === AFTER_STYLE_2){
|
| + if(c === "l" || c === "L"){
|
| + this._state = AFTER_STYLE_3;
|
| + }
|
| + else this._state = TEXT;
|
| + } else if(this._state === AFTER_STYLE_3){
|
| + if(c === "e" || c === "E"){
|
| + this._state = AFTER_STYLE_4;
|
| + }
|
| + else this._state = TEXT;
|
| + } else if(this._state === AFTER_STYLE_4){
|
| + if(c === ">" || whitespace(c)){
|
| + this._state = IN_CLOSING_TAG_NAME;
|
| + this._sectionStart = this._index - 5; // back up over the five letters of "style" so the name is emitted
|
| + continue; //reconsume the token
|
| + }
|
| + else this._state = TEXT;
|
| + }
|
| +
|
| +
|
| + else {
|
| + this._cbs.onerror(Error("unknown state"), this._state);
|
| + }
|
| +
|
| + this._index++; // skipped by the continue statements above, which re-run the same character
|
| + }
|
| +
|
| + //cleanup
|
| + if(this._sectionStart === -1){ // the last token was already emitted, so nothing has to be kept
|
| + this._buffer = "";
|
| + this._index = 0;
|
| + } else {
|
| + if(this._state === TEXT){
|
| + if(this._sectionStart !== this._index){
|
| + this._cbs.ontext(this._buffer.substr(this._sectionStart)); // text is emitted per chunk instead of being buffered
|
| + }
|
| + this._buffer = "";
|
| + this._index = 0;
|
| + } else if(this._sectionStart === this._index){
|
| + //the section just started
|
| + this._buffer = "";
|
| + this._index = 0;
|
| + } else if(this._sectionStart > 0){
|
| + //remove everything unnecessary (keep only the unfinished section for the next write)
|
| + this._buffer = this._buffer.substr(this._sectionStart);
|
| + this._index -= this._sectionStart;
|
| + }
|
| +
|
| + this._sectionStart = 0;
|
| + }
|
| +};
|
| +
|
| +Tokenizer.prototype.pause = function(){ // makes the scanning loop in write() stop before the next character
|
| + this._running = false;
|
| +};
|
| +Tokenizer.prototype.resume = function(){ // only sets the flag again; no scanning happens until the next write()
|
| + this._running = true;
|
| +};
|
| +
|
| +Tokenizer.prototype.end = function(chunk){
|
| + if(chunk) this.write(chunk);
|
| +
|
| + //if there is remaining data, emit it in a reasonable way
|
| + if(this._sectionStart > this._index){
|
| + var data = this._buffer.substr(this._sectionStart);
|
| +
|
| + if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){
|
| + this._cbs.oncdata(data);
|
| + } else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){
|
| + this._cbs.oncomment(data);
|
| + } else if(this._state === IN_TAG_NAME){
|
| + this._cbs.onopentagname(data);
|
| + } else if(this._state === IN_CLOSING_TAG_NAME){
|
| + this._cbs.onclosetag(data);
|
| + } else {
|
| + this._cbs.ontext(data);
|
| + }
|
| + }
|
| +
|
| + this._cbs.onend();
|
| +};
|
| +
|
| +Tokenizer.prototype.reset = function(){ // re-runs the constructor on this instance, keeping the current options and callbacks
|
| + Tokenizer.call(this, this._options, this._cbs);
|
| +};
|
| +
|
| +Tokenizer.prototype._emitToken = function(name){ // name: key of the callback in _cbs that receives the current section
|
| + this._cbs[name](this._buffer.substring(this._sectionStart, this._index)); // the character at _index is not included
|
| + this._sectionStart = -1; // marks that no section is pending
|
| +};
|
| +
|
| +Tokenizer.prototype._emitIfToken = function(name){ // like _emitToken, but the callback is skipped when the section is empty
|
| + if(this._index > this._sectionStart){
|
| + this._cbs[name](this._buffer.substring(this._sectionStart, this._index));
|
| + }
|
| + this._sectionStart = -1; // reset even when nothing was emitted
|
| +};
|
|
|