Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 library tokenizer; | 1 library tokenizer; |
| 2 | 2 |
| 3 import 'dart:collection'; | 3 import 'dart:collection'; |
| 4 import 'package:html/parser.dart' show HtmlParser; | 4 import 'package:html/parser.dart' show HtmlParser; |
| 5 import 'constants.dart'; | 5 import 'constants.dart'; |
| 6 import 'inputstream.dart'; | 6 import 'inputstream.dart'; |
| 7 import 'token.dart'; | 7 import 'token.dart'; |
| 8 import 'utils.dart'; | 8 import 'utils.dart'; |
| 9 | 9 |
| 10 // Group entities by their first character, for faster lookups | 10 // Group entities by their first character, for faster lookups |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 48 final Queue<Token> tokenQueue; | 48 final Queue<Token> tokenQueue; |
| 49 | 49 |
| 50 /// Holds the token that is currently being processed. | 50 /// Holds the token that is currently being processed. |
| 51 Token currentToken; | 51 Token currentToken; |
| 52 | 52 |
| 53 /// Holds a reference to the method to be invoked for the next parser state. | 53 /// Holds a reference to the method to be invoked for the next parser state. |
| 54 // TODO(jmesserly): the type should be "Predicate" but a dart2js checked mode | 54 // TODO(jmesserly): the type should be "Predicate" but a dart2js checked mode |
| 55 // bug prevents us from doing that. See http://dartbug.com/12465 | 55 // bug prevents us from doing that. See http://dartbug.com/12465 |
| 56 Function state; | 56 Function state; |
| 57 | 57 |
| 58 String temporaryBuffer; | 58 final StringBuffer _buffer = new StringBuffer(); |
| 59 | 59 |
| 60 int _lastOffset; | 60 int _lastOffset; |
| 61 | 61 |
| 62 // TODO(jmesserly): ideally this would be a LinkedHashMap and we wouldn't add | 62 // TODO(jmesserly): ideally this would be a LinkedHashMap and we wouldn't add |
| 63 // an item until it's ready. But the code doesn't have a clear notion of when | 63 // an item until it's ready. But the code doesn't have a clear notion of when |
| 64 // it's "done" with the attribute. | 64 // it's "done" with the attribute. |
| 65 List<TagAttribute> _attributes; | 65 List<TagAttribute> _attributes; |
| 66 Set<String> _attributeNames; | 66 Set<String> _attributeNames; |
| 67 | 67 |
| 68 HtmlTokenizer(doc, {String encoding, bool parseMeta: true, | 68 HtmlTokenizer(doc, {String encoding, bool parseMeta: true, |
| 69 this.lowercaseElementName: true, this.lowercaseAttrName: true, | 69 this.lowercaseElementName: true, this.lowercaseAttrName: true, |
| 70 bool generateSpans: false, String sourceUrl, this.attributeSpans: false}) | 70 bool generateSpans: false, String sourceUrl, this.attributeSpans: false}) |
| 71 : stream = new HtmlInputStream( | 71 : stream = new HtmlInputStream( |
| 72 doc, encoding, parseMeta, generateSpans, sourceUrl), | 72 doc, encoding, parseMeta, generateSpans, sourceUrl), |
| 73 tokenQueue = new Queue(), | 73 tokenQueue = new Queue(), |
| 74 generateSpans = generateSpans { | 74 generateSpans = generateSpans { |
| 75 reset(); | 75 reset(); |
| 76 } | 76 } |
| 77 | 77 |
| 78 TagToken get currentTagToken => currentToken; | 78 TagToken get currentTagToken => currentToken; |
| 79 DoctypeToken get currentDoctypeToken => currentToken; | 79 DoctypeToken get currentDoctypeToken => currentToken; |
| 80 StringToken get currentStringToken => currentToken; | 80 StringToken get currentStringToken => currentToken; |
| 81 | 81 |
| 82 Token _current; | 82 Token _current; |
| 83 Token get current => _current; | 83 Token get current => _current; |
| 84 | 84 |
| 85 String get _attributeName => _attributes.last.name; | 85 final StringBuffer _attributeName = new StringBuffer(); |
| 86 set _attributeName(String value) { | 86 final StringBuffer _attributeValue = new StringBuffer(); |
| 87 _attributes.last.name = value; | |
| 88 } | |
| 89 | |
| 90 String get _attributeValue => _attributes.last.value; | |
| 91 set _attributeValue(String value) { | |
| 92 _attributes.last.value = value; | |
| 93 } | |
| 94 | 87 |
| 95 void _markAttributeEnd(int offset) { | 88 void _markAttributeEnd(int offset) { |
| 89 _attributes.last.value = '$_attributeValue'; | |
| 96 if (attributeSpans) _attributes.last.end = stream.position + offset; | 90 if (attributeSpans) _attributes.last.end = stream.position + offset; |
| 97 } | 91 } |
| 98 | 92 |
| 99 void _markAttributeValueStart(int offset) { | 93 void _markAttributeValueStart(int offset) { |
| 100 if (attributeSpans) _attributes.last.startValue = stream.position + offset; | 94 if (attributeSpans) _attributes.last.startValue = stream.position + offset; |
| 101 } | 95 } |
| 102 | 96 |
| 103 void _markAttributeValueEnd(int offset) { | 97 void _markAttributeValueEnd(int offset) { |
| 104 if (attributeSpans) { | 98 if (attributeSpans) _attributes.last.endValue = stream.position + offset; |
| 105 _attributes.last.endValue = stream.position + offset; | 99 _markAttributeEnd(offset); |
| 106 _markAttributeEnd(offset); | |
| 107 } | |
| 108 } | 100 } |
| 109 | 101 |
| 110 // Note: we could track the name span here, if we need it. | 102 // Note: we could track the name span here, if we need it. |
| 111 void _markAttributeNameEnd(int offset) => _markAttributeEnd(offset); | 103 void _markAttributeNameEnd(int offset) => _markAttributeEnd(offset); |
| 112 | 104 |
| 113 void _addAttribute(String name) { | 105 void _addAttribute(String name) { |
| 114 if (_attributes == null) _attributes = []; | 106 if (_attributes == null) _attributes = []; |
| 115 var attr = new TagAttribute(name); | 107 _attributeName.clear(); |
| 108 _attributeName.write(name); | |
| 109 _attributeValue.clear(); | |
| 110 var attr = new TagAttribute(); | |
| 116 _attributes.add(attr); | 111 _attributes.add(attr); |
| 117 if (attributeSpans) attr.start = stream.position - name.length; | 112 if (attributeSpans) attr.start = stream.position - name.length; |
| 118 } | 113 } |
| 119 | 114 |
| 120 /// This is where the magic happens. | 115 /// This is where the magic happens. |
| 121 /// | 116 /// |
| 122 /// We do our usually processing through the states and when we have a token | 117 /// We do our usually processing through the states and when we have a token |
| 123 /// to return we yield the token which pauses processing until the next token | 118 /// to return we yield the token which pauses processing until the next token |
| 124 /// is requested. | 119 /// is requested. |
| 125 bool moveNext() { | 120 bool moveNext() { |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 139 } | 134 } |
| 140 return true; | 135 return true; |
| 141 } | 136 } |
| 142 | 137 |
| 143 /// Resets the tokenizer state. Calling this does not reset the [stream] or | 138 /// Resets the tokenizer state. Calling this does not reset the [stream] or |
| 144 /// the [parser]. | 139 /// the [parser]. |
| 145 void reset() { | 140 void reset() { |
| 146 _lastOffset = 0; | 141 _lastOffset = 0; |
| 147 tokenQueue.clear(); | 142 tokenQueue.clear(); |
| 148 currentToken = null; | 143 currentToken = null; |
| 149 temporaryBuffer = null; | 144 _buffer.clear(); |
| 150 _attributes = null; | 145 _attributes = null; |
| 151 _attributeNames = null; | 146 _attributeNames = null; |
| 152 state = dataState; | 147 state = dataState; |
| 153 } | 148 } |
| 154 | 149 |
| 155 /// Adds a token to the queue. Sets the span if needed. | 150 /// Adds a token to the queue. Sets the span if needed. |
| 156 void _addToken(Token token) { | 151 void _addToken(Token token) { |
| 157 if (generateSpans && token.span == null) { | 152 if (generateSpans && token.span == null) { |
| 158 int offset = stream.position; | 153 int offset = stream.position; |
| 159 token.span = stream.fileInfo.span(_lastOffset, offset); | 154 token.span = stream.fileInfo.span(_lastOffset, offset); |
| (...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 340 stream.unget(charStack.removeLast()); | 335 stream.unget(charStack.removeLast()); |
| 341 output = '${output}${slice(charStack, entityLen).join()}'; | 336 output = '${output}${slice(charStack, entityLen).join()}'; |
| 342 } | 337 } |
| 343 } else { | 338 } else { |
| 344 _addToken(new ParseErrorToken("expected-named-entity")); | 339 _addToken(new ParseErrorToken("expected-named-entity")); |
| 345 stream.unget(charStack.removeLast()); | 340 stream.unget(charStack.removeLast()); |
| 346 output = "&${charStack.join()}"; | 341 output = "&${charStack.join()}"; |
| 347 } | 342 } |
| 348 } | 343 } |
| 349 if (fromAttribute) { | 344 if (fromAttribute) { |
| 350 _attributeValue = '$_attributeValue$output'; | 345 _attributeValue.write(output); |
| 351 } else { | 346 } else { |
| 352 var token; | 347 var token; |
| 353 if (isWhitespace(output)) { | 348 if (isWhitespace(output)) { |
| 354 token = new SpaceCharactersToken(output); | 349 token = new SpaceCharactersToken(output); |
| 355 } else { | 350 } else { |
| 356 token = new CharactersToken(output); | 351 token = new CharactersToken(output); |
| 357 } | 352 } |
| 358 _addToken(token); | 353 _addToken(token); |
| 359 } | 354 } |
| 360 } | 355 } |
| (...skipping 225 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 586 currentTagToken.name = '${currentTagToken.name}$data'; | 581 currentTagToken.name = '${currentTagToken.name}$data'; |
| 587 // (Don't use charsUntil here, because tag names are | 582 // (Don't use charsUntil here, because tag names are |
| 588 // very short and it's faster to not do anything fancy) | 583 // very short and it's faster to not do anything fancy) |
| 589 } | 584 } |
| 590 return true; | 585 return true; |
| 591 } | 586 } |
| 592 | 587 |
| 593 bool rcdataLessThanSignState() { | 588 bool rcdataLessThanSignState() { |
| 594 var data = stream.char(); | 589 var data = stream.char(); |
| 595 if (data == "/") { | 590 if (data == "/") { |
| 596 temporaryBuffer = ""; | 591 _buffer.clear(); |
| 597 state = rcdataEndTagOpenState; | 592 state = rcdataEndTagOpenState; |
| 598 } else { | 593 } else { |
| 599 _addToken(new CharactersToken("<")); | 594 _addToken(new CharactersToken("<")); |
| 600 stream.unget(data); | 595 stream.unget(data); |
| 601 state = rcdataState; | 596 state = rcdataState; |
| 602 } | 597 } |
| 603 return true; | 598 return true; |
| 604 } | 599 } |
| 605 | 600 |
| 606 bool rcdataEndTagOpenState() { | 601 bool rcdataEndTagOpenState() { |
| 607 var data = stream.char(); | 602 var data = stream.char(); |
| 608 if (isLetter(data)) { | 603 if (isLetter(data)) { |
| 609 temporaryBuffer = '${temporaryBuffer}$data'; | 604 _buffer.write(data); |
| 610 state = rcdataEndTagNameState; | 605 state = rcdataEndTagNameState; |
| 611 } else { | 606 } else { |
| 612 _addToken(new CharactersToken("</")); | 607 _addToken(new CharactersToken("</")); |
| 613 stream.unget(data); | 608 stream.unget(data); |
| 614 state = rcdataState; | 609 state = rcdataState; |
| 615 } | 610 } |
| 616 return true; | 611 return true; |
| 617 } | 612 } |
| 618 | 613 |
| 619 bool _tokenIsAppropriate() { | 614 bool _tokenIsAppropriate() { |
| 615 // TODO(jmesserly): this should use case insensitive compare instead. | |
| 620 return currentToken is TagToken && | 616 return currentToken is TagToken && |
| 621 currentTagToken.name.toLowerCase() == temporaryBuffer.toLowerCase(); | 617 currentTagToken.name.toLowerCase() == '$_buffer'.toLowerCase(); |
| 622 } | 618 } |
| 623 | 619 |
| 624 bool rcdataEndTagNameState() { | 620 bool rcdataEndTagNameState() { |
| 625 var appropriate = _tokenIsAppropriate(); | 621 var appropriate = _tokenIsAppropriate(); |
| 626 var data = stream.char(); | 622 var data = stream.char(); |
| 627 if (isWhitespace(data) && appropriate) { | 623 if (isWhitespace(data) && appropriate) { |
| 628 currentToken = new EndTagToken(temporaryBuffer); | 624 currentToken = new EndTagToken('$_buffer'); |
| 629 state = beforeAttributeNameState; | 625 state = beforeAttributeNameState; |
| 630 } else if (data == "/" && appropriate) { | 626 } else if (data == "/" && appropriate) { |
| 631 currentToken = new EndTagToken(temporaryBuffer); | 627 currentToken = new EndTagToken('$_buffer'); |
| 632 state = selfClosingStartTagState; | 628 state = selfClosingStartTagState; |
| 633 } else if (data == ">" && appropriate) { | 629 } else if (data == ">" && appropriate) { |
| 634 currentToken = new EndTagToken(temporaryBuffer); | 630 currentToken = new EndTagToken('$_buffer'); |
| 635 emitCurrentToken(); | 631 emitCurrentToken(); |
| 636 state = dataState; | 632 state = dataState; |
| 637 } else if (isLetter(data)) { | 633 } else if (isLetter(data)) { |
| 638 temporaryBuffer = '${temporaryBuffer}$data'; | 634 _buffer.write(data); |
| 639 } else { | 635 } else { |
| 640 _addToken(new CharactersToken("</$temporaryBuffer")); | 636 _addToken(new CharactersToken("</$_buffer")); |
| 641 stream.unget(data); | 637 stream.unget(data); |
| 642 state = rcdataState; | 638 state = rcdataState; |
| 643 } | 639 } |
| 644 return true; | 640 return true; |
| 645 } | 641 } |
| 646 | 642 |
| 647 bool rawtextLessThanSignState() { | 643 bool rawtextLessThanSignState() { |
| 648 var data = stream.char(); | 644 var data = stream.char(); |
| 649 if (data == "/") { | 645 if (data == "/") { |
| 650 temporaryBuffer = ""; | 646 _buffer.clear(); |
| 651 state = rawtextEndTagOpenState; | 647 state = rawtextEndTagOpenState; |
| 652 } else { | 648 } else { |
| 653 _addToken(new CharactersToken("<")); | 649 _addToken(new CharactersToken("<")); |
| 654 stream.unget(data); | 650 stream.unget(data); |
| 655 state = rawtextState; | 651 state = rawtextState; |
| 656 } | 652 } |
| 657 return true; | 653 return true; |
| 658 } | 654 } |
| 659 | 655 |
| 660 bool rawtextEndTagOpenState() { | 656 bool rawtextEndTagOpenState() { |
| 661 var data = stream.char(); | 657 var data = stream.char(); |
| 662 if (isLetter(data)) { | 658 if (isLetter(data)) { |
| 663 temporaryBuffer = '${temporaryBuffer}$data'; | 659 _buffer.write(data); |
| 664 state = rawtextEndTagNameState; | 660 state = rawtextEndTagNameState; |
| 665 } else { | 661 } else { |
| 666 _addToken(new CharactersToken("</")); | 662 _addToken(new CharactersToken("</")); |
| 667 stream.unget(data); | 663 stream.unget(data); |
| 668 state = rawtextState; | 664 state = rawtextState; |
| 669 } | 665 } |
| 670 return true; | 666 return true; |
| 671 } | 667 } |
| 672 | 668 |
| 673 bool rawtextEndTagNameState() { | 669 bool rawtextEndTagNameState() { |
| 674 var appropriate = _tokenIsAppropriate(); | 670 var appropriate = _tokenIsAppropriate(); |
| 675 var data = stream.char(); | 671 var data = stream.char(); |
| 676 if (isWhitespace(data) && appropriate) { | 672 if (isWhitespace(data) && appropriate) { |
| 677 currentToken = new EndTagToken(temporaryBuffer); | 673 currentToken = new EndTagToken('$_buffer'); |
| 678 state = beforeAttributeNameState; | 674 state = beforeAttributeNameState; |
| 679 } else if (data == "/" && appropriate) { | 675 } else if (data == "/" && appropriate) { |
| 680 currentToken = new EndTagToken(temporaryBuffer); | 676 currentToken = new EndTagToken('$_buffer'); |
| 681 state = selfClosingStartTagState; | 677 state = selfClosingStartTagState; |
| 682 } else if (data == ">" && appropriate) { | 678 } else if (data == ">" && appropriate) { |
| 683 currentToken = new EndTagToken(temporaryBuffer); | 679 currentToken = new EndTagToken('$_buffer'); |
| 684 emitCurrentToken(); | 680 emitCurrentToken(); |
| 685 state = dataState; | 681 state = dataState; |
| 686 } else if (isLetter(data)) { | 682 } else if (isLetter(data)) { |
| 687 temporaryBuffer = '${temporaryBuffer}$data'; | 683 _buffer.write(data); |
| 688 } else { | 684 } else { |
| 689 _addToken(new CharactersToken("</$temporaryBuffer")); | 685 _addToken(new CharactersToken("</$_buffer")); |
| 690 stream.unget(data); | 686 stream.unget(data); |
| 691 state = rawtextState; | 687 state = rawtextState; |
| 692 } | 688 } |
| 693 return true; | 689 return true; |
| 694 } | 690 } |
| 695 | 691 |
| 696 bool scriptDataLessThanSignState() { | 692 bool scriptDataLessThanSignState() { |
| 697 var data = stream.char(); | 693 var data = stream.char(); |
| 698 if (data == "/") { | 694 if (data == "/") { |
| 699 temporaryBuffer = ""; | 695 _buffer.clear(); |
| 700 state = scriptDataEndTagOpenState; | 696 state = scriptDataEndTagOpenState; |
| 701 } else if (data == "!") { | 697 } else if (data == "!") { |
| 702 _addToken(new CharactersToken("<!")); | 698 _addToken(new CharactersToken("<!")); |
| 703 state = scriptDataEscapeStartState; | 699 state = scriptDataEscapeStartState; |
| 704 } else { | 700 } else { |
| 705 _addToken(new CharactersToken("<")); | 701 _addToken(new CharactersToken("<")); |
| 706 stream.unget(data); | 702 stream.unget(data); |
| 707 state = scriptDataState; | 703 state = scriptDataState; |
| 708 } | 704 } |
| 709 return true; | 705 return true; |
| 710 } | 706 } |
| 711 | 707 |
| 712 bool scriptDataEndTagOpenState() { | 708 bool scriptDataEndTagOpenState() { |
| 713 var data = stream.char(); | 709 var data = stream.char(); |
| 714 if (isLetter(data)) { | 710 if (isLetter(data)) { |
| 715 temporaryBuffer = '${temporaryBuffer}$data'; | 711 _buffer.write(data); |
| 716 state = scriptDataEndTagNameState; | 712 state = scriptDataEndTagNameState; |
| 717 } else { | 713 } else { |
| 718 _addToken(new CharactersToken("</")); | 714 _addToken(new CharactersToken("</")); |
| 719 stream.unget(data); | 715 stream.unget(data); |
| 720 state = scriptDataState; | 716 state = scriptDataState; |
| 721 } | 717 } |
| 722 return true; | 718 return true; |
| 723 } | 719 } |
| 724 | 720 |
| 725 bool scriptDataEndTagNameState() { | 721 bool scriptDataEndTagNameState() { |
| 726 var appropriate = _tokenIsAppropriate(); | 722 var appropriate = _tokenIsAppropriate(); |
| 727 var data = stream.char(); | 723 var data = stream.char(); |
| 728 if (isWhitespace(data) && appropriate) { | 724 if (isWhitespace(data) && appropriate) { |
| 729 currentToken = new EndTagToken(temporaryBuffer); | 725 currentToken = new EndTagToken('$_buffer'); |
| 730 state = beforeAttributeNameState; | 726 state = beforeAttributeNameState; |
| 731 } else if (data == "/" && appropriate) { | 727 } else if (data == "/" && appropriate) { |
| 732 currentToken = new EndTagToken(temporaryBuffer); | 728 currentToken = new EndTagToken('$_buffer'); |
| 733 state = selfClosingStartTagState; | 729 state = selfClosingStartTagState; |
| 734 } else if (data == ">" && appropriate) { | 730 } else if (data == ">" && appropriate) { |
| 735 currentToken = new EndTagToken(temporaryBuffer); | 731 currentToken = new EndTagToken('$_buffer'); |
| 736 emitCurrentToken(); | 732 emitCurrentToken(); |
| 737 state = dataState; | 733 state = dataState; |
| 738 } else if (isLetter(data)) { | 734 } else if (isLetter(data)) { |
| 739 temporaryBuffer = '${temporaryBuffer}$data'; | 735 _buffer.write(data); |
| 740 } else { | 736 } else { |
| 741 _addToken(new CharactersToken("</$temporaryBuffer")); | 737 _addToken(new CharactersToken("</$_buffer")); |
| 742 stream.unget(data); | 738 stream.unget(data); |
| 743 state = scriptDataState; | 739 state = scriptDataState; |
| 744 } | 740 } |
| 745 return true; | 741 return true; |
| 746 } | 742 } |
| 747 | 743 |
| 748 bool scriptDataEscapeStartState() { | 744 bool scriptDataEscapeStartState() { |
| 749 var data = stream.char(); | 745 var data = stream.char(); |
| 750 if (data == "-") { | 746 if (data == "-") { |
| 751 _addToken(new CharactersToken("-")); | 747 _addToken(new CharactersToken("-")); |
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 826 } else { | 822 } else { |
| 827 _addToken(new CharactersToken(data)); | 823 _addToken(new CharactersToken(data)); |
| 828 state = scriptDataEscapedState; | 824 state = scriptDataEscapedState; |
| 829 } | 825 } |
| 830 return true; | 826 return true; |
| 831 } | 827 } |
| 832 | 828 |
| 833 bool scriptDataEscapedLessThanSignState() { | 829 bool scriptDataEscapedLessThanSignState() { |
| 834 var data = stream.char(); | 830 var data = stream.char(); |
| 835 if (data == "/") { | 831 if (data == "/") { |
| 836 temporaryBuffer = ""; | 832 _buffer.clear(); |
| 837 state = scriptDataEscapedEndTagOpenState; | 833 state = scriptDataEscapedEndTagOpenState; |
| 838 } else if (isLetter(data)) { | 834 } else if (isLetter(data)) { |
| 839 _addToken(new CharactersToken("<$data")); | 835 _addToken(new CharactersToken("<$data")); |
| 840 temporaryBuffer = data; | 836 _buffer.clear(); |
| 837 _buffer.write(data); | |
| 841 state = scriptDataDoubleEscapeStartState; | 838 state = scriptDataDoubleEscapeStartState; |
| 842 } else { | 839 } else { |
| 843 _addToken(new CharactersToken("<")); | 840 _addToken(new CharactersToken("<")); |
| 844 stream.unget(data); | 841 stream.unget(data); |
| 845 state = scriptDataEscapedState; | 842 state = scriptDataEscapedState; |
| 846 } | 843 } |
| 847 return true; | 844 return true; |
| 848 } | 845 } |
| 849 | 846 |
| 850 bool scriptDataEscapedEndTagOpenState() { | 847 bool scriptDataEscapedEndTagOpenState() { |
| 851 var data = stream.char(); | 848 var data = stream.char(); |
| 852 if (isLetter(data)) { | 849 if (isLetter(data)) { |
| 853 temporaryBuffer = data; | 850 _buffer.clear(); |
| 851 _buffer.write(data); | |
| 854 state = scriptDataEscapedEndTagNameState; | 852 state = scriptDataEscapedEndTagNameState; |
| 855 } else { | 853 } else { |
| 856 _addToken(new CharactersToken("</")); | 854 _addToken(new CharactersToken("</")); |
| 857 stream.unget(data); | 855 stream.unget(data); |
| 858 state = scriptDataEscapedState; | 856 state = scriptDataEscapedState; |
| 859 } | 857 } |
| 860 return true; | 858 return true; |
| 861 } | 859 } |
| 862 | 860 |
| 863 bool scriptDataEscapedEndTagNameState() { | 861 bool scriptDataEscapedEndTagNameState() { |
| 864 var appropriate = _tokenIsAppropriate(); | 862 var appropriate = _tokenIsAppropriate(); |
| 865 var data = stream.char(); | 863 var data = stream.char(); |
| 866 if (isWhitespace(data) && appropriate) { | 864 if (isWhitespace(data) && appropriate) { |
| 867 currentToken = new EndTagToken(temporaryBuffer); | 865 currentToken = new EndTagToken('$_buffer'); |
| 868 state = beforeAttributeNameState; | 866 state = beforeAttributeNameState; |
| 869 } else if (data == "/" && appropriate) { | 867 } else if (data == "/" && appropriate) { |
| 870 currentToken = new EndTagToken(temporaryBuffer); | 868 currentToken = new EndTagToken('$_buffer'); |
| 871 state = selfClosingStartTagState; | 869 state = selfClosingStartTagState; |
| 872 } else if (data == ">" && appropriate) { | 870 } else if (data == ">" && appropriate) { |
| 873 currentToken = new EndTagToken(temporaryBuffer); | 871 currentToken = new EndTagToken('$_buffer'); |
| 874 emitCurrentToken(); | 872 emitCurrentToken(); |
| 875 state = dataState; | 873 state = dataState; |
| 876 } else if (isLetter(data)) { | 874 } else if (isLetter(data)) { |
| 877 temporaryBuffer = '${temporaryBuffer}$data'; | 875 _buffer.write(data); |
| 878 } else { | 876 } else { |
| 879 _addToken(new CharactersToken("</$temporaryBuffer")); | 877 _addToken(new CharactersToken("</$_buffer")); |
| 880 stream.unget(data); | 878 stream.unget(data); |
| 881 state = scriptDataEscapedState; | 879 state = scriptDataEscapedState; |
| 882 } | 880 } |
| 883 return true; | 881 return true; |
| 884 } | 882 } |
| 885 | 883 |
| 886 bool scriptDataDoubleEscapeStartState() { | 884 bool scriptDataDoubleEscapeStartState() { |
| 887 var data = stream.char(); | 885 var data = stream.char(); |
| 888 if (isWhitespace(data) || data == "/" || data == ">") { | 886 if (isWhitespace(data) || data == "/" || data == ">") { |
| 889 _addToken(new CharactersToken(data)); | 887 _addToken(new CharactersToken(data)); |
| 890 if (temporaryBuffer.toLowerCase() == "script") { | 888 if ('$_buffer'.toLowerCase() == "script") { |
| 891 state = scriptDataDoubleEscapedState; | 889 state = scriptDataDoubleEscapedState; |
| 892 } else { | 890 } else { |
| 893 state = scriptDataEscapedState; | 891 state = scriptDataEscapedState; |
| 894 } | 892 } |
| 895 } else if (isLetter(data)) { | 893 } else if (isLetter(data)) { |
| 896 _addToken(new CharactersToken(data)); | 894 _addToken(new CharactersToken(data)); |
| 897 temporaryBuffer = '${temporaryBuffer}$data'; | 895 _buffer.write(data); |
| 898 } else { | 896 } else { |
| 899 stream.unget(data); | 897 stream.unget(data); |
| 900 state = scriptDataEscapedState; | 898 state = scriptDataEscapedState; |
| 901 } | 899 } |
| 902 return true; | 900 return true; |
| 903 } | 901 } |
| 904 | 902 |
| 905 bool scriptDataDoubleEscapedState() { | 903 bool scriptDataDoubleEscapedState() { |
| 906 var data = stream.char(); | 904 var data = stream.char(); |
| 907 if (data == "-") { | 905 if (data == "-") { |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 967 _addToken(new CharactersToken(data)); | 965 _addToken(new CharactersToken(data)); |
| 968 state = scriptDataDoubleEscapedState; | 966 state = scriptDataDoubleEscapedState; |
| 969 } | 967 } |
| 970 return true; | 968 return true; |
| 971 } | 969 } |
| 972 | 970 |
| 973 bool scriptDataDoubleEscapedLessThanSignState() { | 971 bool scriptDataDoubleEscapedLessThanSignState() { |
| 974 var data = stream.char(); | 972 var data = stream.char(); |
| 975 if (data == "/") { | 973 if (data == "/") { |
| 976 _addToken(new CharactersToken("/")); | 974 _addToken(new CharactersToken("/")); |
| 977 temporaryBuffer = ""; | 975 _buffer.clear(); |
| 978 state = scriptDataDoubleEscapeEndState; | 976 state = scriptDataDoubleEscapeEndState; |
| 979 } else { | 977 } else { |
| 980 stream.unget(data); | 978 stream.unget(data); |
| 981 state = scriptDataDoubleEscapedState; | 979 state = scriptDataDoubleEscapedState; |
| 982 } | 980 } |
| 983 return true; | 981 return true; |
| 984 } | 982 } |
| 985 | 983 |
| 986 bool scriptDataDoubleEscapeEndState() { | 984 bool scriptDataDoubleEscapeEndState() { |
| 987 var data = stream.char(); | 985 var data = stream.char(); |
| 988 if (isWhitespace(data) || data == "/" || data == ">") { | 986 if (isWhitespace(data) || data == "/" || data == ">") { |
| 989 _addToken(new CharactersToken(data)); | 987 _addToken(new CharactersToken(data)); |
| 990 if (temporaryBuffer.toLowerCase() == "script") { | 988 if ('$_buffer'.toLowerCase() == "script") { |
| 991 state = scriptDataEscapedState; | 989 state = scriptDataEscapedState; |
| 992 } else { | 990 } else { |
| 993 state = scriptDataDoubleEscapedState; | 991 state = scriptDataDoubleEscapedState; |
| 994 } | 992 } |
| 995 } else if (isLetter(data)) { | 993 } else if (isLetter(data)) { |
| 996 _addToken(new CharactersToken(data)); | 994 _addToken(new CharactersToken(data)); |
| 997 temporaryBuffer = '${temporaryBuffer}$data'; | 995 _buffer.write(data); |
| 998 } else { | 996 } else { |
| 999 stream.unget(data); | 997 stream.unget(data); |
| 1000 state = scriptDataDoubleEscapedState; | 998 state = scriptDataDoubleEscapedState; |
| 1001 } | 999 } |
| 1002 return true; | 1000 return true; |
| 1003 } | 1001 } |
| 1004 | 1002 |
| 1005 bool beforeAttributeNameState() { | 1003 bool beforeAttributeNameState() { |
| 1006 var data = stream.char(); | 1004 var data = stream.char(); |
| 1007 if (isWhitespace(data)) { | 1005 if (isWhitespace(data)) { |
| (...skipping 23 matching lines...) Expand all Loading... | |
| 1031 return true; | 1029 return true; |
| 1032 } | 1030 } |
| 1033 | 1031 |
| 1034 bool attributeNameState() { | 1032 bool attributeNameState() { |
| 1035 var data = stream.char(); | 1033 var data = stream.char(); |
| 1036 bool leavingThisState = true; | 1034 bool leavingThisState = true; |
| 1037 bool emitToken = false; | 1035 bool emitToken = false; |
| 1038 if (data == "=") { | 1036 if (data == "=") { |
| 1039 state = beforeAttributeValueState; | 1037 state = beforeAttributeValueState; |
| 1040 } else if (isLetter(data)) { | 1038 } else if (isLetter(data)) { |
| 1041 _attributeName = '$_attributeName$data' | 1039 _attributeName.write(data); |
| 1042 '${stream.charsUntil(asciiLetters, true)}'; | 1040 _attributeName.write(stream.charsUntil(asciiLetters, true)); |
| 1043 leavingThisState = false; | 1041 leavingThisState = false; |
| 1044 } else if (data == ">") { | 1042 } else if (data == ">") { |
| 1045 // XXX If we emit here the attributes are converted to a dict | 1043 // XXX If we emit here the attributes are converted to a dict |
| 1046 // without being checked and when the code below runs we error | 1044 // without being checked and when the code below runs we error |
| 1047 // because data is a dict not a list | 1045 // because data is a dict not a list |
| 1048 emitToken = true; | 1046 emitToken = true; |
| 1049 } else if (isWhitespace(data)) { | 1047 } else if (isWhitespace(data)) { |
| 1050 state = afterAttributeNameState; | 1048 state = afterAttributeNameState; |
| 1051 } else if (data == "/") { | 1049 } else if (data == "/") { |
| 1052 state = selfClosingStartTagState; | 1050 state = selfClosingStartTagState; |
| 1053 } else if (data == "\u0000") { | 1051 } else if (data == "\u0000") { |
| 1054 _addToken(new ParseErrorToken("invalid-codepoint")); | 1052 _addToken(new ParseErrorToken("invalid-codepoint")); |
| 1055 _attributeName = '${_attributeName}\uFFFD'; | 1053 _attributeName.write('\uFFFD'); |
| 1056 leavingThisState = false; | 1054 leavingThisState = false; |
| 1057 } else if (data == EOF) { | 1055 } else if (data == EOF) { |
| 1058 _addToken(new ParseErrorToken("eof-in-attribute-name")); | 1056 _addToken(new ParseErrorToken("eof-in-attribute-name")); |
| 1059 state = dataState; | 1057 state = dataState; |
| 1060 } else if ("'\"<".contains(data)) { | 1058 } else if ("'\"<".contains(data)) { |
| 1061 _addToken(new ParseErrorToken("invalid-character-in-attribute-name")); | 1059 _addToken(new ParseErrorToken("invalid-character-in-attribute-name")); |
| 1062 _attributeName = '$_attributeName$data'; | 1060 _attributeName.write(data); |
| 1063 leavingThisState = false; | 1061 leavingThisState = false; |
| 1064 } else { | 1062 } else { |
| 1065 _attributeName = '$_attributeName$data'; | 1063 _attributeName.write(data); |
| 1066 leavingThisState = false; | 1064 leavingThisState = false; |
| 1067 } | 1065 } |
| 1068 | 1066 |
| 1069 if (leavingThisState) { | 1067 if (leavingThisState) { |
| 1070 _markAttributeNameEnd(-1); | 1068 _markAttributeNameEnd(-1); |
| 1071 | 1069 |
| 1072 // Attributes are not dropped at this stage. That happens when the | 1070 // Attributes are not dropped at this stage. That happens when the |
| 1073 // start tag token is emitted so values can still be safely appended | 1071 // start tag token is emitted so values can still be safely appended |
| 1074 // to attributes, but we do want to report the parse error in time. | 1072 // to attributes, but we do want to report the parse error in time. |
| 1073 var attrName = _attributeName.toString(); | |
| 1075 if (lowercaseAttrName) { | 1074 if (lowercaseAttrName) { |
| 1076 _attributeName = asciiUpper2Lower(_attributeName); | 1075 attrName = asciiUpper2Lower(attrName); |
| 1077 } | 1076 } |
| 1077 _attributes.last.name = attrName; | |
| 1078 if (_attributeNames == null) _attributeNames = new Set(); | 1078 if (_attributeNames == null) _attributeNames = new Set(); |
| 1079 if (_attributeNames.contains(_attributeName)) { | 1079 if (_attributeNames.contains(attrName)) { |
| 1080 _addToken(new ParseErrorToken("duplicate-attribute")); | 1080 _addToken(new ParseErrorToken("duplicate-attribute")); |
| 1081 } | 1081 } |
| 1082 _attributeNames.add(_attributeName); | 1082 _attributeNames.add(attrName); |
| 1083 | 1083 |
| 1084 // XXX Fix for above XXX | 1084 // XXX Fix for above XXX |
| 1085 if (emitToken) { | 1085 if (emitToken) { |
| 1086 emitCurrentToken(); | 1086 emitCurrentToken(); |
| 1087 } | 1087 } |
| 1088 } | 1088 } |
| 1089 return true; | 1089 return true; |
| 1090 } | 1090 } |
| 1091 | 1091 |
| 1092 bool afterAttributeNameState() { | 1092 bool afterAttributeNameState() { |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1134 } else if (data == "'") { | 1134 } else if (data == "'") { |
| 1135 _markAttributeValueStart(0); | 1135 _markAttributeValueStart(0); |
| 1136 state = attributeValueSingleQuotedState; | 1136 state = attributeValueSingleQuotedState; |
| 1137 } else if (data == ">") { | 1137 } else if (data == ">") { |
| 1138 _addToken(new ParseErrorToken( | 1138 _addToken(new ParseErrorToken( |
| 1139 "expected-attribute-value-but-got-right-bracket")); | 1139 "expected-attribute-value-but-got-right-bracket")); |
| 1140 emitCurrentToken(); | 1140 emitCurrentToken(); |
| 1141 } else if (data == "\u0000") { | 1141 } else if (data == "\u0000") { |
| 1142 _addToken(new ParseErrorToken("invalid-codepoint")); | 1142 _addToken(new ParseErrorToken("invalid-codepoint")); |
| 1143 _markAttributeValueStart(-1); | 1143 _markAttributeValueStart(-1); |
| 1144 _attributeValue = '${_attributeValue}\uFFFD'; | 1144 _attributeValue.write('\uFFFD'); |
| 1145 state = attributeValueUnQuotedState; | 1145 state = attributeValueUnQuotedState; |
| 1146 } else if (data == EOF) { | 1146 } else if (data == EOF) { |
| 1147 _addToken(new ParseErrorToken("expected-attribute-value-but-got-eof")); | 1147 _addToken(new ParseErrorToken("expected-attribute-value-but-got-eof")); |
| 1148 state = dataState; | 1148 state = dataState; |
| 1149 } else if ("=<`".contains(data)) { | 1149 } else if ("=<`".contains(data)) { |
| 1150 _addToken(new ParseErrorToken("equals-in-unquoted-attribute-value")); | 1150 _addToken(new ParseErrorToken("equals-in-unquoted-attribute-value")); |
| 1151 _markAttributeValueStart(-1); | 1151 _markAttributeValueStart(-1); |
| 1152 _attributeValue = '$_attributeValue$data'; | 1152 _attributeValue.write(data); |
| 1153 state = attributeValueUnQuotedState; | 1153 state = attributeValueUnQuotedState; |
| 1154 } else { | 1154 } else { |
| 1155 _markAttributeValueStart(-1); | 1155 _markAttributeValueStart(-1); |
| 1156 _attributeValue = '$_attributeValue$data'; | 1156 _attributeValue.write(data); |
| 1157 state = attributeValueUnQuotedState; | 1157 state = attributeValueUnQuotedState; |
| 1158 } | 1158 } |
| 1159 return true; | 1159 return true; |
| 1160 } | 1160 } |
| 1161 | 1161 |
| 1162 bool attributeValueDoubleQuotedState() { | 1162 bool attributeValueDoubleQuotedState() { |
| 1163 var data = stream.char(); | 1163 var data = stream.char(); |
| 1164 if (data == "\"") { | 1164 if (data == "\"") { |
| 1165 _markAttributeValueEnd(-1); | 1165 _markAttributeValueEnd(-1); |
| 1166 _markAttributeEnd(0); | 1166 _markAttributeEnd(0); |
| 1167 state = afterAttributeValueState; | 1167 state = afterAttributeValueState; |
| 1168 } else if (data == "&") { | 1168 } else if (data == "&") { |
| 1169 processEntityInAttribute('"'); | 1169 processEntityInAttribute('"'); |
| 1170 } else if (data == "\u0000") { | 1170 } else if (data == "\u0000") { |
| 1171 _addToken(new ParseErrorToken("invalid-codepoint")); | 1171 _addToken(new ParseErrorToken("invalid-codepoint")); |
| 1172 _attributeValue = '${_attributeValue}\uFFFD'; | 1172 _attributeValue.write('\uFFFD'); |
| 1173 } else if (data == EOF) { | 1173 } else if (data == EOF) { |
| 1174 _addToken(new ParseErrorToken("eof-in-attribute-value-double-quote")); | 1174 _addToken(new ParseErrorToken("eof-in-attribute-value-double-quote")); |
| 1175 _markAttributeValueEnd(-1); | 1175 _markAttributeValueEnd(-1); |
| 1176 state = dataState; | 1176 state = dataState; |
| 1177 } else { | 1177 } else { |
| 1178 _attributeValue = '$_attributeValue$data${stream.charsUntil("\"&")}'; | 1178 _attributeValue.write(data); |
| 1179 _attributeValue.write(stream.charsUntil("\"&")); | |
| 1179 } | 1180 } |
| 1180 return true; | 1181 return true; |
| 1181 } | 1182 } |
| 1182 | 1183 |
| 1183 bool attributeValueSingleQuotedState() { | 1184 bool attributeValueSingleQuotedState() { |
| 1184 var data = stream.char(); | 1185 var data = stream.char(); |
| 1185 if (data == "'") { | 1186 if (data == "'") { |
| 1186 _markAttributeValueEnd(-1); | 1187 _markAttributeValueEnd(-1); |
| 1187 _markAttributeEnd(0); | 1188 _markAttributeEnd(0); |
| 1188 state = afterAttributeValueState; | 1189 state = afterAttributeValueState; |
| 1189 } else if (data == "&") { | 1190 } else if (data == "&") { |
| 1190 processEntityInAttribute("'"); | 1191 processEntityInAttribute("'"); |
| 1191 } else if (data == "\u0000") { | 1192 } else if (data == "\u0000") { |
| 1192 _addToken(new ParseErrorToken("invalid-codepoint")); | 1193 _addToken(new ParseErrorToken("invalid-codepoint")); |
| 1193 _attributeValue = '${_attributeValue}\uFFFD'; | 1194 _attributeValue.write('\uFFFD'); |
| 1194 } else if (data == EOF) { | 1195 } else if (data == EOF) { |
| 1195 _addToken(new ParseErrorToken("eof-in-attribute-value-single-quote")); | 1196 _addToken(new ParseErrorToken("eof-in-attribute-value-single-quote")); |
| 1196 _markAttributeValueEnd(-1); | 1197 _markAttributeValueEnd(-1); |
| 1197 state = dataState; | 1198 state = dataState; |
| 1198 } else { | 1199 } else { |
| 1199 _attributeValue = '$_attributeValue$data${stream.charsUntil("\'&")}'; | 1200 _attributeValue.write(data); |
| 1201 _attributeValue.write(stream.charsUntil("\'&")); | |
| 1200 } | 1202 } |
| 1201 return true; | 1203 return true; |
| 1202 } | 1204 } |
| 1203 | 1205 |
| 1204 bool attributeValueUnQuotedState() { | 1206 bool attributeValueUnQuotedState() { |
| 1205 var data = stream.char(); | 1207 var data = stream.char(); |
| 1206 if (isWhitespace(data)) { | 1208 if (isWhitespace(data)) { |
| 1207 _markAttributeValueEnd(-1); | 1209 _markAttributeValueEnd(-1); |
| 1208 state = beforeAttributeNameState; | 1210 state = beforeAttributeNameState; |
| 1209 } else if (data == "&") { | 1211 } else if (data == "&") { |
| 1210 processEntityInAttribute(">"); | 1212 processEntityInAttribute(">"); |
| 1211 } else if (data == ">") { | 1213 } else if (data == ">") { |
| 1212 _markAttributeValueEnd(-1); | 1214 _markAttributeValueEnd(-1); |
| 1213 emitCurrentToken(); | 1215 emitCurrentToken(); |
| 1214 } else if (data == EOF) { | 1216 } else if (data == EOF) { |
| 1215 _addToken(new ParseErrorToken("eof-in-attribute-value-no-quotes")); | 1217 _addToken(new ParseErrorToken("eof-in-attribute-value-no-quotes")); |
| 1216 _markAttributeValueEnd(-1); | 1218 _markAttributeValueEnd(-1); |
| 1217 state = dataState; | 1219 state = dataState; |
| 1218 } else if ('"\'=<`'.contains(data)) { | 1220 } else if ('"\'=<`'.contains(data)) { |
| 1219 _addToken(new ParseErrorToken( | 1221 _addToken(new ParseErrorToken( |
| 1220 "unexpected-character-in-unquoted-attribute-value")); | 1222 "unexpected-character-in-unquoted-attribute-value")); |
| 1221 _attributeValue = '$_attributeValue$data'; | 1223 _attributeValue.write(data); |
| 1222 } else if (data == "\u0000") { | 1224 } else if (data == "\u0000") { |
| 1223 _addToken(new ParseErrorToken("invalid-codepoint")); | 1225 _addToken(new ParseErrorToken("invalid-codepoint")); |
| 1224 _attributeValue = '${_attributeValue}\uFFFD'; | 1226 _attributeValue.write('\uFFFD'); |
| 1225 } else { | 1227 } else { |
| 1226 _attributeValue = '$_attributeValue$data' | 1228 _attributeValue.write(data); |
| 1227 '${stream.charsUntil("&>\"\'=<`$spaceCharacters")}'; | 1229 _attributeValue.write(stream.charsUntil("&>\"\'=<`$spaceCharacters")); |
| 1228 } | 1230 } |
| 1229 return true; | 1231 return true; |
| 1230 } | 1232 } |
| 1231 | 1233 |
| 1232 bool afterAttributeValueState() { | 1234 bool afterAttributeValueState() { |
| 1233 var data = stream.char(); | 1235 var data = stream.char(); |
| 1234 if (isWhitespace(data)) { | 1236 if (isWhitespace(data)) { |
| 1235 state = beforeAttributeNameState; | 1237 state = beforeAttributeNameState; |
| 1236 } else if (data == ">") { | 1238 } else if (data == ">") { |
| 1237 emitCurrentToken(); | 1239 emitCurrentToken(); |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1281 stream.char(); | 1283 stream.char(); |
| 1282 state = dataState; | 1284 state = dataState; |
| 1283 return true; | 1285 return true; |
| 1284 } | 1286 } |
| 1285 | 1287 |
| 1286 bool markupDeclarationOpenState() { | 1288 bool markupDeclarationOpenState() { |
| 1287 var charStack = [stream.char()]; | 1289 var charStack = [stream.char()]; |
| 1288 if (charStack.last == "-") { | 1290 if (charStack.last == "-") { |
| 1289 charStack.add(stream.char()); | 1291 charStack.add(stream.char()); |
| 1290 if (charStack.last == "-") { | 1292 if (charStack.last == "-") { |
| 1291 currentToken = new CommentToken(""); | 1293 currentToken = new CommentToken(); |
| 1292 state = commentStartState; | 1294 state = commentStartState; |
| 1293 return true; | 1295 return true; |
| 1294 } | 1296 } |
| 1295 } else if (charStack.last == 'd' || charStack.last == 'D') { | 1297 } else if (charStack.last == 'd' || charStack.last == 'D') { |
| 1296 var matched = true; | 1298 var matched = true; |
| 1297 for (var expected in const ['oO', 'cC', 'tT', 'yY', 'pP', 'eE']) { | 1299 for (var expected in const ['oO', 'cC', 'tT', 'yY', 'pP', 'eE']) { |
| 1298 var char = stream.char(); | 1300 var char = stream.char(); |
| 1299 charStack.add(char); | 1301 charStack.add(char); |
| 1300 if (char == EOF || !expected.contains(char)) { | 1302 if (char == EOF || !expected.contains(char)) { |
| 1301 matched = false; | 1303 matched = false; |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1334 state = bogusCommentState; | 1336 state = bogusCommentState; |
| 1335 return true; | 1337 return true; |
| 1336 } | 1338 } |
| 1337 | 1339 |
| 1338 bool commentStartState() { | 1340 bool commentStartState() { |
| 1339 var data = stream.char(); | 1341 var data = stream.char(); |
| 1340 if (data == "-") { | 1342 if (data == "-") { |
| 1341 state = commentStartDashState; | 1343 state = commentStartDashState; |
| 1342 } else if (data == "\u0000") { | 1344 } else if (data == "\u0000") { |
| 1343 _addToken(new ParseErrorToken("invalid-codepoint")); | 1345 _addToken(new ParseErrorToken("invalid-codepoint")); |
| 1344 currentStringToken.data = '${currentStringToken.data}\uFFFD'; | 1346 currentStringToken.add('\uFFFD'); |
| 1345 } else if (data == ">") { | 1347 } else if (data == ">") { |
| 1346 _addToken(new ParseErrorToken("incorrect-comment")); | 1348 _addToken(new ParseErrorToken("incorrect-comment")); |
| 1347 _addToken(currentToken); | 1349 _addToken(currentToken); |
| 1348 state = dataState; | 1350 state = dataState; |
| 1349 } else if (data == EOF) { | 1351 } else if (data == EOF) { |
| 1350 _addToken(new ParseErrorToken("eof-in-comment")); | 1352 _addToken(new ParseErrorToken("eof-in-comment")); |
| 1351 _addToken(currentToken); | 1353 _addToken(currentToken); |
| 1352 state = dataState; | 1354 state = dataState; |
| 1353 } else { | 1355 } else { |
| 1354 currentStringToken.data = '${currentStringToken.data}$data'; | 1356 currentStringToken.add(data); |
| 1355 state = commentState; | 1357 state = commentState; |
| 1356 } | 1358 } |
| 1357 return true; | 1359 return true; |
| 1358 } | 1360 } |
| 1359 | 1361 |
| 1360 bool commentStartDashState() { | 1362 bool commentStartDashState() { |
| 1361 var data = stream.char(); | 1363 var data = stream.char(); |
| 1362 if (data == "-") { | 1364 if (data == "-") { |
| 1363 state = commentEndState; | 1365 state = commentEndState; |
| 1364 } else if (data == "\u0000") { | 1366 } else if (data == "\u0000") { |
| 1365 _addToken(new ParseErrorToken("invalid-codepoint")); | 1367 _addToken(new ParseErrorToken("invalid-codepoint")); |
| 1366 currentStringToken.data = '${currentStringToken.data}-\uFFFD'; | 1368 currentStringToken.add('-\uFFFD'); |
| 1367 } else if (data == ">") { | 1369 } else if (data == ">") { |
| 1368 _addToken(new ParseErrorToken("incorrect-comment")); | 1370 _addToken(new ParseErrorToken("incorrect-comment")); |
| 1369 _addToken(currentToken); | 1371 _addToken(currentToken); |
| 1370 state = dataState; | 1372 state = dataState; |
| 1371 } else if (data == EOF) { | 1373 } else if (data == EOF) { |
| 1372 _addToken(new ParseErrorToken("eof-in-comment")); | 1374 _addToken(new ParseErrorToken("eof-in-comment")); |
| 1373 _addToken(currentToken); | 1375 _addToken(currentToken); |
| 1374 state = dataState; | 1376 state = dataState; |
| 1375 } else { | 1377 } else { |
| 1376 currentStringToken.data = '${currentStringToken.data}-${data}'; | 1378 currentStringToken.add('-').add(data); |
| 1377 state = commentState; | 1379 state = commentState; |
| 1378 } | 1380 } |
| 1379 return true; | 1381 return true; |
| 1380 } | 1382 } |
| 1381 | 1383 |
| 1382 bool commentState() { | 1384 bool commentState() { |
| 1383 var data = stream.char(); | 1385 var data = stream.char(); |
| 1384 if (data == "-") { | 1386 if (data == "-") { |
| 1385 state = commentEndDashState; | 1387 state = commentEndDashState; |
| 1386 } else if (data == "\u0000") { | 1388 } else if (data == "\u0000") { |
| 1387 _addToken(new ParseErrorToken("invalid-codepoint")); | 1389 _addToken(new ParseErrorToken("invalid-codepoint")); |
| 1388 currentStringToken.data = '${currentStringToken.data}\uFFFD'; | 1390 currentStringToken.add('\uFFFD'); |
| 1389 } else if (data == EOF) { | 1391 } else if (data == EOF) { |
| 1390 _addToken(new ParseErrorToken("eof-in-comment")); | 1392 _addToken(new ParseErrorToken("eof-in-comment")); |
| 1391 _addToken(currentToken); | 1393 _addToken(currentToken); |
| 1392 state = dataState; | 1394 state = dataState; |
| 1393 } else { | 1395 } else { |
| 1394 currentStringToken.data = '${currentStringToken.data}$data' | 1396 currentStringToken.add(data).add(stream.charsUntil("-\u0000")); |
|
Siggi Cherem (dart-lang) 2015/03/05 23:01:56:
maybe use cascades here and below? (instead of returning `this` from `add`)
| |
| 1395 '${stream.charsUntil("-\u0000")}'; | |
| 1396 } | 1397 } |
| 1397 return true; | 1398 return true; |
| 1398 } | 1399 } |
| 1399 | 1400 |
| 1400 bool commentEndDashState() { | 1401 bool commentEndDashState() { |
| 1401 var data = stream.char(); | 1402 var data = stream.char(); |
| 1402 if (data == "-") { | 1403 if (data == "-") { |
| 1403 state = commentEndState; | 1404 state = commentEndState; |
| 1404 } else if (data == "\u0000") { | 1405 } else if (data == "\u0000") { |
| 1405 _addToken(new ParseErrorToken("invalid-codepoint")); | 1406 _addToken(new ParseErrorToken("invalid-codepoint")); |
| 1406 currentStringToken.data = "${currentStringToken.data}-\uFFFD"; | 1407 currentStringToken.add('-\uFFFD'); |
| 1407 state = commentState; | 1408 state = commentState; |
| 1408 } else if (data == EOF) { | 1409 } else if (data == EOF) { |
| 1409 _addToken(new ParseErrorToken("eof-in-comment-end-dash")); | 1410 _addToken(new ParseErrorToken("eof-in-comment-end-dash")); |
| 1410 _addToken(currentToken); | 1411 _addToken(currentToken); |
| 1411 state = dataState; | 1412 state = dataState; |
| 1412 } else { | 1413 } else { |
| 1413 currentStringToken.data = "${currentStringToken.data}-${data}"; | 1414 currentStringToken.add('-').add(data); |
| 1414 state = commentState; | 1415 state = commentState; |
| 1415 } | 1416 } |
| 1416 return true; | 1417 return true; |
| 1417 } | 1418 } |
| 1418 | 1419 |
| 1419 bool commentEndState() { | 1420 bool commentEndState() { |
| 1420 var data = stream.char(); | 1421 var data = stream.char(); |
| 1421 if (data == ">") { | 1422 if (data == ">") { |
| 1422 _addToken(currentToken); | 1423 _addToken(currentToken); |
| 1423 state = dataState; | 1424 state = dataState; |
| 1424 } else if (data == "\u0000") { | 1425 } else if (data == "\u0000") { |
| 1425 _addToken(new ParseErrorToken("invalid-codepoint")); | 1426 _addToken(new ParseErrorToken("invalid-codepoint")); |
| 1426 currentStringToken.data = '${currentStringToken.data}--\uFFFD'; | 1427 currentStringToken.add('--\uFFFD'); |
| 1427 state = commentState; | 1428 state = commentState; |
| 1428 } else if (data == "!") { | 1429 } else if (data == "!") { |
| 1429 _addToken( | 1430 _addToken( |
| 1430 new ParseErrorToken("unexpected-bang-after-double-dash-in-comment")); | 1431 new ParseErrorToken("unexpected-bang-after-double-dash-in-comment")); |
| 1431 state = commentEndBangState; | 1432 state = commentEndBangState; |
| 1432 } else if (data == "-") { | 1433 } else if (data == "-") { |
| 1433 _addToken( | 1434 _addToken( |
| 1434 new ParseErrorToken("unexpected-dash-after-double-dash-in-comment")); | 1435 new ParseErrorToken("unexpected-dash-after-double-dash-in-comment")); |
| 1435 currentStringToken.data = '${currentStringToken.data}$data'; | 1436 currentStringToken.add(data); |
| 1436 } else if (data == EOF) { | 1437 } else if (data == EOF) { |
| 1437 _addToken(new ParseErrorToken("eof-in-comment-double-dash")); | 1438 _addToken(new ParseErrorToken("eof-in-comment-double-dash")); |
| 1438 _addToken(currentToken); | 1439 _addToken(currentToken); |
| 1439 state = dataState; | 1440 state = dataState; |
| 1440 } else { | 1441 } else { |
| 1441 // XXX | 1442 // XXX |
| 1442 _addToken(new ParseErrorToken("unexpected-char-in-comment")); | 1443 _addToken(new ParseErrorToken("unexpected-char-in-comment")); |
| 1443 currentStringToken.data = "${currentStringToken.data}--${data}"; | 1444 currentStringToken.add('--').add(data); |
| 1444 state = commentState; | 1445 state = commentState; |
| 1445 } | 1446 } |
| 1446 return true; | 1447 return true; |
| 1447 } | 1448 } |
| 1448 | 1449 |
| 1449 bool commentEndBangState() { | 1450 bool commentEndBangState() { |
| 1450 var data = stream.char(); | 1451 var data = stream.char(); |
| 1451 if (data == ">") { | 1452 if (data == ">") { |
| 1452 _addToken(currentToken); | 1453 _addToken(currentToken); |
| 1453 state = dataState; | 1454 state = dataState; |
| 1454 } else if (data == "-") { | 1455 } else if (data == "-") { |
| 1455 currentStringToken.data = '${currentStringToken.data}--!'; | 1456 currentStringToken.add('--!'); |
| 1456 state = commentEndDashState; | 1457 state = commentEndDashState; |
| 1457 } else if (data == "\u0000") { | 1458 } else if (data == "\u0000") { |
| 1458 _addToken(new ParseErrorToken("invalid-codepoint")); | 1459 _addToken(new ParseErrorToken("invalid-codepoint")); |
| 1459 currentStringToken.data = '${currentStringToken.data}--!\uFFFD'; | 1460 currentStringToken.add('--!\uFFFD'); |
| 1460 state = commentState; | 1461 state = commentState; |
| 1461 } else if (data == EOF) { | 1462 } else if (data == EOF) { |
| 1462 _addToken(new ParseErrorToken("eof-in-comment-end-bang-state")); | 1463 _addToken(new ParseErrorToken("eof-in-comment-end-bang-state")); |
| 1463 _addToken(currentToken); | 1464 _addToken(currentToken); |
| 1464 state = dataState; | 1465 state = dataState; |
| 1465 } else { | 1466 } else { |
| 1466 currentStringToken.data = "${currentStringToken.data}--!${data}"; | 1467 currentStringToken.add('--!').add(data); |
| 1467 state = commentState; | 1468 state = commentState; |
| 1468 } | 1469 } |
| 1469 return true; | 1470 return true; |
| 1470 } | 1471 } |
| 1471 | 1472 |
| 1472 bool doctypeState() { | 1473 bool doctypeState() { |
| 1473 var data = stream.char(); | 1474 var data = stream.char(); |
| 1474 if (isWhitespace(data)) { | 1475 if (isWhitespace(data)) { |
| 1475 state = beforeDoctypeNameState; | 1476 state = beforeDoctypeNameState; |
| 1476 } else if (data == EOF) { | 1477 } else if (data == EOF) { |
| (...skipping 421 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1898 } | 1899 } |
| 1899 } | 1900 } |
| 1900 | 1901 |
| 1901 if (data.length > 0) { | 1902 if (data.length > 0) { |
| 1902 _addToken(new CharactersToken(data.join())); | 1903 _addToken(new CharactersToken(data.join())); |
| 1903 } | 1904 } |
| 1904 state = dataState; | 1905 state = dataState; |
| 1905 return true; | 1906 return true; |
| 1906 } | 1907 } |
| 1907 } | 1908 } |
| OLD | NEW |