Chromium Code Reviews

Side by Side Diff: lib/src/tokenizer.dart

Issue 987433005: remove most string concat, fixes #7 (Closed) Base URL: git@github.com:dart-lang/html.git@master
Patch Set: Created 5 years, 9 months ago
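The patch replaces most of the tokenizer's per-character string concatenation with reusable StringBuffers: the String temporaryBuffer field becomes a StringBuffer _buffer, and attribute names and values are accumulated in buffers until the attribute is finished. A minimal standalone sketch of the difference being targeted, not taken from the patch itself:

// Growing a string with interpolation copies the accumulated prefix on
// every append; a StringBuffer collects the pieces and builds the final
// string once in toString().
void main() {
  var chars = ['s', 'c', 'r', 'i', 'p', 't'];

  var concat = '';
  for (var c in chars) {
    concat = '$concat$c'; // before: re-copies the whole string each time
  }

  var buffer = new StringBuffer();
  for (var c in chars) {
    buffer.write(c); // after: appends into a reusable buffer
  }

  print(concat == buffer.toString()); // prints: true
}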
1 library tokenizer; 1 library tokenizer;
2 2
3 import 'dart:collection'; 3 import 'dart:collection';
4 import 'package:html/parser.dart' show HtmlParser; 4 import 'package:html/parser.dart' show HtmlParser;
5 import 'constants.dart'; 5 import 'constants.dart';
6 import 'inputstream.dart'; 6 import 'inputstream.dart';
7 import 'token.dart'; 7 import 'token.dart';
8 import 'utils.dart'; 8 import 'utils.dart';
9 9
10 // Group entities by their first character, for faster lookups 10 // Group entities by their first character, for faster lookups
(...skipping 37 matching lines...)
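The grouping code referred to by the comment above falls in the elided lines; roughly, the idea is to bucket the named-entity table by first character so a lookup only scans candidates that can still match. A hypothetical sketch with illustrative names, not the file's actual identifiers:

// Bucket entity names by their first character for faster prefix matching.
Map<String, List<String>> groupEntitiesByFirstChar(Iterable<String> names) {
  var groups = <String, List<String>>{};
  for (var name in names) {
    groups.putIfAbsent(name[0], () => <String>[]).add(name);
  }
  return groups;
}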
48 final Queue<Token> tokenQueue; 48 final Queue<Token> tokenQueue;
49 49
50 /// Holds the token that is currently being processed. 50 /// Holds the token that is currently being processed.
51 Token currentToken; 51 Token currentToken;
52 52
53 /// Holds a reference to the method to be invoked for the next parser state. 53 /// Holds a reference to the method to be invoked for the next parser state.
54 // TODO(jmesserly): the type should be "Predicate" but a dart2js checked mode 54 // TODO(jmesserly): the type should be "Predicate" but a dart2js checked mode
55 // bug prevents us from doing that. See http://dartbug.com/12465 55 // bug prevents us from doing that. See http://dartbug.com/12465
56 Function state; 56 Function state;
57 57
58 String temporaryBuffer; 58 final StringBuffer _buffer = new StringBuffer();
59 59
60 int _lastOffset; 60 int _lastOffset;
61 61
62 // TODO(jmesserly): ideally this would be a LinkedHashMap and we wouldn't add 62 // TODO(jmesserly): ideally this would be a LinkedHashMap and we wouldn't add
63 // an item until it's ready. But the code doesn't have a clear notion of when 63 // an item until it's ready. But the code doesn't have a clear notion of when
64 // it's "done" with the attribute. 64 // it's "done" with the attribute.
65 List<TagAttribute> _attributes; 65 List<TagAttribute> _attributes;
66 Set<String> _attributeNames; 66 Set<String> _attributeNames;
67 67
68 HtmlTokenizer(doc, {String encoding, bool parseMeta: true, 68 HtmlTokenizer(doc, {String encoding, bool parseMeta: true,
69 this.lowercaseElementName: true, this.lowercaseAttrName: true, 69 this.lowercaseElementName: true, this.lowercaseAttrName: true,
70 bool generateSpans: false, String sourceUrl, this.attributeSpans: false}) 70 bool generateSpans: false, String sourceUrl, this.attributeSpans: false})
71 : stream = new HtmlInputStream( 71 : stream = new HtmlInputStream(
72 doc, encoding, parseMeta, generateSpans, sourceUrl), 72 doc, encoding, parseMeta, generateSpans, sourceUrl),
73 tokenQueue = new Queue(), 73 tokenQueue = new Queue(),
74 generateSpans = generateSpans { 74 generateSpans = generateSpans {
75 reset(); 75 reset();
76 } 76 }
77 77
78 TagToken get currentTagToken => currentToken; 78 TagToken get currentTagToken => currentToken;
79 DoctypeToken get currentDoctypeToken => currentToken; 79 DoctypeToken get currentDoctypeToken => currentToken;
80 StringToken get currentStringToken => currentToken; 80 StringToken get currentStringToken => currentToken;
81 81
82 Token _current; 82 Token _current;
83 Token get current => _current; 83 Token get current => _current;
84 84
85 String get _attributeName => _attributes.last.name; 85 final StringBuffer _attributeName = new StringBuffer();
86 set _attributeName(String value) { 86 final StringBuffer _attributeValue = new StringBuffer();
87 _attributes.last.name = value;
88 }
89
90 String get _attributeValue => _attributes.last.value;
91 set _attributeValue(String value) {
92 _attributes.last.value = value;
93 }
94 87
95 void _markAttributeEnd(int offset) { 88 void _markAttributeEnd(int offset) {
89 _attributes.last.value = '$_attributeValue';
96 if (attributeSpans) _attributes.last.end = stream.position + offset; 90 if (attributeSpans) _attributes.last.end = stream.position + offset;
97 } 91 }
98 92
99 void _markAttributeValueStart(int offset) { 93 void _markAttributeValueStart(int offset) {
100 if (attributeSpans) _attributes.last.startValue = stream.position + offset; 94 if (attributeSpans) _attributes.last.startValue = stream.position + offset;
101 } 95 }
102 96
103 void _markAttributeValueEnd(int offset) { 97 void _markAttributeValueEnd(int offset) {
104 if (attributeSpans) { 98 if (attributeSpans) _attributes.last.endValue = stream.position + offset;
105 _attributes.last.endValue = stream.position + offset; 99 _markAttributeEnd(offset);
106 _markAttributeEnd(offset);
107 }
108 } 100 }
109 101
110 // Note: we could track the name span here, if we need it. 102 // Note: we could track the name span here, if we need it.
111 void _markAttributeNameEnd(int offset) => _markAttributeEnd(offset); 103 void _markAttributeNameEnd(int offset) => _markAttributeEnd(offset);
112 104
113 void _addAttribute(String name) { 105 void _addAttribute(String name) {
114 if (_attributes == null) _attributes = []; 106 if (_attributes == null) _attributes = [];
115 var attr = new TagAttribute(name); 107 _attributeName.clear();
108 _attributeName.write(name);
109 _attributeValue.clear();
110 var attr = new TagAttribute();
116 _attributes.add(attr); 111 _attributes.add(attr);
117 if (attributeSpans) attr.start = stream.position - name.length; 112 if (attributeSpans) attr.start = stream.position - name.length;
118 } 113 }
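This hunk shows the core of the change for attributes: TagAttribute no longer receives its name at construction time; instead the name and value accumulate in two reusable buffers and are flushed into the attribute when it is marked as ended (see _markAttributeEnd above). A simplified sketch of that flow, with illustrative names rather than the file's real ones:

// Illustrative sketch only: accumulate the attribute name/value in shared
// buffers and materialize strings once per attribute, not per character.
class Attr {
  String name;
  String value;
}

class AttributeBuffering {
  final List<Attr> attributes = [];
  final StringBuffer nameBuffer = new StringBuffer();
  final StringBuffer valueBuffer = new StringBuffer();

  void startAttribute(String firstChars) {
    nameBuffer.clear();
    nameBuffer.write(firstChars);
    valueBuffer.clear();
    attributes.add(new Attr());
  }

  void appendValue(String chars) {
    valueBuffer.write(chars);
  }

  void endAttribute() {
    attributes.last.name = '$nameBuffer';
    attributes.last.value = '$valueBuffer';
  }
}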
119 114
120 /// This is where the magic happens. 115 /// This is where the magic happens.
121 /// 116 ///
122 /// We do our usual processing through the states and when we have a token 117 /// We do our usual processing through the states and when we have a token
123 /// to return we yield the token which pauses processing until the next token 118 /// to return we yield the token which pauses processing until the next token
124 /// is requested. 119 /// is requested.
125 bool moveNext() { 120 bool moveNext() {
(...skipping 13 matching lines...)
139 } 134 }
140 return true; 135 return true;
141 } 136 }
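The tokenizer is consumed through this Iterator-style API: callers repeatedly invoke moveNext(), which runs state handlers until a token has been queued, and then read current. A rough usage sketch (the package-internal imports are shown for illustration only):

import 'package:html/src/token.dart';
import 'package:html/src/tokenizer.dart';

// Drives the tokenizer state machine to completion and prints each token.
void dumpTokens(String html) {
  var tokenizer = new HtmlTokenizer(html);
  while (tokenizer.moveNext()) {
    Token token = tokenizer.current;
    print(token);
  }
}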
142 137
143 /// Resets the tokenizer state. Calling this does not reset the [stream] or 138 /// Resets the tokenizer state. Calling this does not reset the [stream] or
144 /// the [parser]. 139 /// the [parser].
145 void reset() { 140 void reset() {
146 _lastOffset = 0; 141 _lastOffset = 0;
147 tokenQueue.clear(); 142 tokenQueue.clear();
148 currentToken = null; 143 currentToken = null;
149 temporaryBuffer = null; 144 _buffer.clear();
150 _attributes = null; 145 _attributes = null;
151 _attributeNames = null; 146 _attributeNames = null;
152 state = dataState; 147 state = dataState;
153 } 148 }
154 149
155 /// Adds a token to the queue. Sets the span if needed. 150 /// Adds a token to the queue. Sets the span if needed.
156 void _addToken(Token token) { 151 void _addToken(Token token) {
157 if (generateSpans && token.span == null) { 152 if (generateSpans && token.span == null) {
158 int offset = stream.position; 153 int offset = stream.position;
159 token.span = stream.fileInfo.span(_lastOffset, offset); 154 token.span = stream.fileInfo.span(_lastOffset, offset);
(...skipping 180 matching lines...)
340 stream.unget(charStack.removeLast()); 335 stream.unget(charStack.removeLast());
341 output = '${output}${slice(charStack, entityLen).join()}'; 336 output = '${output}${slice(charStack, entityLen).join()}';
342 } 337 }
343 } else { 338 } else {
344 _addToken(new ParseErrorToken("expected-named-entity")); 339 _addToken(new ParseErrorToken("expected-named-entity"));
345 stream.unget(charStack.removeLast()); 340 stream.unget(charStack.removeLast());
346 output = "&${charStack.join()}"; 341 output = "&${charStack.join()}";
347 } 342 }
348 } 343 }
349 if (fromAttribute) { 344 if (fromAttribute) {
350 _attributeValue = '$_attributeValue$output'; 345 _attributeValue.write(output);
351 } else { 346 } else {
352 var token; 347 var token;
353 if (isWhitespace(output)) { 348 if (isWhitespace(output)) {
354 token = new SpaceCharactersToken(output); 349 token = new SpaceCharactersToken(output);
355 } else { 350 } else {
356 token = new CharactersToken(output); 351 token = new CharactersToken(output);
357 } 352 }
358 _addToken(token); 353 _addToken(token);
359 } 354 }
360 } 355 }
(...skipping 225 matching lines...)
586 currentTagToken.name = '${currentTagToken.name}$data'; 581 currentTagToken.name = '${currentTagToken.name}$data';
587 // (Don't use charsUntil here, because tag names are 582 // (Don't use charsUntil here, because tag names are
588 // very short and it's faster to not do anything fancy) 583 // very short and it's faster to not do anything fancy)
589 } 584 }
590 return true; 585 return true;
591 } 586 }
592 587
593 bool rcdataLessThanSignState() { 588 bool rcdataLessThanSignState() {
594 var data = stream.char(); 589 var data = stream.char();
595 if (data == "/") { 590 if (data == "/") {
596 temporaryBuffer = ""; 591 _buffer.clear();
597 state = rcdataEndTagOpenState; 592 state = rcdataEndTagOpenState;
598 } else { 593 } else {
599 _addToken(new CharactersToken("<")); 594 _addToken(new CharactersToken("<"));
600 stream.unget(data); 595 stream.unget(data);
601 state = rcdataState; 596 state = rcdataState;
602 } 597 }
603 return true; 598 return true;
604 } 599 }
605 600
606 bool rcdataEndTagOpenState() { 601 bool rcdataEndTagOpenState() {
607 var data = stream.char(); 602 var data = stream.char();
608 if (isLetter(data)) { 603 if (isLetter(data)) {
609 temporaryBuffer = '${temporaryBuffer}$data'; 604 _buffer.write(data);
610 state = rcdataEndTagNameState; 605 state = rcdataEndTagNameState;
611 } else { 606 } else {
612 _addToken(new CharactersToken("</")); 607 _addToken(new CharactersToken("</"));
613 stream.unget(data); 608 stream.unget(data);
614 state = rcdataState; 609 state = rcdataState;
615 } 610 }
616 return true; 611 return true;
617 } 612 }
618 613
619 bool _tokenIsAppropriate() { 614 bool _tokenIsAppropriate() {
615 // TODO(jmesserly): this should use case insensitive compare instead.
620 return currentToken is TagToken && 616 return currentToken is TagToken &&
621 currentTagToken.name.toLowerCase() == temporaryBuffer.toLowerCase(); 617 currentTagToken.name.toLowerCase() == '$_buffer'.toLowerCase();
622 } 618 }
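The new TODO points out that lowercasing both strings just to compare them still allocates two temporary strings; a case-insensitive comparison could avoid that entirely. A hypothetical helper illustrating the TODO, not part of this CL:

// Compare two strings ASCII case-insensitively without allocating
// lowercased copies.
bool equalsIgnoreAsciiCase(String a, String b) {
  if (a.length != b.length) return false;
  for (var i = 0; i < a.length; i++) {
    var x = a.codeUnitAt(i);
    var y = b.codeUnitAt(i);
    if (x >= 0x41 && x <= 0x5A) x += 0x20; // fold A-Z to a-z
    if (y >= 0x41 && y <= 0x5A) y += 0x20;
    if (x != y) return false;
  }
  return true;
}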
623 619
624 bool rcdataEndTagNameState() { 620 bool rcdataEndTagNameState() {
625 var appropriate = _tokenIsAppropriate(); 621 var appropriate = _tokenIsAppropriate();
626 var data = stream.char(); 622 var data = stream.char();
627 if (isWhitespace(data) && appropriate) { 623 if (isWhitespace(data) && appropriate) {
628 currentToken = new EndTagToken(temporaryBuffer); 624 currentToken = new EndTagToken('$_buffer');
629 state = beforeAttributeNameState; 625 state = beforeAttributeNameState;
630 } else if (data == "/" && appropriate) { 626 } else if (data == "/" && appropriate) {
631 currentToken = new EndTagToken(temporaryBuffer); 627 currentToken = new EndTagToken('$_buffer');
632 state = selfClosingStartTagState; 628 state = selfClosingStartTagState;
633 } else if (data == ">" && appropriate) { 629 } else if (data == ">" && appropriate) {
634 currentToken = new EndTagToken(temporaryBuffer); 630 currentToken = new EndTagToken('$_buffer');
635 emitCurrentToken(); 631 emitCurrentToken();
636 state = dataState; 632 state = dataState;
637 } else if (isLetter(data)) { 633 } else if (isLetter(data)) {
638 temporaryBuffer = '${temporaryBuffer}$data'; 634 _buffer.write(data);
639 } else { 635 } else {
640 _addToken(new CharactersToken("</$temporaryBuffer")); 636 _addToken(new CharactersToken("</$_buffer"));
641 stream.unget(data); 637 stream.unget(data);
642 state = rcdataState; 638 state = rcdataState;
643 } 639 }
644 return true; 640 return true;
645 } 641 }
646 642
647 bool rawtextLessThanSignState() { 643 bool rawtextLessThanSignState() {
648 var data = stream.char(); 644 var data = stream.char();
649 if (data == "/") { 645 if (data == "/") {
650 temporaryBuffer = ""; 646 _buffer.clear();
651 state = rawtextEndTagOpenState; 647 state = rawtextEndTagOpenState;
652 } else { 648 } else {
653 _addToken(new CharactersToken("<")); 649 _addToken(new CharactersToken("<"));
654 stream.unget(data); 650 stream.unget(data);
655 state = rawtextState; 651 state = rawtextState;
656 } 652 }
657 return true; 653 return true;
658 } 654 }
659 655
660 bool rawtextEndTagOpenState() { 656 bool rawtextEndTagOpenState() {
661 var data = stream.char(); 657 var data = stream.char();
662 if (isLetter(data)) { 658 if (isLetter(data)) {
663 temporaryBuffer = '${temporaryBuffer}$data'; 659 _buffer.write(data);
664 state = rawtextEndTagNameState; 660 state = rawtextEndTagNameState;
665 } else { 661 } else {
666 _addToken(new CharactersToken("</")); 662 _addToken(new CharactersToken("</"));
667 stream.unget(data); 663 stream.unget(data);
668 state = rawtextState; 664 state = rawtextState;
669 } 665 }
670 return true; 666 return true;
671 } 667 }
672 668
673 bool rawtextEndTagNameState() { 669 bool rawtextEndTagNameState() {
674 var appropriate = _tokenIsAppropriate(); 670 var appropriate = _tokenIsAppropriate();
675 var data = stream.char(); 671 var data = stream.char();
676 if (isWhitespace(data) && appropriate) { 672 if (isWhitespace(data) && appropriate) {
677 currentToken = new EndTagToken(temporaryBuffer); 673 currentToken = new EndTagToken('$_buffer');
678 state = beforeAttributeNameState; 674 state = beforeAttributeNameState;
679 } else if (data == "/" && appropriate) { 675 } else if (data == "/" && appropriate) {
680 currentToken = new EndTagToken(temporaryBuffer); 676 currentToken = new EndTagToken('$_buffer');
681 state = selfClosingStartTagState; 677 state = selfClosingStartTagState;
682 } else if (data == ">" && appropriate) { 678 } else if (data == ">" && appropriate) {
683 currentToken = new EndTagToken(temporaryBuffer); 679 currentToken = new EndTagToken('$_buffer');
684 emitCurrentToken(); 680 emitCurrentToken();
685 state = dataState; 681 state = dataState;
686 } else if (isLetter(data)) { 682 } else if (isLetter(data)) {
687 temporaryBuffer = '${temporaryBuffer}$data'; 683 _buffer.write(data);
688 } else { 684 } else {
689 _addToken(new CharactersToken("</$temporaryBuffer")); 685 _addToken(new CharactersToken("</$_buffer"));
690 stream.unget(data); 686 stream.unget(data);
691 state = rawtextState; 687 state = rawtextState;
692 } 688 }
693 return true; 689 return true;
694 } 690 }
695 691
696 bool scriptDataLessThanSignState() { 692 bool scriptDataLessThanSignState() {
697 var data = stream.char(); 693 var data = stream.char();
698 if (data == "/") { 694 if (data == "/") {
699 temporaryBuffer = ""; 695 _buffer.clear();
700 state = scriptDataEndTagOpenState; 696 state = scriptDataEndTagOpenState;
701 } else if (data == "!") { 697 } else if (data == "!") {
702 _addToken(new CharactersToken("<!")); 698 _addToken(new CharactersToken("<!"));
703 state = scriptDataEscapeStartState; 699 state = scriptDataEscapeStartState;
704 } else { 700 } else {
705 _addToken(new CharactersToken("<")); 701 _addToken(new CharactersToken("<"));
706 stream.unget(data); 702 stream.unget(data);
707 state = scriptDataState; 703 state = scriptDataState;
708 } 704 }
709 return true; 705 return true;
710 } 706 }
711 707
712 bool scriptDataEndTagOpenState() { 708 bool scriptDataEndTagOpenState() {
713 var data = stream.char(); 709 var data = stream.char();
714 if (isLetter(data)) { 710 if (isLetter(data)) {
715 temporaryBuffer = '${temporaryBuffer}$data'; 711 _buffer.write(data);
716 state = scriptDataEndTagNameState; 712 state = scriptDataEndTagNameState;
717 } else { 713 } else {
718 _addToken(new CharactersToken("</")); 714 _addToken(new CharactersToken("</"));
719 stream.unget(data); 715 stream.unget(data);
720 state = scriptDataState; 716 state = scriptDataState;
721 } 717 }
722 return true; 718 return true;
723 } 719 }
724 720
725 bool scriptDataEndTagNameState() { 721 bool scriptDataEndTagNameState() {
726 var appropriate = _tokenIsAppropriate(); 722 var appropriate = _tokenIsAppropriate();
727 var data = stream.char(); 723 var data = stream.char();
728 if (isWhitespace(data) && appropriate) { 724 if (isWhitespace(data) && appropriate) {
729 currentToken = new EndTagToken(temporaryBuffer); 725 currentToken = new EndTagToken('$_buffer');
730 state = beforeAttributeNameState; 726 state = beforeAttributeNameState;
731 } else if (data == "/" && appropriate) { 727 } else if (data == "/" && appropriate) {
732 currentToken = new EndTagToken(temporaryBuffer); 728 currentToken = new EndTagToken('$_buffer');
733 state = selfClosingStartTagState; 729 state = selfClosingStartTagState;
734 } else if (data == ">" && appropriate) { 730 } else if (data == ">" && appropriate) {
735 currentToken = new EndTagToken(temporaryBuffer); 731 currentToken = new EndTagToken('$_buffer');
736 emitCurrentToken(); 732 emitCurrentToken();
737 state = dataState; 733 state = dataState;
738 } else if (isLetter(data)) { 734 } else if (isLetter(data)) {
739 temporaryBuffer = '${temporaryBuffer}$data'; 735 _buffer.write(data);
740 } else { 736 } else {
741 _addToken(new CharactersToken("</$temporaryBuffer")); 737 _addToken(new CharactersToken("</$_buffer"));
742 stream.unget(data); 738 stream.unget(data);
743 state = scriptDataState; 739 state = scriptDataState;
744 } 740 }
745 return true; 741 return true;
746 } 742 }
747 743
748 bool scriptDataEscapeStartState() { 744 bool scriptDataEscapeStartState() {
749 var data = stream.char(); 745 var data = stream.char();
750 if (data == "-") { 746 if (data == "-") {
751 _addToken(new CharactersToken("-")); 747 _addToken(new CharactersToken("-"));
(...skipping 74 matching lines...)
826 } else { 822 } else {
827 _addToken(new CharactersToken(data)); 823 _addToken(new CharactersToken(data));
828 state = scriptDataEscapedState; 824 state = scriptDataEscapedState;
829 } 825 }
830 return true; 826 return true;
831 } 827 }
832 828
833 bool scriptDataEscapedLessThanSignState() { 829 bool scriptDataEscapedLessThanSignState() {
834 var data = stream.char(); 830 var data = stream.char();
835 if (data == "/") { 831 if (data == "/") {
836 temporaryBuffer = ""; 832 _buffer.clear();
837 state = scriptDataEscapedEndTagOpenState; 833 state = scriptDataEscapedEndTagOpenState;
838 } else if (isLetter(data)) { 834 } else if (isLetter(data)) {
839 _addToken(new CharactersToken("<$data")); 835 _addToken(new CharactersToken("<$data"));
840 temporaryBuffer = data; 836 _buffer.clear();
837 _buffer.write(data);
841 state = scriptDataDoubleEscapeStartState; 838 state = scriptDataDoubleEscapeStartState;
842 } else { 839 } else {
843 _addToken(new CharactersToken("<")); 840 _addToken(new CharactersToken("<"));
844 stream.unget(data); 841 stream.unget(data);
845 state = scriptDataEscapedState; 842 state = scriptDataEscapedState;
846 } 843 }
847 return true; 844 return true;
848 } 845 }
849 846
850 bool scriptDataEscapedEndTagOpenState() { 847 bool scriptDataEscapedEndTagOpenState() {
851 var data = stream.char(); 848 var data = stream.char();
852 if (isLetter(data)) { 849 if (isLetter(data)) {
853 temporaryBuffer = data; 850 _buffer.clear();
851 _buffer.write(data);
854 state = scriptDataEscapedEndTagNameState; 852 state = scriptDataEscapedEndTagNameState;
855 } else { 853 } else {
856 _addToken(new CharactersToken("</")); 854 _addToken(new CharactersToken("</"));
857 stream.unget(data); 855 stream.unget(data);
858 state = scriptDataEscapedState; 856 state = scriptDataEscapedState;
859 } 857 }
860 return true; 858 return true;
861 } 859 }
862 860
863 bool scriptDataEscapedEndTagNameState() { 861 bool scriptDataEscapedEndTagNameState() {
864 var appropriate = _tokenIsAppropriate(); 862 var appropriate = _tokenIsAppropriate();
865 var data = stream.char(); 863 var data = stream.char();
866 if (isWhitespace(data) && appropriate) { 864 if (isWhitespace(data) && appropriate) {
867 currentToken = new EndTagToken(temporaryBuffer); 865 currentToken = new EndTagToken('$_buffer');
868 state = beforeAttributeNameState; 866 state = beforeAttributeNameState;
869 } else if (data == "/" && appropriate) { 867 } else if (data == "/" && appropriate) {
870 currentToken = new EndTagToken(temporaryBuffer); 868 currentToken = new EndTagToken('$_buffer');
871 state = selfClosingStartTagState; 869 state = selfClosingStartTagState;
872 } else if (data == ">" && appropriate) { 870 } else if (data == ">" && appropriate) {
873 currentToken = new EndTagToken(temporaryBuffer); 871 currentToken = new EndTagToken('$_buffer');
874 emitCurrentToken(); 872 emitCurrentToken();
875 state = dataState; 873 state = dataState;
876 } else if (isLetter(data)) { 874 } else if (isLetter(data)) {
877 temporaryBuffer = '${temporaryBuffer}$data'; 875 _buffer.write(data);
878 } else { 876 } else {
879 _addToken(new CharactersToken("</$temporaryBuffer")); 877 _addToken(new CharactersToken("</$_buffer"));
880 stream.unget(data); 878 stream.unget(data);
881 state = scriptDataEscapedState; 879 state = scriptDataEscapedState;
882 } 880 }
883 return true; 881 return true;
884 } 882 }
885 883
886 bool scriptDataDoubleEscapeStartState() { 884 bool scriptDataDoubleEscapeStartState() {
887 var data = stream.char(); 885 var data = stream.char();
888 if (isWhitespace(data) || data == "/" || data == ">") { 886 if (isWhitespace(data) || data == "/" || data == ">") {
889 _addToken(new CharactersToken(data)); 887 _addToken(new CharactersToken(data));
890 if (temporaryBuffer.toLowerCase() == "script") { 888 if ('$_buffer'.toLowerCase() == "script") {
891 state = scriptDataDoubleEscapedState; 889 state = scriptDataDoubleEscapedState;
892 } else { 890 } else {
893 state = scriptDataEscapedState; 891 state = scriptDataEscapedState;
894 } 892 }
895 } else if (isLetter(data)) { 893 } else if (isLetter(data)) {
896 _addToken(new CharactersToken(data)); 894 _addToken(new CharactersToken(data));
897 temporaryBuffer = '${temporaryBuffer}$data'; 895 _buffer.write(data);
898 } else { 896 } else {
899 stream.unget(data); 897 stream.unget(data);
900 state = scriptDataEscapedState; 898 state = scriptDataEscapedState;
901 } 899 }
902 return true; 900 return true;
903 } 901 }
904 902
905 bool scriptDataDoubleEscapedState() { 903 bool scriptDataDoubleEscapedState() {
906 var data = stream.char(); 904 var data = stream.char();
907 if (data == "-") { 905 if (data == "-") {
(...skipping 59 matching lines...)
967 _addToken(new CharactersToken(data)); 965 _addToken(new CharactersToken(data));
968 state = scriptDataDoubleEscapedState; 966 state = scriptDataDoubleEscapedState;
969 } 967 }
970 return true; 968 return true;
971 } 969 }
972 970
973 bool scriptDataDoubleEscapedLessThanSignState() { 971 bool scriptDataDoubleEscapedLessThanSignState() {
974 var data = stream.char(); 972 var data = stream.char();
975 if (data == "/") { 973 if (data == "/") {
976 _addToken(new CharactersToken("/")); 974 _addToken(new CharactersToken("/"));
977 temporaryBuffer = ""; 975 _buffer.clear();
978 state = scriptDataDoubleEscapeEndState; 976 state = scriptDataDoubleEscapeEndState;
979 } else { 977 } else {
980 stream.unget(data); 978 stream.unget(data);
981 state = scriptDataDoubleEscapedState; 979 state = scriptDataDoubleEscapedState;
982 } 980 }
983 return true; 981 return true;
984 } 982 }
985 983
986 bool scriptDataDoubleEscapeEndState() { 984 bool scriptDataDoubleEscapeEndState() {
987 var data = stream.char(); 985 var data = stream.char();
988 if (isWhitespace(data) || data == "/" || data == ">") { 986 if (isWhitespace(data) || data == "/" || data == ">") {
989 _addToken(new CharactersToken(data)); 987 _addToken(new CharactersToken(data));
990 if (temporaryBuffer.toLowerCase() == "script") { 988 if ('$_buffer'.toLowerCase() == "script") {
991 state = scriptDataEscapedState; 989 state = scriptDataEscapedState;
992 } else { 990 } else {
993 state = scriptDataDoubleEscapedState; 991 state = scriptDataDoubleEscapedState;
994 } 992 }
995 } else if (isLetter(data)) { 993 } else if (isLetter(data)) {
996 _addToken(new CharactersToken(data)); 994 _addToken(new CharactersToken(data));
997 temporaryBuffer = '${temporaryBuffer}$data'; 995 _buffer.write(data);
998 } else { 996 } else {
999 stream.unget(data); 997 stream.unget(data);
1000 state = scriptDataDoubleEscapedState; 998 state = scriptDataDoubleEscapedState;
1001 } 999 }
1002 return true; 1000 return true;
1003 } 1001 }
1004 1002
1005 bool beforeAttributeNameState() { 1003 bool beforeAttributeNameState() {
1006 var data = stream.char(); 1004 var data = stream.char();
1007 if (isWhitespace(data)) { 1005 if (isWhitespace(data)) {
(...skipping 23 matching lines...)
1031 return true; 1029 return true;
1032 } 1030 }
1033 1031
1034 bool attributeNameState() { 1032 bool attributeNameState() {
1035 var data = stream.char(); 1033 var data = stream.char();
1036 bool leavingThisState = true; 1034 bool leavingThisState = true;
1037 bool emitToken = false; 1035 bool emitToken = false;
1038 if (data == "=") { 1036 if (data == "=") {
1039 state = beforeAttributeValueState; 1037 state = beforeAttributeValueState;
1040 } else if (isLetter(data)) { 1038 } else if (isLetter(data)) {
1041 _attributeName = '$_attributeName$data' 1039 _attributeName.write(data);
1042 '${stream.charsUntil(asciiLetters, true)}'; 1040 _attributeName.write(stream.charsUntil(asciiLetters, true));
1043 leavingThisState = false; 1041 leavingThisState = false;
1044 } else if (data == ">") { 1042 } else if (data == ">") {
1045 // XXX If we emit here the attributes are converted to a dict 1043 // XXX If we emit here the attributes are converted to a dict
1046 // without being checked and when the code below runs we error 1044 // without being checked and when the code below runs we error
1047 // because data is a dict not a list 1045 // because data is a dict not a list
1048 emitToken = true; 1046 emitToken = true;
1049 } else if (isWhitespace(data)) { 1047 } else if (isWhitespace(data)) {
1050 state = afterAttributeNameState; 1048 state = afterAttributeNameState;
1051 } else if (data == "/") { 1049 } else if (data == "/") {
1052 state = selfClosingStartTagState; 1050 state = selfClosingStartTagState;
1053 } else if (data == "\u0000") { 1051 } else if (data == "\u0000") {
1054 _addToken(new ParseErrorToken("invalid-codepoint")); 1052 _addToken(new ParseErrorToken("invalid-codepoint"));
1055 _attributeName = '${_attributeName}\uFFFD'; 1053 _attributeName.write('\uFFFD');
1056 leavingThisState = false; 1054 leavingThisState = false;
1057 } else if (data == EOF) { 1055 } else if (data == EOF) {
1058 _addToken(new ParseErrorToken("eof-in-attribute-name")); 1056 _addToken(new ParseErrorToken("eof-in-attribute-name"));
1059 state = dataState; 1057 state = dataState;
1060 } else if ("'\"<".contains(data)) { 1058 } else if ("'\"<".contains(data)) {
1061 _addToken(new ParseErrorToken("invalid-character-in-attribute-name")); 1059 _addToken(new ParseErrorToken("invalid-character-in-attribute-name"));
1062 _attributeName = '$_attributeName$data'; 1060 _attributeName.write(data);
1063 leavingThisState = false; 1061 leavingThisState = false;
1064 } else { 1062 } else {
1065 _attributeName = '$_attributeName$data'; 1063 _attributeName.write(data);
1066 leavingThisState = false; 1064 leavingThisState = false;
1067 } 1065 }
1068 1066
1069 if (leavingThisState) { 1067 if (leavingThisState) {
1070 _markAttributeNameEnd(-1); 1068 _markAttributeNameEnd(-1);
1071 1069
1072 // Attributes are not dropped at this stage. That happens when the 1070 // Attributes are not dropped at this stage. That happens when the
1073 // start tag token is emitted so values can still be safely appended 1071 // start tag token is emitted so values can still be safely appended
1074 // to attributes, but we do want to report the parse error in time. 1072 // to attributes, but we do want to report the parse error in time.
1073 var attrName = _attributeName.toString();
1075 if (lowercaseAttrName) { 1074 if (lowercaseAttrName) {
1076 _attributeName = asciiUpper2Lower(_attributeName); 1075 attrName = asciiUpper2Lower(attrName);
1077 } 1076 }
1077 _attributes.last.name = attrName;
1078 if (_attributeNames == null) _attributeNames = new Set(); 1078 if (_attributeNames == null) _attributeNames = new Set();
1079 if (_attributeNames.contains(_attributeName)) { 1079 if (_attributeNames.contains(attrName)) {
1080 _addToken(new ParseErrorToken("duplicate-attribute")); 1080 _addToken(new ParseErrorToken("duplicate-attribute"));
1081 } 1081 }
1082 _attributeNames.add(_attributeName); 1082 _attributeNames.add(attrName);
1083 1083
1084 // XXX Fix for above XXX 1084 // XXX Fix for above XXX
1085 if (emitToken) { 1085 if (emitToken) {
1086 emitCurrentToken(); 1086 emitCurrentToken();
1087 } 1087 }
1088 } 1088 }
1089 return true; 1089 return true;
1090 } 1090 }
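As the comment in this hunk explains, a duplicate attribute is reported as a parse error but its value is still collected; nothing is dropped until the start tag token is emitted. The duplicate check itself could also be expressed through Set.add, which returns false when the element was already present; a small sketch, not part of the patch:

// Sketch: Set.add reports whether the name was newly inserted, so the
// membership test and the insertion collapse into one call.
void recordAttributeName(Set<String> seen, String name, void report(String message)) {
  if (!seen.add(name)) {
    report('duplicate-attribute');
  }
}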
1091 1091
1092 bool afterAttributeNameState() { 1092 bool afterAttributeNameState() {
(...skipping 41 matching lines...)
1134 } else if (data == "'") { 1134 } else if (data == "'") {
1135 _markAttributeValueStart(0); 1135 _markAttributeValueStart(0);
1136 state = attributeValueSingleQuotedState; 1136 state = attributeValueSingleQuotedState;
1137 } else if (data == ">") { 1137 } else if (data == ">") {
1138 _addToken(new ParseErrorToken( 1138 _addToken(new ParseErrorToken(
1139 "expected-attribute-value-but-got-right-bracket")); 1139 "expected-attribute-value-but-got-right-bracket"));
1140 emitCurrentToken(); 1140 emitCurrentToken();
1141 } else if (data == "\u0000") { 1141 } else if (data == "\u0000") {
1142 _addToken(new ParseErrorToken("invalid-codepoint")); 1142 _addToken(new ParseErrorToken("invalid-codepoint"));
1143 _markAttributeValueStart(-1); 1143 _markAttributeValueStart(-1);
1144 _attributeValue = '${_attributeValue}\uFFFD'; 1144 _attributeValue.write('\uFFFD');
1145 state = attributeValueUnQuotedState; 1145 state = attributeValueUnQuotedState;
1146 } else if (data == EOF) { 1146 } else if (data == EOF) {
1147 _addToken(new ParseErrorToken("expected-attribute-value-but-got-eof")); 1147 _addToken(new ParseErrorToken("expected-attribute-value-but-got-eof"));
1148 state = dataState; 1148 state = dataState;
1149 } else if ("=<`".contains(data)) { 1149 } else if ("=<`".contains(data)) {
1150 _addToken(new ParseErrorToken("equals-in-unquoted-attribute-value")); 1150 _addToken(new ParseErrorToken("equals-in-unquoted-attribute-value"));
1151 _markAttributeValueStart(-1); 1151 _markAttributeValueStart(-1);
1152 _attributeValue = '$_attributeValue$data'; 1152 _attributeValue.write(data);
1153 state = attributeValueUnQuotedState; 1153 state = attributeValueUnQuotedState;
1154 } else { 1154 } else {
1155 _markAttributeValueStart(-1); 1155 _markAttributeValueStart(-1);
1156 _attributeValue = '$_attributeValue$data'; 1156 _attributeValue.write(data);
1157 state = attributeValueUnQuotedState; 1157 state = attributeValueUnQuotedState;
1158 } 1158 }
1159 return true; 1159 return true;
1160 } 1160 }
1161 1161
1162 bool attributeValueDoubleQuotedState() { 1162 bool attributeValueDoubleQuotedState() {
1163 var data = stream.char(); 1163 var data = stream.char();
1164 if (data == "\"") { 1164 if (data == "\"") {
1165 _markAttributeValueEnd(-1); 1165 _markAttributeValueEnd(-1);
1166 _markAttributeEnd(0); 1166 _markAttributeEnd(0);
1167 state = afterAttributeValueState; 1167 state = afterAttributeValueState;
1168 } else if (data == "&") { 1168 } else if (data == "&") {
1169 processEntityInAttribute('"'); 1169 processEntityInAttribute('"');
1170 } else if (data == "\u0000") { 1170 } else if (data == "\u0000") {
1171 _addToken(new ParseErrorToken("invalid-codepoint")); 1171 _addToken(new ParseErrorToken("invalid-codepoint"));
1172 _attributeValue = '${_attributeValue}\uFFFD'; 1172 _attributeValue.write('\uFFFD');
1173 } else if (data == EOF) { 1173 } else if (data == EOF) {
1174 _addToken(new ParseErrorToken("eof-in-attribute-value-double-quote")); 1174 _addToken(new ParseErrorToken("eof-in-attribute-value-double-quote"));
1175 _markAttributeValueEnd(-1); 1175 _markAttributeValueEnd(-1);
1176 state = dataState; 1176 state = dataState;
1177 } else { 1177 } else {
1178 _attributeValue = '$_attributeValue$data${stream.charsUntil("\"&")}'; 1178 _attributeValue.write(data);
1179 _attributeValue.write(stream.charsUntil("\"&"));
1179 } 1180 }
1180 return true; 1181 return true;
1181 } 1182 }
1182 1183
1183 bool attributeValueSingleQuotedState() { 1184 bool attributeValueSingleQuotedState() {
1184 var data = stream.char(); 1185 var data = stream.char();
1185 if (data == "'") { 1186 if (data == "'") {
1186 _markAttributeValueEnd(-1); 1187 _markAttributeValueEnd(-1);
1187 _markAttributeEnd(0); 1188 _markAttributeEnd(0);
1188 state = afterAttributeValueState; 1189 state = afterAttributeValueState;
1189 } else if (data == "&") { 1190 } else if (data == "&") {
1190 processEntityInAttribute("'"); 1191 processEntityInAttribute("'");
1191 } else if (data == "\u0000") { 1192 } else if (data == "\u0000") {
1192 _addToken(new ParseErrorToken("invalid-codepoint")); 1193 _addToken(new ParseErrorToken("invalid-codepoint"));
1193 _attributeValue = '${_attributeValue}\uFFFD'; 1194 _attributeValue.write('\uFFFD');
1194 } else if (data == EOF) { 1195 } else if (data == EOF) {
1195 _addToken(new ParseErrorToken("eof-in-attribute-value-single-quote")); 1196 _addToken(new ParseErrorToken("eof-in-attribute-value-single-quote"));
1196 _markAttributeValueEnd(-1); 1197 _markAttributeValueEnd(-1);
1197 state = dataState; 1198 state = dataState;
1198 } else { 1199 } else {
1199 _attributeValue = '$_attributeValue$data${stream.charsUntil("\'&")}'; 1200 _attributeValue.write(data);
1201 _attributeValue.write(stream.charsUntil("\'&"));
1200 } 1202 }
1201 return true; 1203 return true;
1202 } 1204 }
1203 1205
1204 bool attributeValueUnQuotedState() { 1206 bool attributeValueUnQuotedState() {
1205 var data = stream.char(); 1207 var data = stream.char();
1206 if (isWhitespace(data)) { 1208 if (isWhitespace(data)) {
1207 _markAttributeValueEnd(-1); 1209 _markAttributeValueEnd(-1);
1208 state = beforeAttributeNameState; 1210 state = beforeAttributeNameState;
1209 } else if (data == "&") { 1211 } else if (data == "&") {
1210 processEntityInAttribute(">"); 1212 processEntityInAttribute(">");
1211 } else if (data == ">") { 1213 } else if (data == ">") {
1212 _markAttributeValueEnd(-1); 1214 _markAttributeValueEnd(-1);
1213 emitCurrentToken(); 1215 emitCurrentToken();
1214 } else if (data == EOF) { 1216 } else if (data == EOF) {
1215 _addToken(new ParseErrorToken("eof-in-attribute-value-no-quotes")); 1217 _addToken(new ParseErrorToken("eof-in-attribute-value-no-quotes"));
1216 _markAttributeValueEnd(-1); 1218 _markAttributeValueEnd(-1);
1217 state = dataState; 1219 state = dataState;
1218 } else if ('"\'=<`'.contains(data)) { 1220 } else if ('"\'=<`'.contains(data)) {
1219 _addToken(new ParseErrorToken( 1221 _addToken(new ParseErrorToken(
1220 "unexpected-character-in-unquoted-attribute-value")); 1222 "unexpected-character-in-unquoted-attribute-value"));
1221 _attributeValue = '$_attributeValue$data'; 1223 _attributeValue.write(data);
1222 } else if (data == "\u0000") { 1224 } else if (data == "\u0000") {
1223 _addToken(new ParseErrorToken("invalid-codepoint")); 1225 _addToken(new ParseErrorToken("invalid-codepoint"));
1224 _attributeValue = '${_attributeValue}\uFFFD'; 1226 _attributeValue.write('\uFFFD');
1225 } else { 1227 } else {
1226 _attributeValue = '$_attributeValue$data' 1228 _attributeValue.write(data);
1227 '${stream.charsUntil("&>\"\'=<`$spaceCharacters")}'; 1229 _attributeValue.write(stream.charsUntil("&>\"\'=<`$spaceCharacters"));
1228 } 1230 }
1229 return true; 1231 return true;
1230 } 1232 }
1231 1233
1232 bool afterAttributeValueState() { 1234 bool afterAttributeValueState() {
1233 var data = stream.char(); 1235 var data = stream.char();
1234 if (isWhitespace(data)) { 1236 if (isWhitespace(data)) {
1235 state = beforeAttributeNameState; 1237 state = beforeAttributeNameState;
1236 } else if (data == ">") { 1238 } else if (data == ">") {
1237 emitCurrentToken(); 1239 emitCurrentToken();
(...skipping 43 matching lines...)
1281 stream.char(); 1283 stream.char();
1282 state = dataState; 1284 state = dataState;
1283 return true; 1285 return true;
1284 } 1286 }
1285 1287
1286 bool markupDeclarationOpenState() { 1288 bool markupDeclarationOpenState() {
1287 var charStack = [stream.char()]; 1289 var charStack = [stream.char()];
1288 if (charStack.last == "-") { 1290 if (charStack.last == "-") {
1289 charStack.add(stream.char()); 1291 charStack.add(stream.char());
1290 if (charStack.last == "-") { 1292 if (charStack.last == "-") {
1291 currentToken = new CommentToken(""); 1293 currentToken = new CommentToken();
1292 state = commentStartState; 1294 state = commentStartState;
1293 return true; 1295 return true;
1294 } 1296 }
1295 } else if (charStack.last == 'd' || charStack.last == 'D') { 1297 } else if (charStack.last == 'd' || charStack.last == 'D') {
1296 var matched = true; 1298 var matched = true;
1297 for (var expected in const ['oO', 'cC', 'tT', 'yY', 'pP', 'eE']) { 1299 for (var expected in const ['oO', 'cC', 'tT', 'yY', 'pP', 'eE']) {
1298 var char = stream.char(); 1300 var char = stream.char();
1299 charStack.add(char); 1301 charStack.add(char);
1300 if (char == EOF || !expected.contains(char)) { 1302 if (char == EOF || !expected.contains(char)) {
1301 matched = false; 1303 matched = false;
(...skipping 32 matching lines...)
1334 state = bogusCommentState; 1336 state = bogusCommentState;
1335 return true; 1337 return true;
1336 } 1338 }
1337 1339
1338 bool commentStartState() { 1340 bool commentStartState() {
1339 var data = stream.char(); 1341 var data = stream.char();
1340 if (data == "-") { 1342 if (data == "-") {
1341 state = commentStartDashState; 1343 state = commentStartDashState;
1342 } else if (data == "\u0000") { 1344 } else if (data == "\u0000") {
1343 _addToken(new ParseErrorToken("invalid-codepoint")); 1345 _addToken(new ParseErrorToken("invalid-codepoint"));
1344 currentStringToken.data = '${currentStringToken.data}\uFFFD'; 1346 currentStringToken.add('\uFFFD');
1345 } else if (data == ">") { 1347 } else if (data == ">") {
1346 _addToken(new ParseErrorToken("incorrect-comment")); 1348 _addToken(new ParseErrorToken("incorrect-comment"));
1347 _addToken(currentToken); 1349 _addToken(currentToken);
1348 state = dataState; 1350 state = dataState;
1349 } else if (data == EOF) { 1351 } else if (data == EOF) {
1350 _addToken(new ParseErrorToken("eof-in-comment")); 1352 _addToken(new ParseErrorToken("eof-in-comment"));
1351 _addToken(currentToken); 1353 _addToken(currentToken);
1352 state = dataState; 1354 state = dataState;
1353 } else { 1355 } else {
1354 currentStringToken.data = '${currentStringToken.data}$data'; 1356 currentStringToken.add(data);
1355 state = commentState; 1357 state = commentState;
1356 } 1358 }
1357 return true; 1359 return true;
1358 } 1360 }
1359 1361
1360 bool commentStartDashState() { 1362 bool commentStartDashState() {
1361 var data = stream.char(); 1363 var data = stream.char();
1362 if (data == "-") { 1364 if (data == "-") {
1363 state = commentEndState; 1365 state = commentEndState;
1364 } else if (data == "\u0000") { 1366 } else if (data == "\u0000") {
1365 _addToken(new ParseErrorToken("invalid-codepoint")); 1367 _addToken(new ParseErrorToken("invalid-codepoint"));
1366 currentStringToken.data = '${currentStringToken.data}-\uFFFD'; 1368 currentStringToken.add('-\uFFFD');
1367 } else if (data == ">") { 1369 } else if (data == ">") {
1368 _addToken(new ParseErrorToken("incorrect-comment")); 1370 _addToken(new ParseErrorToken("incorrect-comment"));
1369 _addToken(currentToken); 1371 _addToken(currentToken);
1370 state = dataState; 1372 state = dataState;
1371 } else if (data == EOF) { 1373 } else if (data == EOF) {
1372 _addToken(new ParseErrorToken("eof-in-comment")); 1374 _addToken(new ParseErrorToken("eof-in-comment"));
1373 _addToken(currentToken); 1375 _addToken(currentToken);
1374 state = dataState; 1376 state = dataState;
1375 } else { 1377 } else {
1376 currentStringToken.data = '${currentStringToken.data}-${data}'; 1378 currentStringToken.add('-').add(data);
1377 state = commentState; 1379 state = commentState;
1378 } 1380 }
1379 return true; 1381 return true;
1380 } 1382 }
1381 1383
1382 bool commentState() { 1384 bool commentState() {
1383 var data = stream.char(); 1385 var data = stream.char();
1384 if (data == "-") { 1386 if (data == "-") {
1385 state = commentEndDashState; 1387 state = commentEndDashState;
1386 } else if (data == "\u0000") { 1388 } else if (data == "\u0000") {
1387 _addToken(new ParseErrorToken("invalid-codepoint")); 1389 _addToken(new ParseErrorToken("invalid-codepoint"));
1388 currentStringToken.data = '${currentStringToken.data}\uFFFD'; 1390 currentStringToken.add('\uFFFD');
1389 } else if (data == EOF) { 1391 } else if (data == EOF) {
1390 _addToken(new ParseErrorToken("eof-in-comment")); 1392 _addToken(new ParseErrorToken("eof-in-comment"));
1391 _addToken(currentToken); 1393 _addToken(currentToken);
1392 state = dataState; 1394 state = dataState;
1393 } else { 1395 } else {
1394 currentStringToken.data = '${currentStringToken.data}$data' 1396 currentStringToken.add(data).add(stream.charsUntil("-\u0000"));
Siggi Cherem (dart-lang) 2015/03/05 23:01:56 maybe use cascades here and below? (instead of ret
1395 '${stream.charsUntil("-\u0000")}';
1396 } 1397 }
1397 return true; 1398 return true;
1398 } 1399 }
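Siggi's note refers to the chained add(...).add(...) calls here and in the comment states below; with cascade notation the add method would not need to return the token to support chaining. A standalone sketch of the suggested shape, using an illustrative class rather than the real StringToken:

// With cascades (..), both calls are sent to the same receiver, so add can
// return void instead of returning the token for chaining.
class CommentBuffer {
  final StringBuffer _data = new StringBuffer();

  void add(String s) {
    _data.write(s);
  }

  String toString() => '$_data';
}

void main() {
  var comment = new CommentBuffer();
  comment
    ..add('-')
    ..add('hello');
  print(comment); // prints: -hello
}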
1399 1400
1400 bool commentEndDashState() { 1401 bool commentEndDashState() {
1401 var data = stream.char(); 1402 var data = stream.char();
1402 if (data == "-") { 1403 if (data == "-") {
1403 state = commentEndState; 1404 state = commentEndState;
1404 } else if (data == "\u0000") { 1405 } else if (data == "\u0000") {
1405 _addToken(new ParseErrorToken("invalid-codepoint")); 1406 _addToken(new ParseErrorToken("invalid-codepoint"));
1406 currentStringToken.data = "${currentStringToken.data}-\uFFFD"; 1407 currentStringToken.add('-\uFFFD');
1407 state = commentState; 1408 state = commentState;
1408 } else if (data == EOF) { 1409 } else if (data == EOF) {
1409 _addToken(new ParseErrorToken("eof-in-comment-end-dash")); 1410 _addToken(new ParseErrorToken("eof-in-comment-end-dash"));
1410 _addToken(currentToken); 1411 _addToken(currentToken);
1411 state = dataState; 1412 state = dataState;
1412 } else { 1413 } else {
1413 currentStringToken.data = "${currentStringToken.data}-${data}"; 1414 currentStringToken.add('-').add(data);
1414 state = commentState; 1415 state = commentState;
1415 } 1416 }
1416 return true; 1417 return true;
1417 } 1418 }
1418 1419
1419 bool commentEndState() { 1420 bool commentEndState() {
1420 var data = stream.char(); 1421 var data = stream.char();
1421 if (data == ">") { 1422 if (data == ">") {
1422 _addToken(currentToken); 1423 _addToken(currentToken);
1423 state = dataState; 1424 state = dataState;
1424 } else if (data == "\u0000") { 1425 } else if (data == "\u0000") {
1425 _addToken(new ParseErrorToken("invalid-codepoint")); 1426 _addToken(new ParseErrorToken("invalid-codepoint"));
1426 currentStringToken.data = '${currentStringToken.data}--\uFFFD'; 1427 currentStringToken.add('--\uFFFD');
1427 state = commentState; 1428 state = commentState;
1428 } else if (data == "!") { 1429 } else if (data == "!") {
1429 _addToken( 1430 _addToken(
1430 new ParseErrorToken("unexpected-bang-after-double-dash-in-comment")); 1431 new ParseErrorToken("unexpected-bang-after-double-dash-in-comment"));
1431 state = commentEndBangState; 1432 state = commentEndBangState;
1432 } else if (data == "-") { 1433 } else if (data == "-") {
1433 _addToken( 1434 _addToken(
1434 new ParseErrorToken("unexpected-dash-after-double-dash-in-comment")); 1435 new ParseErrorToken("unexpected-dash-after-double-dash-in-comment"));
1435 currentStringToken.data = '${currentStringToken.data}$data'; 1436 currentStringToken.add(data);
1436 } else if (data == EOF) { 1437 } else if (data == EOF) {
1437 _addToken(new ParseErrorToken("eof-in-comment-double-dash")); 1438 _addToken(new ParseErrorToken("eof-in-comment-double-dash"));
1438 _addToken(currentToken); 1439 _addToken(currentToken);
1439 state = dataState; 1440 state = dataState;
1440 } else { 1441 } else {
1441 // XXX 1442 // XXX
1442 _addToken(new ParseErrorToken("unexpected-char-in-comment")); 1443 _addToken(new ParseErrorToken("unexpected-char-in-comment"));
1443 currentStringToken.data = "${currentStringToken.data}--${data}"; 1444 currentStringToken.add('--').add(data);
1444 state = commentState; 1445 state = commentState;
1445 } 1446 }
1446 return true; 1447 return true;
1447 } 1448 }
1448 1449
1449 bool commentEndBangState() { 1450 bool commentEndBangState() {
1450 var data = stream.char(); 1451 var data = stream.char();
1451 if (data == ">") { 1452 if (data == ">") {
1452 _addToken(currentToken); 1453 _addToken(currentToken);
1453 state = dataState; 1454 state = dataState;
1454 } else if (data == "-") { 1455 } else if (data == "-") {
1455 currentStringToken.data = '${currentStringToken.data}--!'; 1456 currentStringToken.add('--!');
1456 state = commentEndDashState; 1457 state = commentEndDashState;
1457 } else if (data == "\u0000") { 1458 } else if (data == "\u0000") {
1458 _addToken(new ParseErrorToken("invalid-codepoint")); 1459 _addToken(new ParseErrorToken("invalid-codepoint"));
1459 currentStringToken.data = '${currentStringToken.data}--!\uFFFD'; 1460 currentStringToken.add('--!\uFFFD');
1460 state = commentState; 1461 state = commentState;
1461 } else if (data == EOF) { 1462 } else if (data == EOF) {
1462 _addToken(new ParseErrorToken("eof-in-comment-end-bang-state")); 1463 _addToken(new ParseErrorToken("eof-in-comment-end-bang-state"));
1463 _addToken(currentToken); 1464 _addToken(currentToken);
1464 state = dataState; 1465 state = dataState;
1465 } else { 1466 } else {
1466 currentStringToken.data = "${currentStringToken.data}--!${data}"; 1467 currentStringToken.add('--!').add(data);
1467 state = commentState; 1468 state = commentState;
1468 } 1469 }
1469 return true; 1470 return true;
1470 } 1471 }
1471 1472
1472 bool doctypeState() { 1473 bool doctypeState() {
1473 var data = stream.char(); 1474 var data = stream.char();
1474 if (isWhitespace(data)) { 1475 if (isWhitespace(data)) {
1475 state = beforeDoctypeNameState; 1476 state = beforeDoctypeNameState;
1476 } else if (data == EOF) { 1477 } else if (data == EOF) {
(...skipping 421 matching lines...)
1898 } 1899 }
1899 } 1900 }
1900 1901
1901 if (data.length > 0) { 1902 if (data.length > 0) {
1902 _addToken(new CharactersToken(data.join())); 1903 _addToken(new CharactersToken(data.join()));
1903 } 1904 }
1904 state = dataState; 1905 state = dataState;
1905 return true; 1906 return true;
1906 } 1907 }
1907 } 1908 }