OLD | NEW |
---|---|
1 library tokenizer; | 1 library tokenizer; |
2 | 2 |
3 import 'dart:collection'; | 3 import 'dart:collection'; |
4 import 'package:html/parser.dart' show HtmlParser; | 4 import 'package:html/parser.dart' show HtmlParser; |
5 import 'constants.dart'; | 5 import 'constants.dart'; |
6 import 'inputstream.dart'; | 6 import 'inputstream.dart'; |
7 import 'token.dart'; | 7 import 'token.dart'; |
8 import 'utils.dart'; | 8 import 'utils.dart'; |
9 | 9 |
10 // Group entities by their first character, for faster lookups | 10 // Group entities by their first character, for faster lookups |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
48 final Queue<Token> tokenQueue; | 48 final Queue<Token> tokenQueue; |
49 | 49 |
50 /// Holds the token that is currently being processed. | 50 /// Holds the token that is currently being processed. |
51 Token currentToken; | 51 Token currentToken; |
52 | 52 |
53 /// Holds a reference to the method to be invoked for the next parser state. | 53 /// Holds a reference to the method to be invoked for the next parser state. |
54 // TODO(jmesserly): the type should be "Predicate" but a dart2js checked mode | 54 // TODO(jmesserly): the type should be "Predicate" but a dart2js checked mode |
55 // bug prevents us from doing that. See http://dartbug.com/12465 | 55 // bug prevents us from doing that. See http://dartbug.com/12465 |
56 Function state; | 56 Function state; |
57 | 57 |
58 String temporaryBuffer; | 58 final StringBuffer _buffer = new StringBuffer(); |
59 | 59 |
60 int _lastOffset; | 60 int _lastOffset; |
61 | 61 |
62 // TODO(jmesserly): ideally this would be a LinkedHashMap and we wouldn't add | 62 // TODO(jmesserly): ideally this would be a LinkedHashMap and we wouldn't add |
63 // an item until it's ready. But the code doesn't have a clear notion of when | 63 // an item until it's ready. But the code doesn't have a clear notion of when |
64 // it's "done" with the attribute. | 64 // it's "done" with the attribute. |
65 List<TagAttribute> _attributes; | 65 List<TagAttribute> _attributes; |
66 Set<String> _attributeNames; | 66 Set<String> _attributeNames; |
67 | 67 |
68 HtmlTokenizer(doc, {String encoding, bool parseMeta: true, | 68 HtmlTokenizer(doc, {String encoding, bool parseMeta: true, |
69 this.lowercaseElementName: true, this.lowercaseAttrName: true, | 69 this.lowercaseElementName: true, this.lowercaseAttrName: true, |
70 bool generateSpans: false, String sourceUrl, this.attributeSpans: false}) | 70 bool generateSpans: false, String sourceUrl, this.attributeSpans: false}) |
71 : stream = new HtmlInputStream( | 71 : stream = new HtmlInputStream( |
72 doc, encoding, parseMeta, generateSpans, sourceUrl), | 72 doc, encoding, parseMeta, generateSpans, sourceUrl), |
73 tokenQueue = new Queue(), | 73 tokenQueue = new Queue(), |
74 generateSpans = generateSpans { | 74 generateSpans = generateSpans { |
75 reset(); | 75 reset(); |
76 } | 76 } |
77 | 77 |
78 TagToken get currentTagToken => currentToken; | 78 TagToken get currentTagToken => currentToken; |
79 DoctypeToken get currentDoctypeToken => currentToken; | 79 DoctypeToken get currentDoctypeToken => currentToken; |
80 StringToken get currentStringToken => currentToken; | 80 StringToken get currentStringToken => currentToken; |
81 | 81 |
82 Token _current; | 82 Token _current; |
83 Token get current => _current; | 83 Token get current => _current; |
84 | 84 |
85 String get _attributeName => _attributes.last.name; | 85 final StringBuffer _attributeName = new StringBuffer(); |
86 set _attributeName(String value) { | 86 final StringBuffer _attributeValue = new StringBuffer(); |
87 _attributes.last.name = value; | |
88 } | |
89 | |
90 String get _attributeValue => _attributes.last.value; | |
91 set _attributeValue(String value) { | |
92 _attributes.last.value = value; | |
93 } | |
94 | 87 |
95 void _markAttributeEnd(int offset) { | 88 void _markAttributeEnd(int offset) { |
89 _attributes.last.value = '$_attributeValue'; | |
96 if (attributeSpans) _attributes.last.end = stream.position + offset; | 90 if (attributeSpans) _attributes.last.end = stream.position + offset; |
97 } | 91 } |
98 | 92 |
99 void _markAttributeValueStart(int offset) { | 93 void _markAttributeValueStart(int offset) { |
100 if (attributeSpans) _attributes.last.startValue = stream.position + offset; | 94 if (attributeSpans) _attributes.last.startValue = stream.position + offset; |
101 } | 95 } |
102 | 96 |
103 void _markAttributeValueEnd(int offset) { | 97 void _markAttributeValueEnd(int offset) { |
104 if (attributeSpans) { | 98 if (attributeSpans) _attributes.last.endValue = stream.position + offset; |
105 _attributes.last.endValue = stream.position + offset; | 99 _markAttributeEnd(offset); |
106 _markAttributeEnd(offset); | |
107 } | |
108 } | 100 } |
109 | 101 |
110 // Note: we could track the name span here, if we need it. | 102 // Note: we could track the name span here, if we need it. |
111 void _markAttributeNameEnd(int offset) => _markAttributeEnd(offset); | 103 void _markAttributeNameEnd(int offset) => _markAttributeEnd(offset); |
112 | 104 |
113 void _addAttribute(String name) { | 105 void _addAttribute(String name) { |
114 if (_attributes == null) _attributes = []; | 106 if (_attributes == null) _attributes = []; |
115 var attr = new TagAttribute(name); | 107 _attributeName.clear(); |
108 _attributeName.write(name); | |
109 _attributeValue.clear(); | |
110 var attr = new TagAttribute(); | |
116 _attributes.add(attr); | 111 _attributes.add(attr); |
117 if (attributeSpans) attr.start = stream.position - name.length; | 112 if (attributeSpans) attr.start = stream.position - name.length; |
118 } | 113 } |
119 | 114 |
120 /// This is where the magic happens. | 115 /// This is where the magic happens. |
121 /// | 116 /// |
122 /// We do our usually processing through the states and when we have a token | 117 /// We do our usually processing through the states and when we have a token |
123 /// to return we yield the token which pauses processing until the next token | 118 /// to return we yield the token which pauses processing until the next token |
124 /// is requested. | 119 /// is requested. |
125 bool moveNext() { | 120 bool moveNext() { |
(...skipping 13 matching lines...) Expand all Loading... | |
139 } | 134 } |
140 return true; | 135 return true; |
141 } | 136 } |
142 | 137 |
143 /// Resets the tokenizer state. Calling this does not reset the [stream] or | 138 /// Resets the tokenizer state. Calling this does not reset the [stream] or |
144 /// the [parser]. | 139 /// the [parser]. |
145 void reset() { | 140 void reset() { |
146 _lastOffset = 0; | 141 _lastOffset = 0; |
147 tokenQueue.clear(); | 142 tokenQueue.clear(); |
148 currentToken = null; | 143 currentToken = null; |
149 temporaryBuffer = null; | 144 _buffer.clear(); |
150 _attributes = null; | 145 _attributes = null; |
151 _attributeNames = null; | 146 _attributeNames = null; |
152 state = dataState; | 147 state = dataState; |
153 } | 148 } |
154 | 149 |
155 /// Adds a token to the queue. Sets the span if needed. | 150 /// Adds a token to the queue. Sets the span if needed. |
156 void _addToken(Token token) { | 151 void _addToken(Token token) { |
157 if (generateSpans && token.span == null) { | 152 if (generateSpans && token.span == null) { |
158 int offset = stream.position; | 153 int offset = stream.position; |
159 token.span = stream.fileInfo.span(_lastOffset, offset); | 154 token.span = stream.fileInfo.span(_lastOffset, offset); |
(...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
340 stream.unget(charStack.removeLast()); | 335 stream.unget(charStack.removeLast()); |
341 output = '${output}${slice(charStack, entityLen).join()}'; | 336 output = '${output}${slice(charStack, entityLen).join()}'; |
342 } | 337 } |
343 } else { | 338 } else { |
344 _addToken(new ParseErrorToken("expected-named-entity")); | 339 _addToken(new ParseErrorToken("expected-named-entity")); |
345 stream.unget(charStack.removeLast()); | 340 stream.unget(charStack.removeLast()); |
346 output = "&${charStack.join()}"; | 341 output = "&${charStack.join()}"; |
347 } | 342 } |
348 } | 343 } |
349 if (fromAttribute) { | 344 if (fromAttribute) { |
350 _attributeValue = '$_attributeValue$output'; | 345 _attributeValue.write(output); |
351 } else { | 346 } else { |
352 var token; | 347 var token; |
353 if (isWhitespace(output)) { | 348 if (isWhitespace(output)) { |
354 token = new SpaceCharactersToken(output); | 349 token = new SpaceCharactersToken(output); |
355 } else { | 350 } else { |
356 token = new CharactersToken(output); | 351 token = new CharactersToken(output); |
357 } | 352 } |
358 _addToken(token); | 353 _addToken(token); |
359 } | 354 } |
360 } | 355 } |
(...skipping 225 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
586 currentTagToken.name = '${currentTagToken.name}$data'; | 581 currentTagToken.name = '${currentTagToken.name}$data'; |
587 // (Don't use charsUntil here, because tag names are | 582 // (Don't use charsUntil here, because tag names are |
588 // very short and it's faster to not do anything fancy) | 583 // very short and it's faster to not do anything fancy) |
589 } | 584 } |
590 return true; | 585 return true; |
591 } | 586 } |
592 | 587 |
593 bool rcdataLessThanSignState() { | 588 bool rcdataLessThanSignState() { |
594 var data = stream.char(); | 589 var data = stream.char(); |
595 if (data == "/") { | 590 if (data == "/") { |
596 temporaryBuffer = ""; | 591 _buffer.clear(); |
597 state = rcdataEndTagOpenState; | 592 state = rcdataEndTagOpenState; |
598 } else { | 593 } else { |
599 _addToken(new CharactersToken("<")); | 594 _addToken(new CharactersToken("<")); |
600 stream.unget(data); | 595 stream.unget(data); |
601 state = rcdataState; | 596 state = rcdataState; |
602 } | 597 } |
603 return true; | 598 return true; |
604 } | 599 } |
605 | 600 |
606 bool rcdataEndTagOpenState() { | 601 bool rcdataEndTagOpenState() { |
607 var data = stream.char(); | 602 var data = stream.char(); |
608 if (isLetter(data)) { | 603 if (isLetter(data)) { |
609 temporaryBuffer = '${temporaryBuffer}$data'; | 604 _buffer.write(data); |
610 state = rcdataEndTagNameState; | 605 state = rcdataEndTagNameState; |
611 } else { | 606 } else { |
612 _addToken(new CharactersToken("</")); | 607 _addToken(new CharactersToken("</")); |
613 stream.unget(data); | 608 stream.unget(data); |
614 state = rcdataState; | 609 state = rcdataState; |
615 } | 610 } |
616 return true; | 611 return true; |
617 } | 612 } |
618 | 613 |
619 bool _tokenIsAppropriate() { | 614 bool _tokenIsAppropriate() { |
615 // TODO(jmesserly): this should use case insensitive compare instead. | |
620 return currentToken is TagToken && | 616 return currentToken is TagToken && |
621 currentTagToken.name.toLowerCase() == temporaryBuffer.toLowerCase(); | 617 currentTagToken.name.toLowerCase() == '$_buffer'.toLowerCase(); |
622 } | 618 } |
623 | 619 |
624 bool rcdataEndTagNameState() { | 620 bool rcdataEndTagNameState() { |
625 var appropriate = _tokenIsAppropriate(); | 621 var appropriate = _tokenIsAppropriate(); |
626 var data = stream.char(); | 622 var data = stream.char(); |
627 if (isWhitespace(data) && appropriate) { | 623 if (isWhitespace(data) && appropriate) { |
628 currentToken = new EndTagToken(temporaryBuffer); | 624 currentToken = new EndTagToken('$_buffer'); |
629 state = beforeAttributeNameState; | 625 state = beforeAttributeNameState; |
630 } else if (data == "/" && appropriate) { | 626 } else if (data == "/" && appropriate) { |
631 currentToken = new EndTagToken(temporaryBuffer); | 627 currentToken = new EndTagToken('$_buffer'); |
632 state = selfClosingStartTagState; | 628 state = selfClosingStartTagState; |
633 } else if (data == ">" && appropriate) { | 629 } else if (data == ">" && appropriate) { |
634 currentToken = new EndTagToken(temporaryBuffer); | 630 currentToken = new EndTagToken('$_buffer'); |
635 emitCurrentToken(); | 631 emitCurrentToken(); |
636 state = dataState; | 632 state = dataState; |
637 } else if (isLetter(data)) { | 633 } else if (isLetter(data)) { |
638 temporaryBuffer = '${temporaryBuffer}$data'; | 634 _buffer.write(data); |
639 } else { | 635 } else { |
640 _addToken(new CharactersToken("</$temporaryBuffer")); | 636 _addToken(new CharactersToken("</$_buffer")); |
641 stream.unget(data); | 637 stream.unget(data); |
642 state = rcdataState; | 638 state = rcdataState; |
643 } | 639 } |
644 return true; | 640 return true; |
645 } | 641 } |
646 | 642 |
647 bool rawtextLessThanSignState() { | 643 bool rawtextLessThanSignState() { |
648 var data = stream.char(); | 644 var data = stream.char(); |
649 if (data == "/") { | 645 if (data == "/") { |
650 temporaryBuffer = ""; | 646 _buffer.clear(); |
651 state = rawtextEndTagOpenState; | 647 state = rawtextEndTagOpenState; |
652 } else { | 648 } else { |
653 _addToken(new CharactersToken("<")); | 649 _addToken(new CharactersToken("<")); |
654 stream.unget(data); | 650 stream.unget(data); |
655 state = rawtextState; | 651 state = rawtextState; |
656 } | 652 } |
657 return true; | 653 return true; |
658 } | 654 } |
659 | 655 |
660 bool rawtextEndTagOpenState() { | 656 bool rawtextEndTagOpenState() { |
661 var data = stream.char(); | 657 var data = stream.char(); |
662 if (isLetter(data)) { | 658 if (isLetter(data)) { |
663 temporaryBuffer = '${temporaryBuffer}$data'; | 659 _buffer.write(data); |
664 state = rawtextEndTagNameState; | 660 state = rawtextEndTagNameState; |
665 } else { | 661 } else { |
666 _addToken(new CharactersToken("</")); | 662 _addToken(new CharactersToken("</")); |
667 stream.unget(data); | 663 stream.unget(data); |
668 state = rawtextState; | 664 state = rawtextState; |
669 } | 665 } |
670 return true; | 666 return true; |
671 } | 667 } |
672 | 668 |
673 bool rawtextEndTagNameState() { | 669 bool rawtextEndTagNameState() { |
674 var appropriate = _tokenIsAppropriate(); | 670 var appropriate = _tokenIsAppropriate(); |
675 var data = stream.char(); | 671 var data = stream.char(); |
676 if (isWhitespace(data) && appropriate) { | 672 if (isWhitespace(data) && appropriate) { |
677 currentToken = new EndTagToken(temporaryBuffer); | 673 currentToken = new EndTagToken('$_buffer'); |
678 state = beforeAttributeNameState; | 674 state = beforeAttributeNameState; |
679 } else if (data == "/" && appropriate) { | 675 } else if (data == "/" && appropriate) { |
680 currentToken = new EndTagToken(temporaryBuffer); | 676 currentToken = new EndTagToken('$_buffer'); |
681 state = selfClosingStartTagState; | 677 state = selfClosingStartTagState; |
682 } else if (data == ">" && appropriate) { | 678 } else if (data == ">" && appropriate) { |
683 currentToken = new EndTagToken(temporaryBuffer); | 679 currentToken = new EndTagToken('$_buffer'); |
684 emitCurrentToken(); | 680 emitCurrentToken(); |
685 state = dataState; | 681 state = dataState; |
686 } else if (isLetter(data)) { | 682 } else if (isLetter(data)) { |
687 temporaryBuffer = '${temporaryBuffer}$data'; | 683 _buffer.write(data); |
688 } else { | 684 } else { |
689 _addToken(new CharactersToken("</$temporaryBuffer")); | 685 _addToken(new CharactersToken("</$_buffer")); |
690 stream.unget(data); | 686 stream.unget(data); |
691 state = rawtextState; | 687 state = rawtextState; |
692 } | 688 } |
693 return true; | 689 return true; |
694 } | 690 } |
695 | 691 |
696 bool scriptDataLessThanSignState() { | 692 bool scriptDataLessThanSignState() { |
697 var data = stream.char(); | 693 var data = stream.char(); |
698 if (data == "/") { | 694 if (data == "/") { |
699 temporaryBuffer = ""; | 695 _buffer.clear(); |
700 state = scriptDataEndTagOpenState; | 696 state = scriptDataEndTagOpenState; |
701 } else if (data == "!") { | 697 } else if (data == "!") { |
702 _addToken(new CharactersToken("<!")); | 698 _addToken(new CharactersToken("<!")); |
703 state = scriptDataEscapeStartState; | 699 state = scriptDataEscapeStartState; |
704 } else { | 700 } else { |
705 _addToken(new CharactersToken("<")); | 701 _addToken(new CharactersToken("<")); |
706 stream.unget(data); | 702 stream.unget(data); |
707 state = scriptDataState; | 703 state = scriptDataState; |
708 } | 704 } |
709 return true; | 705 return true; |
710 } | 706 } |
711 | 707 |
712 bool scriptDataEndTagOpenState() { | 708 bool scriptDataEndTagOpenState() { |
713 var data = stream.char(); | 709 var data = stream.char(); |
714 if (isLetter(data)) { | 710 if (isLetter(data)) { |
715 temporaryBuffer = '${temporaryBuffer}$data'; | 711 _buffer.write(data); |
716 state = scriptDataEndTagNameState; | 712 state = scriptDataEndTagNameState; |
717 } else { | 713 } else { |
718 _addToken(new CharactersToken("</")); | 714 _addToken(new CharactersToken("</")); |
719 stream.unget(data); | 715 stream.unget(data); |
720 state = scriptDataState; | 716 state = scriptDataState; |
721 } | 717 } |
722 return true; | 718 return true; |
723 } | 719 } |
724 | 720 |
725 bool scriptDataEndTagNameState() { | 721 bool scriptDataEndTagNameState() { |
726 var appropriate = _tokenIsAppropriate(); | 722 var appropriate = _tokenIsAppropriate(); |
727 var data = stream.char(); | 723 var data = stream.char(); |
728 if (isWhitespace(data) && appropriate) { | 724 if (isWhitespace(data) && appropriate) { |
729 currentToken = new EndTagToken(temporaryBuffer); | 725 currentToken = new EndTagToken('$_buffer'); |
730 state = beforeAttributeNameState; | 726 state = beforeAttributeNameState; |
731 } else if (data == "/" && appropriate) { | 727 } else if (data == "/" && appropriate) { |
732 currentToken = new EndTagToken(temporaryBuffer); | 728 currentToken = new EndTagToken('$_buffer'); |
733 state = selfClosingStartTagState; | 729 state = selfClosingStartTagState; |
734 } else if (data == ">" && appropriate) { | 730 } else if (data == ">" && appropriate) { |
735 currentToken = new EndTagToken(temporaryBuffer); | 731 currentToken = new EndTagToken('$_buffer'); |
736 emitCurrentToken(); | 732 emitCurrentToken(); |
737 state = dataState; | 733 state = dataState; |
738 } else if (isLetter(data)) { | 734 } else if (isLetter(data)) { |
739 temporaryBuffer = '${temporaryBuffer}$data'; | 735 _buffer.write(data); |
740 } else { | 736 } else { |
741 _addToken(new CharactersToken("</$temporaryBuffer")); | 737 _addToken(new CharactersToken("</$_buffer")); |
742 stream.unget(data); | 738 stream.unget(data); |
743 state = scriptDataState; | 739 state = scriptDataState; |
744 } | 740 } |
745 return true; | 741 return true; |
746 } | 742 } |
747 | 743 |
748 bool scriptDataEscapeStartState() { | 744 bool scriptDataEscapeStartState() { |
749 var data = stream.char(); | 745 var data = stream.char(); |
750 if (data == "-") { | 746 if (data == "-") { |
751 _addToken(new CharactersToken("-")); | 747 _addToken(new CharactersToken("-")); |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
826 } else { | 822 } else { |
827 _addToken(new CharactersToken(data)); | 823 _addToken(new CharactersToken(data)); |
828 state = scriptDataEscapedState; | 824 state = scriptDataEscapedState; |
829 } | 825 } |
830 return true; | 826 return true; |
831 } | 827 } |
832 | 828 |
833 bool scriptDataEscapedLessThanSignState() { | 829 bool scriptDataEscapedLessThanSignState() { |
834 var data = stream.char(); | 830 var data = stream.char(); |
835 if (data == "/") { | 831 if (data == "/") { |
836 temporaryBuffer = ""; | 832 _buffer.clear(); |
837 state = scriptDataEscapedEndTagOpenState; | 833 state = scriptDataEscapedEndTagOpenState; |
838 } else if (isLetter(data)) { | 834 } else if (isLetter(data)) { |
839 _addToken(new CharactersToken("<$data")); | 835 _addToken(new CharactersToken("<$data")); |
840 temporaryBuffer = data; | 836 _buffer.clear(); |
837 _buffer.write(data); | |
841 state = scriptDataDoubleEscapeStartState; | 838 state = scriptDataDoubleEscapeStartState; |
842 } else { | 839 } else { |
843 _addToken(new CharactersToken("<")); | 840 _addToken(new CharactersToken("<")); |
844 stream.unget(data); | 841 stream.unget(data); |
845 state = scriptDataEscapedState; | 842 state = scriptDataEscapedState; |
846 } | 843 } |
847 return true; | 844 return true; |
848 } | 845 } |
849 | 846 |
850 bool scriptDataEscapedEndTagOpenState() { | 847 bool scriptDataEscapedEndTagOpenState() { |
851 var data = stream.char(); | 848 var data = stream.char(); |
852 if (isLetter(data)) { | 849 if (isLetter(data)) { |
853 temporaryBuffer = data; | 850 _buffer.clear(); |
851 _buffer.write(data); | |
854 state = scriptDataEscapedEndTagNameState; | 852 state = scriptDataEscapedEndTagNameState; |
855 } else { | 853 } else { |
856 _addToken(new CharactersToken("</")); | 854 _addToken(new CharactersToken("</")); |
857 stream.unget(data); | 855 stream.unget(data); |
858 state = scriptDataEscapedState; | 856 state = scriptDataEscapedState; |
859 } | 857 } |
860 return true; | 858 return true; |
861 } | 859 } |
862 | 860 |
863 bool scriptDataEscapedEndTagNameState() { | 861 bool scriptDataEscapedEndTagNameState() { |
864 var appropriate = _tokenIsAppropriate(); | 862 var appropriate = _tokenIsAppropriate(); |
865 var data = stream.char(); | 863 var data = stream.char(); |
866 if (isWhitespace(data) && appropriate) { | 864 if (isWhitespace(data) && appropriate) { |
867 currentToken = new EndTagToken(temporaryBuffer); | 865 currentToken = new EndTagToken('$_buffer'); |
868 state = beforeAttributeNameState; | 866 state = beforeAttributeNameState; |
869 } else if (data == "/" && appropriate) { | 867 } else if (data == "/" && appropriate) { |
870 currentToken = new EndTagToken(temporaryBuffer); | 868 currentToken = new EndTagToken('$_buffer'); |
871 state = selfClosingStartTagState; | 869 state = selfClosingStartTagState; |
872 } else if (data == ">" && appropriate) { | 870 } else if (data == ">" && appropriate) { |
873 currentToken = new EndTagToken(temporaryBuffer); | 871 currentToken = new EndTagToken('$_buffer'); |
874 emitCurrentToken(); | 872 emitCurrentToken(); |
875 state = dataState; | 873 state = dataState; |
876 } else if (isLetter(data)) { | 874 } else if (isLetter(data)) { |
877 temporaryBuffer = '${temporaryBuffer}$data'; | 875 _buffer.write(data); |
878 } else { | 876 } else { |
879 _addToken(new CharactersToken("</$temporaryBuffer")); | 877 _addToken(new CharactersToken("</$_buffer")); |
880 stream.unget(data); | 878 stream.unget(data); |
881 state = scriptDataEscapedState; | 879 state = scriptDataEscapedState; |
882 } | 880 } |
883 return true; | 881 return true; |
884 } | 882 } |
885 | 883 |
886 bool scriptDataDoubleEscapeStartState() { | 884 bool scriptDataDoubleEscapeStartState() { |
887 var data = stream.char(); | 885 var data = stream.char(); |
888 if (isWhitespace(data) || data == "/" || data == ">") { | 886 if (isWhitespace(data) || data == "/" || data == ">") { |
889 _addToken(new CharactersToken(data)); | 887 _addToken(new CharactersToken(data)); |
890 if (temporaryBuffer.toLowerCase() == "script") { | 888 if ('$_buffer'.toLowerCase() == "script") { |
891 state = scriptDataDoubleEscapedState; | 889 state = scriptDataDoubleEscapedState; |
892 } else { | 890 } else { |
893 state = scriptDataEscapedState; | 891 state = scriptDataEscapedState; |
894 } | 892 } |
895 } else if (isLetter(data)) { | 893 } else if (isLetter(data)) { |
896 _addToken(new CharactersToken(data)); | 894 _addToken(new CharactersToken(data)); |
897 temporaryBuffer = '${temporaryBuffer}$data'; | 895 _buffer.write(data); |
898 } else { | 896 } else { |
899 stream.unget(data); | 897 stream.unget(data); |
900 state = scriptDataEscapedState; | 898 state = scriptDataEscapedState; |
901 } | 899 } |
902 return true; | 900 return true; |
903 } | 901 } |
904 | 902 |
905 bool scriptDataDoubleEscapedState() { | 903 bool scriptDataDoubleEscapedState() { |
906 var data = stream.char(); | 904 var data = stream.char(); |
907 if (data == "-") { | 905 if (data == "-") { |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
967 _addToken(new CharactersToken(data)); | 965 _addToken(new CharactersToken(data)); |
968 state = scriptDataDoubleEscapedState; | 966 state = scriptDataDoubleEscapedState; |
969 } | 967 } |
970 return true; | 968 return true; |
971 } | 969 } |
972 | 970 |
973 bool scriptDataDoubleEscapedLessThanSignState() { | 971 bool scriptDataDoubleEscapedLessThanSignState() { |
974 var data = stream.char(); | 972 var data = stream.char(); |
975 if (data == "/") { | 973 if (data == "/") { |
976 _addToken(new CharactersToken("/")); | 974 _addToken(new CharactersToken("/")); |
977 temporaryBuffer = ""; | 975 _buffer.clear(); |
978 state = scriptDataDoubleEscapeEndState; | 976 state = scriptDataDoubleEscapeEndState; |
979 } else { | 977 } else { |
980 stream.unget(data); | 978 stream.unget(data); |
981 state = scriptDataDoubleEscapedState; | 979 state = scriptDataDoubleEscapedState; |
982 } | 980 } |
983 return true; | 981 return true; |
984 } | 982 } |
985 | 983 |
986 bool scriptDataDoubleEscapeEndState() { | 984 bool scriptDataDoubleEscapeEndState() { |
987 var data = stream.char(); | 985 var data = stream.char(); |
988 if (isWhitespace(data) || data == "/" || data == ">") { | 986 if (isWhitespace(data) || data == "/" || data == ">") { |
989 _addToken(new CharactersToken(data)); | 987 _addToken(new CharactersToken(data)); |
990 if (temporaryBuffer.toLowerCase() == "script") { | 988 if ('$_buffer'.toLowerCase() == "script") { |
991 state = scriptDataEscapedState; | 989 state = scriptDataEscapedState; |
992 } else { | 990 } else { |
993 state = scriptDataDoubleEscapedState; | 991 state = scriptDataDoubleEscapedState; |
994 } | 992 } |
995 } else if (isLetter(data)) { | 993 } else if (isLetter(data)) { |
996 _addToken(new CharactersToken(data)); | 994 _addToken(new CharactersToken(data)); |
997 temporaryBuffer = '${temporaryBuffer}$data'; | 995 _buffer.write(data); |
998 } else { | 996 } else { |
999 stream.unget(data); | 997 stream.unget(data); |
1000 state = scriptDataDoubleEscapedState; | 998 state = scriptDataDoubleEscapedState; |
1001 } | 999 } |
1002 return true; | 1000 return true; |
1003 } | 1001 } |
1004 | 1002 |
1005 bool beforeAttributeNameState() { | 1003 bool beforeAttributeNameState() { |
1006 var data = stream.char(); | 1004 var data = stream.char(); |
1007 if (isWhitespace(data)) { | 1005 if (isWhitespace(data)) { |
(...skipping 23 matching lines...) Expand all Loading... | |
1031 return true; | 1029 return true; |
1032 } | 1030 } |
1033 | 1031 |
1034 bool attributeNameState() { | 1032 bool attributeNameState() { |
1035 var data = stream.char(); | 1033 var data = stream.char(); |
1036 bool leavingThisState = true; | 1034 bool leavingThisState = true; |
1037 bool emitToken = false; | 1035 bool emitToken = false; |
1038 if (data == "=") { | 1036 if (data == "=") { |
1039 state = beforeAttributeValueState; | 1037 state = beforeAttributeValueState; |
1040 } else if (isLetter(data)) { | 1038 } else if (isLetter(data)) { |
1041 _attributeName = '$_attributeName$data' | 1039 _attributeName.write(data); |
1042 '${stream.charsUntil(asciiLetters, true)}'; | 1040 _attributeName.write(stream.charsUntil(asciiLetters, true)); |
1043 leavingThisState = false; | 1041 leavingThisState = false; |
1044 } else if (data == ">") { | 1042 } else if (data == ">") { |
1045 // XXX If we emit here the attributes are converted to a dict | 1043 // XXX If we emit here the attributes are converted to a dict |
1046 // without being checked and when the code below runs we error | 1044 // without being checked and when the code below runs we error |
1047 // because data is a dict not a list | 1045 // because data is a dict not a list |
1048 emitToken = true; | 1046 emitToken = true; |
1049 } else if (isWhitespace(data)) { | 1047 } else if (isWhitespace(data)) { |
1050 state = afterAttributeNameState; | 1048 state = afterAttributeNameState; |
1051 } else if (data == "/") { | 1049 } else if (data == "/") { |
1052 state = selfClosingStartTagState; | 1050 state = selfClosingStartTagState; |
1053 } else if (data == "\u0000") { | 1051 } else if (data == "\u0000") { |
1054 _addToken(new ParseErrorToken("invalid-codepoint")); | 1052 _addToken(new ParseErrorToken("invalid-codepoint")); |
1055 _attributeName = '${_attributeName}\uFFFD'; | 1053 _attributeName.write('\uFFFD'); |
1056 leavingThisState = false; | 1054 leavingThisState = false; |
1057 } else if (data == EOF) { | 1055 } else if (data == EOF) { |
1058 _addToken(new ParseErrorToken("eof-in-attribute-name")); | 1056 _addToken(new ParseErrorToken("eof-in-attribute-name")); |
1059 state = dataState; | 1057 state = dataState; |
1060 } else if ("'\"<".contains(data)) { | 1058 } else if ("'\"<".contains(data)) { |
1061 _addToken(new ParseErrorToken("invalid-character-in-attribute-name")); | 1059 _addToken(new ParseErrorToken("invalid-character-in-attribute-name")); |
1062 _attributeName = '$_attributeName$data'; | 1060 _attributeName.write(data); |
1063 leavingThisState = false; | 1061 leavingThisState = false; |
1064 } else { | 1062 } else { |
1065 _attributeName = '$_attributeName$data'; | 1063 _attributeName.write(data); |
1066 leavingThisState = false; | 1064 leavingThisState = false; |
1067 } | 1065 } |
1068 | 1066 |
1069 if (leavingThisState) { | 1067 if (leavingThisState) { |
1070 _markAttributeNameEnd(-1); | 1068 _markAttributeNameEnd(-1); |
1071 | 1069 |
1072 // Attributes are not dropped at this stage. That happens when the | 1070 // Attributes are not dropped at this stage. That happens when the |
1073 // start tag token is emitted so values can still be safely appended | 1071 // start tag token is emitted so values can still be safely appended |
1074 // to attributes, but we do want to report the parse error in time. | 1072 // to attributes, but we do want to report the parse error in time. |
1073 var attrName = _attributeName.toString(); | |
1075 if (lowercaseAttrName) { | 1074 if (lowercaseAttrName) { |
1076 _attributeName = asciiUpper2Lower(_attributeName); | 1075 attrName = asciiUpper2Lower(attrName); |
1077 } | 1076 } |
1077 _attributes.last.name = attrName; | |
1078 if (_attributeNames == null) _attributeNames = new Set(); | 1078 if (_attributeNames == null) _attributeNames = new Set(); |
1079 if (_attributeNames.contains(_attributeName)) { | 1079 if (_attributeNames.contains(attrName)) { |
1080 _addToken(new ParseErrorToken("duplicate-attribute")); | 1080 _addToken(new ParseErrorToken("duplicate-attribute")); |
1081 } | 1081 } |
1082 _attributeNames.add(_attributeName); | 1082 _attributeNames.add(attrName); |
1083 | 1083 |
1084 // XXX Fix for above XXX | 1084 // XXX Fix for above XXX |
1085 if (emitToken) { | 1085 if (emitToken) { |
1086 emitCurrentToken(); | 1086 emitCurrentToken(); |
1087 } | 1087 } |
1088 } | 1088 } |
1089 return true; | 1089 return true; |
1090 } | 1090 } |
1091 | 1091 |
1092 bool afterAttributeNameState() { | 1092 bool afterAttributeNameState() { |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1134 } else if (data == "'") { | 1134 } else if (data == "'") { |
1135 _markAttributeValueStart(0); | 1135 _markAttributeValueStart(0); |
1136 state = attributeValueSingleQuotedState; | 1136 state = attributeValueSingleQuotedState; |
1137 } else if (data == ">") { | 1137 } else if (data == ">") { |
1138 _addToken(new ParseErrorToken( | 1138 _addToken(new ParseErrorToken( |
1139 "expected-attribute-value-but-got-right-bracket")); | 1139 "expected-attribute-value-but-got-right-bracket")); |
1140 emitCurrentToken(); | 1140 emitCurrentToken(); |
1141 } else if (data == "\u0000") { | 1141 } else if (data == "\u0000") { |
1142 _addToken(new ParseErrorToken("invalid-codepoint")); | 1142 _addToken(new ParseErrorToken("invalid-codepoint")); |
1143 _markAttributeValueStart(-1); | 1143 _markAttributeValueStart(-1); |
1144 _attributeValue = '${_attributeValue}\uFFFD'; | 1144 _attributeValue.write('\uFFFD'); |
1145 state = attributeValueUnQuotedState; | 1145 state = attributeValueUnQuotedState; |
1146 } else if (data == EOF) { | 1146 } else if (data == EOF) { |
1147 _addToken(new ParseErrorToken("expected-attribute-value-but-got-eof")); | 1147 _addToken(new ParseErrorToken("expected-attribute-value-but-got-eof")); |
1148 state = dataState; | 1148 state = dataState; |
1149 } else if ("=<`".contains(data)) { | 1149 } else if ("=<`".contains(data)) { |
1150 _addToken(new ParseErrorToken("equals-in-unquoted-attribute-value")); | 1150 _addToken(new ParseErrorToken("equals-in-unquoted-attribute-value")); |
1151 _markAttributeValueStart(-1); | 1151 _markAttributeValueStart(-1); |
1152 _attributeValue = '$_attributeValue$data'; | 1152 _attributeValue.write(data); |
1153 state = attributeValueUnQuotedState; | 1153 state = attributeValueUnQuotedState; |
1154 } else { | 1154 } else { |
1155 _markAttributeValueStart(-1); | 1155 _markAttributeValueStart(-1); |
1156 _attributeValue = '$_attributeValue$data'; | 1156 _attributeValue.write(data); |
1157 state = attributeValueUnQuotedState; | 1157 state = attributeValueUnQuotedState; |
1158 } | 1158 } |
1159 return true; | 1159 return true; |
1160 } | 1160 } |
1161 | 1161 |
1162 bool attributeValueDoubleQuotedState() { | 1162 bool attributeValueDoubleQuotedState() { |
1163 var data = stream.char(); | 1163 var data = stream.char(); |
1164 if (data == "\"") { | 1164 if (data == "\"") { |
1165 _markAttributeValueEnd(-1); | 1165 _markAttributeValueEnd(-1); |
1166 _markAttributeEnd(0); | 1166 _markAttributeEnd(0); |
1167 state = afterAttributeValueState; | 1167 state = afterAttributeValueState; |
1168 } else if (data == "&") { | 1168 } else if (data == "&") { |
1169 processEntityInAttribute('"'); | 1169 processEntityInAttribute('"'); |
1170 } else if (data == "\u0000") { | 1170 } else if (data == "\u0000") { |
1171 _addToken(new ParseErrorToken("invalid-codepoint")); | 1171 _addToken(new ParseErrorToken("invalid-codepoint")); |
1172 _attributeValue = '${_attributeValue}\uFFFD'; | 1172 _attributeValue.write('\uFFFD'); |
1173 } else if (data == EOF) { | 1173 } else if (data == EOF) { |
1174 _addToken(new ParseErrorToken("eof-in-attribute-value-double-quote")); | 1174 _addToken(new ParseErrorToken("eof-in-attribute-value-double-quote")); |
1175 _markAttributeValueEnd(-1); | 1175 _markAttributeValueEnd(-1); |
1176 state = dataState; | 1176 state = dataState; |
1177 } else { | 1177 } else { |
1178 _attributeValue = '$_attributeValue$data${stream.charsUntil("\"&")}'; | 1178 _attributeValue.write(data); |
1179 _attributeValue.write(stream.charsUntil("\"&")); | |
1179 } | 1180 } |
1180 return true; | 1181 return true; |
1181 } | 1182 } |
1182 | 1183 |
1183 bool attributeValueSingleQuotedState() { | 1184 bool attributeValueSingleQuotedState() { |
1184 var data = stream.char(); | 1185 var data = stream.char(); |
1185 if (data == "'") { | 1186 if (data == "'") { |
1186 _markAttributeValueEnd(-1); | 1187 _markAttributeValueEnd(-1); |
1187 _markAttributeEnd(0); | 1188 _markAttributeEnd(0); |
1188 state = afterAttributeValueState; | 1189 state = afterAttributeValueState; |
1189 } else if (data == "&") { | 1190 } else if (data == "&") { |
1190 processEntityInAttribute("'"); | 1191 processEntityInAttribute("'"); |
1191 } else if (data == "\u0000") { | 1192 } else if (data == "\u0000") { |
1192 _addToken(new ParseErrorToken("invalid-codepoint")); | 1193 _addToken(new ParseErrorToken("invalid-codepoint")); |
1193 _attributeValue = '${_attributeValue}\uFFFD'; | 1194 _attributeValue.write('\uFFFD'); |
1194 } else if (data == EOF) { | 1195 } else if (data == EOF) { |
1195 _addToken(new ParseErrorToken("eof-in-attribute-value-single-quote")); | 1196 _addToken(new ParseErrorToken("eof-in-attribute-value-single-quote")); |
1196 _markAttributeValueEnd(-1); | 1197 _markAttributeValueEnd(-1); |
1197 state = dataState; | 1198 state = dataState; |
1198 } else { | 1199 } else { |
1199 _attributeValue = '$_attributeValue$data${stream.charsUntil("\'&")}'; | 1200 _attributeValue.write(data); |
1201 _attributeValue.write(stream.charsUntil("\'&")); | |
1200 } | 1202 } |
1201 return true; | 1203 return true; |
1202 } | 1204 } |
1203 | 1205 |
1204 bool attributeValueUnQuotedState() { | 1206 bool attributeValueUnQuotedState() { |
1205 var data = stream.char(); | 1207 var data = stream.char(); |
1206 if (isWhitespace(data)) { | 1208 if (isWhitespace(data)) { |
1207 _markAttributeValueEnd(-1); | 1209 _markAttributeValueEnd(-1); |
1208 state = beforeAttributeNameState; | 1210 state = beforeAttributeNameState; |
1209 } else if (data == "&") { | 1211 } else if (data == "&") { |
1210 processEntityInAttribute(">"); | 1212 processEntityInAttribute(">"); |
1211 } else if (data == ">") { | 1213 } else if (data == ">") { |
1212 _markAttributeValueEnd(-1); | 1214 _markAttributeValueEnd(-1); |
1213 emitCurrentToken(); | 1215 emitCurrentToken(); |
1214 } else if (data == EOF) { | 1216 } else if (data == EOF) { |
1215 _addToken(new ParseErrorToken("eof-in-attribute-value-no-quotes")); | 1217 _addToken(new ParseErrorToken("eof-in-attribute-value-no-quotes")); |
1216 _markAttributeValueEnd(-1); | 1218 _markAttributeValueEnd(-1); |
1217 state = dataState; | 1219 state = dataState; |
1218 } else if ('"\'=<`'.contains(data)) { | 1220 } else if ('"\'=<`'.contains(data)) { |
1219 _addToken(new ParseErrorToken( | 1221 _addToken(new ParseErrorToken( |
1220 "unexpected-character-in-unquoted-attribute-value")); | 1222 "unexpected-character-in-unquoted-attribute-value")); |
1221 _attributeValue = '$_attributeValue$data'; | 1223 _attributeValue.write(data); |
1222 } else if (data == "\u0000") { | 1224 } else if (data == "\u0000") { |
1223 _addToken(new ParseErrorToken("invalid-codepoint")); | 1225 _addToken(new ParseErrorToken("invalid-codepoint")); |
1224 _attributeValue = '${_attributeValue}\uFFFD'; | 1226 _attributeValue.write('\uFFFD'); |
1225 } else { | 1227 } else { |
1226 _attributeValue = '$_attributeValue$data' | 1228 _attributeValue.write(data); |
1227 '${stream.charsUntil("&>\"\'=<`$spaceCharacters")}'; | 1229 _attributeValue.write(stream.charsUntil("&>\"\'=<`$spaceCharacters")); |
1228 } | 1230 } |
1229 return true; | 1231 return true; |
1230 } | 1232 } |
1231 | 1233 |
1232 bool afterAttributeValueState() { | 1234 bool afterAttributeValueState() { |
1233 var data = stream.char(); | 1235 var data = stream.char(); |
1234 if (isWhitespace(data)) { | 1236 if (isWhitespace(data)) { |
1235 state = beforeAttributeNameState; | 1237 state = beforeAttributeNameState; |
1236 } else if (data == ">") { | 1238 } else if (data == ">") { |
1237 emitCurrentToken(); | 1239 emitCurrentToken(); |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1281 stream.char(); | 1283 stream.char(); |
1282 state = dataState; | 1284 state = dataState; |
1283 return true; | 1285 return true; |
1284 } | 1286 } |
1285 | 1287 |
1286 bool markupDeclarationOpenState() { | 1288 bool markupDeclarationOpenState() { |
1287 var charStack = [stream.char()]; | 1289 var charStack = [stream.char()]; |
1288 if (charStack.last == "-") { | 1290 if (charStack.last == "-") { |
1289 charStack.add(stream.char()); | 1291 charStack.add(stream.char()); |
1290 if (charStack.last == "-") { | 1292 if (charStack.last == "-") { |
1291 currentToken = new CommentToken(""); | 1293 currentToken = new CommentToken(); |
1292 state = commentStartState; | 1294 state = commentStartState; |
1293 return true; | 1295 return true; |
1294 } | 1296 } |
1295 } else if (charStack.last == 'd' || charStack.last == 'D') { | 1297 } else if (charStack.last == 'd' || charStack.last == 'D') { |
1296 var matched = true; | 1298 var matched = true; |
1297 for (var expected in const ['oO', 'cC', 'tT', 'yY', 'pP', 'eE']) { | 1299 for (var expected in const ['oO', 'cC', 'tT', 'yY', 'pP', 'eE']) { |
1298 var char = stream.char(); | 1300 var char = stream.char(); |
1299 charStack.add(char); | 1301 charStack.add(char); |
1300 if (char == EOF || !expected.contains(char)) { | 1302 if (char == EOF || !expected.contains(char)) { |
1301 matched = false; | 1303 matched = false; |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1334 state = bogusCommentState; | 1336 state = bogusCommentState; |
1335 return true; | 1337 return true; |
1336 } | 1338 } |
1337 | 1339 |
1338 bool commentStartState() { | 1340 bool commentStartState() { |
1339 var data = stream.char(); | 1341 var data = stream.char(); |
1340 if (data == "-") { | 1342 if (data == "-") { |
1341 state = commentStartDashState; | 1343 state = commentStartDashState; |
1342 } else if (data == "\u0000") { | 1344 } else if (data == "\u0000") { |
1343 _addToken(new ParseErrorToken("invalid-codepoint")); | 1345 _addToken(new ParseErrorToken("invalid-codepoint")); |
1344 currentStringToken.data = '${currentStringToken.data}\uFFFD'; | 1346 currentStringToken.add('\uFFFD'); |
1345 } else if (data == ">") { | 1347 } else if (data == ">") { |
1346 _addToken(new ParseErrorToken("incorrect-comment")); | 1348 _addToken(new ParseErrorToken("incorrect-comment")); |
1347 _addToken(currentToken); | 1349 _addToken(currentToken); |
1348 state = dataState; | 1350 state = dataState; |
1349 } else if (data == EOF) { | 1351 } else if (data == EOF) { |
1350 _addToken(new ParseErrorToken("eof-in-comment")); | 1352 _addToken(new ParseErrorToken("eof-in-comment")); |
1351 _addToken(currentToken); | 1353 _addToken(currentToken); |
1352 state = dataState; | 1354 state = dataState; |
1353 } else { | 1355 } else { |
1354 currentStringToken.data = '${currentStringToken.data}$data'; | 1356 currentStringToken.add(data); |
1355 state = commentState; | 1357 state = commentState; |
1356 } | 1358 } |
1357 return true; | 1359 return true; |
1358 } | 1360 } |
1359 | 1361 |
1360 bool commentStartDashState() { | 1362 bool commentStartDashState() { |
1361 var data = stream.char(); | 1363 var data = stream.char(); |
1362 if (data == "-") { | 1364 if (data == "-") { |
1363 state = commentEndState; | 1365 state = commentEndState; |
1364 } else if (data == "\u0000") { | 1366 } else if (data == "\u0000") { |
1365 _addToken(new ParseErrorToken("invalid-codepoint")); | 1367 _addToken(new ParseErrorToken("invalid-codepoint")); |
1366 currentStringToken.data = '${currentStringToken.data}-\uFFFD'; | 1368 currentStringToken.add('-\uFFFD'); |
1367 } else if (data == ">") { | 1369 } else if (data == ">") { |
1368 _addToken(new ParseErrorToken("incorrect-comment")); | 1370 _addToken(new ParseErrorToken("incorrect-comment")); |
1369 _addToken(currentToken); | 1371 _addToken(currentToken); |
1370 state = dataState; | 1372 state = dataState; |
1371 } else if (data == EOF) { | 1373 } else if (data == EOF) { |
1372 _addToken(new ParseErrorToken("eof-in-comment")); | 1374 _addToken(new ParseErrorToken("eof-in-comment")); |
1373 _addToken(currentToken); | 1375 _addToken(currentToken); |
1374 state = dataState; | 1376 state = dataState; |
1375 } else { | 1377 } else { |
1376 currentStringToken.data = '${currentStringToken.data}-${data}'; | 1378 currentStringToken.add('-').add(data); |
1377 state = commentState; | 1379 state = commentState; |
1378 } | 1380 } |
1379 return true; | 1381 return true; |
1380 } | 1382 } |
1381 | 1383 |
1382 bool commentState() { | 1384 bool commentState() { |
1383 var data = stream.char(); | 1385 var data = stream.char(); |
1384 if (data == "-") { | 1386 if (data == "-") { |
1385 state = commentEndDashState; | 1387 state = commentEndDashState; |
1386 } else if (data == "\u0000") { | 1388 } else if (data == "\u0000") { |
1387 _addToken(new ParseErrorToken("invalid-codepoint")); | 1389 _addToken(new ParseErrorToken("invalid-codepoint")); |
1388 currentStringToken.data = '${currentStringToken.data}\uFFFD'; | 1390 currentStringToken.add('\uFFFD'); |
1389 } else if (data == EOF) { | 1391 } else if (data == EOF) { |
1390 _addToken(new ParseErrorToken("eof-in-comment")); | 1392 _addToken(new ParseErrorToken("eof-in-comment")); |
1391 _addToken(currentToken); | 1393 _addToken(currentToken); |
1392 state = dataState; | 1394 state = dataState; |
1393 } else { | 1395 } else { |
1394 currentStringToken.data = '${currentStringToken.data}$data' | 1396 currentStringToken.add(data).add(stream.charsUntil("-\u0000")); |
Siggi Cherem (dart-lang)
2015/03/05 23:01:56
maybe use cascades here and below? (instead of ret… [reviewer comment truncated in diff rendering])
| |
1395 '${stream.charsUntil("-\u0000")}'; | |
1396 } | 1397 } |
1397 return true; | 1398 return true; |
1398 } | 1399 } |
1399 | 1400 |
1400 bool commentEndDashState() { | 1401 bool commentEndDashState() { |
1401 var data = stream.char(); | 1402 var data = stream.char(); |
1402 if (data == "-") { | 1403 if (data == "-") { |
1403 state = commentEndState; | 1404 state = commentEndState; |
1404 } else if (data == "\u0000") { | 1405 } else if (data == "\u0000") { |
1405 _addToken(new ParseErrorToken("invalid-codepoint")); | 1406 _addToken(new ParseErrorToken("invalid-codepoint")); |
1406 currentStringToken.data = "${currentStringToken.data}-\uFFFD"; | 1407 currentStringToken.add('-\uFFFD'); |
1407 state = commentState; | 1408 state = commentState; |
1408 } else if (data == EOF) { | 1409 } else if (data == EOF) { |
1409 _addToken(new ParseErrorToken("eof-in-comment-end-dash")); | 1410 _addToken(new ParseErrorToken("eof-in-comment-end-dash")); |
1410 _addToken(currentToken); | 1411 _addToken(currentToken); |
1411 state = dataState; | 1412 state = dataState; |
1412 } else { | 1413 } else { |
1413 currentStringToken.data = "${currentStringToken.data}-${data}"; | 1414 currentStringToken.add('-').add(data); |
1414 state = commentState; | 1415 state = commentState; |
1415 } | 1416 } |
1416 return true; | 1417 return true; |
1417 } | 1418 } |
1418 | 1419 |
1419 bool commentEndState() { | 1420 bool commentEndState() { |
1420 var data = stream.char(); | 1421 var data = stream.char(); |
1421 if (data == ">") { | 1422 if (data == ">") { |
1422 _addToken(currentToken); | 1423 _addToken(currentToken); |
1423 state = dataState; | 1424 state = dataState; |
1424 } else if (data == "\u0000") { | 1425 } else if (data == "\u0000") { |
1425 _addToken(new ParseErrorToken("invalid-codepoint")); | 1426 _addToken(new ParseErrorToken("invalid-codepoint")); |
1426 currentStringToken.data = '${currentStringToken.data}--\uFFFD'; | 1427 currentStringToken.add('--\uFFFD'); |
1427 state = commentState; | 1428 state = commentState; |
1428 } else if (data == "!") { | 1429 } else if (data == "!") { |
1429 _addToken( | 1430 _addToken( |
1430 new ParseErrorToken("unexpected-bang-after-double-dash-in-comment")); | 1431 new ParseErrorToken("unexpected-bang-after-double-dash-in-comment")); |
1431 state = commentEndBangState; | 1432 state = commentEndBangState; |
1432 } else if (data == "-") { | 1433 } else if (data == "-") { |
1433 _addToken( | 1434 _addToken( |
1434 new ParseErrorToken("unexpected-dash-after-double-dash-in-comment")); | 1435 new ParseErrorToken("unexpected-dash-after-double-dash-in-comment")); |
1435 currentStringToken.data = '${currentStringToken.data}$data'; | 1436 currentStringToken.add(data); |
1436 } else if (data == EOF) { | 1437 } else if (data == EOF) { |
1437 _addToken(new ParseErrorToken("eof-in-comment-double-dash")); | 1438 _addToken(new ParseErrorToken("eof-in-comment-double-dash")); |
1438 _addToken(currentToken); | 1439 _addToken(currentToken); |
1439 state = dataState; | 1440 state = dataState; |
1440 } else { | 1441 } else { |
1441 // XXX | 1442 // XXX |
1442 _addToken(new ParseErrorToken("unexpected-char-in-comment")); | 1443 _addToken(new ParseErrorToken("unexpected-char-in-comment")); |
1443 currentStringToken.data = "${currentStringToken.data}--${data}"; | 1444 currentStringToken.add('--').add(data); |
1444 state = commentState; | 1445 state = commentState; |
1445 } | 1446 } |
1446 return true; | 1447 return true; |
1447 } | 1448 } |
1448 | 1449 |
1449 bool commentEndBangState() { | 1450 bool commentEndBangState() { |
1450 var data = stream.char(); | 1451 var data = stream.char(); |
1451 if (data == ">") { | 1452 if (data == ">") { |
1452 _addToken(currentToken); | 1453 _addToken(currentToken); |
1453 state = dataState; | 1454 state = dataState; |
1454 } else if (data == "-") { | 1455 } else if (data == "-") { |
1455 currentStringToken.data = '${currentStringToken.data}--!'; | 1456 currentStringToken.add('--!'); |
1456 state = commentEndDashState; | 1457 state = commentEndDashState; |
1457 } else if (data == "\u0000") { | 1458 } else if (data == "\u0000") { |
1458 _addToken(new ParseErrorToken("invalid-codepoint")); | 1459 _addToken(new ParseErrorToken("invalid-codepoint")); |
1459 currentStringToken.data = '${currentStringToken.data}--!\uFFFD'; | 1460 currentStringToken.add('--!\uFFFD'); |
1460 state = commentState; | 1461 state = commentState; |
1461 } else if (data == EOF) { | 1462 } else if (data == EOF) { |
1462 _addToken(new ParseErrorToken("eof-in-comment-end-bang-state")); | 1463 _addToken(new ParseErrorToken("eof-in-comment-end-bang-state")); |
1463 _addToken(currentToken); | 1464 _addToken(currentToken); |
1464 state = dataState; | 1465 state = dataState; |
1465 } else { | 1466 } else { |
1466 currentStringToken.data = "${currentStringToken.data}--!${data}"; | 1467 currentStringToken.add('--!').add(data); |
1467 state = commentState; | 1468 state = commentState; |
1468 } | 1469 } |
1469 return true; | 1470 return true; |
1470 } | 1471 } |
1471 | 1472 |
1472 bool doctypeState() { | 1473 bool doctypeState() { |
1473 var data = stream.char(); | 1474 var data = stream.char(); |
1474 if (isWhitespace(data)) { | 1475 if (isWhitespace(data)) { |
1475 state = beforeDoctypeNameState; | 1476 state = beforeDoctypeNameState; |
1476 } else if (data == EOF) { | 1477 } else if (data == EOF) { |
(...skipping 421 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1898 } | 1899 } |
1899 } | 1900 } |
1900 | 1901 |
1901 if (data.length > 0) { | 1902 if (data.length > 0) { |
1902 _addToken(new CharactersToken(data.join())); | 1903 _addToken(new CharactersToken(data.join())); |
1903 } | 1904 } |
1904 state = dataState; | 1905 state = dataState; |
1905 return true; | 1906 return true; |
1906 } | 1907 } |
1907 } | 1908 } |
OLD | NEW |