Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(509)

Side by Side Diff: sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart

Issue 27510003: Scanner for UTF-8 byte arrays (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: fixes compiler tests Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 part of scanner_implementation; 5 part of scanner;
6 6
7 abstract 7 abstract class ArrayBasedScanner extends AbstractScanner {
8 class ArrayBasedScanner<S extends SourceString> extends AbstractScanner<S> { 8 ArrayBasedScanner(SourceFile file, bool includeComments)
9 int get charOffset => byteOffset + extraCharOffset; 9 : super(file, includeComments);
10 final Token tokens;
11 Token tail;
12 int tokenStart;
13 int byteOffset;
14 final bool includeComments;
15 10
16 /** Since the input is UTF8, some characters are represented by more 11 /**
17 * than one byte. [extraCharOffset] tracks the difference. */ 12 * The stack of open groups, e.g. [: { ... ( .. :]
18 int extraCharOffset; 13 * Each BeginGroupToken has a pointer to the token where the group
14 * ends. This field is set when scanning the end group token.
15 */
19 Link<BeginGroupToken> groupingStack = const Link<BeginGroupToken>(); 16 Link<BeginGroupToken> groupingStack = const Link<BeginGroupToken>();
20 17
21 ArrayBasedScanner(this.includeComments) 18 /**
22 : this.extraCharOffset = 0, 19 * Appends a token whose kind is determined by [info] and content is defined
23 this.tokenStart = -1, 20 * by the String [value].
24 this.byteOffset = -1, 21 *
25 this.tokens = new Token(EOF_INFO, -1) { 22 * This method is invoked for class names, field names, method names, types,
26 this.tail = this.tokens; 23 * etc.
24 */
25 void appendStringToken(PrecedenceInfo info, String value) {
26 tail.next = new StringToken.fromString(info, value, tokenStart, true);
27 tail = tail.next;
27 } 28 }
28 29
29 int advance() { 30 /**
30 int next = nextByte(); 31 * Appends a fixed token whose kind and content is determined by [info].
31 return next; 32 * Appends an *operator* token from [info].
33 *
34 * An operator token represents operators like ':', '.', ';', '&&', '==', '--',
35 * '=>', etc.
36 */
37 void appendPrecedenceToken(PrecedenceInfo info) {
38 tail.next = new SymbolToken(info, tokenStart);
39 tail = tail.next;
32 } 40 }
33 41
42 /**
43 * Appends a fixed token based on whether the current char is [choice] or not.
44 * If the current char is [choice] a fixed token whose kind and content
45 * is determined by [yes] is appended, otherwise a fixed token whose kind
46 * and content is determined by [no] is appended.
47 */
34 int select(int choice, PrecedenceInfo yes, PrecedenceInfo no) { 48 int select(int choice, PrecedenceInfo yes, PrecedenceInfo no) {
35 int next = advance(); 49 int next = advance();
36 if (identical(next, choice)) { 50 if (identical(next, choice)) {
37 appendPrecedenceToken(yes); 51 appendPrecedenceToken(yes);
38 return advance(); 52 return advance();
39 } else { 53 } else {
40 appendPrecedenceToken(no); 54 appendPrecedenceToken(no);
41 return next; 55 return next;
42 } 56 }
43 } 57 }
44 58
45 void appendPrecedenceToken(PrecedenceInfo info) { 59 /**
46 tail.next = new Token(info, tokenStart); 60 * Appends a keyword token whose kind is determined by [keyword].
47 tail = tail.next; 61 */
48 }
49
50 void appendStringToken(PrecedenceInfo info, String value) {
51 tail.next = new StringToken(info, value, tokenStart);
52 tail = tail.next;
53 }
54
55 void appendKeywordToken(Keyword keyword) { 62 void appendKeywordToken(Keyword keyword) {
56 String syntax = keyword.syntax; 63 String syntax = keyword.syntax;
57
58 // Type parameters and arguments cannot contain 'this' or 'super'. 64 // Type parameters and arguments cannot contain 'this' or 'super'.
59 if (identical(syntax, 'this') || identical(syntax, 'super')) discardOpenLt() ; 65 if (identical(syntax, 'this') || identical(syntax, 'super')) {
66 discardOpenLt();
67 }
60 tail.next = new KeywordToken(keyword, tokenStart); 68 tail.next = new KeywordToken(keyword, tokenStart);
61 tail = tail.next; 69 tail = tail.next;
62 } 70 }
63 71
64 void appendEofToken() { 72 void appendEofToken() {
65 tail.next = new Token(EOF_INFO, charOffset); 73 beginToken();
74 tail.next = new SymbolToken(EOF_INFO, tokenStart);
66 tail = tail.next; 75 tail = tail.next;
67 // EOF points to itself so there's always infinite look-ahead. 76 // EOF points to itself so there's always infinite look-ahead.
68 tail.next = tail; 77 tail.next = tail;
69 discardOpenLt(); 78 discardOpenLt();
70 while (!groupingStack.isEmpty) { 79 while (!groupingStack.isEmpty) {
71 unmatchedBeginGroup(groupingStack.head); 80 unmatchedBeginGroup(groupingStack.head);
72 groupingStack = groupingStack.tail; 81 groupingStack = groupingStack.tail;
73 } 82 }
74 } 83 }
75 84
76 void beginToken() { 85 /**
77 tokenStart = charOffset; 86 * Notifies scanning a whitespace character. Note that [appendWhiteSpace] is
87 * not always invoked for [$SPACE] characters.
88 *
89 * This method is used by the scanners to track line breaks and create the
90 * [lineStarts] map.
91 */
92 void appendWhiteSpace(int next) {
93 if (next == $LF && file != null) {
94 lineStarts.add(stringOffset + 1); // +1, the line starts after the $LF.
95 }
78 } 96 }
79 97
80 Token firstToken() { 98 /**
81 return tokens.next; 99 * Notifies on [$LF] characters in multi-line commends or strings.
ngeoffray 2013/10/18 10:19:37 commends -> comments
lukas 2013/10/24 16:48:36 Done.
100 *
101 * This method is used by the scanners to track line breaks and create the
102 * [lineStarts] map.
103 */
104 void lineFeedInMultiline() {
105 if (file != null) {
106 lineStarts.add(stringOffset + 1);
107 }
82 } 108 }
83 109
84 Token previousToken() { 110 /**
85 return tail; 111 * Appends a token that begins a new group, represented by [value].
86 } 112 * Group begin tokens are '{', '(', '[' and '${'.
87 113 */
88 void addToCharOffset(int offset) { 114 void appendBeginGroup(PrecedenceInfo info) {
89 extraCharOffset += offset; 115 Token token = new BeginGroupToken(info, tokenStart);
90 }
91
92 void appendWhiteSpace(int next) {
93 // Do nothing, we don't collect white space.
94 }
95
96 void appendBeginGroup(PrecedenceInfo info, String value) {
97 Token token = new BeginGroupToken(info, value, tokenStart);
98 tail.next = token; 116 tail.next = token;
99 tail = tail.next; 117 tail = tail.next;
118
119 // { ( [ ${ cannot appear inside type parameters / arguments.
100 if (!identical(info.kind, LT_TOKEN)) discardOpenLt(); 120 if (!identical(info.kind, LT_TOKEN)) discardOpenLt();
101 groupingStack = groupingStack.prepend(token); 121 groupingStack = groupingStack.prepend(token);
102 } 122 }
103 123
104 int appendEndGroup(PrecedenceInfo info, String value, int openKind) { 124 /**
105 assert(!identical(openKind, LT_TOKEN)); 125 * Appends a token that begins a ends group, represented by [value].
ngeoffray 2013/10/18 10:19:37 a ends -> an end
lukas 2013/10/24 16:48:36 Done.
106 appendStringToken(info, value); 126 * It handles the group end tokens '}', ')' and ']'. The tokens '>' and
127 * '>>' are handled separately by [appendGt] and [appendGtGt].
128 */
129 int appendEndGroup(PrecedenceInfo info, int openKind) {
130 assert(!identical(openKind, LT_TOKEN)); // openKind is < for > and >>
131 appendPrecedenceToken(info);
132 // Don't report unmatched errors for <; it is also the less-than operator.
107 discardOpenLt(); 133 discardOpenLt();
108 if (groupingStack.isEmpty) { 134 if (groupingStack.isEmpty) {
109 return advance(); 135 return advance();
110 } 136 }
111 BeginGroupToken begin = groupingStack.head; 137 BeginGroupToken begin = groupingStack.head;
112 if (!identical(begin.kind, openKind)) { 138 if (!identical(begin.kind, openKind)) {
113 if (!identical(openKind, OPEN_CURLY_BRACKET_TOKEN) || 139 if (!identical(openKind, OPEN_CURLY_BRACKET_TOKEN) ||
114 !identical(begin.kind, STRING_INTERPOLATION_TOKEN)) { 140 !identical(begin.kind, STRING_INTERPOLATION_TOKEN)) {
115 // Not ending string interpolation. 141 // Not ending string interpolation.
116 return error(new SourceString('Unmatched ${begin.stringValue}')); 142 unmatchedBeginGroup(begin);
143 return advance();
117 } 144 }
118 // We're ending an interpolated expression. 145 // We're ending an interpolated expression.
119 begin.endGroup = tail; 146 begin.endGroup = tail;
120 groupingStack = groupingStack.tail; 147 groupingStack = groupingStack.tail;
121 // Using "start-of-text" to signal that we're back in string 148 // Using "start-of-text" to signal that we're back in string
122 // scanning mode. 149 // scanning mode.
123 return $STX; 150 return $STX;
124 } 151 }
125 begin.endGroup = tail; 152 begin.endGroup = tail;
126 groupingStack = groupingStack.tail; 153 groupingStack = groupingStack.tail;
127 return advance(); 154 return advance();
128 } 155 }
129 156
130 void appendGt(PrecedenceInfo info, String value) { 157 /**
131 appendStringToken(info, value); 158 * Appends a token for '>'.
159 * This method does not issue unmatched errors, because > is also the
160 * greater-than operator. It does not necessarily have to close a group.
161 */
162 void appendGt(PrecedenceInfo info) {
163 appendPrecedenceToken(info);
132 if (groupingStack.isEmpty) return; 164 if (groupingStack.isEmpty) return;
133 if (identical(groupingStack.head.kind, LT_TOKEN)) { 165 if (identical(groupingStack.head.kind, LT_TOKEN)) {
134 groupingStack.head.endGroup = tail; 166 groupingStack.head.endGroup = tail;
135 groupingStack = groupingStack.tail;
136 }
137 }
138
139 void appendGtGt(PrecedenceInfo info, String value) {
140 appendStringToken(info, value);
141 if (groupingStack.isEmpty) return;
142 if (identical(groupingStack.head.kind, LT_TOKEN)) {
143 groupingStack = groupingStack.tail;
144 }
145 if (groupingStack.isEmpty) return;
146 if (identical(groupingStack.head.kind, LT_TOKEN)) {
147 groupingStack.head.endGroup = tail;
148 groupingStack = groupingStack.tail; 167 groupingStack = groupingStack.tail;
149 } 168 }
150 } 169 }
151 170
152 void appendGtGtGt(PrecedenceInfo info, String value) { 171 /**
153 appendStringToken(info, value); 172 * Appends a token for '>>'.
173 * This method does not issue unmatched errors, because >> is also the
174 * shift operator. It does not necessarily have to close a group.
175 */
176 void appendGtGt(PrecedenceInfo info) {
177 appendPrecedenceToken(info);
154 if (groupingStack.isEmpty) return; 178 if (groupingStack.isEmpty) return;
155 if (identical(groupingStack.head.kind, LT_TOKEN)) { 179 if (identical(groupingStack.head.kind, LT_TOKEN)) {
180 // Don't assign endGroup: in "T<U<V>>", the '>>' token closes the outer
181 // '<', the inner '<' is left without endGroup.
156 groupingStack = groupingStack.tail; 182 groupingStack = groupingStack.tail;
157 } 183 }
158 if (groupingStack.isEmpty) return; 184 if (groupingStack.isEmpty) return;
159 if (identical(groupingStack.head.kind, LT_TOKEN)) {
160 groupingStack = groupingStack.tail;
161 }
162 if (groupingStack.isEmpty) return;
163 if (identical(groupingStack.head.kind, LT_TOKEN)) { 185 if (identical(groupingStack.head.kind, LT_TOKEN)) {
164 groupingStack.head.endGroup = tail; 186 groupingStack.head.endGroup = tail;
165 groupingStack = groupingStack.tail; 187 groupingStack = groupingStack.tail;
166 } 188 }
167 } 189 }
168 190
169 void appendComment() { 191 void appendComment(start, bool asciiOnly) {
170 if (!includeComments) return; 192 if (!includeComments) return;
171 SourceString value = utf8String(tokenStart, -1); 193 appendSubstringToken(COMMENT_INFO, start, asciiOnly);
172 appendByteStringToken(COMMENT_INFO, value);
173 } 194 }
174 195
196 /**
197 * We call this method to discard '<' from the "grouping" stack
ngeoffray 2013/10/18 10:19:37 Replace 'We' by the actual callers.
lukas 2013/10/24 16:48:36 Done.
198 * (maintained by subclasses).
199 *
200 * [PartialParser.skipExpression] relies on the fact that we do not
201 * create groups for stuff like:
202 * [:a = b < c, d = e > f:].
203 *
204 * In other words, this method is called when the scanner recognizes
205 * something which cannot possibly be part of a type
206 * parameter/argument list.
207 */
175 void discardOpenLt() { 208 void discardOpenLt() {
176 while (!groupingStack.isEmpty 209 while (!groupingStack.isEmpty
177 && identical(groupingStack.head.kind, LT_TOKEN)) { 210 && identical(groupingStack.head.kind, LT_TOKEN)) {
178 groupingStack = groupingStack.tail; 211 groupingStack = groupingStack.tail;
179 } 212 }
180 } 213 }
181 214 }
182 void unmatchedBeginGroup(BeginGroupToken begin);
183 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698