OLD | NEW |
---|---|
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of scanner; | 5 part of scanner; |
6 | 6 |
7 const int EOF_TOKEN = 0; | 7 const int EOF_TOKEN = 0; |
8 | 8 |
9 const int KEYWORD_TOKEN = $k; | 9 const int KEYWORD_TOKEN = $k; |
10 const int IDENTIFIER_TOKEN = $a; | 10 const int IDENTIFIER_TOKEN = $a; |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
72 const int TILDE_SLASH_TOKEN = TILDE_SLASH_EQ_TOKEN + 1; | 72 const int TILDE_SLASH_TOKEN = TILDE_SLASH_EQ_TOKEN + 1; |
73 const int PERCENT_EQ_TOKEN = TILDE_SLASH_TOKEN + 1; | 73 const int PERCENT_EQ_TOKEN = TILDE_SLASH_TOKEN + 1; |
74 const int GT_GT_TOKEN = PERCENT_EQ_TOKEN + 1; | 74 const int GT_GT_TOKEN = PERCENT_EQ_TOKEN + 1; |
75 const int CARET_EQ_TOKEN = GT_GT_TOKEN + 1; | 75 const int CARET_EQ_TOKEN = GT_GT_TOKEN + 1; |
76 const int COMMENT_TOKEN = CARET_EQ_TOKEN + 1; | 76 const int COMMENT_TOKEN = CARET_EQ_TOKEN + 1; |
77 const int STRING_INTERPOLATION_IDENTIFIER_TOKEN = COMMENT_TOKEN + 1; | 77 const int STRING_INTERPOLATION_IDENTIFIER_TOKEN = COMMENT_TOKEN + 1; |
78 | 78 |
79 /** | 79 /** |
80 * A token that doubles as a linked list. | 80 * A token that doubles as a linked list. |
81 */ | 81 */ |
82 class Token implements Spannable { | 82 abstract class Token implements Spannable { |
83 /** | |
84 * The precedence info for this token. [info] determines the kind and the | |
85 * precedence level of this token. | |
86 */ | |
87 final PrecedenceInfo info; | |
88 | |
89 /** | 83 /** |
90 * The character offset of the start of this token within the source text. | 84 * The character offset of the start of this token within the source text. |
91 */ | 85 */ |
92 final int charOffset; | 86 final int charOffset; |
93 | 87 |
88 Token(this.charOffset); | |
89 | |
94 /** | 90 /** |
95 * The next token in the token stream. | 91 * The next token in the token stream. |
96 */ | 92 */ |
97 Token next; | 93 Token next; |
98 | 94 |
99 Token(this.info, this.charOffset); | 95 /** |
100 | 96 * The precedence info for this token. [info] determines the kind and the |
101 get value => info.value; | 97 * precedence level of this token. |
98 * | |
99 * Defined as getter to save a field in the [KeywordToken] subclass. | |
100 */ | |
101 PrecedenceInfo get info; | |
102 | 102 |
103 /** | 103 /** |
104 * Returns the string value for keywords and symbols. For instance 'class' for | 104 * The string represented by this token, a substring of the source code. |
105 * the [CLASS] keyword token and '*' for a [Token] based on [STAR_INFO]. For | |
106 * other tokens, such identifiers, strings, numbers, etc, [stringValue] | |
107 * returns [:null:]. | |
108 * | 105 * |
109 * [stringValue] should only be used for testing keywords and symbols. | 106 * For [StringToken]s the value includes the quotes, explicit escapes, etc. |
ngeoffray
2013/10/18 10:19:37
the value -> [value]
lukas
2013/10/24 16:48:36
Done.
| |
107 * | |
110 */ | 108 */ |
111 String get stringValue => info.value.stringValue; | 109 String get value; |
110 | |
111 /** | |
112 * For symbol and keyword tokens, returns the string value reprenseted by this | |
ngeoffray
2013/10/18 10:19:37
represented
lukas
2013/10/24 16:48:36
Done.
| |
113 * token. For [StringToken]s this method returns [:null:]. | |
114 * | |
115 * For [SymbolToken]s and [KeywordToken]s, the string value is a compile-time | |
116 * constant originating in the [PrecedenceInfo] or in the [Keyword] instance. | |
117 * This allows testing for keywords and symbols using [:identical:], e.g., | |
118 * [:identical('class', token.value):]. | |
119 * | |
120 * Note that returning [:null:] for string tokens is important to identify | |
121 * symbols and keywords, we cannot use [value] instead. The string literal | |
122 * "$a($b" | |
123 * produces ..., SymbolToken($), StringToken(a), StringToken((), ... | |
124 * | |
125 * After parsing the identifier 'a', the parser tests for a function | |
126 * declaration using [:identical(next.stringValue, '('):], which (rightfully) | |
127 * returns false because stringValue returns [:null:]. | |
128 */ | |
129 String get stringValue; | |
112 | 130 |
113 /** | 131 /** |
114 * The kind enum of this token as determined by its [info]. | 132 * The kind enum of this token as determined by its [info]. |
115 */ | 133 */ |
116 int get kind => info.kind; | 134 int get kind => info.kind; |
117 | 135 |
118 /** | 136 /** |
119 * The precedence level for this token. | 137 * The precedence level for this token. |
120 */ | 138 */ |
121 int get precedence => info.precedence; | 139 int get precedence => info.precedence; |
122 | 140 |
123 bool isIdentifier() => identical(kind, IDENTIFIER_TOKEN); | 141 /** |
142 * True if this token is an identifier. Some keywords are allowed as identifiers, | |
143 * see implementaiton in [KeywordToken]. | |
ngeoffray
2013/10/18 10:19:37
implementation
lukas
2013/10/24 16:48:36
Done.
| |
144 */ | |
145 bool isIdentifier(); | |
124 | 146 |
125 /** | 147 /** |
126 * Returns a textual representation of this token to be used for debugging | 148 * Returns a textual representation of this token to be used for debugging |
127 * purposes. The resulting string might contain information about the | 149 * purposes. The resulting string might contain information about the |
128 * structure of the token, for example 'StringToken(foo)' for the identifier | 150 * structure of the token, for example 'StringToken(foo)' for the identifier |
129 * token 'foo'. Use [slowToString] for the text actually parsed by the token. | 151 * token 'foo'. |
152 * | |
153 * Use [value] for the text actually parsed by the token. | |
130 */ | 154 */ |
131 String toString() => info.value.toString(); | 155 String toString(); |
132 | |
133 /** | |
134 * The text parsed by this token. | |
135 */ | |
136 String slowToString() => toString(); | |
137 | 156 |
138 /** | 157 /** |
139 * The number of characters parsed by this token. | 158 * The number of characters parsed by this token. |
140 */ | 159 */ |
141 int get slowCharCount { | 160 int get charCount { |
142 if (info == BAD_INPUT_INFO) { | 161 if (info == BAD_INPUT_INFO) { |
143 // This is a token that wraps around an error message. Return 1 | 162 // This is a token that wraps around an error message. Return 1 |
144 // instead of the size of the length of the error message. | 163 // instead of the size of the length of the error message. |
145 return 1; | 164 return 1; |
146 } else { | 165 } else { |
147 return slowToString().length; | 166 return value.length; |
148 } | 167 } |
149 } | 168 } |
150 | 169 |
151 int get hashCode => computeHashCode(charOffset, info, value); | 170 int get hashCode => computeHashCode(charOffset, info, value); |
152 } | 171 } |
153 | 172 |
154 /** | 173 /** |
174 * A symbol token represents the symbol in its precedence info. | |
ngeoffray
2013/10/18 10:19:37
symbol token -> [SymbolToken]
lukas
2013/10/24 16:48:36
Done.
| |
175 * Also used for end of file with EOF_INFO. | |
176 */ | |
177 class SymbolToken extends Token { | |
178 | |
179 final PrecedenceInfo info; | |
180 | |
181 SymbolToken(this.info, int charOffset) : super(charOffset); | |
182 | |
183 String get value => info.value; | |
184 | |
185 String get stringValue => info.value; | |
186 | |
187 bool isIdentifier() => false; | |
188 | |
189 String toString() => "SymbolToken($value)"; | |
190 } | |
191 | |
192 /** | |
193 * A [BeginGroupToken] reprsents a symbol that may be the beginning of | |
ngeoffray
2013/10/18 10:19:37
represents
lukas
2013/10/24 16:48:36
Done.
| |
194 * a pair of brackets, i.e., ( { [ < or ${ | |
195 * The [endGroup] token points to the matching closing bracket in case | |
196 * it can be identified during scanning. | |
197 */ | |
198 class BeginGroupToken extends SymbolToken { | |
199 Token endGroup; | |
200 | |
201 BeginGroupToken(PrecedenceInfo info, int charOffset) | |
202 : super(info, charOffset); | |
ngeoffray
2013/10/18 10:19:37
Fits in one line?
lukas
2013/10/24 16:48:36
No :)
| |
203 } | |
204 | |
205 /** | |
155 * A keyword token. | 206 * A keyword token. |
156 */ | 207 */ |
157 class KeywordToken extends Token { | 208 class KeywordToken extends Token { |
158 final Keyword value; | 209 final Keyword keyword; |
159 String get stringValue => value.syntax; | 210 |
160 | 211 KeywordToken(this.keyword, int charOffset) : super(charOffset); |
161 KeywordToken(Keyword value, int charOffset) | 212 |
162 : this.value = value, super(value.info, charOffset); | 213 PrecedenceInfo get info => keyword.info; |
163 | 214 |
164 bool isIdentifier() => value.isPseudo || value.isBuiltIn; | 215 String get value => keyword.syntax; |
165 | 216 |
166 String toString() => value.syntax; | 217 String get stringValue => keyword.syntax; |
167 } | 218 |
168 | 219 bool isIdentifier() => keyword.isPseudo || keyword.isBuiltIn; |
169 /** | 220 |
170 * A String-valued token. | 221 String toString() => "KeywordToken($value)"; |
222 } | |
223 | |
224 /** | |
225 * A String-valued token. Represents identifiers, string literals, | |
226 * number literals, comments and error tokens, using the corresponding | |
ngeoffray
2013/10/18 10:19:37
comments, and ...
lukas
2013/10/24 16:48:36
Done.
| |
227 * precedence info. | |
171 */ | 228 */ |
172 class StringToken extends Token { | 229 class StringToken extends Token { |
173 final SourceString value; | 230 /** |
174 | 231 * The length threshold above which substring tokens are computed lazily. |
175 StringToken(PrecedenceInfo info, String value, int charOffset) | 232 * |
176 : this.fromSource(info, new SourceString(value), charOffset); | 233 * For string tokens that are substrings of the program source, the actual |
177 | 234 * substring extraction is performed lazily. This is beneficial because |
178 StringToken.fromSource(PrecedenceInfo info, this.value, int charOffset) | 235 * not all scanned code is actually used. For unused parts, the substrings |
179 : super(info, charOffset); | 236 * are never computed and allocated. |
180 | 237 */ |
181 String toString() => "StringToken(${value.slowToString()})"; | 238 static const int LAZY_THRESHOLD = 4; |
sra1
2013/10/22 19:52:31
How did you calculate this threshold?
lukas
2013/10/23 07:11:01
Short strings have a smaller footprint than a Comp
| |
182 | 239 |
183 String slowToString() => value.slowToString(); | 240 var valueOrLazySubstring; |
ngeoffray
2013/10/18 10:19:37
You could put the union type of this field in comm
lukas
2013/10/24 16:48:36
Done.
| |
184 } | 241 |
185 | 242 final PrecedenceInfo info; |
186 abstract class SourceString extends IterableBase<int> { | 243 |
187 const factory SourceString(String string) = StringWrapper; | 244 /** |
188 | 245 * Creates a non-lazy string token. If [canonicalize] is true, the string |
189 static final Map<String, StringWrapper> canonicalizedValues = | 246 * is canonicalized before the token is created. |
190 new Map<String, StringWrapper>(); | 247 */ |
191 | 248 StringToken.fromString(this.info, String value, int charOffset, |
192 factory SourceString.fromSubstring(String string, int begin, int end) { | 249 [bool canonicalize = false]) |
ngeoffray
2013/10/18 10:19:37
Make it a named parameter? Easier when reading cal
lukas
2013/10/24 16:48:36
Done.
| |
193 var substring = string.substring(begin, end); | 250 : valueOrLazySubstring = canonicalizedString(value, canonicalize), |
194 return canonicalizedValues.putIfAbsent( | 251 super(charOffset); |
195 substring, () => new StringWrapper(substring)); | 252 |
196 } | 253 /** |
197 | 254 * Creates a lazy string token. If [canonicalize] is true, the string |
198 void printOn(StringBuffer sb); | 255 * is canonicalized before the token is created. |
199 | 256 */ |
200 /** Gives a [SourceString] that is not including the [initial] first and | 257 StringToken.fromSubstring(this.info, String data, int start, int end, |
201 * [terminal] last characters. This is only intended to be used to remove | 258 int charOffset, [bool canonicalize = false]) |
ngeoffray
2013/10/18 10:19:37
ditto
lukas
2013/10/24 16:48:36
Done.
| |
202 * quotes from string literals (including an initial '@' for raw strings). | 259 : super(charOffset) { |
203 */ | 260 int length = end - start; |
204 SourceString copyWithoutQuotes(int initial, int terminal); | 261 if (length <= LAZY_THRESHOLD) { |
205 | 262 valueOrLazySubstring = canonicalizedString(data.substring(start, end), |
206 String get stringValue; | 263 canonicalize); |
ngeoffray
2013/10/18 10:19:37
indentation.
lukas
2013/10/24 16:48:36
Done.
| |
207 | 264 } else { |
208 String slowToString(); | 265 valueOrLazySubstring = |
209 | 266 new LazySubstring(data, start, length, canonicalize); |
210 bool get isEmpty; | 267 } |
211 | 268 } |
212 bool isPrivate(); | 269 |
213 } | 270 /** |
214 | 271 * Creates a lazy string token. If [asciiOnly] is false, the byte array |
215 class StringWrapper extends IterableBase<int> implements SourceString { | 272 * is passed through a UTF-8 decoder. |
216 final String stringValue; | 273 */ |
217 | 274 StringToken.fromUtf8Bytes(this.info, List<int> data, int start, int end, |
218 const StringWrapper(this.stringValue); | 275 bool asciiOnly, int charOffset) |
219 | 276 : super(charOffset) { |
220 int get hashCode => stringValue.hashCode; | 277 int length = end - start; |
221 | 278 if (length <= LAZY_THRESHOLD) { |
222 bool operator ==(other) { | 279 valueOrLazySubstring = decodeUtf8(data, start, end, asciiOnly); |
223 return other is SourceString && toString() == other.slowToString(); | 280 } else { |
224 } | 281 valueOrLazySubstring = new LazySubstring(data, start, length, asciiOnly); |
225 | 282 } |
226 Iterator<int> get iterator => new StringCodeIterator(stringValue); | 283 } |
227 | 284 |
228 void printOn(StringBuffer sb) { | 285 String get value { |
229 sb.write(stringValue); | 286 if (valueOrLazySubstring is String) { |
230 } | 287 return valueOrLazySubstring; |
231 | 288 } else { |
232 String toString() => stringValue; | 289 assert(valueOrLazySubstring is LazySubstring); |
233 | 290 var data = valueOrLazySubstring.data; |
234 String slowToString() => stringValue; | 291 int start = valueOrLazySubstring.start; |
235 | 292 int end = start + valueOrLazySubstring.length; |
236 SourceString copyWithoutQuotes(int initial, int terminal) { | 293 if (data is String) { |
237 assert(0 <= initial); | 294 valueOrLazySubstring = canonicalizedString( |
238 assert(0 <= terminal); | 295 data.substring(start, end), valueOrLazySubstring.boolValue); |
239 assert(initial + terminal <= stringValue.length); | 296 } else { |
240 return new StringWrapper( | 297 valueOrLazySubstring = decodeUtf8( |
241 stringValue.substring(initial, stringValue.length - terminal)); | 298 data, start, end, valueOrLazySubstring.boolValue); |
242 } | 299 } |
243 | 300 return valueOrLazySubstring; |
244 bool get isEmpty => stringValue.isEmpty; | 301 } |
245 | 302 } |
246 bool isPrivate() => !isEmpty && stringValue.codeUnitAt(0) == $_; | 303 |
247 } | 304 String get stringValue => null; |
248 | 305 |
249 class StringCodeIterator implements Iterator<int> { | 306 bool isIdentifier() => identical(kind, IDENTIFIER_TOKEN); |
250 final String string; | 307 |
251 int index; | 308 String toString() => "StringToken($value)"; |
252 final int end; | 309 |
253 int _current; | 310 static final HashSet<String> canonicalizedSubstrings = |
254 | 311 new HashSet(); |
ngeoffray
2013/10/18 10:19:37
HashSet<String>()
lukas
2013/10/24 16:48:36
Done.
| |
255 StringCodeIterator(String string) : | 312 |
256 this.string = string, index = 0, end = string.length; | 313 static String canonicalizedString(String s, bool canonicalize) { |
257 | 314 if (!canonicalize) return s; |
258 StringCodeIterator.substring(this.string, this.index, this.end) { | 315 var result = canonicalizedSubstrings.lookup(s); |
259 assert(0 <= index); | 316 if (result != null) return result; |
260 assert(index <= end); | 317 canonicalizedSubstrings.add(s); |
261 assert(end <= string.length); | 318 return s; |
262 } | 319 } |
263 | 320 |
264 int get current => _current; | 321 static String decodeUtf8(List<int> data, int start, int end, bool asciiOnly) { |
265 | 322 var s; |
266 bool moveNext() { | 323 if (asciiOnly) { |
267 _current = null; | 324 // getRange returns an iterator, it does not copy the data. |
268 if (index >= end) return false; | 325 s = new String.fromCharCodes(data.getRange(start, end)); |
269 _current = string.codeUnitAt(index++); | 326 } else { |
270 return true; | 327 // TODO(lry): this is measurably slow. Also sublist is copied eagerly. |
271 } | 328 var bytes = data.sublist(start, end); |
272 } | 329 s = UTF8.decode(bytes); |
273 | 330 } |
274 class BeginGroupToken extends StringToken { | 331 return canonicalizedString(s, true); |
275 Token endGroup; | 332 } |
276 BeginGroupToken(PrecedenceInfo info, String value, int charOffset) | 333 } |
277 : super(info, value, charOffset); | 334 |
335 /** | |
336 * This class represents the necessary information to compute a substring | |
337 * lazily. The substring can either originate in a string or in a [:List<int>:] | |
ngeoffray
2013/10/18 10:19:37
originate in -> originate from?
lukas
2013/10/24 16:48:36
Done.
| |
338 * of UTF-8 bytes. | |
339 */ | |
340 abstract class LazySubstring { | |
341 /** The original data, either a string or a List<int> */ | |
342 get data; | |
343 | |
344 int get start; | |
345 int get length; | |
346 | |
347 /** | |
348 * If this substring is based on a String, the boolean indicates whether the | |
ngeoffray
2013/10/18 10:19:37
the boolean -> [boolValue]
lukas
2013/10/24 16:48:36
Done.
| |
349 * resulting substring should be canonicalized. | |
350 * | |
351 * For substrings based on a byte array, the boolean value is true if the | |
ngeoffray
2013/10/18 10:19:37
ditto
lukas
2013/10/24 16:48:36
Done.
| |
352 * array only holds ASCII characters. The resulting substring will be | |
353 * canonicalized after decoding. | |
354 */ | |
355 bool get boolValue; | |
356 | |
357 LazySubstring.internal(); | |
358 | |
359 factory LazySubstring(data, int start, int length, bool b) { | |
360 // See comment on [CompactLazySubstring]. | |
361 if (start < 0x100000 && length < 0x200) { | |
362 int fields = (start << 9); | |
363 fields = fields | length; | |
364 fields = fields << 1; | |
365 if (b) fields |= 1; | |
366 return new CompactLazySubstring(data, fields); | |
367 } else { | |
368 return new FullLazySubstring(data, start, length, b); | |
369 } | |
370 } | |
371 } | |
372 | |
373 /** | |
374 * This class encodes [start], [length] and [boolValue] in a single | |
375 * 30 bit integer. It uses 20 bits for [start], which covers source files | |
376 * of 1M. [length] has 9 bits, which covers 512 characters. | |
ngeoffray
2013/10/18 10:19:37
1M -> 1MB.
lukas
2013/10/24 16:48:36
Done.
| |
377 * | |
378 * The file html_dart2js.dart is currently around 1M. | |
ngeoffray
2013/10/18 10:19:37
1M -> 1MB
lukas
2013/10/24 16:48:36
Done.
| |
379 */ | |
380 class CompactLazySubstring extends LazySubstring { | |
381 final data; | |
382 final int fields; | |
383 | |
384 CompactLazySubstring(this.data, this.fields) : super.internal(); | |
385 | |
386 int get start => fields >> 10; | |
387 int get length => (fields >> 1) & 0x1ff; | |
388 bool get boolValue => (fields & 1) == 1; | |
389 } | |
390 | |
391 class FullLazySubstring extends LazySubstring { | |
392 final data; | |
393 final int start; | |
394 final int length; | |
395 final bool boolValue; | |
396 FullLazySubstring(this.data, this.start, this.length, this.boolValue) | |
397 : super.internal(); | |
278 } | 398 } |
279 | 399 |
280 bool isUserDefinableOperator(String value) { | 400 bool isUserDefinableOperator(String value) { |
281 return | 401 return |
282 isBinaryOperator(value) || | 402 isBinaryOperator(value) || |
283 isMinusOperator(value) || | 403 isMinusOperator(value) || |
284 isTernaryOperator(value) || | 404 isTernaryOperator(value) || |
285 isUnaryOperator(value); | 405 isUnaryOperator(value); |
286 } | 406 } |
287 | 407 |
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
538 | 658 |
539 const PrecedenceInfo STRING_INTERPOLATION_IDENTIFIER_INFO = | 659 const PrecedenceInfo STRING_INTERPOLATION_IDENTIFIER_INFO = |
540 const PrecedenceInfo('\$', 0, | 660 const PrecedenceInfo('\$', 0, |
541 STRING_INTERPOLATION_IDENTIFIER_TOKEN); | 661 STRING_INTERPOLATION_IDENTIFIER_TOKEN); |
542 | 662 |
543 const PrecedenceInfo HEXADECIMAL_INFO = | 663 const PrecedenceInfo HEXADECIMAL_INFO = |
544 const PrecedenceInfo('hexadecimal', 0, HEXADECIMAL_TOKEN); | 664 const PrecedenceInfo('hexadecimal', 0, HEXADECIMAL_TOKEN); |
545 | 665 |
546 const PrecedenceInfo COMMENT_INFO = | 666 const PrecedenceInfo COMMENT_INFO = |
547 const PrecedenceInfo('comment', 0, COMMENT_TOKEN); | 667 const PrecedenceInfo('comment', 0, COMMENT_TOKEN); |
OLD | NEW |