Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(68)

Side by Side Diff: pkg/dart_scanner/lib/src/token.dart

Issue 2621153006: Copy scanner and parser to own packages. (Closed)
Patch Set: Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « pkg/dart_scanner/lib/src/precedence.dart ('k') | pkg/dart_scanner/lib/src/token_constants.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 library dart2js.tokens;
6
7 import 'dart:collection' show HashSet;
8 import 'dart:convert' show UTF8;
9
10 import '../common.dart';
11 import '../util/util.dart' show computeHashCode;
12 import 'keyword.dart' show Keyword;
13 import 'precedence.dart' show PrecedenceInfo;
14 import 'precedence_constants.dart' as Precedence show BAD_INPUT_INFO;
15 import 'token_constants.dart' as Tokens show IDENTIFIER_TOKEN;
16
/// A token that doubles as a linked list.
///
/// Every token records where it starts in the source text ([charOffset]) and
/// links to the token that follows it ([next]).
abstract class Token implements Spannable {
  /// The character offset of the start of this token within the source text.
  final int charOffset;

  /// The next token in the token stream.
  Token next;

  Token(this.charOffset);

  /// The precedence info for this token.
  ///
  /// [info] determines the kind and the precedence level of this token. It is
  /// declared as a getter to save a field in the [KeywordToken] subclass.
  PrecedenceInfo get info;

  /// The string represented by this token, a substring of the source code.
  ///
  /// For [StringToken]s the [value] includes the quotes, explicit escapes,
  /// etc.
  String get value;

  /// For symbol and keyword tokens, the string value represented by this
  /// token. For [StringToken]s this getter returns `null`.
  ///
  /// For [SymbolToken]s and [KeywordToken]s, the string value is a
  /// compile-time constant originating in the [PrecedenceInfo] or in the
  /// [Keyword] instance. This allows testing for keywords and symbols using
  /// `identical`, e.g., `identical('class', token.stringValue)`.
  ///
  /// Returning `null` for string tokens is important to identify symbols and
  /// keywords; [value] cannot be used instead. The string literal
  ///
  ///     "$a($b"
  ///
  /// produces ..., SymbolToken($), StringToken(a), StringToken((), ...
  ///
  /// After parsing the identifier 'a', the parser tests for a function
  /// declaration using `identical(next.stringValue, '(')`, which (rightfully)
  /// returns false because [stringValue] returns `null`.
  String get stringValue;

  /// The kind enum of this token as determined by its [info].
  int get kind => info.kind;

  /// The precedence level for this token, as determined by its [info].
  int get precedence => info.precedence;

  /// True if this token is an identifier.
  ///
  /// Some keywords are allowed as identifiers, see the implementation in
  /// [KeywordToken].
  bool isIdentifier();

  /// Returns a textual representation of this token to be used for debugging
  /// purposes.
  ///
  /// The resulting string might contain information about the structure of
  /// the token, for example 'StringToken(foo)' for the identifier token 'foo'.
  /// Use [value] for the text actually parsed by the token.
  String toString();

  /// The number of characters parsed by this token.
  ///
  /// A token whose [info] is `BAD_INPUT_INFO` wraps around an error message;
  /// for it the count is 1 rather than the length of that message.
  int get charCount =>
      info == Precedence.BAD_INPUT_INFO ? 1 : value.length;

  /// The character offset of the end of this token within the source text.
  int get charEnd => charOffset + charCount;

  /// A hash code computed from [charOffset], [info] and [value].
  ///
  /// Note that this reads [value], which [ErrorToken] implements by throwing.
  int get hashCode => computeHashCode(charOffset, info, value);
}
112
/// A pair of tokens marking the beginning and the end of a span.
///
/// Used for error reporting.
class TokenPair implements Spannable {
  /// The token marking the beginning of the span.
  final Token begin;

  /// The token marking the end of the span.
  final Token end;

  TokenPair(Token begin, Token end)
      : this.begin = begin,
        this.end = end;
}
121
/// A [SymbolToken] represents the symbol in its precedence info.
///
/// Also used for end of file with EOF_INFO.
class SymbolToken extends Token {
  /// The precedence info that supplies both [value] and [stringValue].
  final PrecedenceInfo info;

  SymbolToken(this.info, int charOffset) : super(charOffset);

  /// The symbol text, taken from [info].
  String get value {
    return info.value;
  }

  /// The same text as [value], taken from [info].
  String get stringValue {
    return info.value;
  }

  /// Symbols are never identifiers.
  bool isIdentifier() {
    return false;
  }

  String toString() {
    return "SymbolToken($value)";
  }
}
139
/// A [BeginGroupToken] represents a symbol that may be the beginning of
/// a pair of brackets, i.e., `(`, `{`, `[`, `<` or `${`.
///
/// The [endGroup] token points to the matching closing bracket in case
/// it can be identified during scanning.
class BeginGroupToken extends SymbolToken {
  /// The matching closing bracket; not assigned by this class itself.
  Token endGroup;

  BeginGroupToken(PrecedenceInfo info, int charOffset)
      : super(info, charOffset);
}
152
/// A keyword token.
///
/// All token properties are derived from [keyword], so no separate
/// precedence info field is stored.
class KeywordToken extends Token {
  /// The keyword this token stands for.
  final Keyword keyword;

  KeywordToken(this.keyword, int charOffset) : super(charOffset);

  /// The precedence info of [keyword].
  PrecedenceInfo get info {
    return keyword.info;
  }

  /// The syntax of [keyword].
  String get value {
    return keyword.syntax;
  }

  /// The syntax of [keyword], same as [value].
  String get stringValue {
    return keyword.syntax;
  }

  /// True when [keyword] is a pseudo keyword or a built-in keyword.
  bool isIdentifier() {
    if (keyword.isPseudo) return true;
    return keyword.isBuiltIn;
  }

  String toString() {
    return "KeywordToken($value)";
  }
}
171
/// Base class for tokens whose [info] is always `BAD_INPUT_INFO`.
///
/// Such a token has no source text of its own: reading [value] throws a
/// [SpannableAssertionFailure] that carries [assertionMessage].
abstract class ErrorToken extends Token {
  ErrorToken(int charOffset) : super(charOffset);

  PrecedenceInfo get info {
    return Precedence.BAD_INPUT_INFO;
  }

  /// Always throws; see the class documentation.
  String get value =>
      throw new SpannableAssertionFailure(this, assertionMessage);

  /// Always `null`, like for [StringToken]s.
  String get stringValue {
    return null;
  }

  /// Error tokens are never identifiers.
  bool isIdentifier() {
    return false;
  }

  /// The message used for the failure thrown by [value].
  String get assertionMessage;
}
187
/// An [ErrorToken] that records a character which is not allowed at its
/// position.
class BadInputToken extends ErrorToken {
  /// The offending character, as an integer.
  final int character;

  BadInputToken(this.character, int charOffset) : super(charOffset);

  String toString() {
    return "BadInputToken($character)";
  }

  /// Reports [character] in hexadecimal, prefixed with `U+`.
  String get assertionMessage =>
      'Character U+${character.toRadixString(16)} not allowed here.';
}
199
/// An [ErrorToken] for a construct that begins with [start] and is reported
/// as not terminated.
class UnterminatedToken extends ErrorToken {
  /// The text quoted in [assertionMessage] as not being terminated.
  final String start;

  /// The offset that [charCount] measures up to.
  final int endOffset;

  UnterminatedToken(this.start, int charOffset, this.endOffset)
      : super(charOffset);

  String toString() {
    return "UnterminatedToken($start)";
  }

  String get assertionMessage {
    return "'$start' isn't terminated.";
  }

  /// The distance from [charOffset] to [endOffset], replacing the fixed
  /// count that [Token.charCount] uses for bad-input tokens.
  int get charCount {
    return endOffset - charOffset;
  }
}
213
/// An [ErrorToken] created for a [BeginGroupToken] that is reported as not
/// closed.
class UnmatchedToken extends ErrorToken {
  /// The opening token that is reported as not closed.
  final BeginGroupToken begin;

  /// Creates the error token at the same character offset as [begin].
  UnmatchedToken(BeginGroupToken begin)
      : this.begin = begin,
        super(begin.charOffset);

  String toString() => "UnmatchedToken(${begin.value})";

  /// The message used for the failure thrown by [value].
  ///
  /// Quotes the source text of [begin] (`begin.value`) rather than its
  /// debugging `toString()`, so the message reads `'(' isn't closed.` and not
  /// `'SymbolToken(()' isn't closed.`.
  String get assertionMessage => "'${begin.value}' isn't closed.";
}
225
/// A String-valued token.
///
/// Represents identifiers, string literals, number literals, comments, and
/// error tokens, using the corresponding precedence info.
class StringToken extends Token {
  /// The length threshold above which substring tokens are computed lazily.
  ///
  /// For string tokens that are substrings of the program source, the actual
  /// substring extraction is performed lazily. This is beneficial because
  /// not all scanned code is actually used. For unused parts, the substrings
  /// are never computed and allocated.
  static const int LAZY_THRESHOLD = 4;

  /// Either the token text itself or the recipe for extracting it.
  ///
  /// The [value] getter replaces a [LazySubstring] stored here with the
  /// extracted string the first time it is read.
  var /* String | LazySubstring */ valueOrLazySubstring;

  final PrecedenceInfo info;

  /// Creates a non-lazy string token.
  ///
  /// If [canonicalize] is true, the string is canonicalized before the token
  /// is created.
  StringToken.fromString(this.info, String value, int charOffset,
      {bool canonicalize = false})
      : valueOrLazySubstring = canonicalizedString(value, canonicalize),
        super(charOffset);

  /// Creates a string token for the range [start]..[end] of [data].
  ///
  /// Ranges of at most [LAZY_THRESHOLD] characters are extracted immediately;
  /// longer ranges are stored as a [LazySubstring] and extracted when [value]
  /// is first read. If [canonicalize] is true, the extracted string is
  /// canonicalized.
  StringToken.fromSubstring(
      this.info, String data, int start, int end, int charOffset,
      {bool canonicalize = false})
      : super(charOffset) {
    int length = end - start;
    valueOrLazySubstring = length > LAZY_THRESHOLD
        ? new LazySubstring(data, start, length, canonicalize)
        : canonicalizedString(data.substring(start, end), canonicalize);
  }

  /// Creates a string token for the range [start]..[end] of the bytes in
  /// [data].
  ///
  /// Ranges of at most [LAZY_THRESHOLD] bytes are decoded immediately; longer
  /// ranges are stored as a [LazySubstring] and decoded when [value] is first
  /// read. If [asciiOnly] is false, the bytes are passed through a UTF-8
  /// decoder. The decoded string is always canonicalized.
  StringToken.fromUtf8Bytes(this.info, List<int> data, int start, int end,
      bool asciiOnly, int charOffset)
      : super(charOffset) {
    int length = end - start;
    valueOrLazySubstring = length > LAZY_THRESHOLD
        ? new LazySubstring(data, start, length, asciiOnly)
        : decodeUtf8(data, start, end, asciiOnly);
  }

  /// The text of this token, extracting and caching it on first access if it
  /// was stored lazily.
  String get value {
    var current = valueOrLazySubstring;
    if (current is String) return current;

    assert(current is LazySubstring);
    var source = current.data;
    int begin = current.start;
    int finish = begin + current.length;
    // For string data the flag means "canonicalize"; for byte data it means
    // "ASCII only" (see [LazySubstring.boolValue]).
    bool flag = current.boolValue;
    var extracted = source is String
        ? canonicalizedString(source.substring(begin, finish), flag)
        : decodeUtf8(source, begin, finish, flag);
    // Cache the result so the extraction happens only once.
    valueOrLazySubstring = extracted;
    return extracted;
  }

  /// See [Token.stringValue] for an explanation.
  String get stringValue {
    return null;
  }

  /// True when the [kind] of this token is `IDENTIFIER_TOKEN`.
  bool isIdentifier() {
    return identical(kind, Tokens.IDENTIFIER_TOKEN);
  }

  String toString() {
    return "StringToken($value)";
  }

  /// The strings handed out so far by [canonicalizedString].
  static final HashSet<String> canonicalizedSubstrings = new HashSet<String>();

  /// Returns [s], or a previously canonicalized string equal to it.
  ///
  /// When [canonicalize] is false, [s] is returned unchanged. Otherwise the
  /// string already stored in [canonicalizedSubstrings] is returned if there
  /// is one; if not, [s] is stored there and returned.
  static String canonicalizedString(String s, bool canonicalize) {
    if (canonicalize) {
      var existing = canonicalizedSubstrings.lookup(s);
      if (existing != null) return existing;
      canonicalizedSubstrings.add(s);
    }
    return s;
  }

  /// Decodes the range [start]..[end] of [data] and canonicalizes the result.
  ///
  /// If [asciiOnly] is true, the bytes are used directly as char codes;
  /// otherwise they are passed through the UTF-8 decoder.
  static String decodeUtf8(List<int> data, int start, int end, bool asciiOnly) {
    var decoded = asciiOnly
        ? new String.fromCharCodes(data, start, end)
        : UTF8.decoder.convert(data, start, end);
    return canonicalizedString(decoded, true);
  }
}
334
/// The information necessary to compute a substring lazily.
///
/// The substring can either originate from a string or from a `List<int>` of
/// UTF-8 bytes.
abstract class LazySubstring {
  /// The original data, either a string or a `List<int>`.
  get data;

  /// The offset in [data] at which the substring starts.
  int get start;

  /// The length of the substring.
  int get length;

  /// A flag whose meaning depends on the kind of [data].
  ///
  /// If this substring is based on a String, the [boolValue] indicates
  /// whether the resulting substring should be canonicalized.
  ///
  /// For substrings based on a byte array, the [boolValue] is true if the
  /// array only holds ASCII characters. The resulting substring will be
  /// canonicalized after decoding.
  bool get boolValue;

  LazySubstring.internal();

  /// Creates the most compact representation that can hold the arguments.
  ///
  /// A [CompactLazySubstring] is used when [start] is below `0x100000` and
  /// [length] is below `0x200`; otherwise a [FullLazySubstring] is used.
  factory LazySubstring(data, int start, int length, bool b) {
    // See comment on [CompactLazySubstring].
    bool fitsCompact = start < 0x100000 && length < 0x200;
    if (!fitsCompact) {
      return new FullLazySubstring(data, start, length, b);
    }
    // Pack as: start, then 9 bits of length, then 1 bit for the flag.
    int packed = ((start << 9) | length) << 1;
    if (b) packed |= 1;
    return new CompactLazySubstring(data, packed);
  }
}
372
/// A [LazySubstring] that encodes [start], [length] and [boolValue] in a
/// single 30 bit integer.
///
/// It uses 20 bits for [start], which covers source files of 1MB. [length]
/// has 9 bits, which covers 512 characters.
///
/// The file html_dart2js.dart is currently around 1MB.
class CompactLazySubstring extends LazySubstring {
  final data;

  /// The packed value: bit 0 is [boolValue], bits 1 to 9 are [length], and
  /// the bits from 10 upwards are [start].
  final int fields;

  CompactLazySubstring(this.data, this.fields) : super.internal();

  int get start {
    return fields >> 10;
  }

  int get length {
    return (fields >> 1) & 0x1ff;
  }

  bool get boolValue {
    return (fields & 0x1) != 0;
  }
}
390
/// A [LazySubstring] that stores [start], [length] and [boolValue] in
/// separate fields.
class FullLazySubstring extends LazySubstring {
  final data;
  final int start;
  final int length;
  final bool boolValue;

  FullLazySubstring(data, int start, int length, bool boolValue)
      : this.data = data,
        this.start = start,
        this.length = length,
        this.boolValue = boolValue,
        super.internal();
}
399
/// Whether [value] is accepted by any of [isBinaryOperator],
/// [isMinusOperator], [isTernaryOperator] or [isUnaryOperator].
bool isUserDefinableOperator(String value) {
  // The checks run in this order and stop at the first match.
  if (isBinaryOperator(value)) return true;
  if (isMinusOperator(value)) return true;
  if (isTernaryOperator(value)) return true;
  return isUnaryOperator(value);
}
406
/// Whether [value] is `~`.
bool isUnaryOperator(String value) {
  return value == '~';
}
408
/// Whether [value] is one of `==`, `[]`, `*`, `/`, `%`, `~/`, `+`, `<<`,
/// `>>`, `>=`, `>`, `<=`, `<`, `&`, `^` or `|`.
///
/// Note that `-` is not in this list; see [isMinusOperator].
bool isBinaryOperator(String value) {
  const List<String> binaryOperators = const <String>[
    '==',
    '[]',
    '*',
    '/',
    '%',
    '~/',
    '+',
    '<<',
    '>>',
    '>=',
    '>',
    '<=',
    '<',
    '&',
    '^',
    '|',
  ];
  return binaryOperators.contains(value);
}
427
/// Whether [value] is `[]=`.
bool isTernaryOperator(String value) {
  return value == '[]=';
}
429
/// Whether [value] is `-`.
bool isMinusOperator(String value) {
  return value == '-';
}
OLDNEW
« no previous file with comments | « pkg/dart_scanner/lib/src/precedence.dart ('k') | pkg/dart_scanner/lib/src/token_constants.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698