OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 library dart2js.scanner; | 5 library dart2js.scanner; |
6 | 6 |
7 import '../io/source_file.dart' show | 7 import '../io/source_file.dart' show SourceFile, Utf8BytesSourceFile; |
8 SourceFile, | 8 import '../tokens/keyword.dart' show Keyword, KeywordState; |
9 Utf8BytesSourceFile; | |
10 import '../tokens/keyword.dart' show | |
11 Keyword, | |
12 KeywordState; | |
13 import '../tokens/precedence.dart'; | 9 import '../tokens/precedence.dart'; |
14 import '../tokens/precedence_constants.dart'; | 10 import '../tokens/precedence_constants.dart'; |
15 import '../tokens/token.dart'; | 11 import '../tokens/token.dart'; |
16 import '../tokens/token_constants.dart'; | 12 import '../tokens/token_constants.dart'; |
17 import '../util/characters.dart'; | 13 import '../util/characters.dart'; |
18 | 14 |
19 import 'string_scanner.dart' show | 15 import 'string_scanner.dart' show StringScanner; |
20 StringScanner; | 16 import 'utf8_bytes_scanner.dart' show Utf8BytesScanner; |
21 import 'utf8_bytes_scanner.dart' show | |
22 Utf8BytesScanner; | |
23 | |
24 | 17 |
25 abstract class Scanner { | 18 abstract class Scanner { |
26 Token tokenize(); | 19 Token tokenize(); |
27 | 20 |
28 factory Scanner(SourceFile file, | 21 factory Scanner(SourceFile file, {bool includeComments: false}) { |
29 {bool includeComments: false}) { | |
30 if (file is Utf8BytesSourceFile) { | 22 if (file is Utf8BytesSourceFile) { |
31 return new Utf8BytesScanner(file, includeComments: includeComments); | 23 return new Utf8BytesScanner(file, includeComments: includeComments); |
32 } else { | 24 } else { |
33 return new StringScanner(file, includeComments: includeComments); | 25 return new StringScanner(file, includeComments: includeComments); |
34 } | 26 } |
35 } | 27 } |
36 } | 28 } |
37 | 29 |
38 abstract class AbstractScanner implements Scanner { | 30 abstract class AbstractScanner implements Scanner { |
39 // TODO(ahe): Move this class to implementation. | 31 // TODO(ahe): Move this class to implementation. |
(...skipping 25 matching lines...) Expand all Loading... |
65 | 57 |
66 /** | 58 /** |
67 * The source file that is being scanned. This field can be [:null:]. | 59 * The source file that is being scanned. This field can be [:null:]. |
68 * If the source file is available, the scanner assigns its [:lineStarts:] and | 60 * If the source file is available, the scanner assigns its [:lineStarts:] and |
69 * [:length:] fields at the end of [tokenize]. | 61 * [:length:] fields at the end of [tokenize]. |
70 */ | 62 */ |
71 final SourceFile file; | 63 final SourceFile file; |
72 | 64 |
73 final List<int> lineStarts = <int>[0]; | 65 final List<int> lineStarts = <int>[0]; |
74 | 66 |
75 AbstractScanner( | 67 AbstractScanner(this.file, this.includeComments) { |
76 this.file, this.includeComments) { | |
77 this.tail = this.tokens; | 68 this.tail = this.tokens; |
78 } | 69 } |
79 | 70 |
80 /** | 71 /** |
81 * Advances and returns the next character. | 72 * Advances and returns the next character. |
82 * | 73 * |
83 * If the next character is non-ASCII, then the returned value depends on the | 74 * If the next character is non-ASCII, then the returned value depends on the |
84 * scanner implementation. The [Utf8BytesScanner] returns a UTF-8 byte, while | 75 * scanner implementation. The [Utf8BytesScanner] returns a UTF-8 byte, while |
85 * the [StringScanner] returns a UTF-16 code unit. | 76 * the [StringScanner] returns a UTF-16 code unit. |
86 * | 77 * |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
159 | 150 |
160 /** | 151 /** |
161 * Appends a substring from the scan offset [:start:] to the current | 152 * Appends a substring from the scan offset [:start:] to the current |
162 * [:scanOffset:] plus the [:extraOffset:]. For example, if the current | 153 * [:scanOffset:] plus the [:extraOffset:]. For example, if the current |
163 * scanOffset is 10, then [:appendSubstringToken(info, 5, true, -1):] will append the | 154 * scanOffset is 10, then [:appendSubstringToken(info, 5, true, -1):] will append the |
164 * substring [5,9). | 155 * substring [5,9). |
165 * | 156 * |
166 * Note that [extraOffset] can only be used if the covered character(s) are | 157 * Note that [extraOffset] can only be used if the covered character(s) are |
167 * known to be ASCII. | 158 * known to be ASCII. |
168 */ | 159 */ |
169 void appendSubstringToken(PrecedenceInfo info, int start, | 160 void appendSubstringToken(PrecedenceInfo info, int start, bool asciiOnly, |
170 bool asciiOnly, [int extraOffset]); | 161 [int extraOffset]); |
171 | 162 |
172 /** Documentation in subclass [ArrayBasedScanner]. */ | 163 /** Documentation in subclass [ArrayBasedScanner]. */ |
173 void appendPrecedenceToken(PrecedenceInfo info); | 164 void appendPrecedenceToken(PrecedenceInfo info); |
174 | 165 |
175 /** Documentation in subclass [ArrayBasedScanner]. */ | 166 /** Documentation in subclass [ArrayBasedScanner]. */ |
176 int select(int choice, PrecedenceInfo yes, PrecedenceInfo no); | 167 int select(int choice, PrecedenceInfo yes, PrecedenceInfo no); |
177 | 168 |
178 /** Documentation in subclass [ArrayBasedScanner]. */ | 169 /** Documentation in subclass [ArrayBasedScanner]. */ |
179 void appendKeywordToken(Keyword keyword); | 170 void appendKeywordToken(Keyword keyword); |
180 | 171 |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
229 // One additional line start at the end, see [SourceFile.lineStarts]. | 220 // One additional line start at the end, see [SourceFile.lineStarts]. |
230 lineStarts.add(stringOffset + 1); | 221 lineStarts.add(stringOffset + 1); |
231 file.lineStarts = lineStarts; | 222 file.lineStarts = lineStarts; |
232 } | 223 } |
233 | 224 |
234 return firstToken(); | 225 return firstToken(); |
235 } | 226 } |
236 | 227 |
237 int bigSwitch(int next) { | 228 int bigSwitch(int next) { |
238 beginToken(); | 229 beginToken(); |
239 if (identical(next, $SPACE) || identical(next, $TAB) | 230 if (identical(next, $SPACE) || |
240 || identical(next, $LF) || identical(next, $CR)) { | 231 identical(next, $TAB) || |
| 232 identical(next, $LF) || |
| 233 identical(next, $CR)) { |
241 appendWhiteSpace(next); | 234 appendWhiteSpace(next); |
242 next = advance(); | 235 next = advance(); |
243 // Sequences of spaces are common, so advance through them fast. | 236 // Sequences of spaces are common, so advance through them fast. |
244 while (identical(next, $SPACE)) { | 237 while (identical(next, $SPACE)) { |
245 // We don't invoke [:appendWhiteSpace(next):] here for efficiency, | 238 // We don't invoke [:appendWhiteSpace(next):] here for efficiency, |
246 // assuming that it does not do anything for space characters. | 239 // assuming that it does not do anything for space characters. |
247 next = advance(); | 240 next = advance(); |
248 } | 241 } |
249 return next; | 242 return next; |
250 } | 243 } |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
347 // Type parameters and arguments cannot contain semicolon. | 340 // Type parameters and arguments cannot contain semicolon. |
348 discardOpenLt(); | 341 discardOpenLt(); |
349 return advance(); | 342 return advance(); |
350 } | 343 } |
351 | 344 |
352 if (identical(next, $QUESTION)) { | 345 if (identical(next, $QUESTION)) { |
353 return tokenizeQuestion(next); | 346 return tokenizeQuestion(next); |
354 } | 347 } |
355 | 348 |
356 if (identical(next, $CLOSE_SQUARE_BRACKET)) { | 349 if (identical(next, $CLOSE_SQUARE_BRACKET)) { |
357 return appendEndGroup(CLOSE_SQUARE_BRACKET_INFO, | 350 return appendEndGroup( |
358 OPEN_SQUARE_BRACKET_TOKEN); | 351 CLOSE_SQUARE_BRACKET_INFO, OPEN_SQUARE_BRACKET_TOKEN); |
359 } | 352 } |
360 | 353 |
361 if (identical(next, $BACKPING)) { | 354 if (identical(next, $BACKPING)) { |
362 appendPrecedenceToken(BACKPING_INFO); | 355 appendPrecedenceToken(BACKPING_INFO); |
363 return advance(); | 356 return advance(); |
364 } | 357 } |
365 | 358 |
366 if (identical(next, $OPEN_CURLY_BRACKET)) { | 359 if (identical(next, $OPEN_CURLY_BRACKET)) { |
367 appendBeginGroup(OPEN_CURLY_BRACKET_INFO); | 360 appendBeginGroup(OPEN_CURLY_BRACKET_INFO); |
368 return advance(); | 361 return advance(); |
369 } | 362 } |
370 | 363 |
371 if (identical(next, $CLOSE_CURLY_BRACKET)) { | 364 if (identical(next, $CLOSE_CURLY_BRACKET)) { |
372 return appendEndGroup(CLOSE_CURLY_BRACKET_INFO, | 365 return appendEndGroup(CLOSE_CURLY_BRACKET_INFO, OPEN_CURLY_BRACKET_TOKEN); |
373 OPEN_CURLY_BRACKET_TOKEN); | |
374 } | 366 } |
375 | 367 |
376 if (identical(next, $SLASH)) { | 368 if (identical(next, $SLASH)) { |
377 return tokenizeSlashOrComment(next); | 369 return tokenizeSlashOrComment(next); |
378 } | 370 } |
379 | 371 |
380 if (identical(next, $AT)) { | 372 if (identical(next, $AT)) { |
381 return tokenizeAt(next); | 373 return tokenizeAt(next); |
382 } | 374 } |
383 | 375 |
384 if (identical(next, $DQ) || identical(next, $SQ)) { | 376 if (identical(next, $DQ) || identical(next, $SQ)) { |
385 return tokenizeString(next, scanOffset, false); | 377 return tokenizeString(next, scanOffset, false); |
386 } | 378 } |
387 | 379 |
388 if (identical(next, $PERIOD)) { | 380 if (identical(next, $PERIOD)) { |
389 return tokenizeDotsOrNumber(next); | 381 return tokenizeDotsOrNumber(next); |
390 } | 382 } |
391 | 383 |
392 if (identical(next, $0)) { | 384 if (identical(next, $0)) { |
393 return tokenizeHexOrNumber(next); | 385 return tokenizeHexOrNumber(next); |
394 } | 386 } |
395 | 387 |
396 // TODO(ahe): Would a range check be faster? | 388 // TODO(ahe): Would a range check be faster? |
397 if (identical(next, $1) || identical(next, $2) || identical(next, $3) | 389 if (identical(next, $1) || |
398 || identical(next, $4) || identical(next, $5) || identical(next, $6) | 390 identical(next, $2) || |
399 || identical(next, $7) || identical(next, $8) || identical(next, $9)) { | 391 identical(next, $3) || |
| 392 identical(next, $4) || |
| 393 identical(next, $5) || |
| 394 identical(next, $6) || |
| 395 identical(next, $7) || |
| 396 identical(next, $8) || |
| 397 identical(next, $9)) { |
400 return tokenizeNumber(next); | 398 return tokenizeNumber(next); |
401 } | 399 } |
402 | 400 |
403 if (identical(next, $EOF)) { | 401 if (identical(next, $EOF)) { |
404 return $EOF; | 402 return $EOF; |
405 } | 403 } |
406 if (next < 0x1f) { | 404 if (next < 0x1f) { |
407 return unexpected(next); | 405 return unexpected(next); |
408 } | 406 } |
409 | 407 |
(...skipping 12 matching lines...) Expand all Loading... |
422 int tokenizeTag(int next) { | 420 int tokenizeTag(int next) { |
423 // # or #!.*[\n\r] | 421 // # or #!.*[\n\r] |
424 if (scanOffset == 0) { | 422 if (scanOffset == 0) { |
425 if (identical(peek(), $BANG)) { | 423 if (identical(peek(), $BANG)) { |
426 int start = scanOffset + 1; | 424 int start = scanOffset + 1; |
427 bool asciiOnly = true; | 425 bool asciiOnly = true; |
428 do { | 426 do { |
429 next = advance(); | 427 next = advance(); |
430 if (next > 127) asciiOnly = false; | 428 if (next > 127) asciiOnly = false; |
431 } while (!identical(next, $LF) && | 429 } while (!identical(next, $LF) && |
432 !identical(next, $CR) && | 430 !identical(next, $CR) && |
433 !identical(next, $EOF)); | 431 !identical(next, $EOF)); |
434 if (!asciiOnly) handleUnicode(start); | 432 if (!asciiOnly) handleUnicode(start); |
435 return next; | 433 return next; |
436 } | 434 } |
437 } | 435 } |
438 appendPrecedenceToken(HASH_INFO); | 436 appendPrecedenceToken(HASH_INFO); |
439 return advance(); | 437 return advance(); |
440 } | 438 } |
441 | 439 |
442 int tokenizeTilde(int next) { | 440 int tokenizeTilde(int next) { |
443 // ~ ~/ ~/= | 441 // ~ ~/ ~/= |
(...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
648 } | 646 } |
649 return tokenizeNumber(next); | 647 return tokenizeNumber(next); |
650 } | 648 } |
651 | 649 |
652 int tokenizeHex(int next) { | 650 int tokenizeHex(int next) { |
653 int start = scanOffset; | 651 int start = scanOffset; |
654 next = advance(); // Advance past the $x or $X. | 652 next = advance(); // Advance past the $x or $X. |
655 bool hasDigits = false; | 653 bool hasDigits = false; |
656 while (true) { | 654 while (true) { |
657 next = advance(); | 655 next = advance(); |
658 if (($0 <= next && next <= $9) | 656 if (($0 <= next && next <= $9) || |
659 || ($A <= next && next <= $F) | 657 ($A <= next && next <= $F) || |
660 || ($a <= next && next <= $f)) { | 658 ($a <= next && next <= $f)) { |
661 hasDigits = true; | 659 hasDigits = true; |
662 } else { | 660 } else { |
663 if (!hasDigits) { | 661 if (!hasDigits) { |
664 unterminated('0x', shouldAdvance: false); | 662 unterminated('0x', shouldAdvance: false); |
665 return next; | 663 return next; |
666 } | 664 } |
667 appendSubstringToken(HEXADECIMAL_INFO, start, true); | 665 appendSubstringToken(HEXADECIMAL_INFO, start, true); |
668 return next; | 666 return next; |
669 } | 667 } |
670 } | 668 } |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
759 identical($CR, next) || | 757 identical($CR, next) || |
760 identical($EOF, next)) { | 758 identical($EOF, next)) { |
761 if (!asciiOnly) handleUnicode(start); | 759 if (!asciiOnly) handleUnicode(start); |
762 appendComment(start, asciiOnly); | 760 appendComment(start, asciiOnly); |
763 return next; | 761 return next; |
764 } | 762 } |
765 } | 763 } |
766 return null; | 764 return null; |
767 } | 765 } |
768 | 766 |
769 | |
770 int tokenizeMultiLineComment(int next, int start) { | 767 int tokenizeMultiLineComment(int next, int start) { |
771 bool asciiOnlyComment = true; // Track if the entire comment is ASCII. | 768 bool asciiOnlyComment = true; // Track if the entire comment is ASCII. |
772 bool asciiOnlyLines = true; // Track ASCII since the last handleUnicode. | 769 bool asciiOnlyLines = true; // Track ASCII since the last handleUnicode. |
773 int unicodeStart = start; | 770 int unicodeStart = start; |
774 int nesting = 1; | 771 int nesting = 1; |
775 next = advance(); | 772 next = advance(); |
776 while (true) { | 773 while (true) { |
777 if (identical($EOF, next)) { | 774 if (identical($EOF, next)) { |
778 if (!asciiOnlyLines) handleUnicode(unicodeStart); | 775 if (!asciiOnlyLines) handleUnicode(unicodeStart); |
779 unterminated('/*'); | 776 unterminated('/*'); |
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
917 while (!identical(next, quoteChar)) { | 914 while (!identical(next, quoteChar)) { |
918 if (identical(next, $BACKSLASH)) { | 915 if (identical(next, $BACKSLASH)) { |
919 next = advance(); | 916 next = advance(); |
920 } else if (identical(next, $$)) { | 917 } else if (identical(next, $$)) { |
921 if (!asciiOnly) handleUnicode(start); | 918 if (!asciiOnly) handleUnicode(start); |
922 next = tokenizeStringInterpolation(start, asciiOnly); | 919 next = tokenizeStringInterpolation(start, asciiOnly); |
923 start = scanOffset; | 920 start = scanOffset; |
924 asciiOnly = true; | 921 asciiOnly = true; |
925 continue; | 922 continue; |
926 } | 923 } |
927 if (next <= $CR | 924 if (next <= $CR && |
928 && (identical(next, $LF) || | 925 (identical(next, $LF) || |
929 identical(next, $CR) || | 926 identical(next, $CR) || |
930 identical(next, $EOF))) { | 927 identical(next, $EOF))) { |
931 if (!asciiOnly) handleUnicode(start); | 928 if (!asciiOnly) handleUnicode(start); |
932 return unterminatedString(quoteChar); | 929 return unterminatedString(quoteChar); |
933 } | 930 } |
934 if (next > 127) asciiOnly = false; | 931 if (next > 127) asciiOnly = false; |
935 next = advance(); | 932 next = advance(); |
936 } | 933 } |
937 if (!asciiOnly) handleUnicode(start); | 934 if (!asciiOnly) handleUnicode(start); |
938 // Advance past the quote character. | 935 // Advance past the quote character. |
(...skipping 14 matching lines...) Expand all Loading... |
953 } | 950 } |
954 | 951 |
955 int tokenizeInterpolatedExpression(int next) { | 952 int tokenizeInterpolatedExpression(int next) { |
956 appendBeginGroup(STRING_INTERPOLATION_INFO); | 953 appendBeginGroup(STRING_INTERPOLATION_INFO); |
957 beginToken(); // The expression starts here. | 954 beginToken(); // The expression starts here. |
958 next = advance(); // Move past the curly bracket. | 955 next = advance(); // Move past the curly bracket. |
959 while (!identical(next, $EOF) && !identical(next, $STX)) { | 956 while (!identical(next, $EOF) && !identical(next, $STX)) { |
960 next = bigSwitch(next); | 957 next = bigSwitch(next); |
961 } | 958 } |
962 if (identical(next, $EOF)) return next; | 959 if (identical(next, $EOF)) return next; |
963 next = advance(); // Move past the $STX. | 960 next = advance(); // Move past the $STX. |
964 beginToken(); // The string interpolation suffix starts here. | 961 beginToken(); // The string interpolation suffix starts here. |
965 return next; | 962 return next; |
966 } | 963 } |
967 | 964 |
968 int tokenizeInterpolatedIdentifier(int next) { | 965 int tokenizeInterpolatedIdentifier(int next) { |
969 appendPrecedenceToken(STRING_INTERPOLATION_IDENTIFIER_INFO); | 966 appendPrecedenceToken(STRING_INTERPOLATION_IDENTIFIER_INFO); |
970 | 967 |
971 if ($a <= next && next <= $z) { | 968 if ($a <= next && next <= $z) { |
972 beginToken(); // The identifier starts here. | 969 beginToken(); // The identifier starts here. |
973 next = tokenizeKeywordOrIdentifier(next, false); | 970 next = tokenizeKeywordOrIdentifier(next, false); |
(...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1180 | 1177 |
1181 PrecedenceInfo closeBraceInfoFor(BeginGroupToken begin) { | 1178 PrecedenceInfo closeBraceInfoFor(BeginGroupToken begin) { |
1182 return const { | 1179 return const { |
1183 '(': CLOSE_PAREN_INFO, | 1180 '(': CLOSE_PAREN_INFO, |
1184 '[': CLOSE_SQUARE_BRACKET_INFO, | 1181 '[': CLOSE_SQUARE_BRACKET_INFO, |
1185 '{': CLOSE_CURLY_BRACKET_INFO, | 1182 '{': CLOSE_CURLY_BRACKET_INFO, |
1186 '<': GT_INFO, | 1183 '<': GT_INFO, |
1187 r'${': CLOSE_CURLY_BRACKET_INFO, | 1184 r'${': CLOSE_CURLY_BRACKET_INFO, |
1188 }[begin.value]; | 1185 }[begin.value]; |
1189 } | 1186 } |
OLD | NEW |