| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 library dart2js.scanner; | 5 library dart2js.scanner; |
| 6 | 6 |
| 7 import '../io/source_file.dart' show | 7 import '../io/source_file.dart' show SourceFile, Utf8BytesSourceFile; |
| 8 SourceFile, | 8 import '../tokens/keyword.dart' show Keyword, KeywordState; |
| 9 Utf8BytesSourceFile; | |
| 10 import '../tokens/keyword.dart' show | |
| 11 Keyword, | |
| 12 KeywordState; | |
| 13 import '../tokens/precedence.dart'; | 9 import '../tokens/precedence.dart'; |
| 14 import '../tokens/precedence_constants.dart'; | 10 import '../tokens/precedence_constants.dart'; |
| 15 import '../tokens/token.dart'; | 11 import '../tokens/token.dart'; |
| 16 import '../tokens/token_constants.dart'; | 12 import '../tokens/token_constants.dart'; |
| 17 import '../util/characters.dart'; | 13 import '../util/characters.dart'; |
| 18 | 14 |
| 19 import 'string_scanner.dart' show | 15 import 'string_scanner.dart' show StringScanner; |
| 20 StringScanner; | 16 import 'utf8_bytes_scanner.dart' show Utf8BytesScanner; |
| 21 import 'utf8_bytes_scanner.dart' show | |
| 22 Utf8BytesScanner; | |
| 23 | |
| 24 | 17 |
| 25 abstract class Scanner { | 18 abstract class Scanner { |
| 26 Token tokenize(); | 19 Token tokenize(); |
| 27 | 20 |
| 28 factory Scanner(SourceFile file, | 21 factory Scanner(SourceFile file, {bool includeComments: false}) { |
| 29 {bool includeComments: false}) { | |
| 30 if (file is Utf8BytesSourceFile) { | 22 if (file is Utf8BytesSourceFile) { |
| 31 return new Utf8BytesScanner(file, includeComments: includeComments); | 23 return new Utf8BytesScanner(file, includeComments: includeComments); |
| 32 } else { | 24 } else { |
| 33 return new StringScanner(file, includeComments: includeComments); | 25 return new StringScanner(file, includeComments: includeComments); |
| 34 } | 26 } |
| 35 } | 27 } |
| 36 } | 28 } |
| 37 | 29 |
| 38 abstract class AbstractScanner implements Scanner { | 30 abstract class AbstractScanner implements Scanner { |
| 39 // TODO(ahe): Move this class to implementation. | 31 // TODO(ahe): Move this class to implementation. |
| (...skipping 25 matching lines...) Expand all Loading... |
| 65 | 57 |
| 66 /** | 58 /** |
| 67 * The source file that is being scanned. This field can be [:null:]. | 59 * The source file that is being scanned. This field can be [:null:]. |
| 68 * If the source file is available, the scanner assigns its [:lineStarts:] and | 60 * If the source file is available, the scanner assigns its [:lineStarts:] and |
| 69 * [:length:] fields at the end of [tokenize]. | 61 * [:length:] fields at the end of [tokenize]. |
| 70 */ | 62 */ |
| 71 final SourceFile file; | 63 final SourceFile file; |
| 72 | 64 |
| 73 final List<int> lineStarts = <int>[0]; | 65 final List<int> lineStarts = <int>[0]; |
| 74 | 66 |
| 75 AbstractScanner( | 67 AbstractScanner(this.file, this.includeComments) { |
| 76 this.file, this.includeComments) { | |
| 77 this.tail = this.tokens; | 68 this.tail = this.tokens; |
| 78 } | 69 } |
| 79 | 70 |
| 80 /** | 71 /** |
| 81 * Advances and returns the next character. | 72 * Advances and returns the next character. |
| 82 * | 73 * |
| 83 * If the next character is non-ASCII, then the returned value depends on the | 74 * If the next character is non-ASCII, then the returned value depends on the |
| 84 * scanner implementation. The [Utf8BytesScanner] returns a UTF-8 byte, while | 75 * scanner implementation. The [Utf8BytesScanner] returns a UTF-8 byte, while |
| 85 * the [StringScanner] returns a UTF-16 code unit. | 76 * the [StringScanner] returns a UTF-16 code unit. |
| 86 * | 77 * |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 159 | 150 |
| 160 /** | 151 /** |
| 161 * Appends a substring from the scan offset [:start:] to the current | 152 * Appends a substring from the scan offset [:start:] to the current |
| 162 * [:scanOffset:] plus the [:extraOffset:]. For example, if the current | 153 * [:scanOffset:] plus the [:extraOffset:]. For example, if the current |
| 163 * scanOffset is 10, then [:appendSubstringToken(5, -1):] will append the | 154 * scanOffset is 10, then [:appendSubstringToken(5, -1):] will append the |
| 164 * substring string [5,9). | 155 * substring string [5,9). |
| 165 * | 156 * |
| 166 * Note that [extraOffset] can only be used if the covered character(s) are | 157 * Note that [extraOffset] can only be used if the covered character(s) are |
| 167 * known to be ASCII. | 158 * known to be ASCII. |
| 168 */ | 159 */ |
| 169 void appendSubstringToken(PrecedenceInfo info, int start, | 160 void appendSubstringToken(PrecedenceInfo info, int start, bool asciiOnly, |
| 170 bool asciiOnly, [int extraOffset]); | 161 [int extraOffset]); |
| 171 | 162 |
| 172 /** Documentation in subclass [ArrayBasedScanner]. */ | 163 /** Documentation in subclass [ArrayBasedScanner]. */ |
| 173 void appendPrecedenceToken(PrecedenceInfo info); | 164 void appendPrecedenceToken(PrecedenceInfo info); |
| 174 | 165 |
| 175 /** Documentation in subclass [ArrayBasedScanner]. */ | 166 /** Documentation in subclass [ArrayBasedScanner]. */ |
| 176 int select(int choice, PrecedenceInfo yes, PrecedenceInfo no); | 167 int select(int choice, PrecedenceInfo yes, PrecedenceInfo no); |
| 177 | 168 |
| 178 /** Documentation in subclass [ArrayBasedScanner]. */ | 169 /** Documentation in subclass [ArrayBasedScanner]. */ |
| 179 void appendKeywordToken(Keyword keyword); | 170 void appendKeywordToken(Keyword keyword); |
| 180 | 171 |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 229 // One additional line start at the end, see [SourceFile.lineStarts]. | 220 // One additional line start at the end, see [SourceFile.lineStarts]. |
| 230 lineStarts.add(stringOffset + 1); | 221 lineStarts.add(stringOffset + 1); |
| 231 file.lineStarts = lineStarts; | 222 file.lineStarts = lineStarts; |
| 232 } | 223 } |
| 233 | 224 |
| 234 return firstToken(); | 225 return firstToken(); |
| 235 } | 226 } |
| 236 | 227 |
| 237 int bigSwitch(int next) { | 228 int bigSwitch(int next) { |
| 238 beginToken(); | 229 beginToken(); |
| 239 if (identical(next, $SPACE) || identical(next, $TAB) | 230 if (identical(next, $SPACE) || |
| 240 || identical(next, $LF) || identical(next, $CR)) { | 231 identical(next, $TAB) || |
| 232 identical(next, $LF) || |
| 233 identical(next, $CR)) { |
| 241 appendWhiteSpace(next); | 234 appendWhiteSpace(next); |
| 242 next = advance(); | 235 next = advance(); |
| 243 // Sequences of spaces are common, so advance through them fast. | 236 // Sequences of spaces are common, so advance through them fast. |
| 244 while (identical(next, $SPACE)) { | 237 while (identical(next, $SPACE)) { |
| 245 // We don't invoke [:appendWhiteSpace(next):] here for efficiency, | 238 // We don't invoke [:appendWhiteSpace(next):] here for efficiency, |
| 246 // assuming that it does not do anything for space characters. | 239 // assuming that it does not do anything for space characters. |
| 247 next = advance(); | 240 next = advance(); |
| 248 } | 241 } |
| 249 return next; | 242 return next; |
| 250 } | 243 } |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 347 // Type parameters and arguments cannot contain semicolon. | 340 // Type parameters and arguments cannot contain semicolon. |
| 348 discardOpenLt(); | 341 discardOpenLt(); |
| 349 return advance(); | 342 return advance(); |
| 350 } | 343 } |
| 351 | 344 |
| 352 if (identical(next, $QUESTION)) { | 345 if (identical(next, $QUESTION)) { |
| 353 return tokenizeQuestion(next); | 346 return tokenizeQuestion(next); |
| 354 } | 347 } |
| 355 | 348 |
| 356 if (identical(next, $CLOSE_SQUARE_BRACKET)) { | 349 if (identical(next, $CLOSE_SQUARE_BRACKET)) { |
| 357 return appendEndGroup(CLOSE_SQUARE_BRACKET_INFO, | 350 return appendEndGroup( |
| 358 OPEN_SQUARE_BRACKET_TOKEN); | 351 CLOSE_SQUARE_BRACKET_INFO, OPEN_SQUARE_BRACKET_TOKEN); |
| 359 } | 352 } |
| 360 | 353 |
| 361 if (identical(next, $BACKPING)) { | 354 if (identical(next, $BACKPING)) { |
| 362 appendPrecedenceToken(BACKPING_INFO); | 355 appendPrecedenceToken(BACKPING_INFO); |
| 363 return advance(); | 356 return advance(); |
| 364 } | 357 } |
| 365 | 358 |
| 366 if (identical(next, $OPEN_CURLY_BRACKET)) { | 359 if (identical(next, $OPEN_CURLY_BRACKET)) { |
| 367 appendBeginGroup(OPEN_CURLY_BRACKET_INFO); | 360 appendBeginGroup(OPEN_CURLY_BRACKET_INFO); |
| 368 return advance(); | 361 return advance(); |
| 369 } | 362 } |
| 370 | 363 |
| 371 if (identical(next, $CLOSE_CURLY_BRACKET)) { | 364 if (identical(next, $CLOSE_CURLY_BRACKET)) { |
| 372 return appendEndGroup(CLOSE_CURLY_BRACKET_INFO, | 365 return appendEndGroup(CLOSE_CURLY_BRACKET_INFO, OPEN_CURLY_BRACKET_TOKEN); |
| 373 OPEN_CURLY_BRACKET_TOKEN); | |
| 374 } | 366 } |
| 375 | 367 |
| 376 if (identical(next, $SLASH)) { | 368 if (identical(next, $SLASH)) { |
| 377 return tokenizeSlashOrComment(next); | 369 return tokenizeSlashOrComment(next); |
| 378 } | 370 } |
| 379 | 371 |
| 380 if (identical(next, $AT)) { | 372 if (identical(next, $AT)) { |
| 381 return tokenizeAt(next); | 373 return tokenizeAt(next); |
| 382 } | 374 } |
| 383 | 375 |
| 384 if (identical(next, $DQ) || identical(next, $SQ)) { | 376 if (identical(next, $DQ) || identical(next, $SQ)) { |
| 385 return tokenizeString(next, scanOffset, false); | 377 return tokenizeString(next, scanOffset, false); |
| 386 } | 378 } |
| 387 | 379 |
| 388 if (identical(next, $PERIOD)) { | 380 if (identical(next, $PERIOD)) { |
| 389 return tokenizeDotsOrNumber(next); | 381 return tokenizeDotsOrNumber(next); |
| 390 } | 382 } |
| 391 | 383 |
| 392 if (identical(next, $0)) { | 384 if (identical(next, $0)) { |
| 393 return tokenizeHexOrNumber(next); | 385 return tokenizeHexOrNumber(next); |
| 394 } | 386 } |
| 395 | 387 |
| 396 // TODO(ahe): Would a range check be faster? | 388 // TODO(ahe): Would a range check be faster? |
| 397 if (identical(next, $1) || identical(next, $2) || identical(next, $3) | 389 if (identical(next, $1) || |
| 398 || identical(next, $4) || identical(next, $5) || identical(next, $6) | 390 identical(next, $2) || |
| 399 || identical(next, $7) || identical(next, $8) || identical(next, $9)) { | 391 identical(next, $3) || |
| 392 identical(next, $4) || |
| 393 identical(next, $5) || |
| 394 identical(next, $6) || |
| 395 identical(next, $7) || |
| 396 identical(next, $8) || |
| 397 identical(next, $9)) { |
| 400 return tokenizeNumber(next); | 398 return tokenizeNumber(next); |
| 401 } | 399 } |
| 402 | 400 |
| 403 if (identical(next, $EOF)) { | 401 if (identical(next, $EOF)) { |
| 404 return $EOF; | 402 return $EOF; |
| 405 } | 403 } |
| 406 if (next < 0x1f) { | 404 if (next < 0x1f) { |
| 407 return unexpected(next); | 405 return unexpected(next); |
| 408 } | 406 } |
| 409 | 407 |
| (...skipping 12 matching lines...) Expand all Loading... |
| 422 int tokenizeTag(int next) { | 420 int tokenizeTag(int next) { |
| 423 // # or #!.*[\n\r] | 421 // # or #!.*[\n\r] |
| 424 if (scanOffset == 0) { | 422 if (scanOffset == 0) { |
| 425 if (identical(peek(), $BANG)) { | 423 if (identical(peek(), $BANG)) { |
| 426 int start = scanOffset + 1; | 424 int start = scanOffset + 1; |
| 427 bool asciiOnly = true; | 425 bool asciiOnly = true; |
| 428 do { | 426 do { |
| 429 next = advance(); | 427 next = advance(); |
| 430 if (next > 127) asciiOnly = false; | 428 if (next > 127) asciiOnly = false; |
| 431 } while (!identical(next, $LF) && | 429 } while (!identical(next, $LF) && |
| 432 !identical(next, $CR) && | 430 !identical(next, $CR) && |
| 433 !identical(next, $EOF)); | 431 !identical(next, $EOF)); |
| 434 if (!asciiOnly) handleUnicode(start); | 432 if (!asciiOnly) handleUnicode(start); |
| 435 return next; | 433 return next; |
| 436 } | 434 } |
| 437 } | 435 } |
| 438 appendPrecedenceToken(HASH_INFO); | 436 appendPrecedenceToken(HASH_INFO); |
| 439 return advance(); | 437 return advance(); |
| 440 } | 438 } |
| 441 | 439 |
| 442 int tokenizeTilde(int next) { | 440 int tokenizeTilde(int next) { |
| 443 // ~ ~/ ~/= | 441 // ~ ~/ ~/= |
| (...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 648 } | 646 } |
| 649 return tokenizeNumber(next); | 647 return tokenizeNumber(next); |
| 650 } | 648 } |
| 651 | 649 |
| 652 int tokenizeHex(int next) { | 650 int tokenizeHex(int next) { |
| 653 int start = scanOffset; | 651 int start = scanOffset; |
| 654 next = advance(); // Advance past the $x or $X. | 652 next = advance(); // Advance past the $x or $X. |
| 655 bool hasDigits = false; | 653 bool hasDigits = false; |
| 656 while (true) { | 654 while (true) { |
| 657 next = advance(); | 655 next = advance(); |
| 658 if (($0 <= next && next <= $9) | 656 if (($0 <= next && next <= $9) || |
| 659 || ($A <= next && next <= $F) | 657 ($A <= next && next <= $F) || |
| 660 || ($a <= next && next <= $f)) { | 658 ($a <= next && next <= $f)) { |
| 661 hasDigits = true; | 659 hasDigits = true; |
| 662 } else { | 660 } else { |
| 663 if (!hasDigits) { | 661 if (!hasDigits) { |
| 664 unterminated('0x', shouldAdvance: false); | 662 unterminated('0x', shouldAdvance: false); |
| 665 return next; | 663 return next; |
| 666 } | 664 } |
| 667 appendSubstringToken(HEXADECIMAL_INFO, start, true); | 665 appendSubstringToken(HEXADECIMAL_INFO, start, true); |
| 668 return next; | 666 return next; |
| 669 } | 667 } |
| 670 } | 668 } |
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 759 identical($CR, next) || | 757 identical($CR, next) || |
| 760 identical($EOF, next)) { | 758 identical($EOF, next)) { |
| 761 if (!asciiOnly) handleUnicode(start); | 759 if (!asciiOnly) handleUnicode(start); |
| 762 appendComment(start, asciiOnly); | 760 appendComment(start, asciiOnly); |
| 763 return next; | 761 return next; |
| 764 } | 762 } |
| 765 } | 763 } |
| 766 return null; | 764 return null; |
| 767 } | 765 } |
| 768 | 766 |
| 769 | |
| 770 int tokenizeMultiLineComment(int next, int start) { | 767 int tokenizeMultiLineComment(int next, int start) { |
| 771 bool asciiOnlyComment = true; // Track if the entire comment is ASCII. | 768 bool asciiOnlyComment = true; // Track if the entire comment is ASCII. |
| 772 bool asciiOnlyLines = true; // Track ASCII since the last handleUnicode. | 769 bool asciiOnlyLines = true; // Track ASCII since the last handleUnicode. |
| 773 int unicodeStart = start; | 770 int unicodeStart = start; |
| 774 int nesting = 1; | 771 int nesting = 1; |
| 775 next = advance(); | 772 next = advance(); |
| 776 while (true) { | 773 while (true) { |
| 777 if (identical($EOF, next)) { | 774 if (identical($EOF, next)) { |
| 778 if (!asciiOnlyLines) handleUnicode(unicodeStart); | 775 if (!asciiOnlyLines) handleUnicode(unicodeStart); |
| 779 unterminated('/*'); | 776 unterminated('/*'); |
| (...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 917 while (!identical(next, quoteChar)) { | 914 while (!identical(next, quoteChar)) { |
| 918 if (identical(next, $BACKSLASH)) { | 915 if (identical(next, $BACKSLASH)) { |
| 919 next = advance(); | 916 next = advance(); |
| 920 } else if (identical(next, $$)) { | 917 } else if (identical(next, $$)) { |
| 921 if (!asciiOnly) handleUnicode(start); | 918 if (!asciiOnly) handleUnicode(start); |
| 922 next = tokenizeStringInterpolation(start, asciiOnly); | 919 next = tokenizeStringInterpolation(start, asciiOnly); |
| 923 start = scanOffset; | 920 start = scanOffset; |
| 924 asciiOnly = true; | 921 asciiOnly = true; |
| 925 continue; | 922 continue; |
| 926 } | 923 } |
| 927 if (next <= $CR | 924 if (next <= $CR && |
| 928 && (identical(next, $LF) || | 925 (identical(next, $LF) || |
| 929 identical(next, $CR) || | 926 identical(next, $CR) || |
| 930 identical(next, $EOF))) { | 927 identical(next, $EOF))) { |
| 931 if (!asciiOnly) handleUnicode(start); | 928 if (!asciiOnly) handleUnicode(start); |
| 932 return unterminatedString(quoteChar); | 929 return unterminatedString(quoteChar); |
| 933 } | 930 } |
| 934 if (next > 127) asciiOnly = false; | 931 if (next > 127) asciiOnly = false; |
| 935 next = advance(); | 932 next = advance(); |
| 936 } | 933 } |
| 937 if (!asciiOnly) handleUnicode(start); | 934 if (!asciiOnly) handleUnicode(start); |
| 938 // Advance past the quote character. | 935 // Advance past the quote character. |
| (...skipping 14 matching lines...) Expand all Loading... |
| 953 } | 950 } |
| 954 | 951 |
| 955 int tokenizeInterpolatedExpression(int next) { | 952 int tokenizeInterpolatedExpression(int next) { |
| 956 appendBeginGroup(STRING_INTERPOLATION_INFO); | 953 appendBeginGroup(STRING_INTERPOLATION_INFO); |
| 957 beginToken(); // The expression starts here. | 954 beginToken(); // The expression starts here. |
| 958 next = advance(); // Move past the curly bracket. | 955 next = advance(); // Move past the curly bracket. |
| 959 while (!identical(next, $EOF) && !identical(next, $STX)) { | 956 while (!identical(next, $EOF) && !identical(next, $STX)) { |
| 960 next = bigSwitch(next); | 957 next = bigSwitch(next); |
| 961 } | 958 } |
| 962 if (identical(next, $EOF)) return next; | 959 if (identical(next, $EOF)) return next; |
| 963 next = advance(); // Move past the $STX. | 960 next = advance(); // Move past the $STX. |
| 964 beginToken(); // The string interpolation suffix starts here. | 961 beginToken(); // The string interpolation suffix starts here. |
| 965 return next; | 962 return next; |
| 966 } | 963 } |
| 967 | 964 |
| 968 int tokenizeInterpolatedIdentifier(int next) { | 965 int tokenizeInterpolatedIdentifier(int next) { |
| 969 appendPrecedenceToken(STRING_INTERPOLATION_IDENTIFIER_INFO); | 966 appendPrecedenceToken(STRING_INTERPOLATION_IDENTIFIER_INFO); |
| 970 | 967 |
| 971 if ($a <= next && next <= $z) { | 968 if ($a <= next && next <= $z) { |
| 972 beginToken(); // The identifier starts here. | 969 beginToken(); // The identifier starts here. |
| 973 next = tokenizeKeywordOrIdentifier(next, false); | 970 next = tokenizeKeywordOrIdentifier(next, false); |
| (...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1180 | 1177 |
| 1181 PrecedenceInfo closeBraceInfoFor(BeginGroupToken begin) { | 1178 PrecedenceInfo closeBraceInfoFor(BeginGroupToken begin) { |
| 1182 return const { | 1179 return const { |
| 1183 '(': CLOSE_PAREN_INFO, | 1180 '(': CLOSE_PAREN_INFO, |
| 1184 '[': CLOSE_SQUARE_BRACKET_INFO, | 1181 '[': CLOSE_SQUARE_BRACKET_INFO, |
| 1185 '{': CLOSE_CURLY_BRACKET_INFO, | 1182 '{': CLOSE_CURLY_BRACKET_INFO, |
| 1186 '<': GT_INFO, | 1183 '<': GT_INFO, |
| 1187 r'${': CLOSE_CURLY_BRACKET_INFO, | 1184 r'${': CLOSE_CURLY_BRACKET_INFO, |
| 1188 }[begin.value]; | 1185 }[begin.value]; |
| 1189 } | 1186 } |
| OLD | NEW |