| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 part of scanner; | 5 part of scanner; |
| 6 | 6 |
| 7 abstract class Scanner { | 7 abstract class Scanner { |
| 8 Token tokenize(); | 8 Token tokenize(); |
| 9 | 9 |
| 10 factory Scanner(SourceFile file, {bool includeComments: false}) { | 10 factory Scanner(SourceFile file, {bool includeComments: false}) { |
| (...skipping 358 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 369 if (identical(next, $1) || identical(next, $2) || identical(next, $3) | 369 if (identical(next, $1) || identical(next, $2) || identical(next, $3) |
| 370 || identical(next, $4) || identical(next, $5) || identical(next, $6) | 370 || identical(next, $4) || identical(next, $5) || identical(next, $6) |
| 371 || identical(next, $7) || identical(next, $8) || identical(next, $9)) { | 371 || identical(next, $7) || identical(next, $8) || identical(next, $9)) { |
| 372 return tokenizeNumber(next); | 372 return tokenizeNumber(next); |
| 373 } | 373 } |
| 374 | 374 |
| 375 if (identical(next, $EOF)) { | 375 if (identical(next, $EOF)) { |
| 376 return $EOF; | 376 return $EOF; |
| 377 } | 377 } |
| 378 if (next < 0x1f) { | 378 if (next < 0x1f) { |
| 379 return error("unexpected character $next"); | 379 return error("unexpected character $next", next); |
| 380 } | 380 } |
| 381 | 381 |
| 382 next = currentAsUnicode(next); | 382 next = currentAsUnicode(next); |
| 383 | 383 |
| 384 // The following are non-ASCII characters. | 384 // The following are non-ASCII characters. |
| 385 | 385 |
| 386 if (identical(next, $NBSP)) { | 386 if (identical(next, $NBSP)) { |
| 387 appendWhiteSpace(next); | 387 appendWhiteSpace(next); |
| 388 return advance(); | 388 return advance(); |
| 389 } | 389 } |
| 390 | 390 |
| 391 return error("unexpected unicode character $next"); | 391 return error("unexpected unicode character $next", next); |
| 392 } | 392 } |
| 393 | 393 |
| 394 int tokenizeTag(int next) { | 394 int tokenizeTag(int next) { |
| 395 // # or #!.*[\n\r] | 395 // # or #!.*[\n\r] |
| 396 if (scanOffset == 0) { | 396 if (scanOffset == 0) { |
| 397 if (identical(peek(), $BANG)) { | 397 if (identical(peek(), $BANG)) { |
| 398 int start = scanOffset + 1; | 398 int start = scanOffset + 1; |
| 399 bool asciiOnly = true; | 399 bool asciiOnly = true; |
| 400 do { | 400 do { |
| 401 next = advance(); | 401 next = advance(); |
| (...skipping 209 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 611 next = advance(); // Advance past the $x or $X. | 611 next = advance(); // Advance past the $x or $X. |
| 612 bool hasDigits = false; | 612 bool hasDigits = false; |
| 613 while (true) { | 613 while (true) { |
| 614 next = advance(); | 614 next = advance(); |
| 615 if (($0 <= next && next <= $9) | 615 if (($0 <= next && next <= $9) |
| 616 || ($A <= next && next <= $F) | 616 || ($A <= next && next <= $F) |
| 617 || ($a <= next && next <= $f)) { | 617 || ($a <= next && next <= $f)) { |
| 618 hasDigits = true; | 618 hasDigits = true; |
| 619 } else { | 619 } else { |
| 620 if (!hasDigits) { | 620 if (!hasDigits) { |
| 621 return error("hex digit expected"); | 621 return error("hex digit expected", next); |
| 622 } | 622 } |
| 623 appendSubstringToken(HEXADECIMAL_INFO, start, true); | 623 appendSubstringToken(HEXADECIMAL_INFO, start, true); |
| 624 return next; | 624 return next; |
| 625 } | 625 } |
| 626 } | 626 } |
| 627 } | 627 } |
| 628 | 628 |
| 629 int tokenizeDotsOrNumber(int next) { | 629 int tokenizeDotsOrNumber(int next) { |
| 630 int start = scanOffset; | 630 int start = scanOffset; |
| 631 next = advance(); | 631 next = advance(); |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| /// Scans an optional sign ($PLUS or $MINUS) followed by a run of decimal digits, starting at [next]. |
| /// Returns the first character that is not a digit. If no digit was seen at all, |
| /// reports "digit expected" through [error] and returns whatever [error] returns. |
| 675 int tokenizeExponent(int next) { | 675 int tokenizeExponent(int next) { |
| 676 if (identical(next, $PLUS) || identical(next, $MINUS)) { | 676 if (identical(next, $PLUS) || identical(next, $MINUS)) { |
| 677 next = advance(); | 677 next = advance(); |
| 678 } | 678 } |
| 679 bool hasDigits = false; | 679 bool hasDigits = false; |
| 680 while (true) { | 680 while (true) { |
| 681 if ($0 <= next && next <= $9) { | 681 if ($0 <= next && next <= $9) { |
| 682 hasDigits = true; | 682 hasDigits = true; |
| 683 } else { | 683 } else { |
| // First non-digit character: it is an error only if no digit preceded it. |
| 684 if (!hasDigits) { | 684 if (!hasDigits) { |
| 685 return error("digit expected"); | 685 return error("digit expected", next); |
| 686 } | 686 } |
| 687 return next; | 687 return next; |
| 688 } | 688 } |
| // Reached only when [next] was a digit: move on to the following character. |
| 689 next = advance(); | 689 next = advance(); |
| 690 } | 690 } |
| 691 } | 691 } |
| 692 | 692 |
| 693 int tokenizeSlashOrComment(int next) { | 693 int tokenizeSlashOrComment(int next) { |
| 694 int start = scanOffset; | 694 int start = scanOffset; |
| 695 next = advance(); | 695 next = advance(); |
| (...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 811 while (true) { | 811 while (true) { |
| 812 if (($a <= next && next <= $z) || | 812 if (($a <= next && next <= $z) || |
| 813 ($A <= next && next <= $Z) || | 813 ($A <= next && next <= $Z) || |
| 814 ($0 <= next && next <= $9) || | 814 ($0 <= next && next <= $9) || |
| 815 identical(next, $_) || | 815 identical(next, $_) || |
| 816 (identical(next, $$) && allowDollar)) { | 816 (identical(next, $$) && allowDollar)) { |
| 817 next = advance(); | 817 next = advance(); |
| 818 } else { | 818 } else { |
| 819 // Identifier ends here. | 819 // Identifier ends here. |
| 820 if (start == scanOffset) { | 820 if (start == scanOffset) { |
| 821 return error("expected identifier"); | 821 return error("expected identifier", next); |
| 822 } else { | 822 } else { |
| 823 appendSubstringToken(IDENTIFIER_INFO, start, true); | 823 appendSubstringToken(IDENTIFIER_INFO, start, true); |
| 824 } | 824 } |
| 825 return next; | 825 return next; |
| 826 } | 826 } |
| 827 } | 827 } |
| 828 } | 828 } |
| 829 | 829 |
| 830 int tokenizeAt(int next) { | 830 int tokenizeAt(int next) { |
| 831 appendPrecedenceToken(AT_INFO); | 831 appendPrecedenceToken(AT_INFO); |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 875 next = tokenizeStringInterpolation(start, asciiOnly); | 875 next = tokenizeStringInterpolation(start, asciiOnly); |
| 876 start = scanOffset; | 876 start = scanOffset; |
| 877 asciiOnly = true; | 877 asciiOnly = true; |
| 878 continue; | 878 continue; |
| 879 } | 879 } |
| 880 if (next <= $CR | 880 if (next <= $CR |
| 881 && (identical(next, $LF) || | 881 && (identical(next, $LF) || |
| 882 identical(next, $CR) || | 882 identical(next, $CR) || |
| 883 identical(next, $EOF))) { | 883 identical(next, $EOF))) { |
| 884 if (!asciiOnly) handleUnicode(start); | 884 if (!asciiOnly) handleUnicode(start); |
| 885 return error("unterminated string literal"); | 885 return error("unterminated string literal", next); |
| 886 } | 886 } |
| 887 if (next > 127) asciiOnly = false; | 887 if (next > 127) asciiOnly = false; |
| 888 next = advance(); | 888 next = advance(); |
| 889 } | 889 } |
| 890 if (!asciiOnly) handleUnicode(start); | 890 if (!asciiOnly) handleUnicode(start); |
| 891 // Advance past the quote character. | 891 // Advance past the quote character. |
| 892 next = advance(); | 892 next = advance(); |
| 893 appendSubstringToken(STRING_INFO, start, asciiOnly); | 893 appendSubstringToken(STRING_INFO, start, asciiOnly); |
| 894 return next; | 894 return next; |
| 895 } | 895 } |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 930 bool asciiOnly = true; | 930 bool asciiOnly = true; |
| 931 next = advance(); // Advance past the quote. | 931 next = advance(); // Advance past the quote. |
| 932 while (next != $EOF) { | 932 while (next != $EOF) { |
| 933 if (identical(next, quoteChar)) { | 933 if (identical(next, quoteChar)) { |
| 934 if (!asciiOnly) handleUnicode(start); | 934 if (!asciiOnly) handleUnicode(start); |
| 935 next = advance(); | 935 next = advance(); |
| 936 appendSubstringToken(STRING_INFO, start, asciiOnly); | 936 appendSubstringToken(STRING_INFO, start, asciiOnly); |
| 937 return next; | 937 return next; |
| 938 } else if (identical(next, $LF) || identical(next, $CR)) { | 938 } else if (identical(next, $LF) || identical(next, $CR)) { |
| 939 if (!asciiOnly) handleUnicode(start); | 939 if (!asciiOnly) handleUnicode(start); |
| 940 return error("unterminated string literal"); | 940 return error("unterminated string literal", next); |
| 941 } else if (next > 127) { | 941 } else if (next > 127) { |
| 942 asciiOnly = false; | 942 asciiOnly = false; |
| 943 } | 943 } |
| 944 next = advance(); | 944 next = advance(); |
| 945 } | 945 } |
| 946 if (!asciiOnly) handleUnicode(start); | 946 if (!asciiOnly) handleUnicode(start); |
| 947 return error("unterminated string literal"); | 947 return error("unterminated string literal", next); |
| 948 } | 948 } |
| 949 | 949 |
| 950 int tokenizeMultiLineRawString(int quoteChar, int start) { | 950 int tokenizeMultiLineRawString(int quoteChar, int start) { |
| 951 bool asciiOnlyString = true; | 951 bool asciiOnlyString = true; |
| 952 bool asciiOnlyLine = true; | 952 bool asciiOnlyLine = true; |
| 953 int unicodeStart = start; | 953 int unicodeStart = start; |
| 954 int next = advance(); // Advance past the (last) quote (of three). | 954 int next = advance(); // Advance past the (last) quote (of three). |
| 955 outer: while (!identical(next, $EOF)) { | 955 outer: while (!identical(next, $EOF)) { |
| 956 while (!identical(next, quoteChar)) { | 956 while (!identical(next, quoteChar)) { |
| 957 if (identical(next, $LF)) { | 957 if (identical(next, $LF)) { |
| (...skipping 16 matching lines...) Expand all Loading... |
| 974 next = advance(); | 974 next = advance(); |
| 975 if (identical(next, quoteChar)) { | 975 if (identical(next, quoteChar)) { |
| 976 if (!asciiOnlyLine) handleUnicode(unicodeStart); | 976 if (!asciiOnlyLine) handleUnicode(unicodeStart); |
| 977 next = advance(); | 977 next = advance(); |
| 978 appendSubstringToken(STRING_INFO, start, asciiOnlyString); | 978 appendSubstringToken(STRING_INFO, start, asciiOnlyString); |
| 979 return next; | 979 return next; |
| 980 } | 980 } |
| 981 } | 981 } |
| 982 } | 982 } |
| 983 if (!asciiOnlyLine) handleUnicode(unicodeStart); | 983 if (!asciiOnlyLine) handleUnicode(unicodeStart); |
| 984 return error("unterminated string literal"); | 984 return error("unterminated string literal", next); |
| 985 } | 985 } |
| 986 | 986 |
| 987 int tokenizeMultiLineString(int quoteChar, int start, bool raw) { | 987 int tokenizeMultiLineString(int quoteChar, int start, bool raw) { |
| 988 if (raw) return tokenizeMultiLineRawString(quoteChar, start); | 988 if (raw) return tokenizeMultiLineRawString(quoteChar, start); |
| 989 bool asciiOnlyString = true; | 989 bool asciiOnlyString = true; |
| 990 bool asciiOnlyLine = true; | 990 bool asciiOnlyLine = true; |
| 991 int unicodeStart = start; | 991 int unicodeStart = start; |
| 992 int next = advance(); // Advance past the (last) quote (of three). | 992 int next = advance(); // Advance past the (last) quote (of three). |
| 993 while (!identical(next, $EOF)) { | 993 while (!identical(next, $EOF)) { |
| 994 if (identical(next, $$)) { | 994 if (identical(next, $$)) { |
| (...skipping 30 matching lines...) Expand all Loading... |
| 1025 unicodeStart = scanOffset; | 1025 unicodeStart = scanOffset; |
| 1026 } | 1026 } |
| 1027 lineFeedInMultiline(); | 1027 lineFeedInMultiline(); |
| 1028 } else if (next > 127) { | 1028 } else if (next > 127) { |
| 1029 asciiOnlyString = false; | 1029 asciiOnlyString = false; |
| 1030 asciiOnlyLine = false; | 1030 asciiOnlyLine = false; |
| 1031 } | 1031 } |
| 1032 next = advance(); | 1032 next = advance(); |
| 1033 } | 1033 } |
| 1034 if (!asciiOnlyLine) handleUnicode(unicodeStart); | 1034 if (!asciiOnlyLine) handleUnicode(unicodeStart); |
| 1035 return error("unterminated string literal"); | 1035 return error("unterminated string literal", next); |
| 1036 } | 1036 } |
| 1037 | 1037 |
| /// Appends a BAD_INPUT_INFO string token carrying [message] and returns the character to continue scanning from. |
| /// [next] is the character the caller was looking at when the error was detected. |
| /// If it is $EOF, $EOF is returned without calling advance(); otherwise the result of advance() is returned. |
| 1038 int error(String message) { | 1038 int error(String message, int next) { |
| 1039 appendStringToken(BAD_INPUT_INFO, message); | 1039 appendStringToken(BAD_INPUT_INFO, message); |
| 1040 if (identical(next, $EOF)) return $EOF; |
| 1040 return advance(); // Ensure progress. | 1041 return advance(); // Ensure progress. |
| 1041 } | 1042 } |
| 1042 | 1043 |
| 1043 void unmatchedBeginGroup(BeginGroupToken begin) { | 1044 void unmatchedBeginGroup(BeginGroupToken begin) { |
| 1044 String error = 'unmatched "${begin.stringValue}"'; | 1045 String error = 'unmatched "${begin.stringValue}"'; |
| 1045 Token close = | 1046 Token close = |
| 1046 new StringToken.fromString( | 1047 new StringToken.fromString( |
| 1047 BAD_INPUT_INFO, error, begin.charOffset, canonicalize: true); | 1048 BAD_INPUT_INFO, error, begin.charOffset, canonicalize: true); |
| 1048 | 1049 |
| 1049 // We want to ensure that unmatched BeginGroupTokens are reported | 1050 // We want to ensure that unmatched BeginGroupTokens are reported |
| (...skipping 10 matching lines...) Expand all Loading... |
| 1060 // ignore the [close] token (assuming it's correct), then the error will be | 1061 // ignore the [close] token (assuming it's correct), then the error will be |
| 1061 // reported when parsing the [next] token. | 1062 // reported when parsing the [next] token. |
| 1062 | 1063 |
| 1063 Token next = new StringToken.fromString( | 1064 Token next = new StringToken.fromString( |
| 1064 BAD_INPUT_INFO, error, begin.charOffset, canonicalize: true); | 1065 BAD_INPUT_INFO, error, begin.charOffset, canonicalize: true); |
| 1065 begin.endGroup = close; | 1066 begin.endGroup = close; |
| 1066 close.next = next; | 1067 close.next = next; |
| 1067 next.next = begin.next; | 1068 next.next = begin.next; |
| 1068 } | 1069 } |
| 1069 } | 1070 } |
| OLD | NEW |