OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of scanner; | 5 part of scanner; |
6 | 6 |
7 abstract class Scanner { | 7 abstract class Scanner { |
8 Token tokenize(); | 8 Token tokenize(); |
9 | 9 |
10 factory Scanner(SourceFile file, {bool includeComments: false}) { | 10 factory Scanner(SourceFile file, {bool includeComments: false}) { |
(...skipping 358 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
369 if (identical(next, $1) || identical(next, $2) || identical(next, $3) | 369 if (identical(next, $1) || identical(next, $2) || identical(next, $3) |
370 || identical(next, $4) || identical(next, $5) || identical(next, $6) | 370 || identical(next, $4) || identical(next, $5) || identical(next, $6) |
371 || identical(next, $7) || identical(next, $8) || identical(next, $9)) { | 371 || identical(next, $7) || identical(next, $8) || identical(next, $9)) { |
372 return tokenizeNumber(next); | 372 return tokenizeNumber(next); |
373 } | 373 } |
374 | 374 |
375 if (identical(next, $EOF)) { | 375 if (identical(next, $EOF)) { |
376 return $EOF; | 376 return $EOF; |
377 } | 377 } |
378 if (next < 0x1f) { | 378 if (next < 0x1f) { |
379 return error("unexpected character $next"); | 379 return error("unexpected character $next", next); |
380 } | 380 } |
381 | 381 |
382 next = currentAsUnicode(next); | 382 next = currentAsUnicode(next); |
383 | 383 |
384 // The following are non-ASCII characters. | 384 // The following are non-ASCII characters. |
385 | 385 |
386 if (identical(next, $NBSP)) { | 386 if (identical(next, $NBSP)) { |
387 appendWhiteSpace(next); | 387 appendWhiteSpace(next); |
388 return advance(); | 388 return advance(); |
389 } | 389 } |
390 | 390 |
391 return error("unexpected unicode character $next"); | 391 return error("unexpected unicode character $next", next); |
392 } | 392 } |
393 | 393 |
394 int tokenizeTag(int next) { | 394 int tokenizeTag(int next) { |
395 // # or #!.*[\n\r] | 395 // # or #!.*[\n\r] |
396 if (scanOffset == 0) { | 396 if (scanOffset == 0) { |
397 if (identical(peek(), $BANG)) { | 397 if (identical(peek(), $BANG)) { |
398 int start = scanOffset + 1; | 398 int start = scanOffset + 1; |
399 bool asciiOnly = true; | 399 bool asciiOnly = true; |
400 do { | 400 do { |
401 next = advance(); | 401 next = advance(); |
(...skipping 209 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
611 next = advance(); // Advance past the $x or $X. | 611 next = advance(); // Advance past the $x or $X. |
612 bool hasDigits = false; | 612 bool hasDigits = false; |
613 while (true) { | 613 while (true) { |
614 next = advance(); | 614 next = advance(); |
615 if (($0 <= next && next <= $9) | 615 if (($0 <= next && next <= $9) |
616 || ($A <= next && next <= $F) | 616 || ($A <= next && next <= $F) |
617 || ($a <= next && next <= $f)) { | 617 || ($a <= next && next <= $f)) { |
618 hasDigits = true; | 618 hasDigits = true; |
619 } else { | 619 } else { |
620 if (!hasDigits) { | 620 if (!hasDigits) { |
621 return error("hex digit expected"); | 621 return error("hex digit expected", next); |
622 } | 622 } |
623 appendSubstringToken(HEXADECIMAL_INFO, start, true); | 623 appendSubstringToken(HEXADECIMAL_INFO, start, true); |
624 return next; | 624 return next; |
625 } | 625 } |
626 } | 626 } |
627 } | 627 } |
628 | 628 |
629 int tokenizeDotsOrNumber(int next) { | 629 int tokenizeDotsOrNumber(int next) { |
630 int start = scanOffset; | 630 int start = scanOffset; |
631 next = advance(); | 631 next = advance(); |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
/// Scans the exponent part of a number, starting at [next]: an optional
/// `+` or `-` sign followed by one or more decimal digits.
///
/// Returns the first character after the digits. If there is no digit at
/// all, reports "digit expected" through [error] and returns its result.
int tokenizeExponent(int next) {
  // Step over the optional sign.
  if (identical(next, $PLUS) || identical(next, $MINUS)) {
    next = advance();
  }

  // Consume the run of decimal digits, remembering whether any was seen.
  bool sawDigit = false;
  while ($0 <= next && next <= $9) {
    sawDigit = true;
    next = advance();
  }

  // [next] is now the first non-digit character.
  return sawDigit ? next : error("digit expected", next);
}
692 | 692 |
693 int tokenizeSlashOrComment(int next) { | 693 int tokenizeSlashOrComment(int next) { |
694 int start = scanOffset; | 694 int start = scanOffset; |
695 next = advance(); | 695 next = advance(); |
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
811 while (true) { | 811 while (true) { |
812 if (($a <= next && next <= $z) || | 812 if (($a <= next && next <= $z) || |
813 ($A <= next && next <= $Z) || | 813 ($A <= next && next <= $Z) || |
814 ($0 <= next && next <= $9) || | 814 ($0 <= next && next <= $9) || |
815 identical(next, $_) || | 815 identical(next, $_) || |
816 (identical(next, $$) && allowDollar)) { | 816 (identical(next, $$) && allowDollar)) { |
817 next = advance(); | 817 next = advance(); |
818 } else { | 818 } else { |
819 // Identifier ends here. | 819 // Identifier ends here. |
820 if (start == scanOffset) { | 820 if (start == scanOffset) { |
821 return error("expected identifier"); | 821 return error("expected identifier", next); |
822 } else { | 822 } else { |
823 appendSubstringToken(IDENTIFIER_INFO, start, true); | 823 appendSubstringToken(IDENTIFIER_INFO, start, true); |
824 } | 824 } |
825 return next; | 825 return next; |
826 } | 826 } |
827 } | 827 } |
828 } | 828 } |
829 | 829 |
830 int tokenizeAt(int next) { | 830 int tokenizeAt(int next) { |
831 appendPrecedenceToken(AT_INFO); | 831 appendPrecedenceToken(AT_INFO); |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
875 next = tokenizeStringInterpolation(start, asciiOnly); | 875 next = tokenizeStringInterpolation(start, asciiOnly); |
876 start = scanOffset; | 876 start = scanOffset; |
877 asciiOnly = true; | 877 asciiOnly = true; |
878 continue; | 878 continue; |
879 } | 879 } |
880 if (next <= $CR | 880 if (next <= $CR |
881 && (identical(next, $LF) || | 881 && (identical(next, $LF) || |
882 identical(next, $CR) || | 882 identical(next, $CR) || |
883 identical(next, $EOF))) { | 883 identical(next, $EOF))) { |
884 if (!asciiOnly) handleUnicode(start); | 884 if (!asciiOnly) handleUnicode(start); |
885 return error("unterminated string literal"); | 885 return error("unterminated string literal", next); |
886 } | 886 } |
887 if (next > 127) asciiOnly = false; | 887 if (next > 127) asciiOnly = false; |
888 next = advance(); | 888 next = advance(); |
889 } | 889 } |
890 if (!asciiOnly) handleUnicode(start); | 890 if (!asciiOnly) handleUnicode(start); |
891 // Advance past the quote character. | 891 // Advance past the quote character. |
892 next = advance(); | 892 next = advance(); |
893 appendSubstringToken(STRING_INFO, start, asciiOnly); | 893 appendSubstringToken(STRING_INFO, start, asciiOnly); |
894 return next; | 894 return next; |
895 } | 895 } |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
930 bool asciiOnly = true; | 930 bool asciiOnly = true; |
931 next = advance(); // Advance past the quote. | 931 next = advance(); // Advance past the quote. |
932 while (next != $EOF) { | 932 while (next != $EOF) { |
933 if (identical(next, quoteChar)) { | 933 if (identical(next, quoteChar)) { |
934 if (!asciiOnly) handleUnicode(start); | 934 if (!asciiOnly) handleUnicode(start); |
935 next = advance(); | 935 next = advance(); |
936 appendSubstringToken(STRING_INFO, start, asciiOnly); | 936 appendSubstringToken(STRING_INFO, start, asciiOnly); |
937 return next; | 937 return next; |
938 } else if (identical(next, $LF) || identical(next, $CR)) { | 938 } else if (identical(next, $LF) || identical(next, $CR)) { |
939 if (!asciiOnly) handleUnicode(start); | 939 if (!asciiOnly) handleUnicode(start); |
940 return error("unterminated string literal"); | 940 return error("unterminated string literal", next); |
941 } else if (next > 127) { | 941 } else if (next > 127) { |
942 asciiOnly = false; | 942 asciiOnly = false; |
943 } | 943 } |
944 next = advance(); | 944 next = advance(); |
945 } | 945 } |
946 if (!asciiOnly) handleUnicode(start); | 946 if (!asciiOnly) handleUnicode(start); |
947 return error("unterminated string literal"); | 947 return error("unterminated string literal", next); |
948 } | 948 } |
949 | 949 |
950 int tokenizeMultiLineRawString(int quoteChar, int start) { | 950 int tokenizeMultiLineRawString(int quoteChar, int start) { |
951 bool asciiOnlyString = true; | 951 bool asciiOnlyString = true; |
952 bool asciiOnlyLine = true; | 952 bool asciiOnlyLine = true; |
953 int unicodeStart = start; | 953 int unicodeStart = start; |
954 int next = advance(); // Advance past the (last) quote (of three). | 954 int next = advance(); // Advance past the (last) quote (of three). |
955 outer: while (!identical(next, $EOF)) { | 955 outer: while (!identical(next, $EOF)) { |
956 while (!identical(next, quoteChar)) { | 956 while (!identical(next, quoteChar)) { |
957 if (identical(next, $LF)) { | 957 if (identical(next, $LF)) { |
(...skipping 16 matching lines...) Expand all Loading... |
974 next = advance(); | 974 next = advance(); |
975 if (identical(next, quoteChar)) { | 975 if (identical(next, quoteChar)) { |
976 if (!asciiOnlyLine) handleUnicode(unicodeStart); | 976 if (!asciiOnlyLine) handleUnicode(unicodeStart); |
977 next = advance(); | 977 next = advance(); |
978 appendSubstringToken(STRING_INFO, start, asciiOnlyString); | 978 appendSubstringToken(STRING_INFO, start, asciiOnlyString); |
979 return next; | 979 return next; |
980 } | 980 } |
981 } | 981 } |
982 } | 982 } |
983 if (!asciiOnlyLine) handleUnicode(unicodeStart); | 983 if (!asciiOnlyLine) handleUnicode(unicodeStart); |
984 return error("unterminated string literal"); | 984 return error("unterminated string literal", next); |
985 } | 985 } |
986 | 986 |
987 int tokenizeMultiLineString(int quoteChar, int start, bool raw) { | 987 int tokenizeMultiLineString(int quoteChar, int start, bool raw) { |
988 if (raw) return tokenizeMultiLineRawString(quoteChar, start); | 988 if (raw) return tokenizeMultiLineRawString(quoteChar, start); |
989 bool asciiOnlyString = true; | 989 bool asciiOnlyString = true; |
990 bool asciiOnlyLine = true; | 990 bool asciiOnlyLine = true; |
991 int unicodeStart = start; | 991 int unicodeStart = start; |
992 int next = advance(); // Advance past the (last) quote (of three). | 992 int next = advance(); // Advance past the (last) quote (of three). |
993 while (!identical(next, $EOF)) { | 993 while (!identical(next, $EOF)) { |
994 if (identical(next, $$)) { | 994 if (identical(next, $$)) { |
(...skipping 30 matching lines...) Expand all Loading... |
1025 unicodeStart = scanOffset; | 1025 unicodeStart = scanOffset; |
1026 } | 1026 } |
1027 lineFeedInMultiline(); | 1027 lineFeedInMultiline(); |
1028 } else if (next > 127) { | 1028 } else if (next > 127) { |
1029 asciiOnlyString = false; | 1029 asciiOnlyString = false; |
1030 asciiOnlyLine = false; | 1030 asciiOnlyLine = false; |
1031 } | 1031 } |
1032 next = advance(); | 1032 next = advance(); |
1033 } | 1033 } |
1034 if (!asciiOnlyLine) handleUnicode(unicodeStart); | 1034 if (!asciiOnlyLine) handleUnicode(unicodeStart); |
1035 return error("unterminated string literal"); | 1035 return error("unterminated string literal", next); |
1036 } | 1036 } |
1037 | 1037 |
/// Records a scanning error by appending a [BAD_INPUT_INFO] string token
/// that carries [message].
///
/// [next] is the character the scanner was looking at when the error was
/// detected. If it is the end-of-input marker, it is returned unchanged so
/// the scanner does not advance past the end of the input. Otherwise the
/// scanner advances by one character, which guarantees that scanning makes
/// progress after an error, and the character returned by [advance] is the
/// result.
int error(String message, int next) {
  appendStringToken(BAD_INPUT_INFO, message);
  // Use the named end-of-input constant, as the rest of the scanner does,
  // instead of a bare 0. NOTE(review): this assumes $EOF == 0, the value
  // the check compared against before; confirm against its declaration.
  if (identical(next, $EOF)) return $EOF;
  return advance(); // Ensure progress.
}
1042 | 1043 |
1043 void unmatchedBeginGroup(BeginGroupToken begin) { | 1044 void unmatchedBeginGroup(BeginGroupToken begin) { |
1044 String error = 'unmatched "${begin.stringValue}"'; | 1045 String error = 'unmatched "${begin.stringValue}"'; |
1045 Token close = | 1046 Token close = |
1046 new StringToken.fromString( | 1047 new StringToken.fromString( |
1047 BAD_INPUT_INFO, error, begin.charOffset, canonicalize: true); | 1048 BAD_INPUT_INFO, error, begin.charOffset, canonicalize: true); |
1048 | 1049 |
1049 // We want to ensure that unmatched BeginGroupTokens are reported | 1050 // We want to ensure that unmatched BeginGroupTokens are reported |
(...skipping 10 matching lines...) Expand all Loading... |
1060 // ignore the [close] token (assuming it's correct), then the error will be | 1061 // ignore the [close] token (assuming it's correct), then the error will be |
1061 // reported when parsing the [next] token. | 1062 // reported when parsing the [next] token. |
1062 | 1063 |
1063 Token next = new StringToken.fromString( | 1064 Token next = new StringToken.fromString( |
1064 BAD_INPUT_INFO, error, begin.charOffset, canonicalize: true); | 1065 BAD_INPUT_INFO, error, begin.charOffset, canonicalize: true); |
1065 begin.endGroup = close; | 1066 begin.endGroup = close; |
1066 close.next = next; | 1067 close.next = next; |
1067 next.next = begin.next; | 1068 next.next = begin.next; |
1068 } | 1069 } |
1069 } | 1070 } |
OLD | NEW |