OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 23 matching lines...) Expand all Loading... | |
34 namespace v8 { | 34 namespace v8 { |
35 namespace internal { | 35 namespace internal { |
36 | 36 |
37 // ---------------------------------------------------------------------------- | 37 // ---------------------------------------------------------------------------- |
38 // Scanner | 38 // Scanner |
39 | 39 |
40 Scanner::Scanner(UnicodeCache* unicode_cache) | 40 Scanner::Scanner(UnicodeCache* unicode_cache) |
41 : unicode_cache_(unicode_cache) { } | 41 : unicode_cache_(unicode_cache) { } |
42 | 42 |
43 | 43 |
44 uc32 Scanner::ScanHexEscape(uc32 c, int length) { | 44 uc32 Scanner::ScanHexNumber(int expected_length) { |
45 ASSERT(length <= 4); // prevent overflow | 45 ASSERT(expected_length <= 4); // prevent overflow |
46 | 46 |
47 uc32 digits[4]; | 47 uc32 digits[4] = { 0, 0, 0, 0 }; |
48 uc32 x = 0; | 48 uc32 x = 0; |
49 for (int i = 0; i < length; i++) { | 49 for (int i = 0; i < expected_length; i++) { |
50 digits[i] = c0_; | 50 digits[i] = c0_; |
51 int d = HexValue(c0_); | 51 int d = HexValue(c0_); |
52 if (d < 0) { | 52 if (d < 0) { |
53 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes | 53 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes |
54 // should be illegal, but other JS VMs just return the | 54 // should be illegal, but other JS VMs just return the |
55 // non-escaped version of the original character. | 55 // non-escaped version of the original character. |
56 | 56 |
57 // Push back digits read, except the last one (in c0_). | 57 // Push back digits that we have advanced past. |
58 for (int j = i-1; j >= 0; j--) { | 58 for (int j = i-1; j >= 0; j--) { |
59 PushBack(digits[j]); | 59 PushBack(digits[j]); |
60 } | 60 } |
61 // Notice: No handling of error - treat it as "\u"->"u". | 61 return -1; |
62 return c; | |
63 } | 62 } |
64 x = x * 16 + d; | 63 x = x * 16 + d; |
65 Advance(); | 64 Advance(); |
66 } | 65 } |
67 | 66 |
68 return x; | 67 return x; |
69 } | 68 } |
70 | 69 |
71 | 70 |
72 | 71 |
(...skipping 560 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
633 | 632 |
634 switch (c) { | 633 switch (c) { |
635 case '\'': // fall through | 634 case '\'': // fall through |
636 case '"' : // fall through | 635 case '"' : // fall through |
637 case '\\': break; | 636 case '\\': break; |
638 case 'b' : c = '\b'; break; | 637 case 'b' : c = '\b'; break; |
639 case 'f' : c = '\f'; break; | 638 case 'f' : c = '\f'; break; |
640 case 'n' : c = '\n'; break; | 639 case 'n' : c = '\n'; break; |
641 case 'r' : c = '\r'; break; | 640 case 'r' : c = '\r'; break; |
642 case 't' : c = '\t'; break; | 641 case 't' : c = '\t'; break; |
643 case 'u' : c = ScanHexEscape(c, 4); break; | 642 case 'u' : { |
643 c = ScanHexNumber(4); | |
644 if (c < 0) c = 'u'; | |
645 break; | |
646 } | |
644 case 'v' : c = '\v'; break; | 647 case 'v' : c = '\v'; break; |
645 case 'x' : c = ScanHexEscape(c, 2); break; | 648 case 'x' : { |
649 c = ScanHexNumber(2); | |
650 if (c < 0) c = 'x'; | |
651 break; | |
652 } | |
646 case '0' : // fall through | 653 case '0' : // fall through |
647 case '1' : // fall through | 654 case '1' : // fall through |
648 case '2' : // fall through | 655 case '2' : // fall through |
649 case '3' : // fall through | 656 case '3' : // fall through |
650 case '4' : // fall through | 657 case '4' : // fall through |
651 case '5' : // fall through | 658 case '5' : // fall through |
652 case '6' : // fall through | 659 case '6' : // fall through |
653 case '7' : c = ScanOctalEscape(c, 2); break; | 660 case '7' : c = ScanOctalEscape(c, 2); break; |
654 } | 661 } |
655 | 662 |
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
795 return Token::ILLEGAL; | 802 return Token::ILLEGAL; |
796 | 803 |
797 literal.Complete(); | 804 literal.Complete(); |
798 | 805 |
799 return Token::NUMBER; | 806 return Token::NUMBER; |
800 } | 807 } |
801 | 808 |
802 | 809 |
803 uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() { | 810 uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() { |
804 Advance(); | 811 Advance(); |
805 if (c0_ != 'u') return unibrow::Utf8::kBadChar; | 812 if (c0_ != 'u') return -1; |
806 Advance(); | 813 Advance(); |
807 uc32 c = ScanHexEscape('u', 4); | 814 uc32 result = ScanHexNumber(4); |
808 // We do not allow a unicode escape sequence to start another | 815 if (result < 0) PushBack('u'); |
809 // unicode escape sequence. | 816 return result; |
810 if (c == '\\') return unibrow::Utf8::kBadChar; | |
811 return c; | |
812 } | 817 } |
813 | 818 |
814 | 819 |
815 // ---------------------------------------------------------------------------- | 820 // ---------------------------------------------------------------------------- |
816 // Keyword Matcher | 821 // Keyword Matcher |
817 | 822 |
818 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ | 823 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ |
819 KEYWORD_GROUP('b') \ | 824 KEYWORD_GROUP('b') \ |
820 KEYWORD("break", Token::BREAK) \ | 825 KEYWORD("break", Token::BREAK) \ |
821 KEYWORD_GROUP('c') \ | 826 KEYWORD_GROUP('c') \ |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
919 } | 924 } |
920 | 925 |
921 | 926 |
922 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { | 927 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { |
923 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); | 928 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); |
924 LiteralScope literal(this); | 929 LiteralScope literal(this); |
925 // Scan identifier start character. | 930 // Scan identifier start character. |
926 if (c0_ == '\\') { | 931 if (c0_ == '\\') { |
927 uc32 c = ScanIdentifierUnicodeEscape(); | 932 uc32 c = ScanIdentifierUnicodeEscape(); |
928 // Only allow legal identifier start characters. | 933 // Only allow legal identifier start characters. |
929 if (!unicode_cache_->IsIdentifierStart(c)) return Token::ILLEGAL; | 934 if (c < 0 || |
935 c == '\\' || // No recursive escapes. | |
936 !unicode_cache_->IsIdentifierStart(c)) { | |
937 return Token::ILLEGAL; | |
938 } | |
930 AddLiteralChar(c); | 939 AddLiteralChar(c); |
931 return ScanIdentifierSuffix(&literal); | 940 return ScanIdentifierSuffix(&literal); |
932 } | 941 } |
933 | 942 |
934 uc32 first_char = c0_; | 943 uc32 first_char = c0_; |
935 Advance(); | 944 Advance(); |
936 AddLiteralChar(first_char); | 945 AddLiteralChar(first_char); |
937 | 946 |
938 // Scan the rest of the identifier characters. | 947 // Scan the rest of the identifier characters. |
939 while (unicode_cache_->IsIdentifierPart(c0_)) { | 948 while (unicode_cache_->IsIdentifierPart(c0_)) { |
(...skipping 19 matching lines...) Expand all Loading... | |
959 return Token::IDENTIFIER; | 968 return Token::IDENTIFIER; |
960 } | 969 } |
961 | 970 |
962 | 971 |
963 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { | 972 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { |
964 // Scan the rest of the identifier characters. | 973 // Scan the rest of the identifier characters. |
965 while (unicode_cache_->IsIdentifierPart(c0_)) { | 974 while (unicode_cache_->IsIdentifierPart(c0_)) { |
966 if (c0_ == '\\') { | 975 if (c0_ == '\\') { |
967 uc32 c = ScanIdentifierUnicodeEscape(); | 976 uc32 c = ScanIdentifierUnicodeEscape(); |
968 // Only allow legal identifier part characters. | 977 // Only allow legal identifier part characters. |
969 if (!unicode_cache_->IsIdentifierPart(c)) return Token::ILLEGAL; | 978 if (c < 0 || |
979 c == '\\' || | |
980 !unicode_cache_->IsIdentifierPart(c)) { | |
981 return Token::ILLEGAL; | |
982 } | |
970 AddLiteralChar(c); | 983 AddLiteralChar(c); |
971 } else { | 984 } else { |
972 AddLiteralChar(c0_); | 985 AddLiteralChar(c0_); |
973 Advance(); | 986 Advance(); |
974 } | 987 } |
975 } | 988 } |
976 literal->Complete(); | 989 literal->Complete(); |
977 | 990 |
978 return Token::IDENTIFIER; | 991 return Token::IDENTIFIER; |
979 } | 992 } |
980 | 993 |
981 | 994 |
982 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { | 995 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { |
983 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | 996 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
984 bool in_character_class = false; | 997 bool in_character_class = false; |
985 | 998 |
986 // Previous token is either '/' or '/=', in the second case, the | 999 // Previous token is either '/' or '/=', in the second case, the |
987 // pattern starts at =. | 1000 // pattern starts at =. |
988 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); | 1001 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
989 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); | 1002 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
990 | 1003 |
991 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 1004 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
992 // the scanner should pass uninterpreted bodies to the RegExp | 1005 // the scanner should pass uninterpreted bodies to the RegExp |
993 // constructor. | 1006 // constructor. |
994 LiteralScope literal(this); | 1007 LiteralScope literal(this); |
995 if (seen_equal) | 1008 if (seen_equal) { |
996 AddLiteralChar('='); | 1009 AddLiteralChar('='); |
1010 } | |
997 | 1011 |
998 while (c0_ != '/' || in_character_class) { | 1012 while (c0_ != '/' || in_character_class) { |
999 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; | 1013 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; |
1000 if (c0_ == '\\') { // Escape sequence. | 1014 if (c0_ == '\\') { // Escape sequence. |
1001 AddLiteralCharAdvance(); | 1015 AddLiteralCharAdvance(); |
1002 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; | 1016 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; |
1003 AddLiteralCharAdvance(); | 1017 AddLiteralCharAdvance(); |
1004 // If the escape allows more characters, i.e., \x??, \u????, or \c?, | 1018 // If the escape allows more characters, i.e., \x??, \u????, or \c?, |
1005 // only "safe" characters are allowed (letters, digits, underscore), | 1019 // only "safe" characters are allowed (letters, digits, underscore), |
1006 // otherwise the escape isn't valid and the invalid character has | 1020 // otherwise the escape isn't valid and the invalid character has |
(...skipping 11 matching lines...) Expand all Loading... | |
1018 } | 1032 } |
1019 } | 1033 } |
1020 Advance(); // consume '/' | 1034 Advance(); // consume '/' |
1021 | 1035 |
1022 literal.Complete(); | 1036 literal.Complete(); |
1023 | 1037 |
1024 return true; | 1038 return true; |
1025 } | 1039 } |
1026 | 1040 |
1027 | 1041 |
1042 bool JavaScriptScanner::ScanLiteralUnicodeEscape() { | |
1043 ASSERT(c0_ == '\\'); | |
1044 uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0}; | |
1045 Advance(); | |
1046 int i = 1; | |
1047 if (c0_ == 'u') { | |
1048 Advance(); | |
1049 i++; | |
1050 while (i < 6) { | |
1051 Advance(); | |
1052 if (!IsHexDigit(c0_)) break; | |
1053 chars_read[i] = c0_; | |
1054 i++; | |
1055 } | |
1056 } | |
1057 if (i < 6) { | |
1058 // Incomplete escape. Undo all advances and return false. | |
1059 while (i > 0) { | |
1060 i--; | |
1061 PushBack(chars_read[i]); | |
1062 } | |
1063 return false; | |
1064 } | |
1065 // Complete escape. Add all chars to current literal buffer. | |
1066 for (int i = 0; i < 6; i++) { | |
1067 AddLiteralChar(chars_read[i]); | |
1068 return true; | |
Rico
2011/08/18 11:43:13
Indentation seems wrong
Lasse Reichstein
2011/08/24 13:36:28
Argh, more than wrong. The return has moved itself...
| |
1069 } | |
1070 } | |
1071 | |
1072 | |
1028 bool JavaScriptScanner::ScanRegExpFlags() { | 1073 bool JavaScriptScanner::ScanRegExpFlags() { |
1029 // Scan regular expression flags. | 1074 // Scan regular expression flags. |
1030 LiteralScope literal(this); | 1075 LiteralScope literal(this); |
1031 while (unicode_cache_->IsIdentifierPart(c0_)) { | 1076 while (unicode_cache_->IsIdentifierPart(c0_)) { |
1032 if (c0_ == '\\') { | 1077 if (c0_ != '\\') { |
1033 uc32 c = ScanIdentifierUnicodeEscape(); | 1078 AddLiteralCharAdvance(); |
1034 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { | 1079 } else { |
1035 // We allow any escaped character, unlike the restriction on | 1080 if (!ScanLiteralUnicodeEscape()) { |
1036 // IdentifierPart when it is used to build an IdentifierName. | 1081 break; |
1037 AddLiteralChar(c); | |
1038 continue; | |
1039 } | 1082 } |
1040 } | 1083 } |
1041 AddLiteralCharAdvance(); | |
1042 } | 1084 } |
1043 literal.Complete(); | 1085 literal.Complete(); |
1044 | 1086 |
1045 next_.location.end_pos = source_pos() - 1; | 1087 next_.location.end_pos = source_pos() - 1; |
1046 return true; | 1088 return true; |
1047 } | 1089 } |
1048 | 1090 |
1049 } } // namespace v8::internal | 1091 } } // namespace v8::internal |
OLD | NEW |