Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 23 matching lines...) | |
| 34 namespace v8 { | 34 namespace v8 { |
| 35 namespace internal { | 35 namespace internal { |
| 36 | 36 |
| 37 // ---------------------------------------------------------------------------- | 37 // ---------------------------------------------------------------------------- |
| 38 // Scanner | 38 // Scanner |
| 39 | 39 |
| 40 Scanner::Scanner(UnicodeCache* unicode_cache) | 40 Scanner::Scanner(UnicodeCache* unicode_cache) |
| 41 : unicode_cache_(unicode_cache) { } | 41 : unicode_cache_(unicode_cache) { } |
| 42 | 42 |
| 43 | 43 |
| 44 uc32 Scanner::ScanHexEscape(uc32 c, int length) { | 44 uc32 Scanner::ScanHexNumber(int expected_length) { |
| 45 ASSERT(length <= 4); // prevent overflow | 45 ASSERT(expected_length <= 4); // prevent overflow |
| 46 | 46 |
| 47 uc32 digits[4]; | 47 uc32 digits[4] = { 0, 0, 0, 0 }; |
| 48 uc32 x = 0; | 48 uc32 x = 0; |
| 49 for (int i = 0; i < length; i++) { | 49 for (int i = 0; i < expected_length; i++) { |
| 50 digits[i] = c0_; | 50 digits[i] = c0_; |
| 51 int d = HexValue(c0_); | 51 int d = HexValue(c0_); |
| 52 if (d < 0) { | 52 if (d < 0) { |
| 53 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes | 53 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes |
| 54 // should be illegal, but other JS VMs just return the | 54 // should be illegal, but other JS VMs just return the |
| 55 // non-escaped version of the original character. | 55 // non-escaped version of the original character. |
| 56 | 56 |
| 57 // Push back digits read, except the last one (in c0_). | 57 // Push back digits that we have advanced past. |
| 58 for (int j = i-1; j >= 0; j--) { | 58 for (int j = i-1; j >= 0; j--) { |
| 59 PushBack(digits[j]); | 59 PushBack(digits[j]); |
| 60 } | 60 } |
| 61 // Notice: No handling of error - treat it as "\u"->"u". | 61 return -1; |
| 62 return c; | |
| 63 } | 62 } |
| 64 x = x * 16 + d; | 63 x = x * 16 + d; |
| 65 Advance(); | 64 Advance(); |
| 66 } | 65 } |
| 67 | 66 |
| 68 return x; | 67 return x; |
| 69 } | 68 } |
| 70 | 69 |
| 71 | 70 |
| 72 | 71 |
| (...skipping 560 matching lines...) | |
| 633 | 632 |
| 634 switch (c) { | 633 switch (c) { |
| 635 case '\'': // fall through | 634 case '\'': // fall through |
| 636 case '"' : // fall through | 635 case '"' : // fall through |
| 637 case '\\': break; | 636 case '\\': break; |
| 638 case 'b' : c = '\b'; break; | 637 case 'b' : c = '\b'; break; |
| 639 case 'f' : c = '\f'; break; | 638 case 'f' : c = '\f'; break; |
| 640 case 'n' : c = '\n'; break; | 639 case 'n' : c = '\n'; break; |
| 641 case 'r' : c = '\r'; break; | 640 case 'r' : c = '\r'; break; |
| 642 case 't' : c = '\t'; break; | 641 case 't' : c = '\t'; break; |
| 643 case 'u' : c = ScanHexEscape(c, 4); break; | 642 case 'u' : { |
| 643 c = ScanHexNumber(4); | |
| 644 if (c < 0) c = 'u'; | |
| 645 break; | |
| 646 } | |
| 644 case 'v' : c = '\v'; break; | 647 case 'v' : c = '\v'; break; |
| 645 case 'x' : c = ScanHexEscape(c, 2); break; | 648 case 'x' : { |
| 649 c = ScanHexNumber(2); | |
| 650 if (c < 0) c = 'x'; | |
| 651 break; | |
| 652 } | |
| 646 case '0' : // fall through | 653 case '0' : // fall through |
| 647 case '1' : // fall through | 654 case '1' : // fall through |
| 648 case '2' : // fall through | 655 case '2' : // fall through |
| 649 case '3' : // fall through | 656 case '3' : // fall through |
| 650 case '4' : // fall through | 657 case '4' : // fall through |
| 651 case '5' : // fall through | 658 case '5' : // fall through |
| 652 case '6' : // fall through | 659 case '6' : // fall through |
| 653 case '7' : c = ScanOctalEscape(c, 2); break; | 660 case '7' : c = ScanOctalEscape(c, 2); break; |
| 654 } | 661 } |
| 655 | 662 |
| (...skipping 139 matching lines...) | |
| 795 return Token::ILLEGAL; | 802 return Token::ILLEGAL; |
| 796 | 803 |
| 797 literal.Complete(); | 804 literal.Complete(); |
| 798 | 805 |
| 799 return Token::NUMBER; | 806 return Token::NUMBER; |
| 800 } | 807 } |
| 801 | 808 |
| 802 | 809 |
| 803 uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() { | 810 uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() { |
| 804 Advance(); | 811 Advance(); |
| 805 if (c0_ != 'u') return unibrow::Utf8::kBadChar; | 812 if (c0_ != 'u') return -1; |
| 806 Advance(); | 813 Advance(); |
| 807 uc32 c = ScanHexEscape('u', 4); | 814 uc32 result = ScanHexNumber(4); |
| 808 // We do not allow a unicode escape sequence to start another | 815 if (result < 0) PushBack('u'); |
| 809 // unicode escape sequence. | 816 return result; |
| 810 if (c == '\\') return unibrow::Utf8::kBadChar; | |
| 811 return c; | |
| 812 } | 817 } |
| 813 | 818 |
| 814 | 819 |
| 815 // ---------------------------------------------------------------------------- | 820 // ---------------------------------------------------------------------------- |
| 816 // Keyword Matcher | 821 // Keyword Matcher |
| 817 | 822 |
| 818 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ | 823 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ |
| 819 KEYWORD_GROUP('b') \ | 824 KEYWORD_GROUP('b') \ |
| 820 KEYWORD("break", Token::BREAK) \ | 825 KEYWORD("break", Token::BREAK) \ |
| 821 KEYWORD_GROUP('c') \ | 826 KEYWORD_GROUP('c') \ |
| (...skipping 97 matching lines...) | |
| 919 } | 924 } |
| 920 | 925 |
| 921 | 926 |
| 922 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { | 927 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { |
| 923 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); | 928 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); |
| 924 LiteralScope literal(this); | 929 LiteralScope literal(this); |
| 925 // Scan identifier start character. | 930 // Scan identifier start character. |
| 926 if (c0_ == '\\') { | 931 if (c0_ == '\\') { |
| 927 uc32 c = ScanIdentifierUnicodeEscape(); | 932 uc32 c = ScanIdentifierUnicodeEscape(); |
| 928 // Only allow legal identifier start characters. | 933 // Only allow legal identifier start characters. |
| 929 if (!unicode_cache_->IsIdentifierStart(c)) return Token::ILLEGAL; | 934 if (c < 0 || |
| 935 c == '\\' || // No recursive escapes. | |
| 936 !unicode_cache_->IsIdentifierStart(c)) { | |
| 937 return Token::ILLEGAL; | |
| 938 } | |
| 930 AddLiteralChar(c); | 939 AddLiteralChar(c); |
| 931 return ScanIdentifierSuffix(&literal); | 940 return ScanIdentifierSuffix(&literal); |
| 932 } | 941 } |
| 933 | 942 |
| 934 uc32 first_char = c0_; | 943 uc32 first_char = c0_; |
| 935 Advance(); | 944 Advance(); |
| 936 AddLiteralChar(first_char); | 945 AddLiteralChar(first_char); |
| 937 | 946 |
| 938 // Scan the rest of the identifier characters. | 947 // Scan the rest of the identifier characters. |
| 939 while (unicode_cache_->IsIdentifierPart(c0_)) { | 948 while (unicode_cache_->IsIdentifierPart(c0_)) { |
| (...skipping 19 matching lines...) | |
| 959 return Token::IDENTIFIER; | 968 return Token::IDENTIFIER; |
| 960 } | 969 } |
| 961 | 970 |
| 962 | 971 |
| 963 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { | 972 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { |
| 964 // Scan the rest of the identifier characters. | 973 // Scan the rest of the identifier characters. |
| 965 while (unicode_cache_->IsIdentifierPart(c0_)) { | 974 while (unicode_cache_->IsIdentifierPart(c0_)) { |
| 966 if (c0_ == '\\') { | 975 if (c0_ == '\\') { |
| 967 uc32 c = ScanIdentifierUnicodeEscape(); | 976 uc32 c = ScanIdentifierUnicodeEscape(); |
| 968 // Only allow legal identifier part characters. | 977 // Only allow legal identifier part characters. |
| 969 if (!unicode_cache_->IsIdentifierPart(c)) return Token::ILLEGAL; | 978 if (c < 0 || |
| 979 c == '\\' || | |
| 980 !unicode_cache_->IsIdentifierPart(c)) { | |
| 981 return Token::ILLEGAL; | |
| 982 } | |
| 970 AddLiteralChar(c); | 983 AddLiteralChar(c); |
| 971 } else { | 984 } else { |
| 972 AddLiteralChar(c0_); | 985 AddLiteralChar(c0_); |
| 973 Advance(); | 986 Advance(); |
| 974 } | 987 } |
| 975 } | 988 } |
| 976 literal->Complete(); | 989 literal->Complete(); |
| 977 | 990 |
| 978 return Token::IDENTIFIER; | 991 return Token::IDENTIFIER; |
| 979 } | 992 } |
| 980 | 993 |
| 981 | 994 |
| 982 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { | 995 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { |
| 983 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | 996 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
| 984 bool in_character_class = false; | 997 bool in_character_class = false; |
| 985 | 998 |
| 986 // Previous token is either '/' or '/=', in the second case, the | 999 // Previous token is either '/' or '/=', in the second case, the |
| 987 // pattern starts at =. | 1000 // pattern starts at =. |
| 988 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); | 1001 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
| 989 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); | 1002 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
| 990 | 1003 |
| 991 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 1004 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
| 992 // the scanner should pass uninterpreted bodies to the RegExp | 1005 // the scanner should pass uninterpreted bodies to the RegExp |
| 993 // constructor. | 1006 // constructor. |
| 994 LiteralScope literal(this); | 1007 LiteralScope literal(this); |
| 995 if (seen_equal) | 1008 if (seen_equal) { |
| 996 AddLiteralChar('='); | 1009 AddLiteralChar('='); |
| 1010 } | |
| 997 | 1011 |
| 998 while (c0_ != '/' || in_character_class) { | 1012 while (c0_ != '/' || in_character_class) { |
| 999 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; | 1013 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; |
| 1000 if (c0_ == '\\') { // Escape sequence. | 1014 if (c0_ == '\\') { // Escape sequence. |
| 1001 AddLiteralCharAdvance(); | 1015 AddLiteralCharAdvance(); |
| 1002 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; | 1016 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; |
| 1003 AddLiteralCharAdvance(); | 1017 AddLiteralCharAdvance(); |
| 1004 // If the escape allows more characters, i.e., \x??, \u????, or \c?, | 1018 // If the escape allows more characters, i.e., \x??, \u????, or \c?, |
| 1005 // only "safe" characters are allowed (letters, digits, underscore), | 1019 // only "safe" characters are allowed (letters, digits, underscore), |
| 1006 // otherwise the escape isn't valid and the invalid character has | 1020 // otherwise the escape isn't valid and the invalid character has |
| (...skipping 11 matching lines...) | |
| 1018 } | 1032 } |
| 1019 } | 1033 } |
| 1020 Advance(); // consume '/' | 1034 Advance(); // consume '/' |
| 1021 | 1035 |
| 1022 literal.Complete(); | 1036 literal.Complete(); |
| 1023 | 1037 |
| 1024 return true; | 1038 return true; |
| 1025 } | 1039 } |
| 1026 | 1040 |
| 1027 | 1041 |
| 1042 bool JavaScriptScanner::ScanLiteralUnicodeEscape() { | |
| 1043 ASSERT(c0_ == '\\'); | |
| 1044 uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0}; | |
| 1045 Advance(); | |
| 1046 int i = 1; | |
| 1047 if (c0_ == 'u') { | |
| 1048 Advance(); | |
| 1049 i++; | |
| 1050 while (i < 6) { | |
| 1051 Advance(); | |
| 1052 if (!IsHexDigit(c0_)) break; | |
| 1053 chars_read[i] = c0_; | |
| 1054 i++; | |
| 1055 } | |
| 1056 } | |
| 1057 if (i < 6) { | |
| 1058 // Incomplete escape. Undo all advances and return false. | |
| 1059 while (i > 0) { | |
| 1060 i--; | |
| 1061 PushBack(chars_read[i]); | |
| 1062 } | |
| 1063 return false; | |
| 1064 } | |
| 1065 // Complete escape. Add all chars to current literal buffer. | |
| 1066 for (int i = 0; i < 6; i++) { | |
| 1067 AddLiteralChar(chars_read[i]); | |
| 1068 return true; | |
|
Rico
2011/08/18 11:43:13
Indentation seems wrong
Lasse Reichstein
2011/08/24 13:36:28
Argh, more than wrong. The return has moved itself…
| |
| 1069 } | |
| 1070 } | |
| 1071 | |
| 1072 | |
| 1028 bool JavaScriptScanner::ScanRegExpFlags() { | 1073 bool JavaScriptScanner::ScanRegExpFlags() { |
| 1029 // Scan regular expression flags. | 1074 // Scan regular expression flags. |
| 1030 LiteralScope literal(this); | 1075 LiteralScope literal(this); |
| 1031 while (unicode_cache_->IsIdentifierPart(c0_)) { | 1076 while (unicode_cache_->IsIdentifierPart(c0_)) { |
| 1032 if (c0_ == '\\') { | 1077 if (c0_ != '\\') { |
| 1033 uc32 c = ScanIdentifierUnicodeEscape(); | 1078 AddLiteralCharAdvance(); |
| 1034 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { | 1079 } else { |
| 1035 // We allow any escaped character, unlike the restriction on | 1080 if (!ScanLiteralUnicodeEscape()) { |
| 1036 // IdentifierPart when it is used to build an IdentifierName. | 1081 break; |
| 1037 AddLiteralChar(c); | |
| 1038 continue; | |
| 1039 } | 1082 } |
| 1040 } | 1083 } |
| 1041 AddLiteralCharAdvance(); | |
| 1042 } | 1084 } |
| 1043 literal.Complete(); | 1085 literal.Complete(); |
| 1044 | 1086 |
| 1045 next_.location.end_pos = source_pos() - 1; | 1087 next_.location.end_pos = source_pos() - 1; |
| 1046 return true; | 1088 return true; |
| 1047 } | 1089 } |
| 1048 | 1090 |
| 1049 } } // namespace v8::internal | 1091 } } // namespace v8::internal |
| OLD | NEW |