Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(731)

Side by Side Diff: src/scanner.cc

Issue 5026005: Move static scanner fields to scanner-base.h (Closed)
Patch Set: Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/scanner.h ('k') | src/scanner-base.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 12 matching lines...) Expand all
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 #include "v8.h" 28 #include "v8.h"
29 29
30 #include "ast.h" 30 #include "ast.h"
31 #include "handles.h" 31 #include "handles.h"
32 #include "scanner.h" 32 #include "scanner.h"
33 #include "unicode-inl.h"
33 34
34 namespace v8 { 35 namespace v8 {
35 namespace internal { 36 namespace internal {
36 37
37 // ---------------------------------------------------------------------------- 38 // ----------------------------------------------------------------------------
38 // Character predicates
39
40
41 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;
42 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;
43 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;
44 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;
45
46
47 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;
48
49
50 // ----------------------------------------------------------------------------
51 // UTF8Buffer 39 // UTF8Buffer
52 40
53 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { } 41 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { }
54 42
55 43
56 UTF8Buffer::~UTF8Buffer() {} 44 UTF8Buffer::~UTF8Buffer() {}
57 45
58 46
59 void UTF8Buffer::AddCharSlow(uc32 c) { 47 void UTF8Buffer::AddCharSlow(uc32 c) {
60 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar); 48 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar);
(...skipping 290 matching lines...) Expand 10 before | Expand all | Expand 10 after
351 return source_pos() != start_position; 339 return source_pos() != start_position;
352 } 340 }
353 341
354 342
355 bool Scanner::SkipJavaScriptWhiteSpace() { 343 bool Scanner::SkipJavaScriptWhiteSpace() {
356 int start_position = source_pos(); 344 int start_position = source_pos();
357 345
358 while (true) { 346 while (true) {
359 // We treat byte-order marks (BOMs) as whitespace for better 347 // We treat byte-order marks (BOMs) as whitespace for better
360 // compatibility with Spidermonkey and other JavaScript engines. 348 // compatibility with Spidermonkey and other JavaScript engines.
361 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { 349 while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
362 // IsWhiteSpace() includes line terminators! 350 // IsWhiteSpace() includes line terminators!
363 if (kIsLineTerminator.get(c0_)) { 351 if (ScannerConstants::kIsLineTerminator.get(c0_)) {
364 // Ignore line terminators, but remember them. This is necessary 352 // Ignore line terminators, but remember them. This is necessary
365 // for automatic semicolon insertion. 353 // for automatic semicolon insertion.
366 has_line_terminator_before_next_ = true; 354 has_line_terminator_before_next_ = true;
367 } 355 }
368 Advance(); 356 Advance();
369 } 357 }
370 358
371 // If there is an HTML comment end '-->' at the beginning of a 359 // If there is an HTML comment end '-->' at the beginning of a
372 // line (with only whitespace in front of it), we treat the rest 360 // line (with only whitespace in front of it), we treat the rest
373 // of the line as a comment. This is in line with the way 361 // of the line as a comment. This is in line with the way
(...skipping 19 matching lines...) Expand all
393 381
394 382
395 Token::Value Scanner::SkipSingleLineComment() { 383 Token::Value Scanner::SkipSingleLineComment() {
396 Advance(); 384 Advance();
397 385
398 // The line terminator at the end of the line is not considered 386 // The line terminator at the end of the line is not considered
399 // to be part of the single-line comment; it is recognized 387 // to be part of the single-line comment; it is recognized
400 // separately by the lexical grammar and becomes part of the 388 // separately by the lexical grammar and becomes part of the
401 // stream of input elements for the syntactic grammar (see 389 // stream of input elements for the syntactic grammar (see
402 // ECMA-262, section 7.4, page 12). 390 // ECMA-262, section 7.4, page 12).
403 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { 391 while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) {
404 Advance(); 392 Advance();
405 } 393 }
406 394
407 return Token::WHITESPACE; 395 return Token::WHITESPACE;
408 } 396 }
409 397
410 398
411 Token::Value Scanner::SkipMultiLineComment() { 399 Token::Value Scanner::SkipMultiLineComment() {
412 ASSERT(c0_ == '*'); 400 ASSERT(c0_ == '*');
413 Advance(); 401 Advance();
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after
624 612
625 613
626 Token::Value Scanner::ScanJsonIdentifier(const char* text, 614 Token::Value Scanner::ScanJsonIdentifier(const char* text,
627 Token::Value token) { 615 Token::Value token) {
628 LiteralScope literal(this); 616 LiteralScope literal(this);
629 while (*text != '\0') { 617 while (*text != '\0') {
630 if (c0_ != *text) return Token::ILLEGAL; 618 if (c0_ != *text) return Token::ILLEGAL;
631 Advance(); 619 Advance();
632 text++; 620 text++;
633 } 621 }
634 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; 622 if (ScannerConstants::kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
635 literal.Complete(); 623 literal.Complete();
636 return token; 624 return token;
637 } 625 }
638 626
639 627
640 void Scanner::ScanJavaScript() { 628 void Scanner::ScanJavaScript() {
641 next_.literal_chars = Vector<const char>(); 629 next_.literal_chars = Vector<const char>();
642 Token::Value token; 630 Token::Value token;
643 do { 631 do {
644 // Remember the position of the next token 632 // Remember the position of the next token
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after
847 835
848 case '?': 836 case '?':
849 token = Select(Token::CONDITIONAL); 837 token = Select(Token::CONDITIONAL);
850 break; 838 break;
851 839
852 case '~': 840 case '~':
853 token = Select(Token::BIT_NOT); 841 token = Select(Token::BIT_NOT);
854 break; 842 break;
855 843
856 default: 844 default:
857 if (kIsIdentifierStart.get(c0_)) { 845 if (ScannerConstants::kIsIdentifierStart.get(c0_)) {
858 token = ScanIdentifier(); 846 token = ScanIdentifier();
859 } else if (IsDecimalDigit(c0_)) { 847 } else if (IsDecimalDigit(c0_)) {
860 token = ScanNumber(false); 848 token = ScanNumber(false);
861 } else if (SkipWhiteSpace()) { 849 } else if (SkipWhiteSpace()) {
862 token = Token::WHITESPACE; 850 token = Token::WHITESPACE;
863 } else if (c0_ < 0) { 851 } else if (c0_ < 0) {
864 token = Token::EOS; 852 token = Token::EOS;
865 } else { 853 } else {
866 token = Select(Token::ILLEGAL); 854 token = Select(Token::ILLEGAL);
867 } 855 }
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
930 } 918 }
931 return x; 919 return x;
932 } 920 }
933 921
934 922
935 void Scanner::ScanEscape() { 923 void Scanner::ScanEscape() {
936 uc32 c = c0_; 924 uc32 c = c0_;
937 Advance(); 925 Advance();
938 926
939 // Skip escaped newlines. 927 // Skip escaped newlines.
940 if (kIsLineTerminator.get(c)) { 928 if (ScannerConstants::kIsLineTerminator.get(c)) {
941 // Allow CR+LF newlines in multiline string literals. 929 // Allow CR+LF newlines in multiline string literals.
942 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); 930 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
943 // Allow LF+CR newlines in multiline string literals. 931 // Allow LF+CR newlines in multiline string literals.
944 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); 932 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
945 return; 933 return;
946 } 934 }
947 935
948 switch (c) { 936 switch (c) {
949 case '\'': // fall through 937 case '\'': // fall through
950 case '"' : // fall through 938 case '"' : // fall through
(...skipping 21 matching lines...) Expand all
972 // as non-escaped characters by JS VMs. 960 // as non-escaped characters by JS VMs.
973 AddChar(c); 961 AddChar(c);
974 } 962 }
975 963
976 964
977 Token::Value Scanner::ScanString() { 965 Token::Value Scanner::ScanString() {
978 uc32 quote = c0_; 966 uc32 quote = c0_;
979 Advance(); // consume quote 967 Advance(); // consume quote
980 968
981 LiteralScope literal(this); 969 LiteralScope literal(this);
982 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { 970 while (c0_ != quote && c0_ >= 0
971 && !ScannerConstants::kIsLineTerminator.get(c0_)) {
983 uc32 c = c0_; 972 uc32 c = c0_;
984 Advance(); 973 Advance();
985 if (c == '\\') { 974 if (c == '\\') {
986 if (c0_ < 0) return Token::ILLEGAL; 975 if (c0_ < 0) return Token::ILLEGAL;
987 ScanEscape(); 976 ScanEscape();
988 } else { 977 } else {
989 AddChar(c); 978 AddChar(c);
990 } 979 }
991 } 980 }
992 if (c0_ != quote) return Token::ILLEGAL; 981 if (c0_ != quote) return Token::ILLEGAL;
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
1085 // we must have at least one decimal digit after 'e'/'E' 1074 // we must have at least one decimal digit after 'e'/'E'
1086 return Token::ILLEGAL; 1075 return Token::ILLEGAL;
1087 } 1076 }
1088 ScanDecimalDigits(); 1077 ScanDecimalDigits();
1089 } 1078 }
1090 1079
1091 // The source character immediately following a numeric literal must 1080 // The source character immediately following a numeric literal must
1092 // not be an identifier start or a decimal digit; see ECMA-262 1081 // not be an identifier start or a decimal digit; see ECMA-262
1093 // section 7.8.3, page 17 (note that we read only one decimal digit 1082 // section 7.8.3, page 17 (note that we read only one decimal digit
1094 // if the value is 0). 1083 // if the value is 0).
1095 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_)) 1084 if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_))
1096 return Token::ILLEGAL; 1085 return Token::ILLEGAL;
1097 1086
1098 literal.Complete(); 1087 literal.Complete();
1099 1088
1100 return Token::NUMBER; 1089 return Token::NUMBER;
1101 } 1090 }
1102 1091
1103 1092
1104 uc32 Scanner::ScanIdentifierUnicodeEscape() { 1093 uc32 Scanner::ScanIdentifierUnicodeEscape() {
1105 Advance(); 1094 Advance();
1106 if (c0_ != 'u') return unibrow::Utf8::kBadChar; 1095 if (c0_ != 'u') return unibrow::Utf8::kBadChar;
1107 Advance(); 1096 Advance();
1108 uc32 c = ScanHexEscape('u', 4); 1097 uc32 c = ScanHexEscape('u', 4);
1109 // We do not allow a unicode escape sequence to start another 1098 // We do not allow a unicode escape sequence to start another
1110 // unicode escape sequence. 1099 // unicode escape sequence.
1111 if (c == '\\') return unibrow::Utf8::kBadChar; 1100 if (c == '\\') return unibrow::Utf8::kBadChar;
1112 return c; 1101 return c;
1113 } 1102 }
1114 1103
1115 1104
1116 Token::Value Scanner::ScanIdentifier() { 1105 Token::Value Scanner::ScanIdentifier() {
1117 ASSERT(kIsIdentifierStart.get(c0_)); 1106 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));
1118 1107
1119 LiteralScope literal(this); 1108 LiteralScope literal(this);
1120 KeywordMatcher keyword_match; 1109 KeywordMatcher keyword_match;
1121 1110
1122 // Scan identifier start character. 1111 // Scan identifier start character.
1123 if (c0_ == '\\') { 1112 if (c0_ == '\\') {
1124 uc32 c = ScanIdentifierUnicodeEscape(); 1113 uc32 c = ScanIdentifierUnicodeEscape();
1125 // Only allow legal identifier start characters. 1114 // Only allow legal identifier start characters.
1126 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; 1115 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;
1127 AddChar(c); 1116 AddChar(c);
1128 keyword_match.Fail(); 1117 keyword_match.Fail();
1129 } else { 1118 } else {
1130 AddChar(c0_); 1119 AddChar(c0_);
1131 keyword_match.AddChar(c0_); 1120 keyword_match.AddChar(c0_);
1132 Advance(); 1121 Advance();
1133 } 1122 }
1134 1123
1135 // Scan the rest of the identifier characters. 1124 // Scan the rest of the identifier characters.
1136 while (kIsIdentifierPart.get(c0_)) { 1125 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
1137 if (c0_ == '\\') { 1126 if (c0_ == '\\') {
1138 uc32 c = ScanIdentifierUnicodeEscape(); 1127 uc32 c = ScanIdentifierUnicodeEscape();
1139 // Only allow legal identifier part characters. 1128 // Only allow legal identifier part characters.
1140 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; 1129 if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;
1141 AddChar(c); 1130 AddChar(c);
1142 keyword_match.Fail(); 1131 keyword_match.Fail();
1143 } else { 1132 } else {
1144 AddChar(c0_); 1133 AddChar(c0_);
1145 keyword_match.AddChar(c0_); 1134 keyword_match.AddChar(c0_);
1146 Advance(); 1135 Advance();
1147 } 1136 }
1148 } 1137 }
1149 literal.Complete(); 1138 literal.Complete();
1150 1139
1151 return keyword_match.token(); 1140 return keyword_match.token();
1152 } 1141 }
1153 1142
1154 1143
1155 1144
1156 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {
1157 // Checks whether the buffer contains an identifier (no escape).
1158 if (!buffer->has_more()) return false;
1159 if (!kIsIdentifierStart.get(buffer->GetNext())) return false;
1160 while (buffer->has_more()) {
1161 if (!kIsIdentifierPart.get(buffer->GetNext())) return false;
1162 }
1163 return true;
1164 }
1165
1166
1167 bool Scanner::ScanRegExpPattern(bool seen_equal) { 1145 bool Scanner::ScanRegExpPattern(bool seen_equal) {
1168 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags 1146 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
1169 bool in_character_class = false; 1147 bool in_character_class = false;
1170 1148
1171 // Previous token is either '/' or '/=', in the second case, the 1149 // Previous token is either '/' or '/=', in the second case, the
1172 // pattern starts at =. 1150 // pattern starts at =.
1173 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); 1151 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
1174 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); 1152 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
1175 1153
1176 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, 1154 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
1177 // the scanner should pass uninterpreted bodies to the RegExp 1155 // the scanner should pass uninterpreted bodies to the RegExp
1178 // constructor. 1156 // constructor.
1179 LiteralScope literal(this); 1157 LiteralScope literal(this);
1180 if (seen_equal) 1158 if (seen_equal)
1181 AddChar('='); 1159 AddChar('=');
1182 1160
1183 while (c0_ != '/' || in_character_class) { 1161 while (c0_ != '/' || in_character_class) {
1184 if (kIsLineTerminator.get(c0_) || c0_ < 0) return false; 1162 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
1185 if (c0_ == '\\') { // escaped character 1163 if (c0_ == '\\') { // escaped character
1186 AddCharAdvance(); 1164 AddCharAdvance();
1187 if (kIsLineTerminator.get(c0_) || c0_ < 0) return false; 1165 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
1188 AddCharAdvance(); 1166 AddCharAdvance();
1189 } else { // unescaped character 1167 } else { // unescaped character
1190 if (c0_ == '[') in_character_class = true; 1168 if (c0_ == '[') in_character_class = true;
1191 if (c0_ == ']') in_character_class = false; 1169 if (c0_ == ']') in_character_class = false;
1192 AddCharAdvance(); 1170 AddCharAdvance();
1193 } 1171 }
1194 } 1172 }
1195 Advance(); // consume '/' 1173 Advance(); // consume '/'
1196 1174
1197 literal.Complete(); 1175 literal.Complete();
1198 1176
1199 return true; 1177 return true;
1200 } 1178 }
1201 1179
1202 bool Scanner::ScanRegExpFlags() { 1180 bool Scanner::ScanRegExpFlags() {
1203 // Scan regular expression flags. 1181 // Scan regular expression flags.
1204 LiteralScope literal(this); 1182 LiteralScope literal(this);
1205 while (kIsIdentifierPart.get(c0_)) { 1183 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
1206 if (c0_ == '\\') { 1184 if (c0_ == '\\') {
1207 uc32 c = ScanIdentifierUnicodeEscape(); 1185 uc32 c = ScanIdentifierUnicodeEscape();
1208 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { 1186 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
1209 // We allow any escaped character, unlike the restriction on 1187 // We allow any escaped character, unlike the restriction on
1210 // IdentifierPart when it is used to build an IdentifierName. 1188 // IdentifierPart when it is used to build an IdentifierName.
1211 AddChar(c); 1189 AddChar(c);
1212 continue; 1190 continue;
1213 } 1191 }
1214 } 1192 }
1215 AddCharAdvance(); 1193 AddCharAdvance();
1216 } 1194 }
1217 literal.Complete(); 1195 literal.Complete();
1218 1196
1219 next_.location.end_pos = source_pos() - 1; 1197 next_.location.end_pos = source_pos() - 1;
1220 return true; 1198 return true;
1221 } 1199 }
1222 1200
1223 } } // namespace v8::internal 1201 } } // namespace v8::internal
OLD | NEW
« no previous file with comments | « src/scanner.h ('k') | src/scanner-base.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698