| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 17 matching lines...) Expand all Loading... |
| 28 // Features shared by parsing and pre-parsing scanners. | 28 // Features shared by parsing and pre-parsing scanners. |
| 29 | 29 |
| 30 #include <cmath> | 30 #include <cmath> |
| 31 | 31 |
| 32 #include "scanner.h" | 32 #include "scanner.h" |
| 33 | 33 |
| 34 #include "../include/v8stdint.h" | 34 #include "../include/v8stdint.h" |
| 35 #include "char-predicates-inl.h" | 35 #include "char-predicates-inl.h" |
| 36 #include "conversions-inl.h" | 36 #include "conversions-inl.h" |
| 37 #include "list-inl.h" | 37 #include "list-inl.h" |
| 38 #include "v8.h" |
| 39 #include "parser.h" |
| 38 #include "lexer/lexer.h" | 40 #include "lexer/lexer.h" |
| 39 | 41 |
| 40 namespace v8 { | 42 namespace v8 { |
| 41 namespace internal { | 43 namespace internal { |
| 42 | 44 |
| 43 | 45 |
| 44 #ifndef V8_USE_GENERATED_LEXER | 46 #ifndef V8_USE_GENERATED_LEXER |
| 45 // ---------------------------------------------------------------------------- | 47 // ---------------------------------------------------------------------------- |
| 46 // Scanner | 48 // Scanner |
| 47 | 49 |
| (...skipping 855 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 903 KEYWORD_GROUP('v') \ | 905 KEYWORD_GROUP('v') \ |
| 904 KEYWORD("var", Token::VAR) \ | 906 KEYWORD("var", Token::VAR) \ |
| 905 KEYWORD("void", Token::VOID) \ | 907 KEYWORD("void", Token::VOID) \ |
| 906 KEYWORD_GROUP('w') \ | 908 KEYWORD_GROUP('w') \ |
| 907 KEYWORD("while", Token::WHILE) \ | 909 KEYWORD("while", Token::WHILE) \ |
| 908 KEYWORD("with", Token::WITH) \ | 910 KEYWORD("with", Token::WITH) \ |
| 909 KEYWORD_GROUP('y') \ | 911 KEYWORD_GROUP('y') \ |
| 910 KEYWORD("yield", Token::YIELD) | 912 KEYWORD("yield", Token::YIELD) |
| 911 | 913 |
| 912 | 914 |
| 913 static Token::Value KeywordOrIdentifierToken(const char* input, | 915 static Token::Value KeywordOrIdentifierToken(const uint8_t* input, |
| 914 int input_length, | 916 int input_length, |
| 915 bool harmony_scoping, | 917 bool harmony_scoping, |
| 916 bool harmony_modules) { | 918 bool harmony_modules) { |
| 917 ASSERT(input_length >= 1); | 919 ASSERT(input_length >= 1); |
| 918 const int kMinLength = 2; | 920 const int kMinLength = 2; |
| 919 const int kMaxLength = 10; | 921 const int kMaxLength = 10; |
| 920 if (input_length < kMinLength || input_length > kMaxLength) { | 922 if (input_length < kMinLength || input_length > kMaxLength) { |
| 921 return Token::IDENTIFIER; | 923 return Token::IDENTIFIER; |
| 922 } | 924 } |
| 923 switch (input[0]) { | 925 switch (input[0]) { |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 978 Advance(); | 980 Advance(); |
| 979 AddLiteralChar(next_char); | 981 AddLiteralChar(next_char); |
| 980 continue; | 982 continue; |
| 981 } | 983 } |
| 982 // Fallthrough if no longer able to complete keyword. | 984 // Fallthrough if no longer able to complete keyword. |
| 983 return ScanIdentifierSuffix(&literal); | 985 return ScanIdentifierSuffix(&literal); |
| 984 } | 986 } |
| 985 | 987 |
| 986 literal.Complete(); | 988 literal.Complete(); |
| 987 | 989 |
| 988 if (next_.literal_chars->is_ascii()) { | 990 if (next_.literal_chars->is_one_byte()) { |
| 989 Vector<const char> chars = next_.literal_chars->ascii_literal(); | 991 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); |
| 990 return KeywordOrIdentifierToken(chars.start(), | 992 return KeywordOrIdentifierToken(chars.start(), |
| 991 chars.length(), | 993 chars.length(), |
| 992 harmony_scoping_, | 994 harmony_scoping_, |
| 993 harmony_modules_); | 995 harmony_modules_); |
| 994 } | 996 } |
| 995 | 997 |
| 996 return Token::IDENTIFIER; | 998 return Token::IDENTIFIER; |
| 997 } | 999 } |
| 998 | 1000 |
| 999 | 1001 |
| (...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1110 Advance(); | 1112 Advance(); |
| 1111 } | 1113 } |
| 1112 } | 1114 } |
| 1113 literal.Complete(); | 1115 literal.Complete(); |
| 1114 | 1116 |
| 1115 next_.location.end_pos = source_pos() - 1; | 1117 next_.location.end_pos = source_pos() - 1; |
| 1116 return true; | 1118 return true; |
| 1117 } | 1119 } |
| 1118 | 1120 |
| 1119 | 1121 |
| 1122 Handle<String> Scanner::AllocateNextLiteralString(Isolate* isolate, |
| 1123 PretenureFlag tenured) { |
| 1124 if (is_next_literal_one_byte()) { |
| 1125 return isolate->factory()->NewStringFromOneByte( |
| 1126 Vector<const uint8_t>::cast(next_literal_one_byte_string()), tenured); |
| 1127 } else { |
| 1128 return isolate->factory()->NewStringFromTwoByte( |
| 1129 next_literal_two_byte_string(), tenured); |
| 1130 } |
| 1131 } |
| 1132 |
| 1133 |
| 1134 Handle<String> Scanner::AllocateInternalizedString(Isolate* isolate) { |
| 1135 if (is_literal_one_byte()) { |
| 1136 return isolate->factory()->InternalizeOneByteString( |
| 1137 literal_one_byte_string()); |
| 1138 } else { |
| 1139 return isolate->factory()->InternalizeTwoByteString( |
| 1140 literal_two_byte_string()); |
| 1141 } |
| 1142 } |
| 1143 |
| 1144 |
| 1145 double Scanner::DoubleValue() { |
| 1146 ASSERT(is_literal_one_byte()); |
| 1147 return StringToDouble( |
| 1148 unicode_cache_, Vector<const char>::cast(literal_one_byte_string()), |
| 1149 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); |
| 1150 } |
| 1151 |
| 1152 |
| 1153 int Scanner::FindNumber(DuplicateFinder* finder, int value) { |
| 1154 return finder->AddNumber(literal_one_byte_string(), value); |
| 1155 } |
| 1156 |
| 1157 |
| 1158 int Scanner::FindSymbol(DuplicateFinder* finder, int value) { |
| 1159 if (is_literal_one_byte()) { |
| 1160 return finder->AddOneByteSymbol(literal_one_byte_string(), value); |
| 1161 } |
| 1162 return finder->AddTwoByteSymbol(literal_two_byte_string(), value); |
| 1163 } |
| 1164 |
| 1165 |
| 1166 void Scanner::LogSymbol(ParserRecorder* log, int position) { |
| 1167 if (is_literal_one_byte()) { |
| 1168 log->LogOneByteSymbol(position, literal_one_byte_string()); |
| 1169 } else { |
| 1170 log->LogTwoByteSymbol(position, literal_two_byte_string()); |
| 1171 } |
| 1172 } |
| 1173 |
| 1174 |
| 1120 #endif | 1175 #endif |
| 1121 | 1176 |
| 1122 | 1177 |
| 1123 int DuplicateFinder::AddAsciiSymbol(Vector<const char> key, int value) { | 1178 int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) { |
| 1124 return AddSymbol(Vector<const byte>::cast(key), true, value); | 1179 return AddSymbol(key, true, value); |
| 1125 } | 1180 } |
| 1126 | 1181 |
| 1127 | 1182 |
| 1128 int DuplicateFinder::AddUtf16Symbol(Vector<const uint16_t> key, int value) { | 1183 int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) { |
| 1129 return AddSymbol(Vector<const byte>::cast(key), false, value); | 1184 return AddSymbol(Vector<const uint8_t>::cast(key), false, value); |
| 1130 } | 1185 } |
| 1131 | 1186 |
| 1132 | 1187 |
| 1133 int DuplicateFinder::AddSymbol(Vector<const byte> key, | 1188 int DuplicateFinder::AddSymbol(Vector<const uint8_t> key, |
| 1134 bool is_ascii, | 1189 bool is_one_byte, |
| 1135 int value) { | 1190 int value) { |
| 1136 uint32_t hash = Hash(key, is_ascii); | 1191 uint32_t hash = Hash(key, is_one_byte); |
| 1137 byte* encoding = BackupKey(key, is_ascii); | 1192 byte* encoding = BackupKey(key, is_one_byte); |
| 1138 HashMap::Entry* entry = map_.Lookup(encoding, hash, true); | 1193 HashMap::Entry* entry = map_.Lookup(encoding, hash, true); |
| 1139 int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value)); | 1194 int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value)); |
| 1140 entry->value = | 1195 entry->value = |
| 1141 reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value)); | 1196 reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value)); |
| 1142 return old_value; | 1197 return old_value; |
| 1143 } | 1198 } |
| 1144 | 1199 |
| 1145 | 1200 |
| 1146 int DuplicateFinder::AddNumber(Vector<const char> key, int value) { | 1201 int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) { |
| 1147 ASSERT(key.length() > 0); | 1202 ASSERT(key.length() > 0); |
| 1148 // Quick check for already being in canonical form. | 1203 // Quick check for already being in canonical form. |
| 1149 if (IsNumberCanonical(key)) { | 1204 if (IsNumberCanonical(key)) { |
| 1150 return AddAsciiSymbol(key, value); | 1205 return AddOneByteSymbol(key, value); |
| 1151 } | 1206 } |
| 1152 | 1207 |
| 1153 int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY; | 1208 int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY; |
| 1154 double double_value = StringToDouble(unicode_constants_, key, flags, 0.0); | 1209 double double_value = StringToDouble( |
| 1210 unicode_constants_, Vector<const char>::cast(key), flags, 0.0); |
| 1155 int length; | 1211 int length; |
| 1156 const char* string; | 1212 const char* string; |
| 1157 if (!std::isfinite(double_value)) { | 1213 if (!std::isfinite(double_value)) { |
| 1158 string = "Infinity"; | 1214 string = "Infinity"; |
| 1159 length = 8; // strlen("Infinity"); | 1215 length = 8; // strlen("Infinity"); |
| 1160 } else { | 1216 } else { |
| 1161 string = DoubleToCString(double_value, | 1217 string = DoubleToCString(double_value, |
| 1162 Vector<char>(number_buffer_, kBufferSize)); | 1218 Vector<char>(number_buffer_, kBufferSize)); |
| 1163 length = StrLength(string); | 1219 length = StrLength(string); |
| 1164 } | 1220 } |
| 1165 return AddSymbol(Vector<const byte>(reinterpret_cast<const byte*>(string), | 1221 return AddSymbol(Vector<const byte>(reinterpret_cast<const byte*>(string), |
| 1166 length), true, value); | 1222 length), true, value); |
| 1167 } | 1223 } |
| 1168 | 1224 |
| 1169 | 1225 |
| 1170 bool DuplicateFinder::IsNumberCanonical(Vector<const char> number) { | 1226 bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) { |
| 1171 // Test for a safe approximation of number literals that are already | 1227 // Test for a safe approximation of number literals that are already |
| 1172 // in canonical form: max 15 digits, no leading zeroes, except an | 1228 // in canonical form: max 15 digits, no leading zeroes, except an |
| 1173 // integer part that is a single zero, and no trailing zeros below | 1229 // integer part that is a single zero, and no trailing zeros below |
| 1174 // the decimal point. | 1230 // the decimal point. |
| 1175 int pos = 0; | 1231 int pos = 0; |
| 1176 int length = number.length(); | 1232 int length = number.length(); |
| 1177 if (number.length() > 15) return false; | 1233 if (number.length() > 15) return false; |
| 1178 if (number[pos] == '0') { | 1234 if (number[pos] == '0') { |
| 1179 pos++; | 1235 pos++; |
| 1180 } else { | 1236 } else { |
| 1181 while (pos < length && | 1237 while (pos < length && |
| 1182 static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) pos++; | 1238 static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) pos++; |
| 1183 } | 1239 } |
| 1184 if (length == pos) return true; | 1240 if (length == pos) return true; |
| 1185 if (number[pos] != '.') return false; | 1241 if (number[pos] != '.') return false; |
| 1186 pos++; | 1242 pos++; |
| 1187 bool invalid_last_digit = true; | 1243 bool invalid_last_digit = true; |
| 1188 while (pos < length) { | 1244 while (pos < length) { |
| 1189 byte digit = number[pos] - '0'; | 1245 uint8_t digit = number[pos] - '0'; |
| 1190 if (digit > '9' - '0') return false; | 1246 if (digit > '9' - '0') return false; |
| 1191 invalid_last_digit = (digit == 0); | 1247 invalid_last_digit = (digit == 0); |
| 1192 pos++; | 1248 pos++; |
| 1193 } | 1249 } |
| 1194 return !invalid_last_digit; | 1250 return !invalid_last_digit; |
| 1195 } | 1251 } |
| 1196 | 1252 |
| 1197 | 1253 |
| 1198 uint32_t DuplicateFinder::Hash(Vector<const byte> key, bool is_ascii) { | 1254 uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) { |
| 1199 // Primitive hash function, almost identical to the one used | 1255 // Primitive hash function, almost identical to the one used |
| 1200 // for strings (except that it's seeded by the length and ASCII-ness). | 1256 // for strings (except that it's seeded by the length and ASCII-ness). |
| 1201 int length = key.length(); | 1257 int length = key.length(); |
| 1202 uint32_t hash = (length << 1) | (is_ascii ? 1 : 0) ; | 1258 uint32_t hash = (length << 1) | (is_one_byte ? 1 : 0) ; |
| 1203 for (int i = 0; i < length; i++) { | 1259 for (int i = 0; i < length; i++) { |
| 1204 uint32_t c = key[i]; | 1260 uint32_t c = key[i]; |
| 1205 hash = (hash + c) * 1025; | 1261 hash = (hash + c) * 1025; |
| 1206 hash ^= (hash >> 6); | 1262 hash ^= (hash >> 6); |
| 1207 } | 1263 } |
| 1208 return hash; | 1264 return hash; |
| 1209 } | 1265 } |
| 1210 | 1266 |
| 1211 | 1267 |
| 1212 bool DuplicateFinder::Match(void* first, void* second) { | 1268 bool DuplicateFinder::Match(void* first, void* second) { |
| 1213 // Decode lengths. | 1269 // Decode lengths. |
| 1214 // Length + ASCII-bit is encoded as base 128, most significant heptet first, | 1270 // Length + ASCII-bit is encoded as base 128, most significant heptet first, |
| 1215 // with a 8th bit being non-zero while there are more heptets. | 1271 // with a 8th bit being non-zero while there are more heptets. |
| 1216 // The value encodes the number of bytes following, and whether the original | 1272 // The value encodes the number of bytes following, and whether the original |
| 1217 // was ASCII. | 1273 // was ASCII. |
| 1218 byte* s1 = reinterpret_cast<byte*>(first); | 1274 byte* s1 = reinterpret_cast<byte*>(first); |
| 1219 byte* s2 = reinterpret_cast<byte*>(second); | 1275 byte* s2 = reinterpret_cast<byte*>(second); |
| 1220 uint32_t length_ascii_field = 0; | 1276 uint32_t length_one_byte_field = 0; |
| 1221 byte c1; | 1277 byte c1; |
| 1222 do { | 1278 do { |
| 1223 c1 = *s1; | 1279 c1 = *s1; |
| 1224 if (c1 != *s2) return false; | 1280 if (c1 != *s2) return false; |
| 1225 length_ascii_field = (length_ascii_field << 7) | (c1 & 0x7f); | 1281 length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f); |
| 1226 s1++; | 1282 s1++; |
| 1227 s2++; | 1283 s2++; |
| 1228 } while ((c1 & 0x80) != 0); | 1284 } while ((c1 & 0x80) != 0); |
| 1229 int length = static_cast<int>(length_ascii_field >> 1); | 1285 int length = static_cast<int>(length_one_byte_field >> 1); |
| 1230 return memcmp(s1, s2, length) == 0; | 1286 return memcmp(s1, s2, length) == 0; |
| 1231 } | 1287 } |
| 1232 | 1288 |
| 1233 | 1289 |
| 1234 byte* DuplicateFinder::BackupKey(Vector<const byte> bytes, | 1290 byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes, |
| 1235 bool is_ascii) { | 1291 bool is_one_byte) { |
| 1236 uint32_t ascii_length = (bytes.length() << 1) | (is_ascii ? 1 : 0); | 1292 uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0); |
| 1237 backing_store_.StartSequence(); | 1293 backing_store_.StartSequence(); |
| 1238 // Emit ascii_length as base-128 encoded number, with the 7th bit set | 1294 // Emit one_byte_length as base-128 encoded number, with the 7th bit set |
| 1239 // on the byte of every heptet except the last, least significant, one. | 1295 // on the byte of every heptet except the last, least significant, one. |
| 1240 if (ascii_length >= (1 << 7)) { | 1296 if (one_byte_length >= (1 << 7)) { |
| 1241 if (ascii_length >= (1 << 14)) { | 1297 if (one_byte_length >= (1 << 14)) { |
| 1242 if (ascii_length >= (1 << 21)) { | 1298 if (one_byte_length >= (1 << 21)) { |
| 1243 if (ascii_length >= (1 << 28)) { | 1299 if (one_byte_length >= (1 << 28)) { |
| 1244 backing_store_.Add(static_cast<byte>((ascii_length >> 28) | 0x80)); | 1300 backing_store_.Add( |
| 1301 static_cast<uint8_t>((one_byte_length >> 28) | 0x80)); |
| 1245 } | 1302 } |
| 1246 backing_store_.Add(static_cast<byte>((ascii_length >> 21) | 0x80u)); | 1303 backing_store_.Add( |
| 1304 static_cast<uint8_t>((one_byte_length >> 21) | 0x80u)); |
| 1247 } | 1305 } |
| 1248 backing_store_.Add(static_cast<byte>((ascii_length >> 14) | 0x80u)); | 1306 backing_store_.Add( |
| 1307 static_cast<uint8_t>((one_byte_length >> 14) | 0x80u)); |
| 1249 } | 1308 } |
| 1250 backing_store_.Add(static_cast<byte>((ascii_length >> 7) | 0x80u)); | 1309 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); |
| 1251 } | 1310 } |
| 1252 backing_store_.Add(static_cast<byte>(ascii_length & 0x7f)); | 1311 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); |
| 1253 | 1312 |
| 1254 backing_store_.AddBlock(bytes); | 1313 backing_store_.AddBlock(bytes); |
| 1255 return backing_store_.EndSequence().start(); | 1314 return backing_store_.EndSequence().start(); |
| 1256 } | 1315 } |
| 1257 | 1316 |
| 1258 } } // namespace v8::internal | 1317 } } // namespace v8::internal |
| OLD | NEW |