| OLD | NEW |
| 1 // Copyright 2013 the V8 project authors. All rights reserved. | 1 // Copyright 2013 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 27 matching lines...) Expand all Loading... |
| 38 #include "ast.h" | 38 #include "ast.h" |
| 39 #include "char-predicates-inl.h" | 39 #include "char-predicates-inl.h" |
| 40 #include "messages.h" | 40 #include "messages.h" |
| 41 #include "platform.h" | 41 #include "platform.h" |
| 42 #include "runtime.h" | 42 #include "runtime.h" |
| 43 #include "scanner-character-streams.h" | 43 #include "scanner-character-streams.h" |
| 44 #include "scopeinfo.h" | 44 #include "scopeinfo.h" |
| 45 #include "string-stream.h" | 45 #include "string-stream.h" |
| 46 #include "scanner.h" | 46 #include "scanner.h" |
| 47 | 47 |
| 48 #include "experimental-scanner.h" | |
| 49 | |
| 50 using namespace v8::internal; | 48 using namespace v8::internal; |
| 51 | 49 |
| 52 byte* ReadFile(const char* name, const byte** end, int repeat, | 50 byte* ReadFile(const char* name, const byte** end, int repeat, |
| 53 bool convert_to_utf16) { | 51 bool convert_to_utf16) { |
| 54 FILE* file = fopen(name, "rb"); | 52 FILE* file = fopen(name, "rb"); |
| 55 if (file == NULL) return NULL; | 53 if (file == NULL) return NULL; |
| 56 | 54 |
| 57 fseek(file, 0, SEEK_END); | 55 fseek(file, 0, SEEK_END); |
| 58 int file_size = ftell(file); | 56 int file_size = ftell(file); |
| 59 rewind(file); | 57 rewind(file); |
| (...skipping 12 matching lines...) Expand all Loading... |
| 72 } | 70 } |
| 73 *end = &chars[size]; | 71 *end = &chars[size]; |
| 74 | 72 |
| 75 if (!convert_to_utf16) return chars; | 73 if (!convert_to_utf16) return chars; |
| 76 | 74 |
| 77 // Length of new_chars is not strictly accurate, but should be enough. | 75 // Length of new_chars is not strictly accurate, but should be enough. |
| 78 uint16_t* new_chars = new uint16_t[size]; | 76 uint16_t* new_chars = new uint16_t[size]; |
| 79 { | 77 { |
| 80 Utf8ToUtf16CharacterStream stream(chars, size); | 78 Utf8ToUtf16CharacterStream stream(chars, size); |
| 81 uint16_t* cursor = new_chars; | 79 uint16_t* cursor = new_chars; |
| 82 uc32 c; | 80 // uc32 c; |
| 83 // The 32-bit char type is probably only so that we can have -1 as a return | 81 // The 32-bit char type is probably only so that we can have -1 as a return |
| 84 // value. If the char is not -1, it should fit into 16 bits. | 82 // value. If the char is not -1, it should fit into 16 bits. |
| 85 while ((c = stream.Advance()) != -1) { | 83 CHECK(false); |
| 86 *cursor++ = c; | 84 // while ((c = stream.Advance()) != -1) { |
| 87 } | 85 // *cursor++ = c; |
| 86 // } |
| 88 *end = reinterpret_cast<byte*>(cursor); | 87 *end = reinterpret_cast<byte*>(cursor); |
| 89 } | 88 } |
| 90 delete[] chars; | 89 delete[] chars; |
| 91 return reinterpret_cast<byte*>(new_chars); | 90 return reinterpret_cast<byte*>(new_chars); |
| 92 } | 91 } |
| 93 | 92 |
| 94 | 93 |
| 95 enum Encoding { | 94 enum Encoding { |
| 96 LATIN1, | 95 LATIN1, |
| 97 UTF8, | 96 UTF8, |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 129 }; | 128 }; |
| 130 | 129 |
| 131 class BaselineScanner { | 130 class BaselineScanner { |
| 132 public: | 131 public: |
| 133 BaselineScanner(const byte* source, | 132 BaselineScanner(const byte* source, |
| 134 const byte* source_end, | 133 const byte* source_end, |
| 135 Isolate* isolate, | 134 Isolate* isolate, |
| 136 ElapsedTimer* timer, | 135 ElapsedTimer* timer, |
| 137 const LexerShellSettings& settings) | 136 const LexerShellSettings& settings) |
| 138 : source_(source), stream_(NULL) { | 137 : source_(source), stream_(NULL) { |
| 139 unicode_cache_ = new UnicodeCache(); | 138 scanner_ = new Scanner(isolate->unicode_cache()); |
| 140 scanner_ = new Scanner(unicode_cache_); | |
| 141 scanner_->SetHarmonyNumericLiterals(settings.harmony_numeric_literals); | 139 scanner_->SetHarmonyNumericLiterals(settings.harmony_numeric_literals); |
| 142 scanner_->SetHarmonyModules(settings.harmony_modules); | 140 scanner_->SetHarmonyModules(settings.harmony_modules); |
| 143 scanner_->SetHarmonyScoping(settings.harmony_scoping); | 141 scanner_->SetHarmonyScoping(settings.harmony_scoping); |
| 144 switch (settings.encoding) { | 142 switch (settings.encoding) { |
| 145 case UTF8: | 143 case UTF8: |
| 146 case UTF8TO16: | 144 case UTF8TO16: |
| 147 stream_ = new Utf8ToUtf16CharacterStream(source_, source_end - source_); | 145 stream_ = new Utf8ToUtf16CharacterStream(source_, source_end - source_); |
| 148 break; | 146 break; |
| 149 case UTF16: { | 147 case UTF16: { |
| 150 Handle<String> result = isolate->factory()->NewStringFromTwoByte( | 148 Handle<String> result = isolate->factory()->NewStringFromTwoByte( |
| (...skipping 20 matching lines...) Expand all Loading... |
| 171 delete scanner_; | 169 delete scanner_; |
| 172 delete stream_; | 170 delete stream_; |
| 173 delete unicode_cache_; | 171 delete unicode_cache_; |
| 174 } | 172 } |
| 175 | 173 |
| 176 Scanner* scanner_; | 174 Scanner* scanner_; |
| 177 | 175 |
| 178 private: | 176 private: |
| 179 UnicodeCache* unicode_cache_; | 177 UnicodeCache* unicode_cache_; |
| 180 const byte* source_; | 178 const byte* source_; |
| 181 BufferedUtf16CharacterStream* stream_; | 179 Utf16CharacterStream* stream_; |
| 182 }; | 180 }; |
| 183 | 181 |
| 184 | 182 |
| 185 struct TokenWithLocation { | 183 struct TokenWithLocation { |
| 186 Token::Value value; | 184 Token::Value value; |
| 187 size_t beg; | 185 size_t beg; |
| 188 size_t end; | 186 size_t end; |
| 189 std::vector<int> literal; | 187 std::vector<int> literal; |
| 190 bool is_ascii; | 188 bool is_ascii; |
| 191 // The location of the latest octal position when the token was seen. | 189 // The location of the latest octal position when the token was seen. |
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 275 scanner.scanner_->literal_ascii_string(); | 273 scanner.scanner_->literal_ascii_string(); |
| 276 } else { | 274 } else { |
| 277 scanner.scanner_->literal_utf16_string(); | 275 scanner.scanner_->literal_utf16_string(); |
| 278 } | 276 } |
| 279 } | 277 } |
| 280 } while (token != Token::EOS); | 278 } while (token != Token::EOS); |
| 281 return timer.Elapsed(); | 279 return timer.Elapsed(); |
| 282 } | 280 } |
| 283 | 281 |
| 284 | 282 |
| 285 template<typename Char> | |
| 286 TimeDelta RunExperimentalScanner(Handle<String> source, | |
| 287 Isolate* isolate, | |
| 288 std::vector<TokenWithLocation>* tokens, | |
| 289 LexerShellSettings settings) { | |
| 290 ElapsedTimer timer; | |
| 291 ExperimentalScanner<Char> scanner(source, isolate); | |
| 292 scanner.SetHarmonyNumericLiterals(settings.harmony_numeric_literals); | |
| 293 scanner.SetHarmonyModules(settings.harmony_modules); | |
| 294 scanner.SetHarmonyScoping(settings.harmony_scoping); | |
| 295 | |
| 296 timer.Start(); | |
| 297 scanner.Init(); | |
| 298 Token::Value token; | |
| 299 do { | |
| 300 token = scanner.Next(); | |
| 301 if (settings.dump_tokens) { | |
| 302 tokens->push_back(GetTokenWithLocation(&scanner, token)); | |
| 303 } else if (HasLiteral(token)) { | |
| 304 if (scanner.is_literal_ascii()) { | |
| 305 scanner.literal_ascii_string(); | |
| 306 } else { | |
| 307 scanner.literal_utf16_string(); | |
| 308 } | |
| 309 } | |
| 310 } while (token != Token::EOS); | |
| 311 return timer.Elapsed(); | |
| 312 } | |
| 313 | |
| 314 | |
| 315 void PrintTokens(const char* name, | 283 void PrintTokens(const char* name, |
| 316 const std::vector<TokenWithLocation>& tokens) { | 284 const std::vector<TokenWithLocation>& tokens) { |
| 317 printf("No of tokens: %d\n", | 285 printf("No of tokens: %d\n", |
| 318 static_cast<int>(tokens.size())); | 286 static_cast<int>(tokens.size())); |
| 319 printf("%s:\n", name); | 287 printf("%s:\n", name); |
| 320 for (size_t i = 0; i < tokens.size(); ++i) { | 288 for (size_t i = 0; i < tokens.size(); ++i) { |
| 321 tokens[i].Print("=>"); | 289 tokens[i].Print("=>"); |
| 322 } | 290 } |
| 323 } | 291 } |
| 324 | 292 |
| (...skipping 18 matching lines...) Expand all Loading... |
| 343 const byte* buffer = ReadFile(fname, &buffer_end, settings.repeat, false); | 311 const byte* buffer = ReadFile(fname, &buffer_end, settings.repeat, false); |
| 344 if (truncate_by > buffer_end - buffer) { | 312 if (truncate_by > buffer_end - buffer) { |
| 345 *can_truncate = false; | 313 *can_truncate = false; |
| 346 } else { | 314 } else { |
| 347 buffer_end -= truncate_by; | 315 buffer_end -= truncate_by; |
| 348 baseline_time = RunBaselineScanner( | 316 baseline_time = RunBaselineScanner( |
| 349 buffer, buffer_end, isolate, &baseline_tokens, settings); | 317 buffer, buffer_end, isolate, &baseline_tokens, settings); |
| 350 } | 318 } |
| 351 delete[] buffer; | 319 delete[] buffer; |
| 352 } | 320 } |
| 353 if (run_experimental) { | |
| 354 Handle<String> source; | |
| 355 const byte* buffer_end = 0; | |
| 356 const byte* buffer = ReadFile(fname, &buffer_end, settings.repeat, | |
| 357 settings.encoding == UTF8TO16); | |
| 358 if (truncate_by > buffer_end - buffer) { | |
| 359 *can_truncate = false; | |
| 360 } else { | |
| 361 buffer_end -= truncate_by; | |
| 362 switch (settings.encoding) { | |
| 363 case UTF8: | |
| 364 case LATIN1: | |
| 365 source = isolate->factory()->NewStringFromAscii( | |
| 366 Vector<const char>(reinterpret_cast<const char*>(buffer), | |
| 367 buffer_end - buffer)); | |
| 368 experimental_time = RunExperimentalScanner<uint8_t>( | |
| 369 source, isolate, &experimental_tokens, settings); | |
| 370 break; | |
| 371 case UTF16: | |
| 372 case UTF8TO16: { | |
| 373 const uc16* buffer_16 = reinterpret_cast<const uc16*>(buffer); | |
| 374 const uc16* buffer_end_16 = reinterpret_cast<const uc16*>(buffer_end); | |
| 375 source = isolate->factory()->NewStringFromTwoByte( | |
| 376 Vector<const uc16>(buffer_16, buffer_end_16 - buffer_16)); | |
| 377 // If the string was just an expanded one byte string, V8 detects it | |
| 378 // and doesn't store it as two byte. | |
| 379 if (!source->IsTwoByteRepresentation()) { | |
| 380 experimental_time = RunExperimentalScanner<uint8_t>( | |
| 381 source, isolate, &experimental_tokens, settings); | |
| 382 } else { | |
| 383 experimental_time = RunExperimentalScanner<uint16_t>( | |
| 384 source, isolate, &experimental_tokens, settings); | |
| 385 } | |
| 386 break; | |
| 387 } | |
| 388 default: | |
| 389 printf("Encoding not supported by the experimental scanner\n"); | |
| 390 exit(1); | |
| 391 break; | |
| 392 } | |
| 393 } | |
| 394 delete[] buffer; | |
| 395 } | |
| 396 if (print_tokens && !run_experimental) { | 321 if (print_tokens && !run_experimental) { |
| 397 PrintTokens("Baseline", baseline_tokens); | 322 PrintTokens("Baseline", baseline_tokens); |
| 398 } | 323 } |
| 399 if (print_tokens && !run_baseline) { | 324 if (print_tokens && !run_baseline) { |
| 400 PrintTokens("Experimental", experimental_tokens); | 325 PrintTokens("Experimental", experimental_tokens); |
| 401 } | 326 } |
| 402 if ((print_tokens || settings.check_tokens) && | 327 if ((print_tokens || settings.check_tokens) && |
| 403 run_baseline && run_experimental) { | 328 run_baseline && run_experimental) { |
| 404 if (print_tokens) { | 329 if (print_tokens) { |
| 405 printf("No of tokens in Baseline: %d\n", | 330 printf("No of tokens in Baseline: %d\n", |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 445 settings.encoding = LATIN1; | 370 settings.encoding = LATIN1; |
| 446 } else if (strcmp(argv[i], "--utf8") == 0) { | 371 } else if (strcmp(argv[i], "--utf8") == 0) { |
| 447 settings.encoding = UTF8; | 372 settings.encoding = UTF8; |
| 448 } else if (strcmp(argv[i], "--utf16") == 0) { | 373 } else if (strcmp(argv[i], "--utf16") == 0) { |
| 449 settings.encoding = UTF16; | 374 settings.encoding = UTF16; |
| 450 } else if (strcmp(argv[i], "--utf8to16") == 0) { | 375 } else if (strcmp(argv[i], "--utf8to16") == 0) { |
| 451 settings.encoding = UTF8TO16; | 376 settings.encoding = UTF8TO16; |
| 452 } else if (strcmp(argv[i], "--print-tokens") == 0) { | 377 } else if (strcmp(argv[i], "--print-tokens") == 0) { |
| 453 settings.print_tokens = true; | 378 settings.print_tokens = true; |
| 454 } else if (strcmp(argv[i], "--no-baseline") == 0) { | 379 } else if (strcmp(argv[i], "--no-baseline") == 0) { |
| 455 settings.run_baseline = false; | |
| 456 } else if (strcmp(argv[i], "--no-experimental") == 0) { | 380 } else if (strcmp(argv[i], "--no-experimental") == 0) { |
| 457 settings.run_experimental = false; | |
| 458 } else if (strcmp(argv[i], "--no-check") == 0) { | 381 } else if (strcmp(argv[i], "--no-check") == 0) { |
| 459 settings.check_tokens = false; | 382 settings.check_tokens = false; |
| 460 } else if (strcmp(argv[i], "--break-after-illegal") == 0) { | 383 } else if (strcmp(argv[i], "--break-after-illegal") == 0) { |
| 461 settings.break_after_illegal = true; | 384 settings.break_after_illegal = true; |
| 462 } else if (strcmp(argv[i], "--use-harmony") == 0) { | 385 } else if (strcmp(argv[i], "--use-harmony") == 0) { |
| 463 settings.harmony_numeric_literals = true; | 386 settings.harmony_numeric_literals = true; |
| 464 settings.harmony_modules = true; | 387 settings.harmony_modules = true; |
| 465 settings.harmony_scoping = true; | 388 settings.harmony_scoping = true; |
| 466 } else if (strncmp(argv[i], "--benchmark=", 12) == 0) { | 389 } else if (strncmp(argv[i], "--benchmark=", 12) == 0) { |
| 467 benchmark = std::string(argv[i]).substr(12); | 390 benchmark = std::string(argv[i]).substr(12); |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 511 if (settings.run_experimental) { | 434 if (settings.run_experimental) { |
| 512 if (benchmark.empty()) benchmark = "Experimental"; | 435 if (benchmark.empty()) benchmark = "Experimental"; |
| 513 printf("%s(RunTime): %.f ms\n", benchmark.c_str(), | 436 printf("%s(RunTime): %.f ms\n", benchmark.c_str(), |
| 514 experimental_total); | 437 experimental_total); |
| 515 } | 438 } |
| 516 } | 439 } |
| 517 } | 440 } |
| 518 v8::V8::Dispose(); | 441 v8::V8::Dispose(); |
| 519 return 0; | 442 return 0; |
| 520 } | 443 } |
| OLD | NEW |