| OLD | NEW |
| 1 // Copyright 2013 the V8 project authors. All rights reserved. | 1 // Copyright 2013 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 73 print_tokens_for_compare(false), | 73 print_tokens_for_compare(false), |
| 74 break_after_illegal(false), | 74 break_after_illegal(false), |
| 75 eos_test(false), | 75 eos_test(false), |
| 76 repeat(1), | 76 repeat(1), |
| 77 harmony_numeric_literals(false), | 77 harmony_numeric_literals(false), |
| 78 harmony_modules(false), | 78 harmony_modules(false), |
| 79 harmony_scoping(false) {} | 79 harmony_scoping(false) {} |
| 80 }; | 80 }; |
| 81 | 81 |
| 82 | 82 |
| 83 struct FileData { |
| 84 const char* file_name; |
| 85 unsigned length_in_bytes; |
| 86 Encoding encoding; |
| 87 const uint16_t* data; |
| 88 }; |
| 89 |
| 90 |
| 83 static uint16_t* ConvertUtf8ToUtf16(const uint16_t* const data_in, | 91 static uint16_t* ConvertUtf8ToUtf16(const uint16_t* const data_in, |
| 84 unsigned* length, | 92 unsigned* length_in_bytes, |
| 85 bool* is_one_byte) { | 93 bool* is_one_byte) { |
| 86 const unsigned file_size = *length; | 94 const unsigned file_size = *length_in_bytes; |
| 87 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(data_in); | 95 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(data_in); |
| 88 const uint32_t kMaxUtf16Character = 0xffff; | 96 const uint32_t kMaxUtf16Character = 0xffff; |
| 89 // Get utf8 length. | 97 // Get utf8 length. |
| 90 unsigned utf16_chars = 0; | 98 unsigned utf16_chars = 0; |
| 91 *is_one_byte = true; | 99 *is_one_byte = true; |
| 92 { | 100 { |
| 93 unsigned position = 0; | 101 unsigned position = 0; |
| 94 while (position < file_size) { | 102 while (position < file_size) { |
| 95 uint32_t c = char_data[position]; | 103 uint32_t c = char_data[position]; |
| 96 if (c <= unibrow::Utf8::kMaxOneByteChar) { | 104 if (c <= unibrow::Utf8::kMaxOneByteChar) { |
| (...skipping 24 matching lines...) Expand all Loading... |
| 121 file_size - position, | 129 file_size - position, |
| 122 &position); | 130 &position); |
| 123 } | 131 } |
| 124 if (c > kMaxUtf16Character) { | 132 if (c > kMaxUtf16Character) { |
| 125 data[i++] = unibrow::Utf16::LeadSurrogate(c); | 133 data[i++] = unibrow::Utf16::LeadSurrogate(c); |
| 126 data[i++] = unibrow::Utf16::TrailSurrogate(c); | 134 data[i++] = unibrow::Utf16::TrailSurrogate(c); |
| 127 } else { | 135 } else { |
| 128 data[i++] = static_cast<uc16>(c); | 136 data[i++] = static_cast<uc16>(c); |
| 129 } | 137 } |
| 130 } | 138 } |
| 131 *length = 2 * utf16_chars; | 139 *length_in_bytes = 2 * utf16_chars; |
| 132 return data; | 140 return data; |
| 133 } | 141 } |
| 134 | 142 |
| 135 | 143 |
| 136 static uint16_t* ConvertUtf16ToLatin1(const uint16_t* const data_in, | 144 static uint16_t* ConvertUtf16ToLatin1(const uint16_t* const data_in, |
| 137 unsigned* length) { | 145 unsigned* length_in_bytes) { |
| 138 const unsigned size = *length / 2 + *length % 2; | 146 const unsigned size = *length_in_bytes / 2 + *length_in_bytes % 2; |
| 139 uint16_t* data = new uint16_t[size]; | 147 uint16_t* data = new uint16_t[size]; |
| 140 uint8_t* char_data = reinterpret_cast<uint8_t*>(data); | 148 uint8_t* char_data = reinterpret_cast<uint8_t*>(data); |
| 141 CopyChars(char_data, data_in, size); | 149 CopyChars(char_data, data_in, size); |
| 142 *length = size; | 150 *length_in_bytes = size; |
| 143 return data; | 151 return data; |
| 144 } | 152 } |
| 145 | 153 |
| 146 | 154 |
| 147 static uint16_t* Repeat(int repeat, | 155 static uint16_t* Repeat(int repeat, |
| 148 const uint16_t* const data_in, | 156 const uint16_t* const data_in, |
| 149 unsigned* length) { | 157 unsigned* length_in_bytes) { |
| 150 const unsigned file_size = *length; | 158 const unsigned file_size = *length_in_bytes; |
| 151 unsigned size = file_size * repeat; | 159 unsigned size = file_size * repeat; |
| 152 uint16_t* data = new uint16_t[size / 2 + size % 2]; | 160 uint16_t* data = new uint16_t[size / 2 + size % 2]; |
| 153 uint8_t* char_data = reinterpret_cast<uint8_t*>(data); | 161 uint8_t* char_data = reinterpret_cast<uint8_t*>(data); |
| 154 for (int i = 0; i < repeat; i++) { | 162 for (int i = 0; i < repeat; i++) { |
| 155 memcpy(&char_data[i * file_size], data_in, file_size); | 163 memcpy(&char_data[i * file_size], data_in, file_size); |
| 156 } | 164 } |
| 157 *length = size; | 165 *length_in_bytes = size; |
| 158 return data; | 166 return data; |
| 159 } | 167 } |
| 160 | 168 |
| 161 | 169 |
| 162 static uint16_t* ReadFile(const char* name, unsigned* length) { | 170 static uint16_t* ReadFile(const char* name, unsigned* length_in_bytes) { |
| 163 FILE* file = fopen(name, "rb"); | 171 FILE* file = fopen(name, "rb"); |
| 164 CHECK(file != NULL); | 172 CHECK(file != NULL); |
| 165 // Get file size. | 173 // Get file size. |
| 166 fseek(file, 0, SEEK_END); | 174 fseek(file, 0, SEEK_END); |
| 167 unsigned file_size = ftell(file); | 175 unsigned file_size = ftell(file); |
| 168 rewind(file); | 176 rewind(file); |
| 169 // Read file contents. | 177 // Read file contents. |
| 170 uint16_t* data = new uint16_t[file_size / 2 + file_size % 2]; | 178 uint16_t* data = new uint16_t[file_size / 2 + file_size % 2]; |
| 171 uint8_t* char_data = reinterpret_cast<uint8_t*>(data); | 179 uint8_t* char_data = reinterpret_cast<uint8_t*>(data); |
| 172 for (unsigned i = 0; i < file_size;) { | 180 for (unsigned i = 0; i < file_size;) { |
| 173 i += fread(&char_data[i], 1, file_size - i, file); | 181 i += fread(&char_data[i], 1, file_size - i, file); |
| 174 } | 182 } |
| 175 fclose(file); | 183 fclose(file); |
| 176 *length = file_size; | 184 *length_in_bytes = file_size; |
| 177 return data; | 185 return data; |
| 178 } | 186 } |
| 179 | 187 |
| 180 | 188 |
| 181 static uint16_t* ReadFile(const char* name, | 189 static FileData ReadFile(const char* file_name, |
| 182 const LexerShellSettings& settings, | 190 const LexerShellSettings& settings) { |
| 183 unsigned* length, | 191 unsigned length_in_bytes; |
| 184 Encoding* output_encoding) { | 192 uint16_t* data = ReadFile(file_name, &length_in_bytes); |
| 185 uint16_t* data = ReadFile(name, length); | 193 CHECK_GE(length_in_bytes, 0); |
| 186 CHECK_GE(*length, 0); | |
| 187 if (*length == 0) return data; | |
| 188 | 194 |
| 189 *output_encoding = settings.encoding; | 195 Encoding encoding = settings.encoding; |
| 190 | 196 if (encoding == UTF8TO16 || encoding == UTF8TOLATIN1) { |
| 191 if (settings.encoding == UTF8TO16 || | |
| 192 settings.encoding == UTF8TOLATIN1) { | |
| 193 bool is_one_byte; | 197 bool is_one_byte; |
| 194 uint16_t* new_data = ConvertUtf8ToUtf16(data, length, &is_one_byte); | 198 uint16_t* new_data = ConvertUtf8ToUtf16( |
| 195 if (settings.encoding == UTF8TOLATIN1 && is_one_byte) { | 199 data, &length_in_bytes, &is_one_byte); |
| 196 *output_encoding = LATIN1; | 200 if (encoding == UTF8TOLATIN1 && is_one_byte) { |
| 201 encoding = LATIN1; |
| 197 } else { | 202 } else { |
| 198 *output_encoding = UTF16; | 203 encoding = UTF16; |
| 199 } | 204 } |
| 200 delete data; | 205 delete data; |
| 201 data = new_data; | 206 data = new_data; |
| 202 } | 207 } |
| 203 | 208 |
| 204 if (settings.encoding == UTF8TOLATIN1 && *output_encoding == LATIN1) { | 209 if (settings.encoding == UTF8TOLATIN1 && encoding == LATIN1) { |
| 205 uint16_t* new_data = ConvertUtf16ToLatin1(data, length); | 210 uint16_t* new_data = ConvertUtf16ToLatin1(data, &length_in_bytes); |
| 206 delete data; | 211 delete data; |
| 207 data = new_data; | 212 data = new_data; |
| 208 } | 213 } |
| 209 | 214 |
| 210 if (settings.repeat > 1) { | 215 if (settings.repeat > 1) { |
| 211 uint16_t* new_data = Repeat(settings.repeat, data, length); | 216 uint16_t* new_data = Repeat(settings.repeat, data, &length_in_bytes); |
| 212 delete data; | 217 delete data; |
| 213 data = new_data; | 218 data = new_data; |
| 214 } | 219 } |
| 215 | 220 |
| 216 return data; | 221 FileData file_data; |
| 222 file_data.file_name = file_name; |
| 223 file_data.data = data; |
| 224 file_data.length_in_bytes = length_in_bytes; |
| 225 file_data.encoding = encoding; |
| 226 |
| 227 return file_data; |
| 217 } | 228 } |
| 218 | 229 |
| 219 | 230 |
| 220 static bool HasLiteral(Token::Value token) { | 231 static bool HasLiteral(Token::Value token) { |
| 221 return token == Token::IDENTIFIER || | 232 return token == Token::IDENTIFIER || |
| 222 token == Token::STRING || | 233 token == Token::STRING || |
| 223 token == Token::NUMBER; | 234 token == Token::NUMBER; |
| 224 } | 235 } |
| 225 | 236 |
| 226 | 237 |
| (...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 329 token = scanner.Next(); | 340 token = scanner.Next(); |
| 330 Handle<String> literal; | 341 Handle<String> literal; |
| 331 if (HasLiteral(token)) { | 342 if (HasLiteral(token)) { |
| 332 literal = scanner.AllocateInternalizedString(isolate); | 343 literal = scanner.AllocateInternalizedString(isolate); |
| 333 } | 344 } |
| 334 if (settings.print_tokens) { | 345 if (settings.print_tokens) { |
| 335 tokens.push_back(new TokenWithLocation(token, &scanner, literal)); | 346 tokens.push_back(new TokenWithLocation(token, &scanner, literal)); |
| 336 } | 347 } |
| 337 if (token == Token::ILLEGAL && settings.break_after_illegal) break; | 348 if (token == Token::ILLEGAL && settings.break_after_illegal) break; |
| 338 } while (token != Token::EOS); | 349 } while (token != Token::EOS); |
| 350 TimeDelta elapsed = timer.Elapsed(); |
| 339 // Dump tokens. | 351 // Dump tokens. |
| 340 if (settings.print_tokens) { | 352 if (settings.print_tokens) { |
| 341 if (!settings.print_tokens_for_compare) { | 353 if (!settings.print_tokens_for_compare) { |
| 342 printf("No of tokens:\t%d\n", static_cast<int>(tokens.size())); | 354 printf("No of tokens:\t%d\n", static_cast<int>(tokens.size())); |
| 343 } | 355 } |
| 344 for (size_t i = 0; i < tokens.size(); ++i) { | 356 for (size_t i = 0; i < tokens.size(); ++i) { |
| 345 tokens[i]->Print(settings.print_tokens_for_compare); | 357 tokens[i]->Print(settings.print_tokens_for_compare); |
| 346 } | 358 } |
| 347 } | 359 } |
| 348 for (size_t i = 0; i < tokens.size(); ++i) { | 360 for (size_t i = 0; i < tokens.size(); ++i) { |
| 349 delete tokens[i]; | 361 delete tokens[i]; |
| 350 } | 362 } |
| 351 return timer.Elapsed(); | 363 return elapsed; |
| 352 } | 364 } |
| 353 | 365 |
| 354 | 366 |
| 355 static TimeDelta ProcessFile( | 367 static TimeDelta ProcessFile( |
| 356 const char* fname, | |
| 357 Isolate* isolate, | 368 Isolate* isolate, |
| 358 const LexerShellSettings& settings, | 369 const LexerShellSettings& settings, |
| 370 const FileData& file_data, |
| 359 int truncate_by, | 371 int truncate_by, |
| 360 bool* can_truncate) { | 372 bool* can_truncate) { |
| 361 if (settings.print_tokens && !settings.print_tokens_for_compare) { | 373 if (settings.print_tokens && !settings.print_tokens_for_compare) { |
| 362 printf("Processing file %s, truncating by %d bytes\n", fname, truncate_by); | 374 printf("Processing file %s, truncating by %d bytes\n", |
| 375 file_data.file_name, truncate_by); |
| 363 } | 376 } |
| 364 HandleScope handle_scope(isolate); | 377 HandleScope handle_scope(isolate); |
| 378 const uint16_t* buffer = file_data.data; |
| 379 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(buffer); |
| 380 const uint8_t* buffer_end = &char_data[file_data.length_in_bytes]; |
| 365 TimeDelta time; | 381 TimeDelta time; |
| 366 { | 382 if (truncate_by > buffer_end - char_data) { |
| 367 unsigned length_in_bytes; | 383 *can_truncate = false; |
| 368 Encoding output_encoding; | 384 } else { |
| 369 const uint16_t* buffer = | 385 buffer_end -= truncate_by; |
| 370 ReadFile(fname, settings, &length_in_bytes, &output_encoding); | 386 time = RunLexer(buffer, buffer_end, isolate, file_data.encoding, settings); |
| 371 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(buffer); | |
| 372 const uint8_t* buffer_end = &char_data[length_in_bytes]; | |
| 373 if (truncate_by > buffer_end - char_data) { | |
| 374 *can_truncate = false; | |
| 375 } else { | |
| 376 buffer_end -= truncate_by; | |
| 377 time = RunLexer(buffer, buffer_end, isolate, output_encoding, settings); | |
| 378 } | |
| 379 delete[] buffer; | |
| 380 } | 387 } |
| 381 | |
| 382 return time; | 388 return time; |
| 383 } | 389 } |
| 384 | 390 |
| 385 | 391 |
| 386 int main(int argc, char* argv[]) { | 392 int main(int argc, char* argv[]) { |
| 387 v8::V8::InitializeICU(); | 393 v8::V8::InitializeICU(); |
| 388 v8::V8::SetFlagsFromCommandLine(&argc, argv, true); | 394 v8::V8::SetFlagsFromCommandLine(&argc, argv, true); |
| 389 std::vector<std::string> fnames; | 395 std::string file_name; |
| 390 LexerShellSettings settings; | 396 LexerShellSettings settings; |
| 391 for (int i = 0; i < argc; ++i) { | 397 for (int i = 0; i < argc; ++i) { |
| 392 if (strcmp(argv[i], "--latin1") == 0) { | 398 if (strcmp(argv[i], "--latin1") == 0) { |
| 393 settings.encoding = LATIN1; | 399 settings.encoding = LATIN1; |
| 394 } else if (strcmp(argv[i], "--utf8") == 0) { | 400 } else if (strcmp(argv[i], "--utf8") == 0) { |
| 395 settings.encoding = UTF8; | 401 settings.encoding = UTF8; |
| 396 } else if (strcmp(argv[i], "--utf16") == 0) { | 402 } else if (strcmp(argv[i], "--utf16") == 0) { |
| 397 settings.encoding = UTF16; | 403 settings.encoding = UTF16; |
| 398 } else if (strcmp(argv[i], "--utf8to16") == 0) { | 404 } else if (strcmp(argv[i], "--utf8to16") == 0) { |
| 399 #ifdef V8_USE_GENERATED_LEXER | 405 #ifdef V8_USE_GENERATED_LEXER |
| (...skipping 25 matching lines...) Expand all Loading... |
| 425 settings.harmony_modules = true; | 431 settings.harmony_modules = true; |
| 426 settings.harmony_scoping = true; | 432 settings.harmony_scoping = true; |
| 427 } else if (strncmp(argv[i], "--benchmark=", 12) == 0) { | 433 } else if (strncmp(argv[i], "--benchmark=", 12) == 0) { |
| 428 // Ignore. | 434 // Ignore. |
| 429 } else if (strncmp(argv[i], "--repeat=", 9) == 0) { | 435 } else if (strncmp(argv[i], "--repeat=", 9) == 0) { |
| 430 std::string repeat_str = std::string(argv[i]).substr(9); | 436 std::string repeat_str = std::string(argv[i]).substr(9); |
| 431 settings.repeat = atoi(repeat_str.c_str()); | 437 settings.repeat = atoi(repeat_str.c_str()); |
| 432 } else if (strcmp(argv[i], "--eos-test") == 0) { | 438 } else if (strcmp(argv[i], "--eos-test") == 0) { |
| 433 settings.eos_test = true; | 439 settings.eos_test = true; |
| 434 } else if (i > 0 && argv[i][0] != '-') { | 440 } else if (i > 0 && argv[i][0] != '-') { |
| 435 fnames.push_back(std::string(argv[i])); | 441 file_name = std::string(argv[i]); |
| 436 } | 442 } |
| 437 } | 443 } |
| 444 CHECK_NE(0, file_name.size()); |
| 445 FileData file_data = ReadFile(file_name.c_str(), settings); |
| 438 { | 446 { |
| 439 v8::Isolate* isolate = v8::Isolate::GetCurrent(); | 447 v8::Isolate* isolate = v8::Isolate::GetCurrent(); |
| 440 v8::HandleScope handle_scope(isolate); | 448 v8::HandleScope handle_scope(isolate); |
| 441 v8::Local<v8::Context> context = v8::Context::New(isolate); | 449 v8::Local<v8::Context> context = v8::Context::New(isolate); |
| 442 CHECK(!context.IsEmpty()); | 450 CHECK(!context.IsEmpty()); |
| 443 v8::Context::Scope scope(context); | 451 v8::Context::Scope scope(context); |
| 444 Isolate* internal_isolate = Isolate::Current(); | 452 Isolate* internal_isolate = Isolate::Current(); |
| 445 double total_time = 0; | 453 double total_time = 0; |
| 446 for (size_t i = 0; i < fnames.size(); i++) { | 454 bool can_truncate = settings.eos_test; |
| 447 std::pair<TimeDelta, TimeDelta> times; | 455 int truncate_by = 0; |
| 448 bool can_truncate = settings.eos_test; | 456 do { |
| 449 int truncate_by = 0; | 457 TimeDelta t = ProcessFile(internal_isolate, |
| 450 do { | 458 settings, |
| 451 TimeDelta t = ProcessFile(fnames[i].c_str(), | 459 file_data, |
| 452 internal_isolate, | 460 truncate_by, |
| 453 settings, | 461 &can_truncate); |
| 454 truncate_by, | 462 total_time += t.InMillisecondsF(); |
| 455 &can_truncate); | 463 ++truncate_by; |
| 456 total_time += t.InMillisecondsF(); | 464 } while (can_truncate); |
| 457 ++truncate_by; | |
| 458 } while (can_truncate); | |
| 459 } | |
| 460 if (!settings.print_tokens_for_compare) { | 465 if (!settings.print_tokens_for_compare) { |
| 461 printf("RunTime: %.f ms\n", total_time); | 466 printf("RunTime: %.f ms\n", total_time); |
| 462 } | 467 } |
| 463 } | 468 } |
| 469 delete[] file_data.data; |
| 464 v8::V8::Dispose(); | 470 v8::V8::Dispose(); |
| 465 return 0; | 471 return 0; |
| 466 } | 472 } |
| OLD | NEW |