| OLD | NEW |
| 1 // Copyright 2013 the V8 project authors. All rights reserved. | 1 // Copyright 2013 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 46 #include "lexer/lexer.h" | 46 #include "lexer/lexer.h" |
| 47 | 47 |
| 48 using namespace v8::internal; | 48 using namespace v8::internal; |
| 49 | 49 |
| 50 | 50 |
| 51 enum Encoding { | 51 enum Encoding { |
| 52 LATIN1, | 52 LATIN1, |
| 53 UTF8, | 53 UTF8, |
| 54 UTF16, | 54 UTF16, |
| 55 UTF8TO16, // Convert stream via scanner input stream | 55 UTF8TO16, // Convert stream via scanner input stream |
| 56 UTF8TO16_PRECONVERT // Convert stream during file read | 56 UTF8TOLATIN1, // Convert stream via scanner input stream |
| 57 }; | 57 }; |
| 58 | 58 |
| 59 | 59 |
| 60 struct LexerShellSettings { | 60 struct LexerShellSettings { |
| 61 Encoding encoding; | 61 Encoding encoding; |
| 62 bool print_tokens; | 62 bool print_tokens; |
| 63 bool print_tokens_for_compare; | 63 bool print_tokens_for_compare; |
| 64 bool break_after_illegal; | 64 bool break_after_illegal; |
| 65 bool eos_test; | 65 bool eos_test; |
| 66 int repeat; | 66 int repeat; |
| 67 bool harmony_numeric_literals; | 67 bool harmony_numeric_literals; |
| 68 bool harmony_modules; | 68 bool harmony_modules; |
| 69 bool harmony_scoping; | 69 bool harmony_scoping; |
| 70 LexerShellSettings() | 70 LexerShellSettings() |
| 71 : encoding(LATIN1), | 71 : encoding(LATIN1), |
| 72 print_tokens(false), | 72 print_tokens(false), |
| 73 print_tokens_for_compare(false), | 73 print_tokens_for_compare(false), |
| 74 break_after_illegal(false), | 74 break_after_illegal(false), |
| 75 eos_test(false), | 75 eos_test(false), |
| 76 repeat(1), | 76 repeat(1), |
| 77 harmony_numeric_literals(false), | 77 harmony_numeric_literals(false), |
| 78 harmony_modules(false), | 78 harmony_modules(false), |
| 79 harmony_scoping(false) {} | 79 harmony_scoping(false) {} |
| 80 }; | 80 }; |
| 81 | 81 |
| 82 | 82 |
| 83 static uint16_t* ConvertUtf8ToUtf16(const uint16_t* const data_in, | 83 static uint16_t* ConvertUtf8ToUtf16(const uint16_t* const data_in, |
| 84 unsigned* length) { | 84 unsigned* length, |
| 85 bool* is_one_byte) { |
| 85 const unsigned file_size = *length; | 86 const unsigned file_size = *length; |
| 86 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(data_in); | 87 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(data_in); |
| 87 const uint32_t kMaxUtf16Character = 0xffff; | 88 const uint32_t kMaxUtf16Character = 0xffff; |
| 88 // Get utf8 length. | 89 // Get utf8 length. |
| 89 unsigned utf16_chars = 0; | 90 unsigned utf16_chars = 0; |
| 91 *is_one_byte = true; |
| 90 { | 92 { |
| 91 unsigned position = 0; | 93 unsigned position = 0; |
| 92 while (position < file_size) { | 94 while (position < file_size) { |
| 93 uint32_t c = char_data[position]; | 95 uint32_t c = char_data[position]; |
| 94 if (c <= unibrow::Utf8::kMaxOneByteChar) { | 96 if (c <= unibrow::Utf8::kMaxOneByteChar) { |
| 95 position++; | 97 position++; |
| 96 } else { | 98 } else { |
| 99 *is_one_byte = false; |
| 97 c = unibrow::Utf8::CalculateValue(char_data + position, | 100 c = unibrow::Utf8::CalculateValue(char_data + position, |
| 98 file_size - position, | 101 file_size - position, |
| 99 &position); | 102 &position); |
| 100 } | 103 } |
| 101 if (c > kMaxUtf16Character) { | 104 if (c > kMaxUtf16Character) { |
| 102 utf16_chars += 2; | 105 utf16_chars += 2; |
| 103 } else { | 106 } else { |
| 104 utf16_chars += 1; | 107 utf16_chars += 1; |
| 105 } | 108 } |
| 106 } | 109 } |
| (...skipping 16 matching lines...) Expand all Loading... |
| 123 data[i++] = unibrow::Utf16::TrailSurrogate(c); | 126 data[i++] = unibrow::Utf16::TrailSurrogate(c); |
| 124 } else { | 127 } else { |
| 125 data[i++] = static_cast<uc16>(c); | 128 data[i++] = static_cast<uc16>(c); |
| 126 } | 129 } |
| 127 } | 130 } |
| 128 *length = 2 * utf16_chars; | 131 *length = 2 * utf16_chars; |
| 129 return data; | 132 return data; |
| 130 } | 133 } |
| 131 | 134 |
| 132 | 135 |
| 136 static uint16_t* ConvertUtf16ToLatin1(const uint16_t* const data_in, |
| 137 unsigned* length) { |
| 138 const unsigned size = *length / 2 + *length % 2; |
| 139 uint16_t* data = new uint16_t[size]; |
| 140 uint8_t* char_data = reinterpret_cast<uint8_t*>(data); |
| 141 CopyChars(char_data, data_in, size); |
| 142 *length = size; |
| 143 return data; |
| 144 } |
| 145 |
| 146 |
| 133 static uint16_t* Repeat(int repeat, | 147 static uint16_t* Repeat(int repeat, |
| 134 const uint16_t* const data_in, | 148 const uint16_t* const data_in, |
| 135 unsigned* length) { | 149 unsigned* length) { |
| 136 const unsigned file_size = *length; | 150 const unsigned file_size = *length; |
| 137 unsigned size = file_size * repeat; | 151 unsigned size = file_size * repeat; |
| 138 uint16_t* data = new uint16_t[size / 2 + size % 2]; | 152 uint16_t* data = new uint16_t[size / 2 + size % 2]; |
| 139 uint8_t* char_data = reinterpret_cast<uint8_t*>(data); | 153 uint8_t* char_data = reinterpret_cast<uint8_t*>(data); |
| 140 for (int i = 0; i < repeat; i++) { | 154 for (int i = 0; i < repeat; i++) { |
| 141 memcpy(&char_data[i * file_size], data_in, file_size); | 155 memcpy(&char_data[i * file_size], data_in, file_size); |
| 142 } | 156 } |
| (...skipping 16 matching lines...) Expand all Loading... |
| 159 i += fread(&char_data[i], 1, file_size - i, file); | 173 i += fread(&char_data[i], 1, file_size - i, file); |
| 160 } | 174 } |
| 161 fclose(file); | 175 fclose(file); |
| 162 *length = file_size; | 176 *length = file_size; |
| 163 return data; | 177 return data; |
| 164 } | 178 } |
| 165 | 179 |
| 166 | 180 |
| 167 static uint16_t* ReadFile(const char* name, | 181 static uint16_t* ReadFile(const char* name, |
| 168 const LexerShellSettings& settings, | 182 const LexerShellSettings& settings, |
| 169 unsigned* length) { | 183 unsigned* length, |
| 184 Encoding* output_encoding) { |
| 170 uint16_t* data = ReadFile(name, length); | 185 uint16_t* data = ReadFile(name, length); |
| 171 CHECK_GE(*length, 0); | 186 CHECK_GE(*length, 0); |
| 172 if (*length == 0) return data; | 187 if (*length == 0) return data; |
| 173 | 188 |
| 174 if (settings.encoding == UTF8TO16_PRECONVERT) { | 189 *output_encoding = settings.encoding; |
| 175 uint16_t* new_data = ConvertUtf8ToUtf16(data, length); | 190 |
| 191 if (settings.encoding == UTF8TO16 || |
| 192 settings.encoding == UTF8TOLATIN1) { |
| 193 bool is_one_byte; |
| 194 uint16_t* new_data = ConvertUtf8ToUtf16(data, length, &is_one_byte); |
| 195 if (settings.encoding == UTF8TOLATIN1 && is_one_byte) { |
| 196 *output_encoding = LATIN1; |
| 197 } else { |
| 198 *output_encoding = UTF16; |
| 199 } |
| 176 delete data; | 200 delete data; |
| 177 data = new_data; | 201 data = new_data; |
| 178 } | 202 } |
| 203 |
| 204 if (settings.encoding == UTF8TOLATIN1 && *output_encoding == LATIN1) { |
| 205 uint16_t* new_data = ConvertUtf16ToLatin1(data, length); |
| 206 delete data; |
| 207 data = new_data; |
| 208 } |
| 179 | 209 |
| 180 if (settings.repeat > 1) { | 210 if (settings.repeat > 1) { |
| 181 uint16_t* new_data = Repeat(settings.repeat, data, length); | 211 uint16_t* new_data = Repeat(settings.repeat, data, length); |
| 182 delete data; | 212 delete data; |
| 183 data = new_data; | 213 data = new_data; |
| 184 } | 214 } |
| 185 | 215 |
| 186 return data; | 216 return data; |
| 187 } | 217 } |
| 188 | 218 |
| (...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 258 } | 288 } |
| 259 | 289 |
| 260 private: | 290 private: |
| 261 DISALLOW_COPY_AND_ASSIGN(TokenWithLocation); | 291 DISALLOW_COPY_AND_ASSIGN(TokenWithLocation); |
| 262 }; | 292 }; |
| 263 | 293 |
| 264 | 294 |
| 265 static TimeDelta RunLexer(const uint16_t* source, | 295 static TimeDelta RunLexer(const uint16_t* source, |
| 266 const uint8_t* source_end, | 296 const uint8_t* source_end, |
| 267 Isolate* isolate, | 297 Isolate* isolate, |
| 298 Encoding output_encoding, |
| 268 const LexerShellSettings& settings) { | 299 const LexerShellSettings& settings) { |
| 269 SmartPointer<Utf16CharacterStream> stream; | 300 SmartPointer<Utf16CharacterStream> stream; |
| 270 const uint8_t* one_byte_source = reinterpret_cast<const uint8_t*>(source); | 301 const uint8_t* one_byte_source = reinterpret_cast<const uint8_t*>(source); |
| 271 int bytes = source_end - one_byte_source; | 302 int bytes = source_end - one_byte_source; |
| 272 switch (settings.encoding) { | 303 switch (output_encoding) { |
| 273 case UTF8TO16: | |
| 274 case UTF8: | 304 case UTF8: |
| 275 stream.Reset(new Utf8ToUtf16CharacterStream(one_byte_source, bytes)); | 305 stream.Reset(new Utf8ToUtf16CharacterStream(one_byte_source, bytes)); |
| 276 break; | 306 break; |
| 277 case UTF8TO16_PRECONVERT: | |
| 278 case UTF16: { | 307 case UTF16: { |
| 279 CHECK_EQ(0, bytes % 2); | 308 CHECK_EQ(0, bytes % 2); |
| 280 Handle<String> result = isolate->factory()->NewStringFromTwoByte( | 309 Handle<String> result = isolate->factory()->NewStringFromTwoByte( |
| 281 Vector<const uint16_t>(source, bytes / 2)); | 310 Vector<const uint16_t>(source, bytes / 2)); |
| 282 stream.Reset( | 311 stream.Reset( |
| 283 new GenericStringUtf16CharacterStream(result, 0, result->length())); | 312 new GenericStringUtf16CharacterStream(result, 0, result->length())); |
| 284 break; | 313 break; |
| 285 } | 314 } |
| 286 case LATIN1: { | 315 case LATIN1: { |
| 287 Handle<String> result = isolate->factory()->NewStringFromOneByte( | 316 Handle<String> result = isolate->factory()->NewStringFromOneByte( |
| 288 Vector<const uint8_t>(one_byte_source, bytes)); | 317 Vector<const uint8_t>(one_byte_source, bytes)); |
| 289 stream.Reset( | 318 stream.Reset( |
| 290 new GenericStringUtf16CharacterStream(result, 0, result->length())); | 319 new GenericStringUtf16CharacterStream(result, 0, result->length())); |
| 291 break; | 320 break; |
| 292 } | 321 } |
| 322 case UTF8TO16: |
| 323 case UTF8TOLATIN1: |
| 324 CHECK(false); |
| 293 } | 325 } |
| 294 Scanner scanner(isolate->unicode_cache()); | 326 Scanner scanner(isolate->unicode_cache()); |
| 295 scanner.SetHarmonyNumericLiterals(settings.harmony_numeric_literals); | 327 scanner.SetHarmonyNumericLiterals(settings.harmony_numeric_literals); |
| 296 scanner.SetHarmonyModules(settings.harmony_modules); | 328 scanner.SetHarmonyModules(settings.harmony_modules); |
| 297 scanner.SetHarmonyScoping(settings.harmony_scoping); | 329 scanner.SetHarmonyScoping(settings.harmony_scoping); |
| 298 ElapsedTimer timer; | 330 ElapsedTimer timer; |
| 299 std::vector<TokenWithLocation*> tokens; | 331 std::vector<TokenWithLocation*> tokens; |
| 300 timer.Start(); | 332 timer.Start(); |
| 301 scanner.Initialize(stream.get()); | 333 scanner.Initialize(stream.get()); |
| 302 Token::Value token; | 334 Token::Value token; |
| (...skipping 30 matching lines...) Expand all Loading... |
| 333 const LexerShellSettings& settings, | 365 const LexerShellSettings& settings, |
| 334 int truncate_by, | 366 int truncate_by, |
| 335 bool* can_truncate) { | 367 bool* can_truncate) { |
| 336 if (settings.print_tokens && !settings.print_tokens_for_compare) { | 368 if (settings.print_tokens && !settings.print_tokens_for_compare) { |
| 337 printf("Processing file %s, truncating by %d bytes\n", fname, truncate_by); | 369 printf("Processing file %s, truncating by %d bytes\n", fname, truncate_by); |
| 338 } | 370 } |
| 339 HandleScope handle_scope(isolate); | 371 HandleScope handle_scope(isolate); |
| 340 TimeDelta time; | 372 TimeDelta time; |
| 341 { | 373 { |
| 342 unsigned length_in_bytes; | 374 unsigned length_in_bytes; |
| 343 const uint16_t* buffer = ReadFile(fname, settings, &length_in_bytes); | 375 Encoding output_encoding; |
| 376 const uint16_t* buffer = |
| 377 ReadFile(fname, settings, &length_in_bytes, &output_encoding); |
| 344 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(buffer); | 378 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(buffer); |
| 345 const uint8_t* buffer_end = &char_data[length_in_bytes]; | 379 const uint8_t* buffer_end = &char_data[length_in_bytes]; |
| 346 if (truncate_by > buffer_end - char_data) { | 380 if (truncate_by > buffer_end - char_data) { |
| 347 *can_truncate = false; | 381 *can_truncate = false; |
| 348 } else { | 382 } else { |
| 349 buffer_end -= truncate_by; | 383 buffer_end -= truncate_by; |
| 350 time = RunLexer(buffer, buffer_end, isolate, settings); | 384 time = RunLexer(buffer, buffer_end, isolate, output_encoding, settings); |
| 351 } | 385 } |
| 352 delete[] buffer; | 386 delete[] buffer; |
| 353 } | 387 } |
| 354 | 388 |
| 355 return time; | 389 return time; |
| 356 } | 390 } |
| 357 | 391 |
| 358 | 392 |
| 359 int main(int argc, char* argv[]) { | 393 int main(int argc, char* argv[]) { |
| 360 v8::V8::InitializeICU(); | 394 v8::V8::InitializeICU(); |
| 361 v8::V8::SetFlagsFromCommandLine(&argc, argv, true); | 395 v8::V8::SetFlagsFromCommandLine(&argc, argv, true); |
| 362 std::vector<std::string> fnames; | 396 std::vector<std::string> fnames; |
| 363 LexerShellSettings settings; | 397 LexerShellSettings settings; |
| 364 for (int i = 0; i < argc; ++i) { | 398 for (int i = 0; i < argc; ++i) { |
| 365 if (strcmp(argv[i], "--latin1") == 0) { | 399 if (strcmp(argv[i], "--latin1") == 0) { |
| 366 settings.encoding = LATIN1; | 400 settings.encoding = LATIN1; |
| 367 } else if (strcmp(argv[i], "--utf8") == 0) { | 401 } else if (strcmp(argv[i], "--utf8") == 0) { |
| 368 settings.encoding = UTF8; | 402 settings.encoding = UTF8; |
| 369 } else if (strcmp(argv[i], "--utf16") == 0) { | 403 } else if (strcmp(argv[i], "--utf16") == 0) { |
| 370 settings.encoding = UTF16; | 404 settings.encoding = UTF16; |
| 371 } else if (strcmp(argv[i], "--utf8to16") == 0) { | 405 } else if (strcmp(argv[i], "--utf8to16") == 0) { |
| 372 #ifdef V8_USE_GENERATED_LEXER | 406 #ifdef V8_USE_GENERATED_LEXER |
| 373 settings.encoding = UTF8TO16_PRECONVERT; | 407 settings.encoding = UTF8TO16; |
| 374 #else | 408 #else |
| 375 settings.encoding = UTF8TO16; | 409 settings.encoding = UTF8; |
| 410 #endif |
| 411 } else if (strcmp(argv[i], "--utf8tolatin1") == 0) { |
| 412 #ifdef V8_USE_GENERATED_LEXER |
| 413 settings.encoding = UTF8TOLATIN1; |
| 414 #else |
| 415 settings.encoding = UTF8; |
| 376 #endif | 416 #endif |
| 377 } else if (strcmp(argv[i], "--print-tokens") == 0) { | 417 } else if (strcmp(argv[i], "--print-tokens") == 0) { |
| 378 settings.print_tokens = true; | 418 settings.print_tokens = true; |
| 379 } else if (strcmp(argv[i], "--print-tokens-for-compare") == 0) { | 419 } else if (strcmp(argv[i], "--print-tokens-for-compare") == 0) { |
| 380 settings.print_tokens = true; | 420 settings.print_tokens = true; |
| 381 settings.print_tokens_for_compare = true; | 421 settings.print_tokens_for_compare = true; |
| 382 } else if (strcmp(argv[i], "--no-baseline") == 0) { | 422 } else if (strcmp(argv[i], "--no-baseline") == 0) { |
| 383 // Ignore. | 423 // Ignore. |
| 384 } else if (strcmp(argv[i], "--no-experimental") == 0) { | 424 } else if (strcmp(argv[i], "--no-experimental") == 0) { |
| 385 // Ignore. | 425 // Ignore. |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 424 ++truncate_by; | 464 ++truncate_by; |
| 425 } while (can_truncate); | 465 } while (can_truncate); |
| 426 } | 466 } |
| 427 if (!settings.print_tokens_for_compare) { | 467 if (!settings.print_tokens_for_compare) { |
| 428 printf("RunTime: %.f ms\n", total_time); | 468 printf("RunTime: %.f ms\n", total_time); |
| 429 } | 469 } |
| 430 } | 470 } |
| 431 v8::V8::Dispose(); | 471 v8::V8::Dispose(); |
| 432 return 0; | 472 return 0; |
| 433 } | 473 } |
| OLD | NEW |