| OLD | NEW |
| 1 // Copyright 2013 the V8 project authors. All rights reserved. | 1 // Copyright 2013 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 53 UTF8, | 53 UTF8, |
| 54 UTF16, | 54 UTF16, |
| 55 UTF8TO16, // Convert stream via scanner input stream | 55 UTF8TO16, // Convert stream via scanner input stream |
| 56 UTF8TO16_PRECONVERT // Convert stream during file read | 56 UTF8TO16_PRECONVERT // Convert stream during file read |
| 57 }; | 57 }; |
| 58 | 58 |
| 59 | 59 |
| 60 struct LexerShellSettings { | 60 struct LexerShellSettings { |
| 61 Encoding encoding; | 61 Encoding encoding; |
| 62 bool print_tokens; | 62 bool print_tokens; |
| 63 bool print_tokens_for_compare; |
| 63 bool break_after_illegal; | 64 bool break_after_illegal; |
| 64 bool eos_test; | 65 bool eos_test; |
| 65 int repeat; | 66 int repeat; |
| 66 bool harmony_numeric_literals; | 67 bool harmony_numeric_literals; |
| 67 bool harmony_modules; | 68 bool harmony_modules; |
| 68 bool harmony_scoping; | 69 bool harmony_scoping; |
| 69 LexerShellSettings() | 70 LexerShellSettings() |
| 70 : encoding(LATIN1), | 71 : encoding(LATIN1), |
| 71 print_tokens(false), | 72 print_tokens(false), |
| 73 print_tokens_for_compare(false), |
| 72 break_after_illegal(false), | 74 break_after_illegal(false), |
| 73 eos_test(false), | 75 eos_test(false), |
| 74 repeat(1), | 76 repeat(1), |
| 75 harmony_numeric_literals(false), | 77 harmony_numeric_literals(false), |
| 76 harmony_modules(false), | 78 harmony_modules(false), |
| 77 harmony_scoping(false) {} | 79 harmony_scoping(false) {} |
| 78 }; | 80 }; |
| 79 | 81 |
| 80 | 82 |
| 81 static uint16_t* ConvertUtf8ToUtf16(const uint16_t* const data_in, | 83 static uint16_t* ConvertUtf8ToUtf16(const uint16_t* const data_in, |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 178 if (settings.repeat > 1) { | 180 if (settings.repeat > 1) { |
| 179 uint16_t* new_data = Repeat(settings.repeat, data, length); | 181 uint16_t* new_data = Repeat(settings.repeat, data, length); |
| 180 delete data; | 182 delete data; |
| 181 data = new_data; | 183 data = new_data; |
| 182 } | 184 } |
| 183 | 185 |
| 184 return data; | 186 return data; |
| 185 } | 187 } |
| 186 | 188 |
| 187 | 189 |
| 188 struct TokenWithLocation { | |
| 189 Token::Value value; | |
| 190 size_t beg; | |
| 191 size_t end; | |
| 192 std::vector<int> literal; | |
| 193 bool is_ascii; | |
| 194 // The location of the latest octal position when the token was seen. | |
| 195 int octal_beg; | |
| 196 int octal_end; | |
| 197 TokenWithLocation() : | |
| 198 value(Token::ILLEGAL), beg(0), end(0), is_ascii(false) { } | |
| 199 TokenWithLocation(Token::Value value, size_t beg, size_t end, | |
| 200 int octal_beg) : | |
| 201 value(value), beg(beg), end(end), is_ascii(false), octal_beg(octal_beg) { | |
| 202 } | |
| 203 bool operator==(const TokenWithLocation& other) { | |
| 204 return value == other.value && beg == other.beg && end == other.end && | |
| 205 literal == other.literal && is_ascii == other.is_ascii && | |
| 206 octal_beg == other.octal_beg; | |
| 207 } | |
| 208 bool operator!=(const TokenWithLocation& other) { | |
| 209 return !(*this == other); | |
| 210 } | |
| 211 void Print(const char* prefix) const { | |
| 212 printf("%s %11s at (%d, %d)", | |
| 213 prefix, Token::Name(value), | |
| 214 static_cast<int>(beg), static_cast<int>(end)); | |
| 215 if (literal.size() > 0) { | |
| 216 for (size_t i = 0; i < literal.size(); i++) { | |
| 217 printf(is_ascii ? " %02x" : " %04x", literal[i]); | |
| 218 } | |
| 219 printf(" (is ascii: %d)", is_ascii); | |
| 220 } | |
| 221 printf(" (last octal start: %d)\n", octal_beg); | |
| 222 } | |
| 223 }; | |
| 224 | |
| 225 | |
| 226 static bool HasLiteral(Token::Value token) { | 190 static bool HasLiteral(Token::Value token) { |
| 227 return token == Token::IDENTIFIER || | 191 return token == Token::IDENTIFIER || |
| 228 token == Token::STRING || | 192 token == Token::STRING || |
| 229 token == Token::NUMBER; | 193 token == Token::NUMBER; |
| 230 } | 194 } |
| 231 | 195 |
| 232 | 196 |
| 233 template<typename Char> | 197 template<typename Char> |
| 234 static std::vector<int> ToStdVector(const Vector<Char>& literal) { | 198 static void Copy(const Vector<Char>& literal, |
| 235 std::vector<int> result; | 199 SmartArrayPointer<const uint16_t>* result, |
| 200 int* literal_length) { |
| 201 uint16_t* data = new uint16_t[literal.length()]; |
| 202 result->Reset(data); |
| 236 for (int i = 0; i < literal.length(); i++) { | 203 for (int i = 0; i < literal.length(); i++) { |
| 237 result.push_back(literal[i]); | 204 data[i] = literal[i]; |
| 238 } | 205 } |
| 239 return result; | 206 *literal_length = literal.length(); |
| 240 } | 207 } |
| 241 | 208 |
| 242 | 209 |
| 243 template<typename Scanner> | 210 class TokenWithLocation { |
| 244 static TokenWithLocation GetTokenWithLocation( | 211 public: |
| 245 Scanner *scanner, Token::Value token) { | 212 Token::Value value; |
| 246 int beg = scanner->location().beg_pos; | 213 int beg; |
| 247 int end = scanner->location().end_pos; | 214 int end; |
| 248 TokenWithLocation result(token, beg, end, scanner->octal_position().beg_pos); | 215 bool is_one_byte; |
| 249 if (HasLiteral(token)) { | 216 SmartArrayPointer<const uint16_t> literal; |
| 250 result.is_ascii = scanner->is_literal_ascii(); | 217 int literal_length; |
| 251 if (scanner->is_literal_ascii()) { | 218 // The location of the latest octal position when the token was seen. |
| 252 result.literal = ToStdVector(scanner->literal_ascii_string()); | 219 int octal_beg; |
| 253 } else { | 220 int octal_end; |
| 254 result.literal = ToStdVector(scanner->literal_utf16_string()); | 221 TokenWithLocation(Token::Value token, Scanner* scanner) : value(token) { |
| 222 beg = scanner->location().beg_pos; |
| 223 end = scanner->location().end_pos; |
| 224 octal_beg = scanner->octal_position().beg_pos; |
| 225 octal_end = scanner->octal_position().end_pos; |
| 226 is_one_byte = false; |
| 227 literal_length = 0; |
| 228 if (HasLiteral(token)) { |
| 229 is_one_byte = scanner->is_literal_ascii(); |
| 230 if (scanner->is_literal_ascii()) { |
| 231 Copy(scanner->literal_ascii_string(), &literal, &literal_length); |
| 232 } else { |
| 233 Copy(scanner->literal_utf16_string(), &literal, &literal_length); |
| 234 } |
| 255 } | 235 } |
| 256 } | 236 } |
| 257 return result; | 237 void Print(bool do_compare) const { |
| 258 } | 238 if (value == Token::ILLEGAL && do_compare) { |
| 239 printf("%-15s (%d)\n", Token::Name(value), beg); |
| 240 return; |
| 241 } |
| 242 printf("%-15s (%d, %d)", Token::Name(value), beg, end); |
| 243 if (literal_length > 0) { |
| 244 // TODO(dcarney): need some sort of checksum. |
| 245 for (int i = 0; i < literal_length; i++) { |
| 246 printf(is_one_byte ? " %02x" : " %04x", literal[i]); |
| 247 } |
| 248 printf(" (is_one_byte: %d)", is_one_byte); |
| 249 } |
| 250 if (octal_beg >= 0) { |
| 251 printf(" (last octal start: %d)", octal_beg); |
| 252 } |
| 253 printf("\n"); |
| 254 } |
| 255 |
| 256 private: |
| 257 DISALLOW_COPY_AND_ASSIGN(TokenWithLocation); |
| 258 }; |
| 259 | 259 |
| 260 | 260 |
| 261 static TimeDelta RunLexer(const uint16_t* source, | 261 static TimeDelta RunLexer(const uint16_t* source, |
| 262 const uint8_t* source_end, | 262 const uint8_t* source_end, |
| 263 Isolate* isolate, | 263 Isolate* isolate, |
| 264 std::vector<TokenWithLocation>* tokens, | |
| 265 const LexerShellSettings& settings) { | 264 const LexerShellSettings& settings) { |
| 266 SmartPointer<Utf16CharacterStream> stream; | 265 SmartPointer<Utf16CharacterStream> stream; |
| 267 const uint8_t* one_byte_source = reinterpret_cast<const uint8_t*>(source); | 266 const uint8_t* one_byte_source = reinterpret_cast<const uint8_t*>(source); |
| 268 int bytes = source_end - one_byte_source; | 267 int bytes = source_end - one_byte_source; |
| 269 switch (settings.encoding) { | 268 switch (settings.encoding) { |
| 270 case UTF8TO16: | 269 case UTF8TO16: |
| 271 case UTF8: | 270 case UTF8: |
| 272 stream.Reset(new Utf8ToUtf16CharacterStream(one_byte_source, bytes)); | 271 stream.Reset(new Utf8ToUtf16CharacterStream(one_byte_source, bytes)); |
| 273 break; | 272 break; |
| 274 case UTF8TO16_PRECONVERT: | 273 case UTF8TO16_PRECONVERT: |
| (...skipping 11 matching lines...) Expand all Loading... |
| 286 stream.Reset( | 285 stream.Reset( |
| 287 new GenericStringUtf16CharacterStream(result, 0, result->length())); | 286 new GenericStringUtf16CharacterStream(result, 0, result->length())); |
| 288 break; | 287 break; |
| 289 } | 288 } |
| 290 } | 289 } |
| 291 Scanner scanner(isolate->unicode_cache()); | 290 Scanner scanner(isolate->unicode_cache()); |
| 292 scanner.SetHarmonyNumericLiterals(settings.harmony_numeric_literals); | 291 scanner.SetHarmonyNumericLiterals(settings.harmony_numeric_literals); |
| 293 scanner.SetHarmonyModules(settings.harmony_modules); | 292 scanner.SetHarmonyModules(settings.harmony_modules); |
| 294 scanner.SetHarmonyScoping(settings.harmony_scoping); | 293 scanner.SetHarmonyScoping(settings.harmony_scoping); |
| 295 ElapsedTimer timer; | 294 ElapsedTimer timer; |
| 295 std::vector<TokenWithLocation*> tokens; |
| 296 timer.Start(); | 296 timer.Start(); |
| 297 scanner.Initialize(stream.get()); | 297 scanner.Initialize(stream.get()); |
| 298 Token::Value token; | 298 Token::Value token; |
| 299 do { | 299 do { |
| 300 token = scanner.Next(); | 300 token = scanner.Next(); |
| 301 if (settings.print_tokens) { | 301 if (settings.print_tokens) { |
| 302 tokens->push_back(GetTokenWithLocation(&scanner, token)); | 302 tokens.push_back(new TokenWithLocation(token, &scanner)); |
| 303 } else if (HasLiteral(token)) { | 303 } else if (HasLiteral(token)) { |
| 304 if (scanner.is_literal_ascii()) { | 304 if (scanner.is_literal_ascii()) { |
| 305 scanner.literal_ascii_string(); | 305 scanner.literal_ascii_string(); |
| 306 } else { | 306 } else { |
| 307 scanner.literal_utf16_string(); | 307 scanner.literal_utf16_string(); |
| 308 } | 308 } |
| 309 } | 309 } |
| 310 if (token == Token::ILLEGAL && settings.break_after_illegal) break; |
| 310 } while (token != Token::EOS); | 311 } while (token != Token::EOS); |
| 312 // Dump tokens. |
| 313 if (settings.print_tokens) { |
| 314 if (!settings.print_tokens_for_compare) { |
| 315 printf("No of tokens:\t%d\n", static_cast<int>(tokens.size())); |
| 316 } |
| 317 for (size_t i = 0; i < tokens.size(); ++i) { |
| 318 tokens[i]->Print(settings.print_tokens_for_compare); |
| 319 } |
| 320 } |
| 321 for (size_t i = 0; i < tokens.size(); ++i) { |
| 322 delete tokens[i]; |
| 323 } |
| 311 return timer.Elapsed(); | 324 return timer.Elapsed(); |
| 312 } | 325 } |
| 313 | 326 |
| 314 | 327 |
| 315 static TimeDelta ProcessFile( | 328 static TimeDelta ProcessFile( |
| 316 const char* fname, | 329 const char* fname, |
| 317 Isolate* isolate, | 330 Isolate* isolate, |
| 318 const LexerShellSettings& settings, | 331 const LexerShellSettings& settings, |
| 319 int truncate_by, | 332 int truncate_by, |
| 320 bool* can_truncate) { | 333 bool* can_truncate) { |
| 321 if (settings.print_tokens) { | 334 if (settings.print_tokens && !settings.print_tokens_for_compare) { |
| 322 printf("Processing file %s, truncating by %d bytes\n", fname, truncate_by); | 335 printf("Processing file %s, truncating by %d bytes\n", fname, truncate_by); |
| 323 } | 336 } |
| 324 HandleScope handle_scope(isolate); | 337 HandleScope handle_scope(isolate); |
| 325 std::vector<TokenWithLocation> tokens; | |
| 326 TimeDelta time; | 338 TimeDelta time; |
| 327 { | 339 { |
| 328 unsigned length_in_bytes; | 340 unsigned length_in_bytes; |
| 329 const uint16_t* buffer = ReadFile(fname, settings, &length_in_bytes); | 341 const uint16_t* buffer = ReadFile(fname, settings, &length_in_bytes); |
| 330 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(buffer); | 342 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(buffer); |
| 331 const uint8_t* buffer_end = &char_data[length_in_bytes]; | 343 const uint8_t* buffer_end = &char_data[length_in_bytes]; |
| 332 if (truncate_by > buffer_end - char_data) { | 344 if (truncate_by > buffer_end - char_data) { |
| 333 *can_truncate = false; | 345 *can_truncate = false; |
| 334 } else { | 346 } else { |
| 335 buffer_end -= truncate_by; | 347 buffer_end -= truncate_by; |
| 336 time = RunLexer(buffer, buffer_end, isolate, &tokens, settings); | 348 time = RunLexer(buffer, buffer_end, isolate, settings); |
| 337 } | 349 } |
| 338 delete[] buffer; | 350 delete[] buffer; |
| 339 } | 351 } |
| 340 if (settings.print_tokens) { | 352 |
| 341 printf("No of tokens:\t%d\n", static_cast<int>(tokens.size())); | |
| 342 for (size_t i = 0; i < tokens.size(); ++i) { | |
| 343 tokens[i].Print("=>"); | |
| 344 if (tokens[i].value == Token::ILLEGAL) { | |
| 345 if (settings.break_after_illegal) | |
| 346 break; | |
| 347 } | |
| 348 } | |
| 349 } | |
| 350 return time; | 353 return time; |
| 351 } | 354 } |
| 352 | 355 |
| 353 | 356 |
| 354 int main(int argc, char* argv[]) { | 357 int main(int argc, char* argv[]) { |
| 355 v8::V8::InitializeICU(); | 358 v8::V8::InitializeICU(); |
| 356 v8::V8::SetFlagsFromCommandLine(&argc, argv, true); | 359 v8::V8::SetFlagsFromCommandLine(&argc, argv, true); |
| 357 std::vector<std::string> fnames; | 360 std::vector<std::string> fnames; |
| 358 LexerShellSettings settings; | 361 LexerShellSettings settings; |
| 359 for (int i = 0; i < argc; ++i) { | 362 for (int i = 0; i < argc; ++i) { |
| 360 if (strcmp(argv[i], "--latin1") == 0) { | 363 if (strcmp(argv[i], "--latin1") == 0) { |
| 361 settings.encoding = LATIN1; | 364 settings.encoding = LATIN1; |
| 362 } else if (strcmp(argv[i], "--utf8") == 0) { | 365 } else if (strcmp(argv[i], "--utf8") == 0) { |
| 363 settings.encoding = UTF8; | 366 settings.encoding = UTF8; |
| 364 } else if (strcmp(argv[i], "--utf16") == 0) { | 367 } else if (strcmp(argv[i], "--utf16") == 0) { |
| 365 settings.encoding = UTF16; | 368 settings.encoding = UTF16; |
| 366 } else if (strcmp(argv[i], "--utf8to16") == 0) { | 369 } else if (strcmp(argv[i], "--utf8to16") == 0) { |
| 367 #ifdef V8_USE_GENERATED_LEXER | 370 #ifdef V8_USE_GENERATED_LEXER |
| 368 settings.encoding = UTF8TO16_PRECONVERT; | 371 settings.encoding = UTF8TO16_PRECONVERT; |
| 369 #else | 372 #else |
| 370 settings.encoding = UTF8TO16; | 373 settings.encoding = UTF8TO16; |
| 371 #endif | 374 #endif |
| 372 } else if (strcmp(argv[i], "--print-tokens") == 0) { | 375 } else if (strcmp(argv[i], "--print-tokens") == 0) { |
| 373 settings.print_tokens = true; | 376 settings.print_tokens = true; |
| 377 } else if (strcmp(argv[i], "--print-tokens-for-compare") == 0) { |
| 378 settings.print_tokens = true; |
| 379 settings.print_tokens_for_compare = true; |
| 374 } else if (strcmp(argv[i], "--no-baseline") == 0) { | 380 } else if (strcmp(argv[i], "--no-baseline") == 0) { |
| 375 // Ignore. | 381 // Ignore. |
| 376 } else if (strcmp(argv[i], "--no-experimental") == 0) { | 382 } else if (strcmp(argv[i], "--no-experimental") == 0) { |
| 377 // Ignore. | 383 // Ignore. |
| 378 } else if (strcmp(argv[i], "--no-check") == 0) { | 384 } else if (strcmp(argv[i], "--no-check") == 0) { |
| 379 // Ignore. | 385 // Ignore. |
| 380 } else if (strcmp(argv[i], "--break-after-illegal") == 0) { | 386 } else if (strcmp(argv[i], "--break-after-illegal") == 0) { |
| 381 settings.break_after_illegal = true; | 387 settings.break_after_illegal = true; |
| 382 } else if (strcmp(argv[i], "--use-harmony") == 0) { | 388 } else if (strcmp(argv[i], "--use-harmony") == 0) { |
| 383 settings.harmony_numeric_literals = true; | 389 settings.harmony_numeric_literals = true; |
| (...skipping 25 matching lines...) Expand all Loading... |
| 409 do { | 415 do { |
| 410 TimeDelta t = ProcessFile(fnames[i].c_str(), | 416 TimeDelta t = ProcessFile(fnames[i].c_str(), |
| 411 internal_isolate, | 417 internal_isolate, |
| 412 settings, | 418 settings, |
| 413 truncate_by, | 419 truncate_by, |
| 414 &can_truncate); | 420 &can_truncate); |
| 415 total_time += t.InMillisecondsF(); | 421 total_time += t.InMillisecondsF(); |
| 416 ++truncate_by; | 422 ++truncate_by; |
| 417 } while (can_truncate); | 423 } while (can_truncate); |
| 418 } | 424 } |
| 419 printf("RunTime: %.f ms\n", total_time); | 425 if (!settings.print_tokens_for_compare) { |
| 426 printf("RunTime: %.f ms\n", total_time); |
| 427 } |
| 420 } | 428 } |
| 421 v8::V8::Dispose(); | 429 v8::V8::Dispose(); |
| 422 return 0; | 430 return 0; |
| 423 } | 431 } |
| OLD | NEW |