Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: test/cctest/test-parsing.cc

Issue 5545006: Optimized scanner to avoid virtual calls for every character read. (Closed)
Patch Set: Addressed review comments. Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/scanner-base.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. 1 // Copyright 2006-2009 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 242 matching lines...) Expand 10 before | Expand all | Expand 10 after
253 "var x = 42;", 253 "var x = 42;",
254 "function foo(x, y) { return x + y; }", 254 "function foo(x, y) { return x + y; }",
255 "native function foo(); return %ArgleBargle(glop);", 255 "native function foo(); return %ArgleBargle(glop);",
256 "var x = new new Function('this.x = 42');", 256 "var x = new new Function('this.x = 42');",
257 NULL 257 NULL
258 }; 258 };
259 259
260 uintptr_t stack_limit = i::StackGuard::real_climit(); 260 uintptr_t stack_limit = i::StackGuard::real_climit();
261 for (int i = 0; programs[i]; i++) { 261 for (int i = 0; programs[i]; i++) {
262 const char* program = programs[i]; 262 const char* program = programs[i];
263 unibrow::Utf8InputBuffer<256> stream(program, strlen(program)); 263 i::Utf8ToUC16CharacterStream stream(
264 reinterpret_cast<const i::byte*>(program),
265 static_cast<unsigned>(strlen(program)));
264 i::CompleteParserRecorder log; 266 i::CompleteParserRecorder log;
265 i::V8JavaScriptScanner scanner; 267 i::V8JavaScriptScanner scanner;
266 scanner.Initialize(i::Handle<i::String>::null(), &stream); 268 scanner.Initialize(&stream);
267 269
268 v8::preparser::PreParser::PreParseResult result = 270 v8::preparser::PreParser::PreParseResult result =
269 v8::preparser::PreParser::PreParseProgram(&scanner, 271 v8::preparser::PreParser::PreParseProgram(&scanner,
270 &log, 272 &log,
271 true, 273 true,
272 stack_limit); 274 stack_limit);
273 CHECK_EQ(v8::preparser::PreParser::kPreParseSuccess, result); 275 CHECK_EQ(v8::preparser::PreParser::kPreParseSuccess, result);
274 i::ScriptDataImpl data(log.ExtractData()); 276 i::ScriptDataImpl data(log.ExtractData());
275 CHECK(!data.has_error()); 277 CHECK(!data.has_error());
276 } 278 }
277 } 279 }
278 280
279 281
280 TEST(RegressChromium62639) { 282 TEST(RegressChromium62639) {
281 int marker; 283 int marker;
282 i::StackGuard::SetStackLimit( 284 i::StackGuard::SetStackLimit(
283 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024); 285 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
284 286
285 const char* program = "var x = 'something';\n" 287 const char* program = "var x = 'something';\n"
286 "escape: function() {}"; 288 "escape: function() {}";
287 // Fails parsing expecting an identifier after "function". 289 // Fails parsing expecting an identifier after "function".
288 // Before fix, didn't check *ok after Expect(Token::Identifier, ok), 290 // Before fix, didn't check *ok after Expect(Token::Identifier, ok),
289 // and then used the invalid currently scanned literal. This always 291 // and then used the invalid currently scanned literal. This always
290 // failed in debug mode, and sometimes crashed in release mode. 292 // failed in debug mode, and sometimes crashed in release mode.
291 293
292 unibrow::Utf8InputBuffer<256> stream(program, strlen(program)); 294 i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),
295 static_cast<unsigned>(strlen(program)));
293 i::ScriptDataImpl* data = 296 i::ScriptDataImpl* data =
294 i::ParserApi::PreParse(i::Handle<i::String>::null(), &stream, NULL); 297 i::ParserApi::PreParse(&stream, NULL);
295 CHECK(data->HasError()); 298 CHECK(data->HasError());
296 delete data; 299 delete data;
297 } 300 }
298 301
299 302
300 TEST(Regress928) { 303 TEST(Regress928) {
301 // Preparsing didn't consider the catch clause of a try statement 304 // Preparsing didn't consider the catch clause of a try statement
302 // as with-content, which made it assume that a function inside 305 // as with-content, which made it assume that a function inside
303 // the block could be lazily compiled, and an extra, unexpected, 306 // the block could be lazily compiled, and an extra, unexpected,
304 // entry was added to the data. 307 // entry was added to the data.
305 int marker; 308 int marker;
306 i::StackGuard::SetStackLimit( 309 i::StackGuard::SetStackLimit(
307 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024); 310 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
308 311
309 const char* program = 312 const char* program =
310 "try { } catch (e) { var foo = function () { /* first */ } }" 313 "try { } catch (e) { var foo = function () { /* first */ } }"
311 "var bar = function () { /* second */ }"; 314 "var bar = function () { /* second */ }";
312 315
313 unibrow::Utf8InputBuffer<256> stream(program, strlen(program)); 316 i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),
317 static_cast<unsigned>(strlen(program)));
314 i::ScriptDataImpl* data = 318 i::ScriptDataImpl* data =
315 i::ParserApi::PartialPreParse(i::Handle<i::String>::null(), 319 i::ParserApi::PartialPreParse(&stream, NULL);
316 &stream, NULL);
317 CHECK(!data->HasError()); 320 CHECK(!data->HasError());
318 321
319 data->Initialize(); 322 data->Initialize();
320 323
321 int first_function = strstr(program, "function") - program; 324 int first_function = strstr(program, "function") - program;
322 int first_lbrace = first_function + strlen("function () "); 325 int first_lbrace = first_function + strlen("function () ");
323 CHECK_EQ('{', program[first_lbrace]); 326 CHECK_EQ('{', program[first_lbrace]);
324 i::FunctionEntry entry1 = data->GetFunctionEntry(first_lbrace); 327 i::FunctionEntry entry1 = data->GetFunctionEntry(first_lbrace);
325 CHECK(!entry1.is_valid()); 328 CHECK(!entry1.is_valid());
326 329
(...skipping 13 matching lines...) Expand all
340 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024); 343 reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
341 344
342 size_t kProgramSize = 1024 * 1024; 345 size_t kProgramSize = 1024 * 1024;
343 i::SmartPointer<char> program( 346 i::SmartPointer<char> program(
344 reinterpret_cast<char*>(malloc(kProgramSize + 1))); 347 reinterpret_cast<char*>(malloc(kProgramSize + 1)));
345 memset(*program, '(', kProgramSize); 348 memset(*program, '(', kProgramSize);
346 program[kProgramSize] = '\0'; 349 program[kProgramSize] = '\0';
347 350
348 uintptr_t stack_limit = i::StackGuard::real_climit(); 351 uintptr_t stack_limit = i::StackGuard::real_climit();
349 352
350 unibrow::Utf8InputBuffer<256> stream(*program, strlen(*program)); 353 i::Utf8ToUC16CharacterStream stream(
354 reinterpret_cast<const i::byte*>(*program),
355 static_cast<unsigned>(kProgramSize));
351 i::CompleteParserRecorder log; 356 i::CompleteParserRecorder log;
352 i::V8JavaScriptScanner scanner; 357 i::V8JavaScriptScanner scanner;
353 scanner.Initialize(i::Handle<i::String>::null(), &stream); 358 scanner.Initialize(&stream);
354 359
355 360
356 v8::preparser::PreParser::PreParseResult result = 361 v8::preparser::PreParser::PreParseResult result =
357 v8::preparser::PreParser::PreParseProgram(&scanner, 362 v8::preparser::PreParser::PreParseProgram(&scanner,
358 &log, 363 &log,
359 true, 364 true,
360 stack_limit); 365 stack_limit);
361 CHECK_EQ(v8::preparser::PreParser::kPreParseStackOverflow, result); 366 CHECK_EQ(v8::preparser::PreParser::kPreParseStackOverflow, result);
362 } 367 }
368
369
// Test-only v8::String::ExternalStringResource that exposes a caller-supplied
// uint16_t buffer. The class only stores the pointer and the length; nothing
// in it copies or releases the buffer.
370 class TestExternalResource: public v8::String::ExternalStringResource {
371 public:
// Stores the buffer pointer as given and converts the int length to size_t.
372 explicit TestExternalResource(uint16_t* data, int length)
373 : data_(data), length_(static_cast<size_t>(length)) { }
374
// Empty destructor: the wrapped buffer is not freed here.
375 ~TestExternalResource() { }
376
// Returns the pointer passed to the constructor.
377 const uint16_t* data() const {
378 return data_;
379 }
380
// Returns the length passed to the constructor, as size_t.
381 size_t length() const {
382 return length_;
383 }
384 private:
385 uint16_t* data_;
386 size_t length_;
387 };
388
389
// CHECK_EQ wrapper that casts both operands to int before comparing. It is
// used below to compare unsigned loop counters with stream.pos() results.
// NOTE(review): presumably this avoids mixed signed/unsigned CHECK_EQ
// overload problems - confirm against the CHECK_EQ definitions.
390 #define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))
391
// Drives three character streams over the same input in lock step and checks
// that they report the same characters and positions as the raw input:
//   - ExternalTwoByteStringUC16CharacterStream over an external two-byte
//     string backed by a uc16 copy of the input,
//   - GenericStringUC16CharacterStream over a string created with
//     Factory::NewStringFromAscii,
//   - Utf8ToUC16CharacterStream over the raw input bytes.
// Parameters:
//   ascii_source - input characters (may contain embedded '\0').
//   length       - number of characters in ascii_source.
//   start, end   - sub-range [start, end) to stream; end == 0 selects length.
// The test reads forward to end, pushes back to one quarter of the sub-range,
// seeks forward to the half-way point, reads to end again, and finally checks
// that reading past the end yields a negative value.
392 void TestCharacterStream(const char* ascii_source,
393 unsigned length,
394 unsigned start = 0,
395 unsigned end = 0) {
// end == 0 is the "not given" default and means the whole input.
396 if (end == 0) end = length;
397 unsigned sub_length = end - start;
398 i::HandleScope test_scope;
// Widen each input char into a uc16 copy that backs the external string.
399 i::SmartPointer<i::uc16> uc16_buffer(new i::uc16[length]);
400 for (unsigned i = 0; i < length; i++) {
401 uc16_buffer[i] = static_cast<i::uc16>(ascii_source[i]);
402 }
403 i::Vector<const char> ascii_vector(ascii_source, static_cast<int>(length));
404 i::Handle<i::String> ascii_string(
405 i::Factory::NewStringFromAscii(ascii_vector));
// The resource wraps uc16_buffer directly (pointer and length only).
406 TestExternalResource resource(*uc16_buffer, length);
407 i::Handle<i::String> uc16_string(
408 i::Factory::NewExternalStringFromTwoByte(&resource));
409
410 i::ExternalTwoByteStringUC16CharacterStream uc16_stream(
411 i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end);
412 i::GenericStringUC16CharacterStream string_stream(ascii_string, start, end);
// The UTF-8 stream is constructed over the first `end` bytes and has no start
// argument here, so it is moved to `start` with SeekForward instead.
413 i::Utf8ToUC16CharacterStream utf8_stream(
414 reinterpret_cast<const i::byte*>(ascii_source), end);
415 utf8_stream.SeekForward(start);
416
// Phase 1: read from start to end; positions are checked before and after
// every Advance, and each character is compared with the raw input.
417 unsigned i = start;
418 while (i < end) {
419 // Read streams one char at a time
420 CHECK_EQU(i, uc16_stream.pos());
421 CHECK_EQU(i, string_stream.pos());
422 CHECK_EQU(i, utf8_stream.pos());
423 int32_t c0 = ascii_source[i];
424 int32_t c1 = uc16_stream.Advance();
425 int32_t c2 = string_stream.Advance();
426 int32_t c3 = utf8_stream.Advance();
427 i++;
428 CHECK_EQ(c0, c1);
429 CHECK_EQ(c0, c2);
430 CHECK_EQ(c0, c3);
431 CHECK_EQU(i, uc16_stream.pos());
432 CHECK_EQU(i, string_stream.pos());
433 CHECK_EQU(i, utf8_stream.pos());
434 }
// Phase 2: walk backwards until i reaches start + sub_length / 4. Each
// iteration pushes one char back, re-reads it and pushes it back again, so
// the net movement is one position backwards per iteration.
435 while (i > start + sub_length / 4) {
436 // Pushback, re-read, pushback again.
437 int32_t c0 = ascii_source[i - 1];
438 CHECK_EQU(i, uc16_stream.pos());
439 CHECK_EQU(i, string_stream.pos());
440 CHECK_EQU(i, utf8_stream.pos());
441 uc16_stream.PushBack(c0);
442 string_stream.PushBack(c0);
443 utf8_stream.PushBack(c0);
444 i--;
445 CHECK_EQU(i, uc16_stream.pos());
446 CHECK_EQU(i, string_stream.pos());
447 CHECK_EQU(i, utf8_stream.pos());
448 int32_t c1 = uc16_stream.Advance();
449 int32_t c2 = string_stream.Advance();
450 int32_t c3 = utf8_stream.Advance();
451 i++;
452 CHECK_EQU(i, uc16_stream.pos());
453 CHECK_EQU(i, string_stream.pos());
454 CHECK_EQU(i, utf8_stream.pos());
455 CHECK_EQ(c0, c1);
456 CHECK_EQ(c0, c2);
457 CHECK_EQ(c0, c3);
458 uc16_stream.PushBack(c0);
459 string_stream.PushBack(c0);
460 utf8_stream.PushBack(c0);
461 i--;
462 CHECK_EQU(i, uc16_stream.pos());
463 CHECK_EQU(i, string_stream.pos());
464 CHECK_EQU(i, utf8_stream.pos());
465 }
// Phase 3: seek all three streams forward from the current position to the
// middle of the sub-range and check that they agree on the new position.
466 unsigned halfway = start + sub_length / 2;
467 uc16_stream.SeekForward(halfway - i);
468 string_stream.SeekForward(halfway - i);
469 utf8_stream.SeekForward(halfway - i);
470 i = halfway;
471 CHECK_EQU(i, uc16_stream.pos());
472 CHECK_EQU(i, string_stream.pos());
473 CHECK_EQU(i, utf8_stream.pos());
474
// Phase 4: read from the half-way point to end, with the same checks as in
// phase 1.
475 while (i < end) {
476 // Read streams one char at a time
477 CHECK_EQU(i, uc16_stream.pos());
478 CHECK_EQU(i, string_stream.pos());
479 CHECK_EQU(i, utf8_stream.pos());
480 int32_t c0 = ascii_source[i];
481 int32_t c1 = uc16_stream.Advance();
482 int32_t c2 = string_stream.Advance();
483 int32_t c3 = utf8_stream.Advance();
484 i++;
485 CHECK_EQ(c0, c1);
486 CHECK_EQ(c0, c2);
487 CHECK_EQ(c0, c3);
488 CHECK_EQU(i, uc16_stream.pos());
489 CHECK_EQU(i, string_stream.pos());
490 CHECK_EQU(i, utf8_stream.pos());
491 }
492
// Phase 5: one more Advance on each stream, now at end, must return a
// negative value.
493 int32_t c1 = uc16_stream.Advance();
494 int32_t c2 = string_stream.Advance();
495 int32_t c3 = utf8_stream.Advance();
496 CHECK_LT(c1, 0);
497 CHECK_LT(c2, 0);
498 CHECK_LT(c3, 0);
499 }
500
501
// Runs TestCharacterStream on five inputs: a short literal with an embedded
// NUL and control characters, a 4096-character buffer (whole, then the
// sub-range 576..3298), a single NUL character, and the empty string.
502 TEST(CharacterStreams) {
503 v8::HandleScope handles;
504 v8::Persistent<v8::Context> context = v8::Context::New();
505 v8::Context::Scope context_scope(context);
506
// The length (7) is passed explicitly because the literal contains '\0'.
507 TestCharacterStream("abc\0\n\r\x7f", 7);
508 static const unsigned kBigStringSize = 4096;
509 char buffer[kBigStringSize + 1];
// Fill with the repeating values 0..127. Only the first kBigStringSize
// elements are written; buffer[kBigStringSize] is left unset, and the
// lengths passed below never exceed kBigStringSize.
510 for (unsigned i = 0; i < kBigStringSize; i++) {
511 buffer[i] = static_cast<char>(i & 0x7f);
512 }
513 TestCharacterStream(buffer, kBigStringSize);
514
// Same buffer, restricted to the sub-range [576, 3298).
515 TestCharacterStream(buffer, kBigStringSize, 576, 3298);
516
517 TestCharacterStream("\0", 1);
518 TestCharacterStream("", 0);
519 }
520
521
// Encodes every code point from 0 to unibrow::Utf8::kMaxThreeByteChar as
// UTF-8 into one buffer, then checks that Utf8ToUC16CharacterStream reads
// them back in order, can push all of them back again, and can seek forward,
// with pos() advancing by one per character in each case.
522 TEST(Utf8CharacterStream) {
523 static const unsigned kMaxUC16CharU = unibrow::Utf8::kMaxThreeByteChar;
524 static const int kMaxUC16Char = static_cast<int>(kMaxUC16CharU);
525
// Total encoded size: 1 byte per character up to kMaxOneByteChar, 2 bytes per
// character up to kMaxTwoByteChar, 3 bytes per character up to
// kMaxThreeByteChar.
526 static const int kAllUtf8CharsSize =
527 (unibrow::Utf8::kMaxOneByteChar + 1) +
528 (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 +
529 (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3;
530 static const unsigned kAllUtf8CharsSizeU =
531 static_cast<unsigned>(kAllUtf8CharsSize);
532
533 char buffer[kAllUtf8CharsSizeU];
534 unsigned cursor = 0;
// Utf8::Encode's return value is used as the number of bytes written.
535 for (int i = 0; i <= kMaxUC16Char; i++) {
536 cursor += unibrow::Utf8::Encode(buffer + cursor, i);
537 }
// The encoded data must fill the buffer exactly.
// NOTE(review): this uses ASSERT while the rest of the test uses CHECK; if
// ASSERT is a debug-only macro in this codebase the size check is skipped in
// release builds - confirm.
538 ASSERT(cursor == kAllUtf8CharsSizeU);
539
540 i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),
541 kAllUtf8CharsSizeU);
// Forward pass: character i is expected at position i.
542 for (int i = 0; i <= kMaxUC16Char; i++) {
543 CHECK_EQU(i, stream.pos());
544 int32_t c = stream.Advance();
545 CHECK_EQ(i, c);
546 CHECK_EQU(i + 1, stream.pos());
547 }
// Backward pass: push every character back, last one first, down to
// position 0.
548 for (int i = kMaxUC16Char; i >= 0; i--) {
549 CHECK_EQU(i + 1, stream.pos());
550 stream.PushBack(i);
551 CHECK_EQU(i, stream.pos());
552 }
// Seek pass: request a 12-character seek, add the value SeekForward returns
// to i, then read one character. Once i is past kMaxUC16Char the read is
// expected to return -1.
553 int i = 0;
554 while (stream.pos() < kMaxUC16CharU) {
555 CHECK_EQU(i, stream.pos());
556 unsigned progress = stream.SeekForward(12);
557 i += progress;
558 int32_t c = stream.Advance();
559 if (i <= kMaxUC16Char) {
560 CHECK_EQ(i, c);
561 } else {
562 CHECK_EQ(-1, c);
563 }
564 i += 1;
565 CHECK_EQU(i, stream.pos());
566 }
567 }
568
// CHECK_EQU is only used by the character stream tests above; the code that
// follows does not use it.
569 #undef CHECK_EQU
570
// Scans `stream` with a V8JavaScriptScanner and checks the tokens it returns
// against `expected_tokens`, an array terminated by i::Token::ILLEGAL.
// Parameters:
//   stream          - character stream to scan.
//   expected_tokens - expected token values followed by Token::ILLEGAL.
//   skip_pos        - if a scanned token ends at this position,
//                     scanner.SeekForward(skip_to) is called before the next
//                     token is scanned.
//   skip_to         - argument passed to scanner.SeekForward.
571 void TestStreamScanner(i::UC16CharacterStream* stream,
572 i::Token::Value* expected_tokens,
573 int skip_pos = 0, // Zero means not skipping.
574 int skip_to = 0) {
575 i::V8JavaScriptScanner scanner;
576 scanner.Initialize(stream, i::JavaScriptScanner::kAllLiterals);
577
578 int i = 0;
// do/while: the first expected token is always compared; the ILLEGAL
// terminator is only tested for entries after it.
579 do {
580 i::Token::Value expected = expected_tokens[i];
581 i::Token::Value actual = scanner.Next();
// The comparison is made on the values returned by Token::String.
582 CHECK_EQ(i::Token::String(expected), i::Token::String(actual));
583 if (scanner.location().end_pos == skip_pos) {
584 scanner.SeekForward(skip_to);
585 }
586 i++;
587 } while (expected_tokens[i] != i::Token::ILLEGAL);
588 }
589
// Runs TestStreamScanner over UTF-8 streams for three inputs:
//   1. a mix of punctuation, identifiers, a keyword and a comment, no skip;
//   2. a keyword sequence where the text inside braces is skipped;
//   3. "{}}}}" scanned five times, skipping zero to four of the '}' characters.
590 TEST(StreamScanner) {
591 const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";
592 i::Utf8ToUC16CharacterStream stream1(reinterpret_cast<const i::byte*>(str1),
593 static_cast<unsigned>(strlen(str1)));
// Expected tokens for str1; Token::ILLEGAL terminates the list.
594 i::Token::Value expectations1[] = {
595 i::Token::LBRACE,
596 i::Token::IDENTIFIER,
597 i::Token::IDENTIFIER,
598 i::Token::FOR,
599 i::Token::COLON,
600 i::Token::MUL,
601 i::Token::DIV,
602 i::Token::LT,
603 i::Token::SUB,
604 i::Token::IDENTIFIER,
605 i::Token::EOS,
606 i::Token::ILLEGAL
607 };
608 TestStreamScanner(&stream1, expectations1, 0, 0);
609
610 const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";
611 i::Utf8ToUC16CharacterStream stream2(reinterpret_cast<const i::byte*>(str2),
612 static_cast<unsigned>(strlen(str2)));
613 i::Token::Value expectations2[] = {
614 i::Token::CASE,
615 i::Token::DEFAULT,
616 i::Token::CONST,
617 i::Token::LBRACE,
618 // Skipped part here
619 i::Token::RBRACE,
620 i::Token::DO,
621 i::Token::EOS,
622 i::Token::ILLEGAL
623 };
// Pin the brace offsets that the skip arguments below rely on: '{' is at
// index 19 (so its token ends at 20) and '}' is at index 37.
624 ASSERT_EQ('{', str2[19]);
625 ASSERT_EQ('}', str2[37]);
626 TestStreamScanner(&stream2, expectations2, 20, 37);
627
628 const char* str3 = "{}}}}";
629 i::Token::Value expectations3[] = {
630 i::Token::LBRACE,
631 i::Token::RBRACE,
632 i::Token::RBRACE,
633 i::Token::RBRACE,
634 i::Token::RBRACE,
635 i::Token::EOS,
636 i::Token::ILLEGAL
637 };
638 // Skip zero-four RBRACEs.
// Iteration i moves the EOS/ILLEGAL pair one slot earlier, so the expected
// list holds 4 - i RBRACE tokens (for i == 0 the two writes leave the array
// unchanged). A fresh stream is created for every iteration, and the skip
// starts after the '{' token (end position 1) with SeekForward(1 + i).
639 for (int i = 0; i <= 4; i++) {
640 expectations3[6 - i] = i::Token::ILLEGAL;
641 expectations3[5 - i] = i::Token::EOS;
642 i::Utf8ToUC16CharacterStream stream3(
643 reinterpret_cast<const i::byte*>(str3),
644 static_cast<unsigned>(strlen(str3)));
645 TestStreamScanner(&stream3, expectations3, 1, 1 + i);
646 }
647 }
OLD | NEW
« no previous file with comments | « src/scanner-base.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698