test/cctest/test-parsing.cc - Issue 5545006: Optimized scanner to avoid virtual calls for every character read.

Unified Diff: test/cctest/test-parsing.cc

Issue 5545006: Optimized scanner to avoid virtual calls for every character read. (Closed)

Patch Set: Addressed review comments. Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: test/cctest/test-parsing.cc

diff --git a/test/cctest/test-parsing.cc b/test/cctest/test-parsing.cc

index badbab5eeb1614bd487489a835ca92cebcc4d2e0..e642d1b622b2aa5a2ab4212c14c21d082834799a 100755

--- a/test/cctest/test-parsing.cc

+++ b/test/cctest/test-parsing.cc

@@ -260,10 +260,12 @@ TEST(StandAlonePreParser) {

uintptr_t stack_limit = i::StackGuard::real_climit();

for (int i = 0; programs[i]; i++) {

const char* program = programs[i];

- unibrow::Utf8InputBuffer<256> stream(program, strlen(program));

+ i::Utf8ToUC16CharacterStream stream(

+ reinterpret_cast<const i::byte*>(program),

+ static_cast<unsigned>(strlen(program)));

i::CompleteParserRecorder log;

i::V8JavaScriptScanner scanner;

- scanner.Initialize(i::Handle<i::String>::null(), &stream);

+ scanner.Initialize(&stream);

v8::preparser::PreParser::PreParseResult result =

v8::preparser::PreParser::PreParseProgram(&scanner,

@@ -289,9 +291,10 @@ TEST(RegressChromium62639) {

// and then used the invalid currently scanned literal. This always

// failed in debug mode, and sometimes crashed in release mode.

- unibrow::Utf8InputBuffer<256> stream(program, strlen(program));

+ i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),

+ static_cast<unsigned>(strlen(program)));

i::ScriptDataImpl* data =

- i::ParserApi::PreParse(i::Handle<i::String>::null(), &stream, NULL);

+ i::ParserApi::PreParse(&stream, NULL);

CHECK(data->HasError());

delete data;

}

@@ -310,10 +313,10 @@ TEST(Regress928) {

"try { } catch (e) { var foo = function () { /* first */ } }"

"var bar = function () { /* second */ }";

- unibrow::Utf8InputBuffer<256> stream(program, strlen(program));

+ i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),

+ static_cast<unsigned>(strlen(program)));

i::ScriptDataImpl* data =

- i::ParserApi::PartialPreParse(i::Handle<i::String>::null(),

- &stream, NULL);

+ i::ParserApi::PartialPreParse(&stream, NULL);

CHECK(!data->HasError());

data->Initialize();

@@ -347,10 +350,12 @@ TEST(PreParseOverflow) {

uintptr_t stack_limit = i::StackGuard::real_climit();

- unibrow::Utf8InputBuffer<256> stream(*program, strlen(*program));

+ i::Utf8ToUC16CharacterStream stream(

+ reinterpret_cast<const i::byte*>(*program),

+ static_cast<unsigned>(kProgramSize));

i::CompleteParserRecorder log;

i::V8JavaScriptScanner scanner;

- scanner.Initialize(i::Handle<i::String>::null(), &stream);

+ scanner.Initialize(&stream);

v8::preparser::PreParser::PreParseResult result =

@@ -360,3 +365,283 @@ TEST(PreParseOverflow) {

stack_limit);

CHECK_EQ(v8::preparser::PreParser::kPreParseStackOverflow, result);

}

+// Two-byte external string resource backed by a caller-supplied buffer.
+// The destructor is empty, so the buffer is never freed by this class.
+class TestExternalResource: public v8::String::ExternalStringResource {
+ public:
+  explicit TestExternalResource(uint16_t* data, int length)
+      : chars_(data), char_count_(static_cast<size_t>(length)) { }
+
+  ~TestExternalResource() { }
+
+  // Start of the wrapped buffer.
+  const uint16_t* data() const { return chars_; }
+
+  // Element count that was passed to the constructor.
+  size_t length() const { return char_count_; }
+
+ private:
+  uint16_t* chars_;
+  size_t char_count_;
+};

+#define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))

+// Drives three character streams over the same text in lock-step and checks
+// that they report identical positions and characters at every step:
+//   - an external two-byte string stream,
+//   - a generic (heap string) stream,
+//   - a UTF-8 decoding stream.
+//
+// ascii_source: characters to stream; read by explicit length, never via
+//               strlen.
+// length:       number of characters available in |ascii_source|.
+// start, end:   sub-range [start, end) to stream. end == 0 selects |length|.
+void TestCharacterStream(const char* ascii_source,
+                         unsigned length,
+                         unsigned start = 0,
+                         unsigned end = 0) {
+  if (end == 0) end = length;
+  unsigned sub_length = end - start;
+  i::HandleScope test_scope;
+
+  // Widen the input into a two-byte buffer for the external string.
+  i::SmartPointer<i::uc16> uc16_buffer(new i::uc16[length]);
+  for (unsigned i = 0; i < length; i++) {
+    uc16_buffer[i] = static_cast<i::uc16>(ascii_source[i]);
+  }
+  i::Vector<const char> ascii_vector(ascii_source, static_cast<int>(length));
+  i::Handle<i::String> ascii_string(
+      i::Factory::NewStringFromAscii(ascii_vector));
+  TestExternalResource resource(*uc16_buffer, length);
+  i::Handle<i::String> uc16_string(
+      i::Factory::NewExternalStringFromTwoByte(&resource));
+
+  i::ExternalTwoByteStringUC16CharacterStream uc16_stream(
+      i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end);
+  i::GenericStringUC16CharacterStream string_stream(ascii_string, start, end);
+  // The UTF-8 stream takes no start offset, so it is built over [0, end) and
+  // then advanced to |start|.
+  i::Utf8ToUC16CharacterStream utf8_stream(
+      reinterpret_cast<const i::byte*>(ascii_source), end);
+  utf8_stream.SeekForward(start);
+
+  // Phase 1: read the whole sub-range one character at a time.
+  unsigned i = start;
+  while (i < end) {
+    // Read streams one char at a time
+    CHECK_EQU(i, uc16_stream.pos());
+    CHECK_EQU(i, string_stream.pos());
+    CHECK_EQU(i, utf8_stream.pos());
+    int32_t c0 = ascii_source[i];
+    int32_t c1 = uc16_stream.Advance();
+    int32_t c2 = string_stream.Advance();
+    int32_t c3 = utf8_stream.Advance();
+    i++;
+    CHECK_EQ(c0, c1);
+    CHECK_EQ(c0, c2);
+    CHECK_EQ(c0, c3);
+    CHECK_EQU(i, uc16_stream.pos());
+    CHECK_EQU(i, string_stream.pos());
+    CHECK_EQU(i, utf8_stream.pos());
+  }
+
+  // Phase 2: walk backwards to one quarter of the sub-range. Each step nets
+  // one character of backward movement.
+  while (i > start + sub_length / 4) {
+    // Pushback, re-read, pushback again.
+    int32_t c0 = ascii_source[i - 1];
+    CHECK_EQU(i, uc16_stream.pos());
+    CHECK_EQU(i, string_stream.pos());
+    CHECK_EQU(i, utf8_stream.pos());
+    uc16_stream.PushBack(c0);
+    string_stream.PushBack(c0);
+    utf8_stream.PushBack(c0);
+    i--;
+    CHECK_EQU(i, uc16_stream.pos());
+    CHECK_EQU(i, string_stream.pos());
+    CHECK_EQU(i, utf8_stream.pos());
+    int32_t c1 = uc16_stream.Advance();
+    int32_t c2 = string_stream.Advance();
+    int32_t c3 = utf8_stream.Advance();
+    i++;
+    CHECK_EQU(i, uc16_stream.pos());
+    CHECK_EQU(i, string_stream.pos());
+    CHECK_EQU(i, utf8_stream.pos());
+    CHECK_EQ(c0, c1);
+    CHECK_EQ(c0, c2);
+    CHECK_EQ(c0, c3);
+    uc16_stream.PushBack(c0);
+    string_stream.PushBack(c0);
+    utf8_stream.PushBack(c0);
+    i--;
+    CHECK_EQU(i, uc16_stream.pos());
+    CHECK_EQU(i, string_stream.pos());
+    CHECK_EQU(i, utf8_stream.pos());
+  }
+
+  // Phase 3: jump forward to the middle of the sub-range in one seek.
+  unsigned halfway = start + sub_length / 2;
+  uc16_stream.SeekForward(halfway - i);
+  string_stream.SeekForward(halfway - i);
+  utf8_stream.SeekForward(halfway - i);
+  i = halfway;
+  CHECK_EQU(i, uc16_stream.pos());
+  CHECK_EQU(i, string_stream.pos());
+  CHECK_EQU(i, utf8_stream.pos());
+
+  // Phase 4: read the remainder one character at a time.
+  while (i < end) {
+    // Read streams one char at a time
+    CHECK_EQU(i, uc16_stream.pos());
+    CHECK_EQU(i, string_stream.pos());
+    CHECK_EQU(i, utf8_stream.pos());
+    int32_t c0 = ascii_source[i];
+    int32_t c1 = uc16_stream.Advance();
+    int32_t c2 = string_stream.Advance();
+    int32_t c3 = utf8_stream.Advance();
+    i++;
+    CHECK_EQ(c0, c1);
+    CHECK_EQ(c0, c2);
+    CHECK_EQ(c0, c3);
+    CHECK_EQU(i, uc16_stream.pos());
+    CHECK_EQU(i, string_stream.pos());
+    CHECK_EQU(i, utf8_stream.pos());
+  }
+
+  // Reading past the end must yield a negative value from every stream.
+  int32_t c1 = uc16_stream.Advance();
+  int32_t c2 = string_stream.Advance();
+  int32_t c3 = utf8_stream.Advance();
+  CHECK_LT(c1, 0);
+  CHECK_LT(c2, 0);
+  CHECK_LT(c3, 0);
+}

+// Runs TestCharacterStream over a short string with embedded control
+// characters, a 4096-character buffer (whole and as a sub-range), a single
+// NUL character, and the empty string.
+TEST(CharacterStreams) {
+  v8::HandleScope handles;
+  v8::Persistent<v8::Context> context = v8::Context::New();
+  v8::Context::Scope context_scope(context);
+
+  TestCharacterStream("abc\0\n\r\x7f", 7);
+  static const unsigned kBigStringSize = 4096;
+  char buffer[kBigStringSize + 1];
+  for (unsigned i = 0; i < kBigStringSize; i++) {
+    // Mask to seven bits so every value is a one-byte character.
+    buffer[i] = static_cast<char>(i & 0x7f);
+  }
+  TestCharacterStream(buffer, kBigStringSize);
+
+  TestCharacterStream(buffer, kBigStringSize, 576, 3298);
+
+  TestCharacterStream("\0", 1);
+  TestCharacterStream("", 0);
+}

+// Encodes every code point from 0 through unibrow::Utf8::kMaxThreeByteChar
+// as UTF-8 and checks that Utf8ToUC16CharacterStream decodes them back in
+// order, supports pushing all of them back, and seeks forward correctly.
+TEST(Utf8CharacterStream) {
+  static const unsigned kMaxUC16CharU = unibrow::Utf8::kMaxThreeByteChar;
+  static const int kMaxUC16Char = static_cast<int>(kMaxUC16CharU);
+
+  // Total encoded size: one, two and three bytes per character for the
+  // respective ranges.
+  static const int kAllUtf8CharsSize =
+      (unibrow::Utf8::kMaxOneByteChar + 1) +
+      (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 +
+      (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3;
+  static const unsigned kAllUtf8CharsSizeU =
+      static_cast<unsigned>(kAllUtf8CharsSize);
+
+  char buffer[kAllUtf8CharsSizeU];
+  unsigned cursor = 0;
+  for (int i = 0; i <= kMaxUC16Char; i++) {
+    cursor += unibrow::Utf8::Encode(buffer + cursor, i);
+  }
+  ASSERT(cursor == kAllUtf8CharsSizeU);
+
+  i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),
+                                      kAllUtf8CharsSizeU);
+
+  // Forward pass: character i is expected at position i.
+  for (int i = 0; i <= kMaxUC16Char; i++) {
+    CHECK_EQU(i, stream.pos());
+    int32_t c = stream.Advance();
+    CHECK_EQ(i, c);
+    CHECK_EQU(i + 1, stream.pos());
+  }
+
+  // Backward pass: push every character back, down to position 0.
+  for (int i = kMaxUC16Char; i >= 0; i--) {
+    CHECK_EQU(i + 1, stream.pos());
+    stream.PushBack(i);
+    CHECK_EQU(i, stream.pos());
+  }
+
+  // Seek pass: skip up to 12 characters, then read one. |i| tracks the
+  // expected position using the progress SeekForward reports.
+  int i = 0;
+  while (stream.pos() < kMaxUC16CharU) {
+    CHECK_EQU(i, stream.pos());
+    unsigned progress = stream.SeekForward(12);
+    i += progress;
+    int32_t c = stream.Advance();
+    if (i <= kMaxUC16Char) {
+      CHECK_EQ(i, c);
+    } else {
+      CHECK_EQ(-1, c);
+    }
+    i += 1;
+    CHECK_EQU(i, stream.pos());
+  }
+}

+#undef CHECK_EQU

+// Scans |stream| and checks each token against |expected_tokens|.
+//
+// stream:          character stream handed to the scanner.
+// expected_tokens: expected token sequence, terminated by i::Token::ILLEGAL.
+//                  The first entry is always compared, even if it is the
+//                  terminator, because the loop tests the terminator only
+//                  after advancing.
+// skip_pos:        when a token ends at this position, the scanner is told
+//                  to seek forward. Zero means not skipping.
+// skip_to:         argument passed to scanner.SeekForward for that skip.
+void TestStreamScanner(i::UC16CharacterStream* stream,
+                       i::Token::Value* expected_tokens,
+                       int skip_pos = 0,  // Zero means not skipping.
+                       int skip_to = 0) {
+  i::V8JavaScriptScanner scanner;
+  scanner.Initialize(stream, i::JavaScriptScanner::kAllLiterals);
+
+  int i = 0;
+  do {
+    i::Token::Value expected = expected_tokens[i];
+    i::Token::Value actual = scanner.Next();
+    // Compare token names rather than enum values.
+    CHECK_EQ(i::Token::String(expected), i::Token::String(actual));
+    if (scanner.location().end_pos == skip_pos) {
+      scanner.SeekForward(skip_to);
+    }
+    i++;
+  } while (expected_tokens[i] != i::Token::ILLEGAL);
+}

+// Feeds three small programs through TestStreamScanner: a plain token
+// sequence, one with a skipped region between braces, and one that skips a
+// varying number of closing braces.
+TEST(StreamScanner) {
+  const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";
+  i::Utf8ToUC16CharacterStream stream1(reinterpret_cast<const i::byte*>(str1),
+                                       static_cast<unsigned>(strlen(str1)));
+  i::Token::Value expectations1[] = {
+      i::Token::LBRACE,
+      i::Token::IDENTIFIER,  // foo
+      i::Token::IDENTIFIER,  // get
+      i::Token::FOR,
+      i::Token::COLON,
+      i::Token::MUL,
+      i::Token::DIV,
+      i::Token::LT,
+      i::Token::SUB,
+      i::Token::IDENTIFIER,  // bib
+      i::Token::EOS,
+      i::Token::ILLEGAL
+  };
+  TestStreamScanner(&stream1, expectations1, 0, 0);
+
+  const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";
+  i::Utf8ToUC16CharacterStream stream2(reinterpret_cast<const i::byte*>(str2),
+                                       static_cast<unsigned>(strlen(str2)));
+  i::Token::Value expectations2[] = {
+      i::Token::CASE,
+      i::Token::DEFAULT,
+      i::Token::CONST,
+      i::Token::LBRACE,
+      // Skipped part here
+      i::Token::RBRACE,
+      i::Token::DO,
+      i::Token::EOS,
+      i::Token::ILLEGAL
+  };
+  ASSERT_EQ('{', str2[19]);
+  ASSERT_EQ('}', str2[37]);
+  TestStreamScanner(&stream2, expectations2, 20, 37);
+
+  const char* str3 = "{}}}}";
+  // One entry per character of str3, plus EOS and the ILLEGAL terminator.
+  // The loop below writes indices up to 6, so the table needs seven slots.
+  i::Token::Value expectations3[] = {
+      i::Token::LBRACE,
+      i::Token::RBRACE,
+      i::Token::RBRACE,
+      i::Token::RBRACE,
+      i::Token::RBRACE,
+      i::Token::EOS,
+      i::Token::ILLEGAL
+  };
+  // Skip zero-four RBRACEs.
+  for (int i = 0; i <= 4; i++) {
+    // Each iteration moves the EOS/ILLEGAL tail one slot earlier, dropping
+    // one expected RBRACE.
+    expectations3[6 - i] = i::Token::ILLEGAL;
+    expectations3[5 - i] = i::Token::EOS;
+    i::Utf8ToUC16CharacterStream stream3(
+        reinterpret_cast<const i::byte*>(str3),
+        static_cast<unsigned>(strlen(str3)));
+    TestStreamScanner(&stream3, expectations3, 1, 1 + i);
+  }
+}

« no previous file with comments | « src/scanner-base.cc ('k') | no next file » | no next file with comments »