src/json-parser.h - Issue 7241023: Improve JSON.parse to use less memory when using escaped and non-ascii...

Unified Diff: src/json-parser.h

Issue 7241023: Improve JSON.parse to use less memory when using escaped and non-ascii... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: '' Created 9 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/json-parser.h

===================================================================

--- src/json-parser.h (revision 8369)

+++ src/json-parser.h (working copy)

@@ -109,7 +109,8 @@

Handle<String> ScanJsonString();

// Slow version for unicode support, uses the first ascii_count characters,

// as first part of a ConsString

Lasse Reichstein 2011/06/29 09:27:29 Comment out of date (what is ascii_count)?

- Handle<String> SlowScanJsonString(int beg_pos);

+ Handle<String> SlowScanJsonAsciiString(Handle<String> prefix);

+ Handle<String> SlowScanJsonTwoByteString(Handle<String> prefix);

// A JSON number (production JSONNumber) is a subset of the valid JavaScript

// decimal number literals.

@@ -410,33 +411,26 @@

return isolate()->factory()->NewNumber(number);

}

template <bool seq_ascii>

-Handle<String> JsonParser<seq_ascii>::SlowScanJsonString(int beg_pos) {

- // The currently scanned ascii characters.

- Handle<String> ascii(isolate()->factory()->NewStrictSubString(source_,

- beg_pos,

- position_));

- Handle<String> two_byte =

- isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize,

- NOT_TENURED);

+Handle<String> JsonParser<seq_ascii>::SlowScanJsonTwoByteString(

+ Handle<String> prefix) {

+ int length = kInitialSpecialStringSize;

Lasse Reichstein 2011/06/24 13:56:00 Change ...StringSize to ...StringLength. Size soun...

+ Handle<String> new_two_byte =

+ isolate()->factory()->NewRawTwoByteString(length, NOT_TENURED);

Lasse Reichstein 2011/06/24 13:56:00 If NewRawTwoByteString doesn't return a Handle<Seq...

Handle<SeqTwoByteString> seq_two_byte =

- Handle<SeqTwoByteString>::cast(two_byte);

- int allocation_count = 1;

+ Handle<SeqTwoByteString>::cast(new_two_byte);

int count = 0;

while (c0_ != '"') {

// Create new seq string

- if (count >= kInitialSpecialStringSize * allocation_count) {

- allocation_count = allocation_count * 2;

- int new_size = allocation_count * kInitialSpecialStringSize;

- Handle<String> new_two_byte =

- isolate()->factory()->NewRawTwoByteString(new_size,

- NOT_TENURED);

- uc16* char_start =

- Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars();

- String::WriteToFlat(*seq_two_byte, char_start, 0, count);

+ if (count >= length) {

+ prefix = isolate()->factory()->NewConsString(prefix, new_two_byte);

+ length *= 2;

+ new_two_byte = isolate()->factory()->

+ NewRawTwoByteString(length, NOT_TENURED);

seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte);

+ count = 0;

}

// Check for control character (0x00-0x1f) or unterminated string (<0).

@@ -445,7 +439,7 @@

seq_two_byte->SeqTwoByteStringSet(count++, c0_);

Advance();

} else {

- Advance();

+ Advance(); // Advance past the \.

switch (c0_) {

case '"':

case '\\':

@@ -486,28 +480,129 @@

Advance();

}

- // Advance past the last '"'.

- ASSERT_EQ('"', c0_);

- AdvanceSkipWhitespace();

- // Shrink the the string to our length.

- if (isolate()->heap()->InNewSpace(*seq_two_byte)) {

+ // Shrink new_two_byte from length to count.

+ if (isolate()->heap()->InNewSpace(*new_two_byte)) {

isolate()->heap()->new_space()->

template ShrinkStringAtAllocationBoundary<SeqTwoByteString>(

*seq_two_byte, count);

} else {

int string_size = SeqTwoByteString::SizeFor(count);

- int allocated_string_size =

- SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count);

+ int allocated_string_size = SeqTwoByteString::SizeFor(length);

int delta = allocated_string_size - string_size;

Address start_filler_object = seq_two_byte->address() + string_size;

seq_two_byte->set_length(count);

isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);

}

- return isolate()->factory()->NewConsString(ascii, seq_two_byte);

+ ASSERT_EQ('"', c0_);

+ AdvanceSkipWhitespace();

+ return isolate()->factory()->NewConsString(prefix, new_two_byte);

}

template <bool seq_ascii>

+Handle<String> JsonParser<seq_ascii>::SlowScanJsonAsciiString(

Lasse Reichstein 2011/06/24 13:56:00 Sounds like it scans an ASCII string, not that it...

+ Handle<String> prefix) {

+ int length = kInitialSpecialStringSize;

+ Handle<String> new_ascii =

+ isolate()->factory()->NewRawAsciiString(length, NOT_TENURED);

+ Handle<SeqAsciiString> seq_ascii_str =

+ Handle<SeqAsciiString>::cast(new_ascii);

+ int count = 0;

+ while (c0_ != '"') {

+ // Create new seq string

+ if (count >= length) {

+ prefix = isolate()->factory()->NewConsString(prefix, new_ascii);

+ length *= 2;

+ new_ascii = isolate()->factory()->NewRawAsciiString(length, NOT_TENURED);

+ seq_ascii_str = Handle<SeqAsciiString>::cast(new_ascii);

+ count = 0;

+ }

+ // Check for control character (0x00-0x1f) or unterminated string (<0).

+ if (c0_ < 0x20) return Handle<String>::null();

+ if (c0_ != '\\') {

+ if (seq_ascii || c0_ <= kMaxAsciiCharCode) {

+ seq_ascii_str->SeqAsciiStringSet(count++, c0_);

+ Advance();

+ } else {

+ break;

+ }

+ } else {

+ Advance(); // Advance past the \.

+ switch (c0_) {

+ case '"':

+ case '\\':

+ case '/':

+ seq_ascii_str->SeqAsciiStringSet(count++, c0_);

+ break;

+ case 'b':

+ seq_ascii_str->SeqAsciiStringSet(count++, '\x08');

+ break;

+ case 'f':

+ seq_ascii_str->SeqAsciiStringSet(count++, '\x0c');

+ break;

+ case 'n':

+ seq_ascii_str->SeqAsciiStringSet(count++, '\x0a');

+ break;

+ case 'r':

+ seq_ascii_str->SeqAsciiStringSet(count++, '\x0d');

+ break;

+ case 't':

+ seq_ascii_str->SeqAsciiStringSet(count++, '\x09');

+ break;

+ case 'u': {

+ uc32 value = 0;

+ for (int i = 0; i < 4; i++) {

+ Advance();

+ int digit = HexValue(c0_);

+ if (digit < 0) {

+ return Handle<String>::null();

+ }

+ value = value * 16 + digit;

+ }

+ if (value <= kMaxAsciiCharCode) {

+ seq_ascii_str->SeqAsciiStringSet(count++, value);

+ break;

+ } else {

+ position_ -= 6; // Rewind position to \ in \uxxxx.

+ Advance();

+ goto outer_loop; // break out of while loop.

Lasse Reichstein 2011/06/24 13:56:00 I'm really not too keen on using gotos. Really not...

+ }

+ default:

+ return Handle<String>::null();

+ }

+ Advance();

+ }

+ outer_loop:

+ // Shrink new_ascii from length to count.

+ if (isolate()->heap()->InNewSpace(*new_ascii)) {

+ isolate()->heap()->new_space()->

+ template ShrinkStringAtAllocationBoundary<SeqAsciiString>(

+ *seq_ascii_str, count);

+ } else {

+ int string_size = SeqAsciiString::SizeFor(count);

Lasse Reichstein 2011/06/24 13:56:00 If the truncation would turn a lot of space into f...

+ int allocated_string_size = SeqAsciiString::SizeFor(length);

+ int delta = allocated_string_size - string_size;

+ Address start_filler_object = seq_ascii_str->address() + string_size;

+ seq_ascii_str->set_length(count);

+ isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);

+ }

Lasse Reichstein 2011/06/24 13:56:00 If you move the shrinking to a separate function, ...

+ prefix = isolate()->factory()->NewConsString(prefix, new_ascii);

+ if (c0_ != '"') {

+ // We have read a non-ascii character - either directly or as \uxxxx.

+ return this->SlowScanJsonTwoByteString(prefix);

+ }

+ ASSERT_EQ('"', c0_);

+ // Advance past the last '"'.

+ AdvanceSkipWhitespace();

+ return prefix;

+template <bool seq_ascii>

template <bool is_symbol>

Handle<String> JsonParser<seq_ascii>::ScanJsonString() {

ASSERT_EQ('"', c0_);

@@ -517,25 +612,36 @@

while (c0_ != '"') {

// Check for control character (0x00-0x1f) or unterminated string (<0).

if (c0_ < 0x20) return Handle<String>::null();

- if (c0_ != '\\' && (seq_ascii || c0_ < kMaxAsciiCharCode)) {

+ if (c0_ != '\\' && (seq_ascii || c0_ <= kMaxAsciiCharCode)) {

Advance();

} else {

- return this->SlowScanJsonString(beg_pos);

+ break;

Lasse Reichstein 2011/06/24 13:56:00 Again, you can abstract the part below into a func...

}

- ASSERT_EQ('"', c0_);

- int end_pos = position_;

+ Handle<String> result;

+ if (seq_ascii) {

+ if (is_symbol) {

+ result = isolate()->factory()->LookupAsciiSymbol(seq_source_,

+ beg_pos,

+ position_ - beg_pos);

+ } else {

+ result = isolate()->factory()->NewStrictSubString(source_,

+ beg_pos,

+ position_);

+ }

+ } else {

+ int length = position_ - beg_pos;

+ result = isolate()->factory()->NewRawAsciiString(length);

+ ASSERT(result->IsAsciiRepresentation());

+ char* dest = SeqAsciiString::cast(*result)->GetChars();

+ String::WriteToFlat(*source_, dest, beg_pos, position_);

+ }

+ if (c0_ != '"') {

+ return this->SlowScanJsonAsciiString(result);

+ }

// Advance past the last '"'.

AdvanceSkipWhitespace();

- if (seq_ascii && is_symbol) {

- return isolate()->factory()->LookupAsciiSymbol(seq_source_,

- beg_pos,

- end_pos - beg_pos);

- } else {

- return isolate()->factory()->NewStrictSubString(source_,

- beg_pos,

- end_pos);

- }

+ return result;

}

} } // namespace v8::internal

« no previous file with comments | « no previous file | no next file » | no next file with comments »