Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(156)

Unified Diff: src/json-parser.h

Issue 7241023: Improve JSON.parse to use less memory when using escaped and non-ascii... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/json-parser.h
===================================================================
--- src/json-parser.h (revision 8369)
+++ src/json-parser.h (working copy)
@@ -109,7 +109,8 @@
Handle<String> ScanJsonString();
// Slow version for unicode support, uses the first ascii_count characters,
// as first part of a ConsString
Lasse Reichstein 2011/06/29 09:27:29 Comment out of date (what is ascii_count)?
- Handle<String> SlowScanJsonString(int beg_pos);
+ Handle<String> SlowScanJsonAsciiString(Handle<String> prefix);
+ Handle<String> SlowScanJsonTwoByteString(Handle<String> prefix);
// A JSON number (production JSONNumber) is a subset of the valid JavaScript
// decimal number literals.
@@ -410,33 +411,26 @@
return isolate()->factory()->NewNumber(number);
}
+
template <bool seq_ascii>
-Handle<String> JsonParser<seq_ascii>::SlowScanJsonString(int beg_pos) {
- // The currently scanned ascii characters.
- Handle<String> ascii(isolate()->factory()->NewStrictSubString(source_,
- beg_pos,
- position_));
- Handle<String> two_byte =
- isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize,
- NOT_TENURED);
+Handle<String> JsonParser<seq_ascii>::SlowScanJsonTwoByteString(
+ Handle<String> prefix) {
+ int length = kInitialSpecialStringSize;
Lasse Reichstein 2011/06/24 13:56:00 Change ...StringSize to ...StringLength. Size soun
+ Handle<String> new_two_byte =
+ isolate()->factory()->NewRawTwoByteString(length, NOT_TENURED);
Lasse Reichstein 2011/06/24 13:56:00 If NewRawTwoByteString doesn't return a Handle<Seq
Handle<SeqTwoByteString> seq_two_byte =
- Handle<SeqTwoByteString>::cast(two_byte);
-
- int allocation_count = 1;
+ Handle<SeqTwoByteString>::cast(new_two_byte);
int count = 0;
while (c0_ != '"') {
// Create new seq string
- if (count >= kInitialSpecialStringSize * allocation_count) {
- allocation_count = allocation_count * 2;
- int new_size = allocation_count * kInitialSpecialStringSize;
- Handle<String> new_two_byte =
- isolate()->factory()->NewRawTwoByteString(new_size,
- NOT_TENURED);
- uc16* char_start =
- Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars();
- String::WriteToFlat(*seq_two_byte, char_start, 0, count);
+ if (count >= length) {
+ prefix = isolate()->factory()->NewConsString(prefix, new_two_byte);
+ length *= 2;
+ new_two_byte = isolate()->factory()->
+ NewRawTwoByteString(length, NOT_TENURED);
seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte);
+ count = 0;
}
// Check for control character (0x00-0x1f) or unterminated string (<0).
@@ -445,7 +439,7 @@
seq_two_byte->SeqTwoByteStringSet(count++, c0_);
Advance();
} else {
- Advance();
+ Advance(); // Advance past the \.
switch (c0_) {
case '"':
case '\\':
@@ -486,28 +480,129 @@
Advance();
}
}
- // Advance past the last '"'.
- ASSERT_EQ('"', c0_);
- AdvanceSkipWhitespace();
- // Shrink the the string to our length.
- if (isolate()->heap()->InNewSpace(*seq_two_byte)) {
+ // Shrink new_two_byte from length to count.
+ if (isolate()->heap()->InNewSpace(*new_two_byte)) {
isolate()->heap()->new_space()->
template ShrinkStringAtAllocationBoundary<SeqTwoByteString>(
*seq_two_byte, count);
} else {
int string_size = SeqTwoByteString::SizeFor(count);
- int allocated_string_size =
- SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count);
+ int allocated_string_size = SeqTwoByteString::SizeFor(length);
int delta = allocated_string_size - string_size;
Address start_filler_object = seq_two_byte->address() + string_size;
seq_two_byte->set_length(count);
isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);
}
- return isolate()->factory()->NewConsString(ascii, seq_two_byte);
+ ASSERT_EQ('"', c0_);
+ AdvanceSkipWhitespace();
+ return isolate()->factory()->NewConsString(prefix, new_two_byte);
}
+
template <bool seq_ascii>
+Handle<String> JsonParser<seq_ascii>::SlowScanJsonAsciiString(
Lasse Reichstein 2011/06/24 13:56:00 Sounds like it scans an ASCII string, not that it
+ Handle<String> prefix) {
+ int length = kInitialSpecialStringSize;
+ Handle<String> new_ascii =
+ isolate()->factory()->NewRawAsciiString(length, NOT_TENURED);
+ Handle<SeqAsciiString> seq_ascii_str =
+ Handle<SeqAsciiString>::cast(new_ascii);
+ int count = 0;
+
+ while (c0_ != '"') {
+ // Create new seq string
+ if (count >= length) {
+ prefix = isolate()->factory()->NewConsString(prefix, new_ascii);
+ length *= 2;
+ new_ascii = isolate()->factory()->NewRawAsciiString(length, NOT_TENURED);
+ seq_ascii_str = Handle<SeqAsciiString>::cast(new_ascii);
+ count = 0;
+ }
+
+ // Check for control character (0x00-0x1f) or unterminated string (<0).
+ if (c0_ < 0x20) return Handle<String>::null();
+ if (c0_ != '\\') {
+ if (seq_ascii || c0_ <= kMaxAsciiCharCode) {
+ seq_ascii_str->SeqAsciiStringSet(count++, c0_);
+ Advance();
+ } else {
+ break;
+ }
+ } else {
+ Advance(); // Advance past the \.
+ switch (c0_) {
+ case '"':
+ case '\\':
+ case '/':
+ seq_ascii_str->SeqAsciiStringSet(count++, c0_);
+ break;
+ case 'b':
+ seq_ascii_str->SeqAsciiStringSet(count++, '\x08');
+ break;
+ case 'f':
+ seq_ascii_str->SeqAsciiStringSet(count++, '\x0c');
+ break;
+ case 'n':
+ seq_ascii_str->SeqAsciiStringSet(count++, '\x0a');
+ break;
+ case 'r':
+ seq_ascii_str->SeqAsciiStringSet(count++, '\x0d');
+ break;
+ case 't':
+ seq_ascii_str->SeqAsciiStringSet(count++, '\x09');
+ break;
+ case 'u': {
+ uc32 value = 0;
+ for (int i = 0; i < 4; i++) {
+ Advance();
+ int digit = HexValue(c0_);
+ if (digit < 0) {
+ return Handle<String>::null();
+ }
+ value = value * 16 + digit;
+ }
+ if (value <= kMaxAsciiCharCode) {
+ seq_ascii_str->SeqAsciiStringSet(count++, value);
+ break;
+ } else {
+ position_ -= 6; // Rewind position to \ in \uxxxx.
+ Advance();
+ goto outer_loop; // break out of while loop.
Lasse Reichstein 2011/06/24 13:56:00 I'm really not too keen on using gotos. Really not
+ }
+ }
+ default:
+ return Handle<String>::null();
+ }
+ Advance();
+ }
+ }
+ outer_loop:
+ // Shrink new_ascii from length to count.
+ if (isolate()->heap()->InNewSpace(*new_ascii)) {
+ isolate()->heap()->new_space()->
+ template ShrinkStringAtAllocationBoundary<SeqAsciiString>(
+ *seq_ascii_str, count);
+ } else {
+ int string_size = SeqAsciiString::SizeFor(count);
Lasse Reichstein 2011/06/24 13:56:00 If the truncation would turn a lot of space into f
+ int allocated_string_size = SeqAsciiString::SizeFor(length);
+ int delta = allocated_string_size - string_size;
+ Address start_filler_object = seq_ascii_str->address() + string_size;
+ seq_ascii_str->set_length(count);
+ isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);
+ }
Lasse Reichstein 2011/06/24 13:56:00 If you move the shrinking to a separate function,
+ prefix = isolate()->factory()->NewConsString(prefix, new_ascii);
+ if (c0_ != '"') {
+ // We have read a non-ascii character - either directly or as \uxxxx.
+ return this->SlowScanJsonTwoByteString(prefix);
+ }
+ ASSERT_EQ('"', c0_);
+ // Advance past the last '"'.
+ AdvanceSkipWhitespace();
+ return prefix;
+}
+
+template <bool seq_ascii>
template <bool is_symbol>
Handle<String> JsonParser<seq_ascii>::ScanJsonString() {
ASSERT_EQ('"', c0_);
@@ -517,25 +612,36 @@
while (c0_ != '"') {
// Check for control character (0x00-0x1f) or unterminated string (<0).
if (c0_ < 0x20) return Handle<String>::null();
- if (c0_ != '\\' && (seq_ascii || c0_ < kMaxAsciiCharCode)) {
+ if (c0_ != '\\' && (seq_ascii || c0_ <= kMaxAsciiCharCode)) {
Advance();
} else {
- return this->SlowScanJsonString(beg_pos);
+ break;
Lasse Reichstein 2011/06/24 13:56:00 Again, you can abstract the part below into a func
}
}
- ASSERT_EQ('"', c0_);
- int end_pos = position_;
+ Handle<String> result;
+ if (seq_ascii) {
+ if (is_symbol) {
+ result = isolate()->factory()->LookupAsciiSymbol(seq_source_,
+ beg_pos,
+ position_ - beg_pos);
+ } else {
+ result = isolate()->factory()->NewStrictSubString(source_,
+ beg_pos,
+ position_);
+ }
+ } else {
+ int length = position_ - beg_pos;
+ result = isolate()->factory()->NewRawAsciiString(length);
+ ASSERT(result->IsAsciiRepresentation());
+ char* dest = SeqAsciiString::cast(*result)->GetChars();
+ String::WriteToFlat(*source_, dest, beg_pos, position_);
+ }
+ if (c0_ != '"') {
+ return this->SlowScanJsonAsciiString(result);
+ }
// Advance past the last '"'.
AdvanceSkipWhitespace();
- if (seq_ascii && is_symbol) {
- return isolate()->factory()->LookupAsciiSymbol(seq_source_,
- beg_pos,
- end_pos - beg_pos);
- } else {
- return isolate()->factory()->NewStrictSubString(source_,
- beg_pos,
- end_pos);
- }
+ return result;
}
} } // namespace v8::internal
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698