Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(618)

Unified Diff: src/json-parser.h

Issue 7241023: Improve JSON.parse to use less memory when using escaped and non-ascii... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/json-parser.h
===================================================================
--- src/json-parser.h (revision 8443)
+++ src/json-parser.h (working copy)
@@ -55,7 +55,7 @@
inline void Advance() {
position_++;
- if (position_ > source_length_) {
+ if (position_ >= source_length_) {
c0_ = kEndOfString;
} else if (seq_ascii) {
c0_ = seq_source_->SeqAsciiStringGet(position_);
@@ -107,9 +107,10 @@
}
template <bool is_symbol>
Handle<String> ScanJsonString();
- // Slow version for unicode support, uses the first ascii_count characters,
- // as first part of a ConsString
- Handle<String> SlowScanJsonString(int beg_pos);
+ // Slow version for backslash and unicode support, uses the characters from
+ // start to end in prefix as the first part of the resulting string.
+ template <typename StringType, typename SinkChar>
+ Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end);
Lasse Reichstein 2011/06/29 09:27:30 Slow version of what? Just say what the function does […]
sandholm 2011/06/29 10:44:39 Done.
// A JSON number (production JSONNumber) is a subset of the valid JavaScript
// decimal number literals.
@@ -148,7 +149,7 @@
inline Isolate* isolate() { return isolate_; }
- static const int kInitialSpecialStringSize = 1024;
+ static const int kInitialSpecialStringLength = 1024;
private:
@@ -165,7 +166,7 @@
Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) {
isolate_ = source->map()->isolate();
source_ = Handle<String>(source->TryFlattenGetString());
- source_length_ = source_->length() - 1;
+ source_length_ = source_->length();
// Optimized fast case where we only have ascii characters.
Lasse Reichstein 2011/06/29 09:27:30 ASCII is an acronym when used in prose.
sandholm 2011/06/29 10:44:39 Done.
if (seq_ascii) {
@@ -410,62 +411,91 @@
return isolate()->factory()->NewNumber(number);
}
+
+template <typename StringType>
+inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c);
+
+template <>
+inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) {
+ seq_str->SeqTwoByteStringSet(i, c);
+}
+
+template <>
+inline void SeqStringSet(Handle<SeqAsciiString> seq_str, int i, uc32 c) {
+ seq_str->SeqAsciiStringSet(i, c);
+}
+
+template <typename StringType>
+inline Handle<StringType> NewRawString(Factory* factory, int length);
+
+template <>
+inline Handle<SeqTwoByteString> NewRawString(Factory* factory, int length) {
+ return factory->NewRawTwoByteString(length, NOT_TENURED);
+}
+
+template <>
+inline Handle<SeqAsciiString> NewRawString(Factory* factory, int length) {
+ return factory->NewRawAsciiString(length, NOT_TENURED);
+}
+
+
+// Scans the rest of a JSON string starting from position_ and writes
+// substring(prefix, start, end) along with the scanned characters into a
+// sequential string of type StringType.
template <bool seq_ascii>
-Handle<String> JsonParser<seq_ascii>::SlowScanJsonString(int beg_pos) {
- // The currently scanned ascii characters.
- Handle<String> ascii(isolate()->factory()->NewProperSubString(source_,
- beg_pos,
- position_));
- Handle<String> two_byte =
- isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize,
- NOT_TENURED);
- Handle<SeqTwoByteString> seq_two_byte =
- Handle<SeqTwoByteString>::cast(two_byte);
+template <typename StringType, typename SinkChar>
+Handle<String> JsonParser<seq_ascii>::SlowScanJsonString(
+ Handle<String> prefix, int start, int end) {
+ int count = end - start;
+ int length = Min(count + source_length_ - position_,
+ Max(kInitialSpecialStringLength, 2 * count));
+ Handle<StringType> seq_str = NewRawString<StringType>(isolate()->factory(),
+ length);
+ // Copy prefix into seq_str.
+ SinkChar* dest = seq_str->GetChars();
+ String::WriteToFlat(*prefix, dest, start, end);
- int allocation_count = 1;
- int count = 0;
-
while (c0_ != '"') {
// Create new seq string
Lasse Reichstein 2011/06/29 09:27:30 Move comment to after "if" line, so it only applie
sandholm 2011/06/29 10:44:39 Done.
- if (count >= kInitialSpecialStringSize * allocation_count) {
- allocation_count = allocation_count * 2;
- int new_size = allocation_count * kInitialSpecialStringSize;
- Handle<String> new_two_byte =
- isolate()->factory()->NewRawTwoByteString(new_size,
- NOT_TENURED);
- uc16* char_start =
- Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars();
- String::WriteToFlat(*seq_two_byte, char_start, 0, count);
- seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte);
+ if (count >= length) {
+ return this->SlowScanJsonString<StringType, SinkChar>(seq_str, 0, count);
}
-
// Check for control character (0x00-0x1f) or unterminated string (<0).
if (c0_ < 0x20) return Handle<String>::null();
if (c0_ != '\\') {
- seq_two_byte->SeqTwoByteStringSet(count++, c0_);
- Advance();
+ if (sizeof(char) != sizeof(SinkChar) ||
Lasse Reichstein 2011/06/29 09:27:30 Does it lint? (Generally, use kCharSize instead of […]
sandholm 2011/06/29 10:44:39 Done.
+ seq_ascii ||
Lasse Reichstein 2011/06/29 09:27:30 This could use a comment: If the sink can contain […]
sandholm 2011/06/29 10:44:39 Done.
+ c0_ <= kMaxAsciiCharCode) {
+ SeqStringSet(seq_str, count++, c0_);
+ Advance();
+ } else {
+ // StringType is SeqAsciiString and we just read a non-ascii char.
Lasse Reichstein 2011/06/29 09:27:30 non-ASCII.
sandholm 2011/06/29 10:44:39 Done.
+ return this->SlowScanJsonString<SeqTwoByteString, uc16>(seq_str,
+ 0,
+ count);
+ }
} else {
- Advance();
+ Advance(); // Advance past the \.
switch (c0_) {
case '"':
case '\\':
case '/':
- seq_two_byte->SeqTwoByteStringSet(count++, c0_);
+ SeqStringSet(seq_str, count++, c0_);
break;
case 'b':
- seq_two_byte->SeqTwoByteStringSet(count++, '\x08');
+ SeqStringSet(seq_str, count++, '\x08');
break;
case 'f':
- seq_two_byte->SeqTwoByteStringSet(count++, '\x0c');
+ SeqStringSet(seq_str, count++, '\x0c');
break;
case 'n':
- seq_two_byte->SeqTwoByteStringSet(count++, '\x0a');
+ SeqStringSet(seq_str, count++, '\x0a');
break;
case 'r':
- seq_two_byte->SeqTwoByteStringSet(count++, '\x0d');
+ SeqStringSet(seq_str, count++, '\x0d');
break;
case 't':
- seq_two_byte->SeqTwoByteStringSet(count++, '\x09');
+ SeqStringSet(seq_str, count++, '\x09');
break;
case 'u': {
uc32 value = 0;
@@ -477,8 +507,17 @@
}
value = value * 16 + digit;
}
- seq_two_byte->SeqTwoByteStringSet(count++, value);
- break;
+ if (sizeof(char) != sizeof(SinkChar) || value <= kMaxAsciiCharCode) {
Lasse Reichstein 2011/06/29 09:27:30 sizeof(SinkChar) == kUC16Size
sandholm 2011/06/29 10:44:39 Done.
+ SeqStringSet(seq_str, count++, value);
+ break;
+ } else {
+ // StringType is SeqAsciiString and we just read a non-ascii char.
+ position_ -= 6; // Rewind position to \ in \uxxxx.
+ Advance();
+ return this->SlowScanJsonString<SeqTwoByteString, uc16>(seq_str,
+ 0,
+ count);
+ }
}
default:
return Handle<String>::null();
@@ -486,56 +525,63 @@
Advance();
}
}
- // Advance past the last '"'.
- ASSERT_EQ('"', c0_);
- AdvanceSkipWhitespace();
-
- // Shrink the the string to our length.
- if (isolate()->heap()->InNewSpace(*seq_two_byte)) {
+ // Shrink seq_string length to count.
+ if (isolate()->heap()->InNewSpace(*seq_str)) {
isolate()->heap()->new_space()->
- template ShrinkStringAtAllocationBoundary<SeqTwoByteString>(
- *seq_two_byte, count);
+ template ShrinkStringAtAllocationBoundary<StringType>(
Lasse Reichstein 2011/06/29 09:27:30 Do ASSERT that the string is at the allocation boundary […]
sandholm 2011/06/29 10:44:39 That ASSERT is in ShrinkStringAtAllocationBoundary
Lasse Reichstein 2011/06/29 10:53:00 It'll do :) It's probably better to keep it there, […]
+ *seq_str, count);
} else {
- int string_size = SeqTwoByteString::SizeFor(count);
- int allocated_string_size =
- SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count);
+ int string_size = StringType::SizeFor(count);
+ int allocated_string_size = StringType::SizeFor(length);
int delta = allocated_string_size - string_size;
- Address start_filler_object = seq_two_byte->address() + string_size;
- seq_two_byte->set_length(count);
+ Address start_filler_object = seq_str->address() + string_size;
+ seq_str->set_length(count);
isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);
}
- return isolate()->factory()->NewConsString(ascii, seq_two_byte);
+ ASSERT_EQ('"', c0_);
+ // Advance past the last '"'.
+ AdvanceSkipWhitespace();
+ return seq_str;
}
+
template <bool seq_ascii>
template <bool is_symbol>
Handle<String> JsonParser<seq_ascii>::ScanJsonString() {
ASSERT_EQ('"', c0_);
Advance();
+ if (c0_ == '"') {
+ AdvanceSkipWhitespace();
+ return Handle<String>(isolate()->heap()->empty_string());
+ }
int beg_pos = position_;
// Fast case for ascii only without escape characters.
- while (c0_ != '"') {
+ do {
// Check for control character (0x00-0x1f) or unterminated string (<0).
if (c0_ < 0x20) return Handle<String>::null();
- if (c0_ != '\\' && (seq_ascii || c0_ < kMaxAsciiCharCode)) {
+ if (c0_ != '\\' && (seq_ascii || c0_ <= kMaxAsciiCharCode)) {
Advance();
} else {
- return this->SlowScanJsonString(beg_pos);
+ return this->SlowScanJsonString<SeqAsciiString, char>(source_,
+ beg_pos,
+ position_);
}
+ } while (c0_ != '"');
+ int length = position_ - beg_pos;
+ Handle<String> result;
+ if (seq_ascii && is_symbol) {
+ result = isolate()->factory()->LookupAsciiSymbol(seq_source_,
+ beg_pos,
+ length);
+ } else {
+ result = isolate()->factory()->NewRawAsciiString(length);
+ char* dest = SeqAsciiString::cast(*result)->GetChars();
+ String::WriteToFlat(*source_, dest, beg_pos, position_);
}
ASSERT_EQ('"', c0_);
- int end_pos = position_;
// Advance past the last '"'.
AdvanceSkipWhitespace();
- if (seq_ascii && is_symbol) {
- return isolate()->factory()->LookupAsciiSymbol(seq_source_,
- beg_pos,
- end_pos - beg_pos);
- } else {
- return isolate()->factory()->NewProperSubString(source_,
- beg_pos,
- end_pos);
- }
+ return result;
}
} } // namespace v8::internal
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698