Chromium Code Reviews| Index: src/json-parser.h |
| =================================================================== |
| --- src/json-parser.h (revision 8443) |
| +++ src/json-parser.h (working copy) |
| @@ -55,7 +55,7 @@ |
| inline void Advance() { |
| position_++; |
| - if (position_ > source_length_) { |
| + if (position_ >= source_length_) { |
| c0_ = kEndOfString; |
| } else if (seq_ascii) { |
| c0_ = seq_source_->SeqAsciiStringGet(position_); |
| @@ -107,9 +107,10 @@ |
| } |
| template <bool is_symbol> |
| Handle<String> ScanJsonString(); |
| - // Slow version for unicode support, uses the first ascii_count characters, |
| - // as first part of a ConsString |
| - Handle<String> SlowScanJsonString(int beg_pos); |
| + // Slow version for backslash and unicode support, uses the characters from |
| + // start to end in prefix as the first part of the resulting string. |
| + template <typename StringType, typename SinkChar> |
| + Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end); |
|
Lasse Reichstein
2011/06/29 09:27:30
Slow version of what? Just say what the function does.
sandholm
2011/06/29 10:44:39
Done.
|
| // A JSON number (production JSONNumber) is a subset of the valid JavaScript |
| // decimal number literals. |
| @@ -148,7 +149,7 @@ |
| inline Isolate* isolate() { return isolate_; } |
| - static const int kInitialSpecialStringSize = 1024; |
| + static const int kInitialSpecialStringLength = 1024; |
| private: |
| @@ -165,7 +166,7 @@ |
| Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) { |
| isolate_ = source->map()->isolate(); |
| source_ = Handle<String>(source->TryFlattenGetString()); |
| - source_length_ = source_->length() - 1; |
| + source_length_ = source_->length(); |
| // Optimized fast case where we only have ascii characters. |
|
Lasse Reichstein
2011/06/29 09:27:30
ASCII is an acronym when used in prose.
sandholm
2011/06/29 10:44:39
Done.
|
| if (seq_ascii) { |
| @@ -410,62 +411,91 @@ |
| return isolate()->factory()->NewNumber(number); |
| } |
| + |
| +template <typename StringType> |
| +inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c); |
| + |
| +template <> |
| +inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) { |
| + seq_str->SeqTwoByteStringSet(i, c); |
| +} |
| + |
| +template <> |
| +inline void SeqStringSet(Handle<SeqAsciiString> seq_str, int i, uc32 c) { |
| + seq_str->SeqAsciiStringSet(i, c); |
| +} |
| + |
| +template <typename StringType> |
| +inline Handle<StringType> NewRawString(Factory* factory, int length); |
| + |
| +template <> |
| +inline Handle<SeqTwoByteString> NewRawString(Factory* factory, int length) { |
| + return factory->NewRawTwoByteString(length, NOT_TENURED); |
| +} |
| + |
| +template <> |
| +inline Handle<SeqAsciiString> NewRawString(Factory* factory, int length) { |
| + return factory->NewRawAsciiString(length, NOT_TENURED); |
| +} |
| + |
| + |
| +// Scans the rest of a JSON string starting from position_ and writes |
| +// substring(prefix, start, end) along with the scanned characters into a |
| +// sequential string of type StringType. |
| template <bool seq_ascii> |
| -Handle<String> JsonParser<seq_ascii>::SlowScanJsonString(int beg_pos) { |
| - // The currently scanned ascii characters. |
| - Handle<String> ascii(isolate()->factory()->NewProperSubString(source_, |
| - beg_pos, |
| - position_)); |
| - Handle<String> two_byte = |
| - isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize, |
| - NOT_TENURED); |
| - Handle<SeqTwoByteString> seq_two_byte = |
| - Handle<SeqTwoByteString>::cast(two_byte); |
| +template <typename StringType, typename SinkChar> |
| +Handle<String> JsonParser<seq_ascii>::SlowScanJsonString( |
| + Handle<String> prefix, int start, int end) { |
| + int count = end - start; |
| + int length = Min(count + source_length_ - position_, |
| + Max(kInitialSpecialStringLength, 2 * count)); |
| + Handle<StringType> seq_str = NewRawString<StringType>(isolate()->factory(), |
| + length); |
| + // Copy prefix into seq_str. |
| + SinkChar* dest = seq_str->GetChars(); |
| + String::WriteToFlat(*prefix, dest, start, end); |
| - int allocation_count = 1; |
| - int count = 0; |
| - |
| while (c0_ != '"') { |
| // Create new seq string |
|
Lasse Reichstein
2011/06/29 09:27:30
Move comment to after "if" line, so it only applies […]
sandholm
2011/06/29 10:44:39
Done.
|
| - if (count >= kInitialSpecialStringSize * allocation_count) { |
| - allocation_count = allocation_count * 2; |
| - int new_size = allocation_count * kInitialSpecialStringSize; |
| - Handle<String> new_two_byte = |
| - isolate()->factory()->NewRawTwoByteString(new_size, |
| - NOT_TENURED); |
| - uc16* char_start = |
| - Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars(); |
| - String::WriteToFlat(*seq_two_byte, char_start, 0, count); |
| - seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte); |
| + if (count >= length) { |
| + return this->SlowScanJsonString<StringType, SinkChar>(seq_str, 0, count); |
| } |
| - |
| // Check for control character (0x00-0x1f) or unterminated string (<0). |
| if (c0_ < 0x20) return Handle<String>::null(); |
| if (c0_ != '\\') { |
| - seq_two_byte->SeqTwoByteStringSet(count++, c0_); |
| - Advance(); |
| + if (sizeof(char) != sizeof(SinkChar) || |
|
Lasse Reichstein
2011/06/29 09:27:30
Does it lint? (Generally, use kCharSize instead of […]
sandholm
2011/06/29 10:44:39
Done.
|
| + seq_ascii || |
|
Lasse Reichstein
2011/06/29 09:27:30
This could use a comment:
If the sink can contain
sandholm
2011/06/29 10:44:39
Done.
|
| + c0_ <= kMaxAsciiCharCode) { |
| + SeqStringSet(seq_str, count++, c0_); |
| + Advance(); |
| + } else { |
| + // StringType is SeqAsciiString and we just read a non-ascii char. |
|
Lasse Reichstein
2011/06/29 09:27:30
non-ASCII.
sandholm
2011/06/29 10:44:39
Done.
|
| + return this->SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, |
| + 0, |
| + count); |
| + } |
| } else { |
| - Advance(); |
| + Advance(); // Advance past the \. |
| switch (c0_) { |
| case '"': |
| case '\\': |
| case '/': |
| - seq_two_byte->SeqTwoByteStringSet(count++, c0_); |
| + SeqStringSet(seq_str, count++, c0_); |
| break; |
| case 'b': |
| - seq_two_byte->SeqTwoByteStringSet(count++, '\x08'); |
| + SeqStringSet(seq_str, count++, '\x08'); |
| break; |
| case 'f': |
| - seq_two_byte->SeqTwoByteStringSet(count++, '\x0c'); |
| + SeqStringSet(seq_str, count++, '\x0c'); |
| break; |
| case 'n': |
| - seq_two_byte->SeqTwoByteStringSet(count++, '\x0a'); |
| + SeqStringSet(seq_str, count++, '\x0a'); |
| break; |
| case 'r': |
| - seq_two_byte->SeqTwoByteStringSet(count++, '\x0d'); |
| + SeqStringSet(seq_str, count++, '\x0d'); |
| break; |
| case 't': |
| - seq_two_byte->SeqTwoByteStringSet(count++, '\x09'); |
| + SeqStringSet(seq_str, count++, '\x09'); |
| break; |
| case 'u': { |
| uc32 value = 0; |
| @@ -477,8 +507,17 @@ |
| } |
| value = value * 16 + digit; |
| } |
| - seq_two_byte->SeqTwoByteStringSet(count++, value); |
| - break; |
| + if (sizeof(char) != sizeof(SinkChar) || value <= kMaxAsciiCharCode) { |
|
Lasse Reichstein
2011/06/29 09:27:30
sizeof(SinkChar) == kUC16Size
sandholm
2011/06/29 10:44:39
Done.
|
| + SeqStringSet(seq_str, count++, value); |
| + break; |
| + } else { |
| + // StringType is SeqAsciiString and we just read a non-ascii char. |
| + position_ -= 6; // Rewind position to \ in \uxxxx. |
| + Advance(); |
| + return this->SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, |
| + 0, |
| + count); |
| + } |
| } |
| default: |
| return Handle<String>::null(); |
| @@ -486,56 +525,63 @@ |
| Advance(); |
| } |
| } |
| - // Advance past the last '"'. |
| - ASSERT_EQ('"', c0_); |
| - AdvanceSkipWhitespace(); |
| - |
| - // Shrink the the string to our length. |
| - if (isolate()->heap()->InNewSpace(*seq_two_byte)) { |
| + // Shrink seq_string length to count. |
| + if (isolate()->heap()->InNewSpace(*seq_str)) { |
| isolate()->heap()->new_space()-> |
| - template ShrinkStringAtAllocationBoundary<SeqTwoByteString>( |
| - *seq_two_byte, count); |
| + template ShrinkStringAtAllocationBoundary<StringType>( |
|
Lasse Reichstein
2011/06/29 09:27:30
Do ASSERT that the string is at the allocation boundary.
sandholm
2011/06/29 10:44:39
That ASSERT is in ShrinkStringAtAllocationBoundary
Lasse Reichstein
2011/06/29 10:53:00
It'll do :)
It's probably better to keep it there,
|
| + *seq_str, count); |
| } else { |
| - int string_size = SeqTwoByteString::SizeFor(count); |
| - int allocated_string_size = |
| - SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count); |
| + int string_size = StringType::SizeFor(count); |
| + int allocated_string_size = StringType::SizeFor(length); |
| int delta = allocated_string_size - string_size; |
| - Address start_filler_object = seq_two_byte->address() + string_size; |
| - seq_two_byte->set_length(count); |
| + Address start_filler_object = seq_str->address() + string_size; |
| + seq_str->set_length(count); |
| isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); |
| } |
| - return isolate()->factory()->NewConsString(ascii, seq_two_byte); |
| + ASSERT_EQ('"', c0_); |
| + // Advance past the last '"'. |
| + AdvanceSkipWhitespace(); |
| + return seq_str; |
| } |
| + |
| template <bool seq_ascii> |
| template <bool is_symbol> |
| Handle<String> JsonParser<seq_ascii>::ScanJsonString() { |
| ASSERT_EQ('"', c0_); |
| Advance(); |
| + if (c0_ == '"') { |
| + AdvanceSkipWhitespace(); |
| + return Handle<String>(isolate()->heap()->empty_string()); |
| + } |
| int beg_pos = position_; |
| // Fast case for ascii only without escape characters. |
| - while (c0_ != '"') { |
| + do { |
| // Check for control character (0x00-0x1f) or unterminated string (<0). |
| if (c0_ < 0x20) return Handle<String>::null(); |
| - if (c0_ != '\\' && (seq_ascii || c0_ < kMaxAsciiCharCode)) { |
| + if (c0_ != '\\' && (seq_ascii || c0_ <= kMaxAsciiCharCode)) { |
| Advance(); |
| } else { |
| - return this->SlowScanJsonString(beg_pos); |
| + return this->SlowScanJsonString<SeqAsciiString, char>(source_, |
| + beg_pos, |
| + position_); |
| } |
| + } while (c0_ != '"'); |
| + int length = position_ - beg_pos; |
| + Handle<String> result; |
| + if (seq_ascii && is_symbol) { |
| + result = isolate()->factory()->LookupAsciiSymbol(seq_source_, |
| + beg_pos, |
| + length); |
| + } else { |
| + result = isolate()->factory()->NewRawAsciiString(length); |
| + char* dest = SeqAsciiString::cast(*result)->GetChars(); |
| + String::WriteToFlat(*source_, dest, beg_pos, position_); |
| } |
| ASSERT_EQ('"', c0_); |
| - int end_pos = position_; |
| // Advance past the last '"'. |
| AdvanceSkipWhitespace(); |
| - if (seq_ascii && is_symbol) { |
| - return isolate()->factory()->LookupAsciiSymbol(seq_source_, |
| - beg_pos, |
| - end_pos - beg_pos); |
| - } else { |
| - return isolate()->factory()->NewProperSubString(source_, |
| - beg_pos, |
| - end_pos); |
| - } |
| + return result; |
| } |
| } } // namespace v8::internal |