Index: src/json-parser.h |
=================================================================== |
--- src/json-parser.h (revision 8369) |
+++ src/json-parser.h (working copy) |
@@ -109,7 +109,8 @@ |
Handle<String> ScanJsonString(); |
// Slow version for unicode support, uses the first ascii_count characters, |
// as first part of a ConsString |
Lasse Reichstein
2011/06/29 09:27:29
This comment is out of date: what is ascii_count?
|
- Handle<String> SlowScanJsonString(int beg_pos); |
+ Handle<String> SlowScanJsonAsciiString(Handle<String> prefix); |
+ Handle<String> SlowScanJsonTwoByteString(Handle<String> prefix); |
// A JSON number (production JSONNumber) is a subset of the valid JavaScript |
// decimal number literals. |
@@ -410,33 +411,26 @@ |
return isolate()->factory()->NewNumber(number); |
} |
+ |
template <bool seq_ascii> |
-Handle<String> JsonParser<seq_ascii>::SlowScanJsonString(int beg_pos) { |
- // The currently scanned ascii characters. |
- Handle<String> ascii(isolate()->factory()->NewStrictSubString(source_, |
- beg_pos, |
- position_)); |
- Handle<String> two_byte = |
- isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize, |
- NOT_TENURED); |
+Handle<String> JsonParser<seq_ascii>::SlowScanJsonTwoByteString( |
+ Handle<String> prefix) { |
+ int length = kInitialSpecialStringSize; |
Lasse Reichstein
2011/06/24 13:56:00
Change ...StringSize to ...StringLength. Size soun
|
+ Handle<String> new_two_byte = |
+ isolate()->factory()->NewRawTwoByteString(length, NOT_TENURED); |
Lasse Reichstein
2011/06/24 13:56:00
If NewRawTwoByteString doesn't return a Handle<Seq
|
Handle<SeqTwoByteString> seq_two_byte = |
- Handle<SeqTwoByteString>::cast(two_byte); |
- |
- int allocation_count = 1; |
+ Handle<SeqTwoByteString>::cast(new_two_byte); |
int count = 0; |
while (c0_ != '"') { |
// Create new seq string |
- if (count >= kInitialSpecialStringSize * allocation_count) { |
- allocation_count = allocation_count * 2; |
- int new_size = allocation_count * kInitialSpecialStringSize; |
- Handle<String> new_two_byte = |
- isolate()->factory()->NewRawTwoByteString(new_size, |
- NOT_TENURED); |
- uc16* char_start = |
- Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars(); |
- String::WriteToFlat(*seq_two_byte, char_start, 0, count); |
+ if (count >= length) { |
+ prefix = isolate()->factory()->NewConsString(prefix, new_two_byte); |
+ length *= 2; |
+ new_two_byte = isolate()->factory()-> |
+ NewRawTwoByteString(length, NOT_TENURED); |
seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte); |
+ count = 0; |
} |
// Check for control character (0x00-0x1f) or unterminated string (<0). |
@@ -445,7 +439,7 @@ |
seq_two_byte->SeqTwoByteStringSet(count++, c0_); |
Advance(); |
} else { |
- Advance(); |
+ Advance(); // Advance past the \. |
switch (c0_) { |
case '"': |
case '\\': |
@@ -486,28 +480,129 @@ |
Advance(); |
} |
} |
- // Advance past the last '"'. |
- ASSERT_EQ('"', c0_); |
- AdvanceSkipWhitespace(); |
- // Shrink the the string to our length. |
- if (isolate()->heap()->InNewSpace(*seq_two_byte)) { |
+ // Shrink new_two_byte from length to count. |
+ if (isolate()->heap()->InNewSpace(*new_two_byte)) { |
isolate()->heap()->new_space()-> |
template ShrinkStringAtAllocationBoundary<SeqTwoByteString>( |
*seq_two_byte, count); |
} else { |
int string_size = SeqTwoByteString::SizeFor(count); |
- int allocated_string_size = |
- SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count); |
+ int allocated_string_size = SeqTwoByteString::SizeFor(length); |
int delta = allocated_string_size - string_size; |
Address start_filler_object = seq_two_byte->address() + string_size; |
seq_two_byte->set_length(count); |
isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); |
} |
- return isolate()->factory()->NewConsString(ascii, seq_two_byte); |
+ ASSERT_EQ('"', c0_); |
+ AdvanceSkipWhitespace(); |
+ return isolate()->factory()->NewConsString(prefix, new_two_byte); |
} |
+ |
template <bool seq_ascii> |
+Handle<String> JsonParser<seq_ascii>::SlowScanJsonAsciiString( |
Lasse Reichstein
2011/06/24 13:56:00
Sounds like it scans an ASCII string, not that it
|
+ Handle<String> prefix) { |
+ int length = kInitialSpecialStringSize; |
+ Handle<String> new_ascii = |
+ isolate()->factory()->NewRawAsciiString(length, NOT_TENURED); |
+ Handle<SeqAsciiString> seq_ascii_str = |
+ Handle<SeqAsciiString>::cast(new_ascii); |
+ int count = 0; |
+ |
+ while (c0_ != '"') { |
+ // Create new seq string |
+ if (count >= length) { |
+ prefix = isolate()->factory()->NewConsString(prefix, new_ascii); |
+ length *= 2; |
+ new_ascii = isolate()->factory()->NewRawAsciiString(length, NOT_TENURED); |
+ seq_ascii_str = Handle<SeqAsciiString>::cast(new_ascii); |
+ count = 0; |
+ } |
+ |
+ // Check for control character (0x00-0x1f) or unterminated string (<0). |
+ if (c0_ < 0x20) return Handle<String>::null(); |
+ if (c0_ != '\\') { |
+ if (seq_ascii || c0_ <= kMaxAsciiCharCode) { |
+ seq_ascii_str->SeqAsciiStringSet(count++, c0_); |
+ Advance(); |
+ } else { |
+ break; |
+ } |
+ } else { |
+ Advance(); // Advance past the \. |
+ switch (c0_) { |
+ case '"': |
+ case '\\': |
+ case '/': |
+ seq_ascii_str->SeqAsciiStringSet(count++, c0_); |
+ break; |
+ case 'b': |
+ seq_ascii_str->SeqAsciiStringSet(count++, '\x08'); |
+ break; |
+ case 'f': |
+ seq_ascii_str->SeqAsciiStringSet(count++, '\x0c'); |
+ break; |
+ case 'n': |
+ seq_ascii_str->SeqAsciiStringSet(count++, '\x0a'); |
+ break; |
+ case 'r': |
+ seq_ascii_str->SeqAsciiStringSet(count++, '\x0d'); |
+ break; |
+ case 't': |
+ seq_ascii_str->SeqAsciiStringSet(count++, '\x09'); |
+ break; |
+ case 'u': { |
+ uc32 value = 0; |
+ for (int i = 0; i < 4; i++) { |
+ Advance(); |
+ int digit = HexValue(c0_); |
+ if (digit < 0) { |
+ return Handle<String>::null(); |
+ } |
+ value = value * 16 + digit; |
+ } |
+ if (value <= kMaxAsciiCharCode) { |
+ seq_ascii_str->SeqAsciiStringSet(count++, value); |
+ break; |
+ } else { |
+ position_ -= 6; // Rewind position to \ in \uxxxx. |
+ Advance(); |
+ goto outer_loop; // break out of while loop. |
Lasse Reichstein
2011/06/24 13:56:00
I'm really not too keen on using gotos. Really not
|
+ } |
+ } |
+ default: |
+ return Handle<String>::null(); |
+ } |
+ Advance(); |
+ } |
+ } |
+ outer_loop: |
+ // Shrink new_ascii from length to count. |
+ if (isolate()->heap()->InNewSpace(*new_ascii)) { |
+ isolate()->heap()->new_space()-> |
+ template ShrinkStringAtAllocationBoundary<SeqAsciiString>( |
+ *seq_ascii_str, count); |
+ } else { |
+ int string_size = SeqAsciiString::SizeFor(count); |
Lasse Reichstein
2011/06/24 13:56:00
If the truncation would turn a lot of space into f
|
+ int allocated_string_size = SeqAsciiString::SizeFor(length); |
+ int delta = allocated_string_size - string_size; |
+ Address start_filler_object = seq_ascii_str->address() + string_size; |
+ seq_ascii_str->set_length(count); |
+ isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); |
+ } |
Lasse Reichstein
2011/06/24 13:56:00
If you move the shrinking to a separate function,
|
+ prefix = isolate()->factory()->NewConsString(prefix, new_ascii); |
+ if (c0_ != '"') { |
+ // We have read a non-ascii character - either directly or as \uxxxx. |
+ return this->SlowScanJsonTwoByteString(prefix); |
+ } |
+ ASSERT_EQ('"', c0_); |
+ // Advance past the last '"'. |
+ AdvanceSkipWhitespace(); |
+ return prefix; |
+} |
+ |
+template <bool seq_ascii> |
template <bool is_symbol> |
Handle<String> JsonParser<seq_ascii>::ScanJsonString() { |
ASSERT_EQ('"', c0_); |
@@ -517,25 +612,36 @@ |
while (c0_ != '"') { |
// Check for control character (0x00-0x1f) or unterminated string (<0). |
if (c0_ < 0x20) return Handle<String>::null(); |
- if (c0_ != '\\' && (seq_ascii || c0_ < kMaxAsciiCharCode)) { |
+ if (c0_ != '\\' && (seq_ascii || c0_ <= kMaxAsciiCharCode)) { |
Advance(); |
} else { |
- return this->SlowScanJsonString(beg_pos); |
+ break; |
Lasse Reichstein
2011/06/24 13:56:00
Again, you can abstract the part below into a func
|
} |
} |
- ASSERT_EQ('"', c0_); |
- int end_pos = position_; |
+ Handle<String> result; |
+ if (seq_ascii) { |
+ if (is_symbol) { |
+ result = isolate()->factory()->LookupAsciiSymbol(seq_source_, |
+ beg_pos, |
+ position_ - beg_pos); |
+ } else { |
+ result = isolate()->factory()->NewStrictSubString(source_, |
+ beg_pos, |
+ position_); |
+ } |
+ } else { |
+ int length = position_ - beg_pos; |
+ result = isolate()->factory()->NewRawAsciiString(length); |
+ ASSERT(result->IsAsciiRepresentation()); |
+ char* dest = SeqAsciiString::cast(*result)->GetChars(); |
+ String::WriteToFlat(*source_, dest, beg_pos, position_); |
+ } |
+ if (c0_ != '"') { |
+ return this->SlowScanJsonAsciiString(result); |
+ } |
// Advance past the last '"'. |
AdvanceSkipWhitespace(); |
- if (seq_ascii && is_symbol) { |
- return isolate()->factory()->LookupAsciiSymbol(seq_source_, |
- beg_pos, |
- end_pos - beg_pos); |
- } else { |
- return isolate()->factory()->NewStrictSubString(source_, |
- beg_pos, |
- end_pos); |
- } |
+ return result; |
} |
} } // namespace v8::internal |