Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 37 matching lines...) | |
| 48 } | 48 } |
| 49 | 49 |
| 50 static const int kEndOfString = -1; | 50 static const int kEndOfString = -1; |
| 51 | 51 |
| 52 private: | 52 private: |
| 53 // Parse a string containing a single JSON value. | 53 // Parse a string containing a single JSON value. |
| 54 Handle<Object> ParseJson(Handle<String> source); | 54 Handle<Object> ParseJson(Handle<String> source); |
| 55 | 55 |
| 56 inline void Advance() { | 56 inline void Advance() { |
| 57 position_++; | 57 position_++; |
| 58 if (position_ > source_length_) { | 58 if (position_ >= source_length_) { |
| 59 c0_ = kEndOfString; | 59 c0_ = kEndOfString; |
| 60 } else if (seq_ascii) { | 60 } else if (seq_ascii) { |
| 61 c0_ = seq_source_->SeqAsciiStringGet(position_); | 61 c0_ = seq_source_->SeqAsciiStringGet(position_); |
| 62 } else { | 62 } else { |
| 63 c0_ = source_->Get(position_); | 63 c0_ = source_->Get(position_); |
| 64 } | 64 } |
| 65 } | 65 } |
| 66 | 66 |
| 67 // The JSON lexical grammar is specified in the ECMAScript 5 standard, | 67 // The JSON lexical grammar is specified in the ECMAScript 5 standard, |
| 68 // section 15.12.1.1. The only allowed whitespace characters between tokens | 68 // section 15.12.1.1. The only allowed whitespace characters between tokens |
| (...skipping 31 matching lines...) | |
| 100 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and | 100 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and |
| 101 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. | 101 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. |
| 102 Handle<String> ParseJsonString() { | 102 Handle<String> ParseJsonString() { |
| 103 return ScanJsonString<false>(); | 103 return ScanJsonString<false>(); |
| 104 } | 104 } |
| 105 Handle<String> ParseJsonSymbol() { | 105 Handle<String> ParseJsonSymbol() { |
| 106 return ScanJsonString<true>(); | 106 return ScanJsonString<true>(); |
| 107 } | 107 } |
| 108 template <bool is_symbol> | 108 template <bool is_symbol> |
| 109 Handle<String> ScanJsonString(); | 109 Handle<String> ScanJsonString(); |
| 110 // Slow version for unicode support, uses the first ascii_count characters, | 110 // Slow version for backslash and unicode support, uses the characters from |
| 111 // as first part of a ConsString | 111 // start to end in prefix as the first part of the resulting string. |
| 112 Handle<String> SlowScanJsonString(int beg_pos); | 112 template <typename StringType, typename SinkChar> |
| 113 Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end); | |
|
Lasse Reichstein
2011/06/29 09:27:30
Slow version of what? Just say what the function does…
sandholm
2011/06/29 10:44:39
Done.
| |
| 113 | 114 |
| 114 // A JSON number (production JSONNumber) is a subset of the valid JavaScript | 115 // A JSON number (production JSONNumber) is a subset of the valid JavaScript |
| 115 // decimal number literals. | 116 // decimal number literals. |
| 116 // It includes an optional minus sign, must have at least one | 117 // It includes an optional minus sign, must have at least one |
| 117 // digit before and after a decimal point, may not have prefixed zeros (unless | 118 // digit before and after a decimal point, may not have prefixed zeros (unless |
| 118 // the integer part is zero), and may include an exponent part (e.g., "e-10"). | 119 // the integer part is zero), and may include an exponent part (e.g., "e-10"). |
| 119 // Hexadecimal and octal numbers are not allowed. | 120 // Hexadecimal and octal numbers are not allowed. |
| 120 Handle<Object> ParseJsonNumber(); | 121 Handle<Object> ParseJsonNumber(); |
| 121 | 122 |
| 122 // Parse a single JSON value from input (grammar production JSONValue). | 123 // Parse a single JSON value from input (grammar production JSONValue). |
| (...skipping 18 matching lines...) | |
| 141 | 142 |
| 142 | 143 |
| 143 // Mark that a parsing error has happened at the current token, and | 144 // Mark that a parsing error has happened at the current token, and |
| 144 // return a null handle. Primarily for readability. | 145 // return a null handle. Primarily for readability. |
| 145 inline Handle<Object> ReportUnexpectedCharacter() { | 146 inline Handle<Object> ReportUnexpectedCharacter() { |
| 146 return Handle<Object>::null(); | 147 return Handle<Object>::null(); |
| 147 } | 148 } |
| 148 | 149 |
| 149 inline Isolate* isolate() { return isolate_; } | 150 inline Isolate* isolate() { return isolate_; } |
| 150 | 151 |
| 151 static const int kInitialSpecialStringSize = 1024; | 152 static const int kInitialSpecialStringLength = 1024; |
| 152 | 153 |
| 153 | 154 |
| 154 private: | 155 private: |
| 155 Handle<String> source_; | 156 Handle<String> source_; |
| 156 int source_length_; | 157 int source_length_; |
| 157 Handle<SeqAsciiString> seq_source_; | 158 Handle<SeqAsciiString> seq_source_; |
| 158 | 159 |
| 159 Isolate* isolate_; | 160 Isolate* isolate_; |
| 160 uc32 c0_; | 161 uc32 c0_; |
| 161 int position_; | 162 int position_; |
| 162 }; | 163 }; |
| 163 | 164 |
| 164 template <bool seq_ascii> | 165 template <bool seq_ascii> |
| 165 Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) { | 166 Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) { |
| 166 isolate_ = source->map()->isolate(); | 167 isolate_ = source->map()->isolate(); |
| 167 source_ = Handle<String>(source->TryFlattenGetString()); | 168 source_ = Handle<String>(source->TryFlattenGetString()); |
| 168 source_length_ = source_->length() - 1; | 169 source_length_ = source_->length(); |
| 169 | 170 |
| 170 // Optimized fast case where we only have ascii characters. | 171 // Optimized fast case where we only have ascii characters. |
|
Lasse Reichstein
2011/06/29 09:27:30
ASCII is an acronym when used in prose.
sandholm
2011/06/29 10:44:39
Done.
| |
| 171 if (seq_ascii) { | 172 if (seq_ascii) { |
| 172 seq_source_ = Handle<SeqAsciiString>::cast(source_); | 173 seq_source_ = Handle<SeqAsciiString>::cast(source_); |
| 173 } | 174 } |
| 174 | 175 |
| 175 // Set initial position right before the string. | 176 // Set initial position right before the string. |
| 176 position_ = -1; | 177 position_ = -1; |
| 177 // Advance to the first character (possibly EOS) | 178 // Advance to the first character (possibly EOS) |
| 178 AdvanceSkipWhitespace(); | 179 AdvanceSkipWhitespace(); |
| 179 Handle<Object> result = ParseJsonValue(); | 180 Handle<Object> result = ParseJsonValue(); |
| 180 if (result.is_null() || c0_ != kEndOfString) { | 181 if (result.is_null() || c0_ != kEndOfString) { |
| (...skipping 222 matching lines...) | |
| 403 number = StringToDouble(isolate()->unicode_cache(), | 404 number = StringToDouble(isolate()->unicode_cache(), |
| 404 result, | 405 result, |
| 405 NO_FLAGS, // Hex, octal or trailing junk. | 406 NO_FLAGS, // Hex, octal or trailing junk. |
| 406 0.0); | 407 0.0); |
| 407 buffer.Dispose(); | 408 buffer.Dispose(); |
| 408 } | 409 } |
| 409 SkipWhitespace(); | 410 SkipWhitespace(); |
| 410 return isolate()->factory()->NewNumber(number); | 411 return isolate()->factory()->NewNumber(number); |
| 411 } | 412 } |
| 412 | 413 |
| 414 | |
| 415 template <typename StringType> | |
| 416 inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c); | |
| 417 | |
| 418 template <> | |
| 419 inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) { | |
| 420 seq_str->SeqTwoByteStringSet(i, c); | |
| 421 } | |
| 422 | |
| 423 template <> | |
| 424 inline void SeqStringSet(Handle<SeqAsciiString> seq_str, int i, uc32 c) { | |
| 425 seq_str->SeqAsciiStringSet(i, c); | |
| 426 } | |
| 427 | |
| 428 template <typename StringType> | |
| 429 inline Handle<StringType> NewRawString(Factory* factory, int length); | |
| 430 | |
| 431 template <> | |
| 432 inline Handle<SeqTwoByteString> NewRawString(Factory* factory, int length) { | |
| 433 return factory->NewRawTwoByteString(length, NOT_TENURED); | |
| 434 } | |
| 435 | |
| 436 template <> | |
| 437 inline Handle<SeqAsciiString> NewRawString(Factory* factory, int length) { | |
| 438 return factory->NewRawAsciiString(length, NOT_TENURED); | |
| 439 } | |
| 440 | |
| 441 | |
| 442 // Scans the rest of a JSON string starting from position_ and writes | |
| 443 // substring(prefix, start, end) along with the scanned characters into a | |
| 444 // sequential string of type StringType. | |
| 413 template <bool seq_ascii> | 445 template <bool seq_ascii> |
| 414 Handle<String> JsonParser<seq_ascii>::SlowScanJsonString(int beg_pos) { | 446 template <typename StringType, typename SinkChar> |
| 415 // The currently scanned ascii characters. | 447 Handle<String> JsonParser<seq_ascii>::SlowScanJsonString( |
| 416 Handle<String> ascii(isolate()->factory()->NewProperSubString(source_, | 448 Handle<String> prefix, int start, int end) { |
| 417 beg_pos, | 449 int count = end - start; |
| 418 position_)); | 450 int length = Min(count + source_length_ - position_, |
| 419 Handle<String> two_byte = | 451 Max(kInitialSpecialStringLength, 2 * count)); |
| 420 isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize, | 452 Handle<StringType> seq_str = NewRawString<StringType>(isolate()->factory(), |
| 421 NOT_TENURED); | 453 length); |
| 422 Handle<SeqTwoByteString> seq_two_byte = | 454 // Copy prefix into seq_str. |
| 423 Handle<SeqTwoByteString>::cast(two_byte); | 455 SinkChar* dest = seq_str->GetChars(); |
| 424 | 456 String::WriteToFlat(*prefix, dest, start, end); |
| 425 int allocation_count = 1; | |
| 426 int count = 0; | |
| 427 | 457 |
| 428 while (c0_ != '"') { | 458 while (c0_ != '"') { |
| 429 // Create new seq string | 459 // Create new seq string |
|
Lasse Reichstein
2011/06/29 09:27:30
Move comment to after "if" line, so it only applies…
sandholm
2011/06/29 10:44:39
Done.
| |
| 430 if (count >= kInitialSpecialStringSize * allocation_count) { | 460 if (count >= length) { |
| 431 allocation_count = allocation_count * 2; | 461 return this->SlowScanJsonString<StringType, SinkChar>(seq_str, 0, count); |
| 432 int new_size = allocation_count * kInitialSpecialStringSize; | |
| 433 Handle<String> new_two_byte = | |
| 434 isolate()->factory()->NewRawTwoByteString(new_size, | |
| 435 NOT_TENURED); | |
| 436 uc16* char_start = | |
| 437 Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars(); | |
| 438 String::WriteToFlat(*seq_two_byte, char_start, 0, count); | |
| 439 seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte); | |
| 440 } | 462 } |
| 441 | |
| 442 // Check for control character (0x00-0x1f) or unterminated string (<0). | 463 // Check for control character (0x00-0x1f) or unterminated string (<0). |
| 443 if (c0_ < 0x20) return Handle<String>::null(); | 464 if (c0_ < 0x20) return Handle<String>::null(); |
| 444 if (c0_ != '\\') { | 465 if (c0_ != '\\') { |
| 445 seq_two_byte->SeqTwoByteStringSet(count++, c0_); | 466 if (sizeof(char) != sizeof(SinkChar) || |
|
Lasse Reichstein
2011/06/29 09:27:30
Does it lint? (Generally, use kCharSize instead of…
sandholm
2011/06/29 10:44:39
Done.
| |
| 446 Advance(); | 467 seq_ascii || |
|
Lasse Reichstein
2011/06/29 09:27:30
This could use a comment:
If the sink can contain
sandholm
2011/06/29 10:44:39
Done.
| |
| 468 c0_ <= kMaxAsciiCharCode) { | |
| 469 SeqStringSet(seq_str, count++, c0_); | |
| 470 Advance(); | |
| 471 } else { | |
| 472 // StringType is SeqAsciiString and we just read a non-ascii char. | |
|
Lasse Reichstein
2011/06/29 09:27:30
non-ASCII.
sandholm
2011/06/29 10:44:39
Done.
| |
| 473 return this->SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, | |
| 474 0, | |
| 475 count); | |
| 476 } | |
| 447 } else { | 477 } else { |
| 448 Advance(); | 478 Advance(); // Advance past the \. |
| 449 switch (c0_) { | 479 switch (c0_) { |
| 450 case '"': | 480 case '"': |
| 451 case '\\': | 481 case '\\': |
| 452 case '/': | 482 case '/': |
| 453 seq_two_byte->SeqTwoByteStringSet(count++, c0_); | 483 SeqStringSet(seq_str, count++, c0_); |
| 454 break; | 484 break; |
| 455 case 'b': | 485 case 'b': |
| 456 seq_two_byte->SeqTwoByteStringSet(count++, '\x08'); | 486 SeqStringSet(seq_str, count++, '\x08'); |
| 457 break; | 487 break; |
| 458 case 'f': | 488 case 'f': |
| 459 seq_two_byte->SeqTwoByteStringSet(count++, '\x0c'); | 489 SeqStringSet(seq_str, count++, '\x0c'); |
| 460 break; | 490 break; |
| 461 case 'n': | 491 case 'n': |
| 462 seq_two_byte->SeqTwoByteStringSet(count++, '\x0a'); | 492 SeqStringSet(seq_str, count++, '\x0a'); |
| 463 break; | 493 break; |
| 464 case 'r': | 494 case 'r': |
| 465 seq_two_byte->SeqTwoByteStringSet(count++, '\x0d'); | 495 SeqStringSet(seq_str, count++, '\x0d'); |
| 466 break; | 496 break; |
| 467 case 't': | 497 case 't': |
| 468 seq_two_byte->SeqTwoByteStringSet(count++, '\x09'); | 498 SeqStringSet(seq_str, count++, '\x09'); |
| 469 break; | 499 break; |
| 470 case 'u': { | 500 case 'u': { |
| 471 uc32 value = 0; | 501 uc32 value = 0; |
| 472 for (int i = 0; i < 4; i++) { | 502 for (int i = 0; i < 4; i++) { |
| 473 Advance(); | 503 Advance(); |
| 474 int digit = HexValue(c0_); | 504 int digit = HexValue(c0_); |
| 475 if (digit < 0) { | 505 if (digit < 0) { |
| 476 return Handle<String>::null(); | 506 return Handle<String>::null(); |
| 477 } | 507 } |
| 478 value = value * 16 + digit; | 508 value = value * 16 + digit; |
| 479 } | 509 } |
| 480 seq_two_byte->SeqTwoByteStringSet(count++, value); | 510 if (sizeof(char) != sizeof(SinkChar) || value <= kMaxAsciiCharCode) { |
|
Lasse Reichstein
2011/06/29 09:27:30
sizeof(SinkChar) == kUC16Size
sandholm
2011/06/29 10:44:39
Done.
| |
| 481 break; | 511 SeqStringSet(seq_str, count++, value); |
| 512 break; | |
| 513 } else { | |
| 514 // StringType is SeqAsciiString and we just read a non-ascii char. | |
| 515 position_ -= 6; // Rewind position to \ in \uxxxx. | |
| 516 Advance(); | |
| 517 return this->SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, | |
| 518 0, | |
| 519 count); | |
| 520 } | |
| 482 } | 521 } |
| 483 default: | 522 default: |
| 484 return Handle<String>::null(); | 523 return Handle<String>::null(); |
| 485 } | 524 } |
| 486 Advance(); | 525 Advance(); |
| 487 } | 526 } |
| 488 } | 527 } |
| 489 // Advance past the last '"'. | 528 // Shrink seq_string length to count. |
| 490 ASSERT_EQ('"', c0_); | 529 if (isolate()->heap()->InNewSpace(*seq_str)) { |
| 491 AdvanceSkipWhitespace(); | |
| 492 | |
| 493 // Shrink the string to our length. | |
| 494 if (isolate()->heap()->InNewSpace(*seq_two_byte)) { | |
| 495 isolate()->heap()->new_space()-> | 530 isolate()->heap()->new_space()-> |
| 496 template ShrinkStringAtAllocationBoundary<SeqTwoByteString>( | 531 template ShrinkStringAtAllocationBoundary<StringType>( |
|
Lasse Reichstein
2011/06/29 09:27:30
Do ASSERT that the string is at the allocation boundary…
sandholm
2011/06/29 10:44:39
That ASSERT is in ShrinkStringAtAllocationBoundary
Lasse Reichstein
2011/06/29 10:53:00
It'll do :)
It's probably better to keep it there,
| |
| 497 *seq_two_byte, count); | 532 *seq_str, count); |
| 498 } else { | 533 } else { |
| 499 int string_size = SeqTwoByteString::SizeFor(count); | 534 int string_size = StringType::SizeFor(count); |
| 500 int allocated_string_size = | 535 int allocated_string_size = StringType::SizeFor(length); |
| 501 SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count); | |
| 502 int delta = allocated_string_size - string_size; | 536 int delta = allocated_string_size - string_size; |
| 503 Address start_filler_object = seq_two_byte->address() + string_size; | 537 Address start_filler_object = seq_str->address() + string_size; |
| 504 seq_two_byte->set_length(count); | 538 seq_str->set_length(count); |
| 505 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); | 539 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); |
| 506 } | 540 } |
| 507 return isolate()->factory()->NewConsString(ascii, seq_two_byte); | 541 ASSERT_EQ('"', c0_); |
| 542 // Advance past the last '"'. | |
| 543 AdvanceSkipWhitespace(); | |
| 544 return seq_str; | |
| 508 } | 545 } |
| 509 | 546 |
| 547 | |
| 510 template <bool seq_ascii> | 548 template <bool seq_ascii> |
| 511 template <bool is_symbol> | 549 template <bool is_symbol> |
| 512 Handle<String> JsonParser<seq_ascii>::ScanJsonString() { | 550 Handle<String> JsonParser<seq_ascii>::ScanJsonString() { |
| 513 ASSERT_EQ('"', c0_); | 551 ASSERT_EQ('"', c0_); |
| 514 Advance(); | 552 Advance(); |
| 553 if (c0_ == '"') { | |
| 554 AdvanceSkipWhitespace(); | |
| 555 return Handle<String>(isolate()->heap()->empty_string()); | |
| 556 } | |
| 515 int beg_pos = position_; | 557 int beg_pos = position_; |
| 516 // Fast case for ascii only without escape characters. | 558 // Fast case for ascii only without escape characters. |
| 517 while (c0_ != '"') { | 559 do { |
| 518 // Check for control character (0x00-0x1f) or unterminated string (<0). | 560 // Check for control character (0x00-0x1f) or unterminated string (<0). |
| 519 if (c0_ < 0x20) return Handle<String>::null(); | 561 if (c0_ < 0x20) return Handle<String>::null(); |
| 520 if (c0_ != '\\' && (seq_ascii || c0_ < kMaxAsciiCharCode)) { | 562 if (c0_ != '\\' && (seq_ascii || c0_ <= kMaxAsciiCharCode)) { |
| 521 Advance(); | 563 Advance(); |
| 522 } else { | 564 } else { |
| 523 return this->SlowScanJsonString(beg_pos); | 565 return this->SlowScanJsonString<SeqAsciiString, char>(source_, |
| 566 beg_pos, | |
| 567 position_); | |
| 524 } | 568 } |
| 569 } while (c0_ != '"'); | |
| 570 int length = position_ - beg_pos; | |
| 571 Handle<String> result; | |
| 572 if (seq_ascii && is_symbol) { | |
| 573 result = isolate()->factory()->LookupAsciiSymbol(seq_source_, | |
| 574 beg_pos, | |
| 575 length); | |
| 576 } else { | |
| 577 result = isolate()->factory()->NewRawAsciiString(length); | |
| 578 char* dest = SeqAsciiString::cast(*result)->GetChars(); | |
| 579 String::WriteToFlat(*source_, dest, beg_pos, position_); | |
| 525 } | 580 } |
| 526 ASSERT_EQ('"', c0_); | 581 ASSERT_EQ('"', c0_); |
| 527 int end_pos = position_; | |
| 528 // Advance past the last '"'. | 582 // Advance past the last '"'. |
| 529 AdvanceSkipWhitespace(); | 583 AdvanceSkipWhitespace(); |
| 530 if (seq_ascii && is_symbol) { | 584 return result; |
| 531 return isolate()->factory()->LookupAsciiSymbol(seq_source_, | |
| 532 beg_pos, | |
| 533 end_pos - beg_pos); | |
| 534 } else { | |
| 535 return isolate()->factory()->NewProperSubString(source_, | |
| 536 beg_pos, | |
| 537 end_pos); | |
| 538 } | |
| 539 } | 585 } |
| 540 | 586 |
| 541 } } // namespace v8::internal | 587 } } // namespace v8::internal |
| 542 | 588 |
| 543 #endif // V8_JSON_PARSER_H_ | 589 #endif // V8_JSON_PARSER_H_ |
| OLD | NEW |