OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
48 } | 48 } |
49 | 49 |
50 static const int kEndOfString = -1; | 50 static const int kEndOfString = -1; |
51 | 51 |
52 private: | 52 private: |
53 // Parse a string containing a single JSON value. | 53 // Parse a string containing a single JSON value. |
54 Handle<Object> ParseJson(Handle<String> source); | 54 Handle<Object> ParseJson(Handle<String> source); |
55 | 55 |
56 inline void Advance() { | 56 inline void Advance() { |
57 position_++; | 57 position_++; |
58 if (position_ > source_length_) { | 58 if (position_ >= source_length_) { |
59 c0_ = kEndOfString; | 59 c0_ = kEndOfString; |
60 } else if (seq_ascii) { | 60 } else if (seq_ascii) { |
61 c0_ = seq_source_->SeqAsciiStringGet(position_); | 61 c0_ = seq_source_->SeqAsciiStringGet(position_); |
62 } else { | 62 } else { |
63 c0_ = source_->Get(position_); | 63 c0_ = source_->Get(position_); |
64 } | 64 } |
65 } | 65 } |
66 | 66 |
67 // The JSON lexical grammar is specified in the ECMAScript 5 standard, | 67 // The JSON lexical grammar is specified in the ECMAScript 5 standard, |
68 // section 15.12.1.1. The only allowed whitespace characters between tokens | 68 // section 15.12.1.1. The only allowed whitespace characters between tokens |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
100 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and | 100 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and |
101 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. | 101 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. |
102 Handle<String> ParseJsonString() { | 102 Handle<String> ParseJsonString() { |
103 return ScanJsonString<false>(); | 103 return ScanJsonString<false>(); |
104 } | 104 } |
105 Handle<String> ParseJsonSymbol() { | 105 Handle<String> ParseJsonSymbol() { |
106 return ScanJsonString<true>(); | 106 return ScanJsonString<true>(); |
107 } | 107 } |
108 template <bool is_symbol> | 108 template <bool is_symbol> |
109 Handle<String> ScanJsonString(); | 109 Handle<String> ScanJsonString(); |
110 // Slow version for unicode support, uses the first ascii_count characters, | 110 // Creates a new string and copies prefix[start..end] into the beginning |
111 // as first part of a ConsString | 111 // of it. Then scans the rest of the string, adding characters after the |
112 Handle<String> SlowScanJsonString(int beg_pos); | 112 // prefix. Called by ScanJsonString when reaching a '\' or non-ASCII char. |
| 113 template <typename StringType, typename SinkChar> |
| 114 Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end); |
113 | 115 |
114 // A JSON number (production JSONNumber) is a subset of the valid JavaScript | 116 // A JSON number (production JSONNumber) is a subset of the valid JavaScript |
115 // decimal number literals. | 117 // decimal number literals. |
116 // It includes an optional minus sign, must have at least one | 118 // It includes an optional minus sign, must have at least one |
117 // digit before and after a decimal point, may not have prefixed zeros (unless | 119 // digit before and after a decimal point, may not have prefixed zeros (unless |
118 // the integer part is zero), and may include an exponent part (e.g., "e-10"). | 120 // the integer part is zero), and may include an exponent part (e.g., "e-10"). |
119 // Hexadecimal and octal numbers are not allowed. | 121 // Hexadecimal and octal numbers are not allowed. |
120 Handle<Object> ParseJsonNumber(); | 122 Handle<Object> ParseJsonNumber(); |
121 | 123 |
122 // Parse a single JSON value from input (grammar production JSONValue). | 124 // Parse a single JSON value from input (grammar production JSONValue). |
(...skipping 18 matching lines...) Expand all Loading... |
141 | 143 |
142 | 144 |
143 // Mark that a parsing error has happened at the current token, and | 145 // Mark that a parsing error has happened at the current token, and |
144 // return a null handle. Primarily for readability. | 146 // return a null handle. Primarily for readability. |
145 inline Handle<Object> ReportUnexpectedCharacter() { | 147 inline Handle<Object> ReportUnexpectedCharacter() { |
146 return Handle<Object>::null(); | 148 return Handle<Object>::null(); |
147 } | 149 } |
148 | 150 |
149 inline Isolate* isolate() { return isolate_; } | 151 inline Isolate* isolate() { return isolate_; } |
150 | 152 |
151 static const int kInitialSpecialStringSize = 1024; | 153 static const int kInitialSpecialStringLength = 1024; |
152 | 154 |
153 | 155 |
154 private: | 156 private: |
155 Handle<String> source_; | 157 Handle<String> source_; |
156 int source_length_; | 158 int source_length_; |
157 Handle<SeqAsciiString> seq_source_; | 159 Handle<SeqAsciiString> seq_source_; |
158 | 160 |
159 Isolate* isolate_; | 161 Isolate* isolate_; |
160 uc32 c0_; | 162 uc32 c0_; |
161 int position_; | 163 int position_; |
162 }; | 164 }; |
163 | 165 |
164 template <bool seq_ascii> | 166 template <bool seq_ascii> |
165 Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) { | 167 Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) { |
166 isolate_ = source->map()->isolate(); | 168 isolate_ = source->map()->isolate(); |
167 source_ = Handle<String>(source->TryFlattenGetString()); | 169 source_ = Handle<String>(source->TryFlattenGetString()); |
168 source_length_ = source_->length() - 1; | 170 source_length_ = source_->length(); |
169 | 171 |
170 // Optimized fast case where we only have ascii characters. | 172 // Optimized fast case where we only have ASCII characters. |
171 if (seq_ascii) { | 173 if (seq_ascii) { |
172 seq_source_ = Handle<SeqAsciiString>::cast(source_); | 174 seq_source_ = Handle<SeqAsciiString>::cast(source_); |
173 } | 175 } |
174 | 176 |
175 // Set initial position right before the string. | 177 // Set initial position right before the string. |
176 position_ = -1; | 178 position_ = -1; |
177 // Advance to the first character (possibly EOS) | 179 // Advance to the first character (possibly EOS) |
178 AdvanceSkipWhitespace(); | 180 AdvanceSkipWhitespace(); |
179 Handle<Object> result = ParseJsonValue(); | 181 Handle<Object> result = ParseJsonValue(); |
180 if (result.is_null() || c0_ != kEndOfString) { | 182 if (result.is_null() || c0_ != kEndOfString) { |
(...skipping 222 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
403 number = StringToDouble(isolate()->unicode_cache(), | 405 number = StringToDouble(isolate()->unicode_cache(), |
404 result, | 406 result, |
405 NO_FLAGS, // Hex, octal or trailing junk. | 407 NO_FLAGS, // Hex, octal or trailing junk. |
406 0.0); | 408 0.0); |
407 buffer.Dispose(); | 409 buffer.Dispose(); |
408 } | 410 } |
409 SkipWhitespace(); | 411 SkipWhitespace(); |
410 return isolate()->factory()->NewNumber(number); | 412 return isolate()->factory()->NewNumber(number); |
411 } | 413 } |
412 | 414 |
| 415 |
| 416 template <typename StringType> |
| 417 inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c); |
| 418 |
| 419 template <> |
| 420 inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) { |
| 421 seq_str->SeqTwoByteStringSet(i, c); |
| 422 } |
| 423 |
| 424 template <> |
| 425 inline void SeqStringSet(Handle<SeqAsciiString> seq_str, int i, uc32 c) { |
| 426 seq_str->SeqAsciiStringSet(i, c); |
| 427 } |
| 428 |
| 429 template <typename StringType> |
| 430 inline Handle<StringType> NewRawString(Factory* factory, int length); |
| 431 |
| 432 template <> |
| 433 inline Handle<SeqTwoByteString> NewRawString(Factory* factory, int length) { |
| 434 return factory->NewRawTwoByteString(length, NOT_TENURED); |
| 435 } |
| 436 |
| 437 template <> |
| 438 inline Handle<SeqAsciiString> NewRawString(Factory* factory, int length) { |
| 439 return factory->NewRawAsciiString(length, NOT_TENURED); |
| 440 } |
| 441 |
| 442 |
| 443 // Scans the rest of a JSON string starting from position_ and writes |
| 444 // prefix[start..end] along with the scanned characters into a |
| 445 // sequential string of type StringType. |
413 template <bool seq_ascii> | 446 template <bool seq_ascii> |
414 Handle<String> JsonParser<seq_ascii>::SlowScanJsonString(int beg_pos) { | 447 template <typename StringType, typename SinkChar> |
415 // The currently scanned ascii characters. | 448 Handle<String> JsonParser<seq_ascii>::SlowScanJsonString( |
416 Handle<String> ascii(isolate()->factory()->NewProperSubString(source_, | 449 Handle<String> prefix, int start, int end) { |
417 beg_pos, | 450 int count = end - start; |
418 position_)); | 451 int max_length = count + source_length_ - position_; |
419 Handle<String> two_byte = | 452 int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count)); |
420 isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize, | 453 Handle<StringType> seq_str = NewRawString<StringType>(isolate()->factory(), |
421 NOT_TENURED); | 454 length); |
422 Handle<SeqTwoByteString> seq_two_byte = | 455 // Copy prefix into seq_str. |
423 Handle<SeqTwoByteString>::cast(two_byte); | 456 SinkChar* dest = seq_str->GetChars(); |
424 | 457 String::WriteToFlat(*prefix, dest, start, end); |
425 int allocation_count = 1; | |
426 int count = 0; | |
427 | 458 |
428 while (c0_ != '"') { | 459 while (c0_ != '"') { |
429 // Create new seq string | 460 if (count >= length) { |
430 if (count >= kInitialSpecialStringSize * allocation_count) { | 461 // We need to create a longer sequential string for the result. |
431 allocation_count = allocation_count * 2; | 462 return SlowScanJsonString<StringType, SinkChar>(seq_str, 0, count); |
432 int new_size = allocation_count * kInitialSpecialStringSize; | |
433 Handle<String> new_two_byte = | |
434 isolate()->factory()->NewRawTwoByteString(new_size, | |
435 NOT_TENURED); | |
436 uc16* char_start = | |
437 Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars(); | |
438 String::WriteToFlat(*seq_two_byte, char_start, 0, count); | |
439 seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte); | |
440 } | 463 } |
441 | |
442 // Check for control character (0x00-0x1f) or unterminated string (<0). | 464 // Check for control character (0x00-0x1f) or unterminated string (<0). |
443 if (c0_ < 0x20) return Handle<String>::null(); | 465 if (c0_ < 0x20) return Handle<String>::null(); |
444 if (c0_ != '\\') { | 466 if (c0_ != '\\') { |
445 seq_two_byte->SeqTwoByteStringSet(count++, c0_); | 467 // If the sink can contain UC16 characters, or source_ contains only |
446 Advance(); | 468 // ASCII characters, there's no need to test whether we can store the |
| 469 // character. Otherwise check whether the UC16 source character can fit |
| 470 // in the ASCII sink. |
| 471 if (sizeof(SinkChar) == kUC16Size || |
| 472 seq_ascii || |
| 473 c0_ <= kMaxAsciiCharCode) { |
| 474 SeqStringSet(seq_str, count++, c0_); |
| 475 Advance(); |
| 476 } else { |
| 477 // StringType is SeqAsciiString and we just read a non-ASCII char. |
| 478 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, 0, count); |
| 479 } |
447 } else { | 480 } else { |
448 Advance(); | 481 Advance(); // Advance past the \. |
449 switch (c0_) { | 482 switch (c0_) { |
450 case '"': | 483 case '"': |
451 case '\\': | 484 case '\\': |
452 case '/': | 485 case '/': |
453 seq_two_byte->SeqTwoByteStringSet(count++, c0_); | 486 SeqStringSet(seq_str, count++, c0_); |
454 break; | 487 break; |
455 case 'b': | 488 case 'b': |
456 seq_two_byte->SeqTwoByteStringSet(count++, '\x08'); | 489 SeqStringSet(seq_str, count++, '\x08'); |
457 break; | 490 break; |
458 case 'f': | 491 case 'f': |
459 seq_two_byte->SeqTwoByteStringSet(count++, '\x0c'); | 492 SeqStringSet(seq_str, count++, '\x0c'); |
460 break; | 493 break; |
461 case 'n': | 494 case 'n': |
462 seq_two_byte->SeqTwoByteStringSet(count++, '\x0a'); | 495 SeqStringSet(seq_str, count++, '\x0a'); |
463 break; | 496 break; |
464 case 'r': | 497 case 'r': |
465 seq_two_byte->SeqTwoByteStringSet(count++, '\x0d'); | 498 SeqStringSet(seq_str, count++, '\x0d'); |
466 break; | 499 break; |
467 case 't': | 500 case 't': |
468 seq_two_byte->SeqTwoByteStringSet(count++, '\x09'); | 501 SeqStringSet(seq_str, count++, '\x09'); |
469 break; | 502 break; |
470 case 'u': { | 503 case 'u': { |
471 uc32 value = 0; | 504 uc32 value = 0; |
472 for (int i = 0; i < 4; i++) { | 505 for (int i = 0; i < 4; i++) { |
473 Advance(); | 506 Advance(); |
474 int digit = HexValue(c0_); | 507 int digit = HexValue(c0_); |
475 if (digit < 0) { | 508 if (digit < 0) { |
476 return Handle<String>::null(); | 509 return Handle<String>::null(); |
477 } | 510 } |
478 value = value * 16 + digit; | 511 value = value * 16 + digit; |
479 } | 512 } |
480 seq_two_byte->SeqTwoByteStringSet(count++, value); | 513 if (sizeof(SinkChar) == kUC16Size || value <= kMaxAsciiCharCode) { |
481 break; | 514 SeqStringSet(seq_str, count++, value); |
| 515 break; |
| 516 } else { |
| 517 // StringType is SeqAsciiString and we just read a non-ASCII char. |
| 518 position_ -= 6; // Rewind position_ to \ in \uxxxx. |
| 519 Advance(); |
| 520 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, |
| 521 0, |
| 522 count); |
| 523 } |
482 } | 524 } |
483 default: | 525 default: |
484 return Handle<String>::null(); | 526 return Handle<String>::null(); |
485 } | 527 } |
486 Advance(); | 528 Advance(); |
487 } | 529 } |
488 } | 530 } |
489 // Advance past the last '"'. | 531 // Shrink seq_string length to count. |
490 ASSERT_EQ('"', c0_); | 532 if (isolate()->heap()->InNewSpace(*seq_str)) { |
491 AdvanceSkipWhitespace(); | |
492 | |
493 // Shrink the the string to our length. | |
494 if (isolate()->heap()->InNewSpace(*seq_two_byte)) { | |
495 isolate()->heap()->new_space()-> | 533 isolate()->heap()->new_space()-> |
496 template ShrinkStringAtAllocationBoundary<SeqTwoByteString>( | 534 template ShrinkStringAtAllocationBoundary<StringType>( |
497 *seq_two_byte, count); | 535 *seq_str, count); |
498 } else { | 536 } else { |
499 int string_size = SeqTwoByteString::SizeFor(count); | 537 int string_size = StringType::SizeFor(count); |
500 int allocated_string_size = | 538 int allocated_string_size = StringType::SizeFor(length); |
501 SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count); | |
502 int delta = allocated_string_size - string_size; | 539 int delta = allocated_string_size - string_size; |
503 Address start_filler_object = seq_two_byte->address() + string_size; | 540 Address start_filler_object = seq_str->address() + string_size; |
504 seq_two_byte->set_length(count); | 541 seq_str->set_length(count); |
505 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); | 542 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); |
506 } | 543 } |
507 return isolate()->factory()->NewConsString(ascii, seq_two_byte); | 544 ASSERT_EQ('"', c0_); |
| 545 // Advance past the last '"'. |
| 546 AdvanceSkipWhitespace(); |
| 547 return seq_str; |
508 } | 548 } |
509 | 549 |
| 550 |
510 template <bool seq_ascii> | 551 template <bool seq_ascii> |
511 template <bool is_symbol> | 552 template <bool is_symbol> |
512 Handle<String> JsonParser<seq_ascii>::ScanJsonString() { | 553 Handle<String> JsonParser<seq_ascii>::ScanJsonString() { |
513 ASSERT_EQ('"', c0_); | 554 ASSERT_EQ('"', c0_); |
514 Advance(); | 555 Advance(); |
| 556 if (c0_ == '"') { |
| 557 AdvanceSkipWhitespace(); |
| 558 return Handle<String>(isolate()->heap()->empty_string()); |
| 559 } |
515 int beg_pos = position_; | 560 int beg_pos = position_; |
516 // Fast case for ascii only without escape characters. | 561 // Fast case for ASCII only without escape characters. |
517 while (c0_ != '"') { | 562 do { |
518 // Check for control character (0x00-0x1f) or unterminated string (<0). | 563 // Check for control character (0x00-0x1f) or unterminated string (<0). |
519 if (c0_ < 0x20) return Handle<String>::null(); | 564 if (c0_ < 0x20) return Handle<String>::null(); |
520 if (c0_ != '\\' && (seq_ascii || c0_ < kMaxAsciiCharCode)) { | 565 if (c0_ != '\\') { |
521 Advance(); | 566 if (seq_ascii || c0_ <= kMaxAsciiCharCode) { |
| 567 Advance(); |
| 568 } else { |
| 569 return SlowScanJsonString<SeqTwoByteString, uc16>(source_, |
| 570 beg_pos, |
| 571 position_); |
| 572 } |
522 } else { | 573 } else { |
523 return this->SlowScanJsonString(beg_pos); | 574 return SlowScanJsonString<SeqAsciiString, char>(source_, |
| 575 beg_pos, |
| 576 position_); |
524 } | 577 } |
| 578 } while (c0_ != '"'); |
| 579 int length = position_ - beg_pos; |
| 580 Handle<String> result; |
| 581 if (seq_ascii && is_symbol) { |
| 582 result = isolate()->factory()->LookupAsciiSymbol(seq_source_, |
| 583 beg_pos, |
| 584 length); |
| 585 } else { |
| 586 result = isolate()->factory()->NewRawAsciiString(length); |
| 587 char* dest = SeqAsciiString::cast(*result)->GetChars(); |
| 588 String::WriteToFlat(*source_, dest, beg_pos, position_); |
525 } | 589 } |
526 ASSERT_EQ('"', c0_); | 590 ASSERT_EQ('"', c0_); |
527 int end_pos = position_; | |
528 // Advance past the last '"'. | 591 // Advance past the last '"'. |
529 AdvanceSkipWhitespace(); | 592 AdvanceSkipWhitespace(); |
530 if (seq_ascii && is_symbol) { | 593 return result; |
531 return isolate()->factory()->LookupAsciiSymbol(seq_source_, | |
532 beg_pos, | |
533 end_pos - beg_pos); | |
534 } else { | |
535 return isolate()->factory()->NewProperSubString(source_, | |
536 beg_pos, | |
537 end_pos); | |
538 } | |
539 } | 594 } |
540 | 595 |
541 } } // namespace v8::internal | 596 } } // namespace v8::internal |
542 | 597 |
543 #endif // V8_JSON_PARSER_H_ | 598 #endif // V8_JSON_PARSER_H_ |
OLD | NEW |