OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
48 } | 48 } |
49 | 49 |
50 static const int kEndOfString = -1; | 50 static const int kEndOfString = -1; |
51 | 51 |
52 private: | 52 private: |
53 // Parse a string containing a single JSON value. | 53 // Parse a string containing a single JSON value. |
54 Handle<Object> ParseJson(Handle<String> source); | 54 Handle<Object> ParseJson(Handle<String> source); |
55 | 55 |
56 inline void Advance() { | 56 inline void Advance() { |
57 position_++; | 57 position_++; |
58 if (position_ > source_length_) { | 58 if (position_ >= source_length_) { |
59 c0_ = kEndOfString; | 59 c0_ = kEndOfString; |
60 } else if (seq_ascii) { | 60 } else if (seq_ascii) { |
61 c0_ = seq_source_->SeqAsciiStringGet(position_); | 61 c0_ = seq_source_->SeqAsciiStringGet(position_); |
62 } else { | 62 } else { |
63 c0_ = source_->Get(position_); | 63 c0_ = source_->Get(position_); |
64 } | 64 } |
65 } | 65 } |
66 | 66 |
67 // The JSON lexical grammar is specified in the ECMAScript 5 standard, | 67 // The JSON lexical grammar is specified in the ECMAScript 5 standard, |
68 // section 15.12.1.1. The only allowed whitespace characters between tokens | 68 // section 15.12.1.1. The only allowed whitespace characters between tokens |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
100 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and | 100 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and |
101 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. | 101 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. |
102 Handle<String> ParseJsonString() { | 102 Handle<String> ParseJsonString() { |
103 return ScanJsonString<false>(); | 103 return ScanJsonString<false>(); |
104 } | 104 } |
105 Handle<String> ParseJsonSymbol() { | 105 Handle<String> ParseJsonSymbol() { |
106 return ScanJsonString<true>(); | 106 return ScanJsonString<true>(); |
107 } | 107 } |
108 template <bool is_symbol> | 108 template <bool is_symbol> |
109 Handle<String> ScanJsonString(); | 109 Handle<String> ScanJsonString(); |
110 // Slow version for unicode support, uses the first ascii_count characters, | 110 // Slow version for backslash and unicode support, uses the characters from |
111 // as first part of a ConsString | 111 // start to end in prefix as the first part of the resulting string. |
112 Handle<String> SlowScanJsonString(int beg_pos); | 112 template <typename StringType, typename SinkChar> |
113 Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end); | |
Lasse Reichstein
2011/06/29 09:27:30
Slow version of what? Just say what the function d
sandholm
2011/06/29 10:44:39
Done.
| |
113 | 114 |
114 // A JSON number (production JSONNumber) is a subset of the valid JavaScript | 115 // A JSON number (production JSONNumber) is a subset of the valid JavaScript |
115 // decimal number literals. | 116 // decimal number literals. |
116 // It includes an optional minus sign, must have at least one | 117 // It includes an optional minus sign, must have at least one |
117 // digit before and after a decimal point, may not have prefixed zeros (unless | 118 // digit before and after a decimal point, may not have prefixed zeros (unless |
118 // the integer part is zero), and may include an exponent part (e.g., "e-10"). | 119 // the integer part is zero), and may include an exponent part (e.g., "e-10"). |
119 // Hexadecimal and octal numbers are not allowed. | 120 // Hexadecimal and octal numbers are not allowed. |
120 Handle<Object> ParseJsonNumber(); | 121 Handle<Object> ParseJsonNumber(); |
121 | 122 |
122 // Parse a single JSON value from input (grammar production JSONValue). | 123 // Parse a single JSON value from input (grammar production JSONValue). |
(...skipping 18 matching lines...) Expand all Loading... | |
141 | 142 |
142 | 143 |
143 // Mark that a parsing error has happened at the current token, and | 144 // Mark that a parsing error has happened at the current token, and |
144 // return a null handle. Primarily for readability. | 145 // return a null handle. Primarily for readability. |
145 inline Handle<Object> ReportUnexpectedCharacter() { | 146 inline Handle<Object> ReportUnexpectedCharacter() { |
146 return Handle<Object>::null(); | 147 return Handle<Object>::null(); |
147 } | 148 } |
148 | 149 |
149 inline Isolate* isolate() { return isolate_; } | 150 inline Isolate* isolate() { return isolate_; } |
150 | 151 |
151 static const int kInitialSpecialStringSize = 1024; | 152 static const int kInitialSpecialStringLength = 1024; |
152 | 153 |
153 | 154 |
154 private: | 155 private: |
155 Handle<String> source_; | 156 Handle<String> source_; |
156 int source_length_; | 157 int source_length_; |
157 Handle<SeqAsciiString> seq_source_; | 158 Handle<SeqAsciiString> seq_source_; |
158 | 159 |
159 Isolate* isolate_; | 160 Isolate* isolate_; |
160 uc32 c0_; | 161 uc32 c0_; |
161 int position_; | 162 int position_; |
162 }; | 163 }; |
163 | 164 |
164 template <bool seq_ascii> | 165 template <bool seq_ascii> |
165 Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) { | 166 Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) { |
166 isolate_ = source->map()->isolate(); | 167 isolate_ = source->map()->isolate(); |
167 source_ = Handle<String>(source->TryFlattenGetString()); | 168 source_ = Handle<String>(source->TryFlattenGetString()); |
168 source_length_ = source_->length() - 1; | 169 source_length_ = source_->length(); |
169 | 170 |
170 // Optimized fast case where we only have ascii characters. | 171 // Optimized fast case where we only have ascii characters. |
Lasse Reichstein
2011/06/29 09:27:30
ASCII is an acronym when used in prose.
sandholm
2011/06/29 10:44:39
Done.
| |
171 if (seq_ascii) { | 172 if (seq_ascii) { |
172 seq_source_ = Handle<SeqAsciiString>::cast(source_); | 173 seq_source_ = Handle<SeqAsciiString>::cast(source_); |
173 } | 174 } |
174 | 175 |
175 // Set initial position right before the string. | 176 // Set initial position right before the string. |
176 position_ = -1; | 177 position_ = -1; |
177 // Advance to the first character (possibly EOS) | 178 // Advance to the first character (possibly EOS) |
178 AdvanceSkipWhitespace(); | 179 AdvanceSkipWhitespace(); |
179 Handle<Object> result = ParseJsonValue(); | 180 Handle<Object> result = ParseJsonValue(); |
180 if (result.is_null() || c0_ != kEndOfString) { | 181 if (result.is_null() || c0_ != kEndOfString) { |
(...skipping 222 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
403 number = StringToDouble(isolate()->unicode_cache(), | 404 number = StringToDouble(isolate()->unicode_cache(), |
404 result, | 405 result, |
405 NO_FLAGS, // Hex, octal or trailing junk. | 406 NO_FLAGS, // Hex, octal or trailing junk. |
406 0.0); | 407 0.0); |
407 buffer.Dispose(); | 408 buffer.Dispose(); |
408 } | 409 } |
409 SkipWhitespace(); | 410 SkipWhitespace(); |
410 return isolate()->factory()->NewNumber(number); | 411 return isolate()->factory()->NewNumber(number); |
411 } | 412 } |
412 | 413 |
414 | |
415 template <typename StringType> | |
416 inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c); | |
417 | |
418 template <> | |
419 inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) { | |
420 seq_str->SeqTwoByteStringSet(i, c); | |
421 } | |
422 | |
423 template <> | |
424 inline void SeqStringSet(Handle<SeqAsciiString> seq_str, int i, uc32 c) { | |
425 seq_str->SeqAsciiStringSet(i, c); | |
426 } | |
427 | |
428 template <typename StringType> | |
429 inline Handle<StringType> NewRawString(Factory* factory, int length); | |
430 | |
431 template <> | |
432 inline Handle<SeqTwoByteString> NewRawString(Factory* factory, int length) { | |
433 return factory->NewRawTwoByteString(length, NOT_TENURED); | |
434 } | |
435 | |
436 template <> | |
437 inline Handle<SeqAsciiString> NewRawString(Factory* factory, int length) { | |
438 return factory->NewRawAsciiString(length, NOT_TENURED); | |
439 } | |
440 | |
441 | |
442 // Scans the rest of a JSON string starting from position_ and writes | |
443 // substring(prefix, start, end) along with the scanned characters into a | |
444 // sequential string of type StringType. | |
413 template <bool seq_ascii> | 445 template <bool seq_ascii> |
414 Handle<String> JsonParser<seq_ascii>::SlowScanJsonString(int beg_pos) { | 446 template <typename StringType, typename SinkChar> |
415 // The currently scanned ascii characters. | 447 Handle<String> JsonParser<seq_ascii>::SlowScanJsonString( |
416 Handle<String> ascii(isolate()->factory()->NewProperSubString(source_, | 448 Handle<String> prefix, int start, int end) { |
417 beg_pos, | 449 int count = end - start; |
418 position_)); | 450 int length = Min(count + source_length_ - position_, |
419 Handle<String> two_byte = | 451 Max(kInitialSpecialStringLength, 2 * count)); |
420 isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize, | 452 Handle<StringType> seq_str = NewRawString<StringType>(isolate()->factory(), |
421 NOT_TENURED); | 453 length); |
422 Handle<SeqTwoByteString> seq_two_byte = | 454 // Copy prefix into seq_str. |
423 Handle<SeqTwoByteString>::cast(two_byte); | 455 SinkChar* dest = seq_str->GetChars(); |
424 | 456 String::WriteToFlat(*prefix, dest, start, end); |
425 int allocation_count = 1; | |
426 int count = 0; | |
427 | 457 |
428 while (c0_ != '"') { | 458 while (c0_ != '"') { |
429 // Create new seq string | 459 // Create new seq string |
Lasse Reichstein
2011/06/29 09:27:30
Move comment to after "if" line, so it only applie
sandholm
2011/06/29 10:44:39
Done.
| |
430 if (count >= kInitialSpecialStringSize * allocation_count) { | 460 if (count >= length) { |
431 allocation_count = allocation_count * 2; | 461 return this->SlowScanJsonString<StringType, SinkChar>(seq_str, 0, count); |
432 int new_size = allocation_count * kInitialSpecialStringSize; | |
433 Handle<String> new_two_byte = | |
434 isolate()->factory()->NewRawTwoByteString(new_size, | |
435 NOT_TENURED); | |
436 uc16* char_start = | |
437 Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars(); | |
438 String::WriteToFlat(*seq_two_byte, char_start, 0, count); | |
439 seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte); | |
440 } | 462 } |
441 | |
442 // Check for control character (0x00-0x1f) or unterminated string (<0). | 463 // Check for control character (0x00-0x1f) or unterminated string (<0). |
443 if (c0_ < 0x20) return Handle<String>::null(); | 464 if (c0_ < 0x20) return Handle<String>::null(); |
444 if (c0_ != '\\') { | 465 if (c0_ != '\\') { |
445 seq_two_byte->SeqTwoByteStringSet(count++, c0_); | 466 if (sizeof(char) != sizeof(SinkChar) || |
Lasse Reichstein
2011/06/29 09:27:30
Does it lint? (Generally, us kCharSize instead of
sandholm
2011/06/29 10:44:39
Done.
| |
446 Advance(); | 467 seq_ascii || |
Lasse Reichstein
2011/06/29 09:27:30
This could use a comment:
If the sink can contain
sandholm
2011/06/29 10:44:39
Done.
| |
468 c0_ <= kMaxAsciiCharCode) { | |
469 SeqStringSet(seq_str, count++, c0_); | |
470 Advance(); | |
471 } else { | |
472 // StringType is SeqAsciiString and we just read a non-ascii char. | |
Lasse Reichstein
2011/06/29 09:27:30
non-ASCII.
sandholm
2011/06/29 10:44:39
Done.
| |
473 return this->SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, | |
474 0, | |
475 count); | |
476 } | |
447 } else { | 477 } else { |
448 Advance(); | 478 Advance(); // Advance past the \. |
449 switch (c0_) { | 479 switch (c0_) { |
450 case '"': | 480 case '"': |
451 case '\\': | 481 case '\\': |
452 case '/': | 482 case '/': |
453 seq_two_byte->SeqTwoByteStringSet(count++, c0_); | 483 SeqStringSet(seq_str, count++, c0_); |
454 break; | 484 break; |
455 case 'b': | 485 case 'b': |
456 seq_two_byte->SeqTwoByteStringSet(count++, '\x08'); | 486 SeqStringSet(seq_str, count++, '\x08'); |
457 break; | 487 break; |
458 case 'f': | 488 case 'f': |
459 seq_two_byte->SeqTwoByteStringSet(count++, '\x0c'); | 489 SeqStringSet(seq_str, count++, '\x0c'); |
460 break; | 490 break; |
461 case 'n': | 491 case 'n': |
462 seq_two_byte->SeqTwoByteStringSet(count++, '\x0a'); | 492 SeqStringSet(seq_str, count++, '\x0a'); |
463 break; | 493 break; |
464 case 'r': | 494 case 'r': |
465 seq_two_byte->SeqTwoByteStringSet(count++, '\x0d'); | 495 SeqStringSet(seq_str, count++, '\x0d'); |
466 break; | 496 break; |
467 case 't': | 497 case 't': |
468 seq_two_byte->SeqTwoByteStringSet(count++, '\x09'); | 498 SeqStringSet(seq_str, count++, '\x09'); |
469 break; | 499 break; |
470 case 'u': { | 500 case 'u': { |
471 uc32 value = 0; | 501 uc32 value = 0; |
472 for (int i = 0; i < 4; i++) { | 502 for (int i = 0; i < 4; i++) { |
473 Advance(); | 503 Advance(); |
474 int digit = HexValue(c0_); | 504 int digit = HexValue(c0_); |
475 if (digit < 0) { | 505 if (digit < 0) { |
476 return Handle<String>::null(); | 506 return Handle<String>::null(); |
477 } | 507 } |
478 value = value * 16 + digit; | 508 value = value * 16 + digit; |
479 } | 509 } |
480 seq_two_byte->SeqTwoByteStringSet(count++, value); | 510 if (sizeof(char) != sizeof(SinkChar) || value <= kMaxAsciiCharCode) { |
Lasse Reichstein
2011/06/29 09:27:30
sizeof(SinkChar) == kUC16Size
sandholm
2011/06/29 10:44:39
Done.
| |
481 break; | 511 SeqStringSet(seq_str, count++, value); |
512 break; | |
513 } else { | |
514 // StringType is SeqAsciiString and we just read a non-ascii char. | |
515 position_ -= 6; // Rewind position to \ in \uxxxx. | |
516 Advance(); | |
517 return this->SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, | |
518 0, | |
519 count); | |
520 } | |
482 } | 521 } |
483 default: | 522 default: |
484 return Handle<String>::null(); | 523 return Handle<String>::null(); |
485 } | 524 } |
486 Advance(); | 525 Advance(); |
487 } | 526 } |
488 } | 527 } |
489 // Advance past the last '"'. | 528 // Shrink seq_string length to count. |
490 ASSERT_EQ('"', c0_); | 529 if (isolate()->heap()->InNewSpace(*seq_str)) { |
491 AdvanceSkipWhitespace(); | |
492 | |
493 // Shrink the string to our length. | |
494 if (isolate()->heap()->InNewSpace(*seq_two_byte)) { | |
495 isolate()->heap()->new_space()-> | 530 isolate()->heap()->new_space()-> |
496 template ShrinkStringAtAllocationBoundary<SeqTwoByteString>( | 531 template ShrinkStringAtAllocationBoundary<StringType>( |
Lasse Reichstein
2011/06/29 09:27:30
Do ASSERT that the string is at the allocation bou
sandholm
2011/06/29 10:44:39
That ASSERT is in ShrinkStringAtAllocationBoundary
Lasse Reichstein
2011/06/29 10:53:00
It'll do :)
It's probably better to keep it there,
| |
497 *seq_two_byte, count); | 532 *seq_str, count); |
498 } else { | 533 } else { |
499 int string_size = SeqTwoByteString::SizeFor(count); | 534 int string_size = StringType::SizeFor(count); |
500 int allocated_string_size = | 535 int allocated_string_size = StringType::SizeFor(length); |
501 SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count); | |
502 int delta = allocated_string_size - string_size; | 536 int delta = allocated_string_size - string_size; |
503 Address start_filler_object = seq_two_byte->address() + string_size; | 537 Address start_filler_object = seq_str->address() + string_size; |
504 seq_two_byte->set_length(count); | 538 seq_str->set_length(count); |
505 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); | 539 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); |
506 } | 540 } |
507 return isolate()->factory()->NewConsString(ascii, seq_two_byte); | 541 ASSERT_EQ('"', c0_); |
542 // Advance past the last '"'. | |
543 AdvanceSkipWhitespace(); | |
544 return seq_str; | |
508 } | 545 } |
509 | 546 |
547 | |
510 template <bool seq_ascii> | 548 template <bool seq_ascii> |
511 template <bool is_symbol> | 549 template <bool is_symbol> |
512 Handle<String> JsonParser<seq_ascii>::ScanJsonString() { | 550 Handle<String> JsonParser<seq_ascii>::ScanJsonString() { |
513 ASSERT_EQ('"', c0_); | 551 ASSERT_EQ('"', c0_); |
514 Advance(); | 552 Advance(); |
553 if (c0_ == '"') { | |
554 AdvanceSkipWhitespace(); | |
555 return Handle<String>(isolate()->heap()->empty_string()); | |
556 } | |
515 int beg_pos = position_; | 557 int beg_pos = position_; |
516 // Fast case for ascii only without escape characters. | 558 // Fast case for ascii only without escape characters. |
517 while (c0_ != '"') { | 559 do { |
518 // Check for control character (0x00-0x1f) or unterminated string (<0). | 560 // Check for control character (0x00-0x1f) or unterminated string (<0). |
519 if (c0_ < 0x20) return Handle<String>::null(); | 561 if (c0_ < 0x20) return Handle<String>::null(); |
520 if (c0_ != '\\' && (seq_ascii || c0_ < kMaxAsciiCharCode)) { | 562 if (c0_ != '\\' && (seq_ascii || c0_ <= kMaxAsciiCharCode)) { |
521 Advance(); | 563 Advance(); |
522 } else { | 564 } else { |
523 return this->SlowScanJsonString(beg_pos); | 565 return this->SlowScanJsonString<SeqAsciiString, char>(source_, |
566 beg_pos, | |
567 position_); | |
524 } | 568 } |
569 } while (c0_ != '"'); | |
570 int length = position_ - beg_pos; | |
571 Handle<String> result; | |
572 if (seq_ascii && is_symbol) { | |
573 result = isolate()->factory()->LookupAsciiSymbol(seq_source_, | |
574 beg_pos, | |
575 length); | |
576 } else { | |
577 result = isolate()->factory()->NewRawAsciiString(length); | |
578 char* dest = SeqAsciiString::cast(*result)->GetChars(); | |
579 String::WriteToFlat(*source_, dest, beg_pos, position_); | |
525 } | 580 } |
526 ASSERT_EQ('"', c0_); | 581 ASSERT_EQ('"', c0_); |
527 int end_pos = position_; | |
528 // Advance past the last '"'. | 582 // Advance past the last '"'. |
529 AdvanceSkipWhitespace(); | 583 AdvanceSkipWhitespace(); |
530 if (seq_ascii && is_symbol) { | 584 return result; |
531 return isolate()->factory()->LookupAsciiSymbol(seq_source_, | |
532 beg_pos, | |
533 end_pos - beg_pos); | |
534 } else { | |
535 return isolate()->factory()->NewProperSubString(source_, | |
536 beg_pos, | |
537 end_pos); | |
538 } | |
539 } | 585 } |
540 | 586 |
541 } } // namespace v8::internal | 587 } } // namespace v8::internal |
542 | 588 |
543 #endif // V8_JSON_PARSER_H_ | 589 #endif // V8_JSON_PARSER_H_ |
OLD | NEW |