Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(648)

Side by Side Diff: src/json-parser.h

Issue 7241023: Improve JSON.parse to use less memory when using escaped and non-ascii... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 9 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
48 } 48 }
49 49
50 static const int kEndOfString = -1; 50 static const int kEndOfString = -1;
51 51
52 private: 52 private:
53 // Parse a string containing a single JSON value. 53 // Parse a string containing a single JSON value.
54 Handle<Object> ParseJson(Handle<String> source); 54 Handle<Object> ParseJson(Handle<String> source);
55 55
56 inline void Advance() { 56 inline void Advance() {
57 position_++; 57 position_++;
58 if (position_ > source_length_) { 58 if (position_ >= source_length_) {
59 c0_ = kEndOfString; 59 c0_ = kEndOfString;
60 } else if (seq_ascii) { 60 } else if (seq_ascii) {
61 c0_ = seq_source_->SeqAsciiStringGet(position_); 61 c0_ = seq_source_->SeqAsciiStringGet(position_);
62 } else { 62 } else {
63 c0_ = source_->Get(position_); 63 c0_ = source_->Get(position_);
64 } 64 }
65 } 65 }
66 66
67 // The JSON lexical grammar is specified in the ECMAScript 5 standard, 67 // The JSON lexical grammar is specified in the ECMAScript 5 standard,
68 // section 15.12.1.1. The only allowed whitespace characters between tokens 68 // section 15.12.1.1. The only allowed whitespace characters between tokens
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
100 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and 100 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
101 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. 101 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
102 Handle<String> ParseJsonString() { 102 Handle<String> ParseJsonString() {
103 return ScanJsonString<false>(); 103 return ScanJsonString<false>();
104 } 104 }
105 Handle<String> ParseJsonSymbol() { 105 Handle<String> ParseJsonSymbol() {
106 return ScanJsonString<true>(); 106 return ScanJsonString<true>();
107 } 107 }
108 template <bool is_symbol> 108 template <bool is_symbol>
109 Handle<String> ScanJsonString(); 109 Handle<String> ScanJsonString();
110 // Slow version for unicode support, uses the first ascii_count characters, 110 // Creates a new string and copies prefix[start..end] into the beginning
111 // as first part of a ConsString 111 // of it. Then scans the rest of the string, adding characters after the
112 Handle<String> SlowScanJsonString(int beg_pos); 112 // prefix. Called by ScanJsonString when reaching a '\' or non-ASCII char.
113 template <typename StringType, typename SinkChar>
114 Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end);
113 115
114 // A JSON number (production JSONNumber) is a subset of the valid JavaScript 116 // A JSON number (production JSONNumber) is a subset of the valid JavaScript
115 // decimal number literals. 117 // decimal number literals.
116 // It includes an optional minus sign, must have at least one 118 // It includes an optional minus sign, must have at least one
117 // digit before and after a decimal point, may not have prefixed zeros (unless 119 // digit before and after a decimal point, may not have prefixed zeros (unless
118 // the integer part is zero), and may include an exponent part (e.g., "e-10"). 120 // the integer part is zero), and may include an exponent part (e.g., "e-10").
119 // Hexadecimal and octal numbers are not allowed. 121 // Hexadecimal and octal numbers are not allowed.
120 Handle<Object> ParseJsonNumber(); 122 Handle<Object> ParseJsonNumber();
121 123
122 // Parse a single JSON value from input (grammar production JSONValue). 124 // Parse a single JSON value from input (grammar production JSONValue).
(...skipping 18 matching lines...) Expand all
141 143
142 144
143 // Mark that a parsing error has happened at the current token, and 145 // Mark that a parsing error has happened at the current token, and
144 // return a null handle. Primarily for readability. 146 // return a null handle. Primarily for readability.
145 inline Handle<Object> ReportUnexpectedCharacter() { 147 inline Handle<Object> ReportUnexpectedCharacter() {
146 return Handle<Object>::null(); 148 return Handle<Object>::null();
147 } 149 }
148 150
149 inline Isolate* isolate() { return isolate_; } 151 inline Isolate* isolate() { return isolate_; }
150 152
151 static const int kInitialSpecialStringSize = 1024; 153 static const int kInitialSpecialStringLength = 1024;
152 154
153 155
154 private: 156 private:
155 Handle<String> source_; 157 Handle<String> source_;
156 int source_length_; 158 int source_length_;
157 Handle<SeqAsciiString> seq_source_; 159 Handle<SeqAsciiString> seq_source_;
158 160
159 Isolate* isolate_; 161 Isolate* isolate_;
160 uc32 c0_; 162 uc32 c0_;
161 int position_; 163 int position_;
162 }; 164 };
163 165
164 template <bool seq_ascii> 166 template <bool seq_ascii>
165 Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) { 167 Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) {
166 isolate_ = source->map()->isolate(); 168 isolate_ = source->map()->isolate();
167 source_ = Handle<String>(source->TryFlattenGetString()); 169 source_ = Handle<String>(source->TryFlattenGetString());
168 source_length_ = source_->length() - 1; 170 source_length_ = source_->length();
169 171
170 // Optimized fast case where we only have ascii characters. 172 // Optimized fast case where we only have ASCII characters.
171 if (seq_ascii) { 173 if (seq_ascii) {
172 seq_source_ = Handle<SeqAsciiString>::cast(source_); 174 seq_source_ = Handle<SeqAsciiString>::cast(source_);
173 } 175 }
174 176
175 // Set initial position right before the string. 177 // Set initial position right before the string.
176 position_ = -1; 178 position_ = -1;
177 // Advance to the first character (possibly EOS) 179 // Advance to the first character (possibly EOS)
178 AdvanceSkipWhitespace(); 180 AdvanceSkipWhitespace();
179 Handle<Object> result = ParseJsonValue(); 181 Handle<Object> result = ParseJsonValue();
180 if (result.is_null() || c0_ != kEndOfString) { 182 if (result.is_null() || c0_ != kEndOfString) {
(...skipping 222 matching lines...) Expand 10 before | Expand all | Expand 10 after
403 number = StringToDouble(isolate()->unicode_cache(), 405 number = StringToDouble(isolate()->unicode_cache(),
404 result, 406 result,
405 NO_FLAGS, // Hex, octal or trailing junk. 407 NO_FLAGS, // Hex, octal or trailing junk.
406 0.0); 408 0.0);
407 buffer.Dispose(); 409 buffer.Dispose();
408 } 410 }
409 SkipWhitespace(); 411 SkipWhitespace();
410 return isolate()->factory()->NewNumber(number); 412 return isolate()->factory()->NewNumber(number);
411 } 413 }
412 414
415
416 template <typename StringType>
417 inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c);
418
419 template <>
420 inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) {
421 seq_str->SeqTwoByteStringSet(i, c);
422 }
423
424 template <>
425 inline void SeqStringSet(Handle<SeqAsciiString> seq_str, int i, uc32 c) {
426 seq_str->SeqAsciiStringSet(i, c);
427 }
428
429 template <typename StringType>
430 inline Handle<StringType> NewRawString(Factory* factory, int length);
431
432 template <>
433 inline Handle<SeqTwoByteString> NewRawString(Factory* factory, int length) {
434 return factory->NewRawTwoByteString(length, NOT_TENURED);
435 }
436
437 template <>
438 inline Handle<SeqAsciiString> NewRawString(Factory* factory, int length) {
439 return factory->NewRawAsciiString(length, NOT_TENURED);
440 }
441
442
443 // Scans the rest of a JSON string starting from position_ and writes
444 // prefix[start..end] along with the scanned characters into a
445 // sequential string of type StringType.
413 template <bool seq_ascii> 446 template <bool seq_ascii>
414 Handle<String> JsonParser<seq_ascii>::SlowScanJsonString(int beg_pos) { 447 template <typename StringType, typename SinkChar>
415 // The currently scanned ascii characters. 448 Handle<String> JsonParser<seq_ascii>::SlowScanJsonString(
416 Handle<String> ascii(isolate()->factory()->NewProperSubString(source_, 449 Handle<String> prefix, int start, int end) {
417 beg_pos, 450 int count = end - start;
418 position_)); 451 int max_length = count + source_length_ - position_;
419 Handle<String> two_byte = 452 int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count));
420 isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize, 453 Handle<StringType> seq_str = NewRawString<StringType>(isolate()->factory(),
421 NOT_TENURED); 454 length);
422 Handle<SeqTwoByteString> seq_two_byte = 455 // Copy prefix into seq_str.
423 Handle<SeqTwoByteString>::cast(two_byte); 456 SinkChar* dest = seq_str->GetChars();
424 457 String::WriteToFlat(*prefix, dest, start, end);
425 int allocation_count = 1;
426 int count = 0;
427 458
428 while (c0_ != '"') { 459 while (c0_ != '"') {
429 // Create new seq string 460 if (count >= length) {
430 if (count >= kInitialSpecialStringSize * allocation_count) { 461 // We need to create a longer sequential string for the result.
431 allocation_count = allocation_count * 2; 462 return SlowScanJsonString<StringType, SinkChar>(seq_str, 0, count);
432 int new_size = allocation_count * kInitialSpecialStringSize;
433 Handle<String> new_two_byte =
434 isolate()->factory()->NewRawTwoByteString(new_size,
435 NOT_TENURED);
436 uc16* char_start =
437 Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars();
438 String::WriteToFlat(*seq_two_byte, char_start, 0, count);
439 seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte);
440 } 463 }
441
442 // Check for control character (0x00-0x1f) or unterminated string (<0). 464 // Check for control character (0x00-0x1f) or unterminated string (<0).
443 if (c0_ < 0x20) return Handle<String>::null(); 465 if (c0_ < 0x20) return Handle<String>::null();
444 if (c0_ != '\\') { 466 if (c0_ != '\\') {
445 seq_two_byte->SeqTwoByteStringSet(count++, c0_); 467 // If the sink can contain UC16 characters, or source_ contains only
446 Advance(); 468 // ASCII characters, there's no need to test whether we can store the
469 // character. Otherwise check whether the UC16 source character can fit
470 // in the ASCII sink.
471 if (sizeof(SinkChar) == kUC16Size ||
472 seq_ascii ||
473 c0_ <= kMaxAsciiCharCode) {
474 SeqStringSet(seq_str, count++, c0_);
475 Advance();
476 } else {
477 // StringType is SeqAsciiString and we just read a non-ASCII char.
478 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, 0, count);
479 }
447 } else { 480 } else {
448 Advance(); 481 Advance(); // Advance past the \.
449 switch (c0_) { 482 switch (c0_) {
450 case '"': 483 case '"':
451 case '\\': 484 case '\\':
452 case '/': 485 case '/':
453 seq_two_byte->SeqTwoByteStringSet(count++, c0_); 486 SeqStringSet(seq_str, count++, c0_);
454 break; 487 break;
455 case 'b': 488 case 'b':
456 seq_two_byte->SeqTwoByteStringSet(count++, '\x08'); 489 SeqStringSet(seq_str, count++, '\x08');
457 break; 490 break;
458 case 'f': 491 case 'f':
459 seq_two_byte->SeqTwoByteStringSet(count++, '\x0c'); 492 SeqStringSet(seq_str, count++, '\x0c');
460 break; 493 break;
461 case 'n': 494 case 'n':
462 seq_two_byte->SeqTwoByteStringSet(count++, '\x0a'); 495 SeqStringSet(seq_str, count++, '\x0a');
463 break; 496 break;
464 case 'r': 497 case 'r':
465 seq_two_byte->SeqTwoByteStringSet(count++, '\x0d'); 498 SeqStringSet(seq_str, count++, '\x0d');
466 break; 499 break;
467 case 't': 500 case 't':
468 seq_two_byte->SeqTwoByteStringSet(count++, '\x09'); 501 SeqStringSet(seq_str, count++, '\x09');
469 break; 502 break;
470 case 'u': { 503 case 'u': {
471 uc32 value = 0; 504 uc32 value = 0;
472 for (int i = 0; i < 4; i++) { 505 for (int i = 0; i < 4; i++) {
473 Advance(); 506 Advance();
474 int digit = HexValue(c0_); 507 int digit = HexValue(c0_);
475 if (digit < 0) { 508 if (digit < 0) {
476 return Handle<String>::null(); 509 return Handle<String>::null();
477 } 510 }
478 value = value * 16 + digit; 511 value = value * 16 + digit;
479 } 512 }
480 seq_two_byte->SeqTwoByteStringSet(count++, value); 513 if (sizeof(SinkChar) == kUC16Size || value <= kMaxAsciiCharCode) {
481 break; 514 SeqStringSet(seq_str, count++, value);
515 break;
516 } else {
517 // StringType is SeqAsciiString and we just read a non-ASCII char.
518 position_ -= 6; // Rewind position_ to \ in \uxxxx.
519 Advance();
520 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_str,
521 0,
522 count);
523 }
482 } 524 }
483 default: 525 default:
484 return Handle<String>::null(); 526 return Handle<String>::null();
485 } 527 }
486 Advance(); 528 Advance();
487 } 529 }
488 } 530 }
489 // Advance past the last '"'. 531 // Shrink seq_string length to count.
490 ASSERT_EQ('"', c0_); 532 if (isolate()->heap()->InNewSpace(*seq_str)) {
491 AdvanceSkipWhitespace();
492
493 // Shrink the string to our length.
494 if (isolate()->heap()->InNewSpace(*seq_two_byte)) {
495 isolate()->heap()->new_space()-> 533 isolate()->heap()->new_space()->
496 template ShrinkStringAtAllocationBoundary<SeqTwoByteString>( 534 template ShrinkStringAtAllocationBoundary<StringType>(
497 *seq_two_byte, count); 535 *seq_str, count);
498 } else { 536 } else {
499 int string_size = SeqTwoByteString::SizeFor(count); 537 int string_size = StringType::SizeFor(count);
500 int allocated_string_size = 538 int allocated_string_size = StringType::SizeFor(length);
501 SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count);
502 int delta = allocated_string_size - string_size; 539 int delta = allocated_string_size - string_size;
503 Address start_filler_object = seq_two_byte->address() + string_size; 540 Address start_filler_object = seq_str->address() + string_size;
504 seq_two_byte->set_length(count); 541 seq_str->set_length(count);
505 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); 542 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);
506 } 543 }
507 return isolate()->factory()->NewConsString(ascii, seq_two_byte); 544 ASSERT_EQ('"', c0_);
545 // Advance past the last '"'.
546 AdvanceSkipWhitespace();
547 return seq_str;
508 } 548 }
509 549
550
510 template <bool seq_ascii> 551 template <bool seq_ascii>
511 template <bool is_symbol> 552 template <bool is_symbol>
512 Handle<String> JsonParser<seq_ascii>::ScanJsonString() { 553 Handle<String> JsonParser<seq_ascii>::ScanJsonString() {
513 ASSERT_EQ('"', c0_); 554 ASSERT_EQ('"', c0_);
514 Advance(); 555 Advance();
556 if (c0_ == '"') {
557 AdvanceSkipWhitespace();
558 return Handle<String>(isolate()->heap()->empty_string());
559 }
515 int beg_pos = position_; 560 int beg_pos = position_;
516 // Fast case for ascii only without escape characters. 561 // Fast case for ASCII only without escape characters.
517 while (c0_ != '"') { 562 do {
518 // Check for control character (0x00-0x1f) or unterminated string (<0). 563 // Check for control character (0x00-0x1f) or unterminated string (<0).
519 if (c0_ < 0x20) return Handle<String>::null(); 564 if (c0_ < 0x20) return Handle<String>::null();
520 if (c0_ != '\\' && (seq_ascii || c0_ < kMaxAsciiCharCode)) { 565 if (c0_ != '\\') {
521 Advance(); 566 if (seq_ascii || c0_ <= kMaxAsciiCharCode) {
567 Advance();
568 } else {
569 return SlowScanJsonString<SeqTwoByteString, uc16>(source_,
570 beg_pos,
571 position_);
572 }
522 } else { 573 } else {
523 return this->SlowScanJsonString(beg_pos); 574 return SlowScanJsonString<SeqAsciiString, char>(source_,
575 beg_pos,
576 position_);
524 } 577 }
578 } while (c0_ != '"');
579 int length = position_ - beg_pos;
580 Handle<String> result;
581 if (seq_ascii && is_symbol) {
582 result = isolate()->factory()->LookupAsciiSymbol(seq_source_,
583 beg_pos,
584 length);
585 } else {
586 result = isolate()->factory()->NewRawAsciiString(length);
587 char* dest = SeqAsciiString::cast(*result)->GetChars();
588 String::WriteToFlat(*source_, dest, beg_pos, position_);
525 } 589 }
526 ASSERT_EQ('"', c0_); 590 ASSERT_EQ('"', c0_);
527 int end_pos = position_;
528 // Advance past the last '"'. 591 // Advance past the last '"'.
529 AdvanceSkipWhitespace(); 592 AdvanceSkipWhitespace();
530 if (seq_ascii && is_symbol) { 593 return result;
531 return isolate()->factory()->LookupAsciiSymbol(seq_source_,
532 beg_pos,
533 end_pos - beg_pos);
534 } else {
535 return isolate()->factory()->NewProperSubString(source_,
536 beg_pos,
537 end_pos);
538 }
539 } 594 }
540 595
541 } } // namespace v8::internal 596 } } // namespace v8::internal
542 597
543 #endif // V8_JSON_PARSER_H_ 598 #endif // V8_JSON_PARSER_H_
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698