Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/json-parser.h

Issue 7241023: Improve JSON.parse to use less memory when using escaped and non-ascii... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 9 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
101 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. 101 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
102 Handle<String> ParseJsonString() { 102 Handle<String> ParseJsonString() {
103 return ScanJsonString<false>(); 103 return ScanJsonString<false>();
104 } 104 }
105 Handle<String> ParseJsonSymbol() { 105 Handle<String> ParseJsonSymbol() {
106 return ScanJsonString<true>(); 106 return ScanJsonString<true>();
107 } 107 }
108 template <bool is_symbol> 108 template <bool is_symbol>
109 Handle<String> ScanJsonString(); 109 Handle<String> ScanJsonString();
110 // Slow version for unicode support, uses the first ascii_count characters, 110 // Slow version for unicode support, uses the first ascii_count characters,
111 // as first part of a ConsString 111 // as first part of a ConsString
Lasse Reichstein 2011/06/29 09:27:29 Comment out of date (what is ascii_count)?
112 Handle<String> SlowScanJsonString(int beg_pos); 112 Handle<String> SlowScanJsonAsciiString(Handle<String> prefix);
113 Handle<String> SlowScanJsonTwoByteString(Handle<String> prefix);
113 114
114 // A JSON number (production JSONNumber) is a subset of the valid JavaScript 115 // A JSON number (production JSONNumber) is a subset of the valid JavaScript
115 // decimal number literals. 116 // decimal number literals.
116 // It includes an optional minus sign, must have at least one 117 // It includes an optional minus sign, must have at least one
117 // digit before and after a decimal point, may not have prefixed zeros (unless 118 // digit before and after a decimal point, may not have prefixed zeros (unless
118 // the integer part is zero), and may include an exponent part (e.g., "e-10"). 119 // the integer part is zero), and may include an exponent part (e.g., "e-10").
119 // Hexadecimal and octal numbers are not allowed. 120 // Hexadecimal and octal numbers are not allowed.
120 Handle<Object> ParseJsonNumber(); 121 Handle<Object> ParseJsonNumber();
121 122
122 // Parse a single JSON value from input (grammar production JSONValue). 123 // Parse a single JSON value from input (grammar production JSONValue).
(...skipping 280 matching lines...) Expand 10 before | Expand all | Expand 10 after
403 number = StringToDouble(isolate()->unicode_cache(), 404 number = StringToDouble(isolate()->unicode_cache(),
404 result, 405 result,
405 NO_FLAGS, // Hex, octal or trailing junk. 406 NO_FLAGS, // Hex, octal or trailing junk.
406 0.0); 407 0.0);
407 buffer.Dispose(); 408 buffer.Dispose();
408 } 409 }
409 SkipWhitespace(); 410 SkipWhitespace();
410 return isolate()->factory()->NewNumber(number); 411 return isolate()->factory()->NewNumber(number);
411 } 412 }
412 413
414
413 template <bool seq_ascii> 415 template <bool seq_ascii>
414 Handle<String> JsonParser<seq_ascii>::SlowScanJsonString(int beg_pos) { 416 Handle<String> JsonParser<seq_ascii>::SlowScanJsonTwoByteString(
415 // The currently scanned ascii characters. 417 Handle<String> prefix) {
416 Handle<String> ascii(isolate()->factory()->NewStrictSubString(source_, 418 int length = kInitialSpecialStringSize;
Lasse Reichstein 2011/06/24 13:56:00 Change ...StringSize to ...StringLength. Size soun… [comment truncated]
417 beg_pos, 419 Handle<String> new_two_byte =
418 position_)); 420 isolate()->factory()->NewRawTwoByteString(length, NOT_TENURED);
Lasse Reichstein 2011/06/24 13:56:00 If NewRawTwoByteString doesn't return a Handle<Seq… [comment truncated]
419 Handle<String> two_byte =
420 isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize,
421 NOT_TENURED);
422 Handle<SeqTwoByteString> seq_two_byte = 421 Handle<SeqTwoByteString> seq_two_byte =
423 Handle<SeqTwoByteString>::cast(two_byte); 422 Handle<SeqTwoByteString>::cast(new_two_byte);
424
425 int allocation_count = 1;
426 int count = 0; 423 int count = 0;
427 424
428 while (c0_ != '"') { 425 while (c0_ != '"') {
429 // Create new seq string 426 // Create new seq string
430 if (count >= kInitialSpecialStringSize * allocation_count) { 427 if (count >= length) {
431 allocation_count = allocation_count * 2; 428 prefix = isolate()->factory()->NewConsString(prefix, new_two_byte);
432 int new_size = allocation_count * kInitialSpecialStringSize; 429 length *= 2;
433 Handle<String> new_two_byte = 430 new_two_byte = isolate()->factory()->
434 isolate()->factory()->NewRawTwoByteString(new_size, 431 NewRawTwoByteString(length, NOT_TENURED);
435 NOT_TENURED);
436 uc16* char_start =
437 Handle<SeqTwoByteString>::cast(new_two_byte)->GetChars();
438 String::WriteToFlat(*seq_two_byte, char_start, 0, count);
439 seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte); 432 seq_two_byte = Handle<SeqTwoByteString>::cast(new_two_byte);
433 count = 0;
440 } 434 }
441 435
442 // Check for control character (0x00-0x1f) or unterminated string (<0). 436 // Check for control character (0x00-0x1f) or unterminated string (<0).
443 if (c0_ < 0x20) return Handle<String>::null(); 437 if (c0_ < 0x20) return Handle<String>::null();
444 if (c0_ != '\\') { 438 if (c0_ != '\\') {
445 seq_two_byte->SeqTwoByteStringSet(count++, c0_); 439 seq_two_byte->SeqTwoByteStringSet(count++, c0_);
446 Advance(); 440 Advance();
447 } else { 441 } else {
448 Advance(); 442 Advance(); // Advance past the \.
449 switch (c0_) { 443 switch (c0_) {
450 case '"': 444 case '"':
451 case '\\': 445 case '\\':
452 case '/': 446 case '/':
453 seq_two_byte->SeqTwoByteStringSet(count++, c0_); 447 seq_two_byte->SeqTwoByteStringSet(count++, c0_);
454 break; 448 break;
455 case 'b': 449 case 'b':
456 seq_two_byte->SeqTwoByteStringSet(count++, '\x08'); 450 seq_two_byte->SeqTwoByteStringSet(count++, '\x08');
457 break; 451 break;
458 case 'f': 452 case 'f':
(...skipping 20 matching lines...) Expand all
479 } 473 }
480 seq_two_byte->SeqTwoByteStringSet(count++, value); 474 seq_two_byte->SeqTwoByteStringSet(count++, value);
481 break; 475 break;
482 } 476 }
483 default: 477 default:
484 return Handle<String>::null(); 478 return Handle<String>::null();
485 } 479 }
486 Advance(); 480 Advance();
487 } 481 }
488 } 482 }
489 // Advance past the last '"'.
490 ASSERT_EQ('"', c0_);
491 AdvanceSkipWhitespace();
492 483
493 // Shrink the the string to our length. 484 // Shrink new_two_byte from length to count.
494 if (isolate()->heap()->InNewSpace(*seq_two_byte)) { 485 if (isolate()->heap()->InNewSpace(*new_two_byte)) {
495 isolate()->heap()->new_space()-> 486 isolate()->heap()->new_space()->
496 template ShrinkStringAtAllocationBoundary<SeqTwoByteString>( 487 template ShrinkStringAtAllocationBoundary<SeqTwoByteString>(
497 *seq_two_byte, count); 488 *seq_two_byte, count);
498 } else { 489 } else {
499 int string_size = SeqTwoByteString::SizeFor(count); 490 int string_size = SeqTwoByteString::SizeFor(count);
500 int allocated_string_size = 491 int allocated_string_size = SeqTwoByteString::SizeFor(length);
501 SeqTwoByteString::SizeFor(kInitialSpecialStringSize * allocation_count);
502 int delta = allocated_string_size - string_size; 492 int delta = allocated_string_size - string_size;
503 Address start_filler_object = seq_two_byte->address() + string_size; 493 Address start_filler_object = seq_two_byte->address() + string_size;
504 seq_two_byte->set_length(count); 494 seq_two_byte->set_length(count);
505 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); 495 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);
506 } 496 }
507 return isolate()->factory()->NewConsString(ascii, seq_two_byte); 497 ASSERT_EQ('"', c0_);
498 AdvanceSkipWhitespace();
499 return isolate()->factory()->NewConsString(prefix, new_two_byte);
500 }
501
502
503 template <bool seq_ascii>
504 Handle<String> JsonParser<seq_ascii>::SlowScanJsonAsciiString(
Lasse Reichstein 2011/06/24 13:56:00 Sounds like it scans an ASCII string, not that it… [comment truncated]
505 Handle<String> prefix) {
506 int length = kInitialSpecialStringSize;
507 Handle<String> new_ascii =
508 isolate()->factory()->NewRawAsciiString(length, NOT_TENURED);
509 Handle<SeqAsciiString> seq_ascii_str =
510 Handle<SeqAsciiString>::cast(new_ascii);
511 int count = 0;
512
513 while (c0_ != '"') {
514 // Create new seq string
515 if (count >= length) {
516 prefix = isolate()->factory()->NewConsString(prefix, new_ascii);
517 length *= 2;
518 new_ascii = isolate()->factory()->NewRawAsciiString(length, NOT_TENURED);
519 seq_ascii_str = Handle<SeqAsciiString>::cast(new_ascii);
520 count = 0;
521 }
522
523 // Check for control character (0x00-0x1f) or unterminated string (<0).
524 if (c0_ < 0x20) return Handle<String>::null();
525 if (c0_ != '\\') {
526 if (seq_ascii || c0_ <= kMaxAsciiCharCode) {
527 seq_ascii_str->SeqAsciiStringSet(count++, c0_);
528 Advance();
529 } else {
530 break;
531 }
532 } else {
533 Advance(); // Advance past the \.
534 switch (c0_) {
535 case '"':
536 case '\\':
537 case '/':
538 seq_ascii_str->SeqAsciiStringSet(count++, c0_);
539 break;
540 case 'b':
541 seq_ascii_str->SeqAsciiStringSet(count++, '\x08');
542 break;
543 case 'f':
544 seq_ascii_str->SeqAsciiStringSet(count++, '\x0c');
545 break;
546 case 'n':
547 seq_ascii_str->SeqAsciiStringSet(count++, '\x0a');
548 break;
549 case 'r':
550 seq_ascii_str->SeqAsciiStringSet(count++, '\x0d');
551 break;
552 case 't':
553 seq_ascii_str->SeqAsciiStringSet(count++, '\x09');
554 break;
555 case 'u': {
556 uc32 value = 0;
557 for (int i = 0; i < 4; i++) {
558 Advance();
559 int digit = HexValue(c0_);
560 if (digit < 0) {
561 return Handle<String>::null();
562 }
563 value = value * 16 + digit;
564 }
565 if (value <= kMaxAsciiCharCode) {
566 seq_ascii_str->SeqAsciiStringSet(count++, value);
567 break;
568 } else {
569 position_ -= 6; // Rewind position to \ in \uxxxx.
570 Advance();
571 goto outer_loop; // break out of while loop.
Lasse Reichstein 2011/06/24 13:56:00 I'm really not too keen on using gotos. Really not… [comment truncated]
572 }
573 }
574 default:
575 return Handle<String>::null();
576 }
577 Advance();
578 }
579 }
580 outer_loop:
581 // Shrink new_ascii from length to count.
582 if (isolate()->heap()->InNewSpace(*new_ascii)) {
583 isolate()->heap()->new_space()->
584 template ShrinkStringAtAllocationBoundary<SeqAsciiString>(
585 *seq_ascii_str, count);
586 } else {
587 int string_size = SeqAsciiString::SizeFor(count);
Lasse Reichstein 2011/06/24 13:56:00 If the truncation would turn a lot of space into f… [comment truncated]
588 int allocated_string_size = SeqAsciiString::SizeFor(length);
589 int delta = allocated_string_size - string_size;
590 Address start_filler_object = seq_ascii_str->address() + string_size;
591 seq_ascii_str->set_length(count);
592 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);
593 }
Lasse Reichstein 2011/06/24 13:56:00 If you move the shrinking to a separate function,… [comment truncated]
594 prefix = isolate()->factory()->NewConsString(prefix, new_ascii);
595 if (c0_ != '"') {
596 // We have read a non-ascii character - either directly or as \uxxxx.
597 return this->SlowScanJsonTwoByteString(prefix);
598 }
599 ASSERT_EQ('"', c0_);
600 // Advance past the last '"'.
601 AdvanceSkipWhitespace();
602 return prefix;
508 } 603 }
509 604
510 template <bool seq_ascii> 605 template <bool seq_ascii>
511 template <bool is_symbol> 606 template <bool is_symbol>
512 Handle<String> JsonParser<seq_ascii>::ScanJsonString() { 607 Handle<String> JsonParser<seq_ascii>::ScanJsonString() {
513 ASSERT_EQ('"', c0_); 608 ASSERT_EQ('"', c0_);
514 Advance(); 609 Advance();
515 int beg_pos = position_; 610 int beg_pos = position_;
516 // Fast case for ascii only without escape characters. 611 // Fast case for ascii only without escape characters.
517 while (c0_ != '"') { 612 while (c0_ != '"') {
518 // Check for control character (0x00-0x1f) or unterminated string (<0). 613 // Check for control character (0x00-0x1f) or unterminated string (<0).
519 if (c0_ < 0x20) return Handle<String>::null(); 614 if (c0_ < 0x20) return Handle<String>::null();
520 if (c0_ != '\\' && (seq_ascii || c0_ < kMaxAsciiCharCode)) { 615 if (c0_ != '\\' && (seq_ascii || c0_ <= kMaxAsciiCharCode)) {
521 Advance(); 616 Advance();
522 } else { 617 } else {
523 return this->SlowScanJsonString(beg_pos); 618 break;
Lasse Reichstein 2011/06/24 13:56:00 Again, you can abstract the part below into a func… [comment truncated]
524 } 619 }
525 } 620 }
526 ASSERT_EQ('"', c0_); 621 Handle<String> result;
527 int end_pos = position_; 622 if (seq_ascii) {
623 if (is_symbol) {
624 result = isolate()->factory()->LookupAsciiSymbol(seq_source_,
625 beg_pos,
626 position_ - beg_pos);
627 } else {
628 result = isolate()->factory()->NewStrictSubString(source_,
629 beg_pos,
630 position_);
631 }
632 } else {
633 int length = position_ - beg_pos;
634 result = isolate()->factory()->NewRawAsciiString(length);
635 ASSERT(result->IsAsciiRepresentation());
636 char* dest = SeqAsciiString::cast(*result)->GetChars();
637 String::WriteToFlat(*source_, dest, beg_pos, position_);
638 }
639 if (c0_ != '"') {
640 return this->SlowScanJsonAsciiString(result);
641 }
528 // Advance past the last '"'. 642 // Advance past the last '"'.
529 AdvanceSkipWhitespace(); 643 AdvanceSkipWhitespace();
530 if (seq_ascii && is_symbol) { 644 return result;
531 return isolate()->factory()->LookupAsciiSymbol(seq_source_,
532 beg_pos,
533 end_pos - beg_pos);
534 } else {
535 return isolate()->factory()->NewStrictSubString(source_,
536 beg_pos,
537 end_pos);
538 }
539 } 645 }
540 646
541 } } // namespace v8::internal 647 } } // namespace v8::internal
542 648
543 #endif // V8_JSON_PARSER_H_ 649 #endif // V8_JSON_PARSER_H_
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698