Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(213)

Side by Side Diff: src/api.cc

Issue 9600009: Fix input and output to handle UTF16 surrogate pairs. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | src/arm/regexp-macro-assembler-arm.cc » ('j') | src/debug-agent.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 1411 matching lines...) Expand 10 before | Expand all | Expand 10 after
1422 EnsureConstructor(this); 1422 EnsureConstructor(this);
1423 } 1423 }
1424 Utils::OpenHandle(this)->set_internal_field_count(i::Smi::FromInt(value)); 1424 Utils::OpenHandle(this)->set_internal_field_count(i::Smi::FromInt(value));
1425 } 1425 }
1426 1426
1427 1427
1428 // --- S c r i p t D a t a --- 1428 // --- S c r i p t D a t a ---
1429 1429
1430 1430
1431 ScriptData* ScriptData::PreCompile(const char* input, int length) { 1431 ScriptData* ScriptData::PreCompile(const char* input, int length) {
1432 i::Utf8ToUC16CharacterStream stream( 1432 i::Utf8ToUtf16CharacterStream stream(
1433 reinterpret_cast<const unsigned char*>(input), length); 1433 reinterpret_cast<const unsigned char*>(input), length);
1434 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping); 1434 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
1435 } 1435 }
1436 1436
1437 1437
1438 ScriptData* ScriptData::PreCompile(v8::Handle<String> source) { 1438 ScriptData* ScriptData::PreCompile(v8::Handle<String> source) {
1439 i::Handle<i::String> str = Utils::OpenHandle(*source); 1439 i::Handle<i::String> str = Utils::OpenHandle(*source);
1440 if (str->IsExternalTwoByteString()) { 1440 if (str->IsExternalTwoByteString()) {
1441 i::ExternalTwoByteStringUC16CharacterStream stream( 1441 i::ExternalTwoByteStringUtf16CharacterStream stream(
1442 i::Handle<i::ExternalTwoByteString>::cast(str), 0, str->length()); 1442 i::Handle<i::ExternalTwoByteString>::cast(str), 0, str->length());
1443 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping); 1443 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
1444 } else { 1444 } else {
1445 i::GenericStringUC16CharacterStream stream(str, 0, str->length()); 1445 i::GenericStringUtf16CharacterStream stream(str, 0, str->length());
1446 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping); 1446 return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping);
1447 } 1447 }
1448 } 1448 }
1449 1449
1450 1450
1451 ScriptData* ScriptData::New(const char* data, int length) { 1451 ScriptData* ScriptData::New(const char* data, int length) {
1452 // Return an empty ScriptData if the length is obviously invalid. 1452 // Return an empty ScriptData if the length is obviously invalid.
1453 if (length % sizeof(unsigned) != 0) { 1453 if (length % sizeof(unsigned) != 0) {
1454 return new i::ScriptDataImpl(); 1454 return new i::ScriptDataImpl();
1455 } 1455 }
(...skipping 2226 matching lines...) Expand 10 before | Expand all | Expand 10 after
3682 int String::Length() const { 3682 int String::Length() const {
3683 i::Handle<i::String> str = Utils::OpenHandle(this); 3683 i::Handle<i::String> str = Utils::OpenHandle(this);
3684 if (IsDeadCheck(str->GetIsolate(), "v8::String::Length()")) return 0; 3684 if (IsDeadCheck(str->GetIsolate(), "v8::String::Length()")) return 0;
3685 return str->length(); 3685 return str->length();
3686 } 3686 }
3687 3687
3688 3688
3689 int String::Utf8Length() const { 3689 int String::Utf8Length() const {
3690 i::Handle<i::String> str = Utils::OpenHandle(this); 3690 i::Handle<i::String> str = Utils::OpenHandle(this);
3691 if (IsDeadCheck(str->GetIsolate(), "v8::String::Utf8Length()")) return 0; 3691 if (IsDeadCheck(str->GetIsolate(), "v8::String::Utf8Length()")) return 0;
3692 return str->Utf8Length(); 3692 int length = str->Utf8Length();
3693 if (length < 0) {
3694 FlattenString(str);
3695 length = str->Utf8Length();
3696 }
3697 ASSERT(length >= 0);
3698 return length;
3693 } 3699 }
3694 3700
3695 3701
3696 int String::WriteUtf8(char* buffer, 3702 int String::WriteUtf8(char* buffer,
3697 int capacity, 3703 int capacity,
3698 int* nchars_ref, 3704 int* nchars_ref,
3699 int options) const { 3705 int options) const {
3700 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate(); 3706 i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate();
3701 if (IsDeadCheck(isolate, "v8::String::WriteUtf8()")) return 0; 3707 if (IsDeadCheck(isolate, "v8::String::WriteUtf8()")) return 0;
3702 LOG_API(isolate, "String::WriteUtf8"); 3708 LOG_API(isolate, "String::WriteUtf8");
(...skipping 25 matching lines...) Expand all
3728 } 3734 }
3729 write_input_buffer.Reset(0, *str); 3735 write_input_buffer.Reset(0, *str);
3730 int len = str->length(); 3736 int len = str->length();
3731 // Encode the first K - 3 bytes directly into the buffer since we 3737 // Encode the first K - 3 bytes directly into the buffer since we
3732 // know there's room for them. If no capacity is given we copy all 3738 // know there's room for them. If no capacity is given we copy all
3733 // of them here. 3739 // of them here.
3734 int fast_end = capacity - (unibrow::Utf8::kMaxEncodedSize - 1); 3740 int fast_end = capacity - (unibrow::Utf8::kMaxEncodedSize - 1);
3735 int i; 3741 int i;
3736 int pos = 0; 3742 int pos = 0;
3737 int nchars = 0; 3743 int nchars = 0;
3744 int previous = unibrow::Utf8::kNoPreviousCharacter;
3738 for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) { 3745 for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) {
3739 i::uc32 c = write_input_buffer.GetNext(); 3746 i::uc32 c = write_input_buffer.GetNext();
3740 int written = unibrow::Utf8::Encode(buffer + pos, c); 3747 int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
3741 pos += written; 3748 pos += written;
3742 nchars++; 3749 nchars++;
3750 previous = c;
3743 } 3751 }
3744 if (i < len) { 3752 if (i < len) {
3745 // For the last characters we need to check the length for each one 3753 // For the last characters we need to check the length for each one
3746 // because they may be longer than the remaining space in the 3754 // because they may be longer than the remaining space in the
3747 // buffer. 3755 // buffer.
3748 char intermediate[unibrow::Utf8::kMaxEncodedSize]; 3756 char intermediate[unibrow::Utf8::kMaxEncodedSize];
3749 for (; i < len && pos < capacity; i++) { 3757 for (; i < len && pos < capacity; i++) {
3750 i::uc32 c = write_input_buffer.GetNext(); 3758 i::uc32 c = write_input_buffer.GetNext();
3751 int written = unibrow::Utf8::Encode(intermediate, c); 3759 if (unibrow::Utf16::IsTrailSurrogate(c) &&
3752 if (pos + written <= capacity) { 3760 previous != unibrow::Utf8::kNoPreviousCharacter &&
3753 for (int j = 0; j < written; j++) 3761 unibrow::Utf16::IsLeadSurrogate(previous)) {
3754 buffer[pos + j] = intermediate[j]; 3762 // We can't use the intermediate buffer here because the encoding
3763 // of surrogate pairs is done under the assumption that you can step
3764 // back and fix the UTF8 stream. Luckily we only need space for one
3765 // more byte, so there is always space.
3766 ASSERT(pos < capacity);
3767 int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
3768 ASSERT(written == 1);
3755 pos += written; 3769 pos += written;
3756 nchars++; 3770 nchars++;
3757 } else { 3771 } else {
3758 // We've reached the end of the buffer 3772 int written =
3759 break; 3773 unibrow::Utf8::Encode(intermediate,
3774 c,
3775 unibrow::Utf8::kNoPreviousCharacter);
3776 if (pos + written <= capacity) {
3777 for (int j = 0; j < written; j++)
3778 buffer[pos + j] = intermediate[j];
3779 pos += written;
3780 nchars++;
3781 } else {
3782 // We've reached the end of the buffer
3783 break;
3784 }
3760 } 3785 }
3786 previous = c;
3761 } 3787 }
3762 } 3788 }
3763 if (nchars_ref != NULL) *nchars_ref = nchars; 3789 if (nchars_ref != NULL) *nchars_ref = nchars;
3764 if (!(options & NO_NULL_TERMINATION) && 3790 if (!(options & NO_NULL_TERMINATION) &&
3765 (i == len && (capacity == -1 || pos < capacity))) 3791 (i == len && (capacity == -1 || pos < capacity)))
3766 buffer[pos++] = '\0'; 3792 buffer[pos++] = '\0';
3767 return pos; 3793 return pos;
3768 } 3794 }
3769 3795
3770 3796
(...skipping 2423 matching lines...) Expand 10 before | Expand all | Expand 10 after
6194 6220
6195 6221
6196 char* HandleScopeImplementer::Iterate(ObjectVisitor* v, char* storage) { 6222 char* HandleScopeImplementer::Iterate(ObjectVisitor* v, char* storage) {
6197 HandleScopeImplementer* scope_implementer = 6223 HandleScopeImplementer* scope_implementer =
6198 reinterpret_cast<HandleScopeImplementer*>(storage); 6224 reinterpret_cast<HandleScopeImplementer*>(storage);
6199 scope_implementer->IterateThis(v); 6225 scope_implementer->IterateThis(v);
6200 return storage + ArchiveSpacePerThread(); 6226 return storage + ArchiveSpacePerThread();
6201 } 6227 }
6202 6228
6203 } } // namespace v8::internal 6229 } } // namespace v8::internal
OLD | NEW
« no previous file with comments | « no previous file | src/arm/regexp-macro-assembler-arm.cc » ('j') | src/debug-agent.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698