Index: src/api.cc |
=================================================================== |
--- src/api.cc (revision 10944) |
+++ src/api.cc (working copy) |
@@ -1429,7 +1429,7 @@ |
ScriptData* ScriptData::PreCompile(const char* input, int length) { |
- i::Utf8ToUC16CharacterStream stream( |
+ i::Utf8ToUtf16CharacterStream stream( |
reinterpret_cast<const unsigned char*>(input), length); |
return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping); |
} |
@@ -1438,11 +1438,11 @@ |
ScriptData* ScriptData::PreCompile(v8::Handle<String> source) { |
i::Handle<i::String> str = Utils::OpenHandle(*source); |
if (str->IsExternalTwoByteString()) { |
- i::ExternalTwoByteStringUC16CharacterStream stream( |
+ i::ExternalTwoByteStringUtf16CharacterStream stream( |
i::Handle<i::ExternalTwoByteString>::cast(str), 0, str->length()); |
return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping); |
} else { |
- i::GenericStringUC16CharacterStream stream(str, 0, str->length()); |
+ i::GenericStringUtf16CharacterStream stream(str, 0, str->length()); |
return i::ParserApi::PreParse(&stream, NULL, i::FLAG_harmony_scoping); |
} |
} |
@@ -3689,7 +3689,13 @@ |
int String::Utf8Length() const { |
i::Handle<i::String> str = Utils::OpenHandle(this); |
if (IsDeadCheck(str->GetIsolate(), "v8::String::Utf8Length()")) return 0; |
- return str->Utf8Length(); |
+ int length = str->Utf8Length(); |
+ if (length < 0) { |
+ FlattenString(str); |
+ length = str->Utf8Length(); |
+ } |
+ ASSERT(length >= 0); |
+ return length; |
} |
@@ -3735,11 +3741,13 @@ |
int i; |
int pos = 0; |
int nchars = 0; |
+ int previous = unibrow::Utf8::kNoPreviousCharacter; |
for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) { |
i::uc32 c = write_input_buffer.GetNext(); |
- int written = unibrow::Utf8::Encode(buffer + pos, c); |
+ int written = unibrow::Utf8::Encode(buffer + pos, c, previous); |
pos += written; |
nchars++; |
+ previous = c; |
} |
if (i < len) { |
// For the last characters we need to check the length for each one |
@@ -3748,16 +3756,34 @@ |
char intermediate[unibrow::Utf8::kMaxEncodedSize]; |
for (; i < len && pos < capacity; i++) { |
i::uc32 c = write_input_buffer.GetNext(); |
- int written = unibrow::Utf8::Encode(intermediate, c); |
- if (pos + written <= capacity) { |
- for (int j = 0; j < written; j++) |
- buffer[pos + j] = intermediate[j]; |
+ if (unibrow::Utf16::IsTrailSurrogate(c) && |
+ previous != unibrow::Utf8::kNoPreviousCharacter && |
+ unibrow::Utf16::IsLeadSurrogate(previous)) { |
+ // We can't use the intermediate buffer here because the encoding |
+ // of surrogate pairs is done under the assumption that you can step |
+ // back and fix the UTF-8 stream. Luckily we only need space for one |
+ // more byte, so there is always space. |
+ ASSERT(pos < capacity); |
+ int written = unibrow::Utf8::Encode(buffer + pos, c, previous); |
+ ASSERT(written == 1); |
pos += written; |
nchars++; |
} else { |
- // We've reached the end of the buffer |
- break; |
+ int written = |
+ unibrow::Utf8::Encode(intermediate, |
+ c, |
+ unibrow::Utf8::kNoPreviousCharacter); |
+ if (pos + written <= capacity) { |
+ for (int j = 0; j < written; j++) |
+ buffer[pos + j] = intermediate[j]; |
+ pos += written; |
+ nchars++; |
+ } else { |
+ // We've reached the end of the buffer |
+ break; |
+ } |
} |
+ previous = c; |
} |
} |
if (nchars_ref != NULL) *nchars_ref = nchars; |