runtime/vm/unicode.cc - Issue 11280150: Add support for surrogates when serializing and deserializing for native ports

Unified Diff: runtime/vm/unicode.cc

Issue 11280150: Add support for surrogates when serializing and deserializing for native ports (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Created 8 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: runtime/vm/unicode.cc

diff --git a/runtime/vm/unicode.cc b/runtime/vm/unicode.cc

index 3129a06787edb5cbc96dee61444c63c96a862cc1..25e18e897badb2317ec71da8e6017498b49fe535 100644

--- a/runtime/vm/unicode.cc

+++ b/runtime/vm/unicode.cc

@@ -1,4 +1,4 @@

// BSD-style license that can be found in the LICENSE file.

@@ -59,7 +59,7 @@ static bool IsTrailByte(uint8_t code_unit) {

static bool IsLatin1SequenceStart(uint8_t code_unit) {

- // Check is codepoint is <= U+00FF

+ // Check if codepoint is <= U+00FF.

return (code_unit <= Utf8::kMaxOneByteChar);

}

@@ -108,7 +108,8 @@ intptr_t Utf8::CodePointCount(const uint8_t* utf8_array,

// Returns true if the first array_len bytes of utf8_array are valid UTF-8.

-bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {

+static bool IsValidUtf8(

+ const uint8_t* utf8_array, intptr_t array_len, bool allow_surrogates) {

intptr_t i = 0;

while (i < array_len) {

uint32_t ch = utf8_array[i] & 0xFF;

@@ -130,7 +131,7 @@ bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {

(j == num_trail_bytes) &&

!IsOutOfRange(ch) &&

!IsNonShortestForm(ch, j) &&

- !Utf16::IsSurrogate(ch))) {

+ (!Utf16::IsSurrogate(ch) || allow_surrogates))) {

return false;

}

@@ -140,6 +141,17 @@ bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {

}

+bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {

+ return IsValidUtf8(utf8_array, array_len, false);

+bool Utf8::IsValidAllowSurrogates(

+ const uint8_t* utf8_array, intptr_t array_len) {

+ return IsValidUtf8(utf8_array, array_len, true);

intptr_t Utf8::Length(int32_t ch) {

if (ch <= kMaxOneByteChar) {

return 1;

@@ -206,9 +218,10 @@ intptr_t Utf8::Encode(const String& src, char* dst, intptr_t len) {

}

-intptr_t Utf8::Decode(const uint8_t* utf8_array,

- intptr_t array_len,

- int32_t* dst) {

+static intptr_t DecodeUTF8(const uint8_t* utf8_array,

+ intptr_t array_len,

+ int32_t* dst,

+ bool allow_surrogates) {

uint32_t ch = utf8_array[0] & 0xFF;

intptr_t i = 1;

if (ch >= 0x80) {

@@ -229,7 +242,7 @@ intptr_t Utf8::Decode(const uint8_t* utf8_array,

(i == num_trail_bytes) &&

!IsOutOfRange(ch) &&

!IsNonShortestForm(ch, i) &&

- !Utf16::IsSurrogate(ch))) {

+ (!Utf16::IsSurrogate(ch) || allow_surrogates))) {

*dst = -1;

return 0;

}

@@ -239,6 +252,20 @@ intptr_t Utf8::Decode(const uint8_t* utf8_array,

}

+intptr_t Utf8::Decode(const uint8_t* utf8_array,

+ intptr_t array_len,

+ int32_t* dst) {

+ return DecodeUTF8(utf8_array, array_len, dst, false);

+intptr_t Utf8::DecodeAllowSurrogates(const uint8_t* utf8_array,

+ intptr_t array_len,

+ int32_t* dst) {

+ return DecodeUTF8(utf8_array, array_len, dst, true);

bool Utf8::DecodeToLatin1(const uint8_t* utf8_array,

intptr_t array_len,

uint8_t* dst,

@@ -251,31 +278,33 @@ bool Utf8::DecodeToLatin1(const uint8_t* utf8_array,

ASSERT(IsLatin1SequenceStart(utf8_array[i]));

num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);

if (ch == -1) {

- return false; // invalid input

+ return false; // Invalid input.

}

ASSERT(ch <= 0xff);

dst[j] = ch;

}

if ((i < array_len) && (j == len)) {

- return false; // output overflow

+ return false; // Output overflow.

}

- return true; // success

+ return true; // Success.

}

-bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,

- intptr_t array_len,

- uint16_t* dst,

- intptr_t len) {

+bool DecodeUTF8ToUTF16(const uint8_t* utf8_array,

+ intptr_t array_len,

+ uint16_t* dst,

+ intptr_t len,

+ bool allow_surrogates) {

intptr_t i = 0;

intptr_t j = 0;

intptr_t num_bytes;

for (; (i < array_len) && (j < len); i += num_bytes, ++j) {

int32_t ch;

bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]);

- num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);

+ num_bytes = DecodeUTF8(

+ &utf8_array[i], (array_len - i), &ch, allow_surrogates);

if (ch == -1) {

- return false; // invalid input

+ return false; // Invalid input.

}

if (is_supplementary) {

Utf16::Encode(ch, &dst[j]);

@@ -285,9 +314,25 @@ bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,

}

if ((i < array_len) && (j == len)) {

- return false; // output overflow

+ return false; // Output overflow.

}

- return true; // success

+ return true; // Success.

+bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,

+ intptr_t array_len,

+ uint16_t* dst,

+ intptr_t len) {

+ return DecodeUTF8ToUTF16(utf8_array, array_len, dst, len, false);

+bool Utf8::DecodeToUTF16AllowSurrogates(const uint8_t* utf8_array,

+ intptr_t array_len,

+ uint16_t* dst,

+ intptr_t len) {

+ return DecodeUTF8ToUTF16(utf8_array, array_len, dst, len, true);

}

@@ -302,14 +347,14 @@ bool Utf8::DecodeToUTF32(const uint8_t* utf8_array,

int32_t ch;

num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);

if (ch == -1) {

- return false; // invalid input

+ return false; // Invalid input.

}

dst[j] = ch;

}

if ((i < array_len) && (j == len)) {

- return false; // output overflow

+ return false; // Output overflow.

}

- return true; // success

+ return true; // Success.

}

« runtime/vm/snapshot_test.cc ('K') | « runtime/vm/unicode.h ('k') | no next file » | no next file with comments »