Index: runtime/vm/unicode.cc |
diff --git a/runtime/vm/unicode.cc b/runtime/vm/unicode.cc |
index 3129a06787edb5cbc96dee61444c63c96a862cc1..25e18e897badb2317ec71da8e6017498b49fe535 100644 |
--- a/runtime/vm/unicode.cc |
+++ b/runtime/vm/unicode.cc |
@@ -1,4 +1,4 @@ |
-// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
+// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
// for details. All rights reserved. Use of this source code is governed by a |
// BSD-style license that can be found in the LICENSE file. |
@@ -59,7 +59,7 @@ static bool IsTrailByte(uint8_t code_unit) { |
static bool IsLatin1SequenceStart(uint8_t code_unit) { |
- // Check is codepoint is <= U+00FF |
+ // Check if codepoint is <= U+00FF. |
return (code_unit <= Utf8::kMaxOneByteChar); |
} |
@@ -108,7 +108,8 @@ intptr_t Utf8::CodePointCount(const uint8_t* utf8_array, |
// Returns true if str is a valid NUL-terminated UTF-8 string. |
-bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) { |
+static bool IsValidUtf8( |
+ const uint8_t* utf8_array, intptr_t array_len, bool allow_surrogates) { |
intptr_t i = 0; |
while (i < array_len) { |
uint32_t ch = utf8_array[i] & 0xFF; |
@@ -130,7 +131,7 @@ bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) { |
(j == num_trail_bytes) && |
!IsOutOfRange(ch) && |
!IsNonShortestForm(ch, j) && |
- !Utf16::IsSurrogate(ch))) { |
+ (!Utf16::IsSurrogate(ch) || allow_surrogates))) { |
return false; |
} |
} |
@@ -140,6 +141,17 @@ bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) { |
} |
+bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) { |
+ return IsValidUtf8(utf8_array, array_len, /* allow_surrogates= */ false); |
+} |
+ |
+ |
+bool Utf8::IsValidAllowSurrogates( |
+ const uint8_t* utf8_array, intptr_t array_len) { |
+ return IsValidUtf8(utf8_array, array_len, /* allow_surrogates= */ true); |
+} |
+ |
+ |
intptr_t Utf8::Length(int32_t ch) { |
if (ch <= kMaxOneByteChar) { |
return 1; |
@@ -206,9 +218,10 @@ intptr_t Utf8::Encode(const String& src, char* dst, intptr_t len) { |
} |
-intptr_t Utf8::Decode(const uint8_t* utf8_array, |
- intptr_t array_len, |
- int32_t* dst) { |
+static intptr_t DecodeUTF8(const uint8_t* utf8_array, |
+ intptr_t array_len, |
+ int32_t* dst, |
+ bool allow_surrogates) { |
uint32_t ch = utf8_array[0] & 0xFF; |
intptr_t i = 1; |
if (ch >= 0x80) { |
@@ -229,7 +242,7 @@ intptr_t Utf8::Decode(const uint8_t* utf8_array, |
(i == num_trail_bytes) && |
!IsOutOfRange(ch) && |
!IsNonShortestForm(ch, i) && |
- !Utf16::IsSurrogate(ch))) { |
+ (!Utf16::IsSurrogate(ch) || allow_surrogates))) { |
*dst = -1; |
return 0; |
} |
@@ -239,6 +252,20 @@ intptr_t Utf8::Decode(const uint8_t* utf8_array, |
} |
+intptr_t Utf8::Decode(const uint8_t* utf8_array, |
+ intptr_t array_len, |
+ int32_t* dst) { |
+ return DecodeUTF8(utf8_array, array_len, dst, /* allow_surrogates= */ false); |
+} |
+ |
+ |
+intptr_t Utf8::DecodeAllowSurrogates(const uint8_t* utf8_array, |
+ intptr_t array_len, |
+ int32_t* dst) { |
+ return DecodeUTF8(utf8_array, array_len, dst, /* allow_surrogates= */ true); |
+} |
+ |
+ |
bool Utf8::DecodeToLatin1(const uint8_t* utf8_array, |
intptr_t array_len, |
uint8_t* dst, |
@@ -251,31 +278,33 @@ bool Utf8::DecodeToLatin1(const uint8_t* utf8_array, |
ASSERT(IsLatin1SequenceStart(utf8_array[i])); |
num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); |
if (ch == -1) { |
- return false; // invalid input |
+ return false; // Invalid input. |
} |
ASSERT(ch <= 0xff); |
dst[j] = ch; |
} |
if ((i < array_len) && (j == len)) { |
- return false; // output overflow |
+ return false; // Output overflow. |
} |
- return true; // success |
+ return true; // Success. |
} |
-bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, |
- intptr_t array_len, |
- uint16_t* dst, |
- intptr_t len) { |
+static bool DecodeUTF8ToUTF16(const uint8_t* utf8_array, |
+ intptr_t array_len, |
+ uint16_t* dst, |
+ intptr_t len, |
+ bool allow_surrogates) { |
intptr_t i = 0; |
intptr_t j = 0; |
intptr_t num_bytes; |
for (; (i < array_len) && (j < len); i += num_bytes, ++j) { |
int32_t ch; |
bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]); |
- num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); |
+ num_bytes = DecodeUTF8( |
+ &utf8_array[i], (array_len - i), &ch, allow_surrogates); |
if (ch == -1) { |
- return false; // invalid input |
+ return false; // Invalid input. |
} |
if (is_supplementary) { |
Utf16::Encode(ch, &dst[j]); |
@@ -285,9 +314,25 @@ bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, |
} |
} |
if ((i < array_len) && (j == len)) { |
- return false; // output overflow |
+ return false; // Output overflow. |
} |
- return true; // success |
+ return true; // Success. |
+} |
+ |
+ |
+bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, |
+ intptr_t array_len, |
+ uint16_t* dst, |
+ intptr_t len) {  // Strict variant: allow_surrogates is false. |
+ return DecodeUTF8ToUTF16(utf8_array, array_len, dst, len, false); |
+} |
+ |
+ |
+bool Utf8::DecodeToUTF16AllowSurrogates(const uint8_t* utf8_array, |
+ intptr_t array_len, |
+ uint16_t* dst, |
+ intptr_t len) {  // Lenient variant: allow_surrogates is true. |
+ return DecodeUTF8ToUTF16(utf8_array, array_len, dst, len, true); |
} |
@@ -302,14 +347,14 @@ bool Utf8::DecodeToUTF32(const uint8_t* utf8_array, |
int32_t ch; |
num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); |
if (ch == -1) { |
- return false; // invalid input |
+ return false; // Invalid input. |
} |
dst[j] = ch; |
} |
if ((i < array_len) && (j == len)) { |
- return false; // output overflow |
+ return false; // Output overflow. |
} |
- return true; // success |
+ return true; // Success. |
} |