| Index: runtime/vm/unicode.cc
|
| diff --git a/runtime/vm/unicode.cc b/runtime/vm/unicode.cc
|
| index 3129a06787edb5cbc96dee61444c63c96a862cc1..25e18e897badb2317ec71da8e6017498b49fe535 100644
|
| --- a/runtime/vm/unicode.cc
|
| +++ b/runtime/vm/unicode.cc
|
| @@ -1,4 +1,4 @@
|
| -// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
|
| +// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
|
| // for details. All rights reserved. Use of this source code is governed by a
|
| // BSD-style license that can be found in the LICENSE file.
|
|
|
| @@ -59,7 +59,7 @@ static bool IsTrailByte(uint8_t code_unit) {
|
|
|
|
|
| static bool IsLatin1SequenceStart(uint8_t code_unit) {
|
| - // Check is codepoint is <= U+00FF
|
| + // Check if codepoint is <= U+00FF.
|
| return (code_unit <= Utf8::kMaxOneByteChar);
|
| }
|
|
|
| @@ -108,7 +108,8 @@ intptr_t Utf8::CodePointCount(const uint8_t* utf8_array,
|
|
|
|
|
| // Returns true if str is a valid NUL-terminated UTF-8 string.
|
| -bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {
|
| +static bool IsValidUtf8(
|
| + const uint8_t* utf8_array, intptr_t array_len, bool allow_surrogates) {
|
| intptr_t i = 0;
|
| while (i < array_len) {
|
| uint32_t ch = utf8_array[i] & 0xFF;
|
| @@ -130,7 +131,7 @@ bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {
|
| (j == num_trail_bytes) &&
|
| !IsOutOfRange(ch) &&
|
| !IsNonShortestForm(ch, j) &&
|
| - !Utf16::IsSurrogate(ch))) {
|
| + (!Utf16::IsSurrogate(ch) || allow_surrogates))) {
|
| return false;
|
| }
|
| }
|
| @@ -140,6 +141,17 @@ bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {
|
| }
|
|
|
|
|
| +bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {
|
| + return IsValidUtf8(utf8_array, array_len, false);
|
| +}
|
| +
|
| +
|
| +bool Utf8::IsValidAllowSurrogates(
|
| + const uint8_t* utf8_array, intptr_t array_len) {
|
| + return IsValidUtf8(utf8_array, array_len, true);
|
| +}
|
| +
|
| +
|
| intptr_t Utf8::Length(int32_t ch) {
|
| if (ch <= kMaxOneByteChar) {
|
| return 1;
|
| @@ -206,9 +218,10 @@ intptr_t Utf8::Encode(const String& src, char* dst, intptr_t len) {
|
| }
|
|
|
|
|
| -intptr_t Utf8::Decode(const uint8_t* utf8_array,
|
| - intptr_t array_len,
|
| - int32_t* dst) {
|
| +static intptr_t DecodeUTF8(const uint8_t* utf8_array,
|
| + intptr_t array_len,
|
| + int32_t* dst,
|
| + bool allow_surrogates) {
|
| uint32_t ch = utf8_array[0] & 0xFF;
|
| intptr_t i = 1;
|
| if (ch >= 0x80) {
|
| @@ -229,7 +242,7 @@ intptr_t Utf8::Decode(const uint8_t* utf8_array,
|
| (i == num_trail_bytes) &&
|
| !IsOutOfRange(ch) &&
|
| !IsNonShortestForm(ch, i) &&
|
| - !Utf16::IsSurrogate(ch))) {
|
| + (!Utf16::IsSurrogate(ch) || allow_surrogates))) {
|
| *dst = -1;
|
| return 0;
|
| }
|
| @@ -239,6 +252,20 @@ intptr_t Utf8::Decode(const uint8_t* utf8_array,
|
| }
|
|
|
|
|
| +intptr_t Utf8::Decode(const uint8_t* utf8_array,
|
| + intptr_t array_len,
|
| + int32_t* dst) {
|
| + return DecodeUTF8(utf8_array, array_len, dst, false);
|
| +}
|
| +
|
| +
|
| +intptr_t Utf8::DecodeAllowSurrogates(const uint8_t* utf8_array,
|
| + intptr_t array_len,
|
| + int32_t* dst) {
|
| + return DecodeUTF8(utf8_array, array_len, dst, true);
|
| +}
|
| +
|
| +
|
| bool Utf8::DecodeToLatin1(const uint8_t* utf8_array,
|
| intptr_t array_len,
|
| uint8_t* dst,
|
| @@ -251,31 +278,33 @@ bool Utf8::DecodeToLatin1(const uint8_t* utf8_array,
|
| ASSERT(IsLatin1SequenceStart(utf8_array[i]));
|
| num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);
|
| if (ch == -1) {
|
| - return false; // invalid input
|
| + return false; // Invalid input.
|
| }
|
| ASSERT(ch <= 0xff);
|
| dst[j] = ch;
|
| }
|
| if ((i < array_len) && (j == len)) {
|
| - return false; // output overflow
|
| + return false; // Output overflow.
|
| }
|
| - return true; // success
|
| + return true; // Success.
|
| }
|
|
|
|
|
| -bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,
|
| - intptr_t array_len,
|
| - uint16_t* dst,
|
| - intptr_t len) {
|
| +bool DecodeUTF8ToUTF16(const uint8_t* utf8_array,
|
| + intptr_t array_len,
|
| + uint16_t* dst,
|
| + intptr_t len,
|
| + bool allow_surrogates) {
|
| intptr_t i = 0;
|
| intptr_t j = 0;
|
| intptr_t num_bytes;
|
| for (; (i < array_len) && (j < len); i += num_bytes, ++j) {
|
| int32_t ch;
|
| bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]);
|
| - num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);
|
| + num_bytes = DecodeUTF8(
|
| + &utf8_array[i], (array_len - i), &ch, allow_surrogates);
|
| if (ch == -1) {
|
| - return false; // invalid input
|
| + return false; // Invalid input.
|
| }
|
| if (is_supplementary) {
|
| Utf16::Encode(ch, &dst[j]);
|
| @@ -285,9 +314,25 @@ bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,
|
| }
|
| }
|
| if ((i < array_len) && (j == len)) {
|
| - return false; // output overflow
|
| + return false; // Output overflow.
|
| }
|
| - return true; // success
|
| + return true; // Success.
|
| +}
|
| +
|
| +
|
| +bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,
|
| + intptr_t array_len,
|
| + uint16_t* dst,
|
| + intptr_t len) {
|
| + return DecodeUTF8ToUTF16(utf8_array, array_len, dst, len, false);
|
| +}
|
| +
|
| +
|
| +bool Utf8::DecodeToUTF16AllowSurrogates(const uint8_t* utf8_array,
|
| + intptr_t array_len,
|
| + uint16_t* dst,
|
| + intptr_t len) {
|
| + return DecodeUTF8ToUTF16(utf8_array, array_len, dst, len, true);
|
| }
|
|
|
|
|
| @@ -302,14 +347,14 @@ bool Utf8::DecodeToUTF32(const uint8_t* utf8_array,
|
| int32_t ch;
|
| num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);
|
| if (ch == -1) {
|
| - return false; // invalid input
|
| + return false; // Invalid input.
|
| }
|
| dst[j] = ch;
|
| }
|
| if ((i < array_len) && (j == len)) {
|
| - return false; // output overflow
|
| + return false; // Output overflow.
|
| }
|
| - return true; // success
|
| + return true; // Success.
|
| }
|
|
|
|
|
|
|