Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(470)

Unified Diff: runtime/vm/unicode.cc

Issue 11280150: Add support for surrogates when serializing and deserializing for native ports (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« runtime/vm/snapshot_test.cc ('K') | « runtime/vm/unicode.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: runtime/vm/unicode.cc
diff --git a/runtime/vm/unicode.cc b/runtime/vm/unicode.cc
index 3129a06787edb5cbc96dee61444c63c96a862cc1..25e18e897badb2317ec71da8e6017498b49fe535 100644
--- a/runtime/vm/unicode.cc
+++ b/runtime/vm/unicode.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
+// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
@@ -59,7 +59,7 @@ static bool IsTrailByte(uint8_t code_unit) {
static bool IsLatin1SequenceStart(uint8_t code_unit) {
- // Check is codepoint is <= U+00FF
+ // Check if codepoint is <= U+00FF.
return (code_unit <= Utf8::kMaxOneByteChar);
}
@@ -108,7 +108,8 @@ intptr_t Utf8::CodePointCount(const uint8_t* utf8_array,
// Returns true if the array_len bytes at utf8_array are valid UTF-8.
-bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {
+static bool IsValidUtf8(
+ const uint8_t* utf8_array, intptr_t array_len, bool allow_surrogates) {
intptr_t i = 0;
while (i < array_len) {
uint32_t ch = utf8_array[i] & 0xFF;
@@ -130,7 +131,7 @@ bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {
(j == num_trail_bytes) &&
!IsOutOfRange(ch) &&
!IsNonShortestForm(ch, j) &&
- !Utf16::IsSurrogate(ch))) {
+ (!Utf16::IsSurrogate(ch) || allow_surrogates))) {
return false;
}
}
@@ -140,6 +141,17 @@ bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {
}
+bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) {
+ return IsValidUtf8(utf8_array, array_len, false);  // allow_surrogates = false
+}
+
+
+bool Utf8::IsValidAllowSurrogates(
+ const uint8_t* utf8_array, intptr_t array_len) {
+ return IsValidUtf8(utf8_array, array_len, true);  // allow_surrogates = true
+}
+
+
intptr_t Utf8::Length(int32_t ch) {
if (ch <= kMaxOneByteChar) {
return 1;
@@ -206,9 +218,10 @@ intptr_t Utf8::Encode(const String& src, char* dst, intptr_t len) {
}
-intptr_t Utf8::Decode(const uint8_t* utf8_array,
- intptr_t array_len,
- int32_t* dst) {
+static intptr_t DecodeUTF8(const uint8_t* utf8_array,
+ intptr_t array_len,
+ int32_t* dst,
+ bool allow_surrogates) {
uint32_t ch = utf8_array[0] & 0xFF;
intptr_t i = 1;
if (ch >= 0x80) {
@@ -229,7 +242,7 @@ intptr_t Utf8::Decode(const uint8_t* utf8_array,
(i == num_trail_bytes) &&
!IsOutOfRange(ch) &&
!IsNonShortestForm(ch, i) &&
- !Utf16::IsSurrogate(ch))) {
+ (!Utf16::IsSurrogate(ch) || allow_surrogates))) {
*dst = -1;
return 0;
}
@@ -239,6 +252,20 @@ intptr_t Utf8::Decode(const uint8_t* utf8_array,
}
+intptr_t Utf8::Decode(const uint8_t* utf8_array,
+ intptr_t array_len,
+ int32_t* dst) {
+ return DecodeUTF8(utf8_array, array_len, dst, false);  // allow_surrogates = false
+}
+
+
+intptr_t Utf8::DecodeAllowSurrogates(const uint8_t* utf8_array,
+ intptr_t array_len,
+ int32_t* dst) {
+ return DecodeUTF8(utf8_array, array_len, dst, true);  // allow_surrogates = true
+}
+
+
bool Utf8::DecodeToLatin1(const uint8_t* utf8_array,
intptr_t array_len,
uint8_t* dst,
@@ -251,31 +278,33 @@ bool Utf8::DecodeToLatin1(const uint8_t* utf8_array,
ASSERT(IsLatin1SequenceStart(utf8_array[i]));
num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);
if (ch == -1) {
- return false; // invalid input
+ return false; // Invalid input.
}
ASSERT(ch <= 0xff);
dst[j] = ch;
}
if ((i < array_len) && (j == len)) {
- return false; // output overflow
+ return false; // Output overflow.
}
- return true; // success
+ return true; // Success.
}
-bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,
- intptr_t array_len,
- uint16_t* dst,
- intptr_t len) {
+bool DecodeUTF8ToUTF16(const uint8_t* utf8_array,
+ intptr_t array_len,
+ uint16_t* dst,
+ intptr_t len,
+ bool allow_surrogates) {
intptr_t i = 0;
intptr_t j = 0;
intptr_t num_bytes;
for (; (i < array_len) && (j < len); i += num_bytes, ++j) {
int32_t ch;
bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]);
- num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);
+ num_bytes = DecodeUTF8(
+ &utf8_array[i], (array_len - i), &ch, allow_surrogates);
if (ch == -1) {
- return false; // invalid input
+ return false; // Invalid input.
}
if (is_supplementary) {
Utf16::Encode(ch, &dst[j]);
@@ -285,9 +314,25 @@ bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,
}
}
if ((i < array_len) && (j == len)) {
- return false; // output overflow
+ return false; // Output overflow.
}
- return true; // success
+ return true; // Success.
+}
+
+
+bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,
+ intptr_t array_len,
+ uint16_t* dst,
+ intptr_t len) {
+ return DecodeUTF8ToUTF16(utf8_array, array_len, dst, len, false);  // allow_surrogates = false
+}
+
+
+bool Utf8::DecodeToUTF16AllowSurrogates(const uint8_t* utf8_array,
+ intptr_t array_len,
+ uint16_t* dst,
+ intptr_t len) {
+ return DecodeUTF8ToUTF16(utf8_array, array_len, dst, len, true);  // allow_surrogates = true
}
@@ -302,14 +347,14 @@ bool Utf8::DecodeToUTF32(const uint8_t* utf8_array,
int32_t ch;
num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);
if (ch == -1) {
- return false; // invalid input
+ return false; // Invalid input.
}
dst[j] = ch;
}
if ((i < array_len) && (j == len)) {
- return false; // output overflow
+ return false; // Output overflow.
}
- return true; // success
+ return true; // Success.
}
« runtime/vm/snapshot_test.cc ('K') | « runtime/vm/unicode.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698