Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(809)

Side by Side Diff: runtime/vm/unicode.cc

Issue 11280150: Add support for surrogates when serializing and deserializing for native ports (Closed)
Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Rebased to r15579. Created 8 years ago.
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/unicode.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/unicode.h" 5 #include "vm/unicode.h"
6 6
7 #include "vm/allocation.h" 7 #include "vm/allocation.h"
8 #include "vm/globals.h" 8 #include "vm/globals.h"
9 #include "vm/object.h" 9 #include "vm/object.h"
10 10
11 namespace dart { 11 namespace dart {
(...skipping 12 matching lines...) Expand all
24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
25 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
26 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 26 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
27 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 27 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
28 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 28 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
29 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 29 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
30 }; 30 };
31 31
32 32
33 const uint32_t Utf8::kMagicBits[7] = { 33 const uint32_t Utf8::kMagicBits[7] = {
34 0, // padding 34 0, // Padding.
35 0x00000000, 35 0x00000000,
36 0x00003080, 36 0x00003080,
37 0x000E2080, 37 0x000E2080,
38 0x03C82080, 38 0x03C82080,
39 0xFA082080, 39 0xFA082080,
40 0x82082080 40 0x82082080
41 }; 41 };
42 42
43 43
44 // Minimum values of code points used to check shortest form. 44 // Minimum values of code points used to check shortest form.
45 const uint32_t Utf8::kOverlongMinimum[7] = { 45 const uint32_t Utf8::kOverlongMinimum[7] = {
46 0, // padding 46 0, // Padding.
47 0x0, 47 0x0,
48 0x80, 48 0x80,
49 0x800, 49 0x800,
50 0x10000, 50 0x10000,
51 0xFFFFFFFF, 51 0xFFFFFFFF,
52 0xFFFFFFFF 52 0xFFFFFFFF
53 }; 53 };
54 54
55 55
56 // Returns a count of the number of UTF-8 trail bytes. 56 // Returns a count of the number of UTF-8 trail bytes.
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
129 String::CodePointIterator it(str); 129 String::CodePointIterator it(str);
130 while (it.Next()) { 130 while (it.Next()) {
131 int32_t ch = it.Current(); 131 int32_t ch = it.Current();
132 length += Utf8::Length(ch); 132 length += Utf8::Length(ch);
133 } 133 }
134 return length; 134 return length;
135 } 135 }
136 136
137 137
138 intptr_t Utf8::Encode(int32_t ch, char* dst) { 138 intptr_t Utf8::Encode(int32_t ch, char* dst) {
139 ASSERT(!Utf16::IsSurrogate(ch));
139 static const int kMask = ~(1 << 6); 140 static const int kMask = ~(1 << 6);
140 if (ch <= kMaxOneByteChar) { 141 if (ch <= kMaxOneByteChar) {
141 dst[0] = ch; 142 dst[0] = ch;
142 return 1; 143 return 1;
143 } 144 }
144 if (ch <= kMaxTwoByteChar) { 145 if (ch <= kMaxTwoByteChar) {
145 dst[0] = 0xC0 | (ch >> 6); 146 dst[0] = 0xC0 | (ch >> 6);
146 dst[1] = 0x80 | (ch & kMask); 147 dst[1] = 0x80 | (ch & kMask);
147 return 2; 148 return 2;
148 } 149 }
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
215 uint8_t* dst, 216 uint8_t* dst,
216 intptr_t len) { 217 intptr_t len) {
217 intptr_t i = 0; 218 intptr_t i = 0;
218 intptr_t j = 0; 219 intptr_t j = 0;
219 intptr_t num_bytes; 220 intptr_t num_bytes;
220 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { 221 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {
221 int32_t ch; 222 int32_t ch;
222 ASSERT(IsLatin1SequenceStart(utf8_array[i])); 223 ASSERT(IsLatin1SequenceStart(utf8_array[i]));
223 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); 224 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);
224 if (ch == -1) { 225 if (ch == -1) {
225 return false; // invalid input 226 return false; // Invalid input.
226 } 227 }
227 ASSERT(Utf::IsLatin1(ch)); 228 ASSERT(Utf::IsLatin1(ch));
228 dst[j] = ch; 229 dst[j] = ch;
229 } 230 }
230 if ((i < array_len) && (j == len)) { 231 if ((i < array_len) && (j == len)) {
231 return false; // output overflow 232 return false; // Output overflow.
232 } 233 }
233 return true; // success 234 return true; // Success.
234 } 235 }
235 236
236 237
237 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, 238 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,
238 intptr_t array_len, 239 intptr_t array_len,
239 uint16_t* dst, 240 uint16_t* dst,
240 intptr_t len) { 241 intptr_t len) {
241 intptr_t i = 0; 242 intptr_t i = 0;
242 intptr_t j = 0; 243 intptr_t j = 0;
243 intptr_t num_bytes; 244 intptr_t num_bytes;
244 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { 245 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {
245 int32_t ch; 246 int32_t ch;
246 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]); 247 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]);
247 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); 248 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);
248 if (ch == -1) { 249 if (ch == -1) {
249 return false; // invalid input 250 return false; // Invalid input.
250 } 251 }
251 if (is_supplementary) { 252 if (is_supplementary) {
252 Utf16::Encode(ch, &dst[j]); 253 Utf16::Encode(ch, &dst[j]);
253 j = j + 1; 254 j = j + 1;
254 } else { 255 } else {
255 dst[j] = ch; 256 dst[j] = ch;
256 } 257 }
257 } 258 }
258 if ((i < array_len) && (j == len)) { 259 if ((i < array_len) && (j == len)) {
259 return false; // output overflow 260 return false; // Output overflow.
260 } 261 }
261 return true; // success 262 return true; // Success.
262 } 263 }
263 264
264 265
265 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array, 266 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array,
266 intptr_t array_len, 267 intptr_t array_len,
267 int32_t* dst, 268 int32_t* dst,
268 intptr_t len) { 269 intptr_t len) {
269 intptr_t i = 0; 270 intptr_t i = 0;
270 intptr_t j = 0; 271 intptr_t j = 0;
271 intptr_t num_bytes; 272 intptr_t num_bytes;
272 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { 273 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {
273 int32_t ch; 274 int32_t ch;
274 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); 275 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);
275 if (ch == -1) { 276 if (ch == -1) {
276 return false; // invalid input 277 return false; // Invalid input.
277 } 278 }
278 dst[j] = ch; 279 dst[j] = ch;
279 } 280 }
280 if ((i < array_len) && (j == len)) { 281 if ((i < array_len) && (j == len)) {
281 return false; // output overflow 282 return false; // Output overflow.
282 } 283 }
283 return true; // success 284 return true; // Success.
284 } 285 }
285 286
286 287
287 bool Utf8::DecodeCStringToUTF32(const char* str, int32_t* dst, intptr_t len) { 288 bool Utf8::DecodeCStringToUTF32(const char* str, int32_t* dst, intptr_t len) {
288 ASSERT(str != NULL); 289 ASSERT(str != NULL);
289 intptr_t array_len = strlen(str); 290 intptr_t array_len = strlen(str);
290 const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str); 291 const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str);
291 return Utf8::DecodeToUTF32(utf8_array, array_len, dst, len); 292 return Utf8::DecodeToUTF32(utf8_array, array_len, dst, len);
292 } 293 }
293 294
294 295
295 void Utf16::Encode(int32_t codepoint, uint16_t* dst) { 296 void Utf16::Encode(int32_t codepoint, uint16_t* dst) {
296 ASSERT(codepoint > Utf16::kMaxCodeUnit); 297 ASSERT(codepoint > Utf16::kMaxCodeUnit);
297 ASSERT(dst != NULL); 298 ASSERT(dst != NULL);
298 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10)); 299 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10));
299 dst[1] = (0xDC00 + (codepoint & 0x3FF)); 300 dst[1] = (0xDC00 + (codepoint & 0x3FF));
300 } 301 }
301 302
302 } // namespace dart 303 } // namespace dart
OLD | NEW
« no previous file with comments | « runtime/vm/unicode.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698