Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(422)

Side by Side Diff: runtime/vm/unicode.cc

Issue 11280150: Add support for surrogates when serializing and deserializing for native ports (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Use iterator reset Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/unicode.h" 5 #include "vm/unicode.h"
6 6
7 #include "vm/allocation.h" 7 #include "vm/allocation.h"
8 #include "vm/globals.h" 8 #include "vm/globals.h"
9 #include "vm/object.h" 9 #include "vm/object.h"
10 10
11 namespace dart { 11 namespace dart {
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
52 0xFFFFFFFF 52 0xFFFFFFFF
53 }; 53 };
54 54
55 55
56 static bool IsTrailByte(uint8_t code_unit) { 56 static bool IsTrailByte(uint8_t code_unit) {
57 return (code_unit & 0xc0) == 0x80; 57 return (code_unit & 0xc0) == 0x80;
58 } 58 }
59 59
60 60
61 static bool IsLatin1SequenceStart(uint8_t code_unit) { 61 static bool IsLatin1SequenceStart(uint8_t code_unit) {
62 // Check is codepoint is <= U+00FF 62 // Check if codepoint is <= U+00FF.
siva 2012/11/28 18:22:46 Check if codepoint is ...
Søren Gjesse 2012/11/29 09:06:14 Done.
63 return (code_unit <= Utf8::kMaxOneByteChar); 63 return (code_unit <= Utf8::kMaxOneByteChar);
64 } 64 }
65 65
66 66
67 static bool IsSupplementarySequenceStart(uint8_t code_unit) { 67 static bool IsSupplementarySequenceStart(uint8_t code_unit) {
68 // Check if codepoint is >= U+10000. 68 // Check if codepoint is >= U+10000.
69 return (code_unit >= 0xF0); 69 return (code_unit >= 0xF0);
70 } 70 }
71 71
72 72
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
122 is_malformed |= !IsTrailByte(code_unit); 122 is_malformed |= !IsTrailByte(code_unit);
123 ch = (ch << 6) + code_unit; 123 ch = (ch << 6) + code_unit;
124 } else { 124 } else {
125 return false; 125 return false;
126 } 126 }
127 } 127 }
128 ch -= kMagicBits[num_trail_bytes]; 128 ch -= kMagicBits[num_trail_bytes];
129 if (!((is_malformed == false) && 129 if (!((is_malformed == false) &&
130 (j == num_trail_bytes) && 130 (j == num_trail_bytes) &&
131 !IsOutOfRange(ch) && 131 !IsOutOfRange(ch) &&
132 !IsNonShortestForm(ch, j) && 132 !IsNonShortestForm(ch, j))) {
133 !Utf16::IsSurrogate(ch))) {
134 return false; 133 return false;
135 } 134 }
136 } 135 }
137 i += j; 136 i += j;
138 } 137 }
139 return true; 138 return true;
140 } 139 }
141 140
142 141
143 intptr_t Utf8::Length(int32_t ch) { 142 intptr_t Utf8::Length(int32_t ch) {
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
221 ch = (ch << 6) + code_unit; 220 ch = (ch << 6) + code_unit;
222 } else { 221 } else {
223 *dst = -1; 222 *dst = -1;
224 return 0; 223 return 0;
225 } 224 }
226 } 225 }
227 ch -= kMagicBits[num_trail_bytes]; 226 ch -= kMagicBits[num_trail_bytes];
228 if (!((is_malformed == false) && 227 if (!((is_malformed == false) &&
229 (i == num_trail_bytes) && 228 (i == num_trail_bytes) &&
230 !IsOutOfRange(ch) && 229 !IsOutOfRange(ch) &&
231 !IsNonShortestForm(ch, i) && 230 !IsNonShortestForm(ch, i))) {
232 !Utf16::IsSurrogate(ch))) {
233 *dst = -1; 231 *dst = -1;
234 return 0; 232 return 0;
235 } 233 }
236 } 234 }
237 *dst = ch; 235 *dst = ch;
238 return i; 236 return i;
239 } 237 }
240 238
241 239
242 bool Utf8::DecodeToLatin1(const uint8_t* utf8_array, 240 bool Utf8::DecodeToLatin1(const uint8_t* utf8_array,
243 intptr_t array_len, 241 intptr_t array_len,
244 uint8_t* dst, 242 uint8_t* dst,
245 intptr_t len) { 243 intptr_t len) {
246 intptr_t i = 0; 244 intptr_t i = 0;
247 intptr_t j = 0; 245 intptr_t j = 0;
248 intptr_t num_bytes; 246 intptr_t num_bytes;
249 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { 247 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {
250 int32_t ch; 248 int32_t ch;
251 ASSERT(IsLatin1SequenceStart(utf8_array[i])); 249 ASSERT(IsLatin1SequenceStart(utf8_array[i]));
252 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); 250 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);
253 if (ch == -1) { 251 if (ch == -1) {
254 return false; // invalid input 252 return false; // Invalid input.
255 } 253 }
256 ASSERT(ch <= 0xff); 254 ASSERT(ch <= 0xff);
257 dst[j] = ch; 255 dst[j] = ch;
258 } 256 }
259 if ((i < array_len) && (j == len)) { 257 if ((i < array_len) && (j == len)) {
260 return false; // output overflow 258 return false; // Output overflow.
261 } 259 }
262 return true; // success 260 return true; // Success.
263 } 261 }
264 262
265 263
266 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, 264 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array,
267 intptr_t array_len, 265 intptr_t array_len,
268 uint16_t* dst, 266 uint16_t* dst,
269 intptr_t len) { 267 intptr_t len) {
270 intptr_t i = 0; 268 intptr_t i = 0;
271 intptr_t j = 0; 269 intptr_t j = 0;
272 intptr_t num_bytes; 270 intptr_t num_bytes;
273 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { 271 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {
274 int32_t ch; 272 int32_t ch;
275 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]); 273 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]);
276 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); 274 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);
277 if (ch == -1) { 275 if (ch == -1) {
278 return false; // invalid input 276 return false; // Invalid input.
279 } 277 }
280 if (is_supplementary) { 278 if (is_supplementary) {
281 Utf16::Encode(ch, &dst[j]); 279 Utf16::Encode(ch, &dst[j]);
282 j = j + 1; 280 j = j + 1;
283 } else { 281 } else {
284 dst[j] = ch; 282 dst[j] = ch;
285 } 283 }
286 } 284 }
287 if ((i < array_len) && (j == len)) { 285 if ((i < array_len) && (j == len)) {
288 return false; // output overflow 286 return false; // Output overflow.
289 } 287 }
290 return true; // success 288 return true; // Success.
291 } 289 }
292 290
293 291
294 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array, 292 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array,
295 intptr_t array_len, 293 intptr_t array_len,
296 int32_t* dst, 294 int32_t* dst,
297 intptr_t len) { 295 intptr_t len) {
298 intptr_t i = 0; 296 intptr_t i = 0;
299 intptr_t j = 0; 297 intptr_t j = 0;
300 intptr_t num_bytes; 298 intptr_t num_bytes;
301 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { 299 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {
302 int32_t ch; 300 int32_t ch;
303 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); 301 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);
304 if (ch == -1) { 302 if (ch == -1) {
305 return false; // invalid input 303 return false; // Invalid input.
306 } 304 }
307 dst[j] = ch; 305 dst[j] = ch;
308 } 306 }
309 if ((i < array_len) && (j == len)) { 307 if ((i < array_len) && (j == len)) {
310 return false; // output overflow 308 return false; // Output overflow.
311 } 309 }
312 return true; // success 310 return true; // Success.
313 } 311 }
314 312
315 313
316 void Utf16::Encode(int32_t codepoint, uint16_t* dst) { 314 void Utf16::Encode(int32_t codepoint, uint16_t* dst) {
317 ASSERT(codepoint > kMaxBmpCodepoint); 315 ASSERT(codepoint > kMaxBmpCodepoint);
318 ASSERT(dst != NULL); 316 ASSERT(dst != NULL);
319 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10)); 317 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10));
320 dst[1] = (0xDC00 + (codepoint & 0x3FF)); 318 dst[1] = (0xDC00 + (codepoint & 0x3FF));
321 } 319 }
322 320
321
322 bool Utf16::CodePointIterator::Next() {
323 ASSERT(index_ >= -1);
324 ASSERT(index_ < array_len_);
325 int d = Length(ch_);
326 if (index_ == (array_len_ - d)) {
327 return false;
328 }
329 index_ += d;
330 ch_ = utf16_array_[index_];
331 if (IsLeadSurrogate(ch_) && (index_ != (array_len_ - 1))) {
332 int32_t ch2 = utf16_array_[index_ + 1];
333 if (IsTrailSurrogate(ch2)) {
334 ch_ = Decode(ch_, ch2);
335 }
336 }
337 return true;
338 }
339
323 } // namespace dart 340 } // namespace dart
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698