OLD | NEW |
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/unicode.h" | 5 #include "vm/unicode.h" |
6 | 6 |
7 #include "vm/allocation.h" | 7 #include "vm/allocation.h" |
8 #include "vm/globals.h" | 8 #include "vm/globals.h" |
9 #include "vm/object.h" | 9 #include "vm/object.h" |
10 | 10 |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
51 0xFFFFFFFF, | 51 0xFFFFFFFF, |
52 0xFFFFFFFF | 52 0xFFFFFFFF |
53 }; | 53 }; |
54 | 54 |
55 | 55 |
56 static bool IsTrailByte(uint8_t code_unit) { | 56 static bool IsTrailByte(uint8_t code_unit) { |
57 return (code_unit & 0xc0) == 0x80; | 57 return (code_unit & 0xc0) == 0x80; |
58 } | 58 } |
59 | 59 |
60 | 60 |
61 static bool IsAsciiSequenceStart(uint8_t code_unit) { | 61 static bool IsLatin1SequenceStart(uint8_t code_unit) { |
62 // Check is codepoint is <= U+007F | 62 // Check is codepoint is <= U+00FF |
63 return (code_unit <= Utf8::kMaxOneByteChar); | 63 return (code_unit <= Utf8::kMaxOneByteChar); |
64 } | 64 } |
65 | 65 |
66 | 66 |
67 static bool IsSmpSequenceStart(uint8_t code_unit) { | 67 static bool IsSmpSequenceStart(uint8_t code_unit) { |
68 // Check is codepoint is >= U+10000. | 68 // Check is codepoint is >= U+10000. |
69 return (code_unit >= 0xF0); | 69 return (code_unit >= 0xF0); |
70 } | 70 } |
71 | 71 |
72 | 72 |
(...skipping 21 matching lines...) Expand all Loading... |
94 dst[0] = (Utf8::kLeadOffset + (codepoint >> 10)); | 94 dst[0] = (Utf8::kLeadOffset + (codepoint >> 10)); |
95 dst[1] = (0xDC00 + (codepoint & 0x3FF)); | 95 dst[1] = (0xDC00 + (codepoint & 0x3FF)); |
96 } | 96 } |
97 | 97 |
98 | 98 |
99 // Returns a count of the number of UTF-8 trail bytes. | 99 // Returns a count of the number of UTF-8 trail bytes. |
100 intptr_t Utf8::CodePointCount(const uint8_t* utf8_array, | 100 intptr_t Utf8::CodePointCount(const uint8_t* utf8_array, |
101 intptr_t array_len, | 101 intptr_t array_len, |
102 Type* type) { | 102 Type* type) { |
103 intptr_t len = 0; | 103 intptr_t len = 0; |
104 Type char_type = kAscii; | 104 Type char_type = kLatin1; |
105 for (intptr_t i = 0; i < array_len; i++) { | 105 for (intptr_t i = 0; i < array_len; i++) { |
106 uint8_t code_unit = utf8_array[i]; | 106 uint8_t code_unit = utf8_array[i]; |
107 if (!IsTrailByte(code_unit)) { | 107 if (!IsTrailByte(code_unit)) { |
108 ++len; | 108 ++len; |
109 } | 109 } |
110 if (!IsAsciiSequenceStart(code_unit)) { // > U+007F | 110 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF |
111 if (IsSmpSequenceStart(code_unit)) { // >= U+10000 | 111 if (IsSmpSequenceStart(code_unit)) { // >= U+10000 |
112 char_type = kSMP; | 112 char_type = kSMP; |
113 ++len; | 113 ++len; |
114 } else if (char_type == kAscii) { | 114 } else if (char_type == kLatin1) { |
115 char_type = kBMP; | 115 char_type = kBMP; |
116 } | 116 } |
117 } | 117 } |
118 } | 118 } |
119 *type = char_type; | 119 *type = char_type; |
120 return len; | 120 return len; |
121 } | 121 } |
122 | 122 |
123 | 123 |
124 // Returns true if str is a valid NUL-terminated UTF-8 string. | 124 // Returns true if str is a valid NUL-terminated UTF-8 string. |
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
244 !IsSurrogate(ch))) { | 244 !IsSurrogate(ch))) { |
245 *dst = -1; | 245 *dst = -1; |
246 return 0; | 246 return 0; |
247 } | 247 } |
248 } | 248 } |
249 *dst = ch; | 249 *dst = ch; |
250 return i; | 250 return i; |
251 } | 251 } |
252 | 252 |
253 | 253 |
254 bool Utf8::DecodeToAscii(const uint8_t* utf8_array, | 254 bool Utf8::DecodeToLatin1(const uint8_t* utf8_array, |
255 intptr_t array_len, | 255 intptr_t array_len, |
256 uint8_t* dst, | 256 uint8_t* dst, |
257 intptr_t len) { | 257 intptr_t len) { |
258 if (len < array_len) { | 258 intptr_t i = 0; |
| 259 intptr_t j = 0; |
| 260 intptr_t num_bytes; |
| 261 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { |
| 262 int32_t ch; |
| 263 ASSERT(IsLatin1SequenceStart(utf8_array[i])); |
| 264 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); |
| 265 if (ch == -1) { |
| 266 return false; // invalid input |
| 267 } |
| 268 ASSERT(ch <= 0xff); |
| 269 dst[j] = ch; |
| 270 } |
| 271 if ((i < array_len) && (j == len)) { |
259 return false; // output overflow | 272 return false; // output overflow |
260 } | 273 } |
261 #ifdef DEBUG | |
262 for (intptr_t i = 0; i < array_len; i++) { | |
263 ASSERT(IsAsciiSequenceStart(utf8_array[i])); | |
264 } | |
265 #endif | |
266 memmove(dst, utf8_array, array_len); | |
267 return true; // success | 274 return true; // success |
268 } | 275 } |
269 | 276 |
270 | 277 |
271 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, | 278 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, |
272 intptr_t array_len, | 279 intptr_t array_len, |
273 uint16_t* dst, | 280 uint16_t* dst, |
274 intptr_t len) { | 281 intptr_t len) { |
275 intptr_t i = 0; | 282 intptr_t i = 0; |
276 intptr_t j = 0; | 283 intptr_t j = 0; |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
311 } | 318 } |
312 dst[j] = ch; | 319 dst[j] = ch; |
313 } | 320 } |
314 if ((i < array_len) && (j == len)) { | 321 if ((i < array_len) && (j == len)) { |
315 return false; // output overflow | 322 return false; // output overflow |
316 } | 323 } |
317 return true; // success | 324 return true; // success |
318 } | 325 } |
319 | 326 |
320 } // namespace dart | 327 } // namespace dart |
OLD | NEW |