OLD | NEW |
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/unicode.h" | 5 #include "vm/unicode.h" |
6 | 6 |
7 #include "vm/allocation.h" | 7 #include "vm/allocation.h" |
8 #include "vm/globals.h" | 8 #include "vm/globals.h" |
9 #include "vm/object.h" | 9 #include "vm/object.h" |
10 | 10 |
11 namespace dart { | 11 namespace dart { |
(...skipping 12 matching lines...) Expand all Loading... |
24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
25 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 25 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
26 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | 26 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
27 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | 27 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
28 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | 28 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
29 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 | 29 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0 |
30 }; | 30 }; |
31 | 31 |
32 | 32 |
33 const uint32_t Utf8::kMagicBits[7] = { | 33 const uint32_t Utf8::kMagicBits[7] = { |
34 0, // padding | 34 0, // Padding. |
35 0x00000000, | 35 0x00000000, |
36 0x00003080, | 36 0x00003080, |
37 0x000E2080, | 37 0x000E2080, |
38 0x03C82080, | 38 0x03C82080, |
39 0xFA082080, | 39 0xFA082080, |
40 0x82082080 | 40 0x82082080 |
41 }; | 41 }; |
42 | 42 |
43 | 43 |
44 // Minimum values of code points used to check shortest form. | 44 // Minimum values of code points used to check shortest form. |
45 const uint32_t Utf8::kOverlongMinimum[7] = { | 45 const uint32_t Utf8::kOverlongMinimum[7] = { |
46 0, // padding | 46 0, // Padding. |
47 0x0, | 47 0x0, |
48 0x80, | 48 0x80, |
49 0x800, | 49 0x800, |
50 0x10000, | 50 0x10000, |
51 0xFFFFFFFF, | 51 0xFFFFFFFF, |
52 0xFFFFFFFF | 52 0xFFFFFFFF |
53 }; | 53 }; |
54 | 54 |
55 | 55 |
56 // Returns a count of the number of UTF-8 trail bytes. | 56 // Returns a count of the number of UTF-8 trail bytes. |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
129 String::CodePointIterator it(str); | 129 String::CodePointIterator it(str); |
130 while (it.Next()) { | 130 while (it.Next()) { |
131 int32_t ch = it.Current(); | 131 int32_t ch = it.Current(); |
132 length += Utf8::Length(ch); | 132 length += Utf8::Length(ch); |
133 } | 133 } |
134 return length; | 134 return length; |
135 } | 135 } |
136 | 136 |
137 | 137 |
138 intptr_t Utf8::Encode(int32_t ch, char* dst) { | 138 intptr_t Utf8::Encode(int32_t ch, char* dst) { |
| 139 ASSERT(!Utf16::IsSurrogate(ch)); |
139 static const int kMask = ~(1 << 6); | 140 static const int kMask = ~(1 << 6); |
140 if (ch <= kMaxOneByteChar) { | 141 if (ch <= kMaxOneByteChar) { |
141 dst[0] = ch; | 142 dst[0] = ch; |
142 return 1; | 143 return 1; |
143 } | 144 } |
144 if (ch <= kMaxTwoByteChar) { | 145 if (ch <= kMaxTwoByteChar) { |
145 dst[0] = 0xC0 | (ch >> 6); | 146 dst[0] = 0xC0 | (ch >> 6); |
146 dst[1] = 0x80 | (ch & kMask); | 147 dst[1] = 0x80 | (ch & kMask); |
147 return 2; | 148 return 2; |
148 } | 149 } |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
215 uint8_t* dst, | 216 uint8_t* dst, |
216 intptr_t len) { | 217 intptr_t len) { |
217 intptr_t i = 0; | 218 intptr_t i = 0; |
218 intptr_t j = 0; | 219 intptr_t j = 0; |
219 intptr_t num_bytes; | 220 intptr_t num_bytes; |
220 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { | 221 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { |
221 int32_t ch; | 222 int32_t ch; |
222 ASSERT(IsLatin1SequenceStart(utf8_array[i])); | 223 ASSERT(IsLatin1SequenceStart(utf8_array[i])); |
223 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); | 224 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); |
224 if (ch == -1) { | 225 if (ch == -1) { |
225 return false; // invalid input | 226 return false; // Invalid input. |
226 } | 227 } |
227 ASSERT(Utf::IsLatin1(ch)); | 228 ASSERT(Utf::IsLatin1(ch)); |
228 dst[j] = ch; | 229 dst[j] = ch; |
229 } | 230 } |
230 if ((i < array_len) && (j == len)) { | 231 if ((i < array_len) && (j == len)) { |
231 return false; // output overflow | 232 return false; // Output overflow. |
232 } | 233 } |
233 return true; // success | 234 return true; // Success. |
234 } | 235 } |
235 | 236 |
236 | 237 |
237 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, | 238 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, |
238 intptr_t array_len, | 239 intptr_t array_len, |
239 uint16_t* dst, | 240 uint16_t* dst, |
240 intptr_t len) { | 241 intptr_t len) { |
241 intptr_t i = 0; | 242 intptr_t i = 0; |
242 intptr_t j = 0; | 243 intptr_t j = 0; |
243 intptr_t num_bytes; | 244 intptr_t num_bytes; |
244 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { | 245 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { |
245 int32_t ch; | 246 int32_t ch; |
246 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]); | 247 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]); |
247 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); | 248 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); |
248 if (ch == -1) { | 249 if (ch == -1) { |
249 return false; // invalid input | 250 return false; // Invalid input. |
250 } | 251 } |
251 if (is_supplementary) { | 252 if (is_supplementary) { |
252 Utf16::Encode(ch, &dst[j]); | 253 Utf16::Encode(ch, &dst[j]); |
253 j = j + 1; | 254 j = j + 1; |
254 } else { | 255 } else { |
255 dst[j] = ch; | 256 dst[j] = ch; |
256 } | 257 } |
257 } | 258 } |
258 if ((i < array_len) && (j == len)) { | 259 if ((i < array_len) && (j == len)) { |
259 return false; // output overflow | 260 return false; // Output overflow. |
260 } | 261 } |
261 return true; // success | 262 return true; // Success. |
262 } | 263 } |
263 | 264 |
264 | 265 |
265 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array, | 266 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array, |
266 intptr_t array_len, | 267 intptr_t array_len, |
267 int32_t* dst, | 268 int32_t* dst, |
268 intptr_t len) { | 269 intptr_t len) { |
269 intptr_t i = 0; | 270 intptr_t i = 0; |
270 intptr_t j = 0; | 271 intptr_t j = 0; |
271 intptr_t num_bytes; | 272 intptr_t num_bytes; |
272 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { | 273 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { |
273 int32_t ch; | 274 int32_t ch; |
274 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); | 275 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); |
275 if (ch == -1) { | 276 if (ch == -1) { |
276 return false; // invalid input | 277 return false; // Invalid input. |
277 } | 278 } |
278 dst[j] = ch; | 279 dst[j] = ch; |
279 } | 280 } |
280 if ((i < array_len) && (j == len)) { | 281 if ((i < array_len) && (j == len)) { |
281 return false; // output overflow | 282 return false; // Output overflow. |
282 } | 283 } |
283 return true; // success | 284 return true; // Success. |
284 } | 285 } |
285 | 286 |
286 | 287 |
287 bool Utf8::DecodeCStringToUTF32(const char* str, int32_t* dst, intptr_t len) { | 288 bool Utf8::DecodeCStringToUTF32(const char* str, int32_t* dst, intptr_t len) { |
288 ASSERT(str != NULL); | 289 ASSERT(str != NULL); |
289 intptr_t array_len = strlen(str); | 290 intptr_t array_len = strlen(str); |
290 const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str); | 291 const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str); |
291 return Utf8::DecodeToUTF32(utf8_array, array_len, dst, len); | 292 return Utf8::DecodeToUTF32(utf8_array, array_len, dst, len); |
292 } | 293 } |
293 | 294 |
294 | 295 |
295 void Utf16::Encode(int32_t codepoint, uint16_t* dst) { | 296 void Utf16::Encode(int32_t codepoint, uint16_t* dst) { |
296 ASSERT(codepoint > Utf16::kMaxCodeUnit); | 297 ASSERT(codepoint > Utf16::kMaxCodeUnit); |
297 ASSERT(dst != NULL); | 298 ASSERT(dst != NULL); |
298 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10)); | 299 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10)); |
299 dst[1] = (0xDC00 + (codepoint & 0x3FF)); | 300 dst[1] = (0xDC00 + (codepoint & 0x3FF)); |
300 } | 301 } |
301 | 302 |
302 } // namespace dart | 303 } // namespace dart |
OLD | NEW |