OLD | NEW |
---|---|
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/unicode.h" | 5 #include "vm/unicode.h" |
6 | 6 |
7 #include "vm/allocation.h" | 7 #include "vm/allocation.h" |
8 #include "vm/globals.h" | 8 #include "vm/globals.h" |
9 #include "vm/object.h" | 9 #include "vm/object.h" |
10 | 10 |
11 namespace dart { | 11 namespace dart { |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
52 0xFFFFFFFF | 52 0xFFFFFFFF |
53 }; | 53 }; |
54 | 54 |
55 | 55 |
56 static bool IsTrailByte(uint8_t code_unit) { | 56 static bool IsTrailByte(uint8_t code_unit) { |
57 return (code_unit & 0xc0) == 0x80; | 57 return (code_unit & 0xc0) == 0x80; |
58 } | 58 } |
59 | 59 |
60 | 60 |
61 static bool IsLatin1SequenceStart(uint8_t code_unit) { | 61 static bool IsLatin1SequenceStart(uint8_t code_unit) { |
62 // Check is codepoint is <= U+00FF | 62 // Check if codepoint is <= U+00FF. |
siva
2012/11/28 18:22:46
Check if codepoint is ...
Søren Gjesse
2012/11/29 09:06:14
Done.
| |
63 return (code_unit <= Utf8::kMaxOneByteChar); | 63 return (code_unit <= Utf8::kMaxOneByteChar); |
64 } | 64 } |
65 | 65 |
66 | 66 |
67 static bool IsSupplementarySequenceStart(uint8_t code_unit) { | 67 static bool IsSupplementarySequenceStart(uint8_t code_unit) { |
68 // Check if codepoint is >= U+10000. | 68 // Check if codepoint is >= U+10000. |
69 return (code_unit >= 0xF0); | 69 return (code_unit >= 0xF0); |
70 } | 70 } |
71 | 71 |
72 | 72 |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
122 is_malformed |= !IsTrailByte(code_unit); | 122 is_malformed |= !IsTrailByte(code_unit); |
123 ch = (ch << 6) + code_unit; | 123 ch = (ch << 6) + code_unit; |
124 } else { | 124 } else { |
125 return false; | 125 return false; |
126 } | 126 } |
127 } | 127 } |
128 ch -= kMagicBits[num_trail_bytes]; | 128 ch -= kMagicBits[num_trail_bytes]; |
129 if (!((is_malformed == false) && | 129 if (!((is_malformed == false) && |
130 (j == num_trail_bytes) && | 130 (j == num_trail_bytes) && |
131 !IsOutOfRange(ch) && | 131 !IsOutOfRange(ch) && |
132 !IsNonShortestForm(ch, j) && | 132 !IsNonShortestForm(ch, j))) { |
133 !Utf16::IsSurrogate(ch))) { | |
134 return false; | 133 return false; |
135 } | 134 } |
136 } | 135 } |
137 i += j; | 136 i += j; |
138 } | 137 } |
139 return true; | 138 return true; |
140 } | 139 } |
141 | 140 |
142 | 141 |
143 intptr_t Utf8::Length(int32_t ch) { | 142 intptr_t Utf8::Length(int32_t ch) { |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
221 ch = (ch << 6) + code_unit; | 220 ch = (ch << 6) + code_unit; |
222 } else { | 221 } else { |
223 *dst = -1; | 222 *dst = -1; |
224 return 0; | 223 return 0; |
225 } | 224 } |
226 } | 225 } |
227 ch -= kMagicBits[num_trail_bytes]; | 226 ch -= kMagicBits[num_trail_bytes]; |
228 if (!((is_malformed == false) && | 227 if (!((is_malformed == false) && |
229 (i == num_trail_bytes) && | 228 (i == num_trail_bytes) && |
230 !IsOutOfRange(ch) && | 229 !IsOutOfRange(ch) && |
231 !IsNonShortestForm(ch, i) && | 230 !IsNonShortestForm(ch, i))) { |
232 !Utf16::IsSurrogate(ch))) { | |
233 *dst = -1; | 231 *dst = -1; |
234 return 0; | 232 return 0; |
235 } | 233 } |
236 } | 234 } |
237 *dst = ch; | 235 *dst = ch; |
238 return i; | 236 return i; |
239 } | 237 } |
240 | 238 |
241 | 239 |
242 bool Utf8::DecodeToLatin1(const uint8_t* utf8_array, | 240 bool Utf8::DecodeToLatin1(const uint8_t* utf8_array, |
243 intptr_t array_len, | 241 intptr_t array_len, |
244 uint8_t* dst, | 242 uint8_t* dst, |
245 intptr_t len) { | 243 intptr_t len) { |
246 intptr_t i = 0; | 244 intptr_t i = 0; |
247 intptr_t j = 0; | 245 intptr_t j = 0; |
248 intptr_t num_bytes; | 246 intptr_t num_bytes; |
249 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { | 247 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { |
250 int32_t ch; | 248 int32_t ch; |
251 ASSERT(IsLatin1SequenceStart(utf8_array[i])); | 249 ASSERT(IsLatin1SequenceStart(utf8_array[i])); |
252 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); | 250 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); |
253 if (ch == -1) { | 251 if (ch == -1) { |
254 return false; // invalid input | 252 return false; // Invalid input. |
255 } | 253 } |
256 ASSERT(ch <= 0xff); | 254 ASSERT(ch <= 0xff); |
257 dst[j] = ch; | 255 dst[j] = ch; |
258 } | 256 } |
259 if ((i < array_len) && (j == len)) { | 257 if ((i < array_len) && (j == len)) { |
260 return false; // output overflow | 258 return false; // Output overflow. |
261 } | 259 } |
262 return true; // success | 260 return true; // Success. |
263 } | 261 } |
264 | 262 |
265 | 263 |
266 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, | 264 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, |
267 intptr_t array_len, | 265 intptr_t array_len, |
268 uint16_t* dst, | 266 uint16_t* dst, |
269 intptr_t len) { | 267 intptr_t len) { |
270 intptr_t i = 0; | 268 intptr_t i = 0; |
271 intptr_t j = 0; | 269 intptr_t j = 0; |
272 intptr_t num_bytes; | 270 intptr_t num_bytes; |
273 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { | 271 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { |
274 int32_t ch; | 272 int32_t ch; |
275 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]); | 273 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]); |
276 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); | 274 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); |
277 if (ch == -1) { | 275 if (ch == -1) { |
278 return false; // invalid input | 276 return false; // Invalid input. |
279 } | 277 } |
280 if (is_supplementary) { | 278 if (is_supplementary) { |
281 Utf16::Encode(ch, &dst[j]); | 279 Utf16::Encode(ch, &dst[j]); |
282 j = j + 1; | 280 j = j + 1; |
283 } else { | 281 } else { |
284 dst[j] = ch; | 282 dst[j] = ch; |
285 } | 283 } |
286 } | 284 } |
287 if ((i < array_len) && (j == len)) { | 285 if ((i < array_len) && (j == len)) { |
288 return false; // output overflow | 286 return false; // Output overflow. |
289 } | 287 } |
290 return true; // success | 288 return true; // Success. |
291 } | 289 } |
292 | 290 |
293 | 291 |
294 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array, | 292 bool Utf8::DecodeToUTF32(const uint8_t* utf8_array, |
295 intptr_t array_len, | 293 intptr_t array_len, |
296 int32_t* dst, | 294 int32_t* dst, |
297 intptr_t len) { | 295 intptr_t len) { |
298 intptr_t i = 0; | 296 intptr_t i = 0; |
299 intptr_t j = 0; | 297 intptr_t j = 0; |
300 intptr_t num_bytes; | 298 intptr_t num_bytes; |
301 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { | 299 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { |
302 int32_t ch; | 300 int32_t ch; |
303 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); | 301 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); |
304 if (ch == -1) { | 302 if (ch == -1) { |
305 return false; // invalid input | 303 return false; // Invalid input. |
306 } | 304 } |
307 dst[j] = ch; | 305 dst[j] = ch; |
308 } | 306 } |
309 if ((i < array_len) && (j == len)) { | 307 if ((i < array_len) && (j == len)) { |
310 return false; // output overflow | 308 return false; // Output overflow. |
311 } | 309 } |
312 return true; // success | 310 return true; // Success. |
313 } | 311 } |
314 | 312 |
315 | 313 |
316 void Utf16::Encode(int32_t codepoint, uint16_t* dst) { | 314 void Utf16::Encode(int32_t codepoint, uint16_t* dst) { |
317 ASSERT(codepoint > kMaxBmpCodepoint); | 315 ASSERT(codepoint > kMaxBmpCodepoint); |
318 ASSERT(dst != NULL); | 316 ASSERT(dst != NULL); |
319 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10)); | 317 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10)); |
320 dst[1] = (0xDC00 + (codepoint & 0x3FF)); | 318 dst[1] = (0xDC00 + (codepoint & 0x3FF)); |
321 } | 319 } |
322 | 320 |
321 | |
322 bool Utf16::CodePointIterator::Next() { | |
323 ASSERT(index_ >= -1); | |
324 ASSERT(index_ < array_len_); | |
325 int d = Length(ch_); | |
326 if (index_ == (array_len_ - d)) { | |
327 return false; | |
328 } | |
329 index_ += d; | |
330 ch_ = utf16_array_[index_]; | |
331 if (IsLeadSurrogate(ch_) && (index_ != (array_len_ - 1))) { | |
332 int32_t ch2 = utf16_array_[index_ + 1]; | |
333 if (IsTrailSurrogate(ch2)) { | |
334 ch_ = Decode(ch_, ch2); | |
335 } | |
336 } | |
337 return true; | |
338 } | |
339 | |
323 } // namespace dart | 340 } // namespace dart |
OLD | NEW |