| OLD | NEW |
| 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #include "vm/unicode.h" | 5 #include "vm/unicode.h" |
| 6 | 6 |
| 7 #include "vm/allocation.h" | 7 #include "vm/allocation.h" |
| 8 #include "vm/globals.h" | 8 #include "vm/globals.h" |
| 9 #include "vm/object.h" | 9 #include "vm/object.h" |
| 10 | 10 |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 57 return (code_unit & 0xc0) == 0x80; | 57 return (code_unit & 0xc0) == 0x80; |
| 58 } | 58 } |
| 59 | 59 |
| 60 | 60 |
| 61 static bool IsLatin1SequenceStart(uint8_t code_unit) { | 61 static bool IsLatin1SequenceStart(uint8_t code_unit) { |
| 62 // Check if codepoint is <= U+00FF | 62 // Check if codepoint is <= U+00FF |
| 63 return (code_unit <= Utf8::kMaxOneByteChar); | 63 return (code_unit <= Utf8::kMaxOneByteChar); |
| 64 } | 64 } |
| 65 | 65 |
| 66 | 66 |
| 67 static bool IsSmpSequenceStart(uint8_t code_unit) { | 67 static bool IsSupplementarySequenceStart(uint8_t code_unit) { |
| 68 // Check if codepoint is >= U+10000. | 68 // Check if codepoint is >= U+10000. |
| 69 return (code_unit >= 0xF0); | 69 return (code_unit >= 0xF0); |
| 70 } | 70 } |
| 71 | 71 |
| 72 | 72 |
| 73 // Returns true if the code point value is beyond the last Unicode plane (> U+10FFFF). | 73 // Returns true if the code point value is beyond the last Unicode plane (> U+10FFFF). |
| 74 static bool IsOutOfRange(uint32_t code_point) { | 74 static bool IsOutOfRange(uint32_t code_point) { |
| 75 return (code_point > 0x10FFFF); | 75 return (code_point > 0x10FFFF); |
| 76 } | 76 } |
| 77 | 77 |
| 78 | 78 |
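| 79 // Returns true if the code point is below the minimum for a num_bytes-long sequence (non-shortest, ill-formed). | 79 // Returns true if the code point is below the minimum for a num_bytes-long sequence (non-shortest, ill-formed). |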
| 80 static bool IsNonShortestForm(uint32_t code_point, size_t num_bytes) { | 80 static bool IsNonShortestForm(uint32_t code_point, size_t num_bytes) { |
| 81 return code_point < kOverlongMinimum[num_bytes]; | 81 return code_point < kOverlongMinimum[num_bytes]; |
| 82 } | 82 } |
| 83 | 83 |
| 84 | 84 |
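| 85 // Returns the number of non-trail bytes, counting each lead byte >= 0xF0 (code point >= U+10000) twice. | 85 // Returns the number of non-trail bytes, counting each lead byte >= 0xF0 (code point >= U+10000) twice. |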
| 86 intptr_t Utf8::CodePointCount(const uint8_t* utf8_array, | 86 intptr_t Utf8::CodePointCount(const uint8_t* utf8_array, |
| 87 intptr_t array_len, | 87 intptr_t array_len, |
| 88 Type* type) { | 88 Type* type) { |
| 89 intptr_t len = 0; | 89 intptr_t len = 0; |
| 90 Type char_type = kLatin1; | 90 Type char_type = kLatin1; |
| 91 for (intptr_t i = 0; i < array_len; i++) { | 91 for (intptr_t i = 0; i < array_len; i++) { |
| 92 uint8_t code_unit = utf8_array[i]; | 92 uint8_t code_unit = utf8_array[i]; |
| 93 if (!IsTrailByte(code_unit)) { | 93 if (!IsTrailByte(code_unit)) { |
| 94 ++len; | 94 ++len; |
| 95 } | 95 } |
| 96 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF | 96 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF |
| 97 if (IsSmpSequenceStart(code_unit)) { // >= U+10000 | 97 if (IsSupplementarySequenceStart(code_unit)) { // >= U+10000 |
| 98 char_type = kSMP; | 98 char_type = kSupplementary; |
| 99 ++len; | 99 ++len; |
| 100 } else if (char_type == kLatin1) { | 100 } else if (char_type == kLatin1) { |
| 101 char_type = kBMP; | 101 char_type = kBMP; |
| 102 } | 102 } |
| 103 } | 103 } |
| 104 } | 104 } |
| 105 *type = char_type; | 105 *type = char_type; |
| 106 return len; | 106 return len; |
| 107 } | 107 } |
| 108 | 108 |
| (...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 265 | 265 |
| 266 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, | 266 bool Utf8::DecodeToUTF16(const uint8_t* utf8_array, |
| 267 intptr_t array_len, | 267 intptr_t array_len, |
| 268 uint16_t* dst, | 268 uint16_t* dst, |
| 269 intptr_t len) { | 269 intptr_t len) { |
| 270 intptr_t i = 0; | 270 intptr_t i = 0; |
| 271 intptr_t j = 0; | 271 intptr_t j = 0; |
| 272 intptr_t num_bytes; | 272 intptr_t num_bytes; |
| 273 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { | 273 for (; (i < array_len) && (j < len); i += num_bytes, ++j) { |
| 274 int32_t ch; | 274 int32_t ch; |
| 275 bool is_smp = IsSmpSequenceStart(utf8_array[i]); | 275 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]); |
| 276 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); | 276 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch); |
| 277 if (ch == -1) { | 277 if (ch == -1) { |
| 278 return false; // invalid input | 278 return false; // invalid input |
| 279 } | 279 } |
| 280 if (is_smp) { | 280 if (is_supplementary) { |
| 281 Utf16::Encode(ch, &dst[j]); | 281 Utf16::Encode(ch, &dst[j]); |
| 282 j = j + 1; | 282 j = j + 1; |
| 283 } else { | 283 } else { |
| 284 dst[j] = ch; | 284 dst[j] = ch; |
| 285 } | 285 } |
| 286 } | 286 } |
| 287 if ((i < array_len) && (j == len)) { | 287 if ((i < array_len) && (j == len)) { |
| 288 return false; // output overflow | 288 return false; // output overflow |
| 289 } | 289 } |
| 290 return true; // success | 290 return true; // success |
| (...skipping 23 matching lines...) Expand all Loading... |
| 314 | 314 |
| 315 | 315 |
| 316 void Utf16::Encode(int32_t codepoint, uint16_t* dst) { | 316 void Utf16::Encode(int32_t codepoint, uint16_t* dst) { |
| 317 ASSERT(codepoint > kMaxBmpCodepoint); | 317 ASSERT(codepoint > kMaxBmpCodepoint); |
| 318 ASSERT(dst != NULL); | 318 ASSERT(dst != NULL); |
| 319 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10)); | 319 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10)); |
| 320 dst[1] = (0xDC00 + (codepoint & 0x3FF)); | 320 dst[1] = (0xDC00 + (codepoint & 0x3FF)); |
| 321 } | 321 } |
| 322 | 322 |
| 323 } // namespace dart | 323 } // namespace dart |
| OLD | NEW |