OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/unicode.h" | 5 #include "vm/unicode.h" |
6 | 6 |
7 #include "vm/allocation.h" | 7 #include "vm/allocation.h" |
8 #include "vm/globals.h" | 8 #include "vm/globals.h" |
9 #include "vm/object.h" | 9 #include "vm/object.h" |
10 | 10 |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
46 0, // Padding. | 46 0, // Padding. |
47 0x0, | 47 0x0, |
48 0x80, | 48 0x80, |
49 0x800, | 49 0x800, |
50 0x10000, | 50 0x10000, |
51 0xFFFFFFFF, | 51 0xFFFFFFFF, |
52 0xFFFFFFFF | 52 0xFFFFFFFF |
53 }; | 53 }; |
54 | 54 |
55 | 55 |
56 // Returns a count of the number of UTF-8 trail bytes. | 56 // Stores in '*type' the most restricted coding form in which the UTF-8 |
57 intptr_t Utf8::CodePointCount(const uint8_t* utf8_array, | 57 // sequence in 'utf8_array' can be represented, and returns the number of |
58 intptr_t array_len, | 58 // code units needed in that form. |
59 Type* type) { | 59 intptr_t Utf8::CodeUnitCount(const uint8_t* utf8_array, |
| 60 intptr_t array_len, |
| 61 Type* type) { |
60 intptr_t len = 0; | 62 intptr_t len = 0; |
61 Type char_type = kLatin1; | 63 Type char_type = kLatin1; |
62 for (intptr_t i = 0; i < array_len; i++) { | 64 for (intptr_t i = 0; i < array_len; i++) { |
63 uint8_t code_unit = utf8_array[i]; | 65 uint8_t code_unit = utf8_array[i]; |
64 if (!IsTrailByte(code_unit)) { | 66 if (!IsTrailByte(code_unit)) { |
65 ++len; | 67 ++len; |
66 } | 68 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF |
67 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF | 69 if (IsSupplementarySequenceStart(code_unit)) { // >= U+10000 |
68 if (IsSupplementarySequenceStart(code_unit)) { // >= U+10000 | 70 char_type = kSupplementary; |
69 char_type = kSupplementary; | 71 ++len; |
70 ++len; | 72 } else if (char_type == kLatin1) { |
71 } else if (char_type == kLatin1) { | 73 char_type = kBMP; |
72 char_type = kBMP; | 74 } |
73 } | 75 } |
74 } | 76 } |
75 } | 77 } |
76 *type = char_type; | 78 *type = char_type; |
77 return len; | 79 return len; |
78 } | 80 } |
79 | 81 |
80 | 82 |
81 // Returns true if str is a valid NUL-terminated UTF-8 string. | 83 // Returns true if str is a valid NUL-terminated UTF-8 string. |
82 bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) { | 84 bool Utf8::IsValid(const uint8_t* utf8_array, intptr_t array_len) { |
(...skipping 211 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
294 | 296 |
295 | 297 |
296 void Utf16::Encode(int32_t codepoint, uint16_t* dst) { | 298 void Utf16::Encode(int32_t codepoint, uint16_t* dst) { |
297 ASSERT(codepoint > Utf16::kMaxCodeUnit); | 299 ASSERT(codepoint > Utf16::kMaxCodeUnit); |
298 ASSERT(dst != NULL); | 300 ASSERT(dst != NULL); |
299 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10)); | 301 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10)); |
300 dst[1] = (0xDC00 + (codepoint & 0x3FF)); | 302 dst[1] = (0xDC00 + (codepoint & 0x3FF)); |
301 } | 303 } |
302 | 304 |
303 } // namespace dart | 305 } // namespace dart |
OLD | NEW |