OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #ifndef VM_UNICODE_H_ | 5 #ifndef VM_UNICODE_H_ |
6 #define VM_UNICODE_H_ | 6 #define VM_UNICODE_H_ |
7 | 7 |
8 #include "vm/allocation.h" | 8 #include "vm/allocation.h" |
9 #include "vm/globals.h" | 9 #include "vm/globals.h" |
10 | 10 |
(...skipping 25 matching lines...) Expand all Loading... |
36 | 36 |
37 | 37 |
38 class Utf8 : AllStatic { | 38 class Utf8 : AllStatic { |
39 public: | 39 public: |
40 enum Type { | 40 enum Type { |
41 kLatin1 = 0, // Latin-1 code point [U+0000, U+00FF]. | 41 kLatin1 = 0, // Latin-1 code point [U+0000, U+00FF]. |
42 kBMP, // Basic Multilingual Plane code point [U+0000, U+FFFF]. | 42 kBMP, // Basic Multilingual Plane code point [U+0000, U+FFFF]. |
43 kSupplementary, // Supplementary code point [U+010000, U+10FFFF]. | 43 kSupplementary, // Supplementary code point [U+010000, U+10FFFF]. |
44 }; | 44 }; |
45 | 45 |
46 static intptr_t CodePointCount(const uint8_t* utf8_array, | 46 // Returns the most restricted coding form in which the sequence of UTF-8 |
47 intptr_t array_len, | 47 // characters in 'utf8_array' can be represented, and the number of |
48 Type* type); | 48 // code units needed in that form. |
| 49 static intptr_t CodeUnitCount(const uint8_t* utf8_array, |
| 50 intptr_t array_len, |
| 51 Type* type); |
49 | 52 |
50 // Returns true if 'utf8_array' is a valid UTF-8 string. | 53 // Returns true if 'utf8_array' is a valid UTF-8 string. |
51 static bool IsValid(const uint8_t* utf8_array, intptr_t array_len); | 54 static bool IsValid(const uint8_t* utf8_array, intptr_t array_len); |
52 | 55 |
53 static intptr_t Length(int32_t ch); | 56 static intptr_t Length(int32_t ch); |
54 static intptr_t Length(const String& str); | 57 static intptr_t Length(const String& str); |
55 | 58 |
56 static intptr_t Encode(int32_t ch, char* dst); | 59 static intptr_t Encode(int32_t ch, char* dst); |
57 static intptr_t Encode(const String& src, char* dst, intptr_t len); | 60 static intptr_t Encode(const String& src, char* dst, intptr_t len); |
58 | 61 |
(...skipping 17 matching lines...) Expand all Loading... |
76 int32_t* dst, | 79 int32_t* dst, |
77 intptr_t len); | 80 intptr_t len); |
78 | 81 |
79 private: | 82 private: |
80 static const int32_t kMaxOneByteChar = 0x7F; | 83 static const int32_t kMaxOneByteChar = 0x7F; |
81 static const int32_t kMaxTwoByteChar = 0x7FF; | 84 static const int32_t kMaxTwoByteChar = 0x7FF; |
82 static const int32_t kMaxThreeByteChar = 0xFFFF; | 85 static const int32_t kMaxThreeByteChar = 0xFFFF; |
83 static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint; | 86 static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint; |
84 | 87 |
85 static bool IsTrailByte(uint8_t code_unit) { | 88 static bool IsTrailByte(uint8_t code_unit) { |
86 return (code_unit & 0xc0) == 0x80; | 89 return (code_unit & 0xC0) == 0x80; |
87 } | 90 } |
88 | 91 |
89 static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) { | 92 static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) { |
90 return code_point < kOverlongMinimum[num_code_units]; | 93 return code_point < kOverlongMinimum[num_code_units]; |
91 } | 94 } |
92 | 95 |
93 static bool IsLatin1SequenceStart(uint8_t code_unit) { | 96 static bool IsLatin1SequenceStart(uint8_t code_unit) { |
94 // Check is codepoint is <= U+00FF | 97 // Check if utf8 sequence is the start of a codepoint <= U+00FF |
95 return (code_unit <= Utf8::kMaxOneByteChar); | 98 return (code_unit <= 0xC3); |
96 } | 99 } |
97 | 100 |
98 static bool IsSupplementarySequenceStart(uint8_t code_unit) { | 101 static bool IsSupplementarySequenceStart(uint8_t code_unit) { |
99 // Check is codepoint is >= U+10000. | 102 // Check if utf8 sequence is the start of a codepoint >= U+10000. |
100 return (code_unit >= 0xF0); | 103 return (code_unit >= 0xF0); |
101 } | 104 } |
102 | 105 |
103 static const int8_t kTrailBytes[]; | 106 static const int8_t kTrailBytes[]; |
104 static const uint32_t kMagicBits[]; | 107 static const uint32_t kMagicBits[]; |
105 static const uint32_t kOverlongMinimum[]; | 108 static const uint32_t kOverlongMinimum[]; |
106 }; | 109 }; |
107 | 110 |
108 | 111 |
109 class Utf16 : AllStatic { | 112 class Utf16 : AllStatic { |
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
219 // Data for small code points with one mapping | 222 // Data for small code points with one mapping |
220 static const int16_t stage2_[]; | 223 static const int16_t stage2_[]; |
221 | 224 |
222 // Data for large code points or code points with both mappings. | 225 // Data for large code points or code points with both mappings. |
223 static const int32_t stage2_exception_[][2]; | 226 static const int32_t stage2_exception_[][2]; |
224 }; | 227 }; |
225 | 228 |
226 } // namespace dart | 229 } // namespace dart |
227 | 230 |
228 #endif // VM_UNICODE_H_ | 231 #endif // VM_UNICODE_H_ |
OLD | NEW |