OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #ifndef RUNTIME_VM_UNICODE_H_ | 5 #ifndef RUNTIME_VM_UNICODE_H_ |
6 #define RUNTIME_VM_UNICODE_H_ | 6 #define RUNTIME_VM_UNICODE_H_ |
7 | 7 |
8 #include "vm/allocation.h" | 8 #include "vm/allocation.h" |
9 #include "vm/globals.h" | 9 #include "vm/globals.h" |
10 | 10 |
(...skipping 20 matching lines...) Expand all Loading... |
31 // Returns true if the code point value is negative or above kMaxCodePoint. | 31 // Returns true if the code point value is negative or above kMaxCodePoint. |
32 static bool IsOutOfRange(intptr_t code_point) { | 32 static bool IsOutOfRange(intptr_t code_point) { |
33 return (code_point < 0) || (code_point > kMaxCodePoint); | 33 return (code_point < 0) || (code_point > kMaxCodePoint); |
34 } | 34 } |
35 }; | 35 }; |
36 | 36 |
37 | 37 |
38 class Utf8 : AllStatic { | 38 class Utf8 : AllStatic { |
39 public: | 39 public: |
40 enum Type { | 40 enum Type { |
41 kLatin1 = 0, // Latin-1 code point [U+0000, U+00FF]. | 41 kLatin1 = 0, // Latin-1 code point [U+0000, U+00FF]. |
42 kBMP, // Basic Multilingual Plane code point [U+0000, U+FFFF]. | 42 kBMP, // Basic Multilingual Plane code point [U+0000, U+FFFF]. |
43 kSupplementary, // Supplementary code point [U+010000, U+10FFFF]. | 43 kSupplementary, // Supplementary code point [U+010000, U+10FFFF]. |
44 }; | 44 }; |
45 | 45 |
46 // Returns the most restricted coding form in which the sequence of utf8 | 46 // Returns the most restricted coding form in which the sequence of utf8 |
47 // characters in 'utf8_array' can be represented, and the number of | 47 // characters in 'utf8_array' can be represented, and the number of |
48 // code units needed in that form. | 48 // code units needed in that form. |
49 static intptr_t CodeUnitCount(const uint8_t* utf8_array, | 49 static intptr_t CodeUnitCount(const uint8_t* utf8_array, |
50 intptr_t array_len, | 50 intptr_t array_len, |
51 Type* type); | 51 Type* type); |
52 | 52 |
(...skipping 15 matching lines...) Expand all Loading... |
68 uint8_t* dst, | 68 uint8_t* dst, |
69 intptr_t len); | 69 intptr_t len); |
70 static bool DecodeToUTF16(const uint8_t* utf8_array, | 70 static bool DecodeToUTF16(const uint8_t* utf8_array, |
71 intptr_t array_len, | 71 intptr_t array_len, |
72 uint16_t* dst, | 72 uint16_t* dst, |
73 intptr_t len); | 73 intptr_t len); |
74 static bool DecodeToUTF32(const uint8_t* utf8_array, | 74 static bool DecodeToUTF32(const uint8_t* utf8_array, |
75 intptr_t array_len, | 75 intptr_t array_len, |
76 int32_t* dst, | 76 int32_t* dst, |
77 intptr_t len); | 77 intptr_t len); |
78 static bool DecodeCStringToUTF32(const char* str, | 78 static bool DecodeCStringToUTF32(const char* str, int32_t* dst, intptr_t len); |
79 int32_t* dst, | |
80 intptr_t len); | |
81 | 79 |
82 static const int32_t kMaxOneByteChar = 0x7F; | 80 static const int32_t kMaxOneByteChar = 0x7F; |
83 static const int32_t kMaxTwoByteChar = 0x7FF; | 81 static const int32_t kMaxTwoByteChar = 0x7FF; |
84 static const int32_t kMaxThreeByteChar = 0xFFFF; | 82 static const int32_t kMaxThreeByteChar = 0xFFFF; |
85 static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint; | 83 static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint; |
86 | 84 |
87 private: | 85 private: |
88 static bool IsTrailByte(uint8_t code_unit) { | 86 static bool IsTrailByte(uint8_t code_unit) { |
89 return (code_unit & 0xC0) == 0x80; | 87 return (code_unit & 0xC0) == 0x80; |
90 } | 88 } |
91 | 89 |
92 static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) { | 90 static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) { |
93 return code_point < kOverlongMinimum[num_code_units]; | 91 return code_point < kOverlongMinimum[num_code_units]; |
94 } | 92 } |
95 | 93 |
(...skipping 14 matching lines...) Expand all Loading... |
110 | 108 |
111 | 109 |
112 class Utf16 : AllStatic { | 110 class Utf16 : AllStatic { |
113 public: | 111 public: |
114 // Returns the length of the code point in UTF-16 code units. | 112 // Returns the length of the code point in UTF-16 code units. |
115 static intptr_t Length(int32_t ch) { | 113 static intptr_t Length(int32_t ch) { |
116 return (ch <= Utf16::kMaxCodeUnit) ? 1 : 2; | 114 return (ch <= Utf16::kMaxCodeUnit) ? 1 : 2; |
117 } | 115 } |
118 | 116 |
119 // Returns true if ch is a lead or trail surrogate. | 117 // Returns true if ch is a lead or trail surrogate. |
120 static bool IsSurrogate(uint32_t ch) { | 118 static bool IsSurrogate(uint32_t ch) { return (ch & 0xFFFFF800) == 0xD800; } |
121 return (ch & 0xFFFFF800) == 0xD800; | |
122 } | |
123 | 119 |
124 // Returns true if ch is a lead surrogate. | 120 // Returns true if ch is a lead surrogate. |
125 static bool IsLeadSurrogate(uint32_t ch) { | 121 static bool IsLeadSurrogate(uint32_t ch) { |
126 return (ch & 0xFFFFFC00) == 0xD800; | 122 return (ch & 0xFFFFFC00) == 0xD800; |
127 } | 123 } |
128 | 124 |
129 // Returns true if ch is a trail surrogate. | 125 // Returns true if ch is a trail surrogate. |
130 static bool IsTrailSurrogate(uint32_t ch) { | 126 static bool IsTrailSurrogate(uint32_t ch) { |
131 return (ch & 0xFFFFFC00) == 0xDC00; | 127 return (ch & 0xFFFFFC00) == 0xDC00; |
132 } | 128 } |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
222 // Data for small code points with one mapping | 218 // Data for small code points with one mapping |
223 static const int16_t stage2_[]; | 219 static const int16_t stage2_[]; |
224 | 220 |
225 // Data for large code points or code points with both mappings. | 221 // Data for large code points or code points with both mappings. |
226 static const int32_t stage2_exception_[][2]; | 222 static const int32_t stage2_exception_[][2]; |
227 }; | 223 }; |
228 | 224 |
229 } // namespace dart | 225 } // namespace dart |
230 | 226 |
231 #endif // RUNTIME_VM_UNICODE_H_ | 227 #endif // RUNTIME_VM_UNICODE_H_ |
OLD | NEW |