OLD | NEW |
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #ifndef VM_UNICODE_H_ | 5 #ifndef VM_UNICODE_H_ |
6 #define VM_UNICODE_H_ | 6 #define VM_UNICODE_H_ |
7 | 7 |
8 #include "vm/allocation.h" | 8 #include "vm/allocation.h" |
9 #include "vm/globals.h" | 9 #include "vm/globals.h" |
10 | 10 |
11 namespace dart { | 11 namespace dart { |
12 | 12 |
13 class String; | 13 class String; |
14 | 14 |
// Static helpers for working with UTF-16 code units and surrogate pairs.
//
// Code points above kMaxCodeUnit are represented in UTF-16 by a lead
// surrogate (0xd800..0xdbff) followed by a trail surrogate (0xdc00..0xdfff).
class Utf16 : AllStatic {
 public:
  // Largest value that fits in a single UTF-16 code unit.
  static const uint32_t kMaxCodeUnit = 0xffff;
  // Largest valid Unicode code point.
  static const uint32_t kMaxCodePoint = 0x10ffff;

  // Returns true if 'c' is a lead (high) surrogate, i.e. lies in
  // [kLeadBase, kLeadEnd]. Both bounds are inclusive: 0xdbff is a valid
  // lead surrogate and is what LeadFromCodePoint() yields for the topmost
  // code points.
  static bool IsLeadSurrogate(uint32_t c) {
    return c >= kLeadBase && c <= kLeadEnd;
  }

  // Returns true if 'c' is a trail (low) surrogate, i.e. lies in
  // [kTrailBase, kTrailEnd]. Both bounds are inclusive.
  static bool IsTrailSurrogate(uint32_t c) {
    return c >= kTrailBase && c <= kTrailEnd;
  }

  // Returns true if 'c' is any surrogate (lead or trail). All surrogates
  // share the upper bit pattern of 0xd800 once the low 11 bits are masked.
  static bool IsSurrogate(uint32_t c) {
    return (c & 0xfffff800u) == 0xd800u;
  }

  // Combines a lead/trail surrogate pair into the code point it encodes.
  // Only the low 10 bits of each argument are used; the arguments are not
  // checked for actually being surrogates.
  static int32_t CodePointFromCodeUnits(int32_t lead, int32_t trail) {
    return kSurrogateEncodingBase +
        ((lead & kSurrogateMask) << 10) + (trail & kSurrogateMask);
  }

  // Returns the lead surrogate for a code point that requires a surrogate
  // pair ('code_point' must be at least 0x10000).
  static int32_t LeadFromCodePoint(int32_t code_point) {
    // The cast spells out the unsigned comparison the expression already
    // performs through the usual arithmetic conversions.
    ASSERT(static_cast<uint32_t>(code_point) >= kSurrogateEncodingBase);
    return kLeadBase +
        (((code_point - kSurrogateEncodingBase) >> 10) & kSurrogateMask);
  }

  // Returns the trail surrogate for a code point that requires a surrogate
  // pair ('code_point' must be at least 0x10000).
  static int32_t TrailFromCodePoint(int32_t code_point) {
    ASSERT(static_cast<uint32_t>(code_point) >= kSurrogateEncodingBase);
    // Subtracting 0x10000 does not affect the low 10 bits, so the mask can
    // be applied to the code point directly.
    return kTrailBase + (code_point & kSurrogateMask);
  }

 private:
  static const uint32_t kLeadBase = 0xd800;   // First lead surrogate.
  static const uint32_t kLeadEnd = 0xdbff;    // Last lead surrogate (inclusive).
  static const uint32_t kTrailBase = 0xdc00;  // First trail surrogate.
  static const uint32_t kTrailEnd = 0xdfff;   // Last trail surrogate (inclusive).
  static const uint32_t kSurrogateMask = 0x3ff;  // Low 10 payload bits.
  // First code point that needs a surrogate pair.
  static const uint32_t kSurrogateEncodingBase = 0x10000;
};
| 56 |
| 57 |
15 class Utf8 : AllStatic { | 58 class Utf8 : AllStatic { |
16 public: | 59 public: |
17 enum Type { | 60 enum Type { |
18 kAscii = 0, // ASCII character set. | 61 kAscii = 0, // ASCII character set. |
19 kBMP, // Basic Multilingual Plane. | 62 kBMP, // Basic Multilingual Plane. |
20 kSMP, // Supplementary Multilingual Plane. | 63 kSMP, // Supplementary Multilingual Plane. |
21 }; | 64 }; |
22 | 65 |
23 static const intptr_t kMaxOneByteChar = 0x7F; | 66 static const intptr_t kMaxOneByteChar = 0x7F; |
24 static const intptr_t kMaxTwoByteChar = 0x7FF; | 67 static const intptr_t kMaxTwoByteChar = 0x7FF; |
25 static const intptr_t kMaxThreeByteChar = 0xFFFF; | 68 static const intptr_t kMaxThreeByteChar = 0xFFFF; |
26 static const intptr_t kMaxFourByteChar = 0x10FFFF; | 69 static const intptr_t kMaxFourByteChar = 0x10FFFF; |
27 static const intptr_t kMaxBmpCodepoint = 0xffff; | |
28 static const int32_t kLeadOffset = (0xD800 - (0x10000 >> 10)); | |
29 static const int32_t kSurrogateOffset = (0x10000 - (0xD800 << 10) - 0xDC00); | |
30 | 70 |
31 static void ConvertUTF32ToUTF16(int32_t codepoint, uint16_t* dst); | 71 static const uint32_t kInvalidCodePoint = 0xffffffffu; |
32 static intptr_t CodePointCount(const uint8_t* utf8_array, | 72 |
33 intptr_t array_len, | 73 static intptr_t CodeUnitCount(const uint8_t* utf8_array, |
34 Type* type); | 74 intptr_t array_len, |
| 75 Type* type); |
35 | 76 |
36 // Returns true if 'utf8_array' is a valid UTF-8 string. | 77 // Returns true if 'utf8_array' is a valid UTF-8 string. |
37 static bool IsValid(const uint8_t* utf8_array, intptr_t array_len); | 78 static bool IsValid(const uint8_t* utf8_array, intptr_t array_len); |
38 | 79 |
39 static intptr_t Length(int32_t ch); | 80 static intptr_t Length(int32_t ch); |
40 static intptr_t Length(const String& str); | 81 static intptr_t Length(const String& str); |
41 | 82 |
42 static intptr_t Encode(int32_t ch, char* dst); | 83 static intptr_t Encode(int32_t ch, char* dst); |
43 static intptr_t Encode(const String& src, char* dst, intptr_t len); | 84 static intptr_t Encode(const String& src, char* dst, intptr_t len); |
44 | 85 |
45 static intptr_t Decode(const uint8_t* utf8_array, | 86 static intptr_t Decode(const uint8_t* utf8_array, |
46 intptr_t array_len, | 87 intptr_t array_len, |
47 int32_t* ch); | 88 uint32_t* ch); |
48 | 89 |
49 static bool DecodeToAscii(const uint8_t* utf8_array, | 90 static bool DecodeToAscii(const uint8_t* utf8_array, |
50 intptr_t array_len, | 91 intptr_t array_len, |
51 uint8_t* dst, | 92 uint8_t* dst, |
52 intptr_t len); | 93 intptr_t len); |
53 static bool DecodeToUTF16(const uint8_t* utf8_array, | 94 static bool DecodeToUTF16(const uint8_t* utf8_array, |
54 intptr_t array_len, | 95 intptr_t array_len, |
55 uint16_t* dst, | 96 uint16_t* dst, |
56 intptr_t len); | 97 intptr_t len); |
57 static bool DecodeToUTF32(const uint8_t* utf8_array, | 98 static bool DecodeToUTF32(const uint8_t* utf8_array, |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
128 // Data for small code points with one mapping | 169 // Data for small code points with one mapping |
129 static const int16_t stage2_[]; | 170 static const int16_t stage2_[]; |
130 | 171 |
131 // Data for large code points or code points with both mappings. | 172 // Data for large code points or code points with both mappings. |
132 static const int32_t stage2_exception_[][2]; | 173 static const int32_t stage2_exception_[][2]; |
133 }; | 174 }; |
134 | 175 |
135 } // namespace dart | 176 } // namespace dart |
136 | 177 |
137 #endif // VM_UNICODE_H_ | 178 #endif // VM_UNICODE_H_ |
OLD | NEW |