OLD | NEW |
---|---|
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #ifndef VM_UNICODE_H_ | 5 #ifndef VM_UNICODE_H_ |
6 #define VM_UNICODE_H_ | 6 #define VM_UNICODE_H_ |
7 | 7 |
8 #include "vm/allocation.h" | 8 #include "vm/allocation.h" |
9 #include "vm/globals.h" | 9 #include "vm/globals.h" |
10 | 10 |
11 namespace dart { | 11 namespace dart { |
12 | 12 |
13 class String; | 13 class String; |
14 | 14 |
15 class Utf16 : AllStatic { | |
16 public: | |
17 static const int32_t kMaxCodeUnit = 0xFFFF; | |
18 static const int32_t kMaxCodePoint = 0x10FFFF; | |
19 | |
20 static bool IsLeadSurrogate(int32_t c) { | |
21 return c >= kLeadBase && c < kLeadEnd; | |
22 } | |
23 | |
24 static bool IsTrailSurrogate(int32_t c) { | |
25 return c >= kTrailBase && c < kTrailEnd; | |
26 } | |
27 | |
28 static bool IsSurrogate(int32_t c) { | |
29 return (c & 0xFFFFF800u) == 0xD800u; | |
30 } | |
31 | |
32 static int32_t CodePointFromCodeUnits(int32_t lead, int32_t trail) { | |
Søren Gjesse
2012/11/19 14:18:36
Assert that lead is a lead surrogate and that trail is a trail surrogate.
| |
33 return kSurrogateEncodingBase + | |
34 ((lead & kSurrogateMask) << 10) + (trail & kSurrogateMask); | |
35 } | |
36 | |
37 static int32_t LeadFromCodePoint(int32_t code_point) { | |
38 ASSERT(code_point >= kSurrogateEncodingBase); | |
39 return kLeadBase + | |
40 (((code_point - kSurrogateEncodingBase) >> 10) & kSurrogateMask); | |
41 } | |
42 | |
43 static int32_t TrailFromCodePoint(int32_t code_point) { | |
44 ASSERT(code_point >= kSurrogateEncodingBase); | |
45 return kTrailBase + (code_point & kSurrogateMask); | |
46 } | |
47 | |
48 // Gets the 21 bit Unicode code point at the given index in a string. If the | |
49 // returned value is greater than kMaxCodeUnit then the next position of the | |
50 // string encodes a trail surrogate and should be skipped on iteration. May | |
51 // return individual surrogate values if they are not part of a pair. | |
52 static int32_t CodePointAt(const String& str, int index); | |
53 | |
54 private: | |
55 static const int32_t kLeadBase = 0xD800; | |
56 static const int32_t kLeadEnd = 0xDBFF; | |
57 static const int32_t kTrailBase = 0xDC00; | |
58 static const int32_t kTrailEnd = 0xDFFF; | |
59 static const int32_t kSurrogateMask = 0x3FF; | |
60 static const int32_t kSurrogateEncodingBase = 0x10000; | |
61 }; | |
62 | |
63 | |
15 class Utf8 : AllStatic { | 64 class Utf8 : AllStatic { |
16 public: | 65 public: |
17 enum Type { | 66 enum Type { |
18 kLatin1 = 0, // Latin-1 character set. | 67 kLatin1 = 0, // Latin-1 character set. |
19 kBMP, // Basic Multilingual Plane. | 68 kBMP, // Basic Multilingual Plane. |
20 kSMP, // Supplementary Multilingual Plane. | 69 kSMP, // Supplementary Multilingual Plane. |
21 }; | 70 }; |
22 | 71 |
23 static const intptr_t kMaxOneByteChar = 0x7F; | 72 static const intptr_t kMaxOneByteChar = 0x7F; |
24 static const intptr_t kMaxTwoByteChar = 0x7FF; | 73 static const intptr_t kMaxTwoByteChar = 0x7FF; |
25 static const intptr_t kMaxThreeByteChar = 0xFFFF; | 74 static const intptr_t kMaxThreeByteChar = 0xFFFF; |
26 static const intptr_t kMaxFourByteChar = 0x10FFFF; | 75 static const intptr_t kMaxFourByteChar = 0x10FFFF; |
27 static const intptr_t kMaxBmpCodepoint = 0xffff; | |
28 static const int32_t kLeadOffset = (0xD800 - (0x10000 >> 10)); | |
29 static const int32_t kSurrogateOffset = (0x10000 - (0xD800 << 10) - 0xDC00); | |
30 | 76 |
31 static void ConvertUTF32ToUTF16(int32_t codepoint, uint16_t* dst); | 77 static const int32_t kInvalidCodePoint = -1; |
32 static intptr_t CodePointCount(const uint8_t* utf8_array, | 78 |
33 intptr_t array_len, | 79 static intptr_t CodeUnitCount(const uint8_t* utf8_array, |
34 Type* type); | 80 intptr_t array_len, |
81 Type* type); | |
35 | 82 |
36 // Returns true if 'utf8_array' is a valid UTF-8 string. | 83 // Returns true if 'utf8_array' is a valid UTF-8 string. |
37 static bool IsValid(const uint8_t* utf8_array, intptr_t array_len); | 84 static bool IsValid(const uint8_t* utf8_array, intptr_t array_len); |
38 | 85 |
39 static intptr_t Length(int32_t ch); | 86 static intptr_t Length(int32_t ch); |
40 static intptr_t Length(const String& str); | 87 static intptr_t Length(const String& str); |
41 | 88 |
42 static intptr_t Encode(int32_t ch, char* dst); | 89 static intptr_t Encode(int32_t ch, char* dst); |
43 static intptr_t Encode(const String& src, char* dst, intptr_t len); | 90 static intptr_t Encode(const String& src, char* dst, intptr_t len); |
44 | 91 |
45 static intptr_t Decode(const uint8_t* utf8_array, | 92 static intptr_t Decode(const uint8_t* utf8_array, |
46 intptr_t array_len, | 93 intptr_t array_len, |
47 int32_t* ch); | 94 int32_t* ch); |
48 | 95 |
49 static bool DecodeToLatin1(const uint8_t* utf8_array, | 96 static bool DecodeToLatin1(const uint8_t* utf8_array, |
50 intptr_t array_len, | 97 intptr_t array_len, |
51 uint8_t* dst, | 98 uint8_t* dst, |
52 intptr_t len); | 99 intptr_t len); |
53 static bool DecodeToUTF16(const uint8_t* utf8_array, | 100 static bool DecodeToUTF16(const uint8_t* utf8_array, |
54 intptr_t array_len, | 101 intptr_t array_len, |
55 uint16_t* dst, | 102 uint16_t* dst, |
56 intptr_t len); | 103 intptr_t len); |
57 static bool DecodeToUTF32(const uint8_t* utf8_array, | 104 static bool DecodeToUTF32(const uint8_t* utf8_array, |
58 intptr_t array_len, | 105 intptr_t array_len, |
59 uint32_t* dst, | 106 int32_t* dst, |
60 intptr_t len); | 107 intptr_t len); |
61 static bool DecodeCStringToUTF32(const char* str, | 108 static bool DecodeCStringToUTF32(const char* str, |
62 uint32_t* dst, | 109 int32_t* dst, |
63 intptr_t len) { | 110 intptr_t len) { |
64 ASSERT(str != NULL); | 111 ASSERT(str != NULL); |
65 intptr_t array_len = strlen(str); | 112 intptr_t array_len = strlen(str); |
66 const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str); | 113 const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str); |
67 return DecodeToUTF32(utf8_array, array_len, dst, len); | 114 return DecodeToUTF32(utf8_array, array_len, dst, len); |
68 } | 115 } |
69 }; | 116 }; |
70 | 117 |
71 | 118 |
72 class CaseMapping : AllStatic { | 119 class CaseMapping : AllStatic { |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
128 // Data for small code points with one mapping | 175 // Data for small code points with one mapping |
129 static const int16_t stage2_[]; | 176 static const int16_t stage2_[]; |
130 | 177 |
131 // Data for large code points or code points with both mappings. | 178 // Data for large code points or code points with both mappings. |
132 static const int32_t stage2_exception_[][2]; | 179 static const int32_t stage2_exception_[][2]; |
133 }; | 180 }; |
134 | 181 |
135 } // namespace dart | 182 } // namespace dart |
136 | 183 |
137 #endif // VM_UNICODE_H_ | 184 #endif // VM_UNICODE_H_ |
OLD | NEW |