OLD | NEW |
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #ifndef VM_UNICODE_H_ | 5 #ifndef VM_UNICODE_H_ |
6 #define VM_UNICODE_H_ | 6 #define VM_UNICODE_H_ |
7 | 7 |
8 #include "vm/allocation.h" | 8 #include "vm/allocation.h" |
9 #include "vm/globals.h" | 9 #include "vm/globals.h" |
10 | 10 |
11 namespace dart { | 11 namespace dart { |
12 | 12 |
13 class String; | 13 class String; |
14 | 14 |
// Utf8: static-only helper class (derives from AllStatic) declaring UTF-8
// counting, validation, length, encode and decode entry points.
// Reading this view: text left of the first '|' is the OLD revision, text
// right of it is the NEW revision; each side carries its own line number.
15 class Utf8 : AllStatic { | 15 class Utf8 : AllStatic { |
16 public: | 16 public: |
// NEW only: classification enum. The NEW CodePointCount takes a Type*
// parameter where the OLD one took an intptr_t* width.
| 17 enum Type { |
| 18 kAscii = 0, // ASCII character set. |
| 19 kBMP, // Basic Multilingual Plane. |
| 20 kSMP, // Supplementary Multilingual Plane. |
| 21 }; |
| 22 |
// Code point limits, unchanged between revisions. 0x7F, 0x7FF, 0xFFFF and
// 0x10FFFF are the largest values representable in 1-, 2-, 3- and 4-byte
// UTF-8 sequences respectively.
17 static const intptr_t kMaxOneByteChar = 0x7F; | 23 static const intptr_t kMaxOneByteChar = 0x7F; |
18 static const intptr_t kMaxTwoByteChar = 0x7FF; | 24 static const intptr_t kMaxTwoByteChar = 0x7FF; |
19 static const intptr_t kMaxThreeByteChar = 0xFFFF; | 25 static const intptr_t kMaxThreeByteChar = 0xFFFF; |
20 static const intptr_t kMaxFourByteChar = 0x10FFFF; | 26 static const intptr_t kMaxFourByteChar = 0x10FFFF; |
// NEW only: kMaxBmpCodepoint has the same value as kMaxThreeByteChar.
// kLeadOffset evaluates to 0xD7C0 and kSurrogateOffset to -0x35FDC00; both
// are built from the UTF-16 surrogate range bases 0xD800 and 0xDC00.
// NOTE(review): presumably consumed by ConvertUTF32ToUTF16 -- confirm in the
// implementation file, which is not part of this view.
| 27 static const intptr_t kMaxBmpCodepoint = 0xffff; |
| 28 static const int32_t kLeadOffset = (0xD800 - (0x10000 >> 10)); |
| 29 static const int32_t kSurrogateOffset = (0x10000 - (0xD800 << 10) - 0xDC00); |
21 | 30 |
// NEW adds ConvertUTF32ToUTF16(codepoint, dst). From here on, NEW replaces
// the length-less `const char*` inputs of OLD with an explicit
// (utf8_array, array_len) pair on CodePointCount, IsValid and Decode.
22 static intptr_t CodePointCount(const char* str, intptr_t* width); | 31 static void ConvertUTF32ToUTF16(int32_t codepoint, uint16_t* dst); |
| 32 static intptr_t CodePointCount(const uint8_t* utf8_array, |
| 33 intptr_t array_len, |
| 34 Type* type); |
23 | 35 |
24 static bool IsValid(const char* src); | 36 // Returns true if 'utf8_array' is a valid UTF-8 string. |
| 37 static bool IsValid(const uint8_t* utf8_array, intptr_t array_len); |
25 | 38 |
// Length and Encode are declared identically in both revisions.
26 static intptr_t Length(int32_t ch); | 39 static intptr_t Length(int32_t ch); |
27 static intptr_t Length(const String& str); | 40 static intptr_t Length(const String& str); |
28 | 41 |
29 static intptr_t Encode(int32_t ch, char* dst); | 42 static intptr_t Encode(int32_t ch, char* dst); |
30 static intptr_t Encode(const String& src, char* dst, intptr_t len); | 43 static intptr_t Encode(const String& src, char* dst, intptr_t len); |
31 | 44 |
// OLD declared four Decode overloads: one single-character form writing an
// int32_t, and three bool-returning buffer forms that differed only in the
// element type of dst. NEW keeps the name Decode for the single-character
// form (now with array_len) and renames the buffer forms to DecodeToAscii,
// DecodeToUTF16 and DecodeToUTF32, each also taking array_len.
32 static intptr_t Decode(const char*, int32_t* ch); | 45 static intptr_t Decode(const uint8_t* utf8_array, |
33 static bool Decode(const char* src, uint8_t* dst, intptr_t len); | 46 intptr_t array_len, |
34 static bool Decode(const char* src, uint16_t* dst, intptr_t len); | 47 int32_t* ch); |
35 static bool Decode(const char* src, uint32_t* dst, intptr_t len); | 48 |
| 49 static bool DecodeToAscii(const uint8_t* utf8_array, |
| 50 intptr_t array_len, |
| 51 uint8_t* dst, |
| 52 intptr_t len); |
| 53 static bool DecodeToUTF16(const uint8_t* utf8_array, |
| 54 intptr_t array_len, |
| 55 uint16_t* dst, |
| 56 intptr_t len); |
| 57 static bool DecodeToUTF32(const uint8_t* utf8_array, |
| 58 intptr_t array_len, |
| 59 uint32_t* dst, |
| 60 intptr_t len); |
// NEW only: inline convenience wrapper for C strings. Asserts that str is
// non-NULL, measures it with strlen (size_t result stored in an intptr_t),
// reinterprets the bytes as uint8_t and forwards to DecodeToUTF32 with the
// caller's dst and len unchanged; the bool result is returned as-is.
// NOTE(review): strlen is used here, but this header visibly includes only
// vm/allocation.h and vm/globals.h -- confirm one of them pulls in <string.h>.
| 61 static bool DecodeCStringToUTF32(const char* str, |
| 62 uint32_t* dst, |
| 63 intptr_t len) { |
| 64 ASSERT(str != NULL); |
| 65 intptr_t array_len = strlen(str); |
| 66 const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(str); |
| 67 return DecodeToUTF32(utf8_array, array_len, dst, len); |
| 68 } |
36 }; | 69 }; |
37 | 70 |
38 | 71 |
// CaseMapping: static-only helper class (derives from AllStatic) for code
// point case conversion. Every line shown here is identical in the OLD and
// NEW columns. 49 lines of the class body are elided by the review viewer
// (see the "skipping" marker below), so only the visible members are
// described.
39 class CaseMapping : AllStatic { | 72 class CaseMapping : AllStatic { |
40 public: | 73 public: |
41 // Maps a code point to uppercase. | 74 // Maps a code point to uppercase. |
// ToUpper forwards to Convert(code_point, kUppercase) and returns its result.
// Neither Convert nor kUppercase is declared in the visible lines.
42 static int32_t ToUpper(int32_t code_point) { | 75 static int32_t ToUpper(int32_t code_point) { |
43 return Convert(code_point, kUppercase); | 76 return Convert(code_point, kUppercase); |
44 } | 77 } |
45 | 78 |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
// The two tables below are declared here without a size or initializer;
// stage2_ holds int16_t entries, stage2_exception_ holds rows of two int32_t.
// NOTE(review): their definitions and indexing scheme are not in this view.
95 // Data for small code points with one mapping | 128 // Data for small code points with one mapping |
96 static const int16_t stage2_[]; | 129 static const int16_t stage2_[]; |
97 | 130 |
98 // Data for large code points or code points with both mappings. | 131 // Data for large code points or code points with both mappings. |
99 static const int32_t stage2_exception_[][2]; | 132 static const int32_t stage2_exception_[][2]; |
100 }; | 133 }; |
101 | 134 |
102 } // namespace dart | 135 } // namespace dart |
103 | 136 |
104 #endif // VM_UNICODE_H_ | 137 #endif // VM_UNICODE_H_ |
OLD | NEW |