| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef RUNTIME_VM_UNICODE_H_ | 5 #ifndef RUNTIME_VM_UNICODE_H_ |
| 6 #define RUNTIME_VM_UNICODE_H_ | 6 #define RUNTIME_VM_UNICODE_H_ |
| 7 | 7 |
| 8 #include "vm/allocation.h" | 8 #include "vm/allocation.h" |
| 9 #include "vm/globals.h" | 9 #include "vm/globals.h" |
| 10 | 10 |
| (...skipping 20 matching lines...) Expand all Loading... |
| 31 // Returns true if the code point value is negative or greater than kMaxCodePoint. | 31 // Returns true if the code point value is negative or greater than kMaxCodePoint. |
| 32 static bool IsOutOfRange(intptr_t code_point) { | 32 static bool IsOutOfRange(intptr_t code_point) { |
| 33 return (code_point < 0) || (code_point > kMaxCodePoint); | 33 return (code_point < 0) || (code_point > kMaxCodePoint); |
| 34 } | 34 } |
| 35 }; | 35 }; |
| 36 | 36 |
| 37 | 37 |
| 38 class Utf8 : AllStatic { | 38 class Utf8 : AllStatic { |
| 39 public: | 39 public: |
| 40 enum Type { | 40 enum Type { |
| 41 kLatin1 = 0, // Latin-1 code point [U+0000, U+00FF]. | 41 kLatin1 = 0, // Latin-1 code point [U+0000, U+00FF]. |
| 42 kBMP, // Basic Multilingual Plane code point [U+0000, U+FFFF]. | 42 kBMP, // Basic Multilingual Plane code point [U+0000, U+FFFF]. |
| 43 kSupplementary, // Supplementary code point [U+010000, U+10FFFF]. | 43 kSupplementary, // Supplementary code point [U+010000, U+10FFFF]. |
| 44 }; | 44 }; |
| 45 | 45 |
| 46 // Returns the most restricted coding form in which the sequence of utf8 | 46 // Returns the most restricted coding form in which the sequence of utf8 |
| 47 // characters in 'utf8_array' can be represented, and the number of | 47 // characters in 'utf8_array' can be represented, and the number of |
| 48 // code units needed in that form. | 48 // code units needed in that form. |
| 49 static intptr_t CodeUnitCount(const uint8_t* utf8_array, | 49 static intptr_t CodeUnitCount(const uint8_t* utf8_array, |
| 50 intptr_t array_len, | 50 intptr_t array_len, |
| 51 Type* type); | 51 Type* type); |
| 52 | 52 |
| (...skipping 15 matching lines...) Expand all Loading... |
| 68 uint8_t* dst, | 68 uint8_t* dst, |
| 69 intptr_t len); | 69 intptr_t len); |
| 70 static bool DecodeToUTF16(const uint8_t* utf8_array, | 70 static bool DecodeToUTF16(const uint8_t* utf8_array, |
| 71 intptr_t array_len, | 71 intptr_t array_len, |
| 72 uint16_t* dst, | 72 uint16_t* dst, |
| 73 intptr_t len); | 73 intptr_t len); |
| 74 static bool DecodeToUTF32(const uint8_t* utf8_array, | 74 static bool DecodeToUTF32(const uint8_t* utf8_array, |
| 75 intptr_t array_len, | 75 intptr_t array_len, |
| 76 int32_t* dst, | 76 int32_t* dst, |
| 77 intptr_t len); | 77 intptr_t len); |
| 78 static bool DecodeCStringToUTF32(const char* str, | 78 static bool DecodeCStringToUTF32(const char* str, int32_t* dst, intptr_t len); |
| 79 int32_t* dst, | |
| 80 intptr_t len); | |
| 81 | 79 |
| 82 static const int32_t kMaxOneByteChar = 0x7F; | 80 static const int32_t kMaxOneByteChar = 0x7F; |
| 83 static const int32_t kMaxTwoByteChar = 0x7FF; | 81 static const int32_t kMaxTwoByteChar = 0x7FF; |
| 84 static const int32_t kMaxThreeByteChar = 0xFFFF; | 82 static const int32_t kMaxThreeByteChar = 0xFFFF; |
| 85 static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint; | 83 static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint; |
| 86 | 84 |
| 87 private: | 85 private: |
| 88 static bool IsTrailByte(uint8_t code_unit) { | 86 static bool IsTrailByte(uint8_t code_unit) { |
| 89 return (code_unit & 0xC0) == 0x80; | 87 return (code_unit & 0xC0) == 0x80; |
| 90 } | 88 } |
| 91 | 89 |
| 92 static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) { | 90 static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) { |
| 93 return code_point < kOverlongMinimum[num_code_units]; | 91 return code_point < kOverlongMinimum[num_code_units]; |
| 94 } | 92 } |
| 95 | 93 |
| (...skipping 14 matching lines...) Expand all Loading... |
| 110 | 108 |
| 111 | 109 |
| 112 class Utf16 : AllStatic { | 110 class Utf16 : AllStatic { |
| 113 public: | 111 public: |
| 114 // Returns the length of the code point in UTF-16 code units. | 112 // Returns the length of the code point in UTF-16 code units. |
| 115 static intptr_t Length(int32_t ch) { | 113 static intptr_t Length(int32_t ch) { |
| 116 return (ch <= Utf16::kMaxCodeUnit) ? 1 : 2; | 114 return (ch <= Utf16::kMaxCodeUnit) ? 1 : 2; |
| 117 } | 115 } |
| 118 | 116 |
| 119 // Returns true if ch is a lead or trail surrogate. | 117 // Returns true if ch is a lead or trail surrogate. |
| 120 static bool IsSurrogate(uint32_t ch) { | 118 static bool IsSurrogate(uint32_t ch) { return (ch & 0xFFFFF800) == 0xD800; } |
| 121 return (ch & 0xFFFFF800) == 0xD800; | |
| 122 } | |
| 123 | 119 |
| 124 // Returns true if ch is a lead surrogate. | 120 // Returns true if ch is a lead surrogate. |
| 125 static bool IsLeadSurrogate(uint32_t ch) { | 121 static bool IsLeadSurrogate(uint32_t ch) { |
| 126 return (ch & 0xFFFFFC00) == 0xD800; | 122 return (ch & 0xFFFFFC00) == 0xD800; |
| 127 } | 123 } |
| 128 | 124 |
| 129 // Returns true if ch is a trail surrogate. | 125 // Returns true if ch is a trail surrogate. |
| 130 static bool IsTrailSurrogate(uint32_t ch) { | 126 static bool IsTrailSurrogate(uint32_t ch) { |
| 131 return (ch & 0xFFFFFC00) == 0xDC00; | 127 return (ch & 0xFFFFFC00) == 0xDC00; |
| 132 } | 128 } |
| (...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 222 // Data for small code points with one mapping | 218 // Data for small code points with one mapping |
| 223 static const int16_t stage2_[]; | 219 static const int16_t stage2_[]; |
| 224 | 220 |
| 225 // Data for large code points or code points with both mappings. | 221 // Data for large code points or code points with both mappings. |
| 226 static const int32_t stage2_exception_[][2]; | 222 static const int32_t stage2_exception_[][2]; |
| 227 }; | 223 }; |
| 228 | 224 |
| 229 } // namespace dart | 225 } // namespace dart |
| 230 | 226 |
| 231 #endif // RUNTIME_VM_UNICODE_H_ | 227 #endif // RUNTIME_VM_UNICODE_H_ |
| OLD | NEW |