Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef VM_UNICODE_H_ | 5 #ifndef VM_UNICODE_H_ |
| 6 #define VM_UNICODE_H_ | 6 #define VM_UNICODE_H_ |
| 7 | 7 |
| 8 #include "vm/allocation.h" | 8 #include "vm/allocation.h" |
| 9 #include "vm/globals.h" | 9 #include "vm/globals.h" |
| 10 | 10 |
| (...skipping 25 matching lines...) Expand all Loading... | |
| 36 | 36 |
| 37 | 37 |
| 38 class Utf8 : AllStatic { | 38 class Utf8 : AllStatic { |
| 39 public: | 39 public: |
| 40 enum Type { | 40 enum Type { |
| 41 kLatin1 = 0, // Latin-1 code point [U+0000, U+00FF]. | 41 kLatin1 = 0, // Latin-1 code point [U+0000, U+00FF]. |
| 42 kBMP, // Basic Multilingual Plane code point [U+0000, U+FFFF]. | 42 kBMP, // Basic Multilingual Plane code point [U+0000, U+FFFF]. |
| 43 kSupplementary, // Supplementary code point [U+010000, U+10FFFF]. | 43 kSupplementary, // Supplementary code point [U+010000, U+10FFFF]. |
| 44 }; | 44 }; |
| 45 | 45 |
| 46 static intptr_t CodePointCount(const uint8_t* utf8_array, | 46 // Returns a count of the number of UTF-16 code units needed to represent the |
|
cshapiro
2012/11/30 21:32:26
This is not strictly true, right?
This returns th…
siva
2012/11/30 21:47:19
Changed the comment to:
Returns the most restricte…
| |
| 47 intptr_t array_len, | 47 // sequence of utf8 characters in 'utf8_array'. |
| 48 Type* type); | 48 static intptr_t CodeUnitCount(const uint8_t* utf8_array, |
| 49 intptr_t array_len, | |
| 50 Type* type); | |
| 49 | 51 |
| 50 // Returns true if 'utf8_array' is a valid UTF-8 string. | 52 // Returns true if 'utf8_array' is a valid UTF-8 string. |
| 51 static bool IsValid(const uint8_t* utf8_array, intptr_t array_len); | 53 static bool IsValid(const uint8_t* utf8_array, intptr_t array_len); |
| 52 | 54 |
| 53 static intptr_t Length(int32_t ch); | 55 static intptr_t Length(int32_t ch); |
| 54 static intptr_t Length(const String& str); | 56 static intptr_t Length(const String& str); |
| 55 | 57 |
| 56 static intptr_t Encode(int32_t ch, char* dst); | 58 static intptr_t Encode(int32_t ch, char* dst); |
| 57 static intptr_t Encode(const String& src, char* dst, intptr_t len); | 59 static intptr_t Encode(const String& src, char* dst, intptr_t len); |
| 58 | 60 |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 75 static bool DecodeCStringToUTF32(const char* str, | 77 static bool DecodeCStringToUTF32(const char* str, |
| 76 int32_t* dst, | 78 int32_t* dst, |
| 77 intptr_t len); | 79 intptr_t len); |
| 78 | 80 |
| 79 private: | 81 private: |
| 80 static const int32_t kMaxOneByteChar = 0x7F; | 82 static const int32_t kMaxOneByteChar = 0x7F; |
| 81 static const int32_t kMaxTwoByteChar = 0x7FF; | 83 static const int32_t kMaxTwoByteChar = 0x7FF; |
| 82 static const int32_t kMaxThreeByteChar = 0xFFFF; | 84 static const int32_t kMaxThreeByteChar = 0xFFFF; |
| 83 static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint; | 85 static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint; |
| 84 | 86 |
| 85 static bool IsTrailByte(uint8_t code_unit) { | 87 static bool IsTrailByte(uint8_t utf8_byte) { |
|
cshapiro
2012/11/30 21:32:26
the utf-8 spec removed all mention of "byte" and r…
siva
2012/11/30 21:47:19
Done.
| |
| 86 return (code_unit & 0xc0) == 0x80; | 88 return (utf8_byte & 0xC0) == 0x80; |
| 87 } | 89 } |
| 88 | 90 |
| 89 static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) { | 91 static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) { |
| 90 return code_point < kOverlongMinimum[num_code_units]; | 92 return code_point < kOverlongMinimum[num_code_units]; |
| 91 } | 93 } |
| 92 | 94 |
| 93 static bool IsLatin1SequenceStart(uint8_t code_unit) { | 95 static bool IsLatin1SequenceStart(uint8_t utf8_byte) { |
| 94 // Check is codepoint is <= U+00FF | 96 // Check if utf8 sequence is start of a codepoint <= U+00FF |
| 95 return (code_unit <= Utf8::kMaxOneByteChar); | 97 return (utf8_byte <= 0xC3); |
| 96 } | 98 } |
| 97 | 99 |
| 98 static bool IsSupplementarySequenceStart(uint8_t code_unit) { | 100 static bool IsSupplementarySequenceStart(uint8_t utf8_byte) { |
| 99 // Check is codepoint is >= U+10000. | 101 // Check if utf8 sequence is start of a codepoint >= U+10000. |
| 100 return (code_unit >= 0xF0); | 102 return (utf8_byte >= 0xF0); |
| 101 } | 103 } |
| 102 | 104 |
| 103 static const int8_t kTrailBytes[]; | 105 static const int8_t kTrailBytes[]; |
| 104 static const uint32_t kMagicBits[]; | 106 static const uint32_t kMagicBits[]; |
| 105 static const uint32_t kOverlongMinimum[]; | 107 static const uint32_t kOverlongMinimum[]; |
| 106 }; | 108 }; |
| 107 | 109 |
| 108 | 110 |
| 109 class Utf16 : AllStatic { | 111 class Utf16 : AllStatic { |
| 110 public: | 112 public: |
| (...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 219 // Data for small code points with one mapping | 221 // Data for small code points with one mapping |
| 220 static const int16_t stage2_[]; | 222 static const int16_t stage2_[]; |
| 221 | 223 |
| 222 // Data for large code points or code points with both mappings. | 224 // Data for large code points or code points with both mappings. |
| 223 static const int32_t stage2_exception_[][2]; | 225 static const int32_t stage2_exception_[][2]; |
| 224 }; | 226 }; |
| 225 | 227 |
| 226 } // namespace dart | 228 } // namespace dart |
| 227 | 229 |
| 228 #endif // VM_UNICODE_H_ | 230 #endif // VM_UNICODE_H_ |
| OLD | NEW |