Chromium Code Reviews| Index: vm/unicode.h |
| =================================================================== |
| --- vm/unicode.h (revision 15591) |
| +++ vm/unicode.h (working copy) |
| @@ -43,9 +43,11 @@ |
| kSupplementary, // Supplementary code point [U+010000, U+10FFFF]. |
| }; |
| - static intptr_t CodePointCount(const uint8_t* utf8_array, |
| - intptr_t array_len, |
| - Type* type); |
| + // Returns a count of the number of UTF-16 code units needed to represent the |
|
cshapiro
2012/11/30 21:32:26
This is not strictly true, right?
This returns th
siva
2012/11/30 21:47:19
Changed the comment to:
Returns the most restricte
|
| + // sequence of utf8 characters in 'utf8_array'. |
| + static intptr_t CodeUnitCount(const uint8_t* utf8_array, |
| + intptr_t array_len, |
| + Type* type); |
| // Returns true if 'utf8_array' is a valid UTF-8 string. |
| static bool IsValid(const uint8_t* utf8_array, intptr_t array_len); |
| @@ -82,22 +84,22 @@ |
| static const int32_t kMaxThreeByteChar = 0xFFFF; |
| static const int32_t kMaxFourByteChar = Utf::kMaxCodePoint; |
| - static bool IsTrailByte(uint8_t code_unit) { |
| - return (code_unit & 0xc0) == 0x80; |
| + static bool IsTrailByte(uint8_t utf8_byte) { |
|
cshapiro
2012/11/30 21:32:26
the utf-8 spec removed all mention of "byte" and r
siva
2012/11/30 21:47:19
Done.
|
| + return (utf8_byte & 0xC0) == 0x80; |
| } |
| static bool IsNonShortestForm(uint32_t code_point, size_t num_code_units) { |
| return code_point < kOverlongMinimum[num_code_units]; |
| } |
| - static bool IsLatin1SequenceStart(uint8_t code_unit) { |
| - // Check is codepoint is <= U+00FF |
| - return (code_unit <= Utf8::kMaxOneByteChar); |
| + static bool IsLatin1SequenceStart(uint8_t utf8_byte) { |
| + // Check if this UTF-8 byte starts the encoding of a code point <= U+00FF. |
| + return (utf8_byte <= 0xC3); |
| } |
| - static bool IsSupplementarySequenceStart(uint8_t code_unit) { |
| - // Check is codepoint is >= U+10000. |
| - return (code_unit >= 0xF0); |
| + static bool IsSupplementarySequenceStart(uint8_t utf8_byte) { |
| + // Check if this UTF-8 byte starts the encoding of a code point >= U+10000. |
| + return (utf8_byte >= 0xF0); |
| } |
| static const int8_t kTrailBytes[]; |