Chromium Code Reviews| Index: src/unicode.h |
| diff --git a/src/unicode.h b/src/unicode.h |
| index 6ba61d0e17b2a0b6ff7702c422c38ddf9276318a..1ecfeda2b8b770610572ba08e75066b139cf6d03 100644 |
| --- a/src/unicode.h |
| +++ b/src/unicode.h |
| @@ -102,6 +102,9 @@ class UnicodeData { |
| class Utf16 { |
| public: |
| + static inline bool IsSurrogatePair(int lead, int trail) { |
| + return IsLeadSurrogate(lead) && IsTrailSurrogate(trail); |
| + } |
| static inline bool IsLeadSurrogate(int code) { |
| if (code == kNoPreviousCharacter) return false; |
| return (code & 0xfc00) == 0xd800; |
| @@ -146,11 +149,16 @@ class Utf8 { |
| public: |
| static inline uchar Length(uchar chr, int previous); |
| static inline unsigned EncodeOneByte(char* out, uint8_t c); |
| - static inline unsigned Encode( |
| - char* out, uchar c, int previous); |
| + static inline unsigned Encode(char* out, |
| + uchar c, |
| + int previous, |
| + bool allow_invalid); |
|
dcarney
2014/01/13 09:19:56
The new allow_invalid parameter needs a default value.
haimuiba
2014/01/15 10:52:34
Done.
|
| static uchar CalculateValue(const byte* str, |
| unsigned length, |
| unsigned* cursor); |
| + |
| + // The Unicode replacement character (U+FFFD), used to signal invalid Unicode
| + // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding.
| static const uchar kBadChar = 0xFFFD; |
| static const unsigned kMaxEncodedSize = 4; |
| static const unsigned kMaxOneByteChar = 0x7f; |
| @@ -162,6 +170,9 @@ class Utf8 { |
| // that match are coded as a 4 byte UTF-8 sequence. |
| static const unsigned kBytesSavedByCombiningSurrogates = 2; |
| static const unsigned kSizeOfUnmatchedSurrogate = 3; |
| + // The maximum number of bytes a single UTF-16 code unit may take up when
| + // encoded as UTF-8.
| + static const unsigned kMax16BitCodeUnitSize = 3; |
| static inline uchar ValueOf(const byte* str, |
| unsigned length, |
| unsigned* cursor); |