| Index: src/unicode.h
|
| diff --git a/src/unicode.h b/src/unicode.h
|
| index 6ba61d0e17b2a0b6ff7702c422c38ddf9276318a..bb5506d38e2531b0f60fd73f8676c3ad77277ab7 100644
|
| --- a/src/unicode.h
|
| +++ b/src/unicode.h
|
| @@ -102,6 +102,9 @@ class UnicodeData {
|
|
|
| class Utf16 {
|
| public:
|
| + static inline bool IsSurrogatePair(int lead, int trail) {
|
| + return IsLeadSurrogate(lead) && IsTrailSurrogate(trail);
|
| + }
|
| static inline bool IsLeadSurrogate(int code) {
|
| if (code == kNoPreviousCharacter) return false;
|
| return (code & 0xfc00) == 0xd800;
|
| @@ -146,11 +149,16 @@ class Utf8 {
|
| public:
|
| static inline uchar Length(uchar chr, int previous);
|
| static inline unsigned EncodeOneByte(char* out, uint8_t c);
|
| - static inline unsigned Encode(
|
| - char* out, uchar c, int previous);
|
| + static inline unsigned Encode(char* out,
|
| + uchar c,
|
| + int previous,
|
| + bool replace_invalid = false);
|
| static uchar CalculateValue(const byte* str,
|
| unsigned length,
|
| unsigned* cursor);
|
| +
|
| + // The Unicode replacement character (U+FFFD), used to signal invalid Unicode
|
| + // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding.
|
| static const uchar kBadChar = 0xFFFD;
|
| static const unsigned kMaxEncodedSize = 4;
|
| static const unsigned kMaxOneByteChar = 0x7f;
|
| @@ -162,6 +170,9 @@ class Utf8 {
|
| // that match are coded as a 4 byte UTF-8 sequence.
|
| static const unsigned kBytesSavedByCombiningSurrogates = 2;
|
| static const unsigned kSizeOfUnmatchedSurrogate = 3;
|
| + // The maximum number of bytes a single UTF-16 code unit may take up when
|
| + // encoded as UTF-8.
|
| + static const unsigned kMax16BitCodeUnitSize = 3;
|
| static inline uchar ValueOf(const byte* str,
|
| unsigned length,
|
| unsigned* cursor);
|
|
|