| Index: src/unicode.h
|
| diff --git a/src/unicode.h b/src/unicode.h
|
| index 6ba61d0e17b2a0b6ff7702c422c38ddf9276318a..bc57678fac9f2c87a760f6bd86fd6ca5240c33de 100644
|
| --- a/src/unicode.h
|
| +++ b/src/unicode.h
|
| @@ -46,6 +46,12 @@ typedef unsigned char byte;
|
| */
|
| const int kMaxMappingSize = 4;
|
|
|
| +/**
|
| + * The Unicode replacement character (U+FFFD), used to signal invalid Unicode
|
| + * sequences (e.g. an orphan surrogate) when converting to a UTF encoding.
|
| + */
|
| +const int kReplacementCharacter = 0xFFFD;
|
| +
|
| template <class T, int size = 256>
|
| class Predicate {
|
| public:
|
| @@ -102,6 +108,9 @@ class UnicodeData {
|
|
|
| class Utf16 {
|
| public:
|
| + static inline bool IsSurrogatePair(int lead, int trail) {  // True iff |lead| is a lead surrogate and |trail| is a trail surrogate.
|
| + return IsLeadSurrogate(lead) && IsTrailSurrogate(trail);  // IsLeadSurrogate() rejects kNoPreviousCharacter.
|
| + }
|
| static inline bool IsLeadSurrogate(int code) {
|
| if (code == kNoPreviousCharacter) return false;
|
| return (code & 0xfc00) == 0xd800;
|
| @@ -146,8 +155,7 @@ class Utf8 {
|
| public:
|
| static inline uchar Length(uchar chr, int previous);
|
| static inline unsigned EncodeOneByte(char* out, uint8_t c);
|
| - static inline unsigned Encode(
|
| - char* out, uchar c, int previous);
|
| + static inline unsigned Encode(char* out, uchar c, bool allow_invalid);  // NOTE(review): |allow_invalid| replaces the old |previous| parameter; confirm the definition and every caller are updated.
|
| static uchar CalculateValue(const byte* str,
|
| unsigned length,
|
| unsigned* cursor);
|
|
|