Chromium Code Reviews| Index: src/unicode.h |
| diff --git a/src/unicode.h b/src/unicode.h |
| index 6ba61d0e17b2a0b6ff7702c422c38ddf9276318a..f1bc2c77c1e9d9ea720c4076bfc5405e61b66e65 100644 |
| --- a/src/unicode.h |
| +++ b/src/unicode.h |
| @@ -46,6 +46,12 @@ typedef unsigned char byte; |
| */ |
| const int kMaxMappingSize = 4; |
| +/** |
| + * The Unicode replacement character, used to signal invalid Unicode sequences |
| + * (e.g. an orphan surrogate) when converting to a UTF encoding. |
| + */ |
| +const int kReplacementCharacter = 0xFFFD; |
|
dcarney
2014/01/04 15:56:45
should probably be in the Utf8 class
haimuiba
2014/01/06 05:40:18
I don't feel strongly about it, but technically it
|
| + |
| template <class T, int size = 256> |
| class Predicate { |
| public: |
| @@ -102,6 +108,9 @@ class UnicodeData { |
| class Utf16 { |
| public: |
| + static inline bool IsSurrogatePair(int lead, int trail) { |
| + return IsLeadSurrogate(lead) && IsTrailSurrogate(trail); |
| + } |
| static inline bool IsLeadSurrogate(int code) { |
| if (code == kNoPreviousCharacter) return false; |
| return (code & 0xfc00) == 0xd800; |
| @@ -146,8 +155,7 @@ class Utf8 { |
| public: |
| static inline uchar Length(uchar chr, int previous); |
| static inline unsigned EncodeOneByte(char* out, uint8_t c); |
| - static inline unsigned Encode( |
| - char* out, uchar c, int previous); |
| + static inline unsigned Encode(char* str, uchar c, bool replace_surrogates); |
|
dcarney
2014/01/04 15:56:45
this is not backwards compatible. All existing ca
haimuiba
2014/01/06 05:40:18
Is there a good reason to keep compatibility, rath
|
| static uchar CalculateValue(const byte* str, |
| unsigned length, |
| unsigned* cursor); |