Index: src/unicode.h |
diff --git a/src/unicode.h b/src/unicode.h |
index 6ba61d0e17b2a0b6ff7702c422c38ddf9276318a..95a1740ebb2d79695056eb3d9bea122f319afc85 100644 |
--- a/src/unicode.h |
+++ b/src/unicode.h |
@@ -46,6 +46,12 @@ typedef unsigned char byte; |
*/ |
const int kMaxMappingSize = 4; |
+/** |
+ * The Unicode replacement character, used to signal invalid Unicode sequences |
+ * (e.g. an orphan surrogate) when converting to UTF-8.
dcarney
2014/01/10 16:49:55
typo - UTF-8
haimuiba
2014/01/13 07:48:21
Done.
|
+ */ |
+const int kReplacementCharacter = 0xFFFD; |
dcarney
2014/01/10 16:49:55
this should be in Utf8, but see below
haimuiba
2014/01/13 07:48:21
Done.
|
+ |
template <class T, int size = 256> |
class Predicate { |
public: |
@@ -102,6 +108,9 @@ class UnicodeData { |
class Utf16 { |
public: |
+ static inline bool IsSurrogatePair(int lead, int trail) { |
+ return IsLeadSurrogate(lead) && IsTrailSurrogate(trail); |
+ } |
static inline bool IsLeadSurrogate(int code) { |
if (code == kNoPreviousCharacter) return false; |
return (code & 0xfc00) == 0xd800; |
@@ -146,8 +155,10 @@ class Utf8 { |
public: |
static inline uchar Length(uchar chr, int previous); |
static inline unsigned EncodeOneByte(char* out, uint8_t c); |
- static inline unsigned Encode( |
- char* out, uchar c, int previous); |
+ static inline unsigned Encode(char* out, |
+ uchar c, |
+ int previous, |
+ bool allow_invalid); |
dcarney
2014/01/10 16:49:55
this either needs to be an enum to avoid passing t
haimuiba
2014/01/13 07:48:21
Done by defaulting to true. I don't understand the
|
static uchar CalculateValue(const byte* str, |
unsigned length, |
unsigned* cursor); |