| Index: src/unicode-inl.h
|
| diff --git a/src/unicode-inl.h b/src/unicode-inl.h
|
| index f861f9f2d47449945d62a6fbc8044abbcd0b2a2b..5b1e3d8a8c1e87c3be7c551be41bbc82a01eab96 100644
|
| --- a/src/unicode-inl.h
|
| +++ b/src/unicode-inl.h
|
| @@ -107,8 +107,10 @@ unsigned Utf8::EncodeOneByte(char* str, uint8_t c) {
|
| return 2;
|
| }
|
|
|
| -
|
| -unsigned Utf8::Encode(char* str, uchar c, int previous) {
|
| +// Encode encodes the UTF-16 code units c and previous into the given str
|
| +// buffer. Unless allow_invalid is set to true, unpaired surrogate code units
|
| +// will be replaced with kReplacementCharacter.
|
| +unsigned Utf8::Encode(char* str, uchar c, int previous, bool allow_invalid) {
|
| static const int kMask = ~(1 << 6);
|
| if (c <= kMaxOneByteChar) {
|
| str[0] = c;
|
| @@ -118,12 +120,16 @@ unsigned Utf8::Encode(char* str, uchar c, int previous) {
|
| str[1] = 0x80 | (c & kMask);
|
| return 2;
|
| } else if (c <= kMaxThreeByteChar) {
|
| - if (Utf16::IsTrailSurrogate(c) &&
|
| - Utf16::IsLeadSurrogate(previous)) {
|
| + if (Utf16::IsSurrogatePair(previous, c)) {
|
| const int kUnmatchedSize = kSizeOfUnmatchedSurrogate;
|
| return Encode(str - kUnmatchedSize,
|
| Utf16::CombineSurrogatePair(previous, c),
|
| - Utf16::kNoPreviousCharacter) - kUnmatchedSize;
|
| + Utf16::kNoPreviousCharacter,
|
| + allow_invalid) - kUnmatchedSize;
|
| + } else if (!allow_invalid &&
|
| + (Utf16::IsLeadSurrogate(c) ||
|
| + Utf16::IsTrailSurrogate(c))) {
|
| + c = kReplacementCharacter;
|
| }
|
| str[0] = 0xE0 | (c >> 12);
|
| str[1] = 0x80 | ((c >> 6) & kMask);
|
|
|