| Index: third_party/WebKit/Source/wtf/text/UTF8.cpp
|
| diff --git a/third_party/WebKit/Source/wtf/text/UTF8.cpp b/third_party/WebKit/Source/wtf/text/UTF8.cpp
|
| index 0beca10497c8e8411d13883305a172c28b264b45..4550b77f68e9a498b36a85cbae722fe4997afdfc 100644
|
| --- a/third_party/WebKit/Source/wtf/text/UTF8.cpp
|
| +++ b/third_party/WebKit/Source/wtf/text/UTF8.cpp
|
| @@ -185,7 +185,7 @@ ConversionResult convertUTF16ToUTF8(
|
| // This must be called with the length pre-determined by the first byte.
|
| // If presented with a length > 4, this returns false. The Unicode
|
| // definition of UTF-8 goes up to 4-byte sequences.
|
| -static bool isLegalUTF8(const unsigned char* source, int length)
|
| +static bool isLegalUTF8(const unsigned char* source, int length, bool strict = true)
|
| {
|
| unsigned char a;
|
| const unsigned char* srcptr = source + length;
|
| @@ -210,7 +210,8 @@ static bool isLegalUTF8(const unsigned char* source, int length)
|
| return false;
|
| break;
|
| case 0xED:
|
| - if (a > 0x9F)
|
| +        // Lenient mode accepts UTF-8-encoded surrogates: U+D800..U+DBFF are bytes ED A0 80..ED AF BF, U+DC00..U+DFFF are ED B0 80..ED BF BF.
|
| + if (strict && a > 0x9F)
|
| return false;
|
| break;
|
| case 0xF0:
|
| @@ -283,7 +284,7 @@ ConversionResult convertUTF8ToUTF16(
|
| break;
|
| }
|
| // Do this check whether lenient or strict
|
| - if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source), utf8SequenceLength)) {
|
| + if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source), utf8SequenceLength, strict)) {
|
| result = sourceIllegal;
|
| break;
|
| }
|
| @@ -304,8 +305,8 @@ ConversionResult convertUTF8ToUTF16(
|
| result = sourceIllegal;
|
| break;
|
| }
|
| - *target++ = replacementCharacter;
|
| - orAllData |= replacementCharacter;
|
| + *target++ = static_cast<UChar>(character);
|
| + orAllData |= character;
|
| } else {
|
| *target++ = static_cast<UChar>(character); // normal case
|
| orAllData |= character;
|
|
|