Index: third_party/WebKit/Source/wtf/text/UTF8.cpp |
diff --git a/third_party/WebKit/Source/wtf/text/UTF8.cpp b/third_party/WebKit/Source/wtf/text/UTF8.cpp |
index 0beca10497c8e8411d13883305a172c28b264b45..4550b77f68e9a498b36a85cbae722fe4997afdfc 100644 |
--- a/third_party/WebKit/Source/wtf/text/UTF8.cpp |
+++ b/third_party/WebKit/Source/wtf/text/UTF8.cpp |
@@ -185,7 +185,7 @@ ConversionResult convertUTF16ToUTF8( |
// This must be called with the length pre-determined by the first byte. |
// If presented with a length > 4, this returns false. The Unicode |
// definition of UTF-8 goes up to 4-byte sequences. |
-static bool isLegalUTF8(const unsigned char* source, int length) |
+static bool isLegalUTF8(const unsigned char* source, int length, bool strict = true) |
{ |
unsigned char a; |
const unsigned char* srcptr = source + length; |
@@ -210,7 +210,8 @@ static bool isLegalUTF8(const unsigned char* source, int length) |
return false; |
break; |
case 0xED: |
- if (a > 0x9F) |
+ // In lenient mode, accept UTF-8-encoded surrogates: lead surrogates U+D800..U+DBFF encode as [ED A0 80 - ED AF BF] and trail surrogates U+DC00..U+DFFF as [ED B0 80 - ED BF BF]. |
+ if (strict && a > 0x9F) |
return false; |
break; |
case 0xF0: |
@@ -283,7 +284,7 @@ ConversionResult convertUTF8ToUTF16( |
break; |
} |
// Do this check whether lenient or strict |
- if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source), utf8SequenceLength)) { |
+ if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source), utf8SequenceLength, strict)) { |
result = sourceIllegal; |
break; |
} |
@@ -304,8 +305,8 @@ ConversionResult convertUTF8ToUTF16( |
result = sourceIllegal; |
break; |
} |
- *target++ = replacementCharacter; |
- orAllData |= replacementCharacter; |
+ *target++ = static_cast<UChar>(character); |
+ orAllData |= character; |
} else { |
*target++ = static_cast<UChar>(character); // normal case |
orAllData |= character; |