Index: third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp |
diff --git a/third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp b/third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp |
index a9497c74be0ff92a8233ad4cda8bcdd7f5939b5a..f0d9ccce852814c9fe7314361b4b12cbcbceec9e 100644 |
--- a/third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp |
+++ b/third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp |
@@ -52,7 +52,14 @@ bool detectTextEncoding(const char* data, |
CompactEncDet::WEB_CORPUS, |
false, // Include 7-bit encodings to detect ISO-2022-JP |
&consumedBytes, &isReliable); |
- if (encoding == UNKNOWN_ENCODING) |
+ |
+ // Return false if the detected encoding is UTF-8. This helps prevent |
+ // modern web sites from neglecting proper encoding labelling and simply |
+ // relying on browser-side encoding detection. Encoding detection is meant |
+ // to work only for web sites with legacy encodings. Detection failure leads |
+ // |TextResourceDecoder| to use its default encoding determined from system |
+ // locale or TLD. |
+ if (encoding == UNKNOWN_ENCODING || encoding == UTF8) |
return false; |
// 7-bit encodings (except ISO-2022-JP) are not supported in WHATWG encoding |