Index: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp |
diff --git a/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp b/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp |
index 49897d7c3ac409bd860dbaf44bbc4786a39ad3b4..7705bf88d8fce6e87c390645a17716a195330662 100644 |
--- a/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp |
+++ b/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp |
@@ -28,6 +28,7 @@ |
#include "wtf/StringExtras.h" |
#include "wtf/text/TextCodec.h" |
#include "wtf/text/TextEncodingRegistry.h" |
+#include "wtf/text/UTF8.h" |
using namespace WTF; |
@@ -349,23 +350,6 @@ void TextResourceDecoder::checkForMetaCharset(const char* data, size_t length) |
return; |
} |
-// We use the encoding detector in two cases: |
-// 1. Encoding detector is turned ON and no other encoding source is |
-// available (that is, it's DefaultEncoding). |
-// 2. Encoding detector is turned ON and the encoding is set to |
-// the encoding of the parent frame, which is also auto-detected. |
-// Note that condition #2 is NOT satisfied unless parent-child frame |
-// relationship is compliant to the same-origin policy. If they're from |
-// different domains, |m_source| would not be set to EncodingFromParentFrame |
-// in the first place. |
-bool TextResourceDecoder::shouldAutoDetect() const |
-{ |
- // Just checking m_hintEncoding suffices here because it's only set |
- // in setHintEncoding when the source is AutoDetectedEncoding. |
- return m_encodingDetectionOption == UseAllAutoDetection |
- && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding)); |
-} |
- |
String TextResourceDecoder::decode(const char* data, size_t len) |
{ |
size_t lengthOfBOM = 0; |
@@ -402,11 +386,7 @@ String TextResourceDecoder::decode(const char* data, size_t len) |
if (m_contentType == HTMLContent && !m_checkedForMetaCharset) |
checkForMetaCharset(dataForDecode, lengthForDecode); |
- if (shouldAutoDetect()) { |
- WTF::TextEncoding detectedEncoding; |
- if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding)) |
- setEncoding(detectedEncoding, EncodingFromContentSniffing); |
- } |
+ detectTextEncoding(data, len); |
ASSERT(m_encoding.isValid()); |
@@ -419,16 +399,46 @@ String TextResourceDecoder::decode(const char* data, size_t len) |
return result; |
} |
+// We use the encoding detector in following cases: |
+// 1. Encoding detector is turned ON and no other encoding source is |
+// available (that is, it's DefaultEncoding). |
+// 2. Encoding detector is turned ON and the encoding is set to |
+// the encoding of the parent frame, which is also auto-detected. |
+// Note that condition #2 is NOT satisfied unless parent-child frame |
+// relationship is compliant to the same-origin policy. If they're from |
+// different domains, |m_source| would not be set to EncodingFromParentFrame |
+// in the first place. |
+void TextResourceDecoder::detectTextEncoding(const char* data, size_t len) |
+{ |
+ if (!shouldDetectEncoding()) |
+ return; |
+ |
+ if (WTF::Unicode::isUTF8andNotASCII(data, len)) { |
+ setEncoding(UTF8Encoding(), EncodingFromContentSniffing); |
+ return; |
+ } |
+ if (m_encodingDetectionOption == UseAllAutoDetection) { |
+ WTF::TextEncoding detectedEncoding; |
+ if (detectTextEncodingUniversal(data, len, m_hintEncoding, &detectedEncoding)) |
+ setEncoding(detectedEncoding, EncodingFromContentSniffing); |
+ } |
+} |
+ |
+bool TextResourceDecoder::shouldDetectEncoding() const |
+{ |
+ // Just checking m_hintEncoding suffices here because it's only set |
+ // in setHintEncoding when the source is AutoDetectedEncoding. |
+ return m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding); |
+} |
+ |
String TextResourceDecoder::flush() |
{ |
// If we can not identify the encoding even after a document is completely |
// loaded, we need to detect the encoding if other conditions for |
// autodetection is satisfied. |
- if (m_buffer.size() && shouldAutoDetect() |
+ if (m_buffer.size() |
&& ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_contentType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) { |
- WTF::TextEncoding detectedEncoding; |
- if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, &detectedEncoding)) |
- setEncoding(detectedEncoding, EncodingFromContentSniffing); |
+ detectTextEncoding(m_buffer.data(), m_buffer.size()); |
} |
if (!m_codec) |