Index: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp |
diff --git a/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp b/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp |
index 7705bf88d8fce6e87c390645a17716a195330662..49897d7c3ac409bd860dbaf44bbc4786a39ad3b4 100644 |
--- a/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp |
+++ b/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp |
@@ -28,7 +28,6 @@ |
#include "wtf/StringExtras.h" |
#include "wtf/text/TextCodec.h" |
#include "wtf/text/TextEncodingRegistry.h" |
-#include "wtf/text/UTF8.h" |
using namespace WTF; |
@@ -350,56 +349,7 @@ |
return; |
} |
-String TextResourceDecoder::decode(const char* data, size_t len) |
-{ |
- size_t lengthOfBOM = 0; |
- if (!m_checkedForBOM) |
- lengthOfBOM = checkForBOM(data, len); |
- |
- bool movedDataToBuffer = false; |
- |
- if (m_contentType == CSSContent && !m_checkedForCSSCharset) { |
- if (!checkForCSSCharset(data, len, movedDataToBuffer)) |
- return emptyString(); |
- } |
- |
- // We check XML declaration in HTML content only if there is enough data available |
- if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) || m_contentType == XMLContent) && !m_checkedForXMLCharset) { |
- if (!checkForXMLCharset(data, len, movedDataToBuffer)) |
- return emptyString(); |
- } |
- |
- const char* dataForDecode = data + lengthOfBOM; |
- size_t lengthForDecode = len - lengthOfBOM; |
- |
- if (!m_buffer.isEmpty()) { |
- if (!movedDataToBuffer) { |
- size_t oldSize = m_buffer.size(); |
- m_buffer.grow(oldSize + len); |
- memcpy(m_buffer.data() + oldSize, data, len); |
- } |
- |
- dataForDecode = m_buffer.data() + lengthOfBOM; |
- lengthForDecode = m_buffer.size() - lengthOfBOM; |
- } |
- |
- if (m_contentType == HTMLContent && !m_checkedForMetaCharset) |
- checkForMetaCharset(dataForDecode, lengthForDecode); |
- |
- detectTextEncoding(data, len); |
- |
- ASSERT(m_encoding.isValid()); |
- |
- if (!m_codec) |
- m_codec = newTextCodec(m_encoding); |
- |
- String result = m_codec->decode(dataForDecode, lengthForDecode, DoNotFlush, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); |
- |
- m_buffer.clear(); |
- return result; |
-} |
- |
-// We use the encoding detector in following cases: |
+// We use the encoding detector in two cases: |
// 1. Encoding detector is turned ON and no other encoding source is |
// available (that is, it's DefaultEncoding). |
// 2. Encoding detector is turned ON and the encoding is set to |
@@ -408,27 +358,65 @@ |
// relationship is compliant to the same-origin policy. If they're from |
// different domains, |m_source| would not be set to EncodingFromParentFrame |
// in the first place. |
-void TextResourceDecoder::detectTextEncoding(const char* data, size_t len) |
-{ |
- if (!shouldDetectEncoding()) |
- return; |
- |
- if (WTF::Unicode::isUTF8andNotASCII(data, len)) { |
- setEncoding(UTF8Encoding(), EncodingFromContentSniffing); |
- return; |
- } |
- if (m_encodingDetectionOption == UseAllAutoDetection) { |
- WTF::TextEncoding detectedEncoding; |
- if (detectTextEncodingUniversal(data, len, m_hintEncoding, &detectedEncoding)) |
- setEncoding(detectedEncoding, EncodingFromContentSniffing); |
- } |
-} |
- |
-bool TextResourceDecoder::shouldDetectEncoding() const |
+bool TextResourceDecoder::shouldAutoDetect() const /* Returns true when decode() and flush() should run content-based encoding detection. */ |
{ |
// Just checking m_hintEncoding suffices here because it's only set |
// in setHintEncoding when the source is AutoDetectedEncoding. |
-    return m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding); |
+    return m_encodingDetectionOption == UseAllAutoDetection /* the detection option must be UseAllAutoDetection ... */ |
+        && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding)); /* ... and the encoding source is the default, or the parent frame with a hint encoding set */ |
+} |
+ |
+String TextResourceDecoder::decode(const char* data, size_t len) /* Runs the pending BOM and charset checks on this chunk, optionally auto-detects the encoding, then decodes the chunk (or m_buffer when it holds earlier bytes) with m_codec. */ |
+{ |
+    size_t lengthOfBOM = 0; |
+    if (!m_checkedForBOM) |
+        lengthOfBOM = checkForBOM(data, len); /* presumably the number of BOM bytes found; it is skipped below - confirm in checkForBOM */ |
+ |
+    bool movedDataToBuffer = false; /* handed to the charset checks below; presumably set when a check stores the chunk in m_buffer itself - confirm */ |
+ |
+    if (m_contentType == CSSContent && !m_checkedForCSSCharset) { |
+        if (!checkForCSSCharset(data, len, movedDataToBuffer)) |
+            return emptyString(); /* the check returned false: produce no text for this call */ |
+    } |
+ |
+    // We check XML declaration in HTML content only if there is enough data available |
+    if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) || m_contentType == XMLContent) && !m_checkedForXMLCharset) { |
+        if (!checkForXMLCharset(data, len, movedDataToBuffer)) |
+            return emptyString(); /* the check returned false: produce no text for this call */ |
+    } |
+ |
+    const char* dataForDecode = data + lengthOfBOM; /* default input: the incoming chunk with the first lengthOfBOM bytes skipped */ |
+    size_t lengthForDecode = len - lengthOfBOM; |
+ |
+    if (!m_buffer.isEmpty()) { /* m_buffer already holds bytes: decode from it instead of from the raw chunk */ |
+        if (!movedDataToBuffer) { /* append this chunk unless it was flagged as already moved into m_buffer */ |
+            size_t oldSize = m_buffer.size(); |
+            m_buffer.grow(oldSize + len); |
+            memcpy(m_buffer.data() + oldSize, data, len); |
+        } |
+ |
+        dataForDecode = m_buffer.data() + lengthOfBOM; |
+        lengthForDecode = m_buffer.size() - lengthOfBOM; |
+    } |
+ |
+    if (m_contentType == HTMLContent && !m_checkedForMetaCharset) /* meta charset check runs only for HTML content that has not been checked yet */ |
+        checkForMetaCharset(dataForDecode, lengthForDecode); |
+ |
+    if (shouldAutoDetect()) { |
+        WTF::TextEncoding detectedEncoding; |
+        if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding)) /* NOTE(review): the detector receives the raw chunk (data, len), not dataForDecode, so bytes held in m_buffer are not part of this call */ |
+            setEncoding(detectedEncoding, EncodingFromContentSniffing); /* record the detected encoding with EncodingFromContentSniffing as its source */ |
+    } |
+ |
+    ASSERT(m_encoding.isValid()); |
+ |
+    if (!m_codec) /* create the codec on first use, for the current m_encoding */ |
+        m_codec = newTextCodec(m_encoding); |
+ |
+    String result = m_codec->decode(dataForDecode, lengthForDecode, DoNotFlush, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); /* fourth argument is true only for XML content without lenient decoding; presumably it makes the codec stop at the first error - confirm in TextCodec */ |
+ |
+    m_buffer.clear(); /* any buffered bytes were handed to the codec above */ |
+    return result; |
} |
String TextResourceDecoder::flush() |
@@ -436,9 +424,11 @@ |
// If we can not identify the encoding even after a document is completely |
// loaded, we need to detect the encoding if other conditions for |
// autodetection is satisfied. |
- if (m_buffer.size() |
+ if (m_buffer.size() && shouldAutoDetect() |
&& ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_contentType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) { |
- detectTextEncoding(m_buffer.data(), m_buffer.size()); |
+ WTF::TextEncoding detectedEncoding; |
+ if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, &detectedEncoding)) |
+ setEncoding(detectedEncoding, EncodingFromContentSniffing); |
} |
if (!m_codec) |