| Index: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp
|
| diff --git a/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp b/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp
|
| index 7705bf88d8fce6e87c390645a17716a195330662..49897d7c3ac409bd860dbaf44bbc4786a39ad3b4 100644
|
| --- a/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp
|
| +++ b/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp
|
| @@ -28,7 +28,6 @@
|
| #include "wtf/StringExtras.h"
|
| #include "wtf/text/TextCodec.h"
|
| #include "wtf/text/TextEncodingRegistry.h"
|
| -#include "wtf/text/UTF8.h"
|
|
|
| using namespace WTF;
|
|
|
| @@ -350,56 +349,7 @@
|
| return;
|
| }
|
|
|
| -String TextResourceDecoder::decode(const char* data, size_t len)
|
| -{
|
| - size_t lengthOfBOM = 0;
|
| - if (!m_checkedForBOM)
|
| - lengthOfBOM = checkForBOM(data, len);
|
| -
|
| - bool movedDataToBuffer = false;
|
| -
|
| - if (m_contentType == CSSContent && !m_checkedForCSSCharset) {
|
| - if (!checkForCSSCharset(data, len, movedDataToBuffer))
|
| - return emptyString();
|
| - }
|
| -
|
| - // We check XML declaration in HTML content only if there is enough data available
|
| - if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) || m_contentType == XMLContent) && !m_checkedForXMLCharset) {
|
| - if (!checkForXMLCharset(data, len, movedDataToBuffer))
|
| - return emptyString();
|
| - }
|
| -
|
| - const char* dataForDecode = data + lengthOfBOM;
|
| - size_t lengthForDecode = len - lengthOfBOM;
|
| -
|
| - if (!m_buffer.isEmpty()) {
|
| - if (!movedDataToBuffer) {
|
| - size_t oldSize = m_buffer.size();
|
| - m_buffer.grow(oldSize + len);
|
| - memcpy(m_buffer.data() + oldSize, data, len);
|
| - }
|
| -
|
| - dataForDecode = m_buffer.data() + lengthOfBOM;
|
| - lengthForDecode = m_buffer.size() - lengthOfBOM;
|
| - }
|
| -
|
| - if (m_contentType == HTMLContent && !m_checkedForMetaCharset)
|
| - checkForMetaCharset(dataForDecode, lengthForDecode);
|
| -
|
| - detectTextEncoding(data, len);
|
| -
|
| - ASSERT(m_encoding.isValid());
|
| -
|
| - if (!m_codec)
|
| - m_codec = newTextCodec(m_encoding);
|
| -
|
| - String result = m_codec->decode(dataForDecode, lengthForDecode, DoNotFlush, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError);
|
| -
|
| - m_buffer.clear();
|
| - return result;
|
| -}
|
| -
|
| -// We use the encoding detector in following cases:
|
| +// We use the encoding detector in two cases:
|
| // 1. Encoding detector is turned ON and no other encoding source is
|
| // available (that is, it's DefaultEncoding).
|
| // 2. Encoding detector is turned ON and the encoding is set to
|
| @@ -408,27 +358,65 @@
|
| // relationship is compliant to the same-origin policy. If they're from
|
| // different domains, |m_source| would not be set to EncodingFromParentFrame
|
| // in the first place.
|
| -void TextResourceDecoder::detectTextEncoding(const char* data, size_t len)
|
| -{
|
| - if (!shouldDetectEncoding())
|
| - return;
|
| -
|
| - if (WTF::Unicode::isUTF8andNotASCII(data, len)) {
|
| - setEncoding(UTF8Encoding(), EncodingFromContentSniffing);
|
| - return;
|
| - }
|
| - if (m_encodingDetectionOption == UseAllAutoDetection) {
|
| - WTF::TextEncoding detectedEncoding;
|
| - if (detectTextEncodingUniversal(data, len, m_hintEncoding, &detectedEncoding))
|
| - setEncoding(detectedEncoding, EncodingFromContentSniffing);
|
| - }
|
| -}
|
| -
|
| -bool TextResourceDecoder::shouldDetectEncoding() const
|
| +bool TextResourceDecoder::shouldAutoDetect() const
|
| {
|
| // Just checking m_hintEncoding suffices here because it's only set
|
| // in setHintEncoding when the source is AutoDetectedEncoding.
|
| - return m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding);
|
| + return m_encodingDetectionOption == UseAllAutoDetection // The detector option and the encoding source must both allow detection.
|
| + && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding));
|
| +}
|
| +
|
| +String TextResourceDecoder::decode(const char* data, size_t len)
|
| +{ // Decodes this call's bytes (plus anything held in m_buffer); returns an empty string when a CSS/XML charset check below returns false.
|
| + size_t lengthOfBOM = 0; // Stays 0 when m_checkedForBOM is already set.
|
| + if (!m_checkedForBOM)
|
| + lengthOfBOM = checkForBOM(data, len);
|
| +
|
| + bool movedDataToBuffer = false; // Handed to the charset checks below, then consulted before appending data to m_buffer.
|
| +
|
| + if (m_contentType == CSSContent && !m_checkedForCSSCharset) {
|
| + if (!checkForCSSCharset(data, len, movedDataToBuffer))
|
| + return emptyString(); // NOTE(review): presumably the check needs more input before deciding - confirm in checkForCSSCharset.
|
| + }
|
| +
|
| + // We check for an XML declaration in HTML content only if enough data is available.
|
| + if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) || m_contentType == XMLContent) && !m_checkedForXMLCharset) {
|
| + if (!checkForXMLCharset(data, len, movedDataToBuffer))
|
| + return emptyString();
|
| + }
|
| +
|
| + const char* dataForDecode = data + lengthOfBOM; // By default decode this call's bytes, skipping lengthOfBOM leading bytes.
|
| + size_t lengthForDecode = len - lengthOfBOM;
|
| +
|
| + if (!m_buffer.isEmpty()) { // m_buffer already holds bytes: decode from it instead of from data.
|
| + if (!movedDataToBuffer) {
|
| + size_t oldSize = m_buffer.size();
|
| + m_buffer.grow(oldSize + len);
|
| + memcpy(m_buffer.data() + oldSize, data, len); // Append this call's bytes after the existing buffer contents.
|
| + }
|
| +
|
| + dataForDecode = m_buffer.data() + lengthOfBOM;
|
| + lengthForDecode = m_buffer.size() - lengthOfBOM;
|
| + }
|
| +
|
| + if (m_contentType == HTMLContent && !m_checkedForMetaCharset)
|
| + checkForMetaCharset(dataForDecode, lengthForDecode);
|
| +
|
| + if (shouldAutoDetect()) { // Detection inspects only this call's bytes (data/len), not m_buffer.
|
| + WTF::TextEncoding detectedEncoding;
|
| + if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding))
|
| + setEncoding(detectedEncoding, EncodingFromContentSniffing);
|
| + }
|
| +
|
| + ASSERT(m_encoding.isValid());
|
| +
|
| + if (!m_codec) // Create the codec from m_encoding only if none exists yet.
|
| + m_codec = newTextCodec(m_encoding);
|
| +
|
| + String result = m_codec->decode(dataForDecode, lengthForDecode, DoNotFlush, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError);
|
| +
|
| + m_buffer.clear(); // Buffered bytes, if any, were passed to the codec above.
|
| + return result;
|
| }
|
|
|
| String TextResourceDecoder::flush()
|
| @@ -436,9 +424,11 @@
|
| // If we can not identify the encoding even after a document is completely
|
| // loaded, we need to detect the encoding if other conditions for
|
| // autodetection is satisfied.
|
| - if (m_buffer.size()
|
| + if (m_buffer.size() && shouldAutoDetect()
|
| && ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_contentType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) {
|
| - detectTextEncoding(m_buffer.data(), m_buffer.size());
|
| + WTF::TextEncoding detectedEncoding;
|
| + if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, &detectedEncoding))
|
| + setEncoding(detectedEncoding, EncodingFromContentSniffing);
|
| }
|
|
|
| if (!m_codec)
|
|
|