| Index: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp
|
| diff --git a/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp b/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp
|
| index 49897d7c3ac409bd860dbaf44bbc4786a39ad3b4..7705bf88d8fce6e87c390645a17716a195330662 100644
|
| --- a/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp
|
| +++ b/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp
|
| @@ -28,6 +28,7 @@
|
| #include "wtf/StringExtras.h"
|
| #include "wtf/text/TextCodec.h"
|
| #include "wtf/text/TextEncodingRegistry.h"
|
| +#include "wtf/text/UTF8.h"
|
|
|
| using namespace WTF;
|
|
|
| @@ -349,23 +350,6 @@ void TextResourceDecoder::checkForMetaCharset(const char* data, size_t length)
|
| return;
|
| }
|
|
|
| -// We use the encoding detector in two cases:
|
| -// 1. Encoding detector is turned ON and no other encoding source is
|
| -// available (that is, it's DefaultEncoding).
|
| -// 2. Encoding detector is turned ON and the encoding is set to
|
| -// the encoding of the parent frame, which is also auto-detected.
|
| -// Note that condition #2 is NOT satisfied unless parent-child frame
|
| -// relationship is compliant to the same-origin policy. If they're from
|
| -// different domains, |m_source| would not be set to EncodingFromParentFrame
|
| -// in the first place.
|
| -bool TextResourceDecoder::shouldAutoDetect() const
|
| -{
|
| - // Just checking m_hintEncoding suffices here because it's only set
|
| - // in setHintEncoding when the source is AutoDetectedEncoding.
|
| - return m_encodingDetectionOption == UseAllAutoDetection
|
| - && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding));
|
| -}
|
| -
|
| String TextResourceDecoder::decode(const char* data, size_t len)
|
| {
|
| size_t lengthOfBOM = 0;
|
| @@ -402,11 +386,7 @@ String TextResourceDecoder::decode(const char* data, size_t len)
|
| if (m_contentType == HTMLContent && !m_checkedForMetaCharset)
|
| checkForMetaCharset(dataForDecode, lengthForDecode);
|
|
|
| - if (shouldAutoDetect()) {
|
| - WTF::TextEncoding detectedEncoding;
|
| - if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding))
|
| - setEncoding(detectedEncoding, EncodingFromContentSniffing);
|
| - }
|
| + detectTextEncoding(data, len);
|
|
|
| ASSERT(m_encoding.isValid());
|
|
|
| @@ -419,16 +399,46 @@ String TextResourceDecoder::decode(const char* data, size_t len)
|
| return result;
|
| }
|
|
|
| +// We sniff the encoding from content in the following cases:
|
| +// 1. No other encoding source is available (that is, it's DefaultEncoding).
|
| +// 2. The encoding is set to the encoding of the parent frame, which is
|
| +// also auto-detected. This is NOT satisfied unless the parent-child frame
|
| +// relationship complies with the same-origin policy; otherwise |m_source|
|
| +// would not be set to EncodingFromParentFrame in the first place.
|
| +// In those cases, data that isUTF8andNotASCII() accepts is treated as UTF-8
|
| +// regardless of |m_encodingDetectionOption|; the universal detector runs
|
| +// only when the option is UseAllAutoDetection.
|
| +void TextResourceDecoder::detectTextEncoding(const char* data, size_t len)
|
| +{
|
| + if (!shouldDetectEncoding())
|
| + return;
|
| +
|
| + if (WTF::Unicode::isUTF8andNotASCII(data, len)) {
|
| + setEncoding(UTF8Encoding(), EncodingFromContentSniffing);
|
| + return;
|
| + }
|
| + if (m_encodingDetectionOption == UseAllAutoDetection) {
|
| + WTF::TextEncoding detectedEncoding;
|
| + if (detectTextEncodingUniversal(data, len, m_hintEncoding, &detectedEncoding))
|
| + setEncoding(detectedEncoding, EncodingFromContentSniffing);
|
| + }
|
| +}
|
| +
|
| +bool TextResourceDecoder::shouldDetectEncoding() const
|
| +{
|
| + // Sniffing is allowed for DefaultEncoding, or for EncodingFromParentFrame
|
| + // with a hint; m_hintEncoding suffices as it's only set (in setHintEncoding) for AutoDetectedEncoding.
|
| + return m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding);
|
| +}
|
| +
|
| String TextResourceDecoder::flush()
|
| {
|
| // If we can not identify the encoding even after a document is completely
|
| // loaded, we need to detect the encoding if other conditions for
|
| // autodetection is satisfied.
|
| - if (m_buffer.size() && shouldAutoDetect()
|
| + if (m_buffer.size()
|
| && ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_contentType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) {
|
| - WTF::TextEncoding detectedEncoding;
|
| - if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, &detectedEncoding))
|
| - setEncoding(detectedEncoding, EncodingFromContentSniffing);
|
| + detectTextEncoding(m_buffer.data(), m_buffer.size());
|
| }
|
|
|
| if (!m_codec)
|
|
|