Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(54)

Unified Diff: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp

Issue 1888083002: Revert of UTF-8 detector for pages missing encoding info (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp
diff --git a/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp b/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp
index 7705bf88d8fce6e87c390645a17716a195330662..49897d7c3ac409bd860dbaf44bbc4786a39ad3b4 100644
--- a/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp
+++ b/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp
@@ -28,7 +28,6 @@
#include "wtf/StringExtras.h"
#include "wtf/text/TextCodec.h"
#include "wtf/text/TextEncodingRegistry.h"
-#include "wtf/text/UTF8.h"
using namespace WTF;
@@ -350,56 +349,7 @@
return;
}
-String TextResourceDecoder::decode(const char* data, size_t len)
-{
- size_t lengthOfBOM = 0;
- if (!m_checkedForBOM)
- lengthOfBOM = checkForBOM(data, len);
-
- bool movedDataToBuffer = false;
-
- if (m_contentType == CSSContent && !m_checkedForCSSCharset) {
- if (!checkForCSSCharset(data, len, movedDataToBuffer))
- return emptyString();
- }
-
- // We check XML declaration in HTML content only if there is enough data available
- if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) || m_contentType == XMLContent) && !m_checkedForXMLCharset) {
- if (!checkForXMLCharset(data, len, movedDataToBuffer))
- return emptyString();
- }
-
- const char* dataForDecode = data + lengthOfBOM;
- size_t lengthForDecode = len - lengthOfBOM;
-
- if (!m_buffer.isEmpty()) {
- if (!movedDataToBuffer) {
- size_t oldSize = m_buffer.size();
- m_buffer.grow(oldSize + len);
- memcpy(m_buffer.data() + oldSize, data, len);
- }
-
- dataForDecode = m_buffer.data() + lengthOfBOM;
- lengthForDecode = m_buffer.size() - lengthOfBOM;
- }
-
- if (m_contentType == HTMLContent && !m_checkedForMetaCharset)
- checkForMetaCharset(dataForDecode, lengthForDecode);
-
- detectTextEncoding(data, len);
-
- ASSERT(m_encoding.isValid());
-
- if (!m_codec)
- m_codec = newTextCodec(m_encoding);
-
- String result = m_codec->decode(dataForDecode, lengthForDecode, DoNotFlush, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError);
-
- m_buffer.clear();
- return result;
-}
-
-// We use the encoding detector in following cases:
+// We use the encoding detector in two cases:
// 1. Encoding detector is turned ON and no other encoding source is
// available (that is, it's DefaultEncoding).
// 2. Encoding detector is turned ON and the encoding is set to
@@ -408,27 +358,65 @@
// relationship is compliant with the same-origin policy. If they're from
// different domains, |m_source| would not be set to EncodingFromParentFrame
// in the first place.
-void TextResourceDecoder::detectTextEncoding(const char* data, size_t len)
-{
- if (!shouldDetectEncoding())
- return;
-
- if (WTF::Unicode::isUTF8andNotASCII(data, len)) {
- setEncoding(UTF8Encoding(), EncodingFromContentSniffing);
- return;
- }
- if (m_encodingDetectionOption == UseAllAutoDetection) {
- WTF::TextEncoding detectedEncoding;
- if (detectTextEncodingUniversal(data, len, m_hintEncoding, &detectedEncoding))
- setEncoding(detectedEncoding, EncodingFromContentSniffing);
- }
-}
-
-bool TextResourceDecoder::shouldDetectEncoding() const
+bool TextResourceDecoder::shouldAutoDetect() const
{
// Just checking m_hintEncoding suffices here because it's only set
// in setHintEncoding when the source is AutoDetectedEncoding.
- return m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding);
+ return m_encodingDetectionOption == UseAllAutoDetection
+ && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding));
+}
+
+String TextResourceDecoder::decode(const char* data, size_t len)
+{
+ size_t lengthOfBOM = 0;
+ if (!m_checkedForBOM)
+ lengthOfBOM = checkForBOM(data, len);
+
+ bool movedDataToBuffer = false;
+
+ if (m_contentType == CSSContent && !m_checkedForCSSCharset) {
+ if (!checkForCSSCharset(data, len, movedDataToBuffer))
+ return emptyString();
+ }
+
+ // We check XML declaration in HTML content only if there is enough data available
+ if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) || m_contentType == XMLContent) && !m_checkedForXMLCharset) {
+ if (!checkForXMLCharset(data, len, movedDataToBuffer))
+ return emptyString();
+ }
+
+ const char* dataForDecode = data + lengthOfBOM;
+ size_t lengthForDecode = len - lengthOfBOM;
+
+ if (!m_buffer.isEmpty()) {
+ if (!movedDataToBuffer) {
+ size_t oldSize = m_buffer.size();
+ m_buffer.grow(oldSize + len);
+ memcpy(m_buffer.data() + oldSize, data, len);
+ }
+
+ dataForDecode = m_buffer.data() + lengthOfBOM;
+ lengthForDecode = m_buffer.size() - lengthOfBOM;
+ }
+
+ if (m_contentType == HTMLContent && !m_checkedForMetaCharset)
+ checkForMetaCharset(dataForDecode, lengthForDecode);
+
+ if (shouldAutoDetect()) {
+ WTF::TextEncoding detectedEncoding;
+ if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding))
+ setEncoding(detectedEncoding, EncodingFromContentSniffing);
+ }
+
+ ASSERT(m_encoding.isValid());
+
+ if (!m_codec)
+ m_codec = newTextCodec(m_encoding);
+
+ String result = m_codec->decode(dataForDecode, lengthForDecode, DoNotFlush, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError);
+
+ m_buffer.clear();
+ return result;
}
String TextResourceDecoder::flush()
@@ -436,9 +424,11 @@
// If we cannot identify the encoding even after a document is completely
// loaded, we need to detect the encoding if the other conditions for
// autodetection are satisfied.
- if (m_buffer.size()
+ if (m_buffer.size() && shouldAutoDetect()
&& ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_contentType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) {
- detectTextEncoding(m_buffer.data(), m_buffer.size());
+ WTF::TextEncoding detectedEncoding;
+ if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, &detectedEncoding))
+ setEncoding(detectedEncoding, EncodingFromContentSniffing);
}
if (!m_codec)

Powered by Google App Engine
This is Rietveld 408576698