Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1312)

Unified Diff: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h

Issue 1470893002: [Fetch] Always use utf-8 for decoding in text() (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Reflect comments. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h
diff --git a/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h b/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h
index ad167e2a128fbd956a7d430de0629e8af4251dd2..cc4983852f8dfbf21cb175adb6e4a5701fe8e5a1 100644
--- a/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h
+++ b/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h
@@ -49,7 +49,13 @@ public:
static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetector = false)
{
- return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector));
+ return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector ? UseAllAutoDetection : UseContentAndBOMBasedDetection));
+ }
+ // Corresponds to utf-8 decode in Encoding spec:
+ // https://encoding.spec.whatwg.org/#utf-8-decode.
+ static PassOwnPtr<TextResourceDecoder> createAlwaysUseUTF8ForText()
+ {
+ return adoptPtr(new TextResourceDecoder("plain/text", UTF8Encoding(), AlwaysUseUTF8ForText));
}
~TextResourceDecoder();
@@ -86,7 +92,28 @@ public:
size_t checkForBOM(const char*, size_t);
private:
- TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, bool usesEncodingDetector);
+
+ // TextResourceDecoder does three kinds of encoding detection:
+ // 1. By BOM,
+ // 2. By Content if |m_contentType| is not |PlainTextContent|
+ // (e.g. <meta> tag for HTML), and
+ // 3. By detectTextEncoding().
+ enum EncodingDetectionOption {
+ // Use 1. + 2. + 3.
+ UseAllAutoDetection,
+
+ // Use 1. + 2.
+ UseContentAndBOMBasedDetection,
+
+ // Use none of them.
+ // |m_contentType| must be |PlainTextContent| and
+ // |m_encoding| must be UTF8Encoding.
+ // This doesn't change encoding based on BOMs, but still processes
+ // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result.
+ AlwaysUseUTF8ForText
+ };
+
+ TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, EncodingDetectionOption);
enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM.
static ContentType determineContentType(const String& mimeType);
@@ -110,7 +137,7 @@ private:
bool m_checkedForMetaCharset;
bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
bool m_sawError;
- bool m_usesEncodingDetector;
+ EncodingDetectionOption m_encodingDetectionOption;
OwnPtr<HTMLMetaCharsetParser> m_charsetParser;
};

Powered by Google App Engine
This is Rietveld 408576698