Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(504)

Side by Side Diff: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h

Issue 1470893002: [Fetch] Always use utf-8 for decoding in text() (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Reflect comments. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) 3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. 4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
5 5
6 This library is free software; you can redistribute it and/or 6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public 7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either 8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version. 9 version 2 of the License, or (at your option) any later version.
10 10
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
42 EncodingFromContentSniffing, 42 EncodingFromContentSniffing,
43 EncodingFromXMLHeader, 43 EncodingFromXMLHeader,
44 EncodingFromMetaTag, 44 EncodingFromMetaTag,
45 EncodingFromCSSCharset, 45 EncodingFromCSSCharset,
46 EncodingFromHTTPHeader, 46 EncodingFromHTTPHeader,
47 EncodingFromParentFrame 47 EncodingFromParentFrame
48 }; 48 };
49 49
50 static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetector = false) 50 static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetector = false)
51 { 51 {
52 return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector)); 52 return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector ? UseAllAutoDetection : UseContentAndBOMBasedDetection));
53 }
54 // Corresponds to utf-8 decode in Encoding spec:
55 // https://encoding.spec.whatwg.org/#utf-8-decode.
56 static PassOwnPtr<TextResourceDecoder> createAlwaysUseUTF8ForText()
57 {
58 return adoptPtr(new TextResourceDecoder("plain/text", UTF8Encoding(), AlwaysUseUTF8ForText));
53 } 59 }
54 ~TextResourceDecoder(); 60 ~TextResourceDecoder();
55 61
56 void setEncoding(const WTF::TextEncoding&, EncodingSource); 62 void setEncoding(const WTF::TextEncoding&, EncodingSource);
57 const WTF::TextEncoding& encoding() const { return m_encoding; } 63 const WTF::TextEncoding& encoding() const { return m_encoding; }
58 bool encodingWasDetectedHeuristically() const 64 bool encodingWasDetectedHeuristically() const
59 { 65 {
60 return m_source == AutoDetectedEncoding 66 return m_source == AutoDetectedEncoding
61 || m_source == EncodingFromContentSniffing 67 || m_source == EncodingFromContentSniffing
62 || m_source == DefaultEncodingAttemptedSniffing; 68 || m_source == DefaultEncodingAttemptedSniffing;
(...skipping 16 matching lines...) Expand all
79 void setHintEncoding(const WTF::TextEncoding& encoding) 85 void setHintEncoding(const WTF::TextEncoding& encoding)
80 { 86 {
81 m_hintEncoding = encoding.name(); 87 m_hintEncoding = encoding.name();
82 } 88 }
83 89
84 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } 90 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
85 bool sawError() const { return m_sawError; } 91 bool sawError() const { return m_sawError; }
86 size_t checkForBOM(const char*, size_t); 92 size_t checkForBOM(const char*, size_t);
87 93
88 private: 94 private:
89 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, bool usesEncodingDetector); 95
96 // TextResourceDecoder does three kinds of encoding detection:
97 // 1. By BOM,
98 // 2. By Content if |m_contentType| is not |PlainTextContent|
99 // (e.g. <meta> tag for HTML), and
100 // 3. By detectTextEncoding().
101 enum EncodingDetectionOption {
102 // Use 1. + 2. + 3.
103 UseAllAutoDetection,
104
105 // Use 1. + 2.
106 UseContentAndBOMBasedDetection,
107
108 // Use None of them.
109 // |m_contentType| must be |PlainTextContent| and
110 // |m_encoding| must be UTF8Encoding.
111 // This doesn't change encoding based on BOMs, but still processes
112 // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result.
113 AlwaysUseUTF8ForText
114 };
115
116 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, EncodingDetectionOption);
90 117
91 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM. 118 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM.
92 static ContentType determineContentType(const String& mimeType); 119 static ContentType determineContentType(const String& mimeType);
93 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding); 120 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding);
94 121
95 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); 122 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
96 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer); 123 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer);
97 void checkForMetaCharset(const char*, size_t); 124 void checkForMetaCharset(const char*, size_t);
98 bool shouldAutoDetect() const; 125 bool shouldAutoDetect() const;
99 void detectTextEncoding(const char*, size_t); 126 void detectTextEncoding(const char*, size_t);
100 127
101 ContentType m_contentType; 128 ContentType m_contentType;
102 WTF::TextEncoding m_encoding; 129 WTF::TextEncoding m_encoding;
103 OwnPtr<TextCodec> m_codec; 130 OwnPtr<TextCodec> m_codec;
104 EncodingSource m_source; 131 EncodingSource m_source;
105 const char* m_hintEncoding; 132 const char* m_hintEncoding;
106 Vector<char> m_buffer; 133 Vector<char> m_buffer;
107 bool m_checkedForBOM; 134 bool m_checkedForBOM;
108 bool m_checkedForCSSCharset; 135 bool m_checkedForCSSCharset;
109 bool m_checkedForXMLCharset; 136 bool m_checkedForXMLCharset;
110 bool m_checkedForMetaCharset; 137 bool m_checkedForMetaCharset;
111 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. 138 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
112 bool m_sawError; 139 bool m_sawError;
113 bool m_usesEncodingDetector; 140 EncodingDetectionOption m_encodingDetectionOption;
114 141
115 OwnPtr<HTMLMetaCharsetParser> m_charsetParser; 142 OwnPtr<HTMLMetaCharsetParser> m_charsetParser;
116 }; 143 };
117 144
118 } 145 }
119 146
120 #endif 147 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698