Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(832)

Side by Side Diff: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h

Issue 2655203002: Merge "Pass more hints to encoding detector." to M57 branch (Closed)
Patch Set: Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) 3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. 4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
5 5
6 This library is free software; you can redistribute it and/or 6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public 7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either 8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version. 9 version 2 of the License, or (at your option) any later version.
10 10
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
43 EncodingFromContentSniffing, 43 EncodingFromContentSniffing,
44 EncodingFromXMLHeader, 44 EncodingFromXMLHeader,
45 EncodingFromMetaTag, 45 EncodingFromMetaTag,
46 EncodingFromCSSCharset, 46 EncodingFromCSSCharset,
47 EncodingFromHTTPHeader, 47 EncodingFromHTTPHeader,
48 EncodingFromParentFrame 48 EncodingFromParentFrame
49 }; 49 };
50 50
51 static std::unique_ptr<TextResourceDecoder> create( 51 static std::unique_ptr<TextResourceDecoder> create(
52 const String& mimeType, 52 const String& mimeType,
53 const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), 53 const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding()) {
54 bool usesEncodingDetector = false) {
55 return WTF::wrapUnique(new TextResourceDecoder( 54 return WTF::wrapUnique(new TextResourceDecoder(
56 mimeType, defaultEncoding, usesEncodingDetector 55 mimeType, defaultEncoding, UseContentAndBOMBasedDetection, String()));
57 ? UseAllAutoDetection
58 : UseContentAndBOMBasedDetection));
59 } 56 }
57
58 static std::unique_ptr<TextResourceDecoder> createWithAutoDetection(
59 const String& mimeType,
60 const WTF::TextEncoding& defaultEncoding,
61 const String& url) {
62 return WTF::wrapUnique(new TextResourceDecoder(mimeType, defaultEncoding,
63 UseAllAutoDetection, url));
64 }
65
60 // Corresponds to utf-8 decode in Encoding spec: 66 // Corresponds to utf-8 decode in Encoding spec:
61 // https://encoding.spec.whatwg.org/#utf-8-decode. 67 // https://encoding.spec.whatwg.org/#utf-8-decode.
62 static std::unique_ptr<TextResourceDecoder> createAlwaysUseUTF8ForText() { 68 static std::unique_ptr<TextResourceDecoder> createAlwaysUseUTF8ForText() {
63 return WTF::wrapUnique(new TextResourceDecoder("plain/text", UTF8Encoding(), 69 return WTF::wrapUnique(new TextResourceDecoder(
64 AlwaysUseUTF8ForText)); 70 "plain/text", UTF8Encoding(), AlwaysUseUTF8ForText, String()));
65 } 71 }
66 ~TextResourceDecoder(); 72 ~TextResourceDecoder();
67 73
68 void setEncoding(const WTF::TextEncoding&, EncodingSource); 74 void setEncoding(const WTF::TextEncoding&, EncodingSource);
69 const WTF::TextEncoding& encoding() const { return m_encoding; } 75 const WTF::TextEncoding& encoding() const { return m_encoding; }
70 bool encodingWasDetectedHeuristically() const { 76 bool encodingWasDetectedHeuristically() const {
71 return m_source == AutoDetectedEncoding || 77 return m_source == AutoDetectedEncoding ||
72 m_source == EncodingFromContentSniffing; 78 m_source == EncodingFromContentSniffing;
73 } 79 }
74 80
(...skipping 24 matching lines...) Expand all
99 // Use None of them. 105 // Use None of them.
100 // |m_contentType| must be |PlainTextContent| and 106 // |m_contentType| must be |PlainTextContent| and
101 // |m_encoding| must be UTF8Encoding. 107 // |m_encoding| must be UTF8Encoding.
102 // This doesn't change encoding based on BOMs, but still processes 108 // This doesn't change encoding based on BOMs, but still processes
103 // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result. 109 // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result.
104 AlwaysUseUTF8ForText 110 AlwaysUseUTF8ForText
105 }; 111 };
106 112
107 TextResourceDecoder(const String& mimeType, 113 TextResourceDecoder(const String& mimeType,
108 const WTF::TextEncoding& defaultEncoding, 114 const WTF::TextEncoding& defaultEncoding,
109 EncodingDetectionOption); 115 EncodingDetectionOption,
116 const String& url);
110 117
111 private: 118 private:
112 enum ContentType { 119 enum ContentType {
113 PlainTextContent, 120 PlainTextContent,
114 HTMLContent, 121 HTMLContent,
115 XMLContent, 122 XMLContent,
116 CSSContent 123 CSSContent
117 }; // PlainText only checks for BOM. 124 }; // PlainText only checks for BOM.
118 static ContentType determineContentType(const String& mimeType); 125 static ContentType determineContentType(const String& mimeType);
119 static const WTF::TextEncoding& defaultEncoding( 126 static const WTF::TextEncoding& defaultEncoding(
120 ContentType, 127 ContentType,
121 const WTF::TextEncoding& defaultEncoding); 128 const WTF::TextEncoding& defaultEncoding);
122 129
123 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); 130 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
124 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer); 131 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer);
125 void checkForMetaCharset(const char*, size_t); 132 void checkForMetaCharset(const char*, size_t);
126 bool shouldAutoDetect() const; 133 bool shouldAutoDetect() const;
127 134
128 ContentType m_contentType; 135 ContentType m_contentType;
129 WTF::TextEncoding m_encoding; 136 WTF::TextEncoding m_encoding;
130 std::unique_ptr<TextCodec> m_codec; 137 std::unique_ptr<TextCodec> m_codec;
131 EncodingSource m_source; 138 EncodingSource m_source;
132 const char* m_hintEncoding; 139 const char* m_hintEncoding;
140 const CString m_hintUrl;
133 Vector<char> m_buffer; 141 Vector<char> m_buffer;
142 char m_hintLanguage[3];
134 bool m_checkedForBOM; 143 bool m_checkedForBOM;
135 bool m_checkedForCSSCharset; 144 bool m_checkedForCSSCharset;
136 bool m_checkedForXMLCharset; 145 bool m_checkedForXMLCharset;
137 bool m_checkedForMetaCharset; 146 bool m_checkedForMetaCharset;
138 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. 147 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
139 bool m_sawError; 148 bool m_sawError;
140 EncodingDetectionOption m_encodingDetectionOption; 149 EncodingDetectionOption m_encodingDetectionOption;
141 150
142 std::unique_ptr<HTMLMetaCharsetParser> m_charsetParser; 151 std::unique_ptr<HTMLMetaCharsetParser> m_charsetParser;
143 }; 152 };
144 153
145 } // namespace blink 154 } // namespace blink
146 155
147 #endif 156 #endif
OLD | NEW
« no previous file with comments | « no previous file | third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698