Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(47)

Side by Side Diff: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h

Issue 1890103002: Reland "UTF-8 detector for pages missing encoding info" (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) 3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. 4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
5 5
6 This library is free software; you can redistribute it and/or 6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public 7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either 8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version. 9 version 2 of the License, or (at your option) any later version.
10 10
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
72 { 72 {
73 m_hintEncoding = encoding.name(); 73 m_hintEncoding = encoding.name();
74 } 74 }
75 75
76 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } 76 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
77 bool sawError() const { return m_sawError; } 77 bool sawError() const { return m_sawError; }
78 size_t checkForBOM(const char*, size_t); 78 size_t checkForBOM(const char*, size_t);
79 79
80 private: 80 private:
81 81
82 // TextResourceDecoder does three kind of encoding detection: 82 // TextResourceDecoder does four kinds of encoding detection:
83 // 1. By BOM, 83 // 1. By BOM,
84 // 2. By Content if |m_contentType| is not |PlainTextContent| 84 // 2. By Content if |m_contentType| is not |PlainTextContent|
85 // (e.g. <meta> tag for HTML), and 85 // (e.g. <meta> tag for HTML),
86 // 3. By detectTextEncoding(). 86 // 3. By isUTF8Encoded() to detect if the document
87 // is encoded in UTF-8, and
88 // 4. By detectTextEncodingUniversal().
87 enum EncodingDetectionOption { 89 enum EncodingDetectionOption {
88 // Use 1. + 2. + 3. 90 // Use 1. + 2. + 4.
89 UseAllAutoDetection, 91 UseAllAutoDetection,
90 92
91 // Use 1. + 2. 93 // Use 1. + 2. + 3.
92 UseContentAndBOMBasedDetection, 94 UseContentAndBOMBasedDetection,
93 95
94 // Use None of them. 96 // Use None of them.
95 // |m_contentType| must be |PlainTextContent| and 97 // |m_contentType| must be |PlainTextContent| and
96 // |m_encoding| must be UTF8Encoding. 98 // |m_encoding| must be UTF8Encoding.
97 // This doesn't change encoding based on BOMs, but still processes 99 // This doesn't change encoding based on BOMs, but still processes
98 // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result. 100 // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result.
99 AlwaysUseUTF8ForText 101 AlwaysUseUTF8ForText
100 }; 102 };
101 103
102 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, EncodingDetectionOption); 104 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, EncodingDetectionOption);
103 105
104 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM. 106 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM.
105 static ContentType determineContentType(const String& mimeType); 107 static ContentType determineContentType(const String& mimeType);
106 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding); 108 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding);
107 109
108 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); 110 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
109 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer); 111 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer);
110 void checkForMetaCharset(const char*, size_t); 112 void checkForMetaCharset(const char*, size_t);
111 bool shouldAutoDetect() const; 113 void detectTextEncoding(const char*, size_t);
114 bool shouldDetectEncoding() const;
112 115
113 ContentType m_contentType; 116 ContentType m_contentType;
114 WTF::TextEncoding m_encoding; 117 WTF::TextEncoding m_encoding;
115 OwnPtr<TextCodec> m_codec; 118 OwnPtr<TextCodec> m_codec;
116 EncodingSource m_source; 119 EncodingSource m_source;
117 const char* m_hintEncoding; 120 const char* m_hintEncoding;
118 Vector<char> m_buffer; 121 Vector<char> m_buffer;
119 bool m_checkedForBOM; 122 bool m_checkedForBOM;
120 bool m_checkedForCSSCharset; 123 bool m_checkedForCSSCharset;
121 bool m_checkedForXMLCharset; 124 bool m_checkedForXMLCharset;
122 bool m_checkedForMetaCharset; 125 bool m_checkedForMetaCharset;
123 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. 126 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
124 bool m_sawError; 127 bool m_sawError;
125 EncodingDetectionOption m_encodingDetectionOption; 128 EncodingDetectionOption m_encodingDetectionOption;
126 129
127 OwnPtr<HTMLMetaCharsetParser> m_charsetParser; 130 OwnPtr<HTMLMetaCharsetParser> m_charsetParser;
128 }; 131 };
129 132
130 } // namespace blink 133 } // namespace blink
131 134
132 #endif 135 #endif
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/core/core.gypi ('k') | third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698