Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(226)

Side by Side Diff: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h

Issue 1979103003: Revert "Reland "UTF-8 detector for pages missing encoding info"" (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: rebased Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) 3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. 4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
5 5
6 This library is free software; you can redistribute it and/or 6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public 7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either 8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version. 9 version 2 of the License, or (at your option) any later version.
10 10
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
72 { 72 {
73 m_hintEncoding = encoding.name(); 73 m_hintEncoding = encoding.name();
74 } 74 }
75 75
76 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } 76 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
77 bool sawError() const { return m_sawError; } 77 bool sawError() const { return m_sawError; }
78 size_t checkForBOM(const char*, size_t); 78 size_t checkForBOM(const char*, size_t);
79 79
80 private: 80 private:
81 81
82 // TextResourceDecoder does four kinds of encoding detection: 82 // TextResourceDecoder does three kinds of encoding detection:
83 // 1. By BOM, 83 // 1. By BOM,
84 // 2. By Content if |m_contentType| is not |PlainTextContent| 84 // 2. By Content if |m_contentType| is not |PlainTextContent|
85 // (e.g. <meta> tag for HTML), 85 // (e.g. <meta> tag for HTML), and
86 // 3. By isUTF8Encoded() to detect if the document 86 // 3. By detectTextEncoding().
87 // is of UTF-8, and
88 // 4. By detectTextEncodingUniversal().
89 enum EncodingDetectionOption { 87 enum EncodingDetectionOption {
90 // Use 1. + 2. + 4. 88 // Use 1. + 2. + 3.
91 UseAllAutoDetection, 89 UseAllAutoDetection,
92 90
93 // Use 1. + 2. + 3. 91 // Use 1. + 2.
94 UseContentAndBOMBasedDetection, 92 UseContentAndBOMBasedDetection,
95 93
96 // Use None of them. 94 // Use None of them.
97 // |m_contentType| must be |PlainTextContent| and 95 // |m_contentType| must be |PlainTextContent| and
98 // |m_encoding| must be UTF8Encoding. 96 // |m_encoding| must be UTF8Encoding.
99 // This doesn't change encoding based on BOMs, but still processes 97 // This doesn't change encoding based on BOMs, but still processes
100 // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result. 98 // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result.
101 AlwaysUseUTF8ForText 99 AlwaysUseUTF8ForText
102 }; 100 };
103 101
104 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, EncodingDetectionOption); 102 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, EncodingDetectionOption);
105 103
106 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM. 104 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM.
107 static ContentType determineContentType(const String& mimeType); 105 static ContentType determineContentType(const String& mimeType);
108 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding); 106 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding);
109 107
110 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); 108 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
111 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer); 109 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer);
112 void checkForMetaCharset(const char*, size_t); 110 void checkForMetaCharset(const char*, size_t);
113 void detectTextEncoding(const char*, size_t); 111 bool shouldAutoDetect() const;
114 bool shouldDetectEncoding() const;
115 112
116 ContentType m_contentType; 113 ContentType m_contentType;
117 WTF::TextEncoding m_encoding; 114 WTF::TextEncoding m_encoding;
118 OwnPtr<TextCodec> m_codec; 115 OwnPtr<TextCodec> m_codec;
119 EncodingSource m_source; 116 EncodingSource m_source;
120 const char* m_hintEncoding; 117 const char* m_hintEncoding;
121 Vector<char> m_buffer; 118 Vector<char> m_buffer;
122 bool m_checkedForBOM; 119 bool m_checkedForBOM;
123 bool m_checkedForCSSCharset; 120 bool m_checkedForCSSCharset;
124 bool m_checkedForXMLCharset; 121 bool m_checkedForXMLCharset;
125 bool m_checkedForMetaCharset; 122 bool m_checkedForMetaCharset;
126 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. 123 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
127 bool m_sawError; 124 bool m_sawError;
128 EncodingDetectionOption m_encodingDetectionOption; 125 EncodingDetectionOption m_encodingDetectionOption;
129 126
130 OwnPtr<HTMLMetaCharsetParser> m_charsetParser; 127 OwnPtr<HTMLMetaCharsetParser> m_charsetParser;
131 }; 128 };
132 129
133 } // namespace blink 130 } // namespace blink
134 131
135 #endif 132 #endif
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/core/core.gypi ('k') | third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698