Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(99)

Side by Side Diff: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h

Issue 1888083002: Revert of UTF-8 detector for pages missing encoding info (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) 3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. 4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
5 5
6 This library is free software; you can redistribute it and/or 6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public 7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either 8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version. 9 version 2 of the License, or (at your option) any later version.
10 10
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
73 { 73 {
74 m_hintEncoding = encoding.name(); 74 m_hintEncoding = encoding.name();
75 } 75 }
76 76
77 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } 77 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
78 bool sawError() const { return m_sawError; } 78 bool sawError() const { return m_sawError; }
79 size_t checkForBOM(const char*, size_t); 79 size_t checkForBOM(const char*, size_t);
80 80
81 private: 81 private:
82 82
83 // TextResourceDecoder does four kinds of encoding detection: 83 // TextResourceDecoder does three kinds of encoding detection:
84 // 1. By BOM, 84 // 1. By BOM,
85 // 2. By Content if |m_contentType| is not |PlainTextContent| 85 // 2. By Content if |m_contentType| is not |PlainTextContent|
86 // (e.g. <meta> tag for HTML), 86 // (e.g. <meta> tag for HTML), and
87 // 3. By isUTF8Encoded() to detect if the document 87 // 3. By detectTextEncoding().
88 // is of UTF-8, and
89 // 4. By detectTextEncodingUniversal().
90 enum EncodingDetectionOption { 88 enum EncodingDetectionOption {
91 // Use 1. + 2. + 4. 89 // Use 1. + 2. + 3.
92 UseAllAutoDetection, 90 UseAllAutoDetection,
93 91
94 // Use 1. + 2. + 3. 92 // Use 1. + 2.
95 UseContentAndBOMBasedDetection, 93 UseContentAndBOMBasedDetection,
96 94
97 // Use None of them. 95 // Use None of them.
98 // |m_contentType| must be |PlainTextContent| and 96 // |m_contentType| must be |PlainTextContent| and
99 // |m_encoding| must be UTF8Encoding. 97 // |m_encoding| must be UTF8Encoding.
100 // This doesn't change encoding based on BOMs, but still processes 98 // This doesn't change encoding based on BOMs, but still processes
101 // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result. 99 // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result.
102 AlwaysUseUTF8ForText 100 AlwaysUseUTF8ForText
103 }; 101 };
104 102
105 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, EncodingDetectionOption); 103 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, EncodingDetectionOption);
106 104
107 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM. 105 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM.
108 static ContentType determineContentType(const String& mimeType); 106 static ContentType determineContentType(const String& mimeType);
109 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding); 107 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding);
110 108
111 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); 109 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
112 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer); 110 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer);
113 void checkForMetaCharset(const char*, size_t); 111 void checkForMetaCharset(const char*, size_t);
114 void detectTextEncoding(const char*, size_t); 112 bool shouldAutoDetect() const;
115 bool shouldDetectEncoding() const;
116 113
117 ContentType m_contentType; 114 ContentType m_contentType;
118 WTF::TextEncoding m_encoding; 115 WTF::TextEncoding m_encoding;
119 OwnPtr<TextCodec> m_codec; 116 OwnPtr<TextCodec> m_codec;
120 EncodingSource m_source; 117 EncodingSource m_source;
121 const char* m_hintEncoding; 118 const char* m_hintEncoding;
122 Vector<char> m_buffer; 119 Vector<char> m_buffer;
123 bool m_checkedForBOM; 120 bool m_checkedForBOM;
124 bool m_checkedForCSSCharset; 121 bool m_checkedForCSSCharset;
125 bool m_checkedForXMLCharset; 122 bool m_checkedForXMLCharset;
126 bool m_checkedForMetaCharset; 123 bool m_checkedForMetaCharset;
127 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. 124 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
128 bool m_sawError; 125 bool m_sawError;
129 EncodingDetectionOption m_encodingDetectionOption; 126 EncodingDetectionOption m_encodingDetectionOption;
130 127
131 OwnPtr<HTMLMetaCharsetParser> m_charsetParser; 128 OwnPtr<HTMLMetaCharsetParser> m_charsetParser;
132 }; 129 };
133 130
134 } // namespace blink 131 } // namespace blink
135 132
136 #endif 133 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698