Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(40)

Side by Side Diff: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h

Issue 1721373002: UTF-8 detector for pages missing encoding info (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) 3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. 4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
5 5
6 This library is free software; you can redistribute it and/or 6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public 7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either 8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version. 9 version 2 of the License, or (at your option) any later version.
10 10
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
73 { 73 {
74 m_hintEncoding = encoding.name(); 74 m_hintEncoding = encoding.name();
75 } 75 }
76 76
77 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } 77 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
78 bool sawError() const { return m_sawError; } 78 bool sawError() const { return m_sawError; }
79 size_t checkForBOM(const char*, size_t); 79 size_t checkForBOM(const char*, size_t);
80 80
81 private: 81 private:
82 82
83 // TextResourceDecoder does three kind of encoding detection: 83 // TextResourceDecoder does four kinds of encoding detection:
84 // 1. By BOM, 84 // 1. By BOM,
85 // 2. By Content if |m_contentType| is not |PlainTextContent| 85 // 2. By Content if |m_contentType| is not |PlainTextContent|
86 // (e.g. <meta> tag for HTML), and 86 // (e.g. <meta> tag for HTML),
87 // 3. By detectTextEncoding(). 87 // 3. By isUTF8Encoded() to detect if the document
88 // is encoded in UTF-8, and
89 // 4. By detectTextEncoding().
88 enum EncodingDetectionOption { 90 enum EncodingDetectionOption {
89 // Use 1. + 2. + 3. 91 // Use 1. + 2. + 4.
90 UseAllAutoDetection, 92 UseAllAutoDetection,
91 93
92 // Use 1. + 2. 94 // Use 1. + 2. + 3.
93 UseContentAndBOMBasedDetection, 95 UseContentAndBOMBasedDetection,
94 96
95 // Use None of them. 97 // Use None of them.
96 // |m_contentType| must be |PlainTextContent| and 98 // |m_contentType| must be |PlainTextContent| and
97 // |m_encoding| must be UTF8Encoding. 99 // |m_encoding| must be UTF8Encoding.
98 // This doesn't change encoding based on BOMs, but still processes 100 // This doesn't change encoding based on BOMs, but still processes
99 // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result. 101 // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result.
100 AlwaysUseUTF8ForText 102 AlwaysUseUTF8ForText
101 }; 103 };
102 104
(...skipping 21 matching lines...) Expand all
124 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. 126 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
125 bool m_sawError; 127 bool m_sawError;
126 EncodingDetectionOption m_encodingDetectionOption; 128 EncodingDetectionOption m_encodingDetectionOption;
127 129
128 OwnPtr<HTMLMetaCharsetParser> m_charsetParser; 130 OwnPtr<HTMLMetaCharsetParser> m_charsetParser;
129 }; 131 };
130 132
131 } // namespace blink 133 } // namespace blink
132 134
133 #endif 135 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698