Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1022)

Side by Side Diff: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h

Issue 1456843002: Finch experiment: auto-detect text encoding (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: addressed comments Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) 3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. 4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
5 5
6 This library is free software; you can redistribute it and/or 6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public 7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either 8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version. 9 version 2 of the License, or (at your option) any later version.
10 10
(...skipping 19 matching lines...) Expand all
30 namespace blink { 30 namespace blink {
31 31
32 class HTMLMetaCharsetParser; 32 class HTMLMetaCharsetParser;
33 33
34 class CORE_EXPORT TextResourceDecoder { 34 class CORE_EXPORT TextResourceDecoder {
35 USING_FAST_MALLOC(TextResourceDecoder); 35 USING_FAST_MALLOC(TextResourceDecoder);
36 WTF_MAKE_NONCOPYABLE(TextResourceDecoder); 36 WTF_MAKE_NONCOPYABLE(TextResourceDecoder);
37 public: 37 public:
38 enum EncodingSource { 38 enum EncodingSource {
39 DefaultEncoding, 39 DefaultEncoding,
40 DefaultEncodingAttemptedSniffing,
40 AutoDetectedEncoding, 41 AutoDetectedEncoding,
41 EncodingFromContentSniffing, 42 EncodingFromContentSniffing,
42 EncodingFromXMLHeader, 43 EncodingFromXMLHeader,
43 EncodingFromMetaTag, 44 EncodingFromMetaTag,
44 EncodingFromCSSCharset, 45 EncodingFromCSSCharset,
45 EncodingFromHTTPHeader, 46 EncodingFromHTTPHeader,
46 EncodingFromParentFrame 47 EncodingFromParentFrame
47 }; 48 };
48 49
49 static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetector = false) 50 static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetector = false)
50 { 51 {
51 return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector)); 52 return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector));
52 } 53 }
53 ~TextResourceDecoder(); 54 ~TextResourceDecoder();
54 55
55 void setEncoding(const WTF::TextEncoding&, EncodingSource); 56 void setEncoding(const WTF::TextEncoding&, EncodingSource);
56 const WTF::TextEncoding& encoding() const { return m_encoding; } 57 const WTF::TextEncoding& encoding() const { return m_encoding; }
57 bool encodingWasDetectedHeuristically() const 58 bool encodingWasDetectedHeuristically() const
58 { 59 {
59 return m_source == AutoDetectedEncoding 60 return m_source == AutoDetectedEncoding
61 || m_source == EncodingFromContentSniffing
62 || m_source == DefaultEncodingAttemptedSniffing;
63 }
64
65 bool encodingWasDetectedFromContentSniffing() const
66 {
67 return m_source == EncodingFromContentSniffing;
68 }
69
70 bool attemptedToDetermineEncodingFromContentSniffing() const
71 {
72 return m_source == DefaultEncodingAttemptedSniffing
60 || m_source == EncodingFromContentSniffing; 73 || m_source == EncodingFromContentSniffing;
61 } 74 }
62 75
63 String decode(const char* data, size_t length); 76 String decode(const char* data, size_t length);
64 String flush(); 77 String flush();
65 78
66 void setHintEncoding(const WTF::TextEncoding& encoding) 79 void setHintEncoding(const WTF::TextEncoding& encoding)
67 { 80 {
68 m_hintEncoding = encoding.name(); 81 m_hintEncoding = encoding.name();
69 } 82 }
70 83
71 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } 84 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
72 bool sawError() const { return m_sawError; } 85 bool sawError() const { return m_sawError; }
73 size_t checkForBOM(const char*, size_t); 86 size_t checkForBOM(const char*, size_t);
74 87
75 private: 88 private:
76 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, bool usesEncodingDetector); 89 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, bool usesEncodingDetector);
77 90
78 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM. 91 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM.
79 static ContentType determineContentType(const String& mimeType); 92 static ContentType determineContentType(const String& mimeType);
80 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding); 93 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding);
81 94
82 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); 95 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
83 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer); 96 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer);
84 void checkForMetaCharset(const char*, size_t); 97 void checkForMetaCharset(const char*, size_t);
85 bool shouldAutoDetect() const; 98 bool shouldAutoDetect() const;
99 void detectTextEncoding(const char*, size_t);
86 100
87 ContentType m_contentType; 101 ContentType m_contentType;
88 WTF::TextEncoding m_encoding; 102 WTF::TextEncoding m_encoding;
89 OwnPtr<TextCodec> m_codec; 103 OwnPtr<TextCodec> m_codec;
90 EncodingSource m_source; 104 EncodingSource m_source;
91 const char* m_hintEncoding; 105 const char* m_hintEncoding;
92 Vector<char> m_buffer; 106 Vector<char> m_buffer;
93 bool m_checkedForBOM; 107 bool m_checkedForBOM;
94 bool m_checkedForCSSCharset; 108 bool m_checkedForCSSCharset;
95 bool m_checkedForXMLCharset; 109 bool m_checkedForXMLCharset;
96 bool m_checkedForMetaCharset; 110 bool m_checkedForMetaCharset;
97 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. 111 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
98 bool m_sawError; 112 bool m_sawError;
99 bool m_usesEncodingDetector; 113 bool m_usesEncodingDetector;
100 114
101 OwnPtr<HTMLMetaCharsetParser> m_charsetParser; 115 OwnPtr<HTMLMetaCharsetParser> m_charsetParser;
102 }; 116 };
103 117
104 } 118 }
105 119
106 #endif 120 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698