OLD | NEW |
1 /* | 1 /* |
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) | 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) |
3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) | 3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) |
4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. | 4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. |
5 | 5 |
6 This library is free software; you can redistribute it and/or | 6 This library is free software; you can redistribute it and/or |
7 modify it under the terms of the GNU Library General Public | 7 modify it under the terms of the GNU Library General Public |
8 License as published by the Free Software Foundation; either | 8 License as published by the Free Software Foundation; either |
9 version 2 of the License, or (at your option) any later version. | 9 version 2 of the License, or (at your option) any later version. |
10 | 10 |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
42 EncodingFromContentSniffing, | 42 EncodingFromContentSniffing, |
43 EncodingFromXMLHeader, | 43 EncodingFromXMLHeader, |
44 EncodingFromMetaTag, | 44 EncodingFromMetaTag, |
45 EncodingFromCSSCharset, | 45 EncodingFromCSSCharset, |
46 EncodingFromHTTPHeader, | 46 EncodingFromHTTPHeader, |
47 EncodingFromParentFrame | 47 EncodingFromParentFrame |
48 }; | 48 }; |
49 | 49 |
50 static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const
WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetec
tor = false) | 50 static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const
WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetec
tor = false) |
51 { | 51 { |
52 return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesE
ncodingDetector)); | 52 return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesE
ncodingDetector ? UseAllAutoDetection : UseContentAndBOMBasedDetection)); |
| 53 } |
| 54 // Corresponds to utf-8 decode in Encoding spec: |
| 55 // https://encoding.spec.whatwg.org/#utf-8-decode. |
| 56 static PassOwnPtr<TextResourceDecoder> createAlwaysUseUTF8ForText() |
| 57 { |
| 58 return adoptPtr(new TextResourceDecoder("plain/text", UTF8Encoding(), Al
waysUseUTF8ForText)); |
53 } | 59 } |
54 ~TextResourceDecoder(); | 60 ~TextResourceDecoder(); |
55 | 61 |
56 void setEncoding(const WTF::TextEncoding&, EncodingSource); | 62 void setEncoding(const WTF::TextEncoding&, EncodingSource); |
57 const WTF::TextEncoding& encoding() const { return m_encoding; } | 63 const WTF::TextEncoding& encoding() const { return m_encoding; } |
58 bool encodingWasDetectedHeuristically() const | 64 bool encodingWasDetectedHeuristically() const |
59 { | 65 { |
60 return m_source == AutoDetectedEncoding | 66 return m_source == AutoDetectedEncoding |
61 || m_source == EncodingFromContentSniffing | 67 || m_source == EncodingFromContentSniffing |
62 || m_source == DefaultEncodingAttemptedSniffing; | 68 || m_source == DefaultEncodingAttemptedSniffing; |
(...skipping 16 matching lines...) Expand all Loading... |
79 void setHintEncoding(const WTF::TextEncoding& encoding) | 85 void setHintEncoding(const WTF::TextEncoding& encoding) |
80 { | 86 { |
81 m_hintEncoding = encoding.name(); | 87 m_hintEncoding = encoding.name(); |
82 } | 88 } |
83 | 89 |
84 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } | 90 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } |
85 bool sawError() const { return m_sawError; } | 91 bool sawError() const { return m_sawError; } |
86 size_t checkForBOM(const char*, size_t); | 92 size_t checkForBOM(const char*, size_t); |
87 | 93 |
88 private: | 94 private: |
89 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& default
Encoding, bool usesEncodingDetector); | 95 |
| 96 // TextResourceDecoder does three kinds of encoding detection: |
| 97 // 1. By BOM, |
| 98 // 2. By Content if |m_contentType| is not |PlainTextContent| |
| 99 // (e.g. <meta> tag for HTML), and |
| 100 // 3. By detectTextEncoding(). |
| 101 enum EncodingDetectionOption { |
| 102 // Use 1. + 2. + 3. |
| 103 UseAllAutoDetection, |
| 104 |
| 105 // Use 1. + 2. |
| 106 UseContentAndBOMBasedDetection, |
| 107 |
| 108 // Use none of them. |
| 109 // |m_contentType| must be |PlainTextContent| and |
| 110 // |m_encoding| must be UTF8Encoding. |
| 111 // This doesn't change encoding based on BOMs, but still processes |
| 112 // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result. |
| 113 AlwaysUseUTF8ForText |
| 114 }; |
| 115 |
| 116 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& default
Encoding, EncodingDetectionOption); |
90 | 117 |
91 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent };
// PlainText only checks for BOM. | 118 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent };
// PlainText only checks for BOM. |
92 static ContentType determineContentType(const String& mimeType); | 119 static ContentType determineContentType(const String& mimeType); |
93 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::Text
Encoding& defaultEncoding); | 120 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::Text
Encoding& defaultEncoding); |
94 | 121 |
95 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); | 122 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); |
96 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer); | 123 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer); |
97 void checkForMetaCharset(const char*, size_t); | 124 void checkForMetaCharset(const char*, size_t); |
98 bool shouldAutoDetect() const; | 125 bool shouldAutoDetect() const; |
99 void detectTextEncoding(const char*, size_t); | 126 void detectTextEncoding(const char*, size_t); |
100 | 127 |
101 ContentType m_contentType; | 128 ContentType m_contentType; |
102 WTF::TextEncoding m_encoding; | 129 WTF::TextEncoding m_encoding; |
103 OwnPtr<TextCodec> m_codec; | 130 OwnPtr<TextCodec> m_codec; |
104 EncodingSource m_source; | 131 EncodingSource m_source; |
105 const char* m_hintEncoding; | 132 const char* m_hintEncoding; |
106 Vector<char> m_buffer; | 133 Vector<char> m_buffer; |
107 bool m_checkedForBOM; | 134 bool m_checkedForBOM; |
108 bool m_checkedForCSSCharset; | 135 bool m_checkedForCSSCharset; |
109 bool m_checkedForXMLCharset; | 136 bool m_checkedForXMLCharset; |
110 bool m_checkedForMetaCharset; | 137 bool m_checkedForMetaCharset; |
111 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. | 138 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. |
112 bool m_sawError; | 139 bool m_sawError; |
113 bool m_usesEncodingDetector; | 140 EncodingDetectionOption m_encodingDetectionOption; |
114 | 141 |
115 OwnPtr<HTMLMetaCharsetParser> m_charsetParser; | 142 OwnPtr<HTMLMetaCharsetParser> m_charsetParser; |
116 }; | 143 }; |
117 | 144 |
118 } | 145 } |
119 | 146 |
120 #endif | 147 #endif |
OLD | NEW |