| OLD | NEW |
| 1 /* | 1 /* |
| 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) | 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) |
| 3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) | 3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) |
| 4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. | 4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. |
| 5 | 5 |
| 6 This library is free software; you can redistribute it and/or | 6 This library is free software; you can redistribute it and/or |
| 7 modify it under the terms of the GNU Library General Public | 7 modify it under the terms of the GNU Library General Public |
| 8 License as published by the Free Software Foundation; either | 8 License as published by the Free Software Foundation; either |
| 9 version 2 of the License, or (at your option) any later version. | 9 version 2 of the License, or (at your option) any later version. |
| 10 | 10 |
| (...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 72 { | 72 { |
| 73 m_hintEncoding = encoding.name(); | 73 m_hintEncoding = encoding.name(); |
| 74 } | 74 } |
| 75 | 75 |
| 76 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } | 76 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } |
| 77 bool sawError() const { return m_sawError; } | 77 bool sawError() const { return m_sawError; } |
| 78 size_t checkForBOM(const char*, size_t); | 78 size_t checkForBOM(const char*, size_t); |
| 79 | 79 |
| 80 private: | 80 private: |
| 81 | 81 |
| 82 // TextResourceDecoder does four kinds of encoding detection: | 82 // TextResourceDecoder does three kinds of encoding detection: |
| 83 // 1. By BOM, | 83 // 1. By BOM, |
| 84 // 2. By Content if |m_contentType| is not |PlainTextContent| | 84 // 2. By Content if |m_contentType| is not |PlainTextContent| |
| 85 // (e.g. <meta> tag for HTML), | 85 // (e.g. <meta> tag for HTML), and |
| 86 // 3. By isUTF8Encoded() to detect if the document | 86 // 3. By detectTextEncoding(). |
| 87 // is of UTF-8, and | |
| 88 // 4. By detectTextEncodingUniversal(). | |
| 89 enum EncodingDetectionOption { | 87 enum EncodingDetectionOption { |
| 90 // Use 1. + 2. + 4. | 88 // Use 1. + 2. + 3. |
| 91 UseAllAutoDetection, | 89 UseAllAutoDetection, |
| 92 | 90 |
| 93 // Use 1. + 2. + 3. | 91 // Use 1. + 2. |
| 94 UseContentAndBOMBasedDetection, | 92 UseContentAndBOMBasedDetection, |
| 95 | 93 |
| 96 // Use None of them. | 94 // Use None of them. |
| 97 // |m_contentType| must be |PlainTextContent| and | 95 // |m_contentType| must be |PlainTextContent| and |
| 98 // |m_encoding| must be UTF8Encoding. | 96 // |m_encoding| must be UTF8Encoding. |
| 99 // This doesn't change encoding based on BOMs, but still processes | 97 // This doesn't change encoding based on BOMs, but still processes |
| 100 // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result. | 98 // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result. |
| 101 AlwaysUseUTF8ForText | 99 AlwaysUseUTF8ForText |
| 102 }; | 100 }; |
| 103 | 101 |
| 104 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, EncodingDetectionOption); | 102 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, EncodingDetectionOption); |
| 105 | 103 |
| 106 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM. | 104 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM. |
| 107 static ContentType determineContentType(const String& mimeType); | 105 static ContentType determineContentType(const String& mimeType); |
| 108 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding); | 106 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding); |
| 109 | 107 |
| 110 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); | 108 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); |
| 111 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer); | 109 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer); |
| 112 void checkForMetaCharset(const char*, size_t); | 110 void checkForMetaCharset(const char*, size_t); |
| 113 void detectTextEncoding(const char*, size_t); | 111 bool shouldAutoDetect() const; |
| 114 bool shouldDetectEncoding() const; | |
| 115 | 112 |
| 116 ContentType m_contentType; | 113 ContentType m_contentType; |
| 117 WTF::TextEncoding m_encoding; | 114 WTF::TextEncoding m_encoding; |
| 118 OwnPtr<TextCodec> m_codec; | 115 OwnPtr<TextCodec> m_codec; |
| 119 EncodingSource m_source; | 116 EncodingSource m_source; |
| 120 const char* m_hintEncoding; | 117 const char* m_hintEncoding; |
| 121 Vector<char> m_buffer; | 118 Vector<char> m_buffer; |
| 122 bool m_checkedForBOM; | 119 bool m_checkedForBOM; |
| 123 bool m_checkedForCSSCharset; | 120 bool m_checkedForCSSCharset; |
| 124 bool m_checkedForXMLCharset; | 121 bool m_checkedForXMLCharset; |
| 125 bool m_checkedForMetaCharset; | 122 bool m_checkedForMetaCharset; |
| 126 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. | 123 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. |
| 127 bool m_sawError; | 124 bool m_sawError; |
| 128 EncodingDetectionOption m_encodingDetectionOption; | 125 EncodingDetectionOption m_encodingDetectionOption; |
| 129 | 126 |
| 130 OwnPtr<HTMLMetaCharsetParser> m_charsetParser; | 127 OwnPtr<HTMLMetaCharsetParser> m_charsetParser; |
| 131 }; | 128 }; |
| 132 | 129 |
| 133 } // namespace blink | 130 } // namespace blink |
| 134 | 131 |
| 135 #endif | 132 #endif |
| OLD | NEW |