Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) | 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) |
| 3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) | 3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) |
| 4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. | 4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. |
| 5 | 5 |
| 6 This library is free software; you can redistribute it and/or | 6 This library is free software; you can redistribute it and/or |
| 7 modify it under the terms of the GNU Library General Public | 7 modify it under the terms of the GNU Library General Public |
| 8 License as published by the Free Software Foundation; either | 8 License as published by the Free Software Foundation; either |
| 9 version 2 of the License, or (at your option) any later version. | 9 version 2 of the License, or (at your option) any later version. |
| 10 | 10 |
| (...skipping 28 matching lines...) Expand all Loading... | |
| 39 DefaultEncoding, | 39 DefaultEncoding, |
| 40 AutoDetectedEncoding, | 40 AutoDetectedEncoding, |
| 41 EncodingFromContentSniffing, | 41 EncodingFromContentSniffing, |
| 42 EncodingFromXMLHeader, | 42 EncodingFromXMLHeader, |
| 43 EncodingFromMetaTag, | 43 EncodingFromMetaTag, |
| 44 EncodingFromCSSCharset, | 44 EncodingFromCSSCharset, |
| 45 EncodingFromHTTPHeader, | 45 EncodingFromHTTPHeader, |
| 46 EncodingFromParentFrame | 46 EncodingFromParentFrame |
| 47 }; | 47 }; |
| 48 | 48 |
| 49 static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetector = false) | 49 static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetector = false) |
|
kouhei (in TOK)
2015/12/14 02:00:22
Most of the create() methods use exactly same para
hiroshige
2015/12/14 05:44:02
I'd like to enforce mimeType == "plain/text" && de
| |
| 50 { | 50 { |
| 51 return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector)); | 51 return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector ? UseAllAutoDetection : UseContentAndBOMBasedDetection)); |
| 52 } | |
| 53 // Corresponds to utf-8 decode in Encoding spec: | |
| 54 // https://encoding.spec.whatwg.org/#utf-8-decode. | |
| 55 static PassOwnPtr<TextResourceDecoder> createAlwaysUseUTF8ForText() | |
| 56 { | |
| 57 return adoptPtr(new TextResourceDecoder("plain/text", UTF8Encoding(), AlwaysUseUTF8ForText)); | |
| 52 } | 58 } |
| 53 ~TextResourceDecoder(); | 59 ~TextResourceDecoder(); |
| 54 | 60 |
| 55 void setEncoding(const WTF::TextEncoding&, EncodingSource); | 61 void setEncoding(const WTF::TextEncoding&, EncodingSource); |
| 56 const WTF::TextEncoding& encoding() const { return m_encoding; } | 62 const WTF::TextEncoding& encoding() const { return m_encoding; } |
| 57 bool encodingWasDetectedHeuristically() const | 63 bool encodingWasDetectedHeuristically() const |
| 58 { | 64 { |
| 59 return m_source == AutoDetectedEncoding | 65 return m_source == AutoDetectedEncoding |
| 60 || m_source == EncodingFromContentSniffing; | 66 || m_source == EncodingFromContentSniffing; |
| 61 } | 67 } |
| 62 | 68 |
| 63 String decode(const char* data, size_t length); | 69 String decode(const char* data, size_t length); |
| 64 String flush(); | 70 String flush(); |
| 65 | 71 |
| 66 void setHintEncoding(const WTF::TextEncoding& encoding) | 72 void setHintEncoding(const WTF::TextEncoding& encoding) |
| 67 { | 73 { |
| 68 m_hintEncoding = encoding.name(); | 74 m_hintEncoding = encoding.name(); |
| 69 } | 75 } |
| 70 | 76 |
| 71 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } | 77 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } |
| 72 bool sawError() const { return m_sawError; } | 78 bool sawError() const { return m_sawError; } |
| 73 size_t checkForBOM(const char*, size_t); | 79 size_t checkForBOM(const char*, size_t); |
| 74 | 80 |
| 75 private: | 81 private: |
| 76 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, bool usesEncodingDetector); | 82 |
| 83 // TextResourceDecoder does three kinds of encoding detection: | |
| 84 // 1. By BOM, | |
| 85 // 2. By Content if |m_contentType| is not |PlainTextContent| | |
| 86 // (e.g. <meta> tag for HTML), and | |
| 87 // 3. By detectTextEncoding(). | |
| 88 enum EncodingDetectionOption { | |
| 89 // Use 1. + 2. + 3. | |
| 90 UseAllAutoDetection, | |
| 91 | |
| 92 // Use 1. + 2. | |
| 93 UseContentAndBOMBasedDetection, | |
| 94 | |
| 95 // Use None of them. | |
| 96 // |m_contentType| must be |PlainTextContent| and | |
| 97 // |m_encoding| must be UTF8Encoding. | |
| 98 // This doesn't change encoding based on BOMs, but still processes | |
| 99 // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result. | |
| 100 AlwaysUseUTF8ForText | |
| 101 }; | |
| 102 | |
| 103 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, EncodingDetectionOption); | |
| 77 | 104 |
| 78 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM. | 105 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM. |
| 79 static ContentType determineContentType(const String& mimeType); | 106 static ContentType determineContentType(const String& mimeType); |
| 80 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding); | 107 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding); |
| 81 | 108 |
| 82 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); | 109 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); |
| 83 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer); | 110 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer); |
| 84 void checkForMetaCharset(const char*, size_t); | 111 void checkForMetaCharset(const char*, size_t); |
| 85 bool shouldAutoDetect() const; | 112 bool shouldAutoDetect() const; |
| 86 | 113 |
| 87 ContentType m_contentType; | 114 ContentType m_contentType; |
| 88 WTF::TextEncoding m_encoding; | 115 WTF::TextEncoding m_encoding; |
| 89 OwnPtr<TextCodec> m_codec; | 116 OwnPtr<TextCodec> m_codec; |
| 90 EncodingSource m_source; | 117 EncodingSource m_source; |
| 91 const char* m_hintEncoding; | 118 const char* m_hintEncoding; |
| 92 Vector<char> m_buffer; | 119 Vector<char> m_buffer; |
| 93 bool m_checkedForBOM; | 120 bool m_checkedForBOM; |
| 94 bool m_checkedForCSSCharset; | 121 bool m_checkedForCSSCharset; |
| 95 bool m_checkedForXMLCharset; | 122 bool m_checkedForXMLCharset; |
| 96 bool m_checkedForMetaCharset; | 123 bool m_checkedForMetaCharset; |
| 97 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. | 124 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. |
| 98 bool m_sawError; | 125 bool m_sawError; |
| 99 bool m_usesEncodingDetector; | 126 EncodingDetectionOption m_encodingDetectionOption; |
| 100 | 127 |
| 101 OwnPtr<HTMLMetaCharsetParser> m_charsetParser; | 128 OwnPtr<HTMLMetaCharsetParser> m_charsetParser; |
| 102 }; | 129 }; |
| 103 | 130 |
| 104 } | 131 } |
| 105 | 132 |
| 106 #endif | 133 #endif |
| OLD | NEW |