Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(177)

Side by Side Diff: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h

Issue 1470893002: [Fetch] Always use utf-8 for decoding in text() (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Remove numerical enum value comparison. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) 3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. 4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
5 5
6 This library is free software; you can redistribute it and/or 6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public 7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either 8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version. 9 version 2 of the License, or (at your option) any later version.
10 10
(...skipping 28 matching lines...) Expand all
39 DefaultEncoding, 39 DefaultEncoding,
40 AutoDetectedEncoding, 40 AutoDetectedEncoding,
41 EncodingFromContentSniffing, 41 EncodingFromContentSniffing,
42 EncodingFromXMLHeader, 42 EncodingFromXMLHeader,
43 EncodingFromMetaTag, 43 EncodingFromMetaTag,
44 EncodingFromCSSCharset, 44 EncodingFromCSSCharset,
45 EncodingFromHTTPHeader, 45 EncodingFromHTTPHeader,
46 EncodingFromParentFrame 46 EncodingFromParentFrame
47 }; 47 };
48 48
49 static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetec tor = false) 49 static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetec tor = false)
kouhei (in TOK) 2015/12/14 02:00:22 Most of the create() methods use exactly same para
hiroshige 2015/12/14 05:44:02 I'd like to enforce mimeType == "plain/text" && de
50 { 50 {
51 return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesE ncodingDetector)); 51 return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesE ncodingDetector ? UseAllAutoDetection : UseContentAndBOMBasedDetection));
52 }
53 // Corresponds to utf-8 decode in Encoding spec:
54 // https://encoding.spec.whatwg.org/#utf-8-decode.
55 static PassOwnPtr<TextResourceDecoder> createAlwaysUseUTF8ForText()
56 {
57 return adoptPtr(new TextResourceDecoder("plain/text", UTF8Encoding(), Al waysUseUTF8ForText));
52 } 58 }
53 ~TextResourceDecoder(); 59 ~TextResourceDecoder();
54 60
55 void setEncoding(const WTF::TextEncoding&, EncodingSource); 61 void setEncoding(const WTF::TextEncoding&, EncodingSource);
56 const WTF::TextEncoding& encoding() const { return m_encoding; } 62 const WTF::TextEncoding& encoding() const { return m_encoding; }
57 bool encodingWasDetectedHeuristically() const 63 bool encodingWasDetectedHeuristically() const
58 { 64 {
59 return m_source == AutoDetectedEncoding 65 return m_source == AutoDetectedEncoding
60 || m_source == EncodingFromContentSniffing; 66 || m_source == EncodingFromContentSniffing;
61 } 67 }
62 68
63 String decode(const char* data, size_t length); 69 String decode(const char* data, size_t length);
64 String flush(); 70 String flush();
65 71
66 void setHintEncoding(const WTF::TextEncoding& encoding) 72 void setHintEncoding(const WTF::TextEncoding& encoding)
67 { 73 {
68 m_hintEncoding = encoding.name(); 74 m_hintEncoding = encoding.name();
69 } 75 }
70 76
71 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } 77 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
72 bool sawError() const { return m_sawError; } 78 bool sawError() const { return m_sawError; }
73 size_t checkForBOM(const char*, size_t); 79 size_t checkForBOM(const char*, size_t);
74 80
75 private: 81 private:
76 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& default Encoding, bool usesEncodingDetector); 82
83 // TextResourceDecoder does three kinds of encoding detection:
84 // 1. By BOM,
85 // 2. By Content if |m_contentType| is not |PlainTextContent|
86 // (e.g. <meta> tag for HTML), and
87 // 3. By detectTextEncoding().
88 enum EncodingDetectionOption {
89 // Use 1. + 2. + 3.
90 UseAllAutoDetection,
91
92 // Use 1. + 2.
93 UseContentAndBOMBasedDetection,
94
95 // Use none of them.
96 // |m_contentType| must be |PlainTextContent| and
97 // |m_encoding| must be UTF8Encoding.
98 // This doesn't change encoding based on BOMs, but still processes
99 // utf-8 BOMs so that utf-8 BOMs don't appear in the decoded result.
100 AlwaysUseUTF8ForText
101 };
102
103 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& default Encoding, EncodingDetectionOption);
77 104
78 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM. 105 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM.
79 static ContentType determineContentType(const String& mimeType); 106 static ContentType determineContentType(const String& mimeType);
80 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::Text Encoding& defaultEncoding); 107 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::Text Encoding& defaultEncoding);
81 108
82 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); 109 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
83 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer); 110 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer);
84 void checkForMetaCharset(const char*, size_t); 111 void checkForMetaCharset(const char*, size_t);
85 bool shouldAutoDetect() const; 112 bool shouldAutoDetect() const;
86 113
87 ContentType m_contentType; 114 ContentType m_contentType;
88 WTF::TextEncoding m_encoding; 115 WTF::TextEncoding m_encoding;
89 OwnPtr<TextCodec> m_codec; 116 OwnPtr<TextCodec> m_codec;
90 EncodingSource m_source; 117 EncodingSource m_source;
91 const char* m_hintEncoding; 118 const char* m_hintEncoding;
92 Vector<char> m_buffer; 119 Vector<char> m_buffer;
93 bool m_checkedForBOM; 120 bool m_checkedForBOM;
94 bool m_checkedForCSSCharset; 121 bool m_checkedForCSSCharset;
95 bool m_checkedForXMLCharset; 122 bool m_checkedForXMLCharset;
96 bool m_checkedForMetaCharset; 123 bool m_checkedForMetaCharset;
97 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. 124 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
98 bool m_sawError; 125 bool m_sawError;
99 bool m_usesEncodingDetector; 126 EncodingDetectionOption m_encodingDetectionOption;
100 127
101 OwnPtr<HTMLMetaCharsetParser> m_charsetParser; 128 OwnPtr<HTMLMetaCharsetParser> m_charsetParser;
102 }; 129 };
103 130
104 } 131 }
105 132
106 #endif 133 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698