Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(154)

Side by Side Diff: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.h

Issue 1470893002: [Fetch] Always use utf-8 for decoding in text() (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Add fixes. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) 3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. 4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
5 5
6 This library is free software; you can redistribute it and/or 6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public 7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either 8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version. 9 version 2 of the License, or (at your option) any later version.
10 10
(...skipping 28 matching lines...) Expand all
39 DefaultEncoding, 39 DefaultEncoding,
40 AutoDetectedEncoding, 40 AutoDetectedEncoding,
41 EncodingFromContentSniffing, 41 EncodingFromContentSniffing,
42 EncodingFromXMLHeader, 42 EncodingFromXMLHeader,
43 EncodingFromMetaTag, 43 EncodingFromMetaTag,
44 EncodingFromCSSCharset, 44 EncodingFromCSSCharset,
45 EncodingFromHTTPHeader, 45 EncodingFromHTTPHeader,
46 EncodingFromParentFrame 46 EncodingFromParentFrame
47 }; 47 };
48 48
49 static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetector = false) 49 enum BOMCheckOptions {
50 CheckForAllBOM,
51 // create("text/plain", UTF8Encoding(), false, CheckForOnlyUTF8BOM)
52 // always uses UTF8Encoding() and omits utf-8 BOM, and thus
53 // corresponds to utf-8 decode in Encoding spec:
54 // https://encoding.spec.whatwg.org/#utf-8-decode
55 CheckForOnlyUTF8BOM
56 };
57
58 static PassOwnPtr<TextResourceDecoder> create(const String& mimeType, const WTF::TextEncoding& defaultEncoding = WTF::TextEncoding(), bool usesEncodingDetector = false, BOMCheckOptions bomCheckOptions = CheckForAllBOM)
50 { 59 {
51 return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector)); 60 return adoptPtr(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector, bomCheckOptions));
52 } 61 }
53 ~TextResourceDecoder(); 62 ~TextResourceDecoder();
54 63
55 void setEncoding(const WTF::TextEncoding&, EncodingSource); 64 void setEncoding(const WTF::TextEncoding&, EncodingSource);
56 const WTF::TextEncoding& encoding() const { return m_encoding; } 65 const WTF::TextEncoding& encoding() const { return m_encoding; }
57 bool encodingWasDetectedHeuristically() const 66 bool encodingWasDetectedHeuristically() const
58 { 67 {
59 return m_source == AutoDetectedEncoding 68 return m_source == AutoDetectedEncoding
60 || m_source == EncodingFromContentSniffing; 69 || m_source == EncodingFromContentSniffing;
61 } 70 }
62 71
63 String decode(const char* data, size_t length); 72 String decode(const char* data, size_t length);
64 String flush(); 73 String flush();
65 74
66 void setHintEncoding(const WTF::TextEncoding& encoding) 75 void setHintEncoding(const WTF::TextEncoding& encoding)
67 { 76 {
68 m_hintEncoding = encoding.name(); 77 m_hintEncoding = encoding.name();
69 } 78 }
70 79
71 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } 80 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
72 bool sawError() const { return m_sawError; } 81 bool sawError() const { return m_sawError; }
73 size_t checkForBOM(const char*, size_t); 82 size_t checkForBOM(const char*, size_t);
74 83
75 private: 84 private:
76 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, bool usesEncodingDetector); 85 TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& defaultEncoding, bool usesEncodingDetector, BOMCheckOptions);
77 86
78 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM. 87 enum ContentType { PlainTextContent, HTMLContent, XMLContent, CSSContent }; // PlainText only checks for BOM.
79 static ContentType determineContentType(const String& mimeType); 88 static ContentType determineContentType(const String& mimeType);
80 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding); 89 static const WTF::TextEncoding& defaultEncoding(ContentType, const WTF::TextEncoding& defaultEncoding);
81 90
82 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); 91 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
83 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer); 92 bool checkForXMLCharset(const char*, size_t, bool& movedDataToBuffer);
84 void checkForMetaCharset(const char*, size_t); 93 void checkForMetaCharset(const char*, size_t);
85 bool shouldAutoDetect() const; 94 bool shouldAutoDetect() const;
86 95
87 ContentType m_contentType; 96 ContentType m_contentType;
88 WTF::TextEncoding m_encoding; 97 WTF::TextEncoding m_encoding;
89 OwnPtr<TextCodec> m_codec; 98 OwnPtr<TextCodec> m_codec;
90 EncodingSource m_source; 99 EncodingSource m_source;
91 const char* m_hintEncoding; 100 const char* m_hintEncoding;
92 Vector<char> m_buffer; 101 Vector<char> m_buffer;
93 bool m_checkedForBOM; 102 bool m_checkedForBOM;
94 bool m_checkedForCSSCharset; 103 bool m_checkedForCSSCharset;
95 bool m_checkedForXMLCharset; 104 bool m_checkedForXMLCharset;
96 bool m_checkedForMetaCharset; 105 bool m_checkedForMetaCharset;
97 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. 106 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
98 bool m_sawError; 107 bool m_sawError;
99 bool m_usesEncodingDetector; 108 bool m_usesEncodingDetector;
109 BOMCheckOptions m_bomCheckOptions;
100 110
101 OwnPtr<HTMLMetaCharsetParser> m_charsetParser; 111 OwnPtr<HTMLMetaCharsetParser> m_charsetParser;
102 }; 112 };
103 113
104 } 114 }
105 115
106 #endif 116 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698