OLD | NEW |
---|---|
1 /* | 1 /* |
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) | 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) |
3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. | 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. |
4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) | 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) |
5 | 5 |
6 This library is free software; you can redistribute it and/or | 6 This library is free software; you can redistribute it and/or |
7 modify it under the terms of the GNU Library General Public | 7 modify it under the terms of the GNU Library General Public |
8 License as published by the Free Software Foundation; either | 8 License as published by the Free Software Foundation; either |
9 version 2 of the License, or (at your option) any later version. | 9 version 2 of the License, or (at your option) any later version. |
10 | 10 |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
105 { | 105 { |
106 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII | 106 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII |
107 // for text/xml. This matches Firefox. | 107 // for text/xml. This matches Firefox. |
108 if (contentType == XMLContent) | 108 if (contentType == XMLContent) |
109 return UTF8Encoding(); | 109 return UTF8Encoding(); |
110 if (!specifiedDefaultEncoding.isValid()) | 110 if (!specifiedDefaultEncoding.isValid()) |
111 return Latin1Encoding(); | 111 return Latin1Encoding(); |
112 return specifiedDefaultEncoding; | 112 return specifiedDefaultEncoding; |
113 } | 113 } |
114 | 114 |
115 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector) | 115 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, EncodingDetectionOption encodingDetectionOption) |
116 : m_contentType(determineContentType(mimeType)) | 116 : m_contentType(determineContentType(mimeType)) |
117 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) | 117 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) |
118 , m_source(DefaultEncoding) | 118 , m_source(DefaultEncoding) |
119 , m_hintEncoding(0) | 119 , m_hintEncoding(0) |
120 , m_checkedForBOM(false) | 120 , m_checkedForBOM(false) |
121 , m_checkedForCSSCharset(false) | 121 , m_checkedForCSSCharset(false) |
122 , m_checkedForXMLCharset(false) | 122 , m_checkedForXMLCharset(false) |
123 , m_checkedForMetaCharset(false) | 123 , m_checkedForMetaCharset(false) |
124 , m_useLenientXMLDecoding(false) | 124 , m_useLenientXMLDecoding(false) |
125 , m_sawError(false) | 125 , m_sawError(false) |
126 , m_usesEncodingDetector(usesEncodingDetector) | 126 , m_encodingDetectionOption(encodingDetectionOption) |
127 { | 127 { |
128 ASSERT(!(m_encodingDetectionOption == AlwaysUseUTF8ForText && (m_contentType != PlainTextContent || m_encoding != UTF8Encoding()))); | |
128 } | 129 } |
129 | 130 |
130 TextResourceDecoder::~TextResourceDecoder() | 131 TextResourceDecoder::~TextResourceDecoder() |
131 { | 132 { |
132 } | 133 } |
133 | 134 |
134 void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, EncodingSource source) | 135 void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, EncodingSource source) |
135 { | 136 { |
136 // In case the encoding didn't exist, we keep the old one (helps some sites specifying invalid encodings). | 137 // In case the encoding didn't exist, we keep the old one (helps some sites specifying invalid encodings). |
137 if (!encoding.isValid()) | 138 if (!encoding.isValid()) |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
203 size_t buf1Len = bufferLength; | 204 size_t buf1Len = bufferLength; |
204 size_t buf2Len = len; | 205 size_t buf2Len = len; |
205 const unsigned char* buf1 = reinterpret_cast<const unsigned char*>(m_buffer.data()); | 206 const unsigned char* buf1 = reinterpret_cast<const unsigned char*>(m_buffer.data()); |
206 const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data); | 207 const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data); |
207 unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; | 208 unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; |
208 unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; | 209 unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; |
209 unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; | 210 unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; |
210 unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0; | 211 unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0; |
211 | 212 |
212 // Check for the BOM. | 213 // Check for the BOM. |
213 if (c1 == 0xFF && c2 == 0xFE) { | 214 if (m_encodingDetectionOption <= UseContentAndBOMBasedDetection && c1 == 0xFF && c2 == 0xFE) { |
yhirano
2015/12/02 08:28:29
[optional] I'm not a fan of numerical enum value c
| |
214 if (c3 || c4) { | 215 if (c3 || c4) { |
215 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); | 216 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); |
216 lengthOfBOM = 2; | 217 lengthOfBOM = 2; |
217 } else { | 218 } else { |
218 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); | 219 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); |
219 lengthOfBOM = 4; | 220 lengthOfBOM = 4; |
220 } | 221 } |
221 } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) { | 222 } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) { |
222 setEncoding(UTF8Encoding(), AutoDetectedEncoding); | 223 setEncoding(UTF8Encoding(), AutoDetectedEncoding); |
223 lengthOfBOM = 3; | 224 lengthOfBOM = 3; |
224 } else if (c1 == 0xFE && c2 == 0xFF) { | 225 } else if (m_encodingDetectionOption <= UseContentAndBOMBasedDetection && c1 == 0xFE && c2 == 0xFF) { |
225 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); | 226 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); |
226 lengthOfBOM = 2; | 227 lengthOfBOM = 2; |
227 } else if (!c1 && !c2 && c3 == 0xFE && c4 == 0xFF) { | 228 } else if (m_encodingDetectionOption <= UseContentAndBOMBasedDetection && !c1 && !c2 && c3 == 0xFE && c4 == 0xFF) { |
228 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); | 229 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); |
229 lengthOfBOM = 4; | 230 lengthOfBOM = 4; |
230 } | 231 } |
231 | 232 |
232 if (lengthOfBOM || bufferLength + len >= 4) | 233 if (lengthOfBOM || bufferLength + len >= 4) |
233 m_checkedForBOM = true; | 234 m_checkedForBOM = true; |
234 | 235 |
235 return lengthOfBOM; | 236 return lengthOfBOM; |
236 } | 237 } |
237 | 238 |
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
353 // 2. Encoding detector is turned ON and the encoding is set to | 354 // 2. Encoding detector is turned ON and the encoding is set to |
354 // the encoding of the parent frame, which is also auto-detected. | 355 // the encoding of the parent frame, which is also auto-detected. |
355 // Note that condition #2 is NOT satisfied unless parent-child frame | 356 // Note that condition #2 is NOT satisfied unless parent-child frame |
356 // relationship is compliant to the same-origin policy. If they're from | 357 // relationship is compliant to the same-origin policy. If they're from |
357 // different domains, |m_source| would not be set to EncodingFromParentFrame | 358 // different domains, |m_source| would not be set to EncodingFromParentFrame |
358 // in the first place. | 359 // in the first place. |
359 bool TextResourceDecoder::shouldAutoDetect() const | 360 bool TextResourceDecoder::shouldAutoDetect() const |
360 { | 361 { |
361 // Just checking m_hintEncoding suffices here because it's only set | 362 // Just checking m_hintEncoding suffices here because it's only set |
362 // in setHintEncoding when the source is AutoDetectedEncoding. | 363 // in setHintEncoding when the source is AutoDetectedEncoding. |
363 return m_usesEncodingDetector | 364 return m_encodingDetectionOption == UseAllAutoDetection |
364 && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding)); | 365 && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding)); |
365 } | 366 } |
366 | 367 |
367 String TextResourceDecoder::decode(const char* data, size_t len) | 368 String TextResourceDecoder::decode(const char* data, size_t len) |
368 { | 369 { |
369 size_t lengthOfBOM = 0; | 370 size_t lengthOfBOM = 0; |
370 if (!m_checkedForBOM) | 371 if (!m_checkedForBOM) |
371 lengthOfBOM = checkForBOM(data, len); | 372 lengthOfBOM = checkForBOM(data, len); |
372 | 373 |
373 bool movedDataToBuffer = false; | 374 bool movedDataToBuffer = false; |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
433 m_codec = newTextCodec(m_encoding); | 434 m_codec = newTextCodec(m_encoding); |
434 | 435 |
435 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), FetchEOF, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); | 436 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), FetchEOF, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); |
436 m_buffer.clear(); | 437 m_buffer.clear(); |
437 m_codec.clear(); | 438 m_codec.clear(); |
438 m_checkedForBOM = false; // Skip BOM again when re-decoding. | 439 m_checkedForBOM = false; // Skip BOM again when re-decoding. |
439 return result; | 440 return result; |
440 } | 441 } |
441 | 442 |
442 } | 443 } |
OLD | NEW |