| OLD | NEW |
| 1 /* | 1 /* |
| 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) | 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) |
| 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. | 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. |
| 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) | 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) |
| 5 | 5 |
| 6 This library is free software; you can redistribute it and/or | 6 This library is free software; you can redistribute it and/or |
| 7 modify it under the terms of the GNU Library General Public | 7 modify it under the terms of the GNU Library General Public |
| 8 License as published by the Free Software Foundation; either | 8 License as published by the Free Software Foundation; either |
| 9 version 2 of the License, or (at your option) any later version. | 9 version 2 of the License, or (at your option) any later version. |
| 10 | 10 |
| (...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 104 { | 104 { |
| 105 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII | 105 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII |
| 106 // for text/xml. This matches Firefox. | 106 // for text/xml. This matches Firefox. |
| 107 if (contentType == XMLContent) | 107 if (contentType == XMLContent) |
| 108 return UTF8Encoding(); | 108 return UTF8Encoding(); |
| 109 if (!specifiedDefaultEncoding.isValid()) | 109 if (!specifiedDefaultEncoding.isValid()) |
| 110 return Latin1Encoding(); | 110 return Latin1Encoding(); |
| 111 return specifiedDefaultEncoding; | 111 return specifiedDefaultEncoding; |
| 112 } | 112 } |
| 113 | 113 |
| 114 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector) | 114 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, EncodingDetectionOption encodingDetectionOption) |
| 115 : m_contentType(determineContentType(mimeType)) | 115 : m_contentType(determineContentType(mimeType)) |
| 116 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) | 116 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) |
| 117 , m_source(DefaultEncoding) | 117 , m_source(DefaultEncoding) |
| 118 , m_hintEncoding(0) | 118 , m_hintEncoding(0) |
| 119 , m_checkedForBOM(false) | 119 , m_checkedForBOM(false) |
| 120 , m_checkedForCSSCharset(false) | 120 , m_checkedForCSSCharset(false) |
| 121 , m_checkedForXMLCharset(false) | 121 , m_checkedForXMLCharset(false) |
| 122 , m_checkedForMetaCharset(false) | 122 , m_checkedForMetaCharset(false) |
| 123 , m_useLenientXMLDecoding(false) | 123 , m_useLenientXMLDecoding(false) |
| 124 , m_sawError(false) | 124 , m_sawError(false) |
| 125 , m_usesEncodingDetector(usesEncodingDetector) | 125 , m_encodingDetectionOption(encodingDetectionOption) |
| 126 { | 126 { |
| 127 if (m_encodingDetectionOption == AlwaysUseUTF8ForText) |
| 128 ASSERT(m_contentType == PlainTextContent && m_encoding == UTF8Encoding()); |
| 127 } | 129 } |
| 128 | 130 |
| 129 TextResourceDecoder::~TextResourceDecoder() | 131 TextResourceDecoder::~TextResourceDecoder() |
| 130 { | 132 { |
| 131 } | 133 } |
| 132 | 134 |
| 133 void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, EncodingSource source) | 135 void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, EncodingSource source) |
| 134 { | 136 { |
| 135 // In case the encoding didn't exist, we keep the old one (helps some sites specifying invalid encodings). | 137 // In case the encoding didn't exist, we keep the old one (helps some sites specifying invalid encodings). |
| 136 if (!encoding.isValid()) | 138 if (!encoding.isValid()) |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 202 size_t buf1Len = bufferLength; | 204 size_t buf1Len = bufferLength; |
| 203 size_t buf2Len = len; | 205 size_t buf2Len = len; |
| 204 const unsigned char* buf1 = reinterpret_cast<const unsigned char*>(m_buffer.data()); | 206 const unsigned char* buf1 = reinterpret_cast<const unsigned char*>(m_buffer.data()); |
| 205 const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data); | 207 const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data); |
| 206 unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; | 208 unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; |
| 207 unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; | 209 unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; |
| 208 unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; | 210 unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; |
| 209 unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0; | 211 unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0; |
| 210 | 212 |
| 211 // Check for the BOM. | 213 // Check for the BOM. |
| 212 if (c1 == 0xFF && c2 == 0xFE) { | 214 if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) { |
| 213 if (c3 || c4) { | 215 setEncoding(UTF8Encoding(), AutoDetectedEncoding); |
| 214 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); | 216 lengthOfBOM = 3; |
| 217 } else if (m_encodingDetectionOption != AlwaysUseUTF8ForText) { |
| 218 if (c1 == 0xFF && c2 == 0xFE) { |
| 219 if (c3 || c4) { |
| 220 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); |
| 221 lengthOfBOM = 2; |
| 222 } else { |
| 223 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); |
| 224 lengthOfBOM = 4; |
| 225 } |
| 226 } else if (c1 == 0xFE && c2 == 0xFF) { |
| 227 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); |
| 215 lengthOfBOM = 2; | 228 lengthOfBOM = 2; |
| 216 } else { | 229 } else if (!c1 && !c2 && c3 == 0xFE && c4 == 0xFF) { |
| 217 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); | 230 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); |
| 218 lengthOfBOM = 4; | 231 lengthOfBOM = 4; |
| 219 } | 232 } |
| 220 } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) { | |
| 221 setEncoding(UTF8Encoding(), AutoDetectedEncoding); | |
| 222 lengthOfBOM = 3; | |
| 223 } else if (c1 == 0xFE && c2 == 0xFF) { | |
| 224 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); | |
| 225 lengthOfBOM = 2; | |
| 226 } else if (!c1 && !c2 && c3 == 0xFE && c4 == 0xFF) { | |
| 227 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); | |
| 228 lengthOfBOM = 4; | |
| 229 } | 233 } |
| 230 | 234 |
| 231 if (lengthOfBOM || bufferLength + len >= 4) | 235 if (lengthOfBOM || bufferLength + len >= 4) |
| 232 m_checkedForBOM = true; | 236 m_checkedForBOM = true; |
| 233 | 237 |
| 234 return lengthOfBOM; | 238 return lengthOfBOM; |
| 235 } | 239 } |
| 236 | 240 |
| 237 bool TextResourceDecoder::checkForCSSCharset(const char* data, size_t len, bool& movedDataToBuffer) | 241 bool TextResourceDecoder::checkForCSSCharset(const char* data, size_t len, bool& movedDataToBuffer) |
| 238 { | 242 { |
| (...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 352 // 2. Encoding detector is turned ON and the encoding is set to | 356 // 2. Encoding detector is turned ON and the encoding is set to |
| 353 // the encoding of the parent frame, which is also auto-detected. | 357 // the encoding of the parent frame, which is also auto-detected. |
| 354 // Note that condition #2 is NOT satisfied unless parent-child frame | 358 // Note that condition #2 is NOT satisfied unless parent-child frame |
| 355 // relationship is compliant to the same-origin policy. If they're from | 359 // relationship is compliant to the same-origin policy. If they're from |
| 356 // different domains, |m_source| would not be set to EncodingFromParentFrame | 360 // different domains, |m_source| would not be set to EncodingFromParentFrame |
| 357 // in the first place. | 361 // in the first place. |
| 358 bool TextResourceDecoder::shouldAutoDetect() const | 362 bool TextResourceDecoder::shouldAutoDetect() const |
| 359 { | 363 { |
| 360 // Just checking m_hintEncoding suffices here because it's only set | 364 // Just checking m_hintEncoding suffices here because it's only set |
| 361 // in setHintEncoding when the source is AutoDetectedEncoding. | 365 // in setHintEncoding when the source is AutoDetectedEncoding. |
| 362 return m_usesEncodingDetector | 366 return m_encodingDetectionOption == UseAllAutoDetection |
| 363 && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding)); | 367 && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding)); |
| 364 } | 368 } |
| 365 | 369 |
| 366 String TextResourceDecoder::decode(const char* data, size_t len) | 370 String TextResourceDecoder::decode(const char* data, size_t len) |
| 367 { | 371 { |
| 368 size_t lengthOfBOM = 0; | 372 size_t lengthOfBOM = 0; |
| 369 if (!m_checkedForBOM) | 373 if (!m_checkedForBOM) |
| 370 lengthOfBOM = checkForBOM(data, len); | 374 lengthOfBOM = checkForBOM(data, len); |
| 371 | 375 |
| 372 bool movedDataToBuffer = false; | 376 bool movedDataToBuffer = false; |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 438 m_codec = newTextCodec(m_encoding); | 442 m_codec = newTextCodec(m_encoding); |
| 439 | 443 |
| 440 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), FetchEOF, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); | 444 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), FetchEOF, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); |
| 441 m_buffer.clear(); | 445 m_buffer.clear(); |
| 442 m_codec.clear(); | 446 m_codec.clear(); |
| 443 m_checkedForBOM = false; // Skip BOM again when re-decoding. | 447 m_checkedForBOM = false; // Skip BOM again when re-decoding. |
| 444 return result; | 448 return result; |
| 445 } | 449 } |
| 446 | 450 |
| 447 } | 451 } |
| OLD | NEW |