Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) | 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) |
| 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. | 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. |
| 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) | 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) |
| 5 | 5 |
| 6 This library is free software; you can redistribute it and/or | 6 This library is free software; you can redistribute it and/or |
| 7 modify it under the terms of the GNU Library General Public | 7 modify it under the terms of the GNU Library General Public |
| 8 License as published by the Free Software Foundation; either | 8 License as published by the Free Software Foundation; either |
| 9 version 2 of the License, or (at your option) any later version. | 9 version 2 of the License, or (at your option) any later version. |
| 10 | 10 |
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 105 { | 105 { |
| 106 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII | 106 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII |
| 107 // for text/xml. This matches Firefox. | 107 // for text/xml. This matches Firefox. |
| 108 if (contentType == XMLContent) | 108 if (contentType == XMLContent) |
| 109 return UTF8Encoding(); | 109 return UTF8Encoding(); |
| 110 if (!specifiedDefaultEncoding.isValid()) | 110 if (!specifiedDefaultEncoding.isValid()) |
| 111 return Latin1Encoding(); | 111 return Latin1Encoding(); |
| 112 return specifiedDefaultEncoding; | 112 return specifiedDefaultEncoding; |
| 113 } | 113 } |
| 114 | 114 |
| 115 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector) | 115 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, EncodingDetectionOption encodingDetectionOption) |
| 116 : m_contentType(determineContentType(mimeType)) | 116 : m_contentType(determineContentType(mimeType)) |
| 117 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) | 117 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) |
| 118 , m_source(DefaultEncoding) | 118 , m_source(DefaultEncoding) |
| 119 , m_hintEncoding(0) | 119 , m_hintEncoding(0) |
| 120 , m_checkedForBOM(false) | 120 , m_checkedForBOM(false) |
| 121 , m_checkedForCSSCharset(false) | 121 , m_checkedForCSSCharset(false) |
| 122 , m_checkedForXMLCharset(false) | 122 , m_checkedForXMLCharset(false) |
| 123 , m_checkedForMetaCharset(false) | 123 , m_checkedForMetaCharset(false) |
| 124 , m_useLenientXMLDecoding(false) | 124 , m_useLenientXMLDecoding(false) |
| 125 , m_sawError(false) | 125 , m_sawError(false) |
| 126 , m_usesEncodingDetector(usesEncodingDetector) | 126 , m_encodingDetectionOption(encodingDetectionOption) |
| 127 { | 127 { |
| 128 ASSERT(!(m_encodingDetectionOption == AlwaysUseUTF8ForText && (m_contentType != PlainTextContent || m_encoding != UTF8Encoding()))); | |
|
kouhei (in TOK)
2015/12/14 02:00:22
Optional nit: Would you split this ASSERT stmt?
if
hiroshige
2015/12/14 05:44:02
Done.
| |
| 128 } | 129 } |
| 129 | 130 |
| 130 TextResourceDecoder::~TextResourceDecoder() | 131 TextResourceDecoder::~TextResourceDecoder() |
| 131 { | 132 { |
| 132 } | 133 } |
| 133 | 134 |
| 134 void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, EncodingSource source) | 135 void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, EncodingSource source) |
| 135 { | 136 { |
| 136 // In case the encoding didn't exist, we keep the old one (helps some sites specifying invalid encodings). | 137 // In case the encoding didn't exist, we keep the old one (helps some sites specifying invalid encodings). |
| 137 if (!encoding.isValid()) | 138 if (!encoding.isValid()) |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 203 size_t buf1Len = bufferLength; | 204 size_t buf1Len = bufferLength; |
| 204 size_t buf2Len = len; | 205 size_t buf2Len = len; |
| 205 const unsigned char* buf1 = reinterpret_cast<const unsigned char*>(m_buffer.data()); | 206 const unsigned char* buf1 = reinterpret_cast<const unsigned char*>(m_buffer.data()); |
| 206 const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data); | 207 const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data); |
| 207 unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; | 208 unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; |
| 208 unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; | 209 unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; |
| 209 unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; | 210 unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; |
| 210 unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0; | 211 unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0; |
| 211 | 212 |
| 212 // Check for the BOM. | 213 // Check for the BOM. |
| 213 if (c1 == 0xFF && c2 == 0xFE) { | 214 if (m_encodingDetectionOption != AlwaysUseUTF8ForText && c1 == 0xFF && c2 == 0xFE) { |
|
kouhei (in TOK)
2015/12/14 02:00:22
Can we reorder the if stmts here?
if (c1 == 0xEF
hiroshige
2015/12/14 05:44:02
Done.
| |
| 214 if (c3 || c4) { | 215 if (c3 || c4) { |
| 215 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); | 216 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); |
| 216 lengthOfBOM = 2; | 217 lengthOfBOM = 2; |
| 217 } else { | 218 } else { |
| 218 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); | 219 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); |
| 219 lengthOfBOM = 4; | 220 lengthOfBOM = 4; |
| 220 } | 221 } |
| 221 } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) { | 222 } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) { |
| 222 setEncoding(UTF8Encoding(), AutoDetectedEncoding); | 223 setEncoding(UTF8Encoding(), AutoDetectedEncoding); |
| 223 lengthOfBOM = 3; | 224 lengthOfBOM = 3; |
| 224 } else if (c1 == 0xFE && c2 == 0xFF) { | 225 } else if (m_encodingDetectionOption != AlwaysUseUTF8ForText && c1 == 0xFE && c2 == 0xFF) { |
| 225 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); | 226 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); |
| 226 lengthOfBOM = 2; | 227 lengthOfBOM = 2; |
| 227 } else if (!c1 && !c2 && c3 == 0xFE && c4 == 0xFF) { | 228 } else if (m_encodingDetectionOption != AlwaysUseUTF8ForText && !c1 && !c2 && c3 == 0xFE && c4 == 0xFF) { |
| 228 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); | 229 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); |
| 229 lengthOfBOM = 4; | 230 lengthOfBOM = 4; |
| 230 } | 231 } |
| 231 | 232 |
| 232 if (lengthOfBOM || bufferLength + len >= 4) | 233 if (lengthOfBOM || bufferLength + len >= 4) |
| 233 m_checkedForBOM = true; | 234 m_checkedForBOM = true; |
| 234 | 235 |
| 235 return lengthOfBOM; | 236 return lengthOfBOM; |
| 236 } | 237 } |
| 237 | 238 |
| (...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 353 // 2. Encoding detector is turned ON and the encoding is set to | 354 // 2. Encoding detector is turned ON and the encoding is set to |
| 354 // the encoding of the parent frame, which is also auto-detected. | 355 // the encoding of the parent frame, which is also auto-detected. |
| 355 // Note that condition #2 is NOT satisfied unless parent-child frame | 356 // Note that condition #2 is NOT satisfied unless parent-child frame |
| 356 // relationship is compliant to the same-origin policy. If they're from | 357 // relationship is compliant to the same-origin policy. If they're from |
| 357 // different domains, |m_source| would not be set to EncodingFromParentFrame | 358 // different domains, |m_source| would not be set to EncodingFromParentFrame |
| 358 // in the first place. | 359 // in the first place. |
| 359 bool TextResourceDecoder::shouldAutoDetect() const | 360 bool TextResourceDecoder::shouldAutoDetect() const |
| 360 { | 361 { |
| 361 // Just checking m_hintEncoding suffices here because it's only set | 362 // Just checking m_hintEncoding suffices here because it's only set |
| 362 // in setHintEncoding when the source is AutoDetectedEncoding. | 363 // in setHintEncoding when the source is AutoDetectedEncoding. |
| 363 return m_usesEncodingDetector | 364 return m_encodingDetectionOption == UseAllAutoDetection |
| 364 && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding)); | 365 && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding)); |
| 365 } | 366 } |
| 366 | 367 |
| 367 String TextResourceDecoder::decode(const char* data, size_t len) | 368 String TextResourceDecoder::decode(const char* data, size_t len) |
| 368 { | 369 { |
| 369 size_t lengthOfBOM = 0; | 370 size_t lengthOfBOM = 0; |
| 370 if (!m_checkedForBOM) | 371 if (!m_checkedForBOM) |
| 371 lengthOfBOM = checkForBOM(data, len); | 372 lengthOfBOM = checkForBOM(data, len); |
| 372 | 373 |
| 373 bool movedDataToBuffer = false; | 374 bool movedDataToBuffer = false; |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 433 m_codec = newTextCodec(m_encoding); | 434 m_codec = newTextCodec(m_encoding); |
| 434 | 435 |
| 435 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), FetchEOF, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); | 436 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), FetchEOF, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); |
| 436 m_buffer.clear(); | 437 m_buffer.clear(); |
| 437 m_codec.clear(); | 438 m_codec.clear(); |
| 438 m_checkedForBOM = false; // Skip BOM again when re-decoding. | 439 m_checkedForBOM = false; // Skip BOM again when re-decoding. |
| 439 return result; | 440 return result; |
| 440 } | 441 } |
| 441 | 442 |
| 442 } | 443 } |
| OLD | NEW |