| OLD | NEW |
| 1 /* | 1 /* |
| 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) | 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) |
| 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All
rights reserved. | 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All
rights reserved. |
| 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) | 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) |
| 5 | 5 |
| 6 This library is free software; you can redistribute it and/or | 6 This library is free software; you can redistribute it and/or |
| 7 modify it under the terms of the GNU Library General Public | 7 modify it under the terms of the GNU Library General Public |
| 8 License as published by the Free Software Foundation; either | 8 License as published by the Free Software Foundation; either |
| 9 version 2 of the License, or (at your option) any later version. | 9 version 2 of the License, or (at your option) any later version. |
| 10 | 10 |
| 11 This library is distributed in the hope that it will be useful, | 11 This library is distributed in the hope that it will be useful, |
| 12 but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 Library General Public License for more details. | 14 Library General Public License for more details. |
| 15 | 15 |
| 16 You should have received a copy of the GNU Library General Public License | 16 You should have received a copy of the GNU Library General Public License |
| 17 along with this library; see the file COPYING.LIB. If not, write to | 17 along with this library; see the file COPYING.LIB. If not, write to |
| 18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | 18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 19 Boston, MA 02110-1301, USA. | 19 Boston, MA 02110-1301, USA. |
| 20 */ | 20 */ |
| 21 | 21 |
| 22 | 22 |
| 23 #include "config.h" | 23 #include "config.h" |
| 24 #include "core/html/parser/TextResourceDecoder.h" | 24 #include "core/fetch/TextResourceDecoder.h" |
| 25 | 25 |
| 26 #include "HTMLNames.h" | 26 #include "HTMLNames.h" |
| 27 #include "core/dom/DOMImplementation.h" | 27 #include "core/dom/DOMImplementation.h" |
| 28 #include "core/html/parser/HTMLMetaCharsetParser.h" | 28 #include "core/html/parser/HTMLMetaCharsetParser.h" |
| 29 #include "platform/text/TextEncodingDetector.h" | 29 #include "platform/text/TextEncodingDetector.h" |
| 30 #include "wtf/StringExtras.h" | 30 #include "wtf/StringExtras.h" |
| 31 #include "wtf/text/TextCodec.h" | 31 #include "wtf/text/TextCodec.h" |
| 32 #include "wtf/text/TextEncodingRegistry.h" | 32 #include "wtf/text/TextEncodingRegistry.h" |
| 33 | 33 |
| 34 using namespace WTF; | 34 using namespace WTF; |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 84 { | 84 { |
| 85 Vector<char, 64> buffer(length + 1); | 85 Vector<char, 64> buffer(length + 1); |
| 86 memcpy(buffer.data(), encodingName, length); | 86 memcpy(buffer.data(), encodingName, length); |
| 87 buffer[length] = '\0'; | 87 buffer[length] = '\0'; |
| 88 return buffer.data(); | 88 return buffer.data(); |
| 89 } | 89 } |
| 90 | 90 |
| 91 TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const
String& mimeType) | 91 TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const
String& mimeType) |
| 92 { | 92 { |
| 93 if (equalIgnoringCase(mimeType, "text/css")) | 93 if (equalIgnoringCase(mimeType, "text/css")) |
| 94 return CSSContent; | 94 return CSS; |
| 95 if (equalIgnoringCase(mimeType, "text/html")) | 95 if (equalIgnoringCase(mimeType, "text/html")) |
| 96 return HTMLContent; | 96 return HTML; |
| 97 if (DOMImplementation::isXMLMIMEType(mimeType)) | 97 if (DOMImplementation::isXMLMIMEType(mimeType)) |
| 98 return XMLContent; | 98 return XML; |
| 99 return PlainTextContent; | 99 return PlainText; |
| 100 } | 100 } |
| 101 | 101 |
| 102 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(ContentType conten
tType, const WTF::TextEncoding& specifiedDefaultEncoding) | 102 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(ContentType conten
tType, const WTF::TextEncoding& specifiedDefaultEncoding) |
| 103 { | 103 { |
| 104 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8
instead of US-ASCII | 104 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8
instead of US-ASCII |
| 105 // for text/xml. This matches Firefox. | 105 // for text/xml. This matches Firefox. |
| 106 if (contentType == XMLContent) | 106 if (contentType == XML) |
| 107 return UTF8Encoding(); | 107 return UTF8Encoding(); |
| 108 if (!specifiedDefaultEncoding.isValid()) | 108 if (!specifiedDefaultEncoding.isValid()) |
| 109 return Latin1Encoding(); | 109 return Latin1Encoding(); |
| 110 return specifiedDefaultEncoding; | 110 return specifiedDefaultEncoding; |
| 111 } | 111 } |
| 112 | 112 |
| 113 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::Text
Encoding& specifiedDefaultEncoding, bool usesEncodingDetector) | 113 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::Text
Encoding& specifiedDefaultEncoding, bool usesEncodingDetector) |
| 114 : m_contentType(determineContentType(mimeType)) | 114 : m_contentType(determineContentType(mimeType)) |
| 115 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) | 115 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) |
| 116 , m_source(DefaultEncoding) | 116 , m_source(DefaultEncoding) |
| (...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 361 } | 361 } |
| 362 | 362 |
| 363 String TextResourceDecoder::decode(const char* data, size_t len) | 363 String TextResourceDecoder::decode(const char* data, size_t len) |
| 364 { | 364 { |
| 365 size_t lengthOfBOM = 0; | 365 size_t lengthOfBOM = 0; |
| 366 if (!m_checkedForBOM) | 366 if (!m_checkedForBOM) |
| 367 lengthOfBOM = checkForBOM(data, len); | 367 lengthOfBOM = checkForBOM(data, len); |
| 368 | 368 |
| 369 bool movedDataToBuffer = false; | 369 bool movedDataToBuffer = false; |
| 370 | 370 |
| 371 if (m_contentType == CSSContent && !m_checkedForCSSCharset) { | 371 if (m_contentType == CSS && !m_checkedForCSSCharset) { |
| 372 if (!checkForCSSCharset(data, len, movedDataToBuffer)) | 372 if (!checkForCSSCharset(data, len, movedDataToBuffer)) |
| 373 return emptyString(); | 373 return emptyString(); |
| 374 } | 374 } |
| 375 | 375 |
| 376 if ((m_contentType == HTMLContent || m_contentType == XMLContent) && !m_chec
kedForXMLCharset) { | 376 if ((m_contentType == HTML || m_contentType == XML) && !m_checkedForXMLChars
et) { |
| 377 if (!checkForXMLCharset(data, len, movedDataToBuffer)) | 377 if (!checkForXMLCharset(data, len, movedDataToBuffer)) |
| 378 return emptyString(); | 378 return emptyString(); |
| 379 } | 379 } |
| 380 | 380 |
| 381 const char* dataForDecode = data + lengthOfBOM; | 381 const char* dataForDecode = data + lengthOfBOM; |
| 382 size_t lengthForDecode = len - lengthOfBOM; | 382 size_t lengthForDecode = len - lengthOfBOM; |
| 383 | 383 |
| 384 if (!m_buffer.isEmpty()) { | 384 if (!m_buffer.isEmpty()) { |
| 385 if (!movedDataToBuffer) { | 385 if (!movedDataToBuffer) { |
| 386 size_t oldSize = m_buffer.size(); | 386 size_t oldSize = m_buffer.size(); |
| 387 m_buffer.grow(oldSize + len); | 387 m_buffer.grow(oldSize + len); |
| 388 memcpy(m_buffer.data() + oldSize, data, len); | 388 memcpy(m_buffer.data() + oldSize, data, len); |
| 389 } | 389 } |
| 390 | 390 |
| 391 dataForDecode = m_buffer.data() + lengthOfBOM; | 391 dataForDecode = m_buffer.data() + lengthOfBOM; |
| 392 lengthForDecode = m_buffer.size() - lengthOfBOM; | 392 lengthForDecode = m_buffer.size() - lengthOfBOM; |
| 393 } | 393 } |
| 394 | 394 |
| 395 if (m_contentType == HTMLContent && !m_checkedForMetaCharset) | 395 if (m_contentType == HTML && !m_checkedForMetaCharset) |
| 396 checkForMetaCharset(dataForDecode, lengthForDecode); | 396 checkForMetaCharset(dataForDecode, lengthForDecode); |
| 397 | 397 |
| 398 if (shouldAutoDetect()) { | 398 if (shouldAutoDetect()) { |
| 399 WTF::TextEncoding detectedEncoding; | 399 WTF::TextEncoding detectedEncoding; |
| 400 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding)) | 400 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding)) |
| 401 setEncoding(detectedEncoding, EncodingFromContentSniffing); | 401 setEncoding(detectedEncoding, EncodingFromContentSniffing); |
| 402 } | 402 } |
| 403 | 403 |
| 404 ASSERT(m_encoding.isValid()); | 404 ASSERT(m_encoding.isValid()); |
| 405 | 405 |
| 406 if (!m_codec) | 406 if (!m_codec) |
| 407 m_codec = newTextCodec(m_encoding); | 407 m_codec = newTextCodec(m_encoding); |
| 408 | 408 |
| 409 String result = m_codec->decode(dataForDecode, lengthForDecode, false, m_con
tentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); | 409 String result = m_codec->decode(dataForDecode, lengthForDecode, false, m_con
tentType == XML && !m_useLenientXMLDecoding, m_sawError); |
| 410 | 410 |
| 411 m_buffer.clear(); | 411 m_buffer.clear(); |
| 412 return result; | 412 return result; |
| 413 } | 413 } |
| 414 | 414 |
| 415 String TextResourceDecoder::flush() | 415 String TextResourceDecoder::flush() |
| 416 { | 416 { |
| 417 // If we can not identify the encoding even after a document is completely | 417 // If we can not identify the encoding even after a document is completely |
| 418 // loaded, we need to detect the encoding if other conditions for | 418 // loaded, we need to detect the encoding if other conditions for |
| 419 // autodetection is satisfied. | 419 // autodetection is satisfied. |
| 420 if (m_buffer.size() && shouldAutoDetect() | 420 if (m_buffer.size() && shouldAutoDetect() |
| 421 && ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_conte
ntType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSConte
nt)))) { | 421 && ((!m_checkedForXMLCharset && (m_contentType == HTML || m_contentType
== XML)) || (!m_checkedForCSSCharset && (m_contentType == CSS)))) { |
| 422 WTF::TextEncoding detectedEncoding; | 422 WTF::TextEncoding detectedEncoding; |
| 423 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding,
&detectedEncoding)) | 423 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding,
&detectedEncoding)) |
| 424 setEncoding(detectedEncoding, EncodingFromContentSniffing); | 424 setEncoding(detectedEncoding, EncodingFromContentSniffing); |
| 425 } | 425 } |
| 426 | 426 |
| 427 if (!m_codec) | 427 if (!m_codec) |
| 428 m_codec = newTextCodec(m_encoding); | 428 m_codec = newTextCodec(m_encoding); |
| 429 | 429 |
| 430 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), true, m_co
ntentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); | 430 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), true, m_co
ntentType == XML && !m_useLenientXMLDecoding, m_sawError); |
| 431 m_buffer.clear(); | 431 m_buffer.clear(); |
| 432 m_codec.clear(); | 432 m_codec.clear(); |
| 433 m_checkedForBOM = false; // Skip BOM again when re-decoding. | 433 m_checkedForBOM = false; // Skip BOM again when re-decoding. |
| 434 return result; | 434 return result; |
| 435 } | 435 } |
| 436 | 436 |
| 437 } | 437 } |
| OLD | NEW |