| OLD | NEW |
| 1 /* | 1 /* |
| 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) | 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) |
| 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All
rights reserved. | 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All
rights reserved. |
| 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) | 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) |
| 5 | 5 |
| 6 This library is free software; you can redistribute it and/or | 6 This library is free software; you can redistribute it and/or |
| 7 modify it under the terms of the GNU Library General Public | 7 modify it under the terms of the GNU Library General Public |
| 8 License as published by the Free Software Foundation; either | 8 License as published by the Free Software Foundation; either |
| 9 version 2 of the License, or (at your option) any later version. | 9 version 2 of the License, or (at your option) any later version. |
| 10 | 10 |
| 11 This library is distributed in the hope that it will be useful, | 11 This library is distributed in the hope that it will be useful, |
| 12 but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 Library General Public License for more details. | 14 Library General Public License for more details. |
| 15 | 15 |
| 16 You should have received a copy of the GNU Library General Public License | 16 You should have received a copy of the GNU Library General Public License |
| 17 along with this library; see the file COPYING.LIB. If not, write to | 17 along with this library; see the file COPYING.LIB. If not, write to |
| 18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | 18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 19 Boston, MA 02110-1301, USA. | 19 Boston, MA 02110-1301, USA. |
| 20 */ | 20 */ |
| 21 | 21 |
| 22 | 22 |
| 23 #include "config.h" | 23 #include "config.h" |
| 24 #include "core/fetch/TextResourceDecoder.h" | 24 #include "core/html/parser/TextResourceDecoder.h" |
| 25 | 25 |
| 26 #include "HTMLNames.h" | 26 #include "HTMLNames.h" |
| 27 #include "core/dom/DOMImplementation.h" | 27 #include "core/dom/DOMImplementation.h" |
| 28 #include "core/html/parser/HTMLMetaCharsetParser.h" | 28 #include "core/html/parser/HTMLMetaCharsetParser.h" |
| 29 #include "platform/text/TextEncodingDetector.h" | 29 #include "platform/text/TextEncodingDetector.h" |
| 30 #include "wtf/StringExtras.h" | 30 #include "wtf/StringExtras.h" |
| 31 #include "wtf/text/TextCodec.h" | 31 #include "wtf/text/TextCodec.h" |
| 32 #include "wtf/text/TextEncoding.h" | 32 #include "wtf/text/TextEncoding.h" |
| 33 #include "wtf/text/TextEncodingRegistry.h" | 33 #include "wtf/text/TextEncodingRegistry.h" |
| 34 | 34 |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 85 { | 85 { |
| 86 Vector<char, 64> buffer(length + 1); | 86 Vector<char, 64> buffer(length + 1); |
| 87 memcpy(buffer.data(), encodingName, length); | 87 memcpy(buffer.data(), encodingName, length); |
| 88 buffer[length] = '\0'; | 88 buffer[length] = '\0'; |
| 89 return buffer.data(); | 89 return buffer.data(); |
| 90 } | 90 } |
| 91 | 91 |
| 92 TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const
String& mimeType) | 92 TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const
String& mimeType) |
| 93 { | 93 { |
| 94 if (equalIgnoringCase(mimeType, "text/css")) | 94 if (equalIgnoringCase(mimeType, "text/css")) |
| 95 return CSS; | 95 return CSSContent; |
| 96 if (equalIgnoringCase(mimeType, "text/html")) | 96 if (equalIgnoringCase(mimeType, "text/html")) |
| 97 return HTML; | 97 return HTMLContent; |
| 98 if (DOMImplementation::isXMLMIMEType(mimeType)) | 98 if (DOMImplementation::isXMLMIMEType(mimeType)) |
| 99 return XML; | 99 return XMLContent; |
| 100 return PlainText; | 100 return PlainTextContent; |
| 101 } | 101 } |
| 102 | 102 |
| 103 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(ContentType conten
tType, const WTF::TextEncoding& specifiedDefaultEncoding) | 103 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(ContentType conten
tType, const WTF::TextEncoding& specifiedDefaultEncoding) |
| 104 { | 104 { |
| 105 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8
instead of US-ASCII | 105 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8
instead of US-ASCII |
| 106 // for text/xml. This matches Firefox. | 106 // for text/xml. This matches Firefox. |
| 107 if (contentType == XML) | 107 if (contentType == XMLContent) |
| 108 return UTF8Encoding(); | 108 return UTF8Encoding(); |
| 109 if (!specifiedDefaultEncoding.isValid()) | 109 if (!specifiedDefaultEncoding.isValid()) |
| 110 return Latin1Encoding(); | 110 return Latin1Encoding(); |
| 111 return specifiedDefaultEncoding; | 111 return specifiedDefaultEncoding; |
| 112 } | 112 } |
| 113 | 113 |
| 114 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::Text
Encoding& specifiedDefaultEncoding, bool usesEncodingDetector) | 114 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::Text
Encoding& specifiedDefaultEncoding, bool usesEncodingDetector) |
| 115 : m_contentType(determineContentType(mimeType)) | 115 : m_contentType(determineContentType(mimeType)) |
| 116 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) | 116 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) |
| 117 , m_source(DefaultEncoding) | 117 , m_source(DefaultEncoding) |
| (...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 362 } | 362 } |
| 363 | 363 |
| 364 String TextResourceDecoder::decode(const char* data, size_t len) | 364 String TextResourceDecoder::decode(const char* data, size_t len) |
| 365 { | 365 { |
| 366 size_t lengthOfBOM = 0; | 366 size_t lengthOfBOM = 0; |
| 367 if (!m_checkedForBOM) | 367 if (!m_checkedForBOM) |
| 368 lengthOfBOM = checkForBOM(data, len); | 368 lengthOfBOM = checkForBOM(data, len); |
| 369 | 369 |
| 370 bool movedDataToBuffer = false; | 370 bool movedDataToBuffer = false; |
| 371 | 371 |
| 372 if (m_contentType == CSS && !m_checkedForCSSCharset) { | 372 if (m_contentType == CSSContent && !m_checkedForCSSCharset) { |
| 373 if (!checkForCSSCharset(data, len, movedDataToBuffer)) | 373 if (!checkForCSSCharset(data, len, movedDataToBuffer)) |
| 374 return emptyString(); | 374 return emptyString(); |
| 375 } | 375 } |
| 376 | 376 |
| 377 if ((m_contentType == HTML || m_contentType == XML) && !m_checkedForXMLChars
et) { | 377 if ((m_contentType == HTMLContent || m_contentType == XMLContent) && !m_chec
kedForXMLCharset) { |
| 378 if (!checkForXMLCharset(data, len, movedDataToBuffer)) | 378 if (!checkForXMLCharset(data, len, movedDataToBuffer)) |
| 379 return emptyString(); | 379 return emptyString(); |
| 380 } | 380 } |
| 381 | 381 |
| 382 const char* dataForDecode = data + lengthOfBOM; | 382 const char* dataForDecode = data + lengthOfBOM; |
| 383 size_t lengthForDecode = len - lengthOfBOM; | 383 size_t lengthForDecode = len - lengthOfBOM; |
| 384 | 384 |
| 385 if (!m_buffer.isEmpty()) { | 385 if (!m_buffer.isEmpty()) { |
| 386 if (!movedDataToBuffer) { | 386 if (!movedDataToBuffer) { |
| 387 size_t oldSize = m_buffer.size(); | 387 size_t oldSize = m_buffer.size(); |
| 388 m_buffer.grow(oldSize + len); | 388 m_buffer.grow(oldSize + len); |
| 389 memcpy(m_buffer.data() + oldSize, data, len); | 389 memcpy(m_buffer.data() + oldSize, data, len); |
| 390 } | 390 } |
| 391 | 391 |
| 392 dataForDecode = m_buffer.data() + lengthOfBOM; | 392 dataForDecode = m_buffer.data() + lengthOfBOM; |
| 393 lengthForDecode = m_buffer.size() - lengthOfBOM; | 393 lengthForDecode = m_buffer.size() - lengthOfBOM; |
| 394 } | 394 } |
| 395 | 395 |
| 396 if (m_contentType == HTML && !m_checkedForMetaCharset) | 396 if (m_contentType == HTMLContent && !m_checkedForMetaCharset) |
| 397 checkForMetaCharset(dataForDecode, lengthForDecode); | 397 checkForMetaCharset(dataForDecode, lengthForDecode); |
| 398 | 398 |
| 399 if (shouldAutoDetect()) { | 399 if (shouldAutoDetect()) { |
| 400 WTF::TextEncoding detectedEncoding; | 400 WTF::TextEncoding detectedEncoding; |
| 401 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding)) | 401 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding)) |
| 402 setEncoding(detectedEncoding, EncodingFromContentSniffing); | 402 setEncoding(detectedEncoding, EncodingFromContentSniffing); |
| 403 } | 403 } |
| 404 | 404 |
| 405 ASSERT(m_encoding.isValid()); | 405 ASSERT(m_encoding.isValid()); |
| 406 | 406 |
| 407 if (!m_codec) | 407 if (!m_codec) |
| 408 m_codec = newTextCodec(m_encoding); | 408 m_codec = newTextCodec(m_encoding); |
| 409 | 409 |
| 410 String result = m_codec->decode(dataForDecode, lengthForDecode, false, m_con
tentType == XML && !m_useLenientXMLDecoding, m_sawError); | 410 String result = m_codec->decode(dataForDecode, lengthForDecode, false, m_con
tentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); |
| 411 | 411 |
| 412 m_buffer.clear(); | 412 m_buffer.clear(); |
| 413 return result; | 413 return result; |
| 414 } | 414 } |
| 415 | 415 |
| 416 String TextResourceDecoder::flush() | 416 String TextResourceDecoder::flush() |
| 417 { | 417 { |
| 418 // If we can not identify the encoding even after a document is completely | 418 // If we can not identify the encoding even after a document is completely |
| 419 // loaded, we need to detect the encoding if other conditions for | 419 // loaded, we need to detect the encoding if other conditions for |
| 420 // autodetection is satisfied. | 420 // autodetection is satisfied. |
| 421 if (m_buffer.size() && shouldAutoDetect() | 421 if (m_buffer.size() && shouldAutoDetect() |
| 422 && ((!m_checkedForXMLCharset && (m_contentType == HTML || m_contentType
== XML)) || (!m_checkedForCSSCharset && (m_contentType == CSS)))) { | 422 && ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_conte
ntType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSConte
nt)))) { |
| 423 WTF::TextEncoding detectedEncoding; | 423 WTF::TextEncoding detectedEncoding; |
| 424 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding,
&detectedEncoding)) | 424 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding,
&detectedEncoding)) |
| 425 setEncoding(detectedEncoding, EncodingFromContentSniffing); | 425 setEncoding(detectedEncoding, EncodingFromContentSniffing); |
| 426 } | 426 } |
| 427 | 427 |
| 428 if (!m_codec) | 428 if (!m_codec) |
| 429 m_codec = newTextCodec(m_encoding); | 429 m_codec = newTextCodec(m_encoding); |
| 430 | 430 |
| 431 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), true, m_co
ntentType == XML && !m_useLenientXMLDecoding, m_sawError); | 431 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), true, m_co
ntentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); |
| 432 m_buffer.clear(); | 432 m_buffer.clear(); |
| 433 m_codec.clear(); | 433 m_codec.clear(); |
| 434 m_checkedForBOM = false; // Skip BOM again when re-decoding. | 434 m_checkedForBOM = false; // Skip BOM again when re-decoding. |
| 435 return result; | 435 return result; |
| 436 } | 436 } |
| 437 | 437 |
| 438 } | 438 } |
| OLD | NEW |