| OLD | NEW |
| 1 /* | 1 /* |
| 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) | 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) |
| 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All
rights reserved. | 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All
rights reserved. |
| 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) | 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) |
| 5 | 5 |
| 6 This library is free software; you can redistribute it and/or | 6 This library is free software; you can redistribute it and/or |
| 7 modify it under the terms of the GNU Library General Public | 7 modify it under the terms of the GNU Library General Public |
| 8 License as published by the Free Software Foundation; either | 8 License as published by the Free Software Foundation; either |
| 9 version 2 of the License, or (at your option) any later version. | 9 version 2 of the License, or (at your option) any later version. |
| 10 | 10 |
| 11 This library is distributed in the hope that it will be useful, | 11 This library is distributed in the hope that it will be useful, |
| 12 but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 Library General Public License for more details. | 14 Library General Public License for more details. |
| 15 | 15 |
| 16 You should have received a copy of the GNU Library General Public License | 16 You should have received a copy of the GNU Library General Public License |
| 17 along with this library; see the file COPYING.LIB. If not, write to | 17 along with this library; see the file COPYING.LIB. If not, write to |
| 18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | 18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 19 Boston, MA 02110-1301, USA. | 19 Boston, MA 02110-1301, USA. |
| 20 */ | 20 */ |
| 21 | 21 |
| 22 | |
| 23 #include "config.h" | 22 #include "config.h" |
| 24 #include "core/html/parser/TextResourceDecoder.h" | 23 #include "core/html/parser/TextResourceDecoder.h" |
| 25 | 24 |
| 26 #include "core/HTMLNames.h" | 25 #include "core/HTMLNames.h" |
| 27 #include "core/dom/DOMImplementation.h" | 26 #include "core/dom/DOMImplementation.h" |
| 28 #include "core/html/parser/HTMLMetaCharsetParser.h" | 27 #include "core/html/parser/HTMLMetaCharsetParser.h" |
| 29 #include "platform/text/TextEncodingDetector.h" | 28 #include "platform/text/TextEncodingDetector.h" |
| 30 #include "wtf/StringExtras.h" | 29 #include "wtf/StringExtras.h" |
| 31 #include "wtf/text/TextCodec.h" | 30 #include "wtf/text/TextCodec.h" |
| 32 #include "wtf/text/TextEncodingRegistry.h" | 31 #include "wtf/text/TextEncodingRegistry.h" |
| (...skipping 361 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 394 } | 393 } |
| 395 | 394 |
| 396 dataForDecode = m_buffer.data() + lengthOfBOM; | 395 dataForDecode = m_buffer.data() + lengthOfBOM; |
| 397 lengthForDecode = m_buffer.size() - lengthOfBOM; | 396 lengthForDecode = m_buffer.size() - lengthOfBOM; |
| 398 } | 397 } |
| 399 | 398 |
| 400 if (m_contentType == HTMLContent && !m_checkedForMetaCharset) | 399 if (m_contentType == HTMLContent && !m_checkedForMetaCharset) |
| 401 checkForMetaCharset(dataForDecode, lengthForDecode); | 400 checkForMetaCharset(dataForDecode, lengthForDecode); |
| 402 | 401 |
| 403 if (shouldAutoDetect()) { | 402 if (shouldAutoDetect()) { |
| 404 WTF::TextEncoding detectedEncoding; | 403 detectTextEncoding(data, len); |
| 405 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding)) | |
| 406 setEncoding(detectedEncoding, EncodingFromContentSniffing); | |
| 407 } | 404 } |
| 408 | 405 |
| 409 ASSERT(m_encoding.isValid()); | 406 ASSERT(m_encoding.isValid()); |
| 410 | 407 |
| 411 if (!m_codec) | 408 if (!m_codec) |
| 412 m_codec = newTextCodec(m_encoding); | 409 m_codec = newTextCodec(m_encoding); |
| 413 | 410 |
| 414 String result = m_codec->decode(dataForDecode, lengthForDecode, DoNotFlush,
m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); | 411 String result = m_codec->decode(dataForDecode, lengthForDecode, DoNotFlush,
m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); |
| 415 | 412 |
| 416 m_buffer.clear(); | 413 m_buffer.clear(); |
| 417 return result; | 414 return result; |
| 418 } | 415 } |
| 419 | 416 |
| 417 void TextResourceDecoder::detectTextEncoding(const char* data, size_t len) |
| 418 { |
| 419 WTF::TextEncoding detectedEncoding; |
| 420 bool detected = blink::detectTextEncoding(data, len, m_hintEncoding, &detect
edEncoding); |
| 421 if (detected && detectedEncoding != encoding()) |
| 422 setEncoding(detectedEncoding, EncodingFromContentSniffing); |
| 423 else |
| 424 setEncoding(detectedEncoding, DefaultEncodingAttemptedSniffing); |
| 425 } |
| 426 |
| 420 String TextResourceDecoder::flush() | 427 String TextResourceDecoder::flush() |
| 421 { | 428 { |
| 422 // If we cannot identify the encoding even after a document is completely | 429 // If we cannot identify the encoding even after a document is completely |
| 423 // loaded, we need to detect the encoding if the other conditions for | 430 // loaded, we need to detect the encoding if the other conditions for |
| 424 // autodetection are satisfied. | 431 // autodetection are satisfied. |
| 425 if (m_buffer.size() && shouldAutoDetect() | 432 if (m_buffer.size() && shouldAutoDetect() |
| 426 && ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_conte
ntType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSConte
nt)))) { | 433 && ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_conte
ntType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSConte
nt)))) { |
| 427 WTF::TextEncoding detectedEncoding; | 434 detectTextEncoding(m_buffer.data(), m_buffer.size()); |
| 428 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding,
&detectedEncoding)) | |
| 429 setEncoding(detectedEncoding, EncodingFromContentSniffing); | |
| 430 } | 435 } |
| 431 | 436 |
| 432 if (!m_codec) | 437 if (!m_codec) |
| 433 m_codec = newTextCodec(m_encoding); | 438 m_codec = newTextCodec(m_encoding); |
| 434 | 439 |
| 435 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), FetchEOF,
m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); | 440 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), FetchEOF,
m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); |
| 436 m_buffer.clear(); | 441 m_buffer.clear(); |
| 437 m_codec.clear(); | 442 m_codec.clear(); |
| 438 m_checkedForBOM = false; // Skip BOM again when re-decoding. | 443 m_checkedForBOM = false; // Skip BOM again when re-decoding. |
| 439 return result; | 444 return result; |
| 440 } | 445 } |
| 441 | 446 |
| 442 } | 447 } |
| OLD | NEW |