| OLD | NEW |
| 1 /* | 1 /* |
| 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) | 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) |
| 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All | 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All |
| 4 rights reserved. | 4 rights reserved. |
| 5 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) | 5 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) |
| 6 | 6 |
| 7 This library is free software; you can redistribute it and/or | 7 This library is free software; you can redistribute it and/or |
| 8 modify it under the terms of the GNU Library General Public | 8 modify it under the terms of the GNU Library General Public |
| 9 License as published by the Free Software Foundation; either | 9 License as published by the Free Software Foundation; either |
| 10 version 2 of the License, or (at your option) any later version. | 10 version 2 of the License, or (at your option) any later version. |
| 11 | 11 |
| 12 This library is distributed in the hope that it will be useful, | 12 This library is distributed in the hope that it will be useful, |
| 13 but WITHOUT ANY WARRANTY; without even the implied warranty of | 13 but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 Library General Public License for more details. | 15 Library General Public License for more details. |
| 16 | 16 |
| 17 You should have received a copy of the GNU Library General Public License | 17 You should have received a copy of the GNU Library General Public License |
| 18 along with this library; see the file COPYING.LIB. If not, write to | 18 along with this library; see the file COPYING.LIB. If not, write to |
| 19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | 19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 20 Boston, MA 02110-1301, USA. | 20 Boston, MA 02110-1301, USA. |
| 21 */ | 21 */ |
| 22 | 22 |
| 23 #include "core/html/parser/TextResourceDecoder.h" | 23 #include "core/html/parser/TextResourceDecoder.h" |
| 24 | 24 |
| 25 #include "core/HTMLNames.h" | 25 #include "core/HTMLNames.h" |
| 26 #include "core/dom/DOMImplementation.h" | 26 #include "core/dom/DOMImplementation.h" |
| 27 #include "core/html/parser/HTMLMetaCharsetParser.h" | 27 #include "core/html/parser/HTMLMetaCharsetParser.h" |
| 28 #include "platform/Language.h" |
| 28 #include "platform/text/TextEncodingDetector.h" | 29 #include "platform/text/TextEncodingDetector.h" |
| 29 #include "wtf/StringExtras.h" | 30 #include "wtf/StringExtras.h" |
| 30 #include "wtf/text/TextCodec.h" | 31 #include "wtf/text/TextCodec.h" |
| 31 #include "wtf/text/TextEncodingRegistry.h" | 32 #include "wtf/text/TextEncodingRegistry.h" |
| 32 | 33 |
| 33 using namespace WTF; | 34 using namespace WTF; |
| 34 | 35 |
| 35 namespace blink { | 36 namespace blink { |
| 36 | 37 |
| 37 using namespace HTMLNames; | 38 using namespace HTMLNames; |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 135 if (contentType == XMLContent) | 136 if (contentType == XMLContent) |
| 136 return UTF8Encoding(); | 137 return UTF8Encoding(); |
| 137 if (!specifiedDefaultEncoding.isValid()) | 138 if (!specifiedDefaultEncoding.isValid()) |
| 138 return Latin1Encoding(); | 139 return Latin1Encoding(); |
| 139 return specifiedDefaultEncoding; | 140 return specifiedDefaultEncoding; |
| 140 } | 141 } |
| 141 | 142 |
| 142 TextResourceDecoder::TextResourceDecoder( | 143 TextResourceDecoder::TextResourceDecoder( |
| 143 const String& mimeType, | 144 const String& mimeType, |
| 144 const WTF::TextEncoding& specifiedDefaultEncoding, | 145 const WTF::TextEncoding& specifiedDefaultEncoding, |
| 145 EncodingDetectionOption encodingDetectionOption) | 146 EncodingDetectionOption encodingDetectionOption, |
| 147 const String& url) |
| 146 : m_contentType(determineContentType(mimeType)), | 148 : m_contentType(determineContentType(mimeType)), |
| 147 m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)), | 149 m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)), |
| 148 m_source(DefaultEncoding), | 150 m_source(DefaultEncoding), |
| 149 m_hintEncoding(0), | 151 m_hintEncoding(0), |
| 152 m_hintUrl(url.utf8()), |
| 150 m_checkedForBOM(false), | 153 m_checkedForBOM(false), |
| 151 m_checkedForCSSCharset(false), | 154 m_checkedForCSSCharset(false), |
| 152 m_checkedForXMLCharset(false), | 155 m_checkedForXMLCharset(false), |
| 153 m_checkedForMetaCharset(false), | 156 m_checkedForMetaCharset(false), |
| 154 m_useLenientXMLDecoding(false), | 157 m_useLenientXMLDecoding(false), |
| 155 m_sawError(false), | 158 m_sawError(false), |
| 156 m_encodingDetectionOption(encodingDetectionOption) { | 159 m_encodingDetectionOption(encodingDetectionOption) { |
| 157 if (m_encodingDetectionOption == AlwaysUseUTF8ForText) | 160 m_hintLanguage[0] = 0; |
| 161 if (m_encodingDetectionOption == AlwaysUseUTF8ForText) { |
| 158 ASSERT(m_contentType == PlainTextContent && m_encoding == UTF8Encoding()); | 162 ASSERT(m_contentType == PlainTextContent && m_encoding == UTF8Encoding()); |
| 163 } else if (m_encodingDetectionOption == UseAllAutoDetection) { |
| 164 // Checking empty URL helps unit testing. Providing defaultLanguage() is |
| 165 // sometimes difficult in tests. |
| 166 if (!url.isEmpty()) { |
| 167 // This object is created in the main thread, but used in another thread. |
| 168 // We should not share an AtomicString. |
| 169 AtomicString locale = defaultLanguage(); |
| 170 if (locale.length() >= 2) { |
| 171 // defaultLanguage() is always an ASCII string. |
| 172 m_hintLanguage[0] = static_cast<char>(locale[0]); |
| 173 m_hintLanguage[1] = static_cast<char>(locale[1]); |
| 174 m_hintLanguage[2] = 0; |
| 175 } |
| 176 } |
| 177 } |
| 159 } | 178 } |
| 160 | 179 |
| 161 TextResourceDecoder::~TextResourceDecoder() {} | 180 TextResourceDecoder::~TextResourceDecoder() {} |
| 162 | 181 |
| 163 void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, | 182 void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, |
| 164 EncodingSource source) { | 183 EncodingSource source) { |
| 165 // In case the encoding didn't exist, we keep the old one (helps some sites | 184 // In case the encoding didn't exist, we keep the old one (helps some sites |
| 166 // specifying invalid encodings). | 185 // specifying invalid encodings). |
| 167 if (!encoding.isValid()) | 186 if (!encoding.isValid()) |
| 168 return; | 187 return; |
| (...skipping 279 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 448 | 467 |
| 449 dataForDecode = m_buffer.data() + lengthOfBOM; | 468 dataForDecode = m_buffer.data() + lengthOfBOM; |
| 450 lengthForDecode = m_buffer.size() - lengthOfBOM; | 469 lengthForDecode = m_buffer.size() - lengthOfBOM; |
| 451 } | 470 } |
| 452 | 471 |
| 453 if (m_contentType == HTMLContent && !m_checkedForMetaCharset) | 472 if (m_contentType == HTMLContent && !m_checkedForMetaCharset) |
| 454 checkForMetaCharset(dataForDecode, lengthForDecode); | 473 checkForMetaCharset(dataForDecode, lengthForDecode); |
| 455 | 474 |
| 456 if (shouldAutoDetect()) { | 475 if (shouldAutoDetect()) { |
| 457 WTF::TextEncoding detectedEncoding; | 476 WTF::TextEncoding detectedEncoding; |
| 458 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding)) | 477 if (detectTextEncoding(data, len, m_hintEncoding, m_hintUrl.data(), |
| 478 m_hintLanguage, &detectedEncoding)) |
| 459 setEncoding(detectedEncoding, EncodingFromContentSniffing); | 479 setEncoding(detectedEncoding, EncodingFromContentSniffing); |
| 460 } | 480 } |
| 461 | 481 |
| 462 ASSERT(m_encoding.isValid()); | 482 ASSERT(m_encoding.isValid()); |
| 463 | 483 |
| 464 if (!m_codec) | 484 if (!m_codec) |
| 465 m_codec = newTextCodec(m_encoding); | 485 m_codec = newTextCodec(m_encoding); |
| 466 | 486 |
| 467 String result = m_codec->decode( | 487 String result = m_codec->decode( |
| 468 dataForDecode, lengthForDecode, DoNotFlush, | 488 dataForDecode, lengthForDecode, DoNotFlush, |
| 469 m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); | 489 m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); |
| 470 | 490 |
| 471 m_buffer.clear(); | 491 m_buffer.clear(); |
| 472 return result; | 492 return result; |
| 473 } | 493 } |
| 474 | 494 |
| 475 String TextResourceDecoder::flush() { | 495 String TextResourceDecoder::flush() { |
| 476 // If we can not identify the encoding even after a document is completely | 496 // If we can not identify the encoding even after a document is completely |
| 477 // loaded, we need to detect the encoding if other conditions for | 497 // loaded, we need to detect the encoding if other conditions for |
| 478 // autodetection is satisfied. | 498 // autodetection is satisfied. |
| 479 if (m_buffer.size() && shouldAutoDetect() && | 499 if (m_buffer.size() && shouldAutoDetect() && |
| 480 ((!m_checkedForXMLCharset && | 500 ((!m_checkedForXMLCharset && |
| 481 (m_contentType == HTMLContent || m_contentType == XMLContent)) || | 501 (m_contentType == HTMLContent || m_contentType == XMLContent)) || |
| 482 (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) { | 502 (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) { |
| 483 WTF::TextEncoding detectedEncoding; | 503 WTF::TextEncoding detectedEncoding; |
| 484 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, | 504 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, |
| 485 &detectedEncoding)) | 505 m_hintUrl.data(), m_hintLanguage, &detectedEncoding)) |
| 486 setEncoding(detectedEncoding, EncodingFromContentSniffing); | 506 setEncoding(detectedEncoding, EncodingFromContentSniffing); |
| 487 } | 507 } |
| 488 | 508 |
| 489 if (!m_codec) | 509 if (!m_codec) |
| 490 m_codec = newTextCodec(m_encoding); | 510 m_codec = newTextCodec(m_encoding); |
| 491 | 511 |
| 492 String result = m_codec->decode( | 512 String result = m_codec->decode( |
| 493 m_buffer.data(), m_buffer.size(), FetchEOF, | 513 m_buffer.data(), m_buffer.size(), FetchEOF, |
| 494 m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); | 514 m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); |
| 495 m_buffer.clear(); | 515 m_buffer.clear(); |
| 496 m_codec.reset(); | 516 m_codec.reset(); |
| 497 m_checkedForBOM = false; // Skip BOM again when re-decoding. | 517 m_checkedForBOM = false; // Skip BOM again when re-decoding. |
| 498 return result; | 518 return result; |
| 499 } | 519 } |
| 500 | 520 |
| 501 } // namespace blink | 521 } // namespace blink |
| OLD | NEW |